author     Dimitry Andric <dim@FreeBSD.org>   2019-10-23 17:51:42 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2019-10-23 17:51:42 +0000
commit     1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree       2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9
parent     e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
download   src-1d5ae1026e831016fc29fd927877c86af904481f.tar.gz
           src-1d5ae1026e831016fc29fd927877c86af904481f.zip
Vendor import of stripped llvm trunk r375505, the last commit before the
upstream Subversion repository was made read-only, and the LLVM project
migrated to GitHub: https://llvm.org/svn/llvm-project/llvm/trunk@375505
(tags: vendor/llvm/llvm-trunk-r375505, vendor/llvm)
Notes:
    svn path=/vendor/llvm/dist/; revision=353940
    svn path=/vendor/llvm/llvm-r375505/; revision=353941; tag=vendor/llvm/llvm-trunk-r375505

Diffstat
-rw-r--r--include/llvm-c/Core.h22
-rw-r--r--include/llvm-c/DebugInfo.h47
-rw-r--r--include/llvm-c/Remarks.h17
-rw-r--r--include/llvm-c/Transforms/IPO.h18
-rw-r--r--include/llvm-c/Transforms/Scalar.h6
-rw-r--r--include/llvm-c/lto.h94
-rw-r--r--include/llvm/ADT/APFloat.h5
-rw-r--r--include/llvm/ADT/APInt.h9
-rw-r--r--include/llvm/ADT/Any.h4
-rw-r--r--include/llvm/ADT/ArrayRef.h6
-rw-r--r--include/llvm/ADT/DenseMap.h57
-rw-r--r--include/llvm/ADT/DenseMapInfo.h13
-rw-r--r--include/llvm/ADT/DirectedGraph.h270
-rw-r--r--include/llvm/ADT/Hashing.h1
-rw-r--r--include/llvm/ADT/IntervalMap.h4
-rw-r--r--include/llvm/ADT/PointerIntPair.h11
-rw-r--r--include/llvm/ADT/PointerUnion.h30
-rw-r--r--include/llvm/ADT/STLExtras.h168
-rw-r--r--include/llvm/ADT/SmallBitVector.h2
-rw-r--r--include/llvm/ADT/Statistic.h102
-rw-r--r--include/llvm/ADT/StringExtras.h2
-rw-r--r--include/llvm/ADT/StringMap.h59
-rw-r--r--include/llvm/ADT/StringRef.h18
-rw-r--r--include/llvm/ADT/StringSet.h8
-rw-r--r--include/llvm/ADT/TinyPtrVector.h38
-rw-r--r--include/llvm/ADT/VariadicFunction.h330
-rw-r--r--include/llvm/ADT/iterator_range.h1
-rw-r--r--include/llvm/Analysis/AliasAnalysis.h2
-rw-r--r--include/llvm/Analysis/AliasSetTracker.h5
-rw-r--r--include/llvm/Analysis/AssumptionCache.h4
-rw-r--r--include/llvm/Analysis/CFG.h2
-rw-r--r--include/llvm/Analysis/CFLAndersAliasAnalysis.h5
-rw-r--r--include/llvm/Analysis/CFLSteensAliasAnalysis.h5
-rw-r--r--include/llvm/Analysis/CGSCCPassManager.h31
-rw-r--r--include/llvm/Analysis/CaptureTracking.h6
-rw-r--r--include/llvm/Analysis/DDG.h430
-rw-r--r--include/llvm/Analysis/DOTGraphTraitsPass.h4
-rw-r--r--include/llvm/Analysis/DependenceGraphBuilder.h119
-rw-r--r--include/llvm/Analysis/DivergenceAnalysis.h16
-rw-r--r--include/llvm/Analysis/GlobalsModRef.h12
-rw-r--r--include/llvm/Analysis/InstructionSimplify.h36
-rw-r--r--include/llvm/Analysis/LazyCallGraph.h10
-rw-r--r--include/llvm/Analysis/LegacyDivergenceAnalysis.h16
-rw-r--r--include/llvm/Analysis/Loads.h22
-rw-r--r--include/llvm/Analysis/LoopAnalysisManager.h10
-rw-r--r--include/llvm/Analysis/LoopCacheAnalysis.h281
-rw-r--r--include/llvm/Analysis/LoopInfo.h37
-rw-r--r--include/llvm/Analysis/LoopInfoImpl.h8
-rw-r--r--include/llvm/Analysis/MemoryBuiltins.h26
-rw-r--r--include/llvm/Analysis/MemoryDependenceAnalysis.h14
-rw-r--r--include/llvm/Analysis/MemorySSA.h4
-rw-r--r--include/llvm/Analysis/MemorySSAUpdater.h3
-rw-r--r--include/llvm/Analysis/MustExecute.h285
-rw-r--r--include/llvm/Analysis/Passes.h7
-rw-r--r--include/llvm/Analysis/ProfileSummaryInfo.h23
-rw-r--r--include/llvm/Analysis/RegionInfoImpl.h2
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h6
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h22
-rw-r--r--include/llvm/Analysis/TargetLibraryInfo.h17
-rw-r--r--include/llvm/Analysis/TargetTransformInfo.h180
-rw-r--r--include/llvm/Analysis/TargetTransformInfoImpl.h55
-rw-r--r--include/llvm/Analysis/TypeMetadataUtils.h2
-rw-r--r--include/llvm/Analysis/Utils/Local.h22
-rw-r--r--include/llvm/Analysis/ValueTracking.h67
-rw-r--r--include/llvm/Analysis/VectorUtils.h144
-rw-r--r--include/llvm/BinaryFormat/Dwarf.def198
-rw-r--r--include/llvm/BinaryFormat/Dwarf.h125
-rw-r--r--include/llvm/BinaryFormat/ELF.h66
-rw-r--r--include/llvm/BinaryFormat/ELFRelocs/AArch64.def7
-rw-r--r--include/llvm/BinaryFormat/MachO.h5
-rw-r--r--include/llvm/BinaryFormat/Magic.h1
-rw-r--r--include/llvm/BinaryFormat/Minidump.h68
-rw-r--r--include/llvm/BinaryFormat/MinidumpConstants.def41
-rw-r--r--include/llvm/BinaryFormat/Wasm.h14
-rw-r--r--include/llvm/BinaryFormat/XCOFF.h116
-rw-r--r--include/llvm/Bitcode/BitcodeAnalyzer.h1
-rw-r--r--include/llvm/Bitcode/LLVMBitCodes.h2
-rw-r--r--include/llvm/Bitstream/BitCodes.h5
-rw-r--r--include/llvm/Bitstream/BitstreamReader.h1
-rw-r--r--include/llvm/CodeGen/AccelTable.h2
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h21
-rw-r--r--include/llvm/CodeGen/BasicTTIImpl.h73
-rw-r--r--include/llvm/CodeGen/CallingConvLower.h18
-rw-r--r--include/llvm/CodeGen/DFAPacketizer.h44
-rw-r--r--include/llvm/CodeGen/DIE.h12
-rw-r--r--include/llvm/CodeGen/FastISel.h4
-rw-r--r--include/llvm/CodeGen/FunctionLoweringInfo.h2
-rw-r--r--include/llvm/CodeGen/GlobalISel/CallLowering.h127
-rw-r--r--include/llvm/CodeGen/GlobalISel/CombinerHelper.h127
-rw-r--r--include/llvm/CodeGen/GlobalISel/CombinerInfo.h15
-rw-r--r--include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h11
-rw-r--r--include/llvm/CodeGen/GlobalISel/GISelKnownBits.h111
-rw-r--r--include/llvm/CodeGen/GlobalISel/IRTranslator.h12
-rw-r--r--include/llvm/CodeGen/GlobalISel/InstructionSelector.h34
-rw-r--r--include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h66
-rw-r--r--include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h92
-rw-r--r--include/llvm/CodeGen/GlobalISel/LegalizerHelper.h20
-rw-r--r--include/llvm/CodeGen/GlobalISel/LegalizerInfo.h61
-rw-r--r--include/llvm/CodeGen/GlobalISel/MIPatternMatch.h20
-rw-r--r--include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h93
-rw-r--r--include/llvm/CodeGen/GlobalISel/Utils.h22
-rw-r--r--include/llvm/CodeGen/ISDOpcodes.h41
-rw-r--r--include/llvm/CodeGen/LiveInterval.h6
-rw-r--r--include/llvm/CodeGen/LiveIntervals.h21
-rw-r--r--include/llvm/CodeGen/LiveRangeCalc.h (renamed from lib/CodeGen/LiveRangeCalc.h)10
-rw-r--r--include/llvm/CodeGen/LiveRegUnits.h4
-rw-r--r--include/llvm/CodeGen/MIRYamlMapping.h1
-rw-r--r--include/llvm/CodeGen/MachineBasicBlock.h36
-rw-r--r--include/llvm/CodeGen/MachineCombinerPattern.h21
-rw-r--r--include/llvm/CodeGen/MachineDominators.h63
-rw-r--r--include/llvm/CodeGen/MachineFrameInfo.h62
-rw-r--r--include/llvm/CodeGen/MachineFunction.h61
-rw-r--r--include/llvm/CodeGen/MachineInstr.h99
-rw-r--r--include/llvm/CodeGen/MachineInstrBuilder.h62
-rw-r--r--include/llvm/CodeGen/MachineLoopUtils.h41
-rw-r--r--include/llvm/CodeGen/MachineMemOperand.h7
-rw-r--r--include/llvm/CodeGen/MachineModuleInfo.h52
-rw-r--r--include/llvm/CodeGen/MachineOperand.h49
-rw-r--r--include/llvm/CodeGen/MachinePipeliner.h80
-rw-r--r--include/llvm/CodeGen/MachinePostDominators.h46
-rw-r--r--include/llvm/CodeGen/MachineRegionInfo.h2
-rw-r--r--include/llvm/CodeGen/MachineRegisterInfo.h70
-rw-r--r--include/llvm/CodeGen/MachineScheduler.h1
-rw-r--r--include/llvm/CodeGen/ModuloSchedule.h367
-rw-r--r--include/llvm/CodeGen/PBQP/Math.h12
-rw-r--r--include/llvm/CodeGen/Passes.h4
-rw-r--r--include/llvm/CodeGen/Register.h118
-rw-r--r--include/llvm/CodeGen/RegisterClassInfo.h2
-rw-r--r--include/llvm/CodeGen/RegisterPressure.h9
-rw-r--r--include/llvm/CodeGen/RegisterScavenging.h24
-rw-r--r--include/llvm/CodeGen/ScheduleDAGInstrs.h12
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h95
-rw-r--r--include/llvm/CodeGen/SelectionDAGISel.h36
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h105
-rw-r--r--include/llvm/CodeGen/StackProtector.h6
-rw-r--r--include/llvm/CodeGen/SwitchLoweringUtils.h5
-rw-r--r--include/llvm/CodeGen/TargetCallingConv.h23
-rw-r--r--include/llvm/CodeGen/TargetFrameLowering.h30
-rw-r--r--include/llvm/CodeGen/TargetInstrInfo.h102
-rw-r--r--include/llvm/CodeGen/TargetLowering.h399
-rw-r--r--include/llvm/CodeGen/TargetLoweringObjectFileImpl.h34
-rw-r--r--include/llvm/CodeGen/TargetPassConfig.h2
-rw-r--r--include/llvm/CodeGen/TargetRegisterInfo.h94
-rw-r--r--include/llvm/CodeGen/TargetSubtargetInfo.h10
-rw-r--r--include/llvm/CodeGen/ValueTypes.h4
-rw-r--r--include/llvm/CodeGen/ValueTypes.td247
-rw-r--r--include/llvm/CodeGen/VirtRegMap.h43
-rw-r--r--include/llvm/DebugInfo/CodeView/CVTypeVisitor.h4
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h13
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeViewRegisters.def128
-rw-r--r--include/llvm/DebugInfo/CodeView/EnumTables.h11
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolDeserializer.h2
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolRecord.h304
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeDeserializer.h2
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeRecordMapping.h1
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h5
-rw-r--r--include/llvm/DebugInfo/DIContext.h14
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h4
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h68
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFAttribute.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFContext.h8
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h13
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h6
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h6
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h4
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h8
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h10
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugLine.h27
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h35
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h4
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDie.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFExpression.h14
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFFormValue.h10
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFListTable.h77
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFObject.h30
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFUnit.h51
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFVerifier.h4
-rw-r--r--include/llvm/DebugInfo/GSYM/FileEntry.h7
-rw-r--r--include/llvm/DebugInfo/GSYM/FileWriter.h124
-rw-r--r--include/llvm/DebugInfo/GSYM/FunctionInfo.h154
-rw-r--r--include/llvm/DebugInfo/GSYM/GsymCreator.h229
-rw-r--r--include/llvm/DebugInfo/GSYM/GsymReader.h228
-rw-r--r--include/llvm/DebugInfo/GSYM/Header.h129
-rw-r--r--include/llvm/DebugInfo/GSYM/InlineInfo.h63
-rw-r--r--include/llvm/DebugInfo/GSYM/LineEntry.h7
-rw-r--r--include/llvm/DebugInfo/GSYM/LineTable.h198
-rw-r--r--include/llvm/DebugInfo/GSYM/Range.h33
-rw-r--r--include/llvm/DebugInfo/GSYM/StringTable.h7
-rw-r--r--include/llvm/DebugInfo/PDB/GenericError.h2
-rw-r--r--include/llvm/DebugInfo/PDB/Native/SymbolCache.h2
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbol.h2
-rw-r--r--include/llvm/DebugInfo/Symbolize/Symbolize.h1
-rw-r--r--include/llvm/Demangle/Demangle.h9
-rw-r--r--include/llvm/Demangle/DemangleConfig.h7
-rw-r--r--include/llvm/Demangle/ItaniumDemangle.h419
-rw-r--r--include/llvm/Demangle/MicrosoftDemangle.h1
-rw-r--r--include/llvm/Demangle/MicrosoftDemangleNodes.h7
-rw-r--r--include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h39
-rw-r--r--include/llvm/ExecutionEngine/JITLink/JITLink.h1244
-rw-r--r--include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h17
-rw-r--r--include/llvm/ExecutionEngine/JITLink/MachO_arm64.h60
-rw-r--r--include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h1
-rw-r--r--include/llvm/ExecutionEngine/JITSymbol.h5
-rw-r--r--include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h10
-rw-r--r--include/llvm/ExecutionEngine/Orc/Core.h137
-rw-r--r--include/llvm/ExecutionEngine/Orc/ExecutionUtils.h46
-rw-r--r--include/llvm/ExecutionEngine/Orc/IRTransformLayer.h3
-rw-r--r--include/llvm/ExecutionEngine/Orc/LLJIT.h4
-rw-r--r--include/llvm/ExecutionEngine/Orc/LambdaResolver.h5
-rw-r--r--include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h40
-rw-r--r--include/llvm/ExecutionEngine/Orc/LazyReexports.h13
-rw-r--r--include/llvm/ExecutionEngine/Orc/Legacy.h2
-rw-r--r--include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h23
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h4
-rw-r--r--include/llvm/ExecutionEngine/Orc/RPCSerialization.h12
-rw-r--r--include/llvm/ExecutionEngine/Orc/RPCUtils.h65
-rw-r--r--include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h6
-rw-r--r--include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h21
-rw-r--r--include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h84
-rw-r--r--include/llvm/ExecutionEngine/Orc/Speculation.h207
-rw-r--r--include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h53
-rw-r--r--include/llvm/ExecutionEngine/RuntimeDyld.h23
-rw-r--r--include/llvm/IR/Attributes.h49
-rw-r--r--include/llvm/IR/AutoUpgrade.h10
-rw-r--r--include/llvm/IR/BasicBlock.h5
-rw-r--r--include/llvm/IR/CallSite.h9
-rw-r--r--include/llvm/IR/CallingConv.h13
-rw-r--r--include/llvm/IR/Constant.h6
-rw-r--r--include/llvm/IR/ConstantRange.h10
-rw-r--r--include/llvm/IR/DataLayout.h125
-rw-r--r--include/llvm/IR/DebugInfoFlags.def6
-rw-r--r--include/llvm/IR/DebugInfoMetadata.h4
-rw-r--r--include/llvm/IR/DerivedTypes.h75
-rw-r--r--include/llvm/IR/DiagnosticInfo.h25
-rw-r--r--include/llvm/IR/FixedMetadataKinds.def43
-rw-r--r--include/llvm/IR/Function.h15
-rw-r--r--include/llvm/IR/GlobalAlias.h4
-rw-r--r--include/llvm/IR/GlobalIFunc.h4
-rw-r--r--include/llvm/IR/GlobalIndirectSymbol.h8
-rw-r--r--include/llvm/IR/GlobalObject.h26
-rw-r--r--include/llvm/IR/GlobalVariable.h1
-rw-r--r--include/llvm/IR/IRBuilder.h90
-rw-r--r--include/llvm/IR/InlineAsm.h1
-rw-r--r--include/llvm/IR/InstrTypes.h12
-rw-r--r--include/llvm/IR/Instruction.h10
-rw-r--r--include/llvm/IR/Instructions.h99
-rw-r--r--include/llvm/IR/IntrinsicInst.h23
-rw-r--r--include/llvm/IR/Intrinsics.h10
-rw-r--r--include/llvm/IR/Intrinsics.td295
-rw-r--r--include/llvm/IR/IntrinsicsAArch64.td125
-rw-r--r--include/llvm/IR/IntrinsicsAMDGPU.td121
-rw-r--r--include/llvm/IR/IntrinsicsARM.td9
-rw-r--r--include/llvm/IR/IntrinsicsBPF.td3
-rw-r--r--include/llvm/IR/IntrinsicsMips.td16
-rw-r--r--include/llvm/IR/IntrinsicsNVVM.td125
-rw-r--r--include/llvm/IR/IntrinsicsWebAssembly.td58
-rw-r--r--include/llvm/IR/IntrinsicsX86.td12
-rw-r--r--include/llvm/IR/LLVMContext.h31
-rw-r--r--include/llvm/IR/MDBuilder.h5
-rw-r--r--include/llvm/IR/Metadata.h4
-rw-r--r--include/llvm/IR/Module.h1
-rw-r--r--include/llvm/IR/ModuleSummaryIndex.h18
-rw-r--r--include/llvm/IR/ModuleSummaryIndexYAML.h2
-rw-r--r--include/llvm/IR/Operator.h21
-rw-r--r--include/llvm/IR/PassManager.h5
-rw-r--r--include/llvm/IR/PassManagerInternal.h2
-rw-r--r--include/llvm/IR/PatternMatch.h155
-rw-r--r--include/llvm/IR/RemarkStreamer.h28
-rw-r--r--include/llvm/IR/Type.h15
-rw-r--r--include/llvm/IR/User.h2
-rw-r--r--include/llvm/IR/Value.h58
-rw-r--r--include/llvm/IR/ValueMap.h15
-rw-r--r--include/llvm/InitializePasses.h10
-rw-r--r--include/llvm/LTO/Config.h2
-rw-r--r--include/llvm/LTO/LTO.h10
-rw-r--r--include/llvm/LTO/legacy/LTOCodeGenerator.h2
-rw-r--r--include/llvm/LinkAllPasses.h2
-rw-r--r--include/llvm/MC/MCAsmInfo.h18
-rw-r--r--include/llvm/MC/MCAsmInfoXCOFF.h5
-rw-r--r--include/llvm/MC/MCAsmMacro.h11
-rw-r--r--include/llvm/MC/MCContext.h23
-rw-r--r--include/llvm/MC/MCDirectives.h1
-rw-r--r--include/llvm/MC/MCDwarf.h3
-rw-r--r--include/llvm/MC/MCExpr.h8
-rw-r--r--include/llvm/MC/MCFixup.h119
-rw-r--r--include/llvm/MC/MCFragment.h16
-rw-r--r--include/llvm/MC/MCInstPrinter.h2
-rw-r--r--include/llvm/MC/MCInstrAnalysis.h6
-rw-r--r--include/llvm/MC/MCInstrDesc.h23
-rw-r--r--include/llvm/MC/MCLinkerOptimizationHint.h2
-rw-r--r--include/llvm/MC/MCRegister.h110
-rw-r--r--include/llvm/MC/MCRegisterInfo.h98
-rw-r--r--include/llvm/MC/MCSection.h7
-rw-r--r--include/llvm/MC/MCSectionXCOFF.h22
-rw-r--r--include/llvm/MC/MCStreamer.h41
-rw-r--r--include/llvm/MC/MCSubtargetInfo.h46
-rw-r--r--include/llvm/MC/MCSymbolWasm.h7
-rw-r--r--include/llvm/MC/MCSymbolXCOFF.h32
-rw-r--r--include/llvm/MC/MCWasmObjectWriter.h4
-rw-r--r--include/llvm/MC/MCXCOFFStreamer.h2
-rw-r--r--include/llvm/MC/StringTableBuilder.h2
-rw-r--r--include/llvm/MC/SubtargetFeature.h139
-rw-r--r--include/llvm/MCA/CodeEmitter.h72
-rw-r--r--include/llvm/MCA/Context.h5
-rw-r--r--include/llvm/MCA/HardwareUnits/LSUnit.h18
-rw-r--r--include/llvm/MCA/HardwareUnits/RegisterFile.h2
-rw-r--r--include/llvm/MCA/HardwareUnits/ResourceManager.h51
-rw-r--r--include/llvm/MCA/HardwareUnits/RetireControlUnit.h33
-rw-r--r--include/llvm/MCA/HardwareUnits/Scheduler.h13
-rw-r--r--include/llvm/MCA/Instruction.h51
-rw-r--r--include/llvm/MCA/SourceMgr.h5
-rw-r--r--include/llvm/MCA/Stages/RetireStage.h6
-rw-r--r--include/llvm/Object/Archive.h7
-rw-r--r--include/llvm/Object/Binary.h16
-rw-r--r--include/llvm/Object/COFF.h36
-rw-r--r--include/llvm/Object/ELF.h112
-rw-r--r--include/llvm/Object/ELFObjectFile.h31
-rw-r--r--include/llvm/Object/ELFTypes.h6
-rw-r--r--include/llvm/Object/MachO.h1
-rw-r--r--include/llvm/Object/MachOUniversal.h14
-rw-r--r--include/llvm/Object/Minidump.h77
-rw-r--r--include/llvm/Object/ObjectFile.h21
-rw-r--r--include/llvm/Object/StackMapParser.h4
-rw-r--r--include/llvm/Object/TapiFile.h60
-rw-r--r--include/llvm/Object/TapiUniversal.h109
-rw-r--r--include/llvm/Object/WindowsResource.h55
-rw-r--r--include/llvm/Object/XCOFFObjectFile.h132
-rw-r--r--include/llvm/ObjectYAML/DWARFYAML.h2
-rw-r--r--include/llvm/ObjectYAML/ELFYAML.h116
-rw-r--r--include/llvm/ObjectYAML/MachOYAML.h3
-rw-r--r--include/llvm/ObjectYAML/MinidumpYAML.h64
-rw-r--r--include/llvm/ObjectYAML/WasmYAML.h2
-rw-r--r--include/llvm/ObjectYAML/yaml2obj.h67
-rw-r--r--include/llvm/Pass.h5
-rw-r--r--include/llvm/Passes/PassBuilder.h7
-rw-r--r--include/llvm/ProfileData/Coverage/CoverageMapping.h16
-rw-r--r--include/llvm/ProfileData/Coverage/CoverageMappingWriter.h3
-rw-r--r--include/llvm/ProfileData/InstrProf.h18
-rw-r--r--include/llvm/ProfileData/InstrProfReader.h12
-rw-r--r--include/llvm/ProfileData/SampleProf.h178
-rw-r--r--include/llvm/ProfileData/SampleProfReader.h272
-rw-r--r--include/llvm/ProfileData/SampleProfWriter.h118
-rw-r--r--include/llvm/Remarks/BitstreamRemarkContainer.h106
-rw-r--r--include/llvm/Remarks/BitstreamRemarkParser.h116
-rw-r--r--include/llvm/Remarks/BitstreamRemarkSerializer.h196
-rw-r--r--include/llvm/Remarks/Remark.h36
-rw-r--r--include/llvm/Remarks/RemarkFormat.h4
-rw-r--r--include/llvm/Remarks/RemarkParser.h38
-rw-r--r--include/llvm/Remarks/RemarkSerializer.h70
-rw-r--r--include/llvm/Remarks/RemarkStringTable.h24
-rw-r--r--include/llvm/Remarks/YAMLRemarkSerializer.h108
-rw-r--r--include/llvm/Support/AArch64TargetParser.def72
-rw-r--r--include/llvm/Support/AArch64TargetParser.h3
-rw-r--r--include/llvm/Support/ARMTargetParser.def2
-rw-r--r--include/llvm/Support/ARMTargetParser.h20
-rw-r--r--include/llvm/Support/AlignOf.h134
-rw-r--r--include/llvm/Support/Alignment.h403
-rw-r--r--include/llvm/Support/Allocator.h22
-rw-r--r--include/llvm/Support/Automaton.h253
-rw-r--r--include/llvm/Support/BinaryStreamArray.h2
-rw-r--r--include/llvm/Support/BinaryStreamReader.h2
-rw-r--r--include/llvm/Support/CRC.h45
-rw-r--r--include/llvm/Support/CommandLine.h3
-rw-r--r--include/llvm/Support/Compiler.h81
-rw-r--r--include/llvm/Support/DataExtractor.h196
-rw-r--r--include/llvm/Support/Endian.h10
-rw-r--r--include/llvm/Support/Error.h42
-rw-r--r--include/llvm/Support/FileCheck.h604
-rw-r--r--include/llvm/Support/FileCollector.h79
-rw-r--r--include/llvm/Support/FileSystem.h30
-rw-r--r--include/llvm/Support/FileUtilities.h38
-rw-r--r--include/llvm/Support/Format.h5
-rw-r--r--include/llvm/Support/GenericDomTree.h6
-rw-r--r--include/llvm/Support/GenericDomTreeConstruction.h8
-rw-r--r--include/llvm/Support/GlobPattern.h2
-rw-r--r--include/llvm/Support/Host.h28
-rw-r--r--include/llvm/Support/JamCRC.h48
-rw-r--r--include/llvm/Support/MachineValueType.h419
-rw-r--r--include/llvm/Support/MathExtras.h187
-rw-r--r--include/llvm/Support/Mutex.h105
-rw-r--r--include/llvm/Support/MutexGuard.h40
-rw-r--r--include/llvm/Support/OnDiskHashTable.h3
-rw-r--r--include/llvm/Support/Parallel.h27
-rw-r--r--include/llvm/Support/RWMutex.h321
-rw-r--r--include/llvm/Support/Regex.h18
-rw-r--r--include/llvm/Support/Registry.h2
-rw-r--r--include/llvm/Support/SHA1.h2
-rw-r--r--include/llvm/Support/ScalableSize.h43
-rw-r--r--include/llvm/Support/Signals.h11
-rw-r--r--include/llvm/Support/SwapByteOrder.h38
-rw-r--r--include/llvm/Support/TargetOpcodes.def26
-rw-r--r--include/llvm/Support/TargetRegistry.h4
-rw-r--r--include/llvm/Support/TimeProfiler.h2
-rw-r--r--include/llvm/Support/TrailingObjects.h18
-rw-r--r--include/llvm/Support/TypeSize.h201
-rw-r--r--include/llvm/Support/UnicodeCharRanges.h3
-rw-r--r--include/llvm/Support/UniqueLock.h68
-rw-r--r--include/llvm/Support/VirtualFileSystem.h16
-rw-r--r--include/llvm/Support/Win64EH.h4
-rw-r--r--include/llvm/Support/X86TargetParser.def4
-rw-r--r--include/llvm/Support/YAMLTraits.h11
-rw-r--r--include/llvm/Support/circular_raw_ostream.h4
-rw-r--r--include/llvm/Support/raw_ostream.h27
-rw-r--r--include/llvm/Support/type_traits.h18
-rw-r--r--include/llvm/TableGen/Automaton.td95
-rw-r--r--include/llvm/TableGen/Error.h1
-rw-r--r--include/llvm/TableGen/Record.h14
-rw-r--r--include/llvm/Target/GenericOpcodes.td87
-rw-r--r--include/llvm/Target/GlobalISel/Combine.td103
-rw-r--r--include/llvm/Target/GlobalISel/SelectionDAGCompat.td25
-rw-r--r--include/llvm/Target/Target.td33
-rw-r--r--include/llvm/Target/TargetCallingConv.td6
-rw-r--r--include/llvm/Target/TargetItinerary.td11
-rw-r--r--include/llvm/Target/TargetLoweringObjectFile.h3
-rw-r--r--include/llvm/Target/TargetMachine.h28
-rw-r--r--include/llvm/Target/TargetSchedule.td8
-rw-r--r--include/llvm/Target/TargetSelectionDAG.td146
-rw-r--r--include/llvm/TextAPI/MachO/Architecture.h4
-rw-r--r--include/llvm/TextAPI/MachO/ArchitectureSet.h4
-rw-r--r--include/llvm/TextAPI/MachO/InterfaceFile.h240
-rw-r--r--include/llvm/TextAPI/MachO/Platform.h45
-rw-r--r--include/llvm/TextAPI/MachO/Symbol.h35
-rw-r--r--include/llvm/TextAPI/MachO/Target.h68
-rw-r--r--include/llvm/TextAPI/MachO/TextAPIReader.h5
-rw-r--r--include/llvm/Transforms/IPO/Attributor.h1729
-rw-r--r--include/llvm/Transforms/IPO/GlobalDCE.h14
-rw-r--r--include/llvm/Transforms/IPO/HotColdSplitting.h39
-rw-r--r--include/llvm/Transforms/IPO/LowerTypeTests.h2
-rw-r--r--include/llvm/Transforms/IPO/WholeProgramDevirt.h26
-rw-r--r--include/llvm/Transforms/Instrumentation.h4
-rw-r--r--include/llvm/Transforms/Instrumentation/InstrProfiling.h5
-rw-r--r--include/llvm/Transforms/Instrumentation/MemorySanitizer.h12
-rw-r--r--include/llvm/Transforms/Instrumentation/SanitizerCoverage.h47
-rw-r--r--include/llvm/Transforms/Instrumentation/ThreadSanitizer.h2
-rw-r--r--include/llvm/Transforms/Scalar.h9
-rw-r--r--include/llvm/Transforms/Scalar/CallSiteSplitting.h5
-rw-r--r--include/llvm/Transforms/Scalar/ConstantHoisting.h10
-rw-r--r--include/llvm/Transforms/Scalar/Float2Int.h6
-rw-r--r--include/llvm/Transforms/Scalar/GVN.h7
-rw-r--r--include/llvm/Transforms/Scalar/GVNExpression.h9
-rw-r--r--include/llvm/Transforms/Scalar/LoopPassManager.h24
-rw-r--r--include/llvm/Transforms/Scalar/LoopUnrollPass.h14
-rw-r--r--include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h41
-rw-r--r--include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h18
-rw-r--r--include/llvm/Transforms/Scalar/Reassociate.h4
-rw-r--r--include/llvm/Transforms/Scalar/SCCP.h3
-rw-r--r--include/llvm/Transforms/Utils/BasicBlockUtils.h11
-rw-r--r--include/llvm/Transforms/Utils/BuildLibCalls.h27
-rw-r--r--include/llvm/Transforms/Utils/BypassSlowDivision.h13
-rw-r--r--include/llvm/Transforms/Utils/CodeExtractor.h57
-rw-r--r--include/llvm/Transforms/Utils/Local.h16
-rw-r--r--include/llvm/Transforms/Utils/LoopUtils.h5
-rw-r--r--include/llvm/Transforms/Utils/MisExpect.h43
-rw-r--r--include/llvm/Transforms/Utils/PredicateInfo.h10
-rw-r--r--include/llvm/Transforms/Utils/SimplifyLibCalls.h10
-rw-r--r--include/llvm/Transforms/Utils/UnrollLoop.h8
-rw-r--r--include/llvm/Transforms/Utils/ValueMapper.h9
-rw-r--r--include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h48
-rw-r--r--include/llvm/Transforms/Vectorize/LoopVectorize.h8
-rw-r--r--include/llvm/Transforms/Vectorize/SLPVectorizer.h9
-rw-r--r--include/llvm/XRay/FDRRecordProducer.h4
-rw-r--r--include/llvm/XRay/FDRRecords.h6
-rw-r--r--include/llvm/XRay/FileHeaderReader.h2
-rw-r--r--include/llvm/module.modulemap2
-rw-r--r--lib/Analysis/AliasAnalysis.cpp4
-rw-r--r--lib/Analysis/AliasSetTracker.cpp12
-rw-r--r--lib/Analysis/Analysis.cpp1
-rw-r--r--lib/Analysis/AssumptionCache.cpp12
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp42
-rw-r--r--lib/Analysis/BranchProbabilityInfo.cpp19
-rw-r--r--lib/Analysis/CFG.cpp11
-rw-r--r--lib/Analysis/CFGPrinter.cpp2
-rw-r--r--lib/Analysis/CFLAndersAliasAnalysis.cpp19
-rw-r--r--lib/Analysis/CFLSteensAliasAnalysis.cpp20
-rw-r--r--lib/Analysis/CallGraph.cpp4
-rw-r--r--lib/Analysis/CaptureTracking.cpp46
-rw-r--r--lib/Analysis/ConstantFolding.cpp405
-rw-r--r--lib/Analysis/DDG.cpp203
-rw-r--r--lib/Analysis/DependenceAnalysis.cpp8
-rw-r--r--lib/Analysis/DependenceGraphBuilder.cpp228
-rw-r--r--lib/Analysis/DivergenceAnalysis.cpp10
-rw-r--r--lib/Analysis/GlobalsModRef.cpp37
-rw-r--r--lib/Analysis/IVDescriptors.cpp3
-rw-r--r--lib/Analysis/IndirectCallPromotionAnalysis.cpp2
-rw-r--r--lib/Analysis/InlineCost.cpp23
-rw-r--r--lib/Analysis/InstructionSimplify.cpp320
-rw-r--r--lib/Analysis/LazyBranchProbabilityInfo.cpp5
-rw-r--r--lib/Analysis/LazyCallGraph.cpp13
-rw-r--r--lib/Analysis/LazyValueInfo.cpp37
-rw-r--r--lib/Analysis/LegacyDivergenceAnalysis.cpp36
-rw-r--r--lib/Analysis/Lint.cpp2
-rw-r--r--lib/Analysis/Loads.cpp238
-rw-r--r--lib/Analysis/LoopAccessAnalysis.cpp45
-rw-r--r--lib/Analysis/LoopAnalysisManager.cpp2
-rw-r--r--lib/Analysis/LoopCacheAnalysis.cpp625
-rw-r--r--lib/Analysis/LoopInfo.cpp39
-rw-r--r--lib/Analysis/LoopUnrollAnalyzer.cpp2
-rw-r--r--lib/Analysis/MemDerefPrinter.cpp4
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp51
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp21
-rw-r--r--lib/Analysis/MemorySSA.cpp95
-rw-r--r--lib/Analysis/MemorySSAUpdater.cpp323
-rw-r--r--lib/Analysis/ModuleSummaryAnalysis.cpp16
-rw-r--r--lib/Analysis/MustExecute.cpp118
-rw-r--r--lib/Analysis/OptimizationRemarkEmitter.cpp4
-rw-r--r--lib/Analysis/OrderedInstructions.cpp2
-rw-r--r--lib/Analysis/ProfileSummaryInfo.cpp67
-rw-r--r--lib/Analysis/ScalarEvolution.cpp89
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp19
-rw-r--r--lib/Analysis/StackSafetyAnalysis.cpp4
-rw-r--r--lib/Analysis/SyncDependenceAnalysis.cpp61
-rw-r--r--lib/Analysis/TargetLibraryInfo.cpp44
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp64
-rw-r--r--lib/Analysis/TypeMetadataUtils.cpp32
-rw-r--r--lib/Analysis/VFABIDemangling.cpp418
-rw-r--r--lib/Analysis/ValueTracking.cpp658
-rw-r--r--lib/Analysis/VectorUtils.cpp20
-rw-r--r--lib/AsmParser/LLLexer.cpp1
-rw-r--r--lib/AsmParser/LLParser.cpp81
-rw-r--r--lib/AsmParser/LLParser.h4
-rw-r--r--lib/AsmParser/LLToken.h1
-rw-r--r--lib/AsmParser/Parser.cpp8
-rw-r--r--lib/BinaryFormat/Dwarf.cpp22
-rw-r--r--lib/BinaryFormat/Magic.cpp5
-rw-r--r--lib/Bitcode/Reader/BitcodeAnalyzer.cpp10
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp84
-rw-r--r--lib/Bitcode/Reader/MetadataLoader.cpp6
-rw-r--r--lib/Bitcode/Writer/BitWriter.cpp2
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp9
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp16
-rw-r--r--lib/CodeGen/Analysis.cpp12
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp255
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp3
-rw-r--r--lib/CodeGen/AsmPrinter/ByteStreamer.h12
-rw-r--r--lib/CodeGen/AsmPrinter/CodeViewDebug.cpp116
-rw-r--r--lib/CodeGen/AsmPrinter/CodeViewDebug.h3
-rw-r--r--lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp12
-rw-r--r--lib/CodeGen/AsmPrinter/DebugLocStream.h19
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp176
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h31
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp644
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h22
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfExpression.cpp95
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfExpression.h95
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfFile.h19
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.cpp47
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.h14
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.cpp6
-rw-r--r--lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/WinException.cpp3
-rw-r--r--lib/CodeGen/AtomicExpandPass.cpp12
-rw-r--r--lib/CodeGen/BranchFolding.cpp34
-rw-r--r--lib/CodeGen/BranchRelaxation.cpp22
-rw-r--r--lib/CodeGen/BreakFalseDeps.cpp23
-rw-r--r--lib/CodeGen/CalcSpillWeights.cpp22
-rw-r--r--lib/CodeGen/CallingConvLower.cpp42
-rw-r--r--lib/CodeGen/CodeGen.cpp5
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp122
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp9
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp81
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp12
-rw-r--r--lib/CodeGen/DetectDeadLanes.cpp56
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp345
-rw-r--r--lib/CodeGen/ExecutionDomainFix.cpp1
-rw-r--r--lib/CodeGen/ExpandMemCmp.cpp2
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp10
-rw-r--r--lib/CodeGen/GCMetadata.cpp2
-rw-r--r--lib/CodeGen/GCRootLowering.cpp4
-rw-r--r--lib/CodeGen/GlobalISel/CSEInfo.cpp7
-rw-r--r--lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp11
-rw-r--r--lib/CodeGen/GlobalISel/CallLowering.cpp288
-rw-r--r--lib/CodeGen/GlobalISel/Combiner.cpp14
-rw-r--r--lib/CodeGen/GlobalISel/CombinerHelper.cpp919
-rw-r--r--lib/CodeGen/GlobalISel/GISelKnownBits.cpp383
-rw-r--r--lib/CodeGen/GlobalISel/IRTranslator.cpp392
-rw-r--r--lib/CodeGen/GlobalISel/InstructionSelect.cpp38
-rw-r--r--lib/CodeGen/GlobalISel/InstructionSelector.cpp2
-rw-r--r--lib/CodeGen/GlobalISel/Legalizer.cpp35
-rw-r--r--lib/CodeGen/GlobalISel/LegalizerHelper.cpp978
-rw-r--r--lib/CodeGen/GlobalISel/LegalizerInfo.cpp42
-rw-r--r--lib/CodeGen/GlobalISel/Localizer.cpp11
-rw-r--r--lib/CodeGen/GlobalISel/MachineIRBuilder.cpp93
-rw-r--r--lib/CodeGen/GlobalISel/RegBankSelect.cpp13
-rw-r--r--lib/CodeGen/GlobalISel/RegisterBank.cpp1
-rw-r--r--lib/CodeGen/GlobalISel/RegisterBankInfo.cpp17
-rw-r--r--lib/CodeGen/GlobalISel/Utils.cpp98
-rw-r--r--lib/CodeGen/GlobalMerge.cpp8
-rw-r--r--lib/CodeGen/HardwareLoops.cpp2
-rw-r--r--lib/CodeGen/IfConversion.cpp200
-rw-r--r--lib/CodeGen/ImplicitNullChecks.cpp8
-rw-r--r--lib/CodeGen/InlineSpiller.cpp22
-rw-r--r--lib/CodeGen/InterleavedLoadCombinePass.cpp4
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp34
-rw-r--r--lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp6
-rw-r--r--lib/CodeGen/LexicalScopes.cpp1
-rw-r--r--lib/CodeGen/LiveDebugValues.cpp510
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp257
-rw-r--r--lib/CodeGen/LiveInterval.cpp7
-rw-r--r--lib/CodeGen/LiveIntervals.cpp59
-rw-r--r--lib/CodeGen/LivePhysRegs.cpp20
-rw-r--r--lib/CodeGen/LiveRangeCalc.cpp5
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp14
-rw-r--r--lib/CodeGen/LiveRangeShrink.cpp4
-rw-r--r--lib/CodeGen/LiveRegMatrix.cpp2
-rw-r--r--lib/CodeGen/LiveRegUnits.cpp12
-rw-r--r--lib/CodeGen/LiveStacks.cpp7
-rw-r--r--lib/CodeGen/LiveVariables.cpp29
-rw-r--r--lib/CodeGen/LocalStackSlotAllocation.cpp10
-rw-r--r--lib/CodeGen/LowerEmuTLS.cpp7
-rw-r--r--lib/CodeGen/MIRCanonicalizerPass.cpp359
-rw-r--r--lib/CodeGen/MIRNamerPass.cpp77
-rw-r--r--lib/CodeGen/MIRParser/MILexer.cpp1
-rw-r--r--lib/CodeGen/MIRParser/MILexer.h2
-rw-r--r--lib/CodeGen/MIRParser/MIParser.cpp60
-rw-r--r--lib/CodeGen/MIRParser/MIRParser.cpp18
-rw-r--r--lib/CodeGen/MIRPrinter.cpp16
-rw-r--r--lib/CodeGen/MIRVRegNamerUtils.cpp348
-rw-r--r--lib/CodeGen/MIRVRegNamerUtils.h91
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp64
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp28
-rw-r--r--lib/CodeGen/MachineCSE.cpp75
-rw-r--r--lib/CodeGen/MachineCombiner.cpp6
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp78
-rw-r--r--lib/CodeGen/MachineDominators.cpp23
-rw-r--r--lib/CodeGen/MachineFrameInfo.cpp38
-rw-r--r--lib/CodeGen/MachineFunction.cpp58
-rw-r--r--lib/CodeGen/MachineFunctionPass.cpp6
-rw-r--r--lib/CodeGen/MachineInstr.cpp116
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp14
-rw-r--r--lib/CodeGen/MachineLICM.cpp61
-rw-r--r--lib/CodeGen/MachineLoopUtils.cpp132
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp85
-rw-r--r--lib/CodeGen/MachineOperand.cpp70
-rw-r--r--lib/CodeGen/MachineOptimizationRemarkEmitter.cpp2
-rw-r--r--lib/CodeGen/MachineOutliner.cpp16
-rw-r--r--lib/CodeGen/MachinePipeliner.cpp1235
-rw-r--r--lib/CodeGen/MachinePostDominators.cpp55
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp12
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp6
-rw-r--r--lib/CodeGen/MachineScheduler.cpp59
-rw-r--r--lib/CodeGen/MachineSink.cpp73
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp24
-rw-r--r--lib/CodeGen/MachineVerifier.cpp163
-rw-r--r--lib/CodeGen/MacroFusion.cpp4
-rw-r--r--lib/CodeGen/ModuloSchedule.cpp2022
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp15
-rw-r--r--lib/CodeGen/PHIElimination.cpp43
-rw-r--r--lib/CodeGen/PatchableFunction.cpp2
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp83
-rw-r--r--lib/CodeGen/PreISelIntrinsicLowering.cpp2
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp8
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp2
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp6
-rw-r--r--lib/CodeGen/ReachingDefAnalysis.cpp1
-rw-r--r--lib/CodeGen/RegAllocBase.cpp4
-rw-r--r--lib/CodeGen/RegAllocFast.cpp117
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp16
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp12
-rw-r--r--lib/CodeGen/RegUsageInfoCollector.cpp10
-rw-r--r--lib/CodeGen/RegUsageInfoPropagate.cpp6
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp71
-rw-r--r--lib/CodeGen/RegisterPressure.cpp36
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp62
-rw-r--r--lib/CodeGen/RenameIndependentSubregs.cpp4
-rw-r--r--lib/CodeGen/SafeStack.cpp2
-rw-r--r--lib/CodeGen/ScalarizeMaskedMemIntrin.cpp167
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp57
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp1758
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp67
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp77
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp222
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp430
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp510
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp56
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h61
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp46
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp50
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp139
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp18
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp18
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h3
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp283
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp1
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp495
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h2
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp35
-rw-r--r--lib/CodeGen/SelectionDAG/StatepointLowering.cpp34
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp1406
-rw-r--r--lib/CodeGen/ShrinkWrap.cpp5
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp5
-rw-r--r--lib/CodeGen/SplitKit.cpp6
-rw-r--r--lib/CodeGen/SplitKit.h2
-rw-r--r--lib/CodeGen/StackMaps.cpp8
-rw-r--r--lib/CodeGen/StackProtector.cpp67
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp8
-rw-r--r--lib/CodeGen/SwiftErrorValueTracking.cpp3
-rw-r--r--lib/CodeGen/TailDuplicator.cpp22
-rw-r--r--lib/CodeGen/TargetFrameLoweringImpl.cpp19
-rw-r--r--lib/CodeGen/TargetInstrInfo.cpp82
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp95
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp107
-rw-r--r--lib/CodeGen/TargetPassConfig.cpp24
-rw-r--r--lib/CodeGen/TargetRegisterInfo.cpp60
-rw-r--r--lib/CodeGen/TargetSchedule.cpp2
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp90
-rw-r--r--lib/CodeGen/UnreachableBlockElim.cpp15
-rw-r--r--lib/CodeGen/ValueTypes.cpp150
-rw-r--r--lib/CodeGen/VirtRegMap.cpp71
-rw-r--r--lib/CodeGen/XRayInstrumentation.cpp2
-rw-r--r--lib/DebugInfo/CodeView/CVTypeVisitor.cpp15
-rw-r--r--lib/DebugInfo/CodeView/CodeViewRecordIO.cpp8
-rw-r--r--lib/DebugInfo/CodeView/EnumTables.cpp166
-rw-r--r--lib/DebugInfo/CodeView/SymbolDumper.cpp2
-rw-r--r--lib/DebugInfo/CodeView/SymbolRecordMapping.cpp2
-rw-r--r--lib/DebugInfo/CodeView/TypeRecordMapping.cpp238
-rw-r--r--lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp8
-rw-r--r--lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp105
-rw-r--r--lib/DebugInfo/DWARF/DWARFCompileUnit.cpp10
-rw-r--r--lib/DebugInfo/DWARF/DWARFContext.cpp335
-rw-r--r--lib/DebugInfo/DWARF/DWARFDataExtractor.cpp13
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp10
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugAddr.cpp28
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp4
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugAranges.cpp12
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugFrame.cpp74
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp8
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugLine.cpp159
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugLoc.cpp257
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugMacro.cpp2
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp6
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp18
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp30
-rw-r--r--lib/DebugInfo/DWARF/DWARFDie.cpp54
-rw-r--r--lib/DebugInfo/DWARF/DWARFExpression.cpp13
-rw-r--r--lib/DebugInfo/DWARF/DWARFFormValue.cpp9
-rw-r--r--lib/DebugInfo/DWARF/DWARFGdbIndex.cpp2
-rw-r--r--lib/DebugInfo/DWARF/DWARFListTable.cpp70
-rw-r--r--lib/DebugInfo/DWARF/DWARFTypeUnit.cpp14
-rw-r--r--lib/DebugInfo/DWARF/DWARFUnit.cpp231
-rw-r--r--lib/DebugInfo/DWARF/DWARFUnitIndex.cpp12
-rw-r--r--lib/DebugInfo/DWARF/DWARFVerifier.cpp120
-rw-r--r--lib/DebugInfo/GSYM/FileWriter.cpp78
-rw-r--r--lib/DebugInfo/GSYM/FunctionInfo.cpp143
-rw-r--r--lib/DebugInfo/GSYM/GsymCreator.cpp275
-rw-r--r--lib/DebugInfo/GSYM/GsymReader.cpp265
-rw-r--r--lib/DebugInfo/GSYM/Header.cpp109
-rw-r--r--lib/DebugInfo/GSYM/InlineInfo.cpp100
-rw-r--r--lib/DebugInfo/GSYM/LineTable.cpp287
-rw-r--r--lib/DebugInfo/GSYM/Range.cpp47
-rw-r--r--lib/DebugInfo/MSF/MappedBlockStream.cpp6
-rw-r--r--lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp28
-rw-r--r--lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp2
-rw-r--r--lib/DebugInfo/PDB/DIA/DIASession.cpp46
-rw-r--r--lib/DebugInfo/PDB/GenericError.cpp4
-rw-r--r--lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp4
-rw-r--r--lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp2
-rw-r--r--lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp4
-rw-r--r--lib/DebugInfo/PDB/Native/Hash.cpp5
-rw-r--r--lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp29
-rw-r--r--lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp24
-rw-r--r--lib/DebugInfo/PDB/Native/NativeSession.cpp10
-rw-r--r--lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp4
-rw-r--r--lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp6
-rw-r--r--lib/DebugInfo/PDB/Native/PDBFile.cpp18
-rw-r--r--lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp17
-rw-r--r--lib/DebugInfo/PDB/Native/TpiHashing.cpp6
-rw-r--r--lib/DebugInfo/PDB/Native/TpiStream.cpp2
-rw-r--r--lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp2
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolFunc.cpp2
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp2
-rw-r--r--lib/DebugInfo/PDB/UDTLayout.cpp14
-rw-r--r--lib/DebugInfo/Symbolize/DIPrinter.cpp17
-rw-r--r--lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp32
-rw-r--r--lib/DebugInfo/Symbolize/SymbolizableObjectFile.h7
-rw-r--r--lib/DebugInfo/Symbolize/Symbolize.cpp52
-rw-r--r--lib/Demangle/ItaniumDemangle.cpp10
-rw-r--r--lib/Demangle/MicrosoftDemangle.cpp32
-rw-r--r--lib/Demangle/MicrosoftDemangleNodes.cpp51
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp34
-rw-r--r--lib/ExecutionEngine/GDBRegistrationListener.cpp8
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp4
-rw-r--r--lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h35
-rw-r--r--lib/ExecutionEngine/JITLink/EHFrameSupport.cpp216
-rw-r--r--lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h50
-rw-r--r--lib/ExecutionEngine/JITLink/JITLink.cpp158
-rw-r--r--lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp385
-rw-r--r--lib/ExecutionEngine/JITLink/JITLinkGeneric.h185
-rw-r--r--lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp63
-rw-r--r--lib/ExecutionEngine/JITLink/MachO.cpp3
-rw-r--r--lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp411
-rw-r--r--lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h138
-rw-r--r--lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp535
-rw-r--r--lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h269
-rw-r--r--lib/ExecutionEngine/JITLink/MachO_arm64.cpp736
-rw-r--r--lib/ExecutionEngine/JITLink/MachO_x86_64.cpp279
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp38
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp2
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp4
-rw-r--r--lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp127
-rw-r--r--lib/ExecutionEngine/Orc/CompileUtils.cpp2
-rw-r--r--lib/ExecutionEngine/Orc/Core.cpp506
-rw-r--r--lib/ExecutionEngine/Orc/ExecutionUtils.cpp92
-rw-r--r--lib/ExecutionEngine/Orc/IRCompileLayer.cpp4
-rw-r--r--lib/ExecutionEngine/Orc/IRTransformLayer.cpp2
-rw-r--r--lib/ExecutionEngine/Orc/IndirectionUtils.cpp27
-rw-r--r--lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp17
-rw-r--r--lib/ExecutionEngine/Orc/LLJIT.cpp38
-rw-r--r--lib/ExecutionEngine/Orc/Layer.cpp26
-rw-r--r--lib/ExecutionEngine/Orc/LazyReexports.cpp18
-rw-r--r--lib/ExecutionEngine/Orc/Legacy.cpp5
-rw-r--r--lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp258
-rw-r--r--lib/ExecutionEngine/Orc/OrcCBindingsStack.h11
-rw-r--r--lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp24
-rw-r--r--lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp307
-rw-r--r--lib/ExecutionEngine/Orc/Speculation.cpp146
-rw-r--r--lib/ExecutionEngine/Orc/ThreadSafeModule.cpp58
-rw-r--r--lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp8
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp64
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp8
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp2
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp54
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h2
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp17
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h8
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h5
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h5
-rw-r--r--lib/FuzzMutate/FuzzerCLI.cpp2
-rw-r--r--lib/IR/AsmWriter.cpp11
-rw-r--r--lib/IR/AttributeImpl.h6
-rw-r--r--lib/IR/Attributes.cpp84
-rw-r--r--lib/IR/AutoUpgrade.cpp157
-rw-r--r--lib/IR/BasicBlock.cpp7
-rw-r--r--lib/IR/ConstantFold.cpp25
-rw-r--r--lib/IR/ConstantRange.cpp76
-rw-r--r--lib/IR/Constants.cpp56
-rw-r--r--lib/IR/ConstantsContext.h12
-rw-r--r--lib/IR/Core.cpp114
-rw-r--r--lib/IR/DIBuilder.cpp2
-rw-r--r--lib/IR/DataLayout.cpp163
-rw-r--r--lib/IR/DebugInfo.cpp22
-rw-r--r--lib/IR/DebugInfoMetadata.cpp34
-rw-r--r--lib/IR/DiagnosticInfo.cpp11
-rw-r--r--lib/IR/Function.cpp103
-rw-r--r--lib/IR/Globals.cpp51
-rw-r--r--lib/IR/IRBuilder.cpp8
-rw-r--r--lib/IR/IRPrintingPasses.cpp18
-rw-r--r--lib/IR/InlineAsm.cpp10
-rw-r--r--lib/IR/Instruction.cpp2
-rw-r--r--lib/IR/Instructions.cpp167
-rw-r--r--lib/IR/IntrinsicInst.cpp15
-rw-r--r--lib/IR/LLVMContext.cpp31
-rw-r--r--lib/IR/LLVMContextImpl.cpp2
-rw-r--r--lib/IR/LegacyPassManager.cpp20
-rw-r--r--lib/IR/MDBuilder.cpp12
-rw-r--r--lib/IR/Metadata.cpp18
-rw-r--r--lib/IR/Module.cpp2
-rw-r--r--lib/IR/RemarkStreamer.cpp72
-rw-r--r--lib/IR/SafepointIRVerifier.cpp4
-rw-r--r--lib/IR/Type.cpp27
-rw-r--r--lib/IR/Value.cpp111
-rw-r--r--lib/IR/Verifier.cpp167
-rw-r--r--lib/LTO/Caching.cpp4
-rw-r--r--lib/LTO/LTO.cpp63
-rw-r--r--lib/LTO/LTOBackend.cpp18
-rw-r--r--lib/LTO/LTOCodeGenerator.cpp13
-rw-r--r--lib/LTO/LTOModule.cpp3
-rw-r--r--lib/LTO/SummaryBasedOptimizations.cpp2
-rw-r--r--lib/LTO/ThinLTOCodeGenerator.cpp139
-rw-r--r--lib/Linker/IRMover.cpp112
-rw-r--r--lib/Linker/LinkModules.cpp3
-rw-r--r--lib/MC/ELFObjectWriter.cpp86
-rw-r--r--lib/MC/MCAsmBackend.cpp5
-rw-r--r--lib/MC/MCAsmInfoXCOFF.cpp17
-rw-r--r--lib/MC/MCAsmMacro.cpp2
-rw-r--r--lib/MC/MCAsmStreamer.cpp140
-rw-r--r--lib/MC/MCAssembler.cpp38
-rw-r--r--lib/MC/MCContext.cpp29
-rw-r--r--lib/MC/MCDwarf.cpp44
-rw-r--r--lib/MC/MCELFStreamer.cpp7
-rw-r--r--lib/MC/MCExpr.cpp46
-rw-r--r--lib/MC/MCInstPrinter.cpp31
-rw-r--r--lib/MC/MCInstrAnalysis.cpp6
-rw-r--r--lib/MC/MCMachOStreamer.cpp1
-rw-r--r--lib/MC/MCObjectFileInfo.cpp14
-rw-r--r--lib/MC/MCObjectStreamer.cpp4
-rw-r--r--lib/MC/MCParser/AsmParser.cpp142
-rw-r--r--lib/MC/MCParser/COFFAsmParser.cpp155
-rw-r--r--lib/MC/MCParser/DarwinAsmParser.cpp4
-rw-r--r--lib/MC/MCParser/WasmAsmParser.cpp1
-rw-r--r--lib/MC/MCRegisterInfo.cpp48
-rw-r--r--lib/MC/MCSectionXCOFF.cpp50
-rw-r--r--lib/MC/MCStreamer.cpp84
-rw-r--r--lib/MC/MCSubtargetInfo.cpp25
-rw-r--r--lib/MC/MCWasmObjectTargetWriter.cpp5
-rw-r--r--lib/MC/MCWasmStreamer.cpp2
-rw-r--r--lib/MC/MCWinCOFFStreamer.cpp18
-rw-r--r--lib/MC/MCXCOFFStreamer.cpp54
-rw-r--r--lib/MC/MachObjectWriter.cpp14
-rw-r--r--lib/MC/StringTableBuilder.cpp10
-rw-r--r--lib/MC/WasmObjectWriter.cpp77
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp10
-rw-r--r--lib/MC/XCOFFObjectWriter.cpp533
-rw-r--r--lib/MCA/CodeEmitter.cpp37
-rw-r--r--lib/MCA/Context.cpp23
-rw-r--r--lib/MCA/HardwareUnits/LSUnit.cpp28
-rw-r--r--lib/MCA/HardwareUnits/RegisterFile.cpp16
-rw-r--r--lib/MCA/HardwareUnits/ResourceManager.cpp59
-rw-r--r--lib/MCA/HardwareUnits/RetireControlUnit.cpp65
-rw-r--r--lib/MCA/HardwareUnits/Scheduler.cpp12
-rw-r--r--lib/MCA/InstrBuilder.cpp44
-rw-r--r--lib/MCA/Instruction.cpp4
-rw-r--r--lib/MCA/Stages/DispatchStage.cpp19
-rw-r--r--lib/MCA/Stages/EntryStage.cpp2
-rw-r--r--lib/MCA/Stages/ExecuteStage.cpp22
-rw-r--r--lib/MCA/Stages/RetireStage.cpp8
-rw-r--r--lib/Object/Archive.cpp6
-rw-r--r--lib/Object/ArchiveWriter.cpp35
-rw-r--r--lib/Object/Binary.cpp3
-rw-r--r--lib/Object/COFFObjectFile.cpp198
-rw-r--r--lib/Object/Decompressor.cpp15
-rw-r--r--lib/Object/ELF.cpp2
-rw-r--r--lib/Object/ELFObjectFile.cpp38
-rw-r--r--lib/Object/MachOObjectFile.cpp48
-rw-r--r--lib/Object/MachOUniversal.cpp38
-rw-r--r--lib/Object/Minidump.cpp46
-rw-r--r--lib/Object/Object.cpp10
-rw-r--r--lib/Object/ObjectFile.cpp11
-rw-r--r--lib/Object/RelocationResolver.cpp67
-rw-r--r--lib/Object/SymbolicFile.cpp1
-rw-r--r--lib/Object/TapiFile.cpp104
-rw-r--r--lib/Object/TapiUniversal.cpp54
-rw-r--r--lib/Object/WasmObjectFile.cpp13
-rw-r--r--lib/Object/WindowsResource.cpp346
-rw-r--r--lib/Object/XCOFFObjectFile.cpp240
-rw-r--r--lib/ObjectYAML/COFFEmitter.cpp622
-rw-r--r--lib/ObjectYAML/CodeViewYAMLSymbols.cpp2
-rw-r--r--lib/ObjectYAML/ELFEmitter.cpp1152
-rw-r--r--lib/ObjectYAML/ELFYAML.cpp325
-rw-r--r--lib/ObjectYAML/MachOEmitter.cpp580
-rw-r--r--lib/ObjectYAML/MachOYAML.cpp9
-rw-r--r--lib/ObjectYAML/MinidumpEmitter.cpp247
-rw-r--r--lib/ObjectYAML/MinidumpYAML.cpp331
-rw-r--r--lib/ObjectYAML/WasmEmitter.cpp633
-rw-r--r--lib/ObjectYAML/WasmYAML.cpp4
-rw-r--r--lib/ObjectYAML/yaml2obj.cpp77
-rw-r--r--lib/Option/ArgList.cpp8
-rw-r--r--lib/Passes/PassBuilder.cpp164
-rw-r--r--lib/Passes/PassRegistry.def14
-rw-r--r--lib/ProfileData/Coverage/CoverageMapping.cpp60
-rw-r--r--lib/ProfileData/Coverage/CoverageMappingReader.cpp20
-rw-r--r--lib/ProfileData/Coverage/CoverageMappingWriter.cpp10
-rw-r--r--lib/ProfileData/GCOV.cpp12
-rw-r--r--lib/ProfileData/InstrProf.cpp18
-rw-r--r--lib/ProfileData/InstrProfReader.cpp44
-rw-r--r--lib/ProfileData/InstrProfWriter.cpp2
-rw-r--r--lib/ProfileData/ProfileSummaryBuilder.cpp4
-rw-r--r--lib/ProfileData/SampleProf.cpp56
-rw-r--r--lib/ProfileData/SampleProfReader.cpp447
-rw-r--r--lib/ProfileData/SampleProfWriter.cpp279
-rw-r--r--lib/Remarks/BitstreamRemarkParser.cpp597
-rw-r--r--lib/Remarks/BitstreamRemarkParser.h83
-rw-r--r--lib/Remarks/BitstreamRemarkSerializer.cpp386
-rw-r--r--lib/Remarks/RemarkFormat.cpp4
-rw-r--r--lib/Remarks/RemarkParser.cpp72
-rw-r--r--lib/Remarks/RemarkSerializer.cpp54
-rw-r--r--lib/Remarks/RemarkStringTable.cpp28
-rw-r--r--lib/Remarks/YAMLRemarkParser.cpp165
-rw-r--r--lib/Remarks/YAMLRemarkParser.h38
-rw-r--r--lib/Remarks/YAMLRemarkSerializer.cpp134
-rw-r--r--lib/Support/AArch64TargetParser.cpp4
-rw-r--r--lib/Support/ABIBreak.cpp24
-rw-r--r--lib/Support/APInt.cpp52
-rw-r--r--lib/Support/ARMTargetParser.cpp8
-rw-r--r--lib/Support/CRC.cpp113
-rw-r--r--lib/Support/CachePruning.cpp2
-rw-r--r--lib/Support/CodeGenCoverage.cpp4
-rw-r--r--lib/Support/CommandLine.cpp2
-rw-r--r--lib/Support/CrashRecoveryContext.cpp8
-rw-r--r--lib/Support/DataExtractor.cpp160
-rw-r--r--lib/Support/Error.cpp17
-rw-r--r--lib/Support/FileCheck.cpp356
-rw-r--r--lib/Support/FileCheckImpl.h624
-rw-r--r--lib/Support/FileCollector.cpp268
-rw-r--r--lib/Support/FileOutputBuffer.cpp6
-rw-r--r--lib/Support/FileUtilities.cpp66
-rw-r--r--lib/Support/GlobPattern.cpp23
-rw-r--r--lib/Support/Host.cpp34
-rw-r--r--lib/Support/JSON.cpp2
-rw-r--r--lib/Support/JamCRC.cpp96
-rw-r--r--lib/Support/ManagedStatic.cpp13
-rw-r--r--lib/Support/MemoryBuffer.cpp31
-rw-r--r--lib/Support/Mutex.cpp123
-rw-r--r--lib/Support/Parallel.cpp31
-rw-r--r--lib/Support/Path.cpp6
-rw-r--r--lib/Support/PrettyStackTrace.cpp64
-rw-r--r--lib/Support/RWMutex.cpp58
-rw-r--r--lib/Support/Regex.cpp39
-rw-r--r--lib/Support/Signposts.cpp2
-rw-r--r--lib/Support/SpecialCaseList.cpp4
-rw-r--r--lib/Support/Statistic.cpp27
-rw-r--r--lib/Support/StringExtras.cpp4
-rw-r--r--lib/Support/TimeProfiler.cpp63
-rw-r--r--lib/Support/Timer.cpp10
-rw-r--r--lib/Support/Unix/Memory.inc6
-rw-r--r--lib/Support/Unix/Mutex.inc42
-rw-r--r--lib/Support/Unix/Path.inc73
-rw-r--r--lib/Support/Unix/Process.inc7
-rw-r--r--lib/Support/Unix/Program.inc4
-rw-r--r--lib/Support/Unix/RWMutex.inc50
-rw-r--r--lib/Support/Unix/Signals.inc15
-rw-r--r--lib/Support/VirtualFileSystem.cpp102
-rw-r--r--lib/Support/Windows/Mutex.inc56
-rw-r--r--lib/Support/Windows/Path.inc93
-rw-r--r--lib/Support/Windows/Program.inc2
-rw-r--r--lib/Support/Windows/RWMutex.inc128
-rw-r--r--lib/Support/Windows/Signals.inc3
-rw-r--r--lib/Support/Windows/WindowsSupport.h1
-rw-r--r--lib/Support/Windows/explicit_symbols.inc6
-rw-r--r--lib/Support/YAMLTraits.cpp16
-rw-r--r--lib/Support/Z3Solver.cpp2
-rw-r--r--lib/Support/raw_ostream.cpp35
-rw-r--r--lib/Support/regcomp.c7
-rw-r--r--lib/TableGen/Error.cpp2
-rw-r--r--lib/TableGen/Main.cpp21
-rw-r--r--lib/TableGen/Record.cpp11
-rw-r--r--lib/TableGen/SetTheory.cpp22
-rw-r--r--lib/TableGen/TGLexer.cpp4
-rw-r--r--lib/TableGen/TGParser.cpp28
-rw-r--r--lib/Target/AArch64/AArch64.h6
-rw-r--r--lib/Target/AArch64/AArch64.td80
-rw-r--r--lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp12
-rw-r--r--lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp16
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp280
-rw-r--r--lib/Target/AArch64/AArch64CallLowering.cpp632
-rw-r--r--lib/Target/AArch64/AArch64CallLowering.h29
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.cpp38
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.h3
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.td88
-rw-r--r--lib/Target/AArch64/AArch64CollectLOH.cpp22
-rw-r--r--lib/Target/AArch64/AArch64Combine.td18
-rw-r--r--lib/Target/AArch64/AArch64CondBrTuning.cpp4
-rw-r--r--lib/Target/AArch64/AArch64ConditionalCompares.cpp6
-rw-r--r--lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp4
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp76
-rw-r--r--lib/Target/AArch64/AArch64FalkorHWPFFix.cpp2
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp75
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp301
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h28
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp45
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp535
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h37
-rw-r--r--lib/Target/AArch64/AArch64InstrAtomics.td65
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td220
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp1054
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h12
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td253
-rw-r--r--lib/Target/AArch64/AArch64InstructionSelector.cpp1096
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.cpp111
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.h3
-rw-r--r--lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp160
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp2
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.h17
-rw-r--r--lib/Target/AArch64/AArch64PBQPRegAlloc.cpp16
-rw-r--r--lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp98
-rw-r--r--lib/Target/AArch64/AArch64RegisterBankInfo.cpp39
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp69
-rw-r--r--lib/Target/AArch64/AArch64SIMDInstrOpt.cpp8
-rw-r--r--lib/Target/AArch64/AArch64SVEInstrInfo.td264
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.cpp2
-rw-r--r--lib/Target/AArch64/AArch64SpeculationHardening.cpp13
-rw-r--r--lib/Target/AArch64/AArch64StackOffset.h138
-rw-r--r--lib/Target/AArch64/AArch64StackTagging.cpp394
-rw-r--r--lib/Target/AArch64/AArch64StackTaggingPreRA.cpp209
-rw-r--r--lib/Target/AArch64/AArch64StorePairSuppress.cpp2
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp50
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h48
-rw-r--r--lib/Target/AArch64/AArch64SystemOperands.td40
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp35
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.cpp4
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.h3
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp29
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.h14
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp123
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp13
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp22
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp3
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp5
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h20
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/AArch64/SVEInstrFormats.td366
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.cpp2
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h25
-rw-r--r--lib/Target/AMDGPU/AMDGPU.h4
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td16
-rw-r--r--lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp21
-rw-r--r--lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h4
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp78
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.h10
-rw-r--r--lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp345
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallLowering.cpp701
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallLowering.h29
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallingConv.td27
-rw-r--r--lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp12
-rw-r--r--lib/Target/AMDGPU/AMDGPUFrameLowering.cpp6
-rw-r--r--lib/Target/AMDGPU/AMDGPUFrameLowering.h4
-rw-r--r--lib/Target/AMDGPU/AMDGPUGISel.td78
-rw-r--r--lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def80
-rw-r--r--lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp9
-rw-r--r--lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp272
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp250
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.h18
-rw-r--r--lib/Target/AMDGPU/AMDGPUInline.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.td126
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp1144
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructionSelector.h48
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructions.td216
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp1132
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.h52
-rw-r--r--lib/Target/AMDGPU/AMDGPULibCalls.cpp37
-rw-r--r--lib/Target/AMDGPU/AMDGPULibFunc.cpp14
-rw-r--r--lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp20
-rw-r--r--lib/Target/AMDGPU/AMDGPUMCInstLower.cpp4
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp38
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineFunction.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineFunction.h6
-rw-r--r--lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp592
-rw-r--r--lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp1234
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBankInfo.h56
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBanks.td6
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterInfo.cpp66
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterInfo.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUSearchableTables.td4
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp64
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h67
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp29
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp90
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h20
-rw-r--r--lib/Target/AMDGPU/AMDILCFGStructurizer.cpp8
-rw-r--r--lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp595
-rw-r--r--lib/Target/AMDGPU/BUFInstructions.td694
-rw-r--r--lib/Target/AMDGPU/DSInstructions.td92
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp4
-rw-r--r--lib/Target/AMDGPU/EvergreenInstructions.td60
-rw-r--r--lib/Target/AMDGPU/FLATInstructions.td196
-rw-r--r--lib/Target/AMDGPU/GCNDPPCombine.cpp88
-rw-r--r--lib/Target/AMDGPU/GCNHazardRecognizer.cpp21
-rw-r--r--lib/Target/AMDGPU/GCNILPSched.cpp1
-rw-r--r--lib/Target/AMDGPU/GCNIterativeScheduler.cpp2
-rw-r--r--lib/Target/AMDGPU/GCNNSAReassign.cpp8
-rw-r--r--lib/Target/AMDGPU/GCNRegBankReassign.cpp14
-rw-r--r--lib/Target/AMDGPU/GCNRegPressure.cpp26
-rw-r--r--lib/Target/AMDGPU/GCNRegPressure.h2
-rw-r--r--lib/Target/AMDGPU/GCNSchedStrategy.cpp31
-rw-r--r--lib/Target/AMDGPU/GCNSchedStrategy.h3
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp37
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h6
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp4
-rw-r--r--lib/Target/AMDGPU/MIMGInstructions.td4
-rw-r--r--lib/Target/AMDGPU/R600AsmPrinter.cpp2
-rw-r--r--lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp4
-rw-r--r--lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp22
-rw-r--r--lib/Target/AMDGPU/R600FrameLowering.h6
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp7
-rw-r--r--lib/Target/AMDGPU/R600InstrInfo.cpp22
-rw-r--r--lib/Target/AMDGPU/R600MachineScheduler.cpp8
-rw-r--r--lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp12
-rw-r--r--lib/Target/AMDGPU/R600Packetizer.cpp4
-rw-r--r--lib/Target/AMDGPU/R600RegisterInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/SIAddIMGInit.cpp4
-rw-r--r--lib/Target/AMDGPU/SIDefines.h6
-rw-r--r--lib/Target/AMDGPU/SIFixSGPRCopies.cpp394
-rw-r--r--lib/Target/AMDGPU/SIFixupVectorISel.cpp3
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp114
-rw-r--r--lib/Target/AMDGPU/SIFormMemoryClauses.cpp22
-rw-r--r--lib/Target/AMDGPU/SIFrameLowering.cpp34
-rw-r--r--lib/Target/AMDGPU/SIFrameLowering.h6
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp1052
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.h54
-rw-r--r--lib/Target/AMDGPU/SIInsertWaitcnts.cpp6
-rw-r--r--lib/Target/AMDGPU/SIInstrFormats.td5
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp558
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.h44
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td320
-rw-r--r--lib/Target/AMDGPU/SIInstructions.td297
-rw-r--r--lib/Target/AMDGPU/SILoadStoreOptimizer.cpp1117
-rw-r--r--lib/Target/AMDGPU/SILowerControlFlow.cpp60
-rw-r--r--lib/Target/AMDGPU/SILowerI1Copies.cpp49
-rw-r--r--lib/Target/AMDGPU/SILowerSGPRSpills.cpp4
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.cpp9
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.h11
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp16
-rw-r--r--lib/Target/AMDGPU/SIMemoryLegalizer.cpp6
-rw-r--r--lib/Target/AMDGPU/SIModeRegister.cpp2
-rw-r--r--lib/Target/AMDGPU/SIOptimizeExecMasking.cpp2
-rw-r--r--lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp32
-rw-r--r--lib/Target/AMDGPU/SIPeepholeSDWA.cpp32
-rw-r--r--lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp12
-rw-r--r--lib/Target/AMDGPU/SIProgramInfo.h5
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp445
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.h15
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.td464
-rw-r--r--lib/Target/AMDGPU/SIShrinkInstructions.cpp42
-rw-r--r--lib/Target/AMDGPU/SIWholeQuadMode.cpp35
-rw-r--r--lib/Target/AMDGPU/SMInstructions.td15
-rw-r--r--lib/Target/AMDGPU/SOPInstructions.td42
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp71
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h24
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp2
-rw-r--r--lib/Target/AMDGPU/VOP1Instructions.td65
-rw-r--r--lib/Target/AMDGPU/VOP2Instructions.td346
-rw-r--r--lib/Target/AMDGPU/VOP3Instructions.td69
-rw-r--r--lib/Target/AMDGPU/VOP3PInstructions.td2
-rw-r--r--lib/Target/AMDGPU/VOPCInstructions.td30
-rw-r--r--lib/Target/AMDGPU/VOPInstructions.td12
-rw-r--r--lib/Target/ARC/ARCFrameLowering.h4
-rw-r--r--lib/Target/ARC/ARCISelLowering.cpp2
-rw-r--r--lib/Target/ARC/ARCMachineFunctionInfo.h4
-rw-r--r--lib/Target/ARC/ARCOptAddrMode.cpp16
-rw-r--r--lib/Target/ARC/ARCRegisterInfo.cpp2
-rw-r--r--lib/Target/ARC/ARCTargetMachine.cpp2
-rw-r--r--lib/Target/ARM/A15SDOptimizer.cpp54
-rw-r--r--lib/Target/ARM/ARM.h2
-rw-r--r--lib/Target/ARM/ARM.td42
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp66
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp216
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h21
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp49
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h5
-rw-r--r--lib/Target/ARM/ARMBasicBlockInfo.cpp16
-rw-r--r--lib/Target/ARM/ARMBasicBlockInfo.h31
-rw-r--r--lib/Target/ARM/ARMCallLowering.cpp54
-rw-r--r--lib/Target/ARM/ARMCallLowering.h5
-rw-r--r--lib/Target/ARM/ARMCallingConv.cpp2
-rw-r--r--lib/Target/ARM/ARMCodeGenPrepare.cpp88
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp289
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp1
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp96
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp88
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp65
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h5
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp224
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp2073
-rw-r--r--lib/Target/ARM/ARMISelLowering.h45
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td23
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td127
-rw-r--r--lib/Target/ARM/ARMInstrMVE.td1430
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td191
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td16
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td98
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td96
-rw-r--r--lib/Target/ARM/ARMInstructionSelector.cpp41
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp32
-rw-r--r--lib/Target/ARM/ARMLowOverheadLoops.cpp364
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp4
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h8
-rw-r--r--lib/Target/ARM/ARMParallelDSP.cpp675
-rw-r--r--lib/Target/ARM/ARMPredicates.td2
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td18
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td4
-rw-r--r--lib/Target/ARM/ARMScheduleM4.td24
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp14
-rw-r--r--lib/Target/ARM/ARMSubtarget.h30
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp13
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp370
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h24
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp251
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp31
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h20
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp14
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h3
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp12
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h5
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp21
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp10
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp42
-rw-r--r--lib/Target/ARM/MVETailPredication.cpp519
-rw-r--r--lib/Target/ARM/MVEVPTBlockPass.cpp278
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp8
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp17
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp134
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp38
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp28
-rw-r--r--lib/Target/ARM/ThumbRegisterInfo.cpp11
-rw-r--r--lib/Target/AVR/AVRAsmPrinter.cpp2
-rw-r--r--lib/Target/AVR/AVRExpandPseudoInsts.cpp12
-rw-r--r--lib/Target/AVR/AVRFrameLowering.cpp5
-rw-r--r--lib/Target/AVR/AVRISelDAGToDAG.cpp2
-rw-r--r--lib/Target/AVR/AVRISelLowering.cpp27
-rw-r--r--lib/Target/AVR/AVRISelLowering.h4
-rw-r--r--lib/Target/AVR/AVRRegisterInfo.cpp2
-rw-r--r--lib/Target/AVR/AVRTargetMachine.cpp2
-rw-r--r--lib/Target/AVR/AsmParser/AVRAsmParser.cpp8
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp2
-rw-r--r--lib/Target/BPF/AsmParser/BPFAsmParser.cpp6
-rw-r--r--lib/Target/BPF/BPF.h4
-rw-r--r--lib/Target/BPF/BPFAbstractMemberAccess.cpp708
-rw-r--r--lib/Target/BPF/BPFAsmPrinter.cpp2
-rw-r--r--lib/Target/BPF/BPFCORE.h14
-rw-r--r--lib/Target/BPF/BPFFrameLowering.h2
-rw-r--r--lib/Target/BPF/BPFISelDAGToDAG.cpp170
-rw-r--r--lib/Target/BPF/BPFISelLowering.cpp21
-rw-r--r--lib/Target/BPF/BPFInstrInfo.cpp6
-rw-r--r--lib/Target/BPF/BPFInstrInfo.td2
-rw-r--r--lib/Target/BPF/BPFMIChecking.cpp1
-rw-r--r--lib/Target/BPF/BPFMIPeephole.cpp206
-rw-r--r--lib/Target/BPF/BPFMISimplifyPatchable.cpp27
-rw-r--r--lib/Target/BPF/BPFRegisterInfo.cpp6
-rw-r--r--lib/Target/BPF/BPFTargetMachine.cpp16
-rw-r--r--lib/Target/BPF/BTF.h54
-rw-r--r--lib/Target/BPF/BTFDebug.cpp281
-rw-r--r--lib/Target/BPF/BTFDebug.h29
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp4
-rw-r--r--lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp2
-rw-r--r--lib/Target/Hexagon/BitTracker.cpp21
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonBitSimplify.cpp71
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.cpp8
-rw-r--r--lib/Target/Hexagon/HexagonBlockRanges.cpp14
-rw-r--r--lib/Target/Hexagon/HexagonBranchRelaxation.cpp5
-rw-r--r--lib/Target/Hexagon/HexagonConstExtenders.cpp17
-rw-r--r--lib/Target/Hexagon/HexagonConstPropagation.cpp32
-rw-r--r--lib/Target/Hexagon/HexagonCopyToCombine.cpp32
-rw-r--r--lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td696
-rw-r--r--lib/Target/Hexagon/HexagonDepOperands.td83
-rw-r--r--lib/Target/Hexagon/HexagonEarlyIfConv.cpp24
-rw-r--r--lib/Target/Hexagon/HexagonExpandCondsets.cpp30
-rw-r--r--lib/Target/Hexagon/HexagonFixupHwLoops.cpp5
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp58
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h2
-rw-r--r--lib/Target/Hexagon/HexagonGenExtract.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonGenInsert.cpp27
-rw-r--r--lib/Target/Hexagon/HexagonGenMux.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonGenPredicate.cpp14
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp56
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp156
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h15
-rw-r--r--lib/Target/Hexagon/HexagonISelLoweringHVX.cpp24
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp273
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h22
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsics.td46
-rw-r--r--lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp15
-rw-r--r--lib/Target/Hexagon/HexagonNewValueJump.cpp10
-rw-r--r--lib/Target/Hexagon/HexagonOptAddrMode.cpp12
-rw-r--r--lib/Target/Hexagon/HexagonPatterns.td194
-rw-r--r--lib/Target/Hexagon/HexagonPatternsHVX.td40
-rw-r--r--lib/Target/Hexagon/HexagonPeephole.cpp38
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp8
-rw-r--r--lib/Target/Hexagon/HexagonSplitDouble.cpp60
-rw-r--r--lib/Target/Hexagon/HexagonStoreWidening.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp19
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp12
-rw-r--r--lib/Target/Hexagon/HexagonTargetTransformInfo.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetTransformInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonVExtract.cpp12
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp37
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.h3
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp6
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp4
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp4
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp10
-rw-r--r--lib/Target/Hexagon/RDFCopy.cpp4
-rw-r--r--lib/Target/Hexagon/RDFDeadCode.cpp1
-rw-r--r--lib/Target/Hexagon/RDFGraph.cpp16
-rw-r--r--lib/Target/Hexagon/RDFLiveness.cpp8
-rw-r--r--lib/Target/Hexagon/RDFRegisters.cpp8
-rw-r--r--lib/Target/Hexagon/RDFRegisters.h8
-rw-r--r--lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp23
-rw-r--r--lib/Target/Lanai/LanaiAsmPrinter.cpp2
-rw-r--r--lib/Target/Lanai/LanaiDelaySlotFiller.cpp2
-rw-r--r--lib/Target/Lanai/LanaiFrameLowering.cpp4
-rw-r--r--lib/Target/Lanai/LanaiFrameLowering.h2
-rw-r--r--lib/Target/Lanai/LanaiISelLowering.cpp15
-rw-r--r--lib/Target/Lanai/LanaiISelLowering.h4
-rw-r--r--lib/Target/Lanai/LanaiInstrInfo.cpp9
-rw-r--r--lib/Target/Lanai/LanaiInstrInfo.h3
-rw-r--r--lib/Target/Lanai/LanaiRegisterInfo.cpp2
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp2
-rw-r--r--lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp12
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp4
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp8
-rw-r--r--lib/Target/MSP430/MSP430BranchSelector.cpp1
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.h3
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp27
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h2
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp2
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp2
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp696
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp16
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h5
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp11
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp96
-rw-r--r--lib/Target/Mips/MicroMipsDSPInstrInfo.td4
-rw-r--r--lib/Target/Mips/MicroMipsInstrInfo.td9
-rw-r--r--lib/Target/Mips/MicroMipsSizeReduction.cpp18
-rw-r--r--lib/Target/Mips/Mips.td12
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp16
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp2
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td36
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp12
-rw-r--r--lib/Target/Mips/MipsCallLowering.cpp150
-rw-r--r--lib/Target/Mips/MipsCallLowering.h8
-rw-r--r--lib/Target/Mips/MipsConstantIslandPass.cpp63
-rw-r--r--lib/Target/Mips/MipsDSPInstrInfo.td19
-rw-r--r--lib/Target/Mips/MipsExpandPseudo.cpp54
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp12
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h5
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp53
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.h5
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp164
-rw-r--r--lib/Target/Mips/MipsISelLowering.h13
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp3
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h2
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td30
-rw-r--r--lib/Target/Mips/MipsInstructionSelector.cpp206
-rw-r--r--lib/Target/Mips/MipsLegalizerInfo.cpp244
-rw-r--r--lib/Target/Mips/MipsLegalizerInfo.h3
-rw-r--r--lib/Target/Mips/MipsMSAInstrInfo.td55
-rw-r--r--lib/Target/Mips/MipsOptimizePICCall.cpp5
-rw-r--r--lib/Target/Mips/MipsPfmCounters.td18
-rw-r--r--lib/Target/Mips/MipsPreLegalizerCombiner.cpp3
-rw-r--r--lib/Target/Mips/MipsRegisterBankInfo.cpp322
-rw-r--r--lib/Target/Mips/MipsRegisterBankInfo.h9
-rw-r--r--lib/Target/Mips/MipsRegisterBanks.td2
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp55
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp54
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.h6
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp124
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp20
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.cpp8
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp17
-rw-r--r--lib/Target/Mips/MipsSubtarget.h15
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp18
-rw-r--r--lib/Target/Mips/MipsTargetStreamer.h14
-rw-r--r--lib/Target/NVPTX/NVPTX.h2
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp34
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h2
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp58
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td13
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td169
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAlloca.cpp97
-rw-r--r--lib/Target/NVPTX/NVPTXLowerArgs.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXPeephole.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.cpp13
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp10
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp25
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h14
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/PowerPC/P9InstrResources.td8
-rw-r--r--lib/Target/PowerPC/PPC.h8
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp482
-rw-r--r--lib/Target/PowerPC/PPCBranchCoalescing.cpp13
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp29
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp41
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp71
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h11
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp105
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp546
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h49
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td4
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td12
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td9
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp336
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h42
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td206
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td180
-rw-r--r--lib/Target/PowerPC/PPCLoopPreIncPrep.cpp670
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp23
-rw-r--r--lib/Target/PowerPC/PPCMIPeephole.cpp82
-rw-r--r--lib/Target/PowerPC/PPCPreEmitPeephole.cpp106
-rw-r--r--lib/Target/PowerPC/PPCQPXLoadSplat.cpp6
-rw-r--r--lib/Target/PowerPC/PPCReduceCRLogicals.cpp15
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp39
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td22
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp18
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h24
-rw-r--r--lib/Target/PowerPC/PPCTLSDynamicCall.cpp4
-rw-r--r--lib/Target/PowerPC/PPCTOCRegDeps.cpp9
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp38
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp68
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.h12
-rw-r--r--lib/Target/PowerPC/PPCVSXCopy.cpp6
-rw-r--r--lib/Target/PowerPC/PPCVSXFMAMutate.cpp18
-rw-r--r--lib/Target/PowerPC/PPCVSXSwapRemoval.cpp32
-rw-r--r--lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp282
-rw-r--r--lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp139
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp17
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp13
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp41
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h8
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp20
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h3
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp4
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h1
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp4
-rw-r--r--lib/Target/RISCV/RISCV.h7
-rw-r--r--lib/Target/RISCV/RISCV.td11
-rw-r--r--lib/Target/RISCV/RISCVCallLowering.cpp50
-rw-r--r--lib/Target/RISCV/RISCVCallLowering.h42
-rw-r--r--lib/Target/RISCV/RISCVCallingConv.td28
-rw-r--r--lib/Target/RISCV/RISCVExpandPseudoInsts.cpp54
-rw-r--r--lib/Target/RISCV/RISCVFrameLowering.cpp164
-rw-r--r--lib/Target/RISCV/RISCVFrameLowering.h9
-rw-r--r--lib/Target/RISCV/RISCVISelDAGToDAG.cpp5
-rw-r--r--lib/Target/RISCV/RISCVISelLowering.cpp323
-rw-r--r--lib/Target/RISCV/RISCVISelLowering.h6
-rw-r--r--lib/Target/RISCV/RISCVInstrInfo.cpp118
-rw-r--r--lib/Target/RISCV/RISCVInstrInfo.h18
-rw-r--r--lib/Target/RISCV/RISCVInstrInfo.td22
-rw-r--r--lib/Target/RISCV/RISCVInstrInfoA.td34
-rw-r--r--lib/Target/RISCV/RISCVInstrInfoC.td124
-rw-r--r--lib/Target/RISCV/RISCVInstrInfoF.td6
-rw-r--r--lib/Target/RISCV/RISCVInstructionSelector.cpp103
-rw-r--r--lib/Target/RISCV/RISCVLegalizerInfo.cpp23
-rw-r--r--lib/Target/RISCV/RISCVLegalizerInfo.h28
-rw-r--r--lib/Target/RISCV/RISCVMergeBaseOffset.cpp16
-rw-r--r--lib/Target/RISCV/RISCVRegisterBankInfo.cpp26
-rw-r--r--lib/Target/RISCV/RISCVRegisterBankInfo.h37
-rw-r--r--lib/Target/RISCV/RISCVRegisterBanks.td13
-rw-r--r--lib/Target/RISCV/RISCVRegisterInfo.cpp13
-rw-r--r--lib/Target/RISCV/RISCVRegisterInfo.h6
-rw-r--r--lib/Target/RISCV/RISCVRegisterInfo.td100
-rw-r--r--lib/Target/RISCV/RISCVSubtarget.cpp30
-rw-r--r--lib/Target/RISCV/RISCVSubtarget.h20
-rw-r--r--lib/Target/RISCV/RISCVTargetMachine.cpp31
-rw-r--r--lib/Target/RISCV/Utils/RISCVBaseInfo.h16
-rw-r--r--lib/Target/Sparc/AsmParser/SparcAsmParser.cpp8
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp10
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp6
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp3
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp4
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp28
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h4
-rw-r--r--lib/Target/Sparc/SparcInstr64Bit.td2
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp4
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td8
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp12
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp4
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp12
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZ.h1
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp20
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.h1
-rw-r--r--lib/Target/SystemZ/SystemZElimCompare.cpp9
-rw-r--r--lib/Target/SystemZ/SystemZExpandPseudo.cpp152
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp6
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp11
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp244
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td32
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td166
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp168
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h29
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td22
-rw-r--r--lib/Target/SystemZ/SystemZInstrVector.td26
-rw-r--r--lib/Target/SystemZ/SystemZLongBranch.cpp26
-rw-r--r--lib/Target/SystemZ/SystemZMachineScheduler.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZOperands.td121
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td6
-rw-r--r--lib/Target/SystemZ/SystemZPatterns.td4
-rw-r--r--lib/Target/SystemZ/SystemZPostRewrite.cpp164
-rw-r--r--lib/Target/SystemZ/SystemZProcessors.td3
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp19
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h9
-rw-r--r--lib/Target/SystemZ/SystemZSchedule.td2
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ15.td (renamed from lib/Target/SystemZ/SystemZScheduleArch13.td)64
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp8
-rw-r--r--lib/Target/SystemZ/SystemZShortenInst.cpp4
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp11
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h8
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp1
-rw-r--r--lib/Target/TargetMachine.cpp20
-rw-r--r--lib/Target/TargetMachineC.cpp2
-rw-r--r--lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp100
-rw-r--r--lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp24
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp10
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp57
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h3
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp1
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h74
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp33
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h3
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp11
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp38
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGSort.cpp5
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp127
-rw-r--r--lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp22
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFastISel.cpp36
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp3
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp8
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFrameLowering.h4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISD.def2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp131
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.cpp306
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.h2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrAtomics.td100
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrControl.td48
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrConv.td17
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.h2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.td3
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrMemory.td51
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrSIMD.td221
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp12
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp119
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp55
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.h3
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp12
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h13
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyPeephole.cpp107
-rw-r--r--lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegColoring.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegStackify.cpp24
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp6
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp5
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyUtilities.cpp21
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp170
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParserCommon.h4
-rw-r--r--lib/Target/X86/AsmParser/X86Operand.h25
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp5
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp6
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp19
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp3
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp61
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h11
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp5
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp2
-rw-r--r--lib/Target/X86/X86.h10
-rw-r--r--lib/Target/X86/X86.td56
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp8
-rw-r--r--lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp3
-rw-r--r--lib/Target/X86/X86AvoidTrailingCall.cpp108
-rw-r--r--lib/Target/X86/X86CallFrameOptimization.cpp26
-rw-r--r--lib/Target/X86/X86CallLowering.cpp49
-rw-r--r--lib/Target/X86/X86CallLowering.h5
-rw-r--r--lib/Target/X86/X86CallingConv.td2
-rw-r--r--lib/Target/X86/X86CmovConversion.cpp18
-rw-r--r--lib/Target/X86/X86CondBrFolding.cpp2
-rw-r--r--lib/Target/X86/X86DomainReassignment.cpp20
-rwxr-xr-xlib/Target/X86/X86EvexToVex.cpp2
-rw-r--r--lib/Target/X86/X86ExpandPseudo.cpp11
-rw-r--r--lib/Target/X86/X86FastISel.cpp15
-rw-r--r--lib/Target/X86/X86FixupBWInsts.cpp68
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp201
-rw-r--r--lib/Target/X86/X86FixupSetCC.cpp4
-rw-r--r--lib/Target/X86/X86FlagsCopyLowering.cpp13
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp6
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp106
-rw-r--r--lib/Target/X86/X86FrameLowering.h4
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp304
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp5926
-rw-r--r--lib/Target/X86/X86ISelLowering.h77
-rw-r--r--lib/Target/X86/X86IndirectBranchTracking.cpp2
-rw-r--r--lib/Target/X86/X86InsertPrefetch.cpp8
-rw-r--r--lib/Target/X86/X86InstrAVX512.td1457
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td10
-rw-r--r--lib/Target/X86/X86InstrBuilder.h6
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td33
-rw-r--r--lib/Target/X86/X86InstrCompiler.td139
-rw-r--r--lib/Target/X86/X86InstrControl.td85
-rw-r--r--lib/Target/X86/X86InstrExtension.td11
-rw-r--r--lib/Target/X86/X86InstrFoldTables.cpp287
-rw-r--r--lib/Target/X86/X86InstrFoldTables.h39
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td26
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp582
-rw-r--r--lib/Target/X86/X86InstrInfo.h28
-rw-r--r--lib/Target/X86/X86InstrInfo.td57
-rw-r--r--lib/Target/X86/X86InstrMMX.td33
-rw-r--r--lib/Target/X86/X86InstrMPX.td32
-rw-r--r--lib/Target/X86/X86InstrSSE.td551
-rw-r--r--lib/Target/X86/X86InstrSystem.td2
-rw-r--r--lib/Target/X86/X86InstrTSX.td2
-rw-r--r--lib/Target/X86/X86InstrXOP.td26
-rw-r--r--lib/Target/X86/X86InstructionSelector.cpp135
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h6
-rw-r--r--lib/Target/X86/X86LegalizerInfo.cpp20
-rw-r--r--lib/Target/X86/X86LegalizerInfo.h3
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp313
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h8
-rw-r--r--lib/Target/X86/X86OptimizeLEAs.cpp60
-rw-r--r--lib/Target/X86/X86RegisterBankInfo.cpp4
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp31
-rw-r--r--lib/Target/X86/X86RetpolineThunks.cpp8
-rwxr-xr-xlib/Target/X86/X86SchedBroadwell.td8
-rw-r--r--lib/Target/X86/X86SchedHaswell.td8
-rw-r--r--lib/Target/X86/X86SchedPredicates.td57
-rw-r--r--lib/Target/X86/X86SchedSandyBridge.td8
-rw-r--r--lib/Target/X86/X86SchedSkylakeClient.td8
-rwxr-xr-xlib/Target/X86/X86SchedSkylakeServer.td8
-rw-r--r--lib/Target/X86/X86Schedule.td24
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td6
-rw-r--r--lib/Target/X86/X86ScheduleBdVer2.td6
-rw-r--r--lib/Target/X86/X86ScheduleBtVer2.td257
-rw-r--r--lib/Target/X86/X86ScheduleSLM.td8
-rw-r--r--lib/Target/X86/X86ScheduleZnver1.td8
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp2
-rw-r--r--lib/Target/X86/X86SpeculativeLoadHardening.cpp59
-rw-r--r--lib/Target/X86/X86Subtarget.cpp18
-rw-r--r--lib/Target/X86/X86Subtarget.h23
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp49
-rw-r--r--lib/Target/X86/X86TargetMachine.h2
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp4
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h3
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp225
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.h11
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp6
-rw-r--r--lib/Target/X86/X86WinAllocaExpander.cpp4
-rw-r--r--lib/Target/X86/X86WinEHState.cpp5
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp4
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp6
-rw-r--r--lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp2
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp21
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--lib/Target/XCore/XCoreTargetTransformInfo.h3
-rw-r--r--lib/TextAPI/MachO/Architecture.cpp4
-rw-r--r--lib/TextAPI/MachO/InterfaceFile.cpp80
-rw-r--r--lib/TextAPI/MachO/Platform.cpp91
-rw-r--r--lib/TextAPI/MachO/Symbol.cpp9
-rw-r--r--lib/TextAPI/MachO/Target.cpp75
-rw-r--r--lib/TextAPI/MachO/TextStub.cpp606
-rw-r--r--lib/TextAPI/MachO/TextStubCommon.cpp93
-rw-r--r--lib/TextAPI/MachO/TextStubCommon.h8
-rw-r--r--lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp26
-rw-r--r--lib/ToolDrivers/llvm-lib/LibDriver.cpp236
-rw-r--r--lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp78
-rw-r--r--lib/Transforms/Coroutines/CoroCleanup.cpp7
-rw-r--r--lib/Transforms/Coroutines/CoroEarly.cpp26
-rw-r--r--lib/Transforms/Coroutines/CoroElide.cpp2
-rw-r--r--lib/Transforms/Coroutines/CoroFrame.cpp652
-rw-r--r--lib/Transforms/Coroutines/CoroInstr.h205
-rw-r--r--lib/Transforms/Coroutines/CoroInternal.h162
-rw-r--r--lib/Transforms/Coroutines/CoroSplit.cpp1166
-rw-r--r--lib/Transforms/Coroutines/Coroutines.cpp342
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp2
-rw-r--r--lib/Transforms/IPO/Attributor.cpp4959
-rw-r--r--lib/Transforms/IPO/BlockExtractor.cpp5
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp4
-rw-r--r--lib/Transforms/IPO/CrossDSOCFI.cpp10
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp38
-rw-r--r--lib/Transforms/IPO/FunctionImport.cpp43
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp156
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp176
-rw-r--r--lib/Transforms/IPO/HotColdSplitting.cpp61
-rw-r--r--lib/Transforms/IPO/IPO.cpp13
-rw-r--r--lib/Transforms/IPO/InferFunctionAttrs.cpp20
-rw-r--r--lib/Transforms/IPO/Inliner.cpp21
-rw-r--r--lib/Transforms/IPO/LoopExtractor.cpp6
-rw-r--r--lib/Transforms/IPO/LowerTypeTests.cpp305
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp4
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp20
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp1
-rw-r--r--lib/Transforms/IPO/SCCP.cpp18
-rw-r--r--lib/Transforms/IPO/SampleProfile.cpp238
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp21
-rw-r--r--lib/Transforms/IPO/WholeProgramDevirt.cpp389
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp268
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp278
-rw-r--r--lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp121
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp102
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp870
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h116
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp93
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp77
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp455
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp370
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp48
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp171
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp67
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp98
-rw-r--r--lib/Transforms/Instrumentation/BoundsChecking.cpp2
-rw-r--r--lib/Transforms/Instrumentation/CFGMST.h4
-rw-r--r--lib/Transforms/Instrumentation/ControlHeightReduction.cpp26
-rw-r--r--lib/Transforms/Instrumentation/DataFlowSanitizer.cpp2
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp49
-rw-r--r--lib/Transforms/Instrumentation/HWAddressSanitizer.cpp376
-rw-r--r--lib/Transforms/Instrumentation/IndirectCallPromotion.cpp2
-rw-r--r--lib/Transforms/Instrumentation/InstrOrderFile.cpp3
-rw-r--r--lib/Transforms/Instrumentation/InstrProfiling.cpp65
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp5
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp89
-rw-r--r--lib/Transforms/Instrumentation/PGOInstrumentation.cpp220
-rw-r--r--lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp6
-rw-r--r--lib/Transforms/Instrumentation/SanitizerCoverage.cpp164
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp54
-rw-r--r--lib/Transforms/Instrumentation/ValueProfileCollector.cpp78
-rw-r--r--lib/Transforms/Instrumentation/ValueProfileCollector.h79
-rw-r--r--lib/Transforms/Instrumentation/ValueProfilePlugins.inc75
-rw-r--r--lib/Transforms/ObjCARC/PtrState.cpp4
-rw-r--r--lib/Transforms/Scalar/AlignmentFromAssumptions.cpp8
-rw-r--r--lib/Transforms/Scalar/CallSiteSplitting.cpp2
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp24
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp2
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp180
-rw-r--r--lib/Transforms/Scalar/DCE.cpp31
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp7
-rw-r--r--lib/Transforms/Scalar/DivRemPairs.cpp219
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp22
-rw-r--r--lib/Transforms/Scalar/FlattenCFGPass.cpp24
-rw-r--r--lib/Transforms/Scalar/Float2Int.cpp47
-rw-r--r--lib/Transforms/Scalar/GVN.cpp201
-rw-r--r--lib/Transforms/Scalar/GVNHoist.cpp17
-rw-r--r--lib/Transforms/Scalar/GuardWidening.cpp2
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp389
-rw-r--r--lib/Transforms/Scalar/InferAddressSpaces.cpp38
-rw-r--r--lib/Transforms/Scalar/InstSimplifyPass.cpp48
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp18
-rw-r--r--lib/Transforms/Scalar/LICM.cpp55
-rw-r--r--lib/Transforms/Scalar/LoopDataPrefetch.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopFuse.cpp640
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp890
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp5
-rw-r--r--lib/Transforms/Scalar/LoopInterchange.cpp62
-rw-r--r--lib/Transforms/Scalar/LoopLoadElimination.cpp3
-rw-r--r--lib/Transforms/Scalar/LoopPredication.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp3
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp10
-rw-r--r--lib/Transforms/Scalar/LoopSimplifyCFG.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopSink.cpp9
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp20
-rw-r--r--lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp8
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp128
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp87
-rw-r--r--lib/Transforms/Scalar/LoopVersioningLICM.cpp31
-rw-r--r--lib/Transforms/Scalar/LowerConstantIntrinsics.cpp170
-rw-r--r--lib/Transforms/Scalar/LowerExpectIntrinsic.cpp33
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp110
-rw-r--r--lib/Transforms/Scalar/MergeICmps.cpp2
-rw-r--r--lib/Transforms/Scalar/MergedLoadStoreMotion.cpp167
-rw-r--r--lib/Transforms/Scalar/NaryReassociate.cpp2
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp25
-rw-r--r--lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp2
-rw-r--r--lib/Transforms/Scalar/PlaceSafepoints.cpp6
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp190
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp6
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp75
-rw-r--r--lib/Transforms/Scalar/SROA.cpp40
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp9
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp2
-rw-r--r--lib/Transforms/Scalar/SimpleLoopUnswitch.cpp25
-rw-r--r--lib/Transforms/Scalar/SpeculateAroundPHIs.cpp6
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp2
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp2
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp64
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp94
-rw-r--r--lib/Transforms/Utils/BypassSlowDivision.cpp8
-rw-r--r--lib/Transforms/Utils/CanonicalizeAliases.cpp1
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp15
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp18
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp309
-rw-r--r--lib/Transforms/Utils/EntryExitInstrumenter.cpp2
-rw-r--r--lib/Transforms/Utils/Evaluator.cpp2
-rw-r--r--lib/Transforms/Utils/FlattenCFG.cpp20
-rw-r--r--lib/Transforms/Utils/FunctionImportUtils.cpp2
-rw-r--r--lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp6
-rw-r--r--lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--lib/Transforms/Utils/Local.cpp209
-rw-r--r--lib/Transforms/Utils/LoopRotationUtils.cpp27
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp15
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp12
-rw-r--r--lib/Transforms/Utils/LoopUnrollAndJam.cpp6
-rw-r--r--lib/Transforms/Utils/LoopUnrollPeel.cpp161
-rw-r--r--lib/Transforms/Utils/LoopUtils.cpp56
-rw-r--r--lib/Transforms/Utils/LoopVersioning.cpp4
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp5
-rw-r--r--lib/Transforms/Utils/MisExpect.cpp177
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp2
-rw-r--r--lib/Transforms/Utils/PredicateInfo.cpp80
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp250
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp688
-rw-r--r--lib/Transforms/Utils/SymbolRewriter.cpp12
-rw-r--r--lib/Transforms/Utils/VNCoercion.cpp2
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp60
-rw-r--r--lib/Transforms/Vectorize/LoadStoreVectorizer.cpp26
-rw-r--r--lib/Transforms/Vectorize/LoopVectorizationLegality.cpp186
-rw-r--r--lib/Transforms/Vectorize/LoopVectorizationPlanner.h4
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp738
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp820
-rw-r--r--lib/Transforms/Vectorize/VPlan.cpp19
-rw-r--r--lib/Transforms/Vectorize/VPlan.h4
-rw-r--r--lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp2
-rw-r--r--lib/Transforms/Vectorize/VPlanSLP.cpp13
-rw-r--r--lib/WindowsManifest/WindowsManifestMerger.cpp4
-rw-r--r--lib/XRay/FDRRecordProducer.cpp37
-rw-r--r--lib/XRay/FileHeaderReader.cpp14
-rw-r--r--lib/XRay/InstrumentationMap.cpp17
-rw-r--r--lib/XRay/Profile.cpp10
-rw-r--r--lib/XRay/RecordInitializer.cpp202
-rw-r--r--lib/XRay/Trace.cpp43
-rw-r--r--tools/bugpoint/BugDriver.h7
-rw-r--r--tools/bugpoint/ExtractFunction.cpp3
-rw-r--r--tools/bugpoint/OptimizerDriver.cpp12
-rw-r--r--tools/bugpoint/ToolRunner.cpp16
-rw-r--r--tools/bugpoint/bugpoint.cpp46
-rw-r--r--tools/llc/llc.cpp27
-rw-r--r--tools/lli/lli.cpp71
-rw-r--r--tools/llvm-ar/llvm-ar.cpp192
-rw-r--r--tools/llvm-as/llvm-as.cpp2
-rw-r--r--tools/llvm-cov/CodeCoverage.cpp24
-rw-r--r--tools/llvm-cov/SourceCoverageView.cpp8
-rw-r--r--tools/llvm-cov/TestingSupport.cpp10
-rw-r--r--tools/llvm-cxxdump/llvm-cxxdump.cpp6
-rw-r--r--tools/llvm-cxxmap/llvm-cxxmap.cpp2
-rw-r--r--tools/llvm-dis/llvm-dis.cpp4
-rw-r--r--tools/llvm-dwarfdump/Statistics.cpp263
-rw-r--r--tools/llvm-dwarfdump/llvm-dwarfdump.cpp2
-rw-r--r--tools/llvm-extract/llvm-extract.cpp16
-rw-r--r--tools/llvm-ifs/CMakeLists.txt10
-rw-r--r--tools/llvm-ifs/LLVMBuild.txt21
-rw-r--r--tools/llvm-ifs/llvm-ifs.cpp532
-rw-r--r--tools/llvm-link/llvm-link.cpp6
-rw-r--r--tools/llvm-lto/llvm-lto.cpp20
-rw-r--r--tools/llvm-lto2/llvm-lto2.cpp12
-rw-r--r--tools/llvm-mc/Disassembler.cpp15
-rw-r--r--tools/llvm-mc/Disassembler.h10
-rw-r--r--tools/llvm-mc/llvm-mc.cpp27
-rw-r--r--tools/llvm-mca/CodeRegion.cpp6
-rw-r--r--tools/llvm-mca/CodeRegionGenerator.cpp2
-rw-r--r--tools/llvm-mca/Views/BottleneckAnalysis.cpp40
-rw-r--r--tools/llvm-mca/Views/BottleneckAnalysis.h8
-rw-r--r--tools/llvm-mca/Views/InstructionInfoView.cpp31
-rw-r--r--tools/llvm-mca/Views/InstructionInfoView.h13
-rw-r--r--tools/llvm-mca/Views/TimelineView.cpp50
-rw-r--r--tools/llvm-mca/Views/TimelineView.h1
-rw-r--r--tools/llvm-mca/llvm-mca.cpp113
-rw-r--r--tools/llvm-modextract/llvm-modextract.cpp2
-rw-r--r--tools/llvm-nm/llvm-nm.cpp51
-rw-r--r--tools/llvm-objcopy/COFF/COFFObjcopy.cpp88
-rw-r--r--tools/llvm-objcopy/COFF/Reader.cpp18
-rw-r--r--tools/llvm-objcopy/COFF/Writer.cpp4
-rw-r--r--tools/llvm-objcopy/CommonOpts.td123
-rw-r--r--tools/llvm-objcopy/CopyConfig.cpp370
-rw-r--r--tools/llvm-objcopy/CopyConfig.h110
-rw-r--r--tools/llvm-objcopy/ELF/ELFConfig.cpp133
-rw-r--r--tools/llvm-objcopy/ELF/ELFConfig.h44
-rw-r--r--tools/llvm-objcopy/ELF/ELFObjcopy.cpp169
-rw-r--r--tools/llvm-objcopy/ELF/Object.cpp252
-rw-r--r--tools/llvm-objcopy/ELF/Object.h56
-rw-r--r--tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp350
-rw-r--r--tools/llvm-objcopy/MachO/MachOLayoutBuilder.h50
-rw-r--r--tools/llvm-objcopy/MachO/MachOObjcopy.cpp30
-rw-r--r--tools/llvm-objcopy/MachO/MachOReader.cpp45
-rw-r--r--tools/llvm-objcopy/MachO/MachOReader.h3
-rw-r--r--tools/llvm-objcopy/MachO/MachOWriter.cpp305
-rw-r--r--tools/llvm-objcopy/MachO/MachOWriter.h19
-rw-r--r--tools/llvm-objcopy/MachO/Object.h27
-rw-r--r--tools/llvm-objcopy/ObjcopyOpts.td141
-rw-r--r--tools/llvm-objcopy/StripOpts.td103
-rw-r--r--tools/llvm-objcopy/llvm-objcopy.cpp53
-rw-r--r--tools/llvm-objdump/COFFDump.cpp77
-rw-r--r--tools/llvm-objdump/ELFDump.cpp2
-rw-r--r--tools/llvm-objdump/MachODump.cpp375
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp543
-rw-r--r--tools/llvm-objdump/llvm-objdump.h36
-rw-r--r--tools/llvm-pdbutil/BytesOutputStyle.cpp2
-rw-r--r--tools/llvm-pdbutil/DumpOutputStyle.cpp9
-rw-r--r--tools/llvm-pdbutil/ExplainOutputStyle.cpp2
-rw-r--r--tools/llvm-pdbutil/InputFile.cpp17
-rw-r--r--tools/llvm-pdbutil/MinimalSymbolDumper.cpp5
-rw-r--r--tools/llvm-pdbutil/PrettyTypeDumper.cpp4
-rw-r--r--tools/llvm-pdbutil/llvm-pdbutil.cpp10
-rw-r--r--tools/llvm-profdata/llvm-profdata.cpp287
-rw-r--r--tools/llvm-readobj/ARMEHABIPrinter.h19
-rw-r--r--tools/llvm-readobj/ARMWinEHPrinter.cpp9
-rw-r--r--tools/llvm-readobj/COFFDumper.cpp362
-rw-r--r--tools/llvm-readobj/DwarfCFIEHPrinter.h54
-rw-r--r--tools/llvm-readobj/ELFDumper.cpp1648
-rw-r--r--tools/llvm-readobj/MachODumper.cpp61
-rw-r--r--tools/llvm-readobj/ObjDumper.cpp32
-rw-r--r--tools/llvm-readobj/ObjDumper.h11
-rw-r--r--tools/llvm-readobj/WasmDumper.cpp7
-rw-r--r--tools/llvm-readobj/Win64EHDumper.cpp13
-rw-r--r--tools/llvm-readobj/WindowsResourceDumper.cpp8
-rw-r--r--tools/llvm-readobj/XCOFFDumper.cpp402
-rw-r--r--tools/llvm-readobj/llvm-readobj.cpp148
-rw-r--r--tools/llvm-readobj/llvm-readobj.h25
-rw-r--r--tools/llvm-reduce/CMakeLists.txt26
-rw-r--r--tools/llvm-reduce/DeltaManager.h36
-rw-r--r--tools/llvm-reduce/LLVMBuild.txt24
-rw-r--r--tools/llvm-reduce/TestRunner.cpp42
-rw-r--r--tools/llvm-reduce/TestRunner.h46
-rw-r--r--tools/llvm-reduce/deltas/Delta.cpp162
-rw-r--r--tools/llvm-reduce/deltas/Delta.h76
-rw-r--r--tools/llvm-reduce/deltas/ReduceArguments.cpp125
-rw-r--r--tools/llvm-reduce/deltas/ReduceArguments.h21
-rw-r--r--tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp146
-rw-r--r--tools/llvm-reduce/deltas/ReduceBasicBlocks.h20
-rw-r--r--tools/llvm-reduce/deltas/ReduceFunctions.cpp77
-rw-r--r--tools/llvm-reduce/deltas/ReduceFunctions.h20
-rw-r--r--tools/llvm-reduce/deltas/ReduceGlobalVars.cpp74
-rw-r--r--tools/llvm-reduce/deltas/ReduceGlobalVars.h20
-rw-r--r--tools/llvm-reduce/deltas/ReduceInstructions.cpp65
-rw-r--r--tools/llvm-reduce/deltas/ReduceInstructions.h20
-rw-r--r--tools/llvm-reduce/deltas/ReduceMetadata.cpp138
-rw-r--r--tools/llvm-reduce/deltas/ReduceMetadata.h18
-rw-r--r--tools/llvm-reduce/llvm-reduce.cpp114
-rw-r--r--tools/llvm-rtdyld/llvm-rtdyld.cpp104
-rw-r--r--tools/llvm-stress/llvm-stress.cpp4
-rw-r--r--tools/llvm-symbolizer/llvm-symbolizer.cpp6
-rw-r--r--tools/llvm-xray/func-id-helper.cpp2
-rw-r--r--tools/llvm-xray/xray-account.cpp2
-rw-r--r--tools/llvm-xray/xray-converter.cpp4
-rw-r--r--tools/llvm-xray/xray-extract.cpp2
-rw-r--r--tools/llvm-xray/xray-fdr-dump.cpp2
-rw-r--r--tools/llvm-xray/xray-graph-diff.cpp2
-rw-r--r--tools/llvm-xray/xray-graph.cpp2
-rw-r--r--tools/opt/opt.cpp13
-rw-r--r--tools/vfabi-demangle-fuzzer/CMakeLists.txt7
-rw-r--r--tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp26
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp19
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp3
-rw-r--r--utils/TableGen/CallingConvEmitter.cpp4
-rw-r--r--utils/TableGen/CodeEmitterGen.cpp305
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp42
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.h4
-rw-r--r--utils/TableGen/CodeGenInstruction.cpp1
-rw-r--r--utils/TableGen/CodeGenInstruction.h1
-rw-r--r--utils/TableGen/CodeGenIntrinsics.h8
-rw-r--r--utils/TableGen/CodeGenMapTable.cpp12
-rw-r--r--utils/TableGen/CodeGenRegisters.cpp52
-rw-r--r--utils/TableGen/CodeGenRegisters.h26
-rw-r--r--utils/TableGen/CodeGenSchedule.cpp24
-rw-r--r--utils/TableGen/CodeGenTarget.cpp70
-rw-r--r--utils/TableGen/CodeGenTarget.h6
-rw-r--r--utils/TableGen/DAGISelEmitter.cpp2
-rw-r--r--utils/TableGen/DAGISelMatcher.h8
-rw-r--r--utils/TableGen/DAGISelMatcherEmitter.cpp22
-rw-r--r--utils/TableGen/DAGISelMatcherGen.cpp10
-rw-r--r--utils/TableGen/DAGISelMatcherOpt.cpp9
-rw-r--r--utils/TableGen/DFAEmitter.cpp394
-rw-r--r--utils/TableGen/DFAEmitter.h107
-rw-r--r--utils/TableGen/DFAPacketizerEmitter.cpp657
-rw-r--r--utils/TableGen/DisassemblerEmitter.cpp2
-rw-r--r--utils/TableGen/FixedLenDecoderEmitter.cpp93
-rw-r--r--utils/TableGen/GICombinerEmitter.cpp452
-rw-r--r--utils/TableGen/GlobalISel/CMakeLists.txt7
-rw-r--r--utils/TableGen/GlobalISel/CodeExpander.cpp93
-rw-r--r--utils/TableGen/GlobalISel/CodeExpander.h55
-rw-r--r--utils/TableGen/GlobalISel/CodeExpansions.h43
-rw-r--r--utils/TableGen/GlobalISelEmitter.cpp775
-rw-r--r--utils/TableGen/InfoByHwMode.cpp11
-rw-r--r--utils/TableGen/InfoByHwMode.h5
-rw-r--r--utils/TableGen/InstrDocsEmitter.cpp2
-rw-r--r--utils/TableGen/InstrInfoEmitter.cpp52
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp20
-rw-r--r--utils/TableGen/RISCVCompressInstEmitter.cpp13
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp4
-rw-r--r--utils/TableGen/SearchableTableEmitter.cpp16
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp8
-rw-r--r--utils/TableGen/SubtargetFeatureInfo.cpp12
-rw-r--r--utils/TableGen/TableGen.cpp157
-rw-r--r--utils/TableGen/TableGenBackends.h2
-rw-r--r--utils/TableGen/WebAssemblyDisassemblerEmitter.cpp2
-rw-r--r--utils/TableGen/X86DisassemblerTables.cpp2
-rw-r--r--utils/TableGen/X86EVEX2VEXTablesEmitter.cpp1
-rw-r--r--utils/TableGen/X86RecognizableInstr.cpp14
-rwxr-xr-xutils/add_argument_names.py82
-rw-r--r--utils/llvm-locstats/CMakeLists.txt12
-rwxr-xr-xutils/llvm-locstats/llvm-locstats.py209
2118 files changed, 120703 insertions, 48751 deletions
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index cac2f297056d..b84970956666 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -370,9 +370,13 @@ typedef enum {
LLVMAtomicRMWBinOpUMax, /**< Sets the value if it's greater than the
original using an unsigned comparison and return
the old one */
- LLVMAtomicRMWBinOpUMin /**< Sets the value if it's greater than the
- original using an unsigned comparison and return
- the old one */
+  LLVMAtomicRMWBinOpUMin, /**< Sets the value if it's lower than the
+ original using an unsigned comparison and return
+ the old one */
+ LLVMAtomicRMWBinOpFAdd, /**< Add a floating point value and return the
+ old one */
+ LLVMAtomicRMWBinOpFSub /**< Subtract a floating point value and return the
+ old one */
} LLVMAtomicRMWBinOp;
typedef enum {
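The two new enumerators expose atomic floating-point read-modify-write operations to C API clients. As a minimal sketch (illustrative, not part of the patch), assuming an existing LLVMBuilderRef B and hypothetical float-typed Ptr/Val values:

#include <llvm-c/Core.h>

/* Atomically add Val to *Ptr and return the value previously stored
   there, using the new LLVMAtomicRMWBinOpFAdd enumerator. */
static LLVMValueRef emit_atomic_fadd(LLVMBuilderRef B, LLVMValueRef Ptr,
                                     LLVMValueRef Val) {
  return LLVMBuildAtomicRMW(B, LLVMAtomicRMWBinOpFAdd, Ptr, Val,
                            LLVMAtomicOrderingSequentiallyConsistent,
                            /*SingleThread=*/0);
}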
@@ -1539,6 +1543,7 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(GlobalVariable) \
macro(UndefValue) \
macro(Instruction) \
+ macro(UnaryOperator) \
macro(BinaryOperator) \
macro(CallInst) \
macro(IntrinsicInst) \
@@ -1571,6 +1576,8 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(ResumeInst) \
macro(CleanupReturnInst) \
macro(CatchReturnInst) \
+ macro(CatchSwitchInst) \
+ macro(CallBrInst) \
macro(FuncletPadInst) \
macro(CatchPadInst) \
macro(CleanupPadInst) \
@@ -1592,7 +1599,10 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(ZExtInst) \
macro(ExtractValueInst) \
macro(LoadInst) \
- macro(VAArgInst)
+ macro(VAArgInst) \
+ macro(AtomicCmpXchgInst) \
+ macro(AtomicRMWInst) \
+ macro(FenceInst)
/**
* @defgroup LLVMCCoreValueGeneral General APIs
@@ -3807,8 +3817,12 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
const char *Name);
LLVMBool LLVMGetVolatile(LLVMValueRef MemoryAccessInst);
void LLVMSetVolatile(LLVMValueRef MemoryAccessInst, LLVMBool IsVolatile);
+LLVMBool LLVMGetWeak(LLVMValueRef CmpXchgInst);
+void LLVMSetWeak(LLVMValueRef CmpXchgInst, LLVMBool IsWeak);
LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemoryAccessInst);
void LLVMSetOrdering(LLVMValueRef MemoryAccessInst, LLVMAtomicOrdering Ordering);
+LLVMAtomicRMWBinOp LLVMGetAtomicRMWBinOp(LLVMValueRef AtomicRMWInst);
+void LLVMSetAtomicRMWBinOp(LLVMValueRef AtomicRMWInst, LLVMAtomicRMWBinOp BinOp);
/* Casts */
LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val,
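The added accessors round out the C API's coverage of cmpxchg and atomicrmw instructions. A contrived sketch of their use (CmpXchg and RMW are hypothetical LLVMValueRefs known to be a cmpxchg and an atomicrmw instruction, respectively):

/* Mark a cmpxchg as weak (spurious failure permitted) and rewrite an
   integer atomicrmw from umax to umin via the new binop accessors. */
static void tweak_atomics(LLVMValueRef CmpXchg, LLVMValueRef RMW) {
  if (!LLVMGetWeak(CmpXchg))
    LLVMSetWeak(CmpXchg, 1);
  if (LLVMGetAtomicRMWBinOp(RMW) == LLVMAtomicRMWBinOpUMax)
    LLVMSetAtomicRMWBinOp(RMW, LLVMAtomicRMWBinOpUMin);
}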
diff --git a/include/llvm-c/DebugInfo.h b/include/llvm-c/DebugInfo.h
index 33c8110a863c..41e9f96bbb92 100644
--- a/include/llvm-c/DebugInfo.h
+++ b/include/llvm-c/DebugInfo.h
@@ -32,7 +32,7 @@ typedef enum {
LLVMDIFlagPublic = 3,
LLVMDIFlagFwdDecl = 1 << 2,
LLVMDIFlagAppleBlock = 1 << 3,
- LLVMDIFlagBlockByrefStruct = 1 << 4,
+ LLVMDIFlagReservedBit4 = 1 << 4,
LLVMDIFlagVirtual = 1 << 5,
LLVMDIFlagArtificial = 1 << 6,
LLVMDIFlagExplicit = 1 << 7,
@@ -170,6 +170,19 @@ typedef unsigned LLVMMetadataKind;
typedef unsigned LLVMDWARFTypeEncoding;
/**
+ * Describes the kind of macro declaration used for LLVMDIBuilderCreateMacro.
+ * @see llvm::dwarf::MacinfoRecordType
+ * @note Values are from DW_MACINFO_* constants in the DWARF specification.
+ */
+typedef enum {
+ LLVMDWARFMacinfoRecordTypeDefine = 0x01,
+ LLVMDWARFMacinfoRecordTypeMacro = 0x02,
+ LLVMDWARFMacinfoRecordTypeStartFile = 0x03,
+ LLVMDWARFMacinfoRecordTypeEndFile = 0x04,
+ LLVMDWARFMacinfoRecordTypeVendorExt = 0xff
+} LLVMDWARFMacinfoRecordType;
+
+/**
* The current debug metadata version number.
*/
unsigned LLVMDebugMetadataVersion(void);
@@ -522,6 +535,38 @@ LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Builder,
LLVMDIFlags Flags);
/**
+ * Create debugging information entry for a macro.
+ * @param Builder The DIBuilder.
+ * @param ParentMacroFile Macro parent (could be NULL).
+ * @param Line Source line number where the macro is defined.
+ * @param RecordType DW_MACINFO_define or DW_MACINFO_undef.
+ * @param Name Macro name.
+ * @param NameLen Macro name length.
+ * @param Value Macro value.
+ * @param ValueLen Macro value length.
+ */
+LLVMMetadataRef LLVMDIBuilderCreateMacro(LLVMDIBuilderRef Builder,
+ LLVMMetadataRef ParentMacroFile,
+ unsigned Line,
+ LLVMDWARFMacinfoRecordType RecordType,
+ const char *Name, size_t NameLen,
+ const char *Value, size_t ValueLen);
+
+/**
+ * Create debugging information temporary entry for a macro file.
+ * The list of the macro node's direct children is computed by DIBuilder
+ * from the \p ParentMacroFile relationship.
+ * @param Builder The DIBuilder.
+ * @param ParentMacroFile Macro parent (could be NULL).
+ * @param Line Source line number where the macro file is included.
+ * @param File File descriptor containing the name of the macro file.
+ */
+LLVMMetadataRef
+LLVMDIBuilderCreateTempMacroFile(LLVMDIBuilderRef Builder,
+ LLVMMetadataRef ParentMacroFile, unsigned Line,
+ LLVMMetadataRef File);
+
+/**
* Create debugging information entry for an enumerator.
* @param Builder The DIBuilder.
* @param Name Enumerator name.
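Together, the two entry points let a C API client mirror "#define" records into DWARF macro debug info. A sketch under assumed preconditions (DIB and File are hypothetical, obtained from earlier DIBuilder and file-descriptor creation calls):

#include <llvm-c/DebugInfo.h>
#include <string.h>

/* Record "#define VERSION 3" beneath a temporary macro file node; the
   DIBuilder later resolves the children via ParentMacroFile links. */
static void emit_macro_debug_info(LLVMDIBuilderRef DIB,
                                  LLVMMetadataRef File) {
  LLVMMetadataRef MacroFile = LLVMDIBuilderCreateTempMacroFile(
      DIB, /*ParentMacroFile=*/NULL, /*Line=*/0, File);
  LLVMDIBuilderCreateMacro(DIB, MacroFile, /*Line=*/1,
                           LLVMDWARFMacinfoRecordTypeDefine,
                           "VERSION", strlen("VERSION"),
                           "3", strlen("3"));
}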
diff --git a/include/llvm-c/Remarks.h b/include/llvm-c/Remarks.h
index 88eb5120c57c..5444aebddd60 100644
--- a/include/llvm-c/Remarks.h
+++ b/include/llvm-c/Remarks.h
@@ -30,7 +30,8 @@ extern "C" {
* @{
*/
-#define REMARKS_API_VERSION 0
+// 0 -> 1: Bitstream remarks support.
+#define REMARKS_API_VERSION 1
/**
* The type of the emitted remark.
@@ -241,6 +242,20 @@ extern LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
uint64_t Size);
/**
+ * Creates a remark parser that can be used to parse the buffer located in \p
+ * Buf of size \p Size bytes.
+ *
+ * \p Buf cannot be `NULL`.
+ *
+ * This function should be paired with LLVMRemarkParserDispose() to avoid
+ * leaking resources.
+ *
+ * \since REMARKS_API_VERSION=1
+ */
+extern LLVMRemarkParserRef LLVMRemarkParserCreateBitstream(const void *Buf,
+ uint64_t Size);
+
+/**
* Returns the next remark in the file.
*
* The value pointed to by the return value needs to be disposed using a call to
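The create/dispose pairing described above looks like this in practice (a sketch; Buf and Size are assumed to hold a bitstream remarks buffer read from disk):

#include <llvm-c/Remarks.h>

/* Drain every remark from a bitstream buffer, disposing each entry and
   finally the parser itself, as the documentation requires. */
static void drain_remarks(const void *Buf, uint64_t Size) {
  LLVMRemarkParserRef Parser = LLVMRemarkParserCreateBitstream(Buf, Size);
  LLVMRemarkEntryRef Remark;
  while ((Remark = LLVMRemarkParserGetNext(Parser)))
    LLVMRemarkEntryDispose(Remark);
  LLVMRemarkParserDispose(Parser);
}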
diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h
index 7a82ed464141..51d007581283 100644
--- a/include/llvm-c/Transforms/IPO.h
+++ b/include/llvm-c/Transforms/IPO.h
@@ -34,6 +34,9 @@ void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM);
/** See llvm::createConstantMergePass function. */
void LLVMAddConstantMergePass(LLVMPassManagerRef PM);
+/** See llvm::createMergeFunctionsPass function. */
+void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM);
+
/** See llvm::createCalledValuePropagationPass function. */
void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM);
@@ -67,6 +70,21 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM);
/** See llvm::createInternalizePass function. */
void LLVMAddInternalizePass(LLVMPassManagerRef, unsigned AllButMain);
+/**
+ * Create and add the internalize pass to the given pass manager with the
+ * provided preservation callback.
+ *
+ * The context parameter is forwarded to the callback on each invocation.
+ * As such, it is the responsibility of the caller to extend its lifetime
+ * until execution of this pass has finished.
+ *
+ * @see llvm::createInternalizePass function.
+ */
+void LLVMAddInternalizePassWithMustPreservePredicate(
+ LLVMPassManagerRef PM,
+ void *Context,
+ LLVMBool (*MustPreserve)(LLVMValueRef, void *));
+
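A sketch of the callback contract; the keep-only-main policy is purely illustrative, and LLVMGetValueName2 is declared in llvm-c/Core.h:

  #include <string.h>
  static LLVMBool KeepMainOnly(LLVMValueRef V, void *Ctx) {
    size_t Len;
    const char *Name = LLVMGetValueName2(V, &Len);
    (void)Ctx;  /* unused by this particular policy */
    return Len == 4 && memcmp(Name, "main", 4) == 0;
  }
  /* later, while populating the pass manager: */
  LLVMAddInternalizePassWithMustPreservePredicate(PM, NULL, KeepMainOnly);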
/** See llvm::createStripDeadPrototypesPass function. */
void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM);
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h
index 031cf98b2df2..6f3a3d8b3750 100644
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -35,6 +35,9 @@ extern "C" {
/** See llvm::createAggressiveDCEPass function. */
void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
+/** See llvm::createDeadCodeEliminationPass function. */
+void LLVMAddDCEPass(LLVMPassManagerRef PM);
+
/** See llvm::createBitTrackingDCEPass function. */
void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM);
@@ -144,6 +147,9 @@ void LLVMAddEarlyCSEMemSSAPass(LLVMPassManagerRef PM);
/** See llvm::createLowerExpectIntrinsicPass function */
void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM);
+/** See llvm::createLowerConstantIntrinsicsPass function */
+void LLVMAddLowerConstantIntrinsicsPass(LLVMPassManagerRef PM);
+
/** See llvm::createTypeBasedAliasAnalysisPass function */
void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM);
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 2467722b1954..41e6067cf44f 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -44,7 +44,7 @@ typedef bool lto_bool_t;
* @{
*/
-#define LTO_API_VERSION 24
+#define LTO_API_VERSION 25
/**
* \since prior to LTO_API_VERSION=3
@@ -550,6 +550,56 @@ extern void
lto_codegen_set_should_embed_uselists(lto_code_gen_t cg,
lto_bool_t ShouldEmbedUselists);
+/** Opaque reference to an LTO input file */
+typedef struct LLVMOpaqueLTOInput *lto_input_t;
+
+/**
+ * Creates an LTO input file from a buffer. The path
+ * argument is used for diagnostics as this function
+ * otherwise does not know which file the given buffer
+ * is associated with.
+ *
+ * \since LTO_API_VERSION=24
+ */
+extern lto_input_t lto_input_create(const void *buffer,
+ size_t buffer_size,
+ const char *path);
+
+/**
+ * Frees all memory internally allocated by the LTO input file.
+ * Upon return the lto_input_t is no longer valid.
+ *
+ * \since LTO_API_VERSION=24
+ */
+extern void lto_input_dispose(lto_input_t input);
+
+/**
+ * Returns the number of dependent library specifiers
+ * for the given LTO input file.
+ *
+ * \since LTO_API_VERSION=24
+ */
+extern unsigned lto_input_get_num_dependent_libraries(lto_input_t input);
+
+/**
+ * Returns the ith dependent library specifier
+ * for the given LTO input file. The returned
+ * string is not null-terminated.
+ *
+ * \since LTO_API_VERSION=24
+ */
+extern const char * lto_input_get_dependent_library(lto_input_t input,
+ size_t index,
+ size_t *size);
+
+/**
+ * Returns the list of libcall symbols that can be generated by LTO
+ * that might not be visible from the symbol table of bitcode files.
+ *
+ * \since prior to LTO_API_VERSION=25
+ */
+extern const char *const *lto_runtime_lib_symbols_list(size_t *size);
+
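A usage sketch for the block relocated above, assuming Buf and BufSize hold a bitcode image; note the returned specifier is length-delimited, not null-terminated:

  #include <stdio.h>
  lto_input_t In = lto_input_create(Buf, BufSize, "a.o" /* diagnostics only */);
  unsigned NumLibs = lto_input_get_num_dependent_libraries(In);
  for (unsigned I = 0; I != NumLibs; ++I) {
    size_t Len;
    const char *Lib = lto_input_get_dependent_library(In, I, &Len);
    fwrite(Lib, 1, Len, stdout);  /* print exactly Len bytes */
    fputc('\n', stdout);
  }
  lto_input_dispose(In);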
/**
 * @} // endgroup LLVMCLTO
* @defgroup LLVMCTLTO ThinLTO
@@ -846,48 +896,6 @@ thinlto_codegen_set_cache_size_megabytes(thinlto_code_gen_t cg,
extern void thinlto_codegen_set_cache_size_files(thinlto_code_gen_t cg,
unsigned max_size_files);
-/** Opaque reference to an LTO input file */
-typedef struct LLVMOpaqueLTOInput *lto_input_t;
-
-/**
- * Creates an LTO input file from a buffer. The path
- * argument is used for diagnotics as this function
- * otherwise does not know which file the given buffer
- * is associated with.
- *
- * \since LTO_API_VERSION=24
- */
-extern lto_input_t lto_input_create(const void *buffer,
- size_t buffer_size,
- const char *path);
-
-/**
- * Frees all memory internally allocated by the LTO input file.
- * Upon return the lto_module_t is no longer valid.
- *
- * \since LTO_API_VERSION=24
- */
-extern void lto_input_dispose(lto_input_t input);
-
-/**
- * Returns the number of dependent library specifiers
- * for the given LTO input file.
- *
- * \since LTO_API_VERSION=24
- */
-extern unsigned lto_input_get_num_dependent_libraries(lto_input_t input);
-
-/**
- * Returns the ith dependent library specifier
- * for the given LTO input file. The returned
- * string is not null-terminated.
- *
- * \since LTO_API_VERSION=24
- */
-extern const char * lto_input_get_dependent_library(lto_input_t input,
- size_t index,
- size_t *size);
-
/**
* @} // endgroup LLVMCTLTO_CACHING
*/
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index a9648d35cf5d..1c4969733791 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -192,6 +192,11 @@ struct APFloatBase {
/// IEEE-754R 7: Default exception handling.
///
/// opUnderflow or opOverflow are always returned or-ed with opInexact.
+ ///
+ /// APFloat models this behavior specified by IEEE-754:
+ /// "For operations producing results in floating-point format, the default
+ /// result of an operation that signals the invalid operation exception
+ /// shall be a quiet NaN."
enum opStatus {
opOK = 0x00,
opInvalidOp = 0x01,
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 2381b75e08b1..8dce5a621bb3 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -1467,6 +1467,13 @@ public:
U.pVal[whichWord(BitPosition)] &= Mask;
}
+ /// Set bottom loBits bits to 0.
+ void clearLowBits(unsigned loBits) {
+ assert(loBits <= BitWidth && "More bits than bitwidth");
+ APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
+ *this &= Keep;
+ }
+
/// Set the sign bit to 0.
void clearSignBit() {
clearBit(BitWidth - 1);
@@ -1496,9 +1503,11 @@ public:
/// Insert the bits from a smaller APInt starting at bitPosition.
void insertBits(const APInt &SubBits, unsigned bitPosition);
+ void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
/// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
APInt extractBits(unsigned numBits, unsigned bitPosition) const;
+ uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
/// @}
/// \name Value Characterization Functions
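A small sketch of the new bit-field helpers working together:

  llvm::APInt V(/*numBits=*/16, 0xABCD);
  V.clearLowBits(4);                                  // V == 0xABC0
  uint64_t F = V.extractBitsAsZExtValue(4, 4);        // bits [4,8) -> 0xC
  V.insertBits(/*SubBits=*/0x5, /*bitPosition=*/0, /*numBits=*/4);  // 0xABC5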
diff --git a/include/llvm/ADT/Any.h b/include/llvm/ADT/Any.h
index 5dcd6e73c54f..49657e02a991 100644
--- a/include/llvm/ADT/Any.h
+++ b/include/llvm/ADT/Any.h
@@ -38,7 +38,7 @@ class Any {
explicit StorageImpl(T &&Value) : Value(std::move(Value)) {}
std::unique_ptr<StorageBase> clone() const override {
- return llvm::make_unique<StorageImpl<T>>(Value);
+ return std::make_unique<StorageImpl<T>>(Value);
}
const void *id() const override { return &TypeId<T>::Id; }
@@ -78,7 +78,7 @@ public:
int>::type = 0>
Any(T &&Value) {
using U = typename std::decay<T>::type;
- Storage = llvm::make_unique<StorageImpl<U>>(std::forward<T>(Value));
+ Storage = std::make_unique<StorageImpl<U>>(std::forward<T>(Value));
}
Any(Any &&Other) : Storage(std::move(Other.Storage)) {}
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index 773c88f7c9f9..f6455d3fa412 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -481,6 +481,12 @@ namespace llvm {
return Vec;
}
+ /// Construct an ArrayRef from a std::array.
+ template <typename T, std::size_t N>
+ ArrayRef<T> makeArrayRef(const std::array<T, N> &Arr) {
+ return Arr;
+ }
+
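What the new overload enables, in one line:

  std::array<int, 3> A = {{1, 2, 3}};
  llvm::ArrayRef<int> R = llvm::makeArrayRef(A);  // views A; no copy made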
/// Construct an ArrayRef from an ArrayRef (no-op) (const)
template <typename T> ArrayRef<T> makeArrayRef(const ArrayRef<T> &Vec) {
return Vec;
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index a05cf8130d3c..948a6e6bfb38 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -38,33 +38,7 @@ namespace detail {
// implementation without requiring two members.
template <typename KeyT, typename ValueT>
struct DenseMapPair : public std::pair<KeyT, ValueT> {
-
- // FIXME: Switch to inheriting constructors when we drop support for older
- // clang versions.
- // NOTE: This default constructor is declared with '{}' rather than
- // '= default' to work around a separate bug in clang-3.8. This can
- // also go when we switch to inheriting constructors.
- DenseMapPair() {}
-
- DenseMapPair(const KeyT &Key, const ValueT &Value)
- : std::pair<KeyT, ValueT>(Key, Value) {}
-
- DenseMapPair(KeyT &&Key, ValueT &&Value)
- : std::pair<KeyT, ValueT>(std::move(Key), std::move(Value)) {}
-
- template <typename AltKeyT, typename AltValueT>
- DenseMapPair(AltKeyT &&AltKey, AltValueT &&AltValue,
- typename std::enable_if<
- std::is_convertible<AltKeyT, KeyT>::value &&
- std::is_convertible<AltValueT, ValueT>::value>::type * = 0)
- : std::pair<KeyT, ValueT>(std::forward<AltKeyT>(AltKey),
- std::forward<AltValueT>(AltValue)) {}
-
- template <typename AltPairT>
- DenseMapPair(AltPairT &&AltPair,
- typename std::enable_if<std::is_convertible<
- AltPairT, std::pair<KeyT, ValueT>>::value>::type * = nullptr)
- : std::pair<KeyT, ValueT>(std::forward<AltPairT>(AltPair)) {}
+ using std::pair<KeyT, ValueT>::pair;
KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; }
const KeyT &getFirst() const { return std::pair<KeyT, ValueT>::first; }
@@ -748,7 +722,7 @@ public:
~DenseMap() {
this->destroyAll();
- operator delete(Buckets);
+ deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT));
}
void swap(DenseMap& RHS) {
@@ -768,7 +742,7 @@ public:
DenseMap& operator=(DenseMap &&other) {
this->destroyAll();
- operator delete(Buckets);
+ deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT));
init(0);
swap(other);
return *this;
@@ -776,7 +750,7 @@ public:
void copyFrom(const DenseMap& other) {
this->destroyAll();
- operator delete(Buckets);
+ deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT));
if (allocateBuckets(other.NumBuckets)) {
this->BaseT::copyFrom(other);
} else {
@@ -809,10 +783,12 @@ public:
this->moveFromOldBuckets(OldBuckets, OldBuckets+OldNumBuckets);
// Free the old table.
- operator delete(OldBuckets);
+ deallocate_buffer(OldBuckets, sizeof(BucketT) * OldNumBuckets,
+ alignof(BucketT));
}
void shrink_and_clear() {
+ unsigned OldNumBuckets = NumBuckets;
unsigned OldNumEntries = NumEntries;
this->destroyAll();
@@ -825,7 +801,8 @@ public:
return;
}
- operator delete(Buckets);
+ deallocate_buffer(Buckets, sizeof(BucketT) * OldNumBuckets,
+ alignof(BucketT));
init(NewNumBuckets);
}
@@ -861,7 +838,8 @@ private:
return false;
}
- Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT) * NumBuckets));
+ Buckets = static_cast<BucketT *>(
+ allocate_buffer(sizeof(BucketT) * NumBuckets, alignof(BucketT)));
return true;
}
};
@@ -1076,7 +1054,8 @@ public:
this->moveFromOldBuckets(OldRep.Buckets, OldRep.Buckets+OldRep.NumBuckets);
// Free the old table.
- operator delete(OldRep.Buckets);
+ deallocate_buffer(OldRep.Buckets, sizeof(BucketT) * OldRep.NumBuckets,
+ alignof(BucketT));
}
void shrink_and_clear() {
@@ -1160,15 +1139,17 @@ private:
if (Small)
return;
- operator delete(getLargeRep()->Buckets);
+ deallocate_buffer(getLargeRep()->Buckets,
+ sizeof(BucketT) * getLargeRep()->NumBuckets,
+ alignof(BucketT));
getLargeRep()->~LargeRep();
}
LargeRep allocateBuckets(unsigned Num) {
assert(Num > InlineBuckets && "Must allocate more buckets than are inline");
- LargeRep Rep = {
- static_cast<BucketT*>(operator new(sizeof(BucketT) * Num)), Num
- };
+ LargeRep Rep = {static_cast<BucketT *>(allocate_buffer(
+ sizeof(BucketT) * Num, alignof(BucketT))),
+ Num};
return Rep;
}
};
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 5ef6f3ad1b04..bd4c60c8f13e 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -17,7 +17,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
-#include "llvm/Support/ScalableSize.h"
+#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -67,6 +67,17 @@ template<> struct DenseMapInfo<char> {
}
};
+// Provide DenseMapInfo for unsigned chars.
+template <> struct DenseMapInfo<unsigned char> {
+ static inline unsigned char getEmptyKey() { return ~0; }
+ static inline unsigned char getTombstoneKey() { return ~0 - 1; }
+ static unsigned getHashValue(const unsigned char &Val) { return Val * 37U; }
+
+ static bool isEqual(const unsigned char &LHS, const unsigned char &RHS) {
+ return LHS == RHS;
+ }
+};
+
// Provide DenseMapInfo for unsigned shorts.
template <> struct DenseMapInfo<unsigned short> {
static inline unsigned short getEmptyKey() { return 0xFFFF; }
diff --git a/include/llvm/ADT/DirectedGraph.h b/include/llvm/ADT/DirectedGraph.h
new file mode 100644
index 000000000000..f6a358d99cd2
--- /dev/null
+++ b/include/llvm/ADT/DirectedGraph.h
@@ -0,0 +1,270 @@
+//===- llvm/ADT/DirectedGraph.h - Directed Graph ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface and a base class implementation for a
+// directed graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_DIRECTEDGRAPH_H
+#define LLVM_ADT_DIRECTEDGRAPH_H
+
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// Represent an edge in the directed graph.
+/// The edge contains the target node it connects to.
+template <class NodeType, class EdgeType> class DGEdge {
+public:
+ DGEdge() = delete;
+ /// Create an edge pointing to the given node \p N.
+ explicit DGEdge(NodeType &N) : TargetNode(N) {}
+ explicit DGEdge(const DGEdge<NodeType, EdgeType> &E)
+ : TargetNode(E.TargetNode) {}
+ DGEdge<NodeType, EdgeType> &operator=(const DGEdge<NodeType, EdgeType> &E) {
+ TargetNode = E.TargetNode;
+ return *this;
+ }
+
+ /// Static polymorphism: delegate implementation (via isEqualTo) to the
+ /// derived class.
+ bool operator==(const EdgeType &E) const { return getDerived().isEqualTo(E); }
+ bool operator!=(const EdgeType &E) const { return !operator==(E); }
+
+ /// Retrieve the target node this edge connects to.
+ const NodeType &getTargetNode() const { return TargetNode; }
+ NodeType &getTargetNode() {
+ return const_cast<NodeType &>(
+ static_cast<const DGEdge<NodeType, EdgeType> &>(*this).getTargetNode());
+ }
+
+protected:
+ // As the default implementation, use address comparison for equality.
+ bool isEqualTo(const EdgeType &E) const { return this == &E; }
+
+ // Cast the 'this' pointer to the derived type and return a reference.
+ EdgeType &getDerived() { return *static_cast<EdgeType *>(this); }
+ const EdgeType &getDerived() const {
+ return *static_cast<const EdgeType *>(this);
+ }
+
+ // The target node this edge connects to.
+ NodeType &TargetNode;
+};
+
+/// Represent a node in the directed graph.
+/// The node has a (possibly empty) list of outgoing edges.
+template <class NodeType, class EdgeType> class DGNode {
+public:
+ using EdgeListTy = SetVector<EdgeType *>;
+ using iterator = typename EdgeListTy::iterator;
+ using const_iterator = typename EdgeListTy::const_iterator;
+
+ /// Create a node with a single outgoing edge \p E.
+ explicit DGNode(EdgeType &E) : Edges() { Edges.insert(&E); }
+ DGNode() = default;
+
+ explicit DGNode(const DGNode<NodeType, EdgeType> &N) : Edges(N.Edges) {}
+ DGNode(DGNode<NodeType, EdgeType> &&N) : Edges(std::move(N.Edges)) {}
+
+ DGNode<NodeType, EdgeType> &operator=(const DGNode<NodeType, EdgeType> &N) {
+ Edges = N.Edges;
+ return *this;
+ }
+ DGNode<NodeType, EdgeType> &operator=(const DGNode<NodeType, EdgeType> &&N) {
+ Edges = std::move(N.Edges);
+ return *this;
+ }
+
+ /// Static polymorphism: delegate implementation (via isEqualTo) to the
+ /// derived class.
+ bool operator==(const NodeType &N) const { return getDerived().isEqualTo(N); }
+ bool operator!=(const NodeType &N) const { return !operator==(N); }
+
+ const_iterator begin() const { return Edges.begin(); }
+ const_iterator end() const { return Edges.end(); }
+ iterator begin() { return Edges.begin(); }
+ iterator end() { return Edges.end(); }
+ const EdgeType &front() const { return *Edges.front(); }
+ EdgeType &front() { return *Edges.front(); }
+ const EdgeType &back() const { return *Edges.back(); }
+ EdgeType &back() { return *Edges.back(); }
+
+ /// Collect in \p EL, all the edges from this node to \p N.
+ /// Return true if at least one edge was found, and false otherwise.
+ /// Note that this implementation allows more than one edge to connect
+ /// a given pair of nodes.
+ bool findEdgesTo(const NodeType &N, SmallVectorImpl<EdgeType *> &EL) const {
+ assert(EL.empty() && "Expected the list of edges to be empty.");
+ for (auto *E : Edges)
+ if (E->getTargetNode() == N)
+ EL.push_back(E);
+ return !EL.empty();
+ }
+
+ /// Add the given edge \p E to this node, if it doesn't exist already. Returns
+ /// true if the edge is added and false otherwise.
+ bool addEdge(EdgeType &E) { return Edges.insert(&E); }
+
+ /// Remove the given edge \p E from this node, if it exists.
+ void removeEdge(EdgeType &E) { Edges.remove(&E); }
+
+ /// Test whether there is an edge that goes from this node to \p N.
+ bool hasEdgeTo(const NodeType &N) const {
+ return (findEdgeTo(N) != Edges.end());
+ }
+
+ /// Retrieve the outgoing edges for the node.
+ const EdgeListTy &getEdges() const { return Edges; }
+ EdgeListTy &getEdges() {
+ return const_cast<EdgeListTy &>(
+ static_cast<const DGNode<NodeType, EdgeType> &>(*this).Edges);
+ }
+
+ /// Clear the outgoing edges.
+ void clear() { Edges.clear(); }
+
+protected:
+ // As the default implementation, use address comparison for equality.
+ bool isEqualTo(const NodeType &N) const { return this == &N; }
+
+ // Cast the 'this' pointer to the derived type and return a reference.
+ NodeType &getDerived() { return *static_cast<NodeType *>(this); }
+ const NodeType &getDerived() const {
+ return *static_cast<const NodeType *>(this);
+ }
+
+ /// Find an edge to \p N. If more than one edge exists, this will return
+ /// the first one in the list of edges.
+ const_iterator findEdgeTo(const NodeType &N) const {
+ return llvm::find_if(
+ Edges, [&N](const EdgeType *E) { return E->getTargetNode() == N; });
+ }
+
+ // The list of outgoing edges.
+ EdgeListTy Edges;
+};
+
+/// Directed graph
+///
+/// The graph is represented by a table of nodes.
+/// Each node contains a (possibly empty) list of outgoing edges.
+/// Each edge contains the target node it connects to.
+template <class NodeType, class EdgeType> class DirectedGraph {
+protected:
+ using NodeListTy = SmallVector<NodeType *, 10>;
+ using EdgeListTy = SmallVector<EdgeType *, 10>;
+public:
+ using iterator = typename NodeListTy::iterator;
+ using const_iterator = typename NodeListTy::const_iterator;
+ using DGraphType = DirectedGraph<NodeType, EdgeType>;
+
+ DirectedGraph() = default;
+ explicit DirectedGraph(NodeType &N) : Nodes() { addNode(N); }
+ DirectedGraph(const DGraphType &G) : Nodes(G.Nodes) {}
+ DirectedGraph(DGraphType &&RHS) : Nodes(std::move(RHS.Nodes)) {}
+ DGraphType &operator=(const DGraphType &G) {
+ Nodes = G.Nodes;
+ return *this;
+ }
+ DGraphType &operator=(const DGraphType &&G) {
+ Nodes = std::move(G.Nodes);
+ return *this;
+ }
+
+ const_iterator begin() const { return Nodes.begin(); }
+ const_iterator end() const { return Nodes.end(); }
+ iterator begin() { return Nodes.begin(); }
+ iterator end() { return Nodes.end(); }
+ const NodeType &front() const { return *Nodes.front(); }
+ NodeType &front() { return *Nodes.front(); }
+ const NodeType &back() const { return *Nodes.back(); }
+ NodeType &back() { return *Nodes.back(); }
+
+ size_t size() const { return Nodes.size(); }
+
+ /// Find the given node \p N in the table.
+ const_iterator findNode(const NodeType &N) const {
+ return llvm::find_if(Nodes,
+ [&N](const NodeType *Node) { return *Node == N; });
+ }
+ iterator findNode(const NodeType &N) {
+ return const_cast<iterator>(
+ static_cast<const DGraphType &>(*this).findNode(N));
+ }
+
+ /// Add the given node \p N to the graph if it is not already present.
+ bool addNode(NodeType &N) {
+ if (findNode(N) != Nodes.end())
+ return false;
+ Nodes.push_back(&N);
+ return true;
+ }
+
+ /// Collect in \p EL all edges that are coming into node \p N. Return true
+ /// if at least one edge was found, and false otherwise.
+ bool findIncomingEdgesToNode(const NodeType &N, SmallVectorImpl<EdgeType*> &EL) const {
+ assert(EL.empty() && "Expected the list of edges to be empty.");
+ EdgeListTy TempList;
+ for (auto *Node : Nodes) {
+ if (*Node == N)
+ continue;
+ Node->findEdgesTo(N, TempList);
+ EL.insert(EL.end(), TempList.begin(), TempList.end());
+ TempList.clear();
+ }
+ return !EL.empty();
+ }
+
+ /// Remove the given node \p N from the graph. If the node has incoming or
+ /// outgoing edges, they are also removed. Return true if the node was found
+ /// and then removed, and false if the node was not found in the graph to
+ /// begin with.
+ bool removeNode(NodeType &N) {
+ iterator IT = findNode(N);
+ if (IT == Nodes.end())
+ return false;
+ // Remove incoming edges.
+ EdgeListTy EL;
+ for (auto *Node : Nodes) {
+ if (*Node == N)
+ continue;
+ Node->findEdgesTo(N, EL);
+ for (auto *E : EL)
+ Node->removeEdge(*E);
+ EL.clear();
+ }
+ N.clear();
+ Nodes.erase(IT);
+ return true;
+ }
+
+ /// Assuming nodes \p Src and \p Dst are already in the graph, connect node \p
+ /// Src to node \p Dst using the provided edge \p E. Return true if \p Src is
+ /// not already connected to \p Dst via \p E, and false otherwise.
+ bool connect(NodeType &Src, NodeType &Dst, EdgeType &E) {
+ assert(findNode(Src) != Nodes.end() && "Src node should be present.");
+ assert(findNode(Dst) != Nodes.end() && "Dst node should be present.");
+ assert((E.getTargetNode() == Dst) &&
+ "Target of the given edge does not match Dst.");
+ return Src.addEdge(E);
+ }
+
+protected:
+ // The list of nodes in the graph.
+ NodeListTy Nodes;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ADT_DIRECTEDGRAPH_H
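A sketch of the intended CRTP usage; the My* names are illustrative, not part of the header:

  #include "llvm/ADT/DirectedGraph.h"
  struct MyNode;
  struct MyEdge : llvm::DGEdge<MyNode, MyEdge> {
    using DGEdge::DGEdge;              // inherit DGEdge(NodeType &)
  };
  struct MyNode : llvm::DGNode<MyNode, MyEdge> {};

  void buildGraph() {
    llvm::DirectedGraph<MyNode, MyEdge> G;
    MyNode A, B;
    MyEdge E(B);                       // an edge whose target is B
    G.addNode(A);
    G.addNode(B);
    G.connect(A, B, E);                // A -> B; asserts E targets Dst
  }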
diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h
index 008188bfa210..b22606bdb518 100644
--- a/include/llvm/ADT/Hashing.h
+++ b/include/llvm/ADT/Hashing.h
@@ -45,7 +45,6 @@
#define LLVM_ADT_HASHING_H
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/type_traits.h"
#include <algorithm>
diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h
index 12828c4cfdab..a02876ee77f3 100644
--- a/include/llvm/ADT/IntervalMap.h
+++ b/include/llvm/ADT/IntervalMap.h
@@ -963,8 +963,8 @@ public:
private:
// The root data is either a RootLeaf or a RootBranchData instance.
- LLVM_ALIGNAS(RootLeaf) LLVM_ALIGNAS(RootBranchData)
- AlignedCharArrayUnion<RootLeaf, RootBranchData> data;
+ alignas(RootLeaf) alignas(RootBranchData)
+ AlignedCharArrayUnion<RootLeaf, RootBranchData> data;
// Tree height.
// 0: Leaves in root.
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 24a2bb67a36e..fa6bf1504469 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -13,6 +13,7 @@
#ifndef LLVM_ADT_POINTERINTPAIR_H
#define LLVM_ADT_POINTERINTPAIR_H
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/type_traits.h"
#include <cassert>
@@ -59,19 +60,19 @@ public:
IntType getInt() const { return (IntType)Info::getInt(Value); }
- void setPointer(PointerTy PtrVal) {
+ void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
Value = Info::updatePointer(Value, PtrVal);
}
- void setInt(IntType IntVal) {
+ void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION {
Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal));
}
- void initWithPointer(PointerTy PtrVal) {
+ void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
Value = Info::updatePointer(0, PtrVal);
}
- void setPointerAndInt(PointerTy PtrVal, IntType IntVal) {
+ void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION {
Value = Info::updateInt(Info::updatePointer(0, PtrVal),
static_cast<intptr_t>(IntVal));
}
@@ -89,7 +90,7 @@ public:
void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); }
- void setFromOpaqueValue(void *Val) {
+ void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION {
Value = reinterpret_cast<intptr_t>(Val);
}
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 2bcdf546c6e4..98c905775a77 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -54,21 +54,14 @@ struct PointerUnionTypeSelectorReturn<
};
namespace pointer_union_detail {
- constexpr int constexprMin(int a, int b) { return a < b ? a : b; }
/// Determine the number of bits required to store integers with values < n.
/// This is ceil(log2(n)).
constexpr int bitsRequired(unsigned n) {
return n > 1 ? 1 + bitsRequired((n + 1) / 2) : 0;
}
- // FIXME: In C++14, replace this with
- // std::min({PointerLikeTypeTraits<Ts>::NumLowBitsAvailable...})
- template <typename T> constexpr int lowBitsAvailable() {
- return PointerLikeTypeTraits<T>::NumLowBitsAvailable;
- }
- template <typename T1, typename T2, typename... Ts>
- constexpr int lowBitsAvailable() {
- return constexprMin(lowBitsAvailable<T1>(), lowBitsAvailable<T2, Ts...>());
+ template <typename... Ts> constexpr int lowBitsAvailable() {
+ return std::min<int>({PointerLikeTypeTraits<Ts>::NumLowBitsAvailable...});
}
/// Find the index of a type in a list of types. TypeIndex<T, Us...>::Index
@@ -167,10 +160,11 @@ class PointerUnion
void *, pointer_union_detail::bitsRequired(sizeof...(PTs)), int,
pointer_union_detail::PointerUnionUIntTraits<PTs...>>,
0, PTs...> {
- // The first type is special in some ways, but we don't want PointerUnion to
- // be a 'template <typename First, typename ...Rest>' because it's much more
- // convenient to have a name for the whole pack. So split off the first type
- // here.
+ // The first type is special because we want to directly cast a pointer to a
+ // default-initialized union to a pointer to the first type. But we don't
+ // want PointerUnion to be a 'template <typename First, typename ...Rest>'
+ // because it's much more convenient to have a name for the whole pack. So
+ // split off the first type here.
using First = typename pointer_union_detail::GetFirstType<PTs...>::type;
using Base = typename PointerUnion::PointerUnionMembers;
@@ -182,12 +176,7 @@ public:
/// Test if the pointer held in the union is null, regardless of
/// which type it is.
- bool isNull() const {
- // Convert from the void* to one of the pointer types, to make sure that
- // we recursively strip off low bits if we have a nested PointerUnion.
- return !PointerLikeTypeTraits<First>::getFromVoidPointer(
- this->Val.getPointer());
- }
+ bool isNull() const { return !this->Val.getPointer(); }
explicit operator bool() const { return !isNull(); }
@@ -226,7 +215,8 @@ public:
First *getAddrOfPtr1() {
assert(is<First>() && "Val is not the first pointer");
assert(
- get<First>() == this->Val.getPointer() &&
+ PointerLikeTypeTraits<First>::getAsVoidPointer(get<First>()) ==
+ this->Val.getPointer() &&
"Can't get the address because PointerLikeTypeTraits changes the ptr");
return const_cast<First *>(
reinterpret_cast<const First *>(this->Val.getAddrOfPointer()));
diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h
index 81dce0168c79..274933bc5204 100644
--- a/include/llvm/ADT/STLExtras.h
+++ b/include/llvm/ADT/STLExtras.h
@@ -95,18 +95,6 @@ template <class Ty> struct identity {
}
};
-template <class Ty> struct less_ptr {
- bool operator()(const Ty* left, const Ty* right) const {
- return *left < *right;
- }
-};
-
-template <class Ty> struct greater_ptr {
- bool operator()(const Ty* left, const Ty* right) const {
- return *right < *left;
- }
-};
-
/// An efficient, type-erasing, non-owning reference to a callable. This is
/// intended for use as the type of a function parameter that is not used
/// after the function in question returns.
@@ -530,10 +518,6 @@ bool all_of(R &&range, UnaryPredicate P);
template <typename R, typename UnaryPredicate>
bool any_of(R &&range, UnaryPredicate P);
-template <size_t... I> struct index_sequence;
-
-template <class... Ts> struct index_sequence_for;
-
namespace detail {
using std::declval;
@@ -568,38 +552,38 @@ struct zip_common : public zip_traits<ZipType, Iters...> {
std::tuple<Iters...> iterators;
protected:
- template <size_t... Ns> value_type deref(index_sequence<Ns...>) const {
+ template <size_t... Ns> value_type deref(std::index_sequence<Ns...>) const {
return value_type(*std::get<Ns>(iterators)...);
}
template <size_t... Ns>
- decltype(iterators) tup_inc(index_sequence<Ns...>) const {
+ decltype(iterators) tup_inc(std::index_sequence<Ns...>) const {
return std::tuple<Iters...>(std::next(std::get<Ns>(iterators))...);
}
template <size_t... Ns>
- decltype(iterators) tup_dec(index_sequence<Ns...>) const {
+ decltype(iterators) tup_dec(std::index_sequence<Ns...>) const {
return std::tuple<Iters...>(std::prev(std::get<Ns>(iterators))...);
}
public:
zip_common(Iters &&... ts) : iterators(std::forward<Iters>(ts)...) {}
- value_type operator*() { return deref(index_sequence_for<Iters...>{}); }
+ value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); }
const value_type operator*() const {
- return deref(index_sequence_for<Iters...>{});
+ return deref(std::index_sequence_for<Iters...>{});
}
ZipType &operator++() {
- iterators = tup_inc(index_sequence_for<Iters...>{});
+ iterators = tup_inc(std::index_sequence_for<Iters...>{});
return *reinterpret_cast<ZipType *>(this);
}
ZipType &operator--() {
static_assert(Base::IsBidirectional,
"All inner iterators must be at least bidirectional.");
- iterators = tup_dec(index_sequence_for<Iters...>{});
+ iterators = tup_dec(std::index_sequence_for<Iters...>{});
return *reinterpret_cast<ZipType *>(this);
}
};
@@ -618,7 +602,8 @@ struct zip_first : public zip_common<zip_first<Iters...>, Iters...> {
template <typename... Iters>
class zip_shortest : public zip_common<zip_shortest<Iters...>, Iters...> {
template <size_t... Ns>
- bool test(const zip_shortest<Iters...> &other, index_sequence<Ns...>) const {
+ bool test(const zip_shortest<Iters...> &other,
+ std::index_sequence<Ns...>) const {
return all_of(std::initializer_list<bool>{std::get<Ns>(this->iterators) !=
std::get<Ns>(other.iterators)...},
identity<bool>{});
@@ -630,7 +615,7 @@ public:
zip_shortest(Iters &&... ts) : Base(std::forward<Iters>(ts)...) {}
bool operator==(const zip_shortest<Iters...> &other) const {
- return !test(other, index_sequence_for<Iters...>{});
+ return !test(other, std::index_sequence_for<Iters...>{});
}
};
@@ -646,18 +631,21 @@ public:
private:
std::tuple<Args...> ts;
- template <size_t... Ns> iterator begin_impl(index_sequence<Ns...>) const {
+ template <size_t... Ns>
+ iterator begin_impl(std::index_sequence<Ns...>) const {
return iterator(std::begin(std::get<Ns>(ts))...);
}
- template <size_t... Ns> iterator end_impl(index_sequence<Ns...>) const {
+ template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) const {
return iterator(std::end(std::get<Ns>(ts))...);
}
public:
zippy(Args &&... ts_) : ts(std::forward<Args>(ts_)...) {}
- iterator begin() const { return begin_impl(index_sequence_for<Args...>{}); }
- iterator end() const { return end_impl(index_sequence_for<Args...>{}); }
+ iterator begin() const {
+ return begin_impl(std::index_sequence_for<Args...>{});
+ }
+ iterator end() const { return end_impl(std::index_sequence_for<Args...>{}); }
};
} // end namespace detail
@@ -727,20 +715,20 @@ private:
template <size_t... Ns>
bool test(const zip_longest_iterator<Iters...> &other,
- index_sequence<Ns...>) const {
+ std::index_sequence<Ns...>) const {
return llvm::any_of(
std::initializer_list<bool>{std::get<Ns>(this->iterators) !=
std::get<Ns>(other.iterators)...},
identity<bool>{});
}
- template <size_t... Ns> value_type deref(index_sequence<Ns...>) const {
+ template <size_t... Ns> value_type deref(std::index_sequence<Ns...>) const {
return value_type(
deref_or_none(std::get<Ns>(iterators), std::get<Ns>(end_iterators))...);
}
template <size_t... Ns>
- decltype(iterators) tup_inc(index_sequence<Ns...>) const {
+ decltype(iterators) tup_inc(std::index_sequence<Ns...>) const {
return std::tuple<Iters...>(
next_or_end(std::get<Ns>(iterators), std::get<Ns>(end_iterators))...);
}
@@ -750,17 +738,19 @@ public:
: iterators(std::forward<Iters>(ts.first)...),
end_iterators(std::forward<Iters>(ts.second)...) {}
- value_type operator*() { return deref(index_sequence_for<Iters...>{}); }
+ value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); }
- value_type operator*() const { return deref(index_sequence_for<Iters...>{}); }
+ value_type operator*() const {
+ return deref(std::index_sequence_for<Iters...>{});
+ }
zip_longest_iterator<Iters...> &operator++() {
- iterators = tup_inc(index_sequence_for<Iters...>{});
+ iterators = tup_inc(std::index_sequence_for<Iters...>{});
return *this;
}
bool operator==(const zip_longest_iterator<Iters...> &other) const {
- return !test(other, index_sequence_for<Iters...>{});
+ return !test(other, std::index_sequence_for<Iters...>{});
}
};
@@ -777,12 +767,13 @@ public:
private:
std::tuple<Args...> ts;
- template <size_t... Ns> iterator begin_impl(index_sequence<Ns...>) const {
+ template <size_t... Ns>
+ iterator begin_impl(std::index_sequence<Ns...>) const {
return iterator(std::make_pair(adl_begin(std::get<Ns>(ts)),
adl_end(std::get<Ns>(ts)))...);
}
- template <size_t... Ns> iterator end_impl(index_sequence<Ns...>) const {
+ template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) const {
return iterator(std::make_pair(adl_end(std::get<Ns>(ts)),
adl_end(std::get<Ns>(ts)))...);
}
@@ -790,8 +781,10 @@ private:
public:
zip_longest_range(Args &&... ts_) : ts(std::forward<Args>(ts_)...) {}
- iterator begin() const { return begin_impl(index_sequence_for<Args...>{}); }
- iterator end() const { return end_impl(index_sequence_for<Args...>{}); }
+ iterator begin() const {
+ return begin_impl(std::index_sequence_for<Args...>{});
+ }
+ iterator end() const { return end_impl(std::index_sequence_for<Args...>{}); }
};
} // namespace detail
@@ -847,7 +840,7 @@ class concat_iterator
/// Increments the first non-end iterator.
///
/// It is an error to call this with all iterators at the end.
- template <size_t... Ns> void increment(index_sequence<Ns...>) {
+ template <size_t... Ns> void increment(std::index_sequence<Ns...>) {
// Build a sequence of functions to increment each iterator if possible.
bool (concat_iterator::*IncrementHelperFns[])() = {
&concat_iterator::incrementHelper<Ns>...};
@@ -876,7 +869,7 @@ class concat_iterator
/// reference.
///
/// It is an error to call this with all iterators at the end.
- template <size_t... Ns> ValueT &get(index_sequence<Ns...>) const {
+ template <size_t... Ns> ValueT &get(std::index_sequence<Ns...>) const {
// Build a sequence of functions to get from iterator if possible.
ValueT *(concat_iterator::*GetHelperFns[])() const = {
&concat_iterator::getHelper<Ns>...};
@@ -901,11 +894,13 @@ public:
using BaseT::operator++;
concat_iterator &operator++() {
- increment(index_sequence_for<IterTs...>());
+ increment(std::index_sequence_for<IterTs...>());
return *this;
}
- ValueT &operator*() const { return get(index_sequence_for<IterTs...>()); }
+ ValueT &operator*() const {
+ return get(std::index_sequence_for<IterTs...>());
+ }
bool operator==(const concat_iterator &RHS) const {
return Begins == RHS.Begins && Ends == RHS.Ends;
@@ -928,10 +923,10 @@ public:
private:
std::tuple<RangeTs...> Ranges;
- template <size_t... Ns> iterator begin_impl(index_sequence<Ns...>) {
+ template <size_t... Ns> iterator begin_impl(std::index_sequence<Ns...>) {
return iterator(std::get<Ns>(Ranges)...);
}
- template <size_t... Ns> iterator end_impl(index_sequence<Ns...>) {
+ template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) {
return iterator(make_range(std::end(std::get<Ns>(Ranges)),
std::end(std::get<Ns>(Ranges)))...);
}
@@ -940,8 +935,8 @@ public:
concat_range(RangeTs &&... Ranges)
: Ranges(std::forward<RangeTs>(Ranges)...) {}
- iterator begin() { return begin_impl(index_sequence_for<RangeTs...>{}); }
- iterator end() { return end_impl(index_sequence_for<RangeTs...>{}); }
+ iterator begin() { return begin_impl(std::index_sequence_for<RangeTs...>{}); }
+ iterator end() { return end_impl(std::index_sequence_for<RangeTs...>{}); }
};
} // end namespace detail
@@ -990,28 +985,6 @@ struct on_first {
}
};
-// A subset of N3658. More stuff can be added as-needed.
-
-/// Represents a compile-time sequence of integers.
-template <class T, T... I> struct integer_sequence {
- using value_type = T;
-
- static constexpr size_t size() { return sizeof...(I); }
-};
-
-/// Alias for the common case of a sequence of size_ts.
-template <size_t... I>
-struct index_sequence : integer_sequence<std::size_t, I...> {};
-
-template <std::size_t N, std::size_t... I>
-struct build_index_impl : build_index_impl<N - 1, N - 1, I...> {};
-template <std::size_t... I>
-struct build_index_impl<0, I...> : index_sequence<I...> {};
-
-/// Creates a compile-time integer sequence for a parameter pack.
-template <class... Ts>
-struct index_sequence_for : build_index_impl<sizeof...(Ts)> {};
-
/// Utility type to build an inheritance chain that makes it easy to rank
/// overload candidates.
template <int N> struct rank : rank<N - 1> {};
@@ -1391,41 +1364,6 @@ void replace(Container &Cont, typename Container::iterator ContIt,
// Extra additions to <memory>
//===----------------------------------------------------------------------===//
-// Implement make_unique according to N3656.
-
-/// Constructs a `new T()` with the given args and returns a
-/// `unique_ptr<T>` which owns the object.
-///
-/// Example:
-///
-/// auto p = make_unique<int>();
-/// auto p = make_unique<std::tuple<int, int>>(0, 1);
-template <class T, class... Args>
-typename std::enable_if<!std::is_array<T>::value, std::unique_ptr<T>>::type
-make_unique(Args &&... args) {
- return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
-
-/// Constructs a `new T[n]` with the given args and returns a
-/// `unique_ptr<T[]>` which owns the object.
-///
-/// \param n size of the new array.
-///
-/// Example:
-///
-/// auto p = make_unique<int[]>(2); // value-initializes the array with 0's.
-template <class T>
-typename std::enable_if<std::is_array<T>::value && std::extent<T>::value == 0,
- std::unique_ptr<T>>::type
-make_unique(size_t n) {
- return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]());
-}
-
-/// This function isn't used and is only here to provide better compile errors.
-template <class T, class... Args>
-typename std::enable_if<std::extent<T>::value != 0>::type
-make_unique(Args &&...) = delete;
-
struct FreeDeleter {
void operator()(void* v) {
::free(v);
@@ -1439,20 +1377,6 @@ struct pair_hash {
}
};
-/// A functor like C++14's std::less<void> in its absence.
-struct less {
- template <typename A, typename B> bool operator()(A &&a, B &&b) const {
- return std::forward<A>(a) < std::forward<B>(b);
- }
-};
-
-/// A functor like C++14's std::equal<void> in its absence.
-struct equal {
- template <typename A, typename B> bool operator()(A &&a, B &&b) const {
- return std::forward<A>(a) == std::forward<B>(b);
- }
-};
-
/// Binary functor that adapts to any other binary functor after dereferencing
/// operands.
template <typename T> struct deref {
@@ -1580,7 +1504,7 @@ template <typename R> detail::enumerator<R> enumerate(R &&TheRange) {
namespace detail {
template <typename F, typename Tuple, std::size_t... I>
-auto apply_tuple_impl(F &&f, Tuple &&t, index_sequence<I...>)
+auto apply_tuple_impl(F &&f, Tuple &&t, std::index_sequence<I...>)
-> decltype(std::forward<F>(f)(std::get<I>(std::forward<Tuple>(t))...)) {
return std::forward<F>(f)(std::get<I>(std::forward<Tuple>(t))...);
}
@@ -1593,9 +1517,9 @@ auto apply_tuple_impl(F &&f, Tuple &&t, index_sequence<I...>)
template <typename F, typename Tuple>
auto apply_tuple(F &&f, Tuple &&t) -> decltype(detail::apply_tuple_impl(
std::forward<F>(f), std::forward<Tuple>(t),
- build_index_impl<
+ std::make_index_sequence<
std::tuple_size<typename std::decay<Tuple>::type>::value>{})) {
- using Indices = build_index_impl<
+ using Indices = std::make_index_sequence<
std::tuple_size<typename std::decay<Tuple>::type>::value>;
return detail::apply_tuple_impl(std::forward<F>(f), std::forward<Tuple>(t),
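The switch to std::make_index_sequence leaves llvm::apply_tuple's behavior unchanged; a quick sanity sketch:

  auto Sum = [](int A, int B) { return A + B; };
  int R = llvm::apply_tuple(Sum, std::make_tuple(2, 3));  // R == 5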
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index 742450e6a951..61375c008022 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -290,7 +290,7 @@ public:
++Prev;
uintptr_t Bits = getSmallBits();
// Mask in previous bits.
- uintptr_t Mask = (1 << Prev) - 1;
+ uintptr_t Mask = (uintptr_t(1) << Prev) - 1;
Bits |= Mask;
if (Bits == ~uintptr_t(0) || Prev + 1 >= getSmallSize())
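Why the widened operand matters: the literal `1` is a plain int, so for Prev at or past int's width the old shift was undefined behavior on 64-bit hosts:

  uintptr_t Bad  = (1 << 40) - 1;             // UB: shift exceeds int's width
  uintptr_t Good = (uintptr_t(1) << 40) - 1;  // well-defined on 64-bit uintptr_t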
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index 2ac59da596ef..b7387ddcf1c7 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -44,38 +44,39 @@ class raw_ostream;
class raw_fd_ostream;
class StringRef;
-class Statistic {
+class StatisticBase {
public:
const char *DebugType;
const char *Name;
const char *Desc;
- std::atomic<unsigned> Value;
- std::atomic<bool> Initialized;
- unsigned getValue() const { return Value.load(std::memory_order_relaxed); }
+ StatisticBase(const char *DebugType, const char *Name, const char *Desc)
+ : DebugType(DebugType), Name(Name), Desc(Desc) {}
+
const char *getDebugType() const { return DebugType; }
const char *getName() const { return Name; }
const char *getDesc() const { return Desc; }
+};
- /// construct - This should only be called for non-global statistics.
- void construct(const char *debugtype, const char *name, const char *desc) {
- DebugType = debugtype;
- Name = name;
- Desc = desc;
- Value = 0;
- Initialized = false;
- }
+class TrackingStatistic : public StatisticBase {
+public:
+ std::atomic<unsigned> Value;
+ std::atomic<bool> Initialized;
+
+ TrackingStatistic(const char *DebugType, const char *Name, const char *Desc)
+ : StatisticBase(DebugType, Name, Desc), Value(0), Initialized(false) {}
+
+ unsigned getValue() const { return Value.load(std::memory_order_relaxed); }
// Allow use of this class as the value itself.
operator unsigned() const { return getValue(); }
-#if LLVM_ENABLE_STATS
- const Statistic &operator=(unsigned Val) {
+ const TrackingStatistic &operator=(unsigned Val) {
Value.store(Val, std::memory_order_relaxed);
return init();
}
- const Statistic &operator++() {
+ const TrackingStatistic &operator++() {
Value.fetch_add(1, std::memory_order_relaxed);
return init();
}
@@ -85,7 +86,7 @@ public:
return Value.fetch_add(1, std::memory_order_relaxed);
}
- const Statistic &operator--() {
+ const TrackingStatistic &operator--() {
Value.fetch_sub(1, std::memory_order_relaxed);
return init();
}
@@ -95,14 +96,14 @@ public:
return Value.fetch_sub(1, std::memory_order_relaxed);
}
- const Statistic &operator+=(unsigned V) {
+ const TrackingStatistic &operator+=(unsigned V) {
if (V == 0)
return *this;
Value.fetch_add(V, std::memory_order_relaxed);
return init();
}
- const Statistic &operator-=(unsigned V) {
+ const TrackingStatistic &operator-=(unsigned V) {
if (V == 0)
return *this;
Value.fetch_sub(V, std::memory_order_relaxed);
@@ -119,54 +120,57 @@ public:
init();
}
-#else // Statistics are disabled in release builds.
-
- const Statistic &operator=(unsigned Val) {
+protected:
+ TrackingStatistic &init() {
+ if (!Initialized.load(std::memory_order_acquire))
+ RegisterStatistic();
return *this;
}
- const Statistic &operator++() {
- return *this;
- }
+ void RegisterStatistic();
+};
- unsigned operator++(int) {
- return 0;
- }
+class NoopStatistic : public StatisticBase {
+public:
+ using StatisticBase::StatisticBase;
- const Statistic &operator--() {
- return *this;
- }
+ unsigned getValue() const { return 0; }
- unsigned operator--(int) {
- return 0;
- }
+ // Allow use of this class as the value itself.
+ operator unsigned() const { return 0; }
- const Statistic &operator+=(const unsigned &V) {
- return *this;
- }
+ const NoopStatistic &operator=(unsigned Val) { return *this; }
- const Statistic &operator-=(const unsigned &V) {
- return *this;
- }
+ const NoopStatistic &operator++() { return *this; }
- void updateMax(unsigned V) {}
+ unsigned operator++(int) { return 0; }
-#endif // LLVM_ENABLE_STATS
+ const NoopStatistic &operator--() { return *this; }
-protected:
- Statistic &init() {
- if (!Initialized.load(std::memory_order_acquire))
- RegisterStatistic();
- return *this;
- }
+ unsigned operator--(int) { return 0; }
- void RegisterStatistic();
+ const NoopStatistic &operator+=(const unsigned &V) { return *this; }
+
+ const NoopStatistic &operator-=(const unsigned &V) { return *this; }
+
+ void updateMax(unsigned V) {}
};
+#if LLVM_ENABLE_STATS
+using Statistic = TrackingStatistic;
+#else
+using Statistic = NoopStatistic;
+#endif
+
// STATISTIC - A macro to make definition of statistics really simple. This
// automatically passes the DEBUG_TYPE of the file into the statistic.
#define STATISTIC(VARNAME, DESC) \
- static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC, {0}, {false}}
+ static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC}
+
+// ALWAYS_ENABLED_STATISTIC - A macro to define a statistic like STATISTIC,
+// except that it stays enabled even if LLVM_ENABLE_STATS is off.
+#define ALWAYS_ENABLED_STATISTIC(VARNAME, DESC) \
+ static llvm::TrackingStatistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC}
/// Enable the collection and printing of statistics.
void EnableStatistics(bool PrintOnExit = true);
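A usage sketch; DEBUG_TYPE must be defined before either macro is used:

  #define DEBUG_TYPE "mypass"
  STATISTIC(NumWidgets, "Number of widgets processed");
  ALWAYS_ENABLED_STATISTIC(NumEntries, "Counted even with stats disabled");
  // inside the pass:
  ++NumWidgets;  // atomic increment, relaxed memory order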
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 16ac90bd6c89..ef1a11e0619b 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -345,7 +345,7 @@ inline void join_items_impl(std::string &Result, Sep Separator, const Arg1 &A1,
join_items_impl(Result, Separator, std::forward<Args>(Items)...);
}
-inline size_t join_one_item_size(char C) { return 1; }
+inline size_t join_one_item_size(char) { return 1; }
inline size_t join_one_item_size(const char *S) { return S ? ::strlen(S) : 0; }
template <typename T> inline size_t join_one_item_size(const T &Str) {
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index 8a586fc26709..108185bd07b9 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -118,36 +118,59 @@ public:
}
};
-/// StringMapEntry - This is used to represent one value that is inserted into
-/// a StringMap. It contains the Value itself and the key: the string length
-/// and data.
+/// StringMapEntryStorage - Holds the value in a StringMapEntry.
+///
+/// Factored out into a separate base class to make it easier to specialize.
+/// This is primarily intended to support StringSet, which doesn't need a value
+/// stored at all.
template<typename ValueTy>
-class StringMapEntry : public StringMapEntryBase {
+class StringMapEntryStorage : public StringMapEntryBase {
public:
ValueTy second;
- explicit StringMapEntry(size_t strLen)
+ explicit StringMapEntryStorage(size_t strLen)
: StringMapEntryBase(strLen), second() {}
template <typename... InitTy>
- StringMapEntry(size_t strLen, InitTy &&... InitVals)
+ StringMapEntryStorage(size_t strLen, InitTy &&... InitVals)
: StringMapEntryBase(strLen), second(std::forward<InitTy>(InitVals)...) {}
- StringMapEntry(StringMapEntry &E) = delete;
-
- StringRef getKey() const {
- return StringRef(getKeyData(), getKeyLength());
- }
+ StringMapEntryStorage(StringMapEntryStorage &E) = delete;
const ValueTy &getValue() const { return second; }
ValueTy &getValue() { return second; }
void setValue(const ValueTy &V) { second = V; }
+};
+
+template<>
+class StringMapEntryStorage<NoneType> : public StringMapEntryBase {
+public:
+ explicit StringMapEntryStorage(size_t strLen, NoneType none = None)
+ : StringMapEntryBase(strLen) {}
+ StringMapEntryStorage(StringMapEntryStorage &E) = delete;
+
+ NoneType getValue() const { return None; }
+};
+
+/// StringMapEntry - This is used to represent one value that is inserted into
+/// a StringMap. It contains the Value itself and the key: the string length
+/// and data.
+template<typename ValueTy>
+class StringMapEntry final : public StringMapEntryStorage<ValueTy> {
+public:
+ using StringMapEntryStorage<ValueTy>::StringMapEntryStorage;
+
+ StringRef getKey() const {
+ return StringRef(getKeyData(), this->getKeyLength());
+ }
/// getKeyData - Return the start of the string data that is the key for this
/// value. The string data is always stored immediately after the
/// StringMapEntry object.
const char *getKeyData() const {return reinterpret_cast<const char*>(this+1);}
- StringRef first() const { return StringRef(getKeyData(), getKeyLength()); }
+ StringRef first() const {
+ return StringRef(getKeyData(), this->getKeyLength());
+ }
 /// Create a StringMapEntry for the specified key and construct the value
 /// using \p InitVals.
@@ -199,7 +222,7 @@ public:
template<typename AllocatorTy>
void Destroy(AllocatorTy &Allocator) {
// Free memory referenced by the item.
- size_t AllocSize = sizeof(StringMapEntry) + getKeyLength() + 1;
+ size_t AllocSize = sizeof(StringMapEntry) + this->getKeyLength() + 1;
this->~StringMapEntry();
Allocator.Deallocate(static_cast<void *>(this), AllocSize);
}
@@ -391,6 +414,16 @@ public:
return try_emplace(KV.first, std::move(KV.second));
}
+ /// Inserts an element or assigns to the current element if the key already
+ /// exists. The return type is the same as try_emplace.
+ template <typename V>
+ std::pair<iterator, bool> insert_or_assign(StringRef Key, V &&Val) {
+ auto Ret = try_emplace(Key, std::forward<V>(Val));
+ if (!Ret.second)
+ Ret.first->second = std::forward<V>(Val);
+ return Ret;
+ }
+
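A sketch contrasting the new call with try_emplace:

  llvm::StringMap<int> M;
  M.insert_or_assign("x", 1);           // new key: inserted, second == true
  auto R = M.insert_or_assign("x", 2);  // existing key: value overwritten
  // R.second == false; M.lookup("x") == 2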
/// Emplace a new element for the specified key into the map if the key isn't
/// already in the map. The bool component of the returned pair is true
/// if and only if the insertion takes place, and the iterator component of
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 4661b1e68b2f..52baab17bede 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -67,6 +67,20 @@ namespace llvm {
return ::memcmp(Lhs,Rhs,Length);
}
+ // Constexpr version of std::strlen.
+ static constexpr size_t strLen(const char *Str) {
+#if __cplusplus > 201402L
+ return std::char_traits<char>::length(Str);
+#elif __has_builtin(__builtin_strlen) || defined(__GNUC__)
+ return __builtin_strlen(Str);
+#else
+ const char *Begin = Str;
+ while (*Str != '\0')
+ ++Str;
+ return Str - Begin;
+#endif
+ }
+
public:
/// @name Constructors
/// @{
@@ -79,8 +93,8 @@ namespace llvm {
StringRef(std::nullptr_t) = delete;
/// Construct a string ref from a cstring.
- /*implicit*/ StringRef(const char *Str)
- : Data(Str), Length(Str ? ::strlen(Str) : 0) {}
+ /*implicit*/ constexpr StringRef(const char *Str)
+ : Data(Str), Length(Str ? strLen(Str) : 0) {}
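With the constructor now constexpr, a StringRef over a string literal can be formed during constant evaluation (strLen folds at compile time):

  constexpr llvm::StringRef Flag("enable-widgets");  // Length computed statically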
/// Construct a string ref from a pointer and length.
/*implicit*/ constexpr StringRef(const char *data, size_t length)
diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h
index af3a44a7b32c..60be09d3c326 100644
--- a/include/llvm/ADT/StringSet.h
+++ b/include/llvm/ADT/StringSet.h
@@ -24,8 +24,8 @@ namespace llvm {
/// StringSet - A wrapper for StringMap that provides set-like functionality.
template <class AllocatorTy = MallocAllocator>
- class StringSet : public StringMap<char, AllocatorTy> {
- using base = StringMap<char, AllocatorTy>;
+ class StringSet : public StringMap<NoneType, AllocatorTy> {
+ using base = StringMap<NoneType, AllocatorTy>;
public:
StringSet() = default;
@@ -37,13 +37,13 @@ namespace llvm {
std::pair<typename base::iterator, bool> insert(StringRef Key) {
assert(!Key.empty());
- return base::insert(std::make_pair(Key, '\0'));
+ return base::insert(std::make_pair(Key, None));
}
template <typename InputIt>
void insert(const InputIt &Begin, const InputIt &End) {
for (auto It = Begin; It != End; ++It)
- base::insert(std::make_pair(*It, '\0'));
+ base::insert(std::make_pair(*It, None));
}
template <typename ValueTy>
diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h
index ac82451a9b21..6b76d35d4e92 100644
--- a/include/llvm/ADT/TinyPtrVector.h
+++ b/include/llvm/ADT/TinyPtrVector.h
@@ -31,6 +31,10 @@ class TinyPtrVector {
public:
using VecTy = SmallVector<EltTy, 4>;
using value_type = typename VecTy::value_type;
+ // EltTy must be the first pointer type so that is<EltTy> is true for the
+ // default-constructed PtrUnion. This allows an empty TinyPtrVector to
+ // naturally vend a begin/end iterator of type EltTy* without an additional
+ // check for the empty state.
using PtrUnion = PointerUnion<EltTy, VecTy *>;
private:
@@ -96,14 +100,14 @@ public:
if (RHS.Val.template is<EltTy>()) {
V->clear();
V->push_back(RHS.front());
- RHS.Val = (EltTy)nullptr;
+ RHS.Val = EltTy();
return *this;
}
delete V;
}
Val = RHS.Val;
- RHS.Val = (EltTy)nullptr;
+ RHS.Val = EltTy();
return *this;
}
@@ -213,9 +217,9 @@ public:
EltTy operator[](unsigned i) const {
assert(!Val.isNull() && "can't index into an empty vector");
- if (EltTy V = Val.template dyn_cast<EltTy>()) {
+ if (Val.template is<EltTy>()) {
assert(i == 0 && "tinyvector index out of range");
- return V;
+ return Val.template get<EltTy>();
}
assert(i < Val.template get<VecTy*>()->size() &&
@@ -225,29 +229,29 @@ public:
EltTy front() const {
assert(!empty() && "vector empty");
- if (EltTy V = Val.template dyn_cast<EltTy>())
- return V;
+ if (Val.template is<EltTy>())
+ return Val.template get<EltTy>();
return Val.template get<VecTy*>()->front();
}
EltTy back() const {
assert(!empty() && "vector empty");
- if (EltTy V = Val.template dyn_cast<EltTy>())
- return V;
+ if (Val.template is<EltTy>())
+ return Val.template get<EltTy>();
return Val.template get<VecTy*>()->back();
}
void push_back(EltTy NewVal) {
- assert(NewVal && "Can't add a null value");
-
// If we have nothing, add something.
if (Val.isNull()) {
Val = NewVal;
+ assert(!Val.isNull() && "Can't add a null value");
return;
}
// If we have a single value, convert to a vector.
- if (EltTy V = Val.template dyn_cast<EltTy>()) {
+ if (Val.template is<EltTy>()) {
+ EltTy V = Val.template get<EltTy>();
Val = new VecTy();
Val.template get<VecTy*>()->push_back(V);
}
@@ -267,7 +271,7 @@ public:
void clear() {
// If we have a single value, convert to empty.
if (Val.template is<EltTy>()) {
- Val = (EltTy)nullptr;
+ Val = EltTy();
} else if (VecTy *Vec = Val.template dyn_cast<VecTy*>()) {
// If we have a vector form, just clear it.
Vec->clear();
@@ -282,7 +286,7 @@ public:
// If we have a single value, convert to empty.
if (Val.template is<EltTy>()) {
if (I == begin())
- Val = (EltTy)nullptr;
+ Val = EltTy();
} else if (VecTy *Vec = Val.template dyn_cast<VecTy*>()) {
// multiple items in a vector; just do the erase, there is no
// benefit to collapsing back to a pointer
@@ -298,7 +302,7 @@ public:
if (Val.template is<EltTy>()) {
if (S == begin() && S != E)
- Val = (EltTy)nullptr;
+ Val = EltTy();
} else if (VecTy *Vec = Val.template dyn_cast<VecTy*>()) {
return Vec->erase(S, E);
}
@@ -313,7 +317,8 @@ public:
return std::prev(end());
}
assert(!Val.isNull() && "Null value with non-end insert iterator.");
- if (EltTy V = Val.template dyn_cast<EltTy>()) {
+ if (Val.template is<EltTy>()) {
+ EltTy V = Val.template get<EltTy>();
assert(I == begin());
Val = Elt;
push_back(V);
@@ -339,7 +344,8 @@ public:
}
Val = new VecTy();
- } else if (EltTy V = Val.template dyn_cast<EltTy>()) {
+ } else if (Val.template is<EltTy>()) {
+ EltTy V = Val.template get<EltTy>();
Val = new VecTy();
Val.template get<VecTy*>()->push_back(V);
}
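The TinyPtrVector changes above replace the hard-coded (EltTy)nullptr sentinel with a default-constructed EltTy(), which generalizes the "empty" state to element types whose null value is not a plain null pointer. A small sketch of the two storage states, assuming non-null inputs (push_back still asserts the stored value is not the empty sentinel):

#include "llvm/ADT/TinyPtrVector.h"

void demo(int *A, int *B) { // A and B assumed non-null
  llvm::TinyPtrVector<int *> V;
  V.push_back(A); // inline single-element state, no heap allocation
  V.push_back(B); // converts to the out-of-line SmallVector state
  for (int *P : V)
    (void)P;      // iteration works uniformly over both states
  V.clear();      // resets the union back to EltTy()
}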
diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h
deleted file mode 100644
index 5aefb05ecdda..000000000000
--- a/include/llvm/ADT/VariadicFunction.h
+++ /dev/null
@@ -1,330 +0,0 @@
-//===- VariadicFunction.h - Variadic Functions ------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements compile-time type-safe variadic functions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ADT_VARIADICFUNCTION_H
-#define LLVM_ADT_VARIADICFUNCTION_H
-
-#include "llvm/ADT/ArrayRef.h"
-
-namespace llvm {
-
-// Define macros to aid in expanding a comma separated series with the index of
-// the series pasted onto the last token.
-#define LLVM_COMMA_JOIN1(x) x ## 0
-#define LLVM_COMMA_JOIN2(x) LLVM_COMMA_JOIN1(x), x ## 1
-#define LLVM_COMMA_JOIN3(x) LLVM_COMMA_JOIN2(x), x ## 2
-#define LLVM_COMMA_JOIN4(x) LLVM_COMMA_JOIN3(x), x ## 3
-#define LLVM_COMMA_JOIN5(x) LLVM_COMMA_JOIN4(x), x ## 4
-#define LLVM_COMMA_JOIN6(x) LLVM_COMMA_JOIN5(x), x ## 5
-#define LLVM_COMMA_JOIN7(x) LLVM_COMMA_JOIN6(x), x ## 6
-#define LLVM_COMMA_JOIN8(x) LLVM_COMMA_JOIN7(x), x ## 7
-#define LLVM_COMMA_JOIN9(x) LLVM_COMMA_JOIN8(x), x ## 8
-#define LLVM_COMMA_JOIN10(x) LLVM_COMMA_JOIN9(x), x ## 9
-#define LLVM_COMMA_JOIN11(x) LLVM_COMMA_JOIN10(x), x ## 10
-#define LLVM_COMMA_JOIN12(x) LLVM_COMMA_JOIN11(x), x ## 11
-#define LLVM_COMMA_JOIN13(x) LLVM_COMMA_JOIN12(x), x ## 12
-#define LLVM_COMMA_JOIN14(x) LLVM_COMMA_JOIN13(x), x ## 13
-#define LLVM_COMMA_JOIN15(x) LLVM_COMMA_JOIN14(x), x ## 14
-#define LLVM_COMMA_JOIN16(x) LLVM_COMMA_JOIN15(x), x ## 15
-#define LLVM_COMMA_JOIN17(x) LLVM_COMMA_JOIN16(x), x ## 16
-#define LLVM_COMMA_JOIN18(x) LLVM_COMMA_JOIN17(x), x ## 17
-#define LLVM_COMMA_JOIN19(x) LLVM_COMMA_JOIN18(x), x ## 18
-#define LLVM_COMMA_JOIN20(x) LLVM_COMMA_JOIN19(x), x ## 19
-#define LLVM_COMMA_JOIN21(x) LLVM_COMMA_JOIN20(x), x ## 20
-#define LLVM_COMMA_JOIN22(x) LLVM_COMMA_JOIN21(x), x ## 21
-#define LLVM_COMMA_JOIN23(x) LLVM_COMMA_JOIN22(x), x ## 22
-#define LLVM_COMMA_JOIN24(x) LLVM_COMMA_JOIN23(x), x ## 23
-#define LLVM_COMMA_JOIN25(x) LLVM_COMMA_JOIN24(x), x ## 24
-#define LLVM_COMMA_JOIN26(x) LLVM_COMMA_JOIN25(x), x ## 25
-#define LLVM_COMMA_JOIN27(x) LLVM_COMMA_JOIN26(x), x ## 26
-#define LLVM_COMMA_JOIN28(x) LLVM_COMMA_JOIN27(x), x ## 27
-#define LLVM_COMMA_JOIN29(x) LLVM_COMMA_JOIN28(x), x ## 28
-#define LLVM_COMMA_JOIN30(x) LLVM_COMMA_JOIN29(x), x ## 29
-#define LLVM_COMMA_JOIN31(x) LLVM_COMMA_JOIN30(x), x ## 30
-#define LLVM_COMMA_JOIN32(x) LLVM_COMMA_JOIN31(x), x ## 31
-
-/// Class which can simulate a type-safe variadic function.
-///
-/// The VariadicFunction class template makes it easy to define
-/// type-safe variadic functions where all arguments have the same
-/// type.
-///
-/// Suppose we need a variadic function like this:
-///
-/// ResultT Foo(const ArgT &A_0, const ArgT &A_1, ..., const ArgT &A_N);
-///
-/// Instead of many overloads of Foo(), we only need to define a helper
-/// function that takes an array of arguments:
-///
-/// ResultT FooImpl(ArrayRef<const ArgT *> Args) {
-/// // 'Args[i]' is a pointer to the i-th argument passed to Foo().
-/// ...
-/// }
-///
-/// and then define Foo() like this:
-///
-/// const VariadicFunction<ResultT, ArgT, FooImpl> Foo;
-///
-/// VariadicFunction takes care of defining the overloads of Foo().
-///
-/// Actually, Foo is a function object (i.e. functor) instead of a plain
-/// function. This object is stateless and its constructor/destructor
-/// does nothing, so it's safe to create global objects and call Foo(...) at
-/// any time.
-///
-/// Sometimes we need a variadic function to have some fixed leading
-/// arguments whose types may be different from that of the optional
-/// arguments. For example:
-///
-/// bool FullMatch(const StringRef &S, const RE &Regex,
-/// const ArgT &A_0, ..., const ArgT &A_N);
-///
-/// VariadicFunctionN is for such cases, where N is the number of fixed
-/// arguments. It is like VariadicFunction, except that it takes N more
-/// template arguments for the types of the fixed arguments:
-///
-/// bool FullMatchImpl(const StringRef &S, const RE &Regex,
-/// ArrayRef<const ArgT *> Args) { ... }
-/// const VariadicFunction2<bool, const StringRef&,
-/// const RE&, ArgT, FullMatchImpl>
-/// FullMatch;
-///
-/// Currently VariadicFunction and friends support up-to 3
-/// fixed leading arguments and up-to 32 optional arguments.
-template <typename ResultT, typename ArgT,
- ResultT (*Func)(ArrayRef<const ArgT *>)>
-struct VariadicFunction {
- ResultT operator()() const {
- return Func(None);
- }
-
-#define LLVM_DEFINE_OVERLOAD(N) \
- ResultT operator()(LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
- const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
- return Func(makeArrayRef(Args)); \
- }
- LLVM_DEFINE_OVERLOAD(1)
- LLVM_DEFINE_OVERLOAD(2)
- LLVM_DEFINE_OVERLOAD(3)
- LLVM_DEFINE_OVERLOAD(4)
- LLVM_DEFINE_OVERLOAD(5)
- LLVM_DEFINE_OVERLOAD(6)
- LLVM_DEFINE_OVERLOAD(7)
- LLVM_DEFINE_OVERLOAD(8)
- LLVM_DEFINE_OVERLOAD(9)
- LLVM_DEFINE_OVERLOAD(10)
- LLVM_DEFINE_OVERLOAD(11)
- LLVM_DEFINE_OVERLOAD(12)
- LLVM_DEFINE_OVERLOAD(13)
- LLVM_DEFINE_OVERLOAD(14)
- LLVM_DEFINE_OVERLOAD(15)
- LLVM_DEFINE_OVERLOAD(16)
- LLVM_DEFINE_OVERLOAD(17)
- LLVM_DEFINE_OVERLOAD(18)
- LLVM_DEFINE_OVERLOAD(19)
- LLVM_DEFINE_OVERLOAD(20)
- LLVM_DEFINE_OVERLOAD(21)
- LLVM_DEFINE_OVERLOAD(22)
- LLVM_DEFINE_OVERLOAD(23)
- LLVM_DEFINE_OVERLOAD(24)
- LLVM_DEFINE_OVERLOAD(25)
- LLVM_DEFINE_OVERLOAD(26)
- LLVM_DEFINE_OVERLOAD(27)
- LLVM_DEFINE_OVERLOAD(28)
- LLVM_DEFINE_OVERLOAD(29)
- LLVM_DEFINE_OVERLOAD(30)
- LLVM_DEFINE_OVERLOAD(31)
- LLVM_DEFINE_OVERLOAD(32)
-#undef LLVM_DEFINE_OVERLOAD
-};
-
-template <typename ResultT, typename Param0T, typename ArgT,
- ResultT (*Func)(Param0T, ArrayRef<const ArgT *>)>
-struct VariadicFunction1 {
- ResultT operator()(Param0T P0) const {
- return Func(P0, None);
- }
-
-#define LLVM_DEFINE_OVERLOAD(N) \
- ResultT operator()(Param0T P0, LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
- const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
- return Func(P0, makeArrayRef(Args)); \
- }
- LLVM_DEFINE_OVERLOAD(1)
- LLVM_DEFINE_OVERLOAD(2)
- LLVM_DEFINE_OVERLOAD(3)
- LLVM_DEFINE_OVERLOAD(4)
- LLVM_DEFINE_OVERLOAD(5)
- LLVM_DEFINE_OVERLOAD(6)
- LLVM_DEFINE_OVERLOAD(7)
- LLVM_DEFINE_OVERLOAD(8)
- LLVM_DEFINE_OVERLOAD(9)
- LLVM_DEFINE_OVERLOAD(10)
- LLVM_DEFINE_OVERLOAD(11)
- LLVM_DEFINE_OVERLOAD(12)
- LLVM_DEFINE_OVERLOAD(13)
- LLVM_DEFINE_OVERLOAD(14)
- LLVM_DEFINE_OVERLOAD(15)
- LLVM_DEFINE_OVERLOAD(16)
- LLVM_DEFINE_OVERLOAD(17)
- LLVM_DEFINE_OVERLOAD(18)
- LLVM_DEFINE_OVERLOAD(19)
- LLVM_DEFINE_OVERLOAD(20)
- LLVM_DEFINE_OVERLOAD(21)
- LLVM_DEFINE_OVERLOAD(22)
- LLVM_DEFINE_OVERLOAD(23)
- LLVM_DEFINE_OVERLOAD(24)
- LLVM_DEFINE_OVERLOAD(25)
- LLVM_DEFINE_OVERLOAD(26)
- LLVM_DEFINE_OVERLOAD(27)
- LLVM_DEFINE_OVERLOAD(28)
- LLVM_DEFINE_OVERLOAD(29)
- LLVM_DEFINE_OVERLOAD(30)
- LLVM_DEFINE_OVERLOAD(31)
- LLVM_DEFINE_OVERLOAD(32)
-#undef LLVM_DEFINE_OVERLOAD
-};
-
-template <typename ResultT, typename Param0T, typename Param1T, typename ArgT,
- ResultT (*Func)(Param0T, Param1T, ArrayRef<const ArgT *>)>
-struct VariadicFunction2 {
- ResultT operator()(Param0T P0, Param1T P1) const {
- return Func(P0, P1, None);
- }
-
-#define LLVM_DEFINE_OVERLOAD(N) \
- ResultT operator()(Param0T P0, Param1T P1, \
- LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
- const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
- return Func(P0, P1, makeArrayRef(Args)); \
- }
- LLVM_DEFINE_OVERLOAD(1)
- LLVM_DEFINE_OVERLOAD(2)
- LLVM_DEFINE_OVERLOAD(3)
- LLVM_DEFINE_OVERLOAD(4)
- LLVM_DEFINE_OVERLOAD(5)
- LLVM_DEFINE_OVERLOAD(6)
- LLVM_DEFINE_OVERLOAD(7)
- LLVM_DEFINE_OVERLOAD(8)
- LLVM_DEFINE_OVERLOAD(9)
- LLVM_DEFINE_OVERLOAD(10)
- LLVM_DEFINE_OVERLOAD(11)
- LLVM_DEFINE_OVERLOAD(12)
- LLVM_DEFINE_OVERLOAD(13)
- LLVM_DEFINE_OVERLOAD(14)
- LLVM_DEFINE_OVERLOAD(15)
- LLVM_DEFINE_OVERLOAD(16)
- LLVM_DEFINE_OVERLOAD(17)
- LLVM_DEFINE_OVERLOAD(18)
- LLVM_DEFINE_OVERLOAD(19)
- LLVM_DEFINE_OVERLOAD(20)
- LLVM_DEFINE_OVERLOAD(21)
- LLVM_DEFINE_OVERLOAD(22)
- LLVM_DEFINE_OVERLOAD(23)
- LLVM_DEFINE_OVERLOAD(24)
- LLVM_DEFINE_OVERLOAD(25)
- LLVM_DEFINE_OVERLOAD(26)
- LLVM_DEFINE_OVERLOAD(27)
- LLVM_DEFINE_OVERLOAD(28)
- LLVM_DEFINE_OVERLOAD(29)
- LLVM_DEFINE_OVERLOAD(30)
- LLVM_DEFINE_OVERLOAD(31)
- LLVM_DEFINE_OVERLOAD(32)
-#undef LLVM_DEFINE_OVERLOAD
-};
-
-template <typename ResultT, typename Param0T, typename Param1T,
- typename Param2T, typename ArgT,
- ResultT (*Func)(Param0T, Param1T, Param2T, ArrayRef<const ArgT *>)>
-struct VariadicFunction3 {
- ResultT operator()(Param0T P0, Param1T P1, Param2T P2) const {
- return Func(P0, P1, P2, None);
- }
-
-#define LLVM_DEFINE_OVERLOAD(N) \
- ResultT operator()(Param0T P0, Param1T P1, Param2T P2, \
- LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
- const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
- return Func(P0, P1, P2, makeArrayRef(Args)); \
- }
- LLVM_DEFINE_OVERLOAD(1)
- LLVM_DEFINE_OVERLOAD(2)
- LLVM_DEFINE_OVERLOAD(3)
- LLVM_DEFINE_OVERLOAD(4)
- LLVM_DEFINE_OVERLOAD(5)
- LLVM_DEFINE_OVERLOAD(6)
- LLVM_DEFINE_OVERLOAD(7)
- LLVM_DEFINE_OVERLOAD(8)
- LLVM_DEFINE_OVERLOAD(9)
- LLVM_DEFINE_OVERLOAD(10)
- LLVM_DEFINE_OVERLOAD(11)
- LLVM_DEFINE_OVERLOAD(12)
- LLVM_DEFINE_OVERLOAD(13)
- LLVM_DEFINE_OVERLOAD(14)
- LLVM_DEFINE_OVERLOAD(15)
- LLVM_DEFINE_OVERLOAD(16)
- LLVM_DEFINE_OVERLOAD(17)
- LLVM_DEFINE_OVERLOAD(18)
- LLVM_DEFINE_OVERLOAD(19)
- LLVM_DEFINE_OVERLOAD(20)
- LLVM_DEFINE_OVERLOAD(21)
- LLVM_DEFINE_OVERLOAD(22)
- LLVM_DEFINE_OVERLOAD(23)
- LLVM_DEFINE_OVERLOAD(24)
- LLVM_DEFINE_OVERLOAD(25)
- LLVM_DEFINE_OVERLOAD(26)
- LLVM_DEFINE_OVERLOAD(27)
- LLVM_DEFINE_OVERLOAD(28)
- LLVM_DEFINE_OVERLOAD(29)
- LLVM_DEFINE_OVERLOAD(30)
- LLVM_DEFINE_OVERLOAD(31)
- LLVM_DEFINE_OVERLOAD(32)
-#undef LLVM_DEFINE_OVERLOAD
-};
-
-// Cleanup the macro namespace.
-#undef LLVM_COMMA_JOIN1
-#undef LLVM_COMMA_JOIN2
-#undef LLVM_COMMA_JOIN3
-#undef LLVM_COMMA_JOIN4
-#undef LLVM_COMMA_JOIN5
-#undef LLVM_COMMA_JOIN6
-#undef LLVM_COMMA_JOIN7
-#undef LLVM_COMMA_JOIN8
-#undef LLVM_COMMA_JOIN9
-#undef LLVM_COMMA_JOIN10
-#undef LLVM_COMMA_JOIN11
-#undef LLVM_COMMA_JOIN12
-#undef LLVM_COMMA_JOIN13
-#undef LLVM_COMMA_JOIN14
-#undef LLVM_COMMA_JOIN15
-#undef LLVM_COMMA_JOIN16
-#undef LLVM_COMMA_JOIN17
-#undef LLVM_COMMA_JOIN18
-#undef LLVM_COMMA_JOIN19
-#undef LLVM_COMMA_JOIN20
-#undef LLVM_COMMA_JOIN21
-#undef LLVM_COMMA_JOIN22
-#undef LLVM_COMMA_JOIN23
-#undef LLVM_COMMA_JOIN24
-#undef LLVM_COMMA_JOIN25
-#undef LLVM_COMMA_JOIN26
-#undef LLVM_COMMA_JOIN27
-#undef LLVM_COMMA_JOIN28
-#undef LLVM_COMMA_JOIN29
-#undef LLVM_COMMA_JOIN30
-#undef LLVM_COMMA_JOIN31
-#undef LLVM_COMMA_JOIN32
-
-} // end namespace llvm
-
-#endif // LLVM_ADT_VARIADICFUNCTION_H
diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h
index 774c7c4e3366..aa8830943cab 100644
--- a/include/llvm/ADT/iterator_range.h
+++ b/include/llvm/ADT/iterator_range.h
@@ -44,6 +44,7 @@ public:
IteratorT begin() const { return begin_iterator; }
IteratorT end() const { return end_iterator; }
+ bool empty() const { return begin_iterator == end_iterator; }
};
/// Convenience function for iterating over sub-ranges.
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 948341554f23..282142f51bb3 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -949,7 +949,7 @@ template <typename DerivedT> class AAResultBase {
/// A pointer to the AAResults object that this AAResult is
/// aggregated within. May be null if not aggregated.
- AAResults *AAR;
+ AAResults *AAR = nullptr;
/// Helper to dispatch calls back through the derived type.
DerivedT &derived() { return static_cast<DerivedT &>(*this); }
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 34a509b7f4bb..187317e3831b 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -87,10 +87,11 @@ class AliasSet : public ilist_node<AliasSet> {
AAInfo = NewAAInfo;
else {
AAMDNodes Intersection(AAInfo.intersect(NewAAInfo));
- if (!Intersection) {
+ if (!Intersection.TBAA || !Intersection.Scope ||
+ !Intersection.NoAlias) {
// NewAAInfo conflicts with AAInfo.
AAInfo = DenseMapInfo<AAMDNodes>::getTombstoneKey();
- return SizeChanged;
+ SizeChanged = true;
}
AAInfo = Intersection;
}
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index b42846472f2e..0efbd59023d6 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -73,8 +73,8 @@ class AssumptionCache {
/// Get the vector of assumptions which affect a value from the cache.
SmallVector<WeakTrackingVH, 1> &getOrInsertAffectedValues(Value *V);
- /// Copy affected values in the cache for OV to be affected values for NV.
- void copyAffectedValuesInCache(Value *OV, Value *NV);
+ /// Move affected values in the cache for OV to be affected values for NV.
+ void transferAffectedValuesInCache(Value *OV, Value *NV);
/// Flag tracking whether we have scanned the function yet.
///
diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h
index bb55e76ac86a..68f137ba622c 100644
--- a/include/llvm/Analysis/CFG.h
+++ b/include/llvm/Analysis/CFG.h
@@ -46,6 +46,8 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
///
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges = false);
+bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ,
+ bool AllowIdenticalEdges = false);
/// Determine whether instruction 'To' is reachable from 'From', without passing
/// through any blocks in ExclusionSet, returning true if uncertain.
diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
index 7c8b42b1d8d2..5f5e52af3d88 100644
--- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
@@ -41,7 +41,8 @@ class CFLAndersAAResult : public AAResultBase<CFLAndersAAResult> {
class FunctionInfo;
public:
- explicit CFLAndersAAResult(const TargetLibraryInfo &TLI);
+ explicit CFLAndersAAResult(
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
CFLAndersAAResult(CFLAndersAAResult &&RHS);
~CFLAndersAAResult();
@@ -74,7 +75,7 @@ private:
/// Build summary for a given function
FunctionInfo buildInfoFrom(const Function &);
- const TargetLibraryInfo &TLI;
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
/// Cached mapping of Functions to their StratifiedSets.
/// If a function's sets are currently being built, it is marked
diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index cc7a47cd9a5f..135321616b7c 100644
--- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -42,7 +42,8 @@ class CFLSteensAAResult : public AAResultBase<CFLSteensAAResult> {
class FunctionInfo;
public:
- explicit CFLSteensAAResult(const TargetLibraryInfo &TLI);
+ explicit CFLSteensAAResult(
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI);
CFLSteensAAResult(CFLSteensAAResult &&Arg);
~CFLSteensAAResult();
@@ -90,7 +91,7 @@ public:
}
private:
- const TargetLibraryInfo &TLI;
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI;
/// Cached mapping of Functions to their StratifiedSets.
/// If a function's sets are currently being built, it is marked
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 8af5fb86995a..933f2210dafc 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -88,6 +88,7 @@
#ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H
#define LLVM_ANALYSIS_CGSCCPASSMANAGER_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
@@ -583,10 +584,12 @@ public:
SmallVectorImpl<WeakTrackingVH> &CallHandles) {
assert(CallHandles.empty() && "Must start with a clear set of handles.");
- SmallVector<CallCount, 4> CallCounts;
+ SmallDenseMap<Function *, CallCount> CallCounts;
+ CallCount CountLocal = {0, 0};
for (LazyCallGraph::Node &N : C) {
- CallCounts.push_back({0, 0});
- CallCount &Count = CallCounts.back();
+ CallCount &Count =
+ CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal))
+ .first->second;
for (Instruction &I : instructions(N.getFunction()))
if (auto CS = CallSite(&I)) {
if (CS.getCalledFunction()) {
@@ -626,8 +629,6 @@ public:
// Check that we didn't miss any update scenario.
assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
assert(C->begin() != C->end() && "Cannot have an empty SCC!");
- assert((int)CallCounts.size() == C->size() &&
- "Cannot have changed the size of the SCC!");
// Check whether any of the handles were devirtualized.
auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) {
@@ -642,7 +643,7 @@ public:
if (!F)
return false;
- LLVM_DEBUG(dbgs() << "Found devirutalized call from "
+ LLVM_DEBUG(dbgs() << "Found devirtualized call from "
<< CS.getParent()->getParent()->getName() << " to "
<< F->getName() << "\n");
@@ -664,12 +665,20 @@ public:
// manner of transformations such as DCE and other things, but seems to
// work well in practice.
if (!Devirt)
- for (int i = 0, Size = C->size(); i < Size; ++i)
- if (CallCounts[i].Indirect > NewCallCounts[i].Indirect &&
- CallCounts[i].Direct < NewCallCounts[i].Direct) {
- Devirt = true;
- break;
+ // Iterate over the keys in NewCallCounts; if the Function also exists
+ // in CallCounts, run the devirtualization check below.
+ for (auto &Pair : NewCallCounts) {
+ auto &CallCountNew = Pair.second;
+ auto CountIt = CallCounts.find(Pair.first);
+ if (CountIt != CallCounts.end()) {
+ const auto &CallCountOld = CountIt->second;
+ if (CallCountOld.Indirect > CallCountNew.Indirect &&
+ CallCountOld.Direct < CallCountNew.Direct) {
+ Devirt = true;
+ break;
+ }
}
+ }
if (!Devirt) {
PA.intersect(std::move(PassPA));
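Keying the counts by Function rather than by SCC position is what lets the comparison above tolerate SCC membership changing between passes. A reduced sketch of the counting scheme itself (a standalone illustration, not the pass code):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"

struct CallCount { int Direct = 0, Indirect = 0; };

llvm::SmallDenseMap<llvm::Function *, CallCount>
countCalls(llvm::ArrayRef<llvm::Function *> Funcs) {
  llvm::SmallDenseMap<llvm::Function *, CallCount> Counts;
  for (llvm::Function *F : Funcs) {
    CallCount &C = Counts[F]; // keyed by Function, not by SCC position
    for (llvm::Instruction &I : llvm::instructions(*F))
      if (auto CS = llvm::CallSite(&I))
        CS.getCalledFunction() ? ++C.Direct : ++C.Indirect;
  }
  return Counts;
}

A devirtualized call then shows up as Indirect decreasing while Direct increases for the same Function key, which is exactly the comparison in the hunk above.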
diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h
index ca7abd34fea2..29921a51d5be 100644
--- a/include/llvm/Analysis/CaptureTracking.h
+++ b/include/llvm/Analysis/CaptureTracking.h
@@ -17,6 +17,7 @@ namespace llvm {
class Value;
class Use;
+ class DataLayout;
class Instruction;
class DominatorTree;
class OrderedBasicBlock;
@@ -83,6 +84,11 @@ namespace llvm {
/// use U. Return true to stop the traversal or false to continue looking
/// for more capturing instructions.
virtual bool captured(const Use *U) = 0;
+
+ /// isDereferenceableOrNull - Overload to allow clients with additional
+ /// knowledge about pointer dereferenceability to provide it and thereby
+ /// avoid conservative responses when a pointer is compared to null.
+ virtual bool isDereferenceableOrNull(Value *O, const DataLayout &DL);
};
/// PointerMayBeCaptured - Visit the value and the values derived from it and
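The new isDereferenceableOrNull hook lets a client assert extra knowledge so that a comparison against null is not conservatively treated as a capture. A hypothetical tracker (names and policy invented for illustration):

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

namespace {
struct SimpleTracker : llvm::CaptureTracker {
  bool Captured = false;
  void tooManyUses() override { Captured = true; } // conservatively give up
  bool captured(const llvm::Use *U) override {
    Captured = true;
    return true; // stop the traversal at the first capture
  }
  bool isDereferenceableOrNull(llvm::Value *O,
                               const llvm::DataLayout &DL) override {
    // Hypothetical client knowledge: these pointers are always
    // dereferenceable, so null comparisons need not count as captures.
    return true;
  }
};
} // namespace

Such a tracker is handed to PointerMayBeCaptured(V, &Tracker) as before; only the null-comparison handling changes.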
diff --git a/include/llvm/Analysis/DDG.h b/include/llvm/Analysis/DDG.h
new file mode 100644
index 000000000000..0e1eb9d2cda3
--- /dev/null
+++ b/include/llvm/Analysis/DDG.h
@@ -0,0 +1,430 @@
+//===- llvm/Analysis/DDG.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Data-Dependence Graph (DDG).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DDG_H
+#define LLVM_ANALYSIS_DDG_H
+
+#include "llvm/ADT/DirectedGraph.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DependenceGraphBuilder.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/Instructions.h"
+#include <unordered_map>
+
+namespace llvm {
+class DDGNode;
+class DDGEdge;
+using DDGNodeBase = DGNode<DDGNode, DDGEdge>;
+using DDGEdgeBase = DGEdge<DDGNode, DDGEdge>;
+using DDGBase = DirectedGraph<DDGNode, DDGEdge>;
+class LPMUpdater;
+
+/// Data Dependence Graph Node
+/// The graph can represent the following types of nodes:
+/// 1. Single instruction node containing just one instruction.
+/// 2. Multiple instruction node where two or more instructions from
+/// the same basic block are merged into one node.
+/// 3. Root node is a special node that connects to all components such that
+/// there is always a path from it to any node in the graph.
+class DDGNode : public DDGNodeBase {
+public:
+ using InstructionListType = SmallVectorImpl<Instruction *>;
+
+ enum class NodeKind {
+ Unknown,
+ SingleInstruction,
+ MultiInstruction,
+ Root,
+ };
+
+ DDGNode() = delete;
+ DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {}
+ DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {}
+ DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {}
+ virtual ~DDGNode() = 0;
+
+ DDGNode &operator=(const DDGNode &N) {
+ DGNode::operator=(N);
+ Kind = N.Kind;
+ return *this;
+ }
+
+ DDGNode &operator=(DDGNode &&N) {
+ DGNode::operator=(std::move(N));
+ Kind = N.Kind;
+ return *this;
+ }
+
+ /// Getter for the kind of this node.
+ NodeKind getKind() const { return Kind; }
+
+ /// Collect a list of instructions, in \p IList, for which predicate \p Pred
+ /// evaluates to true when iterating over instructions of this node. Return
+ /// true if at least one instruction was collected, and false otherwise.
+ bool collectInstructions(llvm::function_ref<bool(Instruction *)> const &Pred,
+ InstructionListType &IList) const;
+
+protected:
+ /// Setter for the kind of this node.
+ void setKind(NodeKind K) { Kind = K; }
+
+private:
+ NodeKind Kind;
+};
+
+/// Subclass of DDGNode representing the root node of the graph.
+/// There should only be one such node in a given graph.
+class RootDDGNode : public DDGNode {
+public:
+ RootDDGNode() : DDGNode(NodeKind::Root) {}
+ RootDDGNode(const RootDDGNode &N) = delete;
+ RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {}
+ ~RootDDGNode() {}
+
+ /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+ static bool classof(const DDGNode *N) {
+ return N->getKind() == NodeKind::Root;
+ }
+ static bool classof(const RootDDGNode *N) { return true; }
+};
+
+/// Subclass of DDGNode representing single or multi-instruction nodes.
+class SimpleDDGNode : public DDGNode {
+public:
+ SimpleDDGNode() = delete;
+ SimpleDDGNode(Instruction &I);
+ SimpleDDGNode(const SimpleDDGNode &N);
+ SimpleDDGNode(SimpleDDGNode &&N);
+ ~SimpleDDGNode();
+
+ SimpleDDGNode &operator=(const SimpleDDGNode &N) {
+ DDGNode::operator=(N);
+ InstList = N.InstList;
+ return *this;
+ }
+
+ SimpleDDGNode &operator=(SimpleDDGNode &&N) {
+ DDGNode::operator=(std::move(N));
+ InstList = std::move(N.InstList);
+ return *this;
+ }
+
+ /// Get the list of instructions in this node.
+ const InstructionListType &getInstructions() const {
+ assert(!InstList.empty() && "Instruction List is empty.");
+ return InstList;
+ }
+ InstructionListType &getInstructions() {
+ return const_cast<InstructionListType &>(
+ static_cast<const SimpleDDGNode *>(this)->getInstructions());
+ }
+
+ /// Get the first/last instruction in the node.
+ Instruction *getFirstInstruction() const { return getInstructions().front(); }
+ Instruction *getLastInstruction() const { return getInstructions().back(); }
+
+ /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+ static bool classof(const DDGNode *N) {
+ return N->getKind() == NodeKind::SingleInstruction ||
+ N->getKind() == NodeKind::MultiInstruction;
+ }
+ static bool classof(const SimpleDDGNode *N) { return true; }
+
+private:
+ /// Append the list of instructions in \p Input to this node.
+ void appendInstructions(const InstructionListType &Input) {
+ setKind((InstList.size() == 0 && Input.size() == 1)
+ ? NodeKind::SingleInstruction
+ : NodeKind::MultiInstruction);
+ InstList.insert(InstList.end(), Input.begin(), Input.end());
+ }
+ void appendInstructions(const SimpleDDGNode &Input) {
+ appendInstructions(Input.getInstructions());
+ }
+
+ /// List of instructions associated with a single or multi-instruction node.
+ SmallVector<Instruction *, 2> InstList;
+};
+
+/// Data Dependency Graph Edge.
+/// An edge in the DDG can represent a def-use relationship or
+/// a memory dependence based on the result of DependenceAnalysis.
+/// A rooted edge connects the root node to one of the components
+/// of the graph.
+class DDGEdge : public DDGEdgeBase {
+public:
+ /// The kind of edge in the DDG
+ enum class EdgeKind { Unknown, RegisterDefUse, MemoryDependence, Rooted };
+
+ explicit DDGEdge(DDGNode &N) = delete;
+ DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {}
+ DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {}
+ DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {}
+ DDGEdge &operator=(const DDGEdge &E) {
+ DDGEdgeBase::operator=(E);
+ Kind = E.Kind;
+ return *this;
+ }
+
+ DDGEdge &operator=(DDGEdge &&E) {
+ DDGEdgeBase::operator=(std::move(E));
+ Kind = E.Kind;
+ return *this;
+ }
+
+ /// Get the edge kind
+ EdgeKind getKind() const { return Kind; }
+
+ /// Return true if this is a def-use edge, and false otherwise.
+ bool isDefUse() const { return Kind == EdgeKind::RegisterDefUse; }
+
+ /// Return true if this is a memory dependence edge, and false otherwise.
+ bool isMemoryDependence() const { return Kind == EdgeKind::MemoryDependence; }
+
+ /// Return true if this is an edge stemming from the root node, and false
+ /// otherwise.
+ bool isRooted() const { return Kind == EdgeKind::Rooted; }
+
+private:
+ EdgeKind Kind;
+};
+
+/// Encapsulate some common data and functionality needed for different
+/// variations of data dependence graphs.
+template <typename NodeType> class DependenceGraphInfo {
+public:
+ using DependenceList = SmallVector<std::unique_ptr<Dependence>, 1>;
+
+ DependenceGraphInfo() = delete;
+ DependenceGraphInfo(const DependenceGraphInfo &G) = delete;
+ DependenceGraphInfo(const std::string &N, const DependenceInfo &DepInfo)
+ : Name(N), DI(DepInfo), Root(nullptr) {}
+ DependenceGraphInfo(DependenceGraphInfo &&G)
+ : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {}
+ virtual ~DependenceGraphInfo() {}
+
+ /// Return the label that is used to name this graph.
+ const StringRef getName() const { return Name; }
+
+ /// Return the root node of the graph.
+ NodeType &getRoot() const {
+ assert(Root && "Root node is not available yet. Graph construction may "
+ "still be in progress\n");
+ return *Root;
+ }
+
+protected:
+ // Name of the graph.
+ std::string Name;
+
+ // Store a copy of DependenceInfo in the graph, so that individual memory
+ // dependencies don't need to be stored. Instead when the dependence is
+ // queried it is recomputed using @DI.
+ const DependenceInfo DI;
+
+ // A special node in the graph that has an edge to every connected component of
+ // the graph, to ensure all nodes are reachable in a graph walk.
+ NodeType *Root = nullptr;
+};
+
+using DDGInfo = DependenceGraphInfo<DDGNode>;
+
+/// Data Dependency Graph
+class DataDependenceGraph : public DDGBase, public DDGInfo {
+ friend class DDGBuilder;
+
+public:
+ using NodeType = DDGNode;
+ using EdgeType = DDGEdge;
+
+ DataDependenceGraph() = delete;
+ DataDependenceGraph(const DataDependenceGraph &G) = delete;
+ DataDependenceGraph(DataDependenceGraph &&G)
+ : DDGBase(std::move(G)), DDGInfo(std::move(G)) {}
+ DataDependenceGraph(Function &F, DependenceInfo &DI);
+ DataDependenceGraph(const Loop &L, DependenceInfo &DI);
+ ~DataDependenceGraph();
+
+protected:
+ /// Add node \p N to the graph, if it's not added yet, and keep track of
+ /// the root node. Return true if node is successfully added.
+ bool addNode(NodeType &N);
+
+};
+
+/// Concrete implementation of a pure data dependence graph builder. This class
+/// provides custom implementation for the pure-virtual functions used in the
+/// generic dependence graph build algorithm.
+///
+/// For information about time complexity of the build algorithm see the
+/// comments near the declaration of AbstractDependenceGraphBuilder.
+class DDGBuilder : public AbstractDependenceGraphBuilder<DataDependenceGraph> {
+public:
+ DDGBuilder(DataDependenceGraph &G, DependenceInfo &D,
+ const BasicBlockListType &BBs)
+ : AbstractDependenceGraphBuilder(G, D, BBs) {}
+ DDGNode &createRootNode() final override {
+ auto *RN = new RootDDGNode();
+ assert(RN && "Failed to allocate memory for DDG root node.");
+ Graph.addNode(*RN);
+ return *RN;
+ }
+ DDGNode &createFineGrainedNode(Instruction &I) final override {
+ auto *SN = new SimpleDDGNode(I);
+ assert(SN && "Failed to allocate memory for simple DDG node.");
+ Graph.addNode(*SN);
+ return *SN;
+ }
+ DDGEdge &createDefUseEdge(DDGNode &Src, DDGNode &Tgt) final override {
+ auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::RegisterDefUse);
+ assert(E && "Failed to allocate memory for edge");
+ Graph.connect(Src, Tgt, *E);
+ return *E;
+ }
+ DDGEdge &createMemoryEdge(DDGNode &Src, DDGNode &Tgt) final override {
+ auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::MemoryDependence);
+ assert(E && "Failed to allocate memory for edge");
+ Graph.connect(Src, Tgt, *E);
+ return *E;
+ }
+ DDGEdge &createRootedEdge(DDGNode &Src, DDGNode &Tgt) final override {
+ auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::Rooted);
+ assert(E && "Failed to allocate memory for edge");
+ assert(isa<RootDDGNode>(Src) && "Expected root node");
+ Graph.connect(Src, Tgt, *E);
+ return *E;
+ }
+
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N);
+raw_ostream &operator<<(raw_ostream &OS, const DDGNode::NodeKind K);
+raw_ostream &operator<<(raw_ostream &OS, const DDGEdge &E);
+raw_ostream &operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K);
+raw_ostream &operator<<(raw_ostream &OS, const DataDependenceGraph &G);
+
+//===--------------------------------------------------------------------===//
+// DDG Analysis Passes
+//===--------------------------------------------------------------------===//
+
+/// Analysis pass that builds the DDG for a loop.
+class DDGAnalysis : public AnalysisInfoMixin<DDGAnalysis> {
+public:
+ using Result = std::unique_ptr<DataDependenceGraph>;
+ Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
+
+private:
+ friend AnalysisInfoMixin<DDGAnalysis>;
+ static AnalysisKey Key;
+};
+
+/// Textual printer pass for the DDG of a loop.
+class DDGAnalysisPrinterPass : public PassInfoMixin<DDGAnalysisPrinterPass> {
+public:
+ explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+private:
+ raw_ostream &OS;
+};
+
+//===--------------------------------------------------------------------===//
+// GraphTraits specializations for the DDG
+//===--------------------------------------------------------------------===//
+
+/// Non-const versions of the graph trait specializations for DDG.
+template <> struct GraphTraits<DDGNode *> {
+ using NodeRef = DDGNode *;
+
+ static DDGNode *DDGGetTargetNode(DGEdge<DDGNode, DDGEdge> *P) {
+ return &P->getTargetNode();
+ }
+
+ // Provide a mapped iterator so that the GraphTrait-based implementations can
+ // find the target nodes without having to explicitly go through the edges.
+ using ChildIteratorType =
+ mapped_iterator<DDGNode::iterator, decltype(&DDGGetTargetNode)>;
+ using ChildEdgeIteratorType = DDGNode::iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(N->end(), &DDGGetTargetNode);
+ }
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ return N->begin();
+ }
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<DataDependenceGraph *> : public GraphTraits<DDGNode *> {
+ using nodes_iterator = DataDependenceGraph::iterator;
+ static NodeRef getEntryNode(DataDependenceGraph *DG) {
+ return &DG->getRoot();
+ }
+ static nodes_iterator nodes_begin(DataDependenceGraph *DG) {
+ return DG->begin();
+ }
+ static nodes_iterator nodes_end(DataDependenceGraph *DG) { return DG->end(); }
+};
+
+/// Const versions of the graph trait specializations for DDG.
+template <> struct GraphTraits<const DDGNode *> {
+ using NodeRef = const DDGNode *;
+
+ static const DDGNode *DDGGetTargetNode(const DGEdge<DDGNode, DDGEdge> *P) {
+ return &P->getTargetNode();
+ }
+
+ // Provide a mapped iterator so that the GraphTrait-based implementations can
+ // find the target nodes without having to explicitly go through the edges.
+ using ChildIteratorType =
+ mapped_iterator<DDGNode::const_iterator, decltype(&DDGGetTargetNode)>;
+ using ChildEdgeIteratorType = DDGNode::const_iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(N->end(), &DDGGetTargetNode);
+ }
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ return N->begin();
+ }
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<const DataDependenceGraph *>
+ : public GraphTraits<const DDGNode *> {
+ using nodes_iterator = DataDependenceGraph::const_iterator;
+ static NodeRef getEntryNode(const DataDependenceGraph *DG) {
+ return &DG->getRoot();
+ }
+ static nodes_iterator nodes_begin(const DataDependenceGraph *DG) {
+ return DG->begin();
+ }
+ static nodes_iterator nodes_end(const DataDependenceGraph *DG) {
+ return DG->end();
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DDG_H
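Once constructed, the DDG is an ordinary DirectedGraph, so it can be walked through the node and edge iterators or through the GraphTraits specializations above. A small traversal sketch, assuming an already-built graph:

#include "llvm/Analysis/DDG.h"
#include "llvm/Support/raw_ostream.h"

void dumpDDG(const llvm::DataDependenceGraph &G) {
  for (llvm::DDGNode *N : G) { // all nodes, including the root
    if (auto *SN = llvm::dyn_cast<llvm::SimpleDDGNode>(N))
      llvm::errs() << SN->getInstructions().size() << " instruction(s)\n";
    for (llvm::DDGEdge *E : *N) // outgoing edges of this node
      if (E->isMemoryDependence())
        llvm::errs() << "  memory dependence edge\n";
  }
}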
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index 0410a3314659..c9e8df5db1c2 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -99,7 +99,7 @@ public:
errs() << "Writing '" << Filename << "'...";
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
std::string Title = GraphName + " for '" + F.getName().str() + "' function";
@@ -162,7 +162,7 @@ public:
errs() << "Writing '" << Filename << "'...";
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph);
if (!EC)
diff --git a/include/llvm/Analysis/DependenceGraphBuilder.h b/include/llvm/Analysis/DependenceGraphBuilder.h
new file mode 100644
index 000000000000..5f4bdb47043b
--- /dev/null
+++ b/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -0,0 +1,119 @@
+//===- llvm/Analysis/DependenceGraphBuilder.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a builder interface that can be used to populate dependence
+// graphs such as DDG and PDG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+/// This abstract builder class defines a set of high-level steps for creating
+/// DDG-like graphs. The client code is expected to inherit from this class and
+/// define concrete implementation for each of the pure virtual functions used
+/// in the high-level algorithm.
+template <class GraphType> class AbstractDependenceGraphBuilder {
+protected:
+ using BasicBlockListType = SmallVectorImpl<BasicBlock *>;
+
+private:
+ using NodeType = typename GraphType::NodeType;
+ using EdgeType = typename GraphType::EdgeType;
+
+public:
+ using ClassesType = EquivalenceClasses<BasicBlock *>;
+ using NodeListType = SmallVector<NodeType *, 4>;
+
+ AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D,
+ const BasicBlockListType &BBs)
+ : Graph(G), DI(D), BBList(BBs) {}
+ virtual ~AbstractDependenceGraphBuilder() {}
+
+ /// The main entry to the graph construction algorithm. It starts by
+ /// creating nodes in increasing order of granularity and then
+ /// adds def-use and memory edges.
+ ///
+ /// The algorithmic complexity of this implementation is O(V^2 * I^2), where V
+ /// is the number of vertices (nodes) and I is the number of instructions in
+ /// each node. The total number of instructions, N, is equal to V * I,
+ /// therefore the worst-case time complexity is O(N^2); the average case does
+ /// roughly half that work, which is still O(N^2).
+ void populate() {
+ createFineGrainedNodes();
+ createDefUseEdges();
+ createMemoryDependencyEdges();
+ createAndConnectRootNode();
+ }
+
+ /// Create fine grained nodes. These are typically atomic nodes that
+ /// consist of a single instruction.
+ void createFineGrainedNodes();
+
+ /// Analyze the def-use chains and create edges from the nodes containing
+ /// definitions to the nodes containing the uses.
+ void createDefUseEdges();
+
+ /// Analyze data dependencies that exist between memory loads or stores,
+ /// in the graph nodes and create edges between them.
+ void createMemoryDependencyEdges();
+
+ /// Create a root node and add edges such that each node in the graph is
+ /// reachable from the root.
+ void createAndConnectRootNode();
+
+protected:
+ /// Create the root node of the graph.
+ virtual NodeType &createRootNode() = 0;
+
+ /// Create an atomic node in the graph given a single instruction.
+ virtual NodeType &createFineGrainedNode(Instruction &I) = 0;
+
+ /// Create a def-use edge going from \p Src to \p Tgt.
+ virtual EdgeType &createDefUseEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+ /// Create a memory dependence edge going from \p Src to \p Tgt.
+ virtual EdgeType &createMemoryEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+ /// Create a rooted edge going from \p Src to \p Tgt .
+ virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+ /// Deallocate memory of edge \p E.
+ virtual void destroyEdge(EdgeType &E) { delete &E; }
+
+ /// Deallocate memory of node \p N.
+ virtual void destroyNode(NodeType &N) { delete &N; }
+
+ /// Map types to map instructions to nodes used when populating the graph.
+ using InstToNodeMap = DenseMap<Instruction *, NodeType *>;
+
+ /// Reference to the graph that gets built by a concrete implementation of
+ /// this builder.
+ GraphType &Graph;
+
+ /// Dependence information used to create memory dependence edges in the
+ /// graph.
+ DependenceInfo &DI;
+
+ /// The list of basic blocks to consider when building the graph.
+ const BasicBlockListType &BBList;
+
+ /// A mapping from instructions to the corresponding nodes in the graph.
+ InstToNodeMap IMap;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
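The intended driving pattern is that a concrete graph type runs a concrete builder over its basic blocks. A hedged sketch using the DDGBuilder declared in DDG.h (the actual DataDependenceGraph constructors do this internally; this only illustrates the call shape):

#include "llvm/Analysis/DDG.h"

void buildDDG(llvm::DataDependenceGraph &G, llvm::DependenceInfo &DI,
              llvm::SmallVectorImpl<llvm::BasicBlock *> &BBs) {
  // populate() runs, in order: createFineGrainedNodes(),
  // createDefUseEdges(), createMemoryDependencyEdges(),
  // createAndConnectRootNode().
  llvm::DDGBuilder(G, DI, BBs).populate();
}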
diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h
index 3cfb9d13df94..2fac9c8b4b34 100644
--- a/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/include/llvm/Analysis/DivergenceAnalysis.h
@@ -73,9 +73,12 @@ public:
/// operands
bool isAlwaysUniform(const Value &Val) const;
- /// \brief Whether \p Val is a divergent value
+ /// \brief Whether \p Val is divergent at its definition.
bool isDivergent(const Value &Val) const;
+ /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent.
+ bool isDivergentUse(const Use &U) const;
+
void print(raw_ostream &OS, const Module *) const;
private:
@@ -189,12 +192,19 @@ public:
/// The GPU kernel this analysis result is for
const Function &getFunction() const { return DA.getFunction(); }
- /// Whether \p V is divergent.
+ /// Whether \p V is divergent at its definition.
bool isDivergent(const Value &V) const;
- /// Whether \p V is uniform/non-divergent
+ /// Whether \p U is divergent. Uses of a uniform value can be divergent.
+ bool isDivergentUse(const Use &U) const;
+
+ /// Whether \p V is uniform/non-divergent.
bool isUniform(const Value &V) const { return !isDivergent(V); }
+ /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
+ /// divergent.
+ bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
+
/// Print all divergent values in the kernel.
void print(raw_ostream &OS, const Module *) const;
};
diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h
index d3fcfc2d41ab..5d1c5a05206a 100644
--- a/include/llvm/Analysis/GlobalsModRef.h
+++ b/include/llvm/Analysis/GlobalsModRef.h
@@ -34,7 +34,7 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
class FunctionInfo;
const DataLayout &DL;
- const TargetLibraryInfo &TLI;
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
/// The globals that do not have their addresses taken.
SmallPtrSet<const GlobalValue *, 8> NonAddressTakenGlobals;
@@ -72,14 +72,18 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
/// could perform to the memory utilization here if this becomes a problem.
std::list<DeletionCallbackHandle> Handles;
- explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI);
+ explicit GlobalsAAResult(
+ const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
public:
GlobalsAAResult(GlobalsAAResult &&Arg);
~GlobalsAAResult();
- static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI,
- CallGraph &CG);
+ static GlobalsAAResult
+ analyzeModule(Module &M,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
+ CallGraph &CG);
//------------------------------------------------
// Implement the AliasAnalysis API
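The switch from a single TargetLibraryInfo reference to a per-function callback follows the wider TLI work where library availability can differ per function. A sketch of the callback a new-pass-manager caller would supply (illustrative):

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include <functional>

std::function<const llvm::TargetLibraryInfo &(llvm::Function &)>
makeGetTLI(llvm::FunctionAnalysisManager &FAM) {
  return [&FAM](llvm::Function &F) -> const llvm::TargetLibraryInfo & {
    return FAM.getResult<llvm::TargetLibraryAnalysis>(F);
  };
}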
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 054ffca7215e..a5ffca13046b 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -31,6 +31,7 @@
#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
@@ -141,6 +142,13 @@ Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q);
+/// Given operands for the multiplication of a FMA, fold the result or return
+/// null. In contrast to SimplifyFMulInst, this function will not perform
+/// simplifications whose unrounded results differ when rounded to the argument
+/// type.
+Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const SimplifyQuery &Q);
+
/// Given operands for a Mul, fold the result or return null.
Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
@@ -234,21 +242,19 @@ Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
/// Given operand for a UnaryOperator, fold the result or return null.
Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q);
-/// Given operand for an FP UnaryOperator, fold the result or return null.
-/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp.
-Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
- const SimplifyQuery &Q);
+/// Given operand for a UnaryOperator, fold the result or return null.
+/// Try to use FastMathFlags when folding the result.
+Value *SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q);
/// Given operands for a BinaryOperator, fold the result or return null.
Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
-/// Given operands for an FP BinaryOperator, fold the result or return null.
-/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
-Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- FastMathFlags FMF, const SimplifyQuery &Q);
+/// Given operands for a BinaryOperator, fold the result or return null.
+/// Try to use FastMathFlags when folding the result.
+Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ FastMathFlags FMF, const SimplifyQuery &Q);
/// Given a callsite, fold the result or return null.
Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q);
@@ -263,12 +269,14 @@ Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
/// This first performs a normal RAUW of I with SimpleV. It then recursively
/// attempts to simplify those users updated by the operation. The 'I'
/// instruction must not be equal to the simplified value 'SimpleV'.
+/// If UnsimplifiedUsers is provided, instructions that could not be simplified
+/// are added to it.
///
/// The function returns true if any simplifications were performed.
-bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
- const TargetLibraryInfo *TLI = nullptr,
- const DominatorTree *DT = nullptr,
- AssumptionCache *AC = nullptr);
+bool replaceAndRecursivelySimplify(
+ Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr,
+ const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr,
+ SmallSetVector<Instruction *, 8> *UnsimplifiedUsers = nullptr);
/// Recursively attempt to simplify an instruction.
///
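With the new out-parameter, a caller can collect the users that resisted simplification and feed them back into its own worklist. A sketch of the call shape:

#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instruction.h"

void replaceAndRequeue(llvm::Instruction *I, llvm::Value *SimpleV) {
  llvm::SmallSetVector<llvm::Instruction *, 8> Unsimplified;
  if (llvm::replaceAndRecursivelySimplify(I, SimpleV, /*TLI=*/nullptr,
                                          /*DT=*/nullptr, /*AC=*/nullptr,
                                          &Unsimplified))
    for (llvm::Instruction *U : Unsimplified)
      (void)U; // hand back to the caller's worklist (illustrative)
}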
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index 2d83929211e2..20a35bef189b 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -931,7 +931,8 @@ public:
/// This sets up the graph and computes all of the entry points of the graph.
/// No function definitions are scanned until their nodes in the graph are
/// requested during traversal.
- LazyCallGraph(Module &M, TargetLibraryInfo &TLI);
+ LazyCallGraph(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI);
LazyCallGraph(LazyCallGraph &&G);
LazyCallGraph &operator=(LazyCallGraph &&RHS);
@@ -1267,7 +1268,12 @@ public:
/// This just builds the set of entry points to the call graph. The rest is
/// built lazily as it is walked.
LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) {
- return LazyCallGraph(M, AM.getResult<TargetLibraryAnalysis>(M));
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return LazyCallGraph(M, GetTLI);
}
};
diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
index 0a338b816640..e33b8f4129f3 100644
--- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -39,17 +39,18 @@ public:
void print(raw_ostream &OS, const Module *) const override;
// Returns true if V is divergent at its definition.
- //
- // Even if this function returns false, V may still be divergent when used
- // in a different basic block.
bool isDivergent(const Value *V) const;
+ // Returns true if U is divergent. Uses of a uniform value can be divergent.
+ bool isDivergentUse(const Use *U) const;
+
// Returns true if V is uniform/non-divergent.
- //
- // Even if this function returns true, V may still be divergent when used
- // in a different basic block.
bool isUniform(const Value *V) const { return !isDivergent(V); }
+ // Returns true if U is uniform/non-divergent. Uses of a uniform value can be
+ // divergent.
+ bool isUniformUse(const Use *U) const { return !isDivergentUse(U); }
+
// Keep the analysis results up to date by removing an erased value.
void removeValue(const Value *V) { DivergentValues.erase(V); }
@@ -62,6 +63,9 @@ private:
// Stores all divergent values.
DenseSet<const Value *> DivergentValues;
+
+ // Stores divergent uses of possibly uniform values.
+ DenseSet<const Use *> DivergentUses;
};
} // End llvm namespace
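The use-level queries matter because a value that is uniform at its definition can still be divergent at a particular use, for example past a divergent branch. A sketch of querying the use of a branch condition rather than its definition:

#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/IR/Instructions.h"

bool branchNeedsPredication(const llvm::BranchInst *Br,
                            const llvm::LegacyDivergenceAnalysis &DA) {
  if (!Br->isConditional())
    return false;
  // Query the specific use of the condition, not just its definition.
  return DA.isDivergentUse(&Br->getOperandUse(0));
}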
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index 5df6bb02308d..9604b2521e89 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -20,7 +20,9 @@
namespace llvm {
class DataLayout;
+class Loop;
class MDNode;
+class ScalarEvolution;
/// Return true if this is always a dereferenceable pointer. If the context
/// instruction is specified perform context-sensitive analysis and return true
@@ -35,7 +37,8 @@ bool isDereferenceablePointer(const Value *V, Type *Ty,
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
- unsigned Align, const DataLayout &DL,
+ MaybeAlign Alignment,
+ const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -43,7 +46,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
/// greater or equal than requested. If the context instruction is specified
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
-bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
const APInt &Size, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -56,11 +59,22 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
/// If it is not obviously safe to load from the specified pointer, we do a
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+bool isSafeToLoadUnconditionally(Value *V, MaybeAlign Alignment, APInt &Size,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
+/// Return true if we can prove that the given load (which is assumed to be
+/// within the specified loop) would access only dereferenceable memory, and
+/// be properly aligned on every iteration of the specified loop regardless of
+/// its placement within the loop (i.e. it does not require predication beyond
+/// that required by the header itself and could be hoisted into the header
+/// if desired). This is more powerful than the variants above when the
+/// address loaded from is analyzable by SCEV.
+bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
+ ScalarEvolution &SE,
+ DominatorTree &DT);
+
/// Return true if we know that executing a load from this value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
@@ -69,7 +83,7 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
/// If it is not obviously safe to load from the specified pointer, we do a
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+bool isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
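isDereferenceableAndAlignedInLoop is the SCEV-powered variant a loop transform would use when deciding whether a load can be hoisted or executed unconditionally. A sketch of the call shape:

#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"

bool canSpeculateLoad(llvm::LoadInst *LI, llvm::Loop *L,
                      llvm::ScalarEvolution &SE, llvm::DominatorTree &DT) {
  // True only if every iteration of L would access dereferenceable,
  // properly aligned memory, independent of the load's position in L.
  return llvm::isDereferenceableAndAlignedInLoop(LI, L, SE, DT);
}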
diff --git a/include/llvm/Analysis/LoopAnalysisManager.h b/include/llvm/Analysis/LoopAnalysisManager.h
index 368a810cfa67..a2e65a7310af 100644
--- a/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/include/llvm/Analysis/LoopAnalysisManager.h
@@ -86,8 +86,9 @@ typedef InnerAnalysisManagerProxy<LoopAnalysisManager, Function>
template <> class LoopAnalysisManagerFunctionProxy::Result {
public:
explicit Result(LoopAnalysisManager &InnerAM, LoopInfo &LI)
- : InnerAM(&InnerAM), LI(&LI) {}
- Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI) {
+ : InnerAM(&InnerAM), LI(&LI), MSSAUsed(false) {}
+ Result(Result &&Arg)
+ : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) {
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibility to clear the
// analysis state.
@@ -96,6 +97,7 @@ public:
Result &operator=(Result &&RHS) {
InnerAM = RHS.InnerAM;
LI = RHS.LI;
+ MSSAUsed = RHS.MSSAUsed;
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibility to clear the
// analysis state.
@@ -112,6 +114,9 @@ public:
InnerAM->clear();
}
+ /// Mark MemorySSA as used so we can invalidate self if MSSA is invalidated.
+ void markMSSAUsed() { MSSAUsed = true; }
+
/// Accessor for the analysis manager.
LoopAnalysisManager &getManager() { return *InnerAM; }
@@ -130,6 +135,7 @@ public:
private:
LoopAnalysisManager *InnerAM;
LoopInfo *LI;
+ bool MSSAUsed;
};
/// Provide a specialized run method for the \c LoopAnalysisManagerFunctionProxy
diff --git a/include/llvm/Analysis/LoopCacheAnalysis.h b/include/llvm/Analysis/LoopCacheAnalysis.h
new file mode 100644
index 000000000000..ffec78b6db2c
--- /dev/null
+++ b/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -0,0 +1,281 @@
+//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the interface for the loop cache analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
+#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class LPMUpdater;
+using CacheCostTy = int64_t;
+using LoopVectorTy = SmallVector<Loop *, 8>;
+
+/// Represents a memory reference as a base pointer and a set of indexing
+/// operations. For example given the array reference A[i][2j+1][3k+2] in a
+/// 3-dim loop nest:
+/// for(i=0;i<n;++i)
+/// for(j=0;j<m;++j)
+/// for(k=0;k<o;++k)
+/// ... A[i][2j+1][3k+2] ...
+/// We expect:
+/// BasePointer -> A
+/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
+/// Sizes -> [m][o][4]
+class IndexedReference {
+ friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
+
+public:
+ /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
+ IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
+ ScalarEvolution &SE);
+
+ bool isValid() const { return IsValid; }
+ const SCEV *getBasePointer() const { return BasePointer; }
+ size_t getNumSubscripts() const { return Subscripts.size(); }
+ const SCEV *getSubscript(unsigned SubNum) const {
+ assert(SubNum < getNumSubscripts() && "Invalid subscript number");
+ return Subscripts[SubNum];
+ }
+ const SCEV *getFirstSubscript() const {
+ assert(!Subscripts.empty() && "Expecting non-empty container");
+ return Subscripts.front();
+ }
+ const SCEV *getLastSubscript() const {
+ assert(!Subscripts.empty() && "Expecting non-empty container");
+ return Subscripts.back();
+ }
+
+ /// Return true if the current object and the indexed reference \p Other are
+ /// in the same cache line of size \p CLS, and false if they are not. Two
+ /// references are in the same cache line iff the distance between them in
+ /// the innermost dimension is less than the cache line size. Return None if
+ /// unsure.
+ Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
+ AliasAnalysis &AA) const;
+
+ /// Return true if the current object and the indexed reference \p Other
+ /// have a distance smaller than \p MaxDistance in the dimension associated
+ /// with the given loop \p L, false if the distance is not smaller than
+ /// \p MaxDistance, and None if unsure.
+ Optional<bool> hasTemporalReuse(const IndexedReference &Other,
+ unsigned MaxDistance, const Loop &L,
+ DependenceInfo &DI, AliasAnalysis &AA) const;
+
+ /// Compute the cost of the reference w.r.t. the given loop \p L when it is
+ /// considered in the innermost position in the loop nest.
+ /// The cost is defined as:
+ /// - equal to one if the reference is loop invariant, or
+ /// - equal to '(TripCount * stride) / cache_line_size' if:
+ /// + the reference stride is less than the cache line size, and
+ /// + the coefficient of this loop's index variable used in all other
+ /// subscripts is zero
+ /// - or otherwise equal to 'TripCount'.
+ CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
+
+private:
+ /// Attempt to delinearize the indexed reference.
+ bool delinearize(const LoopInfo &LI);
+
+ /// Return true if the indexed reference is invariant with respect to loop \p L.
+ bool isLoopInvariant(const Loop &L) const;
+
+ /// Return true if the indexed reference is 'consecutive' in loop \p L.
+ /// An indexed reference is 'consecutive' if the only coefficient that uses
+ /// the loop induction variable is the rightmost one, and the access stride is
+ /// smaller than the cache line size \p CLS.
+ bool isConsecutive(const Loop &L, unsigned CLS) const;
+
+ /// Return the coefficient used in the rightmost dimension.
+ const SCEV *getLastCoefficient() const;
+
+ /// Return true if the coefficient corresponding to the induction variable of
+ /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
+ bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
+ const Loop &L) const;
+
+ /// Verify that the given \p Subscript is 'well formed' (must be a simple add
+ /// recurrence).
+ bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
+
+ /// Return true if the given reference \p Other is definitely aliased with
+ /// the indexed reference represented by this class.
+ bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const;
+
+private:
+ /// True if the reference can be delinearized, false otherwise.
+ bool IsValid = false;
+
+ /// Represent the memory reference instruction.
+ Instruction &StoreOrLoadInst;
+
+ /// The base pointer of the memory reference.
+ const SCEV *BasePointer = nullptr;
+
+ /// The subscripts (indexes) of the memory reference.
+ SmallVector<const SCEV *, 3> Subscripts;
+
+ /// The dimensions of the memory reference.
+ SmallVector<const SCEV *, 3> Sizes;
+
+ ScalarEvolution &SE;
+};
+
+/// A reference group represents a set of memory references that exhibit
+/// temporal or spatial reuse. Two references belong to the same
+/// reference group with respect to an inner loop L iff:
+/// 1. they have a loop independent dependence, or
+/// 2. they have a loop carried dependence with a small dependence distance
+/// (e.g. less than 2) carried by the inner loop, or
+/// 3. they refer to the same array, and their subscripts in the innermost
+/// dimension differ by at most 'd' (where 'd' is less than the cache
+/// line size)
+///
+/// Intuitively a reference group represents memory references that access
+/// the same cache line. Conditions 1 and 2 above account for temporal reuse,
+/// while condition 3 accounts for spatial reuse.
+using ReferenceGroupTy = SmallVector<std::unique_ptr<IndexedReference>, 8>;
+using ReferenceGroupsTy = SmallVector<ReferenceGroupTy, 8>;
+
+/// \c CacheCost represents the estimated cost of an inner loop as the number of
+/// cache lines used by the memory references it contains.
+/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
+/// the cache costs of all of its reference groups when the loop is considered
+/// to be in the innermost position in the nest.
+/// A reference group represents memory references that fall into the same cache
+/// line. Each reference group is analyzed with respect to the innermost loop in
+/// a loop nest. The cost of a reference is defined as follows:
+/// - one if it is loop invariant w.r.t. the innermost loop,
+/// - equal to the loop trip count divided by the number of accesses that fit
+/// in a cache line (i.e. RefCost = TripCount / (CLS / RefStride)) if the
+/// reference stride is less than the cache line size (CLS), and the
+/// coefficient of this loop's index variable used in all other subscripts
+/// is zero,
+/// - equal to the innermost loop trip count if the reference stride is greater
+/// than or equal to the cache line size CLS.
+class CacheCost {
+ friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
+ using LoopTripCountTy = std::pair<const Loop *, unsigned>;
+ using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
+
+public:
+ static CacheCostTy constexpr InvalidCost = -1;
+
+ /// Construct a CacheCost object for the loop nest described by \p Loops.
+ /// The optional parameter \p TRT can be used to specify the max. distance
+ /// between array elements accessed in a loop so that the elements are
+ /// classified to have temporal reuse.
+ CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
+ TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI,
+ Optional<unsigned> TRT = None);
+
+ /// Create a CacheCost for the loop nest rooted by \p Root.
+ /// The optional parameter \p TRT can be used to specify the max. distance
+ /// between array elements accessed in a loop so that the elements are
+ /// classified to have temporal reuse.
+ static std::unique_ptr<CacheCost>
+ getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI,
+ Optional<unsigned> TRT = None);
+
+ /// Return the estimated cost of loop \p L if the given loop is part of the
+ /// loop nest associated with this object. Return -1 otherwise.
+ CacheCostTy getLoopCost(const Loop &L) const {
+ auto IT = std::find_if(
+ LoopCosts.begin(), LoopCosts.end(),
+ [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; });
+ return (IT != LoopCosts.end()) ? (*IT).second : -1;
+ }
+
+ /// Return the estimated ordered loop costs.
+ const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
+
+private:
+ /// Calculate the cache footprint of each loop in the nest (when it is
+ /// considered to be in the innermost position).
+ void calculateCacheFootprint();
+
+ /// Partition store/load instructions in the loop nest into reference groups.
+ /// Two or more memory accesses belong in the same reference group if they
+ /// share the same cache line.
+ bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
+
+ /// Calculate the cost of the given loop \p L assuming it is the innermost
+ /// loop in the nest.
+ CacheCostTy computeLoopCacheCost(const Loop &L,
+ const ReferenceGroupsTy &RefGroups) const;
+
+ /// Compute the cost of a representative reference in reference group \p RG
+ /// when the given loop \p L is considered as the innermost loop in the nest.
+ /// The computed cost is an estimate for the number of cache lines used by the
+ /// reference group. The representative reference cost is defined as:
+ /// - equal to one if the reference is loop invariant, or
+ /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
+ /// induction variable is used only in the reference subscript associated
+ /// with loop \p L, and (b) the reference stride is less than the cache
+ /// line size, or
+ /// - TripCount otherwise
+ CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
+ const Loop &L) const;
+
+ /// Sort the LoopCosts vector by decreasing cache cost.
+ void sortLoopCosts() {
+ sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
+ return A.second > B.second;
+ });
+ }
+
+private:
+ /// Loops in the loop nest associated with this object.
+ LoopVectorTy Loops;
+
+ /// Trip counts for the loops in the loop nest associated with this object.
+ SmallVector<LoopTripCountTy, 3> TripCounts;
+
+ /// Cache costs for the loops in the loop nest associated with this object.
+ SmallVector<LoopCacheCostTy, 3> LoopCosts;
+
+ /// The max. distance between array elements accessed in a loop so that the
+ /// elements are classified to have temporal reuse.
+ Optional<unsigned> TRT;
+
+ const LoopInfo &LI;
+ ScalarEvolution &SE;
+ TargetTransformInfo &TTI;
+ AliasAnalysis &AA;
+ DependenceInfo &DI;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
+raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
+
+/// Printer pass for the \c CacheCost results.
+class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
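// Editorial sketch (not part of the patch): one way the interface above can be
// driven from a new-PM loop pass, mirroring LoopCachePrinterPass. The helper
// name is hypothetical, and DependenceInfo is built on demand because it is
// not part of the standard loop analysis results.
#include "llvm/Analysis/LoopCacheAnalysis.h"

static void printLoopCacheCosts(llvm::Loop &L,
                                llvm::LoopStandardAnalysisResults &AR,
                                llvm::raw_ostream &OS) {
  using namespace llvm;
  DependenceInfo DI(L.getHeader()->getParent(), &AR.AA, &AR.SE, &AR.LI);
  if (std::unique_ptr<CacheCost> CC = CacheCost::getCacheCost(L, AR, DI))
    // Each entry estimates the number of cache lines used when the
    // corresponding loop is placed in the innermost position.
    for (const auto &LC : CC->getLoopCosts())
      OS << "Loop '" << LC.first->getName() << "' cost = " << LC.second << "\n";
}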
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 584eb3a8c854..abf3863b0601 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -30,6 +30,9 @@
// instance. In particular, a Loop might be inside such a non-loop SCC, or a
// non-loop SCC might contain a sub-SCC which is a Loop.
//
+// For an overview of terminology used in this API (and thus all of our loop
+// analyses or transforms), see docs/LoopTerminology.rst.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_LOOPINFO_H
@@ -570,9 +573,9 @@ public:
bool getIncomingAndBackEdge(BasicBlock *&Incoming,
BasicBlock *&Backedge) const;
- /// Below are some utilities to get loop bounds and induction variable, and
- /// check if a given phinode is an auxiliary induction variable, as well as
- /// checking if the loop is canonical.
+ /// Below are some utilities to get the loop guard, loop bounds and induction
+ /// variable, and to check if a given phinode is an auxiliary induction
+ /// variable, if the loop is guarded, and if the loop is canonical.
///
/// Here is an example:
/// \code
@@ -604,6 +607,9 @@ public:
///
/// - getInductionVariable --> i_1
/// - isAuxiliaryInductionVariable(x) --> true if x == i_1
+ /// - getLoopGuardBranch()
+ /// --> `if (guardcmp) goto preheader; else goto afterloop`
+ /// - isGuarded() --> true
/// - isCanonical --> false
struct LoopBounds {
/// Return the LoopBounds object if
@@ -725,6 +731,31 @@ public:
bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
ScalarEvolution &SE) const;
+ /// Return the loop guard branch, if it exists.
+ ///
+ /// This currently only works on simplified loops, as it requires a preheader
+ /// and a latch to identify the guard. It will work on loops of the form:
+ /// \code
+ /// GuardBB:
+ /// br cond1, Preheader, ExitSucc <== GuardBranch
+ /// Preheader:
+ /// br Header
+ /// Header:
+ /// ...
+ /// br Latch
+ /// Latch:
+ /// br cond2, Header, ExitBlock
+ /// ExitBlock:
+ /// br ExitSucc
+ /// ExitSucc:
+ /// \endcode
+ BranchInst *getLoopGuardBranch() const;
+
+ /// Return true iff the loop is
+ /// - in simplified, rotated form, and
+ /// - guarded by a loop guard branch.
+ bool isGuarded() const { return (getLoopGuardBranch() != nullptr); }
+
/// Return true if the loop induction variable starts at zero and increments
/// by one each time through the loop.
bool isCanonical(ScalarEvolution &SE) const;
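// Editorial sketch (not part of the patch): querying the new guard interface.
// `L` is assumed to be a Loop in simplified, rotated form.
if (L.isGuarded()) {
  llvm::BranchInst *Guard = L.getLoopGuardBranch();
  // One successor of the guard is the preheader; the other skips the loop.
  llvm::Value *GuardCond = Guard->getCondition();
  (void)GuardCond;
}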
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 4c33dac9e21e..8b11e848a195 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -85,9 +85,9 @@ template <class BlockT, class LoopT>
bool LoopBase<BlockT, LoopT>::hasDedicatedExits() const {
// Each predecessor of each exit block of a normal loop is contained
// within the loop.
- SmallVector<BlockT *, 4> ExitBlocks;
- getExitBlocks(ExitBlocks);
- for (BlockT *EB : ExitBlocks)
+ SmallVector<BlockT *, 4> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ for (BlockT *EB : UniqueExitBlocks)
for (BlockT *Predecessor : children<Inverse<BlockT *>>(EB))
if (!contains(Predecessor))
return false;
@@ -200,8 +200,6 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const {
}
}
- // Make sure there is only one exit out of the preheader.
- assert(Out && "Header of loop has no predecessors from outside loop?");
return Out;
}
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 49f9e58ffad7..a89d76b9e5bd 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -58,6 +58,9 @@ class Value;
/// like).
bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
+bool isAllocationFn(const Value *V,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast = false);
/// Tests if a value is a call or invoke to a function that returns a
/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions).
@@ -68,6 +71,9 @@ bool isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI,
/// allocates uninitialized memory (such as malloc).
bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
+bool isMallocLikeFn(const Value *V,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast = false);
/// Tests if a value is a call or invoke to a library function that
/// allocates zero-filled memory (such as calloc).
@@ -93,6 +99,16 @@ bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
/// reallocates memory (e.g., realloc).
bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory and throws if the allocation fails (e.g., new).
+bool isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false);
+
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory (e.g., strdup, strndup).
+bool isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false);
+
//===----------------------------------------------------------------------===//
// malloc Call Utility Functions.
//
@@ -100,9 +116,13 @@ bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
-const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI);
-inline CallInst *extractMallocCall(Value *I, const TargetLibraryInfo *TLI) {
- return const_cast<CallInst*>(extractMallocCall((const Value*)I, TLI));
+const CallInst *
+extractMallocCall(const Value *I,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
+inline CallInst *
+extractMallocCall(Value *I,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+ return const_cast<CallInst *>(extractMallocCall((const Value *)I, GetTLI));
}
/// getMallocType - Returns the PointerType resulting from the malloc call.
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index e2669c2fa601..e89e5690fad0 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -362,11 +362,14 @@ private:
PhiValues &PV;
PredIteratorCache PredCache;
+ unsigned DefaultBlockScanLimit;
+
public:
MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC,
- const TargetLibraryInfo &TLI,
- DominatorTree &DT, PhiValues &PV)
- : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV) {}
+ const TargetLibraryInfo &TLI, DominatorTree &DT,
+ PhiValues &PV, unsigned DefaultBlockScanLimit)
+ : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV),
+ DefaultBlockScanLimit(DefaultBlockScanLimit) {}
/// Handle invalidation in the new PM.
bool invalidate(Function &F, const PreservedAnalyses &PA,
@@ -511,9 +514,14 @@ class MemoryDependenceAnalysis
static AnalysisKey Key;
+ unsigned DefaultBlockScanLimit;
+
public:
using Result = MemoryDependenceResults;
+ MemoryDependenceAnalysis();
+ MemoryDependenceAnalysis(unsigned DefaultBlockScanLimit)
+ : DefaultBlockScanLimit(DefaultBlockScanLimit) {}
+
MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM);
};
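// Editorial sketch (not part of the patch): registering the analysis with a
// non-default scan limit; the value 500 is an arbitrary example.
llvm::FunctionAnalysisManager FAM;
FAM.registerPass([] {
  return llvm::MemoryDependenceAnalysis(/*DefaultBlockScanLimit=*/500);
});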
diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h
index b7730be75354..e89bf26a7234 100644
--- a/include/llvm/Analysis/MemorySSA.h
+++ b/include/llvm/Analysis/MemorySSA.h
@@ -793,6 +793,7 @@ protected:
friend class MemorySSAPrinterLegacyPass;
friend class MemorySSAUpdater;
+ void verifyPrevDefInPhis(Function &F) const;
void verifyDefUses(Function &F) const;
void verifyDomination(Function &F) const;
void verifyOrdering(Function &F) const;
@@ -830,7 +831,8 @@ protected:
void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
AccessList::iterator);
MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *,
- const MemoryUseOrDef *Template = nullptr);
+ const MemoryUseOrDef *Template = nullptr,
+ bool CreationMustSucceed = true);
private:
template <class AliasAnalysisType> class ClobberWalkerBase;
diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h
index d4d8040c1ff6..1d34663721e3 100644
--- a/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/include/llvm/Analysis/MemorySSAUpdater.h
@@ -99,7 +99,7 @@ public:
/// load a
/// Where a mayalias b, *does* require RenameUses be set to true.
void insertDef(MemoryDef *Def, bool RenameUses = false);
- void insertUse(MemoryUse *Use);
+ void insertUse(MemoryUse *Use, bool RenameUses = false);
/// Update the MemoryPhi in `To` following an edge deletion between `From` and
/// `To`. If `To` becomes unreachable, a call to removeBlocks should be made.
void removeEdge(BasicBlock *From, BasicBlock *To);
@@ -275,6 +275,7 @@ private:
getPreviousDefRecursive(BasicBlock *,
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
MemoryAccess *recursePhi(MemoryAccess *Phi);
+ MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi);
template <class RangeType>
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
void tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs);
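// Editorial sketch (not part of the patch, signatures assumed from this
// header): exercising the new RenameUses parameter after cloning a load.
// `MSSA` (a MemorySSA &) and `NewLoad` are hypothetical.
llvm::MemorySSAUpdater MSSAU(&MSSA);
auto *NewUse = llvm::cast<llvm::MemoryUse>(MSSAU.createMemoryAccessInBB(
    NewLoad, /*Definition=*/nullptr, NewLoad->getParent(),
    llvm::MemorySSA::End));
// As with insertDef, RenameUses=true requests renaming of affected accesses.
MSSAU.insertUse(NewUse, /*RenameUses=*/true);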
diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h
index 3ef539c89d97..87cf9f85c7f1 100644
--- a/include/llvm/Analysis/MustExecute.h
+++ b/include/llvm/Analysis/MustExecute.h
@@ -7,10 +7,17 @@
//===----------------------------------------------------------------------===//
/// \file
/// Contains a collection of routines for determining if a given instruction is
-/// guaranteed to execute if a given point in control flow is reached. The most
+/// guaranteed to execute if a given point in control flow is reached. The most
/// common example is an instruction within a loop being provably executed if we
/// branch to the header of its containing loop.
///
+/// There are two interfaces available to determine if an instruction is
+/// executed once a given point in the control flow is reached:
+/// 1) A loop-centric one derived from LoopSafetyInfo.
+/// 2) A "must be executed context"-based one implemented in the
+/// MustBeExecutedContextExplorer.
+/// Please refer to the class comments for more information.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
@@ -164,6 +171,280 @@ public:
virtual ~ICFLoopSafetyInfo() {};
};
-}
+struct MustBeExecutedContextExplorer;
+
+/// Must be executed iterators visit stretches of instructions that are
+/// guaranteed to be executed together, potentially with other instructions
+/// executed in-between.
+///
+/// Given the following code, and assuming all statements are single
+/// instructions which transfer execution to the successor (see
+/// isGuaranteedToTransferExecutionToSuccessor), there are two possible
+/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
+/// and E. If we start at C or D, we will visit all instructions A-E.
+///
+/// \code
+/// A;
+/// B;
+/// if (...) {
+/// C;
+/// D;
+/// }
+/// E;
+/// \endcode
+///
+///
+/// Below is the example extended with instructions F and G. Now we assume F
+/// might not transfer execution to its successor G. As a result we get the
+/// following visit sets:
+///
+/// Start Instruction | Visit Set
+/// A | A, B, E, F
+/// B | A, B, E, F
+/// C | A, B, C, D, E, F
+/// D | A, B, C, D, E, F
+/// E | A, B, E, F
+/// F | A, B, E, F
+/// G | A, B, E, F, G
+///
+///
+/// \code
+/// A;
+/// B;
+/// if (...) {
+/// C;
+/// D;
+/// }
+/// E;
+/// F; // Might not transfer execution to its successor G.
+/// G;
+/// \endcode
+///
+///
+/// A more complex example involving conditionals, loops, break, and continue
+/// is shown below. We again assume all instructions will transmit control to
+/// the successor and we assume we can prove the inner loop to be finite. We
+/// omit non-trivial branch conditions as the exploration is oblivious to them.
+/// Constant branches are assumed to be unconditional in the CFG. The resulting
+/// visit sets are shown in the table below.
+///
+/// \code
+/// A;
+/// while (true) {
+/// B;
+/// if (...)
+/// C;
+/// if (...)
+/// continue;
+/// D;
+/// if (...)
+/// break;
+/// do {
+/// if (...)
+/// continue;
+/// E;
+/// } while (...);
+/// F;
+/// }
+/// G;
+/// \endcode
+///
+/// Start Instruction | Visit Set
+/// A | A, B
+/// B | A, B
+/// C | A, B, C
+/// D | A, B, D
+/// E | A, B, D, E, F
+/// F | A, B, D, F
+/// G | A, B, D, G
+///
+///
+/// Note that the examples show optimal visit sets but not necessarily the ones
+/// derived by the explorer depending on the available CFG analyses (see
+/// MustBeExecutedContextExplorer). Also note that, depending on the options,
+/// the visit set can contain instructions from other functions.
+struct MustBeExecutedIterator {
+ /// Type declarations that make this class an input iterator.
+ ///{
+ typedef const Instruction *value_type;
+ typedef std::ptrdiff_t difference_type;
+ typedef const Instruction **pointer;
+ typedef const Instruction *&reference;
+ typedef std::input_iterator_tag iterator_category;
+ ///}
+
+ using ExplorerTy = MustBeExecutedContextExplorer;
+
+ MustBeExecutedIterator(const MustBeExecutedIterator &Other)
+ : Visited(Other.Visited), Explorer(Other.Explorer),
+ CurInst(Other.CurInst) {}
+
+ MustBeExecutedIterator(MustBeExecutedIterator &&Other)
+ : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
+ CurInst(Other.CurInst) {}
+
+ MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
+ if (this != &Other) {
+ std::swap(Visited, Other.Visited);
+ std::swap(CurInst, Other.CurInst);
+ }
+ return *this;
+ }
+
+ ~MustBeExecutedIterator() {}
+
+ /// Pre- and post-increment operators.
+ ///{
+ MustBeExecutedIterator &operator++() {
+ CurInst = advance();
+ return *this;
+ }
+
+ MustBeExecutedIterator operator++(int) {
+ MustBeExecutedIterator tmp(*this);
+ operator++();
+ return tmp;
+ }
+ ///}
+
+ /// Equality and inequality operators. Note that we ignore the history here.
+ ///{
+ bool operator==(const MustBeExecutedIterator &Other) const {
+ return CurInst == Other.CurInst;
+ }
+
+ bool operator!=(const MustBeExecutedIterator &Other) const {
+ return !(*this == Other);
+ }
+ ///}
+
+ /// Return the underlying instruction.
+ const Instruction *&operator*() { return CurInst; }
+ const Instruction *getCurrentInst() const { return CurInst; }
+
+ /// Return true if \p I was encountered by this iterator already.
+ bool count(const Instruction *I) const { return Visited.count(I); }
+
+private:
+ using VisitedSetTy = DenseSet<const Instruction *>;
+
+ /// Private constructors.
+ MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
+
+ /// Reset the iterator to its initial state pointing at \p I.
+ void reset(const Instruction *I);
+
+ /// Try to advance the iterator to the next instruction in the context.
+ ///
+ /// \return The next instruction in the must be executed context, or nullptr
+ /// if none was found.
+ const Instruction *advance();
+
+ /// A set to track the visited instructions in order to deal with endless
+ /// loops and recursion.
+ VisitedSetTy Visited;
+
+ /// A reference to the explorer that created this iterator.
+ ExplorerTy &Explorer;
+
+ /// The instruction we are currently exposing to the user. There is always an
+ /// instruction that we know is executed with the given program point,
+ /// initially the program point itself.
+ const Instruction *CurInst;
+
+ friend struct MustBeExecutedContextExplorer;
+};
+
+/// A "must be executed context" for a given program point PP is the set of
+/// instructions, potentially before and after PP, that are always executed
+/// when PP is reached. The MustBeExecutedContextExplorer provides an interface
+/// to explore
+/// "must be executed contexts" in a module through the use of
+/// MustBeExecutedIterator.
+///
+/// The explorer exposes "must be executed iterators" that traverse the must be
+/// executed context. There is little information sharing between iterators as
+/// the expected use case involves few iterators for "far apart" instructions.
+/// If that changes, we should consider caching more intermediate results.
+struct MustBeExecutedContextExplorer {
+
+ /// In the description of the parameters we use PP to denote a program point
+ /// for which the must be executed context is explored, or put differently,
+ /// for which the MustBeExecutedIterator is created.
+ ///
+ /// \param ExploreInterBlock Flag to indicate if instructions in blocks
+ /// other than the parent of PP should be
+ /// explored.
+ MustBeExecutedContextExplorer(bool ExploreInterBlock)
+ : ExploreInterBlock(ExploreInterBlock), EndIterator(*this, nullptr) {}
+
+ /// Clean up the dynamically allocated iterators.
+ ~MustBeExecutedContextExplorer() {
+ DeleteContainerSeconds(InstructionIteratorMap);
+ }
+
+ /// Iterator-based interface. \see MustBeExecutedIterator.
+ ///{
+ using iterator = MustBeExecutedIterator;
+ using const_iterator = const MustBeExecutedIterator;
+
+ /// Return an iterator to explore the context around \p PP.
+ iterator &begin(const Instruction *PP) {
+ auto *&It = InstructionIteratorMap[PP];
+ if (!It)
+ It = new iterator(*this, PP);
+ return *It;
+ }
+
+ /// Return an iterator to explore the cached context around \p PP.
+ const_iterator &begin(const Instruction *PP) const {
+ return *InstructionIteratorMap.lookup(PP);
+ }
+
+ /// Return a universal end iterator.
+ ///{
+ iterator &end() { return EndIterator; }
+ iterator &end(const Instruction *) { return EndIterator; }
+
+ const_iterator &end() const { return EndIterator; }
+ const_iterator &end(const Instruction *) const { return EndIterator; }
+ ///}
+
+ /// Return an iterator range to explore the context around \p PP.
+ llvm::iterator_range<iterator> range(const Instruction *PP) {
+ return llvm::make_range(begin(PP), end(PP));
+ }
+
+ /// Return an iterator range to explore the cached context around \p PP.
+ llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
+ return llvm::make_range(begin(PP), end(PP));
+ }
+ ///}
+
+ /// Return the next instruction that is guaranteed to be executed after \p PP.
+ ///
+ /// \param It The iterator that is used to traverse the must be
+ /// executed context.
+ /// \param PP The program point for which the next instruction
+ /// that is guaranteed to execute is determined.
+ const Instruction *
+ getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
+ const Instruction *PP);
+
+ /// Parameters that limit the performed exploration. See the constructor for
+ /// their meaning.
+ ///{
+ const bool ExploreInterBlock;
+ ///}
+
+private:
+ /// Map from instructions to associated must be executed iterators.
+ DenseMap<const Instruction *, MustBeExecutedIterator *>
+ InstructionIteratorMap;
+
+ /// A unique end iterator.
+ MustBeExecutedIterator EndIterator;
+};
+
+} // namespace llvm
#endif
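// Editorial sketch (not part of the patch): collecting every instruction that
// is known to execute whenever the hypothetical program point `PP` executes.
llvm::MustBeExecutedContextExplorer Explorer(/*ExploreInterBlock=*/true);
llvm::SmallPtrSet<const llvm::Instruction *, 16> Known;
for (const llvm::Instruction *I : Explorer.range(PP))
  Known.insert(I);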
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index d9c97dff8c6e..8562519fa7b1 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -103,6 +103,13 @@ namespace llvm {
//
FunctionPass *createMustExecutePrinter();
+ //===--------------------------------------------------------------------===//
+ //
+ // createMustBeExecutedContextPrinter - This pass prints information about which
+ // instructions are guaranteed to execute together (run with -analyze).
+ //
+ ModulePass *createMustBeExecutedContextPrinter();
+
}
#endif
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index f309d344b8d1..6693e40ccf22 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -52,6 +52,15 @@ private:
// because the number of profile counts required to reach the hot
// percentile is above a huge threshold.
Optional<bool> HasHugeWorkingSetSize;
+ // True if the working set size of the code is considered large,
+ // because the number of profile counts required to reach the hot
+ // percentile is above a large threshold.
+ Optional<bool> HasLargeWorkingSetSize;
+ // Compute the threshold for a given cutoff.
+ Optional<uint64_t> computeThreshold(int PercentileCutoff);
+ // The map that caches the threshold values. The keys are the percentile
+ // cutoff values and the values are the corresponding threshold values.
+ DenseMap<int, uint64_t> ThresholdCache;
public:
ProfileSummaryInfo(Module &M) : M(M) {}
@@ -96,6 +105,8 @@ public:
bool AllowSynthetic = false);
/// Returns true if the working set size of the code is considered huge.
bool hasHugeWorkingSetSize();
+ /// Returns true if the working set size of the code is considered large.
+ bool hasLargeWorkingSetSize();
/// Returns true if \p F has hot function entry.
bool isFunctionEntryHot(const Function *F);
/// Returns true if \p F contains hot code.
@@ -104,14 +115,26 @@ public:
bool isFunctionEntryCold(const Function *F);
/// Returns true if \p F contains only cold code.
bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
+ /// Returns true if \p F contains hot code with regard to a given hot
+ /// percentile cutoff value.
+ bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff,
+ const Function *F,
+ BlockFrequencyInfo &BFI);
/// Returns true if count \p C is considered hot.
bool isHotCount(uint64_t C);
/// Returns true if count \p C is considered cold.
bool isColdCount(uint64_t C);
+ /// Returns true if count \p C is considered hot with regard to a given
+ /// hot percentile cutoff value.
+ bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C);
/// Returns true if BasicBlock \p BB is considered hot.
bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
/// Returns true if BasicBlock \p BB is considered cold.
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
+ /// Returns true if BasicBlock \p BB is considered hot with regard to a given
+ /// hot percentile cutoff value.
+ bool isHotBlockNthPercentile(int PercentileCutoff,
+ const BasicBlock *BB, BlockFrequencyInfo *BFI);
/// Returns true if CallSite \p CS is considered hot.
bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
/// Returns true if CallSite \p CS is considered cold.
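// Editorial sketch (not part of the patch): the NthPercentile queries take the
// cutoff in ProfileSummary units; 990000 for the 99th percentile is an assumed
// example value. `PSI`, `Count`, `F`, and `BFI` are hypothetical.
bool HotCount = PSI.isHotCountNthPercentile(/*PercentileCutoff=*/990000, Count);
bool HotFunc = PSI.isFunctionHotInCallGraphNthPercentile(990000, &F, BFI);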
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index c59c09dd2095..6b5936680c37 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -365,7 +365,7 @@ typename Tr::RegionNodeT *RegionBase<Tr>::getBBNode(BlockT *BB) const {
auto Deconst = const_cast<RegionBase<Tr> *>(this);
typename BBNodeMapT::value_type V = {
BB,
- llvm::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
+ std::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
at = BBNodeMap.insert(std::move(V)).first;
}
return at->second.get();
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 0bd98ef37e7a..9c55f7a5090f 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -468,6 +468,8 @@ template <> struct DenseMapInfo<ExitLimitQuery> {
/// can't do much with the SCEV objects directly, they must ask this class
/// for services.
class ScalarEvolution {
+ friend class ScalarEvolutionsTest;
+
public:
/// An enum describing the relationship between a SCEV and a loop.
enum LoopDisposition {
@@ -777,10 +779,10 @@ public:
/// to (i.e. a "conservative over-approximation") of the value returned by
/// getBackedgeTakenCount. If such a value cannot be computed, it returns the
/// SCEVCouldNotCompute object.
- const SCEV *getMaxBackedgeTakenCount(const Loop *L);
+ const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L);
/// Return true if the backedge taken count is either the value returned by
- /// getMaxBackedgeTakenCount or zero.
+ /// getConstantMaxBackedgeTakenCount or zero.
bool isBackedgeTakenCountMaxOrZero(const Loop *L);
/// Return true if the specified loop has an analyzable loop-invariant
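// Editorial sketch (not part of the patch): callers migrate from the old
// getMaxBackedgeTakenCount name to the more precise one.
const llvm::SCEV *MaxBTC = SE.getConstantMaxBackedgeTakenCount(L);
bool ExactIsMaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);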
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index a519f93216b3..b4d727449fbe 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -77,9 +77,13 @@ namespace llvm {
/// Phis that complete an IV chain. Reuse
DenseSet<AssertingVH<PHINode>> ChainedPhis;
- /// When true, expressions are expanded in "canonical" form. In particular,
- /// addrecs are expanded as arithmetic based on a canonical induction
- /// variable. When false, expression are expanded in a more literal form.
+ /// When true, SCEVExpander tries to expand expressions in "canonical" form.
+ /// When false, expressions are expanded in a more literal form.
+ ///
+ /// In "canonical" form addrecs are expanded as arithmetic based on a
+ /// canonical induction variable. Note that CanonicalMode doesn't guarantee
+ /// that all expressions are expanded in "canonical" form. For some
+ /// expressions literal mode can be preferred.
bool CanonicalMode;
/// When invoked from LSR, the expander is in "strength reduction" mode. The
@@ -275,8 +279,16 @@ namespace llvm {
/// Clear the current insertion point. This is useful if the instruction
/// that had been serving as the insertion point may have been deleted.
- void clearInsertPoint() {
- Builder.ClearInsertionPoint();
+ void clearInsertPoint() { Builder.ClearInsertionPoint(); }
+
+ /// Set the debug location to be attached to newly expanded instructions.
+ void SetCurrentDebugLocation(DebugLoc L) {
+ Builder.SetCurrentDebugLocation(std::move(L));
+ }
+
+ /// Get the debug location attached to newly expanded instructions.
+ const DebugLoc &getCurrentDebugLocation() const {
+ return Builder.getCurrentDebugLocation();
}
/// Return true if the specified instruction was inserted by the code
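// Editorial sketch (not part of the patch): carrying a debug location across
// expansion. `SE`, `DL`, `S`, and `InsertPt` are assumed to be in scope.
llvm::SCEVExpander Expander(SE, DL, "expander");
Expander.SetCurrentDebugLocation(InsertPt->getDebugLoc());
llvm::Value *V = Expander.expandCodeFor(S, S->getType(), InsertPt);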
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h
index 4b5200f5a838..d4b223863c54 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/include/llvm/Analysis/TargetLibraryInfo.h
@@ -30,11 +30,12 @@ struct VecDesc {
unsigned VectorizationFactor;
};
- enum LibFunc {
+ enum LibFunc : unsigned {
#define TLI_DEFINE_ENUM
#include "llvm/Analysis/TargetLibraryInfo.def"
- NumLibFuncs
+ NumLibFuncs,
+ NotLibFunc
};
/// Implementation of the target library information.
@@ -48,7 +49,7 @@ class TargetLibraryInfoImpl {
unsigned char AvailableArray[(NumLibFuncs+3)/4];
llvm::DenseMap<unsigned, std::string> CustomNames;
- static StringRef const StandardNames[NumLibFuncs];
+ static StringLiteral const StandardNames[NumLibFuncs];
bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
enum AvailabilityState {
@@ -359,7 +360,6 @@ public:
TargetLibraryAnalysis(TargetLibraryInfoImpl PresetInfoImpl)
: PresetInfoImpl(std::move(PresetInfoImpl)) {}
- TargetLibraryInfo run(Module &M, ModuleAnalysisManager &);
TargetLibraryInfo run(Function &F, FunctionAnalysisManager &);
private:
@@ -385,8 +385,13 @@ public:
explicit TargetLibraryInfoWrapperPass(const Triple &T);
explicit TargetLibraryInfoWrapperPass(const TargetLibraryInfoImpl &TLI);
- TargetLibraryInfo &getTLI() { return TLI; }
- const TargetLibraryInfo &getTLI() const { return TLI; }
+ TargetLibraryInfo &getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) {
+ return TLI;
+ }
+ const TargetLibraryInfo &
+ getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) const {
+ return TLI;
+ }
};
} // end namespace llvm
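// Editorial sketch (not part of the patch): inside a legacy FunctionPass,
// callers now name the function of interest, which remains correct if TLI
// ever becomes per-function.
const llvm::TargetLibraryInfo &TLI =
    getAnalysis<llvm::TargetLibraryInfoWrapperPass>().getTLI(F);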
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 7574b811bc1c..d6fa88411654 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -368,6 +368,20 @@ public:
/// optimize away.
unsigned getFlatAddressSpace() const;
+ /// Return any intrinsic address operand indexes which may be rewritten if
+ /// they use a flat address space pointer.
+ ///
+ /// \returns true if the intrinsic was handled.
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const;
+
+ /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
+ /// NewV, which has a different address space. This should happen for every
+ /// operand index that collectFlatAddressOperands returned for the intrinsic.
+ /// \returns true if the intrinsic was handled.
+ bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const;
+
/// Test whether calls to a function lower to actual program function
/// calls.
///
@@ -469,12 +483,17 @@ public:
bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
- /// Allow peeling off loop iterations for loops with low dynamic tripcount.
+ /// Allow peeling off loop iterations.
bool AllowPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;
+ /// Allow peeling based on profile. Used to enable peeling off all
+ /// iterations based on the provided profile.
+ /// If the value is true, the peeling cost model can decide to peel only
+ /// some iterations, in which case it will set this to false.
+ bool PeelProfiledIterations;
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
/// value above is used during unroll and jam for the outer loop size.
/// This value is used in the same manner to limit the size of the inner
@@ -555,15 +574,15 @@ public:
/// modes that operate across loop iterations.
bool shouldFavorBackedgeIndex(const Loop *L) const;
- /// Return true if the target supports masked load.
- bool isLegalMaskedStore(Type *DataType) const;
/// Return true if the target supports masked store.
- bool isLegalMaskedLoad(Type *DataType) const;
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const;
+ /// Return true if the target supports masked load.
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const;
/// Return true if the target supports nontemporal store.
- bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
+ bool isLegalNTStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal load.
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
+ bool isLegalNTLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType) const;
@@ -622,12 +641,6 @@ public:
/// Return true if this type is legal.
bool isTypeLegal(Type *Ty) const;
- /// Returns the target's jmp_buf alignment in bytes.
- unsigned getJumpBufAlignment() const;
-
- /// Returns the target's jmp_buf size in bytes.
- unsigned getJumpBufSize() const;
-
/// Return true if switches should be turned into lookup tables for the
/// target.
bool shouldBuildLookupTables() const;
@@ -775,10 +788,23 @@ public:
/// Additional properties of an operand's values.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
- /// \return The number of scalar or vector registers that the target has.
- /// If 'Vectors' is true, it returns the number of vector registers. If it is
- /// set to false, it returns the number of scalar registers.
- unsigned getNumberOfRegisters(bool Vector) const;
+ /// \return the number of registers in the target-provided register class.
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
+
+ /// \return the target-provided register class ID for the provided type,
+ /// accounting for type promotion and other type-legalization techniques
+ /// that the target might apply. However, it specifically does not account
+ /// for the scalarization or splitting of vector types. Should a vector type
+ /// require scalarization or splitting into multiple underlying vector
+ /// registers, that type should be mapped to a register class containing no
+ /// registers. Specifically, this is designed to provide a simple, high-level
+ /// view of the register allocation later performed by the backend. These
+ /// register classes don't necessarily map onto the register classes used by
+ /// the backend.
+ /// FIXME: It's not currently possible to determine how many registers
+ /// are used by the provided type.
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+
+ /// \return the target-provided register class name
+ const char* getRegisterClassName(unsigned ClassID) const;
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
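// Editorial sketch (not part of the patch): how a client heuristic might use
// the new register-class interface. `TTI` and `Ty` are assumed to be in scope.
unsigned ClassID = TTI.getRegisterClassForType(/*Vector=*/false, Ty);
unsigned NumRegs = TTI.getNumberOfRegisters(ClassID);
llvm::errs() << TTI.getRegisterClassName(ClassID) << ": " << NumRegs
             << " registers\n";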
@@ -824,18 +850,20 @@ public:
/// \return The associativity of the cache level, if available.
llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
- /// \return How much before a load we should place the prefetch instruction.
- /// This is currently measured in number of instructions.
+ /// \return How much before a load we should place the prefetch
+ /// instruction. This is currently measured in number of
+ /// instructions.
unsigned getPrefetchDistance() const;
- /// \return Some HW prefetchers can handle accesses up to a certain constant
- /// stride. This is the minimum stride in bytes where it makes sense to start
- /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
+ /// \return Some HW prefetchers can handle accesses up to a certain
+ /// constant stride. This is the minimum stride in bytes where it
+ /// makes sense to start adding SW prefetches. The default is 1,
+ /// i.e. prefetch with any stride.
unsigned getMinPrefetchStride() const;
- /// \return The maximum number of iterations to prefetch ahead. If the
- /// required number of iterations is more than this number, no prefetching is
- /// performed.
+ /// \return The maximum number of iterations to prefetch ahead. If
+ /// the required number of iterations is more than this number, no
+ /// prefetching is performed.
unsigned getMaxPrefetchIterationsAhead() const;
/// \return The maximum interleave factor that any transform should try to
@@ -1155,6 +1183,10 @@ public:
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
virtual unsigned getFlatAddressSpace() = 0;
+ virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const = 0;
+ virtual bool rewriteIntrinsicWithAddressSpace(
+ IntrinsicInst *II, Value *OldV, Value *NewV) const = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
@@ -1177,10 +1209,10 @@ public:
TargetLibraryInfo *LibInfo) = 0;
virtual bool shouldFavorPostInc() const = 0;
virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
- virtual bool isLegalMaskedStore(Type *DataType) = 0;
- virtual bool isLegalMaskedLoad(Type *DataType) = 0;
- virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
- virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
+ virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0;
+ virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0;
+ virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
+ virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedScatter(Type *DataType) = 0;
virtual bool isLegalMaskedGather(Type *DataType) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
@@ -1196,8 +1228,6 @@ public:
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
- virtual unsigned getJumpBufAlignment() = 0;
- virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
@@ -1228,19 +1258,35 @@ public:
Type *Ty) = 0;
virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
- virtual unsigned getNumberOfRegisters(bool Vector) = 0;
+ virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
+ virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0;
+ virtual const char* getRegisterClassName(unsigned ClassID) const = 0;
virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() = 0;
virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
- virtual unsigned getCacheLineSize() = 0;
- virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
- virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
- virtual unsigned getPrefetchDistance() = 0;
- virtual unsigned getMinPrefetchStride() = 0;
- virtual unsigned getMaxPrefetchIterationsAhead() = 0;
+ virtual unsigned getCacheLineSize() const = 0;
+ virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
+ virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
+
+ /// \return How much before a load we should place the prefetch
+ /// instruction. This is currently measured in number of
+ /// instructions.
+ virtual unsigned getPrefetchDistance() const = 0;
+
+ /// \return Some HW prefetchers can handle accesses up to a certain
+ /// constant stride. This is the minimum stride in bytes where it
+ /// makes sense to start adding SW prefetches. The default is 1,
+ /// i.e. prefetch with any stride.
+ virtual unsigned getMinPrefetchStride() const = 0;
+
+ /// \return The maximum number of iterations to prefetch ahead. If
+ /// the required number of iterations is more than this number, no
+ /// prefetching is performed.
+ virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
+
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -1395,6 +1441,16 @@ public:
return Impl.getFlatAddressSpace();
}
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const override {
+ return Impl.collectFlatAddressOperands(OpIndexes, IID);
+ }
+
+ bool rewriteIntrinsicWithAddressSpace(
+ IntrinsicInst *II, Value *OldV, Value *NewV) const override {
+ return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
+ }
+
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
@@ -1440,16 +1496,16 @@ public:
bool shouldFavorBackedgeIndex(const Loop *L) const override {
return Impl.shouldFavorBackedgeIndex(L);
}
- bool isLegalMaskedStore(Type *DataType) override {
- return Impl.isLegalMaskedStore(DataType);
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override {
+ return Impl.isLegalMaskedStore(DataType, Alignment);
}
- bool isLegalMaskedLoad(Type *DataType) override {
- return Impl.isLegalMaskedLoad(DataType);
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override {
+ return Impl.isLegalMaskedLoad(DataType, Alignment);
}
- bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
+ bool isLegalNTStore(Type *DataType, Align Alignment) override {
return Impl.isLegalNTStore(DataType, Alignment);
}
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
+ bool isLegalNTLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);
}
bool isLegalMaskedScatter(Type *DataType) override {
@@ -1490,8 +1546,6 @@ public:
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
- unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
- unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
bool shouldBuildLookupTables() override {
return Impl.shouldBuildLookupTables();
}
@@ -1563,8 +1617,14 @@ public:
Type *Ty) override {
return Impl.getIntImmCost(IID, Idx, Imm, Ty);
}
- unsigned getNumberOfRegisters(bool Vector) override {
- return Impl.getNumberOfRegisters(Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const override {
+ return Impl.getNumberOfRegisters(ClassID);
+ }
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override {
+ return Impl.getRegisterClassForType(Vector, Ty);
+ }
+ const char* getRegisterClassName(unsigned ClassID) const override {
+ return Impl.getRegisterClassName(ClassID);
}
unsigned getRegisterBitWidth(bool Vector) const override {
return Impl.getRegisterBitWidth(Vector);
@@ -1583,22 +1643,36 @@ public:
return Impl.shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
- unsigned getCacheLineSize() override {
+ unsigned getCacheLineSize() const override {
return Impl.getCacheLineSize();
}
- llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
+ llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const override {
return Impl.getCacheSize(Level);
}
- llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
+ llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
return Impl.getCacheAssociativity(Level);
}
- unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
- unsigned getMinPrefetchStride() override {
+
+ /// Return the preferred prefetch distance in terms of instructions.
+ ///
+ unsigned getPrefetchDistance() const override {
+ return Impl.getPrefetchDistance();
+ }
+
+ /// Return the minimum stride necessary to trigger software
+ /// prefetching.
+ ///
+ unsigned getMinPrefetchStride() const override {
return Impl.getMinPrefetchStride();
}
- unsigned getMaxPrefetchIterationsAhead() override {
+
+ /// Return the maximum prefetch distance in terms of loop
+ /// iterations.
+ ///
+ unsigned getMaxPrefetchIterationsAhead() const override {
return Impl.getMaxPrefetchIterationsAhead();
}
+
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index b99e1eb9adf0..a431fa0d458b 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -156,6 +156,16 @@ public:
return -1;
}
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const {
+ return false;
+ }
+
+ bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const {
+ return false;
+ }
+
bool isLoweredToCall(const Function *F) {
assert(F && "A concrete function must be provided to this routine.");
@@ -233,18 +243,18 @@ public:
bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
- bool isLegalMaskedStore(Type *DataType) { return false; }
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; }
- bool isLegalMaskedLoad(Type *DataType) { return false; }
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; }
- bool isLegalNTStore(Type *DataType, unsigned Alignment) {
+ bool isLegalNTStore(Type *DataType, Align Alignment) {
// By default, assume nontemporal memory stores are available for stores
// that are aligned and have a size that is a power of 2.
unsigned DataSize = DL.getTypeStoreSize(DataType);
return Alignment >= DataSize && isPowerOf2_32(DataSize);
}
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) {
+ bool isLegalNTLoad(Type *DataType, Align Alignment) {
// By default, assume nontemporal memory loads are available for loads that
// are aligned and have a size that is a power of 2.
unsigned DataSize = DL.getTypeStoreSize(DataType);
@@ -284,10 +294,6 @@ public:
bool isTypeLegal(Type *Ty) { return false; }
- unsigned getJumpBufAlignment() { return 0; }
-
- unsigned getJumpBufSize() { return 0; }
-
bool shouldBuildLookupTables() { return true; }
bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
@@ -348,7 +354,20 @@ public:
return TTI::TCC_Free;
}
- unsigned getNumberOfRegisters(bool Vector) { return 8; }
+ unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
+
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
+ return Vector ? 1 : 0;
+ }
+
+ const char* getRegisterClassName(unsigned ClassID) const {
+ switch (ClassID) {
+ default:
+ return "Generic::Unknown Register Class";
+ case 0: return "Generic::ScalarRC";
+ case 1: return "Generic::VectorRC";
+ }
+ }
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
@@ -365,21 +384,20 @@ public:
return false;
}
- unsigned getCacheLineSize() { return 0; }
+ unsigned getCacheLineSize() const { return 0; }
- llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
+ llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const {
switch (Level) {
case TargetTransformInfo::CacheLevel::L1D:
LLVM_FALLTHROUGH;
case TargetTransformInfo::CacheLevel::L2D:
return llvm::Optional<unsigned>();
}
-
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
llvm::Optional<unsigned> getCacheAssociativity(
- TargetTransformInfo::CacheLevel Level) {
+ TargetTransformInfo::CacheLevel Level) const {
switch (Level) {
case TargetTransformInfo::CacheLevel::L1D:
LLVM_FALLTHROUGH;
@@ -390,11 +408,9 @@ public:
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
- unsigned getPrefetchDistance() { return 0; }
-
- unsigned getMinPrefetchStride() { return 1; }
-
- unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
+ unsigned getPrefetchDistance() const { return 0; }
+ unsigned getMinPrefetchStride() const { return 1; }
+ unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
@@ -830,6 +846,9 @@ public:
if (isa<PHINode>(U))
return TTI::TCC_Free; // Model all PHI nodes as free.
+ if (isa<ExtractValueInst>(U))
+ return TTI::TCC_Free; // Model all ExtractValue nodes as free.
+
// Static alloca doesn't generate target instructions.
if (auto *A = dyn_cast<AllocaInst>(U))
if (A->isStaticAlloca())
diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h
index 82cf8efeea54..43ce26147c2e 100644
--- a/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/include/llvm/Analysis/TypeMetadataUtils.h
@@ -50,6 +50,8 @@ void findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<Instruction *> &LoadedPtrs,
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
const CallInst *CI, DominatorTree &DT);
+
+Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M);
}
#endif
diff --git a/include/llvm/Analysis/Utils/Local.h b/include/llvm/Analysis/Utils/Local.h
index acbdf5dca32c..a63bcec9bc41 100644
--- a/include/llvm/Analysis/Utils/Local.h
+++ b/include/llvm/Analysis/Utils/Local.h
@@ -32,7 +32,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
Value *Result = Constant::getNullValue(IntPtrTy);
// If the GEP is inbounds, we know that none of the addressing operations will
- // overflow in an unsigned sense.
+ // overflow in a signed sense.
bool isInBounds = GEPOp->isInBounds() && !NoAssumptions;
// Build a mask for high order bits.
@@ -51,10 +51,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = GTI.getStructTypeOrNull()) {
- if (OpC->getType()->isVectorTy())
- OpC = OpC->getSplatValue();
-
- uint64_t OpValue = cast<ConstantInt>(OpC)->getZExtValue();
+ uint64_t OpValue = OpC->getUniqueInteger().getZExtValue();
Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
if (Size)
@@ -63,20 +60,31 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
continue;
}
+ // Splat the constant if needed.
+ if (IntPtrTy->isVectorTy() && !OpC->getType()->isVectorTy())
+ OpC = ConstantVector::getSplat(IntPtrTy->getVectorNumElements(), OpC);
+
Constant *Scale = ConstantInt::get(IntPtrTy, Size);
Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
- Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
+ Scale =
+ ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
// Emit an add instruction.
Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
continue;
}
+
+ // Splat the index if needed.
+ if (IntPtrTy->isVectorTy() && !Op->getType()->isVectorTy())
+ Op = Builder->CreateVectorSplat(IntPtrTy->getVectorNumElements(), Op);
+
// Convert to correct type.
if (Op->getType() != IntPtrTy)
Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
if (Size != 1) {
// We'll let instcombine(mul) convert this to a shl if possible.
Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
- GEP->getName()+".idx", isInBounds /*NUW*/);
+ GEP->getName() + ".idx", false /*NUW*/,
+ isInBounds /*NSW*/);
}
// Emit an add instruction.
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index fa7e0e0eef7e..33b064fcf9d2 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -242,19 +242,21 @@ class Value;
/// This is a wrapper around Value::stripAndAccumulateConstantOffsets that
/// creates and later unpacks the required APInt.
inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout &DL) {
+ const DataLayout &DL,
+ bool AllowNonInbounds = true) {
APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Value *Base =
- Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt,
- /* AllowNonInbounds */ true);
+ Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds);
+
Offset = OffsetAPInt.getSExtValue();
return Base;
}
- inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr,
- int64_t &Offset,
- const DataLayout &DL) {
- return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
- DL);
+ inline const Value *
+ GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
+ const DataLayout &DL,
+ bool AllowNonInbounds = true) {
+ return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset, DL,
+ AllowNonInbounds);
}
/// Returns true if the GEP is based on a pointer to a string (array of
@@ -307,20 +309,26 @@ class Value;
uint64_t GetStringLength(const Value *V, unsigned CharSize = 8);
/// This function returns the call pointer argument that is considered the same by
- /// aliasing rules. You CAN'T use it to replace one value with another.
- const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call);
- inline Value *getArgumentAliasingToReturnedPointer(CallBase *Call) {
+ /// aliasing rules. You CAN'T use it to replace one value with another. If
+ /// \p MustPreserveNullness is true, the call must preserve the nullness of
+ /// the pointer.
+ const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call,
+ bool MustPreserveNullness);
+ inline Value *
+ getArgumentAliasingToReturnedPointer(CallBase *Call,
+ bool MustPreserveNullness) {
return const_cast<Value *>(getArgumentAliasingToReturnedPointer(
- const_cast<const CallBase *>(Call)));
+ const_cast<const CallBase *>(Call), MustPreserveNullness));
}
- // {launder,strip}.invariant.group returns pointer that aliases its argument,
- // and it only captures pointer by returning it.
- // These intrinsics are not marked as nocapture, because returning is
- // considered as capture. The arguments are not marked as returned neither,
- // because it would make it useless.
+  /// {launder,strip}.invariant.group returns a pointer that aliases its
+  /// argument, and it only captures the pointer by returning it.
+  /// These intrinsics are not marked as nocapture, because returning is
+  /// considered a capture. The arguments are not marked as returned either,
+  /// because that would make the attribute useless. If \p MustPreserveNullness
+  /// is true, the intrinsic must preserve the nullness of the pointer.
bool isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- const CallBase *Call);
+ const CallBase *Call, bool MustPreserveNullness);
/// This method strips off any GEP address adjustments and pointer casts from
/// the specified value, returning the original object being addressed. Note
@@ -376,6 +384,13 @@ class Value;
/// Return true if the only users of this pointer are lifetime markers.
bool onlyUsedByLifetimeMarkers(const Value *V);
+ /// Return true if speculation of the given load must be suppressed to avoid
+ /// ordering or interfering with an active sanitizer. If not suppressed,
+ /// dereferenceability and alignment must be proven separately. Note: This
+ /// is only needed for raw reasoning; if you use the interface below
+ /// (isSafeToSpeculativelyExecute), this is handled internally.
+ bool mustSuppressSpeculation(const LoadInst &LI);
+
/// Return true if the instruction does not have any effects besides
/// calculating the result and does not have undefined behavior.
///
@@ -605,12 +620,12 @@ class Value;
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp = nullptr,
unsigned Depth = 0);
+
inline SelectPatternResult
- matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS,
- Instruction::CastOps *CastOp = nullptr) {
- Value *L = const_cast<Value*>(LHS);
- Value *R = const_cast<Value*>(RHS);
- auto Result = matchSelectPattern(const_cast<Value*>(V), L, R);
+ matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS) {
+ Value *L = const_cast<Value *>(LHS);
+ Value *R = const_cast<Value *>(RHS);
+ auto Result = matchSelectPattern(const_cast<Value *>(V), L, R);
LHS = L;
RHS = R;
return Result;
@@ -654,6 +669,12 @@ class Value;
Optional<bool> isImpliedByDomCondition(const Value *Cond,
const Instruction *ContextI,
const DataLayout &DL);
+
+  /// If Ptr2 is provably equal to Ptr1 plus a constant offset, return that
+  /// offset. For example, Ptr1 might be &A[42] and Ptr2 might be &A[40]; with
+  /// 4-byte elements the returned offset would be -8.
+ Optional<int64_t> isPointerOffset(const Value *Ptr1, const Value *Ptr2,
+ const DataLayout &DL);
} // end namespace llvm
#endif // LLVM_ANALYSIS_VALUETRACKING_H
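A worked example for the new isPointerOffset helper, matching the doc comment's &A[42]/&A[40] case; P1, P2 and DL are assumed to be in scope, and the array has 4-byte (i32) elements:

  // Sketch:
  //   %p1 = getelementptr inbounds [64 x i32], [64 x i32]* @A, i64 0, i64 42
  //   %p2 = getelementptr inbounds [64 x i32], [64 x i32]* @A, i64 0, i64 40
  if (Optional<int64_t> Off = isPointerOffset(P1, P2, DL)) {
    // Both GEPs share the base @A; P2 sits two 4-byte elements below P1,
    // so *Off == -8.
  }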
diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h
index d93d2bc4570b..4a61c2bc35c7 100644
--- a/include/llvm/Analysis/VectorUtils.h
+++ b/include/llvm/Analysis/VectorUtils.h
@@ -15,18 +15,129 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CheckedArithmetic.h"
namespace llvm {
+/// Describes the kind of vector function parameters.
+enum class VFParamKind {
+ Vector, // No semantic information.
+ OMP_Linear, // declare simd linear(i)
+ OMP_LinearRef, // declare simd linear(ref(i))
+ OMP_LinearVal, // declare simd linear(val(i))
+ OMP_LinearUVal, // declare simd linear(uval(i))
+ OMP_LinearPos, // declare simd linear(i:c) uniform(c)
+ OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c)
+ OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c)
+  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
+ OMP_Uniform, // declare simd uniform(i)
+ GlobalPredicate, // Global logical predicate that acts on all lanes
+ // of the input and output mask concurrently. For
+ // example, it is implied by the `M` token in the
+ // Vector Function ABI mangled name.
+ Unknown
+};
+
+/// Describes the type of instruction set architecture.
+enum class VFISAKind {
+ AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
+ SVE, // AArch64 Scalable Vector Extension
+ SSE, // x86 SSE
+ AVX, // x86 AVX
+ AVX2, // x86 AVX2
+ AVX512, // x86 AVX512
+ Unknown // Unknown ISA
+};
+
+/// Encapsulates information needed to describe a parameter.
+///
+/// The description of the parameter is not linked directly to
+/// OpenMP or any other vector function description. This structure
+/// is extendible to handle other paradigms that describe vector
+/// functions and their parameters.
+struct VFParameter {
+ unsigned ParamPos; // Parameter Position in Scalar Function.
+ VFParamKind ParamKind; // Kind of Parameter.
+ int LinearStepOrPos = 0; // Step or Position of the Parameter.
+  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.
+
+ // Comparison operator.
+ bool operator==(const VFParameter &Other) const {
+ return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
+ std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
+ Other.Alignment);
+ }
+};
+
+/// Contains the information about the kind of vectorization
+/// available.
+///
+/// This object is independent of the paradigm used to
+/// represent vector functions. In particular, it is not attached to
+/// any target-specific ABI.
+struct VFShape {
+ unsigned VF; // Vectorization factor.
+ bool IsScalable; // True if the function is a scalable function.
+ VFISAKind ISA; // Instruction Set Architecture.
+  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
+ // Comparison operator.
+ bool operator==(const VFShape &Other) const {
+ return std::tie(VF, IsScalable, ISA, Parameters) ==
+ std::tie(Other.VF, Other.IsScalable, Other.ISA, Other.Parameters);
+ }
+};
+
+/// Holds the VFShape for a specific scalar to vector function mapping.
+struct VFInfo {
+ VFShape Shape; // Classification of the vector function.
+ StringRef ScalarName; // Scalar Function Name.
+ StringRef VectorName; // Vector Function Name associated to this VFInfo.
+
+ // Comparison operator.
+ bool operator==(const VFInfo &Other) const {
+ return std::tie(Shape, ScalarName, VectorName) ==
+           std::tie(Other.Shape, Other.ScalarName, Other.VectorName);
+ }
+};
+
+namespace VFABI {
+/// Function to construct a VFInfo out of a mangled name in the
+/// following format:
+///
+/// <VFABI_name>{(<redirection>)}
+///
+/// where <VFABI_name> is the name of the vector function, mangled according
+/// to the rules described in the Vector Function ABI of the target vector
+/// extension (or <isa> from now on). The <VFABI_name> is in the following
+/// format:
+///
+/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
+///
+/// This method supports demangling rules for the following <isa>:
+///
+/// * AArch64: https://developer.arm.com/docs/101129/latest
+///
+/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
+/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
+///
+/// \param MangledName -> input string in the format
+/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
+Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName);
+
+/// Retrieve the `VFParamKind` from a string token.
+VFParamKind getVFParamKindFromString(const StringRef Token);
+} // end namespace VFABI
+
template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
+class TargetLibraryInfo;
class TargetTransformInfo;
class Type;
class Value;
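A short usage sketch for the demangler declared in the VFABI namespace above. The mangled name follows the AArch64 Vector Function ABI, and the asserts spell out the fields of the resulting VFInfo; testDemangle is a hypothetical unit check, not part of the patch:

  #include <cassert>
  using namespace llvm;

  // "_ZGVnN2v_sin": AdvSIMD ('n'), unmasked ('N'), 2 lanes, one vector
  // parameter ('v'), scalar function "sin", no redirection.
  void testDemangle() {
    Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN2v_sin");
    assert(Info && Info->Shape.VF == 2 && !Info->Shape.IsScalable);
    assert(Info->Shape.ISA == VFISAKind::AdvancedSIMD);
    assert(Info->Shape.Parameters[0].ParamKind == VFParamKind::Vector);
    assert(Info->ScalarName == "sin");
  }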
@@ -270,13 +381,12 @@ APInt possiblyDemandedEltsInMask(Value *Mask);
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
- InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
- : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
-
- InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
- : Align(Align), InsertPos(Instr) {
- assert(Align && "The alignment should be non-zero");
+ InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
+ : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
+ InsertPos(nullptr) {}
+ InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
+ : Alignment(Alignment), InsertPos(Instr) {
Factor = std::abs(Stride);
assert(Factor > 1 && "Invalid interleave factor");
@@ -286,7 +396,7 @@ public:
bool isReverse() const { return Reverse; }
uint32_t getFactor() const { return Factor; }
- uint32_t getAlignment() const { return Align; }
+ uint32_t getAlignment() const { return Alignment.value(); }
uint32_t getNumMembers() const { return Members.size(); }
/// Try to insert a new member \p Instr with index \p Index and
@@ -294,9 +404,7 @@ public:
/// negative if it is the new leader.
///
/// \returns false if the instruction doesn't belong to the group.
- bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
- assert(NewAlign && "The new member's alignment should be non-zero");
-
+ bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
// Make sure the key fits in an int32_t.
Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
if (!MaybeKey)
@@ -328,7 +436,7 @@ public:
}
// It's always safe to select the minimum alignment.
- Align = std::min(Align, NewAlign);
+ Alignment = std::min(Alignment, NewAlign);
Members[Key] = Instr;
return true;
}
@@ -387,7 +495,7 @@ public:
private:
uint32_t Factor; // Interleave Factor.
bool Reverse;
- uint32_t Align;
+ Align Alignment;
DenseMap<int32_t, InstTy *> Members;
int32_t SmallestKey = 0;
int32_t LargestKey = 0;
@@ -504,8 +612,8 @@ private:
struct StrideDescriptor {
StrideDescriptor() = default;
StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
- unsigned Align)
- : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
+ Align Alignment)
+ : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
// The access's stride. It is negative for a reverse access.
int64_t Stride = 0;
@@ -517,7 +625,7 @@ private:
uint64_t Size = 0;
// The alignment of this access.
- unsigned Align = 0;
+ Align Alignment;
};
/// A type for holding instructions and their stride descriptors.
@@ -528,11 +636,11 @@ private:
///
/// \returns the newly created interleave group.
InterleaveGroup<Instruction> *
- createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
+ createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
assert(!InterleaveGroupMap.count(Instr) &&
"Already in an interleaved access group");
InterleaveGroupMap[Instr] =
- new InterleaveGroup<Instruction>(Instr, Stride, Align);
+ new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
InterleaveGroups.insert(InterleaveGroupMap[Instr]);
return InterleaveGroupMap[Instr];
}
diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def
index b0f78d0fd61f..34a7410f7474 100644
--- a/include/llvm/BinaryFormat/Dwarf.def
+++ b/include/llvm/BinaryFormat/Dwarf.def
@@ -17,7 +17,7 @@
defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \
defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \
defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \
- defined HANDLE_DW_RLE || \
+ defined HANDLE_DW_RLE || defined HANDLE_DW_LLE || \
(defined HANDLE_DW_CFA && defined HANDLE_DW_CFA_PRED) || \
defined HANDLE_DW_APPLE_PROPERTY || defined HANDLE_DW_UT || \
defined HANDLE_DWARF_SECTION || defined HANDLE_DW_IDX || \
@@ -26,7 +26,17 @@
#endif
#ifndef HANDLE_DW_TAG
-#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR)
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND)
+#endif
+
+// Note that DW_KIND is not a DWARF concept, but rather a way for us to
+// generate a list of tags that belong together.
+#ifndef DW_KIND_NONE
+#define DW_KIND_NONE 0
+#endif
+
+#ifndef DW_KIND_TYPE
+#define DW_KIND_TYPE 1
#endif
#ifndef HANDLE_DW_AT
@@ -81,6 +91,10 @@
#define HANDLE_DW_RLE(ID, NAME)
#endif
+#ifndef HANDLE_DW_LLE
+#define HANDLE_DW_LLE(ID, NAME)
+#endif
+
#ifndef HANDLE_DW_CFA
#define HANDLE_DW_CFA(ID, NAME)
#endif
@@ -109,94 +123,94 @@
#define HANDLE_DW_END(ID, NAME)
#endif
-HANDLE_DW_TAG(0x0000, null, 2, DWARF)
-HANDLE_DW_TAG(0x0001, array_type, 2, DWARF)
-HANDLE_DW_TAG(0x0002, class_type, 2, DWARF)
-HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF)
-HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF)
-HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF)
-HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF)
-HANDLE_DW_TAG(0x000a, label, 2, DWARF)
-HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF)
-HANDLE_DW_TAG(0x000d, member, 2, DWARF)
-HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF)
-HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF)
-HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF)
-HANDLE_DW_TAG(0x0012, string_type, 2, DWARF)
-HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF)
-HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF)
-HANDLE_DW_TAG(0x0016, typedef, 2, DWARF)
-HANDLE_DW_TAG(0x0017, union_type, 2, DWARF)
-HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF)
-HANDLE_DW_TAG(0x0019, variant, 2, DWARF)
-HANDLE_DW_TAG(0x001a, common_block, 2, DWARF)
-HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF)
-HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF)
-HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF)
-HANDLE_DW_TAG(0x001e, module, 2, DWARF)
-HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF)
-HANDLE_DW_TAG(0x0020, set_type, 2, DWARF)
-HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF)
-HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF)
-HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF)
-HANDLE_DW_TAG(0x0024, base_type, 2, DWARF)
-HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF)
-HANDLE_DW_TAG(0x0026, const_type, 2, DWARF)
-HANDLE_DW_TAG(0x0027, constant, 2, DWARF)
-HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF)
-HANDLE_DW_TAG(0x0029, file_type, 2, DWARF)
-HANDLE_DW_TAG(0x002a, friend, 2, DWARF)
-HANDLE_DW_TAG(0x002b, namelist, 2, DWARF)
-HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF)
-HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF)
-HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF)
-HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF)
-HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF)
-HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF)
-HANDLE_DW_TAG(0x0032, try_block, 2, DWARF)
-HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF)
-HANDLE_DW_TAG(0x0034, variable, 2, DWARF)
-HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF)
+HANDLE_DW_TAG(0x0000, null, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0001, array_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0002, class_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x000a, label, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x000d, member, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0012, string_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0016, typedef, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0017, union_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0019, variant, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x001a, common_block, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x001e, module, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0020, set_type, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0024, base_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0026, const_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0027, constant, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0029, file_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x002a, friend, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x002b, namelist, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0032, try_block, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0034, variable, 2, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF, DW_KIND_TYPE)
// New in DWARF v3:
-HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF)
-HANDLE_DW_TAG(0x0037, restrict_type, 3, DWARF)
-HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF)
-HANDLE_DW_TAG(0x0039, namespace, 3, DWARF)
-HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF)
-HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF)
-HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF)
-HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF)
-HANDLE_DW_TAG(0x003f, condition, 3, DWARF)
-HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF)
+HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0037, restrict_type, 3, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0039, namespace, 3, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x003f, condition, 3, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF, DW_KIND_TYPE)
// New in DWARF v4:
-HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF)
-HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF)
-HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF)
+HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF, DW_KIND_NONE)
// New in DWARF v5:
-HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF)
-HANDLE_DW_TAG(0x0045, generic_subrange, 5, DWARF)
-HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF)
-HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF)
-HANDLE_DW_TAG(0x0048, call_site, 5, DWARF)
-HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF)
-HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF)
-HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF)
+HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0045, generic_subrange, 5, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF, DW_KIND_TYPE)
+HANDLE_DW_TAG(0x0048, call_site, 5, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF, DW_KIND_NONE)
+HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF, DW_KIND_TYPE)
// Vendor extensions:
-HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS)
-HANDLE_DW_TAG(0x4101, format_label, 0, GNU)
-HANDLE_DW_TAG(0x4102, function_template, 0, GNU)
-HANDLE_DW_TAG(0x4103, class_template, 0, GNU)
-HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU)
-HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU)
-HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU)
-HANDLE_DW_TAG(0x4109, GNU_call_site, 0, GNU)
-HANDLE_DW_TAG(0x410a, GNU_call_site_parameter, 0, GNU)
-HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE)
-HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND)
-HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND)
-HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND)
-HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND)
-HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND)
+HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS, DW_KIND_NONE)
+HANDLE_DW_TAG(0x4101, format_label, 0, GNU, DW_KIND_NONE)
+HANDLE_DW_TAG(0x4102, function_template, 0, GNU, DW_KIND_NONE)
+HANDLE_DW_TAG(0x4103, class_template, 0, GNU, DW_KIND_NONE)
+HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU, DW_KIND_NONE)
+HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU, DW_KIND_NONE)
+HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU, DW_KIND_NONE)
+HANDLE_DW_TAG(0x4109, GNU_call_site, 0, GNU, DW_KIND_NONE)
+HANDLE_DW_TAG(0x410a, GNU_call_site_parameter, 0, GNU, DW_KIND_NONE)
+HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE, DW_KIND_NONE)
+HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND, DW_KIND_NONE)
+HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND, DW_KIND_TYPE)
+HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND, DW_KIND_TYPE)
+HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND, DW_KIND_TYPE)
+HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND, DW_KIND_TYPE)
// Attributes.
HANDLE_DW_AT(0x01, sibling, 2, DWARF)
@@ -815,6 +829,17 @@ HANDLE_DW_RLE(0x05, base_address)
HANDLE_DW_RLE(0x06, start_end)
HANDLE_DW_RLE(0x07, start_length)
+// DWARF v5 Loc List Entry encoding values.
+HANDLE_DW_LLE(0x00, end_of_list)
+HANDLE_DW_LLE(0x01, base_addressx)
+HANDLE_DW_LLE(0x02, startx_endx)
+HANDLE_DW_LLE(0x03, startx_length)
+HANDLE_DW_LLE(0x04, offset_pair)
+HANDLE_DW_LLE(0x05, default_location)
+HANDLE_DW_LLE(0x06, base_address)
+HANDLE_DW_LLE(0x07, start_end)
+HANDLE_DW_LLE(0x08, start_length)
+
// Call frame instruction encodings.
HANDLE_DW_CFA(0x00, nop)
HANDLE_DW_CFA(0x40, advance_loc)
@@ -929,6 +954,7 @@ HANDLE_DW_IDX(0x05, type_hash)
#undef HANDLE_DW_LNCT
#undef HANDLE_DW_MACRO
#undef HANDLE_DW_RLE
+#undef HANDLE_DW_LLE
#undef HANDLE_DW_CFA
#undef HANDLE_DW_CFA_PRED
#undef HANDLE_DW_APPLE_PROPERTY
diff --git a/include/llvm/BinaryFormat/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h
index 76d9c365c0a8..1c6aee48661c 100644
--- a/include/llvm/BinaryFormat/Dwarf.h
+++ b/include/llvm/BinaryFormat/Dwarf.h
@@ -46,6 +46,11 @@ enum LLVMConstants : uint32_t {
DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results.
DW_MACINFO_invalid = ~0U, // Macinfo type for invalid results.
+ // Special values for an initial length field.
+ DW_LENGTH_lo_reserved = 0xfffffff0, // Lower bound of the reserved range.
+ DW_LENGTH_DWARF64 = 0xffffffff, // Indicator of 64-bit DWARF format.
+ DW_LENGTH_hi_reserved = 0xffffffff, // Upper bound of the reserved range.
+
// Other constants.
DWARF_VERSION = 4, // Default dwarf version we output.
DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes.
@@ -75,7 +80,7 @@ const uint64_t DW64_CIE_ID = UINT64_MAX;
const uint32_t DW_INVALID_OFFSET = UINT32_MAX;
enum Tag : uint16_t {
-#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID,
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) DW_TAG_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
DW_TAG_lo_user = 0x4080,
DW_TAG_hi_user = 0xffff,
@@ -84,29 +89,12 @@ enum Tag : uint16_t {
inline bool isType(Tag T) {
switch (T) {
- case DW_TAG_array_type:
- case DW_TAG_class_type:
- case DW_TAG_interface_type:
- case DW_TAG_enumeration_type:
- case DW_TAG_pointer_type:
- case DW_TAG_reference_type:
- case DW_TAG_rvalue_reference_type:
- case DW_TAG_string_type:
- case DW_TAG_structure_type:
- case DW_TAG_subroutine_type:
- case DW_TAG_union_type:
- case DW_TAG_ptr_to_member_type:
- case DW_TAG_set_type:
- case DW_TAG_subrange_type:
- case DW_TAG_base_type:
- case DW_TAG_const_type:
- case DW_TAG_file_type:
- case DW_TAG_packed_type:
- case DW_TAG_volatile_type:
- case DW_TAG_typedef:
- return true;
default:
return false;
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \
+ case DW_TAG_##NAME: \
+ return (KIND == DW_KIND_TYPE);
+#include "llvm/BinaryFormat/Dwarf.def"
}
}
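The rewrite of isType above is the usual .def X-macro pattern: each client defines HANDLE_DW_TAG to the expansion it needs, includes Dwarf.def, and the single tag table fans out to every consumer. As another, purely illustrative consumer under the same pattern, a compile-time count of the type tags:

  // Expands to 0 + (KIND == DW_KIND_TYPE ? 1 : 0) per tag in Dwarf.def;
  // Dwarf.def supplies empty defaults for the other HANDLE_* macros and
  // #undefs HANDLE_DW_TAG when done.
  static constexpr unsigned NumTypeTags = 0
  #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \
    + (KIND == DW_KIND_TYPE ? 1 : 0)
  #include "llvm/BinaryFormat/Dwarf.def"
      ;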
@@ -129,9 +117,10 @@ enum LocationAtom {
#include "llvm/BinaryFormat/Dwarf.def"
DW_OP_lo_user = 0xe0,
DW_OP_hi_user = 0xff,
- DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata.
- DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata.
- DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_entry_value = 0x1003, ///< Only used in LLVM metadata.
};
enum TypeKind : uint8_t {
@@ -192,6 +181,59 @@ enum SourceLanguage {
DW_LANG_hi_user = 0xffff
};
+inline bool isCPlusPlus(SourceLanguage S) {
+ // Deliberately enumerate all the language options so we get a warning when
+ // new language options are added (-Wswitch) that'll hopefully help keep this
+ // switch up-to-date when new C++ versions are added.
+ switch (S) {
+ case DW_LANG_C_plus_plus:
+ case DW_LANG_C_plus_plus_03:
+ case DW_LANG_C_plus_plus_11:
+ case DW_LANG_C_plus_plus_14:
+ return true;
+ case DW_LANG_C89:
+ case DW_LANG_C:
+ case DW_LANG_Ada83:
+ case DW_LANG_Cobol74:
+ case DW_LANG_Cobol85:
+ case DW_LANG_Fortran77:
+ case DW_LANG_Fortran90:
+ case DW_LANG_Pascal83:
+ case DW_LANG_Modula2:
+ case DW_LANG_Java:
+ case DW_LANG_C99:
+ case DW_LANG_Ada95:
+ case DW_LANG_Fortran95:
+ case DW_LANG_PLI:
+ case DW_LANG_ObjC:
+ case DW_LANG_ObjC_plus_plus:
+ case DW_LANG_UPC:
+ case DW_LANG_D:
+ case DW_LANG_Python:
+ case DW_LANG_OpenCL:
+ case DW_LANG_Go:
+ case DW_LANG_Modula3:
+ case DW_LANG_Haskell:
+ case DW_LANG_OCaml:
+ case DW_LANG_Rust:
+ case DW_LANG_C11:
+ case DW_LANG_Swift:
+ case DW_LANG_Julia:
+ case DW_LANG_Dylan:
+ case DW_LANG_Fortran03:
+ case DW_LANG_Fortran08:
+ case DW_LANG_RenderScript:
+ case DW_LANG_BLISS:
+ case DW_LANG_Mips_Assembler:
+ case DW_LANG_GOOGLE_RenderScript:
+ case DW_LANG_BORLAND_Delphi:
+ case DW_LANG_lo_user:
+ case DW_LANG_hi_user:
+ return false;
+ }
+ llvm_unreachable("Invalid source language");
+}
+
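A trivial check of the new predicate; both enumerators come from the switch above, and because that switch is deliberately exhaustive, a newly added DW_LANG_* constant triggers -Wswitch here until it is classified:

  #include <cassert>
  void testIsCPlusPlus() { // hypothetical unit check
    assert(llvm::dwarf::isCPlusPlus(llvm::dwarf::DW_LANG_C_plus_plus_14));
    assert(!llvm::dwarf::isCPlusPlus(llvm::dwarf::DW_LANG_Rust));
  }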
enum CaseSensitivity {
// Identifier case codes
DW_ID_case_sensitive = 0x00,
@@ -267,11 +309,17 @@ enum MacroEntryType {
};
/// DWARF v5 range list entry encoding values.
-enum RangeListEntries {
+enum RnglistEntries {
#define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
};
+/// DWARF v5 loc list entry encoding values.
+enum LoclistEntries {
+#define HANDLE_DW_LLE(ID, NAME) DW_LLE_##NAME = ID,
+#include "llvm/BinaryFormat/Dwarf.def"
+};
+
/// Call frame instruction encodings.
enum CallFrameInfo {
#define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID,
@@ -307,19 +355,6 @@ enum Constants {
DW_EH_PE_indirect = 0x80
};
-/// Constants for location lists in DWARF v5.
-enum LocationListEntry : unsigned char {
- DW_LLE_end_of_list = 0x00,
- DW_LLE_base_addressx = 0x01,
- DW_LLE_startx_endx = 0x02,
- DW_LLE_startx_length = 0x03,
- DW_LLE_offset_pair = 0x04,
- DW_LLE_default_location = 0x05,
- DW_LLE_base_address = 0x06,
- DW_LLE_start_end = 0x07,
- DW_LLE_start_length = 0x08
-};
-
/// Constants for the DW_APPLE_PROPERTY_attributes attribute.
/// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind!
enum ApplePropertyAttributes {
@@ -434,6 +469,7 @@ StringRef LNStandardString(unsigned Standard);
StringRef LNExtendedString(unsigned Encoding);
StringRef MacinfoString(unsigned Encoding);
StringRef RangeListEncodingString(unsigned Encoding);
+StringRef LocListEncodingString(unsigned Encoding);
StringRef CallFrameString(unsigned Encoding, Triple::ArchType Arch);
StringRef ApplePropertyString(unsigned);
StringRef UnitTypeString(unsigned);
@@ -525,6 +561,17 @@ struct FormParams {
explicit operator bool() const { return Version && AddrSize; }
};
+/// Get the byte size of the unit length field depending on the DWARF format.
+inline uint8_t getUnitLengthFieldByteSize(DwarfFormat Format) {
+ switch (Format) {
+ case DwarfFormat::DWARF32:
+ return 4;
+ case DwarfFormat::DWARF64:
+ return 12;
+ }
+ llvm_unreachable("Invalid Format value");
+}
+
/// Get the fixed byte size for a given form.
///
/// If the form has a fixed byte size, then an Optional with a value will be
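Connecting the new DW_LENGTH constants with getUnitLengthFieldByteSize: a DWARF64 unit header opens with the 4-byte escape value 0xffffffff followed by the real 8-byte length, hence the 12-byte field, while 0xfffffff0 through 0xfffffffe stay reserved. A reader sketch, assuming the llvm::support::endian helpers and a Buf pointer at the start of the header:

  using namespace llvm::support::endian;
  uint32_t First4 = read32le(Buf);
  dwarf::DwarfFormat Format = (First4 == dwarf::DW_LENGTH_DWARF64)
                                  ? dwarf::DwarfFormat::DWARF64
                                  : dwarf::DwarfFormat::DWARF32;
  uint64_t UnitLength =
      (Format == dwarf::DwarfFormat::DWARF64) ? read64le(Buf + 4) : First4;
  // The remaining header fields start at
  // Buf + dwarf::getUnitLengthFieldByteSize(Format): 4 or 12 bytes in.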
diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h
index 2bd711137845..46edfb6260be 100644
--- a/include/llvm/BinaryFormat/ELF.h
+++ b/include/llvm/BinaryFormat/ELF.h
@@ -1356,6 +1356,72 @@ enum : unsigned {
NT_GNU_BUILD_ATTRIBUTE_FUNC = 0x101,
};
+// Core note types
+enum : unsigned {
+ NT_PRSTATUS = 1,
+ NT_FPREGSET = 2,
+ NT_PRPSINFO = 3,
+ NT_TASKSTRUCT = 4,
+ NT_AUXV = 6,
+ NT_PSTATUS = 10,
+ NT_FPREGS = 12,
+ NT_PSINFO = 13,
+ NT_LWPSTATUS = 16,
+ NT_LWPSINFO = 17,
+ NT_WIN32PSTATUS = 18,
+
+ NT_PPC_VMX = 0x100,
+ NT_PPC_VSX = 0x102,
+ NT_PPC_TAR = 0x103,
+ NT_PPC_PPR = 0x104,
+ NT_PPC_DSCR = 0x105,
+ NT_PPC_EBB = 0x106,
+ NT_PPC_PMU = 0x107,
+ NT_PPC_TM_CGPR = 0x108,
+ NT_PPC_TM_CFPR = 0x109,
+ NT_PPC_TM_CVMX = 0x10a,
+ NT_PPC_TM_CVSX = 0x10b,
+ NT_PPC_TM_SPR = 0x10c,
+ NT_PPC_TM_CTAR = 0x10d,
+ NT_PPC_TM_CPPR = 0x10e,
+ NT_PPC_TM_CDSCR = 0x10f,
+
+ NT_386_TLS = 0x200,
+ NT_386_IOPERM = 0x201,
+ NT_X86_XSTATE = 0x202,
+
+ NT_S390_HIGH_GPRS = 0x300,
+ NT_S390_TIMER = 0x301,
+ NT_S390_TODCMP = 0x302,
+ NT_S390_TODPREG = 0x303,
+ NT_S390_CTRS = 0x304,
+ NT_S390_PREFIX = 0x305,
+ NT_S390_LAST_BREAK = 0x306,
+ NT_S390_SYSTEM_CALL = 0x307,
+ NT_S390_TDB = 0x308,
+ NT_S390_VXRS_LOW = 0x309,
+ NT_S390_VXRS_HIGH = 0x30a,
+ NT_S390_GS_CB = 0x30b,
+ NT_S390_GS_BC = 0x30c,
+
+ NT_ARM_VFP = 0x400,
+ NT_ARM_TLS = 0x401,
+ NT_ARM_HW_BREAK = 0x402,
+ NT_ARM_HW_WATCH = 0x403,
+ NT_ARM_SVE = 0x405,
+ NT_ARM_PAC_MASK = 0x406,
+
+ NT_FILE = 0x46494c45,
+ NT_PRXFPREG = 0x46e62b7f,
+ NT_SIGINFO = 0x53494749,
+};
+
+// LLVM-specific notes.
+enum {
+ NT_LLVM_HWASAN_GLOBALS = 3,
+};
+
+// GNU note types
enum {
NT_GNU_ABI_TAG = 1,
NT_GNU_HWCAP = 2,
diff --git a/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
index 4afcd7d1f093..c8364133e31f 100644
--- a/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
+++ b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
@@ -124,8 +124,11 @@ ELF_RELOC(R_AARCH64_COPY, 0x400)
ELF_RELOC(R_AARCH64_GLOB_DAT, 0x401)
ELF_RELOC(R_AARCH64_JUMP_SLOT, 0x402)
ELF_RELOC(R_AARCH64_RELATIVE, 0x403)
-ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x404)
-ELF_RELOC(R_AARCH64_TLS_DTPMOD64, 0x405)
+// 0x404 and 0x405 are now R_AARCH64_TLS_IMPDEF1 and R_AARCH64_TLS_IMPDEF2
+// We follow GNU and define TLS_IMPDEF1 as TLS_DTPMOD64 and TLS_IMPDEF2 as
+// TLS_DTPREL64
+ELF_RELOC(R_AARCH64_TLS_DTPMOD64, 0x404)
+ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x405)
ELF_RELOC(R_AARCH64_TLS_TPREL64, 0x406)
ELF_RELOC(R_AARCH64_TLSDESC, 0x407)
ELF_RELOC(R_AARCH64_IRELATIVE, 0x408)
diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h
index a01393a3b303..fb50e549cb9d 100644
--- a/include/llvm/BinaryFormat/MachO.h
+++ b/include/llvm/BinaryFormat/MachO.h
@@ -581,6 +581,11 @@ struct section_64 {
uint32_t reserved3;
};
+inline bool isVirtualSection(uint8_t type) {
+ return (type == MachO::S_ZEROFILL || type == MachO::S_GB_ZEROFILL ||
+ type == MachO::S_THREAD_LOCAL_ZEROFILL);
+}
+
struct fvmlib {
uint32_t name;
uint32_t minor_version;
diff --git a/include/llvm/BinaryFormat/Magic.h b/include/llvm/BinaryFormat/Magic.h
index cd9833ec4d22..64c687262f4a 100644
--- a/include/llvm/BinaryFormat/Magic.h
+++ b/include/llvm/BinaryFormat/Magic.h
@@ -49,6 +49,7 @@ struct file_magic {
xcoff_object_64, ///< 64-bit XCOFF object file
wasm_object, ///< WebAssembly Object file
pdb, ///< Windows PDB debug info file
+ tapi_file, ///< Text-based Dynamic Library Stub file
};
bool is_object() const { return V != unknown; }
diff --git a/include/llvm/BinaryFormat/Minidump.h b/include/llvm/BinaryFormat/Minidump.h
index 65c17d1eb00c..89cd779951cf 100644
--- a/include/llvm/BinaryFormat/Minidump.h
+++ b/include/llvm/BinaryFormat/Minidump.h
@@ -18,12 +18,15 @@
#ifndef LLVM_BINARYFORMAT_MINIDUMP_H
#define LLVM_BINARYFORMAT_MINIDUMP_H
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/Endian.h"
namespace llvm {
namespace minidump {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
/// The minidump header is the first part of a minidump file. It identifies the
/// file as a minidump file, and gives the location of the stream directory.
struct Header {
@@ -67,6 +70,50 @@ struct MemoryDescriptor {
};
static_assert(sizeof(MemoryDescriptor) == 16, "");
+struct MemoryInfoListHeader {
+ support::ulittle32_t SizeOfHeader;
+ support::ulittle32_t SizeOfEntry;
+ support::ulittle64_t NumberOfEntries;
+
+ MemoryInfoListHeader() = default;
+ MemoryInfoListHeader(uint32_t SizeOfHeader, uint32_t SizeOfEntry,
+ uint64_t NumberOfEntries)
+ : SizeOfHeader(SizeOfHeader), SizeOfEntry(SizeOfEntry),
+ NumberOfEntries(NumberOfEntries) {}
+};
+static_assert(sizeof(MemoryInfoListHeader) == 16, "");
+
+enum class MemoryProtection : uint32_t {
+#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) NAME = CODE,
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu),
+};
+
+enum class MemoryState : uint32_t {
+#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) NAME = CODE,
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu),
+};
+
+enum class MemoryType : uint32_t {
+#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) NAME = CODE,
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu),
+};
+
+struct MemoryInfo {
+ support::ulittle64_t BaseAddress;
+ support::ulittle64_t AllocationBase;
+ support::little_t<MemoryProtection> AllocationProtect;
+ support::ulittle32_t Reserved0;
+ support::ulittle64_t RegionSize;
+ support::little_t<MemoryState> State;
+ support::little_t<MemoryProtection> Protect;
+ support::little_t<MemoryType> Type;
+ support::ulittle32_t Reserved1;
+};
+static_assert(sizeof(MemoryInfo) == 48, "");
+
/// Specifies the location and type of a single stream in the minidump file. The
/// minidump stream directory is an array of entries of this type, with its size
/// given by Header.NumberOfStreams.
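Because MemoryProtection, MemoryState and MemoryType are each tagged with LLVM_MARK_AS_BITMASK_ENUM (and the namespace opts in via LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE above), the bitwise operators apply to them directly, with no casts to uint32_t. A sketch; isGuardPage is an illustrative helper, not part of the patch:

  using llvm::minidump::MemoryProtection;

  // Guard pages trap on first access, so a dump consumer may want to skip them.
  inline bool isGuardPage(MemoryProtection Prot) {
    return (Prot & MemoryProtection::Guard) == MemoryProtection::Guard;
  }
  // e.g. isGuardPage(MemoryProtection::ReadOnly | MemoryProtection::Guard)
  // returns true.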
@@ -180,6 +227,27 @@ struct Thread {
};
static_assert(sizeof(Thread) == 48, "");
+struct Exception {
+ static constexpr size_t MaxParameters = 15;
+
+ support::ulittle32_t ExceptionCode;
+ support::ulittle32_t ExceptionFlags;
+ support::ulittle64_t ExceptionRecord;
+ support::ulittle64_t ExceptionAddress;
+ support::ulittle32_t NumberParameters;
+ support::ulittle32_t UnusedAlignment;
+ support::ulittle64_t ExceptionInformation[MaxParameters];
+};
+static_assert(sizeof(Exception) == 152, "");
+
+struct ExceptionStream {
+ support::ulittle32_t ThreadId;
+ support::ulittle32_t UnusedAlignment;
+ Exception ExceptionRecord;
+ LocationDescriptor ThreadContext;
+};
+static_assert(sizeof(ExceptionStream) == 168, "");
+
} // namespace minidump
template <> struct DenseMapInfo<minidump::StreamType> {
diff --git a/include/llvm/BinaryFormat/MinidumpConstants.def b/include/llvm/BinaryFormat/MinidumpConstants.def
index d4f13dd99217..aeef399af7a4 100644
--- a/include/llvm/BinaryFormat/MinidumpConstants.def
+++ b/include/llvm/BinaryFormat/MinidumpConstants.def
@@ -6,8 +6,9 @@
//
//===----------------------------------------------------------------------===//
-#if !(defined HANDLE_MDMP_STREAM_TYPE || defined HANDLE_MDMP_ARCH || \
- defined HANDLE_MDMP_PLATFORM)
+#if !(defined(HANDLE_MDMP_STREAM_TYPE) || defined(HANDLE_MDMP_ARCH) || \
+ defined(HANDLE_MDMP_PLATFORM) || defined(HANDLE_MDMP_PROTECT) || \
+ defined(HANDLE_MDMP_MEMSTATE) || defined(HANDLE_MDMP_MEMTYPE))
#error "Missing HANDLE_MDMP definition"
#endif
@@ -23,6 +24,18 @@
#define HANDLE_MDMP_PLATFORM(CODE, NAME)
#endif
+#ifndef HANDLE_MDMP_PROTECT
+#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME)
+#endif
+
+#ifndef HANDLE_MDMP_MEMSTATE
+#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME)
+#endif
+
+#ifndef HANDLE_MDMP_MEMTYPE
+#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME)
+#endif
+
HANDLE_MDMP_STREAM_TYPE(0x0003, ThreadList)
HANDLE_MDMP_STREAM_TYPE(0x0004, ModuleList)
HANDLE_MDMP_STREAM_TYPE(0x0005, MemoryList)
@@ -102,6 +115,30 @@ HANDLE_MDMP_PLATFORM(0x8203, Android) // Android
HANDLE_MDMP_PLATFORM(0x8204, PS3) // PS3
HANDLE_MDMP_PLATFORM(0x8205, NaCl) // Native Client (NaCl)
+HANDLE_MDMP_PROTECT(0x01, NoAccess, PAGE_NO_ACCESS)
+HANDLE_MDMP_PROTECT(0x02, ReadOnly, PAGE_READ_ONLY)
+HANDLE_MDMP_PROTECT(0x04, ReadWrite, PAGE_READ_WRITE)
+HANDLE_MDMP_PROTECT(0x08, WriteCopy, PAGE_WRITE_COPY)
+HANDLE_MDMP_PROTECT(0x10, Execute, PAGE_EXECUTE)
+HANDLE_MDMP_PROTECT(0x20, ExecuteRead, PAGE_EXECUTE_READ)
+HANDLE_MDMP_PROTECT(0x40, ExecuteReadWrite, PAGE_EXECUTE_READ_WRITE)
+HANDLE_MDMP_PROTECT(0x80, ExecuteWriteCopy, PAGE_EXECUTE_WRITE_COPY)
+HANDLE_MDMP_PROTECT(0x100, Guard, PAGE_GUARD)
+HANDLE_MDMP_PROTECT(0x200, NoCache, PAGE_NOCACHE)
+HANDLE_MDMP_PROTECT(0x400, WriteCombine, PAGE_WRITECOMBINE)
+HANDLE_MDMP_PROTECT(0x40000000, TargetsInvalid, PAGE_TARGETS_INVALID)
+
+HANDLE_MDMP_MEMSTATE(0x01000, Commit, MEM_COMMIT)
+HANDLE_MDMP_MEMSTATE(0x02000, Reserve, MEM_RESERVE)
+HANDLE_MDMP_MEMSTATE(0x10000, Free, MEM_FREE)
+
+HANDLE_MDMP_MEMTYPE(0x0020000, Private, MEM_PRIVATE)
+HANDLE_MDMP_MEMTYPE(0x0040000, Mapped, MEM_MAPPED)
+HANDLE_MDMP_MEMTYPE(0x1000000, Image, MEM_IMAGE)
+
#undef HANDLE_MDMP_STREAM_TYPE
#undef HANDLE_MDMP_ARCH
#undef HANDLE_MDMP_PLATFORM
+#undef HANDLE_MDMP_PROTECT
+#undef HANDLE_MDMP_MEMSTATE
+#undef HANDLE_MDMP_MEMTYPE
diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h
index 0f22bfe610c6..f550d880f68a 100644
--- a/include/llvm/BinaryFormat/Wasm.h
+++ b/include/llvm/BinaryFormat/Wasm.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
namespace wasm {
@@ -251,9 +252,21 @@ enum : unsigned {
WASM_OPCODE_F32_CONST = 0x43,
WASM_OPCODE_F64_CONST = 0x44,
WASM_OPCODE_I32_ADD = 0x6a,
+};
+
+// Opcodes used in synthetic functions.
+enum : unsigned {
+ WASM_OPCODE_IF = 0x04,
+ WASM_OPCODE_ELSE = 0x05,
+ WASM_OPCODE_DROP = 0x1a,
WASM_OPCODE_MISC_PREFIX = 0xfc,
WASM_OPCODE_MEMORY_INIT = 0x08,
WASM_OPCODE_DATA_DROP = 0x09,
+ WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
+ WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
+ WASM_OPCODE_I32_ATOMIC_WAIT = 0x01,
+ WASM_OPCODE_I32_ATOMIC_STORE = 0x17,
+ WASM_OPCODE_I32_RMW_CMPXCHG = 0x48,
};
enum : unsigned {
@@ -318,6 +331,7 @@ const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
const unsigned WASM_SYMBOL_EXPORTED = 0x20;
const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
+const unsigned WASM_SYMBOL_NO_STRIP = 0x80;
#define WASM_RELOC(name, value) name = value,
diff --git a/include/llvm/BinaryFormat/XCOFF.h b/include/llvm/BinaryFormat/XCOFF.h
index 7774ab3ed24a..20a0f446272f 100644
--- a/include/llvm/BinaryFormat/XCOFF.h
+++ b/include/llvm/BinaryFormat/XCOFF.h
@@ -19,12 +19,13 @@ namespace llvm {
namespace XCOFF {
// Constants used in the XCOFF definition.
-enum { SectionNameSize = 8, SymbolNameSize = 8 };
+enum { FileNamePadSize = 6, NameSize = 8, SymbolTableEntrySize = 18 };
+
enum ReservedSectionNum { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 };
// x_smclas field of x_csect from system header: /usr/include/syms.h
/// Storage Mapping Class definitions.
-enum StorageMappingClass {
+enum StorageMappingClass : uint8_t {
// READ ONLY CLASSES
XMC_PR = 0, ///< Program Code
XMC_RO = 1, ///< Read Only Constant
@@ -139,6 +140,117 @@ enum StorageClass : uint8_t {
C_TCSYM = 134 // Reserved
};
+enum SymbolType {
+ XTY_ER = 0, ///< External reference.
+ XTY_SD = 1, ///< Csect definition for initialized storage.
+ XTY_LD = 2, ///< Label definition.
+ ///< Defines an entry point to an initialized csect.
+ XTY_CM = 3 ///< Common csect definition. For uninitialized storage.
+};
+
+// Relocation types, defined in `/usr/include/reloc.h`.
+enum RelocationType : uint8_t {
+ R_POS = 0x00, ///< Positive relocation. Provides the address of the referenced
+ ///< symbol.
+ R_RL = 0x0c, ///< Positive indirect load relocation. Modifiable instruction.
+ R_RLA = 0x0d, ///< Positive load address relocation. Modifiable instruction.
+
+ R_NEG = 0x01, ///< Negative relocation. Provides the negative of the address
+ ///< of the referenced symbol.
+ R_REL = 0x02, ///< Relative to self relocation. Provides a displacement value
+ ///< between the address of the referenced symbol and the
+ ///< address being relocated.
+
+ R_TOC = 0x03, ///< Relative to the TOC relocation. Provides a displacement
+ ///< that is the difference between the address of the
+ ///< referenced symbol and the TOC anchor csect.
+ R_TRL = 0x12, ///< TOC relative indirect load relocation. Similar to R_TOC,
+ ///< but not modifiable instruction.
+
+ R_TRLA =
+ 0x13, ///< Relative to the TOC or to the thread-local storage base
+ ///< relocation. Compilers are not permitted to generate this
+ ///< relocation type. It is the result of a reversible
+      ///< transformation by the linker of an R_TOC relocation that turned a
+ ///< load instruction into an add-immediate instruction.
+
+ R_GL = 0x05, ///< Global linkage-external TOC address relocation. Provides the
+ ///< address of the external TOC associated with a defined
+ ///< external symbol.
+ R_TCL = 0x06, ///< Local object TOC address relocation. Provides the address
+ ///< of the local TOC entry of a defined external symbol.
+
+ R_REF = 0x0f, ///< A non-relocating relocation. Used to prevent the binder
+ ///< from garbage collecting a csect (such as code used for
+ ///< dynamic initialization of non-local statics) for which
+ ///< another csect has an implicit dependency.
+
+ R_BA = 0x08, ///< Branch absolute relocation. Provides the address of the
+ ///< referenced symbol. References a non-modifiable instruction.
+ R_BR = 0x0a, ///< Branch relative to self relocation. Provides the
+ ///< displacement that is the difference between the address of
+ ///< the referenced symbol and the address of the referenced
+ ///< branch instruction. References a non-modifiable instruction.
+ R_RBA = 0x18, ///< Branch absolute relocation. Similar to R_BA but
+ ///< references a modifiable instruction.
+ R_RBR = 0x1a, ///< Branch relative to self relocation. Similar to the R_BR
+ ///< relocation type, but references a modifiable instruction.
+
+ R_TLS = 0x20, ///< General-dynamic reference to TLS symbol.
+ R_TLS_IE = 0x21, ///< Initial-exec reference to TLS symbol.
+ R_TLS_LD = 0x22, ///< Local-dynamic reference to TLS symbol.
+ R_TLS_LE = 0x23, ///< Local-exec reference to TLS symbol.
+ R_TLSM = 0x24, ///< Module reference to TLS. Provides a handle for the module
+ ///< containing the referenced symbol.
+ R_TLSML = 0x25, ///< Module reference to the local TLS storage.
+
+ R_TOCU = 0x30, ///< Relative to TOC upper. Specifies the high-order 16 bits of
+ ///< a large code model TOC-relative relocation.
+ R_TOCL = 0x31 ///< Relative to TOC lower. Specifies the low-order 16 bits of a
+ ///< large code model TOC-relative relocation.
+};
+
+struct FileHeader32 {
+ uint16_t Magic;
+ uint16_t NumberOfSections;
+ int32_t TimeStamp;
+ uint32_t SymbolTableFileOffset;
+ int32_t NumberOfSymbolTableEntries;
+ uint16_t AuxiliaryHeaderSize;
+ uint16_t Flags;
+};
+
+struct SectionHeader32 {
+ char Name[XCOFF::NameSize];
+ uint32_t PhysicalAddress;
+ uint32_t VirtualAddress;
+ uint32_t Size;
+ uint32_t FileOffsetToData;
+ uint32_t FileOffsetToRelocations;
+ uint32_t FileOffsetToLineNumbers;
+ uint16_t NumberOfRelocations;
+ uint16_t NumberOfLineNumbers;
+ int32_t Flags;
+};
+
+enum CFileStringType : uint8_t {
+ XFT_FN = 0, ///< Specifies the source-file name.
+ XFT_CT = 1, ///< Specifies the compiler time stamp.
+ XFT_CV = 2, ///< Specifies the compiler version number.
+ XFT_CD = 128 ///< Specifies compiler-defined information.
+};
+
+enum CFileLangId : uint8_t {
+ TB_C = 0, ///< C language.
+ TB_CPLUSPLUS = 9 ///< C++ language.
+};
+
+enum CFileCpuId : uint8_t {
+ TCPU_PPC64 = 2, ///< PowerPC common architecture 64-bit mode.
+ TCPU_COM = 3, ///< POWER and PowerPC architecture common.
+ TCPU_970 = 19 ///< PPC970 - PowerPC 64-bit architecture.
+};
+
} // end namespace XCOFF
} // end namespace llvm
diff --git a/include/llvm/Bitcode/BitcodeAnalyzer.h b/include/llvm/Bitcode/BitcodeAnalyzer.h
index cfdebd6fe6cb..5fb8bb26f255 100644
--- a/include/llvm/Bitcode/BitcodeAnalyzer.h
+++ b/include/llvm/Bitcode/BitcodeAnalyzer.h
@@ -30,6 +30,7 @@ enum CurStreamTypeType {
LLVMIRBitstream,
ClangSerializedASTBitstream,
ClangSerializedDiagnosticsBitstream,
+ LLVMBitstreamRemarks
};
struct BCDumpOptions {
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index decd4dd3a965..1a397068caf0 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -391,7 +391,7 @@ enum CastOpcodes {
/// have no fixed relation to the LLVM IR enum values. Changing these will
/// break compatibility with old files.
enum UnaryOpcodes {
- UNOP_NEG = 0
+ UNOP_FNEG = 0
};
/// BinaryOpcodes - These are values used in the bitcode files to encode which
diff --git a/include/llvm/Bitstream/BitCodes.h b/include/llvm/Bitstream/BitCodes.h
index adf54ba96396..41a3de3b20ef 100644
--- a/include/llvm/Bitstream/BitCodes.h
+++ b/include/llvm/Bitstream/BitCodes.h
@@ -168,6 +168,11 @@ class BitCodeAbbrev {
SmallVector<BitCodeAbbrevOp, 32> OperandList;
public:
+ BitCodeAbbrev() = default;
+
+ explicit BitCodeAbbrev(std::initializer_list<BitCodeAbbrevOp> OperandList)
+ : OperandList(OperandList) {}
+
unsigned getNumOperandInfos() const {
return static_cast<unsigned>(OperandList.size());
}
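The new initializer_list constructor lets a whole abbreviation be spelled as one expression instead of a default construction followed by repeated Add() calls. A sketch, with MY_RECORD_CODE standing in for some real record code:

  #include <memory>
  auto Abbrev = std::make_shared<BitCodeAbbrev>(
      std::initializer_list<BitCodeAbbrevOp>{
          BitCodeAbbrevOp(MY_RECORD_CODE),           // literal record code
          BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6),  // one VBR6 operand
          BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)});  // trailing blob payload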
diff --git a/include/llvm/Bitstream/BitstreamReader.h b/include/llvm/Bitstream/BitstreamReader.h
index ee82e7ec1ba2..b49a969a2d8b 100644
--- a/include/llvm/Bitstream/BitstreamReader.h
+++ b/include/llvm/Bitstream/BitstreamReader.h
@@ -379,6 +379,7 @@ public:
using SimpleBitstreamCursor::ReadVBR;
using SimpleBitstreamCursor::ReadVBR64;
using SimpleBitstreamCursor::SizeInBytes;
+ using SimpleBitstreamCursor::skipToEnd;
/// Return the number of bits used to encode an abbrev #.
unsigned getAbbrevIDWidth() const { return CurCodeSize; }
diff --git a/include/llvm/CodeGen/AccelTable.h b/include/llvm/CodeGen/AccelTable.h
index 734531a65d50..f8f6b5448f3f 100644
--- a/include/llvm/CodeGen/AccelTable.h
+++ b/include/llvm/CodeGen/AccelTable.h
@@ -101,8 +101,6 @@
///
/// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable
/// function.
-///
-/// TODO: Add DWARF v5 emission code.
namespace llvm {
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index d110f8b01cb5..a4580da5aec9 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -111,6 +111,10 @@ public:
/// of each call to runOnMachineFunction().
MCSymbol *CurrentFnSym = nullptr;
+ /// The symbol for the current function descriptor on AIX. This is created
+ /// at the beginning of each call to SetupMachineFunction().
+ MCSymbol *CurrentFnDescSym = nullptr;
+
/// The symbol used to represent the start of the current function for the
/// purpose of calculating its size (e.g. using the .size directive). By
/// default, this is equal to CurrentFnSym.
@@ -304,7 +308,7 @@ public:
/// This should be called when a new MachineFunction is being processed from
/// runOnMachineFunction.
- void SetupMachineFunction(MachineFunction &MF);
+ virtual void SetupMachineFunction(MachineFunction &MF);
/// This method emits the body and trailer for a function.
void EmitFunctionBody();
@@ -342,12 +346,11 @@ public:
/// so, emit it and return true, otherwise do nothing and return false.
bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
- /// Emit an alignment directive to the specified power of two boundary. For
- /// example, if you pass in 3 here, you will get an 8 byte alignment. If a
+ /// Emit an alignment directive to the specified power of two boundary. If a
/// global value is specified, and if that global has an explicit alignment
/// requested, it will override the alignment request if required for
/// correctness.
- void EmitAlignment(unsigned NumBits, const GlobalObject *GV = nullptr) const;
+ void EmitAlignment(Align Alignment, const GlobalObject *GV = nullptr) const;
/// Lower the specified LLVM Constant to an MCExpr.
virtual const MCExpr *lowerConstant(const Constant *CV);
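The EmitAlignment signature change is part of the Align migration visible throughout this patch: the parameter is now a byte alignment wrapped in the Align type rather than a log2 exponent, removing the old pass-3-to-get-8-bytes trap (Align asserts its value is a power of two). A before/after sketch inside an AsmPrinter subclass:

  // Before: log2 interface; the literal 3 meant 2^3 = 8 bytes.
  //   EmitAlignment(3);
  // After: the byte amount is explicit.
  EmitAlignment(Align(8));        // align to an 8-byte boundary
  EmitAlignment(Align(16), GV);   // may be raised if GV requests more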
@@ -400,7 +403,7 @@ public:
/// By default, this method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing it
/// if appropriate.
- virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const;
+ virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB);
/// Targets can override this to emit stuff at the end of a basic block.
virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB);
@@ -415,6 +418,10 @@ public:
virtual void EmitFunctionEntryLabel();
+ virtual void EmitFunctionDescriptor() {
+ llvm_unreachable("Function descriptor is target-specific.");
+ }
+
virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
/// Targets can override this to change how global constants that are part of
@@ -635,6 +642,10 @@ public:
/// supported by the target.
void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const;
+ /// Return the alignment for the specified \p GV.
+ static Align getGVAlignment(const GlobalValue *GV, const DataLayout &DL,
+ Align InAlign = Align::None());
+
private:
/// Private state for PrintSpecial()
// Assign a unique ID to this machine instruction.
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index 70bf670fdf0b..2e57b4c9d332 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -190,6 +190,7 @@ private:
protected:
explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
: BaseT(DL) {}
+ virtual ~BasicTTIImplBase() = default;
using TargetTransformInfoImplBase::DL;
@@ -215,6 +216,16 @@ public:
return -1;
}
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const {
+ return false;
+ }
+
+ bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const {
+ return false;
+ }
+
bool isLegalAddImmediate(int64_t imm) {
return getTLI()->isLegalAddImmediate(imm);
}
@@ -317,7 +328,7 @@ public:
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JumpTableSize) {
/// Try to find the estimated number of clusters. Note that the number of
- /// clusters identified in this function could be different from the actural
+ /// clusters identified in this function could be different from the actual
/// numbers found in lowering. This function ignores switches that are
/// lowered with a mix of jump table / bit test / BTree. This function was
/// initially intended to be used when estimating the cost of switch in
@@ -371,10 +382,6 @@ public:
return N;
}
- unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
-
- unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
-
bool shouldBuildLookupTables() {
const TargetLoweringBase *TLI = getTLI();
return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
@@ -508,13 +515,44 @@ public:
return BaseT::getInstructionLatency(I);
}
+ virtual Optional<unsigned>
+ getCacheSize(TargetTransformInfo::CacheLevel Level) const {
+ return Optional<unsigned>(
+ getST()->getCacheSize(static_cast<unsigned>(Level)));
+ }
+
+ virtual Optional<unsigned>
+ getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
+ Optional<unsigned> TargetResult =
+ getST()->getCacheAssociativity(static_cast<unsigned>(Level));
+
+ if (TargetResult)
+ return TargetResult;
+
+ return BaseT::getCacheAssociativity(Level);
+ }
+
+ virtual unsigned getCacheLineSize() const {
+ return getST()->getCacheLineSize();
+ }
+
+ virtual unsigned getPrefetchDistance() const {
+ return getST()->getPrefetchDistance();
+ }
+
+ virtual unsigned getMinPrefetchStride() const {
+ return getST()->getMinPrefetchStride();
+ }
+
+ virtual unsigned getMaxPrefetchIterationsAhead() const {
+ return getST()->getMaxPrefetchIterationsAhead();
+ }
+
/// @}
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
-
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
@@ -1111,9 +1149,7 @@ public:
OpPropsBW);
// For non-rotates (X != Y) we must add shift-by-zero handling costs.
if (X != Y) {
- Type *CondTy = Type::getInt1Ty(RetTy->getContext());
- if (RetVF > 1)
- CondTy = VectorType::get(CondTy, RetVF);
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
CondTy, nullptr);
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
@@ -1131,7 +1167,6 @@ public:
unsigned getIntrinsicInstrCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
- unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
auto *ConcreteTTI = static_cast<T *>(this);
SmallVector<unsigned, 2> ISDs;
@@ -1288,9 +1323,7 @@ public:
/*IsUnsigned=*/false);
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat: {
- Type *CondTy = Type::getInt1Ty(RetTy->getContext());
- if (RetVF > 1)
- CondTy = VectorType::get(CondTy, RetVF);
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
Type *OpTy = StructType::create({RetTy, CondTy});
Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
@@ -1310,9 +1343,7 @@ public:
}
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat: {
- Type *CondTy = Type::getInt1Ty(RetTy->getContext());
- if (RetVF > 1)
- CondTy = VectorType::get(CondTy, RetVF);
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
Type *OpTy = StructType::create({RetTy, CondTy});
Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
@@ -1329,9 +1360,7 @@ public:
case Intrinsic::smul_fix:
case Intrinsic::umul_fix: {
unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
- Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
- if (RetVF > 1)
- ExtTy = VectorType::get(ExtTy, RetVF);
+ Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
unsigned ExtOp =
IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
@@ -1395,9 +1424,7 @@ public:
Type *MulTy = RetTy->getContainedType(0);
Type *OverflowTy = RetTy->getContainedType(1);
unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
- Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
- if (MulTy->isVectorTy())
- ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
+ Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
unsigned ExtOp =
IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
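Since BasicTTIImplBase now has a virtual destructor and virtual cache/prefetch hooks, a concrete TTI can override them per subtarget. A hedged sketch only; MyTargetTTIImpl and the constants are illustrative, and the usual CRTP plumbing is omitted:

class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
  // Usual CRTP boilerplate (constructor, getST()/getTLI()) omitted here.
public:
  unsigned getCacheLineSize() const override { return 64; }
  unsigned getPrefetchDistance() const override { return 256; }
  Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const override {
    if (Level == TargetTransformInfo::CacheLevel::L1D)
      return 32768; // 32 KiB L1D, for illustration only
    return BasicTTIImplBase::getCacheSize(Level);
  }
};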
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index aa339e1cc913..a30ca638ee6d 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Alignment.h"
namespace llvm {
@@ -43,6 +44,7 @@ public:
AExtUpper, // The value is in the upper bits of the location and should be
// extended with undefined upper bits when retrieved.
BCvt, // The value is bit-converted in the location.
+ Trunc, // The value is truncated in the location.
VExt, // The value is vector-widened in the location.
// FIXME: Not implemented yet. Code that uses AExt to mean
// vector-widen should be fixed to use VExt instead.
@@ -197,7 +199,7 @@ private:
LLVMContext &Context;
unsigned StackOffset;
- unsigned MaxStackArgAlign;
+ Align MaxStackArgAlign;
SmallVector<uint32_t, 16> UsedRegs;
SmallVector<CCValAssign, 4> PendingLocs;
SmallVector<ISD::ArgFlagsTy, 4> PendingArgFlags;
@@ -421,19 +423,19 @@ public:
/// AllocateStack - Allocate a chunk of stack space with the specified size
/// and alignment.
- unsigned AllocateStack(unsigned Size, unsigned Align) {
- assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2.
- StackOffset = alignTo(StackOffset, Align);
+ unsigned AllocateStack(unsigned Size, unsigned Alignment) {
+ const Align CheckedAlignment(Alignment);
+ StackOffset = alignTo(StackOffset, CheckedAlignment);
unsigned Result = StackOffset;
StackOffset += Size;
- MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
- ensureMaxAlignment(Align);
+ MaxStackArgAlign = std::max(CheckedAlignment, MaxStackArgAlign);
+ ensureMaxAlignment(CheckedAlignment);
return Result;
}
- void ensureMaxAlignment(unsigned Align) {
+ void ensureMaxAlignment(Align Alignment) {
if (!AnalyzingMustTailForwardedRegs)
- MF.getFrameInfo().ensureMaxAlignment(Align);
+ MF.getFrameInfo().ensureMaxAlignment(Alignment.value());
}
/// Version of AllocateStack with extra register to be shadowed.
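The alignment argument is now validated by constructing an llvm::Align, which asserts it is a power of two; the caller-side contract is unchanged. A small sketch:

unsigned allocateTwoSlots(CCState &State) {
  unsigned Off0 = State.AllocateStack(/*Size=*/8, /*Alignment=*/8);
  unsigned Off1 = State.AllocateStack(/*Size=*/4, /*Alignment=*/4);
  // Off1 == Off0 + 8: the running StackOffset is already 4-byte aligned after
  // the first slot, so no padding is inserted.
  return Off1;
}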
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index cf58ee0cabea..705465b15c4c 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -28,6 +28,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/Support/Automaton.h"
#include <cstdint>
#include <map>
#include <memory>
@@ -76,26 +77,26 @@ using DFAStateInput = int64_t;
class DFAPacketizer {
private:
- using UnsignPair = std::pair<unsigned, DFAInput>;
-
const InstrItineraryData *InstrItins;
- int CurrentState = 0;
- const DFAStateInput (*DFAStateInputTable)[2];
- const unsigned *DFAStateEntryTable;
-
- // CachedTable is a map from <FromState, Input> to ToState.
- DenseMap<UnsignPair, unsigned> CachedTable;
-
- // Read the DFA transition table and update CachedTable.
- void ReadTable(unsigned state);
+ Automaton<DFAInput> A;
public:
- DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2],
- const unsigned *SET);
+ DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a) :
+ InstrItins(InstrItins), A(std::move(a)) {
+ // Start off with resource tracking disabled.
+ A.enableTranscription(false);
+ }
// Reset the current state to make all resources available.
void clearResources() {
- CurrentState = 0;
+ A.reset();
+ }
+
+ // Set whether this packetizer should track not just whether instructions
+ // can be packetized, but also which functional units each instruction ends up
+ // using after packetization.
+ void setTrackResources(bool Track) {
+ A.enableTranscription(Track);
}
// Return the DFAInput for an instruction class.
@@ -120,6 +121,15 @@ public:
// current state to reflect that change.
void reserveResources(MachineInstr &MI);
+ // Return the resources used by the InstIdx'th instruction added to this
+ // packet. The resources are returned as a bitvector of functional units.
+ //
+ // Note that a bundle may be packed in multiple valid ways. This function
+ // returns one arbitrary valid packing.
+ //
+ // Requires setTrackResources(true) to have been called.
+ unsigned getUsedResources(unsigned InstIdx);
+
const InstrItineraryData *getInstrItins() const { return InstrItins; }
};
@@ -134,7 +144,7 @@ class VLIWPacketizerList {
protected:
MachineFunction &MF;
const TargetInstrInfo *TII;
- AliasAnalysis *AA;
+ AAResults *AA;
// The VLIW Scheduler.
DefaultVLIWScheduler *VLIWScheduler;
@@ -146,9 +156,9 @@ protected:
std::map<MachineInstr*, SUnit*> MIToSUnit;
public:
- // The AliasAnalysis parameter can be nullptr.
+ // The AAResults parameter can be nullptr.
VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- AliasAnalysis *AA);
+ AAResults *AA);
virtual ~VLIWPacketizerList();
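A sketch of the new resource-tracking flow on the automaton-backed packetizer; the owning pass and the two instructions are assumed:

void packAndQueryUnits(DFAPacketizer &P, MachineInstr &A, MachineInstr &B) {
  P.setTrackResources(true); // enable automaton transcription
  P.clearResources();
  P.reserveResources(A);     // instruction 0 of the packet
  P.reserveResources(B);     // instruction 1 of the packet
  // Functional-unit bitvectors for one arbitrary valid packing.
  unsigned UnitsA = P.getUsedResources(0);
  unsigned UnitsB = P.getUsedResources(1);
  (void)UnitsA;
  (void)UnitsB;
}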
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
index 684f9e40ca5a..e8e7504a6cda 100644
--- a/include/llvm/CodeGen/DIE.h
+++ b/include/llvm/CodeGen/DIE.h
@@ -550,6 +550,14 @@ public:
return *static_cast<T *>(Last ? Last->Next.getPointer() : nullptr);
}
+ void takeNodes(IntrusiveBackList<T> &Other) {
+ for (auto &N : Other) {
+ N.Next.setPointerAndInt(&N, true);
+ push_back(N);
+ }
+ Other.Last = nullptr;
+ }
+
class const_iterator;
class iterator
: public iterator_facade_base<iterator, std::forward_iterator_tag, T> {
@@ -685,6 +693,10 @@ public:
return addValue(Alloc, DIEValue(Attribute, Form, std::forward<T>(Value)));
}
+ /// Take ownership of the nodes in \p Other, and append them to the back of
+ /// the list.
+ void takeValues(DIEValueList &Other) { List.takeNodes(Other.List); }
+
value_range values() {
return make_range(value_iterator(List.begin()), value_iterator(List.end()));
}
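A sketch of the new move-style API; Pending is assumed to hold attributes that were staged before their final owner existed:

void adoptStagedAttributes(DIEValueList &FinalDie, DIEValueList &Pending) {
  // Appends Pending's value nodes to FinalDie and leaves Pending empty,
  // avoiding a per-DIEValue copy through addValue().
  FinalDie.takeValues(Pending);
}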
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index f09b59daf4dd..03d681feb7aa 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -93,9 +93,9 @@ public:
SmallVector<Value *, 16> OutVals;
SmallVector<ISD::ArgFlagsTy, 16> OutFlags;
- SmallVector<unsigned, 16> OutRegs;
+ SmallVector<Register, 16> OutRegs;
SmallVector<ISD::InputArg, 4> Ins;
- SmallVector<unsigned, 4> InRegs;
+ SmallVector<Register, 4> InRegs;
CallLoweringInfo()
: RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index fb60191abd3a..f812a2f6c585 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -20,7 +20,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -37,6 +36,7 @@ namespace llvm {
class Argument;
class BasicBlock;
class BranchProbabilityInfo;
+class LegacyDivergenceAnalysis;
class Function;
class Instruction;
class MachineFunction;
diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h
index d717121ad78e..4901a3748e4a 100644
--- a/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -45,18 +45,62 @@ class CallLowering {
public:
struct ArgInfo {
SmallVector<Register, 4> Regs;
+ // If the argument had to be split into multiple parts according to the
+ // target calling convention, then this contains the original vregs
+ // if the argument was an incoming arg.
+ SmallVector<Register, 2> OrigRegs;
Type *Ty;
- ISD::ArgFlagsTy Flags;
+ SmallVector<ISD::ArgFlagsTy, 4> Flags;
bool IsFixed;
ArgInfo(ArrayRef<Register> Regs, Type *Ty,
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true)
- : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags),
- IsFixed(IsFixed) {
+ ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
+ bool IsFixed = true)
+ : Regs(Regs.begin(), Regs.end()), Ty(Ty),
+ Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) {
+ if (!Regs.empty() && Flags.empty())
+ this->Flags.push_back(ISD::ArgFlagsTy());
// FIXME: We should have just one way of saying "no register".
assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) &&
"only void types should have no register");
}
+
+ ArgInfo() : Ty(nullptr), IsFixed(false) {}
+ };
+
+ struct CallLoweringInfo {
+ /// Calling convention to be used for the call.
+ CallingConv::ID CallConv = CallingConv::C;
+
+ /// Destination of the call. It should be either a register, globaladdress,
+ /// or externalsymbol.
+ MachineOperand Callee = MachineOperand::CreateImm(0);
+
+ /// Descriptor for the return type of the function.
+ ArgInfo OrigRet;
+
+ /// List of descriptors of the arguments passed to the function.
+ SmallVector<ArgInfo, 8> OrigArgs;
+
+ /// Valid if the call has a swifterror inout parameter, and contains the
+ /// vreg that the swifterror should be copied into after the call.
+ Register SwiftErrorVReg = 0;
+
+ MDNode *KnownCallees = nullptr;
+
+ /// True if the call must be tail call optimized.
+ bool IsMustTailCall = false;
+
+ /// True if the call passes all target-independent checks for tail call
+ /// optimization.
+ bool IsTailCall = false;
+
+ /// True if the call was lowered as a tail call. This is consumed by the
+ /// legalizer. This allows the legalizer to lower libcalls as tail calls.
+ bool LoweredTailCall = false;
+
+ /// True if the call is to a vararg function.
+ bool IsVarArg = false;
};
/// Argument handling is mostly uniform between the four places that
@@ -72,9 +116,9 @@ public:
virtual ~ValueHandler() = default;
- /// Returns true if the handler is dealing with formal arguments,
- /// not with return values etc.
- virtual bool isArgumentHandler() const { return false; }
+ /// Returns true if the handler is dealing with incoming arguments,
+ /// i.e. those that move values from some physical location to vregs.
+ virtual bool isIncomingArgumentHandler() const = 0;
/// Materialize a VReg containing the address of the specified
/// stack-based object. This is either based on a FrameIndex or
@@ -112,8 +156,8 @@ public:
virtual bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, const ArgInfo &Info,
- CCState &State) {
- return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ ISD::ArgFlagsTy Flags, CCState &State) {
+ return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
}
MachineIRBuilder &MIRBuilder;
@@ -162,12 +206,42 @@ protected:
/// \p Callback to move them to the assigned locations.
///
/// \return True if everything has succeeded, false otherwise.
- bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
+ bool handleAssignments(MachineIRBuilder &MIRBuilder,
+ SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler) const;
bool handleAssignments(CCState &CCState,
SmallVectorImpl<CCValAssign> &ArgLocs,
- MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
+ MachineIRBuilder &MIRBuilder,
+ SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler) const;
+
+ /// Analyze passed or returned values from a call, supplied in \p ArgInfo,
+ /// incorporating info about the passed values into \p CCState.
+ ///
+ /// Used to check if arguments are suitable for tail call lowering.
+ bool analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
+ CCAssignFn &AssignFnFixed,
+ CCAssignFn &AssignFnVarArg) const;
+
+ /// \returns True if the calling convention for a callee and its caller pass
+ /// results in the same way. Typically used for tail call eligibility checks.
+ ///
+ /// \p Info is the CallLoweringInfo for the call.
+ /// \p MF is the MachineFunction for the caller.
+ /// \p InArgs contains the results of the call.
+ /// \p CalleeAssignFnFixed is the CCAssignFn to be used for the callee for
+ /// fixed arguments.
+ /// \p CalleeAssignFnVarArg is similar, but for varargs.
+ /// \p CallerAssignFnFixed is the CCAssignFn to be used for the caller for
+ /// fixed arguments.
+ /// \p CallerAssignFnVarArg is similar, but for varargs.
+ bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &InArgs,
+ CCAssignFn &CalleeAssignFnFixed,
+ CCAssignFn &CalleeAssignFnVarArg,
+ CCAssignFn &CallerAssignFnFixed,
+ CCAssignFn &CallerAssignFnVarArg) const;
+
public:
CallLowering(const TargetLowering *TLI) : TLI(TLI) {}
virtual ~CallLowering() = default;
@@ -223,37 +297,10 @@ public:
/// This hook must be implemented to lower the given call instruction,
/// including argument and return value marshalling.
///
- /// \p CallConv is the calling convention to be used for the call.
- ///
- /// \p Callee is the destination of the call. It should be either a register,
- /// globaladdress, or externalsymbol.
- ///
- /// \p OrigRet is a descriptor for the return type of the function.
- ///
- /// \p OrigArgs is a list of descriptors of the arguments passed to the
- /// function.
- ///
- /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout
- /// parameter, and contains the vreg that the swifterror should be copied into
- /// after the call.
///
/// \return true if the lowering succeeded, false otherwise.
- virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
- const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs,
- Register SwiftErrorVReg) const {
- if (!supportSwiftError()) {
- assert(SwiftErrorVReg == 0 && "trying to use unsupported swifterror");
- return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs);
- }
- return false;
- }
-
- /// This hook behaves as the extended lowerCall function, but for targets that
- /// do not support swifterror value promotion.
- virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
- const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const {
+ virtual bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const {
return false;
}
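A sketch of the caller side after this consolidation: everything the old multi-parameter lowerCall took now travels in one CallLoweringInfo. The wrapper below is illustrative, not part of the patch:

bool lowerSimpleCall(CallLowering &CL, MachineIRBuilder &MIRBuilder,
                     const MachineOperand &Callee, CallLowering::ArgInfo Ret,
                     ArrayRef<CallLowering::ArgInfo> Args) {
  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CallingConv::C;
  Info.Callee = Callee;
  Info.OrigRet = std::move(Ret);
  Info.OrigArgs.append(Args.begin(), Args.end());
  Info.IsVarArg = false;   // plain direct call in this sketch
  Info.IsTailCall = false; // no tail-call eligibility checks attempted
  return CL.lowerCall(MIRBuilder, Info);
}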
diff --git a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 0c50c9c5e0cf..4c04dc52547d 100644
--- a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -27,6 +27,8 @@ class MachineIRBuilder;
class MachineRegisterInfo;
class MachineInstr;
class MachineOperand;
+class GISelKnownBits;
+class MachineDominatorTree;
struct PreferredTuple {
LLT Ty; // The result type of the extend.
@@ -35,12 +37,17 @@ struct PreferredTuple {
};
class CombinerHelper {
+protected:
MachineIRBuilder &Builder;
MachineRegisterInfo &MRI;
GISelChangeObserver &Observer;
+ GISelKnownBits *KB;
+ MachineDominatorTree *MDT;
public:
- CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B);
+ CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
+ GISelKnownBits *KB = nullptr,
+ MachineDominatorTree *MDT = nullptr);
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
@@ -56,18 +63,132 @@ public:
bool matchCombineCopy(MachineInstr &MI);
void applyCombineCopy(MachineInstr &MI);
+ /// Returns true if \p DefMI precedes \p UseMI or they are the same
+ /// instruction. Both must be in the same basic block.
+ bool isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI);
+
+ /// Returns true if \p DefMI dominates \p UseMI. By definition an
+ /// instruction dominates itself.
+ ///
+ /// If we haven't been provided with a MachineDominatorTree during
+ /// construction, this function returns a conservative result that tracks just
+ /// a single basic block.
+ bool dominates(MachineInstr &DefMI, MachineInstr &UseMI);
+
/// If \p MI is extend that consumes the result of a load, try to combine it.
/// Returns true if MI changed.
bool tryCombineExtendingLoads(MachineInstr &MI);
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
- bool matchCombineBr(MachineInstr &MI);
- bool tryCombineBr(MachineInstr &MI);
+ /// Combine \p MI into a pre-indexed or post-indexed load/store operation if
+ /// legal and the surrounding code makes it useful.
+ bool tryCombineIndexedLoadStore(MachineInstr &MI);
+
+ bool matchElideBrByInvertingCond(MachineInstr &MI);
+ void applyElideBrByInvertingCond(MachineInstr &MI);
+ bool tryElideBrByInvertingCond(MachineInstr &MI);
+
+ /// If \p MI is G_CONCAT_VECTORS, try to combine it.
+ /// Returns true if MI changed.
+ /// Right now, we support:
+ /// - concat_vector(undef, undef) => undef
+ /// - concat_vector(build_vector(A, B), build_vector(C, D)) =>
+ /// build_vector(A, B, C, D)
+ ///
+ /// \pre MI.getOpcode() == G_CONCAT_VECTORS.
+ bool tryCombineConcatVectors(MachineInstr &MI);
+ /// Check if the G_CONCAT_VECTORS \p MI is undef or if it
+ /// can be flattened into a build_vector.
+ /// In the first case \p IsUndef will be true.
+ /// In the second case \p Ops will contain the operands needed
+ /// to produce the flattened build_vector.
+ ///
+ /// \pre MI.getOpcode() == G_CONCAT_VECTORS.
+ bool matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
+ SmallVectorImpl<Register> &Ops);
+ /// Replace \p MI with a flattened build_vector with \p Ops or an
+ /// implicit_def if IsUndef is true.
+ void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef,
+ const ArrayRef<Register> Ops);
+
+ /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
+ /// Returns true if MI changed.
+ ///
+ /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR.
+ bool tryCombineShuffleVector(MachineInstr &MI);
+ /// Check if the G_SHUFFLE_VECTOR \p MI can be replaced by a
+ /// concat_vectors.
+ /// \p Ops will contain the operands needed to produce the flattened
+ /// concat_vectors.
+ ///
+ /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR.
+ bool matchCombineShuffleVector(MachineInstr &MI,
+ SmallVectorImpl<Register> &Ops);
+ /// Replace \p MI with a concat_vectors with \p Ops.
+ void applyCombineShuffleVector(MachineInstr &MI,
+ const ArrayRef<Register> Ops);
+
+ /// Optimize memcpy intrinsics et al, e.g. constant len calls.
+ /// \p MaxLen if non-zero specifies the max length of a mem libcall to inline.
+ ///
+ /// For example (pre-indexed; illustrating tryCombineIndexedLoadStore above):
+ ///
+ /// $addr = G_GEP $base, $offset
+ /// [...]
+ /// $val = G_LOAD $addr
+ /// [...]
+ /// $whatever = COPY $addr
+ ///
+ /// -->
+ ///
+ /// $val, $addr = G_INDEXED_LOAD $base, $offset, 1 (IsPre)
+ /// [...]
+ /// $whatever = COPY $addr
+ ///
+ /// or (post-indexed):
+ ///
+ /// G_STORE $val, $base
+ /// [...]
+ /// $addr = G_GEP $base, $offset
+ /// [...]
+ /// $whatever = COPY $addr
+ ///
+ /// -->
+ ///
+ /// $addr = G_INDEXED_STORE $val, $base, $offset
+ /// [...]
+ /// $whatever = COPY $addr
+ bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
+
+private:
+ // Memcpy family optimization helpers.
+ bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
+ unsigned KnownLen, unsigned DstAlign, unsigned SrcAlign,
+ bool IsVolatile);
+ bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
+ unsigned KnownLen, unsigned DstAlign, unsigned SrcAlign,
+ bool IsVolatile);
+ bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
+ unsigned KnownLen, unsigned DstAlign, bool IsVolatile);
+
+ /// Given a non-indexed load or store instruction \p MI, find an offset that
+ /// can be usefully and legally folded into it as a post-indexing operation.
+ ///
+ /// \returns true if a candidate is found.
+ bool findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+ Register &Offset);
+
+ /// Given a non-indexed load or store instruction \p MI, find an offset that
+ /// can be usefully and legally folded into it as a pre-indexing operation.
+ ///
+ /// \returns true if a candidate is found.
+ bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+ Register &Offset);
};
} // namespace llvm
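A sketch of driving the extended helper from a combiner loop; both analyses are optional, and with a null MDT dominates() falls back to the single-block check described above:

bool combineInstr(GISelChangeObserver &Observer, MachineIRBuilder &B,
                  GISelKnownBits *KB, MachineDominatorTree *MDT,
                  MachineInstr &MI) {
  CombinerHelper Helper(Observer, B, KB, MDT);
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return Helper.tryCombineIndexedLoadStore(MI);
  default:
    return Helper.tryCombine(MI);
  }
}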
diff --git a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 3b09a8e2b479..ad645a46bbe6 100644
--- a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -27,9 +27,11 @@ class MachineRegisterInfo;
class CombinerInfo {
public:
CombinerInfo(bool AllowIllegalOps, bool ShouldLegalizeIllegal,
- LegalizerInfo *LInfo)
+ LegalizerInfo *LInfo, bool OptEnabled, bool OptSize,
+ bool MinSize)
: IllegalOpsAllowed(AllowIllegalOps),
- LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo) {
+ LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo),
+ EnableOpt(OptEnabled), EnableOptSize(OptSize), EnableMinSize(MinSize) {
assert(((AllowIllegalOps || !LegalizeIllegalOps) || LInfo) &&
"Expecting legalizerInfo when illegalops not allowed");
}
@@ -43,6 +45,15 @@ public:
bool LegalizeIllegalOps; // TODO: Make use of this.
const LegalizerInfo *LInfo;
+ /// Whether optimizations should be enabled. This distinguishes between running
+ /// the combiner unconditionally and running it only when optimizations are
+ /// specifically enabled.
+ bool EnableOpt;
+ /// Whether we're optimizing for size.
+ bool EnableOptSize;
+ /// Whether we're optimizing for minsize (-Oz).
+ bool EnableMinSize;
+
/// Attempt to combine instructions using MI as the root.
///
/// Use Observer to report the creation, modification, and erasure of
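A sketch of a derived info forwarding the new optimization-state flags through the widened base constructor; MyCombinerInfo is hypothetical, and the pure virtual combine() is assumed to keep its (Observer, MI, Builder) shape suggested by the comment above:

class MyCombinerInfo : public CombinerInfo {
public:
  MyCombinerInfo(bool OptEnabled, bool OptSize, bool MinSize)
      : CombinerInfo(/*AllowIllegalOps=*/true, /*ShouldLegalizeIllegal=*/false,
                     /*LInfo=*/nullptr, OptEnabled, OptSize, MinSize) {}
  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};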
diff --git a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
index e817d9b4550e..df196bfbd437 100644
--- a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
@@ -54,6 +54,17 @@ public:
return buildConstant(Dst, MaybeCst->getSExtValue());
break;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ assert(DstOps.size() == 1 && "Invalid dst ops");
+ assert(SrcOps.size() == 2 && "Invalid src ops");
+ const DstOp &Dst = DstOps[0];
+ const SrcOp &Src0 = SrcOps[0];
+ const SrcOp &Src1 = SrcOps[1];
+ if (auto MaybeCst =
+ ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI()))
+ return buildConstant(Dst, MaybeCst->getSExtValue());
+ break;
+ }
}
return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps);
}
diff --git a/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
new file mode 100644
index 000000000000..dfe5a7f3177d
--- /dev/null
+++ b/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -0,0 +1,111 @@
+//===- llvm/CodeGen/GlobalISel/GISelKnownBits.h ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Provides analysis for querying information about KnownBits during GISel
+/// passes.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
+#define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
+
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/KnownBits.h"
+
+namespace llvm {
+
+class TargetLowering;
+class DataLayout;
+
+class GISelKnownBits : public GISelChangeObserver {
+ MachineFunction &MF;
+ MachineRegisterInfo &MRI;
+ const TargetLowering &TL;
+ const DataLayout &DL;
+
+public:
+ GISelKnownBits(MachineFunction &MF);
+ virtual ~GISelKnownBits() = default;
+ void setMF(MachineFunction &MF);
+ virtual void computeKnownBitsImpl(Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+
+ // KnownBitsAPI
+ KnownBits getKnownBits(Register R);
+ // Calls getKnownBits for first operand def of MI.
+ KnownBits getKnownBits(MachineInstr &MI);
+ APInt getKnownZeroes(Register R);
+ APInt getKnownOnes(Register R);
+
+ /// \return true if 'V & Mask' is known to be zero, i.e. every bit of \p Val
+ /// selected by \p Mask is known to be zero. We use this predicate to
+ /// simplify operations downstream.
+ bool maskedValueIsZero(Register Val, const APInt &Mask) {
+ return Mask.isSubsetOf(getKnownBits(Val).Zero);
+ }
+
+ /// \return true if the sign bit of Op is known to be zero. We use this
+ /// predicate to simplify operations downstream.
+ bool signBitIsZero(Register Op);
+
+ // FIXME: Is this the right place for G_FRAME_INDEX? Should it be in
+ // TargetLowering?
+ void computeKnownBitsForFrameIndex(Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+ static Align inferAlignmentForFrameIdx(int FrameIdx, int Offset,
+ const MachineFunction &MF);
+ static void computeKnownBitsForAlignment(KnownBits &Known,
+ MaybeAlign Alignment);
+
+ // Try to infer alignment for MI.
+ static MaybeAlign inferPtrAlignment(const MachineInstr &MI);
+
+ // Observer API. No-op for non-caching implementation.
+ void erasingInstr(MachineInstr &MI) override {}
+ void createdInstr(MachineInstr &MI) override {}
+ void changingInstr(MachineInstr &MI) override {}
+ void changedInstr(MachineInstr &MI) override {}
+
+protected:
+ unsigned getMaxDepth() const { return 6; }
+};
+
+/// To use the GISelKnownBits analysis in a pass:
+///   GISelKnownBits &Info = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+/// If the Info is caching, add it to the change observer:
+///   WrapperObserver.addObserver(&Info);
+
+/// Eventually add other features such as caching and serializing/deserializing
+/// to MIR etc. Those implementations can derive from GISelKnownBits and
+/// override computeKnownBitsImpl.
+class GISelKnownBitsAnalysis : public MachineFunctionPass {
+ std::unique_ptr<GISelKnownBits> Info;
+
+public:
+ static char ID;
+ GISelKnownBitsAnalysis() : MachineFunctionPass(ID) {
+ initializeGISelKnownBitsAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+ GISelKnownBits &get(MachineFunction &MF) {
+ if (!Info)
+ Info = std::make_unique<GISelKnownBits>(MF);
+ return *Info.get();
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void releaseMemory() override { Info.reset(); }
+};
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
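Following the usage outlined in the comment block above, a pass might query the analysis like this; MyPass is hypothetical and the guards are minimal:

bool MyPass::runOnMachineFunction(MachineFunction &MF) {
  GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB) {
      if (!MI.getNumOperands() || !MI.getOperand(0).isReg() ||
          !MI.getOperand(0).isDef())
        continue;
      KnownBits Known = KB.getKnownBits(MI); // first def of MI
      if (Known.isConstant()) {
        // e.g. replace uses of the def with Known.getConstant()
      }
    }
  return false;
}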
diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 8654ba83f08d..bdb92aa4689d 100644
--- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -213,8 +213,8 @@ private:
bool translateStore(const User &U, MachineIRBuilder &MIRBuilder);
/// Translate an LLVM string intrinsic (memcpy, memset, ...).
- bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
- unsigned ID);
+ bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
+ Intrinsic::ID ID);
void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
@@ -243,6 +243,10 @@ private:
bool valueIsSplit(const Value &V,
SmallVectorImpl<uint64_t> *Offsets = nullptr);
+ /// Common code for translating normal calls or invokes.
+ bool translateCallSite(const ImmutableCallSite &CS,
+ MachineIRBuilder &MIRBuilder);
+
/// Translate call instruction.
/// \pre \p U is a call instruction.
bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
@@ -514,6 +518,10 @@ private:
// function has the optnone attribute.
bool EnableOpts = false;
+ /// True when the block contains a tail call. This allows the IRTranslator to
+ /// stop translating such blocks early.
+ bool HasTailCall = false;
+
/// Switch analysis and optimization.
class GISelSwitchLowering : public SwitchCG::SwitchLowering {
public:
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index e9b93be76754..fd3dc743000b 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -31,6 +31,7 @@ namespace llvm {
class APInt;
class APFloat;
+class GISelKnownBits;
class MachineInstr;
class MachineInstrBuilder;
class MachineFunction;
@@ -148,6 +149,13 @@ enum {
/// - AddrSpaceN+1 ...
GIM_CheckMemoryAddressSpace,
+ /// Check the minimum alignment of the memory access for the given machine
+ /// memory operand.
+ /// - InsnID - Instruction ID
+ /// - MMOIdx - MMO index
+ /// - MinAlign - Minimum acceptable alignment
+ GIM_CheckMemoryAlignment,
+
/// Check the size of the memory access for the given machine memory operand
/// against the size of an operand.
/// - InsnID - Instruction ID
@@ -201,11 +209,22 @@ enum {
/// - Expected Intrinsic ID
GIM_CheckIntrinsicID,
+ /// Check the operand is a specific predicate
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - Expected predicate
+ GIM_CheckCmpPredicate,
+
/// Check the specified operand is an MBB
/// - InsnID - Instruction ID
/// - OpIdx - Operand index
GIM_CheckIsMBB,
+ /// Check the specified operand is an Imm
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ GIM_CheckIsImm,
+
/// Check if the specified operand is safe to fold into the current
/// instruction.
/// - InsnID - Instruction ID
@@ -365,7 +384,20 @@ public:
/// if returns true:
/// for I in all mutated/inserted instructions:
/// !isPreISelGenericOpcode(I.getOpcode())
- virtual bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const = 0;
+ virtual bool select(MachineInstr &I) = 0;
+
+ CodeGenCoverage *CoverageInfo = nullptr;
+ GISelKnownBits *KnownBits = nullptr;
+ MachineFunction *MF = nullptr;
+
+ /// Setup per-MF selector state.
+ virtual void setupMF(MachineFunction &mf,
+ GISelKnownBits &KB,
+ CodeGenCoverage &covinfo) {
+ CoverageInfo = &covinfo;
+ KnownBits = &KB;
+ MF = &mf;
+ }
protected:
using ComplexRendererFns =
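A sketch of a selector under the new interface: per-function state arrives via setupMF(), so select() no longer threads CoverageInfo explicitly. MyTargetInstructionSelector is hypothetical:

class MyTargetInstructionSelector : public InstructionSelector {
public:
  bool select(MachineInstr &I) override {
    // KnownBits, CoverageInfo and MF were installed by setupMF() before the
    // first select() call for this function.
    if (I.getNumOperands() && I.getOperand(0).isReg() && KnownBits &&
        KnownBits->signBitIsZero(I.getOperand(0).getReg())) {
      // ... prefer an unsigned selection pattern ...
    }
    return false; // nothing selected in this sketch
  }
};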
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index e8ee4af0cb0b..08f2f54bcf90 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -98,7 +98,7 @@ bool InstructionSelector::executeMatchTable(
return false;
break;
}
- if (TRI.isPhysicalRegister(MO.getReg())) {
+ if (Register::isPhysicalRegister(MO.getReg())) {
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
dbgs() << CurrentIdx << ": Is a physical register\n");
if (handleReject() == RejectAndGiveUp)
@@ -409,6 +409,30 @@ bool InstructionSelector::executeMatchTable(
return false;
break;
}
+ case GIM_CheckMemoryAlignment: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t MMOIdx = MatchTable[CurrentIdx++];
+ unsigned MinAlign = MatchTable[CurrentIdx++];
+
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+
+ if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ break;
+ }
+
+ MachineMemOperand *MMO
+ = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIM_CheckMemoryAlignment"
+ << "(MIs[" << InsnID << "]->memoperands() + " << MMOIdx
+ << ")->getAlignment() >= " << MinAlign << ")\n");
+ if (MMO->getAlignment() < MinAlign && handleReject() == RejectAndGiveUp)
+ return false;
+
+ break;
+ }
case GIM_CheckMemorySizeEqualTo: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t MMOIdx = MatchTable[CurrentIdx++];
@@ -638,7 +662,21 @@ bool InstructionSelector::executeMatchTable(
return false;
break;
}
-
+ case GIM_CheckCmpPredicate: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t OpIdx = MatchTable[CurrentIdx++];
+ int64_t Value = MatchTable[CurrentIdx++];
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIM_CheckCmpPredicate(MIs["
+ << InsnID << "]->getOperand(" << OpIdx
+ << "), Value=" << Value << ")\n");
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+ MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
+ if (!MO.isPredicate() || MO.getPredicate() != Value)
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ break;
+ }
case GIM_CheckIsMBB: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
@@ -652,7 +690,19 @@ bool InstructionSelector::executeMatchTable(
}
break;
}
-
+ case GIM_CheckIsImm: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t OpIdx = MatchTable[CurrentIdx++];
+ DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID
+ << "]->getOperand(" << OpIdx << "))\n");
+ assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+ if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ }
+ break;
+ }
case GIM_CheckIsSafeToFold: {
int64_t InsnID = MatchTable[CurrentIdx++];
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
@@ -792,11 +842,13 @@ bool InstructionSelector::executeMatchTable(
case GIR_AddRegister: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t RegNum = MatchTable[CurrentIdx++];
+ uint64_t RegFlags = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
- OutMIs[InsnID].addReg(RegNum);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs["
- << InsnID << "], " << RegNum << ")\n");
+ OutMIs[InsnID].addReg(RegNum, RegFlags);
+ DEBUG_WITH_TYPE(
+ TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs["
+ << InsnID << "], " << RegNum << ", " << RegFlags << ")\n");
break;
}
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index a22778b8848c..7f960e727846 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -47,8 +47,7 @@ public:
bool tryCombineAnyExt(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts) {
- if (MI.getOpcode() != TargetOpcode::G_ANYEXT)
- return false;
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
Builder.setInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
@@ -93,9 +92,7 @@ public:
bool tryCombineZExt(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts) {
-
- if (MI.getOpcode() != TargetOpcode::G_ZEXT)
- return false;
+ assert(MI.getOpcode() == TargetOpcode::G_ZEXT);
Builder.setInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
@@ -136,32 +133,24 @@ public:
bool tryCombineSExt(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts) {
-
- if (MI.getOpcode() != TargetOpcode::G_SEXT)
- return false;
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT);
Builder.setInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
- // sext(trunc x) - > ashr (shl (aext/copy/trunc x), c), c
+ // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
Register TruncSrc;
if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLT DstTy = MRI.getType(DstReg);
- // Guess on the RHS shift amount type, which should be re-legalized if
- // applicable.
- if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy, DstTy}}) ||
- isInstUnsupported({TargetOpcode::G_ASHR, {DstTy, DstTy}}) ||
- isConstantUnsupported(DstTy))
+ if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
- unsigned ShAmt = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
- auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt);
- auto MIBShl = Builder.buildInstr(
- TargetOpcode::G_SHL, {DstTy},
- {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBShAmt});
- Builder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {MIBShl, MIBShAmt});
+ uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
+ Builder.buildInstr(
+ TargetOpcode::G_SEXT_INREG, {DstReg},
+ {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits});
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
@@ -172,9 +161,8 @@ public:
bool tryFoldImplicitDef(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts) {
unsigned Opcode = MI.getOpcode();
- if (Opcode != TargetOpcode::G_ANYEXT && Opcode != TargetOpcode::G_ZEXT &&
- Opcode != TargetOpcode::G_SEXT)
- return false;
+ assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
+ Opcode == TargetOpcode::G_SEXT);
if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
MI.getOperand(1).getReg(), MRI)) {
@@ -203,21 +191,38 @@ public:
return false;
}
- static unsigned getMergeOpcode(LLT OpTy, LLT DestTy) {
+ static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
+ LLT OpTy, LLT DestTy) {
if (OpTy.isVector() && DestTy.isVector())
- return TargetOpcode::G_CONCAT_VECTORS;
+ return MergeOp == TargetOpcode::G_CONCAT_VECTORS;
+
+ if (OpTy.isVector() && !DestTy.isVector()) {
+ if (MergeOp == TargetOpcode::G_BUILD_VECTOR)
+ return true;
- if (OpTy.isVector() && !DestTy.isVector())
- return TargetOpcode::G_BUILD_VECTOR;
+ if (MergeOp == TargetOpcode::G_CONCAT_VECTORS) {
+ if (ConvertOp == 0)
+ return true;
- return TargetOpcode::G_MERGE_VALUES;
+ const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
+
+ // Don't handle scalarization with a cast that isn't in the same
+ // direction as the vector cast. This could be handled, but it would
+ // require more intermediate unmerges.
+ if (ConvertOp == TargetOpcode::G_TRUNC)
+ return DestTy.getSizeInBits() <= OpEltSize;
+ return DestTy.getSizeInBits() >= OpEltSize;
+ }
+
+ return false;
+ }
+
+ return MergeOp == TargetOpcode::G_MERGE_VALUES;
}
bool tryCombineMerges(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts) {
-
- if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
- return false;
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
unsigned NumDefs = MI.getNumOperands() - 1;
MachineInstr *SrcDef =
@@ -237,16 +242,14 @@ public:
MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
}
- // FIXME: Handle scalarizing concat_vectors (scalar result type with vector
- // source)
- unsigned MergingOpcode = getMergeOpcode(OpTy, DestTy);
- if (!MergeI || MergeI->getOpcode() != MergingOpcode)
+ if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
+ ConvertOp, OpTy, DestTy))
return false;
const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
if (NumMergeRegs < NumDefs) {
- if (ConvertOp != 0 || NumDefs % NumMergeRegs != 0)
+ if (NumDefs % NumMergeRegs != 0)
return false;
Builder.setInstr(MI);
@@ -264,7 +267,22 @@ public:
++j, ++DefIdx)
DstRegs.push_back(MI.getOperand(DefIdx).getReg());
- Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
+ if (ConvertOp) {
+ SmallVector<Register, 2> TmpRegs;
+ // This is a vector that is being scalarized and casted. Extract to
+ // the element type, and do the conversion on the scalars.
+ LLT MergeEltTy
+ = MRI.getType(MergeI->getOperand(0).getReg()).getElementType();
+ for (unsigned j = 0; j < NumMergeRegs; ++j)
+ TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy));
+
+ Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());
+
+ for (unsigned j = 0; j < NumMergeRegs; ++j)
+ Builder.buildInstr(ConvertOp, {DstRegs[j]}, {TmpRegs[j]});
+ } else {
+ Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
+ }
}
} else if (NumMergeRegs > NumDefs) {
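The sext(trunc x) combine above now emits G_SEXT_INREG rather than a shl/ashr pair; a sketch of the replacement it builds, with names as in tryCombineSExt:

void emitSExtViaSExtInReg(MachineIRBuilder &Builder, Register DstReg,
                          Register TruncSrc, LLT DstTy,
                          uint64_t SrcSizeInBits) {
  // %t = anyext/copy/trunc of TruncSrc to DstTy
  // DstReg = G_SEXT_INREG %t, SrcSizeInBits
  Builder.buildInstr(TargetOpcode::G_SEXT_INREG, {DstReg},
                     {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc),
                      SrcSizeInBits});
}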
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index a0f21e8b19d7..fbfe71255a38 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -200,6 +200,13 @@ public:
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
LLT MoreTy);
+ LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy);
+ LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy);
+
LegalizeResult
reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
@@ -219,9 +226,17 @@ public:
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
+ LegalizeResult lowerFMad(MachineInstr &MI);
+ LegalizeResult lowerUnmergeValues(MachineInstr &MI);
+ LegalizeResult lowerShuffleVector(MachineInstr &MI);
+ LegalizeResult lowerDynStackAlloc(MachineInstr &MI);
+ LegalizeResult lowerExtract(MachineInstr &MI);
+ LegalizeResult lowerInsert(MachineInstr &MI);
+ LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI);
private:
MachineRegisterInfo &MRI;
@@ -236,6 +251,11 @@ createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
const CallLowering::ArgInfo &Result,
ArrayRef<CallLowering::ArgInfo> Args);
+/// Create a libcall to memcpy et al.
+LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI,
+ MachineInstr &MI);
+
} // End namespace llvm.
#endif
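A sketch of where the new helper slots in: a target's custom legalization of a memory intrinsic can simply delegate, letting the helper derive the memcpy/memmove/memset libcall from MI's operands:

static LegalizerHelper::LegalizeResult
legalizeAsMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstr &MI) {
  return createMemLibcall(MIRBuilder, MRI, MI);
}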
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 513c98f2d23f..1cf62d1fde59 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -331,6 +331,8 @@ class LegalizeRuleSet {
/// individually handled.
SmallBitVector TypeIdxsCovered{MCOI::OPERAND_LAST_GENERIC -
MCOI::OPERAND_FIRST_GENERIC + 2};
+ SmallBitVector ImmIdxsCovered{MCOI::OPERAND_LAST_GENERIC_IMM -
+ MCOI::OPERAND_FIRST_GENERIC_IMM + 2};
#endif
unsigned typeIdx(unsigned TypeIdx) {
@@ -342,9 +344,21 @@ class LegalizeRuleSet {
#endif
return TypeIdx;
}
- void markAllTypeIdxsAsCovered() {
+
+ unsigned immIdx(unsigned ImmIdx) {
+ assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
+ MCOI::OPERAND_FIRST_GENERIC_IMM) &&
+ "Imm Index is out of bounds");
+#ifndef NDEBUG
+ ImmIdxsCovered.set(ImmIdx);
+#endif
+ return ImmIdx;
+ }
+
+ void markAllIdxsAsCovered() {
#ifndef NDEBUG
TypeIdxsCovered.set();
+ ImmIdxsCovered.set();
#endif
}
@@ -403,6 +417,15 @@ class LegalizeRuleSet {
return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types),
Mutation);
}
+ /// Use the given action when type index 0 is any type in the given list and
+ /// imm index 0 is anything. Action should not be an action that requires
+ /// mutation.
+ LegalizeRuleSet &actionForTypeWithAnyImm(LegalizeAction Action,
+ std::initializer_list<LLT> Types) {
+ using namespace LegalityPredicates;
+ immIdx(0); // Inform verifier imm idx 0 is handled.
+ return actionIf(Action, typeInSet(typeIdx(0), Types));
+ }
/// Use the given action when type indexes 0 and 1 are both in the given list.
/// That is, the type pair is in the cartesian product of the list.
/// Action should not be an action that requires mutation.
@@ -454,7 +477,7 @@ public:
LegalizeRuleSet &legalIf(LegalityPredicate Predicate) {
// We have no choice but conservatively assume that the free-form
// user-provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Legal, Predicate);
}
/// The instruction is legal when type index 0 is any type in the given list.
@@ -466,6 +489,12 @@ public:
LegalizeRuleSet &legalFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
return actionFor(LegalizeAction::Legal, Types);
}
+ /// The instruction is legal when type index 0 is any type in the given list
+ /// and imm index 0 is anything.
+ LegalizeRuleSet &legalForTypeWithAnyImm(std::initializer_list<LLT> Types) {
+ markAllIdxsAsCovered();
+ return actionForTypeWithAnyImm(LegalizeAction::Legal, Types);
+ }
/// The instruction is legal when type indexes 0 and 1 along with the memory
/// size and minimum alignment is any type and size tuple in the given list.
LegalizeRuleSet &legalForTypesWithMemDesc(
@@ -497,7 +526,7 @@ public:
LegalizeRuleSet &alwaysLegal() {
using namespace LegalizeMutations;
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Legal, always);
}
@@ -506,7 +535,7 @@ public:
using namespace LegalizeMutations;
// We have no choice but conservatively assume that predicate-less lowering
// properly handles all type indices by design:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Lower, always);
}
/// The instruction is lowered if predicate is true. Keep type index 0 as the
@@ -515,7 +544,7 @@ public:
using namespace LegalizeMutations;
// We have no choice but conservatively assume that lowering with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Lower, Predicate);
}
/// The instruction is lowered if predicate is true.
@@ -523,7 +552,7 @@ public:
LegalizeMutation Mutation) {
// We have no choice but conservatively assume that lowering with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Lower, Predicate, Mutation);
}
/// The instruction is lowered when type index 0 is any type in the given
@@ -571,7 +600,7 @@ public:
LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) {
// We have no choice but conservatively assume that a libcall with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Libcall, Predicate);
}
LegalizeRuleSet &libcallFor(std::initializer_list<LLT> Types) {
@@ -597,7 +626,7 @@ public:
LegalizeMutation Mutation) {
// We have no choice but conservatively assume that an action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::WidenScalar, Predicate, Mutation);
}
/// Narrow the scalar to the one selected by the mutation if the predicate is
@@ -606,7 +635,7 @@ public:
LegalizeMutation Mutation) {
// We have no choice but conservatively assume that an action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation);
}
@@ -616,7 +645,7 @@ public:
LegalizeMutation Mutation) {
// We have no choice but conservatively assume that an action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::MoreElements, Predicate, Mutation);
}
/// Remove elements to reach the type selected by the mutation if the
@@ -625,7 +654,7 @@ public:
LegalizeMutation Mutation) {
// We have no choice but conservatively assume that an action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::FewerElements, Predicate, Mutation);
}
@@ -640,11 +669,15 @@ public:
return actionIf(LegalizeAction::Unsupported,
LegalityPredicates::memSizeInBytesNotPow2(0));
}
+ LegalizeRuleSet &lowerIfMemSizeNotPow2() {
+ return actionIf(LegalizeAction::Lower,
+ LegalityPredicates::memSizeInBytesNotPow2(0));
+ }
LegalizeRuleSet &customIf(LegalityPredicate Predicate) {
// We have no choice but conservatively assume that a custom action with a
// free-form user provided Predicate properly handles all type indices:
- markAllTypeIdxsAsCovered();
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Custom, Predicate);
}
LegalizeRuleSet &customFor(std::initializer_list<LLT> Types) {
@@ -882,6 +915,10 @@ public:
/// LegalizeRuleSet in any way at all.
/// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set.
bool verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const;
+ /// Check if there is no imm index which is obviously not handled by the
+ /// LegalizeRuleSet in any way at all.
+ /// \pre Imm indices of the opcode form a dense [0, \p NumImmIdxs) set.
+ bool verifyImmIdxsCoverage(unsigned NumImmIdxs) const;
/// Apply the ruleset to the given LegalityQuery.
LegalizeActionStep apply(const LegalityQuery &Query) const;
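A sketch of how a target ruleset might use the new imm-aware rule, e.g. for G_SEXT_INREG, whose operand 1 is an immediate; the scalar types chosen are illustrative:

void configureSExtInReg(LegalizerInfo &LI) {
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  LI.getActionDefinitionsBuilder(TargetOpcode::G_SEXT_INREG)
      .legalForTypeWithAnyImm({S32, S64}) // any imm; type 0 in {s32, s64}
      .lower();                           // everything else gets lowered
}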
diff --git a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index 13eddd9539fa..be12341f5763 100644
--- a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -21,7 +21,7 @@ namespace llvm {
namespace MIPatternMatch {
template <typename Reg, typename Pattern>
-bool mi_match(Reg R, MachineRegisterInfo &MRI, Pattern &&P) {
+bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P) {
return P.match(MRI, R);
}
@@ -30,7 +30,7 @@ template <typename SubPatternT> struct OneUse_match {
SubPatternT SubPat;
OneUse_match(const SubPatternT &SP) : SubPat(SP) {}
- bool match(MachineRegisterInfo &MRI, unsigned Reg) {
+ bool match(const MachineRegisterInfo &MRI, unsigned Reg) {
return MRI.hasOneUse(Reg) && SubPat.match(MRI, Reg);
}
};
@@ -71,7 +71,7 @@ inline operand_type_match m_Reg() { return operand_type_match(); }
/// Matching combinators.
template <typename... Preds> struct And {
template <typename MatchSrc>
- bool match(MachineRegisterInfo &MRI, MatchSrc &&src) {
+ bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) {
return true;
}
};
@@ -83,14 +83,14 @@ struct And<Pred, Preds...> : And<Preds...> {
: And<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {
}
template <typename MatchSrc>
- bool match(MachineRegisterInfo &MRI, MatchSrc &&src) {
+ bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) {
return P.match(MRI, src) && And<Preds...>::match(MRI, src);
}
};
template <typename... Preds> struct Or {
template <typename MatchSrc>
- bool match(MachineRegisterInfo &MRI, MatchSrc &&src) {
+ bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) {
return false;
}
};
@@ -101,7 +101,7 @@ struct Or<Pred, Preds...> : Or<Preds...> {
Or(Pred &&p, Preds &&... preds)
: Or<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {}
template <typename MatchSrc>
- bool match(MachineRegisterInfo &MRI, MatchSrc &&src) {
+ bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) {
return P.match(MRI, src) || Or<Preds...>::match(MRI, src);
}
};
@@ -175,7 +175,8 @@ struct BinaryOp_match {
RHS_P R;
BinaryOp_match(const LHS_P &LHS, const RHS_P &RHS) : L(LHS), R(RHS) {}
- template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) {
+ template <typename OpTy>
+ bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
MachineInstr *TmpMI;
if (mi_match(Op, MRI, m_MInstr(TmpMI))) {
if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 3) {
@@ -242,7 +243,8 @@ template <typename SrcTy, unsigned Opcode> struct UnaryOp_match {
SrcTy L;
UnaryOp_match(const SrcTy &LHS) : L(LHS) {}
- template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) {
+ template <typename OpTy>
+ bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
MachineInstr *TmpMI;
if (mi_match(Op, MRI, m_MInstr(TmpMI))) {
if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 2) {
@@ -323,7 +325,7 @@ struct CheckType {
LLT Ty;
CheckType(const LLT &Ty) : Ty(Ty) {}
- bool match(MachineRegisterInfo &MRI, unsigned Reg) {
+ bool match(const MachineRegisterInfo &MRI, unsigned Reg) {
return MRI.getType(Reg) == Ty;
}
};
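With the matchers now taking a const MachineRegisterInfo, read-only analyses can pattern-match as well; a small sketch, assuming the m_GSExt matcher present alongside m_GTrunc:

bool isSExtOfTrunc(Register R, const MachineRegisterInfo &MRI) {
  using namespace MIPatternMatch;
  Register Src;
  return mi_match(R, MRI, m_GSExt(m_GTrunc(m_Reg(Src))));
}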
diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 10d712176b1b..416f9c19f794 100644
--- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -122,14 +122,22 @@ class SrcOp {
MachineInstrBuilder SrcMIB;
Register Reg;
CmpInst::Predicate Pred;
+ int64_t Imm;
};
public:
- enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate };
+ enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate, Ty_Imm };
SrcOp(Register R) : Reg(R), Ty(SrcType::Ty_Reg) {}
SrcOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(SrcType::Ty_Reg) {}
SrcOp(const MachineInstrBuilder &MIB) : SrcMIB(MIB), Ty(SrcType::Ty_MIB) {}
SrcOp(const CmpInst::Predicate P) : Pred(P), Ty(SrcType::Ty_Predicate) {}
+ /// Use of registers held in unsigned integer variables (or more rarely signed
+ /// integers) is no longer permitted to avoid ambiguity with upcoming support
+ /// for immediates.
+ SrcOp(unsigned) = delete;
+ SrcOp(int) = delete;
+ SrcOp(uint64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {}
+ SrcOp(int64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {}
void addSrcToMIB(MachineInstrBuilder &MIB) const {
switch (Ty) {
@@ -142,12 +150,16 @@ public:
case SrcType::Ty_MIB:
MIB.addUse(SrcMIB->getOperand(0).getReg());
break;
+ case SrcType::Ty_Imm:
+ MIB.addImm(Imm);
+ break;
}
}
LLT getLLTTy(const MachineRegisterInfo &MRI) const {
switch (Ty) {
case SrcType::Ty_Predicate:
+ case SrcType::Ty_Imm:
llvm_unreachable("Not a register operand");
case SrcType::Ty_Reg:
return MRI.getType(Reg);
@@ -160,6 +172,7 @@ public:
Register getReg() const {
switch (Ty) {
case SrcType::Ty_Predicate:
+ case SrcType::Ty_Imm:
llvm_unreachable("Not a register operand");
case SrcType::Ty_Reg:
return Reg;
@@ -178,6 +191,15 @@ public:
}
}
+ int64_t getImm() const {
+ switch (Ty) {
+ case SrcType::Ty_Imm:
+ return Imm;
+ default:
+ llvm_unreachable("Not an immediate");
+ }
+ }
+
SrcType getSrcOpKind() const { return Ty; }
private:
@@ -348,6 +370,17 @@ public:
/// given. Convert "llvm.dbg.label Label" to "DBG_LABEL Label".
MachineInstrBuilder buildDbgLabel(const MDNode *Label);
+ /// Build and insert \p Res = G_DYN_STACKALLOC \p Size, \p Align
+ ///
+ /// G_DYN_STACKALLOC does a dynamic stack allocation and writes the address of
+ /// the allocated memory into \p Res.
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with pointer type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildDynStackAlloc(const DstOp &Res, const SrcOp &Size,
+ unsigned Align);
+
/// Build and insert \p Res = G_FRAME_INDEX \p Idx
///
/// G_FRAME_INDEX materializes the address of an alloca value or other
@@ -489,11 +522,21 @@ public:
return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src});
}
+ /// Build and insert a G_INTTOPTR instruction.
+ MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_INTTOPTR, {Dst}, {Src});
+ }
+
/// Build and insert \p Dst = G_BITCAST \p Src
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_BITCAST, {Dst}, {Src});
}
+ /// Build and insert \p Dst = G_ADDRSPACE_CAST \p Src
+ MachineInstrBuilder buildAddrSpaceCast(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_ADDRSPACE_CAST, {Dst}, {Src});
+ }
+
/// \return The opcode of the extension the target wants to use for boolean
/// values.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const;
@@ -867,7 +910,8 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res,
- const SrcOp &Op0, const SrcOp &Op1);
+ const SrcOp &Op0, const SrcOp &Op1,
+ Optional<unsigned> Flags = None);
/// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1
///
@@ -880,7 +924,8 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst,
- const SrcOp &Op0, const SrcOp &Op1);
+ const SrcOp &Op0, const SrcOp &Op1,
+ Optional<unsigned> Flags = None);
/// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val,
/// \p Elt, \p Idx
@@ -961,8 +1006,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMW(unsigned Opcode, Register OldValRes,
- Register Addr, Register Val,
+ MachineInstrBuilder buildAtomicRMW(unsigned Opcode, const DstOp &OldValRes,
+ const SrcOp &Addr, const SrcOp &Val,
MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_XCHG Addr, Val, MMO`.
@@ -1135,6 +1180,16 @@ public:
MachineInstrBuilder buildAtomicRMWUmin(Register OldValRes, Register Addr,
Register Val, MachineMemOperand &MMO);
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_FADD Addr, Val, MMO`.
+ MachineInstrBuilder buildAtomicRMWFAdd(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO);
+
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_FSUB Addr, Val, MMO`.
+ MachineInstrBuilder buildAtomicRMWFSub(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO);
+
/// Build and insert `G_FENCE Ordering, Scope`.
MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope);
@@ -1210,6 +1265,12 @@ public:
return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags);
}
+ MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMUL, {Dst}, {Src0, Src1}, Flags);
+ }
+
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1,
Optional<unsigned> Flags = None) {
@@ -1300,8 +1361,9 @@ public:
/// Build and insert \p Res = G_FADD \p Op0, \p Op1
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0,
- const SrcOp &Src1) {
- return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1});
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_FSUB \p Op0, \p Op1
@@ -1316,14 +1378,23 @@ public:
return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2});
}
+ /// Build and insert \p Res = G_FMAD \p Op0, \p Op1, \p Op2
+ MachineInstrBuilder buildFMAD(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1, const SrcOp &Src2,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMAD, {Dst}, {Src0, Src1, Src2}, Flags);
+ }
+
/// Build and insert \p Res = G_FNEG \p Op0
- MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) {
- return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0});
+ MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags);
}
/// Build and insert \p Res = G_FABS \p Op0
- MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0) {
- return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0});
+ MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}, Flags);
}
/// Build and insert \p Dst = G_FCANONICALIZE \p Src0
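
A short sketch (invented helper, not from this commit) exercising the additions above: fast-math flags on the FP helpers, and the new SrcOp immediate support, which also makes plain 'unsigned' register variables a compile error unless wrapped in Register.

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    using namespace llvm;

    // Distinct vregs keep the generic MIR in SSA form.
    static void emitSketch(MachineIRBuilder &B, Register Dst0,
                           Register Dst1, Register X, Register Y) {
      // Fast-math flags are now accepted by the scalar FP helpers.
      B.buildFAdd(Dst0, X, Y, MachineInstr::FmNoNans);
      // int64_t/uint64_t SrcOps become immediate operands.
      B.buildInstr(TargetOpcode::G_EXTRACT, {Dst1}, {Dst0, uint64_t(0)});
    }
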
diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h
index 4cdaa48fb689..8af2853473c2 100644
--- a/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -16,6 +16,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Register.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MachineValueType.h"
namespace llvm {
@@ -117,14 +119,16 @@ struct ValueAndVReg {
unsigned VReg;
};
/// If \p VReg is defined by a statically evaluable chain of
-/// instructions rooted on a G_CONSTANT (\p LookThroughInstrs == true)
-/// and that constant fits in int64_t, returns its value as well as
-/// the virtual register defined by this G_CONSTANT.
-/// When \p LookThroughInstrs == false, this function behaves like
+/// instructions rooted on a G_CONSTANT or G_FCONSTANT (\p LookThroughInstrs
+/// == true) and that constant fits in int64_t, returns its value as well as
+/// the virtual register defined by that constant.
+/// When \p LookThroughInstrs == false, this function behaves like
/// getConstantVRegVal.
+/// When \p HandleFConstants == false, the function bails out on G_FCONSTANTs.
Optional<ValueAndVReg>
getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI,
- bool LookThroughInstrs = true);
+ bool LookThroughInstrs = true,
+ bool HandleFConstants = true);
const ConstantFP* getConstantFPVRegVal(unsigned VReg,
const MachineRegisterInfo &MRI);
@@ -151,6 +155,9 @@ Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
const unsigned Op2,
const MachineRegisterInfo &MRI);
+Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const unsigned Op1,
+ uint64_t Imm, const MachineRegisterInfo &MRI);
+
/// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
/// this returns if \p Val can be assumed to never be a signaling NaN.
bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
@@ -161,5 +168,10 @@ inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
return isKnownNeverNaN(Val, MRI, true);
}
+/// Get a rough equivalent of an MVT for a given LLT.
+MVT getMVTForLLT(LLT Ty);
+/// Get a rough equivalent of an LLT for a given MVT.
+LLT getLLTForMVT(MVT Ty);
+
} // End namespace llvm.
#endif
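
For context, a hedged sketch (invented helper) of the widened lookup and the new LLT/MVT bridging declared above:

    #include "llvm/CodeGen/GlobalISel/Utils.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    static void inspect(Register VReg, const MachineRegisterInfo &MRI) {
      if (auto VV = getConstantVRegValWithLookThrough(
              VReg, MRI, /*LookThroughInstrs=*/true,
              /*HandleFConstants=*/false)) {
        int64_t Imm = VV->Value; // integer payload; FP-rooted chains bail out
        (void)Imm;
      }
      MVT VT = getMVTForLLT(LLT::scalar(32)); // roughly MVT::i32
      (void)VT;
    }
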
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index acf27dcc5fab..658ad31fa2a6 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -281,7 +281,7 @@ namespace ISD {
/// Same as the corresponding unsaturated fixed point instructions, but the
/// result is clamped between the min and max values representable by the
/// bits of the first 2 operands.
- SMULFIXSAT,
+ SMULFIXSAT, UMULFIXSAT,
/// Simple binary floating point operators.
FADD, FSUB, FMUL, FDIV, FREM,
@@ -301,6 +301,14 @@ namespace ISD {
STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2,
STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
+ STRICT_LROUND, STRICT_LLROUND, STRICT_LRINT, STRICT_LLRINT,
+
+ /// STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or
+ /// unsigned integer. These have the same semantics as fptosi and fptoui
+ /// in IR.
+ /// They are used to limit optimizations while the DAG is being optimized.
+ STRICT_FP_TO_SINT,
+ STRICT_FP_TO_UINT,
/// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
/// point type down to the precision of the destination VT. TRUNC is a
@@ -398,6 +406,13 @@ namespace ISD {
/// than the vector element type, and is implicitly truncated to it.
SCALAR_TO_VECTOR,
+ /// SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL
+ /// duplicated in all lanes. The type of the operand must match the vector
+ /// element type, except when they are integer types. In this case the
+ /// operand is allowed to be wider than the vector element type, and is
+ /// implicitly truncated to it.
+ SPLAT_VECTOR,
+
/// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
/// producing an unsigned/signed value of type i[2*N], then return the top
/// part.
@@ -569,13 +584,6 @@ namespace ISD {
/// 3 Round to -inf
FLT_ROUNDS_,
- /// X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and
- /// rounds it to a floating point value. It then promotes it and returns it
- /// in a register of the same size. This operation effectively just
- /// discards excess precision. The type to round down to is specified by
- /// the VT operand, a VTSDNode.
- FP_ROUND_INREG,
-
/// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
FP_EXTEND,
@@ -958,6 +966,23 @@ namespace ISD {
static const int LAST_INDEXED_MODE = POST_DEC + 1;
//===--------------------------------------------------------------------===//
+ /// MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's
+ /// index parameter when calculating addresses.
+ ///
+ /// SIGNED_SCALED Addr = Base + ((signed)Index * sizeof(element))
+ /// SIGNED_UNSCALED Addr = Base + (signed)Index
+ /// UNSIGNED_SCALED Addr = Base + ((unsigned)Index * sizeof(element))
+ /// UNSIGNED_UNSCALED Addr = Base + (unsigned)Index
+ enum MemIndexType {
+ SIGNED_SCALED = 0,
+ SIGNED_UNSCALED,
+ UNSIGNED_SCALED,
+ UNSIGNED_UNSCALED
+ };
+
+ static const int LAST_MEM_INDEX_TYPE = UNSIGNED_UNSCALED + 1;
+
+ //===--------------------------------------------------------------------===//
/// LoadExtType enum - This enum defines the three variants of LOADEXT
/// (load with extension).
///
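
Illustrative only (not from the patch): decoding the new MemIndexType when lowering a gather/scatter, mirroring the formulas in the enum comment; Base, Index, and EltSize are assumed inputs.

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/Support/ErrorHandling.h"

    static uint64_t computeAddr(llvm::ISD::MemIndexType T, uint64_t Base,
                                int64_t Index, uint64_t EltSize) {
      switch (T) {
      case llvm::ISD::SIGNED_SCALED:     return Base + Index * (int64_t)EltSize;
      case llvm::ISD::SIGNED_UNSCALED:   return Base + Index;
      case llvm::ISD::UNSIGNED_SCALED:   return Base + (uint64_t)Index * EltSize;
      case llvm::ISD::UNSIGNED_UNSCALED: return Base + (uint64_t)Index;
      }
      llvm_unreachable("covered switch");
    }
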
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 8bb88165d3e1..290a2381d9c9 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -189,6 +189,10 @@ namespace llvm {
return start == Other.start && end == Other.end;
}
+ bool operator!=(const Segment &Other) const {
+ return !(*this == Other);
+ }
+
void dump() const;
};
@@ -224,7 +228,7 @@ namespace llvm {
/// Constructs a new LiveRange object.
LiveRange(bool UseSegmentSet = false)
- : segmentSet(UseSegmentSet ? llvm::make_unique<SegmentSet>()
+ : segmentSet(UseSegmentSet ? std::make_unique<SegmentSet>()
: nullptr) {}
/// Constructs a new LiveRange object by copying segments and valnos from
diff --git a/include/llvm/CodeGen/LiveIntervals.h b/include/llvm/CodeGen/LiveIntervals.h
index 588b0f9cf39c..888d72b87bd1 100644
--- a/include/llvm/CodeGen/LiveIntervals.h
+++ b/include/llvm/CodeGen/LiveIntervals.h
@@ -111,30 +111,31 @@ class VirtRegMap;
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB);
- LiveInterval &getInterval(unsigned Reg) {
+ LiveInterval &getInterval(Register Reg) {
if (hasInterval(Reg))
- return *VirtRegIntervals[Reg];
+ return *VirtRegIntervals[Reg.id()];
else
return createAndComputeVirtRegInterval(Reg);
}
- const LiveInterval &getInterval(unsigned Reg) const {
+ const LiveInterval &getInterval(Register Reg) const {
return const_cast<LiveIntervals*>(this)->getInterval(Reg);
}
- bool hasInterval(unsigned Reg) const {
- return VirtRegIntervals.inBounds(Reg) && VirtRegIntervals[Reg];
+ bool hasInterval(Register Reg) const {
+ return VirtRegIntervals.inBounds(Reg.id()) &&
+ VirtRegIntervals[Reg.id()];
}
/// Interval creation.
- LiveInterval &createEmptyInterval(unsigned Reg) {
+ LiveInterval &createEmptyInterval(Register Reg) {
assert(!hasInterval(Reg) && "Interval already exists!");
- VirtRegIntervals.grow(Reg);
- VirtRegIntervals[Reg] = createInterval(Reg);
- return *VirtRegIntervals[Reg];
+ VirtRegIntervals.grow(Reg.id());
+ VirtRegIntervals[Reg.id()] = createInterval(Reg);
+ return *VirtRegIntervals[Reg.id()];
}
- LiveInterval &createAndComputeVirtRegInterval(unsigned Reg) {
+ LiveInterval &createAndComputeVirtRegInterval(Register Reg) {
LiveInterval &LI = createEmptyInterval(Reg);
computeVirtRegInterval(LI);
return LI;
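
A minimal sketch (invented helper): the interval queries now take Register, with .id() used internally to index the table; unsigned callers still convert implicitly.

    #include "llvm/CodeGen/LiveIntervals.h"
    using namespace llvm;

    static LiveInterval &getOrCompute(LiveIntervals &LIS, Register R) {
      if (!LIS.hasInterval(R))
        return LIS.createAndComputeVirtRegInterval(R);
      return LIS.getInterval(R);
    }
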
diff --git a/lib/CodeGen/LiveRangeCalc.h b/include/llvm/CodeGen/LiveRangeCalc.h
index 11aea5a3b016..08026c05733c 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/include/llvm/CodeGen/LiveRangeCalc.h
@@ -114,7 +114,7 @@ class LiveRangeCalc {
VNInfo *Value = nullptr;
LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill)
- : LR(LR), DomNode(node), Kill(kill) {}
+ : LR(LR), DomNode(node), Kill(kill) {}
};
/// LiveIn - Work list of blocks where the live-in value has yet to be
@@ -145,9 +145,8 @@ class LiveRangeCalc {
/// @p Undef, the function returns false.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
- SlotIndex Use, unsigned PhysReg,
- ArrayRef<SlotIndex> Undefs);
+ bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, SlotIndex Use,
+ unsigned PhysReg, ArrayRef<SlotIndex> Undefs);
/// updateSSA - Compute the values that will be live in to all requested
/// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
@@ -267,8 +266,7 @@ public:
/// @param Kill Index in block where LI is killed. If the value is
/// live-through, set Kill = SLotIndex() and also call
/// setLiveOutValue(MBB, 0).
- void addLiveInBlock(LiveRange &LR,
- MachineDomTreeNode *DomNode,
+ void addLiveInBlock(LiveRange &LR, MachineDomTreeNode *DomNode,
SlotIndex Kill = SlotIndex()) {
LiveIn.push_back(LiveInBlock(LR, DomNode, Kill));
}
diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h
index 7dbb2feab8bf..314afad92970 100644
--- a/include/llvm/CodeGen/LiveRegUnits.h
+++ b/include/llvm/CodeGen/LiveRegUnits.h
@@ -53,8 +53,8 @@ public:
ModifiedRegUnits.addRegsInMask(O->getRegMask());
if (!O->isReg())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Reg.isPhysical())
continue;
if (O->isDef()) {
// Some architectures (e.g. AArch64 XZR/WZR) have registers that are
diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h
index 94e76a75e8da..069d0aa45095 100644
--- a/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/include/llvm/CodeGen/MIRYamlMapping.h
@@ -314,6 +314,7 @@ struct ScalarEnumerationTraits<TargetStackID::Value> {
static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) {
IO.enumCase(ID, "default", TargetStackID::Default);
IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
+ IO.enumCase(ID, "sve-vec", TargetStackID::SVEVector);
IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
}
};
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 333d0a78618c..ccdde78a0b22 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -103,9 +103,9 @@ private:
using LiveInVector = std::vector<RegisterMaskPair>;
LiveInVector LiveIns;
- /// Alignment of the basic block. Zero if the basic block does not need to be
- /// aligned. The alignment is specified as log2(bytes).
- unsigned Alignment = 0;
+ /// Alignment of the basic block. One if the basic block does not need to be
+ /// aligned.
+ Align Alignment;
/// Indicate that this basic block is entered via an exception handler.
bool IsEHPad = false;
@@ -312,7 +312,7 @@ public:
/// Adds the specified register as a live in. Note that it is an error to add
/// the same register to the same set more than once unless the intention is
/// to call sortUniqueLiveIns after all registers are added.
- void addLiveIn(MCPhysReg PhysReg,
+ void addLiveIn(MCRegister PhysReg,
LaneBitmask LaneMask = LaneBitmask::getAll()) {
LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask));
}
@@ -331,7 +331,7 @@ public:
/// Add PhysReg as live in to this block, and ensure that there is a copy of
/// PhysReg to a virtual register of class RC. Return the virtual register
/// that is a copy of the live in PhysReg.
- unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC);
+ unsigned addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC);
/// Remove the specified register from the live in set.
void removeLiveIn(MCPhysReg Reg,
@@ -372,13 +372,11 @@ public:
/// \see getBeginClobberMask()
const uint32_t *getEndClobberMask(const TargetRegisterInfo *TRI) const;
- /// Return alignment of the basic block. The alignment is specified as
- /// log2(bytes).
- unsigned getAlignment() const { return Alignment; }
+ /// Return alignment of the basic block.
+ Align getAlignment() const { return Alignment; }
- /// Set alignment of the basic block. The alignment is specified as
- /// log2(bytes).
- void setAlignment(unsigned Align) { Alignment = Align; }
+ /// Set alignment of the basic block.
+ void setAlignment(Align A) { Alignment = A; }
/// Returns true if the block is a landing pad. That is this basic block is
/// entered via an exception handler.
@@ -636,6 +634,18 @@ public:
return Insts.insertAfter(I.getInstrIterator(), MI);
}
+ /// If I is bundled, insert MI into the instruction list after the end of
+ /// the bundle; otherwise insert MI immediately after I.
+ instr_iterator insertAfterBundle(instr_iterator I, MachineInstr *MI) {
+ assert((I == instr_end() || I->getParent() == this) &&
+ "iterator points outside of basic block");
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ while (I->isBundledWithSucc())
+ ++I;
+ return Insts.insertAfter(I, MI);
+ }
+
/// Remove an instruction from the instruction list and delete it.
///
/// If the instruction is part of a bundle, the other instructions in the
@@ -723,6 +733,10 @@ public:
/// CFG so that it branches to 'New' instead.
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New);
+ /// Update all phi nodes in this basic block to refer to basic block \p New
+ /// instead of basic block \p Old.
+ void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New);
+
/// Various pieces of code can cause excess edges in the CFG to be inserted.
/// If we have proven that MBB can only branch to DestA and DestB, remove any
/// other MBB successors from the CFG. DestA and DestB can be null. Besides
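
A small sketch (invented helper) of the block-level changes above: alignment is now an Align in bytes rather than a log2 value, and PHI rewrites get a dedicated helper.

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    static void retarget(MachineBasicBlock &MBB, MachineBasicBlock *Old,
                         MachineBasicBlock *New) {
      MBB.setAlignment(Align(16)); // previously setAlignment(4) in log2 form
      MBB.replacePhiUsesWith(Old, New);
    }
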
diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h
index 4f4034baf801..503227222207 100644
--- a/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -39,6 +39,10 @@ enum class MachineCombinerPattern {
MULADDXI_OP1,
MULSUBXI_OP1,
// Floating Point
+ FMULADDH_OP1,
+ FMULADDH_OP2,
+ FMULSUBH_OP1,
+ FMULSUBH_OP2,
FMULADDS_OP1,
FMULADDS_OP2,
FMULSUBS_OP1,
@@ -47,16 +51,25 @@ enum class MachineCombinerPattern {
FMULADDD_OP2,
FMULSUBD_OP1,
FMULSUBD_OP2,
+ FNMULSUBH_OP1,
FNMULSUBS_OP1,
FNMULSUBD_OP1,
FMLAv1i32_indexed_OP1,
FMLAv1i32_indexed_OP2,
FMLAv1i64_indexed_OP1,
FMLAv1i64_indexed_OP2,
+ FMLAv4f16_OP1,
+ FMLAv4f16_OP2,
+ FMLAv8f16_OP1,
+ FMLAv8f16_OP2,
FMLAv2f32_OP2,
FMLAv2f32_OP1,
FMLAv2f64_OP1,
FMLAv2f64_OP2,
+ FMLAv4i16_indexed_OP1,
+ FMLAv4i16_indexed_OP2,
+ FMLAv8i16_indexed_OP1,
+ FMLAv8i16_indexed_OP2,
FMLAv2i32_indexed_OP1,
FMLAv2i32_indexed_OP2,
FMLAv2i64_indexed_OP1,
@@ -67,10 +80,18 @@ enum class MachineCombinerPattern {
FMLAv4i32_indexed_OP2,
FMLSv1i32_indexed_OP2,
FMLSv1i64_indexed_OP2,
+ FMLSv4f16_OP1,
+ FMLSv4f16_OP2,
+ FMLSv8f16_OP1,
+ FMLSv8f16_OP2,
FMLSv2f32_OP1,
FMLSv2f32_OP2,
FMLSv2f64_OP1,
FMLSv2f64_OP2,
+ FMLSv4i16_indexed_OP1,
+ FMLSv4i16_indexed_OP2,
+ FMLSv8i16_indexed_OP1,
+ FMLSv8i16_indexed_OP2,
FMLSv2i32_indexed_OP1,
FMLSv2i32_indexed_OP2,
FMLSv2i64_indexed_OP1,
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index d2200080b897..e4d7a02f8c48 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -44,6 +44,8 @@ using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
/// compute a normal dominator tree.
///
class MachineDominatorTree : public MachineFunctionPass {
+ using DomTreeT = DomTreeBase<MachineBasicBlock>;
+
/// Helper structure used to hold all the basic blocks
/// involved in the split of a critical edge.
struct CriticalEdge {
@@ -65,8 +67,8 @@ class MachineDominatorTree : public MachineFunctionPass {
/// such as BB == elt.NewBB.
mutable SmallSet<MachineBasicBlock *, 32> NewBBs;
- /// The DominatorTreeBase that is used to compute a normal dominator tree
- std::unique_ptr<DomTreeBase<MachineBasicBlock>> DT;
+ /// The DominatorTreeBase that is used to compute a normal dominator tree.
+ std::unique_ptr<DomTreeT> DT;
/// Apply all the recorded critical edges to the DT.
/// This updates the underlying DT information in a way that uses
@@ -80,8 +82,8 @@ public:
MachineDominatorTree();
- DomTreeBase<MachineBasicBlock> &getBase() {
- if (!DT) DT.reset(new DomTreeBase<MachineBasicBlock>());
+ DomTreeT &getBase() {
+ if (!DT) DT.reset(new DomTreeT());
applySplitCriticalEdges();
return *DT;
}
@@ -92,31 +94,30 @@ public:
/// multiple blocks if we are computing post dominators. For forward
/// dominators, this will always be a single block (the entry node).
///
- inline const SmallVectorImpl<MachineBasicBlock*> &getRoots() const {
+ const SmallVectorImpl<MachineBasicBlock*> &getRoots() const {
applySplitCriticalEdges();
return DT->getRoots();
}
- inline MachineBasicBlock *getRoot() const {
+ MachineBasicBlock *getRoot() const {
applySplitCriticalEdges();
return DT->getRoot();
}
- inline MachineDomTreeNode *getRootNode() const {
+ MachineDomTreeNode *getRootNode() const {
applySplitCriticalEdges();
return DT->getRootNode();
}
bool runOnMachineFunction(MachineFunction &F) override;
- inline bool dominates(const MachineDomTreeNode* A,
- const MachineDomTreeNode* B) const {
+ bool dominates(const MachineDomTreeNode *A,
+ const MachineDomTreeNode *B) const {
applySplitCriticalEdges();
return DT->dominates(A, B);
}
- inline bool dominates(const MachineBasicBlock* A,
- const MachineBasicBlock* B) const {
+ bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
applySplitCriticalEdges();
return DT->dominates(A, B);
}
@@ -133,36 +134,30 @@ public:
for (; &*I != A && &*I != B; ++I)
/*empty*/ ;
- //if(!DT.IsPostDominators) {
- // A dominates B if it is found first in the basic block.
- return &*I == A;
- //} else {
- // // A post-dominates B if B is found first in the basic block.
- // return &*I == B;
- //}
+ return &*I == A;
}
- inline bool properlyDominates(const MachineDomTreeNode* A,
- const MachineDomTreeNode* B) const {
+ bool properlyDominates(const MachineDomTreeNode *A,
+ const MachineDomTreeNode *B) const {
applySplitCriticalEdges();
return DT->properlyDominates(A, B);
}
- inline bool properlyDominates(const MachineBasicBlock* A,
- const MachineBasicBlock* B) const {
+ bool properlyDominates(const MachineBasicBlock *A,
+ const MachineBasicBlock *B) const {
applySplitCriticalEdges();
return DT->properlyDominates(A, B);
}
/// findNearestCommonDominator - Find nearest common dominator basic block
/// for basic block A and B. If there is no such block then return NULL.
- inline MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
- MachineBasicBlock *B) {
+ MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
+ MachineBasicBlock *B) {
applySplitCriticalEdges();
return DT->findNearestCommonDominator(A, B);
}
- inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+ MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
applySplitCriticalEdges();
return DT->getNode(BB);
}
@@ -170,7 +165,7 @@ public:
/// getNode - return the (Post)DominatorTree node for the specified basic
/// block. This is the same as using operator[] on this class.
///
- inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+ MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
applySplitCriticalEdges();
return DT->getNode(BB);
}
@@ -178,8 +173,8 @@ public:
/// addNewBlock - Add a new node to the dominator tree information. This
/// creates a new node as a child of the DomBB dominator node, linking it into
/// the children list of the immediate dominator.
- inline MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB,
- MachineBasicBlock *DomBB) {
+ MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB) {
applySplitCriticalEdges();
return DT->addNewBlock(BB, DomBB);
}
@@ -187,14 +182,14 @@ public:
/// changeImmediateDominator - This method is used to update the dominator
/// tree information when a node's immediate dominator changes.
///
- inline void changeImmediateDominator(MachineBasicBlock *N,
- MachineBasicBlock* NewIDom) {
+ void changeImmediateDominator(MachineBasicBlock *N,
+ MachineBasicBlock *NewIDom) {
applySplitCriticalEdges();
DT->changeImmediateDominator(N, NewIDom);
}
- inline void changeImmediateDominator(MachineDomTreeNode *N,
- MachineDomTreeNode* NewIDom) {
+ void changeImmediateDominator(MachineDomTreeNode *N,
+ MachineDomTreeNode *NewIDom) {
applySplitCriticalEdges();
DT->changeImmediateDominator(N, NewIDom);
}
@@ -202,14 +197,14 @@ public:
/// eraseNode - Removes a node from the dominator tree. Block must not
/// dominate any other blocks. Removes node from its immediate dominator's
/// children list. Deletes dominator node associated with basic block BB.
- inline void eraseNode(MachineBasicBlock *BB) {
+ void eraseNode(MachineBasicBlock *BB) {
applySplitCriticalEdges();
DT->eraseNode(BB);
}
/// splitBlock - BB is split and now it has one successor. Update dominator
/// tree to reflect this change.
- inline void splitBlock(MachineBasicBlock* NewBB) {
+ void splitBlock(MachineBasicBlock* NewBB) {
applySplitCriticalEdges();
DT->splitBlock(NewBB);
}
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 761735120a64..01fc50d14a7f 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -14,6 +14,7 @@
#define LLVM_CODEGEN_MACHINEFRAMEINFO_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
#include <vector>
@@ -129,7 +130,7 @@ private:
uint64_t Size;
// The required alignment of this stack slot.
- unsigned Alignment;
+ Align Alignment;
// If true, the value of the stack object is set before
// entering the function and is not modified inside the function. By
@@ -180,17 +181,16 @@ private:
uint8_t SSPLayout;
- StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset,
+ StackObject(uint64_t Size, Align Alignment, int64_t SPOffset,
bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca,
bool IsAliased, uint8_t StackID = 0)
- : SPOffset(SPOffset), Size(Size), Alignment(Alignment),
- isImmutable(IsImmutable), isSpillSlot(IsSpillSlot),
- StackID(StackID), Alloca(Alloca), isAliased(IsAliased),
- SSPLayout(SSPLK_None) {}
+ : SPOffset(SPOffset), Size(Size), Alignment(Alignment),
+ isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID),
+ Alloca(Alloca), isAliased(IsAliased), SSPLayout(SSPLK_None) {}
};
/// The alignment of the stack.
- unsigned StackAlignment;
+ Align StackAlignment;
/// Can the stack be realigned. This can be false if the target does not
/// support stack realignment, or if the user asks us not to realign the
@@ -260,7 +260,7 @@ private:
/// native alignment maintained by the compiler, dynamic alignment code will
/// be needed.
///
- unsigned MaxAlignment = 0;
+ Align MaxAlignment;
/// Set to true if this function adjusts the stack -- e.g.,
/// when calling another function. This is only valid during and after
@@ -304,7 +304,7 @@ private:
/// Required alignment of the local object blob, which is the strictest
/// alignment of any object in it.
- unsigned LocalFrameMaxAlign = 0;
+ Align LocalFrameMaxAlign;
/// Whether the local object blob needs to be allocated together. If not,
/// PEI should ignore the isPreAllocated flags on the stack objects and
@@ -338,8 +338,8 @@ private:
public:
explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable,
bool ForcedRealign)
- : StackAlignment(StackAlignment), StackRealignable(StackRealignable),
- ForcedRealign(ForcedRealign) {}
+ : StackAlignment(assumeAligned(StackAlignment)),
+ StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {}
/// Return true if there are any stack objects in this function.
bool hasStackObjects() const { return !Objects.empty(); }
@@ -419,10 +419,12 @@ public:
/// Required alignment of the local object blob,
/// which is the strictest alignment of any object in it.
- void setLocalFrameMaxAlign(unsigned Align) { LocalFrameMaxAlign = Align; }
+ void setLocalFrameMaxAlign(Align Alignment) {
+ LocalFrameMaxAlign = Alignment;
+ }
/// Return the required alignment of the local object blob.
- unsigned getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; }
+ Align getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; }
/// Get whether the local allocation blob should be allocated together or
/// let PEI allocate the locals in it directly.
@@ -462,14 +464,14 @@ public:
unsigned getObjectAlignment(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
- return Objects[ObjectIdx+NumFixedObjects].Alignment;
+ return Objects[ObjectIdx + NumFixedObjects].Alignment.value();
}
/// setObjectAlignment - Change the alignment of the specified stack object.
void setObjectAlignment(int ObjectIdx, unsigned Align) {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
- Objects[ObjectIdx+NumFixedObjects].Alignment = Align;
+ Objects[ObjectIdx + NumFixedObjects].Alignment = assumeAligned(Align);
// Only ensure max alignment for the default stack.
if (getStackID(ObjectIdx) == 0)
@@ -561,10 +563,14 @@ public:
/// Return the alignment in bytes that this function must be aligned to,
/// which is greater than the default stack alignment provided by the target.
- unsigned getMaxAlignment() const { return MaxAlignment; }
+ unsigned getMaxAlignment() const { return MaxAlignment.value(); }
/// Make sure the function is at least Align bytes aligned.
- void ensureMaxAlignment(unsigned Align);
+ void ensureMaxAlignment(Align Alignment);
+ /// FIXME: Remove this once transition to Align is over.
+ inline void ensureMaxAlignment(unsigned Align) {
+ ensureMaxAlignment(assumeAligned(Align));
+ }
/// Return true if this function adjusts the stack -- e.g.,
/// when calling another function. This is only valid during and after
@@ -728,12 +734,24 @@ public:
/// Create a new statically sized stack object, returning
/// a nonnegative identifier to represent it.
- int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot,
+ int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot,
const AllocaInst *Alloca = nullptr, uint8_t ID = 0);
+ /// FIXME: Remove this function when transition to Align is over.
+ inline int CreateStackObject(uint64_t Size, unsigned Alignment,
+ bool isSpillSlot,
+ const AllocaInst *Alloca = nullptr,
+ uint8_t ID = 0) {
+ return CreateStackObject(Size, assumeAligned(Alignment), isSpillSlot,
+ Alloca, ID);
+ }
/// Create a new statically sized stack object that represents a spill slot,
/// returning a nonnegative identifier to represent it.
- int CreateSpillStackObject(uint64_t Size, unsigned Alignment);
+ int CreateSpillStackObject(uint64_t Size, Align Alignment);
+ /// FIXME: Remove this function when transition to Align is over.
+ inline int CreateSpillStackObject(uint64_t Size, unsigned Alignment) {
+ return CreateSpillStackObject(Size, assumeAligned(Alignment));
+ }
/// Remove or mark dead a statically sized stack object.
void RemoveStackObject(int ObjectIdx) {
@@ -744,7 +762,11 @@ public:
/// Notify the MachineFrameInfo object that a variable sized object has been
/// created. This must be created whenever a variable sized object is
/// created, whether or not the index returned is actually used.
- int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca);
+ int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca);
+ /// FIXME: Remove this function when transition to Align is over.
+ int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) {
+ return CreateVariableSizedObject(assumeAligned(Alignment), Alloca);
+ }
/// Returns a reference to call saved info vector for the current function.
const std::vector<CalleeSavedInfo> &getCalleeSavedInfo() const {
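
A sketch (invented helper) of the transition overloads above: the Align and unsigned overload families coexist, with the unsigned forms forwarding through assumeAligned so existing callers keep building.

    #include "llvm/CodeGen/MachineFrameInfo.h"
    using namespace llvm;

    static void makeSlots(MachineFrameInfo &MFI) {
      int FI0 = MFI.CreateStackObject(/*Size=*/8, Align(8),
                                      /*isSpillSlot=*/false);
      int FI1 = MFI.CreateStackObject(8, 8u, false); // unsigned overload
      MFI.ensureMaxAlignment(Align(16));
      (void)FI0;
      (void)FI1;
    }
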
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 201c126ee52e..3a3176e51c51 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -36,6 +36,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Recycler.h"
+#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <memory>
@@ -277,7 +278,7 @@ class MachineFunction {
unsigned FunctionNumber;
/// Alignment - The alignment of the function.
- unsigned Alignment;
+ Align Alignment;
/// ExposesReturnsTwice - True if the function calls setjmp or related
/// functions with attribute "returns twice", but doesn't have
@@ -322,7 +323,7 @@ class MachineFunction {
std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations;
/// CodeView heapallocsites.
- std::vector<std::tuple<MCSymbol*, MCSymbol*, DIType*>>
+ std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>>
CodeViewHeapAllocSites;
bool CallsEHReturn = false;
@@ -400,6 +401,17 @@ private:
/// Map a call instruction to call site arguments forwarding info.
CallSiteInfoMap CallSitesInfo;
+ /// A helper function that returns call site info for a given call
+ /// instruction if debug entry value support is enabled.
+ CallSiteInfoMap::iterator getCallSiteInfo(const MachineInstr *MI) {
+ assert(MI->isCall() &&
+ "Call site info refers only to call instructions!");
+
+ if (!Target.Options.EnableDebugEntryValues)
+ return CallSitesInfo.end();
+ return CallSitesInfo.find(MI);
+ }
+
// Callbacks for insertion and removal.
void handleInsertion(MachineInstr &MI);
void handleRemoval(MachineInstr &MI);
@@ -508,15 +520,16 @@ public:
const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; }
WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; }
- /// getAlignment - Return the alignment (log2, not bytes) of the function.
- unsigned getAlignment() const { return Alignment; }
+ /// getAlignment - Return the alignment of the function.
+ Align getAlignment() const { return Alignment; }
- /// setAlignment - Set the alignment (log2, not bytes) of the function.
- void setAlignment(unsigned A) { Alignment = A; }
+ /// setAlignment - Set the alignment of the function.
+ void setAlignment(Align A) { Alignment = A; }
- /// ensureAlignment - Make sure the function is at least 1 << A bytes aligned.
- void ensureAlignment(unsigned A) {
- if (Alignment < A) Alignment = A;
+ /// ensureAlignment - Make sure the function is at least A bytes aligned.
+ void ensureAlignment(Align A) {
+ if (Alignment < A)
+ Alignment = A;
}
/// exposesReturnsTwice - Returns true if the function calls setjmp or
@@ -935,10 +948,10 @@ public:
}
/// Record heapallocsites
- void addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD);
+ void addCodeViewHeapAllocSite(MachineInstr *I, const MDNode *MD);
- ArrayRef<std::tuple<MCSymbol*, MCSymbol*, DIType*>>
- getCodeViewHeapAllocSites() const {
+ ArrayRef<std::tuple<MCSymbol *, MCSymbol *, const DIType *>>
+ getCodeViewHeapAllocSites() const {
return CodeViewHeapAllocSites;
}
@@ -976,12 +989,24 @@ public:
return CallSitesInfo;
}
- /// Update call sites info by deleting entry for \p Old call instruction.
- /// If \p New is present then transfer \p Old call info to it. This function
- /// should be called before removing call instruction or before replacing
- /// call instruction with new one.
- void updateCallSiteInfo(const MachineInstr *Old,
- const MachineInstr *New = nullptr);
+ /// The following functions update call site info. They should be called
+ /// removing, replacing or copying call instruction.
+
+ /// Move the call site info from \p Old to \p New. This function is
+ /// used when we are replacing one call instruction with another one
+ /// to the same callee.
+ void moveCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New);
+
+ /// Erase the call site info for \p MI. It is used to remove a call
+ /// instruction from the instruction stream.
+ void eraseCallSiteInfo(const MachineInstr *MI);
+
+ /// Copy the call site info from \p Old to \p New. It is used when we
+ /// are making a copy of the instruction that will be inserted at a
+ /// different point of the instruction stream.
+ void copyCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New);
};
//===--------------------------------------------------------------------===//
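
For illustration (invented helper), the split API that replaces updateCallSiteInfo is used as follows; Old, New, Dead, and Clone are assumed call instructions:

    #include "llvm/CodeGen/MachineFunction.h"
    using namespace llvm;

    static void fixupCallSites(MachineFunction &MF, const MachineInstr *Old,
                               const MachineInstr *New,
                               const MachineInstr *Dead,
                               const MachineInstr *Clone) {
      MF.moveCallSiteInfo(Old, New);   // replace a call in place
      MF.eraseCallSiteInfo(Dead);      // remove a call outright
      MF.copyCallSiteInfo(Old, Clone); // duplicate a call elsewhere
    }
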
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index c82c5b137507..c94ad292ec96 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -20,11 +20,9 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -38,6 +36,7 @@
namespace llvm {
+class AAResults;
template <typename T> class ArrayRef;
class DIExpression;
class DILocalVariable;
@@ -427,6 +426,22 @@ public:
return getNumExplicitDefs() + MCID->getNumImplicitDefs();
}
+ /// Returns true if the instruction has an implicit definition.
+ bool hasImplicitDef() const {
+ for (unsigned I = getNumExplicitOperands(), E = getNumOperands();
+ I != E; ++I) {
+ const MachineOperand &MO = getOperand(I);
+ if (MO.isDef() && MO.isImplicit())
+ return true;
+ }
+ return false;
+ }
+
+ /// Returns the number of implicit operands.
+ unsigned getNumImplicitOperands() const {
+ return getNumOperands() - getNumExplicitOperands();
+ }
+
/// Return true if operand \p OpIdx is a subregister index.
bool isOperandSubregIdx(unsigned OpIdx) const {
assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate &&
@@ -602,6 +617,12 @@ public:
return hasPropertyInBundle(1ULL << MCFlag, Type);
}
+ /// Return true if this is an instruction that should go through the usual
+ /// legalization steps.
+ bool isPreISelOpcode(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::PreISelOpcode, Type);
+ }
+
/// Return true if this instruction can have a variable number of operands.
/// In this case, the variable operands will be after the normal
/// operands but before the implicit definitions and uses (if any are
@@ -1020,15 +1041,13 @@ public:
}
/// A DBG_VALUE is an entry value iff its debug expression contains the
- /// DW_OP_entry_value DWARF operation.
- bool isDebugEntryValue() const {
- return isDebugValue() && getDebugExpression()->isEntryValue();
- }
+ /// DW_OP_LLVM_entry_value operation.
+ bool isDebugEntryValue() const;
/// Return true if the instruction is a debug value which describes a part of
/// a variable as unavailable.
bool isUndefDebugValue() const {
- return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg();
+ return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg().isValid();
}
bool isPHI() const {
@@ -1140,7 +1159,7 @@ public:
/// is a read of a super-register.
/// This does not count partial redefines of virtual registers as reads:
/// %reg1024:6 = OP.
- bool readsRegister(unsigned Reg,
+ bool readsRegister(Register Reg,
const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterUseOperandIdx(Reg, false, TRI) != -1;
}
@@ -1148,20 +1167,20 @@ public:
/// Return true if the MachineInstr reads the specified virtual register.
/// Take into account that a partial define is a
/// read-modify-write operation.
- bool readsVirtualRegister(unsigned Reg) const {
+ bool readsVirtualRegister(Register Reg) const {
return readsWritesVirtualRegister(Reg).first;
}
/// Return a pair of bools (reads, writes) indicating if this instruction
/// reads or writes Reg. This also considers partial defines.
/// If Ops is not null, all operand indices for Reg are added.
- std::pair<bool,bool> readsWritesVirtualRegister(unsigned Reg,
+ std::pair<bool,bool> readsWritesVirtualRegister(Register Reg,
SmallVectorImpl<unsigned> *Ops = nullptr) const;
/// Return true if the MachineInstr kills the specified register.
/// If TargetRegisterInfo is passed, then it also checks if there is
/// a kill of a super-register.
- bool killsRegister(unsigned Reg,
+ bool killsRegister(Register Reg,
const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterUseOperandIdx(Reg, true, TRI) != -1;
}
@@ -1170,7 +1189,7 @@ public:
/// If TargetRegisterInfo is passed, then it also checks
/// if there is a def of a super-register.
/// NOTE: It's ignoring subreg indices on virtual registers.
- bool definesRegister(unsigned Reg,
+ bool definesRegister(Register Reg,
const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterDefOperandIdx(Reg, false, false, TRI) != -1;
}
@@ -1178,38 +1197,38 @@ public:
/// Return true if the MachineInstr modifies (fully define or partially
/// define) the specified register.
/// NOTE: It's ignoring subreg indices on virtual registers.
- bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const {
+ bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const {
return findRegisterDefOperandIdx(Reg, false, true, TRI) != -1;
}
/// Returns true if the register is dead in this machine instruction.
/// If TargetRegisterInfo is passed, then it also checks
/// if there is a dead def of a super-register.
- bool registerDefIsDead(unsigned Reg,
+ bool registerDefIsDead(Register Reg,
const TargetRegisterInfo *TRI = nullptr) const {
return findRegisterDefOperandIdx(Reg, true, false, TRI) != -1;
}
/// Returns true if the MachineInstr has an implicit-use operand of exactly
/// the given register (not considering sub/super-registers).
- bool hasRegisterImplicitUseOperand(unsigned Reg) const;
+ bool hasRegisterImplicitUseOperand(Register Reg) const;
/// Returns the operand index that is a use of the specific register or -1
/// if it is not found. It further tightens the search criteria to a use
/// that kills the register if isKill is true.
- int findRegisterUseOperandIdx(unsigned Reg, bool isKill = false,
+ int findRegisterUseOperandIdx(Register Reg, bool isKill = false,
const TargetRegisterInfo *TRI = nullptr) const;
/// Wrapper for findRegisterUseOperandIdx, it returns
/// a pointer to the MachineOperand rather than an index.
- MachineOperand *findRegisterUseOperand(unsigned Reg, bool isKill = false,
+ MachineOperand *findRegisterUseOperand(Register Reg, bool isKill = false,
const TargetRegisterInfo *TRI = nullptr) {
int Idx = findRegisterUseOperandIdx(Reg, isKill, TRI);
return (Idx == -1) ? nullptr : &getOperand(Idx);
}
const MachineOperand *findRegisterUseOperand(
- unsigned Reg, bool isKill = false,
+ Register Reg, bool isKill = false,
const TargetRegisterInfo *TRI = nullptr) const {
return const_cast<MachineInstr *>(this)->
findRegisterUseOperand(Reg, isKill, TRI);
@@ -1221,14 +1240,14 @@ public:
/// overlap the specified register. If TargetRegisterInfo is non-null,
/// then it also checks if there is a def of a super-register.
/// This may also return a register mask operand when Overlap is true.
- int findRegisterDefOperandIdx(unsigned Reg,
+ int findRegisterDefOperandIdx(Register Reg,
bool isDead = false, bool Overlap = false,
const TargetRegisterInfo *TRI = nullptr) const;
/// Wrapper for findRegisterDefOperandIdx, it returns
/// a pointer to the MachineOperand rather than an index.
MachineOperand *
- findRegisterDefOperand(unsigned Reg, bool isDead = false,
+ findRegisterDefOperand(Register Reg, bool isDead = false,
bool Overlap = false,
const TargetRegisterInfo *TRI = nullptr) {
int Idx = findRegisterDefOperandIdx(Reg, isDead, Overlap, TRI);
@@ -1236,7 +1255,7 @@ public:
}
const MachineOperand *
- findRegisterDefOperand(unsigned Reg, bool isDead = false,
+ findRegisterDefOperand(Register Reg, bool isDead = false,
bool Overlap = false,
const TargetRegisterInfo *TRI = nullptr) const {
return const_cast<MachineInstr *>(this)->findRegisterDefOperand(
@@ -1283,7 +1302,7 @@ public:
///
/// \pre CurRC must not be NULL.
const TargetRegisterClass *getRegClassConstraintEffectForVReg(
- unsigned Reg, const TargetRegisterClass *CurRC,
+ Register Reg, const TargetRegisterClass *CurRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
bool ExploreBundle = false) const;
@@ -1346,39 +1365,39 @@ public:
/// Replace all occurrences of FromReg with ToReg:SubIdx,
/// properly composing subreg indices where necessary.
- void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx,
+ void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx,
const TargetRegisterInfo &RegInfo);
/// We have determined MI kills a register. Look for the
/// operand that uses it and mark it as IsKill. If AddIfNotFound is true,
/// add an implicit operand if it's not found. Returns true if the operand
/// exists / is added.
- bool addRegisterKilled(unsigned IncomingReg,
+ bool addRegisterKilled(Register IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound = false);
/// Clear all kill flags affecting Reg. If RegInfo is provided, this includes
/// all aliasing registers.
- void clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo);
+ void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo);
/// We have determined MI defined a register without a use.
/// Look for the operand that defines it and mark it as IsDead. If
/// AddIfNotFound is true, add an implicit operand if it's not found. Returns
/// true if the operand exists / is added.
- bool addRegisterDead(unsigned Reg, const TargetRegisterInfo *RegInfo,
+ bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo,
bool AddIfNotFound = false);
/// Clear all dead flags on operands defining register @p Reg.
- void clearRegisterDeads(unsigned Reg);
+ void clearRegisterDeads(Register Reg);
/// Mark all subregister defs of register @p Reg with the undef flag.
/// This function is used when we determined to have a subregister def in an
/// otherwise undefined super register.
- void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true);
+ void setRegisterDefReadUndef(Register Reg, bool IsUndef = true);
/// We have determined MI defines a register. Make sure there is an operand
/// defining Reg.
- void addRegisterDefined(unsigned Reg,
+ void addRegisterDefined(Register Reg,
const TargetRegisterInfo *RegInfo = nullptr);
/// Mark every physreg used by this instruction as
@@ -1386,13 +1405,13 @@ public:
///
/// On instructions with register mask operands, also add implicit-def
/// operands for all registers in UsedRegs.
- void setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
+ void setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs,
const TargetRegisterInfo &TRI);
/// Return true if it is safe to move this instruction. If
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
- bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const;
+ bool isSafeToMove(AAResults *AA, bool &SawStore) const;
/// Returns true if this instruction's memory access aliases the memory
/// access of Other.
@@ -1404,7 +1423,7 @@ public:
/// @param AA Optional alias analysis, used to compare memory operands.
/// @param Other MachineInstr to check aliasing against.
/// @param UseTBAA Whether to pass TBAA information to alias analysis.
- bool mayAlias(AliasAnalysis *AA, const MachineInstr &Other, bool UseTBAA) const;
+ bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const;
/// Return true if this instruction may have an ordered
/// or volatile memory reference, or if the information describing the memory
@@ -1419,7 +1438,7 @@ public:
/// argument area of a function (if it does not change). If the instruction
/// does multiple loads, this returns true only if all of the loads are
/// dereferenceable and invariant.
- bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const;
+ bool isDereferenceableInvariantLoad(AAResults *AA) const;
/// If the specified instruction is a PHI that always merges together the
/// same virtual register, return the register, otherwise return 0.
@@ -1603,9 +1622,15 @@ public:
/// Scan instructions following MI and collect any matching DBG_VALUEs.
void collectDebugValues(SmallVectorImpl<MachineInstr *> &DbgValues);
- /// Find all DBG_VALUEs immediately following this instruction that point
- /// to a register def in this instruction and point them to \p Reg instead.
- void changeDebugValuesDefReg(unsigned Reg);
+ /// Find all DBG_VALUEs that point to the register def in this instruction
+ /// and point them to \p Reg instead.
+ void changeDebugValuesDefReg(Register Reg);
+
+ /// Returns the Intrinsic::ID for this instruction.
+ /// \pre Must have an intrinsic ID operand.
+ unsigned getIntrinsicID() const {
+ return getOperand(getNumExplicitDefs()).getIntrinsicID();
+ }
private:
/// If this instruction is embedded into a MachineFunction, return the
@@ -1630,7 +1655,7 @@ private:
/// this MI and the given operand index \p OpIdx.
/// If the related operand does not constrain Reg, this returns CurRC.
const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl(
- unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC,
+ unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const;
};
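
A minimal sketch (invented helper) over the new implicit-operand queries and the Register-based query surface:

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    static unsigned countImplicitIfReads(const MachineInstr &MI, Register Reg,
                                         const TargetRegisterInfo *TRI) {
      if (MI.hasImplicitDef() && MI.readsRegister(Reg, TRI))
        return MI.getNumImplicitOperands();
      return 0;
    }
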
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 6d7fb72b6bd1..880d4829ac7e 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -85,7 +85,7 @@ public:
Register getReg(unsigned Idx) const { return MI->getOperand(Idx).getReg(); }
/// Add a new virtual register operand.
- const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0,
+ const MachineInstrBuilder &addReg(Register RegNo, unsigned flags = 0,
unsigned SubReg = 0) const {
assert((flags & 0x1) == 0 &&
"Passing in 'true' to addReg is forbidden! Use enums instead.");
@@ -104,14 +104,14 @@ public:
}
/// Add a virtual register definition operand.
- const MachineInstrBuilder &addDef(unsigned RegNo, unsigned Flags = 0,
+ const MachineInstrBuilder &addDef(Register RegNo, unsigned Flags = 0,
unsigned SubReg = 0) const {
return addReg(RegNo, Flags | RegState::Define, SubReg);
}
/// Add a virtual register use operand. It is an error for Flags to contain
/// `RegState::Define` when calling this function.
- const MachineInstrBuilder &addUse(unsigned RegNo, unsigned Flags = 0,
+ const MachineInstrBuilder &addUse(Register RegNo, unsigned Flags = 0,
unsigned SubReg = 0) const {
assert(!(Flags & RegState::Define) &&
"Misleading addUse defines register, use addReg instead.");
@@ -135,7 +135,7 @@ public:
}
const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateMBB(MBB, TargetFlags));
return *this;
}
@@ -145,42 +145,42 @@ public:
return *this;
}
- const MachineInstrBuilder &addConstantPoolIndex(unsigned Idx,
- int Offset = 0,
- unsigned char TargetFlags = 0) const {
+ const MachineInstrBuilder &
+ addConstantPoolIndex(unsigned Idx, int Offset = 0,
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateCPI(Idx, Offset, TargetFlags));
return *this;
}
const MachineInstrBuilder &addTargetIndex(unsigned Idx, int64_t Offset = 0,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateTargetIndex(Idx, Offset,
TargetFlags));
return *this;
}
const MachineInstrBuilder &addJumpTableIndex(unsigned Idx,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateJTI(Idx, TargetFlags));
return *this;
}
const MachineInstrBuilder &addGlobalAddress(const GlobalValue *GV,
int64_t Offset = 0,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateGA(GV, Offset, TargetFlags));
return *this;
}
const MachineInstrBuilder &addExternalSymbol(const char *FnName,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateES(FnName, TargetFlags));
return *this;
}
const MachineInstrBuilder &addBlockAddress(const BlockAddress *BA,
int64_t Offset = 0,
- unsigned char TargetFlags = 0) const {
+ unsigned TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateBA(BA, Offset, TargetFlags));
return *this;
}
@@ -250,6 +250,11 @@ public:
return *this;
}
+ const MachineInstrBuilder &addShuffleMask(const Constant *Val) const {
+ MI->addOperand(*MF, MachineOperand::CreateShuffleMask(Val));
+ return *this;
+ }
+
const MachineInstrBuilder &addSym(MCSymbol *Sym,
unsigned char TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym, TargetFlags));
@@ -316,7 +321,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL,
/// This version of the builder sets up the first operand as a
/// destination virtual register.
inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL,
- const MCInstrDesc &MCID, unsigned DestReg) {
+ const MCInstrDesc &MCID, Register DestReg) {
return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL))
.addReg(DestReg, RegState::Define);
}
@@ -327,7 +332,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL,
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- unsigned DestReg) {
+ Register DestReg) {
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = MF.CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
@@ -343,7 +348,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::instr_iterator I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- unsigned DestReg) {
+ Register DestReg) {
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = MF.CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
@@ -352,7 +357,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- unsigned DestReg) {
+ Register DestReg) {
// Calling the overload for instr_iterator is always correct. However, the
// definition is not available in headers, so inline the check.
if (I.isInsideBundle())
@@ -362,7 +367,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I,
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- unsigned DestReg) {
+ Register DestReg) {
return BuildMI(BB, *I, DL, MCID, DestReg);
}
@@ -416,7 +421,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL,
/// end of the given MachineBasicBlock, and sets up the first operand as a
/// destination virtual register.
inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL,
- const MCInstrDesc &MCID, unsigned DestReg) {
+ const MCInstrDesc &MCID, Register DestReg) {
return BuildMI(*BB, BB->end(), DL, MCID, DestReg);
}
@@ -426,7 +431,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL,
/// second operand is an immediate.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL,
const MCInstrDesc &MCID, bool IsIndirect,
- unsigned Reg, const MDNode *Variable,
+ Register Reg, const MDNode *Variable,
const MDNode *Expr);
/// This version of the builder builds a DBG_VALUE intrinsic
@@ -442,7 +447,7 @@ MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL,
MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
const MCInstrDesc &MCID, bool IsIndirect,
- unsigned Reg, const MDNode *Variable,
+ Register Reg, const MDNode *Variable,
const MDNode *Expr);
/// This version of the builder builds a DBG_VALUE intrinsic
@@ -490,16 +495,13 @@ inline unsigned getRenamableRegState(bool B) {
/// Get all register state flags from machine operand \p RegOp.
inline unsigned getRegState(const MachineOperand &RegOp) {
assert(RegOp.isReg() && "Not a register operand");
- return getDefRegState(RegOp.isDef()) |
- getImplRegState(RegOp.isImplicit()) |
- getKillRegState(RegOp.isKill()) |
- getDeadRegState(RegOp.isDead()) |
- getUndefRegState(RegOp.isUndef()) |
- getInternalReadRegState(RegOp.isInternalRead()) |
- getDebugRegState(RegOp.isDebug()) |
- getRenamableRegState(
- TargetRegisterInfo::isPhysicalRegister(RegOp.getReg()) &&
- RegOp.isRenamable());
+ return getDefRegState(RegOp.isDef()) | getImplRegState(RegOp.isImplicit()) |
+ getKillRegState(RegOp.isKill()) | getDeadRegState(RegOp.isDead()) |
+ getUndefRegState(RegOp.isUndef()) |
+ getInternalReadRegState(RegOp.isInternalRead()) |
+ getDebugRegState(RegOp.isDebug()) |
+ getRenamableRegState(Register::isPhysicalRegister(RegOp.getReg()) &&
+ RegOp.isRenamable());
}
/// Helper class for constructing bundles of MachineInstrs.
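A minimal usage sketch of the widened builder API, assuming a caller-provided opcode Opc, register class RC, global GV, and target flags Flags; note DestReg is now a Register and target flags are a full unsigned:

    // Hypothetical caller; Opc, RC, GV and Flags are assumptions, not from
    // this patch.
    llvm::Register Dst = MRI.createVirtualRegister(&RC);
    llvm::BuildMI(MBB, MBBI, DL, TII->get(Opc), Dst)
        .addGlobalAddress(GV, /*Offset=*/0, /*TargetFlags=*/Flags);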
diff --git a/include/llvm/CodeGen/MachineLoopUtils.h b/include/llvm/CodeGen/MachineLoopUtils.h
new file mode 100644
index 000000000000..41379b75d00a
--- /dev/null
+++ b/include/llvm/CodeGen/MachineLoopUtils.h
@@ -0,0 +1,41 @@
+//=- MachineLoopUtils.h - Helper functions for manipulating loops -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
+#define LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
+
+namespace llvm {
+class MachineBasicBlock;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+
+enum LoopPeelDirection {
+ LPD_Front, ///< Peel the first iteration of the loop.
+ LPD_Back ///< Peel the last iteration of the loop.
+};
+
+/// Peels a single-block loop. Loop must have two successors, one of which
+/// must be itself. Similarly, it must have two predecessors, one of which must
+/// be itself.
+///
+/// The loop block is copied and inserted into the CFG such that two copies of
+/// the loop follow on from each other. The copy is inserted either before or
+/// after the loop based on Direction.
+///
+/// Phis are updated and an unconditional branch inserted at the end of the
+/// clone so as to execute a single iteration.
+///
+/// The trip count of Loop is not updated.
+MachineBasicBlock *PeelSingleBlockLoop(LoopPeelDirection Direction,
+ MachineBasicBlock *Loop,
+ MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII);
+
+} // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
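A hedged sketch of calling the new helper to peel the last iteration of a single-block loop; LoopBB, MRI, and TII are assumed to come from the enclosing pass:

    #include "llvm/CodeGen/MachineLoopUtils.h"

    // LoopBB must be its own predecessor and successor, per the contract above.
    llvm::MachineBasicBlock *Peeled =
        llvm::PeelSingleBlockLoop(llvm::LPD_Back, LoopBB, MRI, TII);
    // Peeled executes one trailing iteration; phis and the trailing branch are
    // rewritten by the helper. The trip count is left for the caller to update.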
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 65f706302bc2..33a48a235e18 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -293,8 +293,6 @@ public:
/// Support for operator<<.
/// @{
- void print(raw_ostream &OS) const;
- void print(raw_ostream &OS, ModuleSlotTracker &MST) const;
void print(raw_ostream &OS, ModuleSlotTracker &MST,
SmallVectorImpl<StringRef> &SSNs, const LLVMContext &Context,
const MachineFrameInfo *MFI, const TargetInstrInfo *TII) const;
@@ -319,11 +317,6 @@ public:
}
};
-inline raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO) {
- MRO.print(OS);
- return OS;
-}
-
} // End llvm namespace
#endif
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 4ff5c7fd013a..6902dada2423 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -33,6 +33,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
@@ -74,7 +75,10 @@ protected:
/// made by different debugging and exception handling schemes and reformatted
/// for specific use.
///
-class MachineModuleInfo : public ImmutablePass {
+class MachineModuleInfo {
+ friend class MachineModuleInfoWrapperPass;
+ friend class MachineModuleAnalysis;
+
const LLVMTargetMachine &TM;
/// This is the MCContext used for the entire code generator.
@@ -140,15 +144,17 @@ class MachineModuleInfo : public ImmutablePass {
const Function *LastRequest = nullptr; ///< Used for shortcut/cache.
MachineFunction *LastResult = nullptr; ///< Used for shortcut/cache.
-public:
- static char ID; // Pass identification, replacement for typeid
+ MachineModuleInfo &operator=(MachineModuleInfo &&MMII) = delete;
+public:
explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr);
- ~MachineModuleInfo() override;
- // Initialization and Finalization
- bool doInitialization(Module &) override;
- bool doFinalization(Module &) override;
+ MachineModuleInfo(MachineModuleInfo &&MMII);
+
+ ~MachineModuleInfo();
+
+ void initialize();
+ void finalize();
const LLVMTargetMachine &getTarget() const { return TM; }
@@ -254,6 +260,38 @@ public:
/// \}
}; // End class MachineModuleInfo
+class MachineModuleInfoWrapperPass : public ImmutablePass {
+ MachineModuleInfo MMI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM = nullptr);
+
+ // Initialization and Finalization
+ bool doInitialization(Module &) override;
+ bool doFinalization(Module &) override;
+
+ MachineModuleInfo &getMMI() { return MMI; }
+ const MachineModuleInfo &getMMI() const { return MMI; }
+};
+
+/// An analysis that produces \c MachineModuleInfo for a module.
+class MachineModuleAnalysis : public AnalysisInfoMixin<MachineModuleAnalysis> {
+ friend AnalysisInfoMixin<MachineModuleAnalysis>;
+ static AnalysisKey Key;
+
+ const LLVMTargetMachine *TM;
+
+public:
+ /// Provide the result type for this analysis pass.
+ using Result = MachineModuleInfo;
+
+ MachineModuleAnalysis(const LLVMTargetMachine *TM) : TM(TM) {}
+
+ /// Run the analysis pass and produce machine module information.
+ MachineModuleInfo run(Module &M, ModuleAnalysisManager &);
+};
+
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEMODULEINFO_H
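The caller-side migration implied by splitting MachineModuleInfo out of the pass is mechanical; a hedged sketch under the legacy pass manager:

    // Before this change, MachineModuleInfo was itself an ImmutablePass:
    //   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
    // After it, the data lives behind the wrapper pass:
    llvm::MachineModuleInfo &MMI =
        getAnalysis<llvm::MachineModuleInfoWrapperPass>().getMMI();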
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 2152c7582e5a..df914dc2d85e 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -23,6 +23,7 @@
namespace llvm {
class BlockAddress;
+class Constant;
class ConstantFP;
class ConstantInt;
class GlobalValue;
@@ -68,7 +69,8 @@ public:
MO_CFIIndex, ///< MCCFIInstruction index.
MO_IntrinsicID, ///< Intrinsic ID for ISel
MO_Predicate, ///< Generic predicate for ISel
- MO_Last = MO_Predicate,
+ MO_ShuffleMask, ///< Other IR Constant for ISel (shuffle masks)
+ MO_Last = MO_ShuffleMask
};
private:
@@ -172,6 +174,7 @@ private:
unsigned CFIIndex; // For MO_CFI.
Intrinsic::ID IntrinsicID; // For MO_IntrinsicID.
unsigned Pred; // For MO_Predicate
+ const Constant *ShuffleMask; // For MO_ShuffleMask
struct { // For MO_Register.
// Register number is in SmallContents.RegNo.
@@ -341,6 +344,7 @@ public:
bool isCFIIndex() const { return OpKind == MO_CFIIndex; }
bool isIntrinsicID() const { return OpKind == MO_IntrinsicID; }
bool isPredicate() const { return OpKind == MO_Predicate; }
+ bool isShuffleMask() const { return OpKind == MO_ShuffleMask; }
//===--------------------------------------------------------------------===//
// Accessors for Register Operands
//===--------------------------------------------------------------------===//
@@ -455,7 +459,7 @@ public:
/// Change the register this operand corresponds to.
///
- void setReg(unsigned Reg);
+ void setReg(Register Reg);
void setSubReg(unsigned subReg) {
assert(isReg() && "Wrong MachineOperand mutator");
@@ -468,13 +472,13 @@ public:
/// using TargetRegisterInfo to compose the subreg indices if necessary.
/// Reg must be a virtual register, SubIdx can be 0.
///
- void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo&);
+ void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo&);
/// substPhysReg - Substitute the current register with the physical register
/// Reg, taking any existing SubReg into account. For instance,
/// substPhysReg(%eax) will change %reg1024:sub_8bit to %al.
///
- void substPhysReg(unsigned Reg, const TargetRegisterInfo&);
+ void substPhysReg(MCRegister Reg, const TargetRegisterInfo&);
void setIsUse(bool Val = true) { setIsDef(!Val); }
@@ -579,6 +583,11 @@ public:
return Contents.Pred;
}
+ const Constant *getShuffleMask() const {
+ assert(isShuffleMask() && "Wrong MachineOperand accessor");
+ return Contents.ShuffleMask;
+ }
+
/// Return the offset from the symbol in this operand. This always returns 0
/// for ExternalSymbol operands.
int64_t getOffset() const {
@@ -717,11 +726,11 @@ public:
void ChangeToFPImmediate(const ConstantFP *FPImm);
/// ChangeToES - Replace this operand with a new external symbol operand.
- void ChangeToES(const char *SymName, unsigned char TargetFlags = 0);
+ void ChangeToES(const char *SymName, unsigned TargetFlags = 0);
/// ChangeToGA - Replace this operand with a new global address operand.
void ChangeToGA(const GlobalValue *GV, int64_t Offset,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
/// ChangeToMCSymbol - Replace this operand with a new MC symbol operand.
void ChangeToMCSymbol(MCSymbol *Sym);
@@ -731,12 +740,12 @@ public:
/// Replace this operand with a target index.
void ChangeToTargetIndex(unsigned Idx, int64_t Offset,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be a register already,
/// the setReg method should be used.
- void ChangeToRegister(unsigned Reg, bool isDef, bool isImp = false,
+ void ChangeToRegister(Register Reg, bool isDef, bool isImp = false,
bool isKill = false, bool isDead = false,
bool isUndef = false, bool isDebug = false);
@@ -762,7 +771,7 @@ public:
return Op;
}
- static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp = false,
+ static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp = false,
bool isKill = false, bool isDead = false,
bool isUndef = false,
bool isEarlyClobber = false,
@@ -788,7 +797,7 @@ public:
return Op;
}
static MachineOperand CreateMBB(MachineBasicBlock *MBB,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_MachineBasicBlock);
Op.setMBB(MBB);
Op.setTargetFlags(TargetFlags);
@@ -800,7 +809,7 @@ public:
return Op;
}
static MachineOperand CreateCPI(unsigned Idx, int Offset,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_ConstantPoolIndex);
Op.setIndex(Idx);
Op.setOffset(Offset);
@@ -808,21 +817,21 @@ public:
return Op;
}
static MachineOperand CreateTargetIndex(unsigned Idx, int64_t Offset,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_TargetIndex);
Op.setIndex(Idx);
Op.setOffset(Offset);
Op.setTargetFlags(TargetFlags);
return Op;
}
- static MachineOperand CreateJTI(unsigned Idx, unsigned char TargetFlags = 0) {
+ static MachineOperand CreateJTI(unsigned Idx, unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_JumpTableIndex);
Op.setIndex(Idx);
Op.setTargetFlags(TargetFlags);
return Op;
}
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_GlobalAddress);
Op.Contents.OffsetedInfo.Val.GV = GV;
Op.setOffset(Offset);
@@ -830,7 +839,7 @@ public:
return Op;
}
static MachineOperand CreateES(const char *SymName,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_ExternalSymbol);
Op.Contents.OffsetedInfo.Val.SymbolName = SymName;
Op.setOffset(0); // Offset is always 0.
@@ -838,7 +847,7 @@ public:
return Op;
}
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_BlockAddress);
Op.Contents.OffsetedInfo.Val.BA = BA;
Op.setOffset(Offset);
@@ -876,7 +885,7 @@ public:
}
static MachineOperand CreateMCSymbol(MCSymbol *Sym,
- unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_MCSymbol);
Op.Contents.Sym = Sym;
Op.setOffset(0);
@@ -902,6 +911,12 @@ public:
return Op;
}
+ static MachineOperand CreateShuffleMask(const Constant *C) {
+ MachineOperand Op(MachineOperand::MO_ShuffleMask);
+ Op.Contents.ShuffleMask = C;
+ return Op;
+ }
+
friend class MachineInstr;
friend class MachineRegisterInfo;
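A hedged sketch of the new shuffle-mask operand kind; the helper name and the assumption that MI already carries such an operand at OpIdx are illustrative only:

    // Hypothetical accessor, not part of this patch.
    static const llvm::Constant *getMaskOf(const llvm::MachineInstr &MI,
                                           unsigned OpIdx) {
      const llvm::MachineOperand &MO = MI.getOperand(OpIdx);
      assert(MO.isShuffleMask() && "expected a MO_ShuffleMask operand");
      return MO.getShuffleMask();
    }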
diff --git a/include/llvm/CodeGen/MachinePipeliner.h b/include/llvm/CodeGen/MachinePipeliner.h
index 03ca53072685..e9cf7e115bff 100644
--- a/include/llvm/CodeGen/MachinePipeliner.h
+++ b/include/llvm/CodeGen/MachinePipeliner.h
@@ -40,6 +40,8 @@
#ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
#define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
+#include "llvm/Analysis/AliasAnalysis.h"
+
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
@@ -148,7 +150,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
/// We may create a new instruction, so remember it because it
/// must be deleted when the pass is finished.
- SmallPtrSet<MachineInstr *, 4> NewMIs;
+ DenseMap<MachineInstr *, MachineInstr *> NewMIs;
/// Ordered list of DAG postprocessing steps.
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
@@ -200,7 +202,7 @@ public:
RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
P.MF->getSubtarget().getSMSMutations(Mutations);
if (SwpEnableCopyToPhi)
- Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
+ Mutations.push_back(std::make_unique<CopyToPhiMutation>());
}
void schedule() override;
@@ -297,53 +299,8 @@ private:
void computeNodeOrder(NodeSetType &NodeSets);
void checkValidNodeOrder(const NodeSetType &Circuits) const;
bool schedulePipeline(SMSchedule &Schedule);
- void generatePipelinedLoop(SMSchedule &Schedule);
- void generateProlog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &PrologBBs);
- void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
- void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void removeDeadInstructions(MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs);
- void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule);
- void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
- MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule, ValueMapTy *VRMap);
bool computeDelta(MachineInstr &MI, unsigned &Delta);
- void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
- unsigned Num);
- MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum);
- MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum,
- SMSchedule &Schedule);
- void updateInstruction(MachineInstr *NewMI, bool LastDef,
- unsigned CurStageNum, unsigned InstrStageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap);
MachineInstr *findDefInLoop(unsigned Reg);
- unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
- unsigned LoopStage, ValueMapTy *VRMap,
- MachineBasicBlock *BB);
- void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap);
- void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
- InstrMapTy &InstrMap, unsigned CurStageNum,
- unsigned PhiNum, MachineInstr *Phi,
- unsigned OldReg, unsigned NewReg,
- unsigned PrevReg = 0);
bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
unsigned &OffsetPos, unsigned &NewBase,
int64_t &NewOffset);
@@ -529,12 +486,6 @@ private:
/// Map from instruction to execution cycle.
std::map<SUnit *, int> InstrToCycle;
- /// Map for each register and the max difference between its uses and def.
- /// The first element in the pair is the max difference in stages. The
- /// second is true if the register defines a Phi value and loop value is
- /// scheduled before the Phi.
- std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
-
/// Keep track of the first cycle value in the schedule. It starts
/// as zero, but the algorithm allows negative values.
int FirstCycle = 0;
@@ -560,7 +511,6 @@ public:
void reset() {
ScheduledInstrs.clear();
InstrToCycle.clear();
- RegToStageDiff.clear();
FirstCycle = 0;
LastCycle = 0;
InitiationInterval = 0;
@@ -620,28 +570,6 @@ public:
return (LastCycle - FirstCycle) / InitiationInterval;
}
- /// Return the max. number of stages/iterations that can occur between a
- /// register definition and its uses.
- unsigned getStagesForReg(int Reg, unsigned CurStage) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
- return 1;
- return Stages.first;
- }
-
- /// The number of stages for a Phi is a little different than other
- /// instructions. The minimum value computed in RegToStageDiff is 1
- /// because we assume the Phi is needed for at least 1 iteration.
- /// This is not the case if the loop value is scheduled prior to the
- /// Phi in the same stage. This function returns the number of stages
- /// or iterations needed between the Phi definition and any uses.
- unsigned getStagesForPhi(int Reg) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (Stages.second)
- return Stages.first;
- return Stages.first - 1;
- }
-
/// Return the instructions that are scheduled at the specified cycle.
std::deque<SUnit *> &getInstructions(int cycle) {
return ScheduledInstrs[cycle];
diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h
index b67e6b52ac8f..cb258b5e7b21 100644
--- a/include/llvm/CodeGen/MachinePostDominators.h
+++ b/include/llvm/CodeGen/MachinePostDominators.h
@@ -16,68 +16,76 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <memory>
namespace llvm {
///
-/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
-/// to compute the post-dominator tree.
+/// MachinePostDominatorTree - an analysis pass wrapper for DominatorTree
+/// used to compute the post-dominator tree for MachineFunctions.
///
-struct MachinePostDominatorTree : public MachineFunctionPass {
-private:
- PostDomTreeBase<MachineBasicBlock> *DT;
+class MachinePostDominatorTree : public MachineFunctionPass {
+ using PostDomTreeT = PostDomTreeBase<MachineBasicBlock>;
+ std::unique_ptr<PostDomTreeT> PDT;
public:
static char ID;
MachinePostDominatorTree();
- ~MachinePostDominatorTree() override;
-
FunctionPass *createMachinePostDominatorTreePass();
const SmallVectorImpl<MachineBasicBlock *> &getRoots() const {
- return DT->getRoots();
+ return PDT->getRoots();
}
- MachineDomTreeNode *getRootNode() const {
- return DT->getRootNode();
- }
+ MachineDomTreeNode *getRootNode() const { return PDT->getRootNode(); }
MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
- return DT->getNode(BB);
+ return PDT->getNode(BB);
}
MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
- return DT->getNode(BB);
+ return PDT->getNode(BB);
}
bool dominates(const MachineDomTreeNode *A,
const MachineDomTreeNode *B) const {
- return DT->dominates(A, B);
+ return PDT->dominates(A, B);
}
bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
- return DT->dominates(A, B);
+ return PDT->dominates(A, B);
}
bool properlyDominates(const MachineDomTreeNode *A,
const MachineDomTreeNode *B) const {
- return DT->properlyDominates(A, B);
+ return PDT->properlyDominates(A, B);
}
bool properlyDominates(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
- return DT->properlyDominates(A, B);
+ return PDT->properlyDominates(A, B);
+ }
+
+ bool isVirtualRoot(const MachineDomTreeNode *Node) const {
+ return PDT->isVirtualRoot(Node);
}
MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
- MachineBasicBlock *B) {
- return DT->findNearestCommonDominator(A, B);
+ MachineBasicBlock *B) const {
+ return PDT->findNearestCommonDominator(A, B);
}
+ /// Returns the nearest common dominator of the given blocks.
+ /// If that tree node is a virtual root, a nullptr will be returned.
+ MachineBasicBlock *
+ findNearestCommonDominator(ArrayRef<MachineBasicBlock *> Blocks) const;
+
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override { PDT.reset(nullptr); }
+ void verifyAnalysis() const override;
void print(llvm::raw_ostream &OS, const Module *M = nullptr) const override;
};
} //end of namespace llvm
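A hedged sketch of the new ArrayRef overload; MPDT and the contents of Exits are assumed to come from a MachineFunctionPass that requires this analysis:

    llvm::SmallVector<llvm::MachineBasicBlock *, 4> Exits; // filled by caller
    if (llvm::MachineBasicBlock *Join =
            MPDT.findNearestCommonDominator(Exits)) {
      // Join post-dominates every block in Exits.
    }
    // A nullptr result means only the virtual root post-dominates them all.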
diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h
index 6d9fb9b9100a..eeb69fef2c6b 100644
--- a/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/include/llvm/CodeGen/MachineRegionInfo.h
@@ -22,7 +22,7 @@
namespace llvm {
-struct MachinePostDominatorTree;
+class MachinePostDominatorTree;
class MachineRegion;
class MachineRegionNode;
class MachineRegionInfo;
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index b5deed1f5010..488a5a55a169 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -107,16 +107,16 @@ private:
/// getRegUseDefListHead - Return the head pointer for the register use/def
/// list for the specified virtual or physical register.
- MachineOperand *&getRegUseDefListHead(unsigned RegNo) {
- if (TargetRegisterInfo::isVirtualRegister(RegNo))
- return VRegInfo[RegNo].second;
- return PhysRegUseDefLists[RegNo];
+ MachineOperand *&getRegUseDefListHead(Register RegNo) {
+ if (RegNo.isVirtual())
+ return VRegInfo[RegNo.id()].second;
+ return PhysRegUseDefLists[RegNo.id()];
}
- MachineOperand *getRegUseDefListHead(unsigned RegNo) const {
- if (TargetRegisterInfo::isVirtualRegister(RegNo))
- return VRegInfo[RegNo].second;
- return PhysRegUseDefLists[RegNo];
+ MachineOperand *getRegUseDefListHead(Register RegNo) const {
+ if (RegNo.isVirtual())
+ return VRegInfo[RegNo.id()].second;
+ return PhysRegUseDefLists[RegNo.id()];
}
/// Get the next element in the use-def chain.
@@ -214,8 +214,8 @@ public:
bool shouldTrackSubRegLiveness(const TargetRegisterClass &RC) const {
return subRegLivenessEnabled() && RC.HasDisjunctSubRegs;
}
- bool shouldTrackSubRegLiveness(unsigned VReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Must pass a VReg");
+ bool shouldTrackSubRegLiveness(Register VReg) const {
+ assert(VReg.isVirtual() && "Must pass a VReg");
return shouldTrackSubRegLiveness(*getRegClass(VReg));
}
bool subRegLivenessEnabled() const {
@@ -326,7 +326,7 @@ public:
/// of the specified register, skipping those marked as Debug.
using reg_nodbg_iterator =
defusechain_iterator<true, true, true, true, false, false>;
- reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const {
+ reg_nodbg_iterator reg_nodbg_begin(Register RegNo) const {
return reg_nodbg_iterator(getRegUseDefListHead(RegNo));
}
static reg_nodbg_iterator reg_nodbg_end() {
@@ -374,7 +374,7 @@ public:
/// reg_nodbg_empty - Return true if the only instructions using or defining
/// Reg are Debug instructions.
- bool reg_nodbg_empty(unsigned RegNo) const {
+ bool reg_nodbg_empty(Register RegNo) const {
return reg_nodbg_begin(RegNo) == reg_nodbg_end();
}
@@ -628,10 +628,10 @@ public:
/// Return the register class of the specified virtual register.
/// This shouldn't be used directly unless \p Reg has a register class.
/// \see getRegClassOrNull when this might happen.
- const TargetRegisterClass *getRegClass(unsigned Reg) const {
- assert(VRegInfo[Reg].first.is<const TargetRegisterClass *>() &&
+ const TargetRegisterClass *getRegClass(Register Reg) const {
+ assert(VRegInfo[Reg.id()].first.is<const TargetRegisterClass *>() &&
"Register class not set, wrong accessor");
- return VRegInfo[Reg].first.get<const TargetRegisterClass *>();
+ return VRegInfo[Reg.id()].first.get<const TargetRegisterClass *>();
}
/// Return the register class of \p Reg, or null if Reg has not been assigned
@@ -727,7 +727,7 @@ public:
/// Get the low-level type of \p Reg or LLT{} if Reg is not a generic
/// (target independent) virtual register.
LLT getType(unsigned Reg) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg) && VRegToType.inBounds(Reg))
+ if (Register::isVirtualRegister(Reg) && VRegToType.inBounds(Reg))
return VRegToType[Reg];
return LLT{};
}
@@ -760,7 +760,7 @@ public:
/// specified virtual register. This is typically used by target, and in case
/// of an earlier hint it will be overwritten.
void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ assert(Register::isVirtualRegister(VReg));
RegAllocHints[VReg].first = Type;
RegAllocHints[VReg].second.clear();
RegAllocHints[VReg].second.push_back(PrefReg);
@@ -769,7 +769,7 @@ public:
/// addRegAllocationHint - Add a register allocation hint to the hints
/// vector for VReg.
void addRegAllocationHint(unsigned VReg, unsigned PrefReg) {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ assert(Register::isVirtualRegister(VReg));
RegAllocHints[VReg].second.push_back(PrefReg);
}
@@ -789,17 +789,18 @@ public:
/// specified virtual register. If there are many hints, this returns the
/// one with the greatest weight.
std::pair<unsigned, unsigned>
- getRegAllocationHint(unsigned VReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
- unsigned BestHint = (RegAllocHints[VReg].second.size() ?
- RegAllocHints[VReg].second[0] : 0);
- return std::pair<unsigned, unsigned>(RegAllocHints[VReg].first, BestHint);
+ getRegAllocationHint(Register VReg) const {
+ assert(VReg.isVirtual());
+ unsigned BestHint = (RegAllocHints[VReg.id()].second.size() ?
+ RegAllocHints[VReg.id()].second[0] : 0);
+ return std::pair<unsigned, unsigned>(RegAllocHints[VReg.id()].first,
+ BestHint);
}
/// getSimpleHint - same as getRegAllocationHint except it will only return
/// a target independent hint.
- unsigned getSimpleHint(unsigned VReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ Register getSimpleHint(Register VReg) const {
+ assert(VReg.isVirtual());
std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg);
return Hint.first ? 0 : Hint.second;
}
@@ -808,7 +809,7 @@ public:
/// register allocation hints for VReg.
const std::pair<unsigned, SmallVector<unsigned, 4>>
&getRegAllocationHints(unsigned VReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ assert(Register::isVirtualRegister(VReg));
return RegAllocHints[VReg];
}
@@ -817,6 +818,17 @@ public:
/// deleted during LiveDebugVariables analysis.
void markUsesInDebugValueAsUndef(unsigned Reg) const;
+ /// updateDbgUsersToReg - Update a collection of DBG_VALUE instructions
+ /// to refer to the designated register.
+ void updateDbgUsersToReg(unsigned Reg,
+ ArrayRef<MachineInstr*> Users) const {
+ for (MachineInstr *MI : Users) {
+ assert(MI->isDebugInstr());
+ assert(MI->getOperand(0).isReg());
+ MI->getOperand(0).setReg(Reg);
+ }
+ }
+
/// Return true if the specified register is modified in this function.
/// This checks that no defining machine operands exist for the register or
/// any of its aliases. Definitions found on functions marked noreturn are
@@ -882,8 +894,8 @@ public:
///
/// Reserved registers may belong to an allocatable register class, but the
/// target has explicitly requested that they are not used.
- bool isReserved(unsigned PhysReg) const {
- return getReservedRegs().test(PhysReg);
+ bool isReserved(Register PhysReg) const {
+ return getReservedRegs().test(PhysReg.id());
}
/// Returns true when the given register unit is considered reserved.
@@ -1164,7 +1176,7 @@ public:
PSetIterator(unsigned RegUnit, const MachineRegisterInfo *MRI) {
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- if (TargetRegisterInfo::isVirtualRegister(RegUnit)) {
+ if (Register::isVirtualRegister(RegUnit)) {
const TargetRegisterClass *RC = MRI->getRegClass(RegUnit);
PSet = TRI->getRegClassPressureSets(RC);
Weight = TRI->getRegClassWeight(RC).RegWeight;
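A hedged sketch pairing the new updateDbgUsersToReg helper with a use-list walk; OldReg and NewReg are assumed, and the users are collected before mutation so the use list is not invalidated mid-iteration:

    llvm::SmallVector<llvm::MachineInstr *, 4> DbgUsers;
    for (llvm::MachineInstr &UseMI : MRI.use_instructions(OldReg))
      if (UseMI.isDebugInstr())
        DbgUsers.push_back(&UseMI);
    MRI.updateDbgUsersToReg(NewReg, DbgUsers);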
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 75a334f61ad0..333367943ac0 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -100,6 +100,7 @@ namespace llvm {
extern cl::opt<bool> ForceTopDown;
extern cl::opt<bool> ForceBottomUp;
+extern cl::opt<bool> VerifyScheduling;
class LiveIntervals;
class MachineDominatorTree;
diff --git a/include/llvm/CodeGen/ModuloSchedule.h b/include/llvm/CodeGen/ModuloSchedule.h
new file mode 100644
index 000000000000..81a9b63b64ca
--- /dev/null
+++ b/include/llvm/CodeGen/ModuloSchedule.h
@@ -0,0 +1,367 @@
+//===- ModuloSchedule.h - Software pipeline schedule expansion ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Software pipelining (SWP) is an instruction scheduling technique for loops
+// that overlaps loop iterations and exploits ILP via compiler transformations.
+//
+// There are multiple methods for analyzing a loop and creating a schedule.
+// An example algorithm is Swing Modulo Scheduling (implemented by the
+// MachinePipeliner). The details of how a schedule is arrived at are irrelevant
+// for the task of actually rewriting a loop to adhere to the schedule, which
+// is what this file does.
+//
+// A schedule is, for every instruction in a block, a Cycle and a Stage. Note
+// that we only support single-block loops, so "block" and "loop" can be used
+// interchangeably.
+//
+// The Cycle of an instruction defines a partial order of the instructions in
+// the remapped loop. Instructions within a cycle must not consume the output
+// of any instruction in the same cycle. Cycle information is assumed to have
+// been calculated such that the processor will execute instructions in
+// lock-step (for example in a VLIW ISA).
+//
+// The Stage of an instruction defines the mapping between logical loop
+// iterations and pipelined loop iterations. An example (unrolled) pipeline
+// may look something like:
+//
+// I0[0] Execute instruction I0 of iteration 0
+// I1[0], I0[1] Execute I1 of iteration 0 and I0 of iteration 1
+// I1[1], I0[2]
+// I1[2], I0[3]
+//
+// In the schedule for this unrolled sequence we would say that I0 was scheduled
+// in stage 0 and I1 in stage 1:
+//
+// loop:
+// [stage 0] x = I0
+// [stage 1] I1 x (from stage 0)
+//
+// And to actually generate valid code we must insert a phi:
+//
+// loop:
+// x' = phi(x)
+// x = I0
+// I1 x'
+//
+// This is a simple example; the rules for how to generate correct code given
+// an arbitrary schedule containing loop-carried values are complex.
+//
+// Note that these examples only mention the steady-state kernel of the
+// generated loop; prologs and epilogs must also be generated to prime and
+// flush the pipeline. Doing so is nontrivial.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
+#define LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopUtils.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include <deque>
+#include <vector>
+
+namespace llvm {
+class MachineBasicBlock;
+class MachineInstr;
+class LiveIntervals;
+
+/// Represents a schedule for a single-block loop. For every instruction we
+/// maintain a Cycle and Stage.
+class ModuloSchedule {
+private:
+ /// The block containing the loop instructions.
+ MachineLoop *Loop;
+
+ /// The instructions to be generated, in total order. Cycle provides a partial
+ /// order; the total order within cycles has been decided by the schedule
+ /// producer.
+ std::vector<MachineInstr *> ScheduledInstrs;
+
+ /// The cycle for each instruction.
+ DenseMap<MachineInstr *, int> Cycle;
+
+ /// The stage for each instruction.
+ DenseMap<MachineInstr *, int> Stage;
+
+ /// The number of stages in this schedule (Max(Stage) + 1).
+ int NumStages;
+
+public:
+ /// Create a new ModuloSchedule.
+ /// \arg ScheduledInstrs The new loop instructions, in total resequenced
+ /// order.
+ /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does
+ /// not need to start at zero. ScheduledInstrs must be partially ordered by
+ /// Cycle.
+ /// \arg Stage Stage index for all instructions in ScheduledInstrs.
+ ModuloSchedule(MachineFunction &MF, MachineLoop *Loop,
+ std::vector<MachineInstr *> ScheduledInstrs,
+ DenseMap<MachineInstr *, int> Cycle,
+ DenseMap<MachineInstr *, int> Stage)
+ : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)),
+ Stage(std::move(Stage)) {
+ NumStages = 0;
+ for (auto &KV : this->Stage)
+ NumStages = std::max(NumStages, KV.second);
+ ++NumStages;
+ }
+
+ /// Return the single-block loop being scheduled.
+ MachineLoop *getLoop() const { return Loop; }
+
+ /// Return the number of stages contained in this schedule, which is the
+ /// largest stage index + 1.
+ int getNumStages() const { return NumStages; }
+
+ /// Return the first cycle in the schedule, which is the cycle index of the
+ /// first instruction.
+ int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; }
+
+ /// Return the final cycle in the schedule, which is the cycle index of the
+ /// last instruction.
+ int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; }
+
+ /// Return the stage that MI is scheduled in, or -1.
+ int getStage(MachineInstr *MI) {
+ auto I = Stage.find(MI);
+ return I == Stage.end() ? -1 : I->second;
+ }
+
+ /// Return the cycle that MI is scheduled at, or -1.
+ int getCycle(MachineInstr *MI) {
+ auto I = Cycle.find(MI);
+ return I == Cycle.end() ? -1 : I->second;
+ }
+
+ /// Return the rescheduled instructions in order.
+ ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
+
+ void dump() { print(dbgs()); }
+ void print(raw_ostream &OS);
+};
+
+/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
+/// rewriting the old loop and inserting prologs and epilogs as required.
+class ModuloScheduleExpander {
+public:
+ using InstrChangesTy = DenseMap<MachineInstr *, std::pair<unsigned, int64_t>>;
+
+private:
+ using ValueMapTy = DenseMap<unsigned, unsigned>;
+ using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
+ using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
+
+ ModuloSchedule &Schedule;
+ MachineFunction &MF;
+ const TargetSubtargetInfo &ST;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals &LIS;
+
+ MachineBasicBlock *BB;
+ MachineBasicBlock *Preheader;
+ MachineBasicBlock *NewKernel = nullptr;
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
+
+ /// Map from each register to the max difference between its uses and def.
+ /// The first element in the pair is the max difference in stages. The
+ /// second is true if the register defines a Phi value and the loop value
+ /// is scheduled before the Phi.
+ std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
+
+ /// Instructions to change when emitting the final schedule.
+ InstrChangesTy InstrChanges;
+
+ void generatePipelinedLoop();
+ void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
+ void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs);
+ void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast);
+ void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum, bool IsLast);
+ void removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs);
+ void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs);
+ void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
+ MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
+ ValueMapTy *VRMap);
+ bool computeDelta(MachineInstr &MI, unsigned &Delta);
+ void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
+ unsigned Num);
+ MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ void updateInstruction(MachineInstr *NewMI, bool LastDef,
+ unsigned CurStageNum, unsigned InstrStageNum,
+ ValueMapTy *VRMap);
+ MachineInstr *findDefInLoop(unsigned Reg);
+ unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
+ unsigned LoopStage, ValueMapTy *VRMap,
+ MachineBasicBlock *BB);
+ void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
+ ValueMapTy *VRMap, InstrMapTy &InstrMap);
+ void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap,
+ unsigned CurStageNum, unsigned PhiNum,
+ MachineInstr *Phi, unsigned OldReg,
+ unsigned NewReg, unsigned PrevReg = 0);
+ bool isLoopCarried(MachineInstr &Phi);
+
+ /// Return the max. number of stages/iterations that can occur between a
+ /// register definition and its uses.
+ unsigned getStagesForReg(int Reg, unsigned CurStage) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 &&
+ Stages.second)
+ return 1;
+ return Stages.first;
+ }
+
+ /// The number of stages for a Phi is a little different from that of other
+ /// instructions. The minimum value computed in RegToStageDiff is 1
+ /// because we assume the Phi is needed for at least 1 iteration.
+ /// This is not the case if the loop value is scheduled prior to the
+ /// Phi in the same stage. This function returns the number of stages
+ /// or iterations needed between the Phi definition and any uses.
+ unsigned getStagesForPhi(int Reg) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (Stages.second)
+ return Stages.first;
+ return Stages.first - 1;
+ }
+
+public:
+ /// Create a new ModuloScheduleExpander.
+ /// \arg InstrChanges Modifications to make to instructions with memory
+ /// operands.
+ /// FIXME: InstrChanges is opaque and is an implementation detail of an
+ /// optimization in MachinePipeliner that crosses abstraction boundaries.
+ ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
+ LiveIntervals &LIS, InstrChangesTy InstrChanges)
+ : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
+ TII(ST.getInstrInfo()), LIS(LIS),
+ InstrChanges(std::move(InstrChanges)) {}
+
+ /// Performs the actual expansion.
+ void expand();
+ /// Performs final cleanup after expansion.
+ void cleanup();
+
+ /// Returns the newly rewritten kernel block, or nullptr if this was
+ /// optimized away.
+ MachineBasicBlock *getRewrittenKernel() { return NewKernel; }
+};
+
+/// A reimplementation of ModuloScheduleExpander. It works by generating a
+/// standalone kernel loop and peeling out the prologs and epilogs.
+class PeelingModuloScheduleExpander {
+ ModuloSchedule &Schedule;
+ MachineFunction &MF;
+ const TargetSubtargetInfo &ST;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals *LIS;
+
+ /// The original loop block that gets rewritten in-place.
+ MachineBasicBlock *BB;
+ /// The original loop preheader.
+ MachineBasicBlock *Preheader;
+ /// All prolog and epilog blocks.
+ SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;
+ /// For every block, the stages that are produced.
+ DenseMap<MachineBasicBlock *, BitVector> LiveStages;
+ /// For every block, the stages that are available. A stage can be available
+ /// but not produced (in the epilog) or produced but not available (in the
+ /// prolog).
+ DenseMap<MachineBasicBlock *, BitVector> AvailableStages;
+
+ /// CanonicalMIs and BlockMIs form a bidirectional map between corresponding
+ /// instructions in any of the loop kernel clones.
+ DenseMap<MachineInstr *, MachineInstr *> CanonicalMIs;
+ DenseMap<std::pair<MachineBasicBlock *, MachineInstr *>, MachineInstr *>
+ BlockMIs;
+
+ /// State passed from peelKernel to peelPrologAndEpilogs().
+ std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;
+
+public:
+ PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
+ LiveIntervals *LIS)
+ : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
+ TII(ST.getInstrInfo()), LIS(LIS) {}
+
+ void expand();
+
+ /// Runs ModuloScheduleExpander and treats it as a golden input to validate
+ /// aspects of the code generated by PeelingModuloScheduleExpander.
+ void validateAgainstModuloScheduleExpander();
+
+protected:
+ /// Converts BB from the original loop body to the rewritten, pipelined
+ /// steady-state.
+ void rewriteKernel();
+
+private:
+ /// Peels one iteration of the rewritten kernel (BB) in the specified
+ /// direction.
+ MachineBasicBlock *peelKernel(LoopPeelDirection LPD);
+ /// Peel the kernel forwards and backwards to produce prologs and epilogs,
+ /// and stitch them together.
+ void peelPrologAndEpilogs();
+ /// All prolog and epilog blocks are clones of the kernel, so any produced
+ /// register in one block has a corollary in all other blocks.
+ Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB);
+ /// Change all users of MI, if MI is predicated out
+ /// (LiveStages[MI->getParent()] == false).
+ void rewriteUsesOf(MachineInstr *MI);
+ /// Insert branches between prologs, kernel and epilogs.
+ void fixupBranches();
+ /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block
+ /// to a block dominated by all prologs and epilogs. This allows us to treat
+ /// the loop exiting block like any other kernel clone.
+ MachineBasicBlock *CreateLCSSAExitingBlock();
+ /// Helper to get the stage of an instruction in the schedule.
+ unsigned getStage(MachineInstr *MI) {
+ if (CanonicalMIs.count(MI))
+ MI = CanonicalMIs[MI];
+ return Schedule.getStage(MI);
+ }
+};
+
+/// Expander that simply annotates each scheduled instruction with a post-instr
+/// symbol that can be consumed by the ModuloScheduleTest pass.
+///
+/// The post-instr symbol is a way of annotating an instruction that can be
+/// roundtripped in MIR. The syntax is:
+/// MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>
+class ModuloScheduleTestAnnotater {
+ MachineFunction &MF;
+ ModuloSchedule &S;
+
+public:
+ ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S)
+ : MF(MF), S(S) {}
+
+ /// Performs the annotation.
+ void annotate();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
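A hedged end-to-end sketch of handing a computed schedule to the expander; the instruction order and the cycle/stage maps are assumed to come from a producer such as the MachinePipeliner:

    std::vector<llvm::MachineInstr *> Instrs; // total order, consistent w/ Cycle
    llvm::DenseMap<llvm::MachineInstr *, int> Cycles, Stages; // from producer
    llvm::ModuloSchedule MS(MF, L, std::move(Instrs), std::move(Cycles),
                            std::move(Stages));
    llvm::ModuloScheduleExpander MSE(MF, MS, LIS, /*InstrChanges=*/{});
    MSE.expand();
    MSE.cleanup();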
diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h
index 8b014ccbb07b..099ba788e9a2 100644
--- a/include/llvm/CodeGen/PBQP/Math.h
+++ b/include/llvm/CodeGen/PBQP/Math.h
@@ -28,17 +28,17 @@ class Vector {
public:
/// Construct a PBQP vector of the given size.
explicit Vector(unsigned Length)
- : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {}
+ : Length(Length), Data(std::make_unique<PBQPNum []>(Length)) {}
/// Construct a PBQP vector with initializer.
Vector(unsigned Length, PBQPNum InitVal)
- : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {
+ : Length(Length), Data(std::make_unique<PBQPNum []>(Length)) {
std::fill(Data.get(), Data.get() + Length, InitVal);
}
/// Copy construct a PBQP vector.
Vector(const Vector &V)
- : Length(V.Length), Data(llvm::make_unique<PBQPNum []>(Length)) {
+ : Length(V.Length), Data(std::make_unique<PBQPNum []>(Length)) {
std::copy(V.Data.get(), V.Data.get() + Length, Data.get());
}
@@ -125,21 +125,21 @@ private:
public:
/// Construct a PBQP Matrix with the given dimensions.
Matrix(unsigned Rows, unsigned Cols) :
- Rows(Rows), Cols(Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
+ Rows(Rows), Cols(Cols), Data(std::make_unique<PBQPNum []>(Rows * Cols)) {
}
/// Construct a PBQP Matrix with the given dimensions and initial
/// value.
Matrix(unsigned Rows, unsigned Cols, PBQPNum InitVal)
: Rows(Rows), Cols(Cols),
- Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
+ Data(std::make_unique<PBQPNum []>(Rows * Cols)) {
std::fill(Data.get(), Data.get() + (Rows * Cols), InitVal);
}
/// Copy construct a PBQP matrix.
Matrix(const Matrix &M)
: Rows(M.Rows), Cols(M.Cols),
- Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
+ Data(std::make_unique<PBQPNum []>(Rows * Cols)) {
std::copy(M.Data.get(), M.Data.get() + (Rows * Cols), Data.get());
}
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index d92ee93268e7..1e765ce51e4a 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -226,6 +226,10 @@ namespace llvm {
/// inserting cmov instructions.
extern char &EarlyIfConverterID;
+ /// EarlyIfPredicator - This pass performs if-conversion on SSA form by
+ /// predicating if/else block and insert select at the join point.
+ extern char &EarlyIfPredicatorID;
+
/// This pass performs instruction combining using trace metrics to estimate
/// critical-path and resource depth.
extern char &MachineCombinerID;
diff --git a/include/llvm/CodeGen/Register.h b/include/llvm/CodeGen/Register.h
index 907c1a99e56f..aa5173684e24 100644
--- a/include/llvm/CodeGen/Register.h
+++ b/include/llvm/CodeGen/Register.h
@@ -9,6 +9,7 @@
#ifndef LLVM_CODEGEN_REGISTER_H
#define LLVM_CODEGEN_REGISTER_H
+#include "llvm/MC/MCRegister.h"
#include <cassert>
namespace llvm {
@@ -20,41 +21,136 @@ class Register {
public:
Register(unsigned Val = 0): Reg(Val) {}
+ Register(MCRegister Val): Reg(Val) {}
+
+ // Register numbers can represent physical registers, virtual registers, and
+ // sometimes stack slots. The unsigned values are divided into these ranges:
+ //
+ // 0 Not a register, can be used as a sentinel.
+ // [1;2^30) Physical registers assigned by TableGen.
+ // [2^30;2^31) Stack slots. (Rarely used.)
+ // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo.
+ //
+ // Further sentinels can be allocated from the small negative integers.
+ // DenseMapInfo<unsigned> uses -1u and -2u.
+
+ /// isStackSlot - Sometimes it is useful to be able to store a non-negative
+ /// frame index in a variable that normally holds a register. isStackSlot()
+ /// returns true if Reg is in the range used for stack slots.
+ ///
+ /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack
+ /// slots, so if a variable may contain a stack slot, always check
+ /// isStackSlot() first.
+ ///
+ static bool isStackSlot(unsigned Reg) {
+ return MCRegister::isStackSlot(Reg);
+ }
+
+ /// Compute the frame index from a register value representing a stack slot.
+ static int stackSlot2Index(unsigned Reg) {
+ assert(isStackSlot(Reg) && "Not a stack slot");
+ return int(Reg - (1u << 30));
+ }
+
+ /// Convert a non-negative frame index to a stack slot register value.
+ static unsigned index2StackSlot(int FI) {
+ assert(FI >= 0 && "Cannot hold a negative frame index.");
+ return FI + (1u << 30);
+ }
+
+ /// Return true if the specified register number is in
+ /// the physical register namespace.
+ static bool isPhysicalRegister(unsigned Reg) {
+ return MCRegister::isPhysicalRegister(Reg);
+ }
+
+ /// Return true if the specified register number is in
+ /// the virtual register namespace.
+ static bool isVirtualRegister(unsigned Reg) {
+ assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
+ return int(Reg) < 0;
+ }
+
+ /// Convert a virtual register number to a 0-based index.
+ /// The first virtual register in a function will get the index 0.
+ static unsigned virtReg2Index(unsigned Reg) {
+ assert(isVirtualRegister(Reg) && "Not a virtual register");
+ return Reg & ~(1u << 31);
+ }
+
+ /// Convert a 0-based index to a virtual register number.
+ /// This is the inverse operation of VirtReg2IndexFunctor below.
+ static unsigned index2VirtReg(unsigned Index) {
+ return Index | (1u << 31);
+ }
/// Return true if the specified register number is in the virtual register
/// namespace.
bool isVirtual() const {
- return int(Reg) < 0;
+ return isVirtualRegister(Reg);
}
/// Return true if the specified register number is in the physical register
/// namespace.
bool isPhysical() const {
- return int(Reg) > 0;
+ return isPhysicalRegister(Reg);
}
/// Convert a virtual register number to a 0-based index. The first virtual
/// register in a function will get the index 0.
unsigned virtRegIndex() const {
- assert(isVirtual() && "Not a virtual register");
- return Reg & ~(1u << 31);
- }
-
- /// Convert a 0-based index to a virtual register number.
- /// This is the inverse operation of VirtReg2IndexFunctor below.
- static Register index2VirtReg(unsigned Index) {
- return Register(Index | (1u << 31));
+ return virtReg2Index(Reg);
}
operator unsigned() const {
return Reg;
}
+ unsigned id() const { return Reg; }
+
+ operator MCRegister() const {
+ return MCRegister(Reg);
+ }
+
bool isValid() const {
return Reg != 0;
}
+
+ /// Comparisons between register objects
+ bool operator==(const Register &Other) const { return Reg == Other.Reg; }
+ bool operator!=(const Register &Other) const { return Reg != Other.Reg; }
+ bool operator==(const MCRegister &Other) const { return Reg == Other.id(); }
+ bool operator!=(const MCRegister &Other) const { return Reg != Other.id(); }
+
+ /// Comparisons against register constants. E.g.
+ /// * R == AArch64::WZR
+ /// * R == 0
+ /// * R == VirtRegMap::NO_PHYS_REG
+ bool operator==(unsigned Other) const { return Reg == Other; }
+ bool operator!=(unsigned Other) const { return Reg != Other; }
+ bool operator==(int Other) const { return Reg == unsigned(Other); }
+ bool operator!=(int Other) const { return Reg != unsigned(Other); }
+ // MSVC requires that we explicitly declare these two as well.
+ bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); }
+ bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); }
+};
+
+// Provide DenseMapInfo for Register
+template<> struct DenseMapInfo<Register> {
+ static inline unsigned getEmptyKey() {
+ return DenseMapInfo<unsigned>::getEmptyKey();
+ }
+ static inline unsigned getTombstoneKey() {
+ return DenseMapInfo<unsigned>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const Register &Val) {
+ return DenseMapInfo<unsigned>::getHashValue(Val.id());
+ }
+ static bool isEqual(const Register &LHS, const Register &RHS) {
+ return DenseMapInfo<unsigned>::isEqual(LHS.id(), RHS.id());
+ }
};
}
-#endif
+#endif // ifndef LLVM_CODEGEN_REGISTER_H
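A hedged illustration of the value ranges documented above and of the new DenseMapInfo specialization; the round-trip follows directly from index2VirtReg and virtReg2Index:

    unsigned V = llvm::Register::index2VirtReg(0); // == 1u << 31
    assert(llvm::Register::isVirtualRegister(V));
    assert(llvm::Register::virtReg2Index(V) == 0);
    llvm::DenseMap<llvm::Register, int> UseCount; // keys now well-defined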
diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h
index 14af5c4d090d..25b310c47621 100644
--- a/include/llvm/CodeGen/RegisterClassInfo.h
+++ b/include/llvm/CodeGen/RegisterClassInfo.h
@@ -110,7 +110,7 @@ public:
/// getLastCalleeSavedAlias - Returns the last callee saved register that
/// overlaps PhysReg, or 0 if Reg doesn't overlap a CalleeSavedAliases.
unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ assert(Register::isPhysicalRegister(PhysReg));
if (PhysReg < CalleeSavedAliases.size())
return CalleeSavedAliases[PhysReg];
return 0;
diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h
index 5bbaa03fd751..92333b859f1b 100644
--- a/include/llvm/CodeGen/RegisterPressure.h
+++ b/include/llvm/CodeGen/RegisterPressure.h
@@ -129,6 +129,8 @@ public:
bool operator==(const PressureChange &RHS) const {
return PSetID == RHS.PSetID && UnitInc == RHS.UnitInc;
}
+
+ void dump() const;
};
/// List of PressureChanges in order of increasing, unique PSetID.
@@ -248,6 +250,7 @@ struct RegPressureDelta {
bool operator!=(const RegPressureDelta &RHS) const {
return !operator==(RHS);
}
+ void dump() const;
};
/// A set of live virtual registers and physical register units.
@@ -273,15 +276,15 @@ private:
unsigned NumRegUnits;
unsigned getSparseIndexFromReg(unsigned Reg) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- return TargetRegisterInfo::virtReg2Index(Reg) + NumRegUnits;
+ if (Register::isVirtualRegister(Reg))
+ return Register::virtReg2Index(Reg) + NumRegUnits;
assert(Reg < NumRegUnits);
return Reg;
}
unsigned getRegFromSparseIndex(unsigned SparseIndex) const {
if (SparseIndex >= NumRegUnits)
- return TargetRegisterInfo::index2VirtReg(SparseIndex-NumRegUnits);
+ return Register::index2VirtReg(SparseIndex-NumRegUnits);
return SparseIndex;
}
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 9c48df82f07d..5b5a80a67e7f 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -51,7 +51,7 @@ class RegScavenger {
/// If non-zero, the specific register is currently being
/// scavenged. That is, it is spilled to this scavenging stack slot.
- unsigned Reg = 0;
+ Register Reg;
/// The instruction that restores the scavenged register from stack.
const MachineInstr *Restore = nullptr;
@@ -119,14 +119,14 @@ public:
MachineBasicBlock::iterator getCurrentPosition() const { return MBBI; }
/// Return if a specific register is currently used.
- bool isRegUsed(unsigned Reg, bool includeReserved = true) const;
+ bool isRegUsed(Register Reg, bool includeReserved = true) const;
/// Return all available registers in the register class in Mask.
BitVector getRegsAvailable(const TargetRegisterClass *RC);
/// Find an unused register of the specified register class.
/// Return 0 if none is found.
- unsigned FindUnusedReg(const TargetRegisterClass *RC) const;
+ Register FindUnusedReg(const TargetRegisterClass *RC) const;
/// Add a scavenging frame index.
void addScavengingFrameIndex(int FI) {
@@ -160,10 +160,10 @@ public:
///
/// If \p AllowSpill is false, fail if a spill is required to make the
/// register available, and return NoRegister.
- unsigned scavengeRegister(const TargetRegisterClass *RC,
+ Register scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I, int SPAdj,
bool AllowSpill = true);
- unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj,
+ Register scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj,
bool AllowSpill = true) {
return scavengeRegister(RegClass, MBBI, SPAdj, AllowSpill);
}
@@ -177,17 +177,17 @@ public:
///
/// If \p AllowSpill is false, fail if a spill is required to make the
/// register available, and return NoRegister.
- unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC,
+ Register scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator To,
bool RestoreAfter, int SPAdj,
bool AllowSpill = true);
/// Tell the scavenger a register is used.
- void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll());
+ void setRegUsed(Register Reg, LaneBitmask LaneMask = LaneBitmask::getAll());
private:
/// Returns true if a register is reserved. It is never "unused".
- bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
+ bool isReserved(Register Reg) const { return MRI->isReserved(Reg); }
/// setUsed / setUnused - Mark the state of one or a number of register units.
///
@@ -203,16 +203,16 @@ private:
void determineKillsAndDefs();
/// Add all Reg Units that Reg contains to BV.
- void addRegUnits(BitVector &BV, unsigned Reg);
+ void addRegUnits(BitVector &BV, Register Reg);
/// Remove all Reg Units that \p Reg contains from \p BV.
- void removeRegUnits(BitVector &BV, unsigned Reg);
+ void removeRegUnits(BitVector &BV, Register Reg);
/// Return the candidate register that is unused for the longest after
/// StartMI. UseMI is set to the instruction where the search stopped.
///
/// No more than InstrLimit instructions are inspected.
- unsigned findSurvivorReg(MachineBasicBlock::iterator StartMI,
+ Register findSurvivorReg(MachineBasicBlock::iterator StartMI,
BitVector &Candidates,
unsigned InstrLimit,
MachineBasicBlock::iterator &UseMI);
@@ -225,7 +225,7 @@ private:
/// Spill a register after position \p After and reload it before position
/// \p UseMI.
- ScavengedInfo &spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
+ ScavengedInfo &spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
MachineBasicBlock::iterator Before,
MachineBasicBlock::iterator &UseMI);
};
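
A short usage sketch for the retyped scavenger API: with AllowSpill set to false the call can fail and return NoRegister (0), so callers should check the result. RS, RC, and MBBI are assumed to be in scope.

    Register Scratch = RS.scavengeRegister(RC, MBBI, /*SPAdj=*/0,
                                           /*AllowSpill=*/false);
    if (Scratch == 0) // NoRegister: nothing free and spilling was disallowed
      Scratch = RS.scavengeRegister(RC, MBBI, /*SPAdj=*/0, /*AllowSpill=*/true);
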
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 3e3b604acbac..1eb9b9f322ba 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -34,6 +34,7 @@
namespace llvm {
+ class AAResults;
class LiveIntervals;
class MachineFrameInfo;
class MachineFunction;
@@ -57,7 +58,7 @@ namespace llvm {
: VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
unsigned getSparseSetIndex() const {
- return TargetRegisterInfo::virtReg2Index(VirtReg);
+ return Register::virtReg2Index(VirtReg);
}
};
@@ -173,7 +174,7 @@ namespace llvm {
/// Tracks the last instructions in this region using each virtual register.
VReg2SUnitOperIdxMultiMap CurrentVRegUses;
- AliasAnalysis *AAForDep = nullptr;
+ AAResults *AAForDep = nullptr;
/// Remember a generic side-effecting instruction as we proceed.
/// No other SU ever gets scheduled around it (except in the special
@@ -201,7 +202,7 @@ namespace llvm {
Value2SUsMap &loads, unsigned N);
/// Adds a chain edge between SUa and SUb, but only if both
- /// AliasAnalysis and Target fail to deny the dependency.
+ /// AAResults and Target fail to deny the dependency.
void addChainDependency(SUnit *SUa, SUnit *SUb,
unsigned Latency = 0);
@@ -306,7 +307,7 @@ namespace llvm {
/// If \p RPTracker is non-null, compute register pressure as a side effect.
/// The DAG builder is an efficient place to do it because it already visits
/// operands.
- void buildSchedGraph(AliasAnalysis *AA,
+ void buildSchedGraph(AAResults *AA,
RegPressureTracker *RPTracker = nullptr,
PressureDiffs *PDiffs = nullptr,
LiveIntervals *LIS = nullptr,
@@ -374,6 +375,9 @@ namespace llvm {
/// Returns a mask for which lanes get read/written by the given (register)
/// machine operand.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
+
+ /// Returns true if the def register in \p MO has no uses.
+ bool deadDefHasNoUse(const MachineOperand &MO);
};
/// Creates a new SUnit and return a ptr to it.
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 12a970847021..6b8e2dd803ba 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -26,8 +26,6 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -58,6 +56,7 @@
namespace llvm {
+class AAResults;
class BlockAddress;
class Constant;
class ConstantFP;
@@ -66,6 +65,7 @@ class DataLayout;
struct fltSemantics;
class GlobalValue;
struct KnownBits;
+class LegacyDivergenceAnalysis;
class LLVMContext;
class MachineBasicBlock;
class MachineConstantPoolValue;
@@ -269,7 +269,13 @@ class SelectionDAG {
using CallSiteInfo = MachineFunction::CallSiteInfo;
using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl;
- DenseMap<const SDNode *, CallSiteInfo> SDCallSiteInfo;
+
+ struct CallSiteDbgInfo {
+ CallSiteInfo CSInfo;
+ MDNode *HeapAllocSite = nullptr;
+ };
+
+ DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo;
uint16_t NextPersistentId = 0;
@@ -382,7 +388,11 @@ private:
Node->OperandList = nullptr;
}
void CreateTopologicalOrder(std::vector<SDNode*>& Order);
+
public:
+ // Maximum depth for recursive analysis such as computeKnownBits, etc.
+ static constexpr unsigned MaxRecursionDepth = 6;
+
explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level);
SelectionDAG(const SelectionDAG &) = delete;
SelectionDAG &operator=(const SelectionDAG &) = delete;
@@ -489,7 +499,7 @@ public:
/// certain types of nodes together, or eliminating superfluous nodes. The
/// Level argument controls whether Combine is allowed to produce nodes and
/// types that are illegal on the target.
- void Combine(CombineLevel Level, AliasAnalysis *AA,
+ void Combine(CombineLevel Level, AAResults *AA,
CodeGenOpt::Level OptLevel);
/// This transforms the SelectionDAG into a SelectionDAG that
@@ -628,10 +638,9 @@ public:
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT,
int64_t offset = 0, bool isTargetGA = false,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT,
- int64_t offset = 0,
- unsigned char TargetFlags = 0) {
+ int64_t offset = 0, unsigned TargetFlags = 0) {
return getGlobalAddress(GV, DL, VT, offset, true, TargetFlags);
}
SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false);
@@ -639,28 +648,27 @@ public:
return getFrameIndex(FI, VT, true);
}
SDValue getJumpTable(int JTI, EVT VT, bool isTarget = false,
- unsigned char TargetFlags = 0);
- SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags = 0) {
+ unsigned TargetFlags = 0);
+ SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags = 0) {
return getJumpTable(JTI, VT, true, TargetFlags);
}
- SDValue getConstantPool(const Constant *C, EVT VT,
- unsigned Align = 0, int Offs = 0, bool isT=false,
- unsigned char TargetFlags = 0);
- SDValue getTargetConstantPool(const Constant *C, EVT VT,
- unsigned Align = 0, int Offset = 0,
- unsigned char TargetFlags = 0) {
+ SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align = 0,
+ int Offs = 0, bool isT = false,
+ unsigned TargetFlags = 0);
+ SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align = 0,
+ int Offset = 0, unsigned TargetFlags = 0) {
return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
}
SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Align = 0, int Offs = 0, bool isT=false,
- unsigned char TargetFlags = 0);
- SDValue getTargetConstantPool(MachineConstantPoolValue *C,
- EVT VT, unsigned Align = 0,
- int Offset = 0, unsigned char TargetFlags=0) {
+ unsigned TargetFlags = 0);
+ SDValue getTargetConstantPool(MachineConstantPoolValue *C, EVT VT,
+ unsigned Align = 0, int Offset = 0,
+ unsigned TargetFlags = 0) {
return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
}
SDValue getTargetIndex(int Index, EVT VT, int64_t Offset = 0,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
// When generating a branch to a BB, we don't in general know enough
// to provide debug info for the BB at that time, so keep this one around.
SDValue getBasicBlock(MachineBasicBlock *MBB);
@@ -668,7 +676,7 @@ public:
SDValue getExternalSymbol(const char *Sym, EVT VT);
SDValue getExternalSymbol(const char *Sym, const SDLoc &dl, EVT VT);
SDValue getTargetExternalSymbol(const char *Sym, EVT VT,
- unsigned char TargetFlags = 0);
+ unsigned TargetFlags = 0);
SDValue getMCSymbol(MCSymbol *Sym, EVT VT);
SDValue getValueType(EVT);
@@ -677,12 +685,10 @@ public:
SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label);
SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root,
MCSymbol *Label);
- SDValue getBlockAddress(const BlockAddress *BA, EVT VT,
- int64_t Offset = 0, bool isTarget = false,
- unsigned char TargetFlags = 0);
+ SDValue getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset = 0,
+ bool isTarget = false, unsigned TargetFlags = 0);
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT,
- int64_t Offset = 0,
- unsigned char TargetFlags = 0) {
+ int64_t Offset = 0, unsigned TargetFlags = 0) {
return getBlockAddress(BA, VT, Offset, true, TargetFlags);
}
@@ -1035,7 +1041,7 @@ public:
unsigned Align = 0,
MachineMemOperand::Flags Flags
= MachineMemOperand::MOLoad | MachineMemOperand::MOStore,
- unsigned Size = 0,
+ uint64_t Size = 0,
const AAMDNodes &AAInfo = AAMDNodes());
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList,
@@ -1117,9 +1123,11 @@ public:
MachineMemOperand *MMO, bool IsTruncating = false,
bool IsCompressing = false);
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
- ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType);
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
- ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType);
/// Return (create a new or find existing) a target-specific node.
/// TargetMemSDNode should be derived class from MemSDNode.
@@ -1588,9 +1596,12 @@ public:
/// Extract. The reduction must use one of the opcodes listed in \p
/// CandidateBinOps and on success \p BinOp will contain the matching opcode.
/// Returns the vector that is being reduced on, or SDValue() if a reduction
- /// was not matched.
+ /// was not matched. If \p AllowPartials is set then in the case of a
+ /// reduction pattern that only matches the first few stages, the extracted
+ /// subvector of the start of the reduction is returned.
SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
- ArrayRef<ISD::NodeType> CandidateBinOps);
+ ArrayRef<ISD::NodeType> CandidateBinOps,
+ bool AllowPartials = false);
/// Utility function used by legalize and lowering to
/// "unroll" a vector operation by splitting out the scalars and operating
@@ -1664,16 +1675,28 @@ public:
}
void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
- SDCallSiteInfo[CallNode] = std::move(CallInfo);
+ SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo);
}
CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
- auto I = SDCallSiteInfo.find(CallNode);
- if (I != SDCallSiteInfo.end())
- return std::move(I->second);
+ auto I = SDCallSiteDbgInfo.find(CallNode);
+ if (I != SDCallSiteDbgInfo.end())
+ return std::move(I->second).CSInfo;
return CallSiteInfo();
}
+ void addHeapAllocSite(const SDNode *Node, MDNode *MD) {
+ SDCallSiteDbgInfo[Node].HeapAllocSite = MD;
+ }
+
+ /// Return the HeapAllocSite type associated with the SDNode, if it exists.
+ MDNode *getHeapAllocSite(const SDNode *Node) {
+ auto It = SDCallSiteDbgInfo.find(Node);
+ if (It == SDCallSiteDbgInfo.end())
+ return nullptr;
+ return It->second.HeapAllocSite;
+ }
+
private:
void InsertNode(SDNode *N);
bool RemoveNodeFromCSEMaps(SDNode *N);
@@ -1712,7 +1735,7 @@ private:
std::map<EVT, SDNode*, EVT::compareRawBits> ExtendedValueTypeNodes;
StringMap<SDNode*> ExternalSymbols;
- std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols;
+ std::map<std::pair<std::string, unsigned>, SDNode *> TargetExternalSymbols;
DenseMap<MCSymbol *, SDNode *> MCSymbols;
};
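
A hedged sketch of the intended use of the new map: call lowering records the !heapallocsite metadata against the call's SDNode, and the instruction emitter later queries it back (the emitter side is assumed here; it is not part of this hunk).

    // During call lowering: Node is the call's SDNode, MD comes from the IR
    // call instruction's !heapallocsite metadata.
    DAG.addHeapAllocSite(Node, MD);

    // Later, when the corresponding MachineInstr is emitted:
    if (MDNode *HeapAllocSite = DAG.getHeapAllocSite(Node)) {
      // ... attach HeapAllocSite to the emitted instruction's debug info.
    }
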
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 147c325342fc..de71a21d4671 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -22,22 +22,23 @@
#include <memory>
namespace llvm {
- class FastISel;
- class SelectionDAGBuilder;
- class SDValue;
- class MachineRegisterInfo;
- class MachineBasicBlock;
- class MachineFunction;
- class MachineInstr;
- class OptimizationRemarkEmitter;
- class TargetLowering;
- class TargetLibraryInfo;
- class FunctionLoweringInfo;
- class ScheduleHazardRecognizer;
- class SwiftErrorValueTracking;
- class GCFunctionInfo;
- class ScheduleDAGSDNodes;
- class LoadInst;
+class AAResults;
+class FastISel;
+class SelectionDAGBuilder;
+class SDValue;
+class MachineRegisterInfo;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class OptimizationRemarkEmitter;
+class TargetLowering;
+class TargetLibraryInfo;
+class FunctionLoweringInfo;
+class ScheduleHazardRecognizer;
+class SwiftErrorValueTracking;
+class GCFunctionInfo;
+class ScheduleDAGSDNodes;
+class LoadInst;
/// SelectionDAGISel - This is the common base class used for SelectionDAG-based
/// pattern-matching instruction selectors.
@@ -51,7 +52,7 @@ public:
MachineRegisterInfo *RegInfo;
SelectionDAG *CurDAG;
SelectionDAGBuilder *SDB;
- AliasAnalysis *AA;
+ AAResults *AA;
GCFunctionInfo *GFI;
CodeGenOpt::Level OptLevel;
const TargetInstrInfo *TII;
@@ -162,6 +163,7 @@ public:
OPC_EmitMergeInputChains1_1,
OPC_EmitMergeInputChains1_2,
OPC_EmitCopyToReg,
+ OPC_EmitCopyToReg2,
OPC_EmitNodeXForm,
OPC_EmitNode,
// Space-optimized forms that implicitly encode number of result VTs.
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 5aab9643e09d..ceb8b72635a2 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -548,10 +548,15 @@ BEGIN_TWO_BYTE_PACK()
class LSBaseSDNodeBitfields {
friend class LSBaseSDNode;
+ friend class MaskedGatherScatterSDNode;
uint16_t : NumMemSDNodeBits;
- uint16_t AddressingMode : 3; // enum ISD::MemIndexedMode
+ // This storage is shared between disparate class hierarchies to hold an
+ // enumeration specific to the class hierarchy in use.
+ // LSBaseSDNode => enum ISD::MemIndexedMode
+ // MaskedGatherScatterSDNode => enum ISD::MemIndexType
+ uint16_t AddressingMode : 3;
};
enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
@@ -696,14 +701,20 @@ public:
case ISD::STRICT_FLOG:
case ISD::STRICT_FLOG10:
case ISD::STRICT_FLOG2:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FMINNUM:
case ISD::STRICT_FCEIL:
case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
return true;
@@ -1346,6 +1357,17 @@ public:
/// store occurs.
AtomicOrdering getOrdering() const { return MMO->getOrdering(); }
+ /// Return true if the memory operation ordering is Unordered or higher.
+ bool isAtomic() const { return MMO->isAtomic(); }
+
+ /// Returns true if the memory operation doesn't imply any ordering
+ /// constraints on surrounding memory operations beyond the normal memory
+ /// aliasing rules.
+ bool isUnordered() const { return MMO->isUnordered(); }
+
+  /// Returns true if the memory operation is neither atomic nor volatile.
+ bool isSimple() const { return !isAtomic() && !isVolatile(); }
+
/// Return the type of the in-memory value.
EVT getMemoryVT() const { return MemoryVT; }
@@ -1702,16 +1724,16 @@ class GlobalAddressSDNode : public SDNode {
const GlobalValue *TheGlobal;
int64_t Offset;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
const GlobalValue *GA, EVT VT, int64_t o,
- unsigned char TF);
+ unsigned TF);
public:
const GlobalValue *getGlobal() const { return TheGlobal; }
int64_t getOffset() const { return Offset; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
// Return the address space this GlobalAddress belongs to.
unsigned getAddressSpace() const;
@@ -1778,16 +1800,16 @@ class JumpTableSDNode : public SDNode {
friend class SelectionDAG;
int JTI;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
- JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF)
+ JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
: SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
}
public:
int getIndex() const { return JTI; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::JumpTable ||
@@ -1804,10 +1826,10 @@ class ConstantPoolSDNode : public SDNode {
} Val;
int Offset; // It's a MachineConstantPoolValue if top bit is set.
unsigned Alignment; // Minimum alignment requirement of CP (not log2 value).
- unsigned char TargetFlags;
+ unsigned TargetFlags;
ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
- unsigned Align, unsigned char TF)
+ unsigned Align, unsigned TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
TargetFlags(TF) {
@@ -1816,7 +1838,7 @@ class ConstantPoolSDNode : public SDNode {
}
ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
- EVT VT, int o, unsigned Align, unsigned char TF)
+ EVT VT, int o, unsigned Align, unsigned TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
TargetFlags(TF) {
@@ -1847,7 +1869,7 @@ public:
// Return the alignment of this constant pool object, which is either 0 (for
// default alignment) or the desired value.
unsigned getAlignment() const { return Alignment; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
Type *getType() const;
@@ -1861,16 +1883,16 @@ public:
class TargetIndexSDNode : public SDNode {
friend class SelectionDAG;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
int Index;
int64_t Offset;
public:
- TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned char TF)
- : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
- TargetFlags(TF), Index(Idx), Offset(Ofs) {}
+ TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
+ : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
+ TargetFlags(TF), Index(Idx), Offset(Ofs) {}
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
int getIndex() const { return Index; }
int64_t getOffset() const { return Offset; }
@@ -2063,17 +2085,17 @@ class BlockAddressSDNode : public SDNode {
const BlockAddress *BA;
int64_t Offset;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
- int64_t o, unsigned char Flags)
+ int64_t o, unsigned Flags)
: SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
BA(ba), Offset(o), TargetFlags(Flags) {}
public:
const BlockAddress *getBlockAddress() const { return BA; }
int64_t getOffset() const { return Offset; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BlockAddress ||
@@ -2104,15 +2126,16 @@ class ExternalSymbolSDNode : public SDNode {
friend class SelectionDAG;
const char *Symbol;
- unsigned char TargetFlags;
+ unsigned TargetFlags;
- ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT)
- : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol,
- 0, DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {}
+ ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
+ : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
+ DebugLoc(), getSDVTList(VT)),
+ Symbol(Sym), TargetFlags(TF) {}
public:
const char *getSymbol() const { return Symbol; }
- unsigned char getTargetFlags() const { return TargetFlags; }
+ unsigned getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ExternalSymbol ||
@@ -2181,8 +2204,6 @@ public:
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
LSBaseSDNodeBits.AddressingMode = AM;
assert(getAddressingMode() == AM && "Value truncated");
- assert((!MMO->isAtomic() || MMO->isVolatile()) &&
- "use an AtomicSDNode instead for non-volatile atomics");
}
const SDValue &getOffset() const {
@@ -2362,8 +2383,24 @@ public:
MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
const DebugLoc &dl, SDVTList VTs, EVT MemVT,
- MachineMemOperand *MMO)
- : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
+ MachineMemOperand *MMO, ISD::MemIndexType IndexType)
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ LSBaseSDNodeBits.AddressingMode = IndexType;
+ assert(getIndexType() == IndexType && "Value truncated");
+ }
+
+ /// How is Index applied to BasePtr when computing addresses.
+ ISD::MemIndexType getIndexType() const {
+ return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
+ }
+ bool isIndexScaled() const {
+ return (getIndexType() == ISD::SIGNED_SCALED) ||
+ (getIndexType() == ISD::UNSIGNED_SCALED);
+ }
+ bool isIndexSigned() const {
+ return (getIndexType() == ISD::SIGNED_SCALED) ||
+ (getIndexType() == ISD::SIGNED_UNSCALED);
+ }
// In the both nodes address is Op1, mask is Op2:
// MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
@@ -2387,8 +2424,10 @@ public:
friend class SelectionDAG;
MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- EVT MemVT, MachineMemOperand *MMO)
- : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {}
+ EVT MemVT, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType)
+ : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
+ IndexType) {}
const SDValue &getPassThru() const { return getOperand(1); }
@@ -2404,8 +2443,10 @@ public:
friend class SelectionDAG;
MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- EVT MemVT, MachineMemOperand *MMO)
- : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {}
+ EVT MemVT, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType)
+ : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
+ IndexType) {}
const SDValue &getValue() const { return getOperand(1); }
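
A sketch of how lowering code can branch on the index type now carried by gather/scatter nodes; Op is assumed to be an SDValue wrapping a masked gather.

    auto *MGT = cast<MaskedGatherSDNode>(Op.getNode());
    if (!MGT->isIndexSigned()) {
      // The index must be zero-extended before the address computation.
    }
    if (MGT->isIndexScaled()) {
      // The index is already scaled by the element size; don't scale twice.
    }
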
diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h
index 2bdf4425e24a..ed52db3e6269 100644
--- a/include/llvm/CodeGen/StackProtector.h
+++ b/include/llvm/CodeGen/StackProtector.h
@@ -61,6 +61,12 @@ private:
/// protection when -fstack-protector is used.
unsigned SSPBufferSize = 0;
+ /// VisitedPHIs - The set of PHI nodes visited when determining
+ /// if a variable's reference has been taken. This set
+ /// is maintained to ensure we don't visit the same PHI node multiple
+ /// times.
+ SmallPtrSet<const PHINode *, 16> VisitedPHIs;
+
// A prologue is generated.
bool HasPrologue = false;
diff --git a/include/llvm/CodeGen/SwitchLoweringUtils.h b/include/llvm/CodeGen/SwitchLoweringUtils.h
index 62134dc792f7..b8adcf759b19 100644
--- a/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -212,16 +212,17 @@ struct BitTestBlock {
BitTestInfo Cases;
BranchProbability Prob;
BranchProbability DefaultProb;
+ bool OmitRangeCheck;
BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E,
bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
BitTestInfo C, BranchProbability Pr)
: First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr) {}
+ Cases(std::move(C)), Prob(Pr), OmitRangeCheck(false) {}
};
-/// Return the range of value within a range.
+/// Return the range of values within a range.
uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First,
unsigned Last);
diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h
index aebeeecbe506..db3d1175afee 100644
--- a/include/llvm/CodeGen/TargetCallingConv.h
+++ b/include/llvm/CodeGen/TargetCallingConv.h
@@ -14,6 +14,7 @@
#define LLVM_CODEGEN_TARGETCALLINGCONV_H
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -120,16 +121,22 @@ namespace ISD {
bool isPointer() const { return IsPointer; }
void setPointer() { IsPointer = 1; }
- unsigned getByValAlign() const { return (1U << ByValAlign) / 2; }
- void setByValAlign(unsigned A) {
- ByValAlign = Log2_32(A) + 1;
- assert(getByValAlign() == A && "bitfield overflow");
+ unsigned getByValAlign() const {
+ MaybeAlign A = decodeMaybeAlign(ByValAlign);
+ return A ? A->value() : 0;
+ }
+ void setByValAlign(Align A) {
+ ByValAlign = encode(A);
+ assert(getByValAlign() == A.value() && "bitfield overflow");
}
- unsigned getOrigAlign() const { return (1U << OrigAlign) / 2; }
- void setOrigAlign(unsigned A) {
- OrigAlign = Log2_32(A) + 1;
- assert(getOrigAlign() == A && "bitfield overflow");
+ unsigned getOrigAlign() const {
+ MaybeAlign A = decodeMaybeAlign(OrigAlign);
+ return A ? A->value() : 0;
+ }
+ void setOrigAlign(Align A) {
+ OrigAlign = encode(A);
+ assert(getOrigAlign() == A.value() && "bitfield overflow");
}
unsigned getByValSize() const { return ByValSize; }
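
With the switch to the Align type, callers spell the byte value explicitly instead of relying on the old log2 encoding; a small sketch:

    ISD::ArgFlagsTy Flags;
    Flags.setByVal();
    Flags.setByValAlign(Align(8));          // previously: setByValAlign(8)
    unsigned Bytes = Flags.getByValAlign(); // still returns bytes; 0 if unset
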
diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h
index 878c9ffd2b51..72edb27964c4 100644
--- a/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/include/llvm/CodeGen/TargetFrameLowering.h
@@ -28,6 +28,7 @@ namespace TargetStackID {
enum Value {
Default = 0,
SGPRSpill = 1,
+ SVEVector = 2,
NoAlloc = 255
};
}
@@ -53,15 +54,15 @@ public:
};
private:
StackDirection StackDir;
- unsigned StackAlignment;
- unsigned TransientStackAlignment;
+ Align StackAlignment;
+ Align TransientStackAlignment;
int LocalAreaOffset;
bool StackRealignable;
public:
- TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO,
- unsigned TransAl = 1, bool StackReal = true)
- : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
- LocalAreaOffset(LAO), StackRealignable(StackReal) {}
+ TargetFrameLowering(StackDirection D, Align StackAl, int LAO,
+ Align TransAl = Align::None(), bool StackReal = true)
+ : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
+ LocalAreaOffset(LAO), StackRealignable(StackReal) {}
virtual ~TargetFrameLowering();
@@ -76,7 +77,7 @@ public:
/// stack pointer must be aligned on entry to a function. Typically, this
/// is the largest alignment for any data object in the target.
///
- unsigned getStackAlignment() const { return StackAlignment; }
+ unsigned getStackAlignment() const { return StackAlignment.value(); }
/// alignSPAdjust - This method aligns the stack adjustment to the correct
/// alignment.
@@ -95,7 +96,7 @@ public:
/// calls.
///
unsigned getTransientStackAlignment() const {
- return TransientStackAlignment;
+ return TransientStackAlignment.value();
}
/// isStackRealignable - This method returns whether the stack can be
@@ -366,15 +367,10 @@ public:
/// Check if given function is safe for not having callee saved registers.
/// This is used when interprocedural register allocation is enabled.
- static bool isSafeForNoCSROpt(const Function &F) {
- if (!F.hasLocalLinkage() || F.hasAddressTaken() ||
- !F.hasFnAttribute(Attribute::NoRecurse))
- return false;
- // Function should not be optimized as tail call.
- for (const User *U : F.users())
- if (auto CS = ImmutableCallSite(U))
- if (CS.isTailCall())
- return false;
+ static bool isSafeForNoCSROpt(const Function &F);
+
+  /// Check if the no-CSR optimization is profitable for the given function.
+ virtual bool isProfitableForNoCSROpt(const Function &F) const {
return true;
}
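
Target constructors must now pass Align values as well; a hypothetical subclass (MyTargetFrameLowering is illustrative, not from this patch):

    // Downward-growing stack, 16-byte aligned, local area at offset 0,
    // transient alignment of 4 bytes.
    MyTargetFrameLowering::MyTargetFrameLowering()
        : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16),
                              /*LAO=*/0, /*TransAl=*/Align(4)) {}
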
diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h
index 25b04f8c019a..5011cf34c0ee 100644
--- a/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/include/llvm/CodeGen/TargetInstrInfo.h
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOutliner.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
@@ -38,10 +38,12 @@
namespace llvm {
+class AAResults;
class DFAPacketizer;
class InstrItineraryData;
class LiveIntervals;
class LiveVariables;
+class MachineLoop;
class MachineMemOperand;
class MachineRegisterInfo;
class MCAsmInfo;
@@ -60,6 +62,8 @@ class TargetSubtargetInfo;
template <class T> class SmallVectorImpl;
+using ParamLoadedValue = std::pair<MachineOperand, DIExpression*>;
+
//---------------------------------------------------------------------------
///
/// TargetInstrInfo - Interface to description of machine instruction set
@@ -92,7 +96,7 @@ public:
/// registers so that the instructions result is independent of the place
/// in the function.
bool isTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA = nullptr) const {
+ AAResults *AA = nullptr) const {
return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF ||
(MI.getDesc().isRematerializable() &&
(isReallyTriviallyReMaterializable(MI, AA) ||
@@ -108,7 +112,7 @@ protected:
/// not always available.
/// Requirements must be check as stated in isTriviallyReMaterializable() .
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const {
+ AAResults *AA) const {
return false;
}
@@ -151,7 +155,7 @@ private:
/// this function does target-independent tests to determine if the
/// instruction is really trivially rematerializable.
bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI,
- AliasAnalysis *AA) const;
+ AAResults *AA) const;
public:
/// These methods return the opcode of the frame setup/destroy instructions
@@ -419,7 +423,8 @@ public:
/// findCommutedOpIndices(MI, Op1, Op2);
/// can be interpreted as a query asking to find an operand that would be
/// commutable with the operand#1.
- virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ virtual bool findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const;
/// A pair composed of a register and a sub-register index.
@@ -659,6 +664,50 @@ public:
BytesAdded);
}
+ /// Object returned by analyzeLoopForPipelining. Allows software pipelining
+ /// implementations to query attributes of the loop being pipelined and to
+ /// apply target-specific updates to the loop once pipelining is complete.
+ class PipelinerLoopInfo {
+ public:
+ virtual ~PipelinerLoopInfo();
+ /// Return true if the given instruction should not be pipelined and should
+  /// be ignored. An example could be a loop comparison, or an induction
+  /// variable update with no users being pipelined.
+ virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0;
+
+ /// Create a condition to determine if the trip count of the loop is greater
+ /// than TC.
+ ///
+ /// If the trip count is statically known to be greater than TC, return
+ /// true. If the trip count is statically known to be not greater than TC,
+ /// return false. Otherwise return nullopt and fill out Cond with the test
+ /// condition.
+ virtual Optional<bool>
+ createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond) = 0;
+
+ /// Modify the loop such that the trip count is
+ /// OriginalTC + TripCountAdjust.
+ virtual void adjustTripCount(int TripCountAdjust) = 0;
+
+ /// Called when the loop's preheader has been modified to NewPreheader.
+ virtual void setPreheader(MachineBasicBlock *NewPreheader) = 0;
+
+ /// Called when the loop is being removed. Any instructions in the preheader
+ /// should be removed.
+ ///
+ /// Once this function is called, no other functions on this object are
+ /// valid; the loop has been removed.
+ virtual void disposed() = 0;
+ };
+
+  /// Analyze loop L, which must be a single-basic-block loop, and if its
+  /// conditions can be understood well enough, produce a PipelinerLoopInfo
+  /// object.
+ virtual std::unique_ptr<PipelinerLoopInfo>
+ analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+ return nullptr;
+ }
+
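
A skeletal, hypothetical override of the new hook; a real target would return an object that understands its hardware-loop instructions (MyInstrInfo, MyPipelinerLoopInfo, and isHardwareLoopBranch are assumptions for illustration):

    std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
    MyInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
      MachineInstr *Terminator = &LoopBB->back();
      if (!isHardwareLoopBranch(*Terminator)) // hypothetical predicate
        return nullptr;                       // loop not understood
      return std::make_unique<MyPipelinerLoopInfo>(Terminator);
    }
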
/// Analyze the loop code, return true if it cannot be understood. Upon
/// success, this function returns false and returns information about the
/// induction variable and compare instruction used at the end.
@@ -730,6 +779,19 @@ public:
return false;
}
+ /// Return the increase in code size needed to predicate a contiguous run of
+ /// NumInsts instructions.
+ virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
+ unsigned NumInsts) const {
+ return 0;
+ }
+
+ /// Return an estimate for the code size reduction (in bytes) which will be
+ /// caused by removing the given branch instruction during if-conversion.
+ virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const {
+ return getInstSizeInBytes(MI);
+ }
+
/// Return true if it's profitable to unpredicate
/// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
/// exclusive predicates.
@@ -1558,8 +1620,7 @@ public:
/// function.
virtual bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA = nullptr) const {
+ const MachineInstr &MIb) const {
assert((MIa.mayLoad() || MIa.mayStore()) &&
"MIa must load from or modify a memory location");
assert((MIb.mayLoad() || MIb.mayStore()) &&
@@ -1636,6 +1697,28 @@ public:
return false;
}
+  /// During PHI elimination, lets the target make necessary checks and
+  /// insert the copy to the PHI destination register in a target-specific
+  /// manner.
+ virtual MachineInstr *createPHIDestinationCopy(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
+ const DebugLoc &DL, Register Src, Register Dst) const {
+ return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
+ .addReg(Src);
+ }
+
+  /// During PHI elimination, lets the target make necessary checks and
+  /// insert the copy for the PHI source register in a target-specific
+  /// manner.
+ virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsPt,
+ const DebugLoc &DL, Register Src,
+ Register SrcSubReg,
+ Register Dst) const {
+ return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
+ .addReg(Src, 0, SrcSubReg);
+ }
+
/// Returns a \p outliner::OutlinedFunction struct containing target-specific
/// information for a set of outlining candidates.
virtual outliner::OutlinedFunction getOutliningCandidateInfo(
@@ -1691,6 +1774,11 @@ public:
return false;
}
+ /// Produce the expression describing the \p MI loading a value into
+ /// the parameter's forwarding register.
+ virtual Optional<ParamLoadedValue>
+ describeLoadedValue(const MachineInstr &MI) const;
+
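
A consumption sketch for the new hook; the pair layout follows the ParamLoadedValue alias introduced above, and the emission step is an assumption.

    if (Optional<ParamLoadedValue> PLV = TII->describeLoadedValue(MI)) {
      const MachineOperand &MO = PLV->first; // where the value is located
      DIExpression *Expr = PLV->second;      // how to decode it
      // ... e.g. emit a call-site parameter entry from MO and Expr.
    }
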
private:
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
unsigned CatchRetOpcode;
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index d5cca60bb1b2..a58fca7e73f5 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -28,7 +28,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -48,6 +47,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
@@ -72,8 +72,10 @@ class Constant;
class FastISel;
class FunctionLoweringInfo;
class GlobalValue;
+class GISelKnownBits;
class IntrinsicInst;
struct KnownBits;
+class LegacyDivergenceAnalysis;
class LLVMContext;
class MachineBasicBlock;
class MachineFunction;
@@ -122,8 +124,7 @@ public:
TypeLegal, // The target natively supports this type.
TypePromoteInteger, // Replace this integer with a larger one.
TypeExpandInteger, // Split this integer into two of half the size.
- TypeSoftenFloat, // Convert this float to a same size integer type,
- // if an operation is not supported in target HW.
+ TypeSoftenFloat, // Convert this float to a same size integer type.
TypeExpandFloat, // Split this float into two of half the size.
TypeScalarizeVector, // Replace this one-element vector with its element.
TypeSplitVector, // Split this vector into two of half the size.
@@ -284,7 +285,7 @@ public:
/// a constant pool load whose address depends on the select condition. The
/// parameter may be used to differentiate a select with FP compare from
/// integer compare.
- virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+ virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
return true;
}
@@ -539,6 +540,12 @@ public:
return hasAndNotCompare(X);
}
+ /// Return true if the target has a bit-test instruction:
+ /// (X & (1 << Y)) ==/!= 0
+ /// This knowledge can be used to prevent breaking the pattern,
+ /// or creating it if it could be recognized.
+ virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
+
/// There are two ways to clear extreme bits (either low or high):
/// Mask: x & (-1 << y) (the instcombine canonical form)
/// Shifts: x >> y << y
@@ -571,6 +578,38 @@ public:
return false;
}
+ /// Given the pattern
+ /// (X & (C l>>/<< Y)) ==/!= 0
+ /// return true if it should be transformed into:
+ /// ((X <</l>> Y) & C) ==/!= 0
+ /// WARNING: if 'X' is a constant, the fold may deadlock!
+  /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
+  /// here because it may end up not being linked in.
+ virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const {
+ if (hasBitTest(X, Y)) {
+ // One interesting pattern that we'd want to form is 'bit test':
+ // ((1 << Y) & C) ==/!= 0
+ // But we also need to be careful not to try to reverse that fold.
+
+ // Is this '1 << Y' ?
+ if (OldShiftOpcode == ISD::SHL && CC->isOne())
+ return false; // Keep the 'bit test' pattern.
+
+ // Will it be '1 << Y' after the transform ?
+ if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
+ return true; // Do form the 'bit test' pattern.
+ }
+
+ // If 'X' is a constant, and we transform, then we will immediately
+ // try to undo the fold, thus causing endless combine loop.
+ // So by default, let's assume everyone prefers the fold
+ // iff 'X' is not a constant.
+ return !XC;
+ }
+
/// These two forms are equivalent:
/// sub %y, (xor %x, -1)
/// add (add %x, 1), %y
@@ -798,9 +837,9 @@ public:
PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
int offset = 0; // offset off of ptrVal
- unsigned size = 0; // the size of the memory location
+ uint64_t size = 0; // the size of the memory location
// (taken from memVT if zero)
- unsigned align = 1; // alignment
+ MaybeAlign align = Align::None(); // alignment
MachineMemOperand::Flags flags = MachineMemOperand::MONone;
IntrinsicInfo() = default;
@@ -884,6 +923,7 @@ public:
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
Supported = isSupportedFixedPointOperation(Op, VT, Scale);
break;
}
@@ -891,6 +931,8 @@ public:
return Supported ? Action : Expand;
}
+ // If Op is a strict floating-point operation, return the result
+ // of getOperationAction for the equivalent non-strict operation.
LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
unsigned EqOpc;
switch (Op) {
@@ -911,26 +953,25 @@ public:
case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
+ case ISD::STRICT_LRINT: EqOpc = ISD::LRINT; break;
+ case ISD::STRICT_LLRINT: EqOpc = ISD::LLRINT; break;
case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
+ case ISD::STRICT_LROUND: EqOpc = ISD::LROUND; break;
+ case ISD::STRICT_LLROUND: EqOpc = ISD::LLROUND; break;
case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
+ case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break;
+ case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break;
case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
}
- auto Action = getOperationAction(EqOpc, VT);
-
- // We don't currently handle Custom or Promote for strict FP pseudo-ops.
- // For now, we just expand for those cases.
- if (Action != Legal)
- Action = Expand;
-
- return Action;
+ return getOperationAction(EqOpc, VT);
}
/// Return true if the specified operation is legal on this target or can be
@@ -1206,7 +1247,7 @@ public:
EltTy = PointerTy.getTypeForEVT(Ty->getContext());
}
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
- VTy->getNumElements());
+ VTy->getElementCount());
}
return EVT::getEVT(Ty, AllowUnknown);
@@ -1316,9 +1357,9 @@ public:
/// Certain targets have context-sensitive alignment requirements, where one
/// type has the alignment requirement of another type.
- virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
- DataLayout DL) const {
- return DL.getABITypeAlignment(ArgTy);
+ virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
+ DataLayout DL) const {
+ return Align(DL.getABITypeAlignment(ArgTy));
}
/// If true, then instruction selection should seek to shrink the FP constant
@@ -1426,11 +1467,38 @@ public:
return false;
}
+ /// LLT handling variant.
+ virtual bool allowsMisalignedMemoryAccesses(
+ LLT, unsigned AddrSpace = 0, unsigned Align = 1,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool * /*Fast*/ = nullptr) const {
+ return false;
+ }
+
+ /// This function returns true if the memory access is aligned or if the
+ /// target allows this specific unaligned memory access. If the access is
+ /// allowed, the optional final parameter returns if the access is also fast
+ /// (as defined by the target).
+ bool allowsMemoryAccessForAlignment(
+ LLVMContext &Context, const DataLayout &DL, EVT VT,
+ unsigned AddrSpace = 0, unsigned Alignment = 1,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *Fast = nullptr) const;
+
+ /// Return true if the memory access of this type is aligned or if the target
+ /// allows this specific unaligned access for the given MachineMemOperand.
+ /// If the access is allowed, the optional final parameter returns if the
+ /// access is also fast (as defined by the target).
+ bool allowsMemoryAccessForAlignment(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ const MachineMemOperand &MMO,
+ bool *Fast = nullptr) const;
+
/// Return true if the target supports a memory access of this type for the
/// given address space and alignment. If the access is allowed, the optional
/// final parameter returns if the access is also fast (as defined by the
/// target).
- bool
+ virtual bool
allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace = 0, unsigned Alignment = 1,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
@@ -1463,6 +1531,16 @@ public:
return MVT::Other;
}
+
+ /// LLT returning variant.
+ virtual LLT
+ getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/,
+ unsigned /*SrcAlign*/, bool /*IsMemset*/,
+ bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
+ const AttributeList & /*FuncAttributes*/) const {
+ return LLT();
+ }
+
/// Returns true if it's safe to use load / store of the specified type to
/// expand memcpy / memset inline.
///
@@ -1522,35 +1600,19 @@ public:
report_fatal_error("Funclet EH is not implemented for this target");
}
- /// Returns the target's jmp_buf size in bytes (if never set, the default is
- /// 200)
- unsigned getJumpBufSize() const {
- return JumpBufSize;
- }
-
- /// Returns the target's jmp_buf alignment in bytes (if never set, the default
- /// is 0)
- unsigned getJumpBufAlignment() const {
- return JumpBufAlignment;
- }
-
/// Return the minimum stack alignment of an argument.
- unsigned getMinStackArgumentAlignment() const {
+ Align getMinStackArgumentAlignment() const {
return MinStackArgumentAlignment;
}
/// Return the minimum function alignment.
- unsigned getMinFunctionAlignment() const {
- return MinFunctionAlignment;
- }
+ Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
/// Return the preferred function alignment.
- unsigned getPrefFunctionAlignment() const {
- return PrefFunctionAlignment;
- }
+ Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
/// Return the preferred loop alignment.
- virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
+ virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
return PrefLoopAlignment;
}
@@ -1772,6 +1834,11 @@ public:
return IsSigned;
}
+ /// Returns true if arguments should be extended in lib calls.
+ virtual bool shouldExtendTypeInLibCall(EVT Type) const {
+ return true;
+ }
+
/// Returns how the given (atomic) load should be expanded by the
/// IR-level AtomicExpand pass.
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
@@ -1848,7 +1915,8 @@ public:
/// This may be true if the target does not directly support the
/// multiplication operation for the specified type or the sequence of simpler
/// ops is faster than the multiply.
- virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
+ virtual bool decomposeMulByConstant(LLVMContext &Context,
+ EVT VT, SDValue C) const {
return false;
}
@@ -2056,40 +2124,25 @@ protected:
TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
}
- /// Set the target's required jmp_buf buffer size (in bytes); default is 200
- void setJumpBufSize(unsigned Size) {
- JumpBufSize = Size;
- }
-
- /// Set the target's required jmp_buf buffer alignment (in bytes); default is
- /// 0
- void setJumpBufAlignment(unsigned Align) {
- JumpBufAlignment = Align;
- }
-
- /// Set the target's minimum function alignment (in log2(bytes))
- void setMinFunctionAlignment(unsigned Align) {
- MinFunctionAlignment = Align;
+ /// Set the target's minimum function alignment.
+ void setMinFunctionAlignment(Align Alignment) {
+ MinFunctionAlignment = Alignment;
}
/// Set the target's preferred function alignment. This should be set if
- /// there is a performance benefit to higher-than-minimum alignment (in
- /// log2(bytes))
- void setPrefFunctionAlignment(unsigned Align) {
- PrefFunctionAlignment = Align;
+ /// there is a performance benefit to higher-than-minimum alignment
+ void setPrefFunctionAlignment(Align Alignment) {
+ PrefFunctionAlignment = Alignment;
}
- /// Set the target's preferred loop alignment. Default alignment is zero, it
- /// means the target does not care about loop alignment. The alignment is
- /// specified in log2(bytes). The target may also override
- /// getPrefLoopAlignment to provide per-loop values.
- void setPrefLoopAlignment(unsigned Align) {
- PrefLoopAlignment = Align;
- }
+  /// Set the target's preferred loop alignment. The default alignment of one
+  /// means the target does not care about loop alignment. The target may also
+  /// override getPrefLoopAlignment to provide per-loop values.
+ void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
- /// Set the minimum stack alignment of an argument (in log2(bytes)).
- void setMinStackArgumentAlignment(unsigned Align) {
- MinStackArgumentAlignment = Align;
+ /// Set the minimum stack alignment of an argument.
+ void setMinStackArgumentAlignment(Align Alignment) {
+ MinStackArgumentAlignment = Alignment;
}
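
A sketch of the migrated setters as they might appear in a hypothetical target's TargetLowering constructor; byte values are now explicit rather than log2-encoded:

    // Before this change, setPrefFunctionAlignment(4) meant 1 << 4 = 16 bytes.
    setMinFunctionAlignment(Align(4));
    setPrefFunctionAlignment(Align(16));
    setPrefLoopAlignment(Align(16));
    setMinStackArgumentAlignment(Align(8));
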
/// Set the maximum atomic operation size supported by the
@@ -2555,6 +2608,12 @@ public:
// same blocks of its users.
virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
+ // Return the shift amount threshold for profitable transforms into shifts.
+ // Transforms creating shifts above the returned value will be avoided.
+ virtual unsigned getShiftAmountThreshold(EVT VT) const {
+ return VT.getScalarSizeInBits();
+ }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
@@ -2650,25 +2709,19 @@ private:
/// register usage.
Sched::Preference SchedPreferenceInfo;
- /// The size, in bytes, of the target's jmp_buf buffers
- unsigned JumpBufSize;
-
- /// The alignment, in bytes, of the target's jmp_buf buffers
- unsigned JumpBufAlignment;
-
/// The minimum alignment that any argument on the stack needs to have.
- unsigned MinStackArgumentAlignment;
+ Align MinStackArgumentAlignment;
/// The minimum function alignment (used when optimizing for size, and to
/// prevent explicitly provided alignment from leading to incorrect code).
- unsigned MinFunctionAlignment;
+ Align MinFunctionAlignment;
/// The preferred function alignment (used when alignment unspecified and
/// optimizing for speed).
- unsigned PrefFunctionAlignment;
+ Align PrefFunctionAlignment;
- /// The preferred loop alignment.
- unsigned PrefLoopAlignment;
+  /// The preferred loop alignment (in log2, not in bytes).
+ Align PrefLoopAlignment;
/// Size in bits of the maximum atomics size the backend supports.
/// Accesses larger than this will be expanded by AtomicExpandPass.
@@ -2744,7 +2797,6 @@ private:
/// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];
-protected:
ValueTypeActionImpl ValueTypeActions;
private:
@@ -2790,7 +2842,7 @@ protected:
/// expected to be merged.
unsigned GatherAllAliasesMaxDepth;
- /// Specify maximum number of store instructions per memset call.
+ /// \brief Specify maximum number of store instructions per memset call.
///
/// When lowering \@llvm.memset this field specifies the maximum number of
/// store operations that may be substituted for the call to memset. Targets
@@ -2801,12 +2853,10 @@ protected:
/// with 16-bit alignment would result in four 2-byte stores and one 1-byte
/// store. This only applies to setting a constant array of a constant size.
unsigned MaxStoresPerMemset;
-
- /// Maximum number of stores operations that may be substituted for the call
- /// to memset, used for functions with OptSize attribute.
+ /// Likewise for functions with the OptSize attribute.
unsigned MaxStoresPerMemsetOptSize;
- /// Specify maximum bytes of store instructions per memcpy call.
+ /// \brief Specify maximum number of store instructions per memcpy call.
///
/// When lowering \@llvm.memcpy this field specifies the maximum number of
/// store operations that may be substituted for a call to memcpy. Targets
@@ -2818,8 +2868,8 @@ protected:
/// and one 1-byte store. This only applies to copying a constant array of
/// constant size.
unsigned MaxStoresPerMemcpy;
-
-
+ /// Likewise for functions with the OptSize attribute.
+ unsigned MaxStoresPerMemcpyOptSize;
/// \brief Specify max number of store instructions to glue in inlined memcpy.
///
/// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
@@ -2827,13 +2877,22 @@ protected:
// vectorization later on.
unsigned MaxGluedStoresPerMemcpy = 0;
- /// Maximum number of store operations that may be substituted for a call to
- /// memcpy, used for functions with OptSize attribute.
- unsigned MaxStoresPerMemcpyOptSize;
+ /// \brief Specify maximum number of load instructions per memcmp call.
+ ///
+ /// When lowering \@llvm.memcmp this field specifies the maximum number of
+ /// pairs of load operations that may be substituted for a call to memcmp.
+ /// Targets must set this value based on the cost threshold for that target.
+ /// Targets should assume that the memcmp will be done using as many of the
+ /// largest load operations first, followed by smaller ones, if necessary, per
+ /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine
+  /// with 32-bit alignment would result in one 4-byte load, one 2-byte load,
+  /// and one 1-byte load. This only applies to comparing against a constant
+  /// array of constant size.
unsigned MaxLoadsPerMemcmp;
+ /// Likewise for functions with the OptSize attribute.
unsigned MaxLoadsPerMemcmpOptSize;
- /// Specify maximum bytes of store instructions per memmove call.
+ /// \brief Specify maximum number of store instructions per memmove call.
///
/// When lowering \@llvm.memmove this field specifies the maximum number of
/// store instructions that may be substituted for a call to memmove. Targets
@@ -2844,9 +2903,7 @@ protected:
/// with 8-bit alignment would result in nine 1-byte stores. This only
/// applies to copying a constant array of constant size.
unsigned MaxStoresPerMemmove;
-
- /// Maximum number of store instructions that may be substituted for a call to
- /// memmove, used for functions with OptSize attribute.
+ /// Likewise for functions with the OptSize attribute.
unsigned MaxStoresPerMemmoveOptSize;
/// Tells the code generator that select is more expensive than a branch if
@@ -2885,6 +2942,7 @@ protected:
class TargetLowering : public TargetLoweringBase {
public:
struct DAGCombinerInfo;
+ struct MakeLibCallOptions;
TargetLowering(const TargetLowering &) = delete;
TargetLowering &operator=(const TargetLowering &) = delete;
@@ -2925,6 +2983,14 @@ public:
return false;
}
+ /// Returns true if the specified base+offset is a legal indexed addressing
+ /// mode for this target. \p MI is the load or store instruction that is being
+ /// considered for transformation.
+ virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+ bool IsPre, MachineRegisterInfo &MRI) const {
+ return false;
+ }
+
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
virtual unsigned getJumpTableEncoding() const;
@@ -2955,14 +3021,15 @@ public:
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
SDValue &NewRHS, ISD::CondCode &CCCode,
- const SDLoc &DL) const;
+ const SDLoc &DL, const SDValue OldLHS,
+ const SDValue OldRHS) const;
/// Returns a pair of (return value, chain).
/// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
- std::pair<SDValue, SDValue> makeLibCall(
- SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops,
- bool isSigned, const SDLoc &dl, bool doesNotReturn = false,
- bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const;
+ std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
+ EVT RetVT, ArrayRef<SDValue> Ops,
+ MakeLibCallOptions CallOptions,
+ const SDLoc &dl) const;
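
A hedged sketch of the new call shape: the options object replaces the previous boolean parameters. The field name follows the MakeLibCallOptions declaration later in this header; real callers may use setter helpers not shown in this excerpt.

    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.IsSExt = true;        // replaces the old 'isSigned' argument
    std::pair<SDValue, SDValue> Tmp =
        TLI.makeLibCall(DAG, RTLIB::ADD_F128, RetVT, Ops, CallOptions, dl);
    SDValue Result = Tmp.first;       // the call's return value
    SDValue Chain = Tmp.second;       // the output chain
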
/// Check whether parameters to a call that are passed in callee saved
/// registers are the same as from the calling function. This needs to be
@@ -3065,6 +3132,14 @@ public:
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
DAGCombinerInfo &DCI) const;
+ /// More limited version of SimplifyDemandedBits that can be used to "look
+ /// through" ops that don't contribute to the DemandedBits/DemandedElts -
+ /// bitwise ops etc.
+ SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ SelectionDAG &DAG,
+ unsigned Depth) const;
+
/// Look at Vector Op. At this point, we know that only the DemandedElts
/// elements of the result of Op are ever used downstream. If we can use
/// this information to simplify Op, create a new simplified DAG node and
@@ -3099,6 +3174,15 @@ public:
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const;
+ /// Determine which of the bits specified in Mask are known to be either zero
+ /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
+ /// argument allows us to only collect the known bits that are shared by the
+ /// requested vector elements. This is for GISel.
+ virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
+ Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth = 0) const;
/// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
/// Default implementation computes low bits based on alignment
@@ -3139,6 +3223,21 @@ public:
TargetLoweringOpt &TLO,
unsigned Depth = 0) const;
+ /// More limited version of SimplifyDemandedBits that can be used to "look
+ /// through" ops that don't contribute to the DemandedBits/DemandedElts -
+ /// bitwise ops etc.
+ virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const;
+
+ /// Tries to build a legal vector shuffle using the provided parameters
+ /// or equivalent variations. The Mask argument may be modified as the
+ /// function tries different variations.
+ /// Returns an empty SDValue if the operation fails.
+ SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
+ SDValue N1, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG) const;
+
/// This method returns the constant pool value that will be loaded by LD.
/// NOTE: You must check for implicit extensions of the constant by LD.
virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
@@ -3174,6 +3273,8 @@ public:
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
+ bool recursivelyDeleteUnusedNodes(SDNode *N);
+
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
};
@@ -3297,6 +3398,18 @@ public:
llvm_unreachable("Not Implemented");
}
+ /// Return 1 if we can compute the negated form of the specified expression
+ /// for the same cost as the expression itself, or 2 if we can compute the
+ /// negated form more cheaply than the expression itself. Else return 0.
+ virtual char isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth = 0) const;
+
+ /// If isNegatibleForFree returns nonzero, return the newly negated expression.
+ virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth = 0) const;
+
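The two hooks are intended to be used as a pair; the call pattern below is a sketch with invented variable names, not code from the patch:

    if (char Cost = TLI.isNegatibleForFree(Op, DAG, LegalOperations,
                                           ForCodeSize)) {
      // Cost == 2 means the negated form is strictly cheaper than Op.
      SDValue NegOp = TLI.getNegatedExpression(Op, DAG, LegalOperations,
                                               ForCodeSize);
      // ... fold away an explicit FNEG by using NegOp instead ...
    }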
//===--------------------------------------------------------------------===//
// Lowering methods - These methods must be implemented by targets so that
// the SelectionDAGBuilder code knows how to lower these.
@@ -3468,6 +3581,51 @@ public:
}
};
+ /// This structure is used to pass arguments to the makeLibCall function.
+ struct MakeLibCallOptions {
+ // By passing the type list before softening to makeLibCall, the target
+ // hook shouldExtendTypeInLibCall can query the original type before
+ // softening.
+ ArrayRef<EVT> OpsVTBeforeSoften;
+ EVT RetVTBeforeSoften;
+ bool IsSExt : 1;
+ bool DoesNotReturn : 1;
+ bool IsReturnValueUsed : 1;
+ bool IsPostTypeLegalization : 1;
+ bool IsSoften : 1;
+
+ MakeLibCallOptions()
+ : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
+ IsPostTypeLegalization(false), IsSoften(false) {}
+
+ MakeLibCallOptions &setSExt(bool Value = true) {
+ IsSExt = Value;
+ return *this;
+ }
+
+ MakeLibCallOptions &setNoReturn(bool Value = true) {
+ DoesNotReturn = Value;
+ return *this;
+ }
+
+ MakeLibCallOptions &setDiscardResult(bool Value = true) {
+ IsReturnValueUsed = !Value;
+ return *this;
+ }
+
+ MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
+ IsPostTypeLegalization = Value;
+ return *this;
+ }
+
+ MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
+ bool Value = true) {
+ OpsVTBeforeSoften = OpsVT;
+ RetVTBeforeSoften = RetVT;
+ IsSoften = Value;
+ return *this;
+ }
+ };
+
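Because every setter returns *this, options chain fluently; a sketch of the new makeLibCall calling convention (libcall, types, and variables illustrative only):

    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(true).setTypeListBeforeSoften(OpsVT, RetVT);
    std::pair<SDValue, SDValue> Res =
        TLI.makeLibCall(DAG, RTLIB::ADD_F128, RetVT, Ops, CallOptions, dl);
    // Res.first is the return value, Res.second the output chain.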
/// This function lowers an abstract call to a function into an actual call.
/// This returns a pair of operands. The first element is the return value
/// for the function (if RetTy is not VoidTy). The second element is the
@@ -3537,8 +3695,8 @@ public:
/// Return the register ID of the name passed in. Used by named register
/// global variables extension. There is no target-independent behaviour
/// so the default action is to bail.
- virtual unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
+ virtual Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const {
report_fatal_error("Named registers not implemented for this target");
}
@@ -3597,6 +3755,25 @@ public:
return MachineMemOperand::MONone;
}
+ /// Should SelectionDAG lower an atomic store of the given kind as a normal
+ /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
+ /// eventually migrate all targets to using StoreSDNodes, but porting is
+ /// being done one target at a time.
+ virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
+ assert(SI.isAtomic() && "violated precondition");
+ return false;
+ }
+
+ /// Should SelectionDAG lower an atomic load of the given kind as a normal
+ /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
+ /// eventually migrate all targets to using LoadSDNodes, but porting is
+ /// being done one target at a time.
+ virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
+ assert(LI.isAtomic() && "violated precondition");
+ return false;
+ }
+
+
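A target that has finished the migration would simply return true from these hooks; a hypothetical override:

    bool MyTargetLowering::lowerAtomicLoadAsLoadSDNode(
        const LoadInst &LI) const {
      // Atomic loads on this target are ordinary LoadSDNodes that carry
      // their atomic flags on the MachineMemOperand.
      return true;
    }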
/// This callback is invoked by the type legalizer to legalize nodes with an
/// illegal operand type but legal result types. It replaces the
/// LowerOperation callback in the type Legalizer. The reason we can not do
@@ -3665,6 +3842,7 @@ public:
C_Register, // Constraint represents specific register(s).
C_RegisterClass, // Constraint represents any of register(s) in class.
C_Memory, // Memory constraint.
+ C_Immediate, // Requires an immediate.
C_Other, // Something else.
C_Unknown // Unsupported constraint.
};
@@ -3905,7 +4083,7 @@ public:
/// \param N Node to expand
/// \param Result output after conversion
/// \returns True, if the expansion was successful, false otherwise
- bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
+ SelectionDAG &DAG) const;
/// Expand UINT(i64) to double(f64) conversion
/// \param N Node to expand
@@ -3986,8 +4164,8 @@ public:
/// method accepts integers as its arguments.
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
- /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts
- /// integers as its arguments.
+ /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
+ /// method accepts integers as its arguments.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
/// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
@@ -4070,6 +4248,11 @@ private:
DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+ // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+ SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL) const;
+
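The fold in the comment above is an exact equivalence for logical shifts; restated as plain scalar C++ for illustration (C = 0xff chosen arbitrarily):

    // Both functions test exactly the bits of x selected by C shifted by y.
    bool before(uint32_t x, uint32_t y) { return (x & (0xffu << y)) == 0; }
    bool after_(uint32_t x, uint32_t y) { return ((x >> y) & 0xffu) == 0; }
    // Hoisting the constant out of the shift exposes further folds on C.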
SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL,
@@ -4077,6 +4260,14 @@ private:
SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
ISD::CondCode Cond, DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+
+ SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const;
+ SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
+ ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const;
};
/// Given an LLVM IR type and return type attributes, compute the return value
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index a1fb81cb009d..59f5ddbd9dac 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
#define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
+#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -35,7 +36,7 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
protected:
MCSymbolRefExpr::VariantKind PLTRelativeVariantKind =
MCSymbolRefExpr::VK_None;
- const TargetMachine *TM;
+ const TargetMachine *TM = nullptr;
public:
TargetLoweringObjectFileELF() = default;
@@ -126,7 +127,8 @@ public:
MachineModuleInfo *MMI) const override;
/// Get MachO PC relative GOT entry relocation
- const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV,
+ const MCSymbol *Sym,
const MCValue &MV, int64_t Offset,
MachineModuleInfo *MMI,
MCStreamer &Streamer) const override;
@@ -206,6 +208,34 @@ public:
const TargetMachine &TM) const override;
};
+class TargetLoweringObjectFileXCOFF : public TargetLoweringObjectFile {
+public:
+ TargetLoweringObjectFileXCOFF() = default;
+ ~TargetLoweringObjectFileXCOFF() override = default;
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
+ const Function &F) const override;
+
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
+
+ MCSection *getStaticCtorSection(unsigned Priority,
+ const MCSymbol *KeySym) const override;
+ MCSection *getStaticDtorSection(unsigned Priority,
+ const MCSymbol *KeySym) const override;
+
+ const MCExpr *lowerRelativeReference(const GlobalValue *LHS,
+ const GlobalValue *RHS,
+ const TargetMachine &TM) const override;
+
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
+
+ static XCOFF::StorageClass getStorageClassForGlobal(const GlobalObject *GO);
+};
+
} // end namespace llvm
#endif // LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h
index 0bd82aafac37..d48fc664c1c3 100644
--- a/include/llvm/CodeGen/TargetPassConfig.h
+++ b/include/llvm/CodeGen/TargetPassConfig.h
@@ -280,7 +280,7 @@ public:
///
/// This can also be used to plug a new MachineSchedStrategy into an instance
/// of the standard ScheduleDAGMI:
- /// return new ScheduleDAGMI(C, make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false)
+ /// return new ScheduleDAGMI(C, std::make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false)
///
/// Return NULL to select the default (generic) machine scheduler.
virtual ScheduleDAGInstrs *
diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h
index ddbd677b3eaa..c42ca3ad6eb9 100644
--- a/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -87,11 +87,20 @@ public:
/// Return true if the specified register is included in this register class.
/// This does not include virtual registers.
bool contains(unsigned Reg) const {
+ // FIXME: Historically this function has returned false when given vregs,
+ // but it should probably only receive physical registers.
+ if (!Register::isPhysicalRegister(Reg))
+ return false;
return MC->contains(Reg);
}
/// Return true if both registers are in this class.
bool contains(unsigned Reg1, unsigned Reg2) const {
+ // FIXME: Historically this function has returned false when given vregs,
+ // but it should probably only receive physical registers.
+ if (!Register::isPhysicalRegister(Reg1) ||
+ !Register::isPhysicalRegister(Reg2))
+ return false;
return MC->contains(Reg1, Reg2);
}
@@ -258,57 +267,6 @@ public:
// Further sentinels can be allocated from the small negative integers.
// DenseMapInfo<unsigned> uses -1u and -2u.
- /// isStackSlot - Sometimes it is useful the be able to store a non-negative
- /// frame index in a variable that normally holds a register. isStackSlot()
- /// returns true if Reg is in the range used for stack slots.
- ///
- /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack
- /// slots, so if a variable may contains a stack slot, always check
- /// isStackSlot() first.
- ///
- static bool isStackSlot(unsigned Reg) {
- return int(Reg) >= (1 << 30);
- }
-
- /// Compute the frame index from a register value representing a stack slot.
- static int stackSlot2Index(unsigned Reg) {
- assert(isStackSlot(Reg) && "Not a stack slot");
- return int(Reg - (1u << 30));
- }
-
- /// Convert a non-negative frame index to a stack slot register value.
- static unsigned index2StackSlot(int FI) {
- assert(FI >= 0 && "Cannot hold a negative frame index.");
- return FI + (1u << 30);
- }
-
- /// Return true if the specified register number is in
- /// the physical register namespace.
- static bool isPhysicalRegister(unsigned Reg) {
- assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
- return int(Reg) > 0;
- }
-
- /// Return true if the specified register number is in
- /// the virtual register namespace.
- static bool isVirtualRegister(unsigned Reg) {
- assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
- return int(Reg) < 0;
- }
-
- /// Convert a virtual register number to a 0-based index.
- /// The first virtual register in a function will get the index 0.
- static unsigned virtReg2Index(unsigned Reg) {
- assert(isVirtualRegister(Reg) && "Not a virtual register");
- return Reg & ~(1u << 31);
- }
-
- /// Convert a 0-based index to a virtual register number.
- /// This is the inverse operation of VirtReg2IndexFunctor below.
- static unsigned index2VirtReg(unsigned Index) {
- return Index | (1u << 31);
- }
-
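These statics are removed rather than lost: they now live on the Register class (llvm/CodeGen/Register.h), which this import threads through the CodeGen APIs. An equivalence sketch using the new spellings:

    Register R = Register::index2VirtReg(0); // first vreg in a function
    assert(R.isVirtual() && "virtual registers have the high bit set");
    unsigned Idx = Register::virtReg2Index(R); // back to the 0-based index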
/// Return the size in bits of a register from class RC.
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const {
return getRegClassInfo(RC).RegSize;
@@ -419,9 +377,9 @@ public:
/// Returns true if the two registers are equal or alias each other.
/// The registers may be virtual registers.
- bool regsOverlap(unsigned regA, unsigned regB) const {
+ bool regsOverlap(Register regA, Register regB) const {
if (regA == regB) return true;
- if (isVirtualRegister(regA) || isVirtualRegister(regB))
+ if (regA.isVirtual() || regB.isVirtual())
return false;
// Regunits are numerically ordered. Find a common unit.
@@ -489,6 +447,14 @@ public:
llvm_unreachable("target does not provide no preserved mask");
}
+ /// Return a list of all of the registers which are clobbered "inside" a call
+ /// to the given function. For example, these might be needed for PLT
+ /// sequences or long-branch veneers.
+ virtual ArrayRef<MCPhysReg>
+ getIntraCallClobberedRegs(const MachineFunction *MF) const {
+ return {};
+ }
+
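An override would typically return a small static table; the target namespace and register names below are hypothetical:

    ArrayRef<MCPhysReg>
    MyRegisterInfo::getIntraCallClobberedRegs(const MachineFunction *MF) const {
      // Registers a PLT stub or long-branch veneer may scratch on this target.
      static const MCPhysReg IntraCallClobbered[] = {MYTGT::X16, MYTGT::X17};
      return IntraCallClobbered;
    }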
/// Return true if all bits that are set in mask \p mask0 are also set in
/// \p mask1.
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const;
@@ -535,6 +501,11 @@ public:
return false;
}
+ /// This is a wrapper around getCallPreservedMask().
+ /// Return true if the register is preserved after the call.
+ virtual bool isCalleeSavedPhysReg(unsigned PhysReg,
+ const MachineFunction &MF) const;
+
/// Prior to adding the live-out mask to a stackmap or patchpoint
/// instruction, provide the target the opportunity to adjust it (mainly to
/// remove pseudo-registers that should be ignored).
@@ -709,13 +680,9 @@ public:
/// Find the largest common subclass of A and B.
/// Return NULL if there is no common subclass.
- /// The common subclass should contain
- /// simple value type SVT if it is not the Any type.
const TargetRegisterClass *
getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- const MVT::SimpleValueType SVT =
- MVT::SimpleValueType::Any) const;
+ const TargetRegisterClass *B) const;
/// Returns a TargetRegisterClass used for pointer values.
/// If a target supports multiple different pointer register classes,
@@ -1005,6 +972,13 @@ public:
const MachineRegisterInfo &MRI) const {
return nullptr;
}
+
+ /// Returns the physical register number of sub-register \p Idx
+ /// for physical register \p Reg. Returns zero if the sub-register does not
+ /// exist.
+ inline Register getSubReg(MCRegister Reg, unsigned Idx) const {
+ return static_cast<const MCRegisterInfo *>(this)->getSubReg(Reg, Idx);
+ }
};
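Callers now get a Register-typed result from the wrapper instead of a raw unsigned; a usage sketch with invented register and sub-register index names:

    Register Sub = TRI->getSubReg(MYTGT::Q0, MYTGT::sub_32);
    if (!Sub) {
      // The requested sub-register does not exist on Q0.
    }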
//===----------------------------------------------------------------------===//
@@ -1156,7 +1130,7 @@ public:
struct VirtReg2IndexFunctor {
using argument_type = unsigned;
unsigned operator()(unsigned Reg) const {
- return TargetRegisterInfo::virtReg2Index(Reg);
+ return Register::virtReg2Index(Reg);
}
};
@@ -1170,7 +1144,7 @@ struct VirtReg2IndexFunctor {
/// %physreg17 - a physical register when no TRI instance given.
///
/// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n';
-Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr,
+Printable printReg(Register Reg, const TargetRegisterInfo *TRI = nullptr,
unsigned SubIdx = 0,
const MachineRegisterInfo *MRI = nullptr);
diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h
index 037fc3ed3243..56018eca8c27 100644
--- a/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -106,12 +106,10 @@ public:
// us do things like a dedicated avx512 selector). However, we might want
// to also specialize selectors by MachineFunction, which would let us be
// aware of optsize/optnone and such.
- virtual const InstructionSelector *getInstructionSelector() const {
+ virtual InstructionSelector *getInstructionSelector() const {
return nullptr;
}
- virtual unsigned getHwMode() const { return 0; }
-
/// Target can subclass this hook to select a different DAG scheduler.
virtual RegisterScheduler::FunctionPassCtor
getDAGScheduler(CodeGenOpt::Level) const {
@@ -274,6 +272,12 @@ public:
/// scheduling, DAGCombine, etc.).
virtual bool useAA() const;
+ /// Sink addresses into blocks using GEP instructions rather than
+ /// pointer casts and arithmetic.
+ virtual bool addrSinkUsingGEPs() const {
+ return useAA();
+ }
+
/// Enable the use of the early if conversion pass.
virtual bool enableEarlyIfConversion() const { return false; }
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index c540c94f79d9..cd4c4ca64081 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -81,7 +81,7 @@ namespace llvm {
/// Returns the EVT that represents a vector EC.Min elements in length,
/// where each element is of type VT.
- static EVT getVectorVT(LLVMContext &Context, EVT VT, MVT::ElementCount EC) {
+ static EVT getVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) {
MVT M = MVT::getVectorVT(VT.V, EC);
if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
return M;
@@ -277,7 +277,7 @@ namespace llvm {
}
// Given a (possibly scalable) vector type, return the ElementCount
- MVT::ElementCount getVectorElementCount() const {
+ ElementCount getVectorElementCount() const {
assert((isVector()) && "Invalid vector type!");
if (isSimple())
return V.getVectorElementCount();
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 5818ac183fcc..16df565bc8b8 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -40,127 +40,132 @@ def v16i1 : ValueType<16, 18>; // 16 x i1 vector value
def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value
def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value
def v128i1 : ValueType<128, 21>; // 128 x i1 vector value
-def v512i1 : ValueType<512, 22>; // 512 x i1 vector value
-def v1024i1: ValueType<1024,23>; //1024 x i1 vector value
-
-def v1i8 : ValueType<8, 24>; // 1 x i8 vector value
-def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value
-def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value
-def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 28>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 29>; // 32 x i8 vector value
-def v64i8 : ValueType<512, 30>; // 64 x i8 vector value
-def v128i8 : ValueType<1024,31>; //128 x i8 vector value
-def v256i8 : ValueType<2048,32>; //256 x i8 vector value
-
-def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value
-def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value
-def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 36>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 37>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 38>; // 32 x i16 vector value
-def v64i16 : ValueType<1024,39>; // 64 x i16 vector value
-def v128i16: ValueType<2048,40>; //128 x i16 vector value
-
-def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value
-def v3i32 : ValueType<96 , 43>; // 3 x i32 vector value
-def v4i32 : ValueType<128, 44>; // 4 x i32 vector value
-def v5i32 : ValueType<160, 45>; // 5 x i32 vector value
-def v8i32 : ValueType<256, 46>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 47>; // 16 x i32 vector value
-def v32i32 : ValueType<1024,48>; // 32 x i32 vector value
-def v64i32 : ValueType<2048,49>; // 64 x i32 vector value
-def v128i32 : ValueType<4096,50>; // 128 x i32 vector value
-def v256i32 : ValueType<8182,51>; // 256 x i32 vector value
-def v512i32 : ValueType<16384,52>; // 512 x i32 vector value
-def v1024i32 : ValueType<32768,53>; // 1024 x i32 vector value
-def v2048i32 : ValueType<65536,54>; // 2048 x i32 vector value
-
-def v1i64 : ValueType<64 , 55>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 56>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 57>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 58>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,59>; // 16 x i64 vector value
-def v32i64 : ValueType<2048,60>; // 32 x i64 vector value
-
-def v1i128 : ValueType<128, 61>; // 1 x i128 vector value
-
-def nxv1i1 : ValueType<1, 62>; // n x 1 x i1 vector value
-def nxv2i1 : ValueType<2, 63>; // n x 2 x i1 vector value
-def nxv4i1 : ValueType<4, 64>; // n x 4 x i1 vector value
-def nxv8i1 : ValueType<8, 65>; // n x 8 x i1 vector value
-def nxv16i1 : ValueType<16, 66>; // n x 16 x i1 vector value
-def nxv32i1 : ValueType<32, 67>; // n x 32 x i1 vector value
-
-def nxv1i8 : ValueType<8, 68>; // n x 1 x i8 vector value
-def nxv2i8 : ValueType<16, 69>; // n x 2 x i8 vector value
-def nxv4i8 : ValueType<32, 70>; // n x 4 x i8 vector value
-def nxv8i8 : ValueType<64, 71>; // n x 8 x i8 vector value
-def nxv16i8 : ValueType<128, 72>; // n x 16 x i8 vector value
-def nxv32i8 : ValueType<256, 73>; // n x 32 x i8 vector value
-
-def nxv1i16 : ValueType<16, 74>; // n x 1 x i16 vector value
-def nxv2i16 : ValueType<32, 75>; // n x 2 x i16 vector value
-def nxv4i16 : ValueType<64, 76>; // n x 4 x i16 vector value
-def nxv8i16 : ValueType<128, 77>; // n x 8 x i16 vector value
-def nxv16i16: ValueType<256, 78>; // n x 16 x i16 vector value
-def nxv32i16: ValueType<512, 79>; // n x 32 x i16 vector value
-
-def nxv1i32 : ValueType<32, 80>; // n x 1 x i32 vector value
-def nxv2i32 : ValueType<64, 81>; // n x 2 x i32 vector value
-def nxv4i32 : ValueType<128, 82>; // n x 4 x i32 vector value
-def nxv8i32 : ValueType<256, 83>; // n x 8 x i32 vector value
-def nxv16i32: ValueType<512, 84>; // n x 16 x i32 vector value
-def nxv32i32: ValueType<1024,85>; // n x 32 x i32 vector value
-
-def nxv1i64 : ValueType<64, 86>; // n x 1 x i64 vector value
-def nxv2i64 : ValueType<128, 87>; // n x 2 x i64 vector value
-def nxv4i64 : ValueType<256, 88>; // n x 4 x i64 vector value
-def nxv8i64 : ValueType<512, 89>; // n x 8 x i64 vector value
-def nxv16i64: ValueType<1024,90>; // n x 16 x i64 vector value
-def nxv32i64: ValueType<2048,91>; // n x 32 x i64 vector value
-
-def v2f16 : ValueType<32 , 92>; // 2 x f16 vector value
-def v4f16 : ValueType<64 , 93>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 94>; // 8 x f16 vector value
-def v1f32 : ValueType<32 , 95>; // 1 x f32 vector value
-def v2f32 : ValueType<64 , 96>; // 2 x f32 vector value
-def v3f32 : ValueType<96 , 97>; // 3 x f32 vector value
-def v4f32 : ValueType<128, 98>; // 4 x f32 vector value
-def v5f32 : ValueType<160, 99>; // 5 x f32 vector value
-def v8f32 : ValueType<256, 100>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 101>; // 16 x f32 vector value
-def v32f32 : ValueType<1024, 102>; // 32 x f32 vector value
-def v64f32 : ValueType<2048, 103>; // 64 x f32 vector value
-def v128f32 : ValueType<4096, 104>; // 128 x f32 vector value
-def v256f32 : ValueType<8182, 105>; // 256 x f32 vector value
-def v512f32 : ValueType<16384, 106>; // 512 x f32 vector value
-def v1024f32 : ValueType<32768, 107>; // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 108>; // 2048 x f32 vector value
-def v1f64 : ValueType<64, 109>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 110>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 111>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 112>; // 8 x f64 vector value
-
-def nxv2f16 : ValueType<32 , 113>; // n x 2 x f16 vector value
-def nxv4f16 : ValueType<64 , 114>; // n x 4 x f16 vector value
-def nxv8f16 : ValueType<128, 115>; // n x 8 x f16 vector value
-def nxv1f32 : ValueType<32 , 116>; // n x 1 x f32 vector value
-def nxv2f32 : ValueType<64 , 117>; // n x 2 x f32 vector value
-def nxv4f32 : ValueType<128, 118>; // n x 4 x f32 vector value
-def nxv8f32 : ValueType<256, 119>; // n x 8 x f32 vector value
-def nxv16f32 : ValueType<512, 120>; // n x 16 x f32 vector value
-def nxv1f64 : ValueType<64, 121>; // n x 1 x f64 vector value
-def nxv2f64 : ValueType<128, 122>; // n x 2 x f64 vector value
-def nxv4f64 : ValueType<256, 123>; // n x 4 x f64 vector value
-def nxv8f64 : ValueType<512, 124>; // n x 8 x f64 vector value
-
-def x86mmx : ValueType<64 , 125>; // X86 MMX value
-def FlagVT : ValueType<0 , 126>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 127>; // Produces no value
-def untyped: ValueType<8 , 128>; // Produces an untyped value
-def exnref: ValueType<0, 129>; // WebAssembly's exnref type
+def v256i1 : ValueType<256, 22>; // 256 x i1 vector value
+def v512i1 : ValueType<512, 23>; // 512 x i1 vector value
+def v1024i1: ValueType<1024,24>; //1024 x i1 vector value
+
+def v1i8 : ValueType<8, 25>; // 1 x i8 vector value
+def v2i8 : ValueType<16 , 26>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 27>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 28>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 29>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 30>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 31>; // 64 x i8 vector value
+def v128i8 : ValueType<1024,32>; //128 x i8 vector value
+def v256i8 : ValueType<2048,33>; //256 x i8 vector value
+
+def v1i16 : ValueType<16 , 34>; // 1 x i16 vector value
+def v2i16 : ValueType<32 , 35>; // 2 x i16 vector value
+def v3i16 : ValueType<48 , 36>; // 3 x i16 vector value
+def v4i16 : ValueType<64 , 37>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 38>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 39>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 40>; // 32 x i16 vector value
+def v64i16 : ValueType<1024,41>; // 64 x i16 vector value
+def v128i16: ValueType<2048,42>; //128 x i16 vector value
+
+def v1i32 : ValueType<32 , 43>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 44>; // 2 x i32 vector value
+def v3i32 : ValueType<96 , 45>; // 3 x i32 vector value
+def v4i32 : ValueType<128, 46>; // 4 x i32 vector value
+def v5i32 : ValueType<160, 47>; // 5 x i32 vector value
+def v8i32 : ValueType<256, 48>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 49>; // 16 x i32 vector value
+def v32i32 : ValueType<1024,50>; // 32 x i32 vector value
+def v64i32 : ValueType<2048,51>; // 64 x i32 vector value
+def v128i32 : ValueType<4096,52>; // 128 x i32 vector value
+def v256i32 : ValueType<8192,53>; // 256 x i32 vector value
+def v512i32 : ValueType<16384,54>; // 512 x i32 vector value
+def v1024i32 : ValueType<32768,55>; // 1024 x i32 vector value
+def v2048i32 : ValueType<65536,56>; // 2048 x i32 vector value
+
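The first ValueType template argument is the total width in bits, NumElements times ElementBits; for example v2048i32 above is 2048 * 32 = 65536, and a 256-element i32 or f32 vector is 256 * 32 = 8192 bits wide. A compile-time restatement of the rule:

    // Total ValueType width must equal elements * element-width.
    static_assert(256 * 32 == 8192, "v256i32 is 8192 bits wide");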
+def v1i64 : ValueType<64 , 57>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 58>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 59>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 60>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,61>; // 16 x i64 vector value
+def v32i64 : ValueType<2048,62>; // 32 x i64 vector value
+
+def v1i128 : ValueType<128, 63>; // 1 x i128 vector value
+
+def v2f16 : ValueType<32 , 64>; // 2 x f16 vector value
+def v3f16 : ValueType<48 , 65>; // 3 x f16 vector value
+def v4f16 : ValueType<64 , 66>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 67>; // 8 x f16 vector value
+def v16f16 : ValueType<256, 68>; // 16 x f16 vector value
+def v32f16 : ValueType<512, 69>; // 32 x f16 vector value
+def v1f32 : ValueType<32 , 70>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 71>; // 2 x f32 vector value
+def v3f32 : ValueType<96 , 72>; // 3 x f32 vector value
+def v4f32 : ValueType<128, 73>; // 4 x f32 vector value
+def v5f32 : ValueType<160, 74>; // 5 x f32 vector value
+def v8f32 : ValueType<256, 75>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 76>; // 16 x f32 vector value
+def v32f32 : ValueType<1024, 77>; // 32 x f32 vector value
+def v64f32 : ValueType<2048, 78>; // 64 x f32 vector value
+def v128f32 : ValueType<4096, 79>; // 128 x f32 vector value
+def v256f32 : ValueType<8192, 80>; // 256 x f32 vector value
+def v512f32 : ValueType<16384, 81>; // 512 x f32 vector value
+def v1024f32 : ValueType<32768, 82>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 83>; // 2048 x f32 vector value
+def v1f64 : ValueType<64, 84>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 85>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 86>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 87>; // 8 x f64 vector value
+
+def nxv1i1 : ValueType<1, 88>; // n x 1 x i1 vector value
+def nxv2i1 : ValueType<2, 89>; // n x 2 x i1 vector value
+def nxv4i1 : ValueType<4, 90>; // n x 4 x i1 vector value
+def nxv8i1 : ValueType<8, 91>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 92>; // n x 16 x i1 vector value
+def nxv32i1 : ValueType<32, 93>; // n x 32 x i1 vector value
+
+def nxv1i8 : ValueType<8, 94>; // n x 1 x i8 vector value
+def nxv2i8 : ValueType<16, 95>; // n x 2 x i8 vector value
+def nxv4i8 : ValueType<32, 96>; // n x 4 x i8 vector value
+def nxv8i8 : ValueType<64, 97>; // n x 8 x i8 vector value
+def nxv16i8 : ValueType<128, 98>; // n x 16 x i8 vector value
+def nxv32i8 : ValueType<256, 99>; // n x 32 x i8 vector value
+
+def nxv1i16 : ValueType<16, 100>; // n x 1 x i16 vector value
+def nxv2i16 : ValueType<32, 101>; // n x 2 x i16 vector value
+def nxv4i16 : ValueType<64, 102>; // n x 4 x i16 vector value
+def nxv8i16 : ValueType<128, 103>; // n x 8 x i16 vector value
+def nxv16i16: ValueType<256, 104>; // n x 16 x i16 vector value
+def nxv32i16: ValueType<512, 105>; // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32, 106>; // n x 1 x i32 vector value
+def nxv2i32 : ValueType<64, 107>; // n x 2 x i32 vector value
+def nxv4i32 : ValueType<128, 108>; // n x 4 x i32 vector value
+def nxv8i32 : ValueType<256, 109>; // n x 8 x i32 vector value
+def nxv16i32: ValueType<512, 110>; // n x 16 x i32 vector value
+def nxv32i32: ValueType<1024,111>; // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64, 112>; // n x 1 x i64 vector value
+def nxv2i64 : ValueType<128, 113>; // n x 2 x i64 vector value
+def nxv4i64 : ValueType<256, 114>; // n x 4 x i64 vector value
+def nxv8i64 : ValueType<512, 115>; // n x 8 x i64 vector value
+def nxv16i64: ValueType<1024,116>; // n x 16 x i64 vector value
+def nxv32i64: ValueType<2048,117>; // n x 32 x i64 vector value
+
+def nxv2f16 : ValueType<32 , 118>; // n x 2 x f16 vector value
+def nxv4f16 : ValueType<64 , 119>; // n x 4 x f16 vector value
+def nxv8f16 : ValueType<128, 120>; // n x 8 x f16 vector value
+def nxv1f32 : ValueType<32 , 121>; // n x 1 x f32 vector value
+def nxv2f32 : ValueType<64 , 122>; // n x 2 x f32 vector value
+def nxv4f32 : ValueType<128, 123>; // n x 4 x f32 vector value
+def nxv8f32 : ValueType<256, 124>; // n x 8 x f32 vector value
+def nxv16f32 : ValueType<512, 125>; // n x 16 x f32 vector value
+def nxv1f64 : ValueType<64, 126>; // n x 1 x f64 vector value
+def nxv2f64 : ValueType<128, 127>; // n x 2 x f64 vector value
+def nxv4f64 : ValueType<256, 128>; // n x 4 x f64 vector value
+def nxv8f64 : ValueType<512, 129>; // n x 8 x f64 vector value
+
+def x86mmx : ValueType<64 , 130>; // X86 MMX value
+def FlagVT : ValueType<0 , 131>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 132>; // Produces no value
+def untyped: ValueType<8 , 133>; // Produces an untyped value
+def exnref: ValueType<0, 134>; // WebAssembly's exnref type
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata
diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h
index 70eb048f05eb..db25ed5c5116 100644
--- a/include/llvm/CodeGen/VirtRegMap.h
+++ b/include/llvm/CodeGen/VirtRegMap.h
@@ -49,7 +49,7 @@ class TargetInstrInfo;
/// it; even spilled virtual registers (the register mapped to a
/// spilled register is the temporary used to load it from the
/// stack).
- IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+ IndexedMap<Register, VirtReg2IndexFunctor> Virt2PhysMap;
/// Virt2StackSlotMap - This is virtual register to stack slot
/// mapping. Each spilled virtual register has an entry in it
@@ -93,7 +93,7 @@ class TargetInstrInfo;
/// returns true if the specified virtual register is
/// mapped to a physical register
- bool hasPhys(unsigned virtReg) const {
+ bool hasPhys(Register virtReg) const {
return getPhys(virtReg) != NO_PHYS_REG;
}
@@ -101,20 +101,20 @@ class TargetInstrInfo;
/// virtual register
Register getPhys(Register virtReg) const {
assert(virtReg.isVirtual());
- return Virt2PhysMap[virtReg];
+ return Virt2PhysMap[virtReg.id()];
}
/// creates a mapping for the specified virtual register to
/// the specified physical register
- void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg);
+ void assignVirt2Phys(Register virtReg, MCPhysReg physReg);
/// clears the specified virtual register's physical
/// register mapping
- void clearVirt(unsigned virtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+ void clearVirt(Register virtReg) {
+ assert(virtReg.isVirtual());
+ assert(Virt2PhysMap[virtReg.id()] != NO_PHYS_REG &&
"attempt to clear a not assigned virtual register");
- Virt2PhysMap[virtReg] = NO_PHYS_REG;
+ Virt2PhysMap[virtReg.id()] = NO_PHYS_REG;
}
/// clears all virtual to physical register mappings
@@ -124,21 +124,21 @@ class TargetInstrInfo;
}
/// returns true if VirtReg is assigned to its preferred physreg.
- bool hasPreferredPhys(unsigned VirtReg);
+ bool hasPreferredPhys(Register VirtReg);
/// returns true if VirtReg has a known preferred register.
/// This returns false if VirtReg has a preference that is a virtual
/// register that hasn't been assigned yet.
- bool hasKnownPreference(unsigned VirtReg);
+ bool hasKnownPreference(Register VirtReg);
/// records virtReg is a split live interval from SReg.
- void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
- Virt2SplitMap[virtReg] = SReg;
+ void setIsSplitFromReg(Register virtReg, unsigned SReg) {
+ Virt2SplitMap[virtReg.id()] = SReg;
}
/// returns the live interval virtReg is split from.
- unsigned getPreSplitReg(unsigned virtReg) const {
- return Virt2SplitMap[virtReg];
+ unsigned getPreSplitReg(Register virtReg) const {
+ return Virt2SplitMap[virtReg.id()];
}
/// getOriginal - Return the original virtual register that VirtReg descends
@@ -152,28 +152,29 @@ class TargetInstrInfo;
/// returns true if the specified virtual register is not
/// mapped to a stack slot or rematerialized.
- bool isAssignedReg(unsigned virtReg) const {
+ bool isAssignedReg(Register virtReg) const {
if (getStackSlot(virtReg) == NO_STACK_SLOT)
return true;
// Split register can be assigned a physical register as well as a
// stack slot or remat id.
- return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+ return (Virt2SplitMap[virtReg.id()] &&
+ Virt2PhysMap[virtReg.id()] != NO_PHYS_REG);
}
/// returns the stack slot mapped to the specified virtual
/// register
- int getStackSlot(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- return Virt2StackSlotMap[virtReg];
+ int getStackSlot(Register virtReg) const {
+ assert(virtReg.isVirtual());
+ return Virt2StackSlotMap[virtReg.id()];
}
/// create a mapping for the specified virtual register to
/// the next available stack slot
- int assignVirt2StackSlot(unsigned virtReg);
+ int assignVirt2StackSlot(Register virtReg);
/// create a mapping for the specified virtual register to
/// the specified stack slot
- void assignVirt2StackSlot(unsigned virtReg, int SS);
+ void assignVirt2StackSlot(Register virtReg, int SS);
void print(raw_ostream &OS, const Module* M = nullptr) const override;
void dump() const;
diff --git a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
index 7d20bb0a7bde..7538cb2c2548 100644
--- a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
@@ -11,7 +11,6 @@
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/Support/Error.h"
namespace llvm {
@@ -31,9 +30,6 @@ enum VisitorDataSource {
Error visitTypeRecord(CVType &Record, TypeIndex Index,
TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source = VDS_BytesPresent);
-Error visitTypeRecord(CVType &Record, TypeIndex Index,
- TypeVisitorCallbackPipeline &Callbacks,
- VisitorDataSource Source = VDS_BytesPresent);
Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source = VDS_BytesPresent);
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
index 00fb0cf4cc90..60829a51dc25 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
+++ b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
@@ -33,6 +33,9 @@ public:
virtual void EmitIntValue(uint64_t Value, unsigned Size) = 0;
virtual void EmitBinaryData(StringRef Data) = 0;
virtual void AddComment(const Twine &T) = 0;
+ virtual void AddRawComment(const Twine &T) = 0;
+ virtual bool isVerboseAsm() = 0;
+ virtual std::string getTypeName(TypeIndex TI) = 0;
virtual ~CodeViewRecordStreamer() = default;
};
@@ -206,6 +209,11 @@ public:
return 0;
}
+ void emitRawComment(const Twine &T) {
+ if (isStreaming() && Streamer->isVerboseAsm())
+ Streamer->AddRawComment(T);
+ }
+
private:
void emitEncodedSignedInteger(const int64_t &Value,
const Twine &Comment = "");
@@ -225,9 +233,10 @@ private:
}
void emitComment(const Twine &Comment) {
- if (isStreaming()) {
+ if (isStreaming() && Streamer->isVerboseAsm()) {
Twine TComment(Comment);
- Streamer->AddComment(TComment);
+ if (!TComment.isTriviallyEmpty())
+ Streamer->AddComment(TComment);
}
}
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
index 9767e49c44f5..ed5c143818e6 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
+++ b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
@@ -366,8 +366,134 @@ CV_REGISTER(AMD64_K7, 765)
#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_X86)
+#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM)
+
+// ARM registers
+
+CV_REGISTER(ARM_NOREG, 0)
+
+// General purpose 32-bit integer registers
+
+CV_REGISTER(ARM_R0, 10)
+CV_REGISTER(ARM_R1, 11)
+CV_REGISTER(ARM_R2, 12)
+CV_REGISTER(ARM_R3, 13)
+CV_REGISTER(ARM_R4, 14)
+CV_REGISTER(ARM_R5, 15)
+CV_REGISTER(ARM_R6, 16)
+CV_REGISTER(ARM_R7, 17)
+CV_REGISTER(ARM_R8, 18)
+CV_REGISTER(ARM_R9, 19)
+CV_REGISTER(ARM_R10, 20)
+CV_REGISTER(ARM_R11, 21)
+CV_REGISTER(ARM_R12, 22)
+CV_REGISTER(ARM_SP, 23)
+CV_REGISTER(ARM_LR, 24)
+CV_REGISTER(ARM_PC, 25)
+
+// Status register
+
+CV_REGISTER(ARM_CPSR, 26)
+
+// ARM VFPv1 registers
+
+CV_REGISTER(ARM_FPSCR, 40)
+CV_REGISTER(ARM_FPEXC, 41)
+
+// ARM VFPv3/NEON registers
+
+CV_REGISTER(ARM_FS32, 200)
+CV_REGISTER(ARM_FS33, 201)
+CV_REGISTER(ARM_FS34, 202)
+CV_REGISTER(ARM_FS35, 203)
+CV_REGISTER(ARM_FS36, 204)
+CV_REGISTER(ARM_FS37, 205)
+CV_REGISTER(ARM_FS38, 206)
+CV_REGISTER(ARM_FS39, 207)
+CV_REGISTER(ARM_FS40, 208)
+CV_REGISTER(ARM_FS41, 209)
+CV_REGISTER(ARM_FS42, 210)
+CV_REGISTER(ARM_FS43, 211)
+CV_REGISTER(ARM_FS44, 212)
+CV_REGISTER(ARM_FS45, 213)
+CV_REGISTER(ARM_FS46, 214)
+CV_REGISTER(ARM_FS47, 215)
+CV_REGISTER(ARM_FS48, 216)
+CV_REGISTER(ARM_FS49, 217)
+CV_REGISTER(ARM_FS50, 218)
+CV_REGISTER(ARM_FS51, 219)
+CV_REGISTER(ARM_FS52, 220)
+CV_REGISTER(ARM_FS53, 221)
+CV_REGISTER(ARM_FS54, 222)
+CV_REGISTER(ARM_FS55, 223)
+CV_REGISTER(ARM_FS56, 224)
+CV_REGISTER(ARM_FS57, 225)
+CV_REGISTER(ARM_FS58, 226)
+CV_REGISTER(ARM_FS59, 227)
+CV_REGISTER(ARM_FS60, 228)
+CV_REGISTER(ARM_FS61, 229)
+CV_REGISTER(ARM_FS62, 230)
+CV_REGISTER(ARM_FS63, 231)
+
+CV_REGISTER(ARM_ND0, 300)
+CV_REGISTER(ARM_ND1, 301)
+CV_REGISTER(ARM_ND2, 302)
+CV_REGISTER(ARM_ND3, 303)
+CV_REGISTER(ARM_ND4, 304)
+CV_REGISTER(ARM_ND5, 305)
+CV_REGISTER(ARM_ND6, 306)
+CV_REGISTER(ARM_ND7, 307)
+CV_REGISTER(ARM_ND8, 308)
+CV_REGISTER(ARM_ND9, 309)
+CV_REGISTER(ARM_ND10, 310)
+CV_REGISTER(ARM_ND11, 311)
+CV_REGISTER(ARM_ND12, 312)
+CV_REGISTER(ARM_ND13, 313)
+CV_REGISTER(ARM_ND14, 314)
+CV_REGISTER(ARM_ND15, 315)
+CV_REGISTER(ARM_ND16, 316)
+CV_REGISTER(ARM_ND17, 317)
+CV_REGISTER(ARM_ND18, 318)
+CV_REGISTER(ARM_ND19, 319)
+CV_REGISTER(ARM_ND20, 320)
+CV_REGISTER(ARM_ND21, 321)
+CV_REGISTER(ARM_ND22, 322)
+CV_REGISTER(ARM_ND23, 323)
+CV_REGISTER(ARM_ND24, 324)
+CV_REGISTER(ARM_ND25, 325)
+CV_REGISTER(ARM_ND26, 326)
+CV_REGISTER(ARM_ND27, 327)
+CV_REGISTER(ARM_ND28, 328)
+CV_REGISTER(ARM_ND29, 329)
+CV_REGISTER(ARM_ND30, 330)
+CV_REGISTER(ARM_ND31, 331)
+
+CV_REGISTER(ARM_NQ0, 400)
+CV_REGISTER(ARM_NQ1, 401)
+CV_REGISTER(ARM_NQ2, 402)
+CV_REGISTER(ARM_NQ3, 403)
+CV_REGISTER(ARM_NQ4, 404)
+CV_REGISTER(ARM_NQ5, 405)
+CV_REGISTER(ARM_NQ6, 406)
+CV_REGISTER(ARM_NQ7, 407)
+CV_REGISTER(ARM_NQ8, 408)
+CV_REGISTER(ARM_NQ9, 409)
+CV_REGISTER(ARM_NQ10, 410)
+CV_REGISTER(ARM_NQ11, 411)
+CV_REGISTER(ARM_NQ12, 412)
+CV_REGISTER(ARM_NQ13, 413)
+CV_REGISTER(ARM_NQ14, 414)
+CV_REGISTER(ARM_NQ15, 415)
+
+#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM)
+
#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64)
+// arm64intr.h from MSVC defines ARM64_FPSR, which conflicts with
+// these declarations.
+#pragma push_macro("ARM64_FPSR")
+#undef ARM64_FPSR
+
// ARM64 registers
CV_REGISTER(ARM64_NOREG, 0)
@@ -556,4 +682,6 @@ CV_REGISTER(ARM64_Q31, 211)
CV_REGISTER(ARM64_FPSR, 220)
+#pragma pop_macro("ARM64_FPSR")
+
#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64)
diff --git a/include/llvm/DebugInfo/CodeView/EnumTables.h b/include/llvm/DebugInfo/CodeView/EnumTables.h
index ed126ed9e2ff..270cd4b8330c 100644
--- a/include/llvm/DebugInfo/CodeView/EnumTables.h
+++ b/include/llvm/DebugInfo/CodeView/EnumTables.h
@@ -37,6 +37,17 @@ ArrayRef<EnumEntry<uint8_t>> getThunkOrdinalNames();
ArrayRef<EnumEntry<uint16_t>> getTrampolineNames();
ArrayRef<EnumEntry<COFF::SectionCharacteristics>>
getImageSectionCharacteristicNames();
+ArrayRef<EnumEntry<uint16_t>> getClassOptionNames();
+ArrayRef<EnumEntry<uint8_t>> getMemberAccessNames();
+ArrayRef<EnumEntry<uint16_t>> getMethodOptionNames();
+ArrayRef<EnumEntry<uint16_t>> getMemberKindNames();
+ArrayRef<EnumEntry<uint8_t>> getPtrKindNames();
+ArrayRef<EnumEntry<uint8_t>> getPtrModeNames();
+ArrayRef<EnumEntry<uint16_t>> getPtrMemberRepNames();
+ArrayRef<EnumEntry<uint16_t>> getTypeModifierNames();
+ArrayRef<EnumEntry<uint8_t>> getCallingConventions();
+ArrayRef<EnumEntry<uint8_t>> getFunctionOptionEnum();
+ArrayRef<EnumEntry<uint16_t>> getLabelTypeEnum();
} // end namespace codeview
} // end namespace llvm
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
index 62761cb87c81..108abb291498 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
@@ -62,7 +62,7 @@ public:
Error visitSymbolBegin(CVSymbol &Record) override {
assert(!Mapping && "Already in a symbol mapping!");
- Mapping = llvm::make_unique<MappingInfo>(Record.content(), Container);
+ Mapping = std::make_unique<MappingInfo>(Record.content(), Container);
return Mapping->Mapping.visitSymbolBegin(Record);
}
Error visitSymbolEnd(CVSymbol &Record) override {
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 5e9a7432b9b6..1aafa3ca9f1d 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -73,17 +73,17 @@ public:
Thunk32Sym(SymbolRecordKind Kind, uint32_t RecordOffset)
: SymbolRecord(Kind), RecordOffset(RecordOffset) {}
- uint32_t Parent;
- uint32_t End;
- uint32_t Next;
- uint32_t Offset;
- uint16_t Segment;
- uint16_t Length;
+ uint32_t Parent = 0;
+ uint32_t End = 0;
+ uint32_t Next = 0;
+ uint32_t Offset = 0;
+ uint16_t Segment = 0;
+ uint16_t Length = 0;
ThunkOrdinal Thunk;
StringRef Name;
ArrayRef<uint8_t> VariantData;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_TRAMPOLINE
@@ -94,13 +94,13 @@ public:
: SymbolRecord(Kind), RecordOffset(RecordOffset) {}
TrampolineType Type;
- uint16_t Size;
- uint32_t ThunkOffset;
- uint32_t TargetOffset;
- uint16_t ThunkSection;
- uint16_t TargetSection;
+ uint16_t Size = 0;
+ uint32_t ThunkOffset = 0;
+ uint32_t TargetOffset = 0;
+ uint16_t ThunkSection = 0;
+ uint16_t TargetSection = 0;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_SECTION
@@ -110,14 +110,14 @@ public:
SectionSym(SymbolRecordKind Kind, uint32_t RecordOffset)
: SymbolRecord(Kind), RecordOffset(RecordOffset) {}
- uint16_t SectionNumber;
- uint8_t Alignment;
- uint32_t Rva;
- uint32_t Length;
- uint32_t Characteristics;
+ uint16_t SectionNumber = 0;
+ uint8_t Alignment = 0;
+ uint32_t Rva = 0;
+ uint32_t Length = 0;
+ uint32_t Characteristics = 0;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_COFFGROUP
@@ -127,13 +127,13 @@ public:
CoffGroupSym(SymbolRecordKind Kind, uint32_t RecordOffset)
: SymbolRecord(Kind), RecordOffset(RecordOffset) {}
- uint32_t Size;
- uint32_t Characteristics;
- uint32_t Offset;
- uint16_t Segment;
+ uint32_t Size = 0;
+ uint32_t Characteristics = 0;
+ uint32_t Offset = 0;
+ uint16_t Segment = 0;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
class ScopeEndSym : public SymbolRecord {
@@ -142,7 +142,7 @@ public:
ScopeEndSym(SymbolRecordKind Kind, uint32_t RecordOffset)
: SymbolRecord(Kind), RecordOffset(RecordOffset) {}
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
class CallerSym : public SymbolRecord {
@@ -153,7 +153,7 @@ public:
std::vector<TypeIndex> Indices;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
struct DecodedAnnotation {
@@ -333,7 +333,7 @@ private:
class InlineSiteSym : public SymbolRecord {
public:
explicit InlineSiteSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- InlineSiteSym(uint32_t RecordOffset)
+ explicit InlineSiteSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::InlineSiteSym),
RecordOffset(RecordOffset) {}
@@ -342,12 +342,12 @@ public:
BinaryAnnotationIterator());
}
- uint32_t Parent;
- uint32_t End;
+ uint32_t Parent = 0;
+ uint32_t End = 0;
TypeIndex Inlinee;
std::vector<uint8_t> AnnotationData;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_PUB32
@@ -371,7 +371,7 @@ public:
class RegisterSym : public SymbolRecord {
public:
explicit RegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- RegisterSym(uint32_t RecordOffset)
+ explicit RegisterSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::RegisterSym),
RecordOffset(RecordOffset) {}
@@ -379,7 +379,7 @@ public:
RegisterId Register;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_PROCREF, S_LPROCREF
@@ -390,13 +390,13 @@ public:
: SymbolRecord(SymbolRecordKind::ProcRefSym), RecordOffset(RecordOffset) {
}
- uint32_t SumName;
- uint32_t SymOffset;
- uint16_t Module;
+ uint32_t SumName = 0;
+ uint32_t SymOffset = 0;
+ uint16_t Module = 0;
StringRef Name;
uint16_t modi() const { return Module - 1; }
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_LOCAL
@@ -410,7 +410,7 @@ public:
LocalSymFlags Flags;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
struct LocalVariableAddrRange {
@@ -440,11 +440,11 @@ public:
return RecordOffset + RelocationOffset;
}
- uint32_t Program;
+ uint32_t Program = 0;
LocalVariableAddrRange Range;
std::vector<LocalVariableAddrGap> Gaps;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_DEFRANGE_SUBFIELD
@@ -453,7 +453,7 @@ class DefRangeSubfieldSym : public SymbolRecord {
public:
explicit DefRangeSubfieldSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- DefRangeSubfieldSym(uint32_t RecordOffset)
+ explicit DefRangeSubfieldSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeSubfieldSym),
RecordOffset(RecordOffset) {}
@@ -461,58 +461,62 @@ public:
return RecordOffset + RelocationOffset;
}
- uint32_t Program;
- uint16_t OffsetInParent;
+ uint32_t Program = 0;
+ uint16_t OffsetInParent = 0;
LocalVariableAddrRange Range;
std::vector<LocalVariableAddrGap> Gaps;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
+};
+
+struct DefRangeRegisterHeader {
+ ulittle16_t Register;
+ ulittle16_t MayHaveNoName;
};
// S_DEFRANGE_REGISTER
class DefRangeRegisterSym : public SymbolRecord {
public:
- struct Header {
- ulittle16_t Register;
- ulittle16_t MayHaveNoName;
- };
-
explicit DefRangeRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- DefRangeRegisterSym(uint32_t RecordOffset)
+ explicit DefRangeRegisterSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeRegisterSym),
RecordOffset(RecordOffset) {}
- uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); }
+ uint32_t getRelocationOffset() const {
+ return RecordOffset + sizeof(DefRangeRegisterHeader);
+ }
- Header Hdr;
+ DefRangeRegisterHeader Hdr;
LocalVariableAddrRange Range;
std::vector<LocalVariableAddrGap> Gaps;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
+};
+
+struct DefRangeSubfieldRegisterHeader {
+ ulittle16_t Register;
+ ulittle16_t MayHaveNoName;
+ ulittle32_t OffsetInParent;
};
// S_DEFRANGE_SUBFIELD_REGISTER
class DefRangeSubfieldRegisterSym : public SymbolRecord {
public:
- struct Header {
- ulittle16_t Register;
- ulittle16_t MayHaveNoName;
- ulittle32_t OffsetInParent;
- };
-
explicit DefRangeSubfieldRegisterSym(SymbolRecordKind Kind)
: SymbolRecord(Kind) {}
- DefRangeSubfieldRegisterSym(uint32_t RecordOffset)
+ explicit DefRangeSubfieldRegisterSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeSubfieldRegisterSym),
RecordOffset(RecordOffset) {}
- uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); }
+ uint32_t getRelocationOffset() const {
+ return RecordOffset + sizeof(DefRangeSubfieldRegisterHeader);
+ }
- Header Hdr;
+ DefRangeSubfieldRegisterHeader Hdr;
LocalVariableAddrRange Range;
std::vector<LocalVariableAddrGap> Gaps;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
+};
+
+struct DefRangeFramePointerRelHeader {
+ little32_t Offset;
};
// S_DEFRANGE_FRAMEPOINTER_REL
@@ -522,7 +526,7 @@ class DefRangeFramePointerRelSym : public SymbolRecord {
public:
explicit DefRangeFramePointerRelSym(SymbolRecordKind Kind)
: SymbolRecord(Kind) {}
- DefRangeFramePointerRelSym(uint32_t RecordOffset)
+ explicit DefRangeFramePointerRelSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeFramePointerRelSym),
RecordOffset(RecordOffset) {}
@@ -530,22 +534,22 @@ public:
return RecordOffset + RelocationOffset;
}
- int32_t Offset;
+ DefRangeFramePointerRelHeader Hdr;
LocalVariableAddrRange Range;
std::vector<LocalVariableAddrGap> Gaps;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
+};
+
+struct DefRangeRegisterRelHeader {
+ ulittle16_t Register;
+ ulittle16_t Flags;
+ little32_t BasePointerOffset;
};
// S_DEFRANGE_REGISTER_REL
class DefRangeRegisterRelSym : public SymbolRecord {
public:
- struct Header {
- ulittle16_t Register;
- ulittle16_t Flags;
- little32_t BasePointerOffset;
- };
-
explicit DefRangeRegisterRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
explicit DefRangeRegisterRelSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeRegisterRelSym),
@@ -563,13 +567,13 @@ public:
bool hasSpilledUDTMember() const { return Hdr.Flags & IsSubfieldFlag; }
uint16_t offsetInParent() const { return Hdr.Flags >> OffsetInParentShift; }
- uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); }
+ uint32_t getRelocationOffset() const {
+ return RecordOffset + sizeof(DefRangeRegisterRelHeader);
+ }
- Header Hdr;
+ DefRangeRegisterRelHeader Hdr;
LocalVariableAddrRange Range;
std::vector<LocalVariableAddrGap> Gaps;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE
@@ -581,9 +585,9 @@ public:
: SymbolRecord(SymbolRecordKind::DefRangeFramePointerRelFullScopeSym),
RecordOffset(RecordOffset) {}
- int32_t Offset;
+ int32_t Offset = 0;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_BLOCK32
@@ -599,14 +603,14 @@ public:
return RecordOffset + RelocationOffset;
}
- uint32_t Parent;
- uint32_t End;
- uint32_t CodeSize;
- uint32_t CodeOffset;
- uint16_t Segment;
+ uint32_t Parent = 0;
+ uint32_t End = 0;
+ uint32_t CodeSize = 0;
+ uint32_t CodeOffset = 0;
+ uint16_t Segment = 0;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_LABEL32
@@ -622,12 +626,12 @@ public:
return RecordOffset + RelocationOffset;
}
- uint32_t CodeOffset;
- uint16_t Segment;
+ uint32_t CodeOffset = 0;
+ uint16_t Segment = 0;
ProcSymFlags Flags;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_OBJNAME
@@ -635,82 +639,82 @@ class ObjNameSym : public SymbolRecord {
public:
explicit ObjNameSym() : SymbolRecord(SymbolRecordKind::ObjNameSym) {}
explicit ObjNameSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- ObjNameSym(uint32_t RecordOffset)
+ explicit ObjNameSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::ObjNameSym), RecordOffset(RecordOffset) {
}
- uint32_t Signature;
+ uint32_t Signature = 0;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_ENVBLOCK
class EnvBlockSym : public SymbolRecord {
public:
explicit EnvBlockSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- EnvBlockSym(uint32_t RecordOffset)
+ explicit EnvBlockSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::EnvBlockSym),
RecordOffset(RecordOffset) {}
std::vector<StringRef> Fields;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_EXPORT
class ExportSym : public SymbolRecord {
public:
explicit ExportSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- ExportSym(uint32_t RecordOffset)
+ explicit ExportSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::ExportSym), RecordOffset(RecordOffset) {}
- uint16_t Ordinal;
+ uint16_t Ordinal = 0;
ExportFlags Flags;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_FILESTATIC
class FileStaticSym : public SymbolRecord {
public:
explicit FileStaticSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- FileStaticSym(uint32_t RecordOffset)
+ explicit FileStaticSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::FileStaticSym),
RecordOffset(RecordOffset) {}
TypeIndex Index;
- uint32_t ModFilenameOffset;
+ uint32_t ModFilenameOffset = 0;
LocalSymFlags Flags;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_COMPILE2
class Compile2Sym : public SymbolRecord {
public:
explicit Compile2Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- Compile2Sym(uint32_t RecordOffset)
+ explicit Compile2Sym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::Compile2Sym),
RecordOffset(RecordOffset) {}
CompileSym2Flags Flags;
CPUType Machine;
- uint16_t VersionFrontendMajor;
- uint16_t VersionFrontendMinor;
- uint16_t VersionFrontendBuild;
- uint16_t VersionBackendMajor;
- uint16_t VersionBackendMinor;
- uint16_t VersionBackendBuild;
+ uint16_t VersionFrontendMajor = 0;
+ uint16_t VersionFrontendMinor = 0;
+ uint16_t VersionFrontendBuild = 0;
+ uint16_t VersionBackendMajor = 0;
+ uint16_t VersionBackendMinor = 0;
+ uint16_t VersionBackendBuild = 0;
StringRef Version;
std::vector<StringRef> ExtraStrings;
uint8_t getLanguage() const { return static_cast<uint32_t>(Flags) & 0xFF; }
uint32_t getFlags() const { return static_cast<uint32_t>(Flags) & ~0xFF; }
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
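A quick sketch of the bit layout behind the getLanguage()/getFlags() accessors above (the 0x00008001 value is a made-up example, not from this patch): the low byte of the S_COMPILE2 flags word carries the source-language code, and masking it off leaves the remaining compile flags.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Raw = 0x00008001;      // hypothetical S_COMPILE2 flags word
      uint8_t Language = Raw & 0xFF;  // low byte: source language (here 0x01)
      uint32_t Flags = Raw & ~0xFFu;  // remaining bits: compile flags (0x8000)
      std::printf("language=0x%02x flags=0x%08x\n", Language, Flags);
      return 0;
    }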
// S_COMPILE3
@@ -718,20 +722,20 @@ class Compile3Sym : public SymbolRecord {
public:
Compile3Sym() : SymbolRecord(SymbolRecordKind::Compile3Sym) {}
explicit Compile3Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- Compile3Sym(uint32_t RecordOffset)
+ explicit Compile3Sym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::Compile3Sym),
RecordOffset(RecordOffset) {}
CompileSym3Flags Flags;
CPUType Machine;
- uint16_t VersionFrontendMajor;
- uint16_t VersionFrontendMinor;
- uint16_t VersionFrontendBuild;
- uint16_t VersionFrontendQFE;
- uint16_t VersionBackendMajor;
- uint16_t VersionBackendMinor;
- uint16_t VersionBackendBuild;
- uint16_t VersionBackendQFE;
+ uint16_t VersionFrontendMajor = 0;
+ uint16_t VersionFrontendMinor = 0;
+ uint16_t VersionFrontendBuild = 0;
+ uint16_t VersionFrontendQFE = 0;
+ uint16_t VersionBackendMajor = 0;
+ uint16_t VersionBackendMinor = 0;
+ uint16_t VersionBackendBuild = 0;
+ uint16_t VersionBackendQFE = 0;
StringRef Version;
void setLanguage(SourceLanguage Lang) {
@@ -750,7 +754,7 @@ public:
(getFlags() & (CompileSym3Flags::PGO | CompileSym3Flags::LTCG));
}
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_FRAMEPROC
@@ -761,12 +765,12 @@ public:
: SymbolRecord(SymbolRecordKind::FrameProcSym),
RecordOffset(RecordOffset) {}
- uint32_t TotalFrameBytes;
- uint32_t PaddingFrameBytes;
- uint32_t OffsetToPadding;
- uint32_t BytesOfCalleeSavedRegisters;
- uint32_t OffsetOfExceptionHandler;
- uint16_t SectionIdOfExceptionHandler;
+ uint32_t TotalFrameBytes = 0;
+ uint32_t PaddingFrameBytes = 0;
+ uint32_t OffsetToPadding = 0;
+ uint32_t BytesOfCalleeSavedRegisters = 0;
+ uint32_t OffsetOfExceptionHandler = 0;
+ uint16_t SectionIdOfExceptionHandler = 0;
FrameProcedureOptions Flags;
/// Extract the register this frame uses to refer to local variables.
@@ -781,7 +785,7 @@ public:
EncodedFramePtrReg((uint32_t(Flags) >> 16U) & 0x3U), CPU);
}
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
private:
};
@@ -799,11 +803,11 @@ public:
return RecordOffset + RelocationOffset;
}
- uint32_t CodeOffset;
- uint16_t Segment;
+ uint32_t CodeOffset = 0;
+ uint16_t Segment = 0;
TypeIndex Type;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_HEAPALLOCSITE
@@ -820,12 +824,12 @@ public:
return RecordOffset + RelocationOffset;
}
- uint32_t CodeOffset;
- uint16_t Segment;
- uint16_t CallInstructionSize;
+ uint32_t CodeOffset = 0;
+ uint16_t Segment = 0;
+ uint16_t CallInstructionSize = 0;
TypeIndex Type;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_FRAMECOOKIE
@@ -841,12 +845,12 @@ public:
return RecordOffset + RelocationOffset;
}
- uint32_t CodeOffset;
- uint16_t Register;
+ uint32_t CodeOffset = 0;
+ uint16_t Register = 0;
FrameCookieKind CookieKind;
- uint8_t Flags;
+ uint8_t Flags = 0;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_UDT, S_COBOLUDT
@@ -859,20 +863,20 @@ public:
TypeIndex Type;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_BUILDINFO
class BuildInfoSym : public SymbolRecord {
public:
explicit BuildInfoSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- BuildInfoSym(uint32_t RecordOffset)
+ explicit BuildInfoSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::BuildInfoSym),
RecordOffset(RecordOffset) {}
TypeIndex BuildId;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_BPREL32
@@ -883,11 +887,11 @@ public:
: SymbolRecord(SymbolRecordKind::BPRelativeSym),
RecordOffset(RecordOffset) {}
- int32_t Offset;
+ int32_t Offset = 0;
TypeIndex Type;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_REGREL32
@@ -898,19 +902,19 @@ public:
: SymbolRecord(SymbolRecordKind::RegRelativeSym),
RecordOffset(RecordOffset) {}
- uint32_t Offset;
+ uint32_t Offset = 0;
TypeIndex Type;
RegisterId Register;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_CONSTANT, S_MANCONSTANT
class ConstantSym : public SymbolRecord {
public:
explicit ConstantSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- ConstantSym(uint32_t RecordOffset)
+ explicit ConstantSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::ConstantSym),
RecordOffset(RecordOffset) {}
@@ -918,7 +922,7 @@ public:
APSInt Value;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_LDATA32, S_GDATA32, S_LMANDATA, S_GMANDATA
@@ -927,7 +931,7 @@ class DataSym : public SymbolRecord {
public:
explicit DataSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
- DataSym(uint32_t RecordOffset)
+ explicit DataSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DataSym), RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
@@ -935,11 +939,11 @@ public:
}
TypeIndex Type;
- uint32_t DataOffset;
- uint16_t Segment;
+ uint32_t DataOffset = 0;
+ uint16_t Segment = 0;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_LTHREAD32, S_GTHREAD32
@@ -957,11 +961,11 @@ public:
}
TypeIndex Type;
- uint32_t DataOffset;
- uint16_t Segment;
+ uint32_t DataOffset = 0;
+ uint16_t Segment = 0;
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_UNAMESPACE
@@ -974,7 +978,7 @@ public:
StringRef Name;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
// S_ANNOTATION
@@ -989,7 +993,7 @@ public:
uint16_t Segment = 0;
std::vector<StringRef> Strings;
- uint32_t RecordOffset;
+ uint32_t RecordOffset = 0;
};
using CVSymbol = CVRecord<SymbolKind>;
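Taken together, the SymbolRecord.h hunks above apply two mechanical patterns; a minimal standalone sketch (not LLVM code) of what each one buys:

    #include <cstdint>

    struct Before {
      Before() = default;
      Before(uint32_t Off) : RecordOffset(Off) {} // implicit conversion allowed
      uint32_t RecordOffset;                      // indeterminate after Before{}
    };

    struct After {
      After() = default;
      explicit After(uint32_t Off) : RecordOffset(Off) {}
      uint32_t RecordOffset = 0; // well-defined even when default-constructed
    };

    void take(After A) { (void)A; }

    int main() {
      // take(42);      // no longer compiles: the constructor is explicit
      take(After(42));  // the conversion is now visible at the call site
      After D;          // D.RecordOffset == 0, not garbage
      (void)D;
      return 0;
    }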
diff --git a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
index 081de32dd02c..2b17f5ccb13b 100644
--- a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
+++ b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
@@ -66,7 +66,7 @@ public:
Error visitTypeBegin(CVType &Record) override {
assert(!Mapping && "Already in a type mapping!");
- Mapping = llvm::make_unique<MappingInfo>(Record.content());
+ Mapping = std::make_unique<MappingInfo>(Record.content());
return Mapping->Mapping.visitTypeBegin(Record);
}
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
index 4c309c10ff0c..c6044d5138a8 100644
--- a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
+++ b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
@@ -10,6 +10,7 @@
#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDMAPPING_H
#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/Support/Error.h"
diff --git a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
index 169715be2d52..fb0b579d6a06 100644
--- a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
+++ b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
@@ -82,11 +82,6 @@ public:
Pipeline.push_back(&Callbacks);
}
- void addCallbackToPipelineFront(TypeVisitorCallbacks &Callbacks) {
- auto CallBackItr = Pipeline.begin();
- Pipeline.insert(CallBackItr, &Callbacks);
- }
-
#define TYPE_RECORD(EnumName, EnumVal, Name) \
Error visitKnownRecord(CVType &CVR, Name##Record &Record) override { \
return visitKnownRecordImpl(CVR, Record); \
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index d2a5318179eb..fbebfe634b63 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -28,6 +28,10 @@ namespace llvm {
/// A format-neutral container for source line information.
struct DILineInfo {
+ // DILineInfo contains "<invalid>" for a function/filename it cannot fetch.
+ static constexpr const char *const BadString = "<invalid>";
+ // Use "??" instead of "<invalid>" to make our output closer to addr2line.
+ static constexpr const char *const Addr2LineBadString = "??";
std::string FileName;
std::string FunctionName;
Optional<StringRef> Source;
@@ -38,7 +42,7 @@ struct DILineInfo {
// DWARF-specific.
uint32_t Discriminator = 0;
- DILineInfo() : FileName("<invalid>"), FunctionName("<invalid>") {}
+ DILineInfo() : FileName(BadString), FunctionName(BadString) {}
bool operator==(const DILineInfo &RHS) const {
return Line == RHS.Line && Column == RHS.Column &&
@@ -61,9 +65,9 @@ struct DILineInfo {
void dump(raw_ostream &OS) {
OS << "Line info: ";
- if (FileName != "<invalid>")
+ if (FileName != BadString)
OS << "file '" << FileName << "', ";
- if (FunctionName != "<invalid>")
+ if (FunctionName != BadString)
OS << "function '" << FunctionName << "', ";
OS << "line " << Line << ", ";
OS << "column " << Column << ", ";
@@ -109,7 +113,7 @@ struct DIGlobal {
uint64_t Start = 0;
uint64_t Size = 0;
- DIGlobal() : Name("<invalid>") {}
+ DIGlobal() : Name(DILineInfo::BadString) {}
};
struct DILocal {
@@ -289,7 +293,7 @@ public:
LoadedObjectInfoHelper(Ts &&... Args) : Base(std::forward<Ts>(Args)...) {}
std::unique_ptr<llvm::LoadedObjectInfo> clone() const override {
- return llvm::make_unique<Derived>(static_cast<const Derived &>(*this));
+ return std::make_unique<Derived>(static_cast<const Derived &>(*this));
}
};
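A usage sketch for the new DILineInfo constants (assuming the post-patch header; printFunction is illustrative, not part of the patch):

    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/Support/raw_ostream.h"

    // Print a symbolized function name, falling back to addr2line's "??"
    // when the lookup produced the "<invalid>" placeholder.
    void printFunction(const llvm::DILineInfo &Info) {
      if (Info.FunctionName == llvm::DILineInfo::BadString)
        llvm::outs() << llvm::DILineInfo::Addr2LineBadString << '\n';
      else
        llvm::outs() << Info.FunctionName << '\n';
    }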
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index ccf2891c2e21..39ae53c4e7fe 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -130,11 +130,11 @@ public:
/// \param Attr DWARF attribute to search for.
/// \param U the DWARFUnit that contains the DIE.
/// \returns Optional DWARF form value if the attribute was extracted.
- Optional<DWARFFormValue> getAttributeValue(const uint32_t DIEOffset,
+ Optional<DWARFFormValue> getAttributeValue(const uint64_t DIEOffset,
const dwarf::Attribute Attr,
const DWARFUnit &U) const;
- bool extract(DataExtractor Data, uint32_t* OffsetPtr);
+ bool extract(DataExtractor Data, uint64_t* OffsetPtr);
void dump(raw_ostream &OS) const;
// Return an optional byte size of all attribute data in this abbreviation
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index 303375703d2e..c9042e593260 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -96,7 +96,7 @@ class AppleAcceleratorTable : public DWARFAcceleratorTable {
using AtomType = uint16_t;
using Form = dwarf::Form;
- uint32_t DIEOffsetBase;
+ uint64_t DIEOffsetBase;
SmallVector<std::pair<AtomType, Form>, 3> Atoms;
Optional<uint64_t> extractOffset(Optional<DWARFFormValue> Value) const;
@@ -109,7 +109,7 @@ class AppleAcceleratorTable : public DWARFAcceleratorTable {
/// Returns true if we should continue scanning for entries or false if we've
/// reached the last (sentinel) entry or encountered a parsing error.
bool dumpName(ScopedPrinter &W, SmallVectorImpl<DWARFFormValue> &AtomForms,
- uint32_t *DataOffset) const;
+ uint64_t *DataOffset) const;
public:
/// Apple-specific implementation of an Accelerator Entry.
@@ -119,7 +119,7 @@ public:
Entry(const HeaderData &Data);
Entry() = default;
- void extract(const AppleAcceleratorTable &AccelTable, uint32_t *Offset);
+ void extract(const AppleAcceleratorTable &AccelTable, uint64_t *Offset);
public:
Optional<uint64_t> getCUOffset() const override;
@@ -143,7 +143,7 @@ public:
class ValueIterator : public std::iterator<std::input_iterator_tag, Entry> {
const AppleAcceleratorTable *AccelTable = nullptr;
Entry Current; ///< The current entry.
- unsigned DataOffset = 0; ///< Offset into the section.
+ uint64_t DataOffset = 0; ///< Offset into the section.
unsigned Data = 0; ///< Current data entry.
unsigned NumData = 0; ///< Number of data entries.
@@ -151,7 +151,7 @@ public:
void Next();
public:
/// Construct a new iterator for the entries at \p DataOffset.
- ValueIterator(const AppleAcceleratorTable &AccelTable, unsigned DataOffset);
+ ValueIterator(const AppleAcceleratorTable &AccelTable, uint64_t DataOffset);
/// End marker.
ValueIterator() = default;
@@ -193,7 +193,7 @@ public:
/// DieOffset is the offset into the .debug_info section for the DIE
/// related to the input hash data offset.
/// DieTag is the tag of the DIE.
- std::pair<uint32_t, dwarf::Tag> readAtoms(uint32_t &HashDataOffset);
+ std::pair<uint64_t, dwarf::Tag> readAtoms(uint64_t *HashDataOffset);
void dump(raw_ostream &OS) const override;
/// Look up all entries in the accelerator table matching \c Key.
@@ -245,7 +245,7 @@ public:
struct Header : public HeaderPOD {
SmallString<8> AugmentationString;
- Error extract(const DWARFDataExtractor &AS, uint32_t *Offset);
+ Error extract(const DWARFDataExtractor &AS, uint64_t *Offset);
void dump(ScopedPrinter &W) const;
};
@@ -354,12 +354,12 @@ public:
DataExtractor StrData;
uint32_t Index;
- uint32_t StringOffset;
- uint32_t EntryOffset;
+ uint64_t StringOffset;
+ uint64_t EntryOffset;
public:
NameTableEntry(const DataExtractor &StrData, uint32_t Index,
- uint32_t StringOffset, uint32_t EntryOffset)
+ uint64_t StringOffset, uint64_t EntryOffset)
: StrData(StrData), Index(Index), StringOffset(StringOffset),
EntryOffset(EntryOffset) {}
@@ -367,17 +367,17 @@ public:
uint32_t getIndex() const { return Index; }
/// Returns the offset of the name of the described entities.
- uint32_t getStringOffset() const { return StringOffset; }
+ uint64_t getStringOffset() const { return StringOffset; }
/// Return the string referenced by this name table entry or nullptr if the
/// string offset is not valid.
const char *getString() const {
- uint32_t Off = StringOffset;
+ uint64_t Off = StringOffset;
return StrData.getCStr(&Off);
}
/// Returns the offset of the first Entry in the list.
- uint32_t getEntryOffset() const { return EntryOffset; }
+ uint64_t getEntryOffset() const { return EntryOffset; }
};
/// Represents a single accelerator table within the DWARF v5 .debug_names
@@ -389,40 +389,40 @@ public:
// Base of the whole unit and of various important tables, as offsets from
// the start of the section.
- uint32_t Base;
- uint32_t CUsBase;
- uint32_t BucketsBase;
- uint32_t HashesBase;
- uint32_t StringOffsetsBase;
- uint32_t EntryOffsetsBase;
- uint32_t EntriesBase;
+ uint64_t Base;
+ uint64_t CUsBase;
+ uint64_t BucketsBase;
+ uint64_t HashesBase;
+ uint64_t StringOffsetsBase;
+ uint64_t EntryOffsetsBase;
+ uint64_t EntriesBase;
void dumpCUs(ScopedPrinter &W) const;
void dumpLocalTUs(ScopedPrinter &W) const;
void dumpForeignTUs(ScopedPrinter &W) const;
void dumpAbbreviations(ScopedPrinter &W) const;
- bool dumpEntry(ScopedPrinter &W, uint32_t *Offset) const;
+ bool dumpEntry(ScopedPrinter &W, uint64_t *Offset) const;
void dumpName(ScopedPrinter &W, const NameTableEntry &NTE,
Optional<uint32_t> Hash) const;
void dumpBucket(ScopedPrinter &W, uint32_t Bucket) const;
- Expected<AttributeEncoding> extractAttributeEncoding(uint32_t *Offset);
+ Expected<AttributeEncoding> extractAttributeEncoding(uint64_t *Offset);
Expected<std::vector<AttributeEncoding>>
- extractAttributeEncodings(uint32_t *Offset);
+ extractAttributeEncodings(uint64_t *Offset);
- Expected<Abbrev> extractAbbrev(uint32_t *Offset);
+ Expected<Abbrev> extractAbbrev(uint64_t *Offset);
public:
- NameIndex(const DWARFDebugNames &Section, uint32_t Base)
+ NameIndex(const DWARFDebugNames &Section, uint64_t Base)
: Section(Section), Base(Base) {}
/// Reads offset of compilation unit CU. CU is 0-based.
- uint32_t getCUOffset(uint32_t CU) const;
+ uint64_t getCUOffset(uint32_t CU) const;
uint32_t getCUCount() const { return Hdr.CompUnitCount; }
/// Reads offset of local type unit TU. TU is 0-based.
- uint32_t getLocalTUOffset(uint32_t TU) const;
+ uint64_t getLocalTUOffset(uint32_t TU) const;
uint32_t getLocalTUCount() const { return Hdr.LocalTypeUnitCount; }
/// Reads signature of foreign type unit TU. TU is 0-based.
@@ -451,7 +451,7 @@ public:
return Abbrevs;
}
- Expected<Entry> getEntry(uint32_t *Offset) const;
+ Expected<Entry> getEntry(uint64_t *Offset) const;
/// Look up all entries in this Name Index matching \c Key.
iterator_range<ValueIterator> equal_range(StringRef Key) const;
@@ -460,8 +460,8 @@ public:
NameIterator end() const { return NameIterator(this, getNameCount() + 1); }
Error extract();
- uint32_t getUnitOffset() const { return Base; }
- uint32_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; }
+ uint64_t getUnitOffset() const { return Base; }
+ uint64_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; }
void dump(ScopedPrinter &W) const;
friend class DWARFDebugNames;
@@ -479,12 +479,12 @@ public:
bool IsLocal;
Optional<Entry> CurrentEntry;
- unsigned DataOffset = 0; ///< Offset into the section.
+ uint64_t DataOffset = 0; ///< Offset into the section.
std::string Key; ///< The Key we are searching for.
Optional<uint32_t> Hash; ///< Hash of Key, if it has been computed.
bool getEntryAtCurrentOffset();
- Optional<uint32_t> findEntryOffsetInCurrentIndex();
+ Optional<uint64_t> findEntryOffsetInCurrentIndex();
bool findInCurrentIndex();
void searchFromStartOfCurrentIndex();
void next();
@@ -572,7 +572,7 @@ public:
private:
SmallVector<NameIndex, 0> NameIndices;
- DenseMap<uint32_t, const NameIndex *> CUToNameIndex;
+ DenseMap<uint64_t, const NameIndex *> CUToNameIndex;
public:
DWARFDebugNames(const DWARFDataExtractor &AccelSection,
@@ -591,7 +591,7 @@ public:
/// Return the Name Index covering the compile unit at CUOffset, or nullptr if
/// there is no Name Index covering that unit.
- const NameIndex *getCUNameIndex(uint32_t CUOffset);
+ const NameIndex *getCUNameIndex(uint64_t CUOffset);
};
} // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
index c8ad19ad6bf6..dfc778346dbe 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
@@ -23,7 +23,7 @@ namespace llvm {
/// attributes in a DWARFDie.
struct DWARFAttribute {
/// The debug info/types offset for this attribute.
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
/// The debug info/types section byte size of the data for this attribute.
uint32_t ByteSize = 0;
/// The attribute enumeration of this attribute.
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 23cf21c3523f..fae163622edb 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -225,10 +225,10 @@ public:
DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash);
/// Return the compile unit that includes an offset (relative to .debug_info).
- DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset);
+ DWARFCompileUnit *getCompileUnitForOffset(uint64_t Offset);
/// Get a DIE given an exact offset.
- DWARFDie getDIEForOffset(uint32_t Offset);
+ DWARFDie getDIEForOffset(uint64_t Offset);
unsigned getMaxVersion() {
// Ensure info units have been parsed to discover MaxVersion
@@ -301,10 +301,10 @@ public:
std::function<void(Error)> RecoverableErrorCallback);
DataExtractor getStringExtractor() const {
- return DataExtractor(DObj->getStringSection(), false, 0);
+ return DataExtractor(DObj->getStrSection(), false, 0);
}
DataExtractor getLineStringExtractor() const {
- return DataExtractor(DObj->getLineStringSection(), false, 0);
+ return DataExtractor(DObj->getLineStrSection(), false, 0);
}
/// Wraps the returned DIEs for a given address.
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
index 7c2a159b71fa..980724c525d2 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
@@ -35,20 +35,25 @@ public:
/// Extracts a value and applies a relocation to the result if
/// one exists for the given offset.
- uint64_t getRelocatedValue(uint32_t Size, uint32_t *Off,
- uint64_t *SectionIndex = nullptr) const;
+ uint64_t getRelocatedValue(uint32_t Size, uint64_t *Off,
+ uint64_t *SectionIndex = nullptr,
+ Error *Err = nullptr) const;
/// Extracts an address-sized value and applies a relocation to the result if
/// one exists for the given offset.
- uint64_t getRelocatedAddress(uint32_t *Off, uint64_t *SecIx = nullptr) const {
+ uint64_t getRelocatedAddress(uint64_t *Off, uint64_t *SecIx = nullptr) const {
return getRelocatedValue(getAddressSize(), Off, SecIx);
}
+ uint64_t getRelocatedAddress(Cursor &C, uint64_t *SecIx = nullptr) const {
+ return getRelocatedValue(getAddressSize(), &getOffset(C), SecIx,
+ &getError(C));
+ }
/// Extracts a DWARF-encoded pointer in \p Offset using \p Encoding.
/// There is a DWARF encoding that uses a PC-relative adjustment.
/// For these values, \p AbsPosOffset is used to fix them, which should
/// reflect the absolute address of this pointer.
- Optional<uint64_t> getEncodedPointer(uint32_t *Offset, uint8_t Encoding,
+ Optional<uint64_t> getEncodedPointer(uint64_t *Offset, uint8_t Encoding,
uint64_t AbsPosOffset = 0) const;
size_t size() const { return Section == nullptr ? 0 : Section->Data.size(); }
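A caller-side sketch for the new Cursor overload (readTwoAddresses is illustrative, assuming the post-patch extractor): DataExtractor::Cursor records the first failure so later reads become no-ops and the caller checks once.

    #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
    #include "llvm/Support/Error.h"

    llvm::Error readTwoAddresses(const llvm::DWARFDataExtractor &Data,
                                 uint64_t &A, uint64_t &B) {
      llvm::DataExtractor::Cursor C(0); // start at offset 0
      A = Data.getRelocatedAddress(C);
      B = Data.getRelocatedAddress(C);  // skipped if the first read failed
      return C.takeError();             // single error check for both reads
    }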
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index 28fd8484b4a9..1398e16252a9 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -20,7 +20,7 @@ namespace llvm {
class raw_ostream;
class DWARFAbbreviationDeclarationSet {
- uint32_t Offset;
+ uint64_t Offset;
/// Code of the first abbreviation, if all abbreviations in the set have
/// consecutive codes. UINT32_MAX otherwise.
uint32_t FirstAbbrCode;
@@ -32,9 +32,9 @@ class DWARFAbbreviationDeclarationSet {
public:
DWARFAbbreviationDeclarationSet();
- uint32_t getOffset() const { return Offset; }
+ uint64_t getOffset() const { return Offset; }
void dump(raw_ostream &OS) const;
- bool extract(DataExtractor Data, uint32_t *OffsetPtr);
+ bool extract(DataExtractor Data, uint64_t *OffsetPtr);
const DWARFAbbreviationDeclaration *
getAbbreviationDeclaration(uint32_t AbbrCode) const;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
index a98bf282fe7c..4539b9c9d581 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
@@ -45,7 +45,7 @@ public:
private:
dwarf::DwarfFormat Format;
- uint32_t HeaderOffset;
+ uint64_t HeaderOffset;
Header HeaderData;
uint32_t DataSize = 0;
std::vector<uint64_t> Addrs;
@@ -54,11 +54,11 @@ public:
void clear();
/// Extract an entire table, including all addresses.
- Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr,
+ Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr,
uint16_t Version, uint8_t AddrSize,
std::function<void(Error)> WarnCallback);
- uint32_t getHeaderOffset() const { return HeaderOffset; }
+ uint64_t getHeaderOffset() const { return HeaderOffset; }
uint8_t getAddrSize() const { return HeaderData.AddrSize; }
void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index 5b6c578bc3bf..ebe4ad6e24dd 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -49,7 +49,7 @@ private:
using DescriptorColl = std::vector<Descriptor>;
using desc_iterator_range = iterator_range<DescriptorColl::const_iterator>;
- uint32_t Offset;
+ uint64_t Offset;
Header HeaderData;
DescriptorColl ArangeDescriptors;
@@ -57,7 +57,7 @@ public:
DWARFDebugArangeSet() { clear(); }
void clear();
- bool extract(DataExtractor data, uint32_t *offset_ptr);
+ bool extract(DataExtractor data, uint64_t *offset_ptr);
void dump(raw_ostream &OS) const;
uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
index 03223fbc80a9..172f1d2c9dbe 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
@@ -28,7 +28,7 @@ private:
void extract(DataExtractor DebugArangesData);
/// Call appendRange multiple times and then call construct.
- void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC);
+ void appendRange(uint64_t CUOffset, uint64_t LowPC, uint64_t HighPC);
void construct();
struct Range {
@@ -60,10 +60,10 @@ private:
struct RangeEndpoint {
uint64_t Address;
- uint32_t CUOffset;
+ uint64_t CUOffset;
bool IsRangeStart;
- RangeEndpoint(uint64_t Address, uint32_t CUOffset, bool IsRangeStart)
+ RangeEndpoint(uint64_t Address, uint64_t CUOffset, bool IsRangeStart)
: Address(Address), CUOffset(CUOffset), IsRangeStart(IsRangeStart) {}
bool operator<(const RangeEndpoint &Other) const {
@@ -76,7 +76,7 @@ private:
std::vector<RangeEndpoint> Endpoints;
RangeColl Aranges;
- DenseSet<uint32_t> ParsedCUOffsets;
+ DenseSet<uint64_t> ParsedCUOffsets;
};
} // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index d960f4bc9b1c..c6539df0d756 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -69,7 +69,7 @@ public:
/// starting at *Offset and ending at EndOffset. *Offset is updated
/// to EndOffset upon successful parsing, or indicates the offset
/// where a problem occurred in case an error is returned.
- Error parse(DataExtractor Data, uint32_t *Offset, uint32_t EndOffset);
+ Error parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset);
void dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH,
unsigned IndentLevel = 1) const;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index f50063b24370..ded960337ec6 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -22,7 +22,7 @@ class DWARFUnit;
/// DWARFDebugInfoEntry - A DIE with only the minimum required data.
class DWARFDebugInfoEntry {
/// Offset within the .debug_info of the start of this entry.
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
/// The integer depth of this DIE within the compile unit DIEs where the
/// compile/type unit DIE has a depth of zero.
@@ -36,14 +36,14 @@ public:
/// Extracts a debug info entry, which is a child of a given unit,
/// starting at a given offset. If the DIE can't be extracted, returns false and
/// doesn't change OffsetPtr.
- bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr);
+ bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr);
/// High performance extraction should use this call.
- bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
- const DWARFDataExtractor &DebugInfoData, uint32_t UEndOffset,
+ bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
+ const DWARFDataExtractor &DebugInfoData, uint64_t UEndOffset,
uint32_t Depth);
- uint32_t getOffset() const { return Offset; }
+ uint64_t getOffset() const { return Offset; }
uint32_t getDepth() const { return Depth; }
dwarf::Tag getTag() const {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index e7425c192373..c2be8304ad84 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -18,6 +18,7 @@
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/Support/MD5.h"
+#include "llvm/Support/Path.h"
#include <cstdint>
#include <map>
#include <string>
@@ -128,13 +129,15 @@ public:
bool hasFileAtIndex(uint64_t FileIndex) const;
- bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
- DILineInfoSpecifier::FileLineInfoKind Kind,
- std::string &Result) const;
+ bool
+ getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
+ DILineInfoSpecifier::FileLineInfoKind Kind,
+ std::string &Result,
+ sys::path::Style Style = sys::path::Style::native) const;
void clear();
void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
- Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
+ Error parse(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
const DWARFContext &Ctx, const DWARFUnit *U = nullptr);
};
@@ -278,7 +281,7 @@ public:
/// Parse prologue and all rows.
Error parse(
- DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
+ DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
const DWARFContext &Ctx, const DWARFUnit *U,
std::function<void(Error)> RecoverableErrorCallback,
raw_ostream *OS = nullptr);
@@ -305,9 +308,9 @@ public:
std::vector<uint32_t> &Result) const;
};
- const LineTable *getLineTable(uint32_t Offset) const;
+ const LineTable *getLineTable(uint64_t Offset) const;
Expected<const LineTable *> getOrParseLineTable(
- DWARFDataExtractor &DebugLineData, uint32_t Offset,
+ DWARFDataExtractor &DebugLineData, uint64_t Offset,
const DWARFContext &Ctx, const DWARFUnit *U,
std::function<void(Error)> RecoverableErrorCallback);
@@ -350,17 +353,17 @@ public:
bool done() const { return Done; }
/// Get the offset the parser has reached.
- uint32_t getOffset() const { return Offset; }
+ uint64_t getOffset() const { return Offset; }
private:
- DWARFUnit *prepareToParse(uint32_t Offset);
- void moveToNextTable(uint32_t OldOffset, const Prologue &P);
+ DWARFUnit *prepareToParse(uint64_t Offset);
+ void moveToNextTable(uint64_t OldOffset, const Prologue &P);
LineToUnitMap LineToUnit;
DWARFDataExtractor &DebugLineData;
const DWARFContext &Context;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
bool Done = false;
};
@@ -377,7 +380,7 @@ private:
struct Sequence Sequence;
};
- using LineTableMapTy = std::map<uint32_t, LineTable>;
+ using LineTableMapTy = std::map<uint64_t, LineTable>;
using LineTableIter = LineTableMapTy::iterator;
using LineTableConstIter = LineTableMapTy::const_iterator;
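A usage sketch for the new sys::path::Style parameter (assuming the post-patch Prologue API; the file index and compilation directory are made up):

    #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
    #include "llvm/Support/raw_ostream.h"

    void printFirstFile(const llvm::DWARFDebugLine::Prologue &Prologue) {
      std::string Result;
      // Force POSIX separators even for a line table produced on Windows.
      if (Prologue.getFileNameByIndex(
              /*FileIndex=*/1, /*CompDir=*/"/src",
              llvm::DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
              Result, llvm::sys::path::Style::posix))
        llvm::outs() << Result << '\n';
    }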
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index cced6048e811..c79d98e34f6e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include <cstdint>
@@ -29,19 +30,20 @@ public:
/// The ending address of the instruction range.
uint64_t End;
/// The location of the variable within the specified range.
- SmallVector<char, 4> Loc;
+ SmallVector<uint8_t, 4> Loc;
};
/// A list of locations that contain one variable.
struct LocationList {
/// The beginning offset where this location list is stored in the debug_loc
/// section.
- unsigned Offset;
+ uint64_t Offset;
/// All the locations in which the variable is stored.
SmallVector<Entry, 2> Entries;
/// Dump this list on OS.
- void dump(raw_ostream &OS, bool IsLittleEndian, unsigned AddressSize,
- const MCRegisterInfo *MRI, DWARFUnit *U, uint64_t BaseAddress,
+ void dump(raw_ostream &OS, uint64_t BaseAddress, bool IsLittleEndian,
+ unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U,
+ DIDumpOptions DumpOpts,
unsigned Indent) const;
};
@@ -58,7 +60,7 @@ private:
public:
/// Print the location lists found within the debug_loc section.
- void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo,
+ void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo, DIDumpOptions DumpOpts,
Optional<uint64_t> Offset) const;
/// Parse the debug_loc section accessible via the 'data' parameter using the
@@ -68,25 +70,29 @@ public:
/// Return the location list at the given offset or nullptr.
LocationList const *getLocationListAtOffset(uint64_t Offset) const;
- Optional<LocationList> parseOneLocationList(DWARFDataExtractor Data,
- uint32_t *Offset);
+ Expected<LocationList>
+ parseOneLocationList(const DWARFDataExtractor &Data, uint64_t *Offset);
};
class DWARFDebugLoclists {
public:
struct Entry {
uint8_t Kind;
+ uint64_t Offset;
uint64_t Value0;
uint64_t Value1;
- SmallVector<char, 4> Loc;
+ SmallVector<uint8_t, 4> Loc;
+ void dump(raw_ostream &OS, uint64_t &BaseAddr, bool IsLittleEndian,
+ unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U,
+ DIDumpOptions DumpOpts, unsigned Indent, size_t MaxEncodingStringLength) const;
};
struct LocationList {
- unsigned Offset;
+ uint64_t Offset;
SmallVector<Entry, 2> Entries;
void dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian,
unsigned AddressSize, const MCRegisterInfo *RegInfo,
- DWARFUnit *U, unsigned Indent) const;
+ DWARFUnit *U, DIDumpOptions DumpOpts, unsigned Indent) const;
};
private:
@@ -99,15 +105,16 @@ private:
bool IsLittleEndian;
public:
- void parse(DataExtractor data, unsigned Version);
+ void parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version);
void dump(raw_ostream &OS, uint64_t BaseAddr, const MCRegisterInfo *RegInfo,
- Optional<uint64_t> Offset) const;
+ DIDumpOptions DumpOpts, Optional<uint64_t> Offset) const;
/// Return the location list at the given offset or nullptr.
LocationList const *getLocationListAtOffset(uint64_t Offset) const;
- static Optional<LocationList>
- parseOneLocationList(DataExtractor Data, unsigned *Offset, unsigned Version);
+ static Expected<LocationList> parseOneLocationList(const DataExtractor &Data,
+ uint64_t *Offset,
+ unsigned Version);
};
} // end namespace llvm
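A caller-side sketch for the Optional-to-Expected change (illustrative, assuming the post-patch signature): a failed parse now carries a reason that must be handled or consumed.

    #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    void parseList(const llvm::DataExtractor &Data, uint64_t Offset) {
      auto ListOrErr = llvm::DWARFDebugLoclists::parseOneLocationList(
          Data, &Offset, /*Version=*/4);
      if (!ListOrErr) {
        // The Expected explains *why* parsing failed, unlike the old None.
        llvm::logAllUnhandledErrors(ListOrErr.takeError(), llvm::errs(),
                                    "loclist parse failed: ");
        return;
      }
      llvm::outs() << "entries: " << ListOrErr->Entries.size() << '\n';
    }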
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
index 99e91ca90319..ae57306b90e1 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
@@ -25,7 +25,7 @@ class DWARFDebugPubTable {
public:
struct Entry {
/// Section offset from the beginning of the compilation unit.
- uint32_t SecOffset;
+ uint64_t SecOffset;
/// An entry of the various gnu_pub* debug sections.
dwarf::PubIndexEntryDescriptor Descriptor;
@@ -50,7 +50,7 @@ public:
/// The offset from the beginning of the .debug_info section of the
/// compilation unit header referenced by the set.
- uint32_t Offset;
+ uint64_t Offset;
/// The size in bytes of the contents of the .debug_info section generated
/// to represent that compilation unit.
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index a66f60292343..2f72c642a2d5 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -53,14 +53,13 @@ public:
assert(AddressSize == 4 || AddressSize == 8);
if (AddressSize == 4)
return StartAddress == -1U;
- else
- return StartAddress == -1ULL;
+ return StartAddress == -1ULL;
}
};
private:
/// Offset in .debug_ranges section.
- uint32_t Offset;
+ uint64_t Offset;
uint8_t AddressSize;
std::vector<RangeListEntry> Entries;
@@ -69,7 +68,7 @@ public:
void clear();
void dump(raw_ostream &OS) const;
- Error extract(const DWARFDataExtractor &data, uint32_t *offset_ptr);
+ Error extract(const DWARFDataExtractor &data, uint64_t *offset_ptr);
const std::vector<RangeListEntry> &getEntries() { return Entries; }
/// getAbsoluteRanges - Returns absolute address ranges defined by this range
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
index 167ddde3ec3d..952c41e188c7 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
@@ -34,7 +34,7 @@ struct RangeListEntry : public DWARFListEntryBase {
uint64_t Value0;
uint64_t Value1;
- Error extract(DWARFDataExtractor Data, uint32_t End, uint32_t *OffsetPtr);
+ Error extract(DWARFDataExtractor Data, uint64_t End, uint64_t *OffsetPtr);
void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
uint64_t &CurrentBase, DIDumpOptions DumpOpts,
llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h
index 21e68f983bb3..f7f08b4a499d 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -63,7 +63,7 @@ public:
/// Get the absolute offset into the debug info or types section.
///
/// \returns the DIE offset or -1U if invalid.
- uint32_t getOffset() const {
+ uint64_t getOffset() const {
assert(isValid() && "must check validity prior to calling");
return Die->getOffset();
}
diff --git a/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index f066dd58d606..456d9df957ad 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -77,18 +77,18 @@ public:
uint8_t Opcode; ///< The Op Opcode, DW_OP_<something>.
Description Desc;
bool Error;
- uint32_t EndOffset;
+ uint64_t EndOffset;
uint64_t Operands[2];
- uint32_t OperandEndOffsets[2];
+ uint64_t OperandEndOffsets[2];
public:
Description &getDescription() { return Desc; }
uint8_t getCode() { return Opcode; }
uint64_t getRawOperand(unsigned Idx) { return Operands[Idx]; }
- uint32_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; }
- uint32_t getEndOffset() { return EndOffset; }
+ uint64_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; }
+ uint64_t getEndOffset() { return EndOffset; }
bool extract(DataExtractor Data, uint16_t Version, uint8_t AddressSize,
- uint32_t Offset);
+ uint64_t Offset);
bool isError() { return Error; }
bool print(raw_ostream &OS, const DWARFExpression *Expr,
const MCRegisterInfo *RegInfo, DWARFUnit *U, bool isEH);
@@ -101,9 +101,9 @@ public:
Operation> {
friend class DWARFExpression;
const DWARFExpression *Expr;
- uint32_t Offset;
+ uint64_t Offset;
Operation Op;
- iterator(const DWARFExpression *Expr, uint32_t Offset)
+ iterator(const DWARFExpression *Expr, uint64_t Offset)
: Expr(Expr), Offset(Offset) {
Op.Error =
Offset >= Expr->Data.getData().size() ||
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 731e71ed9eae..6fec6fcb6b34 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -70,7 +70,7 @@ public:
static DWARFFormValue createFromBlockValue(dwarf::Form F,
ArrayRef<uint8_t> D);
static DWARFFormValue createFromUnit(dwarf::Form F, const DWARFUnit *Unit,
- uint32_t *OffsetPtr);
+ uint64_t *OffsetPtr);
dwarf::Form getForm() const { return Form; }
uint64_t getRawUValue() const { return Value.uval; }
@@ -87,12 +87,12 @@ public:
/// in \p FormParams is needed to interpret some forms. The optional
/// \p Context and \p Unit allows extracting information if the form refers
/// to other sections (e.g., .debug_str).
- bool extractValue(const DWARFDataExtractor &Data, uint32_t *OffsetPtr,
+ bool extractValue(const DWARFDataExtractor &Data, uint64_t *OffsetPtr,
dwarf::FormParams FormParams,
const DWARFContext *Context = nullptr,
const DWARFUnit *Unit = nullptr);
- bool extractValue(const DWARFDataExtractor &Data, uint32_t *OffsetPtr,
+ bool extractValue(const DWARFDataExtractor &Data, uint64_t *OffsetPtr,
dwarf::FormParams FormParams, const DWARFUnit *U) {
return extractValue(Data, OffsetPtr, FormParams, nullptr, U);
}
@@ -128,7 +128,7 @@ public:
/// \param OffsetPtr A reference to the offset that will be updated.
/// \param Params DWARF parameters to help interpret forms.
/// \returns true on success, false if the form was not skipped.
- bool skipValue(DataExtractor DebugInfoData, uint32_t *OffsetPtr,
+ bool skipValue(DataExtractor DebugInfoData, uint64_t *OffsetPtr,
const dwarf::FormParams Params) const {
return DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, Params);
}
@@ -144,7 +144,7 @@ public:
/// \param FormParams DWARF parameters to help interpret forms.
/// \returns true on success, false if the form was not skipped.
static bool skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
- uint32_t *OffsetPtr,
+ uint64_t *OffsetPtr,
const dwarf::FormParams FormParams);
private:
diff --git a/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index a1ea69b040f0..496fdb2477f9 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -26,7 +26,7 @@ namespace llvm {
/// entries.
struct DWARFListEntryBase {
/// The offset at which the entry is located in the section.
- uint32_t Offset;
+ uint64_t Offset;
/// The DWARF encoding (DW_RLE_* or DW_LLE_*).
uint8_t EntryKind;
/// The index of the section this entry belongs to.
@@ -46,8 +46,8 @@ public:
const ListEntries &getEntries() const { return Entries; }
bool empty() const { return Entries.empty(); }
void clear() { Entries.clear(); }
- Error extract(DWARFDataExtractor Data, uint32_t HeaderOffset, uint32_t End,
- uint32_t *OffsetPtr, StringRef SectionName,
+ Error extract(DWARFDataExtractor Data, uint64_t HeaderOffset, uint64_t End,
+ uint64_t *OffsetPtr, StringRef SectionName,
StringRef ListStringName);
};
@@ -57,7 +57,7 @@ class DWARFListTableHeader {
struct Header {
/// The total length of the entries for this table, not including the length
/// field itself.
- uint32_t Length = 0;
+ uint64_t Length = 0;
/// The DWARF version number.
uint16_t Version;
/// The size in bytes of an address on the target architecture. For
@@ -75,12 +75,12 @@ class DWARFListTableHeader {
/// The offset table, which contains offsets to the individual list entries.
/// It is used by forms such as DW_FORM_rnglistx.
/// FIXME: Generate the table and use the appropriate forms.
- std::vector<uint32_t> Offsets;
+ std::vector<uint64_t> Offsets;
/// The table's format, either DWARF32 or DWARF64.
dwarf::DwarfFormat Format;
/// The offset at which the header (and hence the table) is located within
/// its section.
- uint32_t HeaderOffset;
+ uint64_t HeaderOffset;
/// The name of the section the list is located in.
StringRef SectionName;
/// A characterization of the list for dumping purposes, e.g. "range" or
@@ -95,28 +95,40 @@ public:
HeaderData = {};
Offsets.clear();
}
- uint32_t getHeaderOffset() const { return HeaderOffset; }
+ uint64_t getHeaderOffset() const { return HeaderOffset; }
uint8_t getAddrSize() const { return HeaderData.AddrSize; }
- uint32_t getLength() const { return HeaderData.Length; }
+ uint64_t getLength() const { return HeaderData.Length; }
uint16_t getVersion() const { return HeaderData.Version; }
StringRef getSectionName() const { return SectionName; }
StringRef getListTypeString() const { return ListTypeString; }
dwarf::DwarfFormat getFormat() const { return Format; }
+ /// Return the size of the table header including the length but not including
+ /// the offsets.
+ static uint8_t getHeaderSize(dwarf::DwarfFormat Format) {
+ switch (Format) {
+ case dwarf::DwarfFormat::DWARF32:
+ return 12;
+ case dwarf::DwarfFormat::DWARF64:
+ return 20;
+ }
+ llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64");
+ }
+
void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const;
- Optional<uint32_t> getOffsetEntry(uint32_t Index) const {
+ Optional<uint64_t> getOffsetEntry(uint32_t Index) const {
if (Index < Offsets.size())
return Offsets[Index];
return None;
}
/// Extract the table header and the array of offsets.
- Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr);
+ Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr);
/// Returns the length of the table, including the length field, or 0 if the
/// length has not been determined (e.g. because the table has not yet been
/// parsed, or there was a problem in parsing).
- uint32_t length() const;
+ uint64_t length() const;
};
/// A class representing a table of lists as specified in the DWARF v5
@@ -128,7 +140,7 @@ template <typename DWARFListType> class DWARFListTableBase {
DWARFListTableHeader Header;
/// A mapping between file offsets and lists. It is used to find a particular
/// list based on an offset (obtained from DW_AT_ranges, for example).
- std::map<uint32_t, DWARFListType> ListMap;
+ std::map<uint64_t, DWARFListType> ListMap;
/// This string is displayed as a heading before the list is dumped
/// (e.g. "ranges:").
StringRef HeaderString;
@@ -144,17 +156,18 @@ public:
ListMap.clear();
}
/// Extract the table header and the array of offsets.
- Error extractHeaderAndOffsets(DWARFDataExtractor Data, uint32_t *OffsetPtr) {
+ Error extractHeaderAndOffsets(DWARFDataExtractor Data, uint64_t *OffsetPtr) {
return Header.extract(Data, OffsetPtr);
}
/// Extract an entire table, including all list entries.
- Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr);
+ Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr);
/// Look up a list based on a given offset. Extract it and enter it into the
/// list map if necessary.
- Expected<DWARFListType> findList(DWARFDataExtractor Data, uint32_t Offset);
+ Expected<DWARFListType> findList(DWARFDataExtractor Data, uint64_t Offset);
- uint32_t getHeaderOffset() const { return Header.getHeaderOffset(); }
+ uint64_t getHeaderOffset() const { return Header.getHeaderOffset(); }
uint8_t getAddrSize() const { return Header.getAddrSize(); }
+ dwarf::DwarfFormat getFormat() const { return Header.getFormat(); }
void dump(raw_ostream &OS,
llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
@@ -162,37 +175,31 @@ public:
DIDumpOptions DumpOpts = {}) const;
/// Return the contents of the offset entry designated by a given index.
- Optional<uint32_t> getOffsetEntry(uint32_t Index) const {
+ Optional<uint64_t> getOffsetEntry(uint32_t Index) const {
return Header.getOffsetEntry(Index);
}
/// Return the size of the table header including the length but not including
/// the offsets. This is dependent on the table format, which is unambiguously
/// derived from parsing the table.
uint8_t getHeaderSize() const {
- switch (Header.getFormat()) {
- case dwarf::DwarfFormat::DWARF32:
- return 12;
- case dwarf::DwarfFormat::DWARF64:
- return 20;
- }
- llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64");
+ return DWARFListTableHeader::getHeaderSize(getFormat());
}
- uint32_t length() { return Header.length(); }
+ uint64_t length() { return Header.length(); }
};
template <typename DWARFListType>
Error DWARFListTableBase<DWARFListType>::extract(DWARFDataExtractor Data,
- uint32_t *OffsetPtr) {
+ uint64_t *OffsetPtr) {
clear();
if (Error E = extractHeaderAndOffsets(Data, OffsetPtr))
return E;
Data.setAddressSize(Header.getAddrSize());
- uint32_t End = getHeaderOffset() + Header.length();
+ uint64_t End = getHeaderOffset() + Header.length();
while (*OffsetPtr < End) {
DWARFListType CurrentList;
- uint32_t Off = *OffsetPtr;
+ uint64_t Off = *OffsetPtr;
if (Error E = CurrentList.extract(Data, getHeaderOffset(), End, OffsetPtr,
Header.getSectionName(),
Header.getListTypeString()))
@@ -208,13 +215,13 @@ Error DWARFListTableBase<DWARFListType>::extract(DWARFDataExtractor Data,
template <typename ListEntryType>
Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
- uint32_t HeaderOffset, uint32_t End,
- uint32_t *OffsetPtr,
+ uint64_t HeaderOffset, uint64_t End,
+ uint64_t *OffsetPtr,
StringRef SectionName,
StringRef ListTypeString) {
if (*OffsetPtr < HeaderOffset || *OffsetPtr >= End)
return createStringError(errc::invalid_argument,
- "invalid %s list offset 0x%" PRIx32,
+ "invalid %s list offset 0x%" PRIx64,
ListTypeString.data(), *OffsetPtr);
Entries.clear();
while (*OffsetPtr < End) {
@@ -227,7 +234,7 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
}
return createStringError(errc::illegal_byte_sequence,
"no end of list marker detected at end of %s table "
- "starting at offset 0x%" PRIx32,
+ "starting at offset 0x%" PRIx64,
SectionName.data(), HeaderOffset);
}
@@ -261,15 +268,15 @@ void DWARFListTableBase<DWARFListType>::dump(
template <typename DWARFListType>
Expected<DWARFListType>
DWARFListTableBase<DWARFListType>::findList(DWARFDataExtractor Data,
- uint32_t Offset) {
+ uint64_t Offset) {
auto Entry = ListMap.find(Offset);
if (Entry != ListMap.end())
return Entry->second;
// Extract the list from the section and enter it into the list map.
DWARFListType List;
- uint32_t End = getHeaderOffset() + Header.length();
- uint32_t StartingOffset = Offset;
+ uint64_t End = getHeaderOffset() + Header.length();
+ uint64_t StartingOffset = Offset;
if (Error E =
List.extract(Data, getHeaderOffset(), End, &Offset,
Header.getSectionName(), Header.getListTypeString()))
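The 12/20 constants in getHeaderSize() follow from the DWARF v5 list-table header layout; a byte-count check (derived from the spec, not from this patch):

    // DWARF32: unit_length(4) + version(2) + address_size(1)
    //        + segment_selector_size(1) + offset_entry_count(4) = 12
    // DWARF64: the length field alone is 12 (0xffffffff escape plus an
    //          8-byte unit_length), so 12 + 2 + 1 + 1 + 4 = 20
    static_assert(4 + 2 + 1 + 1 + 4 == 12, "DWARF32 list-table header size");
    static_assert(12 + 2 + 1 + 1 + 4 == 20, "DWARF64 list-table header size");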
diff --git a/include/llvm/DebugInfo/DWARF/DWARFObject.h b/include/llvm/DebugInfo/DWARF/DWARFObject.h
index 1bba74a25d0e..88fe3f434edc 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFObject.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFObject.h
@@ -39,20 +39,20 @@ public:
virtual StringRef getAbbrevSection() const { return ""; }
virtual const DWARFSection &getLocSection() const { return Dummy; }
virtual const DWARFSection &getLoclistsSection() const { return Dummy; }
- virtual StringRef getARangeSection() const { return ""; }
- virtual StringRef getDebugFrameSection() const { return ""; }
- virtual StringRef getEHFrameSection() const { return ""; }
+ virtual StringRef getArangesSection() const { return ""; }
+ virtual const DWARFSection &getFrameSection() const { return Dummy; }
+ virtual const DWARFSection &getEHFrameSection() const { return Dummy; }
virtual const DWARFSection &getLineSection() const { return Dummy; }
- virtual StringRef getLineStringSection() const { return ""; }
- virtual StringRef getStringSection() const { return ""; }
- virtual const DWARFSection &getRangeSection() const { return Dummy; }
+ virtual StringRef getLineStrSection() const { return ""; }
+ virtual StringRef getStrSection() const { return ""; }
+ virtual const DWARFSection &getRangesSection() const { return Dummy; }
virtual const DWARFSection &getRnglistsSection() const { return Dummy; }
virtual StringRef getMacinfoSection() const { return ""; }
- virtual const DWARFSection &getPubNamesSection() const { return Dummy; }
- virtual const DWARFSection &getPubTypesSection() const { return Dummy; }
- virtual const DWARFSection &getGnuPubNamesSection() const { return Dummy; }
- virtual const DWARFSection &getGnuPubTypesSection() const { return Dummy; }
- virtual const DWARFSection &getStringOffsetSection() const { return Dummy; }
+ virtual const DWARFSection &getPubnamesSection() const { return Dummy; }
+ virtual const DWARFSection &getPubtypesSection() const { return Dummy; }
+ virtual const DWARFSection &getGnuPubnamesSection() const { return Dummy; }
+ virtual const DWARFSection &getGnuPubtypesSection() const { return Dummy; }
+ virtual const DWARFSection &getStrOffsetsSection() const { return Dummy; }
virtual void
forEachInfoDWOSections(function_ref<void(const DWARFSection &)> F) const {}
virtual void
@@ -60,11 +60,11 @@ public:
virtual StringRef getAbbrevDWOSection() const { return ""; }
virtual const DWARFSection &getLineDWOSection() const { return Dummy; }
virtual const DWARFSection &getLocDWOSection() const { return Dummy; }
- virtual StringRef getStringDWOSection() const { return ""; }
- virtual const DWARFSection &getStringOffsetDWOSection() const {
+ virtual StringRef getStrDWOSection() const { return ""; }
+ virtual const DWARFSection &getStrOffsetsDWOSection() const {
return Dummy;
}
- virtual const DWARFSection &getRangeDWOSection() const { return Dummy; }
+ virtual const DWARFSection &getRangesDWOSection() const { return Dummy; }
virtual const DWARFSection &getRnglistsDWOSection() const { return Dummy; }
virtual const DWARFSection &getAddrSection() const { return Dummy; }
virtual const DWARFSection &getAppleNamesSection() const { return Dummy; }
@@ -72,7 +72,7 @@ public:
virtual const DWARFSection &getAppleNamespacesSection() const {
return Dummy;
}
- virtual const DWARFSection &getDebugNamesSection() const { return Dummy; }
+ virtual const DWARFSection &getNamesSection() const { return Dummy; }
virtual const DWARFSection &getAppleObjCSection() const { return Dummy; }
virtual StringRef getCUIndexSection() const { return ""; }
virtual StringRef getGdbIndexSection() const { return ""; }
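The renames above make each DWARFObject accessor mirror the ELF section it returns; a mapping note for a few of them (as read from this hunk, not an exhaustive list):

    // getStrSection()      -> .debug_str       (was getStringSection)
    // getLineStrSection()  -> .debug_line_str  (was getLineStringSection)
    // getArangesSection()  -> .debug_aranges   (was getARangeSection)
    // getPubnamesSection() -> .debug_pubnames  (was getPubNamesSection)
    // getNamesSection()    -> .debug_names     (was getDebugNamesSection)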
diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index 90d89375fd35..c95bdcbd8a43 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -34,7 +34,7 @@ public:
LS, LE, IsDWO, UnitVector) {}
uint64_t getTypeHash() const { return getHeader().getTypeHash(); }
- uint32_t getTypeOffset() const { return getHeader().getTypeOffset(); }
+ uint64_t getTypeOffset() const { return getHeader().getTypeOffset(); }
void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) override;
// Enable LLVM-style RTTI.
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index f9f90db31890..51de114a3506 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -45,7 +45,7 @@ class DWARFUnit;
/// parse the header before deciding what specific kind of unit to construct.
class DWARFUnitHeader {
// Offset within section.
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
// Version, address size, and DWARF format.
dwarf::FormParams FormParams;
uint64_t Length = 0;
@@ -56,7 +56,7 @@ class DWARFUnitHeader {
// For type units only.
uint64_t TypeHash = 0;
- uint32_t TypeOffset = 0;
+ uint64_t TypeOffset = 0;
// For v5 split or skeleton compile units only.
Optional<uint64_t> DWOId;
@@ -70,10 +70,10 @@ class DWARFUnitHeader {
public:
/// Parse a unit header from \p debug_info starting at \p offset_ptr.
bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info,
- uint32_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO,
+ uint64_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO,
const DWARFUnitIndex *Index = nullptr,
const DWARFUnitIndex::Entry *Entry = nullptr);
- uint32_t getOffset() const { return Offset; }
+ uint64_t getOffset() const { return Offset; }
const dwarf::FormParams &getFormParams() const { return FormParams; }
uint16_t getVersion() const { return FormParams.Version; }
dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
@@ -91,16 +91,17 @@ public:
}
const DWARFUnitIndex::Entry *getIndexEntry() const { return IndexEntry; }
uint64_t getTypeHash() const { return TypeHash; }
- uint32_t getTypeOffset() const { return TypeOffset; }
+ uint64_t getTypeOffset() const { return TypeOffset; }
uint8_t getUnitType() const { return UnitType; }
bool isTypeUnit() const {
return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type;
}
uint8_t getSize() const { return Size; }
- uint32_t getNextUnitOffset() const {
- return Offset + Length +
- (FormParams.Format == llvm::dwarf::DwarfFormat::DWARF64 ? 4 : 0) +
- FormParams.getDwarfOffsetByteSize();
+ uint8_t getUnitLengthFieldByteSize() const {
+ return dwarf::getUnitLengthFieldByteSize(FormParams.Format);
+ }
+ uint64_t getNextUnitOffset() const {
+ return Offset + Length + getUnitLengthFieldByteSize();
}
};
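Worked arithmetic for the refactored getNextUnitOffset() (values illustrative): the unit-length field itself occupies 4 bytes in DWARF32 and 12 in DWARF64 (0xffffffff escape plus an 8-byte length), which is exactly what the old "(DWARF64 ? 4 : 0) + getDwarfOffsetByteSize()" sum computed.

    #include <cstdint>

    // A DWARF32 unit at offset 0x1000 with unit_length 0x2c ends at
    // 0x1000 + 0x2c + 4 = 0x1030, where the next unit header begins.
    uint64_t nextUnitOffset(uint64_t Offset, uint64_t Length, bool IsDWARF64) {
      const uint64_t LengthFieldSize = IsDWARF64 ? 12 : 4;
      return Offset + Length + LengthFieldSize;
    }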
@@ -110,7 +111,7 @@ const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
/// Describe a collection of units. Intended to hold all units either from
/// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo.
class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> {
- std::function<std::unique_ptr<DWARFUnit>(uint32_t, DWARFSectionKind,
+ std::function<std::unique_ptr<DWARFUnit>(uint64_t, DWARFSectionKind,
const DWARFSection *,
const DWARFUnitIndex::Entry *)>
Parser;
@@ -121,7 +122,7 @@ public:
using iterator = typename UnitVector::iterator;
using iterator_range = llvm::iterator_range<typename UnitVector::iterator>;
- DWARFUnit *getUnitForOffset(uint32_t Offset) const;
+ DWARFUnit *getUnitForOffset(uint64_t Offset) const;
DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
/// Read units from a .debug_info or .debug_types section. Calls made
@@ -197,7 +198,7 @@ class DWARFUnit {
DWARFUnitHeader Header;
const DWARFDebugAbbrev *Abbrev;
const DWARFSection *RangeSection;
- uint32_t RangeSectionBase;
+ uint64_t RangeSectionBase;
/// We either keep track of the location list section or its data, depending
/// on whether we are handling a split DWARF section or not.
union {
@@ -275,7 +276,7 @@ public:
const DWARFSection &getInfoSection() const { return InfoSection; }
const DWARFSection *getLocSection() const { return LocSection; }
StringRef getLocSectionData() const { return LocSectionData; }
- uint32_t getOffset() const { return Header.getOffset(); }
+ uint64_t getOffset() const { return Header.getOffset(); }
const dwarf::FormParams &getFormParams() const {
return Header.getFormParams();
}
@@ -285,10 +286,10 @@ public:
uint8_t getDwarfOffsetByteSize() const {
return Header.getDwarfOffsetByteSize();
}
- uint32_t getLength() const { return Header.getLength(); }
+ uint64_t getLength() const { return Header.getLength(); }
uint8_t getUnitType() const { return Header.getUnitType(); }
bool isTypeUnit() const { return Header.isTypeUnit(); }
- uint32_t getNextUnitOffset() const { return Header.getNextUnitOffset(); }
+ uint64_t getNextUnitOffset() const { return Header.getNextUnitOffset(); }
const DWARFSection &getLineSection() const { return LineSection; }
StringRef getStringSection() const { return StringSection; }
const DWARFSection &getStringOffsetSection() const {
@@ -303,7 +304,7 @@ public:
/// Recursively update address to Die map.
void updateAddressDieMap(DWARFDie Die);
- void setRangesSection(const DWARFSection *RS, uint32_t Base) {
+ void setRangesSection(const DWARFSection *RS, uint64_t Base) {
RangeSection = RS;
RangeSectionBase = Base;
}
@@ -322,7 +323,7 @@ public:
/// .debug_ranges section. If the extraction is unsuccessful, an error
/// is returned. Successful extraction requires that the compile unit
/// has already been extracted.
- Error extractRangeList(uint32_t RangeListOffset,
+ Error extractRangeList(uint64_t RangeListOffset,
DWARFDebugRangeList &RangeList) const;
void clear();
@@ -405,7 +406,7 @@ public:
/// Return a vector of address ranges resulting from a (possibly encoded)
/// range list starting at a given offset in the appropriate ranges section.
- Expected<DWARFAddressRangesVector> findRnglistFromOffset(uint32_t Offset);
+ Expected<DWARFAddressRangesVector> findRnglistFromOffset(uint64_t Offset);
/// Return a vector of address ranges retrieved from an encoded range
/// list whose offset is found via a table lookup given an index (DWARF v5
@@ -415,7 +416,7 @@ public:
/// Return a rangelist's offset based on an index. The index designates
/// an entry in the rangelist table's offset array and is supplied by
/// DW_FORM_rnglistx.
- Optional<uint32_t> getRnglistOffset(uint32_t Index) {
+ Optional<uint64_t> getRnglistOffset(uint32_t Index) {
if (RngListTable)
return RngListTable->getOffsetEntry(Index);
return None;
@@ -470,7 +471,7 @@ public:
/// unit's DIE vector.
///
/// The unit needs to have its DIEs extracted for this method to work.
- DWARFDie getDIEForOffset(uint32_t Offset) {
+ DWARFDie getDIEForOffset(uint64_t Offset) {
extractDIEsIfNeeded(false);
assert(!DieArray.empty());
auto It =
@@ -495,15 +496,19 @@ public:
}
virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0;
+
+ Error tryExtractDIEsIfNeeded(bool CUDieOnly);
+
private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const {
- return Header.getLength() + 4 - getHeaderSize();
+ return Header.getLength() + Header.getUnitLengthFieldByteSize() -
+ getHeaderSize();
}
/// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
- /// hasn't already been done. Returns the number of DIEs parsed at this call.
- size_t extractDIEsIfNeeded(bool CUDieOnly);
+ /// hasn't already been done.
+ void extractDIEsIfNeeded(bool CUDieOnly);
/// extractDIEsToVector - Appends all parsed DIEs to a vector.
void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
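
Taken together, the widened offsets let callers walk an entire .debug_info section with 64 bit offsets. A hedged sketch using the extract()/getNextUnitOffset() API shown above; the function name is invented, and a DWARFContext plus a DWARFDataExtractor over the section are assumed to be in hand:

    #include "llvm/DebugInfo/DWARF/DWARFContext.h"
    #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
    #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
    #include <cstdint>

    // Count the unit headers in a .debug_info section; extract() advances
    // Offset past the header, and getNextUnitOffset() skips the DIE data.
    static size_t countUnits(llvm::DWARFContext &Context,
                             const llvm::DWARFDataExtractor &Data) {
      size_t Count = 0;
      uint64_t Offset = 0;
      while (Offset < Data.getData().size()) {
        llvm::DWARFUnitHeader Header;
        if (!Header.extract(Context, Data, &Offset))
          break;                              // Malformed header; stop walking.
        ++Count;
        Offset = Header.getNextUnitOffset();  // Jump to the next unit.
      }
      return Count;
    }
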
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
index fc8c707c512e..684103aac2fc 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
@@ -37,7 +37,7 @@ class DWARFUnitIndex {
uint32_t NumUnits;
uint32_t NumBuckets = 0;
- bool parse(DataExtractor IndexData, uint32_t *OffsetPtr);
+ bool parse(DataExtractor IndexData, uint64_t *OffsetPtr);
void dump(raw_ostream &OS) const;
};
diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index f1268f220272..a4a3a11d441b 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -94,7 +94,7 @@ private:
/// A map that tracks all references (converted absolute references) so we
/// can verify each reference points to a valid DIE and not an offset that
/// lies between two valid DIEs.
- std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets;
+ std::map<uint64_t, std::set<uint64_t>> ReferenceToDIEOffsets;
uint32_t NumDebugLineErrors = 0;
// Used to relax some checks that do not currently work portably
bool IsObjectFile;
@@ -138,7 +138,7 @@ private:
///
/// \returns true if the header is verified successfully, false otherwise.
bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
- uint32_t *Offset, unsigned UnitIndex, uint8_t &UnitType,
+ uint64_t *Offset, unsigned UnitIndex, uint8_t &UnitType,
bool &isUnitDWARF64);
/// Verifies the header of a unit in a .debug_info or .debug_types section.
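
The widened ReferenceToDIEOffsets map keys a referenced offset to the set of DIE offsets that refer to it; verification then reduces to checking that each key names a real DIE. A small hedged sketch of that check (names are hypothetical, not the verifier's actual methods):

    #include <cstdint>
    #include <map>
    #include <set>

    // Count references whose target offset does not name a real DIE, i.e.
    // falls between two valid DIEs. DieOffsets holds every valid DIE offset.
    static size_t countDanglingRefs(
        const std::map<uint64_t, std::set<uint64_t>> &RefToReferencers,
        const std::set<uint64_t> &DieOffsets) {
      size_t Bad = 0;
      for (const auto &KV : RefToReferencers)
        if (!DieOffsets.count(KV.first))
          Bad += KV.second.size(); // Every DIE referencing this offset is bad.
      return Bad;
    }
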
diff --git a/include/llvm/DebugInfo/GSYM/FileEntry.h b/include/llvm/DebugInfo/GSYM/FileEntry.h
index 228b4efa0656..49e7fc9c4291 100644
--- a/include/llvm/DebugInfo/GSYM/FileEntry.h
+++ b/include/llvm/DebugInfo/GSYM/FileEntry.h
@@ -1,9 +1,8 @@
//===- FileEntry.h ----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/GSYM/FileWriter.h b/include/llvm/DebugInfo/GSYM/FileWriter.h
new file mode 100644
index 000000000000..cd568765a4f2
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/FileWriter.h
@@ -0,0 +1,124 @@
+//===- FileWriter.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H
+#define LLVM_DEBUGINFO_GSYM_FILEWRITER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Endian.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+namespace llvm {
+class raw_pwrite_stream;
+
+namespace gsym {
+
+/// A simplified binary data writer class that doesn't require targets, target
+/// definitions, architectures, or any other optional compile time libraries
+/// to be enabled via the build process. This class needs the ability to seek
+/// to different spots in the binary stream that it produces to fix up
+/// offsets and sizes.
+class FileWriter {
+ llvm::raw_pwrite_stream &OS;
+ llvm::support::endianness ByteOrder;
+public:
+ FileWriter(llvm::raw_pwrite_stream &S, llvm::support::endianness B)
+ : OS(S), ByteOrder(B) {}
+ ~FileWriter();
+ /// Write a single uint8_t value into the stream at the current file
+ /// position.
+ ///
+ /// \param Value The value to write into the stream.
+ void writeU8(uint8_t Value);
+
+ /// Write a single uint16_t value into the stream at the current file
+ /// position. The value will be byte swapped if needed to match the byte
+ /// order specified during construction.
+ ///
+ /// \param Value The value to write into the stream.
+ void writeU16(uint16_t Value);
+
+ /// Write a single uint32_t value into the stream at the current file
+ /// position. The value will be byte swapped if needed to match the byte
+ /// order specified during construction.
+ ///
+ /// \param Value The value to write into the stream.
+ void writeU32(uint32_t Value);
+
+ /// Write a single uint64_t value into the stream at the current file
+ /// position. The value will be byte swapped if needed to match the byte
+ /// order specified during construction.
+ ///
+ /// \param Value The value to write into the stream.
+ void writeU64(uint64_t Value);
+
+ /// Write the value into the stream encoded using signed LEB128 at the
+ /// current file position.
+ ///
+ /// \param Value The value to write into the stream.
+ void writeSLEB(int64_t Value);
+
+ /// Write the value into the stream encoded using unsigned LEB128 at the
+ /// current file position.
+ ///
+ /// \param Value The value to write into the stream.
+ void writeULEB(uint64_t Value);
+
+ /// Write an array of uint8_t values into the stream at the current file
+ /// position.
+ ///
+ /// \param Data An array of values to write into the stream.
+ void writeData(llvm::ArrayRef<uint8_t> Data);
+
+ /// Write a NULL terminated C string into the stream at the current file
+ /// position. The entire contents of Str will be written into the stream at
+ /// the current file position and then an extra NULL termination byte will be
+ /// written. It is up to the user to ensure that Str doesn't contain any NULL
+ /// characters unless the additional NULL characters are desired.
+ ///
+ /// \param Str The value to write into the stream.
+ void writeNullTerminated(llvm::StringRef Str);
+
+ /// Fixup a uint32_t value at the specified offset in the stream. This
+ /// function will save the current file position, seek to the specified
+ /// offset, overwrite the data using Value, and then restore the file
+ /// position to the previous file position.
+ ///
+ /// \param Value The value to write into the stream.
+ /// \param Offset The offset at which to write the Value within the stream.
+ void fixup32(uint32_t Value, uint64_t Offset);
+
+ /// Pad with zeroes at the current file position until the current file
+ /// position matches the specified alignment.
+ ///
+ /// \param Align An integer specifying the desired alignment. This does not
+ /// need to be a power of two.
+ void alignTo(size_t Align);
+
+ /// Return the current offset within the file.
+ ///
+ /// \return The unsigned offset from the start of the file of the current
+ /// file position.
+ uint64_t tell();
+
+ llvm::raw_pwrite_stream &get_stream() {
+ return OS;
+ }
+
+private:
+ FileWriter(const FileWriter &rhs) = delete;
+ void operator=(const FileWriter &rhs) = delete;
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H
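
A brief usage sketch of the API above: emit a length-prefixed record, then back-patch the size with fixup32(), which is exactly the seek-and-restore behavior the class exists for. Only declarations from this header are used; the record layout itself is invented for illustration:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/DebugInfo/GSYM/FileWriter.h"
    #include <cstdint>

    // Write [u32 size][payload][pad to 4], then patch the size placeholder.
    static void writeSizedRecord(llvm::gsym::FileWriter &FW,
                                 llvm::ArrayRef<uint8_t> Payload) {
      const uint64_t SizeOffset = FW.tell();
      FW.writeU32(0);              // Placeholder; the size isn't known yet.
      FW.writeData(Payload);
      FW.alignTo(4);               // Zero-pad to a 4 byte boundary.
      const uint64_t End = FW.tell();
      FW.fixup32(static_cast<uint32_t>(End - SizeOffset - 4), SizeOffset);
    }
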
diff --git a/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index eedb1e638fd1..63e18bb2ecd5 100644
--- a/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -1,17 +1,17 @@
//===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
+#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
-#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/DebugInfo/GSYM/Range.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
#include <tuple>
@@ -21,41 +21,125 @@ namespace llvm {
class raw_ostream;
namespace gsym {
-/// Function information in GSYM files encodes information for one
-/// contiguous address range. The name of the function is encoded as
-/// a string table offset and allows multiple functions with the same
-/// name to share the name string in the string table. Line tables are
-/// stored in a sorted vector of gsym::LineEntry objects and are split
-/// into line tables for each function. If a function has a discontiguous
-/// range, it will be split into two gsym::FunctionInfo objects. If the
-/// function has inline functions, the information will be encoded in
-/// the "Inline" member, see gsym::InlineInfo for more information.
+/// Function information in GSYM files encodes information for one contiguous
+/// address range. If a function has discontiguous address ranges, they will
+/// need to be encoded using multiple FunctionInfo objects.
+///
+/// ENCODING
+///
+/// The function information gets the function start address as an argument
+/// to the FunctionInfo::decode(...) function. This information is calculated
+/// from the GSYM header and an address offset from the GSYM address offsets
+/// table. The encoded FunctionInfo information must be aligned to a 4 byte
+/// boundary.
+///
+/// The encoded data for a FunctionInfo starts with fixed data that all
+/// function info objects have:
+///
+/// ENCODING NAME DESCRIPTION
+/// ========= =========== ====================================================
+/// uint32_t Size The size in bytes of this function.
+/// uint32_t Name The string table offset of the function name.
+///
+/// The optional data in a FunctionInfo object follows this fixed information
+/// and consists of a stream of tuples that consist of:
+///
+/// ENCODING NAME DESCRIPTION
+/// ========= =========== ====================================================
+/// uint32_t InfoType An "InfoType" enumeration that describes the type
+/// of optional data that is encoded.
+/// uint32_t InfoLength The size in bytes of the encoded data that
+/// immediately follows this length if this value is
+/// greater than zero.
+/// uint8_t[] InfoData Encoded bytes that represent the data for the
+/// "InfoType". These bytes are only present if
+/// "InfoLength" is greater than zero.
+///
+/// The "InfoType" is an enumeration:
+///
+/// enum InfoType {
+/// EndOfList = 0u,
+/// LineTableInfo = 1u,
+/// InlineInfo = 2u
+/// };
+///
+/// This stream of tuples is terminated by a "InfoType" whose value is
+/// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of
+/// the optional information list. This format allows us to add new optional
+/// information data to a FunctionInfo object over time and allows older
+/// clients to still parse the format and skip over any data that they don't
+/// understand or want to parse.
+///
+/// So the function information encoding essentially looks like:
+///
+/// struct {
+/// uint32_t Size;
+/// uint32_t Name;
+/// struct {
+/// uint32_t InfoType;
+/// uint32_t InfoLength;
+/// uint8_t InfoData[InfoLength];
+/// }[N];
+/// }
+///
+/// Where "N" is the number of tuples.
struct FunctionInfo {
AddressRange Range;
uint32_t Name; ///< String table offset in the string table.
- std::vector<gsym::LineEntry> Lines;
- InlineInfo Inline;
+ llvm::Optional<LineTable> OptLineTable;
+ llvm::Optional<InlineInfo> Inline;
FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
: Range(Addr, Addr + Size), Name(N) {}
+ /// Query if a FunctionInfo has rich debug info.
+ ///
+ /// \returns A bool that indicates if this object has something more than a
+ /// range and name. When converting information from a symbol table and from
+ /// debug info, we might end up with multiple FunctionInfo objects for the
+ /// same range and we need to be able to tell which one is the better object
+ /// to use.
bool hasRichInfo() const {
- /// Returns whether we have something else than range and name. When
- /// converting information from a symbol table and from debug info, we
- /// might end up with multiple FunctionInfo objects for the same range
- /// and we need to be able to tell which one is the better object to use.
- return !Lines.empty() || Inline.isValid();
+ return OptLineTable.hasValue() || Inline.hasValue();
}
+ /// Query if a FunctionInfo object is valid.
+ ///
+ /// Address and size can be zero and there can be no line entries for a
+ /// symbol so the only indication this entry is valid is if the name is
+ /// not zero. This can happen when extracting information from symbol
+ /// tables that do not encode symbol sizes. In that case only the
+ /// address and name will be filled in.
+ ///
+ /// \returns A boolean indicating if this FunctionInfo is valid.
bool isValid() const {
- /// Address and size can be zero and there can be no line entries for a
- /// symbol so the only indication this entry is valid is if the name is
- /// not zero. This can happen when extracting information from symbol
- /// tables that do not encode symbol sizes. In that case only the
- /// address and name will be filled in.
return Name != 0;
}
+ /// Decode an object from a binary data stream.
+ ///
+ /// \param Data The binary stream to read the data from. This object must
+ /// have the data for the object starting at offset zero. The data
+ /// can contain more data than needed.
+ ///
+ /// \param BaseAddr The FunctionInfo's start address; it will be used as the
+ /// base address when decoding any contained information like the line table
+ /// and the inline info.
+ ///
+ /// \returns A FunctionInfo or an error describing the issue that was
+ /// encountered during decoding.
+ static llvm::Expected<FunctionInfo> decode(DataExtractor &Data,
+ uint64_t BaseAddr);
+
+ /// Encode this object into FileWriter stream.
+ ///
+ /// \param O The binary stream to write the data to at the current file
+ /// position.
+ ///
+ /// \returns An error object that indicates failure or the offset of the
+ /// function info that was successfully written into the stream.
+ llvm::Expected<uint64_t> encode(FileWriter &O) const;
+
uint64_t startAddress() const { return Range.Start; }
uint64_t endAddress() const { return Range.End; }
uint64_t size() const { return Range.size(); }
@@ -66,14 +150,14 @@ struct FunctionInfo {
void clear() {
Range = {0, 0};
Name = 0;
- Lines.clear();
- Inline.clear();
+ OptLineTable = None;
+ Inline = None;
}
};
inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
- LHS.Lines == RHS.Lines && LHS.Inline == RHS.Inline;
+ LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline;
}
inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
return !(LHS == RHS);
@@ -89,14 +173,10 @@ inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
return LHS.Range < RHS.Range;
// Then sort by inline
- if (LHS.Inline.isValid() != RHS.Inline.isValid())
- return RHS.Inline.isValid();
-
- // If the number of lines is the same, then compare line table entries
- if (LHS.Lines.size() == RHS.Lines.size())
- return LHS.Lines < RHS.Lines;
- // Then sort by number of line table entries (more is better)
- return LHS.Lines.size() < RHS.Lines.size();
+ if (LHS.Inline.hasValue() != RHS.Inline.hasValue())
+ return RHS.Inline.hasValue();
+
+ return LHS.OptLineTable < RHS.OptLineTable;
}
raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
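
The EndOfList terminator and the InfoLength field are what make the tuple stream forward compatible: a reader can skip payloads it doesn't understand. A hedged, host-endian sketch of that skipping loop (a hypothetical helper, not the decode() implementation declared above):

    #include <cstdint>
    #include <cstring>
    #include <functional>

    enum class InfoKind : uint32_t { EndOfList = 0, LineTableInfo = 1, InlineInfo = 2 };

    // Walk the [InfoType][InfoLength][InfoData...] tuples; unknown types are
    // skipped via InfoLength. Assumes little-endian data matching the host.
    static void forEachInfo(
        const uint8_t *Data, size_t Size,
        const std::function<void(uint32_t, const uint8_t *, uint32_t)> &Fn) {
      size_t Off = 0;
      while (Off + 8 <= Size) {
        uint32_t Type, Len;
        std::memcpy(&Type, Data + Off, 4);
        std::memcpy(&Len, Data + Off + 4, 4);
        Off += 8;
        if (Type == uint32_t(InfoKind::EndOfList) && Len == 0)
          return;                    // Terminator tuple ends the list.
        if (Off + Len > Size)
          return;                    // Truncated payload; bail out.
        Fn(Type, Data + Off, Len);   // Known or unknown, hand it over.
        Off += Len;
      }
    }
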
diff --git a/include/llvm/DebugInfo/GSYM/GsymCreator.h b/include/llvm/DebugInfo/GSYM/GsymCreator.h
new file mode 100644
index 000000000000..12c8187132ba
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -0,0 +1,229 @@
+//===- GsymCreator.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H
+#define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H
+
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <thread>
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+namespace gsym {
+class FileWriter;
+
+/// GsymCreator is used to emit GSYM data to a stand alone file or section
+/// within a file.
+///
+/// The GsymCreator is designed to be used in 3 stages:
+/// - Create FunctionInfo objects and add them
+/// - Finalize the GsymCreator object
+/// - Save to file or section
+///
+/// The first stage involves creating FunctionInfo objects from another source
+/// of information like compiler debug info metadata, DWARF or Breakpad files.
+/// Any strings in the FunctionInfo or contained information, like InlineInfo
+/// or LineTable objects, should get the string table offsets by calling
+/// GsymCreator::insertString(...). Any file indexes that are needed should be
+/// obtained by calling GsymCreator::insertFile(...). All of the function calls
+/// in GsymCreator are thread safe. This allows multiple threads to create and
+/// add FunctionInfo objects while parsing debug information.
+///
+/// Once all of the FunctionInfo objects have been added, the
+/// GsymCreator::finalize(...) must be called prior to saving. This function
+/// will sort the FunctionInfo objects, finalize the string table, and do any
+/// other passes on the information needed to prepare the information to be
+/// saved.
+///
+/// Once the object has been finalized, it can be saved to a file or section.
+///
+/// ENCODING
+///
+/// GSYM files are designed to be memory mapped into a process as shared, read
+/// only data, and used as is.
+///
+/// The GSYM file format when in a stand alone file consists of:
+/// - Header
+/// - Address Table
+/// - Function Info Offsets
+/// - File Table
+/// - String Table
+/// - Function Info Data
+///
+/// HEADER
+///
+/// The header is fully described in "llvm/DebugInfo/GSYM/Header.h".
+///
+/// ADDRESS TABLE
+///
+/// The address table immediately follows the header in the file and consists
+/// of Header.NumAddresses address offsets. These offsets are sorted and can be
+/// binary searched for efficient lookups. Addresses in the address table are
+/// stored as offsets from a 64 bit base address found in Header.BaseAddress.
+/// This allows the address table to contain 8, 16, or 32 bit offsets, so the
+/// address table does not require full 64 bit addresses for each address.
+/// The resulting GSYM size is smaller and causes fewer pages to be touched
+/// during address lookups when the address table is smaller. The size of the
+/// address offsets in the address table is specified in the header in
+/// Header.AddrOffSize. The first offset in the address table is aligned to
+/// Header.AddrOffSize alignment to ensure efficient access when loaded into
+/// memory.
+///
+/// FUNCTION INFO OFFSETS TABLE
+///
+/// The function info offsets table immediately follows the address table and
+/// consists of Header.NumAddresses 32 bit file offsets: one for each address
+/// in the address table. This data is aligned to a 4 byte boundary. The
+/// offsets in this table are the relative offsets from the start offset of the
+/// GSYM header and point to the function info data for each address in the
+/// address table. Keeping this data separate from the address table helps to
+/// reduce the number of pages that are touched when address lookups occur on a
+/// GSYM file.
+///
+/// FILE TABLE
+///
+/// The file table immediately follows the function info offsets table. The
+/// encoding of the FileTable is:
+///
+/// struct FileTable {
+/// uint32_t Count;
+/// FileEntry Files[];
+/// };
+///
+/// The file table starts with a 32 bit count of the number of files that are
+/// used in all of the function info, followed by that number of FileEntry
+/// structures. The file table is aligned to a 4 byte boundary. Each file in
+/// the file table is represented with a FileEntry structure.
+/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details.
+///
+/// STRING TABLE
+///
+/// The string table follows the file table in stand alone GSYM files and
+/// contains all strings for everything contained in the GSYM file. Any string
+/// data should be added to the string table and any references to strings
+/// inside GSYM information must be stored as 32 bit string table offsets into
+/// this string table. The string table always starts with an empty string at
+/// offset zero and is followed by any strings needed by the GSYM information.
+/// The start of the string table is not aligned to any boundary.
+///
+/// FUNCTION INFO DATA
+///
+/// The function info data is the payload that contains information about the
+/// address that is being looked up. It contains all of the encoded
+/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an
+/// entry in the Function Info Offsets Table. For details on the exact encoding
+/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h".
+class GsymCreator {
+ // Private member variables require Mutex protection
+ mutable std::recursive_mutex Mutex;
+ std::vector<FunctionInfo> Funcs;
+ StringTableBuilder StrTab;
+ DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
+ std::vector<llvm::gsym::FileEntry> Files;
+ std::vector<uint8_t> UUID;
+ bool Finalized = false;
+
+public:
+
+ GsymCreator();
+
+ /// Save a GSYM file to a stand alone file.
+ ///
+ /// \param Path The file path to save the GSYM file to.
+ /// \param ByteOrder The endianness to use when saving the file.
+ /// \returns An error object that indicates success or failure of the save.
+ llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const;
+
+ /// Encode a GSYM into the file writer stream at the current position.
+ ///
+ /// \param O The stream to save the binary data to
+ /// \returns An error object that indicates success or failure of the save.
+ llvm::Error encode(FileWriter &O) const;
+
+ /// Insert a string into the GSYM string table.
+ ///
+ /// All strings used by GSYM files must be uniqued by adding them to this
+ /// string pool and using the returned offset for any string values.
+ ///
+ /// \param S The string to insert into the string table.
+ /// \returns The unique 32 bit offset into the string table.
+ uint32_t insertString(StringRef S);
+
+ /// Insert a file into this GSYM creator.
+ ///
+ /// Inserts a file by adding a FileEntry into the "Files" member variable if
+ /// the file has not already been added. The file path is split into
+ /// directory and filename which are both added to the string table. This
+ /// allows paths to be stored efficiently by reusing the directories that are
+ /// common between multiple files.
+ ///
+ /// \param Path The path to the file to insert.
+ /// \param Style The path style for the "Path" parameter.
+ /// \returns The unique file index for the inserted file.
+ uint32_t insertFile(StringRef Path,
+ sys::path::Style Style = sys::path::Style::native);
+
+ /// Add a function info to this GSYM creator.
+ ///
+ /// All information in the FunctionInfo object must use the
+ /// GsymCreator::insertString(...) function when creating string table
+ /// offsets for names and other strings.
+ ///
+ /// \param FI The function info object to emplace into our functions list.
+ void addFunctionInfo(FunctionInfo &&FI);
+
+ /// Finalize the data in the GSYM creator prior to saving the data out.
+ ///
+ /// Finalize must be called after all FunctionInfo objects have been added
+ /// and before GsymCreator::save() is called.
+ ///
+ /// \param OS Output stream to report duplicate function infos, overlapping
+ /// function infos, and function infos that were merged or removed.
+ /// \returns An error object that indicates success or failure of the
+ /// finalize.
+ llvm::Error finalize(llvm::raw_ostream &OS);
+
+ /// Set the UUID value.
+ ///
+ /// \param UUIDBytes The new UUID bytes.
+ void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) {
+ UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
+ }
+
+ /// Thread safe iteration over all function infos.
+ ///
+ /// \param Callback A callback function that will get called with each
+ /// FunctionInfo. If the callback returns false, stop iterating.
+ void forEachFunctionInfo(
+ std::function<bool(FunctionInfo &)> const &Callback);
+
+ /// Thread safe const iteration over all function infos.
+ ///
+ /// \param Callback A callback function that will get called with each
+ /// FunctionInfo. If the callback returns false, stop iterating.
+ void forEachFunctionInfo(
+ std::function<bool(const FunctionInfo &)> const &Callback) const;
+
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H
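
The three stage workflow reads naturally in code. A hedged sketch using only the API declared above; the address, size, and function name are invented for the example:

    #include "llvm/DebugInfo/GSYM/GsymCreator.h"
    #include "llvm/Support/raw_ostream.h"
    #include <utility>

    static llvm::Error buildGsym(llvm::StringRef OutPath) {
      llvm::gsym::GsymCreator GC;
      // Stage 1: create and add FunctionInfo objects; every string must be
      // routed through insertString() to get a string table offset.
      llvm::gsym::FunctionInfo FI(/*Addr=*/0x1000, /*Size=*/0x40,
                                  GC.insertString("main"));
      GC.addFunctionInfo(std::move(FI));
      // Stage 2: finalize sorts the functions and freezes the string table.
      if (llvm::Error Err = GC.finalize(llvm::errs()))
        return Err;
      // Stage 3: save to a stand alone GSYM file.
      return GC.save(OutPath, llvm::support::endian::system_endianness());
    }
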
diff --git a/include/llvm/DebugInfo/GSYM/GsymReader.h b/include/llvm/DebugInfo/GSYM/GsymReader.h
new file mode 100644
index 000000000000..113bcee9c9a3
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -0,0 +1,228 @@
+//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
+#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
+
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/Header.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/StringTable.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorOr.h"
+
+#include <inttypes.h>
+#include <memory>
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+namespace llvm {
+class MemoryBuffer;
+class raw_ostream;
+
+namespace gsym {
+
+/// GsymReader is used to read GSYM data from a file or buffer.
+///
+/// This class is optimized for very quick lookups when the endianness matches
+/// the host system. The Header, address table, address info offsets, and file
+/// table is designed to be mmap'ed as read only into memory and used without
+/// any parsing needed. If the endianness doesn't match, we swap these objects
+/// and tables into GsymReader::SwappedData and then point our header and
+/// ArrayRefs to this swapped internal data.
+///
+/// GsymReader objects must use one of the static functions to create an
+/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
+
+class GsymReader {
+ GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
+ llvm::Error parse();
+
+ std::unique_ptr<MemoryBuffer> MemBuffer;
+ StringRef GsymBytes;
+ llvm::support::endianness Endian;
+ const Header *Hdr = nullptr;
+ ArrayRef<uint8_t> AddrOffsets;
+ ArrayRef<uint32_t> AddrInfoOffsets;
+ ArrayRef<FileEntry> Files;
+ StringTable StrTab;
+ /// When the GSYM file's endianness doesn't match the host system then
+ /// we must decode all data structures that need to be swapped into
+ /// local storage and point the ArrayRef objects above at these swapped
+ /// copies.
+ struct SwappedData {
+ Header Hdr;
+ std::vector<uint8_t> AddrOffsets;
+ std::vector<uint32_t> AddrInfoOffsets;
+ std::vector<FileEntry> Files;
+ };
+ std::unique_ptr<SwappedData> Swap;
+
+public:
+ GsymReader(GsymReader &&RHS);
+ ~GsymReader();
+
+ /// Construct a GsymReader from a file on disk.
+ ///
+ /// \param Path The file path of the GSYM file to read.
+ /// \returns An expected GsymReader that contains the object or an error
+ /// object that indicates reason for failing to read the GSYM.
+ static llvm::Expected<GsymReader> openFile(StringRef Path);
+
+ /// Construct a GsymReader from a buffer.
+ ///
+ /// \param Bytes A set of bytes that will be copied and owned by the
+ /// returned object on success.
+ /// \returns An expected GsymReader that contains the object or an error
+ /// object that indicates reason for failing to read the GSYM.
+ static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
+
+ /// Access the GSYM header.
+ /// \returns A native endian version of the GSYM header.
+ const Header &getHeader() const;
+
+ /// Get the full function info for an address.
+ ///
+ /// \param Addr A virtual address from the original object file to look up.
+ /// \returns An expected FunctionInfo that contains the function info object
+ /// or an error object that indicates the reason for failing to look up the
+ /// address.
+ llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
+
+ /// Get a string from the string table.
+ ///
+ /// \param Offset The string table offset for the string to retrieve.
+ /// \returns The string from the string table.
+ StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
+
+protected:
+ /// Gets an address from the address table.
+ ///
+ /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
+ ///
+ /// \param Index An index into the address table.
+ /// \returns A resolved virtual address for the address in the address table,
+ /// or llvm::None if Index is out of bounds.
+ Optional<uint64_t> getAddress(size_t Index) const;
+
+ /// Get the file entry for the supplied file index.
+ ///
+ /// Used to convert any file indexes in the FunctionInfo data back into
+ /// files. This function can be used for iteration, but is more commonly used
+ /// for random access when doing lookups.
+ ///
+ /// \param Index An index into the file table.
+ /// \returns An optional FileEntry that will be valid if the file index is
+ /// valid, or llvm::None if the file index is out of bounds.
+ Optional<FileEntry> getFile(uint32_t Index) const {
+ if (Index < Files.size())
+ return Files[Index];
+ return llvm::None;
+ }
+
+ /// Get an appropriate address info offsets array.
+ ///
+ /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
+ /// byte offsets from the gsym::Header::BaseAddress. The table is stored
+ /// internally as an array of bytes that are in the correct endianness. When
+ /// we access this table we must get an array that matches those sizes. This
+ /// templatized helper function is used when accessing address offsets in the
+ /// AddrOffsets member variable.
+ ///
+ /// \returns An ArrayRef of an appropriate address offset size.
+ template <class T> ArrayRef<T>
+ getAddrOffsets() const {
+ return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
+ AddrOffsets.size()/sizeof(T));
+ }
+
+ /// Get an appropriate address from the address table.
+ ///
+ /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
+ /// byte address offsets from the gsym::Header::BaseAddress. The table is
+ /// stored internally as an array of bytes that are in the correct endianness.
+ /// In order to extract an address from the address table we must access the
+ /// address offset using the correct size and then add it to the BaseAddress
+ /// in the header.
+ ///
+ /// \param Index An index into the AddrOffsets array.
+ /// \returns A virtual address that matches the original object file for the
+ /// address at the specified index, or llvm::None if Index is out of bounds.
+ template <class T> Optional<uint64_t>
+ addressForIndex(size_t Index) const {
+ ArrayRef<T> AIO = getAddrOffsets<T>();
+ if (Index < AIO.size())
+ return AIO[Index] + Hdr->BaseAddress;
+ return llvm::None;
+ }
+ /// Lookup an address offset in the AddrOffsets table.
+ ///
+ /// Given an address offset, look it up using a binary search of the
+ /// AddrOffsets table.
+ ///
+ /// \param AddrOffset An address offset, that has already been computed by
+ /// subtracting the gsym::Header::BaseAddress.
+ /// \returns The matching address offset index. This index will be used to
+ /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
+ template <class T>
+ uint64_t getAddressOffsetIndex(const uint64_t AddrOffset) const {
+ ArrayRef<T> AIO = getAddrOffsets<T>();
+ const auto Begin = AIO.begin();
+ const auto End = AIO.end();
+ auto Iter = std::lower_bound(Begin, End, AddrOffset);
+ if (Iter == End || AddrOffset < *Iter)
+ --Iter;
+ return std::distance(Begin, Iter);
+ }
+
+ /// Create a GSYM from a memory buffer.
+ ///
+ /// Called by both openFile() and copyBuffer(), this function does all of the
+ /// work of parsing the GSYM file, returning an error on failure.
+ ///
+ /// \param MemBuffer A memory buffer that will transfer ownership into the
+ /// GsymReader.
+ /// \returns An expected GsymReader that contains the object or an error
+ /// object that indicates reason for failing to read the GSYM.
+ static llvm::Expected<llvm::gsym::GsymReader>
+ create(std::unique_ptr<MemoryBuffer> &MemBuffer);
+
+
+ /// Given an address, find the address index.
+ ///
+ /// Binary search the address table and find the matching address index.
+ ///
+ /// \param Addr A virtual address that matches the original object file
+ /// to lookup.
+ /// \returns An index into the address table. This index can be used to
+ /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
+ /// Returns an error if the address isn't in the GSYM with details of why.
+ Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
+
+ /// Given an address index, get the offset for the FunctionInfo.
+ ///
+ /// Looking up an address is done by finding the corresponding address
+ /// index for the address. This index is then used to get the offset of the
+ /// FunctionInfo data that we will decode using this function.
+ ///
+ /// \param Index An index into the address table.
+ /// \returns An optional GSYM data offset for the offset of the FunctionInfo
+ /// that needs to be decoded.
+ Optional<uint64_t> getAddressInfoOffset(size_t Index) const;
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
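
The rounding behavior of getAddressOffsetIndex() above is the heart of the lookup: find the greatest table entry that is less than or equal to the query offset. A standalone sketch of the same search, assuming (as getAddressIndex() ensures) that offsets below the first entry were already rejected:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // lower_bound finds the first entry >= AddrOffset; when we overshoot (or
    // run off the end) we step back one entry to the containing range.
    static size_t addressOffsetIndex(const std::vector<uint32_t> &Offsets,
                                     uint64_t AddrOffset) {
      auto It = std::lower_bound(Offsets.begin(), Offsets.end(), AddrOffset);
      if (It == Offsets.end() || AddrOffset < *It)
        --It; // Precondition: AddrOffset >= Offsets.front().
      return static_cast<size_t>(std::distance(Offsets.begin(), It));
    }
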
diff --git a/include/llvm/DebugInfo/GSYM/Header.h b/include/llvm/DebugInfo/GSYM/Header.h
new file mode 100644
index 000000000000..6652c59c97a6
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/Header.h
@@ -0,0 +1,129 @@
+//===- Header.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_HEADER_H
+#define LLVM_DEBUGINFO_GSYM_HEADER_H
+
+#include "llvm/Support/Error.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm {
+class raw_ostream;
+class DataExtractor;
+
+namespace gsym {
+class FileWriter;
+
+constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM'
+constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG'
+constexpr uint32_t GSYM_VERSION = 1;
+constexpr size_t GSYM_MAX_UUID_SIZE = 20;
+
+/// The GSYM header.
+///
+/// The GSYM header is found at the start of a stand alone GSYM file, or as
+/// the first bytes in a section when GSYM is contained in a section of an
+/// executable file (ELF, mach-o, COFF).
+///
+/// The structure is encoded exactly as it appears in the structure definition
+/// with no gaps between members. Alignment should not change from system to
+/// system as the members were laid out so that they shouldn't align
+/// differently on different architectures.
+///
+/// When endianness of the system loading a GSYM file matches, the file can
+/// be mmap'ed in and a pointer to the header can be cast to the first bytes
+/// of the file (stand alone GSYM file) or section data (GSYM in a section).
+/// When endianness is swapped, the Header::decode() function should be used to
+/// decode the header.
+struct Header {
+ /// The magic bytes should be set to GSYM_MAGIC. This helps detect if a file
+ /// is a GSYM file by scanning the first 4 bytes of a file or section.
+ /// This value might appear byte swapped.
+ uint32_t Magic;
+ /// The version number determines how the header is decoded and how each
+ /// InfoType in FunctionInfo is encoded/decoded. As version numbers increase,
+ /// "Magic" and "Version" members should always appear at offset zero and 4
+ /// respectively to ensure clients figure out if they can parse the format.
+ uint16_t Version;
+ /// The size in bytes of each address offset in the address offsets table.
+ uint8_t AddrOffSize;
+ /// The size in bytes of the UUID encoded in the "UUID" member.
+ uint8_t UUIDSize;
+ /// The 64 bit base address that all address offsets in the address offsets
+ /// table are relative to. Storing a full 64 bit address allows our address
+ /// offsets table to be smaller on disk.
+ uint64_t BaseAddress;
+ /// The number of addresses stored in the address offsets table.
+ uint32_t NumAddresses;
+ /// The file relative offset of the start of the string table for strings
+ /// contained in the GSYM file. If the GSYM is contained in a stand alone
+ /// file this will be the file offset of the start of the string table. If
+ /// the GSYM is contained in a section within an executable file, this can
+ /// be the offset of the first string used in the GSYM file and can possibly
+ /// span one or more executable string tables. This allows the strings to
+ /// share string tables in an ELF or mach-o file.
+ uint32_t StrtabOffset;
+ /// The size in bytes of the string table. For a stand alone GSYM file, this
+ /// will be the exact size in bytes of the string table. When the GSYM data
+ /// is in a section within an executable file, this size can span one or more
+ /// sections that contains strings. This allows any strings that are already
+ /// stored in the executable file to be re-used, and any extra strings could
+ /// be added to another string table and the string table offset and size
+ /// can be set to span all needed string tables.
+ uint32_t StrtabSize;
+ /// The UUID of the original executable file. This is stored to allow
+ /// matching a GSYM file to an executable file when symbolication is
+ /// required. Only the first "UUIDSize" bytes of the UUID are valid. Any
+ /// bytes in the UUID value that appear after the first UUIDSize bytes should
+ /// be set to zero.
+ uint8_t UUID[GSYM_MAX_UUID_SIZE];
+
+ /// Check if a header is valid and return an error if anything is wrong.
+ ///
+ /// This function can be used prior to encoding a header to ensure it is
+ /// valid, or after decoding a header to ensure it is valid and supported.
+ ///
+ /// Check a correctly byte swapped header for errors:
+ /// - check magic value
+ /// - check that version number is supported
+ /// - check that the address offset size is supported
+ /// - check that the UUID size is valid
+ ///
+ /// \returns An error if anything is wrong in the header, or Error::success()
+ /// if there are no errors.
+ llvm::Error checkForError() const;
+
+ /// Decode an object from a binary data stream.
+ ///
+ /// \param Data The binary stream to read the data from. This object must
+ /// have the data for the object starting at offset zero. The data
+ /// can contain more data than needed.
+ ///
+ /// \returns A Header or an error describing the issue that was
+ /// encountered during decoding.
+ static llvm::Expected<Header> decode(DataExtractor &Data);
+
+ /// Encode this object into FileWriter stream.
+ ///
+ /// \param O The binary stream to write the data to at the current file
+ /// position.
+ ///
+ /// \returns An error object that indicates success or failure of the
+ /// encoding process.
+ llvm::Error encode(FileWriter &O) const;
+};
+
+bool operator==(const Header &LHS, const Header &RHS);
+raw_ostream &operator<<(raw_ostream &OS, const llvm::gsym::Header &H);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_HEADER_H
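
Because a GSYM file may arrive in either byte order, the first four bytes distinguish three cases. A small sketch using the two magic constants defined above (the classifier function itself is invented for illustration):

    #include <cstdint>
    #include <cstring>

    constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM' in host byte order.
    constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG': a byte swapped file.

    // Returns +1 for a native-endian GSYM, -1 for a swapped one (decode the
    // header via Header::decode() in that case), and 0 for anything else.
    static int classifyGsymMagic(const uint8_t *First4Bytes) {
      uint32_t Magic;
      std::memcpy(&Magic, First4Bytes, sizeof(Magic));
      if (Magic == GSYM_MAGIC)
        return 1;
      if (Magic == GSYM_CIGAM)
        return -1;
      return 0;
    }
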
diff --git a/include/llvm/DebugInfo/GSYM/InlineInfo.h b/include/llvm/DebugInfo/GSYM/InlineInfo.h
index 222430622932..48fd9a7c1308 100644
--- a/include/llvm/DebugInfo/GSYM/InlineInfo.h
+++ b/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -1,9 +1,8 @@
//===- InlineInfo.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,6 +11,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/Support/Error.h"
#include <stdint.h>
#include <vector>
@@ -31,6 +31,30 @@ namespace gsym {
/// Any clients that encode information will need to ensure the ranges are
/// all contained correctly or lookups could fail. All ranges in these objects
/// must be contained in the top level FunctionInfo address ranges as well.
+///
+/// ENCODING
+///
+/// When saved to disk, the inline info encodes all ranges to be relative to
+/// a parent address range. This will be the FunctionInfo's start address if
+/// the InlineInfo is directly contained in a FunctionInfo, or the start
+/// address of the containing parent InlineInfo's first "Ranges" member. This
+/// allows address ranges to be efficiently encoded using ULEB128 encodings as
+/// we encode the offset and size of each range instead of full addresses. This
+/// also makes any encoded addresses easy to relocate as we just need to
+/// relocate the FunctionInfo's start address.
+///
+/// - The AddressRanges member "Ranges" is encoded using an appropriate base
+/// address as described above.
+/// - UINT8 boolean value that specifies if the InlineInfo object has children.
+/// - UINT32 string table offset that points to the name of the inline
+/// function.
+/// - ULEB128 integer that specifies the file of the call site that called
+/// this function.
+/// - ULEB128 integer that specifies the source line of the call site that
+/// called this function.
+/// - if this object has children, encode each child InlineInfo using the
+/// first address range's start address as the base address.
+///
struct InlineInfo {
uint32_t Name; ///< String table offset in the string table.
@@ -62,6 +86,37 @@ struct InlineInfo {
/// \returns optional vector of InlineInfo objects that describe the
/// inline call stack for a given address, false otherwise.
llvm::Optional<InlineArray> getInlineStack(uint64_t Addr) const;
+
+ /// Decode an InlineInfo object from a binary data stream.
+ ///
+ /// \param Data The binary stream to read the data from. This object must
+ /// have the data for the InlineInfo object starting at offset zero. The data
+ /// can contain more data than needed.
+ ///
+ /// \param BaseAddr The base address to use when decoding all address ranges.
+ /// This will be the FunctionInfo's start address if this object is directly
+ /// contained in a FunctionInfo object, or the start address of the first
+ /// address range in the parent InlineInfo object if this object is a child of
+ /// another InlineInfo object.
+ /// \returns An InlineInfo or an error describing the issue that was
+ /// encountered during decoding.
+ static llvm::Expected<InlineInfo> decode(DataExtractor &Data,
+ uint64_t BaseAddr);
+
+ /// Encode this InlineInfo object into FileWriter stream.
+ ///
+ /// \param O The binary stream to write the data to at the current file
+ /// position.
+ ///
+ /// \param BaseAddr The base address to use when encoding all address ranges.
+ /// This will be the FunctionInfo's start address if this object is directly
+ /// contained in a FunctionInfo object, or the start address of the first
+ /// address range in the parent InlineInfo object if this object is a child of
+ /// another InlineInfo object.
+ ///
+ /// \returns An error object that indicates success or failure of the
+ /// encoding process.
+ llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const;
};
inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) {
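
The encoding list above fixes the per-node field order after the ranges. A hedged sketch of just those scalar fields, using the FileWriter API from this library; the real InlineInfo::encode() additionally writes Ranges relative to BaseAddr first and recurses into children:

    #include "llvm/DebugInfo/GSYM/FileWriter.h"
    #include <cstdint>

    // Emit the scalar fields of one inline node in the documented order.
    static void encodeNodeScalars(llvm::gsym::FileWriter &O, bool HasChildren,
                                  uint32_t Name, uint64_t CallFile,
                                  uint64_t CallLine) {
      O.writeU8(HasChildren ? 1 : 0); // UINT8 boolean: do children follow?
      O.writeU32(Name);               // String table offset of the inline name.
      O.writeULEB(CallFile);          // ULEB128 call site file index.
      O.writeULEB(CallLine);          // ULEB128 call site source line.
    }
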
diff --git a/include/llvm/DebugInfo/GSYM/LineEntry.h b/include/llvm/DebugInfo/GSYM/LineEntry.h
index 6b9380940bd3..aac7c48e067e 100644
--- a/include/llvm/DebugInfo/GSYM/LineEntry.h
+++ b/include/llvm/DebugInfo/GSYM/LineEntry.h
@@ -1,9 +1,8 @@
//===- LineEntry.h ----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/GSYM/LineTable.h b/include/llvm/DebugInfo/GSYM/LineTable.h
new file mode 100644
index 000000000000..3cdbccb08ced
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/LineTable.h
@@ -0,0 +1,198 @@
+//===- LineTable.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H
+#define LLVM_DEBUGINFO_GSYM_LINETABLE_H
+
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+namespace gsym {
+
+struct FunctionInfo;
+class FileWriter;
+
+/// LineTable class contains deserialized versions of line tables for each
+/// function's address ranges.
+///
+/// When saved to disk, the line table is encoded using a modified version of
+/// the DWARF line tables that only tracks address to source file and line.
+///
+/// ENCODING
+///
+/// The line table starts with a small prolog that contains the following
+/// values:
+///
+/// ENCODING NAME DESCRIPTION
+/// ======== =========== ====================================================
+/// SLEB MinDelta The min line delta for special opcodes that advance
+/// the address and line number.
+/// SLEB MaxDelta The max line delta for single byte opcodes that
+/// advance the address and line number.
+/// ULEB FirstLine The value of the first source line number to
+/// initialize the LineEntry with.
+///
+/// Once these prolog items are read, we initialize a LineEntry struct with
+/// the start address of the function from the FunctionInfo's address range,
+/// a default file index of 1, and the line number set to "FirstLine" from
+/// the prolog above:
+///
+/// LineEntry Row(BaseAddr, 1, FirstLine);
+///
+/// The line table state machine is now initialized and ready to be parsed.
+/// The stream that follows this encodes the line entries in a compact
+/// form. Some opcodes cause "Row" to be modified and some opcodes may also
+/// push "Row" onto the end of the "LineTable.Lines" vector. The end result
+/// is a vector of LineEntry structs that is sorted in ascending address
+/// order.
+///
+/// NORMAL OPCODES
+///
+/// The opcodes 0 through 3 are normal opcodes. Their encoding and
+/// descriptions are listed below:
+///
+/// ENCODING ENUMERATION VALUE DESCRIPTION
+/// ======== ================ ===== ========================================
+/// LTOC_EndSequence 0x00 Parsing is done.
+/// ULEB LTOC_SetFile 0x01 Row.File = ULEB
+/// ULEB LTOC_AdvancePC 0x02 Row.Addr += ULEB, push "Row".
+/// SLEB LTOC_AdvanceLine 0x03 Row.Line += SLEB
+/// LTOC_FirstSpecial 0x04 First special opcode (see SPECIAL
+/// OPCODES below).
+///
+/// SPECIAL OPCODES
+///
+/// Opcodes LTOC_FirstSpecial through 255 are special opcodes that always
+/// increment both the Row.Addr and Row.Line and push "Row" onto the
+/// LineEntry.Lines array. They do this by using some of the bits to
+/// increment/decrement the source line number, and some of the bits to
+/// increment the address. Line numbers can go up or down when making line
+/// tables, where addresses always only increase since line tables are sorted
+/// by address.
+///
+/// In order to calculate the amount to increment the line and address for
+/// these special opcodes, we calculate the number of values reserved for the
+/// line increment/decrement using the "MinDelta" and "MaxDelta" from the
+/// prolog:
+///
+/// const int64_t LineRange = MaxDelta - MinDelta + 1;
+///
+/// Then we can adjust the opcode to not include any of the normal opcodes:
+///
+/// const uint8_t AdjustedOp = Opcode - LTOC_FirstSpecial;
+///
+/// And we can calculate the line offset, and address offset:
+///
+/// const int64_t LineDelta = MinDelta + (AdjustedOp % LineRange);
+/// const uint64_t AddrDelta = (AdjustedOp / LineRange);
+///
+/// And use these to modify our "Row":
+///
+/// Row.Line += LineDelta;
+/// Row.Addr += AddrDelta;
+///
+/// And push a row onto the line table:
+///
+/// Lines.push_back(Row);
+///
+/// This is very similar to the way that DWARF encodes its line tables. The
+/// only difference is the DWARF line tables have more normal opcodes and the
+/// "Row" contains more members, like source column number, bools for end of
+/// prologue, beginning of epilogue, is statement and many others. There are
+/// also more complex rules that apply to the extra normal opcodes. By
+/// leaving these extra opcodes out, we leave more bits for the special
+/// opcodes, which allows us to encode line tables in fewer bytes than standard
+/// DWARF encodings.
+///
+/// Opcodes that will push "Row" onto the LineEntry.Lines include the
+/// LTOC_AdvancePC opcode and all special opcodes. All other opcodes
+/// only modify the current "Row", or cause the line table to end.
+class LineTable {
+ typedef std::vector<gsym::LineEntry> Collection;
+ Collection Lines; ///< All line entries in the line table.
+public:
+ static LineEntry lookup(DataExtractor &Data, uint64_t BaseAddr,
+ uint64_t Addr);
+
+ /// Decode a LineTable object from a binary data stream.
+ ///
+ /// \param Data The binary stream to read the data from. This object must
+ /// have the data for the LineTable object starting at offset zero. The data
+ /// can contain more data than needed.
+ ///
+ /// \param BaseAddr The base address to use when decoding the line table.
+ /// This will be the FunctionInfo's start address and will be used to
+ /// initialize the line table row prior to parsing any opcodes.
+ ///
+ /// \returns A LineTable or an error describing the issue that was
+ /// encountered during decoding.
+ static llvm::Expected<LineTable> decode(DataExtractor &Data,
+ uint64_t BaseAddr);
+ /// Encode this LineTable object into FileWriter stream.
+ ///
+ /// \param O The binary stream to write the data to at the current file
+ /// position.
+ ///
+ /// \param BaseAddr The base address to use when decoding the line table.
+ /// This will be the FunctionInfo's start address.
+ ///
+ /// \returns An error object that indicates success or failure of the
+ /// encoding process.
+ llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const;
+ bool empty() const { return Lines.empty(); }
+ void clear() { Lines.clear(); }
+ void push(const LineEntry &LE) {
+ Lines.push_back(LE);
+ }
+ size_t isValid() const {
+ return !Lines.empty();
+ }
+ size_t size() const {
+ return Lines.size();
+ }
+ LineEntry &get(size_t i) {
+ assert(i < Lines.size());
+ return Lines[i];
+ }
+ const LineEntry &get(size_t i) const {
+ assert(i < Lines.size());
+ return Lines[i];
+ }
+ LineEntry &operator[](size_t i) {
+ return get(i);
+ }
+ const LineEntry &operator[](size_t i) const {
+ return get(i);
+ }
+ bool operator==(const LineTable &RHS) const {
+ return Lines == RHS.Lines;
+ }
+ bool operator!=(const LineTable &RHS) const {
+ return Lines != RHS.Lines;
+ }
+ bool operator<(const LineTable &RHS) const {
+ const auto LHSSize = Lines.size();
+ const auto RHSSize = RHS.Lines.size();
+ if (LHSSize == RHSSize)
+ return Lines < RHS.Lines;
+ return LHSSize < RHSSize;
+ }
+ Collection::const_iterator begin() const { return Lines.begin(); }
+ Collection::const_iterator end() const { return Lines.end(); }
+
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const gsym::LineTable &LT);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H
diff --git a/include/llvm/DebugInfo/GSYM/Range.h b/include/llvm/DebugInfo/GSYM/Range.h
index 772ff244c5b7..37cfec713f26 100644
--- a/include/llvm/DebugInfo/GSYM/Range.h
+++ b/include/llvm/DebugInfo/GSYM/Range.h
@@ -1,9 +1,8 @@
-//===- AddressRange.h -------------------------------------------*- C++ -*-===//
+//===- Range.h --------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,10 +20,13 @@
#define HEX64(v) llvm::format_hex(v, 18)
namespace llvm {
+class DataExtractor;
class raw_ostream;
namespace gsym {
+class FileWriter;
+
/// A class that represents an address range. The range is specified using
/// a start and an end address.
struct AddressRange {
@@ -47,6 +49,18 @@ struct AddressRange {
bool operator<(const AddressRange &R) const {
return std::make_pair(Start, End) < std::make_pair(R.Start, R.End);
}
+ /// AddressRange objects are encoded and decoded to be relative to a base
+ /// address. This will be the FunctionInfo's start address if the AddressRange
+ /// is directly contained in a FunctionInfo, or a base address of the
+ /// containing parent AddressRange or AddressRanges. This allows address
+ /// ranges to be efficiently encoded using ULEB128 encodings as we encode the
+ /// offset and size of each range instead of full addresses. This also makes
+ /// encoded addresses easy to relocate as we just need to relocate one base
+ /// address.
+ /// @{
+ void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
+ void encode(FileWriter &O, uint64_t BaseAddr) const;
+ /// @}
};
raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
@@ -66,6 +80,7 @@ public:
void clear() { Ranges.clear(); }
bool empty() const { return Ranges.empty(); }
bool contains(uint64_t Addr) const;
+ bool contains(AddressRange Range) const;
void insert(AddressRange Range);
size_t size() const { return Ranges.size(); }
bool operator==(const AddressRanges &RHS) const {
@@ -77,6 +92,14 @@ public:
}
Collection::const_iterator begin() const { return Ranges.begin(); }
Collection::const_iterator end() const { return Ranges.end(); }
+
+ /// Address ranges are decoded and encoded to be relative to a base address.
+ /// See the AddressRange comment for the encode and decode methods for full
+ /// details.
+ /// @{
+ void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
+ void encode(FileWriter &O, uint64_t BaseAddr) const;
+ /// @}
};
raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
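For illustration, a minimal sketch of the relative encoding the comments above describe (editorial only; it uses llvm::encodeULEB128 from llvm/Support/LEB128.h and writes to a raw_ostream, since the GSYM FileWriter interface itself is not part of this diff):

    #include "llvm/Support/LEB128.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>

    // Encode a [Start, End) range relative to BaseAddr as two ULEB128s:
    // the offset from the base address and the size of the range. Small
    // relative values encode in far fewer bytes than two full addresses,
    // and relocation only has to adjust the single base address.
    static void encodeRangeSketch(llvm::raw_ostream &OS, uint64_t Start,
                                  uint64_t End, uint64_t BaseAddr) {
      llvm::encodeULEB128(Start - BaseAddr, OS);
      llvm::encodeULEB128(End - Start, OS);
    }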
diff --git a/include/llvm/DebugInfo/GSYM/StringTable.h b/include/llvm/DebugInfo/GSYM/StringTable.h
index 0001b8b82743..a96ae5899da3 100644
--- a/include/llvm/DebugInfo/GSYM/StringTable.h
+++ b/include/llvm/DebugInfo/GSYM/StringTable.h
@@ -1,9 +1,8 @@
//===- StringTable.h --------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/GenericError.h b/include/llvm/DebugInfo/PDB/GenericError.h
index ec85d92d2a92..af93be931b8e 100644
--- a/include/llvm/DebugInfo/PDB/GenericError.h
+++ b/include/llvm/DebugInfo/PDB/GenericError.h
@@ -20,7 +20,7 @@ enum class pdb_error_code {
dia_sdk_not_present,
dia_failed_loading,
signature_out_of_date,
- external_cmdline_ref,
+ no_matching_pch,
unspecified,
};
} // namespace pdb
diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
index 0b15ab474f71..4adf3b394c2e 100644
--- a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
+++ b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
@@ -87,7 +87,7 @@ public:
// Initial construction must not access the cache, since it must be done
// atomically.
- auto Result = llvm::make_unique<ConcreteSymbolT>(
+ auto Result = std::make_unique<ConcreteSymbolT>(
Session, Id, std::forward<Args>(ConstructorArgs)...);
Result->SymbolId = Id;
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbol.h
index d9004a8894d9..0d95a2467556 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -131,7 +131,7 @@ public:
auto BaseIter = RawSymbol->findChildren(T::Tag);
if (!BaseIter)
return nullptr;
- return llvm::make_unique<ConcreteSymbolEnumerator<T>>(std::move(BaseIter));
+ return std::make_unique<ConcreteSymbolEnumerator<T>>(std::move(BaseIter));
}
std::unique_ptr<IPDBEnumSymbols> findAllChildren(PDB_SymType Type) const;
std::unique_ptr<IPDBEnumSymbols> findAllChildren() const;
diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h
index d3da28ca0b7b..11599fc1797d 100644
--- a/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -39,6 +39,7 @@ public:
bool UseSymbolTable = true;
bool Demangle = true;
bool RelativeAddresses = false;
+ bool UntagAddresses = false;
std::string DefaultArch;
std::vector<std::string> DsymHints;
std::string FallbackDebugPath;
diff --git a/include/llvm/Demangle/Demangle.h b/include/llvm/Demangle/Demangle.h
index 6fea7ef13f11..7b85b9a9ccf7 100644
--- a/include/llvm/Demangle/Demangle.h
+++ b/include/llvm/Demangle/Demangle.h
@@ -32,7 +32,14 @@ char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n,
int *status);
-enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0 };
+enum MSDemangleFlags {
+ MSDF_None = 0,
+ MSDF_DumpBackrefs = 1 << 0,
+ MSDF_NoAccessSpecifier = 1 << 1,
+ MSDF_NoCallingConvention = 1 << 2,
+ MSDF_NoReturnType = 1 << 3,
+ MSDF_NoMemberType = 1 << 4,
+};
char *microsoftDemangle(const char *mangled_name, char *buf, size_t *n,
int *status, MSDemangleFlags Flags = MSDF_None);
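A hedged usage sketch of the new flags (editorial; the mangled name is hypothetical, and since the enumerators are plain bit values the OR'd result is cast back to MSDemangleFlags):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      auto Flags = llvm::MSDemangleFlags(llvm::MSDF_NoAccessSpecifier |
                                         llvm::MSDF_NoCallingConvention);
      // Passing nullptr for buf/n asks the demangler to malloc the output.
      char *D = llvm::microsoftDemangle("?f@C@@QEAAHH@Z", nullptr, nullptr,
                                        &Status, Flags);
      if (D)
        std::printf("%s\n", D);
      std::free(D);
      return 0;
    }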
diff --git a/include/llvm/Demangle/DemangleConfig.h b/include/llvm/Demangle/DemangleConfig.h
index 73f89d357c85..b7b7dbd24c7f 100644
--- a/include/llvm/Demangle/DemangleConfig.h
+++ b/include/llvm/Demangle/DemangleConfig.h
@@ -15,13 +15,6 @@
#ifndef LLVM_DEMANGLE_COMPILER_H
#define LLVM_DEMANGLE_COMPILER_H
-#ifdef _MSC_VER
-// snprintf is implemented in VS 2015
-#if _MSC_VER < 1900
-#define snprintf _snprintf_s
-#endif
-#endif
-
#ifndef __has_feature
#define __has_feature(x) 0
#endif
diff --git a/include/llvm/Demangle/ItaniumDemangle.h b/include/llvm/Demangle/ItaniumDemangle.h
index aaccb27e17a3..7784e842bfeb 100644
--- a/include/llvm/Demangle/ItaniumDemangle.h
+++ b/include/llvm/Demangle/ItaniumDemangle.h
@@ -57,6 +57,11 @@
X(LocalName) \
X(VectorType) \
X(PixelVectorType) \
+ X(SyntheticTemplateParamName) \
+ X(TypeTemplateParamDecl) \
+ X(NonTypeTemplateParamDecl) \
+ X(TemplateTemplateParamDecl) \
+ X(TemplateParamPackDecl) \
X(ParameterPack) \
X(TemplateArgumentPack) \
X(ParameterPackExpansion) \
@@ -91,6 +96,8 @@
X(ThrowExpr) \
X(UUIDOfExpr) \
X(BoolExpr) \
+ X(StringLiteral) \
+ X(LambdaExpr) \
X(IntegerCastExpr) \
X(IntegerLiteral) \
X(FloatLiteral) \
@@ -303,7 +310,7 @@ inline Qualifiers operator|=(Qualifiers &Q1, Qualifiers Q2) {
return Q1 = static_cast<Qualifiers>(Q1 | Q2);
}
-class QualType : public Node {
+class QualType final : public Node {
protected:
const Qualifiers Quals;
const Node *Child;
@@ -964,6 +971,127 @@ public:
}
};
+enum class TemplateParamKind { Type, NonType, Template };
+
+/// An invented name for a template parameter for which we don't have a
+/// corresponding template argument.
+///
+/// This node is created when parsing the <lambda-sig> for a lambda with
+/// explicit template arguments, which might be referenced in the parameter
+/// types appearing later in the <lambda-sig>.
+class SyntheticTemplateParamName final : public Node {
+ TemplateParamKind Kind;
+ unsigned Index;
+
+public:
+ SyntheticTemplateParamName(TemplateParamKind Kind_, unsigned Index_)
+ : Node(KSyntheticTemplateParamName), Kind(Kind_), Index(Index_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Kind, Index); }
+
+ void printLeft(OutputStream &S) const override {
+ switch (Kind) {
+ case TemplateParamKind::Type:
+ S += "$T";
+ break;
+ case TemplateParamKind::NonType:
+ S += "$N";
+ break;
+ case TemplateParamKind::Template:
+ S += "$TT";
+ break;
+ }
+ if (Index > 0)
+ S << Index - 1;
+ }
+};
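An editorial note on the output: because printLeft only appends the index when it is nonzero (printing Index - 1), the first synthetic type parameter renders as $T, the second as $T0, the third as $T1, and likewise $N/$N0/... for non-type and $TT/$TT0/... for template template parameters.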
+
+/// A template type parameter declaration, 'typename T'.
+class TypeTemplateParamDecl final : public Node {
+ Node *Name;
+
+public:
+ TypeTemplateParamDecl(Node *Name_)
+ : Node(KTypeTemplateParamDecl, Cache::Yes), Name(Name_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Name); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "typename ";
+ }
+
+ void printRight(OutputStream &S) const override {
+ Name->print(S);
+ }
+};
+
+/// A non-type template parameter declaration, 'int N'.
+class NonTypeTemplateParamDecl final : public Node {
+ Node *Name;
+ Node *Type;
+
+public:
+ NonTypeTemplateParamDecl(Node *Name_, Node *Type_)
+ : Node(KNonTypeTemplateParamDecl, Cache::Yes), Name(Name_), Type(Type_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Name, Type); }
+
+ void printLeft(OutputStream &S) const override {
+ Type->printLeft(S);
+ if (!Type->hasRHSComponent(S))
+ S += " ";
+ }
+
+ void printRight(OutputStream &S) const override {
+ Name->print(S);
+ Type->printRight(S);
+ }
+};
+
+/// A template template parameter declaration,
+/// 'template<typename T> typename N'.
+class TemplateTemplateParamDecl final : public Node {
+ Node *Name;
+ NodeArray Params;
+
+public:
+ TemplateTemplateParamDecl(Node *Name_, NodeArray Params_)
+ : Node(KTemplateTemplateParamDecl, Cache::Yes), Name(Name_),
+ Params(Params_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Name, Params); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "template<";
+ Params.printWithComma(S);
+ S += "> typename ";
+ }
+
+ void printRight(OutputStream &S) const override {
+ Name->print(S);
+ }
+};
+
+/// A template parameter pack declaration, 'typename ...T'.
+class TemplateParamPackDecl final : public Node {
+ Node *Param;
+
+public:
+ TemplateParamPackDecl(Node *Param_)
+ : Node(KTemplateParamPackDecl, Cache::Yes), Param(Param_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Param); }
+
+ void printLeft(OutputStream &S) const override {
+ Param->printLeft(S);
+ S += "...";
+ }
+
+ void printRight(OutputStream &S) const override {
+ Param->printRight(S);
+ }
+};
+
/// An unexpanded parameter pack (either in the expression or type context). If
/// this AST is correct, this node will have a ParameterPackExpansion node above
/// it.
@@ -1410,21 +1538,36 @@ public:
};
class ClosureTypeName : public Node {
+ NodeArray TemplateParams;
NodeArray Params;
StringView Count;
public:
- ClosureTypeName(NodeArray Params_, StringView Count_)
- : Node(KClosureTypeName), Params(Params_), Count(Count_) {}
+ ClosureTypeName(NodeArray TemplateParams_, NodeArray Params_,
+ StringView Count_)
+ : Node(KClosureTypeName), TemplateParams(TemplateParams_),
+ Params(Params_), Count(Count_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(TemplateParams, Params, Count);
+ }
- template<typename Fn> void match(Fn F) const { F(Params, Count); }
+ void printDeclarator(OutputStream &S) const {
+ if (!TemplateParams.empty()) {
+ S += "<";
+ TemplateParams.printWithComma(S);
+ S += ">";
+ }
+ S += "(";
+ Params.printWithComma(S);
+ S += ")";
+ }
void printLeft(OutputStream &S) const override {
S += "\'lambda";
S += Count;
- S += "\'(";
- Params.printWithComma(S);
- S += ")";
+ S += "\'";
+ printDeclarator(S);
}
};
@@ -1902,6 +2045,37 @@ public:
}
};
+class StringLiteral : public Node {
+ const Node *Type;
+
+public:
+ StringLiteral(const Node *Type_) : Node(KStringLiteral), Type(Type_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Type); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "\"<";
+ Type->print(S);
+ S += ">\"";
+ }
+};
+
+class LambdaExpr : public Node {
+ const Node *Type;
+
+public:
+ LambdaExpr(const Node *Type_) : Node(KLambdaExpr), Type(Type_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Type); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "[]";
+ if (Type->getKind() == KClosureTypeName)
+ static_cast<const ClosureTypeName *>(Type)->printDeclarator(S);
+ S += "{...}";
+ }
+};
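These nodes surface through the existing public API; a hedged sketch of the call shape (editorial; the mangled string is made up purely to illustrate a name that would contain a lambda expression):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      // Hypothetical Itanium mangling; with this patch, lambda closure types
      // in such names print as []<template params>(params){...} via LambdaExpr.
      char *D = llvm::itaniumDemangle("_Z1fIiEvDTclUlTyT_E_EE", nullptr,
                                      nullptr, &Status);
      if (D)
        std::printf("%s\n", D); // D is null and Status nonzero on failure
      std::free(D);
      return 0;
    }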
+
class IntegerCastExpr : public Node {
// ty(integer)
const Node *Ty;
@@ -2167,10 +2341,36 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
// table.
PODSmallVector<Node *, 32> Subs;
+ using TemplateParamList = PODSmallVector<Node *, 8>;
+
+ class ScopedTemplateParamList {
+ AbstractManglingParser *Parser;
+ size_t OldNumTemplateParamLists;
+ TemplateParamList Params;
+
+ public:
+ ScopedTemplateParamList(AbstractManglingParser *Parser)
+ : Parser(Parser),
+ OldNumTemplateParamLists(Parser->TemplateParams.size()) {
+ Parser->TemplateParams.push_back(&Params);
+ }
+ ~ScopedTemplateParamList() {
+ assert(Parser->TemplateParams.size() >= OldNumTemplateParamLists);
+ Parser->TemplateParams.dropBack(OldNumTemplateParamLists);
+ }
+ };
+
// Template parameter table. Like the above, but referenced like "T42_".
// This has a smaller size compared to Subs and Names because it can be
// stored on the stack.
- PODSmallVector<Node *, 8> TemplateParams;
+ TemplateParamList OuterTemplateParams;
+
+ // Lists of template parameters indexed by template parameter depth,
+ // referenced like "TL2_4_". If nonempty, element 0 is always
+ // OuterTemplateParams; inner elements are always template parameter lists of
+ // lambda expressions. For a generic lambda with no explicit template
+ // parameter list, the corresponding parameter list pointer will be null.
+ PODSmallVector<TemplateParamList *, 4> TemplateParams;
// Set of unresolved forward <template-param> references. These can occur in a
// conversion operator's type, and are resolved in the enclosing <encoding>.
@@ -2178,7 +2378,9 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
bool TryToParseTemplateArgs = true;
bool PermitForwardTemplateReferences = false;
- bool ParsingLambdaParams = false;
+ size_t ParsingLambdaParamsAtLevel = (size_t)-1;
+
+ unsigned NumSyntheticTemplateParameters[3] = {};
Alloc ASTAllocator;
@@ -2193,9 +2395,11 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
Names.clear();
Subs.clear();
TemplateParams.clear();
- ParsingLambdaParams = false;
+ ParsingLambdaParamsAtLevel = (size_t)-1;
TryToParseTemplateArgs = true;
PermitForwardTemplateReferences = false;
+ for (int I = 0; I != 3; ++I)
+ NumSyntheticTemplateParameters[I] = 0;
ASTAllocator.reset();
}
@@ -2253,6 +2457,7 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
bool parseSeqId(size_t *Out);
Node *parseSubstitution();
Node *parseTemplateParam();
+ Node *parseTemplateParamDecl();
Node *parseTemplateArgs(bool TagTemplates = false);
Node *parseTemplateArg();
@@ -2301,9 +2506,10 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
size_t E = ForwardTemplateRefs.size();
for (; I < E; ++I) {
size_t Idx = ForwardTemplateRefs[I]->Index;
- if (Idx >= TemplateParams.size())
+ if (TemplateParams.empty() || !TemplateParams[0] ||
+ Idx >= TemplateParams[0]->size())
return true;
- ForwardTemplateRefs[I]->Ref = TemplateParams[Idx];
+ ForwardTemplateRefs[I]->Ref = (*TemplateParams[0])[Idx];
}
ForwardTemplateRefs.dropBack(State.ForwardTemplateRefsBegin);
return false;
@@ -2470,7 +2676,12 @@ AbstractManglingParser<Derived, Alloc>::parseUnqualifiedName(NameState *State) {
// <lambda-sig> ::= <parameter type>+ # Parameter types or "v" if the lambda has no parameters
template <typename Derived, typename Alloc>
Node *
-AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *) {
+AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *State) {
+ // <template-params> refer to the innermost <template-args>. Clear out any
+ // outer args that we may have inserted into TemplateParams.
+ if (State != nullptr)
+ TemplateParams.clear();
+
if (consumeIf("Ut")) {
StringView Count = parseNumber();
if (!consumeIf('_'))
@@ -2478,22 +2689,59 @@ AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *) {
return make<UnnamedTypeName>(Count);
}
if (consumeIf("Ul")) {
- NodeArray Params;
- SwapAndRestore<bool> SwapParams(ParsingLambdaParams, true);
+ SwapAndRestore<size_t> SwapParams(ParsingLambdaParamsAtLevel,
+ TemplateParams.size());
+ ScopedTemplateParamList LambdaTemplateParams(this);
+
+ size_t ParamsBegin = Names.size();
+ while (look() == 'T' &&
+ StringView("yptn").find(look(1)) != StringView::npos) {
+ Node *T = parseTemplateParamDecl();
+ if (!T)
+ return nullptr;
+ Names.push_back(T);
+ }
+ NodeArray TempParams = popTrailingNodeArray(ParamsBegin);
+
+ // FIXME: If TempParams is empty and none of the function parameters
+ // includes 'auto', we should remove LambdaTemplateParams from the
+ // TemplateParams list. Unfortunately, we don't find out whether there are
+ // any 'auto' parameters until too late in an example such as:
+ //
+ // template<typename T> void f(
+ // decltype([](decltype([]<typename T>(T v) {}),
+ // auto) {})) {}
+ // template<typename T> void f(
+ // decltype([](decltype([]<typename T>(T w) {}),
+ // int) {})) {}
+ //
+ // Here, the type of v is at level 2 but the type of w is at level 1. We
+ // don't find this out until we encounter the type of the next parameter.
+ //
+ // However, compilers can't actually cope with the former example in
+ // practice, and it's likely to be made ill-formed in future, so we don't
+ // need to support it here.
+ //
+ // If we encounter an 'auto' in the function parameter types, we will
+ // recreate a template parameter scope for it, but any intervening lambdas
+ // will be parsed in the 'wrong' template parameter depth.
+ if (TempParams.empty())
+ TemplateParams.pop_back();
+
if (!consumeIf("vE")) {
- size_t ParamsBegin = Names.size();
do {
Node *P = getDerived().parseType();
if (P == nullptr)
return nullptr;
Names.push_back(P);
} while (!consumeIf('E'));
- Params = popTrailingNodeArray(ParamsBegin);
}
+ NodeArray Params = popTrailingNodeArray(ParamsBegin);
+
StringView Count = parseNumber();
if (!consumeIf('_'))
return nullptr;
- return make<ClosureTypeName>(Params, Count);
+ return make<ClosureTypeName>(TempParams, Params, Count);
}
if (consumeIf("Ub")) {
(void)parseNumber();
@@ -3949,6 +4197,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseConversionExpr() {
// ::= L <type> <value float> E # floating literal
// ::= L <string type> E # string literal
// ::= L <nullptr type> E # nullptr literal (i.e., "LDnE")
+// ::= L <lambda type> E # lambda expression
// FIXME: ::= L <type> <real-part float> _ <imag-part float> E # complex floating point literal (C 2000)
// ::= L <mangled-name> E # external name
template <typename Derived, typename Alloc>
@@ -4020,24 +4269,43 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExprPrimary() {
return R;
}
return nullptr;
+ case 'A': {
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return nullptr;
+ // FIXME: We need to include the string contents in the mangling.
+ if (consumeIf('E'))
+ return make<StringLiteral>(T);
+ return nullptr;
+ }
+ case 'D':
+ if (consumeIf("DnE"))
+ return make<NameType>("nullptr");
+ return nullptr;
case 'T':
// Invalid mangled name per
// http://sourcerytools.com/pipermail/cxx-abi-dev/2011-August/002422.html
return nullptr;
+ case 'U': {
+ // FIXME: Should we support LUb... for block literals?
+ if (look(1) != 'l')
+ return nullptr;
+ Node *T = parseUnnamedTypeName(nullptr);
+ if (!T || !consumeIf('E'))
+ return nullptr;
+ return make<LambdaExpr>(T);
+ }
default: {
// might be named type
Node *T = getDerived().parseType();
if (T == nullptr)
return nullptr;
StringView N = parseNumber();
- if (!N.empty()) {
- if (!consumeIf('E'))
- return nullptr;
- return make<IntegerCastExpr>(T, N);
- }
- if (consumeIf('E'))
- return T;
- return nullptr;
+ if (N.empty())
+ return nullptr;
+ if (!consumeIf('E'))
+ return nullptr;
+ return make<IntegerCastExpr>(T, N);
}
}
}
@@ -5062,11 +5330,22 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSubstitution() {
// <template-param> ::= T_ # first template parameter
// ::= T <parameter-2 non-negative number> _
+// ::= TL <level-1> __
+// ::= TL <level-1> _ <parameter-2 non-negative number> _
template <typename Derived, typename Alloc>
Node *AbstractManglingParser<Derived, Alloc>::parseTemplateParam() {
if (!consumeIf('T'))
return nullptr;
+ size_t Level = 0;
+ if (consumeIf('L')) {
+ if (parsePositiveInteger(&Level))
+ return nullptr;
+ ++Level;
+ if (!consumeIf('_'))
+ return nullptr;
+ }
+
size_t Index = 0;
if (!consumeIf('_')) {
if (parsePositiveInteger(&Index))
@@ -5076,15 +5355,11 @@ Node *AbstractManglingParser<Derived, Alloc>::parseTemplateParam() {
return nullptr;
}
- // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter list
- // are mangled as the corresponding artificial template type parameter.
- if (ParsingLambdaParams)
- return make<NameType>("auto");
-
// If we're in a context where this <template-param> refers to a
// <template-arg> further ahead in the mangled name (currently just conversion
// operator types), then we should only look it up in the right context.
- if (PermitForwardTemplateReferences) {
+ // This can only happen at the outermost level.
+ if (PermitForwardTemplateReferences && Level == 0) {
Node *ForwardRef = make<ForwardTemplateReference>(Index);
if (!ForwardRef)
return nullptr;
@@ -5094,9 +5369,78 @@ Node *AbstractManglingParser<Derived, Alloc>::parseTemplateParam() {
return ForwardRef;
}
- if (Index >= TemplateParams.size())
+ if (Level >= TemplateParams.size() || !TemplateParams[Level] ||
+ Index >= TemplateParams[Level]->size()) {
+ // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter
+ // list are mangled as the corresponding artificial template type parameter.
+ if (ParsingLambdaParamsAtLevel == Level && Level <= TemplateParams.size()) {
+ // This will be popped by the ScopedTemplateParamList in
+ // parseUnnamedTypeName.
+ if (Level == TemplateParams.size())
+ TemplateParams.push_back(nullptr);
+ return make<NameType>("auto");
+ }
+
return nullptr;
- return TemplateParams[Index];
+ }
+
+ return (*TemplateParams[Level])[Index];
+}
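Concretely (editorial note on the grammar above): T_ and T1_ keep their old meanings, naming parameters 0 and 2 of the outermost list, since the digits encode the parameter number minus two and the bare underscore means the first parameter. The new forms add a level in the same style: TL0__ names parameter 0 of level 1 (the first nested lambda list) and TL0_1_ names parameter 2 of that level, because the digits after L encode the level minus one; level 0 is only reachable through the plain T forms.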
+
+// <template-param-decl> ::= Ty # type parameter
+// ::= Tn <type> # non-type parameter
+// ::= Tt <template-param-decl>* E # template parameter
+// ::= Tp <template-param-decl> # parameter pack
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseTemplateParamDecl() {
+ auto InventTemplateParamName = [&](TemplateParamKind Kind) {
+ unsigned Index = NumSyntheticTemplateParameters[(int)Kind]++;
+ Node *N = make<SyntheticTemplateParamName>(Kind, Index);
+ if (N) TemplateParams.back()->push_back(N);
+ return N;
+ };
+
+ if (consumeIf("Ty")) {
+ Node *Name = InventTemplateParamName(TemplateParamKind::Type);
+ if (!Name)
+ return nullptr;
+ return make<TypeTemplateParamDecl>(Name);
+ }
+
+ if (consumeIf("Tn")) {
+ Node *Name = InventTemplateParamName(TemplateParamKind::NonType);
+ if (!Name)
+ return nullptr;
+ Node *Type = parseType();
+ if (!Type)
+ return nullptr;
+ return make<NonTypeTemplateParamDecl>(Name, Type);
+ }
+
+ if (consumeIf("Tt")) {
+ Node *Name = InventTemplateParamName(TemplateParamKind::Template);
+ if (!Name)
+ return nullptr;
+ size_t ParamsBegin = Names.size();
+ ScopedTemplateParamList TemplateTemplateParamParams(this);
+ while (!consumeIf("E")) {
+ Node *P = parseTemplateParamDecl();
+ if (!P)
+ return nullptr;
+ Names.push_back(P);
+ }
+ NodeArray Params = popTrailingNodeArray(ParamsBegin);
+ return make<TemplateTemplateParamDecl>(Name, Params);
+ }
+
+ if (consumeIf("Tp")) {
+ Node *P = parseTemplateParamDecl();
+ if (!P)
+ return nullptr;
+ return make<TemplateParamPackDecl>(P);
+ }
+
+ return nullptr;
}
// <template-arg> ::= <type> # type or template
@@ -5153,8 +5497,11 @@ AbstractManglingParser<Derived, Alloc>::parseTemplateArgs(bool TagTemplates) {
// <template-params> refer to the innermost <template-args>. Clear out any
// outer args that we may have inserted into TemplateParams.
- if (TagTemplates)
+ if (TagTemplates) {
TemplateParams.clear();
+ TemplateParams.push_back(&OuterTemplateParams);
+ OuterTemplateParams.clear();
+ }
size_t ArgsBegin = Names.size();
while (!consumeIf('E')) {
@@ -5172,7 +5519,7 @@ AbstractManglingParser<Derived, Alloc>::parseTemplateArgs(bool TagTemplates) {
if (!TableEntry)
return nullptr;
}
- TemplateParams.push_back(TableEntry);
+ TemplateParams.back()->push_back(TableEntry);
} else {
Node *Arg = getDerived().parseTemplateArg();
if (Arg == nullptr)
diff --git a/include/llvm/Demangle/MicrosoftDemangle.h b/include/llvm/Demangle/MicrosoftDemangle.h
index 382e79401c43..c6f26061bedd 100644
--- a/include/llvm/Demangle/MicrosoftDemangle.h
+++ b/include/llvm/Demangle/MicrosoftDemangle.h
@@ -158,6 +158,7 @@ private:
QualifiedNameNode *QN);
SymbolNode *demangleDeclarator(StringView &MangledName);
SymbolNode *demangleMD5Name(StringView &MangledName);
+ SymbolNode *demangleTypeinfoName(StringView &MangledName);
VariableSymbolNode *demangleVariableEncoding(StringView &MangledName,
StorageClass SC);
diff --git a/include/llvm/Demangle/MicrosoftDemangleNodes.h b/include/llvm/Demangle/MicrosoftDemangleNodes.h
index da9d9d5bfdc0..81b279fe237d 100644
--- a/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -16,6 +16,8 @@
#include "llvm/Demangle/DemangleConfig.h"
#include "llvm/Demangle/StringView.h"
#include <array>
+#include <cstdint>
+#include <string>
namespace llvm {
namespace itanium_demangle {
@@ -73,6 +75,9 @@ enum OutputFlags {
OF_Default = 0,
OF_NoCallingConvention = 1,
OF_NoTagSpecifier = 2,
+ OF_NoAccessSpecifier = 4,
+ OF_NoMemberType = 8,
+ OF_NoReturnType = 16,
};
// Types
@@ -301,8 +306,6 @@ struct TypeNode : public Node {
outputPost(OS, Flags);
}
- void outputQuals(bool SpaceBefore, bool SpaceAfter) const;
-
Qualifiers Quals = Q_None;
};
diff --git a/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
index 8d2f641254b3..72687682f606 100644
--- a/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
+++ b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
@@ -22,17 +22,21 @@ namespace llvm {
namespace jitlink {
/// Registers all FDEs in the given eh-frame section with the current process.
-Error registerEHFrameSection(const void *EHFrameSectionAddr);
+Error registerEHFrameSection(const void *EHFrameSectionAddr,
+ size_t EHFrameSectionSize);
/// Deregisters all FDEs in the given eh-frame section with the current process.
-Error deregisterEHFrameSection(const void *EHFrameSectionAddr);
+Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
+ size_t EHFrameSectionSize);
/// Supports registration/deregistration of EH-frames in a target process.
class EHFrameRegistrar {
public:
virtual ~EHFrameRegistrar();
- virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) = 0;
- virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) = 0;
+ virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr,
+ size_t EHFrameSectionSize) = 0;
+ virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr,
+ size_t EHFrameSectionSize) = 0;
};
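A hedged sketch of a custom registrar against the updated two-argument interface (editorial, not part of the patch; it simply logs the now-required section size and then delegates to the free functions declared above):

    #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
    #include "llvm/ExecutionEngine/JITSymbol.h"
    #include "llvm/Support/raw_ostream.h"

    namespace {
    using namespace llvm;
    using namespace llvm::jitlink;

    class LoggingEHFrameRegistrar : public EHFrameRegistrar {
    public:
      Error registerEHFrames(JITTargetAddress Addr, size_t Size) override {
        errs() << "register eh-frame @ " << Addr << " size " << Size << "\n";
        return registerEHFrameSection(jitTargetAddressToPointer<void *>(Addr),
                                      Size);
      }
      Error deregisterEHFrames(JITTargetAddress Addr, size_t Size) override {
        errs() << "deregister eh-frame @ " << Addr << " size " << Size << "\n";
        return deregisterEHFrameSection(jitTargetAddressToPointer<void *>(Addr),
                                        Size);
      }
    };
    } // namespace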
/// Registers / Deregisters EH-frames in the current process.
@@ -48,31 +52,38 @@ public:
InProcessEHFrameRegistrar(InProcessEHFrameRegistrar &&) = delete;
InProcessEHFrameRegistrar &operator=(InProcessEHFrameRegistrar &&) = delete;
- Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) override {
+ Error registerEHFrames(JITTargetAddress EHFrameSectionAddr,
+ size_t EHFrameSectionSize) override {
return registerEHFrameSection(
- jitTargetAddressToPointer<void *>(EHFrameSectionAddr));
+ jitTargetAddressToPointer<void *>(EHFrameSectionAddr),
+ EHFrameSectionSize);
}
- Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) override {
+ Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr,
+ size_t EHFrameSectionSize) override {
return deregisterEHFrameSection(
- jitTargetAddressToPointer<void *>(EHFrameSectionAddr));
+ jitTargetAddressToPointer<void *>(EHFrameSectionAddr),
+ EHFrameSectionSize);
}
private:
InProcessEHFrameRegistrar();
};
-using StoreFrameAddressFunction = std::function<void(JITTargetAddress)>;
+using StoreFrameRangeFunction =
+ std::function<void(JITTargetAddress EHFrameSectionAddr,
+ size_t EHFrameSectionSize)>;
-/// Creates a pass that records the address of the EH frame section. If no
-/// eh-frame section is found, it will set EHFrameAddr to zero.
+/// Creates a pass that records the address and size of the EH frame section.
+/// If no eh-frame section is found then the address and size will both be given
+/// as zero.
///
/// Authors of JITLinkContexts can use this function to register a post-fixup
-/// pass that records the address of the eh-frame section. This address can
+/// pass that records the range of the eh-frame section. This range can
/// be used after finalization to register and deregister the frame.
-AtomGraphPassFunction
+LinkGraphPassFunction
createEHFrameRecorderPass(const Triple &TT,
- StoreFrameAddressFunction StoreFrameAddress);
+ StoreFrameRangeFunction StoreFrameRange);
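A sketch of how a JITLinkContext author might capture the recorded range (editorial; the reference parameters are the caller's own storage, and both values are zero when no eh-frame section exists):

    #include "llvm/ADT/Triple.h"
    #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"

    // Build a post-fixup pass that stores the eh-frame section's address and
    // size, per the comment above.
    static llvm::jitlink::LinkGraphPassFunction
    makeRecorder(const llvm::Triple &TT, llvm::JITTargetAddress &Addr,
                 size_t &Size) {
      return llvm::jitlink::createEHFrameRecorderPass(
          TT, [&](llvm::JITTargetAddress A, size_t S) {
            Addr = A;
            Size = S;
          });
    }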
} // end namespace jitlink
} // end namespace llvm
diff --git a/include/llvm/ExecutionEngine/JITLink/JITLink.h b/include/llvm/ExecutionEngine/JITLink/JITLink.h
index be80d44ccf51..b531127cf892 100644
--- a/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -34,6 +34,9 @@
namespace llvm {
namespace jitlink {
+class Symbol;
+class Section;
+
/// Base class for errors originating in JIT linker, e.g. missing relocation
/// support.
class JITLinkError : public ErrorInfo<JITLinkError> {
@@ -50,27 +53,22 @@ private:
std::string ErrMsg;
};
-// Forward declare the Atom class.
-class Atom;
-
-/// Edge class. Represents both object file relocations, as well as layout and
-/// keep-alive constraints.
+/// Represents fixups and constraints in the LinkGraph.
class Edge {
public:
using Kind = uint8_t;
- using GenericEdgeKind = enum : Kind {
+ enum GenericEdgeKind : Kind {
Invalid, // Invalid edge value.
FirstKeepAlive, // Keeps target alive. Offset/addend zero.
KeepAlive = FirstKeepAlive, // Tag first edge kind that preserves liveness.
- LayoutNext, // Layout constraint. Offset/Addend zero.
FirstRelocation // First architecture specific relocation.
};
using OffsetT = uint32_t;
using AddendT = int64_t;
- Edge(Kind K, OffsetT Offset, Atom &Target, AddendT Addend)
+ Edge(Kind K, OffsetT Offset, Symbol &Target, AddendT Addend)
: Target(&Target), Offset(Offset), Addend(Addend), K(K) {}
OffsetT getOffset() const { return Offset; }
@@ -82,461 +80,637 @@ public:
return K - FirstRelocation;
}
bool isKeepAlive() const { return K >= FirstKeepAlive; }
- Atom &getTarget() const { return *Target; }
- void setTarget(Atom &Target) { this->Target = &Target; }
+ Symbol &getTarget() const { return *Target; }
+ void setTarget(Symbol &Target) { this->Target = &Target; }
AddendT getAddend() const { return Addend; }
void setAddend(AddendT Addend) { this->Addend = Addend; }
private:
- Atom *Target;
- OffsetT Offset;
- AddendT Addend;
+ Symbol *Target = nullptr;
+ OffsetT Offset = 0;
+ AddendT Addend = 0;
Kind K = 0;
};
-using EdgeVector = std::vector<Edge>;
+/// Returns the string name of the given generic edge kind, or "unknown"
+/// otherwise. Useful for debugging.
+const char *getGenericEdgeKindName(Edge::Kind K);
-const StringRef getGenericEdgeKindName(Edge::Kind K);
-
-/// Base Atom class. Used by absolute and undefined atoms.
-class Atom {
- friend class AtomGraph;
+/// Base class for Addressable entities (externals, absolutes, blocks).
+class Addressable {
+ friend class LinkGraph;
protected:
- /// Create a named (as yet unresolved) atom.
- Atom(StringRef Name)
- : Name(Name), IsDefined(false), IsLive(false), ShouldDiscard(false),
- IsGlobal(false), IsAbsolute(false), IsCallable(false),
- IsExported(false), IsWeak(false), HasLayoutNext(false),
- IsCommon(false) {}
-
- /// Create an absolute symbol atom.
- Atom(StringRef Name, JITTargetAddress Address)
- : Name(Name), Address(Address), IsDefined(true), IsLive(false),
- ShouldDiscard(false), IsGlobal(false), IsAbsolute(false),
- IsCallable(false), IsExported(false), IsWeak(false),
- HasLayoutNext(false), IsCommon(false) {}
+ Addressable(JITTargetAddress Address, bool IsDefined)
+ : Address(Address), IsDefined(IsDefined), IsAbsolute(false) {}
-public:
- /// Returns true if this atom has a name.
- bool hasName() const { return Name != StringRef(); }
+ Addressable(JITTargetAddress Address)
+ : Address(Address), IsDefined(false), IsAbsolute(true) {
+ assert(!(IsDefined && IsAbsolute) &&
+ "Block cannot be both defined and absolute");
+ }
- /// Returns the name of this atom.
- StringRef getName() const { return Name; }
+public:
+ Addressable(const Addressable &) = delete;
+ Addressable &operator=(const Addressable &) = default;
+ Addressable(Addressable &&) = delete;
+ Addressable &operator=(Addressable &&) = default;
- /// Returns the current target address of this atom.
- /// The initial target address (for atoms that have one) will be taken from
- /// the input object file's virtual address space. During the layout phase
- /// of JIT linking the atom's address will be updated to point to its final
- /// address in the JIT'd process.
JITTargetAddress getAddress() const { return Address; }
-
- /// Set the current target address of this atom.
void setAddress(JITTargetAddress Address) { this->Address = Address; }
- /// Returns true if this is a defined atom.
- bool isDefined() const { return IsDefined; }
+ /// Returns true if this is a defined addressable, in which case you
+ /// can downcast this to a Block.
+ bool isDefined() const { return static_cast<bool>(IsDefined); }
+ bool isAbsolute() const { return static_cast<bool>(IsAbsolute); }
- /// Returns true if this atom is marked as live.
- bool isLive() const { return IsLive; }
+private:
+ JITTargetAddress Address = 0;
+ uint64_t IsDefined : 1;
+ uint64_t IsAbsolute : 1;
+};
- /// Mark this atom as live.
- ///
- /// Note: Only defined and absolute atoms can be marked live.
- void setLive(bool IsLive) {
- assert((IsDefined || IsAbsolute || !IsLive) &&
- "Only defined and absolute atoms can be marked live");
- this->IsLive = IsLive;
- }
+using BlockOrdinal = unsigned;
+using SectionOrdinal = unsigned;
- /// Returns true if this atom should be discarded during pruning.
- bool shouldDiscard() const { return ShouldDiscard; }
+/// An Addressable with content and edges.
+class Block : public Addressable {
+ friend class LinkGraph;
- /// Mark this atom to be discarded.
- ///
- /// Note: Only defined and absolute atoms can be marked live.
- void setShouldDiscard(bool ShouldDiscard) {
- assert((IsDefined || IsAbsolute || !ShouldDiscard) &&
- "Only defined and absolute atoms can be marked live");
- this->ShouldDiscard = ShouldDiscard;
+private:
+ /// Create a zero-fill defined addressable.
+ Block(Section &Parent, BlockOrdinal Ordinal, JITTargetAddress Size,
+ JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset)
+ : Addressable(Address, true), Parent(Parent), Size(Size),
+ Ordinal(Ordinal) {
+ assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2");
+ assert(AlignmentOffset < Alignment &&
+ "Alignment offset cannot exceed alignment");
+ assert(AlignmentOffset <= MaxAlignmentOffset &&
+ "Alignment offset exceeds maximum");
+ P2Align = Alignment ? countTrailingZeros(Alignment) : 0;
+ this->AlignmentOffset = AlignmentOffset;
}
- /// Returns true if this definition is global (i.e. visible outside this
- /// linkage unit).
- ///
- /// Note: This is distict from Exported, which means visibile outside the
- /// JITDylib that this graph is being linked in to.
- bool isGlobal() const { return IsGlobal; }
+ /// Create a defined addressable for the given content.
+ Block(Section &Parent, BlockOrdinal Ordinal, StringRef Content,
+ JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset)
+ : Addressable(Address, true), Parent(Parent), Data(Content.data()),
+ Size(Content.size()), Ordinal(Ordinal) {
+ assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2");
+ assert(AlignmentOffset < Alignment &&
+ "Alignment offset cannot exceed alignment");
+ assert(AlignmentOffset <= MaxAlignmentOffset &&
+ "Alignment offset exceeds maximum");
+ P2Align = Alignment ? countTrailingZeros(Alignment) : 0;
+ this->AlignmentOffset = AlignmentOffset;
+ }
- /// Mark this atom as global.
- void setGlobal(bool IsGlobal) { this->IsGlobal = IsGlobal; }
+public:
+ using EdgeVector = std::vector<Edge>;
+ using edge_iterator = EdgeVector::iterator;
+ using const_edge_iterator = EdgeVector::const_iterator;
- /// Returns true if this atom represents an absolute symbol.
- bool isAbsolute() const { return IsAbsolute; }
+ Block(const Block &) = delete;
+ Block &operator=(const Block &) = delete;
+ Block(Block &&) = delete;
+ Block &operator=(Block &&) = delete;
- /// Returns true if this atom is known to be callable.
+ /// Return the parent section for this block.
+ Section &getSection() const { return Parent; }
+
+ /// Return the ordinal for this block.
+ BlockOrdinal getOrdinal() const { return Ordinal; }
+
+ /// Returns true if this is a zero-fill block.
///
- /// Primarily provided for easy interoperability with ORC, which uses the
- /// JITSymbolFlags::Common flag to identify symbols that can be interposed
- /// with stubs.
- bool isCallable() const { return IsCallable; }
+ /// If true, getSize is callable but getContent is not (the content is
+ /// defined to be a sequence of zero bytes of length Size).
+ bool isZeroFill() const { return !Data; }
+
+ /// Returns the size of this defined addressable.
+ size_t getSize() const { return Size; }
+
+ /// Get the content for this block. Block must not be a zero-fill block.
+ StringRef getContent() const {
+ assert(Data && "Section does not contain content");
+ return StringRef(Data, Size);
+ }
- /// Mark this atom as callable.
- void setCallable(bool IsCallable) {
- assert((IsDefined || IsAbsolute || !IsCallable) &&
- "Callable atoms must be defined or absolute");
- this->IsCallable = IsCallable;
+ /// Set the content for this block.
+ /// Caller is responsible for ensuring the underlying bytes are not
+ /// deallocated while pointed to by this block.
+ void setContent(StringRef Content) {
+ Data = Content.data();
+ Size = Content.size();
}
- /// Returns true if this atom should appear in the symbol table of a final
- /// linked image.
- bool isExported() const { return IsExported; }
+ /// Get the alignment for this content.
+ uint64_t getAlignment() const { return 1ull << P2Align; }
+
+ /// Get the alignment offset for this content.
+ uint64_t getAlignmentOffset() const { return AlignmentOffset; }
- /// Mark this atom as exported.
- void setExported(bool IsExported) {
- assert((!IsExported || ((IsDefined || IsAbsolute) && hasName())) &&
- "Exported atoms must have names");
- this->IsExported = IsExported;
+ /// Add an edge to this block.
+ void addEdge(Edge::Kind K, Edge::OffsetT Offset, Symbol &Target,
+ Edge::AddendT Addend) {
+ Edges.push_back(Edge(K, Offset, Target, Addend));
}
- /// Returns true if this is a weak symbol.
- bool isWeak() const { return IsWeak; }
+ /// Return the list of edges attached to this content.
+ iterator_range<edge_iterator> edges() {
+ return make_range(Edges.begin(), Edges.end());
+ }
- /// Mark this atom as weak.
- void setWeak(bool IsWeak) { this->IsWeak = IsWeak; }
+ /// Returns the list of edges attached to this content.
+ iterator_range<const_edge_iterator> edges() const {
+ return make_range(Edges.begin(), Edges.end());
+ }
-private:
- StringRef Name;
- JITTargetAddress Address = 0;
+ /// Return the size of the edges list.
+ size_t edges_size() const { return Edges.size(); }
- bool IsDefined : 1;
- bool IsLive : 1;
- bool ShouldDiscard : 1;
+ /// Returns true if the list of edges is empty.
+ bool edges_empty() const { return Edges.empty(); }
- bool IsGlobal : 1;
- bool IsAbsolute : 1;
- bool IsCallable : 1;
- bool IsExported : 1;
- bool IsWeak : 1;
+private:
+ static constexpr uint64_t MaxAlignmentOffset = (1ULL << 57) - 1;
-protected:
- // These flags only make sense for DefinedAtom, but we can minimize the size
- // of DefinedAtom by defining them here.
- bool HasLayoutNext : 1;
- bool IsCommon : 1;
+ uint64_t P2Align : 5;
+ uint64_t AlignmentOffset : 57;
+ Section &Parent;
+ const char *Data = nullptr;
+ size_t Size = 0;
+ BlockOrdinal Ordinal = 0;
+ std::vector<Edge> Edges;
};
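An editorial note on the packed layout: the block stores its alignment as a log2 value in the 5-bit P2Align field, which is why getAlignment() reconstructs it as 1ull << P2Align, and the 57-bit AlignmentOffset (hence MaxAlignmentOffset = 2^57 - 1) records where within an alignment stride the block's first byte must fall.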
-// Forward declare DefinedAtom.
-class DefinedAtom;
+/// Describes symbol linkage. This can be used to resolve definition
+/// clashes.
+enum class Linkage : uint8_t {
+ Strong,
+ Weak,
+};
-raw_ostream &operator<<(raw_ostream &OS, const Atom &A);
-void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E,
- StringRef EdgeKindName);
+/// For errors and debugging output.
+const char *getLinkageName(Linkage L);
+
+/// Defines the scope in which this symbol should be visible:
+/// Default -- Visible in the public interface of the linkage unit.
+/// Hidden -- Visible within the linkage unit, but not exported from it.
+/// Local -- Visible only within the LinkGraph.
+enum class Scope : uint8_t { Default, Hidden, Local };
+
+/// For debugging output.
+const char *getScopeName(Scope S);
+
+raw_ostream &operator<<(raw_ostream &OS, const Block &B);
+
+/// Symbol representation.
+///
+/// Symbols represent locations within Addressable objects.
+/// They can be either Named or Anonymous.
+/// Anonymous symbols have neither linkage nor visibility, and must point at
+/// ContentBlocks.
+/// Named symbols may be in one of four states:
+/// - Null: Default initialized. Assignable, but otherwise unusable.
+/// - Defined: Has both linkage and visibility and points to a ContentBlock
+/// - Common: Has both linkage and visibility, points to a null Addressable.
+/// - External: Has neither linkage nor visibility, points to an external
+/// Addressable.
+///
+class Symbol {
+ friend class LinkGraph;
+
+private:
+ Symbol(Addressable &Base, JITTargetAddress Offset, StringRef Name,
+ JITTargetAddress Size, Linkage L, Scope S, bool IsLive,
+ bool IsCallable)
+ : Name(Name), Base(&Base), Offset(Offset), Size(Size) {
+ setLinkage(L);
+ setScope(S);
+ setLive(IsLive);
+ setCallable(IsCallable);
+ }
+
+ static Symbol &constructCommon(void *SymStorage, Block &Base, StringRef Name,
+ JITTargetAddress Size, Scope S, bool IsLive) {
+ assert(SymStorage && "Storage cannot be null");
+ assert(!Name.empty() && "Common symbol name cannot be empty");
+ assert(Base.isDefined() &&
+ "Cannot create common symbol from undefined block");
+ assert(static_cast<Block &>(Base).getSize() == Size &&
+ "Common symbol size should match underlying block size");
+ auto *Sym = reinterpret_cast<Symbol *>(SymStorage);
+ new (Sym) Symbol(Base, 0, Name, Size, Linkage::Weak, S, IsLive, false);
+ return *Sym;
+ }
+
+ static Symbol &constructExternal(void *SymStorage, Addressable &Base,
+ StringRef Name, JITTargetAddress Size) {
+ assert(SymStorage && "Storage cannot be null");
+ assert(!Base.isDefined() &&
+ "Cannot create external symbol from defined block");
+ assert(!Name.empty() && "External symbol name cannot be empty");
+ auto *Sym = reinterpret_cast<Symbol *>(SymStorage);
+ new (Sym) Symbol(Base, 0, Name, Size, Linkage::Strong, Scope::Default,
+ false, false);
+ return *Sym;
+ }
+
+ static Symbol &constructAbsolute(void *SymStorage, Addressable &Base,
+ StringRef Name, JITTargetAddress Size,
+ Linkage L, Scope S, bool IsLive) {
+ assert(SymStorage && "Storage cannot be null");
+ assert(!Base.isDefined() &&
+ "Cannot create absolute symbol from a defined block");
+ auto *Sym = reinterpret_cast<Symbol *>(SymStorage);
+ new (Sym) Symbol(Base, 0, Name, Size, L, S, IsLive, false);
+ return *Sym;
+ }
+
+ static Symbol &constructAnonDef(void *SymStorage, Block &Base,
+ JITTargetAddress Offset,
+ JITTargetAddress Size, bool IsCallable,
+ bool IsLive) {
+ assert(SymStorage && "Storage cannot be null");
+ auto *Sym = reinterpret_cast<Symbol *>(SymStorage);
+ new (Sym) Symbol(Base, Offset, StringRef(), Size, Linkage::Strong,
+ Scope::Local, IsLive, IsCallable);
+ return *Sym;
+ }
+
+ static Symbol &constructNamedDef(void *SymStorage, Block &Base,
+ JITTargetAddress Offset, StringRef Name,
+ JITTargetAddress Size, Linkage L, Scope S,
+ bool IsLive, bool IsCallable) {
+ assert(SymStorage && "Storage cannot be null");
+ assert(!Name.empty() && "Name cannot be empty");
+ auto *Sym = reinterpret_cast<Symbol *>(SymStorage);
+ new (Sym) Symbol(Base, Offset, Name, Size, L, S, IsLive, IsCallable);
+ return *Sym;
+ }
-/// Represents a section address range via a pair of DefinedAtom pointers to
-/// the first and last atoms in the section.
-class SectionRange {
public:
- SectionRange() = default;
- SectionRange(DefinedAtom *First, DefinedAtom *Last)
- : First(First), Last(Last) {}
- DefinedAtom *getFirstAtom() const {
- assert((!Last || First) && "First can not be null if end is non-null");
- return First;
+ /// Create a null Symbol. This allows Symbols to be default initialized for
+ /// use in containers (e.g. as map values). Null symbols are only useful for
+ /// assigning to.
+ Symbol() = default;
+
+ // Symbols are not movable or copyable.
+ Symbol(const Symbol &) = delete;
+ Symbol &operator=(const Symbol &) = delete;
+ Symbol(Symbol &&) = delete;
+ Symbol &operator=(Symbol &&) = delete;
+
+ /// Returns true if this symbol has a name.
+ bool hasName() const { return !Name.empty(); }
+
+ /// Returns the name of this symbol (empty if the symbol is anonymous).
+ StringRef getName() const {
+ assert((!Name.empty() || getScope() == Scope::Local) &&
+ "Anonymous symbol has non-local scope");
+ return Name;
}
- DefinedAtom *getLastAtom() const {
- assert((First || !Last) && "Last can not be null if start is non-null");
- return Last;
+
+ /// Returns true if this Symbol has content (potentially) defined within this
+ /// object file (i.e. is anything but an external or absolute symbol).
+ bool isDefined() const {
+ assert(Base && "Attempt to access null symbol");
+ return Base->isDefined();
}
- bool isEmpty() const {
- assert((First || !Last) && "Last can not be null if start is non-null");
- return !First;
+
+ /// Returns true if this symbol is live (i.e. should be treated as a root for
+ /// dead stripping).
+ bool isLive() const {
+ assert(Base && "Attempting to access null symbol");
+ return IsLive;
}
- JITTargetAddress getStart() const;
- JITTargetAddress getEnd() const;
- uint64_t getSize() const;
-private:
- DefinedAtom *First = nullptr;
- DefinedAtom *Last = nullptr;
-};
+ /// Set this symbol's live bit.
+ void setLive(bool IsLive) { this->IsLive = IsLive; }
-/// Represents an object file section.
-class Section {
- friend class AtomGraph;
+ /// Returns true if this symbol is callable.
+ bool isCallable() const { return IsCallable; }
-private:
- Section(StringRef Name, uint32_t Alignment, sys::Memory::ProtectionFlags Prot,
- unsigned Ordinal, bool IsZeroFill)
- : Name(Name), Alignment(Alignment), Prot(Prot), Ordinal(Ordinal),
- IsZeroFill(IsZeroFill) {
- assert(isPowerOf2_32(Alignment) && "Alignments must be a power of 2");
+ /// Set this symbol's callable bit.
+ void setCallable(bool IsCallable) { this->IsCallable = IsCallable; }
+
+ /// Returns true if the underlying addressable is an unresolved external.
+ bool isExternal() const {
+ assert(Base && "Attempt to access null symbol");
+ return !Base->isDefined() && !Base->isAbsolute();
}
- using DefinedAtomSet = DenseSet<DefinedAtom *>;
+ /// Returns true if the underlying addressable is an absolute symbol.
+ bool isAbsolute() const {
+ assert(Base && "Attempt to access null symbol");
+ return !Base->isDefined() && Base->isAbsolute();
+ }
-public:
- using atom_iterator = DefinedAtomSet::iterator;
- using const_atom_iterator = DefinedAtomSet::const_iterator;
+ /// Return the addressable that this symbol points to.
+ Addressable &getAddressable() {
+ assert(Base && "Cannot get underlying addressable for null symbol");
+ return *Base;
+ }
- ~Section();
- StringRef getName() const { return Name; }
- uint32_t getAlignment() const { return Alignment; }
- sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; }
- unsigned getSectionOrdinal() const { return Ordinal; }
- size_t getNextAtomOrdinal() { return ++NextAtomOrdinal; }
+ /// Return the addressable that this symbol points to.
+ const Addressable &getAddressable() const {
+ assert(Base && "Cannot get underlying addressable for null symbol");
+ return *Base;
+ }
- bool isZeroFill() const { return IsZeroFill; }
+ /// Return the Block for this Symbol (Symbol must be defined).
+ Block &getBlock() {
+ assert(Base && "Cannot get block for null symbol");
+ assert(Base->isDefined() && "Not a defined symbol");
+ return static_cast<Block &>(*Base);
+ }
- /// Returns an iterator over the atoms in the section (in no particular
- /// order).
- iterator_range<atom_iterator> atoms() {
- return make_range(DefinedAtoms.begin(), DefinedAtoms.end());
+ /// Return the Block for this Symbol (Symbol must be defined).
+ const Block &getBlock() const {
+ assert(Base && "Cannot get block for null symbol");
+ assert(Base->isDefined() && "Not a defined symbol");
+ return static_cast<const Block &>(*Base);
}
- /// Returns an iterator over the atoms in the section (in no particular
- /// order).
- iterator_range<const_atom_iterator> atoms() const {
- return make_range(DefinedAtoms.begin(), DefinedAtoms.end());
+ /// Returns the offset for this symbol within the underlying addressable.
+ JITTargetAddress getOffset() const { return Offset; }
+
+ /// Returns the address of this symbol.
+ JITTargetAddress getAddress() const { return Base->getAddress() + Offset; }
+
+ /// Returns the size of this symbol.
+ JITTargetAddress getSize() const { return Size; }
+
+ /// Returns true if this symbol is backed by a zero-fill block.
+ /// This method may only be called on defined symbols.
+ bool isSymbolZeroFill() const { return getBlock().isZeroFill(); }
+
+ /// Returns the content in the underlying block covered by this symbol.
+ /// This method may only be called on defined non-zero-fill symbols.
+ StringRef getSymbolContent() const {
+ return getBlock().getContent().substr(Offset, Size);
}
- /// Return the number of atoms in this section.
- DefinedAtomSet::size_type atoms_size() { return DefinedAtoms.size(); }
+ /// Get the linkage for this Symbol.
+ Linkage getLinkage() const { return static_cast<Linkage>(L); }
- /// Return true if this section contains no atoms.
- bool atoms_empty() const { return DefinedAtoms.empty(); }
+ /// Set the linkage for this Symbol.
+ void setLinkage(Linkage L) {
+ assert((L == Linkage::Strong || (Base->isDefined() && !Name.empty())) &&
+ "Linkage can only be applied to defined named symbols");
+ this->L = static_cast<uint8_t>(L);
+ }
- /// Returns the range of this section as the pair of atoms with the lowest
- /// and highest target address. This operation is expensive, as it
- /// must traverse all atoms in the section.
- ///
- /// Note: If the section is empty, both values will be null. The section
- /// address will evaluate to null, and the size to zero. If the section
- /// contains a single atom both values will point to it, the address will
- /// evaluate to the address of that atom, and the size will be the size of
- /// that atom.
- SectionRange getRange() const;
+ /// Get the visibility for this Symbol.
+ Scope getScope() const { return static_cast<Scope>(S); }
-private:
- void addAtom(DefinedAtom &DA) {
- assert(!DefinedAtoms.count(&DA) && "Atom is already in this section");
- DefinedAtoms.insert(&DA);
+ /// Set the visibility for this Symbol.
+ void setScope(Scope S) {
+ assert((S == Scope::Default || Base->isDefined() || Base->isAbsolute()) &&
+ "Invalid visibility for symbol type");
+ this->S = static_cast<uint8_t>(S);
}
- void removeAtom(DefinedAtom &DA) {
- assert(DefinedAtoms.count(&DA) && "Atom is not in this section");
- DefinedAtoms.erase(&DA);
+private:
+ void makeExternal(Addressable &A) {
+ assert(!A.isDefined() && "Attempting to make external with defined block");
+ Base = &A;
+ Offset = 0;
+ setLinkage(Linkage::Strong);
+ setScope(Scope::Default);
+ IsLive = 0;
+ // note: Size and IsCallable fields left unchanged.
}
+ static constexpr uint64_t MaxOffset = (1ULL << 59) - 1;
+
+ // FIXME: A char* or SymbolStringPtr may pack better.
StringRef Name;
- uint32_t Alignment = 0;
- sys::Memory::ProtectionFlags Prot;
- unsigned Ordinal = 0;
- unsigned NextAtomOrdinal = 0;
- bool IsZeroFill = false;
- DefinedAtomSet DefinedAtoms;
+ Addressable *Base = nullptr;
+ uint64_t Offset : 59;
+ uint64_t L : 1;
+ uint64_t S : 2;
+ uint64_t IsLive : 1;
+ uint64_t IsCallable : 1;
+ JITTargetAddress Size = 0;
};
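A hedged sketch of querying the states documented in the Symbol comment above (editorial; uses only the accessors declared in this patch):

    #include "llvm/ExecutionEngine/JITLink/JITLink.h"
    #include "llvm/Support/raw_ostream.h"

    // Classify a symbol as defined, absolute, or external, mirroring the
    // Defined/Common/External states described in the class comment.
    static void describeSymbol(const llvm::jitlink::Symbol &Sym) {
      llvm::raw_ostream &OS = llvm::errs();
      if (Sym.isDefined())
        OS << Sym.getName() << ": defined, addr " << Sym.getAddress() << "\n";
      else if (Sym.isAbsolute())
        OS << Sym.getName() << ": absolute, addr " << Sym.getAddress() << "\n";
      else
        OS << Sym.getName() << ": external (unresolved)\n";
    }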
-/// Defined atom class. Suitable for use by defined named and anonymous
-/// atoms.
-class DefinedAtom : public Atom {
- friend class AtomGraph;
+raw_ostream &operator<<(raw_ostream &OS, const Symbol &A);
+
+void printEdge(raw_ostream &OS, const Block &B, const Edge &E,
+ StringRef EdgeKindName);
+
+/// Represents an object file section.
+class Section {
+ friend class LinkGraph;
private:
- DefinedAtom(Section &Parent, JITTargetAddress Address, uint32_t Alignment)
- : Atom("", Address), Parent(Parent), Ordinal(Parent.getNextAtomOrdinal()),
- Alignment(Alignment) {
- assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two");
- }
+ Section(StringRef Name, sys::Memory::ProtectionFlags Prot,
+ SectionOrdinal SecOrdinal)
+ : Name(Name), Prot(Prot), SecOrdinal(SecOrdinal) {}
- DefinedAtom(Section &Parent, StringRef Name, JITTargetAddress Address,
- uint32_t Alignment)
- : Atom(Name, Address), Parent(Parent),
- Ordinal(Parent.getNextAtomOrdinal()), Alignment(Alignment) {
- assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two");
- }
+ using SymbolSet = DenseSet<Symbol *>;
+ using BlockSet = DenseSet<Block *>;
public:
- using edge_iterator = EdgeVector::iterator;
+ using symbol_iterator = SymbolSet::iterator;
+ using const_symbol_iterator = SymbolSet::const_iterator;
- Section &getSection() const { return Parent; }
+ using block_iterator = BlockSet::iterator;
+ using const_block_iterator = BlockSet::const_iterator;
- uint64_t getSize() const { return Size; }
+ ~Section();
- StringRef getContent() const {
- assert(!Parent.isZeroFill() && "Trying to get content for zero-fill atom");
- assert(Size <= std::numeric_limits<size_t>::max() &&
- "Content size too large");
- return {ContentPtr, static_cast<size_t>(Size)};
- }
- void setContent(StringRef Content) {
- assert(!Parent.isZeroFill() && "Calling setContent on zero-fill atom?");
- ContentPtr = Content.data();
- Size = Content.size();
- }
+ /// Returns the name of this section.
+ StringRef getName() const { return Name; }
+
+ /// Returns the protection flags for this section.
+ sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; }
- bool isZeroFill() const { return Parent.isZeroFill(); }
+ /// Returns the ordinal for this section.
+ SectionOrdinal getOrdinal() const { return SecOrdinal; }
- void setZeroFill(uint64_t Size) {
- assert(Parent.isZeroFill() && !ContentPtr &&
- "Can't set zero-fill length of a non zero-fill atom");
- this->Size = Size;
+ /// Returns an iterator over the symbols defined in this section.
+ iterator_range<symbol_iterator> symbols() {
+ return make_range(Symbols.begin(), Symbols.end());
}
- uint64_t getZeroFillSize() const {
- assert(Parent.isZeroFill() &&
- "Can't get zero-fill length of a non zero-fill atom");
- return Size;
+ /// Returns an iterator over the symbols defined in this section.
+ iterator_range<const_symbol_iterator> symbols() const {
+ return make_range(Symbols.begin(), Symbols.end());
}
- uint32_t getAlignment() const { return Alignment; }
+ /// Return the number of symbols in this section.
+ SymbolSet::size_type symbols_size() { return Symbols.size(); }
- bool hasLayoutNext() const { return HasLayoutNext; }
- void setLayoutNext(DefinedAtom &Next) {
- assert(!HasLayoutNext && "Atom already has layout-next constraint");
- HasLayoutNext = true;
- Edges.push_back(Edge(Edge::LayoutNext, 0, Next, 0));
- }
- DefinedAtom &getLayoutNext() {
- assert(HasLayoutNext && "Atom does not have a layout-next constraint");
- DefinedAtom *Next = nullptr;
- for (auto &E : edges())
- if (E.getKind() == Edge::LayoutNext) {
- assert(E.getTarget().isDefined() &&
- "layout-next target atom must be a defined atom");
- Next = static_cast<DefinedAtom *>(&E.getTarget());
- break;
- }
- assert(Next && "Missing LayoutNext edge");
- return *Next;
- }
+ /// Return true if this section contains no symbols.
+ bool symbols_empty() const { return Symbols.empty(); }
- bool isCommon() const { return IsCommon; }
+ /// Returns the ordinal for the next block.
+ BlockOrdinal getNextBlockOrdinal() { return NextBlockOrdinal++; }
- void addEdge(Edge::Kind K, Edge::OffsetT Offset, Atom &Target,
- Edge::AddendT Addend) {
- assert(K != Edge::LayoutNext &&
- "Layout edges should be added via setLayoutNext");
- Edges.push_back(Edge(K, Offset, Target, Addend));
+private:
+ void addSymbol(Symbol &Sym) {
+ assert(!Symbols.count(&Sym) && "Symbol is already in this section");
+ Symbols.insert(&Sym);
}
- iterator_range<edge_iterator> edges() {
- return make_range(Edges.begin(), Edges.end());
+ void removeSymbol(Symbol &Sym) {
+ assert(Symbols.count(&Sym) && "symbol is not in this section");
+ Symbols.erase(&Sym);
}
- size_t edges_size() const { return Edges.size(); }
- bool edges_empty() const { return Edges.empty(); }
- unsigned getOrdinal() const { return Ordinal; }
+ StringRef Name;
+ sys::Memory::ProtectionFlags Prot;
+ SectionOrdinal SecOrdinal = 0;
+ BlockOrdinal NextBlockOrdinal = 0;
+ SymbolSet Symbols;
+};
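
A short sketch of the new Section accessors, assuming Sec comes from a LinkGraph built elsewhere and that the IsLive bit is exposed through a Symbol::isLive() accessor (not shown in this hunk):

#include "llvm/ExecutionEngine/JITLink/JITLink.h"

using namespace llvm::jitlink;

// Count the live symbols defined in a section via the symbols() range.
static size_t countLiveSymbols(Section &Sec) {
  size_t N = 0;
  for (Symbol *Sym : Sec.symbols())
    if (Sym->isLive())
      ++N;
  return N;
}
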
-private:
- void setCommon(uint64_t Size) {
- assert(ContentPtr == 0 && "Atom already has content?");
- IsCommon = true;
- setZeroFill(Size);
+/// Represents a section address range via a pair of Block pointers
+/// to the first and last Blocks in the section.
+class SectionRange {
+public:
+ SectionRange() = default;
+ SectionRange(const Section &Sec) {
+ if (Sec.symbols_empty())
+ return;
+ First = Last = *Sec.symbols().begin();
+ for (auto *Sym : Sec.symbols()) {
+ if (Sym->getAddress() < First->getAddress())
+ First = Sym;
+ if (Sym->getAddress() > Last->getAddress())
+ Last = Sym;
+ }
+ }
+ Symbol *getFirstSymbol() const {
+ assert((!Last || First) && "First cannot be null if Last is non-null");
+ return First;
+ }
+ Symbol *getLastSymbol() const {
+ assert((First || !Last) && "First cannot be null if Last is non-null");
+ return Last;
+ }
+ bool isEmpty() const {
+ assert((First || !Last) && "First cannot be null if Last is non-null");
+ return !First;
+ }
+ JITTargetAddress getStart() const {
+ return First ? First->getBlock().getAddress() : 0;
+ }
+ JITTargetAddress getEnd() const {
+ return Last ? Last->getBlock().getAddress() + Last->getBlock().getSize()
+ : 0;
}
+ uint64_t getSize() const { return getEnd() - getStart(); }
- EdgeVector Edges;
- uint64_t Size = 0;
- Section &Parent;
- const char *ContentPtr = nullptr;
- unsigned Ordinal = 0;
- uint32_t Alignment = 0;
+private:
+ Symbol *First = nullptr;
+ Symbol *Last = nullptr;
};
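
A usage sketch for SectionRange, which scans a section's symbols to find its bounding blocks (formatv follows the hex convention used elsewhere in this header):

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::jitlink;

// Print the address range covered by a section, if it has any symbols.
static void printRange(const Section &Sec) {
  SectionRange R(Sec);
  if (R.isEmpty())
    return;
  errs() << Sec.getName() << ": [" << formatv("{0:x}", R.getStart()) << ", "
         << formatv("{0:x}", R.getEnd()) << "), size " << R.getSize() << "\n";
}
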
-inline JITTargetAddress SectionRange::getStart() const {
- return First ? First->getAddress() : 0;
-}
+class LinkGraph {
+private:
+ using SectionList = std::vector<std::unique_ptr<Section>>;
+ using ExternalSymbolSet = DenseSet<Symbol *>;
+ using BlockSet = DenseSet<Block *>;
+
+ template <typename... ArgTs>
+ Addressable &createAddressable(ArgTs &&... Args) {
+ Addressable *A =
+ reinterpret_cast<Addressable *>(Allocator.Allocate<Addressable>());
+ new (A) Addressable(std::forward<ArgTs>(Args)...);
+ return *A;
+ }
-inline JITTargetAddress SectionRange::getEnd() const {
- return Last ? Last->getAddress() + Last->getSize() : 0;
-}
+ void destroyAddressable(Addressable &A) {
+ A.~Addressable();
+ Allocator.Deallocate(&A);
+ }
-inline uint64_t SectionRange::getSize() const { return getEnd() - getStart(); }
+ template <typename... ArgTs> Block &createBlock(ArgTs &&... Args) {
+ Block *B = reinterpret_cast<Block *>(Allocator.Allocate<Block>());
+ new (B) Block(std::forward<ArgTs>(Args)...);
+ Blocks.insert(B);
+ return *B;
+ }
-inline SectionRange Section::getRange() const {
- if (atoms_empty())
- return SectionRange();
- DefinedAtom *First = *DefinedAtoms.begin(), *Last = *DefinedAtoms.begin();
- for (auto *DA : atoms()) {
- if (DA->getAddress() < First->getAddress())
- First = DA;
- if (DA->getAddress() > Last->getAddress())
- Last = DA;
+ void destroyBlock(Block &B) {
+ Blocks.erase(&B);
+ B.~Block();
+ Allocator.Deallocate(&B);
}
- return SectionRange(First, Last);
-}
-class AtomGraph {
-private:
- using SectionList = std::vector<std::unique_ptr<Section>>;
- using AddressToAtomMap = std::map<JITTargetAddress, DefinedAtom *>;
- using NamedAtomMap = DenseMap<StringRef, Atom *>;
- using ExternalAtomSet = DenseSet<Atom *>;
+ void destroySymbol(Symbol &S) {
+ S.~Symbol();
+ Allocator.Deallocate(&S);
+ }
public:
- using external_atom_iterator = ExternalAtomSet::iterator;
+ using external_symbol_iterator = ExternalSymbolSet::iterator;
+
+ using block_iterator = BlockSet::iterator;
using section_iterator = pointee_iterator<SectionList::iterator>;
using const_section_iterator = pointee_iterator<SectionList::const_iterator>;
- template <typename SecItrT, typename AtomItrT, typename T>
- class defined_atom_iterator_impl
+ template <typename SectionItrT, typename SymbolItrT, typename T>
+ class defined_symbol_iterator_impl
: public iterator_facade_base<
- defined_atom_iterator_impl<SecItrT, AtomItrT, T>,
+ defined_symbol_iterator_impl<SectionItrT, SymbolItrT, T>,
std::forward_iterator_tag, T> {
public:
- defined_atom_iterator_impl() = default;
+ defined_symbol_iterator_impl() = default;
- defined_atom_iterator_impl(SecItrT SI, SecItrT SE)
- : SI(SI), SE(SE),
- AI(SI != SE ? SI->atoms().begin() : Section::atom_iterator()) {
- moveToNextAtomOrEnd();
+ defined_symbol_iterator_impl(SectionItrT SecI, SectionItrT SecE)
+ : SecI(SecI), SecE(SecE),
+ SymI(SecI != SecE ? SecI->symbols().begin() : SymbolItrT()) {
+ moveToNextSymbolOrEnd();
}
- bool operator==(const defined_atom_iterator_impl &RHS) const {
- return (SI == RHS.SI) && (AI == RHS.AI);
+ bool operator==(const defined_symbol_iterator_impl &RHS) const {
+ return (SecI == RHS.SecI) && (SymI == RHS.SymI);
}
T operator*() const {
- assert(AI != SI->atoms().end() && "Dereferencing end?");
- return *AI;
+ assert(SymI != SecI->symbols().end() && "Dereferencing end?");
+ return *SymI;
}
- defined_atom_iterator_impl operator++() {
- ++AI;
- moveToNextAtomOrEnd();
+ defined_symbol_iterator_impl operator++() {
+ ++SymI;
+ moveToNextSymbolOrEnd();
return *this;
}
private:
- void moveToNextAtomOrEnd() {
- while (SI != SE && AI == SI->atoms().end()) {
- ++SI;
- if (SI == SE)
- AI = Section::atom_iterator();
- else
- AI = SI->atoms().begin();
+ void moveToNextSymbolOrEnd() {
+ while (SecI != SecE && SymI == SecI->symbols().end()) {
+ ++SecI;
+ SymI = SecI == SecE ? SymbolItrT() : SecI->symbols().begin();
}
}
- SecItrT SI, SE;
- AtomItrT AI;
+ SectionItrT SecI, SecE;
+ SymbolItrT SymI;
};
- using defined_atom_iterator =
- defined_atom_iterator_impl<section_iterator, Section::atom_iterator,
- DefinedAtom *>;
+ using defined_symbol_iterator =
+ defined_symbol_iterator_impl<const_section_iterator,
+ Section::symbol_iterator, Symbol *>;
- using const_defined_atom_iterator =
- defined_atom_iterator_impl<const_section_iterator,
- Section::const_atom_iterator,
- const DefinedAtom *>;
+ using const_defined_symbol_iterator = defined_symbol_iterator_impl<
+ const_section_iterator, Section::const_symbol_iterator, const Symbol *>;
- AtomGraph(std::string Name, unsigned PointerSize,
+ LinkGraph(std::string Name, unsigned PointerSize,
support::endianness Endianness)
: Name(std::move(Name)), PointerSize(PointerSize),
Endianness(Endianness) {}
+ ~LinkGraph();
+
/// Returns the name of this graph (usually the name of the original
/// underlying MemoryBuffer).
const std::string &getName() { return Name; }
@@ -544,84 +718,83 @@ public:
/// Returns the pointer size for use in this graph.
unsigned getPointerSize() const { return PointerSize; }
- /// Returns the endianness of atom-content in this graph.
+ /// Returns the endianness of content in this graph.
support::endianness getEndianness() const { return Endianness; }
/// Create a section with the given name, protection flags, and alignment.
- Section &createSection(StringRef Name, uint32_t Alignment,
- sys::Memory::ProtectionFlags Prot, bool IsZeroFill) {
- std::unique_ptr<Section> Sec(
- new Section(Name, Alignment, Prot, Sections.size(), IsZeroFill));
+ Section &createSection(StringRef Name, sys::Memory::ProtectionFlags Prot) {
+ std::unique_ptr<Section> Sec(new Section(Name, Prot, Sections.size()));
Sections.push_back(std::move(Sec));
return *Sections.back();
}
- /// Add an external atom representing an undefined symbol in this graph.
- Atom &addExternalAtom(StringRef Name) {
- assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
- Atom *A = reinterpret_cast<Atom *>(
- AtomAllocator.Allocate(sizeof(Atom), alignof(Atom)));
- new (A) Atom(Name);
- ExternalAtoms.insert(A);
- NamedAtoms[Name] = A;
- return *A;
+ /// Create a content block.
+ Block &createContentBlock(Section &Parent, StringRef Content,
+ uint64_t Address, uint64_t Alignment,
+ uint64_t AlignmentOffset) {
+ return createBlock(Parent, Parent.getNextBlockOrdinal(), Content, Address,
+ Alignment, AlignmentOffset);
}
- /// Add an external atom representing an absolute symbol.
- Atom &addAbsoluteAtom(StringRef Name, JITTargetAddress Addr) {
- assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
- Atom *A = reinterpret_cast<Atom *>(
- AtomAllocator.Allocate(sizeof(Atom), alignof(Atom)));
- new (A) Atom(Name, Addr);
- AbsoluteAtoms.insert(A);
- NamedAtoms[Name] = A;
- return *A;
+ /// Create a zero-fill block.
+ Block &createZeroFillBlock(Section &Parent, uint64_t Size, uint64_t Address,
+ uint64_t Alignment, uint64_t AlignmentOffset) {
+ return createBlock(Parent, Parent.getNextBlockOrdinal(), Size, Address,
+ Alignment, AlignmentOffset);
}
- /// Add an anonymous defined atom to the graph.
- ///
- /// Anonymous atoms have content but no name. They must have an address.
- DefinedAtom &addAnonymousAtom(Section &Parent, JITTargetAddress Address,
- uint32_t Alignment) {
- DefinedAtom *A = reinterpret_cast<DefinedAtom *>(
- AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom)));
- new (A) DefinedAtom(Parent, Address, Alignment);
- Parent.addAtom(*A);
- getAddrToAtomMap()[A->getAddress()] = A;
- return *A;
+ /// Add an external symbol.
+ /// Some formats (e.g. ELF) allow Symbols to have sizes. For Symbols whose
+ /// size is not known, you should substitute '0'.
+ Symbol &addExternalSymbol(StringRef Name, uint64_t Size) {
+ auto &Sym = Symbol::constructExternal(
+ Allocator.Allocate<Symbol>(), createAddressable(0, false), Name, Size);
+ ExternalSymbols.insert(&Sym);
+ return Sym;
}
- /// Add a defined atom to the graph.
- ///
- /// Allocates and constructs a DefinedAtom instance with the given parent,
- /// name, address, and alignment.
- DefinedAtom &addDefinedAtom(Section &Parent, StringRef Name,
- JITTargetAddress Address, uint32_t Alignment) {
- assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
- DefinedAtom *A = reinterpret_cast<DefinedAtom *>(
- AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom)));
- new (A) DefinedAtom(Parent, Name, Address, Alignment);
- Parent.addAtom(*A);
- getAddrToAtomMap()[A->getAddress()] = A;
- NamedAtoms[Name] = A;
- return *A;
+ /// Add an absolute symbol.
+ Symbol &addAbsoluteSymbol(StringRef Name, JITTargetAddress Address,
+ uint64_t Size, Linkage L, Scope S, bool IsLive) {
+ auto &Sym = Symbol::constructAbsolute(Allocator.Allocate<Symbol>(),
+ createAddressable(Address), Name,
+ Size, L, S, IsLive);
+ AbsoluteSymbols.insert(&Sym);
+ return Sym;
}
- /// Add a common symbol atom to the graph.
- ///
- /// Adds a common-symbol atom to the graph with the given parent, name,
- /// address, alignment and size.
- DefinedAtom &addCommonAtom(Section &Parent, StringRef Name,
- JITTargetAddress Address, uint32_t Alignment,
- uint64_t Size) {
- assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
- DefinedAtom *A = reinterpret_cast<DefinedAtom *>(
- AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom)));
- new (A) DefinedAtom(Parent, Name, Address, Alignment);
- A->setCommon(Size);
- Parent.addAtom(*A);
- NamedAtoms[Name] = A;
- return *A;
+ /// Convenience method for adding a weak zero-fill symbol.
+ Symbol &addCommonSymbol(StringRef Name, Scope S, Section &Section,
+ JITTargetAddress Address, uint64_t Size,
+ uint64_t Alignment, bool IsLive) {
+ auto &Sym = Symbol::constructCommon(
+ Allocator.Allocate<Symbol>(),
+ createBlock(Section, Section.getNextBlockOrdinal(), Address, Size,
+ Alignment, 0),
+ Name, Size, S, IsLive);
+ Section.addSymbol(Sym);
+ return Sym;
+ }
+
+ /// Add an anonymous symbol.
+ Symbol &addAnonymousSymbol(Block &Content, JITTargetAddress Offset,
+ JITTargetAddress Size, bool IsCallable,
+ bool IsLive) {
+ auto &Sym = Symbol::constructAnonDef(Allocator.Allocate<Symbol>(), Content,
+ Offset, Size, IsCallable, IsLive);
+ Content.getSection().addSymbol(Sym);
+ return Sym;
+ }
+
+ /// Add a named symbol.
+ Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset,
+ StringRef Name, JITTargetAddress Size, Linkage L,
+ Scope S, bool IsCallable, bool IsLive) {
+ auto &Sym =
+ Symbol::constructNamedDef(Allocator.Allocate<Symbol>(), Content, Offset,
+ Name, Size, L, S, IsLive, IsCallable);
+ Content.getSection().addSymbol(Sym);
+ return Sym;
}
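
Taken together, these creation methods replace the old addExternalAtom / addDefinedAtom family. A minimal sketch of the new construction flow (the section name, address, alignment, and one-byte content are placeholder values):

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Support/Memory.h"

using namespace llvm;
using namespace llvm::jitlink;

static void buildTinyGraph() {
  LinkGraph G("demo.o", /*PointerSize=*/8, support::little);

  // Sections now carry only a name and protection flags.
  auto &Text = G.createSection(
      "__text", static_cast<sys::Memory::ProtectionFlags>(
                    sys::Memory::MF_READ | sys::Memory::MF_EXEC));

  // Content lives in blocks; symbols are (block, offset, size) views.
  StringRef Code = "\xc3"; // placeholder: a single x86-64 'ret'
  Block &B = G.createContentBlock(Text, Code, /*Address=*/0x1000,
                                  /*Alignment=*/16, /*AlignmentOffset=*/0);
  G.addDefinedSymbol(B, /*Offset=*/0, "main", /*Size=*/1, Linkage::Strong,
                     Scope::Default, /*IsCallable=*/true, /*IsLive=*/true);
}
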
iterator_range<section_iterator> sections() {
@@ -638,135 +811,79 @@ public:
return nullptr;
}
- iterator_range<external_atom_iterator> external_atoms() {
- return make_range(ExternalAtoms.begin(), ExternalAtoms.end());
+ iterator_range<external_symbol_iterator> external_symbols() {
+ return make_range(ExternalSymbols.begin(), ExternalSymbols.end());
}
- iterator_range<external_atom_iterator> absolute_atoms() {
- return make_range(AbsoluteAtoms.begin(), AbsoluteAtoms.end());
+ iterator_range<external_symbol_iterator> absolute_symbols() {
+ return make_range(AbsoluteSymbols.begin(), AbsoluteSymbols.end());
}
- iterator_range<defined_atom_iterator> defined_atoms() {
- return make_range(defined_atom_iterator(Sections.begin(), Sections.end()),
- defined_atom_iterator(Sections.end(), Sections.end()));
+ iterator_range<defined_symbol_iterator> defined_symbols() {
+ return make_range(defined_symbol_iterator(Sections.begin(), Sections.end()),
+ defined_symbol_iterator(Sections.end(), Sections.end()));
}
- iterator_range<const_defined_atom_iterator> defined_atoms() const {
+ iterator_range<const_defined_symbol_iterator> defined_symbols() const {
return make_range(
- const_defined_atom_iterator(Sections.begin(), Sections.end()),
- const_defined_atom_iterator(Sections.end(), Sections.end()));
- }
-
- /// Returns the atom with the given name, which must exist in this graph.
- Atom &getAtomByName(StringRef Name) {
- auto I = NamedAtoms.find(Name);
- assert(I != NamedAtoms.end() && "Name not in NamedAtoms map");
- return *I->second;
- }
-
- /// Returns the atom with the given name, which must exist in this graph and
- /// be a DefinedAtom.
- DefinedAtom &getDefinedAtomByName(StringRef Name) {
- auto &A = getAtomByName(Name);
- assert(A.isDefined() && "Atom is not a defined atom");
- return static_cast<DefinedAtom &>(A);
- }
-
- /// Search for the given atom by name.
- /// Returns the atom (if found) or an error (if no atom with this name
- /// exists).
- Expected<Atom &> findAtomByName(StringRef Name) {
- auto I = NamedAtoms.find(Name);
- if (I == NamedAtoms.end())
- return make_error<JITLinkError>("No atom named " + Name);
- return *I->second;
- }
-
- /// Search for the given defined atom by name.
- /// Returns the defined atom (if found) or an error (if no atom with this
- /// name exists, or if one exists but is not a defined atom).
- Expected<DefinedAtom &> findDefinedAtomByName(StringRef Name) {
- auto I = NamedAtoms.find(Name);
- if (I == NamedAtoms.end())
- return make_error<JITLinkError>("No atom named " + Name);
- if (!I->second->isDefined())
- return make_error<JITLinkError>("Atom " + Name +
- " exists but is not a "
- "defined atom");
- return static_cast<DefinedAtom &>(*I->second);
- }
-
- /// Returns the atom covering the given address, or an error if no such atom
- /// exists.
- ///
- /// Returns null if no atom exists at the given address.
- DefinedAtom *getAtomByAddress(JITTargetAddress Address) {
- refreshAddrToAtomCache();
-
- // If there are no defined atoms, bail out early.
- if (AddrToAtomCache->empty())
- return nullptr;
-
- // Find the atom *after* the given address.
- auto I = AddrToAtomCache->upper_bound(Address);
-
- // If this address falls before any known atom, bail out.
- if (I == AddrToAtomCache->begin())
- return nullptr;
-
- // The atom we're looking for is the one before the atom we found.
- --I;
-
- // Otherwise range check the atom that was found.
- assert(!I->second->getContent().empty() && "Atom content not set");
- if (Address >= I->second->getAddress() + I->second->getContent().size())
- return nullptr;
+ const_defined_symbol_iterator(Sections.begin(), Sections.end()),
+ const_defined_symbol_iterator(Sections.end(), Sections.end()));
+ }
- return I->second;
+ iterator_range<block_iterator> blocks() {
+ return make_range(Blocks.begin(), Blocks.end());
}
- /// Like getAtomByAddress, but returns an Error if the given address is not
- /// covered by an atom, rather than a null pointer.
- Expected<DefinedAtom &> findAtomByAddress(JITTargetAddress Address) {
- if (auto *DA = getAtomByAddress(Address))
- return *DA;
- return make_error<JITLinkError>("No atom at address " +
- formatv("{0:x16}", Address));
+ /// Turn a defined symbol into an external one.
+ void makeExternal(Symbol &Sym) {
+ if (Sym.getAddressable().isAbsolute()) {
+ assert(AbsoluteSymbols.count(&Sym) &&
+ "Sym is not in the absolute symbols set");
+ AbsoluteSymbols.erase(&Sym);
+ } else {
+ assert(Sym.isDefined() && "Sym is not a defined symbol");
+ Section &Sec = Sym.getBlock().getSection();
+ Sec.removeSymbol(Sym);
+ }
+ Sym.makeExternal(createAddressable(false));
+ ExternalSymbols.insert(&Sym);
}
- // Remove the given external atom from the graph.
- void removeExternalAtom(Atom &A) {
- assert(!A.isDefined() && !A.isAbsolute() && "A is not an external atom");
- assert(ExternalAtoms.count(&A) && "A is not in the external atoms set");
- ExternalAtoms.erase(&A);
- A.~Atom();
+ /// Removes an external symbol. Also removes the underlying Addressable.
+ void removeExternalSymbol(Symbol &Sym) {
+ assert(!Sym.isDefined() && !Sym.isAbsolute() &&
+ "Sym is not an external symbol");
+ assert(ExternalSymbols.count(&Sym) && "Symbol is not in the externals set");
+ ExternalSymbols.erase(&Sym);
+ Addressable &Base = *Sym.Base;
+ destroySymbol(Sym);
+ destroyAddressable(Base);
}
- /// Remove the given absolute atom from the graph.
- void removeAbsoluteAtom(Atom &A) {
- assert(A.isAbsolute() && "A is not an absolute atom");
- assert(AbsoluteAtoms.count(&A) && "A is not in the absolute atoms set");
- AbsoluteAtoms.erase(&A);
- A.~Atom();
+ /// Remove an absolute symbol. Also removes the underlying Addressable.
+ void removeAbsoluteSymbol(Symbol &Sym) {
+ assert(!Sym.isDefined() && Sym.isAbsolute() &&
+ "Sym is not an absolute symbol");
+ assert(AbsoluteSymbols.count(&Sym) &&
+ "Symbol is not in the absolute symbols set");
+ AbsoluteSymbols.erase(&Sym);
+ Addressable &Base = *Sym.Base;
+ destroySymbol(Sym);
+ destroyAddressable(Base);
}
- /// Remove the given defined atom from the graph.
- void removeDefinedAtom(DefinedAtom &DA) {
- if (AddrToAtomCache) {
- assert(AddrToAtomCache->count(DA.getAddress()) &&
- "Cache exists, but does not contain atom");
- AddrToAtomCache->erase(DA.getAddress());
- }
- if (DA.hasName()) {
- assert(NamedAtoms.count(DA.getName()) && "Named atom not in map");
- NamedAtoms.erase(DA.getName());
- }
- DA.getSection().removeAtom(DA);
- DA.~DefinedAtom();
+ /// Removes defined symbols. Does not remove the underlying block.
+ void removeDefinedSymbol(Symbol &Sym) {
+ assert(Sym.isDefined() && "Sym is not a defined symbol");
+ Sym.getBlock().getSection().removeSymbol(Sym);
+ destroySymbol(Sym);
}
- /// Invalidate the atom-to-address map.
- void invalidateAddrToAtomMap() { AddrToAtomCache = None; }
+ /// Remove a block.
+ void removeBlock(Block &B) {
+ Blocks.erase(&B);
+ destroyBlock(B);
+ }
/// Dump the graph.
///
@@ -778,87 +895,84 @@ public:
std::function<StringRef(Edge::Kind)>());
private:
- AddressToAtomMap &getAddrToAtomMap() {
- refreshAddrToAtomCache();
- return *AddrToAtomCache;
- }
-
- const AddressToAtomMap &getAddrToAtomMap() const {
- refreshAddrToAtomCache();
- return *AddrToAtomCache;
- }
-
- void refreshAddrToAtomCache() const {
- if (!AddrToAtomCache) {
- AddrToAtomCache = AddressToAtomMap();
- for (auto *DA : defined_atoms())
- (*AddrToAtomCache)[DA->getAddress()] = const_cast<DefinedAtom *>(DA);
- }
- }
-
- // Put the BumpPtrAllocator first so that we don't free any of the atoms in
- // it until all of their destructors have been run.
- BumpPtrAllocator AtomAllocator;
+ // Put the BumpPtrAllocator first so that we don't free any of the underlying
+ // memory until the Symbol/Addressable destructors have been run.
+ BumpPtrAllocator Allocator;
std::string Name;
unsigned PointerSize;
support::endianness Endianness;
+ BlockSet Blocks;
SectionList Sections;
- NamedAtomMap NamedAtoms;
- ExternalAtomSet ExternalAtoms;
- ExternalAtomSet AbsoluteAtoms;
- mutable Optional<AddressToAtomMap> AddrToAtomCache;
+ ExternalSymbolSet ExternalSymbols;
+ ExternalSymbolSet AbsoluteSymbols;
};
-/// A function for mutating AtomGraphs.
-using AtomGraphPassFunction = std::function<Error(AtomGraph &)>;
+/// A function for mutating LinkGraphs.
+using LinkGraphPassFunction = std::function<Error(LinkGraph &)>;
-/// A list of atom graph passes.
-using AtomGraphPassList = std::vector<AtomGraphPassFunction>;
+/// A list of LinkGraph passes.
+using LinkGraphPassList = std::vector<LinkGraphPassFunction>;
-/// An atom graph pass configuration, consisting of a list of pre-prune,
+/// A LinkGraph pass configuration, consisting of a list of pre-prune,
/// post-prune, and post-fixup passes.
struct PassConfiguration {
/// Pre-prune passes.
///
/// These passes are called on the graph after it is built, and before any
- /// atoms have been pruned.
+ /// symbols have been pruned.
///
- /// Notable use cases: Marking atoms live or should-discard.
- AtomGraphPassList PrePrunePasses;
+ /// Notable use cases: Marking symbols live or should-discard.
+ LinkGraphPassList PrePrunePasses;
/// Post-prune passes.
///
- /// These passes are called on the graph after dead and should-discard atoms
- /// have been removed, but before fixups are applied.
+ /// These passes are called on the graph after dead stripping, but before
+ /// fixups are applied.
///
- /// Notable use cases: Building GOT, stub, and TLV atoms.
- AtomGraphPassList PostPrunePasses;
+ /// Notable use cases: Building GOT, stub, and TLV symbols.
+ LinkGraphPassList PostPrunePasses;
/// Post-fixup passes.
///
- /// These passes are called on the graph after atom contents has been copied
+ /// These passes are called on the graph after block contents have been copied
/// to working memory, and fixups applied.
///
/// Notable use cases: Testing and validation.
- AtomGraphPassList PostFixupPasses;
+ LinkGraphPassList PostFixupPasses;
};
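
A sketch of registering a pass on the renamed lists; the lambda satisfies LinkGraphPassFunction as defined above:

#include "llvm/ExecutionEngine/JITLink/JITLink.h"

using namespace llvm;
using namespace llvm::jitlink;

// Register a pre-prune pass that walks every defined symbol in the graph.
static void addInspectionPass(PassConfiguration &Config) {
  Config.PrePrunePasses.push_back([](LinkGraph &G) -> Error {
    for (Symbol *Sym : G.defined_symbols())
      (void)Sym; // e.g. inspect, or mark symbols live here
    return Error::success();
  });
}
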
/// A map of symbol names to resolved addresses.
using AsyncLookupResult = DenseMap<StringRef, JITEvaluatedSymbol>;
-/// A function to call with a resolved symbol map (See AsyncLookupResult) or an
-/// error if resolution failed.
-using JITLinkAsyncLookupContinuation =
- std::function<void(Expected<AsyncLookupResult> LR)>;
+/// A function object to call with a resolved symbol map (See AsyncLookupResult)
+/// or an error if resolution failed.
+class JITLinkAsyncLookupContinuation {
+public:
+ virtual ~JITLinkAsyncLookupContinuation() {}
+ virtual void run(Expected<AsyncLookupResult> LR) = 0;
+
+private:
+ virtual void anchor();
+};
+
+/// Create a lookup continuation from a function object.
+template <typename Continuation>
+std::unique_ptr<JITLinkAsyncLookupContinuation>
+createLookupContinuation(Continuation Cont) {
-/// An asynchronous symbol lookup. Performs a search (possibly asynchronously)
-/// for the given symbols, calling the given continuation with either the result
-/// (if the lookup succeeds), or an error (if the lookup fails).
-using JITLinkAsyncLookupFunction =
- std::function<void(const DenseSet<StringRef> &Symbols,
- JITLinkAsyncLookupContinuation LookupContinuation)>;
+ class Impl final : public JITLinkAsyncLookupContinuation {
+ public:
+ Impl(Continuation C) : C(std::move(C)) {}
+ void run(Expected<AsyncLookupResult> LR) override { C(std::move(LR)); }
+
+ private:
+ Continuation C;
+ };
+
+ return std::make_unique<Impl>(std::move(Cont));
+}
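
A sketch of wrapping a lambda with createLookupContinuation; lookup errors are drained to stderr:

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

using namespace llvm;
using namespace llvm::jitlink;

static std::unique_ptr<JITLinkAsyncLookupContinuation>
makePrintingContinuation() {
  return createLookupContinuation([](Expected<AsyncLookupResult> LR) {
    if (!LR) {
      logAllUnhandledErrors(LR.takeError(), errs(), "lookup failed: ");
      return;
    }
    for (auto &KV : *LR)
      errs() << KV.first << " @ " << KV.second.getAddress() << "\n";
  });
}
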
/// Holds context for a single jitLink invocation.
class JITLinkContext {
@@ -881,13 +995,13 @@ public:
/// lookup continuation which it must call with a result to continue the
/// linking process.
virtual void lookup(const DenseSet<StringRef> &Symbols,
- JITLinkAsyncLookupContinuation LookupContinuation) = 0;
+ std::unique_ptr<JITLinkAsyncLookupContinuation> LC) = 0;
- /// Called by JITLink once all defined atoms in the graph have been assigned
- /// their final memory locations in the target process. At this point he
- /// atom graph can be, inspected to build a symbol table however the atom
+ /// Called by JITLink once all defined symbols in the graph have been assigned
+ /// their final memory locations in the target process. At this point the
+ /// LinkGraph can be inspected to build a symbol table, however the block
/// content will not generally have been copied to the target location yet.
- virtual void notifyResolved(AtomGraph &G) = 0;
+ virtual void notifyResolved(LinkGraph &G) = 0;
/// Called by JITLink to notify the context that the object has been
/// finalized (i.e. emitted to memory and memory permissions set). If all of
@@ -904,20 +1018,20 @@ public:
/// Returns the mark-live pass to be used for this link. If no pass is
/// returned (the default) then the target-specific linker implementation will
- /// choose a conservative default (usually marking all atoms live).
+ /// choose a conservative default (usually marking all symbols live).
/// This function is only called if shouldAddDefaultTargetPasses returns true,
/// otherwise the JITLinkContext is responsible for adding a mark-live pass in
/// modifyPassConfig.
- virtual AtomGraphPassFunction getMarkLivePass(const Triple &TT) const;
+ virtual LinkGraphPassFunction getMarkLivePass(const Triple &TT) const;
/// Called by JITLink to modify the pass pipeline prior to linking.
/// The default version performs no modification.
virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config);
};
-/// Marks all atoms in a graph live. This can be used as a default, conservative
-/// mark-live implementation.
-Error markAllAtomsLive(AtomGraph &G);
+/// Marks all symbols in a graph live. This can be used as a default,
+/// conservative mark-live implementation.
+Error markAllSymbolsLive(LinkGraph &G);
/// Basic JITLink implementation.
///
diff --git a/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
index 9d0b37fe4a4d..ac5a593bb77b 100644
--- a/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -33,20 +33,19 @@ public:
class SegmentRequest {
public:
SegmentRequest() = default;
- SegmentRequest(size_t ContentSize, unsigned ContentAlign,
- uint64_t ZeroFillSize, unsigned ZeroFillAlign)
- : ContentSize(ContentSize), ZeroFillSize(ZeroFillSize),
- ContentAlign(ContentAlign), ZeroFillAlign(ZeroFillAlign) {}
+ SegmentRequest(uint64_t Alignment, size_t ContentSize,
+ uint64_t ZeroFillSize)
+ : Alignment(Alignment), ContentSize(ContentSize),
+ ZeroFillSize(ZeroFillSize) {
+ assert(isPowerOf2_32(Alignment) && "Alignment must be power of 2");
+ }
+ uint64_t getAlignment() const { return Alignment; }
size_t getContentSize() const { return ContentSize; }
- unsigned getContentAlignment() const { return ContentAlign; }
uint64_t getZeroFillSize() const { return ZeroFillSize; }
- unsigned getZeroFillAlignment() const { return ZeroFillAlign; }
-
private:
+ uint64_t Alignment = 0;
size_t ContentSize = 0;
uint64_t ZeroFillSize = 0;
- unsigned ContentAlign = 0;
- unsigned ZeroFillAlign = 0;
};
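
With the separate content and zero-fill alignments folded into one, a request now looks like this (the sizes are placeholders):

#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"

using namespace llvm::jitlink;

// One alignment now covers both the content and zero-fill parts.
static JITLinkMemoryManager::SegmentRequest makeDataSegment() {
  return JITLinkMemoryManager::SegmentRequest(/*Alignment=*/16,
                                              /*ContentSize=*/4096,
                                              /*ZeroFillSize=*/1024);
}
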
using SegmentsRequestMap = DenseMap<unsigned, SegmentRequest>;
diff --git a/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
new file mode 100644
index 000000000000..d70b545fff86
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
@@ -0,0 +1,60 @@
+//===---- MachO_arm64.h - JIT link functions for MachO/arm64 ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// jit-link functions for MachO/arm64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H
+#define LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+namespace MachO_arm64_Edges {
+
+enum MachOARM64RelocationKind : Edge::Kind {
+ Branch26 = Edge::FirstRelocation,
+ Pointer32,
+ Pointer64,
+ Pointer64Anon,
+ Page21,
+ PageOffset12,
+ GOTPage21,
+ GOTPageOffset12,
+ PointerToGOT,
+ PairedAddend,
+ LDRLiteral19,
+ Delta32,
+ Delta64,
+ NegDelta32,
+ NegDelta64,
+};
+
+} // namespace MachO_arm64_Edges
+
+/// jit-link the given object buffer, which must be a MachO arm64 object file.
+///
+/// If PrePrunePasses is empty then a default mark-live pass will be inserted
+/// that will mark all exported symbols live. If PrePrunePasses is not empty,
+/// the caller is responsible for including a pass to mark symbols as live.
+///
+/// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will
+/// be inserted. If PostPrunePasses is not empty then the caller is responsible
+/// for including a pass to insert GOT and stub edges.
+void jitLink_MachO_arm64(std::unique_ptr<JITLinkContext> Ctx);
+
+/// Return the string name of the given MachO arm64 edge kind.
+StringRef getMachOARM64RelocationKindName(Edge::Kind R);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H
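
A trivial sketch using the declarations in this new header (the exact strings returned are an implementation detail, not specified here):

#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::jitlink;

// Print the name of an arm64 edge kind, e.g.
// printKind(MachO_arm64_Edges::Branch26).
static void printKind(Edge::Kind K) {
  errs() << getMachOARM64RelocationKindName(K) << "\n";
}
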
diff --git a/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
index 1d5b586afc32..00a7feb86e83 100644
--- a/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
+++ b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
@@ -22,6 +22,7 @@ namespace MachO_x86_64_Edges {
enum MachOX86RelocationKind : Edge::Kind {
Branch32 = Edge::FirstRelocation,
+ Pointer32,
Pointer64,
Pointer64Anon,
PCRel32,
diff --git a/include/llvm/ExecutionEngine/JITSymbol.h b/include/llvm/ExecutionEngine/JITSymbol.h
index b14154c5b5e8..c0f1ca4b9876 100644
--- a/include/llvm/ExecutionEngine/JITSymbol.h
+++ b/include/llvm/ExecutionEngine/JITSymbol.h
@@ -23,6 +23,7 @@
#include <string>
#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
@@ -217,7 +218,7 @@ private:
/// Represents a symbol in the JIT.
class JITSymbol {
public:
- using GetAddressFtor = std::function<Expected<JITTargetAddress>()>;
+ using GetAddressFtor = unique_function<Expected<JITTargetAddress>()>;
/// Create a 'null' symbol, used to represent a "symbol not found"
/// result from a successful (non-erroneous) lookup.
@@ -325,7 +326,7 @@ class JITSymbolResolver {
public:
using LookupSet = std::set<StringRef>;
using LookupResult = std::map<StringRef, JITEvaluatedSymbol>;
- using OnResolvedFunction = std::function<void(Expected<LookupResult>)>;
+ using OnResolvedFunction = unique_function<void(Expected<LookupResult>)>;
virtual ~JITSymbolResolver() = default;
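
The switch from std::function matters because unique_function can own move-only captures, which std::function's copyability requirement forbids. A standalone sketch:

#include "llvm/ADT/FunctionExtras.h"
#include <memory>
#include <utility>

// A move-only capture: legal in unique_function, ill-formed in std::function.
static llvm::unique_function<int()> makeCounter() {
  auto State = std::make_unique<int>(0);
  return [State = std::move(State)]() mutable { return ++*State; };
}
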
diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 5f593a27cad6..7946b5b7b209 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -26,6 +26,7 @@
#include "llvm/ExecutionEngine/Orc/LazyReexports.h"
#include "llvm/ExecutionEngine/Orc/Legacy.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/Speculation.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
@@ -91,6 +92,8 @@ public:
/// Sets the partition function.
void setPartitionFunction(PartitionFunction Partition);
+ /// Sets the ImplSymbolMap
+ void setImplMap(ImplSymbolMap *Imp);
/// Emits the given module. This should not be called by clients: it will be
/// called by the JIT when a definition added via the add method is requested.
void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
@@ -128,6 +131,7 @@ private:
PerDylibResourcesMap DylibResources;
PartitionFunction Partition = compileRequested;
SymbolLinkagePromoter PromoteSymbols;
+ ImplSymbolMap *AliaseeImpls = nullptr;
};
/// Compile-on-demand layer.
@@ -187,7 +191,7 @@ private:
std::unique_ptr<ResourceOwner<ResourceT>>
wrapOwnership(ResourcePtrT ResourcePtr) {
using RO = ResourceOwnerImpl<ResourceT, ResourcePtrT>;
- return llvm::make_unique<RO>(std::move(ResourcePtr));
+ return std::make_unique<RO>(std::move(ResourcePtr));
}
struct LogicalDylib {
@@ -440,7 +444,7 @@ private:
return Error::success();
// Create the GlobalValues module.
- auto GVsM = llvm::make_unique<Module>((SrcM.getName() + ".globals").str(),
+ auto GVsM = std::make_unique<Module>((SrcM.getName() + ".globals").str(),
SrcM.getContext());
GVsM->setDataLayout(DL);
@@ -633,7 +637,7 @@ private:
NewName += F->getName();
}
- auto M = llvm::make_unique<Module>(NewName, SrcM.getContext());
+ auto M = std::make_unique<Module>(NewName, SrcM.getContext());
M->setDataLayout(SrcM.getDataLayout());
ValueToValueMapTy VMap;
diff --git a/include/llvm/ExecutionEngine/Orc/Core.h b/include/llvm/ExecutionEngine/Orc/Core.h
index 94a5618233e4..4f22a4c38796 100644
--- a/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/include/llvm/ExecutionEngine/Orc/Core.h
@@ -14,6 +14,7 @@
#define LLVM_EXECUTIONENGINE_ORC_CORE_H
#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
@@ -51,8 +52,7 @@ using SymbolMap = DenseMap<SymbolStringPtr, JITEvaluatedSymbol>;
/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags.
using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>;
-/// A base class for materialization failures that allows the failing
-/// symbols to be obtained for logging.
+/// A map from JITDylibs to sets of symbols.
using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;
/// A list of (JITDylib*, bool) pairs.
@@ -108,7 +108,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases);
raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S);
/// Callback to notify client that symbols have been resolved.
-using SymbolsResolvedCallback = std::function<void(Expected<SymbolMap>)>;
+using SymbolsResolvedCallback = unique_function<void(Expected<SymbolMap>)>;
/// Callback to register the dependencies for a given query.
using RegisterDependenciesFunction =
@@ -124,13 +124,13 @@ class FailedToMaterialize : public ErrorInfo<FailedToMaterialize> {
public:
static char ID;
- FailedToMaterialize(SymbolNameSet Symbols);
+ FailedToMaterialize(std::shared_ptr<SymbolDependenceMap> Symbols);
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
- const SymbolNameSet &getSymbols() const { return Symbols; }
+ const SymbolDependenceMap &getSymbols() const { return *Symbols; }
private:
- SymbolNameSet Symbols;
+ std::shared_ptr<SymbolDependenceMap> Symbols;
};
/// Used to notify clients when symbols can not be found during a lookup.
@@ -205,12 +205,26 @@ public:
/// symbols must be ones covered by this MaterializationResponsibility
/// instance. Individual calls to this method may resolve a subset of the
/// symbols, but all symbols must have been resolved prior to calling emit.
- void notifyResolved(const SymbolMap &Symbols);
+ ///
+ /// This method will return an error if any symbols being resolved have been
+ /// moved to the error state due to the failure of a dependency. If this
+ /// method returns an error then clients should log it and call
+ /// failMaterialization. If no dependencies have been registered for the
+ /// symbols covered by this MaterializationResponsibility then this method
+ /// is guaranteed to return Error::success() and can be wrapped with cantFail.
+ Error notifyResolved(const SymbolMap &Symbols);
/// Notifies the target JITDylib (and any pending queries on that JITDylib)
/// that all symbols covered by this MaterializationResponsibility instance
/// have been emitted.
- void notifyEmitted();
+ ///
+ /// This method will return an error if any symbols being resolved have been
+ /// moved to the error state due to the failure of a dependency. If this
+ /// method returns an error then clients should log it and call
+ /// failMaterialization. If no dependencies have been registered for the
+ /// symbols covered by this MaterializationResponsibility then this method
+ /// is guaranteed to return Error::success() and can be wrapped with cantFail.
+ Error notifyEmitted();
/// Adds new symbols to the JITDylib and this responsibility instance.
/// JITDylib entries start out in the materializing state.
@@ -346,7 +360,7 @@ private:
///
inline std::unique_ptr<AbsoluteSymbolsMaterializationUnit>
absoluteSymbols(SymbolMap Symbols, VModuleKey K = VModuleKey()) {
- return llvm::make_unique<AbsoluteSymbolsMaterializationUnit>(
+ return std::make_unique<AbsoluteSymbolsMaterializationUnit>(
std::move(Symbols), std::move(K));
}
@@ -390,7 +404,7 @@ private:
/// \endcode
inline std::unique_ptr<ReExportsMaterializationUnit>
symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) {
- return llvm::make_unique<ReExportsMaterializationUnit>(
+ return std::make_unique<ReExportsMaterializationUnit>(
nullptr, true, std::move(Aliases), std::move(K));
}
@@ -402,7 +416,7 @@ symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) {
inline std::unique_ptr<ReExportsMaterializationUnit>
reexports(JITDylib &SourceJD, SymbolAliasMap Aliases,
bool MatchNonExported = false, VModuleKey K = VModuleKey()) {
- return llvm::make_unique<ReExportsMaterializationUnit>(
+ return std::make_unique<ReExportsMaterializationUnit>(
&SourceJD, MatchNonExported, std::move(Aliases), std::move(K));
}
@@ -411,32 +425,13 @@ reexports(JITDylib &SourceJD, SymbolAliasMap Aliases,
Expected<SymbolAliasMap>
buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols);
-/// ReexportsGenerator can be used with JITDylib::setGenerator to automatically
-/// re-export a subset of the source JITDylib's symbols in the target.
-class ReexportsGenerator {
-public:
- using SymbolPredicate = std::function<bool(SymbolStringPtr)>;
-
- /// Create a reexports generator. If an Allow predicate is passed, only
- /// symbols for which the predicate returns true will be reexported. If no
- /// Allow predicate is passed, all symbols will be exported.
- ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false,
- SymbolPredicate Allow = SymbolPredicate());
-
- Expected<SymbolNameSet> operator()(JITDylib &JD, const SymbolNameSet &Names);
-
-private:
- JITDylib &SourceJD;
- bool MatchNonExported = false;
- SymbolPredicate Allow;
-};
-
/// Represents the state that a symbol has reached during materialization.
enum class SymbolState : uint8_t {
Invalid, /// No symbol should be in this state.
NeverSearched, /// Added to the symbol table, never queried.
Materializing, /// Queried, materialization begun.
Resolved, /// Assigned address, still materializing.
+ Emitted, /// Emitted to memory, but waiting on transitive dependencies.
Ready = 0x3f /// Ready and safe for clients to access.
};
@@ -502,8 +497,12 @@ class JITDylib {
friend class ExecutionSession;
friend class MaterializationResponsibility;
public:
- using GeneratorFunction = std::function<Expected<SymbolNameSet>(
- JITDylib &Parent, const SymbolNameSet &Names)>;
+ class DefinitionGenerator {
+ public:
+ virtual ~DefinitionGenerator();
+ virtual Expected<SymbolNameSet>
+ tryToGenerate(JITDylib &Parent, const SymbolNameSet &Names) = 0;
+ };
using AsynchronousSymbolQuerySet =
std::set<std::shared_ptr<AsynchronousSymbolQuery>>;
@@ -519,13 +518,20 @@ public:
/// Get a reference to the ExecutionSession for this JITDylib.
ExecutionSession &getExecutionSession() const { return ES; }
- /// Set a definition generator. If set, whenever a symbol fails to resolve
- /// within this JITDylib, lookup and lookupFlags will pass the unresolved
- /// symbols set to the definition generator. The generator can optionally
- /// add a definition for the unresolved symbols to the dylib.
- void setGenerator(GeneratorFunction DefGenerator) {
- this->DefGenerator = std::move(DefGenerator);
- }
+ /// Adds a definition generator to this JITDylib and returns a reference to
+ /// it.
+ ///
+ /// When JITDylibs are searched during lookup, if no existing definition of
+ /// a symbol is found, then any generators that have been added are run (in
+ /// the order that they were added) to potentially generate a definition.
+ template <typename GeneratorT>
+ GeneratorT &addGenerator(std::unique_ptr<GeneratorT> DefGenerator);
+
+ /// Remove a definition generator from this JITDylib.
+ ///
+ /// The given generator must exist in this JITDylib's generators list (i.e.
+ /// have been added and not yet removed).
+ void removeGenerator(DefinitionGenerator &G);
/// Set the search order to be used when fixing up definitions in JITDylib.
/// This will replace the previous search order, and apply to any symbol
@@ -633,17 +639,17 @@ private:
struct MaterializingInfo {
SymbolDependenceMap Dependants;
SymbolDependenceMap UnemittedDependencies;
- bool IsEmitted = false;
void addQuery(std::shared_ptr<AsynchronousSymbolQuery> Q);
void removeQuery(const AsynchronousSymbolQuery &Q);
AsynchronousSymbolQueryList takeQueriesMeeting(SymbolState RequiredState);
- AsynchronousSymbolQueryList takeAllQueries();
+ AsynchronousSymbolQueryList takeAllPendingQueries() {
+ return std::move(PendingQueries);
+ }
bool hasQueriesPending() const { return !PendingQueries.empty(); }
const AsynchronousSymbolQueryList &pendingQueries() const {
return PendingQueries;
}
-
private:
AsynchronousSymbolQueryList PendingQueries;
};
@@ -710,9 +716,9 @@ private:
SymbolNameSet &Unresolved, bool MatchNonExported,
MaterializationUnitList &MUs);
- void lodgeQueryImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, bool MatchNonExported,
- MaterializationUnitList &MUs);
+ Error lodgeQueryImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ SymbolNameSet &Unresolved, bool MatchNonExported,
+ MaterializationUnitList &MUs);
bool lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
@@ -734,18 +740,20 @@ private:
void addDependencies(const SymbolStringPtr &Name,
const SymbolDependenceMap &Dependants);
- void resolve(const SymbolMap &Resolved);
+ Error resolve(const SymbolMap &Resolved);
- void emit(const SymbolFlagsMap &Emitted);
+ Error emit(const SymbolFlagsMap &Emitted);
- void notifyFailed(const SymbolNameSet &FailedSymbols);
+ using FailedSymbolsWorklist =
+ std::vector<std::pair<JITDylib *, SymbolStringPtr>>;
+ static void notifyFailed(FailedSymbolsWorklist FailedSymbols);
ExecutionSession &ES;
std::string JITDylibName;
SymbolTable Symbols;
UnmaterializedInfosMap UnmaterializedInfos;
MaterializingInfosMap MaterializingInfos;
- GeneratorFunction DefGenerator;
+ std::vector<std::unique_ptr<DefinitionGenerator>> DefGenerators;
JITDylibSearchList SearchOrder;
};
@@ -933,6 +941,14 @@ private:
OutstandingMUs;
};
+template <typename GeneratorT>
+GeneratorT &JITDylib::addGenerator(std::unique_ptr<GeneratorT> DefGenerator) {
+ auto &G = *DefGenerator;
+ ES.runSessionLocked(
+ [&]() { DefGenerators.push_back(std::move(DefGenerator)); });
+ return G;
+}
+
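
A sketch combining addGenerator with the DynamicLibrarySearchGenerator changes later in this patch:

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"

using namespace llvm;
using namespace llvm::orc;

// Attach a process-symbol generator; the JITDylib takes ownership and
// hands back a reference usable with removeGenerator later.
static Error addProcessSymbols(JITDylib &JD, char GlobalPrefix) {
  auto G = DynamicLibrarySearchGenerator::GetForCurrentProcess(GlobalPrefix);
  if (!G)
    return G.takeError();
  JD.addGenerator(std::move(*G));
  return Error::success();
}
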
template <typename Func>
auto JITDylib::withSearchOrderDo(Func &&F)
-> decltype(F(std::declval<const JITDylibSearchList &>())) {
@@ -972,6 +988,27 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU) {
});
}
+/// ReexportsGenerator can be used with JITDylib::setGenerator to automatically
+/// re-export a subset of the source JITDylib's symbols in the target.
+class ReexportsGenerator : public JITDylib::DefinitionGenerator {
+public:
+ using SymbolPredicate = std::function<bool(SymbolStringPtr)>;
+
+ /// Create a reexports generator. If an Allow predicate is passed, only
+ /// symbols for which the predicate returns true will be reexported. If no
+ /// Allow predicate is passed, all symbols will be exported.
+ ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false,
+ SymbolPredicate Allow = SymbolPredicate());
+
+ Expected<SymbolNameSet> tryToGenerate(JITDylib &JD,
+ const SymbolNameSet &Names) override;
+
+private:
+ JITDylib &SourceJD;
+ bool MatchNonExported = false;
+ SymbolPredicate Allow;
+};
+
/// Mangles symbol names then uniques them in the context of an
/// ExecutionSession.
class MangleAndInterner {
diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index 75865920c741..cf0a428662ef 100644
--- a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -19,6 +19,7 @@
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/Archive.h"
#include "llvm/Support/DynamicLibrary.h"
#include <algorithm>
#include <cstdint>
@@ -37,6 +38,8 @@ class Value;
namespace orc {
+class ObjectLayer;
+
/// This iterator provides a convenient way to iterate over the elements
/// of an llvm.global_ctors/llvm.global_dtors instance.
///
@@ -237,7 +240,7 @@ public:
/// If an instance of this class is attached to a JITDylib as a fallback
/// definition generator, then any symbol found in the given DynamicLibrary that
/// passes the 'Allow' predicate will be added to the JITDylib.
-class DynamicLibrarySearchGenerator {
+class DynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator {
public:
using SymbolPredicate = std::function<bool(SymbolStringPtr)>;
@@ -253,19 +256,20 @@ public:
/// Permanently loads the library at the given path and, on success, returns
/// a DynamicLibrarySearchGenerator that will search it for symbol definitions
/// in the library. On failure returns the reason the library failed to load.
- static Expected<DynamicLibrarySearchGenerator>
+ static Expected<std::unique_ptr<DynamicLibrarySearchGenerator>>
Load(const char *FileName, char GlobalPrefix,
SymbolPredicate Allow = SymbolPredicate());
/// Creates a DynamicLibrarySearchGenerator that searches for symbols in
/// the current process.
- static Expected<DynamicLibrarySearchGenerator>
+ static Expected<std::unique_ptr<DynamicLibrarySearchGenerator>>
GetForCurrentProcess(char GlobalPrefix,
SymbolPredicate Allow = SymbolPredicate()) {
return Load(nullptr, GlobalPrefix, std::move(Allow));
}
- Expected<SymbolNameSet> operator()(JITDylib &JD, const SymbolNameSet &Names);
+ Expected<SymbolNameSet> tryToGenerate(JITDylib &JD,
+ const SymbolNameSet &Names) override;
private:
sys::DynamicLibrary Dylib;
@@ -273,6 +277,40 @@ private:
char GlobalPrefix;
};
+/// A utility class to expose symbols from a static library.
+///
+/// If an instance of this class is attached to a JITDylib as a fallback
+/// definition generator, then any symbol found in the archive will result in
+/// the containing object being added to the JITDylib.
+class StaticLibraryDefinitionGenerator : public JITDylib::DefinitionGenerator {
+public:
+ /// Try to create a StaticLibraryDefinitionGenerator from the given path.
+ ///
+ /// This call will succeed if the file at the given path contains a valid
+ /// static library archive, otherwise it will return an error.
+ static Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
+ Load(ObjectLayer &L, const char *FileName);
+
+ /// Try to create a StaticLibraryDefinitionGenerator from the given memory
+ /// buffer. This call will succeed if the buffer contains a valid archive,
+ /// otherwise it will return an error.
+ static Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
+ Create(ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer);
+
+ Expected<SymbolNameSet> tryToGenerate(JITDylib &JD,
+ const SymbolNameSet &Names) override;
+
+private:
+ StaticLibraryDefinitionGenerator(ObjectLayer &L,
+ std::unique_ptr<MemoryBuffer> ArchiveBuffer,
+ Error &Err);
+
+ ObjectLayer &L;
+ std::unique_ptr<MemoryBuffer> ArchiveBuffer;
+ object::Archive Archive;
+ size_t UnrealizedObjects = 0;
+};
+
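
A sketch of loading an archive through the new generator (the path is a placeholder):

#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"

using namespace llvm;
using namespace llvm::orc;

// Expose an archive's objects; members are only materialized when one of
// their symbols is actually looked up.
static Error addArchive(ObjectLayer &L, JITDylib &JD, const char *Path) {
  auto G = StaticLibraryDefinitionGenerator::Load(L, Path);
  if (!G)
    return G.takeError();
  JD.addGenerator(std::move(*G));
  return Error::success();
}
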
} // end namespace orc
} // end namespace llvm
diff --git a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
index 1b4c8b6cd95f..b71e5b339711 100644
--- a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -22,6 +22,9 @@ namespace llvm {
class Module;
namespace orc {
+/// A layer that applies a transform to emitted modules.
+/// The transform function is responsible for locking the ThreadSafeContext
+/// before operating on the module.
class IRTransformLayer : public IRLayer {
public:
using TransformFunction = std::function<Expected<ThreadSafeModule>(
diff --git a/include/llvm/ExecutionEngine/Orc/LLJIT.h b/include/llvm/ExecutionEngine/Orc/LLJIT.h
index 0aac1916423f..b1e47d77557c 100644
--- a/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -184,8 +184,8 @@ private:
class LLJITBuilderState {
public:
- using ObjectLinkingLayerCreator =
- std::function<std::unique_ptr<ObjectLayer>(ExecutionSession &)>;
+ using ObjectLinkingLayerCreator = std::function<std::unique_ptr<ObjectLayer>(
+ ExecutionSession &, const Triple &TT)>;
using CompileFunctionCreator =
std::function<Expected<IRCompileLayer::CompileFunction>(
diff --git a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
index 855e31b33549..b31914f12a0d 100644
--- a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
+++ b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
#include <memory>
namespace llvm {
@@ -62,7 +63,7 @@ std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>>
createLambdaResolver(DylibLookupFtorT DylibLookupFtor,
ExternalLookupFtorT ExternalLookupFtor) {
using LR = LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>;
- return make_unique<LR>(std::move(DylibLookupFtor),
+ return std::make_unique<LR>(std::move(DylibLookupFtor),
std::move(ExternalLookupFtor));
}
@@ -72,7 +73,7 @@ createLambdaResolver(ORCv1DeprecationAcknowledgement,
DylibLookupFtorT DylibLookupFtor,
ExternalLookupFtorT ExternalLookupFtor) {
using LR = LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>;
- return make_unique<LR>(AcknowledgeORCv1Deprecation,
+ return std::make_unique<LR>(AcknowledgeORCv1Deprecation,
std::move(DylibLookupFtor),
std::move(ExternalLookupFtor));
}
diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
index 16202d89f861..b67a9feed523 100644
--- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
@@ -49,28 +49,24 @@ private:
switch (EmitState) {
case NotEmitted:
if (auto GV = searchGVs(Name, ExportedSymbolsOnly)) {
- // Create a std::string version of Name to capture here - the argument
- // (a StringRef) may go away before the lambda is executed.
- // FIXME: Use capture-init when we move to C++14.
- std::string PName = Name;
JITSymbolFlags Flags = JITSymbolFlags::fromGlobalValue(*GV);
- auto GetAddress =
- [this, ExportedSymbolsOnly, PName, &B]() -> Expected<JITTargetAddress> {
- if (this->EmitState == Emitting)
- return 0;
- else if (this->EmitState == NotEmitted) {
- this->EmitState = Emitting;
- if (auto Err = this->emitToBaseLayer(B))
- return std::move(Err);
- this->EmitState = Emitted;
- }
- if (auto Sym = B.findSymbolIn(K, PName, ExportedSymbolsOnly))
- return Sym.getAddress();
- else if (auto Err = Sym.takeError())
+ auto GetAddress = [this, ExportedSymbolsOnly, Name = Name.str(),
+ &B]() -> Expected<JITTargetAddress> {
+ if (this->EmitState == Emitting)
+ return 0;
+ else if (this->EmitState == NotEmitted) {
+ this->EmitState = Emitting;
+ if (auto Err = this->emitToBaseLayer(B))
return std::move(Err);
- else
- llvm_unreachable("Successful symbol lookup should return "
- "definition address here");
+ this->EmitState = Emitted;
+ }
+ if (auto Sym = B.findSymbolIn(K, Name, ExportedSymbolsOnly))
+ return Sym.getAddress();
+ else if (auto Err = Sym.takeError())
+ return std::move(Err);
+ else
+ llvm_unreachable("Successful symbol lookup should return "
+ "definition address here");
};
return JITSymbol(std::move(GetAddress), Flags);
} else
@@ -171,7 +167,7 @@ private:
bool ExportedSymbolsOnly) const {
assert(!MangledSymbols && "Mangled symbols map already exists?");
- auto Symbols = llvm::make_unique<StringMap<const GlobalValue*>>();
+ auto Symbols = std::make_unique<StringMap<const GlobalValue*>>();
Mangler Mang;
@@ -209,7 +205,7 @@ public:
Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
assert(!ModuleMap.count(K) && "VModuleKey K already in use");
ModuleMap[K] =
- llvm::make_unique<EmissionDeferredModule>(std::move(K), std::move(M));
+ std::make_unique<EmissionDeferredModule>(std::move(K), std::move(M));
return Error::success();
}
diff --git a/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/include/llvm/ExecutionEngine/Orc/LazyReexports.h
index 9fdd1d15f782..311ed59b1549 100644
--- a/include/llvm/ExecutionEngine/Orc/LazyReexports.h
+++ b/include/llvm/ExecutionEngine/Orc/LazyReexports.h
@@ -18,6 +18,7 @@
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
+#include "llvm/ExecutionEngine/Orc/Speculation.h"
namespace llvm {
@@ -70,7 +71,7 @@ public:
template <typename NotifyResolvedImpl>
static std::unique_ptr<NotifyResolvedFunction>
createNotifyResolvedFunction(NotifyResolvedImpl NotifyResolved) {
- return llvm::make_unique<NotifyResolvedFunctionImpl<NotifyResolvedImpl>>(
+ return std::make_unique<NotifyResolvedFunctionImpl<NotifyResolvedImpl>>(
std::move(NotifyResolved));
}
@@ -159,7 +160,7 @@ public:
IndirectStubsManager &ISManager,
JITDylib &SourceJD,
SymbolAliasMap CallableAliases,
- VModuleKey K);
+ ImplSymbolMap *SrcJDLoc, VModuleKey K);
StringRef getName() const override;
@@ -174,6 +175,7 @@ private:
SymbolAliasMap CallableAliases;
std::shared_ptr<LazyCallThroughManager::NotifyResolvedFunction>
NotifyResolved;
+ ImplSymbolMap *AliaseeTable;
};
/// Define lazy-reexports based on the given SymbolAliasMap. Each lazy re-export
@@ -182,9 +184,10 @@ private:
inline std::unique_ptr<LazyReexportsMaterializationUnit>
lazyReexports(LazyCallThroughManager &LCTManager,
IndirectStubsManager &ISManager, JITDylib &SourceJD,
- SymbolAliasMap CallableAliases, VModuleKey K = VModuleKey()) {
- return llvm::make_unique<LazyReexportsMaterializationUnit>(
- LCTManager, ISManager, SourceJD, std::move(CallableAliases),
+ SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc = nullptr,
+ VModuleKey K = VModuleKey()) {
+ return std::make_unique<LazyReexportsMaterializationUnit>(
+ LCTManager, ISManager, SourceJD, std::move(CallableAliases), SrcJDLoc,
std::move(K));
}
diff --git a/include/llvm/ExecutionEngine/Orc/Legacy.h b/include/llvm/ExecutionEngine/Orc/Legacy.h
index f9cbbf6ff180..148e260c9569 100644
--- a/include/llvm/ExecutionEngine/Orc/Legacy.h
+++ b/include/llvm/ExecutionEngine/Orc/Legacy.h
@@ -84,7 +84,7 @@ createSymbolResolver(GetResponsibilitySetFn &&GetResponsibilitySet,
typename std::remove_reference<GetResponsibilitySetFn>::type>::type,
typename std::remove_cv<
typename std::remove_reference<LookupFn>::type>::type>;
- return llvm::make_unique<LambdaSymbolResolverImpl>(
+ return std::make_unique<LambdaSymbolResolverImpl>(
std::forward<GetResponsibilitySetFn>(GetResponsibilitySet),
std::forward<LookupFn>(Lookup));
}
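
The llvm::make_unique to std::make_unique rename that recurs throughout these hunks is mechanical: once the tree builds as C++14, the polyfill and the standard function are drop-in equivalents. A trivial standalone check:

    #include <memory>

    struct Widget {
      explicit Widget(int X) : X(X) {}
      int X;
    };

    int main() {
      // std::make_unique (C++14) has the same semantics as the old
      // llvm::make_unique polyfill it replaces.
      auto W = std::make_unique<Widget>(42);
      return W->X == 42 ? 0 : 1;
    }
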
diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index c1e7d27f446e..caf8e707516d 100644
--- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -73,6 +73,9 @@ public:
virtual Error notifyRemovingAllModules() { return Error::success(); }
};
+ using ReturnObjectBufferFunction =
+ std::function<void(std::unique_ptr<MemoryBuffer>)>;
+
/// Construct an ObjectLinkingLayer with the given NotifyLoaded,
/// and NotifyEmitted functors.
ObjectLinkingLayer(ExecutionSession &ES,
@@ -81,6 +84,13 @@ public:
/// Destruct an ObjectLinkingLayer.
~ObjectLinkingLayer();
+ /// Set an object buffer return function. By default object buffers are
+ /// deleted once the JIT has linked them. If a return function is set then
+ /// it will be called to transfer ownership of the buffer instead.
+ void setReturnObjectBuffer(ReturnObjectBufferFunction ReturnObjectBuffer) {
+ this->ReturnObjectBuffer = std::move(ReturnObjectBuffer);
+ }
+
/// Add a pass-config modifier.
ObjectLinkingLayer &addPlugin(std::unique_ptr<Plugin> P) {
std::lock_guard<std::mutex> Lock(LayerMutex);
@@ -138,6 +148,7 @@ private:
jitlink::JITLinkMemoryManager &MemMgr;
bool OverrideObjectFlags = false;
bool AutoClaimObjectSymbols = false;
+ ReturnObjectBufferFunction ReturnObjectBuffer;
DenseMap<VModuleKey, AllocPtr> TrackedAllocs;
std::vector<AllocPtr> UntrackedAllocs;
std::vector<std::unique_ptr<Plugin>> Plugins;
@@ -153,10 +164,16 @@ public:
Error notifyRemovingAllModules() override;
private:
+
+ struct EHFrameRange {
+ JITTargetAddress Addr = 0;
+ size_t Size;
+ };
+
jitlink::EHFrameRegistrar &Registrar;
- DenseMap<MaterializationResponsibility *, JITTargetAddress> InProcessLinks;
- DenseMap<VModuleKey, JITTargetAddress> TrackedEHFrameAddrs;
- std::vector<JITTargetAddress> UntrackedEHFrameAddrs;
+ DenseMap<MaterializationResponsibility *, EHFrameRange> InProcessLinks;
+ DenseMap<VModuleKey, EHFrameRange> TrackedEHFrameRanges;
+ std::vector<EHFrameRange> UntrackedEHFrameRanges;
};
} // end namespace orc
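
setReturnObjectBuffer above changes who owns an object buffer after linking. A standalone analogue of that contract, with std::string standing in for MemoryBuffer and all names illustrative: the buffer dies with the consumer by default, and is handed back when a return function is installed.

    #include <functional>
    #include <iostream>
    #include <memory>
    #include <string>
    #include <utility>

    class Consumer {
    public:
      using ReturnBufferFn = std::function<void(std::unique_ptr<std::string>)>;
      void setReturnBuffer(ReturnBufferFn F) { ReturnBuffer = std::move(F); }
      void consume(std::unique_ptr<std::string> Buf) {
        // ... link the object ...
        if (ReturnBuffer)
          ReturnBuffer(std::move(Buf)); // transfer ownership back
        // otherwise Buf is destroyed at end of scope
      }

    private:
      ReturnBufferFn ReturnBuffer;
    };

    int main() {
      Consumer C;
      C.setReturnBuffer([](std::unique_ptr<std::string> B) {
        std::cout << "returned: " << *B << "\n";
      });
      C.consume(std::make_unique<std::string>("object bytes"));
      return 0;
    }
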
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
index 8b875b7906e1..86e8d5df3ad9 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
@@ -493,7 +493,7 @@ public:
ExecutionSession &ES,
JITTargetAddress ErrorHandlerAddress)
: JITCompileCallbackManager(
- llvm::make_unique<RemoteTrampolinePool>(Client), ES,
+ std::make_unique<RemoteTrampolinePool>(Client), ES,
ErrorHandlerAddress) {}
};
@@ -553,7 +553,7 @@ public:
auto Id = IndirectStubOwnerIds.getNext();
if (auto Err = callB<stubs::CreateIndirectStubsOwner>(Id))
return std::move(Err);
- return llvm::make_unique<RemoteIndirectStubsManager>(*this, Id);
+ return std::make_unique<RemoteIndirectStubsManager>(*this, Id);
}
Expected<RemoteCompileCallbackManager &>
diff --git a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
index 07c7471afc6a..752a0a34e0a1 100644
--- a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
+++ b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
@@ -359,9 +359,9 @@ public:
{
assert(KeyName != nullptr && "No keyname pointer");
std::lock_guard<std::recursive_mutex> Lock(SerializersMutex);
- // FIXME: Move capture Serialize once we have C++14.
Serializers[ErrorInfoT::classID()] =
- [KeyName, Serialize](ChannelT &C, const ErrorInfoBase &EIB) -> Error {
+ [KeyName, Serialize = std::move(Serialize)](
+ ChannelT &C, const ErrorInfoBase &EIB) -> Error {
assert(EIB.dynamicClassID() == ErrorInfoT::classID() &&
"Serializer called for wrong error type");
if (auto Err = serializeSeq(C, *KeyName))
@@ -551,26 +551,26 @@ public:
/// RPC channel serialization for std::tuple.
static Error serialize(ChannelT &C, const std::tuple<ArgTs...> &V) {
- return serializeTupleHelper(C, V, llvm::index_sequence_for<ArgTs...>());
+ return serializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>());
}
/// RPC channel deserialization for std::tuple.
static Error deserialize(ChannelT &C, std::tuple<ArgTs...> &V) {
- return deserializeTupleHelper(C, V, llvm::index_sequence_for<ArgTs...>());
+ return deserializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>());
}
private:
// Serialization helper for std::tuple.
template <size_t... Is>
static Error serializeTupleHelper(ChannelT &C, const std::tuple<ArgTs...> &V,
- llvm::index_sequence<Is...> _) {
+ std::index_sequence<Is...> _) {
return serializeSeq(C, std::get<Is>(V)...);
}
// Serialization helper for std::tuple.
template <size_t... Is>
static Error deserializeTupleHelper(ChannelT &C, std::tuple<ArgTs...> &V,
- llvm::index_sequence<Is...> _) {
+ std::index_sequence<Is...> _) {
return deserializeSeq(C, std::get<Is>(V)...);
}
};
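
These hunks drop the llvm::index_sequence polyfill for the C++14 std::index_sequence. A standalone sketch of the same pack-expansion pattern the tuple helpers use:

    #include <cstddef>
    #include <iostream>
    #include <tuple>
    #include <utility>

    template <typename... Ts, std::size_t... Is>
    void printTupleHelper(const std::tuple<Ts...> &T,
                          std::index_sequence<Is...>) {
      // Expand one expression per tuple element (C++14, no fold expressions).
      int Dummy[] = {0, (std::cout << std::get<Is>(T) << ' ', 0)...};
      (void)Dummy;
    }

    template <typename... Ts> void printTuple(const std::tuple<Ts...> &T) {
      printTupleHelper(T, std::index_sequence_for<Ts...>());
      std::cout << '\n';
    }

    int main() {
      printTuple(std::make_tuple(1, 2.5, "x")); // prints: 1 2.5 x
      return 0;
    }
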
diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
index 3b11e1b283de..ee9c2cc69c30 100644
--- a/include/llvm/ExecutionEngine/Orc/RPCUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
@@ -338,7 +338,9 @@ public:
return Err;
// Close the response message.
- return C.endSendMessage();
+ if (auto Err = C.endSendMessage())
+ return Err;
+ return C.send();
}
template <typename ChannelT, typename FunctionIdT, typename SequenceNumberT>
@@ -350,7 +352,9 @@ public:
return Err2;
if (auto Err2 = serializeSeq(C, std::move(Err)))
return Err2;
- return C.endSendMessage();
+ if (auto Err2 = C.endSendMessage())
+ return Err2;
+ return C.send();
}
};
@@ -378,8 +382,11 @@ public:
C, *ResultOrErr))
return Err;
- // Close the response message.
- return C.endSendMessage();
+ // End the response message.
+ if (auto Err = C.endSendMessage())
+ return Err;
+
+ return C.send();
}
template <typename ChannelT, typename FunctionIdT, typename SequenceNumberT>
@@ -389,7 +396,9 @@ public:
return Err;
if (auto Err2 = C.startSendMessage(ResponseId, SeqNo))
return Err2;
- return C.endSendMessage();
+ if (auto Err2 = C.endSendMessage())
+ return Err2;
+ return C.send();
}
};
@@ -502,7 +511,7 @@ public:
static typename WrappedHandlerReturn<RetT>::Type
unpackAndRun(HandlerT &Handler, std::tuple<TArgTs...> &Args) {
return unpackAndRunHelper(Handler, Args,
- llvm::index_sequence_for<TArgTs...>());
+ std::index_sequence_for<TArgTs...>());
}
// Call the given handler with the given arguments.
@@ -510,7 +519,7 @@ public:
static Error unpackAndRunAsync(HandlerT &Handler, ResponderT &Responder,
std::tuple<TArgTs...> &Args) {
return unpackAndRunAsyncHelper(Handler, Responder, Args,
- llvm::index_sequence_for<TArgTs...>());
+ std::index_sequence_for<TArgTs...>());
}
// Call the given handler with the given arguments.
@@ -540,14 +549,13 @@ public:
// Deserialize arguments from the channel.
template <typename ChannelT, typename... CArgTs>
static Error deserializeArgs(ChannelT &C, std::tuple<CArgTs...> &Args) {
- return deserializeArgsHelper(C, Args,
- llvm::index_sequence_for<CArgTs...>());
+ return deserializeArgsHelper(C, Args, std::index_sequence_for<CArgTs...>());
}
private:
template <typename ChannelT, typename... CArgTs, size_t... Indexes>
static Error deserializeArgsHelper(ChannelT &C, std::tuple<CArgTs...> &Args,
- llvm::index_sequence<Indexes...> _) {
+ std::index_sequence<Indexes...> _) {
return SequenceSerialization<ChannelT, ArgTs...>::deserialize(
C, std::get<Indexes>(Args)...);
}
@@ -556,18 +564,16 @@ private:
static typename WrappedHandlerReturn<
typename HandlerTraits<HandlerT>::ReturnType>::Type
unpackAndRunHelper(HandlerT &Handler, ArgTuple &Args,
- llvm::index_sequence<Indexes...>) {
+ std::index_sequence<Indexes...>) {
return run(Handler, std::move(std::get<Indexes>(Args))...);
}
-
template <typename HandlerT, typename ResponderT, typename ArgTuple,
size_t... Indexes>
static typename WrappedHandlerReturn<
typename HandlerTraits<HandlerT>::ReturnType>::Type
unpackAndRunAsyncHelper(HandlerT &Handler, ResponderT &Responder,
- ArgTuple &Args,
- llvm::index_sequence<Indexes...>) {
+ ArgTuple &Args, std::index_sequence<Indexes...>) {
return run(Handler, Responder, std::move(std::get<Indexes>(Args))...);
}
};
@@ -743,11 +749,15 @@ public:
// to the user defined handler.
Error handleResponse(ChannelT &C) override {
Error Result = Error::success();
- if (auto Err =
- SerializationTraits<ChannelT, Error, Error>::deserialize(C, Result))
+ if (auto Err = SerializationTraits<ChannelT, Error, Error>::deserialize(
+ C, Result)) {
+ consumeError(std::move(Result));
return Err;
- if (auto Err = C.endReceiveMessage())
+ }
+ if (auto Err = C.endReceiveMessage()) {
+ consumeError(std::move(Result));
return Err;
+ }
return Handler(std::move(Result));
}
@@ -767,7 +777,7 @@ private:
// Create a ResponseHandler from a given user handler.
template <typename ChannelT, typename FuncRetT, typename HandlerT>
std::unique_ptr<ResponseHandler<ChannelT>> createResponseHandler(HandlerT H) {
- return llvm::make_unique<ResponseHandlerImpl<ChannelT, FuncRetT, HandlerT>>(
+ return std::make_unique<ResponseHandlerImpl<ChannelT, FuncRetT, HandlerT>>(
std::move(H));
}
@@ -1403,14 +1413,12 @@ public:
using ErrorReturn = typename RTraits::ErrorReturnType;
using ErrorReturnPromise = typename RTraits::ReturnPromiseType;
- // FIXME: Stack allocate and move this into the handler once LLVM builds
- // with C++14.
- auto Promise = std::make_shared<ErrorReturnPromise>();
- auto FutureResult = Promise->get_future();
+ ErrorReturnPromise Promise;
+ auto FutureResult = Promise.get_future();
if (auto Err = this->template appendCallAsync<Func>(
- [Promise](ErrorReturn RetOrErr) {
- Promise->set_value(std::move(RetOrErr));
+ [Promise = std::move(Promise)](ErrorReturn RetOrErr) mutable {
+ Promise.set_value(std::move(RetOrErr));
return Error::success();
},
Args...)) {
@@ -1523,6 +1531,12 @@ public:
return std::move(Err);
}
+ if (auto Err = this->C.send()) {
+ detail::ResultTraits<typename Func::ReturnType>::consumeAbandoned(
+ std::move(Result));
+ return std::move(Err);
+ }
+
while (!ReceivedResponse) {
if (auto Err = this->handleOne()) {
detail::ResultTraits<typename Func::ReturnType>::consumeAbandoned(
@@ -1582,8 +1596,7 @@ public:
// outstanding calls count, then poke the condition variable.
using ArgType = typename detail::ResponseHandlerArg<
typename detail::HandlerTraits<HandlerT>::Type>::ArgType;
- // FIXME: Move handler into wrapped handler once we have C++14.
- auto WrappedHandler = [this, Handler](ArgType Arg) {
+ auto WrappedHandler = [this, Handler = std::move(Handler)](ArgType Arg) {
auto Err = Handler(std::move(Arg));
std::unique_lock<std::mutex> Lock(M);
--NumOutstandingCalls;
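
The promise hunk above stack-allocates the promise and moves it into the callback, removing the shared_ptr workaround. A standalone sketch; 'mutable' is required because std::promise::set_value is non-const, and the resulting closure is move-only:

    #include <future>
    #include <iostream>
    #include <utility>

    int main() {
      std::promise<int> Promise;
      auto FutureResult = Promise.get_future();
      // C++14 init-capture moves the promise into the closure.
      auto Callback = [Promise = std::move(Promise)](int V) mutable {
        Promise.set_value(V);
      };
      Callback(42);
      std::cout << FutureResult.get() << "\n"; // prints 42
      return 0;
    }

Because such a closure cannot be copied it cannot be stored in std::function; the RPC code can accept it because the handler type is a template parameter.
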
diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index d9535ce5f21f..c5106cf09ecc 100644
--- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -216,7 +216,7 @@ private:
: K(std::move(K)),
Parent(Parent),
MemMgr(std::move(MemMgr)),
- PFC(llvm::make_unique<PreFinalizeContents>(
+ PFC(std::make_unique<PreFinalizeContents>(
std::move(Obj), std::move(Resolver),
ProcessAllSections)) {
buildInitialSymbolTable(PFC->Obj);
@@ -234,7 +234,7 @@ private:
JITSymbolResolverAdapter ResolverAdapter(Parent.ES, *PFC->Resolver,
nullptr);
- PFC->RTDyld = llvm::make_unique<RuntimeDyld>(*MemMgr, ResolverAdapter);
+ PFC->RTDyld = std::make_unique<RuntimeDyld>(*MemMgr, ResolverAdapter);
PFC->RTDyld->setProcessAllSections(PFC->ProcessAllSections);
Finalized = true;
@@ -338,7 +338,7 @@ private:
std::shared_ptr<SymbolResolver> Resolver,
bool ProcessAllSections) {
using LOS = ConcreteLinkedObject<MemoryManagerPtrT>;
- return llvm::make_unique<LOS>(Parent, std::move(K), std::move(Obj),
+ return std::make_unique<LOS>(Parent, std::move(K), std::move(Obj),
std::move(MemMgr), std::move(Resolver),
ProcessAllSections);
}
diff --git a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
index b87cf697a81e..d7304cfcf931 100644
--- a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
@@ -137,17 +137,12 @@ protected:
RemoteSymbolId Id)
: C(C), Id(Id) {}
- RemoteSymbolMaterializer(const RemoteSymbolMaterializer &Other)
- : C(Other.C), Id(Other.Id) {
- // FIXME: This is a horrible, auto_ptr-style, copy-as-move operation.
- // It should be removed as soon as LLVM has C++14's generalized
- // lambda capture (at which point the materializer can be moved
- // into the lambda in remoteToJITSymbol below).
- const_cast<RemoteSymbolMaterializer&>(Other).Id = 0;
+ RemoteSymbolMaterializer(RemoteSymbolMaterializer &&Other)
+ : C(Other.C), Id(Other.Id) {
+ Other.Id = 0;
}
- RemoteSymbolMaterializer&
- operator=(const RemoteSymbolMaterializer&) = delete;
+ RemoteSymbolMaterializer &operator=(RemoteSymbolMaterializer &&) = delete;
/// Release the remote symbol.
~RemoteSymbolMaterializer() {
@@ -218,9 +213,9 @@ protected:
return nullptr;
// else...
RemoteSymbolMaterializer RSM(*this, RemoteSym.first);
- auto Sym =
- JITSymbol([RSM]() mutable { return RSM.materialize(); },
- RemoteSym.second);
+ auto Sym = JITSymbol(
+ [RSM = std::move(RSM)]() mutable { return RSM.materialize(); },
+ RemoteSym.second);
return Sym;
} else
return RemoteSymOrErr.takeError();
@@ -472,7 +467,7 @@ private:
}
Expected<ObjHandleT> addObject(std::string ObjBuffer) {
- auto Buffer = llvm::make_unique<StringMemoryBuffer>(std::move(ObjBuffer));
+ auto Buffer = std::make_unique<StringMemoryBuffer>(std::move(ObjBuffer));
auto Id = HandleIdMgr.getNext();
assert(!BaseLayerHandles.count(Id) && "Id already in use?");
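
The RemoteSymbolMaterializer hunk converts the copy-as-move hack into a real move constructor, made possible by moving the materializer into the lambda. A standalone sketch of the same shape:

    #include <iostream>
    #include <utility>

    struct Materializer {
      explicit Materializer(int Id) : Id(Id) {}
      // Real move constructor: the moved-from object gives up its Id.
      Materializer(Materializer &&Other) : Id(Other.Id) { Other.Id = 0; }
      Materializer &operator=(Materializer &&) = delete;
      int materialize() { return Id; }
      int Id;
    };

    int main() {
      Materializer M(7);
      // C++14 init-capture moves M in; 'mutable' allows calling materialize().
      auto Get = [M = std::move(M)]() mutable { return M.materialize(); };
      std::cout << Get() << "\n"; // prints 7
      return 0;
    }
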
diff --git a/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h b/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h
new file mode 100644
index 000000000000..cf57b63b6448
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h
@@ -0,0 +1,84 @@
+//===-- SpeculateAnalyses.h --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Contains the analyses and result interpretation used to select likely
+/// functions to speculatively compile before they are called. [Experimental]
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H
+#define LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H
+
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Speculation.h"
+
+#include <vector>
+
+namespace llvm {
+
+namespace orc {
+
+// Provides common code.
+class SpeculateQuery {
+protected:
+ void findCalles(const BasicBlock *, DenseSet<StringRef> &);
+ bool isStraightLine(const Function &F);
+
+public:
+ using ResultTy = Optional<DenseMap<StringRef, DenseSet<StringRef>>>;
+};
+
+// Direct calls in high frequency basic blocks are extracted.
+class BlockFreqQuery : public SpeculateQuery {
+ size_t numBBToGet(size_t);
+
+public:
+  // Find the functions most likely to execute next, based on IR block
+ ResultTy operator()(Function &F);
+};
+
+// This query generates a sequence of basic blocks that follows the order of
+// execution.
+// A handful of blocks with the highest block frequencies are taken, then the
+// paths to the entry and exit blocks are found by walking up and down the CFG.
+class SequenceBBQuery : public SpeculateQuery {
+ struct WalkDirection {
+ bool Upward = true, Downward = true;
+    // whether the associated block contains a call
+ bool CallerBlock = false;
+ };
+
+public:
+ using VisitedBlocksInfoTy = DenseMap<const BasicBlock *, WalkDirection>;
+ using BlockListTy = SmallVector<const BasicBlock *, 8>;
+ using BackEdgesInfoTy =
+ SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 8>;
+ using BlockFreqInfoTy =
+ SmallVector<std::pair<const BasicBlock *, uint64_t>, 8>;
+
+private:
+ std::size_t getHottestBlocks(std::size_t TotalBlocks);
+ BlockListTy rearrangeBB(const Function &, const BlockListTy &);
+ BlockListTy queryCFG(Function &, const BlockListTy &);
+ void traverseToEntryBlock(const BasicBlock *, const BlockListTy &,
+ const BackEdgesInfoTy &,
+ const BranchProbabilityInfo *,
+ VisitedBlocksInfoTy &);
+ void traverseToExitBlock(const BasicBlock *, const BlockListTy &,
+ const BackEdgesInfoTy &,
+ const BranchProbabilityInfo *,
+ VisitedBlocksInfoTy &);
+
+public:
+ ResultTy operator()(Function &F);
+};
+
+} // namespace orc
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H
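
SpeculateQuery::ResultTy above maps each function to the set of callees worth compiling early, or is empty when nothing qualifies. A standalone analogue of that shape, with C++17 std::optional and the std:: containers standing in for llvm::Optional, DenseMap and DenseSet; the function names and data are illustrative only:

    #include <iostream>
    #include <map>
    #include <optional>
    #include <set>
    #include <string>

    using ResultTy =
        std::optional<std::map<std::string, std::set<std::string>>>;

    // Toy stand-in for a query's operator(): report likely callees, or
    // nullopt when no block qualifies.
    ResultTy query(const std::string &FnName) {
      if (FnName != "main")
        return std::nullopt;
      std::map<std::string, std::set<std::string>> Likely;
      Likely[FnName] = {"init", "run"};
      return Likely;
    }

    int main() {
      if (auto R = query("main"))
        for (const auto &KV : *R)
          for (const auto &Callee : KV.second)
            std::cout << KV.first << " -> " << Callee << "\n";
      return 0;
    }
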
diff --git a/include/llvm/ExecutionEngine/Orc/Speculation.h b/include/llvm/ExecutionEngine/Orc/Speculation.h
new file mode 100644
index 000000000000..766a6b070f12
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/Speculation.h
@@ -0,0 +1,207 @@
+//===-- Speculation.h - Speculative Compilation --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains the definition to support speculative compilation when laziness is
+// enabled.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SPECULATION_H
+#define LLVM_EXECUTIONENGINE_ORC_SPECULATION_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/Debug.h"
+
+#include <mutex>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+class Speculator;
+
+// Tracks the Impls (JITDylib, Symbol) of symbols while lazy call-through
+// trampolines are created. Operations are guarded by locks to ensure that the
+// map stays in a consistent state across reads and writes.
+
+class ImplSymbolMap {
+ friend class Speculator;
+
+public:
+ using AliaseeDetails = std::pair<SymbolStringPtr, JITDylib *>;
+ using Alias = SymbolStringPtr;
+ using ImapTy = DenseMap<Alias, AliaseeDetails>;
+ void trackImpls(SymbolAliasMap ImplMaps, JITDylib *SrcJD);
+
+private:
+  // FIXME: find the right way to distinguish pre-compiled symbols, and update
+  // the call site.
+ Optional<AliaseeDetails> getImplFor(const SymbolStringPtr &StubSymbol) {
+ std::lock_guard<std::mutex> Lockit(ConcurrentAccess);
+ auto Position = Maps.find(StubSymbol);
+ if (Position != Maps.end())
+ return Position->getSecond();
+ else
+ return None;
+ }
+
+ std::mutex ConcurrentAccess;
+ ImapTy Maps;
+};
+
+// Defines the Speculator concept.
+class Speculator {
+public:
+ using TargetFAddr = JITTargetAddress;
+ using FunctionCandidatesMap = DenseMap<SymbolStringPtr, SymbolNameSet>;
+ using StubAddrLikelies = DenseMap<TargetFAddr, SymbolNameSet>;
+
+private:
+ void registerSymbolsWithAddr(TargetFAddr ImplAddr,
+ SymbolNameSet likelySymbols) {
+ std::lock_guard<std::mutex> Lockit(ConcurrentAccess);
+ GlobalSpecMap.insert({ImplAddr, std::move(likelySymbols)});
+ }
+
+ void launchCompile(JITTargetAddress FAddr) {
+ SymbolNameSet CandidateSet;
+    // Copying CandidateSet is necessary to avoid unsynchronized access to
+    // the data structure.
+ {
+ std::lock_guard<std::mutex> Lockit(ConcurrentAccess);
+ auto It = GlobalSpecMap.find(FAddr);
+ if (It == GlobalSpecMap.end())
+ return;
+ CandidateSet = It->getSecond();
+ }
+
+ SymbolDependenceMap SpeculativeLookUpImpls;
+
+ for (auto &Callee : CandidateSet) {
+ auto ImplSymbol = AliaseeImplTable.getImplFor(Callee);
+      // Try to distinguish already-compiled symbols from library symbols.
+ if (!ImplSymbol.hasValue())
+ continue;
+ const auto &ImplSymbolName = ImplSymbol.getPointer()->first;
+ JITDylib *ImplJD = ImplSymbol.getPointer()->second;
+ auto &SymbolsInJD = SpeculativeLookUpImpls[ImplJD];
+ SymbolsInJD.insert(ImplSymbolName);
+ }
+
+    DEBUG_WITH_TYPE("orc",
+                    for (auto &I : SpeculativeLookUpImpls) {
+ llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib ";
+ for (auto &N : I.second)
+ llvm::dbgs() << "\n Likely Symbol : " << N;
+ });
+
+    // For a given symbol there may be no symbols that qualify for speculative
+    // compilation; try to fix this before reaching this code, if possible.
+ for (auto &LookupPair : SpeculativeLookUpImpls)
+ ES.lookup(JITDylibSearchList({{LookupPair.first, true}}),
+ LookupPair.second, SymbolState::Ready,
+ [this](Expected<SymbolMap> Result) {
+ if (auto Err = Result.takeError())
+ ES.reportError(std::move(Err));
+ },
+ NoDependenciesToRegister);
+ }
+
+public:
+ Speculator(ImplSymbolMap &Impl, ExecutionSession &ref)
+ : AliaseeImplTable(Impl), ES(ref), GlobalSpecMap(0) {}
+ Speculator(const Speculator &) = delete;
+ Speculator(Speculator &&) = delete;
+ Speculator &operator=(const Speculator &) = delete;
+ Speculator &operator=(Speculator &&) = delete;
+
+ /// Define symbols for this Speculator object (__orc_speculator) and the
+ /// speculation runtime entry point symbol (__orc_speculate_for) in the
+ /// given JITDylib.
+ Error addSpeculationRuntime(JITDylib &JD, MangleAndInterner &Mangle);
+
+  // Speculatively compile likely functions for the given stub address, the
+  // destination of the __orc_speculate_for jump.
+ void speculateFor(TargetFAddr StubAddr) { launchCompile(StubAddr); }
+
+  // FIXME: Register with the stub address once JITLink is fixed.
+ void registerSymbols(FunctionCandidatesMap Candidates, JITDylib *JD) {
+ for (auto &SymPair : Candidates) {
+ auto Target = SymPair.first;
+ auto Likely = SymPair.second;
+
+ auto OnReadyFixUp = [Likely, Target,
+ this](Expected<SymbolMap> ReadySymbol) {
+ if (ReadySymbol) {
+ auto RAddr = (*ReadySymbol)[Target].getAddress();
+ registerSymbolsWithAddr(RAddr, std::move(Likely));
+ } else
+ this->getES().reportError(ReadySymbol.takeError());
+ };
+ // Include non-exported symbols also.
+ ES.lookup(JITDylibSearchList({{JD, true}}), SymbolNameSet({Target}),
+ SymbolState::Ready, OnReadyFixUp, NoDependenciesToRegister);
+ }
+ }
+
+ ExecutionSession &getES() { return ES; }
+
+private:
+ static void speculateForEntryPoint(Speculator *Ptr, uint64_t StubId);
+ std::mutex ConcurrentAccess;
+ ImplSymbolMap &AliaseeImplTable;
+ ExecutionSession &ES;
+ StubAddrLikelies GlobalSpecMap;
+};
+
+class IRSpeculationLayer : public IRLayer {
+public:
+ using IRlikiesStrRef = Optional<DenseMap<StringRef, DenseSet<StringRef>>>;
+ using ResultEval = std::function<IRlikiesStrRef(Function &)>;
+ using TargetAndLikelies = DenseMap<SymbolStringPtr, SymbolNameSet>;
+
+ IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer,
+ Speculator &Spec, MangleAndInterner &Mangle,
+ ResultEval Interpreter)
+ : IRLayer(ES), NextLayer(BaseLayer), S(Spec), Mangle(Mangle),
+ QueryAnalysis(Interpreter) {}
+
+ void emit(MaterializationResponsibility R, ThreadSafeModule TSM);
+
+private:
+ TargetAndLikelies
+ internToJITSymbols(DenseMap<StringRef, DenseSet<StringRef>> IRNames) {
+ assert(!IRNames.empty() && "No IRNames received to Intern?");
+ TargetAndLikelies InternedNames;
+ DenseSet<SymbolStringPtr> TargetJITNames;
+ for (auto &NamePair : IRNames) {
+ for (auto &TargetNames : NamePair.second)
+ TargetJITNames.insert(Mangle(TargetNames));
+
+ InternedNames[Mangle(NamePair.first)] = std::move(TargetJITNames);
+ }
+ return InternedNames;
+ }
+
+ IRCompileLayer &NextLayer;
+ Speculator &S;
+ MangleAndInterner &Mangle;
+ ResultEval QueryAnalysis;
+};
+
+} // namespace orc
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SPECULATION_H
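
The core of Speculator::launchCompile above is its locking discipline: the candidate set is copied out under the mutex, then the lookups run without holding it. A standalone sketch of that pattern, with std:: containers in place of the ORC types and all names illustrative:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <mutex>
    #include <set>
    #include <string>

    class Speculator {
    public:
      void registerSymbols(uint64_t Addr, std::set<std::string> Likely) {
        std::lock_guard<std::mutex> Lock(M);
        SpecMap.emplace(Addr, std::move(Likely));
      }
      void speculateFor(uint64_t Addr) {
        std::set<std::string> Candidates;
        {
          std::lock_guard<std::mutex> Lock(M);
          auto It = SpecMap.find(Addr);
          if (It == SpecMap.end())
            return;
          Candidates = It->second; // copy, so the lock is not held below
        }
        for (const auto &Sym : Candidates)
          std::cout << "speculatively compile " << Sym << "\n";
      }

    private:
      std::mutex M;
      std::map<uint64_t, std::set<std::string>> SpecMap;
    };

    int main() {
      Speculator S;
      S.registerSymbols(0x1000, {"foo", "bar"});
      S.speculateFor(0x1000);
      return 0;
    }
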
diff --git a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
index 5787500387c4..2347faed37a2 100644
--- a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
+++ b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
@@ -38,17 +38,12 @@ private:
public:
// RAII based lock for ThreadSafeContext.
class LLVM_NODISCARD Lock {
- private:
- using UnderlyingLock = std::lock_guard<std::recursive_mutex>;
-
public:
- Lock(std::shared_ptr<State> S)
- : S(std::move(S)),
- L(llvm::make_unique<UnderlyingLock>(this->S->Mutex)) {}
+ Lock(std::shared_ptr<State> S) : S(std::move(S)), L(this->S->Mutex) {}
private:
std::shared_ptr<State> S;
- std::unique_ptr<UnderlyingLock> L;
+ std::unique_lock<std::recursive_mutex> L;
};
/// Construct a null context.
@@ -69,7 +64,7 @@ public:
/// instance, or null if the instance was default constructed.
const LLVMContext *getContext() const { return S ? S->Ctx.get() : nullptr; }
- Lock getLock() {
+ Lock getLock() const {
assert(S && "Can not lock an empty ThreadSafeContext");
return Lock(S);
}
@@ -95,7 +90,7 @@ public:
// We also need to lock the context to make sure the module tear-down
// does not overlap any other work on the context.
if (M) {
- auto L = getContextLock();
+ auto L = TSCtx.getLock();
M = nullptr;
}
M = std::move(Other.M);
@@ -117,23 +112,14 @@ public:
~ThreadSafeModule() {
// We need to lock the context while we destruct the module.
if (M) {
- auto L = getContextLock();
+ auto L = TSCtx.getLock();
M = nullptr;
}
}
- /// Get the module wrapped by this ThreadSafeModule.
- Module *getModule() { return M.get(); }
-
- /// Get the module wrapped by this ThreadSafeModule.
- const Module *getModule() const { return M.get(); }
-
- /// Take out a lock on the ThreadSafeContext for this module.
- ThreadSafeContext::Lock getContextLock() { return TSCtx.getLock(); }
-
/// Boolean conversion: This ThreadSafeModule will evaluate to true if it
/// wraps a non-null module.
- explicit operator bool() {
+ explicit operator bool() const {
if (M) {
assert(TSCtx.getContext() &&
"Non-null module must have non-null context");
@@ -142,6 +128,33 @@ public:
return false;
}
+ /// Locks the associated ThreadSafeContext and calls the given function
+ /// on the contained Module.
+ template <typename Func>
+ auto withModuleDo(Func &&F) -> decltype(F(std::declval<Module &>())) {
+ assert(M && "Can not call on null module");
+ auto Lock = TSCtx.getLock();
+ return F(*M);
+ }
+
+ /// Locks the associated ThreadSafeContext and calls the given function
+ /// on the contained Module.
+ template <typename Func>
+ auto withModuleDo(Func &&F) const
+ -> decltype(F(std::declval<const Module &>())) {
+ auto Lock = TSCtx.getLock();
+ return F(*M);
+ }
+
+ /// Get a raw pointer to the contained module without locking the context.
+ Module *getModuleUnlocked() { return M.get(); }
+
+ /// Get a raw pointer to the contained module without locking the context.
+ const Module *getModuleUnlocked() const { return M.get(); }
+
+ /// Returns the context for this ThreadSafeModule.
+ ThreadSafeContext getContext() const { return TSCtx; }
+
private:
std::unique_ptr<Module> M;
ThreadSafeContext TSCtx;
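
withModuleDo above replaces the removed getModule()/getContextLock() pair so the context lock can never be forgotten. A standalone sketch of the idiom, with a trivial Module stand-in:

    #include <iostream>
    #include <memory>
    #include <mutex>
    #include <string>
    #include <utility>

    struct Module {
      std::string Name = "m";
    };

    class ThreadSafeModule {
    public:
      // The lock is held for exactly the duration of the callback.
      template <typename Func>
      auto withModuleDo(Func &&F) -> decltype(F(std::declval<Module &>())) {
        std::lock_guard<std::recursive_mutex> Lock(CtxMutex);
        return F(*M);
      }

    private:
      std::unique_ptr<Module> M = std::make_unique<Module>();
      std::recursive_mutex CtxMutex;
    };

    int main() {
      ThreadSafeModule TSM;
      auto Len = TSM.withModuleDo([](Module &M) { return M.Name.size(); });
      std::cout << Len << "\n"; // prints 1
      return 0;
    }
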
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index b2b4eba47074..ce7024a7f19b 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -13,6 +13,7 @@
#ifndef LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H
#define LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H
+#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DIContext.h"
@@ -271,10 +272,10 @@ private:
std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver,
bool ProcessAllSections,
- std::function<Error(std::unique_ptr<LoadedObjectInfo>,
- std::map<StringRef, JITEvaluatedSymbol>)>
+ unique_function<Error(std::unique_ptr<LoadedObjectInfo>,
+ std::map<StringRef, JITEvaluatedSymbol>)>
OnLoaded,
- std::function<void(Error)> OnEmitted);
+ unique_function<void(Error)> OnEmitted);
// RuntimeDyldImpl is the actual class. RuntimeDyld is just the public
// interface.
@@ -291,14 +292,14 @@ private:
// but ORC's RTDyldObjectLinkingLayer2. Internally it constructs a RuntimeDyld
// instance and uses continuation passing to perform the fix-up and finalize
// steps asynchronously.
-void jitLinkForORC(object::ObjectFile &Obj,
- std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
- RuntimeDyld::MemoryManager &MemMgr,
- JITSymbolResolver &Resolver, bool ProcessAllSections,
- std::function<Error(std::unique_ptr<LoadedObjectInfo>,
- std::map<StringRef, JITEvaluatedSymbol>)>
- OnLoaded,
- std::function<void(Error)> OnEmitted);
+void jitLinkForORC(
+ object::ObjectFile &Obj, std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
+ RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver,
+ bool ProcessAllSections,
+ unique_function<Error(std::unique_ptr<RuntimeDyld::LoadedObjectInfo>,
+ std::map<StringRef, JITEvaluatedSymbol>)>
+ OnLoaded,
+ unique_function<void(Error)> OnEmitted);
} // end namespace llvm
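
The switch from std::function to unique_function for OnLoaded/OnEmitted matters because std::function requires copyable callables, so continuations that own move-only state cannot be stored in one. A standalone sketch of the limitation; a template parameter plays the role unique_function plays in the hunk above:

    #include <iostream>
    #include <memory>
    #include <utility>

    // Accepting the callable as a template parameter permits move-only
    // closures, as llvm::unique_function does.
    template <typename Callback> void runOnce(Callback &&CB) {
      std::forward<Callback>(CB)();
    }

    int main() {
      auto Buf = std::make_unique<int>(42);
      auto CB = [Buf = std::move(Buf)]() { std::cout << *Buf << "\n"; };
      // std::function<void()> F = std::move(CB); // would not compile
      runOnce(std::move(CB));
      return 0;
    }
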
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index 06cc09e1cfc7..e6b280465f72 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include <bitset>
#include <cassert>
@@ -94,8 +95,8 @@ public:
/// Return a uniquified Attribute object that has the specific
/// alignment set.
- static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align);
- static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align);
+ static Attribute getWithAlignment(LLVMContext &Context, Align Alignment);
+ static Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment);
static Attribute getWithDereferenceableBytes(LLVMContext &Context,
uint64_t Bytes);
static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context,
@@ -150,11 +151,11 @@ public:
/// Returns the alignment field of an attribute as a byte alignment
/// value.
- unsigned getAlignment() const;
+ MaybeAlign getAlignment() const;
/// Returns the stack alignment field of an attribute as a byte
/// alignment value.
- unsigned getStackAlignment() const;
+ MaybeAlign getStackAlignment() const;
/// Returns the number of dereferenceable bytes from the
/// dereferenceable attribute.
@@ -284,8 +285,8 @@ public:
/// Return the target-dependent attribute object.
Attribute getAttribute(StringRef Kind) const;
- unsigned getAlignment() const;
- unsigned getStackAlignment() const;
+ MaybeAlign getAlignment() const;
+ MaybeAlign getStackAlignment() const;
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
Type *getByValType() const;
@@ -603,16 +604,16 @@ public:
}
/// Return the alignment of the return value.
- unsigned getRetAlignment() const;
+ MaybeAlign getRetAlignment() const;
/// Return the alignment for the specified function parameter.
- unsigned getParamAlignment(unsigned ArgNo) const;
+ MaybeAlign getParamAlignment(unsigned ArgNo) const;
/// Return the byval type for the specified function parameter.
Type *getParamByValType(unsigned ArgNo) const;
/// Get the stack alignment.
- unsigned getStackAlignment(unsigned Index) const;
+ MaybeAlign getStackAlignment(unsigned Index) const;
/// Get the number of dereferenceable bytes (or zero if unknown).
uint64_t getDereferenceableBytes(unsigned Index) const;
@@ -704,9 +705,9 @@ template <> struct DenseMapInfo<AttributeList> {
/// equality, presence of attributes, etc.
class AttrBuilder {
std::bitset<Attribute::EndAttrKinds> Attrs;
- std::map<std::string, std::string> TargetDepAttrs;
- uint64_t Alignment = 0;
- uint64_t StackAlignment = 0;
+ std::map<std::string, std::string, std::less<>> TargetDepAttrs;
+ MaybeAlign Alignment;
+ MaybeAlign StackAlignment;
uint64_t DerefBytes = 0;
uint64_t DerefOrNullBytes = 0;
uint64_t AllocSizeArgs = 0;
@@ -773,10 +774,10 @@ public:
bool hasAlignmentAttr() const;
/// Retrieve the alignment attribute, if it exists.
- uint64_t getAlignment() const { return Alignment; }
+ MaybeAlign getAlignment() const { return Alignment; }
/// Retrieve the stack alignment attribute, if it exists.
- uint64_t getStackAlignment() const { return StackAlignment; }
+ MaybeAlign getStackAlignment() const { return StackAlignment; }
/// Retrieve the number of dereferenceable bytes, if the
/// dereferenceable attribute exists (zero is returned otherwise).
@@ -793,13 +794,29 @@ public:
/// doesn't exist, pair(0, 0) is returned.
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
+ /// This turns an alignment into the form used internally in Attribute.
+ /// This call has no effect if Align is not set.
+ AttrBuilder &addAlignmentAttr(MaybeAlign Align);
+
/// This turns an int alignment (which must be a power of 2) into the
/// form used internally in Attribute.
- AttrBuilder &addAlignmentAttr(unsigned Align);
+ /// This call has no effect if Align is 0.
+ /// Deprecated, use the version using a MaybeAlign.
+ inline AttrBuilder &addAlignmentAttr(unsigned Align) {
+ return addAlignmentAttr(MaybeAlign(Align));
+ }
+
+ /// This turns a stack alignment into the form used internally in Attribute.
+ /// This call has no effect if Align is not set.
+ AttrBuilder &addStackAlignmentAttr(MaybeAlign Align);
/// This turns an int stack alignment (which must be a power of 2) into
/// the form used internally in Attribute.
- AttrBuilder &addStackAlignmentAttr(unsigned Align);
+ /// This call has no effect if Align is 0.
+ /// Deprecated, use the version using a MaybeAlign.
+ inline AttrBuilder &addStackAlignmentAttr(unsigned Align) {
+ return addStackAlignmentAttr(MaybeAlign(Align));
+ }
/// This turns the number of dereferenceable bytes into the form used
/// internally in Attribute.
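
The MaybeAlign migration above gives alignment attributes an explicit "unset" state instead of the 0 sentinel. A standalone analogue of the deprecation-shim pattern, using C++17 std::optional as a stand-in for llvm::MaybeAlign (which folds 0 to "unset" automatically; the stand-in must do it by hand):

    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <optional>

    using MaybeAlign = std::optional<uint64_t>; // nullopt == not set

    struct AttrBuilder {
      MaybeAlign Alignment;

      AttrBuilder &addAlignmentAttr(MaybeAlign Align) {
        if (!Align)
          return *this; // no effect when unset
        assert((*Align & (*Align - 1)) == 0 && "must be a power of 2");
        Alignment = Align;
        return *this;
      }
      // Deprecated shim: 0 means "no alignment requested".
      AttrBuilder &addAlignmentAttr(unsigned Align) {
        return addAlignmentAttr(Align ? MaybeAlign(Align) : MaybeAlign());
      }
    };

    int main() {
      AttrBuilder B;
      B.addAlignmentAttr(16u);
      std::cout << (B.Alignment ? *B.Alignment : 0) << "\n"; // prints 16
      return 0;
    }
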
diff --git a/include/llvm/IR/AutoUpgrade.h b/include/llvm/IR/AutoUpgrade.h
index 017ad93d8a2a..66f38e5b55d1 100644
--- a/include/llvm/IR/AutoUpgrade.h
+++ b/include/llvm/IR/AutoUpgrade.h
@@ -54,9 +54,9 @@ namespace llvm {
/// module is modified.
bool UpgradeModuleFlags(Module &M);
- /// This checks for objc retain release marker which should be upgraded. It
- /// returns true if module is modified.
- bool UpgradeRetainReleaseMarker(Module &M);
+  /// Convert calls to ARC runtime functions to intrinsic calls, and upgrade
+  /// the old retain/release marker to the new module-flag format.
+ void UpgradeARCRuntime(Module &M);
void UpgradeSectionAttributes(Module &M);
@@ -87,6 +87,10 @@ namespace llvm {
/// Upgrade the loop attachment metadata node.
MDNode *upgradeInstructionLoopAttachment(MDNode &N);
+ /// Upgrade the datalayout string by adding a section for address space
+ /// pointers.
+ std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple);
+
} // End llvm namespace
#endif
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
index 69555af50e1f..d594145f8636 100644
--- a/include/llvm/IR/BasicBlock.h
+++ b/include/llvm/IR/BasicBlock.h
@@ -192,6 +192,11 @@ public:
std::function<bool(Instruction &)>>>
instructionsWithoutDebug();
+  /// Return the size of the basic block, ignoring debug instructions.
+ filter_iterator<BasicBlock::const_iterator,
+ std::function<bool(const Instruction &)>>::difference_type
+ sizeWithoutDebug() const;
+
/// Unlink 'this' from the containing function, but do not delete it.
void removeFromParent();
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h
index b47a96c5d5fa..13b1ae8d0e32 100644
--- a/include/llvm/IR/CallSite.h
+++ b/include/llvm/IR/CallSite.h
@@ -854,6 +854,15 @@ public:
return CI.ParameterEncoding[0];
}
+ /// Return the use of the callee value in the underlying instruction. Only
+ /// valid for callback calls!
+ const Use &getCalleeUseForCallback() const {
+ int CalleeArgIdx = getCallArgOperandNoForCallee();
+ assert(CalleeArgIdx >= 0 &&
+ unsigned(CalleeArgIdx) < getInstruction()->getNumOperands());
+ return getInstruction()->getOperandUse(CalleeArgIdx);
+ }
+
/// Return the pointer to function that is being called.
Value *getCalledValue() const {
if (isDirectCall())
diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h
index 399c6ad521fa..c1c979c2e2ab 100644
--- a/include/llvm/IR/CallingConv.h
+++ b/include/llvm/IR/CallingConv.h
@@ -75,6 +75,11 @@ namespace CallingConv {
// CXX_FAST_TLS - Calling convention for access functions.
CXX_FAST_TLS = 17,
+  /// Tail - This calling convention attempts to make calls as fast as
+ /// possible while guaranteeing that tail call optimization can always
+ /// be performed.
+ Tail = 18,
+
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
@@ -222,6 +227,14 @@ namespace CallingConv {
// Calling convention between AArch64 Advanced SIMD functions
AArch64_VectorCall = 97,
+ /// Calling convention between AArch64 SVE functions
+ AArch64_SVE_VectorCall = 98,
+
+ /// Calling convention for emscripten __invoke_* functions. The first
+ /// argument is required to be the function ptr being indirectly called.
+ /// The remainder matches the regular calling convention.
+ WASM_EmscriptenInvoke = 99,
+
/// The highest possible calling convention ID. Must be some 2^k - 1.
MaxID = 1023
};
diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h
index 931576651224..2b6a6e4141b9 100644
--- a/include/llvm/IR/Constant.h
+++ b/include/llvm/IR/Constant.h
@@ -86,6 +86,12 @@ public:
/// floating-point constant with all NaN elements.
bool isNaN() const;
+ /// Return true if this constant and a constant 'Y' are element-wise equal.
+ /// This is identical to just comparing the pointers, with the exception that
+ /// for vectors, if only one of the constants has an `undef` element in some
+ /// lane, the constants still match.
+ bool isElementWiseEqual(Value *Y) const;
+
/// Return true if this is a vector constant that includes any undefined
/// elements.
bool containsUndefElement() const;
diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h
index 91f3f31abe17..964f9e8e9bc9 100644
--- a/include/llvm/IR/ConstantRange.h
+++ b/include/llvm/IR/ConstantRange.h
@@ -330,9 +330,13 @@ public:
/// from an addition of a value in this range and a value in \p Other.
ConstantRange add(const ConstantRange &Other) const;
- /// Return a new range representing the possible values resulting from a
- /// known NSW addition of a value in this range and \p Other constant.
- ConstantRange addWithNoSignedWrap(const APInt &Other) const;
+ /// Return a new range representing the possible values resulting
+ /// from an addition with wrap type \p NoWrapKind of a value in this
+ /// range and a value in \p Other.
+ /// If the result range is disjoint, the preferred range is determined by the
+ /// \p PreferredRangeType.
+ ConstantRange addWithNoWrap(const ConstantRange &Other, unsigned NoWrapKind,
+ PreferredRangeType RangeType = Smallest) const;
/// Return a new range representing the possible values resulting
/// from a subtraction of a value in this range and a value in \p Other.
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h
index ac9770a15120..85093dd218f8 100644
--- a/include/llvm/IR/DataLayout.h
+++ b/include/llvm/IR/DataLayout.h
@@ -25,10 +25,11 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -71,11 +72,11 @@ struct LayoutAlignElem {
/// Alignment type from \c AlignTypeEnum
unsigned AlignType : 8;
unsigned TypeBitWidth : 24;
- unsigned ABIAlign : 16;
- unsigned PrefAlign : 16;
+ Align ABIAlign;
+ Align PrefAlign;
- static LayoutAlignElem get(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width);
+ static LayoutAlignElem get(AlignTypeEnum align_type, Align abi_align,
+ Align pref_align, uint32_t bit_width);
bool operator==(const LayoutAlignElem &rhs) const;
};
@@ -87,15 +88,15 @@ struct LayoutAlignElem {
/// \note The unusual order of elements in the structure attempts to reduce
/// padding and make the structure slightly more cache friendly.
struct PointerAlignElem {
- unsigned ABIAlign;
- unsigned PrefAlign;
+ Align ABIAlign;
+ Align PrefAlign;
uint32_t TypeByteWidth;
uint32_t AddressSpace;
uint32_t IndexWidth;
/// Initializer
- static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign,
- unsigned PrefAlign, uint32_t TypeByteWidth,
+ static PointerAlignElem get(uint32_t AddressSpace, Align ABIAlign,
+ Align PrefAlign, uint32_t TypeByteWidth,
uint32_t IndexWidth);
bool operator==(const PointerAlignElem &rhs) const;
@@ -120,10 +121,10 @@ private:
bool BigEndian;
unsigned AllocaAddrSpace;
- unsigned StackNaturalAlign;
+ MaybeAlign StackNaturalAlign;
unsigned ProgramAddrSpace;
- unsigned FunctionPtrAlign;
+ MaybeAlign FunctionPtrAlign;
FunctionPtrAlignType TheFunctionPtrAlignType;
enum ManglingModeT {
@@ -172,16 +173,15 @@ private:
/// well-defined bitwise representation.
SmallVector<unsigned, 8> NonIntegralAddressSpaces;
- void setAlignment(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width);
- unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
- bool ABIAlign, Type *Ty) const;
- void setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
- unsigned PrefAlign, uint32_t TypeByteWidth,
- uint32_t IndexWidth);
+ void setAlignment(AlignTypeEnum align_type, Align abi_align, Align pref_align,
+ uint32_t bit_width);
+ Align getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
+ bool ABIAlign, Type *Ty) const;
+ void setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, Align PrefAlign,
+ uint32_t TypeByteWidth, uint32_t IndexWidth);
/// Internal helper method that returns requested alignment for type.
- unsigned getAlignment(Type *Ty, bool abi_or_pref) const;
+ Align getAlignment(Type *Ty, bool abi_or_pref) const;
/// Parses a target data specification string. Assert if the string is
/// malformed.
@@ -261,17 +261,21 @@ public:
bool isIllegalInteger(uint64_t Width) const { return !isLegalInteger(Width); }
/// Returns true if the given alignment exceeds the natural stack alignment.
- bool exceedsNaturalStackAlignment(unsigned Align) const {
- return (StackNaturalAlign != 0) && (Align > StackNaturalAlign);
+ bool exceedsNaturalStackAlignment(Align Alignment) const {
+ return StackNaturalAlign && (Alignment > StackNaturalAlign);
+ }
+
+ Align getStackAlignment() const {
+ assert(StackNaturalAlign && "StackNaturalAlign must be defined");
+ return *StackNaturalAlign;
}
- unsigned getStackAlignment() const { return StackNaturalAlign; }
unsigned getAllocaAddrSpace() const { return AllocaAddrSpace; }
/// Returns the alignment of function pointers, which may or may not be
/// related to the alignment of functions.
/// \see getFunctionPtrAlignType
- unsigned getFunctionPtrAlign() const { return FunctionPtrAlign; }
+ MaybeAlign getFunctionPtrAlign() const { return FunctionPtrAlign; }
/// Return the type of function pointer alignment.
/// \see getFunctionPtrAlign
@@ -344,12 +348,12 @@ public:
}
/// Layout pointer alignment
- unsigned getPointerABIAlignment(unsigned AS) const;
+ Align getPointerABIAlignment(unsigned AS) const;
/// Return target's alignment for stack-based pointers
/// FIXME: The defaults need to be removed once all of
/// the backends/clients are updated.
- unsigned getPointerPrefAlignment(unsigned AS = 0) const;
+ Align getPointerPrefAlignment(unsigned AS = 0) const;
/// Layout pointer size
/// FIXME: The defaults need to be removed once all of
@@ -433,23 +437,33 @@ public:
/// Returns the number of bits necessary to hold the specified type.
///
+ /// If Ty is a scalable vector type, the scalable property will be set and
+ /// the runtime size will be a positive integer multiple of the base size.
+ ///
/// For example, returns 36 for i36 and 80 for x86_fp80. The type passed must
/// have a size (Type::isSized() must return true).
- uint64_t getTypeSizeInBits(Type *Ty) const;
+ TypeSize getTypeSizeInBits(Type *Ty) const;
/// Returns the maximum number of bytes that may be overwritten by
/// storing the specified type.
///
+ /// If Ty is a scalable vector type, the scalable property will be set and
+ /// the runtime size will be a positive integer multiple of the base size.
+ ///
/// For example, returns 5 for i36 and 10 for x86_fp80.
- uint64_t getTypeStoreSize(Type *Ty) const {
- return (getTypeSizeInBits(Ty) + 7) / 8;
+ TypeSize getTypeStoreSize(Type *Ty) const {
+ TypeSize BaseSize = getTypeSizeInBits(Ty);
+ return { (BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable() };
}
/// Returns the maximum number of bits that may be overwritten by
/// storing the specified type; always a multiple of 8.
///
+ /// If Ty is a scalable vector type, the scalable property will be set and
+ /// the runtime size will be a positive integer multiple of the base size.
+ ///
/// For example, returns 40 for i36 and 80 for x86_fp80.
- uint64_t getTypeStoreSizeInBits(Type *Ty) const {
+ TypeSize getTypeStoreSizeInBits(Type *Ty) const {
return 8 * getTypeStoreSize(Ty);
}
@@ -464,9 +478,12 @@ public:
/// Returns the offset in bytes between successive objects of the
/// specified type, including alignment padding.
///
+ /// If Ty is a scalable vector type, the scalable property will be set and
+ /// the runtime size will be a positive integer multiple of the base size.
+ ///
/// This is the amount that alloca reserves for this type. For example,
/// returns 12 or 16 for x86_fp80, depending on alignment.
- uint64_t getTypeAllocSize(Type *Ty) const {
+ TypeSize getTypeAllocSize(Type *Ty) const {
// Round up to the next alignment boundary.
return alignTo(getTypeStoreSize(Ty), getABITypeAlignment(Ty));
}
@@ -474,18 +491,28 @@ public:
/// Returns the offset in bits between successive objects of the
/// specified type, including alignment padding; always a multiple of 8.
///
+ /// If Ty is a scalable vector type, the scalable property will be set and
+ /// the runtime size will be a positive integer multiple of the base size.
+ ///
/// This is the amount that alloca reserves for this type. For example,
/// returns 96 or 128 for x86_fp80, depending on alignment.
- uint64_t getTypeAllocSizeInBits(Type *Ty) const {
+ TypeSize getTypeAllocSizeInBits(Type *Ty) const {
return 8 * getTypeAllocSize(Ty);
}
/// Returns the minimum ABI-required alignment for the specified type.
unsigned getABITypeAlignment(Type *Ty) const;
+ /// Helper function to return `Alignment` if it's set or the result of
+ /// `getABITypeAlignment(Ty)`, in any case the result is a valid alignment.
+ inline Align getValueOrABITypeAlignment(MaybeAlign Alignment,
+ Type *Ty) const {
+ return Alignment ? *Alignment : Align(getABITypeAlignment(Ty));
+ }
+
/// Returns the minimum ABI-required alignment for an integer type of
/// the specified bitwidth.
- unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const;
+ Align getABIIntegerTypeAlignment(unsigned BitWidth) const;
/// Returns the preferred stack/global alignment for the specified
/// type.
@@ -493,10 +520,6 @@ public:
/// This is always at least as good as the ABI alignment.
unsigned getPrefTypeAlignment(Type *Ty) const;
- /// Returns the preferred alignment for the specified type, returned as
- /// log2 of the value (a shift amount).
- unsigned getPreferredTypeAlignmentShift(Type *Ty) const;
-
/// Returns an integer type with size at least as big as that of a
/// pointer in the given address space.
IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const;
@@ -561,7 +584,7 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) {
/// based on the DataLayout structure.
class StructLayout {
uint64_t StructSize;
- unsigned StructAlignment;
+ Align StructAlignment;
unsigned IsPadded : 1;
unsigned NumElements : 31;
uint64_t MemberOffsets[1]; // variable sized array!
@@ -571,7 +594,7 @@ public:
uint64_t getSizeInBits() const { return 8 * StructSize; }
- unsigned getAlignment() const { return StructAlignment; }
+ Align getAlignment() const { return StructAlignment; }
/// Returns whether the struct has padding or not between its fields.
/// NB: Padding in nested element is not taken into account.
@@ -598,13 +621,13 @@ private:
// The implementation of this method is provided inline as it is particularly
// well suited to constant folding when called on a specific Type subclass.
-inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
+inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const {
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
switch (Ty->getTypeID()) {
case Type::LabelTyID:
- return getPointerSizeInBits(0);
+ return TypeSize::Fixed(getPointerSizeInBits(0));
case Type::PointerTyID:
- return getPointerSizeInBits(Ty->getPointerAddressSpace());
+ return TypeSize::Fixed(getPointerSizeInBits(Ty->getPointerAddressSpace()));
case Type::ArrayTyID: {
ArrayType *ATy = cast<ArrayType>(Ty);
return ATy->getNumElements() *
@@ -612,26 +635,30 @@ inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
}
case Type::StructTyID:
// Get the layout annotation... which is lazily created on demand.
- return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+ return TypeSize::Fixed(
+ getStructLayout(cast<StructType>(Ty))->getSizeInBits());
case Type::IntegerTyID:
- return Ty->getIntegerBitWidth();
+ return TypeSize::Fixed(Ty->getIntegerBitWidth());
case Type::HalfTyID:
- return 16;
+ return TypeSize::Fixed(16);
case Type::FloatTyID:
- return 32;
+ return TypeSize::Fixed(32);
case Type::DoubleTyID:
case Type::X86_MMXTyID:
- return 64;
+ return TypeSize::Fixed(64);
case Type::PPC_FP128TyID:
case Type::FP128TyID:
- return 128;
+ return TypeSize::Fixed(128);
// In memory objects this is always aligned to a higher boundary, but
// only 80 bits contain information.
case Type::X86_FP80TyID:
- return 80;
+ return TypeSize::Fixed(80);
case Type::VectorTyID: {
VectorType *VTy = cast<VectorType>(Ty);
- return VTy->getNumElements() * getTypeSizeInBits(VTy->getElementType());
+ auto EltCnt = VTy->getElementCount();
+ uint64_t MinBits = EltCnt.Min *
+ getTypeSizeInBits(VTy->getElementType()).getFixedSize();
+ return TypeSize(MinBits, EltCnt.Scalable);
}
default:
llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
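
getTypeSizeInBits now returns a TypeSize rather than uint64_t so scalable vector sizes can be propagated. A standalone sketch of the (known-min, scalable) pair and the store-size arithmetic from getTypeStoreSize above:

    #include <cstdint>
    #include <iostream>

    // Toy stand-in for llvm::TypeSize: for scalable vectors the runtime size
    // is an unknown positive multiple of KnownMin, so arithmetic must carry
    // the Scalable flag along.
    struct TypeSize {
      uint64_t KnownMin;
      bool Scalable;
    };

    TypeSize storeSizeFromBits(TypeSize Bits) {
      return {(Bits.KnownMin + 7) / 8, Bits.Scalable};
    }

    int main() {
      TypeSize BitSize{128, true}; // e.g. <vscale x 4 x i32>
      TypeSize Bytes = storeSizeFromBits(BitSize);
      std::cout << "store size: " << Bytes.KnownMin
                << (Bytes.Scalable ? " x vscale" : "") << " bytes\n"; // 16
      return 0;
    }
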
diff --git a/include/llvm/IR/DebugInfoFlags.def b/include/llvm/IR/DebugInfoFlags.def
index 07e3d6bdc9e5..f90c580f10ef 100644
--- a/include/llvm/IR/DebugInfoFlags.def
+++ b/include/llvm/IR/DebugInfoFlags.def
@@ -31,7 +31,8 @@ HANDLE_DI_FLAG(2, Protected)
HANDLE_DI_FLAG(3, Public)
HANDLE_DI_FLAG((1 << 2), FwdDecl)
HANDLE_DI_FLAG((1 << 3), AppleBlock)
-HANDLE_DI_FLAG((1 << 4), BlockByrefStruct)
+// Used to be BlockByRef, can be reused for anything except DICompositeType.
+HANDLE_DI_FLAG((1 << 4), ReservedBit4)
HANDLE_DI_FLAG((1 << 5), Virtual)
HANDLE_DI_FLAG((1 << 6), Artificial)
HANDLE_DI_FLAG((1 << 7), Explicit)
@@ -42,8 +43,7 @@ HANDLE_DI_FLAG((1 << 11), Vector)
HANDLE_DI_FLAG((1 << 12), StaticMember)
HANDLE_DI_FLAG((1 << 13), LValueReference)
HANDLE_DI_FLAG((1 << 14), RValueReference)
-// 15 was formerly ExternalTypeRef, but this was never used.
-HANDLE_DI_FLAG((1 << 15), Reserved)
+HANDLE_DI_FLAG((1 << 15), ExportSymbols)
HANDLE_DI_FLAG((1 << 16), SingleInheritance)
HANDLE_DI_FLAG((2 << 16), MultipleInheritance)
HANDLE_DI_FLAG((3 << 16), VirtualInheritance)
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index 9dc6dfbb0f68..28a59576b7c6 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -650,7 +650,6 @@ public:
}
bool isForwardDecl() const { return getFlags() & FlagFwdDecl; }
bool isAppleBlockExtension() const { return getFlags() & FlagAppleBlock; }
- bool isBlockByrefStruct() const { return getFlags() & FlagBlockByrefStruct; }
bool isVirtual() const { return getFlags() & FlagVirtual; }
bool isArtificial() const { return getFlags() & FlagArtificial; }
bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
@@ -668,6 +667,7 @@ public:
}
bool isBigEndian() const { return getFlags() & FlagBigEndian; }
bool isLittleEndian() const { return getFlags() & FlagLittleEndian; }
+ bool getExportSymbols() const { return getFlags() & FlagExportSymbols; }
static bool classof(const Metadata *MD) {
switch (MD->getMetadataID()) {
@@ -2569,7 +2569,7 @@ public:
/// (This is the only configuration of entry values that is supported.)
bool isEntryValue() const {
return getNumElements() > 0 &&
- getElement(0) == dwarf::DW_OP_entry_value;
+ getElement(0) == dwarf::DW_OP_LLVM_entry_value;
}
};
diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h
index 3c1d4278905f..20097ef3f31a 100644
--- a/include/llvm/IR/DerivedTypes.h
+++ b/include/llvm/IR/DerivedTypes.h
@@ -23,7 +23,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ScalableSize.h"
+#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <cstdint>
@@ -62,6 +62,11 @@ public:
/// Get or create an IntegerType instance.
static IntegerType *get(LLVMContext &C, unsigned NumBits);
+  /// Returns a type twice as wide as the input type.
+ IntegerType *getExtendedType() const {
+ return Type::getIntNTy(getContext(), 2 * getScalarSizeInBits());
+ }
+
/// Get the number of bits in this IntegerType
unsigned getBitWidth() const { return getSubclassData(); }
@@ -470,21 +475,47 @@ public:
/// This static method is like getInteger except that the element types are
/// twice as wide as the elements in the input type.
static VectorType *getExtendedElementVectorType(VectorType *VTy) {
- unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
- return VectorType::get(EltTy, VTy->getElementCount());
+ assert(VTy->isIntOrIntVectorTy() && "VTy expected to be a vector of ints.");
+ auto *EltTy = cast<IntegerType>(VTy->getElementType());
+ return VectorType::get(EltTy->getExtendedType(), VTy->getElementCount());
}
- /// This static method is like getInteger except that the element types are
- /// half as wide as the elements in the input type.
+ // This static method gets a VectorType with the same number of elements as
+ // the input type, and the element type is an integer or float type which
+ // is half as wide as the elements in the input type.
static VectorType *getTruncatedElementVectorType(VectorType *VTy) {
- unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- assert((EltBits & 1) == 0 &&
- "Cannot truncate vector element with odd bit-width");
- Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
+ Type *EltTy;
+ if (VTy->getElementType()->isFloatingPointTy()) {
+ switch(VTy->getElementType()->getTypeID()) {
+ case DoubleTyID:
+ EltTy = Type::getFloatTy(VTy->getContext());
+ break;
+ case FloatTyID:
+ EltTy = Type::getHalfTy(VTy->getContext());
+ break;
+ default:
+ llvm_unreachable("Cannot create narrower fp vector element type");
+ }
+ } else {
+ unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ assert((EltBits & 1) == 0 &&
+ "Cannot truncate vector element with odd bit-width");
+ EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
+ }
return VectorType::get(EltTy, VTy->getElementCount());
}
+ // This static method returns a VectorType with a larger number of elements
+ // of a narrower type than the input element type; each subdivision doubles
+ // the element count and halves the element width. For example, a <4 x i32>
+ // subdivided twice would return <16 x i8>.
+ static VectorType *getSubdividedVectorType(VectorType *VTy, int NumSubdivs) {
+ for (int i = 0; i < NumSubdivs; ++i) {
+ VTy = VectorType::getDoubleElementsVectorType(VTy);
+ VTy = VectorType::getTruncatedElementVectorType(VTy);
+ }
+ return VTy;
+ }
+
/// This static method returns a VectorType with half as many elements as the
/// input type and the same element type.
static VectorType *getHalfElementsVectorType(VectorType *VTy) {
@@ -540,6 +571,10 @@ bool Type::getVectorIsScalable() const {
return cast<VectorType>(this)->isScalable();
}
+ElementCount Type::getVectorElementCount() const {
+ return cast<VectorType>(this)->getElementCount();
+}
+
/// Class to represent pointers.
class PointerType : public Type {
explicit PointerType(Type *ElType, unsigned AddrSpace);
@@ -577,6 +612,26 @@ public:
}
};
+Type *Type::getExtendedType() const {
+ assert(
+ isIntOrIntVectorTy() &&
+ "Original type expected to be a vector of integers or a scalar integer.");
+ if (auto *VTy = dyn_cast<VectorType>(this))
+ return VectorType::getExtendedElementVectorType(
+ const_cast<VectorType *>(VTy));
+ return cast<IntegerType>(this)->getExtendedType();
+}
+
+Type *Type::getWithNewBitWidth(unsigned NewBitWidth) const {
+ assert(
+ isIntOrIntVectorTy() &&
+ "Original type expected to be a vector of integers or a scalar integer.");
+ Type *NewType = getIntNTy(getContext(), NewBitWidth);
+ if (isVectorTy())
+ NewType = VectorType::get(NewType, getVectorElementCount());
+ return NewType;
+}
+
unsigned Type::getPointerAddressSpace() const {
return cast<PointerType>(getScalarType())->getAddressSpace();
}
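A minimal sketch of how the new subdivision helpers above might be exercised; this is illustrative code, not part of the patch, and assumes only an existing LLVMContext Ctx. Each subdivision doubles the element count and halves the element width, so the vector's total bit width is preserved.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void demoSubdivide(LLVMContext &Ctx) {
  VectorType *V4i32 = VectorType::get(Type::getInt32Ty(Ctx), 4);
  // <4 x i32> -> <8 x i16> -> <16 x i8>, still 128 bits in total.
  VectorType *V16i8 = VectorType::getSubdividedVectorType(V4i32, 2);
  // getWithNewBitWidth keeps the element count and retypes the elements:
  Type *V16i32 = V16i8->getWithNewBitWidth(32); // <16 x i32>
  (void)V16i32;
}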
diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h
index 373663289dbd..ec469982d378 100644
--- a/include/llvm/IR/DiagnosticInfo.h
+++ b/include/llvm/IR/DiagnosticInfo.h
@@ -74,8 +74,10 @@ enum DiagnosticKind {
DK_LastMachineRemark = DK_MachineOptimizationRemarkAnalysis,
DK_MIRParser,
DK_PGOProfile,
+ DK_MisExpect,
DK_Unsupported,
- DK_FirstPluginKind
+ DK_FirstPluginKind // Must be last value to work with
+ // getNextAvailablePluginDiagnosticKind
};
/// Get the next available kind ID for a plugin diagnostic.
@@ -663,7 +665,7 @@ public:
private:
/// The IR value (currently basic block) that the optimization operates on.
/// This is currently used to provide run-time hotness information with PGO.
- const Value *CodeRegion;
+ const Value *CodeRegion = nullptr;
};
/// Diagnostic information for applied optimization remarks.
@@ -1002,6 +1004,25 @@ public:
void print(DiagnosticPrinter &DP) const override;
};
+/// Diagnostic information for MisExpect analysis.
+class DiagnosticInfoMisExpect : public DiagnosticInfoWithLocationBase {
+public:
+ DiagnosticInfoMisExpect(const Instruction *Inst, Twine &Msg);
+
+ /// \see DiagnosticInfo::print.
+ void print(DiagnosticPrinter &DP) const override;
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == DK_MisExpect;
+ }
+
+ const Twine &getMsg() const { return Msg; }
+
+private:
+ /// Message to report.
+ const Twine &Msg;
+};
+
} // end namespace llvm
#endif // LLVM_IR_DIAGNOSTICINFO_H
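A sketch (not taken from the patch) of how a pass might emit the new misexpect diagnostic, assuming an Instruction *I whose observed profile contradicts an llvm.expect hint. Note the constructor holds a reference, so the Twine must outlive the diagnose call.

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void reportMisexpect(Instruction *I) {
  Twine Msg("profile counts disagree with llvm.expect hint");
  // diagnose() dispatches synchronously to the context's diagnostic handler.
  I->getContext().diagnose(DiagnosticInfoMisExpect(I, Msg));
}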
diff --git a/include/llvm/IR/FixedMetadataKinds.def b/include/llvm/IR/FixedMetadataKinds.def
new file mode 100644
index 000000000000..0e1ffef58672
--- /dev/null
+++ b/include/llvm/IR/FixedMetadataKinds.def
@@ -0,0 +1,43 @@
+/*===-- FixedMetadataKinds.def - Fixed metadata kind IDs -------*- C++ -*-===*\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_FIXED_MD_KIND
+#error "LLVM_FIXED_MD_KIND(EnumID, Name, Value) is not defined."
+#endif
+
+LLVM_FIXED_MD_KIND(MD_dbg, "dbg", 0)
+LLVM_FIXED_MD_KIND(MD_tbaa, "tbaa", 1)
+LLVM_FIXED_MD_KIND(MD_prof, "prof", 2)
+LLVM_FIXED_MD_KIND(MD_fpmath, "fpmath", 3)
+LLVM_FIXED_MD_KIND(MD_range, "range", 4)
+LLVM_FIXED_MD_KIND(MD_tbaa_struct, "tbaa.struct", 5)
+LLVM_FIXED_MD_KIND(MD_invariant_load, "invariant.load", 6)
+LLVM_FIXED_MD_KIND(MD_alias_scope, "alias.scope", 7)
+LLVM_FIXED_MD_KIND(MD_noalias, "noalias", 8)
+LLVM_FIXED_MD_KIND(MD_nontemporal, "nontemporal", 9)
+LLVM_FIXED_MD_KIND(MD_mem_parallel_loop_access,
+ "llvm.mem.parallel_loop_access", 10)
+LLVM_FIXED_MD_KIND(MD_nonnull, "nonnull", 11)
+LLVM_FIXED_MD_KIND(MD_dereferenceable, "dereferenceable", 12)
+LLVM_FIXED_MD_KIND(MD_dereferenceable_or_null, "dereferenceable_or_null", 13)
+LLVM_FIXED_MD_KIND(MD_make_implicit, "make.implicit", 14)
+LLVM_FIXED_MD_KIND(MD_unpredictable, "unpredictable", 15)
+LLVM_FIXED_MD_KIND(MD_invariant_group, "invariant.group", 16)
+LLVM_FIXED_MD_KIND(MD_align, "align", 17)
+LLVM_FIXED_MD_KIND(MD_loop, "llvm.loop", 18)
+LLVM_FIXED_MD_KIND(MD_type, "type", 19)
+LLVM_FIXED_MD_KIND(MD_section_prefix, "section_prefix", 20)
+LLVM_FIXED_MD_KIND(MD_absolute_symbol, "absolute_symbol", 21)
+LLVM_FIXED_MD_KIND(MD_associated, "associated", 22)
+LLVM_FIXED_MD_KIND(MD_callees, "callees", 23)
+LLVM_FIXED_MD_KIND(MD_irr_loop, "irr_loop", 24)
+LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25)
+LLVM_FIXED_MD_KIND(MD_callback, "callback", 26)
+LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27)
+LLVM_FIXED_MD_KIND(MD_misexpect, "misexpect", 28)
+LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 29)
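A sketch of the usual X-macro consumption pattern this .def file expects (the #error guard above enforces it): the consumer defines LLVM_FIXED_MD_KIND, includes the file, and undefines the macro. The enum name here is hypothetical; in the tree it is LLVMContext that generates its fixed MD_* kind IDs this way.

// Hypothetical consumer: expands each LLVM_FIXED_MD_KIND entry into an
// enumerator whose value is the fixed metadata kind ID.
enum FixedMDKind : unsigned {
#define LLVM_FIXED_MD_KIND(EnumID, Name, Value) EnumID = Value,
#include "llvm/IR/FixedMetadataKinds.def"
#undef LLVM_FIXED_MD_KIND
};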
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 7fa61e12f431..d586a9460d2b 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -343,7 +343,10 @@ public:
unsigned getFnStackAlignment() const {
if (!hasFnAttribute(Attribute::StackAlignment))
return 0;
- return AttributeSets.getStackAlignment(AttributeList::FunctionIndex);
+ if (const auto MA =
+ AttributeSets.getStackAlignment(AttributeList::FunctionIndex))
+ return MA->value();
+ return 0;
}
/// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
@@ -433,7 +436,9 @@ public:
/// Extract the alignment for a call or parameter (0=unknown).
unsigned getParamAlignment(unsigned ArgNo) const {
- return AttributeSets.getParamAlignment(ArgNo);
+ if (const auto MA = AttributeSets.getParamAlignment(ArgNo))
+ return MA->value();
+ return 0;
}
/// Extract the byval type for a parameter.
@@ -710,6 +715,12 @@ public:
return Arguments + NumArgs;
}
+ Argument *getArg(unsigned i) const {
+ assert(i < NumArgs && "getArg() out of range!");
+ CheckLazyArguments();
+ return Arguments + i;
+ }
+
iterator_range<arg_iterator> args() {
return make_range(arg_begin(), arg_end());
}
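A short usage sketch (not from the patch) of the MaybeAlign unwrapping pattern the hunk above introduces, together with the new O(1) getArg() accessor; it assumes only a valid Function *F.

#include "llvm/IR/Function.h"
using namespace llvm;

unsigned firstParamAlign(const Function *F) {
  if (F->arg_empty())
    return 0;
  const Argument *A0 = F->getArg(0);           // new indexed accessor
  // getParamAlignment unwraps the MaybeAlign internally; 0 means "unknown".
  return F->getParamAlignment(A0->getArgNo());
}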
diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h
index 3cd405701300..f2d9b9676ec9 100644
--- a/include/llvm/IR/GlobalAlias.h
+++ b/include/llvm/IR/GlobalAlias.h
@@ -58,10 +58,6 @@ public:
// Linkage, Type, Parent and AddressSpace taken from the Aliasee.
static GlobalAlias *create(const Twine &Name, GlobalValue *Aliasee);
- void copyAttributesFrom(const GlobalValue *Src) {
- GlobalValue::copyAttributesFrom(Src);
- }
-
/// removeFromParent - This method unlinks 'this' from the containing module,
/// but does not delete it.
///
diff --git a/include/llvm/IR/GlobalIFunc.h b/include/llvm/IR/GlobalIFunc.h
index bc0d3c053cce..0fdae917878a 100644
--- a/include/llvm/IR/GlobalIFunc.h
+++ b/include/llvm/IR/GlobalIFunc.h
@@ -46,10 +46,6 @@ public:
LinkageTypes Linkage, const Twine &Name,
Constant *Resolver, Module *Parent);
- void copyAttributesFrom(const GlobalIFunc *Src) {
- GlobalValue::copyAttributesFrom(Src);
- }
-
/// This method unlinks 'this' from the containing module, but does not
/// delete it.
void removeFromParent();
diff --git a/include/llvm/IR/GlobalIndirectSymbol.h b/include/llvm/IR/GlobalIndirectSymbol.h
index 8bc3f90b94aa..d996237aa3ef 100644
--- a/include/llvm/IR/GlobalIndirectSymbol.h
+++ b/include/llvm/IR/GlobalIndirectSymbol.h
@@ -42,6 +42,10 @@ public:
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+ void copyAttributesFrom(const GlobalValue *Src) {
+ GlobalValue::copyAttributesFrom(Src);
+ }
+
/// These methods set and retrieve indirect symbol.
void setIndirectSymbol(Constant *Symbol) {
setOperand(0, Symbol);
@@ -54,9 +58,7 @@ public:
static_cast<const GlobalIndirectSymbol *>(this)->getIndirectSymbol());
}
- const GlobalObject *getBaseObject() const {
- return dyn_cast<GlobalObject>(getIndirectSymbol()->stripInBoundsOffsets());
- }
+ const GlobalObject *getBaseObject() const;
GlobalObject *getBaseObject() {
return const_cast<GlobalObject *>(
static_cast<const GlobalIndirectSymbol *>(this)->getBaseObject());
diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h
index b8ab6140ebe7..ce81eb9f0719 100644
--- a/include/llvm/IR/GlobalObject.h
+++ b/include/llvm/IR/GlobalObject.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/Alignment.h"
#include <string>
#include <utility>
@@ -27,6 +28,20 @@ class MDNode;
class Metadata;
class GlobalObject : public GlobalValue {
+public:
+ // VCallVisibility - values for visibility metadata attached to vtables. This
+ // describes the scope in which a virtual call could end up being dispatched
+ // through this vtable.
+ enum VCallVisibility {
+ // Type is potentially visible to external code.
+ VCallVisibilityPublic = 0,
+ // Type is only visible to code which will be in the current Module after
+ // LTO internalization.
+ VCallVisibilityLinkageUnit = 1,
+ // Type is only visible to code in the current Module.
+ VCallVisibilityTranslationUnit = 2,
+ };
+
protected:
GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
LinkageTypes Linkage, const Twine &Name,
@@ -58,9 +73,14 @@ public:
unsigned getAlignment() const {
unsigned Data = getGlobalValueSubClassData();
unsigned AlignmentData = Data & AlignmentMask;
- return (1u << AlignmentData) >> 1;
+ MaybeAlign Align = decodeMaybeAlign(AlignmentData);
+ return Align ? Align->value() : 0;
}
- void setAlignment(unsigned Align);
+
+ /// FIXME: Remove this setter once the migration to MaybeAlign is over.
+ LLVM_ATTRIBUTE_DEPRECATED(void setAlignment(unsigned Align),
+ "Please use `void setAlignment(MaybeAlign Align)`");
+ void setAlignment(MaybeAlign Align);
unsigned getGlobalObjectSubClassData() const {
unsigned ValueData = getGlobalValueSubClassData();
@@ -158,6 +178,8 @@ public:
void copyMetadata(const GlobalObject *Src, unsigned Offset);
void addTypeMetadata(unsigned Offset, Metadata *TypeID);
+ void addVCallVisibilityMetadata(VCallVisibility Visibility);
+ VCallVisibility getVCallVisibility() const;
protected:
void copyAttributesFrom(const GlobalObject *Src);
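For clarity, a sketch of the log2-based alignment codec that decodeMaybeAlign reverses in getAlignment() above. The helper names here are hypothetical stand-ins; the real codec lives in llvm/Support/Alignment.h. An encoded 0 means "no alignment specified", otherwise the stored value is Log2(alignment) + 1.

#include <cassert>
#include <cstdint>

uint8_t encodeAlign(uint64_t Alignment) { // Alignment must be 0 or a power of 2
  if (Alignment == 0)
    return 0;                             // "unset"
  uint8_t Log2 = 0;
  while ((uint64_t(1) << Log2) < Alignment)
    ++Log2;
  assert((uint64_t(1) << Log2) == Alignment && "not a power of two");
  return Log2 + 1;
}

uint64_t decodeAlign(uint8_t Encoded) {   // mirrors decodeMaybeAlign
  return Encoded == 0 ? 0 : (uint64_t(1) << (Encoded - 1));
}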
diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h
index 2e2c8c477913..2c730bc312e4 100644
--- a/include/llvm/IR/GlobalVariable.h
+++ b/include/llvm/IR/GlobalVariable.h
@@ -243,6 +243,7 @@ public:
bool hasImplicitSection() const {
return getAttributes().hasAttribute("bss-section") ||
getAttributes().hasAttribute("data-section") ||
+ getAttributes().hasAttribute("relro-section") ||
getAttributes().hasAttribute("rodata-section");
}
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index a74364dffb2e..d1ddb75cde9b 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -1461,7 +1461,7 @@ public:
if (Value *V = foldConstant(Opc, LHS, RHS, Name)) return V;
Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS);
if (isa<FPMathOperator>(BinOp))
- BinOp = setFPAttrs(BinOp, FPMathTag, FMF);
+ setFPAttrs(BinOp, FPMathTag, FMF);
return Insert(BinOp, Name);
}
@@ -1479,7 +1479,8 @@ public:
CallInst *C = CreateIntrinsic(ID, {L->getType()},
{L, R, RoundingV, ExceptV}, nullptr, Name);
- return cast<CallInst>(setFPAttrs(C, FPMathTag, UseFMF));
+ setFPAttrs(C, FPMathTag, UseFMF);
+ return C;
}
Value *CreateNeg(Value *V, const Twine &Name = "",
@@ -1504,7 +1505,7 @@ public:
MDNode *FPMathTag = nullptr) {
if (auto *VC = dyn_cast<Constant>(V))
return Insert(Folder.CreateFNeg(VC), Name);
- return Insert(setFPAttrs(BinaryOperator::CreateFNeg(V), FPMathTag, FMF),
+ return Insert(setFPAttrs(UnaryOperator::CreateFNeg(V), FPMathTag, FMF),
Name);
}
@@ -1514,9 +1515,7 @@ public:
const Twine &Name = "") {
if (auto *VC = dyn_cast<Constant>(V))
return Insert(Folder.CreateFNeg(VC), Name);
- // TODO: This should return UnaryOperator::CreateFNeg(...) once we are
- // confident that they are optimized sufficiently.
- return Insert(setFPAttrs(BinaryOperator::CreateFNeg(V), nullptr,
+ return Insert(setFPAttrs(UnaryOperator::CreateFNeg(V), nullptr,
FMFSource->getFastMathFlags()),
Name);
}
@@ -1534,7 +1533,7 @@ public:
return Insert(Folder.CreateUnOp(Opc, VC), Name);
Instruction *UnOp = UnaryOperator::Create(Opc, V);
if (isa<FPMathOperator>(UnOp))
- UnOp = setFPAttrs(UnOp, FPMathTag, FMF);
+ setFPAttrs(UnOp, FPMathTag, FMF);
return Insert(UnOp, Name);
}
@@ -1612,19 +1611,19 @@ public:
LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
const char *Name) {
LoadInst *LI = CreateLoad(Ty, Ptr, Name);
- LI->setAlignment(Align);
+ LI->setAlignment(MaybeAlign(Align));
return LI;
}
LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
const Twine &Name = "") {
LoadInst *LI = CreateLoad(Ty, Ptr, Name);
- LI->setAlignment(Align);
+ LI->setAlignment(MaybeAlign(Align));
return LI;
}
LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
bool isVolatile, const Twine &Name = "") {
LoadInst *LI = CreateLoad(Ty, Ptr, isVolatile, Name);
- LI->setAlignment(Align);
+ LI->setAlignment(MaybeAlign(Align));
return LI;
}
@@ -1649,7 +1648,7 @@ public:
StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align,
bool isVolatile = false) {
StoreInst *SI = CreateStore(Val, Ptr, isVolatile);
- SI->setAlignment(Align);
+ SI->setAlignment(MaybeAlign(Align));
return SI;
}
@@ -1913,11 +1912,17 @@ public:
return V;
}
- Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = ""){
+ Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = "") {
+ if (IsFPConstrained)
+ return CreateConstrainedFPCast(Intrinsic::experimental_constrained_fptoui,
+ V, DestTy, nullptr, Name);
return CreateCast(Instruction::FPToUI, V, DestTy, Name);
}
- Value *CreateFPToSI(Value *V, Type *DestTy, const Twine &Name = ""){
+ Value *CreateFPToSI(Value *V, Type *DestTy, const Twine &Name = "") {
+ if (IsFPConstrained)
+ return CreateConstrainedFPCast(Intrinsic::experimental_constrained_fptosi,
+ V, DestTy, nullptr, Name);
return CreateCast(Instruction::FPToSI, V, DestTy, Name);
}
@@ -1931,10 +1936,17 @@ public:
Value *CreateFPTrunc(Value *V, Type *DestTy,
const Twine &Name = "") {
+ if (IsFPConstrained)
+ return CreateConstrainedFPCast(
+ Intrinsic::experimental_constrained_fptrunc, V, DestTy, nullptr,
+ Name);
return CreateCast(Instruction::FPTrunc, V, DestTy, Name);
}
Value *CreateFPExt(Value *V, Type *DestTy, const Twine &Name = "") {
+ if (IsFPConstrained)
+ return CreateConstrainedFPCast(Intrinsic::experimental_constrained_fpext,
+ V, DestTy, nullptr, Name);
return CreateCast(Instruction::FPExt, V, DestTy, Name);
}
@@ -2046,6 +2058,37 @@ public:
return Insert(CastInst::CreateFPCast(V, DestTy), Name);
}
+ CallInst *CreateConstrainedFPCast(
+ Intrinsic::ID ID, Value *V, Type *DestTy,
+ Instruction *FMFSource = nullptr, const Twine &Name = "",
+ MDNode *FPMathTag = nullptr,
+ Optional<ConstrainedFPIntrinsic::RoundingMode> Rounding = None,
+ Optional<ConstrainedFPIntrinsic::ExceptionBehavior> Except = None) {
+ Value *ExceptV = getConstrainedFPExcept(Except);
+
+ FastMathFlags UseFMF = FMF;
+ if (FMFSource)
+ UseFMF = FMFSource->getFastMathFlags();
+
+ CallInst *C;
+ switch (ID) {
+ default: {
+ Value *RoundingV = getConstrainedFPRounding(Rounding);
+ C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, RoundingV, ExceptV},
+ nullptr, Name);
+ } break;
+ case Intrinsic::experimental_constrained_fpext:
+ case Intrinsic::experimental_constrained_fptoui:
+ case Intrinsic::experimental_constrained_fptosi:
+ C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, ExceptV}, nullptr,
+ Name);
+ break;
+ }
+ if (isa<FPMathOperator>(C))
+ setFPAttrs(C, FPMathTag, UseFMF);
+ return C;
+ }
+
// Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a
// compile time error, instead of converting the string to bool for the
// isSigned parameter.
@@ -2187,7 +2230,10 @@ public:
PHINode *CreatePHI(Type *Ty, unsigned NumReservedValues,
const Twine &Name = "") {
- return Insert(PHINode::Create(Ty, NumReservedValues), Name);
+ PHINode *Phi = PHINode::Create(Ty, NumReservedValues);
+ if (isa<FPMathOperator>(Phi))
+ setFPAttrs(Phi, nullptr /* MDNode* */, FMF);
+ return Insert(Phi, Name);
}
CallInst *CreateCall(FunctionType *FTy, Value *Callee,
@@ -2195,7 +2241,7 @@ public:
MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
if (isa<FPMathOperator>(CI))
- CI = cast<CallInst>(setFPAttrs(CI, FPMathTag, FMF));
+ setFPAttrs(CI, FPMathTag, FMF);
return Insert(CI, Name);
}
@@ -2204,7 +2250,7 @@ public:
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(FTy, Callee, Args, OpBundles);
if (isa<FPMathOperator>(CI))
- CI = cast<CallInst>(setFPAttrs(CI, FPMathTag, FMF));
+ setFPAttrs(CI, FPMathTag, FMF);
return Insert(CI, Name);
}
@@ -2252,7 +2298,7 @@ public:
Sel = addBranchMetadata(Sel, Prof, Unpred);
}
if (isa<FPMathOperator>(Sel))
- Sel = cast<SelectInst>(setFPAttrs(Sel, nullptr /* MDNode* */, FMF));
+ setFPAttrs(Sel, nullptr /* MDNode* */, FMF);
return Insert(Sel, Name);
}
@@ -2454,7 +2500,7 @@ public:
}
Value *CreatePreserveArrayAccessIndex(Value *Base, unsigned Dimension,
- unsigned LastIndex) {
+ unsigned LastIndex, MDNode *DbgInfo) {
assert(isa<PointerType>(Base->getType()) &&
"Invalid Base ptr type for preserve.array.access.index.");
auto *BaseType = Base->getType();
@@ -2476,6 +2522,8 @@ public:
Value *DimV = getInt32(Dimension);
CallInst *Fn =
CreateCall(FnPreserveArrayAccessIndex, {Base, DimV, LastIndexV});
+ if (DbgInfo)
+ Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
return Fn;
}
@@ -2493,7 +2541,8 @@ public:
Value *DIIndex = getInt32(FieldIndex);
CallInst *Fn =
CreateCall(FnPreserveUnionAccessIndex, {Base, DIIndex});
- Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+ if (DbgInfo)
+ Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
return Fn;
}
@@ -2516,7 +2565,8 @@ public:
Value *DIIndex = getInt32(FieldIndex);
CallInst *Fn = CreateCall(FnPreserveStructAccessIndex,
{Base, GEPIndex, DIIndex});
- Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+ if (DbgInfo)
+ Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
return Fn;
}
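A sketch (not part of the patch) of the constrained-FP behavior added above: once the builder is in constrained mode, the plain cast creators emit the experimental constrained intrinsics instead of bare FP cast instructions. This assumes the builder's setIsFPConstrained toggle from the same IRBuilder work.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *truncateToFloat(IRBuilder<> &B, Value *Dbl) {
  B.setIsFPConstrained(true);
  // Lowers to @llvm.experimental.constrained.fptrunc, with the builder's
  // default rounding-mode and exception-behavior metadata operands.
  return B.CreateFPTrunc(Dbl, B.getFloatTy());
}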
diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h
index 2aac807623a9..72d8ad1501ae 100644
--- a/include/llvm/IR/InlineAsm.h
+++ b/include/llvm/IR/InlineAsm.h
@@ -244,6 +244,7 @@ public:
Constraint_m,
Constraint_o,
Constraint_v,
+ Constraint_A,
Constraint_Q,
Constraint_R,
Constraint_S,
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index ca419b50da6b..7fb94e9d8c22 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -975,7 +975,7 @@ public:
static Type* makeCmpResultType(Type* opnd_type) {
if (VectorType* vt = dyn_cast<VectorType>(opnd_type)) {
return VectorType::get(Type::getInt1Ty(opnd_type->getContext()),
- vt->getNumElements());
+ vt->getElementCount());
}
return Type::getInt1Ty(opnd_type->getContext());
}
@@ -1567,11 +1567,17 @@ public:
}
/// Extract the alignment of the return value.
- unsigned getRetAlignment() const { return Attrs.getRetAlignment(); }
+ unsigned getRetAlignment() const {
+ if (const auto MA = Attrs.getRetAlignment())
+ return MA->value();
+ return 0;
+ }
/// Extract the alignment for a call or parameter (0=unknown).
unsigned getParamAlignment(unsigned ArgNo) const {
- return Attrs.getParamAlignment(ArgNo);
+ if (const auto MA = Attrs.getParamAlignment(ArgNo))
+ return MA->value();
+ return 0;
}
/// Extract the byval type for a call or parameter.
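A sketch of why makeCmpResultType now forwards the ElementCount instead of a plain element number: for a scalable vector the predicate type must stay scalable. This is illustrative only and assumes an existing LLVMContext Ctx.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/TypeSize.h"
using namespace llvm;

Type *cmpMaskType(LLVMContext &Ctx) {
  // A scalable operand, e.g. <vscale x 4 x i32>.
  VectorType *Op =
      VectorType::get(Type::getInt32Ty(Ctx), ElementCount(4, /*Scalable=*/true));
  return CmpInst::makeCmpResultType(Op); // <vscale x 4 x i1>
}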
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index 6a9a74bd16f0..803f6977b32c 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -229,6 +229,16 @@ public:
return hasMetadataHashEntry();
}
+ /// Return true if this instruction has the given type of metadata attached.
+ bool hasMetadata(unsigned KindID) const {
+ return getMetadata(KindID) != nullptr;
+ }
+
+ /// Return true if this instruction has the given type of metadata attached.
+ bool hasMetadata(StringRef Kind) const {
+ return getMetadata(Kind) != nullptr;
+ }
+
/// Get the metadata of given kind attached to this Instruction.
/// If the metadata is not found then return null.
MDNode *getMetadata(unsigned KindID) const {
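A small usage sketch (not from the patch) of the new hasMetadata overloads, which make presence checks read naturally; it assumes a valid Instruction pointer.

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

bool isProfiled(const Instruction *I) {
  // Equivalent to getMetadata(LLVMContext::MD_prof) != nullptr.
  return I->hasMetadata(LLVMContext::MD_prof);
}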
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index 215ce45c7b75..fa980df03ef0 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -110,9 +110,11 @@ public:
/// Return the alignment of the memory that is being allocated by the
/// instruction.
unsigned getAlignment() const {
- return (1u << (getSubclassDataFromInstruction() & 31)) >> 1;
+ if (const auto MA = decodeMaybeAlign(getSubclassDataFromInstruction() & 31))
+ return MA->value();
+ return 0;
}
- void setAlignment(unsigned Align);
+ void setAlignment(MaybeAlign Align);
/// Return true if this alloca is in the entry block of the function and is a
/// constant size. If so, the code generator will fold it into the
@@ -182,15 +184,15 @@ public:
LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
BasicBlock *InsertAtEnd);
LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
- unsigned Align, Instruction *InsertBefore = nullptr);
+ MaybeAlign Align, Instruction *InsertBefore = nullptr);
LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
- unsigned Align, BasicBlock *InsertAtEnd);
+ MaybeAlign Align, BasicBlock *InsertAtEnd);
LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
+ MaybeAlign Align, AtomicOrdering Order,
SyncScope::ID SSID = SyncScope::System,
Instruction *InsertBefore = nullptr);
LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
- unsigned Align, AtomicOrdering Order, SyncScope::ID SSID,
+ MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID,
BasicBlock *InsertAtEnd);
// Deprecated [opaque pointer types]
@@ -209,20 +211,20 @@ public:
BasicBlock *InsertAtEnd)
: LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
isVolatile, InsertAtEnd) {}
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align,
Instruction *InsertBefore = nullptr)
: LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
isVolatile, Align, InsertBefore) {}
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align,
BasicBlock *InsertAtEnd)
: LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
isVolatile, Align, InsertAtEnd) {}
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align,
AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System,
Instruction *InsertBefore = nullptr)
: LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
isVolatile, Align, Order, SSID, InsertBefore) {}
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align,
AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd)
: LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
isVolatile, Align, Order, SSID, InsertAtEnd) {}
@@ -238,10 +240,13 @@ public:
/// Return the alignment of the access that is being performed.
unsigned getAlignment() const {
- return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1;
+ if (const auto MA =
+ decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31))
+ return MA->value();
+ return 0;
}
- void setAlignment(unsigned Align);
+ void setAlignment(MaybeAlign Align);
/// Returns the ordering constraint of this load instruction.
AtomicOrdering getOrdering() const {
@@ -332,17 +337,15 @@ public:
StoreInst(Value *Val, Value *Ptr, bool isVolatile = false,
Instruction *InsertBefore = nullptr);
StoreInst(Value *Val, Value *Ptr, bool isVolatile, BasicBlock *InsertAtEnd);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile,
- unsigned Align, Instruction *InsertBefore = nullptr);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile,
- unsigned Align, BasicBlock *InsertAtEnd);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SyncScope::ID SSID = SyncScope::System,
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align,
Instruction *InsertBefore = nullptr);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile,
- unsigned Align, AtomicOrdering Order, SyncScope::ID SSID,
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align,
BasicBlock *InsertAtEnd);
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align,
+ AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System,
+ Instruction *InsertBefore = nullptr);
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align,
+ AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd);
// allocate space for exactly two operands
void *operator new(size_t s) {
@@ -363,10 +366,13 @@ public:
/// Return the alignment of the access that is being performed
unsigned getAlignment() const {
- return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1;
+ if (const auto MA =
+ decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31))
+ return MA->value();
+ return 0;
}
- void setAlignment(unsigned Align);
+ void setAlignment(MaybeAlign Align);
/// Returns the ordering constraint of this store instruction.
AtomicOrdering getOrdering() const {
@@ -1764,6 +1770,10 @@ public:
void setTrueValue(Value *V) { Op<1>() = V; }
void setFalseValue(Value *V) { Op<2>() = V; }
+ /// Swap the true and false values of the select instruction.
+ /// This doesn't swap prof metadata.
+ void swapValues() { Op<1>().swap(Op<2>()); }
+
/// Return a string if the specified operands are invalid
/// for a select operation, otherwise return null.
static const char *areInvalidOperands(Value *Cond, Value *True, Value *False);
@@ -3455,16 +3465,7 @@ public:
class SwitchInstProfUpdateWrapper {
SwitchInst &SI;
Optional<SmallVector<uint32_t, 8> > Weights = None;
-
- // Sticky invalid state is needed to safely ignore operations with prof data
- // in cases where SwitchInstProfUpdateWrapper is created from SwitchInst
- // with inconsistent prof data. TODO: once we fix all prof data
- // inconsistencies we can turn invalid state to assertions.
- enum {
- Invalid,
- Initialized,
- Changed
- } State = Invalid;
+ bool Changed = false;
protected:
static MDNode *getProfBranchWeightsMD(const SwitchInst &SI);
@@ -3482,7 +3483,7 @@ public:
SwitchInstProfUpdateWrapper(SwitchInst &SI) : SI(SI) { init(); }
~SwitchInstProfUpdateWrapper() {
- if (State == Changed)
+ if (Changed)
SI.setMetadata(LLVMContext::MD_prof, buildProfBranchWeightsMD());
}
@@ -3938,6 +3939,9 @@ class CallBrInst : public CallBase {
ArrayRef<BasicBlock *> IndirectDests, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
+ /// Should the indirect destinations change, scan and update the Arg list.
+ void updateArgBlockAddresses(unsigned i, BasicBlock *B);
+
/// Compute the number of operands to allocate.
static int ComputeNumOperands(int NumArgs, int NumIndirectDests,
int NumBundleInputs = 0) {
@@ -4075,7 +4079,7 @@ public:
return cast<BasicBlock>(*(&Op<-1>() - getNumIndirectDests() - 1));
}
BasicBlock *getIndirectDest(unsigned i) const {
- return cast<BasicBlock>(*(&Op<-1>() - getNumIndirectDests() + i));
+ return cast_or_null<BasicBlock>(*(&Op<-1>() - getNumIndirectDests() + i));
}
SmallVector<BasicBlock *, 16> getIndirectDests() const {
SmallVector<BasicBlock *, 16> IndirectDests;
@@ -4087,6 +4091,7 @@ public:
*(&Op<-1>() - getNumIndirectDests() - 1) = reinterpret_cast<Value *>(B);
}
void setIndirectDest(unsigned i, BasicBlock *B) {
+ updateArgBlockAddresses(i, B);
*(&Op<-1>() - getNumIndirectDests() + i) = reinterpret_cast<Value *>(B);
}
@@ -4096,11 +4101,10 @@ public:
return i == 0 ? getDefaultDest() : getIndirectDest(i - 1);
}
- void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
- assert(idx < getNumIndirectDests() + 1 &&
+ void setSuccessor(unsigned i, BasicBlock *NewSucc) {
+ assert(i < getNumIndirectDests() + 1 &&
"Successor # out of range for callbr!");
- *(&Op<-1>() - getNumIndirectDests() -1 + idx) =
- reinterpret_cast<Value *>(NewSucc);
+ return i == 0 ? setDefaultDest(NewSucc) : setIndirectDest(i - 1, NewSucc);
}
unsigned getNumSuccessors() const { return getNumIndirectDests() + 1; }
@@ -5251,31 +5255,38 @@ public:
/// A helper function that returns the pointer operand of a load or store
/// instruction. Returns nullptr if not load or store.
-inline Value *getLoadStorePointerOperand(Value *V) {
+inline const Value *getLoadStorePointerOperand(const Value *V) {
if (auto *Load = dyn_cast<LoadInst>(V))
return Load->getPointerOperand();
if (auto *Store = dyn_cast<StoreInst>(V))
return Store->getPointerOperand();
return nullptr;
}
+inline Value *getLoadStorePointerOperand(Value *V) {
+ return const_cast<Value *>(
+ getLoadStorePointerOperand(static_cast<const Value *>(V)));
+}
/// A helper function that returns the pointer operand of a load, store
/// or GEP instruction. Returns nullptr if not load, store, or GEP.
-inline Value *getPointerOperand(Value *V) {
+inline const Value *getPointerOperand(const Value *V) {
if (auto *Ptr = getLoadStorePointerOperand(V))
return Ptr;
if (auto *Gep = dyn_cast<GetElementPtrInst>(V))
return Gep->getPointerOperand();
return nullptr;
}
+inline Value *getPointerOperand(Value *V) {
+ return const_cast<Value *>(getPointerOperand(static_cast<const Value *>(V)));
+}
/// A helper function that returns the alignment of load or store instruction.
-inline unsigned getLoadStoreAlignment(Value *I) {
+inline MaybeAlign getLoadStoreAlignment(Value *I) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
"Expected Load or Store instruction");
if (auto *LI = dyn_cast<LoadInst>(I))
- return LI->getAlignment();
- return cast<StoreInst>(I)->getAlignment();
+ return MaybeAlign(LI->getAlignment());
+ return MaybeAlign(cast<StoreInst>(I)->getAlignment());
}
/// A helper function that returns the address space of the pointer operand of
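A usage sketch (not from the patch) of the MaybeAlign-returning helper above, which lets callers distinguish "alignment unspecified" from a real alignment; it assumes I is a load or store instruction.

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

uint64_t knownAlignOrOne(Instruction *I) {
  if (MaybeAlign MA = getLoadStoreAlignment(I))
    return MA->value(); // alignment was explicitly specified
  return 1;             // unspecified: fall back to no known alignment
}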
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index 438bdb29b706..c989b4a2e72a 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -259,6 +259,8 @@ namespace llvm {
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
@@ -271,12 +273,16 @@ namespace llvm {
case Intrinsic::experimental_constrained_log:
case Intrinsic::experimental_constrained_log10:
case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_lrint:
+ case Intrinsic::experimental_constrained_llrint:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_maxnum:
case Intrinsic::experimental_constrained_minnum:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_lround:
+ case Intrinsic::experimental_constrained_llround:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
return true;
@@ -405,11 +411,11 @@ namespace llvm {
setArgOperand(ARG_DEST, Ptr);
}
- void setDestAlignment(unsigned Align) {
+ void setDestAlignment(unsigned Alignment) {
removeParamAttr(ARG_DEST, Attribute::Alignment);
- if (Align > 0)
- addParamAttr(ARG_DEST,
- Attribute::getWithAlignment(getContext(), Align));
+ if (Alignment > 0)
+ addParamAttr(ARG_DEST, Attribute::getWithAlignment(getContext(),
+ Align(Alignment)));
}
void setLength(Value *L) {
@@ -454,11 +460,12 @@ namespace llvm {
BaseCL::setArgOperand(ARG_SOURCE, Ptr);
}
- void setSourceAlignment(unsigned Align) {
+ void setSourceAlignment(unsigned Alignment) {
BaseCL::removeParamAttr(ARG_SOURCE, Attribute::Alignment);
- if (Align > 0)
- BaseCL::addParamAttr(ARG_SOURCE, Attribute::getWithAlignment(
- BaseCL::getContext(), Align));
+ if (Alignment > 0)
+ BaseCL::addParamAttr(ARG_SOURCE,
+ Attribute::getWithAlignment(BaseCL::getContext(),
+ Align(Alignment)));
}
};
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index f38f92022d21..9e4ebd915afc 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -100,7 +100,8 @@ namespace Intrinsic {
Integer, Vector, Pointer, Struct,
Argument, ExtendArgument, TruncArgument, HalfVecArgument,
SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt,
- VecElementArgument
+ VecElementArgument, ScalableVecArgument, Subdivide2Argument,
+ Subdivide4Argument, VecOfBitcastsToInt
} Kind;
union {
@@ -125,14 +126,17 @@ namespace Intrinsic {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
Kind == SameVecWidthArgument || Kind == PtrToArgument ||
- Kind == PtrToElt || Kind == VecElementArgument);
+ Kind == PtrToElt || Kind == VecElementArgument ||
+ Kind == Subdivide2Argument || Kind == Subdivide4Argument ||
+ Kind == VecOfBitcastsToInt);
return Argument_Info >> 3;
}
ArgKind getArgumentKind() const {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
Kind == SameVecWidthArgument || Kind == PtrToArgument ||
- Kind == VecElementArgument);
+ Kind == VecElementArgument || Kind == Subdivide2Argument ||
+ Kind == Subdivide4Argument || Kind == VecOfBitcastsToInt);
return (ArgKind)(Argument_Info & 7);
}
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index d660f8278437..7a0263f88c2a 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -63,6 +63,12 @@ class NoCapture<int argNo> : IntrinsicProperty {
int ArgNo = argNo;
}
+// NoAlias - The specified argument pointer does not alias other "noalias"
+// pointer arguments of the intrinsic with respect to the intrinsic's scope.
+class NoAlias<int argNo> : IntrinsicProperty {
+ int ArgNo = argNo;
+}
+
// Returned - The specified argument is always the return value of the
// intrinsic.
class Returned<int argNo> : IntrinsicProperty {
@@ -181,6 +187,16 @@ class LLVMVectorElementType<int num> : LLVMMatchType<num>;
// vector type, but change the element count to be half as many
class LLVMHalfElementsVectorType<int num> : LLVMMatchType<num>;
+// Match the type of another intrinsic parameter that is expected to be a
+// vector type (i.e. <N x iM>), but with each element subdivided to form a
+// vector with more, narrower elements than the original.
+class LLVMSubdivide2VectorType<int num> : LLVMMatchType<num>;
+class LLVMSubdivide4VectorType<int num> : LLVMMatchType<num>;
+
+// Match the element count and bit width of another intrinsic parameter, but
+// change the element type to an integer.
+class LLVMVectorOfBitcastsToInt<int num> : LLVMMatchType<num>;
+
def llvm_void_ty : LLVMType<isVoid>;
let isAny = 1 in {
def llvm_any_ty : LLVMType<Any>;
@@ -407,9 +423,9 @@ def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[],
//===--------------------- Code Generator Intrinsics ----------------------===//
//
def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
-def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
-def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
-def int_sponentry : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+def int_addressofreturnaddress : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>;
+def int_frameaddress : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
+def int_sponentry : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>;
def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
[IntrReadMem], "llvm.read_register">;
def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty],
@@ -451,8 +467,8 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>,
// from being reordered overly much with respect to nearby access to the same
// memory while not impeding optimization.
def int_prefetch
- : Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
- [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
+ : Intrinsic<[], [ llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<0>, NoCapture<0>,
ImmArg<1>, ImmArg<2>]>;
def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
@@ -460,7 +476,7 @@ def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
// The assume intrinsic is marked as arbitrarily writing so that proper
// control dependencies will be maintained.
-def int_assume : Intrinsic<[], [llvm_i1_ty], []>;
+def int_assume : Intrinsic<[], [llvm_i1_ty], [IntrWillReturn]>;
// Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
// guard to the correct place on the stack frame.
@@ -493,23 +509,23 @@ def int_instrprof_value_profile : Intrinsic<[],
def int_memcpy : Intrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i1_ty],
- [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
- WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>,
+ NoAlias<0>, NoAlias<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
def int_memmove : Intrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i1_ty],
- [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>,
ReadOnly<1>, ImmArg<3>]>;
def int_memset : Intrinsic<[],
[llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
llvm_i1_ty],
- [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>,
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, WriteOnly<0>,
ImmArg<3>]>;
// FIXME: Add version of these floating point intrinsics which allow non-default
// rounding modes and FP exception handling.
-let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
+let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
def int_fma : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>]>;
@@ -551,19 +567,19 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
def int_minnum : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable, Commutative]
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
def int_maxnum : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable, Commutative]
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
def int_minimum : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable, Commutative]
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
def int_maximum : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable, Commutative]
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
// NOTE: these are internal interfaces.
@@ -576,13 +592,13 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
def int_objectsize : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, llvm_i1_ty,
llvm_i1_ty, llvm_i1_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>,
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<1>, ImmArg<2>, ImmArg<3>]>,
GCCBuiltin<"__builtin_object_size">;
//===--------------- Constrained Floating Point Intrinsics ----------------===//
//
-let IntrProperties = [IntrInaccessibleMemOnly] in {
+let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
def int_experimental_constrained_fadd : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -616,6 +632,14 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ],
+ [ llvm_anyfloat_ty,
+ llvm_metadata_ty ]>;
+
+ def int_experimental_constrained_fptoui : Intrinsic<[ llvm_anyint_ty ],
+ [ llvm_anyfloat_ty,
+ llvm_metadata_ty ]>;
+
def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty,
@@ -679,6 +703,14 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_lrint : Intrinsic<[ llvm_anyint_ty ],
+ [ llvm_anyfloat_ty,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_llrint : Intrinsic<[ llvm_anyint_ty ],
+ [ llvm_anyfloat_ty,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -697,6 +729,12 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_lround : Intrinsic<[ llvm_anyint_ty ],
+ [ llvm_anyfloat_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_llround : Intrinsic<[ llvm_anyint_ty ],
+ [ llvm_anyfloat_ty,
+ llvm_metadata_ty ]>;
def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
@@ -706,18 +744,19 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
llvm_metadata_ty,
llvm_metadata_ty ]>;
}
-// FIXME: Add intrinsics for fcmp, fptoui and fptosi.
+// FIXME: Add intrinsic for fcmp.
+// FIXME: Consider adding intrinsics for sitofp and uitofp.
//===------------------------- Expect Intrinsics --------------------------===//
//
def int_expect : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrWillReturn]>;
//===-------------------- Bit Manipulation Intrinsics ---------------------===//
//
// None of these intrinsics accesses memory at all.
-let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
+let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
@@ -727,7 +766,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
}
-let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in {
+let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<1>] in {
def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
}
@@ -739,7 +778,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in {
// mean the optimizers can change them aggressively. Special handling
// needed in a few places. These synthetic intrinsics have no
// side-effects and just mark information about their operands.
-let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
+let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
def int_dbg_declare : Intrinsic<[],
[llvm_metadata_ty,
llvm_metadata_ty,
@@ -796,21 +835,21 @@ def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>;
def int_var_annotation : Intrinsic<[],
[llvm_ptr_ty, llvm_ptr_ty,
llvm_ptr_ty, llvm_i32_ty],
- [], "llvm.var.annotation">;
+ [IntrWillReturn], "llvm.var.annotation">;
def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType<llvm_anyint_ty>],
[LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty,
llvm_i32_ty],
- [], "llvm.ptr.annotation">;
+ [IntrWillReturn], "llvm.ptr.annotation">;
def int_annotation : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, llvm_ptr_ty,
llvm_ptr_ty, llvm_i32_ty],
- [], "llvm.annotation">;
+ [IntrWillReturn], "llvm.annotation">;
// Annotates the current program point with metadata strings which are emitted
// as CodeView debug info records. This is expensive, as it disables inlining
// and is modelled as having side effects.
def int_codeview_annotation : Intrinsic<[], [llvm_metadata_ty],
- [IntrInaccessibleMemOnly, IntrNoDuplicate],
+ [IntrInaccessibleMemOnly, IntrNoDuplicate, IntrWillReturn],
"llvm.codeview.annotation">;
//===------------------------ Trampoline Intrinsics -----------------------===//
@@ -828,79 +867,77 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
//
// Expose the carry flag from add operations on two integrals.
-def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-
-def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-
-def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-
+let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
+ def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [LLVMMatchType<0>, LLVMMatchType<0>]>;
+ def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [LLVMMatchType<0>, LLVMMatchType<0>]>;
+
+ def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [LLVMMatchType<0>, LLVMMatchType<0>]>;
+ def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [LLVMMatchType<0>, LLVMMatchType<0>]>;
+
+ def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [LLVMMatchType<0>, LLVMMatchType<0>]>;
+ def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [LLVMMatchType<0>, LLVMMatchType<0>]>;
+}
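On the C++ side, IntrWillReturn surfaces as the willreturn function attribute, which passes can query to prove that a call terminates. A hedged sketch, assuming a CallInst *CI calling one of the intrinsics above:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

bool callMustReturn(const CallInst *CI) {
  // True for e.g. llvm.sadd.with.overflow.* after this change.
  return CI->hasFnAttr(Attribute::WillReturn);
}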
//===------------------------- Saturation Arithmetic Intrinsics ---------------------===//
//
def int_sadd_sat : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable, Commutative]>;
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>;
def int_uadd_sat : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable, Commutative]>;
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>;
def int_ssub_sat : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
def int_usub_sat : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
//===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===//
//
def int_smul_fix : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>;
def int_umul_fix : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>;
//===------------------- Fixed Point Saturation Arithmetic Intrinsics ----------------===//
//
def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>;
+def int_umul_fix_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>;
//===------------------------- Memory Use Markers -------------------------===//
//
def int_lifetime_start : Intrinsic<[],
[llvm_i64_ty, llvm_anyptr_ty],
- [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>;
def int_lifetime_end : Intrinsic<[],
[llvm_i64_ty, llvm_anyptr_ty],
- [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>;
def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
[llvm_i64_ty, llvm_anyptr_ty],
- [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>;
def int_invariant_end : Intrinsic<[],
[llvm_descriptor_ty, llvm_i64_ty,
llvm_anyptr_ty],
- [IntrArgMemOnly, NoCapture<2>, ImmArg<1>]>;
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<2>, ImmArg<1>]>;
// launder.invariant.group can't be marked with 'readnone' (IntrNoMem),
// because it would cause CSE of two barriers with the same argument.
@@ -916,12 +953,12 @@ def int_invariant_end : Intrinsic<[],
// might change in the future.
def int_launder_invariant_group : Intrinsic<[llvm_anyptr_ty],
[LLVMMatchType<0>],
- [IntrInaccessibleMemOnly, IntrSpeculatable]>;
+ [IntrInaccessibleMemOnly, IntrSpeculatable, IntrWillReturn]>;
def int_strip_invariant_group : Intrinsic<[llvm_anyptr_ty],
[LLVMMatchType<0>],
- [IntrSpeculatable, IntrNoMem]>;
+ [IntrSpeculatable, IntrNoMem, IntrWillReturn]>;
//===------------------------ Stackmap Intrinsics -------------------------===//
//
@@ -964,6 +1001,14 @@ def int_coro_id : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_ptr_ty,
llvm_ptr_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrReadMem,
ReadNone<1>, ReadOnly<2>, NoCapture<2>]>;
+def int_coro_id_retcon : Intrinsic<[llvm_token_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty,
+ llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+ []>;
+def int_coro_id_retcon_once : Intrinsic<[llvm_token_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty,
+ llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+ []>;
def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>;
def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
[WriteOnly<1>]>;
@@ -979,6 +1024,13 @@ def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
+def int_coro_suspend_retcon : Intrinsic<[llvm_any_ty], [llvm_vararg_ty], []>;
+def int_coro_prepare_retcon : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
+ [IntrNoMem]>;
+def int_coro_alloca_alloc : Intrinsic<[llvm_token_ty],
+ [llvm_anyint_ty, llvm_i32_ty], []>;
+def int_coro_alloca_get : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], []>;
+def int_coro_alloca_free : Intrinsic<[], [llvm_token_ty], []>;
def int_coro_param : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_ptr_ty],
[IntrNoMem, ReadNone<0>, ReadNone<1>]>;
@@ -1018,19 +1070,19 @@ def int_experimental_guard : Intrinsic<[], [llvm_i1_ty, llvm_vararg_ty],
// Supports widenable conditions for guards represented as explicit branches.
def int_experimental_widenable_condition : Intrinsic<[llvm_i1_ty], [],
- [IntrInaccessibleMemOnly]>;
+ [IntrInaccessibleMemOnly, IntrWillReturn]>;
// NOP: calls/invokes to this intrinsic are removed by codegen
-def int_donothing : Intrinsic<[], [], [IntrNoMem]>;
+def int_donothing : Intrinsic<[], [], [IntrNoMem, IntrWillReturn]>;
// This instruction has no actual effect, though it is treated by the optimizer
// as having opaque side effects. This may be inserted into loops to ensure
// that they are not removed even if they turn out to be empty, for languages
// which specify that infinite loops must be preserved.
-def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly]>;
+def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>;
-// Intrisics to support half precision floating point format
-let IntrProperties = [IntrNoMem] in {
+// Intrinsics to support half precision floating point format
+let IntrProperties = [IntrNoMem, IntrWillReturn] in {
def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>;
def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>;
}
@@ -1041,7 +1093,11 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
[], "llvm.clear_cache">;
// Intrinsic to detect whether its argument is a constant.
-def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">;
+def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWillReturn], "llvm.is.constant">;
+
+// Intrinsic to mask out bits of a pointer.
+def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_anyint_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
//===-------------------------- Masked Intrinsics -------------------------===//
//
@@ -1049,45 +1105,45 @@ def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
LLVMAnyPointerType<LLVMMatchType<0>>,
llvm_i32_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [IntrArgMemOnly, ImmArg<2>]>;
+ [IntrArgMemOnly, IntrWillReturn, ImmArg<2>]>;
def int_masked_load : Intrinsic<[llvm_anyvector_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
- [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
+ [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<1>]>;
def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
[LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
- [IntrReadMem, ImmArg<1>]>;
+ [IntrReadMem, IntrWillReturn, ImmArg<1>]>;
def int_masked_scatter: Intrinsic<[],
[llvm_anyvector_ty,
LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [ImmArg<2>]>;
+ [IntrWillReturn, ImmArg<2>]>;
def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
[LLVMPointerToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
- [IntrReadMem]>;
+ [IntrReadMem, IntrWillReturn]>;
def int_masked_compressstore: Intrinsic<[],
[llvm_anyvector_ty,
LLVMPointerToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [IntrArgMemOnly]>;
+ [IntrArgMemOnly, IntrWillReturn]>;
// Test whether a pointer is associated with a type metadata identifier.
def int_type_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrWillReturn]>;
// Safely loads a function pointer from a virtual table pointer using type metadata.
def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty],
[llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrWillReturn]>;
// Create a branch funnel that implements an indirect call to a limited set of
// callees. This needs to be a musttail call.
@@ -1098,6 +1154,8 @@ def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
def int_hwasan_check_memaccess :
Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>;
+def int_hwasan_check_memaccess_shortgranules :
+ Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>;
// Xray intrinsics
//===----------------------------------------------------------------------===//
@@ -1121,7 +1179,7 @@ def int_memcpy_element_unordered_atomic
llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty
],
[
- IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
+ IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
ReadOnly<1>, ImmArg<3>
]>;
@@ -1132,58 +1190,47 @@ def int_memmove_element_unordered_atomic
llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty
],
[
- IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
+ IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
ReadOnly<1>, ImmArg<3>
]>;
// @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize)
def int_memset_element_unordered_atomic
: Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ],
- [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>;
+ [ IntrArgMemOnly, IntrWillReturn, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>;
//===------------------------ Reduction Intrinsics ------------------------===//
//
-def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>,
- llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>,
- llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
+let IntrProperties = [IntrNoMem, IntrWillReturn] in {
+ def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>,
+ llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>,
+ llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+}
//===---------- Intrinsics to control hardware supported loops ----------===//
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index 832aca4fd30f..db01700f409f 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -691,7 +691,7 @@ def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
// Memory Tagging Extensions (MTE) Intrinsics
let TargetPrefix = "aarch64" in {
def int_aarch64_irg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
- [IntrInaccessibleMemOnly]>;
+ [IntrNoMem, IntrHasSideEffects]>;
def int_aarch64_addg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
[IntrNoMem]>;
def int_aarch64_gmi : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty],
@@ -707,7 +707,7 @@ def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
// Generate a randomly tagged stack base pointer.
def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty],
- [IntrInaccessibleMemOnly]>;
+ [IntrNoMem, IntrHasSideEffects]>;
// Transfer pointer tag with offset.
// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where
@@ -733,3 +733,124 @@ def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
[IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
}
+
+// Transactional Memory Extension (TME) Intrinsics
+let TargetPrefix = "aarch64" in {
+def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">,
+ Intrinsic<[llvm_i64_ty]>;
+
+def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[]>;
+
+def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">,
+ Intrinsic<[], [llvm_i64_ty], [ImmArg<0>]>;
+
+def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">,
+ Intrinsic<[llvm_i64_ty], [],
+ [IntrNoMem, IntrHasSideEffects]>;
+}
+
+def llvm_nxv2i1_ty : LLVMType<nxv2i1>;
+def llvm_nxv4i1_ty : LLVMType<nxv4i1>;
+def llvm_nxv8i1_ty : LLVMType<nxv8i1>;
+def llvm_nxv16i1_ty : LLVMType<nxv16i1>;
+def llvm_nxv16i8_ty : LLVMType<nxv16i8>;
+def llvm_nxv4i32_ty : LLVMType<nxv4i32>;
+def llvm_nxv2i64_ty : LLVMType<nxv2i64>;
+def llvm_nxv8f16_ty : LLVMType<nxv8f16>;
+def llvm_nxv4f32_ty : LLVMType<nxv4f32>;
+def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
+
+let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
+ class AdvSIMD_Merged1VectorArg_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_CNT_Intrinsic
+ : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
+ [LLVMVectorOfBitcastsToInt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_Unpack_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMSubdivide2VectorType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_PUNPKHI_Intrinsic
+ : Intrinsic<[LLVMHalfElementsVectorType<0>],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_DOT_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMSubdivide4VectorType<0>,
+ LLVMSubdivide4VectorType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_DOT_Indexed_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMSubdivide4VectorType<0>,
+ LLVMSubdivide4VectorType<0>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
+  // This class of intrinsics is not intended to be useful within LLVM IR but
+  // is instead here to support some of the more rigid parts of the ACLE.
+ class Builtin_SVCVT<string name, LLVMType OUT, LLVMType IN>
+ : GCCBuiltin<"__builtin_sve_" # name>,
+ Intrinsic<[OUT], [OUT, llvm_nxv16i1_ty, IN], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE
+
+let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
+
+//
+// Integer arithmetic
+//
+
+def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
+
+def int_aarch64_sve_sdot : AdvSIMD_SVE_DOT_Intrinsic;
+def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+
+def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
+def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+
+//
+// Counting bits
+//
+
+def int_aarch64_sve_cnt : AdvSIMD_SVE_CNT_Intrinsic;
+
+//
+// Permutations and selection
+//
+
+def int_aarch64_sve_sunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
+def int_aarch64_sve_sunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
+
+def int_aarch64_sve_uunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
+def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
+
+//
+// Floating-point conversions
+//
+
+def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
+
+//
+// Predicate operations
+//
+
+def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic;
+def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic;
+}
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 3982444b5401..ab6ee7f92dd1 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -175,6 +175,7 @@ def int_amdgcn_implicit_buffer_ptr :
// Set EXEC to the 64-bit value given.
// This is always moved to the beginning of the basic block.
+// FIXME: Should be mangled for wave size.
def int_amdgcn_init_exec : Intrinsic<[],
[llvm_i64_ty], // 64-bit literal constant
[IntrConvergent, ImmArg<0>]>;
@@ -185,7 +186,7 @@ def int_amdgcn_init_exec : Intrinsic<[],
def int_amdgcn_init_exec_from_input : Intrinsic<[],
[llvm_i32_ty, // 32-bit SGPR input
llvm_i32_ty], // bit offset of the thread count
- [IntrConvergent]>;
+ [IntrConvergent, ImmArg<1>]>;
def int_amdgcn_wavefrontsize :
GCCBuiltin<"__builtin_amdgcn_wavefrontsize">,
@@ -199,12 +200,14 @@ def int_amdgcn_wavefrontsize :
// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
- Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
+ Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
- Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
+ Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
- Intrinsic<[], [], [IntrConvergent]>;
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent]>;
def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
@@ -835,9 +838,6 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
-
- // TODO: INC/DEC are weird: they seem to have a vdata argument in hardware,
- // even though it clearly shouldn't be needed
defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">;
defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">;
@@ -854,8 +854,8 @@ let TargetPrefix = "amdgcn" in {
defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {
-class AMDGPUBufferLoad : Intrinsic <
- [llvm_any_ty],
+class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
+ [data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
@@ -863,7 +863,7 @@ class AMDGPUBufferLoad : Intrinsic <
llvm_i1_ty], // slc(imm)
[IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
-def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
+def int_amdgcn_buffer_load_format : AMDGPUBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
def int_amdgcn_s_buffer_load : Intrinsic <
@@ -874,9 +874,9 @@ def int_amdgcn_s_buffer_load : Intrinsic <
[IntrNoMem, ImmArg<2>]>,
AMDGPURsrcIntrinsic<0>;
-class AMDGPUBufferStore : Intrinsic <
+class AMDGPUBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
[],
- [llvm_any_ty, // vdata(VGPR)
+ [data_ty, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
@@ -884,7 +884,7 @@ class AMDGPUBufferStore : Intrinsic <
llvm_i1_ty], // slc(imm)
[IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
-def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
+def int_amdgcn_buffer_store_format : AMDGPUBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_buffer_store : AMDGPUBufferStore;
// New buffer intrinsics with separate raw and struct variants. The raw
@@ -894,56 +894,68 @@ def int_amdgcn_buffer_store : AMDGPUBufferStore;
// and swizzling changes depending on whether idxen is set in the instruction.
// These new intrinsics also keep the offset and soffset arguments separate as
// they behave differently in bounds checking and swizzling.
-class AMDGPURawBufferLoad : Intrinsic <
- [llvm_any_ty],
+class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
+ [data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
[IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
-def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
+def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
-class AMDGPUStructBufferLoad : Intrinsic <
- [llvm_any_ty],
+class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
+ [data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
[IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
-def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
+def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
-class AMDGPURawBufferStore : Intrinsic <
+class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
[],
- [llvm_any_ty, // vdata(VGPR)
+ [data_ty, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
[IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
-def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore;
+def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
-class AMDGPUStructBufferStore : Intrinsic <
+class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
[],
- [llvm_any_ty, // vdata(VGPR)
+ [data_ty, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
[IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
-def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
+def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
-class AMDGPURawBufferAtomic : Intrinsic <
- [llvm_anyint_ty],
+class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
+ [data_ty],
[LLVMMatchType<0>, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
@@ -961,6 +973,8 @@ def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
[llvm_anyint_ty],
[LLVMMatchType<0>, // src(VGPR)
@@ -972,8 +986,8 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
[ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
-class AMDGPUStructBufferAtomic : Intrinsic <
- [llvm_anyint_ty],
+class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
+ [data_ty],
[LLVMMatchType<0>, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
@@ -992,6 +1006,8 @@ def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
[llvm_anyint_ty],
[LLVMMatchType<0>, // src(VGPR)
@@ -1046,7 +1062,10 @@ def int_amdgcn_raw_tbuffer_load : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
[IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
@@ -1057,7 +1076,10 @@ def int_amdgcn_raw_tbuffer_store : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
[IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
@@ -1068,7 +1090,10 @@ def int_amdgcn_struct_tbuffer_load : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
[IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
@@ -1080,7 +1105,10 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
[IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
@@ -1431,6 +1459,13 @@ def int_amdgcn_wqm : Intrinsic<[llvm_any_ty],
[LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
+// Copies the source value to the destination value, such that the source
+// is computed as if the entire program were executed in WQM if any other
+// program code executes in WQM.
+def int_amdgcn_softwqm : Intrinsic<[llvm_any_ty],
+ [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+>;
+
// Return true if at least one thread within the pixel quad passes true into
// the function.
def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
@@ -1459,6 +1494,18 @@ def int_amdgcn_set_inactive :
LLVMMatchType<0>], // value for the inactive lanes to take
[IntrNoMem, IntrConvergent]>;
+// Return true if the given flat pointer points to a local memory address.
+def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">,
+ Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<0>]
+>;
+
+// Return true if the given flat pointer points to a private memory address.
+def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">,
+ Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<0>]
+>;
+
//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index 4792af097d95..e13da6157e04 100644
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -777,5 +777,14 @@ class Neon_Dot_Intrinsic
def int_arm_neon_udot : Neon_Dot_Intrinsic;
def int_arm_neon_sdot : Neon_Dot_Intrinsic;
+def int_arm_vctp8 : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_vctp64 : Intrinsic<[llvm_v2i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+// GNU eabi mcount
+def int_arm_gnu_eabi_mcount : Intrinsic<[],
+ [],
+ [IntrReadMem, IntrWriteMem]>;
} // end TargetPrefix
diff --git a/include/llvm/IR/IntrinsicsBPF.td b/include/llvm/IR/IntrinsicsBPF.td
index d7595a2a7700..3618cc6a4128 100644
--- a/include/llvm/IR/IntrinsicsBPF.td
+++ b/include/llvm/IR/IntrinsicsBPF.td
@@ -20,4 +20,7 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf."
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
def int_bpf_pseudo : GCCBuiltin<"__builtin_bpf_pseudo">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>;
+ def int_bpf_preserve_field_info : GCCBuiltin<"__builtin_bpf_preserve_field_info">,
+ Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<1>]>;
}
diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td
index 6393a9ca35d5..bfcdd80a52d5 100644
--- a/include/llvm/IR/IntrinsicsMips.td
+++ b/include/llvm/IR/IntrinsicsMips.td
@@ -1260,16 +1260,16 @@ def int_mips_insve_d : GCCBuiltin<"__builtin_msa_insve_d">,
def int_mips_ld_b : GCCBuiltin<"__builtin_msa_ld_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
+ [IntrReadMem, IntrArgMemOnly]>;
def int_mips_ld_h : GCCBuiltin<"__builtin_msa_ld_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
+ [IntrReadMem, IntrArgMemOnly]>;
def int_mips_ld_w : GCCBuiltin<"__builtin_msa_ld_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
+ [IntrReadMem, IntrArgMemOnly]>;
def int_mips_ld_d : GCCBuiltin<"__builtin_msa_ld_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
+ [IntrReadMem, IntrArgMemOnly]>;
def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
@@ -1684,16 +1684,16 @@ def int_mips_srlri_d : GCCBuiltin<"__builtin_msa_srlri_d">,
def int_mips_st_b : GCCBuiltin<"__builtin_msa_st_b">,
Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrArgMemOnly, ImmArg<2>]>;
+ [IntrArgMemOnly]>;
def int_mips_st_h : GCCBuiltin<"__builtin_msa_st_h">,
Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrArgMemOnly, ImmArg<2>]>;
+ [IntrArgMemOnly]>;
def int_mips_st_w : GCCBuiltin<"__builtin_msa_st_w">,
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrArgMemOnly, ImmArg<2>]>;
+ [IntrArgMemOnly]>;
def int_mips_st_d : GCCBuiltin<"__builtin_msa_st_d">,
Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrArgMemOnly, ImmArg<2>]>;
+ [IntrArgMemOnly]>;
def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
index dba7dd76c4ff..0483d965ba64 100644
--- a/include/llvm/IR/IntrinsicsNVVM.td
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -276,6 +276,26 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
);
}
+class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
+ string Suffix = !if(sync, "sync_", "")
+ # mode # "_"
+ # type
+ # !if(return_pred, "p", "");
+
+ string Name = "int_nvvm_shfl_" # Suffix;
+ string Builtin = "__nvvm_shfl_" # Suffix;
+ string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix);
+ list<int> withGccBuiltin = !if(return_pred, [], [1]);
+ list<int> withoutGccBuiltin = !if(return_pred, [1], []);
+ LLVMType OpType = !cond(
+ !eq(type,"i32"): llvm_i32_ty,
+ !eq(type,"f32"): llvm_float_ty);
+ list<LLVMType> RetTy = !if(return_pred, [OpType, llvm_i1_ty], [OpType]);
+ list<LLVMType> ArgsTy = !if(sync,
+ [llvm_i32_ty, OpType, llvm_i32_ty, llvm_i32_ty],
+ [OpType, llvm_i32_ty, llvm_i32_ty]);
+}
+
let TargetPrefix = "nvvm" in {
def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
@@ -3955,90 +3975,27 @@ def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
//
// SHUFFLE
//
-
-// shfl.down.b32 dest, val, offset, mask_and_clamp
-def int_nvvm_shfl_down_i32 :
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.down.i32">,
- GCCBuiltin<"__nvvm_shfl_down_i32">;
-def int_nvvm_shfl_down_f32 :
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.down.f32">,
- GCCBuiltin<"__nvvm_shfl_down_f32">;
-
-// shfl.up.b32 dest, val, offset, mask_and_clamp
-def int_nvvm_shfl_up_i32 :
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.up.i32">,
- GCCBuiltin<"__nvvm_shfl_up_i32">;
-def int_nvvm_shfl_up_f32 :
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.up.f32">,
- GCCBuiltin<"__nvvm_shfl_up_f32">;
-
-// shfl.bfly.b32 dest, val, offset, mask_and_clamp
-def int_nvvm_shfl_bfly_i32 :
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.bfly.i32">,
- GCCBuiltin<"__nvvm_shfl_bfly_i32">;
-def int_nvvm_shfl_bfly_f32 :
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.bfly.f32">,
- GCCBuiltin<"__nvvm_shfl_bfly_f32">;
-
-// shfl.idx.b32 dest, val, lane, mask_and_clamp
-def int_nvvm_shfl_idx_i32 :
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.idx.i32">,
- GCCBuiltin<"__nvvm_shfl_idx_i32">;
-def int_nvvm_shfl_idx_f32 :
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.idx.f32">,
- GCCBuiltin<"__nvvm_shfl_idx_f32">;
-
-// Synchronizing shfl variants available in CUDA-9.
-// On sm_70 these don't have to be convergent, so we may eventually want to
-// implement non-convergent variant of this intrinsic.
-
-// shfl.sync.down.b32 dest, threadmask, val, offset , mask_and_clamp
-def int_nvvm_shfl_sync_down_i32 :
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.down.i32">,
- GCCBuiltin<"__nvvm_shfl_sync_down_i32">;
-def int_nvvm_shfl_sync_down_f32 :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.down.f32">,
- GCCBuiltin<"__nvvm_shfl_sync_down_f32">;
-
-// shfl.sync.up.b32 dest, threadmask, val, offset, mask_and_clamp
-def int_nvvm_shfl_sync_up_i32 :
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.up.i32">,
- GCCBuiltin<"__nvvm_shfl_sync_up_i32">;
-def int_nvvm_shfl_sync_up_f32 :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.up.f32">,
- GCCBuiltin<"__nvvm_shfl_sync_up_f32">;
-
-// shfl.sync.bfly.b32 dest, threadmask, val, offset, mask_and_clamp
-def int_nvvm_shfl_sync_bfly_i32 :
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.bfly.i32">,
- GCCBuiltin<"__nvvm_shfl_sync_bfly_i32">;
-def int_nvvm_shfl_sync_bfly_f32 :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.bfly.f32">,
- GCCBuiltin<"__nvvm_shfl_sync_bfly_f32">;
-
-// shfl.sync.idx.b32 dest, threadmask, val, lane, mask_and_clamp
-def int_nvvm_shfl_sync_idx_i32 :
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.idx.i32">,
- GCCBuiltin<"__nvvm_shfl_sync_idx_i32">;
-def int_nvvm_shfl_sync_idx_f32 :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.idx.f32">,
- GCCBuiltin<"__nvvm_shfl_sync_idx_f32">;
+// Generate intrinsics for all variants of the shfl instruction.
+foreach sync = [0, 1] in {
+ foreach mode = ["up", "down", "bfly", "idx"] in {
+ foreach type = ["i32", "f32"] in {
+ foreach return_pred = [0, 1] in {
+ foreach i = [SHFL_INFO<sync, mode, type, return_pred>] in {
+ foreach _ = i.withGccBuiltin in {
+ def i.Name : GCCBuiltin<i.Builtin>,
+ Intrinsic<i.RetTy, i.ArgsTy,
+ [IntrInaccessibleMemOnly, IntrConvergent],
+ i.IntrName>;
+ }
+ foreach _ = i.withoutGccBuiltin in {
+ def i.Name : Intrinsic<i.RetTy, i.ArgsTy,
+ [IntrInaccessibleMemOnly, IntrConvergent], i.IntrName>;
+ }
+ }
+ }
+ }
+ }
+}
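As a sanity check on the table-driven form above: the instantiation SHFL_INFO<1, "down", "i32", 0> yields Suffix "sync_down_i32", so the loop regenerates the removed int_nvvm_shfl_sync_down_i32 record, complete with GCCBuiltin __nvvm_shfl_sync_down_i32 and IR name llvm.nvvm.shfl.sync.down.i32.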
//
// VOTE
diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td
index 1b892727547d..810979b99934 100644
--- a/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -24,6 +24,17 @@ def int_wasm_memory_grow : Intrinsic<[llvm_anyint_ty],
[]>;
//===----------------------------------------------------------------------===//
+// Trapping float-to-int conversions
+//===----------------------------------------------------------------------===//
+
+def int_wasm_trunc_signed : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyfloat_ty],
+ [IntrNoMem]>;
+def int_wasm_trunc_unsigned : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyfloat_ty],
+ [IntrNoMem]>;
+
+//===----------------------------------------------------------------------===//
// Saturating float-to-int conversions
//===----------------------------------------------------------------------===//
@@ -89,6 +100,10 @@ def int_wasm_atomic_notify:
// SIMD intrinsics
//===----------------------------------------------------------------------===//
+def int_wasm_swizzle :
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_wasm_sub_saturate_signed :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
@@ -109,6 +124,39 @@ def int_wasm_alltrue :
Intrinsic<[llvm_i32_ty],
[llvm_anyvector_ty],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_qfma :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_qfms :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_narrow_signed :
+ Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, LLVMMatchType<1>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_narrow_unsigned :
+ Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, LLVMMatchType<1>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_widen_low_signed :
+ Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_widen_high_signed :
+ Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_widen_low_unsigned :
+ Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_widen_high_unsigned :
+ Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// Bulk memory intrinsics
@@ -133,4 +181,14 @@ def int_wasm_tls_size :
[],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_tls_align :
+ Intrinsic<[llvm_anyint_ty],
+ [],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_tls_base :
+ Intrinsic<[llvm_ptr_ty],
+ [],
+ [IntrReadMem]>;
+
} // TargetPrefix = "wasm"
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 236d312d7d78..5796686dd79f 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -2091,16 +2091,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_ptr_ty], [], []>;
def int_x86_lwpins32 :
GCCBuiltin<"__builtin_ia32_lwpins32">,
- Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<2>]>;
def int_x86_lwpins64 :
GCCBuiltin<"__builtin_ia32_lwpins64">,
- Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<2>]>;
def int_x86_lwpval32 :
GCCBuiltin<"__builtin_ia32_lwpval32">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<2>]>;
def int_x86_lwpval64 :
GCCBuiltin<"__builtin_ia32_lwpval64">,
- Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<2>]>;
}
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h
index c80504500418..91bd57dc5ac0 100644
--- a/include/llvm/IR/LLVMContext.h
+++ b/include/llvm/IR/LLVMContext.h
@@ -72,34 +72,9 @@ public:
// Pinned metadata names, which always have the same value. This is a
// compile-time performance optimization, not a correctness optimization.
enum : unsigned {
- MD_dbg = 0, // "dbg"
- MD_tbaa = 1, // "tbaa"
- MD_prof = 2, // "prof"
- MD_fpmath = 3, // "fpmath"
- MD_range = 4, // "range"
- MD_tbaa_struct = 5, // "tbaa.struct"
- MD_invariant_load = 6, // "invariant.load"
- MD_alias_scope = 7, // "alias.scope"
- MD_noalias = 8, // "noalias",
- MD_nontemporal = 9, // "nontemporal"
- MD_mem_parallel_loop_access = 10, // "llvm.mem.parallel_loop_access"
- MD_nonnull = 11, // "nonnull"
- MD_dereferenceable = 12, // "dereferenceable"
- MD_dereferenceable_or_null = 13, // "dereferenceable_or_null"
- MD_make_implicit = 14, // "make.implicit"
- MD_unpredictable = 15, // "unpredictable"
- MD_invariant_group = 16, // "invariant.group"
- MD_align = 17, // "align"
- MD_loop = 18, // "llvm.loop"
- MD_type = 19, // "type"
- MD_section_prefix = 20, // "section_prefix"
- MD_absolute_symbol = 21, // "absolute_symbol"
- MD_associated = 22, // "associated"
- MD_callees = 23, // "callees"
- MD_irr_loop = 24, // "irr_loop"
- MD_access_group = 25, // "llvm.access.group"
- MD_callback = 26, // "callback"
- MD_preserve_access_index = 27, // "llvm.preserve.*.access.index"
+#define LLVM_FIXED_MD_KIND(EnumID, Name, Value) EnumID = Value,
+#include "llvm/IR/FixedMetadataKinds.def"
+#undef LLVM_FIXED_MD_KIND
};
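The point of the .def refactor is that the same file can be expanded again elsewhere, so the enum values and their name strings cannot drift apart; a hedged C++ sketch of such a second expansion (the actual in-tree consumer may differ):

    // Builds a table of metadata-kind names in enum order, reusing
    // FixedMetadataKinds.def as the single source of truth.
    static const char *const FixedMDKindNames[] = {
    #define LLVM_FIXED_MD_KIND(EnumID, Name, Value) Name,
    #include "llvm/IR/FixedMetadataKinds.def"
    #undef LLVM_FIXED_MD_KIND
    };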
/// Known operand bundle tag IDs, which always have the same value. All
diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h
index 3a2b1bddf45d..11e2e2623257 100644
--- a/include/llvm/IR/MDBuilder.h
+++ b/include/llvm/IR/MDBuilder.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/DataTypes.h"
#include <utility>
@@ -75,6 +76,10 @@ public:
/// Return metadata containing the section prefix for a function.
MDNode *createFunctionSectionPrefix(StringRef Prefix);
+  /// Return metadata containing the expected value.
+ MDNode *createMisExpect(uint64_t Index, uint64_t LikelyWeight,
+ uint64_t UnlikelyWeight);
+
//===------------------------------------------------------------------===//
// Range metadata.
//===------------------------------------------------------------------===//
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
index 7ca2540181ba..f62b1e246cca 100644
--- a/include/llvm/IR/Metadata.h
+++ b/include/llvm/IR/Metadata.h
@@ -601,7 +601,7 @@ dyn_extract_or_null(Y &&MD) {
/// These are used to efficiently contain a byte sequence for metadata.
/// MDString is always unnamed.
class MDString : public Metadata {
- friend class StringMapEntry<MDString>;
+ friend class StringMapEntryStorage<MDString>;
StringMapEntry<MDString> *Entry = nullptr;
@@ -806,7 +806,7 @@ public:
/// Ensure that this has RAUW support, and then return it.
ReplaceableMetadataImpl *getOrCreateReplaceableUses() {
if (!hasReplaceableUses())
- makeReplaceable(llvm::make_unique<ReplaceableMetadataImpl>(getContext()));
+ makeReplaceable(std::make_unique<ReplaceableMetadataImpl>(getContext()));
return getReplaceableUses();
}
diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
index f458680cfe15..59331142766a 100644
--- a/include/llvm/IR/Module.h
+++ b/include/llvm/IR/Module.h
@@ -46,6 +46,7 @@ class FunctionType;
class GVMaterializer;
class LLVMContext;
class MemoryBuffer;
+class Pass;
class RandomNumberGenerator;
template <class PtrType> class SmallPtrSetImpl;
class StructType;
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index aacf8cfc089f..be60447abd87 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -119,7 +119,7 @@ class GlobalValueSummary;
using GlobalValueSummaryList = std::vector<std::unique_ptr<GlobalValueSummary>>;
-struct LLVM_ALIGNAS(8) GlobalValueSummaryInfo {
+struct alignas(8) GlobalValueSummaryInfo {
union NameOrGV {
NameOrGV(bool HaveGVs) {
if (HaveGVs)
@@ -603,7 +603,7 @@ public:
if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() ||
!TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() ||
!TypeCheckedLoadConstVCalls.empty())
- TIdInfo = llvm::make_unique<TypeIdInfo>(TypeIdInfo{
+ TIdInfo = std::make_unique<TypeIdInfo>(TypeIdInfo{
std::move(TypeTests), std::move(TypeTestAssumeVCalls),
std::move(TypeCheckedLoadVCalls),
std::move(TypeTestAssumeConstVCalls),
@@ -632,6 +632,8 @@ public:
/// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
+ void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); }
+
/// Returns the list of type identifiers used by this function in
/// llvm.type.test intrinsics other than by an llvm.assume intrinsic,
/// represented as GUIDs.
@@ -680,7 +682,7 @@ public:
/// were unable to devirtualize a checked call.
void addTypeTest(GlobalValue::GUID Guid) {
if (!TIdInfo)
- TIdInfo = llvm::make_unique<TypeIdInfo>();
+ TIdInfo = std::make_unique<TypeIdInfo>();
TIdInfo->TypeTests.push_back(Guid);
}
@@ -780,7 +782,7 @@ public:
void setVTableFuncs(VTableFuncList Funcs) {
assert(!VTableFuncs);
- VTableFuncs = llvm::make_unique<VTableFuncList>(std::move(Funcs));
+ VTableFuncs = std::make_unique<VTableFuncList>(std::move(Funcs));
}
ArrayRef<VirtFuncOffset> vTableFuncs() const {
@@ -1293,6 +1295,12 @@ public:
return nullptr;
}
+ TypeIdSummary *getTypeIdSummary(StringRef TypeId) {
+ return const_cast<TypeIdSummary *>(
+ static_cast<const ModuleSummaryIndex *>(this)->getTypeIdSummary(
+ TypeId));
+ }
+
const std::map<std::string, TypeIdCompatibleVtableInfo> &
typeIdCompatibleVtableMap() const {
return TypeIdCompatibleVtableMap;
@@ -1411,7 +1419,7 @@ template <>
struct GraphTraits<ModuleSummaryIndex *> : public GraphTraits<ValueInfo> {
static NodeRef getEntryNode(ModuleSummaryIndex *I) {
std::unique_ptr<GlobalValueSummary> Root =
- make_unique<FunctionSummary>(I->calculateCallGraphRoot());
+ std::make_unique<FunctionSummary>(I->calculateCallGraphRoot());
GlobalValueSummaryInfo G(I->haveGVs());
G.SummaryList.push_back(std::move(Root));
static auto P =
diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h
index 26d9c43fabf1..4d4a67c75172 100644
--- a/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -220,7 +220,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
V.emplace(RefGUID, /*IsAnalysis=*/false);
Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*V.find(RefGUID)));
}
- Elem.SummaryList.push_back(llvm::make_unique<FunctionSummary>(
+ Elem.SummaryList.push_back(std::make_unique<FunctionSummary>(
GlobalValueSummary::GVFlags(
static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, FSum.CanAutoHide),
diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h
index 8199c65ca8a0..037f5aed03ee 100644
--- a/include/llvm/IR/Operator.h
+++ b/include/llvm/IR/Operator.h
@@ -379,16 +379,25 @@ public:
return false;
switch (Opcode) {
+ case Instruction::FNeg:
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp
+ // should not be treated as a math op, but the other opcodes should.
+ // This would make things consistent with Select/PHI (FP value type
+ // determines whether they are math ops and, therefore, capable of
+ // having fast-math-flags).
case Instruction::FCmp:
return true;
- // non math FP Operators (no FMF)
- case Instruction::ExtractElement:
- case Instruction::ShuffleVector:
- case Instruction::InsertElement:
case Instruction::PHI:
- return false;
- default:
+ case Instruction::Select:
+ case Instruction::Call:
return V->getType()->isFPOrFPVectorTy();
+ default:
+ return false;
}
}
};
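A small usage sketch of the widened predicate (hedged; `I` is assumed to be an in-scope llvm::Instruction*): FP-typed selects, PHIs, and calls now classify as FPMathOperator, so fast-math flags can be queried on them uniformly.

    // With the change above, the cast also succeeds for FP selects/PHIs/calls.
    if (auto *FPOp = llvm::dyn_cast<llvm::FPMathOperator>(I))
      if (FPOp->hasAllowReassoc())
        ; // reassociation-based folds are permitted on this operation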
diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h
index 37fe2a5b01ad..1e1f4a92f844 100644
--- a/include/llvm/IR/PassManager.h
+++ b/include/llvm/IR/PassManager.h
@@ -45,6 +45,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManagerInternal.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TypeName.h"
#include "llvm/Support/raw_ostream.h"
@@ -418,7 +419,7 @@ template <typename PassT, typename IRUnitT, typename AnalysisManagerT,
typename PassT::Result
getAnalysisResultUnpackTuple(AnalysisManagerT &AM, IRUnitT &IR,
std::tuple<ArgTs...> Args,
- llvm::index_sequence<Ns...>) {
+ std::index_sequence<Ns...>) {
(void)Args;
return AM.template getResult<PassT>(IR, std::get<Ns>(Args)...);
}
@@ -435,7 +436,7 @@ getAnalysisResult(AnalysisManager<IRUnitT, AnalysisArgTs...> &AM, IRUnitT &IR,
std::tuple<MainArgTs...> Args) {
return (getAnalysisResultUnpackTuple<
PassT, IRUnitT>)(AM, IR, Args,
- llvm::index_sequence_for<AnalysisArgTs...>{});
+ std::index_sequence_for<AnalysisArgTs...>{});
}
} // namespace detail
diff --git a/include/llvm/IR/PassManagerInternal.h b/include/llvm/IR/PassManagerInternal.h
index 58198bf67b11..c602c0b5cc20 100644
--- a/include/llvm/IR/PassManagerInternal.h
+++ b/include/llvm/IR/PassManagerInternal.h
@@ -289,7 +289,7 @@ struct AnalysisPassModel : AnalysisPassConcept<IRUnitT, PreservedAnalysesT,
AnalysisResultConcept<IRUnitT, PreservedAnalysesT, InvalidatorT>>
run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
ExtraArgTs... ExtraArgs) override {
- return llvm::make_unique<ResultModelT>(
+ return std::make_unique<ResultModelT>(
Pass.run(IR, AM, std::forward<ExtraArgTs>(ExtraArgs)...));
}
diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h
index 0f03d7cc56b8..2851b24c05ae 100644
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@@ -88,6 +88,25 @@ inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
/// Match an arbitrary Constant and ignore it.
inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
+/// Match an arbitrary basic block value and ignore it.
+inline class_match<BasicBlock> m_BasicBlock() {
+ return class_match<BasicBlock>();
+}
+
+/// Inverting matcher
+template <typename Ty> struct match_unless {
+ Ty M;
+
+ match_unless(const Ty &Matcher) : M(Matcher) {}
+
+ template <typename ITy> bool match(ITy *V) { return !M.match(V); }
+};
+
+/// Match if the inner matcher does *NOT* match.
+template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) {
+ return match_unless<Ty>(M);
+}
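A brief usage sketch of the inverting matcher (hedged; `V` is assumed to be an in-scope llvm::Value*):

    // Matches an add whose second operand is *not* a constant.
    using namespace llvm::PatternMatch;
    llvm::Value *X;
    if (match(V, m_Add(m_Value(X), m_Unless(m_Constant()))))
      ; // X binds the first operand; the second is known non-constant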
+
/// Matching combinators
template <typename LTy, typename RTy> struct match_combine_or {
LTy L;
@@ -300,6 +319,15 @@ template <typename Predicate> struct cstfp_pred_ty : public Predicate {
//
///////////////////////////////////////////////////////////////////////////////
+struct is_any_apint {
+ bool isValue(const APInt &C) { return true; }
+};
+/// Match an integer or vector with any integral constant.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
+ return cst_pred_ty<is_any_apint>();
+}
+
struct is_all_ones {
bool isValue(const APInt &C) { return C.isAllOnesValue(); }
};
@@ -388,6 +416,18 @@ inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
return V;
}
+struct is_negated_power2 {
+ bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
+};
+/// Match an integer or vector negated power-of-2.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_negated_power2> m_NegatedPower2() {
+ return cst_pred_ty<is_negated_power2>();
+}
+inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
+ return V;
+}
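One motivating use, sketched under the assumption that `V` is an in-scope llvm::Value*: alignment masks are typically written `X & -Align` with Align a power of two.

    using namespace llvm::PatternMatch;
    const llvm::APInt *MaskC;
    llvm::Value *X;
    // Matches e.g. `and i64 %x, -16`; (-*MaskC) is then the alignment.
    if (match(V, m_And(m_Value(X), m_NegatedPower2(MaskC))))
      ; // (-*MaskC).isPowerOf2() holds by construction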
+
struct is_power2_or_zero {
bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
};
@@ -528,6 +568,12 @@ inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
/// Match a ConstantFP, capturing the value if we match.
inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
+/// Match a basic block value, capturing it if we match.
+inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; }
+inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) {
+ return V;
+}
+
/// Match a specified Value*.
struct specificval_ty {
const Value *Val;
@@ -597,11 +643,11 @@ struct bind_const_intval_ty {
};
/// Match a specified integer value or vector of all elements of that
-// value.
+/// value.
struct specific_intval {
- uint64_t Val;
+ APInt Val;
- specific_intval(uint64_t V) : Val(V) {}
+ specific_intval(APInt V) : Val(std::move(V)) {}
template <typename ITy> bool match(ITy *V) {
const auto *CI = dyn_cast<ConstantInt>(V);
@@ -609,18 +655,50 @@ struct specific_intval {
if (const auto *C = dyn_cast<Constant>(V))
CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue());
- return CI && CI->getValue() == Val;
+ return CI && APInt::isSameValue(CI->getValue(), Val);
}
};
/// Match a specific integer value or vector with all elements equal to
/// the value.
-inline specific_intval m_SpecificInt(uint64_t V) { return specific_intval(V); }
+inline specific_intval m_SpecificInt(APInt V) {
+ return specific_intval(std::move(V));
+}
+
+inline specific_intval m_SpecificInt(uint64_t V) {
+ return m_SpecificInt(APInt(64, V));
+}
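A short sketch of what the APInt overload enables (hedged; previously only values expressible in 64 bits could be specified):

    using namespace llvm::PatternMatch;
    // Now expressible: match a specific 128-bit constant (or a splat of it).
    if (match(V, m_SpecificInt(llvm::APInt::getSignedMinValue(128))))
      ; // V is the minimum signed i128 value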
/// Match a ConstantInt and bind to its value. This does not match
/// ConstantInts wider than 64-bits.
inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
+/// Match a specified basic block value.
+struct specific_bbval {
+ BasicBlock *Val;
+
+ specific_bbval(BasicBlock *Val) : Val(Val) {}
+
+ template <typename ITy> bool match(ITy *V) {
+ const auto *BB = dyn_cast<BasicBlock>(V);
+ return BB && BB == Val;
+ }
+};
+
+/// Match a specific basic block value.
+inline specific_bbval m_SpecificBB(BasicBlock *BB) {
+ return specific_bbval(BB);
+}
+
+/// A commutative-friendly version of m_Specific().
+inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) {
+ return BB;
+}
+inline deferredval_ty<const BasicBlock>
+m_Deferred(const BasicBlock *const &BB) {
+ return BB;
+}
+
//===----------------------------------------------------------------------===//
// Matcher for any binary operator.
//
@@ -968,6 +1046,12 @@ struct is_idiv_op {
}
};
+struct is_irem_op {
+ bool isOpType(unsigned Opcode) {
+ return Opcode == Instruction::SRem || Opcode == Instruction::URem;
+ }
+};
+
/// Matches shift operations.
template <typename LHS, typename RHS>
inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
@@ -1003,6 +1087,13 @@ inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R);
}
+/// Matches integer remainder operations.
+template <typename LHS, typename RHS>
+inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L,
+ const RHS &R) {
+ return BinOpPred_match<LHS, RHS, is_irem_op>(L, R);
+}
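A usage sketch (hedged; `V` is an in-scope llvm::Value*): matching a remainder by a power of two regardless of signedness.

    using namespace llvm::PatternMatch;
    const llvm::APInt *C;
    llvm::Value *X;
    // Matches both `srem X, 2^k` and `urem X, 2^k`.
    if (match(V, m_IRem(m_Value(X), m_Power2(C))))
      ; // C->logBase2() recovers the exponent k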
+
//===----------------------------------------------------------------------===//
// Class that matches exact binary ops.
//
@@ -1210,6 +1301,12 @@ inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
return CastClass_match<OpTy, Instruction::Trunc>(Op);
}
+template <typename OpTy>
+inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy>
+m_TruncOrSelf(const OpTy &Op) {
+ return m_CombineOr(m_Trunc(Op), Op);
+}
+
/// Matches SExt.
template <typename OpTy>
inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
@@ -1223,12 +1320,33 @@ inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
}
template <typename OpTy>
+inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
+m_ZExtOrSelf(const OpTy &Op) {
+ return m_CombineOr(m_ZExt(Op), Op);
+}
+
+template <typename OpTy>
+inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy>
+m_SExtOrSelf(const OpTy &Op) {
+ return m_CombineOr(m_SExt(Op), Op);
+}
+
+template <typename OpTy>
inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
CastClass_match<OpTy, Instruction::SExt>>
m_ZExtOrSExt(const OpTy &Op) {
return m_CombineOr(m_ZExt(Op), m_SExt(Op));
}
+template <typename OpTy>
+inline match_combine_or<
+ match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
+ CastClass_match<OpTy, Instruction::SExt>>,
+ OpTy>
+m_ZExtOrSExtOrSelf(const OpTy &Op) {
+ return m_CombineOr(m_ZExtOrSExt(Op), Op);
+}
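These *OrSelf combinators let one pattern tolerate an optional cast; a hedged sketch, again assuming an in-scope llvm::Value* `V`:

    using namespace llvm::PatternMatch;
    llvm::Value *Amt;
    // Matches `shl X, Amt` whether or not Amt was first zero-extended.
    if (match(V, m_Shl(m_Value(), m_ZExtOrSelf(m_Value(Amt)))))
      ; // Amt binds the shift amount before any zext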
+
/// Matches UIToFP.
template <typename OpTy>
inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) {
@@ -1274,27 +1392,34 @@ struct br_match {
inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); }
-template <typename Cond_t> struct brc_match {
+template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
+struct brc_match {
Cond_t Cond;
- BasicBlock *&T, *&F;
+ TrueBlock_t T;
+ FalseBlock_t F;
- brc_match(const Cond_t &C, BasicBlock *&t, BasicBlock *&f)
+ brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f)
: Cond(C), T(t), F(f) {}
template <typename OpTy> bool match(OpTy *V) {
if (auto *BI = dyn_cast<BranchInst>(V))
- if (BI->isConditional() && Cond.match(BI->getCondition())) {
- T = BI->getSuccessor(0);
- F = BI->getSuccessor(1);
- return true;
- }
+ if (BI->isConditional() && Cond.match(BI->getCondition()))
+ return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1));
return false;
}
};
template <typename Cond_t>
-inline brc_match<Cond_t> m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
- return brc_match<Cond_t>(C, T, F);
+inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>
+m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
+ return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>(
+ C, m_BasicBlock(T), m_BasicBlock(F));
+}
+
+template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
+inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t>
+m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) {
+ return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F);
}
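A sketch of the generalized form (hedged; `BI` is an in-scope terminator instruction and `Header` a known basic block):

    using namespace llvm::PatternMatch;
    llvm::Value *Cond;
    llvm::BasicBlock *FalseBB;
    // Requires the true edge to target Header while capturing the false edge.
    if (match(BI, m_Br(m_Value(Cond), m_SpecificBB(Header),
                       m_BasicBlock(FalseBB))))
      ; // BI is `br i1 Cond, label Header, label FalseBB`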
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/RemarkStreamer.h b/include/llvm/IR/RemarkStreamer.h
index f34cc660b2fb..2abf6f99cb08 100644
--- a/include/llvm/IR/RemarkStreamer.h
+++ b/include/llvm/IR/RemarkStreamer.h
@@ -25,12 +25,12 @@
namespace llvm {
/// Streamer for remarks.
class RemarkStreamer {
- /// The filename that the remark diagnostics are emitted to.
- const std::string Filename;
/// The regex used to filter remarks based on the passes that emit them.
Optional<Regex> PassFilter;
/// The object used to serialize the remarks to a specific format.
- std::unique_ptr<remarks::Serializer> Serializer;
+ std::unique_ptr<remarks::RemarkSerializer> RemarkSerializer;
+ /// The filename that the remark diagnostics are emitted to.
+ const Optional<std::string> Filename;
/// Convert diagnostics into remark objects.
/// The lifetime of the members of the result is bound to the lifetime of
@@ -38,14 +38,16 @@ class RemarkStreamer {
remarks::Remark toRemark(const DiagnosticInfoOptimizationBase &Diag);
public:
- RemarkStreamer(StringRef Filename,
- std::unique_ptr<remarks::Serializer> Serializer);
+ RemarkStreamer(std::unique_ptr<remarks::RemarkSerializer> RemarkSerializer,
+ Optional<StringRef> Filename = None);
/// Return the filename that the remark diagnostics are emitted to.
- StringRef getFilename() const { return Filename; }
+ Optional<StringRef> getFilename() const {
+ return Filename ? Optional<StringRef>(*Filename) : None;
+ }
/// Return stream that the remark diagnostics are emitted to.
- raw_ostream &getStream() { return Serializer->OS; }
+ raw_ostream &getStream() { return RemarkSerializer->OS; }
/// Return the serializer used for this stream.
- remarks::Serializer &getSerializer() { return *Serializer; }
+ remarks::RemarkSerializer &getSerializer() { return *RemarkSerializer; }
/// Set a pass filter based on a regex \p Filter.
/// Returns an error if the regex is invalid.
Error setFilter(StringRef Filter);
@@ -84,13 +86,21 @@ struct RemarkSetupFormatError : RemarkSetupErrorInfo<RemarkSetupFormatError> {
using RemarkSetupErrorInfo<RemarkSetupFormatError>::RemarkSetupErrorInfo;
};
-/// Setup optimization remarks.
+/// Setup optimization remarks that output to a file.
Expected<std::unique_ptr<ToolOutputFile>>
setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
StringRef RemarksPasses, StringRef RemarksFormat,
bool RemarksWithHotness,
unsigned RemarksHotnessThreshold = 0);
+/// Setup optimization remarks that output directly to a raw_ostream.
+/// \p OS is managed by the caller and should be open for writing as long as \p
+/// Context is streaming remarks to it.
+Error setupOptimizationRemarks(LLVMContext &Context, raw_ostream &OS,
+ StringRef RemarksPasses, StringRef RemarksFormat,
+ bool RemarksWithHotness,
+ unsigned RemarksHotnessThreshold = 0);
+
} // end namespace llvm
#endif // LLVM_IR_REMARKSTREAMER_H
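A minimal sketch of the new raw_ostream entry point (illustrative only; the "yaml" format string and the empty pass filter are assumptions): remarks can now be streamed into any caller-owned stream, such as an in-memory buffer, provided the stream outlives the context's remark streaming.

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/RemarkStreamer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static Error enableInMemoryRemarks(LLVMContext &Ctx, raw_string_ostream &OS) {
  // No pass filter, YAML serialization, no hotness information.
  return setupOptimizationRemarks(Ctx, OS, /*RemarksPasses=*/"",
                                  /*RemarksFormat=*/"yaml",
                                  /*RemarksWithHotness=*/false);
}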
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
index f2aa49030aaa..d0961dac833d 100644
--- a/include/llvm/IR/Type.h
+++ b/include/llvm/IR/Type.h
@@ -21,6 +21,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -281,12 +282,15 @@ public:
/// This will return zero if the type does not have a size or is not a
/// primitive type.
///
+ /// If this is a scalable vector type, the scalable property will be set and
+ /// the runtime size will be a positive integer multiple of the base size.
+ ///
/// Note that this may not reflect the size of memory allocated for an
/// instance of the type or the number of bytes that are written when an
/// instance of the type is stored to memory. The DataLayout class provides
/// additional query functions to provide this information.
///
- unsigned getPrimitiveSizeInBits() const LLVM_READONLY;
+ TypeSize getPrimitiveSizeInBits() const LLVM_READONLY;
/// If this is a vector type, return the getPrimitiveSizeInBits value for the
/// element type. Otherwise return the getPrimitiveSizeInBits value for this
@@ -368,6 +372,7 @@ public:
inline bool getVectorIsScalable() const;
inline unsigned getVectorNumElements() const;
+ inline ElementCount getVectorElementCount() const;
Type *getVectorElementType() const {
assert(getTypeID() == VectorTyID);
return ContainedTys[0];
@@ -378,6 +383,14 @@ public:
return ContainedTys[0];
}
+ /// Given an integer or vector type, change the lane bitwidth to NewBitWidth,
+ /// whilst keeping the old number of lanes.
+ inline Type *getWithNewBitWidth(unsigned NewBitWidth) const;
+
+ /// Given scalar/vector integer type, returns a type with elements twice as
+ /// wide as in the original type. For vectors, preserves element count.
+ inline Type *getExtendedType() const;
+
/// Get the address space of this pointer or pointer vector type.
inline unsigned getPointerAddressSpace() const;
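A sketch of what the TypeSize return type means for callers (assumptions: TypeSize::isScalable() is available, and getExtendedType() requires an integer or integer-vector type): scalable vector sizes can no longer be treated as plain integers.

#include "llvm/IR/Type.h"
#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Double each lane's width while keeping the lane count, e.g. <4 x i16>
// becomes <4 x i32>. Assumes T is an integer or integer-vector type.
static Type *widenLanesIfFixed(Type *T) {
  TypeSize TS = T->getPrimitiveSizeInBits();
  if (TS.isScalable()) // size is only a runtime multiple of the base size
    return T;
  return T->getExtendedType();
}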
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index 19d87c5c621d..850ee72a0387 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -111,7 +111,7 @@ public:
#endif
}
/// Placement delete - required by std, called if the ctor throws.
- void operator delete(void *Usr, unsigned, bool) {
+ void operator delete(void *Usr, unsigned, unsigned) {
// Note: If a subclass manipulates the information which is required to calculate the
// Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has
// to restore the changed information to the original value, since the dtor of that class
diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h
index b2d8e7ac4741..f2c4b3b3f203 100644
--- a/include/llvm/IR/Value.h
+++ b/include/llvm/IR/Value.h
@@ -14,8 +14,10 @@
#define LLVM_IR_VALUE_H
#include "llvm-c/Types.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Use.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
#include <cassert>
@@ -292,10 +294,29 @@ public:
/// "V" instead of "this". This function skips metadata entries in the list.
void replaceNonMetadataUsesWith(Value *V);
+ /// Go through the uses list for this definition and make each use point
+ /// to "V" if the callback ShouldReplace returns true for the given Use.
+ /// Unlike replaceAllUsesWith() this function does not support basic block
+ /// values or constant users.
+ void replaceUsesWithIf(Value *New,
+ llvm::function_ref<bool(Use &U)> ShouldReplace) {
+ assert(New && "Value::replaceUsesWithIf(<null>) is invalid!");
+ assert(New->getType() == getType() &&
+ "replaceUses of value with new value of different type!");
+
+ for (use_iterator UI = use_begin(), E = use_end(); UI != E;) {
+ Use &U = *UI;
+ ++UI;
+ if (!ShouldReplace(U))
+ continue;
+ U.set(New);
+ }
+ }
+
/// replaceUsesOutsideBlock - Go through the uses list for this definition and
/// make each use point to "V" instead of "this" when the use is outside the
/// block. 'This's use list is expected to have at least one element.
- /// Unlike replaceAllUsesWith this function does not support basic block
+ /// Unlike replaceAllUsesWith() this function does not support basic block
/// values or constant users.
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB);
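A usage sketch for replaceUsesWithIf (the helper is hypothetical, not from the patch): restrict replacement to uses inside one function, something replaceAllUsesWith() cannot express.

#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void replaceUsesInFunction(Value *Old, Value *New, Function *F) {
  // Old and New must already have the same type.
  Old->replaceUsesWithIf(New, [F](Use &U) {
    auto *I = dyn_cast<Instruction>(U.getUser());
    return I && I->getFunction() == F;
  });
}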
@@ -493,17 +514,27 @@ public:
/// swifterror attribute.
bool isSwiftError() const;
- /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
+ /// Strip off pointer casts, all-zero GEPs and address space casts.
///
/// Returns the original uncasted value. If this is called on a non-pointer
/// value, it returns 'this'.
const Value *stripPointerCasts() const;
Value *stripPointerCasts() {
return const_cast<Value *>(
- static_cast<const Value *>(this)->stripPointerCasts());
+ static_cast<const Value *>(this)->stripPointerCasts());
}
- /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases
+ /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
+ ///
+ /// Returns the original uncasted value. If this is called on a non-pointer
+ /// value, it returns 'this'.
+ const Value *stripPointerCastsAndAliases() const;
+ Value *stripPointerCastsAndAliases() {
+ return const_cast<Value *>(
+ static_cast<const Value *>(this)->stripPointerCastsAndAliases());
+ }
+
+ /// Strip off pointer casts, all-zero GEPs and address space casts
/// but ensures the representation of the result stays the same.
///
/// Returns the original uncasted value with the same representation. If this
@@ -514,26 +545,15 @@ public:
->stripPointerCastsSameRepresentation());
}
- /// Strip off pointer casts, all-zero GEPs, aliases and invariant group
- /// info.
+ /// Strip off pointer casts, all-zero GEPs and invariant group info.
///
/// Returns the original uncasted value. If this is called on a non-pointer
/// value, it returns 'this'. This function should be used only in
/// Alias analysis.
const Value *stripPointerCastsAndInvariantGroups() const;
Value *stripPointerCastsAndInvariantGroups() {
- return const_cast<Value *>(
- static_cast<const Value *>(this)->stripPointerCastsAndInvariantGroups());
- }
-
- /// Strip off pointer casts and all-zero GEPs.
- ///
- /// Returns the original uncasted value. If this is called on a non-pointer
- /// value, it returns 'this'.
- const Value *stripPointerCastsNoFollowAliases() const;
- Value *stripPointerCastsNoFollowAliases() {
- return const_cast<Value *>(
- static_cast<const Value *>(this)->stripPointerCastsNoFollowAliases());
+ return const_cast<Value *>(static_cast<const Value *>(this)
+ ->stripPointerCastsAndInvariantGroups());
}
/// Strip off pointer casts and all-constant inbounds GEPs.
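The renaming above changes the default: stripPointerCasts() no longer looks through aliases. A sketch of the opt-in spelling for code that still needs alias-following (the helper name is illustrative):

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static const GlobalVariable *underlyingGlobal(const Value *V) {
  // Follows bitcasts, all-zero GEPs, addrspacecasts, and aliases.
  return dyn_cast<GlobalVariable>(V->stripPointerCastsAndAliases());
}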
@@ -612,7 +632,7 @@ public:
///
/// Returns an alignment which is either specified explicitly, e.g. via
/// align attribute of a function argument, or guaranteed by DataLayout.
- unsigned getPointerAlignment(const DataLayout &DL) const;
+ MaybeAlign getPointerAlignment(const DataLayout &DL) const;
/// Translate PHI node to its predecessor from the given basic block.
///
diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h
index 6a79b1d387f3..fb5440d5efe8 100644
--- a/include/llvm/IR/ValueMap.h
+++ b/include/llvm/IR/ValueMap.h
@@ -33,11 +33,11 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Support/UniqueLock.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>
+#include <mutex>
#include <type_traits>
#include <utility>
@@ -93,7 +93,6 @@ class ValueMap {
MapT Map;
Optional<MDMapT> MDMap;
ExtraData Data;
- bool MayMapMetadata = true;
public:
using key_type = KeyT;
@@ -120,10 +119,6 @@ public:
}
Optional<MDMapT> &getMDMap() { return MDMap; }
- bool mayMapMetadata() const { return MayMapMetadata; }
- void enableMapMetadata() { MayMapMetadata = true; }
- void disableMapMetadata() { MayMapMetadata = false; }
-
/// Get the mapped metadata, if it's in the map.
Optional<Metadata *> getMappedMD(const Metadata *MD) const {
if (!MDMap)
@@ -266,9 +261,9 @@ public:
// Make a copy that won't get changed even when *this is destroyed.
ValueMapCallbackVH Copy(*this);
typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data);
- unique_lock<typename Config::mutex_type> Guard;
+ std::unique_lock<typename Config::mutex_type> Guard;
if (M)
- Guard = unique_lock<typename Config::mutex_type>(*M);
+ Guard = std::unique_lock<typename Config::mutex_type>(*M);
Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this.
Copy.Map->Map.erase(Copy); // Definitely destroys *this.
}
@@ -279,9 +274,9 @@ public:
// Make a copy that won't get changed even when *this is destroyed.
ValueMapCallbackVH Copy(*this);
typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data);
- unique_lock<typename Config::mutex_type> Guard;
+ std::unique_lock<typename Config::mutex_type> Guard;
if (M)
- Guard = unique_lock<typename Config::mutex_type>(*M);
+ Guard = std::unique_lock<typename Config::mutex_type>(*M);
KeyT typed_new_key = cast<KeySansPointerT>(new_key);
// Can destroy *this:
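The switch to std::unique_lock leans on a standard idiom, sketched here with a plain std::mutex rather than LLVM's types: a default-constructed lock owns nothing, so locking happens only when a mutex is actually configured.

#include <mutex>

template <typename Fn> void withOptionalLock(std::mutex *M, Fn Body) {
  std::unique_lock<std::mutex> Guard; // owns no mutex yet
  if (M)
    Guard = std::unique_lock<std::mutex>(*M); // locks *M here
  Body(); // Guard unlocks on scope exit only if it took the lock
}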
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 164d0be2855a..49f69340c828 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -132,6 +132,7 @@ void initializeDwarfEHPreparePass(PassRegistry&);
void initializeEarlyCSELegacyPassPass(PassRegistry&);
void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry&);
void initializeEarlyIfConverterPass(PassRegistry&);
+void initializeEarlyIfPredicatorPass(PassRegistry &);
void initializeEarlyMachineLICMPass(PassRegistry&);
void initializeEarlyTailDuplicatePass(PassRegistry&);
void initializeEdgeBundlesPass(PassRegistry&);
@@ -202,6 +203,7 @@ void initializeLegacyLICMPassPass(PassRegistry&);
void initializeLegacyLoopSinkPassPass(PassRegistry&);
void initializeLegalizerPass(PassRegistry&);
void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &);
+void initializeGISelKnownBitsAnalysisPass(PassRegistry &);
void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&);
void initializeLintPass(PassRegistry&);
void initializeLiveDebugValuesPass(PassRegistry&);
@@ -241,6 +243,7 @@ void initializeLoopVectorizePass(PassRegistry&);
void initializeLoopVersioningLICMPass(PassRegistry&);
void initializeLoopVersioningPassPass(PassRegistry&);
void initializeLowerAtomicLegacyPassPass(PassRegistry&);
+void initializeLowerConstantIntrinsicsPass(PassRegistry&);
void initializeLowerEmuTLSPass(PassRegistry&);
void initializeLowerExpectIntrinsicPass(PassRegistry&);
void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&);
@@ -250,6 +253,7 @@ void initializeLowerInvokeLegacyPassPass(PassRegistry&);
void initializeLowerSwitchPass(PassRegistry&);
void initializeLowerTypeTestsPass(PassRegistry&);
void initializeMIRCanonicalizerPass(PassRegistry &);
+void initializeMIRNamerPass(PassRegistry &);
void initializeMIRPrintingPassPass(PassRegistry&);
void initializeMachineBlockFrequencyInfoPass(PassRegistry&);
void initializeMachineBlockPlacementPass(PassRegistry&);
@@ -263,7 +267,7 @@ void initializeMachineDominatorTreePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeMachineLICMPass(PassRegistry&);
void initializeMachineLoopInfoPass(PassRegistry&);
-void initializeMachineModuleInfoPass(PassRegistry&);
+void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);
void initializeMachineOptimizationRemarkEmitterPassPass(PassRegistry&);
void initializeMachineOutlinerPass(PassRegistry&);
void initializeMachinePipelinerPass(PassRegistry&);
@@ -286,7 +290,9 @@ void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
void initializeMetaRenamerPass(PassRegistry&);
void initializeModuleDebugInfoPrinterPass(PassRegistry&);
void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&);
+void initializeModuloScheduleTestPass(PassRegistry&);
void initializeMustExecutePrinterPass(PassRegistry&);
+void initializeMustBeExecutedContextPrinterPass(PassRegistry&);
void initializeNameAnonGlobalLegacyPassPass(PassRegistry&);
void initializeNaryReassociateLegacyPassPass(PassRegistry&);
void initializeNewGVNLegacyPassPass(PassRegistry&);
@@ -360,7 +366,7 @@ void initializeSROALegacyPassPass(PassRegistry&);
void initializeSafeStackLegacyPassPass(PassRegistry&);
void initializeSafepointIRVerifierPass(PassRegistry&);
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&);
-void initializeSanitizerCoverageModulePass(PassRegistry&);
+void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &);
void initializeScalarEvolutionWrapperPassPass(PassRegistry&);
void initializeScalarizeMaskedMemIntrinPass(PassRegistry&);
void initializeScalarizerLegacyPassPass(PassRegistry&);
diff --git a/include/llvm/LTO/Config.h b/include/llvm/LTO/Config.h
index fb107e3fbe02..daa6585b1113 100644
--- a/include/llvm/LTO/Config.h
+++ b/include/llvm/LTO/Config.h
@@ -226,7 +226,7 @@ struct LTOLLVMContext : LLVMContext {
setDiscardValueNames(C.ShouldDiscardValueNames);
enableDebugTypeODRUniquing();
setDiagnosticHandler(
- llvm::make_unique<LTOLLVMDiagnosticHandler>(&DiagHandler), true);
+ std::make_unique<LTOLLVMDiagnosticHandler>(&DiagHandler), true);
}
DiagnosticHandlerFunction DiagHandler;
};
diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h
index ca0a8b64523a..0a1e3e1d0e42 100644
--- a/include/llvm/LTO/LTO.h
+++ b/include/llvm/LTO/LTO.h
@@ -59,7 +59,9 @@ void thinLTOResolvePrevailingInIndex(
/// must apply the changes to the Module via thinLTOInternalizeModule.
void thinLTOInternalizeAndPromoteInIndex(
ModuleSummaryIndex &Index,
- function_ref<bool(StringRef, GlobalValue::GUID)> isExported);
+ function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing);
/// Computes a unique hash for the Module considering the current list of
/// export/import and other global analysis results.
@@ -296,6 +298,10 @@ public:
/// Cache) for each task identifier.
Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr);
+ /// Static method that returns a list of libcall symbols that can be generated
+ /// by LTO but might not be visible from the bitcode symbol table.
+ static ArrayRef<const char*> getRuntimeLibcallSymbols();
+
private:
Config Conf;
@@ -303,7 +309,7 @@ private:
RegularLTOState(unsigned ParallelCodeGenParallelismLevel, Config &Conf);
struct CommonResolution {
uint64_t Size = 0;
- unsigned Align = 0;
+ MaybeAlign Align;
/// Record if at least one instance of the common was marked as prevailing
bool Prevailing = false;
};
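A sketch of a plausible client of the new static helper (the StringSet is an assumed caller-side structure): a linker can reserve libcall names up front, because codegen may introduce calls that no bitcode symbol table mentions.

#include "llvm/ADT/StringSet.h"
#include "llvm/LTO/LTO.h"
using namespace llvm;

static void reserveLibcalls(StringSet<> &MustPreserve) {
  // E.g. memcpy can be materialized by codegen with no bitcode reference.
  for (const char *Name : lto::LTO::getRuntimeLibcallSymbols())
    MustPreserve.insert(Name);
}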
diff --git a/include/llvm/LTO/legacy/LTOCodeGenerator.h b/include/llvm/LTO/legacy/LTOCodeGenerator.h
index d3cb4c8b79a0..8718df4b88e6 100644
--- a/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -113,7 +113,7 @@ struct LTOCodeGenerator {
ShouldRestoreGlobalsLinkage = Value;
}
- void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols[Sym] = 1; }
+ void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols.insert(Sym); }
/// Pass options to the driver and optimization passes.
///
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 675d179eb22a..ac88165845d3 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -140,6 +140,7 @@ namespace {
(void) llvm::createLoopVersioningLICMPass();
(void) llvm::createLoopIdiomPass();
(void) llvm::createLoopRotatePass();
+ (void) llvm::createLowerConstantIntrinsicsPass();
(void) llvm::createLowerExpectIntrinsicPass();
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSwitchPass();
@@ -219,6 +220,7 @@ namespace {
(void) llvm::createStraightLineStrengthReducePass();
(void) llvm::createMemDerefPrinter();
(void) llvm::createMustExecutePrinter();
+ (void) llvm::createMustBeExecutedContextPrinter();
(void) llvm::createFloat2IntPass();
(void) llvm::createEliminateAvailableExternallyPass();
(void) llvm::createScalarizeMaskedMemIntrinPass();
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 971e9354da8c..3261c483e0d8 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -165,6 +165,10 @@ protected:
/// instead.
bool UseDataRegionDirectives = false;
+ /// True if .align is to be used for alignment. Only power-of-two
+ /// alignment is supported.
+ bool UseDotAlignForAlignment = false;
+
//===--- Data Emission Directives -------------------------------------===//
/// This should be set to the directive used to get some number of zero bytes
@@ -313,6 +317,10 @@ protected:
/// Defaults to false.
bool HasLinkOnceDirective = false;
+ /// True if we have a .lglobl directive, which is used to emit the symbol-table
+ /// entry for a static symbol. Defaults to false.
+ bool HasDotLGloblDirective = false;
+
/// This attribute, if not MCSA_Invalid, is used to declare a symbol as having
/// hidden visibility. Defaults to MCSA_Hidden.
MCSymbolAttr HiddenVisibilityAttr = MCSA_Hidden;
@@ -388,6 +396,9 @@ protected:
// %hi(), and similar unary operators.
bool HasMipsExpressions = false;
+ // If true, emit function descriptor symbol on AIX.
+ bool NeedsFunctionDescriptors = false;
+
public:
explicit MCAsmInfo();
virtual ~MCAsmInfo();
@@ -520,6 +531,10 @@ public:
return UseDataRegionDirectives;
}
+ bool useDotAlignForAlignment() const {
+ return UseDotAlignForAlignment;
+ }
+
const char *getZeroDirective() const { return ZeroDirective; }
const char *getAsciiDirective() const { return AsciiDirective; }
const char *getAscizDirective() const { return AscizDirective; }
@@ -557,6 +572,8 @@ public:
bool hasLinkOnceDirective() const { return HasLinkOnceDirective; }
+ bool hasDotLGloblDirective() const { return HasDotLGloblDirective; }
+
MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr; }
MCSymbolAttr getHiddenDeclarationVisibilityAttr() const {
@@ -639,6 +656,7 @@ public:
bool canRelaxRelocations() const { return RelaxELFRelocations; }
void setRelaxELFRelocations(bool V) { RelaxELFRelocations = V; }
bool hasMipsExpressions() const { return HasMipsExpressions; }
+ bool needsFunctionDescriptors() const { return NeedsFunctionDescriptors; }
};
} // end namespace llvm
diff --git a/include/llvm/MC/MCAsmInfoXCOFF.h b/include/llvm/MC/MCAsmInfoXCOFF.h
index 2a72ba7398a7..4a3bacc954e0 100644
--- a/include/llvm/MC/MCAsmInfoXCOFF.h
+++ b/include/llvm/MC/MCAsmInfoXCOFF.h
@@ -18,6 +18,11 @@ class MCAsmInfoXCOFF : public MCAsmInfo {
protected:
MCAsmInfoXCOFF();
+
+public:
+ // Return true only when the identifier Name does not need quotes to be
+ // syntactically correct for XCOFF.
+ bool isValidUnquotedName(StringRef Name) const override;
};
} // end namespace llvm
diff --git a/include/llvm/MC/MCAsmMacro.h b/include/llvm/MC/MCAsmMacro.h
index 364d3b5f3666..7eecce0faf64 100644
--- a/include/llvm/MC/MCAsmMacro.h
+++ b/include/llvm/MC/MCAsmMacro.h
@@ -124,7 +124,6 @@ public:
}
void dump(raw_ostream &OS) const;
- void dump() const { dump(dbgs()); }
};
struct MCAsmMacroParameter {
@@ -133,10 +132,10 @@ struct MCAsmMacroParameter {
bool Required = false;
bool Vararg = false;
- MCAsmMacroParameter() = default;
-
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() const { dump(dbgs()); }
- void dump(raw_ostream &OS) const;
+ LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
+#endif
};
typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;
@@ -149,8 +148,10 @@ public:
MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
: Name(N), Body(B), Parameters(std::move(P)) {}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() const { dump(dbgs()); }
- void dump(raw_ostream &OS) const;
+ LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
+#endif
};
} // namespace llvm
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 5c2124cc0d15..b925f3218883 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -22,6 +22,7 @@
#include "llvm/MC/MCAsmMacro.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
@@ -112,6 +113,9 @@ namespace llvm {
/// number of section symbols with the same name).
StringMap<bool, BumpPtrAllocator &> UsedNames;
+ /// Keeps track of labels that are used in inline assembly.
+ SymbolTable InlineAsmUsedLabelNames;
+
/// The next ID to dole out to an unnamed assembler temporary symbol with
/// a given prefix.
StringMap<unsigned> NextID;
@@ -275,6 +279,8 @@ namespace llvm {
/// Do automatic reset in destructor
bool AutoReset;
+ MCTargetOptions const *TargetOptions;
+
bool HadError = false;
MCSymbol *createSymbolImpl(const StringMapEntry<bool> *Name,
@@ -298,7 +304,9 @@ namespace llvm {
public:
explicit MCContext(const MCAsmInfo *MAI, const MCRegisterInfo *MRI,
const MCObjectFileInfo *MOFI,
- const SourceMgr *Mgr = nullptr, bool DoAutoReset = true);
+ const SourceMgr *Mgr = nullptr,
+ MCTargetOptions const *TargetOpts = nullptr,
+ bool DoAutoReset = true);
MCContext(const MCContext &) = delete;
MCContext &operator=(const MCContext &) = delete;
~MCContext();
@@ -377,6 +385,16 @@ namespace llvm {
/// APIs.
const SymbolTable &getSymbols() const { return Symbols; }
+ /// getInlineAsmLabel - Return the MCSymbol for the name if it is a label
+ /// referenced in inline assembly, or null if there is none.
+ MCSymbol *getInlineAsmLabel(StringRef Name) const {
+ return InlineAsmUsedLabelNames.lookup(Name);
+ }
+
+ /// registerInlineAsmLabel - Records that the name is a label referenced in
+ /// inline assembly.
+ void registerInlineAsmLabel(MCSymbol *Sym);
+
/// @}
/// \name Section Management
@@ -490,6 +508,8 @@ namespace llvm {
MCSectionXCOFF *getXCOFFSection(StringRef Section,
XCOFF::StorageMappingClass MappingClass,
+ XCOFF::SymbolType CSectType,
+ XCOFF::StorageClass StorageClass,
SectionKind K,
const char *BeginSymName = nullptr);
@@ -659,6 +679,7 @@ namespace llvm {
bool hadError() { return HadError; }
void reportError(SMLoc L, const Twine &Msg);
+ void reportWarning(SMLoc L, const Twine &Msg);
// Unrecoverable error has occurred. Display the best diagnostic we can
// and bail via exit(1). For now, most MC backend errors are unrecoverable.
// FIXME: We should really do something about that.
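A round-trip sketch for the inline-asm label registry (illustrative, not from the patch):

#include "llvm/MC/MCContext.h"
#include <cassert>
using namespace llvm;

static void noteInlineAsmLabel(MCContext &Ctx, StringRef Name) {
  MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
  Ctx.registerInlineAsmLabel(Sym);
  assert(Ctx.getInlineAsmLabel(Name) == Sym && "registry should round-trip");
}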
diff --git a/include/llvm/MC/MCDirectives.h b/include/llvm/MC/MCDirectives.h
index 4029264c2026..ea79e68674e5 100644
--- a/include/llvm/MC/MCDirectives.h
+++ b/include/llvm/MC/MCDirectives.h
@@ -28,6 +28,7 @@ enum MCSymbolAttr {
MCSA_ELF_TypeNoType, ///< .type _foo, STT_NOTYPE # aka @notype
MCSA_ELF_TypeGnuUniqueObject, /// .type _foo, @gnu_unique_object
MCSA_Global, ///< .globl
+ MCSA_LGlobal, ///< .lglobl (XCOFF)
MCSA_Hidden, ///< .hidden (ELF)
MCSA_IndirectSymbol, ///< .indirect_symbol (MachO)
MCSA_Internal, ///< .internal (ELF)
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 1a37aafd0654..a33b4b31bb06 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -629,7 +629,8 @@ public:
static void Emit(MCObjectStreamer &streamer, MCAsmBackend *MAB, bool isEH);
static void EmitAdvanceLoc(MCObjectStreamer &Streamer, uint64_t AddrDelta);
static void EncodeAdvanceLoc(MCContext &Context, uint64_t AddrDelta,
- raw_ostream &OS);
+ raw_ostream &OS, uint32_t *Offset = nullptr,
+ uint32_t *Size = nullptr);
};
} // end namespace llvm
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index fb23c0114c76..eb2786501f84 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -48,10 +48,6 @@ private:
bool evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
- const SectionAddrMap *Addrs) const;
-
- bool evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
- const MCAsmLayout *Layout,
const SectionAddrMap *Addrs, bool InSet) const;
protected:
@@ -136,7 +132,7 @@ class MCConstantExpr : public MCExpr {
int64_t Value;
bool PrintInHex = false;
- MCConstantExpr(int64_t Value)
+ explicit MCConstantExpr(int64_t Value)
: MCExpr(MCExpr::Constant, SMLoc()), Value(Value) {}
MCConstantExpr(int64_t Value, bool PrintInHex)
@@ -239,6 +235,8 @@ public:
VK_PPC_TOC_LO, // symbol@toc@l
VK_PPC_TOC_HI, // symbol@toc@h
VK_PPC_TOC_HA, // symbol@toc@ha
+ VK_PPC_U, // symbol@u
+ VK_PPC_L, // symbol@l
VK_PPC_DTPMOD, // symbol@dtpmod
VK_PPC_TPREL_LO, // symbol@tprel@l
VK_PPC_TPREL_HI, // symbol@tprel@h
diff --git a/include/llvm/MC/MCFixup.h b/include/llvm/MC/MCFixup.h
index accffb7f2247..29e321e2354c 100644
--- a/include/llvm/MC/MCFixup.h
+++ b/include/llvm/MC/MCFixup.h
@@ -20,35 +20,38 @@ class MCExpr;
/// Extensible enumeration to represent the type of a fixup.
enum MCFixupKind {
- FK_NONE = 0, ///< A no-op fixup.
- FK_Data_1, ///< A one-byte fixup.
- FK_Data_2, ///< A two-byte fixup.
- FK_Data_4, ///< A four-byte fixup.
- FK_Data_8, ///< A eight-byte fixup.
- FK_PCRel_1, ///< A one-byte pc relative fixup.
- FK_PCRel_2, ///< A two-byte pc relative fixup.
- FK_PCRel_4, ///< A four-byte pc relative fixup.
- FK_PCRel_8, ///< A eight-byte pc relative fixup.
- FK_GPRel_1, ///< A one-byte gp relative fixup.
- FK_GPRel_2, ///< A two-byte gp relative fixup.
- FK_GPRel_4, ///< A four-byte gp relative fixup.
- FK_GPRel_8, ///< A eight-byte gp relative fixup.
- FK_DTPRel_4, ///< A four-byte dtp relative fixup.
- FK_DTPRel_8, ///< A eight-byte dtp relative fixup.
- FK_TPRel_4, ///< A four-byte tp relative fixup.
- FK_TPRel_8, ///< A eight-byte tp relative fixup.
- FK_SecRel_1, ///< A one-byte section relative fixup.
- FK_SecRel_2, ///< A two-byte section relative fixup.
- FK_SecRel_4, ///< A four-byte section relative fixup.
- FK_SecRel_8, ///< A eight-byte section relative fixup.
- FK_Data_Add_1, ///< A one-byte add fixup.
- FK_Data_Add_2, ///< A two-byte add fixup.
- FK_Data_Add_4, ///< A four-byte add fixup.
- FK_Data_Add_8, ///< A eight-byte add fixup.
- FK_Data_Sub_1, ///< A one-byte sub fixup.
- FK_Data_Sub_2, ///< A two-byte sub fixup.
- FK_Data_Sub_4, ///< A four-byte sub fixup.
- FK_Data_Sub_8, ///< A eight-byte sub fixup.
+ FK_NONE = 0, ///< A no-op fixup.
+ FK_Data_1, ///< A one-byte fixup.
+ FK_Data_2, ///< A two-byte fixup.
+ FK_Data_4, ///< A four-byte fixup.
+ FK_Data_8, ///< An eight-byte fixup.
+ FK_Data_6b, ///< A six-bit fixup.
+ FK_PCRel_1, ///< A one-byte pc relative fixup.
+ FK_PCRel_2, ///< A two-byte pc relative fixup.
+ FK_PCRel_4, ///< A four-byte pc relative fixup.
+ FK_PCRel_8, ///< An eight-byte pc relative fixup.
+ FK_GPRel_1, ///< A one-byte gp relative fixup.
+ FK_GPRel_2, ///< A two-byte gp relative fixup.
+ FK_GPRel_4, ///< A four-byte gp relative fixup.
+ FK_GPRel_8, ///< An eight-byte gp relative fixup.
+ FK_DTPRel_4, ///< A four-byte dtp relative fixup.
+ FK_DTPRel_8, ///< An eight-byte dtp relative fixup.
+ FK_TPRel_4, ///< A four-byte tp relative fixup.
+ FK_TPRel_8, ///< An eight-byte tp relative fixup.
+ FK_SecRel_1, ///< A one-byte section relative fixup.
+ FK_SecRel_2, ///< A two-byte section relative fixup.
+ FK_SecRel_4, ///< A four-byte section relative fixup.
+ FK_SecRel_8, ///< An eight-byte section relative fixup.
+ FK_Data_Add_1, ///< A one-byte add fixup.
+ FK_Data_Add_2, ///< A two-byte add fixup.
+ FK_Data_Add_4, ///< A four-byte add fixup.
+ FK_Data_Add_8, ///< An eight-byte add fixup.
+ FK_Data_Add_6b, ///< A six-bit add fixup.
+ FK_Data_Sub_1, ///< A one-byte sub fixup.
+ FK_Data_Sub_2, ///< A two-byte sub fixup.
+ FK_Data_Sub_4, ///< A four-byte sub fixup.
+ FK_Data_Sub_8, ///< An eight-byte sub fixup.
+ FK_Data_Sub_6b, ///< A six-bit sub fixup.
FirstTargetFixupKind = 128,
@@ -75,25 +78,25 @@ class MCFixup {
/// The value to put into the fixup location. The exact interpretation of the
/// expression is target dependent, usually it will be one of the operands to
/// an instruction or an assembler directive.
- const MCExpr *Value;
+ const MCExpr *Value = nullptr;
/// The byte index of start of the relocation inside the MCFragment.
- uint32_t Offset;
+ uint32_t Offset = 0;
/// The target dependent kind of fixup item this is. The kind is used to
/// determine how the operand value should be encoded into the instruction.
- unsigned Kind;
+ MCFixupKind Kind = FK_NONE;
/// The source location which gave rise to the fixup, if any.
SMLoc Loc;
public:
static MCFixup create(uint32_t Offset, const MCExpr *Value,
MCFixupKind Kind, SMLoc Loc = SMLoc()) {
- assert(unsigned(Kind) < MaxTargetFixupKind && "Kind out of range!");
+ assert(Kind < MaxTargetFixupKind && "Kind out of range!");
MCFixup FI;
FI.Value = Value;
FI.Offset = Offset;
- FI.Kind = unsigned(Kind);
+ FI.Kind = Kind;
FI.Loc = Loc;
return FI;
}
@@ -104,7 +107,7 @@ public:
MCFixup FI;
FI.Value = Fixup.getValue();
FI.Offset = Fixup.getOffset();
- FI.Kind = (unsigned)getAddKindForKind(Fixup.getKind());
+ FI.Kind = getAddKindForKind(Fixup.getKind());
FI.Loc = Fixup.getLoc();
return FI;
}
@@ -115,12 +118,14 @@ public:
MCFixup FI;
FI.Value = Fixup.getValue();
FI.Offset = Fixup.getOffset();
- FI.Kind = (unsigned)getSubKindForKind(Fixup.getKind());
+ FI.Kind = getSubKindForKind(Fixup.getKind());
FI.Loc = Fixup.getLoc();
return FI;
}
- MCFixupKind getKind() const { return MCFixupKind(Kind); }
+ MCFixupKind getKind() const { return Kind; }
+
+ unsigned getTargetKind() const { return Kind; }
uint32_t getOffset() const { return Offset; }
void setOffset(uint32_t Value) { Offset = Value; }
@@ -129,37 +134,63 @@ public:
/// Return the generic fixup kind for a value with the given size. It
/// is an error to pass an unsupported size.
- static MCFixupKind getKindForSize(unsigned Size, bool isPCRel) {
+ static MCFixupKind getKindForSize(unsigned Size, bool IsPCRel) {
switch (Size) {
default: llvm_unreachable("Invalid generic fixup size!");
- case 1: return isPCRel ? FK_PCRel_1 : FK_Data_1;
- case 2: return isPCRel ? FK_PCRel_2 : FK_Data_2;
- case 4: return isPCRel ? FK_PCRel_4 : FK_Data_4;
- case 8: return isPCRel ? FK_PCRel_8 : FK_Data_8;
+ case 1:
+ return IsPCRel ? FK_PCRel_1 : FK_Data_1;
+ case 2:
+ return IsPCRel ? FK_PCRel_2 : FK_Data_2;
+ case 4:
+ return IsPCRel ? FK_PCRel_4 : FK_Data_4;
+ case 8:
+ return IsPCRel ? FK_PCRel_8 : FK_Data_8;
+ }
+ }
+
+ /// Return the generic fixup kind for a value with the given size in bits.
+ /// It is an error to pass an unsupported size.
+ static MCFixupKind getKindForSizeInBits(unsigned Size, bool IsPCRel) {
+ switch (Size) {
+ default:
+ llvm_unreachable("Invalid generic fixup size!");
+ case 6:
+ assert(!IsPCRel && "Invalid pc-relative fixup size!");
+ return FK_Data_6b;
+ case 8:
+ return IsPCRel ? FK_PCRel_1 : FK_Data_1;
+ case 16:
+ return IsPCRel ? FK_PCRel_2 : FK_Data_2;
+ case 32:
+ return IsPCRel ? FK_PCRel_4 : FK_Data_4;
+ case 64:
+ return IsPCRel ? FK_PCRel_8 : FK_Data_8;
}
}
/// Return the generic fixup kind for an addition with a given size. It
/// is an error to pass an unsupported size.
- static MCFixupKind getAddKindForKind(unsigned Kind) {
+ static MCFixupKind getAddKindForKind(MCFixupKind Kind) {
switch (Kind) {
default: llvm_unreachable("Unknown type to convert!");
case FK_Data_1: return FK_Data_Add_1;
case FK_Data_2: return FK_Data_Add_2;
case FK_Data_4: return FK_Data_Add_4;
case FK_Data_8: return FK_Data_Add_8;
+ case FK_Data_6b: return FK_Data_Add_6b;
}
}
/// Return the generic fixup kind for an subtraction with a given size. It
/// is an error to pass an unsupported size.
- static MCFixupKind getSubKindForKind(unsigned Kind) {
+ static MCFixupKind getSubKindForKind(MCFixupKind Kind) {
switch (Kind) {
default: llvm_unreachable("Unknown type to convert!");
case FK_Data_1: return FK_Data_Sub_1;
case FK_Data_2: return FK_Data_Sub_2;
case FK_Data_4: return FK_Data_Sub_4;
case FK_Data_8: return FK_Data_Sub_8;
+ case FK_Data_6b: return FK_Data_Sub_6b;
}
}
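A small sketch of the new bit-based lookup; the expected results follow from the switch above:

#include "llvm/MC/MCFixup.h"
#include <cassert>
using namespace llvm;

static void demoFixupKinds() {
  MCFixupKind K32 = MCFixup::getKindForSizeInBits(32, /*IsPCRel=*/true);
  assert(K32 == FK_PCRel_4 && "32-bit pc-relative maps to FK_PCRel_4");
  MCFixupKind K6 = MCFixup::getKindForSizeInBits(6, /*IsPCRel=*/false);
  assert(K6 == FK_Data_6b && "6-bit data maps to the new FK_Data_6b");
  (void)K32;
  (void)K6;
}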
diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h
index aadf2ce725ea..b0def566c46a 100644
--- a/include/llvm/MC/MCFragment.h
+++ b/include/llvm/MC/MCFragment.h
@@ -149,6 +149,7 @@ public:
case MCFragment::FT_CompactEncodedInst:
case MCFragment::FT_Data:
case MCFragment::FT_Dwarf:
+ case MCFragment::FT_DwarfFrame:
return true;
}
}
@@ -232,7 +233,8 @@ public:
static bool classof(const MCFragment *F) {
MCFragment::FragmentType Kind = F->getKind();
return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data ||
- Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf;;
+ Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf ||
+ Kind == MCFragment::FT_DwarfFrame;
}
};
@@ -543,27 +545,21 @@ public:
}
};
-class MCDwarfCallFrameFragment : public MCFragment {
+class MCDwarfCallFrameFragment : public MCEncodedFragmentWithFixups<8, 1> {
/// AddrDelta - The expression for the difference of the two symbols that
/// make up the address delta between two .cfi_* dwarf directives.
const MCExpr *AddrDelta;
- SmallString<8> Contents;
-
public:
MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr)
- : MCFragment(FT_DwarfFrame, false, Sec), AddrDelta(&AddrDelta) {
- Contents.push_back(0);
- }
+ : MCEncodedFragmentWithFixups<8, 1>(FT_DwarfFrame, false, Sec),
+ AddrDelta(&AddrDelta) {}
/// \name Accessors
/// @{
const MCExpr &getAddrDelta() const { return *AddrDelta; }
- SmallString<8> &getContents() { return Contents; }
- const SmallString<8> &getContents() const { return Contents; }
-
/// @}
static bool classof(const MCFragment *F) {
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index 6bbc4bc2903b..4501ce3084c8 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -87,12 +87,10 @@ public:
/// Utility functions to make adding mark ups simpler.
StringRef markup(StringRef s) const;
- StringRef markup(StringRef a, StringRef b) const;
bool getPrintImmHex() const { return PrintImmHex; }
void setPrintImmHex(bool Value) { PrintImmHex = Value; }
- HexStyle::Style getPrintHexStyle() const { return PrintHexStyle; }
void setPrintHexStyle(HexStyle::Style Value) { PrintHexStyle = Value; }
/// Utility function to print immediates in decimal or hex.
diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h
index dfefd7e72777..898ca47b13b8 100644
--- a/include/llvm/MC/MCInstrAnalysis.h
+++ b/include/llvm/MC/MCInstrAnalysis.h
@@ -152,6 +152,12 @@ public:
evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const;
+ /// Given an instruction, tries to get the address of a memory operand. Returns
+ /// the address on success.
+ virtual Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
+ uint64_t Addr,
+ uint64_t Size) const;
+
/// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries.
virtual std::vector<std::pair<uint64_t, uint64_t>>
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index 0aa586dfc901..e75a27614a22 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -56,7 +56,11 @@ enum OperandType {
OPERAND_GENERIC_5 = 11,
OPERAND_LAST_GENERIC = 11,
- OPERAND_FIRST_TARGET = 12,
+ OPERAND_FIRST_GENERIC_IMM = 12,
+ OPERAND_GENERIC_IMM_0 = 12,
+ OPERAND_LAST_GENERIC_IMM = 12,
+
+ OPERAND_FIRST_TARGET = 13,
};
}
@@ -103,6 +107,16 @@ public:
assert(isGenericType() && "non-generic types don't have an index");
return OperandType - MCOI::OPERAND_FIRST_GENERIC;
}
+
+ bool isGenericImm() const {
+ return OperandType >= MCOI::OPERAND_FIRST_GENERIC_IMM &&
+ OperandType <= MCOI::OPERAND_LAST_GENERIC_IMM;
+ }
+
+ unsigned getGenericImmIndex() const {
+ assert(isGenericImm() && "non-generic immediates don't have an index");
+ return OperandType - MCOI::OPERAND_FIRST_GENERIC_IMM;
+ }
};
//===----------------------------------------------------------------------===//
@@ -115,7 +129,8 @@ namespace MCID {
/// not use these directly. These all correspond to bitfields in the
/// MCInstrDesc::Flags field.
enum Flag {
- Variadic = 0,
+ PreISelOpcode = 0,
+ Variadic,
HasOptionalDef,
Pseudo,
Return,
@@ -228,6 +243,10 @@ public:
/// Return flags of this instruction.
uint64_t getFlags() const { return Flags; }
+ /// \returns true if this instruction is emitted before instruction selection
+ /// and should be legalized/regbankselected/selected.
+ bool isPreISelOpcode() const { return Flags & (1ULL << MCID::PreISelOpcode); }
+
/// Return true if this instruction can have a variable number of
/// operands. In this case, the variable operands will be after the normal
/// operands but before the implicit definitions and uses (if any are
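A classification sketch for the new generic-immediate operand kind; these accessors live on MCOperandInfo, mirroring isGenericType()/getGenericTypeIndex():

#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;

static int genericImmIndexOrNegOne(const MCOperandInfo &OpInfo) {
  // -1 signals "not a generic immediate" to the caller.
  return OpInfo.isGenericImm() ? int(OpInfo.getGenericImmIndex()) : -1;
}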
diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h
index f2a1364ad884..003491f32f75 100644
--- a/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -61,6 +61,7 @@ static inline int MCLOHNameToId(StringRef Name) {
MCLOHCaseNameToId(AdrpAdd)
MCLOHCaseNameToId(AdrpLdrGot)
.Default(-1);
+#undef MCLOHCaseNameToId
}
static inline StringRef MCLOHIdToName(MCLOHType Kind) {
@@ -76,6 +77,7 @@ static inline StringRef MCLOHIdToName(MCLOHType Kind) {
MCLOHCaseIdToName(AdrpLdrGot);
}
return StringRef();
+#undef MCLOHCaseIdToName
}
static inline int MCLOHIdToNbArgs(MCLOHType Kind) {
diff --git a/include/llvm/MC/MCRegister.h b/include/llvm/MC/MCRegister.h
new file mode 100644
index 000000000000..8372947a4ba1
--- /dev/null
+++ b/include/llvm/MC/MCRegister.h
@@ -0,0 +1,110 @@
+//===-- llvm/MC/MCRegister.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_REGISTER_H
+#define LLVM_MC_REGISTER_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include <cassert>
+
+namespace llvm {
+
+/// An unsigned integer type large enough to represent all physical registers,
+/// but not necessarily virtual registers.
+using MCPhysReg = uint16_t;
+
+/// Wrapper class representing physical registers. Should be passed by value.
+class MCRegister {
+ unsigned Reg;
+
+public:
+ MCRegister(unsigned Val = 0): Reg(Val) {}
+
+ // Register numbers can represent physical registers, virtual registers, and
+ // sometimes stack slots. The unsigned values are divided into these ranges:
+ //
+ // 0 Not a register, can be used as a sentinel.
+ // [1;2^30) Physical registers assigned by TableGen.
+ // [2^30;2^31) Stack slots. (Rarely used.)
+ // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo.
+ //
+ // Further sentinels can be allocated from the small negative integers.
+ // DenseMapInfo<unsigned> uses -1u and -2u.
+
+ /// This is the portion of the positive number space that is not a physical
+ /// register. StackSlot values do not exist in the MC layer, see
+ /// Register::isStackSlot() for more information on them.
+ ///
+ /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack
+ /// slots, so if a variable may contain a stack slot, always check
+ /// isStackSlot() first.
+ static bool isStackSlot(unsigned Reg) {
+ return int(Reg) >= (1 << 30);
+ }
+
+ /// Return true if the specified register number is in
+ /// the physical register namespace.
+ static bool isPhysicalRegister(unsigned Reg) {
+ assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
+ return int(Reg) > 0;
+ }
+
+ /// Return true if the specified register number is in the physical register
+ /// namespace.
+ bool isPhysical() const {
+ return isPhysicalRegister(Reg);
+ }
+
+ operator unsigned() const {
+ return Reg;
+ }
+
+ unsigned id() const {
+ return Reg;
+ }
+
+ bool isValid() const {
+ return Reg != 0;
+ }
+
+ /// Comparisons between register objects
+ bool operator==(const MCRegister &Other) const { return Reg == Other.Reg; }
+ bool operator!=(const MCRegister &Other) const { return Reg != Other.Reg; }
+
+ /// Comparisons against register constants. E.g.
+ /// * R == AArch64::WZR
+ /// * R == 0
+ /// * R == VirtRegMap::NO_PHYS_REG
+ bool operator==(unsigned Other) const { return Reg == Other; }
+ bool operator!=(unsigned Other) const { return Reg != Other; }
+ bool operator==(int Other) const { return Reg == unsigned(Other); }
+ bool operator!=(int Other) const { return Reg != unsigned(Other); }
+ // MSVC requires that we explicitly declare these two as well.
+ bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); }
+ bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); }
+};
+
+// Provide DenseMapInfo for MCRegister
+template<> struct DenseMapInfo<MCRegister> {
+ static inline unsigned getEmptyKey() {
+ return DenseMapInfo<unsigned>::getEmptyKey();
+ }
+ static inline unsigned getTombstoneKey() {
+ return DenseMapInfo<unsigned>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const MCRegister &Val) {
+ return DenseMapInfo<unsigned>::getHashValue(Val.id());
+ }
+ static bool isEqual(const MCRegister &LHS, const MCRegister &RHS) {
+ return DenseMapInfo<unsigned>::isEqual(LHS.id(), RHS.id());
+ }
+};
+
+}
+
+#endif // ifndef LLVM_MC_REGISTER_H
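A short sketch of the intended ergonomics (illustrative): implicit conversion keeps existing unsigned-based code compiling, and the DenseMapInfo specialization allows keying maps on registers directly.

#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCRegister.h"
#include <cassert>
using namespace llvm;

static void demoMCRegister() {
  MCRegister R(5); // physical register number 5
  assert(R.isValid() && R.isPhysical());
  assert(R == 5u); // still comparable against raw register numbers
  DenseMap<MCRegister, int> SpillWeight;
  SpillWeight[R] = 1;
}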
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index 92d39c3fcfb7..c7dc56ea588e 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -18,16 +18,13 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegister.h"
#include <cassert>
#include <cstdint>
#include <utility>
namespace llvm {
-/// An unsigned integer type large enough to represent all physical registers,
-/// but not necessarily virtual registers.
-using MCPhysReg = uint16_t;
-
/// MCRegisterClass - Base class of TargetRegisterClass.
class MCRegisterClass {
public:
@@ -65,16 +62,17 @@ public:
/// contains - Return true if the specified register is included in this
/// register class. This does not include virtual registers.
- bool contains(unsigned Reg) const {
- unsigned InByte = Reg % 8;
- unsigned Byte = Reg / 8;
+ bool contains(MCRegister Reg) const {
+ unsigned RegNo = unsigned(Reg);
+ unsigned InByte = RegNo % 8;
+ unsigned Byte = RegNo / 8;
if (Byte >= RegSetSize)
return false;
return (RegSet[Byte] & (1 << InByte)) != 0;
}
/// contains - Return true if both registers are in this class.
- bool contains(unsigned Reg1, unsigned Reg2) const {
+ bool contains(MCRegister Reg1, MCRegister Reg2) const {
return contains(Reg1) && contains(Reg2);
}
@@ -148,8 +146,8 @@ public:
private:
const MCRegisterDesc *Desc; // Pointer to the descriptor array
unsigned NumRegs; // Number of entries in the array
- unsigned RAReg; // Return address register
- unsigned PCReg; // Program counter register
+ MCRegister RAReg; // Return address register
+ MCRegister PCReg; // Program counter register
const MCRegisterClass *Classes; // Pointer to the regclass array
unsigned NumClasses; // Number of entries in the array
unsigned NumRegUnits; // Number of regunits.
@@ -175,8 +173,8 @@ private:
const DwarfLLVMRegPair *EHL2DwarfRegs; // LLVM to Dwarf regs mapping EH
const DwarfLLVMRegPair *Dwarf2LRegs; // Dwarf to LLVM regs mapping
const DwarfLLVMRegPair *EHDwarf2LRegs; // Dwarf to LLVM regs mapping EH
- DenseMap<unsigned, int> L2SEHRegs; // LLVM to SEH regs mapping
- DenseMap<unsigned, int> L2CVRegs; // LLVM to CV regs mapping
+ DenseMap<MCRegister, int> L2SEHRegs; // LLVM to SEH regs mapping
+ DenseMap<MCRegister, int> L2CVRegs; // LLVM to CV regs mapping
public:
/// DiffListIterator - Base iterator class that can traverse the
@@ -202,7 +200,7 @@ public:
/// advance - Move to the next list position, return the applied
/// differential. This function does not detect the end of the list, that
/// is the caller's responsibility (by checking for a 0 return value).
- unsigned advance() {
+ MCRegister advance() {
assert(isValid() && "Cannot move off the end of the list.");
MCPhysReg D = *List++;
Val += D;
@@ -214,7 +212,7 @@ public:
bool isValid() const { return List; }
/// Dereference the iterator to get the value at the current position.
- unsigned operator*() const { return Val; }
+ MCRegister operator*() const { return Val; }
/// Pre-increment to move to the next position.
void operator++() {
@@ -309,26 +307,26 @@ public:
/// as the LLVM register number.
/// FIXME: TableGen these numbers. Currently this requires target specific
/// initialization code.
- void mapLLVMRegToSEHReg(unsigned LLVMReg, int SEHReg) {
+ void mapLLVMRegToSEHReg(MCRegister LLVMReg, int SEHReg) {
L2SEHRegs[LLVMReg] = SEHReg;
}
- void mapLLVMRegToCVReg(unsigned LLVMReg, int CVReg) {
+ void mapLLVMRegToCVReg(MCRegister LLVMReg, int CVReg) {
L2CVRegs[LLVMReg] = CVReg;
}
/// This method should return the register where the return
/// address can be found.
- unsigned getRARegister() const {
+ MCRegister getRARegister() const {
return RAReg;
}
/// Return the register which is the program counter.
- unsigned getProgramCounter() const {
+ MCRegister getProgramCounter() const {
return PCReg;
}
- const MCRegisterDesc &operator[](unsigned RegNo) const {
+ const MCRegisterDesc &operator[](MCRegister RegNo) const {
assert(RegNo < NumRegs &&
"Attempting to access record for invalid register number!");
return Desc[RegNo];
@@ -336,24 +334,24 @@ public:
/// Provide a get method, equivalent to [], but more useful with a
/// pointer to this object.
- const MCRegisterDesc &get(unsigned RegNo) const {
+ const MCRegisterDesc &get(MCRegister RegNo) const {
return operator[](RegNo);
}
/// Returns the physical register number of sub-register "Index"
/// for physical register RegNo. Return zero if the sub-register does not
/// exist.
- unsigned getSubReg(unsigned Reg, unsigned Idx) const;
+ MCRegister getSubReg(MCRegister Reg, unsigned Idx) const;
/// Return a super-register of the specified register
/// Reg so its sub-register of index SubIdx is Reg.
- unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
- const MCRegisterClass *RC) const;
+ MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx,
+ const MCRegisterClass *RC) const;
/// For a given register pair, return the sub-register index
/// if the second register is a sub-register of the first. Return zero
/// otherwise.
- unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const;
+ unsigned getSubRegIndex(MCRegister RegNo, MCRegister SubRegNo) const;
/// Get the size of the bit range covered by a sub-register index.
/// If the index isn't continuous, return the sum of the sizes of its parts.
@@ -367,7 +365,7 @@ public:
/// Return the human-readable symbolic target-specific name for the
/// specified physical register.
- const char *getName(unsigned RegNo) const {
+ const char *getName(MCRegister RegNo) const {
return RegStrings + get(RegNo).Name;
}
@@ -395,15 +393,11 @@ public:
/// number. Returns -1 if there is no equivalent value. The second
/// parameter allows targets to use different numberings for EH info and
/// debugging info.
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
-
- /// Map a dwarf register back to a target register.
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+ int getDwarfRegNum(MCRegister RegNum, bool isEH) const;
- /// Map a DWARF EH register back to a target register (same as
- /// getLLVMRegNum(RegNum, true)) but return -1 if there is no mapping,
- /// rather than asserting that there must be one.
- int getLLVMRegNumFromEH(unsigned RegNum) const;
+ /// Map a dwarf register back to a target register. Returns None if there is
+ /// no mapping.
+ Optional<unsigned> getLLVMRegNum(unsigned RegNum, bool isEH) const;
/// Map a target EH register number to an equivalent DWARF register
/// number.
@@ -411,11 +405,11 @@ public:
/// Map a target register to an equivalent SEH register
/// number. Returns LLVM register number if there is no equivalent value.
- int getSEHRegNum(unsigned RegNum) const;
+ int getSEHRegNum(MCRegister RegNum) const;
/// Map a target register to an equivalent CodeView register
/// number.
- int getCodeViewRegNum(unsigned RegNum) const;
+ int getCodeViewRegNum(MCRegister RegNum) const;
regclass_iterator regclass_begin() const { return Classes; }
regclass_iterator regclass_end() const { return Classes+NumClasses; }
@@ -439,34 +433,34 @@ public:
}
/// Returns the encoding for RegNo
- uint16_t getEncodingValue(unsigned RegNo) const {
+ uint16_t getEncodingValue(MCRegister RegNo) const {
assert(RegNo < NumRegs &&
"Attempting to get encoding for invalid register number!");
return RegEncodingTable[RegNo];
}
/// Returns true if RegB is a sub-register of RegA.
- bool isSubRegister(unsigned RegA, unsigned RegB) const {
+ bool isSubRegister(MCRegister RegA, MCRegister RegB) const {
return isSuperRegister(RegB, RegA);
}
/// Returns true if RegB is a super-register of RegA.
- bool isSuperRegister(unsigned RegA, unsigned RegB) const;
+ bool isSuperRegister(MCRegister RegA, MCRegister RegB) const;
/// Returns true if RegB is a sub-register of RegA or if RegB == RegA.
- bool isSubRegisterEq(unsigned RegA, unsigned RegB) const {
+ bool isSubRegisterEq(MCRegister RegA, MCRegister RegB) const {
return isSuperRegisterEq(RegB, RegA);
}
/// Returns true if RegB is a super-register of RegA or if
/// RegB == RegA.
- bool isSuperRegisterEq(unsigned RegA, unsigned RegB) const {
+ bool isSuperRegisterEq(MCRegister RegA, MCRegister RegB) const {
return RegA == RegB || isSuperRegister(RegA, RegB);
}
/// Returns true if RegB is a super-register or sub-register of RegA
/// or if RegB == RegA.
- bool isSuperOrSubRegisterEq(unsigned RegA, unsigned RegB) const {
+ bool isSuperOrSubRegisterEq(MCRegister RegA, MCRegister RegB) const {
return isSubRegisterEq(RegA, RegB) || isSuperRegister(RegA, RegB);
}
};
@@ -482,8 +476,8 @@ public:
/// If IncludeSelf is set, Reg itself is included in the list.
class MCSubRegIterator : public MCRegisterInfo::DiffListIterator {
public:
- MCSubRegIterator(unsigned Reg, const MCRegisterInfo *MCRI,
- bool IncludeSelf = false) {
+ MCSubRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
+ bool IncludeSelf = false) {
init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs);
// Initially, the iterator points to Reg itself.
if (!IncludeSelf)
@@ -500,13 +494,13 @@ class MCSubRegIndexIterator {
public:
/// Constructs an iterator that traverses subregisters and their
/// associated subregister indices.
- MCSubRegIndexIterator(unsigned Reg, const MCRegisterInfo *MCRI)
+ MCSubRegIndexIterator(MCRegister Reg, const MCRegisterInfo *MCRI)
: SRIter(Reg, MCRI) {
SRIndex = MCRI->SubRegIndices + MCRI->get(Reg).SubRegIndices;
}
/// Returns current sub-register.
- unsigned getSubReg() const {
+ MCRegister getSubReg() const {
return *SRIter;
}
@@ -531,7 +525,7 @@ class MCSuperRegIterator : public MCRegisterInfo::DiffListIterator {
public:
MCSuperRegIterator() = default;
- MCSuperRegIterator(unsigned Reg, const MCRegisterInfo *MCRI,
+ MCSuperRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
bool IncludeSelf = false) {
init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
// Initially, the iterator points to Reg itself.
@@ -542,7 +536,7 @@ public:
// Definition for isSuperRegister. Put it down here since it needs the
// iterator defined above in addition to the MCRegisterInfo class itself.
-inline bool MCRegisterInfo::isSuperRegister(unsigned RegA, unsigned RegB) const{
+inline bool MCRegisterInfo::isSuperRegister(MCRegister RegA, MCRegister RegB) const{
for (MCSuperRegIterator I(RegA, this); I.isValid(); ++I)
if (*I == RegB)
return true;
@@ -569,7 +563,7 @@ public:
/// in Reg.
MCRegUnitIterator() = default;
- MCRegUnitIterator(unsigned Reg, const MCRegisterInfo *MCRI) {
+ MCRegUnitIterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
assert(Reg && "Null register has no regunits");
// Decode the RegUnits MCRegisterDesc field.
unsigned RU = MCRI->get(Reg).RegUnits;
@@ -600,7 +594,7 @@ public:
/// Constructs an iterator that traverses the register units and their
/// associated LaneMasks in Reg.
- MCRegUnitMaskIterator(unsigned Reg, const MCRegisterInfo *MCRI)
+ MCRegUnitMaskIterator(MCRegister Reg, const MCRegisterInfo *MCRI)
: RUIter(Reg, MCRI) {
uint16_t Idx = MCRI->get(Reg).RegUnitLaneMasks;
MaskListIter = &MCRI->RegUnitMaskSequences[Idx];
@@ -667,7 +661,7 @@ public:
/// any ordering or that entries are unique.
class MCRegAliasIterator {
private:
- unsigned Reg;
+ MCRegister Reg;
const MCRegisterInfo *MCRI;
bool IncludeSelf;
@@ -676,7 +670,7 @@ private:
MCSuperRegIterator SI;
public:
- MCRegAliasIterator(unsigned Reg, const MCRegisterInfo *MCRI,
+ MCRegAliasIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
bool IncludeSelf)
: Reg(Reg), MCRI(MCRI), IncludeSelf(IncludeSelf) {
// Initialize the iterators.
@@ -692,7 +686,7 @@ public:
bool isValid() const { return RI.isValid(); }
- unsigned operator*() const {
+ MCRegister operator*() const {
assert(SI.isValid() && "Cannot dereference an invalid iterator.");
return *SI;
}
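An iteration sketch in the new MCRegister vocabulary (hypothetical helper): testing whether two registers overlap by walking MCRegAliasIterator, including the register itself.

#include "llvm/MC/MCRegisterInfo.h"
using namespace llvm;

static bool regsOverlap(const MCRegisterInfo &MRI, MCRegister A,
                        MCRegister B) {
  for (MCRegAliasIterator AI(A, &MRI, /*IncludeSelf=*/true); AI.isValid();
       ++AI)
    if (*AI == B)
      return true;
  return false;
}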
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 6fad1ec2069c..d057feda87d8 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/MC/MCFragment.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Alignment.h"
#include <cassert>
#include <utility>
@@ -58,7 +59,7 @@ private:
MCSymbol *Begin;
MCSymbol *End = nullptr;
/// The alignment requirement of this section.
- unsigned Alignment = 1;
+ Align Alignment;
/// The section index in the assemblers section list.
unsigned Ordinal = 0;
/// The index of this section in the layout order.
@@ -117,8 +118,8 @@ public:
MCSymbol *getEndSymbol(MCContext &Ctx);
bool hasEnded() const;
- unsigned getAlignment() const { return Alignment; }
- void setAlignment(unsigned Value) { Alignment = Value; }
+ unsigned getAlignment() const { return Alignment.value(); }
+ void setAlignment(Align Value) { Alignment = Value; }
unsigned getOrdinal() const { return Ordinal; }
void setOrdinal(unsigned Value) { Ordinal = Value; }
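A sketch of the call-site change implied by the typed setter (the helper name is illustrative); Align's constructor rejects non-powers of two, which is the point of the migration.

#include "llvm/MC/MCSection.h"
#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

static void align16(MCSection &Sec) {
  Sec.setAlignment(Align(16)); // Align(24) would assert: not a power of two
  assert(Sec.getAlignment() == 16); // getter still returns a raw byte count
}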
diff --git a/include/llvm/MC/MCSectionXCOFF.h b/include/llvm/MC/MCSectionXCOFF.h
index 2a3f391fd3e2..ee302ed5ecec 100644
--- a/include/llvm/MC/MCSectionXCOFF.h
+++ b/include/llvm/MC/MCSectionXCOFF.h
@@ -23,16 +23,30 @@ class MCSymbol;
// This class represents an XCOFF `Control Section`, more commonly referred to
// as a csect. A csect represents the smallest possible unit of data/code which
-// will be relocated as a single block.
+// will be relocated as a single block. A csect can either be:
+// 1) Initialized: The Type will be XTY_SD, and the symbols inside the csect
+// will have a label definition representing their offset within the csect.
+// 2) Uninitialized: The Type will be XTY_CM; it will contain a single symbol,
+// and may not contain label definitions.
+// 3) An external reference providing a symbol table entry for a symbol
+// contained in another XCOFF object file. External reference csects are not
+// implemented yet.
class MCSectionXCOFF final : public MCSection {
friend class MCContext;
StringRef Name;
XCOFF::StorageMappingClass MappingClass;
+ XCOFF::SymbolType Type;
+ XCOFF::StorageClass StorageClass;
MCSectionXCOFF(StringRef Section, XCOFF::StorageMappingClass SMC,
- SectionKind K, MCSymbol *Begin)
- : MCSection(SV_XCOFF, K, Begin), Name(Section), MappingClass(SMC) {}
+ XCOFF::SymbolType ST, XCOFF::StorageClass SC, SectionKind K,
+ MCSymbol *Begin)
+ : MCSection(SV_XCOFF, K, Begin), Name(Section), MappingClass(SMC),
+ Type(ST), StorageClass(SC) {
+ assert((ST == XCOFF::XTY_SD || ST == XCOFF::XTY_CM) &&
+ "Invalid or unhandled type for csect.");
+ }
public:
~MCSectionXCOFF();
@@ -43,6 +57,8 @@ public:
StringRef getSectionName() const { return Name; }
XCOFF::StorageMappingClass getMappingClass() const { return MappingClass; }
+ XCOFF::StorageClass getStorageClass() const { return StorageClass; }
+ XCOFF::SymbolType getCSectType() const { return Type; }
void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 731e7515448c..6b48580ae57c 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -46,6 +46,7 @@ struct MCDwarfFrameInfo;
class MCExpr;
class MCInst;
class MCInstPrinter;
+class MCRegister;
class MCSection;
class MCStreamer;
class MCSymbolRefExpr;
@@ -53,6 +54,13 @@ class MCSubtargetInfo;
class raw_ostream;
class Twine;
+namespace codeview {
+struct DefRangeRegisterRelHeader;
+struct DefRangeSubfieldRegisterHeader;
+struct DefRangeRegisterHeader;
+struct DefRangeFramePointerRelHeader;
+}
+
using MCSectionSubPair = std::pair<MCSection *, const MCExpr *>;
/// Target specific streamer interface. This is used so that targets can
@@ -536,6 +544,15 @@ public:
/// \param Symbol - Symbol the image relative relocation should point to.
virtual void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset);
+ /// Emits an lcomm directive with XCOFF csect information.
+ ///
+ /// \param Symbol - The symbol we are emitting.
+ /// \param Size - The size of the block of storage.
+ /// \param ByteAlignment - The alignment of the symbol in bytes. Must be a power
+ /// of 2.
+ virtual void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
/// Emit an ELF .size directive.
///
/// This corresponds to an assembler statement such as:
@@ -860,6 +877,22 @@ public:
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion);
+ virtual void EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeRegisterRelHeader DRHdr);
+
+ virtual void EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeSubfieldRegisterHeader DRHdr);
+
+ virtual void EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeRegisterHeader DRHdr);
+
+ virtual void EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeFramePointerRelHeader DRHdr);
+
/// This implements the CodeView '.cv_stringtable' assembler directive.
virtual void EmitCVStringTableDirective() {}
@@ -917,13 +950,13 @@ public:
virtual void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc());
virtual void EmitWinCFIStartChained(SMLoc Loc = SMLoc());
virtual void EmitWinCFIEndChained(SMLoc Loc = SMLoc());
- virtual void EmitWinCFIPushReg(unsigned Register, SMLoc Loc = SMLoc());
- virtual void EmitWinCFISetFrame(unsigned Register, unsigned Offset,
+ virtual void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc = SMLoc());
+ virtual void EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
SMLoc Loc = SMLoc());
virtual void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc());
- virtual void EmitWinCFISaveReg(unsigned Register, unsigned Offset,
+ virtual void EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
SMLoc Loc = SMLoc());
- virtual void EmitWinCFISaveXMM(unsigned Register, unsigned Offset,
+ virtual void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
SMLoc Loc = SMLoc());
virtual void EmitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc());
virtual void EmitWinCFIEndProlog(SMLoc Loc = SMLoc());
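// A hedged sketch of why the EmitWinCFI* signatures above move from
// `unsigned` to `MCRegister`: a thin wrapper type stops arbitrary integers
// from being passed where a register id is meant. `RegId` is a hypothetical
// stand-in, not LLVM's MCRegister.
class RegId {
  unsigned Id;
public:
  explicit RegId(unsigned R) : Id(R) {}
  unsigned id() const { return Id; }
};

// Would emit something like `.seh_pushreg` for Reg in a real streamer.
void emitWinCFIPushReg(RegId Reg) { (void)Reg; }

int main() {
  emitWinCFIPushReg(RegId(5)); // explicit construction documents intent
  // emitWinCFIPushReg(5);     // does not compile: no implicit conversion
  return 0;
}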
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index 9490a6ecedad..09130c4641ef 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -221,6 +221,52 @@ public:
auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
}
+
+ virtual unsigned getHwMode() const { return 0; }
+
+ /// Return the cache size in bytes for the given level of cache.
+ /// Level is zero-based, so a value of zero means the first level of
+ /// cache.
+ ///
+ virtual Optional<unsigned> getCacheSize(unsigned Level) const;
+
+ /// Return the cache associativity for the given level of cache.
+ /// Level is zero-based, so a value of zero means the first level of
+ /// cache.
+ ///
+ virtual Optional<unsigned> getCacheAssociativity(unsigned Level) const;
+
+ /// Return the target cache line size in bytes at a given level.
+ ///
+ virtual Optional<unsigned> getCacheLineSize(unsigned Level) const;
+
+ /// Return the target cache line size in bytes. By default, return
+ /// the line size for the bottom-most level of cache. This provides
+ /// a more convenient interface for the common case where all cache
+ /// levels have the same line size. Return zero if there is no
+ /// cache model.
+ ///
+ virtual unsigned getCacheLineSize() const {
+ Optional<unsigned> Size = getCacheLineSize(0);
+ if (Size)
+ return *Size;
+
+ return 0;
+ }
+
+ /// Return the preferred prefetch distance in terms of instructions.
+ ///
+ virtual unsigned getPrefetchDistance() const;
+
+ /// Return the maximum prefetch distance in terms of loop
+ /// iterations.
+ ///
+ virtual unsigned getMaxPrefetchIterationsAhead() const;
+
+ /// Return the minimum stride necessary to trigger software
+ /// prefetching.
+ ///
+ virtual unsigned getMinPrefetchStride() const;
};
} // end namespace llvm
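// A hedged sketch of the getCacheLineSize() fallback pattern above, with
// std::optional standing in for llvm::Optional; the level-0 line size of 64
// bytes is a made-up value for illustration.
#include <cstdio>
#include <optional>

std::optional<unsigned> getCacheLineSize(unsigned Level) {
  if (Level == 0)
    return 64; // pretend the subtarget models a 64-byte L1 line
  return std::nullopt; // no cache model beyond level 0
}

unsigned getCacheLineSize() {
  if (std::optional<unsigned> Size = getCacheLineSize(0))
    return *Size;
  return 0; // zero means "no cache model", as documented above
}

int main() {
  std::printf("line size: %u\n", getCacheLineSize());
  return 0;
}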
diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h
index c50cd0ee4709..95beebe3f75a 100644
--- a/include/llvm/MC/MCSymbolWasm.h
+++ b/include/llvm/MC/MCSymbolWasm.h
@@ -54,6 +54,13 @@ public:
modifyFlags(wasm::WASM_SYMBOL_EXPORTED, wasm::WASM_SYMBOL_EXPORTED);
}
+ bool isNoStrip() const {
+ return getFlags() & wasm::WASM_SYMBOL_NO_STRIP;
+ }
+ void setNoStrip() const {
+ modifyFlags(wasm::WASM_SYMBOL_NO_STRIP, wasm::WASM_SYMBOL_NO_STRIP);
+ }
+
bool isWeak() const { return IsWeak; }
void setWeak(bool isWeak) { IsWeak = isWeak; }
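// A hedged sketch of the getFlags()/modifyFlags() bit idiom behind the new
// isNoStrip()/setNoStrip() accessors above. The flag value and struct are
// hypothetical, not the binary-format constants from llvm/BinaryFormat.
#include <cassert>
#include <cstdint>

constexpr uint32_t SYM_NO_STRIP = 0x8; // illustrative bit position

struct SymbolFlags {
  uint32_t Flags = 0;
  void modifyFlags(uint32_t Set, uint32_t Mask) {
    Flags = (Flags & ~Mask) | Set;
  }
  bool isNoStrip() const { return Flags & SYM_NO_STRIP; }
  void setNoStrip() { modifyFlags(SYM_NO_STRIP, SYM_NO_STRIP); }
};

int main() {
  SymbolFlags S;
  S.setNoStrip();
  assert(S.isNoStrip());
  return 0;
}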
diff --git a/include/llvm/MC/MCSymbolXCOFF.h b/include/llvm/MC/MCSymbolXCOFF.h
index 0a1fe1475138..98ecd2466926 100644
--- a/include/llvm/MC/MCSymbolXCOFF.h
+++ b/include/llvm/MC/MCSymbolXCOFF.h
@@ -8,17 +8,49 @@
#ifndef LLVM_MC_MCSYMBOLXCOFF_H
#define LLVM_MC_MCSYMBOLXCOFF_H
+#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCSymbol.h"
namespace llvm {
+class MCSectionXCOFF;
+
class MCSymbolXCOFF : public MCSymbol {
public:
MCSymbolXCOFF(const StringMapEntry<bool> *Name, bool isTemporary)
: MCSymbol(SymbolKindXCOFF, Name, isTemporary) {}
static bool classof(const MCSymbol *S) { return S->isXCOFF(); }
+
+ void setStorageClass(XCOFF::StorageClass SC) {
+ assert((!StorageClass.hasValue() || StorageClass.getValue() == SC) &&
+ "Redefining StorageClass of XCOFF MCSymbol.");
+ StorageClass = SC;
+ }
+
+ XCOFF::StorageClass getStorageClass() const {
+ assert(StorageClass.hasValue() &&
+ "StorageClass not set on XCOFF MCSymbol.");
+ return StorageClass.getValue();
+ }
+
+ void setContainingCsect(MCSectionXCOFF *C) {
+ assert((!ContainingCsect || ContainingCsect == C) &&
+ "Trying to set a containing csect that doesn't match the one that"
+ "this symbol is already mapped to.");
+ ContainingCsect = C;
+ }
+
+ MCSectionXCOFF *getContainingCsect() const {
+ assert(ContainingCsect &&
+ "Trying to get containing csect but none was set.");
+ return ContainingCsect;
+ }
+
+private:
+ Optional<XCOFF::StorageClass> StorageClass;
+ MCSectionXCOFF *ContainingCsect = nullptr;
};
} // end namespace llvm
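// A hedged sketch of the "set once, then read" attribute pattern the new
// MCSymbolXCOFF members use above, with std::optional standing in for
// llvm::Optional and a made-up enum for the storage class.
#include <cassert>
#include <optional>

enum class SC { CsectEntry, External };

class SymbolSketch {
  std::optional<SC> StorageClass;
public:
  void setStorageClass(SC C) {
    // Idempotent: re-setting the same value is fine, changing it asserts.
    assert((!StorageClass || *StorageClass == C) &&
           "Redefining StorageClass of symbol.");
    StorageClass = C;
  }
  SC getStorageClass() const {
    assert(StorageClass && "StorageClass not set on symbol.");
    return *StorageClass;
  }
};

int main() {
  SymbolSketch S;
  S.setStorageClass(SC::External);
  S.setStorageClass(SC::External); // allowed: same value
  assert(S.getStorageClass() == SC::External);
  return 0;
}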
diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h
index 4adbca28f116..fbb68549b503 100644
--- a/include/llvm/MC/MCWasmObjectWriter.h
+++ b/include/llvm/MC/MCWasmObjectWriter.h
@@ -20,9 +20,10 @@ class raw_pwrite_stream;
class MCWasmObjectTargetWriter : public MCObjectTargetWriter {
const unsigned Is64Bit : 1;
+ const unsigned IsEmscripten : 1;
protected:
- explicit MCWasmObjectTargetWriter(bool Is64Bit_);
+ explicit MCWasmObjectTargetWriter(bool Is64Bit_, bool IsEmscripten);
public:
virtual ~MCWasmObjectTargetWriter();
@@ -38,6 +39,7 @@ public:
/// \name Accessors
/// @{
bool is64Bit() const { return Is64Bit; }
+ bool isEmscripten() const { return IsEmscripten; }
/// @}
};
diff --git a/include/llvm/MC/MCXCOFFStreamer.h b/include/llvm/MC/MCXCOFFStreamer.h
index 159ae4818749..b13b0031d18e 100644
--- a/include/llvm/MC/MCXCOFFStreamer.h
+++ b/include/llvm/MC/MCXCOFFStreamer.h
@@ -26,6 +26,8 @@ public:
uint64_t Size = 0, unsigned ByteAlignment = 0,
SMLoc Loc = SMLoc()) override;
void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override;
+ void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlign) override;
};
} // end namespace llvm
diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h
index c83eca4e512d..c8d4c3bbc262 100644
--- a/include/llvm/MC/StringTableBuilder.h
+++ b/include/llvm/MC/StringTableBuilder.h
@@ -22,7 +22,7 @@ class raw_ostream;
/// Utility for building string tables with deduplicated suffixes.
class StringTableBuilder {
public:
- enum Kind { ELF, WinCOFF, MachO, RAW, DWARF };
+ enum Kind { ELF, WinCOFF, MachO, RAW, DWARF, XCOFF };
private:
DenseMap<CachedHashStringRef, size_t> StringIndexMap;
diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index fc9565ceafad..defbc3c64720 100644
--- a/include/llvm/MC/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -18,6 +18,7 @@
#define LLVM_MC_SUBTARGETFEATURE_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MathExtras.h"
#include <array>
#include <bitset>
#include <initializer_list>
@@ -33,20 +34,123 @@ const unsigned MAX_SUBTARGET_WORDS = 3;
const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64;
/// Container class for subtarget features.
-/// This is convenient because std::bitset does not have a constructor
-/// with an initializer list of set bits.
-class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> {
-public:
- // Cannot inherit constructors because it's not supported by VC++..
- FeatureBitset() = default;
-
- FeatureBitset(const bitset<MAX_SUBTARGET_FEATURES>& B) : bitset(B) {}
+/// This is a constexpr reimplementation of a subset of std::bitset. It would be
+/// nice to use std::bitset directly, but it doesn't support constant
+/// initialization.
+class FeatureBitset {
+ static_assert((MAX_SUBTARGET_FEATURES % 64) == 0,
+ "Should be a multiple of 64!");
+ // This cannot be a std::array, operator[] is not constexpr until C++17.
+ uint64_t Bits[MAX_SUBTARGET_WORDS] = {};
+
+protected:
+ constexpr FeatureBitset(const std::array<uint64_t, MAX_SUBTARGET_WORDS> &B) {
+ for (unsigned I = 0; I != B.size(); ++I)
+ Bits[I] = B[I];
+ }
- FeatureBitset(std::initializer_list<unsigned> Init) {
+public:
+ constexpr FeatureBitset() = default;
+ constexpr FeatureBitset(std::initializer_list<unsigned> Init) {
for (auto I : Init)
set(I);
}
+ FeatureBitset &set() {
+ std::fill(std::begin(Bits), std::end(Bits), -1ULL);
+ return *this;
+ }
+
+ constexpr FeatureBitset &set(unsigned I) {
+ // GCC <6.2 crashes if this is written in a single statement.
+ uint64_t NewBits = Bits[I / 64] | (uint64_t(1) << (I % 64));
+ Bits[I / 64] = NewBits;
+ return *this;
+ }
+
+ constexpr FeatureBitset &reset(unsigned I) {
+ // GCC <6.2 crashes if this is written in a single statement.
+ uint64_t NewBits = Bits[I / 64] & ~(uint64_t(1) << (I % 64));
+ Bits[I / 64] = NewBits;
+ return *this;
+ }
+
+ constexpr FeatureBitset &flip(unsigned I) {
+ // GCC <6.2 crashes if this is written in a single statement.
+ uint64_t NewBits = Bits[I / 64] ^ (uint64_t(1) << (I % 64));
+ Bits[I / 64] = NewBits;
+ return *this;
+ }
+
+ constexpr bool operator[](unsigned I) const {
+ uint64_t Mask = uint64_t(1) << (I % 64);
+ return (Bits[I / 64] & Mask) != 0;
+ }
+
+ constexpr bool test(unsigned I) const { return (*this)[I]; }
+
+ constexpr size_t size() const { return MAX_SUBTARGET_FEATURES; }
+
+ bool any() const {
+ return llvm::any_of(Bits, [](uint64_t I) { return I != 0; });
+ }
+ bool none() const { return !any(); }
+ size_t count() const {
+ size_t Count = 0;
+ for (auto B : Bits)
+ Count += countPopulation(B);
+ return Count;
+ }
+
+ constexpr FeatureBitset &operator^=(const FeatureBitset &RHS) {
+ for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
+ Bits[I] ^= RHS.Bits[I];
+ }
+ return *this;
+ }
+ constexpr FeatureBitset operator^(const FeatureBitset &RHS) const {
+ FeatureBitset Result = *this;
+ Result ^= RHS;
+ return Result;
+ }
+
+ constexpr FeatureBitset &operator&=(const FeatureBitset &RHS) {
+ for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
+ Bits[I] &= RHS.Bits[I];
+ }
+ return *this;
+ }
+ constexpr FeatureBitset operator&(const FeatureBitset &RHS) const {
+ FeatureBitset Result = *this;
+ Result &= RHS;
+ return Result;
+ }
+
+ constexpr FeatureBitset &operator|=(const FeatureBitset &RHS) {
+ for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
+ Bits[I] |= RHS.Bits[I];
+ }
+ return *this;
+ }
+ constexpr FeatureBitset operator|(const FeatureBitset &RHS) const {
+ FeatureBitset Result = *this;
+ Result |= RHS;
+ return Result;
+ }
+
+ constexpr FeatureBitset operator~() const {
+ FeatureBitset Result = *this;
+ for (auto &B : Result.Bits)
+ B = ~B;
+ return Result;
+ }
+
+ bool operator==(const FeatureBitset &RHS) const {
+ return std::equal(std::begin(Bits), std::end(Bits), std::begin(RHS.Bits));
+ }
+
+ bool operator!=(const FeatureBitset &RHS) const { return !(*this == RHS); }
+
bool operator < (const FeatureBitset &Other) const {
for (unsigned I = 0, E = size(); I != E; ++I) {
bool LHS = test(I), RHS = Other.test(I);
@@ -58,23 +162,12 @@ public:
};
/// Class used to store the subtarget bits in the tables created by tablegen.
-/// The std::initializer_list constructor of FeatureBitset can't be done at
-/// compile time and requires a static constructor to run at startup.
-class FeatureBitArray {
- std::array<uint64_t, MAX_SUBTARGET_WORDS> Bits;
-
+class FeatureBitArray : public FeatureBitset {
public:
constexpr FeatureBitArray(const std::array<uint64_t, MAX_SUBTARGET_WORDS> &B)
- : Bits(B) {}
-
- FeatureBitset getAsBitset() const {
- FeatureBitset Result;
-
- for (unsigned i = 0, e = Bits.size(); i != e; ++i)
- Result |= FeatureBitset(Bits[i]) << (64 * i);
+ : FeatureBitset(B) {}
- return Result;
- }
+ const FeatureBitset &getAsBitset() const { return *this; }
};
//===----------------------------------------------------------------------===//
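// A hedged sketch of the constexpr multi-word bitset technique above,
// reduced to two 64-bit words. It mirrors the idea (constant-initializable
// set/test over a uint64_t array), not LLVM's exact FeatureBitset.
#include <cassert>
#include <cstdint>

class TinyBitset {
  uint64_t Bits[2] = {};
public:
  constexpr TinyBitset() = default;
  constexpr TinyBitset &set(unsigned I) {
    Bits[I / 64] |= uint64_t(1) << (I % 64);
    return *this;
  }
  constexpr bool test(unsigned I) const {
    return (Bits[I / 64] >> (I % 64)) & 1;
  }
};

int main() {
  // Unlike std::bitset, whose initializer-list gap is noted above, this
  // can be a compile-time constant.
  constexpr TinyBitset B = TinyBitset().set(3).set(70);
  static_assert(B.test(3) && B.test(70), "bits set at compile time");
  assert(!B.test(4));
  return 0;
}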
diff --git a/include/llvm/MCA/CodeEmitter.h b/include/llvm/MCA/CodeEmitter.h
new file mode 100644
index 000000000000..c8d222bd8c2f
--- /dev/null
+++ b/include/llvm/MCA/CodeEmitter.h
@@ -0,0 +1,72 @@
+//===--------------------- CodeEmitter.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A utility class used to compute instruction encodings. It buffers encodings
+/// for later use. It exposes a simple API to compute and get the encodings as
+/// StringRef.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_CODEEMITTER_H
+#define LLVM_MCA_CODEEMITTER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <string>
+
+namespace llvm {
+namespace mca {
+
+/// A utility class used to compute instruction encodings for a code region.
+///
+/// It provides a simple API to compute and return instruction encodings as
+/// strings. Encodings are cached internally for later use.
+class CodeEmitter {
+ const MCSubtargetInfo &STI;
+ const MCAsmBackend &MAB;
+ const MCCodeEmitter &MCE;
+
+ SmallString<256> Code;
+ raw_svector_ostream VecOS;
+ ArrayRef<MCInst> Sequence;
+
+ // An EncodingInfo pair stores <base, length> information. Base (i.e. first)
+ // is an index into `Code`. Length (i.e. second) is the encoding size.
+ using EncodingInfo = std::pair<unsigned, unsigned>;
+
+ // A cache of encodings.
+ SmallVector<EncodingInfo, 16> Encodings;
+
+ EncodingInfo getOrCreateEncodingInfo(unsigned MCID);
+
+public:
+ CodeEmitter(const MCSubtargetInfo &ST, const MCAsmBackend &AB,
+ const MCCodeEmitter &CE, ArrayRef<MCInst> S)
+ : STI(ST), MAB(AB), MCE(CE), VecOS(Code), Sequence(S),
+ Encodings(S.size()) {}
+
+ StringRef getEncoding(unsigned MCID) {
+ EncodingInfo EI = getOrCreateEncodingInfo(MCID);
+ return StringRef(&Code[EI.first], EI.second);
+ }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_CODEEMITTER_H
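// A hedged standalone sketch of the <base, length> caching scheme described
// above: every computed encoding is appended to one buffer, and the cache
// records only its offset and size. The `encode` body is a placeholder for
// a real instruction encoder.
#include <cassert>
#include <string>
#include <utility>
#include <vector>

class EncodingCache {
  std::string Code; // concatenated encodings ("base" indexes into this)
  std::vector<std::pair<unsigned, unsigned>> Cache; // <base, length> per id

  std::string encode(unsigned Id) { // placeholder encoder: 1-3 bytes
    return std::string(1 + Id % 3, char('A' + Id % 26));
  }

public:
  explicit EncodingCache(unsigned N) : Cache(N) {}

  // Note: like a StringRef into a growable buffer, the returned pointer is
  // only valid until the next insertion causes `Code` to reallocate.
  std::pair<const char *, unsigned> getEncoding(unsigned Id) {
    auto &EI = Cache[Id];
    if (EI.second == 0) { // length 0 doubles as "not cached yet"
      std::string Enc = encode(Id);
      EI = {unsigned(Code.size()), unsigned(Enc.size())};
      Code += Enc;
    }
    return {Code.data() + EI.first, EI.second};
  }
};

int main() {
  EncodingCache C(8);
  auto First = C.getEncoding(2);
  auto Again = C.getEncoding(2); // second query is served from the cache
  assert(First.second == Again.second && First.first == Again.first);
  return 0;
}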
diff --git a/include/llvm/MCA/Context.h b/include/llvm/MCA/Context.h
index 503d780d4947..af3cb8e1e837 100644
--- a/include/llvm/MCA/Context.h
+++ b/include/llvm/MCA/Context.h
@@ -20,7 +20,6 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
-#include "llvm/MCA/InstrBuilder.h"
#include "llvm/MCA/Pipeline.h"
#include "llvm/MCA/SourceMgr.h"
#include <memory>
@@ -58,6 +57,9 @@ public:
Context(const Context &C) = delete;
Context &operator=(const Context &C) = delete;
+ const MCRegisterInfo &getMCRegisterInfo() const { return MRI; }
+ const MCSubtargetInfo &getMCSubtargetInfo() const { return STI; }
+
void addHardwareUnit(std::unique_ptr<HardwareUnit> H) {
Hardware.push_back(std::move(H));
}
@@ -65,7 +67,6 @@ public:
/// Construct a basic pipeline for simulating an out-of-order pipeline.
/// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages.
std::unique_ptr<Pipeline> createDefaultPipeline(const PipelineOptions &Opts,
- InstrBuilder &IB,
SourceMgr &SrcMgr);
};
diff --git a/include/llvm/MCA/HardwareUnits/LSUnit.h b/include/llvm/MCA/HardwareUnits/LSUnit.h
index ae9a49c64855..34903794db4a 100644
--- a/include/llvm/MCA/HardwareUnits/LSUnit.h
+++ b/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -209,8 +209,10 @@ public:
unsigned getUsedLQEntries() const { return UsedLQEntries; }
unsigned getUsedSQEntries() const { return UsedSQEntries; }
- unsigned assignLQSlot() { return UsedLQEntries++; }
- unsigned assignSQSlot() { return UsedSQEntries++; }
+ void acquireLQSlot() { ++UsedLQEntries; }
+ void acquireSQSlot() { ++UsedSQEntries; }
+ void releaseLQSlot() { --UsedLQEntries; }
+ void releaseSQSlot() { --UsedSQEntries; }
bool assumeNoAlias() const { return NoAlias; }
@@ -285,13 +287,18 @@ public:
unsigned createMemoryGroup() {
Groups.insert(
- std::make_pair(NextGroupID, llvm::make_unique<MemoryGroup>()));
+ std::make_pair(NextGroupID, std::make_unique<MemoryGroup>()));
return NextGroupID++;
}
- // Instruction executed event handlers.
virtual void onInstructionExecuted(const InstRef &IR);
+ // Loads are tracked by the LDQ (load queue) from dispatch until completion.
+ // Stores are tracked by the STQ (store queue) from dispatch until commitment.
+ // By default we conservatively assume that the LDQ receives a load at
+ // dispatch. Loads leave the LDQ at the retirement stage.
+ virtual void onInstructionRetired(const InstRef &IR);
+
virtual void onInstructionIssued(const InstRef &IR) {
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
Groups[GroupID]->onInstructionIssued(IR);
@@ -436,9 +443,6 @@ public:
/// 6. A store has to wait until an older store barrier is fully executed.
unsigned dispatch(const InstRef &IR) override;
- // FIXME: For simplicity, we optimistically assume a similar behavior for
- // store instructions. In practice, store operations don't tend to leave the
- // store queue until they reach the 'Retired' stage (See PR39830).
void onInstructionExecuted(const InstRef &IR) override;
};
diff --git a/include/llvm/MCA/HardwareUnits/RegisterFile.h b/include/llvm/MCA/HardwareUnits/RegisterFile.h
index 36506327bd29..cd7718d98744 100644
--- a/include/llvm/MCA/HardwareUnits/RegisterFile.h
+++ b/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -220,7 +220,7 @@ public:
//
// Current implementation can simulate up to 32 register files (including the
// special register file at index #0).
- unsigned isAvailable(ArrayRef<unsigned> Regs) const;
+ unsigned isAvailable(ArrayRef<MCPhysReg> Regs) const;
// Returns the number of PRFs implemented by this processor.
unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
diff --git a/include/llvm/MCA/HardwareUnits/ResourceManager.h b/include/llvm/MCA/HardwareUnits/ResourceManager.h
index 2f91185516fb..917af3750044 100644
--- a/include/llvm/MCA/HardwareUnits/ResourceManager.h
+++ b/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -33,8 +33,7 @@ namespace mca {
/// with a buffer size of -1 is always available if it is not reserved.
///
/// Values of type ResourceStateEvent are returned by method
-/// ResourceState::isBufferAvailable(), which is used to query the internal
-/// state of a resource.
+/// ResourceManager::canBeDispatched().
///
/// The naming convention for resource state events is:
/// * Event names start with prefix RS_
@@ -263,16 +262,26 @@ public:
/// Returns RS_BUFFER_UNAVAILABLE if there are no available slots.
ResourceStateEvent isBufferAvailable() const;
- /// Reserve a slot in the buffer.
- void reserveBuffer() {
- if (AvailableSlots)
- AvailableSlots--;
+ /// Reserve a buffer slot.
+ ///
+ /// Returns true if the buffer is not full.
+ /// It always returns true if BufferSize is set to zero.
+ bool reserveBuffer() {
+ if (BufferSize <= 0)
+ return true;
+
+ --AvailableSlots;
+ assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
+ return AvailableSlots;
}
- /// Release a slot in the buffer.
+ /// Releases a slot in the buffer.
void releaseBuffer() {
- if (BufferSize > 0)
- AvailableSlots++;
+ // Ignore dispatch hazards or invalid buffer sizes.
+ if (BufferSize <= 0)
+ return;
+
+ ++AvailableSlots;
assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
}
@@ -351,9 +360,16 @@ class ResourceManager {
// Set of processor resource units that are available during this cycle.
uint64_t AvailableProcResUnits;
- // Set of processor resource groups that are currently reserved.
+ // Set of processor resources that are currently reserved.
uint64_t ReservedResourceGroups;
+ // Set of unavailable scheduler buffer resources. This is used internally to
+ // speed up `canBeDispatched()` queries.
+ uint64_t AvailableBuffers;
+
+ // Set of dispatch hazard buffer resources that are currently unavailable.
+ uint64_t ReservedBuffers;
+
// Returns the actual resource unit that will be used.
ResourceRef selectPipe(uint64_t ResourceID);
@@ -382,17 +398,20 @@ public:
// Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
// there are enough available slots in the buffers.
- ResourceStateEvent canBeDispatched(ArrayRef<uint64_t> Buffers) const;
+ ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const;
// Return the processor resource identifier associated to this Mask.
unsigned resolveResourceMask(uint64_t Mask) const;
- // Consume a slot in every buffered resource from array 'Buffers'. Resource
- // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
- void reserveBuffers(ArrayRef<uint64_t> Buffers);
+ // Acquires a slot from every buffered resource in mask `ConsumedBuffers`.
+ // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
+ void reserveBuffers(uint64_t ConsumedBuffers);
- // Release buffer entries previously allocated by method reserveBuffers.
- void releaseBuffers(ArrayRef<uint64_t> Buffers);
+ // Releases a slot from every buffered resource in mask `ConsumedBuffers`.
+ // ConsumedBuffers is a bitmask of previously acquired buffers (using method
+ // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are
+ // not automatically unreserved by this method.
+ void releaseBuffers(uint64_t ConsumedBuffers);
// Reserve a processor resource. A reserved resource is not available for
// instruction issue until it is released.
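// A hedged sketch of the ArrayRef-to-bitmask change above: the set of
// buffered resources an instruction consumes becomes a single uint64_t, so
// dispatch checks reduce to bitwise operations. This simplification tracks
// one slot per buffer; the real ResourceManager keeps per-resource counts.
#include <cassert>
#include <cstdint>

struct BufferPool {
  uint64_t AvailableBuffers = ~0ULL; // bit set => buffer has a free slot

  bool canBeDispatched(uint64_t ConsumedBuffers) const {
    // Dispatchable only if every consumed buffer still has a free slot.
    return (ConsumedBuffers & AvailableBuffers) == ConsumedBuffers;
  }
  void reserveBuffers(uint64_t ConsumedBuffers) {
    AvailableBuffers &= ~ConsumedBuffers;
  }
  void releaseBuffers(uint64_t ConsumedBuffers) {
    AvailableBuffers |= ConsumedBuffers;
  }
};

int main() {
  BufferPool P;
  const uint64_t LoadQ = 1ULL << 0, StoreQ = 1ULL << 1;
  assert(P.canBeDispatched(LoadQ | StoreQ));
  P.reserveBuffers(LoadQ | StoreQ);
  assert(!P.canBeDispatched(LoadQ)); // load queue slot is taken
  P.releaseBuffers(LoadQ | StoreQ);
  assert(P.canBeDispatched(StoreQ));
  return 0;
}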
diff --git a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
index 06290141739e..acbd4543bd4a 100644
--- a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
+++ b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
@@ -57,34 +57,43 @@ struct RetireControlUnit : public HardwareUnit {
private:
unsigned NextAvailableSlotIdx;
unsigned CurrentInstructionSlotIdx;
- unsigned AvailableSlots;
+ unsigned NumROBEntries;
+ unsigned AvailableEntries;
unsigned MaxRetirePerCycle; // 0 means no limit.
std::vector<RUToken> Queue;
-public:
- RetireControlUnit(const MCSchedModel &SM);
-
- bool isEmpty() const { return AvailableSlots == Queue.size(); }
- bool isAvailable(unsigned Quantity = 1) const {
+ unsigned normalizeQuantity(unsigned Quantity) const {
// Some instructions may declare a number of uOps which exceeds the size
// of the reorder buffer. To avoid problems, cap the amount of slots to
// the size of the reorder buffer.
- Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
+ Quantity = std::min(Quantity, NumROBEntries);
// Further normalize the number of micro opcodes for instructions that
// declare zero opcodes. This should match the behavior of method
// reserveSlot().
- Quantity = std::max(Quantity, 1U);
- return AvailableSlots >= Quantity;
+ return std::max(Quantity, 1U);
+ }
+
+ unsigned computeNextSlotIdx() const;
+
+public:
+ RetireControlUnit(const MCSchedModel &SM);
+
+ bool isEmpty() const { return AvailableEntries == NumROBEntries; }
+
+ bool isAvailable(unsigned Quantity = 1) const {
+ return AvailableEntries >= normalizeQuantity(Quantity);
}
unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; }
- // Reserves a number of slots, and returns a new token.
- unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps);
+ // Reserves a number of slots, and returns a new token reference.
+ unsigned dispatch(const InstRef &IS);
// Return the current token from the RCU's circular token queue.
- const RUToken &peekCurrentToken() const;
+ const RUToken &getCurrentToken() const;
+
+ const RUToken &peekNextToken() const;
// Advance the pointer to the next token in the circular token queue.
void consumeCurrentToken();
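// A hedged sketch of the normalizeQuantity() helper factored out above: a
// micro-op count is clamped into [1, NumROBEntries] so that zero-uop
// instructions still take a slot and oversized ones fit the reorder buffer.
#include <algorithm>
#include <cassert>

unsigned normalizeQuantity(unsigned Quantity, unsigned NumROBEntries) {
  Quantity = std::min(Quantity, NumROBEntries); // cap at the ROB size
  return std::max(Quantity, 1U);                // zero uops still reserve one
}

int main() {
  assert(normalizeQuantity(0, 64) == 1);
  assert(normalizeQuantity(200, 64) == 64);
  assert(normalizeQuantity(4, 64) == 4);
  return 0;
}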
diff --git a/include/llvm/MCA/HardwareUnits/Scheduler.h b/include/llvm/MCA/HardwareUnits/Scheduler.h
index 27beb842dfd2..6c196757e571 100644
--- a/include/llvm/MCA/HardwareUnits/Scheduler.h
+++ b/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -68,7 +68,7 @@ public:
/// instructions from the dispatch stage, until the write-back stage.
///
class Scheduler : public HardwareUnit {
- LSUnit &LSU;
+ LSUnitBase &LSU;
// Instruction selection strategy for this Scheduler.
std::unique_ptr<SchedulerStrategy> Strategy;
@@ -154,15 +154,15 @@ class Scheduler : public HardwareUnit {
bool promoteToPendingSet(SmallVectorImpl<InstRef> &Pending);
public:
- Scheduler(const MCSchedModel &Model, LSUnit &Lsu)
+ Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu)
: Scheduler(Model, Lsu, nullptr) {}
- Scheduler(const MCSchedModel &Model, LSUnit &Lsu,
+ Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu,
std::unique_ptr<SchedulerStrategy> SelectStrategy)
- : Scheduler(make_unique<ResourceManager>(Model), Lsu,
+ : Scheduler(std::make_unique<ResourceManager>(Model), Lsu,
std::move(SelectStrategy)) {}
- Scheduler(std::unique_ptr<ResourceManager> RM, LSUnit &Lsu,
+ Scheduler(std::unique_ptr<ResourceManager> RM, LSUnitBase &Lsu,
std::unique_ptr<SchedulerStrategy> SelectStrategy)
: LSU(Lsu), Resources(std::move(RM)), BusyResourceUnits(0),
NumDispatchedToThePendingSet(0), HadTokenStall(false) {
@@ -228,6 +228,9 @@ public:
SmallVectorImpl<InstRef> &Ready);
/// Convert a resource mask into a valid llvm processor resource identifier.
+ ///
+ /// Only the most significant bit of the Mask is used by this method to
+ /// identify the processor resource.
unsigned getResourceID(uint64_t Mask) const {
return Resources->resolveResourceMask(Mask);
}
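// A hedged sketch of the documentation added to getResourceID() above: if
// only the most significant bit of a mask identifies the resource, the
// lookup reduces to finding the highest set bit. This illustrates that
// note; it is not LLVM's resolveResourceMask implementation.
#include <cassert>
#include <cstdint>

unsigned highestSetBit(uint64_t Mask) {
  assert(Mask != 0 && "empty resource mask");
  unsigned Idx = 0;
  while (Mask >>= 1)
    ++Idx;
  return Idx; // index of the most significant set bit
}

int main() {
  assert(highestSetBit(0x1) == 0);
  assert(highestSetBit(0x6) == 2); // MSB of 0b110 is bit 2
  return 0;
}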
diff --git a/include/llvm/MCA/Instruction.h b/include/llvm/MCA/Instruction.h
index d4d3f22797f7..c97cb463d0f5 100644
--- a/include/llvm/MCA/Instruction.h
+++ b/include/llvm/MCA/Instruction.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCRegister.h" // definition of MCPhysReg.
#include "llvm/Support/MathExtras.h"
#ifndef NDEBUG
@@ -42,7 +43,7 @@ struct WriteDescriptor {
unsigned Latency;
// This field is set to a value different than zero only if this
// is an implicit definition.
- unsigned RegisterID;
+ MCPhysReg RegisterID;
// Instruction itineraries would set this field to the SchedClass ID.
// Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry
// element associated to this write.
@@ -70,7 +71,7 @@ struct ReadDescriptor {
// uses always come first in the sequence of uses.
unsigned UseIndex;
// This field is only set if this is an implicit read.
- unsigned RegisterID;
+ MCPhysReg RegisterID;
// Scheduling Class Index. It is used to query the scheduling model for the
// MCSchedClassDesc object.
unsigned SchedClassID;
@@ -85,7 +86,7 @@ class ReadState;
/// Field RegID is set to the invalid register for memory dependencies.
struct CriticalDependency {
unsigned IID;
- unsigned RegID;
+ MCPhysReg RegID;
unsigned Cycles;
};
@@ -106,7 +107,7 @@ class WriteState {
// to speedup queries on the register file.
// For implicit writes, this field always matches the value of
// field RegisterID from WD.
- unsigned RegisterID;
+ MCPhysReg RegisterID;
// Physical register file that serves register RegisterID.
unsigned PRFID;
@@ -146,7 +147,7 @@ class WriteState {
SmallVector<std::pair<ReadState *, int>, 4> Users;
public:
- WriteState(const WriteDescriptor &Desc, unsigned RegID,
+ WriteState(const WriteDescriptor &Desc, MCPhysReg RegID,
bool clearsSuperRegs = false, bool writesZero = false)
: WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
@@ -158,7 +159,7 @@ public:
int getCyclesLeft() const { return CyclesLeft; }
unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
- unsigned getRegisterID() const { return RegisterID; }
+ MCPhysReg getRegisterID() const { return RegisterID; }
unsigned getRegisterFileID() const { return PRFID; }
unsigned getLatency() const { return WD->Latency; }
unsigned getDependentWriteCyclesLeft() const {
@@ -200,7 +201,7 @@ public:
}
void setDependentWrite(const WriteState *Other) { DependentWrite = Other; }
- void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles);
+ void writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles);
void setWriteZero() { WritesZero = true; }
void setEliminated() {
assert(Users.empty() && "Write is in an inconsistent state.");
@@ -226,7 +227,7 @@ public:
class ReadState {
const ReadDescriptor *RD;
// Physical register identified associated to this read.
- unsigned RegisterID;
+ MCPhysReg RegisterID;
// Physical register file that serves register RegisterID.
unsigned PRFID;
// Number of writes that contribute to the definition of RegisterID.
@@ -253,14 +254,14 @@ class ReadState {
bool IndependentFromDef;
public:
- ReadState(const ReadDescriptor &Desc, unsigned RegID)
+ ReadState(const ReadDescriptor &Desc, MCPhysReg RegID)
: RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), CRD(), IsReady(true),
IsZero(false), IndependentFromDef(false) {}
const ReadDescriptor &getDescriptor() const { return *RD; }
unsigned getSchedClass() const { return RD->SchedClassID; }
- unsigned getRegisterID() const { return RegisterID; }
+ MCPhysReg getRegisterID() const { return RegisterID; }
unsigned getRegisterFileID() const { return PRFID; }
const CriticalDependency &getCriticalRegDep() const { return CRD; }
@@ -272,7 +273,7 @@ public:
void setIndependentFromDef() { IndependentFromDef = true; }
void cycleEvent();
- void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles);
+ void writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles);
void setDependentWrites(unsigned Writes) {
DependentWrites = Writes;
IsReady = !Writes;
@@ -352,11 +353,14 @@ struct InstrDesc {
// reports the number of "consumed cycles".
SmallVector<std::pair<uint64_t, ResourceUsage>, 4> Resources;
- // A list of buffered resources consumed by this instruction.
- SmallVector<uint64_t, 4> Buffers;
+ // A bitmask of used hardware buffers.
+ uint64_t UsedBuffers;
- unsigned UsedProcResUnits;
- unsigned UsedProcResGroups;
+ // A bitmask of used processor resource units.
+ uint64_t UsedProcResUnits;
+
+ // A bitmask of used processor resource groups.
+ uint64_t UsedProcResGroups;
unsigned MaxLatency;
// Number of MicroOps for this instruction.
@@ -414,6 +418,7 @@ public:
const InstrDesc &getDesc() const { return Desc; }
unsigned getLatency() const { return Desc.MaxLatency; }
+ unsigned getNumMicroOps() const { return Desc.NumMicroOps; }
bool hasDependentUsers() const {
return any_of(Defs,
@@ -463,6 +468,12 @@ class Instruction : public InstructionBase {
// operation.
unsigned LSUTokenID;
+ // A resource mask which identifies buffered resources consumed by this
+ // instruction at dispatch stage. In the absence of macro-fusion, this value
+ // should always match the value of field `UsedBuffers` from the instruction
+ // descriptor (see field InstructionBase::Desc).
+ uint64_t UsedBuffers;
+
// Critical register dependency.
CriticalDependency CriticalRegDep;
@@ -480,12 +491,18 @@ class Instruction : public InstructionBase {
public:
Instruction(const InstrDesc &D)
: InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
- RCUTokenID(0), LSUTokenID(0), CriticalRegDep(), CriticalMemDep(),
- CriticalResourceMask(0), IsEliminated(false) {}
+ RCUTokenID(0), LSUTokenID(0), UsedBuffers(D.UsedBuffers),
+ CriticalRegDep(), CriticalMemDep(), CriticalResourceMask(0),
+ IsEliminated(false) {}
unsigned getRCUTokenID() const { return RCUTokenID; }
unsigned getLSUTokenID() const { return LSUTokenID; }
void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; }
+
+ uint64_t getUsedBuffers() const { return UsedBuffers; }
+ void setUsedBuffers(uint64_t Mask) { UsedBuffers = Mask; }
+ void clearUsedBuffers() { UsedBuffers = 0ULL; }
+
int getCyclesLeft() const { return CyclesLeft; }
// Transition to the dispatch stage, and assign a RCUToken to this
diff --git a/include/llvm/MCA/SourceMgr.h b/include/llvm/MCA/SourceMgr.h
index dbe31db1b1dd..e844171bdcab 100644
--- a/include/llvm/MCA/SourceMgr.h
+++ b/include/llvm/MCA/SourceMgr.h
@@ -16,12 +16,13 @@
#define LLVM_MCA_SOURCEMGR_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MCA/Instruction.h"
namespace llvm {
namespace mca {
-class Instruction;
-
+// MSVC >= 19.15, < 19.20 need to see the definition of class Instruction to
+// prevent compiler error C2139 about intrinsic type trait '__is_assignable'.
typedef std::pair<unsigned, const Instruction &> SourceRef;
class SourceMgr {
diff --git a/include/llvm/MCA/Stages/RetireStage.h b/include/llvm/MCA/Stages/RetireStage.h
index 08c216ac7bf4..f4713688d25f 100644
--- a/include/llvm/MCA/Stages/RetireStage.h
+++ b/include/llvm/MCA/Stages/RetireStage.h
@@ -16,6 +16,7 @@
#ifndef LLVM_MCA_RETIRE_STAGE_H
#define LLVM_MCA_RETIRE_STAGE_H
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
#include "llvm/MCA/HardwareUnits/RegisterFile.h"
#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
#include "llvm/MCA/Stages/Stage.h"
@@ -27,13 +28,14 @@ class RetireStage final : public Stage {
// Owner will go away when we move listeners/eventing to the stages.
RetireControlUnit &RCU;
RegisterFile &PRF;
+ LSUnitBase &LSU;
RetireStage(const RetireStage &Other) = delete;
RetireStage &operator=(const RetireStage &Other) = delete;
public:
- RetireStage(RetireControlUnit &R, RegisterFile &F)
- : Stage(), RCU(R), PRF(F) {}
+ RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS)
+ : Stage(), RCU(R), PRF(F), LSU(LS) {}
bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
Error cycleStart() override;
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index c40278a4f923..c3f36bdd9d1a 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -48,8 +48,7 @@ public:
/// Get the name looking up long names.
Expected<StringRef> getName(uint64_t Size) const;
- /// Members are not larger than 4GB.
- Expected<uint32_t> getSize() const;
+ Expected<uint64_t> getSize() const;
Expected<sys::fs::perms> getAccessMode() const;
Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const;
@@ -136,6 +135,7 @@ public:
Expected<StringRef> getBuffer() const;
uint64_t getChildOffset() const;
+ uint64_t getDataOffset() const { return getChildOffset() + StartOfFile; }
Expected<MemoryBufferRef> getMemoryBufferRef() const;
@@ -221,6 +221,9 @@ public:
Archive(MemoryBufferRef Source, Error &Err);
static Expected<std::unique_ptr<Archive>> create(MemoryBufferRef Source);
+ /// The size field is 10 decimal digits long.
+ static const uint64_t MaxMemberSize = 9999999999;
+
enum Kind {
K_GNU,
K_GNU64,
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index 3c3e977baff4..aa5e718f5e9b 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -42,7 +42,9 @@ protected:
ID_Archive,
ID_MachOUniversalBinary,
ID_COFFImportFile,
- ID_IR, // LLVM IR
+ ID_IR, // LLVM IR
+ ID_TapiUniversal, // Text-based Dynamic Library Stub file.
+ ID_TapiFile, // Text-based Dynamic Library Stub file.
ID_Minidump,
@@ -101,16 +103,18 @@ public:
return TypeID > ID_StartObjects && TypeID < ID_EndObjects;
}
- bool isSymbolic() const { return isIR() || isObject() || isCOFFImportFile(); }
-
- bool isArchive() const {
- return TypeID == ID_Archive;
+ bool isSymbolic() const {
+ return isIR() || isObject() || isCOFFImportFile() || isTapiFile();
}
+ bool isArchive() const { return TypeID == ID_Archive; }
+
bool isMachOUniversalBinary() const {
return TypeID == ID_MachOUniversalBinary;
}
+ bool isTapiUniversal() const { return TypeID == ID_TapiUniversal; }
+
bool isELF() const {
return TypeID >= ID_ELF32L && TypeID <= ID_ELF64B;
}
@@ -137,6 +141,8 @@ public:
bool isMinidump() const { return TypeID == ID_Minidump; }
+ bool isTapiFile() const { return TypeID == ID_TapiFile; }
+
bool isLittleEndian() const {
return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B ||
TypeID == ID_MachO32B || TypeID == ID_MachO64B);
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index c53cbc46c747..b91ee5887fec 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -314,7 +314,10 @@ public:
return CS16 ? CS16->Name.Offset : CS32->Name.Offset;
}
- uint32_t getValue() const { return CS16 ? CS16->Value : CS32->Value; }
+ uint32_t getValue() const {
+ assert(isSet() && "COFFSymbolRef points to nothing!");
+ return CS16 ? CS16->Value : CS32->Value;
+ }
int32_t getSectionNumber() const {
assert(isSet() && "COFFSymbolRef points to nothing!");
@@ -969,11 +972,14 @@ public:
return nullptr;
return reinterpret_cast<const dos_header *>(base());
}
- std::error_code getCOFFHeader(const coff_file_header *&Res) const;
- std::error_code
- getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const;
- std::error_code getPE32Header(const pe32_header *&Res) const;
- std::error_code getPE32PlusHeader(const pe32plus_header *&Res) const;
+
+ const coff_file_header *getCOFFHeader() const { return COFFHeader; }
+ const coff_bigobj_file_header *getCOFFBigObjHeader() const {
+ return COFFBigObjHeader;
+ }
+ const pe32_header *getPE32Header() const { return PE32Header; }
+ const pe32plus_header *getPE32PlusHeader() const { return PE32PlusHeader; }
+
std::error_code getDataDirectory(uint32_t index,
const data_directory *&Res) const;
std::error_code getSection(int32_t index, const coff_section *&Res) const;
@@ -1201,16 +1207,34 @@ public:
ResourceSectionRef() = default;
explicit ResourceSectionRef(StringRef Ref) : BBS(Ref, support::little) {}
+ Error load(const COFFObjectFile *O);
+ Error load(const COFFObjectFile *O, const SectionRef &S);
+
Expected<ArrayRef<UTF16>>
getEntryNameString(const coff_resource_dir_entry &Entry);
Expected<const coff_resource_dir_table &>
getEntrySubDir(const coff_resource_dir_entry &Entry);
+ Expected<const coff_resource_data_entry &>
+ getEntryData(const coff_resource_dir_entry &Entry);
Expected<const coff_resource_dir_table &> getBaseTable();
+ Expected<const coff_resource_dir_entry &>
+ getTableEntry(const coff_resource_dir_table &Table, uint32_t Index);
+
+ Expected<StringRef> getContents(const coff_resource_data_entry &Entry);
private:
BinaryByteStream BBS;
+ SectionRef Section;
+ const COFFObjectFile *Obj;
+
+ std::vector<const coff_relocation *> Relocs;
+
Expected<const coff_resource_dir_table &> getTableAtOffset(uint32_t Offset);
+ Expected<const coff_resource_dir_entry &>
+ getTableEntryAtOffset(uint32_t Offset);
+ Expected<const coff_resource_data_entry &>
+ getDataEntryAtOffset(uint32_t Offset);
Expected<ArrayRef<UTF16>> getDirStringAtOffset(uint32_t Offset);
};
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index cf8e4529bad9..28b00c8413de 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -64,6 +64,10 @@ std::string getSecIndexForError(const ELFFile<ELFT> *Obj,
return "[unknown index]";
}
+static inline Error defaultWarningHandler(const Twine &Msg) {
+ return createError(Msg);
+}
+
template <class ELFT>
class ELFFile {
public:
@@ -95,6 +99,13 @@ public:
using Elf_Relr_Range = typename ELFT::RelrRange;
using Elf_Phdr_Range = typename ELFT::PhdrRange;
+ // This is a callback that can be passed to a number of functions.
+ // It can be used to ignore non-critical errors (warnings), which is
+ // useful for dumpers, like llvm-readobj.
+ // It accepts a warning message string and returns success when the
+ // warning should be ignored, or an error otherwise.
+ using WarningHandler = llvm::function_ref<Error(const Twine &Msg)>;
+
const uint8_t *base() const { return Buf.bytes_begin(); }
size_t getBufSize() const { return Buf.size(); }
@@ -114,7 +125,9 @@ public:
template <typename T>
Expected<const T *> getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
- Expected<StringRef> getStringTable(const Elf_Shdr *Section) const;
+ Expected<StringRef>
+ getStringTable(const Elf_Shdr *Section,
+ WarningHandler WarnHandler = &defaultWarningHandler) const;
Expected<StringRef> getStringTableForSymtab(const Elf_Shdr &Section) const;
Expected<StringRef> getStringTableForSymtab(const Elf_Shdr &Section,
Elf_Shdr_Range Sections) const;
@@ -137,15 +150,16 @@ public:
static Expected<ELFFile> create(StringRef Object);
+ bool isLE() const {
+ return getHeader()->getDataEncoding() == ELF::ELFDATA2LSB;
+ }
+
bool isMipsELF64() const {
return getHeader()->e_machine == ELF::EM_MIPS &&
getHeader()->getFileClass() == ELF::ELFCLASS64;
}
- bool isMips64EL() const {
- return isMipsELF64() &&
- getHeader()->getDataEncoding() == ELF::ELFDATA2LSB;
- }
+ bool isMips64EL() const { return isMipsELF64() && isLE(); }
Expected<Elf_Shdr_Range> sections() const;
@@ -261,7 +275,9 @@ public:
return make_range(notes_begin(Shdr, Err), notes_end());
}
- Expected<StringRef> getSectionStringTable(Elf_Shdr_Range Sections) const;
+ Expected<StringRef> getSectionStringTable(
+ Elf_Shdr_Range Sections,
+ WarningHandler WarnHandler = &defaultWarningHandler) const;
Expected<uint32_t> getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms,
ArrayRef<Elf_Word> ShndxTable) const;
Expected<const Elf_Shdr *> getSection(const Elf_Sym *Sym,
@@ -271,12 +287,13 @@ public:
Elf_Sym_Range Symtab,
ArrayRef<Elf_Word> ShndxTable) const;
Expected<const Elf_Shdr *> getSection(uint32_t Index) const;
- Expected<const Elf_Shdr *> getSection(const StringRef SectionName) const;
Expected<const Elf_Sym *> getSymbol(const Elf_Shdr *Sec,
uint32_t Index) const;
- Expected<StringRef> getSectionName(const Elf_Shdr *Section) const;
+ Expected<StringRef>
+ getSectionName(const Elf_Shdr *Section,
+ WarningHandler WarnHandler = &defaultWarningHandler) const;
Expected<StringRef> getSectionName(const Elf_Shdr *Section,
StringRef DotShstrtab) const;
template <typename T>
@@ -459,18 +476,18 @@ ELFFile<ELFT>::getRelocationSymbol(const Elf_Rel *Rel,
template <class ELFT>
Expected<StringRef>
-ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections) const {
+ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
+ WarningHandler WarnHandler) const {
uint32_t Index = getHeader()->e_shstrndx;
if (Index == ELF::SHN_XINDEX)
Index = Sections[0].sh_link;
if (!Index) // no section string table.
return "";
- // TODO: Test a case when the sh_link of the section with index 0 is broken.
if (Index >= Sections.size())
return createError("section header string table index " + Twine(Index) +
" does not exist");
- return getStringTable(&Sections[Index]);
+ return getStringTable(&Sections[Index], WarnHandler);
}
template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
@@ -495,7 +512,8 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
Twine(getHeader()->e_shentsize));
const uint64_t FileSize = Buf.size();
- if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize)
+ if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize ||
+ SectionTableOffset + (uintX_t)sizeof(Elf_Shdr) < SectionTableOffset)
return createError(
"section header table goes past the end of the file: e_shoff = 0x" +
Twine::utohexstr(SectionTableOffset));
@@ -513,15 +531,22 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
NumSections = First->sh_size;
if (NumSections > UINT64_MAX / sizeof(Elf_Shdr))
- // TODO: this error is untested.
- return createError("section table goes past the end of file");
+ return createError("invalid number of sections specified in the NULL "
+ "section's sh_size field (" +
+ Twine(NumSections) + ")");
const uint64_t SectionTableSize = NumSections * sizeof(Elf_Shdr);
+ if (SectionTableOffset + SectionTableSize < SectionTableOffset)
+ return createError(
+ "invalid section header table offset (e_shoff = 0x" +
+ Twine::utohexstr(SectionTableOffset) +
+ ") or invalid number of sections specified in the first section "
+ "header's sh_size field (0x" +
+ Twine::utohexstr(NumSections) + ")");
// Section table goes past end of file!
if (SectionTableOffset + SectionTableSize > FileSize)
return createError("section table goes past the end of file");
-
return makeArrayRef(First, NumSections);
}
@@ -540,8 +565,9 @@ template <typename T>
Expected<const T *> ELFFile<ELFT>::getEntry(const Elf_Shdr *Section,
uint32_t Entry) const {
if (sizeof(T) != Section->sh_entsize)
- // TODO: this error is untested.
- return createError("invalid sh_entsize");
+ return createError("section " + getSecIndexForError(this, Section) +
+ " has invalid sh_entsize: expected " + Twine(sizeof(T)) +
+ ", but got " + Twine(Section->sh_entsize));
size_t Pos = Section->sh_offset + Entry * sizeof(T);
if (Pos + sizeof(T) > Buf.size())
return createError("unable to access section " +
@@ -561,42 +587,26 @@ ELFFile<ELFT>::getSection(uint32_t Index) const {
}
template <class ELFT>
-Expected<const typename ELFT::Shdr *>
-ELFFile<ELFT>::getSection(const StringRef SectionName) const {
- auto TableOrErr = sections();
- if (!TableOrErr)
- return TableOrErr.takeError();
- for (auto &Sec : *TableOrErr) {
- auto SecNameOrErr = getSectionName(&Sec);
- if (!SecNameOrErr)
- return SecNameOrErr.takeError();
- if (*SecNameOrErr == SectionName)
- return &Sec;
- }
- // TODO: this error is untested.
- return createError("invalid section name");
-}
-
-template <class ELFT>
Expected<StringRef>
-ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section) const {
+ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section,
+ WarningHandler WarnHandler) const {
if (Section->sh_type != ELF::SHT_STRTAB)
- return createError("invalid sh_type for string table section " +
- getSecIndexForError(this, Section) +
- ": expected SHT_STRTAB, but got " +
- object::getELFSectionTypeName(getHeader()->e_machine,
- Section->sh_type));
+ if (Error E = WarnHandler("invalid sh_type for string table section " +
+ getSecIndexForError(this, Section) +
+ ": expected SHT_STRTAB, but got " +
+ object::getELFSectionTypeName(
+ getHeader()->e_machine, Section->sh_type)))
+ return std::move(E);
+
auto V = getSectionContentsAsArray<char>(Section);
if (!V)
return V.takeError();
ArrayRef<char> Data = *V;
if (Data.empty())
- // TODO: this error is untested.
- return createError("empty string table");
+ return createError("SHT_STRTAB string table section " +
+ getSecIndexForError(this, Section) + " is empty");
if (Data.back() != '\0')
- return createError(object::getELFSectionTypeName(getHeader()->e_machine,
- Section->sh_type) +
- " string table section " +
+ return createError("SHT_STRTAB string table section " +
getSecIndexForError(this, Section) +
" is non-null terminated");
return StringRef(Data.begin(), Data.size());
@@ -626,8 +636,11 @@ ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section,
const Elf_Shdr &SymTable = **SymTableOrErr;
if (SymTable.sh_type != ELF::SHT_SYMTAB &&
SymTable.sh_type != ELF::SHT_DYNSYM)
- // TODO: this error is untested.
- return createError("invalid sh_type");
+ return createError("SHT_SYMTAB_SHNDX section is linked with " +
+ object::getELFSectionTypeName(getHeader()->e_machine,
+ SymTable.sh_type) +
+ " section (expected SHT_SYMTAB/SHT_DYNSYM)");
+
if (V.size() != (SymTable.sh_size / sizeof(Elf_Sym)))
return createError("SHT_SYMTAB_SHNDX section has sh_size (" +
Twine(SymTable.sh_size) +
@@ -662,11 +675,12 @@ ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec,
template <class ELFT>
Expected<StringRef>
-ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section) const {
+ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section,
+ WarningHandler WarnHandler) const {
auto SectionsOrErr = sections();
if (!SectionsOrErr)
return SectionsOrErr.takeError();
- auto Table = getSectionStringTable(*SectionsOrErr);
+ auto Table = getSectionStringTable(*SectionsOrErr, WarnHandler);
if (!Table)
return Table.takeError();
return getSectionName(Section, *Table);
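// A hedged standalone sketch of the WarningHandler pattern introduced
// above: the same routine hard-fails by default but lets a dumper-style
// caller record the warning and continue. std::function stands in for
// llvm::function_ref, and bool-with-out-parameter for llvm::Expected.
#include <cassert>
#include <functional>
#include <string>
#include <vector>

// Returns true if the warning must abort the operation (the default), or
// false if it was handled and parsing may continue.
using WarningHandler = std::function<bool(const std::string &)>;

bool defaultWarningHandler(const std::string &) { return true; }

bool getStringTable(bool SectionTypeIsValid, std::string &Out,
                    WarningHandler Warn = defaultWarningHandler) {
  if (!SectionTypeIsValid &&
      Warn("invalid sh_type for string table section"))
    return false; // treated as a hard error
  Out = "recovered"; // proceed despite the odd sh_type
  return true;
}

int main() {
  std::vector<std::string> Warnings;
  std::string Table;
  // llvm-readobj-style use: collect the warning, keep dumping.
  bool OK = getStringTable(false, Table, [&](const std::string &Msg) {
    Warnings.push_back(Msg);
    return false;
  });
  assert(OK && Warnings.size() == 1);
  // Default use: the same condition is a hard error.
  assert(!getStringTable(false, Table));
  return 0;
}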
diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h
index 86c015efd704..424289a9ccaa 100644
--- a/include/llvm/Object/ELFObjectFile.h
+++ b/include/llvm/Object/ELFObjectFile.h
@@ -41,7 +41,7 @@
namespace llvm {
namespace object {
-constexpr int NumElfSymbolTypes = 8;
+constexpr int NumElfSymbolTypes = 16;
extern const llvm::EnumEntry<unsigned> ElfSymbolTypes[NumElfSymbolTypes];
class elf_symbol_iterator;
@@ -239,6 +239,10 @@ public:
using Elf_Rela = typename ELFT::Rela;
using Elf_Dyn = typename ELFT::Dyn;
+ SectionRef toSectionRef(const Elf_Shdr *Sec) const {
+ return SectionRef(toDRI(Sec), this);
+ }
+
private:
ELFObjectFile(MemoryBufferRef Object, ELFFile<ELFT> EF,
const Elf_Shdr *DotDynSymSec, const Elf_Shdr *DotSymtabSec,
@@ -284,7 +288,8 @@ protected:
relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
relocation_iterator section_rel_end(DataRefImpl Sec) const override;
std::vector<SectionRef> dynamic_relocation_sections() const override;
- section_iterator getRelocatedSection(DataRefImpl Sec) const override;
+ Expected<section_iterator>
+ getRelocatedSection(DataRefImpl Sec) const override;
void moveRelocationNext(DataRefImpl &Rel) const override;
uint64_t getRelocationOffset(DataRefImpl Rel) const override;
@@ -461,13 +466,15 @@ Expected<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
if (!SymStrTabOrErr)
return SymStrTabOrErr.takeError();
Expected<StringRef> Name = ESym->getName(*SymStrTabOrErr);
+ if (Name && !Name->empty())
+ return Name;
// If the symbol name is empty use the section name.
- if ((!Name || Name->empty()) && ESym->getType() == ELF::STT_SECTION) {
- StringRef SecName;
- Expected<section_iterator> Sec = getSymbolSection(Sym);
- if (Sec && !(*Sec)->getName(SecName))
- return SecName;
+ if (ESym->getType() == ELF::STT_SECTION) {
+ if (Expected<section_iterator> SecOrErr = getSymbolSection(Sym)) {
+ consumeError(Name.takeError());
+ return (*SecOrErr)->getName();
+ }
}
return Name;
}
@@ -835,7 +842,7 @@ ELFObjectFile<ELFT>::section_rel_end(DataRefImpl Sec) const {
}
template <class ELFT>
-section_iterator
+Expected<section_iterator>
ELFObjectFile<ELFT>::getRelocatedSection(DataRefImpl Sec) const {
if (EF.getHeader()->e_type != ELF::ET_REL)
return section_end();
@@ -845,10 +852,10 @@ ELFObjectFile<ELFT>::getRelocatedSection(DataRefImpl Sec) const {
if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA)
return section_end();
- auto R = EF.getSection(EShdr->sh_info);
- if (!R)
- report_fatal_error(errorToErrorCode(R.takeError()).message());
- return section_iterator(SectionRef(toDRI(*R), this));
+ Expected<const Elf_Shdr *> SecOrErr = EF.getSection(EShdr->sh_info);
+ if (!SecOrErr)
+ return SecOrErr.takeError();
+ return section_iterator(SectionRef(toDRI(*SecOrErr), this));
}
// Relocations
diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h
index 5552208b1f8a..7d1ade4d5437 100644
--- a/include/llvm/Object/ELFTypes.h
+++ b/include/llvm/Object/ELFTypes.h
@@ -248,7 +248,11 @@ template <class ELFT>
Expected<StringRef> Elf_Sym_Impl<ELFT>::getName(StringRef StrTab) const {
uint32_t Offset = this->st_name;
if (Offset >= StrTab.size())
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "st_name (0x%" PRIx32
+ ") is past the end of the string table"
+ " of size 0x%zx",
+ Offset, StrTab.size());
return StringRef(StrTab.data() + Offset);
}
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index ca9512f21706..76be8049a7d4 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -297,6 +297,7 @@ public:
uint64_t getSectionAddress(DataRefImpl Sec) const override;
uint64_t getSectionIndex(DataRefImpl Sec) const override;
uint64_t getSectionSize(DataRefImpl Sec) const override;
+ ArrayRef<uint8_t> getSectionContents(uint32_t Offset, uint64_t Size) const;
Expected<ArrayRef<uint8_t>>
getSectionContents(DataRefImpl Sec) const override;
uint64_t getSectionAlignment(DataRefImpl Sec) const override;
diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h
index 5bf724f2c8b2..eb45aff4480b 100644
--- a/include/llvm/Object/MachOUniversal.h
+++ b/include/llvm/Object/MachOUniversal.h
@@ -31,6 +31,8 @@ class MachOUniversalBinary : public Binary {
uint32_t Magic;
uint32_t NumberOfObjects;
public:
+ static constexpr uint32_t MaxSectionAlignment = 15; /* 2**15 or 0x8000 */
+
class ObjectForArch {
const MachOUniversalBinary *Parent;
/// Index of object in the universal binary.
@@ -64,13 +66,13 @@ public:
else // Parent->getMagic() == MachO::FAT_MAGIC_64
return Header64.cpusubtype;
}
- uint32_t getOffset() const {
+ uint64_t getOffset() const {
if (Parent->getMagic() == MachO::FAT_MAGIC)
return Header.offset;
else // Parent->getMagic() == MachO::FAT_MAGIC_64
return Header64.offset;
}
- uint32_t getSize() const {
+ uint64_t getSize() const {
if (Parent->getMagic() == MachO::FAT_MAGIC)
return Header.size;
else // Parent->getMagic() == MachO::FAT_MAGIC_64
@@ -157,8 +159,14 @@ public:
return V->isMachOUniversalBinary();
}
- Expected<std::unique_ptr<MachOObjectFile>>
+ Expected<ObjectForArch>
getObjectForArch(StringRef ArchName) const;
+
+ Expected<std::unique_ptr<MachOObjectFile>>
+ getMachOObjectForArch(StringRef ArchName) const;
+
+ Expected<std::unique_ptr<Archive>>
+ getArchiveForArch(StringRef ArchName) const;
};
}
diff --git a/include/llvm/Object/Minidump.h b/include/llvm/Object/Minidump.h
index 470008d552e7..4429493aff45 100644
--- a/include/llvm/Object/Minidump.h
+++ b/include/llvm/Object/Minidump.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/BinaryFormat/Minidump.h"
#include "llvm/Object/Binary.h"
#include "llvm/Support/Error.h"
@@ -80,16 +81,65 @@ public:
return getListStream<minidump::Thread>(minidump::StreamType::ThreadList);
}
- /// Returns the list of memory ranges embedded in the MemoryList stream. An
- /// error is returned if the file does not contain this stream, or if the
- /// stream is not large enough to contain the number of memory descriptors
- /// declared in the stream header. The consistency of the MemoryDescriptor
- /// entries themselves is not checked in any way.
+ /// Returns the contents of the Exception stream. An error is returned if the
+ /// file does not contain this stream, or the stream is smaller than the size
+ /// of the ExceptionStream structure. The internal consistency of the stream
+ /// is not checked in any way.
+ Expected<const minidump::ExceptionStream &> getExceptionStream() const {
+ return getStream<minidump::ExceptionStream>(
+ minidump::StreamType::Exception);
+ }
+
+ /// Returns the list of descriptors embedded in the MemoryList stream. The
+ /// descriptors provide the content of interesting regions of memory at the
+ /// time the minidump was taken. An error is returned if the file does not
+ /// contain this stream, or if the stream is not large enough to contain the
+ /// number of memory descriptors declared in the stream header. The
+ /// consistency of the MemoryDescriptor entries themselves is not checked in
+ /// any way.
Expected<ArrayRef<minidump::MemoryDescriptor>> getMemoryList() const {
return getListStream<minidump::MemoryDescriptor>(
minidump::StreamType::MemoryList);
}
+ class MemoryInfoIterator
+ : public iterator_facade_base<MemoryInfoIterator,
+ std::forward_iterator_tag,
+ minidump::MemoryInfo> {
+ public:
+ MemoryInfoIterator(ArrayRef<uint8_t> Storage, size_t Stride)
+ : Storage(Storage), Stride(Stride) {
+ assert(Storage.size() % Stride == 0);
+ }
+
+ bool operator==(const MemoryInfoIterator &R) const {
+ return Storage.size() == R.Storage.size();
+ }
+
+ const minidump::MemoryInfo &operator*() const {
+ assert(Storage.size() >= sizeof(minidump::MemoryInfo));
+ return *reinterpret_cast<const minidump::MemoryInfo *>(Storage.data());
+ }
+
+ MemoryInfoIterator &operator++() {
+ Storage = Storage.drop_front(Stride);
+ return *this;
+ }
+
+ private:
+ ArrayRef<uint8_t> Storage;
+ size_t Stride;
+ };
+
+ /// Returns the list of descriptors embedded in the MemoryInfoList stream. The
+ /// descriptors provide properties (e.g. permissions) of interesting regions
+ /// of memory at the time the minidump was taken. An error is returned if the
+ /// file does not contain this stream, or if the stream is not large enough to
+ /// contain the number of memory descriptors declared in the stream header.
+ /// The consistency of the MemoryInfoList entries themselves is not checked
+ /// in any way.
+ Expected<iterator_range<MemoryInfoIterator>> getMemoryInfoList() const;
+
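// Iteration sketch (editor's addition; File is assumed to be a parsed
// MinidumpFile). The stream records its own entry stride, which is why
// MemoryInfoIterator advances by a runtime Stride rather than by
// sizeof(minidump::MemoryInfo).
Expected<iterator_range<MinidumpFile::MemoryInfoIterator>> RangeOrErr =
    File.getMemoryInfoList();
if (!RangeOrErr)
  return RangeOrErr.takeError();
for (const minidump::MemoryInfo &Info : *RangeOrErr)
  ; // e.g. inspect Info.BaseAddress and Info.Protect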
private:
static Error createError(StringRef Str) {
return make_error<GenericBinaryError>(Str, object_error::parse_failed);
@@ -137,10 +187,10 @@ private:
};
template <typename T>
-Expected<const T &> MinidumpFile::getStream(minidump::StreamType Stream) const {
- if (auto OptionalStream = getRawStream(Stream)) {
- if (OptionalStream->size() >= sizeof(T))
- return *reinterpret_cast<const T *>(OptionalStream->data());
+Expected<const T &> MinidumpFile::getStream(minidump::StreamType Type) const {
+ if (Optional<ArrayRef<uint8_t>> Stream = getRawStream(Type)) {
+ if (Stream->size() >= sizeof(T))
+ return *reinterpret_cast<const T *>(Stream->data());
return createEOFError();
}
return createError("No such stream");
@@ -153,10 +203,11 @@ Expected<ArrayRef<T>> MinidumpFile::getDataSliceAs(ArrayRef<uint8_t> Data,
// Check for overflow.
if (Count > std::numeric_limits<size_t>::max() / sizeof(T))
return createEOFError();
- auto ExpectedArray = getDataSlice(Data, Offset, sizeof(T) * Count);
- if (!ExpectedArray)
- return ExpectedArray.takeError();
- return ArrayRef<T>(reinterpret_cast<const T *>(ExpectedArray->data()), Count);
+ Expected<ArrayRef<uint8_t>> Slice =
+ getDataSlice(Data, Offset, sizeof(T) * Count);
+ if (!Slice)
+ return Slice.takeError();
+ return ArrayRef<T>(reinterpret_cast<const T *>(Slice->data()), Count);
}
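// Editor's note on the overflow guard above, restated as a standalone
// sketch (byteSizeFits is a hypothetical helper, not part of the header):
// sizeof(T) * Count fits in size_t only when Count <= SIZE_MAX / sizeof(T),
// so the division-based test rejects a wrapping multiply up front.
#include <cstddef>
#include <limits>
template <typename T> bool byteSizeFits(size_t Count) {
  return Count <= std::numeric_limits<size_t>::max() / sizeof(T);
}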
} // end namespace object
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 483a3486bd72..adc9dbc189af 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -94,7 +94,7 @@ public:
void moveNext();
- std::error_code getName(StringRef &Result) const;
+ Expected<StringRef> getName() const;
uint64_t getAddress() const;
uint64_t getIndex() const;
uint64_t getSize() const;
@@ -130,18 +130,13 @@ public:
iterator_range<relocation_iterator> relocations() const {
return make_range(relocation_begin(), relocation_end());
}
- section_iterator getRelocatedSection() const;
+ Expected<section_iterator> getRelocatedSection() const;
DataRefImpl getRawDataRefImpl() const;
const ObjectFile *getObject() const;
};
struct SectionedAddress {
- // TODO: constructors could be removed when C++14 would be adopted.
- SectionedAddress() {}
- SectionedAddress(uint64_t Addr, uint64_t SectIdx)
- : Address(Addr), SectionIndex(SectIdx) {}
-
const static uint64_t UndefSection = UINT64_MAX;
uint64_t Address = 0;
@@ -277,7 +272,7 @@ protected:
virtual bool isBerkeleyData(DataRefImpl Sec) const;
virtual relocation_iterator section_rel_begin(DataRefImpl Sec) const = 0;
virtual relocation_iterator section_rel_end(DataRefImpl Sec) const = 0;
- virtual section_iterator getRelocatedSection(DataRefImpl Sec) const;
+ virtual Expected<section_iterator> getRelocatedSection(DataRefImpl Sec) const;
// Same as above for RelocationRef.
friend class RelocationRef;
@@ -434,12 +429,8 @@ inline void SectionRef::moveNext() {
return OwningObject->moveSectionNext(SectionPimpl);
}
-inline std::error_code SectionRef::getName(StringRef &Result) const {
- Expected<StringRef> NameOrErr = OwningObject->getSectionName(SectionPimpl);
- if (!NameOrErr)
- return errorToErrorCode(NameOrErr.takeError());
- Result = *NameOrErr;
- return std::error_code();
+inline Expected<StringRef> SectionRef::getName() const {
+ return OwningObject->getSectionName(SectionPimpl);
}
inline uint64_t SectionRef::getAddress() const {
@@ -510,7 +501,7 @@ inline relocation_iterator SectionRef::relocation_end() const {
return OwningObject->section_rel_end(SectionPimpl);
}
-inline section_iterator SectionRef::getRelocatedSection() const {
+inline Expected<section_iterator> SectionRef::getRelocatedSection() const {
return OwningObject->getRelocatedSection(SectionPimpl);
}
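// Call-site sketch for the two migrated accessors (editor's addition;
// Sec is an object::SectionRef). Both now return Expected<> rather than
// filling an out-parameter and returning std::error_code.
Expected<StringRef> NameOrErr = Sec.getName();
if (!NameOrErr)
  return NameOrErr.takeError();
Expected<section_iterator> RelSecOrErr = Sec.getRelocatedSection();
if (!RelSecOrErr)
  return RelSecOrErr.takeError();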
diff --git a/include/llvm/Object/StackMapParser.h b/include/llvm/Object/StackMapParser.h
index ed44efbf80b9..b408f4041034 100644
--- a/include/llvm/Object/StackMapParser.h
+++ b/include/llvm/Object/StackMapParser.h
@@ -19,7 +19,7 @@
namespace llvm {
-/// A parser for the latest stackmap format. At the moment, latest=V2.
+/// A parser for the latest stackmap format. At the moment, latest=V3.
template <support::endianness Endianness>
class StackMapParser {
public:
@@ -299,7 +299,7 @@ public:
const uint8_t *P;
};
- /// Construct a parser for a version-2 stackmap. StackMap data will be read
+ /// Construct a parser for a version-3 stackmap. StackMap data will be read
/// from the given array.
StackMapParser(ArrayRef<uint8_t> StackMapSection)
: StackMapSection(StackMapSection) {
diff --git a/include/llvm/Object/TapiFile.h b/include/llvm/Object/TapiFile.h
new file mode 100644
index 000000000000..bc2e04e1cc96
--- /dev/null
+++ b/include/llvm/Object/TapiFile.h
@@ -0,0 +1,60 @@
+//===- TapiFile.h - Text-based Dynamic Library Stub -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TapiFile interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_TAPI_FILE_H
+#define LLVM_OBJECT_TAPI_FILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TextAPI/MachO/InterfaceFile.h"
+
+namespace llvm {
+namespace object {
+
+class TapiFile : public SymbolicFile {
+public:
+ TapiFile(MemoryBufferRef Source, const MachO::InterfaceFile &interface,
+ MachO::Architecture Arch);
+ ~TapiFile() override;
+
+ void moveSymbolNext(DataRefImpl &DRI) const override;
+
+ Error printSymbolName(raw_ostream &OS, DataRefImpl DRI) const override;
+
+ uint32_t getSymbolFlags(DataRefImpl DRI) const override;
+
+ basic_symbol_iterator symbol_begin() const override;
+
+ basic_symbol_iterator symbol_end() const override;
+
+ static bool classof(const Binary *v) { return v->isTapiFile(); }
+
+private:
+ struct Symbol {
+ StringRef Prefix;
+ StringRef Name;
+ uint32_t Flags;
+
+ constexpr Symbol(StringRef Prefix, StringRef Name, uint32_t Flags)
+ : Prefix(Prefix), Name(Name), Flags(Flags) {}
+ };
+
+ std::vector<Symbol> Symbols;
+};
+
+} // end namespace object.
+} // end namespace llvm.
+
+#endif // LLVM_OBJECT_TAPI_FILE_H
diff --git a/include/llvm/Object/TapiUniversal.h b/include/llvm/Object/TapiUniversal.h
new file mode 100644
index 000000000000..4931183852ad
--- /dev/null
+++ b/include/llvm/Object/TapiUniversal.h
@@ -0,0 +1,109 @@
+//===-- TapiUniversal.h - Text-based Dynamic Library Stub -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TapiUniversal interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_TAPI_UNIVERSAL_H
+#define LLVM_OBJECT_TAPI_UNIVERSAL_H
+
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/TapiFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/TextAPI/MachO/InterfaceFile.h"
+
+namespace llvm {
+namespace object {
+
+class TapiUniversal : public Binary {
+public:
+ class ObjectForArch {
+ const TapiUniversal *Parent;
+ int Index;
+
+ public:
+ ObjectForArch(const TapiUniversal *Parent, int Index)
+ : Parent(Parent), Index(Index) {}
+
+ ObjectForArch getNext() const { return ObjectForArch(Parent, Index + 1); }
+
+ bool operator==(const ObjectForArch &Other) const {
+ return (Parent == Other.Parent) && (Index == Other.Index);
+ }
+
+ uint32_t getCPUType() const {
+ auto Result =
+ MachO::getCPUTypeFromArchitecture(Parent->Architectures[Index]);
+ return Result.first;
+ }
+
+ uint32_t getCPUSubType() const {
+ auto Result =
+ MachO::getCPUTypeFromArchitecture(Parent->Architectures[Index]);
+ return Result.second;
+ }
+
+ std::string getArchFlagName() const {
+ return MachO::getArchitectureName(Parent->Architectures[Index]);
+ }
+
+ Expected<std::unique_ptr<TapiFile>> getAsObjectFile() const;
+ };
+
+ class object_iterator {
+ ObjectForArch Obj;
+
+ public:
+ object_iterator(const ObjectForArch &Obj) : Obj(Obj) {}
+ const ObjectForArch *operator->() const { return &Obj; }
+ const ObjectForArch &operator*() const { return Obj; }
+
+ bool operator==(const object_iterator &Other) const {
+ return Obj == Other.Obj;
+ }
+ bool operator!=(const object_iterator &Other) const {
+ return !(*this == Other);
+ }
+
+ object_iterator &operator++() { // Preincrement
+ Obj = Obj.getNext();
+ return *this;
+ }
+ };
+
+ TapiUniversal(MemoryBufferRef Source, Error &Err);
+ static Expected<std::unique_ptr<TapiUniversal>>
+ create(MemoryBufferRef Source);
+ ~TapiUniversal() override;
+
+ object_iterator begin_objects() const { return ObjectForArch(this, 0); }
+ object_iterator end_objects() const {
+ return ObjectForArch(this, Architectures.size());
+ }
+
+ iterator_range<object_iterator> objects() const {
+ return make_range(begin_objects(), end_objects());
+ }
+
+ uint32_t getNumberOfObjects() const { return Architectures.size(); }
+
+ // Cast methods.
+ static bool classof(const Binary *v) { return v->isTapiUniversal(); }
+
+private:
+ std::unique_ptr<MachO::InterfaceFile> ParsedFile;
+ std::vector<MachO::Architecture> Architectures;
+};
+
+} // end namespace object.
+} // end namespace llvm.
+
+#endif // LLVM_OBJECT_TAPI_UNIVERSAL_H
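// Enumeration sketch (editor's addition; TU is assumed to be a parsed
// TapiUniversal). Each ObjectForArch wraps one architecture slice of
// the text-based stub and can be materialized as a TapiFile.
for (const TapiUniversal::ObjectForArch &O : TU.objects()) {
  Expected<std::unique_ptr<TapiFile>> FileOrErr = O.getAsObjectFile();
  if (!FileOrErr)
    return FileOrErr.takeError();
  // e.g. walk (*FileOrErr)->symbols() for O.getArchFlagName()
}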
diff --git a/include/llvm/Object/WindowsResource.h b/include/llvm/Object/WindowsResource.h
index 356dcb03abba..a0d658491cb9 100644
--- a/include/llvm/Object/WindowsResource.h
+++ b/include/llvm/Object/WindowsResource.h
@@ -31,6 +31,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -48,6 +49,7 @@ class ScopedPrinter;
namespace object {
class WindowsResource;
+class ResourceSectionRef;
const size_t WIN_RES_MAGIC_SIZE = 16;
const size_t WIN_RES_NULL_ENTRY_SIZE = 16;
@@ -151,8 +153,11 @@ private:
class WindowsResourceParser {
public:
class TreeNode;
- WindowsResourceParser();
+ WindowsResourceParser(bool MinGW = false);
Error parse(WindowsResource *WR, std::vector<std::string> &Duplicates);
+ Error parse(ResourceSectionRef &RSR, StringRef Filename,
+ std::vector<std::string> &Duplicates);
+ void cleanUpManifests(std::vector<std::string> &Duplicates);
void printTree(raw_ostream &OS) const;
const TreeNode &getTree() const { return Root; }
const ArrayRef<std::vector<uint8_t>> getData() const { return Data; }
@@ -181,32 +186,38 @@ public:
private:
friend class WindowsResourceParser;
- static uint32_t StringCount;
- static uint32_t DataCount;
-
- static std::unique_ptr<TreeNode> createStringNode();
+ // Index is the StringTable vector index for this node's name.
+ static std::unique_ptr<TreeNode> createStringNode(uint32_t Index);
static std::unique_ptr<TreeNode> createIDNode();
+ // DataIndex is the Data vector index that the data node points at.
static std::unique_ptr<TreeNode> createDataNode(uint16_t MajorVersion,
uint16_t MinorVersion,
uint32_t Characteristics,
- uint32_t Origin);
+ uint32_t Origin,
+ uint32_t DataIndex);
- explicit TreeNode(bool IsStringNode);
+ explicit TreeNode(uint32_t StringIndex);
TreeNode(uint16_t MajorVersion, uint16_t MinorVersion,
- uint32_t Characteristics, uint32_t Origin);
+ uint32_t Characteristics, uint32_t Origin, uint32_t DataIndex);
bool addEntry(const ResourceEntryRef &Entry, uint32_t Origin,
- bool &IsNewTypeString, bool &IsNewNameString,
+ std::vector<std::vector<uint8_t>> &Data,
+ std::vector<std::vector<UTF16>> &StringTable,
TreeNode *&Result);
- TreeNode &addTypeNode(const ResourceEntryRef &Entry, bool &IsNewTypeString);
- TreeNode &addNameNode(const ResourceEntryRef &Entry, bool &IsNewNameString);
+ TreeNode &addTypeNode(const ResourceEntryRef &Entry,
+ std::vector<std::vector<UTF16>> &StringTable);
+ TreeNode &addNameNode(const ResourceEntryRef &Entry,
+ std::vector<std::vector<UTF16>> &StringTable);
bool addLanguageNode(const ResourceEntryRef &Entry, uint32_t Origin,
+ std::vector<std::vector<uint8_t>> &Data,
TreeNode *&Result);
bool addDataChild(uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion,
uint32_t Characteristics, uint32_t Origin,
- TreeNode *&Result);
+ uint32_t DataIndex, TreeNode *&Result);
TreeNode &addIDChild(uint32_t ID);
- TreeNode &addNameChild(ArrayRef<UTF16> NameRef, bool &IsNewString);
+ TreeNode &addNameChild(ArrayRef<UTF16> NameRef,
+ std::vector<std::vector<UTF16>> &StringTable);
+ void shiftDataIndexDown(uint32_t Index);
bool IsDataNode = false;
uint32_t StringIndex;
@@ -222,12 +233,30 @@ public:
uint32_t Origin;
};
+ struct StringOrID {
+ bool IsString;
+ ArrayRef<UTF16> String;
+ uint32_t ID;
+
+ StringOrID(uint32_t ID) : IsString(false), ID(ID) {}
+ StringOrID(ArrayRef<UTF16> String) : IsString(true), String(String) {}
+ };
+
private:
+ Error addChildren(TreeNode &Node, ResourceSectionRef &RSR,
+ const coff_resource_dir_table &Table, uint32_t Origin,
+ std::vector<StringOrID> &Context,
+ std::vector<std::string> &Duplicates);
+ bool shouldIgnoreDuplicate(const ResourceEntryRef &Entry) const;
+ bool shouldIgnoreDuplicate(const std::vector<StringOrID> &Context) const;
+
TreeNode Root;
std::vector<std::vector<uint8_t>> Data;
std::vector<std::vector<UTF16>> StringTable;
std::vector<std::string> InputFilenames;
+
+ bool MinGW;
};
Expected<std::unique_ptr<MemoryBuffer>>
diff --git a/include/llvm/Object/XCOFFObjectFile.h b/include/llvm/Object/XCOFFObjectFile.h
index cdee7129a2ab..84073ce5f6cf 100644
--- a/include/llvm/Object/XCOFFObjectFile.h
+++ b/include/llvm/Object/XCOFFObjectFile.h
@@ -13,23 +13,8 @@
#ifndef LLVM_OBJECT_XCOFFOBJECTFILE_H
#define LLVM_OBJECT_XCOFFOBJECTFILE_H
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/BinaryFormat/Magic.h"
#include "llvm/BinaryFormat/XCOFF.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/Error.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/SymbolicFile.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <cassert>
-#include <cstdint>
-#include <memory>
-#include <system_error>
namespace llvm {
namespace object {
@@ -63,7 +48,7 @@ struct XCOFFFileHeader64 {
};
struct XCOFFSectionHeader32 {
- char Name[XCOFF::SectionNameSize];
+ char Name[XCOFF::NameSize];
support::ubig32_t PhysicalAddress;
support::ubig32_t VirtualAddress;
support::ubig32_t SectionSize;
@@ -78,7 +63,7 @@ struct XCOFFSectionHeader32 {
};
struct XCOFFSectionHeader64 {
- char Name[XCOFF::SectionNameSize];
+ char Name[XCOFF::NameSize];
support::ubig64_t PhysicalAddress;
support::ubig64_t VirtualAddress;
support::ubig64_t SectionSize;
@@ -106,7 +91,7 @@ struct XCOFFSymbolEntry {
} CFileLanguageIdAndTypeIdType;
union {
- char SymbolName[XCOFF::SymbolNameSize];
+ char SymbolName[XCOFF::NameSize];
NameInStrTblType NameInStrTbl;
};
@@ -127,6 +112,75 @@ struct XCOFFStringTable {
const char *Data;
};
+struct XCOFFCsectAuxEnt32 {
+ support::ubig32_t
+ SectionOrLength; // If the symbol type is XTY_SD or XTY_CM, the csect
+ // length.
+ // If the symbol type is XTY_LD, the symbol table
+ // index of the containing csect.
+ // If the symbol type is XTY_ER, 0.
+ support::ubig32_t ParameterHashIndex;
+ support::ubig16_t TypeChkSectNum;
+ uint8_t SymbolAlignmentAndType;
+ XCOFF::StorageMappingClass StorageMappingClass;
+ support::ubig32_t StabInfoIndex;
+ support::ubig16_t StabSectNum;
+};
+
+struct XCOFFFileAuxEnt {
+ typedef struct {
+ support::big32_t Magic; // Zero indicates name in string table.
+ support::ubig32_t Offset;
+ char NamePad[XCOFF::FileNamePadSize];
+ } NameInStrTblType;
+ union {
+ char Name[XCOFF::NameSize + XCOFF::FileNamePadSize];
+ NameInStrTblType NameInStrTbl;
+ };
+ XCOFF::CFileStringType Type;
+ uint8_t ReservedZeros[2];
+ uint8_t AuxType; // 64-bit XCOFF file only.
+};
+
+struct XCOFFSectAuxEntForStat {
+ support::ubig32_t SectionLength;
+ support::ubig16_t NumberOfRelocEnt;
+ support::ubig16_t NumberOfLineNum;
+ uint8_t Pad[10];
+};
+
+struct XCOFFRelocation32 {
+ // Masks for packing/unpacking the r_rsize field of relocations.
+
+ // The msb is used to indicate if the bits being relocated are signed or
+ // unsigned.
+ static constexpr uint8_t XR_SIGN_INDICATOR_MASK = 0x80;
+
+ // The 2nd msb is used to indicate that the binder has replaced/modified the
+ // original instruction.
+ static constexpr uint8_t XR_FIXUP_INDICATOR_MASK = 0x40;
+
+ // The remaining bits specify the bit length of the relocatable reference
+ // minus one.
+ static constexpr uint8_t XR_BIASED_LENGTH_MASK = 0x3f;
+
+public:
+ support::ubig32_t VirtualAddress;
+ support::ubig32_t SymbolIndex;
+
+ // Packed field, see XR_* masks for details of packing.
+ uint8_t Info;
+
+ XCOFF::RelocationType Type;
+
+public:
+ bool isRelocationSigned() const;
+ bool isFixupIndicated() const;
+
+ // Returns the number of bits being relocated.
+ uint8_t getRelocatedLength() const;
+};
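// Decode sketch for the packed Info byte (editor's addition; the value
// is illustrative, and the accessor functions above wrap exactly this
// arithmetic).
uint8_t Info = 0xD9;                // 0b1101'1001
bool Signed = Info & 0x80;          // XR_SIGN_INDICATOR_MASK -> true
bool Fixup = Info & 0x40;           // XR_FIXUP_INDICATOR_MASK -> true
uint8_t Length = (Info & 0x3F) + 1; // XR_BIASED_LENGTH_MASK -> 26 bits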
+
class XCOFFObjectFile : public ObjectFile {
private:
const void *FileHeader = nullptr;
@@ -146,18 +200,18 @@ private:
const XCOFFSectionHeader32 *toSection32(DataRefImpl Ref) const;
const XCOFFSectionHeader64 *toSection64(DataRefImpl Ref) const;
- void checkSectionAddress(uintptr_t Addr, uintptr_t TableAddr) const;
uintptr_t getSectionHeaderTableAddress() const;
+ uintptr_t getEndOfSymbolTableAddress() const;
// This returns a pointer to the start of the storage for the name field of
// the 32-bit or 64-bit SectionHeader struct. This string is *not* necessarily
// null-terminated.
const char *getSectionNameInternal(DataRefImpl Sec) const;
- int32_t getSectionFlags(DataRefImpl Sec) const;
+  // This function returns the string table entry at the given offset.
+ Expected<StringRef> getStringTableEntry(uint32_t Offset) const;
static bool isReservedSectionNumber(int16_t SectionNumber);
- Expected<DataRefImpl> getSectionByNum(int16_t Num) const;
// Constructor and "create" factory function. The constructor is only a thin
// wrapper around the base constructor. The "create" function fills out the
@@ -175,6 +229,8 @@ private:
friend Expected<std::unique_ptr<ObjectFile>>
ObjectFile::createXCOFFObjectFile(MemoryBufferRef Object, unsigned FileType);
+ void checkSectionAddress(uintptr_t Addr, uintptr_t TableAddr) const;
+
public:
// Interface inherited from base classes.
void moveSymbolNext(DataRefImpl &Symb) const override;
@@ -253,15 +309,49 @@ public:
uint32_t getLogicalNumberOfSymbolTableEntries32() const;
uint32_t getNumberOfSymbolTableEntries64() const;
+ uint32_t getSymbolIndex(uintptr_t SymEntPtr) const;
+ Expected<StringRef> getSymbolNameByIndex(uint32_t SymbolTableIndex) const;
+ Expected<StringRef> getCFileName(const XCOFFFileAuxEnt *CFileEntPtr) const;
uint16_t getOptionalHeaderSize() const;
uint16_t getFlags() const;
// Section header table related interfaces.
ArrayRef<XCOFFSectionHeader32> sections32() const;
ArrayRef<XCOFFSectionHeader64> sections64() const;
+
+ int32_t getSectionFlags(DataRefImpl Sec) const;
+ Expected<DataRefImpl> getSectionByNum(int16_t Num) const;
+
+ void checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const;
+
+ // Relocation-related interfaces.
+ Expected<uint32_t>
+ getLogicalNumberOfRelocationEntries(const XCOFFSectionHeader32 &Sec) const;
+
+ Expected<ArrayRef<XCOFFRelocation32>>
+ relocations(const XCOFFSectionHeader32 &) const;
}; // XCOFFObjectFile
+class XCOFFSymbolRef {
+ const DataRefImpl SymEntDataRef;
+ const XCOFFObjectFile *const OwningObjectPtr;
+
+public:
+ XCOFFSymbolRef(DataRefImpl SymEntDataRef,
+ const XCOFFObjectFile *OwningObjectPtr)
+ : SymEntDataRef(SymEntDataRef), OwningObjectPtr(OwningObjectPtr){};
+
+ XCOFF::StorageClass getStorageClass() const;
+ uint8_t getNumberOfAuxEntries() const;
+ const XCOFFCsectAuxEnt32 *getXCOFFCsectAuxEnt32() const;
+ uint16_t getType() const;
+ int16_t getSectionNumber() const;
+
+ bool hasCsectAuxEnt() const;
+ bool isFunction() const;
+};
+
} // namespace object
} // namespace llvm
diff --git a/include/llvm/ObjectYAML/DWARFYAML.h b/include/llvm/ObjectYAML/DWARFYAML.h
index 78d736c3ef05..525fd9a89242 100644
--- a/include/llvm/ObjectYAML/DWARFYAML.h
+++ b/include/llvm/ObjectYAML/DWARFYAML.h
@@ -234,7 +234,7 @@ template <> struct MappingTraits<DWARFYAML::InitialLength> {
static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF);
};
-#define HANDLE_DW_TAG(unused, name, unused2, unused3) \
+#define HANDLE_DW_TAG(unused, name, unused2, unused3, unused4) \
io.enumCase(value, "DW_TAG_" #name, dwarf::DW_TAG_##name);
template <> struct ScalarEnumerationTraits<dwarf::Tag> {
diff --git a/include/llvm/ObjectYAML/ELFYAML.h b/include/llvm/ObjectYAML/ELFYAML.h
index f4212516f486..0898a0e7d532 100644
--- a/include/llvm/ObjectYAML/ELFYAML.h
+++ b/include/llvm/ObjectYAML/ELFYAML.h
@@ -25,6 +25,8 @@
namespace llvm {
namespace ELFYAML {
+StringRef dropUniqueSuffix(StringRef S);
+
// These types are invariant across 32/64-bit ELF, so for simplicity just
// directly give them their exact sizes. We don't need to worry about
// endianness because these are just the types in the YAMLIO structures,
@@ -54,8 +56,6 @@ LLVM_YAML_STRONG_TYPEDEF(uint64_t, ELF_SHF)
LLVM_YAML_STRONG_TYPEDEF(uint16_t, ELF_SHN)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STB)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STT)
-LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STV)
-LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STO)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_AFL_REG)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_ABI_FP)
@@ -77,7 +77,7 @@ struct FileHeader {
llvm::yaml::Hex64 Entry;
Optional<llvm::yaml::Hex16> SHEntSize;
- Optional<llvm::yaml::Hex16> SHOffset;
+ Optional<llvm::yaml::Hex64> SHOff;
Optional<llvm::yaml::Hex16> SHNum;
Optional<llvm::yaml::Hex16> SHStrNdx;
};
@@ -107,7 +107,7 @@ struct Symbol {
ELF_STB Binding;
llvm::yaml::Hex64 Value;
llvm::yaml::Hex64 Size;
- uint8_t Other;
+ Optional<uint8_t> Other;
};
struct SectionOrType {
@@ -119,6 +119,11 @@ struct DynamicEntry {
llvm::yaml::Hex64 Val;
};
+struct StackSizeEntry {
+ llvm::yaml::Hex64 Address;
+ llvm::yaml::Hex64 Size;
+};
+
struct Section {
enum class SectionKind {
Dynamic,
@@ -126,10 +131,14 @@ struct Section {
RawContent,
Relocation,
NoBits,
+ Hash,
Verdef,
Verneed,
+ StackSizes,
+ SymtabShndxSection,
Symver,
- MipsABIFlags
+ MipsABIFlags,
+ Addrsig
};
SectionKind Kind;
StringRef Name;
@@ -140,16 +149,44 @@ struct Section {
llvm::yaml::Hex64 AddressAlign;
Optional<llvm::yaml::Hex64> EntSize;
+  // Usually sections are not created implicitly, but loaded from YAML.
+  // When they are created implicitly, this flag signals it.
+ bool IsImplicit;
+
+ Section(SectionKind Kind, bool IsImplicit = false)
+ : Kind(Kind), IsImplicit(IsImplicit) {}
+ virtual ~Section();
+
+  // The following members are used to override section fields, which is
+  // useful for creating invalid objects.
+
+ // This can be used to override the offset stored in the sh_name field.
+ // It does not affect the name stored in the string table.
+ Optional<llvm::yaml::Hex64> ShName;
+
// This can be used to override the sh_offset field. It does not place the
- // section data at the offset specified. Useful for creating invalid objects.
+ // section data at the offset specified.
Optional<llvm::yaml::Hex64> ShOffset;
// This can be used to override the sh_size field. It does not affect the
// content written.
Optional<llvm::yaml::Hex64> ShSize;
+};
- Section(SectionKind Kind) : Kind(Kind) {}
- virtual ~Section();
+struct StackSizesSection : Section {
+ Optional<yaml::BinaryRef> Content;
+ Optional<llvm::yaml::Hex64> Size;
+ Optional<std::vector<StackSizeEntry>> Entries;
+
+ StackSizesSection() : Section(SectionKind::StackSizes) {}
+
+ static bool classof(const Section *S) {
+ return S->Kind == SectionKind::StackSizes;
+ }
+
+ static bool nameMatches(StringRef Name) {
+ return Name == ".stack_sizes";
+ }
};
struct DynamicSection : Section {
@@ -185,6 +222,17 @@ struct NoBitsSection : Section {
}
};
+struct HashSection : Section {
+ Optional<yaml::BinaryRef> Content;
+ Optional<llvm::yaml::Hex64> Size;
+ Optional<std::vector<uint32_t>> Bucket;
+ Optional<std::vector<uint32_t>> Chain;
+
+ HashSection() : Section(SectionKind::Hash) {}
+
+ static bool classof(const Section *S) { return S->Kind == SectionKind::Hash; }
+};
+
struct VernauxEntry {
uint32_t Hash;
uint16_t Flags;
@@ -209,6 +257,26 @@ struct VerneedSection : Section {
}
};
+struct AddrsigSymbol {
+ AddrsigSymbol(StringRef N) : Name(N), Index(None) {}
+ AddrsigSymbol(llvm::yaml::Hex32 Ndx) : Name(None), Index(Ndx) {}
+ AddrsigSymbol() : Name(None), Index(None) {}
+
+ Optional<StringRef> Name;
+ Optional<llvm::yaml::Hex32> Index;
+};
+
+struct AddrsigSection : Section {
+ Optional<yaml::BinaryRef> Content;
+ Optional<llvm::yaml::Hex64> Size;
+ Optional<std::vector<AddrsigSymbol>> Symbols;
+
+ AddrsigSection() : Section(SectionKind::Addrsig) {}
+ static bool classof(const Section *S) {
+ return S->Kind == SectionKind::Addrsig;
+ }
+};
+
struct SymverSection : Section {
std::vector<uint16_t> Entries;
@@ -269,6 +337,16 @@ struct RelocationSection : Section {
}
};
+struct SymtabShndxSection : Section {
+ std::vector<uint32_t> Entries;
+
+ SymtabShndxSection() : Section(SectionKind::SymtabShndxSection) {}
+
+ static bool classof(const Section *S) {
+ return S->Kind == SectionKind::SymtabShndxSection;
+ }
+};
+
// Represents .MIPS.abiflags section
struct MipsABIFlags : Section {
llvm::yaml::Hex16 Version;
@@ -298,13 +376,15 @@ struct Object {
// cleaner and nicer if we read them from the YAML as a separate
// top-level key, which automatically ensures that invariants like there
// being a single SHT_SYMTAB section are upheld.
- std::vector<Symbol> Symbols;
+ Optional<std::vector<Symbol>> Symbols;
std::vector<Symbol> DynamicSymbols;
};
} // end namespace ELFYAML
} // end namespace llvm
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::AddrsigSymbol)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::StackSizeEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::DynamicEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::ProgramHeader)
LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::ELFYAML::Section>)
@@ -381,16 +461,6 @@ struct ScalarEnumerationTraits<ELFYAML::ELF_STT> {
};
template <>
-struct ScalarEnumerationTraits<ELFYAML::ELF_STV> {
- static void enumeration(IO &IO, ELFYAML::ELF_STV &Value);
-};
-
-template <>
-struct ScalarBitSetTraits<ELFYAML::ELF_STO> {
- static void bitset(IO &IO, ELFYAML::ELF_STO &Value);
-};
-
-template <>
struct ScalarEnumerationTraits<ELFYAML::ELF_REL> {
static void enumeration(IO &IO, ELFYAML::ELF_REL &Value);
};
@@ -450,6 +520,10 @@ struct MappingTraits<ELFYAML::Symbol> {
static StringRef validate(IO &IO, ELFYAML::Symbol &Symbol);
};
+template <> struct MappingTraits<ELFYAML::StackSizeEntry> {
+ static void mapping(IO &IO, ELFYAML::StackSizeEntry &Rel);
+};
+
template <> struct MappingTraits<ELFYAML::DynamicEntry> {
static void mapping(IO &IO, ELFYAML::DynamicEntry &Rel);
};
@@ -466,6 +540,10 @@ template <> struct MappingTraits<ELFYAML::VernauxEntry> {
static void mapping(IO &IO, ELFYAML::VernauxEntry &E);
};
+template <> struct MappingTraits<ELFYAML::AddrsigSymbol> {
+ static void mapping(IO &IO, ELFYAML::AddrsigSymbol &Sym);
+};
+
template <> struct MappingTraits<ELFYAML::Relocation> {
static void mapping(IO &IO, ELFYAML::Relocation &Rel);
};
diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h
index d7e1c033f43b..327c3b9f892b 100644
--- a/include/llvm/ObjectYAML/MachOYAML.h
+++ b/include/llvm/ObjectYAML/MachOYAML.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/ObjectYAML/DWARFYAML.h"
+#include "llvm/ObjectYAML/YAML.h"
#include "llvm/Support/YAMLTraits.h"
#include <cstdint>
#include <string>
@@ -39,6 +40,7 @@ struct Section {
llvm::yaml::Hex32 reserved1;
llvm::yaml::Hex32 reserved2;
llvm::yaml::Hex32 reserved3;
+ Optional<llvm::yaml::BinaryRef> content;
};
struct FileHeader {
@@ -198,6 +200,7 @@ template <> struct MappingTraits<MachOYAML::ExportEntry> {
template <> struct MappingTraits<MachOYAML::Section> {
static void mapping(IO &IO, MachOYAML::Section &Section);
+ static StringRef validate(IO &io, MachOYAML::Section &Section);
};
template <> struct MappingTraits<MachOYAML::NListEntry> {
diff --git a/include/llvm/ObjectYAML/MinidumpYAML.h b/include/llvm/ObjectYAML/MinidumpYAML.h
index 39fdd62e017b..c1711a28dd84 100644
--- a/include/llvm/ObjectYAML/MinidumpYAML.h
+++ b/include/llvm/ObjectYAML/MinidumpYAML.h
@@ -26,6 +26,8 @@ namespace MinidumpYAML {
/// from Types to Kinds is fixed and given by the static getKind function.
struct Stream {
enum class StreamKind {
+ Exception,
+ MemoryInfoList,
MemoryList,
ModuleList,
RawContent,
@@ -102,6 +104,45 @@ using ModuleListStream = detail::ListStream<detail::ParsedModule>;
using ThreadListStream = detail::ListStream<detail::ParsedThread>;
using MemoryListStream = detail::ListStream<detail::ParsedMemoryDescriptor>;
+/// ExceptionStream minidump stream.
+struct ExceptionStream : public Stream {
+ minidump::ExceptionStream MDExceptionStream;
+ yaml::BinaryRef ThreadContext;
+
+ ExceptionStream()
+ : Stream(StreamKind::Exception, minidump::StreamType::Exception),
+ MDExceptionStream({}) {}
+
+ explicit ExceptionStream(const minidump::ExceptionStream &MDExceptionStream,
+ ArrayRef<uint8_t> ThreadContext)
+ : Stream(StreamKind::Exception, minidump::StreamType::Exception),
+ MDExceptionStream(MDExceptionStream), ThreadContext(ThreadContext) {}
+
+ static bool classof(const Stream *S) {
+ return S->Kind == StreamKind::Exception;
+ }
+};
+
+/// A structure containing the list of MemoryInfo entries comprising a
+/// MemoryInfoList stream.
+struct MemoryInfoListStream : public Stream {
+ std::vector<minidump::MemoryInfo> Infos;
+
+ MemoryInfoListStream()
+ : Stream(StreamKind::MemoryInfoList,
+ minidump::StreamType::MemoryInfoList) {}
+
+ explicit MemoryInfoListStream(
+ iterator_range<object::MinidumpFile::MemoryInfoIterator> Range)
+ : Stream(StreamKind::MemoryInfoList,
+ minidump::StreamType::MemoryInfoList),
+ Infos(Range.begin(), Range.end()) {}
+
+ static bool classof(const Stream *S) {
+ return S->Kind == StreamKind::MemoryInfoList;
+ }
+};
+
/// A minidump stream represented as a sequence of hex bytes. This is used as a
/// fallback when no other stream kind is suitable.
struct RawContentStream : public Stream {
@@ -122,16 +163,16 @@ struct SystemInfoStream : public Stream {
minidump::SystemInfo Info;
std::string CSDVersion;
- explicit SystemInfoStream(const minidump::SystemInfo &Info,
- std::string CSDVersion)
- : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo),
- Info(Info), CSDVersion(std::move(CSDVersion)) {}
-
SystemInfoStream()
: Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) {
memset(&Info, 0, sizeof(Info));
}
+ explicit SystemInfoStream(const minidump::SystemInfo &Info,
+ std::string CSDVersion)
+ : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo),
+ Info(Info), CSDVersion(std::move(CSDVersion)) {}
+
static bool classof(const Stream *S) {
return S->Kind == StreamKind::SystemInfo;
}
@@ -177,12 +218,6 @@ struct Object {
static Expected<Object> create(const object::MinidumpFile &File);
};
-/// Serialize the minidump file represented by Obj to OS in binary form.
-void writeAsBinary(Object &Obj, raw_ostream &OS);
-
-/// Serialize the yaml string as a minidump file to OS in binary form.
-Error writeAsBinary(StringRef Yaml, raw_ostream &OS);
-
} // namespace MinidumpYAML
namespace yaml {
@@ -213,6 +248,10 @@ template <> struct MappingContextTraits<minidump::MemoryDescriptor, BinaryRef> {
} // namespace llvm
+LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryProtection)
+LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryState)
+LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryType)
+
LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::ProcessorArchitecture)
LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::OSPlatform)
LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType)
@@ -220,6 +259,8 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::ArmInfo)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::OtherInfo)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::X86Info)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::Exception)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::MemoryInfo)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::VSFixedFileInfo)
LLVM_YAML_DECLARE_MAPPING_TRAITS(
@@ -233,6 +274,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::MinidumpYAML::Stream>)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::MemoryListStream::entry_type)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ModuleListStream::entry_type)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ThreadListStream::entry_type)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::minidump::MemoryInfo)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::MinidumpYAML::Object)
diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h
index 2411dc7ac17d..15a8cc215020 100644
--- a/include/llvm/ObjectYAML/WasmYAML.h
+++ b/include/llvm/ObjectYAML/WasmYAML.h
@@ -145,7 +145,7 @@ struct Signature {
uint32_t Index;
SignatureForm Form = wasm::WASM_TYPE_FUNC;
std::vector<ValueType> ParamTypes;
- ValueType ReturnType;
+ std::vector<ValueType> ReturnTypes;
};
struct SymbolInfo {
diff --git a/include/llvm/ObjectYAML/yaml2obj.h b/include/llvm/ObjectYAML/yaml2obj.h
new file mode 100644
index 000000000000..386551337d86
--- /dev/null
+++ b/include/llvm/ObjectYAML/yaml2obj.h
@@ -0,0 +1,67 @@
+//===--- yaml2obj.h - -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Common declarations for yaml2obj
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TOOLS_YAML2OBJ_YAML2OBJ_H
+#define LLVM_TOOLS_YAML2OBJ_YAML2OBJ_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+
+namespace llvm {
+class raw_ostream;
+template <typename T> class SmallVectorImpl;
+template <typename T> class Expected;
+
+namespace object {
+class ObjectFile;
+}
+
+namespace COFFYAML {
+struct Object;
+}
+
+namespace ELFYAML {
+struct Object;
+}
+
+namespace MinidumpYAML {
+struct Object;
+}
+
+namespace WasmYAML {
+struct Object;
+}
+
+namespace yaml {
+class Input;
+struct YamlObjectFile;
+
+using ErrorHandler = llvm::function_ref<void(const Twine &Msg)>;
+
+bool yaml2coff(COFFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH);
+bool yaml2elf(ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH);
+bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler EH);
+bool yaml2minidump(MinidumpYAML::Object &Doc, raw_ostream &Out,
+ ErrorHandler EH);
+bool yaml2wasm(WasmYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH);
+
+bool convertYAML(Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler,
+ unsigned DocNum = 1);
+
+/// Convenience function for tests.
+std::unique_ptr<object::ObjectFile>
+yaml2ObjectFile(SmallVectorImpl<char> &Storage, StringRef Yaml,
+ ErrorHandler ErrHandler);
+
+} // namespace yaml
+} // namespace llvm
+
+#endif
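// Test-oriented sketch of the convenience entry point (editor's
// addition; the embedded YAML keys follow the ELFYAML mapping above and
// are illustrative). On failure the handler is invoked and a null
// pointer comes back.
SmallString<0> Storage;
std::unique_ptr<object::ObjectFile> Obj = yaml::yaml2ObjectFile(
    Storage, R"(--- !ELF
FileHeader:
  Class:   ELFCLASS64
  Data:    ELFDATA2LSB
  Type:    ET_REL
  Machine: EM_X86_64
)",
    [](const Twine &Msg) { errs() << Msg << "\n"; });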
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 329f7eaba73d..1d53ae32cf37 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -306,6 +306,9 @@ protected:
};
//===----------------------------------------------------------------------===//
+/// Deprecated - do not create new passes as BasicBlockPasses. Use FunctionPass
+/// with a loop over the BasicBlocks instead.
+//
/// BasicBlockPass class - This class is used to implement most local
/// optimizations. Optimizations should subclass this class if they
/// meet the following constraints:
@@ -338,6 +341,8 @@ public:
/// do any post processing needed after all passes have run.
virtual bool doFinalization(Function &);
+ void preparePassManager(PMStack &PMS) override;
+
void assignPassManager(PMStack &PMS, PassManagerType T) override;
/// Return what kind of Pass Manager can manage this pass.
diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h
index 5e6660599f93..f73e4b42dd4b 100644
--- a/include/llvm/Passes/PassBuilder.h
+++ b/include/llvm/Passes/PassBuilder.h
@@ -629,6 +629,12 @@ public:
TopLevelPipelineParsingCallbacks.push_back(C);
}
+  /// Add PGOInstrumentation passes for O0 only.
+ void addPGOInstrPassesForO0(ModulePassManager &MPM, bool DebugLogging,
+ bool RunProfileGen, bool IsCS,
+ std::string ProfileFile,
+ std::string ProfileRemappingFile);
+
private:
static Optional<std::vector<PipelineElement>>
parsePipelineText(StringRef Text);
@@ -660,7 +666,6 @@ private:
OptimizationLevel Level, bool RunProfileGen, bool IsCS,
std::string ProfileFile,
std::string ProfileRemappingFile);
-
void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel);
// Extension Point callbacks
diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 11758ac4cf2f..0dd0c7ec8065 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -301,7 +301,12 @@ public:
struct FunctionRecord {
/// Raw function name.
std::string Name;
- /// Associated files.
+ /// Mapping from FileID (i.e. vector index) to filename. Used to support
+ /// macro expansions within a function in which the macro and function are
+ /// defined in separate files.
+ ///
+ /// TODO: Uniquing filenames across all function records may be a performance
+ /// optimization.
std::vector<std::string> Filenames;
/// Regions in the function along with their counts.
std::vector<CountedRegion> CountedRegions;
@@ -508,6 +513,7 @@ public:
class CoverageMapping {
DenseMap<size_t, DenseSet<size_t>> RecordProvenance;
std::vector<FunctionRecord> Functions;
+ DenseMap<size_t, SmallVector<unsigned, 0>> FilenameHash2RecordIndices;
std::vector<std::pair<std::string, uint64_t>> FuncHashMismatches;
CoverageMapping() = default;
@@ -516,6 +522,13 @@ class CoverageMapping {
Error loadFunctionRecord(const CoverageMappingRecord &Record,
IndexedInstrProfReader &ProfileReader);
+ /// Look up the indices for function records which are at least partially
+ /// defined in the specified file. This is guaranteed to return a superset of
+ /// such records: extra records not in the file may be included if there is
+ /// a hash collision on the filename. Clients must be robust to collisions.
+ ArrayRef<unsigned>
+ getImpreciseRecordIndicesForFilename(StringRef Filename) const;
+
public:
CoverageMapping(const CoverageMapping &) = delete;
CoverageMapping &operator=(const CoverageMapping &) = delete;
@@ -527,6 +540,7 @@ public:
/// Load the coverage mapping from the given object files and profile. If
/// \p Arches is non-empty, it must specify an architecture for each object.
+  /// Ignores non-instrumented object files unless all of them are
+  /// non-instrumented.
static Expected<std::unique_ptr<CoverageMapping>>
load(ArrayRef<StringRef> ObjectFilenames, StringRef ProfileFilename,
ArrayRef<StringRef> Arches = None);
diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
index 5f88cacdfcbb..6fcd8a09a494 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
@@ -30,8 +30,7 @@ class CoverageFilenamesSectionWriter {
ArrayRef<StringRef> Filenames;
public:
- CoverageFilenamesSectionWriter(ArrayRef<StringRef> Filenames)
- : Filenames(Filenames) {}
+ CoverageFilenamesSectionWriter(ArrayRef<StringRef> Filenames);
/// Write encoded filenames to the given output stream.
void write(raw_ostream &OS);
diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h
index c7d764ade30d..c26f76949992 100644
--- a/include/llvm/ProfileData/InstrProf.h
+++ b/include/llvm/ProfileData/InstrProf.h
@@ -93,10 +93,6 @@ inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; }
/// Return the name of value profile node array variables:
inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; }
-/// Return the name prefix of the COMDAT group for instrumentation variables
-/// associated with a COMDAT function.
-inline StringRef getInstrProfComdatPrefix() { return "__profv_"; }
-
/// Return the name of the variable holding the strings (possibly compressed)
/// of all function's PGO names.
inline StringRef getInstrProfNamesVarName() {
@@ -634,8 +630,8 @@ struct OverlapStats {
FuncHash = Hash;
}
- Error accumuateCounts(const std::string &BaseFilename,
- const std::string &TestFilename, bool IsCS);
+ Error accumulateCounts(const std::string &BaseFilename,
+ const std::string &TestFilename, bool IsCS);
void addOneMismatch(const CountSumOrPercent &MismatchFunc);
void addOneUnique(const CountSumOrPercent &UniqueFunc);
@@ -695,7 +691,7 @@ struct InstrProfRecord {
InstrProfRecord(const InstrProfRecord &RHS)
: Counts(RHS.Counts),
ValueData(RHS.ValueData
- ? llvm::make_unique<ValueProfData>(*RHS.ValueData)
+ ? std::make_unique<ValueProfData>(*RHS.ValueData)
: nullptr) {}
InstrProfRecord &operator=(InstrProfRecord &&) = default;
InstrProfRecord &operator=(const InstrProfRecord &RHS) {
@@ -705,7 +701,7 @@ struct InstrProfRecord {
return *this;
}
if (!ValueData)
- ValueData = llvm::make_unique<ValueProfData>(*RHS.ValueData);
+ ValueData = std::make_unique<ValueProfData>(*RHS.ValueData);
else
*ValueData = *RHS.ValueData;
return *this;
@@ -772,7 +768,7 @@ struct InstrProfRecord {
void clearValueData() { ValueData = nullptr; }
/// Compute the sums of all counts and store in Sum.
- void accumuateCounts(CountSumOrPercent &Sum) const;
+ void accumulateCounts(CountSumOrPercent &Sum) const;
  /// Compute the overlap between this InstrProfRecord and Other.
void overlap(InstrProfRecord &Other, OverlapStats &Overlap,
@@ -817,7 +813,7 @@ private:
std::vector<InstrProfValueSiteRecord> &
getOrCreateValueSitesForKind(uint32_t ValueKind) {
if (!ValueData)
- ValueData = llvm::make_unique<ValueProfData>();
+ ValueData = std::make_unique<ValueProfData>();
switch (ValueKind) {
case IPVK_IndirectCallTarget:
return ValueData->IndirectCallSites;
@@ -897,7 +893,7 @@ InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site,
return std::unique_ptr<InstrProfValueData[]>(nullptr);
}
- auto VD = llvm::make_unique<InstrProfValueData[]>(N);
+ auto VD = std::make_unique<InstrProfValueData[]>(N);
TotalCount = getValueForSite(VD.get(), ValueKind, Site);
return VD;
diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h
index 73751faab88e..f5f552672bf0 100644
--- a/include/llvm/ProfileData/InstrProfReader.h
+++ b/include/llvm/ProfileData/InstrProfReader.h
@@ -92,7 +92,7 @@ public:
virtual InstrProfSymtab &getSymtab() = 0;
/// Compute the sum of counts and return in Sum.
- void accumuateCounts(CountSumOrPercent &Sum, bool IsCS);
+ void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
protected:
std::unique_ptr<InstrProfSymtab> Symtab;
@@ -268,8 +268,14 @@ private:
return (const char *)ValueDataStart;
}
- const uint64_t *getCounter(IntPtrT CounterPtr) const {
- ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
+ /// Get the offset of \p CounterPtr from the start of the counters section of
+ /// the profile. The offset has units of "number of counters", i.e. increasing
+ /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
+ ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
+ return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
+ }
+
+ const uint64_t *getCounter(ptrdiff_t Offset) const {
return CountersStart + Offset;
}
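// Units sketch for the contract above (editor's addition, illustrative
// values): the offset counts 8-byte counters, not bytes.
ptrdiff_t Off = 3;                     // "number of counters"
size_t Bytes = Off * sizeof(uint64_t); // 24 bytes past CountersStart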
diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h
index 7fbc857b7230..55418d9d0f9c 100644
--- a/include/llvm/ProfileData/SampleProf.h
+++ b/include/llvm/ProfileData/SampleProf.h
@@ -18,15 +18,18 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
#include <map>
+#include <set>
#include <string>
#include <system_error>
#include <utility>
@@ -49,7 +52,10 @@ enum class sampleprof_error {
truncated_name_table,
not_implemented,
counter_overflow,
- ostream_seek_unsupported
+ ostream_seek_unsupported,
+ compress_failed,
+ uncompress_failed,
+ zlib_unavailable
};
inline std::error_code make_error_code(sampleprof_error E) {
@@ -83,6 +89,7 @@ enum SampleProfileFormat {
SPF_Text = 0x1,
SPF_Compact_Binary = 0x2,
SPF_GCC = 0x3,
+ SPF_Ext_Binary = 0x4,
SPF_Binary = 0xff
};
@@ -105,6 +112,61 @@ static inline StringRef getRepInFormat(StringRef Name,
static inline uint64_t SPVersion() { return 103; }
+// Section Type used by SampleProfileExtBinaryBaseReader and
+// SampleProfileExtBinaryBaseWriter. Never change the existing
+// values of the enum; only append new ones.
+enum SecType {
+ SecInValid = 0,
+ SecProfSummary = 1,
+ SecNameTable = 2,
+ SecProfileSymbolList = 3,
+ SecFuncOffsetTable = 4,
+ // marker for the first type of profile.
+ SecFuncProfileFirst = 32,
+ SecLBRProfile = SecFuncProfileFirst
+};
+
+static inline std::string getSecName(SecType Type) {
+ switch (Type) {
+ case SecInValid:
+ return "InvalidSection";
+ case SecProfSummary:
+ return "ProfileSummarySection";
+ case SecNameTable:
+ return "NameTableSection";
+ case SecProfileSymbolList:
+ return "ProfileSymbolListSection";
+ case SecFuncOffsetTable:
+ return "FuncOffsetTableSection";
+ case SecLBRProfile:
+ return "LBRProfileSection";
+ }
+ llvm_unreachable("A SecType has no name for output");
+}
+
+// Entry type of section header table used by SampleProfileExtBinaryBaseReader
+// and SampleProfileExtBinaryBaseWriter.
+struct SecHdrTableEntry {
+ SecType Type;
+ uint64_t Flags;
+ uint64_t Offset;
+ uint64_t Size;
+};
+
+enum SecFlags { SecFlagInValid = 0, SecFlagCompress = (1 << 0) };
+
+static inline void addSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) {
+ Entry.Flags |= Flags;
+}
+
+static inline void removeSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) {
+ Entry.Flags &= ~Flags;
+}
+
+static inline bool hasSecFlag(SecHdrTableEntry &Entry, SecFlags Flag) {
+ return Entry.Flags & Flag;
+}
+
/// Represents the relative location of an instruction.
///
/// Instruction locations are specified by the line offset from the
@@ -143,8 +205,18 @@ raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
/// will be a list of one or more functions.
class SampleRecord {
public:
- using CallTargetMap = StringMap<uint64_t>;
+ using CallTarget = std::pair<StringRef, uint64_t>;
+ struct CallTargetComparator {
+ bool operator()(const CallTarget &LHS, const CallTarget &RHS) const {
+ if (LHS.second != RHS.second)
+ return LHS.second > RHS.second;
+
+ return LHS.first < RHS.first;
+ }
+ };
+ using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>;
+ using CallTargetMap = StringMap<uint64_t>;
SampleRecord() = default;
/// Increment the number of samples for this record by \p S.
@@ -179,6 +251,18 @@ public:
uint64_t getSamples() const { return NumSamples; }
const CallTargetMap &getCallTargets() const { return CallTargets; }
+ const SortedCallTargetSet getSortedCallTargets() const {
+ return SortCallTargets(CallTargets);
+ }
+
+ /// Sort call targets in descending order of call frequency.
+ static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
+ SortedCallTargetSet SortedTargets;
+ for (const auto &I : Targets) {
+ SortedTargets.emplace(I.first(), I.second);
+ }
+ return SortedTargets;
+ }
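// Ordering sketch (editor's addition, hypothetical counts): the
// comparator sorts by descending count and breaks ties by name.
SampleRecord::CallTargetMap M;
M["baz"] = 42;
M["foo"] = 7;
M["bar"] = 7;
// SortCallTargets(M) yields ("baz", 42), ("bar", 7), ("foo", 7).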
/// Merge the samples in \p Other into this record.
/// Optionally scale sample counts by \p Weight.
@@ -205,7 +289,7 @@ class FunctionSamples;
using BodySampleMap = std::map<LineLocation, SampleRecord>;
// NOTE: Using a StringMap here makes parsed profiles consume around 17% more
// memory, which is *very* significant for large profiles.
-using FunctionSamplesMap = std::map<std::string, FunctionSamples>;
+using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>;
using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
/// Representation of the samples collected for a function.
@@ -447,11 +531,10 @@ public:
StringRef getNameInModule(StringRef Name, const Module *M) const {
if (Format != SPF_Compact_Binary)
return Name;
- // Expect CurrentModule to be initialized by GUIDToFuncNameMapper.
- if (M != CurrentModule)
- llvm_unreachable("Input Module should be the same as CurrentModule");
- auto iter = GUIDToFuncNameMap.find(std::stoull(Name.data()));
- if (iter == GUIDToFuncNameMap.end())
+
+  assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
+ auto iter = GUIDToFuncNameMap->find(std::stoull(Name.data()));
+ if (iter == GUIDToFuncNameMap->end())
return StringRef();
return iter->second;
}
@@ -472,42 +555,10 @@ public:
const FunctionSamples *findFunctionSamples(const DILocation *DIL) const;
static SampleProfileFormat Format;
- /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
- /// all the function symbols defined or declared in CurrentModule.
- static DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
- static Module *CurrentModule;
-
- class GUIDToFuncNameMapper {
- public:
- GUIDToFuncNameMapper(Module &M) {
- if (Format != SPF_Compact_Binary)
- return;
-
- for (const auto &F : M) {
- StringRef OrigName = F.getName();
- GUIDToFuncNameMap.insert({Function::getGUID(OrigName), OrigName});
- /// Local to global var promotion used by optimization like thinlto
- /// will rename the var and add suffix like ".llvm.xxx" to the
- /// original local name. In sample profile, the suffixes of function
- /// names are all stripped. Since it is possible that the mapper is
- /// built in post-thin-link phase and var promotion has been done,
- /// we need to add the substring of function name without the suffix
- /// into the GUIDToFuncNameMap.
- StringRef CanonName = getCanonicalFnName(F);
- if (CanonName != OrigName)
- GUIDToFuncNameMap.insert({Function::getGUID(CanonName), CanonName});
- }
- CurrentModule = &M;
- }
-
- ~GUIDToFuncNameMapper() {
- if (Format != SPF_Compact_Binary)
- return;
- GUIDToFuncNameMap.clear();
- CurrentModule = nullptr;
- }
- };
+ /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
+  /// all the function symbols defined or declared in the current module.
+ DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr;
// Assume the input \p Name is a name coming from FunctionSamples itself.
// If the format is SPF_Compact_Binary, the name is already a GUID and we
@@ -583,6 +634,47 @@ private:
SamplesWithLocList V;
};
+/// ProfileSymbolList records the list of function symbols that showed up
+/// in the binary used to generate the profile. It is useful for
+/// discriminating between a function so cold that it did not show up in
+/// the profile and a function that was newly added.
+class ProfileSymbolList {
+public:
+ /// copy indicates whether we need to copy the underlying memory
+ /// for the input Name.
+ void add(StringRef Name, bool copy = false) {
+ if (!copy) {
+ Syms.insert(Name);
+ return;
+ }
+ Syms.insert(Name.copy(Allocator));
+ }
+
+ bool contains(StringRef Name) { return Syms.count(Name); }
+
+ void merge(const ProfileSymbolList &List) {
+ for (auto Sym : List.Syms)
+ add(Sym, true);
+ }
+
+ unsigned size() { return Syms.size(); }
+
+ void setToCompress(bool TC) { ToCompress = TC; }
+ bool toCompress() { return ToCompress; }
+
+ std::error_code read(const uint8_t *Data, uint64_t ListSize);
+ std::error_code write(raw_ostream &OS);
+ void dump(raw_ostream &OS = dbgs()) const;
+
+private:
+ // Determine whether or not to compress the symbol list when
+  // writing it into the profile. The variable is unused when the symbol
+ // list is read from an existing profile.
+ bool ToCompress = false;
+ DenseSet<StringRef> Syms;
+ BumpPtrAllocator Allocator;
+};
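// Consumer sketch (editor's addition; List is assumed to come from
// SampleProfileReader::getProfileSymbolList() further below). Absence
// from the list means the function did not exist in the profiled
// binary, rather than existing but collecting no samples.
if (List && !List->contains(F.getName())) {
  // Treat F as new code, not as cold code.
}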
+
} // end namespace sampleprof
} // end namespace llvm
diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h
index 969cdea859c9..5a5d4cfde224 100644
--- a/include/llvm/ProfileData/SampleProfReader.h
+++ b/include/llvm/ProfileData/SampleProfReader.h
@@ -235,6 +235,62 @@ class raw_ostream;
namespace sampleprof {
+class SampleProfileReader;
+
+/// SampleProfileReaderItaniumRemapper remaps the profile data from a
+/// sample profile data reader, by applying a provided set of equivalences
+/// between components of the symbol names in the profile.
+class SampleProfileReaderItaniumRemapper {
+public:
+ SampleProfileReaderItaniumRemapper(std::unique_ptr<MemoryBuffer> B,
+ std::unique_ptr<SymbolRemappingReader> SRR,
+ SampleProfileReader &R)
+ : Buffer(std::move(B)), Remappings(std::move(SRR)), Reader(R) {
+ assert(Remappings && "Remappings cannot be nullptr");
+ }
+
+ /// Create a remapper from the given remapping file. The remapper will
+  /// be used for the profile read in by Reader.
+ static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
+ create(const std::string Filename, SampleProfileReader &Reader,
+ LLVMContext &C);
+
+ /// Create a remapper from the given Buffer. The remapper will
+  /// be used for the profile read in by Reader.
+ static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
+ create(std::unique_ptr<MemoryBuffer> &B, SampleProfileReader &Reader,
+ LLVMContext &C);
+
+ /// Apply remappings to the profile read by Reader.
+ void applyRemapping(LLVMContext &Ctx);
+
+ bool hasApplied() { return RemappingApplied; }
+
+ /// Insert function name into remapper.
+ void insert(StringRef FunctionName) { Remappings->insert(FunctionName); }
+
+  /// Query whether an inserted equivalent of \p FunctionName exists in
+  /// the remapper.
+ bool exist(StringRef FunctionName) {
+ return Remappings->lookup(FunctionName);
+ }
+
+  /// Return the samples collected for function \p FunctionName if the
+  /// remapper knows it is present in SampleMap.
+ FunctionSamples *getSamplesFor(StringRef FunctionName);
+
+private:
+  // The buffer holding the content read from the remapping file.
+ std::unique_ptr<MemoryBuffer> Buffer;
+ std::unique_ptr<SymbolRemappingReader> Remappings;
+ DenseMap<SymbolRemappingReader::Key, FunctionSamples *> SampleMap;
+ // The Reader the remapper is servicing.
+ SampleProfileReader &Reader;
+ // Indicate whether remapping has been applied to the profile read
+ // by Reader -- by calling applyRemapping.
+ bool RemappingApplied = false;
+};
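
A rough sketch of driving the remapper by hand; in practice
SampleProfileReader::create builds and owns it, and Reader, Ctx, and the file
name here are assumptions:

  auto RemapperOrErr =
      llvm::sampleprof::SampleProfileReaderItaniumRemapper::create(
          "equivalences.remap", *Reader, Ctx);
  if (std::error_code EC = RemapperOrErr.getError())
    ; // handle EC
  auto Remapper = std::move(RemapperOrErr.get());
  Remapper->applyRemapping(Ctx); // Build SampleMap from Reader's profiles.
  if (auto *FS = Remapper->getSamplesFor("_Z3foov"))
    ; // Samples were found under an equivalent mangled name.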
+
/// Sample-based profile reader.
///
/// Each profile contains sample counts for all the functions
@@ -273,13 +329,22 @@ public:
/// Read and validate the file header.
virtual std::error_code readHeader() = 0;
- /// Read sample profiles from the associated file.
- virtual std::error_code read() = 0;
+ /// The interface to read sample profiles from the associated file.
+ std::error_code read() {
+ if (std::error_code EC = readImpl())
+ return EC;
+ if (Remapper)
+ Remapper->applyRemapping(Ctx);
+ return sampleprof_error::success;
+ }
+
+ /// The implementation to read sample profiles from the associated file.
+ virtual std::error_code readImpl() = 0;
/// Print the profile for \p FName on stream \p OS.
void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs());
- virtual void collectFuncsToUse(const Module &M) {}
+ virtual void collectFuncsFrom(const Module &M) {}
/// Print all the profiles on stream \p OS.
void dump(raw_ostream &OS = dbgs());
@@ -295,6 +360,10 @@ public:
/// Return the samples collected for function \p F.
virtual FunctionSamples *getSamplesFor(StringRef Fname) {
+ if (Remapper) {
+ if (auto FS = Remapper->getSamplesFor(Fname))
+ return FS;
+ }
std::string FGUID;
Fname = getRepInFormat(Fname, getFormat(), FGUID);
auto It = Profiles.find(Fname);
@@ -313,18 +382,33 @@ public:
}
/// Create a sample profile reader appropriate to the file format.
+ /// Create an underlying remapper if RemapFilename is not empty.
static ErrorOr<std::unique_ptr<SampleProfileReader>>
- create(const Twine &Filename, LLVMContext &C);
+ create(const std::string Filename, LLVMContext &C,
+ const std::string RemapFilename = "");
/// Create a sample profile reader from the supplied memory buffer.
+ /// Create an underlying remapper if RemapFilename is not empty.
static ErrorOr<std::unique_ptr<SampleProfileReader>>
- create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C);
+ create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
+ const std::string RemapFilename = "");
/// Return the profile summary.
- ProfileSummary &getSummary() { return *(Summary.get()); }
+ ProfileSummary &getSummary() const { return *(Summary.get()); }
+
+ MemoryBuffer *getBuffer() const { return Buffer.get(); }
/// \brief Return the profile format.
- SampleProfileFormat getFormat() { return Format; }
+ SampleProfileFormat getFormat() const { return Format; }
+
+ virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() {
+ return nullptr;
+ };
+
+ /// It includes all the names that have samples, either in an outline
+ /// instance or an inline instance.
+ virtual std::vector<StringRef> *getNameTable() { return nullptr; }
+ virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; };
protected:
/// Map every function to its associated profile.
@@ -352,6 +436,8 @@ protected:
/// Compute summary for this profile.
void computeSummary();
+ std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
+
/// \brief The format of sample.
SampleProfileFormat Format = SPF_None;
};
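
A sketch of the typical call sequence against this interface; the file names
are hypothetical, error handling is abbreviated, and getTotalSamples is taken
from FunctionSamples' existing interface:

  llvm::LLVMContext Ctx;
  auto ReaderOrErr = llvm::sampleprof::SampleProfileReader::create(
      "sample.prof", Ctx, /*RemapFilename=*/"equivalences.remap");
  if (std::error_code EC = ReaderOrErr.getError())
    ; // handle EC
  auto Reader = std::move(ReaderOrErr.get());
  // read() runs readImpl() and then applies the remapper, if one was built.
  if (std::error_code EC = Reader->read())
    ; // handle EC
  if (llvm::sampleprof::FunctionSamples *FS = Reader->getSamplesFor("_Z3foov"))
    uint64_t Total = FS->getTotalSamples();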
@@ -365,7 +451,7 @@ public:
std::error_code readHeader() override { return sampleprof_error::success; }
/// Read sample profiles from the associated file.
- std::error_code read() override;
+ std::error_code readImpl() override;
/// Return true if \p Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer);
@@ -381,7 +467,11 @@ public:
virtual std::error_code readHeader() override;
/// Read sample profiles from the associated file.
- std::error_code read() override;
+ std::error_code readImpl() override;
+
+ /// It includes all the names that have samples, either in an outline
+ /// instance or an inline instance.
+ virtual std::vector<StringRef> *getNameTable() override { return &NameTable; }
protected:
/// Read a numeric value of type T from the profile.
@@ -411,46 +501,134 @@ protected:
bool at_eof() const { return Data >= End; }
/// Read the next function profile instance.
- std::error_code readFuncProfile();
+ std::error_code readFuncProfile(const uint8_t *Start);
/// Read the contents of the given profile instance.
std::error_code readProfile(FunctionSamples &FProfile);
+ /// Read the contents of Magic number and Version number.
+ std::error_code readMagicIdent();
+
+ /// Read profile summary.
+ std::error_code readSummary();
+
+ /// Read the whole name table.
+ virtual std::error_code readNameTable();
+
/// Points to the current location in the buffer.
const uint8_t *Data = nullptr;
/// Points to the end of the buffer.
const uint8_t *End = nullptr;
+ /// Function name table.
+ std::vector<StringRef> NameTable;
+
+ /// Read a string indirectly via the name table.
+ virtual ErrorOr<StringRef> readStringFromTable();
+
private:
std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
+};
- /// Read profile summary.
- std::error_code readSummary();
+class SampleProfileReaderRawBinary : public SampleProfileReaderBinary {
+private:
+ virtual std::error_code verifySPMagic(uint64_t Magic) override;
- /// Read the whole name table.
- virtual std::error_code readNameTable() = 0;
+public:
+ SampleProfileReaderRawBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
+ SampleProfileFormat Format = SPF_Binary)
+ : SampleProfileReaderBinary(std::move(B), C, Format) {}
- /// Read a string indirectly via the name table.
- virtual ErrorOr<StringRef> readStringFromTable() = 0;
+ /// \brief Return true if \p Buffer is in the format supported by this class.
+ static bool hasFormat(const MemoryBuffer &Buffer);
};
-class SampleProfileReaderRawBinary : public SampleProfileReaderBinary {
+/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase define
+/// the basic structure of the extensible binary format.
+/// The format is organized in sections, except for the magic and version
+/// number at the beginning. A section table precedes all the sections, and
+/// each entry in the table describes the section type, start, size and
+/// attributes. The format within each section is defined by the section
+/// itself.
+///
+/// It is easy to add a new section while maintaining the backward
+/// compatibility of the profile; nothing extra needs to be done. If we want
+/// to extend an existing section, e.g., to add cache miss information in
+/// addition to the sample count in the profile body, we can add a new
+/// section with the extension and retire the existing section. We can keep
+/// the parser for the old section if we want the reader to be able to read
+/// both the new and the old profile formats.
+///
+/// SampleProfileReaderExtBinary/SampleProfileWriterExtBinary define the
+/// commonly used sections of a profile in extensible binary format. It is
+/// possible to define other types of profile inherited from
+/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase.
+class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
+private:
+ std::error_code decompressSection(const uint8_t *SecStart,
+ const uint64_t SecSize,
+ const uint8_t *&DecompressBuf,
+ uint64_t &DecompressBufSize);
+
+ BumpPtrAllocator Allocator;
+
+protected:
+ std::vector<SecHdrTableEntry> SecHdrTable;
+ std::unique_ptr<ProfileSymbolList> ProfSymList;
+ std::error_code readSecHdrTableEntry();
+ std::error_code readSecHdrTable();
+ virtual std::error_code readHeader() override;
+ virtual std::error_code verifySPMagic(uint64_t Magic) override = 0;
+ virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
+ SecType Type) = 0;
+
+public:
+ SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
+ LLVMContext &C, SampleProfileFormat Format)
+ : SampleProfileReaderBinary(std::move(B), C, Format) {}
+
+ /// Read sample profiles in extensible format from the associated file.
+ std::error_code readImpl() override;
+
+ /// Get the total size of all \p Type sections.
+ uint64_t getSectionSize(SecType Type);
+ /// Get the total size of header and all sections.
+ uint64_t getFileSize();
+ virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override;
+};
+
+class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
private:
- /// Function name table.
- std::vector<StringRef> NameTable;
virtual std::error_code verifySPMagic(uint64_t Magic) override;
- virtual std::error_code readNameTable() override;
- /// Read a string indirectly via the name table.
- virtual ErrorOr<StringRef> readStringFromTable() override;
+ virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
+ SecType Type) override;
+ std::error_code readProfileSymbolList();
+ std::error_code readFuncOffsetTable();
+ std::error_code readFuncProfiles();
+
+ /// The table mapping from a function name to the offset of its
+ /// FunctionSamples relative to the file start.
+ DenseMap<StringRef, uint64_t> FuncOffsetTable;
+ /// The set containing the functions to use when compiling a module.
+ DenseSet<StringRef> FuncsToUse;
+ /// Use all functions from the input profile.
+ bool UseAllFuncs = true;
public:
- SampleProfileReaderRawBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
- : SampleProfileReaderBinary(std::move(B), C, SPF_Binary) {}
+ SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
+ SampleProfileFormat Format = SPF_Ext_Binary)
+ : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {}
/// \brief Return true if \p Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer);
+
+ virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
+ return std::move(ProfSymList);
+ };
+
+ /// Collect functions with definitions in Module \p M.
+ void collectFuncsFrom(const Module &M) override;
};
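
A sketch of how a compilation might drive the two hooks above; M, F, and the
surrounding setup are assumed:

  // Restrict readImpl() to functions defined in M instead of loading every
  // function profile in the file.
  Reader->collectFuncsFrom(M);
  if (std::error_code EC = Reader->read())
    ; // handle EC
  // Take ownership of the symbol list to tell cold functions apart from
  // functions that are new in this build.
  auto PSL = Reader->getProfileSymbolList();
  if (PSL && !PSL->contains(F.getName()))
    ; // F did not exist in the profiled binary.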
class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
@@ -462,6 +640,8 @@ private:
DenseMap<StringRef, uint64_t> FuncOffsetTable;
/// The set containing the functions to use when compiling a module.
DenseSet<StringRef> FuncsToUse;
+ /// Use all functions from the input profile.
+ bool UseAllFuncs = true;
virtual std::error_code verifySPMagic(uint64_t Magic) override;
virtual std::error_code readNameTable() override;
/// Read a string indirectly via the name table.
@@ -478,10 +658,10 @@ public:
static bool hasFormat(const MemoryBuffer &Buffer);
/// Read samples only for functions to use.
- std::error_code read() override;
+ std::error_code readImpl() override;
/// Collect functions to be used when compiling Module \p M.
- void collectFuncsToUse(const Module &M) override;
+ void collectFuncsFrom(const Module &M) override;
};
using InlineCallStack = SmallVector<FunctionSamples *, 10>;
@@ -509,7 +689,7 @@ public:
std::error_code readHeader() override;
/// Read sample profiles from the associated file.
- std::error_code read() override;
+ std::error_code readImpl() override;
/// Return true if \p Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer);
@@ -537,44 +717,6 @@ protected:
static const uint32_t GCOVTagAFDOFunction = 0xac000000;
};
-/// A profile data reader proxy that remaps the profile data from another
-/// sample profile data reader, by applying a provided set of equivalences
-/// between components of the symbol names in the profile.
-class SampleProfileReaderItaniumRemapper : public SampleProfileReader {
-public:
- SampleProfileReaderItaniumRemapper(
- std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
- std::unique_ptr<SampleProfileReader> Underlying)
- : SampleProfileReader(std::move(B), C, Underlying->getFormat()) {
- Profiles = std::move(Underlying->getProfiles());
- Summary = takeSummary(*Underlying);
- // Keep the underlying reader alive; the profile data may contain
- // StringRefs referencing names in its name table.
- UnderlyingReader = std::move(Underlying);
- }
-
- /// Create a remapped sample profile from the given remapping file and
- /// underlying samples.
- static ErrorOr<std::unique_ptr<SampleProfileReader>>
- create(const Twine &Filename, LLVMContext &C,
- std::unique_ptr<SampleProfileReader> Underlying);
-
- /// Read and validate the file header.
- std::error_code readHeader() override { return sampleprof_error::success; }
-
- /// Read remapping file and apply it to the sample profile.
- std::error_code read() override;
-
- /// Return the samples collected for function \p F.
- FunctionSamples *getSamplesFor(StringRef FunctionName) override;
- using SampleProfileReader::getSamplesFor;
-
-private:
- SymbolRemappingReader Remappings;
- DenseMap<SymbolRemappingReader::Key, FunctionSamples*> SampleMap;
- std::unique_ptr<SampleProfileReader> UnderlyingReader;
-};
-
} // end namespace sampleprof
} // end namespace llvm
diff --git a/include/llvm/ProfileData/SampleProfWriter.h b/include/llvm/ProfileData/SampleProfWriter.h
index 81e6e3ab0b4a..cc951594c9e2 100644
--- a/include/llvm/ProfileData/SampleProfWriter.h
+++ b/include/llvm/ProfileData/SampleProfWriter.h
@@ -36,7 +36,7 @@ public:
/// Write sample profiles in \p S.
///
/// \returns status code of the file update operation.
- virtual std::error_code write(const FunctionSamples &S) = 0;
+ virtual std::error_code writeSample(const FunctionSamples &S) = 0;
/// Write all the sample profiles in the given map of samples.
///
@@ -56,6 +56,8 @@ public:
static ErrorOr<std::unique_ptr<SampleProfileWriter>>
create(std::unique_ptr<raw_ostream> &OS, SampleProfileFormat Format);
+ virtual void setProfileSymbolList(ProfileSymbolList *PSL) {}
+
protected:
SampleProfileWriter(std::unique_ptr<raw_ostream> &OS)
: OutputStream(std::move(OS)) {}
@@ -64,6 +66,10 @@ protected:
virtual std::error_code
writeHeader(const StringMap<FunctionSamples> &ProfileMap) = 0;
+ // Write function profiles to the profile file.
+ virtual std::error_code
+ writeFuncProfiles(const StringMap<FunctionSamples> &ProfileMap);
+
/// Output stream where to emit the profile to.
std::unique_ptr<raw_ostream> OutputStream;
@@ -72,12 +78,15 @@ protected:
/// Compute summary for this profile.
void computeSummary(const StringMap<FunctionSamples> &ProfileMap);
+
+ /// Profile format.
+ SampleProfileFormat Format;
};
/// Sample-based profile writer (text format).
class SampleProfileWriterText : public SampleProfileWriter {
public:
- std::error_code write(const FunctionSamples &S) override;
+ std::error_code writeSample(const FunctionSamples &S) override;
protected:
SampleProfileWriterText(std::unique_ptr<raw_ostream> &OS)
@@ -102,13 +111,14 @@ private:
/// Sample-based profile writer (binary format).
class SampleProfileWriterBinary : public SampleProfileWriter {
public:
- virtual std::error_code write(const FunctionSamples &S) override;
SampleProfileWriterBinary(std::unique_ptr<raw_ostream> &OS)
: SampleProfileWriter(OS) {}
+ virtual std::error_code writeSample(const FunctionSamples &S) override;
+
protected:
- virtual std::error_code writeNameTable() = 0;
- virtual std::error_code writeMagicIdent() = 0;
+ virtual std::error_code writeMagicIdent(SampleProfileFormat Format);
+ virtual std::error_code writeNameTable();
virtual std::error_code
writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
std::error_code writeSummary();
@@ -118,10 +128,10 @@ protected:
MapVector<StringRef, uint32_t> NameTable;
-private:
void addName(StringRef FName);
void addNames(const FunctionSamples &S);
+private:
friend ErrorOr<std::unique_ptr<SampleProfileWriter>>
SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
SampleProfileFormat Format);
@@ -129,10 +139,99 @@ private:
class SampleProfileWriterRawBinary : public SampleProfileWriterBinary {
using SampleProfileWriterBinary::SampleProfileWriterBinary;
+};
+
+class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
+ using SampleProfileWriterBinary::SampleProfileWriterBinary;
+public:
+ virtual std::error_code
+ write(const StringMap<FunctionSamples> &ProfileMap) override;
+
+ void setToCompressAllSections();
+ void setToCompressSection(SecType Type);
protected:
- virtual std::error_code writeNameTable() override;
- virtual std::error_code writeMagicIdent() override;
+ uint64_t markSectionStart(SecType Type);
+ std::error_code addNewSection(SecType Sec, uint64_t SectionStart);
+ virtual void initSectionHdrLayout() = 0;
+ virtual std::error_code
+ writeSections(const StringMap<FunctionSamples> &ProfileMap) = 0;
+
+ // Specify the order of sections in the section header table. Note that
+ // the order of sections in the profile may be different from the
+ // order in SectionHdrLayout. The sample reader will follow the order
+ // in SectionHdrLayout to read each section.
+ SmallVector<SecHdrTableEntry, 8> SectionHdrLayout;
+
+private:
+ void allocSecHdrTable();
+ std::error_code writeSecHdrTable();
+ virtual std::error_code
+ writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ void addSectionFlags(SecType Type, SecFlags Flags);
+ SecHdrTableEntry &getEntryInLayout(SecType Type);
+ std::error_code compressAndOutput();
+
+ // We will swap the raw_ostream held by LocalBufStream and that
+ // held by OutputStream if we try to add a section which needs
+ // compression. After the swap, all the data written to output
+ // will be temporarily buffered into the underlying raw_string_ostream
+ // originally held by LocalBufStream. After the data writing for the
+ // section is completed, compress the data in the local buffer,
+ // swap the raw_ostream back and write the compressed data to the
+ // real output.
+ std::unique_ptr<raw_ostream> LocalBufStream;
+ // The location where the output stream starts.
+ uint64_t FileStart;
+ // The location in the output stream where the SecHdrTable should be
+ // written to.
+ uint64_t SecHdrTableOffset;
+ // The section header table, holding the initial section flags settings.
+ std::vector<SecHdrTableEntry> SecHdrTable;
+};
+
+class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase {
+public:
+ SampleProfileWriterExtBinary(std::unique_ptr<raw_ostream> &OS)
+ : SampleProfileWriterExtBinaryBase(OS) {
+ initSectionHdrLayout();
+ }
+
+ virtual std::error_code writeSample(const FunctionSamples &S) override;
+ virtual void setProfileSymbolList(ProfileSymbolList *PSL) override {
+ ProfSymList = PSL;
+ };
+
+private:
+ virtual void initSectionHdrLayout() override {
+ // Note that the SecFuncOffsetTable section is written after SecLBRProfile
+ // in the profile, but is placed before SecLBRProfile in SectionHdrLayout.
+ //
+ // This is because the sample reader follows the order of SectionHdrLayout
+ // to read each section; to read function profiles on demand, the sample
+ // reader needs to get the offset of each function profile first.
+ //
+ // The SecFuncOffsetTable section is written after SecLBRProfile in the
+ // profile because FuncOffsetTable needs to be populated while the
+ // SecLBRProfile section is written.
+ SectionHdrLayout = {{SecProfSummary, 0, 0, 0},
+ {SecNameTable, 0, 0, 0},
+ {SecFuncOffsetTable, 0, 0, 0},
+ {SecLBRProfile, 0, 0, 0},
+ {SecProfileSymbolList, 0, 0, 0}};
+ };
+ virtual std::error_code
+ writeSections(const StringMap<FunctionSamples> &ProfileMap) override;
+ ProfileSymbolList *ProfSymList = nullptr;
+
+ // Save the start of SecLBRProfile so we can compute the offset of each
+ // function's profile relative to the start of SecLBRProfile and record it
+ // in FuncOffsetTable.
+ uint64_t SecLBRProfileStart;
+ // FuncOffsetTable maps a function name to its profile offset in the
+ // SecLBRProfile section. It is used to load function profiles on demand.
+ MapVector<StringRef, uint64_t> FuncOffsetTable;
+ std::error_code writeFuncOffsetTable();
};
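
A sketch of emitting an extensible binary profile; the output path,
ProfileMap, and PSL are assumptions, and the cast reflects that the
compression knob lives on the ExtBinary base class rather than on
SampleProfileWriter:

  std::error_code EC;
  std::unique_ptr<llvm::raw_ostream> OS =
      std::make_unique<llvm::raw_fd_ostream>("out.extbinary", EC);
  auto WriterOrErr = llvm::sampleprof::SampleProfileWriter::create(
      OS, llvm::sampleprof::SPF_Ext_Binary);
  if (std::error_code E = WriterOrErr.getError())
    ; // handle E
  auto *Writer =
      static_cast<llvm::sampleprof::SampleProfileWriterExtBinaryBase *>(
          WriterOrErr.get().get());
  Writer->setToCompressAllSections();
  Writer->setProfileSymbolList(&PSL); // PSL obtained from the profiled binary.
  if (std::error_code E = Writer->write(ProfileMap))
    ; // handle E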
// CompactBinary is a compact format of binary profile which both reduces
@@ -169,7 +268,7 @@ class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary {
using SampleProfileWriterBinary::SampleProfileWriterBinary;
public:
- virtual std::error_code write(const FunctionSamples &S) override;
+ virtual std::error_code writeSample(const FunctionSamples &S) override;
virtual std::error_code
write(const StringMap<FunctionSamples> &ProfileMap) override;
@@ -181,7 +280,6 @@ protected:
/// towards profile start.
uint64_t TableOffset;
virtual std::error_code writeNameTable() override;
- virtual std::error_code writeMagicIdent() override;
virtual std::error_code
writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
std::error_code writeFuncOffsetTable();
diff --git a/include/llvm/Remarks/BitstreamRemarkContainer.h b/include/llvm/Remarks/BitstreamRemarkContainer.h
new file mode 100644
index 000000000000..a2282fca04ab
--- /dev/null
+++ b/include/llvm/Remarks/BitstreamRemarkContainer.h
@@ -0,0 +1,106 @@
+//===-- BitstreamRemarkContainer.h - Container for remarks ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides declarations for things used in the various types of
+// remark containers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_BITSTREAM_REMARK_CONTAINER_H
+#define LLVM_REMARKS_BITSTREAM_REMARK_CONTAINER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include <cstdint>
+
+namespace llvm {
+namespace remarks {
+
+/// The current version of the remark container.
+/// Note: this is different from the version of the remark entry.
+constexpr uint64_t CurrentContainerVersion = 0;
+/// The magic number used for identifying remark blocks.
+constexpr StringLiteral ContainerMagic("RMRK");
+
+/// Type of the remark container.
+/// The remark container has two modes:
+/// * separate: the metadata is separate from the remarks and points to the
+/// auxiliary file that contains the remarks.
+/// * standalone: the metadata and the remarks are emitted together.
+enum class BitstreamRemarkContainerType {
+ /// The metadata emitted separately.
+ /// This will contain the following:
+ /// * Container version and type
+ /// * String table
+ /// * External file
+ SeparateRemarksMeta,
+ /// The remarks emitted separately.
+ /// This will contain the following:
+ /// * Container version and type
+ /// * Remark version
+ SeparateRemarksFile,
+ /// Everything is emitted together.
+ /// This will contain the following:
+ /// * Container version and type
+ /// * Remark version
+ /// * String table
+ Standalone,
+ First = SeparateRemarksMeta,
+ Last = Standalone,
+};
+
+/// The possible blocks that will be encountered in a bitstream remark
+/// container.
+enum BlockIDs {
+ /// The metadata block is mandatory. It should always come after the
+ /// BLOCKINFO_BLOCK, and contains metadata that should be used when parsing
+ /// REMARK_BLOCKs.
+ /// There should always be only one META_BLOCK.
+ META_BLOCK_ID = bitc::FIRST_APPLICATION_BLOCKID,
+ /// One remark entry is represented using a REMARK_BLOCK. There can be
+ /// multiple REMARK_BLOCKs in the same file.
+ REMARK_BLOCK_ID
+};
+
+constexpr StringRef MetaBlockName = StringRef("Meta", 4);
+constexpr StringRef RemarkBlockName = StringRef("Remark", 6);
+
+/// The possible records that can be encountered in the previously described
+/// blocks.
+enum RecordIDs {
+ // Meta block records.
+ RECORD_META_CONTAINER_INFO = 1,
+ RECORD_META_REMARK_VERSION,
+ RECORD_META_STRTAB,
+ RECORD_META_EXTERNAL_FILE,
+ // Remark block records.
+ RECORD_REMARK_HEADER,
+ RECORD_REMARK_DEBUG_LOC,
+ RECORD_REMARK_HOTNESS,
+ RECORD_REMARK_ARG_WITH_DEBUGLOC,
+ RECORD_REMARK_ARG_WITHOUT_DEBUGLOC,
+ // Helpers.
+ RECORD_FIRST = RECORD_META_CONTAINER_INFO,
+ RECORD_LAST = RECORD_REMARK_ARG_WITHOUT_DEBUGLOC
+};
+
+constexpr StringRef MetaContainerInfoName = StringRef("Container info", 14);
+constexpr StringRef MetaRemarkVersionName = StringRef("Remark version", 14);
+constexpr StringRef MetaStrTabName = StringRef("String table", 12);
+constexpr StringRef MetaExternalFileName = StringRef("External File", 13);
+constexpr StringRef RemarkHeaderName = StringRef("Remark header", 13);
+constexpr StringRef RemarkDebugLocName = StringRef("Remark debug location", 21);
+constexpr StringRef RemarkHotnessName = StringRef("Remark hotness", 14);
+constexpr StringRef RemarkArgWithDebugLocName =
+ StringRef("Argument with debug location", 28);
+constexpr StringRef RemarkArgWithoutDebugLocName = StringRef("Argument", 8);
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_BITSTREAM_REMARK_CONTAINER_H */
diff --git a/include/llvm/Remarks/BitstreamRemarkParser.h b/include/llvm/Remarks/BitstreamRemarkParser.h
new file mode 100644
index 000000000000..7ebd731693b2
--- /dev/null
+++ b/include/llvm/Remarks/BitstreamRemarkParser.h
@@ -0,0 +1,116 @@
+//===-- BitstreamRemarkParser.h - Bitstream parser --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an implementation of the remark parser using the LLVM
+// Bitstream format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H
+#define LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Bitstream/BitstreamReader.h"
+#include "llvm/Remarks/BitstreamRemarkContainer.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "llvm/Support/Error.h"
+#include <array>
+
+namespace llvm {
+namespace remarks {
+
+/// Helper to parse a META_BLOCK for a bitstream remark container.
+struct BitstreamMetaParserHelper {
+ /// The Bitstream reader.
+ BitstreamCursor &Stream;
+ /// Reference to the storage for the block info.
+ BitstreamBlockInfo &BlockInfo;
+ /// The parsed content: depending on the container type, some fields might be
+ /// empty.
+ Optional<uint64_t> ContainerVersion;
+ Optional<uint8_t> ContainerType;
+ Optional<StringRef> StrTabBuf;
+ Optional<StringRef> ExternalFilePath;
+ Optional<uint64_t> RemarkVersion;
+
+ /// Continue parsing with \p Stream. \p Stream is expected to contain an
+ /// ENTER_SUBBLOCK to the META_BLOCK at the current position.
+ /// \p Stream is expected to have a BLOCKINFO_BLOCK set.
+ BitstreamMetaParserHelper(BitstreamCursor &Stream,
+ BitstreamBlockInfo &BlockInfo);
+
+ /// Parse the META_BLOCK and fill the available entries.
+ /// This helper does not check for the validity of the fields.
+ Error parse();
+};
+
+/// Helper to parse a REMARK_BLOCK for a bitstream remark container.
+struct BitstreamRemarkParserHelper {
+ /// The Bitstream reader.
+ BitstreamCursor &Stream;
+ /// The parsed content: depending on the remark, some fields might be empty.
+ Optional<uint8_t> Type;
+ Optional<uint64_t> RemarkNameIdx;
+ Optional<uint64_t> PassNameIdx;
+ Optional<uint64_t> FunctionNameIdx;
+ Optional<uint64_t> SourceFileNameIdx;
+ Optional<uint32_t> SourceLine;
+ Optional<uint32_t> SourceColumn;
+ Optional<uint64_t> Hotness;
+ struct Argument {
+ Optional<uint64_t> KeyIdx;
+ Optional<uint64_t> ValueIdx;
+ Optional<uint64_t> SourceFileNameIdx;
+ Optional<uint32_t> SourceLine;
+ Optional<uint32_t> SourceColumn;
+ };
+ Optional<ArrayRef<Argument>> Args;
+ /// Avoid re-allocating a vector every time.
+ SmallVector<Argument, 8> TmpArgs;
+
+ /// Continue parsing with \p Stream. \p Stream is expected to contain an
+ /// ENTER_SUBBLOCK to the REMARK_BLOCK at the current position.
+ /// \p Stream is expected to have a BLOCKINFO_BLOCK set and to have already
+ /// parsed the META_BLOCK.
+ BitstreamRemarkParserHelper(BitstreamCursor &Stream);
+
+ /// Parse the REMARK_BLOCK and fill the available entries.
+ /// This helper does not check for the validity of the fields.
+ Error parse();
+};
+
+/// Helper to parse any bitstream remark container.
+struct BitstreamParserHelper {
+ /// The Bitstream reader.
+ BitstreamCursor Stream;
+ /// The block info block.
+ BitstreamBlockInfo BlockInfo;
+ /// Start parsing at \p Buffer.
+ BitstreamParserHelper(StringRef Buffer);
+ /// Parse the magic number.
+ Expected<std::array<char, 4>> parseMagic();
+ /// Parse the block info block containing all the abbrevs.
+ /// This needs to be called before calling any other parsing function.
+ Error parseBlockInfoBlock();
+ /// Return true if the next block is a META_BLOCK. This function does not move
+ /// the cursor.
+ Expected<bool> isMetaBlock();
+ /// Return true if the next block is a REMARK_BLOCK. This function does not
+ /// move the cursor.
+ Expected<bool> isRemarkBlock();
+ /// Return true if the parser reached the end of the stream.
+ bool atEndOfStream() { return Stream.AtEndOfStream(); }
+ /// Jump to the end of the stream, skipping everything.
+ void skipToEnd() { return Stream.skipToEnd(); }
+};
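
A simplified sketch of the driving loop these helpers support, written inside
a function returning Error; Buf holds the container contents, and a full
reader would also skip blocks it does not recognize:

  llvm::remarks::BitstreamParserHelper Helper(Buf);
  llvm::Expected<std::array<char, 4>> Magic = Helper.parseMagic();
  if (!Magic)
    return Magic.takeError();
  if (llvm::Error E = Helper.parseBlockInfoBlock())
    return E;
  while (!Helper.atEndOfStream()) {
    llvm::Expected<bool> IsMeta = Helper.isMetaBlock();
    if (!IsMeta)
      return IsMeta.takeError();
    if (*IsMeta) {
      llvm::remarks::BitstreamMetaParserHelper Meta(Helper.Stream,
                                                    Helper.BlockInfo);
      if (llvm::Error E = Meta.parse())
        return E;
      continue;
    }
    llvm::remarks::BitstreamRemarkParserHelper Remark(Helper.Stream);
    if (llvm::Error E = Remark.parse())
      return E;
  }
  return llvm::Error::success();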
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H */
diff --git a/include/llvm/Remarks/BitstreamRemarkSerializer.h b/include/llvm/Remarks/BitstreamRemarkSerializer.h
new file mode 100644
index 000000000000..62a175a1db0b
--- /dev/null
+++ b/include/llvm/Remarks/BitstreamRemarkSerializer.h
@@ -0,0 +1,196 @@
+//===-- BitstreamRemarkSerializer.h - Bitstream serializer ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an implementation of the serializer using the LLVM
+// Bitstream format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_BITSTREAM_REMARK_SERIALIZER_H
+#define LLVM_REMARKS_BITSTREAM_REMARK_SERIALIZER_H
+
+#include "llvm/Bitstream/BitstreamWriter.h"
+#include "llvm/Remarks/BitstreamRemarkContainer.h"
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace remarks {
+
+/// Serialize the remarks to LLVM bitstream.
+/// This class provides ways to emit remarks in the LLVM bitstream format and
+/// its associated metadata.
+///
+/// * The separate model:
+/// Separate meta: | Container info
+/// | String table
+/// | External file
+///
+/// Separate remarks: | Container info
+/// | Remark version
+/// | Remark0
+/// | Remark1
+/// | Remark2
+/// | ...
+///
+/// * The standalone model: | Container info
+/// | String table
+/// | Remark version
+/// | Remark0
+/// | Remark1
+/// | Remark2
+/// | ...
+///
+struct BitstreamRemarkSerializerHelper {
+ /// Buffer used for encoding the bitstream before writing it to the final
+ /// stream.
+ SmallVector<char, 1024> Encoded;
+ /// Buffer used to construct records and pass them to the bitstream writer.
+ SmallVector<uint64_t, 64> R;
+ /// The Bitstream writer.
+ BitstreamWriter Bitstream;
+ /// The type of the container we are serializing.
+ BitstreamRemarkContainerType ContainerType;
+
+ /// Abbrev IDs initialized in the block info block.
+ /// Note: depending on the container type, some IDs might be uninitialized.
+ /// Warning: When adding more abbrev IDs, make sure to update the
+ /// BlockCodeSize (in the call to EnterSubblock).
+ uint64_t RecordMetaContainerInfoAbbrevID = 0;
+ uint64_t RecordMetaRemarkVersionAbbrevID = 0;
+ uint64_t RecordMetaStrTabAbbrevID = 0;
+ uint64_t RecordMetaExternalFileAbbrevID = 0;
+ uint64_t RecordRemarkHeaderAbbrevID = 0;
+ uint64_t RecordRemarkDebugLocAbbrevID = 0;
+ uint64_t RecordRemarkHotnessAbbrevID = 0;
+ uint64_t RecordRemarkArgWithDebugLocAbbrevID = 0;
+ uint64_t RecordRemarkArgWithoutDebugLocAbbrevID = 0;
+
+ BitstreamRemarkSerializerHelper(BitstreamRemarkContainerType ContainerType);
+
+ // Disable copy and move: Bitstream points to Encoded, which needs special
+ // handling during copy/move, but moving the vectors is probably useless
+ // anyway.
+ BitstreamRemarkSerializerHelper(const BitstreamRemarkSerializerHelper &) =
+ delete;
+ BitstreamRemarkSerializerHelper &
+ operator=(const BitstreamRemarkSerializerHelper &) = delete;
+ BitstreamRemarkSerializerHelper(BitstreamRemarkSerializerHelper &&) = delete;
+ BitstreamRemarkSerializerHelper &
+ operator=(BitstreamRemarkSerializerHelper &&) = delete;
+
+ /// Set up the necessary block info entries according to the container type.
+ void setupBlockInfo();
+
+ /// Set up the block info for the metadata block.
+ void setupMetaBlockInfo();
+ /// The remark version in the metadata block.
+ void setupMetaRemarkVersion();
+ void emitMetaRemarkVersion(uint64_t RemarkVersion);
+ /// The strtab in the metadata block.
+ void setupMetaStrTab();
+ void emitMetaStrTab(const StringTable &StrTab);
+ /// The external file in the metadata block.
+ void setupMetaExternalFile();
+ void emitMetaExternalFile(StringRef Filename);
+
+ /// The block info for the remarks block.
+ void setupRemarkBlockInfo();
+
+ /// Emit the metadata for the remarks.
+ void emitMetaBlock(uint64_t ContainerVersion,
+ Optional<uint64_t> RemarkVersion,
+ Optional<const StringTable *> StrTab = None,
+ Optional<StringRef> Filename = None);
+
+ /// Emit a remark block. The string table is required.
+ void emitRemarkBlock(const Remark &Remark, StringTable &StrTab);
+ /// Finalize the writing to \p OS.
+ void flushToStream(raw_ostream &OS);
+ /// Finalize the writing to a buffer.
+ /// The contents of the buffer remain valid for the lifetime of the object.
+ /// Any call to any other function in this class will invalidate the buffer.
+ StringRef getBuffer();
+};
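
The high-level emission sequence these members suggest, for the standalone
container; R, StrTab, and OS are assumptions:

  llvm::remarks::BitstreamRemarkSerializerHelper Helper(
      llvm::remarks::BitstreamRemarkContainerType::Standalone);
  Helper.setupBlockInfo();           // Emit the BLOCKINFO_BLOCK once.
  Helper.emitMetaBlock(llvm::remarks::CurrentContainerVersion,
                       llvm::remarks::CurrentRemarkVersion, &StrTab);
  Helper.emitRemarkBlock(R, StrTab); // One call per remark.
  Helper.flushToStream(OS);          // Write the encoded bits out.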
+
+/// Implementation of the remark serializer using LLVM bitstream.
+struct BitstreamRemarkSerializer : public RemarkSerializer {
+ /// The file should contain:
+ /// 1) The block info block that describes how to read the blocks.
+ /// 2) The metadata block that contains various information about the remarks
+ /// in the file.
+ /// 3) A number of remark blocks.
+
+ /// We need to set up 1) and 2) first, so that we can emit 3) after. This flag
+ /// is used to emit the first two blocks only once.
+ bool DidSetUp = false;
+ /// The helper to emit bitstream.
+ BitstreamRemarkSerializerHelper Helper;
+
+ /// Construct a serializer that will create its own string table.
+ BitstreamRemarkSerializer(raw_ostream &OS, SerializerMode Mode);
+ /// Construct a serializer with a pre-filled string table.
+ BitstreamRemarkSerializer(raw_ostream &OS, SerializerMode Mode,
+ StringTable StrTab);
+
+ /// Emit a remark to the stream. This also emits the metadata associated
+ /// with the remarks based on the SerializerMode specified at construction.
+ /// This writes the serialized output to the provided stream.
+ void emit(const Remark &Remark) override;
+ /// The metadata serializer associated with this remark serializer. Based
+ /// on the container type of the current serializer, the container type of
+ /// the metadata serializer will change.
+ std::unique_ptr<MetaSerializer>
+ metaSerializer(raw_ostream &OS,
+ Optional<StringRef> ExternalFilename = None) override;
+
+ static bool classof(const RemarkSerializer *S) {
+ return S->SerializerFormat == Format::Bitstream;
+ }
+};
+
+/// Serializer of metadata for bitstream remarks.
+struct BitstreamMetaSerializer : public MetaSerializer {
+ /// This class can be used with [1] a pre-constructed
+ /// BitstreamRemarkSerializerHelper, or with [2] one that is owned by the meta
+ /// serializer. In case of [1], we need to be able to store a reference to the
+ /// object, while in case of [2] we need to store the whole object.
+ Optional<BitstreamRemarkSerializerHelper> TmpHelper;
+ /// The actual helper, that can point to \p TmpHelper or to an external helper
+ /// object.
+ BitstreamRemarkSerializerHelper *Helper = nullptr;
+
+ Optional<const StringTable *> StrTab;
+ Optional<StringRef> ExternalFilename;
+
+ /// Create a new meta serializer based on \p ContainerType.
+ BitstreamMetaSerializer(raw_ostream &OS,
+ BitstreamRemarkContainerType ContainerType,
+ Optional<const StringTable *> StrTab = None,
+ Optional<StringRef> ExternalFilename = None)
+ : MetaSerializer(OS), TmpHelper(None), Helper(nullptr), StrTab(StrTab),
+ ExternalFilename(ExternalFilename) {
+ TmpHelper.emplace(ContainerType);
+ Helper = &*TmpHelper;
+ }
+
+ /// Create a new meta serializer based on a previously built \p Helper.
+ BitstreamMetaSerializer(raw_ostream &OS,
+ BitstreamRemarkSerializerHelper &Helper,
+ Optional<const StringTable *> StrTab = None,
+ Optional<StringRef> ExternalFilename = None)
+ : MetaSerializer(OS), TmpHelper(None), Helper(&Helper), StrTab(StrTab),
+ ExternalFilename(ExternalFilename) {}
+
+ void emit() override;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_BITSTREAM_REMARK_SERIALIZER_H */
diff --git a/include/llvm/Remarks/Remark.h b/include/llvm/Remarks/Remark.h
index 05d0ea60accd..1243311fb8c5 100644
--- a/include/llvm/Remarks/Remark.h
+++ b/include/llvm/Remarks/Remark.h
@@ -23,7 +23,8 @@
namespace llvm {
namespace remarks {
-constexpr uint64_t Version = 0;
+/// The current version of the remark entry.
+constexpr uint64_t CurrentRemarkVersion = 0;
/// The debug location used to track a remark back to the source file.
struct RemarkLocation {
@@ -58,7 +59,8 @@ enum class Type {
AnalysisFPCommute,
AnalysisAliasing,
Failure,
- LastTypeValue = Failure
+ First = Unknown,
+ Last = Failure
};
/// A remark type used for both emission and parsing.
@@ -107,6 +109,36 @@ private:
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Remark, LLVMRemarkEntryRef)
+/// Comparison operators for Remark objects and dependent objects.
+inline bool operator==(const RemarkLocation &LHS, const RemarkLocation &RHS) {
+ return LHS.SourceFilePath == RHS.SourceFilePath &&
+ LHS.SourceLine == RHS.SourceLine &&
+ LHS.SourceColumn == RHS.SourceColumn;
+}
+
+inline bool operator!=(const RemarkLocation &LHS, const RemarkLocation &RHS) {
+ return !(LHS == RHS);
+}
+
+inline bool operator==(const Argument &LHS, const Argument &RHS) {
+ return LHS.Key == RHS.Key && LHS.Val == RHS.Val && LHS.Loc == RHS.Loc;
+}
+
+inline bool operator!=(const Argument &LHS, const Argument &RHS) {
+ return !(LHS == RHS);
+}
+
+inline bool operator==(const Remark &LHS, const Remark &RHS) {
+ return LHS.RemarkType == RHS.RemarkType && LHS.PassName == RHS.PassName &&
+ LHS.RemarkName == RHS.RemarkName &&
+ LHS.FunctionName == RHS.FunctionName && LHS.Loc == RHS.Loc &&
+ LHS.Hotness == RHS.Hotness && LHS.Args == RHS.Args;
+}
+
+inline bool operator!=(const Remark &LHS, const Remark &RHS) {
+ return !(LHS == RHS);
+}
+
} // end namespace remarks
} // end namespace llvm
diff --git a/include/llvm/Remarks/RemarkFormat.h b/include/llvm/Remarks/RemarkFormat.h
index e167d99d2517..6dd32b226099 100644
--- a/include/llvm/Remarks/RemarkFormat.h
+++ b/include/llvm/Remarks/RemarkFormat.h
@@ -19,10 +19,10 @@
namespace llvm {
namespace remarks {
-constexpr StringRef Magic("REMARKS", 7);
+constexpr StringLiteral Magic("REMARKS");
/// The format used for serializing/deserializing remarks.
-enum class Format { Unknown, YAML };
+enum class Format { Unknown, YAML, YAMLStrTab, Bitstream };
/// Parse and validate a string for the remark format.
Expected<Format> parseFormat(StringRef FormatStr);
diff --git a/include/llvm/Remarks/RemarkParser.h b/include/llvm/Remarks/RemarkParser.h
index 671e1abe5ec7..d6b1fddb06ff 100644
--- a/include/llvm/Remarks/RemarkParser.h
+++ b/include/llvm/Remarks/RemarkParser.h
@@ -23,9 +23,6 @@
namespace llvm {
namespace remarks {
-struct ParserImpl;
-struct ParsedStringTable;
-
class EndOfFileError : public ErrorInfo<EndOfFileError> {
public:
static char ID;
@@ -39,11 +36,13 @@ public:
};
/// Parser used to parse a raw buffer to remarks::Remark objects.
-struct Parser {
+struct RemarkParser {
/// The format of the parser.
Format ParserFormat;
+ /// Path to prepend when opening an external remark file.
+ std::string ExternalFilePrependPath;
- Parser(Format ParserFormat) : ParserFormat(ParserFormat) {}
+ RemarkParser(Format ParserFormat) : ParserFormat(ParserFormat) {}
/// If no error occurs, this returns a valid Remark object.
/// If an error of type EndOfFileError occurs, it is safe to recover from it
@@ -52,7 +51,7 @@ struct Parser {
/// The pointer should never be null.
virtual Expected<std::unique_ptr<Remark>> next() = 0;
- virtual ~Parser() = default;
+ virtual ~RemarkParser() = default;
};
/// In-memory representation of the string table parsed from a buffer (e.g. the
@@ -60,16 +59,33 @@ struct Parser {
struct ParsedStringTable {
/// The buffer mapped from the section contents.
StringRef Buffer;
- /// Collection of offsets in the buffer for each string entry.
- SmallVector<size_t, 8> Offsets;
+ /// This object has high chances of being std::move'd around, so don't
+ /// use a SmallVector for once.
+ std::vector<size_t> Offsets;
- Expected<StringRef> operator[](size_t Index) const;
ParsedStringTable(StringRef Buffer);
+ /// Disable copy.
+ ParsedStringTable(const ParsedStringTable &) = delete;
+ ParsedStringTable &operator=(const ParsedStringTable &) = delete;
+ /// Should be movable.
+ ParsedStringTable(ParsedStringTable &&) = default;
+ ParsedStringTable &operator=(ParsedStringTable &&) = default;
+
+ size_t size() const { return Offsets.size(); }
+ Expected<StringRef> operator[](size_t Index) const;
};
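
A short sketch of walking a parsed table inside a function returning Error;
Buf is assumed to map the section contents:

  llvm::remarks::ParsedStringTable StrTab(Buf);
  for (size_t I = 0, E = StrTab.size(); I != E; ++I) {
    llvm::Expected<llvm::StringRef> MaybeStr = StrTab[I];
    if (!MaybeStr)
      return MaybeStr.takeError();
    // Use *MaybeStr.
  }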
-Expected<std::unique_ptr<Parser>>
+Expected<std::unique_ptr<RemarkParser>> createRemarkParser(Format ParserFormat,
+ StringRef Buf);
+
+Expected<std::unique_ptr<RemarkParser>>
createRemarkParser(Format ParserFormat, StringRef Buf,
- Optional<const ParsedStringTable *> StrTab = None);
+ ParsedStringTable StrTab);
+
+Expected<std::unique_ptr<RemarkParser>>
+createRemarkParserFromMeta(Format ParserFormat, StringRef Buf,
+ Optional<ParsedStringTable> StrTab = None,
+ Optional<StringRef> ExternalFilePrependPath = None);
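
A sketch of consuming a parser produced by the factories above, inside a
function returning Error; Buf is assumed, and EndOfFileError is the expected
way for the loop to terminate:

  auto MaybeParser =
      llvm::remarks::createRemarkParser(llvm::remarks::Format::YAML, Buf);
  if (!MaybeParser)
    return MaybeParser.takeError();
  llvm::remarks::RemarkParser &Parser = **MaybeParser;
  while (true) {
    llvm::Expected<std::unique_ptr<llvm::remarks::Remark>> MaybeRemark =
        Parser.next();
    if (!MaybeRemark) {
      llvm::Error E = MaybeRemark.takeError();
      if (E.isA<llvm::remarks::EndOfFileError>()) {
        llvm::consumeError(std::move(E)); // Normal end of the buffer.
        break;
      }
      return E;                           // A real parsing error.
    }
    const llvm::remarks::Remark &R = **MaybeRemark;
    // Use R.PassName, R.RemarkName, R.Args, ...
  }
  return llvm::Error::success();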
} // end namespace remarks
} // end namespace llvm
diff --git a/include/llvm/Remarks/RemarkSerializer.h b/include/llvm/Remarks/RemarkSerializer.h
index def5c2e16620..35752cd5f6fb 100644
--- a/include/llvm/Remarks/RemarkSerializer.h
+++ b/include/llvm/Remarks/RemarkSerializer.h
@@ -14,54 +14,74 @@
#define LLVM_REMARKS_REMARK_SERIALIZER_H
#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkStringTable.h"
-#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace remarks {
+enum class SerializerMode {
+ Separate, // A mode where the metadata is serialized separately from the
+ // remarks. Typically, this is used when the remarks need to be
+ // streamed to a side file and the metadata is embedded into the
+ // final result of the compilation.
+ Standalone // A mode where everything can be retrieved in the same
+ // file/buffer. Typically, this is used for storing remarks for
+ // later use.
+};
+
+struct MetaSerializer;
+
/// This is the base class for a remark serializer.
/// It includes support for using a string table while emitting.
-struct Serializer {
+struct RemarkSerializer {
+ /// The format of the serializer.
+ Format SerializerFormat;
/// The open raw_ostream that the remark diagnostics are emitted to.
raw_ostream &OS;
+ /// The serialization mode.
+ SerializerMode Mode;
/// The string table containing all the unique strings used in the output.
/// The table can be serialized to be consumed after the compilation.
Optional<StringTable> StrTab;
- Serializer(raw_ostream &OS) : OS(OS), StrTab() {}
+ RemarkSerializer(Format SerializerFormat, raw_ostream &OS,
+ SerializerMode Mode)
+ : SerializerFormat(SerializerFormat), OS(OS), Mode(Mode), StrTab() {}
/// This is just an interface.
- virtual ~Serializer() = default;
+ virtual ~RemarkSerializer() = default;
+ /// Emit a remark to the stream.
virtual void emit(const Remark &Remark) = 0;
+ /// Return the corresponding metadata serializer.
+ virtual std::unique_ptr<MetaSerializer>
+ metaSerializer(raw_ostream &OS,
+ Optional<StringRef> ExternalFilename = None) = 0;
};
-/// Wether the serializer should use a string table while emitting.
-enum class UseStringTable { No, Yes };
-
-/// Serialize the remarks to YAML. One remark entry looks like this:
-/// --- !<TYPE>
-/// Pass: <PASSNAME>
-/// Name: <REMARKNAME>
-/// DebugLoc: { File: <SOURCEFILENAME>, Line: <SOURCELINE>,
-/// Column: <SOURCECOLUMN> }
-/// Function: <FUNCTIONNAME>
-/// Args:
-/// - <KEY>: <VALUE>
-/// DebugLoc: { File: <FILE>, Line: <LINE>, Column: <COL> }
-/// ...
-struct YAMLSerializer : public Serializer {
- /// The YAML streamer.
- yaml::Output YAMLOutput;
+/// This is the base class for a remark metadata serializer.
+struct MetaSerializer {
+ /// The open raw_ostream that the metadata is emitted to.
+ raw_ostream &OS;
- YAMLSerializer(raw_ostream &OS,
- UseStringTable UseStringTable = remarks::UseStringTable::No);
+ MetaSerializer(raw_ostream &OS) : OS(OS) {}
- /// Emit a remark to the stream.
- void emit(const Remark &Remark) override;
+ /// This is just an interface.
+ virtual ~MetaSerializer() = default;
+ virtual void emit() = 0;
};
+/// Create a remark serializer.
+Expected<std::unique_ptr<RemarkSerializer>>
+createRemarkSerializer(Format RemarksFormat, SerializerMode Mode,
+ raw_ostream &OS);
+
+/// Create a remark serializer that uses a pre-filled string table.
+Expected<std::unique_ptr<RemarkSerializer>>
+createRemarkSerializer(Format RemarksFormat, SerializerMode Mode,
+ raw_ostream &OS, remarks::StringTable StrTab);
+
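
A sketch of the separate-mode flow these interfaces imply; OS, MetaOS, R, and
the external file name are assumptions:

  auto MaybeSerializer = llvm::remarks::createRemarkSerializer(
      llvm::remarks::Format::YAMLStrTab,
      llvm::remarks::SerializerMode::Separate, OS);
  if (!MaybeSerializer)
    return MaybeSerializer.takeError();
  llvm::remarks::RemarkSerializer &Serializer = **MaybeSerializer;
  Serializer.emit(R); // One call per remark.
  // At the end of the compilation, emit the metadata that points back to the
  // remark file.
  std::unique_ptr<llvm::remarks::MetaSerializer> Meta =
      Serializer.metaSerializer(MetaOS, llvm::StringRef("out.opt.yaml"));
  Meta->emit();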
} // end namespace remarks
} // end namespace llvm
diff --git a/include/llvm/Remarks/RemarkStringTable.h b/include/llvm/Remarks/RemarkStringTable.h
index f9b4fdbbfb8d..4ce27ee884c8 100644
--- a/include/llvm/Remarks/RemarkStringTable.h
+++ b/include/llvm/Remarks/RemarkStringTable.h
@@ -18,7 +18,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Allocator.h"
+#include "llvm/Remarks/Remark.h"
#include <vector>
namespace llvm {
@@ -27,21 +27,35 @@ class raw_ostream;
namespace remarks {
+struct ParsedStringTable;
+
/// The string table used for serializing remarks.
/// This table can be for example serialized in a section to be consumed after
/// the compilation.
struct StringTable {
- /// Allocator holding all the memory used by the map.
- BumpPtrAllocator Allocator;
/// The string table containing all the unique strings used in the output.
/// It maps a string to an unique ID.
- StringMap<unsigned, BumpPtrAllocator &> StrTab;
+ StringMap<unsigned, BumpPtrAllocator> StrTab;
/// Total size of the string table when serialized.
size_t SerializedSize = 0;
- StringTable() : Allocator(), StrTab(Allocator) {}
+ StringTable() = default;
+
+ /// Disable copy.
+ StringTable(const StringTable &) = delete;
+ StringTable &operator=(const StringTable &) = delete;
+ /// Should be movable.
+ StringTable(StringTable &&) = default;
+ StringTable &operator=(StringTable &&) = default;
+
+ /// Construct a string table from a ParsedStringTable.
+ StringTable(const ParsedStringTable &Other);
+
/// Add a string to the table. It returns an unique ID of the string.
std::pair<unsigned, StringRef> add(StringRef Str);
+ /// Modify \p R to use strings from this string table. If the string table
+ /// does not contain the strings, it adds them.
+ void internalize(Remark &R);
/// Serialize the string table to a stream. It is serialized as a little
/// endian uint64 (the size of the table in bytes) followed by a sequence of
/// NULL-terminated strings, where the N-th string is the string with the ID N
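
A sketch of the internalize() round trip described above; the remark fields
are illustrative, and serialize(raw_ostream &) is assumed from the comment
above:

  llvm::remarks::StringTable StrTab;
  llvm::remarks::Remark R;
  R.RemarkType = llvm::remarks::Type::Passed;
  R.PassName = "inline";
  R.RemarkName = "Inlined";
  R.FunctionName = "main";
  // After this call, R's StringRefs point into StrTab's storage, so R stays
  // valid for as long as StrTab does.
  StrTab.internalize(R);
  StrTab.serialize(OS); // Little-endian size, then NULL-terminated strings.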
diff --git a/include/llvm/Remarks/YAMLRemarkSerializer.h b/include/llvm/Remarks/YAMLRemarkSerializer.h
new file mode 100644
index 000000000000..f1213beab15d
--- /dev/null
+++ b/include/llvm/Remarks/YAMLRemarkSerializer.h
@@ -0,0 +1,108 @@
+//===-- YAMLRemarkSerializer.h - YAML Remark serialization ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an interface for serializing remarks to YAML.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_YAML_REMARK_SERIALIZER_H
+#define LLVM_REMARKS_YAML_REMARK_SERIALIZER_H
+
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Support/YAMLTraits.h"
+
+namespace llvm {
+namespace remarks {
+
+/// Serialize the remarks to YAML. One remark entry looks like this:
+/// --- !<TYPE>
+/// Pass: <PASSNAME>
+/// Name: <REMARKNAME>
+/// DebugLoc: { File: <SOURCEFILENAME>, Line: <SOURCELINE>,
+/// Column: <SOURCECOLUMN> }
+/// Function: <FUNCTIONNAME>
+/// Args:
+/// - <KEY>: <VALUE>
+/// DebugLoc: { File: <FILE>, Line: <LINE>, Column: <COL> }
+/// ...
+struct YAMLRemarkSerializer : public RemarkSerializer {
+ /// The YAML streamer.
+ yaml::Output YAMLOutput;
+
+ YAMLRemarkSerializer(raw_ostream &OS, SerializerMode Mode,
+ Optional<StringTable> StrTab = None);
+
+ void emit(const Remark &Remark) override;
+ std::unique_ptr<MetaSerializer>
+ metaSerializer(raw_ostream &OS,
+ Optional<StringRef> ExternalFilename = None) override;
+
+ static bool classof(const RemarkSerializer *S) {
+ return S->SerializerFormat == Format::YAML;
+ }
+
+protected:
+ YAMLRemarkSerializer(Format SerializerFormat, raw_ostream &OS,
+ SerializerMode Mode,
+ Optional<StringTable> StrTab = None);
+};
+
+struct YAMLMetaSerializer : public MetaSerializer {
+ Optional<StringRef> ExternalFilename;
+
+ YAMLMetaSerializer(raw_ostream &OS, Optional<StringRef> ExternalFilename)
+ : MetaSerializer(OS), ExternalFilename(ExternalFilename) {}
+
+ void emit() override;
+};
+
+/// Serialize the remarks to YAML using a string table. A remark entry looks
+/// like the regular YAML remark, but instead of string entries it uses
+/// numbers that map to an index in the string table.
+struct YAMLStrTabRemarkSerializer : public YAMLRemarkSerializer {
+ /// Whether we already emitted the metadata in standalone mode.
+ /// This should be set to true after the first invocation of `emit`.
+ bool DidEmitMeta = false;
+
+ YAMLStrTabRemarkSerializer(raw_ostream &OS, SerializerMode Mode)
+ : YAMLRemarkSerializer(Format::YAMLStrTab, OS, Mode) {
+ // We always need a string table for this type of serializer.
+ StrTab.emplace();
+ }
+ YAMLStrTabRemarkSerializer(raw_ostream &OS, SerializerMode Mode,
+ StringTable StrTab)
+ : YAMLRemarkSerializer(Format::YAMLStrTab, OS, Mode, std::move(StrTab)) {}
+
+ /// Override to emit the metadata if necessary.
+ void emit(const Remark &Remark) override;
+
+ std::unique_ptr<MetaSerializer>
+ metaSerializer(raw_ostream &OS,
+ Optional<StringRef> ExternalFilename = None) override;
+
+ static bool classof(const RemarkSerializer *S) {
+ return S->SerializerFormat == Format::YAMLStrTab;
+ }
+};
+
+struct YAMLStrTabMetaSerializer : public YAMLMetaSerializer {
+ /// The string table is part of the metadata.
+ const StringTable &StrTab;
+
+ YAMLStrTabMetaSerializer(raw_ostream &OS,
+ Optional<StringRef> ExternalFilename,
+ const StringTable &StrTab)
+ : YAMLMetaSerializer(OS, ExternalFilename), StrTab(StrTab) {}
+
+ void emit() override;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_YAML_REMARK_SERIALIZER_H */
diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def
index e152f383b3ec..15737265dfc3 100644
--- a/include/llvm/Support/AArch64TargetParser.def
+++ b/include/llvm/Support/AArch64TargetParser.def
@@ -50,35 +50,36 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
#endif
// FIXME: This would be nicer were it tablegen
-AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
-AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
-AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
-AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
-AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
-AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
-AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
-AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
-AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
-AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
-AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
-AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
-AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
-AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
-AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
-AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
-AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
-AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
-AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
-AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
-AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
-AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
-AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm")
-AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
-AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
-AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
-AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
-AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
-AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
+AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
+AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
+AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
+AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
+AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
+AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
+AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
+AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
+AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
+AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
+AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
+AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
+AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
+AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
+AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
+AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
+AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
+AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
+AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
+AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
+AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
+AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
+AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm")
+AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
+AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
+AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
+AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
+AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
+AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
+AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme")
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME
@@ -92,6 +93,12 @@ AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
+AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
+ AArch64::AEK_RCPC | AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-a65ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
+ AArch64::AEK_RCPC | AArch64::AEK_SSBS))
AARCH64_CPU_NAME("cortex-a72", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a73", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -104,6 +111,13 @@ AARCH64_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
+ AArch64::AEK_RCPC | AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
+ AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RCPC |
+ AArch64::AEK_SSBS))
AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_NONE))
AARCH64_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/include/llvm/Support/AArch64TargetParser.h b/include/llvm/Support/AArch64TargetParser.h
index 965d38535e74..94f341c83260 100644
--- a/include/llvm/Support/AArch64TargetParser.h
+++ b/include/llvm/Support/AArch64TargetParser.h
@@ -53,7 +53,8 @@ enum ArchExtKind : unsigned {
AEK_SVE2AES = 1 << 24,
AEK_SVE2SM4 = 1 << 25,
AEK_SVE2SHA3 = 1 << 26,
- AEK_BITPERM = 1 << 27,
+ AEK_SVE2BITPERM = 1 << 27,
+ AEK_TME = 1 << 28,
};
enum class ArchKind {
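
As an aside, a minimal sketch (assuming the renamed AArch64::AEK_SVE2BITPERM
and the new AArch64::AEK_TME constants above are in scope): each AEK_* value
is a single bit, so a CPU's extension set is the bitwise OR of its flags and
membership is a simple mask test.

    #include <cstdint>

    // Hypothetical extension set for a CPU with SVE2 bit permutation and
    // transactional memory support.
    constexpr unsigned CpuExts =
        AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM | AArch64::AEK_TME;

    static_assert((CpuExts & AArch64::AEK_TME) != 0,
                  "tme is part of this CPU's extension set");
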
diff --git a/include/llvm/Support/ARMTargetParser.def b/include/llvm/Support/ARMTargetParser.def
index f466b3252748..3e77e20762c1 100644
--- a/include/llvm/Support/ARMTargetParser.def
+++ b/include/llvm/Support/ARMTargetParser.def
@@ -274,6 +274,8 @@ ARM_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
diff --git a/include/llvm/Support/ARMTargetParser.h b/include/llvm/Support/ARMTargetParser.h
index 4b9070dea596..02d4c975129f 100644
--- a/include/llvm/Support/ARMTargetParser.h
+++ b/include/llvm/Support/ARMTargetParser.h
@@ -39,19 +39,13 @@ enum ArchExtKind : unsigned {
AEK_DSP = 1 << 10,
AEK_FP16 = 1 << 11,
AEK_RAS = 1 << 12,
- AEK_SVE = 1 << 13,
- AEK_DOTPROD = 1 << 14,
- AEK_SHA2 = 1 << 15,
- AEK_AES = 1 << 16,
- AEK_FP16FML = 1 << 17,
- AEK_SB = 1 << 18,
- AEK_SVE2 = 1 << 19,
- AEK_SVE2AES = 1 << 20,
- AEK_SVE2SM4 = 1 << 21,
- AEK_SVE2SHA3 = 1 << 22,
- AEK_BITPERM = 1 << 23,
- AEK_FP_DP = 1 << 24,
- AEK_LOB = 1 << 25,
+ AEK_DOTPROD = 1 << 13,
+ AEK_SHA2 = 1 << 14,
+ AEK_AES = 1 << 15,
+ AEK_FP16FML = 1 << 16,
+ AEK_SB = 1 << 17,
+ AEK_FP_DP = 1 << 18,
+ AEK_LOB = 1 << 19,
// Unsupported extensions.
AEK_OS = 0x8000000,
AEK_IWMMXT = 0x10000000,
diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h
index d12401f0eb49..eb42542b777f 100644
--- a/include/llvm/Support/AlignOf.h
+++ b/include/llvm/Support/AlignOf.h
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the AlignedCharArray and AlignedCharArrayUnion classes.
+// This file defines the AlignedCharArrayUnion class.
//
//===----------------------------------------------------------------------===//
@@ -18,128 +18,38 @@
namespace llvm {
-/// \struct AlignedCharArray
-/// Helper for building an aligned character array type.
-///
-/// This template is used to explicitly build up a collection of aligned
-/// character array types. We have to build these up using a macro and explicit
-/// specialization to cope with MSVC (at least till 2015) where only an
-/// integer literal can be used to specify an alignment constraint. Once built
-/// up here, we can then begin to indirect between these using normal C++
-/// template parameters.
-
-// MSVC requires special handling here.
-#ifndef _MSC_VER
-
-template<std::size_t Alignment, std::size_t Size>
-struct AlignedCharArray {
- alignas(Alignment) char buffer[Size];
-};
-
-#else // _MSC_VER
-
-/// Create a type with an aligned char buffer.
-template<std::size_t Alignment, std::size_t Size>
-struct AlignedCharArray;
-
-// We provide special variations of this template for the most common
-// alignments because __declspec(align(...)) doesn't actually work when it is
-// a member of a by-value function argument in MSVC, even if the alignment
-// request is something reasonably like 8-byte or 16-byte. Note that we can't
-// even include the declspec with the union that forces the alignment because
-// MSVC warns on the existence of the declspec despite the union member forcing
-// proper alignment.
-
-template<std::size_t Size>
-struct AlignedCharArray<1, Size> {
- union {
- char aligned;
- char buffer[Size];
- };
-};
-
-template<std::size_t Size>
-struct AlignedCharArray<2, Size> {
- union {
- short aligned;
- char buffer[Size];
- };
-};
-
-template<std::size_t Size>
-struct AlignedCharArray<4, Size> {
- union {
- int aligned;
- char buffer[Size];
- };
-};
+namespace detail {
-template<std::size_t Size>
-struct AlignedCharArray<8, Size> {
- union {
- double aligned;
- char buffer[Size];
- };
+template <typename T, typename... Ts> class AlignerImpl {
+ T t;
+ AlignerImpl<Ts...> rest;
+ AlignerImpl() = delete;
};
-
-// The rest of these are provided with a __declspec(align(...)) and we simply
-// can't pass them by-value as function arguments on MSVC.
-
-#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \
- template<std::size_t Size> \
- struct AlignedCharArray<x, Size> { \
- __declspec(align(x)) char buffer[Size]; \
- };
-
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128)
-
-#undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT
-
-#endif // _MSC_VER
-
-namespace detail {
-template <typename T1,
- typename T2 = char, typename T3 = char, typename T4 = char,
- typename T5 = char, typename T6 = char, typename T7 = char,
- typename T8 = char, typename T9 = char, typename T10 = char>
-class AlignerImpl {
- T1 t1; T2 t2; T3 t3; T4 t4; T5 t5; T6 t6; T7 t7; T8 t8; T9 t9; T10 t10;
-
+template <typename T> class AlignerImpl<T> {
+ T t;
AlignerImpl() = delete;
};
-template <typename T1,
- typename T2 = char, typename T3 = char, typename T4 = char,
- typename T5 = char, typename T6 = char, typename T7 = char,
- typename T8 = char, typename T9 = char, typename T10 = char>
-union SizerImpl {
- char arr1[sizeof(T1)], arr2[sizeof(T2)], arr3[sizeof(T3)], arr4[sizeof(T4)],
- arr5[sizeof(T5)], arr6[sizeof(T6)], arr7[sizeof(T7)], arr8[sizeof(T8)],
- arr9[sizeof(T9)], arr10[sizeof(T10)];
+template <typename T, typename... Ts> union SizerImpl {
+ char arr[sizeof(T)];
+ SizerImpl<Ts...> rest;
};
+
+template <typename T> union SizerImpl<T> { char arr[sizeof(T)]; };
} // end namespace detail
-/// This union template exposes a suitably aligned and sized character
-/// array member which can hold elements of any of up to ten types.
+/// A suitably aligned and sized character array member which can hold elements
+/// of any of the given types.
///
-/// These types may be arrays, structs, or any other types. The goal is to
-/// expose a char array buffer member which can be used as suitable storage for
-/// a placement new of any of these types. Support for more than ten types can
-/// be added at the cost of more boilerplate.
-template <typename T1,
- typename T2 = char, typename T3 = char, typename T4 = char,
- typename T5 = char, typename T6 = char, typename T7 = char,
- typename T8 = char, typename T9 = char, typename T10 = char>
-struct AlignedCharArrayUnion : llvm::AlignedCharArray<
- alignof(llvm::detail::AlignerImpl<T1, T2, T3, T4, T5,
- T6, T7, T8, T9, T10>),
- sizeof(::llvm::detail::SizerImpl<T1, T2, T3, T4, T5,
- T6, T7, T8, T9, T10>)> {
+/// These types may be arrays, structs, or any other types. This exposes a
+/// `buffer` member which can be used as suitable storage for a placement new of
+/// any of these types.
+template <typename T, typename... Ts> struct AlignedCharArrayUnion {
+ alignas(::llvm::detail::AlignerImpl<T, Ts...>) char buffer[sizeof(
+ llvm::detail::SizerImpl<T, Ts...>)];
};
+
} // end namespace llvm
#endif // LLVM_SUPPORT_ALIGNOF_H
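
For reference, a minimal usage sketch of the variadic AlignedCharArrayUnion,
assuming only the header above: `buffer` is sized for the largest listed type
and aligned for the strictest one, exactly as with the old ten-type form.

    #include "llvm/Support/AlignOf.h"
    #include <new>

    void demo() {
      llvm::AlignedCharArrayUnion<int, double, char[16]> Storage;
      static_assert(alignof(decltype(Storage)) >= alignof(double),
                    "aligned for the strictest member type");
      // Placement-new any of the listed types into the shared buffer.
      double *D = new (Storage.buffer) double(3.14);
      *D += 1.0;
      D->~double(); // caller is responsible for destruction
    }
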
diff --git a/include/llvm/Support/Alignment.h b/include/llvm/Support/Alignment.h
new file mode 100644
index 000000000000..72fad87dd0d4
--- /dev/null
+++ b/include/llvm/Support/Alignment.h
@@ -0,0 +1,403 @@
+//===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains types to represent alignments.
+// They are instrumented to guarantee that some invariants are preserved and
+// to prevent invalid manipulations.
+//
+// - Align represents an alignment in bytes; it is always set and is always a
+//   valid power of two. Its minimum value is 1, which means no alignment
+//   requirement.
+//
+// - MaybeAlign is an optional type; it may be undefined or set. When it is
+//   set, you can get the underlying Align type by using the getValue() method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ALIGNMENT_H_
+#define LLVM_SUPPORT_ALIGNMENT_H_
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <limits>
+
+namespace llvm {
+
+#define ALIGN_CHECK_ISPOSITIVE(decl) \
+ assert(decl > 0 && (#decl " should be positive"))
+#define ALIGN_CHECK_ISSET(decl) \
+ assert(decl.hasValue() && (#decl " should be defined"))
+
+/// This struct is a compact representation of a valid (non-zero power of two)
+/// alignment.
+/// It is suitable for use as static global constants.
+struct Align {
+private:
+ uint8_t ShiftValue = 0; /// The log2 of the required alignment.
+ /// ShiftValue is less than 64 by construction.
+
+ friend struct MaybeAlign;
+ friend unsigned Log2(Align);
+ friend bool operator==(Align Lhs, Align Rhs);
+ friend bool operator!=(Align Lhs, Align Rhs);
+ friend bool operator<=(Align Lhs, Align Rhs);
+ friend bool operator>=(Align Lhs, Align Rhs);
+ friend bool operator<(Align Lhs, Align Rhs);
+ friend bool operator>(Align Lhs, Align Rhs);
+ friend unsigned encode(struct MaybeAlign A);
+ friend struct MaybeAlign decodeMaybeAlign(unsigned Value);
+
+ /// A trivial type to allow construction of constexpr Align.
+ /// This is currently needed to work around a bug in GCC 5.3 which prevents
+ /// definition of constexpr assign operators.
+ /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic
+ /// FIXME: Remove this, make all assign operators constexpr and introduce user
+ /// defined literals when we don't have to support GCC 5.3 anymore.
+ /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain
+ struct LogValue {
+ uint8_t Log;
+ };
+
+public:
+ /// Default is byte-aligned.
+ constexpr Align() = default;
+ /// Do not perform checks in case of copy/move construct/assign, because the
+ /// checks have been performed when building `Other`.
+ constexpr Align(const Align &Other) = default;
+ constexpr Align(Align &&Other) = default;
+ Align &operator=(const Align &Other) = default;
+ Align &operator=(Align &&Other) = default;
+
+ explicit Align(uint64_t Value) {
+ assert(Value > 0 && "Value must not be 0");
+ assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2");
+ ShiftValue = Log2_64(Value);
+ assert(ShiftValue < 64 && "Broken invariant");
+ }
+
+ /// This is a hole in the type system and should not be abused.
+ /// Needed, for instance, to interact with C code.
+ uint64_t value() const { return uint64_t(1) << ShiftValue; }
+
+ /// Returns a default constructed Align which corresponds to no alignment.
+ /// This is useful for testing for unalignment, as it conveys clearer
+ /// semantics:
+ /// `if (A != Align::None())`
+ /// would be better than
+ /// `if (A > Align(1))`
+ constexpr static const Align None() { return Align(); }
+
+ /// Allow constructions of constexpr Align.
+ template <size_t kValue> constexpr static LogValue Constant() {
+ return LogValue{static_cast<uint8_t>(CTLog2<kValue>())};
+ }
+
+ /// Allow constructions of constexpr Align from types.
+ /// Compile time equivalent to Align(alignof(T)).
+ template <typename T> constexpr static LogValue Of() {
+ return Constant<std::alignment_of<T>::value>();
+ }
+
+ /// Constexpr constructor from LogValue type.
+ constexpr Align(LogValue CA) : ShiftValue(CA.Log) {}
+};
+
+/// Treats the value 0 as a 1, so Align is always at least 1.
+inline Align assumeAligned(uint64_t Value) {
+ return Value ? Align(Value) : Align();
+}
+
+/// This struct is a compact representation of a valid (power of two) or
+/// undefined (0) alignment.
+struct MaybeAlign : public llvm::Optional<Align> {
+private:
+ using UP = llvm::Optional<Align>;
+
+public:
+ /// Default is undefined.
+ MaybeAlign() = default;
+ /// Do not perform checks in case of copy/move construct/assign, because the
+ /// checks have been performed when building `Other`.
+ MaybeAlign(const MaybeAlign &Other) = default;
+ MaybeAlign &operator=(const MaybeAlign &Other) = default;
+ MaybeAlign(MaybeAlign &&Other) = default;
+ MaybeAlign &operator=(MaybeAlign &&Other) = default;
+
+ /// Use llvm::Optional<Align> constructor.
+ using UP::UP;
+
+ explicit MaybeAlign(uint64_t Value) {
+ assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&
+ "Alignment is neither 0 nor a power of 2");
+ if (Value)
+ emplace(Value);
+ }
+
+ /// For convenience, returns a valid alignment or 1 if undefined.
+ Align valueOrOne() const { return hasValue() ? getValue() : Align(); }
+};
+
+/// Checks that SizeInBytes is a multiple of the alignment.
+inline bool isAligned(Align Lhs, uint64_t SizeInBytes) {
+ return SizeInBytes % Lhs.value() == 0;
+}
+
+/// Checks that SizeInBytes is a multiple of the alignment.
+/// Returns false if the alignment is undefined.
+inline bool isAligned(MaybeAlign Lhs, uint64_t SizeInBytes) {
+ ALIGN_CHECK_ISSET(Lhs);
+ return SizeInBytes % (*Lhs).value() == 0;
+}
+
+/// Checks that Addr is a multiple of the alignment.
+inline bool isAddrAligned(Align Lhs, const void *Addr) {
+ return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr));
+}
+
+/// Returns a multiple of A needed to store `Size` bytes.
+inline uint64_t alignTo(uint64_t Size, Align A) {
+ const uint64_t value = A.value();
+ // The following line is equivalent to `(Size + value - 1) / value * value`.
+
+ // The division followed by a multiplication can be thought of as a right
+ // shift followed by a left shift which zeros out the extra bits produced in
+ // the bump; `~(value - 1)` is a mask where all those bits being zeroed out
+ // are just zero.
+
+ // Most compilers can generate this code but the pattern may be missed when
+ // multiple functions get inlined.
+ return (Size + value - 1) & ~(value - 1);
+}
+
+/// Returns a multiple of A needed to store `Size` bytes.
+/// Returns `Size` if current alignment is undefined.
+inline uint64_t alignTo(uint64_t Size, MaybeAlign A) {
+ return A ? alignTo(Size, A.getValue()) : Size;
+}
+
+/// Aligns `Addr` to `Alignment` bytes, rounding up.
+inline uintptr_t alignAddr(const void *Addr, Align Alignment) {
+ uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr);
+ assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=
+ ArithAddr && "Overflow");
+ return alignTo(ArithAddr, Alignment);
+}
+
+/// Returns the offset to the next integer (mod 2**64) that is greater than
+/// or equal to \p Value and is a multiple of \p Alignment.
+inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) {
+ return alignTo(Value, Alignment) - Value;
+}
+
+/// Returns the necessary adjustment for aligning `Addr` to `Alignment`
+/// bytes, rounding up.
+inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) {
+ return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment);
+}
+
+/// Returns the log2 of the alignment.
+inline unsigned Log2(Align A) { return A.ShiftValue; }
+
+/// Returns the log2 of the alignment.
+/// \pre A must be defined.
+inline unsigned Log2(MaybeAlign A) {
+ ALIGN_CHECK_ISSET(A);
+ return Log2(A.getValue());
+}
+
+/// Returns the alignment that satisfies both alignments.
+/// Same semantics as MinAlign.
+inline Align commonAlignment(Align A, Align B) { return std::min(A, B); }
+
+/// Returns the alignment that satisfies both alignments.
+/// Same semantics as MinAlign.
+inline Align commonAlignment(Align A, uint64_t Offset) {
+ return Align(MinAlign(A.value(), Offset));
+}
+
+/// Returns the alignment that satisfies both alignments.
+/// Same semantics as MinAlign.
+inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) {
+ return A && B ? commonAlignment(*A, *B) : A ? A : B;
+}
+
+/// Returns the alignment that satisfies both alignments.
+/// Same semantics as MinAlign.
+inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) {
+ return MaybeAlign(MinAlign((*A).value(), Offset));
+}
+
+/// Returns a representation of the alignment that encodes undefined as 0.
+inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; }
+
+/// Dual operation of the encode function above.
+inline MaybeAlign decodeMaybeAlign(unsigned Value) {
+ if (Value == 0)
+ return MaybeAlign();
+ Align Out;
+ Out.ShiftValue = Value - 1;
+ return Out;
+}
+
+/// Returns a representation of the alignment; the encoded value is positive by
+/// definition.
+inline unsigned encode(Align A) { return encode(MaybeAlign(A)); }
+
+/// Comparisons between Align and scalars. Rhs must be positive.
+inline bool operator==(Align Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return Lhs.value() == Rhs;
+}
+inline bool operator!=(Align Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return Lhs.value() != Rhs;
+}
+inline bool operator<=(Align Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return Lhs.value() <= Rhs;
+}
+inline bool operator>=(Align Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return Lhs.value() >= Rhs;
+}
+inline bool operator<(Align Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return Lhs.value() < Rhs;
+}
+inline bool operator>(Align Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return Lhs.value() > Rhs;
+}
+
+/// Comparisons between MaybeAlign and scalars.
+inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) {
+ return Lhs ? (*Lhs).value() == Rhs : Rhs == 0;
+}
+inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) {
+ return Lhs ? (*Lhs).value() != Rhs : Rhs != 0;
+}
+inline bool operator<=(MaybeAlign Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return (*Lhs).value() <= Rhs;
+}
+inline bool operator>=(MaybeAlign Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return (*Lhs).value() >= Rhs;
+}
+inline bool operator<(MaybeAlign Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return (*Lhs).value() < Rhs;
+}
+inline bool operator>(MaybeAlign Lhs, uint64_t Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ ALIGN_CHECK_ISPOSITIVE(Rhs);
+ return (*Lhs).value() > Rhs;
+}
+
+/// Comparison operators between Align values.
+inline bool operator==(Align Lhs, Align Rhs) {
+ return Lhs.ShiftValue == Rhs.ShiftValue;
+}
+inline bool operator!=(Align Lhs, Align Rhs) {
+ return Lhs.ShiftValue != Rhs.ShiftValue;
+}
+inline bool operator<=(Align Lhs, Align Rhs) {
+ return Lhs.ShiftValue <= Rhs.ShiftValue;
+}
+inline bool operator>=(Align Lhs, Align Rhs) {
+ return Lhs.ShiftValue >= Rhs.ShiftValue;
+}
+inline bool operator<(Align Lhs, Align Rhs) {
+ return Lhs.ShiftValue < Rhs.ShiftValue;
+}
+inline bool operator>(Align Lhs, Align Rhs) {
+ return Lhs.ShiftValue > Rhs.ShiftValue;
+}
+
+/// Comparison operators between Align and MaybeAlign.
+inline bool operator==(Align Lhs, MaybeAlign Rhs) {
+ ALIGN_CHECK_ISSET(Rhs);
+ return Lhs.value() == (*Rhs).value();
+}
+inline bool operator!=(Align Lhs, MaybeAlign Rhs) {
+ ALIGN_CHECK_ISSET(Rhs);
+ return Lhs.value() != (*Rhs).value();
+}
+inline bool operator<=(Align Lhs, MaybeAlign Rhs) {
+ ALIGN_CHECK_ISSET(Rhs);
+ return Lhs.value() <= (*Rhs).value();
+}
+inline bool operator>=(Align Lhs, MaybeAlign Rhs) {
+ ALIGN_CHECK_ISSET(Rhs);
+ return Lhs.value() >= (*Rhs).value();
+}
+inline bool operator<(Align Lhs, MaybeAlign Rhs) {
+ ALIGN_CHECK_ISSET(Rhs);
+ return Lhs.value() < (*Rhs).value();
+}
+inline bool operator>(Align Lhs, MaybeAlign Rhs) {
+ ALIGN_CHECK_ISSET(Rhs);
+ return Lhs.value() > (*Rhs).value();
+}
+
+/// Comparison operators between MaybeAlign and Align.
+inline bool operator==(MaybeAlign Lhs, Align Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ return Lhs && (*Lhs).value() == Rhs.value();
+}
+inline bool operator!=(MaybeAlign Lhs, Align Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ return Lhs && (*Lhs).value() != Rhs.value();
+}
+inline bool operator<=(MaybeAlign Lhs, Align Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ return Lhs && (*Lhs).value() <= Rhs.value();
+}
+inline bool operator>=(MaybeAlign Lhs, Align Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ return Lhs && (*Lhs).value() >= Rhs.value();
+}
+inline bool operator<(MaybeAlign Lhs, Align Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ return Lhs && (*Lhs).value() < Rhs.value();
+}
+inline bool operator>(MaybeAlign Lhs, Align Rhs) {
+ ALIGN_CHECK_ISSET(Lhs);
+ return Lhs && (*Lhs).value() > Rhs.value();
+}
+
+inline Align operator/(Align Lhs, uint64_t Divisor) {
+ assert(llvm::isPowerOf2_64(Divisor) &&
+ "Divisor must be positive and a power of 2");
+ assert(Lhs != 1 && "Can't halve byte alignment");
+ return Align(Lhs.value() / Divisor);
+}
+
+inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) {
+ assert(llvm::isPowerOf2_64(Divisor) &&
+ "Divisor must be positive and a power of 2");
+ return Lhs ? Lhs.getValue() / Divisor : MaybeAlign();
+}
+
+inline Align max(MaybeAlign Lhs, Align Rhs) {
+ return Lhs && *Lhs > Rhs ? *Lhs : Rhs;
+}
+
+inline Align max(Align Lhs, MaybeAlign Rhs) {
+ return Rhs && *Rhs > Lhs ? *Rhs : Lhs;
+}
+
+#undef ALIGN_CHECK_ISPOSITIVE
+#undef ALIGN_CHECK_ISSET
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_ALIGNMENT_H_
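
A minimal sketch of the new API, assuming only this header: Align enforces
the power-of-two invariant at construction, and alignTo rounds sizes up using
the masked-add trick described above, e.g. alignTo(10, Align(8)) is
(10 + 7) & ~7 == 16.

    #include "llvm/Support/Alignment.h"
    #include <cassert>

    void demo() {
      llvm::Align A(8);                   // must be a non-zero power of two
      assert(llvm::alignTo(10, A) == 16); // (10 + 7) & ~7
      llvm::MaybeAlign M;                 // undefined alignment
      llvm::Align B = M.valueOrOne();     // falls back to byte alignment
      assert(llvm::isAligned(B, 7));      // everything is 1-byte aligned
    }
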
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index 09e967b98abc..106b90c35bf5 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -211,13 +212,11 @@ public:
/// Allocate space at the specified alignment.
LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void *
- Allocate(size_t Size, size_t Alignment) {
- assert(Alignment > 0 && "0-byte alignnment is not allowed. Use 1 instead.");
-
+ Allocate(size_t Size, Align Alignment) {
// Keep track of how many bytes we've allocated.
BytesAllocated += Size;
- size_t Adjustment = alignmentAdjustment(CurPtr, Alignment);
+ size_t Adjustment = offsetToAlignedAddr(CurPtr, Alignment);
assert(Adjustment + Size >= Size && "Adjustment + Size must not overflow");
size_t SizeToAllocate = Size;
@@ -240,7 +239,7 @@ public:
}
// If Size is really big, allocate a separate slab for it.
- size_t PaddedSize = SizeToAllocate + Alignment - 1;
+ size_t PaddedSize = SizeToAllocate + Alignment.value() - 1;
if (PaddedSize > SizeThreshold) {
void *NewSlab = Allocator.Allocate(PaddedSize, 0);
 // We own the new slab and don't want anyone reading anything other than
@@ -268,6 +267,12 @@ public:
return AlignedPtr;
}
+ inline LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void *
+ Allocate(size_t Size, size_t Alignment) {
+ assert(Alignment > 0 && "0-byte alignment is not allowed. Use 1 instead.");
+ return Allocate(Size, Align(Alignment));
+ }
+
// Pull in base class overloads.
using AllocatorBase<BumpPtrAllocatorImpl>::Allocate;
@@ -461,7 +466,7 @@ public:
/// all memory allocated so far.
void DestroyAll() {
auto DestroyElements = [](char *Begin, char *End) {
- assert(Begin == (char *)alignAddr(Begin, alignof(T)));
+ assert(Begin == (char *)alignAddr(Begin, Align::Of<T>()));
for (char *Ptr = Begin; Ptr + sizeof(T) <= End; Ptr += sizeof(T))
reinterpret_cast<T *>(Ptr)->~T();
};
@@ -470,7 +475,7 @@ public:
++I) {
size_t AllocatedSlabSize = BumpPtrAllocator::computeSlabSize(
std::distance(Allocator.Slabs.begin(), I));
- char *Begin = (char *)alignAddr(*I, alignof(T));
+ char *Begin = (char *)alignAddr(*I, Align::Of<T>());
char *End = *I == Allocator.Slabs.back() ? Allocator.CurPtr
: (char *)*I + AllocatedSlabSize;
@@ -480,7 +485,8 @@ public:
for (auto &PtrAndSize : Allocator.CustomSizedSlabs) {
void *Ptr = PtrAndSize.first;
size_t Size = PtrAndSize.second;
- DestroyElements((char *)alignAddr(Ptr, alignof(T)), (char *)Ptr + Size);
+ DestroyElements((char *)alignAddr(Ptr, Align::Of<T>()),
+ (char *)Ptr + Size);
}
Allocator.Reset();
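
A sketch of the updated allocator interface under the same assumptions: the
Align overload is now the primary entry point, and the size_t overload merely
validates and forwards to it.

    #include "llvm/Support/Allocator.h"

    void demo() {
      llvm::BumpPtrAllocator Alloc;
      // Alignment carried by the Align type: zero is unrepresentable.
      void *P = Alloc.Allocate(64, llvm::Align(16));
      // The legacy size_t overload asserts positivity and forwards.
      void *Q = Alloc.Allocate(64, static_cast<size_t>(16));
      (void)P; (void)Q;
    }
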
diff --git a/include/llvm/Support/Automaton.h b/include/llvm/Support/Automaton.h
new file mode 100644
index 000000000000..7c13a698e492
--- /dev/null
+++ b/include/llvm/Support/Automaton.h
@@ -0,0 +1,253 @@
+//===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class that drives and introspects deterministic
+// finite-state automata (DFAs) as generated by TableGen's -gen-automata
+// backend.
+//
+// For a description of how to define an automaton, see
+// include/llvm/TableGen/Automaton.td.
+//
+// One important detail is that these deterministic automata are created from
+// (potentially) nondeterministic definitions. Therefore a unique sequence of
+// input symbols will produce one path through the DFA but multiple paths
+// through the original NFA. An automaton by default only returns "accepted" or
+// "not accepted", but frequently we want to analyze what NFA path was taken.
+// Finding a path through the NFA states that results in a DFA state can help
+// answer *what* the solution to a problem was, not just that there exists a
+// solution.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AUTOMATON_H
+#define LLVM_SUPPORT_AUTOMATON_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include <deque>
+#include <map>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+namespace llvm {
+
+using NfaPath = SmallVector<uint64_t, 4>;
+
+/// Forward define the pair type used by the automata transition info tables.
+///
+/// Experimental results with large tables have shown a significant (multiple
+/// orders of magnitude) parsing speedup by using a custom struct here with a
+/// trivial constructor rather than std::pair<uint64_t, uint64_t>.
+struct NfaStatePair {
+ uint64_t FromDfaState, ToDfaState;
+
+ bool operator<(const NfaStatePair &Other) const {
+ return std::make_tuple(FromDfaState, ToDfaState) <
+ std::make_tuple(Other.FromDfaState, Other.ToDfaState);
+ }
+};
+
+namespace internal {
+/// The internal class that maintains all possible paths through an NFA based
+/// on a path through the DFA.
+class NfaTranscriber {
+private:
+ /// Cached transition table. This is a table of NfaStatePairs that contains
+ /// zero-terminated sequences pointed to by DFA transitions.
+ ArrayRef<NfaStatePair> TransitionInfo;
+
+ /// A simple linked-list of traversed states that can have a shared tail. The
+ /// traversed path is stored in reverse order with the latest state as the
+ /// head.
+ struct PathSegment {
+ uint64_t State;
+ PathSegment *Tail;
+ };
+
+ /// We allocate segment objects frequently. Allocate them upfront and dispose
+ /// at the end of a traversal rather than hammering the system allocator.
+ SpecificBumpPtrAllocator<PathSegment> Allocator;
+
+ /// Heads of each tracked path. These are not ordered.
+ std::deque<PathSegment *> Heads;
+
+ /// The returned paths. This is populated during getPaths.
+ SmallVector<NfaPath, 4> Paths;
+
+ /// Create a new segment and return it.
+ PathSegment *makePathSegment(uint64_t State, PathSegment *Tail) {
+ PathSegment *P = Allocator.Allocate();
+ *P = {State, Tail};
+ return P;
+ }
+
+ /// Pairs defines a sequence of possible NFA transitions for a single DFA
+ /// transition.
+ void transition(ArrayRef<NfaStatePair> Pairs) {
+ // Iterate over all existing heads. We will mutate the Heads deque during
+ // iteration.
+ unsigned NumHeads = Heads.size();
+ for (unsigned I = 0; I < NumHeads; ++I) {
+ PathSegment *Head = Heads[I];
+ // The sequence of pairs is sorted. Select the set of pairs that
+ // transition from the current head state.
+ auto PI = lower_bound(Pairs, NfaStatePair{Head->State, 0ULL});
+ auto PE = upper_bound(Pairs, NfaStatePair{Head->State, INT64_MAX});
+ // For every transition from the current head state, add a new path
+ // segment.
+ for (; PI != PE; ++PI)
+ if (PI->FromDfaState == Head->State)
+ Heads.push_back(makePathSegment(PI->ToDfaState, Head));
+ }
+ // Now we've iterated over all the initial heads and added new ones,
+ // dispose of the original heads.
+ Heads.erase(Heads.begin(), std::next(Heads.begin(), NumHeads));
+ }
+
+public:
+ NfaTranscriber(ArrayRef<NfaStatePair> TransitionInfo)
+ : TransitionInfo(TransitionInfo) {
+ reset();
+ }
+
+ void reset() {
+ Paths.clear();
+ Heads.clear();
+ Allocator.DestroyAll();
+ // The initial NFA state is 0.
+ Heads.push_back(makePathSegment(0ULL, nullptr));
+ }
+
+ void transition(unsigned TransitionInfoIdx) {
+ unsigned EndIdx = TransitionInfoIdx;
+ while (TransitionInfo[EndIdx].ToDfaState != 0)
+ ++EndIdx;
+ ArrayRef<NfaStatePair> Pairs(&TransitionInfo[TransitionInfoIdx],
+ EndIdx - TransitionInfoIdx);
+ transition(Pairs);
+ }
+
+ ArrayRef<NfaPath> getPaths() {
+ Paths.clear();
+ for (auto *Head : Heads) {
+ NfaPath P;
+ while (Head->State != 0) {
+ P.push_back(Head->State);
+ Head = Head->Tail;
+ }
+ std::reverse(P.begin(), P.end());
+ Paths.push_back(std::move(P));
+ }
+ return Paths;
+ }
+};
+} // namespace internal
+
+/// A deterministic finite-state automaton. The automaton is defined in
+/// TableGen; this object drives an automaton defined by tblgen-emitted tables.
+///
+/// An automaton accepts a sequence of input tokens ("actions"). This class is
+/// templated on the type of these actions.
+template <typename ActionT> class Automaton {
+ /// Map from {State, Action} to {NewState, TransitionInfoIdx}.
+ /// TransitionInfoIdx is used by the NfaTranscriber to analyze the transition.
+ /// FIXME: This uses a std::map because ActionT can be a pair type including
+ /// an enum. In particular DenseMapInfo<ActionT> must be defined to use
+ /// DenseMap here.
+ /// This is a shared_ptr to allow very quick copy-construction of Automata; this
+ /// state is immutable after construction so this is safe.
+ using MapTy = std::map<std::pair<uint64_t, ActionT>, std::pair<uint64_t, unsigned>>;
+ std::shared_ptr<MapTy> M;
+ /// An optional transcription object. This uses much more state than simply
+ /// traversing the DFA for acceptance, so is heap allocated.
+ std::shared_ptr<internal::NfaTranscriber> Transcriber;
+ /// The initial DFA state is 1.
+ uint64_t State = 1;
+ /// True if we should transcribe and false if not (even if Transcriber is defined).
+ bool Transcribe;
+
+public:
+ /// Create an automaton.
+ /// \param Transitions The Transitions table as created by TableGen. Note that
+ /// because the action type differs per automaton, the
+ /// table type is templated as ArrayRef<InfoT>.
+ /// \param TranscriptionTable The TransitionInfo table as created by TableGen.
+ ///
+ /// Providing the TranscriptionTable argument as non-empty will enable the
+ /// use of transcription, which analyzes the possible paths in the original
+ /// NFA taken by the DFA. NOTE: This is substantially more work than simply
+ /// driving the DFA, so unless you require the getPaths() method leave this
+ /// empty.
+ template <typename InfoT>
+ Automaton(ArrayRef<InfoT> Transitions,
+ ArrayRef<NfaStatePair> TranscriptionTable = {}) {
+ if (!TranscriptionTable.empty())
+ Transcriber =
+ std::make_shared<internal::NfaTranscriber>(TranscriptionTable);
+ Transcribe = Transcriber != nullptr;
+ M = std::make_shared<MapTy>();
+ for (const auto &I : Transitions)
+ // Greedily read and cache the transition table.
+ M->emplace(std::make_pair(I.FromDfaState, I.Action),
+ std::make_pair(I.ToDfaState, I.InfoIdx));
+ }
+ Automaton(const Automaton &) = default;
+
+ /// Reset the automaton to its initial state.
+ void reset() {
+ State = 1;
+ if (Transcriber)
+ Transcriber->reset();
+ }
+
+ /// Enable or disable transcription. Transcription is only available if
+ /// TranscriptionTable was provided to the constructor.
+ void enableTranscription(bool Enable = true) {
+ assert(Transcriber &&
+ "Transcription is only available if TranscriptionTable was provided "
+ "to the Automaton constructor");
+ Transcribe = Enable;
+ }
+
+ /// Transition the automaton based on input symbol A. Return true if the
+ /// automaton transitioned to a valid state, false if the automaton
+ /// transitioned to an invalid state.
+ ///
+ /// If this function returns false, all methods are undefined until reset() is
+ /// called.
+ bool add(const ActionT &A) {
+ auto I = M->find({State, A});
+ if (I == M->end())
+ return false;
+ if (Transcriber && Transcribe)
+ Transcriber->transition(I->second.second);
+ State = I->second.first;
+ return true;
+ }
+
+ /// Return true if the automaton can be transitioned based on input symbol A.
+ bool canAdd(const ActionT &A) {
+ auto I = M->find({State, A});
+ return I != M->end();
+ }
+
+ /// Obtain a set of possible paths through the input nondeterministic
+ /// automaton that could be obtained from the sequence of input actions
+ /// presented to this deterministic automaton.
+ ArrayRef<NfaPath> getNfaPaths() {
+ assert(Transcriber && Transcribe &&
+ "Can only obtain NFA paths if transcribing!");
+ return Transcriber->getPaths();
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_AUTOMATON_H
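
A brief sketch of driving the class above. The transitions table here is
hand-written and hypothetical; in practice TableGen's -gen-automata backend
emits an equivalent row struct and table, but the field names below match
what the constructor reads.

    #include "llvm/Support/Automaton.h"
    using namespace llvm;

    // Hypothetical row type standing in for a TableGen-emitted struct.
    struct TransitionRow {
      uint64_t FromDfaState, ToDfaState;
      char Action;
      unsigned InfoIdx;
    };

    // A two-state DFA over char actions; the initial DFA state is 1.
    static const TransitionRow Rows[] = {
        {1, 2, 'a', 0}, // state 1 --'a'--> state 2
        {2, 1, 'b', 0}, // state 2 --'b'--> state 1
    };

    bool acceptsAB() {
      Automaton<char> A{makeArrayRef(Rows)};
      return A.add('a') && A.add('b'); // both transitions exist
    }
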
diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h
index 96d09db69ae5..67ba2e4189be 100644
--- a/include/llvm/Support/BinaryStreamArray.h
+++ b/include/llvm/Support/BinaryStreamArray.h
@@ -286,7 +286,7 @@ public:
// an exact multiple of the element size.
consumeError(std::move(EC));
}
- assert(llvm::alignmentAdjustment(Data.data(), alignof(T)) == 0);
+ assert(isAddrAligned(Align::Of<T>(), Data.data()));
return *reinterpret_cast<const T *>(Data.data());
}
diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h
index d8fddde66bfa..9e16ce227ff8 100644
--- a/include/llvm/Support/BinaryStreamReader.h
+++ b/include/llvm/Support/BinaryStreamReader.h
@@ -198,7 +198,7 @@ public:
if (auto EC = readBytes(Bytes, NumElements * sizeof(T)))
return EC;
- assert(alignmentAdjustment(Bytes.data(), alignof(T)) == 0 &&
+ assert(isAddrAligned(Align::Of<T>(), Bytes.data()) &&
"Reading at invalid alignment!");
Array = ArrayRef<T>(reinterpret_cast<const T *>(Bytes.data()), NumElements);
diff --git a/include/llvm/Support/CRC.h b/include/llvm/Support/CRC.h
index 6ea8e3edcea4..210890ae06d4 100644
--- a/include/llvm/Support/CRC.h
+++ b/include/llvm/Support/CRC.h
@@ -6,20 +6,55 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains basic functions for calculating Cyclic Redundancy Check
-// or CRC.
+// This file contains implementations of CRC functions.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_CRC_H
#define LLVM_SUPPORT_CRC_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
-/// zlib independent CRC32 calculation.
-uint32_t crc32(uint32_t CRC, StringRef S);
+template <typename T> class ArrayRef;
+
+// Compute the CRC-32 of Data.
+uint32_t crc32(ArrayRef<uint8_t> Data);
+
+// Compute the running CRC-32 of Data, with CRC being the previous value of the
+// checksum.
+uint32_t crc32(uint32_t CRC, ArrayRef<uint8_t> Data);
+
+// Class for computing the JamCRC.
+//
+// We will use the "Rocksoft^tm Model CRC Algorithm" to describe the properties
+// of this CRC:
+// Width : 32
+// Poly : 04C11DB7
+// Init : FFFFFFFF
+// RefIn : True
+// RefOut : True
+// XorOut : 00000000
+// Check : 340BC6D9 (result of CRC for "123456789")
+//
+// In other words, this is the same as CRC-32, except that XorOut is 0 instead
+// of FFFFFFFF.
+//
+// N.B. We permit flexibility of the "Init" value. Some consumers of this need
+// it to be zero.
+class JamCRC {
+public:
+ JamCRC(uint32_t Init = 0xFFFFFFFFU) : CRC(Init) {}
+
+ // Update the CRC calculation with Data.
+ void update(ArrayRef<uint8_t> Data);
+
+ uint32_t getCRC() const { return CRC; }
+
+private:
+ uint32_t CRC;
+};
+
} // end namespace llvm
#endif
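
A short sketch of the reworked interface, assuming only this header: crc32
now takes raw bytes, and JamCRC differs from CRC-32 only in the missing final
XOR, matching the check values quoted above.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/CRC.h"

    void demo() {
      const uint8_t Buf[] = {'1', '2', '3', '4', '5', '6', '7', '8', '9'};
      // CRC-32 check value for "123456789" is 0xCBF43926.
      uint32_t C = llvm::crc32(llvm::makeArrayRef(Buf));
      // JamCRC check value for the same input is 0x340BC6D9 (no final XOR).
      llvm::JamCRC J;
      J.update(llvm::makeArrayRef(Buf));
      (void)C;
    }
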
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index 3cc2c3c0121b..63784463e171 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -2000,6 +2000,9 @@ void ResetAllOptionOccurrences();
/// where no options are supported.
void ResetCommandLineParser();
+/// Parses `Arg` into the option handler `Handler`.
+bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i);
+
} // end namespace cl
} // end namespace llvm
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 3f4f465f3960..cb7e57d4cd21 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -7,7 +7,8 @@
//===----------------------------------------------------------------------===//
//
// This file defines several macros, based on the current compiler. This allows
-// use of compiler-specific features in a way that remains portable.
+// use of compiler-specific features in a way that remains portable. This header
+// can be included from either C or C++.
//
//===----------------------------------------------------------------------===//
@@ -16,7 +17,9 @@
#include "llvm/Config/llvm-config.h"
+#ifdef __cplusplus
#include <new>
+#endif
#include <stddef.h>
#if defined(_MSC_VER)
@@ -35,14 +38,20 @@
# define __has_attribute(x) 0
#endif
-#ifndef __has_cpp_attribute
-# define __has_cpp_attribute(x) 0
-#endif
-
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
+// Only use __has_cpp_attribute in C++ mode. GCC defines __has_cpp_attribute in
+// C mode, but the :: in __has_cpp_attribute(scoped::attribute) is invalid.
+#ifndef LLVM_HAS_CPP_ATTRIBUTE
+#if defined(__cplusplus) && defined(__has_cpp_attribute)
+# define LLVM_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define LLVM_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+#endif
+
/// \macro LLVM_GNUC_PREREQ
/// Extend the default __GNUC_PREREQ even if glibc's features.h isn't
/// available.
@@ -62,13 +71,21 @@
/// \macro LLVM_MSC_PREREQ
/// Is the compiler MSVC of at least the specified version?
/// The common \param version values to check for are:
-/// * 1900: Microsoft Visual Studio 2015 / 14.0
+/// * 1910: VS2017, version 15.1 & 15.2
+/// * 1911: VS2017, version 15.3 & 15.4
+/// * 1912: VS2017, version 15.5
+/// * 1913: VS2017, version 15.6
+/// * 1914: VS2017, version 15.7
+/// * 1915: VS2017, version 15.8
+/// * 1916: VS2017, version 15.9
+/// * 1920: VS2019, version 16.0
+/// * 1921: VS2019, version 16.1
#ifdef _MSC_VER
#define LLVM_MSC_PREREQ(version) (_MSC_VER >= (version))
-// We require at least MSVC 2015.
-#if !LLVM_MSC_PREREQ(1900)
-#error LLVM requires at least MSVC 2015.
+// We require at least MSVC 2017.
+#if !LLVM_MSC_PREREQ(1910)
+#error LLVM requires at least MSVC 2017.
#endif
#else
@@ -120,14 +137,18 @@
#endif
/// LLVM_NODISCARD - Warn if a type or return value is discarded.
-#if __cplusplus > 201402L && __has_cpp_attribute(nodiscard)
+
+// Use the 'nodiscard' attribute in C++17 or newer mode.
+#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(nodiscard)
#define LLVM_NODISCARD [[nodiscard]]
-#elif !__cplusplus
-// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious
-// error when __has_cpp_attribute is given a scoped attribute in C mode.
-#define LLVM_NODISCARD
-#elif __has_cpp_attribute(clang::warn_unused_result)
+#elif LLVM_HAS_CPP_ATTRIBUTE(clang::warn_unused_result)
#define LLVM_NODISCARD [[clang::warn_unused_result]]
+// Clang in C++14 mode claims that it has the 'nodiscard' attribute, but also
+// warns in the pedantic mode that 'nodiscard' is a C++17 extension (PR33518).
+// Use the 'nodiscard' attribute in C++14 mode only with GCC.
+// TODO: remove this workaround when PR33518 is resolved.
+#elif defined(__GNUC__) && LLVM_HAS_CPP_ATTRIBUTE(nodiscard)
+#define LLVM_NODISCARD [[nodiscard]]
#else
#define LLVM_NODISCARD
#endif
@@ -139,7 +160,7 @@
// The clang-tidy check bugprone-use-after-move recognizes this attribute as a
// marker that a moved-from object has left the indeterminate state and can be
// reused.
-#if __has_cpp_attribute(clang::reinitializes)
+#if LLVM_HAS_CPP_ATTRIBUTE(clang::reinitializes)
#define LLVM_ATTRIBUTE_REINITIALIZES [[clang::reinitializes]]
#else
#define LLVM_ATTRIBUTE_REINITIALIZES
@@ -240,15 +261,13 @@
#endif
/// LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
-#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
+#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(fallthrough)
#define LLVM_FALLTHROUGH [[fallthrough]]
-#elif __has_cpp_attribute(gnu::fallthrough)
+#elif LLVM_HAS_CPP_ATTRIBUTE(gnu::fallthrough)
#define LLVM_FALLTHROUGH [[gnu::fallthrough]]
-#elif !__cplusplus
-// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious
-// error when __has_cpp_attribute is given a scoped attribute in C mode.
-#define LLVM_FALLTHROUGH
-#elif __has_cpp_attribute(clang::fallthrough)
+#elif __has_attribute(fallthrough)
+#define LLVM_FALLTHROUGH __attribute__((fallthrough))
+#elif LLVM_HAS_CPP_ATTRIBUTE(clang::fallthrough)
#define LLVM_FALLTHROUGH [[clang::fallthrough]]
#else
#define LLVM_FALLTHROUGH
@@ -256,7 +275,7 @@
/// LLVM_REQUIRE_CONSTANT_INITIALIZATION - Apply this to globals to ensure that
/// they are constant initialized.
-#if __has_cpp_attribute(clang::require_constant_initialization)
+#if LLVM_HAS_CPP_ATTRIBUTE(clang::require_constant_initialization)
#define LLVM_REQUIRE_CONSTANT_INITIALIZATION \
[[clang::require_constant_initialization]]
#else
@@ -338,14 +357,6 @@
# define LLVM_ASSUME_ALIGNED(p, a) (p)
#endif
-/// \macro LLVM_ALIGNAS
-/// Used to specify a minimum alignment for a structure or variable.
-#if __GNUC__ && !__has_feature(cxx_alignas) && !LLVM_GNUC_PREREQ(4, 8, 1)
-# define LLVM_ALIGNAS(x) __attribute__((aligned(x)))
-#else
-# define LLVM_ALIGNAS(x) alignas(x)
-#endif
-
/// \macro LLVM_PACKED
/// Used to specify a packed structure.
/// LLVM_PACKED(
@@ -376,8 +387,8 @@
/// \macro LLVM_PTR_SIZE
/// A constant integer equivalent to the value of sizeof(void*).
-/// Generally used in combination with LLVM_ALIGNAS or when doing computation in
-/// the preprocessor.
+/// Generally used in combination with alignas or when doing computation in the
+/// preprocessor.
#ifdef __SIZEOF_POINTER__
# define LLVM_PTR_SIZE __SIZEOF_POINTER__
#elif defined(_WIN64)
@@ -527,6 +538,7 @@ void AnnotateIgnoreWritesEnd(const char *file, int line);
#define LLVM_ENABLE_EXCEPTIONS 1
#endif
+#ifdef __cplusplus
namespace llvm {
/// Allocate a buffer of memory with the given size and alignment.
@@ -569,4 +581,5 @@ inline void deallocate_buffer(void *Ptr, size_t Size, size_t Alignment) {
} // End namespace llvm
+#endif // __cplusplus
#endif
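
A small sketch of the macros this header keeps portable, assuming C++ mode:
LLVM_NODISCARD warns when a caller ignores the result, and LLVM_FALLTHROUGH
expands to [[fallthrough]], a GNU/Clang attribute, or nothing, depending on
the host compiler, as selected above.

    #include "llvm/Support/Compiler.h"

    LLVM_NODISCARD int parseDigit(char C) {
      switch (C) {
      case '0':
      case '1':
        return C - '0';
      case ' ':
        LLVM_FALLTHROUGH; // deliberate: whitespace is treated as invalid
      default:
        return -1;
      }
    }
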
diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h
index 6b08a2a2a445..f590a1e104fb 100644
--- a/include/llvm/Support/DataExtractor.h
+++ b/include/llvm/Support/DataExtractor.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Error.h"
namespace llvm {
@@ -42,6 +43,38 @@ class DataExtractor {
uint8_t IsLittleEndian;
uint8_t AddressSize;
public:
+ /// A class representing a position in a DataExtractor, as well as any error
+ /// encountered during extraction. It enables one to extract a sequence of
+ /// values without error-checking and then to check for errors in bulk at the
+ /// end. The class holds an Error object, so failing to check the result of
+ /// the parse will result in a runtime error. The error flag is sticky and
+ /// will cause all subsequent extraction functions to fail without even
+ /// attempting to parse and without updating the Cursor offset. After clearing
+ /// the error flag, one can again use the Cursor object for parsing.
+ class Cursor {
+ uint64_t Offset;
+ Error Err;
+
+ friend class DataExtractor;
+
+ public:
+ /// Construct a cursor for extraction from the given offset.
+ explicit Cursor(uint64_t Offset) : Offset(Offset), Err(Error::success()) {}
+
+ /// Checks whether the cursor is valid (i.e. no errors were encountered). In
+ /// case of errors, this does not clear the error flag -- one must call
+ /// takeError() instead.
+ explicit operator bool() { return !Err; }
+
+ /// Return the current position of this Cursor. In the error state this is
+ /// the position of the Cursor before the first error was encountered.
+ uint64_t tell() const { return Offset; }
+
+ /// Return the error contained inside this Cursor, if any. Clears the internal
+ /// Cursor state.
+ Error takeError() { return std::move(Err); }
+ };
+
/// Construct with a buffer that is owned by the caller.
///
/// This constructor allows us to use data that is owned by the
@@ -49,6 +82,11 @@ public:
/// valid.
DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
: Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
+ DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian,
+ uint8_t AddressSize)
+ : Data(StringRef(reinterpret_cast<const char *>(Data.data()),
+ Data.size())),
+ IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
/// Get the data pointed to by this extractor.
StringRef getData() const { return Data; }
@@ -79,17 +117,17 @@ public:
/// pointed to by \a offset_ptr is out of bounds, or if the
/// offset plus the length of the C string is out of bounds,
/// NULL will be returned.
- const char *getCStr(uint32_t *offset_ptr) const;
+ const char *getCStr(uint64_t *offset_ptr) const;
- /// Extract a C string from \a *OffsetPtr.
+ /// Extract a C string from \a *offset_ptr.
///
/// Returns a StringRef for the C String from the data at the offset
- /// pointed to by \a OffsetPtr. A variable length NULL terminated C
- /// string will be extracted and the \a OffsetPtr will be
+ /// pointed to by \a offset_ptr. A variable length NULL terminated C
+ /// string will be extracted and the \a offset_ptr will be
/// updated with the offset of the byte that follows the NULL
/// terminator byte.
///
- /// \param[in,out] OffsetPtr
+ /// \param[in,out] offset_ptr
/// A pointer to an offset within the data that will be advanced
/// by the appropriate number of bytes if the value is extracted
/// correctly. If the offset is out of bounds or there are not
@@ -98,10 +136,10 @@ public:
///
/// \return
/// A StringRef for the C string value in the data. If the offset
- /// pointed to by \a OffsetPtr is out of bounds, or if the
+ /// pointed to by \a offset_ptr is out of bounds, or if the
/// offset plus the length of the C string is out of bounds,
/// a default-initialized StringRef will be returned.
- StringRef getCStrRef(uint32_t *OffsetPtr) const;
+ StringRef getCStrRef(uint64_t *offset_ptr) const;
/// Extract an unsigned integer of size \a byte_size from \a
/// *offset_ptr.
@@ -124,10 +162,24 @@ public:
/// @param[in] byte_size
/// The size in byte of the integer to extract.
///
+ /// @param[in,out] Err
+ /// A pointer to an Error object. Upon return the Error object is set to
+ /// indicate the result (success/failure) of the function. If the Error
+ /// object is already set when calling this function, no extraction is
+ /// performed.
+ ///
/// @return
/// The unsigned integer value that was extracted, or zero on
/// failure.
- uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const;
+ uint64_t getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
+ Error *Err = nullptr) const;
+
+ /// Extract an unsigned integer of the given size from the location given by
+ /// the cursor. In case of an extraction error, or if the cursor is already in
+ /// an error state, zero is returned.
+ uint64_t getUnsigned(Cursor &C, uint32_t Size) const {
+ return getUnsigned(&C.Offset, Size, &C.Err);
+ }
 /// Extract a signed integer of size \a byte_size from \a *offset_ptr.
///
@@ -152,7 +204,7 @@ public:
/// @return
/// The sign extended signed integer value that was extracted,
/// or zero on failure.
- int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const;
+ int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const;
//------------------------------------------------------------------
 /// Extract a pointer from \a *offset_ptr.
@@ -171,10 +223,15 @@ public:
///
/// @return
 /// The extracted pointer value as a 64-bit integer.
- uint64_t getAddress(uint32_t *offset_ptr) const {
+ uint64_t getAddress(uint64_t *offset_ptr) const {
return getUnsigned(offset_ptr, AddressSize);
}
+ /// Extract a pointer-sized unsigned integer from the location given by the
+ /// cursor. In case of an extraction error, or if the cursor is already in
+ /// an error state, zero is returned.
+ uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); }
+
/// Extract a uint8_t value from \a *offset_ptr.
///
/// Extract a single uint8_t from the binary data at the offset
@@ -187,9 +244,20 @@ public:
/// enough bytes to extract this value, the offset will be left
/// unmodified.
///
+ /// @param[in,out] Err
+ /// A pointer to an Error object. Upon return the Error object is set to
+ /// indicate the result (success/failure) of the function. If the Error
+ /// object is already set when calling this function, no extraction is
+ /// performed.
+ ///
/// @return
/// The extracted uint8_t value.
- uint8_t getU8(uint32_t *offset_ptr) const;
+ uint8_t getU8(uint64_t *offset_ptr, Error *Err = nullptr) const;
+
+ /// Extract a single uint8_t value from the location given by the cursor. In
+ /// case of an extraction error, or if the cursor is already in an error
+ /// state, zero is returned.
+ uint8_t getU8(Cursor &C) const { return getU8(&C.Offset, &C.Err); }
/// Extract \a count uint8_t values from \a *offset_ptr.
///
@@ -214,7 +282,27 @@ public:
/// @return
/// \a dst if all values were properly extracted and copied,
/// NULL otherise.
- uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const;
+ uint8_t *getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const;
+
+ /// Extract \a Count uint8_t values from the location given by the cursor and
+ /// store them into the destination buffer. In case of an extraction error, or
+ /// if the cursor is already in an error state, a nullptr is returned and the
+ /// destination buffer is left unchanged.
+ uint8_t *getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const;
+
+ /// Extract \a Count uint8_t values from the location given by the cursor and
+ /// store them into the destination vector. The vector is resized to fit the
+ /// extracted data. In case of an extraction error, or if the cursor is
+ /// already in an error state, the destination vector is left unchanged and
+ /// cursor is placed into an error state.
+ void getU8(Cursor &C, SmallVectorImpl<uint8_t> &Dst, uint32_t Count) const {
+ if (isValidOffsetForDataOfSize(C.Offset, Count))
+ Dst.resize(Count);
+
+ // This relies on the fact that getU8 will not attempt to write to the
+ // buffer if isValidOffsetForDataOfSize(C.Offset, Count) is false.
+ getU8(C, Dst.data(), Count);
+ }
//------------------------------------------------------------------
/// Extract a uint16_t value from \a *offset_ptr.
@@ -229,10 +317,21 @@ public:
/// enough bytes to extract this value, the offset will be left
/// unmodified.
///
+ /// @param[in,out] Err
+ /// A pointer to an Error object. Upon return the Error object is set to
+ /// indicate the result (success/failure) of the function. If the Error
+ /// object is already set when calling this function, no extraction is
+ /// performed.
+ ///
/// @return
/// The extracted uint16_t value.
//------------------------------------------------------------------
- uint16_t getU16(uint32_t *offset_ptr) const;
+ uint16_t getU16(uint64_t *offset_ptr, Error *Err = nullptr) const;
+
+ /// Extract a single uint16_t value from the location given by the cursor. In
+ /// case of an extraction error, or if the cursor is already in an error
+ /// state, zero is returned.
+ uint16_t getU16(Cursor &C) const { return getU16(&C.Offset, &C.Err); }
/// Extract \a count uint16_t values from \a *offset_ptr.
///
@@ -257,7 +356,7 @@ public:
/// @return
/// \a dst if all values were properly extracted and copied,
 /// NULL otherwise.
- uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const;
+ uint16_t *getU16(uint64_t *offset_ptr, uint16_t *dst, uint32_t count) const;
/// Extract a 24-bit unsigned value from \a *offset_ptr and return it
/// in a uint32_t.
@@ -274,7 +373,7 @@ public:
///
/// @return
/// The extracted 24-bit value represented in a uint32_t.
- uint32_t getU24(uint32_t *offset_ptr) const;
+ uint32_t getU24(uint64_t *offset_ptr) const;
/// Extract a uint32_t value from \a *offset_ptr.
///
@@ -288,9 +387,20 @@ public:
/// enough bytes to extract this value, the offset will be left
/// unmodified.
///
+ /// @param[in,out] Err
+ /// A pointer to an Error object. Upon return the Error object is set to
+ /// indicate the result (success/failure) of the function. If the Error
+ /// object is already set when calling this function, no extraction is
+ /// performed.
+ ///
/// @return
/// The extracted uint32_t value.
- uint32_t getU32(uint32_t *offset_ptr) const;
+ uint32_t getU32(uint64_t *offset_ptr, Error *Err = nullptr) const;
+
+ /// Extract a single uint32_t value from the location given by the cursor. In
+ /// case of an extraction error, or if the cursor is already in an error
+ /// state, zero is returned.
+ uint32_t getU32(Cursor &C) const { return getU32(&C.Offset, &C.Err); }
/// Extract \a count uint32_t values from \a *offset_ptr.
///
@@ -315,7 +425,7 @@ public:
/// @return
/// \a dst if all values were properly extracted and copied,
 /// NULL otherwise.
- uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const;
+ uint32_t *getU32(uint64_t *offset_ptr, uint32_t *dst, uint32_t count) const;
/// Extract a uint64_t value from \a *offset_ptr.
///
@@ -329,9 +439,20 @@ public:
/// enough bytes to extract this value, the offset will be left
/// unmodified.
///
+ /// @param[in,out] Err
+ /// A pointer to an Error object. Upon return the Error object is set to
+ /// indicate the result (success/failure) of the function. If the Error
+ /// object is already set when calling this function, no extraction is
+ /// performed.
+ ///
/// @return
/// The extracted uint64_t value.
- uint64_t getU64(uint32_t *offset_ptr) const;
+ uint64_t getU64(uint64_t *offset_ptr, Error *Err = nullptr) const;
+
+ /// Extract a single uint64_t value from the location given by the cursor. In
+ /// case of an extraction error, or if the cursor is already in an error
+ /// state, zero is returned.
+ uint64_t getU64(Cursor &C) const { return getU64(&C.Offset, &C.Err); }
/// Extract \a count uint64_t values from \a *offset_ptr.
///
@@ -356,7 +477,7 @@ public:
/// @return
/// \a dst if all values were properly extracted and copied,
/// NULL otherwise.
- uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const;
+ uint64_t *getU64(uint64_t *offset_ptr, uint64_t *dst, uint32_t count) const;
/// Extract a signed LEB128 value from \a *offset_ptr.
///
@@ -374,7 +495,7 @@ public:
///
/// @return
/// The extracted signed integer value.
- int64_t getSLEB128(uint32_t *offset_ptr) const;
+ int64_t getSLEB128(uint64_t *offset_ptr) const;
/// Extract an unsigned LEB128 value from \a *offset_ptr.
///
@@ -390,23 +511,44 @@ public:
/// enough bytes to extract this value, the offset will be left
/// unmodified.
///
+ /// @param[in,out] Err
+ /// A pointer to an Error object. Upon return the Error object is set to
+ /// indicate the result (success/failure) of the function. If the Error
+ /// object is already set when calling this function, no extraction is
+ /// performed.
+ ///
/// @return
/// The extracted unsigned integer value.
- uint64_t getULEB128(uint32_t *offset_ptr) const;
+ uint64_t getULEB128(uint64_t *offset_ptr, llvm::Error *Err = nullptr) const;
+
+ /// Extract an unsigned LEB128 value from the location given by the cursor.
+ /// In case of an extraction error, or if the cursor is already in an error
+ /// state, zero is returned.
+ uint64_t getULEB128(Cursor &C) const { return getULEB128(&C.Offset, &C.Err); }
+
+ /// Advance the Cursor position by the given number of bytes. No-op if the
+ /// cursor is in an error state.
+ void skip(Cursor &C, uint64_t Length) const;
+
+ /// Return true iff the cursor is at the end of the buffer, regardless of the
+ /// error state of the cursor. The only way both eof and error states can be
+ /// true is if one attempts a read while the cursor is at the very end of the
+ /// data buffer.
+ bool eof(const Cursor &C) const { return Data.size() == C.Offset; }
/// Test the validity of \a offset.
///
/// @return
/// \b true if \a offset is a valid offset into the data in this
/// object, \b false otherwise.
- bool isValidOffset(uint32_t offset) const { return Data.size() > offset; }
+ bool isValidOffset(uint64_t offset) const { return Data.size() > offset; }
/// Test the availability of \a length bytes of data from \a offset.
///
/// @return
/// \b true if \a offset is a valid offset and there are \a
/// length bytes available at that offset, \b false otherwise.
- bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const {
+ bool isValidOffsetForDataOfSize(uint64_t offset, uint64_t length) const {
return offset + length >= offset && isValidOffset(offset + length - 1);
}
@@ -417,9 +559,15 @@ public:
/// \b true if \a offset is a valid offset and there are enough
/// bytes for a pointer available at that offset, \b false
/// otherwise.
- bool isValidOffsetForAddress(uint32_t offset) const {
+ bool isValidOffsetForAddress(uint64_t offset) const {
return isValidOffsetForDataOfSize(offset, AddressSize);
}
+
+protected:
+ // Make it possible for subclasses to access these fields without making them
+ // public.
+ static uint64_t &getOffset(Cursor &C) { return C.Offset; }
+ static Error &getError(Cursor &C) { return C.Err; }
};
} // namespace llvm
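
A minimal usage sketch of the Cursor-based extraction API added above. The buffer layout is invented for illustration, and Cursor::takeError() is assumed to be part of the Cursor class introduced earlier in this diff:

#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
using namespace llvm;

// Chain several reads and check for failure once at the end. Each getter
// is a no-op returning 0 once the cursor holds an error.
static Error parseRecord(StringRef Bytes) {
  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
  DataExtractor::Cursor C(0);
  uint16_t Version = DE.getU16(C);
  uint64_t PayloadSize = DE.getULEB128(C);
  DE.skip(C, PayloadSize);      // Skip the payload; no-op on error.
  bool AtEnd = DE.eof(C);       // Valid to query even in the error state.
  (void)Version;
  (void)AtEnd;
  return C.takeError();         // The stored error must be consumed once.
}
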
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
index d8be94427d7e..87aecedd3a4b 100644
--- a/include/llvm/Support/Endian.h
+++ b/include/llvm/Support/Endian.h
@@ -203,9 +203,8 @@ inline void writeAtBitAlignment(void *memory, value_type value,
namespace detail {
-template<typename ValueType,
- endianness Endian,
- std::size_t Alignment>
+template <typename ValueType, endianness Endian, std::size_t Alignment,
+ std::size_t ALIGN = PickAlignment<ValueType, Alignment>::value>
struct packed_endian_specific_integral {
using value_type = ValueType;
static constexpr endianness endian = Endian;
@@ -246,8 +245,9 @@ struct packed_endian_specific_integral {
}
private:
- AlignedCharArray<PickAlignment<value_type, alignment>::value,
- sizeof(value_type)> Value;
+ struct {
+ alignas(ALIGN) char buffer[sizeof(value_type)];
+ } Value;
public:
struct ref {
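
For readers unfamiliar with the replacement pattern above: raw character storage with an explicit alignas now stands in for the old AlignedCharArray helper. A standalone illustration (not LLVM code):

#include <cstdint>

// Aligned raw storage for a T, with the alignment overridable (e.g. to 1
// for unaligned access, as packed_endian_specific_integral does).
template <typename T, std::size_t Align = alignof(T)> struct RawStorage {
  alignas(Align) char Buffer[sizeof(T)];
};

static_assert(alignof(RawStorage<std::uint64_t, 1>) == 1, "packed variant");
static_assert(alignof(RawStorage<std::uint64_t>) == alignof(std::uint64_t),
              "naturally aligned variant");
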
diff --git a/include/llvm/Support/Error.h b/include/llvm/Support/Error.h
index 299fce7a1368..350877a219bf 100644
--- a/include/llvm/Support/Error.h
+++ b/include/llvm/Support/Error.h
@@ -328,7 +328,7 @@ inline ErrorSuccess Error::success() { return ErrorSuccess(); }
/// Make a Error instance representing failure using the given error info
/// type.
template <typename ErrT, typename... ArgTs> Error make_error(ArgTs &&... Args) {
- return Error(llvm::make_unique<ErrT>(std::forward<ArgTs>(Args)...));
+ return Error(std::make_unique<ErrT>(std::forward<ArgTs>(Args)...));
}
/// Base class for user error types. Users should declare their error types
@@ -548,7 +548,7 @@ public:
/// Take ownership of the stored error.
/// After calling this the Expected<T> is in an indeterminate state that can
/// only be safely destructed. No further calls (beside the destructor) should
- /// be made on the Expected<T> vaule.
+ /// be made on the Expected<T> value.
Error takeError() {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
Unchecked = false;
@@ -704,6 +704,12 @@ inline void cantFail(Error Err, const char *Msg = nullptr) {
if (Err) {
if (!Msg)
Msg = "Failure value returned from cantFail wrapped call";
+#ifndef NDEBUG
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << Msg << "\n" << Err;
+ Msg = OS.str().c_str();
+#endif
llvm_unreachable(Msg);
}
}
@@ -728,6 +734,13 @@ T cantFail(Expected<T> ValOrErr, const char *Msg = nullptr) {
else {
if (!Msg)
Msg = "Failure value returned from cantFail wrapped call";
+#ifndef NDEBUG
+ std::string Str;
+ raw_string_ostream OS(Str);
+ auto E = ValOrErr.takeError();
+ OS << Msg << "\n" << E;
+ Msg = OS.str().c_str();
+#endif
llvm_unreachable(Msg);
}
}
@@ -752,6 +765,13 @@ T& cantFail(Expected<T&> ValOrErr, const char *Msg = nullptr) {
else {
if (!Msg)
Msg = "Failure value returned from cantFail wrapped call";
+#ifndef NDEBUG
+ std::string Str;
+ raw_string_ostream OS(Str);
+ auto E = ValOrErr.takeError();
+ OS << Msg << "\n" << E;
+ Msg = OS.str().c_str();
+#endif
llvm_unreachable(Msg);
}
}
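
Effect of the three hunks above: in asserts-enabled builds, the message of the failing Error is appended to the llvm_unreachable() text rather than being dropped. A hedged usage sketch; parsePositive is a stand-in invented for illustration:

#include "llvm/Support/Error.h"
using namespace llvm;

// Hypothetical producer that can only fail for negative input.
static Expected<unsigned> parsePositive(int V) {
  if (V < 0)
    return createStringError(inconvertibleErrorCode(), "negative input");
  return static_cast<unsigned>(V);
}

static unsigned mustParse() {
  // Known-good input, so unwrapping is safe; if this ever failed, the
  // failure's message would now appear in the unreachable diagnostics.
  return cantFail(parsePositive(42));
}
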
@@ -982,6 +1002,20 @@ inline void consumeError(Error Err) {
handleAllErrors(std::move(Err), [](const ErrorInfoBase &) {});
}
+/// Convert an Expected to an Optional without doing anything. This method
+/// should be used only where an error can be considered a reasonable and
+/// expected return value.
+///
+/// Uses of this method are potentially indicative of problems: perhaps the
+/// error should be propagated further, or the error-producer should just
+/// return an Optional in the first place.
+template <typename T> Optional<T> expectedToOptional(Expected<T> &&E) {
+ if (E)
+ return std::move(*E);
+ consumeError(E.takeError());
+ return None;
+}
+
/// Helper for converting an Error to a bool.
///
/// This method returns true if Err is in an error state, or false if it is
@@ -1170,6 +1204,10 @@ inline Error createStringError(std::error_code EC, char const *Fmt,
Error createStringError(std::error_code EC, char const *Msg);
+inline Error createStringError(std::error_code EC, const Twine &S) {
+ return createStringError(EC, S.str().c_str());
+}
+
template <typename... Ts>
inline Error createStringError(std::errc EC, char const *Fmt,
const Ts &... Vals) {
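
A hedged sketch combining the two additions above, the Twine overload of createStringError and expectedToOptional; loadCount is invented for illustration:

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
using namespace llvm;

// Hypothetical producer whose failure is a reasonable, expected outcome.
static Expected<int> loadCount(bool Present) {
  if (!Present)
    return createStringError(inconvertibleErrorCode(),
                             Twine("count is ") + "absent");
  return 7;
}

static int countOrZero(bool Present) {
  Optional<int> C = expectedToOptional(loadCount(Present));
  return C.getValueOr(0); // Any error has already been consumed.
}
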
diff --git a/include/llvm/Support/FileCheck.h b/include/llvm/Support/FileCheck.h
index 0cd25a71a3b3..2547449246a8 100644
--- a/include/llvm/Support/FileCheck.h
+++ b/include/llvm/Support/FileCheck.h
@@ -13,12 +13,12 @@
#ifndef LLVM_SUPPORT_FILECHECK_H
#define LLVM_SUPPORT_FILECHECK_H
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
+#include <string>
#include <vector>
-#include <map>
namespace llvm {
@@ -30,6 +30,7 @@ struct FileCheckRequest {
std::vector<std::string> GlobalDefines;
bool AllowEmptyInput = false;
bool MatchFullLines = false;
+ bool IgnoreCase = false;
bool EnableVarScope = false;
bool AllowDeprecatedDagOverlap = false;
bool Verbose = false;
@@ -37,217 +38,7 @@ struct FileCheckRequest {
};
//===----------------------------------------------------------------------===//
-// Numeric substitution handling code.
-//===----------------------------------------------------------------------===//
-
-/// Base class representing the AST of a given expression.
-class FileCheckExpressionAST {
-public:
- virtual ~FileCheckExpressionAST() = default;
-
- /// Evaluates and \returns the value of the expression represented by this
- /// AST or an error if evaluation fails.
- virtual Expected<uint64_t> eval() const = 0;
-};
-
-/// Class representing an unsigned literal in the AST of an expression.
-class FileCheckExpressionLiteral : public FileCheckExpressionAST {
-private:
- /// Actual value of the literal.
- uint64_t Value;
-
-public:
- /// Constructs a literal with the specified value.
- FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {}
-
- /// \returns the literal's value.
- Expected<uint64_t> eval() const { return Value; }
-};
-
-/// Class to represent an undefined variable error, which quotes that
-/// variable's name when printed.
-class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> {
-private:
- StringRef VarName;
-
-public:
- static char ID;
-
- FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {}
-
- StringRef getVarName() const { return VarName; }
-
- std::error_code convertToErrorCode() const override {
- return inconvertibleErrorCode();
- }
-
- /// Print name of variable associated with this error.
- void log(raw_ostream &OS) const override {
- OS << "\"";
- OS.write_escaped(VarName) << "\"";
- }
-};
-
-/// Class representing a numeric variable and its associated current value.
-class FileCheckNumericVariable {
-private:
- /// Name of the numeric variable.
- StringRef Name;
-
- /// Value of numeric variable, if defined, or None otherwise.
- Optional<uint64_t> Value;
-
- /// Line number where this variable is defined, or None if defined before
- /// input is parsed. Used to determine whether a variable is defined on the
- /// same line as a given use.
- Optional<size_t> DefLineNumber;
-
-public:
- /// Constructor for a variable \p Name defined at line \p DefLineNumber or
- /// defined before input is parsed if DefLineNumber is None.
- FileCheckNumericVariable(StringRef Name,
- Optional<size_t> DefLineNumber = None)
- : Name(Name), DefLineNumber(DefLineNumber) {}
-
- /// \returns name of this numeric variable.
- StringRef getName() const { return Name; }
-
- /// \returns this variable's value.
- Optional<uint64_t> getValue() const { return Value; }
-
- /// Sets value of this numeric variable, if undefined. Triggers an assertion
- /// failure if the variable is actually defined.
- void setValue(uint64_t Value);
-
- /// Clears value of this numeric variable, regardless of whether it is
- /// currently defined or not.
- void clearValue();
-
- /// \returns the line number where this variable is defined, if any, or None
- /// if defined before input is parsed.
- Optional<size_t> getDefLineNumber() { return DefLineNumber; }
-};
-
-/// Class representing the use of a numeric variable in the AST of an
-/// expression.
-class FileCheckNumericVariableUse : public FileCheckExpressionAST {
-private:
- /// Name of the numeric variable.
- StringRef Name;
-
- /// Pointer to the class instance for the variable this use is about.
- FileCheckNumericVariable *NumericVariable;
-
-public:
- FileCheckNumericVariableUse(StringRef Name,
- FileCheckNumericVariable *NumericVariable)
- : Name(Name), NumericVariable(NumericVariable) {}
-
- /// \returns the value of the variable referenced by this instance.
- Expected<uint64_t> eval() const;
-};
-
-/// Type of functions evaluating a given binary operation.
-using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);
-
-/// Class representing a single binary operation in the AST of an expression.
-class FileCheckASTBinop : public FileCheckExpressionAST {
-private:
- /// Left operand.
- std::unique_ptr<FileCheckExpressionAST> LeftOperand;
-
- /// Right operand.
- std::unique_ptr<FileCheckExpressionAST> RightOperand;
-
- /// Pointer to function that can evaluate this binary operation.
- binop_eval_t EvalBinop;
-
-public:
- FileCheckASTBinop(binop_eval_t EvalBinop,
- std::unique_ptr<FileCheckExpressionAST> LeftOp,
- std::unique_ptr<FileCheckExpressionAST> RightOp)
- : EvalBinop(EvalBinop) {
- LeftOperand = std::move(LeftOp);
- RightOperand = std::move(RightOp);
- }
-
- /// Evaluates the value of the binary operation represented by this AST,
- /// using EvalBinop on the result of recursively evaluating the operands.
- /// \returns the expression value or an error if an undefined numeric
- /// variable is used in one of the operands.
- Expected<uint64_t> eval() const;
-};
-
-class FileCheckPatternContext;
-
-/// Class representing a substitution to perform in the RegExStr string.
-class FileCheckSubstitution {
-protected:
- /// Pointer to a class instance holding, among other things, the table with
- /// the values of live string variables at the start of any given CHECK line.
- /// Used for substituting string variables with the text they were defined
- /// as. Expressions are linked to the numeric variables they use at
- /// parse time and directly access the value of the numeric variable to
- /// evaluate their value.
- FileCheckPatternContext *Context;
-
- /// The string that needs to be substituted for something else. For a
- /// string variable this is its name, otherwise this is the whole expression.
- StringRef FromStr;
-
- // Index in RegExStr of where to do the substitution.
- size_t InsertIdx;
-
-public:
- FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName,
- size_t InsertIdx)
- : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
-
- virtual ~FileCheckSubstitution() = default;
-
- /// \returns the string to be substituted for something else.
- StringRef getFromString() const { return FromStr; }
-
- /// \returns the index where the substitution is to be performed in RegExStr.
- size_t getIndex() const { return InsertIdx; }
-
- /// \returns a string containing the result of the substitution represented
- /// by this class instance or an error if substitution failed.
- virtual Expected<std::string> getResult() const = 0;
-};
-
-class FileCheckStringSubstitution : public FileCheckSubstitution {
-public:
- FileCheckStringSubstitution(FileCheckPatternContext *Context,
- StringRef VarName, size_t InsertIdx)
- : FileCheckSubstitution(Context, VarName, InsertIdx) {}
-
- /// \returns the text that the string variable in this substitution matched
- /// when defined, or an error if the variable is undefined.
- Expected<std::string> getResult() const override;
-};
-
-class FileCheckNumericSubstitution : public FileCheckSubstitution {
-private:
- /// Pointer to the class representing the expression whose value is to be
- /// substituted.
- std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
-
-public:
- FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr,
- std::unique_ptr<FileCheckExpressionAST> ExprAST,
- size_t InsertIdx)
- : FileCheckSubstitution(Context, Expr, InsertIdx) {
- ExpressionAST = std::move(ExprAST);
- }
-
- /// \returns a string containing the result of evaluating the expression in
- /// this substitution, or an error if evaluation failed.
- Expected<std::string> getResult() const override;
-};
-
-//===----------------------------------------------------------------------===//
-// Pattern handling code.
+// Summary of a FileCheck diagnostic.
//===----------------------------------------------------------------------===//
namespace Check {
@@ -291,325 +82,6 @@ public:
};
} // namespace Check
-struct FileCheckDiag;
-
-/// Class holding the FileCheckPattern global state, shared by all patterns:
-/// tables holding values of variables and whether they are defined or not at
-/// any given time in the matching process.
-class FileCheckPatternContext {
- friend class FileCheckPattern;
-
-private:
- /// When matching a given pattern, this holds the value of all the string
- /// variables defined in previous patterns. In a pattern, only the last
- /// definition for a given variable is recorded in this table.
- /// Back-references are used for uses after any of the other definitions.
- StringMap<StringRef> GlobalVariableTable;
-
- /// Map of all string variables defined so far. Used at parse time to detect
- /// a name conflict between a numeric variable and a string variable when
- /// the former is defined on a later line than the latter.
- StringMap<bool> DefinedVariableTable;
-
- /// When matching a given pattern, this holds the pointers to the classes
- /// representing the numeric variables defined in previous patterns. When
- /// matching a pattern all definitions for that pattern are recorded in the
- /// NumericVariableDefs table in the FileCheckPattern instance of that
- /// pattern.
- StringMap<FileCheckNumericVariable *> GlobalNumericVariableTable;
-
- /// Pointer to the class instance representing the @LINE pseudo variable for
- /// easily updating its value.
- FileCheckNumericVariable *LineVariable = nullptr;
-
- /// Vector holding pointers to all parsed numeric variables. Used to
- /// automatically free them once they are guaranteed to no longer be used.
- std::vector<std::unique_ptr<FileCheckNumericVariable>> NumericVariables;
-
- /// Vector holding pointers to all substitutions. Used to automatically free
- /// them once they are guaranteed to no longer be used.
- std::vector<std::unique_ptr<FileCheckSubstitution>> Substitutions;
-
-public:
- /// \returns the value of string variable \p VarName or an error if no such
- /// variable has been defined.
- Expected<StringRef> getPatternVarValue(StringRef VarName);
-
- /// Defines string and numeric variables from definitions given on the
- /// command line, passed as a vector of [#]VAR=VAL strings in
- /// \p CmdlineDefines. \returns an error list containing diagnostics against
- /// \p SM for all definition parsing failures, if any, or Success otherwise.
- Error defineCmdlineVariables(std::vector<std::string> &CmdlineDefines,
- SourceMgr &SM);
-
- /// Create @LINE pseudo variable. Value is set when patterns are being
- /// matched.
- void createLineVariable();
-
- /// Undefines local variables (variables whose name does not start with a '$'
- /// sign), i.e. removes them from GlobalVariableTable and from
- /// GlobalNumericVariableTable and also clears the value of numeric
- /// variables.
- void clearLocalVars();
-
-private:
- /// Makes a new numeric variable and registers it for destruction when the
- /// context is destroyed.
- template <class... Types>
- FileCheckNumericVariable *makeNumericVariable(Types... args);
-
- /// Makes a new string substitution and registers it for destruction when the
- /// context is destroyed.
- FileCheckSubstitution *makeStringSubstitution(StringRef VarName,
- size_t InsertIdx);
-
- /// Makes a new numeric substitution and registers it for destruction when
- /// the context is destroyed.
- FileCheckSubstitution *
- makeNumericSubstitution(StringRef ExpressionStr,
- std::unique_ptr<FileCheckExpressionAST> ExpressionAST,
- size_t InsertIdx);
-};
-
-/// Class to represent an error holding a diagnostic with location information
-/// used when printing it.
-class FileCheckErrorDiagnostic : public ErrorInfo<FileCheckErrorDiagnostic> {
-private:
- SMDiagnostic Diagnostic;
-
-public:
- static char ID;
-
- FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {}
-
- std::error_code convertToErrorCode() const override {
- return inconvertibleErrorCode();
- }
-
- /// Print diagnostic associated with this error when printing the error.
- void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
-
- static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) {
- return make_error<FileCheckErrorDiagnostic>(
- SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg));
- }
-
- static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) {
- return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg);
- }
-};
-
-class FileCheckNotFoundError : public ErrorInfo<FileCheckNotFoundError> {
-public:
- static char ID;
-
- std::error_code convertToErrorCode() const override {
- return inconvertibleErrorCode();
- }
-
- /// Print diagnostic associated with this error when printing the error.
- void log(raw_ostream &OS) const override {
- OS << "String not found in input";
- }
-};
-
-class FileCheckPattern {
- SMLoc PatternLoc;
-
- /// A fixed string to match as the pattern or empty if this pattern requires
- /// a regex match.
- StringRef FixedStr;
-
- /// A regex string to match as the pattern or empty if this pattern requires
- /// a fixed string to match.
- std::string RegExStr;
-
- /// Entries in this vector represent a substitution of a string variable or
- /// an expression in the RegExStr regex at match time. For example, in the
- /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]",
- /// RegExStr will contain "foobaz" and we'll get two entries in this vector
- /// that tell us to insert the value of string variable "bar" at offset 3
- /// and the value of expression "N+1" at offset 6.
- std::vector<FileCheckSubstitution *> Substitutions;
-
- /// Maps names of string variables defined in a pattern to the number of
- /// their parenthesis group in RegExStr capturing their last definition.
- ///
- /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])",
- /// RegExStr will be "foo(.*)baz(\1<quux value>(.*))" where <quux value> is
- /// the value captured for QUUX on the earlier line where it was defined, and
- /// VariableDefs will map "bar" to the third parenthesis group which captures
- /// the second definition of "bar".
- ///
- /// Note: uses std::map rather than StringMap to be able to get the key when
- /// iterating over values.
- std::map<StringRef, unsigned> VariableDefs;
-
- /// Structure representing the definition of a numeric variable in a pattern.
- /// It holds the pointer to the class representing the numeric variable whose
- /// value is being defined and the number of the parenthesis group in
- /// RegExStr to capture that value.
- struct FileCheckNumericVariableMatch {
- /// Pointer to class representing the numeric variable whose value is being
- /// defined.
- FileCheckNumericVariable *DefinedNumericVariable;
-
- /// Number of the parenthesis group in RegExStr that captures the value of
- /// this numeric variable definition.
- unsigned CaptureParenGroup;
- };
-
- /// Holds the number of the parenthesis group in RegExStr and pointer to the
- /// corresponding FileCheckNumericVariable class instance of all numeric
- /// variable definitions. Used to set the matched value of all those
- /// variables.
- StringMap<FileCheckNumericVariableMatch> NumericVariableDefs;
-
- /// Pointer to a class instance holding the global state shared by all
- /// patterns:
- /// - separate tables with the values of live string and numeric variables
- /// respectively at the start of any given CHECK line;
- /// - table holding whether a string variable has been defined at any given
- /// point during the parsing phase.
- FileCheckPatternContext *Context;
-
- Check::FileCheckType CheckTy;
-
- /// Line number for this CHECK pattern or None if it is an implicit pattern.
- /// Used to determine whether a variable definition is made on an earlier
- /// line to the one with this CHECK.
- Optional<size_t> LineNumber;
-
-public:
- FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
- Optional<size_t> Line = None)
- : Context(Context), CheckTy(Ty), LineNumber(Line) {}
-
- /// \returns the location in source code.
- SMLoc getLoc() const { return PatternLoc; }
-
- /// \returns the pointer to the global state for all patterns in this
- /// FileCheck instance.
- FileCheckPatternContext *getContext() const { return Context; }
-
- /// \returns whether \p C is a valid first character for a variable name.
- static bool isValidVarNameStart(char C);
-
- /// Parsing information about a variable.
- struct VariableProperties {
- StringRef Name;
- bool IsPseudo;
- };
-
- /// Parses the string at the start of \p Str for a variable name. \returns
- /// a VariableProperties structure holding the variable name and whether it
- /// is the name of a pseudo variable, or an error holding a diagnostic
- /// against \p SM if parsing fail. If parsing was successful, also strips
- /// \p Str from the variable name.
- static Expected<VariableProperties> parseVariable(StringRef &Str,
- const SourceMgr &SM);
- /// Parses \p Expr for the name of a numeric variable to be defined at line
- /// \p LineNumber or before input is parsed if \p LineNumber is None.
- /// \returns a pointer to the class instance representing that variable,
- /// creating it if needed, or an error holding a diagnostic against \p SM
- /// should defining such a variable be invalid.
- static Expected<FileCheckNumericVariable *> parseNumericVariableDefinition(
- StringRef &Expr, FileCheckPatternContext *Context,
- Optional<size_t> LineNumber, const SourceMgr &SM);
- /// Parses \p Expr for a numeric substitution block. Parameter
- /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
- /// expression. \returns a pointer to the class instance representing the AST
- /// of the expression whose value must be substituted, or an error holding a
- /// diagnostic against \p SM if parsing fails. If substitution was
- /// successful, sets \p DefinedNumericVariable to point to the class
- /// representing the numeric variable being defined in this numeric
- /// substitution block, or None if this block does not define any variable.
- Expected<std::unique_ptr<FileCheckExpressionAST>>
- parseNumericSubstitutionBlock(
- StringRef Expr,
- Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
- bool IsLegacyLineExpr, const SourceMgr &SM) const;
- /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern
- /// instance accordingly.
- ///
- /// \p Prefix provides which prefix is being matched, \p Req describes the
- /// global options that influence the parsing such as whitespace
- /// canonicalization, \p SM provides the SourceMgr used for error reports.
- /// \returns true in case of an error, false otherwise.
- bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
- const FileCheckRequest &Req);
- /// Matches the pattern string against the input buffer \p Buffer
- ///
- /// \returns the position that is matched or an error indicating why matching
- /// failed. If there is a match, updates \p MatchLen with the size of the
- /// matched string.
- ///
- /// The GlobalVariableTable StringMap in the FileCheckPatternContext class
- /// instance provides the current values of FileCheck string variables and
- /// is updated if this match defines new values. Likewise, the
- /// GlobalNumericVariableTable StringMap in the same class provides the
- /// current values of FileCheck numeric variables and is updated if this
- /// match defines new numeric values.
- Expected<size_t> match(StringRef Buffer, size_t &MatchLen,
- const SourceMgr &SM) const;
- /// Prints the value of successful substitutions or the name of the undefined
- /// string or numeric variables preventing a successful substitution.
- void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
- SMRange MatchRange = None) const;
- void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
- std::vector<FileCheckDiag> *Diags) const;
-
- bool hasVariable() const {
- return !(Substitutions.empty() && VariableDefs.empty());
- }
-
- Check::FileCheckType getCheckTy() const { return CheckTy; }
-
- int getCount() const { return CheckTy.getCount(); }
-
-private:
- bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
- void AddBackrefToRegEx(unsigned BackrefNum);
- /// Computes an arbitrary estimate for the quality of matching this pattern
- /// at the start of \p Buffer; a distance of zero should correspond to a
- /// perfect match.
- unsigned computeMatchDistance(StringRef Buffer) const;
- /// Finds the closing sequence of a regex variable usage or definition.
- ///
- /// \p Str has to point in the beginning of the definition (right after the
- /// opening sequence). \p SM holds the SourceMgr used for error reporting.
- /// \returns the offset of the closing sequence within Str, or npos if it
- /// was not found.
- size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
-
- /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use.
- /// \returns the pointer to the class instance representing that variable if
- /// successful, or an error holding a diagnostic against \p SM otherwise.
- Expected<std::unique_ptr<FileCheckNumericVariableUse>>
- parseNumericVariableUse(StringRef Name, bool IsPseudo,
- const SourceMgr &SM) const;
- enum class AllowedOperand { LineVar, Literal, Any };
- /// Parses \p Expr for use of a numeric operand. Accepts both literal values
- /// and numeric variables, depending on the value of \p AO. \returns the
- /// class representing that operand in the AST of the expression or an error
- /// holding a diagnostic against \p SM otherwise.
- Expected<std::unique_ptr<FileCheckExpressionAST>>
- parseNumericOperand(StringRef &Expr, AllowedOperand AO,
- const SourceMgr &SM) const;
- /// Parses \p Expr for a binary operation. The left operand of this binary
- /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether
- /// we are parsing a legacy @LINE expression. \returns the class representing
- /// the binary operation in the AST of the expression, or an error holding a
- /// diagnostic against \p SM otherwise.
- Expected<std::unique_ptr<FileCheckExpressionAST>>
- parseBinop(StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp,
- bool IsLegacyLineExpr, const SourceMgr &SM) const;
-};
-
-//===----------------------------------------------------------------------===//
-/// Summary of a FileCheck diagnostic.
-//===----------------------------------------------------------------------===//
-
struct FileCheckDiag {
/// What is the FileCheck directive for this diagnostic?
Check::FileCheckType CheckTy;
@@ -659,61 +131,20 @@ struct FileCheckDiag {
SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange);
};
-//===----------------------------------------------------------------------===//
-// Check Strings.
-//===----------------------------------------------------------------------===//
-
-/// A check that we found in the input file.
-struct FileCheckString {
- /// The pattern to match.
- FileCheckPattern Pat;
-
- /// Which prefix name this check matched.
- StringRef Prefix;
-
- /// The location in the match file that the check string was specified.
- SMLoc Loc;
-
- /// All of the strings that are disallowed from occurring between this match
- /// string and the previous one (or start of file).
- std::vector<FileCheckPattern> DagNotStrings;
-
- FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L)
- : Pat(P), Prefix(S), Loc(L) {}
-
- /// Matches check string and its "not strings" and/or "dag strings".
- size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
- size_t &MatchLen, FileCheckRequest &Req,
- std::vector<FileCheckDiag> *Diags) const;
-
- /// Verifies that there is a single line in the given \p Buffer. Errors are
- /// reported against \p SM.
- bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
- /// Verifies that there is no newline in the given \p Buffer. Errors are
- /// reported against \p SM.
- bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
- /// Verifies that none of the strings in \p NotStrings are found in the given
- /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in
- /// \p Diags according to the verbosity level set in \p Req.
- bool CheckNot(const SourceMgr &SM, StringRef Buffer,
- const std::vector<const FileCheckPattern *> &NotStrings,
- const FileCheckRequest &Req,
- std::vector<FileCheckDiag> *Diags) const;
- /// Matches "dag strings" and their mixed "not strings".
- size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
- std::vector<const FileCheckPattern *> &NotStrings,
- const FileCheckRequest &Req,
- std::vector<FileCheckDiag> *Diags) const;
-};
+class FileCheckPatternContext;
+struct FileCheckString;
/// FileCheck class takes the request and exposes various methods that
/// use information from the request.
class FileCheck {
FileCheckRequest Req;
- FileCheckPatternContext PatternContext;
+ std::unique_ptr<FileCheckPatternContext> PatternContext;
+ // C++17 TODO: make this a plain std::vector.
+ std::unique_ptr<std::vector<FileCheckString>> CheckStrings;
public:
- FileCheck(FileCheckRequest Req) : Req(Req) {}
+ explicit FileCheck(FileCheckRequest Req);
+ ~FileCheck();
// Combines the check prefixes into a single regex so that we can efficiently
// scan for any of the set.
@@ -723,13 +154,11 @@ public:
Regex buildCheckPrefixRegex();
/// Reads the check file from \p Buffer and records the expected strings it
- /// contains in the \p CheckStrings vector. Errors are reported against
- /// \p SM.
+ /// contains. Errors are reported against \p SM.
///
/// Only expected strings whose prefix is one of those listed in \p PrefixRE
/// are recorded. \returns true in case of an error, false otherwise.
- bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
- std::vector<FileCheckString> &CheckStrings);
+ bool readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE);
bool ValidateCheckPrefixes();
@@ -739,13 +168,14 @@ public:
SmallVectorImpl<char> &OutputBuffer);
/// Checks the input to FileCheck provided in the \p Buffer against the
- /// \p CheckStrings read from the check file and record diagnostics emitted
+ /// expected strings read from the check file and record diagnostics emitted
/// in \p Diags. Errors are recorded against \p SM.
///
/// \returns false if the input fails to satisfy the checks.
- bool CheckInput(SourceMgr &SM, StringRef Buffer,
- ArrayRef<FileCheckString> CheckStrings,
+ bool checkInput(SourceMgr &SM, StringRef Buffer,
std::vector<FileCheckDiag> *Diags = nullptr);
};
+
} // namespace llvm
+
#endif
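
A hedged driver sketch for the reworked interface above. Simplified: real callers also register both buffers with the SourceMgr so diagnostics can point into them, and the text arguments are invented:

#include "llvm/Support/FileCheck.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;

static bool runFileCheck(StringRef CheckFileText, StringRef InputText) {
  FileCheckRequest Req;
  Req.CheckPrefixes.push_back("CHECK"); // The tool's usual default prefix.
  FileCheck FC(Req);
  SourceMgr SM;
  Regex PrefixRE = FC.buildCheckPrefixRegex();
  if (FC.readCheckFile(SM, CheckFileText, PrefixRE))
    return false;                       // Parse error in the check file.
  return FC.checkInput(SM, InputText);  // False if the input fails a check.
}
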
diff --git a/include/llvm/Support/FileCollector.h b/include/llvm/Support/FileCollector.h
new file mode 100644
index 000000000000..19429bd3e9b4
--- /dev/null
+++ b/include/llvm/Support/FileCollector.h
@@ -0,0 +1,79 @@
+//===-- FileCollector.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FILE_COLLECTOR_H
+#define LLVM_SUPPORT_FILE_COLLECTOR_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/VirtualFileSystem.h"
+
+#include <mutex>
+
+namespace llvm {
+
+/// Collects files into a directory and generates a mapping that can be used by
+/// the VFS.
+class FileCollector {
+public:
+ FileCollector(std::string Root, std::string OverlayRoot);
+
+ void addFile(const Twine &file);
+
+ /// Write the yaml mapping (for the VFS) to the given file.
+ std::error_code writeMapping(StringRef mapping_file);
+
+ /// Copy the files into the root directory.
+ ///
+ /// When StopOnError is true (the default) we abort as soon as one file
+ /// cannot be copied. This is relatively common, for example when a file was
+ /// removed after it was added to the mapping.
+ std::error_code copyFiles(bool StopOnError = true);
+
+ /// Create a VFS that collects all the paths that might be looked at by the
+ /// file system accesses.
+ static IntrusiveRefCntPtr<vfs::FileSystem>
+ createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
+ std::shared_ptr<FileCollector> Collector);
+
+private:
+ void addFileImpl(StringRef SrcPath);
+
+ bool markAsSeen(StringRef Path) { return Seen.insert(Path).second; }
+
+ bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result);
+
+ void addFileToMapping(StringRef VirtualPath, StringRef RealPath) {
+ VFSWriter.addFileMapping(VirtualPath, RealPath);
+ }
+
+protected:
+ /// Synchronizes adding files.
+ std::mutex Mutex;
+
+ /// The root directory where files are copied.
+ std::string Root;
+
+ /// The root directory where the VFS overlay lives.
+ std::string OverlayRoot;
+
+ /// Tracks already seen files so they can be skipped.
+ StringSet<> Seen;
+
+ /// The yaml mapping writer.
+ vfs::YAMLVFSWriter VFSWriter;
+
+ /// Caches RealPath calls when resolving symlinks.
+ StringMap<std::string> SymlinkMap;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_FILE_COLLECTOR_H
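
A hedged sketch of the new class in use, driving the public interface declared above; all paths are invented for illustration:

#include "llvm/Support/FileCollector.h"
#include <memory>
using namespace llvm;

// Collect one file into a reproducer root and emit the VFS mapping.
static std::error_code collectRepro() {
  auto Collector = std::make_shared<FileCollector>("/tmp/repro/root",
                                                   "/tmp/repro/root");
  Collector->addFile("/etc/hosts");
  if (std::error_code EC = Collector->copyFiles(/*StopOnError=*/false))
    return EC;                          // Copying failed.
  return Collector->writeMapping("/tmp/repro/vfs.yaml");
}
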
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index 1bec27bddad9..a29a9d787947 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -991,29 +991,27 @@ file_t getStdoutHandle();
/// Returns kInvalidFile when the stream is closed.
file_t getStderrHandle();
-/// Reads \p Buf.size() bytes from \p FileHandle into \p Buf. The number of
-/// bytes actually read is returned in \p BytesRead. On Unix, this is equivalent
-/// to `*BytesRead = ::read(FD, Buf.data(), Buf.size())`, with error reporting.
-/// BytesRead will contain zero when reaching EOF.
+/// Reads \p Buf.size() bytes from \p FileHandle into \p Buf. Returns the number
+/// of bytes actually read. On Unix, this is equivalent to `return ::read(FD,
+/// Buf.data(), Buf.size())`, with error reporting. Returns 0 when reaching EOF.
///
/// @param FileHandle File to read from.
/// @param Buf Buffer to read into.
-/// @param BytesRead Output parameter of the number of bytes read.
-/// @returns The error, if any, or errc::success.
-std::error_code readNativeFile(file_t FileHandle, MutableArrayRef<char> Buf,
- size_t *BytesRead);
+/// @returns The number of bytes read, or error.
+Expected<size_t> readNativeFile(file_t FileHandle, MutableArrayRef<char> Buf);
/// Reads \p Buf.size() bytes from \p FileHandle at offset \p Offset into \p
/// Buf. If 'pread' is available, this will use that, otherwise it will use
-/// 'lseek'. Bytes requested beyond the end of the file will be zero
-/// initialized.
+/// 'lseek'. Returns the number of bytes actually read. Returns 0 when reaching
+/// EOF.
///
/// @param FileHandle File to read from.
/// @param Buf Buffer to read into.
/// @param Offset Offset into the file at which the read should occur.
-/// @returns The error, if any, or errc::success.
-std::error_code readNativeFileSlice(file_t FileHandle,
- MutableArrayRef<char> Buf, size_t Offset);
+/// @returns The number of bytes read, or error.
+Expected<size_t> readNativeFileSlice(file_t FileHandle,
+ MutableArrayRef<char> Buf,
+ uint64_t Offset);
/// @brief Opens the file with the given name in a write-only or read-write
/// mode, returning its open file descriptor. If the file does not exist, it
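
A hedged sketch of a full-buffer read loop against the new Expected-returning readNativeFile; descriptor acquisition is omitted:

#include "llvm/Support/FileSystem.h"
using namespace llvm;

static Expected<size_t> readAll(sys::fs::file_t FD,
                                MutableArrayRef<char> Buf) {
  size_t Total = 0;
  while (Total < Buf.size()) {
    Expected<size_t> N = sys::fs::readNativeFile(FD, Buf.drop_front(Total));
    if (!N)
      return N.takeError();  // Propagate the read error.
    if (*N == 0)
      break;                 // EOF reached.
    Total += *N;
  }
  return Total;
}
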
@@ -1217,9 +1215,9 @@ class directory_entry {
// that whole structure, callers end up paying for a stat().
// std::filesystem::directory_entry may be a better model.
std::string Path;
- file_type Type; // Most platforms can provide this.
- bool FollowSymlinks; // Affects the behavior of status().
- basic_file_status Status; // If available.
+ file_type Type = file_type::type_unknown; // Most platforms can provide this.
+ bool FollowSymlinks = true; // Affects the behavior of status().
+ basic_file_status Status; // If available.
public:
explicit directory_entry(const Twine &Path, bool FollowSymlinks = true,
diff --git a/include/llvm/Support/FileUtilities.h b/include/llvm/Support/FileUtilities.h
index 16b2206924c3..04efdced32a4 100644
--- a/include/llvm/Support/FileUtilities.h
+++ b/include/llvm/Support/FileUtilities.h
@@ -14,6 +14,9 @@
#ifndef LLVM_SUPPORT_FILEUTILITIES_H
#define LLVM_SUPPORT_FILEUTILITIES_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
@@ -72,6 +75,41 @@ namespace llvm {
/// will not be removed when the object is destroyed.
void releaseFile() { DeleteIt = false; }
};
+
+ enum class atomic_write_error {
+ failed_to_create_uniq_file = 0,
+ output_stream_error,
+ failed_to_rename_temp_file
+ };
+
+ class AtomicFileWriteError : public llvm::ErrorInfo<AtomicFileWriteError> {
+ public:
+ AtomicFileWriteError(atomic_write_error Error) : Error(Error) {}
+
+ void log(raw_ostream &OS) const override;
+
+ const atomic_write_error Error;
+ static char ID;
+
+ private:
+ // Users are not expected to use error_code.
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+ };
+
+ // The returned Error is an AtomicFileWriteError, or whatever the Writer
+ // callback can return.
+
+ /// Creates a unique file with name according to the given \p TempPathModel,
+ /// writes content of \p Buffer to the file and renames it to \p FinalPath.
+ ///
+ /// \returns \c AtomicFileWriteError in case of error.
+ llvm::Error writeFileAtomically(StringRef TempPathModel, StringRef FinalPath,
+ StringRef Buffer);
+
+ llvm::Error
+ writeFileAtomically(StringRef TempPathModel, StringRef FinalPath,
+ std::function<llvm::Error(llvm::raw_ostream &)> Writer);
} // End llvm namespace
#endif
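
A hedged sketch of the buffer-based overload added above. The paths are invented, and the '%' characters in the temp-path model are assumed to be expanded to unique characters, as with createUniqueFile:

#include "llvm/Support/Error.h"
#include "llvm/Support/FileUtilities.h"
using namespace llvm;

static void replaceConfig(StringRef Contents) {
  if (Error E = writeFileAtomically("/etc/app/.config-%%%%%%%%.tmp",
                                    "/etc/app/config", Contents))
    consumeError(std::move(E)); // Real code would log or propagate this.
}
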
diff --git a/include/llvm/Support/Format.h b/include/llvm/Support/Format.h
index 77dcbaebf1a3..9dd7b401b46a 100644
--- a/include/llvm/Support/Format.h
+++ b/include/llvm/Support/Format.h
@@ -29,6 +29,7 @@
#include <cassert>
#include <cstdio>
#include <tuple>
+#include <utility>
namespace llvm {
@@ -91,7 +92,7 @@ class format_object final : public format_object_base {
template <std::size_t... Is>
int snprint_tuple(char *Buffer, unsigned BufferSize,
- index_sequence<Is...>) const {
+ std::index_sequence<Is...>) const {
#ifdef _MSC_VER
return _snprintf(Buffer, BufferSize, Fmt, std::get<Is>(Vals)...);
#else
@@ -106,7 +107,7 @@ public:
}
int snprint(char *Buffer, unsigned BufferSize) const override {
- return snprint_tuple(Buffer, BufferSize, index_sequence_for<Ts...>());
+ return snprint_tuple(Buffer, BufferSize, std::index_sequence_for<Ts...>());
}
};
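
The only change above is swapping LLVM's pre-C++14 index_sequence shim for the standard one. The underlying idiom, shown standalone below, expands a stored tuple back into ordinary variadic arguments for snprintf:

#include <cstdio>
#include <tuple>
#include <utility>

template <typename... Ts> struct MiniFormat {
  const char *Fmt;
  std::tuple<Ts...> Vals;

  template <std::size_t... Is>
  int snprint(char *Buf, unsigned Size, std::index_sequence<Is...>) const {
    // The pack expansion unpacks the tuple elements as call arguments.
    return std::snprintf(Buf, Size, Fmt, std::get<Is>(Vals)...);
  }
  int snprint(char *Buf, unsigned Size) const {
    return snprint(Buf, Size, std::index_sequence_for<Ts...>());
  }
};
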
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index 99620802505b..9169379f746d 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -242,7 +242,7 @@ protected:
using DomTreeNodeMapType =
DenseMap<NodeT *, std::unique_ptr<DomTreeNodeBase<NodeT>>>;
DomTreeNodeMapType DomTreeNodes;
- DomTreeNodeBase<NodeT> *RootNode;
+ DomTreeNodeBase<NodeT> *RootNode = nullptr;
ParentPtr Parent = nullptr;
mutable bool DFSInfoValid = false;
@@ -571,7 +571,7 @@ protected:
assert(IDomNode && "Not immediate dominator specified for block!");
DFSInfoValid = false;
return (DomTreeNodes[BB] = IDomNode->addChild(
- llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode))).get();
+ std::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode))).get();
}
/// Add a new node to the forward dominator tree and make it a new root.
@@ -585,7 +585,7 @@ protected:
"Cannot change root of post-dominator tree");
DFSInfoValid = false;
DomTreeNodeBase<NodeT> *NewNode = (DomTreeNodes[BB] =
- llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, nullptr)).get();
+ std::make_unique<DomTreeNodeBase<NodeT>>(BB, nullptr)).get();
if (Roots.empty()) {
addRoot(BB);
} else {
diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h
index ccceba881718..7c0278e8770e 100644
--- a/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/include/llvm/Support/GenericDomTreeConstruction.h
@@ -186,7 +186,7 @@ struct SemiNCAInfo {
// Add a new tree node for this NodeT, and link it as a child of
// IDomNode
return (DT.DomTreeNodes[BB] = IDomNode->addChild(
- llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode)))
+ std::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode)))
.get();
}
@@ -586,7 +586,7 @@ struct SemiNCAInfo {
NodePtr Root = IsPostDom ? nullptr : DT.Roots[0];
DT.RootNode = (DT.DomTreeNodes[Root] =
- llvm::make_unique<DomTreeNodeBase<NodeT>>(Root, nullptr))
+ std::make_unique<DomTreeNodeBase<NodeT>>(Root, nullptr))
.get();
SNCA.attachNewSubtree(DT, DT.RootNode);
}
@@ -611,7 +611,7 @@ struct SemiNCAInfo {
// Add a new tree node for this BasicBlock, and link it as a child of
// IDomNode.
DT.DomTreeNodes[W] = IDomNode->addChild(
- llvm::make_unique<DomTreeNodeBase<NodeT>>(W, IDomNode));
+ std::make_unique<DomTreeNodeBase<NodeT>>(W, IDomNode));
}
}
@@ -663,7 +663,7 @@ struct SemiNCAInfo {
TreeNodePtr VirtualRoot = DT.getNode(nullptr);
FromTN =
(DT.DomTreeNodes[From] = VirtualRoot->addChild(
- llvm::make_unique<DomTreeNodeBase<NodeT>>(From, VirtualRoot)))
+ std::make_unique<DomTreeNodeBase<NodeT>>(From, VirtualRoot)))
.get();
DT.Roots.push_back(From);
}
diff --git a/include/llvm/Support/GlobPattern.h b/include/llvm/Support/GlobPattern.h
index 66a4cd94c12a..0098ac65fd30 100644
--- a/include/llvm/Support/GlobPattern.h
+++ b/include/llvm/Support/GlobPattern.h
@@ -21,7 +21,7 @@
#include <vector>
// This class represents a glob pattern. Supported metacharacters
-// are "*", "?", "[<chars>]" and "[^<chars>]".
+// are "*", "?", "\", "[<chars>]", "[^<chars>]", and "[!<chars>]".
namespace llvm {
class BitVector;
template <typename T> class ArrayRef;
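
A hedged usage sketch of the pattern class whose comment is updated above, assuming GlobPattern keeps its existing create/match interface:

#include "llvm/Support/Error.h"
#include "llvm/Support/GlobPattern.h"
using namespace llvm;

static bool isNonObject(StringRef Name) {
  // "[!o]" exercises the newly documented negation form.
  Expected<GlobPattern> Pat = GlobPattern::create("*.[!o]*");
  if (!Pat) {
    consumeError(Pat.takeError()); // Malformed pattern.
    return false;
  }
  return Pat->match(Name);
}
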
diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h
index b37cc514c92e..44f543c363db 100644
--- a/include/llvm/Support/Host.h
+++ b/include/llvm/Support/Host.h
@@ -15,39 +15,11 @@
#include "llvm/ADT/StringMap.h"
-#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__)
-#include <endian.h>
-#elif defined(_AIX)
-#include <sys/machine.h>
-#elif defined(__sun)
-/* Solaris provides _BIG_ENDIAN/_LITTLE_ENDIAN selector in sys/types.h */
-#include <sys/types.h>
-#define BIG_ENDIAN 4321
-#define LITTLE_ENDIAN 1234
-#if defined(_BIG_ENDIAN)
-#define BYTE_ORDER BIG_ENDIAN
-#else
-#define BYTE_ORDER LITTLE_ENDIAN
-#endif
-#else
-#if !defined(BYTE_ORDER) && !defined(_WIN32)
-#include <machine/endian.h>
-#endif
-#endif
-
#include <string>
namespace llvm {
namespace sys {
-#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
-constexpr bool IsBigEndianHost = true;
-#else
-constexpr bool IsBigEndianHost = false;
-#endif
-
- static const bool IsLittleEndianHost = !IsBigEndianHost;
-
/// getDefaultTargetTriple() - Return the default target triple the compiler
/// has been configured to produce code for.
///
diff --git a/include/llvm/Support/JamCRC.h b/include/llvm/Support/JamCRC.h
deleted file mode 100644
index b6fc4e7b9b03..000000000000
--- a/include/llvm/Support/JamCRC.h
+++ /dev/null
@@ -1,48 +0,0 @@
-//===-- llvm/Support/JamCRC.h - Cyclic Redundancy Check ---------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains an implementation of JamCRC.
-//
-// We will use the "Rocksoft^tm Model CRC Algorithm" to describe the properties
-// of this CRC:
-// Width : 32
-// Poly : 04C11DB7
-// Init : FFFFFFFF
-// RefIn : True
-// RefOut : True
-// XorOut : 00000000
-// Check : 340BC6D9 (result of CRC for "123456789")
-//
-// N.B. We permit flexibility of the "Init" value. Some consumers of this need
-// it to be zero.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_JAMCRC_H
-#define LLVM_SUPPORT_JAMCRC_H
-
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-template <typename T> class ArrayRef;
-
-class JamCRC {
-public:
- JamCRC(uint32_t Init = 0xFFFFFFFFU) : CRC(Init) {}
-
- // Update the CRC calculation with Data.
- void update(ArrayRef<char> Data);
-
- uint32_t getCRC() const { return CRC; }
-
-private:
- uint32_t CRC;
-};
-} // End of namespace llvm
-
-#endif
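
The deletion removes only the standalone header; the class appears to have been folded into llvm/Support/CRC.h around this revision. For reference, the Rocksoft parameters documented in the removed file are small enough to restate as a self-contained sketch:

#include <cstdint>

// Bitwise JamCRC per the deleted header's documentation: reflected CRC-32
// (poly 04C11DB7, reflected form 0xEDB88320), Init FFFFFFFF, no final xor.
// Check value: jamCRC("123456789", 9) == 0x340BC6D9.
static uint32_t jamCRC(const char *Data, unsigned Len,
                       uint32_t CRC = 0xFFFFFFFFU) {
  for (unsigned I = 0; I != Len; ++I) {
    CRC ^= static_cast<uint8_t>(Data[I]);
    for (int B = 0; B < 8; ++B)
      CRC = (CRC >> 1) ^ (0xEDB88320U & (0U - (CRC & 1U)));
  }
  return CRC;
}
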
diff --git a/include/llvm/Support/MachineValueType.h b/include/llvm/Support/MachineValueType.h
index b94d2c4836cc..7f9f0b85c55e 100644
--- a/include/llvm/Support/MachineValueType.h
+++ b/include/llvm/Support/MachineValueType.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
#include <cassert>
namespace llvm {
@@ -64,152 +65,162 @@ namespace llvm {
v32i1 = 19, // 32 x i1
v64i1 = 20, // 64 x i1
v128i1 = 21, // 128 x i1
- v512i1 = 22, // 512 x i1
- v1024i1 = 23, // 1024 x i1
-
- v1i8 = 24, // 1 x i8
- v2i8 = 25, // 2 x i8
- v4i8 = 26, // 4 x i8
- v8i8 = 27, // 8 x i8
- v16i8 = 28, // 16 x i8
- v32i8 = 29, // 32 x i8
- v64i8 = 30, // 64 x i8
- v128i8 = 31, //128 x i8
- v256i8 = 32, //256 x i8
-
- v1i16 = 33, // 1 x i16
- v2i16 = 34, // 2 x i16
- v4i16 = 35, // 4 x i16
- v8i16 = 36, // 8 x i16
- v16i16 = 37, // 16 x i16
- v32i16 = 38, // 32 x i16
- v64i16 = 39, // 64 x i16
- v128i16 = 40, //128 x i16
-
- v1i32 = 41, // 1 x i32
- v2i32 = 42, // 2 x i32
- v3i32 = 43, // 3 x i32
- v4i32 = 44, // 4 x i32
- v5i32 = 45, // 5 x i32
- v8i32 = 46, // 8 x i32
- v16i32 = 47, // 16 x i32
- v32i32 = 48, // 32 x i32
- v64i32 = 49, // 64 x i32
- v128i32 = 50, // 128 x i32
- v256i32 = 51, // 256 x i32
- v512i32 = 52, // 512 x i32
- v1024i32 = 53, // 1024 x i32
- v2048i32 = 54, // 2048 x i32
-
- v1i64 = 55, // 1 x i64
- v2i64 = 56, // 2 x i64
- v4i64 = 57, // 4 x i64
- v8i64 = 58, // 8 x i64
- v16i64 = 59, // 16 x i64
- v32i64 = 60, // 32 x i64
-
- v1i128 = 61, // 1 x i128
-
- // Scalable integer types
- nxv1i1 = 62, // n x 1 x i1
- nxv2i1 = 63, // n x 2 x i1
- nxv4i1 = 64, // n x 4 x i1
- nxv8i1 = 65, // n x 8 x i1
- nxv16i1 = 66, // n x 16 x i1
- nxv32i1 = 67, // n x 32 x i1
-
- nxv1i8 = 68, // n x 1 x i8
- nxv2i8 = 69, // n x 2 x i8
- nxv4i8 = 70, // n x 4 x i8
- nxv8i8 = 71, // n x 8 x i8
- nxv16i8 = 72, // n x 16 x i8
- nxv32i8 = 73, // n x 32 x i8
-
- nxv1i16 = 74, // n x 1 x i16
- nxv2i16 = 75, // n x 2 x i16
- nxv4i16 = 76, // n x 4 x i16
- nxv8i16 = 77, // n x 8 x i16
- nxv16i16 = 78, // n x 16 x i16
- nxv32i16 = 79, // n x 32 x i16
-
- nxv1i32 = 80, // n x 1 x i32
- nxv2i32 = 81, // n x 2 x i32
- nxv4i32 = 82, // n x 4 x i32
- nxv8i32 = 83, // n x 8 x i32
- nxv16i32 = 84, // n x 16 x i32
- nxv32i32 = 85, // n x 32 x i32
-
- nxv1i64 = 86, // n x 1 x i64
- nxv2i64 = 87, // n x 2 x i64
- nxv4i64 = 88, // n x 4 x i64
- nxv8i64 = 89, // n x 8 x i64
- nxv16i64 = 90, // n x 16 x i64
- nxv32i64 = 91, // n x 32 x i64
-
- FIRST_INTEGER_VECTOR_VALUETYPE = v1i1,
- LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64,
-
- FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1,
- LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64,
-
- v2f16 = 92, // 2 x f16
- v4f16 = 93, // 4 x f16
- v8f16 = 94, // 8 x f16
- v1f32 = 95, // 1 x f32
- v2f32 = 96, // 2 x f32
- v3f32 = 97, // 3 x f32
- v4f32 = 98, // 4 x f32
- v5f32 = 99, // 5 x f32
- v8f32 = 100, // 8 x f32
- v16f32 = 101, // 16 x f32
- v32f32 = 102, // 32 x f32
- v64f32 = 103, // 64 x f32
- v128f32 = 104, // 128 x f32
- v256f32 = 105, // 256 x f32
- v512f32 = 106, // 512 x f32
- v1024f32 = 107, // 1024 x f32
- v2048f32 = 108, // 2048 x f32
- v1f64 = 109, // 1 x f64
- v2f64 = 110, // 2 x f64
- v4f64 = 111, // 4 x f64
- v8f64 = 112, // 8 x f64
-
- nxv2f16 = 113, // n x 2 x f16
- nxv4f16 = 114, // n x 4 x f16
- nxv8f16 = 115, // n x 8 x f16
- nxv1f32 = 116, // n x 1 x f32
- nxv2f32 = 117, // n x 2 x f32
- nxv4f32 = 118, // n x 4 x f32
- nxv8f32 = 119, // n x 8 x f32
- nxv16f32 = 120, // n x 16 x f32
- nxv1f64 = 121, // n x 1 x f64
- nxv2f64 = 122, // n x 2 x f64
- nxv4f64 = 123, // n x 4 x f64
- nxv8f64 = 124, // n x 8 x f64
-
- FIRST_FP_VECTOR_VALUETYPE = v2f16,
- LAST_FP_VECTOR_VALUETYPE = nxv8f64,
-
- FIRST_FP_SCALABLE_VALUETYPE = nxv2f16,
- LAST_FP_SCALABLE_VALUETYPE = nxv8f64,
+ v256i1 = 22, // 256 x i1
+ v512i1 = 23, // 512 x i1
+ v1024i1 = 24, // 1024 x i1
+
+ v1i8 = 25, // 1 x i8
+ v2i8 = 26, // 2 x i8
+ v4i8 = 27, // 4 x i8
+ v8i8 = 28, // 8 x i8
+ v16i8 = 29, // 16 x i8
+ v32i8 = 30, // 32 x i8
+ v64i8 = 31, // 64 x i8
+ v128i8 = 32, //128 x i8
+ v256i8 = 33, //256 x i8
+
+ v1i16 = 34, // 1 x i16
+ v2i16 = 35, // 2 x i16
+ v3i16 = 36, // 3 x i16
+ v4i16 = 37, // 4 x i16
+ v8i16 = 38, // 8 x i16
+ v16i16 = 39, // 16 x i16
+ v32i16 = 40, // 32 x i16
+ v64i16 = 41, // 64 x i16
+ v128i16 = 42, //128 x i16
+
+ v1i32 = 43, // 1 x i32
+ v2i32 = 44, // 2 x i32
+ v3i32 = 45, // 3 x i32
+ v4i32 = 46, // 4 x i32
+ v5i32 = 47, // 5 x i32
+ v8i32 = 48, // 8 x i32
+ v16i32 = 49, // 16 x i32
+ v32i32 = 50, // 32 x i32
+ v64i32 = 51, // 64 x i32
+ v128i32 = 52, // 128 x i32
+ v256i32 = 53, // 256 x i32
+ v512i32 = 54, // 512 x i32
+ v1024i32 = 55, // 1024 x i32
+ v2048i32 = 56, // 2048 x i32
+
+ v1i64 = 57, // 1 x i64
+ v2i64 = 58, // 2 x i64
+ v4i64 = 59, // 4 x i64
+ v8i64 = 60, // 8 x i64
+ v16i64 = 61, // 16 x i64
+ v32i64 = 62, // 32 x i64
+
+ v1i128 = 63, // 1 x i128
+
+ FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
+ LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,
+
+ v2f16 = 64, // 2 x f16
+ v3f16 = 65, // 3 x f16
+ v4f16 = 66, // 4 x f16
+ v8f16 = 67, // 8 x f16
+ v16f16 = 68, // 16 x f16
+ v32f16 = 69, // 32 x f16
+ v1f32 = 70, // 1 x f32
+ v2f32 = 71, // 2 x f32
+ v3f32 = 72, // 3 x f32
+ v4f32 = 73, // 4 x f32
+ v5f32 = 74, // 5 x f32
+ v8f32 = 75, // 8 x f32
+ v16f32 = 76, // 16 x f32
+ v32f32 = 77, // 32 x f32
+ v64f32 = 78, // 64 x f32
+ v128f32 = 79, // 128 x f32
+ v256f32 = 80, // 256 x f32
+ v512f32 = 81, // 512 x f32
+ v1024f32 = 82, // 1024 x f32
+ v2048f32 = 83, // 2048 x f32
+ v1f64 = 84, // 1 x f64
+ v2f64 = 85, // 2 x f64
+ v4f64 = 86, // 4 x f64
+ v8f64 = 87, // 8 x f64
+
+ FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16,
+ LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v8f64,
+
+ FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
+ LAST_FIXEDLEN_VECTOR_VALUETYPE = v8f64,
+
+ nxv1i1 = 88, // n x 1 x i1
+ nxv2i1 = 89, // n x 2 x i1
+ nxv4i1 = 90, // n x 4 x i1
+ nxv8i1 = 91, // n x 8 x i1
+ nxv16i1 = 92, // n x 16 x i1
+ nxv32i1 = 93, // n x 32 x i1
+
+ nxv1i8 = 94, // n x 1 x i8
+ nxv2i8 = 95, // n x 2 x i8
+ nxv4i8 = 96, // n x 4 x i8
+ nxv8i8 = 97, // n x 8 x i8
+ nxv16i8 = 98, // n x 16 x i8
+ nxv32i8 = 99, // n x 32 x i8
+
+ nxv1i16 = 100, // n x 1 x i16
+ nxv2i16 = 101, // n x 2 x i16
+ nxv4i16 = 102, // n x 4 x i16
+ nxv8i16 = 103, // n x 8 x i16
+ nxv16i16 = 104, // n x 16 x i16
+ nxv32i16 = 105, // n x 32 x i16
+
+ nxv1i32 = 106, // n x 1 x i32
+ nxv2i32 = 107, // n x 2 x i32
+ nxv4i32 = 108, // n x 4 x i32
+ nxv8i32 = 109, // n x 8 x i32
+ nxv16i32 = 110, // n x 16 x i32
+ nxv32i32 = 111, // n x 32 x i32
+
+ nxv1i64 = 112, // n x 1 x i64
+ nxv2i64 = 113, // n x 2 x i64
+ nxv4i64 = 114, // n x 4 x i64
+ nxv8i64 = 115, // n x 8 x i64
+ nxv16i64 = 116, // n x 16 x i64
+ nxv32i64 = 117, // n x 32 x i64
+
+ FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
+ LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,
+
+ nxv2f16 = 118, // n x 2 x f16
+ nxv4f16 = 119, // n x 4 x f16
+ nxv8f16 = 120, // n x 8 x f16
+ nxv1f32 = 121, // n x 1 x f32
+ nxv2f32 = 122, // n x 2 x f32
+ nxv4f32 = 123, // n x 4 x f32
+ nxv8f32 = 124, // n x 8 x f32
+ nxv16f32 = 125, // n x 16 x f32
+ nxv1f64 = 126, // n x 1 x f64
+ nxv2f64 = 127, // n x 2 x f64
+ nxv4f64 = 128, // n x 4 x f64
+ nxv8f64 = 129, // n x 8 x f64
+
+ FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv2f16,
+ LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
+
+ FIRST_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
+ LAST_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
FIRST_VECTOR_VALUETYPE = v1i1,
LAST_VECTOR_VALUETYPE = nxv8f64,
- x86mmx = 125, // This is an X86 MMX value
+ x86mmx = 130, // This is an X86 MMX value
- Glue = 126, // This glues nodes together during pre-RA sched
+ Glue = 131, // This glues nodes together during pre-RA sched
- isVoid = 127, // This has no value
+ isVoid = 132, // This has no value
- Untyped = 128, // This value takes a register, but has
+ Untyped = 133, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
- exnref = 129, // WebAssembly's exnref type
+ exnref = 134, // WebAssembly's exnref type
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
- LAST_VALUETYPE = 130, // This always remains at the end of the list.
+ LAST_VALUETYPE = 135, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -253,41 +264,6 @@ namespace llvm {
SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE;
- // A class to represent the number of elements in a vector
- //
- // For fixed-length vectors, the total number of elements is equal to 'Min'
- // For scalable vectors, the total number of elements is a multiple of 'Min'
- class ElementCount {
- public:
- unsigned Min;
- bool Scalable;
-
- ElementCount(unsigned Min, bool Scalable)
- : Min(Min), Scalable(Scalable) {}
-
- ElementCount operator*(unsigned RHS) {
- return { Min * RHS, Scalable };
- }
-
- ElementCount& operator*=(unsigned RHS) {
- Min *= RHS;
- return *this;
- }
-
- ElementCount operator/(unsigned RHS) {
- return { Min / RHS, Scalable };
- }
-
- ElementCount& operator/=(unsigned RHS) {
- Min /= RHS;
- return *this;
- }
-
- bool operator==(const ElementCount& RHS) {
- return Min == RHS.Min && Scalable == RHS.Scalable;
- }
- };
-
constexpr MVT() = default;
constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {}
@@ -308,16 +284,20 @@ namespace llvm {
bool isFloatingPoint() const {
return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE &&
SimpleTy <= MVT::LAST_FP_VALUETYPE) ||
- (SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE));
+ (SimpleTy >= MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE) ||
+ (SimpleTy >= MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE));
}
/// Return true if this is an integer or a vector integer type.
bool isInteger() const {
return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE &&
SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) ||
- (SimpleTy >= MVT::FIRST_INTEGER_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_INTEGER_VECTOR_VALUETYPE));
+ (SimpleTy >= MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE) ||
+ (SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE));
}
/// Return true if this is an integer, not including vectors.
@@ -335,10 +315,13 @@ namespace llvm {
/// Return true if this is a vector value type where the
/// runtime length is machine dependent
bool isScalableVector() const {
- return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE &&
- SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) ||
- (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE &&
- SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE));
+ return (SimpleTy >= MVT::FIRST_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_SCALABLE_VECTOR_VALUETYPE);
+ }
+
+ bool isFixedLengthVector() const {
+ return (SimpleTy >= MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE);
}
/// Return true if this is a 16-bit vector type.
@@ -373,17 +356,18 @@ namespace llvm {
/// Return true if this is a 256-bit vector type.
bool is256BitVector() const {
- return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 ||
- SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
- SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64);
+ return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 ||
+ SimpleTy == MVT::v4f64 || SimpleTy == MVT::v32i8 ||
+ SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 ||
+ SimpleTy == MVT::v4i64 || SimpleTy == MVT::v256i1);
}
/// Return true if this is a 512-bit vector type.
bool is512BitVector() const {
- return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 ||
- SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 ||
- SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
- SimpleTy == MVT::v8i64);
+ return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 ||
+ SimpleTy == MVT::v8f64 || SimpleTy == MVT::v512i1 ||
+ SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
+ SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64);
}
/// Return true if this is a 1024-bit vector type.
@@ -406,6 +390,15 @@ namespace llvm {
SimpleTy==MVT::vAny || SimpleTy==MVT::iPTRAny);
}
+ /// Return a VT for a vector type with the same element type but
+ /// half the number of elements.
+ MVT getHalfNumVectorElementsVT() const {
+ MVT EltVT = getVectorElementType();
+ auto EltCnt = getVectorElementCount();
+ assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!");
+ return getVectorVT(EltVT, EltCnt / 2);
+ }
+
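A minimal sketch of the new helper in use, assuming an LLVM tree at this revision (the free function is illustrative only):

    #include "llvm/Support/MachineValueType.h"
    #include <cassert>

    void halfExample() {
      llvm::MVT VT = llvm::MVT::v8f32;
      // Same element type, half the lane count: v8f32 -> v4f32.
      llvm::MVT Half = VT.getHalfNumVectorElementsVT();
      assert(Half == llvm::MVT::v4f32 && "expected v4f32");
    }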
/// Returns true if the given vector is a power of 2.
bool isPow2VectorType() const {
unsigned NElts = getVectorNumElements();
@@ -440,6 +433,7 @@ namespace llvm {
case v32i1:
case v64i1:
case v128i1:
+ case v256i1:
case v512i1:
case v1024i1:
case nxv1i1:
@@ -465,6 +459,7 @@ namespace llvm {
case nxv32i8: return i8;
case v1i16:
case v2i16:
+ case v3i16:
case v4i16:
case v8i16:
case v16i16:
@@ -511,8 +506,11 @@ namespace llvm {
case nxv32i64: return i64;
case v1i128: return i128;
case v2f16:
+ case v3f16:
case v4f16:
case v8f16:
+ case v16f16:
+ case v32f16:
case nxv2f16:
case nxv4f16:
case nxv8f16: return f16;
@@ -558,6 +556,7 @@ namespace llvm {
case v512i1:
case v512i32:
case v512f32: return 512;
+ case v256i1:
case v256i8:
case v256i32:
case v256f32: return 256;
@@ -576,6 +575,7 @@ namespace llvm {
case v32i16:
case v32i32:
case v32i64:
+ case v32f16:
case v32f32:
case nxv32i1:
case nxv32i8:
@@ -587,6 +587,7 @@ namespace llvm {
case v16i16:
case v16i32:
case v16i64:
+ case v16f16:
case v16f32:
case nxv16i1:
case nxv16i8:
@@ -628,7 +629,9 @@ namespace llvm {
case nxv4f16:
case nxv4f32:
case nxv4f64: return 4;
+ case v3i16:
case v3i32:
+ case v3f16:
case v3f32: return 3;
case v2i1:
case v2i8:
@@ -664,7 +667,7 @@ namespace llvm {
}
}
- MVT::ElementCount getVectorElementCount() const {
+ ElementCount getVectorElementCount() const {
return { getVectorNumElements(), isScalableVector() };
}
@@ -721,6 +724,8 @@ namespace llvm {
case nxv1i32:
case nxv2f16:
case nxv1f32: return 32;
+ case v3i16:
+ case v3f16: return 48;
case x86mmx:
case f64 :
case i64 :
@@ -763,10 +768,12 @@ namespace llvm {
case nxv2f64: return 128;
case v5i32:
case v5f32: return 160;
+ case v256i1:
case v32i8:
case v16i16:
case v8i32:
case v4i64:
+ case v16f16:
case v8f32:
case v4f64:
case nxv32i8:
@@ -780,6 +787,7 @@ namespace llvm {
case v32i16:
case v16i32:
case v8i64:
+ case v32f16:
case v16f32:
case v8f64:
case nxv32i16:
@@ -900,6 +908,7 @@ namespace llvm {
if (NumElements == 32) return MVT::v32i1;
if (NumElements == 64) return MVT::v64i1;
if (NumElements == 128) return MVT::v128i1;
+ if (NumElements == 256) return MVT::v256i1;
if (NumElements == 512) return MVT::v512i1;
if (NumElements == 1024) return MVT::v1024i1;
break;
@@ -917,6 +926,7 @@ namespace llvm {
case MVT::i16:
if (NumElements == 1) return MVT::v1i16;
if (NumElements == 2) return MVT::v2i16;
+ if (NumElements == 3) return MVT::v3i16;
if (NumElements == 4) return MVT::v4i16;
if (NumElements == 8) return MVT::v8i16;
if (NumElements == 16) return MVT::v16i16;
@@ -953,8 +963,11 @@ namespace llvm {
break;
case MVT::f16:
if (NumElements == 2) return MVT::v2f16;
+ if (NumElements == 3) return MVT::v3f16;
if (NumElements == 4) return MVT::v4f16;
if (NumElements == 8) return MVT::v8f16;
+ if (NumElements == 16) return MVT::v16f16;
+ if (NumElements == 32) return MVT::v32f16;
break;
case MVT::f32:
if (NumElements == 1) return MVT::v1f32;
@@ -1054,7 +1067,7 @@ namespace llvm {
return getVectorVT(VT, NumElements);
}
- static MVT getVectorVT(MVT VT, MVT::ElementCount EC) {
+ static MVT getVectorVT(MVT VT, ElementCount EC) {
if (EC.Scalable)
return getScalableVectorVT(VT, EC.Min);
return getVectorVT(VT, EC.Min);
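For illustration, the ElementCount overload simply dispatches on the Scalable flag (a sketch; ElementCount now comes from the new TypeSize.h further down in this patch):

    llvm::MVT V4 = llvm::MVT::getVectorVT(llvm::MVT::i32, llvm::ElementCount(4, false)); // v4i32
    llvm::MVT NX4 = llvm::MVT::getVectorVT(llvm::MVT::i32, llvm::ElementCount(4, true)); // nxv4i32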
@@ -1108,26 +1121,40 @@ namespace llvm {
(MVT::SimpleValueType)(MVT::LAST_VECTOR_VALUETYPE + 1));
}
- static mvt_range integer_vector_valuetypes() {
+ static mvt_range fixedlen_vector_valuetypes() {
return mvt_range(
- MVT::FIRST_INTEGER_VECTOR_VALUETYPE,
- (MVT::SimpleValueType)(MVT::LAST_INTEGER_VECTOR_VALUETYPE + 1));
+ MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE + 1));
}
- static mvt_range fp_vector_valuetypes() {
+ static mvt_range scalable_vector_valuetypes() {
return mvt_range(
- MVT::FIRST_FP_VECTOR_VALUETYPE,
- (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1));
+ MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_SCALABLE_VECTOR_VALUETYPE + 1));
+ }
+
+ static mvt_range integer_fixedlen_vector_valuetypes() {
+ return mvt_range(
+ MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE + 1));
+ }
+
+ static mvt_range fp_fixedlen_vector_valuetypes() {
+ return mvt_range(
+ MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE + 1));
}
static mvt_range integer_scalable_vector_valuetypes() {
- return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE,
- (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1));
+ return mvt_range(
+ MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE + 1));
}
static mvt_range fp_scalable_vector_valuetypes() {
- return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE,
- (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1));
+ return mvt_range(
+ MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE + 1));
}
/// @}
};
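A sketch of the renamed iteration helpers in target code (illustrative only):

    #include "llvm/Support/MachineValueType.h"

    unsigned count256BitTypes() {
      unsigned Num256 = 0;
      for (llvm::MVT VT : llvm::MVT::fixedlen_vector_valuetypes())
        if (VT.is256BitVector())
          ++Num256; // now counts v256i1 and v16f16 along with the older types
      return Num256;
    }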
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 249139e824b5..004a6f5f6eb8 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -39,6 +39,7 @@ unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
#endif
namespace llvm {
+
/// The behavior an operation has on an input of 0.
enum ZeroBehavior {
/// The returned value is undefined.
@@ -49,6 +50,42 @@ enum ZeroBehavior {
ZB_Width
};
+/// Mathematical constants.
+namespace numbers {
+// TODO: Track C++20 std::numbers.
+// TODO: Favor using the hexadecimal FP constants (requires C++17).
+constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145769P+1) https://oeis.org/A001113
+ egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
+ ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
+ ln10 = 2.3025850929940456840, // (0x1.26bb1bbb55516P+1) https://oeis.org/A002392
+ log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
+ log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
+ pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
+ inv_pi = .31830988618379067154, // (0x1.45f306dc9c883P-2) https://oeis.org/A049541
+ sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
+ inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
+ sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
+ inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
+ sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
+ inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
+ phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
+constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
+ egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
+ ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
+ ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
+ log2ef = 1.44269504F, // (0x1.715476P+0)
+ log10ef = .434294482F, // (0x1.bcb7b2P-2)
+ pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
+ inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
+ sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
+ inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
+ sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
+ inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
+ sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
+ inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
+ phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
+} // namespace numbers
+
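A short sketch of the intended use of the new constants, in place of ad-hoc M_PI-style macros (function names are illustrative):

    #include "llvm/Support/MathExtras.h"
    #include <cmath>

    double degToRad(double Deg) { return Deg * llvm::numbers::pi / 180.0; }
    float log2ViaLn(float X) { return std::log(X) * llvm::numbers::log2ef; }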
namespace detail {
template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
static unsigned count(T Val, ZeroBehavior) {
@@ -73,13 +110,13 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
}
};
-#if __GNUC__ >= 4 || defined(_MSC_VER)
+#if defined(__GNUC__) || defined(_MSC_VER)
template <typename T> struct TrailingZerosCounter<T, 4> {
static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
return 32;
-#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0)
+#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
return __builtin_ctz(Val);
#elif defined(_MSC_VER)
unsigned long Index;
@@ -95,7 +132,7 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
if (ZB != ZB_Undefined && Val == 0)
return 64;
-#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0)
+#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
return __builtin_ctzll(Val);
#elif defined(_MSC_VER)
unsigned long Index;
@@ -142,13 +179,13 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
}
};
-#if __GNUC__ >= 4 || defined(_MSC_VER)
+#if defined(__GNUC__) || defined(_MSC_VER)
template <typename T> struct LeadingZerosCounter<T, 4> {
static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
return 32;
-#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0)
+#if __has_builtin(__builtin_clz) || defined(__GNUC__)
return __builtin_clz(Val);
#elif defined(_MSC_VER)
unsigned long Index;
@@ -164,7 +201,7 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
if (ZB != ZB_Undefined && Val == 0)
return 64;
-#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0)
+#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
return __builtin_clzll(Val);
#elif defined(_MSC_VER)
unsigned long Index;
@@ -486,7 +523,7 @@ template <typename T, std::size_t SizeOfT> struct PopulationCounter {
static unsigned count(T Value) {
// Generic version, forward to 32 bits.
static_assert(SizeOfT <= 4, "Not implemented!");
-#if __GNUC__ >= 4
+#if defined(__GNUC__)
return __builtin_popcount(Value);
#else
uint32_t v = Value;
@@ -499,7 +536,7 @@ template <typename T, std::size_t SizeOfT> struct PopulationCounter {
template <typename T> struct PopulationCounter<T, 8> {
static unsigned count(T Value) {
-#if __GNUC__ >= 4
+#if defined(__GNUC__)
return __builtin_popcountll(Value);
#else
uint64_t v = Value;
@@ -523,6 +560,16 @@ inline unsigned countPopulation(T Value) {
return detail::PopulationCounter<T, sizeof(T)>::count(Value);
}
+/// Compile time Log2.
+/// Valid only for positive powers of two.
+template <size_t kValue> constexpr inline size_t CTLog2() {
+ static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
+ "Value is not a valid power of 2");
+ return 1 + CTLog2<kValue / 2>();
+}
+
+template <> constexpr inline size_t CTLog2<1>() { return 0; }
+
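Because the recursion bottoms out at the CTLog2<1> specialization, the result is itself usable in constant expressions, e.g.:

    static_assert(llvm::CTLog2<1>() == 0, "log2(1) == 0");
    static_assert(llvm::CTLog2<64>() == 6, "log2(64) == 6");
    // llvm::CTLog2<48>() would fail to compile: 48 is not a power of 2.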
/// Return the log base 2 of the specified value.
inline double Log2(double Value) {
#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
@@ -620,25 +667,6 @@ constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
return (A | B) & (1 + ~(A | B));
}
-/// Aligns \c Addr to \c Alignment bytes, rounding up.
-///
-/// Alignment should be a power of two. This method rounds up, so
-/// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8.
-inline uintptr_t alignAddr(const void *Addr, size_t Alignment) {
- assert(Alignment && isPowerOf2_64((uint64_t)Alignment) &&
- "Alignment is not a power of two!");
-
- assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr);
-
- return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1));
-}
-
-/// Returns the necessary adjustment for aligning \c Ptr to \c Alignment
-/// bytes, rounding up.
-inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) {
- return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr;
-}
-
/// Returns the next power of two (in 64-bits) that is strictly greater than A.
/// Returns zero on overflow.
inline uint64_t NextPowerOf2(uint64_t A) {
@@ -704,19 +732,6 @@ inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
return alignTo(Numerator, Denominator) / Denominator;
}
-/// \c alignTo for contexts where a constant expression is required.
-/// \sa alignTo
-///
-/// \todo FIXME: remove when \c constexpr becomes really \c constexpr
-template <uint64_t Align>
-struct AlignTo {
- static_assert(Align != 0u, "Align must be non-zero");
- template <uint64_t Value>
- struct from_value {
- static const uint64_t value = (Value + Align - 1) / Align * Align;
- };
-};
-
/// Returns the largest uint64_t less than or equal to \p Value and is
/// \p Skew mod \p Align. \p Align must be non-zero
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
@@ -725,13 +740,6 @@ inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
return (Value - Skew) / Align * Align + Skew;
}
-/// Returns the offset to the next integer (mod 2**64) that is greater than
-/// or equal to \p Value and is a multiple of \p Align. \p Align must be
-/// non-zero.
-inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
- return alignTo(Value, Align) - Value;
-}
-
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
@@ -853,6 +861,91 @@ SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
extern const float huge_valf;
+
+
+/// Add two signed integers, computing the two's complement truncated result,
+/// returning true if overflow occurred.
+template <typename T>
+typename std::enable_if<std::is_signed<T>::value, T>::type
+AddOverflow(T X, T Y, T &Result) {
+#if __has_builtin(__builtin_add_overflow)
+ return __builtin_add_overflow(X, Y, &Result);
+#else
+ // Perform the unsigned addition.
+ using U = typename std::make_unsigned<T>::type;
+ const U UX = static_cast<U>(X);
+ const U UY = static_cast<U>(Y);
+ const U UResult = UX + UY;
+
+ // Convert to signed.
+ Result = static_cast<T>(UResult);
+
+ // Adding two positive numbers should result in a positive number.
+ if (X > 0 && Y > 0)
+ return Result <= 0;
+ // Adding two negatives should result in a negative number.
+ if (X < 0 && Y < 0)
+ return Result >= 0;
+ return false;
+#endif
+}
+
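A sketch of the overflow check in use (the wrapper function is illustrative):

    #include <cstdint>
    #include <limits>

    bool addWouldOverflow() {
      int32_t Sum;
      // Returns true: INT32_MAX + 1 does not fit, Sum holds the wrapped result.
      return llvm::AddOverflow(std::numeric_limits<int32_t>::max(), int32_t(1), Sum);
    }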
+/// Subtract two signed integers, computing the two's complement truncated
+/// result, returning true if an overflow occurred.
+template <typename T>
+typename std::enable_if<std::is_signed<T>::value, T>::type
+SubOverflow(T X, T Y, T &Result) {
+#if __has_builtin(__builtin_sub_overflow)
+ return __builtin_sub_overflow(X, Y, &Result);
+#else
+ // Perform the unsigned subtraction.
+ using U = typename std::make_unsigned<T>::type;
+ const U UX = static_cast<U>(X);
+ const U UY = static_cast<U>(Y);
+ const U UResult = UX - UY;
+
+ // Convert to signed.
+ Result = static_cast<T>(UResult);
+
+ // Subtracting a positive number from a negative results in a negative number.
+ if (X <= 0 && Y > 0)
+ return Result >= 0;
+ // Subtracting a negative number from a positive results in a positive number.
+ if (X >= 0 && Y < 0)
+ return Result <= 0;
+ return false;
+#endif
+}
+
+
+/// Multiply two signed integers, computing the two's complement truncated
+/// result, returning true if an overflow occurred.
+template <typename T>
+typename std::enable_if<std::is_signed<T>::value, T>::type
+MulOverflow(T X, T Y, T &Result) {
+ // Perform the unsigned multiplication on absolute values.
+ using U = typename std::make_unsigned<T>::type;
+ const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
+ const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
+ const U UResult = UX * UY;
+
+ // Convert to signed.
+ const bool IsNegative = (X < 0) ^ (Y < 0);
+ Result = IsNegative ? (0 - UResult) : UResult;
+
+ // If any of the args was 0, result is 0 and no overflow occurs.
+ if (UX == 0 || UY == 0)
+ return false;
+
+ // UX and UY are in [1, 2^n], where n is the number of digits.
+ // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
+ // positive) divided by an argument compares to the other.
+ if (IsNegative)
+ return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
+ else
+ return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
+}
+
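The other two helpers follow the same pattern (a sketch):

    #include <cstdint>

    int16_t R;
    bool SubOvf = llvm::SubOverflow(int16_t(-32768), int16_t(1), R); // true: -32769 < INT16_MIN
    bool MulOvf = llvm::MulOverflow(int16_t(256), int16_t(128), R);  // true: 32768 > INT16_MAX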
} // End llvm namespace
#endif
diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h
index c3abfc7a7806..1d8a0d3c87cb 100644
--- a/include/llvm/Support/Mutex.h
+++ b/include/llvm/Support/Mutex.h
@@ -13,97 +13,31 @@
#ifndef LLVM_SUPPORT_MUTEX_H
#define LLVM_SUPPORT_MUTEX_H
-#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Threading.h"
#include <cassert>
+#include <mutex>
namespace llvm
{
namespace sys
{
- /// Platform agnostic Mutex class.
- class MutexImpl
- {
- /// @name Constructors
- /// @{
- public:
-
- /// Initializes the lock but doesn't acquire it. if \p recursive is set
- /// to false, the lock will not be recursive which makes it cheaper but
- /// also more likely to deadlock (same thread can't acquire more than
- /// once).
- /// Default Constructor.
- explicit MutexImpl(bool recursive = true);
-
- /// Releases and removes the lock
- /// Destructor
- ~MutexImpl();
-
- /// @}
- /// @name Methods
- /// @{
- public:
-
- /// Attempts to unconditionally acquire the lock. If the lock is held by
- /// another thread, this method will wait until it can acquire the lock.
- /// @returns false if any kind of error occurs, true otherwise.
- /// Unconditionally acquire the lock.
- bool acquire();
-
- /// Attempts to release the lock. If the lock is held by the current
- /// thread, the lock is released allowing other threads to acquire the
- /// lock.
- /// @returns false if any kind of error occurs, true otherwise.
- /// Unconditionally release the lock.
- bool release();
-
- /// Attempts to acquire the lock without blocking. If the lock is not
- /// available, this function returns false quickly (without blocking). If
- /// the lock is available, it is acquired.
- /// @returns false if any kind of error occurs or the lock is not
- /// available, true otherwise.
- /// Try to acquire the lock.
- bool tryacquire();
-
- //@}
- /// @name Platform Dependent Data
- /// @{
- private:
-#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0
- void* data_; ///< We don't know what the data will be
-#endif
-
- /// @}
- /// @name Do Not Implement
- /// @{
- private:
- MutexImpl(const MutexImpl &) = delete;
- void operator=(const MutexImpl &) = delete;
- /// @}
- };
-
-
/// SmartMutex - A mutex with a compile time constant parameter that
/// indicates whether this mutex should become a no-op when we're not
/// running in multithreaded mode.
template<bool mt_only>
class SmartMutex {
- MutexImpl impl;
- unsigned acquired;
- bool recursive;
- public:
- explicit SmartMutex(bool rec = true) :
- impl(rec), acquired(0), recursive(rec) { }
+ std::recursive_mutex impl;
+ unsigned acquired = 0;
+ public:
bool lock() {
if (!mt_only || llvm_is_multithreaded()) {
- return impl.acquire();
+ impl.lock();
+ return true;
} else {
// Single-threaded debugging code. This would be racy in
        // multithreaded mode, but provides no sanity checks in single
// threaded mode.
- assert((recursive || acquired == 0) && "Lock already acquired!!");
++acquired;
return true;
}
@@ -111,13 +45,13 @@ namespace llvm
bool unlock() {
if (!mt_only || llvm_is_multithreaded()) {
- return impl.release();
+ impl.unlock();
+ return true;
} else {
// Single-threaded debugging code. This would be racy in
        // multithreaded mode, but provides no sanity checks in single
// threaded mode.
- assert(((recursive && acquired) || (acquired == 1)) &&
- "Lock not acquired before release!");
+ assert(acquired && "Lock not acquired before release!");
--acquired;
return true;
}
@@ -125,31 +59,16 @@ namespace llvm
bool try_lock() {
if (!mt_only || llvm_is_multithreaded())
- return impl.tryacquire();
+ return impl.try_lock();
else return true;
}
-
- private:
- SmartMutex(const SmartMutex<mt_only> & original);
- void operator=(const SmartMutex<mt_only> &);
};
/// Mutex - A standard, always enforced mutex.
typedef SmartMutex<false> Mutex;
- template<bool mt_only>
- class SmartScopedLock {
- SmartMutex<mt_only>& mtx;
-
- public:
- SmartScopedLock(SmartMutex<mt_only>& m) : mtx(m) {
- mtx.lock();
- }
-
- ~SmartScopedLock() {
- mtx.unlock();
- }
- };
+ template <bool mt_only>
+ using SmartScopedLock = std::lock_guard<SmartMutex<mt_only>>;
typedef SmartScopedLock<false> ScopedLock;
}
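Client code is unchanged by the switch to std::recursive_mutex; a sketch (the lock object and function are illustrative):

    #include "llvm/Support/Mutex.h"

    static llvm::sys::Mutex SharedStateLock;

    void touchSharedState() {
      llvm::sys::ScopedLock Guard(SharedStateLock); // locks here, unlocks at scope exit
      // ... mutate the state guarded by SharedStateLock ...
    }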
diff --git a/include/llvm/Support/MutexGuard.h b/include/llvm/Support/MutexGuard.h
deleted file mode 100644
index d86ced145816..000000000000
--- a/include/llvm/Support/MutexGuard.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- Support/MutexGuard.h - Acquire/Release Mutex In Scope ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a guard for a block of code that ensures a Mutex is locked
-// upon construction and released upon destruction.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_MUTEXGUARD_H
-#define LLVM_SUPPORT_MUTEXGUARD_H
-
-#include "llvm/Support/Mutex.h"
-
-namespace llvm {
- /// Instances of this class acquire a given Mutex Lock when constructed and
- /// hold that lock until destruction. The intention is to instantiate one of
- /// these on the stack at the top of some scope to be assured that C++
- /// destruction of the object will always release the Mutex and thus avoid
- /// a host of nasty multi-threading problems in the face of exceptions, etc.
- /// Guard a section of code with a Mutex.
- class MutexGuard {
- sys::Mutex &M;
- MutexGuard(const MutexGuard &) = delete;
- void operator=(const MutexGuard &) = delete;
- public:
- MutexGuard(sys::Mutex &m) : M(m) { M.lock(); }
- ~MutexGuard() { M.unlock(); }
- /// holds - Returns true if this locker instance holds the specified lock.
- /// This is mostly used in assertions to validate that the correct mutex
- /// is held.
- bool holds(const sys::Mutex& lock) const { return &M == &lock; }
- };
-}
-
-#endif // LLVM_SUPPORT_MUTEXGUARD_H
diff --git a/include/llvm/Support/OnDiskHashTable.h b/include/llvm/Support/OnDiskHashTable.h
index d84da92aab9b..11dc0de0f354 100644
--- a/include/llvm/Support/OnDiskHashTable.h
+++ b/include/llvm/Support/OnDiskHashTable.h
@@ -13,6 +13,7 @@
#ifndef LLVM_SUPPORT_ONDISKHASHTABLE_H
#define LLVM_SUPPORT_ONDISKHASHTABLE_H
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/EndianStream.h"
@@ -207,7 +208,7 @@ public:
// Pad with zeros so that we can start the hashtable at an aligned address.
offset_type TableOff = Out.tell();
- uint64_t N = llvm::OffsetToAlignment(TableOff, alignof(offset_type));
+ uint64_t N = offsetToAlignment(TableOff, Align(alignof(offset_type)));
TableOff += N;
while (N--)
LE.write<uint8_t>(0);
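The replacement helper returns the number of padding bytes required, e.g. (a sketch, assuming llvm/Support/Alignment.h):

    uint64_t Off = 13;
    uint64_t Pad = llvm::offsetToAlignment(Off, llvm::Align(4)); // 3, since 13 + 3 == 16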
diff --git a/include/llvm/Support/Parallel.h b/include/llvm/Support/Parallel.h
index eab9b492c4a5..3c0ed2c11127 100644
--- a/include/llvm/Support/Parallel.h
+++ b/include/llvm/Support/Parallel.h
@@ -18,14 +18,6 @@
#include <functional>
#include <mutex>
-#if defined(_MSC_VER) && LLVM_ENABLE_THREADS
-#pragma warning(push)
-#pragma warning(disable : 4530)
-#include <concrt.h>
-#include <ppl.h>
-#pragma warning(pop)
-#endif
-
namespace llvm {
namespace parallel {
@@ -84,23 +76,6 @@ public:
void sync() const { L.sync(); }
};
-#if defined(_MSC_VER)
-template <class RandomAccessIterator, class Comparator>
-void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
- const Comparator &Comp) {
- concurrency::parallel_sort(Start, End, Comp);
-}
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
- concurrency::parallel_for_each(Begin, End, Fn);
-}
-
-template <class IndexTy, class FuncTy>
-void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
- concurrency::parallel_for(Begin, End, Fn);
-}
-
-#else
const ptrdiff_t MinParallelSize = 1024;
/// Inclusive median.
@@ -188,8 +163,6 @@ void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
#endif
-#endif
-
template <typename Iter>
using DefComparator =
std::less<typename std::iterator_traits<Iter>::value_type>;
diff --git a/include/llvm/Support/RWMutex.h b/include/llvm/Support/RWMutex.h
index 9cd57cbd65a1..150bc7dbbce1 100644
--- a/include/llvm/Support/RWMutex.h
+++ b/include/llvm/Support/RWMutex.h
@@ -16,161 +16,184 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Threading.h"
#include <cassert>
+#include <mutex>
+#include <shared_mutex>
+
+// std::shared_timed_mutex is only available on macOS 10.12 and later.
+#if defined(__APPLE__) && defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
+#if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200
+#define LLVM_USE_RW_MUTEX_IMPL
+#endif
+#endif
namespace llvm {
namespace sys {
- /// Platform agnostic RWMutex class.
- class RWMutexImpl
- {
- /// @name Constructors
- /// @{
- public:
-
- /// Initializes the lock but doesn't acquire it.
- /// Default Constructor.
- explicit RWMutexImpl();
-
- /// @}
- /// @name Do Not Implement
- /// @{
- RWMutexImpl(const RWMutexImpl & original) = delete;
- RWMutexImpl &operator=(const RWMutexImpl &) = delete;
- /// @}
-
- /// Releases and removes the lock
- /// Destructor
- ~RWMutexImpl();
-
- /// @}
- /// @name Methods
- /// @{
- public:
-
- /// Attempts to unconditionally acquire the lock in reader mode. If the
- /// lock is held by a writer, this method will wait until it can acquire
- /// the lock.
- /// @returns false if any kind of error occurs, true otherwise.
- /// Unconditionally acquire the lock in reader mode.
- bool reader_acquire();
-
- /// Attempts to release the lock in reader mode.
- /// @returns false if any kind of error occurs, true otherwise.
- /// Unconditionally release the lock in reader mode.
- bool reader_release();
-
- /// Attempts to unconditionally acquire the lock in reader mode. If the
- /// lock is held by any readers, this method will wait until it can
- /// acquire the lock.
- /// @returns false if any kind of error occurs, true otherwise.
- /// Unconditionally acquire the lock in writer mode.
- bool writer_acquire();
-
- /// Attempts to release the lock in writer mode.
- /// @returns false if any kind of error occurs, true otherwise.
- /// Unconditionally release the lock in write mode.
- bool writer_release();
-
- //@}
- /// @name Platform Dependent Data
- /// @{
- private:
+#if defined(LLVM_USE_RW_MUTEX_IMPL)
+/// Platform agnostic RWMutex class.
+class RWMutexImpl {
+ /// @name Constructors
+ /// @{
+public:
+ /// Initializes the lock but doesn't acquire it.
+ /// Default Constructor.
+ explicit RWMutexImpl();
+
+ /// @}
+ /// @name Do Not Implement
+ /// @{
+ RWMutexImpl(const RWMutexImpl &original) = delete;
+ RWMutexImpl &operator=(const RWMutexImpl &) = delete;
+ /// @}
+
+ /// Releases and removes the lock
+ /// Destructor
+ ~RWMutexImpl();
+
+ /// @}
+ /// @name Methods
+ /// @{
+public:
+ /// Attempts to unconditionally acquire the lock in reader mode. If the
+ /// lock is held by a writer, this method will wait until it can acquire
+ /// the lock.
+ /// @returns false if any kind of error occurs, true otherwise.
+ /// Unconditionally acquire the lock in reader mode.
+ bool lock_shared();
+
+ /// Attempts to release the lock in reader mode.
+ /// @returns false if any kind of error occurs, true otherwise.
+ /// Unconditionally release the lock in reader mode.
+ bool unlock_shared();
+
+ /// Attempts to unconditionally acquire the lock in reader mode. If the
+ /// lock is held by any readers, this method will wait until it can
+ /// acquire the lock.
+ /// @returns false if any kind of error occurs, true otherwise.
+ /// Unconditionally acquire the lock in writer mode.
+ bool lock();
+
+ /// Attempts to release the lock in writer mode.
+ /// @returns false if any kind of error occurs, true otherwise.
+ /// Unconditionally release the lock in write mode.
+ bool unlock();
+
+ //@}
+ /// @name Platform Dependent Data
+ /// @{
+private:
#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0
- void* data_ = nullptr; ///< We don't know what the data will be
+ void *data_ = nullptr; ///< We don't know what the data will be
+#endif
+};
+#endif
+
+/// SmartRWMutex - An R/W mutex with a compile-time constant parameter that
+/// indicates whether this mutex should become a no-op when we're not
+/// running in multithreaded mode.
+template <bool mt_only> class SmartRWMutex {
+ // shared_mutex (C++17) is more efficient than shared_timed_mutex (C++14)
+ // on Windows and always available on MSVC.
+#if defined(_MSC_VER) || __cplusplus > 201402L
+ std::shared_mutex impl;
+#else
+#if !defined(LLVM_USE_RW_MUTEX_IMPL)
+ std::shared_timed_mutex impl;
+#else
+ RWMutexImpl impl;
+#endif
+#endif
+ unsigned readers = 0;
+ unsigned writers = 0;
+
+public:
+ bool lock_shared() {
+ if (!mt_only || llvm_is_multithreaded()) {
+ impl.lock_shared();
+ return true;
+ }
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides no sanity checks in single-threaded mode.
+ ++readers;
+ return true;
+ }
+
+ bool unlock_shared() {
+ if (!mt_only || llvm_is_multithreaded()) {
+ impl.unlock_shared();
+ return true;
+ }
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides no sanity checks in single-threaded mode.
+ assert(readers > 0 && "Reader lock not acquired before release!");
+ --readers;
+ return true;
+ }
+
+ bool lock() {
+ if (!mt_only || llvm_is_multithreaded()) {
+ impl.lock();
+ return true;
+ }
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides no sanity checks in single-threaded mode.
+ assert(writers == 0 && "Writer lock already acquired!");
+ ++writers;
+ return true;
+ }
+
+ bool unlock() {
+ if (!mt_only || llvm_is_multithreaded()) {
+ impl.unlock();
+ return true;
+ }
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides no sanity checks in single-threaded mode.
+ assert(writers == 1 && "Writer lock not acquired before release!");
+ --writers;
+ return true;
+ }
+};
+
+typedef SmartRWMutex<false> RWMutex;
+
+/// ScopedReader - RAII acquisition of a reader lock
+#if !defined(LLVM_USE_RW_MUTEX_IMPL)
+template <bool mt_only>
+using SmartScopedReader = const std::shared_lock<SmartRWMutex<mt_only>>;
+#else
+template <bool mt_only> struct SmartScopedReader {
+ SmartRWMutex<mt_only> &mutex;
+
+ explicit SmartScopedReader(SmartRWMutex<mt_only> &m) : mutex(m) {
+ mutex.lock_shared();
+ }
+
+ ~SmartScopedReader() { mutex.unlock_shared(); }
+};
+#endif
+typedef SmartScopedReader<false> ScopedReader;
+
+/// ScopedWriter - RAII acquisition of a writer lock
+#if !defined(LLVM_USE_RW_MUTEX_IMPL)
+template <bool mt_only>
+using SmartScopedWriter = std::lock_guard<SmartRWMutex<mt_only>>;
+#else
+template <bool mt_only> struct SmartScopedWriter {
+ SmartRWMutex<mt_only> &mutex;
+
+ explicit SmartScopedWriter(SmartRWMutex<mt_only> &m) : mutex(m) {
+ mutex.lock();
+ }
+
+ ~SmartScopedWriter() { mutex.unlock(); }
+};
#endif
- };
-
- /// SmartMutex - An R/W mutex with a compile time constant parameter that
- /// indicates whether this mutex should become a no-op when we're not
- /// running in multithreaded mode.
- template<bool mt_only>
- class SmartRWMutex {
- RWMutexImpl impl;
- unsigned readers = 0;
- unsigned writers = 0;
-
- public:
- explicit SmartRWMutex() = default;
- SmartRWMutex(const SmartRWMutex<mt_only> & original) = delete;
- SmartRWMutex<mt_only> &operator=(const SmartRWMutex<mt_only> &) = delete;
-
- bool lock_shared() {
- if (!mt_only || llvm_is_multithreaded())
- return impl.reader_acquire();
-
- // Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
- ++readers;
- return true;
- }
-
- bool unlock_shared() {
- if (!mt_only || llvm_is_multithreaded())
- return impl.reader_release();
-
- // Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
- assert(readers > 0 && "Reader lock not acquired before release!");
- --readers;
- return true;
- }
-
- bool lock() {
- if (!mt_only || llvm_is_multithreaded())
- return impl.writer_acquire();
-
- // Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
- assert(writers == 0 && "Writer lock already acquired!");
- ++writers;
- return true;
- }
-
- bool unlock() {
- if (!mt_only || llvm_is_multithreaded())
- return impl.writer_release();
-
- // Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
- assert(writers == 1 && "Writer lock not acquired before release!");
- --writers;
- return true;
- }
- };
-
- typedef SmartRWMutex<false> RWMutex;
-
- /// ScopedReader - RAII acquisition of a reader lock
- template<bool mt_only>
- struct SmartScopedReader {
- SmartRWMutex<mt_only>& mutex;
-
- explicit SmartScopedReader(SmartRWMutex<mt_only>& m) : mutex(m) {
- mutex.lock_shared();
- }
-
- ~SmartScopedReader() {
- mutex.unlock_shared();
- }
- };
-
- typedef SmartScopedReader<false> ScopedReader;
-
- /// ScopedWriter - RAII acquisition of a writer lock
- template<bool mt_only>
- struct SmartScopedWriter {
- SmartRWMutex<mt_only>& mutex;
-
- explicit SmartScopedWriter(SmartRWMutex<mt_only>& m) : mutex(m) {
- mutex.lock();
- }
-
- ~SmartScopedWriter() {
- mutex.unlock();
- }
- };
-
- typedef SmartScopedWriter<false> ScopedWriter;
+typedef SmartScopedWriter<false> ScopedWriter;
} // end namespace sys
} // end namespace llvm
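Whichever implementation is selected above, client usage stays the same; a sketch (names illustrative):

    #include "llvm/Support/RWMutex.h"

    static llvm::sys::RWMutex TableLock;

    int readTable() {
      llvm::sys::ScopedReader R(TableLock); // shared lock, released at scope exit
      return 0; // ... read the guarded table ...
    }

    void writeTable() {
      llvm::sys::ScopedWriter W(TableLock); // exclusive lock
      // ... mutate the guarded table ...
    }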
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index 2d19b10fd890..b2620ab4cfc9 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -44,6 +44,9 @@ namespace llvm {
Regex();
/// Compiles the given regular expression \p Regex.
+ ///
+ /// \param Regex - the referenced string is no longer needed after this
+ /// constructor finishes; only its compiled form is stored.
Regex(StringRef Regex, unsigned Flags = NoFlags);
Regex(const Regex &) = delete;
Regex &operator=(Regex regex) {
@@ -54,9 +57,10 @@ namespace llvm {
Regex(Regex &&regex);
~Regex();
- /// isValid - returns the error encountered during regex compilation, or
- /// matching, if any.
+ /// isValid - returns the error encountered during regex compilation, if
+ /// any.
bool isValid(std::string &Error) const;
+ bool isValid() const { return !error; }
/// getNumMatches - In a valid regex, return the number of parenthesized
/// matches it contains. The number filled in by match will include this
@@ -69,8 +73,12 @@ namespace llvm {
/// with references to the matched group expressions (inside \p String),
/// the first group is always the entire pattern.
///
+ /// \param Error - If non-null, any errors in the matching will be recorded
+ /// as a non-empty string. If there is no error, it will be an empty string.
+ ///
/// This returns true on a successful match.
- bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr);
+ bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr,
+ std::string *Error = nullptr) const;
/// sub - Return the result of replacing the first match of the regex in
/// \p String with the \p Repl string. Backreferences like "\0" in the
@@ -81,9 +89,9 @@ namespace llvm {
///
/// \param Error If non-null, any errors in the substitution (invalid
/// backreferences, trailing backslashes) will be recorded as a non-empty
- /// string.
+ /// string. If there is no error, it will be an empty string.
std::string sub(StringRef Repl, StringRef String,
- std::string *Error = nullptr);
+ std::string *Error = nullptr) const;
/// If this function returns true, ^Str$ is an extended regular
/// expression that matches Str and only Str.
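A sketch of the now-const matching interface with the new optional error out-parameter (the pattern is illustrative):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/Regex.h"

    bool parseKey(llvm::StringRef Input) {
      const llvm::Regex RE("^([a-z]+)-([0-9]+)$");
      if (!RE.isValid()) // new no-argument overload
        return false;
      llvm::SmallVector<llvm::StringRef, 3> Groups;
      std::string Error;
      return RE.match(Input, &Groups, &Error); // Groups[0] is the whole match
    }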
diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h
index 4d8aa5f1470d..5bb6a254a47f 100644
--- a/include/llvm/Support/Registry.h
+++ b/include/llvm/Support/Registry.h
@@ -115,7 +115,7 @@ namespace llvm {
entry Entry;
node Node;
- static std::unique_ptr<T> CtorFn() { return make_unique<V>(); }
+ static std::unique_ptr<T> CtorFn() { return std::make_unique<V>(); }
public:
Add(StringRef Name, StringRef Desc)
diff --git a/include/llvm/Support/SHA1.h b/include/llvm/Support/SHA1.h
index 87fe94bbd5cd..2cfbd2179364 100644
--- a/include/llvm/Support/SHA1.h
+++ b/include/llvm/Support/SHA1.h
@@ -16,13 +16,13 @@
#define LLVM_SUPPORT_SHA1_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include <array>
#include <cstdint>
namespace llvm {
template <typename T> class ArrayRef;
-class StringRef;
/// A class that wrap the SHA1 algorithm.
class SHA1 {
diff --git a/include/llvm/Support/ScalableSize.h b/include/llvm/Support/ScalableSize.h
deleted file mode 100644
index 96bf043773a0..000000000000
--- a/include/llvm/Support/ScalableSize.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===- ScalableSize.h - Scalable vector size info ---------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides a struct that can be used to query the size of IR types
-// which may be scalable vectors. It provides convenience operators so that
-// it can be used in much the same way as a single scalar value.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_SCALABLESIZE_H
-#define LLVM_SUPPORT_SCALABLESIZE_H
-
-namespace llvm {
-
-class ElementCount {
-public:
- unsigned Min; // Minimum number of vector elements.
- bool Scalable; // If true, NumElements is a multiple of 'Min' determined
- // at runtime rather than compile time.
-
- ElementCount(unsigned Min, bool Scalable)
- : Min(Min), Scalable(Scalable) {}
-
- ElementCount operator*(unsigned RHS) {
- return { Min * RHS, Scalable };
- }
- ElementCount operator/(unsigned RHS) {
- return { Min / RHS, Scalable };
- }
-
- bool operator==(const ElementCount& RHS) const {
- return Min == RHS.Min && Scalable == RHS.Scalable;
- }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_SUPPORT_SCALABLESIZE_H
diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h
index a6b215a24311..a4f1fad22dd5 100644
--- a/include/llvm/Support/Signals.h
+++ b/include/llvm/Support/Signals.h
@@ -84,6 +84,17 @@ namespace sys {
/// function. Note also that the handler may be executed on a different
/// thread on some platforms.
void SetInfoSignalFunction(void (*Handler)());
+
+ /// Registers a function to be called when a "pipe" signal is delivered to
+ /// the process.
+ ///
+ /// The "pipe" signal typically indicates a failed write to a pipe (SIGPIPE).
+ /// The default installed handler calls `exit(EX_IOERR)`, causing the process
+ /// to immediately exit with an IO error exit code.
+ ///
+ /// This function is only applicable on POSIX systems.
+ void SetPipeSignalFunction(void (*Handler)());
+
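A sketch of installing a custom handler so a closed pipe no longer exits the process (the handler is illustrative):

    #include "llvm/Support/Signals.h"

    static void onBrokenPipe() {
      // Runs instead of the default exit(EX_IOERR) when SIGPIPE is delivered.
    }

    void installHandlers() { llvm::sys::SetPipeSignalFunction(&onBrokenPipe); }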
} // End sys namespace
} // End llvm namespace
diff --git a/include/llvm/Support/SwapByteOrder.h b/include/llvm/Support/SwapByteOrder.h
index 06a447a27c2a..6cec87006c02 100644
--- a/include/llvm/Support/SwapByteOrder.h
+++ b/include/llvm/Support/SwapByteOrder.h
@@ -22,9 +22,37 @@
#include <stdlib.h>
#endif
+#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__)
+#include <endian.h>
+#elif defined(_AIX)
+#include <sys/machine.h>
+#elif defined(__sun)
+/* Solaris provides _BIG_ENDIAN/_LITTLE_ENDIAN selector in sys/types.h */
+#include <sys/types.h>
+#define BIG_ENDIAN 4321
+#define LITTLE_ENDIAN 1234
+#if defined(_BIG_ENDIAN)
+#define BYTE_ORDER BIG_ENDIAN
+#else
+#define BYTE_ORDER LITTLE_ENDIAN
+#endif
+#else
+#if !defined(BYTE_ORDER) && !defined(_WIN32)
+#include <machine/endian.h>
+#endif
+#endif
+
namespace llvm {
namespace sys {
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
+constexpr bool IsBigEndianHost = true;
+#else
+constexpr bool IsBigEndianHost = false;
+#endif
+
+static const bool IsLittleEndianHost = !IsBigEndianHost;
+
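With these constants, endianness checks become ordinary compile-time branches, e.g. (a sketch assuming this header is included):

    #include "llvm/Support/SwapByteOrder.h"

    inline uint32_t toLittleEndian32(uint32_t V) {
      return llvm::sys::IsBigEndianHost ? llvm::sys::SwapByteOrder_32(V) : V;
    }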
/// SwapByteOrder_16 - This function returns a byte-swapped representation of
/// the 16-bit argument.
inline uint16_t SwapByteOrder_16(uint16_t value) {
@@ -39,10 +67,9 @@ inline uint16_t SwapByteOrder_16(uint16_t value) {
#endif
}
-/// SwapByteOrder_32 - This function returns a byte-swapped representation of
-/// the 32-bit argument.
+/// This function returns a byte-swapped representation of the 32-bit argument.
inline uint32_t SwapByteOrder_32(uint32_t value) {
-#if defined(__llvm__) || (LLVM_GNUC_PREREQ(4, 3, 0) && !defined(__ICC))
+#if defined(__llvm__) || (defined(__GNUC__) && !defined(__ICC))
return __builtin_bswap32(value);
#elif defined(_MSC_VER) && !defined(_DEBUG)
return _byteswap_ulong(value);
@@ -55,10 +82,9 @@ inline uint32_t SwapByteOrder_32(uint32_t value) {
#endif
}
-/// SwapByteOrder_64 - This function returns a byte-swapped representation of
-/// the 64-bit argument.
+/// This function returns a byte-swapped representation of the 64-bit argument.
inline uint64_t SwapByteOrder_64(uint64_t value) {
-#if defined(__llvm__) || (LLVM_GNUC_PREREQ(4, 3, 0) && !defined(__ICC))
+#if defined(__llvm__) || (defined(__GNUC__) && !defined(__ICC))
return __builtin_bswap64(value);
#elif defined(_MSC_VER) && !defined(_DEBUG)
return _byteswap_uint64(value);
diff --git a/include/llvm/Support/TargetOpcodes.def b/include/llvm/Support/TargetOpcodes.def
index 598c1064efd0..11731ac35415 100644
--- a/include/llvm/Support/TargetOpcodes.def
+++ b/include/llvm/Support/TargetOpcodes.def
@@ -294,9 +294,21 @@ HANDLE_TARGET_OPCODE(G_SEXTLOAD)
/// Generic zeroext load
HANDLE_TARGET_OPCODE(G_ZEXTLOAD)
+/// Generic indexed load (including anyext load)
+HANDLE_TARGET_OPCODE(G_INDEXED_LOAD)
+
+/// Generic indexed signext load
+HANDLE_TARGET_OPCODE(G_INDEXED_SEXTLOAD)
+
+/// Generic indexed zeroext load
+HANDLE_TARGET_OPCODE(G_INDEXED_ZEXTLOAD)
+
/// Generic store.
HANDLE_TARGET_OPCODE(G_STORE)
+/// Generic indexed store.
+HANDLE_TARGET_OPCODE(G_INDEXED_STORE)
+
/// Generic atomic cmpxchg with internal success check.
HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
@@ -315,6 +327,8 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_MAX)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN)
+HANDLE_TARGET_OPCODE(G_ATOMICRMW_FADD)
+HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB)
// Generic atomic fence
HANDLE_TARGET_OPCODE(G_FENCE)
@@ -354,6 +368,7 @@ HANDLE_TARGET_OPCODE(G_VAARG)
// Generic sign extend
HANDLE_TARGET_OPCODE(G_SEXT)
+HANDLE_TARGET_OPCODE(G_SEXT_INREG)
// Generic zero extend
HANDLE_TARGET_OPCODE(G_ZEXT)
@@ -436,6 +451,9 @@ HANDLE_TARGET_OPCODE(G_FMUL)
/// Generic FMA multiplication. Behaves like llvm fma intrinsic
HANDLE_TARGET_OPCODE(G_FMA)
+/// Generic FP multiply and add. Behaves as separate fmul and fadd.
+HANDLE_TARGET_OPCODE(G_FMAD)
+
/// Generic FP division.
HANDLE_TARGET_OPCODE(G_FDIV)
@@ -557,6 +575,9 @@ HANDLE_TARGET_OPCODE(G_CTPOP)
/// Generic byte swap.
HANDLE_TARGET_OPCODE(G_BSWAP)
+/// Generic bit reverse.
+HANDLE_TARGET_OPCODE(G_BITREVERSE)
+
/// Floating point ceil.
HANDLE_TARGET_OPCODE(G_FCEIL)
@@ -587,12 +608,15 @@ HANDLE_TARGET_OPCODE(G_BLOCK_ADDR)
/// Generic jump table address
HANDLE_TARGET_OPCODE(G_JUMP_TABLE)
+/// Generic dynamic stack allocation.
+HANDLE_TARGET_OPCODE(G_DYN_STACKALLOC)
+
// TODO: Add more generic opcodes as we move along.
/// Marker for the end of the generic opcode.
/// This is used to check if an opcode is in the range of the
/// generic opcodes.
-HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_JUMP_TABLE)
+HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_DYN_STACKALLOC)
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific post-isel opcode values start here.
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index bf75650760d0..f4bc26b858c8 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -510,8 +510,8 @@ public:
std::move(Emitter), RelaxAll);
break;
case Triple::XCOFF:
- S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW),
- std::move(Emitter), RelaxAll);
+ S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW),
+ std::move(Emitter), RelaxAll);
break;
}
if (ObjectTargetStreamerCtorFn)
diff --git a/include/llvm/Support/TimeProfiler.h b/include/llvm/Support/TimeProfiler.h
index 72b6f7180bde..8cc430d0bc72 100644
--- a/include/llvm/Support/TimeProfiler.h
+++ b/include/llvm/Support/TimeProfiler.h
@@ -19,7 +19,7 @@ extern TimeTraceProfiler *TimeTraceProfilerInstance;
/// Initialize the time trace profiler.
/// This sets up the global \p TimeTraceProfilerInstance
/// variable to be the profiler instance.
-void timeTraceProfilerInitialize();
+void timeTraceProfilerInitialize(unsigned TimeTraceGranularity);
/// Cleanup the time trace profiler, if it was initialized.
void timeTraceProfilerCleanup();
diff --git a/include/llvm/Support/TrailingObjects.h b/include/llvm/Support/TrailingObjects.h
index 8cf4f7aed7f8..49be89613c43 100644
--- a/include/llvm/Support/TrailingObjects.h
+++ b/include/llvm/Support/TrailingObjects.h
@@ -47,6 +47,7 @@
#define LLVM_SUPPORT_TRAILINGOBJECTS_H
#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/type_traits.h"
@@ -87,11 +88,6 @@ protected:
template <typename T> struct OverloadToken {};
};
-/// This helper template works-around MSVC 2013's lack of useful
-/// alignas() support. The argument to alignas(), in MSVC, is
-/// required to be a literal integer. But, you *can* use template
-/// specialization to select between a bunch of different alignas()
-/// expressions...
template <int Align>
class TrailingObjectsAligner : public TrailingObjectsBase {};
template <>
@@ -172,7 +168,7 @@ protected:
if (requiresRealignment())
return reinterpret_cast<const NextTy *>(
- llvm::alignAddr(Ptr, alignof(NextTy)));
+ alignAddr(Ptr, Align::Of<NextTy>()));
else
return reinterpret_cast<const NextTy *>(Ptr);
}
@@ -186,7 +182,7 @@ protected:
Obj, TrailingObjectsBase::OverloadToken<PrevTy>());
if (requiresRealignment())
- return reinterpret_cast<NextTy *>(llvm::alignAddr(Ptr, alignof(NextTy)));
+ return reinterpret_cast<NextTy *>(alignAddr(Ptr, Align::Of<NextTy>()));
else
return reinterpret_cast<NextTy *>(Ptr);
}
@@ -254,9 +250,7 @@ class TrailingObjects : private trailing_objects_internal::TrailingObjectsImpl<
// because BaseTy isn't complete at class instantiation time, but
// will be by the time this function is instantiated.
static void verifyTrailingObjectsAssertions() {
-#ifdef LLVM_IS_FINAL
- static_assert(LLVM_IS_FINAL(BaseTy), "BaseTy must be final.");
-#endif
+ static_assert(std::is_final<BaseTy>(), "BaseTy must be final.");
}
// These two methods are the base of the recursion for this method.
@@ -369,7 +363,9 @@ public:
template <typename... Tys> struct FixedSizeStorage {
template <size_t... Counts> struct with_counts {
enum { Size = totalSizeToAlloc<Tys...>(Counts...) };
- typedef llvm::AlignedCharArray<alignof(BaseTy), Size> type;
+ struct type {
+ alignas(BaseTy) char buffer[Size];
+ };
};
};
diff --git a/include/llvm/Support/TypeSize.h b/include/llvm/Support/TypeSize.h
new file mode 100644
index 000000000000..711679cdcacb
--- /dev/null
+++ b/include/llvm/Support/TypeSize.h
@@ -0,0 +1,201 @@
+//===- TypeSize.h - Wrapper around type sizes -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a struct that can be used to query the size of IR types
+// which may be scalable vectors. It provides convenience operators so that
+// it can be used in much the same way as a single scalar value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TYPESIZE_H
+#define LLVM_SUPPORT_TYPESIZE_H
+
+#include <cassert>
+#include <tuple>
+
+namespace llvm {
+
+class ElementCount {
+public:
+ unsigned Min; // Minimum number of vector elements.
+ bool Scalable; // If true, NumElements is a multiple of 'Min' determined
+ // at runtime rather than compile time.
+
+ ElementCount(unsigned Min, bool Scalable)
+ : Min(Min), Scalable(Scalable) {}
+
+ ElementCount operator*(unsigned RHS) {
+ return { Min * RHS, Scalable };
+ }
+ ElementCount operator/(unsigned RHS) {
+ return { Min / RHS, Scalable };
+ }
+
+ bool operator==(const ElementCount& RHS) const {
+ return Min == RHS.Min && Scalable == RHS.Scalable;
+ }
+ bool operator!=(const ElementCount& RHS) const {
+ return !(*this == RHS);
+ }
+};
+
+// This class is used to represent the size of types. If the type is of fixed
+// size, it will represent the exact size. If the type is a scalable vector,
+// it will represent the known minimum size.
+class TypeSize {
+ uint64_t MinSize; // The known minimum size.
+ bool IsScalable; // If true, then the runtime size is an integer multiple
+ // of MinSize.
+
+public:
+ constexpr TypeSize(uint64_t MinSize, bool Scalable)
+ : MinSize(MinSize), IsScalable(Scalable) {}
+
+ static constexpr TypeSize Fixed(uint64_t Size) {
+ return TypeSize(Size, /*IsScalable=*/false);
+ }
+
+ static constexpr TypeSize Scalable(uint64_t MinSize) {
+ return TypeSize(MinSize, /*IsScalable=*/true);
+ }
+
+ // Scalable vector types with the same minimum size as a fixed size type are
+ // not guaranteed to be the same size at runtime, so they are never
+ // considered to be equal.
+ friend bool operator==(const TypeSize &LHS, const TypeSize &RHS) {
+ return std::tie(LHS.MinSize, LHS.IsScalable) ==
+ std::tie(RHS.MinSize, RHS.IsScalable);
+ }
+
+ friend bool operator!=(const TypeSize &LHS, const TypeSize &RHS) {
+ return !(LHS == RHS);
+ }
+
+ // For many cases, size ordering between scalable and fixed size types cannot
+ // be determined at compile time, so such comparisons aren't allowed.
+ //
+ // e.g. <vscale x 2 x i16> could be bigger than <4 x i32> with a runtime
+ // vscale >= 5, equal sized with a vscale of 4, and smaller with
+ // a vscale <= 3.
+ //
+ // If the scalable flags match, just perform the requested comparison
+ // between the minimum sizes.
+ friend bool operator<(const TypeSize &LHS, const TypeSize &RHS) {
+ assert(LHS.IsScalable == RHS.IsScalable &&
+ "Ordering comparison of scalable and fixed types");
+
+ return LHS.MinSize < RHS.MinSize;
+ }
+
+ friend bool operator>(const TypeSize &LHS, const TypeSize &RHS) {
+ return RHS < LHS;
+ }
+
+ friend bool operator<=(const TypeSize &LHS, const TypeSize &RHS) {
+ return !(RHS < LHS);
+ }
+
+ friend bool operator>=(const TypeSize &LHS, const TypeSize& RHS) {
+ return !(LHS < RHS);
+ }
+
+ // Convenience operators to obtain relative sizes independently of
+ // the scalable flag.
+ TypeSize operator*(unsigned RHS) const {
+ return { MinSize * RHS, IsScalable };
+ }
+
+ friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) {
+ return { LHS * RHS.MinSize, RHS.IsScalable };
+ }
+
+ TypeSize operator/(unsigned RHS) const {
+ return { MinSize / RHS, IsScalable };
+ }
+
+ // Return the minimum size with the assumption that the size is exact.
+ // Use in places where a scalable size doesn't make sense (e.g. non-vector
+ // types, or vectors in backends which don't support scalable vectors).
+ uint64_t getFixedSize() const {
+ assert(!IsScalable && "Request for a fixed size on a scalable object");
+ return MinSize;
+ }
+
+ // Return the known minimum size. Use in places where the scalable property
+ // doesn't matter (e.g. determining alignment) or in conjunction with the
+ // isScalable method below.
+ uint64_t getKnownMinSize() const {
+ return MinSize;
+ }
+
+ // Return whether or not the size is scalable.
+ bool isScalable() const {
+ return IsScalable;
+ }
+
+ // Casts to a uint64_t if this is a fixed-width size.
+ //
+ // NOTE: This interface is obsolete and will be removed in a future version
+ // of LLVM in favour of calling getFixedSize() directly.
+ operator uint64_t() const {
+ return getFixedSize();
+ }
+
+ // Additional convenience operators needed to avoid ambiguous parses.
+ // TODO: Make uint64_t the default operator?
+ TypeSize operator*(uint64_t RHS) const {
+ return { MinSize * RHS, IsScalable };
+ }
+
+ TypeSize operator*(int RHS) const {
+ return { MinSize * RHS, IsScalable };
+ }
+
+ TypeSize operator*(int64_t RHS) const {
+ return { MinSize * RHS, IsScalable };
+ }
+
+ friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) {
+ return { LHS * RHS.MinSize, RHS.IsScalable };
+ }
+
+ friend TypeSize operator*(const int LHS, const TypeSize &RHS) {
+ return { LHS * RHS.MinSize, RHS.IsScalable };
+ }
+
+ friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) {
+ return { LHS * RHS.MinSize, RHS.IsScalable };
+ }
+
+ TypeSize operator/(uint64_t RHS) const {
+ return { MinSize / RHS, IsScalable };
+ }
+
+ TypeSize operator/(int RHS) const {
+ return { MinSize / RHS, IsScalable };
+ }
+
+ TypeSize operator/(int64_t RHS) const {
+ return { MinSize / RHS, IsScalable };
+ }
+};
+
+/// Returns a TypeSize with a known minimum size that is the next integer
+/// (mod 2**64) that is greater than or equal to \p Value and is a multiple
+/// of \p Align. \p Align must be non-zero.
+///
+/// Similar to the alignTo functions in MathExtras.h
+inline TypeSize alignTo(TypeSize Size, uint64_t Align) {
+ assert(Align != 0u && "Align must be non-zero");
+ return {(Size.getKnownMinSize() + Align - 1) / Align * Align,
+ Size.isScalable()};
+}
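+
+// A minimal usage sketch (illustrative only), built from the operations above:
+//   TypeSize FixedTS = TypeSize::Fixed(128);   // exactly 128 bits
+//   TypeSize ScalTS = TypeSize::Scalable(128); // at least 128 bits at runtime
+//   bool Eq = (FixedTS == ScalTS);     // false; fixed and scalable never equal
+//   uint64_t Min = ScalTS.getKnownMinSize();              // 128
+//   TypeSize Aligned = alignTo(TypeSize::Fixed(100), 64); // Fixed(128)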
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_TypeSize_H
diff --git a/include/llvm/Support/UnicodeCharRanges.h b/include/llvm/Support/UnicodeCharRanges.h
index 4b59f8a92b76..73d3603b74df 100644
--- a/include/llvm/Support/UnicodeCharRanges.h
+++ b/include/llvm/Support/UnicodeCharRanges.h
@@ -9,11 +9,8 @@
#define LLVM_SUPPORT_UNICODECHARRANGES_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/include/llvm/Support/UniqueLock.h b/include/llvm/Support/UniqueLock.h
deleted file mode 100644
index 0a887ad5965d..000000000000
--- a/include/llvm/Support/UniqueLock.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- Support/UniqueLock.h - Acquire/Release Mutex In Scope ----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a guard for a block of code that ensures a Mutex is locked
-// upon construction and released upon destruction.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_UNIQUE_LOCK_H
-#define LLVM_SUPPORT_UNIQUE_LOCK_H
-
-#include <cassert>
-
-namespace llvm {
-
- /// A pared-down imitation of std::unique_lock from C++11. Contrary to the
- /// name, it's really more of a wrapper for a lock. It may or may not have
- /// an associated mutex, which is guaranteed to be locked upon creation
- /// and unlocked after destruction. unique_lock can also unlock the mutex
- /// and re-lock it freely during its lifetime.
- /// Guard a section of code with a mutex.
- template<typename MutexT>
- class unique_lock {
- MutexT *M = nullptr;
- bool locked = false;
-
- public:
- unique_lock() = default;
- explicit unique_lock(MutexT &m) : M(&m), locked(true) { M->lock(); }
- unique_lock(const unique_lock &) = delete;
- unique_lock &operator=(const unique_lock &) = delete;
-
- void operator=(unique_lock &&o) {
- if (owns_lock())
- M->unlock();
- M = o.M;
- locked = o.locked;
- o.M = nullptr;
- o.locked = false;
- }
-
- ~unique_lock() { if (owns_lock()) M->unlock(); }
-
- void lock() {
- assert(!locked && "mutex already locked!");
- assert(M && "no associated mutex!");
- M->lock();
- locked = true;
- }
-
- void unlock() {
- assert(locked && "unlocking a mutex that isn't locked!");
- assert(M && "no associated mutex!");
- M->unlock();
- locked = false;
- }
-
- bool owns_lock() { return locked; }
- };
-
-} // end namespace llvm
-
-#endif // LLVM_SUPPORT_UNIQUE_LOCK_H
diff --git a/include/llvm/Support/VirtualFileSystem.h b/include/llvm/Support/VirtualFileSystem.h
index 31c9e851daed..c844d9d194f0 100644
--- a/include/llvm/Support/VirtualFileSystem.h
+++ b/include/llvm/Support/VirtualFileSystem.h
@@ -647,9 +647,19 @@ private:
friend class VFSFromYamlDirIterImpl;
friend class RedirectingFileSystemParser;
+ bool shouldUseExternalFS() const {
+ return ExternalFSValidWD && IsFallthrough;
+ }
+
/// The root(s) of the virtual file system.
std::vector<std::unique_ptr<Entry>> Roots;
+ /// The current working directory of the file system.
+ std::string WorkingDirectory;
+
+ /// Whether the current working directory is valid for the external FS.
+ bool ExternalFSValidWD = false;
+
/// The file system to use for external references.
IntrusiveRefCntPtr<FileSystem> ExternalFS;
@@ -689,8 +699,7 @@ private:
true;
#endif
- RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS)
- : ExternalFS(std::move(ExternalFS)) {}
+ RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS);
/// Looks up the path <tt>[Start, End)</tt> in \p From, possibly
/// recursing into the contents of \p From if it is a directory.
@@ -730,9 +739,10 @@ public:
StringRef getExternalContentsPrefixDir() const;
+ void dump(raw_ostream &OS) const;
+ void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const;
- LLVM_DUMP_METHOD void dumpEntry(Entry *E, int NumSpaces = 0) const;
#endif
};
diff --git a/include/llvm/Support/Win64EH.h b/include/llvm/Support/Win64EH.h
index bdd23b41594e..8220131e5be9 100644
--- a/include/llvm/Support/Win64EH.h
+++ b/include/llvm/Support/Win64EH.h
@@ -30,7 +30,9 @@ enum UnwindOpcodes {
UOP_SetFPReg,
UOP_SaveNonVol,
UOP_SaveNonVolBig,
- UOP_SaveXMM128 = 8,
+ UOP_Epilog,
+ UOP_SpareCode,
+ UOP_SaveXMM128,
UOP_SaveXMM128Big,
UOP_PushMachFrame,
// The following set of unwind opcodes is for ARM64. They are documented at
diff --git a/include/llvm/Support/X86TargetParser.def b/include/llvm/Support/X86TargetParser.def
index 1749be3b3ae2..4ebf2d79cb8d 100644
--- a/include/llvm/Support/X86TargetParser.def
+++ b/include/llvm/Support/X86TargetParser.def
@@ -112,6 +112,7 @@ X86_CPU_SUBTYPE ("k6-2", AMDPENTIUM_K62)
X86_CPU_SUBTYPE ("k6-3", AMDPENTIUM_K63)
X86_CPU_SUBTYPE ("geode", AMDPENTIUM_GEODE)
X86_CPU_SUBTYPE ("cooperlake", INTEL_COREI7_COOPERLAKE)
+X86_CPU_SUBTYPE ("tigerlake", INTEL_COREI7_TIGERLAKE)
#undef X86_CPU_SUBTYPE_COMPAT
#undef X86_CPU_SUBTYPE
@@ -160,12 +161,13 @@ X86_FEATURE_COMPAT(32, FEATURE_GFNI, "gfni")
X86_FEATURE_COMPAT(33, FEATURE_VPCLMULQDQ, "vpclmulqdq")
X86_FEATURE_COMPAT(34, FEATURE_AVX512VNNI, "avx512vnni")
X86_FEATURE_COMPAT(35, FEATURE_AVX512BITALG, "avx512bitalg")
+X86_FEATURE_COMPAT(36, FEATURE_AVX512BF16, "avx512bf16")
// Features below here are not in libgcc/compiler-rt.
X86_FEATURE (64, FEATURE_MOVBE)
X86_FEATURE (65, FEATURE_ADX)
X86_FEATURE (66, FEATURE_EM64T)
X86_FEATURE (67, FEATURE_CLFLUSHOPT)
X86_FEATURE (68, FEATURE_SHA)
-X86_FEATURE (69, FEATURE_AVX512BF16)
+X86_FEATURE (69, FEATURE_AVX512VP2INTERSECT)
#undef X86_FEATURE_COMPAT
#undef X86_FEATURE
diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h
index 5181dc56d81d..a3bfa7dc4678 100644
--- a/include/llvm/Support/YAMLTraits.h
+++ b/include/llvm/Support/YAMLTraits.h
@@ -649,7 +649,8 @@ inline bool isBool(StringRef S) {
inline QuotingType needsQuotes(StringRef S) {
if (S.empty())
return QuotingType::Single;
- if (isspace(S.front()) || isspace(S.back()))
+ if (isspace(static_cast<unsigned char>(S.front())) ||
+ isspace(static_cast<unsigned char>(S.back())))
return QuotingType::Single;
if (isNull(S))
return QuotingType::Single;
@@ -748,7 +749,7 @@ public:
IO(void *Ctxt = nullptr);
virtual ~IO();
- virtual bool outputting() = 0;
+ virtual bool outputting() const = 0;
virtual unsigned beginSequence() = 0;
virtual bool preflightElement(unsigned, void *&) = 0;
@@ -842,7 +843,7 @@ public:
Val = Val | ConstVal;
}
- void *getContext();
+ void *getContext() const;
void setContext(void *);
template <typename T> void mapRequired(const char *Key, T &Val) {
@@ -1402,7 +1403,7 @@ public:
std::error_code error();
private:
- bool outputting() override;
+ bool outputting() const override;
bool mapTag(StringRef, bool) override;
void beginMapping() override;
void endMapping() override;
@@ -1549,7 +1550,7 @@ public:
/// anyway.
void setWriteDefaultValues(bool Write) { WriteDefaultValues = Write; }
- bool outputting() override;
+ bool outputting() const override;
bool mapTag(StringRef, bool) override;
void beginMapping() override;
void endMapping() override;
diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h
index 4ecdb17376f1..a72acd4fe002 100644
--- a/include/llvm/Support/circular_raw_ostream.h
+++ b/include/llvm/Support/circular_raw_ostream.h
@@ -122,6 +122,10 @@ namespace llvm {
delete[] BufferArray;
}
+ bool is_displayed() const override {
+ return TheStream->is_displayed();
+ }
+
/// setStream - Tell the circular_raw_ostream to output a
/// different stream. "Owns" tells circular_raw_ostream whether
/// it should take responsibility for managing the underlying
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index 48bb623b0638..0debc5da7a68 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -72,7 +72,7 @@ private:
public:
// color order matches ANSI escape sequence, don't change
- enum Colors {
+ enum class Colors {
BLACK = 0,
RED,
GREEN,
@@ -81,9 +81,21 @@ public:
MAGENTA,
CYAN,
WHITE,
- SAVEDCOLOR
+ SAVEDCOLOR,
+ RESET,
};
+ static const Colors BLACK = Colors::BLACK;
+ static const Colors RED = Colors::RED;
+ static const Colors GREEN = Colors::GREEN;
+ static const Colors YELLOW = Colors::YELLOW;
+ static const Colors BLUE = Colors::BLUE;
+ static const Colors MAGENTA = Colors::MAGENTA;
+ static const Colors CYAN = Colors::CYAN;
+ static const Colors WHITE = Colors::WHITE;
+ static const Colors SAVEDCOLOR = Colors::SAVEDCOLOR;
+ static const Colors RESET = Colors::RESET;
+
explicit raw_ostream(bool unbuffered = false)
: BufferMode(unbuffered ? Unbuffered : InternalBuffer) {
// Start out ready to flush.
@@ -214,6 +226,9 @@ public:
/// Output \p N in hexadecimal, without any prefix or padding.
raw_ostream &write_hex(unsigned long long N);
+ // Change the foreground color of text.
+ raw_ostream &operator<<(Colors C);
+
/// Output a formatted UUID with dash separators.
using uuid_t = uint8_t[16];
raw_ostream &write_uuid(const uuid_t UUID);
@@ -277,6 +292,10 @@ public:
/// This function determines if this stream is displayed and supports colors.
virtual bool has_colors() const { return is_displayed(); }
+ // Enable or disable colors. Once colors are disabled via enable_colors(false),
+ // changeColor() has no effect until enable_colors(true) is called.
+ virtual void enable_colors(bool /*enable*/) {}
+
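+ // A minimal usage sketch (illustrative only):
+ //   OS << raw_ostream::RED << "error" << raw_ostream::RESET;
+ //   OS.enable_colors(false); // subsequent color changes become no-ops
+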
//===--------------------------------------------------------------------===//
// Subclass Interface
//===--------------------------------------------------------------------===//
@@ -365,8 +384,8 @@ public:
class raw_fd_ostream : public raw_pwrite_stream {
int FD;
bool ShouldClose;
-
bool SupportsSeeking;
+ bool ColorEnabled = true;
#ifdef _WIN32
/// True if this fd refers to a Windows console device. Mintty and other
@@ -442,6 +461,8 @@ public:
bool has_colors() const override;
+ void enable_colors(bool enable) override { ColorEnabled = enable; }
+
std::error_code error() const { return EC; }
/// Return the value of the flag in this raw_fd_ostream indicating whether an
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index c8c6a76a90f1..b7d48e8e1ade 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -17,11 +17,6 @@
#include <type_traits>
#include <utility>
-#ifndef __has_feature
-#define LLVM_DEFINED_HAS_FEATURE
-#define __has_feature(x) 0
-#endif
-
namespace llvm {
@@ -194,17 +189,4 @@ class is_trivially_copyable<T*> : public std::true_type {
} // end namespace llvm
-// If the compiler supports detecting whether a class is final, define
-// an LLVM_IS_FINAL macro. If it cannot be defined properly, this
-// macro will be left undefined.
-#if __cplusplus >= 201402L || defined(_MSC_VER)
-#define LLVM_IS_FINAL(Ty) std::is_final<Ty>()
-#elif __has_feature(is_final) || LLVM_GNUC_PREREQ(4, 7, 0)
-#define LLVM_IS_FINAL(Ty) __is_final(Ty)
-#endif
-
-#ifdef LLVM_DEFINED_HAS_FEATURE
-#undef __has_feature
-#endif
-
#endif // LLVM_SUPPORT_TYPE_TRAITS_H
diff --git a/include/llvm/TableGen/Automaton.td b/include/llvm/TableGen/Automaton.td
new file mode 100644
index 000000000000..13ced2a0e784
--- /dev/null
+++ b/include/llvm/TableGen/Automaton.td
@@ -0,0 +1,95 @@
+//===- Automaton.td ----------------------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the key top-level classes needed to produce a reasonably
+// generic finite-state automaton.
+//
+//===----------------------------------------------------------------------===//
+
+// Define a record inheriting from GenericAutomaton to generate a reasonably
+// generic finite-state automaton over a set of actions and states.
+//
+// This automaton is defined by:
+// 1) a state space (explicit, always bits<32>).
+// 2) a set of input symbols (actions, explicit) and
+// 3) a transition function from state + action -> state.
+//
+// A theoretical automaton is defined by <Q, S, d, q0, F>:
+// Q: A set of possible states.
+// S: (sigma) The input alphabet.
+// d: (delta) The transition function f(q in Q, s in S) -> q' in Q.
+// q0: The initial state.
+// F: The set of final (accepting) states.
+//
+// Because generating all possible states is tedious, we instead define the
+// transition function only and crawl all reachable states starting from the
+// initial state with all inputs under all transitions until termination.
+//
+// We define F = Q, that is, all valid states are accepting.
+//
+// To ensure the generation of the automaton terminates, the state transitions
+// are defined as a lattice (meaning every transitioned-to state is more
+// specific than the transitioned-from state, for some definition of specificity).
+// Concretely a transition may set one or more bits in the state that were
+// previously zero to one. If any bit was not zero, the transition is invalid.
+//
+// Instead of defining all possible states (which would be cumbersome), the user
+// provides a set of possible Transitions from state A, consuming an input
+// symbol A to state B. The Transition object transforms state A to state B and
+// acts as a predicate. This means the state space can be discovered by crawling
+// all the possible transitions until none are valid.
+//
+// This automaton is considered to be nondeterministic, meaning that multiple
+// transitions can occur from any (state, action) pair. The generated automaton
+// is determinized, meaning that is executes in O(k) time where k is the input
+// sequence length.
+//
+// In addition to a generated automaton that determines if a sequence of inputs
+// is accepted or not, a table is emitted that allows determining a plausible
+// sequence of states traversed to accept that input.
+class GenericAutomaton {
+ // Name of a class that inherits from Transition. All records inheriting from
+ // this class will be considered when constructing the automaton.
+ string TransitionClass;
+
+ // Names of fields within TransitionClass that define the action symbol. This
+ // defines the action as an N-tuple.
+ //
+ // Each symbol field can be of class, int, string or code type.
+ // If the type of a field is a class, the Record's name is used verbatim
+ // in C++ and the class name is used as the C++ type name.
+ // If the type of a field is a string, code or int, that is also used
+ // verbatim in C++.
+ //
+ // To override the C++ type name for field F, define a field called TypeOf_F.
+ // This should be a string that will be used verbatim in C++.
+ //
+// As an example, to define a 2-tuple with an enum and an int, one might:
+// def MyTransition : Transition {
+// MyEnum S1;
+// int S2;
+// }
+// def MyAutomaton : GenericAutomaton {
+// let TransitionClass = "MyTransition";
+// let SymbolFields = ["S1", "S2"];
+// let TypeOf_S1 = "MyEnumInCxxKind";
+// }
+ list<string> SymbolFields;
+}
+
+// All transitions inherit from Transition.
+class Transition {
+ // A transition S' = T(S) is valid if, for every set bit in NewState, the
+ // corresponding bit in S is clear. That is:
+ // def T(S):
+ // S' = S | NewState
+ // return S' if S' != S else Failure
+ //
+ // The automaton generator uses this property to crawl the set of possible
+ // transitions from a starting state of 0b0.
+ bits<32> NewState;
+}
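+
+// A minimal end-to-end sketch (illustrative only): an automaton over two
+// resource bits that accepts any input sequence which never claims the same
+// resource twice.
+// class ResourceTransition : Transition { string Resource; }
+// def ClaimA : ResourceTransition { let Resource = "A"; let NewState = 0b01; }
+// def ClaimB : ResourceTransition { let Resource = "B"; let NewState = 0b10; }
+// def TwoResourceAutomaton : GenericAutomaton {
+//   let TransitionClass = "ResourceTransition";
+//   let SymbolFields = ["Resource"];
+// }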
diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h
index 7c83b6298620..cf990427f577 100644
--- a/include/llvm/TableGen/Error.h
+++ b/include/llvm/TableGen/Error.h
@@ -18,6 +18,7 @@
namespace llvm {
+void PrintNote(const Twine &Msg);
void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg);
void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg);
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index bf7f02208c28..73ed342a6101 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -1263,7 +1263,14 @@ class FieldInit : public TypedInit {
FieldInit(Init *R, StringInit *FN)
: TypedInit(IK_FieldInit, R->getFieldType(FN)), Rec(R), FieldName(FN) {
- assert(getType() && "FieldInit with non-record type!");
+#ifndef NDEBUG
+ if (!getType()) {
+ llvm::errs() << "In Record = " << Rec->getAsString()
+ << ", got FieldName = " << *FieldName
+ << " with non-record type!\n";
+ llvm_unreachable("FieldInit with non-record type!");
+ }
+#endif
}
public:
@@ -1323,6 +1330,7 @@ public:
void Profile(FoldingSetNodeID &ID) const;
Init *getOperator() const { return Val; }
+ Record *getOperatorAsDef(ArrayRef<SMLoc> Loc) const;
StringInit *getName() const { return ValName; }
@@ -1680,10 +1688,10 @@ raw_ostream &operator<<(raw_ostream &OS, const Record &R);
class RecordKeeper {
friend class RecordRecTy;
- using RecordMap = std::map<std::string, std::unique_ptr<Record>>;
+ using RecordMap = std::map<std::string, std::unique_ptr<Record>, std::less<>>;
RecordMap Classes, Defs;
FoldingSet<RecordRecTy> RecordTypePool;
- std::map<std::string, Init *> ExtraGlobals;
+ std::map<std::string, Init *, std::less<>> ExtraGlobals;
unsigned AnonCounter = 0;
public:
diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td
index 45718327b4a7..4b49dfd4dd18 100644
--- a/include/llvm/Target/GenericOpcodes.td
+++ b/include/llvm/Target/GenericOpcodes.td
@@ -15,7 +15,9 @@
// Unary ops.
//------------------------------------------------------------------------------
-class GenericInstruction : StandardPseudoInstruction;
+class GenericInstruction : StandardPseudoInstruction {
+ let isPreISelOpcode = 1;
+}
// Extend the underlying scalar type of an operation, leaving the high bits
// unspecified.
@@ -33,6 +35,20 @@ def G_SEXT : GenericInstruction {
let hasSideEffects = 0;
}
+// Sign extend a value from an arbitrary bit position, copying the sign bit
+// into all bits above it. This is equivalent to a shl + ashr pair with an
+// appropriate shift amount. $sz is an immediate (MachineOperand::isImm()
+// returns true) to allow targets to have some bitwidths legal and others
+// lowered. This opcode is particularly useful if the target has sign-extension
+// instructions that are cheaper than the constituent shifts as the optimizer is
+// able to make decisions on whether it's better to hang on to the G_SEXT_INREG
+// or to lower it and optimize the individual shifts.
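+//
+// For example, on s32, %dst = G_SEXT_INREG %src, 8 is equivalent to (sketch):
+//   %tmp = G_SHL %src, 24
+//   %dst = G_ASHR %tmp, 24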
+def G_SEXT_INREG : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, untyped_imm_0:$sz);
+ let hasSideEffects = 0;
+}
+
// Zero extend the underlying scalar type of an operation, putting zero bits
// into the newly-created space.
def G_ZEXT : GenericInstruction {
@@ -157,6 +173,12 @@ def G_BSWAP : GenericInstruction {
let hasSideEffects = 0;
}
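+// Reverse the order of the bits in a scalar.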
+def G_BITREVERSE : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
def G_ADDRSPACE_CAST : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
@@ -175,6 +197,12 @@ def G_JUMP_TABLE : GenericInstruction {
let hasSideEffects = 0;
}
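+// Generic dynamic stack allocation: allocates $size bytes on the stack with
+// the requested $align and returns a pointer to the allocated memory.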
+def G_DYN_STACKALLOC : GenericInstruction {
+ let OutOperandList = (outs ptype0:$dst);
+ let InOperandList = (ins type1:$size, i32imm:$align);
+ let hasSideEffects = 1;
+}
+
//------------------------------------------------------------------------------
// Binary ops.
//------------------------------------------------------------------------------
@@ -598,6 +626,15 @@ def G_FMA : GenericInstruction {
let isCommutable = 0;
}
+/// Generic FP multiply and add. Perform a * b + c, while getting the
+/// same result as the separately rounded operations, unlike G_FMA.
+def G_FMAD : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+ let hasSideEffects = 0;
+ let isCommutable = 0;
+}
+
// Generic FP division.
def G_FDIV : GenericInstruction {
let OutOperandList = (outs type0:$dst);
@@ -725,7 +762,11 @@ def G_INTRINSIC_ROUND : GenericInstruction {
// Memory ops
//------------------------------------------------------------------------------
-// Generic load. Expects a MachineMemOperand in addition to explicit operands.
+// Generic load. Expects a MachineMemOperand in addition to explicit
+// operands. If the result size is larger than the memory size, the
+// high bits are undefined. If the result is a vector type and larger
+// than the memory size, the high elements are undefined (i.e. this is
+// not a per-element vector anyextload).
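+//
+// For example, a G_LOAD producing s64 from a 4-byte MachineMemOperand leaves
+// bits 32-63 of the result undefined.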
def G_LOAD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins ptype1:$addr);
@@ -749,6 +790,32 @@ def G_ZEXTLOAD : GenericInstruction {
let mayLoad = 1;
}
+// Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset.
+// If $am is 0 (post-indexed), then the value is loaded from $base; if $am is 1 (pre-indexed)
+// then the value is loaded from $newaddr.
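+// For example (illustrative):
+//   post-indexed ($am = 0): $dst = load $base;   $newaddr = $base + $offset
+//   pre-indexed  ($am = 1): $newaddr = $base + $offset; $dst = load $newaddr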
+def G_INDEXED_LOAD : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+ let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+}
+
+// Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD.
+def G_INDEXED_SEXTLOAD : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+ let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+}
+
+// Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD.
+def G_INDEXED_ZEXTLOAD : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+ let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+}
+
// Generic store. Expects a MachineMemOperand in addition to explicit operands.
def G_STORE : GenericInstruction {
let OutOperandList = (outs);
@@ -757,6 +824,15 @@ def G_STORE : GenericInstruction {
let mayStore = 1;
}
+// Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour.
+def G_INDEXED_STORE : GenericInstruction {
+ let OutOperandList = (outs ptype0:$newaddr);
+ let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset,
+ unknown:$am);
+ let hasSideEffects = 0;
+ let mayStore = 1;
+}
+
// Generic atomic cmpxchg with internal success check. Expects a
// MachineMemOperand in addition to explicit operands.
def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction {
@@ -798,6 +874,8 @@ def G_ATOMICRMW_MAX : G_ATOMICRMW_OP;
def G_ATOMICRMW_MIN : G_ATOMICRMW_OP;
def G_ATOMICRMW_UMAX : G_ATOMICRMW_OP;
def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP;
+def G_ATOMICRMW_FADD : G_ATOMICRMW_OP;
+def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP;
def G_FENCE : GenericInstruction {
let OutOperandList = (outs);
@@ -947,9 +1025,12 @@ def G_EXTRACT_VECTOR_ELT : GenericInstruction {
}
// Generic shufflevector.
+//
+// The mask operand should be an IR Constant which exactly matches the
+// corresponding mask for the IR shufflevector instruction.
def G_SHUFFLE_VECTOR: GenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type1:$v1, type1:$v2, type2:$mask);
+ let InOperandList = (ins type1:$v1, type1:$v2, unknown:$mask);
let hasSideEffects = 0;
}
diff --git a/include/llvm/Target/GlobalISel/Combine.td b/include/llvm/Target/GlobalISel/Combine.td
new file mode 100644
index 000000000000..dcac399fd693
--- /dev/null
+++ b/include/llvm/Target/GlobalISel/Combine.td
@@ -0,0 +1,103 @@
+//===- Combine.td - Combine rule definitions ---------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declare GlobalISel combine rules and provide mechanisms to opt-out.
+//
+//===----------------------------------------------------------------------===//
+
+// Common base class for GICombineRule and GICombineGroup.
+class GICombine {
+ // See GICombineGroup. We only declare it here to make the tablegen pass
+ // simpler.
+ list<GICombine> Rules = ?;
+}
+
+// A group of combine rules that can be added to a GICombiner or another group.
+class GICombineGroup<list<GICombine> rules> : GICombine {
+ // The rules contained in this group. The rules in a group are flattened into
+ // a single list and sorted into whatever order is most efficient. However,
+ // they will never be re-ordered such that behaviour differs from the
+ // specified order. It is therefore possible to use the order of rules in this
+ // list to describe priorities.
+ let Rules = rules;
+}
+
+// Declares a combiner helper class.
+class GICombinerHelper<string classname, list<GICombine> rules>
+ : GICombineGroup<rules> {
+ // The class name to use in the generated output.
+ string Classname = classname;
+ // The name of a run-time compiler option that will be generated to disable
+ // specific rules within this combiner.
+ string DisableRuleOption = ?;
+}
+
+class GICombineRule<dag defs, dag match, dag apply> : GICombine {
+ /// Defines the external interface of the match rule. This includes:
+ /// * The names of the root nodes (requires at least one)
+ /// See GIDefKind for details.
+ dag Defs = defs;
+
+ /// Defines the things which must be true for the pattern to match
+ /// See GIMatchKind for details.
+ dag Match = match;
+
+ /// Defines the things which happen after the decision is made to apply a
+ /// combine rule.
+ /// See GIApplyKind for details.
+ dag Apply = apply;
+}
+
+/// The operator at the root of a GICombineRule.Defs dag.
+def defs;
+
+/// All arguments of the defs operator must be subclasses of GIDefKind or
+/// sub-dags whose operator is GIDefKindWithArgs.
+class GIDefKind;
+class GIDefKindWithArgs;
+/// Declare a root node. There must be at least one of these in every combine
+/// rule.
+/// TODO: The plan is to elide `root` definitions and determine it from the DAG
+/// itself with an override for situations where the usual determination
+/// is incorrect.
+def root : GIDefKind;
+
+/// The operator at the root of a GICombineRule.Match dag.
+def match;
+/// All arguments of the match operator must be either:
+/// * A subclass of GIMatchKind
+/// * A subclass of GIMatchKindWithArgs
+/// * A MIR code block (deprecated)
+/// The GIMatchKind and GIMatchKindWithArgs cases are described in more detail
+/// in their definitions below.
+/// For the Instruction case, these are collected into a DAG where operand names
+/// that occur multiple times introduce edges.
+class GIMatchKind;
+class GIMatchKindWithArgs;
+
+/// The operator at the root of a GICombineRule.Apply dag.
+def apply;
+/// All arguments of the apply operator must be subclasses of GIApplyKind, or
+/// sub-dags whose operator is GIApplyKindWithArgs, or an MIR block
+/// (deprecated).
+class GIApplyKind;
+class GIApplyKindWithArgs;
+
+def copy_prop : GICombineRule<
+ (defs root:$d),
+ (match [{ return Helper.matchCombineCopy(${d}); }]),
+ (apply [{ Helper.applyCombineCopy(${d}); }])>;
+def trivial_combines : GICombineGroup<[copy_prop]>;
+
+// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of
+// all_combines because it wasn't there.
+def elide_br_by_inverting_cond : GICombineRule<
+ (defs root:$d),
+ (match [{ return Helper.matchElideBrByInvertingCond(${d}); }]),
+ (apply [{ Helper.applyElideBrByInvertingCond(${d}); }])>;
+
+def all_combines : GICombineGroup<[trivial_combines]>;
diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 6cc58d6521da..b846d2252b8d 100644
--- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -27,6 +27,7 @@ class GINodeEquiv<Instruction i, SDNode node> {
// (ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE) but GlobalISel
// stores this information in the MachineMemoryOperand.
bit CheckMMOIsNonAtomic = 0;
+ bit CheckMMOIsAtomic = 0;
// SelectionDAG has one node for all loads and uses predicates to
// differentiate them. GlobalISel on the other hand uses separate opcodes.
@@ -34,6 +35,10 @@ class GINodeEquiv<Instruction i, SDNode node> {
// depending on the predicates on the node.
Instruction IfSignExtend = ?;
Instruction IfZeroExtend = ?;
+
+ // SelectionDAG has one setcc for all compares. This differentiates
+ // for G_ICMP and G_FCMP.
+ Instruction IfFloatingPoint = ?;
}
// These are defined in the same order as the G_* instructions.
@@ -46,6 +51,7 @@ def : GINodeEquiv<G_BITCAST, bitconvert>;
// G_PTRTOINT - SelectionDAG has no equivalent.
def : GINodeEquiv<G_CONSTANT, imm>;
def : GINodeEquiv<G_FCONSTANT, fpimm>;
+def : GINodeEquiv<G_IMPLICIT_DEF, undef>;
def : GINodeEquiv<G_ADD, add>;
def : GINodeEquiv<G_SUB, sub>;
def : GINodeEquiv<G_MUL, mul>;
@@ -72,6 +78,7 @@ def : GINodeEquiv<G_UITOFP, uint_to_fp>;
def : GINodeEquiv<G_FADD, fadd>;
def : GINodeEquiv<G_FSUB, fsub>;
def : GINodeEquiv<G_FMA, fma>;
+def : GINodeEquiv<G_FMAD, fmad>;
def : GINodeEquiv<G_FMUL, fmul>;
def : GINodeEquiv<G_FDIV, fdiv>;
def : GINodeEquiv<G_FREM, frem>;
@@ -85,6 +92,7 @@ def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>;
def : GINodeEquiv<G_BR, br>;
def : GINodeEquiv<G_BSWAP, bswap>;
+def : GINodeEquiv<G_BITREVERSE, bitreverse>;
def : GINodeEquiv<G_CTLZ, ctlz>;
def : GINodeEquiv<G_CTTZ, cttz>;
def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>;
@@ -100,10 +108,15 @@ def : GINodeEquiv<G_FSQRT, fsqrt>;
def : GINodeEquiv<G_FFLOOR, ffloor>;
def : GINodeEquiv<G_FRINT, frint>;
def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
+def : GINodeEquiv<G_FCOPYSIGN, fcopysign>;
def : GINodeEquiv<G_SMIN, smin>;
def : GINodeEquiv<G_SMAX, smax>;
def : GINodeEquiv<G_UMIN, umin>;
def : GINodeEquiv<G_UMAX, umax>;
+def : GINodeEquiv<G_FMINNUM, fminnum>;
+def : GINodeEquiv<G_FMAXNUM, fmaxnum>;
+def : GINodeEquiv<G_FMINNUM_IEEE, fminnum_ieee>;
+def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>;
// Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
// complications that tablegen must take care of. For example, Predicates such
@@ -117,6 +130,11 @@ def : GINodeEquiv<G_LOAD, ld> {
let IfSignExtend = G_SEXTLOAD;
let IfZeroExtend = G_ZEXTLOAD;
}
+
+def : GINodeEquiv<G_ICMP, setcc> {
+ let IfFloatingPoint = G_FCMP;
+}
+
// Broadly speaking G_STORE is equivalent to ISD::STORE but there are some
// complications that tablegen must take care of. For example, predicates such
// as isTruncStore require that this is not a perfect 1:1 mapping since a
@@ -126,6 +144,11 @@ def : GINodeEquiv<G_LOAD, ld> {
// G_STORE with a non-atomic MachineMemOperand.
def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = 1; }
+def : GINodeEquiv<G_LOAD, atomic_load> {
+ let CheckMMOIsNonAtomic = 0;
+ let CheckMMOIsAtomic = 1;
+}
+
def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap>;
def : GINodeEquiv<G_ATOMICRMW_XCHG, atomic_swap>;
def : GINodeEquiv<G_ATOMICRMW_ADD, atomic_load_add>;
@@ -138,6 +161,8 @@ def : GINodeEquiv<G_ATOMICRMW_MIN, atomic_load_min>;
def : GINodeEquiv<G_ATOMICRMW_MAX, atomic_load_max>;
def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin>;
def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax>;
+def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd>;
+def : GINodeEquiv<G_ATOMICRMW_FSUB, atomic_load_fsub>;
def : GINodeEquiv<G_FENCE, atomic_fence>;
// Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern.
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index d58662e128e0..dd8679661b9a 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -351,7 +351,11 @@ def interleave;
// RegisterTuples instances can be used in other set operations to form
// register classes and so on. This is the only way of using the generated
// registers.
-class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> {
+//
+// RegNames may be specified to supply asm names for the generated tuples.
+// If used, it must have the same size as the list of produced registers.
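+//
+// For example (sketch; DPR and dsub0/dsub1 stand in for target-specific
+// registers, and the name list must match the number of generated tuples):
+//   def DPairs : RegisterTuples<[dsub0, dsub1],
+//                               [(shl DPR, 0), (shl DPR, 1)],
+//                               ["d0_d1", "d1_d2"]>;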
+class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs,
+ list<string> RegNames = []> {
// SubRegs - N lists of registers to be zipped up. Super-registers are
// synthesized from the first element of each SubRegs list, the second
// element and so on.
@@ -360,6 +364,9 @@ class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> {
// SubRegIndices - N SubRegIndex instances. This provides the names of the
// sub-registers in the synthesized super-registers.
list<SubRegIndex> SubRegIndices = Indices;
+
+ // List of asm names for the generated tuple registers.
+ list<string> RegAsmNames = RegNames;
}
@@ -436,6 +443,15 @@ class InstructionEncoding {
bit hasCompleteDecoder = 1;
}
+// Allows specifying an InstructionEncoding by HwMode. If an Instruction specifies
+// an EncodingByHwMode, its Inst and Size members are ignored and Ts are used
+// to encode and decode based on HwMode.
+class EncodingByHwMode<list<HwMode> Ms = [], list<InstructionEncoding> Ts = []>
+ : HwModeSelect<Ms> {
+ // The length of this list must be the same as the length of Ms.
+ list<InstructionEncoding> Objects = Ts;
+}
+
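+// For example (sketch; ModeA/ModeB and EncA/EncB are hypothetical records), an
+// Instruction can select per-mode encodings via its EncodingInfos field:
+//   let EncodingInfos = EncodingByHwMode<[ModeA, ModeB], [EncA, EncB]>;
+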
//===----------------------------------------------------------------------===//
// Instruction set description - These classes correspond to the C++ classes in
// the Target/TargetInstrInfo.h file.
@@ -447,6 +463,10 @@ class Instruction : InstructionEncoding {
dag InOperandList; // A dag containing the MI use operand list.
string AsmString = ""; // The .s format to print the instruction with.
+ // Allows specifying a canonical InstructionEncoding by HwMode. If non-empty,
+ // the Inst member of this Instruction is ignored.
+ EncodingByHwMode EncodingInfos;
+
// Pattern - Set to the DAG pattern for this instruction, if we know of one,
// otherwise, uninitialized.
list<dag> Pattern;
@@ -472,6 +492,10 @@ class Instruction : InstructionEncoding {
// Added complexity passed onto matching pattern.
int AddedComplexity = 0;
+ // Indicates if this is a pre-isel opcode that should be
+ // legalized/regbankselected/selected.
+ bit isPreISelOpcode = 0;
+
// These bits capture information about the high-level semantics of the
// instruction.
bit isReturn = 0; // Is this instruction a return instruction?
@@ -834,6 +858,7 @@ def f64imm : Operand<f64>;
class TypedOperand<string Ty> : Operand<untyped> {
let OperandType = Ty;
bit IsPointer = 0;
+ bit IsImmediate = 0;
}
def type0 : TypedOperand<"OPERAND_GENERIC_0">;
@@ -852,6 +877,12 @@ let IsPointer = 1 in {
def ptype5 : TypedOperand<"OPERAND_GENERIC_5">;
}
+// untyped_imm is for operands where isImm() will be true. It currently has no
+// special behaviour and is only used for clarity.
+def untyped_imm_0 : TypedOperand<"OPERAND_GENERIC_IMM_0"> {
+ let IsImmediate = 1;
+}
+
/// zero_reg definition - Special node to stand for the zero register.
///
def zero_reg;
diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td
index 1bc03cf8a49d..7b1973cc3828 100644
--- a/include/llvm/Target/TargetCallingConv.td
+++ b/include/llvm/Target/TargetCallingConv.td
@@ -152,6 +152,12 @@ class CCBitConvertToType<ValueType destTy> : CCAction {
ValueType DestTy = destTy;
}
+/// CCTruncToType - If applied, this truncates the specified current value to
+/// the specified type.
+class CCTruncToType<ValueType destTy> : CCAction {
+ ValueType DestTy = destTy;
+}
+
/// CCPassIndirect - If applied, this stores the value to stack and passes the pointer
/// as normal argument.
class CCPassIndirect<ValueType destTy> : CCAction {
diff --git a/include/llvm/Target/TargetItinerary.td b/include/llvm/Target/TargetItinerary.td
index b68ed045520c..89e5abd947d0 100644
--- a/include/llvm/Target/TargetItinerary.td
+++ b/include/llvm/Target/TargetItinerary.td
@@ -127,6 +127,17 @@ class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp,
list<FuncUnit> FU = fu;
list<Bypass> BP = bp;
list<InstrItinData> IID = iid;
+ // The packetizer automaton to use for this itinerary. By default all
+ // itineraries for a target are bundled up into the same automaton. This only
+ // works correctly when there are no conflicts in functional unit IDs between
+ // itineraries. For example, given two itineraries A<[SLOT_A]>, B<[SLOT_B]>,
+ // SLOT_A and SLOT_B will be assigned the same functional unit index, and
+ // the generated packetizer will confuse instructions referencing these slots.
+ //
+ // To avoid this, setting PacketizerNamespace to a non-empty string causes this
+ // itinerary to be generated in a different automaton. The subtarget will need
+ // to declare a method "create##Namespace##DFAPacketizer()".
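+ //
+ // For example (sketch), with PacketizerNamespace = "MyNS" the subtarget must
+ // provide a createMyNSDFAPacketizer() method.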
+ string PacketizerNamespace = "";
}
// NoItineraries - A marker that can be used by processors without schedule
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 3a2497bff11e..d74341b23fb1 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -191,7 +191,8 @@ public:
}
/// Get the target specific PC relative GOT entry relocation
- virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ virtual const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV,
+ const MCSymbol *Sym,
const MCValue &MV,
int64_t Offset,
MachineModuleInfo *MMI,
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index cdf9f8bfd5ea..285c0ec0fb90 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -25,7 +25,7 @@ namespace llvm {
class Function;
class GlobalValue;
-class MachineModuleInfo;
+class MachineModuleInfoWrapperPass;
class Mangler;
class MCAsmInfo;
class MCContext;
@@ -284,12 +284,13 @@ public:
/// emitted. Typically this will involve several steps of code generation.
/// This method should return true if emission of this file type is not
/// supported, or false on success.
- /// \p MMI is an optional parameter that, if set to non-nullptr,
+ /// \p MMIWP is an optional parameter that, if set to non-nullptr,
/// will be used to set the MachineModuleInfo for this PM.
- virtual bool addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &,
- raw_pwrite_stream *, CodeGenFileType,
- bool /*DisableVerify*/ = true,
- MachineModuleInfo *MMI = nullptr) {
+ virtual bool
+ addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &,
+ raw_pwrite_stream *, CodeGenFileType,
+ bool /*DisableVerify*/ = true,
+ MachineModuleInfoWrapperPass *MMIWP = nullptr) {
return true;
}
@@ -341,12 +342,13 @@ public:
/// Add passes to the specified pass manager to get the specified file
/// emitted. Typically this will involve several steps of code generation.
- /// \p MMI is an optional parameter that, if set to non-nullptr,
- /// will be used to set the MachineModuloInfofor this PM.
- bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
- raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
- bool DisableVerify = true,
- MachineModuleInfo *MMI = nullptr) override;
+ /// \p MMIWP is an optional parameter that, if set to non-nullptr,
+ /// will be used to set the MachineModuleInfo for this PM.
+ bool
+ addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
+ raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
+ bool DisableVerify = true,
+ MachineModuleInfoWrapperPass *MMIWP = nullptr) override;
/// Add passes to the specified pass manager to get machine code emitted with
/// the MCJIT. This method returns true if machine code is not supported. It
@@ -365,7 +367,7 @@ public:
/// Adds an AsmPrinter pass to the pipeline that prints assembly or
/// machine code from the MI representation.
bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out,
- raw_pwrite_stream *DwoOut, CodeGenFileType FileTYpe,
+ raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
MCContext &Context);
/// True if the target uses physical regs at Prolog/Epilog insertion
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index a36d259df831..24f37e94da91 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -563,10 +563,10 @@ class RetireControlUnit<int bufferSize, int retirePerCycle> {
// Base class for Load/StoreQueue. It is used to identify processor resources
// which describe load/store queues in the LS unit.
-class MemoryQueue<ProcResource PR> {
- ProcResource QueueDescriptor = PR;
+class MemoryQueue<ProcResourceKind PR> {
+ ProcResourceKind QueueDescriptor = PR;
SchedMachineModel SchedModel = ?;
}
-class LoadQueue<ProcResource LDQueue> : MemoryQueue<LDQueue>;
-class StoreQueue<ProcResource STQueue> : MemoryQueue<STQueue>;
+class LoadQueue<ProcResourceKind LDQueue> : MemoryQueue<LDQueue>;
+class StoreQueue<ProcResourceKind STQueue> : MemoryQueue<STQueue>;
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index b913a054ac2c..441f3d7d118d 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -137,9 +137,12 @@ def SDTFPSignOp : SDTypeProfile<1, 2, [ // fcopysign.
def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>
]>;
-def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz
+def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse
SDTCisSameAs<0, 1>, SDTCisInt<0>
]>;
+def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz
+ SDTCisInt<0>, SDTCisInt<1>
+]>;
def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext
SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
]>;
@@ -239,6 +242,9 @@ def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract
def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert
SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3>
]>;
+def SDTVecReduce : SDTypeProfile<1, 1, [ // vector reduction
+ SDTCisInt<0>, SDTCisVec<1>
+]>;
def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract
SDTCisSubVecOfVec<0,1>, SDTCisInt<2>
@@ -393,6 +399,7 @@ def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>;
def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
+def umulfixsat : SDNode<"ISD::UMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
@@ -401,11 +408,11 @@ def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>;
def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>;
def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
-def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
-def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>;
-def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>;
-def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>;
-def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>;
+def ctlz : SDNode<"ISD::CTLZ" , SDTIntBitCountUnaryOp>;
+def cttz : SDNode<"ISD::CTTZ" , SDTIntBitCountUnaryOp>;
+def ctpop : SDNode<"ISD::CTPOP" , SDTIntBitCountUnaryOp>;
+def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>;
+def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>;
def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>;
def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>;
def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
@@ -415,6 +422,12 @@ def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>;
def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>;
def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>;
+def vecreduce_add : SDNode<"ISD::VECREDUCE_ADD", SDTVecReduce>;
+def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>;
+def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>;
+def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>;
+def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>;
+
def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
@@ -493,12 +506,20 @@ def strict_flog2 : SDNode<"ISD::STRICT_FLOG2",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_frint : SDNode<"ISD::STRICT_FRINT",
SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_lrint : SDNode<"ISD::STRICT_LRINT",
+ SDTFPToIntOp, [SDNPHasChain]>;
+def strict_llrint : SDNode<"ISD::STRICT_LLRINT",
+ SDTFPToIntOp, [SDNPHasChain]>;
def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fceil : SDNode<"ISD::STRICT_FCEIL",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR",
SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_lround : SDNode<"ISD::STRICT_LROUND",
+ SDTFPToIntOp, [SDNPHasChain]>;
+def strict_llround : SDNode<"ISD::STRICT_LLROUND",
+ SDTFPToIntOp, [SDNPHasChain]>;
def strict_fround : SDNode<"ISD::STRICT_FROUND",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC",
@@ -513,6 +534,10 @@ def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND",
SDTFPRoundOp, [SDNPHasChain]>;
def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND",
SDTFPExtendOp, [SDNPHasChain]>;
+def strict_fp_to_sint : SDNode<"ISD::STRICT_FP_TO_SINT",
+ SDTFPToIntOp, [SDNPHasChain]>;
+def strict_fp_to_uint : SDNode<"ISD::STRICT_FP_TO_UINT",
+ SDTFPToIntOp, [SDNPHasChain]>;
def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
def select : SDNode<"ISD::SELECT" , SDTSelect>;
@@ -638,16 +663,32 @@ def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
//===----------------------------------------------------------------------===//
// Selection DAG Condition Codes
-class CondCode; // ISD::CondCode enums
-def SETOEQ : CondCode; def SETOGT : CondCode;
-def SETOGE : CondCode; def SETOLT : CondCode; def SETOLE : CondCode;
-def SETONE : CondCode; def SETO : CondCode; def SETUO : CondCode;
-def SETUEQ : CondCode; def SETUGT : CondCode; def SETUGE : CondCode;
-def SETULT : CondCode; def SETULE : CondCode; def SETUNE : CondCode;
-
-def SETEQ : CondCode; def SETGT : CondCode; def SETGE : CondCode;
-def SETLT : CondCode; def SETLE : CondCode; def SETNE : CondCode;
-
+class CondCode<string fcmpName = "", string icmpName = ""> {
+ string ICmpPredicate = icmpName;
+ string FCmpPredicate = fcmpName;
+}
+
+// ISD::CondCode enums, and mapping to CmpInst::Predicate names
+def SETOEQ : CondCode<"FCMP_OEQ">;
+def SETOGT : CondCode<"FCMP_OGT">;
+def SETOGE : CondCode<"FCMP_OGE">;
+def SETOLT : CondCode<"FCMP_OLT">;
+def SETOLE : CondCode<"FCMP_OLE">;
+def SETONE : CondCode<"FCMP_ONE">;
+def SETO : CondCode<"FCMP_ORD">;
+def SETUO : CondCode<"FCMP_UNO">;
+def SETUEQ : CondCode<"FCMP_UEQ">;
+def SETUGT : CondCode<"FCMP_UGT", "ICMP_UGT">;
+def SETUGE : CondCode<"FCMP_UGE", "ICMP_UGE">;
+def SETULT : CondCode<"FCMP_ULT", "ICMP_ULT">;
+def SETULE : CondCode<"FCMP_ULE", "ICMP_ULE">;
+def SETUNE : CondCode<"FCMP_UNE">;
+def SETEQ : CondCode<"", "ICMP_EQ">;
+def SETGT : CondCode<"", "ICMP_SGT">;
+def SETGE : CondCode<"", "ICMP_SGE">;
+def SETLT : CondCode<"", "ICMP_SLT">;
+def SETLE : CondCode<"", "ICMP_SLE">;
+def SETNE : CondCode<"", "ICMP_NE">;
//===----------------------------------------------------------------------===//
// Selection DAG Node Transformation Functions.
@@ -741,6 +782,10 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
// If this empty, accept any address space.
list<int> AddressSpaces = ?;
+ // cast<MemSDNode>(N)->getAlignment() >=
+ // If this is empty, accept any alignment.
+ int MinAlignment = ?;
+
// cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Monotonic
bit IsAtomicOrderingMonotonic = ?;
// cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Acquire
@@ -766,8 +811,6 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
// cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>;
// cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>;
ValueType ScalarMemoryVT = ?;
-
- // TODO: Add alignment
}
// PatFrag - A version of PatFrags matching only a single fragment.
@@ -813,6 +856,11 @@ class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
bit IsAPFloat = 0;
}
+// Convenience wrapper for ImmLeaf to use timm/TargetConstant instead
+// of imm/Constant.
+class TImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
+ SDNode ImmNode = timm> : ImmLeaf<vt, pred, xform, ImmNode>;
+
// An ImmLeaf except that Imm is an APInt. This is useful when you need to
// zero-extend the immediate instead of sign-extend it.
//
@@ -1111,6 +1159,16 @@ def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
let IsStore = 1;
let MemoryVT = f32;
}
+def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncst node:$val, node:$base, node:$offset)> {
+ let IsStore = 1;
+ let ScalarMemoryVT = i8;
+}
+def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncst node:$val, node:$base, node:$offset)> {
+ let IsStore = 1;
+ let ScalarMemoryVT = i16;
+}
def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(istore node:$val, node:$ptr, node:$offset), [{
@@ -1148,14 +1206,26 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
let IsStore = 1;
let MemoryVT = f32;
}
+def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (post_truncst node:$val, node:$base, node:$offset)> {
+ let IsStore = 1;
+ let ScalarMemoryVT = i8;
+}
+def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (post_truncst node:$val, node:$base, node:$offset)> {
+ let IsStore = 1;
+ let ScalarMemoryVT = i16;
+}
-def nonvolatile_load : PatFrag<(ops node:$ptr),
- (load node:$ptr), [{
- return !cast<LoadSDNode>(N)->isVolatile();
+// TODO: Split these into volatile and unordered flavors to enable
+// selectively legal optimizations for each. (See D66309)
+def simple_load : PatFrag<(ops node:$ptr),
+ (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->isSimple();
}]>;
-def nonvolatile_store : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return !cast<StoreSDNode>(N)->isVolatile();
+def simple_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->isSimple();
}]>;
// nontemporal store fragments.
@@ -1277,6 +1347,12 @@ def any_flog2 : PatFrags<(ops node:$src),
def any_frint : PatFrags<(ops node:$src),
[(strict_frint node:$src),
(frint node:$src)]>;
+def any_lrint : PatFrags<(ops node:$src),
+ [(strict_lrint node:$src),
+ (lrint node:$src)]>;
+def any_llrint : PatFrags<(ops node:$src),
+ [(strict_llrint node:$src),
+ (llrint node:$src)]>;
def any_fnearbyint : PatFrags<(ops node:$src),
[(strict_fnearbyint node:$src),
(fnearbyint node:$src)]>;
@@ -1286,6 +1362,12 @@ def any_fceil : PatFrags<(ops node:$src),
def any_ffloor : PatFrags<(ops node:$src),
[(strict_ffloor node:$src),
(ffloor node:$src)]>;
+def any_lround : PatFrags<(ops node:$src),
+ [(strict_lround node:$src),
+ (lround node:$src)]>;
+def any_llround : PatFrags<(ops node:$src),
+ [(strict_llround node:$src),
+ (llround node:$src)]>;
def any_fround : PatFrags<(ops node:$src),
[(strict_fround node:$src),
(fround node:$src)]>;
@@ -1310,6 +1392,12 @@ def any_extloadf32 : PatFrags<(ops node:$ptr),
def any_extloadf64 : PatFrags<(ops node:$ptr),
[(strict_extloadf64 node:$ptr),
(extloadf64 node:$ptr)]>;
+def any_fp_to_sint : PatFrags<(ops node:$src),
+ [(strict_fp_to_sint node:$src),
+ (fp_to_sint node:$src)]>;
+def any_fp_to_uint : PatFrags<(ops node:$src),
+ [(strict_fp_to_uint node:$src),
+ (fp_to_uint node:$src)]>;
multiclass binary_atomic_op_ord<SDNode atomic_op> {
def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
@@ -1367,26 +1455,26 @@ multiclass ternary_atomic_op_ord<SDNode atomic_op> {
}
}
-multiclass binary_atomic_op<SDNode atomic_op> {
+multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
def _8 : PatFrag<(ops node:$ptr, node:$val),
(atomic_op node:$ptr, node:$val)> {
let IsAtomic = 1;
- let MemoryVT = i8;
+ let MemoryVT = !if(IsInt, i8, ?);
}
def _16 : PatFrag<(ops node:$ptr, node:$val),
(atomic_op node:$ptr, node:$val)> {
let IsAtomic = 1;
- let MemoryVT = i16;
+ let MemoryVT = !if(IsInt, i16, f16);
}
def _32 : PatFrag<(ops node:$ptr, node:$val),
(atomic_op node:$ptr, node:$val)> {
let IsAtomic = 1;
- let MemoryVT = i32;
+ let MemoryVT = !if(IsInt, i32, f32);
}
def _64 : PatFrag<(ops node:$ptr, node:$val),
(atomic_op node:$ptr, node:$val)> {
let IsAtomic = 1;
- let MemoryVT = i64;
+ let MemoryVT = !if(IsInt, i64, f64);
}
defm NAME#_8 : binary_atomic_op_ord<atomic_op>;
diff --git a/include/llvm/TextAPI/MachO/Architecture.h b/include/llvm/TextAPI/MachO/Architecture.h
index 055baeb0c0f0..3898cbada68f 100644
--- a/include/llvm/TextAPI/MachO/Architecture.h
+++ b/include/llvm/TextAPI/MachO/Architecture.h
@@ -14,6 +14,7 @@
#define LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -39,6 +40,9 @@ StringRef getArchitectureName(Architecture Arch);
/// Convert an architecture slice to a CPU Type and Subtype pair.
std::pair<uint32_t, uint32_t> getCPUTypeFromArchitecture(Architecture Arch);
+/// Convert a target to an architecture slice.
+Architecture mapToArchitecture(const llvm::Triple &Target);
+
raw_ostream &operator<<(raw_ostream &OS, Architecture Arch);
} // end namespace MachO.
diff --git a/include/llvm/TextAPI/MachO/ArchitectureSet.h b/include/llvm/TextAPI/MachO/ArchitectureSet.h
index d8dfc7f1af21..6e4ede6275b4 100644
--- a/include/llvm/TextAPI/MachO/ArchitectureSet.h
+++ b/include/llvm/TextAPI/MachO/ArchitectureSet.h
@@ -59,6 +59,10 @@ public:
ArchSetType rawValue() const { return ArchSet; }
+ bool hasX86() const {
+ return has(AK_i386) || has(AK_x86_64) || has(AK_x86_64h);
+ }
+
template <typename Ty>
class arch_iterator
: public std::iterator<std::forward_iterator_tag, Architecture, size_t> {
diff --git a/include/llvm/TextAPI/MachO/InterfaceFile.h b/include/llvm/TextAPI/MachO/InterfaceFile.h
index e722449d52f1..bd434e04b693 100644
--- a/include/llvm/TextAPI/MachO/InterfaceFile.h
+++ b/include/llvm/TextAPI/MachO/InterfaceFile.h
@@ -26,21 +26,13 @@
#include "llvm/TextAPI/MachO/Architecture.h"
#include "llvm/TextAPI/MachO/ArchitectureSet.h"
#include "llvm/TextAPI/MachO/PackedVersion.h"
+#include "llvm/TextAPI/MachO/Platform.h"
#include "llvm/TextAPI/MachO/Symbol.h"
+#include "llvm/TextAPI/MachO/Target.h"
namespace llvm {
namespace MachO {
-/// Defines the list of MachO platforms.
-enum class PlatformKind : unsigned {
- unknown,
- macOS = MachO::PLATFORM_MACOS,
- iOS = MachO::PLATFORM_IOS,
- tvOS = MachO::PLATFORM_TVOS,
- watchOS = MachO::PLATFORM_WATCHOS,
- bridgeOS = MachO::PLATFORM_BRIDGEOS,
-};
-
/// Defines a list of Objective-C constraints.
enum class ObjCConstraintType : unsigned {
/// No constraint.
@@ -75,6 +67,9 @@ enum FileType : unsigned {
/// Text-based stub file (.tbd) version 3.0
TBD_V3 = 1U << 2,
+ /// Text-based stub file (.tbd) version 4.0
+ TBD_V4 = 1U << 3,
+
All = ~0U,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All),
@@ -89,29 +84,42 @@ public:
InterfaceFileRef(StringRef InstallName) : InstallName(InstallName) {}
- InterfaceFileRef(StringRef InstallName, ArchitectureSet Archs)
- : InstallName(InstallName), Architectures(Archs) {}
+ InterfaceFileRef(StringRef InstallName, TargetList Targets)
+ : InstallName(InstallName), Targets(std::move(Targets)) {}
StringRef getInstallName() const { return InstallName; };
- void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; }
- ArchitectureSet getArchitectures() const { return Architectures; }
- bool hasArchitecture(Architecture Arch) const {
- return Architectures.has(Arch);
+
+ void addTarget(const Target &Target);
+ template <typename RangeT> void addTargets(RangeT &&Targets) {
+ for (const auto &Target_ : Targets)
+ addTarget(Target(Target_));
}
+ using const_target_iterator = TargetList::const_iterator;
+ using const_target_range = llvm::iterator_range<const_target_iterator>;
+ const_target_range targets() const { return {Targets}; }
+
+ ArchitectureSet getArchitectures() const {
+ return mapToArchitectureSet(Targets);
+ }
+
+ PlatformSet getPlatforms() const { return mapToPlatformSet(Targets); }
+
bool operator==(const InterfaceFileRef &O) const {
- return std::tie(InstallName, Architectures) ==
- std::tie(O.InstallName, O.Architectures);
+ return std::tie(InstallName, Targets) == std::tie(O.InstallName, O.Targets);
+ }
+
+ bool operator!=(const InterfaceFileRef &O) const {
+ return std::tie(InstallName, Targets) != std::tie(O.InstallName, O.Targets);
}
bool operator<(const InterfaceFileRef &O) const {
- return std::tie(InstallName, Architectures) <
- std::tie(O.InstallName, O.Architectures);
+ return std::tie(InstallName, Targets) < std::tie(O.InstallName, O.Targets);
}
private:
std::string InstallName;
- ArchitectureSet Architectures;
+ TargetList Targets;
};
} // end namespace MachO.
@@ -170,27 +178,43 @@ public:
/// \return The file type.
FileType getFileType() const { return FileKind; }
- /// Set the platform.
- void setPlatform(PlatformKind Platform_) { Platform = Platform_; }
+ /// Get the architectures.
+ ///
+ /// \return The applicable architectures.
+ ArchitectureSet getArchitectures() const {
+ return mapToArchitectureSet(Targets);
+ }
- /// Get the platform.
- PlatformKind getPlatform() const { return Platform; }
+ /// Get the platforms.
+ ///
+ /// \return The applicable platforms.
+ PlatformSet getPlatforms() const { return mapToPlatformSet(Targets); }
- /// Specify the set of supported architectures by this file.
- void setArchitectures(ArchitectureSet Architectures_) {
- Architectures = Architectures_;
- }
+ /// Add a target.
+ ///
+ /// \param Target the target to add.
+ void addTarget(const Target &Target);
- /// Add the set of supported architectures by this file.
- void addArchitectures(ArchitectureSet Architectures_) {
- Architectures |= Architectures_;
+ /// Add multiple targets.
+ ///
+ /// Adds the subset of llvm::Triple targets that is supported by Tapi.
+ ///
+ /// \param Targets the collection of targets to add.
+ template <typename RangeT> void addTargets(RangeT &&Targets) {
+ for (const auto &Target_ : Targets)
+ addTarget(Target(Target_));
}
- /// Add supported architecture by this file..
- void addArch(Architecture Arch) { Architectures.set(Arch); }
+ using const_target_iterator = TargetList::const_iterator;
+ using const_target_range = llvm::iterator_range<const_target_iterator>;
+ const_target_range targets() const { return {Targets}; }
- /// Get the set of supported architectures.
- ArchitectureSet getArchitectures() const { return Architectures; }
+ using const_filtered_target_iterator =
+ llvm::filter_iterator<const_target_iterator,
+ std::function<bool(const Target &)>>;
+ using const_filtered_target_range =
+ llvm::iterator_range<const_filtered_target_iterator>;
+ const_filtered_target_range targets(ArchitectureSet Archs) const;
/// Set the install name of the library.
void setInstallName(StringRef InstallName_) { InstallName = InstallName_; }
@@ -244,11 +268,18 @@ public:
/// Check if this file was generated during InstallAPI.
bool isInstallAPI() const { return IsInstallAPI; }
- /// Set the parent umbrella framework.
- void setParentUmbrella(StringRef Parent) { ParentUmbrella = Parent; }
+ /// Add a parent umbrella framework.
+ /// \param Target_ The target to which Parent applies.
+ /// \param Parent The name of the parent umbrella framework.
+ void addParentUmbrella(const Target &Target_, StringRef Parent);
+ const std::vector<std::pair<Target, std::string>> &umbrellas() const {
+ return ParentUmbrellas;
+ }
/// Get the parent umbrella framework.
- StringRef getParentUmbrella() const { return ParentUmbrella; }
+ const std::vector<std::pair<Target, std::string>> &getParentUmbrellas() const {
+ return ParentUmbrellas;
+ }
/// Add an allowable client.
///
@@ -257,9 +288,9 @@ public:
/// that is being generated needs to match one of the allowable clients or the
/// linker refuses to link this library.
///
- /// \param Name The name of the client that is allowed to link this library.
- /// \param Architectures The set of architecture for which this applies.
- void addAllowableClient(StringRef Name, ArchitectureSet Architectures);
+ /// \param InstallName The name of the client that is allowed to link this library.
+ /// \param Target The target triple for which this applies.
+ void addAllowableClient(StringRef InstallName, const Target &Target);
/// Get the list of allowable clients.
///
@@ -271,9 +302,8 @@ public:
/// Add a re-exported library.
///
/// \param InstallName The name of the library to re-export.
- /// \param Architectures The set of architecture for which this applies.
- void addReexportedLibrary(StringRef InstallName,
- ArchitectureSet Architectures);
+ /// \param Target The target triple for which this applies.
+ void addReexportedLibrary(StringRef InstallName, const Target &Target);
/// Get the list of re-exported libraries.
///
@@ -282,27 +312,27 @@ public:
return ReexportedLibraries;
}
- /// Add an architecture/UUID pair.
+ /// Add a Target/UUID pair.
///
- /// \param Arch The architecture for which this applies.
+ /// \param Target The target triple for which this applies.
/// \param UUID The UUID of the library for the specified architecture.
- void addUUID(Architecture Arch, StringRef UUID);
+ void addUUID(const Target &Target, StringRef UUID);
- /// Add an architecture/UUID pair.
+ /// Add a Target/UUID pair.
///
- /// \param Arch The architecture for which this applies.
+ /// \param Target The target triple for which this applies.
/// \param UUID The UUID of the library for the specified architecture.
- void addUUID(Architecture Arch, uint8_t UUID[16]);
+ void addUUID(const Target &Target, uint8_t UUID[16]);
- /// Get the list of architecture/UUID pairs.
+ /// Get the list of Target/UUID pairs.
///
- /// \return Returns a list of architecture/UUID pairs.
- const std::vector<std::pair<Architecture, std::string>> &uuids() const {
+ /// \return Returns a list of Target/UUID pairs.
+ const std::vector<std::pair<Target, std::string>> &uuids() const {
return UUIDs;
}
/// Add a symbol to the symbols list or extend an existing one.
- void addSymbol(SymbolKind Kind, StringRef Name, ArchitectureSet Architectures,
+ void addSymbol(SymbolKind Kind, StringRef Name, const TargetList &Targets,
SymbolFlags Flags = SymbolFlags::None);
using SymbolMapType = DenseMap<SymbolsMapKey, Symbol *>;
@@ -320,84 +350,35 @@ public:
reference operator*() const { return I->second; }
pointer operator->() const { return I->second; }
};
- using const_symbol_range = iterator_range<const_symbol_iterator>;
-
- // Custom iterator to return only exported symbols.
- struct const_export_iterator
- : public iterator_adaptor_base<
- const_export_iterator, const_symbol_iterator,
- std::forward_iterator_tag, const Symbol *> {
- const_symbol_iterator _end;
-
- void skipToNextSymbol() {
- while (I != _end && I->isUndefined())
- ++I;
- }
-
- const_export_iterator() = default;
- template <typename U>
- const_export_iterator(U &&it, U &&end)
- : iterator_adaptor_base(std::forward<U &&>(it)),
- _end(std::forward<U &&>(end)) {
- skipToNextSymbol();
- }
-
- const_export_iterator &operator++() {
- ++I;
- skipToNextSymbol();
- return *this;
- }
-
- const_export_iterator operator++(int) {
- const_export_iterator tmp(*this);
- ++(*this);
- return tmp;
- }
- };
- using const_export_range = llvm::iterator_range<const_export_iterator>;
-
- // Custom iterator to return only undefined symbols.
- struct const_undefined_iterator
- : public iterator_adaptor_base<
- const_undefined_iterator, const_symbol_iterator,
- std::forward_iterator_tag, const Symbol *> {
- const_symbol_iterator _end;
- void skipToNextSymbol() {
- while (I != _end && !I->isUndefined())
- ++I;
- }
+ using const_symbol_range = iterator_range<const_symbol_iterator>;
- const_undefined_iterator() = default;
- template <typename U>
- const_undefined_iterator(U &&it, U &&end)
- : iterator_adaptor_base(std::forward<U &&>(it)),
- _end(std::forward<U &&>(end)) {
- skipToNextSymbol();
- }
-
- const_undefined_iterator &operator++() {
- ++I;
- skipToNextSymbol();
- return *this;
- }
-
- const_undefined_iterator operator++(int) {
- const_undefined_iterator tmp(*this);
- ++(*this);
- return tmp;
- }
- };
- using const_undefined_range = llvm::iterator_range<const_undefined_iterator>;
+ using const_filtered_symbol_iterator =
+ filter_iterator<const_symbol_iterator,
+ std::function<bool(const Symbol *)>>;
+ using const_filtered_symbol_range =
+ iterator_range<const_filtered_symbol_iterator>;
const_symbol_range symbols() const {
return {Symbols.begin(), Symbols.end()};
}
- const_export_range exports() const {
- return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}};
+
+ const_filtered_symbol_range exports() const {
+ std::function<bool(const Symbol *)> fn = [](const Symbol *Symbol) {
+ return !Symbol->isUndefined();
+ };
+ return make_filter_range(
+ make_range<const_symbol_iterator>({Symbols.begin()}, {Symbols.end()}),
+ fn);
}
- const_undefined_range undefineds() const {
- return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}};
+
+ const_filtered_symbol_range undefineds() const {
+ std::function<bool(const Symbol *)> fn = [](const Symbol *Symbol) {
+ return Symbol->isUndefined();
+ };
+ return make_filter_range(
+ make_range<const_symbol_iterator>({Symbols.begin()}, {Symbols.end()}),
+ fn);
}
private:
@@ -411,10 +392,9 @@ private:
return StringRef(reinterpret_cast<const char *>(Ptr), String.size());
}
+ TargetList Targets;
std::string Path;
FileType FileKind;
- PlatformKind Platform;
- ArchitectureSet Architectures;
std::string InstallName;
PackedVersion CurrentVersion;
PackedVersion CompatibilityVersion;
@@ -423,10 +403,10 @@ private:
bool IsAppExtensionSafe{false};
bool IsInstallAPI{false};
ObjCConstraintType ObjcConstraint = ObjCConstraintType::None;
- std::string ParentUmbrella;
+ std::vector<std::pair<Target, std::string>> ParentUmbrellas;
std::vector<InterfaceFileRef> AllowableClients;
std::vector<InterfaceFileRef> ReexportedLibraries;
- std::vector<std::pair<Architecture, std::string>> UUIDs;
+ std::vector<std::pair<Target, std::string>> UUIDs;
SymbolMapType Symbols;
};
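
A minimal usage sketch (not part of the patch) of the target-based InterfaceFile API above; the triples, install name, and symbol are placeholder values, and SymbolKind::GlobalSymbol is assumed from the SymbolKind enum in Symbol.h rather than shown in this hunk:

  #include "llvm/ADT/Triple.h"
  #include "llvm/TextAPI/MachO/InterfaceFile.h"

  using namespace llvm;
  using namespace llvm::MachO;

  static void buildInterface() {
    // A Target couples an architecture with a platform, replacing the old
    // ArchitectureSet + PlatformKind pair.
    Target Device(Triple("arm64-apple-ios"));
    Target Simulator(Triple("x86_64-apple-ios-simulator"));

    InterfaceFile File;
    File.addTarget(Device);
    File.addTarget(Simulator);
    File.setInstallName("/usr/lib/libexample.dylib");

    // Symbols are now registered per target list, not per architecture set.
    File.addSymbol(SymbolKind::GlobalSymbol, "_example", {Device, Simulator});

    // exports() yields only the non-undefined symbols via a filter range.
    for (const auto *Sym : File.exports())
      (void)Sym->getName();
  }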
diff --git a/include/llvm/TextAPI/MachO/Platform.h b/include/llvm/TextAPI/MachO/Platform.h
new file mode 100644
index 000000000000..a22aae9b7dce
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/Platform.h
@@ -0,0 +1,45 @@
+//===- llvm/TextAPI/MachO/Platform.h - Platform -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the Platforms supported by Tapi and helpers.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TEXTAPI_MACHO_PLATFORM_H
+#define LLVM_TEXTAPI_MACHO_PLATFORM_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/BinaryFormat/MachO.h"
+
+namespace llvm {
+namespace MachO {
+
+/// Defines the list of MachO platforms.
+enum class PlatformKind : unsigned {
+ unknown,
+ macOS = MachO::PLATFORM_MACOS,
+ iOS = MachO::PLATFORM_IOS,
+ tvOS = MachO::PLATFORM_TVOS,
+ watchOS = MachO::PLATFORM_WATCHOS,
+ bridgeOS = MachO::PLATFORM_BRIDGEOS,
+ macCatalyst = MachO::PLATFORM_MACCATALYST,
+ iOSSimulator = MachO::PLATFORM_IOSSIMULATOR,
+ tvOSSimulator = MachO::PLATFORM_TVOSSIMULATOR,
+ watchOSSimulator = MachO::PLATFORM_WATCHOSSIMULATOR
+};
+
+using PlatformSet = SmallSet<PlatformKind, 3>;
+
+PlatformKind mapToPlatformKind(PlatformKind Platform, bool WantSim);
+PlatformKind mapToPlatformKind(const Triple &Target);
+PlatformSet mapToPlatformSet(ArrayRef<Triple> Targets);
+StringRef getPlatformName(PlatformKind Platform);
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H
\ No newline at end of file
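
A short sketch (not part of the patch) of the Platform helpers declared above; the triples are placeholders, and the WantSim flag is assumed to toggle between a platform and its simulator counterpart:

  #include "llvm/ADT/Triple.h"
  #include "llvm/TextAPI/MachO/Platform.h"

  using namespace llvm;
  using namespace llvm::MachO;

  static void classifyPlatforms() {
    // Simulator triples map to the *Simulator enumerators added above.
    PlatformKind Sim = mapToPlatformKind(Triple("x86_64-apple-ios-simulator"));

    // Assumed semantics: fold a platform to its device/simulator variant.
    PlatformKind Dev = mapToPlatformKind(PlatformKind::iOS, /*WantSim=*/false);

    Triple Ts[] = {Triple("arm64-apple-ios"), Triple("x86_64-apple-macosx")};
    PlatformSet Set = mapToPlatformSet(Ts); // one entry per distinct platform

    (void)Sim; (void)Dev; (void)Set;
    (void)getPlatformName(PlatformKind::macOS);
  }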
diff --git a/include/llvm/TextAPI/MachO/Symbol.h b/include/llvm/TextAPI/MachO/Symbol.h
index 3c7ff5e0f4ea..1b1632c599c4 100644
--- a/include/llvm/TextAPI/MachO/Symbol.h
+++ b/include/llvm/TextAPI/MachO/Symbol.h
@@ -14,6 +14,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+#include "llvm/TextAPI/MachO/Target.h"
namespace llvm {
namespace MachO {
@@ -37,7 +38,10 @@ enum class SymbolFlags : uint8_t {
/// Undefined
Undefined = 1U << 3,
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Undefined),
+ /// Re-exported
+ Rexported = 1U << 4,
+
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Rexported),
};
// clang-format on
@@ -49,16 +53,18 @@ enum class SymbolKind : uint8_t {
ObjectiveCInstanceVariable,
};
+using TargetList = SmallVector<Target, 5>;
class Symbol {
public:
- constexpr Symbol(SymbolKind Kind, StringRef Name,
- ArchitectureSet Architectures, SymbolFlags Flags)
- : Name(Name), Architectures(Architectures), Kind(Kind), Flags(Flags) {}
+ Symbol(SymbolKind Kind, StringRef Name, TargetList Targets, SymbolFlags Flags)
+ : Name(Name), Targets(std::move(Targets)), Kind(Kind), Flags(Flags) {}
+ void addTarget(Target target) { Targets.emplace_back(target); }
SymbolKind getKind() const { return Kind; }
StringRef getName() const { return Name; }
- ArchitectureSet getArchitectures() const { return Architectures; }
- void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; }
+ ArchitectureSet getArchitectures() const {
+ return mapToArchitectureSet(Targets);
+ }
SymbolFlags getFlags() const { return Flags; }
bool isWeakDefined() const {
@@ -78,6 +84,21 @@ public:
return (Flags & SymbolFlags::Undefined) == SymbolFlags::Undefined;
}
+ bool isReexported() const {
+ return (Flags & SymbolFlags::Rexported) == SymbolFlags::Rexported;
+ }
+
+ using const_target_iterator = TargetList::const_iterator;
+ using const_target_range = llvm::iterator_range<const_target_iterator>;
+ const_target_range targets() const { return {Targets}; }
+
+ using const_filtered_target_iterator =
+ llvm::filter_iterator<const_target_iterator,
+ std::function<bool(const Target &)>>;
+ using const_filtered_target_range =
+ llvm::iterator_range<const_filtered_target_iterator>;
+ const_filtered_target_range targets(ArchitectureSet architectures) const;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump(raw_ostream &OS) const;
void dump() const { dump(llvm::errs()); }
@@ -85,7 +106,7 @@ public:
private:
StringRef Name;
- ArchitectureSet Architectures;
+ TargetList Targets;
SymbolKind Kind;
SymbolFlags Flags;
};
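
A hedged sketch of constructing and querying a target-based Symbol, using the constructor, flag helpers, and target ranges shown above; SymbolKind::GlobalSymbol and SymbolFlags::WeakDefined come from the surrounding header, not this hunk, and the symbol name is a placeholder:

  #include "llvm/TextAPI/MachO/Symbol.h"

  using namespace llvm;
  using namespace llvm::MachO;

  static void inspectSymbol(const Target &Device, const Target &Simulator) {
    // The TargetList replaces the old ArchitectureSet in the constructor.
    Symbol Sym(SymbolKind::GlobalSymbol, "_shared", {Device, Simulator},
               SymbolFlags::WeakDefined);

    if (Sym.isWeakDefined() && !Sym.isUndefined())
      for (const Target &T : Sym.targets()) // all targets, unfiltered
        (void)T;
  }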
diff --git a/include/llvm/TextAPI/MachO/Target.h b/include/llvm/TextAPI/MachO/Target.h
new file mode 100644
index 000000000000..5fe44cb7d366
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/Target.h
@@ -0,0 +1,68 @@
+//===- llvm/TextAPI/Target.h - TAPI Target ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_TARGET_H
+#define LLVM_TEXTAPI_MACHO_TARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Error.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+#include "llvm/TextAPI/MachO/Platform.h"
+
+namespace llvm {
+namespace MachO {
+
+// This is similar to an llvm::Triple, but the triple doesn't have all the
+// information we need. For example, there is no enum value for x86_64h. The
+// only way to get that information is to parse the triple string.
+class Target {
+public:
+ Target() = default;
+ Target(Architecture Arch, PlatformKind Platform)
+ : Arch(Arch), Platform(Platform) {}
+ explicit Target(const llvm::Triple &Triple)
+ : Arch(mapToArchitecture(Triple)), Platform(mapToPlatformKind(Triple)) {}
+
+ static llvm::Expected<Target> create(StringRef Target);
+
+ operator std::string() const;
+
+ Architecture Arch;
+ PlatformKind Platform;
+};
+
+inline bool operator==(const Target &LHS, const Target &RHS) {
+ return std::tie(LHS.Arch, LHS.Platform) == std::tie(RHS.Arch, RHS.Platform);
+}
+
+inline bool operator!=(const Target &LHS, const Target &RHS) {
+ return std::tie(LHS.Arch, LHS.Platform) != std::tie(RHS.Arch, RHS.Platform);
+}
+
+inline bool operator<(const Target &LHS, const Target &RHS) {
+ return std::tie(LHS.Arch, LHS.Platform) < std::tie(RHS.Arch, RHS.Platform);
+}
+
+inline bool operator==(const Target &LHS, const Architecture &RHS) {
+ return LHS.Arch == RHS;
+}
+
+inline bool operator!=(const Target &LHS, const Architecture &RHS) {
+ return LHS.Arch != RHS;
+}
+
+PlatformSet mapToPlatformSet(ArrayRef<Target> Targets);
+ArchitectureSet mapToArchitectureSet(ArrayRef<Target> Targets);
+
+raw_ostream &operator<<(raw_ostream &OS, const Target &Target);
+
+} // namespace MachO
+} // namespace llvm
+
+#endif // LLVM_TEXTAPI_MACHO_TARGET_H
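
A sketch of basic Target usage under the declarations above; the "arm64-ios" target-string format accepted by create() and the AK_arm64 enumerator (from Architecture.def) are assumptions of this example:

  #include "llvm/Support/raw_ostream.h"
  #include "llvm/TextAPI/MachO/Target.h"

  using namespace llvm;
  using namespace llvm::MachO;

  static void targetBasics() {
    // create() parses a textual target; malformed input surfaces as an Error.
    Expected<Target> T = Target::create("arm64-ios");
    if (!T) {
      consumeError(T.takeError());
      return;
    }

    Target Other(AK_arm64, PlatformKind::iOS);
    if (*T == Other && *T == AK_arm64) // Target/Target and Target/Architecture
      outs() << *T << "\n";            // via the operator<< declared above
  }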
diff --git a/include/llvm/TextAPI/MachO/TextAPIReader.h b/include/llvm/TextAPI/MachO/TextAPIReader.h
index 6d9c09de5294..c551f0454e8e 100644
--- a/include/llvm/TextAPI/MachO/TextAPIReader.h
+++ b/include/llvm/TextAPI/MachO/TextAPIReader.h
@@ -20,10 +20,7 @@ class InterfaceFile;
class TextAPIReader {
public:
static Expected<std::unique_ptr<InterfaceFile>>
- get(std::unique_ptr<MemoryBuffer> InputBuffer);
-
- static Expected<std::unique_ptr<InterfaceFile>>
- getUnmanaged(llvm::MemoryBuffer *InputBuffer);
+ get(MemoryBufferRef InputBuffer);
TextAPIReader() = delete;
};
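
A sketch of driving the updated reader: get() now borrows the buffer through a MemoryBufferRef, so the caller keeps ownership and the unmanaged variant is no longer needed. The file-loading scaffolding below is illustrative:

  #include "llvm/Support/Error.h"
  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/TextAPI/MachO/InterfaceFile.h"
  #include "llvm/TextAPI/MachO/TextAPIReader.h"

  using namespace llvm;
  using namespace llvm::MachO;

  static Expected<std::unique_ptr<InterfaceFile>> readTBD(StringRef Path) {
    auto BufferOrErr = MemoryBuffer::getFile(Path);
    if (!BufferOrErr)
      return errorCodeToError(BufferOrErr.getError());
    // Ownership of the MemoryBuffer stays with the caller; the reader only
    // sees a non-owning MemoryBufferRef.
    return TextAPIReader::get((*BufferOrErr)->getMemBufferRef());
  }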
diff --git a/include/llvm/Transforms/IPO/Attributor.h b/include/llvm/Transforms/IPO/Attributor.h
index 5dbe21ac5e4e..3dbe0fcd76ea 100644
--- a/include/llvm/Transforms/IPO/Attributor.h
+++ b/include/llvm/Transforms/IPO/Attributor.h
@@ -60,13 +60,12 @@
// manifest their result in the IR for passes to come.
//
// Attribute manifestation is not mandatory. If desired, there is support to
-// generate a single LLVM-IR attribute already in the AbstractAttribute base
-// class. In the simplest case, a subclass overloads
-// `AbstractAttribute::getManifestPosition()` and
-// `AbstractAttribute::getAttrKind()` to return the appropriate values. The
-// Attributor manifestation framework will then create and place a new attribute
-// if it is allowed to do so (based on the abstract state). Other use cases can
-// be achieved by overloading other abstract attribute methods.
+// generate a single or multiple LLVM-IR attributes already in the helper struct
+// IRAttribute. In the simplest case, a subclass inherits from IRAttribute with
+// a proper Attribute::AttrKind as template parameter. The Attributor
+// manifestation framework will then create and place a new attribute if it is
+// allowed to do so (based on the abstract state). Other use cases can be
+// achieved by overloading AbstractAttribute or IRAttribute methods.
//
//
// The "mechanics" of adding a new "abstract attribute":
@@ -97,7 +96,13 @@
#ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
-#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/PassManager.h"
@@ -105,6 +110,7 @@ namespace llvm {
struct AbstractAttribute;
struct InformationCache;
+struct AAIsDead;
class Function;
@@ -120,6 +126,563 @@ ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
///}
+/// Helper to describe and deal with positions in the LLVM-IR.
+///
+/// A position in the IR is described by an anchor value and an "offset" that
+/// could be the argument number, for call sites and arguments, or an indicator
+/// of the "position kind". The kinds, specified in the Kind enum below, include
+/// the locations in the attribute list, i.a., function scope and return value,
+/// as well as a distinction between call sites and functions. Finally, there
+/// are floating values that do not have a corresponding attribute list
+/// position.
+struct IRPosition {
+ virtual ~IRPosition() {}
+
+ /// The positions we distinguish in the IR.
+ ///
+ /// The values are chosen such that the KindOrArgNo member has a value >= 0
+ /// if it is an argument or call site argument, while a value < 0 indicates the
+ /// respective kind of that value.
+ enum Kind : int {
+ IRP_INVALID = -6, ///< An invalid position.
+ IRP_FLOAT = -5, ///< A position that is not associated with a spot suitable
+ ///< for attributes. This could be any value or instruction.
+ IRP_RETURNED = -4, ///< An attribute for the function return value.
+ IRP_CALL_SITE_RETURNED = -3, ///< An attribute for a call site return value.
+ IRP_FUNCTION = -2, ///< An attribute for a function (scope).
+ IRP_CALL_SITE = -1, ///< An attribute for a call site (function scope).
+ IRP_ARGUMENT = 0, ///< An attribute for a function argument.
+ IRP_CALL_SITE_ARGUMENT = 1, ///< An attribute for a call site argument.
+ };
+
+ /// Default constructor available to create invalid positions implicitly. All
+ /// other positions need to be created explicitly through the appropriate
+ /// static member function.
+ IRPosition() : AnchorVal(nullptr), KindOrArgNo(IRP_INVALID) { verify(); }
+
+ /// Create a position describing the value of \p V.
+ static const IRPosition value(const Value &V) {
+ if (auto *Arg = dyn_cast<Argument>(&V))
+ return IRPosition::argument(*Arg);
+ if (auto *CB = dyn_cast<CallBase>(&V))
+ return IRPosition::callsite_returned(*CB);
+ return IRPosition(const_cast<Value &>(V), IRP_FLOAT);
+ }
+
+ /// Create a position describing the function scope of \p F.
+ static const IRPosition function(const Function &F) {
+ return IRPosition(const_cast<Function &>(F), IRP_FUNCTION);
+ }
+
+ /// Create a position describing the returned value of \p F.
+ static const IRPosition returned(const Function &F) {
+ return IRPosition(const_cast<Function &>(F), IRP_RETURNED);
+ }
+
+ /// Create a position describing the argument \p Arg.
+ static const IRPosition argument(const Argument &Arg) {
+ return IRPosition(const_cast<Argument &>(Arg), Kind(Arg.getArgNo()));
+ }
+
+ /// Create a position describing the function scope of \p CB.
+ static const IRPosition callsite_function(const CallBase &CB) {
+ return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE);
+ }
+
+ /// Create a position describing the returned value of \p CB.
+ static const IRPosition callsite_returned(const CallBase &CB) {
+ return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE_RETURNED);
+ }
+
+ /// Create a position describing the argument of \p CB at position \p ArgNo.
+ static const IRPosition callsite_argument(const CallBase &CB,
+ unsigned ArgNo) {
+ return IRPosition(const_cast<CallBase &>(CB), Kind(ArgNo));
+ }
+
+ /// Create a position describing the function scope of \p ICS.
+ static const IRPosition callsite_function(ImmutableCallSite ICS) {
+ return IRPosition::callsite_function(cast<CallBase>(*ICS.getInstruction()));
+ }
+
+ /// Create a position describing the returned value of \p ICS.
+ static const IRPosition callsite_returned(ImmutableCallSite ICS) {
+ return IRPosition::callsite_returned(cast<CallBase>(*ICS.getInstruction()));
+ }
+
+ /// Create a position describing the argument of \p ICS at position \p ArgNo.
+ static const IRPosition callsite_argument(ImmutableCallSite ICS,
+ unsigned ArgNo) {
+ return IRPosition::callsite_argument(cast<CallBase>(*ICS.getInstruction()),
+ ArgNo);
+ }
+
+ /// Create a position describing the argument of \p ACS at position \p ArgNo.
+ static const IRPosition callsite_argument(AbstractCallSite ACS,
+ unsigned ArgNo) {
+ int CSArgNo = ACS.getCallArgOperandNo(ArgNo);
+ if (CSArgNo >= 0)
+ return IRPosition::callsite_argument(
+ cast<CallBase>(*ACS.getInstruction()), CSArgNo);
+ return IRPosition();
+ }
+
+ /// Create a position with function scope matching the "context" of \p IRP.
+ /// If \p IRP is a call site (see isAnyCallSitePosition()) then the result
+ /// will be a call site position, otherwise the function position of the
+ /// associated function.
+ static const IRPosition function_scope(const IRPosition &IRP) {
+ if (IRP.isAnyCallSitePosition()) {
+ return IRPosition::callsite_function(
+ cast<CallBase>(IRP.getAnchorValue()));
+ }
+ assert(IRP.getAssociatedFunction());
+ return IRPosition::function(*IRP.getAssociatedFunction());
+ }
+
+ bool operator==(const IRPosition &RHS) const {
+ return (AnchorVal == RHS.AnchorVal) && (KindOrArgNo == RHS.KindOrArgNo);
+ }
+ bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); }
+
+ /// Return the value this abstract attribute is anchored with.
+ ///
+ /// The anchor value might not be the associated value if the latter is not
+ /// sufficient to determine where arguments will be manifested. This is, so
+ /// far, only the case for call site arguments as the value is not sufficient
+ /// to pinpoint them. Instead, we can use the call site as an anchor.
+ ///
+ ///{
+ Value &getAnchorValue() {
+ assert(KindOrArgNo != IRP_INVALID &&
+ "Invalid position does not have an anchor value!");
+ return *AnchorVal;
+ }
+ const Value &getAnchorValue() const {
+ return const_cast<IRPosition *>(this)->getAnchorValue();
+ }
+ ///}
+
+ /// Return the associated function, if any.
+ ///
+ ///{
+ Function *getAssociatedFunction() {
+ if (auto *CB = dyn_cast<CallBase>(AnchorVal))
+ return CB->getCalledFunction();
+ assert(KindOrArgNo != IRP_INVALID &&
+ "Invalid position does not have an anchor scope!");
+ Value &V = getAnchorValue();
+ if (isa<Function>(V))
+ return &cast<Function>(V);
+ if (isa<Argument>(V))
+ return cast<Argument>(V).getParent();
+ if (isa<Instruction>(V))
+ return cast<Instruction>(V).getFunction();
+ return nullptr;
+ }
+ const Function *getAssociatedFunction() const {
+ return const_cast<IRPosition *>(this)->getAssociatedFunction();
+ }
+ ///}
+
+ /// Return the associated argument, if any.
+ ///
+ ///{
+ Argument *getAssociatedArgument() {
+ if (auto *Arg = dyn_cast<Argument>(&getAnchorValue()))
+ return Arg;
+ int ArgNo = getArgNo();
+ if (ArgNo < 0)
+ return nullptr;
+ Function *AssociatedFn = getAssociatedFunction();
+ if (!AssociatedFn || AssociatedFn->arg_size() <= unsigned(ArgNo))
+ return nullptr;
+ return AssociatedFn->arg_begin() + ArgNo;
+ }
+ const Argument *getAssociatedArgument() const {
+ return const_cast<IRPosition *>(this)->getAssociatedArgument();
+ }
+ ///}
+
+ /// Return true if the position refers to a function interface, that is the
+ /// function scope, the function return, or an argument.
+ bool isFnInterfaceKind() const {
+ switch (getPositionKind()) {
+ case IRPosition::IRP_FUNCTION:
+ case IRPosition::IRP_RETURNED:
+ case IRPosition::IRP_ARGUMENT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /// Return the Function surrounding the anchor value.
+ ///
+ ///{
+ Function *getAnchorScope() {
+ Value &V = getAnchorValue();
+ if (isa<Function>(V))
+ return &cast<Function>(V);
+ if (isa<Argument>(V))
+ return cast<Argument>(V).getParent();
+ if (isa<Instruction>(V))
+ return cast<Instruction>(V).getFunction();
+ return nullptr;
+ }
+ const Function *getAnchorScope() const {
+ return const_cast<IRPosition *>(this)->getAnchorScope();
+ }
+ ///}
+
+ /// Return the context instruction, if any.
+ ///
+ ///{
+ Instruction *getCtxI() {
+ Value &V = getAnchorValue();
+ if (auto *I = dyn_cast<Instruction>(&V))
+ return I;
+ if (auto *Arg = dyn_cast<Argument>(&V))
+ if (!Arg->getParent()->isDeclaration())
+ return &Arg->getParent()->getEntryBlock().front();
+ if (auto *F = dyn_cast<Function>(&V))
+ if (!F->isDeclaration())
+ return &(F->getEntryBlock().front());
+ return nullptr;
+ }
+ const Instruction *getCtxI() const {
+ return const_cast<IRPosition *>(this)->getCtxI();
+ }
+ ///}
+
+ /// Return the value this abstract attribute is associated with.
+ ///
+ ///{
+ Value &getAssociatedValue() {
+ assert(KindOrArgNo != IRP_INVALID &&
+ "Invalid position does not have an associated value!");
+ if (getArgNo() < 0 || isa<Argument>(AnchorVal))
+ return *AnchorVal;
+ assert(isa<CallBase>(AnchorVal) && "Expected a call base!");
+ return *cast<CallBase>(AnchorVal)->getArgOperand(getArgNo());
+ }
+ const Value &getAssociatedValue() const {
+ return const_cast<IRPosition *>(this)->getAssociatedValue();
+ }
+ ///}
+
+ /// Return the argument number of the associated value if it is an argument or
+ /// call site argument, otherwise a negative value.
+ int getArgNo() const { return KindOrArgNo; }
+
+ /// Return the index in the attribute list for this position.
+ unsigned getAttrIdx() const {
+ switch (getPositionKind()) {
+ case IRPosition::IRP_INVALID:
+ case IRPosition::IRP_FLOAT:
+ break;
+ case IRPosition::IRP_FUNCTION:
+ case IRPosition::IRP_CALL_SITE:
+ return AttributeList::FunctionIndex;
+ case IRPosition::IRP_RETURNED:
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ return AttributeList::ReturnIndex;
+ case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ return KindOrArgNo + AttributeList::FirstArgIndex;
+ }
+ llvm_unreachable(
+ "There is no attribute index for a floating or invalid position!");
+ }
+
+ /// Return the associated position kind.
+ Kind getPositionKind() const {
+ if (getArgNo() >= 0) {
+ assert(((isa<Argument>(getAnchorValue()) &&
+ isa<Argument>(getAssociatedValue())) ||
+ isa<CallBase>(getAnchorValue())) &&
+ "Expected argument or call base due to argument number!");
+ if (isa<CallBase>(getAnchorValue()))
+ return IRP_CALL_SITE_ARGUMENT;
+ return IRP_ARGUMENT;
+ }
+
+ assert(KindOrArgNo < 0 &&
+ "Expected (call site) arguments to never reach this point!");
+ return Kind(KindOrArgNo);
+ }
+
+ /// TODO: Figure out if the attribute related helper functions should live
+ /// here or somewhere else.
+
+ /// Return true if any kind in \p AKs exists in the IR at a position that
+ /// will affect this one. See also getAttrs(...).
+ /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
+ /// e.g., the function position if this is an
+ /// argument position, should be ignored.
+ bool hasAttr(ArrayRef<Attribute::AttrKind> AKs,
+ bool IgnoreSubsumingPositions = false) const;
+
+ /// Return the attributes of any kind in \p AKs existing in the IR at a
+ /// position that will affect this one. While each position can only have a
+ /// single attribute of any kind in \p AKs, there are "subsuming" positions
+ /// that could have an attribute as well. This method collects all attributes
+ /// found into \p Attrs.
+ void getAttrs(ArrayRef<Attribute::AttrKind> AKs,
+ SmallVectorImpl<Attribute> &Attrs) const;
+
+ /// Return the attribute of kind \p AK existing in the IR at this position.
+ Attribute getAttr(Attribute::AttrKind AK) const {
+ if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
+ return Attribute();
+
+ AttributeList AttrList;
+ if (ImmutableCallSite ICS = ImmutableCallSite(&getAnchorValue()))
+ AttrList = ICS.getAttributes();
+ else
+ AttrList = getAssociatedFunction()->getAttributes();
+
+ if (AttrList.hasAttribute(getAttrIdx(), AK))
+ return AttrList.getAttribute(getAttrIdx(), AK);
+ return Attribute();
+ }
+
+ /// Remove the attributes of kinds \p AKs existing in the IR at this position.
+ void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) {
+ if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
+ return;
+
+ AttributeList AttrList;
+ CallSite CS = CallSite(&getAnchorValue());
+ if (CS)
+ AttrList = CS.getAttributes();
+ else
+ AttrList = getAssociatedFunction()->getAttributes();
+
+ LLVMContext &Ctx = getAnchorValue().getContext();
+ for (Attribute::AttrKind AK : AKs)
+ AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK);
+
+ if (CS)
+ CS.setAttributes(AttrList);
+ else
+ getAssociatedFunction()->setAttributes(AttrList);
+ }
+
+ bool isAnyCallSitePosition() const {
+ switch (getPositionKind()) {
+ case IRPosition::IRP_CALL_SITE:
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /// Special DenseMap key values.
+ ///
+ ///{
+ static const IRPosition EmptyKey;
+ static const IRPosition TombstoneKey;
+ ///}
+
+private:
+ /// Private constructor for special values only!
+ explicit IRPosition(int KindOrArgNo)
+ : AnchorVal(nullptr), KindOrArgNo(KindOrArgNo) {}
+
+ /// IRPosition anchored at \p AnchorVal with kind/argument number \p PK.
+ explicit IRPosition(Value &AnchorVal, Kind PK)
+ : AnchorVal(&AnchorVal), KindOrArgNo(PK) {
+ verify();
+ }
+
+ /// Verify internal invariants.
+ void verify();
+
+ /// The value this position is anchored at.
+ Value *AnchorVal;
+
+ /// The argument number, if non-negative, or the position "kind".
+ int KindOrArgNo;
+};
+
+/// Helper that allows IRPosition as a key in a DenseMap.
+template <> struct DenseMapInfo<IRPosition> {
+ static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; }
+ static inline IRPosition getTombstoneKey() {
+ return IRPosition::TombstoneKey;
+ }
+ static unsigned getHashValue(const IRPosition &IRP) {
+ return (DenseMapInfo<Value *>::getHashValue(&IRP.getAnchorValue()) << 4) ^
+ (unsigned(IRP.getArgNo()));
+ }
+ static bool isEqual(const IRPosition &LHS, const IRPosition &RHS) {
+ return LHS == RHS;
+ }
+};
+
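
An editorial sketch of using the IRPosition factories above to describe a call site argument; describe() is a hypothetical helper and assumes the surrounding Attributor.h context:

  #include <cassert>
  #include "llvm/IR/InstrTypes.h"
  #include "llvm/Transforms/IPO/Attributor.h"

  using namespace llvm;

  static void describe(CallBase &CB, unsigned ArgNo) {
    IRPosition Pos = IRPosition::callsite_argument(CB, ArgNo);

    // The anchor is the call site; the associated value is its operand.
    Value &Anchor = Pos.getAnchorValue();    // &Anchor == &CB
    Value &Assoc = Pos.getAssociatedValue(); // == *CB.getArgOperand(ArgNo)

    // Attribute-list slot: FirstArgIndex + ArgNo for this position kind.
    unsigned Idx = Pos.getAttrIdx();

    assert(Pos.getPositionKind() == IRPosition::IRP_CALL_SITE_ARGUMENT);
    (void)Anchor; (void)Assoc; (void)Idx;
  }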
+/// A visitor class for IR positions.
+///
+/// Given a position P, the SubsumingPositionIterator allows visiting "subsuming
+/// positions" wrt. attributes/information. Thus, if a piece of information
+/// holds for a subsuming position, it also holds for the position P.
+///
+/// The subsuming positions always include the initial position and then,
+/// depending on the position kind, additionally the following ones:
+/// - for IRP_RETURNED:
+/// - the function (IRP_FUNCTION)
+/// - for IRP_ARGUMENT:
+/// - the function (IRP_FUNCTION)
+/// - for IRP_CALL_SITE:
+/// - the callee (IRP_FUNCTION), if known
+/// - for IRP_CALL_SITE_RETURNED:
+/// - the callee (IRP_RETURNED), if known
+/// - the call site (IRP_FUNCTION)
+/// - the callee (IRP_FUNCTION), if known
+/// - for IRP_CALL_SITE_ARGUMENT:
+/// - the argument of the callee (IRP_ARGUMENT), if known
+/// - the callee (IRP_FUNCTION), if known
+/// - the position the call site argument is associated with if it is not
+/// anchored to the call site, e.g., if it is an argument then the argument
+/// (IRP_ARGUMENT)
+class SubsumingPositionIterator {
+ SmallVector<IRPosition, 4> IRPositions;
+ using iterator = decltype(IRPositions)::iterator;
+
+public:
+ SubsumingPositionIterator(const IRPosition &IRP);
+ iterator begin() { return IRPositions.begin(); }
+ iterator end() { return IRPositions.end(); }
+};
+
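
A brief sketch of the iterator above; visitSubsuming() is a hypothetical helper that touches every position whose attributes also apply to IRP (for a call site argument this includes, e.g., the callee's argument):

  #include "llvm/Transforms/IPO/Attributor.h"

  using namespace llvm;

  static void visitSubsuming(const IRPosition &IRP) {
    // The range starts with IRP itself, followed by its subsuming positions.
    for (const IRPosition &SubIRP : SubsumingPositionIterator(IRP))
      (void)SubIRP;
  }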
+/// Wrapper for FunctionAnalysisManager.
+struct AnalysisGetter {
+ template <typename Analysis>
+ typename Analysis::Result *getAnalysis(const Function &F) {
+ if (!MAM || !F.getParent())
+ return nullptr;
+ auto &FAM = MAM->getResult<FunctionAnalysisManagerModuleProxy>(
+ const_cast<Module &>(*F.getParent()))
+ .getManager();
+ return &FAM.getResult<Analysis>(const_cast<Function &>(F));
+ }
+
+ template <typename Analysis>
+ typename Analysis::Result *getAnalysis(const Module &M) {
+ if (!MAM)
+ return nullptr;
+ return &MAM->getResult<Analysis>(const_cast<Module &>(M));
+ }
+ AnalysisGetter(ModuleAnalysisManager &MAM) : MAM(&MAM) {}
+ AnalysisGetter() {}
+
+private:
+ ModuleAnalysisManager *MAM = nullptr;
+};
+
+/// Data structure to hold cached (LLVM-IR) information.
+///
+/// All attributes are given an InformationCache object at creation time to
+/// avoid inspection of the IR by all of them individually. This default
+/// InformationCache will hold information required by 'default' attributes,
+/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(...)
+/// is called.
+///
+/// If custom abstract attributes, registered manually through
+/// Attributor::registerAA(...), need more information, especially if it is not
+/// reusable, it is advised to inherit from the InformationCache and cast the
+/// instance down in the abstract attributes.
+struct InformationCache {
+ InformationCache(const Module &M, AnalysisGetter &AG)
+ : DL(M.getDataLayout()), Explorer(/* ExploreInterBlock */ true), AG(AG) {
+
+ CallGraph *CG = AG.getAnalysis<CallGraphAnalysis>(M);
+ if (!CG)
+ return;
+
+ DenseMap<const Function *, unsigned> SccSize;
+ for (scc_iterator<CallGraph *> I = scc_begin(CG); !I.isAtEnd(); ++I) {
+ for (CallGraphNode *Node : *I)
+ SccSize[Node->getFunction()] = I->size();
+ }
+ SccSizeOpt = std::move(SccSize);
+ }
+
+ /// A map type from opcodes to instructions with this opcode.
+ using OpcodeInstMapTy = DenseMap<unsigned, SmallVector<Instruction *, 32>>;
+
+ /// Return the map that relates "interesting" opcodes with all instructions
+ /// with that opcode in \p F.
+ OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) {
+ return FuncInstOpcodeMap[&F];
+ }
+
+ /// A vector type to hold instructions.
+ using InstructionVectorTy = std::vector<Instruction *>;
+
+ /// Return the instructions in \p F that may read or write memory.
+ InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) {
+ return FuncRWInstsMap[&F];
+ }
+
+ /// Return the MustBeExecutedContextExplorer.
+ MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() {
+ return Explorer;
+ }
+
+ /// Return TargetLibraryInfo for function \p F.
+ TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) {
+ return AG.getAnalysis<TargetLibraryAnalysis>(F);
+ }
+
+ /// Return AliasAnalysis Result for function \p F.
+ AAResults *getAAResultsForFunction(const Function &F) {
+ return AG.getAnalysis<AAManager>(F);
+ }
+
+ /// Return SCC size on call graph for function \p F.
+ unsigned getSccSize(const Function &F) {
+ if (!SccSizeOpt.hasValue())
+ return 0;
+ return (SccSizeOpt.getValue())[&F];
+ }
+
+ /// Return the DataLayout used in the module.
+ const DataLayout &getDL() { return DL; }
+
+private:
+ /// A map type from functions to opcode to instruction maps.
+ using FuncInstOpcodeMapTy = DenseMap<const Function *, OpcodeInstMapTy>;
+
+ /// A map type from functions to their read or write instructions.
+ using FuncRWInstsMapTy = DenseMap<const Function *, InstructionVectorTy>;
+
+ /// A nested map that remembers all instructions in a function with a certain
+ /// instruction opcode (Instruction::getOpcode()).
+ FuncInstOpcodeMapTy FuncInstOpcodeMap;
+
+ /// A map from functions to their instructions that may read or write memory.
+ FuncRWInstsMapTy FuncRWInstsMap;
+
+ /// The datalayout used in the module.
+ const DataLayout &DL;
+
+ /// MustBeExecutedContextExplorer
+ MustBeExecutedContextExplorer Explorer;
+
+ /// Getters for analysis.
+ AnalysisGetter &AG;
+
+ /// Cached SCC sizes in the call graph.
+ Optional<DenseMap<const Function *, unsigned>> SccSizeOpt;
+
+ /// Give the Attributor access to the members so
+ /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
+ friend struct Attributor;
+};
+
/// The fixpoint analysis framework that orchestrates the attribute deduction.
///
/// The Attributor provides a general abstract analysis framework (guided
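
Before the next hunk, an editorial sketch of how a pass could wire the pieces above together, assuming the Attributor constructor and run(Module &) entry point introduced below; the DepRecomputeInterval of 128 is a placeholder value:

  #include "llvm/IR/Module.h"
  #include "llvm/Transforms/IPO/Attributor.h"

  using namespace llvm;

  static bool runAttributorOnModule(Module &M, ModuleAnalysisManager &MAM) {
    AnalysisGetter AG(MAM);
    InformationCache InfoCache(M, AG);
    Attributor A(InfoCache, /*DepRecomputeInterval=*/128);

    for (Function &F : M) {
      // The cache must see every function before attributes may query it.
      A.initializeInformationCache(F);
      if (!F.isDeclaration())
        A.identifyDefaultAbstractAttributes(F);
    }
    return A.run(M) == ChangeStatus::CHANGED;
  }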
@@ -148,6 +711,18 @@ ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
/// described in the file comment.
struct Attributor {
+ /// Constructor
+ ///
+ /// \param InfoCache Cache to hold various information accessible for
+ /// the abstract attributes.
+ /// \param DepRecomputeInterval Number of iterations until the dependences
+ /// between abstract attributes are recomputed.
+ /// \param Whitelist If not null, a set limiting the attribute opportunities.
+ Attributor(InformationCache &InfoCache, unsigned DepRecomputeInterval,
+ DenseSet<const char *> *Whitelist = nullptr)
+ : InfoCache(InfoCache), DepRecomputeInterval(DepRecomputeInterval),
+ Whitelist(Whitelist) {}
+
~Attributor() { DeleteContainerPointers(AllAbstractAttributes); }
/// Run the analyses until a fixpoint is reached or enforced (timeout).
@@ -156,12 +731,13 @@ struct Attributor {
/// as the Attributor is not destroyed (it owns the attributes now).
///
/// \Returns CHANGED if the IR was changed, otherwise UNCHANGED.
- ChangeStatus run();
+ ChangeStatus run(Module &M);
- /// Lookup an abstract attribute of type \p AAType anchored at value \p V and
- /// argument number \p ArgNo. If no attribute is found and \p V is a call base
- /// instruction, the called function is tried as a value next. Thus, the
- /// returned abstract attribute might be anchored at the callee of \p V.
+ /// Lookup an abstract attribute of type \p AAType at position \p IRP. If
+ /// no abstract attribute is found, equivalent positions are checked, see
+ /// SubsumingPositionIterator. Thus, the returned abstract attribute
+ /// might be anchored at a different position, e.g., the callee if \p IRP is a
+ /// call base.
///
/// This method is the only (supported) way an abstract attribute can retrieve
/// information from another abstract attribute. As an example, take an
@@ -170,51 +746,29 @@ struct Attributor {
/// most optimistic information for other abstract attributes in-flight, e.g.
/// the one reasoning about the "captured" state for the argument or the one
/// reasoning on the memory access behavior of the function as a whole.
+ ///
+ /// If the flag \p TrackDependence is set to false the dependence from
+ /// \p QueryingAA to the return abstract attribute is not automatically
+ /// recorded. This should only be used if the caller will record the
+ /// dependence explicitly if necessary, that is if the returned abstract
+ /// attribute is used for reasoning. To record the dependences explicitly use
+ /// the `Attributor::recordDependence` method.
template <typename AAType>
- const AAType *getAAFor(AbstractAttribute &QueryingAA, const Value &V,
- int ArgNo = -1) {
- static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
- "Cannot query an attribute with a type not derived from "
- "'AbstractAttribute'!");
- assert(AAType::ID != Attribute::None &&
- "Cannot lookup generic abstract attributes!");
-
- // Determine the argument number automatically for llvm::Arguments if none
- // is set. Do not override a given one as it could be a use of the argument
- // in a call site.
- if (ArgNo == -1)
- if (auto *Arg = dyn_cast<Argument>(&V))
- ArgNo = Arg->getArgNo();
-
- // If a function was given together with an argument number, perform the
- // lookup for the actual argument instead. Don't do it for variadic
- // arguments.
- if (ArgNo >= 0 && isa<Function>(&V) &&
- cast<Function>(&V)->arg_size() > (size_t)ArgNo)
- return getAAFor<AAType>(
- QueryingAA, *(cast<Function>(&V)->arg_begin() + ArgNo), ArgNo);
-
- // Lookup the abstract attribute of type AAType. If found, return it after
- // registering a dependence of QueryingAA on the one returned attribute.
- const auto &KindToAbstractAttributeMap = AAMap.lookup({&V, ArgNo});
- if (AAType *AA = static_cast<AAType *>(
- KindToAbstractAttributeMap.lookup(AAType::ID))) {
- // Do not return an attribute with an invalid state. This minimizes checks
- // at the calls sites and allows the fallback below to kick in.
- if (AA->getState().isValidState()) {
- QueryMap[AA].insert(&QueryingAA);
- return AA;
- }
- }
-
- // If no abstract attribute was found and we look for a call site argument,
- // defer to the actual argument instead.
- ImmutableCallSite ICS(&V);
- if (ICS && ICS.getCalledValue())
- return getAAFor<AAType>(QueryingAA, *ICS.getCalledValue(), ArgNo);
+ const AAType &getAAFor(const AbstractAttribute &QueryingAA,
+ const IRPosition &IRP, bool TrackDependence = true) {
+ return getOrCreateAAFor<AAType>(IRP, &QueryingAA, TrackDependence);
+ }
- // No matching attribute found
- return nullptr;
+ /// Explicitly record a dependence from \p FromAA to \p ToAA, that is, if
+ /// \p FromAA changes, \p ToAA should be updated as well.
+ ///
+ /// This method should be used in conjunction with the `getAAFor` method and
+ /// with the TrackDependence flag passed to the method set to false. This can
+ /// be beneficial to avoid false dependences but it requires the users of
+ /// `getAAFor` to explicitly record true dependences through this method.
+ void recordDependence(const AbstractAttribute &FromAA,
+ const AbstractAttribute &ToAA) {
+ QueryMap[&FromAA].insert(const_cast<AbstractAttribute *>(&ToAA));
}
/// Introduce a new abstract attribute into the fixpoint analysis.
@@ -222,126 +776,242 @@ struct Attributor {
/// Note that ownership of the attribute is given to the Attributor. It will
/// invoke delete for the Attributor on destruction of the Attributor.
///
- /// Attributes are identified by
- /// (1) their anchored value (see AA.getAnchoredValue()),
- /// (2) their argument number (\p ArgNo, or Argument::getArgNo()), and
- /// (3) their default attribute kind (see AAType::ID).
- template <typename AAType> AAType &registerAA(AAType &AA, int ArgNo = -1) {
+ /// Attributes are identified by their IR position (AAType::getIRPosition())
+ /// and the address of their static member (see AAType::ID).
+ template <typename AAType> AAType &registerAA(AAType &AA) {
static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
"Cannot register an attribute with a type not derived from "
"'AbstractAttribute'!");
-
- // Determine the anchor value and the argument number which are used to
- // lookup the attribute together with AAType::ID. If passed an argument,
- // use its argument number but do not override a given one as it could be a
- // use of the argument at a call site.
- Value &AnchoredVal = AA.getAnchoredValue();
- if (ArgNo == -1)
- if (auto *Arg = dyn_cast<Argument>(&AnchoredVal))
- ArgNo = Arg->getArgNo();
-
// Put the attribute in the lookup map structure and the container we use to
// keep track of all attributes.
- AAMap[{&AnchoredVal, ArgNo}][AAType::ID] = &AA;
+ IRPosition &IRP = AA.getIRPosition();
+ auto &KindToAbstractAttributeMap = AAMap[IRP];
+ assert(!KindToAbstractAttributeMap.count(&AAType::ID) &&
+ "Attribute already in map!");
+ KindToAbstractAttributeMap[&AAType::ID] = &AA;
AllAbstractAttributes.push_back(&AA);
return AA;
}
+ /// Return the internal information cache.
+ InformationCache &getInfoCache() { return InfoCache; }
+
/// Determine opportunities to derive 'default' attributes in \p F and create
/// abstract attribute objects for them.
///
/// \param F The function that is checked for attribute opportunities.
- /// \param InfoCache A cache for information queryable by the new attributes.
- /// \param Whitelist If not null, a set limiting the attribute opportunities.
///
/// Note that abstract attribute instances are generally created even if the
/// IR already contains the information they would deduce. The most important
/// reason for this is the single interface, the one of the abstract attribute
/// instance, which can be queried without the need to look at the IR in
/// various places.
- void identifyDefaultAbstractAttributes(
- Function &F, InformationCache &InfoCache,
- DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist = nullptr);
+ void identifyDefaultAbstractAttributes(Function &F);
+
+ /// Initialize the information cache for queries regarding function \p F.
+ ///
+ /// This method needs to be called for all functions that might be looked at
+ /// through the information cache interface *prior* to looking at them.
+ void initializeInformationCache(Function &F);
+
+ /// Mark the internal function \p F as live.
+ ///
+ /// This will trigger the identification and initialization of attributes for
+ /// \p F.
+ void markLiveInternalFunction(const Function &F) {
+ assert(F.hasLocalLinkage() &&
+ "Only local linkage is assumed dead initially.");
+
+ identifyDefaultAbstractAttributes(const_cast<Function &>(F));
+ }
+
+ /// Record that \p I is deleted after information was manifested.
+ void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); }
+
+ /// Record that \p BB is deleted after information was manifested.
+ void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); }
+
+ /// Record that \p F is deleted after information was manifested.
+ void deleteAfterManifest(Function &F) { ToBeDeletedFunctions.insert(&F); }
+
+ /// Return true if \p AA (or its context instruction) is assumed dead.
+ ///
+ /// If \p LivenessAA is not provided it is queried.
+ bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA);
/// Check \p Pred on all function call sites.
///
/// This method will evaluate \p Pred on call sites and return
/// true if \p Pred holds in every call site. However, this is only possible
/// if all call sites are known, hence the function must have internal linkage.
- bool checkForAllCallSites(Function &F, std::function<bool(CallSite)> &Pred,
+ bool checkForAllCallSites(const function_ref<bool(AbstractCallSite)> &Pred,
+ const AbstractAttribute &QueryingAA,
bool RequireAllCallSites);
+ /// Check \p Pred on all values potentially returned by \p F.
+ ///
+ /// This method will evaluate \p Pred on all values potentially returned by
+ /// the function associated with \p QueryingAA. The returned values are
+ /// matched with their respective return instructions. Returns true if \p Pred
+ /// holds on all of them.
+ bool checkForAllReturnedValuesAndReturnInsts(
+ const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
+ &Pred,
+ const AbstractAttribute &QueryingAA);
+
+ /// Check \p Pred on all values potentially returned by the function
+ /// associated with \p QueryingAA.
+ ///
+ /// This is the context insensitive version of the method above.
+ bool checkForAllReturnedValues(const function_ref<bool(Value &)> &Pred,
+ const AbstractAttribute &QueryingAA);
+
+ /// Check \p Pred on all instructions with an opcode present in \p Opcodes.
+ ///
+ /// This method will evaluate \p Pred on all instructions with an opcode
+ /// present in \p Opcodes and return true if \p Pred holds on all of them.
+ bool checkForAllInstructions(const function_ref<bool(Instruction &)> &Pred,
+ const AbstractAttribute &QueryingAA,
+ const ArrayRef<unsigned> &Opcodes);
+
+ /// Check \p Pred on all call-like instructions (= CallBase derived).
+ ///
+ /// See checkForAllInstructions(...) for more information.
+ bool
+ checkForAllCallLikeInstructions(const function_ref<bool(Instruction &)> &Pred,
+ const AbstractAttribute &QueryingAA) {
+ return checkForAllInstructions(Pred, QueryingAA,
+ {(unsigned)Instruction::Invoke,
+ (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call});
+ }
+
+ /// Check \p Pred on all Read/Write instructions.
+ ///
+ /// This method will evaluate \p Pred on all instructions recorded in the
+ /// information cache that read or write memory, and return true if \p Pred
+ /// holds on all of them.
+ bool checkForAllReadWriteInstructions(
+ const llvm::function_ref<bool(Instruction &)> &Pred,
+ AbstractAttribute &QueryingAA);
+
+ /// Return the data layout associated with the anchor scope.
+ const DataLayout &getDataLayout() const { return InfoCache.DL; }
+
private:
+ /// Check \p Pred on all call sites of \p Fn.
+ ///
+ /// This method will evaluate \p Pred on call sites and return
+ /// true if \p Pred holds in every call site. However, this is only possible
+ /// if all call sites are known, hence the function must have internal linkage.
+ bool checkForAllCallSites(const function_ref<bool(AbstractCallSite)> &Pred,
+ const Function &Fn, bool RequireAllCallSites,
+ const AbstractAttribute *QueryingAA);
+
+ /// The private version of getAAFor that allows omitting a querying abstract
+ /// attribute. See also the public getAAFor method.
+ template <typename AAType>
+ const AAType &getOrCreateAAFor(const IRPosition &IRP,
+ const AbstractAttribute *QueryingAA = nullptr,
+ bool TrackDependence = false) {
+ if (const AAType *AAPtr =
+ lookupAAFor<AAType>(IRP, QueryingAA, TrackDependence))
+ return *AAPtr;
+
+ // No matching attribute found, create one.
+ // Use the static create method.
+ auto &AA = AAType::createForPosition(IRP, *this);
+ registerAA(AA);
+
+ // For now we ignore naked and optnone functions.
+ bool Invalidate = Whitelist && !Whitelist->count(&AAType::ID);
+ if (const Function *Fn = IRP.getAnchorScope())
+ Invalidate |= Fn->hasFnAttribute(Attribute::Naked) ||
+ Fn->hasFnAttribute(Attribute::OptimizeNone);
+
+ // Bootstrap the new attribute with an initial update to propagate
+ // information, e.g., function -> call site. If it is not on a given
+ // whitelist, we will not perform updates at all.
+ if (Invalidate) {
+ AA.getState().indicatePessimisticFixpoint();
+ return AA;
+ }
+
+ AA.initialize(*this);
+ AA.update(*this);
+
+ if (TrackDependence && AA.getState().isValidState())
+ QueryMap[&AA].insert(const_cast<AbstractAttribute *>(QueryingAA));
+ return AA;
+ }
+
+ /// Return the attribute of \p AAType for \p IRP if existing.
+ template <typename AAType>
+ const AAType *lookupAAFor(const IRPosition &IRP,
+ const AbstractAttribute *QueryingAA = nullptr,
+ bool TrackDependence = false) {
+ static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
+ "Cannot query an attribute with a type not derived from "
+ "'AbstractAttribute'!");
+ assert((QueryingAA || !TrackDependence) &&
+ "Cannot track dependences without a QueryingAA!");
+
+ // Lookup the abstract attribute of type AAType. If found, return it after
+ // registering a dependence of QueryingAA on the one returned attribute.
+ const auto &KindToAbstractAttributeMap = AAMap.lookup(IRP);
+ if (AAType *AA = static_cast<AAType *>(
+ KindToAbstractAttributeMap.lookup(&AAType::ID))) {
+ // Do not register a dependence on an attribute with an invalid state.
+ if (TrackDependence && AA->getState().isValidState())
+ QueryMap[AA].insert(const_cast<AbstractAttribute *>(QueryingAA));
+ return AA;
+ }
+ return nullptr;
+ }
+
/// The set of all abstract attributes.
///{
using AAVector = SmallVector<AbstractAttribute *, 64>;
AAVector AllAbstractAttributes;
///}
- /// A nested map to lookup abstract attributes based on the anchored value and
- /// an argument positions (or -1) on the outer level, and attribute kinds
- /// (Attribute::AttrKind) on the inner level.
+ /// A nested map to look up abstract attributes based on the IRPosition on
+ /// the outer level, and the addresses of the static member (AAType::ID) on
+ /// the inner level.
///{
- using KindToAbstractAttributeMap = DenseMap<unsigned, AbstractAttribute *>;
- DenseMap<std::pair<const Value *, int>, KindToAbstractAttributeMap> AAMap;
+ using KindToAbstractAttributeMap =
+ DenseMap<const char *, AbstractAttribute *>;
+ DenseMap<IRPosition, KindToAbstractAttributeMap> AAMap;
///}
/// A map from abstract attributes to the ones that queried them through calls
/// to the getAAFor<...>(...) method.
///{
using QueryMapTy =
- DenseMap<AbstractAttribute *, SetVector<AbstractAttribute *>>;
+ MapVector<const AbstractAttribute *, SetVector<AbstractAttribute *>>;
QueryMapTy QueryMap;
///}
-};
-
-/// Data structure to hold cached (LLVM-IR) information.
-///
-/// All attributes are given an InformationCache object at creation time to
-/// avoid inspection of the IR by all of them individually. This default
-/// InformationCache will hold information required by 'default' attributes,
-/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..)
-/// is called.
-///
-/// If custom abstract attributes, registered manually through
-/// Attributor::registerAA(...), need more information, especially if it is not
-/// reusable, it is advised to inherit from the InformationCache and cast the
-/// instance down in the abstract attributes.
-struct InformationCache {
- /// A map type from opcodes to instructions with this opcode.
- using OpcodeInstMapTy = DenseMap<unsigned, SmallVector<Instruction *, 32>>;
-
- /// Return the map that relates "interesting" opcodes with all instructions
- /// with that opcode in \p F.
- OpcodeInstMapTy &getOpcodeInstMapForFunction(Function &F) {
- return FuncInstOpcodeMap[&F];
- }
- /// A vector type to hold instructions.
- using InstructionVectorTy = std::vector<Instruction *>;
-
- /// Return the instructions in \p F that may read or write memory.
- InstructionVectorTy &getReadOrWriteInstsForFunction(Function &F) {
- return FuncRWInstsMap[&F];
- }
-
-private:
- /// A map type from functions to opcode to instruction maps.
- using FuncInstOpcodeMapTy = DenseMap<Function *, OpcodeInstMapTy>;
+ /// The information cache that holds pre-processed (LLVM-IR) information.
+ InformationCache &InfoCache;
- /// A map type from functions to their read or write instructions.
- using FuncRWInstsMapTy = DenseMap<Function *, InstructionVectorTy>;
+ /// Number of iterations until the dependences between abstract attributes are
+ /// recomputed.
+ const unsigned DepRecomputeInterval;
- /// A nested map that remembers all instructions in a function with a certain
- /// instruction opcode (Instruction::getOpcode()).
- FuncInstOpcodeMapTy FuncInstOpcodeMap;
+ /// If not null, a set limiting the attribute opportunities.
+ const DenseSet<const char *> *Whitelist;
- /// A map from functions to their instructions that may read or write memory.
- FuncRWInstsMapTy FuncRWInstsMap;
+ /// A set to remember the functions we already assume to be live and visited.
+ DenseSet<const Function *> VisitedFunctions;
- /// Give the Attributor access to the members so
- /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
- friend struct Attributor;
+ /// Functions, blocks, and instructions we delete after manifest is done.
+ ///
+ ///{
+ SmallPtrSet<Function *, 8> ToBeDeletedFunctions;
+ SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks;
+ SmallPtrSet<Instruction *, 8> ToBeDeletedInsts;
+ ///}
};
/// An interface to query the internal state of an abstract attribute.
@@ -375,13 +1045,17 @@ struct AbstractState {
///
/// This will usually make the optimistically assumed state the known to be
/// true state.
- virtual void indicateOptimisticFixpoint() = 0;
+ ///
+ /// \returns ChangeStatus::UNCHANGED as the assumed value should not change.
+ virtual ChangeStatus indicateOptimisticFixpoint() = 0;
/// Indicate that the abstract state should converge to the pessimistic state.
///
/// This will usually revert the optimistically assumed state to the known to
/// be true state.
- virtual void indicatePessimisticFixpoint() = 0;
+ ///
+ /// \returns ChangeStatus::CHANGED as the assumed value may change.
+ virtual ChangeStatus indicatePessimisticFixpoint() = 0;
};
/// Simple state with integers encoding.
@@ -412,10 +1086,16 @@ struct IntegerState : public AbstractState {
bool isAtFixpoint() const override { return Assumed == Known; }
/// See AbstractState::indicateOptimisticFixpoint(...)
- void indicateOptimisticFixpoint() override { Known = Assumed; }
+ ChangeStatus indicateOptimisticFixpoint() override {
+ Known = Assumed;
+ return ChangeStatus::UNCHANGED;
+ }
/// See AbstractState::indicatePessimisticFixpoint(...)
- void indicatePessimisticFixpoint() override { Assumed = Known; }
+ ChangeStatus indicatePessimisticFixpoint() override {
+ Assumed = Known;
+ return ChangeStatus::CHANGED;
+ }
/// Return the known state encoding
base_t getKnown() const { return Known; }
@@ -448,6 +1128,12 @@ struct IntegerState : public AbstractState {
return *this;
}
+ /// Remove the bits in \p BitsEncoding from the "known bits".
+ IntegerState &removeKnownBits(base_t BitsEncoding) {
+ Known = (Known & ~BitsEncoding);
+ return *this;
+ }
+
/// Keep only "assumed bits" also set in \p BitsEncoding but all known ones.
IntegerState &intersectAssumedBits(base_t BitsEncoding) {
+ // Make sure we never lose any "known bits".
@@ -455,6 +1141,62 @@ struct IntegerState : public AbstractState {
return *this;
}
+ /// Take minimum of assumed and \p Value.
+ IntegerState &takeAssumedMinimum(base_t Value) {
+ // Make sure we never lose the "known value".
+ Assumed = std::max(std::min(Assumed, Value), Known);
+ return *this;
+ }
+
+ /// Take maximum of known and \p Value.
+ IntegerState &takeKnownMaximum(base_t Value) {
+ // Make sure we never lose the "known value".
+ Assumed = std::max(Value, Assumed);
+ Known = std::max(Value, Known);
+ return *this;
+ }
+
+ /// Equality for IntegerState.
+ bool operator==(const IntegerState &R) const {
+ return this->getAssumed() == R.getAssumed() &&
+ this->getKnown() == R.getKnown();
+ }
+
+ /// Inequality for IntegerState.
+ bool operator!=(const IntegerState &R) const { return !(*this == R); }
+
+ /// "Clamp" this state with \p R. The result is the minimum of the assumed
+ /// information but not less than what was known before.
+ ///
+ /// TODO: Consider replacing the operator with a call or using it only when
+ /// we can also take the maximum of the known information, thus when
+ /// \p R is not dependent on additional assumed state.
+ IntegerState operator^=(const IntegerState &R) {
+ takeAssumedMinimum(R.Assumed);
+ return *this;
+ }
+
+ /// "Clamp" this state with \p R. The result is the maximum of the known
+ /// information but not more than what was assumed before.
+ IntegerState operator+=(const IntegerState &R) {
+ takeKnownMaximum(R.Known);
+ return *this;
+ }
+
+ /// Make this the minimum, known and assumed, of this state and \p R.
+ IntegerState operator&=(const IntegerState &R) {
+ Known = std::min(Known, R.Known);
+ Assumed = std::min(Assumed, R.Assumed);
+ return *this;
+ }
+
+ /// Make this the maximum, known and assumed, of this state and \p R.
+ IntegerState operator|=(const IntegerState &R) {
+ Known = std::max(Known, R.Known);
+ Assumed = std::max(Assumed, R.Assumed);
+ return *this;
+ }
+
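+ // Illustrative sketch (not part of the upstream header) of the invariant
+ // maintained by the operations above: the assumed state may shrink during
+ // the fixpoint iteration but never drops below what is already known.
+ //
+ //   IntegerState S;          // Known = 0, Assumed = best state (all bits)
+ //   S.addKnownBits(1);       // bit 0 becomes known (and stays assumed)
+ //   S.takeAssumedMinimum(0); // Assumed clamps to Known (1), not to 0
+ //   assert(S.getAssumed() >= S.getKnown());
+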
private:
/// The known state encoding in an integer of type base_t.
base_t Known = getWorstState();
@@ -468,6 +1210,77 @@ struct BooleanState : public IntegerState {
BooleanState() : IntegerState(1){};
};
+/// Helper struct, necessary because the modular build fails if the virtual
+/// method IRAttribute::manifest is defined in Attributor.cpp.
+struct IRAttributeManifest {
+ static ChangeStatus manifestAttrs(Attributor &A, IRPosition &IRP,
+ const ArrayRef<Attribute> &DeducedAttrs);
+};
+
+/// Helper to tie an abstract state implementation to an abstract attribute.
+template <typename StateTy, typename Base>
+struct StateWrapper : public StateTy, public Base {
+ /// Provide static access to the type of the state.
+ using StateType = StateTy;
+
+ /// See AbstractAttribute::getState(...).
+ StateType &getState() override { return *this; }
+
+ /// See AbstractAttribute::getState(...).
+ const AbstractState &getState() const override { return *this; }
+};
+
+/// Helper class that provides common functionality to manifest IR attributes.
+template <Attribute::AttrKind AK, typename Base>
+struct IRAttribute : public IRPosition, public Base {
+ IRAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
+ ~IRAttribute() {}
+
+ /// See AbstractAttribute::initialize(...).
+ virtual void initialize(Attributor &A) override {
+ if (hasAttr(getAttrKind())) {
+ this->getState().indicateOptimisticFixpoint();
+ return;
+ }
+
+ const IRPosition &IRP = this->getIRPosition();
+ bool IsFnInterface = IRP.isFnInterfaceKind();
+ const Function *FnScope = IRP.getAnchorScope();
+ // TODO: Not all attributes require an exact definition. Find a way to
+ // enable deduction for some but not all attributes in case the
+ // definition might be changed at runtime, see also
+ // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
+ // TODO: We could always determine abstract attributes and if sufficient
+ // information was found we could duplicate the functions that do not
+ // have an exact definition.
+ if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition()))
+ this->getState().indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ SmallVector<Attribute, 4> DeducedAttrs;
+ getDeducedAttributes(getAnchorValue().getContext(), DeducedAttrs);
+ return IRAttributeManifest::manifestAttrs(A, getIRPosition(), DeducedAttrs);
+ }
+
+ /// Return the kind that identifies the abstract attribute implementation.
+ Attribute::AttrKind getAttrKind() const { return AK; }
+
+ /// Return the deduced attributes in \p Attrs.
+ virtual void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const {
+ Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
+ }
+
+ /// Return an IR position, see struct IRPosition.
+ ///
+ ///{
+ IRPosition &getIRPosition() override { return *this; }
+ const IRPosition &getIRPosition() const override { return *this; }
+ ///}
+};
+
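+// Illustrative sketch (not part of the upstream header): a minimal concrete
+// attribute built from the pieces above; real implementations live in the
+// Attributor source, and the name below is hypothetical.
+//
+//   struct AAFooExample final
+//       : public IRAttribute<Attribute::NoUnwind,
+//                            StateWrapper<BooleanState, AbstractAttribute>> {
+//     AAFooExample(const IRPosition &IRP) : IRAttribute(IRP) {}
+//
+//     ChangeStatus updateImpl(Attributor &A) override {
+//       // Deduce; on failure revert to the known state, e.g.,
+//       //   return getState().indicatePessimisticFixpoint();
+//       return ChangeStatus::UNCHANGED;
+//     }
+//
+//     void trackStatistics() const override {}
+//     const std::string getAsStr() const override { return "foo-example"; }
+//   };
+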
/// Base struct for all "concrete attribute" deductions.
///
/// The abstract attribute is a minimal interface that allows the Attributor to
@@ -512,29 +1325,7 @@ struct BooleanState : public IntegerState {
/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
/// described in the file comment.
struct AbstractAttribute {
-
- /// The positions attributes can be manifested in.
- enum ManifestPosition {
- MP_ARGUMENT, ///< An attribute for a function argument.
- MP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument.
- MP_FUNCTION, ///< An attribute for a function as a whole.
- MP_RETURNED, ///< An attribute for the function return value.
- };
-
- /// An abstract attribute associated with \p AssociatedVal and anchored at
- /// \p AnchoredVal.
- ///
- /// \param AssociatedVal The value this abstract attribute is associated with.
- /// \param AnchoredVal The value this abstract attributes is anchored at.
- /// \param InfoCache Cached information accessible to the abstract attribute.
- AbstractAttribute(Value *AssociatedVal, Value &AnchoredVal,
- InformationCache &InfoCache)
- : AssociatedVal(AssociatedVal), AnchoredVal(AnchoredVal),
- InfoCache(InfoCache) {}
-
- /// An abstract attribute associated with and anchored at \p V.
- AbstractAttribute(Value &V, InformationCache &InfoCache)
- : AbstractAttribute(&V, V, InfoCache) {}
+ using StateType = AbstractState;
/// Virtual destructor.
virtual ~AbstractAttribute() {}
@@ -550,47 +1341,11 @@ struct AbstractAttribute {
virtual void initialize(Attributor &A) {}
/// Return the internal abstract state for inspection.
- virtual const AbstractState &getState() const = 0;
-
- /// Return the value this abstract attribute is anchored with.
- ///
- /// The anchored value might not be the associated value if the latter is not
- /// sufficient to determine where arguments will be manifested. This is mostly
- /// the case for call site arguments as the value is not sufficient to
- /// pinpoint them. Instead, we can use the call site as an anchor.
- ///
- ///{
- Value &getAnchoredValue() { return AnchoredVal; }
- const Value &getAnchoredValue() const { return AnchoredVal; }
- ///}
-
- /// Return the llvm::Function surrounding the anchored value.
- ///
- ///{
- Function &getAnchorScope();
- const Function &getAnchorScope() const;
- ///}
-
- /// Return the value this abstract attribute is associated with.
- ///
- /// The abstract state usually represents this value.
- ///
- ///{
- virtual Value *getAssociatedValue() { return AssociatedVal; }
- virtual const Value *getAssociatedValue() const { return AssociatedVal; }
- ///}
-
- /// Return the position this abstract state is manifested in.
- virtual ManifestPosition getManifestPosition() const = 0;
-
- /// Return the kind that identifies the abstract attribute implementation.
- virtual Attribute::AttrKind getAttrKind() const = 0;
+ virtual StateType &getState() = 0;
+ virtual const StateType &getState() const = 0;
- /// Return the deduced attributes in \p Attrs.
- virtual void getDeducedAttributes(SmallVectorImpl<Attribute> &Attrs) const {
- LLVMContext &Ctx = AnchoredVal.getContext();
- Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
- }
+ /// Return an IR position, see struct IRPosition.
+ virtual const IRPosition &getIRPosition() const = 0;
/// Helper functions, for debug purposes only.
///{
@@ -617,10 +1372,19 @@ protected:
/// represented by the abstract attribute in the LLVM-IR.
///
/// \Return CHANGED if the IR was altered, otherwise UNCHANGED.
- virtual ChangeStatus manifest(Attributor &A);
+ virtual ChangeStatus manifest(Attributor &A) {
+ return ChangeStatus::UNCHANGED;
+ }
- /// Return the internal abstract state for careful modification.
- virtual AbstractState &getState() = 0;
+ /// Hook to enable custom statistic tracking, called after manifest that
+ /// resulted in a change if statistics are enabled.
+ ///
+ /// We require subclasses to provide an implementation so we remember to
+ /// add statistics for them.
+ virtual void trackStatistics() const = 0;
+
+ /// Return an IR position, see struct IRPosition.
+ virtual IRPosition &getIRPosition() = 0;
/// The actual update/transfer function which has to be implemented by the
/// derived classes.
@@ -630,15 +1394,6 @@ protected:
///
/// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
virtual ChangeStatus updateImpl(Attributor &A) = 0;
-
- /// The value this abstract attribute is associated with.
- Value *AssociatedVal;
-
- /// The value this abstract attribute is anchored at.
- Value &AnchoredVal;
-
- /// The information cache accessible to this abstract attribute.
- InformationCache &InfoCache;
};
/// Forward declarations of output streams for debug purposes.
@@ -646,8 +1401,10 @@ protected:
///{
raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA);
raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
-raw_ostream &operator<<(raw_ostream &OS, AbstractAttribute::ManifestPosition);
+raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind);
+raw_ostream &operator<<(raw_ostream &OS, const IRPosition &);
raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
+raw_ostream &operator<<(raw_ostream &OS, const IntegerState &S);
///}
struct AttributorPass : public PassInfoMixin<AttributorPass> {
@@ -661,129 +1418,531 @@ Pass *createAttributorLegacyPass();
/// ----------------------------------------------------------------------------
/// An abstract attribute for the returned values of a function.
-struct AAReturnedValues : public AbstractAttribute {
- /// See AbstractAttribute::AbstractAttribute(...).
- AAReturnedValues(Function &F, InformationCache &InfoCache)
- : AbstractAttribute(F, InfoCache) {}
+struct AAReturnedValues
+ : public IRAttribute<Attribute::Returned, AbstractAttribute> {
+ AAReturnedValues(const IRPosition &IRP) : IRAttribute(IRP) {}
+
+ /// Return an assumed unique return value if a single candidate is found. If
+ /// there cannot be one, return a nullptr. If it is not clear yet, return the
+ /// Optional::NoneType.
+ Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
/// Check \p Pred on all returned values.
///
/// This method will evaluate \p Pred on returned values and return
/// true if (1) all returned values are known, and (2) \p Pred returned true
/// for all returned values.
- virtual bool
- checkForallReturnedValues(std::function<bool(Value &)> &Pred) const = 0;
-
- /// See AbstractAttribute::getAttrKind()
- Attribute::AttrKind getAttrKind() const override { return ID; }
-
- /// The identifier used by the Attributor for this class of attributes.
- static constexpr Attribute::AttrKind ID = Attribute::Returned;
+ ///
+ /// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts
+ /// method, this one will not filter dead return instructions.
+ virtual bool checkForAllReturnedValuesAndReturnInsts(
+ const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
+ &Pred) const = 0;
+
+ using iterator =
+ MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::iterator;
+ using const_iterator =
+ MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::const_iterator;
+ virtual llvm::iterator_range<iterator> returned_values() = 0;
+ virtual llvm::iterator_range<const_iterator> returned_values() const = 0;
+
+ virtual size_t getNumReturnValues() const = 0;
+ virtual const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const = 0;
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAReturnedValues &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
};
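+// Illustrative sketch (not part of the upstream header): the predicate
+// interface above is typically driven with a lambda; assuming RVAA is an
+// AAReturnedValues instance (name hypothetical):
+//
+//   auto Pred = [&](Value &RV, const SmallSetVector<ReturnInst *, 4> &) {
+//     return isa<Constant>(RV); // e.g., all returned values are constants
+//   };
+//   bool AllConstant = RVAA.checkForAllReturnedValuesAndReturnInsts(Pred);
+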
-struct AANoUnwind : public AbstractAttribute {
- /// An abstract interface for all nosync attributes.
- AANoUnwind(Value &V, InformationCache &InfoCache)
- : AbstractAttribute(V, InfoCache) {}
-
- /// See AbstractAttribute::getAttrKind()/
- Attribute::AttrKind getAttrKind() const override { return ID; }
-
- static constexpr Attribute::AttrKind ID = Attribute::NoUnwind;
+struct AANoUnwind
+ : public IRAttribute<Attribute::NoUnwind,
+ StateWrapper<BooleanState, AbstractAttribute>> {
+ AANoUnwind(const IRPosition &IRP) : IRAttribute(IRP) {}
/// Returns true if nounwind is assumed.
- virtual bool isAssumedNoUnwind() const = 0;
+ bool isAssumedNoUnwind() const { return getAssumed(); }
/// Returns true if nounwind is known.
- virtual bool isKnownNoUnwind() const = 0;
-};
+ bool isKnownNoUnwind() const { return getKnown(); }
-struct AANoSync : public AbstractAttribute {
- /// An abstract interface for all nosync attributes.
- AANoSync(Value &V, InformationCache &InfoCache)
- : AbstractAttribute(V, InfoCache) {}
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A);
- /// See AbstractAttribute::getAttrKind().
- Attribute::AttrKind getAttrKind() const override { return ID; }
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
- static constexpr Attribute::AttrKind ID =
- Attribute::AttrKind(Attribute::NoSync);
+struct AANoSync
+ : public IRAttribute<Attribute::NoSync,
+ StateWrapper<BooleanState, AbstractAttribute>> {
+ AANoSync(const IRPosition &IRP) : IRAttribute(IRP) {}
/// Returns true if "nosync" is assumed.
- virtual bool isAssumedNoSync() const = 0;
+ bool isAssumedNoSync() const { return getAssumed(); }
/// Returns true if "nosync" is known.
- virtual bool isKnownNoSync() const = 0;
-};
+ bool isKnownNoSync() const { return getKnown(); }
-/// An abstract interface for all nonnull attributes.
-struct AANonNull : public AbstractAttribute {
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A);
- /// See AbstractAttribute::AbstractAttribute(...).
- AANonNull(Value &V, InformationCache &InfoCache)
- : AbstractAttribute(V, InfoCache) {}
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
- /// See AbstractAttribute::AbstractAttribute(...).
- AANonNull(Value *AssociatedVal, Value &AnchoredValue,
- InformationCache &InfoCache)
- : AbstractAttribute(AssociatedVal, AnchoredValue, InfoCache) {}
+/// An abstract interface for all nonnull attributes.
+struct AANonNull
+ : public IRAttribute<Attribute::NonNull,
+ StateWrapper<BooleanState, AbstractAttribute>> {
+ AANonNull(const IRPosition &IRP) : IRAttribute(IRP) {}
/// Return true if we assume that the underlying value is nonnull.
- virtual bool isAssumedNonNull() const = 0;
+ bool isAssumedNonNull() const { return getAssumed(); }
/// Return true if we know that the underlying value is nonnull.
- virtual bool isKnownNonNull() const = 0;
+ bool isKnownNonNull() const { return getKnown(); }
- /// See AbastractState::getAttrKind().
- Attribute::AttrKind getAttrKind() const override { return ID; }
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A);
- /// The identifier used by the Attributor for this class of attributes.
- static constexpr Attribute::AttrKind ID = Attribute::NonNull;
+ /// Unique ID (due to the unique address)
+ static const char ID;
};
/// An abstract attribute for norecurse.
-struct AANoRecurse : public AbstractAttribute {
+struct AANoRecurse
+ : public IRAttribute<Attribute::NoRecurse,
+ StateWrapper<BooleanState, AbstractAttribute>> {
+ AANoRecurse(const IRPosition &IRP) : IRAttribute(IRP) {}
- /// See AbstractAttribute::AbstractAttribute(...).
- AANoRecurse(Value &V, InformationCache &InfoCache)
- : AbstractAttribute(V, InfoCache) {}
-
- /// See AbstractAttribute::getAttrKind()
- virtual Attribute::AttrKind getAttrKind() const override {
- return Attribute::NoRecurse;
- }
+ /// Return true if "norecurse" is assumed.
+ bool isAssumedNoRecurse() const { return getAssumed(); }
/// Return true if "norecurse" is known.
- virtual bool isKnownNoRecurse() const = 0;
+ bool isKnownNoRecurse() const { return getKnown(); }
- /// Return true if "norecurse" is assumed.
- virtual bool isAssumedNoRecurse() const = 0;
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A);
- /// The identifier used by the Attributor for this class of attributes.
- static constexpr Attribute::AttrKind ID = Attribute::NoRecurse;
+ /// Unique ID (due to the unique address)
+ static const char ID;
};
/// An abstract attribute for willreturn.
-struct AAWillReturn : public AbstractAttribute {
+struct AAWillReturn
+ : public IRAttribute<Attribute::WillReturn,
+ StateWrapper<BooleanState, AbstractAttribute>> {
+ AAWillReturn(const IRPosition &IRP) : IRAttribute(IRP) {}
+
+ /// Return true if "willreturn" is assumed.
+ bool isAssumedWillReturn() const { return getAssumed(); }
- /// See AbstractAttribute::AbstractAttribute(...).
- AAWillReturn(Value &V, InformationCache &InfoCache)
- : AbstractAttribute(V, InfoCache) {}
+ /// Return true if "willreturn" is known.
+ bool isKnownWillReturn() const { return getKnown(); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
- /// See AbstractAttribute::getAttrKind()
- virtual Attribute::AttrKind getAttrKind() const override {
- return Attribute::WillReturn;
+/// An abstract interface for all noalias attributes.
+struct AANoAlias
+ : public IRAttribute<Attribute::NoAlias,
+ StateWrapper<BooleanState, AbstractAttribute>> {
+ AANoAlias(const IRPosition &IRP) : IRAttribute(IRP) {}
+
+ /// Return true if we assume that the underlying value is noalias.
+ bool isAssumedNoAlias() const { return getAssumed(); }
+
+ /// Return true if we know that the underlying value is noalias.
+ bool isKnownNoAlias() const { return getKnown(); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+/// An AbstractAttribute for nofree.
+struct AANoFree
+ : public IRAttribute<Attribute::NoFree,
+ StateWrapper<BooleanState, AbstractAttribute>> {
+ AANoFree(const IRPosition &IRP) : IRAttribute(IRP) {}
+
+ /// Return true if "nofree" is assumed.
+ bool isAssumedNoFree() const { return getAssumed(); }
+
+ /// Return true if "nofree" is known.
+ bool isKnownNoFree() const { return getKnown(); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+/// An AbstractAttribute for noreturn.
+struct AANoReturn
+ : public IRAttribute<Attribute::NoReturn,
+ StateWrapper<BooleanState, AbstractAttribute>> {
+ AANoReturn(const IRPosition &IRP) : IRAttribute(IRP) {}
+
+ /// Return true if the underlying object is assumed to never return.
+ bool isAssumedNoReturn() const { return getAssumed(); }
+
+ /// Return true if the underlying object is known to never return.
+ bool isKnownNoReturn() const { return getKnown(); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+/// An abstract interface for liveness abstract attribute.
+struct AAIsDead : public StateWrapper<BooleanState, AbstractAttribute>,
+ public IRPosition {
+ AAIsDead(const IRPosition &IRP) : IRPosition(IRP) {}
+
+ /// Returns true if \p BB is assumed dead.
+ virtual bool isAssumedDead(const BasicBlock *BB) const = 0;
+
+ /// Returns true if \p BB is known dead.
+ virtual bool isKnownDead(const BasicBlock *BB) const = 0;
+
+ /// Returns true if \p I is assumed dead.
+ virtual bool isAssumedDead(const Instruction *I) const = 0;
+
+ /// Returns true if \p I is known dead.
+ virtual bool isKnownDead(const Instruction *I) const = 0;
+
+ /// This method is used to check if at least one instruction in a collection
+ /// of instructions is live.
+ template <typename T> bool isLiveInstSet(T begin, T end) const {
+ for (const auto &I : llvm::make_range(begin, end)) {
+ assert(I->getFunction() == getIRPosition().getAssociatedFunction() &&
+ "Instruction must be in the same anchor scope function.");
+
+ if (!isAssumedDead(I))
+ return true;
+ }
+
+ return false;
}
- /// Return true if "willreturn" is known.
- virtual bool isKnownWillReturn() const = 0;
+ /// Return an IR position, see struct IRPosition.
+ ///
+ ///{
+ IRPosition &getIRPosition() override { return *this; }
+ const IRPosition &getIRPosition() const override { return *this; }
+ ///}
- /// Return true if "willreturn" is assumed.
- virtual bool isAssumedWillReturn() const = 0;
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
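+// Illustrative sketch (not part of the upstream header): a querying
+// attribute can consult liveness before giving up on an assumption,
+// assuming the public getAAFor interface referenced earlier (LivenessAA,
+// FnPos, and I are hypothetical):
+//
+//   const auto &LivenessAA = A.getAAFor<AAIsDead>(*this, FnPos);
+//   if (LivenessAA.isAssumedDead(&I))
+//     ; // ignore I, it does not invalidate the assumption
+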
+/// State for the dereferenceable attribute.
+struct DerefState : AbstractState {
+
+ /// State representing the dereferenceable bytes.
+ IntegerState DerefBytesState;
+
+ /// State representing whether the value is globally dereferenceable.
+ BooleanState GlobalState;
+
+ /// See AbstractState::isValidState()
+ bool isValidState() const override { return DerefBytesState.isValidState(); }
+
+ /// See AbstractState::isAtFixpoint()
+ bool isAtFixpoint() const override {
+ return !isValidState() ||
+ (DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint());
+ }
+
+ /// See AbstractState::indicateOptimisticFixpoint(...)
+ ChangeStatus indicateOptimisticFixpoint() override {
+ DerefBytesState.indicateOptimisticFixpoint();
+ GlobalState.indicateOptimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractState::indicatePessimisticFixpoint(...)
+ ChangeStatus indicatePessimisticFixpoint() override {
+ DerefBytesState.indicatePessimisticFixpoint();
+ GlobalState.indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ /// Update known dereferenceable bytes.
+ void takeKnownDerefBytesMaximum(uint64_t Bytes) {
+ DerefBytesState.takeKnownMaximum(Bytes);
+ }
+
+ /// Update assumed dereferenceable bytes.
+ void takeAssumedDerefBytesMinimum(uint64_t Bytes) {
+ DerefBytesState.takeAssumedMinimum(Bytes);
+ }
+
+ /// Equality for DerefState.
+ bool operator==(const DerefState &R) {
+ return this->DerefBytesState == R.DerefBytesState &&
+ this->GlobalState == R.GlobalState;
+ }
+
+ /// Inequality for DerefState.
+ bool operator!=(const DerefState &R) { return !(*this == R); }
+
+ /// See IntegerState::operator^=
+ DerefState operator^=(const DerefState &R) {
+ DerefBytesState ^= R.DerefBytesState;
+ GlobalState ^= R.GlobalState;
+ return *this;
+ }
+
+ /// See IntegerState::operator+=
+ DerefState operator+=(const DerefState &R) {
+ DerefBytesState += R.DerefBytesState;
+ GlobalState += R.GlobalState;
+ return *this;
+ }
+
+ /// See IntegerState::operator&=
+ DerefState operator&=(const DerefState &R) {
+ DerefBytesState &= R.DerefBytesState;
+ GlobalState &= R.GlobalState;
+ return *this;
+ }
+
+ /// See IntegerState::operator|=
+ DerefState operator|=(const DerefState &R) {
+ DerefBytesState |= R.DerefBytesState;
+ GlobalState |= R.GlobalState;
+ return *this;
+ }
- /// The identifier used by the Attributor for this class of attributes.
- static constexpr Attribute::AttrKind ID = Attribute::WillReturn;
+protected:
+ const AANonNull *NonNullAA = nullptr;
+};
+
+/// An abstract interface for all dereferenceable attribute.
+struct AADereferenceable
+ : public IRAttribute<Attribute::Dereferenceable,
+ StateWrapper<DerefState, AbstractAttribute>> {
+ AADereferenceable(const IRPosition &IRP) : IRAttribute(IRP) {}
+
+ /// Return true if we assume that the underlying value is nonnull.
+ bool isAssumedNonNull() const {
+ return NonNullAA && NonNullAA->isAssumedNonNull();
+ }
+
+ /// Return true if we know that the underlying value is nonnull.
+ bool isKnownNonNull() const {
+ return NonNullAA && NonNullAA->isKnownNonNull();
+ }
+
+ /// Return true if we assume that the underlying value is
+ /// dereferenceable(_or_null) globally.
+ bool isAssumedGlobal() const { return GlobalState.getAssumed(); }
+
+ /// Return true if we know that the underlying value is
+ /// dereferenceable(_or_null) globally.
+ bool isKnownGlobal() const { return GlobalState.getKnown(); }
+
+ /// Return assumed dereferenceable bytes.
+ uint32_t getAssumedDereferenceableBytes() const {
+ return DerefBytesState.getAssumed();
+ }
+
+ /// Return known dereferenceable bytes.
+ uint32_t getKnownDereferenceableBytes() const {
+ return DerefBytesState.getKnown();
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AADereferenceable &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+/// An abstract interface for all align attributes.
+struct AAAlign
+ : public IRAttribute<Attribute::Alignment,
+ StateWrapper<IntegerState, AbstractAttribute>> {
+ AAAlign(const IRPosition &IRP) : IRAttribute(IRP) {}
+
+ /// Return assumed alignment.
+ unsigned getAssumedAlign() const { return getAssumed(); }
+
+ /// Return known alignment.
+ unsigned getKnownAlign() const { return getKnown(); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+/// An abstract interface for all nocapture attributes.
+struct AANoCapture
+ : public IRAttribute<Attribute::NoCapture,
+ StateWrapper<IntegerState, AbstractAttribute>> {
+ AANoCapture(const IRPosition &IRP) : IRAttribute(IRP) {}
+
+ /// State encoding bits. A set bit in the state means the property holds.
+ /// NO_CAPTURE is the best possible state, 0 the worst possible state.
+ enum {
+ NOT_CAPTURED_IN_MEM = 1 << 0,
+ NOT_CAPTURED_IN_INT = 1 << 1,
+ NOT_CAPTURED_IN_RET = 1 << 2,
+
+ /// If we do not capture the value in memory or through integers we can only
+ /// communicate it back as a derived pointer.
+ NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,
+
+ /// If we do not capture the value in memory, through integers, or as a
+ /// derived pointer we know it is not captured.
+ NO_CAPTURE =
+ NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET,
+ };
+
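+ // Illustrative sketch (not part of the upstream header): the bits compose,
+ // so a value that only escapes through the return value still satisfies
+ // NO_CAPTURE_MAYBE_RETURNED but no longer NO_CAPTURE:
+ //
+ //   IntegerState S(NO_CAPTURE);                        // best state
+ //   S.intersectAssumedBits(NO_CAPTURE_MAYBE_RETURNED); // drop NOT_CAPTURED_IN_RET
+ //   assert(S.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && !S.isAssumed(NO_CAPTURE));
+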
+ /// Return true if we know that the underlying value is not captured in its
+ /// respective scope.
+ bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); }
+
+ /// Return true if we assume that the underlying value is not captured in its
+ /// respective scope.
+ bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); }
+
+ /// Return true if we know that the underlying value is not captured in its
+ /// respective scope but we allow it to escape through a "return".
+ bool isKnownNoCaptureMaybeReturned() const {
+ return isKnown(NO_CAPTURE_MAYBE_RETURNED);
+ }
+
+ /// Return true if we assume that the underlying value is not captured in its
+ /// respective scope but we allow it to escape through a "return".
+ bool isAssumedNoCaptureMaybeReturned() const {
+ return isAssumed(NO_CAPTURE_MAYBE_RETURNED);
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
};
+
+/// An abstract interface for value simplify abstract attribute.
+struct AAValueSimplify : public StateWrapper<BooleanState, AbstractAttribute>,
+ public IRPosition {
+ AAValueSimplify(const IRPosition &IRP) : IRPosition(IRP) {}
+
+ /// Return an IR position, see struct IRPosition.
+ ///
+ ///{
+ IRPosition &getIRPosition() { return *this; }
+ const IRPosition &getIRPosition() const { return *this; }
+ ///}
+
+ /// Return an assumed simplified value if a single candidate is found. If
+ /// there cannot be one, return the original value. If it is not clear yet,
+ /// return Optional::NoneType.
+ virtual Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const = 0;
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAValueSimplify &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute>,
+ public IRPosition {
+ AAHeapToStack(const IRPosition &IRP) : IRPosition(IRP) {}
+
+ /// Returns true if HeapToStack conversion is assumed to be possible.
+ bool isAssumedHeapToStack() const { return getAssumed(); }
+
+ /// Returns true if HeapToStack conversion is known to be possible.
+ bool isKnownHeapToStack() const { return getKnown(); }
+
+ /// Return an IR position, see struct IRPosition.
+ ///
+ ///{
+ IRPosition &getIRPosition() { return *this; }
+ const IRPosition &getIRPosition() const { return *this; }
+ ///}
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+/// An abstract interface for all memory related attributes.
+struct AAMemoryBehavior
+ : public IRAttribute<Attribute::ReadNone,
+ StateWrapper<IntegerState, AbstractAttribute>> {
+ AAMemoryBehavior(const IRPosition &IRP) : IRAttribute(IRP) {}
+
+ /// State encoding bits. A set bit in the state means the property holds.
+ /// BEST_STATE is the best possible state, 0 the worst possible state.
+ enum {
+ NO_READS = 1 << 0,
+ NO_WRITES = 1 << 1,
+ NO_ACCESSES = NO_READS | NO_WRITES,
+
+ BEST_STATE = NO_ACCESSES,
+ };
+
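+ // Illustrative sketch (not part of the upstream header): the derived
+ // queries below map onto the bits as readnone = NO_ACCESSES,
+ // readonly = NO_WRITES, and writeonly = NO_READS, e.g.:
+ //
+ //   IntegerState S(BEST_STATE);
+ //   S.intersectAssumedBits(NO_WRITES); // a read was observed
+ //   // now isAssumedReadOnly() holds while isAssumedReadNone() does not
+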
+ /// Return true if we know that the underlying value is not read or accessed
+ /// in its respective scope.
+ bool isKnownReadNone() const { return isKnown(NO_ACCESSES); }
+
+ /// Return true if we assume that the underlying value is not read or accessed
+ /// in its respective scope.
+ bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); }
+
+ /// Return true if we know that the underlying value is not written in its
+ /// respective scope.
+ bool isKnownReadOnly() const { return isKnown(NO_WRITES); }
+
+ /// Return true if we assume that the underlying value is not written in its
+ /// respective scope.
+ bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); }
+
+ /// Return true if we know that the underlying value is not read in its
+ /// respective scope.
+ bool isKnownWriteOnly() const { return isKnown(NO_READS); }
+
+ /// Return true if we assume that the underlying value is not read in its
+ /// respective scope.
+ bool isAssumedWriteOnly() const { return isAssumed(NO_READS); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAMemoryBehavior &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
diff --git a/include/llvm/Transforms/IPO/GlobalDCE.h b/include/llvm/Transforms/IPO/GlobalDCE.h
index c434484d1ae3..0a6851849e7e 100644
--- a/include/llvm/Transforms/IPO/GlobalDCE.h
+++ b/include/llvm/Transforms/IPO/GlobalDCE.h
@@ -43,11 +43,25 @@ private:
/// Comdat -> Globals in that Comdat section.
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+ /// !type metadata -> set of (vtable, offset) pairs
+ DenseMap<Metadata *, SmallSet<std::pair<GlobalVariable *, uint64_t>, 4>>
+ TypeIdMap;
+
+ // Global variables which are vtables, and which we have enough information
+ // about to safely do dead virtual function elimination.
+ SmallPtrSet<GlobalValue *, 32> VFESafeVTables;
+
void UpdateGVDependencies(GlobalValue &GV);
void MarkLive(GlobalValue &GV,
SmallVectorImpl<GlobalValue *> *Updates = nullptr);
bool RemoveUnusedGlobalValue(GlobalValue &GV);
+ // Dead virtual function elimination.
+ void AddVirtualFunctionDependencies(Module &M);
+ void ScanVTables(Module &M);
+ void ScanTypeCheckedLoadIntrinsics(Module &M);
+ void ScanVTableLoad(Function *Caller, Metadata *TypeId, uint64_t CallOffset);
+
void ComputeDependencies(Value *V, SmallPtrSetImpl<GlobalValue *> &U);
};
diff --git a/include/llvm/Transforms/IPO/HotColdSplitting.h b/include/llvm/Transforms/IPO/HotColdSplitting.h
index 73668844590d..8c3049fbaac4 100644
--- a/include/llvm/Transforms/IPO/HotColdSplitting.h
+++ b/include/llvm/Transforms/IPO/HotColdSplitting.h
@@ -17,6 +17,45 @@
namespace llvm {
class Module;
+class ProfileSummaryInfo;
+class BlockFrequencyInfo;
+class TargetTransformInfo;
+class OptimizationRemarkEmitter;
+class AssumptionCache;
+class DominatorTree;
+class CodeExtractorAnalysisCache;
+
+/// A sequence of basic blocks.
+///
+/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
+using BlockSequence = SmallVector<BasicBlock *, 0>;
+
+class HotColdSplitting {
+public:
+ HotColdSplitting(ProfileSummaryInfo *ProfSI,
+ function_ref<BlockFrequencyInfo *(Function &)> GBFI,
+ function_ref<TargetTransformInfo &(Function &)> GTTI,
+ std::function<OptimizationRemarkEmitter &(Function &)> *GORE,
+ function_ref<AssumptionCache *(Function &)> LAC)
+ : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {}
+ bool run(Module &M);
+
+private:
+ bool isFunctionCold(const Function &F) const;
+ bool shouldOutlineFrom(const Function &F) const;
+ bool outlineColdRegions(Function &F, bool HasProfileSummary);
+ Function *extractColdRegion(const BlockSequence &Region,
+ const CodeExtractorAnalysisCache &CEAC,
+ DominatorTree &DT, BlockFrequencyInfo *BFI,
+ TargetTransformInfo &TTI,
+ OptimizationRemarkEmitter &ORE,
+ AssumptionCache *AC, unsigned Count);
+ ProfileSummaryInfo *PSI;
+ function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
+ function_ref<TargetTransformInfo &(Function &)> GetTTI;
+ std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
+ function_ref<AssumptionCache *(Function &)> LookupAC;
+};
/// Pass to outline cold regions.
class HotColdSplittingPass : public PassInfoMixin<HotColdSplittingPass> {
diff --git a/include/llvm/Transforms/IPO/LowerTypeTests.h b/include/llvm/Transforms/IPO/LowerTypeTests.h
index 39b23f5957db..3c2bb65b9552 100644
--- a/include/llvm/Transforms/IPO/LowerTypeTests.h
+++ b/include/llvm/Transforms/IPO/LowerTypeTests.h
@@ -193,6 +193,8 @@ struct ByteArrayBuilder {
uint64_t &AllocByteOffset, uint8_t &AllocMask);
};
+bool isJumpTableCanonical(Function *F);
+
} // end namespace lowertypetests
class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
diff --git a/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index 509fcc867060..22435e4ed1e5 100644
--- a/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -16,8 +16,10 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
#include <cassert>
#include <cstdint>
+#include <set>
#include <utility>
#include <vector>
@@ -28,6 +30,7 @@ template <typename T> class MutableArrayRef;
class Function;
class GlobalVariable;
class ModuleSummaryIndex;
+struct ValueInfo;
namespace wholeprogramdevirt {
@@ -228,6 +231,29 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
};
+struct VTableSlotSummary {
+ StringRef TypeID;
+ uint64_t ByteOffset;
+};
+
+/// Perform index-based whole program devirtualization on the \p Summary
+/// index. Any devirtualized targets used by a type test in another module
+/// are added to the \p ExportedGUIDs set. For any local devirtualized targets
+/// only used within the defining module, the information necessary for
+/// locating the corresponding WPD resolution is recorded for the ValueInfo
+/// in case it is exported by cross module importing (in which case the
+/// devirtualized target name will need adjustment).
+void runWholeProgramDevirtOnIndex(
+ ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap);
+
+/// Call after cross-module importing to update the recorded single impl
+/// devirt target names for any locals that were exported.
+void updateIndexWPDForExports(
+ ModuleSummaryIndex &Summary,
+ function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 8b70d2926ae9..fcad1e11895f 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -181,10 +181,6 @@ struct SanitizerCoverageOptions {
SanitizerCoverageOptions() = default;
};
-// Insert SanitizerCoverage instrumentation.
-ModulePass *createSanitizerCoverageModulePass(
- const SanitizerCoverageOptions &Options = SanitizerCoverageOptions());
-
/// Calculate what to divide by to scale counts.
///
/// Given the maximum count, calculate a divisor that will scale all the
diff --git a/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 8f76d4a1ce55..2e0fae527b15 100644
--- a/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -39,13 +39,14 @@ public:
: Options(Options), IsCS(IsCS) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
- bool run(Module &M, const TargetLibraryInfo &TLI);
+ bool run(Module &M,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
private:
InstrProfOptions Options;
Module *M;
Triple TT;
- const TargetLibraryInfo *TLI;
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
struct PerFunctionProfileData {
uint32_t NumValueSites[IPVK_Last + 1];
GlobalVariable *RegionCounters = nullptr;
diff --git a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
index 0739d9e58a61..01a86ee3f1fd 100644
--- a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
+++ b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
@@ -19,12 +19,11 @@
namespace llvm {
struct MemorySanitizerOptions {
- MemorySanitizerOptions() = default;
- MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel)
- : TrackOrigins(TrackOrigins), Recover(Recover), Kernel(Kernel) {}
- int TrackOrigins = 0;
- bool Recover = false;
- bool Kernel = false;
+ MemorySanitizerOptions() : MemorySanitizerOptions(0, false, false) {}
+ MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel);
+ bool Kernel;
+ int TrackOrigins;
+ bool Recover;
};
// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
@@ -41,6 +40,7 @@ struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> {
MemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
MemorySanitizerOptions Options;
diff --git a/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
new file mode 100644
index 000000000000..85a43ff86f2e
--- /dev/null
+++ b/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
@@ -0,0 +1,47 @@
+//===--------- Definition of the SanitizerCoverage class --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SanitizerCoverage class which is a port of the legacy
+// SanitizerCoverage pass to use the new PassManager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H
+
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Instrumentation.h"
+
+namespace llvm {
+
+/// This is the ModuleSanitizerCoverage pass used in the new pass manager. The
+/// pass instruments functions for coverage, adds initialization calls to the
+/// module for trace PC guards and 8bit counters if they are requested, and
+/// appends globals to llvm.compiler.used.
+class ModuleSanitizerCoveragePass
+ : public PassInfoMixin<ModuleSanitizerCoveragePass> {
+public:
+ explicit ModuleSanitizerCoveragePass(
+ SanitizerCoverageOptions Options = SanitizerCoverageOptions())
+ : Options(Options) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ SanitizerCoverageOptions Options;
+};
+
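+// Illustrative sketch (not part of the upstream header): with the new pass
+// manager the pass is scheduled at module scope, e.g.
+//
+//   ModulePassManager MPM;
+//   MPM.addPass(ModuleSanitizerCoveragePass(Options));
+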
+// Insert SanitizerCoverage instrumentation.
+ModulePass *createModuleSanitizerCoverageLegacyPassPass(
+ const SanitizerCoverageOptions &Options = SanitizerCoverageOptions());
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
index b4e7d9924ff6..ce0e46745abb 100644
--- a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
+++ b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
@@ -27,6 +27,8 @@ FunctionPass *createThreadSanitizerLegacyPassPass();
/// yet, the pass inserts the declarations. Otherwise the existing globals are
struct ThreadSanitizerPass : public PassInfoMixin<ThreadSanitizerPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
+
} // namespace llvm
#endif /* LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H */
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index f9360b5ee2c8..f06230b6f366 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -308,7 +308,7 @@ FunctionPass *createGVNSinkPass();
// MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
// are hoisted into the header, while stores sink into the footer.
//
-FunctionPass *createMergedLoadStoreMotionPass();
+FunctionPass *createMergedLoadStoreMotionPass(bool SplitFooterBB = false);
//===----------------------------------------------------------------------===//
//
@@ -397,6 +397,13 @@ FunctionPass *createLowerExpectIntrinsicPass();
//===----------------------------------------------------------------------===//
//
+// LowerConstantIntrinsics - Expand any remaining llvm.objectsize and
+// llvm.is.constant intrinsic calls, even for the unknown cases.
+//
+FunctionPass *createLowerConstantIntrinsicsPass();
+
+//===----------------------------------------------------------------------===//
+//
// PartiallyInlineLibCalls - Tries to inline the fast path of library
// calls such as sqrt.
//
diff --git a/include/llvm/Transforms/Scalar/CallSiteSplitting.h b/include/llvm/Transforms/Scalar/CallSiteSplitting.h
index b6055639e8a8..74cbf84b64b2 100644
--- a/include/llvm/Transforms/Scalar/CallSiteSplitting.h
+++ b/include/llvm/Transforms/Scalar/CallSiteSplitting.h
@@ -9,13 +9,8 @@
#ifndef LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H
#define LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H
-#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Support/Compiler.h"
-#include <vector>
namespace llvm {
diff --git a/include/llvm/Transforms/Scalar/ConstantHoisting.h b/include/llvm/Transforms/Scalar/ConstantHoisting.h
index 6b0fc9c1dd07..39039b093241 100644
--- a/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -37,7 +37,9 @@
#define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
@@ -154,21 +156,21 @@ private:
/// Keeps track of constant candidates found in the function.
using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
- using GVCandVecMapType = DenseMap<GlobalVariable *, ConstCandVecType>;
+ using GVCandVecMapType = MapVector<GlobalVariable *, ConstCandVecType>;
ConstCandVecType ConstIntCandVec;
GVCandVecMapType ConstGEPCandMap;
/// These are the final constants we decided to hoist.
using ConstInfoVecType = SmallVector<consthoist::ConstantInfo, 8>;
- using GVInfoVecMapType = DenseMap<GlobalVariable *, ConstInfoVecType>;
+ using GVInfoVecMapType = MapVector<GlobalVariable *, ConstInfoVecType>;
ConstInfoVecType ConstIntInfoVec;
GVInfoVecMapType ConstGEPInfoMap;
/// Keep track of cast instructions we already cloned.
- SmallDenseMap<Instruction *, Instruction *> ClonedCastMap;
+ MapVector<Instruction *, Instruction *> ClonedCastMap;
Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const;
- SmallPtrSet<Instruction *, 8>
+ SetVector<Instruction *>
findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const;
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
Instruction *Inst, unsigned Idx,
diff --git a/include/llvm/Transforms/Scalar/Float2Int.h b/include/llvm/Transforms/Scalar/Float2Int.h
index 06aeb8322527..f04b98a19d82 100644
--- a/include/llvm/Transforms/Scalar/Float2Int.h
+++ b/include/llvm/Transforms/Scalar/Float2Int.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
@@ -26,10 +27,11 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
// Glue for old PM.
- bool runImpl(Function &F);
+ bool runImpl(Function &F, const DominatorTree &DT);
private:
- void findRoots(Function &F, SmallPtrSet<Instruction *, 8> &Roots);
+ void findRoots(Function &F, const DominatorTree &DT,
+ SmallPtrSet<Instruction *, 8> &Roots);
void seen(Instruction *I, ConstantRange R);
ConstantRange badRange();
ConstantRange unknownRange();
diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h
index 9fe00a9e7f2d..8a64768af6b5 100644
--- a/include/llvm/Transforms/Scalar/GVN.h
+++ b/include/llvm/Transforms/Scalar/GVN.h
@@ -120,6 +120,8 @@ public:
uint32_t lookupOrAddCall(CallInst *C);
uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock,
uint32_t Num, GVN &Gvn);
+ bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred,
+ const BasicBlock *PhiBlock, GVN &Gvn);
std::pair<uint32_t, bool> assignExpNewValueNum(Expression &exp);
bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn);
@@ -159,6 +161,7 @@ private:
SetVector<BasicBlock *> DeadBlocks;
OptimizationRemarkEmitter *ORE;
ImplicitControlFlowTracking *ICF;
+ LoopInfo *LI;
ValueTable VN;
@@ -175,7 +178,7 @@ private:
// Block-local map of equivalent values to their leader, does not
// propagate to any successors. Entries added mid-block are applied
// to the remaining instructions in the block.
- SmallMapVector<Value *, Constant *, 4> ReplaceWithConstMap;
+ SmallMapVector<Value *, Value *, 4> ReplaceOperandsWithMap;
SmallVector<Instruction *, 8> InstrsToErase;
// Map the block to reversed postorder traversal number. It is used to
@@ -280,7 +283,7 @@ private:
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
- bool replaceOperandsWithConsts(Instruction *I) const;
+ bool replaceOperandsForInBlockEquality(Instruction *I) const;
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
bool DominatesByEdge);
bool processFoldableCondBr(BranchInst *BI);
diff --git a/include/llvm/Transforms/Scalar/GVNExpression.h b/include/llvm/Transforms/Scalar/GVNExpression.h
index 3dc4515f85a1..1600d1af3242 100644
--- a/include/llvm/Transforms/Scalar/GVNExpression.h
+++ b/include/llvm/Transforms/Scalar/GVNExpression.h
@@ -323,7 +323,7 @@ public:
class LoadExpression final : public MemoryExpression {
private:
LoadInst *Load;
- unsigned Alignment;
+ MaybeAlign Alignment;
public:
LoadExpression(unsigned NumOperands, LoadInst *L,
@@ -333,7 +333,8 @@ public:
LoadExpression(enum ExpressionType EType, unsigned NumOperands, LoadInst *L,
const MemoryAccess *MemoryLeader)
: MemoryExpression(NumOperands, EType, MemoryLeader), Load(L) {
- Alignment = L ? L->getAlignment() : 0;
+ if (L)
+ Alignment = MaybeAlign(L->getAlignment());
}
LoadExpression() = delete;
@@ -348,8 +349,8 @@ public:
LoadInst *getLoadInst() const { return Load; }
void setLoadInst(LoadInst *L) { Load = L; }
- unsigned getAlignment() const { return Alignment; }
- void setAlignment(unsigned Align) { Alignment = Align; }
+ MaybeAlign getAlignment() const { return Alignment; }
+ void setAlignment(MaybeAlign Align) { Alignment = Align; }
bool equals(const Expression &Other) const override;
bool exactlyEquals(const Expression &Other) const override {
diff --git a/include/llvm/Transforms/Scalar/LoopPassManager.h b/include/llvm/Transforms/Scalar/LoopPassManager.h
index 61ec58585fd0..aed764855b2e 100644
--- a/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -263,8 +263,10 @@ template <typename LoopPassT>
class FunctionToLoopPassAdaptor
: public PassInfoMixin<FunctionToLoopPassAdaptor<LoopPassT>> {
public:
- explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false)
- : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging) {
+ explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false,
+ bool DebugLogging = false)
+ : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging),
+ UseMemorySSA(UseMemorySSA) {
LoopCanonicalizationFPM.addPass(LoopSimplifyPass());
LoopCanonicalizationFPM.addPass(LCSSAPass());
}
@@ -293,7 +295,7 @@ public:
return PA;
// Get the analysis results needed by loop passes.
- MemorySSA *MSSA = EnableMSSALoopDependency
+ MemorySSA *MSSA = UseMemorySSA
? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA())
: nullptr;
LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
@@ -310,8 +312,10 @@ public:
// LoopStandardAnalysisResults object. The loop analyses cached in this
// manager have access to those analysis results and so it must invalidate
// itself when they go away.
- LoopAnalysisManager &LAM =
- AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
+ auto &LAMFP = AM.getResult<LoopAnalysisManagerFunctionProxy>(F);
+ if (UseMemorySSA)
+ LAMFP.markMSSAUsed();
+ LoopAnalysisManager &LAM = LAMFP.getManager();
// A postorder worklist of loops to process.
SmallPriorityWorklist<Loop *, 4> Worklist;
@@ -382,7 +386,7 @@ public:
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<ScalarEvolutionAnalysis>();
- if (EnableMSSALoopDependency)
+ if (UseMemorySSA)
PA.preserve<MemorySSAAnalysis>();
// FIXME: What we really want to do here is preserve an AA category, but
// that concept doesn't exist yet.
@@ -397,14 +401,18 @@ private:
LoopPassT Pass;
FunctionPassManager LoopCanonicalizationFPM;
+
+ bool UseMemorySSA = false;
};
/// A function to deduce a loop pass type and wrap it in the templated
/// adaptor.
template <typename LoopPassT>
FunctionToLoopPassAdaptor<LoopPassT>
-createFunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false) {
- return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass), DebugLogging);
+createFunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false,
+ bool DebugLogging = false) {
+ return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass), UseMemorySSA,
+ DebugLogging);
}
/// Pass for printing a loop's contents as textual IR.
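A hedged usage sketch for the widened adaptor signature: UseMemorySSA now comes positionally before DebugLogging, so existing callers that passed a bare bool for logging need auditing. LICMPass stands in for any loop pass here.

    FunctionPassManager FPM;
    // Request MemorySSA for the whole loop pipeline; the adaptor marks the
    // LoopAnalysisManagerFunctionProxy so cached results invalidate correctly.
    FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(),
                                                /*UseMemorySSA=*/true,
                                                /*DebugLogging=*/false));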
diff --git a/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index a84d889a83ad..afeb1f1da029 100644
--- a/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -62,6 +62,8 @@ struct LoopUnrollOptions {
Optional<bool> AllowPeeling;
Optional<bool> AllowRuntime;
Optional<bool> AllowUpperBound;
+ Optional<bool> AllowProfileBasedPeeling;
+ Optional<unsigned> FullUnrollMaxCount;
int OptLevel;
/// If false, use a cost model to determine whether unrolling of a loop is
@@ -110,6 +112,18 @@ struct LoopUnrollOptions {
OptLevel = O;
return *this;
}
+
+ // Enables or disables loop peeling based on profile.
+ LoopUnrollOptions &setProfileBasedPeeling(int O) {
+ AllowProfileBasedPeeling = O;
+ return *this;
+ }
+
+ // Sets the max full unroll count.
+ LoopUnrollOptions &setFullUnrollMaxCount(unsigned O) {
+ FullUnrollMaxCount = O;
+ return *this;
+ }
};
/// Loop unroll pass that will support both full and partial unrolling.
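Sketch of the chained-setter style the two new options follow (setOptLevel is visible in the surrounding context; the pass construction at the end is an assumption):

    LoopUnrollOptions Opts;
    Opts.setOptLevel(2)
        .setProfileBasedPeeling(true)   // sets AllowProfileBasedPeeling
        .setFullUnrollMaxCount(64);     // caps full unrolling at 64 iterations
    FPM.addPass(LoopUnrollPass(Opts));  // FPM: a FunctionPassManager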
diff --git a/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h b/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
new file mode 100644
index 000000000000..a5ad4a2192a0
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
@@ -0,0 +1,41 @@
+//===- LowerConstantIntrinsics.h - Lower constant int. pass ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The header file for the LowerConstantIntrinsics pass as used by the new pass
+/// manager.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct LowerConstantIntrinsicsPass :
+ PassInfoMixin<LowerConstantIntrinsicsPass> {
+public:
+ explicit LowerConstantIntrinsicsPass() {}
+
+ /// Run the pass over the function.
+ ///
+ /// This will lower all remaining 'objectsize' and 'is.constant'
+ /// intrinsic calls in this function, even when the argument has no known
+ /// size or is not a constant respectively. The resulting constant is
+ /// propagated and conditional branches are resolved where possible.
+ /// This complements the Instruction Simplification and
+ /// Instruction Combination passes of the optimized pass chain.
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+}
+
+#endif
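A minimal sketch of wiring the new pass into a new-PM function pipeline (analysis-manager setup elided; F is a Function& and FAM a populated FunctionAnalysisManager):

    FunctionPassManager FPM;
    FPM.addPass(LowerConstantIntrinsicsPass());
    PreservedAnalyses PA = FPM.run(F, FAM);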
diff --git a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
index 9071a56532f8..c5f6d6e0e8bd 100644
--- a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
+++ b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
@@ -27,12 +27,28 @@
#include "llvm/IR/PassManager.h"
namespace llvm {
+struct MergedLoadStoreMotionOptions {
+ bool SplitFooterBB;
+ MergedLoadStoreMotionOptions(bool SplitFooterBB = false)
+ : SplitFooterBB(SplitFooterBB) {}
+
+ MergedLoadStoreMotionOptions &splitFooterBB(bool SFBB) {
+ SplitFooterBB = SFBB;
+ return *this;
+ }
+};
+
class MergedLoadStoreMotionPass
: public PassInfoMixin<MergedLoadStoreMotionPass> {
+ MergedLoadStoreMotionOptions Options;
+
public:
+ MergedLoadStoreMotionPass()
+ : MergedLoadStoreMotionPass(MergedLoadStoreMotionOptions()) {}
+ MergedLoadStoreMotionPass(const MergedLoadStoreMotionOptions &PassOptions)
+ : Options(PassOptions) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-
}
#endif // LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H
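Sketch of the options plumbing just introduced, mirroring the LoopUnrollOptions style (FPM assumed to be a FunctionPassManager):

    MergedLoadStoreMotionOptions Opts;   // SplitFooterBB defaults to false
    Opts.splitFooterBB(true);            // opt in to footer-block splitting
    FPM.addPass(MergedLoadStoreMotionPass(Opts));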
diff --git a/include/llvm/Transforms/Scalar/Reassociate.h b/include/llvm/Transforms/Scalar/Reassociate.h
index 2db8d8ce309c..d5b175eff0e6 100644
--- a/include/llvm/Transforms/Scalar/Reassociate.h
+++ b/include/llvm/Transforms/Scalar/Reassociate.h
@@ -122,7 +122,9 @@ private:
void EraseInst(Instruction *I);
void RecursivelyEraseDeadInsts(Instruction *I, OrderedSet &Insts);
void OptimizeInst(Instruction *I);
- Instruction *canonicalizeNegConstExpr(Instruction *I);
+ Instruction *canonicalizeNegFPConstantsForOp(Instruction *I, Instruction *Op,
+ Value *OtherOp);
+ Instruction *canonicalizeNegFPConstants(Instruction *I);
void BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT);
};
diff --git a/include/llvm/Transforms/Scalar/SCCP.h b/include/llvm/Transforms/Scalar/SCCP.h
index 0ffd983eb3e0..45e674a20a16 100644
--- a/include/llvm/Transforms/Scalar/SCCP.h
+++ b/include/llvm/Transforms/Scalar/SCCP.h
@@ -45,7 +45,8 @@ struct AnalysisResultsForFn {
PostDominatorTree *PDT;
};
-bool runIPSCCP(Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI,
+bool runIPSCCP(Module &M, const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI,
function_ref<AnalysisResultsForFn(Function &)> getAnalysis);
} // end namespace llvm
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 4d861ffe9a31..698e57fd0394 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -83,10 +83,16 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr);
/// Attempts to merge a block into its predecessor, if possible. The return
/// value indicates success or failure.
+/// By default do not merge blocks if BB's predecessor has multiple successors.
+/// If PredecessorWithTwoSuccessors = true, the blocks can only be merged
+/// if BB's Pred has a branch to BB and to AnotherBB, and BB has a single
+/// successor Sing. In this case the branch will be updated with Sing instead of
+/// BB, and BB will still be merged into its predecessor and removed.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
LoopInfo *LI = nullptr,
MemorySSAUpdater *MSSAU = nullptr,
- MemoryDependenceResults *MemDep = nullptr);
+ MemoryDependenceResults *MemDep = nullptr,
+ bool PredecessorWithTwoSuccessors = false);
/// Replace all uses of an instruction (specified by BI) with a value, then
/// remove and delete the original instruction.
@@ -222,7 +228,8 @@ BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To,
/// info is updated.
BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt,
DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
- MemorySSAUpdater *MSSAU = nullptr);
+ MemorySSAUpdater *MSSAU = nullptr,
+ const Twine &BBName = "");
/// This method introduces at least one new basic block into the function and
/// moves some of the predecessors of BB to be predecessors of the new block.
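A hedged call-site sketch for the two widened signatures above (all updater pointers remain optional; BB, Old and SplitPt are assumed to be in scope):

    // Merge BB into a predecessor that still branches to one other block.
    bool Merged = MergeBlockIntoPredecessor(
        BB, /*DTU=*/nullptr, /*LI=*/nullptr, /*MSSAU=*/nullptr,
        /*MemDep=*/nullptr, /*PredecessorWithTwoSuccessors=*/true);
    // The new trailing parameter lets callers name the block a split creates.
    BasicBlock *Tail = SplitBlock(Old, SplitPt, /*DT=*/nullptr, /*LI=*/nullptr,
                                  /*MSSAU=*/nullptr, /*BBName=*/"split.tail");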
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index 8421c31a36da..3d15b2a7bf2a 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -30,17 +30,16 @@ namespace llvm {
bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI);
bool inferLibFuncAttributes(Module *M, StringRef Name, const TargetLibraryInfo &TLI);
- /// Check whether the overloaded unary floating point function
+ /// Check whether the overloaded floating point function
/// corresponding to \a Ty is available.
- bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn);
+ bool hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn);
- /// Get the name of the overloaded unary floating point function
+ /// Get the name of the overloaded floating point function
/// corresponding to \a Ty.
- StringRef getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn);
+ StringRef getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn);
/// Return V if it is an i8*, otherwise cast it to i8*.
Value *castToCStr(Value *V, IRBuilder<> &B);
@@ -51,6 +50,11 @@ namespace llvm {
Value *emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI);
+ /// Emit a call to the strdup function to the builder, for the specified
+ /// pointer. Ptr is required to be some pointer type, and the return value has
+ /// 'i8*' type.
+ Value *emitStrDup(Value *Ptr, IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
/// Emit a call to the strnlen function to the builder, for the specified
/// pointer. Ptr is required to be some pointer type, MaxLen must be of size_t
/// type, and the return value has 'intptr_t' type.
@@ -164,6 +168,13 @@ namespace llvm {
Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
IRBuilder<> &B, const AttributeList &Attrs);
+ /// Emit a call to the binary function DoubleFn, FloatFn or LongDoubleFn,
+ /// depending of the type of Op1.
+ Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+ const TargetLibraryInfo *TLI, LibFunc DoubleFn,
+ LibFunc FloatFn, LibFunc LongDoubleFn,
+ IRBuilder<> &B, const AttributeList &Attrs);
+
/// Emit a call to the putchar function. This assumes that Char is an integer.
Value *emitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI);
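Sketch of the renamed helpers working together to select a pow variant by type (Base, Exp, B, Attrs, Ty and TLI are assumed to be in scope; LibFunc_pow* are the standard enumerators):

    if (hasFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl)) {
      // Resolve the per-type name, e.g. "powf" when Ty is float.
      StringRef Name = getFloatFnName(TLI, Ty, LibFunc_pow, LibFunc_powf,
                                      LibFunc_powl);
      // Or emit the call directly via the new binary overload.
      Value *R = emitBinaryFloatFnCall(Base, Exp, TLI, LibFunc_pow,
                                       LibFunc_powf, LibFunc_powl, B, Attrs);
    }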
diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h
index 471055921fa8..bd98c902d1ab 100644
--- a/include/llvm/Transforms/Utils/BypassSlowDivision.h
+++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/IR/ValueHandle.h"
#include <cstdint>
namespace llvm {
@@ -28,8 +29,10 @@ class Value;
struct DivRemMapKey {
bool SignedOp;
- Value *Dividend;
- Value *Divisor;
+ AssertingVH<Value> Dividend;
+ AssertingVH<Value> Divisor;
+
+ DivRemMapKey() = default;
DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor)
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
@@ -50,8 +53,10 @@ template <> struct DenseMapInfo<DivRemMapKey> {
}
static unsigned getHashValue(const DivRemMapKey &Val) {
- return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
- reinterpret_cast<uintptr_t>(Val.Divisor)) ^
+ return (unsigned)(reinterpret_cast<uintptr_t>(
+ static_cast<Value *>(Val.Dividend)) ^
+ reinterpret_cast<uintptr_t>(
+ static_cast<Value *>(Val.Divisor))) ^
(unsigned)Val.SignedOp;
}
};
diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h
index 9d79ee1633f6..8a1ab796734e 100644
--- a/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -22,6 +22,7 @@
namespace llvm {
+class AllocaInst;
class BasicBlock;
class BlockFrequency;
class BlockFrequencyInfo;
@@ -36,6 +37,38 @@ class Module;
class Type;
class Value;
+/// A cache for the CodeExtractor analysis. The operation \ref
+/// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this
+/// object. This object should conservatively be considered invalid if any
+/// other mutating operations on the IR occur.
+///
+/// Constructing this object is O(n) in the size of the function.
+class CodeExtractorAnalysisCache {
+ /// The allocas in the function.
+ SmallVector<AllocaInst *, 16> Allocas;
+
+ /// Base memory addresses of load/store instructions, grouped by block.
+ DenseMap<BasicBlock *, DenseSet<Value *>> BaseMemAddrs;
+
+ /// Blocks which contain instructions which may have unknown side-effects
+ /// on memory.
+ DenseSet<BasicBlock *> SideEffectingBlocks;
+
+ void findSideEffectInfoForBlock(BasicBlock &BB);
+
+public:
+ CodeExtractorAnalysisCache(Function &F);
+
+ /// Get the allocas in the function at the time the analysis was created.
+ /// Note that some of these allocas may no longer be present in the function,
+ /// due to \ref CodeExtractor::extractCodeRegion.
+ ArrayRef<AllocaInst *> getAllocas() const { return Allocas; }
+
+ /// Check whether \p BB contains an instruction thought to load from, store
+ /// to, or otherwise clobber the alloca \p Addr.
+ bool doesBlockContainClobberOfAddr(BasicBlock &BB, AllocaInst *Addr) const;
+};
+
/// Utility class for extracting code into a new function.
///
/// This utility provides a simple interface for extracting some sequence of
@@ -104,13 +137,21 @@ class Value;
///
/// Returns zero when called on a CodeExtractor instance where isEligible
/// returns false.
- Function *extractCodeRegion();
+ Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC);
+
+ /// Verify that the assumption cache isn't stale after a region is extracted.
+ /// Returns false when the verifier finds errors. AssumptionCache is passed
+ /// as a parameter to make this function stateless.
+ static bool verifyAssumptionCache(const Function& F, AssumptionCache *AC);
/// Test whether this code extractor is eligible.
///
/// Based on the blocks used when constructing the code extractor,
/// determine whether it is eligible for extraction.
- bool isEligible() const { return !Blocks.empty(); }
+ ///
+ /// Checks that varargs handling (with vastart and vaend) is only done in
+ /// the outlined blocks.
+ bool isEligible() const;
/// Compute the set of input values and output values for the code.
///
@@ -127,7 +168,9 @@ class Value;
/// region.
///
/// Returns true if it is safe to do the code motion.
- bool isLegalToShrinkwrapLifetimeMarkers(Instruction *AllocaAddr) const;
+ bool
+ isLegalToShrinkwrapLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
+ Instruction *AllocaAddr) const;
/// Find the set of allocas whose life ranges are contained within the
/// outlined region.
@@ -137,7 +180,8 @@ class Value;
/// are used by the lifetime markers are also candidates for shrink-
/// wrapping. The instructions that need to be sunk are collected in
/// 'Allocas'.
- void findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
+ void findAllocas(const CodeExtractorAnalysisCache &CEAC,
+ ValueSet &SinkCands, ValueSet &HoistCands,
BasicBlock *&ExitBlock) const;
/// Find or create a block within the outline region for placing hoisted
@@ -158,8 +202,9 @@ class Value;
Instruction *LifeEnd = nullptr;
};
- LifetimeMarkerInfo getLifetimeMarkers(Instruction *Addr,
- BasicBlock *ExitBlock) const;
+ LifetimeMarkerInfo
+ getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
+ Instruction *Addr, BasicBlock *ExitBlock) const;
void severSplitPHINodesOfEntry(BasicBlock *&Header);
void severSplitPHINodesOfExits(const SmallPtrSetImpl<BasicBlock *> &Exits);
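Sketch of the new two-step extraction flow: the cache is built once per function and, per the guarantee documented above, survives extractCodeRegion, so repeated extractions can share it (Blocks is assumed to hold the region's basic blocks):

    CodeExtractorAnalysisCache CEAC(F);   // O(n) scan of F, done once
    CodeExtractor CE(Blocks);
    if (CE.isEligible())
      if (Function *Outlined = CE.extractCodeRegion(CEAC))
        (void)Outlined;                   // use the outlined function here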
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index ff516f230979..9fcb2f64d79b 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -271,6 +271,15 @@ inline unsigned getKnownAlignment(Value *V, const DataLayout &DL,
return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT);
}
+/// Create a call that matches the invoke \p II in terms of arguments,
+/// attributes, debug information, etc. The call is not placed in a block and it
+/// will not have a name. The invoke instruction is not removed, nor are the
+/// uses replaced by the new call.
+CallInst *createCallMatchingInvoke(InvokeInst *II);
+
+/// This function converts the specified invoke into a normal call.
+void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr);
+
///===---------------------------------------------------------------------===//
/// Dbg Intrinsic utilities
///
@@ -403,8 +412,7 @@ void removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
/// Remove all blocks that can not be reached from the function's entry.
///
/// Returns true if any basic block was removed.
-bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr,
- DomTreeUpdater *DTU = nullptr,
+bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr,
MemorySSAUpdater *MSSAU = nullptr);
/// Combine the metadata of two instructions so that K can replace J. Some
@@ -424,6 +432,10 @@ void combineMetadata(Instruction *K, const Instruction *J,
void combineMetadataForCSE(Instruction *K, const Instruction *J,
bool DoesKMove);
+/// Copy the metadata from the source instruction to the destination (the
+/// replacement for the source instruction).
+void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source);
+
/// Patch the replacement so that it is not more restrictive than the value
/// being replaced. It assumes that the replacement does not get moved from
/// its original position.
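Hedged sketch of the invoke-to-call helpers: changeToCall rewrites in place, while createCallMatchingInvoke only builds the CallInst (BB and DTU assumed in scope):

    if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator()))
      changeToCall(II, &DTU);   // rewrites the invoke as a call; the unwind
                                // edge goes away and the DomTree is updated
    // Alternatively, build the call without touching the invoke:
    //   CallInst *CI = createCallMatchingInvoke(II);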
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h
index 68bdded5cf93..d32f08717e9b 100644
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -215,6 +215,9 @@ makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
/// Look for the loop attribute that disables all transformation heuristic.
bool hasDisableAllTransformsHint(const Loop *L);
+/// Look for the loop attribute that disables the LICM transformation heuristics.
+bool hasDisableLICMTransformsHint(const Loop *L);
+
/// The mode sets how eager a transformation should be applied.
enum TransformationMode {
/// The pass can use heuristics to determine whether a transformation should
@@ -252,6 +255,8 @@ TransformationMode hasLICMVersioningTransformation(Loop *L);
/// @}
/// Set input string into loop metadata by keeping other values intact.
+/// If the string is already in the loop metadata, its value is updated only
+/// if it differs.
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
unsigned V = 0);
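Usage sketch for the clarified update semantics ("llvm.loop.unroll.count" is a standard loop attribute; L is a Loop*):

    // The first call inserts the string; later calls rewrite it only when the
    // value actually changes, keeping other loop metadata intact.
    addStringMetadataToLoop(L, "llvm.loop.unroll.count", 4);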
diff --git a/include/llvm/Transforms/Utils/MisExpect.h b/include/llvm/Transforms/Utils/MisExpect.h
new file mode 100644
index 000000000000..1dbe8cb95936
--- /dev/null
+++ b/include/llvm/Transforms/Utils/MisExpect.h
@@ -0,0 +1,43 @@
+//===--- MisExpect.h - Check the use of llvm.expect with PGO data ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit warnings for potentially incorrect usage of the
+// llvm.expect intrinsic. This utility extracts the threshold values from
+// metadata associated with the instrumented Branch or Switch instruction. The
+// threshold values are then used to determine if a warning should be emitted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+
+namespace llvm {
+namespace misexpect {
+
+/// verifyMisExpect - compares PGO counters to the thresholds used for
+/// llvm.expect and warns if the PGO counters are outside of the expected
+/// range.
+/// \param I The Instruction being checked
+/// \param Weights A vector of profile weights for each target block
+/// \param Ctx The current LLVM context
+void verifyMisExpect(llvm::Instruction *I,
+ const llvm::SmallVector<uint32_t, 4> &Weights,
+ llvm::LLVMContext &Ctx);
+
+/// checkFrontendInstrumentation - verify if llvm.expect matches PGO profile
+/// This function checks the frontend instrumentation in the backend when
+/// lowering llvm.expect intrinsics. It checks for existing metadata, and
+/// then validates the use of llvm.expect against the assigned branch weights.
+///
+/// \param I the Instruction being checked
+void checkFrontendInstrumentation(Instruction &I);
+
+} // namespace misexpect
+} // namespace llvm
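Sketch of how a lowering pass might drive the new utility (BI, LikelyWeight and UnlikelyWeight are assumptions; verifyMisExpect takes the same weight vector that becomes the branch_weights metadata):

    SmallVector<uint32_t, 4> Weights = {LikelyWeight, UnlikelyWeight};
    misexpect::verifyMisExpect(BI, Weights, BI->getContext());
    // For frontend-instrumented profiles, the check runs off existing metadata:
    misexpect::checkFrontendInstrumentation(*BI);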
diff --git a/include/llvm/Transforms/Utils/PredicateInfo.h b/include/llvm/Transforms/Utils/PredicateInfo.h
index da4a5dcc28c0..7c7a8eb04a2c 100644
--- a/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -229,10 +229,10 @@ protected:
private:
void buildPredicateInfo();
- void processAssume(IntrinsicInst *, BasicBlock *, SmallPtrSetImpl<Value *> &);
- void processBranch(BranchInst *, BasicBlock *, SmallPtrSetImpl<Value *> &);
- void processSwitch(SwitchInst *, BasicBlock *, SmallPtrSetImpl<Value *> &);
- void renameUses(SmallPtrSetImpl<Value *> &);
+ void processAssume(IntrinsicInst *, BasicBlock *, SmallVectorImpl<Value *> &);
+ void processBranch(BranchInst *, BasicBlock *, SmallVectorImpl<Value *> &);
+ void processSwitch(SwitchInst *, BasicBlock *, SmallVectorImpl<Value *> &);
+ void renameUses(SmallVectorImpl<Value *> &);
using ValueDFS = PredicateInfoClasses::ValueDFS;
typedef SmallVectorImpl<ValueDFS> ValueDFSStack;
void convertUsesToDFSOrdered(Value *, SmallVectorImpl<ValueDFS> &);
@@ -240,7 +240,7 @@ private:
bool stackIsInScope(const ValueDFSStack &, const ValueDFS &) const;
void popStackUntilDFSScope(ValueDFSStack &, const ValueDFS &);
ValueInfo &getOrCreateValueInfo(Value *);
- void addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op,
+ void addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op,
PredicateBase *PB);
const ValueInfo &getValueInfo(Value *) const;
Function &F;
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 2572094ddac8..88c2ef787ad8 100644
--- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -126,6 +126,12 @@ private:
/// Erase an instruction from its parent with our eraser.
void eraseFromParent(Instruction *I);
+ /// Replace an instruction with a value and erase it from its parent.
+ void substituteInParent(Instruction *I, Value *With) {
+ replaceAllUsesWith(I, With);
+ eraseFromParent(I);
+ }
+
Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B);
public:
@@ -154,6 +160,7 @@ private:
Value *optimizeStrRChr(CallInst *CI, IRBuilder<> &B);
Value *optimizeStrCmp(CallInst *CI, IRBuilder<> &B);
Value *optimizeStrNCmp(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeStrNDup(CallInst *CI, IRBuilder<> &B);
Value *optimizeStrCpy(CallInst *CI, IRBuilder<> &B);
Value *optimizeStpCpy(CallInst *CI, IRBuilder<> &B);
Value *optimizeStrNCpy(CallInst *CI, IRBuilder<> &B);
@@ -164,14 +171,17 @@ private:
Value *optimizeStrCSpn(CallInst *CI, IRBuilder<> &B);
Value *optimizeStrStr(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemChr(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeMemRChr(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B);
Value *optimizeRealloc(CallInst *CI, IRBuilder<> &B);
Value *optimizeWcslen(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeBCopy(CallInst *CI, IRBuilder<> &B);
// Wrapper for all String/Memory Library Call Optimizations
Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B);
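Sketch of the new helper in use inside one of the optimize* routines (CI is the original call, NewV the simplified value; both are assumptions):

    // Replace every use of CI with NewV and erase CI, both routed through the
    // registered replacer/eraser so callers observe the IR changes.
    substituteInParent(CI, NewV);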
diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h
index 593ca26feb98..02b81b4b7ee2 100644
--- a/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -114,8 +114,8 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount,
- unsigned MaxTripCount, unsigned &TripMultiple,
- unsigned LoopSize,
+ unsigned MaxTripCount, bool MaxOrZero,
+ unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
bool &UseUpperBound);
@@ -132,7 +132,9 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
- Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling);
+ Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
+ Optional<bool> UserAllowProfileBasedPeeling,
+ Optional<unsigned> UserFullUnrollMaxCount);
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable, bool &Convergent,
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index 1952a210291e..ff5bfc609586 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -22,7 +22,7 @@ namespace llvm {
class Constant;
class Function;
-class GlobalAlias;
+class GlobalIndirectSymbol;
class GlobalVariable;
class Instruction;
class MDNode;
@@ -120,7 +120,7 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
/// instance:
/// - \a scheduleMapGlobalInitializer()
/// - \a scheduleMapAppendingVariable()
-/// - \a scheduleMapGlobalAliasee()
+/// - \a scheduleMapGlobalIndirectSymbol()
/// - \a scheduleRemapFunction()
///
/// Sometimes a callback needs a different mapping context. Such a context can
@@ -180,8 +180,9 @@ public:
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers,
unsigned MappingContextID = 0);
- void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MappingContextID = 0);
+ void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
+ Constant &Target,
+ unsigned MappingContextID = 0);
void scheduleRemapFunction(Function &F, unsigned MappingContextID = 0);
};
diff --git a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index b144006e2628..d1e7acc877bf 100644
--- a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -33,18 +33,6 @@
namespace llvm {
-/// Create an analysis remark that explains why vectorization failed
-///
-/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
-/// RemarkName is the identifier for the remark. If \p I is passed it is an
-/// instruction that prevents vectorization. Otherwise \p TheLoop is used for
-/// the location of the remark. \return the remark object that can be
-/// streamed to.
-OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
- StringRef RemarkName,
- Loop *TheLoop,
- Instruction *I = nullptr);
-
/// Utility class for getting and setting loop vectorizer hints in the form
/// of loop metadata.
/// This class keeps a number of loop annotations locally (as member variables)
@@ -55,7 +43,8 @@ OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
/// for example 'force', means a decision has been made. So, we need to be
/// careful NOT to add them if the user hasn't specifically asked so.
class LoopVectorizeHints {
- enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
+ enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
+ HK_PREDICATE };
/// Hint - associates name and validation with the hint value.
struct Hint {
@@ -81,6 +70,9 @@ class LoopVectorizeHints {
/// Already Vectorized
Hint IsVectorized;
+ /// Vector Predicate
+ Hint Predicate;
+
/// Return the loop metadata prefix.
static StringRef Prefix() { return "llvm.loop."; }
@@ -109,6 +101,7 @@ public:
unsigned getWidth() const { return Width.Value; }
unsigned getInterleave() const { return Interleave.Value; }
unsigned getIsVectorized() const { return IsVectorized.Value; }
+ unsigned getPredicate() const { return Predicate.Value; }
enum ForceKind getForce() const {
if ((ForceKind)Force.Value == FK_Undefined &&
hasDisableAllTransformsHint(TheLoop))
@@ -235,8 +228,8 @@ public:
bool canVectorize(bool UseVPlanNativePath);
/// Return true if we can vectorize this loop while folding its tail by
- /// masking.
- bool canFoldTailByMasking();
+ /// masking, and mark all respective loads/stores for masking.
+ bool prepareToFoldTailByMasking();
/// Returns the primary induction variable.
PHINode *getPrimaryInduction() { return PrimaryInduction; }
@@ -362,9 +355,16 @@ private:
bool canVectorizeOuterLoop();
/// Return true if all of the instructions in the block can be speculatively
- /// executed. \p SafePtrs is a list of addresses that are known to be legal
- /// and we know that we can read from them without segfault.
- bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
+ /// executed, and record the loads/stores that require masking. Ifs that
+ /// guard loads can be ignored under "assume safety" unless \p PreserveGuards
+ /// is true. This can happen when we introduce guards for which the original
+ /// "unguarded-loads are safe" assumption does not hold. For example, the
+ /// vectorizer's fold-tail transformation changes the loop to execute beyond
+ /// its original trip-count, under a proper guard, which should be preserved.
+ /// \p SafePtrs is a list of addresses that are known to be legal and we know
+ /// that we can read from them without segfault.
+ bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
+ bool PreserveGuards = false);
/// Updates the vectorization state by adding \p Phi to the inductions list.
/// This can set \p Phi as the main induction of the loop if \p Phi is a
@@ -382,14 +382,6 @@ private:
return LAI ? &LAI->getSymbolicStrides() : nullptr;
}
- /// Reports a vectorization illegality: print \p DebugMsg for debugging
- /// purposes along with the corresponding optimization remark \p RemarkName.
- /// If \p I is passed it is an instruction that prevents vectorization.
- /// Otherwise the loop is used for the location of the remark.
- void reportVectorizationFailure(const StringRef DebugMsg,
- const StringRef OREMsg, const StringRef ORETag,
- Instruction *I = nullptr) const;
-
/// The loop that we evaluate.
Loop *TheLoop;
@@ -452,8 +444,8 @@ private:
/// Holds the widest induction type encountered.
Type *WidestIndTy = nullptr;
- /// Allowed outside users. This holds the induction and reduction
- /// vars which can be accessed from outside the loop.
+ /// Allowed outside users. This holds the variables that can be accessed from
+ /// outside the loop.
SmallPtrSet<Value *, 4> AllowedExit;
/// Can we assume the absence of NaNs.
diff --git a/include/llvm/Transforms/Vectorize/LoopVectorize.h b/include/llvm/Transforms/Vectorize/LoopVectorize.h
index d1ec06afb02a..d824e2903ef3 100644
--- a/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -155,6 +155,14 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
bool processLoop(Loop *L);
};
+/// Reports a vectorization failure: print \p DebugMsg for debugging
+/// purposes along with the corresponding optimization remark \p RemarkName.
+/// If \p I is passed, it is an instruction that prevents vectorization.
+/// Otherwise, the loop \p TheLoop is used for the location of the remark.
+void reportVectorizationFailure(const StringRef DebugMsg,
+ const StringRef OREMsg, const StringRef ORETag,
+ OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index ac6afb761d4d..32ccc8a46380 100644
--- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -24,7 +24,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/ValueHandle.h"
namespace llvm {
@@ -60,8 +59,8 @@ extern cl::opt<bool> RunSLPVectorization;
struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
using StoreList = SmallVector<StoreInst *, 8>;
using StoreListMap = MapVector<Value *, StoreList>;
- using WeakTrackingVHList = SmallVector<WeakTrackingVH, 8>;
- using WeakTrackingVHListMap = MapVector<Value *, WeakTrackingVHList>;
+ using GEPList = SmallVector<GetElementPtrInst *, 8>;
+ using GEPListMap = MapVector<Value *, GEPList>;
ScalarEvolution *SE = nullptr;
TargetTransformInfo *TTI = nullptr;
@@ -131,7 +130,7 @@ private:
/// Tries to vectorize constructs started from CmpInst, InsertValueInst or
/// InsertElementInst instructions.
- bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
+ bool vectorizeSimpleInstructions(SmallVectorImpl<Instruction *> &Instructions,
BasicBlock *BB, slpvectorizer::BoUpSLP &R);
/// Scan the basic block and look for patterns that are likely to start
@@ -147,7 +146,7 @@ private:
StoreListMap Stores;
/// The getelementptr instructions in a basic block organized by base pointer.
- WeakTrackingVHListMap GEPs;
+ GEPListMap GEPs;
};
} // end namespace llvm
diff --git a/include/llvm/XRay/FDRRecordProducer.h b/include/llvm/XRay/FDRRecordProducer.h
index b530a85bc7e1..043d91568f4e 100644
--- a/include/llvm/XRay/FDRRecordProducer.h
+++ b/include/llvm/XRay/FDRRecordProducer.h
@@ -27,7 +27,7 @@ public:
class FileBasedRecordProducer : public RecordProducer {
const XRayFileHeader &Header;
DataExtractor &E;
- uint32_t &OffsetPtr;
+ uint64_t &OffsetPtr;
uint32_t CurrentBufferBytes = 0;
// Helper function which gets the next record by speculatively reading through
@@ -36,7 +36,7 @@ class FileBasedRecordProducer : public RecordProducer {
public:
FileBasedRecordProducer(const XRayFileHeader &FH, DataExtractor &DE,
- uint32_t &OP)
+ uint64_t &OP)
: Header(FH), E(DE), OffsetPtr(OP) {}
/// This producer encapsulates the logic for loading a File-backed
diff --git a/include/llvm/XRay/FDRRecords.h b/include/llvm/XRay/FDRRecords.h
index a8ce74bd88fb..e3e16f71e2fe 100644
--- a/include/llvm/XRay/FDRRecords.h
+++ b/include/llvm/XRay/FDRRecords.h
@@ -417,16 +417,16 @@ public:
class RecordInitializer : public RecordVisitor {
DataExtractor &E;
- uint32_t &OffsetPtr;
+ uint64_t &OffsetPtr;
uint16_t Version;
public:
static constexpr uint16_t DefaultVersion = 5u;
- explicit RecordInitializer(DataExtractor &DE, uint32_t &OP, uint16_t V)
+ explicit RecordInitializer(DataExtractor &DE, uint64_t &OP, uint16_t V)
: RecordVisitor(), E(DE), OffsetPtr(OP), Version(V) {}
- explicit RecordInitializer(DataExtractor &DE, uint32_t &OP)
+ explicit RecordInitializer(DataExtractor &DE, uint64_t &OP)
: RecordInitializer(DE, OP, DefaultVersion) {}
Error visit(BufferExtents &) override;
diff --git a/include/llvm/XRay/FileHeaderReader.h b/include/llvm/XRay/FileHeaderReader.h
index 1c9681cfd9af..30878f3e99e8 100644
--- a/include/llvm/XRay/FileHeaderReader.h
+++ b/include/llvm/XRay/FileHeaderReader.h
@@ -24,7 +24,7 @@ namespace xray {
/// Convenience function for loading the file header given a data extractor at a
/// specified offset.
Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
- uint32_t &OffsetPtr);
+ uint64_t &OffsetPtr);
} // namespace xray
} // namespace llvm
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index 9c4668e1473c..ecb3b37004fd 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -253,6 +253,7 @@ module LLVM_IR {
textual header "IR/DebugInfoFlags.def"
textual header "IR/Instruction.def"
textual header "IR/Metadata.def"
+ textual header "IR/FixedMetadataKinds.def"
textual header "IR/Value.def"
textual header "IR/RuntimeLibcalls.def"
}
@@ -331,6 +332,7 @@ module LLVM_TableGen {
module LLVM_Transforms {
requires cplusplus
umbrella "Transforms"
+
module * { export * }
}
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 32241e355eb8..55dd9a4cda08 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -784,7 +784,7 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) {
// previous object first, in this case replacing it with an empty one, before
// registering new results.
AAR.reset(
- new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F)));
// BasicAA is always available for function analyses. Also, we add it first
// so that it can trump TBAA results when it proves MustAlias.
@@ -840,7 +840,7 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
BasicAAResult &BAR) {
- AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
+ AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
// Add in our explicitly constructed BasicAA results.
if (!DisableBasicAA)
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index a6e5b9fab558..79fbcd464c1b 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -119,6 +119,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) {
TotalMayAliasSetSize -= AS->size();
AliasSets.erase(AS);
+ // If we've removed the saturated alias set, set saturated marker back to
+ // nullptr and ensure this tracker is empty.
+ if (AS == AliasAnyAS) {
+ AliasAnyAS = nullptr;
+ assert(AliasSets.empty() && "Tracker not empty");
+ }
}
void AliasSet::removeFromTracker(AliasSetTracker &AST) {
@@ -690,8 +696,10 @@ void AliasSet::print(raw_ostream &OS) const {
}
void AliasSetTracker::print(raw_ostream &OS) const {
- OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
- << PointerMap.size() << " pointer values.\n";
+ OS << "Alias Set Tracker: " << AliasSets.size();
+ if (AliasAnyAS)
+ OS << " (Saturated)";
+ OS << " alias sets for " << PointerMap.size() << " pointer values.\n";
for (const AliasSet &AS : *this)
AS.print(OS);
OS << "\n";
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index d46a8d8e306c..af718526684b 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -65,6 +65,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeModuleDebugInfoPrinterPass(Registry);
initializeModuleSummaryIndexWrapperPassPass(Registry);
initializeMustExecutePrinterPass(Registry);
+ initializeMustBeExecutedContextPrinterPass(Registry);
initializeObjCARCAAWrapperPassPass(Registry);
initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
initializePhiValuesWrapperPassPass(Registry);
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index cf2f845dee0a..129944743c5e 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -130,7 +130,10 @@ void AssumptionCache::unregisterAssumption(CallInst *CI) {
if (AVI != AffectedValues.end())
AffectedValues.erase(AVI);
}
- remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; });
+
+ AssumeHandles.erase(
+ remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }),
+ AssumeHandles.end());
}
void AssumptionCache::AffectedValueCallbackVH::deleted() {
@@ -140,7 +143,7 @@ void AssumptionCache::AffectedValueCallbackVH::deleted() {
// 'this' now dangles!
}
-void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) {
+void AssumptionCache::transferAffectedValuesInCache(Value *OV, Value *NV) {
auto &NAVV = getOrInsertAffectedValues(NV);
auto AVI = AffectedValues.find(OV);
if (AVI == AffectedValues.end())
@@ -149,6 +152,7 @@ void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) {
for (auto &A : AVI->second)
if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end())
NAVV.push_back(A);
+ AffectedValues.erase(OV);
}
void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) {
@@ -157,7 +161,7 @@ void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) {
// Any assumptions that affected this value now affect the new value.
- AC->copyAffectedValuesInCache(getValPtr(), NV);
+ AC->transferAffectedValuesInCache(getValPtr(), NV);
// 'this' now might dangle! If the AffectedValues map was resized to add an
// entry for NV then this object might have been destroyed in favor of some
// copy in the grown map.
@@ -252,7 +256,7 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
// Ok, build a new cache by scanning the function, insert it and the value
// handle into our map, and return the newly populated cache.
auto IP = AssumptionCaches.insert(std::make_pair(
- FunctionCallbackVH(&F, this), llvm::make_unique<AssumptionCache>(F)));
+ FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F)));
assert(IP.second && "Scanning function already in the map?");
return *IP.first->second;
}
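The unregisterAssumption fix above is the classic erase-remove pitfall: llvm::remove_if, like std::remove_if, only partitions the range and returns the new logical end, so the container keeps its old size until erase() trims the tail. A standalone illustration in plain C++:

    #include <algorithm>
    #include <vector>

    void eraseEven(std::vector<int> &V) {
      // remove_if alone leaves V.size() unchanged; erase() trims the tail.
      V.erase(std::remove_if(V.begin(), V.end(),
                             [](int X) { return X % 2 == 0; }),
              V.end());   // {1, 2, 3, 4} becomes {1, 3}
    }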
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 3721c99883b8..f3c30c258c19 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -233,6 +233,26 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
}
+/// Return the minimal extent from \p V to the end of the underlying object,
+/// assuming the result is used in an aliasing query. E.g., we do use the query
+/// location size and the fact that null pointers cannot alias here.
+static uint64_t getMinimalExtentFrom(const Value &V,
+ const LocationSize &LocSize,
+ const DataLayout &DL,
+ bool NullIsValidLoc) {
+ // If we have dereferenceability information we know a lower bound for the
+ // extent as accesses for a lower offset would be valid. We need to exclude
+ // the "or null" part if null is a valid pointer.
+ bool CanBeNull;
+ uint64_t DerefBytes = V.getPointerDereferenceableBytes(DL, CanBeNull);
+ DerefBytes = (CanBeNull && NullIsValidLoc) ? 0 : DerefBytes;
+ // If queried with a precise location size, we assume that location size to be
+ // accessed, thus valid.
+ if (LocSize.isPrecise())
+ DerefBytes = std::max(DerefBytes, LocSize.getValue());
+ return DerefBytes;
+}
+
/// Returns true if we can prove that the object specified by V has size Size.
static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
const TargetLibraryInfo &TLI, bool NullIsValidLoc) {
@@ -481,7 +501,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
V = RP;
continue;
}
@@ -1792,10 +1812,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
bool NullIsValidLocation = NullPointerIsDefined(&F);
- if ((V1Size.isPrecise() && isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI,
- NullIsValidLocation)) ||
- (V2Size.isPrecise() && isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI,
- NullIsValidLocation)))
+ if ((isObjectSmallerThan(
+ O2, getMinimalExtentFrom(*V1, V1Size, DL, NullIsValidLocation), DL,
+ TLI, NullIsValidLocation)) ||
+ (isObjectSmallerThan(
+ O1, getMinimalExtentFrom(*V2, V2Size, DL, NullIsValidLocation), DL,
+ TLI, NullIsValidLocation)))
return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
@@ -2053,8 +2075,9 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) {
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *PVWP = getAnalysisIfAvailable<PhiValuesWrapperPass>();
- Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(),
- ACT.getAssumptionCache(F), &DTWP.getDomTree(),
+ Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F,
+ TLIWP.getTLI(F), ACT.getAssumptionCache(F),
+ &DTWP.getDomTree(),
LIWP ? &LIWP->getLoopInfo() : nullptr,
PVWP ? &PVWP->getResult() : nullptr));
@@ -2071,8 +2094,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
return BasicAAResult(
- F.getParent()->getDataLayout(),
- F,
- P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ F.getParent()->getDataLayout(), F,
+ P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
}
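A worked example of the strengthened aliasCheck above: suppose V1 carries dereferenceable(16) and the object under V2 is an 8-byte alloca. Then, roughly,

    extent(V1) = max(deref_bytes(V1), precise_size(V1)) >= 16
    size(O2)   = 8 < 16   =>   NoAlias(V1, V2)

so the pair is NoAlias even when V1's location size is imprecise, whereas the old code required a precise V1Size to draw any conclusion.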
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 5eb95003f5d8..a06ee096d54c 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -118,6 +118,13 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
+/// This is the probability for an ordered floating point comparison.
+static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
+/// This is the probability for an unordered floating point comparison; it
+/// means at least one of the operands is NaN. Usually it is used to test for
+/// an exceptional case, so the result is unlikely.
+static const uint32_t FPH_UNO_WEIGHT = 1;
+
/// Invoke-terminating normal branch taken weight
///
/// This is the weight for branching to the normal destination of an invoke
@@ -778,6 +785,8 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
if (!FCmp)
return false;
+ uint32_t TakenWeight = FPH_TAKEN_WEIGHT;
+ uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT;
bool isProb;
if (FCmp->isEquality()) {
// f1 == f2 -> Unlikely
@@ -786,9 +795,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
} else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
// !isnan -> Likely
isProb = true;
+ TakenWeight = FPH_ORD_WEIGHT;
+ NontakenWeight = FPH_UNO_WEIGHT;
} else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
// isnan -> Unlikely
isProb = false;
+ TakenWeight = FPH_ORD_WEIGHT;
+ NontakenWeight = FPH_UNO_WEIGHT;
} else {
return false;
}
@@ -798,8 +811,7 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
- BranchProbability TakenProb(FPH_TAKEN_WEIGHT,
- FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
+ BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight);
setEdgeProbability(BB, TakenIdx, TakenProb);
setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
@@ -1014,7 +1026,8 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
BPI.calculate(F, LI, &TLI);
return false;
}
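Worked numbers for the new weights: FPH_ORD_WEIGHT is 1024 * 1024 - 1 = 1048575, so an ordered compare (the !isnan pattern) is now predicted taken with probability 1048575/1048576, about 0.999999, where the previous generic weights gave only 20/32 = 0.625. The construction mirrors the hunk:

    BranchProbability TakenProb(FPH_ORD_WEIGHT,
                                FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
    // numerator 1048575, denominator 1048576; the complement is 1/1048576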
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 18b83d6838cc..8215b4ecbb03 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -87,11 +87,18 @@ unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
/// with multiple predecessors.
bool llvm::isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges) {
- assert(TI->isTerminator() && "Must be a terminator to have successors!");
assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+ return isCriticalEdge(TI, TI->getSuccessor(SuccNum), AllowIdenticalEdges);
+}
+
+bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest,
+ bool AllowIdenticalEdges) {
+ assert(TI->isTerminator() && "Must be a terminator to have successors!");
if (TI->getNumSuccessors() == 1) return false;
- const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ assert(find(predecessors(Dest), TI->getParent()) != pred_end(Dest) &&
+ "No edge between TI's block and Dest.");
+
const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
// If there is more than one predecessor, this is a critical edge...
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 619b675b58d8..4f4103fefa25 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -99,7 +99,7 @@ static void writeCFGToDotFile(Function &F, bool CFGOnly = false) {
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
if (!EC)
WriteGraph(File, (const Function*)&F, CFGOnly);
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 690e514d4f5c..fd90bd1521d6 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -88,9 +88,11 @@ using namespace llvm::cflaa;
#define DEBUG_TYPE "cfl-anders-aa"
-CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {}
+CFLAndersAAResult::CFLAndersAAResult(
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
+ : GetTLI(std::move(GetTLI)) {}
CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
- : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {}
+ : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {}
CFLAndersAAResult::~CFLAndersAAResult() = default;
namespace {
@@ -779,7 +781,7 @@ static AliasAttrMap buildAttrMap(const CFLGraph &Graph,
CFLAndersAAResult::FunctionInfo
CFLAndersAAResult::buildInfoFrom(const Function &Fn) {
CFLGraphBuilder<CFLAndersAAResult> GraphBuilder(
- *this, TLI,
+ *this, GetTLI(const_cast<Function &>(Fn)),
// Cast away the constness here due to GraphBuilder's API requirement
const_cast<Function &>(Fn));
auto &Graph = GraphBuilder.getCFLGraph();
@@ -898,7 +900,10 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
AnalysisKey CFLAndersAA::Key;
CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) {
- return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F));
+ auto GetTLI = [&AM](Function &F) -> TargetLibraryInfo & {
+ return AM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return CFLAndersAAResult(GetTLI);
}
char CFLAndersAAWrapperPass::ID = 0;
@@ -914,8 +919,10 @@ CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) {
}
void CFLAndersAAWrapperPass::initializePass() {
- auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
- Result.reset(new CFLAndersAAResult(TLIWP.getTLI()));
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ Result.reset(new CFLAndersAAResult(GetTLI));
}
void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 44b1834f70bf..b87aa4065392 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -60,10 +60,11 @@ using namespace llvm::cflaa;
#define DEBUG_TYPE "cfl-steens-aa"
-CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI)
- : AAResultBase(), TLI(TLI) {}
+CFLSteensAAResult::CFLSteensAAResult(
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
+ : AAResultBase(), GetTLI(std::move(GetTLI)) {}
CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
- : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {}
+ : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {}
CFLSteensAAResult::~CFLSteensAAResult() = default;
/// Information we have about a function and would like to keep around.
@@ -181,7 +182,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo(
// Builds the graph + StratifiedSets for a function.
CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
- CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn);
+ CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn);
StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
// Add all CFLGraph nodes and all Dereference edges to StratifiedSets
@@ -331,7 +332,10 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
AnalysisKey CFLSteensAA::Key;
CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
- return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F));
+ auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & {
+ return AM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return CFLSteensAAResult(GetTLI);
}
char CFLSteensAAWrapperPass::ID = 0;
@@ -347,8 +351,10 @@ CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
}
void CFLSteensAAWrapperPass::initializePass() {
- auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
- Result.reset(new CFLSteensAAResult(TLIWP.getTLI()));
+ auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ Result.reset(new CFLSteensAAResult(GetTLI));
}
void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index ec5e94d499be..70aeb1a688ee 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
CallGraph::CallGraph(Module &M)
: M(M), ExternalCallingNode(getOrInsertFunction(nullptr)),
- CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) {
+ CallsExternalNode(std::make_unique<CallGraphNode>(nullptr)) {
// Add every function to the call graph.
for (Function &F : M)
addToCallGraph(&F);
@@ -150,7 +150,7 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
return CGN.get();
assert((!F || F->getParent() == &M) && "Function not in current module!");
- CGN = llvm::make_unique<CallGraphNode>(const_cast<Function *>(F));
+ CGN = std::make_unique<CallGraphNode>(const_cast<Function *>(F));
return CGN.get();
}
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index adaa83a6c443..20e2f06540a3 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -33,6 +33,22 @@ CaptureTracker::~CaptureTracker() {}
bool CaptureTracker::shouldExplore(const Use *U) { return true; }
+bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
+ // An inbounds GEP can either be a valid pointer (pointing into
+ // or to the end of an allocation), or be null in the default
+ // address space. So for an inbounds GEP there is no way to let
+ // the pointer escape using clever GEP hacking because doing so
+ // would make the pointer point outside of the allocated object
+ // and thus make the GEP result a poison value. Similarly, other
+ // dereferenceable pointers cannot be manipulated without producing
+ // poison.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
+ if (GEP->isInBounds())
+ return true;
+ bool CanBeNull;
+ return O->getPointerDereferenceableBytes(DL, CanBeNull);
+}
+
namespace {
struct SimpleCaptureTracker : public CaptureTracker {
explicit SimpleCaptureTracker(bool ReturnCaptures)
@@ -251,7 +267,8 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
// marked with nocapture do not capture. This means that places like
// GetUnderlyingObject in ValueTracking or DecomposeGEPExpression
// in BasicAA also need to know about this property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) {
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call,
+ true)) {
AddUses(Call);
break;
}
@@ -330,7 +347,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
AddUses(I);
break;
case Instruction::ICmp: {
- if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(1))) {
+ unsigned Idx = (I->getOperand(0) == V) ? 0 : 1;
+ unsigned OtherIdx = 1 - Idx;
+ if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
// Don't count comparisons of a no-alias return value against null as
// captures. This allows us to ignore comparisons of malloc results
// with null, for example.
@@ -338,29 +357,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
if (isNoAliasCall(V->stripPointerCasts()))
break;
if (!I->getFunction()->nullPointerIsDefined()) {
- auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation();
- // An inbounds GEP can either be a valid pointer (pointing into
- // or to the end of an allocation), or be null in the default
- // address space. So for an inbounds GEPs there is no way to let
- // the pointer escape using clever GEP hacking because doing so
- // would make the pointer point outside of the allocated object
- // and thus make the GEP result a poison value.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
- if (GEP->isInBounds())
- break;
- // Comparing a dereferenceable_or_null argument against null
- // cannot lead to pointer escapes, because if it is not null it
- // must be a valid (in-bounds) pointer.
- bool CanBeNull;
- if (O->getPointerDereferenceableBytes(I->getModule()->getDataLayout(), CanBeNull))
+ auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
+ // Comparing a dereferenceable_or_null pointer against null cannot
+ // lead to pointer escapes, because if it is not null it must be a
+ // valid (in-bounds) pointer.
+ if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout()))
break;
}
}
// Comparison against value stored in global variable. Given the pointer
// does not escape, its value cannot be guessed and stored separately in a
// global variable.
- unsigned OtherIndex = (I->getOperand(0) == V) ? 1 : 0;
- auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIndex));
+ auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
break;
// Otherwise, be conservative. There are crazy ways to capture pointers
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 20231ca78b45..8dbcf7034fda 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -93,6 +93,9 @@ static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
+ assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
+ "Invalid constantexpr bitcast!");
+
// Catch the obvious splat cases.
if (C->isNullValue() && !DestTy->isX86_MMXTy())
return Constant::getNullValue(DestTy);
@@ -521,8 +524,23 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
return nullptr;
C = FoldBitCast(C, MapTy->getPointerTo(AS), DL);
- if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL))
- return FoldBitCast(Res, LoadTy, DL);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) {
+ if (Res->isNullValue() && !LoadTy->isX86_MMXTy())
+ // Materializing a zero can be done trivially without a bitcast
+ return Constant::getNullValue(LoadTy);
+ Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
+ Res = FoldBitCast(Res, CastTy, DL);
+ if (LoadTy->isPtrOrPtrVectorTy()) {
+ // For a vector of pointers, we need to first convert to a vector of
+ // integers, then do a vector inttoptr.
+ if (Res->isNullValue() && !LoadTy->isX86_MMXTy())
+ return Constant::getNullValue(LoadTy);
+ if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+ // Be careful not to replace a load of an addrspace value with an inttoptr here
+ return nullptr;
+ Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy);
+ }
+ return Res;
+ }
return nullptr;
}
@@ -544,7 +562,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
int64_t InitializerSize = DL.getTypeAllocSize(GV->getInitializer()->getType());
// If we're not accessing anything in this constant, the result is undefined.
- if (Offset + BytesLoaded <= 0)
+ if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
return UndefValue::get(IntType);
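The rewritten guard is algebraically the same test (Offset <= -BytesLoaded) but never forms the sum Offset + BytesLoaded, which can overflow int64_t for a huge positive Offset. A minimal standalone sketch of the hazard, using the GCC/Clang checked-add builtin (the overflowing input is hypothetical):

  #include <cstdint>
  #include <iostream>
  #include <limits>

  int main() {
    int64_t Offset = std::numeric_limits<int64_t>::max(); // huge GEP offset
    int64_t BytesLoaded = 8;

    // The old form computed Offset + BytesLoaded first; for this input the
    // addition overflows int64_t, which is undefined behavior in C++.
    int64_t Sum;
    std::cout << __builtin_add_overflow(Offset, BytesLoaded, &Sum) << '\n'; // 1

    // The new form is a plain comparison with no intermediate addition,
    // so it is well-defined for every Offset.
    std::cout << (Offset <= -1 * BytesLoaded) << '\n'; // 0
  }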
// Likewise, the result is undefined if the load begins past the end of the constant.
@@ -781,10 +799,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
}
/// Strip the pointer casts, but preserve the address space information.
-Constant* StripPtrCastKeepAS(Constant* Ptr, Type *&ElemTy) {
+Constant *StripPtrCastKeepAS(Constant *Ptr, Type *&ElemTy) {
assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
auto *OldPtrTy = cast<PointerType>(Ptr->getType());
- Ptr = Ptr->stripPointerCasts();
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
auto *NewPtrTy = cast<PointerType>(Ptr->getType());
ElemTy = NewPtrTy->getPointerElementType();
@@ -1038,7 +1056,7 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
case Instruction::ExtractValue:
return ConstantExpr::getExtractValue(
- Ops[0], dyn_cast<ExtractValueInst>(InstOrCE)->getIndices());
+ Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
case Instruction::InsertElement:
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
@@ -1464,40 +1482,50 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
if (!F->hasName())
return false;
- StringRef Name = F->getName();
// In these cases, checking the length is required. We don't want to
// return true for a name like "cos\0blah", which strcmp would consider
// equal to "cos" but which has length 8.
+ StringRef Name = F->getName();
switch (Name[0]) {
default:
return false;
case 'a':
- return Name == "acos" || Name == "asin" || Name == "atan" ||
- Name == "atan2" || Name == "acosf" || Name == "asinf" ||
- Name == "atanf" || Name == "atan2f";
+ return Name == "acos" || Name == "acosf" ||
+ Name == "asin" || Name == "asinf" ||
+ Name == "atan" || Name == "atanf" ||
+ Name == "atan2" || Name == "atan2f";
case 'c':
- return Name == "ceil" || Name == "cos" || Name == "cosh" ||
- Name == "ceilf" || Name == "cosf" || Name == "coshf";
+ return Name == "ceil" || Name == "ceilf" ||
+ Name == "cos" || Name == "cosf" ||
+ Name == "cosh" || Name == "coshf";
case 'e':
- return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f";
+ return Name == "exp" || Name == "expf" ||
+ Name == "exp2" || Name == "exp2f";
case 'f':
- return Name == "fabs" || Name == "floor" || Name == "fmod" ||
- Name == "fabsf" || Name == "floorf" || Name == "fmodf";
+ return Name == "fabs" || Name == "fabsf" ||
+ Name == "floor" || Name == "floorf" ||
+ Name == "fmod" || Name == "fmodf";
case 'l':
- return Name == "log" || Name == "log10" || Name == "logf" ||
- Name == "log10f";
+ return Name == "log" || Name == "logf" ||
+ Name == "log2" || Name == "log2f" ||
+ Name == "log10" || Name == "log10f";
+ case 'n':
+ return Name == "nearbyint" || Name == "nearbyintf";
case 'p':
return Name == "pow" || Name == "powf";
case 'r':
- return Name == "round" || Name == "roundf";
+ return Name == "rint" || Name == "rintf" ||
+ Name == "round" || Name == "roundf";
case 's':
- return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
- Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
+ return Name == "sin" || Name == "sinf" ||
+ Name == "sinh" || Name == "sinhf" ||
+ Name == "sqrt" || Name == "sqrtf";
case 't':
- return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf";
+ return Name == "tan" || Name == "tanf" ||
+ Name == "tanh" || Name == "tanhf" ||
+ Name == "trunc" || Name == "truncf";
case '_':
-
// Check for various function names that get used for the math functions
// when the header files are preprocessed with the macro
// __FINITE_MATH_ONLY__ enabled.
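The length caveat above is easy to reproduce in isolation: an embedded NUL makes strcmp stop early, while a length-aware comparison (what StringRef::operator== does) sees all 8 bytes. A small standalone illustration:

  #include <cstring>
  #include <iostream>
  #include <string>

  int main() {
    std::string Name("cos\0blah", 8); // an 8-byte name with an embedded NUL
    // strcmp stops at the NUL and wrongly reports equality.
    std::cout << (std::strcmp(Name.data(), "cos") == 0) << '\n'; // 1
    // A length-aware comparison (as StringRef does) correctly rejects it.
    std::cout << (Name == "cos") << '\n';                        // 0
  }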
@@ -1713,40 +1741,37 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
return nullptr;
- if (IntrinsicID == Intrinsic::round) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToAway);
- return ConstantFP::get(Ty->getContext(), V);
+ // Use internal versions of these intrinsics.
+ APFloat U = Op->getValueAPF();
+
+ if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
+ U.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), U);
}
- if (IntrinsicID == Intrinsic::floor) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardNegative);
- return ConstantFP::get(Ty->getContext(), V);
+ if (IntrinsicID == Intrinsic::round) {
+ U.roundToIntegral(APFloat::rmNearestTiesToAway);
+ return ConstantFP::get(Ty->getContext(), U);
}
if (IntrinsicID == Intrinsic::ceil) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardPositive);
- return ConstantFP::get(Ty->getContext(), V);
+ U.roundToIntegral(APFloat::rmTowardPositive);
+ return ConstantFP::get(Ty->getContext(), U);
}
- if (IntrinsicID == Intrinsic::trunc) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardZero);
- return ConstantFP::get(Ty->getContext(), V);
+ if (IntrinsicID == Intrinsic::floor) {
+ U.roundToIntegral(APFloat::rmTowardNegative);
+ return ConstantFP::get(Ty->getContext(), U);
}
- if (IntrinsicID == Intrinsic::rint) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToEven);
- return ConstantFP::get(Ty->getContext(), V);
+ if (IntrinsicID == Intrinsic::trunc) {
+ U.roundToIntegral(APFloat::rmTowardZero);
+ return ConstantFP::get(Ty->getContext(), U);
}
- if (IntrinsicID == Intrinsic::nearbyint) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToEven);
- return ConstantFP::get(Ty->getContext(), V);
+ if (IntrinsicID == Intrinsic::fabs) {
+ U.clearSign();
+ return ConstantFP::get(Ty->getContext(), U);
}
/// We only fold functions with finite arguments. Folding NaN and inf is
@@ -1763,18 +1788,19 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
switch (IntrinsicID) {
default: break;
- case Intrinsic::fabs:
- return ConstantFoldFP(fabs, V, Ty);
- case Intrinsic::log2:
- return ConstantFoldFP(Log2, V, Ty);
case Intrinsic::log:
return ConstantFoldFP(log, V, Ty);
+ case Intrinsic::log2:
+ // TODO: What about hosts that lack a C99 library?
+ return ConstantFoldFP(Log2, V, Ty);
case Intrinsic::log10:
+ // TODO: What about hosts that lack a C99 library?
return ConstantFoldFP(log10, V, Ty);
case Intrinsic::exp:
return ConstantFoldFP(exp, V, Ty);
case Intrinsic::exp2:
- return ConstantFoldFP(exp2, V, Ty);
+ // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
case Intrinsic::sin:
return ConstantFoldFP(sin, V, Ty);
case Intrinsic::cos:
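The pow-based fold above relies on the identity exp2(x) == pow(2, x); pow is in C89, so the fold also works on hosts whose libm predates C99. A quick host-side sanity check (exp2 is used here only for comparison; results could in principle differ in the last ulp on some hosts):

  #include <cmath>
  #include <initializer_list>
  #include <iostream>

  int main() {
    for (double X : {-3.0, 0.0, 0.5, 10.0})
      std::cout << std::pow(2.0, X) << " vs " << std::exp2(X) << '\n';
  }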
@@ -1786,104 +1812,150 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (!TLI)
return nullptr;
- char NameKeyChar = Name[0];
- if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
- NameKeyChar = Name[2];
-
- switch (NameKeyChar) {
- case 'a':
- if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
- (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
- (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
- (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
+ LibFunc Func = NotLibFunc;
+ TLI->getLibFunc(Name, Func);
+ switch (Func) {
+ default:
+ break;
+ case LibFunc_acos:
+ case LibFunc_acosf:
+ case LibFunc_acos_finite:
+ case LibFunc_acosf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(acos, V, Ty);
- else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
- (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
- (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
- (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
+ break;
+ case LibFunc_asin:
+ case LibFunc_asinf:
+ case LibFunc_asin_finite:
+ case LibFunc_asinf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(asin, V, Ty);
- else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
- (Name == "atanf" && TLI->has(LibFunc_atanf)))
+ break;
+ case LibFunc_atan:
+ case LibFunc_atanf:
+ if (TLI->has(Func))
return ConstantFoldFP(atan, V, Ty);
break;
- case 'c':
- if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
- (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
- return ConstantFoldFP(ceil, V, Ty);
- else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
- (Name == "cosf" && TLI->has(LibFunc_cosf)))
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmTowardPositive);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+ break;
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ if (TLI->has(Func))
return ConstantFoldFP(cos, V, Ty);
- else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
- (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
- (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
- (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
+ break;
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_cosh_finite:
+ case LibFunc_coshf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(cosh, V, Ty);
break;
- case 'e':
- if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
- (Name == "expf" && TLI->has(LibFunc_expf)) ||
- (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
- (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_exp_finite:
+ case LibFunc_expf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(exp, V, Ty);
- if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
- (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
- (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
- (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
- // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
- // C99 library.
+ break;
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2_finite:
+ case LibFunc_exp2f_finite:
+ if (TLI->has(Func))
+ // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
break;
- case 'f':
- if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
- (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
- return ConstantFoldFP(fabs, V, Ty);
- else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
- (Name == "floorf" && TLI->has(LibFunc_floorf)))
- return ConstantFoldFP(floor, V, Ty);
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ if (TLI->has(Func)) {
+ U.clearSign();
+ return ConstantFP::get(Ty->getContext(), U);
+ }
break;
- case 'l':
- if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
- (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
- (Name == "__log_finite" && V > 0 &&
- TLI->has(LibFunc_log_finite)) ||
- (Name == "__logf_finite" && V > 0 &&
- TLI->has(LibFunc_logf_finite)))
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmTowardNegative);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+ break;
+ case LibFunc_log:
+ case LibFunc_logf:
+ case LibFunc_log_finite:
+ case LibFunc_logf_finite:
+ if (V > 0.0 && TLI->has(Func))
return ConstantFoldFP(log, V, Ty);
- else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
- (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
- (Name == "__log10_finite" && V > 0 &&
- TLI->has(LibFunc_log10_finite)) ||
- (Name == "__log10f_finite" && V > 0 &&
- TLI->has(LibFunc_log10f_finite)))
+ break;
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2_finite:
+ case LibFunc_log2f_finite:
+ if (V > 0.0 && TLI->has(Func))
+ // TODO: What about hosts that lack a C99 library?
+ return ConstantFoldFP(Log2, V, Ty);
+ break;
+ case LibFunc_log10:
+ case LibFunc_log10f:
+ case LibFunc_log10_finite:
+ case LibFunc_log10f_finite:
+ if (V > 0.0 && TLI->has(Func))
+ // TODO: What about hosts that lack a C99 library?
return ConstantFoldFP(log10, V, Ty);
break;
- case 'r':
- if ((Name == "round" && TLI->has(LibFunc_round)) ||
- (Name == "roundf" && TLI->has(LibFunc_roundf)))
- return ConstantFoldFP(round, V, Ty);
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
break;
- case 's':
- if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
- (Name == "sinf" && TLI->has(LibFunc_sinf)))
+ case LibFunc_round:
+ case LibFunc_roundf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmNearestTiesToAway);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+ break;
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ if (TLI->has(Func))
return ConstantFoldFP(sin, V, Ty);
- else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
- (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
- (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
- (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
+ break;
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinh_finite:
+ case LibFunc_sinhf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(sinh, V, Ty);
- else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
- (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
+ break;
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ if (V >= 0.0 && TLI->has(Func))
return ConstantFoldFP(sqrt, V, Ty);
break;
- case 't':
- if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
- (Name == "tanf" && TLI->has(LibFunc_tanf)))
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ if (TLI->has(Func))
return ConstantFoldFP(tan, V, Ty);
- else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
- (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
+ break;
+ case LibFunc_tanh:
+ case LibFunc_tanhf:
+ if (TLI->has(Func))
return ConstantFoldFP(tanh, V, Ty);
break;
- default:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmTowardZero);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
break;
}
return nullptr;
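The restructuring above replaces per-character string chains with one name-to-enum lookup followed by a switch, so each family of names shares a single fold and a single TLI->has(Func) availability check. A distilled sketch of the pattern (Fn, getFn and fold are hypothetical stand-ins, not LLVM APIs):

  #include <cmath>
  #include <map>
  #include <string>

  // Hypothetical stand-ins for LibFunc and TargetLibraryInfo::getLibFunc.
  enum class Fn { NotLibFunc, Sin, Sinf, Cos, Cosf };

  Fn getFn(const std::string &Name) {
    static const std::map<std::string, Fn> Table = {
        {"sin", Fn::Sin}, {"sinf", Fn::Sinf},
        {"cos", Fn::Cos}, {"cosf", Fn::Cosf}};
    auto It = Table.find(Name);
    return It == Table.end() ? Fn::NotLibFunc : It->second;
  }

  double fold(const std::string &Name, double V) {
    switch (getFn(Name)) { // one lookup, then grouped cases
    case Fn::Sin: case Fn::Sinf:
      return std::sin(V);
    case Fn::Cos: case Fn::Cosf:
      return std::cos(V);
    default:
      return NAN; // not foldable
    }
  }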
@@ -2002,19 +2074,35 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
if (!TLI)
return nullptr;
- if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
- (Name == "powf" && TLI->has(LibFunc_powf)) ||
- (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
- (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
- (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
- return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
- (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
- (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
- (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
- return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+
+ LibFunc Func = NotLibFunc;
+ TLI->getLibFunc(Name, Func);
+ switch (Func) {
+ default:
+ break;
+ case LibFunc_pow:
+ case LibFunc_powf:
+ case LibFunc_pow_finite:
+ case LibFunc_powf_finite:
+ if (TLI->has(Func))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ break;
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
+ if (TLI->has(Func)) {
+ APFloat V = Op1->getValueAPF();
+ if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+ break;
+ case LibFunc_atan2:
+ case LibFunc_atan2f:
+ case LibFunc_atan2_finite:
+ case LibFunc_atan2f_finite:
+ if (TLI->has(Func))
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+ break;
+ }
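Folding fmod through APFloat::mod only when the status is opOK mirrors the host semantics: the operation is well-defined for ordinary operands but a domain error for a zero divisor, which the status check refuses to fold:

  #include <cmath>
  #include <iostream>

  int main() {
    std::cout << std::fmod(5.5, 2.0) << '\n'; // 1.5: well-defined, safe to fold
    // fmod(x, 0) is a domain error and yields NaN; the opOK check above
    // declines to fold such calls rather than materialize the NaN.
    std::cout << std::fmod(5.5, 0.0) << '\n'; // nan
  }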
} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
return ConstantFP::get(Ty->getContext(),
@@ -2041,20 +2129,27 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
switch (IntrinsicID) {
default: break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ // X - undef -> { undef, false }
+ // undef - X -> { undef, false }
+ // X + undef -> { undef, false }
+ // undef + X -> { undef, false }
+ if (!C0 || !C1) {
+ return ConstantStruct::get(
+ cast<StructType>(Ty),
+ {UndefValue::get(Ty->getStructElementType(0)),
+ Constant::getNullValue(Ty->getStructElementType(1))});
+ }
+ LLVM_FALLTHROUGH;
case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- // Even if both operands are undef, we cannot fold muls to undef
- // in the general case. For example, on i2 there are no inputs
- // that would produce { i2 -1, i1 true } as the result.
+ case Intrinsic::umul_with_overflow: {
+ // undef * X -> { 0, false }
+ // X * undef -> { 0, false }
if (!C0 || !C1)
return Constant::getNullValue(Ty);
- LLVM_FALLTHROUGH;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow: {
- if (!C0 || !C1)
- return UndefValue::get(Ty);
APInt Res;
bool Overflow;
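The asymmetry above — add/sub fold an undef operand to { undef, false }, while mul folds to { 0, false } — follows from reachability: some result/overflow pairs of a multiply can never occur, so undef (which a later pass may refine to anything) would be unsound. The removed i2 example, checked by brute force for the unsigned case (signed is analogous):

  #include <iostream>

  int main() {
    // Search all i2 (2-bit) unsigned multiplications for a pair producing
    // result bits 0b11 (i.e. i2 -1) together with an overflow flag of true.
    bool Found = false;
    for (unsigned A = 0; A < 4; ++A)
      for (unsigned B = 0; B < 4; ++B) {
        unsigned Product = A * B;
        bool Overflow = Product >= 4;
        if ((Product & 3) == 3 && Overflow)
          Found = true;
      }
    // Prints 0: { i2 -1, i1 true } is unreachable, so folding an undef
    // multiplier to undef could invent an impossible value; 0 is always safe.
    std::cout << Found << '\n';
  }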
@@ -2194,13 +2289,9 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
case Intrinsic::fma:
case Intrinsic::fmuladd: {
APFloat V = Op1->getValueAPF();
- APFloat::opStatus s = V.fusedMultiplyAdd(Op2->getValueAPF(),
- Op3->getValueAPF(),
- APFloat::rmNearestTiesToEven);
- if (s != APFloat::opInvalidOp)
- return ConstantFP::get(Ty->getContext(), V);
-
- return nullptr;
+ V.fusedMultiplyAdd(Op2->getValueAPF(), Op3->getValueAPF(),
+ APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
}
}
}
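With the opInvalidOp check gone, an invalid fma combination now folds to the NaN the runtime call itself would return, instead of being left unfolded; the host analogue:

  #include <cmath>
  #include <iostream>

  int main() {
    // fma(inf, 0, 1) raises the invalid-operation exception and returns NaN;
    // after the change the constant folder produces that NaN directly.
    std::cout << std::fma(INFINITY, 0.0, 1.0) << '\n'; // nan
  }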
diff --git a/lib/Analysis/DDG.cpp b/lib/Analysis/DDG.cpp
new file mode 100644
index 000000000000..b5c3c761ad98
--- /dev/null
+++ b/lib/Analysis/DDG.cpp
@@ -0,0 +1,203 @@
+//===- DDG.cpp - Data Dependence Graph -------------------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation for the data dependence graph.
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/DDG.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ddg"
+
+template class llvm::DGEdge<DDGNode, DDGEdge>;
+template class llvm::DGNode<DDGNode, DDGEdge>;
+template class llvm::DirectedGraph<DDGNode, DDGEdge>;
+
+//===--------------------------------------------------------------------===//
+// DDGNode implementation
+//===--------------------------------------------------------------------===//
+DDGNode::~DDGNode() {}
+
+bool DDGNode::collectInstructions(
+ llvm::function_ref<bool(Instruction *)> const &Pred,
+ InstructionListType &IList) const {
+ assert(IList.empty() && "Expected the IList to be empty on entry.");
+ if (isa<SimpleDDGNode>(this)) {
+ for (auto *I : cast<const SimpleDDGNode>(this)->getInstructions())
+ if (Pred(I))
+ IList.push_back(I);
+ } else
+ llvm_unreachable("unimplemented type of node");
+ return !IList.empty();
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) {
+ const char *Out;
+ switch (K) {
+ case DDGNode::NodeKind::SingleInstruction:
+ Out = "single-instruction";
+ break;
+ case DDGNode::NodeKind::MultiInstruction:
+ Out = "multi-instruction";
+ break;
+ case DDGNode::NodeKind::Root:
+ Out = "root";
+ break;
+ case DDGNode::NodeKind::Unknown:
+ Out = "??";
+ break;
+ }
+ OS << Out;
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) {
+ OS << "Node Address:" << &N << ":" << N.getKind() << "\n";
+ if (isa<SimpleDDGNode>(N)) {
+ OS << " Instructions:\n";
+ for (auto *I : cast<const SimpleDDGNode>(N).getInstructions())
+ OS.indent(2) << *I << "\n";
+ } else if (!isa<RootDDGNode>(N))
+ llvm_unreachable("unimplemented type of node");
+
+ OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n");
+ for (auto &E : N.getEdges())
+ OS.indent(2) << *E;
+ return OS;
+}
+
+//===--------------------------------------------------------------------===//
+// SimpleDDGNode implementation
+//===--------------------------------------------------------------------===//
+
+SimpleDDGNode::SimpleDDGNode(Instruction &I)
+ : DDGNode(NodeKind::SingleInstruction), InstList() {
+ assert(InstList.empty() && "Expected empty list.");
+ InstList.push_back(&I);
+}
+
+SimpleDDGNode::SimpleDDGNode(const SimpleDDGNode &N)
+ : DDGNode(N), InstList(N.InstList) {
+ assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) ||
+ (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) &&
+ "constructing from invalid simple node.");
+}
+
+SimpleDDGNode::SimpleDDGNode(SimpleDDGNode &&N)
+ : DDGNode(std::move(N)), InstList(std::move(N.InstList)) {
+ assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) ||
+ (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) &&
+ "constructing from invalid simple node.");
+}
+
+SimpleDDGNode::~SimpleDDGNode() { InstList.clear(); }
+
+//===--------------------------------------------------------------------===//
+// DDGEdge implementation
+//===--------------------------------------------------------------------===//
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) {
+ const char *Out;
+ switch (K) {
+ case DDGEdge::EdgeKind::RegisterDefUse:
+ Out = "def-use";
+ break;
+ case DDGEdge::EdgeKind::MemoryDependence:
+ Out = "memory";
+ break;
+ case DDGEdge::EdgeKind::Rooted:
+ Out = "rooted";
+ break;
+ case DDGEdge::EdgeKind::Unknown:
+ Out = "??";
+ break;
+ }
+ OS << Out;
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge &E) {
+ OS << "[" << E.getKind() << "] to " << &E.getTargetNode() << "\n";
+ return OS;
+}
+
+//===--------------------------------------------------------------------===//
+// DataDependenceGraph implementation
+//===--------------------------------------------------------------------===//
+using BasicBlockListType = SmallVector<BasicBlock *, 8>;
+
+DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D)
+ : DependenceGraphInfo(F.getName().str(), D) {
+ BasicBlockListType BBList;
+ for (auto &BB : F.getBasicBlockList())
+ BBList.push_back(&BB);
+ DDGBuilder(*this, D, BBList).populate();
+}
+
+DataDependenceGraph::DataDependenceGraph(const Loop &L, DependenceInfo &D)
+ : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." +
+ L.getHeader()->getName())
+ .str(),
+ D) {
+ BasicBlockListType BBList;
+ for (BasicBlock *BB : L.blocks())
+ BBList.push_back(BB);
+ DDGBuilder(*this, D, BBList).populate();
+}
+
+DataDependenceGraph::~DataDependenceGraph() {
+ for (auto *N : Nodes) {
+ for (auto *E : *N)
+ delete E;
+ delete N;
+ }
+}
+
+bool DataDependenceGraph::addNode(DDGNode &N) {
+ if (!DDGBase::addNode(N))
+ return false;
+
+ // In general, if the root node is already created and linked, it is not safe
+ // to add new nodes since they may be unreachable by the root.
+ // TODO: Allow adding Pi-block nodes after the root is created. Pi-blocks
+ // are an exception because they represent components that are already
+ // reachable from the root.
+ assert(!Root && "Root node is already added. No more nodes can be added.");
+ if (isa<RootDDGNode>(N))
+ Root = &N;
+
+ return true;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) {
+ for (auto *Node : G)
+ OS << *Node << "\n";
+ return OS;
+}
+
+//===--------------------------------------------------------------------===//
+// DDG Analysis Passes
+//===--------------------------------------------------------------------===//
+
+/// DDG as a loop pass.
+DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR) {
+ Function *F = L.getHeader()->getParent();
+ DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
+ return std::make_unique<DataDependenceGraph>(L, DI);
+}
+AnalysisKey DDGAnalysis::Key;
+
+PreservedAnalyses DDGAnalysisPrinterPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ OS << "'DDG' for loop '" << L.getHeader()->getName() << "':\n";
+ OS << *AM.getResult<DDGAnalysis>(L, AR);
+ return PreservedAnalyses::all();
+}
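Given the two constructors above, a graph can be built directly wherever a DependenceInfo is available; a hedged sketch (not part of the patch) using only APIs defined in this file:

  #include "llvm/Analysis/DDG.h"
  #include "llvm/Analysis/DependenceAnalysis.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Support/raw_ostream.h"

  // Build and print the DDG of a loop, assuming the caller already holds
  // AA/SE/LI results for the enclosing function (same setup as
  // DDGAnalysis::run above).
  void printLoopDDG(llvm::Loop &L, llvm::AAResults &AA,
                    llvm::ScalarEvolution &SE, llvm::LoopInfo &LI) {
    llvm::Function *F = L.getHeader()->getParent();
    llvm::DependenceInfo DI(F, &AA, &SE, &LI);
    llvm::DataDependenceGraph G(L, DI);
    llvm::errs() << G; // operator<< defined above
  }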
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 75f269e84f9d..0038c9fb9ce4 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -254,7 +254,7 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
if (CommonLevels)
- DV = make_unique<DVEntry[]>(CommonLevels);
+ DV = std::make_unique<DVEntry[]>(CommonLevels);
}
// The rest are simple getters that hide the implementation.
@@ -3415,7 +3415,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
- return make_unique<Dependence>(Src, Dst);
+ return std::make_unique<Dependence>(Src, Dst);
}
assert(isLoadOrStore(Src) && "instruction is not load or store");
@@ -3430,7 +3430,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
case PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
- return make_unique<Dependence>(Src, Dst);
+ return std::make_unique<Dependence>(Src, Dst);
case NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
LLVM_DEBUG(dbgs() << "no alias\n");
@@ -3777,7 +3777,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
return nullptr;
}
- return make_unique<FullDependence>(std::move(Result));
+ return std::make_unique<FullDependence>(std::move(Result));
}
diff --git a/lib/Analysis/DependenceGraphBuilder.cpp b/lib/Analysis/DependenceGraphBuilder.cpp
new file mode 100644
index 000000000000..ed1d8351b2f0
--- /dev/null
+++ b/lib/Analysis/DependenceGraphBuilder.cpp
@@ -0,0 +1,228 @@
+//===- DependenceGraphBuilder.cpp ------------------------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file implements common steps of the build algorithm for construction
+// of dependence graphs such as DDG and PDG.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DependenceGraphBuilder.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DDG.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dgb"
+
+STATISTIC(TotalGraphs, "Number of dependence graphs created.");
+STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
+STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
+STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
+STATISTIC(TotalConfusedEdges,
+ "Number of confused memory dependencies between two nodes.");
+STATISTIC(TotalEdgeReversals,
+ "Number of times the source and sink of dependence was reversed to "
+ "expose cycles in the graph.");
+
+using InstructionListType = SmallVector<Instruction *, 2>;
+
+//===--------------------------------------------------------------------===//
+// AbstractDependenceGraphBuilder implementation
+//===--------------------------------------------------------------------===//
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
+ ++TotalGraphs;
+ assert(IMap.empty() && "Expected empty instruction map at start");
+ for (BasicBlock *BB : BBList)
+ for (Instruction &I : *BB) {
+ auto &NewNode = createFineGrainedNode(I);
+ IMap.insert(std::make_pair(&I, &NewNode));
+ ++TotalFineGrainedNodes;
+ }
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
+ // Create a root node that connects to every connected component of the graph.
+ // This is done to allow graph iterators to visit all the disjoint components
+ // of the graph in a single walk.
+ //
+ // This algorithm works by going through each node of the graph and for each
+ // node N, do a DFS starting from N. A rooted edge is established between the
+ // root node and N (if N is not yet visited). All the nodes reachable from N
+ // are marked as visited and are skipped in the DFS of subsequent nodes.
+ //
+ // Note: This algorithm tries to limit the number of edges out of the root
+ // node to some extent, but there may be redundant edges created depending on
+ // the iteration order. For example, for a graph {A -> B}, an edge from the
+ // root node is added to both nodes if B is visited before A. While it does
+ // not result in a minimal number of edges, this approach saves compile time
+ // while keeping the number of edges in check.
+ auto &RootNode = createRootNode();
+ df_iterator_default_set<const NodeType *, 4> Visited;
+ for (auto *N : Graph) {
+ if (*N == RootNode)
+ continue;
+ for (auto I : depth_first_ext(N, Visited))
+ if (I == N)
+ createRootedEdge(RootNode, *N);
+ }
+}
+
+template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
+ for (NodeType *N : Graph) {
+ InstructionListType SrcIList;
+ N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
+
+ // Use a set to mark the targets that we link to N, so we don't add
+ // duplicate def-use edges when more than one instruction in a target node
+ // use results of instructions that are contained in N.
+ SmallPtrSet<NodeType *, 4> VisitedTargets;
+
+ for (Instruction *II : SrcIList) {
+ for (User *U : II->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI)
+ continue;
+ NodeType *DstNode = nullptr;
+ if (IMap.find(UI) != IMap.end())
+ DstNode = IMap.find(UI)->second;
+
+ // In the case of loops, the scope of the subgraph is all the
+ // basic blocks (and instructions within them) belonging to the loop. We
+ // simply ignore all the edges coming from (or going into) instructions
+ // or basic blocks outside of this range.
+ if (!DstNode) {
+ LLVM_DEBUG(
+ dbgs()
+ << "skipped def-use edge since the sink" << *UI
+ << " is outside the range of instructions being considered.\n");
+ continue;
+ }
+
+ // Self dependencies are ignored because they are redundant and
+ // uninteresting.
+ if (DstNode == N) {
+ LLVM_DEBUG(dbgs()
+ << "skipped def-use edge since the sink and the source ("
+ << N << ") are the same.\n");
+ continue;
+ }
+
+ if (VisitedTargets.insert(DstNode).second) {
+ createDefUseEdge(*N, *DstNode);
+ ++TotalDefUseEdges;
+ }
+ }
+ }
+ }
+}
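The VisitedTargets set is what keeps parallel def-use edges from piling up when several instructions in one node feed the same target; the insert(...).second idiom in miniature:

  #include <iostream>
  #include <set>

  int main() {
    std::set<int> Visited;
    // insert(...).second is true only on first insertion, so repeated
    // sources targeting the same node create a single edge.
    for (int Target : {7, 7, 9, 7})
      if (Visited.insert(Target).second)
        std::cout << "edge to " << Target << '\n'; // printed once for 7 and 9
  }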
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
+ using DGIterator = typename G::iterator;
+ auto isMemoryAccess = [](const Instruction *I) {
+ return I->mayReadOrWriteMemory();
+ };
+ for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
+ InstructionListType SrcIList;
+ (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
+ if (SrcIList.empty())
+ continue;
+
+ for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
+ if (**SrcIt == **DstIt)
+ continue;
+ InstructionListType DstIList;
+ (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
+ if (DstIList.empty())
+ continue;
+ bool ForwardEdgeCreated = false;
+ bool BackwardEdgeCreated = false;
+ for (Instruction *ISrc : SrcIList) {
+ for (Instruction *IDst : DstIList) {
+ auto D = DI.depends(ISrc, IDst, true);
+ if (!D)
+ continue;
+
+ // If we have a dependence with its left-most non-'=' direction
+ // being '>' we need to reverse the direction of the edge, because
+ // the source of the dependence cannot occur after the sink. For
+ // confused dependencies, we will create edges in both directions to
+ // represent the possibility of a cycle.
+
+ auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
+ if (!ForwardEdgeCreated) {
+ createMemoryEdge(Src, Dst);
+ ++TotalMemoryEdges;
+ }
+ if (!BackwardEdgeCreated) {
+ createMemoryEdge(Dst, Src);
+ ++TotalMemoryEdges;
+ }
+ ForwardEdgeCreated = BackwardEdgeCreated = true;
+ ++TotalConfusedEdges;
+ };
+
+ auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
+ if (!ForwardEdgeCreated) {
+ createMemoryEdge(Src, Dst);
+ ++TotalMemoryEdges;
+ }
+ ForwardEdgeCreated = true;
+ };
+
+ auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
+ if (!BackwardEdgeCreated) {
+ createMemoryEdge(Dst, Src);
+ ++TotalMemoryEdges;
+ }
+ BackwardEdgeCreated = true;
+ };
+
+ if (D->isConfused())
+ createConfusedEdges(**SrcIt, **DstIt);
+ else if (D->isOrdered() && !D->isLoopIndependent()) {
+ bool ReversedEdge = false;
+ for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
+ if (D->getDirection(Level) == Dependence::DVEntry::EQ)
+ continue;
+ else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
+ createBackwardEdge(**SrcIt, **DstIt);
+ ReversedEdge = true;
+ ++TotalEdgeReversals;
+ break;
+ } else if (D->getDirection(Level) == Dependence::DVEntry::LT)
+ break;
+ else {
+ createConfusedEdges(**SrcIt, **DstIt);
+ break;
+ }
+ }
+ if (!ReversedEdge)
+ createForwardEdge(**SrcIt, **DstIt);
+ } else
+ createForwardEdge(**SrcIt, **DstIt);
+
+ // Avoid creating duplicate edges.
+ if (ForwardEdgeCreated && BackwardEdgeCreated)
+ break;
+ }
+
+ // If we've created edges in both directions, there is no more
+ // unique edge that we can create between these two nodes, so we
+ // can exit early.
+ if (ForwardEdgeCreated && BackwardEdgeCreated)
+ break;
+ }
+ }
+ }
+}
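The level loop above is, in effect, a small decision procedure over the dependence's direction vector: skip '=' levels, the first '<' keeps the forward edge, the first '>' reverses it, and anything else gets edges in both directions. A distilled standalone sketch (Direction and classify are stand-ins, not LLVM types):

  #include <iostream>
  #include <vector>

  enum class Direction { EQ, LT, GT, ALL }; // stand-in for Dependence::DVEntry
  enum class Edge { Forward, Backward, Confused };

  Edge classify(const std::vector<Direction> &DV) {
    for (Direction D : DV) {
      if (D == Direction::EQ)
        continue;               // '=' levels carry no ordering information
      if (D == Direction::GT)
        return Edge::Backward;  // sink precedes source: reverse the edge
      if (D == Direction::LT)
        return Edge::Forward;   // source precedes sink: keep the direction
      return Edge::Confused;    // unknown direction: edges both ways
    }
    return Edge::Forward;       // all '=': loop-independent order
  }

  int main() {
    // (=, >): the leftmost non-'=' direction is '>', so the edge is reversed.
    std::cout << int(classify({Direction::EQ, Direction::GT})) << '\n'; // 1
  }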
+
+template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
+template class llvm::DependenceGraphInfo<DDGNode>;
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
index 0ccd59ef2bfd..3d1be1e1cce0 100644
--- a/lib/Analysis/DivergenceAnalysis.cpp
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -412,6 +412,12 @@ bool DivergenceAnalysis::isDivergent(const Value &V) const {
return DivergentValues.find(&V) != DivergentValues.end();
}
+bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
+ Value &V = *U.get();
+ Instruction &I = *cast<Instruction>(U.getUser());
+ return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
+}
+
void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if (DivergentValues.empty())
return;
@@ -449,6 +455,10 @@ bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
return DA.isDivergent(val);
}
+bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const {
+ return DA.isDivergentUse(use);
+}
+
void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
DA.print(OS, mod);
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index 0d6c0ffb18a8..efdf9706ba3c 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -370,7 +370,8 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
// passing into the function.
if (Call->isDataOperand(&U)) {
// Detect calls to free.
- if (Call->isArgOperand(&U) && isFreeCall(I, &TLI)) {
+ if (Call->isArgOperand(&U) &&
+ isFreeCall(I, &GetTLI(*Call->getFunction()))) {
if (Writers)
Writers->insert(Call->getParent()->getParent());
} else {
@@ -432,7 +433,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
GV->getParent()->getDataLayout());
- if (!isAllocLikeFn(Ptr, &TLI))
+ if (!isAllocLikeFn(Ptr, &GetTLI(*SI->getFunction())))
return false; // Too hard to analyze.
// Analyze all uses of the allocation. If any of them are used in a
@@ -576,6 +577,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// We handle calls specially because the graph-relevant aspects are
// handled above.
if (auto *Call = dyn_cast<CallBase>(&I)) {
+ auto &TLI = GetTLI(*Node->getFunction());
if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) {
// FIXME: It is completely unclear why this is necessary and not
// handled by the above graph code.
@@ -937,12 +939,13 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI));
}
-GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
- const TargetLibraryInfo &TLI)
- : AAResultBase(), DL(DL), TLI(TLI) {}
+GlobalsAAResult::GlobalsAAResult(
+ const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
+ : AAResultBase(), DL(DL), GetTLI(std::move(GetTLI)) {}
GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
- : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI),
+ : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)),
NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)),
IndirectGlobals(std::move(Arg.IndirectGlobals)),
AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)),
@@ -957,10 +960,10 @@ GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
GlobalsAAResult::~GlobalsAAResult() {}
-/*static*/ GlobalsAAResult
-GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
- CallGraph &CG) {
- GlobalsAAResult Result(M.getDataLayout(), TLI);
+/*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule(
+ Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
+ CallGraph &CG) {
+ GlobalsAAResult Result(M.getDataLayout(), GetTLI);
// Discover which functions aren't recursive, to feed into AnalyzeGlobals.
Result.CollectSCCMembership(CG);
@@ -977,8 +980,12 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
AnalysisKey GlobalsAA::Key;
GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) {
- return GlobalsAAResult::analyzeModule(M,
- AM.getResult<TargetLibraryAnalysis>(M),
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return GlobalsAAResult::analyzeModule(M, GetTLI,
AM.getResult<CallGraphAnalysis>(M));
}
@@ -999,9 +1006,11 @@ GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) {
}
bool GlobalsAAWrapperPass::runOnModule(Module &M) {
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule(
- M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
- getAnalysis<CallGraphWrapperPass>().getCallGraph())));
+ M, GetTLI, getAnalysis<CallGraphWrapperPass>().getCallGraph())));
return false;
}
diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp
index ce285f82f720..6fb600114bc6 100644
--- a/lib/Analysis/IVDescriptors.cpp
+++ b/lib/Analysis/IVDescriptors.cpp
@@ -300,7 +300,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
if (!ReduxDesc.isRecurrence())
return false;
- if (isa<FPMathOperator>(ReduxDesc.getPatternInst()))
+ // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
+ if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi)
FMF &= ReduxDesc.getPatternInst()->getFastMathFlags();
}
diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 6ff840efcb64..68153de8219f 100644
--- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -53,7 +53,7 @@ static cl::opt<unsigned>
"call callsite"));
ICallPromotionAnalysis::ICallPromotionAnalysis() {
- ValueDataArray = llvm::make_unique<InstrProfValueData[]>(MaxNumPromotions);
+ ValueDataArray = std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
}
bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count,
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 0dec146e0465..89811ec0e377 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -436,7 +436,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
Type *Ty = I.getAllocatedType();
AllocatedSize = SaturatingMultiplyAdd(
- AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
+ AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(),
+ AllocatedSize);
return Base::visitAlloca(I);
}
}
@@ -444,7 +445,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
Type *Ty = I.getAllocatedType();
- AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
+ AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(),
+ AllocatedSize);
}
// We will happily inline static alloca instructions.
@@ -1070,8 +1072,8 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *SimpleV = nullptr;
if (auto FI = dyn_cast<FPMathOperator>(&I))
- SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
- CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
+ SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
+ CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
else
SimpleV =
SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
@@ -1453,19 +1455,6 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// Maximum valid cost increased in this function.
int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
- // Exit early for a large switch, assuming one case needs at least one
- // instruction.
- // FIXME: This is not true for a bit test, but ignore such case for now to
- // save compile-time.
- int64_t CostLowerBound =
- std::min((int64_t)CostUpperBound,
- (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
-
- if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
- addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost);
- return false;
- }
-
unsigned JumpTableSize = 0;
unsigned NumCaseCluster =
TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index e34bf6f4e43f..cb8987721700 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -56,8 +56,8 @@ static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
const SimplifyQuery &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
-static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
- const SimplifyQuery &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &,
+ const SimplifyQuery &, unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -1371,7 +1371,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
- ICmpInst *UnsignedICmp, bool IsAnd) {
+ ICmpInst *UnsignedICmp, bool IsAnd,
+ const SimplifyQuery &Q) {
Value *X, *Y;
ICmpInst::Predicate EqPred;
@@ -1380,6 +1381,59 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
return nullptr;
ICmpInst::Predicate UnsignedPred;
+
+ Value *A, *B;
+ // Y = (A - B);
+ if (match(Y, m_Sub(m_Value(A), m_Value(B)))) {
+ if (match(UnsignedICmp,
+ m_c_ICmp(UnsignedPred, m_Specific(A), m_Specific(B))) &&
+ ICmpInst::isUnsigned(UnsignedPred)) {
+ if (UnsignedICmp->getOperand(0) != A)
+ UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+
+ // A >=/<= B || (A - B) != 0 <--> true
+ if ((UnsignedPred == ICmpInst::ICMP_UGE ||
+ UnsignedPred == ICmpInst::ICMP_ULE) &&
+ EqPred == ICmpInst::ICMP_NE && !IsAnd)
+ return ConstantInt::getTrue(UnsignedICmp->getType());
+ // A </> B && (A - B) == 0 <--> false
+ if ((UnsignedPred == ICmpInst::ICMP_ULT ||
+ UnsignedPred == ICmpInst::ICMP_UGT) &&
+ EqPred == ICmpInst::ICMP_EQ && IsAnd)
+ return ConstantInt::getFalse(UnsignedICmp->getType());
+
+ // A </> B && (A - B) != 0 <--> A </> B
+ // A </> B || (A - B) != 0 <--> (A - B) != 0
+ if (EqPred == ICmpInst::ICMP_NE && (UnsignedPred == ICmpInst::ICMP_ULT ||
+ UnsignedPred == ICmpInst::ICMP_UGT))
+ return IsAnd ? UnsignedICmp : ZeroICmp;
+
+ // A <=/>= B && (A - B) == 0 <--> (A - B) == 0
+ // A <=/>= B || (A - B) == 0 <--> A <=/>= B
+ if (EqPred == ICmpInst::ICMP_EQ && (UnsignedPred == ICmpInst::ICMP_ULE ||
+ UnsignedPred == ICmpInst::ICMP_UGE))
+ return IsAnd ? ZeroICmp : UnsignedICmp;
+ }
+
+ // Given Y = (A - B)
+ // Y >= A && Y != 0 --> Y >= A iff B != 0
+ // Y < A || Y == 0 --> Y < A iff B != 0
+ if (match(UnsignedICmp,
+ m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) {
+ if (UnsignedICmp->getOperand(0) != Y)
+ UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+
+ if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd &&
+ EqPred == ICmpInst::ICMP_NE &&
+ isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return UnsignedICmp;
+ if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd &&
+ EqPred == ICmpInst::ICMP_EQ &&
+ isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return UnsignedICmp;
+ }
+ }
+
if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) &&
ICmpInst::isUnsigned(UnsignedPred))
;
@@ -1395,19 +1449,33 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE)
return IsAnd ? UnsignedICmp : ZeroICmp;
- // X >= Y || Y != 0 --> true
+ // X <= Y && Y != 0 --> X <= Y iff X != 0
+ // X <= Y || Y != 0 --> Y != 0 iff X != 0
+ if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
+ isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return IsAnd ? UnsignedICmp : ZeroICmp;
+
+ // X >= Y && Y == 0 --> Y == 0
// X >= Y || Y == 0 --> X >= Y
- if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) {
- if (EqPred == ICmpInst::ICMP_NE)
- return getTrue(UnsignedICmp->getType());
- return UnsignedICmp;
- }
+ if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ)
+ return IsAnd ? ZeroICmp : UnsignedICmp;
+
+ // X > Y && Y == 0 --> Y == 0 iff X != 0
+ // X > Y || Y == 0 --> X > Y iff X != 0
+ if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
+ isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return IsAnd ? ZeroICmp : UnsignedICmp;
// X < Y && Y == 0 --> false
if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ &&
IsAnd)
return getFalse(UnsignedICmp->getType());
+ // X >= Y || Y != 0 --> true
+ if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_NE &&
+ !IsAnd)
+ return getTrue(UnsignedICmp->getType());
+
return nullptr;
}
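The new (A - B) rewrites are unsigned tautologies: A < B forces A != B, so the wrapped subtraction cannot be zero. An exhaustive 8-bit check of the two unconditional cases:

  #include <cstdint>
  #include <iostream>

  int main() {
    bool OK = true;
    for (unsigned A = 0; A < 256; ++A)
      for (unsigned B = 0; B < 256; ++B) {
        uint8_t Sub = uint8_t(A - B);
        // A >= B || (A - B) != 0 is always true: A < B implies A != B,
        // so the wrapped subtraction cannot be zero.
        OK &= (A >= B || Sub != 0);
        // A < B && (A - B) == 0 is always false, for the same reason.
        OK &= !(A < B && Sub == 0);
      }
    std::cout << OK << '\n'; // 1
  }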
@@ -1587,10 +1655,10 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
}
static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
- const InstrInfoQuery &IIQ) {
- if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
+ const SimplifyQuery &Q) {
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q))
return X;
- if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
+ if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true, Q))
return X;
if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
@@ -1604,9 +1672,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ))
return X;
return nullptr;
@@ -1660,10 +1728,10 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
}
static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
- const InstrInfoQuery &IIQ) {
- if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+ const SimplifyQuery &Q) {
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false, Q))
return X;
- if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
+ if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false, Q))
return X;
if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
@@ -1677,9 +1745,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ))
return X;
return nullptr;
@@ -1738,8 +1806,8 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
if (ICmp0 && ICmp1)
- V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ)
- : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ);
+ V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q)
+ : simplifyOrOfICmps(ICmp0, ICmp1, Q);
auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
@@ -1759,6 +1827,77 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
return nullptr;
}
+/// Check that Op1 is in the expected form, i.e.:
+/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+/// %Op1 = extractvalue { i4, i1 } %Agg, 1
+static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1,
+ Value *X) {
+ auto *Extract = dyn_cast<ExtractValueInst>(Op1);
+ // We should only be extracting the overflow bit.
+ if (!Extract || !Extract->getIndices().equals(1))
+ return false;
+ Value *Agg = Extract->getAggregateOperand();
+ // This should be a multiplication-with-overflow intrinsic.
+ if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
+ m_Intrinsic<Intrinsic::smul_with_overflow>())))
+ return false;
+ // One of its multipliers should be the value we checked for zero before.
+ if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
+ m_Argument<1>(m_Specific(X)))))
+ return false;
+ return true;
+}
+
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+/// %Op0 = icmp ne i4 %X, 0
+/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+/// %Op1 = extractvalue { i4, i1 } %Agg, 1
+/// %??? = and i1 %Op0, %Op1
+/// We can just return %Op1
+static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) {
+ ICmpInst::Predicate Pred;
+ Value *X;
+ if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+ Pred != ICmpInst::Predicate::ICMP_NE)
+ return nullptr;
+ // Is Op1 in expected form?
+ if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
+ return nullptr;
+ // Can omit 'and', and just return the overflow bit.
+ return Op1;
+}
+
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+/// %Op0 = icmp eq i4 %X, 0
+/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+/// %Op1 = extractvalue { i4, i1 } %Agg, 1
+/// %NotOp1 = xor i1 %Op1, true
+/// %or = or i1 %Op0, %NotOp1
+/// We can just return %NotOp1
+static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0,
+ Value *NotOp1) {
+ ICmpInst::Predicate Pred;
+ Value *X;
+ if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+ Pred != ICmpInst::Predicate::ICMP_EQ)
+ return nullptr;
+ // We expect the other hand of an 'or' to be a 'not'.
+ Value *Op1;
+ if (!match(NotOp1, m_Not(m_Value(Op1))))
+ return nullptr;
+ // Is Op1 in expected form?
+ if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
+ return nullptr;
+  // Can omit 'or', and just return the inverted overflow bit.
+ return NotOp1;
+}
+
/// Given operands for an And, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -1813,6 +1952,14 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op0;
}
+ // If we have a multiplication overflow check that is being 'and'ed with a
+ // check that one of the multipliers is not zero, we can omit the 'and', and
+ // only keep the overflow check.
+ if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op0, Op1))
+ return V;
+ if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op1, Op0))
+ return V;
+
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
@@ -1987,6 +2134,14 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
+ // If we have an inverted multiplication overflow check that is being
+ // 'or'ed with a check that one of the multipliers is zero, we can omit the
+ // 'or', and only keep the inverted overflow check.
+ if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1))
+ return V;
+ if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0))
+ return V;
+
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
@@ -3529,6 +3684,9 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// %sel = select i1 %cmp, i32 -2147483648, i32 %add
//
// We can't replace %sel with %add unless we strip away the flags.
+ // TODO: This is an unusual limitation because better analysis results in
+ // worse simplification. InstCombine can do this fold more generally
+ // by dropping the flags. Remove this fold to save compile-time?
if (isa<OverflowingBinaryOperator>(B))
if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B))
return nullptr;
@@ -4324,14 +4482,16 @@ static Constant *propagateNaN(Constant *In) {
return In;
}
-static Constant *simplifyFPBinop(Value *Op0, Value *Op1) {
- if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
- return ConstantFP::getNaN(Op0->getType());
+/// Perform folds that are common to any floating-point operation. These are
+/// transforms based on undef/NaN, where the particular operation makes no
+/// difference to the result.
+static Constant *simplifyFPOp(ArrayRef<Value *> Ops) {
+ if (any_of(Ops, [](Value *V) { return isa<UndefValue>(V); }))
+ return ConstantFP::getNaN(Ops[0]->getType());
- if (match(Op0, m_NaN()))
- return propagateNaN(cast<Constant>(Op0));
- if (match(Op1, m_NaN()))
- return propagateNaN(cast<Constant>(Op1));
+ for (Value *V : Ops)
+ if (match(V, m_NaN()))
+ return propagateNaN(cast<Constant>(V));
return nullptr;
}
@@ -4343,7 +4503,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// fadd X, -0 ==> X
@@ -4390,7 +4550,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// fsub X, +0 ==> X
@@ -4430,23 +4590,27 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return nullptr;
}
-/// Given the operands for an FMul, see if we can fold the result
-static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
- if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
- return C;
-
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// fmul X, 1.0 ==> X
if (match(Op1, m_FPOne()))
return Op0;
+ // fmul 1.0, X ==> X
+ if (match(Op0, m_FPOne()))
+ return Op1;
+
// fmul nnan nsz X, 0 ==> 0
if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP()))
return ConstantFP::getNullValue(Op0->getType());
+ // fmul nnan nsz 0, X ==> 0
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()))
+ return ConstantFP::getNullValue(Op1->getType());
+
// sqrt(X) * sqrt(X) --> X, if we can:
// 1. Remove the intermediate rounding (reassociate).
// 2. Ignore non-zero negative numbers because sqrt would produce NAN.
@@ -4459,6 +4623,16 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return nullptr;
}
+/// Given the operands for an FMul, see if we can fold the result
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
+ return C;
+
+ // Now apply simplifications that do not require rounding.
+ return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse);
+}
+
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q) {
return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit);
@@ -4475,12 +4649,17 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit);
}
+Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned) {
if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// X / 1.0 -> X
@@ -4525,7 +4704,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// Unlike fdiv, the result of frem always matches the sign of the dividend.
@@ -4564,8 +4743,7 @@ static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q,
/// Given the operand for a UnaryOperator, see if we can fold the result.
/// If not, this returns null.
-/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp.
+/// Try to use FastMathFlags when folding the result.
static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
const FastMathFlags &FMF,
const SimplifyQuery &Q, unsigned MaxRecurse) {
@@ -4581,8 +4759,8 @@ Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
}
-Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
- const SimplifyQuery &Q) {
+Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
}
@@ -4634,11 +4812,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
-/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
-static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const FastMathFlags &FMF, const SimplifyQuery &Q,
- unsigned MaxRecurse) {
+/// Try to use FastMathFlags when folding the result.
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::FAdd:
return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
@@ -4658,9 +4835,9 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
}
-Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- FastMathFlags FMF, const SimplifyQuery &Q) {
- return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ FastMathFlags FMF, const SimplifyQuery &Q) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
}
/// Given operands for a CmpInst, see if we can fold the result.
@@ -5009,6 +5186,15 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
}
return nullptr;
}
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd: {
+ Value *Op0 = Call->getArgOperand(0);
+ Value *Op1 = Call->getArgOperand(1);
+ Value *Op2 = Call->getArgOperand(2);
+ if (Value *V = simplifyFPOp({ Op0, Op1, Op2 }))
+ return V;
+ return nullptr;
+ }
default:
return nullptr;
}
@@ -5221,14 +5407,16 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to
/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of
/// instructions to process and attempt to simplify it using
-/// InstructionSimplify.
+/// InstructionSimplify. Recursively visited users which could not be
+/// simplified themselves are added to the optional UnsimplifiedUsers set for
+/// further processing by the caller.
///
/// This routine returns 'true' only when *it* simplifies something. The passed
/// in simplified value does not count toward this.
-static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionCache *AC) {
+static bool replaceAndRecursivelySimplifyImpl(
+ Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ SmallSetVector<Instruction *, 8> *UnsimplifiedUsers = nullptr) {
bool Simplified = false;
SmallSetVector<Instruction *, 8> Worklist;
const DataLayout &DL = I->getModule()->getDataLayout();
@@ -5258,8 +5446,11 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// See if this instruction simplifies.
SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC});
- if (!SimpleV)
+ if (!SimpleV) {
+ if (UnsimplifiedUsers)
+ UnsimplifiedUsers->insert(I);
continue;
+ }
Simplified = true;
@@ -5285,16 +5476,17 @@ bool llvm::recursivelySimplifyInstruction(Instruction *I,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
- return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC, nullptr);
}
-bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionCache *AC) {
+bool llvm::replaceAndRecursivelySimplify(
+ Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ SmallSetVector<Instruction *, 8> *UnsimplifiedUsers) {
assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
assert(SimpleV && "Must provide a simplified value.");
- return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC,
+ UnsimplifiedUsers);
}
namespace llvm {
@@ -5302,7 +5494,7 @@ const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) {
auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
+ auto *TLI = TLIWP ? &TLIWP->getTLI(F) : nullptr;
auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>();
auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
return {F.getParent()->getDataLayout(), TLI, DT, AC};
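
The hunks above also fold the separate SimplifyFPBinOp/SimplifyFPUnOp entry
points into FastMathFlags-taking overloads of SimplifyBinOp/SimplifyUnOp. A
minimal caller-side sketch of the new shape; the helper name is illustrative,
only the overload itself comes from this diff:

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Illustrative helper: pass FastMathFlags to the SimplifyBinOp overload
// instead of calling the removed SimplifyFPBinOp.
static Value *trySimplifyFPMath(BinaryOperator *BO, const SimplifyQuery &Q) {
  if (!isa<FPMathOperator>(BO)) // getFastMathFlags asserts on non-FP ops
    return nullptr;
  return SimplifyBinOp(BO->getOpcode(), BO->getOperand(0), BO->getOperand(1),
                       BO->getFastMathFlags(), Q);
}
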
diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp
index f2592c26b373..e727de468a0d 100644
--- a/lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -55,8 +55,9 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); }
bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) {
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI);
+ TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ LBPI = std::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI);
return false;
}
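
A mechanical migration also visible here and throughout the import:
llvm::make_unique (an STLExtras.h backport) is replaced by C++14
std::make_unique now that LLVM is built as C++14. A trivial sketch of the
equivalence, with illustrative types:

#include <memory>
#include <string>

// Both spellings forward their arguments to the constructor and return a
// std::unique_ptr<T>; only the namespace changes.
std::unique_ptr<std::string> makeGreeting() {
  return std::make_unique<std::string>("hello");
}
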
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index 797fcf516429..ef31c1e0ba8c 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -150,7 +150,8 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName());
}
-LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
+LazyCallGraph::LazyCallGraph(
+ Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
LLVM_DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
<< "\n");
for (Function &F : M) {
@@ -159,7 +160,7 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
// If this function is a known lib function to LLVM then we want to
// synthesize reference edges to it to model the fact that LLVM can turn
// arbitrary code into a library function call.
- if (isKnownLibFunction(F, TLI))
+ if (isKnownLibFunction(F, GetTLI(F)))
LibFunctions.insert(&F);
if (F.hasLocalLinkage())
@@ -631,7 +632,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(
// If the merge range is empty, then adding the edge didn't actually form any
// new cycles. We're done.
- if (empty(MergeRange)) {
+ if (MergeRange.empty()) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN->setEdgeKind(TargetN, Edge::Call);
return false; // No new cycle.
@@ -1751,16 +1752,14 @@ static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) {
}
static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) {
- ptrdiff_t Size = size(C);
- OS << " SCC with " << Size << " functions:\n";
+ OS << " SCC with " << C.size() << " functions:\n";
for (LazyCallGraph::Node &N : C)
OS << " " << N.getFunction().getName() << "\n";
}
static void printRefSCC(raw_ostream &OS, LazyCallGraph::RefSCC &C) {
- ptrdiff_t Size = size(C);
- OS << " RefSCC with " << Size << " call SCCs:\n";
+ OS << " RefSCC with " << C.size() << " call SCCs:\n";
for (LazyCallGraph::SCC &InnerC : C)
printSCC(OS, InnerC);
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 542ff709d475..96722f32e355 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -188,7 +188,7 @@ namespace {
else {
auto It = ValueCache.find_as(Val);
if (It == ValueCache.end()) {
- ValueCache[Val] = make_unique<ValueCacheEntryTy>(Val, this);
+ ValueCache[Val] = std::make_unique<ValueCacheEntryTy>(Val, this);
It = ValueCache.find_as(Val);
assert(It != ValueCache.end() && "Val was just added to the map!");
}
@@ -434,6 +434,8 @@ namespace {
ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB);
bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II,
BasicBlock *BB);
+ bool solveBlockValueExtractValue(ValueLatticeElement &BBLV,
+ ExtractValueInst *EVI, BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
ValueLatticeElement &BBLV,
Instruction *BBI);
@@ -648,9 +650,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
return solveBlockValueBinaryOp(Res, BO, BB);
if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
- if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
- if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0)
- return solveBlockValueOverflowIntrinsic(Res, WO, BB);
+ return solveBlockValueExtractValue(Res, EVI, BB);
if (auto *II = dyn_cast<IntrinsicInst>(BBI))
return solveBlockValueIntrinsic(Res, II, BB);
@@ -1135,6 +1135,33 @@ bool LazyValueInfoImpl::solveBlockValueIntrinsic(
}
}
+bool LazyValueInfoImpl::solveBlockValueExtractValue(
+ ValueLatticeElement &BBLV, ExtractValueInst *EVI, BasicBlock *BB) {
+ if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+ if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0)
+ return solveBlockValueOverflowIntrinsic(BBLV, WO, BB);
+
+ // Handle extractvalue of an insertvalue, to allow further simplification
+ // once with.overflow intrinsics have been replaced.
+ if (Value *V = SimplifyExtractValueInst(
+ EVI->getAggregateOperand(), EVI->getIndices(),
+ EVI->getModule()->getDataLayout())) {
+ if (!hasBlockValue(V, BB)) {
+ if (pushBlockValue({ BB, V }))
+ return false;
+ BBLV = ValueLatticeElement::getOverdefined();
+ return true;
+ }
+ BBLV = getBlockValue(V, BB);
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined (unknown extractvalue).\n");
+ BBLV = ValueLatticeElement::getOverdefined();
+ return true;
+}
+
static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
bool isTrueDest) {
Value *LHS = ICI->getOperand(0);
@@ -1575,7 +1602,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
Info.DT = DTWP ? &DTWP->getDomTree() : nullptr;
- Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
if (Info.PImpl)
getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear();
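
Both the LazyValueInfo change above and the earlier InstructionSimplify folds
lean on the layout of with.overflow results. A hedged sketch of that
convention; the helper is illustrative, not part of this diff:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// with.overflow intrinsics return { result, overflow-bit }: extractvalue
// index 0 is the arithmetic result (the case solveBlockValueExtractValue
// forwards to solveBlockValueOverflowIntrinsic), while index 1 is the
// overflow flag (the case matched by the mul.with.overflow folds earlier).
static bool isOverflowBitExtract(const ExtractValueInst *EVI) {
  auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
  return WO && EVI->getNumIndices() == 1 && *EVI->idx_begin() == 1;
}
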
diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 52212e1c42aa..7de9d2cbfddb 100644
--- a/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -93,8 +93,9 @@ namespace {
class DivergencePropagator {
public:
DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- PostDominatorTree &PDT, DenseSet<const Value *> &DV)
- : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ PostDominatorTree &PDT, DenseSet<const Value *> &DV,
+ DenseSet<const Use *> &DU)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
void populateWithSourcesOfDivergence();
void propagate();
@@ -118,11 +119,14 @@ private:
PostDominatorTree &PDT;
std::vector<Value *> Worklist; // Stack for DFS.
DenseSet<const Value *> &DV; // Stores all divergent values.
+ DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform
+ // values.
};
void DivergencePropagator::populateWithSourcesOfDivergence() {
Worklist.clear();
DV.clear();
+ DU.clear();
for (auto &I : instructions(F)) {
if (TTI.isSourceOfDivergence(&I)) {
Worklist.push_back(&I);
@@ -197,8 +201,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
// dominators of TI until it is outside the influence region.
BasicBlock *InfluencedBB = ThisBB;
while (InfluenceRegion.count(InfluencedBB)) {
- for (auto &I : *InfluencedBB)
- findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ for (auto &I : *InfluencedBB) {
+ if (!DV.count(&I))
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ }
DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
if (IDomNode == nullptr)
break;
@@ -208,9 +214,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
void DivergencePropagator::findUsersOutsideInfluenceRegion(
Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
- for (User *U : I.users()) {
- Instruction *UserInst = cast<Instruction>(U);
+ for (Use &Use : I.uses()) {
+ Instruction *UserInst = cast<Instruction>(Use.getUser());
if (!InfluenceRegion.count(UserInst->getParent())) {
+ DU.insert(&Use);
if (DV.insert(UserInst).second)
Worklist.push_back(UserInst);
}
@@ -250,9 +257,8 @@ void DivergencePropagator::computeInfluenceRegion(
void DivergencePropagator::exploreDataDependency(Value *V) {
// Follow def-use chains of V.
for (User *U : V->users()) {
- Instruction *UserInst = cast<Instruction>(U);
- if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
+ if (!TTI.isAlwaysUniform(U) && DV.insert(U).second)
+ Worklist.push_back(U);
}
}
@@ -320,6 +326,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
return false;
DivergentValues.clear();
+ DivergentUses.clear();
gpuDA = nullptr;
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -328,11 +335,11 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
if (shouldUseGPUDivergenceAnalysis(F)) {
// run the new GPU divergence analysis
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
+ gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
} else {
// run LLVM's existing DivergenceAnalysis
- DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues);
+ DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
DP.populateWithSourcesOfDivergence();
DP.propagate();
}
@@ -351,6 +358,13 @@ bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
return DivergentValues.count(V);
}
+bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const {
+ if (gpuDA) {
+ return gpuDA->isDivergentUse(*U);
+ }
+ return DivergentValues.count(U->get()) || DivergentUses.count(U);
+}
+
void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
return;
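
A short caller-side sketch of the new per-use query; the helper is
hypothetical, only isDivergentUse itself is added by this patch:

#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// A value can be uniform overall while one particular use of it is divergent,
// e.g. a use outside the influence region of a divergent branch, so clients
// that care about the distinction should now ask per Use.
static void reportDivergentUses(const LegacyDivergenceAnalysis &DA,
                                Instruction &I) {
  for (Use &U : I.operands())
    if (DA.isDivergentUse(&U))
      errs() << "divergent use of operand " << U.getOperandNo()
             << " in: " << I << "\n";
}
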
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index d28b8a189d4b..db18716c64cf 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -205,7 +205,7 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
visit(F);
dbgs() << MessagesStr.str();
Messages.clear();
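
The one-line Lint change is an instance of a pattern repeated across this
import: TargetLibraryInfo is now computed per function, so legacy passes hand
the Function to getTLI. A minimal sketch of the updated idiom; the pass itself
is hypothetical:

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct ExamplePass : FunctionPass {
  static char ID;
  ExamplePass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override {
    // Old form: getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    (void)TLI; // library-function queries would go here
    return false;
  }
};
} // namespace

char ExamplePass::ID = 0;
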
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 31da4e9ec783..641e92eac781 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -12,6 +12,9 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
@@ -24,34 +27,30 @@
using namespace llvm;
-static bool isAligned(const Value *Base, const APInt &Offset, unsigned Align,
- const DataLayout &DL) {
- APInt BaseAlign(Offset.getBitWidth(), Base->getPointerAlignment(DL));
-
- if (!BaseAlign) {
- Type *Ty = Base->getType()->getPointerElementType();
- if (!Ty->isSized())
- return false;
- BaseAlign = DL.getABITypeAlignment(Ty);
- }
-
- APInt Alignment(Offset.getBitWidth(), Align);
-
- assert(Alignment.isPowerOf2() && "must be a power of 2!");
- return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1));
+static MaybeAlign getBaseAlign(const Value *Base, const DataLayout &DL) {
+ if (const MaybeAlign PA = Base->getPointerAlignment(DL))
+ return *PA;
+ Type *const Ty = Base->getType()->getPointerElementType();
+ if (!Ty->isSized())
+ return None;
+ return Align(DL.getABITypeAlignment(Ty));
}
-static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) {
- Type *Ty = Base->getType();
- assert(Ty->isSized() && "must be sized");
- APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
- return isAligned(Base, Offset, Align, DL);
+static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment,
+ const DataLayout &DL) {
+ if (MaybeAlign BA = getBaseAlign(Base, DL)) {
+ const APInt APBaseAlign(Offset.getBitWidth(), BA->value());
+ const APInt APAlign(Offset.getBitWidth(), Alignment.value());
+ assert(APAlign.isPowerOf2() && "must be a power of 2!");
+ return APBaseAlign.uge(APAlign) && !(Offset & (APAlign - 1));
+ }
+ return false;
}
/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
static bool isDereferenceableAndAlignedPointer(
- const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL,
+ const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL,
const Instruction *CtxI, const DominatorTree *DT,
SmallPtrSetImpl<const Value *> &Visited) {
// Already visited? Bail out, we've likely hit unreachable code.
@@ -63,17 +62,22 @@ static bool isDereferenceableAndAlignedPointer(
// bitcast instructions are no-ops as far as dereferenceability is concerned.
if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
- return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, Size,
- DL, CtxI, DT, Visited);
+ return isDereferenceableAndAlignedPointer(BC->getOperand(0), Alignment,
+ Size, DL, CtxI, DT, Visited);
bool CheckForNonNull = false;
APInt KnownDerefBytes(Size.getBitWidth(),
V->getPointerDereferenceableBytes(DL, CheckForNonNull));
- if (KnownDerefBytes.getBoolValue()) {
- if (KnownDerefBytes.uge(Size))
- if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT))
- return isAligned(V, Align, DL);
- }
+ if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size))
+ if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) {
+ // As we recursed through GEPs to get here, we've incrementally checked
+ // that each step advanced by a multiple of the alignment. If our base is
+ // properly aligned, then the original offset accessed must also be.
+ Type *Ty = V->getType();
+ assert(Ty->isSized() && "must be sized");
+ APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
+ return isAligned(V, Offset, Alignment, DL);
+ }
// For GEPs, determine if the indexing lands within the allocated object.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
@@ -81,7 +85,8 @@ static bool isDereferenceableAndAlignedPointer(
APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() ||
- !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue())
+ !Offset.urem(APInt(Offset.getBitWidth(), Alignment.value()))
+ .isMinValue())
return false;
// If the base pointer is dereferenceable for Offset+Size bytes, then the
@@ -93,67 +98,69 @@ static bool isDereferenceableAndAlignedPointer(
// Offset and Size may have different bit widths if we have visited an
// addrspacecast, so we can't do arithmetic directly on the APInt values.
return isDereferenceableAndAlignedPointer(
- Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()),
- DL, CtxI, DT, Visited);
+ Base, Alignment, Offset + Size.sextOrTrunc(Offset.getBitWidth()), DL,
+ CtxI, DT, Visited);
}
// For gc.relocate, look through relocations
if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V))
return isDereferenceableAndAlignedPointer(
- RelocateInst->getDerivedPtr(), Align, Size, DL, CtxI, DT, Visited);
+ RelocateInst->getDerivedPtr(), Alignment, Size, DL, CtxI, DT, Visited);
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
- return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size,
- DL, CtxI, DT, Visited);
+ return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment,
+ Size, DL, CtxI, DT, Visited);
if (const auto *Call = dyn_cast<CallBase>(V))
- if (auto *RP = getArgumentAliasingToReturnedPointer(Call))
- return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT,
- Visited);
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
+ return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI,
+ DT, Visited);
// If we don't know, assume the worst.
return false;
}
-bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
const APInt &Size,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
+ // Note: At the moment, Size can be zero. This ends up being interpreted as
+ // a query of whether [Base, V] is dereferenceable and V is aligned (since
+ // that's what the implementation happened to do). It's unclear if this is
+ // the desired semantic, but at least SelectionDAG does exercise this case.
+
SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT,
+ return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT,
Visited);
}
bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
- unsigned Align,
+ MaybeAlign MA,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
+ if (!Ty->isSized())
+ return false;
+
// When dereferenceability information is provided by a dereferenceable
// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that
// information here.
// Require ABI alignment for loads without alignment specification
- if (Align == 0)
- Align = DL.getABITypeAlignment(Ty);
-
- if (!Ty->isSized())
- return false;
-
- SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceableAndAlignedPointer(
- V, Align,
- APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)),
- DL, CtxI, DT, Visited);
+ const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty);
+ APInt AccessSize(DL.getIndexTypeSizeInBits(V->getType()),
+ DL.getTypeStoreSize(Ty));
+ return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI,
+ DT);
}
bool llvm::isDereferenceablePointer(const Value *V, Type *Ty,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
- return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT);
+ return isDereferenceableAndAlignedPointer(V, Ty, Align::None(), DL, CtxI, DT);
}
/// Test if A and B will obviously have the same value.
@@ -187,6 +194,60 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
return false;
}
+bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ auto &DL = LI->getModule()->getDataLayout();
+ Value *Ptr = LI->getPointerOperand();
+
+ APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+ DL.getTypeStoreSize(LI->getType()));
+ const Align Alignment = DL.getValueOrABITypeAlignment(
+ MaybeAlign(LI->getAlignment()), LI->getType());
+
+ Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
+
+ // If given a uniform (i.e. non-varying) address, see if we can prove the
+ // access is safe within the loop w/o needing predication.
+ if (L->isLoopInvariant(Ptr))
+ return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
+ HeaderFirstNonPHI, &DT);
+
+ // Otherwise, check to see if we have a repeating access pattern where we can
+ // prove that all accesses are well aligned and dereferenceable.
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
+ if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
+ return false;
+ auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
+ if (!Step)
+ return false;
+ // TODO: generalize to access patterns which have gaps
+ if (Step->getAPInt() != EltSize)
+ return false;
+
+ // TODO: If the symbolic trip count has a small bound (max count), we might
+ // be able to prove safety.
+ auto TC = SE.getSmallConstantTripCount(L);
+ if (!TC)
+ return false;
+
+ const APInt AccessSize = TC * EltSize;
+
+ auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart());
+ if (!StartS)
+ return false;
+ assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition");
+ Value *Base = StartS->getValue();
+
+ // For the moment, restrict ourselves to the case where the access size is a
+ // multiple of the requested alignment and the base is aligned.
+ // TODO: generalize if a case is found which warrants it
+ if (EltSize.urem(Alignment.value()) != 0)
+ return false;
+ return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
+ HeaderFirstNonPHI, &DT);
+}
+
/// Check if executing a load of this pointer value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
@@ -198,64 +259,25 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
///
/// This uses the pointee type to determine how many bytes need to be safe to
/// load from the pointer.
-bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size,
const DataLayout &DL,
Instruction *ScanFrom,
const DominatorTree *DT) {
// Zero alignment means that the load has the ABI alignment for the target
- if (Align == 0)
- Align = DL.getABITypeAlignment(V->getType()->getPointerElementType());
- assert(isPowerOf2_32(Align));
+ const Align Alignment =
+ DL.getValueOrABITypeAlignment(MA, V->getType()->getPointerElementType());
// If DT is not specified we can't make context-sensitive query
const Instruction* CtxI = DT ? ScanFrom : nullptr;
- if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT))
+ if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT))
return true;
- int64_t ByteOffset = 0;
- Value *Base = V;
- Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
-
- if (ByteOffset < 0) // out of bounds
+ if (!ScanFrom)
return false;
- Type *BaseType = nullptr;
- unsigned BaseAlign = 0;
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
- // An alloca is safe to load from as load as it is suitably aligned.
- BaseType = AI->getAllocatedType();
- BaseAlign = AI->getAlignment();
- } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
- // Global variables are not necessarily safe to load from if they are
- // interposed arbitrarily. Their size may change or they may be weak and
- // require a test to determine if they were in fact provided.
- if (!GV->isInterposable()) {
- BaseType = GV->getType()->getElementType();
- BaseAlign = GV->getAlignment();
- }
- }
-
- PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType());
-
- // If we found a base allocated type from either an alloca or global variable,
- // try to see if we are definitively within the allocated region. We need to
- // know the size of the base type and the loaded type to do anything in this
- // case.
- if (BaseType && BaseType->isSized()) {
- if (BaseAlign == 0)
- BaseAlign = DL.getPrefTypeAlignment(BaseType);
-
- if (Align <= BaseAlign) {
- // Check if the load is within the bounds of the underlying object.
- if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) &&
- ((ByteOffset % Align) == 0))
- return true;
- }
- }
-
- if (!ScanFrom)
+ if (Size.getBitWidth() > 64)
return false;
+ const uint64_t LoadSize = Size.getZExtValue();
// Otherwise, be a little bit aggressive by scanning the local block where we
// want to check to see if the pointer is already being loaded or stored
@@ -279,7 +301,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
return false;
Value *AccessedPtr;
- unsigned AccessedAlign;
+ MaybeAlign MaybeAccessedAlign;
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
// Ignore volatile loads. The execution of a volatile load cannot
// be used to prove an address is backed by regular memory; it can,
@@ -287,24 +309,26 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
if (LI->isVolatile())
continue;
AccessedPtr = LI->getPointerOperand();
- AccessedAlign = LI->getAlignment();
+ MaybeAccessedAlign = MaybeAlign(LI->getAlignment());
} else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
// Ignore volatile stores (see comment for loads).
if (SI->isVolatile())
continue;
AccessedPtr = SI->getPointerOperand();
- AccessedAlign = SI->getAlignment();
+ MaybeAccessedAlign = MaybeAlign(SI->getAlignment());
} else
continue;
Type *AccessedTy = AccessedPtr->getType()->getPointerElementType();
- if (AccessedAlign == 0)
- AccessedAlign = DL.getABITypeAlignment(AccessedTy);
- if (AccessedAlign < Align)
+
+ const Align AccessedAlign =
+ DL.getValueOrABITypeAlignment(MaybeAccessedAlign, AccessedTy);
+ if (AccessedAlign < Alignment)
continue;
// Handle trivial cases.
- if (AccessedPtr == V)
+ if (AccessedPtr == V &&
+ LoadSize <= DL.getTypeStoreSize(AccessedTy))
return true;
if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
@@ -314,12 +338,12 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
return false;
}
-bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment,
const DataLayout &DL,
Instruction *ScanFrom,
const DominatorTree *DT) {
APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty));
- return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT);
+ return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT);
}
/// DefMaxInstsToScan - the default number of maximum instructions
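
The Loads.cpp hunks above migrate from raw unsigned alignments, where zero
meant "use the ABI alignment of the type", to the typed Align/MaybeAlign
scheme. A hedged sketch of the convention; the helper name is illustrative:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// MaybeAlign(0) is None, i.e. "alignment unspecified", and
// getValueOrABITypeAlignment resolves None to the type's ABI alignment: the
// typed replacement for the old 'if (Align == 0)' fallback.
static Align loadAlignment(const LoadInst *LI, const DataLayout &DL) {
  return DL.getValueOrABITypeAlignment(MaybeAlign(LI->getAlignment()),
                                       LI->getType());
}
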
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 36bd9a8b7ea7..3d8f77675f3a 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1189,18 +1189,31 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+ // Retrieve the address space again as pointer stripping now tracks through
+ // `addrspacecast`.
+ ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();
+ ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();
+ // Check that the address spaces match and that the pointers are valid.
+ if (ASA != ASB)
+ return false;
+
+ IdxWidth = DL.getIndexSizeInBits(ASA);
+ OffsetA = OffsetA.sextOrTrunc(IdxWidth);
+ OffsetB = OffsetB.sextOrTrunc(IdxWidth);
+
+ APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
+
// OffsetDelta = OffsetB - OffsetA;
const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
- const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
- const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
+ const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt();
+
// Check if they are based on the same pointer. That makes the offsets
// sufficient.
if (PtrA == PtrB)
@@ -1641,13 +1654,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
// Check every access pair.
while (AI != AE) {
Visited.insert(*AI);
- EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
+ bool AIIsWrite = AI->getInt();
+ // Check loads only against the next equivalence class, but stores also
+ // against other stores in the same equivalence class - to the same address.
+ EquivalenceClasses<MemAccessInfo>::member_iterator OI =
+ (AIIsWrite ? AI : std::next(AI));
while (OI != AE) {
// Check every accessing instruction pair in program order.
for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
- for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
- I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
+ // Scan all accesses of another equivalence class, but only the next
+ // accesses of the same equivalence class.
+ for (std::vector<unsigned>::iterator
+ I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
+ I2E = (OI == AI ? I1E : Accesses[*OI].end());
+ I2 != I2E; ++I2) {
auto A = std::make_pair(&*AI, *I1);
auto B = std::make_pair(&*OI, *I2);
@@ -2078,7 +2099,7 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
DL = I->getDebugLoc();
}
- Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
+ Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
CodeRegion);
return *Report;
}
@@ -2323,9 +2344,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI)
- : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
- PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
- DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
+ : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
+ PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
+ DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
HasConvergentOp(false),
HasDependenceInvolvingLoopInvariantAddress(false) {
@@ -2380,7 +2401,7 @@ const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
auto &LAI = LoopAccessInfoMap[L];
if (!LAI)
- LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
+ LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
return *LAI.get();
}
@@ -2399,7 +2420,7 @@ void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
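
The isConsecutiveAccess fix above exists because pointer stripping now walks
through addrspacecast, so the stripped base may live in a different address
space with a different index width. A minimal sketch of the re-normalization
step, assuming offsets accumulated as in the hunk; the helper is illustrative:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Re-read the address spaces after stripping and bring both offsets to the
// (possibly changed) index width before doing any offset arithmetic.
static bool normalizeOffsets(const Value *PtrA, const Value *PtrB,
                             const DataLayout &DL, APInt &OffA, APInt &OffB) {
  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
  unsigned ASB = PtrB->getType()->getPointerAddressSpace();
  if (ASA != ASB) // mixed address spaces: bail out, as the patch does
    return false;
  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
  OffA = OffA.sextOrTrunc(IdxWidth);
  OffB = OffB.sextOrTrunc(IdxWidth);
  return true;
}
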
diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp
index a10a87ce113b..02d40fb8d72a 100644
--- a/lib/Analysis/LoopAnalysisManager.cpp
+++ b/lib/Analysis/LoopAnalysisManager.cpp
@@ -46,7 +46,7 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate(
// invalidation logic below to act on that.
auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>();
bool invalidateMemorySSAAnalysis = false;
- if (EnableMSSALoopDependency)
+ if (MSSAUsed)
invalidateMemorySSAAnalysis = Inv.invalidate<MemorySSAAnalysis>(F, PA);
if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
Inv.invalidate<AAManager>(F, PA) ||
diff --git a/lib/Analysis/LoopCacheAnalysis.cpp b/lib/Analysis/LoopCacheAnalysis.cpp
new file mode 100644
index 000000000000..10d2fe07884a
--- /dev/null
+++ b/lib/Analysis/LoopCacheAnalysis.cpp
@@ -0,0 +1,625 @@
+//===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the implementation for the loop cache analysis.
+/// The implementation is largely based on the following paper:
+///
+/// Compiler Optimizations for Improving Data Locality
+/// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
+/// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
+///
+/// The general approach taken to estimate the number of cache lines used by the
+/// memory references in an inner loop is:
+/// 1. Partition memory references that exhibit temporal or spatial reuse
+/// into reference groups.
+/// 2. For each loop L in the loop nest LN:
+/// a. Compute the cost of each reference group
+/// b. Compute the loop cost by summing up the reference group costs
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopCacheAnalysis.h"
+#include "llvm/ADT/BreadthFirstIterator.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-cache-cost"
+
+static cl::opt<unsigned> DefaultTripCount(
+ "default-trip-count", cl::init(100), cl::Hidden,
+ cl::desc("Use this to specify the default trip count of a loop"));
+
+// In this analysis two array references are considered to exhibit temporal
+// reuse if they access either the same memory location, or a memory location
+// with distance smaller than a configurable threshold.
+static cl::opt<unsigned> TemporalReuseThreshold(
+ "temporal-reuse-threshold", cl::init(2), cl::Hidden,
+ cl::desc("Use this to specify the max. distance between array elements "
+ "accessed in a loop so that the elements are classified to have "
+ "temporal reuse"));
+
+/// Retrieve the innermost loop in the given loop nest \p Loops. Return
+/// nullptr if any loop in the supplied loop vector has more than one sibling.
+/// The loop vector is expected to contain loops collected in breadth-first
+/// order.
+static Loop *getInnerMostLoop(const LoopVectorTy &Loops) {
+ assert(!Loops.empty() && "Expecting a non-empy loop vector");
+
+ Loop *LastLoop = Loops.back();
+ Loop *ParentLoop = LastLoop->getParentLoop();
+
+ if (ParentLoop == nullptr) {
+ assert(Loops.size() == 1 && "Expecting a single loop");
+ return LastLoop;
+ }
+
+ return (std::is_sorted(Loops.begin(), Loops.end(),
+ [](const Loop *L1, const Loop *L2) {
+ return L1->getLoopDepth() < L2->getLoopDepth();
+ }))
+ ? LastLoop
+ : nullptr;
+}
+
+static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
+ const Loop &L, ScalarEvolution &SE) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
+ if (!AR || !AR->isAffine())
+ return false;
+
+ assert(AR->getLoop() && "AR should have a loop");
+
+ // Check that start and increment are not add recurrences.
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
+ return false;
+
+ // Check that start and increment are both invariant in the loop.
+ if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
+ return false;
+
+ return AR->getStepRecurrence(SE) == &ElemSize;
+}
+
+/// Compute the trip count for the given loop \p L. Return the SCEV expression
+/// for the trip count or nullptr if it cannot be computed.
+static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
+ !isa<SCEVConstant>(BackedgeTakenCount))
+ return nullptr;
+
+ return SE.getAddExpr(BackedgeTakenCount,
+ SE.getOne(BackedgeTakenCount->getType()));
+}
+
+//===----------------------------------------------------------------------===//
+// IndexedReference implementation
+//
+raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) {
+ if (!R.IsValid) {
+ OS << R.StoreOrLoadInst;
+ OS << ", IsValid=false.";
+ return OS;
+ }
+
+ OS << *R.BasePointer;
+ for (const SCEV *Subscript : R.Subscripts)
+ OS << "[" << *Subscript << "]";
+
+ OS << ", Sizes: ";
+ for (const SCEV *Size : R.Sizes)
+ OS << "[" << *Size << "]";
+
+ return OS;
+}
+
+IndexedReference::IndexedReference(Instruction &StoreOrLoadInst,
+ const LoopInfo &LI, ScalarEvolution &SE)
+ : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
+ assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
+ "Expecting a load or store instruction");
+
+ IsValid = delinearize(LI);
+ if (IsValid)
+ LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
+ << "\n");
+}
+
+Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other,
+ unsigned CLS,
+ AliasAnalysis &AA) const {
+ assert(IsValid && "Expecting a valid reference");
+
+ if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No spacial reuse: different base pointers\n");
+ return false;
+ }
+
+ unsigned NumSubscripts = getNumSubscripts();
+ if (NumSubscripts != Other.getNumSubscripts()) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No spacial reuse: different number of subscripts\n");
+ return false;
+ }
+
+ // All subscripts must be equal, except the last (innermost) one.
+ for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
+ if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
+ LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
+ << "\n\t" << *getSubscript(SubNum) << "\n\t"
+ << *Other.getSubscript(SubNum) << "\n");
+ return false;
+ }
+ }
+
+ // The difference between the last subscripts must be less than the cache
+ // line size.
+ const SCEV *LastSubscript = getLastSubscript();
+ const SCEV *OtherLastSubscript = Other.getLastSubscript();
+ const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
+ SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
+
+ if (Diff == nullptr) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No spacial reuse, difference between subscript:\n\t"
+ << *LastSubscript << "\n\t" << OtherLastSubscript
+ << "\nis not constant.\n");
+ return None;
+ }
+
+ bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
+
+ LLVM_DEBUG({
+ if (InSameCacheLine)
+ dbgs().indent(2) << "Found spacial reuse.\n";
+ else
+ dbgs().indent(2) << "No spacial reuse.\n";
+ });
+
+ return InSameCacheLine;
+}
+
+Optional<bool> IndexedReference::hasTemporalReuse(const IndexedReference &Other,
+ unsigned MaxDistance,
+ const Loop &L,
+ DependenceInfo &DI,
+ AliasAnalysis &AA) const {
+ assert(IsValid && "Expecting a valid reference");
+
+ if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No temporal reuse: different base pointer\n");
+ return false;
+ }
+
+ std::unique_ptr<Dependence> D =
+ DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
+
+ if (D == nullptr) {
+ LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
+ return false;
+ }
+
+ if (D->isLoopIndependent()) {
+ LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
+ return true;
+ }
+
+ // Check the dependence distance at every loop level. There is temporal reuse
+ // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
+ // it is zero at every other loop level.
+ int LoopDepth = L.getLoopDepth();
+ int Levels = D->getLevels();
+ for (int Level = 1; Level <= Levels; ++Level) {
+ const SCEV *Distance = D->getDistance(Level);
+ const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
+
+ if (SCEVConst == nullptr) {
+ LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
+ return None;
+ }
+
+ const ConstantInt &CI = *SCEVConst->getValue();
+ if (Level != LoopDepth && !CI.isZero()) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No temporal reuse: distance is not zero at depth=" << Level
+ << "\n");
+ return false;
+ } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
+ LLVM_DEBUG(
+ dbgs().indent(2)
+ << "No temporal reuse: distance is greater than MaxDistance at depth="
+ << Level << "\n");
+ return false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
+ return true;
+}
+
+CacheCostTy IndexedReference::computeRefCost(const Loop &L,
+ unsigned CLS) const {
+ assert(IsValid && "Expecting a valid reference");
+ LLVM_DEBUG({
+ dbgs().indent(2) << "Computing cache cost for:\n";
+ dbgs().indent(4) << *this << "\n";
+ });
+
+ // If the indexed reference is loop invariant the cost is one.
+ if (isLoopInvariant(L)) {
+ LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
+ return 1;
+ }
+
+ const SCEV *TripCount = computeTripCount(L, SE);
+ if (!TripCount) {
+ LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
+ << " could not be computed, using DefaultTripCount\n");
+ const SCEV *ElemSize = Sizes.back();
+ TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
+ }
+ LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
+
+ // If the indexed reference is 'consecutive' the cost is
+ // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
+ const SCEV *RefCost = TripCount;
+
+ if (isConsecutive(L, CLS)) {
+ const SCEV *Coeff = getLastCoefficient();
+ const SCEV *ElemSize = Sizes.back();
+ const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
+ const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
+ RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
+ LLVM_DEBUG(dbgs().indent(4)
+ << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
+ << *RefCost << "\n");
+ } else
+ LLVM_DEBUG(dbgs().indent(4)
+ << "Access is not consecutive: RefCost=TripCount=" << *RefCost
+ << "\n");
+
+ // Attempt to fold RefCost into a constant.
+ if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
+ return ConstantCost->getValue()->getSExtValue();
+
+ LLVM_DEBUG(dbgs().indent(4)
+ << "RefCost is not a constant! Setting to RefCost=InvalidCost "
+ "(invalid value).\n");
+
+ return CacheCost::InvalidCost;
+}
+
+bool IndexedReference::delinearize(const LoopInfo &LI) {
+ assert(Subscripts.empty() && "Subscripts should be empty");
+ assert(Sizes.empty() && "Sizes should be empty");
+ assert(!IsValid && "Should be called once from the constructor");
+ LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
+
+ const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
+ const BasicBlock *BB = StoreOrLoadInst.getParent();
+
+ for (Loop *L = LI.getLoopFor(BB); L != nullptr; L = L->getParentLoop()) {
+ const SCEV *AccessFn =
+ SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
+
+ BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
+ if (BasePointer == nullptr) {
+ LLVM_DEBUG(
+ dbgs().indent(2)
+ << "ERROR: failed to delinearize, can't identify base pointer\n");
+ return false;
+ }
+
+ AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
+
+ LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
+ << "', AccessFn: " << *AccessFn << "\n");
+
+ SE.delinearize(AccessFn, Subscripts, Sizes,
+ SE.getElementSize(&StoreOrLoadInst));
+
+ if (Subscripts.empty() || Sizes.empty() ||
+ Subscripts.size() != Sizes.size()) {
+ // Attempt to determine whether we have a single-dimensional array access
+ // before giving up.
+ if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "ERROR: failed to delinearize reference\n");
+ Subscripts.clear();
+ Sizes.clear();
+ break;
+ }
+
+ const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
+ Subscripts.push_back(Div);
+ Sizes.push_back(ElemSize);
+ }
+
+ return all_of(Subscripts, [&](const SCEV *Subscript) {
+ return isSimpleAddRecurrence(*Subscript, *L);
+ });
+ }
+
+ return false;
+}
+
+bool IndexedReference::isLoopInvariant(const Loop &L) const {
+ Value *Addr = getPointerOperand(&StoreOrLoadInst);
+ assert(Addr != nullptr && "Expecting either a load or a store instruction");
+ assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
+
+ if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
+ return true;
+
+ // The indexed reference is loop invariant if none of the coefficients use
+ // the loop induction variable.
+ bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
+ return isCoeffForLoopZeroOrInvariant(*Subscript, L);
+ });
+
+ return allCoeffForLoopAreZero;
+}
+
+bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
+ // The indexed reference is 'consecutive' if the only coefficient that uses
+ // the loop induction variable is the last one...
+ const SCEV *LastSubscript = Subscripts.back();
+ for (const SCEV *Subscript : Subscripts) {
+ if (Subscript == LastSubscript)
+ continue;
+ if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
+ return false;
+ }
+
+ // ...and the access stride is less than the cache line size.
+ const SCEV *Coeff = getLastCoefficient();
+ const SCEV *ElemSize = Sizes.back();
+ const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
+
+ return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
+}
+
+const SCEV *IndexedReference::getLastCoefficient() const {
+ const SCEV *LastSubscript = getLastSubscript();
+ assert(isa<SCEVAddRecExpr>(LastSubscript) &&
+ "Expecting a SCEV add recurrence expression");
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
+ return AR->getStepRecurrence(SE);
+}
+
+bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
+ const Loop &L) const {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
+ return (AR != nullptr) ? AR->getLoop() != &L
+ : SE.isLoopInvariant(&Subscript, &L);
+}
+
+bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
+ const Loop &L) const {
+ if (!isa<SCEVAddRecExpr>(Subscript))
+ return false;
+
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
+ assert(AR->getLoop() && "AR should have a loop");
+
+ if (!AR->isAffine())
+ return false;
+
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(SE);
+
+ if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
+ return false;
+
+ return true;
+}
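+// E.g. a subscript {0,+,1}<L> (start 0, step 1 in loop L) is a simple affine
+// add recurrence; one whose start or step varies within L is rejected.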
+
+bool IndexedReference::isAliased(const IndexedReference &Other,
+ AliasAnalysis &AA) const {
+ const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
+ const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
+ return AA.isMustAlias(Loc1, Loc2);
+}
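+// Note: must-alias is a conservative grouping filter; only references that
+// alias analysis proves refer to exactly the same location are merged.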
+
+//===----------------------------------------------------------------------===//
+// CacheCost implementation
+//
+raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
+ for (const auto &LC : CC.LoopCosts) {
+ const Loop *L = LC.first;
+ OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
+ }
+ return OS;
+}
+
+CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
+ ScalarEvolution &SE, TargetTransformInfo &TTI,
+ AliasAnalysis &AA, DependenceInfo &DI,
+ Optional<unsigned> TRT)
+ : Loops(Loops), TripCounts(), LoopCosts(),
+ TRT(TRT == None ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
+ LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
+ assert(!Loops.empty() && "Expecting a non-empty loop vector.");
+
+ for (const Loop *L : Loops) {
+ unsigned TripCount = SE.getSmallConstantTripCount(L);
+ TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
+ TripCounts.push_back({L, TripCount});
+ }
+
+ calculateCacheFootprint();
+}
+
+std::unique_ptr<CacheCost>
+CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
+ DependenceInfo &DI, Optional<unsigned> TRT) {
+ if (Root.getParentLoop()) {
+ LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
+ return nullptr;
+ }
+
+ LoopVectorTy Loops;
+ for (Loop *L : breadth_first(&Root))
+ Loops.push_back(L);
+
+ if (!getInnerMostLoop(Loops)) {
+ LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
+ "than one innermost loop\n");
+ return nullptr;
+ }
+
+  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI,
+                                     TRT);
+}
+
+void CacheCost::calculateCacheFootprint() {
+ LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
+ ReferenceGroupsTy RefGroups;
+ if (!populateReferenceGroups(RefGroups))
+ return;
+
+ LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
+ for (const Loop *L : Loops) {
+ assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
+ [L](const LoopCacheCostTy &LCC) {
+ return LCC.first == L;
+ }) == LoopCosts.end()) &&
+ "Should not add duplicate element");
+ CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
+ LoopCosts.push_back(std::make_pair(L, LoopCost));
+ }
+
+ sortLoopCosts();
+ RefGroups.clear();
+}
+
+bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
+ assert(RefGroups.empty() && "Reference groups should be empty");
+
+ unsigned CLS = TTI.getCacheLineSize();
+ Loop *InnerMostLoop = getInnerMostLoop(Loops);
+ assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
+
+ for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
+ for (Instruction &I : *BB) {
+ if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
+ continue;
+
+ std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
+ if (!R->isValid())
+ continue;
+
+ bool Added = false;
+ for (ReferenceGroupTy &RefGroup : RefGroups) {
+ const IndexedReference &Representative = *RefGroup.front().get();
+ LLVM_DEBUG({
+ dbgs() << "References:\n";
+ dbgs().indent(2) << *R << "\n";
+ dbgs().indent(2) << Representative << "\n";
+ });
+
+ Optional<bool> HasTemporalReuse =
+ R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
+ Optional<bool> HasSpacialReuse =
+ R->hasSpacialReuse(Representative, CLS, AA);
+
+ if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
+ (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
+ RefGroup.push_back(std::move(R));
+ Added = true;
+ break;
+ }
+ }
+
+ if (!Added) {
+ ReferenceGroupTy RG;
+ RG.push_back(std::move(R));
+ RefGroups.push_back(std::move(RG));
+ }
+ }
+ }
+
+ if (RefGroups.empty())
+ return false;
+
+ LLVM_DEBUG({
+ dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
+ int n = 1;
+ for (const ReferenceGroupTy &RG : RefGroups) {
+ dbgs().indent(2) << "RefGroup " << n << ":\n";
+ for (const auto &IR : RG)
+ dbgs().indent(4) << *IR << "\n";
+ n++;
+ }
+ dbgs() << "\n";
+ });
+
+ return true;
+}
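+// For illustration (assuming both accesses land within one cache line):
+// 'A[i]' and 'A[i+1]' in the innermost loop would join the same reference
+// group through spatial reuse and therefore be costed only once.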
+
+CacheCostTy
+CacheCost::computeLoopCacheCost(const Loop &L,
+ const ReferenceGroupsTy &RefGroups) const {
+ if (!L.isLoopSimplifyForm())
+ return InvalidCost;
+
+ LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
+ << "' as innermost loop.\n");
+
+ // Compute the product of the trip counts of each other loop in the nest.
+ CacheCostTy TripCountsProduct = 1;
+ for (const auto &TC : TripCounts) {
+ if (TC.first == &L)
+ continue;
+ TripCountsProduct *= TC.second;
+ }
+
+ CacheCostTy LoopCost = 0;
+ for (const ReferenceGroupTy &RG : RefGroups) {
+ CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
+ LoopCost += RefGroupCost * TripCountsProduct;
+ }
+
+ LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
+ << "' has cost=" << LoopCost << "\n");
+
+ return LoopCost;
+}
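+// Worked example (hypothetical trip counts): in a 2-deep nest with trip
+// counts {i: 100, j: 100}, evaluating 'j' as the innermost loop gives
+// LoopCost = (sum of the groups' RefGroupCost in 'j') * 100, i.e. each
+// group's cost scaled by the product of the remaining trip counts.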
+
+CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
+ const Loop &L) const {
+ assert(!RG.empty() && "Reference group should have at least one member.");
+
+ const IndexedReference *Representative = RG.front().get();
+ return Representative->computeRefCost(L, TTI.getCacheLineSize());
+}
+
+//===----------------------------------------------------------------------===//
+// LoopCachePrinterPass implementation
+//
+PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ Function *F = L.getHeader()->getParent();
+ DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
+
+ if (auto CC = CacheCost::getCacheCost(L, AR, DI))
+ OS << *CC;
+
+ return PreservedAnalyses::all();
+}
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index aa5da0859805..dbab5db7dbc2 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -359,6 +359,45 @@ bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
return SE.isLoopInvariant(IndDesc.getStep(), this);
}
+BranchInst *Loop::getLoopGuardBranch() const {
+ if (!isLoopSimplifyForm())
+ return nullptr;
+
+ BasicBlock *Preheader = getLoopPreheader();
+ BasicBlock *Latch = getLoopLatch();
+ assert(Preheader && Latch &&
+ "Expecting a loop with valid preheader and latch");
+
+ // Loop should be in rotate form.
+ if (!isLoopExiting(Latch))
+ return nullptr;
+
+ // Disallow loops with more than one unique exit block, as we do not verify
+  // that GuardOtherSucc post-dominates all exit blocks.
+ BasicBlock *ExitFromLatch = getUniqueExitBlock();
+ if (!ExitFromLatch)
+ return nullptr;
+
+ BasicBlock *ExitFromLatchSucc = ExitFromLatch->getUniqueSuccessor();
+ if (!ExitFromLatchSucc)
+ return nullptr;
+
+ BasicBlock *GuardBB = Preheader->getUniquePredecessor();
+ if (!GuardBB)
+ return nullptr;
+
+ assert(GuardBB->getTerminator() && "Expecting valid guard terminator");
+
+ BranchInst *GuardBI = dyn_cast<BranchInst>(GuardBB->getTerminator());
+ if (!GuardBI || GuardBI->isUnconditional())
+ return nullptr;
+
+ BasicBlock *GuardOtherSucc = (GuardBI->getSuccessor(0) == Preheader)
+ ? GuardBI->getSuccessor(1)
+ : GuardBI->getSuccessor(0);
+ return (GuardOtherSucc == ExitFromLatchSucc) ? GuardBI : nullptr;
+}
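+// Sketch of the recognized shape (illustrative CFG):
+//
+//   GuardBB:       br %cond, label %Preheader, label %ExitSucc
+//   Preheader:     br label %Header
+//   ... rotated loop; Latch is an exiting block ...
+//   ExitFromLatch: br label %ExitSucc
+//
+// The conditional branch in GuardBB is returned only when its non-preheader
+// successor equals the unique successor of the loop's unique exit block.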
+
bool Loop::isCanonical(ScalarEvolution &SE) const {
InductionDescriptor IndDesc;
if (!getInductionDescriptor(SE, IndDesc))
diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp
index 1728b5e9f6d2..762623de41e9 100644
--- a/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -78,7 +78,7 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) {
const DataLayout &DL = I.getModule()->getDataLayout();
if (auto FI = dyn_cast<FPMathOperator>(&I))
SimpleV =
- SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+ SimplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
else
SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index 77ebf89d9a08..5cf516a538b5 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -55,8 +55,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
- LI->getAlignment(), DL))
+ if (isDereferenceableAndAlignedPointer(
+ PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL))
DerefAndAligned.insert(PO);
}
}
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 729dad463657..172c86eb4646 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -180,6 +180,19 @@ static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
return None;
}
+static Optional<AllocFnsTy>
+getAllocationData(const Value *V, AllocType AllocTy,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast = false) {
+ bool IsNoBuiltinCall;
+ if (const Function *Callee =
+ getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall))
+ if (!IsNoBuiltinCall)
+ return getAllocationDataForFunction(
+ Callee, AllocTy, &GetTLI(const_cast<Function &>(*Callee)));
+ return None;
+}
+
static Optional<AllocFnsTy> getAllocationSize(const Value *V,
const TargetLibraryInfo *TLI) {
bool IsNoBuiltinCall;
@@ -223,6 +236,11 @@ bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast).hasValue();
}
+bool llvm::isAllocationFn(
+ const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, AnyAlloc, GetTLI, LookThroughBitCast).hasValue();
+}
/// Tests if a value is a call or invoke to a function that returns a
/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions).
@@ -240,6 +258,12 @@ bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, MallocLike, TLI, LookThroughBitCast).hasValue();
}
+bool llvm::isMallocLikeFn(
+ const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, MallocLike, GetTLI, LookThroughBitCast)
+ .hasValue();
+}
/// Tests if a value is a call or invoke to a library function that
/// allocates zero-filled memory (such as calloc).
@@ -276,12 +300,27 @@ bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) {
return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue();
}
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory and throws if the allocation fails (e.g., new).
+bool llvm::isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast).hasValue();
+}
+
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory (strdup, strndup).
+bool llvm::isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, StrDupLike, TLI, LookThroughBitCast).hasValue();
+}
+
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
-const CallInst *llvm::extractMallocCall(const Value *I,
- const TargetLibraryInfo *TLI) {
- return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr;
+const CallInst *llvm::extractMallocCall(
+ const Value *I,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+ return isMallocLikeFn(I, GetTLI) ? dyn_cast<CallInst>(I) : nullptr;
}
static Value *computeArraySize(const CallInst *CI, const DataLayout &DL,
@@ -521,9 +560,9 @@ STATISTIC(ObjectVisitorArgument,
STATISTIC(ObjectVisitorLoad,
"Number of load instructions with unsolved size and offset");
-APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
- if (Options.RoundToAlign && Align)
- return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align));
+APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Alignment) {
+ if (Options.RoundToAlign && Alignment)
+ return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align(Alignment)));
return Size;
}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index b25b655165d7..884587e020bb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -183,7 +183,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
MemDepResult MemoryDependenceResults::getCallDependencyFrom(
CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
BasicBlock *BB) {
- unsigned Limit = BlockScanLimit;
+ unsigned Limit = getDefaultBlockScanLimit();
// Walk backwards through the block, looking for dependencies.
while (ScanIt != BB->begin()) {
@@ -356,7 +356,7 @@ MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
BasicBlock *BB) {
- if (!LI->getMetadata(LLVMContext::MD_invariant_group))
+ if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
return MemDepResult::getUnknown();
// Take the ptr operand after all casts and geps 0. This way we can search
@@ -417,7 +417,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// same pointer operand) we can assume that value pointed by pointer
// operand didn't change.
if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
- U->getMetadata(LLVMContext::MD_invariant_group) != nullptr)
+ U->hasMetadata(LLVMContext::MD_invariant_group))
ClosestDependency = GetClosestDependency(ClosestDependency, U);
}
}
@@ -443,7 +443,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
OrderedBasicBlock *OBB) {
bool isInvariantLoad = false;
- unsigned DefaultLimit = BlockScanLimit;
+ unsigned DefaultLimit = getDefaultBlockScanLimit();
if (!Limit)
Limit = &DefaultLimit;
@@ -481,7 +481,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// Arguably, this logic should be pushed inside AliasAnalysis itself.
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
- if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
isInvariantLoad = true;
}
@@ -1746,6 +1746,9 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
AnalysisKey MemoryDependenceAnalysis::Key;
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+ : DefaultBlockScanLimit(BlockScanLimit) {}
+
MemoryDependenceResults
MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &AA = AM.getResult<AAManager>(F);
@@ -1753,7 +1756,7 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &PV = AM.getResult<PhiValuesAnalysis>(F);
- return MemoryDependenceResults(AA, AC, TLI, DT, PV);
+ return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit);
}
char MemoryDependenceWrapperPass::ID = 0;
@@ -1807,15 +1810,15 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P
}
unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
- return BlockScanLimit;
+ return DefaultBlockScanLimit;
}
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
- MemDep.emplace(AA, AC, TLI, DT, PV);
+ MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit);
return false;
}
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 17f5d9b9f0ad..cfb8b7e7dcb5 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -49,6 +49,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
+#include <cstdlib>
#include <iterator>
#include <memory>
#include <utility>
@@ -83,7 +84,7 @@ bool llvm::VerifyMemorySSA = false;
#endif
/// Enables memory ssa as a dependency for loop passes in legacy pass manager.
cl::opt<bool> llvm::EnableMSSALoopDependency(
- "enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
+ "enable-mssa-loop-dependency", cl::Hidden, cl::init(true),
cl::desc("Enable MemorySSA dependency for loop pass manager"));
static cl::opt<bool, true>
@@ -284,6 +285,11 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
case Intrinsic::invariant_end:
case Intrinsic::assume:
return {false, NoAlias};
+ case Intrinsic::dbg_addr:
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_label:
+ case Intrinsic::dbg_value:
+ llvm_unreachable("debuginfo shouldn't have associated defs!");
default:
break;
}
@@ -369,7 +375,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
const Instruction *I) {
// If the memory can't be changed, then loads of the memory can't be
// clobbered.
- return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
+ return isa<LoadInst>(I) && (I->hasMetadata(LLVMContext::MD_invariant_load) ||
AA.pointsToConstantMemory(MemoryLocation(
cast<LoadInst>(I)->getPointerOperand())));
}
@@ -867,6 +873,7 @@ template <class AliasAnalysisType> class ClobberWalker {
if (!DefChainEnd)
for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target)))
DefChainEnd = MA;
+ assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry");
// If any of the terminated paths don't dominate the phi we'll try to
// optimize, we need to figure out what they are and quit.
@@ -1087,9 +1094,14 @@ void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
AccessList *Accesses = It->second.get();
auto *Phi = cast<MemoryPhi>(&Accesses->front());
if (RenameAllUses) {
- int PhiIndex = Phi->getBasicBlockIndex(BB);
- assert(PhiIndex != -1 && "Incomplete phi during partial rename");
- Phi->setIncomingValue(PhiIndex, IncomingVal);
+ bool ReplacementDone = false;
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
+ if (Phi->getIncomingBlock(I) == BB) {
+ Phi->setIncomingValue(I, IncomingVal);
+ ReplacementDone = true;
+ }
+ (void) ReplacementDone;
+ assert(ReplacementDone && "Incomplete phi during partial rename");
} else
Phi->addIncoming(IncomingVal, BB);
}
@@ -1237,7 +1249,7 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
if (Res.second)
- Res.first->second = llvm::make_unique<AccessList>();
+ Res.first->second = std::make_unique<AccessList>();
return Res.first->second.get();
}
@@ -1245,7 +1257,7 @@ MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr));
if (Res.second)
- Res.first->second = llvm::make_unique<DefsList>();
+ Res.first->second = std::make_unique<DefsList>();
return Res.first->second.get();
}
@@ -1554,10 +1566,10 @@ MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() {
if (!WalkerBase)
WalkerBase =
- llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
+ std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
Walker =
- llvm::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get());
+ std::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get());
return Walker.get();
}
@@ -1567,10 +1579,10 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
if (!WalkerBase)
WalkerBase =
- llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
+ std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
SkipWalker =
- llvm::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get());
+ std::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get());
return SkipWalker.get();
}
@@ -1687,13 +1699,15 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
MemoryAccess *Definition,
- const MemoryUseOrDef *Template) {
+ const MemoryUseOrDef *Template,
+ bool CreationMustSucceed) {
assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template);
- assert(
- NewAccess != nullptr &&
- "Tried to create a memory access for a non-memory touching instruction");
- NewAccess->setDefiningAccess(Definition);
+ if (CreationMustSucceed)
+ assert(NewAccess != nullptr && "Tried to create a memory access for a "
+ "non-memory touching instruction");
+ if (NewAccess)
+ NewAccess->setDefiningAccess(Definition);
return NewAccess;
}
@@ -1717,13 +1731,21 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
AliasAnalysisType *AAP,
const MemoryUseOrDef *Template) {
// The assume intrinsic has a control dependency which we model by claiming
- // that it writes arbitrarily. Ignore that fake memory dependency here.
+ // that it writes arbitrarily. Debuginfo intrinsics may be considered
+ // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory
+ // dependencies here.
// FIXME: Replace this special casing with a more accurate modelling of
// assume's control dependency.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
if (II->getIntrinsicID() == Intrinsic::assume)
return nullptr;
+  // Using a nonstandard AA pipeline might leave us with unexpected modref
+ // results for I, so add a check to not model instructions that may not read
+ // from or write to memory. This is necessary for correctness.
+ if (!I->mayReadFromMemory() && !I->mayWriteToMemory())
+ return nullptr;
+
bool Def, Use;
if (Template) {
Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
@@ -1850,6 +1872,7 @@ void MemorySSA::verifyMemorySSA() const {
verifyDomination(F);
verifyOrdering(F);
verifyDominationNumbers(F);
+ verifyPrevDefInPhis(F);
// Previously, the verification used to also verify that the clobberingAccess
// cached by MemorySSA is the same as the clobberingAccess found at a later
// query to AA. This does not hold true in general due to the current fragility
@@ -1862,6 +1885,40 @@ void MemorySSA::verifyMemorySSA() const {
// example, see test4 added in D51960.
}
+void MemorySSA::verifyPrevDefInPhis(Function &F) const {
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
+ for (const BasicBlock &BB : F) {
+ if (MemoryPhi *Phi = getMemoryAccess(&BB)) {
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+ auto *Pred = Phi->getIncomingBlock(I);
+ auto *IncAcc = Phi->getIncomingValue(I);
+ // If Pred has no unreachable predecessors, get last def looking at
+        // IDoms. If, while walking IDoms, any of these has an unreachable
+ // predecessor, then the incoming def can be any access.
+ if (auto *DTNode = DT->getNode(Pred)) {
+ while (DTNode) {
+ if (auto *DefList = getBlockDefs(DTNode->getBlock())) {
+ auto *LastAcc = &*(--DefList->end());
+ assert(LastAcc == IncAcc &&
+ "Incorrect incoming access into phi.");
+ break;
+ }
+ DTNode = DTNode->getIDom();
+ }
+ } else {
+ // If Pred has unreachable predecessors, but has at least a Def, the
+ // incoming access can be the last Def in Pred, or it could have been
+ // optimized to LoE. After an update, though, the LoE may have been
+ // replaced by another access, so IncAcc may be any access.
+ // If Pred has unreachable predecessors and no Defs, incoming access
+ // should be LoE; However, after an update, it may be any access.
+ }
+ }
+ }
+ }
+#endif
+}
+
/// Verify that all of the blocks we believe to have valid domination numbers
/// actually have valid domination numbers.
void MemorySSA::verifyDominationNumbers(const Function &F) const {
@@ -2005,7 +2062,7 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
/// accesses and verifying that, for each use, it appears in the
/// appropriate def's use list
void MemorySSA::verifyDefUses(Function &F) const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
for (BasicBlock &B : F) {
// Phi nodes are attached to basic blocks
if (MemoryPhi *Phi = getMemoryAccess(&B)) {
@@ -2212,7 +2269,7 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT));
+ return MemorySSAAnalysis::Result(std::make_unique<MemorySSA>(F, &AA, &DT));
}
bool MemorySSAAnalysis::Result::invalidate(
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
index 4c1feee7fd9a..f2d56b05d968 100644
--- a/lib/Analysis/MemorySSAUpdater.cpp
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -44,11 +44,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// First, do a cache lookup. Without this cache, certain CFG structures
// (like a series of if statements) take exponential time to visit.
auto Cached = CachedPreviousDef.find(BB);
- if (Cached != CachedPreviousDef.end()) {
+ if (Cached != CachedPreviousDef.end())
return Cached->second;
- }
- if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ // If this method is called from an unreachable block, return LoE.
+ if (!MSSA->DT->isReachableFromEntry(BB))
+ return MSSA->getLiveOnEntryDef();
+
+ if (BasicBlock *Pred = BB->getUniquePredecessor()) {
+ VisitedBlocks.insert(BB);
// Single predecessor case, just recurse, we can only have one definition.
MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef);
CachedPreviousDef.insert({BB, Result});
@@ -71,11 +75,19 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// Recurse to get the values in our predecessors for placement of a
// potential phi node. This will insert phi nodes if we cycle in order to
// break the cycle and have an operand.
- for (auto *Pred : predecessors(BB))
- if (MSSA->DT->isReachableFromEntry(Pred))
- PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
- else
+ bool UniqueIncomingAccess = true;
+ MemoryAccess *SingleAccess = nullptr;
+ for (auto *Pred : predecessors(BB)) {
+ if (MSSA->DT->isReachableFromEntry(Pred)) {
+ auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef);
+ if (!SingleAccess)
+ SingleAccess = IncomingAccess;
+ else if (IncomingAccess != SingleAccess)
+ UniqueIncomingAccess = false;
+ PhiOps.push_back(IncomingAccess);
+ } else
PhiOps.push_back(MSSA->getLiveOnEntryDef());
+ }
// Now try to simplify the ops to avoid placing a phi.
// This may return null if we never created a phi yet, that's okay
@@ -84,7 +96,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// See if we can avoid the phi by simplifying it.
auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
// If we couldn't simplify, we may have to create a phi
- if (Result == Phi) {
+ if (Result == Phi && UniqueIncomingAccess && SingleAccess) {
+ // A concrete Phi only exists if we created an empty one to break a cycle.
+ if (Phi) {
+ assert(Phi->operands().empty() && "Expected empty Phi");
+ Phi->replaceAllUsesWith(SingleAccess);
+ removeMemoryAccess(Phi);
+ }
+ Result = SingleAccess;
+ } else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) {
if (!Phi)
Phi = MSSA->createMemoryPhi(BB);
@@ -173,12 +193,9 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
TrackingVH<MemoryAccess> Res(Phi);
SmallVector<TrackingVH<Value>, 8> Uses;
std::copy(Phi->user_begin(), Phi->user_end(), std::back_inserter(Uses));
- for (auto &U : Uses) {
- if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U)) {
- auto OperRange = UsePhi->operands();
- tryRemoveTrivialPhi(UsePhi, OperRange);
- }
- }
+ for (auto &U : Uses)
+ if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U))
+ tryRemoveTrivialPhi(UsePhi);
return Res;
}
@@ -187,6 +204,11 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
// argument.
// IE phi(a, a) or b = phi(a, b) or c = phi(a, a, c)
// We recursively try to remove them.
+MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi) {
+ assert(Phi && "Can only remove concrete Phi.");
+ auto OperRange = Phi->operands();
+ return tryRemoveTrivialPhi(Phi, OperRange);
+}
template <class RangeType>
MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi,
RangeType &Operands) {
@@ -218,17 +240,49 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi,
return recursePhi(Same);
}
-void MemorySSAUpdater::insertUse(MemoryUse *MU) {
+void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) {
InsertedPHIs.clear();
MU->setDefiningAccess(getPreviousDef(MU));
- // Unlike for defs, there is no extra work to do. Because uses do not create
- // new may-defs, there are only two cases:
- //
+
+ // In cases without unreachable blocks, because uses do not create new
+ // may-defs, there are only two cases:
// 1. There was a def already below us, and therefore, we should not have
// created a phi node because it was already needed for the def.
//
// 2. There is no def below us, and therefore, there is no extra renaming work
// to do.
+
+ // In cases with unreachable blocks, where the unnecessary Phis were
+ // optimized out, adding the Use may re-insert those Phis. Hence, when
+  // inserting Uses outside of the MSSA creation process and new Phis are
+  // added, rename all uses if we are asked to.
+
+ if (!RenameUses && !InsertedPHIs.empty()) {
+ auto *Defs = MSSA->getBlockDefs(MU->getBlock());
+ (void)Defs;
+ assert((!Defs || (++Defs->begin() == Defs->end())) &&
+ "Block may have only a Phi or no defs");
+ }
+
+ if (RenameUses && InsertedPHIs.size()) {
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ BasicBlock *StartBlock = MU->getBlock();
+
+ if (auto *Defs = MSSA->getWritableBlockDefs(StartBlock)) {
+ MemoryAccess *FirstDef = &*Defs->begin();
+ // Convert to incoming value if it's a memorydef. A phi *is* already an
+ // incoming value.
+ if (auto *MD = dyn_cast<MemoryDef>(FirstDef))
+ FirstDef = MD->getDefiningAccess();
+
+ MSSA->renamePass(MU->getBlock(), FirstDef, Visited);
+ }
+    // We just inserted a phi into this block, so the incoming value will
+    // become the phi anyway; it does not matter what we pass.
+ for (auto &MP : InsertedPHIs)
+ if (MemoryPhi *Phi = cast_or_null<MemoryPhi>(MP))
+ MSSA->renamePass(Phi->getBlock(), nullptr, Visited);
+ }
}
// Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef.
@@ -260,33 +314,35 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// See if we had a local def, and if not, go hunting.
MemoryAccess *DefBefore = getPreviousDef(MD);
- bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock();
+ bool DefBeforeSameBlock = false;
+ if (DefBefore->getBlock() == MD->getBlock() &&
+ !(isa<MemoryPhi>(DefBefore) &&
+ std::find(InsertedPHIs.begin(), InsertedPHIs.end(), DefBefore) !=
+ InsertedPHIs.end()))
+ DefBeforeSameBlock = true;
// There is a def before us, which means we can replace any store/phi uses
// of that thing with us, since we are in the way of whatever was there
// before.
// We now define that def's memorydefs and memoryphis
if (DefBeforeSameBlock) {
- for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end();
- UI != UE;) {
- Use &U = *UI++;
+ DefBefore->replaceUsesWithIf(MD, [MD](Use &U) {
// Leave the MemoryUses alone.
// Also make sure we skip ourselves to avoid self references.
- if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD)
- continue;
+ User *Usr = U.getUser();
+ return !isa<MemoryUse>(Usr) && Usr != MD;
// Defs are automatically unoptimized when the user is set to MD below,
// because the isOptimized() call will fail to find the same ID.
- U.set(MD);
- }
+ });
}
// and that def is now our defining access.
MD->setDefiningAccess(DefBefore);
- // Remember the index where we may insert new phis below.
- unsigned NewPhiIndex = InsertedPHIs.size();
-
SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
+
+ // Remember the index where we may insert new phis.
+ unsigned NewPhiIndex = InsertedPHIs.size();
if (!DefBeforeSameBlock) {
// If there was a local def before us, we must have the same effect it
// did. Because every may-def is the same, any phis/etc we would create, it
@@ -302,46 +358,54 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// If this is the first def in the block and this insert is in an arbitrary
// place, compute IDF and place phis.
+ SmallPtrSet<BasicBlock *, 2> DefiningBlocks;
+
+ // If this is the last Def in the block, also compute IDF based on MD, since
+  // this may be a newly added Def, and we may need additional Phis.
auto Iter = MD->getDefsIterator();
++Iter;
auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end();
- if (Iter == IterEnd) {
- ForwardIDFCalculator IDFs(*MSSA->DT);
- SmallVector<BasicBlock *, 32> IDFBlocks;
- SmallPtrSet<BasicBlock *, 2> DefiningBlocks;
+ if (Iter == IterEnd)
DefiningBlocks.insert(MD->getBlock());
- IDFs.setDefiningBlocks(DefiningBlocks);
- IDFs.calculate(IDFBlocks);
- SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs;
- for (auto *BBIDF : IDFBlocks)
- if (!MSSA->getMemoryAccess(BBIDF)) {
- auto *MPhi = MSSA->createMemoryPhi(BBIDF);
- NewInsertedPHIs.push_back(MPhi);
- // Add the phis created into the IDF blocks to NonOptPhis, so they are
- // not optimized out as trivial by the call to getPreviousDefFromEnd
- // below. Once they are complete, all these Phis are added to the
- // FixupList, and removed from NonOptPhis inside fixupDefs().
- NonOptPhis.insert(MPhi);
- }
- for (auto &MPhi : NewInsertedPHIs) {
- auto *BBIDF = MPhi->getBlock();
- for (auto *Pred : predecessors(BBIDF)) {
- DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
- MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef),
- Pred);
- }
+ for (const auto &VH : InsertedPHIs)
+ if (const auto *RealPHI = cast_or_null<MemoryPhi>(VH))
+ DefiningBlocks.insert(RealPHI->getBlock());
+ ForwardIDFCalculator IDFs(*MSSA->DT);
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ IDFs.calculate(IDFBlocks);
+ SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs;
+ for (auto *BBIDF : IDFBlocks) {
+ auto *MPhi = MSSA->getMemoryAccess(BBIDF);
+ if (!MPhi) {
+ MPhi = MSSA->createMemoryPhi(BBIDF);
+ NewInsertedPHIs.push_back(MPhi);
}
-
- // Re-take the index where we're adding the new phis, because the above
- // call to getPreviousDefFromEnd, may have inserted into InsertedPHIs.
- NewPhiIndex = InsertedPHIs.size();
- for (auto &MPhi : NewInsertedPHIs) {
- InsertedPHIs.push_back(&*MPhi);
- FixupList.push_back(&*MPhi);
+ // Add the phis created into the IDF blocks to NonOptPhis, so they are not
+ // optimized out as trivial by the call to getPreviousDefFromEnd below.
+ // Once they are complete, all these Phis are added to the FixupList, and
+ // removed from NonOptPhis inside fixupDefs(). Existing Phis in IDF may
+    // need fixing as well, and may have been trivial before this insertion,
+ // hence add all IDF Phis. See PR43044.
+ NonOptPhis.insert(MPhi);
+ }
+ for (auto &MPhi : NewInsertedPHIs) {
+ auto *BBIDF = MPhi->getBlock();
+ for (auto *Pred : predecessors(BBIDF)) {
+ DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
+ MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred);
}
}
+ // Re-take the index where we're adding the new phis, because the above call
+ // to getPreviousDefFromEnd, may have inserted into InsertedPHIs.
+ NewPhiIndex = InsertedPHIs.size();
+ for (auto &MPhi : NewInsertedPHIs) {
+ InsertedPHIs.push_back(&*MPhi);
+ FixupList.push_back(&*MPhi);
+ }
+
FixupList.push_back(MD);
}
@@ -458,8 +522,7 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
MPhi->unorderedDeleteIncomingBlock(From);
- if (MPhi->getNumIncomingValues() == 1)
- removeMemoryAccess(MPhi);
+ tryRemoveTrivialPhi(MPhi);
}
}
@@ -475,34 +538,51 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From,
Found = true;
return false;
});
- if (MPhi->getNumIncomingValues() == 1)
- removeMemoryAccess(MPhi);
+ tryRemoveTrivialPhi(MPhi);
+ }
+}
+
+static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA,
+ const ValueToValueMapTy &VMap,
+ PhiToDefMap &MPhiMap,
+ bool CloneWasSimplified,
+ MemorySSA *MSSA) {
+ MemoryAccess *InsnDefining = MA;
+ if (MemoryDef *DefMUD = dyn_cast<MemoryDef>(InsnDefining)) {
+ if (!MSSA->isLiveOnEntryDef(DefMUD)) {
+ Instruction *DefMUDI = DefMUD->getMemoryInst();
+ assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
+ if (Instruction *NewDefMUDI =
+ cast_or_null<Instruction>(VMap.lookup(DefMUDI))) {
+ InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
+ if (!CloneWasSimplified)
+ assert(InsnDefining && "Defining instruction cannot be nullptr.");
+ else if (!InsnDefining || isa<MemoryUse>(InsnDefining)) {
+          // The clone was simplified; it is no longer a MemoryDef, so look up.
+ auto DefIt = DefMUD->getDefsIterator();
+ // Since simplified clones only occur in single block cloning, a
+ // previous definition must exist, otherwise NewDefMUDI would not
+ // have been found in VMap.
+ assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() &&
+ "Previous def must exist");
+ InsnDefining = getNewDefiningAccessForClone(
+ &*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA);
+ }
+ }
+ }
+ } else {
+ MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
+ if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi))
+ InsnDefining = NewDefPhi;
}
+ assert(InsnDefining && "Defining instruction cannot be nullptr.");
+ return InsnDefining;
}
void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
const ValueToValueMapTy &VMap,
PhiToDefMap &MPhiMap,
bool CloneWasSimplified) {
- auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
- MemoryAccess *InsnDefining = MA;
- if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
- if (!MSSA->isLiveOnEntryDef(DefMUD)) {
- Instruction *DefMUDI = DefMUD->getMemoryInst();
- assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
- if (Instruction *NewDefMUDI =
- cast_or_null<Instruction>(VMap.lookup(DefMUDI)))
- InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
- }
- } else {
- MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
- if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi))
- InsnDefining = NewDefPhi;
- }
- assert(InsnDefining && "Defining instruction cannot be nullptr.");
- return InsnDefining;
- };
-
const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
if (!Acc)
return;
@@ -519,9 +599,13 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
if (Instruction *NewInsn =
dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
- NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()),
- CloneWasSimplified ? nullptr : MUD);
- MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
+ NewInsn,
+ getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap,
+ MPhiMap, CloneWasSimplified, MSSA),
+ /*Template=*/CloneWasSimplified ? nullptr : MUD,
+ /*CreationMustSucceed=*/CloneWasSimplified ? false : true);
+ if (NewUseOrDef)
+ MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
}
}
}
@@ -563,8 +647,7 @@ void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock(
// If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be
// replaced with the unique value.
- if (HasUniqueIncomingValue)
- removeMemoryAccess(NewMPhi);
+ tryRemoveTrivialPhi(NewMPhi);
}
void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
@@ -770,6 +853,9 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
} else {
// Single predecessor, BB cannot be dead. GetLastDef of Pred.
assert(Count == 1 && Pred && "Single predecessor expected.");
+ // BB can be unreachable though, return LoE if that is the case.
+ if (!DT.getNode(BB))
+ return MSSA->getLiveOnEntryDef();
BB = Pred;
}
};
@@ -1010,7 +1096,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
for (; UI != E;) {
Use &U = *UI;
++UI;
- MemoryAccess *Usr = dyn_cast<MemoryAccess>(U.getUser());
+ MemoryAccess *Usr = cast<MemoryAccess>(U.getUser());
if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) {
BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U);
if (!DT.dominates(DominatingBlock, DominatedBlock))
@@ -1052,9 +1138,9 @@ void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
// Now reinsert it into the IR and do whatever fixups needed.
if (auto *MD = dyn_cast<MemoryDef>(What))
- insertDef(MD);
+ insertDef(MD, /*RenameUses=*/true);
else
- insertUse(cast<MemoryUse>(What));
+ insertUse(cast<MemoryUse>(What), /*RenameUses=*/true);
// Clear dangling pointers. We added all MemoryPhi users, but not all
// of them are removed by fixupDefs().
@@ -1084,25 +1170,32 @@ void MemorySSAUpdater::moveAllAccesses(BasicBlock *From, BasicBlock *To,
if (!Accs)
return;
+ assert(Start->getParent() == To && "Incorrect Start instruction");
MemoryAccess *FirstInNew = nullptr;
for (Instruction &I : make_range(Start->getIterator(), To->end()))
if ((FirstInNew = MSSA->getMemoryAccess(&I)))
break;
- if (!FirstInNew)
- return;
+ if (FirstInNew) {
+ auto *MUD = cast<MemoryUseOrDef>(FirstInNew);
+ do {
+ auto NextIt = ++MUD->getIterator();
+ MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end())
+ ? nullptr
+ : cast<MemoryUseOrDef>(&*NextIt);
+ MSSA->moveTo(MUD, To, MemorySSA::End);
+ // Moving MUD from Accs in the moveTo above, may delete Accs, so we need
+ // to retrieve it again.
+ Accs = MSSA->getWritableBlockAccesses(From);
+ MUD = NextMUD;
+ } while (MUD);
+ }
- auto *MUD = cast<MemoryUseOrDef>(FirstInNew);
- do {
- auto NextIt = ++MUD->getIterator();
- MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end())
- ? nullptr
- : cast<MemoryUseOrDef>(&*NextIt);
- MSSA->moveTo(MUD, To, MemorySSA::End);
- // Moving MUD from Accs in the moveTo above, may delete Accs, so we need to
- // retrieve it again.
- Accs = MSSA->getWritableBlockAccesses(From);
- MUD = NextMUD;
- } while (MUD);
+ // If all accesses were moved and only a trivial Phi remains, we try to remove
+ // that Phi. This is needed when From is going to be deleted.
+ auto *Defs = MSSA->getWritableBlockDefs(From);
+ if (Defs && !Defs->empty())
+ if (auto *Phi = dyn_cast<MemoryPhi>(&*Defs->begin()))
+ tryRemoveTrivialPhi(Phi);
}
void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From,
@@ -1118,7 +1211,7 @@ void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From,
void MemorySSAUpdater::moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To,
Instruction *Start) {
- assert(From->getSinglePredecessor() == To &&
+ assert(From->getUniquePredecessor() == To &&
"From block is expected to have a single predecessor (To).");
moveAllAccesses(From, To, Start);
for (BasicBlock *Succ : successors(From))
@@ -1173,8 +1266,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
return false;
});
Phi->addIncoming(NewPhi, New);
- if (onlySingleValue(NewPhi))
- removeMemoryAccess(NewPhi);
+ tryRemoveTrivialPhi(NewPhi);
}
}
@@ -1239,10 +1331,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) {
unsigned PhisSize = PhisToOptimize.size();
while (PhisSize-- > 0)
if (MemoryPhi *MP =
- cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) {
- auto OperRange = MP->operands();
- tryRemoveTrivialPhi(MP, OperRange);
- }
+ cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val()))
+ tryRemoveTrivialPhi(MP);
}
}
@@ -1256,8 +1346,7 @@ void MemorySSAUpdater::removeBlocks(
if (!DeadBlocks.count(Succ))
if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) {
MP->unorderedDeleteIncomingBlock(BB);
- if (MP->getNumIncomingValues() == 1)
- removeMemoryAccess(MP);
+ tryRemoveTrivialPhi(MP);
}
// Drop all references of all accesses in BB
if (MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB))
@@ -1281,10 +1370,8 @@ void MemorySSAUpdater::removeBlocks(
void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) {
for (auto &VH : UpdatedPHIs)
- if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) {
- auto OperRange = MPhi->operands();
- tryRemoveTrivialPhi(MPhi, OperRange);
- }
+ if (auto *MPhi = cast_or_null<MemoryPhi>(VH))
+ tryRemoveTrivialPhi(MPhi);
}
void MemorySSAUpdater::changeToUnreachable(const Instruction *I) {
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index e25eb290a665..8232bf07cafc 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -319,7 +319,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
auto *CalledValue = CS.getCalledValue();
auto *CalledFunction = CS.getCalledFunction();
if (CalledValue && !CalledFunction) {
- CalledValue = CalledValue->stripPointerCastsNoFollowAliases();
+ CalledValue = CalledValue->stripPointerCasts();
// Stripping pointer casts can reveal a called function.
CalledFunction = dyn_cast<Function>(CalledValue);
}
@@ -467,7 +467,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// FIXME: refactor this to use the same code that inliner is using.
// Don't try to import functions with noinline attribute.
F.getAttributes().hasFnAttribute(Attribute::NoInline)};
- auto FuncSummary = llvm::make_unique<FunctionSummary>(
+ auto FuncSummary = std::make_unique<FunctionSummary>(
Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
CallGraphEdges.takeVector(), TypeTests.takeVector(),
TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
@@ -598,7 +598,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index,
!V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
!V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized);
- auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
+ auto GVarSummary = std::make_unique<GlobalVarSummary>(Flags, VarFlags,
RefEdges.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(V.getGUID());
@@ -616,7 +616,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
/* Live = */ false, A.isDSOLocal(),
A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
- auto AS = llvm::make_unique<AliasSummary>(Flags);
+ auto AS = std::make_unique<AliasSummary>(Flags);
auto *Aliasee = A.getBaseObject();
auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
assert(AliaseeVI && "Alias expects aliasee summary to be available");
@@ -696,7 +696,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// Create the appropriate summary type.
if (Function *F = dyn_cast<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
- llvm::make_unique<FunctionSummary>(
+ std::make_unique<FunctionSummary>(
GVFlags, /*InstCount=*/0,
FunctionSummary::FFlags{
F->hasFnAttribute(Attribute::ReadNone),
@@ -714,7 +714,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
Index.addGlobalValueSummary(*GV, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
- llvm::make_unique<GlobalVarSummary>(
+ std::make_unique<GlobalVarSummary>(
GVFlags, GlobalVarSummary::GVarFlags(false, false),
ArrayRef<ValueInfo>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
@@ -741,7 +741,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
else if (F.hasProfileData()) {
LoopInfo LI{DT};
BranchProbabilityInfo BPI{F, LI};
- BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
+ BFIPtr = std::make_unique<BlockFrequencyInfo>(F, BPI, LI);
BFI = BFIPtr.get();
}
@@ -813,7 +813,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (!ModuleSummaryDotFile.empty()) {
std::error_code EC;
- raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None);
if (EC)
report_fatal_error(Twine("Failed to open dot file ") +
ModuleSummaryDotFile + ": " + EC.message() + "\n");
diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp
index b616cd6f762b..44527773115d 100644
--- a/lib/Analysis/MustExecute.cpp
+++ b/lib/Analysis/MustExecute.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MustExecute.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
@@ -19,8 +21,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
+#define DEBUG_TYPE "must-execute"
+
const DenseMap<BasicBlock *, ColorVector> &
LoopSafetyInfo::getBlockColors() const {
return BlockColors;
@@ -306,6 +311,17 @@ namespace {
}
bool runOnFunction(Function &F) override;
};
+ struct MustBeExecutedContextPrinter : public ModulePass {
+ static char ID;
+
+ MustBeExecutedContextPrinter() : ModulePass(ID) {
+    initializeMustBeExecutedContextPrinterPass(
+        *PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnModule(Module &M) override;
+ };
}
char MustExecutePrinter::ID = 0;
@@ -320,6 +336,36 @@ FunctionPass *llvm::createMustExecutePrinter() {
return new MustExecutePrinter();
}
+char MustBeExecutedContextPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ MustBeExecutedContextPrinter, "print-must-be-executed-contexts",
+ "print the must-be-executed-contexed for all instructions", false, true)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(MustBeExecutedContextPrinter,
+ "print-must-be-executed-contexts",
+ "print the must-be-executed-contexed for all instructions",
+ false, true)
+
+ModulePass *llvm::createMustBeExecutedContextPrinter() {
+ return new MustBeExecutedContextPrinter();
+}
+
+bool MustBeExecutedContextPrinter::runOnModule(Module &M) {
+ MustBeExecutedContextExplorer Explorer(true);
+ for (Function &F : M) {
+ for (Instruction &I : instructions(F)) {
+ dbgs() << "-- Explore context of: " << I << "\n";
+ for (const Instruction *CI : Explorer.range(&I))
+ dbgs() << " [F: " << CI->getFunction()->getName() << "] " << *CI
+ << "\n";
+ }
+ }
+
+ return false;
+}
+
static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) {
// TODO: merge these two routines. For the moment, we display the best
// result obtained by *either* implementation. This is a bit unfair since no
@@ -396,3 +442,75 @@ bool MustExecutePrinter::runOnFunction(Function &F) {
return false;
}
+
+const Instruction *
+MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction(
+ MustBeExecutedIterator &It, const Instruction *PP) {
+ if (!PP)
+ return PP;
+ LLVM_DEBUG(dbgs() << "Find next instruction for " << *PP << "\n");
+
+ // If we explore only inside a given basic block we stop at terminators.
+ if (!ExploreInterBlock && PP->isTerminator()) {
+ LLVM_DEBUG(dbgs() << "\tReached terminator in intra-block mode, done\n");
+ return nullptr;
+ }
+
+  // If we do not traverse the call graph, we check if we can make progress in
+ // the current function. First, check if the instruction is guaranteed to
+ // transfer execution to the successor.
+ bool TransfersExecution = isGuaranteedToTransferExecutionToSuccessor(PP);
+ if (!TransfersExecution)
+ return nullptr;
+
+ // If this is not a terminator we know that there is a single instruction
+  // after this one that is executed next if control is transferred. If not,
+  // we can try to go back to a call site we entered earlier. If none exists,
+  // we do not know any instruction that has to be executed next.
+ if (!PP->isTerminator()) {
+ const Instruction *NextPP = PP->getNextNode();
+ LLVM_DEBUG(dbgs() << "\tIntermediate instruction does transfer control\n");
+ return NextPP;
+ }
+
+ // Finally, we have to handle terminators, trivial ones first.
+ assert(PP->isTerminator() && "Expected a terminator!");
+
+ // A terminator without a successor is not handled yet.
+ if (PP->getNumSuccessors() == 0) {
+ LLVM_DEBUG(dbgs() << "\tUnhandled terminator\n");
+ return nullptr;
+ }
+
+  // For a terminator with a single successor, we will continue at the
+  // beginning of that successor.
+ if (PP->getNumSuccessors() == 1) {
+ LLVM_DEBUG(
+ dbgs() << "\tUnconditional terminator, continue with successor\n");
+ return &PP->getSuccessor(0)->front();
+ }
+
+ LLVM_DEBUG(dbgs() << "\tNo join point found\n");
+ return nullptr;
+}
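+// In effect this walks straight-line code: from an instruction guaranteed to
+// transfer execution it steps to the next instruction, and from an
+// unconditional terminator to the first instruction of the sole successor;
+// multi-successor terminators currently end the walk (no join handling yet).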
+
+MustBeExecutedIterator::MustBeExecutedIterator(
+ MustBeExecutedContextExplorer &Explorer, const Instruction *I)
+ : Explorer(Explorer), CurInst(I) {
+ reset(I);
+}
+
+void MustBeExecutedIterator::reset(const Instruction *I) {
+ CurInst = I;
+ Visited.clear();
+ Visited.insert(I);
+}
+
+const Instruction *MustBeExecutedIterator::advance() {
+ assert(CurInst && "Cannot advance an end iterator!");
+ const Instruction *Next =
+ Explorer.getMustBeExecutedNextInstruction(*this, CurInst);
+ if (Next && !Visited.insert(Next).second)
+ Next = nullptr;
+ return Next;
+}
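+// Usage sketch: Explorer.range(&I) visits I followed by every instruction
+// known to execute whenever I executes, e.g.
+//   for (const Instruction *CI : Explorer.range(&I))
+//     dbgs() << *CI << "\n";
+// as MustBeExecutedContextPrinter::runOnModule does above.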
diff --git a/lib/Analysis/OptimizationRemarkEmitter.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp
index 72c40a0be232..07a5619a35b9 100644
--- a/lib/Analysis/OptimizationRemarkEmitter.cpp
+++ b/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -39,7 +39,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F)
BPI.calculate(*F, LI);
// Finally compute BFI.
- OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+ OwnedBFI = std::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
BFI = OwnedBFI.get();
}
@@ -97,7 +97,7 @@ bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) {
else
BFI = nullptr;
- ORE = llvm::make_unique<OptimizationRemarkEmitter>(&Fn, BFI);
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn, BFI);
return false;
}
diff --git a/lib/Analysis/OrderedInstructions.cpp b/lib/Analysis/OrderedInstructions.cpp
index 458c0a7de6c2..e947e5e388a8 100644
--- a/lib/Analysis/OrderedInstructions.cpp
+++ b/lib/Analysis/OrderedInstructions.cpp
@@ -21,7 +21,7 @@ bool OrderedInstructions::localDominates(const Instruction *InstA,
const BasicBlock *IBB = InstA->getParent();
auto OBB = OBBMap.find(IBB);
if (OBB == OBBMap.end())
- OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
+ OBB = OBBMap.insert({IBB, std::make_unique<OrderedBasicBlock>(IBB)}).first;
return OBB->second->dominates(InstA, InstB);
}
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index dce19d6d546e..b99b75715025 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -45,6 +45,13 @@ static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
+static cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
+ "profile-summary-large-working-set-size-threshold", cl::Hidden,
+ cl::init(12500), cl::ZeroOrMore,
+ cl::desc("The code working set size is considered large if the number of"
+ " blocks required to reach the -profile-summary-cutoff-hot"
+ " percentile exceeds this count."));
+
// The next two options override the counts derived from summary computation and
// are useful for debugging purposes.
static cl::opt<int> ProfileSummaryHotCount(
@@ -186,6 +193,31 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
return true;
}
+// Like isFunctionHotInCallGraph but for a given cutoff.
+bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile(
+ int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) {
+ if (!F || !computeSummary())
+ return false;
+ if (auto FunctionCount = F->getEntryCount())
+ if (isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount()))
+ return true;
+
+ if (hasSampleProfile()) {
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ if (isHotCountNthPercentile(PercentileCutoff, TotalCallCount))
+ return true;
+ }
+ for (const auto &BB : *F)
+ if (isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI))
+ return true;
+ return false;
+}
+
 /// Returns true if the function's entry is cold. If it returns false, it
/// either means it is not cold or it is unknown whether it is cold or not (for
/// example, no profile data is available).
@@ -222,6 +254,23 @@ void ProfileSummaryInfo::computeThresholds() {
"Cold count threshold cannot exceed hot count threshold!");
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
+ HasLargeWorkingSetSize =
+ HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
+}
+
+Optional<uint64_t> ProfileSummaryInfo::computeThreshold(int PercentileCutoff) {
+ if (!computeSummary())
+ return None;
+ auto iter = ThresholdCache.find(PercentileCutoff);
+ if (iter != ThresholdCache.end()) {
+ return iter->second;
+ }
+ auto &DetailedSummary = Summary->getDetailedSummary();
+ auto &Entry =
+ getEntryForPercentile(DetailedSummary, PercentileCutoff);
+ uint64_t CountThreshold = Entry.MinCount;
+ ThresholdCache[PercentileCutoff] = CountThreshold;
+ return CountThreshold;
}
bool ProfileSummaryInfo::hasHugeWorkingSetSize() {
@@ -230,6 +279,12 @@ bool ProfileSummaryInfo::hasHugeWorkingSetSize() {
return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue();
}
+bool ProfileSummaryInfo::hasLargeWorkingSetSize() {
+ if (!HasLargeWorkingSetSize)
+ computeThresholds();
+ return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue();
+}
+
bool ProfileSummaryInfo::isHotCount(uint64_t C) {
if (!HotCountThreshold)
computeThresholds();
@@ -242,6 +297,11 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
return ColdCountThreshold && C <= ColdCountThreshold.getValue();
}
+bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) {
+ auto CountThreshold = computeThreshold(PercentileCutoff);
+ return CountThreshold && C >= CountThreshold.getValue();
+}
+
uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() {
if (!HotCountThreshold)
computeThresholds();
@@ -265,6 +325,13 @@ bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
return Count && isColdCount(*Count);
}
+bool ProfileSummaryInfo::isHotBlockNthPercentile(int PercentileCutoff,
+ const BasicBlock *BB,
+ BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(BB);
+ return Count && isHotCountNthPercentile(PercentileCutoff, *Count);
+}
+
bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
BlockFrequencyInfo *BFI) {
auto C = getProfileCount(CS.getInstruction(), BFI);
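The new computeThreshold memoizes one count threshold per percentile cutoff so that repeated isHotCountNthPercentile queries stay cheap. A self-contained sketch of the same caching scheme (the names and the placeholder summary computation are hypothetical):

```cpp
#include <cstdint>
#include <map>

// Placeholder for reading the profile summary: the real code walks the
// detailed summary entries and returns Entry.MinCount for the cutoff.
uint64_t computeMinCountForPercentile(int PercentileCutoff) {
  return static_cast<uint64_t>(PercentileCutoff); // stand-in value
}

std::map<int, uint64_t> ThresholdCache;

// Compute the threshold once per cutoff, then answer hotness queries from
// the cache, mirroring computeThreshold/isHotCountNthPercentile above.
bool isHotCountAtPercentile(int PercentileCutoff, uint64_t Count) {
  auto It = ThresholdCache.find(PercentileCutoff);
  if (It == ThresholdCache.end())
    It = ThresholdCache
             .emplace(PercentileCutoff,
                      computeMinCountForPercentile(PercentileCutoff))
             .first;
  return Count >= It->second;
}
```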
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index bc2cfd6fcc42..5ce0a1adeaa0 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -148,6 +148,7 @@ STATISTIC(NumBruteForceTripCountsComputed,
static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+ cl::ZeroOrMore,
cl::desc("Maximum number of iterations SCEV will "
"symbolically execute a constant "
"derived loop"),
@@ -157,6 +158,9 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
static cl::opt<bool> VerifySCEV(
"verify-scev", cl::Hidden,
cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
+static cl::opt<bool> VerifySCEVStrict(
+ "verify-scev-strict", cl::Hidden,
+ cl::desc("Enable stricter verification with -verify-scev is passed"));
static cl::opt<bool>
VerifySCEVMap("verify-scev-maps", cl::Hidden,
cl::desc("Verify no dangling value in ScalarEvolution's "
@@ -1707,7 +1711,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
 // in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
@@ -2051,7 +2055,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
 // in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
@@ -3421,7 +3425,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
}
- // It's tempting to want to call getMaxBackedgeTakenCount count here and
+ // It's tempting to call getConstantMaxBackedgeTakenCount here and
// use that information to infer NUW and NSW flags. However, computing a
// BE count requires calling getAddRecExpr, so we may not yet have a
// meaningful BE count at this point (and if we don't, we'd be stuck
@@ -4991,7 +4995,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
// overflow.
if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
- (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags);
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
return PHISCEV;
}
@@ -5596,6 +5600,22 @@ ScalarEvolution::getRangeRef(const SCEV *S,
ConservativeResult.intersectWith(X, RangeType));
}
+ if (const SCEVSMinExpr *SMin = dyn_cast<SCEVSMinExpr>(S)) {
+ ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint);
+ for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i)
+ X = X.smin(getRangeRef(SMin->getOperand(i), SignHint));
+ return setRange(SMin, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
+ }
+
+ if (const SCEVUMinExpr *UMin = dyn_cast<SCEVUMinExpr>(S)) {
+ ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint);
+ for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i)
+ X = X.umin(getRangeRef(UMin->getOperand(i), SignHint));
+ return setRange(UMin, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
+ }
+
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
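The SMin/UMin handling added just above folds the operand ranges pairwise; the underlying interval fact is that min over intervals takes the min of each bound. A small self-contained illustration of the signed case (hypothetical range type, standing in for ConstantRange):

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct SignedRange { int64_t Lo, Hi; }; // inclusive bounds

// For x in [A.Lo, A.Hi] and y in [B.Lo, B.Hi], min(x, y) ranges over
// [min(A.Lo, B.Lo), min(A.Hi, B.Hi)], and both bounds are attainable.
SignedRange smin(SignedRange A, SignedRange B) {
  return {std::min(A.Lo, B.Lo), std::min(A.Hi, B.Hi)};
}

// Mirrors the SCEVSMinExpr case: fold the ranges of all operands.
SignedRange rangeOfSMin(const std::vector<SignedRange> &Ops) {
  SignedRange X = Ops.front();
  for (std::size_t I = 1, E = Ops.size(); I != E; ++I)
    X = smin(X, Ops[I]);
  return X;
}
```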
@@ -5654,7 +5674,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
// TODO: non-affine addrec
if (AddRec->isAffine()) {
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
auto RangeFromAffine = getRangeForAffineAR(
@@ -6523,7 +6543,7 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L,
unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
const auto *MaxExitCount =
- dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L));
+ dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
return getConstantTripCount(MaxExitCount);
}
@@ -6599,7 +6619,7 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
/// Similar to getBackedgeTakenCount, except return the least SCEV value that is
/// known never to be less than the actual backedge taken count.
-const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+const SCEV *ScalarEvolution::getConstantMaxBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).getMax(this);
}
@@ -9833,6 +9853,10 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
// We avoid subtracting expressions here because this function is usually
// fairly deep in the call stack (i.e. is called many times).
+ // X - X = 0.
+ if (More == Less)
+ return APInt(getTypeSizeInBits(More->getType()), 0);
+
if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
const auto *LAR = cast<SCEVAddRecExpr>(Less);
const auto *MAR = cast<SCEVAddRecExpr>(More);
@@ -10314,10 +10338,43 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
return false;
}
+static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // zext x u<= sext x, sext x s<= zext x
+ switch (Pred) {
+ case ICmpInst::ICMP_SGE:
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_SLE: {
+ // If operand >=s 0 then ZExt == SExt. If operand <s 0 then SExt <s ZExt.
+ const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(LHS);
+ const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(RHS);
+ if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
+ return true;
+ break;
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_ULE: {
+ // If operand >=s 0 then ZExt == SExt. If operand <s 0 then ZExt <u SExt.
+ const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS);
+ const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(RHS);
+ if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
+ return true;
+ break;
+ }
+ default:
+ break;
+ };
+ return false;
+}
+
bool
ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
- return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
+ return isKnownPredicateExtendIdiom(Pred, LHS, RHS) ||
+ isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
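The idiom encoded by isKnownPredicateExtendIdiom can be verified exhaustively at a small width; a standalone program (illustrative, not from this patch) checking that zext x u<= sext x and sext x s<= zext x for every i8 value extended to i16:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int V = 0; V != 256; ++V) {
    uint16_t ZExt = static_cast<uint8_t>(V); // zero-extension to 16 bits
    int16_t SExt = static_cast<int8_t>(V);   // sign-extension to 16 bits
    // For V >=s 0 the two agree; for V <s 0, zext is small and positive
    // while sext is huge unsigned (and negative signed).
    assert(ZExt <= static_cast<uint16_t>(SExt)); // zext x u<= sext x
    assert(SExt <= static_cast<int16_t>(ZExt));  // sext x s<= zext x
  }
  return 0;
}
```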
@@ -11434,8 +11491,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
- if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
- OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(SE->getConstantMaxBackedgeTakenCount(L))) {
+ OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L);
if (SE->isBackedgeTakenCountMaxOrZero(L))
OS << ", actual taken count either this or zero.";
} else {
@@ -11901,14 +11958,14 @@ void ScalarEvolution::verify() const {
SE.getTypeSizeInBits(NewBECount->getType()))
CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());
- auto *ConstantDelta =
- dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount));
+ const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount);
- if (ConstantDelta && ConstantDelta->getAPInt() != 0) {
- dbgs() << "Trip Count Changed!\n";
+ // Unless VerifySCEVStrict is set, we only compare constant deltas.
+ if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) {
+ dbgs() << "Trip Count for " << *L << " Changed!\n";
dbgs() << "Old: " << *CurBECount << "\n";
dbgs() << "New: " << *NewBECount << "\n";
- dbgs() << "Delta: " << *ConstantDelta << "\n";
+ dbgs() << "Delta: " << *Delta << "\n";
std::abort();
}
}
@@ -11959,7 +12016,7 @@ ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
SE.reset(new ScalarEvolution(
- F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index e8a95d35482c..bceec921188e 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -240,9 +240,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
/// division. If so, update S with Factor divided out and return true.
/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
-/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
-/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
-/// check to see if the divide was folded.
static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
const SCEV *Factor, ScalarEvolution &SE,
const DataLayout &DL) {
@@ -1486,7 +1483,18 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
}
Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
- if (!CanonicalMode) return expandAddRecExprLiterally(S);
+ // In canonical mode we compute the addrec as an expression of a canonical IV
+ // using evaluateAtIteration and expand the resulting SCEV expression. This
+ // way we avoid introducing new IVs to carry on the computation of the addrec
+ // throughout the loop.
+ //
+ // For nested addrecs evaluateAtIteration might need a canonical IV of a
+ // type wider than the addrec itself. Emitting a canonical IV of the
+ // proper type might produce non-legal types, for example expanding an i64
+ // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
+ // back to non-canonical mode for nested addrecs.
+ if (!CanonicalMode || (S->getNumOperands() > 2))
+ return expandAddRecExprLiterally(S);
Type *Ty = SE.getEffectiveSCEVType(S->getType());
const Loop *L = S->getLoop();
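As a gloss on the comment above (mine, not part of the patch): evaluateAtIteration expands an add recurrence at iteration $n$ via binomial coefficients,

$$\{a_0,+,a_1,+,\dots,+,a_m\}(n) \;=\; \sum_{i=0}^{m} a_i \binom{n}{i}.$$

For the i64 addrec {0,+,2,+,1} this is $2\binom{n}{1} + \binom{n}{2} = 2n + n(n-1)/2$; evaluating the $\binom{n}{2}$ term without losing the bit dropped by the division by two is, roughly, what forces the wider (i65) canonical IV mentioned in the comment.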
@@ -2094,11 +2102,10 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
for (BasicBlock *BB : ExitingBlocks) {
ICmpInst::Predicate Pred;
Instruction *LHS, *RHS;
- BasicBlock *TrueBB, *FalseBB;
if (!match(BB->getTerminator(),
m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
- TrueBB, FalseBB)))
+ m_BasicBlock(), m_BasicBlock())))
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
diff --git a/lib/Analysis/StackSafetyAnalysis.cpp b/lib/Analysis/StackSafetyAnalysis.cpp
index 4cf235db86eb..1b3638698950 100644
--- a/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/lib/Analysis/StackSafetyAnalysis.cpp
@@ -333,8 +333,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) {
// FIXME: consult devirt?
// Do not follow aliases, otherwise we could inadvertently follow
// dso_preemptable aliases or aliases with interposable linkage.
- const GlobalValue *Callee = dyn_cast<GlobalValue>(
- CS.getCalledValue()->stripPointerCastsNoFollowAliases());
+ const GlobalValue *Callee =
+ dyn_cast<GlobalValue>(CS.getCalledValue()->stripPointerCasts());
if (!Callee) {
US.updateRange(UnknownRange);
return false;
diff --git a/lib/Analysis/SyncDependenceAnalysis.cpp b/lib/Analysis/SyncDependenceAnalysis.cpp
index 3cf248a31142..8447dc87069d 100644
--- a/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -218,9 +218,11 @@ struct DivergencePropagator {
template <typename SuccessorIterable>
std::unique_ptr<ConstBlockSet>
computeJoinPoints(const BasicBlock &RootBlock,
- SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
+ SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
assert(JoinBlocks);
+ LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " << (ParentLoop ? ParentLoop->getName() : "<null>") << "\n" );
+
// bootstrap with branch targets
for (const auto *SuccBlock : NodeSuccessors) {
DefMap.emplace(SuccBlock, SuccBlock);
@@ -228,13 +230,19 @@ struct DivergencePropagator {
if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
// immediate loop exit from node.
ReachedLoopExits.insert(SuccBlock);
- continue;
} else {
// regular successor
PendingUpdates.insert(SuccBlock);
}
}
+ LLVM_DEBUG(
+ dbgs() << "SDA: rpo order:\n";
+ for (const auto * RpoBlock : FuncRPOT) {
+ dbgs() << "- " << RpoBlock->getName() << "\n";
+ }
+ );
+
auto ItBeginRPO = FuncRPOT.begin();
// skip until term (TODO RPOT won't let us start at @term directly)
@@ -245,16 +253,18 @@ struct DivergencePropagator {
// propagate definitions at the immediate successors of the node in RPO
auto ItBlockRPO = ItBeginRPO;
- while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
+ while ((++ItBlockRPO != ItEndRPO) &&
+ !PendingUpdates.empty()) {
const auto *Block = *ItBlockRPO;
+ LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
- // skip @block if not pending update
+ // skip Block if not pending update
auto ItPending = PendingUpdates.find(Block);
if (ItPending == PendingUpdates.end())
continue;
PendingUpdates.erase(ItPending);
- // propagate definition at @block to its successors
+ // propagate definition at Block to its successors
auto ItDef = DefMap.find(Block);
const auto *DefBlock = ItDef->second;
assert(DefBlock);
@@ -278,6 +288,8 @@ struct DivergencePropagator {
}
}
+ LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
+
// We need to know the definition at the parent loop header to decide
// whether the definition at the header is different from the definition at
// the loop exits, which would indicate a divergent loop exits.
@@ -292,24 +304,17 @@ struct DivergencePropagator {
// |
// proper exit from both loops
//
- // D post-dominates B as it is the only proper exit from the "A loop".
- // If C has a divergent branch, propagation will therefore stop at D.
- // That implies that B will never receive a definition.
- // But that definition can only be the same as at D (D itself in thise case)
- // because all paths to anywhere have to pass through D.
- //
- const BasicBlock *ParentLoopHeader =
- ParentLoop ? ParentLoop->getHeader() : nullptr;
- if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
- DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
- }
-
// analyze reached loop exits
if (!ReachedLoopExits.empty()) {
+ const BasicBlock *ParentLoopHeader =
+ ParentLoop ? ParentLoop->getHeader() : nullptr;
+
assert(ParentLoop);
- const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
+ auto ItHeaderDef = DefMap.find(ParentLoopHeader);
+ const auto *HeaderDefBlock = (ItHeaderDef == DefMap.end()) ? nullptr : ItHeaderDef->second;
+
LLVM_DEBUG(printDefs(dbgs()));
- assert(HeaderDefBlock && "no definition in header of carrying loop");
+ assert(HeaderDefBlock && "no definition at header of carrying loop");
for (const auto *ExitBlock : ReachedLoopExits) {
auto ItExitDef = DefMap.find(ExitBlock);
@@ -339,19 +344,10 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
return *ItCached->second;
}
- // dont propagte beyond the immediate post dom of the loop
- const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
- const auto *IpdNode = PdNode->getIDom();
- const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
- while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
- IpdNode = IpdNode->getIDom();
- PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
- }
-
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
- *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
+ *Loop.getHeader(), LoopExits, Loop.getParentLoop());
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
assert(ItInserted.second);
@@ -370,16 +366,11 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
if (ItCached != CachedBranchJoins.end())
return *ItCached->second;
- // dont propagate beyond the immediate post dominator of the branch
- const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
- const auto *IpdNode = PdNode->getIDom();
- const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
-
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
const auto &TermBlock = *Term.getParent();
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
- TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
+ TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
assert(ItInserted.second);
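With the post-dominator bound removed, the RPO walk above now terminates purely by draining PendingUpdates. A loose, self-contained sketch of that propagation skeleton (hypothetical Block type; the real analysis carries more state and a subtler join rule):

```cpp
#include <map>
#include <set>
#include <string>
#include <vector>

using Block = std::string; // stand-in for const BasicBlock *

// Visit blocks in reverse post-order, stopping as soon as nothing is
// pending instead of at a precomputed post-dominator bound. Every block
// reachable here must have an entry in Succs.
void propagate(const std::vector<Block> &RPO,
               const std::map<Block, std::vector<Block>> &Succs,
               std::map<Block, Block> &DefMap,
               std::set<Block> &PendingUpdates) {
  for (const Block &B : RPO) {
    if (PendingUpdates.empty())
      break; // all definitions have settled
    auto ItPending = PendingUpdates.find(B);
    if (ItPending == PendingUpdates.end())
      continue; // B has no pending update
    PendingUpdates.erase(ItPending);

    const Block Def = DefMap.at(B);
    for (const Block &S : Succs.at(B)) {
      auto Ins = DefMap.emplace(S, Def);
      if (Ins.second) {
        PendingUpdates.insert(S); // first definition to reach S
      } else if (Ins.first->second != Def && Ins.first->second != S) {
        Ins.first->second = S;    // distinct defs meet: S is a join point
        PendingUpdates.insert(S);
      }
    }
  }
}
```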
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index ef139d3257d2..230969698054 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -28,7 +28,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
"Intel SVML library")));
-StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
+ {
#define TLI_DEFINE_STRING
#include "llvm/Analysis/TargetLibraryInfo.def"
};
@@ -58,14 +59,14 @@ static bool hasBcmp(const Triple &TT) {
return TT.isGNUEnvironment() || TT.isMusl();
// Both NetBSD and OpenBSD are planning to remove the function. Windows does
// not have it.
- return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin();
+ return TT.isOSFreeBSD() || TT.isOSSolaris();
}
/// Initialize the set of available library functions based on the specified
/// target triple. This should be carefully written so that a missing target
/// triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
- ArrayRef<StringRef> StandardNames) {
+ ArrayRef<StringLiteral> StandardNames) {
// Verify that the StandardNames array is in alphabetical order.
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
[](StringRef LHS, StringRef RHS) {
@@ -104,19 +105,10 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setShouldSignExtI32Param(ShouldSignExtI32Param);
if (T.getArch() == Triple::r600 ||
- T.getArch() == Triple::amdgcn) {
- TLI.setUnavailable(LibFunc_ldexp);
- TLI.setUnavailable(LibFunc_ldexpf);
- TLI.setUnavailable(LibFunc_ldexpl);
- TLI.setUnavailable(LibFunc_exp10);
- TLI.setUnavailable(LibFunc_exp10f);
- TLI.setUnavailable(LibFunc_exp10l);
- TLI.setUnavailable(LibFunc_log10);
- TLI.setUnavailable(LibFunc_log10f);
- TLI.setUnavailable(LibFunc_log10l);
- }
+ T.getArch() == Triple::amdgcn)
+ TLI.disableAllFunctions();
- // There are no library implementations of mempcy and memset for AMD gpus and
+ // There are no library implementations of memcpy and memset for AMD gpus and
// these can be difficult to lower in the backend.
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
@@ -623,19 +615,14 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
return GlobalValue::dropLLVMManglingEscape(funcName);
}
-bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
- LibFunc &F) const {
- StringRef const *Start = &StandardNames[0];
- StringRef const *End = &StandardNames[NumLibFuncs];
-
+bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc &F) const {
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())
return false;
- StringRef const *I = std::lower_bound(
- Start, End, funcName, [](StringRef LHS, StringRef RHS) {
- return LHS < RHS;
- });
+ const auto *Start = std::begin(StandardNames);
+ const auto *End = std::end(StandardNames);
+ const auto *I = std::lower_bound(Start, End, funcName);
if (I != End && *I == funcName) {
F = (LibFunc)(I - Start);
return true;
@@ -1481,6 +1468,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return false;
}
case LibFunc::NumLibFuncs:
+ case LibFunc::NotLibFunc:
break;
}
@@ -1599,14 +1587,6 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
return I->ScalarFnName;
}
-TargetLibraryInfo TargetLibraryAnalysis::run(Module &M,
- ModuleAnalysisManager &) {
- if (PresetInfoImpl)
- return TargetLibraryInfo(*PresetInfoImpl);
-
- return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple())));
-}
-
TargetLibraryInfo TargetLibraryAnalysis::run(Function &F,
FunctionAnalysisManager &) {
if (PresetInfoImpl)
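The rewritten getLibFunc leans on StandardNames being sorted (the assert in initialize() above checks this) so std::lower_bound both finds the name and yields the enum value as the array index. The same idiom in miniature, with a hypothetical table:

```cpp
#include <algorithm>
#include <iterator>
#include <string_view>

// Sorted name table; the enum values are the indices into the table.
constexpr std::string_view Names[] = {"cos", "exp", "log", "sin", "sqrt"};
enum class Func { Cos, Exp, Log, Sin, Sqrt, NotFound };

Func lookup(std::string_view Name) {
  const auto *Start = std::begin(Names);
  const auto *End = std::end(Names);
  const auto *I = std::lower_bound(Start, End, Name);
  if (I != End && *I == Name)
    return static_cast<Func>(I - Start); // index doubles as enum value
  return Func::NotFound;
}
```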
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index eb04c34453fb..c9c294873ea6 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -9,6 +9,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -59,11 +60,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
SmallVector<BasicBlock *, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
- IE = ExitingBlocks.end();
- I != IE; ++I) {
- BasicBlock *BB = *I;
-
+ for (BasicBlock *BB : ExitingBlocks) {
// If we pass the updated counter back through a phi, we need to know
// which latch the updated value will be coming from.
if (!L->isLoopLatch(BB)) {
@@ -97,13 +94,11 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
// For this to be true, we must dominate all blocks with backedges. Such
// blocks are in-loop predecessors to the header block.
bool NotAlways = false;
- for (pred_iterator PI = pred_begin(L->getHeader()),
- PIE = pred_end(L->getHeader());
- PI != PIE; ++PI) {
- if (!L->contains(*PI))
+ for (BasicBlock *Pred : predecessors(L->getHeader())) {
+ if (!L->contains(Pred))
continue;
- if (!DT.dominates(*I, *PI)) {
+ if (!DT.dominates(BB, Pred)) {
NotAlways = true;
break;
}
@@ -127,7 +122,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
// Note that this block may not be the loop latch block, even if the loop
// has a latch block.
- ExitBlock = *I;
+ ExitBlock = BB;
ExitCount = EC;
break;
}
@@ -227,6 +222,16 @@ unsigned TargetTransformInfo::getFlatAddressSpace() const {
return TTIImpl->getFlatAddressSpace();
}
+bool TargetTransformInfo::collectFlatAddressOperands(
+ SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
+ return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
+}
+
+bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
+ IntrinsicInst *II, Value *OldV, Value *NewV) const {
+ return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
+}
+
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
@@ -283,21 +288,22 @@ bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
return TTIImpl->shouldFavorBackedgeIndex(L);
}
-bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
- return TTIImpl->isLegalMaskedStore(DataType);
+bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
+ MaybeAlign Alignment) const {
+ return TTIImpl->isLegalMaskedStore(DataType, Alignment);
}
-bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
- return TTIImpl->isLegalMaskedLoad(DataType);
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
+ MaybeAlign Alignment) const {
+ return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
}
bool TargetTransformInfo::isLegalNTStore(Type *DataType,
- unsigned Alignment) const {
+ Align Alignment) const {
return TTIImpl->isLegalNTStore(DataType, Alignment);
}
-bool TargetTransformInfo::isLegalNTLoad(Type *DataType,
- unsigned Alignment) const {
+bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
return TTIImpl->isLegalNTLoad(DataType, Alignment);
}
@@ -359,14 +365,6 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
return TTIImpl->isTypeLegal(Ty);
}
-unsigned TargetTransformInfo::getJumpBufAlignment() const {
- return TTIImpl->getJumpBufAlignment();
-}
-
-unsigned TargetTransformInfo::getJumpBufSize() const {
- return TTIImpl->getJumpBufSize();
-}
-
bool TargetTransformInfo::shouldBuildLookupTables() const {
return TTIImpl->shouldBuildLookupTables();
}
@@ -470,8 +468,16 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return Cost;
}
-unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
- return TTIImpl->getNumberOfRegisters(Vector);
+unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
+ return TTIImpl->getNumberOfRegisters(ClassID);
+}
+
+unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const {
+ return TTIImpl->getRegisterClassForType(Vector, Ty);
+}
+
+const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
+ return TTIImpl->getRegisterClassName(ClassID);
}
unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
@@ -1276,6 +1282,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
return getVectorInstrCost(I->getOpcode(),
IE->getType(), Idx);
}
+ case Instruction::ExtractValue:
+ return 0; // Model all ExtractValue nodes as free.
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
Type *Ty = Shuffle->getType();
diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp
index 9311dfbc6eba..072d291f3f93 100644
--- a/lib/Analysis/TypeMetadataUtils.cpp
+++ b/lib/Analysis/TypeMetadataUtils.cpp
@@ -127,3 +127,35 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr,
Offset->getZExtValue(), CI, DT);
}
+
+Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) {
+ if (I->getType()->isPointerTy()) {
+ if (Offset == 0)
+ return I;
+ return nullptr;
+ }
+
+ const DataLayout &DL = M.getDataLayout();
+
+ if (auto *C = dyn_cast<ConstantStruct>(I)) {
+ const StructLayout *SL = DL.getStructLayout(C->getType());
+ if (Offset >= SL->getSizeInBytes())
+ return nullptr;
+
+ unsigned Op = SL->getElementContainingOffset(Offset);
+ return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
+ Offset - SL->getElementOffset(Op), M);
+ }
+ if (auto *C = dyn_cast<ConstantArray>(I)) {
+ ArrayType *VTableTy = C->getType();
+ uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType());
+
+ unsigned Op = Offset / ElemSize;
+ if (Op >= C->getNumOperands())
+ return nullptr;
+
+ return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
+ Offset % ElemSize, M);
+ }
+ return nullptr;
+}
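getPointerAtOffset descends through struct and array constants by turning a byte offset into an element index plus a remainder. For the struct case, StructLayout::getElementContainingOffset is essentially a binary search over the sorted field offsets; a minimal stand-in (hypothetical layout representation, mine):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// FieldOffsets holds the sorted byte offsets of the fields, with
// FieldOffsets[0] == 0. Returns the index of the field containing the byte
// at Offset; Offset must be smaller than the struct's total size.
unsigned elementContainingOffset(const std::vector<uint64_t> &FieldOffsets,
                                 uint64_t Offset) {
  auto It = std::upper_bound(FieldOffsets.begin(), FieldOffsets.end(), Offset);
  return static_cast<unsigned>((It - FieldOffsets.begin()) - 1);
}

// E.g. with fields at {0, 8, 16}, byte 10 falls in field 1, and the
// recursion above would continue with Offset - 8 == 2 inside that field.
```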
diff --git a/lib/Analysis/VFABIDemangling.cpp b/lib/Analysis/VFABIDemangling.cpp
new file mode 100644
index 000000000000..6fd8ae63f5f0
--- /dev/null
+++ b/lib/Analysis/VFABIDemangling.cpp
@@ -0,0 +1,418 @@
+//===- VFABIDemangling.cpp - Vector Function ABI demangling utilities. ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/VectorUtils.h"
+
+using namespace llvm;
+
+namespace {
+/// Utilities for the Vector Function ABI name parser.
+
+/// Return types for the parser functions.
+enum class ParseRet {
+ OK, // Found.
+ None, // Not found.
+ Error // Syntax error.
+};
+
+/// Extracts the `<isa>` information from the mangled string, and
+/// sets the `ISA` accordingly.
+ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) {
+ if (MangledName.empty())
+ return ParseRet::Error;
+
+ ISA = StringSwitch<VFISAKind>(MangledName.take_front(1))
+ .Case("n", VFISAKind::AdvancedSIMD)
+ .Case("s", VFISAKind::SVE)
+ .Case("b", VFISAKind::SSE)
+ .Case("c", VFISAKind::AVX)
+ .Case("d", VFISAKind::AVX2)
+ .Case("e", VFISAKind::AVX512)
+ .Default(VFISAKind::Unknown);
+
+ MangledName = MangledName.drop_front(1);
+
+ return ParseRet::OK;
+}
+
+/// Extracts the `<mask>` information from the mangled string, and
+/// sets `IsMasked` accordingly. On success, the parsed token is
+/// removed from the input string `MangledName`.
+ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) {
+ if (MangledName.consume_front("M")) {
+ IsMasked = true;
+ return ParseRet::OK;
+ }
+
+ if (MangledName.consume_front("N")) {
+ IsMasked = false;
+ return ParseRet::OK;
+ }
+
+ return ParseRet::Error;
+}
+
+/// Extracts the `<vlen>` information from the mangled string, and
+/// sets `VF` accordingly. A `<vlen> == "x"` token is interpreted as a scalable
+/// vector length. On success, the `<vlen>` token is removed from
+/// the input string `ParseString`.
+///
+ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) {
+ if (ParseString.consume_front("x")) {
+ VF = 0;
+ IsScalable = true;
+ return ParseRet::OK;
+ }
+
+ if (ParseString.consumeInteger(10, VF))
+ return ParseRet::Error;
+
+ IsScalable = false;
+ return ParseRet::OK;
+}
+
+/// The function looks for the following strings at the beginning of
+/// the input string `ParseString`:
+///
+/// <token> <number>
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `Pos` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+///
+/// The function expects <token> to be one of "ls", "Rs", "Us" or
+/// "Ls".
+ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString,
+ VFParamKind &PKind, int &Pos,
+ const StringRef Token) {
+ if (ParseString.consume_front(Token)) {
+ PKind = VFABI::getVFParamKindFromString(Token);
+ if (ParseString.consumeInteger(10, Pos))
+ return ParseRet::Error;
+ return ParseRet::OK;
+ }
+
+ return ParseRet::None;
+}
+
+/// The function looks for the following string at the beginning of
+/// the input string `ParseString`:
+///
+/// <token> <number>
+///
+/// <token> is one of "ls", "Rs", "Us" or "Ls".
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `StepOrPos` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseLinearWithRuntimeStep(StringRef &ParseString,
+ VFParamKind &PKind, int &StepOrPos) {
+ ParseRet Ret;
+
+ // "ls" <RuntimeStepPos>
+ Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "ls");
+ if (Ret != ParseRet::None)
+ return Ret;
+
+ // "Rs" <RuntimeStepPos>
+ Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Rs");
+ if (Ret != ParseRet::None)
+ return Ret;
+
+ // "Ls" <RuntimeStepPos>
+ Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Ls");
+ if (Ret != ParseRet::None)
+ return Ret;
+
+ // "Us" <RuntimeStepPos>
+ Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Us");
+ if (Ret != ParseRet::None)
+ return Ret;
+
+ return ParseRet::None;
+}
+
+/// The function looks for the following strings at the beginning of
+/// the input string `ParseString`:
+///
+/// <token> {"n"} <number>
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `LinearStep` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+///
+/// The function expects <token> to be one of "l", "R", "U" or
+/// "L".
+ParseRet tryParseCompileTimeLinearToken(StringRef &ParseString,
+ VFParamKind &PKind, int &LinearStep,
+ const StringRef Token) {
+ if (ParseString.consume_front(Token)) {
+ PKind = VFABI::getVFParamKindFromString(Token);
+ const bool Negate = ParseString.consume_front("n");
+ if (ParseString.consumeInteger(10, LinearStep))
+ LinearStep = 1;
+ if (Negate)
+ LinearStep *= -1;
+ return ParseRet::OK;
+ }
+
+ return ParseRet::None;
+}
+
+/// The function looks for the following strings at the beginning of
+/// the input string `ParseString`:
+///
+/// ["l" | "R" | "U" | "L"] {"n"} <number>
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `LinearStep` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString,
+ VFParamKind &PKind, int &StepOrPos) {
+ // "l" {"n"} <CompileTimeStep>
+ if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "l") ==
+ ParseRet::OK)
+ return ParseRet::OK;
+
+ // "R" {"n"} <CompileTimeStep>
+ if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "R") ==
+ ParseRet::OK)
+ return ParseRet::OK;
+
+ // "L" {"n"} <CompileTimeStep>
+ if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "L") ==
+ ParseRet::OK)
+ return ParseRet::OK;
+
+ // "U" {"n"} <CompileTimeStep>
+ if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "U") ==
+ ParseRet::OK)
+ return ParseRet::OK;
+
+ return ParseRet::None;
+}
+
+/// The function looks for the following strings at the beginning of
+/// the input string `ParseString`:
+///
+/// "u" <number>
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `Pos` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseUniform(StringRef &ParseString, VFParamKind &PKind, int &Pos) {
+ // "u" <Pos>
+ const char *UniformToken = "u";
+ if (ParseString.consume_front(UniformToken)) {
+ PKind = VFABI::getVFParamKindFromString(UniformToken);
+ if (ParseString.consumeInteger(10, Pos))
+ return ParseRet::Error;
+
+ return ParseRet::OK;
+ }
+ return ParseRet::None;
+}
+
+/// Looks into the <parameters> part of the mangled name in search
+/// for valid parameters at the beginning of the string
+/// `ParseString`.
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `StepOrPos`
+/// accordingly, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind,
+ int &StepOrPos) {
+ if (ParseString.consume_front("v")) {
+ PKind = VFParamKind::Vector;
+ StepOrPos = 0;
+ return ParseRet::OK;
+ }
+
+ const ParseRet HasLinearRuntime =
+ tryParseLinearWithRuntimeStep(ParseString, PKind, StepOrPos);
+ if (HasLinearRuntime != ParseRet::None)
+ return HasLinearRuntime;
+
+ const ParseRet HasLinearCompileTime =
+ tryParseLinearWithCompileTimeStep(ParseString, PKind, StepOrPos);
+ if (HasLinearCompileTime != ParseRet::None)
+ return HasLinearCompileTime;
+
+ const ParseRet HasUniform = tryParseUniform(ParseString, PKind, StepOrPos);
+ if (HasUniform != ParseRet::None)
+ return HasUniform;
+
+ return ParseRet::None;
+}
+
+/// Looks into the <parameters> part of the mangled name in search
+/// of a valid 'aligned' clause. The function should be invoked
+/// after parsing a parameter via `tryParseParameter`.
+///
+/// On success, it removes the parsed clause from `ParseString` and
+/// sets `Alignment` accordingly. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) {
+ uint64_t Val;
+ // "a" <number>
+ if (ParseString.consume_front("a")) {
+ if (ParseString.consumeInteger(10, Val))
+ return ParseRet::Error;
+
+ if (!isPowerOf2_64(Val))
+ return ParseRet::Error;
+
+ Alignment = Align(Val);
+
+ return ParseRet::OK;
+ }
+
+ return ParseRet::None;
+}
+} // namespace
+
+// Format of the ABI name:
+// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
+Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) {
+ // Assume there is no custom name <redirection>, and therefore the
+ // vector name consists of
+ // _ZGV<isa><mask><vlen><parameters>_<scalarname>.
+ StringRef VectorName = MangledName;
+
+ // Parse the fixed-size part of the mangled name
+ if (!MangledName.consume_front("_ZGV"))
+ return None;
+
+ // Extract ISA. An unknown ISA is also supported, so we accept all
+ // values.
+ VFISAKind ISA;
+ if (tryParseISA(MangledName, ISA) != ParseRet::OK)
+ return None;
+
+ // Extract <mask>.
+ bool IsMasked;
+ if (tryParseMask(MangledName, IsMasked) != ParseRet::OK)
+ return None;
+
+ // Parse the variable size, starting from <vlen>.
+ unsigned VF;
+ bool IsScalable;
+ if (tryParseVLEN(MangledName, VF, IsScalable) != ParseRet::OK)
+ return None;
+
+ // Parse the <parameters>.
+ ParseRet ParamFound;
+ SmallVector<VFParameter, 8> Parameters;
+ do {
+ const unsigned ParameterPos = Parameters.size();
+ VFParamKind PKind;
+ int StepOrPos;
+ ParamFound = tryParseParameter(MangledName, PKind, StepOrPos);
+
+ // Bail out if there is a parsing error while parsing the parameter.
+ if (ParamFound == ParseRet::Error)
+ return None;
+
+ if (ParamFound == ParseRet::OK) {
+ Align Alignment;
+ // Look for the alignment token "a <number>".
+ const ParseRet AlignFound = tryParseAlign(MangledName, Alignment);
+ // Bail out if there is a syntax error in the align token.
+ if (AlignFound == ParseRet::Error)
+ return None;
+
+ // Add the parameter.
+ Parameters.push_back({ParameterPos, PKind, StepOrPos, Alignment});
+ }
+ } while (ParamFound == ParseRet::OK);
+
+ // A valid MangledName must have at least one valid entry in the
+ // <parameters>.
+ if (Parameters.empty())
+ return None;
+
+ // Check for the <scalarname> and the optional <redirection>, which
+ // are separated from the prefix with "_"
+ if (!MangledName.consume_front("_"))
+ return None;
+
+ // The rest of the string must be in the format:
+ // <scalarname>[(<redirection>)]
+ const StringRef ScalarName =
+ MangledName.take_while([](char In) { return In != '('; });
+
+ if (ScalarName.empty())
+ return None;
+
+ // Reduce MangledName to [(<redirection>)].
+ MangledName = MangledName.ltrim(ScalarName);
+ // Find the optional custom name redirection.
+ if (MangledName.consume_front("(")) {
+ if (!MangledName.consume_back(")"))
+ return None;
+ // Update the vector variant with the one specified by the user.
+ VectorName = MangledName;
+ // If the vector name is missing, bail out.
+ if (VectorName.empty())
+ return None;
+ }
+
+ // When <mask> is "M", we need to add a parameter that is used as
+ // global predicate for the function.
+ if (IsMasked) {
+ const unsigned Pos = Parameters.size();
+ Parameters.push_back({Pos, VFParamKind::GlobalPredicate});
+ }
+
+ // Asserts for parameters of type `VFParamKind::GlobalPredicate`, as
+ // prescribed by the Vector Function ABI specifications supported by
+ // this parser:
+ // 1. Uniqueness.
+ // 2. Must be the last in the parameter list.
+ const auto NGlobalPreds = std::count_if(
+ Parameters.begin(), Parameters.end(), [](const VFParameter PK) {
+ return PK.ParamKind == VFParamKind::GlobalPredicate;
+ });
+ assert(NGlobalPreds < 2 && "Cannot have more than one global predicate.");
+ if (NGlobalPreds)
+ assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate &&
+ "The global predicate must be the last parameter");
+
+ const VFShape Shape({VF, IsScalable, ISA, Parameters});
+ return VFInfo({Shape, ScalarName, VectorName});
+}
+
+VFParamKind VFABI::getVFParamKindFromString(const StringRef Token) {
+ const VFParamKind ParamKind = StringSwitch<VFParamKind>(Token)
+ .Case("v", VFParamKind::Vector)
+ .Case("l", VFParamKind::OMP_Linear)
+ .Case("R", VFParamKind::OMP_LinearRef)
+ .Case("L", VFParamKind::OMP_LinearVal)
+ .Case("U", VFParamKind::OMP_LinearUVal)
+ .Case("ls", VFParamKind::OMP_LinearPos)
+ .Case("Ls", VFParamKind::OMP_LinearValPos)
+ .Case("Rs", VFParamKind::OMP_LinearRefPos)
+ .Case("Us", VFParamKind::OMP_LinearUValPos)
+ .Case("u", VFParamKind::OMP_Uniform)
+ .Default(VFParamKind::Unknown);
+
+ if (ParamKind != VFParamKind::Unknown)
+ return ParamKind;
+
+ // This function should never be invoked with an invalid input.
+ llvm_unreachable("This function should be invoked only on parameters"
+ " that have a textual representation in the mangled name"
+ " of the Vector Function ABI");
+}
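As a worked example of the grammar above (mine, not from the patch): "_ZGVnN2v_sin" should demangle to AdvancedSIMD ('n'), unmasked ('N'), VF = 2, a single vector parameter ('v'), and scalar name "sin", with the vector name left as the full mangled string since there is no <redirection>. A usage sketch, assuming the field names visible in the return statement above:

```cpp
#include "llvm/Analysis/VectorUtils.h"
#include <cassert>

using namespace llvm;

void demo() {
  Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN2v_sin");
  assert(Info && "expected a well-formed mangled name");
  assert(Info->Shape.VF == 2 && !Info->Shape.IsScalable);
  assert(Info->Shape.Parameters.size() == 1 &&
         Info->Shape.Parameters[0].ParamKind == VFParamKind::Vector);
  assert(Info->ScalarName == "sin");
  assert(Info->VectorName == "_ZGVnN2v_sin"); // no custom <redirection>
}
```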
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index c70906dcc629..bbf389991836 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -558,12 +558,18 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
return true;
}
+ // Don't let an assume affect itself - this would cause the problems
+ // `isEphemeralValueOf` is trying to prevent, and it would also make
+ // the loop below go out of bounds.
+ if (Inv == CxtI)
+ return false;
+
// The context comes first, but they're both in the same block. Make sure
// there is nothing in between that might interrupt the control flow.
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
return false;
return !isEphemeralValueOf(Inv, CxtI);
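The loop above replaces the old two-predicate test with a single reachability condition: every instruction strictly between the context and the assume must be guaranteed to reach its successor. A standalone restatement of that scan (assuming the declarations in llvm/Analysis/ValueTracking.h):

```cpp
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include <iterator>

using namespace llvm;

// True iff execution starting right after CxtI is guaranteed to reach Inv;
// both must be in the same basic block, with CxtI first.
bool alwaysReaches(const Instruction *CxtI, const Instruction *Inv) {
  for (BasicBlock::const_iterator
           I = std::next(BasicBlock::const_iterator(CxtI)),
           E(Inv);
       I != E; ++I)
    if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
      return false; // e.g. a call that may throw or not return
  return true;
}
```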
@@ -1049,7 +1055,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
break;
}
case Instruction::Select: {
- const Value *LHS, *RHS;
+ const Value *LHS = nullptr, *RHS = nullptr;
SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
if (SelectPatternResult::isMinOrMax(SPF)) {
computeKnownBits(RHS, Known, Depth + 1, Q);
@@ -1095,7 +1101,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// RHS from matchSelectPattern returns the negation part of abs pattern.
// If the negate has an NSW flag we can assume the sign bit of the result
// will be 0 because that makes abs(INT_MIN) undefined.
- if (Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
+ if (match(RHS, m_Neg(m_Specific(LHS))) &&
+ Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
MaxHighZeros = 1;
}
@@ -1366,7 +1373,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
else if (LR == I)
L = LL;
else
- break;
+ continue; // Check for recurrence with L and R flipped.
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
computeKnownBits(R, Known2, Depth + 1, Q);
@@ -1714,9 +1721,9 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
// Aligned pointers have trailing zeros - refine Known.Zero set
if (V->getType()->isPointerTy()) {
- unsigned Align = V->getPointerAlignment(Q.DL);
+ const MaybeAlign Align = V->getPointerAlignment(Q.DL);
if (Align)
- Known.Zero.setLowBits(countTrailingZeros(Align));
+ Known.Zero.setLowBits(countTrailingZeros(Align->value()));
}
// computeKnownBitsFromAssume strictly refines Known.
@@ -2066,7 +2073,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (const auto *Call = dyn_cast<CallBase>(V)) {
if (Call->isReturnNonNull())
return true;
- if (const auto *RP = getArgumentAliasingToReturnedPointer(Call))
+ if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
return isKnownNonZero(RP, Depth, Q);
}
}
@@ -2300,7 +2307,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
cast<Operator>(Select)->getOpcode() == Instruction::Select &&
"Input should be a Select!");
- const Value *LHS, *RHS, *LHS2, *RHS2;
+ const Value *LHS = nullptr, *RHS = nullptr;
SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
if (SPF != SPF_SMAX && SPF != SPF_SMIN)
return false;
@@ -2308,6 +2315,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
if (!match(RHS, m_APInt(CLow)))
return false;
+ const Value *LHS2 = nullptr, *RHS2 = nullptr;
SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
if (getInverseMinMaxFlavor(SPF) != SPF2)
return false;
@@ -2384,253 +2392,256 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
if (Depth == MaxDepth)
return 1; // Limit search depth.
- const Operator *U = dyn_cast<Operator>(V);
- switch (Operator::getOpcode(V)) {
- default: break;
- case Instruction::SExt:
- Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
- return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
+ if (auto *U = dyn_cast<Operator>(V)) {
+ switch (Operator::getOpcode(V)) {
+ default: break;
+ case Instruction::SExt:
+ Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
+ return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
- case Instruction::SDiv: {
- const APInt *Denominator;
- // sdiv X, C -> adds log(C) sign bits.
- if (match(U->getOperand(1), m_APInt(Denominator))) {
+ case Instruction::SDiv: {
+ const APInt *Denominator;
+ // sdiv X, C -> adds log(C) sign bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
- // Ignore non-positive denominator.
- if (!Denominator->isStrictlyPositive())
- break;
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
- // Calculate the incoming numerator bits.
- unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ // Calculate the incoming numerator bits.
+ unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- // Add floor(log(C)) bits to the numerator bits.
- return std::min(TyBits, NumBits + Denominator->logBase2());
+ // Add floor(log(C)) bits to the numerator bits.
+ return std::min(TyBits, NumBits + Denominator->logBase2());
+ }
+ break;
}
- break;
- }
-
- case Instruction::SRem: {
- const APInt *Denominator;
- // srem X, C -> we know that the result is within [-C+1,C) when C is a
- // positive constant. This let us put a lower bound on the number of sign
- // bits.
- if (match(U->getOperand(1), m_APInt(Denominator))) {
-
- // Ignore non-positive denominator.
- if (!Denominator->isStrictlyPositive())
- break;
- // Calculate the incoming numerator bits. SRem by a positive constant
- // can't lower the number of sign bits.
- unsigned NumrBits =
- ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ case Instruction::SRem: {
+ const APInt *Denominator;
+ // srem X, C -> we know that the result is within [-C+1,C) when C is a
+ // positive constant. This let us put a lower bound on the number of sign
+ // bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
- // Calculate the leading sign bit constraints by examining the
- // denominator. Given that the denominator is positive, there are two
- // cases:
- //
- // 1. the numerator is positive. The result range is [0,C) and [0,C) u<
- // (1 << ceilLogBase2(C)).
- //
- // 2. the numerator is negative. Then the result range is (-C,0] and
- // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
- //
- // Thus a lower bound on the number of sign bits is `TyBits -
- // ceilLogBase2(C)`.
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
- unsigned ResBits = TyBits - Denominator->ceilLogBase2();
- return std::max(NumrBits, ResBits);
+ // Calculate the incoming numerator bits. SRem by a positive constant
+ // can't lower the number of sign bits.
+ unsigned NumrBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+
+ // Calculate the leading sign bit constraints by examining the
+ // denominator. Given that the denominator is positive, there are two
+ // cases:
+ //
+ // 1. the numerator is positive. The result range is [0,C) and [0,C) u<
+ // (1 << ceilLogBase2(C)).
+ //
+ // 2. the numerator is negative. Then the result range is (-C,0] and
+ // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
+ //
+ // Thus a lower bound on the number of sign bits is `TyBits -
+ // ceilLogBase2(C)`.
+
+ unsigned ResBits = TyBits - Denominator->ceilLogBase2();
+ return std::max(NumrBits, ResBits);
+ }
+ break;
}
- break;
- }
- case Instruction::AShr: {
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- // ashr X, C -> adds C sign bits. Vectors too.
- const APInt *ShAmt;
- if (match(U->getOperand(1), m_APInt(ShAmt))) {
- if (ShAmt->uge(TyBits))
- break; // Bad shift.
- unsigned ShAmtLimited = ShAmt->getZExtValue();
- Tmp += ShAmtLimited;
- if (Tmp > TyBits) Tmp = TyBits;
- }
- return Tmp;
- }
- case Instruction::Shl: {
- const APInt *ShAmt;
- if (match(U->getOperand(1), m_APInt(ShAmt))) {
- // shl destroys sign bits.
+ case Instruction::AShr: {
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (ShAmt->uge(TyBits) || // Bad shift.
- ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
- Tmp2 = ShAmt->getZExtValue();
- return Tmp - Tmp2;
+ // ashr X, C -> adds C sign bits. Vectors too.
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ if (ShAmt->uge(TyBits))
+ break; // Bad shift.
+ unsigned ShAmtLimited = ShAmt->getZExtValue();
+ Tmp += ShAmtLimited;
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ return Tmp;
}
- break;
- }
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: // NOT is handled here.
- // Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- FirstAnswer = std::min(Tmp, Tmp2);
- // We computed what we know about the sign bits as our first
- // answer. Now proceed to the generic code that uses
- // computeKnownBits, and pick whichever answer is better.
+ case Instruction::Shl: {
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (ShAmt->uge(TyBits) || // Bad shift.
+ ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
+ Tmp2 = ShAmt->getZExtValue();
+ return Tmp - Tmp2;
+ }
+ break;
}
- break;
-
- case Instruction::Select: {
- // If we have a clamp pattern, we know that the number of sign bits will be
- // the minimum of the clamp min/max range.
- const Value *X;
- const APInt *CLow, *CHigh;
- if (isSignedMinMaxClamp(U, X, CLow, CHigh))
- return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
-
- Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (Tmp == 1) break;
- Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
- return std::min(Tmp, Tmp2);
- }
-
- case Instruction::Add:
- // Add can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (Tmp == 1) break;
-
- // Special case decrementing a value (ADD X, -1):
- if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
- if (CRHS->isAllOnesValue()) {
- KnownBits Known(TyBits);
- computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
-
- // If the input is known to be 0 or 1, the output is 0/-1, which is all
- // sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
- return TyBits;
-
- // If we are subtracting one from a positive number, there is no carry
- // out of the result.
- if (Known.isNonNegative())
- return Tmp;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // computeKnownBits, and pick whichever answer is better.
}
+ break;
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (Tmp2 == 1) break;
- return std::min(Tmp, Tmp2)-1;
+ case Instruction::Select: {
+ // If we have a clamp pattern, we know that the number of sign bits will
+ // be the minimum of the clamp min/max range.
+ const Value *X;
+ const APInt *CLow, *CHigh;
+ if (isSignedMinMaxClamp(U, X, CLow, CHigh))
+ return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
+
+ Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ if (Tmp == 1) break;
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
+ return std::min(Tmp, Tmp2);
+ }
- case Instruction::Sub:
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (Tmp2 == 1) break;
-
- // Handle NEG.
- if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
- if (CLHS->isNullValue()) {
- KnownBits Known(TyBits);
- computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
- // If the input is known to be 0 or 1, the output is 0/-1, which is all
- // sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
- return TyBits;
-
- // If the input is known to be positive (the sign bit is known clear),
- // the output of the NEG has the same number of sign bits as the input.
- if (Known.isNonNegative())
- return Tmp2;
-
- // Otherwise, we treat this like a SUB.
- }
+ case Instruction::Add:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (Tmp == 1) break;
+
+ // Special case decrementing a value (ADD X, -1):
+ if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ KnownBits Known(TyBits);
+ computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is
+ // all sign bits set.
+ if ((Known.Zero | 1).isAllOnesValue())
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (Known.isNonNegative())
+ return Tmp;
+ }
- // Sub can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (Tmp == 1) break;
- return std::min(Tmp, Tmp2)-1;
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ if (Tmp2 == 1) break;
+ return std::min(Tmp, Tmp2) - 1;
- case Instruction::Mul: {
- // The output of the Mul can be at most twice the valid bits in the inputs.
- unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (SignBitsOp0 == 1) break;
- unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (SignBitsOp1 == 1) break;
- unsigned OutValidBits =
- (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
- return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
- }
+ case Instruction::Sub:
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ if (Tmp2 == 1) break;
+
+ // Handle NEG.
+ if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
+ if (CLHS->isNullValue()) {
+ KnownBits Known(TyBits);
+ computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is
+ // all sign bits set.
+ if ((Known.Zero | 1).isAllOnesValue())
+ return TyBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the
+ // input.
+ if (Known.isNonNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
- case Instruction::PHI: {
- const PHINode *PN = cast<PHINode>(U);
- unsigned NumIncomingValues = PN->getNumIncomingValues();
- // Don't analyze large in-degree PHIs.
- if (NumIncomingValues > 4) break;
- // Unreachable blocks may have zero-operand PHI nodes.
- if (NumIncomingValues == 0) break;
-
- // Take the minimum of all incoming values. This can't infinitely loop
- // because of our depth threshold.
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q);
- for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
- if (Tmp == 1) return Tmp;
- Tmp = std::min(
- Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q));
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (Tmp == 1) break;
+ return std::min(Tmp, Tmp2) - 1;
+
+ case Instruction::Mul: {
+ // The output of the Mul can be at most twice the valid bits in the
+ // inputs.
+ unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (SignBitsOp0 == 1) break;
+ unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ if (SignBitsOp1 == 1) break;
+ unsigned OutValidBits =
+ (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
+ return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
}
- return Tmp;
- }
- case Instruction::Trunc:
- // FIXME: it's tricky to do anything useful for this, but it is an important
- // case for targets like X86.
- break;
+ case Instruction::PHI: {
+ const PHINode *PN = cast<PHINode>(U);
+ unsigned NumIncomingValues = PN->getNumIncomingValues();
+ // Don't analyze large in-degree PHIs.
+ if (NumIncomingValues > 4) break;
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (NumIncomingValues == 0) break;
+
+ // Take the minimum of all incoming values. This can't infinitely loop
+ // because of our depth threshold.
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q);
+ for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
+ if (Tmp == 1) return Tmp;
+ Tmp = std::min(
+ Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q));
+ }
+ return Tmp;
+ }
- case Instruction::ExtractElement:
- // Look through extract element. At the moment we keep this simple and skip
- // tracking the specific element. But at least we might find information
- // valid for all elements of the vector (for example if vector is sign
- // extended, shifted, etc).
- return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
-
- case Instruction::ShuffleVector: {
- // TODO: This is copied almost directly from the SelectionDAG version of
- // ComputeNumSignBits. It would be better if we could share common
- // code. If not, make sure that changes are translated to the DAG.
-
- // Collect the minimum number of sign bits that are shared by every vector
- // element referenced by the shuffle.
- auto *Shuf = cast<ShuffleVectorInst>(U);
- int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
- int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
- APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
- for (int i = 0; i != NumMaskElts; ++i) {
- int M = Shuf->getMaskValue(i);
- assert(M < NumElts * 2 && "Invalid shuffle mask constant");
- // For undef elements, we don't know anything about the common state of
- // the shuffle result.
- if (M == -1)
- return 1;
- if (M < NumElts)
- DemandedLHS.setBit(M % NumElts);
- else
- DemandedRHS.setBit(M % NumElts);
+ case Instruction::Trunc:
+ // FIXME: it's tricky to do anything useful for this, but it is an
+ // important case for targets like X86.
+ break;
+
+ case Instruction::ExtractElement:
+ // Look through extract element. At the moment we keep this simple and
+ // skip tracking the specific element. But at least we might find
+ // information valid for all elements of the vector (for example if vector
+ // is sign extended, shifted, etc).
+ return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+
+ case Instruction::ShuffleVector: {
+ // TODO: This is copied almost directly from the SelectionDAG version of
+ // ComputeNumSignBits. It would be better if we could share common
+ // code. If not, make sure that changes are translated to the DAG.
+
+ // Collect the minimum number of sign bits that are shared by every vector
+ // element referenced by the shuffle.
+ auto *Shuf = cast<ShuffleVectorInst>(U);
+ int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
+ int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ int M = Shuf->getMaskValue(i);
+ assert(M < NumElts * 2 && "Invalid shuffle mask constant");
+ // For undef elements, we don't know anything about the common state of
+ // the shuffle result.
+ if (M == -1)
+ return 1;
+ if (M < NumElts)
+ DemandedLHS.setBit(M % NumElts);
+ else
+ DemandedRHS.setBit(M % NumElts);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedLHS)
+ Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
+ if (!!DemandedRHS) {
+ Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ // If we don't know anything, early out and try computeKnownBits
+ // fall-back.
+ if (Tmp == 1)
+ break;
+ assert(Tmp <= V->getType()->getScalarSizeInBits() &&
+ "Failed to determine minimum sign bits");
+ return Tmp;
}
- Tmp = std::numeric_limits<unsigned>::max();
- if (!!DemandedLHS)
- Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
- if (!!DemandedRHS) {
- Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
- Tmp = std::min(Tmp, Tmp2);
}
- // If we don't know anything, early out and try computeKnownBits fall-back.
- if (Tmp == 1)
- break;
- assert(Tmp <= V->getType()->getScalarSizeInBits() &&
- "Failed to determine minimum sign bits");
- return Tmp;
- }
}
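
The shift cases above reduce to simple arithmetic on the running sign-bit count. A minimal standalone sketch of the two rules, using plain unsigned counters in place of APInt (the helper names are hypothetical):

    #include <algorithm>

    // TyBits: scalar width of the type; SignBits: known count for the operand.
    unsigned signBitsAfterAShr(unsigned SignBits, unsigned ShAmt, unsigned TyBits) {
      return std::min(SignBits + ShAmt, TyBits); // ashr replicates the sign bit
    }
    unsigned signBitsAfterShl(unsigned SignBits, unsigned ShAmt) {
      // Shifting out every known sign bit leaves us knowing only the minimum, 1.
      return ShAmt >= SignBits ? 1 : SignBits - ShAmt; // shl discards sign bits
    }

For an i32 with 5 known sign bits, ashr by 3 yields 8 and shl by 3 yields 2, matching the Tmp arithmetic in the cases above.
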
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -2655,8 +2666,6 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
/// through SExt instructions only if LookThroughSExt is true.
bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
bool LookThroughSExt, unsigned Depth) {
- const unsigned MaxDepth = 6;
-
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
@@ -3651,23 +3660,28 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
return Len == ~0ULL ? 1 : Len;
}
-const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
+const Value *
+llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
+ bool MustPreserveNullness) {
assert(Call &&
"getArgumentAliasingToReturnedPointer only works on nonnull calls");
if (const Value *RV = Call->getReturnedArgOperand())
return RV;
// This can be used only as an aliasing property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call))
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+ Call, MustPreserveNullness))
return Call->getArgOperand(0);
return nullptr;
}
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- const CallBase *Call) {
+ const CallBase *Call, bool MustPreserveNullness) {
return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
- Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
+ Call->getIntrinsicID() == Intrinsic::aarch64_tagp ||
+ (!MustPreserveNullness &&
+ Call->getIntrinsicID() == Intrinsic::ptrmask);
}
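
Masking with llvm.ptrmask can turn a non-null pointer into null, so it only qualifies here when the caller does not depend on null-ness being preserved; that is what the new MustPreserveNullness flag gates. A hedged sketch of a caller threading that requirement through (the wrapper name is hypothetical):

    // Look through returned-argument calls, but look through llvm.ptrmask
    // only when null-ness does not need to be preserved.
    const Value *stripAliasingCall(const CallBase *Call, bool NeedsNullness) {
      if (const Value *Arg = getArgumentAliasingToReturnedPointer(
              Call, /*MustPreserveNullness=*/NeedsNullness))
        return Arg;
      return Call;
    }
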
/// \p PN defines a loop-variant pointer to an object. Check if the
@@ -3725,7 +3739,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
V = RP;
continue;
}
@@ -3865,6 +3879,18 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
return true;
}
+bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
+ if (!LI.isUnordered())
+ return true;
+ const Function &F = *LI.getFunction();
+ // Speculative load may create a race that did not exist in the source.
+ return F.hasFnAttribute(Attribute::SanitizeThread) ||
+ // Speculative load may load data from dirty regions.
+ F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress);
+}
+
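
A minimal sketch of how the new helper composes with the existing query, assuming a pass that wants to hoist a load (the wrapper function is hypothetical):

    bool canSpeculateLoad(const LoadInst &LI, const DominatorTree *DT) {
      if (mustSuppressSpeculation(LI))
        return false; // ordered, or built with tsan/asan/hwasan instrumentation
      return isSafeToSpeculativelyExecute(&LI, /*CtxI=*/nullptr, DT);
    }
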
bool llvm::isSafeToSpeculativelyExecute(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
@@ -3909,17 +3935,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
}
case Instruction::Load: {
const LoadInst *LI = cast<LoadInst>(Inst);
- if (!LI->isUnordered() ||
- // Speculative load may create a race that did not exist in the source.
- LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
- // Speculative load may load data from dirty regions.
- LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
- LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
+ if (mustSuppressSpeculation(*LI))
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
- return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
- LI->getType(), LI->getAlignment(),
- DL, CtxI, DT);
+ return isDereferenceableAndAlignedPointer(
+ LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()),
+ DL, CtxI, DT);
}
case Instruction::Call: {
auto *CI = cast<const CallInst>(Inst);
@@ -4221,22 +4242,9 @@ OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
}
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
- // A memory operation returns normally if it isn't volatile. A volatile
- // operation is allowed to trap.
- //
- // An atomic operation isn't guaranteed to return in a reasonable amount of
- // time because it's possible for another thread to interfere with it for an
+ // Note: An atomic operation isn't guaranteed to return in a reasonable amount
+ // of time because it's possible for another thread to interfere with it for an
// arbitrary length of time, but programs aren't allowed to rely on that.
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- return !LI->isVolatile();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I))
- return !SI->isVolatile();
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
- return !CXI->isVolatile();
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
- return !RMWI->isVolatile();
- if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I))
- return !MII->isVolatile();
// If there is no successor, then execution can't transfer to it.
if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
@@ -4277,10 +4285,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
// FIXME: This isn't aggressive enough; a call which only writes to a global
// is guaranteed to return.
- return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
- match(I, m_Intrinsic<Intrinsic::assume>()) ||
- match(I, m_Intrinsic<Intrinsic::sideeffect>()) ||
- match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+ return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory();
}
// Other instructions return normally.
@@ -4572,12 +4577,12 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
// TODO: Allow FP min/max with nnan/nsz.
assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
- Value *A, *B;
+ Value *A = nullptr, *B = nullptr;
SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
if (!SelectPatternResult::isMinOrMax(L.Flavor))
return {SPF_UNKNOWN, SPNB_NA, false};
- Value *C, *D;
+ Value *C = nullptr, *D = nullptr;
SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
if (L.Flavor != R.Flavor)
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -5627,8 +5632,8 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
}
static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
- APInt &Upper) {
- const Value *LHS, *RHS;
+ APInt &Upper, const InstrInfoQuery &IIQ) {
+ const Value *LHS = nullptr, *RHS = nullptr;
SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
if (R.Flavor == SPF_UNKNOWN)
return;
@@ -5640,7 +5645,8 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
// then the result of abs(X) is [0..SIGNED_MAX],
// otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
Lower = APInt::getNullValue(BitWidth);
- if (cast<Instruction>(RHS)->hasNoSignedWrap())
+ if (match(RHS, m_Neg(m_Specific(LHS))) &&
+ IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
Upper = APInt::getSignedMaxValue(BitWidth) + 1;
else
Upper = APInt::getSignedMinValue(BitWidth) + 1;
@@ -5694,7 +5700,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) {
else if (auto *II = dyn_cast<IntrinsicInst>(V))
setLimitsForIntrinsic(*II, Lower, Upper);
else if (auto *SI = dyn_cast<SelectInst>(V))
- setLimitsForSelectPattern(*SI, Lower, Upper);
+ setLimitsForSelectPattern(*SI, Lower, Upper, IIQ);
ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
@@ -5704,3 +5710,111 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) {
return CR;
}
+
+static Optional<int64_t>
+getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {
+ // Skip over the first indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1; i != Idx; ++i, ++GTI)
+ /*skip along*/;
+
+ // Compute the offset implied by the rest of the indices.
+ int64_t Offset = 0;
+ for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!OpC)
+ return None;
+ if (OpC->isZero())
+ continue; // No offset.
+
+ // Handle struct indices, which add their field offset to the pointer.
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
+ Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ continue;
+ }
+
+ // Otherwise, we have a sequential type like an array or vector. Multiply
+ // the index by the ElementSize.
+ uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size * OpC->getSExtValue();
+ }
+
+ return Offset;
+}
+
+Optional<int64_t> llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2,
+ const DataLayout &DL) {
+ Ptr1 = Ptr1->stripPointerCasts();
+ Ptr2 = Ptr2->stripPointerCasts();
+
+ // Handle the trivial case first.
+ if (Ptr1 == Ptr2) {
+ return 0;
+ }
+
+ const GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
+ const GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
+
+ // If one pointer is a GEP see if the GEP is a constant offset from the base,
+ // as in "P" and "gep P, 1".
+  // Also do this iteratively to handle the following case:
+ // Ptr_t1 = GEP Ptr1, c1
+ // Ptr_t2 = GEP Ptr_t1, c2
+ // Ptr2 = GEP Ptr_t2, c3
+ // where we will return c1+c2+c3.
+ // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base
+ // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases
+ // are the same, and return the difference between offsets.
+ auto getOffsetFromBase = [&DL](const GEPOperator *GEP,
+ const Value *Ptr) -> Optional<int64_t> {
+ const GEPOperator *GEP_T = GEP;
+ int64_t OffsetVal = 0;
+ bool HasSameBase = false;
+ while (GEP_T) {
+ auto Offset = getOffsetFromIndex(GEP_T, 1, DL);
+ if (!Offset)
+ return None;
+ OffsetVal += *Offset;
+ auto Op0 = GEP_T->getOperand(0)->stripPointerCasts();
+ if (Op0 == Ptr) {
+ HasSameBase = true;
+ break;
+ }
+ GEP_T = dyn_cast<GEPOperator>(Op0);
+ }
+ if (!HasSameBase)
+ return None;
+ return OffsetVal;
+ };
+
+ if (GEP1) {
+ auto Offset = getOffsetFromBase(GEP1, Ptr2);
+ if (Offset)
+ return -*Offset;
+ }
+ if (GEP2) {
+ auto Offset = getOffsetFromBase(GEP2, Ptr1);
+ if (Offset)
+ return Offset;
+ }
+
+  // Right now we only handle the case when Ptr1 and Ptr2 are both GEPs with
+  // an identical base. After that base, they may share some number of common
+  // (and potentially variable) indices, followed by constant offsets; the
+  // difference between those offsets determines their distance from each
+  // other. We handle no other case at this point.
+ if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+ return None;
+
+ // Skip any common indices and track the GEP types.
+ unsigned Idx = 1;
+ for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+ if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+ break;
+
+ auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL);
+ auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL);
+ if (!Offset1 || !Offset2)
+ return None;
+ return *Offset2 - *Offset1;
+}
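
A short usage sketch, assuming two hypothetical pointers where Ptr2 was formed as getelementptr i8, i8* Ptr1, i64 4:

    // isPointerOffset answers "is Ptr2 == Ptr1 + N bytes?" for a constant N.
    if (Optional<int64_t> Off = isPointerOffset(Ptr1, Ptr2, DL)) {
      assert(*Off == 4 && "Ptr2 is four bytes past Ptr1");
      // A memcpy-combining client could now treat the two accesses as adjacent.
    }
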
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 986756eb2627..600f57ab9d71 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -56,6 +56,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix:
+ case Intrinsic::umul_fix_sat:
case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
@@ -98,6 +99,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix:
+ case Intrinsic::umul_fix_sat:
return (ScalarOpdIdx == 2);
default:
return false;
@@ -830,15 +832,15 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
/*Assume=*/true, /*ShouldCheckWrap=*/false);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
- PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ PointerType *PtrTy = cast<PointerType>(Ptr->getType());
uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
// An alignment of 0 means target ABI alignment.
- unsigned Align = getLoadStoreAlignment(&I);
- if (!Align)
- Align = DL.getABITypeAlignment(PtrTy->getElementType());
+ MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I));
+ if (!Alignment)
+ Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType()));
- AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+ AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment);
}
}
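
The hunk above replaces the old "unsigned of 0 means ABI alignment" convention with the MaybeAlign/Align types. A small sketch of the idiom, assuming an ABI type alignment of 16 for the example:

    #include "llvm/Support/Alignment.h"

    // Fragment: resolve a possibly-unset alignment against an assumed ABI
    // alignment of 16. MaybeAlign(0) constructs the unset state.
    llvm::Align resolve(llvm::MaybeAlign A) {
      if (!A)
        A = llvm::Align(16); // fall back to the ABI type alignment
      return *A;
    }
    // resolve(MaybeAlign(0)) == Align(16); resolve(MaybeAlign(8)) == Align(8).
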
@@ -925,7 +927,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
if (!Group) {
LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
<< '\n');
- Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
+ Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
}
if (B->mayWriteToMemory())
StoreGroups.insert(Group);
@@ -964,6 +966,10 @@ void InterleavedAccessInfo::analyzeInterleaving(
// instructions that precede it.
if (isInterleaved(A)) {
InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
+
+ LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
+ "dependence between " << *A << " and " << *B << '\n');
+
StoreGroups.remove(StoreGroup);
releaseGroup(StoreGroup);
}
@@ -1028,7 +1034,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
// Try to insert A into B's group.
- if (Group->insertMember(A, IndexA, DesA.Align)) {
+ if (Group->insertMember(A, IndexA, DesA.Alignment)) {
LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
<< " into the interleave group with" << *B
<< '\n');
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 72d2357c2933..5292b0e62744 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -622,6 +622,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(amdgpu_ps);
KEYWORD(amdgpu_cs);
KEYWORD(amdgpu_kernel);
+ KEYWORD(tailcc);
KEYWORD(cc);
KEYWORD(c);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 87dff6468f2d..594537307d00 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1122,7 +1122,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
if (ParseToken(lltok::StringConstant, "expected partition string"))
return true;
} else if (Lex.getKind() == lltok::kw_align) {
- unsigned Alignment;
+ MaybeAlign Alignment;
if (ParseOptionalAlignment(Alignment)) return true;
GV->setAlignment(Alignment);
} else if (Lex.getKind() == lltok::MetadataVar) {
@@ -1229,12 +1229,13 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
// As a hack, we allow function alignment to be initially parsed as an
// attribute on a function declaration/definition or added to an attribute
// group and later moved to the alignment field.
- unsigned Alignment;
+ MaybeAlign Alignment;
if (inAttrGrp) {
Lex.Lex();
- if (ParseToken(lltok::equal, "expected '=' here") ||
- ParseUInt32(Alignment))
+ uint32_t Value = 0;
+ if (ParseToken(lltok::equal, "expected '=' here") || ParseUInt32(Value))
return true;
+ Alignment = Align(Value);
} else {
if (ParseOptionalAlignment(Alignment))
return true;
@@ -1603,7 +1604,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
continue;
}
case lltok::kw_align: {
- unsigned Alignment;
+ MaybeAlign Alignment;
if (ParseOptionalAlignment(Alignment))
return true;
B.addAlignmentAttr(Alignment);
@@ -1720,7 +1721,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
continue;
}
case lltok::kw_align: {
- unsigned Alignment;
+ MaybeAlign Alignment;
if (ParseOptionalAlignment(Alignment))
return true;
B.addAlignmentAttr(Alignment);
@@ -1955,6 +1956,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'amdgpu_ps'
/// ::= 'amdgpu_cs'
/// ::= 'amdgpu_kernel'
+/// ::= 'tailcc'
/// ::= 'cc' UINT
///
bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
@@ -2000,6 +2002,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break;
case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
+ case lltok::kw_tailcc: CC = CallingConv::Tail; break;
case lltok::kw_cc: {
Lex.Lex();
return ParseUInt32(CC);
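
The new tailcc keyword is the textual spelling of CallingConv::Tail. A sketch of setting the same convention through the C++ API, assuming an existing Function *F:

    F->setCallingConv(CallingConv::Tail); // what "tailcc" parses to
    assert(F->getCallingConv() == CallingConv::Tail);
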
@@ -2067,16 +2070,19 @@ bool LLParser::ParseOptionalFunctionMetadata(Function &F) {
/// ParseOptionalAlignment
/// ::= /* empty */
/// ::= 'align' 4
-bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
- Alignment = 0;
+bool LLParser::ParseOptionalAlignment(MaybeAlign &Alignment) {
+ Alignment = None;
if (!EatIfPresent(lltok::kw_align))
return false;
LocTy AlignLoc = Lex.getLoc();
- if (ParseUInt32(Alignment)) return true;
- if (!isPowerOf2_32(Alignment))
+ uint32_t Value = 0;
+ if (ParseUInt32(Value))
+ return true;
+ if (!isPowerOf2_32(Value))
return Error(AlignLoc, "alignment is not a power of two");
- if (Alignment > Value::MaximumAlignment)
+ if (Value > Value::MaximumAlignment)
return Error(AlignLoc, "huge alignments are not supported yet");
+ Alignment = Align(Value);
return false;
}
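
The parser now validates the raw integer before it ever constructs an Align, so an Align can only hold a power of two. A condensed sketch of the two checks (Raw is an illustrative local, and the error handling is simplified to fatal errors):

    uint32_t Raw = 32;
    if (!isPowerOf2_32(Raw))
      report_fatal_error("alignment is not a power of two");
    if (Raw > Value::MaximumAlignment)
      report_fatal_error("huge alignments are not supported yet");
    MaybeAlign Alignment = Align(Raw); // safe: Raw was validated above
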
@@ -2113,7 +2119,7 @@ bool LLParser::ParseOptionalDerefAttrBytes(lltok::Kind AttrKind,
///
/// This returns with AteExtraComma set to true if it ate an excess comma at the
/// end.
-bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
+bool LLParser::ParseOptionalCommaAlign(MaybeAlign &Alignment,
bool &AteExtraComma) {
AteExtraComma = false;
while (EatIfPresent(lltok::comma)) {
@@ -2551,6 +2557,7 @@ bool LLParser::ParseOptionalOperandBundles(
///
bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
bool &isVarArg){
+ unsigned CurValID = 0;
isVarArg = false;
assert(Lex.getKind() == lltok::lparen);
Lex.Lex(); // eat the (.
@@ -2575,6 +2582,12 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
if (Lex.getKind() == lltok::LocalVar) {
Name = Lex.getStrVal();
Lex.Lex();
+ } else if (Lex.getKind() == lltok::LocalVarID) {
+ if (Lex.getUIntVal() != CurValID)
+ return Error(TypeLoc, "argument expected to be numbered '%" +
+ Twine(CurValID) + "'");
+ ++CurValID;
+ Lex.Lex();
}
if (!FunctionType::isValidArgumentType(ArgTy))
@@ -2602,6 +2615,13 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
Name = Lex.getStrVal();
Lex.Lex();
} else {
+ if (Lex.getKind() == lltok::LocalVarID) {
+ if (Lex.getUIntVal() != CurValID)
+ return Error(TypeLoc, "argument expected to be numbered '%" +
+ Twine(CurValID) + "'");
+ Lex.Lex();
+ }
+ ++CurValID;
Name = "";
}
@@ -3093,7 +3113,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ParseToken(lltok::rbrace, "expected end of struct constant"))
return true;
- ID.ConstantStructElts = make_unique<Constant *[]>(Elts.size());
+ ID.ConstantStructElts = std::make_unique<Constant *[]>(Elts.size());
ID.UIntVal = Elts.size();
memcpy(ID.ConstantStructElts.get(), Elts.data(),
Elts.size() * sizeof(Elts[0]));
@@ -3115,7 +3135,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (isPackedStruct) {
- ID.ConstantStructElts = make_unique<Constant *[]>(Elts.size());
+ ID.ConstantStructElts = std::make_unique<Constant *[]>(Elts.size());
memcpy(ID.ConstantStructElts.get(), Elts.data(),
Elts.size() * sizeof(Elts[0]));
ID.UIntVal = Elts.size();
@@ -5354,7 +5374,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
LocTy BuiltinLoc;
std::string Section;
std::string Partition;
- unsigned Alignment;
+ MaybeAlign Alignment;
std::string GC;
GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
unsigned AddrSpace = 0;
@@ -5471,7 +5491,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn->setCallingConv(CC);
Fn->setAttributes(PAL);
Fn->setUnnamedAddr(UnnamedAddr);
- Fn->setAlignment(Alignment);
+ Fn->setAlignment(MaybeAlign(Alignment));
Fn->setSection(Section);
Fn->setPartition(Partition);
Fn->setComdat(C);
@@ -5788,7 +5808,19 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS);
case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS);
- case lltok::kw_phi: return ParsePHI(Inst, PFS);
+ case lltok::kw_phi: {
+ FastMathFlags FMF = EatFastMathFlagsIfPresent();
+ int Res = ParsePHI(Inst, PFS);
+ if (Res != 0)
+ return Res;
+ if (FMF.any()) {
+ if (!Inst->getType()->isFPOrFPVectorTy())
+ return Error(Loc, "fast-math-flags specified for phi without "
+ "floating-point scalar or vector return type");
+ Inst->setFastMathFlags(FMF);
+ }
+ return 0;
+ }
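
Fast-math flags on phis are new in this change; the parser attaches them only when the phi produces a floating-point scalar or vector. A sketch of the equivalent C++ API calls, assuming PN is a freshly created floating-point PHINode:

    FastMathFlags FMF;
    FMF.setFast(); // what the 'fast' keyword in textual IR denotes
    if (PN->getType()->isFPOrFPVectorTy())
      PN->setFastMathFlags(FMF);
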
case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS);
// Call.
case lltok::kw_call: return ParseCall(Inst, PFS, CallInst::TCK_None);
@@ -6837,7 +6869,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
Value *Size = nullptr;
LocTy SizeLoc, TyLoc, ASLoc;
- unsigned Alignment = 0;
+ MaybeAlign Alignment;
unsigned AddrSpace = 0;
Type *Ty = nullptr;
@@ -6885,7 +6917,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
if (Size && !Size->getType()->isIntegerTy())
return Error(SizeLoc, "element count must have integer type");
- AllocaInst *AI = new AllocaInst(Ty, AddrSpace, Size, Alignment);
+ AllocaInst *AI =
+ new AllocaInst(Ty, AddrSpace, Size, Alignment ? Alignment->value() : 0);
AI->setUsedWithInAlloca(IsInAlloca);
AI->setSwiftError(IsSwiftError);
Inst = AI;
@@ -6898,7 +6931,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy Loc;
- unsigned Alignment = 0;
+ MaybeAlign Alignment;
bool AteExtraComma = false;
bool isAtomic = false;
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
@@ -6947,7 +6980,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
- unsigned Alignment = 0;
+ MaybeAlign Alignment;
bool AteExtraComma = false;
bool isAtomic = false;
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
@@ -8074,7 +8107,7 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
if (ParseToken(lltok::rparen, "expected ')' here"))
return true;
- auto FS = llvm::make_unique<FunctionSummary>(
+ auto FS = std::make_unique<FunctionSummary>(
GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs),
std::move(Calls), std::move(TypeIdInfo.TypeTests),
std::move(TypeIdInfo.TypeTestAssumeVCalls),
@@ -8134,7 +8167,7 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
return true;
auto GS =
- llvm::make_unique<GlobalVarSummary>(GVFlags, GVarFlags, std::move(Refs));
+ std::make_unique<GlobalVarSummary>(GVFlags, GVarFlags, std::move(Refs));
GS->setModulePath(ModulePath);
GS->setVTableFuncs(std::move(VTableFuncs));
@@ -8175,7 +8208,7 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
if (ParseToken(lltok::rparen, "expected ')' here"))
return true;
- auto AS = llvm::make_unique<AliasSummary>(GVFlags);
+ auto AS = std::make_unique<AliasSummary>(GVFlags);
AS->setModulePath(ModulePath);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 610e2e262008..abc423b4e3cd 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -281,14 +281,14 @@ namespace llvm {
void ParseOptionalVisibility(unsigned &Res);
void ParseOptionalDLLStorageClass(unsigned &Res);
bool ParseOptionalCallingConv(unsigned &CC);
- bool ParseOptionalAlignment(unsigned &Alignment);
+ bool ParseOptionalAlignment(MaybeAlign &Alignment);
bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID,
AtomicOrdering &Ordering);
bool ParseScope(SyncScope::ID &SSID);
bool ParseOrdering(AtomicOrdering &Ordering);
bool ParseOptionalStackAlignment(unsigned &Alignment);
- bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
+ bool ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma);
bool ParseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc,
bool &AteExtraComma);
bool ParseOptionalCommaInAlloca(bool &IsInAlloca);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 0e9ba4db4742..f49feb2dc14d 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -168,6 +168,7 @@ enum Kind {
kw_amdgpu_ps,
kw_amdgpu_cs,
kw_amdgpu_kernel,
+ kw_tailcc,
// Attributes:
kw_attributes,
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index b13c6237f411..b7f552a6fccb 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -42,7 +42,7 @@ llvm::parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context,
SlotMapping *Slots, bool UpgradeDebugInfo,
StringRef DataLayoutString) {
std::unique_ptr<Module> M =
- make_unique<Module>(F.getBufferIdentifier(), Context);
+ std::make_unique<Module>(F.getBufferIdentifier(), Context);
if (parseAssemblyInto(F, M.get(), nullptr, Err, Slots, UpgradeDebugInfo,
DataLayoutString))
@@ -71,9 +71,9 @@ ParsedModuleAndIndex llvm::parseAssemblyWithIndex(
MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context,
SlotMapping *Slots, bool UpgradeDebugInfo, StringRef DataLayoutString) {
std::unique_ptr<Module> M =
- make_unique<Module>(F.getBufferIdentifier(), Context);
+ std::make_unique<Module>(F.getBufferIdentifier(), Context);
std::unique_ptr<ModuleSummaryIndex> Index =
- make_unique<ModuleSummaryIndex>(/*HaveGVs=*/true);
+ std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/true);
if (parseAssemblyInto(F, M.get(), Index.get(), Err, Slots, UpgradeDebugInfo,
DataLayoutString))
@@ -123,7 +123,7 @@ static bool parseSummaryIndexAssemblyInto(MemoryBufferRef F,
std::unique_ptr<ModuleSummaryIndex>
llvm::parseSummaryIndexAssembly(MemoryBufferRef F, SMDiagnostic &Err) {
std::unique_ptr<ModuleSummaryIndex> Index =
- make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
+ std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
if (parseSummaryIndexAssemblyInto(F, *Index, Err))
return nullptr;
diff --git a/lib/BinaryFormat/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp
index eb6bd33ce583..d06cccdf0dfd 100644
--- a/lib/BinaryFormat/Dwarf.cpp
+++ b/lib/BinaryFormat/Dwarf.cpp
@@ -22,7 +22,7 @@ StringRef llvm::dwarf::TagString(unsigned Tag) {
switch (Tag) {
default:
return StringRef();
-#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \
case DW_TAG_##NAME: \
return "DW_TAG_" #NAME;
#include "llvm/BinaryFormat/Dwarf.def"
@@ -31,7 +31,7 @@ StringRef llvm::dwarf::TagString(unsigned Tag) {
unsigned llvm::dwarf::getTag(StringRef TagString) {
return StringSwitch<unsigned>(TagString)
-#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \
.Case("DW_TAG_" #NAME, DW_TAG_##NAME)
#include "llvm/BinaryFormat/Dwarf.def"
.Default(DW_TAG_invalid);
@@ -41,7 +41,7 @@ unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) {
switch (Tag) {
default:
return 0;
-#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \
case DW_TAG_##NAME: \
return VERSION;
#include "llvm/BinaryFormat/Dwarf.def"
@@ -52,7 +52,7 @@ unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) {
switch (Tag) {
default:
return 0;
-#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \
case DW_TAG_##NAME: \
return DWARF_VENDOR_##VENDOR;
#include "llvm/BinaryFormat/Dwarf.def"
@@ -149,6 +149,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
return "DW_OP_LLVM_fragment";
case DW_OP_LLVM_tag_offset:
return "DW_OP_LLVM_tag_offset";
+ case DW_OP_LLVM_entry_value:
+ return "DW_OP_LLVM_entry_value";
}
}
@@ -160,6 +162,7 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
.Case("DW_OP_LLVM_convert", DW_OP_LLVM_convert)
.Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment)
.Case("DW_OP_LLVM_tag_offset", DW_OP_LLVM_tag_offset)
+ .Case("DW_OP_LLVM_entry_value", DW_OP_LLVM_entry_value)
.Default(0);
}
@@ -472,6 +475,17 @@ StringRef llvm::dwarf::RangeListEncodingString(unsigned Encoding) {
}
}
+StringRef llvm::dwarf::LocListEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ default:
+ return StringRef();
+#define HANDLE_DW_LLE(ID, NAME) \
+ case DW_LLE_##NAME: \
+ return "DW_LLE_" #NAME;
+#include "llvm/BinaryFormat/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::CallFrameString(unsigned Encoding,
Triple::ArchType Arch) {
assert(Arch != llvm::Triple::ArchType::UnknownArch);
diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp
index 7dfe23690a50..bbcbbabededb 100644
--- a/lib/BinaryFormat/Magic.cpp
+++ b/lib/BinaryFormat/Magic.cpp
@@ -210,6 +210,11 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::coff_object;
break;
+ case 0x2d: // YAML '-'
+ if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
+ return file_magic::tapi_file;
+ break;
+
default:
break;
}
diff --git a/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 9c30d563a314..e70caa83c8c1 100644
--- a/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -434,6 +434,13 @@ static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) {
return std::move(Err);
if (Signature[2] == 'A' && Signature[3] == 'G')
return ClangSerializedDiagnosticsBitstream;
+ } else if (Signature[0] == 'R' && Signature[1] == 'M') {
+ if (Error Err = tryRead(Signature[2], 8))
+ return std::move(Err);
+ if (Error Err = tryRead(Signature[3], 8))
+ return std::move(Err);
+ if (Signature[2] == 'R' && Signature[3] == 'K')
+ return LLVMBitstreamRemarks;
} else {
if (Error Err = tryRead(Signature[2], 4))
return std::move(Err);
@@ -627,6 +634,9 @@ void BitcodeAnalyzer::printStats(BCDumpOptions O,
case ClangSerializedDiagnosticsBitstream:
O.OS << "Clang Serialized Diagnostics\n";
break;
+ case LLVMBitstreamRemarks:
+ O.OS << "LLVM Remarks\n";
+ break;
}
O.OS << " # Toplevel Blocks: " << NumTopBlocks << "\n";
O.OS << "\n";
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 29dc7f616392..15eead1de31a 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -722,7 +722,7 @@ private:
/// Converts alignment exponent (i.e. power of two (or zero)) to the
/// corresponding alignment to use. If alignment is too large, returns
/// a corresponding error code.
- Error parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
+ Error parseAlignmentValue(uint64_t Exponent, MaybeAlign &Alignment);
Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
Error parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false);
@@ -1063,7 +1063,7 @@ static int getDecodedUnaryOpcode(unsigned Val, Type *Ty) {
switch (Val) {
default:
return -1;
- case bitc::UNOP_NEG:
+ case bitc::UNOP_FNEG:
return IsFP ? Instruction::FNeg : -1;
}
}
@@ -1544,12 +1544,12 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
}
Error BitcodeReader::parseAlignmentValue(uint64_t Exponent,
- unsigned &Alignment) {
+ MaybeAlign &Alignment) {
// Note: Alignment in bitcode files is incremented by 1, so that zero
// can be used for default alignment.
if (Exponent > Value::MaxAlignmentExponent + 1)
return error("Invalid alignment value");
- Alignment = (1 << static_cast<unsigned>(Exponent)) >> 1;
+ Alignment = decodeMaybeAlign(Exponent);
return Error::success();
}
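
decodeMaybeAlign undoes that off-by-one encoding: a stored exponent of 0 means no alignment was recorded, and any other value E decodes to 1 << (E - 1). For example:

    MaybeAlign A0 = decodeMaybeAlign(0); // None: default alignment
    MaybeAlign A4 = decodeMaybeAlign(4); // Align(8), i.e. 1 << (4 - 1)
    assert(!A0 && A4->value() == 8);
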
@@ -2377,6 +2377,8 @@ Error BitcodeReader::parseConstants() {
CurTy = flattenPointerTypes(CurFullTy);
continue; // Skip the ValueList manipulation.
case bitc::CST_CODE_NULL: // NULL
+ if (CurTy->isVoidTy() || CurTy->isFunctionTy() || CurTy->isLabelTy())
+ return error("Invalid type for a constant null value");
V = Constant::getNullValue(CurTy);
break;
case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
@@ -3110,7 +3112,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
uint64_t RawLinkage = Record[3];
GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
- unsigned Alignment;
+ MaybeAlign Alignment;
if (Error Err = parseAlignmentValue(Record[4], Alignment))
return Err;
std::string Section;
@@ -3241,7 +3243,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Context, getPointerElementFlatType(PTy)));
}
- unsigned Alignment;
+ MaybeAlign Alignment;
if (Error Err = parseAlignmentValue(Record[5], Alignment))
return Err;
Func->setAlignment(Alignment);
@@ -3646,6 +3648,11 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
break;
}
Record.clear();
+
+ // Upgrade data layout string.
+ std::string DL = llvm::UpgradeDataLayoutString(
+ TheModule->getDataLayoutStr(), TheModule->getTargetTriple());
+ TheModule->setDataLayout(DL);
}
}
@@ -4622,31 +4629,48 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
- if (Record.size() < 1 || ((Record.size()-1)&1))
+ if (Record.size() < 1)
return error("Invalid record");
+ // The first record specifies the type.
FullTy = getFullyStructuredTypeByID(Record[0]);
Type *Ty = flattenPointerTypes(FullTy);
if (!Ty)
return error("Invalid record");
- PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
+ // Phi arguments are pairs of records of [value, basic block].
+ // There is an optional final record for fast-math-flags if this phi has a
+ // floating-point type.
+ size_t NumArgs = (Record.size() - 1) / 2;
+ if ((Record.size() - 1) % 2 == 1 && !Ty->isFPOrFPVectorTy())
+ return error("Invalid record");
+
+ PHINode *PN = PHINode::Create(Ty, NumArgs);
InstructionList.push_back(PN);
- for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
+ for (unsigned i = 0; i != NumArgs; i++) {
Value *V;
// With the new function encoding, it is possible that operands have
// negative IDs (for forward references). Use a signed VBR
// representation to keep the encoding small.
if (UseRelativeIDs)
- V = getValueSigned(Record, 1+i, NextValueNo, Ty);
+ V = getValueSigned(Record, i * 2 + 1, NextValueNo, Ty);
else
- V = getValue(Record, 1+i, NextValueNo, Ty);
- BasicBlock *BB = getBasicBlock(Record[2+i]);
+ V = getValue(Record, i * 2 + 1, NextValueNo, Ty);
+ BasicBlock *BB = getBasicBlock(Record[i * 2 + 2]);
if (!V || !BB)
return error("Invalid record");
PN->addIncoming(V, BB);
}
I = PN;
+
+ // If there are an even number of records, the final record must be FMF.
+ if (Record.size() % 2 == 0) {
+ assert(isa<FPMathOperator>(I) && "Unexpected phi type");
+ FastMathFlags FMF = getDecodedFastMathFlags(Record[Record.size() - 1]);
+ if (FMF.any())
+ I->setFastMathFlags(FMF);
+ }
+
break;
}
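
The record-size arithmetic is worth spelling out: with the type word leading and (value, block) pairs following, an even total size is what signals the optional trailing flags word. A sketch (the helper names are hypothetical):

    // [ty, val0, bb0, ..., flags?]  -- flags present only when the size is even.
    size_t numPhiArgs(size_t RecordSize) { return (RecordSize - 1) / 2; }
    bool hasTrailingFMF(size_t RecordSize) { return RecordSize % 2 == 0; }
    // A two-input phi with flags has size 6: 2 args, FMF present.
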
@@ -4726,7 +4750,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
- unsigned Align;
+ MaybeAlign Align;
if (Error Err = parseAlignmentValue(AlignRecord & ~FlagMask, Align)) {
return Err;
}
@@ -4737,7 +4761,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
const DataLayout &DL = TheModule->getDataLayout();
unsigned AS = DL.getAllocaAddrSpace();
- AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align);
+ AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align ? Align->value() : 0);
AI->setUsedWithInAlloca(InAlloca);
AI->setSwiftError(SwiftError);
I = AI;
@@ -4765,7 +4789,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
return Err;
- unsigned Align;
+ MaybeAlign Align;
if (Error Err = parseAlignmentValue(Record[OpNum], Align))
return Err;
I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align);
@@ -4802,7 +4826,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]);
- unsigned Align;
+ MaybeAlign Align;
if (Error Err = parseAlignmentValue(Record[OpNum], Align))
return Err;
I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align, Ordering, SSID);
@@ -4824,10 +4848,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType()))
return Err;
- unsigned Align;
+ MaybeAlign Align;
if (Error Err = parseAlignmentValue(Record[OpNum], Align))
return Err;
- I = new StoreInst(Val, Ptr, Record[OpNum+1], Align);
+ I = new StoreInst(Val, Ptr, Record[OpNum + 1], Align);
InstructionList.push_back(I);
break;
}
@@ -4857,10 +4881,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0)
return error("Invalid record");
- unsigned Align;
+ MaybeAlign Align;
if (Error Err = parseAlignmentValue(Record[OpNum], Align))
return Err;
- I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SSID);
+ I = new StoreInst(Val, Ptr, Record[OpNum + 1], Align, Ordering, SSID);
InstructionList.push_back(I);
break;
}
@@ -5312,7 +5336,7 @@ Error BitcodeReader::materializeModule() {
UpgradeModuleFlags(*TheModule);
- UpgradeRetainReleaseMarker(*TheModule);
+ UpgradeARCRuntime(*TheModule);
return Error::success();
}
@@ -5874,7 +5898,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
IsOldProfileFormat, HasProfile, HasRelBF);
setSpecialRefs(Refs, NumRORefs, NumWORefs);
- auto FS = llvm::make_unique<FunctionSummary>(
+ auto FS = std::make_unique<FunctionSummary>(
Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
@@ -5900,7 +5924,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
uint64_t RawFlags = Record[1];
unsigned AliaseeID = Record[2];
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
- auto AS = llvm::make_unique<AliasSummary>(Flags);
+ auto AS = std::make_unique<AliasSummary>(Flags);
// The module path string ref set in the summary must be owned by the
// index's module string table. Since we don't have a module path
// string table section in the per-module index, we create a single
@@ -5934,7 +5958,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
std::vector<ValueInfo> Refs =
makeRefList(ArrayRef<uint64_t>(Record).slice(RefArrayStart));
auto FS =
- llvm::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
+ std::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
FS->setModulePath(getThisModule()->first());
auto GUID = getValueInfoFromValueId(ValueID);
FS->setOriginalName(GUID.second);
@@ -5961,7 +5985,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
VTableFuncs.push_back({Callee, Offset});
}
auto VS =
- llvm::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
+ std::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
VS->setModulePath(getThisModule()->first());
VS->setVTableFuncs(VTableFuncs);
auto GUID = getValueInfoFromValueId(ValueID);
@@ -6019,7 +6043,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
IsOldProfileFormat, HasProfile, false);
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
setSpecialRefs(Refs, NumRORefs, NumWORefs);
- auto FS = llvm::make_unique<FunctionSummary>(
+ auto FS = std::make_unique<FunctionSummary>(
Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount,
std::move(Refs), std::move(Edges), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
@@ -6046,7 +6070,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
uint64_t RawFlags = Record[2];
unsigned AliaseeValueId = Record[3];
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
- auto AS = llvm::make_unique<AliasSummary>(Flags);
+ auto AS = std::make_unique<AliasSummary>(Flags);
LastSeenSummary = AS.get();
AS->setModulePath(ModuleIdMap[ModuleId]);
@@ -6075,7 +6099,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
std::vector<ValueInfo> Refs =
makeRefList(ArrayRef<uint64_t>(Record).slice(RefArrayStart));
auto FS =
- llvm::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
+ std::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
LastSeenSummary = FS.get();
FS->setModulePath(ModuleIdMap[ModuleId]);
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
@@ -6438,7 +6462,7 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
Context);
std::unique_ptr<Module> M =
- llvm::make_unique<Module>(ModuleIdentifier, Context);
+ std::make_unique<Module>(ModuleIdentifier, Context);
M->setMaterializer(R);
// Delay parsing Metadata if ShouldLazyLoadMetadata is true.
@@ -6485,7 +6509,7 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
return std::move(JumpFailed);
- auto Index = llvm::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
+ auto Index = std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index,
ModuleIdentifier, 0);
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index 108f71189585..4da51dda8b74 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -515,7 +515,7 @@ class MetadataLoader::MetadataLoaderImpl {
GV.getMetadata(LLVMContext::MD_dbg, MDs);
GV.eraseMetadata(LLVMContext::MD_dbg);
for (auto *MD : MDs)
- if (auto *DGV = dyn_cast_or_null<DIGlobalVariable>(MD)) {
+ if (auto *DGV = dyn_cast<DIGlobalVariable>(MD)) {
auto *DGVE = DIGlobalVariableExpression::getDistinct(
Context, DGV, DIExpression::get(Context, {}));
GV.addMetadata(LLVMContext::MD_dbg, *DGVE);
@@ -987,7 +987,7 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString");
// Lookup first if the metadata hasn't already been loaded.
if (auto *MD = MetadataList.lookup(ID)) {
- auto *N = dyn_cast_or_null<MDNode>(MD);
+ auto *N = cast<MDNode>(MD);
if (!N->isTemporary())
return;
}
@@ -2133,7 +2133,7 @@ MetadataLoader::MetadataLoader(BitstreamCursor &Stream, Module &TheModule,
BitcodeReaderValueList &ValueList,
bool IsImporting,
std::function<Type *(unsigned)> getTypeByID)
- : Pimpl(llvm::make_unique<MetadataLoaderImpl>(
+ : Pimpl(std::make_unique<MetadataLoaderImpl>(
Stream, TheModule, ValueList, std::move(getTypeByID), IsImporting)) {}
Error MetadataLoader::parseMetadata(bool ModuleLevel) {
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 76ca89147e52..be59c1f92836 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
std::error_code EC;
- raw_fd_ostream OS(Path, EC, sys::fs::F_None);
+ raw_fd_ostream OS(Path, EC, sys::fs::OF_None);
if (EC)
return -1;
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 5c7b970a3a75..deb4019ea8ba 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -86,7 +86,7 @@ static cl::opt<unsigned>
cl::desc("Number of metadatas above which we emit an index "
"to enable lazy-loading"));
-cl::opt<bool> WriteRelBFToSummary(
+static cl::opt<bool> WriteRelBFToSummary(
"write-relbf-to-summary", cl::Hidden, cl::init(false),
cl::desc("Write relative block frequency to function summary "));
@@ -520,7 +520,7 @@ static unsigned getEncodedCastOpcode(unsigned Opcode) {
static unsigned getEncodedUnaryOpcode(unsigned Opcode) {
switch (Opcode) {
default: llvm_unreachable("Unknown unary instruction!");
- case Instruction::FNeg: return bitc::UNOP_NEG;
+ case Instruction::FNeg: return bitc::UNOP_FNEG;
}
}
@@ -2880,6 +2880,11 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
pushValueSigned(PN.getIncomingValue(i), InstID, Vals64);
Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i)));
}
+
+ uint64_t Flags = getOptimizationFlags(&I);
+ if (Flags != 0)
+ Vals64.push_back(Flags);
+
// Emit a Vals64 vector and exit.
Stream.EmitRecord(Code, Vals64, AbbrevToUse);
Vals64.clear();
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 444f618d8b8c..f64b775a8b77 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -232,7 +232,7 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI,
if (!MO.isReg() || !MO.isImplicit())
return false;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0)
return false;
@@ -252,7 +252,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(
if (!MO.isReg()) continue;
if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) ||
IsImplicitDefUse(MI, MO)) {
- const unsigned Reg = MO.getReg();
+ const Register Reg = MO.getReg();
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
PassthruRegs.insert(*SubRegs);
@@ -365,7 +365,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
@@ -375,7 +375,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
@@ -418,7 +418,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
// Ignore KILLs and passthru registers for liveness...
if (MI.isKill() || (PassthruRegs.count(Reg) != 0))
@@ -471,7 +471,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
@@ -506,7 +506,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
if (FirstReg != 0) {
@@ -790,7 +790,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
CriticalPathSU = SU;
}
}
-
+ assert(CriticalPathSU && "Failed to find SUnit critical path");
CriticalPathMI = CriticalPathSU->getInstr();
}
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index d158e70b86ac..4f24f077d120 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -309,7 +309,8 @@ static const Value *getNoopInput(const Value *V,
NoopInput = Op;
} else if (isa<TruncInst>(I) &&
TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
- DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
+ DataBits = std::min((uint64_t)DataBits,
+ I->getType()->getPrimitiveSizeInBits().getFixedSize());
NoopInput = Op;
} else if (auto CS = ImmutableCallSite(I)) {
const Value *ReturnedOp = CS.getReturnedArgOperand();
@@ -523,7 +524,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
+ ((!TM.Options.GuaranteedTailCallOpt &&
+ CS.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term)))
return false;
// If I will have a chain, make sure no other instruction that will have a
@@ -536,9 +538,11 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
// Debug info intrinsics do not get in the way of tail call optimization.
if (isa<DbgInfoIntrinsic>(BBI))
continue;
- // A lifetime end intrinsic should not stop tail call optimization.
+ // A lifetime end or assume intrinsic should not stop tail call
+ // optimization.
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
- if (II->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
+ II->getIntrinsicID() == Intrinsic::assume)
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
!isSafeToSpeculativelyExecute(&*BBI))
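
The reworked condition in the first hunk reads more naturally as a positive predicate: with no ret after the call, tail position can still hold, but only when tail calls are guaranteed (by option or by the tail calling convention) and the block ends in unreachable. A sketch of the same logic factored out, with illustrative names:

    // Same logic as the hunk, restated as a positive predicate.
    static bool tailPositionOkWithoutRet(bool GuaranteedTCO, bool IsTailCC,
                                         bool EndsInUnreachable) {
      // Without a following ret, the call is still in tail position only
      // when tail calls are guaranteed and control falls into unreachable.
      return (GuaranteedTCO || IsTailCC) && EndsInUnreachable;
    }
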
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 54f6cc2d5571..73c53d6c4af5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -91,10 +91,12 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
@@ -159,30 +161,30 @@ static gcp_map_type &getGCMap(void *&P) {
return *(gcp_map_type*)P;
}
-/// getGVAlignmentLog2 - Return the alignment to use for the specified global
-/// value in log2 form. This rounds up to the preferred alignment if possible
-/// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
- unsigned InBits = 0) {
- unsigned NumBits = 0;
+/// getGVAlignment - Return the alignment to use for the specified global
+/// value. This rounds up to the preferred alignment if possible and legal.
+Align AsmPrinter::getGVAlignment(const GlobalValue *GV, const DataLayout &DL,
+ Align InAlign) {
+ Align Alignment;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- NumBits = DL.getPreferredAlignmentLog(GVar);
+ Alignment = Align(DL.getPreferredAlignment(GVar));
- // If InBits is specified, round it to it.
- if (InBits > NumBits)
- NumBits = InBits;
+ // If InAlign is specified, round up to it.
+ if (InAlign > Alignment)
+ Alignment = InAlign;
// If the GV has a specified alignment, take it into account.
- if (GV->getAlignment() == 0)
- return NumBits;
+ const MaybeAlign GVAlign(GV->getAlignment());
+ if (!GVAlign)
+ return Alignment;
- unsigned GVAlign = Log2_32(GV->getAlignment());
+ assert(GVAlign && "GVAlign must be set");
// If the GVAlign is larger than Alignment, or if we are required to obey
// GVAlign because the GV has an assigned section, obey it.
- if (GVAlign > NumBits || GV->hasSection())
- NumBits = GVAlign;
- return NumBits;
+ if (*GVAlign > Alignment || GV->hasSection())
+ Alignment = *GVAlign;
+ return Alignment;
}
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
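
getGVAlignmentLog2 becoming getGVAlignment is one instance of the import-wide move from log2-encoded alignments to the Align type, which carries the power-of-two byte amount directly and removes the 1u << NumBits / Log2_32 round-trips. A small before/after sketch, assuming only the Align API this diff itself uses:

    #include "llvm/Support/Alignment.h"
    #include <cstdint>
    using llvm::Align;

    void alignmentConventions() {
      // Old convention: alignments travel as log2 values.
      unsigned NumBits = 3;                // meant "8-byte alignment"
      unsigned OldBytes = 1u << NumBits;   // 8
      // New convention: Align holds the power-of-two byte amount itself.
      Align A(8);
      uint64_t NewBytes = A.value();       // 8, no shift round-trips
      bool OneByte = (A == Align::None()); // Align::None() is 1-byte aligned
      (void)OldBytes; (void)NewBytes; (void)OneByte;
    }
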
@@ -248,13 +250,14 @@ const MCSection *AsmPrinter::getCurrentSection() const {
void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
}
bool AsmPrinter::doInitialization(Module &M) {
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
// Initialize TargetLoweringObjectFile.
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
@@ -311,7 +314,7 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
- Handlers.emplace_back(llvm::make_unique<CodeViewDebug>(this),
+ Handlers.emplace_back(std::make_unique<CodeViewDebug>(this),
DbgTimerName, DbgTimerDescription,
CodeViewLineTablesGroupName,
CodeViewLineTablesGroupDescription);
@@ -380,7 +383,7 @@ bool AsmPrinter::doInitialization(Module &M) {
if (mdconst::extract_or_null<ConstantInt>(
MMI->getModule()->getModuleFlag("cfguardtable")))
- Handlers.emplace_back(llvm::make_unique<WinCFGuard>(this), CFGuardName,
+ Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription);
@@ -425,7 +428,10 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
return;
case GlobalValue::PrivateLinkage:
+ return;
case GlobalValue::InternalLinkage:
+ if (MAI->hasDotLGloblDirective())
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_LGlobal);
return;
case GlobalValue::AppendingLinkage:
case GlobalValue::AvailableExternallyLinkage:
@@ -501,7 +507,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
// sections and expected to be contiguous (e.g. ObjC metadata).
- unsigned AlignLog = getGVAlignmentLog2(GV, DL);
+ const Align Alignment = getGVAlignment(GV, DL);
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -513,12 +519,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle common symbols
if (GVKind.isCommon()) {
if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
- unsigned Align = 1 << AlignLog;
- if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
- Align = 0;
-
// .comm _foo, 42, 4
- OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ const bool SupportsAlignment =
+ getObjFileLowering().getCommDirectiveSupportsAlignment();
+ OutStreamer->EmitCommonSymbol(GVSym, Size,
+ SupportsAlignment ? Alignment.value() : 0);
return;
}
@@ -531,10 +536,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
TheSection->isVirtualSection()) {
if (Size == 0)
Size = 1; // zerofill of 0 bytes is undefined.
- unsigned Align = 1 << AlignLog;
EmitLinkage(GV, GVSym);
// .zerofill __DATA, __bss, _foo, 400, 5
- OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align);
+ OutStreamer->EmitZerofill(TheSection, GVSym, Size, Alignment.value());
return;
}
@@ -544,7 +548,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
getObjFileLowering().getBSSSection() == TheSection) {
if (Size == 0)
Size = 1; // .comm Foo, 0 is undefined, avoid it.
- unsigned Align = 1 << AlignLog;
// Use .lcomm only if it supports user-specified alignment.
// Otherwise, while it would still be correct to use .lcomm in some
@@ -554,17 +557,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Prefer to simply fall back to .local / .comm in this case.
if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
// .lcomm _foo, 42
- OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align);
+ OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Alignment.value());
return;
}
- if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
- Align = 0;
-
// .local _foo
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local);
// .comm _foo, 42, 4
- OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ const bool SupportsAlignment =
+ getObjFileLowering().getCommDirectiveSupportsAlignment();
+ OutStreamer->EmitCommonSymbol(GVSym, Size,
+ SupportsAlignment ? Alignment.value() : 0);
return;
}
@@ -585,11 +588,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isThreadBSS()) {
TheSection = getObjFileLowering().getTLSBSSSection();
- OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, Alignment.value());
} else if (GVKind.isThreadData()) {
OutStreamer->SwitchSection(TheSection);
- EmitAlignment(AlignLog, GV);
+ EmitAlignment(Alignment, GV);
OutStreamer->EmitLabel(MangSym);
EmitGlobalConstant(GV->getParent()->getDataLayout(),
@@ -625,7 +628,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->SwitchSection(TheSection);
EmitLinkage(GV, EmittedInitSym);
- EmitAlignment(AlignLog, GV);
+ EmitAlignment(Alignment, GV);
OutStreamer->EmitLabel(EmittedInitSym);
@@ -664,6 +667,10 @@ void AsmPrinter::EmitFunctionHeader() {
OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM));
EmitVisibility(CurrentFnSym, F.getVisibility());
+ if (MAI->needsFunctionDescriptors() &&
+ F.getLinkage() != GlobalValue::InternalLinkage)
+ EmitLinkage(&F, CurrentFnDescSym);
+
EmitLinkage(&F, CurrentFnSym);
if (MAI->hasFunctionAlignment())
EmitAlignment(MF->getAlignment(), &F);
@@ -699,8 +706,13 @@ void AsmPrinter::EmitFunctionHeader() {
}
}
- // Emit the CurrentFnSym. This is a virtual function to allow targets to
- // do their wild and crazy things as required.
+ // Emit the function descriptor. This is a virtual function to allow targets
+ // to emit their specific function descriptor.
+ if (MAI->needsFunctionDescriptors())
+ EmitFunctionDescriptor();
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to do
+ // their wild and crazy things as required.
EmitFunctionEntryLabel();
// If the function had address-taken blocks that got deleted, then we have
@@ -783,7 +795,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
/// emitImplicitDef - This method emits the specified machine instruction
/// that is an implicit def.
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
- unsigned RegNo = MI->getOperand(0).getReg();
+ Register RegNo = MI->getOperand(0).getReg();
SmallString<128> Str;
raw_svector_ostream OS(Str);
@@ -910,7 +922,8 @@ static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << "DEBUG_LABEL: ";
const DILabel *V = MI->getDebugLabel();
- if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) {
+ if (auto *SP = dyn_cast<DISubprogram>(
+ V->getScope()->getNonLexicalBlockFileScope())) {
StringRef Name = SP->getName();
if (!Name.empty())
OS << Name << ":";
@@ -1024,7 +1037,7 @@ void AsmPrinter::EmitFunctionBody() {
// Get MachineDominatorTree or compute it on the fly if it's unavailable
MDT = getAnalysisIfAvailable<MachineDominatorTree>();
if (!MDT) {
- OwnedMDT = make_unique<MachineDominatorTree>();
+ OwnedMDT = std::make_unique<MachineDominatorTree>();
OwnedMDT->getBase().recalculate(*MF);
MDT = OwnedMDT.get();
}
@@ -1032,7 +1045,7 @@ void AsmPrinter::EmitFunctionBody() {
// Get MachineLoopInfo or compute it on the fly if it's unavailable
MLI = getAnalysisIfAvailable<MachineLoopInfo>();
if (!MLI) {
- OwnedMLI = make_unique<MachineLoopInfo>();
+ OwnedMLI = std::make_unique<MachineLoopInfo>();
OwnedMLI->getBase().analyze(MDT->getBase());
MLI = OwnedMLI.get();
}
@@ -1052,9 +1065,13 @@ void AsmPrinter::EmitFunctionBody() {
++NumInstsInFunction;
}
- // If there is a pre-instruction symbol, emit a label for it here.
+ // If there is a pre-instruction symbol, emit a label for it here. If the
+ // instruction was duplicated and the label has already been emitted,
+ // don't re-emit the same label.
+ // FIXME: Consider strengthening that to an assertion.
if (MCSymbol *S = MI.getPreInstrSymbol())
- OutStreamer->EmitLabel(S);
+ if (S->isUndefined())
+ OutStreamer->EmitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
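
The isUndefined() guard matters because a pass that duplicates instructions (tail duplication, for instance) leaves both copies pointing at the same pre- or post-instruction MCSymbol, and defining one symbol twice is an MC error. A minimal sketch of the guarded emission:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/MC/MCStreamer.h"

    // Both clones of a duplicated instruction return the same MCSymbol,
    // so only the first copy may define the label.
    void emitPreLabelOnce(const llvm::MachineInstr &MI, llvm::MCStreamer &OS) {
      if (llvm::MCSymbol *S = MI.getPreInstrSymbol())
        if (S->isUndefined()) // not defined yet, so this is the first copy
          OS.EmitLabel(S);
    }
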
@@ -1107,9 +1124,13 @@ void AsmPrinter::EmitFunctionBody() {
break;
}
- // If there is a post-instruction symbol, emit a label for it here.
+ // If there is a post-instruction symbol, emit a label for it here. If
+ // the instruction was duplicated and the label has already been emitted,
+ // don't re-emit the same label.
+ // FIXME: Consider strengthening that to an assertion.
if (MCSymbol *S = MI.getPostInstrSymbol())
- OutStreamer->EmitLabel(S);
+ if (S->isUndefined())
+ OutStreamer->EmitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
@@ -1313,11 +1334,10 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
- if (IsFunction) {
- OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
- if (isa<GlobalIFunc>(GIS))
- OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
- }
+ if (IsFunction)
+ OutStreamer->EmitSymbolAttribute(Name, isa<GlobalIFunc>(GIS)
+ ? MCSA_ELF_TypeIndFunction
+ : MCSA_ELF_TypeFunction);
EmitVisibility(Name, GIS.getVisibility());
@@ -1349,60 +1369,28 @@ void AsmPrinter::emitRemarksSection(Module &M) {
RemarkStreamer *RS = M.getContext().getRemarkStreamer();
if (!RS)
return;
- const remarks::Serializer &Serializer = RS->getSerializer();
+ remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer();
+
+ Optional<SmallString<128>> Filename;
+ if (Optional<StringRef> FilenameRef = RS->getFilename()) {
+ Filename = *FilenameRef;
+ sys::fs::make_absolute(*Filename);
+ assert(!Filename->empty() && "The filename can't be empty.");
+ }
+
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ std::unique_ptr<remarks::MetaSerializer> MetaSerializer =
+ Filename ? RemarkSerializer.metaSerializer(OS, StringRef(*Filename))
+ : RemarkSerializer.metaSerializer(OS);
+ MetaSerializer->emit();
// Switch to the right section: .remarks/__remarks.
MCSection *RemarksSection =
OutContext.getObjectFileInfo()->getRemarksSection();
OutStreamer->SwitchSection(RemarksSection);
- // Emit the magic number.
- OutStreamer->EmitBytes(remarks::Magic);
- // Explicitly emit a '\0'.
- OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
-
- // Emit the version number: little-endian uint64_t.
- // The version number is located at the offset 0x0 in the section.
- std::array<char, 8> Version;
- support::endian::write64le(Version.data(), remarks::Version);
- OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
-
- // Emit the string table in the section.
- // Note: we need to use the streamer here to emit it in the section. We can't
- // just use the serialize function with a raw_ostream because of the way
- // MCStreamers work.
- uint64_t StrTabSize =
- Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0;
- // Emit the total size of the string table (the size itself excluded):
- // little-endian uint64_t.
- // The total size is located after the version number.
- // Note: even if no string table is used, emit 0.
- std::array<char, 8> StrTabSizeBuf;
- support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
- OutStreamer->EmitBinaryData(
- StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
-
- if (const Optional<remarks::StringTable> &StrTab = Serializer.StrTab) {
- std::vector<StringRef> StrTabStrings = StrTab->serialize();
- // Emit a list of null-terminated strings.
- // Note: the order is important here: the ID used in the remarks corresponds
- // to the position of the string in the section.
- for (StringRef Str : StrTabStrings) {
- OutStreamer->EmitBytes(Str);
- // Explicitly emit a '\0'.
- OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
- }
- }
-
- // Emit the null-terminated absolute path to the remark file.
- // The path is located at the offset 0x4 in the section.
- StringRef FilenameRef = RS->getFilename();
- SmallString<128> Filename = FilenameRef;
- sys::fs::make_absolute(Filename);
- assert(!Filename.empty() && "The filename can't be empty.");
- OutStreamer->EmitBytes(Filename);
- // Explicitly emit a '\0'.
- OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+ OutStreamer->EmitBinaryData(OS.str());
}
bool AsmPrinter::doFinalization(Module &M) {
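
The deleted block above wrote the remarks magic, version, string table, and filename by hand through the streamer; the replacement renders all of that metadata into a string with a MetaSerializer and emits the finished bytes in one call. A reduced sketch of the pattern, with the concrete types abstracted behind template parameters:

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Serialize the remarks metadata to a string first, then emit the
    // finished bytes in a single EmitBinaryData call, since an MCStreamer
    // section cannot be written through a plain raw_ostream.
    template <typename SerializerT, typename StreamerT, typename SectionT>
    void emitRemarksMeta(SerializerT &Serializer, StreamerT &Streamer,
                         SectionT *Section) {
      std::string Buf;
      llvm::raw_string_ostream OS(Buf);
      Serializer.metaSerializer(OS)->emit(); // magic, version, strtab, path
      Streamer.SwitchSection(Section);
      Streamer.EmitBinaryData(OS.str());
    }
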
@@ -1455,7 +1443,7 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer->SwitchSection(TLOF.getDataSection());
const DataLayout &DL = M.getDataLayout();
- EmitAlignment(Log2_32(DL.getPointerSize()));
+ EmitAlignment(Align(DL.getPointerSize()));
for (const auto &Stub : Stubs) {
OutStreamer->EmitLabel(Stub.first);
OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
@@ -1482,7 +1470,7 @@ bool AsmPrinter::doFinalization(Module &M) {
COFF::IMAGE_SCN_LNK_COMDAT,
SectionKind::getReadOnly(), Stub.first->getName(),
COFF::IMAGE_COMDAT_SELECT_ANY));
- EmitAlignment(Log2_32(DL.getPointerSize()));
+ EmitAlignment(Align(DL.getPointerSize()));
OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global);
OutStreamer->EmitLabel(Stub.first);
OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
@@ -1607,8 +1595,7 @@ bool AsmPrinter::doFinalization(Module &M) {
"expected llvm.used to be an array type");
if (const auto *A = cast<ConstantArray>(LU->getInitializer())) {
for (const Value *Op : A->operands()) {
- const auto *GV =
- cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
+ const auto *GV = cast<GlobalValue>(Op->stripPointerCasts());
// Global symbols with internal or private linkage are not visible to
// the linker, and thus would cause an error when the linker tried to
// preserve the symbol due to the `/include:` directive.
@@ -1679,8 +1666,27 @@ MCSymbol *AsmPrinter::getCurExceptionSym() {
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
+
// Get the function symbol.
- CurrentFnSym = getSymbol(&MF.getFunction());
+ if (MAI->needsFunctionDescriptors()) {
+ assert(TM.getTargetTriple().isOSAIX() && "Function descriptor is only"
+ " supported on AIX.");
+ assert(CurrentFnDescSym && "The function descriptor symbol needs to be"
+ " initalized first.");
+
+ // Get the function entry point symbol.
+ CurrentFnSym =
+ OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName());
+
+ const Function &F = MF.getFunction();
+ MCSectionXCOFF *FnEntryPointSec =
+ cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM));
+ // Set the containing csect.
+ cast<MCSymbolXCOFF>(CurrentFnSym)->setContainingCsect(FnEntryPointSec);
+ } else {
+ CurrentFnSym = getSymbol(&MF.getFunction());
+ }
+
CurrentFnSymForSize = CurrentFnSym;
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
@@ -1765,7 +1771,7 @@ void AsmPrinter::EmitConstantPool() {
if (CurSection != CPSections[i].S) {
OutStreamer->SwitchSection(CPSections[i].S);
- EmitAlignment(Log2_32(CPSections[i].Alignment));
+ EmitAlignment(Align(CPSections[i].Alignment));
CurSection = CPSections[i].S;
Offset = 0;
}
@@ -1812,7 +1818,7 @@ void AsmPrinter::EmitJumpTableInfo() {
OutStreamer->SwitchSection(ReadOnlySection);
}
- EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL)));
+ EmitAlignment(Align(MJTI->getEntryAlignment(DL)));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
@@ -2025,10 +2031,10 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
}
// Emit the function pointers in the target-specific order
- unsigned Align = Log2_32(DL.getPointerPrefAlignment());
llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) {
return L.Priority < R.Priority;
});
+ const Align Align = DL.getPointerPrefAlignment();
for (Structor &S : Structors) {
const TargetLoweringObjectFile &Obj = getObjFileLowering();
const MCSymbol *KeySym = nullptr;
@@ -2149,23 +2155,20 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
//===----------------------------------------------------------------------===//
// EmitAlignment - Emit an alignment directive to the specified power of
-// two boundary. For example, if you pass in 3 here, you will get an 8
-// byte alignment. If a global value is specified, and if that global has
+// two boundary. If a global value is specified, and if that global has
// an explicit alignment requested, it will override the alignment request
// if required for correctness.
-void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
+void AsmPrinter::EmitAlignment(Align Alignment, const GlobalObject *GV) const {
if (GV)
- NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits);
+ Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment);
- if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+ if (Alignment == Align::None())
+ return; // 1-byte aligned: no need to emit alignment.
- assert(NumBits <
- static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
- "undefined behavior");
if (getCurrentSection()->getKind().isText())
- OutStreamer->EmitCodeAlignment(1u << NumBits);
+ OutStreamer->EmitCodeAlignment(Alignment.value());
else
- OutStreamer->EmitValueToAlignment(1u << NumBits);
+ OutStreamer->EmitValueToAlignment(Alignment.value());
}
//===----------------------------------------------------------------------===//
@@ -2481,6 +2484,7 @@ static void emitGlobalConstantStruct(const DataLayout &DL,
}
static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
+ assert(ET && "Unknown float type");
APInt API = APF.bitcastToAPInt();
// First print a comment with what we think the original floating-point value
@@ -2488,11 +2492,7 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
if (AP.isVerbose()) {
SmallString<8> StrVal;
APF.toString(StrVal);
-
- if (ET)
- ET->print(AP.OutStreamer->GetCommentOS());
- else
- AP.OutStreamer->GetCommentOS() << "Printing <null> Type";
+ ET->print(AP.OutStreamer->GetCommentOS());
AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n';
}
@@ -2670,7 +2670,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
*ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(
- FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer);
+ FinalGV, FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer);
// Update GOT equivalent usage information
--NumUses;
@@ -2930,7 +2930,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
-void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
// End the previous funclet and start a new one.
if (MBB.isEHFuncletEntry()) {
for (const HandlerInfo &HI : Handlers) {
@@ -2940,8 +2940,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
}
// Emit an alignment directive for this block, if needed.
- if (unsigned Align = MBB.getAlignment())
- EmitAlignment(Align);
+ const Align Alignment = MBB.getAlignment();
+ if (Alignment != Align::None())
+ EmitAlignment(Alignment);
MCCodePaddingContext Context;
setupCodePaddingContext(MBB, Context);
OutStreamer->EmitCodePaddingBasicBlockStart(Context);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 7721e996aca5..420df26a2b8b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -72,7 +72,7 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
const MDNode *LocMDNode) const {
if (!DiagInfo) {
- DiagInfo = make_unique<SrcMgrDiagInfo>();
+ DiagInfo = std::make_unique<SrcMgrDiagInfo>();
MCContext &Context = MMI->getContext();
Context.setInlineSourceManager(&DiagInfo->SrcMgr);
@@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
Sym->print(OS, AP->MAI);
+ MMI->getContext().registerInlineAsmLabel(Sym);
} else if (MI->getOperand(OpNo).isMBB()) {
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
Sym->print(OS, AP->MAI);
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index db2ff458eb2e..09f7496cd4ef 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -73,18 +73,18 @@ class HashingByteStreamer final : public ByteStreamer {
class BufferByteStreamer final : public ByteStreamer {
private:
SmallVectorImpl<char> &Buffer;
- SmallVectorImpl<std::string> &Comments;
+ std::vector<std::string> &Comments;
+public:
/// Only verbose textual output needs comments. This will be set to
/// true for that case, and false otherwise. If false, comments passed in to
/// the emit methods will be ignored.
- bool GenerateComments;
+ const bool GenerateComments;
-public:
BufferByteStreamer(SmallVectorImpl<char> &Buffer,
- SmallVectorImpl<std::string> &Comments,
- bool GenerateComments)
- : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {}
+ std::vector<std::string> &Comments, bool GenerateComments)
+ : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {
+ }
void EmitInt8(uint8_t Byte, const Twine &Comment) override {
Buffer.push_back(Byte);
if (GenerateComments)
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 932959c311fa..c6457f3626d1 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -98,7 +98,8 @@ using namespace llvm::codeview;
namespace {
class CVMCAdapter : public CodeViewRecordStreamer {
public:
- CVMCAdapter(MCStreamer &OS) : OS(&OS) {}
+ CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable)
+ : OS(&OS), TypeTable(TypeTable) {}
void EmitBytes(StringRef Data) { OS->EmitBytes(Data); }
@@ -110,8 +111,24 @@ public:
void AddComment(const Twine &T) { OS->AddComment(T); }
+ void AddRawComment(const Twine &T) { OS->emitRawComment(T); }
+
+ bool isVerboseAsm() { return OS->isVerboseAsm(); }
+
+ std::string getTypeName(TypeIndex TI) {
+ std::string TypeName;
+ if (!TI.isNoneType()) {
+ if (TI.isSimple())
+ TypeName = TypeIndex::simpleTypeName(TI);
+ else
+ TypeName = TypeTable.getTypeName(TI);
+ }
+ return TypeName;
+ }
+
private:
MCStreamer *OS = nullptr;
+ TypeCollection &TypeTable;
};
} // namespace
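
With a TypeCollection threaded into CVMCAdapter, the CodeView record streamer can resolve a TypeIndex to a printable name on its own, which is what lets the verbose-asm comment machinery below move out of emitTypeInformation. A hypothetical caller, for illustration only:

    // Hypothetical use: annotate a record with a resolved type name
    // (simple indices go through simpleTypeName, the rest through the
    // collection).
    void commentType(CVMCAdapter &Adapter, llvm::codeview::TypeIndex TI) {
      if (Adapter.isVerboseAsm())
        Adapter.AddRawComment("type = " + Adapter.getTypeName(TI));
    }
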
@@ -617,13 +634,6 @@ emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
OS.EmitBytes(NullTerminatedString);
}
-static StringRef getTypeLeafName(TypeLeafKind TypeKind) {
- for (const EnumEntry<TypeLeafKind> &EE : getTypeLeafNames())
- if (EE.Value == TypeKind)
- return EE.Name;
- return "";
-}
-
void CodeViewDebug::emitTypeInformation() {
if (TypeTable.empty())
return;
@@ -632,30 +642,11 @@ void CodeViewDebug::emitTypeInformation() {
OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
emitCodeViewMagicVersion();
- SmallString<8> CommentPrefix;
- if (OS.isVerboseAsm()) {
- CommentPrefix += '\t';
- CommentPrefix += Asm->MAI->getCommentString();
- CommentPrefix += ' ';
- }
-
TypeTableCollection Table(TypeTable.records());
- SmallString<512> CommentBlock;
- raw_svector_ostream CommentOS(CommentBlock);
- std::unique_ptr<ScopedPrinter> SP;
- std::unique_ptr<TypeDumpVisitor> TDV;
TypeVisitorCallbackPipeline Pipeline;
- if (OS.isVerboseAsm()) {
- // To construct block comment describing the type record for readability.
- SP = llvm::make_unique<ScopedPrinter>(CommentOS);
- SP->setPrefix(CommentPrefix);
- TDV = llvm::make_unique<TypeDumpVisitor>(Table, SP.get(), false);
- Pipeline.addCallbackToPipeline(*TDV);
- }
-
// To emit type record using Codeview MCStreamer adapter
- CVMCAdapter CVMCOS(OS);
+ CVMCAdapter CVMCOS(OS, Table);
TypeRecordMapping typeMapping(CVMCOS);
Pipeline.addCallbackToPipeline(typeMapping);
@@ -664,17 +655,6 @@ void CodeViewDebug::emitTypeInformation() {
// This will fail if the record data is invalid.
CVType Record = Table.getType(*B);
- CommentBlock.clear();
-
- auto RecordLen = Record.length();
- auto RecordKind = Record.kind();
- if (OS.isVerboseAsm())
- CVMCOS.AddComment("Record length");
- CVMCOS.EmitIntValue(RecordLen - 2, 2);
- if (OS.isVerboseAsm())
- CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind));
- CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind));
-
Error E = codeview::visitTypeRecord(Record, *B, Pipeline);
if (E) {
@@ -682,13 +662,6 @@ void CodeViewDebug::emitTypeInformation() {
llvm_unreachable("produced malformed type record");
}
- if (OS.isVerboseAsm()) {
- // emitRawComment will insert its own tab and comment string before
- // the first line, so strip off our first one. It also prints its own
- // newline.
- OS.emitRawComment(
- CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
- }
B = Table.getNext(*B);
}
}
@@ -1135,7 +1108,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
if (!BeginLabel->isDefined() || !EndLabel->isDefined())
continue;
- DIType *DITy = std::get<2>(HeapAllocSite);
+ const DIType *DITy = std::get<2>(HeapAllocSite);
MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
OS.AddComment("Call site offset");
OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0);
@@ -1363,7 +1336,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
const MachineFrameInfo &MFI = MF->getFrameInfo();
const Function &GV = MF->getFunction();
- auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()});
+ auto Insertion = FnDebugInfo.insert({&GV, std::make_unique<FunctionInfo>()});
assert(Insertion.second && "function already has info");
CurFn = Insertion.first->second.get();
CurFn->FuncId = NextFuncId++;
@@ -2633,17 +2606,6 @@ void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI,
emitLocalVariable(FI, L);
}
-/// Only call this on endian-specific types like ulittle16_t and little32_t, or
-/// structs composed of them.
-template <typename T>
-static void copyBytesForDefRange(SmallString<20> &BytePrefix,
- SymbolKind SymKind, const T &DefRangeHeader) {
- BytePrefix.resize(2 + sizeof(T));
- ulittle16_t SymKindLE = ulittle16_t(SymKind);
- memcpy(&BytePrefix[0], &SymKindLE, 2);
- memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T));
-}
-
void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
const LocalVariable &Var) {
// LocalSym record, see SymbolRecord.h for more info.
@@ -2692,8 +2654,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
(bool(Flags & LocalSymFlags::IsParameter)
? (EncFP == FI.EncodedParamFramePtrReg)
: (EncFP == FI.EncodedLocalFramePtrReg))) {
- little32_t FPOffset = little32_t(Offset);
- copyBytesForDefRange(BytePrefix, S_DEFRANGE_FRAMEPOINTER_REL, FPOffset);
+ DefRangeFramePointerRelHeader DRHdr;
+ DRHdr.Offset = Offset;
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
} else {
uint16_t RegRelFlags = 0;
if (DefRange.IsSubfield) {
@@ -2701,28 +2664,27 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
(DefRange.StructOffset
<< DefRangeRegisterRelSym::OffsetInParentShift);
}
- DefRangeRegisterRelSym::Header DRHdr;
+ DefRangeRegisterRelHeader DRHdr;
DRHdr.Register = Reg;
DRHdr.Flags = RegRelFlags;
DRHdr.BasePointerOffset = Offset;
- copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER_REL, DRHdr);
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
}
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
if (DefRange.IsSubfield) {
- DefRangeSubfieldRegisterSym::Header DRHdr;
+ DefRangeSubfieldRegisterHeader DRHdr;
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
DRHdr.OffsetInParent = DefRange.StructOffset;
- copyBytesForDefRange(BytePrefix, S_DEFRANGE_SUBFIELD_REGISTER, DRHdr);
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
} else {
- DefRangeRegisterSym::Header DRHdr;
+ DefRangeRegisterHeader DRHdr;
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
- copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER, DRHdr);
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
}
}
- OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
}
}
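
The deleted copyBytesForDefRange helper serialized a SymbolKind plus an endian-specific header into a byte prefix by hand; the replacement fills a typed DefRange*Header and passes it straight to EmitCVDefRangeDirective, leaving the byte-level encoding to the streamer. A sketch of the new shape for the plain-register case, mirroring the hunk:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
    #include "llvm/MC/MCStreamer.h"

    // Typed header in, encoding owned by the streamer.
    void emitRegisterDefRange(
        llvm::MCStreamer &OS,
        llvm::ArrayRef<std::pair<const llvm::MCSymbol *, const llvm::MCSymbol *>>
            Ranges,
        uint16_t CVRegister) {
      llvm::codeview::DefRangeRegisterHeader DRHdr;
      DRHdr.Register = CVRegister;
      DRHdr.MayHaveNoName = 0;
      OS.EmitCVDefRangeDirective(Ranges, DRHdr);
    }
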
@@ -2896,6 +2858,14 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
CurFn = nullptr;
}
+// Usable locations are valid with non-zero line numbers. A line number of zero
+// corresponds to optimized code that doesn't have a distinct source location.
+// In this case, we try to use the previous or next source location depending on
+// the context.
+static bool isUsableDebugLoc(DebugLoc DL) {
+ return DL && DL.getLine() != 0;
+}
+
void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
@@ -2907,19 +2877,21 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
// If the first instruction of a new MBB has no location, find the first
// instruction with a location and use that.
DebugLoc DL = MI->getDebugLoc();
- if (!DL && MI->getParent() != PrevInstBB) {
+ if (!isUsableDebugLoc(DL) && MI->getParent() != PrevInstBB) {
for (const auto &NextMI : *MI->getParent()) {
if (NextMI.isDebugInstr())
continue;
DL = NextMI.getDebugLoc();
- if (DL)
+ if (isUsableDebugLoc(DL))
break;
}
+ // FIXME: Handle the case where the BB has no valid locations. This would
+ // probably require doing a real dataflow analysis.
}
PrevInstBB = MI->getParent();
// If we still don't have a debug location, don't record a location.
- if (!DL)
+ if (!isUsableDebugLoc(DL))
return;
maybeRecordLocation(DL, Asm->MF);
@@ -3026,7 +2998,7 @@ void CodeViewDebug::collectGlobalVariableInfo() {
auto Insertion = ScopeGlobals.insert(
{Scope, std::unique_ptr<GlobalVariableList>()});
if (Insertion.second)
- Insertion.first->second = llvm::make_unique<GlobalVariableList>();
+ Insertion.first->second = std::make_unique<GlobalVariableList>();
VariableList = Insertion.first->second.get();
} else if (GV->hasComdat())
// Emit this global variable into a COMDAT section.
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index ce57b789d7fa..7ffd77926cf7 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -148,7 +148,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LexicalBlock *, 1> ChildBlocks;
std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
- std::vector<std::tuple<MCSymbol *, MCSymbol *, DIType *>> HeapAllocSites;
+ std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>>
+ HeapAllocSites;
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
diff --git a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index ddd60575b6c0..7f9d6c618ad3 100644
--- a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -41,7 +41,7 @@ using EntryIndex = DbgValueHistoryMap::EntryIndex;
static Register isDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue());
assert(MI.getNumOperands() == 4);
- // If the location of variable is an entry value (DW_OP_entry_value)
+ // If the location of variable is an entry value (DW_OP_LLVM_entry_value)
// do not consider it as a register location.
if (MI.getDebugExpression()->isEntryValue())
return 0;
@@ -177,13 +177,13 @@ static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV,
IndicesToErase.push_back(Index);
Entry.endEntry(NewIndex);
}
- if (unsigned Reg = isDescribedByReg(DV))
+ if (Register Reg = isDescribedByReg(DV))
TrackedRegs[Reg] |= !Overlaps;
}
// If the new debug value is described by a register, add tracking of
// that register if it is not already tracked.
- if (unsigned NewReg = isDescribedByReg(DV)) {
+ if (Register NewReg = isDescribedByReg(DV)) {
if (!TrackedRegs.count(NewReg))
addRegDescribedVar(RegVars, NewReg, Var);
LiveEntries[Var].insert(NewIndex);
@@ -234,7 +234,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
DbgLabelInstrMap &DbgLabels) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- unsigned FrameReg = TRI->getFrameRegister(*MF);
+ Register FrameReg = TRI->getFrameRegister(*MF);
RegDescribedVarsMap RegVars;
DbgValueEntriesMap LiveEntries;
for (const auto &MBB : *MF) {
@@ -275,7 +275,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
continue;
// If this is a virtual register, only clobber it since it doesn't
// have aliases.
- if (TRI->isVirtualRegister(MO.getReg()))
+ if (Register::isVirtualRegister(MO.getReg()))
clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries,
MI);
// If this is a register def operand, it may end a debug value
@@ -296,7 +296,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
// Don't consider SP to be clobbered by register masks.
for (auto It : RegVars) {
unsigned int Reg = It.first;
- if (Reg != SP && TRI->isPhysicalRegister(Reg) &&
+ if (Reg != SP && Register::isPhysicalRegister(Reg) &&
MO.clobbersPhysReg(Reg))
RegsToClobber.push_back(Reg);
}
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 789291771b5a..0db86b09d19a 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -38,21 +38,18 @@ public:
: CU(CU), EntryOffset(EntryOffset) {}
};
struct Entry {
- const MCSymbol *BeginSym;
- const MCSymbol *EndSym;
+ const MCSymbol *Begin;
+ const MCSymbol *End;
size_t ByteOffset;
size_t CommentOffset;
- Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset,
- size_t CommentOffset)
- : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset),
- CommentOffset(CommentOffset) {}
};
private:
SmallVector<List, 4> Lists;
SmallVector<Entry, 32> Entries;
SmallString<256> DWARFBytes;
- SmallVector<std::string, 32> Comments;
+ std::vector<std::string> Comments;
+ MCSymbol *Sym;
/// Only verbose textual output needs comments. This will be set to
/// true for that case, and false otherwise.
@@ -63,6 +60,12 @@ public:
size_t getNumLists() const { return Lists.size(); }
const List &getList(size_t LI) const { return Lists[LI]; }
ArrayRef<List> getLists() const { return Lists; }
+ MCSymbol *getSym() const {
+ return Sym;
+ }
+ void setSym(MCSymbol *Sym) {
+ this->Sym = Sym;
+ }
class ListBuilder;
class EntryBuilder;
@@ -93,7 +96,7 @@ private:
/// Until the next call, bytes added to the stream will be added to this
/// entry.
void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) {
- Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size());
+ Entries.push_back({BeginSym, EndSym, DWARFBytes.size(), Comments.size()});
}
/// Finalize a .debug_loc entry, deleting if it's empty.
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 9548ad9918c1..a61c98ec1c18 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -208,7 +208,7 @@ void DwarfCompileUnit::addLocationAttribute(
if (!Loc) {
addToAccelTable = true;
Loc = new (DIEValueAllocator) DIELoc;
- DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
+ DwarfExpr = std::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
}
if (Expr) {
@@ -326,14 +326,13 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
// emitted into and the subprogram was contained within. If these are the
// same then extend our current range, otherwise add this as a new range.
if (CURanges.empty() || !SameAsPrevCU ||
- (&CURanges.back().getEnd()->getSection() !=
- &Range.getEnd()->getSection())) {
+ (&CURanges.back().End->getSection() !=
+ &Range.End->getSection())) {
CURanges.push_back(Range);
- DD->addSectionLabel(Range.getStart());
return;
}
- CURanges.back().setEnd(Range.getEnd());
+ CURanges.back().End = Range.End;
}
void DwarfCompileUnit::initStmtList() {
@@ -399,7 +398,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
} else {
const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo();
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
- if (RI->isPhysicalRegister(Location.getReg()))
+ if (Register::isPhysicalRegister(Location.getReg()))
addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
}
}
@@ -468,14 +467,6 @@ void DwarfCompileUnit::constructScopeDIE(
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
- // Emit the offset into .debug_ranges or .debug_rnglists as a relocatable
- // label. emitDIE() will handle emitting it appropriately.
- const MCSymbol *RangeSectionSym =
- DD->getDwarfVersion() >= 5
- ? TLOF.getDwarfRnglistsSection()->getBeginSymbol()
- : TLOF.getDwarfRangesSection()->getBeginSymbol();
HasRangeLists = true;
@@ -494,12 +485,17 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
// (DW_RLE_startx_endx etc.).
if (DD->getDwarfVersion() >= 5)
addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index);
- else if (isDwoUnit())
- addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
- RangeSectionSym);
- else
- addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
- RangeSectionSym);
+ else {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const MCSymbol *RangeSectionSym =
+ TLOF.getDwarfRangesSection()->getBeginSymbol();
+ if (isDwoUnit())
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ else
+ addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ }
}
void DwarfCompileUnit::attachRangesOrLowHighPC(
@@ -507,7 +503,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
if (Ranges.size() == 1 || !DD->useRangesSection()) {
const RangeSpan &Front = Ranges.front();
const RangeSpan &Back = Ranges.back();
- attachLowHighPC(Die, Front.getStart(), Back.getEnd());
+ attachLowHighPC(Die, Front.Begin, Back.End);
} else
addScopeRangeList(Die, std::move(Ranges));
}
@@ -517,8 +513,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
SmallVector<RangeSpan, 2> List;
List.reserve(Ranges.size());
for (const InsnRange &R : Ranges)
- List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first),
- DD->getLabelAfterInsn(R.second)));
+ List.push_back(
+ {DD->getLabelBeforeInsn(R.first), DD->getLabelAfterInsn(R.second)});
attachRangesOrLowHighPC(Die, std::move(List));
}
@@ -647,8 +643,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Expr);
SmallVector<uint64_t, 8> Ops;
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(Offset);
+ DIExpression::appendOffset(Ops, Offset);
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
@@ -892,32 +887,117 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
-DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
- const DISubprogram &CalleeSP,
- bool IsTail,
- const MCExpr *PCOffset) {
+/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location atom.
+static bool useGNUAnalogForDwarf5Feature(DwarfDebug *DD) {
+ return DD->getDwarfVersion() == 4 && DD->tuneForGDB();
+}
+
+dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const {
+ if (!useGNUAnalogForDwarf5Feature(DD))
+ return Tag;
+ switch (Tag) {
+ case dwarf::DW_TAG_call_site:
+ return dwarf::DW_TAG_GNU_call_site;
+ case dwarf::DW_TAG_call_site_parameter:
+ return dwarf::DW_TAG_GNU_call_site_parameter;
+ default:
+ llvm_unreachable("DWARF5 tag with no GNU analog");
+ }
+}
+
+dwarf::Attribute
+DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const {
+ if (!useGNUAnalogForDwarf5Feature(DD))
+ return Attr;
+ switch (Attr) {
+ case dwarf::DW_AT_call_all_calls:
+ return dwarf::DW_AT_GNU_all_call_sites;
+ case dwarf::DW_AT_call_target:
+ return dwarf::DW_AT_GNU_call_site_target;
+ case dwarf::DW_AT_call_origin:
+ return dwarf::DW_AT_abstract_origin;
+ case dwarf::DW_AT_call_pc:
+ return dwarf::DW_AT_low_pc;
+ case dwarf::DW_AT_call_value:
+ return dwarf::DW_AT_GNU_call_site_value;
+ case dwarf::DW_AT_call_tail_call:
+ return dwarf::DW_AT_GNU_tail_call;
+ default:
+ llvm_unreachable("DWARF5 attribute with no GNU analog");
+ }
+}
+
+dwarf::LocationAtom
+DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
+ if (!useGNUAnalogForDwarf5Feature(DD))
+ return Loc;
+ switch (Loc) {
+ case dwarf::DW_OP_entry_value:
+ return dwarf::DW_OP_GNU_entry_value;
+ default:
+ llvm_unreachable("DWARF5 location atom with no GNU analog");
+ }
+}
+
+DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
+ DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail,
+ const MCSymbol *PCAddr, const MCExpr *PCOffset, unsigned CallReg) {
// Insert a call site entry DIE within ScopeDIE.
- DIE &CallSiteDIE =
- createAndAddDIE(dwarf::DW_TAG_call_site, ScopeDIE, nullptr);
+ DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site),
+ ScopeDIE, nullptr);
- // For the purposes of showing tail call frames in backtraces, a key piece of
- // information is DW_AT_call_origin, a pointer to the callee DIE.
- DIE *CalleeDIE = getOrCreateSubprogramDIE(&CalleeSP);
- assert(CalleeDIE && "Could not create DIE for call site entry origin");
- addDIEEntry(CallSiteDIE, dwarf::DW_AT_call_origin, *CalleeDIE);
+ if (CallReg) {
+ // Indirect call.
+ addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
+ MachineLocation(CallReg));
+ } else {
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
+ *CalleeDIE);
+ }
- if (IsTail) {
+ if (IsTail)
// Attach DW_AT_call_tail_call to tail calls for standards compliance.
- addFlag(CallSiteDIE, dwarf::DW_AT_call_tail_call);
- } else {
- // Attach the return PC to allow the debugger to disambiguate call paths
- // from one function to another.
+ addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call));
+
+ // Attach the return PC to allow the debugger to disambiguate call paths
+ // from one function to another.
+ if (DD->getDwarfVersion() == 4 && DD->tuneForGDB()) {
+ assert(PCAddr && "Missing PC information for a call");
+ addLabelAddress(CallSiteDIE, dwarf::DW_AT_low_pc, PCAddr);
+ } else if (!IsTail || DD->tuneForGDB()) {
assert(PCOffset && "Missing return PC information for a call");
addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset);
}
+
return CallSiteDIE;
}
+void DwarfCompileUnit::constructCallSiteParmEntryDIEs(
+ DIE &CallSiteDIE, SmallVector<DbgCallSiteParam, 4> &Params) {
+ for (const auto &Param : Params) {
+ unsigned Register = Param.getRegister();
+ auto CallSiteDieParam =
+ DIE::get(DIEValueAllocator,
+ getDwarf5OrGNUTag(dwarf::DW_TAG_call_site_parameter));
+ insertDIE(CallSiteDieParam);
+ addAddress(*CallSiteDieParam, dwarf::DW_AT_location,
+ MachineLocation(Register));
+
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ DwarfExpr.setCallSiteParamValueFlag();
+
+ DwarfDebug::emitDebugLocValue(*Asm, nullptr, Param.getValue(), DwarfExpr);
+
+ addBlock(*CallSiteDieParam, getDwarf5OrGNUAttr(dwarf::DW_AT_call_value),
+ DwarfExpr.finalize());
+
+ CallSiteDIE.addChild(CallSiteDieParam);
+ }
+}
+
DIE *DwarfCompileUnit::constructImportedEntityDIE(
const DIImportedEntity *Module) {
DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
@@ -997,11 +1077,11 @@ void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
assert(Scope && Scope->isAbstractScope());
auto &Entity = getAbstractEntities()[Node];
if (isa<const DILocalVariable>(Node)) {
- Entity = llvm::make_unique<DbgVariable>(
+ Entity = std::make_unique<DbgVariable>(
cast<const DILocalVariable>(Node), nullptr /* IA */);
DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
} else if (isa<const DILabel>(Node)) {
- Entity = llvm::make_unique<DbgLabel>(
+ Entity = std::make_unique<DbgLabel>(
cast<const DILabel>(Node), nullptr /* IA */);
DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get()));
}
@@ -1081,16 +1161,8 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie()));
}
-/// addVariableAddress - Add DW_AT_location attribute for a
-/// DbgVariable based on provided MachineLocation.
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location) {
- // addBlockByrefAddress is obsolete and will be removed soon.
- // The clang frontend always generates block byref variables with a
- // complex expression that encodes exactly what addBlockByrefAddress
- // would do.
- assert((!DV.isBlockByrefVariable() || DV.hasComplexAddress()) &&
- "block byref variable without a complex expression");
if (DV.hasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
else
@@ -1133,7 +1205,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
if (DIExpr->isEntryValue()) {
DwarfExpr.setEntryValueFlag();
- DwarfExpr.addEntryValueExpression(Cursor);
+ DwarfExpr.beginEntryValueExpression(Cursor);
}
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index ea980dfda17e..1b7ea2673ac0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -227,12 +227,35 @@ public:
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+ /// This takes a DWARF 5 tag and returns it or a GNU analog.
+ dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const;
+
+ /// This takes a DWARF 5 attribute and returns it or a GNU analog.
+ dwarf::Attribute getDwarf5OrGNUAttr(dwarf::Attribute Attr) const;
+
+ /// This takes a DWARF 5 location atom and either returns it or a GNU analog.
+ dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
+
/// Construct a call site entry DIE describing a call within \p Scope to a
- /// callee described by \p CalleeSP. \p IsTail specifies whether the call is
- /// a tail call. \p PCOffset must be non-zero for non-tail calls or be the
+ /// callee described by \p CalleeSP.
+ /// \p IsTail specifies whether the call is a tail call.
+ /// \p PCAddr (used for GDB + DWARF 4 tuning) points to the PC value after
+ /// the call instruction.
+ /// \p PCOffset (used for tunings other than GDB + DWARF 4) must be
+ /// non-zero for non-tail calls (and for tail calls too when tuning for
+ /// GDB at DWARF 5, where PC info is still generated) and must be the
/// function-local offset to PC value after the call instruction.
- DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram &CalleeSP,
- bool IsTail, const MCExpr *PCOffset);
+ /// \p CallReg is a register location for an indirect call. For direct calls
+ /// the \p CallReg is set to 0.
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
+ bool IsTail, const MCSymbol *PCAddr,
+ const MCExpr *PCOffset, unsigned CallReg);
+ /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
+ /// were collected by \ref collectCallSiteParameters.
+ /// Note: The order of parameters does not matter, since debuggers recognize
+ /// call site parameters by the DW_AT_location attribute.
+ void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE,
+ SmallVector<DbgCallSiteParam, 4> &Params);
/// Construct import_module DIE.
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 71bb2b0858cc..c505e77e5acd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -39,6 +40,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
@@ -83,6 +85,8 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
+STATISTIC(NumCSParams, "Number of dbg call site params created");
+
static cl::opt<bool>
DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
cl::desc("Disable debug info printing"));
@@ -166,26 +170,26 @@ static const char *const DbgTimerDescription = "DWARF Debug Writer";
static constexpr unsigned ULEB128PadSize = 4;
void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
- BS.EmitInt8(
+ getActiveStreamer().EmitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
void DebugLocDwarfExpression::emitSigned(int64_t Value) {
- BS.EmitSLEB128(Value, Twine(Value));
+ getActiveStreamer().EmitSLEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
- BS.EmitULEB128(Value, Twine(Value));
+ getActiveStreamer().EmitULEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitData1(uint8_t Value) {
- BS.EmitInt8(Value, Twine(Value));
+ getActiveStreamer().EmitInt8(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx won't fit");
- BS.EmitULEB128(Idx, Twine(Idx), ULEB128PadSize);
+ getActiveStreamer().EmitULEB128(Idx, Twine(Idx), ULEB128PadSize);
}
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
@@ -194,54 +198,34 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
return false;
}
-bool DbgVariable::isBlockByrefVariable() const {
- assert(getVariable() && "Invalid complex DbgVariable!");
- return getVariable()->getType()->isBlockByrefStruct();
+void DebugLocDwarfExpression::enableTemporaryBuffer() {
+ assert(!IsBuffering && "Already buffering?");
+ if (!TmpBuf)
+ TmpBuf = std::make_unique<TempBuffer>(OutBS.GenerateComments);
+ IsBuffering = true;
}
-const DIType *DbgVariable::getType() const {
- DIType *Ty = getVariable()->getType();
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex
- // addresses instead.
- if (Ty->isBlockByrefStruct()) {
- /* Byref variables, in Blocks, are declared by the programmer as
- "SomeType VarName;", but the compiler creates a
- __Block_byref_x_VarName struct, and gives the variable VarName
- either the struct, or a pointer to the struct, as its type. This
- is necessary for various behind-the-scenes things the compiler
- needs to do with by-reference variables in blocks.
-
- However, as far as the original *programmer* is concerned, the
- variable should still have type 'SomeType', as originally declared.
-
- The following function dives into the __Block_byref_x_VarName
- struct to find the original type of the variable. This will be
- passed back to the code generating the type for the Debug
- Information Entry for the variable 'VarName'. 'VarName' will then
- have the original type 'SomeType' in its debug information.
-
- The original type 'SomeType' will be the type of the field named
- 'VarName' inside the __Block_byref_x_VarName struct.
-
- NOTE: In order for this to not completely fail on the debugger
- side, the Debug Information Entry for the variable VarName needs to
- have a DW_AT_location that tells the debugger how to unwind through
- the pointers and __Block_byref_x_VarName struct to find the actual
- value of the variable. The function addBlockByrefType does this. */
- DIType *subType = Ty;
- uint16_t tag = Ty->getTag();
-
- if (tag == dwarf::DW_TAG_pointer_type)
- subType = cast<DIDerivedType>(Ty)->getBaseType();
-
- auto Elements = cast<DICompositeType>(subType)->getElements();
- for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
- auto *DT = cast<DIDerivedType>(Elements[i]);
- if (getName() == DT->getName())
- return DT->getBaseType();
- }
+void DebugLocDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
+
+unsigned DebugLocDwarfExpression::getTemporaryBufferSize() {
+ return TmpBuf ? TmpBuf->Bytes.size() : 0;
+}
+
+void DebugLocDwarfExpression::commitTemporaryBuffer() {
+ if (!TmpBuf)
+ return;
+ for (auto Byte : enumerate(TmpBuf->Bytes)) {
+ const char *Comment = (Byte.index() < TmpBuf->Comments.size())
+ ? TmpBuf->Comments[Byte.index()].c_str()
+ : "";
+ OutBS.EmitInt8(Byte.value(), Comment);
}
- return Ty;
+ TmpBuf->Bytes.clear();
+ TmpBuf->Comments.clear();
+}
+
+const DIType *DbgVariable::getType() const {
+ return getVariable()->getType();
}
/// Get .debug_loc entry for the instruction range starting at MI.
@@ -275,7 +259,7 @@ void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
"Wrong inlined-at");
- ValueLoc = llvm::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
+ ValueLoc = std::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
if (auto *E = DbgValue->getDebugExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
@@ -551,6 +535,157 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
+/// Try to interpret values loaded into registers that forward parameters
+/// for \p CallMI. Store the parameters with their interpreted values into
+/// \p Params.
+static void collectCallSiteParameters(const MachineInstr *CallMI,
+ ParamSet &Params) {
+ auto *MF = CallMI->getMF();
+ auto CalleesMap = MF->getCallSitesInfo();
+ auto CallFwdRegsInfo = CalleesMap.find(CallMI);
+
+ // There is no information for the call instruction.
+ if (CallFwdRegsInfo == CalleesMap.end())
+ return;
+
+ auto *MBB = CallMI->getParent();
+ const auto &TRI = MF->getSubtarget().getRegisterInfo();
+ const auto &TII = MF->getSubtarget().getInstrInfo();
+ const auto &TLI = MF->getSubtarget().getTargetLowering();
+
+ // Skip the call instruction.
+ auto I = std::next(CallMI->getReverseIterator());
+
+ DenseSet<unsigned> ForwardedRegWorklist;
+ // Add all the forwarding registers into the ForwardedRegWorklist.
+ for (auto ArgReg : CallFwdRegsInfo->second) {
+ bool InsertedReg = ForwardedRegWorklist.insert(ArgReg.Reg).second;
+ assert(InsertedReg && "Single register used to forward two arguments?");
+ (void)InsertedReg;
+ }
+
+ // We erase, from the ForwardedRegWorklist, those forwarding registers for
+ // which we successfully describe a loaded value (using
+ // describeLoadedValue()). For the remaining arguments in the working
+ // list, for which no loaded value can be described, we try to generate an
+ // entry value expression for their call site value description, provided
+ // the call is within the entry MBB.
+ // The RegsForEntryValues map maps a forwarding register to the register
+ // holding the entry value.
+ // TODO: Handle situations when a call site parameter value can be described
+ // as the entry value within basic blocks other than the first one.
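+ // For example (x86), if an argument is forwarded in $edi and the backward
+ // scan below finds `$edi = MOV32ri 42`, describeLoadedValue() returns the
+ // immediate 42, which becomes the call site value for that parameter.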
+ bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin();
+ DenseMap<unsigned, unsigned> RegsForEntryValues;
+
+ // If the MI is an instruction defining one or more of the parameters'
+ // forwarding registers, collect those defines. We can currently only
+ // describe forwarded registers that are explicitly defined, but keep track
+ // of implicit defines as well, in order to remove those registers from the
+ // work list.
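+ // Note that the check below uses regsOverlap(), so a def of a wider
+ // register (e.g. $rdi) also counts as a def of a forwarded sub-register
+ // (e.g. $edi).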
+ auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI,
+ SmallVectorImpl<unsigned> &Explicit,
+ SmallVectorImpl<unsigned> &Implicit) {
+ if (MI.isDebugInstr())
+ return;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() &&
+ Register::isPhysicalRegister(MO.getReg())) {
+ for (auto FwdReg : ForwardedRegWorklist) {
+ if (TRI->regsOverlap(FwdReg, MO.getReg())) {
+ if (MO.isImplicit())
+ Implicit.push_back(FwdReg);
+ else
+ Explicit.push_back(FwdReg);
+ break;
+ }
+ }
+ }
+ }
+ };
+
+ auto finishCallSiteParam = [&](DbgValueLoc DbgLocVal, unsigned Reg) {
+ unsigned FwdReg = Reg;
+ if (ShouldTryEmitEntryVals) {
+ auto EntryValReg = RegsForEntryValues.find(Reg);
+ if (EntryValReg != RegsForEntryValues.end())
+ FwdReg = EntryValReg->second;
+ }
+
+ DbgCallSiteParam CSParm(FwdReg, DbgLocVal);
+ Params.push_back(CSParm);
+ ++NumCSParams;
+ };
+
+ // Search for values loaded into the forwarding registers.
+ for (; I != MBB->rend(); ++I) {
+ // Stop the search at the next call instruction, since we cannot interpret
+ // the parameters' forwarding registers across it, and also stop once every
+ // parameter has been interpreted.
+ if (I->isCall())
+ return;
+
+ if (ForwardedRegWorklist.empty())
+ return;
+
+ SmallVector<unsigned, 4> ExplicitFwdRegDefs;
+ SmallVector<unsigned, 4> ImplicitFwdRegDefs;
+ getForwardingRegsDefinedByMI(*I, ExplicitFwdRegDefs, ImplicitFwdRegDefs);
+ if (ExplicitFwdRegDefs.empty() && ImplicitFwdRegDefs.empty())
+ continue;
+
+ // If the MI clobbers more than one forwarding register, we must remove
+ // all of them from the working list.
+ for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs))
+ ForwardedRegWorklist.erase(Reg);
+
+ // The describeLoadedValue() hook currently does not have any information
+ // about which register it should describe in case of multiple defines, so
+ // for now we only handle instructions where a forwarded register is (at
+ // least partially) defined by the instruction's single explicit define.
+ if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty())
+ continue;
+ unsigned Reg = ExplicitFwdRegDefs[0];
+
+ if (auto ParamValue = TII->describeLoadedValue(*I)) {
+ if (ParamValue->first.isImm()) {
+ int64_t Val = ParamValue->first.getImm();
+ DbgValueLoc DbgLocVal(ParamValue->second, Val);
+ finishCallSiteParam(DbgLocVal, Reg);
+ } else if (ParamValue->first.isReg()) {
+ Register RegLoc = ParamValue->first.getReg();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FP = TRI->getFrameRegister(*MF);
+ bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
+ if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
+ DbgValueLoc DbgLocVal(ParamValue->second,
+ MachineLocation(RegLoc,
+ /*IsIndirect=*/IsSPorFP));
+ finishCallSiteParam(DbgLocVal, Reg);
+ } else if (ShouldTryEmitEntryVals) {
+ ForwardedRegWorklist.insert(RegLoc);
+ RegsForEntryValues[RegLoc] = Reg;
+ }
+ }
+ }
+ }
+
+ // Emit the call site parameter's value as an entry value.
+ if (ShouldTryEmitEntryVals) {
+ // Create an expression where the register's entry value is used.
+ DIExpression *EntryExpr = DIExpression::get(
+ MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1});
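+ // When lowered, this becomes DW_OP_entry_value(DW_OP_reg<N>) (or the
+ // GNU DW_OP_GNU_entry_value form), i.e. the value the register held on
+ // entry to the function.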
+ for (auto RegEntry : ForwardedRegWorklist) {
+ unsigned FwdReg = RegEntry;
+ auto EntryValReg = RegsForEntryValues.find(RegEntry);
+ if (EntryValReg != RegsForEntryValues.end())
+ FwdReg = EntryValReg->second;
+
+ DbgValueLoc DbgLocVal(EntryExpr, MachineLocation(RegEntry));
+ DbgCallSiteParam CSParm(FwdReg, DbgLocVal);
+ Params.push_back(CSParm);
+ ++NumCSParams;
+ }
+ }
+}
+
void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
DwarfCompileUnit &CU, DIE &ScopeDIE,
const MachineFunction &MF) {
@@ -563,10 +698,11 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls
// because one of its requirements is not met: call site entries for
// optimized-out calls are elided.
- CU.addFlag(ScopeDIE, dwarf::DW_AT_call_all_calls);
+ CU.addFlag(ScopeDIE, CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_all_calls));
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
assert(TII && "TargetInstrInfo not found: cannot label tail calls");
+ bool ApplyGNUExtensions = getDwarfVersion() == 4 && tuneForGDB();
// Emit call site entries for each call or tail call in the function.
for (const MachineBasicBlock &MBB : MF) {
@@ -581,30 +717,66 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
return;
// If this is a direct call, find the callee's subprogram.
+ // In the case of an indirect call, find the register that holds
+ // the callee.
const MachineOperand &CalleeOp = MI.getOperand(0);
- if (!CalleeOp.isGlobal())
- continue;
- const Function *CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
- if (!CalleeDecl || !CalleeDecl->getSubprogram())
+ if (!CalleeOp.isGlobal() && !CalleeOp.isReg())
continue;
+ unsigned CallReg = 0;
+ const DISubprogram *CalleeSP = nullptr;
+ const Function *CalleeDecl = nullptr;
+ if (CalleeOp.isReg()) {
+ CallReg = CalleeOp.getReg();
+ if (!CallReg)
+ continue;
+ } else {
+ CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
+ if (!CalleeDecl || !CalleeDecl->getSubprogram())
+ continue;
+ CalleeSP = CalleeDecl->getSubprogram();
+ }
+
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
- // TODO: Add support for indirect calls.
bool IsTail = TII->isTailCall(MI);
- // For tail calls, no return PC information is needed. For regular calls,
- // the return PC is needed to disambiguate paths in the call graph which
- // could lead to some target function.
+ // For tail calls with non-GDB tuning, no return PC information is needed.
+ // For regular calls (and tail calls with GDB tuning), the return PC
+ // is needed to disambiguate paths in the call graph that could lead to
+ // some target function.
const MCExpr *PCOffset =
- IsTail ? nullptr : getFunctionLocalOffsetAfterInsn(&MI);
+ (IsTail && !tuneForGDB()) ? nullptr
+ : getFunctionLocalOffsetAfterInsn(&MI);
+
+ // Address of a call-like instruction for a normal call or a jump-like
+ // instruction for a tail call. This is needed for GDB + DWARF 4 tuning.
+ const MCSymbol *PCAddr =
+ ApplyGNUExtensions ? const_cast<MCSymbol*>(getLabelAfterInsn(&MI))
+ : nullptr;
+
+ assert((IsTail || PCOffset || PCAddr) &&
+ "Call without return PC information");
- assert((IsTail || PCOffset) && "Call without return PC information");
LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> "
- << CalleeDecl->getName() << (IsTail ? " [tail]" : "")
- << "\n");
- CU.constructCallSiteEntryDIE(ScopeDIE, *CalleeDecl->getSubprogram(),
- IsTail, PCOffset);
+ << (CalleeDecl ? CalleeDecl->getName()
+ : StringRef(MF.getSubtarget()
+ .getRegisterInfo()
+ ->getName(CallReg)))
+ << (IsTail ? " [IsTail]" : "") << "\n");
+
+ DIE &CallSiteDIE =
+ CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, IsTail, PCAddr,
+ PCOffset, CallReg);
+
+ // GDB and LLDB support call site parameter debug info.
+ if (Asm->TM.Options.EnableDebugEntryValues &&
+ (tuneForGDB() || tuneForLLDB())) {
+ ParamSet Params;
+ // Try to interpret values of call site parameters.
+ collectCallSiteParameters(&MI, Params);
+ CU.constructCallSiteParmEntryDIEs(CallSiteDIE, Params);
+ }
}
}
}
@@ -680,7 +852,7 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
CompilationDir = DIUnit->getDirectory();
- auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
+ auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
InfoHolder.addUnit(std::move(OwnedUnit));
@@ -793,8 +965,6 @@ void DwarfDebug::beginModule() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.setRnglistsTableBaseSym(
Asm->createTempSymbol("rnglists_table_base"));
- Holder.setLoclistsTableBaseSym(
- Asm->createTempSymbol("loclists_table_base"));
if (useSplitDwarf())
InfoHolder.setRnglistsTableBaseSym(
@@ -907,7 +1077,7 @@ void DwarfDebug::finalizeModuleInfo() {
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
- if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) {
+ if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) {
finishUnitAttributes(TheCU.getCUNode(), TheCU);
TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
Asm->TM.Options.MCOptions.SplitDwarfFile);
@@ -951,7 +1121,7 @@ void DwarfDebug::finalizeModuleInfo() {
// 2.17.3).
U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
else
- U.setBaseAddress(TheCU.getRanges().front().getStart());
+ U.setBaseAddress(TheCU.getRanges().front().Begin);
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
@@ -959,15 +1129,19 @@ void DwarfDebug::finalizeModuleInfo() {
// is a bit pessimistic under LTO.
if (!AddrPool.isEmpty() &&
(getDwarfVersion() >= 5 ||
- (SkCU && !empty(TheCU.getUnitDie().children()))))
+ (SkCU && !TheCU.getUnitDie().children().empty())))
U.addAddrTableBase();
if (getDwarfVersion() >= 5) {
if (U.hasRangeLists())
U.addRnglistsBase();
- if (!DebugLocs.getLists().empty() && !useSplitDwarf())
- U.addLoclistsBase();
+ if (!DebugLocs.getLists().empty() && !useSplitDwarf()) {
+ DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
+ DebugLocs.getSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+ }
}
auto *CUNode = cast<DICompileUnit>(P.first);
@@ -1105,7 +1279,7 @@ void DwarfDebug::collectVariableInfoFromMFTable(
continue;
ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
- auto RegVar = llvm::make_unique<DbgVariable>(
+ auto RegVar = std::make_unique<DbgVariable>(
cast<DILocalVariable>(Var.first), Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
if (DbgVariable *DbgVar = MFVars.lookup(Var))
@@ -1316,13 +1490,13 @@ DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode());
if (isa<const DILocalVariable>(Node)) {
ConcreteEntities.push_back(
- llvm::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
+ std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
Location));
InfoHolder.addScopeVariable(&Scope,
cast<DbgVariable>(ConcreteEntities.back().get()));
} else if (isa<const DILabel>(Node)) {
ConcreteEntities.push_back(
- llvm::make_unique<DbgLabel>(cast<const DILabel>(Node),
+ std::make_unique<DbgLabel>(cast<const DILabel>(Node),
Location, Sym));
InfoHolder.addScopeLabel(&Scope,
cast<DbgLabel>(ConcreteEntities.back().get()));
@@ -1419,11 +1593,14 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
LexicalScope *Scope = nullptr;
const DILabel *Label = cast<DILabel>(IL.first);
+ // The scope could have an extra lexical block file.
+ const DILocalScope *LocalScope =
+ Label->getScope()->getNonLexicalBlockFileScope();
// Get inlined DILocation if it is inlined label.
if (const DILocation *IA = IL.second)
- Scope = LScopes.findInlinedScope(Label->getScope(), IA);
+ Scope = LScopes.findInlinedScope(LocalScope, IA);
else
- Scope = LScopes.findLexicalScope(Label->getScope());
+ Scope = LScopes.findLexicalScope(LocalScope);
// If label scope is not found then skip this label.
if (!Scope)
continue;
@@ -1607,6 +1784,9 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
return;
+ SectionLabels.insert(std::make_pair(&Asm->getFunctionBegin()->getSection(),
+ Asm->getFunctionBegin()));
+
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
@@ -1654,7 +1834,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
collectEntityInfo(TheCU, SP, Processed);
// Add the range of this function to the list of ranges for the CU.
- TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
+ TheCU.addRange({Asm->getFunctionBegin(), Asm->getFunctionEnd()});
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
@@ -1836,9 +2016,10 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_enumeration_type:
return dwarf::PubIndexEntryDescriptor(
- dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus
- ? dwarf::GIEL_STATIC
- : dwarf::GIEL_EXTERNAL);
+ dwarf::GIEK_TYPE,
+ dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage())
+ ? dwarf::GIEL_EXTERNAL
+ : dwarf::GIEL_STATIC);
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_subrange_type:
@@ -1967,7 +2148,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
DWARFExpression Expr(Data, getDwarfVersion(), PtrSize);
using Encoding = DWARFExpression::Operation::Encoding;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
for (auto &Op : Expr) {
assert(Op.getCode() != dwarf::DW_OP_const_type &&
"3 operand ops not yet supported");
@@ -1990,7 +2171,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
if (Comment != End)
Comment++;
} else {
- for (uint32_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
+ for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
}
Offset = Op.getOperandEndOffset(I);
@@ -2020,7 +2201,7 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
if (DIExpr->isEntryValue()) {
DwarfExpr.setEntryValueFlag();
- DwarfExpr.addEntryValueExpression(Cursor);
+ DwarfExpr.beginEntryValueExpression(Cursor);
}
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
@@ -2083,7 +2264,7 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
}
// Emit the common part of the DWARF 5 range/locations list tables header.
-static void emitListsTableHeaderStart(AsmPrinter *Asm, const DwarfFile &Holder,
+static void emitListsTableHeaderStart(AsmPrinter *Asm,
MCSymbol *TableStart,
MCSymbol *TableEnd) {
// Build the table header, which starts with the length field.
@@ -2108,7 +2289,7 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
const DwarfFile &Holder) {
MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
- emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+ emitListsTableHeaderStart(Asm, TableStart, TableEnd);
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(Holder.getRangeLists().size());
@@ -2125,94 +2306,147 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
// designates the end of the table for the caller to emit when the table is
// complete.
static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
- const DwarfFile &Holder) {
+ const DwarfDebug &DD) {
MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start");
MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end");
- emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+ emitListsTableHeaderStart(Asm, TableStart, TableEnd);
+
+ const auto &DebugLocs = DD.getDebugLocs();
// FIXME: Generate the offsets table and use DW_FORM_loclistx with the
// DW_AT_loclists_base attribute. Until then set the number of offsets to 0.
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(0);
- Asm->OutStreamer->EmitLabel(Holder.getLoclistsTableBaseSym());
+ Asm->OutStreamer->EmitLabel(DebugLocs.getSym());
return TableEnd;
}
-// Emit locations into the .debug_loc/.debug_rnglists section.
-void DwarfDebug::emitDebugLoc() {
- if (DebugLocs.getLists().empty())
- return;
+template <typename Ranges, typename PayloadEmitter>
+static void emitRangeList(
+ DwarfDebug &DD, AsmPrinter *Asm, MCSymbol *Sym, const Ranges &R,
+ const DwarfCompileUnit &CU, unsigned BaseAddressx, unsigned OffsetPair,
+ unsigned StartxLength, unsigned EndOfList,
+ StringRef (*StringifyEnum)(unsigned),
+ bool ShouldUseBaseAddress,
+ PayloadEmitter EmitPayload) {
- bool IsLocLists = getDwarfVersion() >= 5;
- MCSymbol *TableEnd = nullptr;
- if (IsLocLists) {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLoclistsSection());
- TableEnd = emitLoclistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder
- : InfoHolder);
- } else {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLocSection());
- }
+ auto Size = Asm->MAI->getCodePointerSize();
+ bool UseDwarf5 = DD.getDwarfVersion() >= 5;
- unsigned char Size = Asm->MAI->getCodePointerSize();
- for (const auto &List : DebugLocs.getLists()) {
- Asm->OutStreamer->EmitLabel(List.Label);
+ // Emit our symbol so we can find the beginning of the range.
+ Asm->OutStreamer->EmitLabel(Sym);
- const DwarfCompileUnit *CU = List.CU;
- const MCSymbol *Base = CU->getBaseAddress();
- for (const auto &Entry : DebugLocs.getEntries(List)) {
+ // Gather all the ranges that apply to the same section so they can share
+ // a base address entry.
+ MapVector<const MCSection *, std::vector<decltype(&*R.begin())>> SectionRanges;
+
+ for (const auto &Range : R)
+ SectionRanges[&Range.Begin->getSection()].push_back(&Range);
+
+ const MCSymbol *CUBase = CU.getBaseAddress();
+ bool BaseIsSet = false;
+ for (const auto &P : SectionRanges) {
+ auto *Base = CUBase;
+ if (!Base && ShouldUseBaseAddress) {
+ const MCSymbol *Begin = P.second.front()->Begin;
+ const MCSymbol *NewBase = DD.getSectionLabel(&Begin->getSection());
+ if (!UseDwarf5) {
+ Base = NewBase;
+ BaseIsSet = true;
+ Asm->OutStreamer->EmitIntValue(-1, Size);
+ Asm->OutStreamer->AddComment(" base address");
+ Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ } else if (NewBase != Begin || P.second.size() > 1) {
+ // Only use a base address if:
+ // * the existing pool address does not match (NewBase != Begin), or
+ // * there is more than one entry to share the base address.
+ Base = NewBase;
+ BaseIsSet = true;
+ Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx));
+ Asm->emitInt8(BaseAddressx);
+ Asm->OutStreamer->AddComment(" base address index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Base));
+ }
+ } else if (BaseIsSet && !UseDwarf5) {
+ BaseIsSet = false;
+ assert(!Base);
+ Asm->OutStreamer->EmitIntValue(-1, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
+
+ for (const auto *RS : P.second) {
+ const MCSymbol *Begin = RS->Begin;
+ const MCSymbol *End = RS->End;
+ assert(Begin && "Range without a begin symbol?");
+ assert(End && "Range without an end symbol?");
if (Base) {
- // Set up the range. This range is relative to the entry point of the
- // compile unit. This is a hard coded 0 for low_pc when we're emitting
- // ranges, or the DW_AT_low_pc on the compile unit otherwise.
- if (IsLocLists) {
- Asm->OutStreamer->AddComment("DW_LLE_offset_pair");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_offset_pair, 1);
+ if (UseDwarf5) {
+ // Emit offset_pair when we have a base.
+ Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair));
+ Asm->emitInt8(OffsetPair);
Asm->OutStreamer->AddComment(" starting offset");
- Asm->EmitLabelDifferenceAsULEB128(Entry.BeginSym, Base);
+ Asm->EmitLabelDifferenceAsULEB128(Begin, Base);
Asm->OutStreamer->AddComment(" ending offset");
- Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Base);
+ Asm->EmitLabelDifferenceAsULEB128(End, Base);
} else {
- Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
- Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
+ Asm->EmitLabelDifference(Begin, Base, Size);
+ Asm->EmitLabelDifference(End, Base, Size);
}
-
- emitDebugLocEntryLocation(Entry, CU);
- continue;
- }
-
- // We have no base address.
- if (IsLocLists) {
- // TODO: Use DW_LLE_base_addressx + DW_LLE_offset_pair, or
- // DW_LLE_startx_length in case if there is only a single range.
- // That should reduce the size of the debug data emited.
- // For now just use the DW_LLE_startx_length for all cases.
- Asm->OutStreamer->AddComment("DW_LLE_startx_length");
- Asm->emitInt8(dwarf::DW_LLE_startx_length);
- Asm->OutStreamer->AddComment(" start idx");
- Asm->EmitULEB128(AddrPool.getIndex(Entry.BeginSym));
+ } else if (UseDwarf5) {
+ Asm->OutStreamer->AddComment(StringifyEnum(StartxLength));
+ Asm->emitInt8(StartxLength);
+ Asm->OutStreamer->AddComment(" start index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin));
Asm->OutStreamer->AddComment(" length");
- Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Entry.BeginSym);
+ Asm->EmitLabelDifferenceAsULEB128(End, Begin);
} else {
- Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size);
- Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
+ Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->EmitSymbolValue(End, Size);
}
-
- emitDebugLocEntryLocation(Entry, CU);
+ EmitPayload(*RS);
}
+ }
- if (IsLocLists) {
- // .debug_loclists section ends with DW_LLE_end_of_list.
- Asm->OutStreamer->AddComment("DW_LLE_end_of_list");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_end_of_list, 1);
- } else {
- // Terminate the .debug_loc list with two 0 values.
- Asm->OutStreamer->EmitIntValue(0, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
- }
+ if (UseDwarf5) {
+ Asm->OutStreamer->AddComment(StringifyEnum(EndOfList));
+ Asm->emitInt8(EndOfList);
+ } else {
+ // Terminate the list with two 0 values.
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
}
+}
+
+static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) {
+ emitRangeList(
+ DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU,
+ dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair,
+ dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list,
+ llvm::dwarf::LocListEncodingString,
+ /* ShouldUseBaseAddress */ true,
+ [&](const DebugLocStream::Entry &E) {
+ DD.emitDebugLocEntryLocation(E, List.CU);
+ });
+}
+
+// Emit locations into the .debug_loc/.debug_rnglists section.
+void DwarfDebug::emitDebugLoc() {
+ if (DebugLocs.getLists().empty())
+ return;
+
+ MCSymbol *TableEnd = nullptr;
+ if (getDwarfVersion() >= 5) {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLoclistsSection());
+ TableEnd = emitLoclistsTableHeader(Asm, *this);
+ } else {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ }
+
+ for (const auto &List : DebugLocs.getLists())
+ emitLocList(*this, Asm, List);
if (TableEnd)
Asm->OutStreamer->EmitLabel(TableEnd);
@@ -2232,9 +2466,9 @@ void DwarfDebug::emitDebugLocDWO() {
// Ideally/in v5, this could use SectionLabels to reuse existing addresses
// in the address pool to minimize object size/relocations.
Asm->emitInt8(dwarf::DW_LLE_startx_length);
- unsigned idx = AddrPool.getIndex(Entry.BeginSym);
+ unsigned idx = AddrPool.getIndex(Entry.Begin);
Asm->EmitULEB128(idx);
- Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
+ Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4);
emitDebugLocEntryLocation(Entry, List.CU);
}
@@ -2360,7 +2594,7 @@ void DwarfDebug::emitDebugARanges() {
// 7.20 in the Dwarf specs requires the table to be aligned to a tuple.
unsigned Padding =
- OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize);
+ offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize));
ContentSize += Padding;
ContentSize += (List.size() + 1) * TupleSize;
@@ -2405,93 +2639,13 @@ void DwarfDebug::emitDebugARanges() {
/// Emit a single range list. We handle both DWARF v5 and earlier.
static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
const RangeSpanList &List) {
-
- auto DwarfVersion = DD.getDwarfVersion();
- // Emit our symbol so we can find the beginning of the range.
- Asm->OutStreamer->EmitLabel(List.getSym());
- // Gather all the ranges that apply to the same section so they can share
- // a base address entry.
- MapVector<const MCSection *, std::vector<const RangeSpan *>> SectionRanges;
- // Size for our labels.
- auto Size = Asm->MAI->getCodePointerSize();
-
- for (const RangeSpan &Range : List.getRanges())
- SectionRanges[&Range.getStart()->getSection()].push_back(&Range);
-
- const DwarfCompileUnit &CU = List.getCU();
- const MCSymbol *CUBase = CU.getBaseAddress();
- bool BaseIsSet = false;
- for (const auto &P : SectionRanges) {
- // Don't bother with a base address entry if there's only one range in
- // this section in this range list - for example ranges for a CU will
- // usually consist of single regions from each of many sections
- // (-ffunction-sections, or just C++ inline functions) except under LTO
- // or optnone where there may be holes in a single CU's section
- // contributions.
- auto *Base = CUBase;
- if (!Base && (P.second.size() > 1 || DwarfVersion < 5) &&
- (CU.getCUNode()->getRangesBaseAddress() || DwarfVersion >= 5)) {
- BaseIsSet = true;
- // FIXME/use care: This may not be a useful base address if it's not
- // the lowest address/range in this object.
- Base = P.second.front()->getStart();
- if (DwarfVersion >= 5) {
- Base = DD.getSectionLabel(&Base->getSection());
- Asm->OutStreamer->AddComment("DW_RLE_base_addressx");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_addressx, 1);
- Asm->OutStreamer->AddComment(" base address index");
- Asm->EmitULEB128(DD.getAddressPool().getIndex(Base));
- } else {
- Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->AddComment(" base address");
- Asm->OutStreamer->EmitSymbolValue(Base, Size);
- }
- } else if (BaseIsSet && DwarfVersion < 5) {
- BaseIsSet = false;
- assert(!Base);
- Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
- }
-
- for (const auto *RS : P.second) {
- const MCSymbol *Begin = RS->getStart();
- const MCSymbol *End = RS->getEnd();
- assert(Begin && "Range without a begin symbol?");
- assert(End && "Range without an end symbol?");
- if (Base) {
- if (DwarfVersion >= 5) {
- // Emit DW_RLE_offset_pair when we have a base.
- Asm->OutStreamer->AddComment("DW_RLE_offset_pair");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_offset_pair, 1);
- Asm->OutStreamer->AddComment(" starting offset");
- Asm->EmitLabelDifferenceAsULEB128(Begin, Base);
- Asm->OutStreamer->AddComment(" ending offset");
- Asm->EmitLabelDifferenceAsULEB128(End, Base);
- } else {
- Asm->EmitLabelDifference(Begin, Base, Size);
- Asm->EmitLabelDifference(End, Base, Size);
- }
- } else if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_startx_length");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_startx_length, 1);
- Asm->OutStreamer->AddComment(" start index");
- Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin));
- Asm->OutStreamer->AddComment(" length");
- Asm->EmitLabelDifferenceAsULEB128(End, Begin);
- } else {
- Asm->OutStreamer->EmitSymbolValue(Begin, Size);
- Asm->OutStreamer->EmitSymbolValue(End, Size);
- }
- }
- }
- if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_end_of_list");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_end_of_list, 1);
- } else {
- // Terminate the list with two 0 values.
- Asm->OutStreamer->EmitIntValue(0, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
- }
+ emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(),
+ dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair,
+ dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list,
+ llvm::dwarf::RangeListEncodingString,
+ List.getCU().getCUNode()->getRangesBaseAddress() ||
+ DD.getDwarfVersion() >= 5,
+ [](auto) {});
}
static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm,
@@ -2637,7 +2791,7 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
- auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
+ auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
@@ -2737,7 +2891,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
- auto OwnedUnit = llvm::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
+ auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
@@ -2879,10 +3033,6 @@ uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}
-void DwarfDebug::addSectionLabel(const MCSymbol *Sym) {
- SectionLabels.insert(std::make_pair(&Sym->getSection(), Sym));
-}
-
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
return SectionLabels.find(S)->second;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 3ac474e2bdda..c8c511f67c2a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -153,7 +153,7 @@ public:
assert(!ValueLoc && "Already initialized?");
assert(!Value.getExpression()->isFragment() && "Fragments not supported.");
- ValueLoc = llvm::make_unique<DbgValueLoc>(Value);
+ ValueLoc = std::make_unique<DbgValueLoc>(Value);
if (auto *E = ValueLoc->getExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
@@ -216,7 +216,6 @@ public:
return !FrameIndexExprs.empty();
}
- bool isBlockByrefVariable() const;
const DIType *getType() const;
static bool classof(const DbgEntity *N) {
@@ -254,6 +253,25 @@ public:
}
};
+/// Used for tracking debug info about call site parameters.
+class DbgCallSiteParam {
+private:
+ unsigned Register; ///< Parameter register at the callee entry point.
+ DbgValueLoc Value; ///< Corresponding location for the parameter value at
+ ///< the call site.
+public:
+ DbgCallSiteParam(unsigned Reg, DbgValueLoc Val)
+ : Register(Reg), Value(Val) {
+ assert(Reg && "Parameter register cannot be undef");
+ }
+
+ unsigned getRegister() const { return Register; }
+ DbgValueLoc getValue() const { return Value; }
+};
+
+/// Collection used for storing debug call site parameters.
+using ParamSet = SmallVector<DbgCallSiteParam, 4>;
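+
+// A typical flow: collectCallSiteParameters() gathers a ParamSet for a call
+// instruction, and DwarfCompileUnit::constructCallSiteParmEntryDIEs() then
+// lowers it into DWARF call site parameter DIEs.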
+
/// Helper used to pair up a symbol and its DWARF compile unit.
struct SymbolCU {
SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 2858afaa1cf1..1c5a244d7c5d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/ErrorHandling.h"
@@ -97,7 +98,7 @@ void DwarfExpression::addAnd(unsigned Mask) {
bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
unsigned MachineReg, unsigned MaxSize) {
- if (!TRI.isPhysicalRegister(MachineReg)) {
+ if (!llvm::Register::isPhysicalRegister(MachineReg)) {
if (isFrameRegister(TRI, MachineReg)) {
DwarfRegs.push_back({-1, 0, nullptr});
return true;
@@ -241,15 +242,22 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return false;
}
- // Handle simple register locations.
- if (!isMemoryLocation() && !HasComplexExpression) {
+ // Handle simple register locations. If we are supposed to emit
+ // a call site parameter expression and if that expression is just a register
+ // location, emit it with addBReg and offset 0, because we should emit a DWARF
+ // expression representing a value, rather than a location.
+ if (!isMemoryLocation() && !HasComplexExpression &&
+ (!isParameterValue() || isEntryValue())) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
addOpPiece(Reg.Size);
}
- if (isEntryValue() && DwarfVersion >= 4)
+ if (isEntryValue())
+ finalizeEntryValue();
+
+ if (isEntryValue() && !isParameterValue() && DwarfVersion >= 4)
emitOp(dwarf::DW_OP_stack_value);
DwarfRegs.clear();
@@ -275,19 +283,27 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// Pattern-match combinations for which more efficient representations exist.
// [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset].
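// For example, on x86-64 a value at [RBP+8] can be described by the single
// operation DW_OP_breg6 RBP+8 instead of a register operation followed by
// an addition.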
if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) {
- SignedOffset = Op->getArg(0);
- ExprCursor.take();
+ uint64_t Offset = Op->getArg(0);
+ uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max());
+ if (Offset <= IntMax) {
+ SignedOffset = Offset;
+ ExprCursor.take();
+ }
}
// [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset]
// [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset]
// If Reg is a subregister we need to mask it out before subtracting.
if (Op && Op->getOp() == dwarf::DW_OP_constu) {
+ uint64_t Offset = Op->getArg(0);
+ uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max());
auto N = ExprCursor.peekNext();
- if (N && (N->getOp() == dwarf::DW_OP_plus ||
- (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) {
- int Offset = Op->getArg(0);
- SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? -Offset : Offset;
+ if (N && N->getOp() == dwarf::DW_OP_plus && Offset <= IntMax) {
+ SignedOffset = Offset;
+ ExprCursor.consume(2);
+ } else if (N && N->getOp() == dwarf::DW_OP_minus &&
+ !SubRegisterSizeInBits && Offset <= IntMax + 1) {
+ SignedOffset = -static_cast<int64_t>(Offset);
ExprCursor.consume(2);
}
}
@@ -300,17 +316,34 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return true;
}
-void DwarfExpression::addEntryValueExpression(DIExpressionCursor &ExprCursor) {
+void DwarfExpression::beginEntryValueExpression(
+ DIExpressionCursor &ExprCursor) {
auto Op = ExprCursor.take();
- assert(Op && Op->getOp() == dwarf::DW_OP_entry_value);
+ (void)Op;
+ assert(Op && Op->getOp() == dwarf::DW_OP_LLVM_entry_value);
assert(!isMemoryLocation() &&
"We don't support entry values of memory locations yet");
+ assert(!IsEmittingEntryValue && "Already emitting entry value?");
+ assert(Op->getArg(0) == 1 &&
+ "Can currently only emit entry values covering a single operation");
- if (DwarfVersion >= 5)
- emitOp(dwarf::DW_OP_entry_value);
- else
- emitOp(dwarf::DW_OP_GNU_entry_value);
- emitUnsigned(Op->getArg(0));
+ emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value));
+ IsEmittingEntryValue = true;
+ enableTemporaryBuffer();
+}
+
+void DwarfExpression::finalizeEntryValue() {
+ assert(IsEmittingEntryValue && "Entry value not open?");
+ disableTemporaryBuffer();
+
+ // Emit the entry value's size operand.
+ unsigned Size = getTemporaryBufferSize();
+ emitUnsigned(Size);
+
+ // Emit the entry value's DWARF block operand.
+ commitTemporaryBuffer();
+
+ IsEmittingEntryValue = false;
}
/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
@@ -340,7 +373,17 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
while (ExprCursor) {
auto Op = ExprCursor.take();
- switch (Op->getOp()) {
+ uint64_t OpNum = Op->getOp();
+
+ if (OpNum >= dwarf::DW_OP_reg0 && OpNum <= dwarf::DW_OP_reg31) {
+ emitOp(OpNum);
+ continue;
+ } else if (OpNum >= dwarf::DW_OP_breg0 && OpNum <= dwarf::DW_OP_breg31) {
+ addBReg(OpNum - dwarf::DW_OP_breg0, Op->getArg(0));
+ continue;
+ }
+
+ switch (OpNum) {
case dwarf::DW_OP_LLVM_fragment: {
unsigned SizeInBits = Op->getArg(1);
unsigned FragmentOffset = Op->getArg(0);
@@ -389,10 +432,13 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
case dwarf::DW_OP_lit0:
case dwarf::DW_OP_not:
case dwarf::DW_OP_dup:
- emitOp(Op->getOp());
+ emitOp(OpNum);
break;
case dwarf::DW_OP_deref:
assert(!isRegisterLocation());
+ // For a more detailed explanation, see llvm.org/PR43343.
+ assert(!isParameterValue() && "Parameter entry values should not be "
+ "dereferenced due to safety reasons.");
if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
// Turning this into a memory location description makes the deref
// implicit.
@@ -458,12 +504,21 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
case dwarf::DW_OP_LLVM_tag_offset:
TagOffset = Op->getArg(0);
break;
+ case dwarf::DW_OP_regx:
+ emitOp(dwarf::DW_OP_regx);
+ emitUnsigned(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_bregx:
+ emitOp(dwarf::DW_OP_bregx);
+ emitUnsigned(Op->getArg(0));
+ emitSigned(Op->getArg(1));
+ break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
}
- if (isImplicitLocation())
+ if (isImplicitLocation() && !isParameterValue())
// Turn this into an implicit location description.
addStackValue();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index ec2ef6e575f7..1ad46669f9b2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+#include "ByteStreamer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
@@ -26,7 +27,6 @@ namespace llvm {
class AsmPrinter;
class APInt;
-class ByteStreamer;
class DwarfCompileUnit;
class DIELoc;
class TargetRegisterInfo;
@@ -95,6 +95,13 @@ public:
/// Base class containing the logic for constructing DWARF expressions
/// independently of whether they are emitted into a DIE or into a .debug_loc
/// entry.
+///
+/// Some DWARF operations, e.g. DW_OP_entry_value, need to calculate the size
+/// of a succeeding DWARF block before the latter is emitted to the output.
+/// To handle such cases, data can conditionally be emitted to a temporary
+/// buffer, which can later on be committed to the main output. The size of the
+/// temporary buffer is queryable, allowing for the size of the data to be
+/// emitted before the data is committed.
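+///
+/// As an illustration, finalizeEntryValue() below first disables buffering,
+/// then emits the size of the buffered block via
+/// emitUnsigned(getTemporaryBufferSize()), and finally commits the buffered
+/// bytes with commitTemporaryBuffer().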
class DwarfExpression {
protected:
/// Holds information about all subregisters comprising a register location.
@@ -104,6 +111,9 @@ protected:
const char *Comment;
};
+ /// Whether we are currently emitting an entry value operation.
+ bool IsEmittingEntryValue = false;
+
DwarfCompileUnit &CU;
/// The register location, if any.
@@ -120,7 +130,7 @@ protected:
enum { Unknown = 0, Register, Memory, Implicit };
/// The flags of location description being produced.
- enum { EntryValue = 1 };
+ enum { EntryValue = 1, CallSiteParamValue };
unsigned LocationKind : 3;
unsigned LocationFlags : 2;
@@ -147,6 +157,10 @@ public:
return LocationFlags & EntryValue;
}
+ bool isParameterValue() {
+ return LocationFlags & CallSiteParamValue;
+ }
+
Optional<uint8_t> TagOffset;
protected:
@@ -174,6 +188,22 @@ protected:
virtual void emitBaseTypeRef(uint64_t Idx) = 0;
+ /// Start emitting data to the temporary buffer. The data stored in the
+ /// temporary buffer can be committed to the main output using
+ /// commitTemporaryBuffer().
+ virtual void enableTemporaryBuffer() = 0;
+
+ /// Disable emission to the temporary buffer. This does not commit data
+ /// in the temporary buffer to the main output.
+ virtual void disableTemporaryBuffer() = 0;
+
+ /// Return the emitted size, in number of bytes, for the data stored in the
+ /// temporary buffer.
+ virtual unsigned getTemporaryBufferSize() = 0;
+
+ /// Commit the data stored in the temporary buffer to the main output.
+ virtual void commitTemporaryBuffer() = 0;
+
/// Emit a normalized unsigned constant.
void emitConstu(uint64_t Value);
@@ -233,6 +263,10 @@ protected:
/// expression. See PR21176 for more details.
void addStackValue();
+ /// Finalize an entry value by emitting its size operand, and committing the
+ /// DWARF block which has been emitted to the temporary buffer.
+ void finalizeEntryValue();
+
~DwarfExpression() = default;
public:
@@ -264,6 +298,11 @@ public:
LocationFlags |= EntryValue;
}
+ /// Lock this down to become a call site parameter location.
+ void setCallSiteParamValueFlag() {
+ LocationFlags |= CallSiteParamValue;
+ }
+
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
/// combining the register location and the first operation exists.
@@ -278,8 +317,11 @@ public:
DIExpressionCursor &Expr, unsigned MachineReg,
unsigned FragmentOffsetInBits = 0);
- /// Emit entry value dwarf operation.
- void addEntryValueExpression(DIExpressionCursor &ExprCursor);
+ /// Begin emission of an entry value DWARF operation. The entry value's
+ /// first operand is the size of the DWARF block (its second operand),
+ /// which needs to be calculated at time of emission, so we don't emit
+ /// any operands here.
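+ /// For example, in the encoding DW_OP_entry_value 0x01 DW_OP_reg5, the
+ /// size byte (0x01) is only known once the block (the single DW_OP_reg5
+ /// byte) has been buffered.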
+ void beginEntryValueExpression(DIExpressionCursor &ExprCursor);
/// Emit all remaining operations in the DIExpressionCursor.
///
@@ -299,31 +341,62 @@ public:
/// DwarfExpression implementation for .debug_loc entries.
class DebugLocDwarfExpression final : public DwarfExpression {
- ByteStreamer &BS;
+
+ struct TempBuffer {
+ SmallString<32> Bytes;
+ std::vector<std::string> Comments;
+ BufferByteStreamer BS;
+
+ TempBuffer(bool GenerateComments) : BS(Bytes, Comments, GenerateComments) {}
+ };
+
+ std::unique_ptr<TempBuffer> TmpBuf;
+ BufferByteStreamer &OutBS;
+ bool IsBuffering = false;
+
+ /// Return the byte streamer that currently is being emitted to.
+ ByteStreamer &getActiveStreamer() { return IsBuffering ? TmpBuf->BS : OutBS; }
void emitOp(uint8_t Op, const char *Comment = nullptr) override;
void emitSigned(int64_t Value) override;
void emitUnsigned(uint64_t Value) override;
void emitData1(uint8_t Value) override;
void emitBaseTypeRef(uint64_t Idx) override;
+
+ void enableTemporaryBuffer() override;
+ void disableTemporaryBuffer() override;
+ unsigned getTemporaryBufferSize() override;
+ void commitTemporaryBuffer() override;
+
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
-
public:
- DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS, DwarfCompileUnit &CU)
- : DwarfExpression(DwarfVersion, CU), BS(BS) {}
+ DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS,
+ DwarfCompileUnit &CU)
+ : DwarfExpression(DwarfVersion, CU), OutBS(BS) {}
};
/// DwarfExpression implementation for singular DW_AT_location.
class DIEDwarfExpression final : public DwarfExpression {
-const AsmPrinter &AP;
- DIELoc &DIE;
+ const AsmPrinter &AP;
+ DIELoc &OutDIE;
+ DIELoc TmpDIE;
+ bool IsBuffering = false;
+
+ /// Return the DIE that currently is being emitted to.
+ DIELoc &getActiveDIE() { return IsBuffering ? TmpDIE : OutDIE; }
void emitOp(uint8_t Op, const char *Comment = nullptr) override;
void emitSigned(int64_t Value) override;
void emitUnsigned(uint64_t Value) override;
void emitData1(uint8_t Value) override;
void emitBaseTypeRef(uint64_t Idx) override;
+
+ void enableTemporaryBuffer() override;
+ void disableTemporaryBuffer() override;
+ unsigned getTemporaryBufferSize() override;
+ void commitTemporaryBuffer() override;
+
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
public:
@@ -331,7 +404,7 @@ public:
DIELoc *finalize() {
DwarfExpression::finalize();
- return &DIE;
+ return &OutDIE;
}
};
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 244678ce9dc1..35fa51fb24c4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -32,15 +32,9 @@ class LexicalScope;
class MCSection;
// Data structure to hold a range for range lists.
-class RangeSpan {
-public:
- RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
- const MCSymbol *getStart() const { return Start; }
- const MCSymbol *getEnd() const { return End; }
- void setEnd(const MCSymbol *E) { End = E; }
-
-private:
- const MCSymbol *Start, *End;
+struct RangeSpan {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
};
class RangeSpanList {
@@ -86,10 +80,6 @@ class DwarfFile {
/// The table is shared by all units.
MCSymbol *RnglistsTableBaseSym = nullptr;
- /// DWARF v5: The symbol that designates the base of the locations list table.
- /// The table is shared by all units.
- MCSymbol *LoclistsTableBaseSym = nullptr;
-
/// The variables of a lexical scope.
struct ScopeVars {
/// We need to sort Args by ArgNo and check for duplicates. This could also
@@ -167,9 +157,6 @@ public:
MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; }
void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; }
- MCSymbol *getLoclistsTableBaseSym() const { return LoclistsTableBaseSym; }
- void setLoclistsTableBaseSym(MCSymbol *Sym) { LoclistsTableBaseSym = Sym; }
-
/// \returns false if the variable was merged with a previous one.
bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 991ab94b50ab..37c68c085792 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -47,31 +47,42 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP,
- DwarfCompileUnit &CU,
- DIELoc &DIE)
- : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP),
- DIE(DIE) {}
+ DwarfCompileUnit &CU, DIELoc &DIE)
+ : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), OutDIE(DIE) {}
void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
- CU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
+ CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Op);
}
void DIEDwarfExpression::emitSigned(int64_t Value) {
- CU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
+ CU.addSInt(getActiveDIE(), dwarf::DW_FORM_sdata, Value);
}
void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
- CU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+ CU.addUInt(getActiveDIE(), dwarf::DW_FORM_udata, Value);
}
void DIEDwarfExpression::emitData1(uint8_t Value) {
- CU.addUInt(DIE, dwarf::DW_FORM_data1, Value);
+ CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Value);
}
void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
- CU.addBaseTypeRef(DIE, Idx);
+ CU.addBaseTypeRef(getActiveDIE(), Idx);
}
+void DIEDwarfExpression::enableTemporaryBuffer() {
+ assert(!IsBuffering && "Already buffering?");
+ IsBuffering = true;
+}
+
+void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
+
+unsigned DIEDwarfExpression::getTemporaryBufferSize() {
+ return TmpDIE.ComputeSize(&AP);
+}
+
+void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); }
+
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) {
return MachineReg == TRI.getFrameRegister(*AP.MF);
@@ -205,6 +216,10 @@ void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) {
MDNodeToDieMap.insert(std::make_pair(Desc, D));
}
+void DwarfUnit::insertDIE(DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(nullptr, D));
+}
+
void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
if (DD->getDwarfVersion() >= 4)
Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present,
@@ -718,7 +733,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
return "";
// FIXME: Decide whether to implement this for non-C++ languages.
- if (getLanguage() != dwarf::DW_LANG_C_plus_plus)
+ if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage()))
return "";
std::string CS;
@@ -942,6 +957,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (CTy->isAppleBlockExtension())
addFlag(Buffer, dwarf::DW_AT_APPLE_block);
+ if (CTy->getExportSymbols())
+ addFlag(Buffer, dwarf::DW_AT_export_symbols);
+
// This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
// inside C++ composite types to point to the base class with the vtable.
// Rust uses DW_AT_containing_type to link a vtable to the type
@@ -1696,15 +1714,6 @@ void DwarfUnit::addRnglistsBase() {
TLOF.getDwarfRnglistsSection()->getBeginSymbol());
}
-void DwarfUnit::addLoclistsBase() {
- assert(DD->getDwarfVersion() >= 5 &&
- "DW_AT_loclists_base requires DWARF version 5 or later");
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- addSectionLabel(getUnitDie(), dwarf::DW_AT_loclists_base,
- DU->getLoclistsTableBaseSym(),
- TLOF.getDwarfLoclistsSection()->getBeginSymbol());
-}
-
void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
addFlag(D, dwarf::DW_AT_declaration);
StringRef Name = CTy->getName();
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 56c934a35ae8..46c52a1faf4b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -127,6 +127,8 @@ public:
/// the mappings are kept in DwarfDebug.
void insertDIE(const DINode *Desc, DIE *D);
+ void insertDIE(DIE *D);
+
/// Add a flag that is true to the DIE.
void addFlag(DIE &Die, dwarf::Attribute Attribute);
@@ -214,15 +216,6 @@ public:
/// Add thrown types.
void addThrownTypes(DIE &Die, DINodeArray ThrownTypes);
- // FIXME: Should be reformulated in terms of addComplexAddress.
- /// Start with the address based on the location provided, and generate the
- /// DWARF information necessary to find the actual Block variable (navigating
- /// the Block struct) based on the starting location. Add the DWARF
- /// information to the die. Obsolete, please use addComplexAddress instead.
- void addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
- dwarf::Attribute Attribute,
- const MachineLocation &Location);
-
/// Add a new type attribute to the specified entity.
///
/// This takes an attribute parameter because DW_AT_friend attributes are
@@ -279,9 +272,6 @@ public:
/// Add the DW_AT_rnglists_base attribute to the unit DIE.
void addRnglistsBase();
- /// Add the DW_AT_loclists_base attribute to the unit DIE.
- void addLoclistsBase();
-
virtual DwarfCompileUnit &getCU() = 0;
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 99e3687b36b8..31dfaaac836e 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -426,7 +426,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// EHABI). In this case LSDASection will be NULL.
if (LSDASection)
Asm->OutStreamer->SwitchSection(LSDASection);
- Asm->EmitAlignment(2);
+ Asm->EmitAlignment(Align(4));
// Emit the LSDA.
MCSymbol *GCCETSym =
@@ -602,11 +602,11 @@ MCSymbol *EHStreamer::emitExceptionTable() {
}
if (HaveTTData) {
- Asm->EmitAlignment(2);
+ Asm->EmitAlignment(Align(4));
emitTypeInfos(TTypeEncoding, TTBaseLabel);
}
- Asm->EmitAlignment(2);
+ Asm->EmitAlignment(Align(4));
return GCCETSym;
}
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 39392b79e960..3849644d1584 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -72,7 +72,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
**/
// Align to address width.
- AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
// Emit PointCount.
OS.AddComment("safe point count");
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 3145cc90dc73..b4eda5fa8c58 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
report_fatal_error(" Too much descriptor for ocaml GC");
}
AP.emitInt16(NumDescriptors);
- AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
IE = Info.funcinfo_end();
@@ -180,7 +180,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.emitInt16(K->StackOffset);
}
- AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
}
}
}
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 155e91ce61a1..0398675577cd 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -982,8 +982,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
OS.EmitValueToAlignment(4);
OS.EmitLabel(LSDALabel);
- const Function *Per =
- dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ const auto *Per = cast<Function>(F.getPersonalityFn()->stripPointerCasts());
StringRef PerName = Per->getName();
int BaseState = -1;
if (PerName == "_except_handler4") {
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index dc7eaf6a5fe7..27b298dcf6af 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -382,7 +382,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
- NewLI->setAlignment(LI->getAlignment());
+ NewLI->setAlignment(MaybeAlign(LI->getAlignment()));
NewLI->setVolatile(LI->isVolatile());
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
@@ -469,7 +469,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
- NewSI->setAlignment(SI->getAlignment());
+ NewSI->setAlignment(MaybeAlign(SI->getAlignment()));
NewSI->setVolatile(SI->isVolatile());
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
@@ -1376,7 +1376,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
Builder.SetInsertPoint(BB);
LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
// Atomics require at least natural alignment.
- InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
+ InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8));
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
@@ -1711,7 +1711,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// 'expected' argument, if present.
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
- AllocaCASExpected->setAlignment(AllocaAlignment);
+ AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment));
unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
AllocaCASExpected_i8 =
@@ -1730,7 +1730,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
Args.push_back(IntValue);
} else {
AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
- AllocaValue->setAlignment(AllocaAlignment);
+ AllocaValue->setAlignment(MaybeAlign(AllocaAlignment));
AllocaValue_i8 =
Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
@@ -1742,7 +1742,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// 'ret' argument.
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
- AllocaResult->setAlignment(AllocaAlignment);
+ AllocaResult->setAlignment(MaybeAlign(AllocaAlignment));
unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
AllocaResult_i8 =
Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
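
The MaybeAlign wrappers introduced in this file preserve the old 0-means-unspecified convention: MaybeAlign is an Optional<Align> whose empty state corresponds to a raw alignment of 0. A small sketch of that contract, with illustrative values:

    MaybeAlign NoAlign(0);           // raw 0 becomes an empty Optional<Align>
    MaybeAlign Eight(8);             // raw 8 becomes Align(8)
    assert(!NoAlign.hasValue() && Eight->value() == 8);
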
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index fb54b5d6c8d8..455916eeb82f 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -129,9 +129,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
getAnalysis<MachineBlockFrequencyInfo>());
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
getAnalysis<MachineBranchProbabilityInfo>());
- return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
- MF.getSubtarget().getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>());
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ return Folder.OptimizeFunction(
+ MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(),
+ MMIWP ? &MMIWP->getMMI() : nullptr);
}
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
@@ -161,6 +162,11 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
// Avoid matching if this pointer gets reused.
TriedMerging.erase(MBB);
+ // Update call site info.
+ std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) {
+ if (MI.isCall(MachineInstr::IgnoreBundle))
+ MF->eraseCallSiteInfo(&MI);
+ });
// Remove the block.
MF->erase(MBB);
EHScopeMembership.erase(MBB);
@@ -1306,6 +1312,8 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
/// result in infinite loops.
static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2) {
+ assert(MBB1 && MBB2 && "Unknown MachineBasicBlock");
+
// Right now, we use a simple heuristic. If MBB2 ends with a call, and
// MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
// optimize branches that branch to either a return block or an assert block
@@ -1843,7 +1851,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
template <class Container>
static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
Container &Set) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
Set.insert(*AI);
} else {
@@ -1871,7 +1879,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
for (const MachineOperand &MO : Loc->operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isUse()) {
@@ -1909,7 +1917,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
return Loc;
if (!MO.isReg() || MO.isUse())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (Uses.count(Reg)) {
@@ -1937,14 +1945,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
for (const MachineOperand &MO : PI->operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isUse()) {
addRegAndItsAliases(Reg, TRI, Uses);
} else {
if (Uses.erase(Reg)) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
Uses.erase(*SubRegs); // Use sub-registers to be conservative
}
@@ -2010,7 +2018,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
}
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef()) {
@@ -2060,13 +2068,13 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
for (const MachineOperand &MO : TIB->operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (!AllDefsSet.count(Reg)) {
continue;
}
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
ActiveDefsSet.erase(*AI);
} else {
@@ -2078,8 +2086,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
for (const MachineOperand &MO : TIB->operands()) {
if (!MO.isReg() || !MO.isDef() || MO.isDead())
continue;
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Reg || Register::isVirtualRegister(Reg))
continue;
addRegAndItsAliases(Reg, TRI, ActiveDefsSet);
addRegAndItsAliases(Reg, TRI, AllDefsSet);
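
The first BranchFolding hunk reflects MachineModuleInfo moving behind a wrapper pass: consumers now fetch MachineModuleInfoWrapperPass and unwrap it, tolerating its absence. A sketch of the lookup pattern used above:

    auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
    MachineModuleInfo *MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
    // Callees such as OptimizeFunction() must now accept a null MMI.
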
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index 3ad6266d4f35..6efdc9efa968 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -64,19 +64,18 @@ class BranchRelaxation : public MachineFunctionPass {
/// Compute the offset immediately following this block. \p MBB is the next
/// block.
unsigned postOffset(const MachineBasicBlock &MBB) const {
- unsigned PO = Offset + Size;
- unsigned Align = MBB.getAlignment();
- if (Align == 0)
+ const unsigned PO = Offset + Size;
+ const Align Alignment = MBB.getAlignment();
+ if (Alignment == 1)
return PO;
- unsigned AlignAmt = 1 << Align;
- unsigned ParentAlign = MBB.getParent()->getAlignment();
- if (Align <= ParentAlign)
- return PO + OffsetToAlignment(PO, AlignAmt);
+ const Align ParentAlign = MBB.getParent()->getAlignment();
+ if (Alignment <= ParentAlign)
+ return PO + offsetToAlignment(PO, Alignment);
// The alignment of this MBB is larger than the function's alignment, so we
// can't tell whether or not it will insert nops. Assume that it will.
- return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt);
+ return PO + Alignment.value() + offsetToAlignment(PO, Alignment);
}
};
@@ -128,9 +127,8 @@ void BranchRelaxation::verify() {
#ifndef NDEBUG
unsigned PrevNum = MF->begin()->getNumber();
for (MachineBasicBlock &MBB : *MF) {
- unsigned Align = MBB.getAlignment();
- unsigned Num = MBB.getNumber();
- assert(BlockInfo[Num].Offset % (1u << Align) == 0);
+ const unsigned Num = MBB.getNumber();
+ assert(isAligned(MBB.getAlignment(), BlockInfo[Num].Offset));
assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset);
assert(BlockInfo[Num].Size == computeBlockSize(MBB));
PrevNum = Num;
@@ -143,7 +141,7 @@ void BranchRelaxation::verify() {
LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
for (auto &MBB : *MF) {
const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
- dbgs() << format("%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
+ dbgs() << format("%%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
<< format("size=%#x\n", BBI.Size);
}
}
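
postOffset now leans on offsetToAlignment, which returns the padding needed to round an offset up to an Align. A sketch of the arithmetic with made-up numbers:

    const uint64_t PO = 10;
    const Align A(8);
    // offsetToAlignment(V, A) == alignTo(V, A) - V
    uint64_t Pad = offsetToAlignment(PO, A);   // 6, since alignTo(10, 8) == 16
    uint64_t Next = PO + Pad;                  // 16, the first aligned offset
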
diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp
index cc4b2caa9bed..709164e5f178 100644
--- a/lib/CodeGen/BreakFalseDeps.cpp
+++ b/lib/CodeGen/BreakFalseDeps.cpp
@@ -9,12 +9,11 @@
/// \file Break False Dependency pass.
///
/// Some instructions have false dependencies which cause unnecessary stalls.
-/// For exmaple, instructions that only write part of a register, and implicitly
-/// need to read the other parts of the register. This may cause unwanted
+/// For example, instructions may write part of a register and implicitly
+/// need to read the other parts of the register. This may cause unwanted
/// stalls preventing otherwise unrelated instructions from executing in
/// parallel in an out-of-order CPU.
-/// This pass is aimed at identifying and avoiding these depepndencies when
-/// possible.
+/// This pass is aimed at identifying and avoiding these dependencies.
//
//===----------------------------------------------------------------------===//
@@ -24,6 +23,7 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -109,7 +109,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
MachineOperand &MO = MI->getOperand(OpIdx);
assert(MO.isUndef() && "Expected undef machine operand");
- unsigned OriginalReg = MO.getReg();
+ Register OriginalReg = MO.getReg();
// Update only undef operands that have reg units that are mapped to one root.
for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
@@ -162,7 +162,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
- unsigned reg = MI->getOperand(OpIdx).getReg();
+ Register reg = MI->getOperand(OpIdx).getReg();
unsigned Clearance = RDA->getClearance(MI, reg);
LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
@@ -178,6 +178,7 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) {
assert(!MI->isDebugInstr() && "Won't process debug values");
// Break dependence on undef uses. Do this before updating LiveRegs below.
+ // This can remove a false dependence with no additional instructions.
unsigned OpNum;
unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
if (Pref) {
@@ -189,6 +190,11 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) {
UndefReads.push_back(std::make_pair(MI, OpNum));
}
+ // The code below allows the target to create a new instruction to break the
+ // dependence. That opposes the goal of minimizing size, so bail out now.
+ if (MF->getFunction().hasMinSize())
+ return;
+
const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
@@ -209,6 +215,11 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
if (UndefReads.empty())
return;
+ // The code below allows the target to create a new instruction to break the
+ // dependence. That opposes the goal of minimizing size, so bail out now.
+ if (MF->getFunction().hasMinSize())
+ return;
+
// Collect this block's live out register units.
LiveRegSet.init(*TRI);
// We do not need to care about pristine registers as they are just preserved
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 7164fdfb7886..bf97aaee3665 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -40,7 +40,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
MachineRegisterInfo &MRI = MF.getRegInfo();
VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm);
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (MRI.reg_nodbg_empty(Reg))
continue;
VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg));
@@ -48,10 +48,11 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
}
// Return the preferred allocation register for reg, given a COPY instruction.
-static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+static Register copyHint(const MachineInstr *mi, unsigned reg,
const TargetRegisterInfo &tri,
const MachineRegisterInfo &mri) {
- unsigned sub, hreg, hsub;
+ unsigned sub, hsub;
+ Register hreg;
if (mi->getOperand(0).getReg() == reg) {
sub = mi->getOperand(0).getSubReg();
hreg = mi->getOperand(1).getReg();
@@ -65,11 +66,11 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
if (!hreg)
return 0;
- if (TargetRegisterInfo::isVirtualRegister(hreg))
- return sub == hsub ? hreg : 0;
+ if (Register::isVirtualRegister(hreg))
+ return sub == hsub ? hreg : Register();
const TargetRegisterClass *rc = mri.getRegClass(reg);
- unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
+ Register CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
if (rc->contains(CopiedPReg))
return CopiedPReg;
@@ -112,7 +113,7 @@ static bool isRematerializable(const LiveInterval &LI,
// If the original (pre-splitting) registers match this
// copy came from a split.
- if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ if (!Register::isVirtualRegister(Reg) ||
VRM->getOriginal(Reg) != Original)
return false;
@@ -243,7 +244,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
// Get allocation hints from copies.
if (!mi->isCopy())
continue;
- unsigned hint = copyHint(mi, li.reg, tri, mri);
+ Register hint = copyHint(mi, li.reg, tri, mri);
if (!hint)
continue;
// Force hweight onto the stack so that x86 doesn't add hidden precision,
@@ -251,8 +252,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
//
// FIXME: we probably shouldn't use floats at all.
volatile float hweight = Hint[hint] += weight;
- if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint))
- CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint)));
+ if (Register::isVirtualRegister(hint) || mri.isAllocatable(hint))
+ CopyHints.insert(
+ CopyHint(hint, hweight, Register::isPhysicalRegister(hint)));
}
Hint.clear();
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 497fcb147849..a397039180a4 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -32,7 +32,6 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) {
// No stack is used.
StackOffset = 0;
- MaxStackArgAlign = 1;
clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -41,20 +40,21 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
/// Allocate space on the stack large enough to pass an argument by value.
/// The size and alignment information of the argument is encoded in
/// its parameter attribute.
-void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- int MinSize, int MinAlign,
- ISD::ArgFlagsTy ArgFlags) {
- unsigned Align = ArgFlags.getByValAlign();
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, int MinSize,
+ int MinAlignment, ISD::ArgFlagsTy ArgFlags) {
+ Align MinAlign(MinAlignment);
+ Align Alignment(ArgFlags.getByValAlign());
unsigned Size = ArgFlags.getByValSize();
if (MinSize > (int)Size)
Size = MinSize;
- if (MinAlign > (int)Align)
- Align = MinAlign;
- ensureMaxAlignment(Align);
- MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align);
+ if (MinAlign > Alignment)
+ Alignment = MinAlign;
+ ensureMaxAlignment(Alignment);
+ MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size,
+ Alignment.value());
Size = unsigned(alignTo(Size, MinAlign));
- unsigned Offset = AllocateStack(Size, Align);
+ unsigned Offset = AllocateStack(Size, Alignment.value());
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
@@ -90,13 +90,8 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Ins[i].VT;
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
- if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
-#ifndef NDEBUG
- dbgs() << "Formal argument #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << '\n';
-#endif
- llvm_unreachable(nullptr);
- }
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this))
+ report_fatal_error("unable to allocate function argument #" + Twine(i));
}
}
@@ -122,13 +117,8 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
-#ifndef NDEBUG
- dbgs() << "Return operand #" << i << " has unhandled type "
- << EVT(VT).getEVTString() << '\n';
-#endif
- llvm_unreachable(nullptr);
- }
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ report_fatal_error("unable to allocate function return #" + Twine(i));
}
}
@@ -209,7 +199,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
MVT VT, CCAssignFn Fn) {
unsigned SavedStackOffset = StackOffset;
- unsigned SavedMaxStackArgAlign = MaxStackArgAlign;
+ Align SavedMaxStackArgAlign = MaxStackArgAlign;
unsigned NumLocs = Locs.size();
// Set the 'inreg' flag if it is used for this calling convention.
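
The reworked HandleByVal clamps the byval alignment to the caller-imposed minimum before reserving stack space. A sketch of the clamping and padding logic with illustrative sizes (AllocateStack is the CCState member used above):

    Align MinAlign(4);                  // floor imposed by the target
    Align Alignment(2);                 // from the argument's byval attribute
    if (MinAlign > Alignment)
      Alignment = MinAlign;             // clamp: now Align(4)
    uint64_t Size = alignTo(10, MinAlign);  // 12, size padded to MinAlign
    // The padded Size and Alignment.value() then feed AllocateStack().
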
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index c37ed57781d4..ad9525f927e8 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -28,6 +28,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeDetectDeadLanesPass(Registry);
initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
+ initializeEarlyIfPredicatorPass(Registry);
initializeEarlyMachineLICMPass(Registry);
initializeEarlyTailDuplicatePass(Registry);
initializeExpandMemCmpPassPass(Registry);
@@ -53,6 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLocalStackSlotPassPass(Registry);
initializeLowerIntrinsicsPass(Registry);
initializeMIRCanonicalizerPass(Registry);
+ initializeMIRNamerPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
@@ -63,10 +65,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
- initializeMachineModuleInfoPass(Registry);
+ initializeMachineModuleInfoWrapperPassPass(Registry);
initializeMachineOptimizationRemarkEmitterPassPass(Registry);
initializeMachineOutlinerPass(Registry);
initializeMachinePipelinerPass(Registry);
+ initializeModuloScheduleTestPass(Registry);
initializeMachinePostDominatorTreePass(Registry);
initializeMachineRegionInfoPassPass(Registry);
initializeMachineSchedulerPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 52b4bbea012b..fa4432ea23ec 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -344,7 +344,7 @@ class TypePromotionTransaction;
// Get the DominatorTree, building if necessary.
DominatorTree &getDT(Function &F) {
if (!DT)
- DT = llvm::make_unique<DominatorTree>(F);
+ DT = std::make_unique<DominatorTree>(F);
return *DT;
}
@@ -424,7 +424,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLI = SubtargetInfo->getTargetLowering();
TRI = SubtargetInfo->getRegisterInfo();
}
- TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
@@ -1524,7 +1524,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
const TargetLowering &TLI, const DataLayout &DL) {
BasicBlock *UserBB = User->getParent();
DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
- TruncInst *TruncI = dyn_cast<TruncInst>(User);
+ auto *TruncI = cast<TruncInst>(User);
bool MadeChange = false;
for (Value::user_iterator TruncUI = TruncI->user_begin(),
@@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
TheUse = InsertedShift;
}
- // If we removed all uses, nuke the shift.
+ // If we removed all uses, or there are none, nuke the shift.
if (ShiftI->use_empty()) {
salvageDebugInfo(*ShiftI);
ShiftI->eraseFromParent();
+ MadeChange = true;
}
return MadeChange;
@@ -1811,7 +1812,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
AllocaInst *AI;
if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
- AI->setAlignment(PrefAlign);
+ AI->setAlignment(MaybeAlign(PrefAlign));
// Global variables can only be aligned if they are defined in this
// object (i.e. they are uniquely initialized in this object), and
// over-aligning global variables that have an explicit section is
@@ -1821,7 +1822,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
GV->getPointerAlignment(*DL) < PrefAlign &&
DL->getTypeAllocSize(GV->getValueType()) >=
MinSize + Offset2)
- GV->setAlignment(PrefAlign);
+ GV->setAlignment(MaybeAlign(PrefAlign));
}
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
@@ -1867,24 +1868,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
});
return true;
}
- case Intrinsic::objectsize: {
- // Lower all uses of llvm.objectsize.*
- Value *RetVal =
- lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
-
- resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
- replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
- });
- return true;
- }
- case Intrinsic::is_constant: {
- // If is_constant hasn't folded away yet, lower it to false now.
- Constant *RetVal = ConstantInt::get(II->getType(), 0);
- resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
- replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
- });
- return true;
- }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
@@ -2024,17 +2011,18 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
/// call.
const Function *F = BB->getParent();
- SmallVector<CallInst*, 4> TailCalls;
+ SmallVector<BasicBlock*, 4> TailCallBBs;
if (PN) {
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
// Look through bitcasts.
Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
CallInst *CI = dyn_cast<CallInst>(IncomingVal);
+ BasicBlock *PredBB = PN->getIncomingBlock(I);
// Make sure the phi value is indeed produced by the tail call.
- if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+ if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
TLI->mayBeEmittedAsTailCall(CI) &&
attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCalls.push_back(CI);
+ TailCallBBs.push_back(PredBB);
}
} else {
SmallPtrSet<BasicBlock*, 4> VisitedBBs;
@@ -2052,24 +2040,20 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
CallInst *CI = dyn_cast<CallInst>(&*RI);
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCalls.push_back(CI);
+ TailCallBBs.push_back(*PI);
}
}
bool Changed = false;
- for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
- CallInst *CI = TailCalls[i];
- CallSite CS(CI);
-
+ for (auto const &TailCallBB : TailCallBBs) {
// Make sure the call instruction is followed by an unconditional branch to
// the return block.
- BasicBlock *CallBB = CI->getParent();
- BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+ BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
continue;
- // Duplicate the return into CallBB.
- (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB);
+ // Duplicate the return into TailCallBB.
+ (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
ModifiedDT = Changed = true;
++NumRetsDup;
}
@@ -2683,26 +2667,26 @@ private:
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
Value *NewVal) {
- Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>(
+ Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
Inst, Idx, NewVal));
}
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
- llvm::make_unique<TypePromotionTransaction::InstructionRemover>(
+ std::make_unique<TypePromotionTransaction::InstructionRemover>(
Inst, RemovedInsts, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
Value *New) {
Actions.push_back(
- llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
+ std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
}
void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
Actions.push_back(
- llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
+ std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
}
Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
@@ -2732,7 +2716,7 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst,
void TypePromotionTransaction::moveBefore(Instruction *Inst,
Instruction *Before) {
Actions.push_back(
- llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
+ std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
Inst, Before));
}
@@ -3048,7 +3032,7 @@ public:
To = dyn_cast<PHINode>(OldReplacement);
OldReplacement = Get(From);
}
- assert(Get(To) == To && "Replacement PHI node is already replaced.");
+ assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
Put(From, To);
From->replaceAllUsesWith(To);
AllPhiNodes.erase(From);
@@ -3334,7 +3318,7 @@ private:
// So the values are different and do not match. So we need them to
// match. (But we register no more than one match per PHI node, so that
// we won't later try to replace them twice.)
- if (!MatchedPHIs.insert(FirstPhi).second)
+ if (MatchedPHIs.insert(FirstPhi).second)
Matcher.insert({ FirstPhi, SecondPhi });
// But we must check it.
WorkList.push_back({ FirstPhi, SecondPhi });
@@ -3412,11 +3396,10 @@ private:
Select->setFalseValue(ST.Get(Map[FalseValue]));
} else {
// Must be a Phi node then.
- PHINode *PHI = cast<PHINode>(V);
- auto *CurrentPhi = dyn_cast<PHINode>(Current);
+ auto *PHI = cast<PHINode>(V);
// Fill the Phi node with values from predecessors.
for (auto B : predecessors(PHI->getParent())) {
- Value *PV = CurrentPhi->getIncomingValueForBlock(B);
+ Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
assert(Map.find(PV) != Map.end() && "No predecessor Value!");
PHI->addIncoming(ST.Get(Map[PV]), B);
}
@@ -3785,13 +3768,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// poisoned value regular value
// It should be OK since undef covers valid value.
if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
- const Instruction *ExtInst =
- dyn_cast<const Instruction>(*Inst->user_begin());
+ const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
if (ExtInst->hasOneUse()) {
- const Instruction *AndInst =
- dyn_cast<const Instruction>(*ExtInst->user_begin());
+ const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
if (AndInst && AndInst->getOpcode() == Instruction::And) {
- const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
+ const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
if (Cst &&
Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
return true;
@@ -4793,8 +4774,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
<< " for " << *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
- } else if (AddrSinkUsingGEPs ||
- (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) {
+ } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
+ TM && SubtargetInfo->addrSinkUsingGEPs())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
@@ -5816,7 +5797,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
return false;
IRBuilder<> Builder(Load->getNextNode());
- auto *NewAnd = dyn_cast<Instruction>(
+ auto *NewAnd = cast<Instruction>(
Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
// Mark this instruction as "inserted by CGP", so that other
// optimizations don't touch it.
@@ -6193,35 +6174,49 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
// OpsToSink can contain multiple uses in a use chain (e.g.
// (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
- // uses must come first, which means they are sunk first, temporarily creating
- // invalid IR. This will be fixed once their dominated users are sunk and
- // updated.
+ // uses must come first, so we process the ops in reverse order so as to not
+ // create invalid IR.
BasicBlock *TargetBB = I->getParent();
bool Changed = false;
SmallVector<Use *, 4> ToReplace;
- for (Use *U : OpsToSink) {
+ for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
if (UI->getParent() == TargetBB || isa<PHINode>(UI))
continue;
ToReplace.push_back(U);
}
- SmallPtrSet<Instruction *, 4> MaybeDead;
+ SetVector<Instruction *> MaybeDead;
+ DenseMap<Instruction *, Instruction *> NewInstructions;
+ Instruction *InsertPoint = I;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
Instruction *NI = UI->clone();
+ NewInstructions[UI] = NI;
MaybeDead.insert(UI);
LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
- NI->insertBefore(I);
+ NI->insertBefore(InsertPoint);
+ InsertPoint = NI;
InsertedInsts.insert(NI);
- U->set(NI);
+
+ // Update the use for the new instruction, making sure that we update the
+ // sunk instruction uses, if it is part of a chain that has already been
+ // sunk.
+ Instruction *OldI = cast<Instruction>(U->getUser());
+ if (NewInstructions.count(OldI))
+ NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
+ else
+ U->set(NI);
Changed = true;
}
// Remove instructions that are dead after sinking.
- for (auto *I : MaybeDead)
- if (!I->hasNUsesOrMore(1))
+ for (auto *I : MaybeDead) {
+ if (!I->hasNUsesOrMore(1)) {
+ LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
I->eraseFromParent();
+ }
+ }
return Changed;
}
@@ -7106,7 +7101,6 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
for (auto &I : reverse(BB)) {
if (makeBitReverse(I, *DL, *TLI)) {
MadeBitReverse = MadeChange = true;
- ModifiedDT = true;
break;
}
}
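
On the tryToSinkFreeOperands hunk: OpsToSink is walked in reverse and an InsertPoint is threaded backwards, so each cloned instruction lands before the clones of its users and the IR stays valid at every step. A condensed sketch of the remapping (names mirror the hunk; this is a fragment, not a drop-in):

    DenseMap<Instruction *, Instruction *> NewInstructions;
    Instruction *InsertPoint = I;            // sink destination
    for (Use *U : ToReplace) {               // dominated users come first
      Instruction *UI = cast<Instruction>(U->get());
      Instruction *NI = UI->clone();
      NewInstructions[UI] = NI;
      NI->insertBefore(InsertPoint);         // clones stack up above I
      InsertPoint = NI;
      auto *OldUser = cast<Instruction>(U->getUser());
      if (NewInstructions.count(OldUser))    // user was itself sunk already
        NewInstructions[OldUser]->setOperand(U->getOperandNo(), NI);
      else
        U->set(NI);
    }
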
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 4144c243a341..702e7e244bce 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -187,7 +187,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
const TargetRegisterClass *NewRC = nullptr;
@@ -272,7 +272,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
}
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isDef()) continue;
@@ -303,7 +303,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isUse()) continue;
@@ -457,6 +457,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
Max = SU;
}
+ assert(Max && "Failed to find bottom of the critical path");
#ifndef NDEBUG
{
@@ -612,7 +613,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
AntiDepReg = 0;
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index b99be5d7a87c..a169c3cb16b2 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,6 +23,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
@@ -71,39 +73,13 @@ static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
// --------------------------------------------------------------------
-DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
- const DFAStateInput (*SIT)[2],
- const unsigned *SET):
- InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) {
- // Make sure DFA types are large enough for the number of terms & resources.
- static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
- (8 * sizeof(DFAInput)),
- "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
- static_assert(
- (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
- "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
-}
-
-// Read the DFA transition table and update CachedTable.
-//
-// Format of the transition tables:
-// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
-// transitions
-// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
-// for the ith state
-//
-void DFAPacketizer::ReadTable(unsigned int state) {
- unsigned ThisState = DFAStateEntryTable[state];
- unsigned NextStateInTable = DFAStateEntryTable[state+1];
- // Early exit in case CachedTable has already contains this
- // state's transitions.
- if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0])))
- return;
-
- for (unsigned i = ThisState; i < NextStateInTable; i++)
- CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
- DFAStateInputTable[i][1];
-}
+// Make sure DFA types are large enough for the number of terms & resources.
+static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
+ (8 * sizeof(DFAInput)),
+ "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
+static_assert(
+ (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
+ "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
// Return the DFAInput for an instruction class.
DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
@@ -129,9 +105,7 @@ DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
- UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
- ReadTable(CurrentState);
- return CachedTable.count(StateTrans) != 0;
+ return A.canAdd(InsnInput);
}
// Reserve the resources occupied by a MCInstrDesc and change the current
@@ -139,10 +113,7 @@ bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
- UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
- ReadTable(CurrentState);
- assert(CachedTable.count(StateTrans) != 0);
- CurrentState = CachedTable[StateTrans];
+ A.add(InsnInput);
}
// Check if the resources occupied by a machine instruction are available
@@ -159,19 +130,33 @@ void DFAPacketizer::reserveResources(MachineInstr &MI) {
reserveResources(&MID);
}
+unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) {
+ ArrayRef<NfaPath> NfaPaths = A.getNfaPaths();
+ assert(!NfaPaths.empty() && "Invalid bundle!");
+ const NfaPath &RS = NfaPaths.front();
+
+ // RS stores the cumulative resources used up to and including the I'th
+ // instruction. The 0th instruction is the base case.
+ if (InstIdx == 0)
+ return RS[0];
+ // Return the difference between the cumulative resources used by InstIdx and
+ // its predecessor.
+ return RS[InstIdx] ^ RS[InstIdx - 1];
+}
+
namespace llvm {
// This class extends ScheduleDAGInstrs and overrides the schedule method
// to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
private:
- AliasAnalysis *AA;
+ AAResults *AA;
/// Ordered list of DAG postprocessing steps.
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- AliasAnalysis *AA);
+ AAResults *AA);
// Actual scheduling work.
void schedule() override;
@@ -189,7 +174,7 @@ protected:
DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
MachineLoopInfo &MLI,
- AliasAnalysis *AA)
+ AAResults *AA)
: ScheduleDAGInstrs(MF, &MLI), AA(AA) {
CanHandleTerminators = true;
}
@@ -207,9 +192,10 @@ void DefaultVLIWScheduler::schedule() {
}
VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
- MachineLoopInfo &mli, AliasAnalysis *aa)
+ MachineLoopInfo &mli, AAResults *aa)
: MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
+ ResourceTracker->setTrackResources(true);
VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
}
@@ -224,8 +210,11 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
LLVM_DEBUG({
if (!CurrentPacketMIs.empty()) {
dbgs() << "Finalizing packet:\n";
- for (MachineInstr *MI : CurrentPacketMIs)
- dbgs() << " * " << *MI;
+ unsigned Idx = 0;
+ for (MachineInstr *MI : CurrentPacketMIs) {
+ unsigned R = ResourceTracker->getUsedResources(Idx++);
+ dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
+ }
}
});
if (CurrentPacketMIs.size() > 1) {
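
getUsedResources above relies on the automaton recording cumulative resource bits per instruction: a packet only ever adds resources, so RS[i] is a superset of RS[i-1] and XOR recovers exactly the bits instruction i contributed. A toy illustration under that monotonicity assumption:

    unsigned RS[3] = {0x1, 0x3, 0x7};   // cumulative bits after insns 0, 1, 2
    unsigned R0 = RS[0];                // 0x1: bits used by insn 0
    unsigned R1 = RS[1] ^ RS[0];        // 0x2: bits added by insn 1
    unsigned R2 = RS[2] ^ RS[1];        // 0x4: bits added by insn 2
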
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 049ce7063307..9a537c859a67 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -75,8 +75,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
// Don't delete live physreg defs, or any reserved register defs.
if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
@@ -140,8 +140,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
// this def.
@@ -159,8 +159,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isUse()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
LivePhysRegs.set(*AI);
}
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index fe78acf4d80a..6d5306c1dc0c 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -154,7 +154,7 @@ static bool isCrossCopy(const MachineRegisterInfo &MRI,
const TargetRegisterClass *DstRC,
const MachineOperand &MO) {
assert(lowersToCopies(MI));
- unsigned SrcReg = MO.getReg();
+ Register SrcReg = MO.getReg();
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
if (DstRC == SrcRC)
return false;
@@ -194,8 +194,8 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
LaneBitmask UsedLanes) {
if (!MO.readsReg())
return;
- unsigned MOReg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(MOReg))
+ Register MOReg = MO.getReg();
+ if (!Register::isVirtualRegister(MOReg))
return;
unsigned MOSubReg = MO.getSubReg();
@@ -203,7 +203,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes);
UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
- unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg);
+ unsigned MORegIdx = Register::virtReg2Index(MOReg);
VRegInfo &MORegInfo = VRegInfos[MORegIdx];
LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
// Any change at all?
@@ -219,7 +219,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI,
LaneBitmask UsedLanes) {
for (const MachineOperand &MO : MI.uses()) {
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO);
addUsedLanesOnOperand(MO, UsedOnMO);
@@ -230,8 +230,8 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
LaneBitmask UsedLanes,
const MachineOperand &MO) const {
unsigned OpNum = MI.getOperandNo(&MO);
- assert(lowersToCopies(MI) && DefinedByCopy[
- TargetRegisterInfo::virtReg2Index(MI.getOperand(0).getReg())]);
+ assert(lowersToCopies(MI) &&
+ DefinedByCopy[Register::virtReg2Index(MI.getOperand(0).getReg())]);
switch (MI.getOpcode()) {
case TargetOpcode::COPY:
@@ -250,7 +250,7 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
return MO2UsedLanes;
const MachineOperand &Def = MI.getOperand(0);
- unsigned DefReg = Def.getReg();
+ Register DefReg = Def.getReg();
const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
LaneBitmask MO1UsedLanes;
if (RC->CoveredBySubRegs)
@@ -285,10 +285,10 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
return;
const MachineOperand &Def = *MI.defs().begin();
- unsigned DefReg = Def.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ Register DefReg = Def.getReg();
+ if (!Register::isVirtualRegister(DefReg))
return;
- unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
+ unsigned DefRegIdx = Register::virtReg2Index(DefReg);
if (!DefinedByCopy.test(DefRegIdx))
return;
@@ -360,7 +360,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
if (lowersToCopies(DefMI)) {
// Start optimistically with no used or defined lanes for copy
// instructions. The following dataflow analysis will add more bits.
- unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ unsigned RegIdx = Register::virtReg2Index(Reg);
DefinedByCopy.set(RegIdx);
PutInWorklist(RegIdx);
@@ -377,17 +377,17 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
for (const MachineOperand &MO : DefMI.uses()) {
if (!MO.isReg() || !MO.readsReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
LaneBitmask MODefinedLanes;
- if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (Register::isPhysicalRegister(MOReg)) {
MODefinedLanes = LaneBitmask::getAll();
} else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
MODefinedLanes = LaneBitmask::getAll();
} else {
- assert(TargetRegisterInfo::isVirtualRegister(MOReg));
+ assert(Register::isVirtualRegister(MOReg));
if (MRI->hasOneDef(MOReg)) {
const MachineOperand &MODef = *MRI->def_begin(MOReg);
const MachineInstr &MODefMI = *MODef.getParent();
@@ -428,10 +428,10 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
if (lowersToCopies(UseMI)) {
assert(UseMI.getDesc().getNumDefs() == 1);
const MachineOperand &Def = *UseMI.defs().begin();
- unsigned DefReg = Def.getReg();
+ Register DefReg = Def.getReg();
// The used lanes of COPY-like instruction operands are determined by the
// following dataflow analysis.
- if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
+ if (Register::isVirtualRegister(DefReg)) {
// But ignore copies across incompatible register classes.
bool CrossCopy = false;
if (lowersToCopies(UseMI)) {
@@ -470,10 +470,10 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
if (!lowersToCopies(MI))
return false;
const MachineOperand &Def = MI.getOperand(0);
- unsigned DefReg = Def.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ Register DefReg = Def.getReg();
+ if (!Register::isVirtualRegister(DefReg))
return false;
- unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
+ unsigned DefRegIdx = Register::virtReg2Index(DefReg);
if (!DefinedByCopy.test(DefRegIdx))
return false;
@@ -482,8 +482,8 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
if (UsedLanes.any())
return false;
- unsigned MOReg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(MOReg)) {
+ Register MOReg = MO.getReg();
+ if (Register::isVirtualRegister(MOReg)) {
const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
*CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO);
}
@@ -494,7 +494,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
// First pass: Populate defs/uses of vregs with initial values
unsigned NumVirtRegs = MRI->getNumVirtRegs();
for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ unsigned Reg = Register::index2VirtReg(RegIdx);
// Determine used/defined lanes and add copy instructions to worklist.
VRegInfo &Info = VRegInfos[RegIdx];
@@ -508,7 +508,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
Worklist.pop_front();
WorklistMembers.reset(RegIdx);
VRegInfo &Info = VRegInfos[RegIdx];
- unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ unsigned Reg = Register::index2VirtReg(RegIdx);
// Transfer UsedLanes to operands of DefMI (backwards dataflow).
MachineOperand &Def = *MRI->def_begin(Reg);
@@ -522,7 +522,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0;
RegIdx < NumVirtRegs;
++RegIdx) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ unsigned Reg = Register::index2VirtReg(RegIdx);
const VRegInfo &Info = VRegInfos[RegIdx];
dbgs() << printReg(Reg, nullptr)
<< " Used: " << PrintLaneMask(Info.UsedLanes)
@@ -536,10 +536,10 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
- unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ unsigned RegIdx = Register::virtReg2Index(Reg);
const VRegInfo &RegInfo = VRegInfos[RegIdx];
if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) {
LLVM_DEBUG(dbgs()
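
The Register::virtReg2Index/index2VirtReg pairs these loops migrate to are inverse mappings between dense vreg indices and tagged register numbers. A sketch of the round-trip this pass depends on:

    unsigned Idx = 5;
    unsigned Reg = Register::index2VirtReg(Idx);
    assert(Register::isVirtualRegister(Reg));
    assert(Register::virtReg2Index(Reg) == Idx);
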
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 0a83760befaa..e5694218b5c3 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
@@ -140,6 +141,18 @@ private:
/// speculated.
bool canSpeculateInstrs(MachineBasicBlock *MBB);
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// predicated.
+ bool canPredicateInstrs(MachineBasicBlock *MBB);
+
+ /// Scan through instruction dependencies and update InsertAfter array.
+ /// Return false if any dependency is incompatible with if conversion.
+ bool InstrDependenciesAllowIfConv(MachineInstr *I);
+
+ /// Predicate all instructions of the basic block with current condition
+ /// except for terminators. Reverse the condition if ReversePredicate is set.
+ void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate);
+
/// Find a valid insertion point in Head.
bool findInsertionPoint();
@@ -163,11 +176,14 @@ public:
/// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
/// initialize the internal state, and return true.
- bool canConvertIf(MachineBasicBlock *MBB);
+  /// If \p Predicate is set, try to predicate the block; otherwise try to
+  /// speculatively execute it.
+ bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false);
/// convertIf - If-convert the last block passed to canConvertIf(), assuming
/// it is possible. Add any erased blocks to RemovedBlocks.
- void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+ void convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
+ bool Predicate = false);
};
} // end anonymous namespace
@@ -225,37 +241,112 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
}
// Check for any dependencies on Head instructions.
- for (const MachineOperand &MO : I->operands()) {
- if (MO.isRegMask()) {
- LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
- return false;
- }
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
+ if (!InstrDependenciesAllowIfConv(&(*I)))
+ return false;
+ }
+ return true;
+}
- // Remember clobbered regunits.
- if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
- ClobberedRegUnits.set(*Units);
+/// Check that there are no dependencies preventing if conversion.
+///
+/// If instruction uses any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
+ for (const MachineOperand &MO : I->operands()) {
+ if (MO.isRegMask()) {
+ LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ return false;
+ }
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
- if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- MachineInstr *DefMI = MRI->getVRegDef(Reg);
- if (!DefMI || DefMI->getParent() != Head)
- continue;
- if (InsertAfter.insert(DefMI).second)
- LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on "
- << *DefMI);
- if (DefMI->isTerminator()) {
- LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
- return false;
- }
+ // Remember clobbered regunits.
+ if (MO.isDef() && Register::isPhysicalRegister(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ ClobberedRegUnits.set(*Units);
+
+ if (!MO.readsReg() || !Register::isVirtualRegister(Reg))
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI || DefMI->getParent() != Head)
+ continue;
+ if (InsertAfter.insert(DefMI).second)
+ LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on "
+ << *DefMI);
+ if (DefMI->isTerminator()) {
+ LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ return false;
}
}
return true;
}
+/// canPredicateInstrs - Returns true if all the instructions in MBB can safely
+/// be predicated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I) {
+ if (I->isDebugInstr())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (I->isPHI()) {
+ LLVM_DEBUG(dbgs() << "Can't predicate: " << *I);
+ return false;
+ }
+
+ // Check that instruction is predicable and that it is not already
+ // predicated.
+ if (!TII->isPredicable(*I) || TII->isPredicated(*I)) {
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ if (!InstrDependenciesAllowIfConv(&(*I)))
+ return false;
+ }
+ return true;
+}
+
+// Apply predicate to all instructions in the machine block.
+void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) {
+ auto Condition = Cond;
+ if (ReversePredicate)
+ TII->reverseBranchCondition(Condition);
+ // Terminators don't need to be predicated as they will be removed.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I) {
+ if (I->isDebugInstr())
+ continue;
+ TII->PredicateInstruction(*I, Condition);
+ }
+}
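
PredicateBlock is invoked once per leg of the diamond, with ReversePredicate selecting which leg runs under the inverted condition. A sketch of the underlying TII calls for the false leg, assuming Cond and FBB as in the pass above:

    SmallVector<MachineOperand, 4> RevCond(Cond.begin(), Cond.end());
    if (!TII->reverseBranchCondition(RevCond)) {   // false means success
      for (MachineInstr &MI :
           make_range(FBB->begin(), FBB->getFirstTerminator()))
        TII->PredicateInstruction(MI, RevCond);
    }
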
/// Find an insertion point in Head for the speculated instructions. The
/// insertion point must be:
@@ -288,8 +379,8 @@ bool SSAIfConv::findInsertionPoint() {
// We're ignoring regmask operands. That is conservatively correct.
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
// I clobbers Reg, so it isn't live before I.
if (MO.isDef())
@@ -337,7 +428,7 @@ bool SSAIfConv::findInsertionPoint() {
/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is
/// a potential candidate for if-conversion. Fill out the internal state.
///
-bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
+bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
Head = MBB;
TBB = FBB = Tail = nullptr;
@@ -378,8 +469,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
}
// This is a triangle or a diamond.
- // If Tail doesn't have any phis, there must be side effects.
- if (Tail->empty() || !Tail->front().isPHI()) {
+  // If we cannot predicate and there are no phis, skip: there must be side
+  // effects that can only be handled with predication.
+ if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) {
LLVM_DEBUG(dbgs() << "No phis in tail.\n");
return false;
}
@@ -423,8 +515,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
if (PI.PHI->getOperand(i+1).getMBB() == FPred)
PI.FReg = PI.PHI->getOperand(i).getReg();
}
- assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI");
- assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI");
+ assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI");
+ assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");
// Get target information.
if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
@@ -437,10 +529,17 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
// Check that the conditional instructions can be speculated.
InsertAfter.clear();
ClobberedRegUnits.reset();
- if (TBB != Tail && !canSpeculateInstrs(TBB))
- return false;
- if (FBB != Tail && !canSpeculateInstrs(FBB))
- return false;
+ if (Predicate) {
+ if (TBB != Tail && !canPredicateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canPredicateInstrs(FBB))
+ return false;
+ } else {
+ if (TBB != Tail && !canSpeculateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canSpeculateInstrs(FBB))
+ return false;
+ }
// Try to find a valid insertion point for the speculated instructions in the
// head basic block.
@@ -467,7 +566,7 @@ void SSAIfConv::replacePHIInstrs() {
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
PHIInfo &PI = PHIs[i];
LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
- unsigned DstReg = PI.PHI->getOperand(0).getReg();
+ Register DstReg = PI.PHI->getOperand(0).getReg();
TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
PI.PHI->eraseFromParent();
@@ -494,7 +593,7 @@ void SSAIfConv::rewritePHIOperands() {
// equal.
DstReg = PI.TReg;
} else {
- unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ Register PHIDst = PI.PHI->getOperand(0).getReg();
DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
TII->insertSelect(*Head, FirstTerm, HeadDL,
DstReg, Cond, PI.TReg, PI.FReg);
@@ -521,7 +620,8 @@ void SSAIfConv::rewritePHIOperands() {
///
/// Any basic blocks erased will be added to RemovedBlocks.
///
-void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
+ bool Predicate) {
assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
// Update statistics.
@@ -531,11 +631,16 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
++NumDiamondsConv;
// Move all instructions into Head, except for the terminators.
- if (TBB != Tail)
+ if (TBB != Tail) {
+ if (Predicate)
+ PredicateBlock(TBB, /*ReversePredicate=*/false);
Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
- if (FBB != Tail)
+ }
+ if (FBB != Tail) {
+ if (Predicate)
+ PredicateBlock(FBB, /*ReversePredicate=*/true);
Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
-
+ }
// Are there extra Tail predecessors?
bool ExtraPreds = Tail->pred_size() != 2;
if (ExtraPreds)
@@ -587,7 +692,6 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
LLVM_DEBUG(dbgs() << *Head);
}
-
//===----------------------------------------------------------------------===//
// EarlyIfConverter Pass
//===----------------------------------------------------------------------===//
@@ -613,8 +717,6 @@ public:
private:
bool tryConvertIf(MachineBasicBlock*);
- void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
- void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
void invalidateTraces();
bool shouldConvertIf();
};
@@ -642,32 +744,36 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+namespace {
/// Update the dominator tree after if-conversion erased some blocks.
-void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
+void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
+ ArrayRef<MachineBasicBlock *> Removed) {
// convertIf can remove TBB, FBB, and Tail can be merged into Head.
// TBB and FBB should not dominate any blocks.
// Tail children should be transferred to Head.
MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
- for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
- MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ for (auto B : Removed) {
+ MachineDomTreeNode *Node = DomTree->getNode(B);
assert(Node != HeadNode && "Cannot erase the head node");
while (Node->getNumChildren()) {
assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
}
- DomTree->eraseNode(Removed[i]);
+ DomTree->eraseNode(B);
}
}
/// Update LoopInfo after if-conversion.
-void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
+void updateLoops(MachineLoopInfo *Loops,
+ ArrayRef<MachineBasicBlock *> Removed) {
if (!Loops)
return;
// If-conversion doesn't change loop structure, and it doesn't mess with back
// edges, so updating LoopInfo is simply removing the dead blocks.
- for (unsigned i = 0, e = Removed.size(); i != e; ++i)
- Loops->removeBlock(Removed[i]);
+ for (auto B : Removed)
+ Loops->removeBlock(B);
}
+} // namespace
/// Invalidate MachineTraceMetrics before if-conversion.
void EarlyIfConverter::invalidateTraces() {
@@ -783,8 +889,8 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
IfConv.convertIf(RemovedBlocks);
Changed = true;
- updateDomTree(RemovedBlocks);
- updateLoops(RemovedBlocks);
+ updateDomTree(DomTree, IfConv, RemovedBlocks);
+ updateLoops(Loops, RemovedBlocks);
}
return Changed;
}
@@ -822,3 +928,132 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+
+//===----------------------------------------------------------------------===//
+// EarlyIfPredicator Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfPredicator : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ TargetSchedModel SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfPredicator() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return "Early If-predicator"; }
+
+protected:
+ bool tryConvertIf(MachineBasicBlock *);
+ bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "early-if-predicator"
+
+char EarlyIfPredicator::ID = 0;
+char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false,
+ false)
+
+void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Apply the target heuristic to decide if the transformation is profitable.
+bool EarlyIfPredicator::shouldConvertIf() {
+ if (IfConv.isTriangle()) {
+ MachineBasicBlock &IfBlock =
+ (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB;
+
+ unsigned ExtraPredCost = 0;
+ unsigned Cycles = 0;
+ for (MachineInstr &I : IfBlock) {
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ Cycles += NumCycles - 1;
+ ExtraPredCost += TII->getPredicationCost(I);
+ }
+
+ return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost,
+ BranchProbability::getUnknown());
+ }
+ unsigned TExtra = 0;
+ unsigned FExtra = 0;
+ unsigned TCycle = 0;
+ unsigned FCycle = 0;
+ for (MachineInstr &I : *IfConv.TBB) {
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ TCycle += NumCycles - 1;
+ TExtra += TII->getPredicationCost(I);
+ }
+ for (MachineInstr &I : *IfConv.FBB) {
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ FCycle += NumCycles - 1;
+ FExtra += TII->getPredicationCost(I);
+ }
+ return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB,
+ FCycle, FExtra,
+ BranchProbability::getUnknown());
+}
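
[Editor's sketch, not part of this patch] A worked example of the accounting in
shouldConvertIf, with made-up latencies and predication costs: each instruction
contributes its latency minus one to the cycle count and its predication cost
to the extra cost, and both totals feed TII->isProfitableToIfCvt().

#include <cstdio>
#include <vector>

struct InstrCost {
  unsigned Latency;  // Stand-in for SchedModel.computeInstrLatency(&I, false).
  unsigned PredCost; // Stand-in for TII->getPredicationCost(I).
};

int main() {
  std::vector<InstrCost> IfBlock = {{1, 1}, {2, 1}, {3, 1}};
  unsigned Cycles = 0, ExtraPredCost = 0;
  for (const InstrCost &I : IfBlock) {
    if (I.Latency > 1)
      Cycles += I.Latency - 1;
    ExtraPredCost += I.PredCost;
  }
  // Cycles = 0 + 1 + 2 = 3 and ExtraPredCost = 3 for this block.
  std::printf("Cycles=%u ExtraPredCost=%u\n", Cycles, ExtraPredCost);
  return 0;
}
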
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+///
+bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) {
+ // If-convert MBB and update analyses.
+ SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks, /*Predicate*/ true);
+ Changed = true;
+ updateDomTree(DomTree, IfConv, RemovedBlocks);
+ updateLoops(Loops, RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ SchedModel.init(&STI);
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (auto DomNode : post_order(DomTree))
+ if (tryConvertIf(DomNode->getBlock()))
+ Changed = true;
+
+ return Changed;
+}
diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp
index a2dd5eee33b7..2cca05ea6f55 100644
--- a/lib/CodeGen/ExecutionDomainFix.cpp
+++ b/lib/CodeGen/ExecutionDomainFix.cpp
@@ -9,6 +9,7 @@
#include "llvm/CodeGen/ExecutionDomainFix.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp
index b425482e6adf..9916f2de0414 100644
--- a/lib/CodeGen/ExpandMemCmp.cpp
+++ b/lib/CodeGen/ExpandMemCmp.cpp
@@ -795,7 +795,7 @@ public:
TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto PA = runImpl(F, TLI, TTI, TL);
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 0ab70aff7dc4..1fc57fac1489 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -79,17 +79,17 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
(MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned InsReg = MI->getOperand(2).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
+ Register InsReg = MI->getOperand(2).getReg();
assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
unsigned SubIdx = MI->getOperand(3).getImm();
assert(SubIdx != 0 && "Invalid index for insert_subreg");
- unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+ Register DstSubReg = TRI->getSubReg(DstReg, SubIdx);
- assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ assert(Register::isPhysicalRegister(DstReg) &&
"Insert destination must be in a physical register");
- assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ assert(Register::isPhysicalRegister(InsReg) &&
"Inserted value must be in a physical register");
LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 9c53550eaa9d..c1d22ef89195 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -72,7 +72,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
return *I->second;
GCStrategy *S = getGCStrategy(F.getGC());
- Functions.push_back(llvm::make_unique<GCFunctionInfo>(F, *S));
+ Functions.push_back(std::make_unique<GCFunctionInfo>(F, *S));
GCFunctionInfo *GFI = Functions.back().get();
FInfoMap[&F] = GFI;
return *GFI;
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index 90571d090bfb..0dc0a5bce747 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -249,7 +249,7 @@ GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {}
void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
- AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<GCModuleInfo>();
}
@@ -310,7 +310,7 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
return false;
FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(MF.getFunction());
- MMI = &getAnalysis<MachineModuleInfo>();
+ MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
TII = MF.getSubtarget().getInstrInfo();
// Find the size of the stack frame. There may be no correct static frame
diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 4518dbee1a9f..7d9d812d34bc 100644
--- a/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -52,6 +52,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_GEP:
return true;
}
return false;
@@ -65,9 +66,9 @@ std::unique_ptr<CSEConfigBase>
llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) {
std::unique_ptr<CSEConfigBase> Config;
if (Level == CodeGenOpt::None)
- Config = make_unique<CSEConfigConstantOnly>();
+ Config = std::make_unique<CSEConfigConstantOnly>();
else
- Config = make_unique<CSEConfigFull>();
+ Config = std::make_unique<CSEConfigFull>();
return Config;
}
@@ -332,7 +333,7 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const {
const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
const MachineOperand &MO) const {
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!MO.isDef())
addNodeIDRegNum(Reg);
LLT Ty = MRI.getType(Reg);
diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 461bc6038c2c..51a74793f029 100644
--- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -162,6 +162,17 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
return buildConstant(DstOps[0], Cst->getSExtValue());
break;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ assert(DstOps.size() == 1 && "Invalid dst ops");
+ assert(SrcOps.size() == 2 && "Invalid src ops");
+ const DstOp &Dst = DstOps[0];
+ const SrcOp &Src0 = SrcOps[0];
+ const SrcOp &Src1 = SrcOps[1];
+ if (auto MaybeCst =
+ ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI()))
+ return buildConstant(Dst, MaybeCst->getSExtValue());
+ break;
+ }
}
bool CanCopy = checkCopyToDefsPossible(DstOps);
if (!canPerformCSEForOpc(Opc))
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index a5d8205a34a8..cdad92f7db4f 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -11,14 +11,16 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#define DEBUG_TYPE "call-lowering"
@@ -32,66 +34,70 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
ArrayRef<ArrayRef<Register>> ArgRegs,
Register SwiftErrorVReg,
std::function<unsigned()> GetCalleeReg) const {
+ CallLoweringInfo Info;
auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
- SmallVector<ArgInfo, 8> OrigArgs;
unsigned i = 0;
unsigned NumFixedArgs = CS.getFunctionType()->getNumParams();
for (auto &Arg : CS.args()) {
ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
- // We don't currently support swiftself args.
- if (OrigArg.Flags.isSwiftSelf())
- return false;
- OrigArgs.push_back(OrigArg);
+ Info.OrigArgs.push_back(OrigArg);
++i;
}
- MachineOperand Callee = MachineOperand::CreateImm(0);
if (const Function *F = CS.getCalledFunction())
- Callee = MachineOperand::CreateGA(F, 0);
+ Info.Callee = MachineOperand::CreateGA(F, 0);
else
- Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
-
- ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}};
- if (!OrigRet.Ty->isVoidTy())
- setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS);
-
- return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs,
- SwiftErrorVReg);
+ Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
+
+ Info.OrigRet = ArgInfo{ResRegs, CS.getType(), ISD::ArgFlagsTy{}};
+ if (!Info.OrigRet.Ty->isVoidTy())
+ setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CS);
+
+ Info.KnownCallees =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_callees);
+ Info.CallConv = CS.getCallingConv();
+ Info.SwiftErrorVReg = SwiftErrorVReg;
+ Info.IsMustTailCall = CS.isMustTailCall();
+ Info.IsTailCall = CS.isTailCall() &&
+ isInTailCallPosition(CS, MIRBuilder.getMF().getTarget());
+ Info.IsVarArg = CS.getFunctionType()->isVarArg();
+ return lowerCall(MIRBuilder, Info);
}
template <typename FuncInfoTy>
void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
const FuncInfoTy &FuncInfo) const {
+ auto &Flags = Arg.Flags[0];
const AttributeList &Attrs = FuncInfo.getAttributes();
if (Attrs.hasAttribute(OpIdx, Attribute::ZExt))
- Arg.Flags.setZExt();
+ Flags.setZExt();
if (Attrs.hasAttribute(OpIdx, Attribute::SExt))
- Arg.Flags.setSExt();
+ Flags.setSExt();
if (Attrs.hasAttribute(OpIdx, Attribute::InReg))
- Arg.Flags.setInReg();
+ Flags.setInReg();
if (Attrs.hasAttribute(OpIdx, Attribute::StructRet))
- Arg.Flags.setSRet();
+ Flags.setSRet();
if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf))
- Arg.Flags.setSwiftSelf();
+ Flags.setSwiftSelf();
if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError))
- Arg.Flags.setSwiftError();
+ Flags.setSwiftError();
if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
- Arg.Flags.setByVal();
+ Flags.setByVal();
if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
- Arg.Flags.setInAlloca();
+ Flags.setInAlloca();
- if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
+ if (Flags.isByVal() || Flags.isInAlloca()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
- Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+ Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
@@ -100,11 +106,11 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2);
else
FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
- Arg.Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Attrs.hasAttribute(OpIdx, Attribute::Nest))
- Arg.Flags.setNest();
- Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty));
+ Flags.setNest();
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
}
template void
@@ -159,7 +165,7 @@ void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg,
}
bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
- ArrayRef<ArgInfo> Args,
+ SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
@@ -171,7 +177,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
bool CallLowering::handleAssignments(CCState &CCInfo,
SmallVectorImpl<CCValAssign> &ArgLocs,
MachineIRBuilder &MIRBuilder,
- ArrayRef<ArgInfo> Args,
+ SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
@@ -180,14 +186,99 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT CurVT = MVT::getVT(Args[i].Ty);
- if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) {
- // Try to use the register type if we couldn't assign the VT.
- if (!Handler.isArgumentHandler() || !CurVT.isValid())
+ if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[0], CCInfo)) {
+ if (!CurVT.isValid())
return false;
- CurVT = TLI->getRegisterTypeForCallingConv(
+ MVT NewVT = TLI->getRegisterTypeForCallingConv(
F.getContext(), F.getCallingConv(), EVT(CurVT));
- if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
- return false;
+
+ // If we need to split the type over multiple regs, check it's a scenario
+ // we currently support.
+ unsigned NumParts = TLI->getNumRegistersForCallingConv(
+ F.getContext(), F.getCallingConv(), CurVT);
+ if (NumParts > 1) {
+ // For now only handle exact splits.
+ if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
+ return false;
+ }
+
+ // For incoming arguments (physregs to vregs), we could have values in
+ // physregs (or memlocs) which we want to extract and copy to vregs.
+ // During this, we might have to deal with the LLT being split across
+ // multiple regs, so we have to record this information for later.
+ //
+ // If we have outgoing args, then we have the opposite case. We have a
+ // vreg with an LLT which we want to assign to a physical location, and
+ // we might have to record that the value has to be split later.
+ if (Handler.isIncomingArgumentHandler()) {
+ if (NumParts == 1) {
+ // Try to use the register type if we couldn't assign the VT.
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[0], CCInfo))
+ return false;
+ } else {
+ // We're handling an incoming arg which is split over multiple regs.
+ // E.g. passing an s128 on AArch64.
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ Args[i].OrigRegs.push_back(Args[i].Regs[0]);
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ LLT NewLLT = getLLTForMVT(NewVT);
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ Register Reg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (Part == 0) {
+ Flags.setSplit();
+ } else {
+ Flags.setOrigAlign(Align::None());
+ if (Part == NumParts - 1)
+ Flags.setSplitEnd();
+ }
+ Args[i].Regs.push_back(Reg);
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full,
+ Args[i], Args[i].Flags[Part], CCInfo)) {
+ // Still couldn't assign this smaller part type for some reason.
+ return false;
+ }
+ }
+ }
+ } else {
+ // Handling an outgoing arg that might need to be split.
+ if (NumParts < 2)
+ return false; // Don't know how to deal with this type combination.
+
+ // This type is passed via multiple registers in the calling convention.
+ // We need to extract the individual parts.
+ Register LargeReg = Args[i].Regs[0];
+ LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
+ auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
+ assert(Unmerge->getNumOperands() == NumParts + 1);
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ // We're going to replace the regs and flags with the split ones.
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) {
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (PartIdx == 0) {
+ Flags.setSplit();
+ } else {
+ Flags.setOrigAlign(Align::None());
+ if (PartIdx == NumParts - 1)
+ Flags.setSplitEnd();
+ }
+ Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full,
+ Args[i], Args[i].Flags[PartIdx], CCInfo))
+ return false;
+ }
+ }
}
}
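
[Editor's sketch, not part of this patch] A simplified, runnable picture of the
flag pattern both split paths above produce, using a toy struct in place of
ISD::ArgFlagsTy: for an s128 value split into two s64 parts, part 0 carries
Split, the last part carries SplitEnd, and later parts reset the original
alignment.

#include <cstdio>
#include <vector>

struct ToyFlags {
  bool Split = false, SplitEnd = false;
  unsigned OrigAlign = 16; // Alignment of the original, unsplit value.
};

int main() {
  const unsigned NumParts = 2; // E.g. s128 -> 2 x s64.
  std::vector<ToyFlags> PartFlags;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    ToyFlags F;
    if (Part == 0) {
      F.Split = true;
    } else {
      F.OrigAlign = 1; // Align::None() in the real code.
      if (Part == NumParts - 1)
        F.SplitEnd = true;
    }
    PartFlags.push_back(F);
  }
  for (const ToyFlags &F : PartFlags)
    std::printf("Split=%d SplitEnd=%d OrigAlign=%u\n", F.Split, F.SplitEnd,
                F.OrigAlign);
  return 0;
}
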
@@ -202,18 +293,32 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
continue;
}
- assert(Args[i].Regs.size() == 1 &&
- "Can't handle multiple virtual regs yet");
-
// FIXME: Pack registers if we have more than one.
Register ArgReg = Args[i].Regs[0];
+ MVT OrigVT = MVT::getVT(Args[i].Ty);
+ MVT VAVT = VA.getValVT();
if (VA.isRegLoc()) {
- MVT OrigVT = MVT::getVT(Args[i].Ty);
- MVT VAVT = VA.getValVT();
- if (Handler.isArgumentHandler() && VAVT != OrigVT) {
- if (VAVT.getSizeInBits() < OrigVT.getSizeInBits())
- return false; // Can't handle this type of arg yet.
+ if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) {
+ if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) {
+ // Expected to be multiple regs for a single incoming arg.
+ unsigned NumArgRegs = Args[i].Regs.size();
+ if (NumArgRegs < 2)
+ return false;
+
+ assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
+ "Too many regs for number of args");
+ for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
+ // There should be Regs.size() ArgLocs per argument.
+ VA = ArgLocs[j + Part];
+ Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+ }
+ j += NumArgRegs - 1;
+ // Merge the split registers into the expected larger result vreg
+ // of the original call.
+ MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
+ continue;
+ }
const LLT VATy(VAVT);
Register NewReg =
MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
@@ -234,10 +339,28 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
} else {
MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
}
+ } else if (!Handler.isIncomingArgumentHandler()) {
+ assert((j + (Args[i].Regs.size() - 1)) < ArgLocs.size() &&
+ "Too many regs for number of args");
+ // This is an outgoing argument that might have been split.
+ for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) {
+ // There should be Regs.size() ArgLocs per argument.
+ VA = ArgLocs[j + Part];
+ Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+ }
+ j += Args[i].Regs.size() - 1;
} else {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
}
} else if (VA.isMemLoc()) {
+ // Don't currently support loading/storing a type that needs to be split
+ // to the stack. Should be easy, just not implemented yet.
+ if (Args[i].Regs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Load/store a split arg to/from the stack not implemented yet");
+ return false;
+ }
MVT VT = MVT::getVT(Args[i].Ty);
unsigned Size = VT == MVT::iPTR ? DL.getPointerSize()
: alignTo(VT.getSizeInBits(), 8) / 8;
@@ -253,6 +376,81 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
return true;
}
+bool CallLowering::analyzeArgInfo(CCState &CCState,
+ SmallVectorImpl<ArgInfo> &Args,
+ CCAssignFn &AssignFnFixed,
+ CCAssignFn &AssignFnVarArg) const {
+ for (unsigned i = 0, e = Args.size(); i < e; ++i) {
+ MVT VT = MVT::getVT(Args[i].Ty);
+ CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg;
+ if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) {
+ // Bail out on anything we can't handle.
+ LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString()
+ << " (arg number = " << i << "\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
+ MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &InArgs,
+ CCAssignFn &CalleeAssignFnFixed,
+ CCAssignFn &CalleeAssignFnVarArg,
+ CCAssignFn &CallerAssignFnFixed,
+ CCAssignFn &CallerAssignFnVarArg) const {
+ const Function &F = MF.getFunction();
+ CallingConv::ID CalleeCC = Info.CallConv;
+ CallingConv::ID CallerCC = F.getCallingConv();
+
+ if (CallerCC == CalleeCC)
+ return true;
+
+ SmallVector<CCValAssign, 16> ArgLocs1;
+ CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
+ if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed,
+ CalleeAssignFnVarArg))
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs2;
+ CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
+ if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed,
+ CallerAssignFnVarArg))
+ return false;
+
+ // We need the argument locations to match up exactly. If there's more in
+ // one than the other, then we are done.
+ if (ArgLocs1.size() != ArgLocs2.size())
+ return false;
+
+ // Make sure that each location is passed in exactly the same way.
+ for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) {
+ const CCValAssign &Loc1 = ArgLocs1[i];
+ const CCValAssign &Loc2 = ArgLocs2[i];
+
+ // We need both of them to be the same. So if one is a register and one
+ // isn't, we're done.
+ if (Loc1.isRegLoc() != Loc2.isRegLoc())
+ return false;
+
+ if (Loc1.isRegLoc()) {
+ // If they don't have the same register location, we're done.
+ if (Loc1.getLocReg() != Loc2.getLocReg())
+ return false;
+
+ // They matched, so we can move to the next ArgLoc.
+ continue;
+ }
+
+ // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match.
+ if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset())
+ return false;
+ }
+
+ return true;
+}
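
[Editor's sketch, not part of this patch] The location walk above, reduced to a
runnable toy where a struct stands in for CCValAssign: two conventions are
compatible only if every location agrees in kind (register vs. memory) and in
register number or stack offset.

#include <cstddef>
#include <vector>

struct ToyLoc {
  bool IsReg;
  unsigned RegOrOffset; // Register number, or memory offset if !IsReg.
};

static bool locationsMatch(const std::vector<ToyLoc> &A,
                           const std::vector<ToyLoc> &B) {
  if (A.size() != B.size())
    return false;
  for (size_t i = 0; i < A.size(); ++i)
    if (A[i].IsReg != B[i].IsReg || A[i].RegOrOffset != B[i].RegOrOffset)
      return false;
  return true;
}

int main() {
  std::vector<ToyLoc> Callee = {{true, 0}, {false, 8}};
  std::vector<ToyLoc> Caller = {{true, 0}, {false, 8}};
  return locationsMatch(Callee, Caller) ? 0 : 1; // Compatible here.
}
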
+
Register CallLowering::ValueHandler::extendRegister(Register ValReg,
CCValAssign &VA) {
LLT LocTy{VA.getLocVT()};
diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp
index 31cb1dbbc9b5..b4562a5c6601 100644
--- a/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -27,6 +27,18 @@
using namespace llvm;
+namespace llvm {
+cl::OptionCategory GICombinerOptionCategory(
+ "GlobalISel Combiner",
+ "Control the rules which are enabled. These options all take a comma "
+ "separated list of rules to disable and may be specified by number "
+ "or number range (e.g. 1-10)."
+#ifndef NDEBUG
+ " They may also be specified by name."
+#endif
+);
+} // end namespace llvm
+
namespace {
/// This class acts as the glue that joins the CombinerHelper to the overall
/// Combine algorithm. The CombinerHelper is intended to report the
@@ -92,7 +104,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
return false;
Builder =
- CSEInfo ? make_unique<CSEMIRBuilder>() : make_unique<MachineIRBuilder>();
+ CSEInfo ? std::make_unique<CSEMIRBuilder>()
+         : std::make_unique<MachineIRBuilder>();
MRI = &MF.getRegInfo();
Builder->setMF(MF);
if (CSEInfo)
diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9cbf3dd83ff1..854769d283f7 100644
--- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -8,19 +8,36 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
+// Option to allow testing of the combiner while no targets know about indexed
+// addressing.
+static cl::opt<bool>
+ ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
+ cl::desc("Force all indexed operations to be "
+ "legal for the GlobalISel combiner"));
+
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
- MachineIRBuilder &B)
- : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
+ MachineIRBuilder &B, GISelKnownBits *KB,
+ MachineDominatorTree *MDT)
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
+ KB(KB), MDT(MDT) {
+ (void)this->KB;
+}
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
@@ -55,8 +72,8 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::COPY)
return false;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(SrcReg);
// Simple Copy Propagation.
@@ -66,12 +83,183 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
return false;
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
MI.eraseFromParent();
replaceRegWith(MRI, DstReg, SrcReg);
}
+bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
+ bool IsUndef = false;
+ SmallVector<Register, 4> Ops;
+ if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
+ applyCombineConcatVectors(MI, IsUndef, Ops);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
+ SmallVectorImpl<Register> &Ops) {
+ assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
+ "Invalid instruction");
+ IsUndef = true;
+ MachineInstr *Undef = nullptr;
+
+ // Walk over all the operands of concat vectors and check if they are
+ // build_vector themselves or undef.
+ // Then collect their operands in Ops.
+ for (const MachineOperand &MO : MI.operands()) {
+ // Skip the instruction definition.
+ if (MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ assert(Def && "Operand not defined");
+ switch (Def->getOpcode()) {
+ case TargetOpcode::G_BUILD_VECTOR:
+ IsUndef = false;
+ // Remember the operands of the build_vector to fold
+ // them into the yet-to-build flattened concat vectors.
+ for (const MachineOperand &BuildVecMO : Def->operands()) {
+ // Skip the definition.
+ if (BuildVecMO.isDef())
+ continue;
+ Ops.push_back(BuildVecMO.getReg());
+ }
+ break;
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ LLT OpType = MRI.getType(Reg);
+ // Keep one undef value for all the undef operands.
+ if (!Undef) {
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Undef = Builder.buildUndef(OpType.getScalarType());
+ }
+ assert(MRI.getType(Undef->getOperand(0).getReg()) ==
+ OpType.getScalarType() &&
+ "All undefs should have the same type");
+ // Break the undef vector in as many scalar elements as needed
+ // for the flattening.
+ for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
+ EltIdx != EltEnd; ++EltIdx)
+ Ops.push_back(Undef->getOperand(0).getReg());
+ break;
+ }
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+void CombinerHelper::applyCombineConcatVectors(
+ MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
+ // We determined that the concat_vectors can be flattened.
+ // Generate the flattened build_vector.
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
+
+ // Note: IsUndef is sort of redundant. We could have determined it by
+ // checking that all Ops are undef. Alternatively, we could have
+ // generated a build_vector of undefs and rely on another combine to
+ // clean that up. For now, given we already gather this information
+ // in tryCombineConcatVectors, just save compile time and issue the
+ // right thing.
+ if (IsUndef)
+ Builder.buildUndef(NewDstReg);
+ else
+ Builder.buildBuildVector(NewDstReg, Ops);
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, NewDstReg);
+}
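
[Editor's sketch, not part of this patch] The flattening above in miniature,
with plain integers standing in for virtual-register operands: a
concat_vectors whose inputs are build_vectors becomes a single build_vector
over all the collected scalar operands.

#include <vector>

int main() {
  // Two G_BUILD_VECTORs feeding a G_CONCAT_VECTORS...
  std::vector<std::vector<int>> BuildVecOps = {{1, 2}, {3, 4}};
  std::vector<int> Flattened;
  for (const std::vector<int> &Ops : BuildVecOps)
    Flattened.insert(Flattened.end(), Ops.begin(), Ops.end());
  // ...fold into one G_BUILD_VECTOR over {1, 2, 3, 4}.
  return Flattened.size() == 4 ? 0 : 1;
}
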
+
+bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
+ SmallVector<Register, 4> Ops;
+ if (matchCombineShuffleVector(MI, Ops)) {
+ applyCombineShuffleVector(MI, Ops);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
+ SmallVectorImpl<Register> &Ops) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Invalid instruction kind");
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ Register Src1 = MI.getOperand(1).getReg();
+ LLT SrcType = MRI.getType(Src1);
+ unsigned DstNumElts = DstType.getNumElements();
+ unsigned SrcNumElts = SrcType.getNumElements();
+
+ // If the resulting vector is smaller than the size of the source
+ // vectors being concatenated, we won't be able to replace the
+ // shuffle vector with a concat_vectors.
+ //
+ // Note: We may still be able to produce a concat_vectors fed by
+ // extract_vector_elt and so on. It is less clear whether that would
+ // be better, though, so don't bother for now.
+ if (DstNumElts < 2 * SrcNumElts)
+ return false;
+
+ // Check that the shuffle mask can be broken evenly between the
+ // different sources.
+ if (DstNumElts % SrcNumElts != 0)
+ return false;
+
+ // Mask length is a multiple of the source vector length.
+ // Check if the shuffle is some kind of concatenation of the input
+ // vectors.
+ unsigned NumConcat = DstNumElts / SrcNumElts;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask);
+ for (unsigned i = 0; i != DstNumElts; ++i) {
+ int Idx = Mask[i];
+ // Undef value.
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcType sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
+ return false;
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
+ }
+
+ // The shuffle is concatenating multiple vectors together.
+ // Collect the different operands for that.
+ Register UndefReg;
+ Register Src2 = MI.getOperand(2).getReg();
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0) {
+ if (!UndefReg) {
+ Builder.setInsertPt(*MI.getParent(), MI);
+ UndefReg = Builder.buildUndef(SrcType).getReg(0);
+ }
+ Ops.push_back(UndefReg);
+ } else if (Src == 0)
+ Ops.push_back(Src1);
+ else
+ Ops.push_back(Src2);
+ }
+ return true;
+}
+
+void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
+ const ArrayRef<Register> Ops) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
+
+ Builder.buildConcatVectors(NewDstReg, Ops);
+
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, NewDstReg);
+}
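
[Editor's sketch, not part of this patch] The mask test above as a standalone
function: a shuffle of two <2 x sN> sources with mask <0,1,2,3> is a pure
concatenation, while the interleave mask <0,2,1,3> is rejected.

#include <vector>

static bool isConcatMask(const std::vector<int> &Mask, unsigned SrcNumElts) {
  unsigned DstNumElts = Mask.size();
  if (DstNumElts < 2 * SrcNumElts || DstNumElts % SrcNumElts != 0)
    return false;
  std::vector<int> ConcatSrcs(DstNumElts / SrcNumElts, -1);
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    if (Idx < 0)
      continue; // Undef lane.
    if (Idx % (int)SrcNumElts != (int)(i % SrcNumElts))
      return false; // Lanes within a piece must be sequential.
    int Src = Idx / (int)SrcNumElts;
    if (ConcatSrcs[i / SrcNumElts] >= 0 && ConcatSrcs[i / SrcNumElts] != Src)
      return false; // Each piece must come from a single source.
    ConcatSrcs[i / SrcNumElts] = Src;
  }
  return true;
}

int main() {
  bool Concat = isConcatMask({0, 1, 2, 3}, 2);     // true
  bool Interleave = isConcatMask({0, 2, 1, 3}, 2); // false
  return (Concat && !Interleave) ? 0 : 1;
}
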
+
namespace {
/// Select a preference between two uses. CurrentUse is the current preference
@@ -279,7 +467,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
// up the type and extend so that it uses the preferred use.
if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
- unsigned UseDstReg = UseMI->getOperand(0).getReg();
+ Register UseDstReg = UseMI->getOperand(0).getReg();
MachineOperand &UseSrcMO = UseMI->getOperand(1);
const LLT &UseDstTy = MRI.getType(UseDstReg);
if (UseDstReg != ChosenDstReg) {
@@ -342,8 +530,212 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
Observer.changedInstr(MI);
}
-bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
+bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
+ assert(DefMI.getParent() == UseMI.getParent());
+ if (&DefMI == &UseMI)
+ return false;
+
+ // Loop through the basic block until we find one of the instructions:
+ // DefMI precedes UseMI iff a forward scan meets DefMI first.
+ for (const MachineInstr &Cur : *DefMI.getParent()) {
+ if (&Cur == &DefMI)
+ return true;
+ if (&Cur == &UseMI)
+ return false;
+ }
+
+ llvm_unreachable("Block must contain instructions");
+}
+
+bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
+ if (MDT)
+ return MDT->dominates(&DefMI, &UseMI);
+ else if (DefMI.getParent() != UseMI.getParent())
+ return false;
+
+ return isPredecessor(DefMI, UseMI);
+}
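
[Editor's sketch, not part of this patch] The same-block ordering query above,
modelled on a plain sequence: DefMI is a predecessor of UseMI exactly when a
forward scan of the block meets DefMI first.

#include <vector>

static bool comesBefore(const std::vector<int> &Block, int Def, int Use) {
  for (int I : Block) {
    if (I == Def)
      return true;
    if (I == Use)
      return false;
  }
  return false; // Not reached when both are in the block, as asserted above.
}

int main() {
  std::vector<int> Block = {10, 20, 30};
  return (comesBefore(Block, 10, 30) && !comesBefore(Block, 30, 10)) ? 0 : 1;
}
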
+
+bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
+ Register &Base, Register &Offset) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+#ifndef NDEBUG
+ unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+ Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+#endif
+
+ Base = MI.getOperand(1).getReg();
+ MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
+ if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
+
+ for (auto &Use : MRI.use_instructions(Base)) {
+ if (Use.getOpcode() != TargetOpcode::G_GEP)
+ continue;
+
+ Offset = Use.getOperand(2).getReg();
+ if (!ForceLegalIndexing &&
+ !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
+ LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
+ << Use);
+ continue;
+ }
+
+ // Make sure the offset calculation is before the potentially indexed op.
+ // FIXME: what we really care about is the dependency; the offset
+ // calculation might be movable.
+ MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
+ if (!OffsetDef || !dominates(*OffsetDef, MI)) {
+ LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
+ << Use);
+ continue;
+ }
+
+ // FIXME: check whether all uses of Base are load/store with foldable
+ // addressing modes. If so, using the normal addr-modes is better than
+ // forming an indexed one.
+
+ bool MemOpDominatesAddrUses = true;
+ for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
+ if (!dominates(MI, GEPUse)) {
+ MemOpDominatesAddrUses = false;
+ break;
+ }
+ }
+
+ if (!MemOpDominatesAddrUses) {
+ LLVM_DEBUG(
+ dbgs() << " Ignoring candidate as memop does not dominate uses: "
+ << Use);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << " Found match: " << Use);
+ Addr = Use.getOperand(0).getReg();
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
+ Register &Base, Register &Offset) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+#ifndef NDEBUG
+ unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+ Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+#endif
+
+ Addr = MI.getOperand(1).getReg();
+ MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
+ if (!AddrDef || MRI.hasOneUse(Addr))
+ return false;
+
+ Base = AddrDef->getOperand(1).getReg();
+ Offset = AddrDef->getOperand(2).getReg();
+
+ LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
+
+ if (!ForceLegalIndexing &&
+ !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
+ LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
+ return false;
+ }
+
+ MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
+ if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
+ return false;
+ }
+
+ if (MI.getOpcode() == TargetOpcode::G_STORE) {
+ // Would require a copy.
+ if (Base == MI.getOperand(0).getReg()) {
+ LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
+ return false;
+ }
+
+ // We're expecting one use of Addr in MI, but it could also be the
+ // value stored, which isn't actually dominated by the instruction.
+ if (MI.getOperand(0).getReg() == Addr) {
+ LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
+ return false;
+ }
+ }
+
+ // FIXME: check whether all uses of the base pointer are constant GEPs. That
+ // might allow us to end base's liveness here by adjusting the constant.
+
+ for (auto &UseMI : MRI.use_instructions(Addr)) {
+ if (!dominates(MI, UseMI)) {
+ LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
+ Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
+ return false;
+
+ bool IsStore = Opcode == TargetOpcode::G_STORE;
+ Register Addr, Base, Offset;
+ bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
+ if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
+ return false;
+
+ unsigned NewOpcode;
+ switch (Opcode) {
+ case TargetOpcode::G_LOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_LOAD;
+ break;
+ case TargetOpcode::G_SEXTLOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
+ break;
+ case TargetOpcode::G_ZEXTLOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
+ break;
+ case TargetOpcode::G_STORE:
+ NewOpcode = TargetOpcode::G_INDEXED_STORE;
+ break;
+ default:
+ llvm_unreachable("Unknown load/store opcode");
+ }
+
+ MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
+ MachineIRBuilder MIRBuilder(MI);
+ auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ if (IsStore) {
+ MIB.addDef(Addr);
+ MIB.addUse(MI.getOperand(0).getReg());
+ } else {
+ MIB.addDef(MI.getOperand(0).getReg());
+ MIB.addDef(Addr);
+ }
+
+ MIB.addUse(Base);
+ MIB.addUse(Offset);
+ MIB.addImm(IsPre);
+ MI.eraseFromParent();
+ AddrDef.eraseFromParent();
+
+ LLVM_DEBUG(dbgs() << " Combined to indexed operation");
+ return true;
+}
+
+bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
+ if (MI.getOpcode() != TargetOpcode::G_BR)
+ return false;
+
// Try to match the following:
// bb1:
// %c(s32) = G_ICMP pred, %a, %b
@@ -380,9 +772,14 @@ bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
return true;
}
-bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
- if (!matchCombineBr(MI))
+bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
+ if (!matchElideBrByInvertingCond(MI))
return false;
+ applyElideBrByInvertingCond(MI);
+ return true;
+}
+
+void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
MachineBasicBlock::iterator BrIt(MI);
MachineInstr *BrCond = &*std::prev(BrIt);
@@ -401,11 +798,509 @@ bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
BrCond->getOperand(1).setMBB(BrTarget);
Observer.changedInstr(*BrCond);
MI.eraseFromParent();
+}
+
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction().hasMinSize();
+ return MF.getFunction().hasOptSize();
+}
+
+// Returns a list of types to use for memory op lowering in MemOps. A partial
+// port of findOptimalMemOpLowering in TargetLowering.
+static bool findGISelOptimalMemOpLowering(
+ std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes, const TargetLowering &TLI) {
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ if (SrcAlign != 0 && SrcAlign < DstAlign)
+ return false;
+
+ LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
+ ZeroMemset, MemcpyStrSrc, FuncAttributes);
+
+ if (Ty == LLT()) {
+ // Use the largest scalar type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater or
+ // equal to DstAlign (or zero).
+ Ty = LLT::scalar(64);
+ while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
+ !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
+ Ty = LLT::scalar(Ty.getSizeInBytes());
+ assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
+ // FIXME: check for the largest legal type we can load/store to.
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned TySize = Ty.getSizeInBytes();
+ while (TySize > Size) {
+ // For now, only use non-vector load / store's for the left-over pieces.
+ LLT NewTy = Ty;
+ // FIXME: check for mem op safety and legality of the types. Not all of
+ // SDAGisms map cleanly to GISel concepts.
+ if (NewTy.isVector())
+ NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
+ NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
+ unsigned NewTySize = NewTy.getSizeInBytes();
+ assert(NewTySize > 0 && "Could not find appropriate type");
+
+ // If the new LLT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ bool Fast;
+ // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
+ MVT VT = getMVTForLLT(Ty);
+ if (NumMemOps && AllowOverlap && NewTySize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(
+ VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ TySize = Size;
+ else {
+ Ty = NewTy;
+ TySize = NewTySize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(Ty);
+ Size -= TySize;
+ }
+
+ return true;
+}
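
[Editor's sketch, not part of this patch] A worked run of the greedy splitting
loop above, ignoring the legality and overlap refinements: a 10-byte operation
starting from s64 lowers to [s64, s16], halving the candidate width whenever
it no longer fits the remaining size.

#include <cstdio>
#include <vector>

int main() {
  unsigned Size = 10;           // KnownLen in bytes.
  unsigned TyBytes = 8;         // Start from the largest type, s64.
  std::vector<unsigned> MemOps; // Widths, in bytes, of the ops to emit.
  while (Size != 0) {
    unsigned Cur = TyBytes;
    while (Cur > Size)
      Cur /= 2; // Shrink: s64 -> s32 -> s16 -> s8.
    MemOps.push_back(Cur);
    Size -= Cur;
    TyBytes = Cur;
  }
  for (unsigned B : MemOps)
    std::printf("s%u\n", B * 8); // Prints s64 then s16.
  return 0;
}
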
+
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+// Get a vectorized representation of the memset value operand, GISel edition.
+static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ unsigned NumBits = Ty.getScalarSizeInBits();
+ auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
+ if (!Ty.isVector() && ValVRegAndVal) {
+ unsigned KnownVal = ValVRegAndVal->Value;
+ APInt Scalar = APInt(8, KnownVal);
+ APInt SplatVal = APInt::getSplat(NumBits, Scalar);
+ return MIB.buildConstant(Ty, SplatVal).getReg(0);
+ }
+ // FIXME: for vector types create a G_BUILD_VECTOR.
+ if (Ty.isVector())
+ return Register();
+
+ // Extend the byte value to the larger type, and then multiply by a magic
+ // value 0x010101... in order to replicate it across every byte.
+ LLT ExtType = Ty.getScalarType();
+ auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
+ if (NumBits > 8) {
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ auto MagicMI = MIB.buildConstant(ExtType, Magic);
+ Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
+ }
+
+ assert(ExtType == Ty && "Vector memset value type not supported yet");
+ return Val;
+}
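
[Editor's sketch, not part of this patch] The splat trick above in plain
integer arithmetic: zero-extend the byte, then multiply by the 0x0101...
magic constant to replicate it into every byte of the wider type.

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Byte = 0xAB;
  uint64_t Magic = 0x0101010101010101ULL; // APInt::getSplat(64, APInt(8, 1))
  uint64_t Splat = (uint64_t)Byte * Magic;
  std::printf("%#llx\n", (unsigned long long)Splat); // 0xabababababababab
  return 0;
}
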
+
+bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
+ unsigned KnownLen, unsigned Align,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memset length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+
+ auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
+ bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
+
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
+ /*IsMemset=*/true,
+ /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
+ /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes(), TLI))
+ return false;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+ if (NewAlign > Align) {
+ Align = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlignment(FI) < Align)
+ MFI.setObjectAlignment(FI, Align);
+ }
+ }
+
+ MachineIRBuilder MIB(MI);
+ // Find the largest store and generate the bit pattern for it.
+ LLT LargestTy = MemOps[0];
+ for (unsigned i = 1; i < MemOps.size(); i++)
+ if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
+ LargestTy = MemOps[i];
+
+ // The memset stored value is always defined as an s8, so in order to make it
+ // work with larger store types we need to repeat the bit pattern across the
+ // wider type.
+ Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
+
+ if (!MemSetValue)
+ return false;
+
+ // Generate the stores. For each store type in the list, we generate the
+ // matching store of that type to the destination address.
+ LLT PtrTy = MRI.getType(Dst);
+ unsigned DstOff = 0;
+ unsigned Size = KnownLen;
+ for (unsigned I = 0; I < MemOps.size(); I++) {
+ LLT Ty = MemOps[I];
+ unsigned TySize = Ty.getSizeInBytes();
+ if (TySize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(I == MemOps.size() - 1 && I != 0);
+ DstOff -= TySize - Size;
+ }
+
+ // If this store is smaller than the largest store see whether we can get
+ // the smaller value for free with a truncate.
+ Register Value = MemSetValue;
+ if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
+ MVT VT = getMVTForLLT(Ty);
+ MVT LargestVT = getMVTForLLT(LargestTy);
+ if (!LargestTy.isVector() && !Ty.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
+ else
+ Value = getMemsetValue(Val, Ty, MIB);
+ if (!Value)
+ return false;
+ }
+
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
+
+ Register Ptr = Dst;
+ if (DstOff != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
+ Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ }
+
+ MIB.buildStore(Value, Ptr, *StoreMMO);
+ DstOff += Ty.getSizeInBytes();
+ Size -= TySize;
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+
+bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
+ Register Src, unsigned KnownLen,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memcpy length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ unsigned Alignment = MinAlign(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ // FIXME: infer better src pointer alignment like SelectionDAG does here.
+ // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
+ // if the memcpy is in a tail call position.
+
+ unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
+ SrcAlign,
+ /*IsMemset=*/false,
+ /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
+ /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+ return false;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Alignment &&
+ DL.exceedsNaturalStackAlignment(Align(NewAlign)))
+ NewAlign /= 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlignment(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+ // Now we need to emit a pair of load and stores for each of the types we've
+ // collected. I.e. for each type, generate a load of that width from the
+ // source pointer, and then generate a corresponding store of the loaded
+ // value to the dest buffer. This can result in a sequence of loads and
+ // stores of mixed types, depending on what the target specifies as good
+ // types to use.
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ unsigned Size = KnownLen;
+ for (auto CopyTy : MemOps) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
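+ // E.g. a 7 byte copy lowered as two s32 accesses issues the second access
+ // at offset 3, overlapping the first access by one byte.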
+ if (CopyTy.getSizeInBytes() > Size)
+ CurrOffset -= CopyTy.getSizeInBytes() - Size;
+
+ // Construct MMOs for the accesses.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ Register Offset;
+ if (CurrOffset != 0) {
+ Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+ .getReg(0);
+ LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
+ }
+ auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
+
+ // Create the store.
+ Register StorePtr =
+ CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ MIB.buildStore(LdVal, StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ Size -= CopyTy.getSizeInBytes();
+ }
+
+ MI.eraseFromParent();
return true;
}
+
+bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
+ Register Src, unsigned KnownLen,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero-length memmove!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ unsigned Alignment = MinAlign(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
+ // to a bug in its findOptimalMemOpLowering implementation. For now do the
+ // same thing here.
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
+ SrcAlign,
+ /*IsMemset=*/false,
+ /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
+ /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+ return false;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Alignment &&
+ DL.exceedsNaturalStackAlignment(Align(NewAlign)))
+ NewAlign /= 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlignment(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+ // Memmove requires that we perform all the loads before issuing the stores,
+ // since the source and destination ranges may overlap. Apart from that, this
+ // loop does pretty much the same thing as the memcpy codegen function.
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ SmallVector<Register, 16> LoadVals;
+ for (auto CopyTy : MemOps) {
+ // Construct MMO for the load.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
+ }
+ LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+
+ CurrOffset = 0;
+ for (unsigned I = 0; I < MemOps.size(); ++I) {
+ LLT CopyTy = MemOps[I];
+ // Now store the values loaded.
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ }
+ MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ // This combine is fairly complex, so it's not written with a separate
+ // matcher function.
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
+ assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
+ ID == Intrinsic::memset) &&
+ "Expected a memcpy like intrinsic");
+
+ auto MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+ bool IsVolatile = MemOp->isVolatile();
+ // Don't try to optimize volatile.
+ if (IsVolatile)
+ return false;
+
+ unsigned DstAlign = MemOp->getBaseAlignment();
+ unsigned SrcAlign = 0;
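+ // With no results, operand 0 of this G_INTRINSIC_W_SIDE_EFFECTS is the
+ // intrinsic ID, so the dst, src and length operands start at index 1.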
+ Register Dst = MI.getOperand(1).getReg();
+ Register Src = MI.getOperand(2).getReg();
+ Register Len = MI.getOperand(3).getReg();
+
+ if (ID != Intrinsic::memset) {
+ assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
+ MemOp = *(++MMOIt);
+ SrcAlign = MemOp->getBaseAlignment();
+ }
+
+ // See if this is a constant-length copy.
+ auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
+ if (!LenVRegAndVal)
+ return false; // Leave it to the legalizer to lower it to a libcall.
+ unsigned KnownLen = LenVRegAndVal->Value;
+
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (MaxLen && KnownLen > MaxLen)
+ return false;
+
+ if (ID == Intrinsic::memcpy)
+ return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+ if (ID == Intrinsic::memmove)
+ return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+ if (ID == Intrinsic::memset)
+ return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
+ return false;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
- return tryCombineExtendingLoads(MI);
+ if (tryCombineExtendingLoads(MI))
+ return true;
+ if (tryCombineIndexedLoadStore(MI))
+ return true;
+ return false;
}
diff --git a/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
new file mode 100644
index 000000000000..be8efa8795f3
--- /dev/null
+++ b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -0,0 +1,383 @@
+//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Provides analysis for querying information about KnownBits during GISel
+/// passes.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+
+#define DEBUG_TYPE "gisel-known-bits"
+
+using namespace llvm;
+
+char llvm::GISelKnownBitsAnalysis::ID = 0;
+
+INITIALIZE_PASS_BEGIN(GISelKnownBitsAnalysis, DEBUG_TYPE,
+ "Analysis for Computing Known Bits", false, true)
+INITIALIZE_PASS_END(GISelKnownBitsAnalysis, DEBUG_TYPE,
+ "Analysis for Computing Known Bits", false, true)
+
+GISelKnownBits::GISelKnownBits(MachineFunction &MF)
+ : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
+ DL(MF.getFunction().getParent()->getDataLayout()) {}
+
+Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset,
+ const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ // TODO: How to handle cases with Base + Offset?
+ return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset);
+}
+
+MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) {
+ if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ int FrameIdx = MI.getOperand(1).getIndex();
+ return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF());
+ }
+ return None;
+}
+
+void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ const MachineInstr &MI = *MRI.getVRegDef(R);
+ computeKnownBitsForAlignment(Known, inferPtrAlignment(MI));
+}
+
+void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known,
+ MaybeAlign Alignment) {
+ if (Alignment)
+ // The low bits are known zero if the pointer is aligned.
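+ // E.g. a pointer with 16-byte alignment has its low 4 bits known to be 0.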
+ Known.Zero.setLowBits(Log2(Alignment));
+}
+
+KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
+ return getKnownBits(MI.getOperand(0).getReg());
+}
+
+KnownBits GISelKnownBits::getKnownBits(Register R) {
+ KnownBits Known;
+ LLT Ty = MRI.getType(R);
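+ // Demand all elements of a vector, or the single "element" of a scalar.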
+ APInt DemandedElts =
+ Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1);
+ computeKnownBitsImpl(R, Known, DemandedElts);
+ return Known;
+}
+
+bool GISelKnownBits::signBitIsZero(Register R) {
+ LLT Ty = MRI.getType(R);
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+ return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
+}
+
+APInt GISelKnownBits::getKnownZeroes(Register R) {
+ return getKnownBits(R).Zero;
+}
+
+APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }
+
+void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ MachineInstr &MI = *MRI.getVRegDef(R);
+ unsigned Opcode = MI.getOpcode();
+ LLT DstTy = MRI.getType(R);
+
+ // Handle the case where this is called on a register that does not have a
+ // type constraint (i.e. it has a register class constraint instead). This is
+ // unlikely to occur except by looking through copies, but it is possible for
+ // the initial register being queried to be in this state.
+ if (!DstTy.isValid()) {
+ Known = KnownBits();
+ return;
+ }
+
+ unsigned BitWidth = DstTy.getSizeInBits();
+ Known = KnownBits(BitWidth); // Don't know anything
+
+ if (DstTy.isVector())
+ return; // TODO: Handle vectors.
+
+ if (Depth == getMaxDepth())
+ return;
+
+ if (!DemandedElts)
+ return; // No demanded elts, better to assume we don't know anything.
+
+ KnownBits Known2;
+
+ switch (Opcode) {
+ default:
+ TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
+ Depth);
+ break;
+ case TargetOpcode::COPY: {
+ MachineOperand Dst = MI.getOperand(0);
+ MachineOperand Src = MI.getOperand(1);
+ // Look through trivial copies, but don't look through trivial copies of
+ // the form `%1:(s32) = OP %0:gpr32`, since known-bits analysis is
+ // currently unable to determine the bit width of a register class.
+ //
+ // We can't use NoSubRegister by name as it's defined by each target but
+ // it's always defined to be 0 by tablegen.
+ if (Dst.getSubReg() == 0 /*NoSubRegister*/ && Src.getReg().isVirtual() &&
+ Src.getSubReg() == 0 /*NoSubRegister*/ &&
+ MRI.getType(Src.getReg()).isValid()) {
+ // Don't increment Depth for this one since we didn't do any work.
+ computeKnownBitsImpl(Src.getReg(), Known, DemandedElts, Depth);
+ }
+ break;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ auto CstVal = getConstantVRegVal(R, MRI);
+ if (!CstVal)
+ break;
+ Known.One = *CstVal;
+ Known.Zero = ~Known.One;
+ break;
+ }
+ case TargetOpcode::G_FRAME_INDEX: {
+ computeKnownBitsForFrameIndex(R, Known, DemandedElts);
+ break;
+ }
+ case TargetOpcode::G_SUB: {
+ // If low bits are known to be zero in both operands, then we know they are
+ // going to be 0 in the result. Both addition and complement operations
+ // preserve the low zero bits.
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ unsigned KnownZeroLow = Known2.countMinTrailingZeros();
+ if (KnownZeroLow == 0)
+ break;
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
+ Known.Zero.setLowBits(KnownZeroLow);
+ break;
+ }
+ case TargetOpcode::G_XOR: {
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
+ Known.Zero = KnownZeroOut;
+ break;
+ }
+ case TargetOpcode::G_GEP: {
+ // G_GEP is like G_ADD. FIXME: Is this true for all targets?
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
+ break;
+ LLVM_FALLTHROUGH;
+ }
+ case TargetOpcode::G_ADD: {
+ // Output known-0 bits are known for the low bits that are clear in both
+ // the LHS & RHS. For example, 8+(X<<3) is known to have the low 3 bits
+ // clear.
+ // Output known-0 bits are also known if the top bits of each input are
+ // known to be clear. For example, if one input has the top 10 bits clear
+ // and the other has the top 8 bits clear, we know the top 7 bits of the
+ // output must be clear.
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
+ unsigned KnownZeroLow = Known2.countMinTrailingZeros();
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
+ KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
+ Known.Zero.setLowBits(KnownZeroLow);
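+ // One fewer high bit is known zero because adding two values whose top
+ // KnownZeroHigh bits are clear can still carry into the lowest of those
+ // bits.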
+ if (KnownZeroHigh > 1)
+ Known.Zero.setHighBits(KnownZeroHigh - 1);
+ break;
+ }
+ case TargetOpcode::G_AND: {
+ // Bits that are zero in either the LHS or the RHS are zero in the result.
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ Known.One &= Known2.One;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ Known.Zero |= Known2.Zero;
+ break;
+ }
+ case TargetOpcode::G_OR: {
+ // Bits that are one in either the LHS or the RHS are one in the result.
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ Known.Zero &= Known2.Zero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ Known.One |= Known2.One;
+ break;
+ }
+ case TargetOpcode::G_MUL: {
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
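+ // E.g. if each operand has at least 2 trailing zeros (is a multiple of
+ // 4), the product is a multiple of 16 and has at least 4 trailing zeros.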
+ unsigned TrailZ =
+ Known.countMinTrailingZeros() + Known2.countMinTrailingZeros();
+ unsigned LeadZ =
+ std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(),
+ BitWidth) -
+ BitWidth;
+
+ Known.resetAll();
+ Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
+ Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
+ break;
+ }
+ case TargetOpcode::G_SELECT: {
+ computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts,
+ Depth + 1);
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ // Only known if known in both the LHS and RHS.
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ break;
+ }
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_ICMP: {
+ if (TL.getBooleanContents(DstTy.isVector(),
+ Opcode == TargetOpcode::G_FCMP) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ case TargetOpcode::G_SEXT: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case TargetOpcode::G_ANYEXT: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ break;
+ }
+ case TargetOpcode::G_LOAD: {
+ if (MI.hasOneMemOperand()) {
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ if (const MDNode *Ranges = MMO->getRanges()) {
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ }
+ }
+ break;
+ }
+ case TargetOpcode::G_ZEXTLOAD: {
+ // Everything above the retrieved bits is zero
+ if (MI.hasOneMemOperand())
+ Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
+ break;
+ }
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_SHL: {
+ KnownBits RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ if (!RHSKnown.isConstant()) {
+ LLVM_DEBUG(
+ MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+ dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI);
+ break;
+ }
+ uint64_t Shift = RHSKnown.getConstant().getZExtValue();
+ LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n');
+
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+
+ switch (Opcode) {
+ case TargetOpcode::G_ASHR:
+ Known.Zero = Known.Zero.ashr(Shift);
+ Known.One = Known.One.ashr(Shift);
+ break;
+ case TargetOpcode::G_LSHR:
+ Known.Zero = Known.Zero.lshr(Shift);
+ Known.One = Known.One.lshr(Shift);
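+ // A logical shift right fills the top Shift bits with zeros.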
+ Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift);
+ break;
+ case TargetOpcode::G_SHL:
+ Known.Zero = Known.Zero.shl(Shift);
+ Known.One = Known.One.shl(Shift);
+ Known.Zero.setBits(0, Shift);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_PTRTOINT:
+ // Fall through and handle them the same as zext/trunc.
+ LLVM_FALLTHROUGH;
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_TRUNC: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ unsigned SrcBitWidth = SrcTy.isPointer()
+ ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
+ : SrcTy.getSizeInBits();
+ assert(SrcBitWidth && "SrcBitWidth can't be zero");
+ Known = Known.zextOrTrunc(SrcBitWidth, true);
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known = Known.zextOrTrunc(BitWidth, true);
+ if (BitWidth > SrcBitWidth)
+ Known.Zero.setBitsFrom(SrcBitWidth);
+ break;
+ }
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "["
+ << Depth << "] Computed for: " << MI << "[" << Depth
+ << "] Known: 0x"
+ << (Known.Zero | Known.One).toString(16, false) << "\n"
+ << "[" << Depth << "] Zero: 0x"
+ << Known.Zero.toString(16, false) << "\n"
+ << "[" << Depth << "] One: 0x"
+ << Known.One.toString(16, false) << "\n");
+}
+
+void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ return false;
+}
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 6e99bdbd8264..45cef4aca888 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -334,7 +335,7 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
- const CmpInst *CI = dyn_cast<CmpInst>(&U);
+ auto *CI = dyn_cast<CmpInst>(&U);
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Op1 = getOrCreateVReg(*U.getOperand(1));
Register Res = getOrCreateVReg(U);
@@ -345,11 +346,12 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
else if (Pred == CmpInst::FCMP_FALSE)
MIRBuilder.buildCopy(
- Res, getOrCreateVReg(*Constant::getNullValue(CI->getType())));
+ Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
else if (Pred == CmpInst::FCMP_TRUE)
MIRBuilder.buildCopy(
- Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
+ Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
+ assert(CI && "Instruction should be CmpInst");
MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1},
MachineInstr::copyFlagsFromInstruction(*CI));
}
@@ -588,8 +590,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
} else {
- assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE &&
- "Can only handle ULE ranges");
+ assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
+ "Can only handle SLE ranges");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
@@ -598,7 +600,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
Cond =
- MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0);
+ MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
} else {
const LLT &CmpTy = MRI->getType(CmpOpReg);
auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
@@ -728,7 +730,7 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
MHS = nullptr;
} else {
// Check I->Low <= Cond <= I->High.
- Pred = CmpInst::ICMP_ULE;
+ Pred = CmpInst::ICMP_SLE;
LHS = I->Low;
MHS = Cond;
RHS = I->High;
@@ -879,7 +881,8 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
-
+ const MDNode *Ranges =
+ Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
for (unsigned i = 0; i < Regs.size(); ++i) {
Register Addr;
MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
@@ -888,7 +891,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
unsigned BaseAlign = getMemOpAlignment(LI);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
- MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+ MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
}
@@ -1075,36 +1078,29 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
- MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0));
-
- BaseReg = NewBaseReg;
+ BaseReg =
+ MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0);
Offset = 0;
}
Register IdxReg = getOrCreateVReg(*Idx);
- if (MRI->getType(IdxReg) != OffsetTy) {
- Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
- IdxReg = NewIdxReg;
- }
+ if (MRI->getType(IdxReg) != OffsetTy)
+ IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
// N = N + Idx * ElementSize;
// Avoid doing it for ElementSize of 1.
Register GepOffsetReg;
if (ElementSize != 1) {
- GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
auto ElementSizeMIB = MIRBuilder.buildConstant(
getLLTForType(*OffsetIRTy, *DL), ElementSize);
- MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg);
+ GepOffsetReg =
+ MIRBuilder.buildMul(OffsetTy, ElementSizeMIB, IdxReg).getReg(0);
} else
GepOffsetReg = IdxReg;
- Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
- BaseReg = NewBaseReg;
+ BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0);
}
}
@@ -1119,54 +1115,51 @@ bool IRTranslator::translateGetElementPtr(const User &U,
return true;
}
-bool IRTranslator::translateMemfunc(const CallInst &CI,
+bool IRTranslator::translateMemFunc(const CallInst &CI,
MachineIRBuilder &MIRBuilder,
- unsigned ID) {
+ Intrinsic::ID ID) {
// If the source is undef, then just emit a nop.
- if (isa<UndefValue>(CI.getArgOperand(1))) {
- switch (ID) {
- case Intrinsic::memmove:
- case Intrinsic::memcpy:
- case Intrinsic::memset:
- return true;
- default:
- break;
- }
- }
-
- LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
- Type *DstTy = CI.getArgOperand(0)->getType();
- if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
- SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
- return false;
+ if (isa<UndefValue>(CI.getArgOperand(1)))
+ return true;
- SmallVector<CallLowering::ArgInfo, 8> Args;
- for (int i = 0; i < 3; ++i) {
- const auto &Arg = CI.getArgOperand(i);
- Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
+ ArrayRef<Register> Res;
+ auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true);
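+ // Add all call operands except the trailing volatile flag; volatility is
+ // conveyed via the memory operands created below instead.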
+ for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI)
+ ICall.addUse(getOrCreateVReg(**AI));
+
+ unsigned DstAlign = 0, SrcAlign = 0;
+ unsigned IsVol =
+ cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
+ ->getZExtValue();
+
+ if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
+ DstAlign = std::max<unsigned>(MCI->getDestAlignment(), 1);
+ SrcAlign = std::max<unsigned>(MCI->getSourceAlignment(), 1);
+ } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
+ DstAlign = std::max<unsigned>(MMI->getDestAlignment(), 1);
+ SrcAlign = std::max<unsigned>(MMI->getSourceAlignment(), 1);
+ } else {
+ auto *MSI = cast<MemSetInst>(&CI);
+ DstAlign = std::max<unsigned>(MSI->getDestAlignment(), 1);
}
- const char *Callee;
- switch (ID) {
- case Intrinsic::memmove:
- case Intrinsic::memcpy: {
- Type *SrcTy = CI.getArgOperand(1)->getType();
- if(cast<PointerType>(SrcTy)->getAddressSpace() != 0)
- return false;
- Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove";
- break;
- }
- case Intrinsic::memset:
- Callee = "memset";
- break;
- default:
- return false;
- }
+ // We need to propagate the tail call flag from the IR inst as an argument.
+ // Otherwise, we have to pessimize and assume later that we cannot tail call
+ // any memory intrinsics.
+ ICall.addImm(CI.isTailCall() ? 1 : 0);
- return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
- MachineOperand::CreateES(Callee),
- CallLowering::ArgInfo({0}, CI.getType()), Args);
+ // Create mem operands to store the alignment and volatile info.
+ auto VolFlag =
+ IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+ ICall.addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(CI.getArgOperand(0)),
+ MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
+ if (ID != Intrinsic::memset)
+ ICall.addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(CI.getArgOperand(1)),
+ MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
+
+ return true;
}
void IRTranslator::getStackGuard(Register DstReg,
@@ -1186,7 +1179,7 @@ void IRTranslator::getStackGuard(Register DstReg,
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef =
MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
- DL->getPointerABIAlignment(0));
+ DL->getPointerABIAlignment(0).value());
MIB.setMemRefs({MemRef});
}
@@ -1208,6 +1201,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
break;
case Intrinsic::bswap:
return TargetOpcode::G_BSWAP;
+ case Intrinsic::bitreverse:
+ return TargetOpcode::G_BITREVERSE;
case Intrinsic::ceil:
return TargetOpcode::G_FCEIL;
case Intrinsic::cos:
@@ -1383,16 +1378,17 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
- MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
+ MIRBuilder.buildDirectDbgValue(0, DI.getVariable(), DI.getExpression());
} else if (const auto *CI = dyn_cast<Constant>(V)) {
MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
} else {
- Register Reg = getOrCreateVReg(*V);
- // FIXME: This does not handle register-indirect values at offset 0. The
- // direct/indirect thing shouldn't really be handled by something as
- // implicit as reg+noreg vs reg+imm in the first palce, but it seems
- // pretty baked in right now.
- MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
+ for (Register Reg : getOrCreateVRegs(*V)) {
+ // FIXME: This does not handle register-indirect values at offset 0. The
+ // direct/indirect thing shouldn't really be handled by something as
+ // implicit as reg+noreg vs reg+imm in the first place, but it seems
+ // pretty baked in right now.
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
+ }
}
return true;
}
@@ -1433,7 +1429,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
- return translateMemfunc(CI, MIRBuilder, ID);
+ return translateMemFunc(CI, MIRBuilder, ID);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
Register Reg = getOrCreateVReg(CI);
@@ -1441,18 +1437,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MIRBuilder.buildConstant(Reg, TypeID);
return true;
}
- case Intrinsic::objectsize: {
- // If we don't know by now, we're never going to know.
- const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1));
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
- MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0);
- return true;
- }
case Intrinsic::is_constant:
- // If this wasn't constant-folded away by now, then it's not a
- // constant.
- MIRBuilder.buildConstant(getOrCreateVReg(CI), 0);
- return true;
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
case Intrinsic::stackguard:
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
@@ -1551,6 +1541,46 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI,
return true;
}
+bool IRTranslator::translateCallSite(const ImmutableCallSite &CS,
+ MachineIRBuilder &MIRBuilder) {
+ const Instruction &I = *CS.getInstruction();
+ ArrayRef<Register> Res = getOrCreateVRegs(I);
+
+ SmallVector<ArrayRef<Register>, 8> Args;
+ Register SwiftInVReg = 0;
+ Register SwiftErrorVReg = 0;
+ for (auto &Arg : CS.args()) {
+ if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+ assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+ LLT Ty = getLLTForType(*Arg->getType(), *DL);
+ SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+ &I, &MIRBuilder.getMBB(), Arg));
+ Args.emplace_back(makeArrayRef(SwiftInVReg));
+ SwiftErrorVReg =
+ SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
+ continue;
+ }
+ Args.push_back(getOrCreateVRegs(*Arg));
+ }
+
+ // We don't set HasCalls on MFI here yet because call lowering may decide to
+ // optimize into tail calls. Instead, we defer that to selection where a final
+ // scan is done to check if any instructions are calls.
+ bool Success =
+ CLI->lowerCall(MIRBuilder, CS, Res, Args, SwiftErrorVReg,
+ [&]() { return getOrCreateVReg(*CS.getCalledValue()); });
+
+ // Check if we just inserted a tail call.
+ if (Success) {
+ assert(!HasTailCall && "Can't have more than one tail call per block");
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt()));
+ }
+
+ return Success;
+}
+
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const CallInst &CI = cast<CallInst>(U);
auto TII = MF->getTarget().getIntrinsicInfo();
@@ -1570,34 +1600,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
}
- if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
- ArrayRef<Register> Res = getOrCreateVRegs(CI);
-
- SmallVector<ArrayRef<Register>, 8> Args;
- Register SwiftInVReg = 0;
- Register SwiftErrorVReg = 0;
- for (auto &Arg: CI.arg_operands()) {
- if (CLI->supportSwiftError() && isSwiftError(Arg)) {
- assert(SwiftInVReg == 0 && "Expected only one swift error argument");
- LLT Ty = getLLTForType(*Arg->getType(), *DL);
- SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
- MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
- &CI, &MIRBuilder.getMBB(), Arg));
- Args.emplace_back(makeArrayRef(SwiftInVReg));
- SwiftErrorVReg =
- SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg);
- continue;
- }
- Args.push_back(getOrCreateVRegs(*Arg));
- }
-
- MF->getFrameInfo().setHasCalls(true);
- bool Success =
- CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg,
- [&]() { return getOrCreateVReg(*CI.getCalledValue()); });
-
- return Success;
- }
+ if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
+ return translateCallSite(&CI, MIRBuilder);
assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
@@ -1615,14 +1619,29 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
- for (auto &Arg : CI.arg_operands()) {
+ for (auto &Arg : enumerate(CI.arg_operands())) {
// Some intrinsics take metadata parameters. Reject them.
- if (isa<MetadataAsValue>(Arg))
- return false;
- ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
- if (VRegs.size() > 1)
+ if (isa<MetadataAsValue>(Arg.value()))
return false;
- MIB.addUse(VRegs[0]);
+
+ // If this is required to be an immediate, don't materialize it in a
+ // register.
+ if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
+ // imm arguments are more convenient than cimm (and realistically
+ // probably sufficient), so use them.
+ assert(CI->getBitWidth() <= 64 &&
+ "large intrinsic immediates not handled");
+ MIB.addImm(CI->getSExtValue());
+ } else {
+ MIB.addFPImm(cast<ConstantFP>(Arg.value()));
+ }
+ } else {
+ ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
+ if (VRegs.size() > 1)
+ return false;
+ MIB.addUse(VRegs[0]);
+ }
}
// Add a MachineMemOperand if it is a target mem intrinsic.
@@ -1630,13 +1649,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
TargetLowering::IntrinsicInfo Info;
// TODO: Add a GlobalISel version of getTgtMemIntrinsic.
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
- unsigned Align = Info.align;
- if (Align == 0)
- Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()));
+ MaybeAlign Align = Info.align;
+ if (!Align)
+ Align = MaybeAlign(
+ DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext())));
uint64_t Size = Info.memVT.getStoreSize();
- MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
- Info.flags, Size, Align));
+ MIB.addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(Info.ptrVal), Info.flags, Size, Align->value()));
}
return true;
@@ -1672,30 +1692,7 @@ bool IRTranslator::translateInvoke(const User &U,
MCSymbol *BeginSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
- ArrayRef<Register> Res;
- if (!I.getType()->isVoidTy())
- Res = getOrCreateVRegs(I);
- SmallVector<ArrayRef<Register>, 8> Args;
- Register SwiftErrorVReg = 0;
- Register SwiftInVReg = 0;
- for (auto &Arg : I.arg_operands()) {
- if (CLI->supportSwiftError() && isSwiftError(Arg)) {
- assert(SwiftInVReg == 0 && "Expected only one swift error argument");
- LLT Ty = getLLTForType(*Arg->getType(), *DL);
- SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
- MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
- &I, &MIRBuilder.getMBB(), Arg));
- Args.push_back(makeArrayRef(SwiftInVReg));
- SwiftErrorVReg =
- SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
- continue;
- }
-
- Args.push_back(getOrCreateVRegs(*Arg));
- }
-
- if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg,
- [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
+ if (!translateCallSite(&I, MIRBuilder))
return false;
MCSymbol *EndSymbol = Context.createTempSymbol();
@@ -1811,36 +1808,25 @@ bool IRTranslator::translateAlloca(const User &U,
Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
Register TySize =
- getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
+ getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
MIRBuilder.buildMul(AllocSize, NumElts, TySize);
- LLT PtrTy = getLLTForType(*AI.getType(), *DL);
- auto &TLI = *MF->getSubtarget().getTargetLowering();
- Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
-
- Register SPTmp = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildCopy(SPTmp, SPReg);
-
- Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
-
- // Handle alignment. We have to realign if the allocation granule was smaller
- // than stack alignment, or the specific alloca requires more than stack
- // alignment.
unsigned StackAlign =
MF->getSubtarget().getFrameLowering()->getStackAlignment();
- Align = std::max(Align, StackAlign);
- if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) {
- // Round the size of the allocation up to the stack alignment size
- // by add SA-1 to the size. This doesn't overflow because we're computing
- // an address inside an alloca.
- Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
- AllocTmp = AlignedAlloc;
- }
+ if (Align <= StackAlign)
+ Align = 0;
+
+ // Round the size of the allocation up to the stack alignment size
+ // by adding SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
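+ // E.g. with a 16 byte stack alignment, a 20 byte allocation is rounded up
+ // to (20 + 15) & ~15 = 32 bytes.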
+ auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign - 1);
+ auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
+ MachineInstr::NoUWrap);
+ auto AlignCst =
+ MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign - 1));
+ auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
- MIRBuilder.buildCopy(SPReg, AllocTmp);
- MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp);
+ MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Align);
MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
assert(MF->getFrameInfo().hasVarSizedObjects());
@@ -1926,7 +1912,7 @@ bool IRTranslator::translateShuffleVector(const User &U,
.addDef(getOrCreateVReg(U))
.addUse(getOrCreateVReg(*U.getOperand(0)))
.addUse(getOrCreateVReg(*U.getOperand(1)))
- .addUse(getOrCreateVReg(*U.getOperand(2)));
+ .addShuffleMask(cast<Constant>(U.getOperand(2)));
return true;
}
@@ -1991,7 +1977,6 @@ bool IRTranslator::translateAtomicRMW(const User &U,
unsigned Opcode = 0;
switch (I.getOperation()) {
default:
- llvm_unreachable("Unknown atomicrmw op");
return false;
case AtomicRMWInst::Xchg:
Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
@@ -2026,6 +2011,12 @@ bool IRTranslator::translateAtomicRMW(const User &U,
case AtomicRMWInst::UMin:
Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
break;
+ case AtomicRMWInst::FAdd:
+ Opcode = TargetOpcode::G_ATOMICRMW_FADD;
+ break;
+ case AtomicRMWInst::FSub:
+ Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
+ break;
}
MIRBuilder.buildAtomicRMW(
@@ -2197,6 +2188,20 @@ void IRTranslator::finalizeFunction() {
FuncInfo.clear();
}
+/// Returns true if a BasicBlock \p BB within a variadic function contains a
+/// variadic musttail call.
+static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
+ if (!IsVarArg)
+ return false;
+
+ // Walk the block backwards, because tail calls usually only appear at the end
+ // of a block.
+ return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) {
+ const auto *CI = dyn_cast<CallInst>(&I);
+ return CI && CI->isMustTailCall();
+ });
+}
+
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MF = &CurMF;
const Function &F = MF->getFunction();
@@ -2212,26 +2217,26 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
: TPC->isGISelCSEEnabled();
if (EnableCSE) {
- EntryBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
CSEInfo = &Wrapper.get(TPC->getCSEConfig());
EntryBuilder->setCSEInfo(CSEInfo);
- CurBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
CurBuilder->setCSEInfo(CSEInfo);
} else {
- EntryBuilder = make_unique<MachineIRBuilder>();
- CurBuilder = make_unique<MachineIRBuilder>();
+ EntryBuilder = std::make_unique<MachineIRBuilder>();
+ CurBuilder = std::make_unique<MachineIRBuilder>();
}
CLI = MF->getSubtarget().getCallLowering();
CurBuilder->setMF(*MF);
EntryBuilder->setMF(*MF);
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
- ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
FuncInfo.MF = MF;
FuncInfo.BPI = nullptr;
const auto &TLI = *MF->getSubtarget().getTargetLowering();
const TargetMachine &TM = MF->getTarget();
- SL = make_unique<GISelSwitchLowering>(this, FuncInfo);
+ SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
SL->init(TLI, TM, *DL);
EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
@@ -2258,6 +2263,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
SwiftError.setFunction(CurMF);
SwiftError.createEntriesInEntryBlock(DbgLoc);
+ bool IsVarArg = F.isVarArg();
+ bool HasMustTailInVarArgFn = false;
+
// Create all blocks, in IR order, to preserve the layout.
for (const BasicBlock &BB: F) {
auto *&MBB = BBToMBB[&BB];
@@ -2267,8 +2275,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
if (BB.hasAddressTaken())
MBB->setHasAddressTaken();
+
+ if (!HasMustTailInVarArgFn)
+ HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
}
+ MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);
+
// Make our arguments/constants entry block fallthrough to the IR entry block.
EntryBB->addSuccessor(&getMBB(F.front()));
@@ -2286,18 +2299,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- // We don't currently support translating swifterror or swiftself functions.
- for (auto &Arg : F.args()) {
- if (Arg.hasSwiftSelfAttr()) {
- OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
- F.getSubprogram(), &F.getEntryBlock());
- R << "unable to lower arguments due to swiftself: "
- << ore::NV("Prototype", F.getType());
- reportTranslationError(*MF, *TPC, *ORE, R);
- return false;
- }
- }
-
if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
@@ -2322,8 +2323,15 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Set the insertion point of all the following translations to
// the end of this basic block.
CurBuilder->setMBB(MBB);
-
+ HasTailCall = false;
for (const Instruction &Inst : *BB) {
+ // If we translated a tail call in the last step, then we know
+ // everything after the call is either a return, or something that is
+ // handled by the call itself. (E.g. a lifetime marker or assume
+ // intrinsic.) In this case, we should stop translating the block and
+ // move on.
+ if (HasTailCall)
+ break;
#ifndef NDEBUG
Verifier.setCurrentInst(&Inst);
#endif // ifndef NDEBUG
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 70694fe6b6c8..7c4fd2d140d3 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -12,11 +12,14 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -45,6 +48,7 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
@@ -53,6 +57,8 @@ InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { }
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -64,11 +70,13 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
return false;
LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
+ GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
- const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+ InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
CodeGenCoverage CoverageInfo;
assert(ISel && "Cannot work without InstructionSelector");
+ ISel->setupMF(MF, KB, CoverageInfo);
// An optimization remark emitter. Used to report failures.
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
@@ -124,7 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- if (!ISel->select(MI, CoverageInfo)) {
+ if (!ISel->select(MI)) {
// FIXME: It would be nice to dump all inserted instructions. It's
// not obvious how, esp. considering select() can insert after MI.
reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI);
@@ -159,10 +167,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
--MII;
if (MI.getOpcode() != TargetOpcode::COPY)
continue;
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (Register::isVirtualRegister(SrcReg) &&
+ Register::isVirtualRegister(DstReg)) {
auto SrcRC = MRI.getRegClass(SrcReg);
auto DstRC = MRI.getRegClass(DstReg);
if (SrcRC == DstRC) {
@@ -179,7 +187,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// that the size of the now-constrained vreg is unchanged and that it has a
// register class.
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned VReg = Register::index2VirtReg(I);
MachineInstr *MI = nullptr;
if (!MRI.def_empty(VReg))
@@ -217,6 +225,22 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
auto &TLI = *MF.getSubtarget().getTargetLowering();
TLI.finalizeLowering(MF);
+ // Determine if there are any calls in this machine function. Ported from
+ // SelectionDAG.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ for (const auto &MBB : MF) {
+ if (MFI.hasCalls() && MF.hasInlineAsm())
+ break;
+
+ for (const auto &MI : MBB) {
+ if ((MI.isCall() && !MI.isReturn()) || MI.isStackAligningInlineAsm())
+ MFI.setHasCalls(true);
+ if (MI.isInlineAsm())
+ MF.setHasInlineAsm(true);
+ }
+ }
+
LLVM_DEBUG({
dbgs() << "Rules covered by selecting function: " << MF.getName() << ":";
for (auto RuleID : CoverageInfo.covered())
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 2ad35b3a72c9..28143b30d4e8 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -79,5 +79,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
return true;
return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
- !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands());
+ !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
}
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index b5b26bff34bb..1593e21fe07e 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -184,11 +184,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
: TPC.isGISelCSEEnabled();
if (EnableCSE) {
- MIRBuilder = make_unique<CSEMIRBuilder>();
+ MIRBuilder = std::make_unique<CSEMIRBuilder>();
CSEInfo = &Wrapper.get(TPC.getCSEConfig());
MIRBuilder->setCSEInfo(CSEInfo);
} else
- MIRBuilder = make_unique<MachineIRBuilder>();
+ MIRBuilder = std::make_unique<MachineIRBuilder>();
// This observer keeps the worklist updated.
LegalizerWorkListManager WorkListObserver(InstList, ArtifactList);
// We want both WorkListObserver as well as CSEInfo to observe all changes.
@@ -206,8 +206,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
WrapperObserver.erasingInstr(*DeadMI);
};
+ auto stopLegalizing = [&](MachineInstr &MI) {
+ Helper.MIRBuilder.stopObservingChanges();
+ reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+ "unable to legalize instruction", MI);
+ };
bool Changed = false;
+ SmallVector<MachineInstr *, 128> RetryList;
do {
+ assert(RetryList.empty() && "Expected no instructions in RetryList");
+ unsigned NumArtifacts = ArtifactList.size();
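+ // Remember how many artifacts we start with so that we can tell whether
+ // the main legalization loop produced any new ones.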
while (!InstList.empty()) {
MachineInstr &MI = *InstList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
@@ -222,14 +230,31 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Error out if we couldn't legalize this instruction. We may want to
// fall back to DAG ISel instead in the future.
if (Res == LegalizerHelper::UnableToLegalize) {
- Helper.MIRBuilder.stopObservingChanges();
- reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
- "unable to legalize instruction", MI);
+ // Move illegal artifacts to RetryList instead of aborting because
+ // legalizing InstList may generate artifacts that allow the
+ // ArtifactCombiner to combine them away.
+ if (isArtifact(MI)) {
+ RetryList.push_back(&MI);
+ continue;
+ }
+ stopLegalizing(MI);
return false;
}
WorkListObserver.printNewInstrs();
Changed |= Res == LegalizerHelper::Legalized;
}
+ // Try to combine the instructions in RetryList again if there
+ // are new artifacts. If not, stop legalizing.
+ if (!RetryList.empty()) {
+ if (ArtifactList.size() > NumArtifacts) {
+ while (!RetryList.empty())
+ ArtifactList.insert(RetryList.pop_back_val());
+ } else {
+ MachineInstr *MI = *RetryList.begin();
+ stopLegalizing(*MI);
+ return false;
+ }
+ }
while (!ArtifactList.empty()) {
MachineInstr &MI = *ArtifactList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index f5cf7fc9bd9b..21512e543878 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -171,6 +172,26 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
return true;
}
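+
+/// Return a type that evenly divides both \p OrigTy and \p TargetTy (their
+/// "GCD" type), e.g. getGCDType(<4 x s32>, <2 x s32>) is <2 x s32> and
+/// getGCDType(s64, s48) is s16.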
+static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
+ if (OrigTy.isVector() && TargetTy.isVector()) {
+ assert(OrigTy.getElementType() == TargetTy.getElementType());
+ int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
+ TargetTy.getNumElements());
+ return LLT::scalarOrVector(GCD, OrigTy.getElementType());
+ }
+
+ if (OrigTy.isVector() && !TargetTy.isVector()) {
+ assert(OrigTy.getElementType() == TargetTy);
+ return TargetTy;
+ }
+
+ assert(!OrigTy.isVector() && !TargetTy.isVector());
+
+ int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
+ TargetTy.getSizeInBits());
+ return LLT::scalar(GCD);
+}
+
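For the scalar case, getGCDType above reduces to a plain greatest common divisor of the two bit widths. A minimal standalone sketch of that arithmetic (C++17, plain integers rather than LLTs; the widths are illustrative):

#include <cassert>
#include <numeric>

// Sketch of getGCDType's scalar-scalar case: the result type is the GCD
// of the two sizes in bits (illustrative, not LLVM code).
static unsigned gcdScalarBits(unsigned OrigBits, unsigned TargetBits) {
  return std::gcd(OrigBits, TargetBits);
}

int main() {
  assert(gcdScalarBits(64, 48) == 16); // s64 vs s48 -> s16 pieces
  assert(gcdScalarBits(32, 32) == 32); // equal widths are their own GCD
}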
void LegalizerHelper::insertParts(Register DstReg,
LLT ResultTy, LLT PartTy,
ArrayRef<Register> PartRegs,
@@ -219,11 +240,29 @@ void LegalizerHelper::insertParts(Register DstReg,
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) {
case TargetOpcode::G_SDIV:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ switch (Size) {
+ case 32:
+ return RTLIB::SDIV_I32;
+ case 64:
+ return RTLIB::SDIV_I64;
+ case 128:
+ return RTLIB::SDIV_I128;
+ default:
+ llvm_unreachable("unexpected size");
+ }
case TargetOpcode::G_UDIV:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ switch (Size) {
+ case 32:
+ return RTLIB::UDIV_I32;
+ case 64:
+ return RTLIB::UDIV_I64;
+ case 128:
+ return RTLIB::UDIV_I128;
+ default:
+ llvm_unreachable("unexpected size");
+ }
case TargetOpcode::G_SREM:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
@@ -288,6 +327,35 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
llvm_unreachable("Unknown libcall function");
}
+/// Returns true if an instruction is in tail position in its caller.
+/// Intended for legalizing libcalls as tail calls when possible.
+static bool isLibCallInTailPosition(MachineInstr &MI) {
+ const Function &F = MI.getParent()->getParent()->getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore NoAlias and NonNull because they don't affect the
+ // call sequence.
+ AttributeList CallerAttrs = F.getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias)
+ .removeAttribute(Attribute::NonNull)
+ .hasAttributes())
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Only tail call if the following instruction is a standard return.
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ MachineInstr *Next = MI.getNextNode();
+ if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
+ return false;
+
+ return true;
+}
+
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
const CallLowering::ArgInfo &Result,
@@ -296,9 +364,12 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
const char *Name = TLI.getLibcallName(Libcall);
- MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
- if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
- MachineOperand::CreateES(Name), Result, Args))
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(Libcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = Result;
+ std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+ if (!CLI.lowerCall(MIRBuilder, Info))
return LegalizerHelper::UnableToLegalize;
return LegalizerHelper::Legalized;
@@ -317,6 +388,74 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
Args);
}
+LegalizerHelper::LegalizeResult
+llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+
+ SmallVector<CallLowering::ArgInfo, 3> Args;
+ // Add all the args, except for the last, which is an imm denoting 'tail'.
+ for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
+ Register Reg = MI.getOperand(i).getReg();
+
+ // Need to derive an IR type for call lowering.
+ LLT OpLLT = MRI.getType(Reg);
+ Type *OpTy = nullptr;
+ if (OpLLT.isPointer())
+ OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
+ else
+ OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
+ Args.push_back({Reg, OpTy});
+ }
+
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
+ RTLIB::Libcall RTLibcall;
+ switch (ID) {
+ case Intrinsic::memcpy:
+ RTLibcall = RTLIB::MEMCPY;
+ break;
+ case Intrinsic::memset:
+ RTLibcall = RTLIB::MEMSET;
+ break;
+ case Intrinsic::memmove:
+ RTLibcall = RTLIB::MEMMOVE;
+ break;
+ default:
+ return LegalizerHelper::UnableToLegalize;
+ }
+ const char *Name = TLI.getLibcallName(RTLibcall);
+
+ MIRBuilder.setInstr(MI);
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
+ Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
+ isLibCallInTailPosition(MI);
+
+ std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+ if (!CLI.lowerCall(MIRBuilder, Info))
+ return LegalizerHelper::UnableToLegalize;
+
+ if (Info.LoweredTailCall) {
+ assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
+ // We must have a return following the call to get past
+ // isLibCallInTailPosition.
+ assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
+ "Expected instr following MI to be a return?");
+
+ // We lowered a tail call, so the call is now the return from the block.
+ // Delete the old return.
+ MI.getNextNode()->eraseFromParent();
+ }
+
+ return LegalizerHelper::Legalized;
+}
+
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
Type *FromType) {
auto ToMVT = MVT::getVT(ToType);
@@ -518,6 +657,65 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SEXT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ // FIXME: support the general case where the requested NarrowTy may not be
+ // the same as the source type. E.g. s128 = sext(s32)
+ if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
+ SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
+ LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
+ return UnableToLegalize;
+ }
+
+ // Shift the sign bit of the low register through the high register.
+ auto ShiftAmt =
+ MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
+ auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
+ MI.eraseFromParent();
+ return Legalized;
+ }
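A standalone check of the split just above: sign-extending an s32 to s64 is equivalent to merging the source with an arithmetic right shift of it by 31 (plain C++, not MIRBuilder code; arithmetic right shift of a negative value is implementation-defined before C++20 but arithmetic on common targets):

#include <cassert>
#include <cstdint>

static int64_t sextViaMerge(int32_t Src) {
  uint32_t Lo = static_cast<uint32_t>(Src);
  // G_ASHR by NarrowSize - 1 shifts the sign bit through the high half.
  uint32_t Hi = static_cast<uint32_t>(Src >> 31);
  // G_MERGE_VALUES {Lo, Hi}.
  return static_cast<int64_t>((static_cast<uint64_t>(Hi) << 32) | Lo);
}

int main() {
  assert(sextViaMerge(-5) == -5);
  assert(sextViaMerge(42) == 42);
}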
+ case TargetOpcode::G_ZEXT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ uint64_t SizeOp1 = SrcTy.getSizeInBits();
+ if (SizeOp0 % SizeOp1 != 0)
+ return UnableToLegalize;
+
+ // Generate a merge where the bottom bits are taken from the source, and
+ // zero everything else.
+ Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
+ unsigned NumParts = SizeOp0 / SizeOp1;
+ SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
+ for (unsigned Part = 1; Part < NumParts; ++Part)
+ Srcs.push_back(ZeroReg);
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_TRUNC: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
+ LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
+ return UnableToLegalize;
+ }
+
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
+ MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
case TargetOpcode::G_ADD: {
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
@@ -530,15 +728,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
- Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
- MIRBuilder.buildConstant(CarryIn, 0);
-
+ Register CarryIn;
for (int i = 0; i < NumParts; ++i) {
Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
- MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
- Src2Regs[i], CarryIn);
+ if (i == 0)
+ MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
+ else {
+ MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
+ Src2Regs[i], CarryIn);
+ }
DstRegs.push_back(DstReg);
CarryIn = CarryOut;
@@ -730,7 +930,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
}
- MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+ MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
@@ -763,6 +963,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
@@ -771,18 +972,109 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
} else {
- const LLT s1 = LLT::scalar(1);
- MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH);
+ MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpHEQ =
- MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH);
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
- ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL);
+ ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
}
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ if (!MI.getOperand(2).isImm())
+ return UnableToLegalize;
+ int64_t SizeInBits = MI.getOperand(2).getImm();
+
+ // So long as the new type has more bits than the bits we're extending, we
+ // don't need to break it apart.
+ if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
+ Observer.changingInstr(MI);
+ // We don't lose any non-extension bits by truncating the src and
+ // sign-extending the dst.
+ MachineOperand &MO1 = MI.getOperand(1);
+ auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
+ MO1.setReg(TruncMIB->getOperand(0).getReg());
+
+ MachineOperand &MO2 = MI.getOperand(0);
+ Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
+ MO2.setReg(DstExt);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ // Break it apart. Components below the extension point are unmodified. The
+ // component containing the extension point becomes a narrower SEXT_INREG.
+ // Components above it are ashr'd from the component containing the
+ // extension point.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp0 / NarrowSize;
+
+ // List the registers where the destination will be scattered.
+ SmallVector<Register, 2> DstRegs;
+ // List the registers where the source will be split.
+ SmallVector<Register, 2> SrcRegs;
+
+ // Create all the temporary registers.
+ for (int i = 0; i < NumParts; ++i) {
+ Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
+
+ SrcRegs.push_back(SrcReg);
+ }
+
+ // Explode the big arguments into smaller chunks.
+ MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
+
+ Register AshrCstReg =
+ MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
+ ->getOperand(0)
+ .getReg();
+ Register FullExtensionReg = 0;
+ Register PartialExtensionReg = 0;
+
+ // Do the operation on each small part.
+ for (int i = 0; i < NumParts; ++i) {
+ if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
+ DstRegs.push_back(SrcRegs[i]);
+ else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
+ assert(PartialExtensionReg &&
+ "Expected to visit partial extension before full");
+ if (FullExtensionReg) {
+ DstRegs.push_back(FullExtensionReg);
+ continue;
+ }
+ DstRegs.push_back(MIRBuilder
+ .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
+ {PartialExtensionReg, AshrCstReg})
+ ->getOperand(0)
+ .getReg());
+ FullExtensionReg = DstRegs.back();
+ } else {
+ DstRegs.push_back(
+ MIRBuilder
+ .buildInstr(
+ TargetOpcode::G_SEXT_INREG, {NarrowTy},
+ {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
+ ->getOperand(0)
+ .getReg());
+ PartialExtensionReg = DstRegs.back();
+ }
+ }
+
+ // Gather the destination registers into the final destination.
+ Register DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
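A standalone check of the multi-part G_SEXT_INREG narrowing above, for s64 = G_SEXT_INREG x, 16 split into two s32 components (plain C++, not MIRBuilder code; modular unsigned-to-signed casts and arithmetic right shifts are assumed, as C++20 guarantees):

#include <cassert>
#include <cstdint>

static int64_t sextInRegViaParts(uint64_t Src) {
  uint32_t Lo = static_cast<uint32_t>(Src);
  // The component containing the extension point becomes a narrower
  // G_SEXT_INREG (16 % 32 == 16 bits).
  int32_t Part0 = static_cast<int32_t>(Lo << 16) >> 16;
  // The component above it is ashr'd from the partial-extension component.
  int32_t Part1 = Part0 >> 31;
  uint64_t Hi = static_cast<uint64_t>(static_cast<uint32_t>(Part1)) << 32;
  return static_cast<int64_t>(Hi | static_cast<uint32_t>(Part0));
}

int main() {
  assert(sextInRegViaParts(0xFFFF) == -1);     // low 16 bits all ones
  assert(sextInRegViaParts(0x1234) == 0x1234); // positive value unchanged
}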
@@ -892,7 +1184,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
- Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
+ Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
MRI.createGenericVirtualRegister(WideTy);
auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
@@ -903,6 +1195,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
if (WideSize > DstSize)
MIRBuilder.buildTrunc(DstReg, ResultReg);
+ else if (DstTy.isPointer())
+ MIRBuilder.buildIntToPtr(DstReg, ResultReg);
MI.eraseFromParent();
return Legalized;
@@ -1218,6 +1512,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_BITREVERSE: {
+ Observer.changingInstr(MI);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
+
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ MI.getOperand(0).setReg(DstExt);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
+ auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
+ MIRBuilder.buildTrunc(DstReg, Shift);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
@@ -1310,13 +1622,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
- if (TypeIdx != 0)
- return UnableToLegalize;
Observer.changingInstr(MI);
- widenScalarDst(MI, WideTy);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
Observer.changedInstr(MI);
return Legalized;
-
case TargetOpcode::G_SITOFP:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -1483,6 +1797,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
case TargetOpcode::G_FCANONICALIZE:
@@ -1553,6 +1868,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_SEXT_INREG:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
}
}
@@ -1579,6 +1903,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ return lowerSADDO_SSUBO(MI);
case TargetOpcode::G_SMULO:
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
@@ -1669,6 +1996,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_FMAD:
+ return lowerFMad(MI);
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register OldValRes = MI.getOperand(0).getReg();
Register SuccessRes = MI.getOperand(1).getReg();
@@ -1690,11 +2019,57 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
LLT DstTy = MRI.getType(DstReg);
auto &MMO = **MI.memoperands_begin();
- if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
- // In the case of G_LOAD, this was a non-extending load already and we're
- // about to lower to the same instruction.
- if (MI.getOpcode() == TargetOpcode::G_LOAD)
+ if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
+ if (MI.getOpcode() == TargetOpcode::G_LOAD) {
+ // This load needs splitting into power-of-2 sized loads.
+ if (DstTy.isVector())
return UnableToLegalize;
+ if (isPowerOf2_32(DstTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Our strategy here is to generate anyextending loads for the smaller
+ // types up to the next power-of-2 result type, and then combine the two
+ // loaded values before truncating back down to the non-pow-2 type.
+ // E.g. v1 = i24 load =>
+ // v2 = i32 load (2 byte)
+ // v3 = i32 load (1 byte)
+ // v4 = i32 shl v3, 16
+ // v5 = i32 or v4, v2
+ // v1 = i24 trunc v5
+ // By doing this we generate the correct truncate which should get
+ // combined away as an artifact with a matching extend.
+ uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
+ uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
+ &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
+ LLT AnyExtTy = LLT::scalar(AnyExtSize);
+ Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ auto LargeLoad =
+ MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
+
+ auto OffsetCst =
+ MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+ Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+ auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
+ *SmallMMO);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+ auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+ MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
+ MI.eraseFromParent();
+ return Legalized;
+ }
MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
MI.eraseFromParent();
return Legalized;
@@ -1723,6 +2098,51 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
+ case TargetOpcode::G_STORE: {
+ // Lower a non-power-of-2 store into multiple pow-2 stores.
+ // E.g. split an i24 store into an i16 store + i8 store.
+ // We do this by first extending the stored value to the next largest power
+ // of 2 type, and then using truncating stores to store the components.
+ // As with G_LOAD, this generates an extend that can be artifact-combined
+ // away instead of leaving behind extracts.
+ Register SrcReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+ if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
+ return UnableToLegalize;
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(SrcTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Extend to the next pow-2.
+ const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
+ auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
+
+ // Obtain the smaller value by shifting away the larger value.
+ uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
+ uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
+ auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
+ auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
+
+ // Generate the GEP and truncating stores.
+ LLT PtrTy = MRI.getType(PtrReg);
+ auto OffsetCst =
+ MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+ Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO =
+ MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+ MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
+ MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
+ MI.eraseFromParent();
+ return Legalized;
+ }
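A standalone check of this split: storing an i24 as an i16 of the low bits plus an i8 of the value shifted right by 16 reproduces the original bytes (plain C++; a little-endian layout is assumed, and the buffer and values are illustrative):

#include <cassert>
#include <cstdint>
#include <cstring>

static void storeI24(uint8_t *Ptr, uint32_t Val) {
  uint16_t Large = static_cast<uint16_t>(Val);     // truncating i16 store
  uint8_t Small = static_cast<uint8_t>(Val >> 16); // G_LSHR, then i8 store
  std::memcpy(Ptr, &Large, 2);
  Ptr[2] = Small;                                  // GEP by LargeSplitSize/8
}

int main() {
  uint8_t Buf[3];
  storeI24(Buf, 0xABCDEF);
  assert(Buf[0] == 0xEF && Buf[1] == 0xCD && Buf[2] == 0xAB);
}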
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
@@ -1797,6 +2217,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerUITOFP(MI, TypeIdx, Ty);
case G_SITOFP:
return lowerSITOFP(MI, TypeIdx, Ty);
+ case G_FPTOUI:
+ return lowerFPTOUI(MI, TypeIdx, Ty);
case G_SMIN:
case G_SMAX:
case G_UMIN:
@@ -1807,6 +2229,31 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case G_FMINNUM:
case G_FMAXNUM:
return lowerFMinNumMaxNum(MI);
+ case G_UNMERGE_VALUES:
+ return lowerUnmergeValues(MI);
+ case TargetOpcode::G_SEXT_INREG: {
+ assert(MI.getOperand(2).isImm() && "Expected immediate");
+ int64_t SizeInBits = MI.getOperand(2).getImm();
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
+
+ auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
+ MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
+ MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_SHUFFLE_VECTOR:
+ return lowerShuffleVector(MI);
+ case G_DYN_STACKALLOC:
+ return lowerDynStackAlloc(MI);
+ case G_EXTRACT:
+ return lowerExtract(MI);
+ case G_INSERT:
+ return lowerInsert(MI);
}
}
@@ -2283,6 +2730,105 @@ LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ const int NumDst = MI.getNumOperands() - 1;
+ const Register SrcReg = MI.getOperand(NumDst).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // TODO: Create sequence of extracts.
+ if (DstTy == NarrowTy)
+ return UnableToLegalize;
+
+ LLT GCDTy = getGCDType(SrcTy, NarrowTy);
+ if (DstTy == GCDTy) {
+ // This would just be a copy of the same unmerge.
+ // TODO: Create extracts, pad with undef and create intermediate merges.
+ return UnableToLegalize;
+ }
+
+ auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+ const int NumUnmerge = Unmerge->getNumOperands() - 1;
+ const int PartsPerUnmerge = NumDst / NumUnmerge;
+
+ for (int I = 0; I != NumUnmerge; ++I) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+
+ for (int J = 0; J != PartsPerUnmerge; ++J)
+ MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
+ MIB.addUse(Unmerge.getReg(I));
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy) {
+ assert(TypeIdx == 0 && "not a vector type index");
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = DstTy.getElementType();
+
+ int DstNumElts = DstTy.getNumElements();
+ int NarrowNumElts = NarrowTy.getNumElements();
+ int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
+ LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
+
+ SmallVector<Register, 8> ConcatOps;
+ SmallVector<Register, 8> SubBuildVector;
+
+ Register UndefReg;
+ if (WidenedDstTy != DstTy)
+ UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
+
+ // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
+ // necessary.
+ //
+ // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
+ // -> <2 x s16>
+ //
+ // %4:_(s16) = G_IMPLICIT_DEF
+ // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
+ // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
+ // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
+ // %3:_(<3 x s16>) = G_EXTRACT %7, 0
+ for (int I = 0; I != NumConcat; ++I) {
+ for (int J = 0; J != NarrowNumElts; ++J) {
+ int SrcIdx = NarrowNumElts * I + J;
+
+ if (SrcIdx < DstNumElts) {
+ Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
+ SubBuildVector.push_back(SrcReg);
+ } else
+ SubBuildVector.push_back(UndefReg);
+ }
+
+ auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
+ ConcatOps.push_back(BuildVec.getReg(0));
+ SubBuildVector.clear();
+ }
+
+ if (DstTy == WidenedDstTy)
+ MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
+ else {
+ auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
+ MIRBuilder.buildExtract(DstReg, Concat, 0);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
// FIXME: Don't know how to handle secondary types yet.
@@ -2395,6 +2941,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FDIV:
case G_FREM:
case G_FMA:
+ case G_FMAD:
case G_FPOW:
case G_FEXP:
case G_FEXP2:
@@ -2411,6 +2958,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FSIN:
case G_FSQRT:
case G_BSWAP:
+ case G_BITREVERSE:
case G_SDIV:
case G_SMIN:
case G_SMAX:
@@ -2453,6 +3001,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
case G_PHI:
return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+ case G_UNMERGE_VALUES:
+ return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
+ case G_BUILD_VECTOR:
+ return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
case G_LOAD:
case G_STORE:
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
@@ -2604,11 +3156,11 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
switch (MI.getOpcode()) {
case TargetOpcode::G_SHL: {
// Short: ShAmt < NewBitSize
- auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
+ auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
- auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
- auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
- auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+ auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
+ auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
+ auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
// Long: ShAmt >= NewBitSize
auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
@@ -2622,41 +3174,25 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
ResultRegs[1] = Hi.getReg(0);
break;
}
- case TargetOpcode::G_LSHR: {
- // Short: ShAmt < NewBitSize
- auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
-
- auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
- auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
- auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
-
- // Long: ShAmt >= NewBitSize
- auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
- auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
-
- auto Lo = MIRBuilder.buildSelect(
- HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
- auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
-
- ResultRegs[0] = Lo.getReg(0);
- ResultRegs[1] = Hi.getReg(0);
- break;
- }
+ case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR: {
// Short: ShAmt < NewBitSize
- auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
+ auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
- auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
- auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
- auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+ auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+ auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+ auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
// Long: ShAmt >= NewBitSize
-
- // Sign of Hi part.
- auto HiL = MIRBuilder.buildAShr(
- HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
-
- auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+ MachineInstrBuilder HiL;
+ if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
+ } else {
+ auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
+ HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
+ }
+ auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
+ {InH, AmtExcess}); // Lo from Hi part.
auto Lo = MIRBuilder.buildSelect(
HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
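A standalone check of the short-shift path above: a 64-bit G_LSHR by Amt < 32 expressed in 32-bit halves (plain C++; the long-shift and zero-shift selects are omitted for brevity):

#include <cassert>
#include <cstdint>

static uint64_t lshr64ViaHalves(uint64_t In, unsigned Amt) {
  assert(Amt > 0 && Amt < 32 && "only the short-shift path is modeled");
  uint32_t InL = static_cast<uint32_t>(In);
  uint32_t InH = static_cast<uint32_t>(In >> 32);
  uint32_t Hi = InH >> Amt;                         // HiS
  uint32_t Lo = (InL >> Amt) | (InH << (32 - Amt)); // LoOr | HiOr
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  assert(lshr64ViaHalves(0x1122334455667788ULL, 8) == 0x0011223344556677ULL);
}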
@@ -2701,12 +3237,22 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
MIRBuilder.setInstr(MI);
unsigned Opc = MI.getOpcode();
switch (Opc) {
- case TargetOpcode::G_IMPLICIT_DEF: {
+ case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_LOAD: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
Observer.changingInstr(MI);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_STORE:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
@@ -2748,6 +3294,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ int NumDst = MI.getNumOperands() - 1;
+ moreElementsVectorSrc(MI, MoreTy, NumDst);
+
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+ for (int I = 0; I != NumDst; ++I)
+ MIB.addDef(MI.getOperand(I).getReg());
+
+ int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
+ for (int I = NumDst; I != NewNumDst; ++I)
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+
+ MIB.addUse(MI.getOperand(NumDst).getReg());
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_PHI:
return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
default:
@@ -3310,6 +3876,48 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ if (SrcTy != S64 && SrcTy != S32)
+ return UnableToLegalize;
+ if (DstTy != S32 && DstTy != S64)
+ return UnableToLegalize;
+
+ // FPTOSI gives the same result as FPTOUI for positive signed integers.
+ // FPTOUI needs to deal with fp values that convert to unsigned integers
+ // greater than or equal to 2^31 for a 32-bit result or 2^63 for a 64-bit
+ // result. Call this threshold 2^Exp for brevity.
+
+ APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
+ APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
+ : APFloat::IEEEdouble(),
+ APInt::getNullValue(SrcTy.getSizeInBits()));
+ TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
+
+ MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
+
+ MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+ // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI
+ // on (Value - 2^Exp) and add 2^Exp back by setting the highest bit in the
+ // result to 1.
+ MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+ MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+ MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
+ MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
+
+ MachineInstrBuilder FCMP =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold);
+ MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
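A standalone check of this expansion for a double to u32 conversion (plain C++; Threshold is 2^31 and the test values are illustrative):

#include <cassert>
#include <cstdint>

static uint32_t fptouiViaFptosi(double Val) {
  const double Threshold = 2147483648.0; // 2^31
  if (Val < Threshold)                   // FCMP_ULT feeding the G_SELECT
    return static_cast<uint32_t>(static_cast<int32_t>(Val));
  // FPTOSI on (Val - 2^31), then set the high bit back with G_XOR.
  int32_t Low = static_cast<int32_t>(Val - Threshold);
  return static_cast<uint32_t>(Low) ^ 0x80000000u;
}

int main() {
  assert(fptouiViaFptosi(42.0) == 42u);
  assert(fptouiViaFptosi(3000000000.0) == 3000000000u);
}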
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_SMIN:
@@ -3419,3 +4027,251 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
+ // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ unsigned Flags = MI.getFlags();
+
+ auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
+ Flags);
+ MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
+ const unsigned NumDst = MI.getNumOperands() - 1;
+ const Register SrcReg = MI.getOperand(NumDst).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst0Reg);
+
+ // Expand scalarizing unmerge as bitcast to integer and shift.
+ if (!DstTy.isVector() && SrcTy.isVector() &&
+ SrcTy.getElementType() == DstTy) {
+ LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
+ Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
+
+ MIRBuilder.buildTrunc(Dst0Reg, Cast);
+
+ const unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Offset = DstSize;
+ for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
+ auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
+ MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
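A standalone check of the bitcast-and-shift expansion above: unmerging a <4 x s8> into four s8 values through one 32-bit integer (plain C++; little-endian element order is assumed):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t Vec[4] = {0x11, 0x22, 0x33, 0x44};
  uint32_t Cast;
  std::memcpy(&Cast, Vec, sizeof(Cast)); // G_BITCAST to s32
  for (unsigned I = 0; I != 4; ++I) {
    // G_LSHR by I * DstSize, then G_TRUNC back to s8.
    uint8_t Elt = static_cast<uint8_t>(Cast >> (I * 8));
    assert(Elt == Vec[I]);
  }
}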
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0Reg = MI.getOperand(1).getReg();
+ Register Src1Reg = MI.getOperand(2).getReg();
+ LLT Src0Ty = MRI.getType(Src0Reg);
+ LLT DstTy = MRI.getType(DstReg);
+ LLT IdxTy = LLT::scalar(32);
+
+ const Constant *ShufMask = MI.getOperand(3).getShuffleMask();
+
+ SmallVector<int, 32> Mask;
+ ShuffleVectorInst::getShuffleMask(ShufMask, Mask);
+
+ if (DstTy.isScalar()) {
+ if (Src0Ty.isVector())
+ return UnableToLegalize;
+
+ // This is just a SELECT.
+ assert(Mask.size() == 1 && "Expected a single mask element");
+ Register Val;
+ if (Mask[0] < 0 || Mask[0] > 1)
+ Val = MIRBuilder.buildUndef(DstTy).getReg(0);
+ else
+ Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
+ MIRBuilder.buildCopy(DstReg, Val);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ Register Undef;
+ SmallVector<Register, 32> BuildVec;
+ LLT EltTy = DstTy.getElementType();
+
+ for (int Idx : Mask) {
+ if (Idx < 0) {
+ if (!Undef.isValid())
+ Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
+ BuildVec.push_back(Undef);
+ continue;
+ }
+
+ if (Src0Ty.isScalar()) {
+ BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
+ } else {
+ int NumElts = Src0Ty.getNumElements();
+ Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
+ int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
+ auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
+ auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
+ BuildVec.push_back(Extract.getReg(0));
+ }
+ }
+
+ MIRBuilder.buildBuildVector(DstReg, BuildVec);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
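A standalone sketch of this lowering: every destination element is an extract from one of the two sources, selected by the mask, with negative indices left undefined (plain C++; the vectors and mask are illustrative):

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> Src0 = {10, 11, 12, 13};
  std::array<int, 4> Src1 = {20, 21, 22, 23};
  std::array<int, 4> Mask = {0, 5, 3, 6};
  std::array<int, 4> Dst{};
  for (unsigned I = 0; I != 4; ++I) {
    int Idx = Mask[I];
    if (Idx < 0)
      continue; // would be a G_IMPLICIT_DEF element
    // G_EXTRACT_VECTOR_ELT from Src0 or Src1, then G_BUILD_VECTOR.
    Dst[I] = Idx < 4 ? Src0[Idx] : Src1[Idx - 4];
  }
  assert(Dst[0] == 10 && Dst[1] == 21 && Dst[2] == 13 && Dst[3] == 22);
}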
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register AllocSize = MI.getOperand(1).getReg();
+ unsigned Align = MI.getOperand(2).getImm();
+
+ const auto &MF = *MI.getMF();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+ LLT PtrTy = MRI.getType(Dst);
+ LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
+
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
+ SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
+
+ // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
+ // have to generate an extra instruction to negate the alloc and then use
+ // G_GEP to add the negative offset.
+ auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
+ if (Align) {
+ APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
+ AlignMask.negate();
+ auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
+ Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
+ }
+
+ SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
+ MIRBuilder.buildCopy(SPReg, SPTmp);
+ MIRBuilder.buildCopy(Dst, SPTmp);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
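A standalone check of the alignment step: AND-ing the grown-down stack pointer with -Align rounds it down to an aligned address (plain C++; Align is assumed to be a power of two and the addresses are illustrative):

#include <cassert>
#include <cstdint>

static uint64_t allocaAddr(uint64_t SP, uint64_t AllocSize, uint64_t Align) {
  uint64_t Alloc = SP - AllocSize; // G_SUB: the stack grows down
  if (Align)
    Alloc &= ~(Align - 1);         // G_AND with the negated align mask
  return Alloc;
}

int main() {
  assert(allocaAddr(0x1000, 24, 16) == 0xFE0);
  assert(allocaAddr(0x1000, 24, 0) == 0xFE8); // no extra alignment
}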
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerExtract(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ unsigned Offset = MI.getOperand(2).getImm();
+
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (DstTy.isScalar() &&
+ (SrcTy.isScalar() ||
+ (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
+ LLT SrcIntTy = SrcTy;
+ if (!SrcTy.isScalar()) {
+ SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
+ Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
+ }
+
+ if (Offset == 0)
+ MIRBuilder.buildTrunc(Dst, Src);
+ else {
+ auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
+ auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
+ MIRBuilder.buildTrunc(Dst, Shr);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register InsertSrc = MI.getOperand(2).getReg();
+ uint64_t Offset = MI.getOperand(3).getImm();
+
+ LLT DstTy = MRI.getType(Src);
+ LLT InsertTy = MRI.getType(InsertSrc);
+
+ if (InsertTy.isScalar() &&
+ (DstTy.isScalar() ||
+ (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
+ LLT IntDstTy = DstTy;
+ if (!DstTy.isScalar()) {
+ IntDstTy = LLT::scalar(DstTy.getSizeInBits());
+ Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
+ }
+
+ Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
+ if (Offset != 0) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
+ ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
+ }
+
+ APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
+ Offset + InsertTy.getSizeInBits());
+
+ auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
+ auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
+ auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
+
+ MIRBuilder.buildBitcast(Dst, Or);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
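A standalone check of the mask-and-or expansion: inserting an 8-bit value into a 32-bit register at bit offset 8 (plain C++; the constants are illustrative):

#include <cassert>
#include <cstdint>

static uint32_t insertBits(uint32_t Src, uint8_t InsertSrc, unsigned Offset) {
  uint32_t Ext = static_cast<uint32_t>(InsertSrc) << Offset; // G_ZEXT + G_SHL
  uint32_t Mask = ~(0xFFu << Offset); // clear bits [Offset, Offset + 8)
  return (Src & Mask) | Ext;          // G_AND, then G_OR
}

int main() {
  assert(insertBits(0xAABBCCDD, 0x11, 8) == 0xAABB11DD);
}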
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
+ Register Dst0 = MI.getOperand(0).getReg();
+ Register Dst1 = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+ const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
+
+ LLT Ty = MRI.getType(Dst0);
+ LLT BoolTy = MRI.getType(Dst1);
+
+ if (IsAdd)
+ MIRBuilder.buildAdd(Dst0, LHS, RHS);
+ else
+ MIRBuilder.buildSub(Dst0, LHS, RHS);
+
+ // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
+
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+
+ // For an addition, the result should be less than one of the operands (LHS)
+ // if and only if the other operand (RHS) is negative, otherwise there will
+ // be overflow.
+ // For a subtraction, the result should be less than one of the operands
+ // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+ // otherwise there will be overflow.
+ auto ResultLowerThanLHS =
+ MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
+ auto ConditionRHS = MIRBuilder.buildICmp(
+ IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
+
+ MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
+ MI.eraseFromParent();
+ return Legalized;
+}
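A standalone check of the overflow test above for 8-bit G_SADDO: overflow occurs exactly when (result < LHS) differs from (RHS < 0) (plain C++; two's-complement wrapping is modeled through uint8_t):

#include <cassert>
#include <cstdint>

static bool saddOverflows(int8_t LHS, int8_t RHS) {
  // G_ADD with wrapping.
  int8_t Res = static_cast<int8_t>(static_cast<uint8_t>(LHS) +
                                   static_cast<uint8_t>(RHS));
  bool ResultLowerThanLHS = Res < LHS; // ICMP_SLT Res, LHS
  bool ConditionRHS = RHS < 0;         // ICMP_SLT RHS, 0
  return ResultLowerThanLHS ^ ConditionRHS; // G_XOR
}

int main() {
  assert(saddOverflows(127, 1));   // wraps to -128
  assert(!saddOverflows(100, 27)); // exactly 127, no overflow
  assert(saddOverflows(-128, -1)); // wraps to 127
}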
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 6e1de95b3277..70045512fae5 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -215,7 +215,30 @@ bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const {
return true;
}
const bool AllCovered = (FirstUncovered >= NumTypeIdxs);
- LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered
+ if (NumTypeIdxs > 0)
+ LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered
+ << ", " << (AllCovered ? "OK" : "FAIL") << "\n");
+ return AllCovered;
+#else
+ return true;
+#endif
+}
+
+bool LegalizeRuleSet::verifyImmIdxsCoverage(unsigned NumImmIdxs) const {
+#ifndef NDEBUG
+ if (Rules.empty()) {
+ LLVM_DEBUG(
+ dbgs() << ".. imm index coverage check SKIPPED: no rules defined\n");
+ return true;
+ }
+ const int64_t FirstUncovered = ImmIdxsCovered.find_first_unset();
+ if (FirstUncovered < 0) {
+ LLVM_DEBUG(dbgs() << ".. imm index coverage check SKIPPED:"
+ " user-defined predicate detected\n");
+ return true;
+ }
+ const bool AllCovered = (FirstUncovered >= NumImmIdxs);
+ LLVM_DEBUG(dbgs() << ".. the first uncovered imm index: " << FirstUncovered
<< ", " << (AllCovered ? "OK" : "FAIL") << "\n");
return AllCovered;
#else
@@ -387,8 +410,6 @@ unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const {
LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias
<< "\n");
OpcodeIdx = getOpcodeIdxForOpcode(Alias);
- LLVM_DEBUG(dbgs() << ".. opcode " << Alias << " is aliased to "
- << RulesForOpcode[OpcodeIdx].getAlias() << "\n");
assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases");
}
@@ -412,7 +433,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
std::initializer_list<unsigned> Opcodes) {
unsigned Representative = *Opcodes.begin();
- assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
+ assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
"Initializer list must have at least two opcodes");
for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I)
@@ -677,12 +698,23 @@ void LegalizerInfo::verify(const MCInstrInfo &MII) const {
? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc)
: Acc;
});
+ const unsigned NumImmIdxs = std::accumulate(
+ MCID.opInfo_begin(), MCID.opInfo_end(), 0U,
+ [](unsigned Acc, const MCOperandInfo &OpInfo) {
+ return OpInfo.isGenericImm()
+ ? std::max(OpInfo.getGenericImmIndex() + 1U, Acc)
+ : Acc;
+ });
LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode
<< "): " << NumTypeIdxs << " type ind"
- << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n");
+ << (NumTypeIdxs == 1 ? "ex" : "ices") << ", "
+ << NumImmIdxs << " imm ind"
+ << (NumImmIdxs == 1 ? "ex" : "ices") << "\n");
const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode);
if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs))
FailedOpcodes.push_back(Opcode);
+ else if (!RuleSet.verifyImmIdxsCoverage(NumImmIdxs))
+ FailedOpcodes.push_back(Opcode);
}
if (!FailedOpcodes.empty()) {
errs() << "The following opcodes have ill-defined legalization rules:";
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index 3592409710a7..f882ecbf5db3 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -79,7 +79,7 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
return true;
case TargetOpcode::G_GLOBAL_VALUE: {
unsigned RematCost = TTI->getGISelRematGlobalCost();
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
unsigned MaxUses = maxUses(RematCost);
if (MaxUses == UINT_MAX)
return true; // Remats are "free" so always localize.
@@ -121,7 +121,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "Should localize: " << MI);
assert(MI.getDesc().getNumDefs() == 1 &&
"More than one definition not supported yet");
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
// Check if all the users of MI are local.
// We are going to invalidate the list of use operands, so we
// can't use a range iterator.
@@ -151,7 +151,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LocalizedMI);
// Set a new register for the definition.
- unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
+ Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
LocalizedMI->getOperand(0).setReg(NewReg);
NewVRegIt =
@@ -177,7 +177,7 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
// many users, but this case may be better served by regalloc improvements.
for (MachineInstr *MI : LocalizedInstrs) {
- unsigned Reg = MI->getOperand(0).getReg();
+ Register Reg = MI->getOperand(0).getReg();
MachineBasicBlock &MBB = *MI->getParent();
// All of the user MIs of this reg.
SmallPtrSet<MachineInstr *, 32> Users;
@@ -220,5 +220,6 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
LocalizedSetVecT LocalizedInstrs;
bool Changed = localizeInterBlock(MF, LocalizedInstrs);
- return Changed |= localizeIntraBlock(LocalizedInstrs);
+ Changed |= localizeIntraBlock(LocalizedInstrs);
+ return Changed;
}
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index b7a73326b85c..df770f6664ca 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -107,9 +107,13 @@ MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
+ // DBG_VALUE insts now carry IR-level indirection in their DIExpression
+ // rather than encoding it in the instruction itself.
+ const DIExpression *DIExpr = cast<DIExpression>(Expr);
+ DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref});
return insertInstr(BuildMI(getMF(), getDL(),
getTII().get(TargetOpcode::DBG_VALUE),
- /*IsIndirect*/ true, Reg, Variable, Expr));
+ /*IsIndirect*/ false, Reg, Variable, DIExpr));
}
MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
@@ -120,11 +124,15 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
+ // DBG_VALUE insts now carry IR-level indirection in their DIExpression
+ // rather than encoding it in the instruction itself.
+ const DIExpression *DIExpr = cast<DIExpression>(Expr);
+ DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref});
return buildInstr(TargetOpcode::DBG_VALUE)
.addFrameIndex(FI)
- .addImm(0)
+ .addReg(0)
.addMetadata(Variable)
- .addMetadata(Expr);
+ .addMetadata(DIExpr);
}
MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
@@ -148,7 +156,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
MIB.addReg(0U);
}
- return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
+ return MIB.addReg(0).addMetadata(Variable).addMetadata(Expr);
}
MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
@@ -160,6 +168,17 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
return MIB.addMetadata(Label);
}
+MachineInstrBuilder MachineIRBuilder::buildDynStackAlloc(const DstOp &Res,
+ const SrcOp &Size,
+ unsigned Align) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "expected ptr dst type");
+ auto MIB = buildInstr(TargetOpcode::G_DYN_STACKALLOC);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Size.addSrcToMIB(MIB);
+ MIB.addImm(Align);
+ return MIB;
+}
+
MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res,
int Idx) {
assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
@@ -207,11 +226,7 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res,
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type");
- auto MIB = buildInstr(TargetOpcode::G_GEP);
- Res.addDefToMIB(*getMRI(), MIB);
- Op0.addSrcToMIB(MIB);
- Op1.addSrcToMIB(MIB);
- return MIB;
+ return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1});
}
Optional<MachineInstrBuilder>
@@ -697,17 +712,19 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
const DstOp &Res,
const SrcOp &Op0,
- const SrcOp &Op1) {
+ const SrcOp &Op1,
+ Optional<unsigned> Flags) {
- return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1});
+ return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags);
}
MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res,
const SrcOp &Tst,
const SrcOp &Op0,
- const SrcOp &Op1) {
+ const SrcOp &Op1,
+ Optional<unsigned> Flags) {
- return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1});
+ return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags);
}
MachineInstrBuilder
@@ -774,26 +791,28 @@ MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr,
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
- Register OldValRes,
- Register Addr,
- Register Val,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(
+ unsigned Opcode, const DstOp &OldValRes,
+ const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO) {
+
#ifndef NDEBUG
- LLT OldValResTy = getMRI()->getType(OldValRes);
- LLT AddrTy = getMRI()->getType(Addr);
- LLT ValTy = getMRI()->getType(Val);
+ LLT OldValResTy = OldValRes.getLLTTy(*getMRI());
+ LLT AddrTy = Addr.getLLTTy(*getMRI());
+ LLT ValTy = Val.getLLTTy(*getMRI());
assert(OldValResTy.isScalar() && "invalid operand type");
assert(AddrTy.isPointer() && "invalid operand type");
assert(ValTy.isValid() && "invalid operand type");
assert(OldValResTy == ValTy && "type mismatch");
+ assert(MMO.isAtomic() && "not atomic mem operand");
#endif
- return buildInstr(Opcode)
- .addDef(OldValRes)
- .addUse(Addr)
- .addUse(Val)
- .addMemOperand(&MMO);
+ auto MIB = buildInstr(Opcode);
+ OldValRes.addDefToMIB(*getMRI(), MIB);
+ Addr.addSrcToMIB(MIB);
+ Val.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
}
MachineInstrBuilder
@@ -865,6 +884,21 @@ MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr,
}
MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFAdd(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
return buildInstr(TargetOpcode::G_FENCE)
.addImm(Ordering)
@@ -1037,8 +1071,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
"input operands do not cover output register");
if (SrcOps.size() == 1)
return buildCast(DstOps[0], SrcOps[0]);
- if (DstOps[0].getLLTTy(*getMRI()).isVector())
- return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps);
+ if (DstOps[0].getLLTTy(*getMRI()).isVector()) {
+ if (SrcOps[0].getLLTTy(*getMRI()).isVector())
+ return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps);
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps);
+ }
break;
}
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 42be88fcf947..f0e35c65c53b 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -92,7 +92,7 @@ void RegBankSelect::init(MachineFunction &MF) {
MBPI = nullptr;
}
MIRBuilder.setMF(MF);
- MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ MORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
}
void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -139,7 +139,7 @@ bool RegBankSelect::repairReg(
"need new vreg for each breakdown");
// An empty range of new register means no repairing.
- assert(!empty(NewVRegs) && "We should not have to repair");
+ assert(!NewVRegs.empty() && "We should not have to repair");
MachineInstr *MI;
if (ValMapping.NumBreakDowns == 1) {
@@ -154,7 +154,7 @@ bool RegBankSelect::repairReg(
std::swap(Src, Dst);
assert((RepairPt.getNumInsertPoints() == 1 ||
- TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+ Register::isPhysicalRegister(Dst)) &&
"We are about to create several defs for Dst");
// Build the instruction used to repair, then clone it at the right
@@ -398,7 +398,7 @@ void RegBankSelect::tryAvoidingSplit(
// Check if this is a physical or virtual register.
Register Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
// We are going to split every outgoing edges.
// Check that this is possible.
// FIXME: The machine representation is currently broken
@@ -687,8 +687,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
// iterator before hand.
MachineInstr &MI = *MII++;
- // Ignore target-specific instructions: they should use proper regclasses.
- if (isTargetSpecificOpcode(MI.getOpcode()))
+ // Ignore target-specific post-isel instructions: they should use proper
+ // regclasses.
+ if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode())
continue;
if (!assignInstr(MI)) {
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 4e41f338934d..fc9c802693ab 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "registerbank"
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 159422e38878..3fcc55286beb 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -82,7 +82,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
const RegisterBank *
RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI));
assert(Reg && "NoRegister does not have a register bank");
@@ -97,8 +97,7 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterClass &
RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
const TargetRegisterInfo &TRI) const {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
- "Reg must be a physreg");
+ assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg");
const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
if (RegRCIt != PhysRegMinimalRCs.end())
return *RegRCIt->second;
@@ -284,7 +283,7 @@ RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length,
++NumPartialMappingsCreated;
auto &PartMapping = MapOfPartialMappings[Hash];
- PartMapping = llvm::make_unique<PartialMapping>(StartIdx, Length, RegBank);
+ PartMapping = std::make_unique<PartialMapping>(StartIdx, Length, RegBank);
return *PartMapping;
}
@@ -318,7 +317,7 @@ RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown,
++NumValueMappingsCreated;
auto &ValMapping = MapOfValueMappings[Hash];
- ValMapping = llvm::make_unique<ValueMapping>(BreakDown, NumBreakDowns);
+ ValMapping = std::make_unique<ValueMapping>(BreakDown, NumBreakDowns);
return *ValMapping;
}
@@ -342,7 +341,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
// mapping, because we use the pointer of the ValueMapping
// to hash and we expect them to uniquely identify an instance
// of value mapping.
- Res = llvm::make_unique<ValueMapping[]>(std::distance(Begin, End));
+ Res = std::make_unique<ValueMapping[]>(std::distance(Begin, End));
unsigned Idx = 0;
for (Iterator It = Begin; It != End; ++It, ++Idx) {
const ValueMapping *ValMap = *It;
@@ -392,7 +391,7 @@ RegisterBankInfo::getInstructionMappingImpl(
++NumInstructionMappingsCreated;
auto &InstrMapping = MapOfInstructionMappings[Hash];
- InstrMapping = llvm::make_unique<InstructionMapping>(
+ InstrMapping = std::make_unique<InstructionMapping>(
ID, Cost, OperandsMapping, NumOperands);
return *InstrMapping;
}
@@ -456,7 +455,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
"This mapping is too complex for this function");
iterator_range<SmallVectorImpl<Register>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
- if (empty(NewRegs)) {
+ if (NewRegs.empty()) {
LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
@@ -489,7 +488,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
unsigned RegisterBankInfo::getSizeInBits(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
// The size is not directly available for physical registers.
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 766ea1d60bac..45618d7992ad 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -43,10 +43,9 @@ unsigned llvm::constrainOperandRegClass(
const RegisterBankInfo &RBI, MachineInstr &InsertPt,
const TargetRegisterClass &RegClass, const MachineOperand &RegMO,
unsigned OpIdx) {
- unsigned Reg = RegMO.getReg();
+ Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "PhysReg not implemented");
+ assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
// If we created a new virtual register because the class is not compatible
@@ -73,10 +72,9 @@ unsigned llvm::constrainOperandRegClass(
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
const MachineOperand &RegMO, unsigned OpIdx) {
- unsigned Reg = RegMO.getReg();
+ Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "PhysReg not implemented");
+ assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
// Some of the target independent instructions, like COPY, may not impose any
@@ -130,9 +128,9 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n');
assert(MO.isReg() && "Unsupported non-reg operand");
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// Physical registers don't need to be constrained.
- if (TRI.isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
// Register operands with a value of 0 (e.g. predicate operands) don't need
@@ -170,9 +168,8 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !MRI.use_nodbg_empty(Reg))
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg) || !MRI.use_nodbg_empty(Reg))
return false;
}
return true;
@@ -219,11 +216,33 @@ Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
}
Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
- unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
+ bool HandleFConstant) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
- while ((MI = MRI.getVRegDef(VReg)) &&
- MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) {
+ auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
+ return Opcode == TargetOpcode::G_CONSTANT ||
+ (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT);
+ };
+ auto GetImmediateValue = [HandleFConstant,
+ &MRI](const MachineInstr &MI) -> Optional<APInt> {
+ const MachineOperand &CstVal = MI.getOperand(1);
+ if (!CstVal.isImm() && !CstVal.isCImm() &&
+ (!HandleFConstant || !CstVal.isFPImm()))
+ return None;
+ if (!CstVal.isFPImm()) {
+ unsigned BitWidth =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
+ : CstVal.getCImm()->getValue();
+ assert(Val.getBitWidth() == BitWidth &&
+ "Value bitwidth doesn't match definition type");
+ return Val;
+ }
+ return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+ };
+ while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
+ LookThroughInstrs) {
switch (MI->getOpcode()) {
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_SEXT:
@@ -235,7 +254,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
break;
case TargetOpcode::COPY:
VReg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VReg))
+ if (Register::isPhysicalRegister(VReg))
return None;
break;
case TargetOpcode::G_INTTOPTR:
@@ -245,16 +264,13 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return None;
}
}
- if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT ||
- (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm()))
+ if (!MI || !IsConstantOpcode(MI->getOpcode()))
return None;
- const MachineOperand &CstVal = MI->getOperand(1);
- unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
- APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
- : CstVal.getCImm()->getValue();
- assert(Val.getBitWidth() == BitWidth &&
- "Value bitwidth doesn't match definition type");
+ Optional<APInt> MaybeVal = GetImmediateValue(*MI);
+ if (!MaybeVal)
+ return None;
+ APInt &Val = *MaybeVal;
while (!SeenOpcodes.empty()) {
std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val();
switch (OpcodeAndSize.first) {
@@ -291,7 +307,7 @@ llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
if (!DstTy.isValid())
return nullptr;
while (DefMI->getOpcode() == TargetOpcode::COPY) {
- unsigned SrcReg = DefMI->getOperand(1).getReg();
+ Register SrcReg = DefMI->getOperand(1).getReg();
auto SrcTy = MRI.getType(SrcReg);
if (!SrcTy.isValid() || SrcTy != DstTy)
break;
@@ -395,6 +411,40 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
return false;
}
+Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1,
+ uint64_t Imm,
+ const MachineRegisterInfo &MRI) {
+ auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ if (MaybeOp1Cst) {
+ LLT Ty = MRI.getType(Op1);
+ APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
+ switch (Opcode) {
+ default:
+ break;
+ case TargetOpcode::G_SEXT_INREG:
+ return C1.trunc(Imm).sext(C1.getBitWidth());
+ }
+ }
+ return None;
+}
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
+
+MVT llvm::getMVTForLLT(LLT Ty) {
+ if (!Ty.isVector())
+ return MVT::getIntegerVT(Ty.getSizeInBits());
+
+ return MVT::getVectorVT(
+ MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
+ Ty.getNumElements());
+}
+
+LLT llvm::getLLTForMVT(MVT Ty) {
+ if (!Ty.isVector())
+ return LLT::scalar(Ty.getSizeInBits());
+
+ return LLT::vector(Ty.getVectorNumElements(),
+ Ty.getVectorElementType().getSizeInBits());
+}
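The two helpers added at the end of Utils.cpp convert between GlobalISel's LLT and SelectionDAG's MVT, and the mapping round-trips for plain integers and integer vectors. ConstantFoldExtOp, just above, folds G_SEXT_INREG; for example, with a 32-bit C1 = 0x000000FF and Imm = 8, trunc(8) yields 0xFF and sext(32) yields 0xFFFFFFFF, i.e. -1. A short illustration of the type helpers (values hypothetical):

    LLT S64 = LLT::scalar(64);
    MVT M1 = getMVTForLLT(S64);      // MVT::i64
    LLT R1 = getLLTForMVT(M1);       // LLT::scalar(64) again

    LLT V4S32 = LLT::vector(4, 32);  // <4 x s32>
    MVT M2 = getMVTForLLT(V4S32);    // MVT::v4i32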
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 09201c2e7bae..d4fa45fcb405 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -456,14 +456,14 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
bool HasExternal = false;
StringRef FirstExternalName;
- unsigned MaxAlign = 1;
+ Align MaxAlign;
unsigned CurIdx = 0;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getValueType();
// Make sure we use the same alignment AsmPrinter would use.
- unsigned Align = DL.getPreferredAlignment(Globals[j]);
- unsigned Padding = alignTo(MergedSize, Align) - MergedSize;
+ Align Alignment(DL.getPreferredAlignment(Globals[j]));
+ unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
MergedSize += Padding;
MergedSize += DL.getTypeAllocSize(Ty);
if (MergedSize > MaxOffset) {
@@ -478,7 +478,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
Inits.push_back(Globals[j]->getInitializer());
StructIdxs.push_back(CurIdx++);
- MaxAlign = std::max(MaxAlign, Align);
+ MaxAlign = std::max(MaxAlign, Alignment);
if (Globals[j]->hasExternalLinkage() && !HasExternal) {
HasExternal = true;
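GlobalMerge now tracks the running maximum alignment with the llvm::Align type instead of a raw unsigned, which also removes the shadowing between the old local named Align and the type of the same name. The padding arithmetic, sketched with made-up sizes (alignTo and Align come from llvm/Support/Alignment.h):

    uint64_t MergedSize = 13;
    Align Alignment(8);              // e.g. DL.getPreferredAlignment(GV)
    uint64_t Padding =
        alignTo(MergedSize, Alignment) - MergedSize; // alignTo(13, 8) = 16, so 3
    Align MaxAlign = std::max(Align(4), Alignment);  // Align(8)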
diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp
index 5f57cabbe865..6a0f98d2e2b4 100644
--- a/lib/CodeGen/HardwareLoops.cpp
+++ b/lib/CodeGen/HardwareLoops.cpp
@@ -183,7 +183,7 @@ bool HardwareLoops::runOnFunction(Function &F) {
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DL = &F.getParent()->getDataLayout();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
+ LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
M = F.getParent();
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index b17a253fe23f..d9caa5660695 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -285,14 +285,113 @@ namespace {
Prediction);
}
- bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
- unsigned TCycle, unsigned TExtra,
- MachineBasicBlock &FBB,
- unsigned FCycle, unsigned FExtra,
- BranchProbability Prediction) const {
- return TCycle > 0 && FCycle > 0 &&
- TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
- Prediction);
+ bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo,
+ MachineBasicBlock &CommBB, unsigned Dups,
+ BranchProbability Prediction, bool Forked) const {
+ const MachineFunction &MF = *TBBInfo.BB->getParent();
+ if (MF.getFunction().hasMinSize()) {
+ MachineBasicBlock::iterator TIB = TBBInfo.BB->begin();
+ MachineBasicBlock::iterator FIB = FBBInfo.BB->begin();
+ MachineBasicBlock::iterator TIE = TBBInfo.BB->end();
+ MachineBasicBlock::iterator FIE = FBBInfo.BB->end();
+
+ unsigned Dups1, Dups2;
+ if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+ *TBBInfo.BB, *FBBInfo.BB,
+ /*SkipUnconditionalBranches*/ true))
+ llvm_unreachable("should already have been checked by ValidDiamond");
+
+ unsigned BranchBytes = 0;
+ unsigned CommonBytes = 0;
+
+ // Count common instructions at the start of the true and false blocks.
+ for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+
+ // Count instructions at the end of the true and false blocks, after
+ // the ones we plan to predicate. Analyzable branches will be removed
+ // (unless this is a forked diamond), and all other instructions are
+ // common between the two blocks.
+ for (auto &I : make_range(TIE, TBBInfo.BB->end())) {
+ if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ } else {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ }
+ for (auto &I : make_range(FIE, FBBInfo.BB->end())) {
+ if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ } else {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ }
+ for (auto &I : CommBB.terminators()) {
+ if (I.isBranch()) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ }
+ }
+
+ // The common instructions in one branch will be eliminated, halving
+ // their code size.
+ CommonBytes /= 2;
+
+ // Count the instructions which we need to predicate.
+ unsigned NumPredicatedInstructions = 0;
+ for (auto &I : make_range(TIB, TIE)) {
+ if (!I.isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << "Predicating: " << I);
+ NumPredicatedInstructions++;
+ }
+ }
+ for (auto &I : make_range(FIB, FIE)) {
+ if (!I.isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << "Predicating: " << I);
+ NumPredicatedInstructions++;
+ }
+ }
+
+ // Even though we're optimising for size at the expense of performance,
+ // avoid creating really long predicated blocks.
+ if (NumPredicatedInstructions > 15)
+ return false;
+
+ // Some targets (e.g. Thumb2) need to insert extra instructions to
+ // start predicated blocks.
+ unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions(
+ MF, NumPredicatedInstructions);
+
+ LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes
+ << ", CommonBytes=" << CommonBytes
+ << ", NumPredicatedInstructions="
+ << NumPredicatedInstructions
+ << ", ExtraPredicateBytes=" << ExtraPredicateBytes
+ << ")\n");
+ return (BranchBytes + CommonBytes) > ExtraPredicateBytes;
+ } else {
+ unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups;
+ unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups;
+ bool Res = TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(
+ *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB,
+ FCycle, FBBInfo.ExtraCost2, Prediction);
+ LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle
+ << ", FCycle=" << FCycle
+ << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra="
+ << FBBInfo.ExtraCost2 << ") = " << Res << "\n");
+ return Res;
+ }
}
/// Returns true if Block ends without a terminator.
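Under minsize, the new path above converts only when the bytes freed by deleting branches and deduplicating shared instructions outweigh what the target needs to predicate the blocks. A worked example with hypothetical byte counts:

    unsigned BranchBytes = 8;          // two 4-byte branches removed
    unsigned CommonBytes = 8 / 2;      // 8 shared bytes; one copy survives
    unsigned ExtraPredicateBytes = 4;  // e.g. a couple of Thumb2 IT insts
    bool ShouldConvert =
        (BranchBytes + CommonBytes) > ExtraPredicateBytes; // 12 > 4: convert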
@@ -356,8 +455,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (!PreRegAlloc) {
// Tail merging tends to expose more if-conversion opportunities.
BranchFolder BF(true, false, MBFI, *MBPI);
- BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>());
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ BFChange = BF.OptimizeFunction(
+ MF, TII, ST.getRegisterInfo(),
+ MMIWP ? &MMIWP->getMMI() : nullptr);
}
LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
@@ -496,8 +597,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (MadeChange && IfCvtBranchFold) {
BranchFolder BF(false, false, MBFI, *MBPI);
- BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>());
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ BF.OptimizeFunction(
+ MF, TII, MF.getSubtarget().getRegisterInfo(),
+ MMIWP ? &MMIWP->getMMI() : nullptr);
}
MadeChange |= BFChange;
@@ -569,6 +672,9 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
BranchProbability Prediction) const {
Dups = 0;
+ if (TrueBBI.BB == FalseBBI.BB)
+ return false;
+
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -835,6 +941,8 @@ bool IfConverter::ValidForkedDiamond(
TrueBBICalc.BB = TrueBBI.BB;
FalseBBICalc.BB = FalseBBI.BB;
+ TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+ FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
return false;
@@ -892,6 +1000,8 @@ bool IfConverter::ValidDiamond(
TrueBBICalc.BB = TrueBBI.BB;
FalseBBICalc.BB = FalseBBI.BB;
+ TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+ FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
return false;
// The size is used to decide whether to if-convert, and the shared portions
@@ -912,6 +1022,12 @@ void IfConverter::AnalyzeBranches(BBInfo &BBI) {
BBI.BrCond.clear();
BBI.IsBrAnalyzable =
!TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ if (!BBI.IsBrAnalyzable) {
+ BBI.TrueBB = nullptr;
+ BBI.FalseBB = nullptr;
+ BBI.BrCond.clear();
+ }
+
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
BBI.IsBrReversible = (RevCond.size() == 0) ||
!TII->reverseBranchCondition(RevCond);
@@ -1173,13 +1289,9 @@ void IfConverter::AnalyzeBlock(
if (CanRevCond) {
BBInfo TrueBBICalc, FalseBBICalc;
- auto feasibleDiamond = [&]() {
- bool MeetsSize = MeetIfcvtSizeLimit(
- *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
- TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
- *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
- FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
- Prediction);
+ auto feasibleDiamond = [&](bool Forked) {
+ bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB,
+ Dups + Dups2, Prediction, Forked);
bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
/* IsTriangle */ false, /* RevCond */ false,
/* hasCommonTail */ true);
@@ -1191,7 +1303,7 @@ void IfConverter::AnalyzeBlock(
if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
TrueBBICalc, FalseBBICalc)) {
- if (feasibleDiamond()) {
+ if (feasibleDiamond(false)) {
// Diamond:
// EBB
// / \_
@@ -1200,14 +1312,14 @@ void IfConverter::AnalyzeBlock(
// \ /
// TailBB
// Note TailBB can be empty.
- Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ Tokens.push_back(std::make_unique<IfcvtToken>(
BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2,
(bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
Enqueued = true;
}
} else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
TrueBBICalc, FalseBBICalc)) {
- if (feasibleDiamond()) {
+ if (feasibleDiamond(true)) {
// ForkedDiamond:
// if TBB and FBB have a common tail that includes their conditional
// branch instructions, then we can If Convert this pattern.
@@ -1218,7 +1330,7 @@ void IfConverter::AnalyzeBlock(
// / \ / \
// FalseBB TrueBB FalseBB
//
- Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ Tokens.push_back(std::make_unique<IfcvtToken>(
BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2,
(bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
Enqueued = true;
@@ -1238,7 +1350,7 @@ void IfConverter::AnalyzeBlock(
// | /
// FBB
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups));
Enqueued = true;
}
@@ -1247,7 +1359,7 @@ void IfConverter::AnalyzeBlock(
TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups));
Enqueued = true;
}
@@ -1263,7 +1375,7 @@ void IfConverter::AnalyzeBlock(
// |
// FBB
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups));
Enqueued = true;
}
@@ -1275,7 +1387,7 @@ void IfConverter::AnalyzeBlock(
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
- Tokens.push_back(llvm::make_unique<IfcvtToken>(BBI, ICTriangleFalse,
+ Tokens.push_back(std::make_unique<IfcvtToken>(BBI, ICTriangleFalse,
FNeedSub, Dups));
Enqueued = true;
}
@@ -1287,7 +1399,7 @@ void IfConverter::AnalyzeBlock(
FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups));
Enqueued = true;
}
@@ -1297,7 +1409,7 @@ void IfConverter::AnalyzeBlock(
FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups));
Enqueued = true;
}
}
@@ -1730,6 +1842,11 @@ bool IfConverter::IfConvertDiamondCommon(
++i;
}
while (NumDups1 != 0) {
+ // Since this instruction is going to be deleted, update call
+ // site info state if the instruction is a call instruction.
+ if (DI2->isCall(MachineInstr::IgnoreBundle))
+ MBB2.getParent()->eraseCallSiteInfo(&*DI2);
+
++DI2;
if (DI2 == MBB2.end())
break;
@@ -1758,14 +1875,27 @@ bool IfConverter::IfConvertDiamondCommon(
if (!BBI1->IsBrAnalyzable)
verifySameBranchInstructions(&MBB1, &MBB2);
#endif
- BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB);
- // Remove duplicated instructions.
+ // Remove duplicated instructions from the tail of MBB1: any branch
+ // instructions, and the common instructions counted by NumDups2.
DI1 = MBB1.end();
+ while (DI1 != MBB1.begin()) {
+ MachineBasicBlock::iterator Prev = std::prev(DI1);
+ if (!Prev->isBranch() && !Prev->isDebugInstr())
+ break;
+ DI1 = Prev;
+ }
for (unsigned i = 0; i != NumDups2; ) {
// NumDups2 only counted non-dbg_value instructions, so this won't
// run off the head of the list.
assert(DI1 != MBB1.begin());
+
--DI1;
+
+ // Since this instruction is going to be deleted, update call
+ // site info state if the instruction is a call instruction.
+ if (DI1->isCall(MachineInstr::IgnoreBundle))
+ MBB1.getParent()->eraseCallSiteInfo(&*DI1);
+
// skip dbg_value instructions
if (!DI1->isDebugInstr())
++i;
@@ -1815,7 +1945,7 @@ bool IfConverter::IfConvertDiamondCommon(
for (const MachineOperand &MO : FI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef()) {
@@ -1983,7 +2113,7 @@ static bool MaySpeculate(const MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef() && !LaterRedefs.count(Reg))
@@ -2050,6 +2180,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
break;
MachineInstr *MI = MF.CloneMachineInstr(&I);
+ // Make a copy of the call site info.
+ if (MI->isCall(MachineInstr::IgnoreBundle))
+ MF.copyCallSiteInfo(&I, MI);
+
ToBBI.BB->insert(ToBBI.BB->end(), MI);
ToBBI.NonPredSize++;
unsigned ExtraPredCost = TII->getPredicationCost(I);
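These IfConversion hunks also keep the function's call site info table consistent as calls are deleted or cloned, and InlineSpiller below does the same when folding. The three operations the patch uses, sketched with assumed in-scope MachineInstr pointers Old, Clone and Fold:

    if (Old->isCall(MachineInstr::IgnoreBundle))
      MF.eraseCallSiteInfo(Old);       // Old is about to be deleted
    MF.copyCallSiteInfo(Old, Clone);   // Clone duplicates Old
    MF.moveCallSiteInfo(Old, Fold);    // Fold replaces Old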
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index 1e82ea659617..b7dcaec90106 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -278,12 +278,12 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A,
if (!(MOA.isReg() && MOA.getReg()))
continue;
- unsigned RegA = MOA.getReg();
+ Register RegA = MOA.getReg();
for (auto MOB : B->operands()) {
if (!(MOB.isReg() && MOB.getReg()))
continue;
- unsigned RegB = MOB.getReg();
+ Register RegB = MOB.getReg();
if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef()))
return false;
@@ -517,7 +517,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
//
// we must ensure that there are no instructions between the 'test' and
// conditional jump that modify %rax.
- const unsigned PointerReg = MBP.LHS.getReg();
+ const Register PointerReg = MBP.LHS.getReg();
assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block");
@@ -689,7 +689,7 @@ void ImplicitNullChecks::rewriteNullChecks(
for (const MachineOperand &MO : FaultingInstr->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg || MBB->isLiveIn(Reg))
continue;
MBB->addLiveIn(Reg);
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 41ae8061a917..2408f18678e4 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "LiveRangeCalc.h"
#include "Spiller.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
@@ -26,6 +25,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -346,8 +346,7 @@ void InlineSpiller::collectRegsToSpill() {
}
bool InlineSpiller::isSibling(unsigned Reg) {
- return TargetRegisterInfo::isVirtualRegister(Reg) &&
- VRM.getOriginal(Reg) == Original;
+ return Register::isVirtualRegister(Reg) && VRM.getOriginal(Reg) == Original;
}
/// It is beneficial to spill to an earlier place in the same BB in case
@@ -377,7 +376,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
#endif
- unsigned SrcReg = CopyMI.getOperand(1).getReg();
+ Register SrcReg = CopyMI.getOperand(1).getReg();
LiveInterval &SrcLI = LIS.getInterval(SrcReg);
VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
LiveQueryResult SrcQ = SrcLI.Query(Idx);
@@ -845,9 +844,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) {
if (!MO->isReg())
continue;
- unsigned Reg = MO->getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
- MRI.isReserved(Reg)) {
+ Register Reg = MO->getReg();
+ if (!Reg || Register::isVirtualRegister(Reg) || MRI.isReserved(Reg)) {
continue;
}
// Skip non-Defs, including undef uses and internal reads.
@@ -869,7 +867,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
--NumSpills;
LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
if (MI->isCall())
- MI->getMF()->updateCallSiteInfo(MI, FoldMI);
+ MI->getMF()->moveCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
// Insert any new instructions other than FoldMI into the LIS maps.
@@ -1111,8 +1109,8 @@ void InlineSpiller::spillAll() {
void InlineSpiller::spill(LiveRangeEdit &edit) {
++NumSpilledRanges;
Edit = &edit;
- assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
- && "Trying to spill a stack slot.");
+ assert(!Register::isStackSlot(edit.getReg()) &&
+ "Trying to spill a stack slot.");
// Share a stack slot among all descendants of Original.
Original = VRM.getOriginal(edit.getReg());
StackSlot = VRM.getStackSlot(Original);
@@ -1147,7 +1145,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
// save a copy of LiveInterval in StackSlotToOrigLI because the original
// LiveInterval may be cleared after all its references are spilled.
if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) {
- auto LI = llvm::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight);
+ auto LI = std::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight);
LI->assign(OrigLI, Allocator);
StackSlotToOrigLI[StackSlot] = std::move(LI);
}
@@ -1459,7 +1457,7 @@ void HoistSpillHelper::hoistAllSpills() {
LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
unsigned Original = VRM.getPreSplitReg(Reg);
if (!MRI.def_empty(Reg))
Virt2SiblingsMap[Original].insert(Reg);
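HoistSpillHelper walks every virtual register by dense index; Register::index2VirtReg turns an index back into a tagged virtual register number. The iteration idiom, for reference (MRI is an in-scope MachineRegisterInfo):

    for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = Register::index2VirtReg(i);
      if (!MRI.def_empty(Reg)) {
        // Reg has at least one definition in this function.
      }
    }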
diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 9525da849e2a..770c4952d169 100644
--- a/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -940,8 +940,8 @@ public:
/// \param V input value
/// \param Result result polynomial
static void computePolynomial(Value &V, Polynomial &Result) {
- if (isa<BinaryOperator>(&V))
- computePolynomialBinOp(*dyn_cast<BinaryOperator>(&V), Result);
+ if (auto *BO = dyn_cast<BinaryOperator>(&V))
+ computePolynomialBinOp(*BO, Result);
else
Result = Polynomial(&V);
}
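The InterleavedLoadCombine change is a small cleanup: an isa<> check followed by dyn_cast<> becomes a single dyn_cast<>, LLVM's idiomatic test-and-cast. In general form (V is any Value reference):

    if (auto *BO = dyn_cast<BinaryOperator>(&V)) {
      // BO is non-null exactly when V is a BinaryOperator.
    } else {
      // V is something else; only one RTTI query was made.
    }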
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 886ae7e94adb..1c362aec6e67 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -96,14 +96,15 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) {
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static TargetPassConfig *
addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM,
- bool DisableVerify, MachineModuleInfo &MMI) {
+ bool DisableVerify,
+ MachineModuleInfoWrapperPass &MMIWP) {
// Targets may override createPassConfig to provide a target-specific
// subclass.
TargetPassConfig *PassConfig = TM.createPassConfig(PM);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
PM.add(PassConfig);
- PM.add(&MMI);
+ PM.add(&MMIWP);
if (PassConfig->addISelPasses())
return nullptr;
@@ -139,7 +140,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
std::unique_ptr<MCAsmBackend> MAB(
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
- auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
+ auto FOut = std::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
Context, std::move(FOut), Options.MCOptions.AsmVerbose,
Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE),
@@ -186,17 +187,15 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
return false;
}
-bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- raw_pwrite_stream &Out,
- raw_pwrite_stream *DwoOut,
- CodeGenFileType FileType,
- bool DisableVerify,
- MachineModuleInfo *MMI) {
+bool LLVMTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType, bool DisableVerify,
+ MachineModuleInfoWrapperPass *MMIWP) {
// Add common CodeGen passes.
- if (!MMI)
- MMI = new MachineModuleInfo(this);
+ if (!MMIWP)
+ MMIWP = new MachineModuleInfoWrapperPass(this);
TargetPassConfig *PassConfig =
- addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP);
if (!PassConfig)
return true;
@@ -206,12 +205,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// testing to be meaningful, we need to ensure that the symbols created
// are MCSymbolXCOFF variants, which requires that
// the TargetLoweringObjectFile instance has been initialized.
- MCContext &Ctx = MMI->getContext();
+ MCContext &Ctx = MMIWP->getMMI().getContext();
const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering())
.Initialize(Ctx, *this);
}
PM.add(createPrintMIRPass(Out));
- } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext()))
+ } else if (addAsmPrinter(PM, Out, DwoOut, FileType,
+ MMIWP->getMMI().getContext()))
return true;
PM.add(createFreeMachineFunctionPass());
@@ -227,15 +227,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
raw_pwrite_stream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- MachineModuleInfo *MMI = new MachineModuleInfo(this);
+ MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(this);
TargetPassConfig *PassConfig =
- addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP);
if (!PassConfig)
return true;
assert(TargetPassConfig::willCompleteCodeGenPipeline() &&
"Cannot emit MC with limited codegen pipeline");
- Ctx = &MMI->getContext();
+ Ctx = &MMIWP->getMMI().getContext();
if (Options.MCOptions.MCSaveTempLabels)
Ctx->setAllowTemporaryLabels(false);
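With MachineModuleInfo now carried by MachineModuleInfoWrapperPass, callers of addPassesToEmitFile either supply a wrapper explicitly or pass nullptr and let the target machine allocate one. A hedged sketch of the caller side, assuming TM, M, Out and FileType already exist:

    llvm::legacy::PassManager PM;
    if (TM->addPassesToEmitFile(PM, Out, /*DwoOut=*/nullptr, FileType,
                                /*DisableVerify=*/false,
                                /*MMIWP=*/nullptr)) // target allocates one
      llvm::report_fatal_error("file emission not supported");
    PM.run(M); // M: the llvm::Module being compiled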
diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 200ac0ba15bf..cef5085ae079 100644
--- a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -73,18 +73,18 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
if (!MDT) {
LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n");
- OwnedMDT = make_unique<MachineDominatorTree>();
+ OwnedMDT = std::make_unique<MachineDominatorTree>();
OwnedMDT->getBase().recalculate(*MF);
MDT = OwnedMDT.get();
}
// Generate LoopInfo from it.
- OwnedMLI = make_unique<MachineLoopInfo>();
+ OwnedMLI = std::make_unique<MachineLoopInfo>();
OwnedMLI->getBase().analyze(MDT->getBase());
MLI = OwnedMLI.get();
}
- OwnedMBFI = make_unique<MachineBlockFrequencyInfo>();
+ OwnedMBFI = std::make_unique<MachineBlockFrequencyInfo>();
OwnedMBFI->calculate(*MF, MBPI, *MLI);
return *OwnedMBFI.get();
}
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 503821537ed9..ac3ef0e709f3 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index a669e64692b9..f1b237d83e8c 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -7,14 +7,23 @@
//===----------------------------------------------------------------------===//
///
/// This pass implements a data flow analysis that propagates debug location
-/// information by inserting additional DBG_VALUE instructions into the machine
-/// instruction stream. The pass internally builds debug location liveness
-/// ranges to determine the points where additional DBG_VALUEs need to be
-/// inserted.
+/// information by inserting additional DBG_VALUE insts into the machine
+/// instruction stream. Before running, each DBG_VALUE inst corresponds to a
+/// source assignment of a variable. Afterwards, a DBG_VALUE inst specifies a
+/// variable location for the current basic block (see SourceLevelDebugging.rst).
///
/// This is a separate pass from DbgValueHistoryCalculator to facilitate
/// testing and improve modularity.
///
+/// Each variable location is represented by a VarLoc object that identifies the
+/// source variable, its current machine-location, and the DBG_VALUE inst that
+/// specifies the location. Each VarLoc is indexed in the (function-scope)
+/// VarLocMap, giving each VarLoc a unique index. Rather than operate directly
+/// on machine locations, the dataflow analysis in this pass identifies
+/// locations by their index in the VarLocMap, meaning all the variable
+/// locations in a block can be described by a sparse vector of VarLocMap
+/// indexes.
+///
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
@@ -68,6 +77,7 @@ using namespace llvm;
#define DEBUG_TYPE "livedebugvalues"
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
+STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed");
// If @MI is a DBG_VALUE with a debug value described by a defined
// register, returns the number of that register; otherwise, returns 0.
@@ -179,8 +189,16 @@ private:
}
};
+ /// Identity of the variable at this location.
const DebugVariable Var;
- const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
+
+ /// The expression applied to this location.
+ const DIExpression *Expr;
+
+ /// DBG_VALUE to clone var/expr information from if this location
+ /// is moved.
+ const MachineInstr &MI;
+
mutable UserValueScopes UVS;
enum VarLocKind {
InvalidKind = 0,
@@ -201,9 +219,9 @@ private:
const ConstantInt *CImm;
} Loc;
- VarLoc(const MachineInstr &MI, LexicalScopes &LS,
- VarLocKind K = InvalidKind)
- : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS){
+ VarLoc(const MachineInstr &MI, LexicalScopes &LS)
+ : Var(MI), Expr(MI.getDebugExpression()), MI(MI),
+ UVS(MI.getDebugLoc(), LS) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
@@ -225,17 +243,78 @@ private:
"entry values must be register locations");
}
- /// The constructor for spill locations.
- VarLoc(const MachineInstr &MI, unsigned SpillBase, int SpillOffset,
- LexicalScopes &LS)
- : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) {
- assert(MI.isDebugValue() && "not a DBG_VALUE");
- assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
- Kind = SpillLocKind;
- Loc.SpillLocation = {SpillBase, SpillOffset};
+ /// Take the variable and machine-location in DBG_VALUE MI, and build an
+ /// entry location using the given expression.
+ static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS,
+ const DIExpression *EntryExpr) {
+ VarLoc VL(MI, LS);
+ VL.Kind = EntryValueKind;
+ VL.Expr = EntryExpr;
+ return VL;
+ }
+
+ /// Copy the register location in DBG_VALUE MI, updating the register to
+ /// be NewReg.
+ static VarLoc CreateCopyLoc(const MachineInstr &MI, LexicalScopes &LS,
+ unsigned NewReg) {
+ VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
+ VL.Loc.RegNo = NewReg;
+ return VL;
+ }
+
+ /// Take the variable described by DBG_VALUE MI, and create a VarLoc
+ /// locating it in the specified spill location.
+ static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase,
+ int SpillOffset, LexicalScopes &LS) {
+ VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
+ VL.Kind = SpillLocKind;
+ VL.Loc.SpillLocation = {SpillBase, SpillOffset};
+ return VL;
}
- // Is the Loc field a constant or constant object?
+ /// Create a DBG_VALUE representing this VarLoc in the given function.
+ /// Copies variable-specific information such as DILocalVariable and
+ /// inlining information from the original DBG_VALUE instruction, which may
+ /// have been created several transfers ago.
+ MachineInstr *BuildDbgValue(MachineFunction &MF) const {
+ const DebugLoc &DbgLoc = MI.getDebugLoc();
+ bool Indirect = MI.isIndirectDebugValue();
+ const auto &IID = MI.getDesc();
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *DIExpr = MI.getDebugExpression();
+
+ switch (Kind) {
+ case EntryValueKind:
+ // An entry value is a register location -- but with an updated
+ // expression.
+ return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr);
+ case RegisterKind:
+ // Register locations are like the source DBG_VALUE, but with the
+ // register number from this VarLoc.
+ return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, DIExpr);
+ case SpillLocKind: {
+ // Spills are indirect DBG_VALUEs, with a base register and offset.
+ // Use the original DBG_VALUE's expression to build the spill location
+ // on top of. FIXME: spill locations created before this pass runs
+ // are not recognized, and not handled here.
+ auto *SpillExpr = DIExpression::prepend(
+ DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset);
+ unsigned Base = Loc.SpillLocation.SpillBase;
+ return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr);
+ }
+ case ImmediateKind: {
+ MachineOperand MO = MI.getOperand(0);
+ return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr);
+ }
+ case InvalidKind:
+ llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc");
+ }
+ llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum");
+ }
+
+ /// Is the Loc field a constant or constant object?
bool isConstant() const { return Kind == ImmediateKind; }
/// If this variable is described by a register, return it,
@@ -251,18 +330,42 @@ private:
bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const { MI.dump(); }
+ // TRI can be null.
+ void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const {
+ dbgs() << "VarLoc(";
+ switch (Kind) {
+ case RegisterKind:
+ case EntryValueKind:
+ dbgs() << printReg(Loc.RegNo, TRI);
+ break;
+ case SpillLocKind:
+ dbgs() << printReg(Loc.SpillLocation.SpillBase, TRI);
+ dbgs() << "[" << Loc.SpillLocation.SpillOffset << "]";
+ break;
+ case ImmediateKind:
+ dbgs() << Loc.Immediate;
+ break;
+ case InvalidKind:
+ llvm_unreachable("Invalid VarLoc in dump method");
+ }
+
+ dbgs() << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", ";
+ if (Var.getInlinedAt())
+ dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n";
+ else
+ dbgs() << "(null))\n";
+ }
#endif
bool operator==(const VarLoc &Other) const {
return Kind == Other.Kind && Var == Other.Var &&
- Loc.Hash == Other.Loc.Hash;
+ Loc.Hash == Other.Loc.Hash && Expr == Other.Expr;
}
/// This operator guarantees that VarLocs are sorted by Variable first.
bool operator<(const VarLoc &Other) const {
- return std::tie(Var, Kind, Loc.Hash) <
- std::tie(Other.Var, Other.Kind, Other.Loc.Hash);
+ return std::tie(Var, Kind, Loc.Hash, Expr) <
+ std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr);
}
};
@@ -271,8 +374,8 @@ private:
using VarLocSet = SparseBitVector<>;
using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
struct TransferDebugPair {
- MachineInstr *TransferInst;
- MachineInstr *DebugInst;
+ MachineInstr *TransferInst; /// Instruction where this transfer occurs.
+ unsigned LocationID; /// Location number for the transfer dest.
};
using TransferMap = SmallVector<TransferDebugPair, 4>;
@@ -320,6 +423,14 @@ private:
Vars.insert({Var, VarLocID});
}
+ /// Insert a set of ranges.
+ void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) {
+ for (unsigned Id : ToLoad) {
+ const VarLoc &Var = Map[Id];
+ insert(Id, Var.Var);
+ }
+ }
+
/// Empty the set.
void clear() {
VarLocs.clear();
@@ -333,8 +444,18 @@ private:
}
};
- bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,
- unsigned &Reg);
+ /// Tests whether this instruction is a spill to a stack location.
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+ /// Decide if @MI is a spill instruction and return true if it is. We use 2
+ /// criteria to make this decision:
+ /// - Is this instruction a store to a spill slot?
+ /// - Is there a register operand that is both used and killed?
+ /// TODO: Store optimization can fold spills into other stores (including
+ /// other spills). We do not handle this yet (more than one memory operand).
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+
/// If a given instruction is identified as a spill, return the spill location
/// and set \p Reg to the spilled register.
Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
@@ -361,13 +482,13 @@ private:
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers,
DebugParamMap &DebugEntryVals);
- bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
+ bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
- bool process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ void process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
TransferMap &Transfers, DebugParamMap &DebugEntryVals,
- bool transferChanges, OverlapMap &OverlapFragments,
+ OverlapMap &OverlapFragments,
VarToFragments &SeenFragments);
void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
@@ -376,7 +497,12 @@ private:
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks);
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks,
+ VarLocInMBB &PendingInLocs);
+
+ /// Create DBG_VALUE insts for inlocs that have been propagated but
+ /// had their instruction creation deferred.
+ void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs);
bool ExtendRanges(MachineFunction &MF);
@@ -518,7 +644,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
const VarLoc &VL = VarLocIDs[VLL];
Out << " Var: " << VL.Var.getVar()->getName();
Out << " MI: ";
- VL.dump();
+ VL.dump(TRI, Out);
}
}
Out << "\n";
@@ -567,11 +693,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
ID = VarLocIDs.insert(VL);
OpenRanges.insert(ID, VL.Var);
} else if (MI.hasOneMemOperand()) {
- // It's a stack spill -- fetch spill base and offset.
- VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
- VarLoc VL(MI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS);
- ID = VarLocIDs.insert(VL);
- OpenRanges.insert(ID, VL.Var);
+ llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?");
} else {
// This must be an undefined location. We should leave OpenRanges closed.
assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 &&
@@ -585,7 +707,6 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI,
TransferMap &Transfers,
DebugParamMap &DebugEntryVals,
SparseBitVector<> &KillSet) {
- MachineFunction *MF = MI.getParent()->getParent();
for (unsigned ID : KillSet) {
if (!VarLocIDs[ID].Var.getVar()->isParameter())
continue;
@@ -600,20 +721,12 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI,
auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()];
DIExpression *NewExpr = DIExpression::prepend(
ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue);
- MachineInstr *EntryValDbgMI =
- BuildMI(*MF, ParamDebugInstr->getDebugLoc(), ParamDebugInstr->getDesc(),
- ParamDebugInstr->isIndirectDebugValue(),
- ParamDebugInstr->getOperand(0).getReg(),
- ParamDebugInstr->getDebugVariable(), NewExpr);
-
- if (ParamDebugInstr->isIndirectDebugValue())
- EntryValDbgMI->getOperand(1).setImm(
- ParamDebugInstr->getOperand(1).getImm());
-
- Transfers.push_back({&MI, EntryValDbgMI});
- VarLoc VL(*EntryValDbgMI, LS);
- unsigned EntryValLocID = VarLocIDs.insert(VL);
- OpenRanges.insert(EntryValLocID, VL.Var);
+
+ VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr);
+
+ unsigned EntryValLocID = VarLocIDs.insert(EntryLoc);
+ Transfers.push_back({&MI, EntryValLocID});
+ OpenRanges.insert(EntryValLocID, EntryLoc.Var);
}
}
@@ -627,21 +740,19 @@ void LiveDebugValues::insertTransferDebugPair(
VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind,
unsigned NewReg) {
const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
- MachineFunction *MF = MI.getParent()->getParent();
- MachineInstr *NewDebugInstr;
auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr,
- &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) {
+ &VarLocIDs](VarLoc &VL) {
unsigned LocId = VarLocIDs.insert(VL);
// Close this variable's previous location range.
DebugVariable V(*DebugInstr);
OpenRanges.erase(V);
+ // Record the new location as an open range, and a postponed transfer
+ // inserting a DBG_VALUE for this location.
OpenRanges.insert(LocId, VL.Var);
- // The newly created DBG_VALUE instruction NewDebugInstr must be inserted
- // after MI. Keep track of the pairing.
- TransferDebugPair MIP = {&MI, NewDebugInstr};
+ TransferDebugPair MIP = {&MI, LocId};
Transfers.push_back(MIP);
};
@@ -653,37 +764,25 @@ void LiveDebugValues::insertTransferDebugPair(
"No register supplied when handling a copy of a debug value");
// Create a DBG_VALUE instruction to describe the Var in its new
// register location.
- NewDebugInstr = BuildMI(
- *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(),
- DebugInstr->isIndirectDebugValue(), NewReg,
- DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression());
- if (DebugInstr->isIndirectDebugValue())
- NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
- VarLoc VL(*NewDebugInstr, LS);
- ProcessVarLoc(VL, NewDebugInstr);
- LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: ";
- NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
- /*SkipOpers*/false, /*SkipDebugLoc*/false,
- /*AddNewLine*/true, TII));
+ VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg);
+ ProcessVarLoc(VL);
+ LLVM_DEBUG({
+ dbgs() << "Creating VarLoc for register copy:";
+ VL.dump(TRI);
+ });
return;
}
case TransferKind::TransferSpill: {
// Create a DBG_VALUE instruction to describe the Var in its spilled
// location.
VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
- auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(),
- DIExpression::ApplyOffset,
- SpillLocation.SpillOffset);
- NewDebugInstr = BuildMI(
- *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true,
- SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr);
- VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase,
- SpillLocation.SpillOffset, LS);
- ProcessVarLoc(VL, NewDebugInstr);
- LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
- NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
- /*SkipOpers*/false, /*SkipDebugLoc*/false,
- /*AddNewLine*/true, TII));
+ VarLoc VL = VarLoc::CreateSpillLoc(*DebugInstr, SpillLocation.SpillBase,
+ SpillLocation.SpillOffset, LS);
+ ProcessVarLoc(VL);
+ LLVM_DEBUG({
+ dbgs() << "Creating VarLoc for spill:";
+ VL.dump(TRI);
+ });
return;
}
case TransferKind::TransferRestore: {
@@ -691,15 +790,14 @@ void LiveDebugValues::insertTransferDebugPair(
"No register supplied when handling a restore of a debug value");
MachineFunction *MF = MI.getMF();
DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
- NewDebugInstr =
- BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
- NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
- VarLoc VL(*NewDebugInstr, LS);
- ProcessVarLoc(VL, NewDebugInstr);
- LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
- NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
- /*SkipOpers*/false, /*SkipDebugLoc*/false,
- /*AddNewLine*/true, TII));
+ // DebugInstr refers to the pre-spill location, so we can reuse
+ // its expression.
+ VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg);
+ ProcessVarLoc(VL);
+ LLVM_DEBUG({
+ dbgs() << "Creating VarLoc for restore:";
+ VL.dump(TRI);
+ });
return;
}
}
@@ -719,7 +817,7 @@ void LiveDebugValues::transferRegisterDef(
// instructions never clobber SP, because some backends (e.g., AArch64)
// never list SP in the regmask.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
- TRI->isPhysicalRegister(MO.getReg()) &&
+ Register::isPhysicalRegister(MO.getReg()) &&
!(MI.isCall() && MO.getReg() == SP)) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
@@ -748,16 +846,8 @@ void LiveDebugValues::transferRegisterDef(
}
}
-/// Decide if @MI is a spill instruction and return true if it is. We use 2
-/// criteria to make this decision:
-/// - Is this instruction a store to a spill slot?
-/// - Is there a register operand that is both used and killed?
-/// TODO: Store optimization can fold spills into other stores (including
-/// other spills). We do not handle this yet (more than one memory operand).
bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
- MachineFunction *MF, unsigned &Reg) {
- SmallVector<const MachineMemOperand*, 1> Accesses;
-
+ MachineFunction *MF) {
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
return false;
@@ -766,6 +856,14 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
return false; // This is not a spill instruction, since no valid size was
// returned from either function.
+ return true;
+}
+
+bool LiveDebugValues::isLocationSpill(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!isSpillInstruction(MI, MF))
+ return false;
+
auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
if (!MO.isReg() || !MO.isUse()) {
Reg = 0;
@@ -834,7 +932,37 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
- if (isSpillInstruction(MI, MF, Reg)) {
+ // First, if there are any DBG_VALUEs pointing at a spill slot that is
+ // written to, then close the variable location. The value in memory
+ // will have changed.
+ VarLocSet KillSet;
+ if (isSpillInstruction(MI, MF)) {
+ Loc = extractSpillBaseRegAndOffset(MI);
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ const VarLoc &VL = VarLocIDs[ID];
+ if (VL.Kind == VarLoc::SpillLocKind && VL.Loc.SpillLocation == *Loc) {
+ // This location is overwritten by the current instruction -- terminate
+ // the open range, and insert an explicit DBG_VALUE $noreg.
+ //
+ // Doing this at a later stage would require re-interpreting all
+ // DBG_VALUEs and DIExpressions to identify whether they point at
+ // memory, and then analysing all memory writes to see if they
+ // overwrite that memory, which is expensive.
+ //
+ // At this stage, we already know which DBG_VALUEs are for spills and
+ // where they are located; it's best to handle overwrites now.
+ KillSet.set(ID);
+ VarLoc UndefVL = VarLoc::CreateCopyLoc(VL.MI, LS, 0);
+ unsigned UndefLocID = VarLocIDs.insert(UndefVL);
+ Transfers.push_back({&MI, UndefLocID});
+ }
+ }
+ OpenRanges.erase(KillSet, VarLocIDs);
+ }
+
+ // Try to recognise spill and restore instructions that may create a new
+ // variable location.
+ if (isLocationSpill(MI, MF, Reg)) {
TKind = TransferKind::TransferSpill;
LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump(););
LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
@@ -854,6 +982,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
} else if (TKind == TransferKind::TransferRestore &&
+ VarLocIDs[ID].Kind == VarLoc::SpillLocKind &&
VarLocIDs[ID].Loc.SpillLocation == *Loc) {
LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
@@ -885,8 +1014,8 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
return false;
};
- unsigned SrcReg = SrcRegOp->getReg();
- unsigned DestReg = DestRegOp->getReg();
+ Register SrcReg = SrcRegOp->getReg();
+ Register DestReg = DestRegOp->getReg();
// We want to recognize instructions where the destination register is a
// callee saved register. If a register that could be clobbered by the call is
@@ -906,26 +1035,20 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
}
/// Terminate all open ranges at the end of the current basic block.
-bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs,
- const VarLocMap &VarLocIDs) {
+bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB,
+ OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs,
+ const VarLocMap &VarLocIDs) {
bool Changed = false;
- const MachineBasicBlock *CurMBB = MI.getParent();
- if (!(MI.isTerminator() || (&MI == &CurMBB->back())))
- return false;
-
- if (OpenRanges.empty())
- return false;
LLVM_DEBUG(for (unsigned ID
: OpenRanges.getVarLocs()) {
// Copy OpenRanges to OutLocs, if not already present.
dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
- VarLocIDs[ID].dump();
+ VarLocIDs[ID].dump(TRI);
});
VarLocSet &VLS = OutLocs[CurMBB];
- Changed = VLS |= OpenRanges.getVarLocs();
+ Changed = VLS != OpenRanges.getVarLocs();
// New OutLocs set may be different due to spill, restore or register
// copy instruction processing.
if (Changed)
@@ -995,26 +1118,17 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
}
/// This routine creates OpenRanges and OutLocs.
-bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers, DebugParamMap &DebugEntryVals,
- bool transferChanges,
+ TransferMap &Transfers,
+ DebugParamMap &DebugEntryVals,
OverlapMap &OverlapFragments,
VarToFragments &SeenFragments) {
- bool Changed = false;
transferDebugValue(MI, OpenRanges, VarLocIDs);
transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers,
DebugEntryVals);
- if (transferChanges) {
- transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
- transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
- } else {
- // Build up a map of overlapping fragments on the first run through.
- if (MI.isDebugValue())
- accumulateFragmentMap(MI, SeenFragments, OverlapFragments);
- }
- Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
- return Changed;
+ transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
+ transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
}
/// This routine joins the analysis results of all incoming edges in @MBB by
@@ -1024,7 +1138,8 @@ bool LiveDebugValues::join(
MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) {
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks,
+ VarLocInMBB &PendingInLocs) {
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
@@ -1034,9 +1149,11 @@ bool LiveDebugValues::join(
// can be joined.
int NumVisited = 0;
for (auto p : MBB.predecessors()) {
- // Ignore unvisited predecessor blocks. As we are processing
- // the blocks in reverse post-order any unvisited block can
- // be considered to not remove any incoming values.
+ // Ignore backedges if we have not visited the predecessor yet. As the
+ // predecessor hasn't yet had locations propagated into it, most locations
+ // will not yet be valid, so treat them as all being uninitialized and
+ // potentially valid. If a location guessed to be correct here is
+ // invalidated later, we will remove it when we revisit this block.
if (!Visited.count(p)) {
LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber()
<< "\n");
@@ -1086,44 +1203,59 @@ bool LiveDebugValues::join(
// is the entry block which has no predecessor.
assert((NumVisited || MBB.pred_empty()) &&
"Should have processed at least one predecessor");
- if (InLocsT.empty())
- return false;
VarLocSet &ILS = InLocs[&MBB];
+ VarLocSet &Pending = PendingInLocs[&MBB];
- // Insert DBG_VALUE instructions, if not already inserted.
+ // New locations will have DBG_VALUE insts inserted at the start of the
+ // block, after location propagation has finished. Record the insertions
+ // that we need to perform in the Pending set.
VarLocSet Diff = InLocsT;
Diff.intersectWithComplement(ILS);
for (auto ID : Diff) {
- // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a
- // new range is started for the var from the mbb's beginning by inserting
- // a new DBG_VALUE. process() will end this range however appropriate.
- const VarLoc &DiffIt = VarLocIDs[ID];
- const MachineInstr *DebugInstr = &DiffIt.MI;
- MachineInstr *MI = nullptr;
- if (DiffIt.isConstant()) {
- MachineOperand MO(DebugInstr->getOperand(0));
- MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
- DebugInstr->getDesc(), false, MO,
- DebugInstr->getDebugVariable(),
- DebugInstr->getDebugExpression());
- } else {
- MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
- DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(),
- DebugInstr->getOperand(0).getReg(),
- DebugInstr->getDebugVariable(),
- DebugInstr->getDebugExpression());
- if (DebugInstr->isIndirectDebugValue())
- MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
- }
- LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ Pending.set(ID);
ILS.set(ID);
++NumInserted;
Changed = true;
}
+
+ // We may have lost locations by learning about a predecessor that either
+ // loses or moves a variable. Find any locations in ILS that are not in the
+ // new in-locations, and delete those.
+ VarLocSet Removed = ILS;
+ Removed.intersectWithComplement(InLocsT);
+ for (auto ID : Removed) {
+ Pending.reset(ID);
+ ILS.reset(ID);
+ ++NumRemoved;
+ Changed = true;
+ }
+
return Changed;
}
+void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
+ VarLocMap &VarLocIDs) {
+ // PendingInLocs records all locations propagated into blocks for which
+ // DBG_VALUE insts have not yet been created. Go through and create them now.
+ for (auto &Iter : PendingInLocs) {
+ // Map is keyed on a constant pointer, unwrap it so we can insert insts.
+ auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first);
+ VarLocSet &Pending = Iter.second;
+
+ for (unsigned ID : Pending) {
+ // The ID location is live-in to MBB -- work out what kind of machine
+ // location it is and create a DBG_VALUE.
+ const VarLoc &DiffIt = VarLocIDs[ID];
+ MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent());
+ MBB.insert(MBB.instr_begin(), MI);
+
+ (void)MI;
+ LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ }
+ }
+}
+
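
flushPendingLocs closes the loop on the deferral scheme: during the fixed-point iteration only bits in PendingInLocs are set and cleared, and real DBG_VALUE instructions are built exactly once, after convergence, so a location that is later retracted never leaves behind an instruction that would have to be deleted. The record/retract/flush shape in isolation, under illustrative types (int block IDs instead of MachineBasicBlock*):

#include <algorithm>
#include <functional>
#include <map>
#include <vector>

struct PendingLocs {
  std::map<int, std::vector<unsigned>> PerBlock;

  // Cheap bookkeeping during the iterative analysis.
  void record(int Block, unsigned ID) { PerBlock[Block].push_back(ID); }
  void retract(int Block, unsigned ID) {
    auto &V = PerBlock[Block];
    V.erase(std::remove(V.begin(), V.end(), ID), V.end());
  }

  // Called once after the fixed point: materialize the surviving entries.
  void flush(const std::function<void(int, unsigned)> &Materialize) {
    for (auto &Entry : PerBlock)
      for (unsigned ID : Entry.second)
        Materialize(Entry.first, ID);
  }
};
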
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
@@ -1140,6 +1272,9 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
VarLocInMBB OutLocs; // Ranges that exist beyond bb.
VarLocInMBB InLocs; // Ranges that are incoming after joining.
TransferMap Transfers; // DBG_VALUEs associated with spills.
+ VarLocInMBB PendingInLocs; // Ranges that are incoming after joining, but
+ // for which we have deferred creating
+ // DBG_VALUE insts.
VarToFragments SeenFragments;
@@ -1156,8 +1291,6 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
std::greater<unsigned int>>
Pending;
- enum : bool { dontTransferChanges = false, transferChanges = true };
-
// Besides a parameter's modification, check whether a DBG_VALUE is inlined
// in order to deduce whether the variable that it tracks comes from
// a different function. If that is the case we can't track its entry value.
@@ -1169,7 +1302,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- unsigned FP = TRI->getFrameRegister(MF);
+ Register FP = TRI->getFrameRegister(MF);
auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool {
return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP;
};
@@ -1195,23 +1328,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
!MI.getDebugExpression()->isFragment())
DebugEntryVals[MI.getDebugVariable()] = &MI;
- // Initialize every mbb with OutLocs.
- // We are not looking at any spill instructions during the initial pass
- // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE
- // instructions for spills of registers that are known to be user variables
- // within the BB in which the spill occurs.
+ // Initialize per-block structures and scan for fragment overlaps.
for (auto &MBB : MF) {
+ PendingInLocs[&MBB] = VarLocSet();
+
for (auto &MI : MBB) {
- process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, DebugEntryVals,
- dontTransferChanges, OverlapFragments, SeenFragments);
- }
- // Add any entry DBG_VALUE instructions necessitated by parameter
- // clobbering.
- for (auto &TR : Transfers) {
- MBB.insertAfter(MachineBasicBlock::iterator(*TR.TransferInst),
- TR.DebugInst);
+ if (MI.isDebugValue())
+ accumulateFragmentMap(MI, SeenFragments, OverlapFragments);
}
- Transfers.clear();
}
auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
@@ -1248,26 +1372,21 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
while (!Worklist.empty()) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
- MBBJoined =
- join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, ArtificialBlocks);
- Visited.insert(MBB);
+ MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited,
+ ArtificialBlocks, PendingInLocs);
+ MBBJoined |= Visited.insert(MBB).second;
if (MBBJoined) {
MBBJoined = false;
Changed = true;
// Now that we have started to extend ranges across BBs we need to
// examine spill instructions to see whether they spill registers that
// correspond to user variables.
+ // First load any pending inlocs.
+ OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs);
for (auto &MI : *MBB)
- OLChanged |=
process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
- DebugEntryVals, transferChanges, OverlapFragments,
- SeenFragments);
-
- // Add any DBG_VALUE instructions necessitated by spills.
- for (auto &TR : Transfers)
- MBB->insertAfter(MachineBasicBlock::iterator(*TR.TransferInst),
- TR.DebugInst);
- Transfers.clear();
+ DebugEntryVals, OverlapFragments, SeenFragments);
+ OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
"OutLocs after propagating", dbgs()));
@@ -1289,6 +1408,19 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
assert(Pending.empty() && "Pending should be empty");
}
+ // Add any DBG_VALUE instructions created by location transfers.
+ for (auto &TR : Transfers) {
+ MachineBasicBlock *MBB = TR.TransferInst->getParent();
+ const VarLoc &VL = VarLocIDs[TR.LocationID];
+ MachineInstr *MI = VL.BuildDbgValue(MF);
+ MBB->insertAfterBundle(TR.TransferInst->getIterator(), MI);
+ }
+ Transfers.clear();
+
+ // Deferred inlocs will not have had any DBG_VALUE insts created; do
+ // that now.
+ flushPendingLocs(PendingInLocs, VarLocIDs);
+
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs()));
LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs()));
return Changed;
@@ -1308,7 +1440,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
TFI->determineCalleeSaves(MF, CalleeSavedRegs,
- make_unique<RegScavenger>().get());
+ std::make_unique<RegScavenger>().get());
LS.initialize(MF);
bool Changed = ExtendRanges(MF);
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 656ec7d4bdfd..2dd462fc72b3 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -99,28 +99,27 @@ enum : unsigned { UndefLocNo = ~0U };
/// usage of the location.
class DbgValueLocation {
public:
- DbgValueLocation(unsigned LocNo, bool WasIndirect)
- : LocNo(LocNo), WasIndirect(WasIndirect) {
+ DbgValueLocation(unsigned LocNo)
+ : LocNo(LocNo) {
static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing");
assert(locNo() == LocNo && "location truncation");
}
- DbgValueLocation() : LocNo(0), WasIndirect(0) {}
+ DbgValueLocation() : LocNo(0) {}
unsigned locNo() const {
// Fix up the undef location number, which gets truncated.
return LocNo == INT_MAX ? UndefLocNo : LocNo;
}
- bool wasIndirect() const { return WasIndirect; }
bool isUndef() const { return locNo() == UndefLocNo; }
DbgValueLocation changeLocNo(unsigned NewLocNo) const {
- return DbgValueLocation(NewLocNo, WasIndirect);
+ return DbgValueLocation(NewLocNo);
}
friend inline bool operator==(const DbgValueLocation &LHS,
const DbgValueLocation &RHS) {
- return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect;
+ return LHS.LocNo == RHS.LocNo;
}
friend inline bool operator!=(const DbgValueLocation &LHS,
@@ -129,8 +128,7 @@ public:
}
private:
- unsigned LocNo : 31;
- unsigned WasIndirect : 1;
+ unsigned LocNo;
};
/// Map of where a user value is live, and its location.
@@ -144,22 +142,51 @@ namespace {
class LDVImpl;
+/// A UserValue is uniquely identified by the source variable it refers to
+/// (Variable), the expression describing how to get the value (Expression) and
+/// the specific usage (InlinedAt). InlinedAt differentiates both between
+/// inline and non-inline functions, and between multiple inlined instances in
+/// the same scope. FIXME: The only part of the Expression that matters for
+/// UserValue identification is the fragment part.
+class UserValueIdentity {
+private:
+ /// The debug info variable we are part of.
+ const DILocalVariable *Variable;
+ /// Any complex address expression.
+ const DIExpression *Expression;
+ /// Function usage identification.
+ const DILocation *InlinedAt;
+
+public:
+ UserValueIdentity(const DILocalVariable *Var, const DIExpression *Expr,
+ const DILocation *IA)
+ : Variable(Var), Expression(Expr), InlinedAt(IA) {}
+
+ bool match(const DILocalVariable *Var, const DIExpression *Expr,
+ const DILocation *IA) const {
+ // FIXME: The fragment should be part of the identity, but not
+ // other things in the expression like stack values.
+ return Var == Variable && Expr == Expression && IA == InlinedAt;
+ }
+
+ bool match(const UserValueIdentity &Other) const {
+ return match(Other.Variable, Other.Expression, Other.InlinedAt);
+ }
+
+ unsigned hash_value() const {
+ return hash_combine(Variable, Expression, InlinedAt);
+ }
+};
+
/// A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
/// holds part of a user variable. The part is identified by a byte offset.
-///
-/// UserValues are grouped into equivalence classes for easier searching. Two
-/// user values are related if they refer to the same variable, or if they are
-/// held by the same virtual register. The equivalence class is the transitive
-/// closure of that relation.
class UserValue {
const DILocalVariable *Variable; ///< The debug info variable we are part of.
const DIExpression *Expression; ///< Any complex address expression.
DebugLoc dl; ///< The debug location for the variable. This is
///< used by dwarf writer to find lexical scope.
- UserValue *leader; ///< Equivalence class leader.
- UserValue *next = nullptr; ///< Next value in equivalence class, or null.
/// Numbered locations referenced by locmap.
SmallVector<MachineOperand, 4> locations;
@@ -180,49 +207,15 @@ class UserValue {
LiveIntervals &LIS);
public:
+ UserValue(const UserValue &) = delete;
+
/// Create a new UserValue.
UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
LocMap::Allocator &alloc)
- : Variable(var), Expression(expr), dl(std::move(L)), leader(this),
- locInts(alloc) {}
-
- /// Get the leader of this value's equivalence class.
- UserValue *getLeader() {
- UserValue *l = leader;
- while (l != l->leader)
- l = l->leader;
- return leader = l;
- }
+ : Variable(var), Expression(expr), dl(std::move(L)), locInts(alloc) {}
- /// Return the next UserValue in the equivalence class.
- UserValue *getNext() const { return next; }
-
- /// Does this UserValue match the parameters?
- bool match(const DILocalVariable *Var, const DIExpression *Expr,
- const DILocation *IA) const {
- // FIXME: The fragment should be part of the equivalence class, but not
- // other things in the expression like stack values.
- return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
- }
-
- /// Merge equivalence classes.
- static UserValue *merge(UserValue *L1, UserValue *L2) {
- L2 = L2->getLeader();
- if (!L1)
- return L2;
- L1 = L1->getLeader();
- if (L1 == L2)
- return L1;
- // Splice L2 before L1's members.
- UserValue *End = L2;
- while (End->next) {
- End->leader = L1;
- End = End->next;
- }
- End->leader = L1;
- End->next = L1->next;
- L1->next = L2;
- return L1;
+ UserValueIdentity getId() {
+ return UserValueIdentity(Variable, Expression, dl->getInlinedAt());
}
/// Return the location number that matches Loc.
@@ -261,8 +254,8 @@ public:
void mapVirtRegs(LDVImpl *LDV);
/// Add a definition point to this value.
- void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
- DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ DbgValueLocation Loc(getLocationNo(LocMO));
// Add a singular (Idx,Idx) -> Loc mapping.
LocMap::iterator I = locInts.find(Idx);
if (!I.valid() || I.start() != Idx)
@@ -297,11 +290,10 @@ public:
///
/// \param LI Scan for copies of the value in LI->reg.
/// \param LocNo Location number of LI->reg.
- /// \param WasIndirect Indicates if the original use of LI->reg was indirect
/// \param Kills Points where the range of LocNo could be extended.
/// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here.
void addDefsFromCopies(
- LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+ LiveInterval *LI, unsigned LocNo,
const SmallVectorImpl<SlotIndex> &Kills,
SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS);
@@ -335,7 +327,29 @@ public:
void print(raw_ostream &, const TargetRegisterInfo *);
};
+} // namespace
+namespace llvm {
+template <> struct DenseMapInfo<UserValueIdentity> {
+ static UserValueIdentity getEmptyKey() {
+ auto Key = DenseMapInfo<DILocalVariable *>::getEmptyKey();
+ return UserValueIdentity(Key, nullptr, nullptr);
+ }
+ static UserValueIdentity getTombstoneKey() {
+ auto Key = DenseMapInfo<DILocalVariable *>::getTombstoneKey();
+ return UserValueIdentity(Key, nullptr, nullptr);
+ }
+ static unsigned getHashValue(const UserValueIdentity &Val) {
+ return Val.hash_value();
+ }
+ static bool isEqual(const UserValueIdentity &LHS,
+ const UserValueIdentity &RHS) {
+ return LHS.match(RHS);
+ }
+};
+} // namespace llvm
+
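The DenseMapInfo specialization above is the standard hook for using a custom struct as a DenseMap key: LLVM's map needs reserved empty and tombstone keys plus hash and equality callbacks. The same identity-keyed lookup expressed with the standard library, where the hooks are a hasher and a key-equality functor instead (pointer fields stand in for the debug-info nodes):

#include <cstddef>
#include <functional>
#include <unordered_map>

struct Identity {
  const void *Var, *Expr, *InlinedAt;
};

struct IdentityHash {
  size_t operator()(const Identity &I) const {
    size_t H = std::hash<const void *>()(I.Var);
    H = H * 31 + std::hash<const void *>()(I.Expr);
    return H * 31 + std::hash<const void *>()(I.InlinedAt);
  }
};

struct IdentityEq {
  bool operator()(const Identity &A, const Identity &B) const {
    return A.Var == B.Var && A.Expr == B.Expr && A.InlinedAt == B.InlinedAt;
  }
};

// One value per unique (variable, expression, inlined-at) triple.
using ValueMap = std::unordered_map<Identity, int, IdentityHash, IdentityEq>;
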
+namespace {
/// A user label is a part of a debug info user label.
class UserLabel {
const DILabel *Label; ///< The debug info label we are part of.
@@ -387,20 +401,20 @@ class LDVImpl {
/// All allocated UserLabel instances.
SmallVector<std::unique_ptr<UserLabel>, 2> userLabels;
- /// Map virtual register to eq class leader.
- using VRMap = DenseMap<unsigned, UserValue *>;
- VRMap virtRegToEqClass;
+ /// Map virtual register to UserValues which use it.
+ using VRMap = DenseMap<unsigned, SmallVector<UserValue *, 4>>;
+ VRMap VirtRegToUserVals;
- /// Map user variable to eq class leader.
- using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
- UVMap userVarMap;
+ /// Map unique UserValue identity to UserValue.
+ using UVMap = DenseMap<UserValueIdentity, UserValue *>;
+ UVMap UserVarMap;
/// Find or create a UserValue.
UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
const DebugLoc &DL);
- /// Find the EC leader for VirtReg or null.
- UserValue *lookupVirtReg(unsigned VirtReg);
+ /// Find the UserValues for VirtReg or null.
+ SmallVectorImpl<UserValue *> *lookupVirtReg(unsigned VirtReg);
/// Add DBG_VALUE instruction to our maps.
///
@@ -440,8 +454,8 @@ public:
MF = nullptr;
userValues.clear();
userLabels.clear();
- virtRegToEqClass.clear();
- userVarMap.clear();
+ VirtRegToUserVals.clear();
+ UserVarMap.clear();
// Make sure we call emitDebugValues if the machine function was modified.
assert((!ModifiedMF || EmitDone) &&
"Dbg values are not emitted in LDV");
@@ -449,8 +463,8 @@ public:
ModifiedMF = false;
}
- /// Map virtual register to an equivalence class.
- void mapVirtReg(unsigned VirtReg, UserValue *EC);
+ /// Map virtual register to a UserValue.
+ void mapVirtReg(unsigned VirtReg, UserValue *UV);
/// Replace all references to OldReg with NewRegs.
void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
@@ -521,8 +535,6 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
OS << "undef";
else {
OS << I.value().locNo();
- if (I.value().wasIndirect())
- OS << " ind";
}
}
for (unsigned i = 0, e = locations.size(); i != e; ++i) {
@@ -554,37 +566,33 @@ void LDVImpl::print(raw_ostream &OS) {
void UserValue::mapVirtRegs(LDVImpl *LDV) {
for (unsigned i = 0, e = locations.size(); i != e; ++i)
if (locations[i].isReg() &&
- TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+ Register::isVirtualRegister(locations[i].getReg()))
LDV->mapVirtReg(locations[i].getReg(), this);
}
UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
const DIExpression *Expr, const DebugLoc &DL) {
- UserValue *&Leader = userVarMap[Var];
- if (Leader) {
- UserValue *UV = Leader->getLeader();
- Leader = UV;
- for (; UV; UV = UV->getNext())
- if (UV->match(Var, Expr, DL->getInlinedAt()))
- return UV;
- }
+ auto Ident = UserValueIdentity(Var, Expr, DL->getInlinedAt());
+ UserValue *&UVEntry = UserVarMap[Ident];
- userValues.push_back(
- llvm::make_unique<UserValue>(Var, Expr, DL, allocator));
- UserValue *UV = userValues.back().get();
- Leader = UserValue::merge(Leader, UV);
- return UV;
+ if (UVEntry)
+ return UVEntry;
+
+ userValues.push_back(std::make_unique<UserValue>(Var, Expr, DL, allocator));
+ return UVEntry = userValues.back().get();
}
-void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
- UserValue *&Leader = virtRegToEqClass[VirtReg];
- Leader = UserValue::merge(Leader, EC);
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *UV) {
+ assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ assert(UserVarMap.find(UV->getId()) != UserVarMap.end() &&
+ "UserValue should exist in UserVarMap");
+ VirtRegToUserVals[VirtReg].push_back(UV);
}
-UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
- if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
- return UV->getLeader();
+SmallVectorImpl<UserValue *> *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ VRMap::iterator Itr = VirtRegToUserVals.find(VirtReg);
+ if (Itr != VirtRegToUserVals.end())
+ return &Itr->getSecond();
return nullptr;
}
@@ -606,8 +614,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// could be removed or replaced by asserts.
bool Discard = false;
if (MI.getOperand(0).isReg() &&
- TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) {
- const unsigned Reg = MI.getOperand(0).getReg();
+ Register::isVirtualRegister(MI.getOperand(0).getReg())) {
+ const Register Reg = MI.getOperand(0).getReg();
if (!LIS->hasInterval(Reg)) {
// The DBG_VALUE is described by a virtual register that does not have a
// live interval. Discard the DBG_VALUE.
@@ -631,19 +639,18 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
}
// Get or create the UserValue for (variable,offset) here.
- bool IsIndirect = MI.getOperand(1).isImm();
- if (IsIndirect)
- assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
+ assert(!MI.getOperand(1).isImm() && "DBG_VALUE with indirect flag before "
+ "LiveDebugVariables");
const DILocalVariable *Var = MI.getDebugVariable();
const DIExpression *Expr = MI.getDebugExpression();
UserValue *UV =
getUserValue(Var, Expr, MI.getDebugLoc());
if (!Discard)
- UV->addDef(Idx, MI.getOperand(0), IsIndirect);
+ UV->addDef(Idx, MI.getOperand(0));
else {
MachineOperand MO = MachineOperand::CreateReg(0U, false);
MO.setIsDebug();
- UV->addDef(Idx, MO, false);
+ UV->addDef(Idx, MO);
}
return true;
}
@@ -666,7 +673,7 @@ bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
}
}
if (!Found)
- userLabels.push_back(llvm::make_unique<UserLabel>(Label, DL, Idx));
+ userLabels.push_back(std::make_unique<UserLabel>(Label, DL, Idx));
return true;
}
@@ -751,14 +758,14 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
}
void UserValue::addDefsFromCopies(
- LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+ LiveInterval *LI, unsigned LocNo,
const SmallVectorImpl<SlotIndex> &Kills,
SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS) {
if (Kills.empty())
return;
// Don't track copies from physregs, there are too many uses.
- if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+ if (!Register::isVirtualRegister(LI->reg))
return;
// Collect all the (vreg, valno) pairs that are copies of LI.
@@ -768,13 +775,13 @@ void UserValue::addDefsFromCopies(
// Copies of the full value.
if (MO.getSubReg() || !MI->isCopy())
continue;
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
// Don't follow copies to physregs. These are usually setting up call
// arguments, and the argument registers are always call clobbered. We are
// better off in the source register which could be a callee-saved register,
// or it could be spilled.
- if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (!Register::isVirtualRegister(DstReg))
continue;
// Is LocNo extended to reach this copy? If not, another def may be blocking
@@ -815,7 +822,7 @@ void UserValue::addDefsFromCopies(
MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
- DbgValueLocation NewLoc(LocNo, WasIndirect);
+ DbgValueLocation NewLoc(LocNo);
I.insert(Idx, Idx.getNextSlot(), NewLoc);
NewDefs.push_back(std::make_pair(Idx, NewLoc));
break;
@@ -845,7 +852,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
}
// Register locations are constrained to where the register value is live.
- if (TargetRegisterInfo::isVirtualRegister(LocMO.getReg())) {
+ if (Register::isVirtualRegister(LocMO.getReg())) {
LiveInterval *LI = nullptr;
const VNInfo *VNI = nullptr;
if (LIS.hasInterval(LocMO.getReg())) {
@@ -863,8 +870,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
// sub-register in that regclass). For now, simply skip handling copies if
// a sub-register is involved.
if (LI && !LocMO.getSubReg())
- addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
- LIS);
+ addDefsFromCopies(LI, Loc.locNo(), Kills, Defs, MRI, LIS);
continue;
}
@@ -1123,16 +1129,18 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) {
bool DidChange = false;
- for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
- DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
+ if (auto *UserVals = lookupVirtReg(OldReg))
+ for (auto *UV : *UserVals)
+ DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
if (!DidChange)
return;
// Map all of the new virtual registers.
- UserValue *UV = lookupVirtReg(OldReg);
- for (unsigned i = 0; i != NewRegs.size(); ++i)
- mapVirtReg(NewRegs[i], UV);
+ if (auto *UserVals = lookupVirtReg(OldReg))
+ for (auto *UV : *UserVals)
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i], UV);
}
void LiveDebugVariables::
@@ -1161,10 +1169,10 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
MachineOperand Loc = locations[I];
// Only virtual registers are rewritten.
if (Loc.isReg() && Loc.getReg() &&
- TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
- unsigned VirtReg = Loc.getReg();
+ Register::isVirtualRegister(Loc.getReg())) {
+ Register VirtReg = Loc.getReg();
if (VRM.isAssignedReg(VirtReg) &&
- TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ Register::isPhysicalRegister(VRM.getPhys(VirtReg))) {
// This can create a %noreg operand in rare cases when the sub-register
// index is no longer available. That means the user value is in a
// non-existent sub-register, and %noreg is exactly what we want.
@@ -1258,7 +1266,7 @@ findNextInsertLocation(MachineBasicBlock *MBB,
const TargetRegisterInfo &TRI) {
if (!LocMO.isReg())
return MBB->instr_end();
- unsigned Reg = LocMO.getReg();
+ Register Reg = LocMO.getReg();
// Find the next instruction in the MBB that defines the register Reg.
while (I != MBB->end() && !I->isTerminator()) {
@@ -1302,21 +1310,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// that the original virtual register was a pointer. Also, add the stack slot
// offset for the spilled register to the expression.
const DIExpression *Expr = Expression;
- uint8_t DIExprFlags = DIExpression::ApplyOffset;
- bool IsIndirect = Loc.wasIndirect();
- if (Spilled) {
- if (IsIndirect)
- DIExprFlags |= DIExpression::DerefAfter;
- Expr =
- DIExpression::prepend(Expr, DIExprFlags, SpillOffset);
- IsIndirect = true;
- }
+ if (Spilled)
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, SpillOffset);
assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");
do {
BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect, MO, Variable, Expr);
+ Spilled, MO, Variable, Expr);
// Continue and insert DBG_VALUEs after every redefinition of the register
// associated with the debug value within the range
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 70b2a77fe800..54ac46f2e7ce 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -886,7 +886,7 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
const TargetRegisterInfo &TRI) {
// Phys reg should not be tracked at subreg level.
// Same for noreg (Reg == 0).
- if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg)
+ if (!Register::isVirtualRegister(Reg) || !Reg)
return;
// Remove the values that don't define those lanes.
SmallVector<VNInfo *, 8> ToBeRemoved;
@@ -917,7 +917,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
for (VNInfo *VNI : ToBeRemoved)
SR.removeValNo(VNI);
- assert(!SR.empty() && "At least one value should be defined by this mask");
+ // If the subrange is empty at this point, the MIR is invalid. Do not assert
+ // and let the verifier catch this case.
}
void LiveInterval::refineSubRanges(
@@ -967,7 +968,7 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
LaneBitmask LaneMask,
const MachineRegisterInfo &MRI,
const SlotIndexes &Indexes) const {
- assert(TargetRegisterInfo::isVirtualRegister(reg));
+ assert(Register::isVirtualRegister(reg));
LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg);
assert((VRegMask & LaneMask).any());
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp
index aa85569063b3..2989930ad093 100644
--- a/lib/CodeGen/LiveIntervals.cpp
+++ b/lib/CodeGen/LiveIntervals.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveIntervals.h"
-#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -22,6 +21,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -108,7 +108,7 @@ LiveIntervals::~LiveIntervals() {
void LiveIntervals::releaseMemory() {
// Free the live intervals themselves.
for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
- delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+ delete VirtRegIntervals[Register::index2VirtReg(i)];
VirtRegIntervals.clear();
RegMaskSlots.clear();
RegMaskBits.clear();
@@ -161,7 +161,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
// Dump the virtregs.
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (hasInterval(Reg))
OS << getInterval(Reg) << '\n';
}
@@ -186,7 +186,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
#endif
LiveInterval* LiveIntervals::createInterval(unsigned reg) {
- float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F;
+ float Weight = Register::isPhysicalRegister(reg) ? huge_valf : 0.0F;
return new LiveInterval(reg, Weight);
}
@@ -201,7 +201,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
void LiveIntervals::computeVirtRegs() {
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
createAndComputeVirtRegInterval(Reg);
@@ -441,8 +441,8 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');
- assert(TargetRegisterInfo::isVirtualRegister(li->reg)
- && "Can only shrink virtual registers");
+ assert(Register::isVirtualRegister(li->reg) &&
+ "Can only shrink virtual registers");
// Shrink subregister live ranges.
bool NeedsCleanup = false;
@@ -541,8 +541,8 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
- assert(TargetRegisterInfo::isVirtualRegister(Reg)
- && "Can only shrink virtual registers");
+ assert(Register::isVirtualRegister(Reg) &&
+ "Can only shrink virtual registers");
// Find all the values used, including PHI kills.
ShrinkToUsesWorkList WorkList;
@@ -688,7 +688,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
LiveRange::const_iterator>, 4> SRs;
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
const LiveInterval &LI = getInterval(Reg);
@@ -986,10 +986,10 @@ public:
MO.setIsKill(false);
}
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
LiveInterval &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
unsigned SubReg = MO.getSubReg();
@@ -1023,7 +1023,7 @@ private:
return;
LLVM_DEBUG({
dbgs() << " ";
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
dbgs() << printReg(Reg);
if (LaneMask.any())
dbgs() << " L" << PrintLaneMask(LaneMask);
@@ -1288,6 +1288,20 @@ private:
const SlotIndex SplitPos = NewIdxDef;
OldIdxVNI = OldIdxIn->valno;
+ SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end;
+ LiveRange::iterator Prev = std::prev(OldIdxIn);
+ if (OldIdxIn != LR.begin() &&
+ SlotIndex::isEarlierInstr(NewIdx, Prev->end)) {
+ // If the segment before OldIdx read a value defined earlier than
+ // NewIdx, the moved instruction also reads and forwards that
+ // value. Extend the lifetime of the new def point.
+
+ // Extend to where the previous range started, unless there is
+ // another redef first.
+ NewDefEndPoint = std::min(OldIdxIn->start,
+ std::next(NewIdxOut)->start);
+ }
+
// Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
OldIdxOut->valno->def = OldIdxIn->start;
*OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
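
The NewDefEndPoint computation above can be read as a small piece of interval arithmetic: if the segment preceding OldIdx was reading a value defined before NewIdx, the moved instruction forwards that value, so the new definition must stay live up to where the old segment began, cut short only by an earlier redefinition. As plain arithmetic over illustrative slot indices:

#include <algorithm>

// Returns where the moved definition's segment should end. Inputs are
// illustrative stand-ins for SlotIndex values in the range being updated.
unsigned newDefEndPoint(unsigned DefaultEnd,        // std::next(NewIdxIn)->end
                        unsigned OldSegStart,       // OldIdxIn->start
                        unsigned NextRedefStart,    // std::next(NewIdxOut)->start
                        bool PrevReadsEarlierDef) { // segment before OldIdx
  if (!PrevReadsEarlierDef)
    return DefaultEnd;
  // Extend to where the previous range started, unless another redefinition
  // comes first.
  return std::min(OldSegStart, NextRedefStart);
}
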
@@ -1305,7 +1319,8 @@ private:
// There is no gap between NewSegment and its predecessor.
*NewSegment = LiveRange::Segment(Next->start, SplitPos,
Next->valno);
- *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI);
+
+ *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI);
Next->valno->def = SplitPos;
} else {
// There is a gap between NewSegment and its predecessor
@@ -1384,7 +1399,7 @@ private:
// Return the last use of reg between NewIdx and OldIdx.
SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg,
LaneBitmask LaneMask) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
SlotIndex LastUse = Before;
for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
if (MO.isUndef())
@@ -1429,7 +1444,7 @@ private:
// Check if MII uses Reg.
for (MIBundleOperands MO(*MII); MO.isValid(); ++MO)
if (MO->isReg() && !MO->isUndef() &&
- TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
+ Register::isPhysicalRegister(MO->getReg()) &&
TRI.hasRegUnit(MO->getReg(), Reg))
return Idx.getRegSlot();
}
@@ -1439,7 +1454,10 @@ private:
};
void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) {
- assert(!MI.isBundled() && "Can't handle bundled instructions yet.");
+ // It is fine to move a bundle as a whole, but not an individual instruction
+ // inside it.
+ assert((!MI.isBundled() || MI.getOpcode() == TargetOpcode::BUNDLE) &&
+ "Cannot move instruction in bundle");
SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
Indexes->removeMachineInstrFromMaps(MI);
SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
@@ -1582,8 +1600,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
MOE = MI.operands_end();
MOI != MOE; ++MOI) {
- if (MOI->isReg() &&
- TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
+ if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) &&
!hasInterval(MOI->getReg())) {
createAndComputeVirtRegInterval(MOI->getReg());
}
@@ -1591,7 +1608,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
}
for (unsigned Reg : OrigRegs) {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
continue;
LiveInterval &LI = getInterval(Reg);
@@ -1642,7 +1659,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
unsigned Reg = LI.reg;
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
for (unsigned I = 1; I < NumComp; ++I) {
- unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+ Register NewVReg = MRI->createVirtualRegister(RegClass);
LiveInterval &NewLI = createEmptyInterval(NewVReg);
SplitLIs.push_back(&NewLI);
}
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index cd3d248ac878..c2a1cc7c6490 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -46,8 +46,8 @@ void LivePhysRegs::removeDefs(const MachineInstr &MI) {
if (O->isReg()) {
if (!O->isDef() || O->isDebug())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
removeReg(Reg);
} else if (O->isRegMask())
@@ -60,8 +60,8 @@ void LivePhysRegs::addUses(const MachineInstr &MI) {
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (!O->isReg() || !O->readsReg() || O->isDebug())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
addReg(Reg);
}
@@ -86,8 +86,8 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
// Remove killed registers from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg() && !O->isDebug()) {
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
if (O->isDef()) {
// Note, dead defs are still recorded. The caller should decide how to
@@ -292,10 +292,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
if (!MO->isReg() || !MO->isDef() || MO->isDebug())
continue;
- unsigned Reg = MO->getReg();
+ Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Register::isPhysicalRegister(Reg));
bool IsNotLive = LiveRegs.available(MRI, Reg);
MO->setIsDead(IsNotLive);
@@ -309,10 +309,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
if (!MO->isReg() || !MO->readsReg() || MO->isDebug())
continue;
- unsigned Reg = MO->getReg();
+ Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Register::isPhysicalRegister(Reg));
bool IsNotLive = LiveRegs.available(MRI, Reg);
MO->setIsKill(IsNotLive);
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index d670f28df6ba..24b57be0da00 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "LiveRangeCalc.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -372,8 +372,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
report_fatal_error("Use not jointly dominated by defs.");
}
- if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
- !MBB->isLiveIn(PhysReg)) {
+ if (Register::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) {
MBB->getParent()->verify();
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
errs() << "The register " << printReg(PhysReg, TRI)
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 882e562ba95c..34bac082bcd7 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -32,7 +32,7 @@ void LiveRangeEdit::Delegate::anchor() { }
LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg,
bool createSubRanges) {
- unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
if (VRM)
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
@@ -52,7 +52,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg,
}
unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
- unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
if (VRM) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
@@ -114,7 +114,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
continue;
// We can't remat physreg uses, unless it is a constant.
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (Register::isPhysicalRegister(MO.getReg())) {
if (MRI.isConstantPhysReg(MO.getReg()))
continue;
return false;
@@ -232,7 +232,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
LLVM_DEBUG(dbgs() << " folded: " << *FoldMI);
LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
if (UseMI->isCall())
- UseMI->getMF()->updateCallSiteInfo(UseMI, FoldMI);
+ UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI);
UseMI->eraseFromParent();
DefMI->addRegisterDead(LI->reg, nullptr);
Dead.push_back(DefMI);
@@ -308,8 +308,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
if (!MOI->isReg())
continue;
- unsigned Reg = MOI->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MOI->getReg();
+ if (!Register::isVirtualRegister(Reg)) {
// Check if MI reads any unreserved physregs.
if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
@@ -349,7 +349,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// Remove all operands that aren't physregs.
for (unsigned i = MI->getNumOperands(); i; --i) {
const MachineOperand &MO = MI->getOperand(i-1);
- if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ if (MO.isReg() && Register::isPhysicalRegister(MO.getReg()))
continue;
MI->RemoveOperand(i-1);
}
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
index 8818f1ce0ad9..cbf112ee2bd5 100644
--- a/lib/CodeGen/LiveRangeShrink.cpp
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -172,10 +172,10 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.isDead() || MO.isDebug())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// Do not move the instruction if it def/uses a physical register,
// unless it is a constant physical register or a noreg.
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Register::isVirtualRegister(Reg)) {
if (!Reg || MRI.isConstantPhysReg(Reg))
continue;
Insert = nullptr;
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index ce99e5535c25..72c79e5f8a75 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -118,7 +118,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
}
void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
- unsigned PhysReg = VRM->getPhys(VirtReg.reg);
+ Register PhysReg = VRM->getPhys(VirtReg.reg);
LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from "
<< printReg(PhysReg, TRI) << ':');
VRM->clearVirt(VirtReg.reg);
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index 6afb7fb7aa11..97763def1f40 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -47,8 +47,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {
if (O->isReg()) {
if (!O->isDef() || O->isDebug())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
removeReg(Reg);
} else if (O->isRegMask())
@@ -59,8 +59,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (!O->isReg() || !O->readsReg() || O->isDebug())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
addReg(Reg);
}
@@ -70,8 +70,8 @@ void LiveRegUnits::accumulate(const MachineInstr &MI) {
// Add defs, uses and regmask clobbers to the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
if (!O->isDef() && !O->readsReg())
continue;
diff --git a/lib/CodeGen/LiveStacks.cpp b/lib/CodeGen/LiveStacks.cpp
index f55977d72723..8df84ebf4f06 100644
--- a/lib/CodeGen/LiveStacks.cpp
+++ b/lib/CodeGen/LiveStacks.cpp
@@ -58,9 +58,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
assert(Slot >= 0 && "Spill slot index must be >= 0");
SS2IntervalMap::iterator I = S2IMap.find(Slot);
if (I == S2IMap.end()) {
- I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
- std::forward_as_tuple(
- TargetRegisterInfo::index2StackSlot(Slot), 0.0F))
+ I = S2IMap
+ .emplace(
+ std::piecewise_construct, std::forward_as_tuple(Slot),
+ std::forward_as_tuple(Register::index2StackSlot(Slot), 0.0F))
.first;
S2RCMap.insert(std::make_pair(Slot, RC));
} else {
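
The reflowed emplace above relies on std::piecewise_construct: the key and the mapped LiveInterval are each constructed in place inside the map node from their own argument tuples, so no temporary interval is copied or moved. The idiom in isolation, with a deliberately non-copyable value type:

#include <map>
#include <tuple>

struct Interval {
  Interval(int Slot, float Weight) : Slot(Slot), Weight(Weight) {}
  Interval(const Interval &) = delete; // must be built in place
  int Slot;
  float Weight;
};

int main() {
  std::map<int, Interval> S2I;
  // Key built from (42), value built from (42, 0.0F), both inside the node.
  S2I.emplace(std::piecewise_construct, std::forward_as_tuple(42),
              std::forward_as_tuple(42, 0.0F));
}
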
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index aaff982ef1b0..9bd55c6f750f 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -26,6 +26,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -82,7 +83,7 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
- assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ assert(Register::isVirtualRegister(RegIdx) &&
"getVarInfo: not a virtual register!");
VirtRegInfo.grow(RegIdx);
return VirtRegInfo[RegIdx];
@@ -214,7 +215,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
MachineOperand &MO = LastDef->getOperand(i);
if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
continue;
- unsigned DefReg = MO.getReg();
+ Register DefReg = MO.getReg();
if (TRI->isSubRegister(Reg, DefReg)) {
for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -519,10 +520,9 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
}
if (!MO.isReg() || MO.getReg() == 0)
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (MO.isUse()) {
- if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- MRI->isReserved(MOReg)))
+ if (!(Register::isPhysicalRegister(MOReg) && MRI->isReserved(MOReg)))
MO.setIsKill(false);
if (MO.readsReg())
UseRegs.push_back(MOReg);
@@ -530,8 +530,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
assert(MO.isDef());
// FIXME: We should not remove any dead flags. However the MIPS RDDSP
// instruction needs it at the moment: http://llvm.org/PR27116.
- if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- !MRI->isReserved(MOReg))
+ if (Register::isPhysicalRegister(MOReg) && !MRI->isReserved(MOReg))
MO.setIsDead(false);
DefRegs.push_back(MOReg);
}
@@ -541,7 +540,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
// Process all uses.
for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
unsigned MOReg = UseRegs[i];
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ if (Register::isVirtualRegister(MOReg))
HandleVirtRegUse(MOReg, MBB, MI);
else if (!MRI->isReserved(MOReg))
HandlePhysRegUse(MOReg, MI);
@@ -554,7 +553,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
// Process all defs.
for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
unsigned MOReg = DefRegs[i];
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ if (Register::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
else if (!MRI->isReserved(MOReg))
HandlePhysRegDef(MOReg, &MI, Defs);
@@ -566,7 +565,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
// Mark live-in registers as live-in.
SmallVector<unsigned, 4> Defs;
for (const auto &LI : MBB->liveins()) {
- assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) &&
+ assert(Register::isPhysicalRegister(LI.PhysReg) &&
"Cannot have a live-in virtual register!");
HandlePhysRegDef(LI.PhysReg, nullptr, Defs);
}
@@ -654,7 +653,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Convert and transfer the dead / killed information we have gathered into
// VirtRegInfo onto MI's.
for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
- const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ const unsigned Reg = Register::index2VirtReg(i);
for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
@@ -692,8 +691,8 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) {
MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isKill()) {
MO.setIsKill(false);
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isVirtualRegister(Reg)) {
bool removed = getVarInfo(Reg).removeKill(MI);
assert(removed && "kill not in register's VarInfo?");
(void)removed;
@@ -783,7 +782,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
for (; BBI != BBE; ++BBI) {
for (MachineInstr::mop_iterator I = BBI->operands_begin(),
E = BBI->operands_end(); I != E; ++I) {
- if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) {
+ if (I->isReg() && Register::isVirtualRegister(I->getReg())) {
if (I->isDef())
Defs.insert(I->getReg());
else if (I->isKill())
@@ -794,7 +793,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
// Update info for all live variables
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
// If the register is defined in the successor, it can't be live in BB.
if (Defs.count(Reg))
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index b14d76a585f7..2392d4d00b56 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -261,7 +261,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Remember how big this blob of stack space is
MFI.setLocalFrameSize(Offset);
- MFI.setLocalFrameMaxAlign(MaxAlign);
+ MFI.setLocalFrameMaxAlign(assumeAligned(MaxAlign));
}
static inline bool
@@ -351,6 +351,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
assert(MFI.isObjectPreAllocated(FrameIdx) &&
"Only pre-allocated locals expected!");
+ // We need to keep the references to the stack protector slot through frame
+ // index operands so that it gets resolved by PEI rather than this pass.
+ // This avoids accesses to the stack protector through virtual base
+ // registers, and forces PEI to address it using fp/sp/bp.
+ if (MFI.hasStackProtectorIndex() &&
+ FrameIdx == MFI.getStackProtectorIndex())
+ continue;
+
LLVM_DEBUG(dbgs() << "Considering: " << MI);
unsigned idx = 0;
diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp
index c8cf6abda4fc..ed48365b0102 100644
--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@@ -142,7 +142,7 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
assert(EmuTlsTmplVar && "Failed to create emulated TLS initializer");
EmuTlsTmplVar->setConstant(true);
EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue));
- EmuTlsTmplVar->setAlignment(GVAlignment);
+ EmuTlsTmplVar->setAlignment(Align(GVAlignment));
copyLinkageVisibility(M, GV, EmuTlsTmplVar);
}
@@ -155,9 +155,8 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
ArrayRef<Constant*> ElementValueArray(ElementValues, 4);
EmuTlsVar->setInitializer(
ConstantStruct::get(EmuTlsVarType, ElementValueArray));
- unsigned MaxAlignment = std::max(
- DL.getABITypeAlignment(WordType),
- DL.getABITypeAlignment(VoidPtrType));
+ Align MaxAlignment(std::max(DL.getABITypeAlignment(WordType),
+ DL.getABITypeAlignment(VoidPtrType)));
EmuTlsVar->setAlignment(MaxAlignment);
return true;
}
diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp
index f49bc854e23f..c9bb5461aa3c 100644
--- a/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -23,12 +23,14 @@
//
//===----------------------------------------------------------------------===//
+#include "MIRVRegNamerUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <queue>
@@ -71,28 +73,6 @@ public:
} // end anonymous namespace
-enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
-class TypedVReg {
- VRType type;
- unsigned reg;
-
-public:
- TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {}
- TypedVReg(VRType type) : type(type), reg(~0U) {
- assert(type != RSE_Reg && "Expected a non-register type.");
- }
-
- bool isReg() const { return type == RSE_Reg; }
- bool isFrameIndex() const { return type == RSE_FrameIndex; }
- bool isCandidate() const { return type == RSE_NewCandidate; }
-
- VRType getType() const { return type; }
- unsigned getReg() const {
- assert(this->isReg() && "Expected a virtual or physical register.");
- return reg;
- }
-};
-
char MIRCanonicalizer::ID;
char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
@@ -190,7 +170,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
if (!MO.isReg())
continue;
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (Register::isVirtualRegister(MO.getReg()))
continue;
if (!MO.isDef())
@@ -207,7 +187,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
continue;
MachineOperand &MO = II->getOperand(0);
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
if (!MO.isDef())
continue;
@@ -220,7 +200,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
}
if (II->getOperand(i).isReg()) {
- if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg()))
+ if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
PhysRegDefs.end()) {
continue;
@@ -340,12 +320,12 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
if (!MI->getOperand(1).isReg())
continue;
- const unsigned Dst = MI->getOperand(0).getReg();
- const unsigned Src = MI->getOperand(1).getReg();
+ const Register Dst = MI->getOperand(0).getReg();
+ const Register Src = MI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Dst))
+ if (!Register::isVirtualRegister(Dst))
continue;
- if (!TargetRegisterInfo::isVirtualRegister(Src))
+ if (!Register::isVirtualRegister(Src))
continue;
// Not folding COPY instructions if regbankselect has not set the RCs.
// Why are we only considering Register Classes? Because the verifier
@@ -370,258 +350,6 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
return Changed;
}
-/// Here we find our candidates. What makes an interesting candidate?
-/// An candidate for a canonicalization tree root is normally any kind of
-/// instruction that causes side effects such as a store to memory or a copy to
-/// a physical register or a return instruction. We use these as an expression
-/// tree root that we walk inorder to build a canonical walk which should result
-/// in canoncal vreg renaming.
-static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
- std::vector<MachineInstr *> Candidates;
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-
- for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
- MachineInstr *MI = &*II;
-
- bool DoesMISideEffect = false;
-
- if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
- const unsigned Dst = MI->getOperand(0).getReg();
- DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst);
-
- for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
- if (DoesMISideEffect)
- break;
- DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
- }
- }
-
- if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
- continue;
-
- LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
- Candidates.push_back(MI);
- }
-
- return Candidates;
-}
-
-static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
- std::queue<TypedVReg> &RegQueue,
- std::vector<MachineInstr *> &VisitedMIs,
- const MachineBasicBlock *MBB) {
-
- const MachineFunction &MF = *MBB->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- while (!RegQueue.empty()) {
-
- auto TReg = RegQueue.front();
- RegQueue.pop();
-
- if (TReg.isFrameIndex()) {
- LLVM_DEBUG(dbgs() << "Popping frame index.\n";);
- VRegs.push_back(TypedVReg(RSE_FrameIndex));
- continue;
- }
-
- assert(TReg.isReg() && "Expected vreg or physreg.");
- unsigned Reg = TReg.getReg();
-
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- LLVM_DEBUG({
- dbgs() << "Popping vreg ";
- MRI.def_begin(Reg)->dump();
- dbgs() << "\n";
- });
-
- if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) {
- return TR.isReg() && TR.getReg() == Reg;
- })) {
- VRegs.push_back(TypedVReg(Reg));
- }
- } else {
- LLVM_DEBUG(dbgs() << "Popping physreg.\n";);
- VRegs.push_back(TypedVReg(Reg));
- continue;
- }
-
- for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
- MachineInstr *Def = RI->getParent();
-
- if (Def->getParent() != MBB)
- continue;
-
- if (llvm::any_of(VisitedMIs,
- [&](const MachineInstr *VMI) { return Def == VMI; })) {
- break;
- }
-
- LLVM_DEBUG({
- dbgs() << "\n========================\n";
- dbgs() << "Visited MI: ";
- Def->dump();
- dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
- dbgs() << "\n========================\n";
- });
- VisitedMIs.push_back(Def);
- for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
-
- MachineOperand &MO = Def->getOperand(I);
- if (MO.isFI()) {
- LLVM_DEBUG(dbgs() << "Pushing frame index.\n";);
- RegQueue.push(TypedVReg(RSE_FrameIndex));
- }
-
- if (!MO.isReg())
- continue;
- RegQueue.push(TypedVReg(MO.getReg()));
- }
- }
- }
-}
-
-namespace {
-class NamedVRegCursor {
- MachineRegisterInfo &MRI;
- unsigned virtualVRegNumber;
-
-public:
- NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {}
-
- void SkipVRegs() {
- unsigned VRegGapIndex = 1;
- if (!virtualVRegNumber) {
- VRegGapIndex = 0;
- virtualVRegNumber = MRI.createIncompleteVirtualRegister();
- }
- const unsigned VR_GAP = (++VRegGapIndex * 1000);
-
- unsigned I = virtualVRegNumber;
- const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
-
- virtualVRegNumber = E;
- }
-
- unsigned getVirtualVReg() const { return virtualVRegNumber; }
-
- unsigned incrementVirtualVReg(unsigned incr = 1) {
- virtualVRegNumber += incr;
- return virtualVRegNumber;
- }
-
- unsigned createVirtualRegister(unsigned VReg) {
- if (!virtualVRegNumber)
- SkipVRegs();
- std::string S;
- raw_string_ostream OS(S);
- OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
- OS.flush();
- virtualVRegNumber++;
- if (auto RC = MRI.getRegClassOrNull(VReg))
- return MRI.createVirtualRegister(RC, OS.str());
- return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
- }
-};
-} // namespace
-
-static std::map<unsigned, unsigned>
-GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
- const std::vector<unsigned> &renamedInOtherBB,
- MachineRegisterInfo &MRI, NamedVRegCursor &NVC) {
- std::map<unsigned, unsigned> VRegRenameMap;
- bool FirstCandidate = true;
-
- for (auto &vreg : VRegs) {
- if (vreg.isFrameIndex()) {
- // We skip one vreg for any frame index because there is a good chance
- // (especially when comparing SelectionDAG to GlobalISel generated MIR)
- // that in the other file we are just getting an incoming vreg that comes
- // from a copy from a frame index. So it's safe to skip by one.
- unsigned LastRenameReg = NVC.incrementVirtualVReg();
- (void)LastRenameReg;
- LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
- continue;
- } else if (vreg.isCandidate()) {
-
- // After the first candidate, for every subsequent candidate, we skip mod
- // 10 registers so that the candidates are more likely to start at the
- // same vreg number making it more likely that the canonical walk from the
- // candidate insruction. We don't need to skip from the first candidate of
- // the BasicBlock because we already skip ahead several vregs for each BB.
- unsigned LastRenameReg = NVC.getVirtualVReg();
- if (FirstCandidate)
- NVC.incrementVirtualVReg(LastRenameReg % 10);
- FirstCandidate = false;
- continue;
- } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) {
- unsigned LastRenameReg = NVC.incrementVirtualVReg();
- (void)LastRenameReg;
- LLVM_DEBUG({
- dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
- });
- continue;
- }
-
- auto Reg = vreg.getReg();
- if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
- LLVM_DEBUG(dbgs() << "Vreg " << Reg
- << " already renamed in other BB.\n";);
- continue;
- }
-
- auto Rename = NVC.createVirtualRegister(Reg);
-
- if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
- LLVM_DEBUG(dbgs() << "Mapping vreg ";);
- if (MRI.reg_begin(Reg) != MRI.reg_end()) {
- LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
- } else {
- LLVM_DEBUG(dbgs() << Reg;);
- }
- LLVM_DEBUG(dbgs() << " to ";);
- if (MRI.reg_begin(Rename) != MRI.reg_end()) {
- LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
- } else {
- LLVM_DEBUG(dbgs() << Rename;);
- }
- LLVM_DEBUG(dbgs() << "\n";);
-
- VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
- }
- }
-
- return VRegRenameMap;
-}
-
-static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB,
- const std::map<unsigned, unsigned> &VRegRenameMap,
- MachineRegisterInfo &MRI) {
- bool Changed = false;
- for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
-
- auto VReg = I->first;
- auto Rename = I->second;
-
- RenamedInOtherBB.push_back(Rename);
-
- std::vector<MachineOperand *> RenameMOs;
- for (auto &MO : MRI.reg_operands(VReg)) {
- RenameMOs.push_back(&MO);
- }
-
- for (auto *MO : RenameMOs) {
- Changed = true;
- MO->setReg(Rename);
-
- if (!MO->isDef())
- MO->setIsKill(false);
- }
- }
-
- return Changed;
-}
-
static bool doDefKillClear(MachineBasicBlock *MBB) {
bool Changed = false;
@@ -646,9 +374,7 @@ static bool doDefKillClear(MachineBasicBlock *MBB) {
static bool runOnBasicBlock(MachineBasicBlock *MBB,
std::vector<StringRef> &bbNames,
- std::vector<unsigned> &renamedInOtherBB,
- unsigned &basicBlockNum, unsigned &VRegGapIndex,
- NamedVRegCursor &NVC) {
+ unsigned &basicBlockNum, NamedVRegCursor &NVC) {
if (CanonicalizeBasicBlockNumber != ~0U) {
if (CanonicalizeBasicBlockNumber != basicBlockNum++)
@@ -687,74 +413,20 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
- std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
- std::vector<MachineInstr *> VisitedMIs;
- llvm::copy(Candidates, std::back_inserter(VisitedMIs));
-
- std::vector<TypedVReg> VRegs;
- for (auto candidate : Candidates) {
- VRegs.push_back(TypedVReg(RSE_NewCandidate));
-
- std::queue<TypedVReg> RegQueue;
-
- // Here we walk the vreg operands of a non-root node along our walk.
- // The root nodes are the original candidates (stores normally).
- // These are normally not the root nodes (except for the case of copies to
- // physical registers).
- for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
- if (candidate->mayStore() || candidate->isBranch())
- break;
-
- MachineOperand &MO = candidate->getOperand(i);
- if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
- continue;
-
- LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
- RegQueue.push(TypedVReg(MO.getReg()));
- }
-
- // Here we walk the root candidates. We start from the 0th operand because
- // the root is normally a store to a vreg.
- for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
-
- if (!candidate->mayStore() && !candidate->isBranch())
- break;
-
- MachineOperand &MO = candidate->getOperand(i);
-
- // TODO: Do we want to only add vregs here?
- if (!MO.isReg() && !MO.isFI())
- continue;
-
- LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
-
- RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg())
- : TypedVReg(RSE_FrameIndex));
- }
-
- doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
- }
-
- // If we have populated no vregs to rename then bail.
- // The rest of this function does the vreg remaping.
- if (VRegs.size() == 0)
- return Changed;
-
- auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC);
- Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
+ Changed |= NVC.renameVRegs(MBB);
// Here we renumber the def vregs for the idempotent instructions from the top
// of the MachineBasicBlock so that they are named in the order that we sorted
 // them alphabetically. Eventually we won't need SkipVRegs because we will use
// named vregs instead.
if (IdempotentInstCount)
- NVC.SkipVRegs();
+ NVC.skipVRegs();
auto MII = MBB->begin();
for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
MachineInstr &MI = *MII++;
Changed = true;
- unsigned vRegToRename = MI.getOperand(0).getReg();
+ Register vRegToRename = MI.getOperand(0).getReg();
auto Rename = NVC.createVirtualRegister(vRegToRename);
std::vector<MachineOperand *> RenameMOs;
@@ -799,9 +471,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
<< "\n\n================================================\n\n";);
std::vector<StringRef> BBNames;
- std::vector<unsigned> RenamedInOtherBB;
- unsigned GapIdx = 0;
unsigned BBNum = 0;
bool Changed = false;
@@ -809,8 +479,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
NamedVRegCursor NVC(MRI);
for (auto MBB : RPOList)
- Changed |=
- runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx, NVC);
+ Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC);
return Changed;
}
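
Much of the mechanical churn in this file replaces the static TargetRegisterInfo::isVirtualRegister(unsigned) helpers with their Register equivalents. A minimal sketch of the two spellings, which should be interchangeable at this revision (the member form is an assumption based on CodeGen/Register.h, which also provides the static form used throughout this diff):

    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/CodeGen/Register.h"

    // Returns true when the operand carries a virtual register.
    // Precondition: MO.isReg().
    static bool isVirtualRegOperand(const llvm::MachineOperand &MO) {
      llvm::Register R = MO.getReg();
      return R.isVirtual(); // equivalent to Register::isVirtualRegister(R)
    }
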
diff --git a/lib/CodeGen/MIRNamerPass.cpp b/lib/CodeGen/MIRNamerPass.cpp
new file mode 100644
index 000000000000..9d719f3917ce
--- /dev/null
+++ b/lib/CodeGen/MIRNamerPass.cpp
@@ -0,0 +1,77 @@
+//===----------------------- MIRNamer.cpp - MIR Namer ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of this pass is to rename virtual register operands with the goal
+// of making it easier to author easy-to-read tests for MIR. This pass reuses
+// the vreg renamer used by MIRCanonicalizerPass.
+//
+// Basic Usage:
+//
+// llc -o - -run-pass mir-namer example.mir
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRVRegNamerUtils.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+namespace llvm {
+extern char &MIRNamerID;
+} // namespace llvm
+
+#define DEBUG_TYPE "mir-namer"
+
+namespace {
+
+class MIRNamer : public MachineFunctionPass {
+public:
+ static char ID;
+ MIRNamer() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Rename virtual register operands";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ bool Changed = false;
+
+ if (MF.empty())
+ return Changed;
+
+ NamedVRegCursor NVC(MF.getRegInfo());
+
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ for (auto &MBB : RPOT)
+ Changed |= NVC.renameVRegs(MBB);
+
+ return Changed;
+ }
+};
+
+} // end anonymous namespace
+
+char MIRNamer::ID;
+
+char &llvm::MIRNamerID = MIRNamer::ID;
+
+INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false,
+ false)
+
+INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false,
+ false)
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index 4899bd3f5811..ad5c617623f2 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -249,6 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("successors", MIToken::kw_successors)
.Case("floatpred", MIToken::kw_floatpred)
.Case("intpred", MIToken::kw_intpred)
+ .Case("shufflemask", MIToken::kw_shufflemask)
.Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
.Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
.Case("unknown-size", MIToken::kw_unknown_size)
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index 0fe3f9f706db..200f9d026cc8 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -117,6 +117,7 @@ struct MIToken {
kw_successors,
kw_floatpred,
kw_intpred,
+ kw_shufflemask,
kw_pre_instr_symbol,
kw_post_instr_symbol,
kw_unknown_size,
@@ -146,6 +147,7 @@ struct MIToken {
IntegerLiteral,
FloatingPointLiteral,
HexLiteral,
+ VectorLiteral,
VirtualRegister,
ConstantPoolItem,
JumpTableIndex,
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index c0b800a0b870..6498acc9fa51 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -451,6 +451,7 @@ public:
bool parseBlockAddressOperand(MachineOperand &Dest);
bool parseIntrinsicOperand(MachineOperand &Dest);
bool parsePredicateOperand(MachineOperand &Dest);
+ bool parseShuffleMaskOperand(MachineOperand &Dest);
bool parseTargetIndexOperand(MachineOperand &Dest);
bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
@@ -640,7 +641,7 @@ bool MIParser::parseBasicBlockDefinition(
return error(Loc, Twine("redefinition of machine basic block with id #") +
Twine(ID));
if (Alignment)
- MBB->setAlignment(Alignment);
+ MBB->setAlignment(Align(Alignment));
if (HasAddressTaken)
MBB->setHasAddressTaken();
MBB->setIsEHPad(IsLandingPad);
@@ -1078,7 +1079,7 @@ static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
static std::string getRegisterName(const TargetRegisterInfo *TRI,
unsigned Reg) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg");
+ assert(Register::isPhysicalRegister(Reg) && "expected phys reg");
return StringRef(TRI->getName(Reg)).lower();
}
@@ -1408,11 +1409,11 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (Token.is(MIToken::dot)) {
if (parseSubRegisterIndex(SubReg))
return true;
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return error("subregister index expects a virtual register");
}
if (Token.is(MIToken::colon)) {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return error("register class specification expects a virtual register");
lex();
if (parseRegisterClassOrBank(*RegInfo))
@@ -1436,12 +1437,13 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
return error("inconsistent type for generic virtual register");
+ MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr));
MRI.setType(Reg, Ty);
}
}
} else if (consumeIfPresent(MIToken::lparen)) {
    // Virtual registers may have a type with GlobalISel.
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return error("unexpected type on physical register");
LLT Ty;
@@ -1454,8 +1456,9 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
return error("inconsistent type for generic virtual register");
+ MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr));
MRI.setType(Reg, Ty);
- } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ } else if (Register::isVirtualRegister(Reg)) {
// Generic virtual registers must have a type.
// If we end up here this means the type hasn't been specified and
// this is bad!
@@ -2285,6 +2288,49 @@ bool MIParser::parsePredicateOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_shufflemask));
+
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax shufflemask(<integer or undef>, ...)");
+
+ SmallVector<Constant *, 32> ShufMask;
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ Type *I32Ty = Type::getInt32Ty(Ctx);
+
+ bool AllZero = true;
+ bool AllUndef = true;
+
+ do {
+ if (Token.is(MIToken::kw_undef)) {
+ ShufMask.push_back(UndefValue::get(I32Ty));
+ AllZero = false;
+ } else if (Token.is(MIToken::IntegerLiteral)) {
+ AllUndef = false;
+ const APSInt &Int = Token.integerValue();
+ if (!Int.isNullValue())
+ AllZero = false;
+ ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue()));
+ } else
+ return error("expected integer constant");
+
+ lex();
+ } while (consumeIfPresent(MIToken::comma));
+
+ if (expectAndConsume(MIToken::rparen))
+ return error("shufflemask should be terminated by ')'.");
+
+ if (AllZero || AllUndef) {
+ VectorType *VT = VectorType::get(I32Ty, ShufMask.size());
+ Constant *C = AllZero ? Constant::getNullValue(VT) : UndefValue::get(VT);
+ Dest = MachineOperand::CreateShuffleMask(C);
+ } else
+ Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask));
+
+ return false;
+}
+
bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::kw_target_index));
lex();
@@ -2432,6 +2478,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
case MIToken::kw_floatpred:
case MIToken::kw_intpred:
return parsePredicateOperand(Dest);
+ case MIToken::kw_shufflemask:
+ return parseShuffleMaskOperand(Dest);
case MIToken::Error:
return true;
case MIToken::Identifier:
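
parseShuffleMaskOperand above accepts a parenthesized, comma-separated list of i32 literals or undef. A sketch of the constant the parser would build for a hypothetical operand shufflemask(0, 2, undef, 3); note that all-zero and all-undef masks are collapsed to splat constants before CreateShuffleMask is called:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Type.h"

    static llvm::Constant *exampleShuffleMask(llvm::LLVMContext &Ctx) {
      llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
      // Mirrors shufflemask(0, 2, undef, 3): integers become ConstantInt,
      // undef becomes UndefValue, and the list is wrapped in a ConstantVector.
      llvm::Constant *Elts[] = {llvm::ConstantInt::get(I32, 0),
                                llvm::ConstantInt::get(I32, 2),
                                llvm::UndefValue::get(I32),
                                llvm::ConstantInt::get(I32, 3)};
      return llvm::ConstantVector::get(Elts);
    }
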
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index b242934def80..55fac93d8991 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -216,7 +216,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
return nullptr;
// Create an empty module when the MIR file is empty.
NoMIRDocuments = true;
- return llvm::make_unique<Module>(Filename, Context);
+ return std::make_unique<Module>(Filename, Context);
}
std::unique_ptr<Module> M;
@@ -236,7 +236,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
NoMIRDocuments = true;
} else {
// Create an new, empty module.
- M = llvm::make_unique<Module>(Filename, Context);
+ M = std::make_unique<Module>(Filename, Context);
NoLLVMIR = true;
}
return M;
@@ -306,7 +306,7 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
static bool isSSA(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg))
return false;
}
@@ -355,10 +355,10 @@ bool MIRParserImpl::initializeCallSiteInfo(
if (MILoc.Offset >= CallB->size())
return error(Twine(MF.getName()) +
Twine(" call instruction offset out of range.") +
- "Unable to reference instruction at bb: " +
+ " Unable to reference instruction at bb: " +
Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset));
- auto CallI = std::next(CallB->begin(), MILoc.Offset);
- if (!CallI->isCall())
+ auto CallI = std::next(CallB->instr_begin(), MILoc.Offset);
+ if (!CallI->isCall(MachineInstr::IgnoreBundle))
return error(Twine(MF.getName()) +
Twine(" call site info should reference call "
"instruction. Instruction at bb:") +
@@ -393,7 +393,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
}
if (YamlMF.Alignment)
- MF.setAlignment(YamlMF.Alignment);
+ MF.setAlignment(Align(YamlMF.Alignment));
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
MF.setHasWinCFI(YamlMF.HasWinCFI);
@@ -949,6 +949,6 @@ llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
"Can't read MIR with a Context that discards named Values")));
return nullptr;
}
- return llvm::make_unique<MIRParser>(
- llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
+ return std::make_unique<MIRParser>(
+ std::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
}
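
The llvm::make_unique to std::make_unique switch here (and again in MachineBlockPlacement below) follows LLVM's move to a C++14 baseline, which makes the standard-library version available. A trivial sketch:

    #include <memory>
    #include "llvm/IR/Module.h"

    // llvm::make_unique was a pre-C++14 polyfill; it is now spelled
    // std::make_unique directly, with identical semantics.
    static std::unique_ptr<llvm::Module>
    makeEmptyModule(llvm::StringRef Name, llvm::LLVMContext &Ctx) {
      return std::make_unique<llvm::Module>(Name, Ctx);
    }
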
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index 0a95a0ced0f5..1a4e21ac06a9 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -197,7 +197,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
yaml::MachineFunction YamlMF;
YamlMF.Name = MF.getName();
- YamlMF.Alignment = MF.getAlignment();
+ YamlMF.Alignment = MF.getAlignment().value();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
YamlMF.HasWinCFI = MF.hasWinCFI();
@@ -290,7 +290,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
// Print the virtual register definitions.
for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
yaml::VirtualRegisterDefinition VReg;
VReg.ID = I;
if (RegInfo.getVRegName(Reg) != "")
@@ -473,10 +473,11 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF,
yaml::CallSiteInfo::MachineInstrLoc CallLocation;
// Prepare instruction position.
- MachineBasicBlock::const_iterator CallI = CSInfo.first->getIterator();
+ MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator();
CallLocation.BlockNum = CallI->getParent()->getNumber();
// Get call instruction offset from the beginning of block.
- CallLocation.Offset = std::distance(CallI->getParent()->begin(), CallI);
+ CallLocation.Offset =
+ std::distance(CallI->getParent()->instr_begin(), CallI);
YmlCS.CallLocation = CallLocation;
// Construct call arguments and theirs forwarding register info.
for (auto ArgReg : CSInfo.second) {
@@ -628,9 +629,9 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
OS << "landing-pad";
HasAttributes = true;
}
- if (MBB.getAlignment()) {
+ if (MBB.getAlignment() != Align::None()) {
OS << (HasAttributes ? ", " : " (");
- OS << "align " << MBB.getAlignment();
+ OS << "align " << MBB.getAlignment().value();
HasAttributes = true;
}
if (HasAttributes)
@@ -842,7 +843,8 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
case MachineOperand::MO_CFIIndex:
case MachineOperand::MO_IntrinsicID:
case MachineOperand::MO_Predicate:
- case MachineOperand::MO_BlockAddress: {
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ShuffleMask: {
unsigned TiedOperandIdx = 0;
if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
diff --git a/lib/CodeGen/MIRVRegNamerUtils.cpp b/lib/CodeGen/MIRVRegNamerUtils.cpp
new file mode 100644
index 000000000000..6629000f468f
--- /dev/null
+++ b/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -0,0 +1,348 @@
+//===---------- MIRVRegNamerUtils.cpp - MIR VReg Renaming Utilities -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRVRegNamerUtils.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mir-vregnamer-utils"
+
+namespace {
+
+// TypedVReg and VRType are used to tell the renamer what to do at points in a
+// sequence of values to be renamed. A TypedVReg can either contain
+// an actual VReg, a FrameIndex, or it could just be a barrier for the next
+// candidate (side-effecting instruction). This tells the renamer to increment
+// to the next vreg name, or to skip modulo some skip-gap value.
+enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
+class TypedVReg {
+ VRType Type;
+ Register Reg;
+
+public:
+ TypedVReg(Register Reg) : Type(RSE_Reg), Reg(Reg) {}
+ TypedVReg(VRType Type) : Type(Type), Reg(~0U) {
+ assert(Type != RSE_Reg && "Expected a non-Register Type.");
+ }
+
+ bool isReg() const { return Type == RSE_Reg; }
+ bool isFrameIndex() const { return Type == RSE_FrameIndex; }
+ bool isCandidate() const { return Type == RSE_NewCandidate; }
+
+ VRType getType() const { return Type; }
+ Register getReg() const {
+ assert(this->isReg() && "Expected a virtual or physical Register.");
+ return Reg;
+ }
+};
+
+/// Here we find our candidates. What makes an interesting candidate?
+/// A candidate for a canonicalization tree root is normally any kind of
+/// instruction that causes side effects such as a store to memory or a copy to
+/// a physical register or a return instruction. We use these as an expression
+/// tree root that we walk in order to build a canonical walk which should
+/// result in canonical vreg renaming.
+std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
+ std::vector<MachineInstr *> Candidates;
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+ MachineInstr *MI = &*II;
+
+ bool DoesMISideEffect = false;
+
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
+ const Register Dst = MI->getOperand(0).getReg();
+ DoesMISideEffect |= !Register::isVirtualRegister(Dst);
+
+ for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
+ if (DoesMISideEffect)
+ break;
+ DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
+ }
+ }
+
+ if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
+ Candidates.push_back(MI);
+ }
+
+ return Candidates;
+}
+
+void doCandidateWalk(std::vector<TypedVReg> &VRegs,
+ std::queue<TypedVReg> &RegQueue,
+ std::vector<MachineInstr *> &VisitedMIs,
+ const MachineBasicBlock *MBB) {
+
+ const MachineFunction &MF = *MBB->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ while (!RegQueue.empty()) {
+
+ auto TReg = RegQueue.front();
+ RegQueue.pop();
+
+ if (TReg.isFrameIndex()) {
+ LLVM_DEBUG(dbgs() << "Popping frame index.\n";);
+ VRegs.push_back(TypedVReg(RSE_FrameIndex));
+ continue;
+ }
+
+ assert(TReg.isReg() && "Expected vreg or physreg.");
+ Register Reg = TReg.getReg();
+
+ if (Register::isVirtualRegister(Reg)) {
+ LLVM_DEBUG({
+ dbgs() << "Popping vreg ";
+ MRI.def_begin(Reg)->dump();
+ dbgs() << "\n";
+ });
+
+ if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) {
+ return TR.isReg() && TR.getReg() == Reg;
+ })) {
+ VRegs.push_back(TypedVReg(Reg));
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "Popping physreg.\n";);
+ VRegs.push_back(TypedVReg(Reg));
+ continue;
+ }
+
+ for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
+ MachineInstr *Def = RI->getParent();
+
+ if (Def->getParent() != MBB)
+ continue;
+
+ if (llvm::any_of(VisitedMIs,
+ [&](const MachineInstr *VMI) { return Def == VMI; })) {
+ break;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "\n========================\n";
+ dbgs() << "Visited MI: ";
+ Def->dump();
+ dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
+ dbgs() << "\n========================\n";
+ });
+ VisitedMIs.push_back(Def);
+ for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
+
+ MachineOperand &MO = Def->getOperand(I);
+ if (MO.isFI()) {
+ LLVM_DEBUG(dbgs() << "Pushing frame index.\n";);
+ RegQueue.push(TypedVReg(RSE_FrameIndex));
+ }
+
+ if (!MO.isReg())
+ continue;
+ RegQueue.push(TypedVReg(MO.getReg()));
+ }
+ }
+ }
+}
+
+std::map<unsigned, unsigned>
+getVRegRenameMap(const std::vector<TypedVReg> &VRegs,
+ const std::vector<Register> &renamedInOtherBB,
+ MachineRegisterInfo &MRI, NamedVRegCursor &NVC) {
+ std::map<unsigned, unsigned> VRegRenameMap;
+ bool FirstCandidate = true;
+
+ for (auto &vreg : VRegs) {
+ if (vreg.isFrameIndex()) {
+ // We skip one vreg for any frame index because there is a good chance
+ // (especially when comparing SelectionDAG to GlobalISel generated MIR)
+ // that in the other file we are just getting an incoming vreg that comes
+ // from a copy from a frame index. So it's safe to skip by one.
+ unsigned LastRenameReg = NVC.incrementVirtualVReg();
+ (void)LastRenameReg;
+ LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
+ continue;
+ } else if (vreg.isCandidate()) {
+
+      // After the first candidate, for every subsequent candidate, we skip mod
+      // 10 registers so that the candidates are more likely to start at the
+      // same vreg number, making it more likely that the canonical walks from
+      // each candidate instruction line up. We don't need to skip from the
+      // first candidate of the BasicBlock because we already skip ahead several vregs for each BB.
+ unsigned LastRenameReg = NVC.getVirtualVReg();
+ if (FirstCandidate)
+ NVC.incrementVirtualVReg(LastRenameReg % 10);
+ FirstCandidate = false;
+ continue;
+ } else if (!Register::isVirtualRegister(vreg.getReg())) {
+ unsigned LastRenameReg = NVC.incrementVirtualVReg();
+ (void)LastRenameReg;
+ LLVM_DEBUG({
+ dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
+ });
+ continue;
+ }
+
+ auto Reg = vreg.getReg();
+ if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
+ LLVM_DEBUG(dbgs() << "Vreg " << Reg
+ << " already renamed in other BB.\n";);
+ continue;
+ }
+
+ auto Rename = NVC.createVirtualRegister(Reg);
+
+ if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
+ LLVM_DEBUG(dbgs() << "Mapping vreg ";);
+ if (MRI.reg_begin(Reg) != MRI.reg_end()) {
+ LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
+ } else {
+ LLVM_DEBUG(dbgs() << Reg;);
+ }
+ LLVM_DEBUG(dbgs() << " to ";);
+ if (MRI.reg_begin(Rename) != MRI.reg_end()) {
+ LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
+ } else {
+ LLVM_DEBUG(dbgs() << Rename;);
+ }
+ LLVM_DEBUG(dbgs() << "\n";);
+
+ VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
+ }
+ }
+
+ return VRegRenameMap;
+}
+
+bool doVRegRenaming(std::vector<Register> &renamedInOtherBB,
+ const std::map<unsigned, unsigned> &VRegRenameMap,
+ MachineRegisterInfo &MRI) {
+ bool Changed = false;
+ for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
+
+ auto VReg = I->first;
+ auto Rename = I->second;
+
+ renamedInOtherBB.push_back(Rename);
+
+ std::vector<MachineOperand *> RenameMOs;
+ for (auto &MO : MRI.reg_operands(VReg)) {
+ RenameMOs.push_back(&MO);
+ }
+
+ for (auto *MO : RenameMOs) {
+ Changed = true;
+ MO->setReg(Rename);
+
+ if (!MO->isDef())
+ MO->setIsKill(false);
+ }
+ }
+
+ return Changed;
+}
+
+bool renameVRegs(MachineBasicBlock *MBB,
+ std::vector<Register> &renamedInOtherBB,
+ NamedVRegCursor &NVC) {
+ bool Changed = false;
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
+ std::vector<MachineInstr *> VisitedMIs;
+ llvm::copy(Candidates, std::back_inserter(VisitedMIs));
+
+ std::vector<TypedVReg> VRegs;
+ for (auto candidate : Candidates) {
+ VRegs.push_back(TypedVReg(RSE_NewCandidate));
+
+ std::queue<TypedVReg> RegQueue;
+
+    // Here we walk the vreg operands of a non-root node along our walk.
+    // The root nodes are the original candidates (stores normally). The
+    // candidates handled here are normally not root nodes (except for the
+    // case of copies to physical registers).
+ for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
+ if (candidate->mayStore() || candidate->isBranch())
+ break;
+
+ MachineOperand &MO = candidate->getOperand(i);
+ if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
+ RegQueue.push(TypedVReg(MO.getReg()));
+ }
+
+ // Here we walk the root candidates. We start from the 0th operand because
+ // the root is normally a store to a vreg.
+ for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
+
+ if (!candidate->mayStore() && !candidate->isBranch())
+ break;
+
+ MachineOperand &MO = candidate->getOperand(i);
+
+ // TODO: Do we want to only add vregs here?
+ if (!MO.isReg() && !MO.isFI())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
+
+ RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg())
+ : TypedVReg(RSE_FrameIndex));
+ }
+
+ doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
+ }
+
+ // If we have populated no vregs to rename then bail.
+  // The rest of this function does the vreg remapping.
+ if (VRegs.size() == 0)
+ return Changed;
+
+ auto VRegRenameMap = getVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC);
+ Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
+ return Changed;
+}
+} // anonymous namespace
+
+void NamedVRegCursor::skipVRegs() {
+ unsigned VRegGapIndex = 1;
+ if (!virtualVRegNumber) {
+ VRegGapIndex = 0;
+ virtualVRegNumber = MRI.createIncompleteVirtualRegister();
+ }
+ const unsigned VR_GAP = (++VRegGapIndex * SkipGapSize);
+
+ unsigned I = virtualVRegNumber;
+ const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
+
+ virtualVRegNumber = E;
+}
+
+unsigned NamedVRegCursor::createVirtualRegister(unsigned VReg) {
+ if (!virtualVRegNumber)
+ skipVRegs();
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
+ OS.flush();
+ virtualVRegNumber++;
+ if (auto RC = MRI.getRegClassOrNull(VReg))
+ return MRI.createVirtualRegister(RC, OS.str());
+ return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
+}
+
+bool NamedVRegCursor::renameVRegs(MachineBasicBlock *MBB) {
+ return ::renameVRegs(MBB, RenamedInOtherBB, *this);
+}
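
Taken together with the header below, the intended entry point is NamedVRegCursor::renameVRegs. A minimal driver, mirroring the MIRNamerPass added earlier in this commit (blocks are visited in reverse post-order so names are assigned deterministically):

    #include "MIRVRegNamerUtils.h"
    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/CodeGen/MachineFunction.h"

    static bool renameAllVRegs(llvm::MachineFunction &MF) {
      bool Changed = false;
      if (MF.empty())
        return Changed;
      llvm::NamedVRegCursor NVC(MF.getRegInfo());
      // Rename each block's def-use graphs from their side-effecting roots.
      llvm::ReversePostOrderTraversal<llvm::MachineBasicBlock *> RPOT(
          &*MF.begin());
      for (llvm::MachineBasicBlock *MBB : RPOT)
        Changed |= NVC.renameVRegs(MBB);
      return Changed;
    }
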
diff --git a/lib/CodeGen/MIRVRegNamerUtils.h b/lib/CodeGen/MIRVRegNamerUtils.h
new file mode 100644
index 000000000000..c5b52a968538
--- /dev/null
+++ b/lib/CodeGen/MIRVRegNamerUtils.h
@@ -0,0 +1,91 @@
+
+//===------------ MIRVRegNamerUtils.h - MIR VReg Renaming Utilities -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of these utilities is to abstract out parts of the MIRCanon pass
+// that are responsible for renaming virtual registers with the purpose of
+// sharing code with a MIRVRegNamer pass that could be the analog of the
+// opt -instnamer pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H
+#define LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+
+namespace llvm {
+
+/// NamedVRegCursor - The cursor is an object that keeps track of what the next
+/// vreg name should be. It does bookkeeping to determine when to skip the
+/// index value and by how much, or if the next vreg name should be an increment
+/// from the previous.
+class NamedVRegCursor {
+ MachineRegisterInfo &MRI;
+
+  /// virtualVRegNumber - Bookkeeping of the last vreg position.
+ unsigned virtualVRegNumber;
+
+ /// SkipGapSize - Used to calculate a modulo amount to skip by after every
+ /// sequence of instructions starting from a given side-effecting
+ /// MachineInstruction for a given MachineBasicBlock. The general idea is that
+ /// for a given program compiled with two different opt pipelines, there
+  /// shouldn't be more than a SkipGapSize difference in how many vregs are in
+  /// play between the two; for every def-use graph of vregs we rename, we
+  /// round up to the next multiple of SkipGapSize so that we have a high
+  /// chance of landing on the same name for two matching side-effects
+  /// across the two compilation outcomes.
+ const unsigned SkipGapSize;
+
+  /// RenamedInOtherBB - VRegs that we already renamed, i.e. breadcrumbs.
+ std::vector<Register> RenamedInOtherBB;
+
+public:
+ NamedVRegCursor() = delete;
+ /// 1000 for the SkipGapSize was a good heuristic at the time of the writing
+ /// of the MIRCanonicalizerPass. Adjust as needed.
+ NamedVRegCursor(MachineRegisterInfo &MRI, unsigned SkipGapSize = 1000)
+ : MRI(MRI), virtualVRegNumber(0), SkipGapSize(SkipGapSize) {}
+
+  /// skipVRegs - Skips modulo a gap value of indices. Indices are used to
+ /// produce the next vreg name.
+ void skipVRegs();
+
+ unsigned getVirtualVReg() const { return virtualVRegNumber; }
+
+  /// incrementVirtualVReg - This increments an index value that is used to
+ /// create a new vreg name. This is not a Register.
+ unsigned incrementVirtualVReg(unsigned incr = 1) {
+ virtualVRegNumber += incr;
+ return virtualVRegNumber;
+ }
+
+ /// createVirtualRegister - Given an existing vreg, create a named vreg to
+ /// take its place.
+ unsigned createVirtualRegister(unsigned VReg);
+
+ /// renameVRegs - For a given MachineBasicBlock, scan for side-effecting
+ /// instructions, walk the def-use from each side-effecting root (in sorted
+ /// root order) and rename the encountered vregs in the def-use graph in a
+  /// canonical ordering. This method maintains bookkeeping for which vregs
+ /// were already renamed in RenamedInOtherBB.
+  /// \returns true if any vreg was renamed.
+ bool renameVRegs(MachineBasicBlock *MBB);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 4d29e883d879..854bef3aab05 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -39,6 +39,12 @@ using namespace llvm;
#define DEBUG_TYPE "codegen"
+static cl::opt<bool> PrintSlotIndexes(
+ "print-slotindexes",
+ cl::desc("When printing machine IR, annotate instructions and blocks with "
+ "SlotIndexes when available"),
+ cl::init(true), cl::Hidden);
+
MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
: BB(B), Number(-1), xParent(&MF) {
Insts.Parent = this;
@@ -291,7 +297,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
return;
}
- if (Indexes)
+ if (Indexes && PrintSlotIndexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
OS << "bb." << getNumber();
@@ -320,9 +326,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "landing-pad";
HasAttributes = true;
}
- if (getAlignment()) {
+ if (getAlignment() != Align::None()) {
OS << (HasAttributes ? ", " : " (");
- OS << "align " << getAlignment();
+ OS << "align " << Log2(getAlignment());
HasAttributes = true;
}
if (HasAttributes)
@@ -402,7 +408,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool IsInBundle = false;
for (const MachineInstr &MI : instrs()) {
- if (Indexes) {
+ if (Indexes && PrintSlotIndexes) {
if (Indexes->hasIndex(MI))
OS << Indexes->getInstructionIndex(MI);
OS << '\t';
@@ -484,9 +490,9 @@ void MachineBasicBlock::sortUniqueLiveIns() {
}
unsigned
-MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) {
+MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) {
assert(getParent() && "MBB must be inserted in function");
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg");
+ assert(PhysReg.isPhysical() && "Expected physreg");
assert(RC && "Register class is required");
assert((isEHPad() || this == &getParent()->front()) &&
"Only the entry block and landing pads can have physreg live ins");
@@ -500,14 +506,14 @@ MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) {
if (LiveIn)
for (;I != E && I->isCopy(); ++I)
if (I->getOperand(1).getReg() == PhysReg) {
- unsigned VirtReg = I->getOperand(0).getReg();
+ Register VirtReg = I->getOperand(0).getReg();
if (!MRI.constrainRegClass(VirtReg, RC))
llvm_unreachable("Incompatible live-in register class.");
return VirtReg;
}
// No luck, create a virtual register.
- unsigned VirtReg = MRI.createVirtualRegister(RC);
+ Register VirtReg = MRI.createVirtualRegister(RC);
BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg)
.addReg(PhysReg, RegState::Kill);
if (!LiveIn)
@@ -772,7 +778,8 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) {
while (!FromMBB->succ_empty()) {
MachineBasicBlock *Succ = *FromMBB->succ_begin();
- // If probability list is empty it means we don't use it (disabled optimization).
+ // If probability list is empty it means we don't use it (disabled
+ // optimization).
if (!FromMBB->Probs.empty()) {
auto Prob = *FromMBB->Probs.begin();
addSuccessor(Succ, Prob);
@@ -798,13 +805,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
FromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
- for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
- ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
- for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.getMBB() == FromMBB)
- MO.setMBB(this);
- }
+ Succ->replacePhiUsesWith(FromMBB, this);
}
normalizeSuccProbs();
}
@@ -907,8 +908,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
if (!OI->isReg() || OI->getReg() == 0 ||
!OI->isUse() || !OI->isKill() || OI->isUndef())
continue;
- unsigned Reg = OI->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ Register Reg = OI->getReg();
+ if (Register::isPhysicalRegister(Reg) ||
LV->getVarInfo(Reg).removeKill(*MI)) {
KilledRegs.push_back(Reg);
LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
@@ -928,7 +929,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
if (!OI->isReg() || OI->getReg() == 0)
continue;
- unsigned Reg = OI->getReg();
+ Register Reg = OI->getReg();
if (!is_contained(UsedRegs, Reg))
UsedRegs.push_back(Reg);
}
@@ -979,13 +980,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
}
}
- // Fix PHI nodes in Succ so they refer to NMBB instead of this
- for (MachineBasicBlock::instr_iterator
- i = Succ->instr_begin(),e = Succ->instr_end();
- i != e && i->isPHI(); ++i)
- for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
- if (i->getOperand(ni+1).getMBB() == this)
- i->getOperand(ni+1).setMBB(NMBB);
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this.
+ Succ->replacePhiUsesWith(this, NMBB);
// Inherit live-ins from the successor
for (const auto &LI : Succ->liveins())
@@ -1000,7 +996,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false))
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
LV->getVarInfo(Reg).Kills.push_back(&*I);
LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I);
break;
@@ -1033,7 +1029,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
if (I->getOperand(ni+1).getMBB() == NMBB) {
MachineOperand &MO = I->getOperand(ni);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
PHISrcRegs.insert(Reg);
if (MO.isUndef())
continue;
@@ -1049,7 +1045,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
MachineRegisterInfo *MRI = &getParent()->getRegInfo();
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
continue;
@@ -1217,6 +1213,16 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
replaceSuccessor(Old, New);
}
+void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ for (MachineInstr &MI : phis())
+ for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.getMBB() == Old)
+ MO.setMBB(New);
+ }
+}
+
/// Various pieces of code can cause excess edges in the CFG to be inserted. If
/// we have proven that MBB can only branch to DestA and DestB, remove any other
/// MBB successors from the CFG. DestA and DestB can be null.
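
The new replacePhiUsesWith helper centralizes a PHI-walking loop that previously appeared in both transferSuccessorsAndUpdatePHIs and SplitCriticalEdge. The operand layout it relies on: in a MIR PHI such as %d = PHI %v1, %bb.1, %v2, %bb.2, operand 0 is the def and the incoming blocks sit at indices 2, 4, 6, ..., which is why the loop starts at i = 2 and steps by 2. A hypothetical caller:

    #include "llvm/CodeGen/MachineBasicBlock.h"

    // After rerouting an edge, point Succ's PHIs at the new predecessor in
    // one call instead of an open-coded operand walk.
    static void reroutePhiEdges(llvm::MachineBasicBlock &Succ,
                                llvm::MachineBasicBlock *OldPred,
                                llvm::MachineBasicBlock *NewPred) {
      Succ.replacePhiUsesWith(OldPred, NewPred);
    }
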
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 639b588766a1..ac19bc0bd8ea 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -79,16 +79,17 @@ STATISTIC(CondBranchTakenFreq,
STATISTIC(UncondBranchTakenFreq,
"Potential frequency of taking unconditional branches");
-static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
- cl::desc("Force the alignment of all "
- "blocks in the function."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> AlignAllBlock(
+ "align-all-blocks",
+ cl::desc("Force the alignment of all blocks in the function in log2 format "
+             "(e.g. 4 means align on 16B boundaries)."),
+ cl::init(0), cl::Hidden);
static cl::opt<unsigned> AlignAllNonFallThruBlocks(
"align-all-nofallthru-blocks",
- cl::desc("Force the alignment of all "
- "blocks that have no fall-through predecessors (i.e. don't add "
- "nops that are executed)."),
+ cl::desc("Force the alignment of all blocks that have no fall-through "
+ "predecessors (i.e. don't add nops that are executed). In log2 "
+             "format (e.g. 4 means align on 16B boundaries)."),
cl::init(0), cl::Hidden);
// FIXME: Find a good default for this flag and remove the flag.
@@ -2763,8 +2764,8 @@ void MachineBlockPlacement::alignBlocks() {
if (!L)
continue;
- unsigned Align = TLI->getPrefLoopAlignment(L);
- if (!Align)
+ const Align Align = TLI->getPrefLoopAlignment(L);
+ if (Align == 1)
continue; // Don't care about loop alignment.
// If the block is cold relative to the function entry don't waste space
@@ -2981,7 +2982,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
F = &MF;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
+ MBFI = std::make_unique<BranchFolder::MBFIWrapper>(
getAnalysis<MachineBlockFrequencyInfo>());
MLI = &getAnalysis<MachineLoopInfo>();
TII = MF.getSubtarget().getInstrInfo();
@@ -3038,8 +3039,9 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
+ MMIWP ? &MMIWP->getMMI() : nullptr, MLI,
/*AfterPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
@@ -3062,14 +3064,14 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (AlignAllBlock)
// Align all of the blocks in the function to a specific alignment.
for (MachineBasicBlock &MBB : MF)
- MBB.setAlignment(AlignAllBlock);
+ MBB.setAlignment(Align(1ULL << AlignAllBlock));
else if (AlignAllNonFallThruBlocks) {
// Align all of the blocks that have no fall-through predecessors to a
// specific alignment.
for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
auto LayoutPred = std::prev(MBI);
if (!LayoutPred->isSuccessor(&*MBI))
- MBI->setAlignment(AlignAllNonFallThruBlocks);
+ MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks));
}
}
if (ViewBlockLayoutWithBFI != GVDT_None &&
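
Note the semantic change to -align-all-blocks and -align-all-nofallthru-blocks: the flag value is now interpreted as a log2 amount rather than a byte count. A minimal sketch of the conversion these hunks apply:

    #include "llvm/Support/Alignment.h"

    // A flag value of N requests (1 << N)-byte alignment, so e.g. N == 4
    // aligns blocks on 16-byte boundaries.
    static llvm::Align blockAlignFromLog2Flag(unsigned N) {
      return llvm::Align(1ULL << N);
    }
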
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 2df6d40d9293..d9bd32b2fbab 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -66,6 +67,7 @@ namespace {
AliasAnalysis *AA;
MachineDominatorTree *DT;
MachineRegisterInfo *MRI;
+ MachineBlockFrequencyInfo *MBFI;
public:
static char ID; // Pass identification
@@ -83,6 +85,8 @@ namespace {
AU.addPreservedID(MachineLoopInfoID);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
}
void releaseMemory() override {
@@ -133,6 +137,11 @@ namespace {
bool isPRECandidate(MachineInstr *MI);
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
bool PerformSimplePRE(MachineDominatorTree *DT);
+ /// Heuristics to see if it's profitable to move common computations of MBB
+ /// and MBB1 to CandidateBB.
+ bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *MBB1);
};
} // end anonymous namespace
@@ -158,15 +167,15 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
for (MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (!DefMI->isCopy())
continue;
- unsigned SrcReg = DefMI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = DefMI->getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
continue;
if (DefMI->getOperand(0).getSubReg())
continue;
@@ -189,14 +198,16 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
LLVM_DEBUG(dbgs() << "*** to: " << *MI);
- // Update matching debug values.
- DefMI->changeDebugValuesDefReg(SrcReg);
-
// Propagate SrcReg of copies to MI.
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
// Coalesce single use copies.
if (OnlyOneUse) {
+ // If (and only if) we've eliminated all uses of the copy, also
+ // copy-propagate to any debug-users of MI, or they'll be left using
+ // an undefined value.
+ DefMI->changeDebugValuesDefReg(SrcReg);
+
DefMI->eraseFromParent();
++NumCoalesces;
}
@@ -271,10 +282,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
continue;
// Reading either caller preserved or constant physregs is ok.
if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
@@ -290,10 +301,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineOperand &MO = MOP.value();
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
continue;
// Check against PhysRefs even if the def is "dead".
if (PhysRefs.count(Reg))
@@ -367,8 +378,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
return false;
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned MOReg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ Register MOReg = MO.getReg();
+ if (Register::isVirtualRegister(MOReg))
continue;
if (PhysRefs.count(MOReg))
return false;
@@ -424,8 +435,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// If CSReg is used at all uses of Reg, CSE should not increase register
// pressure of CSReg.
bool MayIncreasePressure = true;
- if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
- TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(CSReg) && Register::isVirtualRegister(Reg)) {
MayIncreasePressure = false;
SmallPtrSet<MachineInstr*, 8> CSUses;
for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
@@ -453,8 +463,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// of the redundant computation are copies, do not cse.
bool HasVRegUse = false;
for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isUse() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) {
HasVRegUse = true;
break;
}
@@ -586,8 +595,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned OldReg = MO.getReg();
- unsigned NewReg = CSMI->getOperand(i).getReg();
+ Register OldReg = MO.getReg();
+ Register NewReg = CSMI->getOperand(i).getReg();
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
// we should make sure it is not dead at CSMI.
@@ -604,8 +613,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
continue;
}
- assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
- TargetRegisterInfo::isVirtualRegister(NewReg) &&
+ assert(Register::isVirtualRegister(OldReg) &&
+ Register::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
@@ -769,11 +778,11 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) {
return false;
for (auto def : MI->defs())
- if (!TRI->isVirtualRegister(def.getReg()))
+ if (!Register::isVirtualRegister(def.getReg()))
return false;
for (auto use : MI->uses())
- if (use.isReg() && !TRI->isVirtualRegister(use.getReg()))
+ if (use.isReg() && !Register::isVirtualRegister(use.getReg()))
return false;
return true;
@@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
if (!CMBB->isLegalToHoistInto())
continue;
+ if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
+ continue;
+
// Two instrs are partial redundant if their basic blocks are reachable
// from one to another but one doesn't dominate another.
if (CMBB != MBB1) {
@@ -812,8 +824,8 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
assert(MI->getOperand(0).isDef() &&
"First operand of instr with one explicit def must be this def");
- unsigned VReg = MI->getOperand(0).getReg();
- unsigned NewReg = MRI->cloneVirtualRegister(VReg);
+ Register VReg = MI->getOperand(0).getReg();
+ Register NewReg = MRI->cloneVirtualRegister(VReg);
if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
continue;
MachineInstr &NewMI =
@@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
return Changed;
}
+bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *MBB1) {
+ if (CandidateBB->getParent()->getFunction().hasMinSize())
+ return true;
+ assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
+ assert(DT->dominates(CandidateBB, MBB1) &&
+ "CandidateBB should dominate MBB1");
+ return MBFI->getBlockFreq(CandidateBB) <=
+ MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
+}
+
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
bool ChangedPRE, ChangedCSE;
ChangedPRE = PerformSimplePRE(DT);
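
The profitability gate wired in above compares block frequencies. A minimal standalone sketch of that rule, with plain integers standing in for MachineBlockFrequencyInfo results and illustrative names:

#include <cassert>
#include <cstdint>

// Hoisting into the candidate block is profitable when its execution
// frequency does not exceed the combined frequency of the two partially
// redundant blocks.
bool isProfitableToHoistInto(uint64_t CandidateFreq, uint64_t FreqA,
                             uint64_t FreqB, bool OptForMinSize) {
  if (OptForMinSize)
    return true; // removing the redundant instruction always shrinks code
  return CandidateFreq <= FreqA + FreqB;
}

int main() {
  assert(isProfitableToHoistInto(100, 60, 50, false));  // 100 <= 60 + 50
  assert(!isProfitableToHoistInto(200, 60, 50, false)); // would add dynamic work
  assert(isProfitableToHoistInto(200, 60, 50, true));   // minsize: always hoist
}
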
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 0584ec0bd2b3..e9f462fd1b37 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -137,7 +137,7 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
MachineInstr *DefInstr = nullptr;
// We need a virtual register definition.
- if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
DefInstr = MRI->getUniqueVRegDef(MO.getReg());
// PHI's have no depth etc.
if (DefInstr && DefInstr->isPHI())
@@ -168,7 +168,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IDepth = 0;
for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
- if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
continue;
if (!MO.isUse())
continue;
@@ -223,7 +223,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
for (const MachineOperand &MO : NewRoot->operands()) {
// Check for virtual register operand.
- if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
continue;
if (!MO.isDef())
continue;
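
For context on the functions these hunks touch: MachineCombiner evaluates alternative instruction sequences by their critical-path depth over virtual-register def-use chains. A self-contained model of that depth recurrence, assuming instructions in topological order (a sketch, not the MachineTraceMetrics API):

#include <algorithm>
#include <cassert>
#include <vector>

struct Instr {
  std::vector<int> DefOps; // indices of the defining instructions
  unsigned Latency = 1;
};

// Depth of each instruction = max over its operands of (depth of the
// defining instruction + that definition's latency).
std::vector<unsigned> computeDepths(const std::vector<Instr> &Instrs) {
  std::vector<unsigned> Depth(Instrs.size(), 0);
  for (size_t I = 0; I < Instrs.size(); ++I)
    for (int Def : Instrs[I].DefOps)
      Depth[I] = std::max(Depth[I], Depth[Def] + Instrs[Def].Latency);
  return Depth;
}

int main() {
  // %0 = ... (lat 2); %1 = f(%0) (lat 3); %2 = g(%0, %1)
  std::vector<Instr> Is = {{{}, 2}, {{0}, 3}, {{0, 1}, 1}};
  assert(computeDepths(Is)[2] == 5); // longest path %0 -> %1 -> %2: 2 + 3
}
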
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 9fc12ac89e12..ebe76e31dca9 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -119,8 +119,8 @@ public:
void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
assert(MI->isCopy() && "Tracking non-copy?");
- unsigned Def = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
+ Register Def = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
// Remember Def is defined by the copy.
for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
@@ -163,8 +163,8 @@ public:
// Check that the available copy isn't clobbered by any regmasks between
// itself and the destination.
- unsigned AvailSrc = AvailCopy->getOperand(1).getReg();
- unsigned AvailDef = AvailCopy->getOperand(0).getReg();
+ Register AvailSrc = AvailCopy->getOperand(1).getReg();
+ Register AvailDef = AvailCopy->getOperand(0).getReg();
for (const MachineInstr &MI :
make_range(AvailCopy->getIterator(), DestCopy.getIterator()))
for (const MachineOperand &MO : MI.operands())
@@ -205,8 +205,11 @@ public:
}
private:
+ typedef enum { DebugUse = false, RegularUse = true } DebugType;
+
void ClobberRegister(unsigned Reg);
- void ReadRegister(unsigned Reg);
+ void ReadRegister(unsigned Reg, MachineInstr &Reader,
+ DebugType DT);
void CopyPropagateBlock(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
void forwardUses(MachineInstr &MI);
@@ -217,6 +220,9 @@ private:
/// Candidates for deletion.
SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
+ /// Multimap tracking debug users in current BB
+ DenseMap<MachineInstr*, SmallVector<MachineInstr*, 2>> CopyDbgUsers;
+
CopyTracker Tracker;
bool Changed;
@@ -231,13 +237,19 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
-void MachineCopyPropagation::ReadRegister(unsigned Reg) {
+void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader,
+ DebugType DT) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
- // for elimination.
+ // for elimination. If a copy is "read" by a debug user, record the user
+ // for propagation.
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) {
- LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
- MaybeDeadCopies.remove(Copy);
+ if (DT == RegularUse) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
+ MaybeDeadCopies.remove(Copy);
+ } else {
+ CopyDbgUsers[Copy].push_back(&Reader);
+ }
}
}
}
@@ -250,8 +262,8 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg) {
/// isNopCopy("ecx = COPY eax", AH, CL) == false
static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
unsigned Def, const TargetRegisterInfo *TRI) {
- unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg();
- unsigned PreviousDef = PreviousCopy.getOperand(0).getReg();
+ Register PreviousSrc = PreviousCopy.getOperand(1).getReg();
+ Register PreviousDef = PreviousCopy.getOperand(0).getReg();
if (Src == PreviousSrc) {
assert(Def == PreviousDef);
return true;
@@ -288,7 +300,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
// Copy was redundantly redefining either Src or Def. Remove earlier kill
// flags between Copy and PrevCopy because the value will be reused now.
assert(Copy.isCopy());
- unsigned CopyDef = Copy.getOperand(0).getReg();
+ Register CopyDef = Copy.getOperand(0).getReg();
assert(CopyDef == Src || CopyDef == Def);
for (MachineInstr &MI :
make_range(PrevCopy->getIterator(), Copy.getIterator()))
@@ -307,7 +319,7 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
const MachineInstr &UseI,
unsigned UseIdx) {
- unsigned CopySrcReg = Copy.getOperand(1).getReg();
+ Register CopySrcReg = Copy.getOperand(1).getReg();
// If the new register meets the opcode register constraints, then allow
// forwarding.
@@ -398,9 +410,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!Copy)
continue;
- unsigned CopyDstReg = Copy->getOperand(0).getReg();
+ Register CopyDstReg = Copy->getOperand(0).getReg();
const MachineOperand &CopySrc = Copy->getOperand(1);
- unsigned CopySrcReg = CopySrc.getReg();
+ Register CopySrcReg = CopySrc.getReg();
// FIXME: Don't handle partial uses of wider COPYs yet.
if (MOUse.getReg() != CopyDstReg) {
@@ -456,11 +468,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// Analyze copies (which don't overlap themselves).
if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(),
MI->getOperand(1).getReg())) {
- unsigned Def = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
+ Register Def = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
- assert(!TargetRegisterInfo::isVirtualRegister(Def) &&
- !TargetRegisterInfo::isVirtualRegister(Src) &&
+ assert(!Register::isVirtualRegister(Def) &&
+ !Register::isVirtualRegister(Src) &&
"MachineCopyPropagation should be run after register allocation!");
// The two copies cancel out and the source of the first copy
@@ -488,14 +500,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
- ReadRegister(Src);
+ ReadRegister(Src, *MI, RegularUse);
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.readsReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- ReadRegister(Reg);
+ ReadRegister(Reg, *MI, RegularUse);
}
LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
@@ -515,7 +527,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
Tracker.clobberRegister(Reg, *TRI);
@@ -529,12 +541,12 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// Clobber any earlyclobber regs first.
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isEarlyClobber()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// If we have a tied earlyclobber, that means it is also read by this
// instruction, so we need to make sure we don't remove it as dead
// later.
if (MO.isTied())
- ReadRegister(Reg);
+ ReadRegister(Reg, *MI, RegularUse);
Tracker.clobberRegister(Reg, *TRI);
}
@@ -548,18 +560,18 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
RegMask = &MO;
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&
+ assert(!Register::isVirtualRegister(Reg) &&
"MachineCopyPropagation should be run after register allocation!");
if (MO.isDef() && !MO.isEarlyClobber()) {
Defs.push_back(Reg);
continue;
- } else if (!MO.isDebug() && MO.readsReg())
- ReadRegister(Reg);
+ } else if (MO.readsReg())
+ ReadRegister(Reg, *MI, MO.isDebug() ? DebugUse : RegularUse);
}
// The instruction has a register mask operand which means that it clobbers
@@ -571,7 +583,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
MaybeDeadCopies.begin();
DI != MaybeDeadCopies.end();) {
MachineInstr *MaybeDead = *DI;
- unsigned Reg = MaybeDead->getOperand(0).getReg();
+ Register Reg = MaybeDead->getOperand(0).getReg();
assert(!MRI->isReserved(Reg));
if (!RegMask->clobbersPhysReg(Reg)) {
@@ -609,9 +621,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
MaybeDead->dump());
assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
- // Update matching debug values.
+ // Update matching debug values, if any.
assert(MaybeDead->isCopy());
- MaybeDead->changeDebugValuesDefReg(MaybeDead->getOperand(1).getReg());
+ unsigned SrcReg = MaybeDead->getOperand(1).getReg();
+ MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]);
MaybeDead->eraseFromParent();
Changed = true;
@@ -620,6 +633,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
MaybeDeadCopies.clear();
+ CopyDbgUsers.clear();
Tracker.clear();
}
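
The behavioral change in this file: a DBG_VALUE reading a copy no longer keeps the copy alive; instead the debug user is recorded, and when the dead copy is erased its debug users are rewritten to read the copy's source. A toy model of that bookkeeping (strings stand in for registers and instructions):

#include <cassert>
#include <string>
#include <vector>

struct Copy { std::string Def, Src; };

int main() {
  Copy C{"eax", "ebx"};                  // eax = COPY ebx
  std::string DbgOperand = C.Def;        // a DBG_VALUE reads eax
  std::vector<std::string *> CopyDbgUsers = {&DbgOperand};

  // No regular reader showed up, so the copy is dead: retarget the debug
  // users to the source, after which the copy itself can be erased.
  for (std::string *Op : CopyDbgUsers)
    *Op = C.Src;
  assert(DbgOperand == "ebx"); // the DBG_VALUE now tracks the source value
}
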
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 1dfba8638c22..706c706d7527 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -18,12 +18,15 @@
using namespace llvm;
+namespace llvm {
// Always verify dominfo if expensive checking is enabled.
#ifdef EXPENSIVE_CHECKS
-static bool VerifyMachineDomInfo = true;
+bool VerifyMachineDomInfo = true;
#else
-static bool VerifyMachineDomInfo = false;
+bool VerifyMachineDomInfo = false;
#endif
+} // namespace llvm
+
static cl::opt<bool, true> VerifyMachineDomInfoX(
"verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden,
cl::desc("Verify machine dominator info (time consuming)"));
@@ -64,21 +67,11 @@ void MachineDominatorTree::releaseMemory() {
}
void MachineDominatorTree::verifyAnalysis() const {
- if (DT && VerifyMachineDomInfo) {
- MachineFunction &F = *getRoot()->getParent();
-
- DomTreeBase<MachineBasicBlock> OtherDT;
- OtherDT.recalculate(F);
- if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
- DT->compare(OtherDT)) {
- errs() << "MachineDominatorTree for function " << F.getName()
- << " is not up to date!\nComputed:\n";
- DT->print(errs());
- errs() << "\nActual:\n";
- OtherDT.print(errs());
+ if (DT && VerifyMachineDomInfo)
+ if (!DT->verify(DomTreeT::VerificationLevel::Basic)) {
+ errs() << "MachineDominatorTree verification failed\n";
abort();
}
- }
}
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
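
The old verifier recomputed a second dominator tree and compared it; the new code delegates to DT->verify(). A standalone sketch of what recomputation-based checking involves, using the iterative immediate-dominator algorithm and assuming blocks numbered in reverse post-order:

#include <cassert>
#include <vector>

using CFG = std::vector<std::vector<int>>; // predecessor lists; block 0 = entry

// Iterative idom computation (Cooper/Harvey/Kennedy style).
std::vector<int> computeIDom(const CFG &Preds) {
  std::vector<int> IDom(Preds.size(), -1);
  IDom[0] = 0;
  auto intersect = [&IDom](int A, int B) {
    while (A != B) {       // walk the two ancestors up to a common dominator
      while (A > B) A = IDom[A];
      while (B > A) B = IDom[B];
    }
    return A;
  };
  for (bool Changed = true; Changed;) {
    Changed = false;
    for (size_t B = 1; B < Preds.size(); ++B) {
      int New = -1;
      for (int P : Preds[B])
        if (IDom[P] != -1)
          New = (New == -1) ? P : intersect(New, P);
      if (New != IDom[B]) { IDom[B] = New; Changed = true; }
    }
  }
  return IDom;
}

int main() {
  // Diamond CFG: 0 -> {1,2} -> 3. Every block's immediate dominator is 0.
  CFG Preds = {{}, {0}, {0}, {1, 2}};
  std::vector<int> Cached = {0, 0, 0, 0};
  assert(computeIDom(Preds) == Cached && "cached dominator info is up to date");
}
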
diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp
index bae3a4333bda..604f5145b1a0 100644
--- a/lib/CodeGen/MachineFrameInfo.cpp
+++ b/lib/CodeGen/MachineFrameInfo.cpp
@@ -28,25 +28,26 @@
using namespace llvm;
-void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+void MachineFrameInfo::ensureMaxAlignment(Align Alignment) {
if (!StackRealignable)
- assert(Align <= StackAlignment &&
- "For targets without stack realignment, Align is out of limit!");
- if (MaxAlignment < Align) MaxAlignment = Align;
+ assert(Alignment <= StackAlignment &&
+ "For targets without stack realignment, Alignment is out of limit!");
+ if (MaxAlignment < Alignment)
+ MaxAlignment = Alignment;
}
/// Clamp the alignment if requested and emit a warning.
-static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
- unsigned StackAlign) {
- if (!ShouldClamp || Align <= StackAlign)
- return Align;
- LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Align
- << " exceeds the stack alignment " << StackAlign
+static inline Align clampStackAlignment(bool ShouldClamp, Align Alignment,
+ Align StackAlignment) {
+ if (!ShouldClamp || Alignment <= StackAlignment)
+ return Alignment;
+ LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Alignment.value()
+ << " exceeds the stack alignment " << StackAlignment.value()
<< " when stack realignment is off" << '\n');
- return StackAlign;
+ return StackAlignment;
}
-int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+int MachineFrameInfo::CreateStackObject(uint64_t Size, Align Alignment,
bool IsSpillSlot,
const AllocaInst *Alloca,
uint8_t StackID) {
@@ -61,8 +62,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
return Index;
}
-int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
- unsigned Alignment) {
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, Align Alignment) {
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
CreateStackObject(Size, Alignment, true);
int Index = (int)Objects.size() - NumFixedObjects - 1;
@@ -70,7 +70,7 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
return Index;
}
-int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
+int MachineFrameInfo::CreateVariableSizedObject(Align Alignment,
const AllocaInst *Alloca) {
HasVarSizedObjects = true;
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
@@ -88,7 +88,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// object is 16-byte aligned. Note that unlike the non-fixed case, if the
// stack needs realignment, we can't assume that the stack will in fact be
// aligned.
- unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align Alignment =
+ commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset);
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
StackObject(Size, Alignment, SPOffset, IsImmutable,
@@ -100,7 +101,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
int64_t SPOffset,
bool IsImmutable) {
- unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align Alignment =
+ commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset);
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
StackObject(Size, Alignment, SPOffset, IsImmutable,
@@ -232,7 +234,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
OS << "variable sized";
else
OS << "size=" << SO.Size;
- OS << ", align=" << SO.Alignment;
+ OS << ", align=" << SO.Alignment.value();
if (i < NumFixedObjects)
OS << ", fixed";
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 4df5ce2dcedc..7d2ee230ca9f 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -78,10 +78,11 @@ using namespace llvm;
#define DEBUG_TYPE "codegen"
-static cl::opt<unsigned>
-AlignAllFunctions("align-all-functions",
- cl::desc("Force the alignment of all functions."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> AlignAllFunctions(
+ "align-all-functions",
+ cl::desc("Force the alignment of all functions in log2 format (e.g. 4 "
+ "means align on 16B boundaries)."),
+ cl::init(0), cl::Hidden);
static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
using P = MachineFunctionProperties::Property;
@@ -181,7 +182,7 @@ void MachineFunction::init() {
STI->getTargetLowering()->getPrefFunctionAlignment());
if (AlignAllFunctions)
- Alignment = AlignAllFunctions;
+ Alignment = Align(1ULL << AlignAllFunctions);
JumpTableInfo = nullptr;
@@ -200,7 +201,7 @@ void MachineFunction::init() {
"Target-incompatible DataLayout attached\n");
PSVManager =
- llvm::make_unique<PseudoSourceValueManager>(*(getSubtarget().
+ std::make_unique<PseudoSourceValueManager>(*(getSubtarget().
getInstrInfo()));
}
@@ -823,30 +824,47 @@ try_next:;
return FilterID;
}
-void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD) {
+void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I,
+ const MDNode *MD) {
MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true);
MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true);
I->setPreInstrSymbol(*this, BeginLabel);
I->setPostInstrSymbol(*this, EndLabel);
- DIType *DI = dyn_cast<DIType>(MD);
+ const DIType *DI = dyn_cast<DIType>(MD);
CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI));
}
-void MachineFunction::updateCallSiteInfo(const MachineInstr *Old,
- const MachineInstr *New) {
- if (!Target.Options.EnableDebugEntryValues || Old == New)
- return;
+void MachineFunction::moveCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New) {
+ assert(New->isCall() && "Call site info refers only to call instructions!");
- assert(Old->isCall() && (!New || New->isCall()) &&
- "Call site info referes only to call instructions!");
- CallSiteInfoMap::iterator CSIt = CallSitesInfo.find(Old);
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old);
if (CSIt == CallSitesInfo.end())
return;
+
CallSiteInfo CSInfo = std::move(CSIt->second);
CallSitesInfo.erase(CSIt);
- if (New)
- CallSitesInfo[New] = CSInfo;
+ CallSitesInfo[New] = CSInfo;
+}
+
+void MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) {
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(MI);
+ if (CSIt == CallSitesInfo.end())
+ return;
+ CallSitesInfo.erase(CSIt);
+}
+
+void MachineFunction::copyCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New) {
+ assert(New->isCall() && "Call site info refers only to call instructions!");
+
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old);
+ if (CSIt == CallSitesInfo.end())
+ return;
+
+ CallSiteInfo CSInfo = CSIt->second;
+ CallSitesInfo[New] = CSInfo;
}
/// \}
@@ -881,13 +899,13 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
// alignment.
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
- return TD.getPointerABIAlignment(0);
+ return TD.getPointerABIAlignment(0).value();
case MachineJumpTableInfo::EK_GPRel64BlockAddress:
- return TD.getABIIntegerTypeAlignment(64);
+ return TD.getABIIntegerTypeAlignment(64).value();
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
case MachineJumpTableInfo::EK_Custom32:
- return TD.getABIIntegerTypeAlignment(32);
+ return TD.getABIIntegerTypeAlignment(32).value();
case MachineJumpTableInfo::EK_Inline:
return 1;
}
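
The single updateCallSiteInfo() entry point is split into explicit move, erase, and copy operations over the call-site map. A toy table with the same three operations (strings stand in for MachineInstr pointers; the payload is simplified to an int):

#include <cassert>
#include <map>
#include <string>

struct CallSiteInfoTable {
  std::map<std::string, int> Info; // call instruction -> argument info

  void move(const std::string &Old, const std::string &New) {
    auto It = Info.find(Old);
    if (It == Info.end())
      return;
    int Payload = It->second;
    Info.erase(It);     // the old call no longer owns the info
    Info[New] = Payload;
  }
  void copy(const std::string &Old, const std::string &New) {
    auto It = Info.find(Old);
    if (It != Info.end())
      Info[New] = It->second; // both calls keep the info
  }
  void erase(const std::string &MI) { Info.erase(MI); }
};

int main() {
  CallSiteInfoTable T;
  T.Info["call.old"] = 2;
  T.move("call.old", "call.new");
  assert(!T.Info.count("call.old") && T.Info.at("call.new") == 2);
  T.erase("call.new");
  assert(T.Info.empty());
}
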
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 0da4cf3fc90c..03149aa7db4a 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -41,7 +41,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
if (F.hasAvailableExternallyLinkage())
return false;
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
MachineFunctionProperties &MFProps = MF.getProperties();
@@ -101,8 +101,8 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
}
void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineModuleInfo>();
- AU.addPreserved<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
// MachineFunctionPass preserves all LLVM IR passes, but there's no
// high-level way to express this. Instead, just list a bunch of
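
The pattern behind this change: the analysis result is pulled out of the pass and owned by a wrapper, so clients fetch the wrapper and ask it for the data. A minimal sketch with simplified stand-in types:

#include <cassert>
#include <string>

struct MachineModuleInfo { std::string ModuleName; }; // simplified stand-in

// The wrapper owns the info; clients call getMMI() on the wrapper instead of
// treating the pass itself as the analysis result.
struct MachineModuleInfoWrapperPass {
  MachineModuleInfo MMI;
  MachineModuleInfo &getMMI() { return MMI; }
};

int main() {
  MachineModuleInfoWrapperPass Wrapper; // previously: the pass *was* the MMI
  Wrapper.getMMI().ModuleName = "m.ll";
  assert(Wrapper.getMMI().ModuleName == "m.ll");
}
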
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e5c398a2d10c..fec20b2b1a05 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -636,8 +636,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
if (Check == IgnoreDefs)
continue;
else if (Check == IgnoreVRegDefs) {
- if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()) ||
- !TargetRegisterInfo::isVirtualRegister(OMO.getReg()))
+ if (!Register::isVirtualRegister(MO.getReg()) ||
+ !Register::isVirtualRegister(OMO.getReg()))
if (!MO.isIdenticalTo(OMO))
return false;
} else {
@@ -692,8 +692,8 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() {
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
continue;
MRI.markUsesInDebugValueAsUndef(Reg);
}
@@ -832,6 +832,10 @@ const DIExpression *MachineInstr::getDebugExpression() const {
return cast<DIExpression>(getOperand(3).getMetadata());
}
+bool MachineInstr::isDebugEntryValue() const {
+ return isDebugValue() && getDebugExpression()->isEntryValue();
+}
+
const TargetRegisterClass*
MachineInstr::getRegClassConstraint(unsigned OpIdx,
const TargetInstrInfo *TII,
@@ -873,7 +877,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
}
const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
- unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII,
+ Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI, bool ExploreBundle) const {
// Check every operands inside the bundle if we have
// been asked to.
@@ -890,7 +894,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
}
const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl(
- unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC,
+ unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const {
assert(CurRC && "Invalid initial register class");
// Check if Reg is constrained by some of its use/def from MI.
@@ -933,7 +937,7 @@ unsigned MachineInstr::getBundleSize() const {
/// Returns true if the MachineInstr has an implicit-use operand of exactly
/// the given register (not considering sub/super-registers).
-bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
+bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg)
@@ -946,12 +950,12 @@ bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
int MachineInstr::findRegisterUseOperandIdx(
- unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const {
+ Register Reg, bool isKill, const TargetRegisterInfo *TRI) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg)))
@@ -965,7 +969,7 @@ int MachineInstr::findRegisterUseOperandIdx(
/// indicating if this instruction reads or writes Reg. This also considers
/// partial defines.
std::pair<bool,bool>
-MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+MachineInstr::readsWritesVirtualRegister(Register Reg,
SmallVectorImpl<unsigned> *Ops) const {
bool PartDef = false; // Partial redefine.
bool FullDef = false; // Full define.
@@ -994,9 +998,9 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg,
/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
/// also checks if there is a def of a super-register.
int
-MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap,
const TargetRegisterInfo *TRI) const {
- bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+ bool isPhys = Register::isPhysicalRegister(Reg);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
// Accept regmask operands when Overlap is set.
@@ -1005,10 +1009,9 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
return i;
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
bool Found = (MOReg == Reg);
- if (!Found && TRI && isPhys &&
- TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (!Found && TRI && isPhys && Register::isPhysicalRegister(MOReg)) {
if (Overlap)
Found = TRI->regsOverlap(MOReg, Reg);
else
@@ -1142,10 +1145,10 @@ void MachineInstr::clearKillInfo() {
}
}
-void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg,
+void MachineInstr::substituteRegister(Register FromReg, Register ToReg,
unsigned SubIdx,
const TargetRegisterInfo &RegInfo) {
- if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (Register::isPhysicalRegister(ToReg)) {
if (SubIdx)
ToReg = RegInfo.getSubReg(ToReg, SubIdx);
for (MachineOperand &MO : operands()) {
@@ -1165,7 +1168,7 @@ void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg,
/// isSafeToMove - Return true if it is safe to move this instruction. If
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
-bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
+bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const {
// Ignore stuff that we obviously can't move.
//
// Treat volatile loads as stores. This is not strictly necessary for
@@ -1194,7 +1197,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
return true;
}
-bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other,
+bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
bool UseTBAA) const {
const MachineFunction *MF = getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -1206,7 +1209,7 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other,
return false;
// Let the target decide if memory accesses cannot possibly overlap.
- if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA))
+ if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
return false;
// FIXME: Need to handle multiple memory operands to support all targets.
@@ -1312,7 +1315,7 @@ bool MachineInstr::hasOrderedMemoryRef() const {
/// isDereferenceableInvariantLoad - Return true if this instruction will never
/// trap and is loading from a location whose value is invariant across a run of
/// this function.
-bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const {
+bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
if (!mayLoad())
return false;
@@ -1364,7 +1367,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
assert(getNumOperands() >= 3 &&
"It's illegal to have a PHI without source operands");
- unsigned Reg = getOperand(1).getReg();
+ Register Reg = getOperand(1).getReg();
for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
if (getOperand(i).getReg() != Reg)
return 0;
@@ -1726,7 +1729,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
MFI = &MF->getFrameInfo();
Context = &MF->getFunction().getContext();
} else {
- CtxPtr = llvm::make_unique<LLVMContext>();
+ CtxPtr = std::make_unique<LLVMContext>();
Context = CtxPtr.get();
}
@@ -1780,10 +1783,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << '\n';
}
-bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+bool MachineInstr::addRegisterKilled(Register IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
- bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool isPhysReg = Register::isPhysicalRegister(IncomingReg);
bool hasAliases = isPhysReg &&
MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
bool Found = false;
@@ -1799,7 +1802,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
if (MO.isDebug())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
@@ -1814,8 +1817,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
MO.setIsKill();
Found = true;
}
- } else if (hasAliases && MO.isKill() &&
- TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ } else if (hasAliases && MO.isKill() && Register::isPhysicalRegister(Reg)) {
// A super-register kill already exists.
if (RegInfo->isSuperRegister(IncomingReg, Reg))
return true;
@@ -1847,23 +1849,23 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
return Found;
}
-void MachineInstr::clearRegisterKills(unsigned Reg,
+void MachineInstr::clearRegisterKills(Register Reg,
const TargetRegisterInfo *RegInfo) {
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!Register::isPhysicalRegister(Reg))
RegInfo = nullptr;
for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
- unsigned OpReg = MO.getReg();
+ Register OpReg = MO.getReg();
if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg)
MO.setIsKill(false);
}
}
-bool MachineInstr::addRegisterDead(unsigned Reg,
+bool MachineInstr::addRegisterDead(Register Reg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
- bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg);
+ bool isPhysReg = Register::isPhysicalRegister(Reg);
bool hasAliases = isPhysReg &&
MCRegAliasIterator(Reg, RegInfo, false).isValid();
bool Found = false;
@@ -1872,7 +1874,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
@@ -1880,7 +1882,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
MO.setIsDead();
Found = true;
} else if (hasAliases && MO.isDead() &&
- TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ Register::isPhysicalRegister(MOReg)) {
// There exists a super-register that's marked dead.
if (RegInfo->isSuperRegister(Reg, MOReg))
return true;
@@ -1913,7 +1915,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
return true;
}
-void MachineInstr::clearRegisterDeads(unsigned Reg) {
+void MachineInstr::clearRegisterDeads(Register Reg) {
for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
continue;
@@ -1921,7 +1923,7 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) {
}
}
-void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
+void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) {
for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0)
continue;
@@ -1929,9 +1931,9 @@ void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
}
}
-void MachineInstr::addRegisterDefined(unsigned Reg,
+void MachineInstr::addRegisterDefined(Register Reg,
const TargetRegisterInfo *RegInfo) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo);
if (MO)
return;
@@ -1947,7 +1949,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg,
true /*IsImp*/));
}
-void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
+void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs,
const TargetRegisterInfo &TRI) {
bool HasRegMask = false;
for (MachineOperand &MO : operands()) {
@@ -1956,18 +1958,19 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
continue;
}
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical())
+ continue;
// If there are no uses, including partial uses, the def is dead.
if (llvm::none_of(UsedRegs,
- [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
+ [&](MCRegister Use) { return TRI.regsOverlap(Use, Reg); }))
MO.setIsDead();
}
// This is a call with a register mask operand.
// Mask clobbers are always dead, so add defs for the non-dead defines.
if (HasRegMask)
- for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ for (ArrayRef<Register>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
I != E; ++I)
addRegisterDefined(*I, &TRI);
}
@@ -1979,8 +1982,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isDef() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
HashComponents.push_back(hash_value(MO));
@@ -2012,7 +2014,7 @@ void MachineInstr::emitError(StringRef Msg) const {
MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
const MCInstrDesc &MCID, bool IsIndirect,
- unsigned Reg, const MDNode *Variable,
+ Register Reg, const MDNode *Variable,
const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
@@ -2048,7 +2050,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- bool IsIndirect, unsigned Reg,
+ bool IsIndirect, Register Reg,
const MDNode *Variable, const MDNode *Expr) {
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr);
@@ -2118,10 +2120,24 @@ void MachineInstr::collectDebugValues(
}
}
-void MachineInstr::changeDebugValuesDefReg(unsigned Reg) {
+void MachineInstr::changeDebugValuesDefReg(Register Reg) {
// Collect matching debug values.
SmallVector<MachineInstr *, 2> DbgValues;
- collectDebugValues(DbgValues);
+
+ if (!getOperand(0).isReg())
+ return;
+
+ unsigned DefReg = getOperand(0).getReg();
+ auto *MRI = getRegInfo();
+ for (auto &MO : MRI->use_operands(DefReg)) {
+ auto *DI = MO.getParent();
+ if (!DI->isDebugValue())
+ continue;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == DefReg) {
+ DbgValues.push_back(DI);
+ }
+ }
// Propagate Reg to debug value instructions.
for (auto *DBI : DbgValues)
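
The rewritten changeDebugValuesDefReg() above finds debug users through the register's use list rather than by scanning forward over instructions. A toy use-list model of that collect-then-retarget step (not the MachineRegisterInfo API):

#include <cassert>
#include <vector>

struct Operand { bool IsDebug; unsigned Reg; };
using UseList = std::vector<Operand *>; // stands in for MRI->use_operands()

// Collect the debug uses of DefReg from its use list, then retarget them.
void changeDebugValuesDefReg(const UseList &Uses, unsigned DefReg,
                             unsigned NewReg) {
  std::vector<Operand *> DbgUses;
  for (Operand *Op : Uses)
    if (Op->IsDebug && Op->Reg == DefReg)
      DbgUses.push_back(Op);
  for (Operand *Op : DbgUses) // non-debug uses are deliberately untouched
    Op->Reg = NewReg;
}

int main() {
  Operand RegularUse{false, 1}, DebugUse{true, 1};
  changeDebugValuesDefReg({&RegularUse, &DebugUse}, 1, 2);
  assert(RegularUse.Reg == 1 && DebugUse.Reg == 2);
}
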
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 32e266e9401e..feb849ced353 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -154,10 +154,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
continue;
}
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+
if (LocalDefSet.count(Reg)) {
MO.setIsInternalRead();
if (MO.isKill())
@@ -177,7 +177,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
MachineOperand &MO = *Defs[i];
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
@@ -194,7 +194,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
DeadDefSet.erase(Reg);
}
- if (!MO.isDead()) {
+ if (!MO.isDead() && Register::isPhysicalRegister(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
if (LocalDefSet.insert(SubReg).second)
@@ -316,7 +316,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
bool AllDefsDead = true;
PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ assert(Register::isPhysicalRegister(Reg) &&
"analyzePhysReg not given a physical register!");
for (; isValid(); ++*this) {
MachineOperand &MO = deref();
@@ -329,8 +329,8 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
- if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
+ Register MOReg = MO.getReg();
+ if (!MOReg || !Register::isPhysicalRegister(MOReg))
continue;
if (!TRI->regsOverlap(MOReg, Reg))
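
One rule visible in these hunks: a use inside a bundle that reads a register defined earlier in the same bundle is flagged as an internal read, while only physical registers get their sub-registers expanded into the local def set. A simplified model of the internal-read marking:

#include <cassert>
#include <set>
#include <vector>

struct Use { unsigned Reg; bool InternalRead = false; };

// Mark each use that reads a register already defined within the bundle.
void markInternalReads(std::vector<Use> &Uses,
                       const std::set<unsigned> &LocalDefs) {
  for (Use &U : Uses)
    if (LocalDefs.count(U.Reg))
      U.InternalRead = true;
}

int main() {
  std::set<unsigned> LocalDefs = {5}; // register 5 is defined inside the bundle
  std::vector<Use> Uses = {{5}, {7}};
  markInternalReads(Uses, LocalDefs);
  assert(Uses[0].InternalRead && !Uses[1].InternalRead);
}
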
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 1107e609c258..6a898ff6ef88 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -153,7 +153,6 @@ namespace {
AU.addRequired<MachineDominatorTree>();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -424,10 +423,10 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ assert(Register::isPhysicalRegister(Reg) &&
"Not expecting virtual register!");
if (!MO.isDef()) {
@@ -526,7 +525,7 @@ void MachineLICMBase::HoistRegionPostRA() {
for (const MachineOperand &MO : TI->operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
@@ -554,7 +553,7 @@ void MachineLICMBase::HoistRegionPostRA() {
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef() || !MO.getReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (PhysRegDefs.test(Reg) ||
PhysRegClobbers.test(Reg)) {
// If it's using a non-loop-invariant register, then it's obviously
@@ -852,8 +851,8 @@ MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isImplicit())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
// FIXME: It seems bad to use RegSeen only for some of these calculations.
@@ -922,12 +921,12 @@ static bool isInvariantStore(const MachineInstr &MI,
// Check that all register operands are caller-preserved physical registers.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// If operand is a virtual register, check if it comes from a copy of a
// physical register.
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return false;
if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
return false;
@@ -955,17 +954,17 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
const MachineFunction *MF = MI.getMF();
// Check that we are copying a constant physical register.
- unsigned CopySrcReg = MI.getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+ Register CopySrcReg = MI.getOperand(1).getReg();
+ if (Register::isVirtualRegister(CopySrcReg))
return false;
if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
return false;
- unsigned CopyDstReg = MI.getOperand(0).getReg();
+ Register CopyDstReg = MI.getOperand(0).getReg();
// Check if any of the uses of the copy are invariant stores.
- assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) &&
- "copy dst is not a virtual reg");
+ assert(Register::isVirtualRegister(CopyDstReg) &&
+ "copy dst is not a virtual reg");
for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
@@ -1010,11 +1009,11 @@ bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
// Don't hoist an instruction that uses or defines a physical register.
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
@@ -1061,8 +1060,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
// A PHI may cause a copy to be inserted.
@@ -1104,7 +1103,7 @@ bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI,
const MachineOperand &MO = UseMI.getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (MOReg != Reg)
continue;
@@ -1132,8 +1131,8 @@ bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
if (!DefMO.isReg() || !DefMO.isDef())
continue;
--NumDefs;
- unsigned Reg = DefMO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = DefMO.getReg();
+ if (Register::isPhysicalRegister(Reg))
continue;
if (!TII->hasLowDefLatency(SchedModel, MI, i))
@@ -1225,8 +1224,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
@@ -1304,7 +1303,7 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
MachineFunction &MF = *MI->getMF();
const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
// Ok, we're unfolding. Create a temporary register and do the unfold.
- unsigned Reg = MRI->createVirtualRegister(RC);
+ Register Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
@@ -1378,20 +1377,20 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
// Physical registers may not differ here.
assert((!MO.isReg() || MO.getReg() == 0 ||
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ !Register::isPhysicalRegister(MO.getReg()) ||
MO.getReg() == Dup->getOperand(i).getReg()) &&
"Instructions with different phys regs are not identical!");
if (MO.isReg() && MO.isDef() &&
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ !Register::isPhysicalRegister(MO.getReg()))
Defs.push_back(i);
}
SmallVector<const TargetRegisterClass*, 2> OrigRCs;
for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
unsigned Idx = Defs[i];
- unsigned Reg = MI->getOperand(Idx).getReg();
- unsigned DupReg = Dup->getOperand(Idx).getReg();
+ Register Reg = MI->getOperand(Idx).getReg();
+ Register DupReg = Dup->getOperand(Idx).getReg();
OrigRCs.push_back(MRI->getRegClass(DupReg));
if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
@@ -1403,8 +1402,8 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
}
for (unsigned Idx : Defs) {
- unsigned Reg = MI->getOperand(Idx).getReg();
- unsigned DupReg = Dup->getOperand(Idx).getReg();
+ Register Reg = MI->getOperand(Idx).getReg();
+ Register DupReg = Dup->getOperand(Idx).getReg();
MRI->replaceRegWith(Reg, DupReg);
MRI->clearKillFlags(DupReg);
}
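
The hoisting decisions in this file rest on a loop-invariance test: every virtual-register use must be defined outside the loop, and physical registers are treated conservatively. A toy version of that test (the tag bit mirrors Register::isVirtualRegister in spirit only; the real pass admits some physical-register uses):

#include <cassert>
#include <set>
#include <vector>

constexpr unsigned VirtFlag = 1u << 31; // toy tag: high bit = virtual register

// An instruction is hoistable only if each register use is a virtual register
// defined outside the loop.
bool isLoopInvariant(const std::vector<unsigned> &Uses,
                     const std::set<unsigned> &DefinedInLoop) {
  for (unsigned Reg : Uses) {
    if (!(Reg & VirtFlag))
      return false; // physical register: conservatively not invariant
    if (DefinedInLoop.count(Reg))
      return false; // the value changes inside the loop
  }
  return true;
}

int main() {
  std::set<unsigned> InLoop = {VirtFlag | 3};
  assert(isLoopInvariant({VirtFlag | 1, VirtFlag | 2}, InLoop));
  assert(!isLoopInvariant({VirtFlag | 3}, InLoop)); // uses a loop-varying value
}
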
diff --git a/lib/CodeGen/MachineLoopUtils.cpp b/lib/CodeGen/MachineLoopUtils.cpp
new file mode 100644
index 000000000000..e074b76082f0
--- /dev/null
+++ b/lib/CodeGen/MachineLoopUtils.cpp
@@ -0,0 +1,132 @@
+//=- MachineLoopUtils.cpp - Functions for manipulating loops ----------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+using namespace llvm;
+
+namespace {
+// MI's parent and BB are clones of each other. Find the equivalent copy of MI
+// in BB.
+MachineInstr &findEquivalentInstruction(MachineInstr &MI,
+ MachineBasicBlock *BB) {
+ MachineBasicBlock *PB = MI.getParent();
+ unsigned Offset = std::distance(PB->instr_begin(),
+                                 MachineBasicBlock::instr_iterator(MI));
+ return *std::next(BB->instr_begin(), Offset);
+}
+} // namespace
+
+MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
+ MachineBasicBlock *Loop,
+ MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) {
+ MachineFunction &MF = *Loop->getParent();
+ MachineBasicBlock *Preheader = *Loop->pred_begin();
+ if (Preheader == Loop)
+ Preheader = *std::next(Loop->pred_begin());
+ MachineBasicBlock *Exit = *Loop->succ_begin();
+ if (Exit == Loop)
+ Exit = *std::next(Loop->succ_begin());
+
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(Loop->getBasicBlock());
+ if (Direction == LPD_Front)
+ MF.insert(Loop->getIterator(), NewBB);
+ else
+ MF.insert(std::next(Loop->getIterator()), NewBB);
+
+ // FIXME: Add DenseMapInfo trait for Register so we can use it as a key.
+ DenseMap<unsigned, Register> Remaps;
+ auto InsertPt = NewBB->end();
+ for (MachineInstr &MI : *Loop) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
+ NewBB->insert(InsertPt, NewMI);
+ for (MachineOperand &MO : NewMI->defs()) {
+ Register OrigR = MO.getReg();
+ if (OrigR.isPhysical())
+ continue;
+ Register &R = Remaps[OrigR];
+ R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
+ MO.setReg(R);
+
+ if (Direction == LPD_Back) {
+ // Replace all uses outside the original loop with the new register.
+ // FIXME: is the use_iterator stable enough to mutate register uses
+ // while iterating?
+ SmallVector<MachineOperand *, 4> Uses;
+ for (auto &Use : MRI.use_operands(OrigR))
+ if (Use.getParent()->getParent() != Loop)
+ Uses.push_back(&Use);
+ for (auto *Use : Uses) {
+ MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg()));
+ Use->setReg(R);
+ }
+ }
+ }
+ }
+
+ for (auto I = NewBB->getFirstNonPHI(); I != NewBB->end(); ++I)
+ for (MachineOperand &MO : I->uses())
+ if (MO.isReg() && Remaps.count(MO.getReg()))
+ MO.setReg(Remaps[MO.getReg()]);
+
+ for (auto I = NewBB->begin(); I->isPHI(); ++I) {
+ MachineInstr &MI = *I;
+ unsigned LoopRegIdx = 3, InitRegIdx = 1;
+ if (MI.getOperand(2).getMBB() != Preheader)
+ std::swap(LoopRegIdx, InitRegIdx);
+ MachineInstr &OrigPhi = findEquivalentInstruction(MI, Loop);
+ assert(OrigPhi.isPHI());
+ if (Direction == LPD_Front) {
+ // When peeling front, we are only left with the initial value from the
+ // preheader.
+ Register R = MI.getOperand(LoopRegIdx).getReg();
+ if (Remaps.count(R))
+ R = Remaps[R];
+ OrigPhi.getOperand(InitRegIdx).setReg(R);
+ MI.RemoveOperand(LoopRegIdx + 1);
+ MI.RemoveOperand(LoopRegIdx + 0);
+ } else {
+ // When peeling back, the initial value is the loop-carried value from
+ // the original loop.
+ Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg();
+ MI.getOperand(LoopRegIdx).setReg(LoopReg);
+ MI.RemoveOperand(InitRegIdx + 1);
+ MI.RemoveOperand(InitRegIdx + 0);
+ }
+ }
+
+ DebugLoc DL;
+ if (Direction == LPD_Front) {
+ Preheader->replaceSuccessor(Loop, NewBB);
+ NewBB->addSuccessor(Loop);
+ Loop->replacePhiUsesWith(Preheader, NewBB);
+ if (TII->removeBranch(*Preheader) > 0)
+ TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL);
+ TII->removeBranch(*NewBB);
+ TII->insertBranch(*NewBB, Loop, nullptr, {}, DL);
+ } else {
+ Loop->replaceSuccessor(Exit, NewBB);
+ Exit->replacePhiUsesWith(Loop, NewBB);
+ NewBB->addSuccessor(Exit);
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool CanAnalyzeBr = !TII->analyzeBranch(*Loop, TBB, FBB, Cond);
+ (void)CanAnalyzeBr;
+ assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
+ TII->removeBranch(*Loop);
+ TII->insertBranch(*Loop, TBB == Exit ? NewBB : TBB,
+ FBB == Exit ? NewBB : FBB, Cond, DL);
+ if (TII->removeBranch(*NewBB) > 0)
+ TII->insertBranch(*NewBB, Exit, nullptr, {}, DL);
+ }
+
+ return NewBB;
+}
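
The heart of PeelSingleBlockLoop() is register remapping: each value defined in the loop gets a fresh virtual register in the peeled copy, and uses inside the copy are rewritten through the remap table. A string-based sketch of that clone-and-remap step:

#include <cassert>
#include <map>
#include <string>
#include <vector>

struct Inst { std::string Def; std::vector<std::string> Uses; };

std::vector<Inst> cloneAndRemap(const std::vector<Inst> &Loop) {
  std::map<std::string, std::string> Remap;
  std::vector<Inst> Peeled;
  for (const Inst &I : Loop) {
    Inst NewI = I;
    NewI.Def = I.Def + ".peel"; // fresh virtual register for each def
    Remap[I.Def] = NewI.Def;
    for (std::string &U : NewI.Uses) {
      auto It = Remap.find(U);
      if (It != Remap.end()) // use of a value defined earlier in the copy
        U = It->second;
    }
    Peeled.push_back(NewI);
  }
  return Peeled;
}

int main() {
  std::vector<Inst> Loop = {{"a", {"x"}}, {"b", {"a"}}};
  auto Peeled = cloneAndRemap(Loop);
  assert(Peeled[1].Uses[0] == "a.peel"); // the intra-copy use was remapped
  assert(Peeled[0].Uses[0] == "x");      // values from outside are untouched
}
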
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index aadcd7319799..e0b4e9cac229 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -36,11 +36,6 @@
using namespace llvm;
using namespace llvm::dwarf;
-// Handle the Pass registration stuff necessary to use DataLayout's.
-INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
- "Machine Module Information", false, false)
-char MachineModuleInfo::ID = 0;
-
// Out of line virtual method.
MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;
@@ -121,7 +116,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
BBCallbacks.back().setMap(this);
Entry.Index = BBCallbacks.size() - 1;
Entry.Fn = BB->getParent();
- Entry.Symbols.push_back(Context.createTempSymbol());
+ Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken()));
return Entry.Symbols;
}
@@ -193,27 +188,15 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
}
-MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
- : ImmutablePass(ID), TM(*TM),
- Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
- TM->getObjFileLowering(), nullptr, false) {
- initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
-}
-
-MachineModuleInfo::~MachineModuleInfo() = default;
-
-bool MachineModuleInfo::doInitialization(Module &M) {
+void MachineModuleInfo::initialize() {
ObjFileMMI = nullptr;
CurCallSite = 0;
UsesMSVCFloatingPoint = UsesMorestackAddr = false;
HasSplitStack = HasNosplitStack = false;
AddrLabelSymbols = nullptr;
- TheModule = &M;
- DbgInfoAvailable = !llvm::empty(M.debug_compile_units());
- return false;
}
-bool MachineModuleInfo::doFinalization(Module &M) {
+void MachineModuleInfo::finalize() {
Personalities.clear();
delete AddrLabelSymbols;
@@ -223,10 +206,30 @@ bool MachineModuleInfo::doFinalization(Module &M) {
delete ObjFileMMI;
ObjFileMMI = nullptr;
+}
- return false;
+MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
+ : TM(std::move(MMI.TM)),
+ Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(),
+ MMI.TM.getObjFileLowering(), nullptr, nullptr, false) {
+ ObjFileMMI = MMI.ObjFileMMI;
+ CurCallSite = MMI.CurCallSite;
+ UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint;
+ UsesMorestackAddr = MMI.UsesMorestackAddr;
+ HasSplitStack = MMI.HasSplitStack;
+ HasNosplitStack = MMI.HasNosplitStack;
+ AddrLabelSymbols = MMI.AddrLabelSymbols;
+ TheModule = MMI.TheModule;
}
+MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
+ : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
+ TM->getObjFileLowering(), nullptr, nullptr, false) {
+ initialize();
+}
+
+MachineModuleInfo::~MachineModuleInfo() { finalize(); }
+
//===- Address of Block Management ----------------------------------------===//
ArrayRef<MCSymbol *>
@@ -305,12 +308,13 @@ public:
FreeMachineFunction() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo>();
- AU.addPreserved<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
}
bool runOnFunction(Function &F) override {
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MMI.deleteMachineFunctionFor(F);
return true;
}
@@ -327,3 +331,36 @@ char FreeMachineFunction::ID;
FunctionPass *llvm::createFreeMachineFunctionPass() {
return new FreeMachineFunction();
}
+
+MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass(
+ const LLVMTargetMachine *TM)
+ : ImmutablePass(ID), MMI(TM) {
+ initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+// Handle the Pass registration stuff necessary to use DataLayout's.
+INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo",
+ "Machine Module Information", false, false)
+char MachineModuleInfoWrapperPass::ID = 0;
+
+bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
+ MMI.initialize();
+ MMI.TheModule = &M;
+ MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
+ return false;
+}
+
+bool MachineModuleInfoWrapperPass::doFinalization(Module &M) {
+ MMI.finalize();
+ return false;
+}
+
+AnalysisKey MachineModuleAnalysis::Key;
+
+MachineModuleInfo MachineModuleAnalysis::run(Module &M,
+ ModuleAnalysisManager &) {
+ MachineModuleInfo MMI(TM);
+ MMI.TheModule = &M;
+ MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
+ return MMI;
+}
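
The new-pass-manager hookup at the end explains the move constructor added above: MachineModuleAnalysis::run() builds the MachineModuleInfo and returns it by value. A minimal sketch of that analysis shape with simplified stand-in types:

#include <cassert>
#include <string>

struct Module { std::string Name; bool HasDebugInfo = false; };

struct MachineModuleInfo {
  const Module *TheModule = nullptr;
  bool DbgInfoAvailable = false;
  MachineModuleInfo() = default;
  MachineModuleInfo(MachineModuleInfo &&) = default; // returned by value below
};

struct MachineModuleAnalysis {
  static char Key; // stands in for llvm::AnalysisKey
  MachineModuleInfo run(Module &M) {
    MachineModuleInfo MMI;
    MMI.TheModule = &M;
    MMI.DbgInfoAvailable = M.HasDebugInfo;
    return MMI; // moved out, not copied
  }
};
char MachineModuleAnalysis::Key;

int main() {
  Module M{"m.ll", true};
  MachineModuleInfo MMI = MachineModuleAnalysis().run(M);
  assert(MMI.DbgInfoAvailable && MMI.TheModule == &M);
}
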
diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp
index 4fa4ea7f6cf5..8b19501ec3cf 100644
--- a/lib/CodeGen/MachineOperand.cpp
+++ b/lib/CodeGen/MachineOperand.cpp
@@ -49,7 +49,7 @@ static MachineFunction *getMFIfAvailable(MachineOperand &MO) {
getMFIfAvailable(const_cast<const MachineOperand &>(MO)));
}
-void MachineOperand::setReg(unsigned Reg) {
+void MachineOperand::setReg(Register Reg) {
if (getReg() == Reg)
return; // No change.
@@ -71,9 +71,9 @@ void MachineOperand::setReg(unsigned Reg) {
SmallContents.RegNo = Reg;
}
-void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx,
const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Reg.isVirtual());
if (SubIdx && getSubReg())
SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
setReg(Reg);
@@ -81,8 +81,8 @@ void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
setSubReg(SubIdx);
}
-void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+void MachineOperand::substPhysReg(MCRegister Reg,
+                                  const TargetRegisterInfo &TRI) {
+ assert(Reg.isPhysical());
if (getSubReg()) {
Reg = TRI.getSubReg(Reg, getSubReg());
// Note that getSubReg() may return 0 if the sub-register doesn't exist.
@@ -114,7 +114,7 @@ void MachineOperand::setIsDef(bool Val) {
bool MachineOperand::isRenamable() const {
assert(isReg() && "Wrong MachineOperand accessor");
- assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
+ assert(Register::isPhysicalRegister(getReg()) &&
"isRenamable should only be checked on physical registers");
if (!IsRenamable)
return false;
@@ -132,7 +132,7 @@ bool MachineOperand::isRenamable() const {
void MachineOperand::setIsRenamable(bool Val) {
assert(isReg() && "Wrong MachineOperand accessor");
- assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
+ assert(Register::isPhysicalRegister(getReg()) &&
"setIsRenamable should only be called on physical registers");
IsRenamable = Val;
}
@@ -169,7 +169,7 @@ void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
}
void MachineOperand::ChangeToES(const char *SymName,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into an external symbol");
@@ -182,7 +182,7 @@ void MachineOperand::ChangeToES(const char *SymName,
}
void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into a global address");
@@ -215,7 +215,7 @@ void MachineOperand::ChangeToFrameIndex(int Idx) {
}
void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into a FrameIndex");
@@ -230,7 +230,7 @@ void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be a register already,
/// the setReg method should be used.
-void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp,
bool isKill, bool isDead, bool isUndef,
bool isDebug) {
MachineRegisterInfo *RegInfo = nullptr;
@@ -333,6 +333,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getIntrinsicID() == Other.getIntrinsicID();
case MachineOperand::MO_Predicate:
return getPredicate() == Other.getPredicate();
+ case MachineOperand::MO_ShuffleMask:
+ return getShuffleMask() == Other.getShuffleMask();
}
llvm_unreachable("Invalid machine operand type");
}
@@ -381,6 +383,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
case MachineOperand::MO_Predicate:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
+ case MachineOperand::MO_ShuffleMask:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getShuffleMask());
}
llvm_unreachable("Invalid machine operand type");
}
@@ -425,12 +429,10 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
return;
}
- int Reg = TRI->getLLVMRegNum(DwarfReg, true);
- if (Reg == -1) {
+ if (Optional<unsigned> Reg = TRI->getLLVMRegNum(DwarfReg, true))
+ OS << printReg(*Reg, TRI);
+ else
OS << "<badreg>";
- return;
- }
- OS << printReg(Reg, TRI);
}
static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,
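
getLLVMRegNum now reports failure through Optional<unsigned> rather than a -1 sentinel, as the rewritten printCFIRegister shows. A sketch of another caller under the same API; getRegOrZero and its fallback value are hypothetical:

    unsigned getRegOrZero(const TargetRegisterInfo *TRI, unsigned DwarfReg) {
      // getLLVMRegNum returns None when no LLVM register maps to DwarfReg.
      if (Optional<unsigned> Reg = TRI->getLLVMRegNum(DwarfReg, /*isEH=*/true))
        return *Reg;
      return 0; // caller must treat 0 as "no register"
    }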
@@ -746,7 +748,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
printTargetFlags(OS, *this);
switch (getType()) {
case MachineOperand::MO_Register: {
- unsigned Reg = getReg();
+ Register Reg = getReg();
if (isImplicit())
OS << (isDef() ? "implicit-def " : "implicit ");
else if (PrintDef && isDef())
@@ -762,13 +764,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "undef ";
if (isEarlyClobber())
OS << "early-clobber ";
- if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable())
+ if (Register::isPhysicalRegister(getReg()) && isRenamable())
OS << "renamable ";
// isDebug() is exactly true for register operands of a DBG_VALUE. So we
// simply infer it when parsing and do not need to print it.
const MachineRegisterInfo *MRI = nullptr;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (const MachineFunction *MF = getMFIfAvailable(*this)) {
MRI = &MF->getRegInfo();
}
@@ -783,7 +785,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << ".subreg" << SubReg;
}
// Print the register class / bank.
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (const MachineFunction *MF = getMFIfAvailable(*this)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) {
@@ -936,6 +938,20 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
<< CmpInst::getPredicateName(Pred) << ')';
break;
}
+ case MachineOperand::MO_ShuffleMask:
+ OS << "shufflemask(";
+ const Constant* C = getShuffleMask();
+ const int NumElts = C->getType()->getVectorNumElements();
+
+ StringRef Separator;
+ for (int I = 0; I != NumElts; ++I) {
+ OS << Separator;
+ C->getAggregateElement(I)->printAsOperand(OS, false, MST);
+ Separator = ", ";
+ }
+
+ OS << ')';
+ break;
}
}
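
With the printer above, a constant shuffle mask renders as a parenthesized, comma-separated element list; for a four-element mask the MIR output would look roughly like this (illustrative; the exact element text comes from printAsOperand):

    shufflemask(0, 1, 2, 3)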
@@ -963,7 +979,8 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
return false;
return isDereferenceableAndAlignedPointer(
- BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
+ BasePtr, Align::None(), APInt(DL.getPointerSizeInBits(), Offset + Size),
+ DL);
}
/// getConstantPool - Return a MachinePointerInfo record that refers to the
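
Here the raw alignment literal 1 becomes the typed Align::None(), which denotes minimal one-byte alignment; the semantics of the call are unchanged. A one-line sanity sketch, assuming llvm/Support/Alignment.h:

    assert(Align::None().value() == 1 && "Align::None() is 1-byte alignment");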
@@ -1049,17 +1066,6 @@ uint64_t MachineMemOperand::getAlignment() const {
return MinAlign(getBaseAlignment(), getOffset());
}
-void MachineMemOperand::print(raw_ostream &OS) const {
- ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST);
-}
-
-void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
- SmallVector<StringRef, 0> SSNs;
- LLVMContext Ctx;
- print(OS, MST, SSNs, Ctx, nullptr, nullptr);
-}
-
void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
SmallVectorImpl<StringRef> &SSNs,
const LLVMContext &Context,
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 27db9106b337..b82403ae1b85 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -76,7 +76,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
else
MBFI = nullptr;
- ORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ ORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
return false;
}
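
llvm::make_unique was LLVM's pre-C++14 shim; with the codebase now on C++14 it is dropped in favor of the standard-library version, a drop-in replacement at call sites:

    #include <memory>
    // Mirrors the rewritten call above; MF and MBFI as in the pass.
    ORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);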
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index 80a235aeaa5c..8cd66825a58a 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -846,8 +846,8 @@ struct MachineOutliner : public ModulePass {
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo>();
- AU.addPreserved<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
AU.setPreservesAll();
ModulePass::getAnalysisUsage(AU);
}
@@ -1128,7 +1128,7 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
IRBuilder<> Builder(EntryBB);
Builder.CreateRetVoid();
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
const TargetSubtargetInfo &STI = MF.getSubtarget();
@@ -1260,7 +1260,7 @@ bool MachineOutliner::outline(Module &M,
true /* isImp = true */));
}
if (MI.isCall())
- MI.getMF()->updateCallSiteInfo(&MI);
+ MI.getMF()->eraseCallSiteInfo(&MI);
};
// Copy over the defs in the outlined range.
// First inst in outlined range <-- Anything that's defined in this
@@ -1303,6 +1303,12 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
if (F.empty())
continue;
+ // Disable outlining from noreturn functions right now. Noreturn requires
+ // special handling for the case where what we are outlining could be a
+ // tail call.
+ if (F.hasFnAttribute(Attribute::NoReturn))
+ continue;
+
// There's something in F. Check if it has a MachineFunction associated with
// it.
MachineFunction *MF = MMI.getMachineFunction(F);
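
The new check keys off the IR-level noreturn attribute, which front ends emit for functions marked noreturn in source. An illustrative case that the outliner would now skip (hypothetical names):

    [[noreturn]] void logAndAbort(const char *Msg); // hypothetical helper
    [[noreturn]] void fatalError(const char *Msg) {
      logAndAbort(Msg); // never returns; the body may end in a tail call
    }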
@@ -1421,7 +1427,7 @@ bool MachineOutliner::runOnModule(Module &M) {
if (M.empty())
return false;
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
// If the user passed -enable-machine-outliner=always or
// -enable-machine-outliner, the pass will run on all functions in the module.
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 54df522d371a..89c9f6093a97 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -56,6 +56,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
@@ -153,6 +154,17 @@ static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
cl::init(false));
+static cl::opt<bool> EmitTestAnnotations(
+ "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
+ cl::desc("Instead of emitting the pipelined code, annotate instructions "
+ "with the generated schedule for feeding into the "
+ "-modulo-schedule-test pass"));
+
+static cl::opt<bool> ExperimentalCodeGen(
+ "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Use the experimental peeling code generator for software pipelining"));
+
namespace llvm {
// A command line option to enable the CopyToPhi DAG mutation.
@@ -314,7 +326,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
LI.LoopInductionVar = nullptr;
LI.LoopCompare = nullptr;
- if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
+ if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
LLVM_DEBUG(
dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
NumFailLoop++;
@@ -349,7 +361,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
// If the operand uses a subregister, replace it with a new register
// without subregisters, and generate a copy to the new register.
- unsigned NewReg = MRI.createVirtualRegister(RC);
+ Register NewReg = MRI.createVirtualRegister(RC);
MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
MachineBasicBlock::iterator At = PredB.getFirstTerminator();
const DebugLoc &DL = PredB.findDebugLoc(At);
@@ -515,14 +527,49 @@ void SwingSchedulerDAG::schedule() {
return;
}
- generatePipelinedLoop(Schedule);
+ // Generate the schedule as a ModuloSchedule.
+ DenseMap<MachineInstr *, int> Cycles, Stages;
+ std::vector<MachineInstr *> OrderedInsts;
+ for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
+ ++Cycle) {
+ for (SUnit *SU : Schedule.getInstructions(Cycle)) {
+ OrderedInsts.push_back(SU->getInstr());
+ Cycles[SU->getInstr()] = Cycle;
+ Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
+ }
+ }
+ DenseMap<MachineInstr *, std::pair<unsigned, int64_t>> NewInstrChanges;
+ for (auto &KV : NewMIs) {
+ Cycles[KV.first] = Cycles[KV.second];
+ Stages[KV.first] = Stages[KV.second];
+ NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
+ }
+
+ ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
+ std::move(Stages));
+ if (EmitTestAnnotations) {
+ assert(NewInstrChanges.empty() &&
+ "Cannot serialize a schedule with InstrChanges!");
+ ModuloScheduleTestAnnotater MSTI(MF, MS);
+ MSTI.annotate();
+ return;
+ }
+ // The experimental code generator can't work if there are InstrChanges.
+ if (ExperimentalCodeGen && NewInstrChanges.empty()) {
+ PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
+ MSE.expand();
+ } else {
+ ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
+ MSE.expand();
+ MSE.cleanup();
+ }
++NumPipelined;
}
/// Clean up after the software pipeliner runs.
void SwingSchedulerDAG::finishBlock() {
- for (MachineInstr *I : NewMIs)
- MF.DeleteMachineInstr(I);
+ for (auto &KV : NewMIs)
+ MF.DeleteMachineInstr(KV.second);
NewMIs.clear();
// Call the superclass.
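
Condensed, the new emission path builds a ModuloSchedule from the cycle and stage maps, then either annotates it for the -modulo-schedule-test pass (when -pipeliner-annotate-for-testing is set) or expands it with one of the two expanders. A sketch of just the dispatch, using the same objects as above:

    ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
                      std::move(Stages));
    if (EmitTestAnnotations) {
      ModuloScheduleTestAnnotater MSTI(MF, MS);
      MSTI.annotate(); // serialize the schedule onto the instructions
    } else if (ExperimentalCodeGen && NewInstrChanges.empty()) {
      PeelingModuloScheduleExpander(MF, MS, &LIS).expand();
    } else {
      ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
      MSE.expand();
      MSE.cleanup();
    }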
@@ -546,14 +593,6 @@ static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
}
-/// Return the Phi register value that comes from the incoming block.
-static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
- for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
- if (Phi.getOperand(i + 1).getMBB() != LoopBB)
- return Phi.getOperand(i).getReg();
- return 0;
-}
-
/// Return the Phi register value that comes from the loop block.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
@@ -658,7 +697,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
if (BaseOp1->isIdenticalTo(*BaseOp2) &&
(int)Offset1 < (int)Offset2) {
- assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
"What happened to the chain edge?");
SDep Dep(Load, SDep::Barrier);
Dep.setLatency(1);
@@ -730,7 +769,7 @@ void SwingSchedulerDAG::updatePhiDependences() {
MOI != MOE; ++MOI) {
if (!MOI->isReg())
continue;
- unsigned Reg = MOI->getReg();
+ Register Reg = MOI->getReg();
if (MOI->isDef()) {
// If the register is used by a Phi, then create an anti dependence.
for (MachineRegisterInfo::use_instr_iterator
@@ -809,7 +848,7 @@ void SwingSchedulerDAG::changeDependences() {
continue;
// Get the MI and SUnit for the instruction that defines the original base.
- unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
+ Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
if (!DefMI)
continue;
@@ -958,7 +997,7 @@ struct FuncUnitSorter {
unsigned F1 = 0, F2 = 0;
unsigned MFUs1 = minFuncUnits(IS1, F1);
unsigned MFUs2 = minFuncUnits(IS2, F2);
- if (MFUs1 == 1 && MFUs2 == 1)
+ if (MFUs1 == MFUs2)
return Resources.lookup(F1) < Resources.lookup(F2);
return MFUs1 > MFUs2;
}
@@ -1514,8 +1553,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
continue;
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isUse()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (Register::isVirtualRegister(Reg))
Uses.insert(Reg);
else if (MRI.isAllocatable(Reg))
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
@@ -1525,8 +1564,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
for (SUnit *SU : NS)
for (const MachineOperand &MO : SU->getInstr()->operands())
if (MO.isReg() && MO.isDef() && !MO.isDead()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isVirtualRegister(Reg)) {
if (!Uses.count(Reg))
LiveOutRegs.push_back(RegisterMaskPair(Reg,
LaneBitmask::getNone()));
@@ -2012,836 +2051,6 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
return scheduleFound && Schedule.getMaxStageCount() > 0;
}
-/// Given a schedule for the loop, generate a new version of the loop,
-/// and replace the old version. This function generates a prolog
-/// that contains the initial iterations in the pipeline, a kernel
-/// loop, and an epilog that contains the code for the final
-/// iterations.
-void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
- // Create a new basic block for the kernel and add it to the CFG.
- MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
-
- unsigned MaxStageCount = Schedule.getMaxStageCount();
-
- // Remember the registers that are used in different stages. The index is
- // the iteration, or stage, that the instruction is scheduled in. This is
- // a map between register names in the original block and the names created
- // in each stage of the pipelined loop.
- ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
- InstrMapTy InstrMap;
-
- SmallVector<MachineBasicBlock *, 4> PrologBBs;
-
- MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
- assert(PreheaderBB != nullptr &&
- "Need to add code to handle loops w/o preheader");
- // Generate the prolog instructions that set up the pipeline.
- generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
- MF.insert(BB->getIterator(), KernelBB);
-
- // Rearrange the instructions to generate the new, pipelined loop,
- // and update register names as needed.
- for (int Cycle = Schedule.getFirstCycle(),
- LastCycle = Schedule.getFinalCycle();
- Cycle <= LastCycle; ++Cycle) {
- std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
- // This inner loop schedules each instruction in the cycle.
- for (SUnit *CI : CycleInstrs) {
- if (CI->getInstr()->isPHI())
- continue;
- unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
- MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
- updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
- KernelBB->push_back(NewMI);
- InstrMap[NewMI] = CI->getInstr();
- }
- }
-
- // Copy any terminator instructions to the new kernel, and update
- // names as needed.
- for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
- E = BB->instr_end();
- I != E; ++I) {
- MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
- updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
- KernelBB->push_back(NewMI);
- InstrMap[NewMI] = &*I;
- }
-
- KernelBB->transferSuccessors(BB);
- KernelBB->replaceSuccessor(BB, KernelBB);
-
- generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
- VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
- generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
- InstrMap, MaxStageCount, MaxStageCount, false);
-
- LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
-
- SmallVector<MachineBasicBlock *, 4> EpilogBBs;
- // Generate the epilog instructions to complete the pipeline.
- generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
- PrologBBs);
-
- // We need this step because the register allocation doesn't handle some
- // situations well, so we insert copies to help out.
- splitLifetimes(KernelBB, EpilogBBs, Schedule);
-
- // Remove dead instructions due to loop induction variables.
- removeDeadInstructions(KernelBB, EpilogBBs);
-
- // Add branches between prolog and epilog blocks.
- addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
-
- // Remove the original loop since it's no longer referenced.
- for (auto &I : *BB)
- LIS.RemoveMachineInstrFromMaps(I);
- BB->clear();
- BB->eraseFromParent();
-
- delete[] VRMap;
-}
-
-/// Generate the pipeline prolog code.
-void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB,
- ValueMapTy *VRMap,
- MBBVectorTy &PrologBBs) {
- MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
- assert(PreheaderBB != nullptr &&
- "Need to add code to handle loops w/o preheader");
- MachineBasicBlock *PredBB = PreheaderBB;
- InstrMapTy InstrMap;
-
- // Generate a basic block for each stage, not including the last stage,
- // which will be generated in the kernel. Each basic block may contain
- // instructions from multiple stages/iterations.
- for (unsigned i = 0; i < LastStage; ++i) {
- // Create and insert the prolog basic block prior to the original loop
- // basic block. The original loop is removed later.
- MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
- PrologBBs.push_back(NewBB);
- MF.insert(BB->getIterator(), NewBB);
- NewBB->transferSuccessors(PredBB);
- PredBB->addSuccessor(NewBB);
- PredBB = NewBB;
-
- // Generate instructions for each appropriate stage. Process instructions
- // in original program order.
- for (int StageNum = i; StageNum >= 0; --StageNum) {
- for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
- BBE = BB->getFirstTerminator();
- BBI != BBE; ++BBI) {
- if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
- if (BBI->isPHI())
- continue;
- MachineInstr *NewMI =
- cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
- updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
- VRMap);
- NewBB->push_back(NewMI);
- InstrMap[NewMI] = &*BBI;
- }
- }
- }
- rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
- LLVM_DEBUG({
- dbgs() << "prolog:\n";
- NewBB->dump();
- });
- }
-
- PredBB->replaceSuccessor(BB, KernelBB);
-
- // Check if we need to remove the branch from the preheader to the original
- // loop, and replace it with a branch to the new loop.
- unsigned numBranches = TII->removeBranch(*PreheaderBB);
- if (numBranches) {
- SmallVector<MachineOperand, 0> Cond;
- TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
- }
-}
-
-/// Generate the pipeline epilog code. The epilog code finishes the iterations
-/// that were started in either the prolog or the kernel. We create a basic
-/// block for each stage that needs to complete.
-void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB,
- ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs,
- MBBVectorTy &PrologBBs) {
- // We need to change the branch from the kernel to the first epilog block, so
- // this call to analyze branch uses the kernel rather than the original BB.
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
- SmallVector<MachineOperand, 4> Cond;
- bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
- assert(!checkBranch && "generateEpilog must be able to analyze the branch");
- if (checkBranch)
- return;
-
- MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
- if (*LoopExitI == KernelBB)
- ++LoopExitI;
- assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
- MachineBasicBlock *LoopExitBB = *LoopExitI;
-
- MachineBasicBlock *PredBB = KernelBB;
- MachineBasicBlock *EpilogStart = LoopExitBB;
- InstrMapTy InstrMap;
-
- // Generate a basic block for each stage, not including the last stage,
- // which was generated for the kernel. Each basic block may contain
- // instructions from multiple stages/iterations.
- int EpilogStage = LastStage + 1;
- for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
- MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
- EpilogBBs.push_back(NewBB);
- MF.insert(BB->getIterator(), NewBB);
-
- PredBB->replaceSuccessor(LoopExitBB, NewBB);
- NewBB->addSuccessor(LoopExitBB);
-
- if (EpilogStart == LoopExitBB)
- EpilogStart = NewBB;
-
- // Add instructions to the epilog depending on the current block.
- // Process instructions in original program order.
- for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
- for (auto &BBI : *BB) {
- if (BBI.isPHI())
- continue;
- MachineInstr *In = &BBI;
- if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
- // Instructions with memoperands in the epilog are updated with
- // conservative values.
- MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
- updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
- NewBB->push_back(NewMI);
- InstrMap[NewMI] = In;
- }
- }
- }
- generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
- VRMap, InstrMap, LastStage, EpilogStage, i == 1);
- generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
- InstrMap, LastStage, EpilogStage, i == 1);
- PredBB = NewBB;
-
- LLVM_DEBUG({
- dbgs() << "epilog:\n";
- NewBB->dump();
- });
- }
-
- // Fix any Phi nodes in the loop exit block.
- for (MachineInstr &MI : *LoopExitBB) {
- if (!MI.isPHI())
- break;
- for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
- MachineOperand &MO = MI.getOperand(i);
- if (MO.getMBB() == BB)
- MO.setMBB(PredBB);
- }
- }
-
- // Create a branch to the new epilog from the kernel.
- // Remove the original branch and add a new branch to the epilog.
- TII->removeBranch(*KernelBB);
- TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
- // Add a branch to the loop exit.
- if (EpilogBBs.size() > 0) {
- MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
- SmallVector<MachineOperand, 4> Cond1;
- TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
- }
-}
-
-/// Replace all uses of FromReg that appear outside the specified
-/// basic block with ToReg.
-static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
- MachineBasicBlock *MBB,
- MachineRegisterInfo &MRI,
- LiveIntervals &LIS) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
- E = MRI.use_end();
- I != E;) {
- MachineOperand &O = *I;
- ++I;
- if (O.getParent()->getParent() != MBB)
- O.setReg(ToReg);
- }
- if (!LIS.hasInterval(ToReg))
- LIS.createEmptyInterval(ToReg);
-}
-
-/// Return true if the register has a use that occurs outside the
-/// specified loop.
-static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
- MachineRegisterInfo &MRI) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
- E = MRI.use_end();
- I != E; ++I)
- if (I->getParent()->getParent() != BB)
- return true;
- return false;
-}
-
-/// Generate Phis for the specific block in the generated pipelined code.
-/// This function looks at the Phis from the original code to guide the
-/// creation of new Phis.
-void SwingSchedulerDAG::generateExistingPhis(
- MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
- MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
- bool IsLast) {
- // Compute the stage number for the initial value of the Phi, which
- // comes from the prolog. The prolog to use depends on which kernel/
- // epilog block we're adding the Phi to.
- unsigned PrologStage = 0;
- unsigned PrevStage = 0;
- bool InKernel = (LastStageNum == CurStageNum);
- if (InKernel) {
- PrologStage = LastStageNum - 1;
- PrevStage = CurStageNum;
- } else {
- PrologStage = LastStageNum - (CurStageNum - LastStageNum);
- PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
- }
-
- for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
- BBE = BB->getFirstNonPHI();
- BBI != BBE; ++BBI) {
- unsigned Def = BBI->getOperand(0).getReg();
-
- unsigned InitVal = 0;
- unsigned LoopVal = 0;
- getPhiRegs(*BBI, BB, InitVal, LoopVal);
-
- unsigned PhiOp1 = 0;
- // The Phi value from the loop body typically is defined in the loop, but
- // not always. So, we need to check if the value is defined in the loop.
- unsigned PhiOp2 = LoopVal;
- if (VRMap[LastStageNum].count(LoopVal))
- PhiOp2 = VRMap[LastStageNum][LoopVal];
-
- int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
- int LoopValStage =
- Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
- unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
- if (NumStages == 0) {
- // We don't need to generate a Phi anymore, but we need to rename any uses
- // of the Phi value.
- unsigned NewReg = VRMap[PrevStage][LoopVal];
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
- Def, InitVal, NewReg);
- if (VRMap[CurStageNum].count(LoopVal))
- VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
- }
- // Adjust the number of Phis needed depending on the number of prologs left,
- // and the distance from where the Phi is first scheduled. The number of
- // Phis cannot exceed the number of prolog stages. Each stage can
- // potentially define two values.
- unsigned MaxPhis = PrologStage + 2;
- if (!InKernel && (int)PrologStage <= LoopValStage)
- MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
- unsigned NumPhis = std::min(NumStages, MaxPhis);
-
- unsigned NewReg = 0;
- unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
- // In the epilog, we may need to look back one stage to get the correct
- // Phi name because the epilog and prolog blocks execute the same stage.
- // The correct name is from the previous block only when the Phi has
- // been completely scheduled prior to the epilog, and the Phi value is not
- // needed in multiple stages.
- int StageDiff = 0;
- if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
- NumPhis == 1)
- StageDiff = 1;
- // Adjust the computations below when the phi and the loop definition
- // are scheduled in different stages.
- if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
- StageDiff = StageScheduled - LoopValStage;
- for (unsigned np = 0; np < NumPhis; ++np) {
- // If the Phi hasn't been scheduled, then use the initial Phi operand
- // value. Otherwise, use the scheduled version of the instruction. This
- // is a little complicated when a Phi references another Phi.
- if (np > PrologStage || StageScheduled >= (int)LastStageNum)
- PhiOp1 = InitVal;
- // Check if the Phi has already been scheduled in a prolog stage.
- else if (PrologStage >= AccessStage + StageDiff + np &&
- VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
- PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
- // Check if the Phi has already been scheduled, but the loop instruction
- // is either another Phi, or doesn't occur in the loop.
- else if (PrologStage >= AccessStage + StageDiff + np) {
- // If the Phi references another Phi, we need to examine the other
- // Phi to get the correct value.
- PhiOp1 = LoopVal;
- MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
- int Indirects = 1;
- while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
- int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
- if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
- PhiOp1 = getInitPhiReg(*InstOp1, BB);
- else
- PhiOp1 = getLoopPhiReg(*InstOp1, BB);
- InstOp1 = MRI.getVRegDef(PhiOp1);
- int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
- int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
- if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
- VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
- PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
- break;
- }
- ++Indirects;
- }
- } else
- PhiOp1 = InitVal;
- // If this references a generated Phi in the kernel, get the Phi operand
- // from the incoming block.
- if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
- if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
- PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
-
- MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
- bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
- // In the epilog, a map lookup is needed to get the value from the kernel,
- // or previous epilog block. How this is done depends on whether the
- // instruction is scheduled in the previous block.
- if (!InKernel) {
- int StageDiffAdj = 0;
- if (LoopValStage != -1 && StageScheduled > LoopValStage)
- StageDiffAdj = StageScheduled - LoopValStage;
- // Use the loop value defined in the kernel, unless the kernel
- // contains the last definition of the Phi.
- if (np == 0 && PrevStage == LastStageNum &&
- (StageScheduled != 0 || LoopValStage != 0) &&
- VRMap[PrevStage - StageDiffAdj].count(LoopVal))
- PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
- // Use the value defined by the Phi. We add one because we switch
- // from looking at the loop value to the Phi definition.
- else if (np > 0 && PrevStage == LastStageNum &&
- VRMap[PrevStage - np + 1].count(Def))
- PhiOp2 = VRMap[PrevStage - np + 1][Def];
- // Use the loop value defined in the kernel.
- else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
- VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
- PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
- // Use the value defined by the Phi, unless we're generating the first
- // epilog and the Phi refers to a Phi in a different stage.
- else if (VRMap[PrevStage - np].count(Def) &&
- (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled)))
- PhiOp2 = VRMap[PrevStage - np][Def];
- }
-
- // Check if we can reuse an existing Phi. This occurs when a Phi
- // references another Phi, and the other Phi is scheduled in an
- // earlier stage. We can try to reuse an existing Phi up until the last
- // stage of the current Phi.
- if (LoopDefIsPhi) {
- if (static_cast<int>(PrologStage - np) >= StageScheduled) {
- int LVNumStages = Schedule.getStagesForPhi(LoopVal);
- int StageDiff = (StageScheduled - LoopValStage);
- LVNumStages -= StageDiff;
- // Make sure the loop value Phi has been processed already.
- if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
- NewReg = PhiOp2;
- unsigned ReuseStage = CurStageNum;
- if (Schedule.isLoopCarried(this, *PhiInst))
- ReuseStage -= LVNumStages;
- // Check if the Phi to reuse has been generated yet. If not, then
- // there is nothing to reuse.
- if (VRMap[ReuseStage - np].count(LoopVal)) {
- NewReg = VRMap[ReuseStage - np][LoopVal];
-
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, Def, NewReg);
- // Update the map with the new Phi name.
- VRMap[CurStageNum - np][Def] = NewReg;
- PhiOp2 = NewReg;
- if (VRMap[LastStageNum - np - 1].count(LoopVal))
- PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
-
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
- continue;
- }
- }
- }
- if (InKernel && StageDiff > 0 &&
- VRMap[CurStageNum - StageDiff - np].count(LoopVal))
- PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
- }
-
- const TargetRegisterClass *RC = MRI.getRegClass(Def);
- NewReg = MRI.createVirtualRegister(RC);
-
- MachineInstrBuilder NewPhi =
- BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
- TII->get(TargetOpcode::PHI), NewReg);
- NewPhi.addReg(PhiOp1).addMBB(BB1);
- NewPhi.addReg(PhiOp2).addMBB(BB2);
- if (np == 0)
- InstrMap[NewPhi] = &*BBI;
-
- // We define the Phis after creating the new pipelined code, so
- // we need to rename the Phi values in scheduled instructions.
-
- unsigned PrevReg = 0;
- if (InKernel && VRMap[PrevStage - np].count(LoopVal))
- PrevReg = VRMap[PrevStage - np][LoopVal];
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
- Def, NewReg, PrevReg);
- // If the Phi has been scheduled, use the new name for rewriting.
- if (VRMap[CurStageNum - np].count(Def)) {
- unsigned R = VRMap[CurStageNum - np][Def];
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
- R, NewReg);
- }
-
- // Check if we need to rename any uses that occurs after the loop. The
- // register to replace depends on whether the Phi is scheduled in the
- // epilog.
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
-
- // In the kernel, a dependent Phi uses the value from this Phi.
- if (InKernel)
- PhiOp2 = NewReg;
-
- // Update the map with the new Phi name.
- VRMap[CurStageNum - np][Def] = NewReg;
- }
-
- while (NumPhis++ < NumStages) {
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
- &*BBI, Def, NewReg, 0);
- }
-
- // Check if we need to rename a Phi that has been eliminated due to
- // scheduling.
- if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
- replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
- }
-}
-
-/// Generate Phis for the specified block in the generated pipelined code.
-/// These are new Phis needed because the definition is scheduled after the
-/// use in the pipelined sequence.
-void SwingSchedulerDAG::generatePhis(
- MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
- MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
- bool IsLast) {
- // Compute the stage number that contains the initial Phi value, and
- // the Phi from the previous stage.
- unsigned PrologStage = 0;
- unsigned PrevStage = 0;
- unsigned StageDiff = CurStageNum - LastStageNum;
- bool InKernel = (StageDiff == 0);
- if (InKernel) {
- PrologStage = LastStageNum - 1;
- PrevStage = CurStageNum;
- } else {
- PrologStage = LastStageNum - StageDiff;
- PrevStage = LastStageNum + StageDiff - 1;
- }
-
- for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
- BBE = BB->instr_end();
- BBI != BBE; ++BBI) {
- for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = BBI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() ||
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
-
- int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
- assert(StageScheduled != -1 && "Expecting scheduled instruction.");
- unsigned Def = MO.getReg();
- unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
- // An instruction that is scheduled in stage 0 and used after the loop
- // requires a phi in the epilog for the last definition from either
- // the kernel or prolog.
- if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
- hasUseAfterLoop(Def, BB, MRI))
- NumPhis = 1;
- if (!InKernel && (unsigned)StageScheduled > PrologStage)
- continue;
-
- unsigned PhiOp2 = VRMap[PrevStage][Def];
- if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
- if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
- PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
- // The number of Phis can't exceed the number of prolog stages. The
- // prolog stage number is zero based.
- if (NumPhis > PrologStage + 1 - StageScheduled)
- NumPhis = PrologStage + 1 - StageScheduled;
- for (unsigned np = 0; np < NumPhis; ++np) {
- unsigned PhiOp1 = VRMap[PrologStage][Def];
- if (np <= PrologStage)
- PhiOp1 = VRMap[PrologStage - np][Def];
- if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
- if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
- PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
- if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
- PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
- }
- if (!InKernel)
- PhiOp2 = VRMap[PrevStage - np][Def];
-
- const TargetRegisterClass *RC = MRI.getRegClass(Def);
- unsigned NewReg = MRI.createVirtualRegister(RC);
-
- MachineInstrBuilder NewPhi =
- BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
- TII->get(TargetOpcode::PHI), NewReg);
- NewPhi.addReg(PhiOp1).addMBB(BB1);
- NewPhi.addReg(PhiOp2).addMBB(BB2);
- if (np == 0)
- InstrMap[NewPhi] = &*BBI;
-
- // Rewrite uses and update the map. The actions depend upon whether
- // we are generating code for the kernel or epilog blocks.
- if (InKernel) {
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, PhiOp1, NewReg);
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, PhiOp2, NewReg);
-
- PhiOp2 = NewReg;
- VRMap[PrevStage - np - 1][Def] = NewReg;
- } else {
- VRMap[CurStageNum - np][Def] = NewReg;
- if (np == NumPhis - 1)
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, Def, NewReg);
- }
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
- }
- }
- }
-}
-
-/// Remove instructions that generate values with no uses.
-/// Typically, these are induction variable operations that generate values
-/// used in the loop itself. A dead instruction has a definition with
-/// no uses, or uses that occur in the original loop only.
-void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs) {
- // For each epilog block, check that the value defined by each instruction
- // is used. If not, delete it.
- for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
- MBE = EpilogBBs.rend();
- MBB != MBE; ++MBB)
- for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
- ME = (*MBB)->instr_rend();
- MI != ME;) {
- // From DeadMachineInstructionElim. Don't delete inline assembly.
- if (MI->isInlineAsm()) {
- ++MI;
- continue;
- }
- bool SawStore = false;
- // Check if it's safe to remove the instruction due to side effects.
- // We can, and want to, remove Phis here.
- if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
- ++MI;
- continue;
- }
- bool used = true;
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
- continue;
- unsigned reg = MOI->getReg();
- // Assume physical registers are used, unless they are marked dead.
- if (TargetRegisterInfo::isPhysicalRegister(reg)) {
- used = !MOI->isDead();
- if (used)
- break;
- continue;
- }
- unsigned realUses = 0;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
- // Check if there are any uses that occur only in the original
- // loop. If so, that's not a real use.
- if (UI->getParent()->getParent() != BB) {
- realUses++;
- used = true;
- break;
- }
- }
- if (realUses > 0)
- break;
- used = false;
- }
- if (!used) {
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI++->eraseFromParent();
- continue;
- }
- ++MI;
- }
- // In the kernel block, check if we can remove a Phi that generates a value
- // used in an instruction removed in the epilog block.
- for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
- BBE = KernelBB->getFirstNonPHI();
- BBI != BBE;) {
- MachineInstr *MI = &*BBI;
- ++BBI;
- unsigned reg = MI->getOperand(0).getReg();
- if (MRI.use_begin(reg) == MRI.use_end()) {
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
- }
- }
-}
-
-/// For loop carried definitions, we split the lifetime of a virtual register
-/// that has uses past the definition in the next iteration. A copy with a new
-/// virtual register is inserted before the definition, which helps with
-/// generating a better register assignment.
-///
-/// v1 = phi(a, v2) v1 = phi(a, v2)
-/// v2 = phi(b, v3) v2 = phi(b, v3)
-/// v3 = .. v4 = copy v1
-/// .. = V1 v3 = ..
-/// .. = v4
-void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule) {
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- for (auto &PHI : KernelBB->phis()) {
- unsigned Def = PHI.getOperand(0).getReg();
- // Check for any Phi definition that is used as an operand of another Phi
- // in the same block.
- for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
- E = MRI.use_instr_end();
- I != E; ++I) {
- if (I->isPHI() && I->getParent() == KernelBB) {
- // Get the loop carried definition.
- unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
- if (!LCDef)
- continue;
- MachineInstr *MI = MRI.getVRegDef(LCDef);
- if (!MI || MI->getParent() != KernelBB || MI->isPHI())
- continue;
- // Search through the rest of the block looking for uses of the Phi
- // definition. If one occurs, then split the lifetime.
- unsigned SplitReg = 0;
- for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
- KernelBB->instr_end()))
- if (BBJ.readsRegister(Def)) {
- // We split the lifetime when we find the first use.
- if (SplitReg == 0) {
- SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
- BuildMI(*KernelBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), SplitReg)
- .addReg(Def);
- }
- BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
- }
- if (!SplitReg)
- continue;
- // Search through each of the epilog blocks for any uses to be renamed.
- for (auto &Epilog : EpilogBBs)
- for (auto &I : *Epilog)
- if (I.readsRegister(Def))
- I.substituteRegister(Def, SplitReg, 0, *TRI);
- break;
- }
- }
- }
-}
-
-/// Remove the incoming block from the Phis in a basic block.
-static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
- for (MachineInstr &MI : *BB) {
- if (!MI.isPHI())
- break;
- for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
- if (MI.getOperand(i + 1).getMBB() == Incoming) {
- MI.RemoveOperand(i + 1);
- MI.RemoveOperand(i);
- break;
- }
- }
-}
-
-/// Create branches from each prolog basic block to the appropriate epilog
-/// block. These edges are needed if the loop ends before reaching the
-/// kernel.
-void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB,
- MBBVectorTy &PrologBBs,
- MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule, ValueMapTy *VRMap) {
- assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
- MachineInstr *IndVar = Pass.LI.LoopInductionVar;
- MachineInstr *Cmp = Pass.LI.LoopCompare;
- MachineBasicBlock *LastPro = KernelBB;
- MachineBasicBlock *LastEpi = KernelBB;
-
- // Start from the blocks connected to the kernel and work "out"
- // to the first prolog and the last epilog blocks.
- SmallVector<MachineInstr *, 4> PrevInsts;
- unsigned MaxIter = PrologBBs.size() - 1;
- unsigned LC = UINT_MAX;
- unsigned LCMin = UINT_MAX;
- for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
- // Add branches to the prolog that go to the corresponding
- // epilog, and the fall-thru prolog/kernel block.
- MachineBasicBlock *Prolog = PrologBBs[j];
- MachineBasicBlock *Epilog = EpilogBBs[i];
- // We've executed one iteration, so decrement the loop count and check for
- // the loop end.
- SmallVector<MachineOperand, 4> Cond;
- // Check if the LOOP0 has already been removed. If so, then there is no need
- // to reduce the trip count.
- if (LC != 0)
- LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
- PrevInsts, j, MaxIter);
-
- // Record the value of the first trip count, which is used to determine if
- // branches and blocks can be removed for constant trip counts.
- if (LCMin == UINT_MAX)
- LCMin = LC;
-
- unsigned numAdded = 0;
- if (TargetRegisterInfo::isVirtualRegister(LC)) {
- Prolog->addSuccessor(Epilog);
- numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
- } else if (j >= LCMin) {
- Prolog->addSuccessor(Epilog);
- Prolog->removeSuccessor(LastPro);
- LastEpi->removeSuccessor(Epilog);
- numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
- removePhis(Epilog, LastEpi);
- // Remove the blocks that are no longer referenced.
- if (LastPro != LastEpi) {
- LastEpi->clear();
- LastEpi->eraseFromParent();
- }
- LastPro->clear();
- LastPro->eraseFromParent();
- } else {
- numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
- removePhis(Epilog, Prolog);
- }
- LastPro = Prolog;
- LastEpi = Epilog;
- for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
- E = Prolog->instr_rend();
- I != E && numAdded > 0; ++I, --numAdded)
- updateInstruction(&*I, false, j, 0, Schedule, VRMap);
- }
-}
-
/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
@@ -2854,7 +2063,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
if (!BaseOp->isReg())
return false;
- unsigned BaseReg = BaseOp->getReg();
+ Register BaseReg = BaseOp->getReg();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Check if there is a Phi. If so, get the definition in the loop.
@@ -2874,261 +2083,6 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
return true;
}
-/// Update the memory operand with a new offset when the pipeliner
-/// generates a new copy of the instruction that refers to a
-/// different memory location.
-void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
- MachineInstr &OldMI, unsigned Num) {
- if (Num == 0)
- return;
- // If the instruction has memory operands, then adjust the offset
- // when the instruction appears in different stages.
- if (NewMI.memoperands_empty())
- return;
- SmallVector<MachineMemOperand *, 2> NewMMOs;
- for (MachineMemOperand *MMO : NewMI.memoperands()) {
- // TODO: Figure out whether isAtomic is really necessary (see D57601).
- if (MMO->isVolatile() || MMO->isAtomic() ||
- (MMO->isInvariant() && MMO->isDereferenceable()) ||
- (!MMO->getValue())) {
- NewMMOs.push_back(MMO);
- continue;
- }
- unsigned Delta;
- if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
- int64_t AdjOffset = Delta * Num;
- NewMMOs.push_back(
- MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
- } else {
- NewMMOs.push_back(
- MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
- }
- }
- NewMI.setMemRefs(MF, NewMMOs);
-}
-
-/// Clone the instruction for the new pipelined loop and update the
-/// memory operands, if needed.
-MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
- unsigned CurStageNum,
- unsigned InstStageNum) {
- MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
- // Check for tied operands in inline asm instructions. This should be handled
- // elsewhere, but I'm not sure of the best solution.
- if (OldMI->isInlineAsm())
- for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
- const auto &MO = OldMI->getOperand(i);
- if (MO.isReg() && MO.isUse())
- break;
- unsigned UseIdx;
- if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
- NewMI->tieOperands(i, UseIdx);
- }
- updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
- return NewMI;
-}
-
-/// Clone the instruction for the new pipelined loop. If needed, this
-/// function updates the instruction using the values saved in the
-/// InstrChanges structure.
-MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
- unsigned CurStageNum,
- unsigned InstStageNum,
- SMSchedule &Schedule) {
- MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
- DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
- InstrChanges.find(getSUnit(OldMI));
- if (It != InstrChanges.end()) {
- std::pair<unsigned, int64_t> RegAndOffset = It->second;
- unsigned BasePos, OffsetPos;
- if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
- return nullptr;
- int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
- MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
- if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
- NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
- NewMI->getOperand(OffsetPos).setImm(NewOffset);
- }
- updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
- return NewMI;
-}
-
-/// Update the machine instruction with new virtual registers. This
-/// function may change the definitions and/or uses.
-void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
- unsigned CurStageNum,
- unsigned InstrStageNum,
- SMSchedule &Schedule,
- ValueMapTy *VRMap) {
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
- unsigned reg = MO.getReg();
- if (MO.isDef()) {
- // Create a new virtual register for the definition.
- const TargetRegisterClass *RC = MRI.getRegClass(reg);
- unsigned NewReg = MRI.createVirtualRegister(RC);
- MO.setReg(NewReg);
- VRMap[CurStageNum][reg] = NewReg;
- if (LastDef)
- replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
- } else if (MO.isUse()) {
- MachineInstr *Def = MRI.getVRegDef(reg);
- // Compute the stage that contains the last definition for the instruction.
- int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
- unsigned StageNum = CurStageNum;
- if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
- // Compute the difference in stages between the definition and the use.
- unsigned StageDiff = (InstrStageNum - DefStageNum);
- // Make an adjustment to get the last definition.
- StageNum -= StageDiff;
- }
- if (VRMap[StageNum].count(reg))
- MO.setReg(VRMap[StageNum][reg]);
- }
- }
-}
-
-/// Return the instruction in the loop that defines the register.
-/// If the definition is a Phi, then follow the Phi operand to
-/// the instruction in the loop.
-MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
- SmallPtrSet<MachineInstr *, 8> Visited;
- MachineInstr *Def = MRI.getVRegDef(Reg);
- while (Def->isPHI()) {
- if (!Visited.insert(Def).second)
- break;
- for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
- if (Def->getOperand(i + 1).getMBB() == BB) {
- Def = MRI.getVRegDef(Def->getOperand(i).getReg());
- break;
- }
- }
- return Def;
-}
-
-/// Return the new name for the value from the previous stage.
-unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
- unsigned LoopVal, unsigned LoopStage,
- ValueMapTy *VRMap,
- MachineBasicBlock *BB) {
- unsigned PrevVal = 0;
- if (StageNum > PhiStage) {
- MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
- if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
- // The name is defined in the previous stage.
- PrevVal = VRMap[StageNum - 1][LoopVal];
- else if (VRMap[StageNum].count(LoopVal))
- // The previous name is defined in the current stage when the instruction
- // order is swapped.
- PrevVal = VRMap[StageNum][LoopVal];
- else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
- // The loop value hasn't yet been scheduled.
- PrevVal = LoopVal;
- else if (StageNum == PhiStage + 1)
- // The loop value is another phi, which has not been scheduled.
- PrevVal = getInitPhiReg(*LoopInst, BB);
- else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
- // The loop value is another phi, which has been scheduled.
- PrevVal =
- getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
- LoopStage, VRMap, BB);
- }
- return PrevVal;
-}
-
-/// Rewrite the Phi values in the specified block to use the mappings
-/// from the initial operand. Once the Phi is scheduled, we switch
-/// to using the loop value instead of the Phi value, so those names
-/// do not need to be rewritten.
-void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
- unsigned StageNum,
- SMSchedule &Schedule,
- ValueMapTy *VRMap,
- InstrMapTy &InstrMap) {
- for (auto &PHI : BB->phis()) {
- unsigned InitVal = 0;
- unsigned LoopVal = 0;
- getPhiRegs(PHI, BB, InitVal, LoopVal);
- unsigned PhiDef = PHI.getOperand(0).getReg();
-
- unsigned PhiStage =
- (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
- unsigned LoopStage =
- (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
- unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
- if (NumPhis > StageNum)
- NumPhis = StageNum;
- for (unsigned np = 0; np <= NumPhis; ++np) {
- unsigned NewVal =
- getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
- if (!NewVal)
- NewVal = InitVal;
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
- PhiDef, NewVal);
- }
- }
-}
-
-/// Rewrite a previously scheduled instruction to use the register value
-/// from the new instruction. Make sure the instruction occurs in the
-/// basic block, and we don't change the uses in the new instruction.
-void SwingSchedulerDAG::rewriteScheduledInstr(
- MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
- unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
- unsigned NewReg, unsigned PrevReg) {
- bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
- int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
- // Rewrite uses that have been scheduled already to use the new
- // Phi register.
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
- EI = MRI.use_end();
- UI != EI;) {
- MachineOperand &UseOp = *UI;
- MachineInstr *UseMI = UseOp.getParent();
- ++UI;
- if (UseMI->getParent() != BB)
- continue;
- if (UseMI->isPHI()) {
- if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
- continue;
- if (getLoopPhiReg(*UseMI, BB) != OldReg)
- continue;
- }
- InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
- assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
- SUnit *OrigMISU = getSUnit(OrigInstr->second);
- int StageSched = Schedule.stageScheduled(OrigMISU);
- int CycleSched = Schedule.cycleScheduled(OrigMISU);
- unsigned ReplaceReg = 0;
- // This is the stage for the scheduled instruction.
- if (StagePhi == StageSched && Phi->isPHI()) {
- int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
- if (PrevReg && InProlog)
- ReplaceReg = PrevReg;
- else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
- (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
- ReplaceReg = PrevReg;
- else
- ReplaceReg = NewReg;
- }
- // The scheduled instruction occurs before the scheduled Phi, and the
- // Phi is not loop carried.
- if (!InProlog && StagePhi + 1 == StageSched &&
- !Schedule.isLoopCarried(this, *Phi))
- ReplaceReg = NewReg;
- if (StagePhi > StageSched && Phi->isPHI())
- ReplaceReg = NewReg;
- if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
- ReplaceReg = NewReg;
- if (ReplaceReg) {
- MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
- UseOp.setReg(ReplaceReg);
- }
- }
-}
-
/// Check if we can change the instruction to use an offset value from the
/// previous iteration. If so, return true and set the base and offset values
/// so that we can rewrite the load, if necessary.
@@ -3147,7 +2101,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
unsigned BasePosLd, OffsetPosLd;
if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
return false;
- unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
+ Register BaseReg = MI->getOperand(BasePosLd).getReg();
// Look for the Phi instruction.
MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
@@ -3202,7 +2156,7 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
unsigned BasePos, OffsetPos;
if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
return;
- unsigned BaseReg = MI->getOperand(BasePos).getReg();
+ Register BaseReg = MI->getOperand(BasePos).getReg();
MachineInstr *LoopDef = findDefInLoop(BaseReg);
int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
@@ -3221,11 +2175,29 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
NewMI->getOperand(OffsetPos).setImm(NewOffset);
SU->setInstr(NewMI);
MISUnitMap[NewMI] = SU;
- NewMIs.insert(NewMI);
+ NewMIs[MI] = NewMI;
}
}
}
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
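+/// For illustration, given SSA in the loop such as:
+///   %a = PHI %init, %bb.preheader, %b, %bb.loop
+///   %b = ADD %a, 1
+/// a query on %a follows the loop-incoming Phi operand and returns the ADD.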
+MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ while (Def->isPHI()) {
+ if (!Visited.insert(Def).second)
+ break;
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+ if (Def->getOperand(i + 1).getMBB() == BB) {
+ Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+ break;
+ }
+ }
+ return Def;
+}
+
/// Return true for an order or output dependence that is potentially loop
/// carried. A dependence is loop carried if the destination defines a value
/// that may be used or defined by the source in a subsequent iteration.
@@ -3499,10 +2471,10 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
++I, ++Pos) {
for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
unsigned BasePos, OffsetPos;
if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
if (MI->getOperand(BasePos).getReg() == Reg)
@@ -3676,7 +2648,7 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
assert(StageDef != -1 && "Instruction should have been scheduled.");
for (auto &SI : SU.Succs)
if (SI.isAssignedRegDep())
- if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
+ if (Register::isPhysicalRegister(SI.getReg()))
if (stageScheduled(SI.getSUnit()) != StageDef)
return false;
}
@@ -3810,7 +2782,7 @@ void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
NewMI->getOperand(OffsetPos).setImm(NewOffset);
SU->setInstr(NewMI);
MISUnitMap[NewMI] = SU;
- NewMIs.insert(NewMI);
+ NewMIs[MI] = NewMI;
}
}
OverlapReg = 0;
@@ -3847,40 +2819,6 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
ScheduledInstrs[cycle].push_front(*I);
}
}
- // Iterate over the definitions in each instruction, and compute the
- // stage difference for each use. Keep the maximum value.
- for (auto &I : InstrToCycle) {
- int DefStage = stageScheduled(I.first);
- MachineInstr *MI = I.first->getInstr();
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- MachineOperand &Op = MI->getOperand(i);
- if (!Op.isReg() || !Op.isDef())
- continue;
-
- unsigned Reg = Op.getReg();
- unsigned MaxDiff = 0;
- bool PhiIsSwapped = false;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
- MachineOperand &UseOp = *UI;
- MachineInstr *UseMI = UseOp.getParent();
- SUnit *SUnitUse = SSD->getSUnit(UseMI);
- int UseStage = stageScheduled(SUnitUse);
- unsigned Diff = 0;
- if (UseStage != -1 && UseStage >= DefStage)
- Diff = UseStage - DefStage;
- if (MI->isPHI()) {
- if (isLoopCarried(SSD, *MI))
- ++Diff;
- else
- PhiIsSwapped = true;
- }
- MaxDiff = std::max(Diff, MaxDiff);
- }
- RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
- }
- }
// Erase all the elements in the later stages. Only one iteration should
// remain in the scheduled list, and it contains all the instructions.
@@ -4085,4 +3023,3 @@ void ResourceManager::clearResources() {
return DFAResources->clearResources();
std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
}
-
diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp
index 7f220ed1fd8f..f4daff667e86 100644
--- a/lib/CodeGen/MachinePostDominators.cpp
+++ b/lib/CodeGen/MachinePostDominators.cpp
@@ -17,7 +17,9 @@ using namespace llvm;
namespace llvm {
template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase
-}
+
+extern bool VerifyMachineDomInfo;
+} // namespace llvm
char MachinePostDominatorTree::ID = 0;
@@ -25,33 +27,52 @@ char MachinePostDominatorTree::ID = 0;
INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
"MachinePostDominator Tree Construction", true, true)
-MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+MachinePostDominatorTree::MachinePostDominatorTree()
+ : MachineFunctionPass(ID), PDT(nullptr) {
initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
- DT = new PostDomTreeBase<MachineBasicBlock>();
}
-FunctionPass *
-MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+FunctionPass *MachinePostDominatorTree::createMachinePostDominatorTreePass() {
return new MachinePostDominatorTree();
}
-bool
-MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
- DT->recalculate(F);
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ PDT = std::make_unique<PostDomTreeT>();
+ PDT->recalculate(F);
return false;
}
-MachinePostDominatorTree::~MachinePostDominatorTree() {
- delete DT;
-}
-
-void
-MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+void MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
-void
-MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
- DT->print(OS);
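+/// Return the nearest block that post-dominates all of \p Blocks, or nullptr
+/// if the walk reaches the virtual root. A minimal caller sketch, with PDT
+/// being this analysis:
+///   if (MachineBasicBlock *Join =
+///           PDT.findNearestCommonDominator({TBB, FBB}))
+///     ... // every path from the branch reconverges at Join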
+MachineBasicBlock *MachinePostDominatorTree::findNearestCommonDominator(
+ ArrayRef<MachineBasicBlock *> Blocks) const {
+ assert(!Blocks.empty());
+
+ MachineBasicBlock *NCD = Blocks.front();
+ for (MachineBasicBlock *BB : Blocks.drop_front()) {
+ NCD = PDT->findNearestCommonDominator(NCD, BB);
+
+ // Stop when the root is reached.
+ if (PDT->isVirtualRoot(PDT->getNode(NCD)))
+ return nullptr;
+ }
+
+ return NCD;
+}
+
+void MachinePostDominatorTree::verifyAnalysis() const {
+ if (PDT && VerifyMachineDomInfo)
+ if (!PDT->verify(PostDomTreeT::VerificationLevel::Basic)) {
+ errs() << "MachinePostDominatorTree verification failed\n";
+
+ abort();
+ }
+}
+
+void MachinePostDominatorTree::print(llvm::raw_ostream &OS,
+ const Module *M) const {
+ PDT->print(OS);
}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index f0fd0405d69d..b88d4ea462ef 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -144,7 +144,7 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
}
unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+ unsigned Reg = Register::index2VirtReg(getNumVirtRegs());
VRegInfo.grow(Reg);
RegAllocHints.grow(Reg);
insertVRegByName(Name, Reg);
@@ -202,7 +202,7 @@ void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); }
void MachineRegisterInfo::clearVirtRegs() {
#ifndef NDEBUG
for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (!VRegInfo[Reg].second)
continue;
verifyUseList(Reg);
@@ -255,7 +255,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
void MachineRegisterInfo::verifyUseLists() const {
#ifndef NDEBUG
for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
- verifyUseList(TargetRegisterInfo::index2VirtReg(i));
+ verifyUseList(Register::index2VirtReg(i));
for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i)
verifyUseList(i);
#endif
@@ -386,7 +386,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
MachineOperand &O = *I;
++I;
- if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (Register::isPhysicalRegister(ToReg)) {
O.substPhysReg(ToReg, *TRI);
} else {
O.setReg(ToReg);
@@ -498,7 +498,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
// Lane masks are only defined for vregs.
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
const TargetRegisterClass &TRC = *getRegClass(Reg);
return TRC.getLaneMask();
}
@@ -517,7 +517,7 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
}
bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ assert(Register::isPhysicalRegister(PhysReg));
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
if (TRI->isConstantPhysReg(PhysReg))
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index e8b42047b49f..258a5f9e0482 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -95,7 +95,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
while (I != BB->end() && I->isPHI()) {
bool Same = true;
for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
- unsigned SrcReg = I->getOperand(i).getReg();
+ Register SrcReg = I->getOperand(i).getReg();
MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
if (AVals[SrcBB] != SrcReg) {
Same = false;
@@ -118,7 +118,7 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode,
const TargetRegisterClass *RC,
MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
}
@@ -292,7 +292,7 @@ public:
MachineSSAUpdater *Updater) {
// Insert an implicit_def to represent an undef value.
MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
- BB, BB->getFirstTerminator(),
+ BB, BB->getFirstNonPHI(),
Updater->VRC, Updater->MRI,
Updater->TII);
return NewDef->getOperand(0).getReg();
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index ae1170ad1be6..f0721ea3b76d 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -82,6 +82,10 @@ cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
cl::desc("Print critical path length to stdout"));
+cl::opt<bool> VerifyScheduling(
+ "verify-misched", cl::Hidden,
+ cl::desc("Verify machine instrs before and after machine scheduling"));
+
} // end namespace llvm
#ifndef NDEBUG
@@ -122,9 +126,6 @@ static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
cl::desc("Enable memop clustering."),
cl::init(true));
-static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
- cl::desc("Verify machine instrs before and after machine scheduling"));
-
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -198,6 +199,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID;
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
"Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
@@ -210,7 +212,7 @@ MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
@@ -234,7 +236,7 @@ PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -933,8 +935,8 @@ void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
if (TrackLaneMasks && !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
// Ignore re-defs.
@@ -985,7 +987,7 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
"ShouldTrackLaneMasks requires ShouldTrackPressure");
}
-// Setup the register pressure trackers for the top scheduled top and bottom
+// Setup the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
VRegUses.clear();
@@ -1095,7 +1097,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
for (const RegisterMaskPair &P : LiveUses) {
unsigned Reg = P.RegUnit;
/// FIXME: Currently assuming single-use physregs.
- if (!TRI->isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
continue;
if (ShouldTrackLaneMasks) {
@@ -1319,8 +1321,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
// Visit each live out vreg def to find def/use pairs that cross iterations.
for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
unsigned Reg = P.RegUnit;
- if (!TRI->isVirtualRegister(Reg))
- continue;
+ if (!Register::isVirtualRegister(Reg))
+ continue;
const LiveInterval &LI = LIS->getInterval(Reg);
const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
if (!DefVNI)
@@ -1538,14 +1540,14 @@ namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
: nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
: nullptr;
}
@@ -1657,7 +1659,7 @@ namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return llvm::make_unique<CopyConstrain>(TII, TRI);
+ return std::make_unique<CopyConstrain>(TII, TRI);
}
} // end namespace llvm
@@ -1687,13 +1689,13 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
// Check for pure vreg copies.
const MachineOperand &SrcOp = Copy->getOperand(1);
- unsigned SrcReg = SrcOp.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
+ Register SrcReg = SrcOp.getReg();
+ if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
return;
const MachineOperand &DstOp = Copy->getOperand(0);
- unsigned DstReg = DstOp.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
+ Register DstReg = DstOp.getReg();
+ if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
return;
// Check if either the dest or source is local. If it's live across a back
@@ -2914,14 +2916,12 @@ int biasPhysReg(const SUnit *SU, bool isTop) {
unsigned UnscheduledOper = isTop ? 0 : 1;
// If we have already scheduled the physreg produce/consumer, immediately
// schedule the copy.
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(ScheduledOper).getReg()))
+ if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
return 1;
// If the physreg is at the boundary, defer it. Otherwise schedule it
// immediately to free the dependent. We can hoist the copy later.
bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(UnscheduledOper).getReg()))
+ if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
return AtBoundary ? -1 : 1;
}
@@ -2931,7 +2931,7 @@ int biasPhysReg(const SUnit *SU, bool isTop) {
// physical registers.
bool DoBias = true;
for (const MachineOperand &Op : MI->defs()) {
- if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
+ if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
DoBias = false;
break;
}
@@ -3259,7 +3259,8 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
// Find already scheduled copies with a single physreg dependence and move
// them just above the scheduled instruction.
for (SDep &Dep : Deps) {
- if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
+ if (Dep.getKind() != SDep::Data ||
+ !Register::isPhysicalRegister(Dep.getReg()))
continue;
SUnit *DepSU = Dep.getSUnit();
if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
@@ -3298,7 +3299,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// default scheduler if the target does not set a default.
ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
- new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
+ new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
@@ -3450,7 +3451,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
}
ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
- return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
+ return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
/*RemoveKillFlags=*/true);
}
@@ -3561,10 +3562,10 @@ public:
} // end anonymous namespace
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
+ return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
+ return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
}
static MachineSchedRegistry ILPMaxRegistry(
@@ -3658,7 +3659,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
assert((TopDown || !ForceTopDown) &&
"-misched-topdown incompatible with -misched-bottomup");
return new ScheduleDAGMILive(
- C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
+ C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 41db2c88ce50..27a2e7023f22 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -36,8 +36,9 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
@@ -114,15 +115,12 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfo>();
@@ -195,11 +193,10 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
if (!MI.isCopy())
return false;
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- !TargetRegisterInfo::isVirtualRegister(DstReg) ||
- !MRI->hasOneNonDBGUse(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!Register::isVirtualRegister(SrcReg) ||
+ !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg))
return false;
const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
@@ -233,8 +230,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge,
bool &LocalUse) const {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "Only makes sense for vregs");
+ assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs");
// Ignore debug uses because debug info doesn't affect the code.
if (MRI->use_nodbg_empty(Reg))
@@ -416,13 +412,13 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0)
continue;
// We don't move live definitions of physical registers,
// so sinking their uses won't enable any opportunities.
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
// If this instruction is the only user of a virtual register,
@@ -615,10 +611,10 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
@@ -817,8 +813,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0 || !Register::isPhysicalRegister(Reg))
+ continue;
if (SuccToSinkTo->isLiveIn(Reg))
return false;
}
@@ -958,8 +955,9 @@ private:
/// Track which register units have been modified and used.
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
- /// Track DBG_VALUEs of (unmodified) register units.
- DenseMap<unsigned, TinyPtrVector<MachineInstr*>> SeenDbgInstrs;
+ /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
+ /// entry in this map for each unit it touches.
+ DenseMap<unsigned, TinyPtrVector<MachineInstr *>> SeenDbgInstrs;
/// Sink Copy instructions unused in the same block close to their uses in
/// successors.
@@ -1030,7 +1028,7 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
const TargetRegisterInfo *TRI) {
for (auto U : UsedOpsInCopy) {
MachineOperand &MO = MI->getOperand(U);
- unsigned SrcReg = MO.getReg();
+ Register SrcReg = MO.getReg();
if (!UsedRegUnits.available(SrcReg)) {
MachineBasicBlock::iterator NI = std::next(MI->getIterator());
for (MachineInstr &UI : make_range(NI, CurBB.end())) {
@@ -1053,7 +1051,7 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
SuccBB->removeLiveIn(*S);
for (auto U : UsedOpsInCopy) {
- unsigned Reg = MI->getOperand(U).getReg();
+ Register Reg = MI->getOperand(U).getReg();
if (!SuccBB->isLiveIn(Reg))
SuccBB->addLiveIn(Reg);
}
@@ -1069,7 +1067,7 @@ static bool hasRegisterDependency(MachineInstr *MI,
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef()) {
@@ -1094,6 +1092,14 @@ static bool hasRegisterDependency(MachineInstr *MI,
return HasRegDependency;
}
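+/// Collect the register units covered by \p Reg. Register units are the
+/// disjoint building blocks of the register file, so aliasing registers
+/// (for instance a subregister and its super-register) share at least one
+/// unit, and keying maps by unit makes overlap checks a simple lookup.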
+static SmallSet<unsigned, 4> getRegUnits(unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ SmallSet<unsigned, 4> RegUnits;
+ for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI)
+ RegUnits.insert(*RI);
+ return RegUnits;
+}
+
bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
MachineFunction &MF,
const TargetRegisterInfo *TRI,
@@ -1130,15 +1136,17 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// for DBG_VALUEs later, record them when they're encountered.
if (MI->isDebugValue()) {
auto &MO = MI->getOperand(0);
- if (MO.isReg() && TRI->isPhysicalRegister(MO.getReg())) {
+ if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
// Bail if we can already tell the sink would be rejected, rather
// than needlessly accumulating lots of DBG_VALUEs.
if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits))
continue;
- // Record debug use of this register.
- SeenDbgInstrs[MO.getReg()].push_back(MI);
+ // Record debug use of each reg unit.
+ SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
+ for (unsigned Reg : Units)
+ SeenDbgInstrs[Reg].push_back(MI);
}
continue;
}
@@ -1177,15 +1185,22 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
"Unexpected predecessor");
- // Collect DBG_VALUEs that must sink with this copy.
+ // Collect DBG_VALUEs that must sink with this copy. We've previously
+ // recorded which reg units DBG_VALUEs read; if this instruction writes
+ // any of those units, the corresponding DBG_VALUEs must sink.
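+ // For example, a DBG_VALUE of $w0 is recorded under W0's register unit,
+ // so a later copy defining $x0, which covers that unit, collects and
+ // sinks it as well.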
+ SetVector<MachineInstr *> DbgValsToSinkSet;
SmallVector<MachineInstr *, 4> DbgValsToSink;
for (auto &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned reg = MO.getReg();
- for (auto *MI : SeenDbgInstrs.lookup(reg))
- DbgValsToSink.push_back(MI);
+
+ SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
+ for (unsigned Reg : Units)
+ for (auto *MI : SeenDbgInstrs.lookup(Reg))
+ DbgValsToSinkSet.insert(MI);
}
+ DbgValsToSink.insert(DbgValsToSink.begin(), DbgValsToSinkSet.begin(),
+ DbgValsToSinkSet.end());
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index f9505df4e7f4..66a3bc2f8cc4 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -634,7 +634,7 @@ struct DataDep {
/// Create a DataDep from an SSA form virtual register.
DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
: UseOp(UseOp) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
+ assert(Register::isVirtualRegister(VirtReg));
MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
assert(!DefI.atEnd() && "Register has no defs");
DefMI = DefI->getParent();
@@ -660,10 +660,10 @@ static bool getDataDeps(const MachineInstr &UseMI,
const MachineOperand &MO = *I;
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
HasPhysRegs = true;
continue;
}
@@ -687,7 +687,7 @@ static void getPHIDeps(const MachineInstr &UseMI,
assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
if (UseMI.getOperand(i + 1).getMBB() == Pred) {
- unsigned Reg = UseMI.getOperand(i).getReg();
+ Register Reg = UseMI.getOperand(i).getReg();
Deps.push_back(DataDep(MRI, Reg, i));
return;
}
@@ -708,8 +708,8 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
const MachineOperand &MO = *MI;
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
// Track live defs and kills for updating RegUnits.
if (MO.isDef()) {
@@ -765,7 +765,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
assert(TBI.HasValidInstrHeights && "Missing height info");
unsigned MaxLen = 0;
for (const LiveInReg &LIR : TBI.LiveIns) {
- if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
+ if (!Register::isVirtualRegister(LIR.Reg))
continue;
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
@@ -902,8 +902,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
const MachineOperand &MO = *MOI;
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
if (MO.readsReg())
ReadOps.push_back(MI.getOperandNo(MOI));
@@ -930,7 +930,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
// Now we know the height of MI. Update any regunits read.
for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
- unsigned Reg = MI.getOperand(ReadOps[i]).getReg();
+ Register Reg = MI.getOperand(ReadOps[i]).getReg();
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
LiveRegUnit &LRU = RegUnits[*Units];
// Set the height to the highest reader of the unit.
@@ -979,7 +979,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace) {
assert(!Trace.empty() && "Trace should contain at least one block");
unsigned Reg = DefMI->getOperand(DefOp).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
const MachineBasicBlock *DefMBB = DefMI->getParent();
// Reg is live-in to all blocks in Trace that follow DefMBB.
@@ -1026,7 +1026,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
if (MBB) {
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
for (LiveInReg &LI : TBI.LiveIns) {
- if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+ if (Register::isVirtualRegister(LI.Reg)) {
// For virtual registers, the def latency is included.
unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
if (Height < LI.Height)
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 0ad792ac62cf..969743edca52 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -22,7 +22,6 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
-#include "LiveRangeCalc.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -37,6 +36,7 @@
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -122,7 +122,7 @@ namespace {
// Add Reg and any sub-registers to RV
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
RV.push_back(*SubRegs);
}
@@ -159,7 +159,7 @@ namespace {
// Add register to vregsPassed if it belongs there. Return true if
// anything changed.
bool addPassed(unsigned Reg) {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return false;
if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
return false;
@@ -178,7 +178,7 @@ namespace {
// Add register to vregsRequired if it belongs there. Return true if
// anything changed.
bool addRequired(unsigned Reg) {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return false;
if (regsLiveOut.count(Reg))
return false;
@@ -552,7 +552,7 @@ void MachineVerifier::report_context_vreg(unsigned VReg) const {
}
void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const {
- if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
+ if (Register::isVirtualRegister(VRegOrUnit)) {
report_context_vreg(VRegOrUnit);
} else {
errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n';
@@ -797,7 +797,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
regsLive.clear();
if (MRI->tracksLiveness()) {
for (const auto &LI : MBB->liveins()) {
- if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
+ if (!Register::isPhysicalRegister(LI.PhysReg)) {
report("MBB live-in list contains non-physical register", MBB);
continue;
}
@@ -957,7 +957,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
// Generic opcodes must not have physical register operands.
for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
const MachineOperand *MO = &MI->getOperand(I);
- if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
+ if (MO->isReg() && Register::isPhysicalRegister(MO->getReg()))
report("Generic instruction cannot have physical register", MO, I);
}
@@ -1368,7 +1368,108 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
}
+ switch (IntrID) {
+ case Intrinsic::memcpy:
+ if (MI->getNumOperands() != 5)
+ report("Expected memcpy intrinsic to have 5 operands", MI);
+ break;
+ case Intrinsic::memmove:
+ if (MI->getNumOperands() != 5)
+ report("Expected memmove intrinsic to have 5 operands", MI);
+ break;
+ case Intrinsic::memset:
+ if (MI->getNumOperands() != 5)
+ report("Expected memset intrinsic to have 5 operands", MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_SEXT_INREG: {
+ if (!MI->getOperand(2).isImm()) {
+ report("G_SEXT_INREG expects an immediate operand #2", MI);
+ break;
+ }
+
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ verifyVectorElementMatch(DstTy, SrcTy, MI);
+
+ int64_t Imm = MI->getOperand(2).getImm();
+ if (Imm <= 0)
+ report("G_SEXT_INREG size must be >= 1", MI);
+ if (Imm >= SrcTy.getScalarSizeInBits())
+ report("G_SEXT_INREG size must be less than source bit width", MI);
+ break;
+ }
+ case TargetOpcode::G_SHUFFLE_VECTOR: {
+ const MachineOperand &MaskOp = MI->getOperand(3);
+ if (!MaskOp.isShuffleMask()) {
+ report("Incorrect mask operand type for G_SHUFFLE_VECTOR", MI);
+ break;
+ }
+
+ const Constant *Mask = MaskOp.getShuffleMask();
+ auto *MaskVT = dyn_cast<VectorType>(Mask->getType());
+ if (!MaskVT || !MaskVT->getElementType()->isIntegerTy(32)) {
+ report("Invalid shufflemask constant type", MI);
+ break;
+ }
+
+ if (!Mask->getAggregateElement(0u)) {
+ report("Invalid shufflemask constant type", MI);
+ break;
+ }
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT Src0Ty = MRI->getType(MI->getOperand(1).getReg());
+ LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg());
+
+ if (Src0Ty != Src1Ty)
+ report("Source operands must be the same type", MI);
+
+ if (Src0Ty.getScalarType() != DstTy.getScalarType())
+ report("G_SHUFFLE_VECTOR cannot change element type", MI);
+
+ // Don't check that all operands are vectors because scalars are used in
+ // place of 1-element vectors.
+ int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1;
+ int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
+
+ SmallVector<int, 32> MaskIdxes;
+ ShuffleVectorInst::getShuffleMask(Mask, MaskIdxes);
+
+ if (static_cast<int>(MaskIdxes.size()) != DstNumElts)
+ report("Wrong result type for shufflemask", MI);
+
+ for (int Idx : MaskIdxes) {
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= 2 * SrcNumElts)
+ report("Out of bounds shuffle index", MI);
+ }
+
+ break;
+ }
+ case TargetOpcode::G_DYN_STACKALLOC: {
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &AllocOp = MI->getOperand(1);
+ const MachineOperand &AlignOp = MI->getOperand(2);
+
+ if (!DstOp.isReg() || !MRI->getType(DstOp.getReg()).isPointer()) {
+ report("dst operand 0 must be a pointer type", MI);
+ break;
+ }
+
+ if (!AllocOp.isReg() || !MRI->getType(AllocOp.getReg()).isScalar()) {
+ report("src operand 1 must be a scalar reg type", MI);
+ break;
+ }
+
+ if (!AlignOp.isImm()) {
+ report("src operand 2 must be an immediate type", MI);
+ break;
+ }
break;
}
default:
@@ -1525,11 +1626,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("Operand should be tied", MO, MONum);
else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
report("Tied def doesn't match MCInstrDesc", MO, MONum);
- else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) {
+ else if (Register::isPhysicalRegister(MO->getReg())) {
const MachineOperand &MOTied = MI->getOperand(TiedTo);
if (!MOTied.isReg())
report("Tied counterpart must be a register", &MOTied, TiedTo);
- else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) &&
+ else if (Register::isPhysicalRegister(MOTied.getReg()) &&
MO->getReg() != MOTied.getReg())
report("Tied physical registers must match.", &MOTied, TiedTo);
}
@@ -1543,7 +1644,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
switch (MO->getType()) {
case MachineOperand::MO_Register: {
- const unsigned Reg = MO->getReg();
+ const Register Reg = MO->getReg();
if (!Reg)
return;
if (MRI->tracksLiveness() && !MI->isDebugValue())
@@ -1581,7 +1682,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Check register classes.
unsigned SubIdx = MO->getSubReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
if (SubIdx) {
report("Illegal subregister index for physical register", MO, MONum);
return;
@@ -1817,7 +1918,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
if (MO->isDead()) {
LiveQueryResult LRQ = LR.Query(DefIdx);
if (!LRQ.isDeadDef()) {
- assert(TargetRegisterInfo::isVirtualRegister(VRegOrUnit) &&
+ assert(Register::isVirtualRegister(VRegOrUnit) &&
"Expecting a virtual register.");
// A dead subreg def only tells us that the specific subreg is dead. There
// could be other non-dead defs of other subregs, or we could have other
@@ -1845,8 +1946,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
addRegWithSubRegs(regsKilled, Reg);
// Check that LiveVars knows this kill.
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
- MO->isKill()) {
+ if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) {
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
if (!is_contained(VI.Kills, MI))
report("Kill missing from LiveVariables", MO, MONum);
@@ -1856,7 +1956,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
// Check the cached regunit intervals.
- if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+ if (Register::isPhysicalRegister(Reg) && !isReserved(Reg)) {
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
if (MRI->isReservedRegUnit(*Units))
continue;
@@ -1865,7 +1965,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
}
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (LiveInts->hasInterval(Reg)) {
// This is a virtual register interval.
const LiveInterval &LI = LiveInts->getInterval(Reg);
@@ -1900,7 +2000,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Use of a dead register.
if (!regsLive.count(Reg)) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
// Reserved registers may be used even when 'dead'.
bool Bad = !isReserved(Reg);
// We are fine if just any subregister has a defined value.
@@ -1922,7 +2022,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (!MOP.isReg() || !MOP.isImplicit())
continue;
- if (!TargetRegisterInfo::isPhysicalRegister(MOP.getReg()))
+ if (!Register::isPhysicalRegister(MOP.getReg()))
continue;
for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
@@ -1960,7 +2060,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
addRegWithSubRegs(regsDefined, Reg);
// Verify SSA form.
- if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ if (MRI->isSSA() && Register::isVirtualRegister(Reg) &&
std::next(MRI->def_begin(Reg)) != MRI->def_end())
report("Multiple virtual register defs in SSA form", MO, MONum);
@@ -1969,7 +2069,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (LiveInts->hasInterval(Reg)) {
const LiveInterval &LI = LiveInts->getInterval(Reg);
checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg);
@@ -2007,7 +2107,7 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
while (!regMasks.empty()) {
const uint32_t *Mask = regMasks.pop_back_val();
for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I)
- if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ if (Register::isPhysicalRegister(*I) &&
MachineOperand::clobbersPhysReg(Mask, *I))
regsDead.push_back(*I);
}
@@ -2119,8 +2219,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) {
if (MODef.isTied() || MODef.isImplicit() || MODef.isInternalRead() ||
MODef.isEarlyClobber() || MODef.isDebug())
report("Unexpected flag on PHI operand", &MODef, 0);
- unsigned DefReg = MODef.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ Register DefReg = MODef.getReg();
+ if (!Register::isVirtualRegister(DefReg))
report("Expected first PHI operand to be a virtual register", &MODef, 0);
for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
@@ -2212,7 +2312,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
void MachineVerifier::verifyLiveVariables() {
assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
@@ -2238,7 +2338,7 @@ void MachineVerifier::verifyLiveVariables() {
void MachineVerifier::verifyLiveIntervals() {
assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
// Spilling and splitting may leave unused registers around. Skip them.
if (MRI->reg_nodbg_empty(Reg))
@@ -2315,11 +2415,11 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || !MOI->isDef())
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (MOI->getReg() != Reg)
continue;
} else {
- if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ if (!Register::isPhysicalRegister(MOI->getReg()) ||
!TRI->hasRegUnit(MOI->getReg(), Reg))
continue;
}
@@ -2402,7 +2502,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
return;
// RegUnit intervals are allowed dead phis.
- if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() &&
+ if (!Register::isVirtualRegister(Reg) && VNI->isPHIDef() &&
S.start == VNI->def && S.end == VNI->def.getDeadSlot())
return;
@@ -2446,7 +2546,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// The following checks only apply to virtual registers. Physreg liveness
// is too weird to check.
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
// A live segment can end with either a redefinition, a kill flag on a
// use, or a dead flag on a def.
bool hasRead = false;
@@ -2519,8 +2619,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
while (true) {
assert(LiveInts->isLiveInToMBB(LR, &*MFI));
// We don't know how to track physregs into a landing pad.
- if (!TargetRegisterInfo::isVirtualRegister(Reg) &&
- MFI->isEHPad()) {
+ if (!Register::isVirtualRegister(Reg) && MFI->isEHPad()) {
if (&*MFI == EndMBB)
break;
++MFI;
@@ -2580,7 +2679,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
unsigned Reg = LI.reg;
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
verifyLiveRange(LI, Reg);
LaneBitmask Mask;
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 2db1e86905a4..d21eae222af0 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -176,7 +176,7 @@ std::unique_ptr<ScheduleDAGMutation>
llvm::createMacroFusionDAGMutation(
ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
- return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+ return std::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
return nullptr;
}
@@ -184,6 +184,6 @@ std::unique_ptr<ScheduleDAGMutation>
llvm::createBranchMacroFusionDAGMutation(
ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
- return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+ return std::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
return nullptr;
}
diff --git a/lib/CodeGen/ModuloSchedule.cpp b/lib/CodeGen/ModuloSchedule.cpp
new file mode 100644
index 000000000000..7ce3c5861801
--- /dev/null
+++ b/lib/CodeGen/ModuloSchedule.cpp
@@ -0,0 +1,2022 @@
+//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ModuloSchedule.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopUtils.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "pipeliner"
+using namespace llvm;
+
+void ModuloSchedule::print(raw_ostream &OS) {
+ for (MachineInstr *MI : ScheduledInstrs)
+ OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleExpander implementation
+//===----------------------------------------------------------------------===//
+
+/// Return the register values for the operands of a Phi instruction.
+/// This function assumes the instruction is a Phi.
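+/// A machine Phi lists value/block operand pairs after the def, e.g.:
+///   %v = PHI %init, %bb.preheader, %next, %bb.loop
+/// so operand i is an incoming register and operand i+1 its source block.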
+static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
+ unsigned &InitVal, unsigned &LoopVal) {
+ assert(Phi.isPHI() && "Expecting a Phi.");
+
+ InitVal = 0;
+ LoopVal = 0;
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != Loop)
+ InitVal = Phi.getOperand(i).getReg();
+ else
+ LoopVal = Phi.getOperand(i).getReg();
+
+ assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
+}
+
+/// Return the Phi register value that comes from the incoming block.
+static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+/// Return the Phi register value that comes from the loop block.
+static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() == LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+void ModuloScheduleExpander::expand() {
+ BB = Schedule.getLoop()->getTopBlock();
+ Preheader = *BB->pred_begin();
+ if (Preheader == BB)
+ Preheader = *std::next(BB->pred_begin());
+
+ // Iterate over the definitions in each instruction, and compute the
+ // stage difference for each use. Keep the maximum value.
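+ // For illustration, a value defined in stage 0 whose last use is
+ // scheduled in stage 2 records a difference of 2; a loop-carried Phi
+ // records one more, since its use reads the previous iteration's value.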
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ int DefStage = Schedule.getStage(MI);
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+
+ Register Reg = Op.getReg();
+ unsigned MaxDiff = 0;
+ bool PhiIsSwapped = false;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
+ EI = MRI.use_end();
+ UI != EI; ++UI) {
+ MachineOperand &UseOp = *UI;
+ MachineInstr *UseMI = UseOp.getParent();
+ int UseStage = Schedule.getStage(UseMI);
+ unsigned Diff = 0;
+ if (UseStage != -1 && UseStage >= DefStage)
+ Diff = UseStage - DefStage;
+ if (MI->isPHI()) {
+ if (isLoopCarried(*MI))
+ ++Diff;
+ else
+ PhiIsSwapped = true;
+ }
+ MaxDiff = std::max(Diff, MaxDiff);
+ }
+ RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
+ }
+ }
+
+ generatePipelinedLoop();
+}
+
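+/// Lay out the pipelined loop as explicit blocks. Schematically, for a
+/// schedule with N+1 stages:
+///   preheader -> prolog[0..N-1] -> kernel -> epilogs -> loop exit
+/// where the kernel branches back to itself. The prologs fill the pipeline,
+/// the kernel runs it at full depth, and the epilogs drain the iterations
+/// still in flight.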
+void ModuloScheduleExpander::generatePipelinedLoop() {
+ LoopInfo = TII->analyzeLoopForPipelining(BB);
+ assert(LoopInfo && "Must be able to analyze loop!");
+
+ // Create a new basic block for the kernel and add it to the CFG.
+ MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+
+ unsigned MaxStageCount = Schedule.getNumStages() - 1;
+
+ // Remember the registers that are used in different stages. The index is
+ // the iteration, or stage, that the instruction is scheduled in. This is
+ // a map between register names in the original block and the names created
+ // in each stage of the pipelined loop.
+ ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
+ InstrMapTy InstrMap;
+
+ SmallVector<MachineBasicBlock *, 4> PrologBBs;
+
+ // Generate the prolog instructions that set up the pipeline.
+ generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
+ MF.insert(BB->getIterator(), KernelBB);
+
+ // Rearrange the instructions to generate the new, pipelined loop,
+ // and update register names as needed.
+ for (MachineInstr *CI : Schedule.getInstructions()) {
+ if (CI->isPHI())
+ continue;
+ unsigned StageNum = Schedule.getStage(CI);
+ MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
+ updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = CI;
+ }
+
+ // Copy any terminator instructions to the new kernel, and update
+ // names as needed.
+ for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
+ E = BB->instr_end();
+ I != E; ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = &*I;
+ }
+
+ NewKernel = KernelBB;
+ KernelBB->transferSuccessors(BB);
+ KernelBB->replaceSuccessor(BB, KernelBB);
+
+ generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
+ InstrMap, MaxStageCount, MaxStageCount, false);
+ generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
+ MaxStageCount, MaxStageCount, false);
+
+ LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
+
+ SmallVector<MachineBasicBlock *, 4> EpilogBBs;
+ // Generate the epilog instructions to complete the pipeline.
+ generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
+
+ // We need this step because the register allocator doesn't handle some
+ // situations well, so we insert copies to help out.
+ splitLifetimes(KernelBB, EpilogBBs);
+
+ // Remove dead instructions due to loop induction variables.
+ removeDeadInstructions(KernelBB, EpilogBBs);
+
+ // Add branches between prolog and epilog blocks.
+ addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
+
+ delete[] VRMap;
+}
+
+void ModuloScheduleExpander::cleanup() {
+ // Remove the original loop since it's no longer referenced.
+ for (auto &I : *BB)
+ LIS.RemoveMachineInstrFromMaps(I);
+ BB->clear();
+ BB->eraseFromParent();
+}
+
+/// Generate the pipeline prolog code.
+void ModuloScheduleExpander::generateProlog(unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &PrologBBs) {
+ MachineBasicBlock *PredBB = Preheader;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which will be generated in the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
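+ // For illustration, with three stages the first prolog block holds the
+ // stage-0 instructions of iteration 0, and the second holds stage 0 of
+ // iteration 1 together with stage 1 of iteration 0.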
+ for (unsigned i = 0; i < LastStage; ++i) {
+ // Create and insert the prolog basic block prior to the original loop
+ // basic block. The original loop is removed later.
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+ PrologBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+ NewBB->transferSuccessors(PredBB);
+ PredBB->addSuccessor(NewBB);
+ PredBB = NewBB;
+
+ // Generate instructions for each appropriate stage. Process instructions
+ // in original program order.
+ for (int StageNum = i; StageNum >= 0; --StageNum) {
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstTerminator();
+ BBI != BBE; ++BBI) {
+ if (Schedule.getStage(&*BBI) == StageNum) {
+ if (BBI->isPHI())
+ continue;
+ MachineInstr *NewMI =
+ cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
+ updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = &*BBI;
+ }
+ }
+ }
+ rewritePhiValues(NewBB, i, VRMap, InstrMap);
+ LLVM_DEBUG({
+ dbgs() << "prolog:\n";
+ NewBB->dump();
+ });
+ }
+
+ PredBB->replaceSuccessor(BB, KernelBB);
+
+ // Check if we need to remove the branch from the preheader to the original
+ // loop, and replace it with a branch to the new loop.
+ unsigned numBranches = TII->removeBranch(*Preheader);
+ if (numBranches) {
+ SmallVector<MachineOperand, 0> Cond;
+ TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
+ }
+}
+
+/// Generate the pipeline epilog code. The epilog code finishes the iterations
+/// that were started in either the prolog or the kernel. We create a basic
+/// block for each stage that needs to complete.
+void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs) {
+ // We need to change the branch from the kernel to the first epilog block, so
+ // this call to analyze branch uses the kernel rather than the original BB.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
+ assert(!checkBranch && "generateEpilog must be able to analyze the branch");
+ if (checkBranch)
+ return;
+
+ MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
+ if (*LoopExitI == KernelBB)
+ ++LoopExitI;
+ assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
+ MachineBasicBlock *LoopExitBB = *LoopExitI;
+
+ MachineBasicBlock *PredBB = KernelBB;
+ MachineBasicBlock *EpilogStart = LoopExitBB;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which was generated for the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
+ int EpilogStage = LastStage + 1;
+ for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
+ EpilogBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+
+ PredBB->replaceSuccessor(LoopExitBB, NewBB);
+ NewBB->addSuccessor(LoopExitBB);
+
+ if (EpilogStart == LoopExitBB)
+ EpilogStart = NewBB;
+
+ // Add instructions to the epilog depending on the current block.
+ // Process instructions in original program order.
+ for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
+ for (auto &BBI : *BB) {
+ if (BBI.isPHI())
+ continue;
+ MachineInstr *In = &BBI;
+ if ((unsigned)Schedule.getStage(In) == StageNum) {
+ // Instructions with memoperands in the epilog are updated with
+ // conservative values.
+ MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
+ updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = In;
+ }
+ }
+ }
+ generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
+ InstrMap, LastStage, EpilogStage, i == 1);
+ generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
+ LastStage, EpilogStage, i == 1);
+ PredBB = NewBB;
+
+ LLVM_DEBUG({
+ dbgs() << "epilog:\n";
+ NewBB->dump();
+ });
+ }
+
+ // Fix any Phi nodes in the loop exit block.
+ LoopExitBB->replacePhiUsesWith(BB, PredBB);
+
+ // Create a branch to the new epilog from the kernel.
+ // Remove the original branch and add a new branch to the epilog.
+ TII->removeBranch(*KernelBB);
+ TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
+ // Add a branch to the loop exit.
+ if (EpilogBBs.size() > 0) {
+ MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
+ SmallVector<MachineOperand, 4> Cond1;
+ TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
+ }
+}
+
+/// Replace all uses of FromReg that appear outside the specified
+/// basic block with ToReg.
+static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock *MBB,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS) {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
+ E = MRI.use_end();
+ I != E;) {
+ MachineOperand &O = *I;
+ ++I;
+ if (O.getParent()->getParent() != MBB)
+ O.setReg(ToReg);
+ }
+ if (!LIS.hasInterval(ToReg))
+ LIS.createEmptyInterval(ToReg);
+}
+
+/// Return true if the register has a use that occurs outside the
+/// specified loop.
+static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
+ MachineRegisterInfo &MRI) {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
+ E = MRI.use_end();
+ I != E; ++I)
+ if (I->getParent()->getParent() != BB)
+ return true;
+ return false;
+}
+
+/// Generate Phis for the specific block in the generated pipelined code.
+/// This function looks at the Phis from the original code to guide the
+/// creation of new Phis.
+void ModuloScheduleExpander::generateExistingPhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+ // Compute the stage number for the initial value of the Phi, which
+ // comes from the prolog. Which prolog to use depends on the kernel or
+ // epilog block to which we're adding the Phi.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ bool InKernel = (LastStageNum == CurStageNum);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - (CurStageNum - LastStageNum);
+ PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstNonPHI();
+ BBI != BBE; ++BBI) {
+ Register Def = BBI->getOperand(0).getReg();
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(*BBI, BB, InitVal, LoopVal);
+
+ unsigned PhiOp1 = 0;
+ // The Phi value from the loop body typically is defined in the loop, but
+ // not always. So, we need to check if the value is defined in the loop.
+ unsigned PhiOp2 = LoopVal;
+ if (VRMap[LastStageNum].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum][LoopVal];
+
+ int StageScheduled = Schedule.getStage(&*BBI);
+ int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
+ unsigned NumStages = getStagesForReg(Def, CurStageNum);
+ if (NumStages == 0) {
+ // We don't need to generate a Phi anymore, but we need to rename any uses
+ // of the Phi value.
+ unsigned NewReg = VRMap[PrevStage][LoopVal];
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
+ InitVal, NewReg);
+ if (VRMap[CurStageNum].count(LoopVal))
+ VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
+ }
+ // Adjust the number of Phis needed depending on the number of prologs left,
+ // and the distance from where the Phi is first scheduled. The number of
+ // Phis cannot exceed the number of prolog stages. Each stage can
+ // potentially define two values.
+ unsigned MaxPhis = PrologStage + 2;
+ if (!InKernel && (int)PrologStage <= LoopValStage)
+ MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
+ unsigned NumPhis = std::min(NumStages, MaxPhis);
+
+ unsigned NewReg = 0;
+ unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
+ // In the epilog, we may need to look back one stage to get the correct
+ // Phi name because the epilog and prolog blocks execute the same stage.
+ // The correct name is from the previous block only when the Phi has
+ // been completely scheduled prior to the epilog, and the Phi value is not
+ // needed in multiple stages.
+ int StageDiff = 0;
+ if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
+ NumPhis == 1)
+ StageDiff = 1;
+ // Adjust the computations below when the phi and the loop definition
+ // are scheduled in different stages.
+ if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiff = StageScheduled - LoopValStage;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+ // If the Phi hasn't been scheduled, then use the initial Phi operand
+ // value. Otherwise, use the scheduled version of the instruction. This
+ // is a little complicated when a Phi references another Phi.
+ if (np > PrologStage || StageScheduled >= (int)LastStageNum)
+ PhiOp1 = InitVal;
+ // Check if the Phi has already been scheduled in a prolog stage.
+ else if (PrologStage >= AccessStage + StageDiff + np &&
+ VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
+ PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
+ // Check if the Phi has already been scheduled, but the loop instruction
+ // is either another Phi, or doesn't occur in the loop.
+ else if (PrologStage >= AccessStage + StageDiff + np) {
+ // If the Phi references another Phi, we need to examine the other
+ // Phi to get the correct value.
+ PhiOp1 = LoopVal;
+ MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
+ int Indirects = 1;
+ while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
+ int PhiStage = Schedule.getStage(InstOp1);
+ if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
+ PhiOp1 = getInitPhiReg(*InstOp1, BB);
+ else
+ PhiOp1 = getLoopPhiReg(*InstOp1, BB);
+ InstOp1 = MRI.getVRegDef(PhiOp1);
+ int PhiOpStage = Schedule.getStage(InstOp1);
+ int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
+ if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
+ VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
+ PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
+ break;
+ }
+ ++Indirects;
+ }
+ } else
+ PhiOp1 = InitVal;
+ // If this references a generated Phi in the kernel, get the Phi operand
+ // from the incoming block.
+ if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
+ if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+
+ MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
+ bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
+ // In the epilog, a map lookup is needed to get the value from the kernel
+ // or a previous epilog block. How this is done depends on whether the
+ // instruction is scheduled in the previous block.
+ if (!InKernel) {
+ int StageDiffAdj = 0;
+ if (LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiffAdj = StageScheduled - LoopValStage;
+ // Use the loop value defined in the kernel, unless the kernel
+ // contains the last definition of the Phi.
+ if (np == 0 && PrevStage == LastStageNum &&
+ (StageScheduled != 0 || LoopValStage != 0) &&
+ VRMap[PrevStage - StageDiffAdj].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
+ // Use the value defined by the Phi. We add one because we switch
+ // from looking at the loop value to the Phi definition.
+ else if (np > 0 && PrevStage == LastStageNum &&
+ VRMap[PrevStage - np + 1].count(Def))
+ PhiOp2 = VRMap[PrevStage - np + 1][Def];
+ // Use the loop value defined in the kernel.
+ else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
+ VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
+ // Use the value defined by the Phi, unless we're generating the first
+ // epilog and the Phi refers to a Phi in a different stage.
+ else if (VRMap[PrevStage - np].count(Def) &&
+ (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
+ (LoopValStage == StageScheduled)))
+ PhiOp2 = VRMap[PrevStage - np][Def];
+ }
+
+ // Check if we can reuse an existing Phi. This occurs when a Phi
+ // references another Phi, and the other Phi is scheduled in an
+ // earlier stage. We can try to reuse an existing Phi up until the last
+ // stage of the current Phi.
+ if (LoopDefIsPhi) {
+ if (static_cast<int>(PrologStage - np) >= StageScheduled) {
+ int LVNumStages = getStagesForPhi(LoopVal);
+ int StageDiff = (StageScheduled - LoopValStage);
+ LVNumStages -= StageDiff;
+ // Make sure the loop value Phi has been processed already.
+ if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
+ NewReg = PhiOp2;
+ unsigned ReuseStage = CurStageNum;
+ if (isLoopCarried(*PhiInst))
+ ReuseStage -= LVNumStages;
+ // Check if the Phi to reuse has been generated yet. If not, then
+ // there is nothing to reuse.
+ if (VRMap[ReuseStage - np].count(LoopVal)) {
+ NewReg = VRMap[ReuseStage - np][LoopVal];
+
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
+ Def, NewReg);
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ PhiOp2 = NewReg;
+ if (VRMap[LastStageNum - np - 1].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
+
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ continue;
+ }
+ }
+ }
+ if (InKernel && StageDiff > 0 &&
+ VRMap[CurStageNum - StageDiff - np].count(LoopVal))
+ PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
+ }
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+ // We define the Phis after creating the new pipelined code, so
+ // we need to rename the Phi values in scheduled instructions.
+
+ unsigned PrevReg = 0;
+ if (InKernel && VRMap[PrevStage - np].count(LoopVal))
+ PrevReg = VRMap[PrevStage - np][LoopVal];
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
+ NewReg, PrevReg);
+ // If the Phi has been scheduled, use the new name for rewriting.
+ if (VRMap[CurStageNum - np].count(Def)) {
+ unsigned R = VRMap[CurStageNum - np][Def];
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
+ NewReg);
+ }
+
+ // Check if we need to rename any uses that occurs after the loop. The
+ // register to replace depends on whether the Phi is scheduled in the
+ // epilog.
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+
+ // In the kernel, a dependent Phi uses the value from this Phi.
+ if (InKernel)
+ PhiOp2 = NewReg;
+
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ }
+
+ while (NumPhis++ < NumStages) {
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
+ NewReg, 0);
+ }
+
+ // Check if we need to rename a Phi that has been eliminated due to
+ // scheduling.
+ if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
+ replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
+ }
+}
+
+/// Generate Phis for the specified block in the generated pipelined code.
+/// These are new Phis needed because the definition is scheduled after the
+/// use in the pipelined sequence.
+void ModuloScheduleExpander::generatePhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+ // Compute the stage number that contains the initial Phi value, and
+ // the Phi from the previous stage.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ unsigned StageDiff = CurStageNum - LastStageNum;
+ bool InKernel = (StageDiff == 0);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - StageDiff;
+ PrevStage = LastStageNum + StageDiff - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
+ BBE = BB->instr_end();
+ BBI != BBE; ++BBI) {
+ for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = BBI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() ||
+ !Register::isVirtualRegister(MO.getReg()))
+ continue;
+
+ int StageScheduled = Schedule.getStage(&*BBI);
+ assert(StageScheduled != -1 && "Expecting scheduled instruction.");
+ Register Def = MO.getReg();
+ unsigned NumPhis = getStagesForReg(Def, CurStageNum);
+ // An instruction scheduled in stage 0 that is used after the loop
+ // requires a phi in the epilog for the last definition from either
+ // the kernel or prolog.
+ if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
+ hasUseAfterLoop(Def, BB, MRI))
+ NumPhis = 1;
+ if (!InKernel && (unsigned)StageScheduled > PrologStage)
+ continue;
+
+ unsigned PhiOp2 = VRMap[PrevStage][Def];
+ if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
+ if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
+ PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+ // The number of Phis can't exceed the number of prolog stages. The
+ // prolog stage number is zero-based.
+ if (NumPhis > PrologStage + 1 - StageScheduled)
+ NumPhis = PrologStage + 1 - StageScheduled;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+ unsigned PhiOp1 = VRMap[PrologStage][Def];
+ if (np <= PrologStage)
+ PhiOp1 = VRMap[PrologStage - np][Def];
+ if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
+ if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+ if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
+ }
+ if (!InKernel)
+ PhiOp2 = VRMap[PrevStage - np][Def];
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ Register NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+ // Rewrite uses and update the map. The actions depend upon whether
+ // we are generating code for the kernel or epilog blocks.
+ if (InKernel) {
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
+ NewReg);
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
+ NewReg);
+
+ PhiOp2 = NewReg;
+ VRMap[PrevStage - np - 1][Def] = NewReg;
+ } else {
+ VRMap[CurStageNum - np][Def] = NewReg;
+ if (np == NumPhis - 1)
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
+ NewReg);
+ }
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ }
+ }
+ }
+}
+
+/// Remove instructions that generate values with no uses.
+/// Typically, these are induction variable operations that generate values
+/// used in the loop itself. A dead instruction has a definition with
+/// no uses, or uses that occur in the original loop only.
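+/// For example (illustrative), an epilog copy of an induction-variable update
+/// whose only uses were in the (now deleted) original loop body is dead.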
+void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs) {
+ // For each epilog block, check that the value defined by each instruction
+ // is used. If not, delete it.
+ for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
+ MBE = EpilogBBs.rend();
+ MBB != MBE; ++MBB)
+ for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
+ ME = (*MBB)->instr_rend();
+ MI != ME;) {
+ // From DeadMachineInstructionElim. Don't delete inline assembly.
+ if (MI->isInlineAsm()) {
+ ++MI;
+ continue;
+ }
+ bool SawStore = false;
+ // Check if it's safe to remove the instruction due to side effects.
+ // We can, and want to, remove Phis here.
+ if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
+ ++MI;
+ continue;
+ }
+ bool used = true;
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ Register reg = MOI->getReg();
+ // Assume physical registers are used, unless they are marked dead.
+ if (Register::isPhysicalRegister(reg)) {
+ used = !MOI->isDead();
+ if (used)
+ break;
+ continue;
+ }
+ unsigned realUses = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
+ EI = MRI.use_end();
+ UI != EI; ++UI) {
+ // Check if there are any uses that occur only in the original
+ // loop. If so, that's not a real use.
+ if (UI->getParent()->getParent() != BB) {
+ realUses++;
+ used = true;
+ break;
+ }
+ }
+ if (realUses > 0)
+ break;
+ used = false;
+ }
+ if (!used) {
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI++->eraseFromParent();
+ continue;
+ }
+ ++MI;
+ }
+ // In the kernel block, check if we can remove a Phi that generates a value
+ // used in an instruction removed in the epilog block.
+ for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
+ BBE = KernelBB->getFirstNonPHI();
+ BBI != BBE;) {
+ MachineInstr *MI = &*BBI;
+ ++BBI;
+ Register reg = MI->getOperand(0).getReg();
+ if (MRI.use_begin(reg) == MRI.use_end()) {
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+ }
+}
+
+/// For loop carried definitions, we split the lifetime of a virtual register
+/// that has uses past the definition in the next iteration. A copy with a new
+/// virtual register is inserted before the definition, which helps with
+/// generating a better register assignment.
+///
+/// v1 = phi(a, v2) v1 = phi(a, v2)
+/// v2 = phi(b, v3) v2 = phi(b, v3)
+/// v3 = .. v4 = copy v1
+/// .. = v1 v3 = ..
+/// .. = v4
+void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (auto &PHI : KernelBB->phis()) {
+ Register Def = PHI.getOperand(0).getReg();
+ // Check for any Phi definition that is used as an operand of another Phi
+ // in the same block.
+ for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
+ E = MRI.use_instr_end();
+ I != E; ++I) {
+ if (I->isPHI() && I->getParent() == KernelBB) {
+ // Get the loop carried definition.
+ unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
+ if (!LCDef)
+ continue;
+ MachineInstr *MI = MRI.getVRegDef(LCDef);
+ if (!MI || MI->getParent() != KernelBB || MI->isPHI())
+ continue;
+ // Search through the rest of the block looking for uses of the Phi
+ // definition. If one occurs, then split the lifetime.
+ unsigned SplitReg = 0;
+ for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
+ KernelBB->instr_end()))
+ if (BBJ.readsRegister(Def)) {
+ // We split the lifetime when we find the first use.
+ if (SplitReg == 0) {
+ SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
+ BuildMI(*KernelBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SplitReg)
+ .addReg(Def);
+ }
+ BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
+ }
+ if (!SplitReg)
+ continue;
+ // Search through each of the epilog blocks for any uses to be renamed.
+ for (auto &Epilog : EpilogBBs)
+ for (auto &I : *Epilog)
+ if (I.readsRegister(Def))
+ I.substituteRegister(Def, SplitReg, 0, *TRI);
+ break;
+ }
+ }
+ }
+}
+
+/// Remove the incoming block from the Phis in a basic block.
+static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
+ for (MachineInstr &MI : *BB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
+ if (MI.getOperand(i + 1).getMBB() == Incoming) {
+ MI.RemoveOperand(i + 1);
+ MI.RemoveOperand(i);
+ break;
+ }
+ }
+}
+
+/// Create branches from each prolog basic block to the appropriate epilog
+/// block. These edges are needed if the loop ends before reaching the
+/// kernel.
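+/// For example (illustrative), with a 3-stage schedule and a trip count of 1,
+/// the first prolog branches straight to the epilog that runs stages 1 and 2,
+/// finishing the single started iteration without entering the kernel.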
+void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
+ MBBVectorTy &PrologBBs,
+ MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs,
+ ValueMapTy *VRMap) {
+ assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
+ MachineBasicBlock *LastPro = KernelBB;
+ MachineBasicBlock *LastEpi = KernelBB;
+
+ // Start from the blocks connected to the kernel and work "out"
+ // to the first prolog and the last epilog blocks.
+ SmallVector<MachineInstr *, 4> PrevInsts;
+ unsigned MaxIter = PrologBBs.size() - 1;
+ for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
+ // Add branches to the prolog that go to the corresponding
+ // epilog, and the fall-thru prolog/kernel block.
+ MachineBasicBlock *Prolog = PrologBBs[j];
+ MachineBasicBlock *Epilog = EpilogBBs[i];
+
+ SmallVector<MachineOperand, 4> Cond;
+ Optional<bool> StaticallyGreater =
+ LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
+ unsigned numAdded = 0;
+ if (!StaticallyGreater.hasValue()) {
+ Prolog->addSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
+ } else if (*StaticallyGreater == false) {
+ Prolog->addSuccessor(Epilog);
+ Prolog->removeSuccessor(LastPro);
+ LastEpi->removeSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, LastEpi);
+ // Remove the blocks that are no longer referenced.
+ if (LastPro != LastEpi) {
+ LastEpi->clear();
+ LastEpi->eraseFromParent();
+ }
+ if (LastPro == KernelBB) {
+ LoopInfo->disposed();
+ NewKernel = nullptr;
+ }
+ LastPro->clear();
+ LastPro->eraseFromParent();
+ } else {
+ numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, Prolog);
+ }
+ LastPro = Prolog;
+ LastEpi = Epilog;
+ for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
+ E = Prolog->instr_rend();
+ I != E && numAdded > 0; ++I, --numAdded)
+ updateInstruction(&*I, false, j, 0, VRMap);
+ }
+
+ if (NewKernel) {
+ LoopInfo->setPreheader(PrologBBs[MaxIter]);
+ LoopInfo->adjustTripCount(-(MaxIter + 1));
+ }
+}
+
+/// Return true if we can compute the amount the instruction changes
+/// during each iteration. Set Delta to the amount of the change.
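+/// For example (illustrative), if the base register of a load is advanced by
+/// "Rb = Rb + 8" each iteration, Delta is set to 8.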
+bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineOperand *BaseOp;
+ int64_t Offset;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ return false;
+
+ if (!BaseOp->isReg())
+ return false;
+
+ Register BaseReg = BaseOp->getReg();
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Check if there is a Phi. If so, get the definition in the loop.
+ MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
+ if (BaseDef && BaseDef->isPHI()) {
+ BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
+ BaseDef = MRI.getVRegDef(BaseReg);
+ }
+ if (!BaseDef)
+ return false;
+
+ int D = 0;
+ if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
+ return false;
+
+ Delta = D;
+ return true;
+}
+
+/// Update the memory operand with a new offset when the pipeliner
+/// generates a new copy of the instruction that refers to a
+/// different memory location.
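+/// For example (illustrative), with Delta = 8 per iteration, the clone made
+/// for a stage Num iterations later has its memory operand offset adjusted by
+/// 8 * Num; if no delta can be computed, the size becomes unknown instead.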
+void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
+ MachineInstr &OldMI,
+ unsigned Num) {
+ if (Num == 0)
+ return;
+ // If the instruction has memory operands, then adjust the offset
+ // when the instruction appears in different stages.
+ if (NewMI.memoperands_empty())
+ return;
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
+ for (MachineMemOperand *MMO : NewMI.memoperands()) {
+ // TODO: Figure out whether isAtomic is really necessary (see D57601).
+ if (MMO->isVolatile() || MMO->isAtomic() ||
+ (MMO->isInvariant() && MMO->isDereferenceable()) ||
+ (!MMO->getValue())) {
+ NewMMOs.push_back(MMO);
+ continue;
+ }
+ unsigned Delta;
+ if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
+ int64_t AdjOffset = Delta * Num;
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
+ } else {
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
+ }
+ }
+ NewMI.setMemRefs(MF, NewMMOs);
+}
+
+/// Clone the instruction for the new pipelined loop and update the
+/// memory operands, if needed.
+MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
+ unsigned CurStageNum,
+ unsigned InstStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ // Check for tied operands in inline asm instructions. This should be handled
+ // elsewhere, but I'm not sure of the best solution.
+ if (OldMI->isInlineAsm())
+ for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
+ const auto &MO = OldMI->getOperand(i);
+ if (MO.isReg() && MO.isUse())
+ break;
+ unsigned UseIdx;
+ if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
+ NewMI->tieOperands(i, UseIdx);
+ }
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Clone the instruction for the new pipelined loop. If needed, this
+/// function updates the instruction using the values saved in the
+/// InstrChanges structure.
+MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
+ MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ auto It = InstrChanges.find(OldMI);
+ if (It != InstrChanges.end()) {
+ std::pair<unsigned, int64_t> RegAndOffset = It->second;
+ unsigned BasePos, OffsetPos;
+ if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
+ return nullptr;
+ int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
+ MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
+ if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
+ NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ }
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Update the machine instruction with new virtual registers. This
+/// function may change the definitions and/or uses.
+void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
+ bool LastDef,
+ unsigned CurStageNum,
+ unsigned InstrStageNum,
+ ValueMapTy *VRMap) {
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ continue;
+ Register reg = MO.getReg();
+ if (MO.isDef()) {
+ // Create a new virtual register for the definition.
+ const TargetRegisterClass *RC = MRI.getRegClass(reg);
+ Register NewReg = MRI.createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ VRMap[CurStageNum][reg] = NewReg;
+ if (LastDef)
+ replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+ } else if (MO.isUse()) {
+ MachineInstr *Def = MRI.getVRegDef(reg);
+ // Compute the stage that contains the last definition for the instruction.
+ int DefStageNum = Schedule.getStage(Def);
+ unsigned StageNum = CurStageNum;
+ if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
+ // Compute the difference in stages between the definition and the use.
+ unsigned StageDiff = (InstrStageNum - DefStageNum);
+ // Make an adjustment to get the last definition.
+ StageNum -= StageDiff;
+ }
+ if (VRMap[StageNum].count(reg))
+ MO.setReg(VRMap[StageNum][reg]);
+ }
+ }
+}
+
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
+MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ while (Def->isPHI()) {
+ if (!Visited.insert(Def).second)
+ break;
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+ if (Def->getOperand(i + 1).getMBB() == BB) {
+ Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+ break;
+ }
+ }
+ return Def;
+}
+
+/// Return the new name for the value from the previous stage.
+unsigned ModuloScheduleExpander::getPrevMapVal(
+ unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
+ ValueMapTy *VRMap, MachineBasicBlock *BB) {
+ unsigned PrevVal = 0;
+ if (StageNum > PhiStage) {
+ MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
+ if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
+ // The name is defined in the previous stage.
+ PrevVal = VRMap[StageNum - 1][LoopVal];
+ else if (VRMap[StageNum].count(LoopVal))
+ // The previous name is defined in the current stage when the instruction
+ // order is swapped.
+ PrevVal = VRMap[StageNum][LoopVal];
+ else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
+ // The loop value hasn't yet been scheduled.
+ PrevVal = LoopVal;
+ else if (StageNum == PhiStage + 1)
+ // The loop value is another phi, which has not been scheduled.
+ PrevVal = getInitPhiReg(*LoopInst, BB);
+ else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
+ // The loop value is another phi, which has been scheduled.
+ PrevVal =
+ getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
+ LoopStage, VRMap, BB);
+ }
+ return PrevVal;
+}
+
+/// Rewrite the Phi values in the specified block to use the mappings
+/// from the initial operand. Once the Phi is scheduled, we switch
+/// to using the loop value instead of the Phi value, so those names
+/// do not need to be rewritten.
+void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
+ unsigned StageNum,
+ ValueMapTy *VRMap,
+ InstrMapTy &InstrMap) {
+ for (auto &PHI : BB->phis()) {
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(PHI, BB, InitVal, LoopVal);
+ Register PhiDef = PHI.getOperand(0).getReg();
+
+ unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
+ unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
+ unsigned NumPhis = getStagesForPhi(PhiDef);
+ if (NumPhis > StageNum)
+ NumPhis = StageNum;
+ for (unsigned np = 0; np <= NumPhis; ++np) {
+ unsigned NewVal =
+ getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
+ if (!NewVal)
+ NewVal = InitVal;
+ rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
+ NewVal);
+ }
+ }
+}
+
+/// Rewrite a previously scheduled instruction to use the register value
+/// from the new instruction. Only uses that occur in the given basic block
+/// are rewritten, and uses in the new instruction itself are left unchanged.
+void ModuloScheduleExpander::rewriteScheduledInstr(
+ MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
+ unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
+ unsigned PrevReg) {
+ bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
+ int StagePhi = Schedule.getStage(Phi) + PhiNum;
+ // Rewrite uses that have been scheduled already to use the new
+ // Phi register.
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
+ EI = MRI.use_end();
+ UI != EI;) {
+ MachineOperand &UseOp = *UI;
+ MachineInstr *UseMI = UseOp.getParent();
+ ++UI;
+ if (UseMI->getParent() != BB)
+ continue;
+ if (UseMI->isPHI()) {
+ if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
+ continue;
+ if (getLoopPhiReg(*UseMI, BB) != OldReg)
+ continue;
+ }
+ InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
+ assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
+ MachineInstr *OrigMI = OrigInstr->second;
+ int StageSched = Schedule.getStage(OrigMI);
+ int CycleSched = Schedule.getCycle(OrigMI);
+ unsigned ReplaceReg = 0;
+ // This is the stage for the scheduled instruction.
+ if (StagePhi == StageSched && Phi->isPHI()) {
+ int CyclePhi = Schedule.getCycle(Phi);
+ if (PrevReg && InProlog)
+ ReplaceReg = PrevReg;
+ else if (PrevReg && !isLoopCarried(*Phi) &&
+ (CyclePhi <= CycleSched || OrigMI->isPHI()))
+ ReplaceReg = PrevReg;
+ else
+ ReplaceReg = NewReg;
+ }
+ // The scheduled instruction occurs before the scheduled Phi, and the
+ // Phi is not loop carried.
+ if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
+ ReplaceReg = NewReg;
+ if (StagePhi > StageSched && Phi->isPHI())
+ ReplaceReg = NewReg;
+ if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
+ ReplaceReg = NewReg;
+ if (ReplaceReg) {
+ MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
+ UseOp.setReg(ReplaceReg);
+ }
+ }
+}
+
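+/// Return true if the Phi is loop carried: the value it receives from the
+/// loop body is produced at a later cycle than the Phi, or in the same or an
+/// earlier stage, so the value must cross an iteration boundary.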
+bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
+ if (!Phi.isPHI())
+ return false;
+ unsigned DefCycle = Schedule.getCycle(&Phi);
+ int DefStage = Schedule.getStage(&Phi);
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
+ MachineInstr *Use = MRI.getVRegDef(LoopVal);
+ if (!Use || Use->isPHI())
+ return true;
+ unsigned LoopCycle = Schedule.getCycle(Use);
+ int LoopStage = Schedule.getStage(Use);
+ return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
+}
+
+//===----------------------------------------------------------------------===//
+// PeelingModuloScheduleExpander implementation
+//===----------------------------------------------------------------------===//
+// This is a reimplementation of ModuloScheduleExpander that works by creating
+// a fully correct steady-state kernel and peeling off the prolog and epilogs.
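+//
+// Illustrative layout for a 3-stage schedule, where P* are peeled prolog
+// copies of the kernel, K is the kernel, and E* are peeled epilogs:
+//   P0 -> P1 -> K -> E0 -> E1 -> exit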
+//===----------------------------------------------------------------------===//
+
+namespace {
+// Remove any dead phis in MBB. Dead phis either have only one block as input
+// (in which case they are the identity) or have no uses.
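+// For example (illustrative MIR):
+//   %a = PHI %b, %bb0            ; single input: %a is just %b, fold it away
+//   %c = PHI %x, %bb0, %y, %bb1  ; %c has no users: erase the PHI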
+void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) {
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
+ MachineInstr &MI = *I++;
+ assert(MI.isPHI());
+ if (MRI.use_empty(MI.getOperand(0).getReg())) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ Changed = true;
+ } else if (MI.getNumExplicitOperands() == 3) {
+ MRI.constrainRegClass(MI.getOperand(1).getReg(),
+ MRI.getRegClass(MI.getOperand(0).getReg()));
+ MRI.replaceRegWith(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg());
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+}
+
+/// Rewrites the kernel block in-place to adhere to the given schedule.
+/// KernelRewriter holds all of the state required to perform the rewriting.
+class KernelRewriter {
+ ModuloSchedule &S;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *PreheaderBB, *ExitBB;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals *LIS;
+
+ // Map from register class to canonical undef register for that class.
+ DenseMap<const TargetRegisterClass *, Register> Undefs;
+ // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
+ // this map is only used when InitReg is non-undef.
+ DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
+ // Map from LoopReg to phi register where the InitReg is undef.
+ DenseMap<Register, Register> UndefPhis;
+
+ // Reg is used by MI. Return the new register MI should use to adhere to the
+ // schedule. Insert phis as necessary.
+ Register remapUse(Register Reg, MachineInstr &MI);
+ // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
+ // If InitReg is not given it is chosen arbitrarily. It will either be undef
+ // or will be chosen so as to share another phi.
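+ // E.g. (illustrative MIR): %r = PHI %init, %preheader, %loopreg, %bb.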
+ Register phi(Register LoopReg, Optional<Register> InitReg = {},
+ const TargetRegisterClass *RC = nullptr);
+ // Create an undef register of the given register class.
+ Register undef(const TargetRegisterClass *RC);
+
+public:
+ KernelRewriter(MachineLoop &L, ModuloSchedule &S,
+ LiveIntervals *LIS = nullptr);
+ void rewrite();
+};
+} // namespace
+
+KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
+ LiveIntervals *LIS)
+ : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()),
+ ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
+ TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
+ PreheaderBB = *BB->pred_begin();
+ if (PreheaderBB == BB)
+ PreheaderBB = *std::next(BB->pred_begin());
+}
+
+void KernelRewriter::rewrite() {
+ // Rearrange the loop to be in schedule order. Note that the schedule may
+ // contain instructions that are not owned by the loop block (InstrChanges and
+ // friends), so we gracefully handle unowned instructions and delete any
+ // instructions that weren't in the schedule.
+ auto InsertPt = BB->getFirstTerminator();
+ MachineInstr *FirstMI = nullptr;
+ for (MachineInstr *MI : S.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ if (MI->getParent())
+ MI->removeFromParent();
+ BB->insert(InsertPt, MI);
+ if (!FirstMI)
+ FirstMI = MI;
+ }
+ assert(FirstMI && "Failed to find first MI in schedule");
+
+ // At this point all of the scheduled instructions are between FirstMI
+ // and the end of the block. Kill from the first non-phi to FirstMI.
+ for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*I);
+ (I++)->eraseFromParent();
+ }
+
+ // Now remap every instruction in the loop.
+ for (MachineInstr &MI : *BB) {
+ if (MI.isPHI() || MI.isTerminator())
+ continue;
+ for (MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
+ continue;
+ Register Reg = remapUse(MO.getReg(), MI);
+ MO.setReg(Reg);
+ }
+ }
+ EliminateDeadPhis(BB, MRI, LIS);
+
+ // Ensure a phi exists for all instructions that are either referenced by
+ // an illegal phi or by an instruction outside the loop. This allows us to
+ // treat remaps of these values the same as "normal" values that come from
+ // loop-carried phis.
+ for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
+ if (MI->isPHI()) {
+ Register R = MI->getOperand(0).getReg();
+ phi(R);
+ continue;
+ }
+
+ for (MachineOperand &Def : MI->defs()) {
+ for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
+ if (MI.getParent() != BB) {
+ phi(Def.getReg());
+ break;
+ }
+ }
+ }
+ }
+}
+
+Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
+ MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
+ if (!Producer)
+ return Reg;
+
+ int ConsumerStage = S.getStage(&MI);
+ if (!Producer->isPHI()) {
+ // Non-phi producers are simple to remap. Insert as many phis as the
+ // difference between the consumer and producer stages.
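+ // E.g. (illustrative): a stage-0 producer feeding a stage-2 consumer gets
+ // two chained phis, so the consumer reads the value from two iterations ago.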
+ if (Producer->getParent() != BB)
+ // Producer was not inside the loop. Use the register as-is.
+ return Reg;
+ int ProducerStage = S.getStage(Producer);
+ assert(ConsumerStage != -1 &&
+ "In-loop consumer should always be scheduled!");
+ assert(ConsumerStage >= ProducerStage);
+ unsigned StageDiff = ConsumerStage - ProducerStage;
+
+ for (unsigned I = 0; I < StageDiff; ++I)
+ Reg = phi(Reg);
+ return Reg;
+ }
+
+ // First, dive through the phi chain to find the defaults for the generated
+ // phis.
+ SmallVector<Optional<Register>, 4> Defaults;
+ Register LoopReg = Reg;
+ auto LoopProducer = Producer;
+ while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
+ LoopReg = getLoopPhiReg(*LoopProducer, BB);
+ Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
+ LoopProducer = MRI.getUniqueVRegDef(LoopReg);
+ assert(LoopProducer);
+ }
+ int LoopProducerStage = S.getStage(LoopProducer);
+
+ Optional<Register> IllegalPhiDefault;
+
+ if (LoopProducerStage == -1) {
+ // Do nothing.
+ } else if (LoopProducerStage > ConsumerStage) {
+ // This schedule is only representable if ProducerStage == ConsumerStage+1.
+ // In addition, Consumer's cycle must be scheduled after Producer in the
+ // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
+ // functions.
+#ifndef NDEBUG // Silence unused variables in non-asserts mode.
+ int LoopProducerCycle = S.getCycle(LoopProducer);
+ int ConsumerCycle = S.getCycle(&MI);
+#endif
+ assert(LoopProducerCycle <= ConsumerCycle);
+ assert(LoopProducerStage == ConsumerStage + 1);
+ // Peel off the first phi from Defaults and insert a phi between producer
+ // and consumer. This phi will not be at the front of the block so we
+ // consider it illegal. It will only exist during the rewrite process; it
+ // needs to exist while we peel off prologs because these could take the
+ // default value. After that we can replace all uses with the loop producer
+ // value.
+ IllegalPhiDefault = Defaults.front();
+ Defaults.erase(Defaults.begin());
+ } else {
+ assert(ConsumerStage >= LoopProducerStage);
+ int StageDiff = ConsumerStage - LoopProducerStage;
+ if (StageDiff > 0) {
+ LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
+ << " to " << (Defaults.size() + StageDiff) << "\n");
+ // If we need more phis than we have defaults for, pad out with undefs for
+ // the earliest phis, which are at the end of the defaults chain (the
+ // chain is in reverse order).
+ Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
+ ? Optional<Register>()
+ : Defaults.back());
+ }
+ }
+
+ // Now we know the number of stages to jump back, insert the phi chain.
+ auto DefaultI = Defaults.rbegin();
+ while (DefaultI != Defaults.rend())
+ LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
+
+ if (IllegalPhiDefault.hasValue()) {
+ // The consumer consumes either LoopProducer from the same iteration
+ // (because the producer is scheduled at an earlier cycle than the consumer)
+ // or the initial value. To facilitate this we create an illegal block here
+ // by embedding a phi in the middle of the block. We will fix this up
+ // immediately prior to pruning.
+ auto RC = MRI.getRegClass(Reg);
+ Register R = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(IllegalPhiDefault.getValue())
+ .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
+ .addReg(LoopReg)
+ .addMBB(BB); // Block choice is arbitrary and has no effect.
+ return R;
+ }
+
+ return LoopReg;
+}
+
+Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
+ const TargetRegisterClass *RC) {
+ // If the init register is not undef, try and find an existing phi.
+ if (InitReg.hasValue()) {
+ auto I = Phis.find({LoopReg, InitReg.getValue()});
+ if (I != Phis.end())
+ return I->second;
+ } else {
+ for (auto &KV : Phis) {
+ if (KV.first.first == LoopReg)
+ return KV.second;
+ }
+ }
+
+ // InitReg is either undef or no existing phi takes InitReg as input. Try and
+ // find a phi that takes undef as input.
+ auto I = UndefPhis.find(LoopReg);
+ if (I != UndefPhis.end()) {
+ Register R = I->second;
+ if (!InitReg.hasValue())
+ // Found a phi taking undef as input, and this input is undef so return
+ // without any more changes.
+ return R;
+ // Found a phi taking undef as input, so rewrite it to take InitReg.
+ MachineInstr *MI = MRI.getVRegDef(R);
+ MI->getOperand(1).setReg(InitReg.getValue());
+ Phis.insert({{LoopReg, InitReg.getValue()}, R});
+ MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+ UndefPhis.erase(I);
+ return R;
+ }
+
+ // Failed to find any existing phi to reuse, so create a new one.
+ if (!RC)
+ RC = MRI.getRegClass(LoopReg);
+ Register R = MRI.createVirtualRegister(RC);
+ if (InitReg.hasValue())
+ MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
+ BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
+ .addMBB(PreheaderBB)
+ .addReg(LoopReg)
+ .addMBB(BB);
+ if (!InitReg.hasValue())
+ UndefPhis[LoopReg] = R;
+ else
+ Phis[{LoopReg, *InitReg}] = R;
+ return R;
+}
+
+Register KernelRewriter::undef(const TargetRegisterClass *RC) {
+ Register &R = Undefs[RC];
+ if (R == 0) {
+ // Create an IMPLICIT_DEF that defines this register if we need it.
+ // All uses of this should be removed by the time we have finished unrolling
+ // prologs and epilogs.
+ R = MRI.createVirtualRegister(RC);
+ auto *InsertBB = &PreheaderBB->getParent()->front();
+ BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), R);
+ }
+ return R;
+}
+
+namespace {
+/// Describes an operand in the kernel of a pipelined loop. Characteristics of
+/// the operand are discovered, such as how many in-loop PHIs it has to look
+/// through and the default values of those PHIs.
+class KernelOperandInfo {
+ MachineBasicBlock *BB;
+ MachineRegisterInfo &MRI;
+ SmallVector<Register, 4> PhiDefaults;
+ MachineOperand *Source;
+ MachineOperand *Target;
+
+public:
+ KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
+ const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
+ : MRI(MRI) {
+ Source = MO;
+ BB = MO->getParent()->getParent();
+ while (isRegInLoop(MO)) {
+ MachineInstr *MI = MRI.getVRegDef(MO->getReg());
+ if (MI->isFullCopy()) {
+ MO = &MI->getOperand(1);
+ continue;
+ }
+ if (!MI->isPHI())
+ break;
+ // If this is an illegal phi, don't count it in distance.
+ if (IllegalPhis.count(MI)) {
+ MO = &MI->getOperand(3);
+ continue;
+ }
+
+ Register Default = getInitPhiReg(*MI, BB);
+ MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
+ : &MI->getOperand(3);
+ PhiDefaults.push_back(Default);
+ }
+ Target = MO;
+ }
+
+ bool operator==(const KernelOperandInfo &Other) const {
+ return PhiDefaults.size() == Other.PhiDefaults.size();
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
+ << *Source->getParent();
+ }
+
+private:
+ bool isRegInLoop(MachineOperand *MO) {
+ return MO->isReg() && MO->getReg().isVirtual() &&
+ MRI.getVRegDef(MO->getReg())->getParent() == BB;
+ }
+};
+} // namespace
+
+MachineBasicBlock *
+PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
+ MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
+ if (LPD == LPD_Front)
+ PeeledFront.push_back(NewBB);
+ else
+ PeeledBack.push_front(NewBB);
+ for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
+ ++I, ++NI) {
+ CanonicalMIs[&*I] = &*I;
+ CanonicalMIs[&*NI] = &*I;
+ BlockMIs[{NewBB, &*I}] = &*NI;
+ BlockMIs[{BB, &*I}] = &*I;
+ }
+ return NewBB;
+}
+
+void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
+ BitVector LS(Schedule.getNumStages(), true);
+ BitVector AS(Schedule.getNumStages(), true);
+ LiveStages[BB] = LS;
+ AvailableStages[BB] = AS;
+
+ // Peel out the prologs.
+ LS.reset();
+ for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
+ LS[I] = 1;
+ Prologs.push_back(peelKernel(LPD_Front));
+ LiveStages[Prologs.back()] = LS;
+ AvailableStages[Prologs.back()] = LS;
+ }
+
+ // Create a block that will end up as the new loop exiting block (dominated by
+ // all prologs and epilogs). It will only contain PHIs, in the same order as
+ // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
+ // that the exiting block is a (sub) clone of BB. This in turn gives us the
+ // property that any value defined in BB but used outside of BB is used by a
+ // PHI in the exiting block.
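+  // E.g. (illustrative): for a value %v defined in BB and used after the loop,
+  // the exiting block gets a single-input PHI of %v, and all out-of-loop uses
+  // are rewritten to that PHI's result.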
+ MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
+
+ // Push out the epilogs, again in reverse order.
+ // We can't assume anything about the minimum loop trip count at this point,
+ // so emit a fairly complex epilog:
+ // K[0, 1, 2] // Kernel runs stages 0, 1, 2
+ // E0[2] <- P1 // Epilog runs stage 2 only, so the state after is [0].
+ // E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2.
+ //
+ // This creates a single-successor single-predecessor sequence of blocks for
+ // each epilog, which are kept this way for simplicity at this stage and
+ // cleaned up by the optimizer later.
+ for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
+ Epilogs.push_back(nullptr);
+ for (int J = Schedule.getNumStages() - 1; J >= I; --J) {
+ LS.reset();
+ LS[J] = 1;
+ Epilogs.back() = peelKernel(LPD_Back);
+ LiveStages[Epilogs.back()] = LS;
+ AvailableStages[Epilogs.back()] = AS;
+ }
+ }
+
+ // Now we've defined all the prolog and epilog blocks as a fallthrough
+ // sequence, add the edges that will be followed if the loop trip count is
+ // lower than the number of stages (connecting prologs directly with epilogs).
+ auto PI = Prologs.begin();
+ auto EI = Epilogs.begin();
+ assert(Prologs.size() == Epilogs.size());
+ for (; PI != Prologs.end(); ++PI, ++EI) {
+ MachineBasicBlock *Pred = *(*EI)->pred_begin();
+ (*PI)->addSuccessor(*EI);
+ for (MachineInstr &MI : (*EI)->phis()) {
+ Register Reg = MI.getOperand(1).getReg();
+ MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
+ if (Use && Use->getParent() == Pred)
+ Reg = getEquivalentRegisterIn(Reg, *PI);
+ MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+ MI.addOperand(MachineOperand::CreateMBB(*PI));
+ }
+ }
+
+ // Create a list of all blocks in order.
+ SmallVector<MachineBasicBlock *, 8> Blocks;
+ llvm::copy(PeeledFront, std::back_inserter(Blocks));
+ Blocks.push_back(BB);
+ llvm::copy(PeeledBack, std::back_inserter(Blocks));
+
+ // Iterate in reverse order over all instructions, remapping as we go.
+ for (MachineBasicBlock *B : reverse(Blocks)) {
+ for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
+ I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
+ MachineInstr *MI = &*I++;
+ rewriteUsesOf(MI);
+ }
+ }
+ // Now all remapping has been done, we're free to optimize the generated code.
+ for (MachineBasicBlock *B : reverse(Blocks))
+ EliminateDeadPhis(B, MRI, LIS);
+ EliminateDeadPhis(ExitingBB, MRI, LIS);
+}
+
+MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
+ MachineFunction &MF = *BB->getParent();
+ MachineBasicBlock *Exit = *BB->succ_begin();
+ if (Exit == BB)
+ Exit = *std::next(BB->succ_begin());
+
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+ MF.insert(std::next(BB->getIterator()), NewBB);
+
+ // Clone all phis in BB into NewBB and rewrite.
+ for (MachineInstr &MI : BB->phis()) {
+ auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
+ Register OldR = MI.getOperand(3).getReg();
+ Register R = MRI.createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 4> Uses;
+ for (MachineInstr &Use : MRI.use_instructions(OldR))
+ if (Use.getParent() != BB)
+ Uses.push_back(&Use);
+ for (MachineInstr *Use : Uses)
+ Use->substituteRegister(OldR, R, /*SubIdx=*/0,
+ *MRI.getTargetRegisterInfo());
+ MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(OldR)
+ .addMBB(BB);
+ BlockMIs[{NewBB, &MI}] = NI;
+ CanonicalMIs[NI] = &MI;
+ }
+ BB->replaceSuccessor(Exit, NewBB);
+ Exit->replacePhiUsesWith(BB, NewBB);
+ NewBB->addSuccessor(Exit);
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
+ (void)CanAnalyzeBr;
+ assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
+ TII->removeBranch(*BB);
+ TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
+ Cond, DebugLoc());
+ TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
+ return NewBB;
+}
+
+Register
+PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
+ MachineBasicBlock *BB) {
+ MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+ unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
+ return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
+}
+
+void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
+ if (MI->isPHI()) {
+ // This is an illegal PHI. The loop-carried (desired) value is operand 3,
+ // and it is produced by this block.
+ Register PhiR = MI->getOperand(0).getReg();
+ Register R = MI->getOperand(3).getReg();
+ int RMIStage = getStage(MRI.getUniqueVRegDef(R));
+ if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
+ R = MI->getOperand(1).getReg();
+ MRI.setRegClass(R, MRI.getRegClass(PhiR));
+ MRI.replaceRegWith(PhiR, R);
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ return;
+ }
+
+ int Stage = getStage(MI);
+ if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
+ LiveStages[MI->getParent()].test(Stage))
+ // Instruction is live, no rewriting to do.
+ return;
+
+ for (MachineOperand &DefMO : MI->defs()) {
+ SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
+ for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
+ // Only PHIs can use values from this block by construction.
+ // Match with the equivalent PHI in MI's parent block.
+ assert(UseMI.isPHI());
+ Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
+ MI->getParent());
+ Subs.emplace_back(&UseMI, Reg);
+ }
+ for (auto &Sub : Subs)
+ Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
+ *MRI.getTargetRegisterInfo());
+ }
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+}
+
+void PeelingModuloScheduleExpander::fixupBranches() {
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info =
+ TII->analyzeLoopForPipelining(BB);
+ assert(Info);
+
+ // Work outwards from the kernel.
+ bool KernelDisposed = false;
+ int TC = Schedule.getNumStages() - 1;
+ for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
+ ++PI, ++EI, --TC) {
+ MachineBasicBlock *Prolog = *PI;
+ MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
+ MachineBasicBlock *Epilog = *EI;
+ SmallVector<MachineOperand, 4> Cond;
+ TII->removeBranch(*Prolog);
+ Optional<bool> StaticallyGreater =
+ Info->createTripCountGreaterCondition(TC, *Prolog, Cond);
+ if (!StaticallyGreater.hasValue()) {
+ LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
+ // Dynamically branch based on Cond.
+ TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
+ } else if (*StaticallyGreater == false) {
+ LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
+ // Prolog never falls through; branch to epilog and orphan interior
+ // blocks. Leave it to unreachable-block-elim to clean up.
+ Prolog->removeSuccessor(Fallthrough);
+ for (MachineInstr &P : Fallthrough->phis()) {
+ P.RemoveOperand(2);
+ P.RemoveOperand(1);
+ }
+ TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
+ KernelDisposed = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
+ // Prolog always falls through; remove incoming values in epilog.
+ Prolog->removeSuccessor(Epilog);
+ for (MachineInstr &P : Epilog->phis()) {
+ P.RemoveOperand(4);
+ P.RemoveOperand(3);
+ }
+ }
+ }
+
+ if (!KernelDisposed) {
+ Info->adjustTripCount(-(Schedule.getNumStages() - 1));
+ Info->setPreheader(Prologs.back());
+ } else {
+ Info->disposed();
+ }
+}
+
+void PeelingModuloScheduleExpander::rewriteKernel() {
+ KernelRewriter KR(*Schedule.getLoop(), Schedule);
+ KR.rewrite();
+}
+
+void PeelingModuloScheduleExpander::expand() {
+ BB = Schedule.getLoop()->getTopBlock();
+ Preheader = Schedule.getLoop()->getLoopPreheader();
+ LLVM_DEBUG(Schedule.dump());
+
+ rewriteKernel();
+ peelPrologAndEpilogs();
+ fixupBranches();
+}
+
+void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
+ BB = Schedule.getLoop()->getTopBlock();
+ Preheader = Schedule.getLoop()->getLoopPreheader();
+
+ // Dump the schedule before we invalidate and remap all its instructions.
+ // Stash it in a string so we can print it if we found an error.
+ std::string ScheduleDump;
+ raw_string_ostream OS(ScheduleDump);
+ Schedule.print(OS);
+ OS.flush();
+
+ // First, run the normal ModuloScheduleExpander. We don't support any
+ // InstrChanges.
+ assert(LIS && "Requires LiveIntervals!");
+ ModuloScheduleExpander MSE(MF, Schedule, *LIS,
+ ModuloScheduleExpander::InstrChangesTy());
+ MSE.expand();
+ MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
+ if (!ExpandedKernel) {
+ // The expander optimized away the kernel. We can't do any useful checking.
+ MSE.cleanup();
+ return;
+ }
+ // Before running the KernelRewriter, re-add BB into the CFG.
+ Preheader->addSuccessor(BB);
+
+ // Now run the new expansion algorithm.
+ KernelRewriter KR(*Schedule.getLoop(), Schedule);
+ KR.rewrite();
+ peelPrologAndEpilogs();
+
+ // Collect all illegal phis that the new algorithm created. We'll give these
+ // to KernelOperandInfo.
+ SmallPtrSet<MachineInstr *, 4> IllegalPhis;
+ for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
+ if (NI->isPHI())
+ IllegalPhis.insert(&*NI);
+ }
+
+ // Co-iterate across both kernels. We expect them to be identical apart from
+ // phis and full COPYs (we look through both).
+ SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
+ auto OI = ExpandedKernel->begin();
+ auto NI = BB->begin();
+ for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
+ while (OI->isPHI() || OI->isFullCopy())
+ ++OI;
+ while (NI->isPHI() || NI->isFullCopy())
+ ++NI;
+ assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
+ // Analyze every operand separately.
+ for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
+ OOpI != OI->operands_end(); ++OOpI, ++NOpI)
+ KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
+ KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
+ }
+
+ bool Failed = false;
+ for (auto &OldAndNew : KOIs) {
+ if (OldAndNew.first == OldAndNew.second)
+ continue;
+ Failed = true;
+ errs() << "Modulo kernel validation error: [\n";
+ errs() << " [golden] ";
+ OldAndNew.first.print(errs());
+ errs() << " ";
+ OldAndNew.second.print(errs());
+ errs() << "]\n";
+ }
+
+ if (Failed) {
+ errs() << "Golden reference kernel:\n";
+ ExpandedKernel->print(errs());
+ errs() << "New kernel:\n";
+ BB->print(errs());
+ errs() << ScheduleDump;
+ report_fatal_error(
+ "Modulo kernel validation (-pipeliner-experimental-cg) failed");
+ }
+
+  // Clean up by removing BB from the CFG again as the original
+ // ModuloScheduleExpander intended.
+ Preheader->removeSuccessor(BB);
+ MSE.cleanup();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestPass implementation
+//===----------------------------------------------------------------------===//
+// This pass constructs a ModuloSchedule from its module and runs
+// ModuloScheduleExpander.
+//
+// The module is expected to contain a single-block analyzable loop.
+// The total order of instructions is taken from the loop as-is.
+// Instructions are expected to be annotated with a PostInstrSymbol.
+// This PostInstrSymbol must have the following format:
+//   "Stage-%d_Cycle-%d".
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ModuloScheduleTest : public MachineFunctionPass {
+public:
+ static char ID;
+
+ ModuloScheduleTest() : MachineFunctionPass(ID) {
+ initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void runOnLoop(MachineFunction &MF, MachineLoop &L);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // namespace
+
+char ModuloScheduleTest::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
+ "Modulo Schedule test pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
+ "Modulo Schedule test pass", false, false)
+
+bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ for (auto *L : MLI) {
+ if (L->getTopBlock() != L->getBottomBlock())
+ continue;
+ runOnLoop(MF, *L);
+ return false;
+ }
+ return false;
+}
+
+static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
+ std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
+ std::pair<StringRef, StringRef> StageTokenAndValue =
+ getToken(StageAndCycle.first, "-");
+ std::pair<StringRef, StringRef> CycleTokenAndValue =
+ getToken(StageAndCycle.second, "-");
+ if (StageTokenAndValue.first != "Stage" ||
+ CycleTokenAndValue.first != "_Cycle") {
+ llvm_unreachable(
+ "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
+ return;
+ }
+
+ StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
+ CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
+
+ dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
+}
+
+void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineBasicBlock *BB = L.getTopBlock();
+ dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
+
+ DenseMap<MachineInstr *, int> Cycle, Stage;
+ std::vector<MachineInstr *> Instrs;
+ for (MachineInstr &MI : *BB) {
+ if (MI.isTerminator())
+ continue;
+ Instrs.push_back(&MI);
+ if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
+ dbgs() << "Parsing post-instr symbol for " << MI;
+ parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
+ }
+ }
+
+ ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
+ std::move(Stage));
+ ModuloScheduleExpander MSE(
+ MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
+ MSE.expand();
+ MSE.cleanup();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestAnnotater implementation
+//===----------------------------------------------------------------------===//
+
+void ModuloScheduleTestAnnotater::annotate() {
+ for (MachineInstr *MI : S.getInstructions()) {
+ SmallVector<char, 16> SV;
+ raw_svector_ostream OS(SV);
+ OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
+ MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
+ MI->setPostInstrSymbol(MF, Sym);
+ }
+}
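
Aside on the test-pass plumbing just above: ModuloScheduleTestAnnotater::annotate() emits one PostInstrSymbol per scheduled instruction in the form "Stage-<stage>_Cycle-<cycle>", and parseSymbolString() splits it back apart with getToken(). The following standalone sketch reproduces that round trip with plain std::string in place of llvm::StringRef; the function and names here are illustrative stand-ins, not the in-tree code.

// Minimal standalone sketch of the "Stage-%d_Cycle-%d" round trip, assuming
// the symbol was produced by the annotater above.
#include <cassert>
#include <cstdio>
#include <string>

static void parseStageCycle(const std::string &S, int &Cycle, int &Stage) {
  // Split "Stage-1_Cycle-3" at '_': "Stage-1" and "Cycle-3".
  std::string::size_type Underscore = S.find('_');
  assert(Underscore != std::string::npos && "expected Stage-%d_Cycle-%d");
  std::string StagePart = S.substr(0, Underscore);
  std::string CyclePart = S.substr(Underscore + 1);
  // Each half splits at '-' into a token and a decimal value.
  assert(StagePart.compare(0, 6, "Stage-") == 0 && "bad stage token");
  assert(CyclePart.compare(0, 6, "Cycle-") == 0 && "bad cycle token");
  Stage = std::stoi(StagePart.substr(6));
  Cycle = std::stoi(CyclePart.substr(6));
}

int main() {
  int Cycle = -1, Stage = -1;
  parseStageCycle("Stage-1_Cycle-3", Cycle, Stage);
  std::printf("Stage=%d, Cycle=%d\n", Stage, Cycle); // Stage=1, Cycle=3
  return 0;
}
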
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index c70b62252139..1a493964e678 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -97,7 +97,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
unsigned &SingleValReg,
InstrSet &PHIsInCycle) {
assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
// See if we already saw this register.
if (!PHIsInCycle.insert(MI).second)
@@ -109,16 +109,15 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
// Scan the PHI operands.
for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
- unsigned SrcReg = MI->getOperand(i).getReg();
+ Register SrcReg = MI->getOperand(i).getReg();
if (SrcReg == DstReg)
continue;
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
// Skip over register-to-register moves.
- if (SrcMI && SrcMI->isCopy() &&
- !SrcMI->getOperand(0).getSubReg() &&
+ if (SrcMI && SrcMI->isCopy() && !SrcMI->getOperand(0).getSubReg() &&
!SrcMI->getOperand(1).getSubReg() &&
- TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) {
+ Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) {
SrcReg = SrcMI->getOperand(1).getReg();
SrcMI = MRI->getVRegDef(SrcReg);
}
@@ -142,8 +141,8 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
/// other PHIs in a cycle.
bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
- unsigned DstReg = MI->getOperand(0).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ Register DstReg = MI->getOperand(0).getReg();
+ assert(Register::isVirtualRegister(DstReg) &&
"PHI destination is not a virtual register");
// See if we already saw this register.
@@ -177,7 +176,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
InstrSet PHIsInCycle;
if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
SingleValReg != 0) {
- unsigned OldReg = MI->getOperand(0).getReg();
+ Register OldReg = MI->getOperand(0).getReg();
if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
continue;
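
Context for the unsigned-to-Register rewrites that dominate the rest of this patch: LLVM packs physical and virtual register numbers into one 32-bit space, with virtual registers distinguished by the top bit, which is what Register::isVirtualRegister / virtReg2Index / index2VirtReg test and strip. A minimal standalone sketch of that encoding, assuming the top-bit scheme; the helpers below are illustrative stand-ins, not llvm::Register itself.

// Toy mirror of the register-number scheme behind this migration.
#include <cassert>
#include <cstdint>
#include <cstdio>

static const uint32_t VirtRegFlag = 1u << 31;

static bool isVirtualRegister(uint32_t Reg) { return (Reg & VirtRegFlag) != 0; }
static bool isPhysicalRegister(uint32_t Reg) {
  return Reg != 0 && !(Reg & VirtRegFlag);
}
static uint32_t index2VirtReg(uint32_t Index) { return Index | VirtRegFlag; }
static uint32_t virtReg2Index(uint32_t Reg) {
  assert(isVirtualRegister(Reg) && "not a virtual register");
  return Reg & ~VirtRegFlag;
}

int main() {
  uint32_t V0 = index2VirtReg(0); // first virtual register, %0
  assert(isVirtualRegister(V0) && virtReg2Index(V0) == 0);
  assert(isPhysicalRegister(5)); // small nonzero numbers are physregs
  assert(!isVirtualRegister(0) && !isPhysicalRegister(0)); // 0 = NoRegister
  std::printf("%%0 encodes as 0x%08x\n", (unsigned)V0);
  return 0;
}
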
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 948a5835438c..4dd4c4b1084e 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -31,7 +31,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Pass.h"
@@ -168,7 +170,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
// Remove dead IMPLICIT_DEF instructions.
for (MachineInstr *DefMI : ImpDefs) {
- unsigned DefReg = DefMI->getOperand(0).getReg();
+ Register DefReg = DefMI->getOperand(0).getReg();
if (MRI->use_nodbg_empty(DefReg)) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*DefMI);
@@ -183,6 +185,11 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
MF.DeleteMachineInstr(I.first);
}
+ // TODO: we should use the incremental DomTree updater here.
+ if (Changed)
+ if (auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->getBase().recalculate(MF);
+
LoweredPHIs.clear();
ImpDefs.clear();
VRegPHIUseCount.clear();
@@ -240,7 +247,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
MachineInstr *MPhi = MBB.remove(&*MBB.begin());
unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
- unsigned DestReg = MPhi->getOperand(0).getReg();
+ Register DestReg = MPhi->getOperand(0).getReg();
assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
bool isDead = MPhi->getOperand(0).isDead();
@@ -252,11 +259,12 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Insert a register to register copy at the top of the current block (but
// after any remaining phi nodes) which copies the new incoming register
// into the phi node destination.
+ MachineInstr *PHICopy = nullptr;
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
if (allPhiOperandsUndefined(*MPhi, *MRI))
// If all sources of a PHI node are implicit_def or undef uses, just emit an
// implicit_def instead of a copy.
- BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
else {
// Can we reuse an earlier PHI node? This only happens for critical edges,
@@ -273,15 +281,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
}
- BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), DestReg)
- .addReg(IncomingReg);
+    // Give the target a chance to handle special cases; the default hook
+    // implementation simply emits a plain COPY.
+ PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ IncomingReg, DestReg);
}
// Update live variable information if there is any.
if (LV) {
- MachineInstr &PHICopy = *std::prev(AfterPHIsIt);
-
if (IncomingReg) {
LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
@@ -302,7 +308,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// killed. Note that because the value is defined in several places (once
// each for each incoming block), the "def" block and instruction fields
// for the VarInfo is not filled in.
- LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);
}
// Since we are going to be deleting the PHI node, if it is the last use of
@@ -312,15 +318,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// If the result is dead, update LV.
if (isDead) {
- LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->addVirtualRegisterDead(DestReg, *PHICopy);
LV->removeVirtualRegisterDead(DestReg, *MPhi);
}
}
// Update LiveIntervals for the new copy or implicit def.
if (LIS) {
- SlotIndex DestCopyIndex =
- LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt));
+ SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(*PHICopy);
SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
if (IncomingReg) {
@@ -368,11 +373,11 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// IncomingReg register in the corresponding predecessor basic block.
SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
for (int i = NumSrcs - 1; i >= 0; --i) {
- unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg();
unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
isImplicitlyDefined(SrcReg, *MRI);
- assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ assert(Register::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
// Get the MachineBasicBlock equivalent of the BasicBlock that is the source
@@ -406,9 +411,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (DefMI->isImplicitDef())
ImpDefs.insert(DefMI);
} else {
- NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), IncomingReg)
- .addReg(SrcReg, 0, SrcSubReg);
+ NewSrcInstr =
+ TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(),
+ SrcReg, SrcSubReg, IncomingReg);
}
}
@@ -457,7 +462,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
} else {
// We just inserted this copy.
- KillInst = std::prev(InsertPos);
+ KillInst = NewSrcInstr;
}
}
assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
@@ -567,7 +572,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
- unsigned Reg = BBI->getOperand(i).getReg();
+ Register Reg = BBI->getOperand(i).getReg();
MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
// Is there a critical edge from PreMBB to MBB?
if (PreMBB->succ_size() == 1)
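
The PHIElimination hunks above replace hard-coded COPY emission with the new TargetInstrInfo hooks createPHIDestinationCopy and createPHISourceCopy, so a target can interpose on the copies produced for lowered PHIs. Below is a toy sketch of that dispatch pattern, assuming the default hook emits the same generic copy as before; none of the class or method names here are LLVM API.

// Toy illustration of the virtual-hook pattern introduced for PHI copies.
#include <cstdio>
#include <string>

struct ToyInstrInfo {
  virtual ~ToyInstrInfo() = default;
  // Default: a plain register-to-register copy, as before this patch.
  virtual std::string createPHIDestinationCopy(unsigned Src,
                                               unsigned Dst) const {
    return "COPY %" + std::to_string(Dst) + " <- %" + std::to_string(Src);
  }
};

struct ToyTargetInstrInfo : ToyInstrInfo {
  // A target can now interpose, e.g. to keep target-specific bookkeeping
  // attached to the copy it emits.
  std::string createPHIDestinationCopy(unsigned Src,
                                       unsigned Dst) const override {
    return "TARGET_COPY %" + std::to_string(Dst) + " <- %" +
           std::to_string(Src);
  }
};

int main() {
  ToyInstrInfo Generic;
  ToyTargetInstrInfo Special;
  ToyInstrInfo *TII = &Special; // PHI lowering only sees the base interface.
  std::printf("%s\n", Generic.createPHIDestinationCopy(1, 2).c_str());
  std::printf("%s\n", TII->createPHIDestinationCopy(1, 2).c_str());
  return 0;
}
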
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index a3fa1b0ad8ed..529fde84e39a 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -78,7 +78,7 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
MIB.add(MO);
FirstActualI->eraseFromParent();
- MF.ensureAlignment(4);
+ MF.ensureAlignment(Align(16));
return true;
}
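
The ensureAlignment change above is part of the llvm::Align migration: the old interface took log2 of the alignment (so 4 meant 2^4 = 16 bytes), while the new one takes an explicit power-of-two byte count wrapped in Align. A standalone sketch of that equivalence, with a toy Align type assumed to store the shift amount the way llvm::Align does:

#include <cassert>
#include <cstdint>
#include <cstdio>

struct ToyAlign {
  uint8_t ShiftValue = 0; // stored as log2 of the byte alignment
  explicit ToyAlign(uint64_t Value) {
    assert(Value > 0 && (Value & (Value - 1)) == 0 && "must be a power of 2");
    while ((1ull << ShiftValue) < Value)
      ++ShiftValue;
  }
  uint64_t value() const { return 1ull << ShiftValue; }
};

int main() {
  // Old call site: ensureAlignment(4) meant log2(alignment) == 4.
  unsigned OldLog2Arg = 4;
  // New call site: ensureAlignment(Align(16)) spells the bytes out.
  ToyAlign NewArg(16);
  assert(NewArg.value() == (1ull << OldLog2Arg)); // the same 16 bytes
  std::printf("alignment = %llu bytes\n",
              (unsigned long long)NewArg.value());
  return 0;
}
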
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index b918396aa8c5..54f1d38ed106 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -418,7 +418,7 @@ namespace {
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII = nullptr)
: DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
- if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!Register::isPhysicalRegister(Reg)) {
Def = MRI.getVRegDef(Reg);
DefIdx = MRI.def_begin(Reg).getOperandNo();
}
@@ -460,8 +460,8 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
return false;
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ if (Register::isPhysicalRegister(DstReg) ||
+ Register::isPhysicalRegister(SrcReg))
return false;
if (MRI->hasOneNonDBGUse(SrcReg))
@@ -581,7 +581,7 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
MRI->constrainRegClass(DstReg, DstRC);
}
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVR)
.addReg(DstReg, 0, SubIdx);
@@ -609,8 +609,8 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
- (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
+ Register::isPhysicalRegister(SrcReg) ||
+ (SrcReg2 != 0 && Register::isPhysicalRegister(SrcReg2)))
return false;
// Attempt to optimize the comparison instruction.
@@ -663,7 +663,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
// Thus, instead of maintaining untested code, we will revisit that if
// that changes at some point.
unsigned Reg = RegSubReg.Reg;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return false;
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
@@ -675,7 +675,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
do {
CurSrcPair = SrcToLook.pop_back_val();
// As explained above, do not handle physical registers
- if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
+ if (Register::isPhysicalRegister(CurSrcPair.Reg))
return false;
ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
@@ -723,7 +723,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
// constraints to the register allocator. Moreover, if we want to extend
// the live-range of a physical register, unlike SSA virtual register,
// we will have to check that they aren't redefine before the related use.
- if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
+ if (Register::isPhysicalRegister(CurSrcPair.Reg))
return false;
// Keep following the chain if the value isn't any better yet.
@@ -761,7 +761,7 @@ insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
// NewRC is only correct if no subregisters are involved. findNextSource()
// should have rejected those cases already.
assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
- unsigned NewVR = MRI.createVirtualRegister(NewRC);
+ Register NewVR = MRI.createVirtualRegister(NewRC);
MachineBasicBlock *MBB = OrigPHI.getParent();
MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
TII.get(TargetOpcode::PHI), NewVR);
@@ -1170,7 +1170,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
"Coalescer can understand multiple defs?!");
const MachineOperand &MODef = MI.getOperand(0);
// Do not rewrite physical definitions.
- if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+ if (Register::isPhysicalRegister(MODef.getReg()))
return false;
bool Changed = false;
@@ -1221,7 +1221,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
MachineInstr &
PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
RegSubRegPair Def, RewriteMapTy &RewriteMap) {
- assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
+ assert(!Register::isPhysicalRegister(Def.Reg) &&
"We do not rewrite physical registers");
// Find the new source to use in the COPY rewrite.
@@ -1229,7 +1229,7 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
// Insert the COPY.
const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
- unsigned NewVReg = MRI->createVirtualRegister(DefRC);
+ Register NewVReg = MRI->createVirtualRegister(DefRC);
MachineInstr *NewCopy =
BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
@@ -1280,7 +1280,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
while (CpyRewriter.getNextRewritableSource(Src, Def)) {
// If a physical register is here, this is probably for a good reason.
// Do not rewrite that.
- if (TargetRegisterInfo::isPhysicalRegister(Def.Reg))
+ if (Register::isPhysicalRegister(Def.Reg))
return false;
// If we do not know how to rewrite this definition, there is no point
@@ -1315,12 +1315,11 @@ bool PeepholeOptimizer::isLoadFoldable(
if (MCID.getNumDefs() != 1)
return false;
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
// To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
// loads. It should be checked when processing uses of the load, since
// uses can be removed during peephole.
- if (!MI.getOperand(0).getSubReg() &&
- TargetRegisterInfo::isVirtualRegister(Reg) &&
+ if (!MI.getOperand(0).getSubReg() && Register::isVirtualRegister(Reg) &&
MRI->hasOneNonDBGUser(Reg)) {
FoldAsLoadDefCandidates.insert(Reg);
return true;
@@ -1336,8 +1335,8 @@ bool PeepholeOptimizer::isMoveImmediate(
return false;
if (MCID.getNumDefs() != 1)
return false;
- unsigned Reg = MI.getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MI.getOperand(0).getReg();
+ if (Register::isVirtualRegister(Reg)) {
ImmDefMIs.insert(std::make_pair(Reg, &MI));
ImmDefRegs.insert(Reg);
return true;
@@ -1359,8 +1358,8 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr &MI,
// Ignore dead implicit defs.
if (MO.isImplicit() && MO.isDead())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
if (ImmDefRegs.count(Reg) == 0)
continue;
@@ -1393,12 +1392,12 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
DenseMap<unsigned, MachineInstr *> &CopyMIs) {
assert(MI.isCopy() && "expected a COPY machine instruction");
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
- unsigned DstReg = MI.getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!Register::isVirtualRegister(DstReg))
return false;
if (CopySrcRegs.insert(SrcReg).second) {
@@ -1416,7 +1415,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
if (SrcSubReg != PrevSrcSubReg)
return false;
- unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
+ Register PrevDstReg = PrevCopy->getOperand(0).getReg();
// Only replace if the copy register class is the same.
//
@@ -1433,8 +1432,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
}
bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
- return TargetRegisterInfo::isPhysicalRegister(Reg) &&
- !MRI->isAllocatable(Reg);
+ return Register::isPhysicalRegister(Reg) && !MRI->isAllocatable(Reg);
}
bool PeepholeOptimizer::foldRedundantNAPhysCopy(
@@ -1444,9 +1442,9 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
if (DisableNAPhysCopyOpt)
return false;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) {
// %vreg = COPY %physreg
     // Avoid using a data structure which can track multiple live non-allocatable
// phys->virt copies since LLVM doesn't seem to do this.
@@ -1454,7 +1452,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
- if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
+ if (!(Register::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
return false;
// %physreg = COPY %vreg
@@ -1467,7 +1465,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
- unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
+ Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
if (PrevDstReg == SrcReg) {
// Remove the virt->phys copy: we saw the virtual register definition, and
// the non-allocatable physical register's state hasn't changed since then.
@@ -1489,7 +1487,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
static bool isVirtualRegisterOperand(MachineOperand &MO) {
if (!MO.isReg())
return false;
- return TargetRegisterInfo::isVirtualRegister(MO.getReg());
+ return Register::isVirtualRegister(MO.getReg());
}
bool PeepholeOptimizer::findTargetRecurrence(
@@ -1662,7 +1660,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
for (const MachineOperand &MO : MI->operands()) {
// Visit all operands: definitions can be implicit or explicit.
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (MO.isDef() && isNAPhysCopy(Reg)) {
const auto &Def = NAPhysToVirtMIs.find(Reg);
if (Def != NAPhysToVirtMIs.end()) {
@@ -1778,7 +1776,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.erase(DefMI);
LocalMIs.insert(FoldMI);
if (MI->isCall())
- MI->getMF()->updateCallSiteInfo(MI, FoldMI);
+ MI->getMF()->moveCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
DefMI->eraseFromParent();
MRI->markUsesInDebugValueAsUndef(FoldedReg);
@@ -1810,7 +1808,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
assert(Def->isCopy() && "Invalid definition");
   // Copy instructions are supposed to be: Def = Src.
// If someone breaks this assumption, bad things will happen everywhere.
- assert(Def->getNumOperands() == 2 && "Invalid number of operands");
+  // There may be implicit uses preventing the copy from being moved across
+  // some target-specific register definitions.
+ assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
+ "Invalid number of operands");
+ assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of src.
@@ -1855,6 +1857,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
SrcIdx = OpIdx;
}
+  // In some rare cases, Def has no input, SrcIdx is out of bounds, and
+  // getOperand(SrcIdx) below would fail.
+ if (SrcIdx >= Def->getNumOperands())
+ return ValueTrackerResult();
+
// Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
// will break the assumed guarantees for the upper bits.
for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
@@ -2087,7 +2094,7 @@ ValueTrackerResult ValueTracker::getNextSource() {
// If we can still move up in the use-def chain, move to the next
// definition.
- if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
+ if (!Register::isPhysicalRegister(Reg) && OneRegSrc) {
MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
if (DI != MRI.def_end()) {
Def = DI->getParent();
diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 2752e186875c..0d2f6f99ca96 100644
--- a/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -76,7 +76,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
}
for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
- auto *CI = dyn_cast<CallInst>(I->getUser());
+ auto *CI = cast<CallInst>(I->getUser());
assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
++I;
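
The dyn_cast-to-cast change above tightens an invariant rather than behavior: every user of the lowered objc_* function is expected to be a CallInst, so a mismatched type should assert instead of being silently tolerated. A sketch of the difference, using plain C++ RTTI as an illustrative stand-in for LLVM's casting machinery:

#include <cassert>
#include <cstdio>

struct User { virtual ~User() = default; };
struct CallInst : User { const char *Name = "call"; };

// dyn_cast-like: returns nullptr when the type doesn't match.
template <typename To, typename From> To *dynCastLike(From *V) {
  return dynamic_cast<To *>(V);
}
// cast-like: asserts the type matches; no null check needed afterwards.
template <typename To, typename From> To *castLike(From *V) {
  To *Result = dynamic_cast<To *>(V);
  assert(Result && "castLike<To>() argument of incompatible type");
  return Result;
}

int main() {
  CallInst CI;
  User *U = &CI;
  if (CallInst *Maybe = dynCastLike<CallInst>(U)) // tolerant form
    std::printf("dyn_cast-style: %s\n", Maybe->Name);
  CallInst *Known = castLike<CallInst>(U); // asserting form
  std::printf("cast-style: %s\n", Known->Name);
  return 0;
}
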
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index b38987ad1c90..11bff45f9ad5 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -73,9 +73,9 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
LLVM_DEBUG(dbgs() << "Processing " << *MI);
- unsigned Reg = MI->getOperand(0).getReg();
+ Register Reg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
// For virtual registers, mark all uses as <undef>, and convert users to
// implicit-def when possible.
for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
@@ -100,8 +100,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
for (MachineOperand &MO : UserMI->operands()) {
if (!MO.isReg())
continue;
- unsigned UserReg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(UserReg) ||
+ Register UserReg = MO.getReg();
+ if (!Register::isPhysicalRegister(UserReg) ||
!TRI->regsOverlap(Reg, UserReg))
continue;
// UserMI uses or redefines Reg. Set <undef> flags on all uses.
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index d463bee67595..729f06dda62b 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -898,7 +898,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// frame index registers. Functions which don't want/need this optimization
// will continue to use the existing code path.
if (MFI.getUseLocalStackAllocationBlock()) {
- unsigned Align = MFI.getLocalFrameMaxAlign();
+ unsigned Align = MFI.getLocalFrameMaxAlign().value();
// Adjust to alignment boundary.
Offset = alignTo(Offset, Align, Skew);
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index da3ef4b771f3..74e721dbd138 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -129,7 +129,7 @@ const PseudoSourceValue *
PseudoSourceValueManager::getFixedStack(int FI) {
std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
if (!V)
- V = llvm::make_unique<FixedStackPseudoSourceValue>(FI, TII);
+ V = std::make_unique<FixedStackPseudoSourceValue>(FI, TII);
return V.get();
}
@@ -138,7 +138,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
GlobalCallEntries[GV];
if (!E)
- E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV, TII);
+ E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TII);
return E.get();
}
@@ -147,6 +147,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
ExternalCallEntries[ES];
if (!E)
- E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII);
+ E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII);
return E.get();
}
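
The llvm::make_unique to std::make_unique rewrites here (and in RegAllocPBQP below) follow LLVM's move to C++14, where the standard library provides make_unique directly. A standalone sketch of the lazily-populated cache pattern these call sites use; FixedStackValue and the map are illustrative stand-ins, not the in-tree types.

#include <cstdio>
#include <map>
#include <memory>

struct FixedStackValue {
  int FI;
  explicit FixedStackValue(int FI) : FI(FI) {}
};

static std::map<int, std::unique_ptr<FixedStackValue>> FSValues;

static const FixedStackValue *getFixedStack(int FI) {
  std::unique_ptr<FixedStackValue> &V = FSValues[FI];
  if (!V) // create on first request, then hand out the cached pointer
    V = std::make_unique<FixedStackValue>(FI);
  return V.get();
}

int main() {
  const FixedStackValue *A = getFixedStack(3);
  const FixedStackValue *B = getFixedStack(3);
  std::printf("same cached object: %s\n", A == B ? "yes" : "no"); // yes
  return 0;
}
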
diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp
index f05c97ad621e..2850033e6419 100644
--- a/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -9,6 +9,7 @@
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 1cbe75c27d13..156daaa03bb5 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -73,7 +73,7 @@ void RegAllocBase::seedLiveRegs() {
NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName,
TimerGroupDescription, TimePassesIsEnabled);
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
enqueue(&LIS->getInterval(Reg));
@@ -154,7 +154,7 @@ void RegAllocBase::allocatePhysRegs() {
continue;
}
LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ assert(Register::isVirtualRegister(SplitVirtReg->reg) &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
++NumNewQueued;
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 2ffa5e389f89..44d0233604e7 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -90,7 +90,7 @@ namespace {
explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {}
unsigned getSparseSetIndex() const {
- return TargetRegisterInfo::virtReg2Index(VirtReg);
+ return Register::virtReg2Index(VirtReg);
}
};
@@ -200,11 +200,11 @@ namespace {
void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
- return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
- return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
@@ -264,7 +264,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
/// Returns false if \p VirtReg is known to not live out of the current block.
bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
- if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) {
+ if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
}
@@ -272,7 +272,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
// If this block loops back to itself, it would be necessary to check whether
// the use comes after the def.
if (MBB->isSuccessor(MBB)) {
- MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
}
@@ -282,7 +282,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
unsigned C = 0;
for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) {
if (UseInst.getParent() != MBB || ++C >= Limit) {
- MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
}
@@ -293,7 +293,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
/// Returns false if \p VirtReg is known to not be live into the current block.
bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
- if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg)))
+ if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
return !MBB->pred_empty();
   // See if the first \p Limit defs of the register are all in the current block.
@@ -301,7 +301,7 @@ bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
unsigned C = 0;
for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
if (DefInst.getParent() != MBB || ++C >= Limit) {
- MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return !MBB->pred_empty();
}
}
@@ -394,7 +394,7 @@ void RegAllocFast::killVirtReg(LiveReg &LR) {
/// Mark virtreg as no longer available.
void RegAllocFast::killVirtReg(unsigned VirtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ assert(Register::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
@@ -405,7 +405,7 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) {
/// stack slot if needed.
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
unsigned VirtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ assert(Register::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
@@ -455,9 +455,8 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
if (MO.isUndef())
return;
- unsigned PhysReg = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
- "Bad usePhysReg operand");
+ Register PhysReg = MO.getReg();
+ assert(Register::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand");
markRegUsedInInstr(PhysReg);
switch (PhysRegState[PhysReg]) {
@@ -626,9 +625,9 @@ unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
static const unsigned ChainLengthLimit = 3;
unsigned C = 0;
do {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return Reg;
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg);
if (!VRegDef || !isCoalescable(*VRegDef))
@@ -646,7 +645,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
unsigned C = 0;
for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) {
if (isCoalescable(MI)) {
- unsigned Reg = MI.getOperand(1).getReg();
+ Register Reg = MI.getOperand(1).getReg();
Reg = traceCopyChain(Reg);
if (Reg != 0)
return Reg;
@@ -662,7 +661,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
const unsigned VirtReg = LR.VirtReg;
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ assert(Register::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
@@ -671,8 +670,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
<< " with hint " << printReg(Hint0, TRI) << '\n');
// Take hint when possible.
- if (TargetRegisterInfo::isPhysicalRegister(Hint0) &&
- MRI->isAllocatable(Hint0) && RC.contains(Hint0)) {
+ if (Register::isPhysicalRegister(Hint0) && MRI->isAllocatable(Hint0) &&
+ RC.contains(Hint0)) {
// Ignore the hint if we would have to spill a dirty register.
unsigned Cost = calcSpillCost(Hint0);
if (Cost < spillDirty) {
@@ -692,9 +691,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
// Try other hint.
unsigned Hint1 = traceCopies(VirtReg);
- if (TargetRegisterInfo::isPhysicalRegister(Hint1) &&
- MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
- !isRegUsedInInstr(Hint1)) {
+ if (Register::isPhysicalRegister(Hint1) && MRI->isAllocatable(Hint1) &&
+ RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) {
// Ignore the hint if we would have to spill a dirty register.
unsigned Cost = calcSpillCost(Hint1);
if (Cost < spillDirty) {
@@ -752,8 +750,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
assert(MO.isUndef() && "expected undef use");
- unsigned VirtReg = MO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg");
+ Register VirtReg = MO.getReg();
+ assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg");
LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
MCPhysReg PhysReg;
@@ -778,14 +776,13 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
/// Allocates a register for VirtReg and mark it as dirty.
MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register");
+ assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
if (!LRI->PhysReg) {
// If there is no hint, peek at the only use of this register.
- if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+ if ((!Hint || !Register::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
// It's a copy, use the destination register as a hint.
@@ -812,8 +809,7 @@ RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
unsigned OpNum,
unsigned VirtReg,
unsigned Hint) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register");
+ assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
@@ -866,7 +862,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
}
// Handle subregister index.
- MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : Register());
MO.setIsRenamable(true);
MO.setSubReg(0);
@@ -893,8 +889,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
SmallSet<unsigned, 8> ThroughRegs;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
(MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
@@ -908,8 +904,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Reg || !Register::isPhysicalRegister(Reg))
+ continue;
markRegUsedInInstr(Reg);
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
if (ThroughRegs.count(PhysRegState[*AI]))
@@ -922,8 +919,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
+ continue;
if (MO.isUse()) {
if (!MO.isTied()) continue;
LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
@@ -947,8 +945,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
+ continue;
if (!MO.isEarlyClobber())
continue;
// Note: defineVirtReg may invalidate MO.
@@ -961,8 +960,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
UsedInInstr.clear();
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Reg || !Register::isPhysicalRegister(Reg))
+ continue;
LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
<< " as used in instr\n");
markRegUsedInInstr(Reg);
@@ -1002,10 +1002,8 @@ void RegAllocFast::dumpState() {
e = LiveVirtRegs.end(); i != e; ++i) {
if (!i->PhysReg)
continue;
- assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
- "Bad map key");
- assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
- "Bad map value");
+ assert(Register::isVirtualRegister(i->VirtReg) && "Bad map key");
+ assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
}
}
@@ -1045,9 +1043,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
continue;
}
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg) continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
VirtOpEnd = i+1;
if (MO.isUse()) {
hasTiedOps = hasTiedOps ||
@@ -1096,8 +1094,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
for (unsigned I = 0; I != VirtOpEnd; ++I) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
+ continue;
if (MO.isUse()) {
if (MO.isUndef()) {
HasUndefUse = true;
@@ -1124,8 +1123,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
for (MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
assert(MO.isUndef() && "Should only have undef virtreg uses left");
@@ -1139,8 +1138,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (hasEarlyClobbers) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Reg || !Register::isPhysicalRegister(Reg))
+ continue;
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MO.isTied()) continue;
markRegUsedInInstr(Reg);
@@ -1166,10 +1166,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !MRI->isAllocatable(Reg))
+ if (!Reg || !Register::isPhysicalRegister(Reg) || !MRI->isAllocatable(Reg))
continue;
definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
}
@@ -1180,10 +1179,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// We have already dealt with phys regs in the previous scan.
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
@@ -1215,8 +1214,8 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
// mostly constants and frame indices.
if (!MO.isReg())
return;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
return;
// See if this virtual register has already been allocated to a physical
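
For context on the RegAllocFast hunks: mayLiveOut and mayLiveIn answer "could this vreg cross a block boundary?" with a bounded scan, inspecting only the first few users and conservatively answering "maybe" once the limit is hit or a user is found in another block. A standalone sketch of that heuristic over a toy Use record; the constant mirrors the spirit of the in-tree limit rather than quoting it.

#include <cstdio>
#include <vector>

struct Use { int ParentBlock; };

static bool mayLiveOut(const std::vector<Use> &Uses, int CurrentBlock,
                       bool HasSuccessors) {
  static const unsigned Limit = 8; // give up after a few local users
  unsigned C = 0;
  for (const Use &U : Uses)
    if (U.ParentBlock != CurrentBlock || ++C >= Limit)
      return HasSuccessors; // conservative: may be live past the block
  return false; // every (few) use is local: provably block-local
}

int main() {
  std::vector<Use> Local = {{7}, {7}, {7}};
  std::vector<Use> Escaping = {{7}, {9}};
  std::printf("local uses may live out: %d\n", mayLiveOut(Local, 7, true));
  std::printf("escaping uses may live out: %d\n",
              mayLiveOut(Escaping, 7, true));
  return 0;
}
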
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 771fc46415db..d27db678f02a 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -685,7 +685,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// The queue holds (size, reg) pairs.
const unsigned Size = LI->getSize();
const unsigned Reg = LI->reg;
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ assert(Register::isVirtualRegister(Reg) &&
"Can only enqueue virtual registers");
unsigned Prio;
@@ -899,7 +899,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
// Check if any interfering live range is heavier than MaxWeight.
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
- assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
+ assert(Register::isVirtualRegister(Intf->reg) &&
"Only expecting virtual register interference from query");
// Do not allow eviction of a virtual register if we are in the middle
@@ -984,7 +984,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
continue;
// Cannot evict non virtual reg interference.
- if (!TargetRegisterInfo::isVirtualRegister(Intf->reg))
+ if (!Register::isVirtualRegister(Intf->reg))
return false;
// Never evict spill products. They cannot split or spill.
if (getStage(*Intf) == RS_Done)
@@ -2881,7 +2881,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
continue;
}
// Get the current assignment.
- Register OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
+ Register OtherPhysReg = Register::isPhysicalRegister(OtherReg)
? OtherReg
: VRM->getPhys(OtherReg);
// Push the collected information.
@@ -2919,7 +2919,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
SmallVector<unsigned, 2> RecoloringCandidates;
HintsInfo Info;
unsigned Reg = VirtReg.reg;
- unsigned PhysReg = VRM->getPhys(Reg);
+ Register PhysReg = VRM->getPhys(Reg);
// Start the recoloring algorithm from the input live-interval, then
// it will propagate to the ones that are copy-related with it.
Visited.insert(Reg);
@@ -2932,7 +2932,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
Reg = RecoloringCandidates.pop_back_val();
// We cannot recolor physical register.
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
assert(VRM->hasPhys(Reg) && "We have unallocated variable!!");
@@ -2940,7 +2940,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
// Get the live interval mapped with this virtual register to be able
// to check for the interference with the new color.
LiveInterval &LI = LIS->getInterval(Reg);
- unsigned CurrPhys = VRM->getPhys(Reg);
+ Register CurrPhys = VRM->getPhys(Reg);
// Check that the new color matches the register class constraints and
// that it is free for this live range.
if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
@@ -3021,7 +3021,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
for (LiveInterval *LI : SetOfBrokenHints) {
- assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
+ assert(Register::isVirtualRegister(LI->reg) &&
"Recoloring is possible only for virtual registers");
// Some dead defs may be around (e.g., because of debug uses).
// Ignore those.
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 7a5a6c148ed4..3c4a46b12f99 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -558,7 +558,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
// Iterate over all live ranges.
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
VRegsToAlloc.insert(Reg);
@@ -824,11 +824,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
if (!VRegsToAlloc.empty()) {
const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
- llvm::make_unique<PBQPRAConstraintList>();
- ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
- ConstraintsRoot->addConstraint(llvm::make_unique<Interference>());
+ std::make_unique<PBQPRAConstraintList>();
+ ConstraintsRoot->addConstraint(std::make_unique<SpillCosts>());
+ ConstraintsRoot->addConstraint(std::make_unique<Interference>());
if (PBQPCoalescing)
- ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>());
+ ConstraintsRoot->addConstraint(std::make_unique<Coalescing>());
ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
bool PBQPAllocComplete = false;
@@ -848,7 +848,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
".pbqpgraph";
std::error_code EC;
- raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
+ raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_Text);
LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
<< GraphFileName << "\"\n");
G.dump(OS);
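
The RegAllocPBQP hunk also shows the composition pattern those make_unique calls feed: a root constraint list owns individual constraints as unique_ptrs and applies them in order, tolerating a null entry from targets with no custom constraints. A toy sketch of that pattern; the types below are illustrative, not the in-tree PBQPRAConstraint classes.

#include <cstdio>
#include <memory>
#include <vector>

struct Graph { int Tweaks = 0; };

struct Constraint {
  virtual ~Constraint() = default;
  virtual void apply(Graph &G) const = 0;
};
struct SpillCosts : Constraint {
  void apply(Graph &G) const override { ++G.Tweaks; }
};
struct Interference : Constraint {
  void apply(Graph &G) const override { ++G.Tweaks; }
};

struct ConstraintList : Constraint {
  std::vector<std::unique_ptr<Constraint>> List;
  void addConstraint(std::unique_ptr<Constraint> C) {
    if (C) // skip null, e.g. a target with no custom constraints
      List.push_back(std::move(C));
  }
  void apply(Graph &G) const override {
    for (const auto &C : List)
      C->apply(G);
  }
};

int main() {
  ConstraintList Root;
  Root.addConstraint(std::make_unique<SpillCosts>());
  Root.addConstraint(std::make_unique<Interference>());
  Root.addConstraint(nullptr);
  Graph G;
  Root.apply(G);
  std::printf("constraints applied: %d\n", G.Tweaks);
  return 0;
}
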
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index b37dfada7101..757ff0e44953 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -142,6 +142,13 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
RegMask[Reg / 32] &= ~(1u << Reg % 32);
};
+
+ // Some targets can clobber registers "inside" a call, typically in
+ // linker-generated code.
+ for (const MCPhysReg Reg : TRI->getIntraCallClobberedRegs(&MF))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ SetRegAsDefined(*AI);
+
// Scan all the physical registers. When a register is defined in the current
// function set it and all the aliasing registers as defined in the regmask.
// FIXME: Rewrite to use regunits.
@@ -164,7 +171,8 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
SetRegAsDefined(PReg);
}
- if (TargetFrameLowering::isSafeForNoCSROpt(F)) {
+ if (TargetFrameLowering::isSafeForNoCSROpt(F) &&
+ MF.getSubtarget().getFrameLowering()->isProfitableForNoCSROpt(F)) {
++NumCSROpt;
LLVM_DEBUG(dbgs() << MF.getName()
<< " function optimized for not having CSR.\n");
diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp
index fc4be82d215e..0205e6193741 100644
--- a/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -130,7 +130,11 @@ bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) {
};
if (const Function *F = findCalledFunction(M, MI)) {
- UpdateRegMask(*F);
+ if (F->isDefinitionExact()) {
+ UpdateRegMask(*F);
+ } else {
+ LLVM_DEBUG(dbgs() << "Function definition is not exact\n");
+ }
} else {
LLVM_DEBUG(dbgs() << "Failed to find call target function\n");
}
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 2db6ab454cea..6ff5ddbc023d 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -406,8 +406,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
Partial = SrcSub || DstSub;
// If one register is a physreg, it must be Dst.
- if (TargetRegisterInfo::isPhysicalRegister(Src)) {
- if (TargetRegisterInfo::isPhysicalRegister(Dst))
+ if (Register::isPhysicalRegister(Src)) {
+ if (Register::isPhysicalRegister(Dst))
return false;
std::swap(Src, Dst);
std::swap(SrcSub, DstSub);
@@ -416,7 +416,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
- if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
+ if (Register::isPhysicalRegister(Dst)) {
// Eliminate DstSub on a physreg.
if (DstSub) {
Dst = TRI.getSubReg(Dst, DstSub);
@@ -474,8 +474,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
CrossClass = NewRC != DstRC || NewRC != SrcRC;
}
// Check our invariants
- assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
- assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ assert(Register::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(Register::isPhysicalRegister(Dst) && DstSub) &&
"Cannot have a physical SubIdx");
SrcReg = Src;
DstReg = Dst;
@@ -483,7 +483,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
}
bool CoalescerPair::flip() {
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ if (Register::isPhysicalRegister(DstReg))
return false;
std::swap(SrcReg, DstReg);
std::swap(SrcIdx, DstIdx);
@@ -507,8 +507,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
}
// Now check that Dst matches DstReg.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ if (Register::isPhysicalRegister(DstReg)) {
+ if (!Register::isPhysicalRegister(Dst))
return false;
assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
// DstSub could be set for a physreg from INSERT_SUBREG.
@@ -802,7 +802,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return { false, false };
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
- unsigned NewReg = NewDstMO.getReg();
+ Register NewReg = NewDstMO.getReg();
if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
return { false, false };
@@ -835,8 +835,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
return { false, false };
- if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
- TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+ if (Register::isVirtualRegister(IntA.reg) &&
+ Register::isVirtualRegister(IntB.reg) &&
!MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
return { false, false };
if (NewMI != DefMI) {
@@ -877,7 +877,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
// Kill flags are no longer accurate. They are recomputed after RA.
UseMO.setIsKill(false);
- if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ if (Register::isPhysicalRegister(NewReg))
UseMO.substPhysReg(NewReg, *TRI);
else
UseMO.setReg(NewReg);
@@ -1188,7 +1188,7 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
/// defining a subregister.
static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
- assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ assert(!Register::isPhysicalRegister(Reg) &&
"This code cannot handle physreg aliasing");
for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
@@ -1209,7 +1209,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ if (Register::isPhysicalRegister(SrcReg))
return false;
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
@@ -1240,7 +1240,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
return false;
// Only support subregister destinations when the def is read-undef.
MachineOperand &DstOperand = CopyMI->getOperand(0);
- unsigned CopyDstReg = DstOperand.getReg();
+ Register CopyDstReg = DstOperand.getReg();
if (DstOperand.getSubReg() && !DstOperand.isUndef())
return false;
@@ -1254,7 +1254,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
if (!DefMI->isImplicitDef()) {
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ if (Register::isPhysicalRegister(DstReg)) {
unsigned NewDstReg = DstReg;
unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
@@ -1269,7 +1269,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
// Theoretically, some stack frame reference could exist. Just make sure
// it hasn't actually happened.
- assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ assert(Register::isVirtualRegister(DstReg) &&
"Only expect to deal with virtual or physical registers");
}
}
@@ -1317,7 +1317,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (MO.isReg()) {
assert(MO.isImplicit() && "No explicit operands after implicit operands.");
// Discard VReg implicit defs.
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ if (Register::isPhysicalRegister(MO.getReg()))
ImplicitOps.push_back(MO);
}
}
@@ -1336,12 +1336,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MachineOperand &MO = NewMI.getOperand(i);
if (MO.isReg() && MO.isDef()) {
assert(MO.isImplicit() && MO.isDead() &&
- TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
+ Register::isPhysicalRegister(MO.getReg()));
NewMIImplDefs.push_back(MO.getReg());
}
}
- if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (Register::isVirtualRegister(DstReg)) {
unsigned NewIdx = NewMI.getOperand(0).getSubReg();
if (DefRC != nullptr) {
@@ -1428,7 +1428,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
// The New instruction may be defining a sub-register of what's actually
// been asked for. If so it must implicitly define the whole thing.
- assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ assert(Register::isPhysicalRegister(DstReg) &&
"Only expect virtual or physical registers in remat");
NewMI.getOperand(0).setIsDead(true);
NewMI.addOperand(MachineOperand::CreateReg(
@@ -1480,7 +1480,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugValue()) {
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ if (Register::isPhysicalRegister(DstReg))
UseMO.substPhysReg(DstReg, *TRI);
else
UseMO.setReg(DstReg);
@@ -1651,7 +1651,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
unsigned DstReg,
unsigned SubIdx) {
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ bool DstIsPhys = Register::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
@@ -2411,8 +2411,8 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
assert(MI && "No defining instruction");
if (!MI->isFullCopy())
return std::make_pair(VNI, TrackReg);
- unsigned SrcReg = MI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI->getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return std::make_pair(VNI, TrackReg);
const LiveInterval &LI = LIS->getInterval(SrcReg);
@@ -3189,9 +3189,9 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
assert(MI && "No instruction to erase");
if (MI->isCopy()) {
- unsigned Reg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ Register Reg = MI->getOperand(1).getReg();
+ if (Register::isVirtualRegister(Reg) && Reg != CP.getSrcReg() &&
+ Reg != CP.getDstReg())
ShrinkRegs.push_back(Reg);
}
ErasedInstrs.insert(MI);
@@ -3463,10 +3463,10 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
if (Copy->getOperand(1).isUndef())
return false;
- unsigned SrcReg = Copy->getOperand(1).getReg();
- unsigned DstReg = Copy->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
- || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ Register SrcReg = Copy->getOperand(1).getReg();
+ Register DstReg = Copy->getOperand(0).getReg();
+ if (Register::isPhysicalRegister(SrcReg) ||
+ Register::isPhysicalRegister(DstReg))
return false;
return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
@@ -3526,12 +3526,11 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
return false;
// Check if the destination of this copy has any other affinity.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ if (Register::isPhysicalRegister(DstReg) ||
// If SrcReg is a physical register, the copy won't be coalesced.
// Ignoring it may have other side effects (like missing
// rematerialization). So keep it.
- TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
- !isTerminalReg(DstReg, Copy, MRI))
+ Register::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI))
return false;
// DstReg is a terminal node. Check if it interferes with any other
@@ -3554,7 +3553,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
if (OtherReg == SrcReg)
OtherReg = OtherSrcReg;
// Check if OtherReg is a non-terminal.
- if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
+ if (Register::isPhysicalRegister(OtherReg) ||
isTerminalReg(OtherReg, MI, MRI))
continue;
// Check that OtherReg interferes with DstReg.
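The RegisterCoalescer hunks above are mechanical: static predicates on
TargetRegisterInfo move to the new Register value type, and unsigned locals
holding register numbers become Register. A minimal sketch of the helpers
these hunks rely on, simplified to omit the stack-slot encoding (the real
class lives in llvm/CodeGen/Register.h):

    class Register {
      unsigned Reg;
    public:
      constexpr Register(unsigned Val = 0) : Reg(Val) {}
      // Virtual registers are numbered with the high bit set, so a signed
      // comparison separates them from physical registers (which start at 1;
      // 0 means "no register").
      static bool isVirtualRegister(unsigned Reg) { return int(Reg) < 0; }
      static bool isPhysicalRegister(unsigned Reg) { return int(Reg) > 0; }
      static unsigned virtReg2Index(unsigned Reg) { return Reg & ~(1u << 31); }
      static unsigned index2VirtReg(unsigned Index) { return Index | (1u << 31); }
      operator unsigned() const { return Reg; } // keeps unsigned call sites working
    };

The implicit conversion to unsigned is what lets the patch migrate
declarations incrementally without touching every consumer at once.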
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 7d9b3aa9b2d7..bf192d1c530d 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -134,6 +134,22 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
}
dbgs() << '\n';
}
+
+LLVM_DUMP_METHOD
+void PressureChange::dump() const {
+ dbgs() << "[" << getPSetOrMax() << ", " << getUnitInc() << "]\n";
+}
+
+void RegPressureDelta::dump() const {
+ dbgs() << "[Excess=";
+ Excess.dump();
+ dbgs() << ", CriticalMax=";
+ CriticalMax.dump();
+ dbgs() << ", CurrentMax=";
+ CurrentMax.dump();
+ dbgs() << "]\n";
+}
+
#endif
void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
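The new dump methods give pressure values a printable form: each
PressureChange prints as [pressure-set, unit-increment], and RegPressureDelta
chains its Excess/CriticalMax/CurrentMax components. A hypothetical debugging
call, assuming a RegPressureDelta named Delta computed by the tracker:

    LLVM_DEBUG(Delta.dump()); // each component renders as [pset, unit-inc]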
@@ -219,7 +235,7 @@ void LiveRegSet::clear() {
}
static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return &LIS.getInterval(Reg);
return LIS.getCachedRegUnit(Reg);
}
@@ -345,7 +361,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
assert(isBottomClosed() && "need bottom-up tracking to initialize.");
for (const RegisterMaskPair &Pair : P.LiveOutRegs) {
unsigned RegUnit = Pair.RegUnit;
- if (TargetRegisterInfo::isVirtualRegister(RegUnit)
+ if (Register::isVirtualRegister(RegUnit)
&& !RPTracker.hasUntiedDef(RegUnit))
increaseSetPressure(LiveThruPressure, *MRI, RegUnit,
LaneBitmask::getNone(), Pair.LaneMask);
@@ -406,7 +422,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit,
SlotIndex Pos, LaneBitmask SafeDefault,
bool(*Property)(const LiveRange &LR, SlotIndex Pos)) {
- if (TargetRegisterInfo::isVirtualRegister(RegUnit)) {
+ if (Register::isVirtualRegister(RegUnit)) {
const LiveInterval &LI = LIS.getInterval(RegUnit);
LaneBitmask Result;
if (TrackLaneMasks && LI.hasSubRanges()) {
@@ -483,7 +499,7 @@ class RegisterOperandsCollector {
void collectOperand(const MachineOperand &MO) const {
if (!MO.isReg() || !MO.getReg())
return;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (MO.isUse()) {
if (!MO.isUndef() && !MO.isInternalRead())
pushReg(Reg, RegOpers.Uses);
@@ -503,7 +519,7 @@ class RegisterOperandsCollector {
void pushReg(unsigned Reg,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll()));
} else if (MRI.isAllocatable(Reg)) {
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
@@ -514,7 +530,7 @@ class RegisterOperandsCollector {
void collectOperandLanes(const MachineOperand &MO) const {
if (!MO.isReg() || !MO.getReg())
return;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
unsigned SubRegIdx = MO.getSubReg();
if (MO.isUse()) {
if (!MO.isUndef() && !MO.isInternalRead())
@@ -535,7 +551,7 @@ class RegisterOperandsCollector {
void pushRegLanes(unsigned Reg, unsigned SubRegIdx,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
LaneBitmask LaneMask = SubRegIdx != 0
? TRI.getSubRegIndexLaneMask(SubRegIdx)
: MRI.getMaxLaneMaskForVReg(Reg);
@@ -590,7 +606,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
// If the def is all that is live after the instruction, then in case
// of a subregister def we need a read-undef flag.
unsigned RegUnit = I->RegUnit;
- if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
+ if (Register::isVirtualRegister(RegUnit) &&
AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none())
AddFlagsMI->setRegisterDefReadUndef(RegUnit);
@@ -616,7 +632,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
if (AddFlagsMI != nullptr) {
for (const RegisterMaskPair &P : DeadDefs) {
unsigned RegUnit = P.RegUnit;
- if (!TargetRegisterInfo::isVirtualRegister(RegUnit))
+ if (!Register::isVirtualRegister(RegUnit))
continue;
LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
Pos.getDeadSlot());
@@ -825,7 +841,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
if (TrackUntiedDefs) {
for (const RegisterMaskPair &Def : RegOpers.Defs) {
unsigned RegUnit = Def.RegUnit;
- if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
+ if (Register::isVirtualRegister(RegUnit) &&
(LiveRegs.contains(RegUnit) & Def.LaneMask).none())
UntiedDefs.insert(RegUnit);
}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index bb19110e6d70..ec0868acab38 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -49,7 +49,7 @@ using namespace llvm;
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
-void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
+void RegScavenger::setRegUsed(Register Reg, LaneBitmask LaneMask) {
LiveUnits.addRegMasked(Reg, LaneMask);
}
@@ -96,12 +96,12 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
}
}
-void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) {
+void RegScavenger::addRegUnits(BitVector &BV, Register Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.set(*RUI);
}
-void RegScavenger::removeRegUnits(BitVector &BV, unsigned Reg) {
+void RegScavenger::removeRegUnits(BitVector &BV, Register Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.reset(*RUI);
}
@@ -133,8 +133,8 @@ void RegScavenger::determineKillsAndDefs() {
}
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
@@ -204,8 +204,8 @@ void RegScavenger::forward() {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
if (MO.isUndef())
@@ -278,14 +278,14 @@ void RegScavenger::backward() {
--MBBI;
}
-bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
+bool RegScavenger::isRegUsed(Register Reg, bool includeReserved) const {
if (isReserved(Reg))
return includeReserved;
return !LiveUnits.available(Reg);
}
-unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
- for (unsigned Reg : *RC) {
+Register RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (Register Reg : *RC) {
if (!isRegUsed(Reg)) {
LLVM_DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI)
<< "\n");
@@ -297,13 +297,13 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
BitVector Mask(TRI->getNumRegs());
- for (unsigned Reg : *RC)
+ for (Register Reg : *RC)
if (!isRegUsed(Reg))
Mask.set(Reg);
return Mask;
}
-unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
BitVector &Candidates,
unsigned InstrLimit,
MachineBasicBlock::iterator &UseMI) {
@@ -329,7 +329,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
Candidates.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg() || MO.isUndef() || !MO.getReg())
continue;
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (Register::isVirtualRegister(MO.getReg())) {
if (MO.isDef())
isVirtDefInsn = true;
else if (MO.isKill())
@@ -430,7 +430,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI,
// be useful for this other vreg as well later.
bool FoundVReg = false;
for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
FoundVReg = true;
break;
}
@@ -457,7 +457,7 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
}
RegScavenger::ScavengedInfo &
-RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
+RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
MachineBasicBlock::iterator Before,
MachineBasicBlock::iterator &UseMI) {
// Find an available scavenging slot with size and alignment matching
@@ -531,7 +531,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
return Scavenged[SI];
}
-unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj, bool AllowSpill) {
MachineInstr &MI = *I;
@@ -542,7 +542,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// Exclude all the registers being used by the instruction.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ !Register::isVirtualRegister(MO.getReg()))
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
Candidates.reset(*AI);
}
@@ -556,7 +556,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
- unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+ Register SReg = findSurvivorReg(I, Candidates, 25, UseMI);
// If we found an unused register there is no reason to spill it.
if (!isRegUsed(SReg)) {
@@ -576,7 +576,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
-unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
+Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator To,
bool RestoreAfter, int SPAdj,
bool AllowSpill) {
@@ -620,8 +620,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
/// \p ReserveAfter controls whether the scavenged register needs to be reserved
/// after the current instruction; otherwise it is only reserved before the
/// current instruction.
-static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
- unsigned VReg, bool ReserveAfter) {
+static Register scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
+ Register VReg, bool ReserveAfter) {
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
#ifndef NDEBUG
// Verify that all definitions and uses are in the same basic block.
@@ -664,7 +664,7 @@ static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
// spill/reload if necessary.
int SPAdj = 0;
const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
- unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
+ Register SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
ReserveAfter, SPAdj);
MRI.replaceRegWith(VReg, SReg);
++NumScavengedRegs;
@@ -694,17 +694,17 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
for (const MachineOperand &MO : NMI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// We only care about virtual registers and ignore virtual registers
// created by the target callbacks in the process (those will be handled
// in a scavenging round).
- if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
- TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ if (!Register::isVirtualRegister(Reg) ||
+ Register::virtReg2Index(Reg) >= InitialNumVirtRegs)
continue;
if (!MO.readsReg())
continue;
- unsigned SReg = scavengeVReg(MRI, RS, Reg, true);
+ Register SReg = scavengeVReg(MRI, RS, Reg, true);
N->addRegisterKilled(SReg, &TRI, false);
RS.setRegUsed(SReg);
}
@@ -716,10 +716,10 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// Only vregs, no newly created vregs (see above).
- if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
- TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ if (!Register::isVirtualRegister(Reg) ||
+ Register::virtReg2Index(Reg) >= InitialNumVirtRegs)
continue;
// We have to look at all operands anyway so we can precalculate here
// whether there is a reading operand. This allows us to skip the use
@@ -730,14 +730,14 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
NextInstructionReadsVReg = true;
}
if (MO.isDef()) {
- unsigned SReg = scavengeVReg(MRI, RS, Reg, false);
+ Register SReg = scavengeVReg(MRI, RS, Reg, false);
I->addRegisterDead(SReg, &TRI, false);
}
}
}
#ifndef NDEBUG
for (const MachineOperand &MO : MBB.front().operands()) {
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
assert(!MO.isInternalRead() && "Cannot assign inside bundles");
assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
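With these signature changes the scavenger consistently takes and returns
Register. A sketch of a typical call site, e.g. inside a target's
eliminateFrameIndex hook; RS, MI, FIOperandNum, and the register-class name
are hypothetical stand-ins, not code from this patch:

    // Scavenge a temporary register around MI; this may insert a spill and
    // reload if nothing is free (AllowSpill defaults to true).
    Register Tmp = RS->scavengeRegister(&Hypo::GPRRegClass, MI, /*SPAdj=*/0);
    MI->getOperand(FIOperandNum).ChangeToRegister(Tmp, /*isDef=*/false);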
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index 22cff48c3051..e3f5abb6301f 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -138,7 +138,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
LLVM_DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:");
for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
++I) {
- unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+ Register NewVReg = MRI->createVirtualRegister(RegClass);
LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
Intervals.push_back(&NewLI);
LLVM_DEBUG(dbgs() << ' ' << printReg(NewVReg));
@@ -390,7 +390,7 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
// there can't be any further splitting.
bool Changed = false;
for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
if (!LIS->hasInterval(Reg))
continue;
LiveInterval &LI = LIS->getInterval(Reg);
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index a6bc7330e2cc..ddbbd0f8d6e9 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -871,7 +871,7 @@ public:
report_fatal_error("TargetLowering instance is required");
auto *DL = &F.getParent()->getDataLayout();
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
// Compute DT and LI only for functions that have the attribute.
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 7776dffb4e9c..b4037499d7d1 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -173,15 +173,30 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
return;
}
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %mask_1 = and i16 %scalar_mask, (1 << Idx)
+ // %cond = icmp ne i16 %mask_1, 0
// br i1 %cond, label %cond.load, label %else
//
-
- Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx);
+ }
// Create "cond" block
//
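The pattern introduced here and repeated for store/gather/scatter below
replaces a per-lane extractelement on the <N x i1> mask with a single bitcast
of the whole mask to an iN integer followed by one bit test per lane. In
scalar terms, a C++ model of the emitted IR for the 16-lane case used in the
comments:

    // One bitcast up front: <16 x i1> %mask becomes i16 %scalar_mask.
    // Each lane's predicate is then a plain bit test (and + icmp ne 0):
    bool laneEnabled(uint16_t ScalarMask, unsigned Idx) {
      return (ScalarMask & (uint16_t)(1u << Idx)) != 0;
    }

On X86 this maps naturally onto TEST/BT-style instructions, which is why the
comment singles out that target.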
@@ -290,13 +305,29 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
return;
}
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %mask_1 = and i16 %scalar_mask, (1 << Idx)
+ // %cond = icmp ne i16 %mask_1, 0
// br i1 %cond, label %cond.store, label %else
//
- Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx);
+ }
// Create "cond" block
//
@@ -392,15 +423,30 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
return;
}
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
- // %Mask1 = extractelement <16 x i1> %Mask, i32 1
+ // %Mask1 = and i16 %scalar_mask, (1 << Idx)
+ // %cond = icmp ne i16 %Mask1, 0
// br i1 %cond, label %cond.load, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
// Create "cond" block
//
@@ -499,14 +545,29 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
return;
}
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
- // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
+ // %Mask1 = and i16 %scalar_mask, (1 << Idx)
+ // %cond = icmp ne i16 %Mask1, 0
// br i1 %cond, label %cond.store, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
// Create "cond" block
//
@@ -555,6 +616,32 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
// The result vector
Value *VResult = PassThru;
+ // Take a shortcut if the mask is a vector of constants.
+ if (isConstantIntVector(Mask)) {
+ unsigned MemIndex = 0;
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx));
+ VResult =
+ Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
+ ++MemIndex;
+ }
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
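For reference, expandload addresses memory differently from a plain masked
load: enabled lanes read consecutive memory locations while disabled lanes
keep the pass-through value. A scalar model of the semantics that the
constant-mask shortcut above implements (a sketch; N, Mask, Ptr, PassThru,
and Result are illustrative names):

    for (unsigned Idx = 0, Mem = 0; Idx != N; ++Idx)
      Result[Idx] = Mask[Idx] ? Ptr[Mem++] : PassThru[Idx];

This is why MemIndex advances only on enabled lanes in the shortcut.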
@@ -563,8 +650,14 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
// br i1 %mask_1, label %cond.load, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Idx);
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
// Create "cond" block
//
@@ -633,13 +726,44 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
unsigned VectorWidth = VecType->getNumElements();
+ // Take a shortcut if the mask is a vector of constants.
+ if (isConstantIntVector(Mask)) {
+ unsigned MemIndex = 0;
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *OneElt =
+ Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+ Builder.CreateAlignedStore(OneElt, NewPtr, 1);
+ ++MemIndex;
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
// br i1 %mask_1, label %cond.store, label %else
//
- Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
// Create "cond" block
//
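Compressstore is the mirror image: enabled lanes are written to consecutive
memory locations. A scalar model matching the constant-mask shortcut above
(illustrative names again):

    for (unsigned Idx = 0, Mem = 0; Idx != N; ++Idx)
      if (Mask[Idx])
        Ptr[Mem++] = Src[Idx];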
@@ -727,17 +851,24 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
switch (II->getIntrinsicID()) {
default:
break;
- case Intrinsic::masked_load:
+ case Intrinsic::masked_load: {
// Scalarize unsupported vector masked load
- if (TTI->isLegalMaskedLoad(CI->getType()))
+ unsigned Alignment =
+ cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment)))
return false;
scalarizeMaskedLoad(CI, ModifiedDT);
return true;
- case Intrinsic::masked_store:
- if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType()))
+ }
+ case Intrinsic::masked_store: {
+ unsigned Alignment =
+ cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(),
+ MaybeAlign(Alignment)))
return false;
scalarizeMaskedStore(CI, ModifiedDT);
return true;
+ }
case Intrinsic::masked_gather:
if (TTI->isLegalMaskedGather(CI->getType()))
return false;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index d5ad7e92299d..96a1f86c3e04 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -205,10 +204,10 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
if (ExitMI) {
for (const MachineOperand &MO : ExitMI->operands()) {
if (!MO.isReg() || MO.isDef()) continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
- } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
+ } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
}
}
@@ -285,7 +284,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// We do not need to track any dependencies for constant registers.
if (MRI.isConstantPhysReg(Reg))
return;
@@ -361,7 +360,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
{
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// No point in tracking lanemasks if we don't have interesting subregisters.
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
if (!RC.HasDisjunctSubRegs)
@@ -373,6 +372,13 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
return TRI->getSubRegIndexLaneMask(SubReg);
}
+bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
+ auto RegUse = CurrentVRegUses.find(MO.getReg());
+ if (RegUse == CurrentVRegUses.end())
+ return true;
+ return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
+}
+
/// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx.
@@ -382,7 +388,7 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
LaneBitmask DefLaneMask;
LaneBitmask KillLaneMask;
@@ -393,6 +399,18 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// earlier instruction.
KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
+ if (MO.getSubReg() != 0 && MO.isUndef()) {
+ // There may be other subregister defs of the same register in later
+ // operands of this instruction. The lanes of those defs will be live
+ // after the instruction, so they should not be treated as killed by it
+ // even though this one operand appears to kill them.
+ for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &OtherMO = MI->getOperand(I);
+ if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
+ KillLaneMask &= ~getLaneMaskForMO(OtherMO);
+ }
+ }
+
// Clear undef flag, we'll re-add it later once we know which subregister
// Def is first.
MO.setIsUndef(false);
@@ -402,8 +420,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
}
if (MO.isDead()) {
- assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
- "Dead defs should have no uses");
+ assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
} else {
// Add data dependence to all uses we found so far.
const TargetSubtargetInfo &ST = MF.getSubtarget();
@@ -491,7 +508,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
const MachineOperand &MO = MI->getOperand(OperIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// Remember the use. Data dependencies will be added when we find the def.
LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
@@ -514,7 +531,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
-static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
+static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}
@@ -701,7 +718,7 @@ void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
map.reComputeSize();
}
-void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
+void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs,
LiveIntervals *LIS,
@@ -821,10 +838,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
const MachineOperand &MO = MI.getOperand(j);
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
addPhysRegDeps(SU, j);
- } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ } else if (Register::isVirtualRegister(Reg)) {
HasVRegDef = true;
addVRegDefDeps(SU, j);
}
@@ -838,10 +855,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// additional use dependencies.
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
addPhysRegDeps(SU, j);
- } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
+ } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
addVRegUseDeps(SU, j);
}
}
@@ -1071,7 +1088,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.readsReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
@@ -1102,7 +1119,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
if (MO.isReg()) {
if (!MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
LiveRegs.removeReg(Reg);
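The AliasAnalysis -> AAResults renames in this file are a spelling cleanup
rather than a behavioral change: at this revision llvm/Analysis/AliasAnalysis.h
keeps the old name as an alias for the result class, roughly:

    using AliasAnalysis = AAResults;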
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 49c922f560fa..e8950b58d42d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -111,10 +110,20 @@ static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<bool>
+ EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner enable merging multiple stores "
+ "into a wider store"));
+
static cl::opt<unsigned> TokenFactorInlineLimit(
"combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
cl::desc("Limit the number of operands to inline for Token Factors"));
+static cl::opt<unsigned> StoreMergeDependenceLimit(
+ "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
+ cl::desc("Limit the number of times for the same StoreNode and RootNode "
+ "to bail out in store merging dependence check"));
+
namespace {
class DAGCombiner {
@@ -152,6 +161,14 @@ namespace {
/// which have not yet been combined to the worklist.
SmallPtrSet<SDNode *, 32> CombinedNodes;
+ /// Map from candidate StoreNode to a pair of RootNode and count.
+ /// The count tracks how many times we have seen the StoreNode with the
+ /// same RootNode bail out of the dependence check. Once a pair has
+ /// bailed out more than a set limit, the StoreNode is no longer
+ /// considered a store-merging candidate with that RootNode.
+ DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
+
// AA - Used for DAG load/store alias analysis.
AliasAnalysis *AA;
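StoreRootCountMap is consulted by the store-merging candidate search, which
this hunk does not show. A sketch of the intended guard, with hypothetical
local names (an assumption about the consumer, not code from this patch):

    // Skip a candidate once this (StoreNode, RootNode) pair has bailed out
    // of the dependence check too many times.
    auto It = StoreRootCountMap.find(StoreNode);
    if (It != StoreRootCountMap.end() && It->second.first == RootNode &&
        It->second.second > StoreMergeDependenceLimit)
      continue;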
@@ -236,6 +253,7 @@ namespace {
void removeFromWorklist(SDNode *N) {
CombinedNodes.erase(N);
PruningList.remove(N);
+ StoreRootCountMap.erase(N);
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
@@ -361,6 +379,7 @@ namespace {
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
+ SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
@@ -421,7 +440,6 @@ namespace {
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
- SDValue visitFP_ROUND_INREG(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
@@ -470,7 +488,7 @@ namespace {
SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
- SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
+ SDValue visitShiftByConstant(SDNode *N);
SDValue foldSelectOfConstants(SDNode *N);
SDValue foldVSelectOfConstants(SDNode *N);
@@ -497,6 +515,7 @@ namespace {
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
bool isOneUseSetCC(SDValue N) const;
+ bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
@@ -510,7 +529,7 @@ namespace {
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
- SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
@@ -521,11 +540,11 @@ namespace {
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
- SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+ SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
- SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
SDValue MatchStoreCombine(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
@@ -742,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}
+bool TargetLowering::DAGCombinerInfo::
+recursivelyDeleteUnusedNodes(SDNode *N) {
+ return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
+}
+
void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
@@ -766,195 +790,6 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
DAG.DeleteNode(N);
}
-/// Return 1 if we can compute the negated form of the specified expression for
-/// the same cost as the expression itself, or 2 if we can compute the negated
-/// form more cheaply than the expression itself.
-static char isNegatibleForFree(SDValue Op, bool LegalOperations,
- const TargetLowering &TLI,
- const TargetOptions *Options,
- bool ForCodeSize,
- unsigned Depth = 0) {
- // fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return 2;
-
- // Don't allow anything with multiple uses unless we know it is free.
- EVT VT = Op.getValueType();
- const SDNodeFlags Flags = Op->getFlags();
- if (!Op.hasOneUse() &&
- !(Op.getOpcode() == ISD::FP_EXTEND &&
- TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
- return 0;
-
- // Don't recurse exponentially.
- if (Depth > 6)
- return 0;
-
- switch (Op.getOpcode()) {
- default: return false;
- case ISD::ConstantFP: {
- if (!LegalOperations)
- return 1;
-
- // Don't invert constant FP values after legalization unless the target says
- // the negated constant is legal.
- return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
- ForCodeSize);
- }
- case ISD::BUILD_VECTOR: {
- // Only permit BUILD_VECTOR of constants.
- if (llvm::any_of(Op->op_values(), [&](SDValue N) {
- return !N.isUndef() && !isa<ConstantFPSDNode>(N);
- }))
- return 0;
- if (!LegalOperations)
- return 1;
- if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
- TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return 1;
- return llvm::all_of(Op->op_values(), [&](SDValue N) {
- return N.isUndef() ||
- TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
- ForCodeSize);
- });
- }
- case ISD::FADD:
- if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
- return 0;
-
- // After operation legalization, it might not be legal to create new FSUBs.
- if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
- return 0;
-
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, ForCodeSize, Depth + 1))
- return V;
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- ForCodeSize, Depth + 1);
- case ISD::FSUB:
- // We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
- return 0;
-
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return 1;
-
- case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, ForCodeSize, Depth + 1))
- return V;
-
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- ForCodeSize, Depth + 1);
-
- case ISD::FP_EXTEND:
- case ISD::FP_ROUND:
- case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
- ForCodeSize, Depth + 1);
- }
-}
-
-/// If isNegatibleForFree returns true, return the newly negated expression.
-static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations, bool ForCodeSize,
- unsigned Depth = 0) {
- // fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return Op.getOperand(0);
-
- assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
- const TargetOptions &Options = DAG.getTarget().Options;
- const SDNodeFlags Flags = Op->getFlags();
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Unknown code");
- case ISD::ConstantFP: {
- APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
- V.changeSign();
- return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
- }
- case ISD::BUILD_VECTOR: {
- SmallVector<SDValue, 4> Ops;
- for (SDValue C : Op->op_values()) {
- if (C.isUndef()) {
- Ops.push_back(C);
- continue;
- }
- APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
- V.changeSign();
- Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
- }
- return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
- }
- case ISD::FADD:
- assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
-
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
- Depth + 1))
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(0), Flags);
- case ISD::FSUB:
- // fold (fneg (fsub 0, B)) -> B
- if (ConstantFPSDNode *N0CFP =
- isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
- if (N0CFP->isZero())
- return Op.getOperand(1);
-
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- Op.getOperand(1), Op.getOperand(0), Flags);
-
- case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
- Depth + 1))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
-
- // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- Op.getOperand(0),
- GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1), Flags);
-
- case ISD::FP_EXTEND:
- case ISD::FSIN:
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1));
- case ISD::FP_ROUND:
- return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1));
- }
-}
-
// APInts must be the same size for most operations, this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
@@ -1124,7 +959,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
if (!OpNode.getNode())
return SDValue();
- AddToWorklist(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
@@ -1438,7 +1272,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
- AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
@@ -1591,8 +1424,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
for (SDNode *LN : UpdatedNodes) {
- AddToWorklist(LN);
AddUsersToWorklist(LN);
+ AddToWorklist(LN);
}
if (!NIsValid)
continue;
@@ -1673,6 +1506,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: return visitMULFIX(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
@@ -1736,7 +1573,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
- case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
@@ -3308,6 +3144,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
+ // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
+ if (SDValue Carry = getAsCarry(TLI, N0)) {
+ SDValue X = N1;
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
+ return DAG.getNode(ISD::ADDCARRY, DL,
+ DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
+ Carry);
+ }
+ }
+
return SDValue();
}
@@ -3442,6 +3290,30 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
return SDValue();
}
+// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
+// UMULFIXSAT here.
+SDValue DAGCombiner::visitMULFIX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Scale = N->getOperand(2);
+ EVT VT = N0.getValueType();
+
+ // fold (mulfix x, undef, scale) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // Canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
+
+ // fold (mulfix x, 0, scale) -> 0
+ if (isNullConstant(N1))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
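The new visitMULFIX covers the fixed-point multiply nodes, which compute a
double-width product shifted down by the scale operand. A scalar model of the
unsaturated signed form (a sketch; the *SAT variants additionally clamp the
result to the type's range):

    #include <cstdint>
    // smulfix x, y, scale == (x * y) >> scale, computed in double width.
    int32_t smulfix32(int32_t X, int32_t Y, unsigned Scale) {
      return (int32_t)(((int64_t)X * (int64_t)Y) >> Scale);
    }

The folds follow directly: a zero or undef operand makes the product zero
regardless of scale, and commutativity justifies canonicalizing constants to
the RHS.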
@@ -3537,7 +3409,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// x * 15 --> (x << 4) - x
// x * -33 --> -((x << 5) + x)
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
- if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+ if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
@@ -4083,10 +3955,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (VT.isVector()) {
// fold (mulhs x, 0) -> 0
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return N0;
+ // Do not return N0/N1, because they may contain undef elements.
+ if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
}
// fold (mulhs x, 0) -> 0
@@ -4095,7 +3967,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// fold (mulhs x, 1) -> (sra x, size(x)-1)
if (isOneConstant(N1))
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
- DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
+ DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
// fold (mulhs x, undef) -> 0
@@ -4130,10 +4002,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (VT.isVector()) {
// fold (mulhu x, 0) -> 0
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return N0;
+ // Do not return N0/N1, because they may contain undef elements.
+ if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
}
// fold (mulhu x, 0) -> 0
@@ -4265,6 +4137,18 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // (umul_lohi N0, 0) -> (0, 0)
+ if (isNullConstant(N->getOperand(1))) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, Zero, Zero);
+ }
+
+ // (umul_lohi N0, 1) -> (N0, 0)
+ if (isOneConstant(N->getOperand(1))) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, N->getOperand(0), Zero);
+ }
+
// If a type twice as wide is legal, transform the mulhu to a wider
// multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
@@ -4290,13 +4174,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
}
SDValue DAGCombiner::visitMULO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
bool IsSigned = (ISD::SMULO == N->getOpcode());
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+ // fold (mulo x, 0) -> 0 + no carry out
+ if (isNullOrNullSplat(N1))
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
// (mulo x, 2) -> (addo x, x)
- if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
+ if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
if (C2->getAPIntValue() == 2)
- return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
- N->getVTList(), N->getOperand(0), N->getOperand(0));
+ return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
+ N->getVTList(), N0, N0);
return SDValue();
}
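The (mulo x, 2) -> (addo x, x) fold is sound because doubling and self-adding
overflow under exactly the same conditions, signed or unsigned. A quick C++
check of the unsigned case (GCC/Clang builtins):

    #include <cstdint>
    bool mulOv(uint32_t X) { uint32_t R; return __builtin_mul_overflow(X, 2u, &R); }
    bool addOv(uint32_t X) { uint32_t R; return __builtin_add_overflow(X, X, &R); }
    // mulOv(X) == addOv(X) for every X; the signed variants agree likewise.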
@@ -4444,7 +4344,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
Level <= AfterLegalizeTypes) {
// Input types must be integer and the same.
- if (XVT.isInteger() && XVT == Y.getValueType()) {
+ if (XVT.isInteger() && XVT == Y.getValueType() &&
+ !(VT.isVector() && TLI.isTypeLegal(VT) &&
+ !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
@@ -4770,8 +4672,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
return true;
}
- // Do not change the width of a volatile load.
- if (LoadN->isVolatile())
+ // Do not change the width of a volatile or atomic load.
+ if (!LoadN->isSimple())
return false;
// Do not generate loads of non-round integer types since these can
@@ -4803,15 +4705,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
if (!MemVT.isRound())
return false;
- // Don't change the width of a volatile load.
- if (LDST->isVolatile())
+ // Don't change the width of a volatile or atomic load.
+ if (!LDST->isSimple())
return false;
// Verify that we are actually reducing a load width here.
if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
return false;
- // Ensure that this isn't going to produce an unsupported unaligned access.
+ // Ensure that this isn't going to produce an unsupported memory access.
if (ShAmt &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
LDST->getAddressSpace(), ShAmt / 8,
@@ -5076,6 +4978,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
return T1;
}
+/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
+/// For a target with a bit test, this is expected to become test + set and save
+/// at least 1 instruction.
+static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
+ assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
+
+ // This is probably not worthwhile without a supported type.
+ EVT VT = And->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // Look through an optional extension and find a 'not'.
+ // TODO: Should we favor test+set even without the 'not' op?
+ SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
+ if (Not.getOpcode() == ISD::ANY_EXTEND)
+ Not = Not.getOperand(0);
+ if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
+ return SDValue();
+
+  // Look through an optional truncation. The source operand may not be the same
+ // type as the original 'and', but that is ok because we are masking off
+ // everything but the low bit.
+ SDValue Srl = Not.getOperand(0);
+ if (Srl.getOpcode() == ISD::TRUNCATE)
+ Srl = Srl.getOperand(0);
+
+ // Match a shift-right by constant.
+ if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
+ !isa<ConstantSDNode>(Srl.getOperand(1)))
+ return SDValue();
+
+ // We might have looked through casts that make this transform invalid.
+ // TODO: If the source type is wider than the result type, do the mask and
+ // compare in the source type.
+ const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
+ unsigned VTBitWidth = VT.getSizeInBits();
+ if (ShiftAmt.uge(VTBitWidth))
+ return SDValue();
+
+ // Turn this into a bit-test pattern using mask op + setcc:
+ // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
+ SDLoc DL(And);
+ SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
+ EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Mask = DAG.getConstant(
+ APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
+ return DAG.getZExtOrTrunc(Setcc, DL, VT);
+}
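
The identity behind this transform can be verified exhaustively at a small width; a standalone sketch, independent of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      // and (not (srl X, C)), 1  ==  zext((X & (1 << C)) == 0)
      for (uint32_t X = 0; X < (1u << 16); ++X)
        for (unsigned C = 0; C < 16; ++C) {
          uint32_t Old = (~(X >> C)) & 1u;
          uint32_t New = ((X & (1u << C)) == 0) ? 1u : 0u;
          assert(Old == New);
        }
      return 0;
    }
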
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5163,6 +5118,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+
// similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
// (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
// already be zero by virtue of the width of the base type of the load.
@@ -5337,7 +5293,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
unsigned MemBitSize = MemVT.getScalarSizeInBits();
APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
if (DAG.MaskedValueIsZero(N1, ExtBits) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
+ ((!LegalOperations && LN0->isSimple()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
@@ -5358,6 +5314,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
return Shifts;
+ if (TLI.hasBitTest(N0, N1))
+ if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+ return V;
+
return SDValue();
}
@@ -5564,6 +5524,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
return true;
}
+// Match 2 elements of a packed halfword bswap.
+static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
+ if (N.getOpcode() == ISD::OR)
+ return isBSwapHWordElement(N.getOperand(0), Parts) &&
+ isBSwapHWordElement(N.getOperand(1), Parts);
+
+ if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
+ ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
+ if (!C || C->getAPIntValue() != 16)
+ return false;
+ Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
+ return true;
+ }
+
+ return false;
+}
+
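
The srl case above leans on the fact that a packed halfword bswap is the full bswap rotated by 16 bits, so an (srl (bswap x), 16) operand can supply two matched parts at once. A standalone sketch (assuming the GCC/Clang __builtin_bswap32 intrinsic):

    #include <cassert>
    #include <cstdint>

    static uint32_t hwbswap(uint32_t X) { // byte-swap each 16-bit half
      return ((X & 0x00ff00ffu) << 8) | ((X & 0xff00ff00u) >> 8);
    }

    int main() {
      for (uint32_t X : {0u, 0x01020304u, 0xaabbccddu, 0xffffffffu}) {
        uint32_t B = __builtin_bswap32(X);
        assert(hwbswap(X) == ((B << 16) | (B >> 16)));
      }
      return 0;
    }
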
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
@@ -5581,43 +5558,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
return SDValue();
// Look for either
- // (or (or (and), (and)), (or (and), (and)))
- // (or (or (or (and), (and)), (and)), (and))
- if (N0.getOpcode() != ISD::OR)
- return SDValue();
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
+ // (or (bswaphpair), (bswaphpair))
+ // (or (or (bswaphpair), (and)), (and))
+ // (or (or (and), (bswaphpair)), (and))
SDNode *Parts[4] = {};
- if (N1.getOpcode() == ISD::OR &&
- N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
+ if (isBSwapHWordPair(N0, Parts)) {
// (or (or (and), (and)), (or (and), (and)))
- if (!isBSwapHWordElement(N00, Parts))
+ if (!isBSwapHWordPair(N1, Parts))
return SDValue();
-
- if (!isBSwapHWordElement(N01, Parts))
- return SDValue();
- SDValue N10 = N1.getOperand(0);
- if (!isBSwapHWordElement(N10, Parts))
- return SDValue();
- SDValue N11 = N1.getOperand(1);
- if (!isBSwapHWordElement(N11, Parts))
- return SDValue();
- } else {
+ } else if (N0.getOpcode() == ISD::OR) {
// (or (or (or (and), (and)), (and)), (and))
if (!isBSwapHWordElement(N1, Parts))
return SDValue();
- if (!isBSwapHWordElement(N01, Parts))
- return SDValue();
- if (N00.getOpcode() != ISD::OR)
- return SDValue();
- SDValue N000 = N00.getOperand(0);
- if (!isBSwapHWordElement(N000, Parts))
- return SDValue();
- SDValue N001 = N00.getOperand(1);
- if (!isBSwapHWordElement(N001, Parts))
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
+ !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
return SDValue();
- }
+ } else
+ return SDValue();
// Make sure the parts are all coming from the same node.
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
@@ -5791,15 +5751,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
- bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
- if (!LegalMask) {
- std::swap(NewLHS, NewRHS);
- ShuffleVectorSDNode::commuteMask(Mask);
- LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
- }
-
- if (LegalMask)
- return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
+ Mask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
}
}
}
@@ -5867,8 +5823,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return V;
// See if this is some rotate idiom.
- if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
- return SDValue(Rot, 0);
+ if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
+ return Rot;
if (SDValue Load = MatchLoadCombine(N))
return Load;
@@ -5914,6 +5870,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
+/// (or (add v v) (shrl v bitwidth-1)):
+/// expands (add v v) -> (shl v 1)
+///
/// (or (mul v c0) (shrl (mul v c1) c2)):
/// expands (mul v c0) -> (shl (mul v c1) c3)
///
@@ -5936,6 +5895,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
"Existing shift must be valid as a rotate half");
ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+
+ // Value and Type of the shift.
+ SDValue OppShiftLHS = OppShift.getOperand(0);
+ EVT ShiftedVT = OppShiftLHS.getValueType();
+
+ // Amount of the existing shift.
+ ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+
+ // (add v v) -> (shl v 1)
+ if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
+ ExtractFrom.getOpcode() == ISD::ADD &&
+ ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
+ ExtractFrom.getOperand(0) == OppShiftLHS &&
+ OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
+ DAG.getShiftAmountConstant(1, ShiftedVT, DL));
+
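
A standalone sketch of the equivalence this new early-out relies on, and of the rotate-by-1 idiom it completes:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t V : {0u, 1u, 0x80000000u, 0xdeadbeefu, 0xffffffffu}) {
        assert(V + V == (V << 1));              // (add v v) is (shl v 1) mod 2^32
        uint32_t RotL1 = (V << 1) | (V >> 31);  // rotl(V, 1)
        assert(((V + V) | (V >> 31)) == RotL1); // the rotate idiom as matched
      }
      return 0;
    }
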
// Preconditions:
// (or (op0 v c0) (shiftl/r (op0 v c1) c2))
//
@@ -5959,15 +5935,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// op0 must be the same opcode on both sides, have the same LHS argument,
// and produce the same value type.
- SDValue OppShiftLHS = OppShift.getOperand(0);
- EVT ShiftedVT = OppShiftLHS.getValueType();
if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
ShiftedVT != ExtractFrom.getValueType())
return SDValue();
- // Amount of the existing shift.
- ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
@@ -6137,7 +6109,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
-SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
SDValue Neg, SDValue InnerPos,
SDValue InnerNeg, unsigned PosOpcode,
unsigned NegOpcode, const SDLoc &DL) {
@@ -6152,32 +6124,33 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
- HasPos ? Pos : Neg).getNode();
+ HasPos ? Pos : Neg);
}
- return nullptr;
+ return SDValue();
}
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
-SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
+SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Must be a legal type. Expanded and promoted things won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT)) return nullptr;
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
// The target must have at least one rotate flavor.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
- if (!HasROTL && !HasROTR) return nullptr;
+ if (!HasROTL && !HasROTR)
+ return SDValue();
// Check for truncated rotate.
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
assert(LHS.getValueType() == RHS.getValueType());
- if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
- return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
- SDValue(Rot, 0)).getNode();
+ if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
}
}
@@ -6192,7 +6165,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// If neither side matched a rotate half, bail
if (!LHSShift && !RHSShift)
- return nullptr;
+ return SDValue();
// InstCombine may have combined a constant shl, srl, mul, or udiv with one
// side of the rotate, so try to handle that here. In all cases we need to
@@ -6215,15 +6188,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// If a side is still missing, nothing else we can do.
if (!RHSShift || !LHSShift)
- return nullptr;
+ return SDValue();
// At this point we've matched or extracted a shift op on each side.
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
- return nullptr; // Not shifting the same value.
+ return SDValue(); // Not shifting the same value.
if (LHSShift.getOpcode() == RHSShift.getOpcode())
- return nullptr; // Shifts must disagree.
+ return SDValue(); // Shifts must disagree.
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
@@ -6267,13 +6240,13 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
}
- return Rot.getNode();
+ return Rot;
}
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
- return nullptr;
+ return SDValue();
// If the shift amount is sign/zext/any-extended just peel it off.
SDValue LExtOp0 = LHSShiftAmt;
@@ -6290,17 +6263,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
RExtOp0 = RHSShiftAmt.getOperand(0);
}
- SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+ SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
if (TryL)
return TryL;
- SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+ SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
if (TryR)
return TryR;
- return nullptr;
+ return SDValue();
}
namespace {
@@ -6415,7 +6388,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
Depth + 1);
case ISD::LOAD: {
auto L = cast<LoadSDNode>(Op.getNode());
- if (L->isVolatile() || L->isIndexed())
+ if (!L->isSimple() || L->isIndexed())
return None;
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
@@ -6504,8 +6477,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
SDValue Chain;
SmallVector<StoreSDNode *, 8> Stores;
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+    // TODO: Allow unordered atomics when the wider type is legal (see D66309)
if (Store->getMemoryVT() != MVT::i8 ||
- Store->isVolatile() || Store->isIndexed())
+ !Store->isSimple() || Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
@@ -6716,7 +6690,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
LoadSDNode *L = P->Load;
- assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+ assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
+ !L->isIndexed() &&
"Must be enforced by calculateByteProvider");
assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
@@ -6958,25 +6933,25 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
- SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
- if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
- LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
- RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
if (isAllOnesConstant(N1) && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
- SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
- if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
- LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
- RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
@@ -7079,26 +7054,103 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return SDValue();
}
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand with identical opcode, we may be able to convert
+/// that into 2 independent shifts followed by the logic op. This is a
+/// throughput improvement.
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+ // Match a one-use bitwise logic op.
+ SDValue LogicOp = Shift->getOperand(0);
+ if (!LogicOp.hasOneUse())
+ return SDValue();
+
+ unsigned LogicOpcode = LogicOp.getOpcode();
+ if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+ LogicOpcode != ISD::XOR)
+ return SDValue();
+
+ // Find a matching one-use shift by constant.
+ unsigned ShiftOpcode = Shift->getOpcode();
+ SDValue C1 = Shift->getOperand(1);
+ ConstantSDNode *C1Node = isConstOrConstSplat(C1);
+ assert(C1Node && "Expected a shift with constant operand");
+ const APInt &C1Val = C1Node->getAPIntValue();
+ auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
+ const APInt *&ShiftAmtVal) {
+ if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+ return false;
+
+ ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
+ if (!ShiftCNode)
+ return false;
+
+ // Capture the shifted operand and shift amount value.
+ ShiftOp = V.getOperand(0);
+ ShiftAmtVal = &ShiftCNode->getAPIntValue();
+
+ // Shift amount types do not have to match their operand type, so check that
+ // the constants are the same width.
+ if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
+ return false;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
+ return false;
+
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ SDValue X, Y;
+ const APInt *C0Val;
+ if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
+ Y = LogicOp.getOperand(1);
+ else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
+ Y = LogicOp.getOperand(0);
+ else
+ return SDValue();
+
+ // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+ SDLoc DL(Shift);
+ EVT VT = Shift->getValueType(0);
+ EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+ SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
+ SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
+ SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
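
The rewrite is valid because a constant shift distributes over the bitwise logic ops when the shift amounts stay in range. A standalone exhaustive check at 8 bits (shl/and shown; or/xor and matching srl/sra pairs behave the same):

    #include <cassert>
    #include <cstdint>

    int main() {
      auto Shl8 = [](uint32_t V, unsigned C) { return (V << C) & 0xffu; };
      // shl (and (shl X, C0), Y), C1 == and (shl X, C0 + C1), (shl Y, C1)
      for (uint32_t X = 0; X < 256; ++X)
        for (uint32_t Y = 0; Y < 256; ++Y)
          for (unsigned C0 = 0; C0 < 8; ++C0)
            for (unsigned C1 = 0; C0 + C1 < 8; ++C1)
              assert(Shl8(Shl8(X, C0) & Y, C1) ==
                     (Shl8(X, C0 + C1) & Shl8(Y, C1)));
      return 0;
    }
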
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// We are looking for: (shift being one of shl/sra/srl)
/// shift (binop X, C0), C1
/// And want to transform into:
/// binop (shift X, C1), (shift C0, C1)
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
+ assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
+
// Do not turn a 'not' into a regular xor.
if (isBitwiseNot(N->getOperand(0)))
return SDValue();
// The inner binop must be one-use, since we want to replace it.
- SDNode *LHS = N->getOperand(0).getNode();
- if (!LHS->hasOneUse()) return SDValue();
+ SDValue LHS = N->getOperand(0);
+ if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
+ return SDValue();
+
+ // TODO: This is limited to early combining because it may reveal regressions
+ // otherwise. But since we just checked a target hook to see if this is
+ // desirable, that should have filtered out cases where this interferes
+ // with some other pattern matching.
+ if (!LegalTypes)
+ if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+ return R;
// We want to pull some binops through shifts, so that we have (and (shift))
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
// thing happens with address calculations, so it's important to canonicalize
// it.
- switch (LHS->getOpcode()) {
+ switch (LHS.getOpcode()) {
default:
return SDValue();
case ISD::OR:
@@ -7112,14 +7164,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
}
// We require the RHS of the binop to be a constant and not opaque as well.
- ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+ ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
if (!BinOpCst)
return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant
// or is copy/select. Enable this in other cases once we figure out when it is
// exactly profitable.
- SDValue BinOpLHSVal = LHS->getOperand(0);
+ SDValue BinOpLHSVal = LHS.getOperand(0);
bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
BinOpLHSVal.getOpcode() == ISD::SRA ||
BinOpLHSVal.getOpcode() == ISD::SRL) &&
@@ -7133,24 +7185,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
- EVT VT = N->getValueType(0);
-
- if (!TLI.isDesirableToCommuteWithShift(N, Level))
- return SDValue();
-
// Fold the constants, shifting the binop RHS by the shift amount.
- SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
- N->getValueType(0),
- LHS->getOperand(1), N->getOperand(1));
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
+ N->getOperand(1));
assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
- // Create the new shift.
- SDValue NewShift = DAG.getNode(N->getOpcode(),
- SDLoc(LHS->getOperand(0)),
- VT, LHS->getOperand(0), N->getOperand(1));
-
- // Create the new binop.
- return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
+ SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+ N->getOperand(1));
+ return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
}
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
@@ -7478,7 +7522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
if (N1C && !N1C->isOpaque())
- if (SDValue NewSHL = visitShiftByConstant(N, N1C))
+ if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
return SDValue();
@@ -7597,6 +7641,37 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}
+ // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+ // sra (add (shl X, N1C), AddC), N1C -->
+ // sext (add (trunc X to (width - N1C)), AddC')
+ if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+ if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+ SDValue Shl = N0.getOperand(0);
+ // Determine what the truncate's type would be and ask the target if that
+ // is a free operation.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned ShiftAmt = N1C->getZExtValue();
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+ // TODO: The simple type check probably belongs in the default hook
+ // implementation and/or target-specific overrides (because
+ // non-simple types likely require masking when legalized), but that
+ // restriction may conflict with other transforms.
+ if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+ SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+ trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+ return DAG.getSExtOrTrunc(Add, DL, VT);
+ }
+ }
+ }
+
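
A standalone sketch of the equivalence with the shift amount fixed at 16 on i32 (assuming two's-complement conversions and an arithmetic right shift on int32_t, which mainstream compilers provide and C++20 guarantees):

    #include <cassert>
    #include <cstdint>

    int main() {
      // sra (add (shl X, 16), AddC), 16
      //   == sext(add (trunc X to i16), trunc(AddC lshr 16))
      const uint32_t Samples[] = {0u,          1u,          0x7fffu, 0x8000u,
                                  0x00018000u, 0xdeadbeefu, 0xffffffffu};
      for (uint32_t X : Samples)
        for (uint32_t AddC : Samples) {
          int32_t Old = (int32_t)((X << 16) + AddC) >> 16;
          int16_t Narrow = (int16_t)((uint16_t)X + (uint16_t)(AddC >> 16));
          assert(Old == (int32_t)Narrow); // sext of the narrow add
        }
      return 0;
    }
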
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -7638,7 +7713,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
if (N1C && !N1C->isOpaque())
- if (SDValue NewSRA = visitShiftByConstant(N, N1C))
+ if (SDValue NewSRA = visitShiftByConstant(N))
return NewSRA;
return SDValue();
@@ -7819,7 +7894,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue(N, 0);
if (N1C && !N1C->isOpaque())
- if (SDValue NewSRL = visitShiftByConstant(N, N1C))
+ if (SDValue NewSRL = visitShiftByConstant(N))
return NewSRL;
// Attempt to convert a srl of a load into a narrower zero-extending load.
@@ -8100,6 +8175,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+ SDValue Cond = N->getOperand(0);
+ SDValue C1 = N->getOperand(1);
+ SDValue C2 = N->getOperand(2);
+ assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+ "Expected select-of-constants");
+
+ EVT VT = N->getValueType(0);
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+ VT != Cond.getOperand(0).getValueType())
+ return SDValue();
+
+ // The inverted-condition + commuted-select variants of these patterns are
+ // canonicalized to these forms in IR.
+ SDValue X = Cond.getOperand(0);
+ SDValue CondC = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+ isAllOnesOrAllOnesSplat(C2)) {
+ // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+ }
+ if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+ // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+ }
+ return SDValue();
+}
+
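
A standalone sketch of both smear patterns (again assuming an arithmetic right shift on int32_t):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t C1 = 0x1234;
      for (int32_t X : {INT32_MIN, -2, -1, 0, 1, 42, INT32_MAX}) {
        int32_t Smear = X >> 31; // all-ones if X < 0, zero otherwise
        assert((X > -1 ? C1 : -1) == (Smear | C1)); // i32 X > -1 ? C1 : -1
        assert((X < 0 ? C1 : 0) == (Smear & C1));   // i32 X < 0 ? C1 : 0
      }
      return 0;
    }
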
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8148,22 +8260,36 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
return Cond;
}
- // For any constants that differ by 1, we can transform the select into an
- // extend and add. Use a target hook because some targets may prefer to
- // transform in the other direction.
+ // Use a target hook because some targets may prefer to transform in the
+ // other direction.
if (TLI.convertSelectOfConstantsToMath(VT)) {
- if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
+ // For any constants that differ by 1, we can transform the select into an
+ // extend and add.
+ const APInt &C1Val = C1->getAPIntValue();
+ const APInt &C2Val = C2->getAPIntValue();
+ if (C1Val - 1 == C2Val) {
// select Cond, C1, C1-1 --> add (zext Cond), C1-1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
- if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
+ if (C1Val + 1 == C2Val) {
// select Cond, C1, C1+1 --> add (sext Cond), C1+1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
+
+ // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+ if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
}
return SDValue();
@@ -8381,23 +8507,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SDValue();
}
-static
-std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
- SDLoc DL(N);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // Split the inputs.
- SDValue Lo, Hi, LL, LH, RL, RH;
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-
- return std::make_pair(Lo, Hi);
-}
-
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
@@ -8456,7 +8565,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
- SDValue Data = MSC->getValue();
SDValue Chain = MSC->getChain();
SDLoc DL(N);
@@ -8464,123 +8572,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MSCATTER data type requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (Mask.getOpcode() != ISD::SETCC)
- return SDValue();
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
- SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
-
- EVT MemoryVT = MSC->getMemoryVT();
- unsigned Alignment = MSC->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- SDValue DataLo, DataHi;
- std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
- SDValue Scale = MSC->getScale();
- SDValue BasePtr = MSC->getBasePtr();
- SDValue IndexLo, IndexHi;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MSC->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, MSC->getAAInfo(), MSC->getRanges());
-
- SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
- SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- DataLo.getValueType(), DL, OpsLo, MMO);
-
- // The order of the Scatter operation after split is well defined. The "Hi"
- // part comes after the "Lo". So these two operations should be chained one
- // after another.
- SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
+ return SDValue();
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
- SDValue Data = MST->getValue();
SDValue Chain = MST->getChain();
- EVT VT = Data.getValueType();
SDLoc DL(N);
// Zap masked stores with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MSTORE data type requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (Mask.getOpcode() == ISD::SETCC) {
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue MaskLo, MaskHi, Lo, Hi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- SDValue Ptr = MST->getBasePtr();
-
- EVT MemoryVT = MST->getMemoryVT();
- unsigned Alignment = MST->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- SDValue DataLo, DataHi;
- std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, MST->getAAInfo(), MST->getRanges());
-
- Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- MST->isTruncatingStore(),
- MST->isCompressingStore());
-
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MST->isCompressingStore());
- unsigned HiOffset = LoMemVT.getStoreSize();
-
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- MST->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
- MST->getAAInfo(), MST->getRanges());
-
- Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- MST->isTruncatingStore(),
- MST->isCompressingStore());
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
- }
return SDValue();
}
@@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MGT->getPassThru(), MGT->getChain());
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MGATHER result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
-
- if (Mask.getOpcode() != ISD::SETCC)
- return SDValue();
-
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue MaskLo, MaskHi, Lo, Hi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- SDValue PassThru = MGT->getPassThru();
- SDValue PassThruLo, PassThruHi;
- std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
- SDValue Chain = MGT->getChain();
- EVT MemoryVT = MGT->getMemoryVT();
- unsigned Alignment = MGT->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- SDValue Scale = MGT->getScale();
- SDValue BasePtr = MGT->getBasePtr();
- SDValue Index = MGT->getIndex();
- SDValue IndexLo, IndexHi;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MGT->getAAInfo(), MGT->getRanges());
-
- SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
- Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
- MMO);
-
- SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
- Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
- MMO);
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- // Build a factor node to remember that this load is independent of the
- // other one.
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Legalized the chain result - switch anything that used the old chain to
- // use the new one.
- DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
-
- SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
- SDValue RetOps[] = { GatherRes, Chain };
- return DAG.getMergeValues(RetOps, DL);
+ return SDValue();
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
@@ -8674,76 +8609,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MLOAD result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (Mask.getOpcode() == ISD::SETCC) {
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue MaskLo, MaskHi, Lo, Hi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- SDValue PassThru = MLD->getPassThru();
- SDValue PassThruLo, PassThruHi;
- std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
-
- SDValue Chain = MLD->getChain();
- SDValue Ptr = MLD->getBasePtr();
- EVT MemoryVT = MLD->getMemoryVT();
- unsigned Alignment = MLD->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MLD->getAAInfo(), MLD->getRanges());
-
- Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
- MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MLD->isExpandingLoad());
- unsigned HiOffset = LoMemVT.getStoreSize();
-
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
- MLD->getAAInfo(), MLD->getRanges());
-
- Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
- MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- // Build a factor node to remember that this load is independent of the
- // other one.
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Legalized the chain result - switch anything that used the old chain to
- // use the new one.
- DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
-
- SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
- SDValue RetOps[] = { LoadRes, Chain };
- return DAG.getMergeValues(RetOps, DL);
- }
return SDValue();
}
@@ -8791,6 +8656,18 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
}
+ // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
+ APInt Pow2C;
+ if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+ isNullOrNullSplat(N2)) {
+ SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+
// The general case for select-of-constants:
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
// ...but that only makes sense if a vselect is slower than 2 logic ops, so
@@ -8832,13 +8709,12 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
if (isAbs) {
- EVT VT = LHS.getValueType();
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
return DAG.getNode(ISD::ABS, DL, VT, LHS);
- SDValue Shift = DAG.getNode(
- ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DL, getShiftAmountTy(VT)));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
@@ -8851,10 +8727,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
- N0.getOperand(1), TLI)) {
- if (SDValue FMinMax = combineMinNumMaxNum(
- DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
+ if (SDValue FMinMax =
+ combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
return FMinMax;
}
@@ -9209,8 +9084,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
- !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
- !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ !N0.hasOneUse() || !LN0->isSimple() ||
+ !DstVT.isVector() || !DstVT.isPow2VectorType() ||
+ !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
return SDValue();
SmallVector<SDNode *, 4> SetCCs;
@@ -9411,7 +9287,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
+ if ((LegalOperations || !LN0->isSimple() ||
+ VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return SDValue();
@@ -9436,7 +9313,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
if (!ISD::isNON_EXTLoad(N0.getNode()) ||
!ISD::isUNINDEXEDLoad(N0.getNode()) ||
((LegalOperations || VT.isVector() ||
- cast<LoadSDNode>(N0)->isVolatile()) &&
+ !cast<LoadSDNode>(N0)->isSimple()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
return {};
@@ -9468,6 +9345,35 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
+ const TargetLowering &TLI, EVT VT,
+ SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType,
+ ISD::NodeType ExtOpc) {
+ if (!N0.hasOneUse())
+ return SDValue();
+
+ MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
+ if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+ if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
+ return SDValue();
+
+ if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SDLoc dl(Ld);
+ SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
+ SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(),
+ Ld->getBasePtr(), Ld->getMask(),
+ PassThru, Ld->getMemoryVT(),
+ Ld->getMemOperand(), ExtLoadType,
+ Ld->isExpandingLoad());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
+ return NewLoad;
+}
+
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
bool LegalOperations) {
assert((N->getOpcode() == ISD::SIGN_EXTEND ||
@@ -9568,6 +9474,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
ISD::SEXTLOAD, ISD::SIGN_EXTEND))
return foldedExt;
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
+ ISD::SIGN_EXTEND))
+ return foldedExt;
+
// fold (sext (load x)) to multiple smaller sextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
@@ -9856,6 +9767,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
return foldedExt;
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
+ ISD::ZERO_EXTEND))
+ return foldedExt;
+
// fold (zext (load x)) to multiple smaller zextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
@@ -10340,7 +10256,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
+ // Reducing the width of a volatile load is illegal. For atomics, we may be
+ // able to reduce the width provided we never widen again. (see D66309)
+ if (!LN0->isSimple() ||
+ !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
@@ -10369,11 +10288,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
- Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
+ Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
else
- Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
+ Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
NewAlign, LN0->getMemOperand()->getFlags(),
LN0->getAAInfo());
@@ -10392,7 +10311,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
- SDLoc DL(N0);
if (ShLeftAmt >= VT.getSizeInBits())
Result = DAG.getConstant(0, DL, VT);
else
@@ -10513,7 +10431,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
N0.hasOneUse()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -10530,7 +10448,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -10757,7 +10675,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (!LN0->isVolatile() &&
+ if (LN0->isSimple() &&
LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
VT, LN0->getChain(), LN0->getBasePtr(),
@@ -11051,7 +10969,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -11237,15 +11155,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
for (int i = 0; i != MaskScale; ++i)
NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
- bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
- if (!LegalMask) {
- std::swap(SV0, SV1);
- ShuffleVectorSDNode::commuteMask(NewMask);
- LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
- }
-
- if (LegalMask)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
}
return SDValue();
@@ -11998,7 +11911,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
- if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
+ if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -12006,17 +11919,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
- return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize), Flags);
+ TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2)
+ return DAG.getNode(
+ ISD::FSUB, DL, VT, N0,
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
- return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations,
- ForCodeSize), Flags);
+ TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2)
+ return DAG.getNode(
+ ISD::FSUB, DL, VT, N1,
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -12056,7 +11969,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// If 'unsafe math' or reassoc and nsz, fold lots of things.
// TODO: break out portions of the transformations below for which Unsafe is
// considered and which do not require both nsz and reassoc
- if ((Options.UnsafeFPMath ||
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -12175,7 +12088,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
- if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
+ if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros()) {
return N0;
}
@@ -12195,16 +12108,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
- return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+ return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
}
- if ((Options.UnsafeFPMath ||
- (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
- && N1.getOpcode() == ISD::FADD) {
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
@@ -12214,10 +12127,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
- return DAG.getNode(ISD::FADD, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize), Flags);
+ if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(
+ ISD::FADD, DL, VT, N0,
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -12228,6 +12141,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return SDValue();
}
+/// Return true if both inputs are at least as cheap in negated form and at
+/// least one input is strictly cheaper in negated form.
+bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
+ if (char LHSNeg =
+ TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize))
+ if (char RHSNeg =
+ TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize))
+ // Both negated operands are at least as cheap as their counterparts.
+ // Check to see if at least one is cheaper negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return true;
+
+ return false;
+}
+
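
The callers below use this to cancel a double negation outright. A standalone sketch of the underlying arithmetic fact: for non-NaN doubles the two sign flips cancel exactly under IEEE-754, including the sign of zero results, so the rewrite itself needs no fast-math flags:

    #include <cassert>
    #include <cmath>

    int main() {
      const double Vals[] = {0.0, -0.0, 1.5, -2.25, 1e308, -1e-308};
      for (double A : Vals)
        for (double B : Vals) {
          double P1 = A * B;
          double P2 = (-A) * (-B);
          assert(P1 == P2 && std::signbit(P1) == std::signbit(P2));
        }
      return 0;
    }
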
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12254,10 +12182,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
!isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
- // fold (fmul A, 1.0) -> A
- if (N1CFP && N1CFP->isExactlyValue(1.0))
- return N0;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -12302,21 +12226,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N0);
- // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- // Both can be negated for free, check to see if at least one is cheaper
- // negated.
- if (LHSNeg == 2 || RHSNeg == 2)
- return DAG.getNode(ISD::FMUL, DL, VT,
- GetNegatedExpression(N0, DAG, LegalOperations,
- ForCodeSize),
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize),
- Flags);
- }
+ // -N0 * -N1 --> N0 * N1
+ if (isCheaperToUseNegatedFPOps(N0, N1)) {
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
}
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
@@ -12395,6 +12311,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
+ // (-N0 * -N1) + N2 --> (N0 * N1) + N2
+ if (isCheaperToUseNegatedFPOps(N0, N1)) {
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+ }
+
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
@@ -12602,9 +12527,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
- }
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
@@ -12645,28 +12569,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
- AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
- }
+ if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+ return RV;
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- // Both can be negated for free, check to see if at least one is cheaper
- // negated.
- if (LHSNeg == 2 || RHSNeg == 2)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
- GetNegatedExpression(N0, DAG, LegalOperations,
- ForCodeSize),
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize),
- Flags);
- }
- }
+ if (isCheaperToUseNegatedFPOps(N0, N1))
+ return DAG.getNode(
+ ISD::FDIV, SDLoc(N), VT,
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
return SDValue();
}
@@ -13112,22 +13024,6 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
- EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-
- // fold (fp_round_inreg c1fp) -> c1fp
- if (N0CFP && isTypeLegal(EVT)) {
- SDLoc DL(N);
- SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
- return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
- }
-
- return SDValue();
-}
-
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -13236,9 +13132,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
- if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options, ForCodeSize))
- return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
+ return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
@@ -14004,11 +13899,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
}
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
- if (OptLevel == CodeGenOpt::None || LD->isVolatile())
+ if (OptLevel == CodeGenOpt::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
- if (!ST || ST->isVolatile())
+ // TODO: Relax this restriction for unordered atomics (see D66309)
+ if (!ST || !ST->isSimple())
return SDValue();
EVT LDType = LD->getValueType(0);
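The isVolatile() to isSimple() substitutions that recur through this file widen each bail-out from volatile accesses to volatile-or-atomic accesses. A rough model of the predicate, under the semantics the TODO comments imply (unordered atomics are still rejected for now):

    // Model only, not the real MemSDNode implementation: a "simple" access
    // is neither volatile nor atomic, so combines that add, remove, or
    // split memory accesses remain legal for it.
    struct MemAccessModel {
      bool IsVolatile = false;
      bool IsAtomic = false;
      bool isSimple() const { return !IsVolatile && !IsAtomic; }
    };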
@@ -14107,7 +14003,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// If load is not volatile and there are no uses of the loaded value (and
// the updated indexed value in case of indexed loads), change uses of the
// chain value into uses of the chain input (i.e. delete the dead load).
- if (!LD->isVolatile()) {
+ // TODO: Allow this for unordered atomics (see D66309)
+ if (LD->isSimple()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
if (!N->hasAnyUseOfValue(0)) {
@@ -14241,7 +14138,7 @@ struct LoadedSlice {
/// Helper structure used to compute the cost of a slice.
struct Cost {
/// Are we optimizing for code size.
- bool ForCodeSize;
+ bool ForCodeSize = false;
    /// Various cost counters.
unsigned Loads = 0;
@@ -14250,10 +14147,10 @@ struct LoadedSlice {
unsigned ZExts = 0;
unsigned Shift = 0;
- Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
+ explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
/// Get the cost of one isolated slice.
- Cost(const LoadedSlice &LS, bool ForCodeSize = false)
+ Cost(const LoadedSlice &LS, bool ForCodeSize)
: ForCodeSize(ForCodeSize), Loads(1) {
EVT TruncType = LS.Inst->getValueType(0);
EVT LoadedType = LS.getLoadedType();
@@ -14678,7 +14575,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
return false;
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
+ if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
!LD->getValueType(0).isInteger())
return false;
@@ -14829,13 +14726,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
else if (Chain->getOpcode() == ISD::TokenFactor &&
SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use, so there are no indirect dependencies.
- bool isOk = false;
- for (const SDValue &ChainOp : Chain->op_values())
- if (ChainOp.getNode() == LD) {
- isOk = true;
- break;
- }
- if (!isOk)
+ if (!LD->isOperandOf(Chain.getNode()))
return Result;
} else
return Result; // Fail.
@@ -14848,7 +14739,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
-static SDNode *
+static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue IVal, StoreSDNode *St,
DAGCombiner *DC) {
@@ -14860,14 +14751,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
- if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
- // legalization.
- MVT VT = MVT::getIntegerVT(NumBytes*8);
+ // legalization (and the target doesn't explicitly think this is a bad idea).
+ MVT VT = MVT::getIntegerVT(NumBytes * 8);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DC->isTypeLegal(VT))
- return nullptr;
+ return SDValue();
+ if (St->getMemOperand() &&
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *St->getMemOperand()))
+ return SDValue();
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
@@ -14901,8 +14797,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
- St->getPointerInfo().getWithOffset(StOffset), NewAlign)
- .getNode();
+ St->getPointerInfo().getWithOffset(StOffset), NewAlign);
}
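The MaskedValueIsZero test above is what guards the narrowing: every bit outside the stored byte range must already be known zero in IVal. A small standalone computation of that mask, assuming a 32-bit value with ByteShift = 2 and NumBytes = 1 purely for illustration:

    #include <cstdint>
    #include <cstdio>

    // Bits that must be known zero in IVal for the narrow store to be a
    // valid replacement: everything outside
    // [ByteShift*8, (ByteShift+NumBytes)*8).
    uint32_t mustBeZeroMask(unsigned ByteShift, unsigned NumBytes) {
      uint64_t Set = ((1ull << (NumBytes * 8)) - 1) << (ByteShift * 8);
      return static_cast<uint32_t>(~Set);
    }

    int main() {
      std::printf("0x%08x\n", mustBeZeroMask(2, 1)); // prints 0xff00ffff
    }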
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
@@ -14911,7 +14806,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
- if (ST->isVolatile())
+ if (!ST->isSimple())
return SDValue();
SDValue Chain = ST->getChain();
@@ -14933,16 +14828,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
std::pair<unsigned, unsigned> MaskedLoad;
MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
if (MaskedLoad.first)
- if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST,this))
- return SDValue(NewST, 0);
+ return NewST;
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
- if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(0), ST,this))
- return SDValue(NewST, 0);
+ return NewST;
}
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
@@ -15367,14 +15262,16 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
return;
- // The memory operands must not be volatile/indexed.
- if (Ld->isVolatile() || Ld->isIndexed())
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Ld->isSimple() || Ld->isIndexed())
return;
}
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
- // The memory operands must not be volatile/indexed.
- if (Other->isVolatile() || Other->isIndexed())
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Other->isSimple() || Other->isIndexed())
return false;
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
@@ -15394,8 +15291,10 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
- // The memory operands must not be volatile/indexed.
- if (OtherLd->isVolatile() || OtherLd->isIndexed())
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!OtherLd->isSimple() ||
+ OtherLd->isIndexed())
return false;
// Don't mix temporal loads with non-temporal loads.
if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
@@ -15425,6 +15324,18 @@ void DAGCombiner::getStoreMergeCandidates(
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
+  // Check whether this pair of StoreNode and RootNode has already bailed
+  // out of the dependence check more times than the limit allows.
+ auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
+ SDNode *RootNode) -> bool {
+ auto RootCount = StoreRootCountMap.find(StoreNode);
+ if (RootCount != StoreRootCountMap.end() &&
+ RootCount->second.first == RootNode &&
+ RootCount->second.second > StoreMergeDependenceLimit)
+ return true;
+ return false;
+ };
+
  // We are looking for a root node which is an ancestor to all mergeable
// stores. We search up through a load, to our root and then down
// through all children. For instance we will find Store{1,2,3} if
@@ -15454,7 +15365,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
BaseIndexOffset Ptr;
int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherST, RootNode))
StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
} else
@@ -15464,7 +15376,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
BaseIndexOffset Ptr;
int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherST, RootNode))
StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
}
@@ -15522,13 +15435,24 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
// Search through DAG. We can stop early if we find a store node.
for (unsigned i = 0; i < NumStores; ++i)
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
- Max))
+ Max)) {
+      // If the search bails out, record the StoreNode and RootNode in the
+      // StoreRootCountMap. If we have seen this pair more times than the
+      // limit allows, we won't add the StoreNode into StoreNodes again.
+ if (Visited.size() >= Max) {
+ auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
+ if (RootCount.first == RootNode)
+ RootCount.second++;
+ else
+ RootCount = {RootNode, 1};
+ }
return false;
+ }
return true;
}
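The two snippets above cooperate: the dependence check records each (store, root) pair whose search hit the node budget, and candidate gathering then skips pairs seen too often. A compact standalone sketch of that bookkeeping, with std::map standing in for the DenseMap and an assumed limit (StoreMergeDependenceLimit is a command-line option in-tree; 10 is an assumed default here):

    #include <map>
    #include <utility>

    using NodeId = const void *; // stand-in for SDNode*

    std::map<NodeId, std::pair<NodeId, unsigned>> StoreRootCountMap;
    const unsigned StoreMergeDependenceLimit = 10; // assumed default

    // Mirrors OverLimitInDependenceCheck above.
    bool overLimit(NodeId Store, NodeId Root) {
      auto It = StoreRootCountMap.find(Store);
      return It != StoreRootCountMap.end() && It->second.first == Root &&
             It->second.second > StoreMergeDependenceLimit;
    }

    // Mirrors the bail-out recording in
    // checkMergeStoreCandidatesForDependencies.
    void recordBailOut(NodeId Store, NodeId Root) {
      auto &RC = StoreRootCountMap[Store];
      if (RC.first == Root)
        ++RC.second;
      else
        RC = {Root, 1};
    }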
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
return false;
EVT MemVT = St->getMemoryVT();
@@ -15588,7 +15512,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
bool RV = false;
while (StoreNodes.size() > 1) {
- unsigned StartIdx = 0;
+ size_t StartIdx = 0;
while ((StartIdx + 1 < StoreNodes.size()) &&
StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
StoreNodes[StartIdx + 1].OffsetFromBase)
@@ -16113,7 +16037,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
case MVT::ppcf128:
return SDValue();
case MVT::f32:
- if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
;
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
@@ -16125,7 +16049,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
return SDValue();
case MVT::f64:
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
- !ST->isVolatile()) ||
+ ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
;
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -16134,7 +16058,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
Ptr, ST->getMemOperand());
}
- if (!ST->isVolatile() &&
+ if (ST->isSimple() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
@@ -16181,7 +16105,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
- if (((!LegalOperations && !ST->isVolatile()) ||
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (((!LegalOperations && ST->isSimple()) ||
TLI.isOperationLegal(ISD::STORE, SVT)) &&
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
DAG, *ST->getMemOperand())) {
@@ -16242,9 +16167,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
AddToWorklist(Value.getNode());
- if (Shorter)
+ if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
ST->getMemOperand());
@@ -16263,9 +16187,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// If this is a load followed by a store to the same location, then the store
// is dead/noop.
+ // TODO: Can relax for unordered atomics (see D66309)
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
- ST->isUnindexed() && !ST->isVolatile() &&
+ ST->isUnindexed() && ST->isSimple() &&
// There can't be any side effects between the load and store, such as
// a call or store.
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
@@ -16274,9 +16199,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+ // TODO: Can relax for unordered atomics (see D66309)
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
- if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
- !ST1->isVolatile()) {
+ if (ST->isUnindexed() && ST->isSimple() &&
+ ST1->isUnindexed() && ST1->isSimple()) {
if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
ST->getMemoryVT() == ST1->getMemoryVT()) {
// If this is a store followed by a store with the same value to the
@@ -16405,7 +16331,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
break;
case ISD::STORE: {
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
- if (ST->isVolatile() || ST->isIndexed())
+ // TODO: Can relax for unordered atomics (see D66309)
+ if (!ST->isSimple() || ST->isIndexed())
continue;
const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
// If we store purely within object bounds just before its lifetime ends,
@@ -16456,6 +16383,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
+ // Can't change the number of memory accesses for a volatile store or break
+ // atomicity for an atomic one.
+ if (!ST->isSimple())
+ return SDValue();
+
SDValue Val = ST->getValue();
SDLoc DL(ST);
@@ -16531,12 +16463,52 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
}
/// Convert a disguised subvector insertion into a shuffle:
-/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
-/// bitcast(shuffle (bitcast V), (extended X), Mask)
-/// Note: We do not use an insert_subvector node because that requires a legal
-/// subvector type.
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
SDValue InsertVal = N->getOperand(1);
+ SDValue Vec = N->getOperand(0);
+
+ // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
+ // --> (vector_shuffle X, Y)
+ if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
+ InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(InsertVal.getOperand(1))) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
+ ArrayRef<int> Mask = SVN->getMask();
+
+ SDValue X = Vec.getOperand(0);
+ SDValue Y = Vec.getOperand(1);
+
+ // Vec's operand 0 is using indices from 0 to N-1 and
+ // operand 1 from N to 2N - 1, where N is the number of
+ // elements in the vectors.
+ int XOffset = -1;
+ if (InsertVal.getOperand(0) == X) {
+ XOffset = 0;
+ } else if (InsertVal.getOperand(0) == Y) {
+ XOffset = X.getValueType().getVectorNumElements();
+ }
+
+ if (XOffset != -1) {
+ SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
+
+ auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
+ NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
+ assert(NewMask[InsIndex] <
+ (int)(2 * Vec.getValueType().getVectorNumElements()) &&
+ NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
+
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
+ Y, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
+ }
+ }
+
+ // insert_vector_elt V, (bitcast X from vector type), IdxC -->
+ // bitcast(shuffle (bitcast V), (extended X), Mask)
+ // Note: We do not use an insert_subvector node because that requires a
+ // legal subvector type.
if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
!InsertVal.getOperand(0).getValueType().isVector())
return SDValue();
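A concrete instance of the new shuffle-mask rewrite, with hypothetical 4-element vectors: if Vec = vector_shuffle<0,5,2,7>(X, Y) and the inserted element is extract_vector_elt(Y, 1) landing in lane 2, then Y is shuffle operand 1, so XOffset = 4 and the lane's mask entry becomes 4 + 1:

    #include <cstdio>

    int main() {
      int Mask[4] = {0, 5, 2, 7}; // existing shuffle mask
      int XOffset = 4;            // InsertVal is drawn from Y (operand 1)
      int ExtrIndex = 1;          // lane extracted from Y
      int InsIndex = 2;           // lane being overwritten
      Mask[InsIndex] = XOffset + ExtrIndex; // Mask becomes {0, 5, 5, 7}
      for (int M : Mask)
        std::printf("%d ", M);
      std::printf("\n");
    }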
@@ -16674,7 +16646,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue EltNo,
LoadSDNode *OriginalLoad) {
- assert(!OriginalLoad->isVolatile());
+ assert(OriginalLoad->isSimple());
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
@@ -16747,12 +16719,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
SDValue To[] = { Load, Chain };
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorklist(EVE);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
- AddToWorklist(Load.getNode());
AddUsersToWorklist(Load.getNode()); // Add users too
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorklist(EVE);
+ AddToWorklist(Load.getNode());
++OpsNarrowed;
return SDValue(EVE, 0);
}
@@ -16982,7 +16954,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
ISD::isNormalLoad(VecOp.getNode()) &&
!Index->hasPredecessor(VecOp.getNode())) {
auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
- if (VecLoad && !VecLoad->isVolatile())
+ if (VecLoad && VecLoad->isSimple())
return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
}
@@ -17041,7 +17013,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // Make sure we found a simple (non-volatile, non-atomic) load and the
  // extractelement is the only use.
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
return SDValue();
// If Idx was -1 above, Elt is going to be -1, so just return undef.
@@ -17344,17 +17316,16 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// the shuffle mask with -1.
}
- // Turn this into a shuffle with zero if that's legal.
- EVT VecVT = Extract.getOperand(0).getValueType();
- if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
- return SDValue();
-
// buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
// bitcast (shuffle V, ZeroVec, VectorMask)
SDLoc DL(BV);
+ EVT VecVT = Extract.getOperand(0).getValueType();
SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
- SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
- ShufMask);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
+ ZeroVec, ShufMask, DAG);
+ if (!Shuf)
+ return SDValue();
return DAG.getBitcast(VT, Shuf);
}
@@ -17656,6 +17627,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
}
+ // A splat of a single element is a SPLAT_VECTOR if supported on the target.
+ if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
+ if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ assert(!V.isUndef() && "Splat of undef should have been handled earlier");
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
+ }
+
// Check if we can express BUILD VECTOR via subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {
SDValue Op0 = N->getOperand(0);
@@ -17829,11 +17807,9 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
}
}
- if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
- return SDValue();
-
- return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
- DAG.getBitcast(VT, SV1), Mask);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
+ DAG.getBitcast(VT, SV1), Mask, DAG);
}
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
@@ -17853,6 +17829,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
+ // If the input is a concat_vectors, just make a larger concat by padding
+ // with smaller undefs.
+ if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
+ unsigned NumOps = N->getNumOperands() * In.getNumOperands();
+ SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
+ Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+ }
+
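For hypothetical 2-element parts, the padding fold above reads: concat_vectors (concat_vectors A, B), undef --> concat_vectors A, B, undef, undef. The resize call does the padding; a scalar stand-in for that step:

    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> Ops = {"A", "B"}; // inner concat's operands
      unsigned NumOps = 2 * 2;                   // outer count * inner count
      Ops.resize(NumOps, "undef");               // pad with undefs, as above
      // Ops == {"A", "B", "undef", "undef"}
    }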
SDValue Scalar = peekThroughOneUseBitcasts(In);
// concat_vectors(scalar_to_vector(scalar), undef) ->
@@ -18002,6 +17987,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to see
+// whether the subvector can be sourced for free.
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
+ return V.getOperand(1);
+ }
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+ V.getOperand(0).getValueType() == SubVT &&
+ (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
+ return V.getOperand(SubIdx);
+ }
+ return SDValue();
+}
+
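For the CONCAT_VECTORS path, a worked example with assumed shapes: with V = concat_vectors(A, B, C, D) built from 2-element parts, a constant extract index of 4 is aligned to a part boundary and selects operand C:

    #include <cassert>

    int main() {
      unsigned IndexVal = 4, NumSubElts = 2;
      assert(IndexVal % NumSubElts == 0);      // aligned to a subvector
      unsigned SubIdx = IndexVal / NumSubElts; // 2, i.e. operand C
      (void)SubIdx;
    }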
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -18010,39 +18012,29 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
+ EVT VecVT = BinOp.getValueType();
SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
- SDValue Index = Extract->getOperand(1);
- EVT VT = Extract->getValueType(0);
+ if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
+ return SDValue();
- // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
- // if the source subvector is the same type as the one being extracted.
- auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
- if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
- V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
- return V.getOperand(1);
- }
- auto *IndexC = dyn_cast<ConstantSDNode>(Index);
- if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
- V.getOperand(0).getValueType() == VT &&
- (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
- uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
- return V.getOperand(SubIdx);
- }
+ SDValue Index = Extract->getOperand(1);
+ EVT SubVT = Extract->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
return SDValue();
- };
- SDValue Sub0 = GetSubVector(Bop0);
- SDValue Sub1 = GetSubVector(Bop1);
+
+ SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
+ SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
// TODO: We could handle the case where only 1 operand is being inserted by
// creating an extract of the other operand, but that requires checking
// number of uses and/or costs.
- if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT))
+ if (!Sub0 || !Sub1)
return SDValue();
// We are inserting both operands of the wide binop only to extract back
// to the narrow vector size. Eliminate all of the insert/extract:
// ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
- return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1,
+ return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
BinOp->getFlags());
}
@@ -18174,7 +18166,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
+ !ExtIdx)
return SDValue();
// Allow targets to opt-out.
@@ -18878,7 +18871,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
int SplatIndex = SVN->getSplatIndex();
- if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+ if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
// splat (vector_bo L, R), Index -->
// splat (scalar_bo (extelt L, Index), (extelt R, Index))
@@ -19153,22 +19146,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SV1 = DAG.getUNDEF(VT);
// Avoid introducing shuffles with illegal mask.
- if (!TLI.isShuffleMaskLegal(Mask, VT)) {
- ShuffleVectorSDNode::commuteMask(Mask);
-
- if (!TLI.isShuffleMaskLegal(Mask, VT))
- return SDValue();
-
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
- std::swap(SV0, SV1);
- }
-
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
}
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
@@ -19191,35 +19175,35 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
int Elt = C0->getZExtValue();
NewMask[0] = Elt;
- SDValue Val;
      // If we have an implicit truncate, do the truncate here as long as it's
      // legal; if it's not, fall through to the shuffle lowering below.
if (VT.getScalarType() != InVal.getValueType() &&
InVal.getValueType().isScalarInteger() &&
isTypeLegal(VT.getScalarType())) {
- Val =
+ SDValue Val =
DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
}
if (VT.getScalarType() == InVecT.getScalarType() &&
- VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
- TLI.isShuffleMaskLegal(NewMask, VT)) {
- Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
- DAG.getUNDEF(InVecT), NewMask);
- // If the initial vector is the correct size this shuffle is a
- // valid result.
- if (VT == InVecT)
- return Val;
- // If not we must truncate the vector.
- if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
- EVT SubVT =
- EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
- VT.getVectorNumElements());
- Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
- ZeroIdx);
- return Val;
+ VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
+ DAG.getUNDEF(InVecT), NewMask, DAG);
+ if (LegalShuffle) {
+ // If the initial vector is the correct size this shuffle is a
+ // valid result.
+ if (VT == InVecT)
+ return LegalShuffle;
+ // If not we must truncate the vector.
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
+ EVT SubVT =
+ EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
+ VT.getVectorNumElements());
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
+ LegalShuffle, ZeroIdx);
+ }
}
}
}
@@ -19627,6 +19611,39 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
}
+ // Make sure all but the first op are undef or constant.
+ auto ConcatWithConstantOrUndef = [](SDValue Concat) {
+ return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
+ std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
+ [](const SDValue &Op) {
+ return Op.isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
+ });
+ };
+
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of the concat may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
+ // concat (VBinOp X, Y), VecC
+ if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ EVT NarrowVT = LHS.getOperand(0).getValueType();
+ if (NarrowVT == RHS.getOperand(0).getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ SDLoc DL(N);
+ unsigned NumOperands = LHS.getNumOperands();
+ SmallVector<SDValue, 4> ConcatOps;
+ for (unsigned i = 0; i != NumOperands; ++i) {
+        // This constant-folds for operands 1 and up.
+ ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
+ RHS.getOperand(i)));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ }
+ }
+
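The shape of the transform, assuming two-operand concats: add (concat X, C1), (concat Y, C2) --> concat (add X, Y), (add C1, C2), where the trailing add constant-folds. A scalar stand-in for the operand loop, with ints standing in for the narrow subvectors:

    #include <vector>

    // Operand 0 is the real narrow binop; operands 1 and up pair
    // undef/constant with undef/constant and fold away.
    std::vector<int> narrowThenConcat(const std::vector<int> &L,
                                      const std::vector<int> &R) {
      std::vector<int> ConcatOps;
      for (size_t i = 0; i != L.size(); ++i)
        ConcatOps.push_back(L[i] + R[i]); // binop applied per concat operand
      return ConcatOps;
    }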
if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
return V;
@@ -19723,7 +19740,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// Token chains must be identical.
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
- LLD->isVolatile() || RLD->isVolatile() ||
+ // Be conservative for atomics for the moment
+ // TODO: This does appear to be legal for unordered atomics (see D66309)
+ !LLD->isSimple() || !RLD->isSimple() ||
// FIXME: If either is a pre/post inc/dec load,
// we'd need to split out the address adjustment.
LLD->isIndexed() || RLD->isIndexed() ||
@@ -19928,7 +19947,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
- if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
+ if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
return SDValue();
// If we are before legalize types, we want the other legalization to happen
@@ -20016,8 +20035,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// when the condition can be materialized as an all-ones register. Any
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
+ // TODO: The operation legality checks could be loosened to include "custom",
+ // but that may cause regressions for targets that do not have shift
+ // instructions.
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
- N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) &&
+ TLI.isOperationLegal(ISD::SHL, VT) &&
+ TLI.isOperationLegal(ISD::SRA, VT)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
@@ -20209,7 +20233,10 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
+/// For the last iteration, fold the numerator N into it to gain more
+/// precision:
+/// Result = N X_i + X_i (N - N A X_i)
+SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
+ SDNodeFlags Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -20230,25 +20257,39 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
AddToWorklist(Est.getNode());
+ SDLoc DL(Op);
if (Iterations) {
- SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- // Newton iterations: Est = Est + Est (1 - Arg * Est)
+      // Newton iterations: Est = Est + Est (1 - Arg * Est). On the last
+      // iteration, fold in the numerator: Est = N Est + Est (N - N Arg Est).
for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
+ SDValue MulEst = Est;
+
+ if (i == Iterations - 1) {
+ MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
+ AddToWorklist(MulEst.getNode());
+ }
+
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT,
+ (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
+ Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
AddToWorklist(Est.getNode());
}
+ } else {
+ // If no iterations are available, multiply with N.
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
+ AddToWorklist(Est.getNode());
}
+
return Est;
}
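A scalar model of the refined loop, with an assumed starting estimate and iteration count (the real ones come from TLI.getRecipEstimate); it shows the numerator N being folded into the final iteration exactly as above:

    #include <cstdio>

    float divEstimate(float N, float A, float Est, int Iterations) {
      for (int i = 0; i < Iterations; ++i) {
        bool Last = (i == Iterations - 1);
        float MulEst = Last ? N * Est : Est; // fold numerator into last pass
        float NewEst = A * MulEst;           // Arg * Est (or N * Arg * Est)
        NewEst = (Last ? N : 1.0f) - NewEst; // 1 - A*Est, or N - N*A*Est
        NewEst = Est * NewEst;
        Est = MulEst + NewEst;               // X_{i+1} (times N at the end)
      }
      return Est;
    }

    int main() {
      // 2.0 / 3.0 from a crude reciprocal estimate of 0.3.
      std::printf("%f\n", divEstimate(2.0f, 3.0f, 0.3f, 3)); // ~0.666667
    }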
@@ -20271,31 +20312,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
- AddToWorklist(HalfArg.getNode());
-
HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
- AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
- AddToWorklist(Est.getNode());
}
// If non-reciprocal square root is requested, multiply the result by Arg.
- if (!Reciprocal) {
+ if (!Reciprocal)
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
- AddToWorklist(Est.getNode());
- }
return Est;
}
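A scalar model of the one-constant step: HalfArg = 1.5*Arg - Arg yields 0.5*Arg without a second FP constant, and each iteration computes Est * (1.5 - HalfArg * Est * Est). The initial estimate is assumed; in-tree it comes from the target's estimate hook:

    float rsqrtRefineOneConst(float Arg, float Est, unsigned Iterations) {
      float HalfArg = 1.5f * Arg - Arg; // 0.5 * Arg, reusing the 1.5 constant
      for (unsigned i = 0; i < Iterations; ++i)
        Est = Est * (1.5f - HalfArg * Est * Est);
      return Est; // approximates 1 / sqrt(Arg)
    }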
@@ -20321,13 +20350,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
- AddToWorklist(AE.getNode());
-
SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
- AddToWorklist(AEE.getNode());
-
SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
- AddToWorklist(RHS.getNode());
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -20340,10 +20364,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// SQRT: LHS = (A * E) * -0.5
LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
}
- AddToWorklist(LHS.getNode());
Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
- AddToWorklist(Est.getNode());
}
return Est;
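The two-constant variant in scalar form, including the last-iteration trick for the non-reciprocal case: using LHS = (A * E) * -0.5 folds the final multiply by Arg into the step, since sqrt(A) is approximately A * rsqrt(A). A sketch with an assumed initial estimate:

    float rsqrtRefineTwoConst(float Arg, float Est, unsigned Iterations,
                              bool Reciprocal) {
      for (unsigned i = 0; i < Iterations; ++i) {
        float AE = Arg * Est;
        float RHS = AE * Est + -3.0f;
        bool LastSqrtStep = !Reciprocal && i == Iterations - 1;
        float LHS = (LastSqrtStep ? AE : Est) * -0.5f;
        Est = LHS * RHS; // E' = (E * -0.5) * ((A*E)*E + -3.0)
      }
      return Est; // 1/sqrt(Arg), or sqrt(Arg) after the last step if !Reciprocal
    }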
@@ -20400,16 +20422,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
- AddToWorklist(Fabs.getNode());
- AddToWorklist(IsDenorm.getNode());
- AddToWorklist(Est.getNode());
} else {
// X == 0.0 ? 0.0 : Est
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
- AddToWorklist(IsZero.getNode());
- AddToWorklist(Est.getNode());
}
}
}
@@ -20432,6 +20449,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
+ bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
Optional<int64_t> NumBytes;
@@ -20447,18 +20465,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()
: 0;
- return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
+ return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
+ Offset /*base offset*/,
Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
- return {false /*isVolatile*/, LN->getOperand(1),
+ return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
: Optional<int64_t>(),
(MachineMemOperand *)nullptr};
// Default.
- return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
+    return {false /*isVolatile*/, /*isAtomic*/ false, SDValue(),
+ (int64_t)0 /*offset*/,
Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
};
@@ -20474,6 +20494,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
if (MUC0.IsVolatile && MUC1.IsVolatile)
return true;
+ // Be conservative about atomics for the moment
+ // TODO: This is way overconservative for unordered atomics (see D66309)
+ if (MUC0.IsAtomic && MUC1.IsAtomic)
+ return true;
+
if (MUC0.MMO && MUC1.MMO) {
if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
@@ -20555,7 +20580,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
- const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
+ // TODO: relax aliasing for unordered atomics (see D66309)
+ const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
// Starting off.
Chains.push_back(OriginalChain);
@@ -20571,8 +20597,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LOAD:
case ISD::STORE: {
// Get alias information for C.
+ // TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
- !cast<LSBaseSDNode>(C.getNode())->isVolatile();
+ cast<LSBaseSDNode>(C.getNode())->isSimple();
if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
@@ -20727,7 +20754,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// If the chain has more than one use, then we can't reorder the mem ops.
if (!SDValue(Chain, 0)->hasOneUse())
break;
- if (Chain->isVolatile() || Chain->isIndexed())
+ // TODO: Relax for unordered atomics (see D66309)
+ if (!Chain->isSimple() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
@@ -20795,11 +20823,11 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
CombineTo(St, TF);
- AddToWorklist(STChain);
- // Add TF operands worklist in reverse order.
- for (auto I = TF->getNumOperands(); I;)
- AddToWorklist(TF->getOperand(--I).getNode());
+ // Add TF and its operands to the worklist.
AddToWorklist(TF.getNode());
+ for (const SDValue &Op : TF->ops())
+ AddToWorklist(Op.getNode());
+ AddToWorklist(STChain);
return true;
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 22c23ba877e8..6d7260d7aee5 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -174,7 +174,7 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
if (RegDef)
return 0;
RegDef = MO.getReg();
- } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ } else if (Register::isVirtualRegister(MO.getReg())) {
// This is another use of a vreg. Don't try to sink it.
return 0;
}
@@ -1213,14 +1213,13 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
if (!FrameAlign)
FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
Flags.setByValSize(FrameSize);
- Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Arg.IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty);
- Flags.setOrigAlign(OriginalAlignment);
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
CLI.OutVals.push_back(Arg.Val);
CLI.OutFlags.push_back(Flags);
@@ -1237,8 +1236,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
// Set labels for heapallocsite call.
- if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) {
- MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
+ if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) {
+ const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
MF->addCodeViewHeapAllocSite(CLI.Call, MD);
}
@@ -1303,6 +1302,7 @@ bool FastISel::selectCall(const User *I) {
ExtraInfo |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::INLINEASM))
@@ -1388,9 +1388,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
"Expected inlined-at fields to agree");
// A dbg.declare describes the address of a source variable, so lower it
// into an indirect DBG_VALUE.
+ auto *Expr = DI->getExpression();
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
- *Op, DI->getVariable(), DI->getExpression());
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false,
+ *Op, DI->getVariable(), Expr);
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
@@ -1414,19 +1416,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
- .addImm(0U)
+ .addReg(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
- .addImm(0U)
+ .addReg(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
- .addImm(0U)
+ .addReg(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (unsigned Reg = lookUpRegForValue(V)) {
@@ -1453,24 +1455,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
return true;
}
- case Intrinsic::objectsize: {
- ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
- unsigned long long Res = CI->isZero() ? -1ULL : 0;
- Constant *ResCI = ConstantInt::get(II->getType(), Res);
- unsigned ResultReg = getRegForValue(ResCI);
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
- case Intrinsic::is_constant: {
- Constant *ResCI = ConstantInt::get(II->getType(), 0);
- unsigned ResultReg = getRegForValue(ResCI);
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
@@ -1677,11 +1667,11 @@ bool FastISel::selectInstruction(const Instruction *I) {
/// (fall-through) successor, and update the CFG.
void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
const DebugLoc &DbgLoc) {
- if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
+ if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 &&
FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
- // For more accurate line information if this is the only instruction
- // in the block then emit it, otherwise we have the unconditional
- // fall-through case, which needs no instructions.
+    // For more accurate line information, if this is the only non-debug
+    // instruction in the block, emit it; otherwise we have the unconditional
+    // fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr,
@@ -2028,7 +2018,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
unsigned OpNum) {
- if (TargetRegisterInfo::isVirtualRegister(Op)) {
+ if (Register::isVirtualRegister(Op)) {
const TargetRegisterClass *RegClass =
TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
if (!MRI.constrainRegClass(Op, RegClass)) {
@@ -2236,7 +2226,7 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
bool Op0IsKill, uint32_t Idx) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
- assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ assert(Register::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
@@ -2417,10 +2407,9 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
} else
return nullptr;
- bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr;
- bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
- bool IsDereferenceable =
- I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr;
+ bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal);
+ bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load);
+ bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
AAMDNodes AAInfo;
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 8b1759246b76..cf6711adad48 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -424,7 +425,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
unsigned BitWidth = IntVT.getSizeInBits();
unsigned DestReg = ValueMap[PN];
- if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ if (!Register::isVirtualRegister(DestReg))
return;
LiveOutRegInfo.grow(DestReg);
LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
@@ -445,7 +446,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
"CopyToReg node was created.");
unsigned SrcReg = ValueMap[V];
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (!Register::isVirtualRegister(SrcReg)) {
DestLOI.IsValid = false;
return;
}
@@ -480,7 +481,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
"its CopyToReg node was created.");
unsigned SrcReg = ValueMap[V];
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (!Register::isVirtualRegister(SrcReg)) {
DestLOI.IsValid = false;
return;
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 9bc07d35dfc5..c5095995ec2e 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -71,7 +71,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
continue;
if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
- if (TargetRegisterInfo::isPhysicalRegister(RN->getReg()))
+ if (Register::isPhysicalRegister(RN->getReg()))
continue;
NumImpUses = N - I;
break;
@@ -86,7 +86,7 @@ void InstrEmitter::
EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
unsigned VRBase = 0;
- if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (Register::isVirtualRegister(SrcReg)) {
// Just use the input register directly!
SDValue Op(Node, ResNo);
if (IsClone)
@@ -114,7 +114,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == ResNo) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ if (Register::isVirtualRegister(DestReg)) {
VRBase = DestReg;
Match = false;
} else if (DestReg != SrcReg)
@@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
UseRC = RC;
else if (RC) {
const TargetRegisterClass *ComRC =
- TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy);
+ TRI->getCommonSubClass(UseRC, RC);
// If multiple uses expect disjoint register classes, we emit
// copies in AddRegisterOperand.
if (ComRC)
@@ -219,7 +219,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ assert(Register::isPhysicalRegister(VRBase));
MIB.addReg(VRBase, RegState::Define);
}
@@ -229,7 +229,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == i) {
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
if (RegRC == RC) {
VRBase = Reg;
@@ -272,7 +272,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
// does not include operand register class info.
const TargetRegisterClass *RC = TLI->getRegClassFor(
Op.getSimpleValueType(), Op.getNode()->isDivergent());
- unsigned VReg = MRI->createVirtualRegister(RC);
+ Register VReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
return VReg;
@@ -319,7 +319,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
if (!ConstrainedRC) {
OpRC = TRI->getAllocatableClass(OpRC);
assert(OpRC && "Constraints cannot be fulfilled for allocation");
- unsigned NewVReg = MRI->createVirtualRegister(OpRC);
+ Register NewVReg = MRI->createVirtualRegister(OpRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
VReg = NewVReg;
@@ -385,9 +385,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
(IIRC && TRI->isDivergentRegClass(IIRC)))
: nullptr;
- if (OpRC && IIRC && OpRC != IIRC &&
- TargetRegisterInfo::isVirtualRegister(VReg)) {
- unsigned NewVReg = MRI->createVirtualRegister(IIRC);
+ if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) {
+ Register NewVReg = MRI->createVirtualRegister(IIRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
VReg = NewVReg;
@@ -465,7 +464,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
// register instead.
RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
assert(RC && "No legal register class for VT supports that SubIdx");
- unsigned NewReg = MRI->createVirtualRegister(RC);
+ Register NewReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
.addReg(VReg);
return NewReg;
@@ -485,7 +484,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ if (Register::isVirtualRegister(DestReg)) {
VRBase = DestReg;
break;
}
@@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
unsigned Reg;
MachineInstr *DefMI;
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
- if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ if (R && Register::isPhysicalRegister(R->getReg())) {
Reg = R->getReg();
DefMI = nullptr;
} else {
@@ -529,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Reg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
// class.
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
Reg = ConstrainForSubReg(Reg, SubIdx,
Node->getOperand(0).getSimpleValueType(),
Node->isDivergent(), Node->getDebugLoc());
@@ -541,7 +540,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
MachineInstrBuilder CopyMI =
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
TII->get(TargetOpcode::COPY), VRBase);
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
CopyMI.addReg(Reg, 0, SubIdx);
else
CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
@@ -614,7 +613,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
const TargetRegisterClass *DstRC =
TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
- unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ Register NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg).addReg(VReg);
@@ -631,7 +630,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
bool IsClone, bool IsCloned) {
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
- unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
+ Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
unsigned NumOps = Node->getNumOperands();
@@ -649,7 +648,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
// Skip physical registers as they don't have a vreg to get and we'll
// insert copies for them in TwoAddressInstructionPass anyway.
- if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ if (!R || !Register::isPhysicalRegister(R->getReg())) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
@@ -678,7 +677,7 @@ MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ const DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
@@ -702,12 +701,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(SD->getFrameIx());
+
if (SD->isIndirect())
- // Push [fi + 0] onto the DIExpression stack.
- FrameMI.addImm(0);
- else
- // Push fi onto the DIExpression stack.
- FrameMI.addReg(0);
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
+ FrameMI.addReg(0);
return FrameMI.addMetadata(Var).addMetadata(Expr);
}
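
The hunk above folds the indirection into the DIExpression itself: instead of encoding "[fi + 0]" with an extra immediate operand on the DBG_VALUE, a DW_OP_deref is appended to the expression and the register operand is always emitted as 0. A minimal standalone sketch of that rewrite, assuming a flat opcode list stands in for DIExpression (only the DW_OP_deref value 0x06 is real DWARF):

#include <cstdint>
#include <vector>

static constexpr uint64_t DW_OP_deref = 0x06; // real DWARF opcode value

// Sketch of DIExpression::append(Expr, {dwarf::DW_OP_deref}); the expression
// is modeled as a flat list of DWARF opcodes.
std::vector<uint64_t> appendDerefIfIndirect(std::vector<uint64_t> Expr,
                                            bool IsIndirect) {
  if (IsIndirect)
    Expr.push_back(DW_OP_deref); // load through the frame-index address
  return Expr;
}
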
// Otherwise, we're going to create an instruction here.
@@ -753,9 +751,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// Indirect addressing is indicated by an Imm as the second parameter.
if (SD->isIndirect())
- MIB.addImm(0U);
- else
- MIB.addReg(0U, RegState::Debug);
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
+ MIB.addReg(0U, RegState::Debug);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
@@ -928,12 +926,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
//
// Collect all the used physreg defs, and make sure that any unused physreg
// defs are marked as dead.
- SmallVector<unsigned, 8> UsedRegs;
+ SmallVector<Register, 8> UsedRegs;
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = NumDefs; i < NumResults; ++i) {
- unsigned Reg = II.getImplicitDefs()[i - NumDefs];
+ Register Reg = II.getImplicitDefs()[i - NumDefs];
if (!Node->hasAnyUseOfValue(i))
continue;
// This implicitly defined physreg has a use.
@@ -960,8 +958,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// direct RegisterSDNode operands.
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
- unsigned Reg = R->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = R->getReg();
+ if (Reg.isPhysical())
UsedRegs.push_back(Reg);
}
}
@@ -995,8 +993,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::CopyToReg: {
unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
- if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
- SrcVal.isMachineOpcode() &&
+ if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() &&
SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
      // Instead of building a COPY to that vreg destination, build an
      // IMPLICIT_DEF instruction.
@@ -1093,16 +1090,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// FIXME: Add dead flags for physical and virtual registers defined.
// For now, mark physical register defs as implicit to help fast
// regalloc. This makes inline asm look a lot like calls.
- MIB.addReg(Reg, RegState::Define |
- getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg,
+ RegState::Define |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
case InlineAsm::Kind_Clobber:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
- getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg,
+ RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
ECRegs.push_back(Reg);
}
break;
@@ -1136,7 +1135,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// then remove the early-clobber flag.
for (unsigned Reg : ECRegs) {
if (MIB->readsRegister(Reg, TRI)) {
- MachineOperand *MO =
+ MachineOperand *MO =
MIB->findRegisterDefOperand(Reg, false, false, TRI);
assert(MO && "No def operand for clobbered register?");
MO->setIsEarlyClobber(false);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bf817f00f83d..f9fdf525240f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -161,6 +162,7 @@ private:
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl, SDValue ChainIn);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSPLAT_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
@@ -236,6 +238,16 @@ public:
}
ReplacedNode(Old);
}
+
+ void ReplaceNodeWithValue(SDValue Old, SDValue New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ DAG.ReplaceAllUsesOfValueWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
+ ReplacedNode(Old.getNode());
+ }
};
} // end anonymous namespace
@@ -493,8 +505,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// expand it.
EVT MemVT = ST->getMemoryVT();
const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
- *ST->getMemOperand())) {
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
@@ -608,8 +620,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
EVT MemVT = ST->getMemoryVT();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
- *ST->getMemOperand())) {
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
}
@@ -669,8 +681,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
const DataLayout &DL = DAG.getDataLayout();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
- *LD->getMemOperand())) {
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
}
break;
@@ -894,11 +906,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
if (SrcVT.getScalarType() == MVT::f16) {
EVT ISrcVT = SrcVT.changeTypeToInteger();
EVT IDestVT = DestVT.changeTypeToInteger();
- EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+ EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
- SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
- Chain, Ptr, ISrcVT,
- LD->getMemOperand());
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain,
+ Ptr, ISrcVT, LD->getMemOperand());
Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
Chain = Result.getValue(1);
break;
@@ -959,15 +970,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
#ifndef NDEBUG
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
- TargetLowering::TypeLegal ||
- TLI.isTypeLegal(Node->getValueType(i))) &&
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
"Unexpected illegal type!");
for (const SDValue &Op : Node->op_values())
assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
TargetLowering::TypeLegal ||
- TLI.isTypeLegal(Op.getValueType()) ||
Op.getOpcode() == ISD::TargetConstant ||
Op.getOpcode() == ISD::Register) &&
"Unexpected illegal type!");
@@ -1004,7 +1013,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
- case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG: {
EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
@@ -1097,38 +1105,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
break;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_ROUND:
- case ISD::STRICT_FP_EXTEND:
- // These pseudo-ops get legalized as if they were their non-strict
- // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
- // is also legal, but if ISD::FSQRT requires expansion then so does
- // ISD::STRICT_FSQRT.
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These pseudo-ops are the same as the other STRICT_ ops except
+ // they are registered with setOperationAction() using the input type
+ // instead of the output type.
Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getValueType(0));
+ Node->getOperand(1).getValueType());
break;
case ISD::SADDSAT:
case ISD::UADDSAT:
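
The four STRICT_L*RINT/L*ROUND cases above produce an integer result, so querying the action table with the result type would say nothing about which floating-point type needs support; they are therefore keyed by the type of operand 1, the FP input. A simplified standalone model of that lookup, with hypothetical enums standing in for ISD opcodes and MVTs:

#include <map>
#include <utility>

enum Opcode { STRICT_FSQRT, STRICT_LRINT };
enum Ty { F32, F64, I64 };
enum Action { Legal, Expand };
using ActionTable = std::map<std::pair<Opcode, Ty>, Action>;

// STRICT_LRINT-style ops are registered by their FP operand type; everything
// else is registered by its result type, mirroring the dispatch above.
Action getStrictFPAction(const ActionTable &T, Opcode Opc, Ty ResultTy,
                         Ty OperandTy) {
  Ty Key = (Opc == STRICT_LRINT) ? OperandTy : ResultTy;
  auto It = T.find({Opc, Key});
  return It == T.end() ? Expand : It->second;
}
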
@@ -1139,7 +1124,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: {
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -1650,7 +1636,6 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
- bool NeedSwap = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
@@ -1664,6 +1649,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
return true;
}
// Swapping operands didn't work. Try inverting the condition.
+ bool NeedSwap = false;
InvCC = getSetCCInverse(CCCode, OpVT.isInteger());
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// If inverting the condition is not enough, try swapping operands
@@ -2021,6 +2007,14 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
return ExpandVectorBuildThroughStack(Node);
}
+SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue SplatVal = Node->getOperand(0);
+
+ return DAG.getSplatBuildVector(VT, DL, SplatVal);
+}
+
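
ExpandSPLAT_VECTOR simply rebuilds the splat as a BUILD_VECTOR whose lanes all repeat the scalar operand; getSplatBuildVector is the existing SelectionDAG helper that performs the replication. A standalone analogue of the transformation, assuming plain doubles stand in for SDValues:

#include <vector>

// Analogue of DAG.getSplatBuildVector(VT, DL, SplatVal): replicate one
// scalar into every lane of a fixed-width vector.
std::vector<double> expandSplatVector(double SplatVal, unsigned NumLanes) {
  return std::vector<double>(NumLanes, SplatVal);
}
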
// Expand a node into a call to a libcall. If the result value
// does not fit into a register, return the lo part and set the hi part to the
// by-reg argument. If it does fit into a single register, return the result
@@ -2074,12 +2068,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
if (!CallInfo.second.getNode()) {
- LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
+ LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG));
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
}
- LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
+ LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG));
return CallInfo.first;
}
@@ -2167,6 +2161,9 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
+ if (Node->isStrictFPOpcode())
+ Node = DAG.mutateStrictFPToFP(Node);
+
RTLIB::Libcall LC;
switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
@@ -2815,6 +2812,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::STRICT_FP_ROUND:
+ // This expansion does not honor the "strict" properties anyway,
+ // so prefer falling back to the non-strict operation if legal.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getValueType(0),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2829,6 +2832,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::STRICT_FP_EXTEND:
+ // This expansion does not honor the "strict" properties anyway,
+ // so prefer falling back to the non-strict operation if legal.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getOperand(1).getValueType(),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2873,19 +2882,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::FP_ROUND_INREG: {
- // The only way we can lower this is to turn it into a TRUNCSTORE,
- // EXTLOAD pair, targeting a temporary location (a stack slot).
-
- // NOTE: there is a choice here between constantly creating new stack
- // slots and always reusing the same one. We currently always create
- // new ones, as reuse may inhibit scheduling.
- EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
- Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
- Node->getValueType(0), dl);
- Results.push_back(Tmp1);
- break;
- }
case ISD::UINT_TO_FP:
if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
Results.push_back(Tmp1);
@@ -2901,33 +2897,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
Results.push_back(Tmp1);
break;
+ case ISD::STRICT_FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n");
+ return true;
+ }
+ break;
case ISD::FP_TO_UINT:
- if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG))
Results.push_back(Tmp1);
break;
- case ISD::LROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
- RTLIB::LROUND_F64, RTLIB::LROUND_F80,
- RTLIB::LROUND_F128,
- RTLIB::LROUND_PPCF128));
- break;
- case ISD::LLROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
- RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
- RTLIB::LLROUND_F128,
- RTLIB::LLROUND_PPCF128));
- break;
- case ISD::LRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
- RTLIB::LRINT_F64, RTLIB::LRINT_F80,
- RTLIB::LRINT_F128,
- RTLIB::LRINT_PPCF128));
- break;
- case ISD::LLRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
- RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
- RTLIB::LLRINT_F128,
- RTLIB::LLRINT_PPCF128));
+ case ISD::STRICT_FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) {
+ // Relink the chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2);
+ // Replace the new UINT result.
+ ReplaceNodeWithValue(SDValue(Node, 0), Tmp1);
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n");
+ return true;
+ }
break;
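
STRICT_FP_TO_UINT carries two results, the converted value and the strict-FP chain, so the expansion above has to patch both edges separately: the chain (result 1) is relinked to Tmp2 with ReplaceAllUsesOfValueWith, and the integer value (result 0) is rerouted through the new ReplaceNodeWithValue helper. A hypothetical standalone model of that two-edge remap, with (node id, result number) pairs standing in for SDValues:

#include <map>
#include <utility>

using Edge = std::pair<int, unsigned>; // (node id, result number)

// Model of the two-step replacement above: each result of the expanded
// strict node is remapped independently.
void remapStrictFpToUint(std::map<Edge, Edge> &Remap, int StrictNode,
                         Edge NewValue, Edge NewChain) {
  Remap[{StrictNode, 0}] = NewValue; // converted unsigned result (Tmp1)
  Remap[{StrictNode, 1}] = NewChain; // strict-FP chain token (Tmp2)
}
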
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
@@ -3348,6 +3337,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
case ISD::ADDCARRY:
@@ -3662,6 +3652,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::BUILD_VECTOR:
Results.push_back(ExpandBUILD_VECTOR(Node));
break;
+ case ISD::SPLAT_VECTOR:
+ Results.push_back(ExpandSPLAT_VECTOR(Node));
+ break;
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: {
@@ -3715,6 +3708,33 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
+ if (Results.empty() && Node->isStrictFPOpcode()) {
+ // FIXME: We were asked to expand a strict floating-point operation,
+ // but there is currently no expansion implemented that would preserve
+ // the "strict" properties. For now, we just fall back to the non-strict
+ // version if that is legal on the target. The actual mutation of the
+ // operation will happen in SelectionDAGISel::DoInstructionSelection.
+ switch (Node->getOpcode()) {
+ default:
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ return true;
+ break;
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These are registered by the operand type instead of the value
+ // type. Reflect that here.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType())
+ == TargetLowering::Legal)
+ return true;
+ break;
+ }
+ }
+
// Replace the original node with the legalized result.
if (Results.empty()) {
LLVM_DEBUG(dbgs() << "Cannot expand node\n");
@@ -3956,6 +3976,34 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::POW_F80, RTLIB::POW_F128,
RTLIB::POW_PPCF128));
break;
+ case ISD::LROUND:
+ case ISD::STRICT_LROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128));
+ break;
+ case ISD::LLROUND:
+ case ISD::STRICT_LLROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128));
+ break;
+ case ISD::LRINT:
+ case ISD::STRICT_LRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128));
+ break;
+ case ISD::LLRINT:
+ case ISD::STRICT_LLRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128));
+ break;
case ISD::FDIV:
Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
RTLIB::DIV_F80, RTLIB::DIV_F128,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b4849b2881e6..72d052473f11 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -42,10 +42,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
}
//===----------------------------------------------------------------------===//
-// Convert Float Results to Integer for Non-HW-supported Operations.
+// Convert Float Results to Integer
//===----------------------------------------------------------------------===//
-bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
dbgs() << "\n");
SDValue R = SDValue();
@@ -58,26 +58,18 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
- case ISD::Register:
- case ISD::CopyFromReg:
- case ISD::CopyToReg:
- assert(isLegalInHWReg(N->getValueType(ResNo)) &&
- "Unsupported SoftenFloatRes opcode!");
- // Only when isLegalInHWReg, we can skip check of the operands.
- R = SDValue(N, ResNo);
- break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
- case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
- case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break;
+ case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break;
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
- case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
- case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
@@ -89,7 +81,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
- case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
@@ -102,30 +94,24 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
- case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
- case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
- case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
}
- if (R.getNode() && R.getNode() != N) {
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode()) {
+ assert(R.getNode() != N);
SetSoftenedFloat(SDValue(N, ResNo), R);
- // Return true only if the node is changed, assuming that the operands
- // are also converted when necessary.
- return true;
}
-
- // Otherwise, return false to tell caller to scan operands.
- return false;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) {
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
@@ -144,10 +130,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
BitConvertToInteger(N->getOperand(1)));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, we can load better from the constant pool.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) {
ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
// In ppcf128, the high 64 bits are always first in memory regardless
// of Endianness. LLVM's APFloat representation is not Endian sensitive,
@@ -172,19 +155,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, keep the extracted value in register.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
NewOp.getValueType().getVectorElementType(),
NewOp, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, FABS can be implemented as native bitwise operations.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned Size = NVT.getSizeInBits();
@@ -200,57 +177,69 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMIN_F32,
RTLIB::FMIN_F64,
RTLIB::FMIN_F80,
RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
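
Every SoftenFloatRes_* routine from here on threads a TargetLowering::MakeLibCallOptions through makeLibCall in place of the old bare signedness boolean; setTypeListBeforeSoften records the operand and result types as they were before softening replaced the FP types with integers, so calling-convention decisions can still see the original signature. A simplified standalone model of the option struct, assuming integer ids stand in for EVTs:

#include <utility>
#include <vector>

// Simplified model of TargetLowering::MakeLibCallOptions: carries the
// pre-softening operand/result types plus the sign-extension flag that the
// old boolean makeLibCall parameter used to encode.
struct MakeLibCallOptionsModel {
  std::vector<int> OpsVTBeforeSoften; // original operand types
  int RetVTBeforeSoften = -1;         // original result type
  bool IsSoftened = false;
  bool IsSExt = false;

  MakeLibCallOptionsModel &setTypeListBeforeSoften(std::vector<int> OpsVT,
                                                   int RetVT, bool Value) {
    OpsVTBeforeSoften = std::move(OpsVT);
    RetVTBeforeSoften = RetVT;
    IsSoftened = Value;
    return *this;
  }
  MakeLibCallOptionsModel &setSExt(bool Value) {
    IsSExt = Value;
    return *this;
  }
};
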
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMAX_F32,
RTLIB::FMAX_F64,
RTLIB::FMAX_F80,
RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::ADD_F32,
RTLIB::ADD_F64,
RTLIB::ADD_F80,
RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::CEIL_F32,
RTLIB::CEIL_F64,
RTLIB::CEIL_F80,
RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(0));
SDValue RHS = BitConvertToInteger(N->getOperand(1));
SDLoc dl(N);
@@ -301,98 +290,123 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::COS_F32,
RTLIB::COS_F64,
RTLIB::COS_F80,
RTLIB::COS_F128,
RTLIB::COS_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::EXP_F32,
RTLIB::EXP_F64,
RTLIB::EXP_F80,
RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::EXP2_F32,
RTLIB::EXP2_F64,
RTLIB::EXP2_F80,
RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FLOOR_F32,
RTLIB::FLOOR_F64,
RTLIB::FLOOR_F80,
RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::LOG_F32,
RTLIB::LOG_F64,
RTLIB::LOG_F80,
RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::LOG2_F32,
RTLIB::LOG2_F64,
RTLIB::LOG2_F80,
RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::LOG10_F32,
RTLIB::LOG10_F64,
RTLIB::LOG10_F80,
RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -400,48 +414,57 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)),
GetSoftenedFloat(N->getOperand(2)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[3] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType(),
+ N->getOperand(2).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, FNEG can be implemented as native bitwise operations.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- EVT FloatVT = N->getValueType(ResNo);
+ EVT FloatVT = N->getValueType(0);
if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) {
// Expand Y = FNEG(X) -> Y = X ^ sign mask
APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
@@ -452,13 +475,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
GetSoftenedFloat(N->getOperand(0)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, false, dl).first;
+ NVT, Ops, CallOptions, dl).first;
}
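
For f32, f64 and f128 the softened FNEG needs no libcall at all: per the comment above it expands to an integer XOR with the sign mask, and only ppcf128 falls back to the SUB(-0.0, X) call. A standalone sketch of the f32 case:

#include <cstdint>
#include <cstring>

// "Y = FNEG(X) -> Y = X ^ sign mask" on the integer image of an f32,
// i.e. APInt::getSignMask(32) == 1u << 31.
float softenedFNeg(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits ^= UINT32_C(1) << 31; // flip the sign bit
  std::memcpy(&X, &Bits, sizeof(Bits));
  return X;
}
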
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -485,7 +509,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -493,15 +520,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
- false, SDLoc(N)).first;
+ CallOptions, SDLoc(N)).first;
if (N->getValueType(0) == MVT::f32)
return Res32;
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -515,20 +545,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::POW_F32,
RTLIB::POW_F64,
RTLIB::POW_F80,
RTLIB::POW_F128,
RTLIB::POW_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -536,87 +573,111 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
"Unsupported power type!");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::POWI_F32,
RTLIB::POWI_F64,
RTLIB::POWI_F80,
RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::REM_F32,
RTLIB::REM_F64,
RTLIB::REM_F80,
RTLIB::REM_F128,
RTLIB::REM_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::RINT_F32,
RTLIB::RINT_F64,
RTLIB::RINT_F80,
RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::ROUND_F32,
RTLIB::ROUND_F64,
RTLIB::ROUND_F80,
RTLIB::ROUND_F128,
RTLIB::ROUND_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SIN_F32,
RTLIB::SIN_F64,
RTLIB::SIN_F80,
RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SQRT_F32,
RTLIB::SQRT_F64,
RTLIB::SQRT_F80,
RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
@@ -625,17 +686,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
RTLIB::TRUNC_F64,
RTLIB::TRUNC_F80,
RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
- bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo));
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
LoadSDNode *L = cast<LoadSDNode>(N);
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
@@ -666,23 +729,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
- if (LegalInHWReg)
- return ExtendNode;
return BitConvertToInteger(ExtendNode);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) {
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(1));
SDValue RHS = GetSoftenedFloat(N->getOperand(2));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS, RHS);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) {
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(2));
SDValue RHS = GetSoftenedFloat(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
@@ -736,14 +793,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
// Sign/zero extend the argument if the libcall takes a larger type.
SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
NVT, N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(Signed);
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- Op, Signed, dl).first;
+ Op, CallOptions, dl).first;
}
//===----------------------------------------------------------------------===//
-// Convert Float Operand to Integer for Non-HW-supported Operations.
+// Convert Float Operand to Integer
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
@@ -753,8 +814,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
- if (CanSkipSoftenFloatOperand(N, OpNo))
- return false;
#ifndef NDEBUG
dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
@@ -762,11 +821,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
llvm_unreachable("Do not know how to soften this operator's operand!");
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
- case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
- case ISD::FABS: Res = SoftenFloatOp_FABS(N); break;
- case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
- case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break;
case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
@@ -776,19 +831,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break;
case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break;
case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break;
- case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
- case ISD::STORE:
- Res = SoftenFloatOp_STORE(N, OpNo);
- // Do not try to analyze or soften this node again if the value is
- // or can be held in a register. In that case, Res.getNode() should
- // be equal to N.
- if (Res.getNode() == N &&
- isLegalInHWReg(N->getOperand(OpNo).getValueType()))
- return false;
- // Otherwise, we need to reanalyze and lower the new Res nodes.
- break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -800,60 +845,16 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
- "Invalid operand expansion");
+ "Invalid operand promotion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
-bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
- if (!isLegalInHWReg(N->getOperand(OpNo).getValueType()))
- return false;
-
- // When the operand type can be kept in registers there is nothing to do for
- // the following opcodes.
- switch (N->getOperand(OpNo).getOpcode()) {
- case ISD::BITCAST:
- case ISD::ConstantFP:
- case ISD::CopyFromReg:
- case ISD::CopyToReg:
- case ISD::FABS:
- case ISD::FCOPYSIGN:
- case ISD::FNEG:
- case ISD::Register:
- case ISD::SELECT:
- case ISD::SELECT_CC:
- return true;
- }
-
- switch (N->getOpcode()) {
- case ISD::ConstantFP: // Leaf node.
- case ISD::CopyFromReg: // Operand is a register that we know to be left
- // unchanged by SoftenFloatResult().
- case ISD::Register: // Leaf node.
- return true;
- }
- return false;
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
- return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
- GetSoftenedFloat(N->getOperand(0)));
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) {
- SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
- SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
-
- if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
- return SDValue();
-
- if (N->getNumOperands() == 3)
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0);
+ SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,
- N->getOperand(3)),
- 0);
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
@@ -868,7 +869,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
- return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
}
@@ -885,7 +889,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -895,7 +902,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
EVT VT = NewLHS.getValueType();
NewLHS = GetSoftenedFloat(NewLHS);
NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(2), N->getOperand(3));
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -911,34 +919,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) {
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
-
- if (Op == N->getOperand(0))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
- SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
- SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
-
- if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0);
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) {
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
-
- if (Op == N->getOperand(0))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
EVT SVT = N->getOperand(0).getValueType();
@@ -962,23 +942,15 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first;
// Truncate the result if the libcall returns a larger type.
return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) {
- SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
- SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
-
- if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2),
- 0);
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
@@ -986,7 +958,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
EVT VT = NewLHS.getValueType();
NewLHS = GetSoftenedFloat(NewLHS);
NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(0), N->getOperand(1));
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -1009,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
EVT VT = NewLHS.getValueType();
NewLHS = GetSoftenedFloat(NewLHS);
NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(0), N->getOperand(1));
// If softenSetCCOperands returned a scalar, use it.
if (!NewRHS.getNode()) {
@@ -1047,13 +1021,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LROUND_F32,
RTLIB::LROUND_F64,
RTLIB::LROUND_F80,
RTLIB::LROUND_F128,
RTLIB::LROUND_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
@@ -1061,13 +1038,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLROUND_F32,
RTLIB::LLROUND_F64,
RTLIB::LLROUND_F80,
RTLIB::LLROUND_F128,
RTLIB::LLROUND_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
@@ -1075,13 +1055,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LRINT_F32,
RTLIB::LRINT_F64,
RTLIB::LRINT_F80,
RTLIB::LRINT_F128,
RTLIB::LRINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
@@ -1089,13 +1072,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLRINT_F32,
RTLIB::LLRINT_F64,
RTLIB::LLRINT_F80,
RTLIB::LLRINT_F128,
RTLIB::LLRINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
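
Throughout this patch the bare 'isSigned' boolean argument of TLI.makeLibCall is replaced by a TargetLowering::MakeLibCallOptions bundle. A condensed sketch of the new calling pattern, assembled from the hunks above (N, LC, Op and RetVT stand for whatever the caller already has in hand):

    // The options object carries what used to be the bare flag.
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(true); // sign-extend integer arguments when promoting
    // For softened FP operands, also record the pre-softening types so the
    // call lowering can still see the original FP signature.
    EVT OpsVT[1] = { N->getOperand(0).getValueType() };
    CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
    SDValue Res =
        TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, SDLoc(N)).first;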
//===----------------------------------------------------------------------===//
@@ -1267,13 +1253,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1341,13 +1328,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1355,13 +1343,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1470,13 +1459,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1555,7 +1545,9 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first;
GetPairElements(Hi, Lo, Hi);
}
@@ -1732,7 +1724,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1741,8 +1734,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
- false, dl).first;
+ CallOptions, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
@@ -1807,49 +1801,53 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LROUND_F32,
RTLIB::LROUND_F64,
RTLIB::LROUND_F80,
RTLIB::LROUND_F128,
RTLIB::LROUND_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLROUND_F32,
RTLIB::LLROUND_F64,
RTLIB::LLROUND_F80,
RTLIB::LLROUND_F128,
RTLIB::LLROUND_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LRINT_F32,
RTLIB::LRINT_F64,
RTLIB::LRINT_F80,
RTLIB::LRINT_F128,
RTLIB::LRINT_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLRINT_F32,
RTLIB::LLRINT_F64,
RTLIB::LLRINT_F80,
RTLIB::LLRINT_F128,
RTLIB::LLRINT_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
//===----------------------------------------------------------------------===//
@@ -2002,6 +2000,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
dbgs() << "\n");
SDValue R = SDValue();
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
switch (N->getOpcode()) {
// These opcodes cannot appear if promotion of FP16 is done in the backend
// instead of Clang
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 15ac45c37c66..d5c1b539adbd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -100,6 +100,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_BUILD_VECTOR(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntRes_SPLAT_VECTOR(N); break;
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
@@ -112,6 +114,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
@@ -148,9 +152,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
+
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break;
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
+
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
case ISD::ATOMIC_LOAD:
@@ -494,7 +501,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
NewOpc = ISD::FP_TO_SINT;
- SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+ if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+ NewOpc = ISD::STRICT_FP_TO_SINT;
+
+ SDValue Res;
+ if (N->isStrictFPOpcode()) {
+ Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other },
+ { N->getOperand(0), N->getOperand(1) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ } else
+ Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
@@ -503,7 +523,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example:
// before legalization: fp-to-uint16, 65534. -> 0xfffe
// after legalization: fp-to-sint32, 65534. -> 0x0000fffe
- return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_UINT) ?
ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
DAG.getValueType(N->getValueType(0).getScalarType()));
}
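
The zero-extension guarantee in the NOTE above is easy to check outside LLVM; a minimal standalone C++ program (ours, purely illustrative):

    #include <cstdint>
    #include <cstdio>

    int main() {
      double X = 65534.0;
      uint16_t U16 = (uint16_t)X; // fp-to-uint16 -> 0xfffe
      int32_t  S32 = (int32_t)X;  // fp-to-sint32 -> 0x0000fffe, zero extended
      printf("%#x %#x\n", U16, S32); // prints 0xfffe 0xfffe
      return 0;
    }

Because the promoted signed conversion can represent every value of the narrow unsigned type, the extra high bits always come out zero, which is what the AssertZext above encodes.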
@@ -590,7 +611,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
- N->getMemOperand());
+ N->getMemOperand(), N->getIndexType());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -623,48 +644,84 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
- // For promoting iN -> iM, this can be expanded by
- // 1. ANY_EXTEND iN to iM
- // 2. SHL by M-N
- // 3. [US][ADD|SUB]SAT
- // 4. L/ASHR by M-N
+ // If the promoted type is legal, we can convert this to:
+ // 1. ANY_EXTEND iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB]SAT
+ // 4. L/ASHR by M-N
+  // Otherwise it is more efficient to convert this to a min and a max
+  // operation in the higher-precision arithmetic.
SDLoc dl(N);
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
unsigned OldBits = Op1.getScalarValueSizeInBits();
unsigned Opcode = N->getOpcode();
- unsigned ShiftOp;
- switch (Opcode) {
- case ISD::SADDSAT:
- case ISD::SSUBSAT:
- ShiftOp = ISD::SRA;
- break;
- case ISD::UADDSAT:
- case ISD::USUBSAT:
- ShiftOp = ISD::SRL;
- break;
- default:
- llvm_unreachable("Expected opcode to be signed or unsigned saturation "
- "addition or subtraction");
- }
-
- SDValue Op1Promoted = GetPromotedInteger(Op1);
- SDValue Op2Promoted = GetPromotedInteger(Op2);
+ SDValue Op1Promoted, Op2Promoted;
+ if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
+ Op1Promoted = ZExtPromotedInteger(Op1);
+ Op2Promoted = ZExtPromotedInteger(Op2);
+ } else {
+ Op1Promoted = SExtPromotedInteger(Op1);
+ Op2Promoted = SExtPromotedInteger(Op2);
+ }
EVT PromotedType = Op1Promoted.getValueType();
unsigned NewBits = PromotedType.getScalarSizeInBits();
- unsigned SHLAmount = NewBits - OldBits;
- EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
- SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
- Op1Promoted =
- DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
- Op2Promoted =
- DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
- SDValue Result =
- DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
- return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+ if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+ unsigned ShiftOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ ShiftOp = ISD::SRA;
+ break;
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ ShiftOp = ISD::SRL;
+ break;
+ default:
+ llvm_unreachable("Expected opcode to be signed or unsigned saturation "
+ "addition or subtraction");
+ }
+
+ unsigned SHLAmount = NewBits - OldBits;
+ EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+
+ SDValue Result =
+ DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+ } else {
+ if (Opcode == ISD::USUBSAT) {
+ SDValue Max =
+ DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted);
+ }
+
+ if (Opcode == ISD::UADDSAT) {
+ APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Add =
+ DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+ }
+
+ unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
+ APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
+ APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Result =
+ DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
+ Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
+ Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
+ return Result;
+ }
}
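
As a standalone illustration of the two fallback expansions above (plain C++ over concrete widths; the helper names are ours, not LLVM's):

    #include <algorithm>
    #include <cstdint>

    // i8 saddsat in promoted i32 arithmetic: SExt, ADD, SMIN, SMAX.
    int8_t SAddSat8(int8_t A, int8_t B) {
      int32_t Sum = int32_t(A) + int32_t(B);
      Sum = std::min(Sum, int32_t(INT8_MAX)); // clamp against SatMax
      Sum = std::max(Sum, int32_t(INT8_MIN)); // clamp against SatMin
      return int8_t(Sum);
    }

    // i8 usubsat as sub(umax(A, B), B): A - B, or 0 whenever A < B.
    uint8_t USubSat8(uint8_t A, uint8_t B) {
      uint32_t Max = std::max(uint32_t(A), uint32_t(B));
      return uint8_t(Max - uint32_t(B));
    }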
SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
@@ -673,6 +730,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
SDValue Op1Promoted, Op2Promoted;
bool Signed =
N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
+ bool Saturating =
+ N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
@@ -685,7 +744,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
unsigned DiffSize =
PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
- bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
if (Saturating) {
// Promoting the operand and result values changes the saturation width,
  // which extends the values that we clamp to on saturation. This could be
@@ -1110,6 +1168,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntOp_SPLAT_VECTOR(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
@@ -1148,7 +1208,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break;
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
@@ -1339,6 +1400,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
GetPromotedInteger(N->getOperand(0))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) {
+ // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the
+ // operand in place.
+ return SDValue(
+ DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Only know how to promote the condition!");
SDValue Cond = N->getOperand(0);
@@ -1454,8 +1522,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
EVT DataVT = N->getValueType(0);
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
- // Need to sign extend the index since the bits will likely be used.
- NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
@@ -1470,8 +1542,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
EVT DataVT = N->getValue().getValueType();
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
- // Need to sign extend the index since the bits will likely be used.
- NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
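
Why the index extension above must follow the index signedness, as a standalone sketch (plain C++; the helper is ours): the same i16 bit pattern 0xFFFF has to promote to -1 for a signed gather index but to 65535 for an unsigned one.

    #include <cstdint>

    int64_t PromoteGatherIndex(uint16_t IdxBits, bool IndexSigned) {
      if (IndexSigned)
        return int64_t(int16_t(IdxBits)); // SExtPromotedInteger: 0xFFFF -> -1
      return int64_t(IdxBits);            // ZExtPromotedInteger: 0xFFFF -> 65535
    }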
@@ -1715,7 +1791,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -2473,7 +2550,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2488,7 +2567,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2514,7 +2594,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2540,7 +2622,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2743,7 +2827,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
}
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first,
Lo, Hi);
}
@@ -2777,38 +2863,53 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
uint64_t Scale = N->getConstantOperandVal(2);
- bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
- EVT BoolVT = getSetCCResultType(VT);
- SDValue Zero = DAG.getConstant(0, dl, VT);
+ bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT ||
+ N->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (N->getOpcode() == ISD::SMULFIX ||
+ N->getOpcode() == ISD::SMULFIXSAT);
+
+  // Handle the special case when the scale is zero.
if (!Scale) {
SDValue Result;
if (!Saturating) {
Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
} else {
- Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ EVT BoolVT = getSetCCResultType(VT);
+ unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO;
+ Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
-
- APInt MinVal = APInt::getSignedMinValue(VTSize);
- APInt MaxVal = APInt::getSignedMaxValue(VTSize);
- SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
- Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ if (Signed) {
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else {
+ // For unsigned multiplication, we only need to check the max since we
+ // can't really overflow towards zero.
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product);
+ }
}
SplitInteger(Result, Lo, Hi);
return;
}
+  // For SMULFIX[SAT] we only expect to find Scale < VTSize, but this assert
+  // also covers the unhandled cases below, while still being valid for
+  // UMULFIX[SAT].
+ assert(Scale <= VTSize && "Scale can't be larger than the value type size.");
+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue LL, LH, RL, RH;
GetExpandedInteger(LHS, LL, LH);
GetExpandedInteger(RHS, RL, RH);
SmallVector<SDValue, 4> Result;
- bool Signed = (N->getOpcode() == ISD::SMULFIX ||
- N->getOpcode() == ISD::SMULFIXSAT);
unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
@@ -2822,19 +2923,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
"the size of the current value type");
EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- // Shift whole amount by scale.
- SDValue ResultLL = Result[0];
- SDValue ResultLH = Result[1];
- SDValue ResultHL = Result[2];
- SDValue ResultHH = Result[3];
-
- SDValue SatMax, SatMin;
- SDValue NVTZero = DAG.getConstant(0, dl, NVT);
- SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
- EVT BoolNVT = getSetCCResultType(NVT);
-
- // After getting the multplication result in 4 parts, we need to perform a
+ // After getting the multiplication result in 4 parts, we need to perform a
// shift right by the amount of the scale to get the result in that scale.
+ //
  // Let's say we multiply two 64-bit numbers. The resulting value can be held in
// 128 bits that are cut into 4 32-bit parts:
//
@@ -2846,123 +2937,135 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
//
// |NVTSize-|
//
- // The resulting Lo and Hi will only need to be one of these 32-bit parts
- // after shifting.
+ // The resulting Lo and Hi would normally be in LL and LH after the shift. But
+  // to avoid unnecessary shifting of all 4 parts, we can adjust the shift
+ // amount and get Lo and Hi using two funnel shifts. Or for the special case
+ // when Scale is a multiple of NVTSize we can just pick the result without
+ // shifting.
+ uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
+ if (Scale % NVTSize) {
+ SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
+ ShiftAmount);
+ Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
+ ShiftAmount);
+ } else {
+ Lo = Result[Part0];
+ Hi = Result[Part0 + 1];
+ }
+
+  // Unless saturation is requested, we are done. The result is in <Hi,Lo>.
+ if (!Saturating)
+ return;
+
+  // Cannot overflow when there is no integer part.
+ if (Scale == VTSize)
+ return;
+
+ // To handle saturation we must check for overflow in the multiplication.
+ //
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result)
+ // aren't all zeroes.
+ //
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result)
+ // aren't all ones or all zeroes.
+ //
+ // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+ // highest bit of HH determines saturation direction in the event of signed
+ // saturation.
+
+ SDValue ResultHL = Result[2];
+ SDValue ResultHH = Result[3];
+
+ SDValue SatMax, SatMin;
+ SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+ SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+ EVT BoolNVT = getSetCCResultType(NVT);
+
+ if (!Signed) {
+ if (Scale < NVTSize) {
+ // Overflow happened if ((HH | (HL >> Scale)) != 0).
+ SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getConstant(Scale, dl, ShiftTy));
+ SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
+ SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
+ } else if (Scale == NVTSize) {
+ // Overflow happened if (HH != 0).
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
+ } else if (Scale < VTSize) {
+ // Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
+ SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getConstant(Scale - NVTSize, dl,
+ ShiftTy));
+ SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
+ } else
+ llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
+ "(and saturation can't happen with Scale==VTSize).");
+
+ Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo);
+ return;
+ }
+
if (Scale < NVTSize) {
- // If the scale is less than the size of the VT we expand to, the Hi and
- // Lo of the result will be in the first 2 parts of the result after
- // shifting right. This only requires shifting by the scale as far as the
- // third part in the result (ResultHL).
- SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy);
- SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy);
- Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt);
- Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
- DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
-
- // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
- // highest bit of HH determines saturation direction in the event of
- // saturation.
// The number of overflow bits we can check are VTSize - Scale + 1 (we
// include the sign bit). If these top bits are > 0, then we overflowed past
// the max value. If these top bits are < -1, then we overflowed past the
// min value. Otherwise, we did not overflow.
- if (Saturating) {
- unsigned OverflowBits = VTSize - Scale + 1;
- assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
- "Extent of overflow bits must start within HL");
- SDValue HLHiMask = DAG.getConstant(
- APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
- SDValue HLLoMask = DAG.getConstant(
- APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
-
- // HH > 0 or HH == 0 && HL > HLLoMask
- SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
- SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
- SDValue HLPos =
- DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
- SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos));
-
- // HH < -1 or HH == -1 && HL < HLHiMask
- SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
- SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
- SDValue HLNeg =
- DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
- SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg));
- }
+ unsigned OverflowBits = VTSize - Scale + 1;
+ assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+ "Extent of overflow bits must start within HL");
+ SDValue HLHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+ SDValue HLLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+ // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT));
+ // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT));
} else if (Scale == NVTSize) {
- // If the scales are equal, Lo and Hi are ResultLH and Result HL,
- // respectively. Avoid shifting to prevent undefined behavior.
- Lo = ResultLH;
- Hi = ResultHL;
-
- // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1.
- // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0.
- if (Saturating) {
- SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
- SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
- SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
- SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg));
-
- SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
- SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
- SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
- SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos));
- }
+ // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg));
+ // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos));
} else if (Scale < VTSize) {
- // If the scale is instead less than the old VT size, but greater than or
- // equal to the expanded VT size, the first part of the result (ResultLL) is
- // no longer a part of Lo because it would be scaled out anyway. Instead we
- // can start shifting right from the fourth part (ResultHH) to the second
- // part (ResultLH), and Result LH will be the new Lo.
- SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy);
- SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy);
- Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
- Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
- DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
-
// This is similar to the case when we saturate if Scale < NVTSize, but we
- // only need to chech HH.
- if (Saturating) {
- unsigned OverflowBits = VTSize - Scale + 1;
- SDValue HHHiMask = DAG.getConstant(
- APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
- SDValue HHLoMask = DAG.getConstant(
- APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
-
- SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
- SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
- }
- } else if (Scale == VTSize) {
- assert(
- !Signed &&
- "Only unsigned types can have a scale equal to the operand bit width");
-
- Lo = ResultHL;
- Hi = ResultHH;
- } else {
- llvm_unreachable("Expected the scale to be less than or equal to the width "
- "of the operands");
- }
+ // only need to check HH.
+ unsigned OverflowBits = VTSize - Scale + 1;
+ SDValue HHHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
+ SDValue HHLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
+ SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
+ } else
+ llvm_unreachable("Illegal scale for signed fixed point mul.");
- if (Saturating) {
- APInt LHMax = APInt::getSignedMaxValue(NVTSize);
- APInt LLMax = APInt::getAllOnesValue(NVTSize);
- APInt LHMin = APInt::getSignedMinValue(NVTSize);
- Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi);
- Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi);
- Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo);
- Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
- }
+ // Saturate to signed maximum.
+ APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
+ APInt MaxLo = APInt::getAllOnesValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
+ // Saturate to signed minimum.
+ APInt MinHi = APInt::getSignedMinValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
}
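
To make the part arithmetic above concrete, here is a standalone model of the Lo extraction (plain C++; 16-bit parts standing in for the NVT-sized parts, names ours):

    #include <cstdint>

    // Result[0..3] holds the 64-bit product of a 32-bit fixed-point multiply,
    // least significant part first. Requires Scale <= 32 (the value type size).
    uint16_t ExtractLoPart(const uint16_t Result[4], unsigned Scale) {
      const unsigned NVTSize = 16;
      unsigned Part0 = Scale / NVTSize; // part holding the lowest bit needed
      unsigned Amt = Scale % NVTSize;
      if (Amt == 0)
        return Result[Part0];           // Scale is a multiple of NVTSize
      // FSHR(Result[Part0 + 1], Result[Part0], Amt): concatenate the adjacent
      // parts and shift right, keeping the low NVTSize bits.
      uint32_t Pair = (uint32_t(Result[Part0 + 1]) << NVTSize) | Result[Part0];
      return uint16_t(Pair >> Amt);
    }

Hi is obtained the same way one part higher, so at most two funnel shifts are needed instead of shifting all four parts.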
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
@@ -3030,7 +3133,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -3129,7 +3234,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
return;
}
@@ -3217,7 +3324,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -3373,7 +3482,8 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -3399,7 +3509,8 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -3759,7 +3870,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -3924,7 +4037,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
@@ -4033,6 +4148,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue SplatVal = N->getOperand(0);
+
+ assert(!SplatVal.getValueType().isVector() && "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+ EVT NOutElemVT = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal);
+
+ return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 14fd5be23ccb..b596c174a287 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -81,7 +81,6 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
SDValue Res(&Node, i);
- EVT VT = Res.getValueType();
bool Failed = false;
// Don't create a value in map.
auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0;
@@ -135,17 +134,13 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
dbgs() << "Unprocessed value in a map!";
Failed = true;
}
- } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) {
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
if (Mapped > 1) {
dbgs() << "Value with legal type was transformed!";
Failed = true;
}
} else {
- // If the value can be kept in HW registers, softening machinery can
- // leave it unchanged and don't put it to any map.
- if (Mapped == 0 &&
- !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat &&
- isLegalInHWReg(VT))) {
+ if (Mapped == 0) {
dbgs() << "Processed value not in any map!";
Failed = true;
} else if (Mapped & (Mapped - 1)) {
@@ -257,13 +252,9 @@ bool DAGTypeLegalizer::run() {
Changed = true;
goto NodeDone;
case TargetLowering::TypeSoftenFloat:
- Changed = SoftenFloatResult(N, i);
- if (Changed)
- goto NodeDone;
- // If not changed, the result type should be legally in register.
- assert(isLegalInHWReg(ResultVT) &&
- "Unchanged SoftenFloatResult should be legal in register!");
- goto ScanOperands;
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
case TargetLowering::TypeExpandFloat:
ExpandFloatResult(N, i);
Changed = true;
@@ -439,15 +430,9 @@ NodeDone:
bool Failed = false;
// Check that all result types are legal.
- // A value type is illegal if its TypeAction is not TypeLegal,
- // and TLI.RegClassForVT does not have a register class for this type.
- // For example, the x86_64 target has f128 that is not TypeLegal,
- // to have softened operators, but it also has FR128 register class to
- // pass and return f128 values. Hence a legalized node can have f128 type.
if (!IgnoreNodeResults(&Node))
for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i)
- if (!isTypeLegal(Node.getValueType(i)) &&
- !TLI.isTypeLegal(Node.getValueType(i))) {
+ if (!isTypeLegal(Node.getValueType(i))) {
dbgs() << "Result type " << i << " illegal: ";
Node.dump(&DAG);
Failed = true;
@@ -456,8 +441,7 @@ NodeDone:
// Check that all operand types are legal.
for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i)
if (!IgnoreNodeResults(Node.getOperand(i).getNode()) &&
- !isTypeLegal(Node.getOperand(i).getValueType()) &&
- !TLI.isTypeLegal(Node.getOperand(i).getValueType())) {
+ !isTypeLegal(Node.getOperand(i).getValueType())) {
dbgs() << "Operand type " << i << " illegal: ";
Node.getOperand(i).dump(&DAG);
Failed = true;
@@ -713,23 +697,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
- // f128 of x86_64 could be kept in SSE registers,
- // but sometimes softened to i128.
- assert((Result.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) ||
- Op.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
"Invalid type for softened float");
AnalyzeNewValue(Result);
auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
- // Allow repeated calls to save f128 type nodes
- // or any node with type that transforms to itself.
- // Many operations on these types are not softened.
- assert(((OpIdEntry == 0) ||
- Op.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
- "Node is already converted to integer!");
+ assert((OpIdEntry == 0) && "Node is already converted to integer!");
OpIdEntry = getTableId(Result);
}
@@ -1003,25 +977,27 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
/// Convert the node into a libcall with the same prototype.
SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
bool isSigned) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
unsigned NumOps = N->getNumOperands();
SDLoc dl(N);
if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions,
dl).first;
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions,
dl).first;
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions,
dl).first;
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first;
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first;
}
/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 1d489b1b3a33..4afbae69128a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -73,15 +73,6 @@ private:
return VT.isSimple() && TLI.isTypeLegal(VT);
}
- /// Return true if this type can be passed in registers.
- /// For example, x86_64's f128, should to be legally in registers
- /// and only some operations converted to library calls or integer
- /// bitwise operations.
- bool isLegalInHWReg(EVT VT) const {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- return VT == NVT && isSimpleLegalType(VT);
- }
-
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
@@ -306,6 +297,7 @@ private:
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
@@ -363,6 +355,7 @@ private:
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
@@ -472,14 +465,11 @@ private:
// Float to Integer Conversion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
- /// Given an operand Op of Float type, returns the integer if the Op is not
- /// supported in target HW and converted to the integer.
- /// The integer contains exactly the same bits as Op - only the type changed.
- /// For example, if Op is an f32 which was softened to an i32, then this
- /// method returns an i32, the bits of which coincide with those of Op.
- /// If the Op can be efficiently supported in target HW or the operand must
- /// stay in a register, the Op is not converted to an integer.
- /// In that case, the given op is returned.
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op
SDValue GetSoftenedFloat(SDValue Op) {
TableId Id = getTableId(Op);
auto Iter = SoftenedFloats.find(Id);
@@ -494,19 +484,19 @@ private:
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
- // Convert Float Results to Integer for Non-HW-supported Operations.
- bool SoftenFloatResult(SDNode *N, unsigned ResNo);
+ // Convert Float Results to Integer.
+ void SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
- SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_ConstantFP(SDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
- SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
SDValue SoftenFloatRes_FCOS(SDNode *N);
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
@@ -518,7 +508,7 @@ private:
SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
- SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
@@ -531,27 +521,17 @@ private:
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
- SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
- // Return true if we can skip softening the given operand or SDNode because
- // either it was soften before by SoftenFloatResult and references to the
- // operand were replaced by ReplaceValueWith or it's value type is legal in HW
- // registers and the operand can be left unchanged.
- bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
-
- // Convert Float Operand to Integer for Non-HW-supported Operations.
+ // Convert Float Operand to Integer.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
- SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
- SDValue SoftenFloatOp_FABS(SDNode *N);
- SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
- SDValue SoftenFloatOp_FNEG(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
@@ -559,7 +539,6 @@ private:
SDValue SoftenFloatOp_LLROUND(SDNode *N);
SDValue SoftenFloatOp_LRINT(SDNode *N);
SDValue SoftenFloatOp_LLRINT(SDNode *N);
- SDValue SoftenFloatOp_SELECT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -715,6 +694,7 @@ private:
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_VSELECT(SDNode *N);
@@ -830,6 +810,7 @@ private:
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
+ SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_Convert(SDNode *N);
@@ -933,6 +914,8 @@ private:
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVSETCC(const SDNode *N);
+
//===--------------------------------------------------------------------===//
// Generic Expansion: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 943f63f46c47..5562f400b6e1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -52,17 +52,11 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypePromoteFloat:
llvm_unreachable("Bitcast of a promotion-needing float should never need"
"expansion");
- case TargetLowering::TypeSoftenFloat: {
- // Expand the floating point operand only if it was converted to integers.
- // Otherwise, it is a legal type like f128 that can be saved in a register.
- auto SoftenedOp = GetSoftenedFloat(InOp);
- if (isLegalInHWReg(SoftenedOp.getValueType()))
- break;
- SplitInteger(SoftenedOp, Lo, Hi);
+ case TargetLowering::TypeSoftenFloat:
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- }
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat: {
auto &DL = DAG.getDataLayout();
@@ -509,23 +503,6 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
GetSplitOp(Op, Lo, Hi);
}
-static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N,
- SelectionDAG &DAG) {
- SDLoc DL(N);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // Split the inputs.
- SDValue Lo, Hi, LL, LH, RL, RH;
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-
- return std::make_pair(Lo, Hi);
-}
-
void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LL, LH, RL, RH, CL, CH;
SDLoc dl(N);
@@ -537,16 +514,25 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
if (Cond.getValueType().isVector()) {
if (SDValue Res = WidenVSELECTAndMask(N))
std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
- // It seems to improve code to generate two narrow SETCCs as opposed to
- // splitting a wide result vector.
- else if (Cond.getOpcode() == ISD::SETCC)
- std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG);
  // Check if there are already split versions of the vector available and
// use those instead of splitting the mask operand again.
else if (getTypeAction(Cond.getValueType()) ==
TargetLowering::TypeSplitVector)
GetSplitVector(Cond, CL, CH);
- else
+ // It seems to improve code to generate two narrow SETCCs as opposed to
+ // splitting a wide result vector.
+ else if (Cond.getOpcode() == ISD::SETCC) {
+ // If the condition is a vXi1 vector, and the LHS of the setcc is a legal
+ // type and the setcc result type is the same vXi1, then leave the setcc
+ // alone.
+ EVT CondLHSVT = Cond.getOperand(0).getValueType();
+ if (Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ isTypeLegal(CondLHSVT) &&
+ getSetCCResultType(CondLHSVT) == Cond.getValueType())
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ else
+ SplitVecRes_SETCC(Cond.getNode(), CL, CH);
+ } else
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
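
Restating the condition-splitting policy above as a small decision function (boolean stand-ins for the DAG and TargetLowering queries; not the real API):

    enum class CondPath { SplitWidenedMask, ReuseExistingSplit,
                          TwoNarrowSetCCs, SplitCondVector };

    CondPath chooseCondSplit(bool WidenedMask, bool AlreadySplit, bool IsSetCC,
                             bool LegalVXi1SetCC) {
      if (WidenedMask)  return CondPath::SplitWidenedMask;
      if (AlreadySplit) return CondPath::ReuseExistingSplit;
      if (IsSetCC && !LegalVXi1SetCC)
        return CondPath::TwoNarrowSetCCs; // via SplitVecRes_SETCC
      return CondPath::SplitCondVector;   // includes the legal vXi1 setcc case
    }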
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 10b8b705869e..15c3a0b6cfad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
@@ -333,14 +334,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
- // These pseudo-ops get legalized as if they were their non-strict
- // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
- // is also legal, but if ISD::FSQRT requires expansion then so does
- // ISD::STRICT_FSQRT.
- Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getValueType(0));
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ // If we're asked to expand a strict vector floating-point operation,
+ // by default we're going to simply unroll it. That is usually the
+ // best approach, except in the case where the resulting strict (scalar)
+ // operations would themselves use the fallback mutation to non-strict.
+ // In that specific case, just do the fallback on the vector op.
+ if (Action == TargetLowering::Expand &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal) {
+ EVT EltVT = Node->getValueType(0).getVectorElementType();
+ if (TLI.getOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Expand &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Legal)
+ Action = TargetLowering::Legal;
+ }
break;
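
The four queries above boil down to a small decision (stand-in enum and parameters; the real code asks TargetLowering about the vector type and its element type):

    enum Action { Legal, Expand /* other actions elided */ };

    Action strictVectorFPAction(Action VecAct, Action VecStrictFallback,
                                Action EltAct, Action EltStrictFallback) {
      // Expanding would unroll into scalar strict ops; if those would
      // themselves fall back to non-strict, do the fallback on the vector
      // op instead and keep it Legal here.
      if (VecAct == Expand && VecStrictFallback == Legal &&
          EltAct == Expand && EltStrictFallback == Legal)
        return Legal;
      return VecAct;
    }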
case ISD::ADD:
case ISD::SUB:
@@ -439,16 +453,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: {
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
break;
}
- case ISD::FP_ROUND_INREG:
- Action = TLI.getOperationAction(Node->getOpcode(),
- cast<VTSDNode>(Node->getOperand(1))->getVT());
- break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::VECREDUCE_ADD:
@@ -820,6 +831,13 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::SMULFIX:
case ISD::UMULFIX:
return ExpandFixedPointMul(Op);
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIXSAT:
+  // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, and it is not
+  // clear why. Perhaps expansion gives worse codegen than the unroll for
+  // some targets? This should be investigated, and if unrolling really is
+  // preferable, an explanation would be helpful.
+ return DAG.UnrollVectorOp(Op.getNode());
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -844,6 +862,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
return ExpandStrictFPOp(Op);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -1168,9 +1188,13 @@ SDValue VectorLegalizer::ExpandABS(SDValue Op) {
SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
// Attempt to expand using TargetLowering.
- SDValue Result;
- if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG))
+ SDValue Result, Chain;
+ if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) {
+ if (Op.getNode()->isStrictFPOpcode())
+ // Relink the chain
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
return Result;
+ }
// Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode());
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7e4d52617977..3763e886cef2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -52,7 +52,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
- case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
@@ -171,6 +170,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_EXTEND:
R = ScalarizeVecRes_StrictFPOp(N);
break;
@@ -185,6 +186,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
R = ScalarizeVecRes_MULFIX(N);
break;
}
@@ -604,6 +606,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
+ break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
break;
@@ -679,6 +685,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
}
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the strict FP operation on the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
+ { N->getValueType(0).getScalarType(), MVT::Other },
+ { N->getOperand(0), Elt });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
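// Rough shape of the rewrite, for a hypothetical <1 x f32> operand:
//   before: t2: v1i32, ch = STRICT_FP_TO_SINT chain, t1:v1f32
//   after:  t3: i32,   ch = STRICT_FP_TO_SINT chain, (element of t1)
//           t4: v1i32 = SCALAR_TO_VECTOR t3
// with users of t2's chain redirected to t3's chain result.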
+
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
@@ -828,7 +851,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
- case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
@@ -883,7 +905,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
@@ -977,6 +1001,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
SplitVecRes_MULFIX(N, Lo, Hi);
break;
}
@@ -1560,10 +1585,14 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
// Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
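  // As in SplitRes_SELECT, re-splitting the SETCC that defines the mask
  // yields two narrow compares rather than a split of one wide result
  // vector; the same pattern is applied to MGATHER, MSTORE and MSCATTER
  // below.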
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
@@ -1622,10 +1651,14 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
// Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
@@ -1651,11 +1684,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
- MMO);
+ MMO, MGT->getIndexType());
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
- MMO);
+ MMO, MGT->getIndexType());
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1979,6 +2012,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -2293,7 +2328,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
- OpsLo, MMO);
+ OpsLo, MMO, MGT->getIndexType());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
@@ -2303,7 +2338,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
- OpsHi, MMO);
+ OpsHi, MMO, MGT->getIndexType());
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2340,12 +2375,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- // Split Mask operand
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -2397,12 +2436,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- // Split Mask operand
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
@@ -2418,7 +2461,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
- DL, OpsLo, MMO);
+ DL, OpsLo, MMO, N->getIndexType());
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
@@ -2430,7 +2473,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
// after another.
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
+ DL, OpsHi, MMO, N->getIndexType());
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2596,7 +2639,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
- return PromoteTargetBoolean(Con, N->getValueType(0));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con);
}
@@ -2663,7 +2710,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
- case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
@@ -2719,6 +2765,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ // These are binary operations, but with an extra operand that shouldn't
+ // be widened (the scale).
+ Res = WidenVecRes_BinaryWithExtraScalarOp(N);
+ break;
+
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -2790,6 +2845,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FP_EXTEND:
case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
Res = WidenVecRes_Convert_StrictFP(N);
break;
@@ -2866,6 +2923,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
+ // Binary op widening, but with an extra operand that shouldn't be widened.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3,
+ N->getFlags());
+}
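// Operand 2 (the scale) is a scalar immediate, not a vector, so it is
// passed through unchanged while the two vector operands are swapped for
// their widened versions.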
+
// Given a vector of operations that have been broken up to widen, see
// if we can collect them together into the next widest legal VT. This
// implementation is trap-safe.
@@ -3716,7 +3784,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Scale };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
- N->getMemOperand());
+ N->getMemOperand(), N->getIndexType());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -4094,7 +4162,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
@@ -4434,7 +4504,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
Scale};
SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
- MG->getMemOperand());
+ MG->getMemOperand(), MG->getIndexType());
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
return SDValue();
@@ -4472,7 +4542,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
MSC->getMemoryVT(), SDLoc(N), Ops,
- MSC->getMemOperand());
+ MSC->getMemOperand(), MSC->getIndexType());
}
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
@@ -4504,7 +4574,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- return PromoteTargetBoolean(CC, VT);
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, dl, VT, CC);
}
SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
@@ -4706,7 +4779,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
- unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads.
+ unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads.
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 2cb850fa1a3d..7ee44c808fcb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -498,7 +498,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
} else
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 34b4c8502353..ff806bdb822c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1188,6 +1188,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!Pred.isArtificial())
AddPredQueued(NewSU, Pred);
+ // Make sure the clone comes after the original. (InstrEmitter assumes
+ // this ordering.)
+ AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
+
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
@@ -1374,7 +1378,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
} else
@@ -2358,7 +2362,7 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) {
PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
unsigned Reg =
cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
RetVal = true;
continue;
}
@@ -2379,7 +2383,7 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
unsigned Reg =
cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
RetVal = true;
continue;
}
@@ -2948,8 +2952,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyToReg &&
- TargetRegisterInfo::isVirtualRegister
- (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ Register::isVirtualRegister(
+ cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
SDNode *PredFrameSetup = nullptr;
@@ -2995,8 +2999,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
- TargetRegisterInfo::isVirtualRegister
- (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ Register::isVirtualRegister(
+ cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
// Perform checks on the successors of PredSU.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 568c6191e512..d4c1fb36475e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -115,7 +115,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
return;
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return;
unsigned ResNo = User->getOperand(2).getResNo();
@@ -528,7 +528,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// glued together nodes with a single SUnit.
-void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) {
// Cluster certain nodes which should be scheduled together.
ClusterNodes();
// Populate the SUnits array.
@@ -656,7 +656,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
!BB->succ_empty()) {
unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
// This copy is a liveout value. It is likely coalesced, so reduce the
// latency so not to penalize the def.
// FIXME: need target specific adjustment here?
@@ -808,7 +808,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
} else {
// Copy from physical register.
assert(I->getReg() && "Unknown physical register!");
- unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
(void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
@@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
+
+ if (MDNode *MD = DAG->getHeapAllocSite(N)) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
+
GluedNodes.pop_back();
}
auto NewInsn =
@@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (HasDbg)
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
NewInsn);
+ if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
}
// Insert all the dbg_values which have not already been inserted in source
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 5163b4fa4fd3..183ce4b0652d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -26,6 +26,7 @@
namespace llvm {
+class AAResults;
class InstrItineraryData;
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
@@ -93,7 +94,7 @@ class InstrItineraryData;
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
- void BuildSchedGraph(AliasAnalysis *AA);
+ void BuildSchedGraph(AAResults *AA);
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index ab06b55b49fd..e7bac73678a7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -63,14 +63,13 @@ private:
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
- /// AA - AliasAnalysis for making memory reference queries.
- AliasAnalysis *AA;
+ /// AA - AAResults for making memory reference queries.
+ AAResults *AA;
public:
- ScheduleDAGVLIW(MachineFunction &mf,
- AliasAnalysis *aa,
+ ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa,
SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
const TargetSubtargetInfo &STI = mf.getSubtarget();
HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5852e693fa9f..52a71b91d93f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -859,9 +859,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
break;
case ISD::TargetExternalSymbol: {
ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
- Erased = TargetExternalSymbols.erase(
- std::pair<std::string,unsigned char>(ESN->getSymbol(),
- ESN->getTargetFlags()));
+ Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>(
+ ESN->getSymbol(), ESN->getTargetFlags()));
break;
}
case ISD::MCSymbol: {
@@ -1084,6 +1083,7 @@ void SelectionDAG::clear() {
ExternalSymbols.clear();
TargetExternalSymbols.clear();
MCSymbols.clear();
+ SDCallSiteDbgInfo.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
@@ -1353,7 +1353,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
EVT VT, int64_t Offset, bool isTargetGA,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
@@ -1400,7 +1400,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
}
SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent jump tables");
unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
@@ -1421,7 +1421,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
@@ -1449,7 +1449,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
@@ -1473,7 +1473,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
}
SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
ID.AddInteger(Index);
@@ -1535,10 +1535,9 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
}
SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
SDNode *&N =
- TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
- TargetFlags)];
+ TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)];
if (N) return SDValue(N, 0);
N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
InsertNode(N);
@@ -1802,9 +1801,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
}
SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
- int64_t Offset,
- bool isTarget,
- unsigned char TargetFlags) {
+ int64_t Offset, bool isTarget,
+ unsigned TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
FoldingSetNodeID ID;
@@ -1900,20 +1898,19 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) {
EVT VT = Node->getValueType(0);
SDValue Tmp1 = Node->getOperand(0);
SDValue Tmp2 = Node->getOperand(1);
- unsigned Align = Node->getConstantOperandVal(3);
+ const MaybeAlign MA(Node->getConstantOperandVal(3));
SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
Tmp2, MachinePointerInfo(V));
SDValue VAList = VAListLoad;
- if (Align > TLI.getMinStackArgumentAlignment()) {
- assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
-
+ if (MA && *MA > TLI.getMinStackArgumentAlignment()) {
VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- getConstant(Align - 1, dl, VAList.getValueType()));
+ getConstant(MA->value() - 1, dl, VAList.getValueType()));
- VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
- getConstant(-(int64_t)Align, dl, VAList.getValueType()));
+ VAList =
+ getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+ getConstant(-(int64_t)MA->value(), dl, VAList.getValueType()));
}
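  // With an alignment of 16, for example, the two nodes above compute
  //   VAList = (VAList + 15) & -16
  // i.e. add Align - 1, then mask off the low log2(Align) bits, rounding the
  // pointer up to the next 16-byte boundary.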
// Increment the pointer, VAList, to the next vaarg
@@ -2154,12 +2151,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
}
case ISD::OR:
case ISD::XOR:
- // If the LHS or RHS don't contribute bits to the or, drop them.
- if (MaskedValueIsZero(V.getOperand(0), DemandedBits))
- return V.getOperand(1);
- if (MaskedValueIsZero(V.getOperand(1), DemandedBits))
- return V.getOperand(0);
- break;
+ case ISD::SIGN_EXTEND_INREG:
+ return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
+ *this, 0);
case ISD::SRL:
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
@@ -2203,15 +2197,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
break;
}
- case ISD::SIGN_EXTEND_INREG:
- EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT();
- unsigned ExVTBits = ExVT.getScalarSizeInBits();
-
- // If none of the extended bits are demanded, eliminate the sextinreg.
- if (DemandedBits.getActiveBits() <= ExVTBits)
- return V.getOperand(0);
-
- break;
}
return SDValue();
}
@@ -2395,15 +2380,39 @@ SDValue SelectionDAG::getSplatValue(SDValue V) {
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
static const APInt *getValidShiftAmountConstant(SDValue V) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
// Shifting more than the bitwidth is not valid.
const APInt &ShAmt = SA->getAPIntValue();
- if (ShAmt.ult(V.getScalarValueSizeInBits()))
+ if (ShAmt.ult(BitWidth))
return &ShAmt;
}
return nullptr;
}
+/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
+/// than the element bit-width of the shift node, return the minimum value.
+static const APInt *getValidMinimumShiftAmountConstant(SDValue V) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
+ if (!BV)
+ return nullptr;
+ const APInt *MinShAmt = nullptr;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!SA)
+ return nullptr;
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return nullptr;
+ if (MinShAmt && MinShAmt->ule(ShAmt))
+ continue;
+ MinShAmt = &ShAmt;
+ }
+ return MinShAmt;
+}
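// Example: for (srl v4i32 %x, <3, 4, 5, 6>) this returns a pointer to the
// constant 3, so a caller such as computeKnownBits below may mark the top
// three bits of every lane as zero; any non-constant lane, or a lane
// shifting by 32 or more, yields nullptr instead.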
+
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
@@ -2437,7 +2446,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
return Known;
}
- if (Depth == 6)
+ if (Depth >= MaxRecursionDepth)
return Known; // Limit search depth.
KnownBits Known2;
@@ -2582,14 +2591,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
- } else {
- Known = computeKnownBits(Src, Depth + 1);
+ DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
}
+ Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
break;
}
case ISD::SCALAR_TO_VECTOR: {
@@ -2800,25 +2808,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.One.lshrInPlace(Shift);
// High bits are known zero.
Known.Zero.setHighBits(Shift);
- } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) {
- // If the shift amount is a vector of constants see if we can bound
- // the number of upper zero bits.
- unsigned ShiftAmountMin = BitWidth;
- for (unsigned i = 0; i != BV->getNumOperands(); ++i) {
- if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) {
- const APInt &ShAmt = C->getAPIntValue();
- if (ShAmt.ult(BitWidth)) {
- ShiftAmountMin = std::min<unsigned>(ShiftAmountMin,
- ShAmt.getZExtValue());
- continue;
- }
- }
- // Don't know anything.
- ShiftAmountMin = 0;
- break;
- }
-
- Known.Zero.setHighBits(ShiftAmountMin);
+ } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) {
+ // Minimum shift high bits are known zero.
+ Known.Zero.setHighBits(ShMinAmt->getZExtValue());
}
break;
case ISD::SRA:
@@ -3105,12 +3097,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
- if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits))
+ if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
Known.Zero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
- if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0))
+ if (Known2.isNegative() && LowBits.intersects(Known2.One))
Known.One |= ~LowBits;
assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?");
}
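    // Concrete case: for (srem x, 8), LowBits is 0b111. A known non-negative
    // dividend gives a remainder in [0, 7], so every bit above the low three
    // is zero; a known negative dividend with a nonzero low part gives a
    // negative remainder, so those upper bits are all ones.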
@@ -3427,7 +3419,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Val.getNumSignBits();
}
- if (Depth == 6)
+ if (Depth >= MaxRecursionDepth)
return 1; // Limit search depth.
if (!DemandedElts)
@@ -3729,6 +3721,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
+ case ISD::MUL: {
+ // The output of the Mul can be at most twice the valid bits in the inputs.
+ unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (SignBitsOp0 == 1)
+ break;
+ unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ if (SignBitsOp1 == 1)
+ break;
+ unsigned OutValidBits =
+ (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
+ return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
+ }
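  // Worked example, assuming VTBits == 32: with 20 sign bits on each
  // operand, each input fits in 32 - 20 + 1 = 13 signed bits, the product
  // fits in 13 + 13 = 26 signed bits, and the result keeps at least
  // 32 - 26 + 1 = 7 sign bits. When OutValidBits exceeds VTBits the multiply
  // may overflow, so only the trivial 1 sign bit is claimed.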
case ISD::TRUNCATE: {
// Check if the sign bits of source go down as far as the truncated value.
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
@@ -3817,13 +3821,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+ DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
}
- return ComputeNumSignBits(Src, Depth + 1);
+ return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
}
case ISD::CONCAT_VECTORS: {
// Determine the minimum number of sign bits across all demanded
@@ -3976,7 +3980,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
- if (Depth == 6)
+ if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
// TODO: Handle vectors.
@@ -4645,7 +4649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getUNDEF(VT);
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
+ if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
Operand.getOperand(0), Flags);
@@ -5156,22 +5160,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N2C && N2C->isNullValue())
return N1;
break;
- case ISD::FP_ROUND_INREG: {
- EVT EVT = cast<VTSDNode>(N2)->getVT();
- assert(VT == N1.getValueType() && "Not an inreg round!");
- assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
- "Cannot FP_ROUND_INREG integer types");
- assert(EVT.isVector() == VT.isVector() &&
- "FP_ROUND_INREG type should be vector iff the operand "
- "type is vector!");
- assert((!EVT.isVector() ||
- EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
- "Vector element counts must match in FP_ROUND_INREG");
- assert(EVT.bitsLE(VT) && "Not rounding down!");
- (void)EVT;
- if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
- break;
- }
case ISD::FP_ROUND:
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
@@ -5382,7 +5370,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
std::swap(N1, N2);
} else {
switch (Opcode) {
- case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
return getUNDEF(VT); // fold op(undef, arg2) -> undef
@@ -5770,7 +5757,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size, unsigned Align,
+ uint64_t Size, unsigned Alignment,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
@@ -5795,15 +5782,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
- if (Align > SrcAlign)
- SrcAlign = Align;
+ if (Alignment > SrcAlign)
+ SrcAlign = Alignment;
ConstantDataArraySlice Slice;
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align),
+ MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
(isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
/*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
/*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
@@ -5818,15 +5805,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Align &&
- DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign /= 2;
+ while (NewAlign > Alignment &&
+ DL.exceedsNaturalStackAlignment(Align(NewAlign)))
+ NewAlign /= 2;
- if (NewAlign > Align) {
+ if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
- Align = NewAlign;
+ Alignment = NewAlign;
}
}
@@ -5869,10 +5856,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
}
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
if (Value.getNode()) {
- Store = DAG.getStore(Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Align,
- MMOFlags);
+ Store = DAG.getStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
OutChains.push_back(Store);
}
}
@@ -5900,7 +5886,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
OutStoreChains.push_back(Store);
}
SrcOff += VTSize;
@@ -6567,7 +6553,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
- MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) {
+ MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
@@ -6619,7 +6605,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
createOperands(N, Ops);
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
@@ -7022,14 +7010,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO) {
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
- dl.getIROrder(), VTs, VT, MMO));
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7038,7 +7027,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
}
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO);
+ VTs, VT, MMO, IndexType);
createOperands(N, Ops);
assert(N->getPassThru().getValueType() == N->getValueType(0) &&
@@ -7062,14 +7051,15 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO) {
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
- dl.getIROrder(), VTs, VT, MMO));
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7077,7 +7067,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO);
+ VTs, VT, MMO, IndexType);
createOperands(N, Ops);
assert(N->getMask().getValueType().getVectorNumElements() ==
@@ -7766,16 +7756,22 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break;
case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break;
case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break;
+ case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break;
+ case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break;
case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break;
case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break;
case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break;
+ case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break;
+ case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break;
case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break;
case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break;
case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
+ case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break;
+ case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break;
}
assert(Node->getNumValues() == 2 && "Unexpected number of results!");
@@ -7925,6 +7921,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
CSEMap.InsertNode(N, IP);
InsertNode(N);
+ NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this);
return N;
}
@@ -8619,7 +8616,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
// TokenFactor.
SDValue OldChain = SDValue(OldLoad, 1);
SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
- if (!OldLoad->hasAnyUseOfValue(1))
+ if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1))
return NewChain;
SDValue TokenFactor =
@@ -8812,7 +8809,7 @@ HandleSDNode::~HandleSDNode() {
GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
const DebugLoc &DL,
const GlobalValue *GA, EVT VT,
- int64_t o, unsigned char TF)
+ int64_t o, unsigned TF)
: SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
TheGlobal = GA;
}
@@ -8986,7 +8983,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
// Loads don't have side effects, look through them.
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
- if (!Ld->isVolatile())
+ if (Ld->isUnordered())
return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
}
return false;
@@ -9005,21 +9002,51 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
SDValue
SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
- ArrayRef<ISD::NodeType> CandidateBinOps) {
+ ArrayRef<ISD::NodeType> CandidateBinOps,
+ bool AllowPartials) {
// The pattern must end in an extract from index 0.
if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isNullConstant(Extract->getOperand(1)))
return SDValue();
- SDValue Op = Extract->getOperand(0);
- unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
-
// Match against one of the candidate binary ops.
+ SDValue Op = Extract->getOperand(0);
if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
return Op.getOpcode() == unsigned(BinOp);
}))
return SDValue();
+ // Floating-point reductions may require relaxed constraints on the final step
+ // of the reduction because they may reorder intermediate operations.
+ unsigned CandidateBinOp = Op.getOpcode();
+ if (Op.getValueType().isFloatingPoint()) {
+ SDNodeFlags Flags = Op->getFlags();
+ switch (CandidateBinOp) {
+ case ISD::FADD:
+ if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation())
+ return SDValue();
+ break;
+ default:
+ llvm_unreachable("Unhandled FP opcode for binop reduction");
+ }
+ }
+
+ // Matching failed - attempt to see if we did enough stages that a partial
+ // reduction from a subvector is possible.
+ auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) {
+ if (!AllowPartials || !Op)
+ return SDValue();
+ EVT OpVT = Op.getValueType();
+ EVT OpSVT = OpVT.getScalarType();
+ EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts);
+ if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0))
+ return SDValue();
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return getNode(
+ ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op,
+ getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout())));
+ };
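  // For instance, if only the first two combining stages of an 8-lane
  // pyramid match, lane 0 of the final extract is the sum of the low four
  // lanes of the deepest matched value, so PartialReduction returns an
  // EXTRACT_SUBVECTOR of those four lanes whenever the target reports that
  // extract as cheap.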
+
// At each stage, we're looking for something that looks like:
// %s = shufflevector <8 x i32> %op, <8 x i32> undef,
// <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
@@ -9030,10 +9057,16 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
// <4,5,6,7,u,u,u,u>
// <2,3,u,u,u,u,u,u>
// <1,u,u,u,u,u,u,u>
- unsigned CandidateBinOp = Op.getOpcode();
+ // While a partial reduction match would be:
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
+ SDValue PrevOp;
for (unsigned i = 0; i < Stages; ++i) {
+ unsigned MaskEnd = (1 << i);
+
if (Op.getOpcode() != CandidateBinOp)
- return SDValue();
+ return PartialReduction(PrevOp, MaskEnd);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -9049,12 +9082,14 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
// The first operand of the shuffle should be the same as the other operand
// of the binop.
if (!Shuffle || Shuffle->getOperand(0) != Op)
- return SDValue();
+ return PartialReduction(PrevOp, MaskEnd);
// Verify the shuffle has the expected (at this stage of the pyramid) mask.
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
- if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
- return SDValue();
+ for (int Index = 0; Index < (int)MaskEnd; ++Index)
+ if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index))
+ return PartialReduction(PrevOp, MaskEnd);
+
+ PrevOp = Op;
}
BinOp = (ISD::NodeType)CandidateBinOp;
@@ -9114,8 +9149,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
getShiftAmountOperand(Operands[0].getValueType(),
Operands[1])));
break;
- case ISD::SIGN_EXTEND_INREG:
- case ISD::FP_ROUND_INREG: {
+ case ISD::SIGN_EXTEND_INREG: {
EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
Operands[0],
@@ -9187,6 +9221,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
int Dist) const {
if (LD->isVolatile() || Base->isVolatile())
return false;
+ // TODO: probably too restrictive for atomics, revisit
+ if (!LD->isSimple())
+ return false;
if (LD->isIndexed() || Base->isIndexed())
return false;
if (LD->getChain() != Base->getChain())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 9592bc30a4e1..3a53ab9717a4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
#include <cstdint>
using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e818dd27c05e..8c15563fcd23 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -833,7 +833,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// If the source register was virtual and if we know something about it,
// add an assert node.
- if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ if (!Register::isVirtualRegister(Regs[Part + i]) ||
!RegisterVT.isInteger())
continue;
@@ -948,8 +948,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
- else if (!Regs.empty() &&
- TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+ else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
// Put the register class of the virtual registers in the flag word. That
// way, later passes can recompute register class constraints for inline
// assembly as well as normal instructions.
@@ -1810,7 +1809,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
- SDValue Val = RetOp.getValue(i);
+ SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
@@ -2263,7 +2262,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
Instruction::BinaryOps Opcode = BOp->getOpcode();
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
- !I.getMetadata(LLVMContext::MD_unpredictable) &&
+ !I.hasMetadata(LLVMContext::MD_unpredictable) &&
(Opcode == Instruction::And || Opcode == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
@@ -2600,9 +2599,11 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setDiscardResult(true);
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
- None, false, getCurSDLoc(), false, false).second;
+ None, CallOptions, getCurSDLoc()).second;
// On PS4, the "return address" must still be within the calling function,
// even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
@@ -2618,24 +2619,18 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
- // Subtract the minimum value
+ // Subtract the minimum value.
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
- DAG.getConstant(B.First, dl, VT));
-
- // Check range
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue RangeCmp = DAG.getSetCC(
- dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
+ SDValue RangeSub =
+ DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
// Determine the type of the test operands.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool UsePtrType = false;
- if (!TLI.isTypeLegal(VT))
+ if (!TLI.isTypeLegal(VT)) {
UsePtrType = true;
- else {
+ } else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
// Switch table case range are encoded into series of masks.
@@ -2644,6 +2639,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
break;
}
}
+ SDValue Sub = RangeSub;
if (UsePtrType) {
VT = TLI.getPointerTy(DAG.getDataLayout());
Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
@@ -2655,20 +2651,29 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ if (!B.OmitRangeCheck)
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
- SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
- MVT::Other, CopyTo, RangeCmp,
- DAG.getBasicBlock(B.Default));
+ SDValue Root = CopyTo;
+ if (!B.OmitRangeCheck) {
+ // Conditional branch to the default block.
+ SDValue RangeCmp = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ RangeSub.getValueType()),
+ RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
+ ISD::SETUGT);
+
+ Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+ }
// Avoid emitting unnecessary branches to the next block.
if (MBB != NextBlock(SwitchBB))
- BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
- DAG.getBasicBlock(MBB));
+ Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
- DAG.setRoot(BrRange);
+ DAG.setRoot(Root);
}
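// When B.OmitRangeCheck is set (presumably because the default block is
// known to be unreachable), both the SETUGT range compare and the
// conditional branch to the default block are skipped, leaving a single
// fall-through edge out of the bit-test header.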
/// visitBitTestCase - this function produces one "bit test"
@@ -3266,8 +3271,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
// We care about the legality of the operation after it has been type
// legalized.
- while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
- VT != TLI.getTypeToTransformTo(Ctx, VT))
+ while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
VT = TLI.getTypeToTransformTo(Ctx, VT);
// If the vselect is legal, assume we want to leave this as a vector setcc +
@@ -3534,17 +3538,32 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
+ Constant *MaskV = cast<Constant>(I.getOperand(2));
SDLoc DL = getCurSDLoc();
-
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
- unsigned MaskNumElts = Mask.size();
-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ if (MaskV->isNullValue() && VT.isScalableVector()) {
+ // Canonical splat form of first element of first input vector.
+ SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ SrcVT.getScalarType(), Src1,
+ DAG.getConstant(0, DL,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
+ return;
+ }
+
+ // For now, we only handle splats for scalable vectors.
+ // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
+ // for targets that support a SPLAT_VECTOR for non-scalable vector types.
+ assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
+
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(MaskV, Mask);
+ unsigned MaskNumElts = Mask.size();
+
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
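
For reference, a shuffle whose mask is all zeros (MaskV->isNullValue() above) replicates element 0 of the first source vector, which is why it can be lowered directly to an ISD::SPLAT_VECTOR. A toy scalar model of that semantics:

  #include <array>
  #include <cstddef>
  #include <cstdio>

  template <std::size_t N>
  std::array<int, N> splatShuffle(const std::array<int, N> &Src) {
    std::array<int, N> Out{};
    for (std::size_t I = 0; I != N; ++I)
      Out[I] = Src[0]; // a zero mask entry selects lane 0 for every lane
    return Out;
  }

  int main() {
    auto R = splatShuffle<4>({7, 1, 2, 3});
    printf("%d %d %d %d\n", R[0], R[1], R[2], R[3]); // 7 7 7 7
  }
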
@@ -3825,7 +3844,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
unsigned VectorWidth = I.getType()->isVectorTy() ?
- cast<VectorType>(I.getType())->getVectorNumElements() : 0;
+ I.getType()->getVectorNumElements() : 0;
if (VectorWidth && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
@@ -3858,12 +3877,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
- const auto *CI = dyn_cast<ConstantInt>(Idx);
- if (!CI && isa<ConstantDataVector>(Idx) &&
- cast<ConstantDataVector>(Idx)->getSplatValue())
- CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
+ const auto *C = dyn_cast<Constant>(Idx);
+ if (C && isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
- if (CI) {
+ if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
if (CI->isZero())
continue;
APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
@@ -3872,7 +3890,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
DAG.getConstant(Offs, dl, IdxTy);
- // In an inbouds GEP with an offset that is nonnegative even when
+ // In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
@@ -4002,8 +4020,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
- bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal);
+ bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load);
bool isDereferenceable =
isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
@@ -4118,7 +4136,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
- unsigned VReg =
+ Register VReg =
SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
@@ -4132,8 +4150,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
"call visitLoadFromSwiftError when backend supports swifterror");
assert(!I.isVolatile() &&
- I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
- I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
+ !I.hasMetadata(LLVMContext::MD_nontemporal) &&
+ !I.hasMetadata(LLVMContext::MD_invariant_load) &&
"Support volatile, non temporal, invariant for load_from_swift_error");
const Value *SV = I.getOperand(0);
@@ -4209,7 +4227,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
auto MMOFlags = MachineMemOperand::MONone;
if (I.isVolatile())
MMOFlags |= MachineMemOperand::MOVolatile;
- if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
+ if (I.hasMetadata(LLVMContext::MD_nontemporal))
MMOFlags |= MachineMemOperand::MONonTemporal;
MMOFlags |= TLI.getMMOFlags(I);
@@ -4309,8 +4327,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
// are looking for. If the first operand of the GEP is a splat vector, we
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
-static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
- SDValue &Scale, SelectionDAGBuilder* SDB) {
+static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
+ ISD::MemIndexType &IndexType, SDValue &Scale,
+ SelectionDAGBuilder *SDB) {
SelectionDAG& DAG = SDB->DAG;
LLVMContext &Context = *DAG.getContext();
@@ -4330,8 +4349,13 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
// Ensure all the other indices are 0.
for (unsigned i = 1; i < FinalIndex; ++i) {
- auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!C || !C->isZero())
+ auto *C = dyn_cast<Constant>(GEP->getOperand(i));
+ if (!C)
+ return false;
+ if (isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (!CI || !CI->isZero())
return false;
}
@@ -4346,6 +4370,7 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
SDB->getCurSDLoc(), TLI.getPointerTy(DL));
Base = SDB->getValue(Ptr);
Index = SDB->getValue(IndexVal);
+ IndexType = ISD::SIGNED_SCALED;
if (!Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
@@ -4373,9 +4398,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Base;
SDValue Index;
+ ISD::MemIndexType IndexType;
SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
+ this);
const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -4385,11 +4412,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
- Ops, MMO);
+ Ops, MMO, IndexType);
DAG.setRoot(Scatter);
setValue(&I, Scatter);
}
@@ -4476,9 +4504,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
+ ISD::MemIndexType IndexType;
SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
+ this);
bool ConstantMemory = false;
if (UniformBase && AA &&
AA->pointsToConstantMemory(
@@ -4500,11 +4530,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
- Ops, MMO);
+ Ops, MMO, IndexType);
SDValue OutChain = Gather.getValue(1);
if (!ConstantMemory)
@@ -4628,7 +4659,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
auto Flags = MachineMemOperand::MOLoad;
if (I.isVolatile())
Flags |= MachineMemOperand::MOVolatile;
- if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ if (I.hasMetadata(LLVMContext::MD_invariant_load))
Flags |= MachineMemOperand::MOInvariant;
if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
DAG.getDataLayout()))
@@ -4645,9 +4676,27 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
- SDValue L =
- DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
- getValue(I.getPointerOperand()), MMO);
+
+ SDValue Ptr = getValue(I.getPointerOperand());
+
+ if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for loads to prevent future divergence.
+ SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
+ if (MemVT != VT)
+ L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+ setValue(&I, L);
+ SDValue OutChain = L.getValue(1);
+ if (!I.isUnordered())
+ DAG.setRoot(OutChain);
+ else
+ PendingLoads.push_back(OutChain);
+ return;
+ }
+
+ SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
+ Ptr, MMO);
SDValue OutChain = L.getValue(1);
if (MemVT != VT)
@@ -4686,9 +4735,17 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+ SDValue Ptr = getValue(I.getPointerOperand());
+ if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for stores to prevent future divergence.
+ SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+ DAG.setRoot(S);
+ return;
+ }
SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
- getValue(I.getPointerOperand()), Val, MMO);
+ Ptr, Val, MMO);
DAG.setRoot(OutChain);
@@ -4731,8 +4788,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- SDValue Op = getValue(I.getArgOperand(i));
- Ops.push_back(Op);
+ const Value *Arg = I.getArgOperand(i);
+ if (!I.paramHasAttr(i, Attribute::ImmArg)) {
+ Ops.push_back(getValue(Arg));
+ continue;
+ }
+
+ // Use TargetConstant instead of a regular constant for immarg.
+ EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
+ assert(CI->getBitWidth() <= 64 &&
+ "large intrinsic immediates not handled");
+ Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
+ } else {
+ Ops.push_back(
+ DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
+ }
}
SmallVector<EVT, 4> ValueVTs;
@@ -4749,10 +4820,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// This is a target intrinsic that touches memory.
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- Result =
- DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.flags, Info.size, AAInfo);
+ Result = DAG.getMemIntrinsicNode(
+ Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -4918,12 +4989,11 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
// Put the exponent in the right bit position for later addition to the
// final result:
//
- // #define LOG2OFe 1.4426950f
- // t0 = Op * LOG2OFe
+ // t0 = Op * log2(e)
// TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
- getF32Constant(DAG, 0x3fb8aa3b, dl));
+ DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
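
The constant swap above is purely cosmetic: 0x3fb8aa3b is the f32 bit pattern of log2(e), which numbers::log2ef now names. The identity the expansion relies on can be checked in plain C++:

  #include <cmath>
  #include <cstdio>

  int main() {
    float X = 1.5f;
    // exp(x) == exp2(x * log2(e)); 1.44269504f is what 0x3fb8aa3b encodes.
    float ViaExp2 = std::exp2(X * 1.44269504f);
    printf("%f %f\n", std::exp(X), ViaExp2);
  }
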
@@ -4941,10 +5011,11 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
- // Scale the exponent by log(2) [0.69314718f].
+ // Scale the exponent by log(2).
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
- SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
- getF32Constant(DAG, 0x3f317218, dl));
+ SDValue LogOfExponent =
+ DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
// Get the significand and build it into a floating-point number with
// exponent of 1.
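
Likewise in expandLog, 0x3f317218 is the f32 encoding of ln(2). The limited-precision expansion splits x = 2^e * m with m in [1, 2), so log(x) = e * ln(2) + log(m); a quick numeric check of that decomposition (illustration only, not the polynomial approximation LLVM uses for log(m)):

  #include <cmath>
  #include <cstdio>

  int main() {
    float X = 10.0f;
    int E;
    float M = std::frexp(X, &E); // X = M * 2^E with M in [0.5, 1)
    M *= 2.0f;                   // renormalize so M is in [1, 2)
    --E;
    float Recon = E * 0.69314718f + std::log(M); // e*ln(2) + log(m)
    printf("%f %f\n", std::log(X), Recon);
  }
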
@@ -5311,19 +5382,32 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
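
The line above is ExpandPowI's fallback to a generic FPOWI node; when the exponent is a known constant (handled in the part of the function this hunk elides), the usual expansion is exponentiation by squaring, sketched here for a non-negative exponent:

  #include <cstdio>

  double PowI(double Base, unsigned Exp) {
    double Result = 1.0;
    while (Exp) {
      if (Exp & 1)
        Result *= Base; // fold in the current exponent bit
      Base *= Base;     // square once per bit
      Exp >>= 1;
    }
    return Result;
  }

  int main() { printf("%f\n", PowI(2.0, 10)); } // 1024.000000
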
-// getUnderlyingArgReg - Find underlying register used for a truncated or
-// bitcasted argument.
-static unsigned getUnderlyingArgReg(const SDValue &N) {
+// getUnderlyingArgRegs - Find underlying registers used for a truncated,
+// bitcasted, or split argument. Returns a list of <Register, size in bits>.
+static void
+getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
+ const SDValue &N) {
switch (N.getOpcode()) {
- case ISD::CopyFromReg:
- return cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ case ISD::CopyFromReg: {
+ SDValue Op = N.getOperand(1);
+ Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
+ Op.getValueType().getSizeInBits());
+ return;
+ }
case ISD::BITCAST:
case ISD::AssertZext:
case ISD::AssertSext:
case ISD::TRUNCATE:
- return getUnderlyingArgReg(N.getOperand(0));
+ getUnderlyingArgRegs(Regs, N.getOperand(0));
+ return;
+ case ISD::BUILD_PAIR:
+ case ISD::BUILD_VECTOR:
+ case ISD::CONCAT_VECTORS:
+ for (SDValue Op : N->op_values())
+ getUnderlyingArgRegs(Regs, Op);
+ return;
default:
- return 0;
+ return;
}
}
@@ -5412,11 +5496,16 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
+ SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
- unsigned Reg = getUnderlyingArgReg(N);
- if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ getUnderlyingArgRegs(ArgRegsAndSizes, N);
+ Register Reg;
+ if (ArgRegsAndSizes.size() == 1)
+ Reg = ArgRegsAndSizes.front().first;
+
+ if (Reg && Reg.isVirtual()) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ Register PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
Reg = PR;
}
@@ -5436,29 +5525,42 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
if (!Op) {
+ // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
+ auto splitMultiRegDbgValue
+ = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) {
+ unsigned Offset = 0;
+ for (auto RegAndSize : SplitRegs) {
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, RegAndSize.second);
+ if (!FragmentExpr)
+ continue;
+ assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+ RegAndSize.first, Variable, *FragmentExpr));
+ Offset += RegAndSize.second;
+ }
+ };
+
// Check if ValueMap has reg number.
- DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ DenseMap<const Value *, unsigned>::const_iterator
+ VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
V->getType(), getABIRegCopyCC(V));
if (RFV.occupiesMultipleRegs()) {
- unsigned Offset = 0;
- for (auto RegAndSize : RFV.getRegsAndSizes()) {
- Op = MachineOperand::CreateReg(RegAndSize.first, false);
- auto FragmentExpr = DIExpression::createFragmentExpression(
- Expr, Offset, RegAndSize.second);
- if (!FragmentExpr)
- continue;
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
- Op->getReg(), Variable, *FragmentExpr));
- Offset += RegAndSize.second;
- }
+ splitMultiRegDbgValue(RFV.getRegsAndSizes());
return true;
}
+
Op = MachineOperand::CreateReg(VMI->second, false);
IsIndirect = IsDbgDeclare;
+ } else if (ArgRegsAndSizes.size() > 1) {
+ // This was split due to the calling convention, and no virtual register
+ // mapping exists for the value.
+ splitMultiRegDbgValue(ArgRegsAndSizes);
+ return true;
}
}
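
The splitMultiRegDbgValue lambda factored out above emits one DBG_VALUE per register piece, each tagged with a fragment expression at a running bit offset. The bookkeeping in miniature (hypothetical vreg numbers; a 128-bit value split into two 64-bit pieces):

  #include <cstdio>
  #include <utility>
  #include <vector>

  int main() {
    std::vector<std::pair<unsigned, unsigned>> RegsAndSizes = {{100, 64},
                                                               {101, 64}};
    unsigned Offset = 0;
    for (const auto &RS : RegsAndSizes) {
      printf("DBG_VALUE vreg%u, fragment(offset=%u, size=%u)\n", RS.first,
             Offset, RS.second);
      Offset += RS.second; // the next piece starts where this one ends
    }
  }
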
@@ -5468,8 +5570,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
IsIndirect = (Op->isReg()) ? IsIndirect : true;
+ if (IsIndirect)
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
*Op, Variable, Expr));
return true;
@@ -5554,11 +5658,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::sponentry:
setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ TLI.getFrameIndexTy(DAG.getDataLayout())));
return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
- TLI.getPointerTy(DAG.getDataLayout()),
+ TLI.getFrameIndexTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::read_register: {
@@ -5888,65 +5992,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
return;
- case Intrinsic::x86_mmx_pslli_w:
- case Intrinsic::x86_mmx_pslli_d:
- case Intrinsic::x86_mmx_pslli_q:
- case Intrinsic::x86_mmx_psrli_w:
- case Intrinsic::x86_mmx_psrli_d:
- case Intrinsic::x86_mmx_psrli_q:
- case Intrinsic::x86_mmx_psrai_w:
- case Intrinsic::x86_mmx_psrai_d: {
- SDValue ShAmt = getValue(I.getArgOperand(1));
- if (isa<ConstantSDNode>(ShAmt)) {
- visitTargetIntrinsic(I, Intrinsic);
- return;
- }
- unsigned NewIntrinsic = 0;
- EVT ShAmtVT = MVT::v2i32;
- switch (Intrinsic) {
- case Intrinsic::x86_mmx_pslli_w:
- NewIntrinsic = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntrinsic = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntrinsic = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
- break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntrinsic = Intrinsic::x86_mmx_psra_w;
- break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntrinsic = Intrinsic::x86_mmx_psra_d;
- break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- }
-
- // The vector shift intrinsics with scalars uses 32b shift amounts but
- // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
- // to be zero.
- // We must do this early because v2i32 is not a legal type.
- SDValue ShOps[2];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
- ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps);
- EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
- Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
- DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
- getValue(I.getArgOperand(0)), ShAmt);
- setValue(&I, Res);
- return;
- }
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
@@ -6063,6 +6108,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
@@ -6075,12 +6122,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_constrained_log:
case Intrinsic::experimental_constrained_log10:
case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_lrint:
+ case Intrinsic::experimental_constrained_llrint:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_maxnum:
case Intrinsic::experimental_constrained_minnum:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_lround:
+ case Intrinsic::experimental_constrained_llround:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
@@ -6272,6 +6323,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Op3));
return;
}
+ case Intrinsic::umul_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
+ Op3));
+ return;
+ }
case Intrinsic::stacksave: {
SDValue Op = getRoot();
Res = DAG.getNode(
@@ -6347,29 +6406,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res);
return;
}
- case Intrinsic::objectsize: {
- // If we don't know by now, we're never going to know.
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
-
- assert(CI && "Non-constant type in __builtin_object_size?");
-
- SDValue Arg = getValue(I.getCalledValue());
- EVT Ty = Arg.getValueType();
-
- if (CI->isZero())
- Res = DAG.getConstant(-1ULL, sdl, Ty);
- else
- Res = DAG.getConstant(0, sdl, Ty);
-
- setValue(&I, Res);
- return;
- }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
case Intrinsic::is_constant:
- // If this wasn't constant-folded away by now, then it's not a
- // constant.
- setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
- return;
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
@@ -6818,6 +6859,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Val);
return;
}
+ case Intrinsic::ptrmask: {
+ SDValue Ptr = getValue(I.getOperand(0));
+ SDValue Const = getValue(I.getOperand(1));
+
+ EVT DestVT =
+ EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+
+ setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr,
+ DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT)));
+ return;
+ }
}
}
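
The new llvm.ptrmask case lowers to a plain AND of the pointer with the mask, zero-extended or truncated to pointer width. In C++ terms the arithmetic effect is roughly the following (a sketch of the bit manipulation only; it says nothing about pointer provenance):

  #include <cstdint>
  #include <cstdio>

  void *PtrMask(void *P, std::uint64_t Mask) {
    return reinterpret_cast<void *>(reinterpret_cast<std::uintptr_t>(P) & Mask);
  }

  int main() {
    alignas(16) char Buf[32];
    void *Q = PtrMask(Buf + 5, ~std::uintptr_t{15}); // align down to 16
    printf("%p %p\n", static_cast<void *>(Buf), Q);  // same address
  }
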
@@ -6845,6 +6897,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_fma:
Opcode = ISD::STRICT_FMA;
break;
+ case Intrinsic::experimental_constrained_fptosi:
+ Opcode = ISD::STRICT_FP_TO_SINT;
+ break;
+ case Intrinsic::experimental_constrained_fptoui:
+ Opcode = ISD::STRICT_FP_TO_UINT;
+ break;
case Intrinsic::experimental_constrained_fptrunc:
Opcode = ISD::STRICT_FP_ROUND;
break;
@@ -6881,6 +6939,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_log2:
Opcode = ISD::STRICT_FLOG2;
break;
+ case Intrinsic::experimental_constrained_lrint:
+ Opcode = ISD::STRICT_LRINT;
+ break;
+ case Intrinsic::experimental_constrained_llrint:
+ Opcode = ISD::STRICT_LLRINT;
+ break;
case Intrinsic::experimental_constrained_rint:
Opcode = ISD::STRICT_FRINT;
break;
@@ -6899,6 +6963,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_floor:
Opcode = ISD::STRICT_FFLOOR;
break;
+ case Intrinsic::experimental_constrained_lround:
+ Opcode = ISD::STRICT_LROUND;
+ break;
+ case Intrinsic::experimental_constrained_llround:
+ Opcode = ISD::STRICT_LLROUND;
+ break;
case Intrinsic::experimental_constrained_round:
Opcode = ISD::STRICT_FROUND;
break;
@@ -7102,7 +7172,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
- unsigned VReg = SwiftError.getOrCreateVRegDefAt(
+ Register VReg = SwiftError.getOrCreateVRegDefAt(
CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
DAG.setRoot(CopyNode);
@@ -8021,6 +8091,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(T, SDValue());
+ if (T.ConstraintType == TargetLowering::C_Immediate &&
+ OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
+ // We've delayed emitting a diagnostic like the "n" constraint because
+ // inlining could cause an integer to show up.
+ return emitInlineAsmError(
+ CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
+ "integer constant expression");
+
ExtraInfo.update(T);
}
@@ -8105,7 +8183,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
@@ -8119,13 +8198,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
- } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
+ } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
!OpInfo.isIndirect) ||
OpInfo.ConstraintType == TargetLowering::C_Register ||
OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
// Otherwise, this outputs to a register (directly for C_Register /
- // C_RegisterClass, and a target-defined fashion for C_Other). Find a
- // register that we can use.
+ // C_RegisterClass, and a target-defined fashion for
+ // C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
CS, "couldn't allocate output register for constraint '" +
@@ -8205,15 +8285,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
- if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
- if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
+ if (isa<ConstantSDNode>(InOperandVal)) {
+ emitInlineAsmError(CS, "value out of range for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
@@ -8250,7 +8339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
- OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
"Unknown constraint type!");
// TODO: Support this.
@@ -8356,6 +8446,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
Val = OpInfo.AssignedRegs.getCopyFromRegs(
DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
break;
+ case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
OpInfo, DAG);
@@ -9018,7 +9109,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
- unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
+ const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
if (Args[i].Ty->isPointerTy()) {
Flags.setPointer();
@@ -9073,7 +9164,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
FrameAlign = Args[i].Alignment;
else
FrameAlign = getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Args[i].IsNest)
Flags.setNest();
@@ -9129,7 +9220,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
- MyFlags.Flags.setOrigAlign(1);
+ MyFlags.Flags.setOrigAlign(Align::None());
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9259,7 +9350,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
- assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+ assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is an InlineAsm we have to match the registers required, not the
@@ -9516,8 +9607,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
- unsigned OriginalAlignment =
- TLI->getABIAlignmentForCallingConv(ArgTy, DL);
+ const Align OriginalAlignment(
+ TLI->getABIAlignmentForCallingConv(ArgTy, DL));
if (Arg.getType()->isPointerTy()) {
Flags.setPointer();
@@ -9577,7 +9668,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FrameAlign = Arg.getParamAlignment();
else
FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
@@ -9586,6 +9677,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setOrigAlign(OriginalAlignment);
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
+ if (Arg.hasAttribute(Attribute::Returned))
+ Flags.setReturned();
MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
@@ -9598,7 +9691,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MyFlags.Flags.setSplit();
// If it isn't the first piece, the alignment must be 1.
else if (i > 0) {
- MyFlags.Flags.setOrigAlign(1);
+ MyFlags.Flags.setOrigAlign(Align::None());
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9650,7 +9743,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
- unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
+ Register SRetReg =
+ RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
NewRoot =
SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
@@ -9748,10 +9842,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
+ // Analyses past this point are naive and don't expect an assertion.
+ if (Res.getOpcode() == ISD::AssertZext)
+ Res = Res.getOperand(0);
+
// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
Reg);
}
@@ -9763,7 +9861,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// FIXME: This isn't very clean... it would be nice to make this more
// general.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
@@ -10087,8 +10185,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
break;
}
case CC_BitTests: {
- // FIXME: If Fallthrough is unreachable, skip the range check.
-
// FIXME: Optimize away range check based on pivot comparisons.
BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
@@ -10109,6 +10205,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->DefaultProb -= DefaultProb / 2;
}
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ BTB->OmitRangeCheck = true;
+ }
+
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 0072e33f23b7..bfcf30b430b6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -426,7 +426,7 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
: SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
- SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
+ SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
SwiftError(swifterror) {}
void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index da3049881d31..bc10f7621239 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -280,6 +280,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::SPLAT_VECTOR: return "splat_vector";
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
@@ -305,6 +306,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SMULFIX: return "smulfix";
case ISD::SMULFIXSAT: return "smulfixsat";
case ISD::UMULFIX: return "umulfix";
+ case ISD::UMULFIXSAT: return "umulfixsat";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
@@ -318,22 +320,27 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_ROUND: return "fp_round";
case ISD::STRICT_FP_ROUND: return "strict_fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
- case ISD::FP_ROUND_INREG: return "fp_round_inreg";
case ISD::FP_EXTEND: return "fp_extend";
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::LROUND: return "lround";
+ case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
+ case ISD::STRICT_LLROUND: return "strict_llround";
case ISD::LRINT: return "lrint";
+ case ISD::STRICT_LRINT: return "strict_lrint";
case ISD::LLRINT: return "llrint";
+ case ISD::STRICT_LLRINT: return "strict_llrint";
// Control flow instructions
case ISD::BR: return "br";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index bdf9f2c166e1..1f07a241a824 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -434,9 +435,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
- ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
@@ -524,8 +525,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
To = J->second;
}
// Make sure the new register has a sufficiently constrained register class.
- if (TargetRegisterInfo::isVirtualRegister(From) &&
- TargetRegisterInfo::isVirtualRegister(To))
+ if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
@@ -572,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
bool hasFI = MI->getOperand(0).isFI();
Register Reg =
hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
EntryMBB->insert(EntryMBB->begin(), MI);
else {
MachineInstr *Def = RegInfo->getVRegDef(Reg);
@@ -582,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
Def->getParent()->insert(std::next(InsertPos), MI);
} else
LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg"
- << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
+ << Register::virtReg2Index(Reg) << "\n");
}
// If Reg is live-in then update debug info to track its copy in a vreg.
@@ -671,8 +671,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
To = J->second;
}
// Make sure the new register has a sufficiently constrained register class.
- if (TargetRegisterInfo::isVirtualRegister(From) &&
- TargetRegisterInfo::isVirtualRegister(To))
+ if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
@@ -760,7 +759,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
continue;
unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ if (!Register::isVirtualRegister(DestReg))
continue;
// Ignore non-integer values.
@@ -1652,9 +1651,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
// Make sure that the copy dest is not a vreg when the copy source is a
// physical register.
- if (!OPI2->isReg() ||
- (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) &&
- TargetRegisterInfo::isPhysicalRegister(OPI2->getReg())))
+ if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
+ Register::isPhysicalRegister(OPI2->getReg())))
return false;
return true;
@@ -2234,9 +2232,9 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg =
+ Register Reg =
TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0),
- *CurDAG);
+ CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyFromReg(
Op->getOperand(0), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
@@ -2248,9 +2246,9 @@ void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(),
+ Register Reg = TLI->getRegisterByName(RegStr->getString().data(),
Op->getOperand(2).getValueType(),
- *CurDAG);
+ CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyToReg(
Op->getOperand(0), dl, Reg, Op->getOperand(2));
New->setNodeId(-1);
@@ -3323,10 +3321,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
- case OPC_EmitCopyToReg: {
+ case OPC_EmitCopyToReg:
+ case OPC_EmitCopyToReg2: {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+ if (Opcode == OPC_EmitCopyToReg2)
+ DestPhysReg |= MatcherTable[MatcherIndex++] << 8;
if (!InputChain.getNode())
InputChain = CurDAG->getEntryNode();
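
OPC_EmitCopyToReg2 is new in this matcher: the table stores one byte per operand, so register numbers that no longer fit in a byte need the second opcode, which supplies a high byte. A decoding sketch with hypothetical table bytes:

  #include <cstdio>

  int main() {
    unsigned char MatcherTable[] = {0x34, 0x12}; // low byte first
    unsigned MatcherIndex = 0;
    unsigned DestPhysReg = MatcherTable[MatcherIndex++];
    DestPhysReg |= MatcherTable[MatcherIndex++] << 8; // the ...Reg2 path
    printf("0x%X\n", DestPhysReg); // 0x1234
  }
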
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 395e9a8a4fc5..fad98b6f50dc 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -378,7 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// We use TargetFrameIndex so that isel will not select it into LEA
Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
-#ifndef NDEBUG
// Right now we always allocate spill slots that are of the same
// size as the value we're about to spill (the size of spillee can
// vary since we spill vectors of pointers too). At some point we
@@ -387,12 +386,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() &&
"Bad spill: stack slot does not match!");
-#endif
+ // Note: Using the alignment of the spill slot (rather than the ABI or
+ // preferred alignment) is required for correctness when dealing with spill
+ // slots with preferred alignments larger than the frame alignment.
auto &MF = Builder.DAG.getMachineFunction();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *StoreMMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlignment(Index));
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
- PtrInfo);
+ StoreMMO);
MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
@@ -1011,20 +1016,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
return;
}
- SDValue SpillSlot =
- DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
+ unsigned Index = *DerivedPtrLocation;
+ SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
// Note: We know all of these reloads are independent, but don't bother to
// exploit that chain wise. DAGCombine will happily do so as needed, so
// doing it here would be a small compile time win at most.
SDValue Chain = getRoot();
- SDValue SpillLoad =
- DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
- Relocate.getType()),
- getCurSDLoc(), Chain, SpillSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
- *DerivedPtrLocation));
+ auto &MF = DAG.getMachineFunction();
+ auto &MFI = MF.getFrameInfo();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *LoadMMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlignment(Index));
+
+ auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ Relocate.getType());
+
+ SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain,
+ SpillSlot, LoadMMO);
DAG.setRoot(SpillLoad.getValue(1));
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b260cd91d468..9ab1324533f1 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -37,7 +36,7 @@ using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
- : TargetLoweringBase(tm) {}
+ : TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
@@ -80,7 +79,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
- unsigned Reg = ArgLoc.getLocReg();
+ Register Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
@@ -121,19 +120,25 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
- ArrayRef<SDValue> Ops, bool isSigned,
- const SDLoc &dl, bool doesNotReturn,
- bool isReturnValueUsed,
- bool isPostTypeLegalization) const {
+ ArrayRef<SDValue> Ops,
+ MakeLibCallOptions CallOptions,
+ const SDLoc &dl) const {
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
- for (SDValue Op : Ops) {
- Entry.Node = Op;
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ SDValue NewOp = Ops[i];
+ Entry.Node = NewOp;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
- Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
+ CallOptions.IsSExt);
+ Entry.IsZExt = !Entry.IsSExt;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
+ Entry.IsSExt = Entry.IsZExt = false;
+ }
Args.push_back(Entry);
}
@@ -144,15 +149,22 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
+ bool zeroExtend = !signExtend;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
+ signExtend = zeroExtend = false;
+ }
+
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setNoReturn(doesNotReturn)
- .setDiscardResult(!isReturnValueUsed)
- .setIsPostTypeLegalization(isPostTypeLegalization)
+ .setNoReturn(CallOptions.DoesNotReturn)
+ .setDiscardResult(!CallOptions.IsReturnValueUsed)
+ .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
.setSExtResult(signExtend)
- .setZExtResult(!signExtend);
+ .setZExtResult(zeroExtend);
return LowerCallTo(CLI);
}
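
The makeLibCall change here is a parameter-object refactor: the four trailing booleans move into a MakeLibCallOptions struct, which also lets soft-float callers carry the pre-softening types. A generic sketch of that pattern (names hypothetical, not the LLVM API):

  #include <cstdio>

  struct CallOptions {
    bool IsSExt = false;
    bool DoesNotReturn = false;
    bool IsReturnValueUsed = true;
    CallOptions &setSExt(bool V = true) { IsSExt = V; return *this; }
    CallOptions &setNoReturn(bool V = true) { DoesNotReturn = V; return *this; }
  };

  static void makeCall(const char *Name, const CallOptions &Opts) {
    printf("%s: sext=%d noreturn=%d\n", Name, Opts.IsSExt, Opts.DoesNotReturn);
  }

  int main() {
    // Call sites name what they set instead of passing bare booleans.
    makeCall("__eqsf2", CallOptions().setSExt());
  }
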
@@ -263,7 +275,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
- const SDLoc &dl) const {
+ const SDLoc &dl, const SDValue OldLHS,
+ const SDValue OldRHS) const {
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
@@ -365,8 +378,11 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Use the target-specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = {NewLHS, NewRHS};
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
- dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { OldLHS.getValueType(),
+ OldRHS.getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
@@ -378,8 +394,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
- dl).first;
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
NewLHS = DAG.getNode(
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
@@ -564,6 +579,170 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
AssumeSingleUse);
}
+// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
+// TODO: Under what circumstances can we create nodes? Constant folding?
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ // Limit search depth.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
+ // Ignore UNDEFs.
+ if (Op.isUndef())
+ return SDValue();
+
+ // Not demanding any bits/elts from Op.
+ if (DemandedBits == 0 || DemandedElts == 0)
+ return DAG.getUNDEF(Op.getValueType());
+
+ unsigned NumElts = DemandedElts.getBitWidth();
+ KnownBits LHSKnown, RHSKnown;
+ switch (Op.getOpcode()) {
+ case ISD::BITCAST: {
+ SDValue Src = peekThroughBitcasts(Op.getOperand(0));
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+ unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
+
+ if (NumSrcEltBits == NumDstEltBits)
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+
+ // TODO - bigendian once we have test coverage.
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
+ DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumDstEltBits / NumSrcEltBits;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue()) {
+ DemandedSrcBits |= Sub;
+ for (unsigned j = 0; j != NumElts; ++j)
+ if (DemandedElts[j])
+ DemandedSrcElts.setBit((j * Scale) + i);
+ }
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ // TODO - bigendian once we have test coverage.
+ if ((NumSrcEltBits % NumDstEltBits) == 0 &&
+ DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumSrcEltBits / NumDstEltBits;
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * NumDstEltBits;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ break;
+ }
+ case ISD::AND: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::OR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::XOR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
+ return Op.getOperand(0);
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ // If we don't demand the inserted element, return the base vector.
+ SDValue Vec = Op.getOperand(0);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
+ !DemandedElts[CIdx->getZExtValue()])
+ return Vec;
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // If all the demanded elts are from one operand and are inline,
+ // then we can use the operand directly.
+ bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0 || !DemandedElts[i])
+ continue;
+ AllUndef = false;
+ IdentityLHS &= (M == (int)i);
+ IdentityRHS &= ((M - NumElts) == i);
+ }
+
+ if (AllUndef)
+ return DAG.getUNDEF(Op.getValueType());
+ if (IdentityLHS)
+ return Op.getOperand(0);
+ if (IdentityRHS)
+ return Op.getOperand(1);
+ break;
+ }
+ default:
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
+ Op, DemandedBits, DemandedElts, DAG, Depth))
+ return V;
+ break;
+ }
+ return SDValue();
+}
+
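
SimplifyMultipleUseDemandedBits returns an existing operand when, restricted to the demanded bits, the node is a no-op. The simplest sub-case of the ISD::AND rule above, checked numerically: if every demanded bit is known-one in one operand, the AND cannot change the other operand's demanded bits.

  #include <cstdint>
  #include <cstdio>

  int main() {
    const std::uint32_t Demanded = 0x00FF;
    const std::uint32_t RHS = 0x3FFF; // low 14 bits known one
    // Demanded is a subset of RHS's known-one bits, so on those bits
    // (X & RHS) is just X.
    for (std::uint32_t X : {0x12u, 0xABu, 0xFFu})
      printf("%u %u\n", (X & RHS) & Demanded, X & Demanded);
  }
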
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -619,12 +798,15 @@ bool TargetLowering::SimplifyDemandedBits(
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
- } else if (Depth == 6) { // Limit search depth.
+ } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
+ // Limit search depth.
return false;
}
KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
+ case ISD::TargetConstant:
+ llvm_unreachable("Can't simplify this node");
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -728,6 +910,21 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
+ case ISD::EXTRACT_SUBVECTOR: {
+ // If index isn't constant, assume we need all the source vector elements.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ }
+ if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
+ return true;
+ break;
+ }
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
@@ -773,22 +970,37 @@ bool TargetLowering::SimplifyDemandedBits(
}
if (!!DemandedLHS || !!DemandedRHS) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedLHS) {
- if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS,
- Known2, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
+ Depth + 1))
return true;
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
if (!!DemandedRHS) {
- if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS,
- Known2, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
+ Depth + 1))
return true;
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
+ return TLO.CombineTo(Op, NewOp);
+ }
}
break;
}
@@ -834,6 +1046,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
@@ -869,6 +1095,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
@@ -901,6 +1141,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
@@ -1034,7 +1288,7 @@ bool TargetLowering::SimplifyDemandedBits(
// out) are never demanded.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
- if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
if (ConstantSDNode *SA2 =
isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
if (SA2->getAPIntValue().ult(BitWidth)) {
@@ -1141,7 +1395,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SA2 =
isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
- if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ if (!DemandedBits.intersects(
+ APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
@@ -1479,6 +1734,11 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
Known = Known.trunc(BitWidth);
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
+
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Src.getNode()->hasOneUse()) {
@@ -1595,9 +1855,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
// TODO - bigendian once we have test coverage.
- // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
- if (SrcVT.isVector() && NumSrcEltBits > 1 &&
- (BitWidth % NumSrcEltBits) == 0 &&
+ if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
@@ -1663,6 +1921,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ SDNodeFlags Flags = Op.getNode()->getFlags();
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
@@ -1671,7 +1930,6 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
- SDNodeFlags Flags = Op.getNode()->getFlags();
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
@@ -1684,6 +1942,23 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
}
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If we have a constant operand, we may be able to turn it into -1 if we
// do not demand the high bits. This can make the constant smaller to
// encode, allow more general folding, or match specialized instruction
@@ -1694,10 +1969,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (C && !C->isAllOnesValue() && !C->isOne() &&
(C->getAPIntValue() | HighMask).isAllOnesValue()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
- // We can't guarantee that the new math op doesn't wrap, so explicitly
- // clear those flags to prevent folding with a potential existing node
- // that has those flags set.
- SDNodeFlags Flags;
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
@@ -1837,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
// Limit search depth.
- if (Depth >= 6)
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
SDLoc DL(Op);
@@ -2001,6 +2274,15 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return true;
APInt BaseElts = DemandedElts;
BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
+
+ // If none of the base operand elements are demanded, replace it with undef.
+ if (!BaseElts && !Base.isUndef())
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ TLO.DAG.getUNDEF(VT),
+ Op.getOperand(1),
+ Op.getOperand(2)));
+
if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
Depth + 1))
return true;
@@ -2134,11 +2416,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Update legal shuffle masks based on demanded elements if it won't reduce
// to Identity which can cause premature removal of the shuffle mask.
- if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
- isShuffleMaskLegal(NewMask, VT))
- return TLO.CombineTo(Op,
- TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
- Op.getOperand(1), NewMask));
+ if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
+ SDValue LegalShuffle =
+ buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
+ NewMask, TLO.DAG);
+ if (LegalShuffle)
+ return TLO.CombineTo(Op, LegalShuffle);
+ }
// Propagate undef/zero elements from LHS/RHS.
for (unsigned i = 0; i != NumElts; ++i) {
@@ -2304,6 +2588,13 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.resetAll();
}
+void TargetLowering::computeKnownBitsForTargetInstr(
+ GISelKnownBits &Analysis, Register R, KnownBits &Known,
+ const APInt &DemandedElts, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ Known.resetAll();
+}
+
void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
@@ -2357,6 +2648,36 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
return false;
}
+SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ assert(
+ (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
+ " is a target node!");
+ return SDValue();
+}
+
+SDValue
+TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
+ SDValue N1, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG) const {
+ bool LegalMask = isShuffleMaskLegal(Mask, VT);
+ if (!LegalMask) {
+ std::swap(N0, N1);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ LegalMask = isShuffleMaskLegal(Mask, VT);
+ }
+
+ if (!LegalMask)
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
+}
+
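When the original mask is rejected, buildLegalVectorShuffle retries with the inputs swapped and the mask commuted. A standalone sketch of the commuting step (plain STL, not the LLVM API; it mirrors what ShuffleVectorSDNode::commuteMask does, assuming indices in [0, 2N) with -1 for undef):

#include <cstdio>
#include <vector>

// Swap which input a two-operand shuffle index refers to: indices < N move
// to the second input (+N), indices >= N move to the first (-N).
static void commuteMask(std::vector<int> &Mask) {
  int N = static_cast<int>(Mask.size());
  for (int &M : Mask) {
    if (M < 0)
      continue; // undef lane, leave untouched
    M = (M < N) ? M + N : M - N;
  }
}

int main() {
  std::vector<int> Mask = {4, 1, 6, 3}; // picks B[0], A[1], B[2], A[3]
  commuteMask(Mask);                    // after swap(A, B): 0, 5, 2, 7
  for (int M : Mask)
    std::printf("%d ", M);
  return 0;
}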
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
return nullptr;
}
@@ -2610,6 +2931,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
return T2;
}
+// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL) const {
+ assert(isConstOrConstSplat(N1C) &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ "Should be a comparison with 0.");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Valid only for [in]equality comparisons.");
+
+ unsigned NewShiftOpcode;
+ SDValue X, C, Y;
+
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Look for '(C l>>/<< Y)'.
+ auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
+ // The shift should be one-use.
+ if (!V.hasOneUse())
+ return false;
+ unsigned OldShiftOpcode = V.getOpcode();
+ switch (OldShiftOpcode) {
+ case ISD::SHL:
+ NewShiftOpcode = ISD::SRL;
+ break;
+ case ISD::SRL:
+ NewShiftOpcode = ISD::SHL;
+ break;
+ default:
+ return false; // must be a logical shift.
+ }
+ // We should be shifting a constant.
+ // FIXME: best to use isConstantOrConstantVector().
+ C = V.getOperand(0);
+ ConstantSDNode *CC =
+ isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ if (!CC)
+ return false;
+ Y = V.getOperand(1);
+
+ ConstantSDNode *XC =
+ isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
+ };
+
+ // LHS of comparison should be a one-use 'and'.
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+
+ X = N0.getOperand(0);
+ SDValue Mask = N0.getOperand(1);
+
+ // 'and' is commutative!
+ if (!Match(Mask)) {
+ std::swap(X, Mask);
+ if (!Match(Mask))
+ return SDValue();
+ }
+
+ EVT VT = X.getValueType();
+
+ // Produce:
+ // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
+ SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
+ SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
+ return T2;
+}
+
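The hoisting transform is sound because an equality comparison against zero only asks whether any masked bit survives, and shifting X the opposite way by the same amount pairs up exactly the same bits of X and C. A brute-force check of the shl direction (a sketch assuming in-range shift amounts; the srl direction is symmetric):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 64; ++X)
    for (uint32_t C = 0; C < 64; ++C)
      for (uint32_t Y = 0; Y < 8; ++Y) {
        bool Original = (X & (C << Y)) != 0;
        bool Hoisted = ((X >> Y) & C) != 0;
        assert(Original == Hoisted);
      }
  return 0;
}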
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
@@ -2726,9 +3118,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
- SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
- DAG.getConstant(1, dl, CTVT));
- SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
}
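Both rewrites rest on the classic x & (x - 1) trick, which clears the lowest set bit, so the result is zero exactly when x has at most one bit set, i.e. popcount(x) < 2. A quick standalone check (illustrative only; the code above builds x - 1 as x + (-1) to reuse an all-ones constant):

#include <bitset>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 1024; ++X) {
    bool PopcountLT2 = std::bitset<32>(X).count() < 2;
    bool Folded = (X & (X - 1)) == 0;
    assert(PopcountLT2 == Folded);
  }
  return 0;
}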
@@ -2852,7 +3244,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
- if (!Lod->isVolatile() && Lod->isUnindexed()) {
+ if (Lod->isSimple() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueSizeInBits();
unsigned maskWidth = origWidth;
// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
@@ -3178,6 +3570,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
+ // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+ if (C1.isNullValue())
+ if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ VT, N0, N1, Cond, DCI, dl))
+ return CC;
+ }
+
// If we have "setcc X, C0", check to see if we can shrink the immediate
// by changing cc.
// TODO: Support this for vectors after legalize ops.
@@ -3203,33 +3603,35 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Back to non-vector simplifications.
// TODO: Can we do these for vector splats?
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const APInt &C1 = N1C->getAPIntValue();
+ EVT ShValTy = N0.getValueType();
// Fold bit comparisons when we can.
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- (VT == N0.getValueType() ||
- (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
auto &DL = DAG.getDataLayout();
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize());
+ EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
- if (AndRHS->getAPIntValue().isPowerOf2()) {
+ unsigned ShCt = AndRHS->getAPIntValue().logBase2();
+ if (AndRHS->getAPIntValue().isPowerOf2() &&
+ ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
- DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
- ShiftTy)));
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
}
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
// (X & 8) == 8 --> (X & 8) >> 3
// Perform the xform if C1 is a single bit.
- if (C1.isPowerOf2()) {
+ unsigned ShCt = C1.logBase2();
+ if (C1.isPowerOf2() &&
+ ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
- DAG.getConstant(C1.logBase2(), dl,
- ShiftTy)));
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
}
}
}
@@ -3452,15 +3854,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// Fold remainder of division by a constant.
- if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
+ N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize))
- if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
- return Folded;
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
+ if (N0.getOpcode() == ISD::UREM) {
+ if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ } else if (N0.getOpcode() == ISD::SREM) {
+ if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ }
+ }
}
// Fold away ALL boolean setcc's.
@@ -3567,15 +3975,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
if (S == 1) {
switch (Constraint[0]) {
default: break;
- case 'r': return C_RegisterClass;
+ case 'r':
+ return C_RegisterClass;
case 'm': // memory
case 'o': // offsetable
case 'V': // not offsetable
return C_Memory;
- case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 'E': // Floating Point Constant
case 'F': // Floating Point Constant
+ return C_Immediate;
+ case 'i': // Simple Integer or Relocatable Constant
case 's': // Relocatable Constant
case 'p': // Address.
case 'X': // Allow ANY value.
@@ -3950,6 +4360,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
+ case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -4069,11 +4480,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
- // If this is an 'other' constraint, see if the operand is valid for it.
- // For example, on X86 we might have an 'rI' constraint. If the operand
- // is an integer in the range [0..31] we want to use I (saving a load
- // of a register), otherwise we must use 'r'.
- if (CType == TargetLowering::C_Other && Op.getNode()) {
+ // If this is an 'other' or 'immediate' constraint, see if the operand is
+ // valid for it. For example, on X86 we might have an 'rI' constraint. If
+ // the operand is an integer in the range [0..31] we want to use I (saving a
+ // load of a register), otherwise we must use 'r'.
+ if ((CType == TargetLowering::C_Other ||
+ CType == TargetLowering::C_Immediate) && Op.getNode()) {
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
@@ -4455,6 +4867,34 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
+/// If all values in Values that *don't* match the predicate are the same 'splat'
+/// value, then replace all values with that splat value.
+/// Else, if AlternativeReplacement was provided, then replace all values that
+/// do match predicate with AlternativeReplacement value.
+static void
+turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
+ std::function<bool(SDValue)> Predicate,
+ SDValue AlternativeReplacement = SDValue()) {
+ SDValue Replacement;
+ // Is there a value for which the Predicate does *NOT* match? What is it?
+ auto SplatValue = llvm::find_if_not(Values, Predicate);
+ if (SplatValue != Values.end()) {
+ // Does Values consist only of copies of SplatValue and values matching Predicate?
+ if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
+ return Value == *SplatValue || Predicate(Value);
+ })) // Then we shall replace values matching predicate with SplatValue.
+ Replacement = *SplatValue;
+ }
+ if (!Replacement) {
+ // Oops, we did not find the "baseline" splat value.
+ if (!AlternativeReplacement)
+ return; // Nothing to do.
+ // Let's replace with provided value then.
+ Replacement = AlternativeReplacement;
+ }
+ std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
+}
+
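A plain-integer analogue of the helper, to make its control flow concrete (illustrative only; here 0 plays the role of a lane matching the predicate, and no alternative replacement is supplied):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Vals = {7, 0, 7, 0}; // 0 marks a don't-care lane
  auto IsDontCare = [](int V) { return V == 0; };
  // Find the (unique) value the predicate does not match...
  auto Splat = std::find_if_not(Vals.begin(), Vals.end(), IsDontCare);
  // ...and splat it if every lane is either that value or a don't-care.
  if (Splat != Vals.end() &&
      std::all_of(Vals.begin(), Vals.end(),
                  [&](int V) { return V == *Splat || IsDontCare(V); }))
    std::replace_if(Vals.begin(), Vals.end(), IsDontCare, *Splat);
  assert((Vals == std::vector<int>{7, 7, 7, 7}));
  return 0;
}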
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
@@ -4482,77 +4922,409 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
DAGCombinerInfo &DCI, const SDLoc &DL,
SmallVectorImpl<SDNode *> &Created) const {
// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
- // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
// - P is the multiplicative inverse of D0 modulo 2^W
- // - Q = floor((2^W - 1) / D0)
+ // - Q = floor(((2^W) - 1) / D)
// where W is the width of the common type of N and D.
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Only applicable for (in)equality comparisons.");
+ SelectionDAG &DAG = DCI.DAG;
+
EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
// If MUL is unavailable, we cannot proceed in any case.
if (!isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
- // TODO: Add non-uniform constant support.
- ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1));
+ // TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!Divisor || !CompTarget || Divisor->isNullValue() ||
- !CompTarget->isNullValue())
+ if (!CompTarget || !CompTarget->isNullValue())
return SDValue();
- const APInt &D = Divisor->getAPIntValue();
+ bool HadOneDivisor = false;
+ bool AllDivisorsAreOnes = true;
+ bool HadEvenDivisor = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
+
+ auto BuildUREMPattern = [&](ConstantSDNode *C) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (C->isNullValue())
+ return false;
+
+ const APInt &D = C->getAPIntValue();
+ // If all divisors are ones, we will prefer to avoid the fold.
+ HadOneDivisor |= D.isOneValue();
+ AllDivisorsAreOnes &= D.isOneValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ // D is even if it has trailing zeros.
+ HadEvenDivisor |= (K != 0);
+ // D is a power-of-two if D0 is one.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOneValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // Q = floor((2^W - 1) / D)
+ APInt Q = APInt::getAllOnesValue(W).udiv(D);
+
+ assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1 the result can be constant-folded.
+ if (D.isOneValue()) {
+ // Set the P and K amounts to bogus values so we can try to splat them.
+ P = 0;
+ K = -1;
+ assert(Q.isAllOnesValue() &&
+ "Expecting all-ones comparison for one divisor");
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
- // Decompose D into D0 * 2^K
- unsigned K = D.countTrailingZeros();
- bool DivisorIsEven = (K != 0);
- APInt D0 = D.lshr(K);
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
+ return SDValue();
- // The fold is invalid when D0 == 1.
- // This is reachable because visitSetCC happens before visitREM.
- if (D0.isOneValue())
+ // If this is a urem by one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
return SDValue();
- // P = inv(D0, 2^W)
- // 2^W requires W + 1 bits, so we have to extend and then truncate.
- unsigned W = D.getBitWidth();
- APInt P = D0.zext(W + 1)
- .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
- .trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ // If this is a urem by a power-of-two, avoid the fold since it is best
+ // implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
- // Q = floor((2^W - 1) / D)
- APInt Q = APInt::getAllOnesValue(W).udiv(D);
+ SDValue PVal, KVal, QVal;
+ if (VT.isVector()) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+ // that are currently '0'. If we can't, just keep '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn KAmts into a splat, since we don't care about the values
+ // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
- SelectionDAG &DAG = DCI.DAG;
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else {
+ PVal = PAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
- SDValue PVal = DAG.getConstant(P, DL, VT);
- SDValue QVal = DAG.getConstant(Q, DL, VT);
// (mul N, P)
- SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal);
- Created.push_back(Op1.getNode());
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
- // Rotate right only if D was even.
- if (DivisorIsEven) {
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
// We need ROTR to do this.
if (!isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
- SDValue ShAmt =
- DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout()));
SDNodeFlags Flags;
Flags.setExact(true);
// UREM: (rotr (mul N, P), K)
- Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags);
- Created.push_back(Op1.getNode());
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Created.push_back(Op0.getNode());
}
// UREM: (setule/setugt (rotr (mul N, P), K), Q)
- return DAG.getSetCC(DL, SETCCVT, Op1, QVal,
+ return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
}
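A worked instance of the rewrite for W = 8 and D = 6, so D0 = 3 and K = 1: P = 171 is the multiplicative inverse of 3 modulo 2^8 (3 * 171 = 513 = 2 * 256 + 1), and Q = floor(255 / 6) = 42. The standalone sketch below brute-forces the equivalence over every i8 value (illustrative C++, not LLVM code):

#include <cassert>
#include <cstdint>

static uint8_t rotr8(uint8_t V, unsigned K) {
  return K ? (uint8_t)((V >> K) | (V << (8 - K))) : V;
}

int main() {
  const uint8_t P = 171, Q = 42; // D = 6 = 3 * 2^1, inv(3) mod 256 = 171
  const unsigned K = 1;
  for (unsigned N = 0; N < 256; ++N) {
    bool RemIsZero = (N % 6) == 0;
    bool Folded = rotr8((uint8_t)(N * P), K) <= Q;
    assert(RemIsZero == Folded);
  }
  return 0;
}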
+/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 7> Built;
+ if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+ DCI, DL, Built)) {
+ assert(Built.size() <= 7 && "Max size prediction failed.");
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
+SDValue
+TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // Fold:
+ // (seteq/ne (srem N, D), 0)
+ // To:
+ // (setule/ugt (rotr (add (mul N, P), A), K), Q)
+ //
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
+ // - P is the multiplicative inverse of D0 modulo 2^W
+ // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
+ // - Q = floor((2 * A) / (2^K))
+ // where W is the width of the common type of N and D.
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ // If MUL is unavailable, we cannot proceed in any case.
+ if (!isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+
+ // TODO: Could support comparing with non-zero too.
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
+ if (!CompTarget || !CompTarget->isNullValue())
+ return SDValue();
+
+ bool HadIntMinDivisor = false;
+ bool HadOneDivisor = false;
+ bool AllDivisorsAreOnes = true;
+ bool HadEvenDivisor = false;
+ bool NeedToApplyOffset = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
+
+ auto BuildSREMPattern = [&](ConstantSDNode *C) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (C->isNullValue())
+ return false;
+
+ // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
+
+ // WARNING: this fold is only valid for positive divisors!
+ APInt D = C->getAPIntValue();
+ if (D.isNegative())
+ D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
+
+ HadIntMinDivisor |= D.isMinSignedValue();
+
+ // If all divisors are ones, we will prefer to avoid the fold.
+ HadOneDivisor |= D.isOneValue();
+ AllDivisorsAreOnes &= D.isOneValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ if (!D.isMinSignedValue()) {
+ // D is even if it has trailing zeros; unless it's INT_MIN, in which case
+ // we don't care about this lane in this fold; we'll special-handle it.
+ HadEvenDivisor |= (K != 0);
+ }
+
+ // D is a power-of-two if D0 is one. This includes INT_MIN.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOneValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // A = floor((2^(W - 1) - 1) / D0) & -2^K
+ APInt A = APInt::getSignedMaxValue(W).udiv(D0);
+ A.clearLowBits(K);
+
+ if (!D.isMinSignedValue()) {
+ // If the divisor is INT_MIN, we don't care about this lane in this fold;
+ // we'll special-handle it.
+ NeedToApplyOffset |= A != 0;
+ }
+
+ // Q = floor((2 * A) / (2^K))
+ APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
+
+ assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ "We are expecting that A is always less than all-ones for SVT");
+ assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1 the result can be constant-folded. Likewise, we
+ // don't care about INT_MIN lanes; those can be set to undef if appropriate.
+ if (D.isOneValue()) {
+ // Set P, A and K to bogus values so we can try to splat them.
+ P = 0;
+ A = -1;
+ K = -1;
+
+ // x ?% 1 == 0 <--> true <--> x u<= -1
+ Q = -1;
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ AAmts.push_back(DAG.getConstant(A, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
+
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
+ return SDValue();
+
+ // If this is a srem by one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
+ return SDValue();
+
+ // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
+ // since it is best implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
+
+ SDValue PVal, AVal, KVal, QVal;
+ if (VT.isVector()) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+ // that are currently '0'. If we can't, just keep '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn AAmts into a splat, since we don't care about the
+ // values that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, SVT));
+ // Try to turn KAmts into a splat, since we don't care about the values
+ // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
+
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ AVal = DAG.getBuildVector(VT, DL, AAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else {
+ PVal = PAmts[0];
+ AVal = AAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
+
+ // (mul N, P)
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
+
+ if (NeedToApplyOffset) {
+ // We need ADD to do this.
+ if (!isOperationLegalOrCustom(ISD::ADD, VT))
+ return SDValue();
+
+ // (add (mul N, P), A)
+ Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
+ Created.push_back(Op0.getNode());
+ }
+
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
+ // We need ROTR to do this.
+ if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ // SREM: (rotr (add (mul N, P), A), K)
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Created.push_back(Op0.getNode());
+ }
+
+ // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
+ SDValue Fold =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+
+ // If we didn't have lanes with INT_MIN divisor, then we're done.
+ if (!HadIntMinDivisor)
+ return Fold;
+
+ // That fold is only valid for positive divisors, which effectively means
+ // it is invalid for INT_MIN divisors. So if we have such a lane,
+ // we must fix up the results for those lanes.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+
+ if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
+ !isOperationLegalOrCustom(ISD::AND, VT) ||
+ !isOperationLegalOrCustom(Cond, VT) ||
+ !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ Created.push_back(Fold.getNode());
+
+ SDValue IntMin = DAG.getConstant(
+ APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue IntMax = DAG.getConstant(
+ APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue Zero =
+ DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+
+ // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
+ SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
+ Created.push_back(DivisorIsIntMin.getNode());
+
+ // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
+ SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
+ Created.push_back(Masked.getNode());
+ SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
+ Created.push_back(MaskedIsZero.getNode());
+
+ // To produce the final result we need to blend two vectors: 'Fold' and
+ // 'MaskedIsZero'. If the divisor for a lane was *NOT* INT_MIN, we pick
+ // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
+ // constant-folded, the select can get lowered to a shuffle with a constant
+ // mask.
+ SDValue Blended =
+ DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
+
+ return Blended;
+}
+
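Continuing the W = 8, D = 6 example for the signed variant: A = floor(127 / 3) & -2 = 42 and Q = floor((2 * 42) / 2) = 42, so the check becomes rotr(N * 171 + 42, 1) u<= 42. A standalone brute-force check over every i8 value (illustrative; the divisor is not INT_MIN, so the VSELECT fix-up path above is not exercised):

#include <cassert>
#include <cstdint>

static uint8_t rotr8(uint8_t V, unsigned K) {
  return K ? (uint8_t)((V >> K) | (V << (8 - K))) : V;
}

int main() {
  const uint8_t P = 171, A = 42, Q = 42; // D = 6: D0 = 3, K = 1
  const unsigned K = 1;
  for (int N = -128; N <= 127; ++N) {
    bool RemIsZero = (N % 6) == 0;
    bool Folded = rotr8((uint8_t)((uint8_t)(N * P) + A), K) <= Q;
    assert(RemIsZero == Folded);
  }
  return 0;
}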
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
@@ -4564,6 +5336,246 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
return false;
}
+char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth) const {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG)
+ return 2;
+
+ // Don't allow anything with multiple uses unless we know it is free.
+ EVT VT = Op.getValueType();
+ const SDNodeFlags Flags = Op->getFlags();
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
+ isFPExtFree(VT, Op.getOperand(0).getValueType())))
+ return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return 0;
+
+ switch (Op.getOpcode()) {
+ case ISD::ConstantFP: {
+ if (!LegalOperations)
+ return 1;
+
+ // Don't invert constant FP values after legalization unless the target says
+ // the negated constant is legal.
+ return isOperationLegal(ISD::ConstantFP, VT) ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ ForCodeSize);
+ }
+ case ISD::BUILD_VECTOR: {
+ // Only permit BUILD_VECTOR of constants.
+ if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+ return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+ }))
+ return 0;
+ if (!LegalOperations)
+ return 1;
+ if (isOperationLegal(ISD::ConstantFP, VT) &&
+ isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return 1;
+ return llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ ForCodeSize);
+ });
+ }
+ case ISD::FADD:
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
+ return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1))
+ return V;
+
+ // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
+ if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
+ if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
+ return 0;
+
+ return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (!V2)
+ return 0;
+
+ // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
+ char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V01 = std::max(V0, V1);
+ return V01 ? std::max(V01, V2) : 0;
+ }
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ }
+
+ return 0;
+}
+
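The FADD case demands no-signed-zeros because -(A + B) and (-A) - B can disagree on the sign of a zero result even though they compare equal. A minimal standalone demonstration with IEEE-754 doubles:

#include <cassert>
#include <cmath>

int main() {
  double A = 1.0, B = -1.0;
  double Direct = -(A + B);    // -0.0
  double Rewritten = (-A) - B; // +0.0
  assert(Direct == Rewritten);                              // numerically equal...
  assert(std::signbit(Direct) != std::signbit(Rewritten));  // ...but zero signs differ
  return 0;
}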
+SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations,
+ bool ForCodeSize,
+ unsigned Depth) const {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG)
+ return Op.getOperand(0);
+
+ assert(Depth <= SelectionDAG::MaxRecursionDepth &&
+ "getNegatedExpression doesn't match isNegatibleForFree");
+ const SDNodeFlags Flags = Op->getFlags();
+
+ switch (Op.getOpcode()) {
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
+ }
+ case ISD::BUILD_VECTOR: {
+ SmallVector<SDValue, 4> Ops;
+ for (SDValue C : Op->op_values()) {
+ if (C.isUndef()) {
+ Ops.push_back(C);
+ continue;
+ }
+ APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+ V.changeSign();
+ Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+ }
+ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+ }
+ case ISD::FADD:
+ assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) &&
+ "Expected NSZ fp-flag");
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+ Depth + 1))
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1), Flags);
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(0), Flags);
+ case ISD::FSUB:
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP =
+ isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
+ if (N0CFP->isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0), Flags);
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+ Depth + 1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1), Flags);
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(
+ Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
+ getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1),
+ Flags);
+
+ case ISD::FMA:
+ case ISD::FMAD: {
+ assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) &&
+ "Expected NSZ fp-flag");
+
+ SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+
+ char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (V0 >= V1) {
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ SDValue Neg0 = getNegatedExpression(
+ Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
+ Op.getOperand(1), Neg2, Flags);
+ }
+
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), Neg1, Neg2, Flags);
+ }
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1));
+ }
+
+ llvm_unreachable("Unknown code");
+}
+
//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//
@@ -4862,7 +5874,8 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
SDLoc dl(SDValue(Node, 0));
@@ -4871,6 +5884,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
if (SrcVT != MVT::f32 || DstVT != MVT::i64)
return false;
+ if (Node->isStrictFPOpcode())
+ // When a NaN is converted to an integer a trap is allowed. We can't
+ // use this expansion here because it would eliminate that trap. Other
+ // traps are also allowed and cannot be eliminated. See
+ // IEEE 754-2008 sec 5.8.
+ return false;
+
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
// https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
@@ -4924,9 +5944,11 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
SelectionDAG &DAG) const {
SDLoc dl(SDValue(Node, 0));
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -4934,7 +5956,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
// Only expand vector types if we have the appropriate vector bit operations.
- if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) ||
+ unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
+ ISD::FP_TO_SINT;
+ if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
return false;
@@ -4946,14 +5970,21 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
- Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ if (Node->isStrictFPOpcode()) {
+ Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), Src });
+ Chain = Result.getValue(1);
+ } else
+ Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
return true;
}
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
- bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+ bool Strict = Node->isStrictFPOpcode() ||
+ shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+
if (Strict) {
// Expand based on maximum range of FP_TO_SINT; if the value exceeds the
// signmask, then offset it (the result of which should be fully representable).
@@ -4963,12 +5994,23 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// Result = fp_to_sint(Val) ^ Ofs
// TODO: Should any fast-math-flags be set for the FSUB?
- SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
- DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ SDValue SrcBiased;
+ if (Node->isStrictFPOpcode())
+ SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ { Node->getOperand(0), Src, Cst });
+ else
+ SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
+ SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
DAG.getConstant(SignMask, dl, DstVT));
- Result = DAG.getNode(ISD::XOR, dl, DstVT,
- DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
+ SDValue SInt;
+ if (Node->isStrictFPOpcode()) {
+ SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { SrcBiased.getValue(1), Val });
+ Chain = SInt.getValue(1);
+ } else
+ SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
+ Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
} else {
// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
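A scalar model of the f32 -> i64 expansion built above (a sketch, not LLVM code, assuming round-toward-zero conversions): sources below 2^63 go through the signed conversion directly, and larger ones are biased down by 2^63 first, with the sign bit restored by the XOR.

#include <cassert>
#include <cstdint>

static uint64_t fptoui_via_sint(float Src) {
  const float Cst = 9223372036854775808.0f; // 2^63, exactly representable
  if (Src < Cst)
    return (uint64_t)(int64_t)Src;
  return (uint64_t)(int64_t)(Src - Cst) ^ 0x8000000000000000ULL;
}

int main() {
  assert(fptoui_via_sint(3.0f) == 3);
  assert(fptoui_via_sint(9223372036854775808.0f) == (1ULL << 63));
  // Largest float below 2^64: (2^24 - 1) * 2^40.
  assert(fptoui_via_sint(18446742974197923840.0f) == 0xFFFFFF0000000000ULL);
  return 0;
}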
@@ -5918,7 +6960,8 @@ SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert((Node->getOpcode() == ISD::SMULFIX ||
Node->getOpcode() == ISD::UMULFIX ||
- Node->getOpcode() == ISD::SMULFIXSAT) &&
+ Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT) &&
"Expected a fixed point multiplication opcode");
SDLoc dl(Node);
@@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
unsigned Scale = Node->getConstantOperandVal(2);
- bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
+ bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT);
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
unsigned VTSize = VT.getScalarSizeInBits();
if (!Scale) {
// [us]mul.fix(a, b, 0) -> mul(a, b)
- if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
- return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
- } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+ if (!Saturating) {
+ if (isOperationLegalOrCustom(ISD::MUL, VT))
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
SDValue Result =
DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
@@ -5948,11 +6995,18 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
}
}
- bool Signed =
- Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT;
assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
"Expected scale to be less than the number of bits if signed or at "
"most the number of bits if unsigned.");
@@ -5978,7 +7032,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
if (Scale == VTSize)
// Result is just the top half since we'd be shifting by the width of the
- // operand.
+ // operand. Overflow is impossible, so this works for both UMULFIX and
+ // UMULFIXSAT.
return Hi;
// The result will need to be shifted right by the scale since both operands
@@ -5990,20 +7045,55 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
if (!Saturating)
return Result;
- unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
- SDValue HiMask =
- DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
- SDValue LoMask = DAG.getConstant(
- APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
- APInt MaxVal = APInt::getSignedMaxValue(VTSize);
- APInt MinVal = APInt::getSignedMinValue(VTSize);
-
- Result = DAG.getSelectCC(dl, Hi, LoMask,
- DAG.getConstant(MaxVal, dl, VT), Result,
- ISD::SETGT);
- return DAG.getSelectCC(dl, Hi, HiMask,
- DAG.getConstant(MinVal, dl, VT), Result,
- ISD::SETLT);
+ if (!Signed) {
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
+ // widened multiplication) aren't all zeroes.
+
+ // Saturate to max if ((Hi >> Scale) != 0),
+ // which is the same as if (Hi > ((1 << Scale) - 1))
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask,
+ DAG.getConstant(MaxVal, dl, VT), Result,
+ ISD::SETUGT);
+
+ return Result;
+ }
+
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
+ // widened multiplication) aren't all ones or all zeroes.
+
+ SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
+ SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
+
+ if (Scale == 0) {
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
+ DAG.getConstant(VTSize - 1, dl, ShiftTy));
+ SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
+ // Saturate to SatMin if the wide product is negative, and to SatMax if
+ // the wide product is positive ...
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
+ ISD::SETLT);
+ // ... but only if we overflowed.
+ return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
+ }
+
+ // We handled Scale==0 above, so all the bits to examine are in Hi.
+
+ // Saturate to max if ((Hi >> (Scale - 1)) > 0),
+ // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
+ // Saturate to min if ((Hi >> (Scale - 1)) < -1),
+ // which is the same as if (Hi < (-1 << (Scale - 1)))
+ SDValue HighMask =
+ DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
+ return Result;
}
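A scalar model of the unsigned saturating path above, at Q4.4 (a sketch; umulfixsat8 is a hypothetical helper, not an LLVM API): the 16-bit wide product stands in for the Lo/Hi pair, and saturation fires exactly when the bits above the scale in Hi are nonzero.

#include <cassert>
#include <cstdint>

static uint8_t umulfixsat8(uint8_t L, uint8_t R, unsigned Scale) {
  uint16_t Wide = (uint16_t)((uint16_t)L * R); // Lo/Hi pair in the expansion
  uint16_t Shifted = (uint16_t)(Wide >> Scale);
  return Shifted > 0xFF ? 0xFF : (uint8_t)Shifted;
}

int main() {
  // Q4.4: 1.5 * 2.25 = 3.375, i.e. 0x18 * 0x24 -> 0x36.
  assert(umulfixsat8(0x18, 0x24, 4) == 0x36);
  // 8.0 * 8.0 = 64.0 does not fit in Q4.4, so the result saturates.
  assert(umulfixsat8(0x80, 0x80, 4) == 0xFF);
  return 0;
}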
void TargetLowering::expandUADDSUBO(
@@ -6060,24 +7150,19 @@ void TargetLowering::expandSADDSUBO(
SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Result >= 0
- //
- // Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
- // Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
- SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
- SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
- IsAdd ? ISD::SETEQ : ISD::SETNE);
-
- SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
- SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
-
- SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
- Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
+ // For an addition, the result is less than one operand (LHS) exactly
+ // when the other operand (RHS) is negative; if those two conditions
+ // disagree, the addition overflowed.
+ // For a subtraction, the result is less than LHS exactly when the other
+ // operand (RHS) is (non-zero) positive; again, a disagreement between
+ // the two conditions means overflow.
+ SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
+ SDValue ConditionRHS =
+ DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
+
+ Overflow = DAG.getBoolExtOrTrunc(
+ DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
+ ResultType, ResultType);
}
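
The rewritten check is the standard wrap-around test; a self-contained sketch (illustrative only), using unsigned arithmetic for well-defined wrapping:

#include <cassert>
#include <cstdint>

// Illustrative sketch, not part of the diff. Overflow of a wrapping
// signed add/sub is exactly a mismatch (XOR) between "Result < LHS" and
// the sign test on RHS, as the comment above describes.
bool saddOverflow(int32_t LHS, int32_t RHS, int32_t &Result) {
  Result = (int32_t)((uint32_t)LHS + (uint32_t)RHS); // wrapping add
  return (Result < LHS) != (RHS < 0);
}

bool ssubOverflow(int32_t LHS, int32_t RHS, int32_t &Result) {
  Result = (int32_t)((uint32_t)LHS - (uint32_t)RHS); // wrapping sub
  return (Result < LHS) != (RHS > 0);
}

int main() {
  int32_t R;
  assert(saddOverflow(INT32_MAX, 1, R));  // wraps to INT32_MIN
  assert(!saddOverflow(5, -7, R));        // Result < LHS and RHS < 0
  assert(ssubOverflow(INT32_MIN, 1, R));  // wraps to INT32_MAX
  assert(!ssubOverflow(0, 0, R));         // neither condition holds
  return 0;
}
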
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
@@ -6176,20 +7261,19 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
// being a legal type for the architecture and thus has to be split to
// two arguments.
SDValue Ret;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ CallOptions.setIsPostTypeLegalization(true);
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
// Halves of WideVT are packed into registers in different order
// depending on platform endianness. This is usually handled by
// the C calling convention, but we can't defer to it in
// the legalizer.
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
- Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
- /* doesNotReturn */ false, /* isReturnValueUsed */ true,
- /* isPostTypeLegalization */ true).first;
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
} else {
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
- Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
- /* doesNotReturn */ false, /* isReturnValueUsed */ true,
- /* isPostTypeLegalization */ true).first;
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
}
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
"Ret value is a collection of constituent nodes holding result.");
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index 2db0ea570598..412a00095b9b 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -278,11 +278,10 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
// Ignore instructions like DBG_VALUE which don't read/def the register.
if (!MO.isDef() && !MO.readsReg())
continue;
- unsigned PhysReg = MO.getReg();
+ Register PhysReg = MO.getReg();
if (!PhysReg)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
- "Unallocated register?!");
+ assert(Register::isPhysicalRegister(PhysReg) && "Unallocated register?!");
// The stack pointer is not normally described as a callee-saved register
// in calling convention definitions, so we need to watch for it
// separately. An SP mentioned by a call instruction, we can ignore,
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 23e5ce0acae8..db520d4e6403 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -477,7 +477,10 @@ bool SjLjEHPrepare::runOnFunction(Function &F) {
UnregisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
PointerType::getUnqual(FunctionContextTy));
- FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ FrameAddrFn = Intrinsic::getDeclaration(
+ &M, Intrinsic::frameaddress,
+ {Type::getInt8PtrTy(M.getContext(),
+ M.getDataLayout().getAllocaAddrSpace())});
StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
BuiltinSetupDispatchFn =
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 5c944fe3f6b3..0c1f1220c421 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "SplitKit.h"
-#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
@@ -22,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -437,7 +437,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
assert(DefMI != nullptr);
LaneBitmask LM;
for (const MachineOperand &DefOp : DefMI->defs()) {
- unsigned R = DefOp.getReg();
+ Register R = DefOp.getReg();
if (R != LI.reg)
continue;
if (unsigned SR = DefOp.getSubReg())
@@ -1373,7 +1373,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
assert(LI.hasSubRanges());
LiveRangeCalc SubLRC;
- unsigned Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg();
+ Register Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg();
LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub)
: MRI.getMaxLaneMaskForVReg(Reg);
for (LiveInterval::SubRange &S : LI.subranges()) {
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 86ad3811e3ad..78f0bbd24db5 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -14,7 +14,6 @@
#ifndef LLVM_LIB_CODEGEN_SPLITKIT_H
#define LLVM_LIB_CODEGEN_SPLITKIT_H
-#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
@@ -25,6 +24,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SlotIndexes.h"
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index ae9401b89700..383c91259ffc 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -113,7 +113,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
unsigned Size = DL.getPointerSizeInBits();
assert((Size % 8) == 0 && "Need pointer size in bytes.");
Size /= 8;
- unsigned Reg = (++MOI)->getReg();
+ Register Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
Locs.emplace_back(StackMaps::Location::Direct, Size,
getDwarfRegNum(Reg, TRI), Imm);
@@ -122,7 +122,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
case StackMaps::IndirectMemRefOp: {
int64_t Size = (++MOI)->getImm();
assert(Size > 0 && "Need a valid size for indirect memory locations.");
- unsigned Reg = (++MOI)->getReg();
+ Register Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
Locs.emplace_back(StackMaps::Location::Indirect, Size,
getDwarfRegNum(Reg, TRI), Imm);
@@ -148,14 +148,14 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
if (MOI->isImplicit())
return ++MOI;
- assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) &&
+ assert(Register::isPhysicalRegister(MOI->getReg()) &&
"Virtreg operands should have been rewritten before now.");
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
assert(!MOI->getSubReg() && "Physical subreg still around.");
unsigned Offset = 0;
unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI);
- unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false);
+ unsigned LLVMRegNum = *TRI->getLLVMRegNum(DwarfRegNum, false);
unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg());
if (SubRegIdx)
Offset = TRI->getSubRegIdxOffset(SubRegIdx);
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 809960c7fdf9..5683d1db473c 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
@@ -157,6 +156,68 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
+bool StackProtector::HasAddressTaken(const Instruction *AI) {
+ for (const User *U : AI->users()) {
+ const auto *I = cast<Instruction>(U);
+ switch (I->getOpcode()) {
+ case Instruction::Store:
+ if (AI == cast<StoreInst>(I)->getValueOperand())
+ return true;
+ break;
+ case Instruction::AtomicCmpXchg:
+ // cmpxchg conceptually includes both a load and store from the same
+ // location. So, like store, the value being stored is what matters.
+ if (AI == cast<AtomicCmpXchgInst>(I)->getNewValOperand())
+ return true;
+ break;
+ case Instruction::PtrToInt:
+ if (AI == cast<PtrToIntInst>(I)->getOperand(0))
+ return true;
+ break;
+ case Instruction::Call: {
+ // Ignore intrinsics that do not become real instructions.
+ // TODO: Narrow this to intrinsics that have store-like effects.
+ const auto *CI = cast<CallInst>(I);
+ if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
+ return true;
+ break;
+ }
+ case Instruction::Invoke:
+ return true;
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ if (HasAddressTaken(I))
+ return true;
+ break;
+ case Instruction::PHI: {
+ // Keep track of what PHI nodes we have already visited to ensure
+ // they are only visited once.
+ const auto *PN = cast<PHINode>(I);
+ if (VisitedPHIs.insert(PN).second)
+ if (HasAddressTaken(PN))
+ return true;
+ break;
+ }
+ case Instruction::Load:
+ case Instruction::AtomicRMW:
+ case Instruction::Ret:
+ // These instructions take an address operand, but have load-like or
+ // other innocuous behavior that should not trigger a stack protector.
+ // atomicrmw conceptually has both load and store semantics, but the
+ // value being stored must be an integer; so if a pointer is being stored,
+ // we'll catch it in the PtrToInt case above.
+ break;
+ default:
+ // Conservatively return true for any instruction that takes an address
+ // operand, but is not handled above.
+ return true;
+ }
+ }
+ return false;
+}
+
/// Search for the first call to the llvm.stackprotector intrinsic and return it
/// if present.
static const CallInst *findStackProtectorIntrinsic(Function &F) {
@@ -264,9 +325,7 @@ bool StackProtector::RequiresStackProtector() {
continue;
}
- if (Strong && PointerMayBeCaptured(AI,
- /* ReturnCaptures */ false,
- /* StoreCaptures */ true)) {
+ if (Strong && HasAddressTaken(AI)) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
ORE.emit([&]() {
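
The effect of the new walk can be pictured with a toy model; this sketch assumes, as the switch above spells out, that only value-operand stores, pointer-to-integer casts, and unknown calls leak the address, while derived pointers are followed recursively (toy types, not LLVM's):

#include <cstdio>
#include <vector>

// Illustrative sketch, not part of the diff.
enum class UseKind { LoadFrom, StoreTo, StoreOfPointer, PtrToInt, Call, Gep };

struct PtrUse {
  UseKind Kind;
  std::vector<PtrUse> Forwarded; // uses of a derived pointer (GEP, cast...)
};

bool addressTaken(const std::vector<PtrUse> &Uses) {
  for (const PtrUse &U : Uses) {
    switch (U.Kind) {
    case UseKind::StoreOfPointer: // the pointer itself is the stored value
    case UseKind::PtrToInt:       // escapes as an integer
    case UseKind::Call:           // unknown callee may retain it
      return true;
    case UseKind::Gep:            // follow uses of the derived pointer
      if (addressTaken(U.Forwarded))
        return true;
      break;
    case UseKind::LoadFrom:       // plain accesses to the slot are fine
    case UseKind::StoreTo:
      break;
    }
  }
  return false;
}

int main() {
  std::vector<PtrUse> Escapes = {{UseKind::LoadFrom, {}},
                                 {UseKind::StoreOfPointer, {}}};
  std::vector<PtrUse> Safe = {{UseKind::StoreTo, {}}, {UseKind::LoadFrom, {}}};
  std::printf("%d %d\n", addressTaken(Escapes), addressTaken(Safe)); // 1 0
  return 0;
}
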
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 99b533e10b87..9c8143c55dc2 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -221,7 +221,7 @@ void StackSlotColoring::InitializeSlots() {
for (auto *I : Intervals) {
LiveInterval &li = I->second;
LLVM_DEBUG(li.dump());
- int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
+ int FI = Register::stackSlot2Index(li.reg);
if (MFI->isDeadObjectIndex(FI))
continue;
@@ -268,7 +268,7 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
int StackSlotColoring::ColorSlot(LiveInterval *li) {
int Color = -1;
bool Share = false;
- int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int FI = Register::stackSlot2Index(li->reg);
uint8_t StackID = MFI->getStackID(FI);
if (!DisableSharing) {
@@ -330,7 +330,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int SS = Register::stackSlot2Index(li->reg);
int NewSS = ColorSlot(li);
assert(NewSS >= 0 && "Stack coloring failed?");
SlotMapping[SS] = NewSS;
@@ -343,7 +343,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int SS = Register::stackSlot2Index(li->reg);
li->weight = SlotWeights[SS];
}
// Sort them by new weight.
diff --git a/lib/CodeGen/SwiftErrorValueTracking.cpp b/lib/CodeGen/SwiftErrorValueTracking.cpp
index 96821cadb1b6..c72a04276a4f 100644
--- a/lib/CodeGen/SwiftErrorValueTracking.cpp
+++ b/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -13,9 +13,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Value.h"
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index a0590a8a6cc6..03c68a37e459 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -235,8 +235,8 @@ bool TailDuplicator::tailDuplicateAndUpdate(
MachineInstr *Copy = Copies[i];
if (!Copy->isCopy())
continue;
- unsigned Dst = Copy->getOperand(0).getReg();
- unsigned Src = Copy->getOperand(1).getReg();
+ Register Dst = Copy->getOperand(0).getReg();
+ Register Src = Copy->getOperand(1).getReg();
if (MRI->hasOneNonDBGUse(Src) &&
MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
// Copy is the only use. Do trivial copy propagation here.
@@ -312,7 +312,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
if (!MI.isPHI())
break;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
- unsigned SrcReg = MI.getOperand(i).getReg();
+ Register SrcReg = MI.getOperand(i).getReg();
UsedByPhi->insert(SrcReg);
}
}
@@ -340,17 +340,17 @@ void TailDuplicator::processPHI(
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
- unsigned DefReg = MI->getOperand(0).getReg();
+ Register DefReg = MI->getOperand(0).getReg();
unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
assert(SrcOpIdx && "Unable to find matching PHI source?");
- unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ Register SrcReg = MI->getOperand(SrcOpIdx).getReg();
unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
// Insert a copy from source to the end of the block. The def register is the
// available value liveout of the block.
- unsigned NewDef = MRI->createVirtualRegister(RC);
+ Register NewDef = MRI->createVirtualRegister(RC);
Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
addSSAUpdateEntry(DefReg, NewDef, PredBB);
@@ -384,12 +384,12 @@ void TailDuplicator::duplicateInstruction(
MachineOperand &MO = NewMI.getOperand(i);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
if (MO.isDef()) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- unsigned NewReg = MRI->createVirtualRegister(RC);
+ Register NewReg = MRI->createVirtualRegister(RC);
MO.setReg(NewReg);
LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
@@ -433,7 +433,7 @@ void TailDuplicator::duplicateInstruction(
auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
if (NewRC == nullptr)
NewRC = OrigRC;
- unsigned NewReg = MRI->createVirtualRegister(NewRC);
+ Register NewReg = MRI->createVirtualRegister(NewRC);
BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(),
TII->get(TargetOpcode::COPY), NewReg)
.addReg(VI->second.Reg, 0, VI->second.SubReg);
@@ -477,7 +477,7 @@ void TailDuplicator::updateSuccessorsPHIs(
assert(Idx != 0);
MachineOperand &MO0 = MI.getOperand(Idx);
- unsigned Reg = MO0.getReg();
+ Register Reg = MO0.getReg();
if (isDead) {
// Folded into the previous BB.
// There could be duplicate phi source entries. FIXME: Should sdisel
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 9c4483cb240d..9eeacc2584cb 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -71,7 +72,9 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
// When interprocedural register allocation is enabled caller saved registers
// are preferred over callee saved registers.
- if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction()))
+ if (MF.getTarget().Options.EnableIPRA &&
+ isSafeForNoCSROpt(MF.getFunction()) &&
+ isProfitableForNoCSROpt(MF.getFunction()))
return;
// Get the callee saved register list...
@@ -118,6 +121,18 @@ unsigned TargetFrameLowering::getStackAlignmentSkew(
return 0;
}
+bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) {
+ if (!F.hasLocalLinkage() || F.hasAddressTaken() ||
+ !F.hasFnAttribute(Attribute::NoRecurse))
+ return false;
+ // The function should not be optimized as a tail call.
+ for (const User *U : F.users())
+ if (auto CS = ImmutableCallSite(U))
+ if (CS.isTailCall())
+ return false;
+ return true;
+}
+
int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
llvm_unreachable("getInitialCFAOffset() not implemented!");
}
@@ -125,4 +140,4 @@ int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF)
const {
llvm_unreachable("getInitialCFARegister() not implemented!");
-}
\ No newline at end of file
+}
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 868617ffe14d..6cae3b869501 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
@@ -142,7 +143,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
while (Tail != MBB->end()) {
auto MI = Tail++;
if (MI->isCall())
- MBB->getParent()->updateCallSiteInfo(&*MI);
+ MBB->getParent()->eraseCallSiteInfo(&*MI);
MBB->erase(MI);
}
@@ -183,10 +184,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead();
// Avoid calling isRenamable for virtual registers since we assert that
// renamable property is only queried/set for physical registers.
- bool Reg1IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg1)
+ bool Reg1IsRenamable = Register::isPhysicalRegister(Reg1)
? MI.getOperand(Idx1).isRenamable()
: false;
- bool Reg2IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg2)
+ bool Reg2IsRenamable = Register::isPhysicalRegister(Reg2)
? MI.getOperand(Idx2).isRenamable()
: false;
// If destination is tied to either of the commuted source register, then
@@ -228,9 +229,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
// Avoid calling setIsRenamable for virtual registers since we assert that
// renamable property is only queried/set for physical registers.
- if (TargetRegisterInfo::isPhysicalRegister(Reg1))
+ if (Register::isPhysicalRegister(Reg1))
CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable);
- if (TargetRegisterInfo::isPhysicalRegister(Reg2))
+ if (Register::isPhysicalRegister(Reg2))
CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable);
return CommutedMI;
}
@@ -281,7 +282,7 @@ bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1,
return true;
}
-bool TargetInstrInfo::findCommutedOpIndices(MachineInstr &MI,
+bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
assert(!MI.isBundle() &&
@@ -393,7 +394,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
if (BitOffset < 0 || BitOffset % 8)
return false;
- Size = BitSize /= 8;
+ Size = BitSize / 8;
Offset = (unsigned)BitOffset / 8;
assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range");
@@ -442,16 +443,15 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
if (FoldOp.getSubReg() || LiveOp.getSubReg())
return nullptr;
- unsigned FoldReg = FoldOp.getReg();
- unsigned LiveReg = LiveOp.getReg();
+ Register FoldReg = FoldOp.getReg();
+ Register LiveReg = LiveOp.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
- "Cannot fold physregs");
+ assert(Register::isVirtualRegister(FoldReg) && "Cannot fold physregs");
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
- if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ if (Register::isPhysicalRegister(LiveOp.getReg()))
return RC->contains(LiveOp.getReg()) ? RC : nullptr;
if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
@@ -674,9 +674,9 @@ bool TargetInstrInfo::hasReassociableOperands(
// reassociate.
MachineInstr *MI1 = nullptr;
MachineInstr *MI2 = nullptr;
- if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+ if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg()))
MI1 = MRI.getUniqueVRegDef(Op1.getReg());
- if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+ if (Op2.isReg() && Register::isVirtualRegister(Op2.getReg()))
MI2 = MRI.getUniqueVRegDef(Op2.getReg());
// And they need to be in the trace (otherwise, they won't have a depth).
@@ -805,27 +805,27 @@ void TargetInstrInfo::reassociateOps(
MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]);
MachineOperand &OpC = Root.getOperand(0);
- unsigned RegA = OpA.getReg();
- unsigned RegB = OpB.getReg();
- unsigned RegX = OpX.getReg();
- unsigned RegY = OpY.getReg();
- unsigned RegC = OpC.getReg();
+ Register RegA = OpA.getReg();
+ Register RegB = OpB.getReg();
+ Register RegX = OpX.getReg();
+ Register RegY = OpY.getReg();
+ Register RegC = OpC.getReg();
- if (TargetRegisterInfo::isVirtualRegister(RegA))
+ if (Register::isVirtualRegister(RegA))
MRI.constrainRegClass(RegA, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegB))
+ if (Register::isVirtualRegister(RegB))
MRI.constrainRegClass(RegB, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegX))
+ if (Register::isVirtualRegister(RegX))
MRI.constrainRegClass(RegX, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegY))
+ if (Register::isVirtualRegister(RegY))
MRI.constrainRegClass(RegY, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegC))
+ if (Register::isVirtualRegister(RegC))
MRI.constrainRegClass(RegC, RC);
// Create a new virtual register for the result of (X op Y) instead of
// recycling RegB because the MachineCombiner's computation of the critical
// path requires a new register definition rather than an existing one.
- unsigned NewVR = MRI.createVirtualRegister(RC);
+ Register NewVR = MRI.createVirtualRegister(RC);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
unsigned Opcode = Root.getOpcode();
@@ -880,21 +880,21 @@ void TargetInstrInfo::genAlternativeCodeSequence(
}
bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
- const MachineInstr &MI, AliasAnalysis *AA) const {
+ const MachineInstr &MI, AAResults *AA) const {
const MachineFunction &MF = *MI.getMF();
const MachineRegisterInfo &MRI = MF.getRegInfo();
// Remat clients assume operand 0 is the defined register.
if (!MI.getNumOperands() || !MI.getOperand(0).isReg())
return false;
- unsigned DefReg = MI.getOperand(0).getReg();
+ Register DefReg = MI.getOperand(0).getReg();
// A sub-register definition can only be rematerialized if the instruction
// doesn't read the other parts of the register. Otherwise it is really a
// read-modify-write operation on the full virtual register which cannot be
// moved safely.
- if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
- MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg))
+ if (Register::isVirtualRegister(DefReg) && MI.getOperand(0).getSubReg() &&
+ MI.readsVirtualRegister(DefReg))
return false;
// A load from a fixed stack slot can be rematerialized. This may be
@@ -924,12 +924,12 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0)
continue;
// Check for a well-behaved physical register.
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
@@ -1120,6 +1120,24 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
return (DefCycle != -1 && DefCycle <= 1);
}
+Optional<ParamLoadedValue>
+TargetInstrInfo::describeLoadedValue(const MachineInstr &MI) const {
+ const MachineFunction *MF = MI.getMF();
+ const MachineOperand *Op = nullptr;
+ DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});
+ const MachineOperand *SrcRegOp, *DestRegOp;
+
+ if (isCopyInstr(MI, SrcRegOp, DestRegOp)) {
+ Op = SrcRegOp;
+ return ParamLoadedValue(*Op, Expr);
+ } else if (MI.isMoveImmediate()) {
+ Op = &MI.getOperand(1);
+ return ParamLoadedValue(*Op, Expr);
+ }
+
+ return None;
+}
+
/// Both DefMI and UseMI must be valid. By default, call directly to the
/// itinerary. This may be overriden by the target.
int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
@@ -1227,3 +1245,5 @@ bool TargetInstrInfo::getInsertSubregInputs(
InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
return true;
}
+
+TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
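
The shape of the new describeLoadedValue hook can be sketched with toy types (std::optional standing in for llvm::Optional; nothing here is LLVM API):

#include <cassert>
#include <optional>

// Illustrative sketch, not part of the diff: a copy is described by its
// source operand, a move-immediate by its immediate operand, and
// anything else is not describable, matching the dispatch above.
struct Operand { bool IsImm; long long Value; };
struct Instr {
  enum Kind { Copy, MoveImm, Other } K;
  Operand Ops[2]; // Ops[0] = destination, Ops[1] = source
};

std::optional<Operand> describeLoadedValue(const Instr &MI) {
  switch (MI.K) {
  case Instr::Copy:
  case Instr::MoveImm:
    return MI.Ops[1];
  default:
    return std::nullopt;
  }
}

int main() {
  Instr MovImm{Instr::MoveImm, {{false, 0}, {true, 42}}};
  assert(describeLoadedValue(MovImm)->Value == 42);
  Instr Mul{Instr::Other, {{false, 0}, {false, 0}}};
  assert(!describeLoadedValue(Mul));
  return 0;
}
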
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 9b28c1a6c450..9b23012f47e3 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -167,6 +167,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::BZERO, "__bzero");
break;
case Triple::aarch64:
+ case Triple::aarch64_32:
setLibcallName(RTLIB::BZERO, "bzero");
break;
default:
@@ -197,6 +198,11 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
}
+ if (TT.isPS4CPU()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ }
+
if (TT.isOSOpenBSD()) {
setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
}
@@ -578,13 +584,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
BooleanFloatContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
SchedPreferenceInfo = Sched::ILP;
- JumpBufSize = 0;
- JumpBufAlignment = 0;
- MinFunctionAlignment = 0;
- PrefFunctionAlignment = 0;
- PrefLoopAlignment = 0;
GatherAllAliasesMaxDepth = 18;
- MinStackArgumentAlignment = 1;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
@@ -653,6 +653,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMULFIX, VT, Expand);
setOperationAction(ISD::SMULFIXSAT, VT, Expand);
setOperationAction(ISD::UMULFIX, VT, Expand);
+ setOperationAction(ISD::UMULFIXSAT, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -689,6 +690,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Expand);
}
// Constrained floating-point operations default to expand.
@@ -708,16 +710,22 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::STRICT_FLOG, VT, Expand);
setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
+ setOperationAction(ISD::STRICT_LRINT, VT, Expand);
+ setOperationAction(ISD::STRICT_LLRINT, VT, Expand);
setOperationAction(ISD::STRICT_FRINT, VT, Expand);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
+ setOperationAction(ISD::STRICT_LROUND, VT, Expand);
+ setOperationAction(ISD::STRICT_LLROUND, VT, Expand);
setOperationAction(ISD::STRICT_FROUND, VT, Expand);
setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand);
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
@@ -824,7 +832,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
assert((LA == TypeLegal || LA == TypeSoftenFloat ||
- ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) &&
+ (NVT.isVector() ||
+ ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
"Promote may not follow Expand or Promote");
if (LA == TypeSplitVector)
@@ -1257,17 +1266,23 @@ void TargetLoweringBase::computeRegisterProperties(
MVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
bool IsLegalWiderType = false;
+ bool IsScalable = VT.isScalableVector();
LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
switch (PreferredAction) {
- case TypePromoteInteger:
+ case TypePromoteInteger: {
+ MVT::SimpleValueType EndVT = IsScalable ?
+ MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE :
+ MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE;
// Try to promote the elements of integer vectors. If no legal
// promotion was found, fall through to the widen-vector method.
- for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) {
+ for (unsigned nVT = i + 1;
+ (MVT::SimpleValueType)nVT <= EndVT; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
- SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
+ SVT.getVectorNumElements() == NElts &&
+ SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1279,23 +1294,37 @@ void TargetLoweringBase::computeRegisterProperties(
if (IsLegalWiderType)
break;
LLVM_FALLTHROUGH;
+ }
case TypeWidenVector:
- // Try to widen the vector.
- for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType) nVT;
- if (SVT.getVectorElementType() == EltVT
- && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
- TransformToType[i] = SVT;
- RegisterTypeForVT[i] = SVT;
- NumRegistersForVT[i] = 1;
+ if (isPowerOf2_32(NElts)) {
+ // Try to widen the vector.
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
+ if (SVT.getVectorElementType() == EltVT
+ && SVT.getVectorNumElements() > NElts
+ && SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType)
+ break;
+ } else {
+ // Only widen to the next power of 2 to keep consistency with EVT.
+ MVT NVT = VT.getPow2VectorType();
+ if (isTypeLegal(NVT)) {
+ TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
- IsLegalWiderType = true;
+ RegisterTypeForVT[i] = NVT;
+ NumRegistersForVT[i] = 1;
break;
}
}
- if (IsLegalWiderType)
- break;
LLVM_FALLTHROUGH;
case TypeSplitVector:
@@ -1488,12 +1517,9 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
return DL.getABITypeAlignment(Ty);
}
-bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
- const DataLayout &DL, EVT VT,
- unsigned AddrSpace,
- unsigned Alignment,
- MachineMemOperand::Flags Flags,
- bool *Fast) const {
+bool TargetLoweringBase::allowsMemoryAccessForAlignment(
+ LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
+ unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
// Check if the specified alignment is sufficient based on the data layout.
// TODO: While using the data layout works in practice, a better solution
// would be to implement this check directly (make this a virtual function).
@@ -1511,6 +1537,21 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
}
+bool TargetLoweringBase::allowsMemoryAccessForAlignment(
+ LLVMContext &Context, const DataLayout &DL, EVT VT,
+ const MachineMemOperand &MMO, bool *Fast) const {
+ return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
+ MMO.getAlignment(), MMO.getFlags(),
+ Fast);
+}
+
+bool TargetLoweringBase::allowsMemoryAccess(
+ LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
+ unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
+ return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
+ Flags, Fast);
+}
+
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
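
The non-power-of-2 widening path above rounds the element count up before widening, to stay consistent with EVT; the arithmetic in isolation:

#include <cassert>

// Illustrative sketch, not part of the diff: round an element count up
// to the next power of 2, as getPow2VectorType does in the code above.
unsigned nextPow2(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  assert(nextPow2(3) == 4); // e.g. a 3-element vector widens to 4
  assert(nextPow2(8) == 8); // powers of 2 are left alone
  return 0;
}
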
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 4c8f75b237aa..4978f4b9500b 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -43,6 +43,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
@@ -154,6 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
break;
case Triple::aarch64:
case Triple::aarch64_be:
+ case Triple::aarch64_32:
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
@@ -375,7 +377,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
ELF::SHT_PROGBITS, Flags, 0);
unsigned Size = DL.getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0));
+ Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0).value());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::create(Size, getContext());
Streamer.emitELFSize(Label, E);
@@ -524,8 +526,8 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
if (!VM)
report_fatal_error("MD_associated operand is not ValueAsMetadata");
- GlobalObject *OtherGO = dyn_cast<GlobalObject>(VM->getValue());
- return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr;
+ auto *OtherGV = dyn_cast<GlobalValue>(VM->getValue());
+ return OtherGV ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGV)) : nullptr;
}
static unsigned getEntrySizeForKind(SectionKind Kind) {
@@ -566,6 +568,8 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
SectionName = Attrs.getAttribute("bss-section").getValueAsString();
} else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
+ } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) {
+ SectionName = Attrs.getAttribute("relro-section").getValueAsString();
} else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
SectionName = Attrs.getAttribute("data-section").getValueAsString();
}
@@ -1107,8 +1111,8 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
}
const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
- const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
- MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV,
+ int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const {
// Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
// as 64-bit do, we replace the GOT equivalent by accessing the final symbol
// through a non_lazy_ptr stub instead. One advantage is that it allows the
@@ -1165,12 +1169,10 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
- if (!StubSym.getPointer()) {
- bool IsIndirectLocal = Sym->isDefined() && !Sym->isExternal();
- // With the assumption that IsIndirectLocal == GV->hasLocalLinkage().
+
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::StubValueTy(const_cast<MCSymbol *>(Sym),
- !IsIndirectLocal);
- }
+ !GV->hasLocalLinkage());
const MCExpr *BSymExpr =
MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
@@ -1519,7 +1521,8 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
// internally, so we use ".CRT$XCA00001" for them.
SmallString<24> Name;
raw_svector_ostream OS(Name);
- OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T') << format("%05u", Priority);
+ OS << ".CRT$X" << (IsCtor ? "C" : "T") <<
+ (Priority < 200 ? 'A' : 'T') << format("%05u", Priority);
MCSectionCOFF *Sec = Ctx.getCOFFSection(
Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
@@ -1595,7 +1598,8 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
static std::string APIntToHexString(const APInt &AI) {
unsigned Width = (AI.getBitWidth() / 8) * 2;
- std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true);
+ std::string HexString = AI.toString(16, /*Signed=*/false);
+ transform(HexString.begin(), HexString.end(), HexString.begin(), tolower);
unsigned Size = HexString.size();
assert(Width >= Size && "hex string is too large!");
HexString.insert(HexString.begin(), Width - Size, '0');
@@ -1819,3 +1823,82 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
llvm_unreachable("@llvm.global_dtors should have been lowered already");
return nullptr;
}
+
+//===----------------------------------------------------------------------===//
+// XCOFF
+//===----------------------------------------------------------------------===//
+MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ report_fatal_error("XCOFF explicit sections not yet implemented.");
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ assert(!TM.getFunctionSections() && !TM.getDataSections() &&
+ "XCOFF unique sections not yet implemented.");
+
+ // Common symbols go into a csect with a matching name, which will get
+ // into the .bss section.
+ if (Kind.isBSSLocal() || Kind.isCommon()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
+ return getContext().getXCOFFSection(
+ Name, Kind.isBSSLocal() ? XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM,
+ SC, Kind, /* BeginSymbolName */ nullptr);
+ }
+
+ if (Kind.isText())
+ return TextSection;
+
+ if (Kind.isData())
+ return DataSection;
+
+ report_fatal_error("XCOFF other section types not yet implemented.");
+}
+
+bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ report_fatal_error("TLOF XCOFF not yet implemented.");
+}
+
+void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TgtM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TgtM);
+ TTypeEncoding = 0;
+ PersonalityEncoding = 0;
+ LSDAEncoding = 0;
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("XCOFF ctor section not yet implemented.");
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("XCOFF dtor section not yet implemented.");
+}
+
+const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ report_fatal_error("XCOFF not yet implemented.");
+}
+
+XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
+ const GlobalObject *GO) {
+ switch (GO->getLinkage()) {
+ case GlobalValue::InternalLinkage:
+ return XCOFF::C_HIDEXT;
+ case GlobalValue::ExternalLinkage:
+ case GlobalValue::CommonLinkage:
+ return XCOFF::C_EXT;
+ case GlobalValue::ExternalWeakLinkage:
+ return XCOFF::C_WEAKEXT;
+ default:
+ report_fatal_error(
+ "Unhandled linkage when mapping linkage to StorageClass.");
+ }
+}
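
For the APIntToHexString change earlier in this file, the padding logic in isolation (a sketch with plain std::string, not APInt):

#include <algorithm>
#include <cassert>
#include <cctype>
#include <string>

// Illustrative sketch, not part of the diff: lower-case the digits and
// zero-pad to two hex digits per byte, as APIntToHexString now does for
// constants wider than 64 bits.
std::string toPaddedHex(std::string Hex, unsigned BitWidth) {
  std::transform(Hex.begin(), Hex.end(), Hex.begin(),
                 [](unsigned char C) { return (char)std::tolower(C); });
  unsigned Width = (BitWidth / 8) * 2;
  assert(Width >= Hex.size() && "hex string is too large!");
  Hex.insert(Hex.begin(), Width - Hex.size(), '0');
  return Hex;
}

int main() {
  assert(toPaddedHex("1A2B", 64) == "0000000000001a2b");
  return 0;
}
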
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 36df02692f86..f1f4f65adf7c 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -49,9 +49,10 @@
using namespace llvm;
-cl::opt<bool> EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
- cl::desc("Enable interprocedural register allocation "
- "to reduce load/store at procedure calls."));
+static cl::opt<bool>
+ EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
+ cl::desc("Enable interprocedural register allocation "
+ "to reduce load/store at procedure calls."));
static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc Scheduler"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -152,8 +153,10 @@ static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
// Targets can return true in targetSchedulesPostRAScheduling() and
// insert a PostRA scheduling pass wherever it wants.
-cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
- cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)"));
+static cl::opt<bool> MISchedPostRA(
+ "misched-postra", cl::Hidden,
+ cl::desc(
+ "Run MachineScheduler post regalloc (independent of preRA sched)"));
// Experimental option to run live interval analysis early.
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
@@ -175,10 +178,10 @@ static cl::opt<CFLAAType> UseCFLAA(
/// Option names for limiting the codegen pipeline.
/// Those are used in error reporting and we didn't want
/// to duplicate their names all over the place.
-const char *StartAfterOptName = "start-after";
-const char *StartBeforeOptName = "start-before";
-const char *StopAfterOptName = "stop-after";
-const char *StopBeforeOptName = "stop-before";
+static const char *StartAfterOptName = "start-after";
+static const char *StartBeforeOptName = "start-before";
+static const char *StopAfterOptName = "stop-after";
+static const char *StopBeforeOptName = "stop-before";
static cl::opt<std::string>
StartAfterOpt(StringRef(StartAfterOptName),
@@ -654,6 +657,7 @@ void TargetPassConfig::addIRPasses() {
// TODO: add a pass insertion point here
addPass(createGCLoweringPass());
addPass(createShadowStackGCLoweringPass());
+ addPass(createLowerConstantIntrinsicsPass());
// Make sure that no unreachable blocks are instruction selected.
addPass(createUnreachableBlockEliminationPass());
@@ -1231,5 +1235,5 @@ bool TargetPassConfig::isGISelCSEEnabled() const {
}
std::unique_ptr<CSEConfigBase> TargetPassConfig::getCSEConfig() const {
- return make_unique<CSEConfigBase>();
+ return std::make_unique<CSEConfigBase>();
}
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index f1b2ecf3243b..e5592c31098a 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -86,22 +86,21 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
namespace llvm {
-Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI,
+Printable printReg(Register Reg, const TargetRegisterInfo *TRI,
unsigned SubIdx, const MachineRegisterInfo *MRI) {
return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) {
if (!Reg)
OS << "$noreg";
- else if (TargetRegisterInfo::isStackSlot(Reg))
- OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
- else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ else if (Register::isStackSlot(Reg))
+ OS << "SS#" << Register::stackSlot2Index(Reg);
+ else if (Register::isVirtualRegister(Reg)) {
StringRef Name = MRI ? MRI->getVRegName(Reg) : "";
if (Name != "") {
OS << '%' << Name;
} else {
- OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
+ OS << '%' << Register::virtReg2Index(Reg);
}
- }
- else if (!TRI)
+ } else if (!TRI)
OS << '$' << "physreg" << Reg;
else if (Reg < TRI->getNumRegs()) {
OS << '$';
@@ -143,8 +142,8 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
return Printable([Unit, TRI](raw_ostream &OS) {
- if (TRI && TRI->isVirtualRegister(Unit)) {
- OS << '%' << TargetRegisterInfo::virtReg2Index(Unit);
+ if (Register::isVirtualRegister(Unit)) {
+ OS << '%' << Register::virtReg2Index(Unit);
} else {
OS << printRegUnit(Unit, TRI);
}
@@ -189,7 +188,8 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
/// the right type that contains this physreg.
const TargetRegisterClass *
TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
- assert(isPhysicalRegister(reg) && "reg must be a physical register");
+ assert(Register::isPhysicalRegister(reg) &&
+ "reg must be a physical register");
// Pick the most sub register class of the right type that contains
// this physreg.
@@ -238,24 +238,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
static inline
const TargetRegisterClass *firstCommonClass(const uint32_t *A,
const uint32_t *B,
- const TargetRegisterInfo *TRI,
- const MVT::SimpleValueType SVT =
- MVT::SimpleValueType::Any) {
- const MVT VT(SVT);
+ const TargetRegisterInfo *TRI) {
for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
- if (unsigned Common = *A++ & *B++) {
- const TargetRegisterClass *RC =
- TRI->getRegClass(I + countTrailingZeros(Common));
- if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT))
- return RC;
- }
+ if (unsigned Common = *A++ & *B++)
+ return TRI->getRegClass(I + countTrailingZeros(Common));
return nullptr;
}
const TargetRegisterClass *
TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- const MVT::SimpleValueType SVT) const {
+ const TargetRegisterClass *B) const {
// First take care of the trivial cases.
if (A == B)
return A;
@@ -264,7 +256,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
// Register classes are ordered topologically, so the largest common
// sub-class is the common sub-class with the smallest ID.
- return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT);
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
}
const TargetRegisterClass *
@@ -409,7 +401,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// Target-independent hints are either a physical or a virtual register.
unsigned Phys = Reg;
- if (VRM && isVirtualRegister(Phys))
+ if (VRM && Register::isVirtualRegister(Phys))
Phys = VRM->getPhys(Phys);
// Don't add the same reg twice (Hints_MRI may contain multiple virtual
@@ -417,7 +409,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
if (!HintedRegs.insert(Phys).second)
continue;
// Check that Phys is a valid hint in VirtReg's register class.
- if (!isPhysicalRegister(Phys))
+ if (!Register::isPhysicalRegister(Phys))
continue;
if (MRI.isReserved(Phys))
continue;
@@ -433,6 +425,20 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
return false;
}
+bool TargetRegisterInfo::isCalleeSavedPhysReg(
+ unsigned PhysReg, const MachineFunction &MF) const {
+ if (PhysReg == 0)
+ return false;
+ const uint32_t *callerPreservedRegs =
+ getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+ if (callerPreservedRegs) {
+ assert(Register::isPhysicalRegister(PhysReg) &&
+ "Expected physical register");
+ return (callerPreservedRegs[PhysReg / 32] >> PhysReg % 32) & 1;
+ }
+ return false;
+}
+
bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return !MF.getFunction().hasFnAttribute("no-realign-stack");
}
@@ -466,7 +472,7 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg,
const MachineRegisterInfo &MRI) const {
const TargetRegisterClass *RC{};
- if (isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
// The size is not directly available for physical registers.
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
@@ -501,7 +507,7 @@ TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg,
CopySrcReg = MI->getOperand(2).getReg();
}
- if (!isVirtualRegister(CopySrcReg))
+ if (!Register::isVirtualRegister(CopySrcReg))
return CopySrcReg;
SrcReg = CopySrcReg;
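
The mask lookup in the new isCalleeSavedPhysReg packs one preservation bit per register, 32 registers per word; the indexing in isolation:

#include <cassert>
#include <cstdint>

// Illustrative sketch, not part of the diff: test a register's bit in a
// packed preservation mask, exactly the indexing used above.
bool isPreserved(const uint32_t *Mask, unsigned Reg) {
  return (Mask[Reg / 32] >> (Reg % 32)) & 1;
}

int main() {
  uint32_t Mask[2] = {0, 0};
  Mask[1] |= 1u << 2; // mark register 34 (word 1, bit 2) as preserved
  assert(isPreserved(Mask, 34));
  assert(!isPreserved(Mask, 33));
  return 0;
}
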
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 195279719ad4..ce59452fd1b8 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -300,7 +300,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
// TODO: The following hack exists because predication passes do not
// correctly append imp-use operands, and readsReg() strangely returns false
// for predicated defs.
- unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
+ Register Reg = DefMI->getOperand(DefOperIdx).getReg();
const MachineFunction &MF = *DefMI->getMF();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 43d876646967..ea971809d4e4 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -230,7 +230,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MO.isUse() && MOReg != SavedReg)
@@ -299,7 +299,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
MachineOperand &MO = OtherMI.getOperand(i);
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (DefReg == MOReg)
@@ -418,8 +418,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
} else
return false;
- IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ IsSrcPhys = Register::isPhysicalRegister(SrcReg);
+ IsDstPhys = Register::isPhysicalRegister(DstReg);
return true;
}
@@ -427,8 +427,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
/// given instruction, is killed by the given instruction.
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
LiveIntervals *LIS) {
- if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
- !LIS->isNotInMIMap(*MI)) {
+ if (LIS && Register::isVirtualRegister(Reg) && !LIS->isNotInMIMap(*MI)) {
// FIXME: Sometimes tryInstructionTransform() will add instructions and
// test whether they can be folded before keeping them. In this case it
// sets a kill before recursively calling tryInstructionTransform() again.
@@ -475,12 +474,12 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
MachineInstr *DefMI = &MI;
while (true) {
// All uses of physical registers are likely to be kills.
- if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ if (Register::isPhysicalRegister(Reg) &&
(allowFalsePositives || MRI->hasOneUse(Reg)))
return true;
if (!isPlainlyKilled(DefMI, Reg, LIS))
return false;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return true;
MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
// If there are multiple defs, we can't do a simple analysis, so just
@@ -536,7 +535,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
}
IsDstPhys = false;
if (isTwoAddrUse(UseMI, Reg, DstReg)) {
- IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ IsDstPhys = Register::isPhysicalRegister(DstReg);
return &UseMI;
}
return nullptr;
@@ -546,13 +545,13 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
/// to.
static unsigned
getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
- while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ while (Register::isVirtualRegister(Reg)) {
DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
if (SI == RegMap.end())
return 0;
Reg = SI->second;
}
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return Reg;
return 0;
}
@@ -683,7 +682,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
unsigned RegBIdx,
unsigned RegCIdx,
unsigned Dist) {
- unsigned RegC = MI->getOperand(RegCIdx).getReg();
+ Register RegC = MI->getOperand(RegCIdx).getReg();
LLVM_DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx);
@@ -700,7 +699,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
// Update source register map.
unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
if (FromRegC) {
- unsigned RegA = MI->getOperand(DstIdx).getReg();
+ Register RegA = MI->getOperand(DstIdx).getReg();
SrcRegMap[RegA] = FromRegC;
}
@@ -911,7 +910,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MO.isDef())
@@ -955,7 +954,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
for (const MachineOperand &MO : OtherMI.operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MO.isDef()) {
@@ -1093,7 +1092,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
for (const MachineOperand &MO : KillMI->operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (MO.isUse()) {
if (!MOReg)
continue;
@@ -1105,7 +1104,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
Uses.insert(MOReg);
if (isKill && MOReg != Reg)
Kills.insert(MOReg);
- } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ } else if (Register::isPhysicalRegister(MOReg)) {
Defs.insert(MOReg);
if (!MO.isDead())
LiveDefs.insert(MOReg);
@@ -1130,7 +1129,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
for (const MachineOperand &MO : OtherMI.operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MO.isUse()) {
@@ -1154,8 +1153,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
unsigned MOReg = OtherDefs[i];
if (Uses.count(MOReg))
return false;
- if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- LiveDefs.count(MOReg))
+ if (Register::isPhysicalRegister(MOReg) && LiveDefs.count(MOReg))
return false;
// Physical register def is seen.
Defs.erase(MOReg);
@@ -1208,8 +1206,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
return false;
bool MadeChange = false;
- unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();
- unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
+ Register DstOpReg = MI->getOperand(DstOpIdx).getReg();
+ Register BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
unsigned OpsNum = MI->getDesc().getNumOperands();
unsigned OtherOpIdx = MI->getDesc().getNumDefs();
for (; OtherOpIdx < OpsNum; OtherOpIdx++) {
@@ -1221,7 +1219,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
!TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx))
continue;
- unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg();
+ Register OtherOpReg = MI->getOperand(OtherOpIdx).getReg();
bool AggressiveCommute = false;
// If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp
@@ -1276,14 +1274,14 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
MachineInstr &MI = *mi;
- unsigned regA = MI.getOperand(DstIdx).getReg();
- unsigned regB = MI.getOperand(SrcIdx).getReg();
+ Register regA = MI.getOperand(DstIdx).getReg();
+ Register regB = MI.getOperand(SrcIdx).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ assert(Register::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
- if (TargetRegisterInfo::isVirtualRegister(regA))
+ if (Register::isVirtualRegister(regA))
scanUses(regA);
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
@@ -1363,7 +1361,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
const TargetRegisterClass *RC =
TRI->getAllocatableClass(
TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
- unsigned Reg = MRI->createVirtualRegister(RC);
+ Register Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
if (!TII->unfoldMemoryOperand(*MF, MI, Reg,
/*UnfoldLoad=*/true,
@@ -1399,8 +1397,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (LV) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
if (MO.isUse()) {
if (MO.isKill()) {
if (NewMIs[0]->killsRegister(MO.getReg()))
@@ -1474,8 +1471,8 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
AnyOps = true;
MachineOperand &SrcMO = MI->getOperand(SrcIdx);
MachineOperand &DstMO = MI->getOperand(DstIdx);
- unsigned SrcReg = SrcMO.getReg();
- unsigned DstReg = DstMO.getReg();
+ Register SrcReg = SrcMO.getReg();
+ Register DstReg = DstMO.getReg();
// Tied constraint already satisfied?
if (SrcReg == DstReg)
continue;
@@ -1485,7 +1482,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
// Deal with undef uses immediately - simply rewrite the src operand.
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
// Constrain the DstReg register class if required.
- if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (Register::isVirtualRegister(DstReg))
if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
TRI, *MF))
MRI->constrainRegClass(DstReg, RC);
@@ -1522,7 +1519,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
unsigned DstIdx = TiedPairs[tpi].second;
const MachineOperand &DstMO = MI->getOperand(DstIdx);
- unsigned RegA = DstMO.getReg();
+ Register RegA = DstMO.getReg();
// Grab RegB from the instruction because it may have changed if the
// instruction was commuted.
@@ -1538,7 +1535,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
LastCopiedReg = RegA;
- assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ assert(Register::isVirtualRegister(RegB) &&
"cannot make instruction into two-address form");
#ifndef NDEBUG
@@ -1559,14 +1556,13 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
MIB.addReg(RegB, 0, SubRegB);
const TargetRegisterClass *RC = MRI->getRegClass(RegB);
if (SubRegB) {
- if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ if (Register::isVirtualRegister(RegA)) {
assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
SubRegB) &&
"tied subregister must be a truncation");
// The superreg class will not be used to constrain the subreg class.
RC = nullptr;
- }
- else {
+ } else {
assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
&& "tied subregister must be a truncation");
}
@@ -1581,7 +1577,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (LIS) {
LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
- if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ if (Register::isVirtualRegister(RegA)) {
LiveInterval &LI = LIS->getInterval(RegA);
VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
SlotIndex endIdx =
@@ -1601,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
// Make sure regA is a legal regclass for the SrcIdx operand.
- if (TargetRegisterInfo::isVirtualRegister(RegA) &&
- TargetRegisterInfo::isVirtualRegister(RegB))
+ if (Register::isVirtualRegister(RegA) && Register::isVirtualRegister(RegB))
MRI->constrainRegClass(RegA, RC);
MO.setReg(RegA);
// The getMatchingSuper asserts guarantee that the register class projected
@@ -1744,8 +1739,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
if (TiedPairs.size() == 1) {
unsigned SrcIdx = TiedPairs[0].first;
unsigned DstIdx = TiedPairs[0].second;
- unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
- unsigned DstReg = mi->getOperand(DstIdx).getReg();
+ Register SrcReg = mi->getOperand(SrcIdx).getReg();
+ Register DstReg = mi->getOperand(DstIdx).getReg();
if (SrcReg != DstReg &&
tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
// The tied operands have been eliminated or shifted further down
@@ -1803,9 +1798,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
void TwoAddressInstructionPass::
eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
- unsigned DstReg = MI.getOperand(0).getReg();
- if (MI.getOperand(0).getSubReg() ||
- TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ Register DstReg = MI.getOperand(0).getReg();
+ if (MI.getOperand(0).getSubReg() || Register::isPhysicalRegister(DstReg) ||
!(MI.getNumOperands() & 1)) {
LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
llvm_unreachable(nullptr);
@@ -1821,7 +1815,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
bool DefEmitted = false;
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
MachineOperand &UseMO = MI.getOperand(i);
- unsigned SrcReg = UseMO.getReg();
+ Register SrcReg = UseMO.getReg();
unsigned SubIdx = MI.getOperand(i+1).getImm();
// Nothing needs to be inserted for undef operands.
if (UseMO.isUndef())
@@ -1855,7 +1849,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
DefEmitted = true;
// Update LiveVariables' kill info.
- if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ if (LV && isKill && !Register::isPhysicalRegister(SrcReg))
LV->replaceKillInstruction(SrcReg, MI, *CopyMI);
LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI);
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 177bab32bccc..3289eff71336 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -103,7 +103,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
df_iterator_default_set<MachineBasicBlock*> Reachable;
bool ModifiedPHI = false;
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
@@ -146,8 +147,14 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
}
// Actually remove the blocks now.
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ // Remove any call site information for calls in the block.
+ for (auto &I : DeadBlocks[i]->instrs())
+ if (I.isCall(MachineInstr::IgnoreBundle))
+ DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I);
+
DeadBlocks[i]->eraseFromParent();
+ }
// Cleanup PHI nodes.
for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
@@ -167,8 +174,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
if (phi->getNumOperands() == 3) {
const MachineOperand &Input = phi->getOperand(1);
const MachineOperand &Output = phi->getOperand(0);
- unsigned InputReg = Input.getReg();
- unsigned OutputReg = Output.getReg();
+ Register InputReg = Input.getReg();
+ Register OutputReg = Output.getReg();
assert(Output.getSubReg() == 0 && "Cannot have output subregister");
ModifiedPHI = true;
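Besides switching to MachineModuleInfoWrapperPass, the hunk above adds a
cleanup step: MachineFunction keeps call-site info keyed by MachineInstr*, so
erasing an unreachable block without purging entries for its calls would leave
dangling pointers in that map. A condensed sketch of the loop, using the same
names as the hunk:

    // Purge call-site info before deleting each dead block.
    for (MachineBasicBlock *MBB : DeadBlocks) {
      for (MachineInstr &MI : MBB->instrs())
        if (MI.isCall(MachineInstr::IgnoreBundle))
          MBB->getParent()->eraseCallSiteInfo(&MI);
      MBB->eraseFromParent();
    }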
diff --git a/lib/CodeGen/ValueTypes.cpp b/lib/CodeGen/ValueTypes.cpp
index a911cdcbec9d..73b862d51c0f 100644
--- a/lib/CodeGen/ValueTypes.cpp
+++ b/lib/CodeGen/ValueTypes.cpp
@@ -115,8 +115,8 @@ std::string EVT::getEVTString() const {
switch (V.SimpleTy) {
default:
if (isVector())
- return "v" + utostr(getVectorNumElements()) +
- getVectorElementType().getEVTString();
+ return (isScalableVector() ? "nxv" : "v") + utostr(getVectorNumElements())
+ + getVectorElementType().getEVTString();
if (isInteger())
return "i" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
@@ -144,6 +144,7 @@ std::string EVT::getEVTString() const {
case MVT::v32i1: return "v32i1";
case MVT::v64i1: return "v64i1";
case MVT::v128i1: return "v128i1";
+ case MVT::v256i1: return "v256i1";
case MVT::v512i1: return "v512i1";
case MVT::v1024i1: return "v1024i1";
case MVT::v1i8: return "v1i8";
@@ -157,6 +158,7 @@ std::string EVT::getEVTString() const {
case MVT::v256i8: return "v256i8";
case MVT::v1i16: return "v1i16";
case MVT::v2i16: return "v2i16";
+ case MVT::v3i16: return "v3i16";
case MVT::v4i16: return "v4i16";
case MVT::v8i16: return "v8i16";
case MVT::v16i16: return "v16i16";
@@ -187,8 +189,11 @@ std::string EVT::getEVTString() const {
case MVT::v1f32: return "v1f32";
case MVT::v2f32: return "v2f32";
case MVT::v2f16: return "v2f16";
+ case MVT::v3f16: return "v3f16";
case MVT::v4f16: return "v4f16";
case MVT::v8f16: return "v8f16";
+ case MVT::v16f16: return "v16f16";
+ case MVT::v32f16: return "v32f16";
case MVT::v3f32: return "v3f32";
case MVT::v4f32: return "v4f32";
case MVT::v5f32: return "v5f32";
@@ -205,6 +210,48 @@ std::string EVT::getEVTString() const {
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
case MVT::v8f64: return "v8f64";
+ case MVT::nxv1i1: return "nxv1i1";
+ case MVT::nxv2i1: return "nxv2i1";
+ case MVT::nxv4i1: return "nxv4i1";
+ case MVT::nxv8i1: return "nxv8i1";
+ case MVT::nxv16i1: return "nxv16i1";
+ case MVT::nxv32i1: return "nxv32i1";
+ case MVT::nxv1i8: return "nxv1i8";
+ case MVT::nxv2i8: return "nxv2i8";
+ case MVT::nxv4i8: return "nxv4i8";
+ case MVT::nxv8i8: return "nxv8i8";
+ case MVT::nxv16i8: return "nxv16i8";
+ case MVT::nxv32i8: return "nxv32i8";
+ case MVT::nxv1i16: return "nxv1i16";
+ case MVT::nxv2i16: return "nxv2i16";
+ case MVT::nxv4i16: return "nxv4i16";
+ case MVT::nxv8i16: return "nxv8i16";
+ case MVT::nxv16i16:return "nxv16i16";
+ case MVT::nxv32i16:return "nxv32i16";
+ case MVT::nxv1i32: return "nxv1i32";
+ case MVT::nxv2i32: return "nxv2i32";
+ case MVT::nxv4i32: return "nxv4i32";
+ case MVT::nxv8i32: return "nxv8i32";
+ case MVT::nxv16i32:return "nxv16i32";
+ case MVT::nxv32i32:return "nxv32i32";
+ case MVT::nxv1i64: return "nxv1i64";
+ case MVT::nxv2i64: return "nxv2i64";
+ case MVT::nxv4i64: return "nxv4i64";
+ case MVT::nxv8i64: return "nxv8i64";
+ case MVT::nxv16i64:return "nxv16i64";
+ case MVT::nxv32i64:return "nxv32i64";
+ case MVT::nxv2f16: return "nxv2f16";
+ case MVT::nxv4f16: return "nxv4f16";
+ case MVT::nxv8f16: return "nxv8f16";
+ case MVT::nxv1f32: return "nxv1f32";
+ case MVT::nxv2f32: return "nxv2f32";
+ case MVT::nxv4f32: return "nxv4f32";
+ case MVT::nxv8f32: return "nxv8f32";
+ case MVT::nxv16f32:return "nxv16f32";
+ case MVT::nxv1f64: return "nxv1f64";
+ case MVT::nxv2f64: return "nxv2f64";
+ case MVT::nxv4f64: return "nxv4f64";
+ case MVT::nxv8f64: return "nxv8f64";
case MVT::Metadata:return "Metadata";
case MVT::Untyped: return "Untyped";
case MVT::exnref : return "exnref";
@@ -241,6 +288,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128);
+ case MVT::v256i1: return VectorType::get(Type::getInt1Ty(Context), 256);
case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512);
case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024);
case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1);
@@ -254,6 +302,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256);
case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1);
case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v3i16: return VectorType::get(Type::getInt16Ty(Context), 3);
case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
@@ -282,8 +331,11 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32);
case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1);
case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3);
case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
+ case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16);
+ case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32);
case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3);
@@ -302,8 +354,92 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);
+ case MVT::nxv1i1:
+ return VectorType::get(Type::getInt1Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i1:
+ return VectorType::get(Type::getInt1Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i1:
+ return VectorType::get(Type::getInt1Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i1:
+ return VectorType::get(Type::getInt1Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i1:
+ return VectorType::get(Type::getInt1Ty(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv32i1:
+ return VectorType::get(Type::getInt1Ty(Context), 32, /*Scalable=*/ true);
+ case MVT::nxv1i8:
+ return VectorType::get(Type::getInt8Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i8:
+ return VectorType::get(Type::getInt8Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i8:
+ return VectorType::get(Type::getInt8Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i8:
+ return VectorType::get(Type::getInt8Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i8:
+ return VectorType::get(Type::getInt8Ty(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv32i8:
+ return VectorType::get(Type::getInt8Ty(Context), 32, /*Scalable=*/ true);
+ case MVT::nxv1i16:
+ return VectorType::get(Type::getInt16Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i16:
+ return VectorType::get(Type::getInt16Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i16:
+ return VectorType::get(Type::getInt16Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i16:
+ return VectorType::get(Type::getInt16Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i16:
+ return VectorType::get(Type::getInt16Ty(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv32i16:
+ return VectorType::get(Type::getInt16Ty(Context), 32, /*Scalable=*/ true);
+ case MVT::nxv1i32:
+ return VectorType::get(Type::getInt32Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i32:
+ return VectorType::get(Type::getInt32Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i32:
+ return VectorType::get(Type::getInt32Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i32:
+ return VectorType::get(Type::getInt32Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i32:
+ return VectorType::get(Type::getInt32Ty(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv32i32:
+ return VectorType::get(Type::getInt32Ty(Context), 32, /*Scalable=*/ true);
+ case MVT::nxv1i64:
+ return VectorType::get(Type::getInt64Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i64:
+ return VectorType::get(Type::getInt64Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i64:
+ return VectorType::get(Type::getInt64Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i64:
+ return VectorType::get(Type::getInt64Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i64:
+ return VectorType::get(Type::getInt64Ty(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv32i64:
+ return VectorType::get(Type::getInt64Ty(Context), 32, /*Scalable=*/ true);
+ case MVT::nxv2f16:
+ return VectorType::get(Type::getHalfTy(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4f16:
+ return VectorType::get(Type::getHalfTy(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8f16:
+ return VectorType::get(Type::getHalfTy(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv1f32:
+ return VectorType::get(Type::getFloatTy(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2f32:
+ return VectorType::get(Type::getFloatTy(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4f32:
+ return VectorType::get(Type::getFloatTy(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8f32:
+ return VectorType::get(Type::getFloatTy(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16f32:
+ return VectorType::get(Type::getFloatTy(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv1f64:
+ return VectorType::get(Type::getDoubleTy(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2f64:
+ return VectorType::get(Type::getDoubleTy(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4f64:
+ return VectorType::get(Type::getDoubleTy(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8f64:
+ return VectorType::get(Type::getDoubleTy(Context), 8, /*Scalable=*/ true);
case MVT::Metadata: return Type::getMetadataTy(Context);
- }
+ }
}
/// Return the value type corresponding to the specified type. This returns all
@@ -329,7 +465,8 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::VectorTyID: {
VectorType *VTy = cast<VectorType>(Ty);
return getVectorVT(
- getVT(VTy->getElementType(), false), VTy->getNumElements());
+ getVT(VTy->getElementType(), /*HandleUnknown=*/ false),
+ VTy->getElementCount());
}
}
}
@@ -345,8 +482,9 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
case Type::VectorTyID: {
VectorType *VTy = cast<VectorType>(Ty);
- return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
- VTy->getNumElements());
+ return getVectorVT(Ty->getContext(),
+ getEVT(VTy->getElementType(), /*HandleUnknown=*/ false),
+ VTy->getElementCount());
}
}
}
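The ValueTypes.cpp changes teach EVT about scalable vectors: "nxv4i32" is the
printed form of <vscale x 4 x i32>, and getVT/getEVT preserve the scalable
flag by passing VTy->getElementCount() rather than a plain element number.
Hypothetical checks of the new printing (illustrative only, using the MVT
names added above):

    assert(EVT(MVT::nxv4i32).getEVTString() == "nxv4i32"); // scalable: "nxv" prefix
    assert(EVT(MVT::v4i32).getEVTString() == "v4i32");     // fixed width: "v" prefix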
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 4a06704a8876..5312e2eea96b 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -80,15 +80,14 @@ void VirtRegMap::grow() {
Virt2SplitMap.resize(NumRegs);
}
-void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
- TargetRegisterInfo::isPhysicalRegister(physReg));
- assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) {
+ assert(virtReg.isVirtual() && Register::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg.id()] == NO_PHYS_REG &&
"attempt to assign physical register to already mapped "
"virtual register");
assert(!getRegInfo().isReserved(physReg) &&
"Attempt to map virtReg to a reserved physReg");
- Virt2PhysMap[virtReg] = physReg;
+ Virt2PhysMap[virtReg.id()] = physReg;
}
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
@@ -99,46 +98,46 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
return SS;
}
-bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
- unsigned Hint = MRI->getSimpleHint(VirtReg);
- if (!Hint)
+bool VirtRegMap::hasPreferredPhys(Register VirtReg) {
+ Register Hint = MRI->getSimpleHint(VirtReg);
+ if (!Hint.isValid())
return false;
- if (TargetRegisterInfo::isVirtualRegister(Hint))
+ if (Hint.isVirtual())
Hint = getPhys(Hint);
return getPhys(VirtReg) == Hint;
}
-bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+bool VirtRegMap::hasKnownPreference(Register VirtReg) {
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
- if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+ if (Register::isPhysicalRegister(Hint.second))
return true;
- if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+ if (Register::isVirtualRegister(Hint.second))
return hasPhys(Hint.second);
return false;
}
-int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+int VirtRegMap::assignVirt2StackSlot(Register virtReg) {
+ assert(virtReg.isVirtual());
+ assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
- return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
+ return Virt2StackSlotMap[virtReg.id()] = createSpillSlot(RC);
}
-void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+void VirtRegMap::assignVirt2StackSlot(Register virtReg, int SS) {
+ assert(virtReg.isVirtual());
+ assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
assert((SS >= 0 ||
(SS >= MF->getFrameInfo().getObjectIndexBegin())) &&
"illegal fixed frame index");
- Virt2StackSlotMap[virtReg] = SS;
+ Virt2StackSlotMap[virtReg.id()] = SS;
}
void VirtRegMap::print(raw_ostream &OS, const Module*) const {
OS << "********** REGISTER MAP **********\n";
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
OS << '[' << printReg(Reg, TRI) << " -> "
<< printReg(Virt2PhysMap[Reg], TRI) << "] "
@@ -147,7 +146,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
}
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
<< "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
@@ -185,10 +184,10 @@ class VirtRegRewriter : public MachineFunctionPass {
void rewrite();
void addMBBLiveIns();
bool readsUndefSubreg(const MachineOperand &MO) const;
- void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
+ void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const;
void handleIdentityCopy(MachineInstr &MI) const;
void expandCopyBundle(MachineInstr &MI) const;
- bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const;
+ bool subRegLiveThrough(const MachineInstr &MI, Register SuperPhysReg) const;
public:
static char ID;
@@ -265,7 +264,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
}
void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
- unsigned PhysReg) const {
+ Register PhysReg) const {
assert(!LI.empty());
assert(LI.hasSubRanges());
@@ -312,7 +311,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
// assignments.
void VirtRegRewriter::addMBBLiveIns() {
for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
- unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
+ Register VirtReg = Register::index2VirtReg(Idx);
if (MRI->reg_nodbg_empty(VirtReg))
continue;
LiveInterval &LI = LIS->getInterval(VirtReg);
@@ -320,7 +319,7 @@ void VirtRegRewriter::addMBBLiveIns() {
continue;
// This is a virtual register that is live across basic blocks. Its
// assigned PhysReg must be marked as live-in to those blocks.
- unsigned PhysReg = VRM->getPhys(VirtReg);
+ Register PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
if (LI.hasSubRanges()) {
@@ -353,7 +352,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
if (MO.isUndef())
return true;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
const LiveInterval &LI = LIS->getInterval(Reg);
const MachineInstr &MI = *MO.getParent();
SlotIndex BaseIndex = LIS->getInstructionIndex(MI);
@@ -469,7 +468,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
/// \pre \p MI defines a subregister of a virtual register that
/// has been assigned to \p SuperPhysReg.
bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
- unsigned SuperPhysReg) const {
+ Register SuperPhysReg) const {
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
@@ -493,9 +492,9 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
- SmallVector<unsigned, 8> SuperDeads;
- SmallVector<unsigned, 8> SuperDefs;
- SmallVector<unsigned, 8> SuperKills;
+ SmallVector<Register, 8> SuperDeads;
+ SmallVector<Register, 8> SuperDefs;
+ SmallVector<Register, 8> SuperKills;
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
@@ -513,10 +512,10 @@ void VirtRegRewriter::rewrite() {
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
- unsigned VirtReg = MO.getReg();
- unsigned PhysReg = VRM->getPhys(VirtReg);
+ Register VirtReg = MO.getReg();
+ Register PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
"Instruction uses unmapped VirtReg");
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
@@ -562,7 +561,7 @@ void VirtRegRewriter::rewrite() {
// PhysReg operands cannot have subregister indexes.
PhysReg = TRI->getSubReg(PhysReg, SubReg);
- assert(PhysReg && "Invalid SubReg for physical register");
+ assert(PhysReg.isValid() && "Invalid SubReg for physical register");
MO.setSubReg(0);
}
// Rewrite. Note we could have used MachineOperand::substPhysReg(), but
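VirtRegMap and VirtRegRewriter move to the Register wrapper as well. Register
carries the old static predicates as members and adds isValid() and id(), so
containers keyed by raw register numbers keep working. A fragment showing the
member-style calls the rewritten code relies on (interface as assumed at this
revision):

    Register R = MO.getReg();
    if (!R.isValid())                  // replaces the old `if (!Reg)` check
      return;
    if (R.isVirtual())                 // was TargetRegisterInfo::isVirtualRegister
      Virt2PhysMap[R.id()] = PhysReg;  // id() yields the raw unsigned map key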
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 19c59e9542b4..119c3fd1ec7f 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -111,7 +111,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
MIB.add(MO);
Terminators.push_back(&T);
if (T.isCall())
- MF.updateCallSiteInfo(&T);
+ MF.eraseCallSiteInfo(&T);
}
}
}
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index ec4773d571c8..dd6f75f97a4a 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -209,14 +209,6 @@ struct VisitHelper {
}
}
- VisitHelper(TypeVisitorCallbackPipeline &Callbacks, VisitorDataSource Source)
- : Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) {
- if (Source == VDS_BytesPresent) {
- Pipeline = Callbacks;
- Pipeline.addCallbackToPipelineFront(Deserializer);
- }
- }
-
TypeDeserializer Deserializer;
TypeVisitorCallbackPipeline Pipeline;
CVTypeVisitor Visitor;
@@ -230,13 +222,6 @@ Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
return V.Visitor.visitTypeRecord(Record, Index);
}
-Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
- TypeVisitorCallbackPipeline &Callbacks,
- VisitorDataSource Source) {
- VisitHelper V(Callbacks, Source);
- return V.Visitor.visitTypeRecord(Record, Index);
-}
-
Error llvm::codeview::visitTypeRecord(CVType &Record,
TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source) {
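The deleted overloads were redundant: TypeVisitorCallbackPipeline derives from
TypeVisitorCallbacks, so a pipeline already binds to the remaining
visitTypeRecord signature. A usage sketch under that assumption (MyCallbacks
stands in for user-supplied callbacks):

    TypeVisitorCallbackPipeline Pipeline;
    Pipeline.addCallbackToPipeline(MyCallbacks);
    if (Error E = codeview::visitTypeRecord(Record, Index, Pipeline))
      return E;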
diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 2f49474115a1..36a384baa13d 100644
--- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -20,7 +20,6 @@ Error CodeViewRecordIO::beginRecord(Optional<uint32_t> MaxLength) {
Limit.MaxLength = MaxLength;
Limit.BeginOffset = getCurrentOffset();
Limits.push_back(Limit);
- resetStreamedLen();
return Error::success();
}
@@ -50,6 +49,7 @@ Error CodeViewRecordIO::endRecord() {
Streamer->EmitBytes(BytesSR);
--PaddingBytes;
}
+ resetStreamedLen();
}
return Error::success();
}
@@ -126,7 +126,11 @@ Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes,
Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd, const Twine &Comment) {
if (isStreaming()) {
- emitComment(Comment);
+ std::string TypeNameStr = Streamer->getTypeName(TypeInd);
+ if (!TypeNameStr.empty())
+ emitComment(Comment + ": " + TypeNameStr);
+ else
+ emitComment(Comment);
Streamer->EmitIntValue(TypeInd.getIndex(), sizeof(TypeInd.getIndex()));
incrStreamedLen(sizeof(TypeInd.getIndex()));
} else if (isWriting()) {
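Two effects of the CodeViewRecordIO changes: the streamed-length counter is
now reset once per record, at endRecord(), so nested begin/end pairs no longer
clobber it mid-record; and streamed type indices carry the resolved type name
in their assembly comment. Illustrative streamer output (format assumed, not
verbatim):

    .long 0x1004            # Type: void (int)   <- comment now names the type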
diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp
index 54e68ae4ea9f..82f6713a88f5 100644
--- a/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -300,6 +300,128 @@ static const EnumEntry<COFF::SectionCharacteristics>
CV_ENUM_ENT(COFF, IMAGE_SCN_MEM_READ),
CV_ENUM_ENT(COFF, IMAGE_SCN_MEM_WRITE)};
+static const EnumEntry<uint16_t> ClassOptionNames[] = {
+ CV_ENUM_CLASS_ENT(ClassOptions, Packed),
+ CV_ENUM_CLASS_ENT(ClassOptions, HasConstructorOrDestructor),
+ CV_ENUM_CLASS_ENT(ClassOptions, HasOverloadedOperator),
+ CV_ENUM_CLASS_ENT(ClassOptions, Nested),
+ CV_ENUM_CLASS_ENT(ClassOptions, ContainsNestedClass),
+ CV_ENUM_CLASS_ENT(ClassOptions, HasOverloadedAssignmentOperator),
+ CV_ENUM_CLASS_ENT(ClassOptions, HasConversionOperator),
+ CV_ENUM_CLASS_ENT(ClassOptions, ForwardReference),
+ CV_ENUM_CLASS_ENT(ClassOptions, Scoped),
+ CV_ENUM_CLASS_ENT(ClassOptions, HasUniqueName),
+ CV_ENUM_CLASS_ENT(ClassOptions, Sealed),
+ CV_ENUM_CLASS_ENT(ClassOptions, Intrinsic),
+};
+
+static const EnumEntry<uint8_t> MemberAccessNames[] = {
+ CV_ENUM_CLASS_ENT(MemberAccess, None),
+ CV_ENUM_CLASS_ENT(MemberAccess, Private),
+ CV_ENUM_CLASS_ENT(MemberAccess, Protected),
+ CV_ENUM_CLASS_ENT(MemberAccess, Public),
+};
+
+static const EnumEntry<uint16_t> MethodOptionNames[] = {
+ CV_ENUM_CLASS_ENT(MethodOptions, Pseudo),
+ CV_ENUM_CLASS_ENT(MethodOptions, NoInherit),
+ CV_ENUM_CLASS_ENT(MethodOptions, NoConstruct),
+ CV_ENUM_CLASS_ENT(MethodOptions, CompilerGenerated),
+ CV_ENUM_CLASS_ENT(MethodOptions, Sealed),
+};
+
+static const EnumEntry<uint16_t> MemberKindNames[] = {
+ CV_ENUM_CLASS_ENT(MethodKind, Vanilla),
+ CV_ENUM_CLASS_ENT(MethodKind, Virtual),
+ CV_ENUM_CLASS_ENT(MethodKind, Static),
+ CV_ENUM_CLASS_ENT(MethodKind, Friend),
+ CV_ENUM_CLASS_ENT(MethodKind, IntroducingVirtual),
+ CV_ENUM_CLASS_ENT(MethodKind, PureVirtual),
+ CV_ENUM_CLASS_ENT(MethodKind, PureIntroducingVirtual),
+};
+
+static const EnumEntry<uint8_t> PtrKindNames[] = {
+ CV_ENUM_CLASS_ENT(PointerKind, Near16),
+ CV_ENUM_CLASS_ENT(PointerKind, Far16),
+ CV_ENUM_CLASS_ENT(PointerKind, Huge16),
+ CV_ENUM_CLASS_ENT(PointerKind, BasedOnSegment),
+ CV_ENUM_CLASS_ENT(PointerKind, BasedOnValue),
+ CV_ENUM_CLASS_ENT(PointerKind, BasedOnSegmentValue),
+ CV_ENUM_CLASS_ENT(PointerKind, BasedOnAddress),
+ CV_ENUM_CLASS_ENT(PointerKind, BasedOnSegmentAddress),
+ CV_ENUM_CLASS_ENT(PointerKind, BasedOnType),
+ CV_ENUM_CLASS_ENT(PointerKind, BasedOnSelf),
+ CV_ENUM_CLASS_ENT(PointerKind, Near32),
+ CV_ENUM_CLASS_ENT(PointerKind, Far32),
+ CV_ENUM_CLASS_ENT(PointerKind, Near64),
+};
+
+static const EnumEntry<uint8_t> PtrModeNames[] = {
+ CV_ENUM_CLASS_ENT(PointerMode, Pointer),
+ CV_ENUM_CLASS_ENT(PointerMode, LValueReference),
+ CV_ENUM_CLASS_ENT(PointerMode, PointerToDataMember),
+ CV_ENUM_CLASS_ENT(PointerMode, PointerToMemberFunction),
+ CV_ENUM_CLASS_ENT(PointerMode, RValueReference),
+};
+
+static const EnumEntry<uint16_t> PtrMemberRepNames[] = {
+ CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, Unknown),
+ CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, SingleInheritanceData),
+ CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, MultipleInheritanceData),
+ CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, VirtualInheritanceData),
+ CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, GeneralData),
+ CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, SingleInheritanceFunction),
+ CV_ENUM_CLASS_ENT(PointerToMemberRepresentation,
+ MultipleInheritanceFunction),
+ CV_ENUM_CLASS_ENT(PointerToMemberRepresentation,
+ VirtualInheritanceFunction),
+ CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, GeneralFunction),
+};
+
+static const EnumEntry<uint16_t> TypeModifierNames[] = {
+ CV_ENUM_CLASS_ENT(ModifierOptions, Const),
+ CV_ENUM_CLASS_ENT(ModifierOptions, Volatile),
+ CV_ENUM_CLASS_ENT(ModifierOptions, Unaligned),
+};
+
+static const EnumEntry<uint8_t> CallingConventions[] = {
+ CV_ENUM_CLASS_ENT(CallingConvention, NearC),
+ CV_ENUM_CLASS_ENT(CallingConvention, FarC),
+ CV_ENUM_CLASS_ENT(CallingConvention, NearPascal),
+ CV_ENUM_CLASS_ENT(CallingConvention, FarPascal),
+ CV_ENUM_CLASS_ENT(CallingConvention, NearFast),
+ CV_ENUM_CLASS_ENT(CallingConvention, FarFast),
+ CV_ENUM_CLASS_ENT(CallingConvention, NearStdCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, FarStdCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, NearSysCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, FarSysCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, ThisCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, MipsCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, Generic),
+ CV_ENUM_CLASS_ENT(CallingConvention, AlphaCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, PpcCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, SHCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, ArmCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, AM33Call),
+ CV_ENUM_CLASS_ENT(CallingConvention, TriCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, SH5Call),
+ CV_ENUM_CLASS_ENT(CallingConvention, M32RCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, ClrCall),
+ CV_ENUM_CLASS_ENT(CallingConvention, Inline),
+ CV_ENUM_CLASS_ENT(CallingConvention, NearVector),
+};
+
+static const EnumEntry<uint8_t> FunctionOptionEnum[] = {
+ CV_ENUM_CLASS_ENT(FunctionOptions, CxxReturnUdt),
+ CV_ENUM_CLASS_ENT(FunctionOptions, Constructor),
+ CV_ENUM_CLASS_ENT(FunctionOptions, ConstructorWithVirtualBases),
+};
+
+static const EnumEntry<uint16_t> LabelTypeEnum[] = {
+ CV_ENUM_CLASS_ENT(LabelType, Near),
+ CV_ENUM_CLASS_ENT(LabelType, Far),
+};
+
namespace llvm {
namespace codeview {
@@ -379,5 +501,49 @@ getImageSectionCharacteristicNames() {
return makeArrayRef(ImageSectionCharacteristicNames);
}
+ArrayRef<EnumEntry<uint16_t>> getClassOptionNames() {
+ return makeArrayRef(ClassOptionNames);
+}
+
+ArrayRef<EnumEntry<uint8_t>> getMemberAccessNames() {
+ return makeArrayRef(MemberAccessNames);
+}
+
+ArrayRef<EnumEntry<uint16_t>> getMethodOptionNames() {
+ return makeArrayRef(MethodOptionNames);
+}
+
+ArrayRef<EnumEntry<uint16_t>> getMemberKindNames() {
+ return makeArrayRef(MemberKindNames);
+}
+
+ArrayRef<EnumEntry<uint8_t>> getPtrKindNames() {
+ return makeArrayRef(PtrKindNames);
+}
+
+ArrayRef<EnumEntry<uint8_t>> getPtrModeNames() {
+ return makeArrayRef(PtrModeNames);
+}
+
+ArrayRef<EnumEntry<uint16_t>> getPtrMemberRepNames() {
+ return makeArrayRef(PtrMemberRepNames);
+}
+
+ArrayRef<EnumEntry<uint16_t>> getTypeModifierNames() {
+ return makeArrayRef(TypeModifierNames);
+}
+
+ArrayRef<EnumEntry<uint8_t>> getCallingConventions() {
+ return makeArrayRef(CallingConventions);
+}
+
+ArrayRef<EnumEntry<uint8_t>> getFunctionOptionEnum() {
+ return makeArrayRef(FunctionOptionEnum);
+}
+
+ArrayRef<EnumEntry<uint16_t>> getLabelTypeEnum() {
+ return makeArrayRef(LabelTypeEnum);
+}
+
} // end namespace codeview
} // end namespace llvm
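The new tables give dumpers one place to map raw CodeView field values to
names. A hypothetical helper built on one of them (the helper itself is
illustrative, not part of this change):

    // Translate a raw calling-convention byte into its CodeView enum name;
    // returns an empty StringRef for unknown values.
    static StringRef ccName(uint8_t CC) {
      for (const EnumEntry<uint8_t> &E : codeview::getCallingConventions())
        if (E.Value == CC)
          return E.Name;
      return StringRef();
    }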
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 27cb7e35234b..45b63983beb4 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -315,7 +315,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) {
- W.printNumber("Offset", DefRangeFramePointerRel.Offset);
+ W.printNumber("Offset", DefRangeFramePointerRel.Hdr.Offset);
printLocalVariableAddrRange(DefRangeFramePointerRel.Range,
DefRangeFramePointerRel.getRelocationOffset());
printLocalVariableAddrGap(DefRangeFramePointerRel.Gaps);
diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index 70889839ef48..3b627930e271 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -229,7 +229,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, DataSym &Data) {
Error SymbolRecordMapping::visitKnownRecord(
CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) {
- error(IO.mapInteger(DefRangeFramePointerRel.Offset));
+ error(IO.mapObject(DefRangeFramePointerRel.Hdr.Offset));
error(mapLocalVariableAddrRange(IO, DefRangeFramePointerRel.Range));
error(IO.mapVectorTail(DefRangeFramePointerRel.Gaps, MapGap()));
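Both symbol-record hunks follow from DefRangeFramePointerRelSym keeping its
fixed-size prefix in a header struct that mirrors the on-disk record, so the
mapping serializes the header object and readers go through Hdr. Assumed shape
of that prefix:

    struct DefRangeFramePointerRelHeader {
      little32_t Offset;   // frame-pointer-relative offset of the variable
    };
    // Readers use Sym.Hdr.Offset instead of a loose Sym.Offset member.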
diff --git a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index 47928c2eef64..1aded589e565 100644
--- a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -7,24 +7,125 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
using namespace llvm;
using namespace llvm::codeview;
+namespace {
+
#define error(X) \
if (auto EC = X) \
return EC;
-namespace {
+static const EnumEntry<TypeLeafKind> LeafTypeNames[] = {
+#define CV_TYPE(enum, val) {#enum, enum},
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
+};
+
+static StringRef getLeafTypeName(TypeLeafKind LT) {
+ switch (LT) {
+#define TYPE_RECORD(ename, value, name) \
+ case ename: \
+ return #name;
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
+ default:
+ break;
+ }
+ return "UnknownLeaf";
+}
+
+template <typename T>
+static bool compEnumNames(const EnumEntry<T> &lhs, const EnumEntry<T> &rhs) {
+ return lhs.Name < rhs.Name;
+}
+
+template <typename T, typename TFlag>
+static std::string getFlagNames(CodeViewRecordIO &IO, T Value,
+ ArrayRef<EnumEntry<TFlag>> Flags) {
+ if (!IO.isStreaming())
+ return std::string("");
+ typedef EnumEntry<TFlag> FlagEntry;
+ typedef SmallVector<FlagEntry, 10> FlagVector;
+ FlagVector SetFlags;
+ for (const auto &Flag : Flags) {
+ if (Flag.Value == 0)
+ continue;
+ if ((Value & Flag.Value) == Flag.Value) {
+ SetFlags.push_back(Flag);
+ }
+ }
+
+ llvm::sort(SetFlags, &compEnumNames<TFlag>);
+
+ std::string FlagLabel;
+ bool FirstOcc = true;
+ for (const auto &Flag : SetFlags) {
+ if (FirstOcc)
+ FirstOcc = false;
+ else
+ FlagLabel += (" | ");
+
+ FlagLabel += (Flag.Name.str() + " (0x" + utohexstr(Flag.Value) + ")");
+ }
+
+ if (!FlagLabel.empty()) {
+ std::string LabelWithBraces(" ( ");
+ LabelWithBraces += FlagLabel + " )";
+ return LabelWithBraces;
+ } else
+ return FlagLabel;
+}
+
+template <typename T, typename TEnum>
+static StringRef getEnumName(CodeViewRecordIO &IO, T Value,
+ ArrayRef<EnumEntry<TEnum>> EnumValues) {
+ if (!IO.isStreaming())
+ return "";
+ StringRef Name;
+ for (const auto &EnumItem : EnumValues) {
+ if (EnumItem.Value == Value) {
+ Name = EnumItem.Name;
+ break;
+ }
+ }
+
+ return Name;
+}
+
+static std::string getMemberAttributes(CodeViewRecordIO &IO,
+ MemberAccess Access, MethodKind Kind,
+ MethodOptions Options) {
+ if (!IO.isStreaming())
+ return "";
+ std::string AccessSpecifier =
+ getEnumName(IO, uint8_t(Access), makeArrayRef(getMemberAccessNames()));
+ std::string MemberAttrs(AccessSpecifier);
+ if (Kind != MethodKind::Vanilla) {
+ std::string MethodKind =
+ getEnumName(IO, unsigned(Kind), makeArrayRef(getMemberKindNames()));
+ MemberAttrs += ", " + MethodKind;
+ }
+ if (Options != MethodOptions::None) {
+ std::string MethodOptions = getFlagNames(
+ IO, unsigned(Options), makeArrayRef(getMethodOptionNames()));
+ MemberAttrs += ", " + MethodOptions;
+ }
+ return MemberAttrs;
+}
+
struct MapOneMethodRecord {
explicit MapOneMethodRecord(bool IsFromOverloadList)
: IsFromOverloadList(IsFromOverloadList) {}
Error operator()(CodeViewRecordIO &IO, OneMethodRecord &Method) const {
- error(IO.mapInteger(Method.Attrs.Attrs, "AccessSpecifier"));
+ std::string Attrs = getMemberAttributes(
+ IO, Method.getAccess(), Method.getMethodKind(), Method.getOptions());
+ error(IO.mapInteger(Method.Attrs.Attrs, "Attrs: " + Attrs));
if (IsFromOverloadList) {
uint16_t Padding = 0;
- error(IO.mapInteger(Padding, "Padding"));
+ error(IO.mapInteger(Padding));
}
error(IO.mapInteger(Method.Type, "Type"));
if (Method.isIntroducingVirtual()) {
@@ -41,7 +142,7 @@ struct MapOneMethodRecord {
private:
bool IsFromOverloadList;
};
-}
+} // namespace
static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name,
StringRef &UniqueName, bool HasUniqueName) {
@@ -96,10 +197,22 @@ Error TypeRecordMapping::visitTypeBegin(CVType &CVR) {
MaxLen = MaxRecordLength - sizeof(RecordPrefix);
error(IO.beginRecord(MaxLen));
TypeKind = CVR.kind();
+
+ if (IO.isStreaming()) {
+ auto RecordKind = CVR.kind();
+ uint16_t RecordLen = CVR.length() - 2;
+ std::string RecordKindName =
+ getEnumName(IO, unsigned(RecordKind), makeArrayRef(LeafTypeNames));
+ error(IO.mapInteger(RecordLen, "Record length"));
+ error(IO.mapEnum(RecordKind, "Record kind: " + RecordKindName));
+ }
return Error::success();
}
Error TypeRecordMapping::visitTypeBegin(CVType &CVR, TypeIndex Index) {
+ if (IO.isStreaming())
+ IO.emitRawComment(" " + getLeafTypeName(CVR.kind()) + " (0x" +
+ utohexstr(Index.getIndex()) + ")");
return visitTypeBegin(CVR);
}
@@ -121,11 +234,21 @@ Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) {
// followed by the subrecord, followed by a continuation, and that entire
// sequence spans `MaxRecordLength` bytes. So the record's length is
// calculated as follows.
+
constexpr uint32_t ContinuationLength = 8;
error(IO.beginRecord(MaxRecordLength - sizeof(RecordPrefix) -
ContinuationLength));
MemberKind = Record.Kind;
+ if (IO.isStreaming()) {
+ std::string MemberKindName = getLeafTypeName(Record.Kind);
+ MemberKindName +=
+ " ( " +
+ (getEnumName(IO, unsigned(Record.Kind), makeArrayRef(LeafTypeNames)))
+ .str() +
+ " )";
+ error(IO.mapEnum(Record.Kind, "Member kind: " + MemberKindName));
+ }
return Error::success();
}
@@ -144,16 +267,24 @@ Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) {
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ModifierRecord &Record) {
+ std::string ModifierNames =
+ getFlagNames(IO, static_cast<uint16_t>(Record.Modifiers),
+ makeArrayRef(getTypeModifierNames()));
error(IO.mapInteger(Record.ModifiedType, "ModifiedType"));
- error(IO.mapEnum(Record.Modifiers, "Modifiers"));
+ error(IO.mapEnum(Record.Modifiers, "Modifiers" + ModifierNames));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
ProcedureRecord &Record) {
+ std::string CallingConvName = getEnumName(
+ IO, uint8_t(Record.CallConv), makeArrayRef(getCallingConventions()));
+ std::string FuncOptionNames =
+ getFlagNames(IO, static_cast<uint16_t>(Record.Options),
+ makeArrayRef(getFunctionOptionEnum()));
error(IO.mapInteger(Record.ReturnType, "ReturnType"));
- error(IO.mapEnum(Record.CallConv, "CallingConvention"));
- error(IO.mapEnum(Record.Options, "FunctionOptions"));
+ error(IO.mapEnum(Record.CallConv, "CallingConvention: " + CallingConvName));
+ error(IO.mapEnum(Record.Options, "FunctionOptions" + FuncOptionNames));
error(IO.mapInteger(Record.ParameterCount, "NumParameters"));
error(IO.mapInteger(Record.ArgumentList, "ArgListType"));
@@ -162,11 +293,16 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
MemberFunctionRecord &Record) {
+ std::string CallingConvName = getEnumName(
+ IO, uint8_t(Record.CallConv), makeArrayRef(getCallingConventions()));
+ std::string FuncOptionNames =
+ getFlagNames(IO, static_cast<uint16_t>(Record.Options),
+ makeArrayRef(getFunctionOptionEnum()));
error(IO.mapInteger(Record.ReturnType, "ReturnType"));
error(IO.mapInteger(Record.ClassType, "ClassType"));
error(IO.mapInteger(Record.ThisType, "ThisType"));
- error(IO.mapEnum(Record.CallConv, "CallingConvention"));
- error(IO.mapEnum(Record.Options, "FunctionOptions"));
+ error(IO.mapEnum(Record.CallConv, "CallingConvention: " + CallingConvName));
+ error(IO.mapEnum(Record.Options, "FunctionOptions" + FuncOptionNames));
error(IO.mapInteger(Record.ParameterCount, "NumParameters"));
error(IO.mapInteger(Record.ArgumentList, "ArgListType"));
error(IO.mapInteger(Record.ThisPointerAdjustment, "ThisAdjustment"));
@@ -197,8 +333,40 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) {
+
+ SmallString<128> Attr("Attrs: ");
+
+ if (IO.isStreaming()) {
+ std::string PtrType = getEnumName(IO, unsigned(Record.getPointerKind()),
+ makeArrayRef(getPtrKindNames()));
+ Attr += "[ Type: " + PtrType;
+
+ std::string PtrMode = getEnumName(IO, unsigned(Record.getMode()),
+ makeArrayRef(getPtrModeNames()));
+ Attr += ", Mode: " + PtrMode;
+
+ auto PtrSizeOf = Record.getSize();
+ Attr += ", SizeOf: " + itostr(PtrSizeOf);
+
+ if (Record.isFlat())
+ Attr += ", isFlat";
+ if (Record.isConst())
+ Attr += ", isConst";
+ if (Record.isVolatile())
+ Attr += ", isVolatile";
+ if (Record.isUnaligned())
+ Attr += ", isUnaligned";
+ if (Record.isRestrict())
+ Attr += ", isRestricted";
+ if (Record.isLValueReferenceThisPtr())
+ Attr += ", isThisPtr&";
+ if (Record.isRValueReferenceThisPtr())
+ Attr += ", isThisPtr&&";
+ Attr += " ]";
+ }
+
error(IO.mapInteger(Record.ReferentType, "PointeeType"));
- error(IO.mapInteger(Record.Attrs, "Attributes"));
+ error(IO.mapInteger(Record.Attrs, Attr));
if (Record.isPointerToMember()) {
if (IO.isReading())
@@ -206,7 +374,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) {
MemberPointerInfo &M = *Record.MemberInfo;
error(IO.mapInteger(M.ContainingType, "ClassType"));
- error(IO.mapEnum(M.Representation, "Representation"));
+ std::string PtrMemberGetRepresentation = getEnumName(
+ IO, uint16_t(M.Representation), makeArrayRef(getPtrMemberRepNames()));
+ error(IO.mapEnum(M.Representation,
+ "Representation: " + PtrMemberGetRepresentation));
}
return Error::success();
@@ -226,8 +397,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) {
(CVR.kind() == TypeLeafKind::LF_CLASS) ||
(CVR.kind() == TypeLeafKind::LF_INTERFACE));
+ std::string PropertiesNames =
+ getFlagNames(IO, static_cast<uint16_t>(Record.Options),
+ makeArrayRef(getClassOptionNames()));
error(IO.mapInteger(Record.MemberCount, "MemberCount"));
- error(IO.mapEnum(Record.Options, "Properties"));
+ error(IO.mapEnum(Record.Options, "Properties" + PropertiesNames));
error(IO.mapInteger(Record.FieldList, "FieldList"));
error(IO.mapInteger(Record.DerivationList, "DerivedFrom"));
error(IO.mapInteger(Record.VTableShape, "VShape"));
@@ -239,8 +413,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) {
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) {
+ std::string PropertiesNames =
+ getFlagNames(IO, static_cast<uint16_t>(Record.Options),
+ makeArrayRef(getClassOptionNames()));
error(IO.mapInteger(Record.MemberCount, "MemberCount"));
- error(IO.mapEnum(Record.Options, "Properties"));
+ error(IO.mapEnum(Record.Options, "Properties" + PropertiesNames));
error(IO.mapInteger(Record.FieldList, "FieldList"));
error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
@@ -250,8 +427,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) {
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) {
+ std::string PropertiesNames =
+ getFlagNames(IO, static_cast<uint16_t>(Record.Options),
+ makeArrayRef(getClassOptionNames()));
error(IO.mapInteger(Record.MemberCount, "NumEnumerators"));
- error(IO.mapEnum(Record.Options, "Properties"));
+ error(IO.mapEnum(Record.Options, "Properties" + PropertiesNames));
error(IO.mapInteger(Record.UnderlyingType, "UnderlyingType"));
error(IO.mapInteger(Record.FieldList, "FieldListType"));
error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
@@ -383,7 +563,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
FieldListRecord &Record) {
- error(IO.mapByteVectorTail(Record.Data));
+ if (IO.isStreaming()) {
+ if (auto EC = codeview::visitMemberRecordStream(Record.Data, *this))
+ return EC;
+ } else
+ error(IO.mapByteVectorTail(Record.Data));
return Error::success();
}
@@ -397,13 +581,17 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, LabelRecord &Record) {
- error(IO.mapEnum(Record.Mode, "Mode"));
+ std::string ModeName =
+ getEnumName(IO, uint16_t(Record.Mode), makeArrayRef(getLabelTypeEnum()));
+ error(IO.mapEnum(Record.Mode, "Mode: " + ModeName));
return Error::success();
}
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
BaseClassRecord &Record) {
- error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+ std::string Attrs = getMemberAttributes(
+ IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None);
+ error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs));
error(IO.mapInteger(Record.Type, "BaseType"));
error(IO.mapEncodedInteger(Record.Offset, "BaseOffset"));
@@ -412,7 +600,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
EnumeratorRecord &Record) {
- error(IO.mapInteger(Record.Attrs.Attrs));
+ std::string Attrs = getMemberAttributes(
+ IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None);
+ error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs));
// FIXME: Handle full APInt such as __int128.
error(IO.mapEncodedInteger(Record.Value, "EnumValue"));
@@ -423,7 +613,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
DataMemberRecord &Record) {
- error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+ std::string Attrs = getMemberAttributes(
+ IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None);
+ error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs));
error(IO.mapInteger(Record.Type, "Type"));
error(IO.mapEncodedInteger(Record.FieldOffset, "FieldOffset"));
error(IO.mapStringZ(Record.Name, "Name"));
@@ -460,7 +652,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
StaticDataMemberRecord &Record) {
- error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+ std::string Attrs = getMemberAttributes(
+ IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None);
+ error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs));
error(IO.mapInteger(Record.Type, "Type"));
error(IO.mapStringZ(Record.Name, "Name"));
@@ -470,7 +664,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
VirtualBaseClassRecord &Record) {
- error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+ std::string Attrs = getMemberAttributes(
+ IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None);
+ error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs));
error(IO.mapInteger(Record.BaseType, "BaseType"));
error(IO.mapInteger(Record.VBPtrType, "VBPtrType"));
error(IO.mapEncodedInteger(Record.VBPtrOffset, "VBPtrOffset"));
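Most of the new comments in this file are produced by getFlagNames, which
collects the set bits of a flags value, sorts them by name, and renders a
parenthesized suffix. A worked example with values from the ClassOptions table
(result derived from the helper's string building; IO must be streaming,
otherwise the helper returns an empty string):

    uint16_t Opts = 0x9;   // Packed (0x1) | Nested (0x8)
    std::string S =
        getFlagNames(IO, Opts, makeArrayRef(getClassOptionNames()));
    // S == " ( Nested (0x8) | Packed (0x1) )"  -- flags sorted by name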
diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index f4dd79937608..abbea3a868c8 100644
--- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -38,9 +38,9 @@ DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() {
bool
DWARFAbbreviationDeclaration::extract(DataExtractor Data,
- uint32_t* OffsetPtr) {
+ uint64_t* OffsetPtr) {
clear();
- const uint32_t Offset = *OffsetPtr;
+ const uint64_t Offset = *OffsetPtr;
Code = Data.getULEB128(OffsetPtr);
if (Code == 0) {
return false;
@@ -148,7 +148,7 @@ DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const {
}
Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue(
- const uint32_t DIEOffset, const dwarf::Attribute Attr,
+ const uint64_t DIEOffset, const dwarf::Attribute Attr,
const DWARFUnit &U) const {
Optional<uint32_t> MatchAttrIndex = findAttributeIndex(Attr);
if (!MatchAttrIndex)
@@ -158,7 +158,7 @@ Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue(
// Add the byte size of the ULEB for the abbrev Code so we can start
// skipping the attribute data.
- uint32_t Offset = DIEOffset + CodeByteSize;
+ uint64_t Offset = DIEOffset + CodeByteSize;
uint32_t AttrIndex = 0;
for (const auto &Spec : AttributeSpecs) {
if (*MatchAttrIndex == AttrIndex) {
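This change and the DWARFAcceleratorTable one below widen section offsets from
uint32_t to uint64_t so DWARF64, and sections larger than 4 GiB, parse without
truncation. Callers thread a 64-bit cursor through the extract loop; a minimal
sketch (consume() is a hypothetical caller-side handler):

    uint64_t Offset = 0;   // was uint32_t before this change
    DWARFAbbreviationDeclaration Decl;
    while (Decl.extract(Data, &Offset))  // advances Offset past one declaration
      consume(Decl);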
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 0721efb40f6a..875f5e9989a0 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -42,7 +42,7 @@ static Atom formatAtom(unsigned Atom) { return {Atom}; }
DWARFAcceleratorTable::~DWARFAcceleratorTable() = default;
Error AppleAcceleratorTable::extract() {
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
// Check that we can at least read the header.
if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength) + 4))
@@ -111,15 +111,15 @@ bool AppleAcceleratorTable::validateForms() {
return true;
}
-std::pair<uint32_t, dwarf::Tag>
-AppleAcceleratorTable::readAtoms(uint32_t &HashDataOffset) {
- uint32_t DieOffset = dwarf::DW_INVALID_OFFSET;
+std::pair<uint64_t, dwarf::Tag>
+AppleAcceleratorTable::readAtoms(uint64_t *HashDataOffset) {
+ uint64_t DieOffset = dwarf::DW_INVALID_OFFSET;
dwarf::Tag DieTag = dwarf::DW_TAG_null;
dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32};
for (auto Atom : getAtomsDesc()) {
DWARFFormValue FormValue(Atom.second);
- FormValue.extractValue(AccelSection, &HashDataOffset, FormParams);
+ FormValue.extractValue(AccelSection, HashDataOffset, FormParams);
switch (Atom.first) {
case dwarf::DW_ATOM_die_offset:
DieOffset = *FormValue.getAsUnsignedConstant();
@@ -163,19 +163,19 @@ Optional<uint64_t> AppleAcceleratorTable::HeaderData::extractOffset(
bool AppleAcceleratorTable::dumpName(ScopedPrinter &W,
SmallVectorImpl<DWARFFormValue> &AtomForms,
- uint32_t *DataOffset) const {
+ uint64_t *DataOffset) const {
dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32};
- uint32_t NameOffset = *DataOffset;
+ uint64_t NameOffset = *DataOffset;
if (!AccelSection.isValidOffsetForDataOfSize(*DataOffset, 4)) {
W.printString("Incorrectly terminated list.");
return false;
}
- unsigned StringOffset = AccelSection.getRelocatedValue(4, DataOffset);
+ uint64_t StringOffset = AccelSection.getRelocatedValue(4, DataOffset);
if (!StringOffset)
return false; // End of list
DictScope NameScope(W, ("Name@0x" + Twine::utohexstr(NameOffset)).str());
- W.startLine() << format("String: 0x%08x", StringOffset);
+ W.startLine() << format("String: 0x%08" PRIx64, StringOffset);
W.getOStream() << " \"" << StringSection.getCStr(&StringOffset) << "\"\n";
unsigned NumData = AccelSection.getU32(DataOffset);
@@ -223,9 +223,9 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const {
}
// Now go through the actual tables and dump them.
- uint32_t Offset = sizeof(Hdr) + Hdr.HeaderDataLength;
- unsigned HashesBase = Offset + Hdr.BucketCount * 4;
- unsigned OffsetsBase = HashesBase + Hdr.HashCount * 4;
+ uint64_t Offset = sizeof(Hdr) + Hdr.HeaderDataLength;
+ uint64_t HashesBase = Offset + Hdr.BucketCount * 4;
+ uint64_t OffsetsBase = HashesBase + Hdr.HashCount * 4;
for (unsigned Bucket = 0; Bucket < Hdr.BucketCount; ++Bucket) {
unsigned Index = AccelSection.getU32(&Offset);
@@ -237,14 +237,14 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const {
}
for (unsigned HashIdx = Index; HashIdx < Hdr.HashCount; ++HashIdx) {
- unsigned HashOffset = HashesBase + HashIdx*4;
- unsigned OffsetsOffset = OffsetsBase + HashIdx*4;
+ uint64_t HashOffset = HashesBase + HashIdx*4;
+ uint64_t OffsetsOffset = OffsetsBase + HashIdx*4;
uint32_t Hash = AccelSection.getU32(&HashOffset);
if (Hash % Hdr.BucketCount != Bucket)
break;
- unsigned DataOffset = AccelSection.getU32(&OffsetsOffset);
+ uint64_t DataOffset = AccelSection.getU32(&OffsetsOffset);
ListScope HashScope(W, ("Hash 0x" + Twine::utohexstr(Hash)).str());
if (!AccelSection.isValidOffset(DataOffset)) {
W.printString("Invalid section offset");
@@ -265,7 +265,7 @@ AppleAcceleratorTable::Entry::Entry(
}
void AppleAcceleratorTable::Entry::extract(
- const AppleAcceleratorTable &AccelTable, uint32_t *Offset) {
+ const AppleAcceleratorTable &AccelTable, uint64_t *Offset) {
dwarf::FormParams FormParams = {AccelTable.Hdr.Version, 0,
dwarf::DwarfFormat::DWARF32};
@@ -302,7 +302,7 @@ Optional<dwarf::Tag> AppleAcceleratorTable::Entry::getTag() const {
}
AppleAcceleratorTable::ValueIterator::ValueIterator(
- const AppleAcceleratorTable &AccelTable, unsigned Offset)
+ const AppleAcceleratorTable &AccelTable, uint64_t Offset)
: AccelTable(&AccelTable), Current(AccelTable.HdrData), DataOffset(Offset) {
if (!AccelTable.AccelSection.isValidOffsetForDataOfSize(DataOffset, 4))
return;
@@ -333,25 +333,25 @@ AppleAcceleratorTable::equal_range(StringRef Key) const {
// Find the bucket.
unsigned HashValue = djbHash(Key);
unsigned Bucket = HashValue % Hdr.BucketCount;
- unsigned BucketBase = sizeof(Hdr) + Hdr.HeaderDataLength;
- unsigned HashesBase = BucketBase + Hdr.BucketCount * 4;
- unsigned OffsetsBase = HashesBase + Hdr.HashCount * 4;
+ uint64_t BucketBase = sizeof(Hdr) + Hdr.HeaderDataLength;
+ uint64_t HashesBase = BucketBase + Hdr.BucketCount * 4;
+ uint64_t OffsetsBase = HashesBase + Hdr.HashCount * 4;
- unsigned BucketOffset = BucketBase + Bucket * 4;
+ uint64_t BucketOffset = BucketBase + Bucket * 4;
unsigned Index = AccelSection.getU32(&BucketOffset);
// Search through all hashes in the bucket.
for (unsigned HashIdx = Index; HashIdx < Hdr.HashCount; ++HashIdx) {
- unsigned HashOffset = HashesBase + HashIdx * 4;
- unsigned OffsetsOffset = OffsetsBase + HashIdx * 4;
+ uint64_t HashOffset = HashesBase + HashIdx * 4;
+ uint64_t OffsetsOffset = OffsetsBase + HashIdx * 4;
uint32_t Hash = AccelSection.getU32(&HashOffset);
if (Hash % Hdr.BucketCount != Bucket)
// We are already in the next bucket.
break;
- unsigned DataOffset = AccelSection.getU32(&OffsetsOffset);
- unsigned StringOffset = AccelSection.getRelocatedValue(4, &DataOffset);
+ uint64_t DataOffset = AccelSection.getU32(&OffsetsOffset);
+ uint64_t StringOffset = AccelSection.getRelocatedValue(4, &DataOffset);
if (!StringOffset)
break;
@@ -377,7 +377,7 @@ void DWARFDebugNames::Header::dump(ScopedPrinter &W) const {
}
Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS,
- uint32_t *Offset) {
+ uint64_t *Offset) {
// Check that we can read the fixed-size part.
if (!AS.isValidOffset(*Offset + sizeof(HeaderPOD) - 1))
return createStringError(errc::illegal_byte_sequence,
@@ -437,7 +437,7 @@ DWARFDebugNames::Abbrev DWARFDebugNames::AbbrevMapInfo::getTombstoneKey() {
}
Expected<DWARFDebugNames::AttributeEncoding>
-DWARFDebugNames::NameIndex::extractAttributeEncoding(uint32_t *Offset) {
+DWARFDebugNames::NameIndex::extractAttributeEncoding(uint64_t *Offset) {
if (*Offset >= EntriesBase) {
return createStringError(errc::illegal_byte_sequence,
"Incorrectly terminated abbreviation table.");
@@ -449,7 +449,7 @@ DWARFDebugNames::NameIndex::extractAttributeEncoding(uint32_t *Offset) {
}
Expected<std::vector<DWARFDebugNames::AttributeEncoding>>
-DWARFDebugNames::NameIndex::extractAttributeEncodings(uint32_t *Offset) {
+DWARFDebugNames::NameIndex::extractAttributeEncodings(uint64_t *Offset) {
std::vector<AttributeEncoding> Result;
for (;;) {
auto AttrEncOr = extractAttributeEncoding(Offset);
@@ -463,7 +463,7 @@ DWARFDebugNames::NameIndex::extractAttributeEncodings(uint32_t *Offset) {
}
Expected<DWARFDebugNames::Abbrev>
-DWARFDebugNames::NameIndex::extractAbbrev(uint32_t *Offset) {
+DWARFDebugNames::NameIndex::extractAbbrev(uint64_t *Offset) {
if (*Offset >= EntriesBase) {
return createStringError(errc::illegal_byte_sequence,
"Incorrectly terminated abbreviation table.");
@@ -482,7 +482,7 @@ DWARFDebugNames::NameIndex::extractAbbrev(uint32_t *Offset) {
Error DWARFDebugNames::NameIndex::extract() {
const DWARFDataExtractor &AS = Section.AccelSection;
- uint32_t Offset = Base;
+ uint64_t Offset = Base;
if (Error E = Hdr.extract(AS, &Offset))
return E;
@@ -577,27 +577,27 @@ std::error_code DWARFDebugNames::SentinelError::convertToErrorCode() const {
return inconvertibleErrorCode();
}
-uint32_t DWARFDebugNames::NameIndex::getCUOffset(uint32_t CU) const {
+uint64_t DWARFDebugNames::NameIndex::getCUOffset(uint32_t CU) const {
assert(CU < Hdr.CompUnitCount);
- uint32_t Offset = CUsBase + 4 * CU;
+ uint64_t Offset = CUsBase + 4 * CU;
return Section.AccelSection.getRelocatedValue(4, &Offset);
}
-uint32_t DWARFDebugNames::NameIndex::getLocalTUOffset(uint32_t TU) const {
+uint64_t DWARFDebugNames::NameIndex::getLocalTUOffset(uint32_t TU) const {
assert(TU < Hdr.LocalTypeUnitCount);
- uint32_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + TU);
+ uint64_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + TU);
return Section.AccelSection.getRelocatedValue(4, &Offset);
}
uint64_t DWARFDebugNames::NameIndex::getForeignTUSignature(uint32_t TU) const {
assert(TU < Hdr.ForeignTypeUnitCount);
- uint32_t Offset =
+ uint64_t Offset =
CUsBase + 4 * (Hdr.CompUnitCount + Hdr.LocalTypeUnitCount) + 8 * TU;
return Section.AccelSection.getU64(&Offset);
}
Expected<DWARFDebugNames::Entry>
-DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const {
+DWARFDebugNames::NameIndex::getEntry(uint64_t *Offset) const {
const DWARFDataExtractor &AS = Section.AccelSection;
if (!AS.isValidOffset(*Offset))
return createStringError(errc::illegal_byte_sequence,
@@ -625,12 +625,12 @@ DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const {
DWARFDebugNames::NameTableEntry
DWARFDebugNames::NameIndex::getNameTableEntry(uint32_t Index) const {
assert(0 < Index && Index <= Hdr.NameCount);
- uint32_t StringOffsetOffset = StringOffsetsBase + 4 * (Index - 1);
- uint32_t EntryOffsetOffset = EntryOffsetsBase + 4 * (Index - 1);
+ uint64_t StringOffsetOffset = StringOffsetsBase + 4 * (Index - 1);
+ uint64_t EntryOffsetOffset = EntryOffsetsBase + 4 * (Index - 1);
const DWARFDataExtractor &AS = Section.AccelSection;
- uint32_t StringOffset = AS.getRelocatedValue(4, &StringOffsetOffset);
- uint32_t EntryOffset = AS.getU32(&EntryOffsetOffset);
+ uint64_t StringOffset = AS.getRelocatedValue(4, &StringOffsetOffset);
+ uint64_t EntryOffset = AS.getU32(&EntryOffsetOffset);
EntryOffset += EntriesBase;
return {Section.StringSection, Index, StringOffset, EntryOffset};
}
@@ -638,13 +638,13 @@ DWARFDebugNames::NameIndex::getNameTableEntry(uint32_t Index) const {
uint32_t
DWARFDebugNames::NameIndex::getBucketArrayEntry(uint32_t Bucket) const {
assert(Bucket < Hdr.BucketCount);
- uint32_t BucketOffset = BucketsBase + 4 * Bucket;
+ uint64_t BucketOffset = BucketsBase + 4 * Bucket;
return Section.AccelSection.getU32(&BucketOffset);
}
uint32_t DWARFDebugNames::NameIndex::getHashArrayEntry(uint32_t Index) const {
assert(0 < Index && Index <= Hdr.NameCount);
- uint32_t HashOffset = HashesBase + 4 * (Index - 1);
+ uint64_t HashOffset = HashesBase + 4 * (Index - 1);
return Section.AccelSection.getU32(&HashOffset);
}
@@ -653,8 +653,8 @@ uint32_t DWARFDebugNames::NameIndex::getHashArrayEntry(uint32_t Index) const {
// it's not possible to recover this entry list (but the other lists may still
// parse OK).
bool DWARFDebugNames::NameIndex::dumpEntry(ScopedPrinter &W,
- uint32_t *Offset) const {
- uint32_t EntryId = *Offset;
+ uint64_t *Offset) const {
+ uint64_t EntryId = *Offset;
auto EntryOr = getEntry(Offset);
if (!EntryOr) {
handleAllErrors(EntryOr.takeError(), [](const SentinelError &) {},
@@ -674,10 +674,10 @@ void DWARFDebugNames::NameIndex::dumpName(ScopedPrinter &W,
if (Hash)
W.printHex("Hash", *Hash);
- W.startLine() << format("String: 0x%08x", NTE.getStringOffset());
+ W.startLine() << format("String: 0x%08" PRIx64, NTE.getStringOffset());
W.getOStream() << " \"" << NTE.getString() << "\"\n";
- uint32_t EntryOffset = NTE.getEntryOffset();
+ uint64_t EntryOffset = NTE.getEntryOffset();
while (dumpEntry(W, &EntryOffset))
/*empty*/;
}
@@ -685,7 +685,7 @@ void DWARFDebugNames::NameIndex::dumpName(ScopedPrinter &W,
void DWARFDebugNames::NameIndex::dumpCUs(ScopedPrinter &W) const {
ListScope CUScope(W, "Compilation Unit offsets");
for (uint32_t CU = 0; CU < Hdr.CompUnitCount; ++CU)
- W.startLine() << format("CU[%u]: 0x%08x\n", CU, getCUOffset(CU));
+ W.startLine() << format("CU[%u]: 0x%08" PRIx64 "\n", CU, getCUOffset(CU));
}
void DWARFDebugNames::NameIndex::dumpLocalTUs(ScopedPrinter &W) const {
@@ -694,7 +694,8 @@ void DWARFDebugNames::NameIndex::dumpLocalTUs(ScopedPrinter &W) const {
ListScope TUScope(W, "Local Type Unit offsets");
for (uint32_t TU = 0; TU < Hdr.LocalTypeUnitCount; ++TU)
- W.startLine() << format("LocalTU[%u]: 0x%08x\n", TU, getLocalTUOffset(TU));
+ W.startLine() << format("LocalTU[%u]: 0x%08" PRIx64 "\n", TU,
+ getLocalTUOffset(TU));
}
void DWARFDebugNames::NameIndex::dumpForeignTUs(ScopedPrinter &W) const {
@@ -756,7 +757,7 @@ LLVM_DUMP_METHOD void DWARFDebugNames::NameIndex::dump(ScopedPrinter &W) const {
}
Error DWARFDebugNames::extract() {
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
while (AccelSection.isValidOffset(Offset)) {
NameIndex Next(*this, Offset);
if (Error E = Next.extract())
@@ -778,7 +779,7 @@ LLVM_DUMP_METHOD void DWARFDebugNames::dump(raw_ostream &OS) const {
NI.dump(W);
}
-Optional<uint32_t>
+Optional<uint64_t>
DWARFDebugNames::ValueIterator::findEntryOffsetInCurrentIndex() {
const Header &Hdr = CurrentIndex->Hdr;
if (Hdr.BucketCount == 0) {
@@ -822,7 +823,7 @@ bool DWARFDebugNames::ValueIterator::getEntryAtCurrentOffset() {
}
bool DWARFDebugNames::ValueIterator::findInCurrentIndex() {
- Optional<uint32_t> Offset = findEntryOffsetInCurrentIndex();
+ Optional<uint64_t> Offset = findEntryOffsetInCurrentIndex();
if (!Offset)
return false;
DataOffset = *Offset;
@@ -877,7 +878,7 @@ DWARFDebugNames::equal_range(StringRef Key) const {
}
const DWARFDebugNames::NameIndex *
-DWARFDebugNames::getCUNameIndex(uint32_t CUOffset) {
+DWARFDebugNames::getCUNameIndex(uint64_t CUOffset) {
if (CUToNameIndex.size() == 0 && NameIndices.size() > 0) {
for (const auto &NI : *this) {
for (uint32_t CU = 0; CU < NI.getCUCount(); ++CU)
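The accelerator-table hunks widen every section cursor while leaving the on-disk format alone: buckets, hashes, and entry offsets stay parallel arrays of 4-byte slots, so only the index arithmetic moves to uint64_t. The lookup key is the DJB hash used by equal_range() above; a minimal sketch (llvm::djbHash in llvm/Support/DJB.h is the real routine):

#include <cstdint>
#include <string>

// Classic djb2: H = H * 33 + C, seeded with 5381. The table picks the
// bucket as djbHash(Key) % BucketCount, then scans that bucket's hashes.
uint32_t djbHash(const std::string &Buffer) {
  uint32_t H = 5381;
  for (unsigned char C : Buffer)
    H = (H << 5) + H + C; // H * 33 + C
  return H;
}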
diff --git a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index 74cce42466dd..f59e49268288 100644
--- a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -15,16 +15,18 @@
using namespace llvm;
void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
- OS << format("0x%08x", getOffset()) << ": Compile Unit:"
- << " length = " << format("0x%08x", getLength())
+ OS << format("0x%08" PRIx64, getOffset()) << ": Compile Unit:"
+ << " length = " << format("0x%08" PRIx64, getLength())
<< " version = " << format("0x%04x", getVersion());
if (getVersion() >= 5)
OS << " unit_type = " << dwarf::UnitTypeString(getUnitType());
- OS << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
+ OS << " abbr_offset = "
+ << format("0x%04" PRIx64, getAbbreviations()->getOffset())
<< " addr_size = " << format("0x%02x", getAddressByteSize());
if (getVersion() >= 5 && getUnitType() != dwarf::DW_UT_compile)
OS << " DWO_id = " << format("0x%016" PRIx64, *getDWOId());
- OS << " (next unit at " << format("0x%08x", getNextUnitOffset()) << ")\n";
+ OS << " (next unit at " << format("0x%08" PRIx64, getNextUnitOffset())
+ << ")\n";
if (DWARFDie CUDie = getUnitDIE(false))
CUDie.dump(OS, 0, DumpOpts);
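A recurring mechanical change in this import: once a printed value is uint64_t, a "0x%08x" conversion is undefined behavior under printf semantics, so the format strings switch to the <cinttypes> macros. A small demonstration (the offset value is made up):

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Offset = 0x1122334455ULL; // arbitrary example value
  // "%x" expects unsigned int and would truncate or misread a uint64_t;
  // PRIx64 expands to the right length modifier for the platform.
  std::printf("0x%08" PRIx64 "\n", Offset); // prints 0x1122334455
  return 0;
}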
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 5ede9bf59619..c06d85d50609 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -138,7 +138,7 @@ static void dumpDWARFv5StringOffsetsSection(
DWARFDataExtractor StrOffsetExt(Obj, StringOffsetsSection, LittleEndian, 0);
DataExtractor StrData(StringSection, LittleEndian, 0);
uint64_t SectionSize = StringOffsetsSection.Data.size();
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
for (auto &Contribution : Contributions) {
// Report an ill-formed contribution.
if (!Contribution) {
@@ -166,10 +166,10 @@ static void dumpDWARFv5StringOffsetsSection(
}
// Report a gap in the table.
if (Offset < ContributionHeader) {
- OS << format("0x%8.8x: Gap, length = ", Offset);
+ OS << format("0x%8.8" PRIx64 ": Gap, length = ", Offset);
OS << (ContributionHeader - Offset) << "\n";
}
- OS << format("0x%8.8x: ", (uint32_t)ContributionHeader);
+ OS << format("0x%8.8" PRIx64 ": ", ContributionHeader);
// In DWARF v5 the contribution size in the descriptor does not equal
// the originally encoded length (it does not contain the length of the
// version field and the padding, a total of 4 bytes). Add them back in
@@ -181,26 +181,19 @@ static void dumpDWARFv5StringOffsetsSection(
Offset = Contribution->Base;
unsigned EntrySize = Contribution->getDwarfOffsetByteSize();
while (Offset - Contribution->Base < Contribution->Size) {
- OS << format("0x%8.8x: ", Offset);
- // FIXME: We can only extract strings if the offset fits in 32 bits.
+ OS << format("0x%8.8" PRIx64 ": ", Offset);
uint64_t StringOffset =
StrOffsetExt.getRelocatedValue(EntrySize, &Offset);
- // Extract the string if we can and display it. Otherwise just report
- // the offset.
- if (StringOffset <= std::numeric_limits<uint32_t>::max()) {
- uint32_t StringOffset32 = (uint32_t)StringOffset;
- OS << format("%8.8x ", StringOffset32);
- const char *S = StrData.getCStr(&StringOffset32);
- if (S)
- OS << format("\"%s\"", S);
- } else
- OS << format("%16.16" PRIx64 " ", StringOffset);
+ OS << format("%8.8" PRIx64 " ", StringOffset);
+ const char *S = StrData.getCStr(&StringOffset);
+ if (S)
+ OS << format("\"%s\"", S);
OS << "\n";
}
}
// Report a gap at the end of the table.
if (Offset < SectionSize) {
- OS << format("0x%8.8x: Gap, length = ", Offset);
+ OS << format("0x%8.8" PRIx64 ": Gap, length = ", Offset);
OS << (SectionSize - Offset) << "\n";
}
}
@@ -225,7 +218,7 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
StringSection, Units, LittleEndian);
else {
DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
- uint32_t offset = 0;
+ uint64_t offset = 0;
uint64_t size = StringOffsetsSection.Data.size();
// Ensure that size is a multiple of the size of an entry.
if (size & ((uint64_t)(sizeof(uint32_t) - 1))) {
@@ -235,9 +228,9 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
}
DataExtractor StrData(StringSection, LittleEndian, 0);
while (offset < size) {
- OS << format("0x%8.8x: ", offset);
- uint32_t StringOffset = strOffsetExt.getU32(&offset);
- OS << format("%8.8x ", StringOffset);
+ OS << format("0x%8.8" PRIx64 ": ", offset);
+ uint64_t StringOffset = strOffsetExt.getU32(&offset);
+ OS << format("%8.8" PRIx64 " ", StringOffset);
const char *S = StrData.getCStr(&StringOffset);
if (S)
OS << format("\"%s\"", S);
@@ -250,10 +243,10 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
DIDumpOptions DumpOpts, uint16_t Version,
uint8_t AddrSize) {
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
while (AddrData.isValidOffset(Offset)) {
DWARFDebugAddrTable AddrTable;
- uint32_t TableOffset = Offset;
+ uint64_t TableOffset = Offset;
if (Error Err = AddrTable.extract(AddrData, &Offset, Version, AddrSize,
DWARFContext::dumpWarning)) {
WithColor::error() << toString(std::move(Err)) << '\n';
@@ -261,8 +254,7 @@ static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
// could be read. If it couldn't, stop reading the section.
if (!AddrTable.hasValidLength())
break;
- uint64_t Length = AddrTable.getLength();
- Offset = TableOffset + Length;
+ Offset = TableOffset + AddrTable.getLength();
} else {
AddrTable.dump(OS, DumpOpts);
}
@@ -275,10 +267,10 @@ static void dumpRnglistsSection(
llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
LookupPooledAddress,
DIDumpOptions DumpOpts) {
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
while (rnglistData.isValidOffset(Offset)) {
llvm::DWARFDebugRnglistTable Rnglists;
- uint32_t TableOffset = Offset;
+ uint64_t TableOffset = Offset;
if (Error Err = Rnglists.extract(rnglistData, &Offset)) {
WithColor::error() << toString(std::move(Err)) << '\n';
uint64_t Length = Rnglists.length();
@@ -297,21 +289,25 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts,
DWARFDataExtractor Data,
const MCRegisterInfo *MRI,
Optional<uint64_t> DumpOffset) {
- uint32_t Offset = 0;
- DWARFDebugLoclists Loclists;
+ uint64_t Offset = 0;
- DWARFListTableHeader Header(".debug_loclists", "locations");
- if (Error E = Header.extract(Data, &Offset)) {
- WithColor::error() << toString(std::move(E)) << '\n';
- return;
- }
+ while (Data.isValidOffset(Offset)) {
+ DWARFListTableHeader Header(".debug_loclists", "locations");
+ if (Error E = Header.extract(Data, &Offset)) {
+ WithColor::error() << toString(std::move(E)) << '\n';
+ return;
+ }
- Header.dump(OS, DumpOpts);
- DataExtractor LocData(Data.getData().drop_front(Offset),
- Data.isLittleEndian(), Header.getAddrSize());
+ Header.dump(OS, DumpOpts);
+ DataExtractor LocData(Data.getData(),
+ Data.isLittleEndian(), Header.getAddrSize());
- Loclists.parse(LocData, Header.getVersion());
- Loclists.dump(OS, 0, MRI, DumpOffset);
+ DWARFDebugLoclists Loclists;
+ uint64_t EndOffset = Header.length() + Header.getHeaderOffset();
+ Loclists.parse(LocData, Offset, EndOffset, Header.getVersion());
+ Loclists.dump(OS, 0, MRI, DumpOpts, DumpOffset);
+ Offset = EndOffset;
+ }
}
void DWARFContext::dump(
@@ -386,7 +382,7 @@ void DWARFContext::dump(
if (const auto *Off = shouldDump(Explicit, ".debug_loc", DIDT_ID_DebugLoc,
DObj->getLocSection().Data)) {
- getDebugLoc()->dump(OS, getRegisterInfo(), *Off);
+ getDebugLoc()->dump(OS, getRegisterInfo(), DumpOpts, *Off);
}
if (const auto *Off =
shouldDump(Explicit, ".debug_loclists", DIDT_ID_DebugLoclists,
@@ -398,15 +394,15 @@ void DWARFContext::dump(
if (const auto *Off =
shouldDump(ExplicitDWO, ".debug_loc.dwo", DIDT_ID_DebugLoc,
DObj->getLocDWOSection().Data)) {
- getDebugLocDWO()->dump(OS, 0, getRegisterInfo(), *Off);
+ getDebugLocDWO()->dump(OS, 0, getRegisterInfo(), DumpOpts, *Off);
}
if (const auto *Off = shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame,
- DObj->getDebugFrameSection()))
+ DObj->getFrameSection().Data))
getDebugFrame()->dump(OS, getRegisterInfo(), *Off);
if (const auto *Off = shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame,
- DObj->getEHFrameSection()))
+ DObj->getEHFrameSection().Data))
getEHFrame()->dump(OS, getRegisterInfo(), *Off);
if (DumpType & DIDT_DebugMacro) {
@@ -417,9 +413,9 @@ void DWARFContext::dump(
}
if (shouldDump(Explicit, ".debug_aranges", DIDT_ID_DebugAranges,
- DObj->getARangeSection())) {
- uint32_t offset = 0;
- DataExtractor arangesData(DObj->getARangeSection(), isLittleEndian(), 0);
+ DObj->getArangesSection())) {
+ uint64_t offset = 0;
+ DataExtractor arangesData(DObj->getArangesSection(), isLittleEndian(), 0);
DWARFDebugArangeSet set;
while (set.extract(arangesData, &offset))
set.dump(OS);
@@ -433,7 +429,8 @@ void DWARFContext::dump(
Parser.skip(dumpWarning);
continue;
}
- OS << "debug_line[" << format("0x%8.8x", Parser.getOffset()) << "]\n";
+ OS << "debug_line[" << format("0x%8.8" PRIx64, Parser.getOffset())
+ << "]\n";
if (DumpOpts.Verbose) {
Parser.parseNext(dumpWarning, dumpWarning, &OS);
} else {
@@ -474,32 +471,32 @@ void DWARFContext::dump(
}
if (shouldDump(Explicit, ".debug_str", DIDT_ID_DebugStr,
- DObj->getStringSection())) {
- DataExtractor strData(DObj->getStringSection(), isLittleEndian(), 0);
- uint32_t offset = 0;
- uint32_t strOffset = 0;
+ DObj->getStrSection())) {
+ DataExtractor strData(DObj->getStrSection(), isLittleEndian(), 0);
+ uint64_t offset = 0;
+ uint64_t strOffset = 0;
while (const char *s = strData.getCStr(&offset)) {
- OS << format("0x%8.8x: \"%s\"\n", strOffset, s);
+ OS << format("0x%8.8" PRIx64 ": \"%s\"\n", strOffset, s);
strOffset = offset;
}
}
if (shouldDump(ExplicitDWO, ".debug_str.dwo", DIDT_ID_DebugStr,
- DObj->getStringDWOSection())) {
- DataExtractor strDWOData(DObj->getStringDWOSection(), isLittleEndian(), 0);
- uint32_t offset = 0;
- uint32_t strDWOOffset = 0;
+ DObj->getStrDWOSection())) {
+ DataExtractor strDWOData(DObj->getStrDWOSection(), isLittleEndian(), 0);
+ uint64_t offset = 0;
+ uint64_t strDWOOffset = 0;
while (const char *s = strDWOData.getCStr(&offset)) {
- OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s);
+ OS << format("0x%8.8" PRIx64 ": \"%s\"\n", strDWOOffset, s);
strDWOOffset = offset;
}
}
if (shouldDump(Explicit, ".debug_line_str", DIDT_ID_DebugLineStr,
- DObj->getLineStringSection())) {
- DataExtractor strData(DObj->getLineStringSection(), isLittleEndian(), 0);
- uint32_t offset = 0;
- uint32_t strOffset = 0;
+ DObj->getLineStrSection())) {
+ DataExtractor strData(DObj->getLineStrSection(), isLittleEndian(), 0);
+ uint64_t offset = 0;
+ uint64_t strOffset = 0;
while (const char *s = strData.getCStr(&offset)) {
- OS << format("0x%8.8x: \"", strOffset);
+ OS << format("0x%8.8" PRIx64 ": \"", strOffset);
OS.write_escaped(s);
OS << "\"\n";
strOffset = offset;
@@ -514,11 +511,11 @@ void DWARFContext::dump(
}
if (shouldDump(Explicit, ".debug_ranges", DIDT_ID_DebugRanges,
- DObj->getRangeSection().Data)) {
+ DObj->getRangesSection().Data)) {
uint8_t savedAddressByteSize = getCUAddrSize();
- DWARFDataExtractor rangesData(*DObj, DObj->getRangeSection(),
+ DWARFDataExtractor rangesData(*DObj, DObj->getRangesSection(),
isLittleEndian(), savedAddressByteSize);
- uint32_t offset = 0;
+ uint64_t offset = 0;
DWARFDebugRangeList rangeList;
while (rangesData.isValidOffset(offset)) {
if (Error E = rangeList.extract(rangesData, &offset)) {
@@ -552,38 +549,38 @@ void DWARFContext::dump(
}
if (shouldDump(Explicit, ".debug_pubnames", DIDT_ID_DebugPubnames,
- DObj->getPubNamesSection().Data))
- DWARFDebugPubTable(*DObj, DObj->getPubNamesSection(), isLittleEndian(), false)
+ DObj->getPubnamesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getPubnamesSection(), isLittleEndian(), false)
.dump(OS);
if (shouldDump(Explicit, ".debug_pubtypes", DIDT_ID_DebugPubtypes,
- DObj->getPubTypesSection().Data))
- DWARFDebugPubTable(*DObj, DObj->getPubTypesSection(), isLittleEndian(), false)
+ DObj->getPubtypesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getPubtypesSection(), isLittleEndian(), false)
.dump(OS);
if (shouldDump(Explicit, ".debug_gnu_pubnames", DIDT_ID_DebugGnuPubnames,
- DObj->getGnuPubNamesSection().Data))
- DWARFDebugPubTable(*DObj, DObj->getGnuPubNamesSection(), isLittleEndian(),
+ DObj->getGnuPubnamesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getGnuPubnamesSection(), isLittleEndian(),
true /* GnuStyle */)
.dump(OS);
if (shouldDump(Explicit, ".debug_gnu_pubtypes", DIDT_ID_DebugGnuPubtypes,
- DObj->getGnuPubTypesSection().Data))
- DWARFDebugPubTable(*DObj, DObj->getGnuPubTypesSection(), isLittleEndian(),
+ DObj->getGnuPubtypesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getGnuPubtypesSection(), isLittleEndian(),
true /* GnuStyle */)
.dump(OS);
if (shouldDump(Explicit, ".debug_str_offsets", DIDT_ID_DebugStrOffsets,
- DObj->getStringOffsetSection().Data))
+ DObj->getStrOffsetsSection().Data))
dumpStringOffsetsSection(OS, "debug_str_offsets", *DObj,
- DObj->getStringOffsetSection(),
- DObj->getStringSection(), normal_units(),
+ DObj->getStrOffsetsSection(),
+ DObj->getStrSection(), normal_units(),
isLittleEndian(), getMaxVersion());
if (shouldDump(ExplicitDWO, ".debug_str_offsets.dwo", DIDT_ID_DebugStrOffsets,
- DObj->getStringOffsetDWOSection().Data))
+ DObj->getStrOffsetsDWOSection().Data))
dumpStringOffsetsSection(OS, "debug_str_offsets.dwo", *DObj,
- DObj->getStringOffsetDWOSection(),
- DObj->getStringDWOSection(), dwo_units(),
+ DObj->getStrOffsetsDWOSection(),
+ DObj->getStrDWOSection(), dwo_units(),
isLittleEndian(), getMaxDWOVersion());
if (shouldDump(Explicit, ".gdb_index", DIDT_ID_GdbIndex,
@@ -607,7 +604,7 @@ void DWARFContext::dump(
DObj->getAppleObjCSection().Data))
getAppleObjC().dump(OS);
if (shouldDump(Explicit, ".debug_names", DIDT_ID_DebugNames,
- DObj->getDebugNamesSection().Data))
+ DObj->getNamesSection().Data))
getDebugNames().dump(OS);
}
@@ -641,7 +638,7 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
return nullptr;
}
-DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) {
+DWARFDie DWARFContext::getDIEForOffset(uint64_t Offset) {
parseNormalUnits();
if (auto *CU = NormalUnits.getUnitForOffset(Offset))
return CU->getDIEForOffset(Offset);
@@ -667,7 +664,7 @@ const DWARFUnitIndex &DWARFContext::getCUIndex() {
DataExtractor CUIndexData(DObj->getCUIndexSection(), isLittleEndian(), 0);
- CUIndex = llvm::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
+ CUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
CUIndex->parse(CUIndexData);
return *CUIndex;
}
@@ -678,7 +675,7 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() {
DataExtractor TUIndexData(DObj->getTUIndexSection(), isLittleEndian(), 0);
- TUIndex = llvm::make_unique<DWARFUnitIndex>(DW_SECT_TYPES);
+ TUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_TYPES);
TUIndex->parse(TUIndexData);
return *TUIndex;
}
@@ -688,7 +685,7 @@ DWARFGdbIndex &DWARFContext::getGdbIndex() {
return *GdbIndex;
DataExtractor GdbIndexData(DObj->getGdbIndexSection(), true /*LE*/, 0);
- GdbIndex = llvm::make_unique<DWARFGdbIndex>();
+ GdbIndex = std::make_unique<DWARFGdbIndex>();
GdbIndex->parse(GdbIndexData);
return *GdbIndex;
}
@@ -740,7 +737,7 @@ const DWARFDebugLoclists *DWARFContext::getDebugLocDWO() {
  // Use version 4. DWO does not support the DWARF v5 .debug_loclists yet,
  // which means we are parsing the new-style .debug_loc (a pre-standardized
  // version of the .debug_loclists).
- LocDWO->parse(LocData, 4 /* Version */);
+ LocDWO->parse(LocData, 0, LocData.getData().size(), 4 /* Version */);
return LocDWO.get();
}
@@ -766,7 +763,7 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() {
  // provides this information). This problem is fixed in DWARFv4.
// See this dwarf-discuss discussion for more details:
// http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html
- DWARFDataExtractor debugFrameData(DObj->getDebugFrameSection(),
+ DWARFDataExtractor debugFrameData(*DObj, DObj->getFrameSection(),
isLittleEndian(), DObj->getAddressSize());
DebugFrame.reset(new DWARFDebugFrame(getArch(), false /* IsEH */));
DebugFrame->parse(debugFrameData);
@@ -777,8 +774,8 @@ const DWARFDebugFrame *DWARFContext::getEHFrame() {
if (EHFrame)
return EHFrame.get();
- DWARFDataExtractor debugFrameData(DObj->getEHFrameSection(), isLittleEndian(),
- DObj->getAddressSize());
+ DWARFDataExtractor debugFrameData(*DObj, DObj->getEHFrameSection(),
+ isLittleEndian(), DObj->getAddressSize());
DebugFrame.reset(new DWARFDebugFrame(getArch(), true /* IsEH */));
DebugFrame->parse(debugFrameData);
return DebugFrame.get();
@@ -809,29 +806,29 @@ static T &getAccelTable(std::unique_ptr<T> &Cache, const DWARFObject &Obj,
}
const DWARFDebugNames &DWARFContext::getDebugNames() {
- return getAccelTable(Names, *DObj, DObj->getDebugNamesSection(),
- DObj->getStringSection(), isLittleEndian());
+ return getAccelTable(Names, *DObj, DObj->getNamesSection(),
+ DObj->getStrSection(), isLittleEndian());
}
const AppleAcceleratorTable &DWARFContext::getAppleNames() {
return getAccelTable(AppleNames, *DObj, DObj->getAppleNamesSection(),
- DObj->getStringSection(), isLittleEndian());
+ DObj->getStrSection(), isLittleEndian());
}
const AppleAcceleratorTable &DWARFContext::getAppleTypes() {
return getAccelTable(AppleTypes, *DObj, DObj->getAppleTypesSection(),
- DObj->getStringSection(), isLittleEndian());
+ DObj->getStrSection(), isLittleEndian());
}
const AppleAcceleratorTable &DWARFContext::getAppleNamespaces() {
return getAccelTable(AppleNamespaces, *DObj,
DObj->getAppleNamespacesSection(),
- DObj->getStringSection(), isLittleEndian());
+ DObj->getStrSection(), isLittleEndian());
}
const AppleAcceleratorTable &DWARFContext::getAppleObjC() {
return getAccelTable(AppleObjC, *DObj, DObj->getAppleObjCSection(),
- DObj->getStringSection(), isLittleEndian());
+ DObj->getStrSection(), isLittleEndian());
}
const DWARFDebugLine::LineTable *
@@ -858,7 +855,7 @@ Expected<const DWARFDebugLine::LineTable *> DWARFContext::getLineTableForUnit(
if (!Offset)
return nullptr; // No line table for this compile unit.
- uint32_t stmtOffset = *Offset + U->getLineTableOffset();
+ uint64_t stmtOffset = *Offset + U->getLineTableOffset();
// See if the line table is cached.
if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
return lt;
@@ -898,7 +895,7 @@ void DWARFContext::parseDWOUnits(bool Lazy) {
});
}
-DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) {
+DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint64_t Offset) {
parseNormalUnits();
return dyn_cast_or_null<DWARFCompileUnit>(
NormalUnits.getUnitForOffset(Offset));
@@ -906,7 +903,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) {
DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
// First, get the offset of the compile unit.
- uint32_t CUOffset = getDebugAranges()->findAddress(Address);
+ uint64_t CUOffset = getDebugAranges()->findAddress(Address);
// Retrieve the compile unit.
return getCompileUnitForOffset(CUOffset);
}
@@ -1118,8 +1115,8 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange(
if (!CU)
return Lines;
- std::string FunctionName = "<invalid>";
uint32_t StartLine = 0;
+ std::string FunctionName(DILineInfo::BadString);
getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind,
FunctionName, StartLine);
@@ -1379,46 +1376,50 @@ class DWARFObjInMemory final : public DWARFObject {
InfoSectionMap TypesDWOSections;
DWARFSectionMap LocSection;
- DWARFSectionMap LocListsSection;
+ DWARFSectionMap LoclistsSection;
DWARFSectionMap LineSection;
- DWARFSectionMap RangeSection;
+ DWARFSectionMap RangesSection;
DWARFSectionMap RnglistsSection;
- DWARFSectionMap StringOffsetSection;
+ DWARFSectionMap StrOffsetsSection;
DWARFSectionMap LineDWOSection;
+ DWARFSectionMap FrameSection;
+ DWARFSectionMap EHFrameSection;
DWARFSectionMap LocDWOSection;
- DWARFSectionMap StringOffsetDWOSection;
- DWARFSectionMap RangeDWOSection;
+ DWARFSectionMap StrOffsetsDWOSection;
+ DWARFSectionMap RangesDWOSection;
DWARFSectionMap RnglistsDWOSection;
DWARFSectionMap AddrSection;
DWARFSectionMap AppleNamesSection;
DWARFSectionMap AppleTypesSection;
DWARFSectionMap AppleNamespacesSection;
DWARFSectionMap AppleObjCSection;
- DWARFSectionMap DebugNamesSection;
- DWARFSectionMap PubNamesSection;
- DWARFSectionMap PubTypesSection;
- DWARFSectionMap GnuPubNamesSection;
- DWARFSectionMap GnuPubTypesSection;
+ DWARFSectionMap NamesSection;
+ DWARFSectionMap PubnamesSection;
+ DWARFSectionMap PubtypesSection;
+ DWARFSectionMap GnuPubnamesSection;
+ DWARFSectionMap GnuPubtypesSection;
DWARFSectionMap *mapNameToDWARFSection(StringRef Name) {
return StringSwitch<DWARFSectionMap *>(Name)
.Case("debug_loc", &LocSection)
- .Case("debug_loclists", &LocListsSection)
+ .Case("debug_loclists", &LoclistsSection)
.Case("debug_line", &LineSection)
- .Case("debug_str_offsets", &StringOffsetSection)
- .Case("debug_ranges", &RangeSection)
+ .Case("debug_frame", &FrameSection)
+ .Case("eh_frame", &EHFrameSection)
+ .Case("debug_str_offsets", &StrOffsetsSection)
+ .Case("debug_ranges", &RangesSection)
.Case("debug_rnglists", &RnglistsSection)
.Case("debug_loc.dwo", &LocDWOSection)
.Case("debug_line.dwo", &LineDWOSection)
- .Case("debug_names", &DebugNamesSection)
+ .Case("debug_names", &NamesSection)
.Case("debug_rnglists.dwo", &RnglistsDWOSection)
- .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
+ .Case("debug_str_offsets.dwo", &StrOffsetsDWOSection)
.Case("debug_addr", &AddrSection)
.Case("apple_names", &AppleNamesSection)
- .Case("debug_pubnames", &PubNamesSection)
- .Case("debug_pubtypes", &PubTypesSection)
- .Case("debug_gnu_pubnames", &GnuPubNamesSection)
- .Case("debug_gnu_pubtypes", &GnuPubTypesSection)
+ .Case("debug_pubnames", &PubnamesSection)
+ .Case("debug_pubtypes", &PubtypesSection)
+ .Case("debug_gnu_pubnames", &GnuPubnamesSection)
+ .Case("debug_gnu_pubtypes", &GnuPubtypesSection)
.Case("apple_types", &AppleTypesSection)
.Case("apple_namespaces", &AppleNamespacesSection)
.Case("apple_namespac", &AppleNamespacesSection)
@@ -1427,17 +1428,15 @@ class DWARFObjInMemory final : public DWARFObject {
}
StringRef AbbrevSection;
- StringRef ARangeSection;
- StringRef DebugFrameSection;
- StringRef EHFrameSection;
- StringRef StringSection;
+ StringRef ArangesSection;
+ StringRef StrSection;
StringRef MacinfoSection;
StringRef AbbrevDWOSection;
- StringRef StringDWOSection;
+ StringRef StrDWOSection;
StringRef CUIndexSection;
StringRef GdbIndexSection;
StringRef TUIndexSection;
- StringRef LineStringSection;
+ StringRef LineStrSection;
// A deque holding section data whose iterators are not invalidated when
// new decompressed sections are inserted at the end.
@@ -1448,17 +1447,15 @@ class DWARFObjInMemory final : public DWARFObject {
return &Sec->Data;
return StringSwitch<StringRef *>(Name)
.Case("debug_abbrev", &AbbrevSection)
- .Case("debug_aranges", &ARangeSection)
- .Case("debug_frame", &DebugFrameSection)
- .Case("eh_frame", &EHFrameSection)
- .Case("debug_str", &StringSection)
+ .Case("debug_aranges", &ArangesSection)
+ .Case("debug_str", &StrSection)
.Case("debug_macinfo", &MacinfoSection)
.Case("debug_abbrev.dwo", &AbbrevDWOSection)
- .Case("debug_str.dwo", &StringDWOSection)
+ .Case("debug_str.dwo", &StrDWOSection)
.Case("debug_cu_index", &CUIndexSection)
.Case("debug_tu_index", &TUIndexSection)
.Case("gdb_index", &GdbIndexSection)
- .Case("debug_line_str", &LineStringSection)
+ .Case("debug_line_str", &LineStrSection)
// Any more debug info sections go here.
.Default(nullptr);
}
@@ -1513,7 +1510,11 @@ public:
StringMap<unsigned> SectionAmountMap;
for (const SectionRef &Section : Obj.sections()) {
StringRef Name;
- Section.getName(Name);
+ if (auto NameOrErr = Section.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
++SectionAmountMap[Name];
SectionNames.push_back({ Name, true });
@@ -1526,10 +1527,19 @@ public:
continue;
StringRef Data;
- section_iterator RelocatedSection = Section.getRelocatedSection();
+ Expected<section_iterator> SecOrErr = Section.getRelocatedSection();
+ if (!SecOrErr) {
+ ErrorPolicy EP = HandleError(createError(
+ "failed to get relocated section: ", SecOrErr.takeError()));
+ if (EP == ErrorPolicy::Halt)
+ return;
+ continue;
+ }
+
// Try to obtain an already relocated version of this section.
// Else use the unrelocated section from the object file. We'll have to
// apply relocations ourselves later.
+ section_iterator RelocatedSection = *SecOrErr;
if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data)) {
Expected<StringRef> E = Section.getContents();
if (E)
@@ -1560,7 +1570,7 @@ public:
*SectionData = Data;
if (Name == "debug_ranges") {
// FIXME: Use the other dwo range section when we emit it.
- RangeDWOSection.Data = Data;
+ RangesDWOSection.Data = Data;
}
} else if (Name == "debug_info") {
// Find debug_info and debug_types data by section rather than name as
@@ -1578,12 +1588,15 @@ public:
continue;
StringRef RelSecName;
- StringRef RelSecData;
- RelocatedSection->getName(RelSecName);
+ if (auto NameOrErr = RelocatedSection->getName())
+ RelSecName = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
// If the section we're relocating was relocated already by the JIT,
// then we used the relocated version above, so we do not need to process
// relocations for it now.
+ StringRef RelSecData;
if (L && L->getLoadedSectionContents(*RelocatedSection, RelSecData))
continue;
@@ -1710,12 +1723,12 @@ public:
const DWARFSection &getLocDWOSection() const override {
return LocDWOSection;
}
- StringRef getStringDWOSection() const override { return StringDWOSection; }
- const DWARFSection &getStringOffsetDWOSection() const override {
- return StringOffsetDWOSection;
+ StringRef getStrDWOSection() const override { return StrDWOSection; }
+ const DWARFSection &getStrOffsetsDWOSection() const override {
+ return StrOffsetsDWOSection;
}
- const DWARFSection &getRangeDWOSection() const override {
- return RangeDWOSection;
+ const DWARFSection &getRangesDWOSection() const override {
+ return RangesDWOSection;
}
const DWARFSection &getRnglistsDWOSection() const override {
return RnglistsDWOSection;
@@ -1726,10 +1739,10 @@ public:
StringRef getTUIndexSection() const override { return TUIndexSection; }
// DWARF v5
- const DWARFSection &getStringOffsetSection() const override {
- return StringOffsetSection;
+ const DWARFSection &getStrOffsetsSection() const override {
+ return StrOffsetsSection;
}
- StringRef getLineStringSection() const override { return LineStringSection; }
+ StringRef getLineStrSection() const override { return LineStrSection; }
// Sections for DWARF5 split dwarf proposal.
void forEachInfoDWOSections(
@@ -1745,24 +1758,28 @@ public:
StringRef getAbbrevSection() const override { return AbbrevSection; }
const DWARFSection &getLocSection() const override { return LocSection; }
- const DWARFSection &getLoclistsSection() const override { return LocListsSection; }
- StringRef getARangeSection() const override { return ARangeSection; }
- StringRef getDebugFrameSection() const override { return DebugFrameSection; }
- StringRef getEHFrameSection() const override { return EHFrameSection; }
+ const DWARFSection &getLoclistsSection() const override { return LoclistsSection; }
+ StringRef getArangesSection() const override { return ArangesSection; }
+ const DWARFSection &getFrameSection() const override {
+ return FrameSection;
+ }
+ const DWARFSection &getEHFrameSection() const override {
+ return EHFrameSection;
+ }
const DWARFSection &getLineSection() const override { return LineSection; }
- StringRef getStringSection() const override { return StringSection; }
- const DWARFSection &getRangeSection() const override { return RangeSection; }
+ StringRef getStrSection() const override { return StrSection; }
+ const DWARFSection &getRangesSection() const override { return RangesSection; }
const DWARFSection &getRnglistsSection() const override {
return RnglistsSection;
}
StringRef getMacinfoSection() const override { return MacinfoSection; }
- const DWARFSection &getPubNamesSection() const override { return PubNamesSection; }
- const DWARFSection &getPubTypesSection() const override { return PubTypesSection; }
- const DWARFSection &getGnuPubNamesSection() const override {
- return GnuPubNamesSection;
+ const DWARFSection &getPubnamesSection() const override { return PubnamesSection; }
+ const DWARFSection &getPubtypesSection() const override { return PubtypesSection; }
+ const DWARFSection &getGnuPubnamesSection() const override {
+ return GnuPubnamesSection;
}
- const DWARFSection &getGnuPubTypesSection() const override {
- return GnuPubTypesSection;
+ const DWARFSection &getGnuPubtypesSection() const override {
+ return GnuPubtypesSection;
}
const DWARFSection &getAppleNamesSection() const override {
return AppleNamesSection;
@@ -1776,8 +1793,8 @@ public:
const DWARFSection &getAppleObjCSection() const override {
return AppleObjCSection;
}
- const DWARFSection &getDebugNamesSection() const override {
- return DebugNamesSection;
+ const DWARFSection &getNamesSection() const override {
+ return NamesSection;
}
StringRef getFileName() const override { return FileName; }
@@ -1799,16 +1816,16 @@ std::unique_ptr<DWARFContext>
DWARFContext::create(const object::ObjectFile &Obj, const LoadedObjectInfo *L,
function_ref<ErrorPolicy(Error)> HandleError,
std::string DWPName) {
- auto DObj = llvm::make_unique<DWARFObjInMemory>(Obj, L, HandleError);
- return llvm::make_unique<DWARFContext>(std::move(DObj), std::move(DWPName));
+ auto DObj = std::make_unique<DWARFObjInMemory>(Obj, L, HandleError);
+ return std::make_unique<DWARFContext>(std::move(DObj), std::move(DWPName));
}
std::unique_ptr<DWARFContext>
DWARFContext::create(const StringMap<std::unique_ptr<MemoryBuffer>> &Sections,
uint8_t AddrSize, bool isLittleEndian) {
auto DObj =
- llvm::make_unique<DWARFObjInMemory>(Sections, AddrSize, isLittleEndian);
- return llvm::make_unique<DWARFContext>(std::move(DObj), "");
+ std::make_unique<DWARFObjInMemory>(Sections, AddrSize, isLittleEndian);
+ return std::make_unique<DWARFContext>(std::move(DObj), "");
}
Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) {
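Beyond the 64-bit offsets, DWARFContext.cpp picks up two API migrations visible above: llvm::make_unique becomes std::make_unique (C++14), and SectionRef::getName() now returns Expected<StringRef> instead of filling an out-parameter. A minimal consumption sketch matching the pattern in the hunks:

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"

using namespace llvm;

StringRef sectionNameOrEmpty(const object::SectionRef &Sec) {
  StringRef Name;
  if (Expected<StringRef> NameOrErr = Sec.getName())
    Name = *NameOrErr;
  else
    // An Expected carrying an unhandled Error asserts on destruction,
    // so the failure path must consume it explicitly.
    consumeError(NameOrErr.takeError());
  return Name;
}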
diff --git a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
index b9adf8cb1d99..53e676bc7031 100644
--- a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
@@ -12,14 +12,15 @@
using namespace llvm;
-uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off,
- uint64_t *SecNdx) const {
+uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint64_t *Off,
+ uint64_t *SecNdx,
+ Error *Err) const {
if (SecNdx)
*SecNdx = object::SectionedAddress::UndefSection;
if (!Section)
- return getUnsigned(Off, Size);
+ return getUnsigned(Off, Size, Err);
Optional<RelocAddrEntry> E = Obj->find(*Section, *Off);
- uint64_t A = getUnsigned(Off, Size);
+ uint64_t A = getUnsigned(Off, Size, Err);
if (!E)
return A;
if (SecNdx)
@@ -31,13 +32,13 @@ uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off,
}
Optional<uint64_t>
-DWARFDataExtractor::getEncodedPointer(uint32_t *Offset, uint8_t Encoding,
+DWARFDataExtractor::getEncodedPointer(uint64_t *Offset, uint8_t Encoding,
uint64_t PCRelOffset) const {
if (Encoding == dwarf::DW_EH_PE_omit)
return None;
uint64_t Result = 0;
- uint32_t OldOffset = *Offset;
+ uint64_t OldOffset = *Offset;
  // First, get the value.
switch (Encoding & 0x0F) {
case dwarf::DW_EH_PE_absptr:
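getEncodedPointer() keeps its two-phase structure after the signature change; the hunk cuts off at the start of the format switch. A hedged sketch of the overall scheme, showing only two of the storage formats (mask values follow the usual .eh_frame convention):

#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/DataExtractor.h"

using namespace llvm;

// Low nibble of the DW_EH_PE encoding: how the value is stored.
// Bits masked by 0x70: how it is applied (absolute, PC-relative, ...).
Optional<uint64_t> decodePointerSketch(DataExtractor Data, uint64_t *Offset,
                                       uint8_t Encoding, uint64_t PCRelOffset) {
  uint64_t Result;
  switch (Encoding & 0x0f) {
  case dwarf::DW_EH_PE_udata4: Result = Data.getU32(Offset); break;
  case dwarf::DW_EH_PE_udata8: Result = Data.getU64(Offset); break;
  default: return None; // remaining formats elided in this sketch
  }
  if ((Encoding & 0x70) == dwarf::DW_EH_PE_pcrel)
    Result += PCRelOffset;
  return Result;
}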
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 31b324e5eb27..4afac2f99503 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -26,9 +26,9 @@ void DWARFAbbreviationDeclarationSet::clear() {
}
bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data,
- uint32_t *OffsetPtr) {
+ uint64_t *OffsetPtr) {
clear();
- const uint32_t BeginOffset = *OffsetPtr;
+ const uint64_t BeginOffset = *OffsetPtr;
Offset = BeginOffset;
DWARFAbbreviationDeclaration AbbrDecl;
uint32_t PrevAbbrCode = 0;
@@ -82,12 +82,12 @@ void DWARFDebugAbbrev::extract(DataExtractor Data) {
void DWARFDebugAbbrev::parse() const {
if (!Data)
return;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
auto I = AbbrDeclSets.begin();
while (Data->isValidOffset(Offset)) {
while (I != AbbrDeclSets.end() && I->first < Offset)
++I;
- uint32_t CUAbbrOffset = Offset;
+ uint64_t CUAbbrOffset = Offset;
DWARFAbbreviationDeclarationSet AbbrDecls;
if (!AbbrDecls.extract(*Data, &Offset))
break;
@@ -124,7 +124,7 @@ DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const {
}
if (Data && CUAbbrOffset < Data->getData().size()) {
- uint32_t Offset = CUAbbrOffset;
+ uint64_t Offset = CUAbbrOffset;
DWARFAbbreviationDeclarationSet AbbrDecls;
if (!AbbrDecls.extract(*Data, &Offset))
return nullptr;
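DWARFDebugAbbrev parses one declaration set per abbreviation-table offset and memoizes it; the hunks above only widen the cursors that walk the section. A hedged sketch of that offset-keyed caching, with hypothetical type and member names:

#include <cstdint>
#include <map>
#include <utility>

// Stand-in for DWARFAbbreviationDeclarationSet; the stub extract() just
// advances the cursor so the sketch compiles.
struct AbbrevSet {
  bool extract(const uint8_t *Data, uint64_t *OffsetPtr) {
    (void)Data; ++*OffsetPtr; return true; // placeholder body
  }
};

struct AbbrevCache {
  std::map<uint64_t, AbbrevSet> Sets; // keyed by table offset

  const AbbrevSet *get(const uint8_t *Data, uint64_t CUAbbrOffset) {
    auto It = Sets.find(CUAbbrOffset);
    if (It != Sets.end())
      return &It->second;           // already parsed: reuse
    uint64_t Offset = CUAbbrOffset; // 64-bit cursor, as in the patch
    AbbrevSet Set;
    if (!Set.extract(Data, &Offset))
      return nullptr;               // malformed table
    return &Sets.emplace(CUAbbrOffset, std::move(Set)).first->second;
  }
};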
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
index 58626539bba4..f71543799e28 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -19,7 +19,7 @@ void DWARFDebugAddrTable::clear() {
}
Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data,
- uint32_t *OffsetPtr,
+ uint64_t *OffsetPtr,
uint16_t Version,
uint8_t AddrSize,
std::function<void(Error)> WarnCallback) {
@@ -30,7 +30,7 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data,
return createStringError(errc::invalid_argument,
"section is not large enough to contain a "
".debug_addr table length at offset 0x%"
- PRIx32, *OffsetPtr);
+ PRIx64, *OffsetPtr);
uint16_t UnitVersion;
if (Version == 0) {
WarnCallback(createStringError(errc::invalid_argument,
@@ -44,28 +44,28 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data,
Format = dwarf::DwarfFormat::DWARF32;
if (UnitVersion >= 5) {
HeaderData.Length = Data.getU32(OffsetPtr);
- if (HeaderData.Length == 0xffffffffu) {
+ if (HeaderData.Length == dwarf::DW_LENGTH_DWARF64) {
invalidateLength();
return createStringError(errc::not_supported,
- "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx32,
+ "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx64,
HeaderOffset);
}
if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header)) {
uint32_t TmpLength = getLength();
invalidateLength();
return createStringError(errc::invalid_argument,
- ".debug_addr table at offset 0x%" PRIx32
+ ".debug_addr table at offset 0x%" PRIx64
" has too small length (0x%" PRIx32
") to contain a complete header",
HeaderOffset, TmpLength);
}
- uint32_t End = HeaderOffset + getLength();
+ uint64_t End = HeaderOffset + getLength();
if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset)) {
uint32_t TmpLength = getLength();
invalidateLength();
return createStringError(errc::invalid_argument,
"section is not large enough to contain a .debug_addr table "
- "of length 0x%" PRIx32 " at offset 0x%" PRIx32,
+ "of length 0x%" PRIx32 " at offset 0x%" PRIx64,
TmpLength, HeaderOffset);
}
@@ -88,7 +88,7 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data,
// and consists only of a series of addresses.
if (HeaderData.Version > 5) {
return createStringError(errc::not_supported, "version %" PRIu16
- " of .debug_addr section at offset 0x%" PRIx32 " is not supported",
+ " of .debug_addr section at offset 0x%" PRIx64 " is not supported",
HeaderData.Version, HeaderOffset);
}
// FIXME: For now we just treat version mismatch as an error,
@@ -97,19 +97,19 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data,
// attribute in the info table.
if (HeaderData.Version != UnitVersion)
return createStringError(errc::invalid_argument,
- ".debug_addr table at offset 0x%" PRIx32
+ ".debug_addr table at offset 0x%" PRIx64
" has version %" PRIu16
" which is different from the version suggested"
" by the DWARF unit header: %" PRIu16,
HeaderOffset, HeaderData.Version, UnitVersion);
if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
return createStringError(errc::not_supported,
- ".debug_addr table at offset 0x%" PRIx32
+ ".debug_addr table at offset 0x%" PRIx64
" has unsupported address size %" PRIu8,
HeaderOffset, HeaderData.AddrSize);
if (HeaderData.AddrSize != AddrSize && AddrSize != 0)
return createStringError(errc::invalid_argument,
- ".debug_addr table at offset 0x%" PRIx32
+ ".debug_addr table at offset 0x%" PRIx64
" has address size %" PRIu8
" which is different from CU address size %" PRIu8,
HeaderOffset, HeaderData.AddrSize, AddrSize);
@@ -117,13 +117,13 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data,
// TODO: add support for non-zero segment selector size.
if (HeaderData.SegSize != 0)
return createStringError(errc::not_supported,
- ".debug_addr table at offset 0x%" PRIx32
+ ".debug_addr table at offset 0x%" PRIx64
" has unsupported segment selector size %" PRIu8,
HeaderOffset, HeaderData.SegSize);
if (DataSize % HeaderData.AddrSize != 0) {
invalidateLength();
return createStringError(errc::invalid_argument,
- ".debug_addr table at offset 0x%" PRIx32
+ ".debug_addr table at offset 0x%" PRIx64
" contains data of size %" PRIu32
" which is not a multiple of addr size %" PRIu8,
HeaderOffset, DataSize, HeaderData.AddrSize);
@@ -162,7 +162,7 @@ Expected<uint64_t> DWARFDebugAddrTable::getAddrEntry(uint32_t Index) const {
return Addrs[Index];
return createStringError(errc::invalid_argument,
"Index %" PRIu32 " is out of range of the "
- ".debug_addr table at offset 0x%" PRIx32,
+ ".debug_addr table at offset 0x%" PRIx64,
Index, HeaderOffset);
}
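Several hunks in this import replace the literal 0xffffffffu with dwarf::DW_LENGTH_DWARF64. That sentinel is the escape from a 32-bit to a 64-bit unit length; the probe looks like this (a sketch mirroring the .debug_frame parser further below):

#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/DataExtractor.h"

using namespace llvm;

// An initial 32-bit length of 0xffffffff (DW_LENGTH_DWARF64) means the
// real 64-bit length follows; smaller values are plain DWARF32 lengths.
uint64_t readInitialLength(DataExtractor Data, uint64_t *Offset,
                           bool &IsDWARF64) {
  uint64_t Length = Data.getU32(Offset);
  IsDWARF64 = (Length == dwarf::DW_LENGTH_DWARF64);
  if (IsDWARF64)
    Length = Data.getU64(Offset);
  return Length;
}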
diff --git a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index 6551b61accb8..200b2d52a02b 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -24,13 +24,13 @@ void DWARFDebugArangeSet::Descriptor::dump(raw_ostream &OS,
}
void DWARFDebugArangeSet::clear() {
- Offset = -1U;
+ Offset = -1ULL;
std::memset(&HeaderData, 0, sizeof(Header));
ArangeDescriptors.clear();
}
bool
-DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
+DWARFDebugArangeSet::extract(DataExtractor data, uint64_t *offset_ptr) {
if (data.isValidOffset(*offset_ptr)) {
ArangeDescriptors.clear();
Offset = *offset_ptr;
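The -1U to -1ULL change in clear() above is not cosmetic: with Offset now a uint64_t, -1U zero-extends to a value that is itself a legitimate 64-bit offset, so the "cleared" sentinel would stop matching an all-ones marker. In short:

#include <cstdint>

uint64_t A = -1U;   // 0x00000000ffffffff: a valid 64-bit offset
uint64_t B = -1ULL; // 0xffffffffffffffff: unambiguous "not set" sentinel
static_assert(uint64_t(-1U) != uint64_t(-1ULL), "the two sentinels differ");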
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index 6460c9feeab8..ca6043109cdb 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -23,11 +23,11 @@ using namespace llvm;
void DWARFDebugAranges::extract(DataExtractor DebugArangesData) {
if (!DebugArangesData.isValidOffset(0))
return;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
DWARFDebugArangeSet Set;
while (Set.extract(DebugArangesData, &Offset)) {
- uint32_t CUOffset = Set.getCompileUnitDIEOffset();
+ uint64_t CUOffset = Set.getCompileUnitDIEOffset();
for (const auto &Desc : Set.descriptors()) {
uint64_t LowPC = Desc.Address;
uint64_t HighPC = Desc.getEndAddress();
@@ -43,7 +43,7 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) {
return;
// Extract aranges from .debug_aranges section.
- DataExtractor ArangesData(CTX->getDWARFObj().getARangeSection(),
+ DataExtractor ArangesData(CTX->getDWARFObj().getArangesSection(),
CTX->isLittleEndian(), 0);
extract(ArangesData);
@@ -51,7 +51,7 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) {
// it may describe only a small subset of compilation units, so we need to
// manually build aranges for the rest of them.
for (const auto &CU : CTX->compile_units()) {
- uint32_t CUOffset = CU->getOffset();
+ uint64_t CUOffset = CU->getOffset();
if (ParsedCUOffsets.insert(CUOffset).second) {
Expected<DWARFAddressRangesVector> CURanges = CU->collectAddressRanges();
if (!CURanges)
@@ -71,7 +71,7 @@ void DWARFDebugAranges::clear() {
ParsedCUOffsets.clear();
}
-void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC,
+void DWARFDebugAranges::appendRange(uint64_t CUOffset, uint64_t LowPC,
uint64_t HighPC) {
if (LowPC >= HighPC)
return;
@@ -80,7 +80,7 @@ void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC,
}
void DWARFDebugAranges::construct() {
- std::multiset<uint32_t> ValidCUs; // Maintain the set of CUs describing
+ std::multiset<uint64_t> ValidCUs; // Maintain the set of CUs describing
// a current address range.
llvm::sort(Endpoints);
uint64_t PrevAddress = -1ULL;
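construct(), cut off by the hunk, resolves overlapping CU coverage with an endpoint sweep; the diff only widens the multiset key to the 64-bit CU offset. A hedged sketch of the sweep, with hypothetical field names:

#include <algorithm>
#include <cstdint>
#include <set>
#include <vector>

// Hypothetical endpoint record: a range start or end at Address,
// contributed by the CU at CUOffset (the key widened by this patch).
struct Endpoint {
  uint64_t Address;
  uint64_t CUOffset;
  bool IsRangeStart;
  bool operator<(const Endpoint &RHS) const { return Address < RHS.Address; }
};

void sweep(std::vector<Endpoint> &Endpoints) {
  std::sort(Endpoints.begin(), Endpoints.end());
  std::multiset<uint64_t> ValidCUs; // CUs covering the current address
  for (const Endpoint &E : Endpoints) {
    if (E.IsRangeStart) {
      ValidCUs.insert(E.CUOffset);
    } else {
      auto It = ValidCUs.find(E.CUOffset);
      if (It != ValidCUs.end())
        ValidCUs.erase(It);
    }
    // The real code emits an arange for the span between consecutive
    // endpoints while exactly one CU is live.
  }
}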
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index b3f23366f2a2..81b00f65741b 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -34,10 +34,10 @@ using namespace dwarf;
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f;
-Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset,
- uint32_t EndOffset) {
+Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset,
+ uint64_t EndOffset) {
while (*Offset < EndOffset) {
- uint8_t Opcode = Data.getU8(Offset);
+ uint8_t Opcode = Data.getRelocatedValue(1, Offset);
// Some instructions have a primary opcode encoded in the top bits.
uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK;
@@ -74,19 +74,19 @@ Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset,
break;
case DW_CFA_set_loc:
// Operands: Address
- addInstruction(Opcode, Data.getAddress(Offset));
+ addInstruction(Opcode, Data.getRelocatedAddress(Offset));
break;
case DW_CFA_advance_loc1:
// Operands: 1-byte delta
- addInstruction(Opcode, Data.getU8(Offset));
+ addInstruction(Opcode, Data.getRelocatedValue(1, Offset));
break;
case DW_CFA_advance_loc2:
// Operands: 2-byte delta
- addInstruction(Opcode, Data.getU16(Offset));
+ addInstruction(Opcode, Data.getRelocatedValue(2, Offset));
break;
case DW_CFA_advance_loc4:
// Operands: 4-byte delta
- addInstruction(Opcode, Data.getU32(Offset));
+ addInstruction(Opcode, Data.getRelocatedValue(4, Offset));
break;
case DW_CFA_restore_extended:
case DW_CFA_undefined:
@@ -331,7 +331,7 @@ DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch,
DWARFDebugFrame::~DWARFDebugFrame() = default;
static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
- uint32_t Offset, int Length) {
+ uint64_t Offset, int Length) {
errs() << "DUMP: ";
for (int i = 0; i < Length; ++i) {
uint8_t c = Data.getU8(&Offset);
@@ -344,7 +344,7 @@ static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
// noreturn attribute usage in lambdas. Once support for those
// compilers is phased out, we can remove this and revert to
// a ReportError lambda: [StartOffset](const char *ErrorMsg).
-static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset,
+static void LLVM_ATTRIBUTE_NORETURN ReportError(uint64_t StartOffset,
const char *ErrorMsg) {
std::string Str;
raw_string_ostream OS(Str);
@@ -354,32 +354,30 @@ static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset,
}
void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
- uint32_t Offset = 0;
- DenseMap<uint32_t, CIE *> CIEs;
+ uint64_t Offset = 0;
+ DenseMap<uint64_t, CIE *> CIEs;
while (Data.isValidOffset(Offset)) {
- uint32_t StartOffset = Offset;
+ uint64_t StartOffset = Offset;
bool IsDWARF64 = false;
- uint64_t Length = Data.getU32(&Offset);
+ uint64_t Length = Data.getRelocatedValue(4, &Offset);
uint64_t Id;
- if (Length == UINT32_MAX) {
+ if (Length == dwarf::DW_LENGTH_DWARF64) {
// DWARF-64 is distinguished by the first 32 bits of the initial length
// field being 0xffffffff. Then, the next 64 bits are the actual entry
// length.
IsDWARF64 = true;
- Length = Data.getU64(&Offset);
+ Length = Data.getRelocatedValue(8, &Offset);
}
// At this point, Offset points to the next field after Length.
// Length is the structure size excluding itself. Compute an offset one
// past the end of the structure (needed to know how many instructions to
// read).
- // TODO: For honest DWARF64 support, DataExtractor will have to treat
- // offset_ptr as uint64_t*
- uint32_t StartStructureOffset = Offset;
- uint32_t EndStructureOffset = Offset + static_cast<uint32_t>(Length);
+ uint64_t StartStructureOffset = Offset;
+ uint64_t EndStructureOffset = Offset + Length;
// The Id field's size depends on the DWARF format
Id = Data.getUnsigned(&Offset, (IsDWARF64 && !IsEH) ? 8 : 4);
@@ -407,22 +405,23 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
Optional<uint32_t> PersonalityEncoding;
if (IsEH) {
Optional<uint64_t> AugmentationLength;
- uint32_t StartAugmentationOffset;
- uint32_t EndAugmentationOffset;
+ uint64_t StartAugmentationOffset;
+ uint64_t EndAugmentationOffset;
// Walk the augmentation string to get all the augmentation data.
for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
switch (AugmentationString[i]) {
default:
- ReportError(StartOffset,
- "Unknown augmentation character in entry at %lx");
+ ReportError(
+ StartOffset,
+ "Unknown augmentation character in entry at %" PRIx64);
case 'L':
LSDAPointerEncoding = Data.getU8(&Offset);
break;
case 'P': {
if (Personality)
ReportError(StartOffset,
- "Duplicate personality in entry at %lx");
+ "Duplicate personality in entry at %" PRIx64);
PersonalityEncoding = Data.getU8(&Offset);
Personality = Data.getEncodedPointer(
&Offset, *PersonalityEncoding,
@@ -438,13 +437,12 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
case 'z':
if (i)
ReportError(StartOffset,
- "'z' must be the first character at %lx");
+ "'z' must be the first character at %" PRIx64);
// Parse the augmentation length first. We only parse it if
// the string contains a 'z'.
AugmentationLength = Data.getULEB128(&Offset);
StartAugmentationOffset = Offset;
- EndAugmentationOffset = Offset +
- static_cast<uint32_t>(*AugmentationLength);
+ EndAugmentationOffset = Offset + *AugmentationLength;
break;
case 'B':
// B-Key is used for signing functions associated with this
@@ -455,14 +453,15 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
if (AugmentationLength.hasValue()) {
if (Offset != EndAugmentationOffset)
- ReportError(StartOffset, "Parsing augmentation data at %lx failed");
+ ReportError(StartOffset,
+ "Parsing augmentation data at %" PRIx64 " failed");
AugmentationData = Data.getData().slice(StartAugmentationOffset,
EndAugmentationOffset);
}
}
- auto Cie = llvm::make_unique<CIE>(
+ auto Cie = std::make_unique<CIE>(
StartOffset, Length, Version, AugmentationString, AddressSize,
SegmentDescriptorSize, CodeAlignmentFactor, DataAlignmentFactor,
ReturnAddressRegister, AugmentationData, FDEPointerEncoding,
@@ -480,8 +479,8 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
if (IsEH) {
// The address size is encoded in the CIE we reference.
if (!Cie)
- ReportError(StartOffset,
- "Parsing FDE data at %lx failed due to missing CIE");
+ ReportError(StartOffset, "Parsing FDE data at %" PRIx64
+ " failed due to missing CIE");
if (auto Val = Data.getEncodedPointer(
&Offset, Cie->getFDEPointerEncoding(),
@@ -498,8 +497,7 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
// Parse the augmentation length and data for this FDE.
uint64_t AugmentationLength = Data.getULEB128(&Offset);
- uint32_t EndAugmentationOffset =
- Offset + static_cast<uint32_t>(AugmentationLength);
+ uint64_t EndAugmentationOffset = Offset + AugmentationLength;
// Decode the LSDA if the CIE augmentation string said we should.
if (Cie->getLSDAPointerEncoding() != DW_EH_PE_omit) {
@@ -509,11 +507,12 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
}
if (Offset != EndAugmentationOffset)
- ReportError(StartOffset, "Parsing augmentation data at %lx failed");
+ ReportError(StartOffset,
+ "Parsing augmentation data at %" PRIx64 " failed");
}
} else {
- InitialLocation = Data.getAddress(&Offset);
- AddressRange = Data.getAddress(&Offset);
+ InitialLocation = Data.getRelocatedAddress(&Offset);
+ AddressRange = Data.getRelocatedAddress(&Offset);
}
Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer,
@@ -527,7 +526,8 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
}
if (Offset != EndStructureOffset)
- ReportError(StartOffset, "Parsing entry instructions at %lx failed");
+ ReportError(StartOffset,
+ "Parsing entry instructions at %" PRIx64 " failed");
}
}
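The DWARFDebugFrame hunks above generalize the frame parser to 64-bit
offsets and to the DWARF64 initial-length escape. As a standalone sketch of
that escape handling, here is a minimal plain-C++ decoder; the helper names
and the little-endian memcpy reads are illustrative only, not the
DWARFDataExtractor API the patch actually uses.

#include <cstdint>
#include <cstring>
#include <optional>

// Result of decoding a DWARF "initial length" field.
struct InitialLength {
  uint64_t Length;     // Entry length, excluding the length field itself.
  bool IsDWARF64;      // True if the 0xffffffff escape value was seen.
  uint64_t NextOffset; // First byte after the length field.
};

// Assumes a little-endian host; LLVM's extractors handle endianness and
// relocations for real.
static std::optional<InitialLength>
readInitialLength(const uint8_t *Buf, uint64_t Size, uint64_t Offset) {
  if (Offset + 4 > Size)
    return std::nullopt;
  uint32_t Len32;
  std::memcpy(&Len32, Buf + Offset, 4);
  Offset += 4;
  if (Len32 < 0xfffffff0u) // Plain 32-bit (DWARF32) length.
    return InitialLength{Len32, false, Offset};
  if (Len32 != 0xffffffffu) // 0xfffffff0..0xfffffffe are reserved.
    return std::nullopt;
  if (Offset + 8 > Size) // DWARF64: the next 64 bits hold the real length.
    return std::nullopt;
  uint64_t Len64;
  std::memcpy(&Len64, Buf + Offset, 8);
  return InitialLength{Len64, true, Offset + 8};
}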
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index d8a755e90df4..87eab34d58ee 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -19,15 +19,15 @@ using namespace llvm;
using namespace dwarf;
bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U,
- uint32_t *OffsetPtr) {
+ uint64_t *OffsetPtr) {
DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor();
- const uint32_t UEndOffset = U.getNextUnitOffset();
+ const uint64_t UEndOffset = U.getNextUnitOffset();
return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0);
}
-bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
+bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
const DWARFDataExtractor &DebugInfoData,
- uint32_t UEndOffset, uint32_t D) {
+ uint64_t UEndOffset, uint32_t D) {
Offset = *OffsetPtr;
Depth = D;
if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset))
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index a1cb1e8582ed..dbee28ff5ab1 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -16,7 +16,6 @@
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -156,7 +155,7 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS,
// Parse v2-v4 directory and file tables.
static void
parseV2DirFileTables(const DWARFDataExtractor &DebugLineData,
- uint32_t *OffsetPtr, uint64_t EndPrologueOffset,
+ uint64_t *OffsetPtr, uint64_t EndPrologueOffset,
DWARFDebugLine::ContentTypeTracker &ContentTypes,
std::vector<DWARFFormValue> &IncludeDirectories,
std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
@@ -187,18 +186,24 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData,
}
// Parse v5 directory/file entry content descriptions.
-// Returns the descriptors, or an empty vector if we did not find a path or
-// ran off the end of the prologue.
-static ContentDescriptors
-parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t
- *OffsetPtr, uint64_t EndPrologueOffset, DWARFDebugLine::ContentTypeTracker
- *ContentTypes) {
+// Returns the descriptors, or an error if we did not find a path or ran off
+// the end of the prologue.
+static llvm::Expected<ContentDescriptors>
+parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
+ uint64_t EndPrologueOffset,
+ DWARFDebugLine::ContentTypeTracker *ContentTypes) {
ContentDescriptors Descriptors;
int FormatCount = DebugLineData.getU8(OffsetPtr);
bool HasPath = false;
for (int I = 0; I != FormatCount; ++I) {
if (*OffsetPtr >= EndPrologueOffset)
- return ContentDescriptors();
+ return createStringError(
+ errc::invalid_argument,
+ "failed to parse entry content descriptions at offset "
+ "0x%8.8" PRIx64
+ " because offset extends beyond the prologue end at offset "
+ "0x%8.8" PRIx64,
+ *OffsetPtr, EndPrologueOffset);
ContentDescriptor Descriptor;
Descriptor.Type =
dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr));
@@ -209,60 +214,82 @@ parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t
ContentTypes->trackContentType(Descriptor.Type);
Descriptors.push_back(Descriptor);
}
- return HasPath ? Descriptors : ContentDescriptors();
+
+ if (!HasPath)
+ return createStringError(errc::invalid_argument,
+ "failed to parse entry content descriptions"
+ " because no path was found");
+ return Descriptors;
}
-static bool
+static Error
parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
- uint32_t *OffsetPtr, uint64_t EndPrologueOffset,
+ uint64_t *OffsetPtr, uint64_t EndPrologueOffset,
const dwarf::FormParams &FormParams,
const DWARFContext &Ctx, const DWARFUnit *U,
DWARFDebugLine::ContentTypeTracker &ContentTypes,
std::vector<DWARFFormValue> &IncludeDirectories,
std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
// Get the directory entry description.
- ContentDescriptors DirDescriptors =
+ llvm::Expected<ContentDescriptors> DirDescriptors =
parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset, nullptr);
- if (DirDescriptors.empty())
- return false;
+ if (!DirDescriptors)
+ return DirDescriptors.takeError();
// Get the directory entries, according to the format described above.
int DirEntryCount = DebugLineData.getU8(OffsetPtr);
for (int I = 0; I != DirEntryCount; ++I) {
if (*OffsetPtr >= EndPrologueOffset)
- return false;
- for (auto Descriptor : DirDescriptors) {
+ return createStringError(
+ errc::invalid_argument,
+ "failed to parse directory entry at offset "
+ "0x%8.8" PRIx64
+ " because offset extends beyond the prologue end at offset "
+ "0x%8.8" PRIx64,
+ *OffsetPtr, EndPrologueOffset);
+ for (auto Descriptor : *DirDescriptors) {
DWARFFormValue Value(Descriptor.Form);
switch (Descriptor.Type) {
case DW_LNCT_path:
if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U))
- return false;
+ return createStringError(errc::invalid_argument,
+ "failed to parse directory entry because "
+ "extracting the form value failed.");
IncludeDirectories.push_back(Value);
break;
default:
if (!Value.skipValue(DebugLineData, OffsetPtr, FormParams))
- return false;
+ return createStringError(errc::invalid_argument,
+ "failed to parse directory entry because "
+ "skipping the form value failed.");
}
}
}
// Get the file entry description.
- ContentDescriptors FileDescriptors =
- parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset,
- &ContentTypes);
- if (FileDescriptors.empty())
- return false;
+ llvm::Expected<ContentDescriptors> FileDescriptors = parseV5EntryFormat(
+ DebugLineData, OffsetPtr, EndPrologueOffset, &ContentTypes);
+ if (!FileDescriptors)
+ return FileDescriptors.takeError();
// Get the file entries, according to the format described above.
int FileEntryCount = DebugLineData.getU8(OffsetPtr);
for (int I = 0; I != FileEntryCount; ++I) {
if (*OffsetPtr >= EndPrologueOffset)
- return false;
+ return createStringError(
+ errc::invalid_argument,
+ "failed to parse file entry at offset "
+ "0x%8.8" PRIx64
+ " because offset extends beyond the prologue end at offset "
+ "0x%8.8" PRIx64,
+ *OffsetPtr, EndPrologueOffset);
DWARFDebugLine::FileNameEntry FileEntry;
- for (auto Descriptor : FileDescriptors) {
+ for (auto Descriptor : *FileDescriptors) {
DWARFFormValue Value(Descriptor.Form);
if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U))
- return false;
+ return createStringError(errc::invalid_argument,
+ "failed to parse file entry because "
+ "extracting the form value failed.");
switch (Descriptor.Type) {
case DW_LNCT_path:
FileEntry.Name = Value;
@@ -280,7 +307,10 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
FileEntry.Length = Value.getAsUnsignedConstant().getValue();
break;
case DW_LNCT_MD5:
- assert(Value.getAsBlock().getValue().size() == 16);
+ if (!Value.getAsBlock() || Value.getAsBlock().getValue().size() != 16)
+ return createStringError(
+ errc::invalid_argument,
+ "failed to parse file entry because the MD5 hash is invalid");
std::uninitialized_copy_n(Value.getAsBlock().getValue().begin(), 16,
FileEntry.Checksum.Bytes.begin());
break;
@@ -290,21 +320,21 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
}
FileNames.push_back(FileEntry);
}
- return true;
+ return Error::success();
}
Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
- uint32_t *OffsetPtr,
+ uint64_t *OffsetPtr,
const DWARFContext &Ctx,
const DWARFUnit *U) {
const uint64_t PrologueOffset = *OffsetPtr;
clear();
TotalLength = DebugLineData.getRelocatedValue(4, OffsetPtr);
- if (TotalLength == UINT32_MAX) {
+ if (TotalLength == dwarf::DW_LENGTH_DWARF64) {
FormParams.Format = dwarf::DWARF64;
TotalLength = DebugLineData.getU64(OffsetPtr);
- } else if (TotalLength >= 0xfffffff0) {
+ } else if (TotalLength >= dwarf::DW_LENGTH_lo_reserved) {
return createStringError(errc::invalid_argument,
"parsing line table prologue at offset 0x%8.8" PRIx64
" unsupported reserved unit length found of value 0x%8.8" PRIx64,
@@ -343,14 +373,17 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
}
if (getVersion() >= 5) {
- if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset,
- FormParams, Ctx, U, ContentTypes,
- IncludeDirectories, FileNames)) {
- return createStringError(errc::invalid_argument,
- "parsing line table prologue at 0x%8.8" PRIx64
- " found an invalid directory or file table description at"
- " 0x%8.8" PRIx64,
- PrologueOffset, (uint64_t)*OffsetPtr);
+ if (Error e = parseV5DirFileTables(
+ DebugLineData, OffsetPtr, EndPrologueOffset, FormParams, Ctx, U,
+ ContentTypes, IncludeDirectories, FileNames)) {
+ return joinErrors(
+ createStringError(
+ errc::invalid_argument,
+ "parsing line table prologue at 0x%8.8" PRIx64
+ " found an invalid directory or file table description at"
+ " 0x%8.8" PRIx64,
+ PrologueOffset, *OffsetPtr),
+ std::move(e));
}
} else
parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset,
@@ -361,7 +394,7 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
"parsing line table prologue at 0x%8.8" PRIx64
" should have ended at 0x%8.8" PRIx64
" but it ended at 0x%8.8" PRIx64,
- PrologueOffset, EndPrologueOffset, (uint64_t)*OffsetPtr);
+ PrologueOffset, EndPrologueOffset, *OffsetPtr);
return Error::success();
}
@@ -468,7 +501,7 @@ void DWARFDebugLine::ParsingState::appendRowToMatrix() {
}
const DWARFDebugLine::LineTable *
-DWARFDebugLine::getLineTable(uint32_t Offset) const {
+DWARFDebugLine::getLineTable(uint64_t Offset) const {
LineTableConstIter Pos = LineTableMap.find(Offset);
if (Pos != LineTableMap.end())
return &Pos->second;
@@ -476,10 +509,10 @@ DWARFDebugLine::getLineTable(uint32_t Offset) const {
}
Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable(
- DWARFDataExtractor &DebugLineData, uint32_t Offset, const DWARFContext &Ctx,
+ DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx,
const DWARFUnit *U, std::function<void(Error)> RecoverableErrorCallback) {
if (!DebugLineData.isValidOffset(Offset))
- return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx32
+ return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx64
" is not a valid debug line section offset",
Offset);
@@ -496,10 +529,10 @@ Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable(
}
Error DWARFDebugLine::LineTable::parse(
- DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
+ DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
const DWARFContext &Ctx, const DWARFUnit *U,
std::function<void(Error)> RecoverableErrorCallback, raw_ostream *OS) {
- const uint32_t DebugLineOffset = *OffsetPtr;
+ const uint64_t DebugLineOffset = *OffsetPtr;
clear();
@@ -515,7 +548,7 @@ Error DWARFDebugLine::LineTable::parse(
if (PrologueErr)
return PrologueErr;
- const uint32_t EndOffset =
+ const uint64_t EndOffset =
DebugLineOffset + Prologue.TotalLength + Prologue.sizeofTotalLength();
// See if we should tell the data extractor the address size.
@@ -529,7 +562,7 @@ Error DWARFDebugLine::LineTable::parse(
while (*OffsetPtr < EndOffset) {
if (OS)
- *OS << format("0x%08.08" PRIx32 ": ", *OffsetPtr);
+ *OS << format("0x%08.08" PRIx64 ": ", *OffsetPtr);
uint8_t Opcode = DebugLineData.getU8(OffsetPtr);
@@ -540,7 +573,7 @@ Error DWARFDebugLine::LineTable::parse(
// Extended Opcodes always start with a zero opcode followed by
// a uleb128 length so you can skip ones you don't know about
uint64_t Len = DebugLineData.getULEB128(OffsetPtr);
- uint32_t ExtOffset = *OffsetPtr;
+ uint64_t ExtOffset = *OffsetPtr;
// Tolerate zero-length; assume length is correct and soldier on.
if (Len == 0) {
@@ -585,7 +618,7 @@ Error DWARFDebugLine::LineTable::parse(
DebugLineData.setAddressSize(Len - 1);
else if (DebugLineData.getAddressSize() != Len - 1) {
return createStringError(errc::invalid_argument,
- "mismatching address size at offset 0x%8.8" PRIx32
+ "mismatching address size at offset 0x%8.8" PRIx64
" expected 0x%2.2" PRIx8 " found 0x%2.2" PRIx64,
ExtOffset, DebugLineData.getAddressSize(),
Len - 1);
@@ -652,8 +685,8 @@ Error DWARFDebugLine::LineTable::parse(
// Otherwise we have an unparseable line-number program.
if (*OffsetPtr - ExtOffset != Len)
return createStringError(errc::illegal_byte_sequence,
- "unexpected line op length at offset 0x%8.8" PRIx32
- " expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx32,
+ "unexpected line op length at offset 0x%8.8" PRIx64
+ " expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx64,
ExtOffset, Len, *OffsetPtr - ExtOffset);
} else if (Opcode < Prologue.OpcodeBase) {
if (OS)
@@ -1007,10 +1040,9 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) {
sys::path::is_absolute(Path, sys::path::Style::windows);
}
-bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex,
- StringRef CompDir,
- FileLineInfoKind Kind,
- std::string &Result) const {
+bool DWARFDebugLine::Prologue::getFileNameByIndex(
+ uint64_t FileIndex, StringRef CompDir, FileLineInfoKind Kind,
+ std::string &Result, sys::path::Style Style) const {
if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
return false;
const FileNameEntry &Entry = getFileNameEntry(FileIndex);
@@ -1036,11 +1068,11 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex,
// We know that FileName is not absolute, the only way to have an
// absolute path at this point would be if IncludeDir is absolute.
if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
- sys::path::append(FilePath, CompDir);
+ sys::path::append(FilePath, Style, CompDir);
}
// sys::path::append skips empty strings.
- sys::path::append(FilePath, IncludeDir, FileName);
+ sys::path::append(FilePath, Style, IncludeDir, FileName);
Result = FilePath.str();
return true;
}
@@ -1092,7 +1124,8 @@ DWARFDebugLine::SectionParser::SectionParser(DWARFDataExtractor &Data,
}
bool DWARFDebugLine::Prologue::totalLengthIsValid() const {
- return TotalLength == 0xffffffff || TotalLength < 0xfffffff0;
+ return TotalLength == dwarf::DW_LENGTH_DWARF64 ||
+ TotalLength < dwarf::DW_LENGTH_lo_reserved;
}
DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext(
@@ -1101,7 +1134,7 @@ DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext(
assert(DebugLineData.isValidOffset(Offset) &&
"parsing should have terminated");
DWARFUnit *U = prepareToParse(Offset);
- uint32_t OldOffset = Offset;
+ uint64_t OldOffset = Offset;
LineTable LT;
if (Error Err = LT.parse(DebugLineData, &Offset, Context, U,
RecoverableErrorCallback, OS))
@@ -1115,14 +1148,14 @@ void DWARFDebugLine::SectionParser::skip(
assert(DebugLineData.isValidOffset(Offset) &&
"parsing should have terminated");
DWARFUnit *U = prepareToParse(Offset);
- uint32_t OldOffset = Offset;
+ uint64_t OldOffset = Offset;
LineTable LT;
if (Error Err = LT.Prologue.parse(DebugLineData, &Offset, Context, U))
ErrorCallback(std::move(Err));
moveToNextTable(OldOffset, LT.Prologue);
}
-DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint32_t Offset) {
+DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint64_t Offset) {
DWARFUnit *U = nullptr;
auto It = LineToUnit.find(Offset);
if (It != LineToUnit.end())
@@ -1131,7 +1164,7 @@ DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint32_t Offset) {
return U;
}
-void DWARFDebugLine::SectionParser::moveToNextTable(uint32_t OldOffset,
+void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset,
const Prologue &P) {
// If the length field is not valid, we don't know where the next table is, so
// cannot continue to parse. Mark the parser as done, and leave the Offset
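The recurring change in DWARFDebugLine.cpp replaces bool returns and
empty-container sentinels with llvm::Error / llvm::Expected<T>. A hedged
sketch of the pattern, with made-up helper names and message text:

#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include <cinttypes>

using namespace llvm;

// A parser that used to return false now returns a descriptive Error;
// callers propagate it with takeError() instead of collapsing it to bool.
static Expected<unsigned> parseCount(uint64_t Offset, uint64_t End) {
  if (Offset >= End)
    return createStringError(errc::invalid_argument,
                             "failed to parse count at offset 0x%8.8" PRIx64
                             " because it extends beyond the end at offset "
                             "0x%8.8" PRIx64,
                             Offset, End);
  return 42; // Illustrative payload.
}

static Error parseTable(uint64_t Offset, uint64_t End) {
  Expected<unsigned> Count = parseCount(Offset, End);
  if (!Count)
    return Count.takeError(); // Was: return false;
  // ... consume *Count ...
  return Error::success();
}

The prologue parser goes one step further and wraps the inner failure with
joinErrors, so the report carries both the table-level context and the
specific cause.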
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index 6d8f4bee77c4..4f7b01130a47 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -28,19 +28,18 @@ using namespace llvm;
// expression that LLVM doesn't produce. Guessing the wrong version means we
// won't be able to pretty print expressions in DWARF2 binaries produced by
// non-LLVM tools.
-static void dumpExpression(raw_ostream &OS, ArrayRef<char> Data,
+static void dumpExpression(raw_ostream &OS, ArrayRef<uint8_t> Data,
bool IsLittleEndian, unsigned AddressSize,
const MCRegisterInfo *MRI, DWARFUnit *U) {
- DWARFDataExtractor Extractor(StringRef(Data.data(), Data.size()),
- IsLittleEndian, AddressSize);
+ DWARFDataExtractor Extractor(toStringRef(Data), IsLittleEndian, AddressSize);
DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI, U);
}
-void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, bool IsLittleEndian,
+void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, uint64_t BaseAddress,
+ bool IsLittleEndian,
unsigned AddressSize,
- const MCRegisterInfo *MRI,
- DWARFUnit *U,
- uint64_t BaseAddress,
+ const MCRegisterInfo *MRI, DWARFUnit *U,
+ DIDumpOptions DumpOpts,
unsigned Indent) const {
for (const Entry &E : Entries) {
OS << '\n';
@@ -64,12 +63,12 @@ DWARFDebugLoc::getLocationListAtOffset(uint64_t Offset) const {
return nullptr;
}
-void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
+void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, DIDumpOptions DumpOpts,
Optional<uint64_t> Offset) const {
auto DumpLocationList = [&](const LocationList &L) {
- OS << format("0x%8.8x: ", L.Offset);
- L.dump(OS, IsLittleEndian, AddressSize, MRI, nullptr, 0, 12);
- OS << "\n\n";
+ OS << format("0x%8.8" PRIx64 ": ", L.Offset);
+ L.dump(OS, 0, IsLittleEndian, AddressSize, MRI, nullptr, DumpOpts, 12);
+ OS << "\n";
};
if (Offset) {
@@ -80,50 +79,47 @@ void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
for (const LocationList &L : Locations) {
DumpLocationList(L);
+ if (&L != &Locations.back())
+ OS << '\n';
}
}
-Optional<DWARFDebugLoc::LocationList>
-DWARFDebugLoc::parseOneLocationList(DWARFDataExtractor Data, unsigned *Offset) {
+Expected<DWARFDebugLoc::LocationList>
+DWARFDebugLoc::parseOneLocationList(const DWARFDataExtractor &Data,
+ uint64_t *Offset) {
LocationList LL;
LL.Offset = *Offset;
+ AddressSize = Data.getAddressSize();
+ DataExtractor::Cursor C(*Offset);
// 2.6.2 Location Lists
// A location list entry consists of:
while (true) {
Entry E;
- if (!Data.isValidOffsetForDataOfSize(*Offset, 2 * Data.getAddressSize())) {
- WithColor::error() << "location list overflows the debug_loc section.\n";
- return None;
- }
// 1. A beginning address offset. ...
- E.Begin = Data.getRelocatedAddress(Offset);
+ E.Begin = Data.getRelocatedAddress(C);
// 2. An ending address offset. ...
- E.End = Data.getRelocatedAddress(Offset);
+ E.End = Data.getRelocatedAddress(C);
+
+ if (Error Err = C.takeError())
+ return std::move(Err);
// The end of any given location list is marked by an end of list entry,
// which consists of a 0 for the beginning address offset and a 0 for the
// ending address offset.
- if (E.Begin == 0 && E.End == 0)
+ if (E.Begin == 0 && E.End == 0) {
+ *Offset = C.tell();
return LL;
-
- if (!Data.isValidOffsetForDataOfSize(*Offset, 2)) {
- WithColor::error() << "location list overflows the debug_loc section.\n";
- return None;
}
- unsigned Bytes = Data.getU16(Offset);
- if (!Data.isValidOffsetForDataOfSize(*Offset, Bytes)) {
- WithColor::error() << "location list overflows the debug_loc section.\n";
- return None;
+ if (E.Begin != (AddressSize == 4 ? -1U : -1ULL)) {
+ unsigned Bytes = Data.getU16(C);
+ // A single location description describing the location of the object...
+ Data.getU8(C, E.Loc, Bytes);
}
- // A single location description describing the location of the object...
- StringRef str = Data.getData().substr(*Offset, Bytes);
- *Offset += Bytes;
- E.Loc.reserve(str.size());
- llvm::copy(str, std::back_inserter(E.Loc));
+
LL.Entries.push_back(std::move(E));
}
}
@@ -132,81 +128,89 @@ void DWARFDebugLoc::parse(const DWARFDataExtractor &data) {
IsLittleEndian = data.isLittleEndian();
AddressSize = data.getAddressSize();
- uint32_t Offset = 0;
- while (data.isValidOffset(Offset + data.getAddressSize() - 1)) {
+ uint64_t Offset = 0;
+ while (Offset < data.getData().size()) {
if (auto LL = parseOneLocationList(data, &Offset))
Locations.push_back(std::move(*LL));
- else
+ else {
+ logAllUnhandledErrors(LL.takeError(), WithColor::error());
break;
+ }
}
- if (data.isValidOffset(Offset))
- WithColor::error() << "failed to consume entire .debug_loc section\n";
}
-Optional<DWARFDebugLoclists::LocationList>
-DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset,
- unsigned Version) {
+Expected<DWARFDebugLoclists::LocationList>
+DWARFDebugLoclists::parseOneLocationList(const DataExtractor &Data,
+ uint64_t *Offset, unsigned Version) {
LocationList LL;
LL.Offset = *Offset;
+ DataExtractor::Cursor C(*Offset);
// dwarf::DW_LLE_end_of_list_entry is 0 and indicates the end of the list.
- while (auto Kind =
- static_cast<dwarf::LocationListEntry>(Data.getU8(Offset))) {
-
+ while (auto Kind = Data.getU8(C)) {
Entry E;
E.Kind = Kind;
+ E.Offset = C.tell() - 1;
switch (Kind) {
+ case dwarf::DW_LLE_base_addressx:
+ E.Value0 = Data.getULEB128(C);
+ break;
case dwarf::DW_LLE_startx_length:
- E.Value0 = Data.getULEB128(Offset);
+ E.Value0 = Data.getULEB128(C);
// Pre-DWARF 5 interprets the length field differently. We have
// to support both the pre-standard and standardized styles for compatibility.
if (Version < 5)
- E.Value1 = Data.getU32(Offset);
+ E.Value1 = Data.getU32(C);
else
- E.Value1 = Data.getULEB128(Offset);
+ E.Value1 = Data.getULEB128(C);
break;
case dwarf::DW_LLE_start_length:
- E.Value0 = Data.getAddress(Offset);
- E.Value1 = Data.getULEB128(Offset);
+ E.Value0 = Data.getAddress(C);
+ E.Value1 = Data.getULEB128(C);
break;
case dwarf::DW_LLE_offset_pair:
- E.Value0 = Data.getULEB128(Offset);
- E.Value1 = Data.getULEB128(Offset);
+ E.Value0 = Data.getULEB128(C);
+ E.Value1 = Data.getULEB128(C);
break;
case dwarf::DW_LLE_base_address:
- E.Value0 = Data.getAddress(Offset);
+ E.Value0 = Data.getAddress(C);
break;
default:
- WithColor::error() << "dumping support for LLE of kind " << (int)Kind
- << " not implemented\n";
- return None;
+ cantFail(C.takeError());
+ return createStringError(errc::illegal_byte_sequence,
+ "LLE of kind %x not supported", (int)Kind);
}
- if (Kind != dwarf::DW_LLE_base_address) {
- unsigned Bytes =
- Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset);
+ if (Kind != dwarf::DW_LLE_base_address &&
+ Kind != dwarf::DW_LLE_base_addressx) {
+ unsigned Bytes = Version >= 5 ? Data.getULEB128(C) : Data.getU16(C);
// A single location description describing the location of the object...
- StringRef str = Data.getData().substr(*Offset, Bytes);
- *Offset += Bytes;
- E.Loc.resize(str.size());
- llvm::copy(str, E.Loc.begin());
+ Data.getU8(C, E.Loc, Bytes);
}
LL.Entries.push_back(std::move(E));
}
+ if (Error Err = C.takeError())
+ return std::move(Err);
+ Entry E;
+ E.Kind = dwarf::DW_LLE_end_of_list;
+ E.Offset = C.tell() - 1;
+ LL.Entries.push_back(E);
+ *Offset = C.tell();
return LL;
}
-void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) {
+void DWARFDebugLoclists::parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version) {
IsLittleEndian = data.isLittleEndian();
AddressSize = data.getAddressSize();
- uint32_t Offset = 0;
- while (data.isValidOffset(Offset)) {
+ while (Offset < EndOffset) {
if (auto LL = parseOneLocationList(data, &Offset, Version))
Locations.push_back(std::move(*LL));
- else
+ else {
+ logAllUnhandledErrors(LL.takeError(), WithColor::error());
return;
+ }
}
}
@@ -219,51 +223,106 @@ DWARFDebugLoclists::getLocationListAtOffset(uint64_t Offset) const {
return nullptr;
}
-void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr,
- bool IsLittleEndian,
- unsigned AddressSize,
- const MCRegisterInfo *MRI,
- DWARFUnit *U,
- unsigned Indent) const {
- for (const Entry &E : Entries) {
- switch (E.Kind) {
+void DWARFDebugLoclists::Entry::dump(raw_ostream &OS, uint64_t &BaseAddr,
+ bool IsLittleEndian, unsigned AddressSize,
+ const MCRegisterInfo *MRI, DWARFUnit *U,
+ DIDumpOptions DumpOpts, unsigned Indent,
+ size_t MaxEncodingStringLength) const {
+ if (DumpOpts.Verbose) {
+ OS << "\n";
+ OS.indent(Indent);
+ auto EncodingString = dwarf::LocListEncodingString(Kind);
+ // Unsupported encodings should have been reported during parsing.
+ assert(!EncodingString.empty() && "Unknown loclist entry encoding");
+ OS << format("%s%*c", EncodingString.data(),
+ MaxEncodingStringLength - EncodingString.size() + 1, '(');
+ switch (Kind) {
case dwarf::DW_LLE_startx_length:
- OS << '\n';
- OS.indent(Indent);
- OS << "Addr idx " << E.Value0 << " (w/ length " << E.Value1 << "): ";
- break;
case dwarf::DW_LLE_start_length:
- OS << '\n';
- OS.indent(Indent);
- OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2,
- AddressSize * 2, E.Value0, AddressSize * 2, AddressSize * 2,
- E.Value0 + E.Value1);
- break;
case dwarf::DW_LLE_offset_pair:
- OS << '\n';
- OS.indent(Indent);
- OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2,
- AddressSize * 2, BaseAddr + E.Value0, AddressSize * 2,
- AddressSize * 2, BaseAddr + E.Value1);
+ OS << format("0x%*.*" PRIx64 ", 0x%*.*" PRIx64, AddressSize * 2,
+ AddressSize * 2, Value0, AddressSize * 2, AddressSize * 2,
+ Value1);
break;
+ case dwarf::DW_LLE_base_addressx:
case dwarf::DW_LLE_base_address:
- BaseAddr = E.Value0;
+ OS << format("0x%*.*" PRIx64, AddressSize * 2, AddressSize * 2,
+ Value0);
+ break;
+ case dwarf::DW_LLE_end_of_list:
break;
- default:
- llvm_unreachable("unreachable locations list kind");
}
-
- dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI, U);
+ OS << ')';
}
+ auto PrintPrefix = [&] {
+ OS << "\n";
+ OS.indent(Indent);
+ if (DumpOpts.Verbose)
+ OS << format("%*s", MaxEncodingStringLength, (const char *)"=> ");
+ };
+ switch (Kind) {
+ case dwarf::DW_LLE_startx_length:
+ PrintPrefix();
+ OS << "Addr idx " << Value0 << " (w/ length " << Value1 << "): ";
+ break;
+ case dwarf::DW_LLE_start_length:
+ PrintPrefix();
+ DWARFAddressRange(Value0, Value0 + Value1)
+ .dump(OS, AddressSize, DumpOpts);
+ OS << ": ";
+ break;
+ case dwarf::DW_LLE_offset_pair:
+ PrintPrefix();
+ DWARFAddressRange(BaseAddr + Value0, BaseAddr + Value1)
+ .dump(OS, AddressSize, DumpOpts);
+ OS << ": ";
+ break;
+ case dwarf::DW_LLE_base_addressx:
+ if (!DumpOpts.Verbose)
+ return;
+ break;
+ case dwarf::DW_LLE_end_of_list:
+ if (!DumpOpts.Verbose)
+ return;
+ break;
+ case dwarf::DW_LLE_base_address:
+ BaseAddr = Value0;
+ if (!DumpOpts.Verbose)
+ return;
+ break;
+ default:
+ llvm_unreachable("unreachable locations list kind");
+ }
+
+ dumpExpression(OS, Loc, IsLittleEndian, AddressSize, MRI, U);
+}
+void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr,
+ bool IsLittleEndian,
+ unsigned AddressSize,
+ const MCRegisterInfo *MRI,
+ DWARFUnit *U,
+ DIDumpOptions DumpOpts,
+ unsigned Indent) const {
+ size_t MaxEncodingStringLength = 0;
+ if (DumpOpts.Verbose)
+ for (const auto &Entry : Entries)
+ MaxEncodingStringLength =
+ std::max(MaxEncodingStringLength,
+ dwarf::LocListEncodingString(Entry.Kind).size());
+
+ for (const Entry &E : Entries)
+ E.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, U, DumpOpts, Indent,
+ MaxEncodingStringLength);
}
void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr,
- const MCRegisterInfo *MRI,
+ const MCRegisterInfo *MRI, DIDumpOptions DumpOpts,
Optional<uint64_t> Offset) const {
auto DumpLocationList = [&](const LocationList &L) {
- OS << format("0x%8.8x: ", L.Offset);
- L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, nullptr, /*Indent=*/12);
- OS << "\n\n";
+ OS << format("0x%8.8" PRIx64 ": ", L.Offset);
+ L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, nullptr, DumpOpts,
+ /*Indent=*/12);
+ OS << "\n";
};
if (Offset) {
@@ -274,5 +333,7 @@ void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr,
for (const LocationList &L : Locations) {
DumpLocationList(L);
+ if (&L != &Locations.back())
+ OS << '\n';
}
}
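The location-list parsers above switch to DataExtractor::Cursor, which
defers error checking: reads after a failure become no-ops, and a single
takeError() call at the end reports the first problem. A minimal sketch of
the idiom, with invented field meanings:

#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"

using namespace llvm;

// Read two addresses with one error check at the end. After any failed
// read the cursor is poisoned and later reads return 0, so no per-call
// isValidOffset() tests are needed.
static Error readPair(StringRef Bytes, uint64_t &Begin, uint64_t &End) {
  DataExtractor Data(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
  DataExtractor::Cursor C(0);
  Begin = Data.getAddress(C);
  End = Data.getAddress(C);
  return C.takeError(); // Error::success() if both reads succeeded.
}

This is why the old per-read "location list overflows the debug_loc
section" checks could be deleted wholesale.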
diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
index 3317a778cc70..9a0e770aed3d 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -53,7 +53,7 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const {
}
void DWARFDebugMacro::parse(DataExtractor data) {
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
while (data.isValidOffset(Offset)) {
// A macro list entry consists of:
Entry E;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
index 963ec64f5e91..ab71b239cb67 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
@@ -23,7 +23,7 @@ DWARFDebugPubTable::DWARFDebugPubTable(const DWARFObject &Obj,
bool LittleEndian, bool GnuStyle)
: GnuStyle(GnuStyle) {
DWARFDataExtractor PubNames(Obj, Sec, LittleEndian, 0);
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
while (PubNames.isValidOffset(Offset)) {
Sets.push_back({});
Set &SetData = Sets.back();
@@ -49,13 +49,13 @@ void DWARFDebugPubTable::dump(raw_ostream &OS) const {
for (const Set &S : Sets) {
OS << "length = " << format("0x%08x", S.Length);
OS << " version = " << format("0x%04x", S.Version);
- OS << " unit_offset = " << format("0x%08x", S.Offset);
+ OS << " unit_offset = " << format("0x%08" PRIx64, S.Offset);
OS << " unit_size = " << format("0x%08x", S.Size) << '\n';
OS << (GnuStyle ? "Offset Linkage Kind Name\n"
: "Offset Name\n");
for (const Entry &E : S.Entries) {
- OS << format("0x%8.8x ", E.SecOffset);
+ OS << format("0x%8.8" PRIx64 " ", E.SecOffset);
if (GnuStyle) {
StringRef EntryLinkage =
GDBIndexEntryLinkageString(E.Descriptor.Linkage);
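A large share of the hunks in this import, including the two above, only
widen printf-style format strings: passing a uint64_t to a bare %x (which
expects unsigned int) is undefined behavior, so the <cinttypes> macros are
substituted. A minimal illustration:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Offset = 0x123456789aULL;
  // "0x%8.8x" would read only 32 bits of the argument (UB for uint64_t).
  // PRIx64 expands to the correct conversion specifier for uint64_t.
  std::printf("0x%8.8" PRIx64 "\n", Offset); // prints 0x123456789a
  return 0;
}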
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index d8df81a0aa0b..1a1857d8cd79 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -17,17 +17,17 @@
using namespace llvm;
void DWARFDebugRangeList::clear() {
- Offset = -1U;
+ Offset = -1ULL;
AddressSize = 0;
Entries.clear();
}
Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
- uint32_t *offset_ptr) {
+ uint64_t *offset_ptr) {
clear();
if (!data.isValidOffset(*offset_ptr))
return createStringError(errc::invalid_argument,
- "invalid range list offset 0x%" PRIx32, *offset_ptr);
+ "invalid range list offset 0x%" PRIx64, *offset_ptr);
AddressSize = data.getAddressSize();
if (AddressSize != 4 && AddressSize != 8)
@@ -38,7 +38,7 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
RangeListEntry Entry;
Entry.SectionIndex = -1ULL;
- uint32_t prev_offset = *offset_ptr;
+ uint64_t prev_offset = *offset_ptr;
Entry.StartAddress = data.getRelocatedAddress(offset_ptr);
Entry.EndAddress =
data.getRelocatedAddress(offset_ptr, &Entry.SectionIndex);
@@ -47,7 +47,7 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
if (*offset_ptr != prev_offset + 2 * AddressSize) {
clear();
return createStringError(errc::invalid_argument,
- "invalid range list entry at offset 0x%" PRIx32,
+ "invalid range list entry at offset 0x%" PRIx64,
prev_offset);
}
if (Entry.isEndOfListEntry())
@@ -59,12 +59,12 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
void DWARFDebugRangeList::dump(raw_ostream &OS) const {
for (const RangeListEntry &RLE : Entries) {
- const char *format_str = (AddressSize == 4
- ? "%08x %08" PRIx64 " %08" PRIx64 "\n"
- : "%08x %016" PRIx64 " %016" PRIx64 "\n");
+ const char *format_str =
+ (AddressSize == 4 ? "%08" PRIx64 " %08" PRIx64 " %08" PRIx64 "\n"
+ : "%08" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n");
OS << format(format_str, Offset, RLE.StartAddress, RLE.EndAddress);
}
- OS << format("%08x <End of list>\n", Offset);
+ OS << format("%08" PRIx64 " <End of list>\n", Offset);
}
DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges(
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
index 5ac3326f6681..f6785b89e86d 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
@@ -16,8 +16,8 @@
using namespace llvm;
-Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
- uint32_t *OffsetPtr) {
+Error RangeListEntry::extract(DWARFDataExtractor Data, uint64_t End,
+ uint64_t *OffsetPtr) {
Offset = *OffsetPtr;
SectionIndex = -1ULL;
// The caller should guarantee that we have at least 1 byte available, so
@@ -32,41 +32,41 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
break;
// TODO: Support other encodings.
case dwarf::DW_RLE_base_addressx: {
- uint32_t PreviousOffset = *OffsetPtr - 1;
+ uint64_t PreviousOffset = *OffsetPtr - 1;
Value0 = Data.getULEB128(OffsetPtr);
if (End < *OffsetPtr)
return createStringError(
errc::invalid_argument,
"read past end of table when reading "
- "DW_RLE_base_addressx encoding at offset 0x%" PRIx32,
+ "DW_RLE_base_addressx encoding at offset 0x%" PRIx64,
PreviousOffset);
break;
}
case dwarf::DW_RLE_startx_endx:
return createStringError(errc::not_supported,
"unsupported rnglists encoding DW_RLE_startx_endx at "
- "offset 0x%" PRIx32,
+ "offset 0x%" PRIx64,
*OffsetPtr - 1);
case dwarf::DW_RLE_startx_length: {
- uint32_t PreviousOffset = *OffsetPtr - 1;
+ uint64_t PreviousOffset = *OffsetPtr - 1;
Value0 = Data.getULEB128(OffsetPtr);
Value1 = Data.getULEB128(OffsetPtr);
if (End < *OffsetPtr)
return createStringError(
errc::invalid_argument,
"read past end of table when reading "
- "DW_RLE_startx_length encoding at offset 0x%" PRIx32,
+ "DW_RLE_startx_length encoding at offset 0x%" PRIx64,
PreviousOffset);
break;
}
case dwarf::DW_RLE_offset_pair: {
- uint32_t PreviousOffset = *OffsetPtr - 1;
+ uint64_t PreviousOffset = *OffsetPtr - 1;
Value0 = Data.getULEB128(OffsetPtr);
Value1 = Data.getULEB128(OffsetPtr);
if (End < *OffsetPtr)
return createStringError(errc::invalid_argument,
"read past end of table when reading "
- "DW_RLE_offset_pair encoding at offset 0x%" PRIx32,
+ "DW_RLE_offset_pair encoding at offset 0x%" PRIx64,
PreviousOffset);
break;
}
@@ -74,7 +74,7 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
if ((End - *OffsetPtr) < Data.getAddressSize())
return createStringError(errc::invalid_argument,
"insufficient space remaining in table for "
- "DW_RLE_base_address encoding at offset 0x%" PRIx32,
+ "DW_RLE_base_address encoding at offset 0x%" PRIx64,
*OffsetPtr - 1);
Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
break;
@@ -84,27 +84,27 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
return createStringError(errc::invalid_argument,
"insufficient space remaining in table for "
"DW_RLE_start_end encoding "
- "at offset 0x%" PRIx32,
+ "at offset 0x%" PRIx64,
*OffsetPtr - 1);
Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
Value1 = Data.getRelocatedAddress(OffsetPtr);
break;
}
case dwarf::DW_RLE_start_length: {
- uint32_t PreviousOffset = *OffsetPtr - 1;
+ uint64_t PreviousOffset = *OffsetPtr - 1;
Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
Value1 = Data.getULEB128(OffsetPtr);
if (End < *OffsetPtr)
return createStringError(errc::invalid_argument,
"read past end of table when reading "
- "DW_RLE_start_length encoding at offset 0x%" PRIx32,
+ "DW_RLE_start_length encoding at offset 0x%" PRIx64,
PreviousOffset);
break;
}
default:
return createStringError(errc::not_supported,
"unknown rnglists encoding 0x%" PRIx32
- " at offset 0x%" PRIx32,
+ " at offset 0x%" PRIx64,
uint32_t(Encoding), *OffsetPtr - 1);
}
@@ -187,7 +187,7 @@ void RangeListEntry::dump(
if (DumpOpts.Verbose) {
// Print the section offset in verbose mode.
- OS << format("0x%8.8" PRIx32 ":", Offset);
+ OS << format("0x%8.8" PRIx64 ":", Offset);
auto EncodingString = dwarf::RangeListEncodingString(EntryKind);
// Unsupported encodings should have been reported during parsing.
assert(!EncodingString.empty() && "Unknown range entry encoding");
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index d638dc4239f4..cec194e8b6b3 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -21,6 +21,7 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/WithColor.h"
@@ -91,21 +92,29 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
}
FormValue.dump(OS, DumpOpts);
+ const auto &DumpLL = [&](auto ExpectedLL) {
+ if (ExpectedLL) {
+ uint64_t BaseAddr = 0;
+ if (Optional<object::SectionedAddress> BA = U->getBaseAddress())
+ BaseAddr = BA->Address;
+ auto LLDumpOpts = DumpOpts;
+ LLDumpOpts.Verbose = false;
+ ExpectedLL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(),
+ MRI, U, LLDumpOpts, Indent);
+ } else {
+ OS << '\n';
+ OS.indent(Indent);
+ OS << formatv("error extracting location list: {0}",
+ fmt_consume(ExpectedLL.takeError()));
+ }
+ };
if (FormValue.isFormClass(DWARFFormValue::FC_SectionOffset)) {
- uint32_t Offset = *FormValue.getAsSectionOffset();
+ uint64_t Offset = *FormValue.getAsSectionOffset();
if (!U->isDWOUnit() && !U->getLocSection()->Data.empty()) {
DWARFDebugLoc DebugLoc;
DWARFDataExtractor Data(Obj, *U->getLocSection(), Ctx.isLittleEndian(),
Obj.getAddressSize());
- auto LL = DebugLoc.parseOneLocationList(Data, &Offset);
- if (LL) {
- uint64_t BaseAddr = 0;
- if (Optional<object::SectionedAddress> BA = U->getBaseAddress())
- BaseAddr = BA->Address;
- LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, U,
- BaseAddr, Indent);
- } else
- OS << "error extracting location list.";
+ DumpLL(DebugLoc.parseOneLocationList(Data, &Offset));
return;
}
@@ -121,18 +130,8 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
// Modern location lists (.debug_loclists) are used starting from v5.
// Ideally we should take the version from the .debug_loclists section
// header, but we use the CU's version for simplicity.
- auto LL = DWARFDebugLoclists::parseOneLocationList(
- Data, &Offset, UseLocLists ? U->getVersion() : 4);
-
- uint64_t BaseAddr = 0;
- if (Optional<object::SectionedAddress> BA = U->getBaseAddress())
- BaseAddr = BA->Address;
-
- if (LL)
- LL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI,
- U, Indent);
- else
- OS << "error extracting location list.";
+ DumpLL(DWARFDebugLoclists::parseOneLocationList(
+ Data, &Offset, UseLocLists ? U->getVersion() : 4));
}
}
}
@@ -264,7 +263,7 @@ static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) {
}
static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
- uint32_t *OffsetPtr, dwarf::Attribute Attr,
+ uint64_t *OffsetPtr, dwarf::Attribute Attr,
dwarf::Form Form, unsigned Indent,
DIDumpOptions DumpOpts) {
if (!Die.isValid())
@@ -568,8 +567,8 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent,
if (!isValid())
return;
DWARFDataExtractor debug_info_data = U->getDebugInfoExtractor();
- const uint32_t Offset = getOffset();
- uint32_t offset = Offset;
+ const uint64_t Offset = getOffset();
+ uint64_t offset = Offset;
if (DumpOpts.ShowParents) {
DIDumpOptions ParentDumpOpts = DumpOpts;
ParentDumpOpts.ShowParents = false;
@@ -581,7 +580,7 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent,
uint32_t abbrCode = debug_info_data.getULEB128(&offset);
if (DumpOpts.ShowAddresses)
WithColor(OS, HighlightColor::Address).get()
- << format("\n0x%8.8x: ", Offset);
+ << format("\n0x%8.8" PRIx64 ": ", Offset);
if (abbrCode) {
auto AbbrevDecl = getAbbreviationDeclarationPtr();
@@ -685,7 +684,7 @@ void DWARFDie::attribute_iterator::updateForIndex(
AttrValue.Attr = AbbrDecl.getAttrByIndex(Index);
// Add the previous byte size of any previous attribute value.
AttrValue.Offset += AttrValue.ByteSize;
- uint32_t ParseOffset = AttrValue.Offset;
+ uint64_t ParseOffset = AttrValue.Offset;
auto U = Die.getDwarfUnit();
assert(U && "Die must have valid DWARF unit");
AttrValue.Value = DWARFFormValue::createFromUnit(
@@ -733,6 +732,7 @@ bool DWARFAttribute::mayHaveLocationDescription(dwarf::Attribute Attr) {
case DW_AT_call_data_value:
// Extensions.
case DW_AT_GNU_call_site_value:
+ case DW_AT_GNU_call_site_target:
return true;
default:
return false;
diff --git a/lib/DebugInfo/DWARF/DWARFExpression.cpp b/lib/DebugInfo/DWARF/DWARFExpression.cpp
index 470d4b5364b4..5009b1b7b412 100644
--- a/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -119,7 +119,7 @@ static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) {
}
bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version,
- uint8_t AddressSize, uint32_t Offset) {
+ uint8_t AddressSize, uint64_t Offset) {
Opcode = Data.getU8(&Offset);
Desc = getOpDesc(Opcode);
@@ -218,9 +218,8 @@ static bool prettyPrintRegisterOp(raw_ostream &OS, uint8_t Opcode,
else
DwarfRegNum = Opcode - DW_OP_reg0;
- int LLVMRegNum = MRI->getLLVMRegNum(DwarfRegNum, isEH);
- if (LLVMRegNum >= 0) {
- if (const char *RegName = MRI->getName(LLVMRegNum)) {
+ if (Optional<unsigned> LLVMRegNum = MRI->getLLVMRegNum(DwarfRegNum, isEH)) {
+ if (const char *RegName = MRI->getName(*LLVMRegNum)) {
if ((Opcode >= DW_OP_breg0 && Opcode <= DW_OP_breg31) ||
Opcode == DW_OP_bregx)
OS << format(" %s%+" PRId64, RegName, Operands[OpNum]);
@@ -263,7 +262,7 @@ bool DWARFExpression::Operation::print(raw_ostream &OS,
if (Size == Operation::BaseTypeRef && U) {
auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
if (Die && Die.getTag() == dwarf::DW_TAG_base_type) {
- OS << format(" (0x%08x)", U->getOffset() + Operands[Operand]);
+ OS << format(" (0x%08" PRIx64 ")", U->getOffset() + Operands[Operand]);
if (auto Name = Die.find(dwarf::DW_AT_name))
OS << " \"" << Name->getAsCString() << "\"";
} else {
@@ -271,7 +270,7 @@ bool DWARFExpression::Operation::print(raw_ostream &OS,
Operands[Operand]);
}
} else if (Size == Operation::SizeBlock) {
- uint32_t Offset = Operands[Operand];
+ uint64_t Offset = Operands[Operand];
for (unsigned i = 0; i < Operands[Operand - 1]; ++i)
OS << format(" 0x%02x", Expr->Data.getU8(&Offset));
} else {
@@ -290,7 +289,7 @@ void DWARFExpression::print(raw_ostream &OS, const MCRegisterInfo *RegInfo,
uint32_t EntryValExprSize = 0;
for (auto &Op : *this) {
if (!Op.print(OS, this, RegInfo, U, IsEH)) {
- uint32_t FailOffset = Op.getEndOffset();
+ uint64_t FailOffset = Op.getEndOffset();
while (FailOffset < Data.getData().size())
OS << format(" %02x", Data.getU8(&FailOffset));
return;
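The register-name hunk above swaps a -1 sentinel for an optional return, so
a forgotten failure check can no longer yield a bogus register index. A
sketch of the calling convention, with an invented mapping:

#include "llvm/ADT/Optional.h"

using namespace llvm;

// Sentinel-free lookup: the caller must unwrap explicitly.
static Optional<unsigned> lookupReg(unsigned DwarfRegNum) {
  if (DwarfRegNum > 31)
    return None; // Previously: return -1;
  return DwarfRegNum + 100; // Illustrative target numbering.
}

static unsigned regOrZero(unsigned DwarfRegNum) {
  if (Optional<unsigned> R = lookupReg(DwarfRegNum))
    return *R;
  return 0;
}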
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 290d35511cdb..26090638b34c 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -98,7 +98,7 @@ DWARFFormValue DWARFFormValue::createFromBlockValue(dwarf::Form F,
}
DWARFFormValue DWARFFormValue::createFromUnit(dwarf::Form F, const DWARFUnit *U,
- uint32_t *OffsetPtr) {
+ uint64_t *OffsetPtr) {
DWARFFormValue FormValue(F);
FormValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr,
U->getFormParams(), U);
@@ -106,7 +106,7 @@ DWARFFormValue DWARFFormValue::createFromUnit(dwarf::Form F, const DWARFUnit *U,
}
bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
- uint32_t *OffsetPtr,
+ uint64_t *OffsetPtr,
const dwarf::FormParams Params) {
bool Indirect = false;
do {
@@ -234,7 +234,7 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
}
bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
- uint32_t *OffsetPtr, dwarf::FormParams FP,
+ uint64_t *OffsetPtr, dwarf::FormParams FP,
const DWARFContext *Ctx,
const DWARFUnit *CU) {
if (!Ctx && CU)
@@ -590,7 +590,7 @@ Optional<const char *> DWARFFormValue::getAsCString() const {
// FIXME: Add support for DW_FORM_GNU_strp_alt
if (Form == DW_FORM_GNU_strp_alt || C == nullptr)
return None;
- uint32_t Offset = Value.uval;
+ uint64_t Offset = Value.uval;
if (Form == DW_FORM_line_strp) {
// .debug_line_str is tracked in the Context.
if (const char *Str = C->getLineStringExtractor().getCStr(&Offset))
@@ -624,6 +624,7 @@ Optional<uint64_t> DWARFFormValue::getAsAddress() const {
return SA->Address;
return None;
}
+
Optional<object::SectionedAddress>
DWARFFormValue::getAsSectionedAddress() const {
if (!isFormClass(FC_Address))
diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index f5f975578082..252b58e5a591 100644
--- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -112,7 +112,7 @@ void DWARFGdbIndex::dump(raw_ostream &OS) {
}
bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
// Only version 7 is supported at this moment.
Version = Data.getU32(&Offset);
diff --git a/lib/DebugInfo/DWARF/DWARFListTable.cpp b/lib/DebugInfo/DWARF/DWARFListTable.cpp
index e38e706227da..269ea9f79a6e 100644
--- a/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -16,33 +16,42 @@
using namespace llvm;
Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
- uint32_t *OffsetPtr) {
+ uint64_t *OffsetPtr) {
HeaderOffset = *OffsetPtr;
// Read and verify the length field.
if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t)))
return createStringError(errc::invalid_argument,
"section is not large enough to contain a "
- "%s table length at offset 0x%" PRIx32,
+ "%s table length at offset 0x%" PRIx64,
SectionName.data(), *OffsetPtr);
- // TODO: Add support for DWARF64.
- HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr);
- if (HeaderData.Length == 0xffffffffu)
- return createStringError(errc::not_supported,
- "DWARF64 is not supported in %s at offset 0x%" PRIx32,
- SectionName.data(), HeaderOffset);
Format = dwarf::DwarfFormat::DWARF32;
- if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header))
+ uint8_t OffsetByteSize = 4;
+ HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr);
+ if (HeaderData.Length == dwarf::DW_LENGTH_DWARF64) {
+ Format = dwarf::DwarfFormat::DWARF64;
+ OffsetByteSize = 8;
+ HeaderData.Length = Data.getU64(OffsetPtr);
+ } else if (HeaderData.Length >= dwarf::DW_LENGTH_lo_reserved) {
+ return createStringError(errc::invalid_argument,
+ "%s table at offset 0x%" PRIx64
+ " has unsupported reserved unit length of value 0x%8.8" PRIx64,
+ SectionName.data(), HeaderOffset, HeaderData.Length);
+ }
+ uint64_t FullLength =
+ HeaderData.Length + dwarf::getUnitLengthFieldByteSize(Format);
+ assert(FullLength == length());
+ if (FullLength < getHeaderSize(Format))
return createStringError(errc::invalid_argument,
- "%s table at offset 0x%" PRIx32
- " has too small length (0x%" PRIx32
+ "%s table at offset 0x%" PRIx64
+ " has too small length (0x%" PRIx64
") to contain a complete header",
- SectionName.data(), HeaderOffset, length());
- uint32_t End = HeaderOffset + length();
- if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset))
+ SectionName.data(), HeaderOffset, FullLength);
+ uint64_t End = HeaderOffset + FullLength;
+ if (!Data.isValidOffsetForDataOfSize(HeaderOffset, FullLength))
return createStringError(errc::invalid_argument,
"section is not large enough to contain a %s table "
- "of length 0x%" PRIx32 " at offset 0x%" PRIx32,
- SectionName.data(), length(), HeaderOffset);
+ "of length 0x%" PRIx64 " at offset 0x%" PRIx64,
+ SectionName.data(), FullLength, HeaderOffset);
HeaderData.Version = Data.getU16(OffsetPtr);
HeaderData.AddrSize = Data.getU8(OffsetPtr);
@@ -53,35 +62,35 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
if (HeaderData.Version != 5)
return createStringError(errc::invalid_argument,
"unrecognised %s table version %" PRIu16
- " in table at offset 0x%" PRIx32,
+ " in table at offset 0x%" PRIx64,
SectionName.data(), HeaderData.Version, HeaderOffset);
if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
return createStringError(errc::not_supported,
- "%s table at offset 0x%" PRIx32
+ "%s table at offset 0x%" PRIx64
" has unsupported address size %" PRIu8,
SectionName.data(), HeaderOffset, HeaderData.AddrSize);
if (HeaderData.SegSize != 0)
return createStringError(errc::not_supported,
- "%s table at offset 0x%" PRIx32
+ "%s table at offset 0x%" PRIx64
" has unsupported segment selector size %" PRIu8,
SectionName.data(), HeaderOffset, HeaderData.SegSize);
- if (End < HeaderOffset + sizeof(HeaderData) +
- HeaderData.OffsetEntryCount * sizeof(uint32_t))
+ if (End < HeaderOffset + getHeaderSize(Format) +
+ HeaderData.OffsetEntryCount * OffsetByteSize)
return createStringError(errc::invalid_argument,
- "%s table at offset 0x%" PRIx32 " has more offset entries (%" PRIu32
+ "%s table at offset 0x%" PRIx64 " has more offset entries (%" PRIu32
") than there is space for",
SectionName.data(), HeaderOffset, HeaderData.OffsetEntryCount);
Data.setAddressSize(HeaderData.AddrSize);
for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I)
- Offsets.push_back(Data.getRelocatedValue(4, OffsetPtr));
+ Offsets.push_back(Data.getRelocatedValue(OffsetByteSize, OffsetPtr));
return Error::success();
}
void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
if (DumpOpts.Verbose)
- OS << format("0x%8.8" PRIx32 ": ", HeaderOffset);
+ OS << format("0x%8.8" PRIx64 ": ", HeaderOffset);
OS << format(
- "%s list header: length = 0x%8.8" PRIx32 ", version = 0x%4.4" PRIx16 ", "
+ "%s list header: length = 0x%8.8" PRIx64 ", version = 0x%4.4" PRIx16 ", "
"addr_size = 0x%2.2" PRIx8 ", seg_size = 0x%2.2" PRIx8
", offset_entry_count = "
"0x%8.8" PRIx32 "\n",
@@ -91,18 +100,17 @@ void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
if (HeaderData.OffsetEntryCount > 0) {
OS << "offsets: [";
for (const auto &Off : Offsets) {
- OS << format("\n0x%8.8" PRIx32, Off);
+ OS << format("\n0x%8.8" PRIx64, Off);
if (DumpOpts.Verbose)
- OS << format(" => 0x%8.8" PRIx32,
- Off + HeaderOffset + sizeof(HeaderData));
+ OS << format(" => 0x%8.8" PRIx64,
+ Off + HeaderOffset + getHeaderSize(Format));
}
OS << "\n]\n";
}
}
-uint32_t DWARFListTableHeader::length() const {
+uint64_t DWARFListTableHeader::length() const {
if (HeaderData.Length == 0)
return 0;
- // TODO: DWARF64 support.
- return HeaderData.Length + sizeof(uint32_t);
+ return HeaderData.Length + dwarf::getUnitLengthFieldByteSize(Format);
}
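The rewritten length() relies on the initial-length field itself occupying
4 bytes in DWARF32 and 12 in DWARF64 (the 4-byte 0xffffffff escape plus the
8-byte length), which is what dwarf::getUnitLengthFieldByteSize returns. A
tiny self-check of that accounting:

#include <cstdint>

enum class DwarfFormat { DWARF32, DWARF64 };

// Bytes occupied by the initial length field itself.
constexpr uint64_t unitLengthFieldByteSize(DwarfFormat F) {
  return F == DwarfFormat::DWARF32 ? 4 : 12;
}

// Full on-disk table size: the stored length excludes the length field.
constexpr uint64_t fullLength(uint64_t StoredLength, DwarfFormat F) {
  return StoredLength + unitLengthFieldByteSize(F);
}

static_assert(fullLength(0x20, DwarfFormat::DWARF64) == 0x2c,
              "escape (4) + 64-bit length (8) precede the payload");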
diff --git a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
index 844920ba5b11..bb81090ba25c 100644
--- a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
@@ -24,21 +24,23 @@ void DWARFTypeUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
if (DumpOpts.SummarizeTypes) {
OS << "name = '" << Name << "'"
<< " type_signature = " << format("0x%016" PRIx64, getTypeHash())
- << " length = " << format("0x%08x", getLength()) << '\n';
+ << " length = " << format("0x%08" PRIx64, getLength()) << '\n';
return;
}
- OS << format("0x%08x", getOffset()) << ": Type Unit:"
- << " length = " << format("0x%08x", getLength())
+ OS << format("0x%08" PRIx64, getOffset()) << ": Type Unit:"
+ << " length = " << format("0x%08" PRIx64, getLength())
<< " version = " << format("0x%04x", getVersion());
if (getVersion() >= 5)
OS << " unit_type = " << dwarf::UnitTypeString(getUnitType());
- OS << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
+ OS << " abbr_offset = "
+ << format("0x%04" PRIx64, getAbbreviations()->getOffset())
<< " addr_size = " << format("0x%02x", getAddressByteSize())
<< " name = '" << Name << "'"
<< " type_signature = " << format("0x%016" PRIx64, getTypeHash())
- << " type_offset = " << format("0x%04x", getTypeOffset())
- << " (next unit at " << format("0x%08x", getNextUnitOffset()) << ")\n";
+ << " type_offset = " << format("0x%04" PRIx64, getTypeOffset())
+ << " (next unit at " << format("0x%08" PRIx64, getNextUnitOffset())
+ << ")\n";
if (DWARFDie TU = getUnitDIE(false))
TU.dump(OS, 0, DumpOpts);
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index b74acf60c747..a56402a707ad 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -37,9 +37,9 @@ void DWARFUnitVector::addUnitsForSection(DWARFContext &C,
const DWARFSection &Section,
DWARFSectionKind SectionKind) {
const DWARFObject &D = C.getDWARFObj();
- addUnitsImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangeSection(),
- &D.getLocSection(), D.getStringSection(),
- D.getStringOffsetSection(), &D.getAddrSection(),
+ addUnitsImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangesSection(),
+ &D.getLocSection(), D.getStrSection(),
+ D.getStrOffsetsSection(), &D.getAddrSection(),
D.getLineSection(), D.isLittleEndian(), false, false,
SectionKind);
}
@@ -49,9 +49,9 @@ void DWARFUnitVector::addUnitsForDWOSection(DWARFContext &C,
DWARFSectionKind SectionKind,
bool Lazy) {
const DWARFObject &D = C.getDWARFObj();
- addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangeDWOSection(),
- &D.getLocDWOSection(), D.getStringDWOSection(),
- D.getStringOffsetDWOSection(), &D.getAddrSection(),
+ addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangesDWOSection(),
+ &D.getLocDWOSection(), D.getStrDWOSection(),
+ D.getStrOffsetsDWOSection(), &D.getAddrSection(),
D.getLineDWOSection(), C.isLittleEndian(), true, Lazy,
SectionKind);
}
@@ -66,7 +66,7 @@ void DWARFUnitVector::addUnitsImpl(
// Lazy initialization of Parser, now that we have all section info.
if (!Parser) {
Parser = [=, &Context, &Obj, &Section, &SOS,
- &LS](uint32_t Offset, DWARFSectionKind SectionKind,
+ &LS](uint64_t Offset, DWARFSectionKind SectionKind,
const DWARFSection *CurSection,
const DWARFUnitIndex::Entry *IndexEntry)
-> std::unique_ptr<DWARFUnit> {
@@ -83,11 +83,11 @@ void DWARFUnitVector::addUnitsImpl(
return nullptr;
std::unique_ptr<DWARFUnit> U;
if (Header.isTypeUnit())
- U = llvm::make_unique<DWARFTypeUnit>(Context, InfoSection, Header, DA,
+ U = std::make_unique<DWARFTypeUnit>(Context, InfoSection, Header, DA,
RS, LocSection, SS, SOS, AOS, LS,
LE, IsDWO, *this);
else
- U = llvm::make_unique<DWARFCompileUnit>(Context, InfoSection, Header,
+ U = std::make_unique<DWARFCompileUnit>(Context, InfoSection, Header,
DA, RS, LocSection, SS, SOS,
AOS, LS, LE, IsDWO, *this);
return U;
@@ -101,7 +101,7 @@ void DWARFUnitVector::addUnitsImpl(
// within a section, although not necessarily within the object file,
// even if we do lazy parsing.
auto I = this->begin();
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
while (Data.isValidOffset(Offset)) {
if (I != this->end() &&
(&(*I)->getInfoSection() != &Section || (*I)->getOffset() == Offset)) {
@@ -126,11 +126,11 @@ DWARFUnit *DWARFUnitVector::addUnit(std::unique_ptr<DWARFUnit> Unit) {
return this->insert(I, std::move(Unit))->get();
}
-DWARFUnit *DWARFUnitVector::getUnitForOffset(uint32_t Offset) const {
+DWARFUnit *DWARFUnitVector::getUnitForOffset(uint64_t Offset) const {
auto end = begin() + getNumInfoUnits();
auto *CU =
std::upper_bound(begin(), end, Offset,
- [](uint32_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
+ [](uint64_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
return LHS < RHS->getNextUnitOffset();
});
if (CU != end && (*CU)->getOffset() <= Offset)
@@ -149,7 +149,7 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
auto *CU =
std::upper_bound(begin(), end, CUOff->Offset,
- [](uint32_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
+ [](uint64_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
return LHS < RHS->getNextUnitOffset();
});
if (CU != end && (*CU)->getOffset() <= Offset)
@@ -209,7 +209,7 @@ DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const {
if (I != R.end() && std::next(I) == R.end())
return (*I)->getAddrOffsetSectionItem(Index);
}
- uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize();
+ uint64_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize();
if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize())
return None;
DWARFDataExtractor DA(Context.getDWARFObj(), *AddrOffsetSection,
@@ -223,7 +223,7 @@ Optional<uint64_t> DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const {
if (!StringOffsetsTableContribution)
return None;
unsigned ItemSize = getDwarfStringOffsetsByteSize();
- uint32_t Offset = getStringOffsetsBase() + Index * ItemSize;
+ uint64_t Offset = getStringOffsetsBase() + Index * ItemSize;
if (StringOffsetSection.Data.size() < Offset + ItemSize)
return None;
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
@@ -233,7 +233,7 @@ Optional<uint64_t> DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const {
bool DWARFUnitHeader::extract(DWARFContext &Context,
const DWARFDataExtractor &debug_info,
- uint32_t *offset_ptr,
+ uint64_t *offset_ptr,
DWARFSectionKind SectionKind,
const DWARFUnitIndex *Index,
const DWARFUnitIndex::Entry *Entry) {
@@ -243,11 +243,9 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
IndexEntry = Index->getFromOffset(*offset_ptr);
Length = debug_info.getRelocatedValue(4, offset_ptr);
FormParams.Format = DWARF32;
- unsigned SizeOfLength = 4;
- if (Length == 0xffffffff) {
+ if (Length == dwarf::DW_LENGTH_DWARF64) {
Length = debug_info.getU64(offset_ptr);
FormParams.Format = DWARF64;
- SizeOfLength = 8;
}
FormParams.Version = debug_info.getU16(offset_ptr);
if (FormParams.Version >= 5) {
@@ -277,7 +275,8 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
}
if (isTypeUnit()) {
TypeHash = debug_info.getU64(offset_ptr);
- TypeOffset = debug_info.getU32(offset_ptr);
+ TypeOffset =
+ debug_info.getUnsigned(offset_ptr, FormParams.getDwarfOffsetByteSize());
} else if (UnitType == DW_UT_split_compile || UnitType == DW_UT_skeleton)
DWOId = debug_info.getU64(offset_ptr);
@@ -290,7 +289,8 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
bool TypeOffsetOK =
!isTypeUnit()
? true
- : TypeOffset >= Size && TypeOffset < getLength() + SizeOfLength;
+ : TypeOffset >= Size &&
+ TypeOffset < getLength() + getUnitLengthFieldByteSize();
bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
bool VersionOK = DWARFContext::isSupportedVersion(getVersion());
bool AddrSizeOK = getAddressByteSize() == 4 || getAddressByteSize() == 8;
@@ -306,16 +306,18 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
// Parse the rangelist table header, including the optional array of offsets
// following it (DWARF v5 and later).
static Expected<DWARFDebugRnglistTable>
-parseRngListTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
- // TODO: Support DWARF64
+parseRngListTableHeader(DWARFDataExtractor &DA, uint64_t Offset,
+ DwarfFormat Format) {
// We are expected to be called with Offset 0 or pointing just past the table
- // header, which is 12 bytes long for DWARF32.
+ // header. Correct Offset in the latter case so that it points to the start
+ // of the header.
if (Offset > 0) {
- if (Offset < 12U)
+ uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Format);
+ if (Offset < HeaderSize)
return createStringError(errc::invalid_argument, "Did not detect a valid"
- " range list table with base = 0x%" PRIu32,
+ " range list table with base = 0x%" PRIx64 "\n",
Offset);
- Offset -= 12U;
+ Offset -= HeaderSize;
}
llvm::DWARFDebugRnglistTable Table;
if (Error E = Table.extractHeaderAndOffsets(DA, &Offset))
@@ -323,13 +325,13 @@ parseRngListTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
return Table;
}
-Error DWARFUnit::extractRangeList(uint32_t RangeListOffset,
+Error DWARFUnit::extractRangeList(uint64_t RangeListOffset,
DWARFDebugRangeList &RangeList) const {
// Require that compile unit is extracted.
assert(!DieArray.empty());
DWARFDataExtractor RangesData(Context.getDWARFObj(), *RangeSection,
isLittleEndian, getAddressByteSize());
- uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset;
+ uint64_t ActualRangeListOffset = RangeSectionBase + RangeListOffset;
return RangeList.extract(RangesData, &ActualRangeListOffset);
}
@@ -354,8 +356,8 @@ void DWARFUnit::extractDIEsToVector(
// Set the offset to that of the first DIE and calculate the start of the
// next compilation unit header.
- uint32_t DIEOffset = getOffset() + getHeaderSize();
- uint32_t NextCUOffset = getNextUnitOffset();
+ uint64_t DIEOffset = getOffset() + getHeaderSize();
+ uint64_t NextCUOffset = getNextUnitOffset();
DWARFDebugInfoEntry DIE;
DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
uint32_t Depth = 0;
@@ -396,90 +398,98 @@ void DWARFUnit::extractDIEsToVector(
// unit header).
if (DIEOffset > NextCUOffset)
WithColor::warning() << format("DWARF compile unit extends beyond its "
- "bounds cu 0x%8.8x at 0x%8.8x\n",
+ "bounds cu 0x%8.8" PRIx64 " "
+ "at 0x%8.8" PRIx64 "\n",
getOffset(), DIEOffset);
}
-size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
+void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
+ if (Error e = tryExtractDIEsIfNeeded(CUDieOnly))
+ WithColor::error() << toString(std::move(e));
+}
+
+Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
if ((CUDieOnly && !DieArray.empty()) ||
DieArray.size() > 1)
- return 0; // Already parsed.
+ return Error::success(); // Already parsed.
bool HasCUDie = !DieArray.empty();
extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
if (DieArray.empty())
- return 0;
+ return Error::success();
// If CU DIE was just parsed, copy several attribute values from it.
- if (!HasCUDie) {
- DWARFDie UnitDie = getUnitDIE();
- if (Optional<uint64_t> DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
- Header.setDWOId(*DWOId);
- if (!IsDWO) {
- assert(AddrOffsetSectionBase == 0);
- assert(RangeSectionBase == 0);
- AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base), 0);
- if (!AddrOffsetSectionBase)
- AddrOffsetSectionBase =
- toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
- RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
- }
-
- // In general, in DWARF v5 and beyond we derive the start of the unit's
- // contribution to the string offsets table from the unit DIE's
- // DW_AT_str_offsets_base attribute. Split DWARF units do not use this
- // attribute, so we assume that there is a contribution to the string
- // offsets table starting at offset 0 of the debug_str_offsets.dwo section.
- // In both cases we need to determine the format of the contribution,
- // which may differ from the unit's format.
- DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
- isLittleEndian, 0);
- if (IsDWO || getVersion() >= 5) {
- auto StringOffsetOrError =
- IsDWO ? determineStringOffsetsTableContributionDWO(DA)
- : determineStringOffsetsTableContribution(DA);
- if (!StringOffsetOrError) {
- WithColor::error() << "invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: "
- << toString(StringOffsetOrError.takeError()) << '\n';
- } else {
- StringOffsetsTableContribution = *StringOffsetOrError;
- }
- }
+ if (HasCUDie)
+ return Error::success();
+
+ DWARFDie UnitDie(this, &DieArray[0]);
+ if (Optional<uint64_t> DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
+ Header.setDWOId(*DWOId);
+ if (!IsDWO) {
+ assert(AddrOffsetSectionBase == 0);
+ assert(RangeSectionBase == 0);
+ AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base), 0);
+ if (!AddrOffsetSectionBase)
+ AddrOffsetSectionBase =
+ toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
+ RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
+ }
- // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
- // describe address ranges.
- if (getVersion() >= 5) {
- if (IsDWO)
- setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0);
- else
- setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
- toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0));
- if (RangeSection->Data.size()) {
- // Parse the range list table header. Individual range lists are
- // extracted lazily.
- DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
- isLittleEndian, 0);
- if (auto TableOrError =
- parseRngListTableHeader(RangesDA, RangeSectionBase))
- RngListTable = TableOrError.get();
- else
- WithColor::error() << "parsing a range list table: "
- << toString(TableOrError.takeError())
- << '\n';
-
- // In a split dwarf unit, there is no DW_AT_rnglists_base attribute.
- // Adjust RangeSectionBase to point past the table header.
- if (IsDWO && RngListTable)
- RangeSectionBase = RngListTable->getHeaderSize();
- }
- }
+ // In general, in DWARF v5 and beyond we derive the start of the unit's
+ // contribution to the string offsets table from the unit DIE's
+ // DW_AT_str_offsets_base attribute. Split DWARF units do not use this
+ // attribute, so we assume that there is a contribution to the string
+ // offsets table starting at offset 0 of the debug_str_offsets.dwo section.
+ // In both cases we need to determine the format of the contribution,
+ // which may differ from the unit's format.
+ DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
+ isLittleEndian, 0);
+ if (IsDWO || getVersion() >= 5) {
+ auto StringOffsetOrError =
+ IsDWO ? determineStringOffsetsTableContributionDWO(DA)
+ : determineStringOffsetsTableContribution(DA);
+ if (!StringOffsetOrError)
+ return createStringError(errc::invalid_argument,
+ "invalid reference to or invalid content in "
+ ".debug_str_offsets[.dwo]: " +
+ toString(StringOffsetOrError.takeError()));
+
+ StringOffsetsTableContribution = *StringOffsetOrError;
+ }
- // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
- // skeleton CU DIE, so that DWARF users not aware of it are not broken.
+ // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
+ // describe address ranges.
+ if (getVersion() >= 5) {
+ if (IsDWO)
+ setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0);
+ else
+ setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
+ toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0));
+ if (RangeSection->Data.size()) {
+ // Parse the range list table header. Individual range lists are
+ // extracted lazily.
+ DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
+ isLittleEndian, 0);
+ auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase,
+ Header.getFormat());
+ if (!TableOrError)
+ return createStringError(errc::invalid_argument,
+ "parsing a range list table: " +
+ toString(TableOrError.takeError()));
+
+ RngListTable = TableOrError.get();
+
+ // In a split dwarf unit, there is no DW_AT_rnglists_base attribute.
+ // Adjust RangeSectionBase to point past the table header.
+ if (IsDWO && RngListTable)
+ RangeSectionBase = RngListTable->getHeaderSize();
}
+ }
- return DieArray.size();
+ // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
+ // skeleton CU DIE, so that DWARF users not aware of it are not broken.
+ return Error::success();
}
bool DWARFUnit::parseDWO() {
@@ -517,7 +527,8 @@ bool DWARFUnit::parseDWO() {
DWO->setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0);
DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
isLittleEndian, 0);
- if (auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase))
+ if (auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase,
+ Header.getFormat()))
DWO->RngListTable = TableOrError.get();
else
WithColor::error() << "parsing a range list table: "
@@ -541,7 +552,7 @@ void DWARFUnit::clearDIEs(bool KeepCUDie) {
}
Expected<DWARFAddressRangesVector>
-DWARFUnit::findRnglistFromOffset(uint32_t Offset) {
+DWARFUnit::findRnglistFromOffset(uint64_t Offset) {
if (getVersion() <= 4) {
DWARFDebugRangeList RangeList;
if (Error E = extractRangeList(Offset, RangeList))
@@ -569,9 +580,9 @@ DWARFUnit::findRnglistFromIndex(uint32_t Index) {
if (RngListTable)
return createStringError(errc::invalid_argument,
"invalid range list table index %d", Index);
- else
- return createStringError(errc::invalid_argument,
- "missing or invalid range list table");
+
+ return createStringError(errc::invalid_argument,
+ "missing or invalid range list table");
}
Expected<DWARFAddressRangesVector> DWARFUnit::collectAddressRanges() {
@@ -780,11 +791,11 @@ StrOffsetsContributionDescriptor::validateContributionSize(
// Look for a DWARF64-formatted contribution to the string offsets table
// starting at a given offset and record it in a descriptor.
static Expected<StrOffsetsContributionDescriptor>
-parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
+parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint64_t Offset) {
if (!DA.isValidOffsetForDataOfSize(Offset, 16))
return createStringError(errc::invalid_argument, "section offset exceeds section size");
- if (DA.getU32(&Offset) != 0xffffffff)
+ if (DA.getU32(&Offset) != dwarf::DW_LENGTH_DWARF64)
return createStringError(errc::invalid_argument, "32 bit contribution referenced from a 64 bit unit");
uint64_t Size = DA.getU64(&Offset);
@@ -798,12 +809,12 @@ parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
// Look for a DWARF32-formatted contribution to the string offsets table
// starting at a given offset and record it in a descriptor.
static Expected<StrOffsetsContributionDescriptor>
-parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
+parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint64_t Offset) {
if (!DA.isValidOffsetForDataOfSize(Offset, 8))
return createStringError(errc::invalid_argument, "section offset exceeds section size");
uint32_t ContributionSize = DA.getU32(&Offset);
- if (ContributionSize >= 0xfffffff0)
+ if (ContributionSize >= dwarf::DW_LENGTH_lo_reserved)
return createStringError(errc::invalid_argument, "invalid length");
uint8_t Version = DA.getU16(&Offset);
@@ -823,7 +834,7 @@ parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA,
case dwarf::DwarfFormat::DWARF64: {
if (Offset < 16)
return createStringError(errc::invalid_argument, "insufficient space for 64 bit header prefix");
- auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
+ auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, Offset - 16);
if (!DescOrError)
return DescOrError.takeError();
Desc = *DescOrError;
@@ -832,7 +843,7 @@ parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA,
case dwarf::DwarfFormat::DWARF32: {
if (Offset < 8)
return createStringError(errc::invalid_argument, "insufficient space for 32 bit header prefix");
- auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
+ auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, Offset - 8);
if (!DescOrError)
return DescOrError.takeError();
Desc = *DescOrError;
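The comparator widening in getUnitForOffset above sits inside the usual sorted-range lookup: units are ordered by offset and non-overlapping, so an upper_bound on the end offset yields the single candidate in O(log n). A self-contained sketch of the same pattern with simplified types (not the LLVM classes):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Unit {
  uint64_t Offset;     // where the unit starts in .debug_info
  uint64_t NextOffset; // one past its last byte
};

// Returns the unit containing Off, or nullptr. The first unit whose
// NextOffset exceeds Off is the only candidate; we then check that Off
// actually falls inside it.
const Unit *findUnit(const std::vector<Unit> &Units, uint64_t Off) {
  auto It = std::upper_bound(
      Units.begin(), Units.end(), Off,
      [](uint64_t LHS, const Unit &RHS) { return LHS < RHS.NextOffset; });
  if (It != Units.end() && It->Offset <= Off)
    return &*It;
  return nullptr;
}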
diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index 047c63461ccf..f29c1e6cc5c7 100644
--- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -18,7 +18,7 @@
using namespace llvm;
bool DWARFUnitIndex::Header::parse(DataExtractor IndexData,
- uint32_t *OffsetPtr) {
+ uint64_t *OffsetPtr) {
if (!IndexData.isValidOffsetForDataOfSize(*OffsetPtr, 16))
return false;
Version = IndexData.getU32(OffsetPtr);
@@ -45,7 +45,7 @@ bool DWARFUnitIndex::parse(DataExtractor IndexData) {
}
bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) {
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
if (!Header.parse(IndexData, &Offset))
return false;
@@ -54,10 +54,10 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) {
(2 * Header.NumUnits + 1) * 4 * Header.NumColumns))
return false;
- Rows = llvm::make_unique<Entry[]>(Header.NumBuckets);
+ Rows = std::make_unique<Entry[]>(Header.NumBuckets);
auto Contribs =
- llvm::make_unique<Entry::SectionContribution *[]>(Header.NumUnits);
- ColumnKinds = llvm::make_unique<DWARFSectionKind[]>(Header.NumColumns);
+ std::make_unique<Entry::SectionContribution *[]>(Header.NumUnits);
+ ColumnKinds = std::make_unique<DWARFSectionKind[]>(Header.NumColumns);
// Read Hash Table of Signatures
for (unsigned i = 0; i != Header.NumBuckets; ++i)
@@ -70,7 +70,7 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) {
continue;
Rows[i].Index = this;
Rows[i].Contributions =
- llvm::make_unique<Entry::SectionContribution[]>(Header.NumColumns);
+ std::make_unique<Entry::SectionContribution[]>(Header.NumColumns);
Contribs[Index - 1] = Rows[i].Contributions.get();
}
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index c2b3189514a8..bf499b6ee092 100644
--- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -34,11 +34,11 @@ DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) {
if (Pos != End) {
if (Pos->intersects(R))
- return Pos;
+ return std::move(Pos);
if (Pos != Begin) {
auto Iter = Pos - 1;
if (Iter->intersects(R))
- return Iter;
+ return std::move(Iter);
}
}
@@ -98,7 +98,7 @@ bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
}
bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
- uint32_t *Offset, unsigned UnitIndex,
+ uint64_t *Offset, unsigned UnitIndex,
uint8_t &UnitType, bool &isUnitDWARF64) {
uint64_t AbbrOffset, Length;
uint8_t AddrSize = 0;
@@ -111,9 +111,9 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
bool ValidType = true;
bool ValidAbbrevOffset = true;
- uint32_t OffsetStart = *Offset;
+ uint64_t OffsetStart = *Offset;
Length = DebugInfoData.getU32(Offset);
- if (Length == UINT32_MAX) {
+ if (Length == dwarf::DW_LENGTH_DWARF64) {
Length = DebugInfoData.getU64(Offset);
isUnitDWARF64 = true;
}
@@ -139,7 +139,7 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset ||
!ValidType) {
Success = false;
- error() << format("Units[%d] - start offset: 0x%08x \n", UnitIndex,
+ error() << format("Units[%d] - start offset: 0x%08" PRIx64 " \n", UnitIndex,
OffsetStart);
if (!ValidLength)
note() << "The length for this unit is too "
@@ -203,7 +203,7 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
}
unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) {
- if (Die.getTag() != DW_TAG_call_site)
+ if (Die.getTag() != DW_TAG_call_site && Die.getTag() != DW_TAG_GNU_call_site)
return 0;
DWARFDie Curr = Die.getParent();
@@ -223,7 +223,9 @@ unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) {
Optional<DWARFFormValue> CallAttr =
Curr.find({DW_AT_call_all_calls, DW_AT_call_all_source_calls,
- DW_AT_call_all_tail_calls});
+ DW_AT_call_all_tail_calls, DW_AT_GNU_all_call_sites,
+ DW_AT_GNU_all_source_call_sites,
+ DW_AT_GNU_all_tail_call_sites});
if (!CallAttr) {
error() << "Subprogram with call site entry has no DW_AT_call attribute:";
Curr.dump(OS);
@@ -273,7 +275,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
const DWARFObject &DObj = DCtx.getDWARFObj();
DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
unsigned NumDebugInfoErrors = 0;
- uint32_t OffsetStart = 0, Offset = 0, UnitIdx = 0;
+ uint64_t OffsetStart = 0, Offset = 0, UnitIdx = 0;
uint8_t UnitType = 0;
bool isUnitDWARF64 = false;
bool isHeaderChainValid = true;
@@ -294,10 +296,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
switch (UnitType) {
case dwarf::DW_UT_type:
case dwarf::DW_UT_split_type: {
- Unit = TypeUnitVector.addUnit(llvm::make_unique<DWARFTypeUnit>(
- DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(),
- &DObj.getLocSection(), DObj.getStringSection(),
- DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(),
+ Unit = TypeUnitVector.addUnit(std::make_unique<DWARFTypeUnit>(
+ DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(),
+ &DObj.getLocSection(), DObj.getStrSection(),
+ DObj.getStrOffsetsSection(), &DObj.getAppleObjCSection(),
DObj.getLineSection(), DCtx.isLittleEndian(), false,
TypeUnitVector));
break;
@@ -308,10 +310,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
case dwarf::DW_UT_partial:
// UnitType = 0 means that we are verifying a compile unit in DWARF v4.
case 0: {
- Unit = CompileUnitVector.addUnit(llvm::make_unique<DWARFCompileUnit>(
- DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(),
- &DObj.getLocSection(), DObj.getStringSection(),
- DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(),
+ Unit = CompileUnitVector.addUnit(std::make_unique<DWARFCompileUnit>(
+ DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(),
+ &DObj.getLocSection(), DObj.getStrSection(),
+ DObj.getStrOffsetsSection(), &DObj.getAppleObjCSection(),
DObj.getLineSection(), DCtx.isLittleEndian(), false,
CompileUnitVector));
break;
@@ -449,7 +451,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
case DW_AT_ranges:
// Make sure the offset in the DW_AT_ranges attribute is valid.
if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
- if (*SectionOffset >= DObj.getRangeSection().Data.size())
+ if (*SectionOffset >= DObj.getRangesSection().Data.size())
ReportError("DW_AT_ranges offset is beyond .debug_ranges bounds:");
break;
}
@@ -466,9 +468,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
ReportError("DIE has invalid DW_AT_stmt_list encoding:");
break;
case DW_AT_location: {
- auto VerifyLocationExpr = [&](StringRef D) {
+ auto VerifyLocationExpr = [&](ArrayRef<uint8_t> D) {
DWARFUnit *U = Die.getDwarfUnit();
- DataExtractor Data(D, DCtx.isLittleEndian(), 0);
+ DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), 0);
DWARFExpression Expression(Data, U->getVersion(),
U->getAddressByteSize());
bool Error = llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
@@ -479,13 +481,13 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
};
if (Optional<ArrayRef<uint8_t>> Expr = AttrValue.Value.getAsBlock()) {
// Verify inlined location.
- VerifyLocationExpr(llvm::toStringRef(*Expr));
+ VerifyLocationExpr(*Expr);
} else if (auto LocOffset = AttrValue.Value.getAsSectionOffset()) {
// Verify location list.
if (auto DebugLoc = DCtx.getDebugLoc())
if (auto LocList = DebugLoc->getLocationListAtOffset(*LocOffset))
for (const auto &Entry : LocList->Entries)
- VerifyLocationExpr({Entry.Loc.data(), Entry.Loc.size()});
+ VerifyLocationExpr(Entry.Loc);
}
break;
}
@@ -500,6 +502,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
break;
if (DieTag == DW_TAG_variable && RefTag == DW_TAG_member)
break;
+ // This might be reference to a function declaration.
+ if (DieTag == DW_TAG_GNU_call_site && RefTag == DW_TAG_subprogram)
+ break;
ReportError("DIE with tag " + TagString(DieTag) + " has " +
AttributeString(Attr) +
" that points to DIE with "
@@ -545,7 +550,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
error() << FormEncodingString(Form) << " CU offset "
<< format("0x%08" PRIx64, CUOffset)
<< " is invalid (must be less than CU size of "
- << format("0x%08" PRIx32, CUSize) << "):\n";
+ << format("0x%08" PRIx64, CUSize) << "):\n";
Die.dump(OS, 0, DumpOpts);
dump(Die) << '\n';
} else {
@@ -578,7 +583,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
case DW_FORM_strp: {
auto SecOffset = AttrValue.Value.getAsSectionOffset();
assert(SecOffset); // DW_FORM_strp is a section offset.
- if (SecOffset && *SecOffset >= DObj.getStringSection().size()) {
+ if (SecOffset && *SecOffset >= DObj.getStrSection().size()) {
++NumErrors;
error() << "DW_FORM_strp offset beyond .debug_str bounds:\n";
dump(Die) << '\n';
@@ -605,7 +610,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
// Use a 64-bit type to calculate the offset to guard against overflow.
uint64_t Offset =
(uint64_t)DieCU->getStringOffsetsBase() + Index * ItemSize;
- if (DObj.getStringOffsetSection().Data.size() < Offset + ItemSize) {
+ if (DObj.getStrOffsetsSection().Data.size() < Offset + ItemSize) {
++NumErrors;
error() << FormEncodingString(Form) << " uses index "
<< format("%" PRIu64, Index) << ", which is too large:\n";
@@ -614,7 +619,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
}
// Check that the string offset is valid.
uint64_t StringOffset = *DieCU->getStringOffsetSectionItem(Index);
- if (StringOffset >= DObj.getStringSection().size()) {
+ if (StringOffset >= DObj.getStrSection().size()) {
++NumErrors;
error() << FormEncodingString(Form) << " uses index "
<< format("%" PRIu64, Index)
@@ -635,7 +640,7 @@ unsigned DWARFVerifier::verifyDebugInfoReferences() {
// getting the DIE by offset and emitting an error
OS << "Verifying .debug_info references...\n";
unsigned NumErrors = 0;
- for (const std::pair<uint64_t, std::set<uint32_t>> &Pair :
+ for (const std::pair<uint64_t, std::set<uint64_t>> &Pair :
ReferenceToDIEOffsets) {
if (DCtx.getDIEForOffset(Pair.first))
continue;
@@ -659,12 +664,12 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() {
auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list));
if (!StmtSectionOffset)
continue;
- const uint32_t LineTableOffset = *StmtSectionOffset;
+ const uint64_t LineTableOffset = *StmtSectionOffset;
auto LineTable = DCtx.getLineTableForUnit(CU.get());
if (LineTableOffset < DCtx.getDWARFObj().getLineSection().Data.size()) {
if (!LineTable) {
++NumDebugLineErrors;
- error() << ".debug_line[" << format("0x%08" PRIx32, LineTableOffset)
+ error() << ".debug_line[" << format("0x%08" PRIx64, LineTableOffset)
<< "] was not able to be parsed for CU:\n";
dump(Die) << '\n';
continue;
@@ -680,8 +685,8 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() {
if (Iter != StmtListToDie.end()) {
++NumDebugLineErrors;
error() << "two compile unit DIEs, "
- << format("0x%08" PRIx32, Iter->second.getOffset()) << " and "
- << format("0x%08" PRIx32, Die.getOffset())
+ << format("0x%08" PRIx64, Iter->second.getOffset()) << " and "
+ << format("0x%08" PRIx64, Die.getOffset())
<< ", have the same DW_AT_stmt_list section offset:\n";
dump(Iter->second);
dump(Die) << '\n';
@@ -826,10 +831,10 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
uint32_t NumBuckets = AccelTable.getNumBuckets();
uint32_t NumHashes = AccelTable.getNumHashes();
- uint32_t BucketsOffset =
+ uint64_t BucketsOffset =
AccelTable.getSizeHdr() + AccelTable.getHeaderDataLength();
- uint32_t HashesBase = BucketsOffset + NumBuckets * 4;
- uint32_t OffsetsBase = HashesBase + NumHashes * 4;
+ uint64_t HashesBase = BucketsOffset + NumBuckets * 4;
+ uint64_t OffsetsBase = HashesBase + NumHashes * 4;
for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) {
uint32_t HashIdx = AccelSectionData.getU32(&BucketsOffset);
if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) {
@@ -849,28 +854,29 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
}
for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) {
- uint32_t HashOffset = HashesBase + 4 * HashIdx;
- uint32_t DataOffset = OffsetsBase + 4 * HashIdx;
+ uint64_t HashOffset = HashesBase + 4 * HashIdx;
+ uint64_t DataOffset = OffsetsBase + 4 * HashIdx;
uint32_t Hash = AccelSectionData.getU32(&HashOffset);
- uint32_t HashDataOffset = AccelSectionData.getU32(&DataOffset);
+ uint64_t HashDataOffset = AccelSectionData.getU32(&DataOffset);
if (!AccelSectionData.isValidOffsetForDataOfSize(HashDataOffset,
sizeof(uint64_t))) {
- error() << format("Hash[%d] has invalid HashData offset: 0x%08x.\n",
+ error() << format("Hash[%d] has invalid HashData offset: "
+ "0x%08" PRIx64 ".\n",
HashIdx, HashDataOffset);
++NumErrors;
}
- uint32_t StrpOffset;
- uint32_t StringOffset;
+ uint64_t StrpOffset;
+ uint64_t StringOffset;
uint32_t StringCount = 0;
- unsigned Offset;
+ uint64_t Offset;
unsigned Tag;
while ((StrpOffset = AccelSectionData.getU32(&HashDataOffset)) != 0) {
const uint32_t NumHashDataObjects =
AccelSectionData.getU32(&HashDataOffset);
for (uint32_t HashDataIdx = 0; HashDataIdx < NumHashDataObjects;
++HashDataIdx) {
- std::tie(Offset, Tag) = AccelTable.readAtoms(HashDataOffset);
+ std::tie(Offset, Tag) = AccelTable.readAtoms(&HashDataOffset);
auto Die = DCtx.getDIEForOffset(Offset);
if (!Die) {
const uint32_t BucketIdx =
@@ -882,8 +888,8 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
error() << format(
"%s Bucket[%d] Hash[%d] = 0x%08x "
- "Str[%u] = 0x%08x "
- "DIE[%d] = 0x%08x is not a valid DIE offset for \"%s\".\n",
+ "Str[%u] = 0x%08" PRIx64 " DIE[%d] = 0x%08" PRIx64 " "
+ "is not a valid DIE offset for \"%s\".\n",
SectionName, BucketIdx, HashIdx, Hash, StringCount, StrpOffset,
HashDataIdx, Offset, Name);
@@ -908,8 +914,8 @@ unsigned
DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
// A map from CU offset to the (first) Name Index offset which claims to index
// this CU.
- DenseMap<uint32_t, uint32_t> CUMap;
- const uint32_t NotIndexed = std::numeric_limits<uint32_t>::max();
+ DenseMap<uint64_t, uint64_t> CUMap;
+ const uint64_t NotIndexed = std::numeric_limits<uint64_t>::max();
CUMap.reserve(DCtx.getNumCompileUnits());
for (const auto &CU : DCtx.compile_units())
@@ -924,7 +930,7 @@ DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
continue;
}
for (uint32_t CU = 0, End = NI.getCUCount(); CU < End; ++CU) {
- uint32_t Offset = NI.getCUOffset(CU);
+ uint64_t Offset = NI.getCUOffset(CU);
auto Iter = CUMap.find(Offset);
if (Iter == CUMap.end()) {
@@ -1205,8 +1211,8 @@ unsigned DWARFVerifier::verifyNameIndexEntries(
unsigned NumErrors = 0;
unsigned NumEntries = 0;
- uint32_t EntryID = NTE.getEntryOffset();
- uint32_t NextEntryID = EntryID;
+ uint64_t EntryID = NTE.getEntryOffset();
+ uint64_t NextEntryID = EntryID;
Expected<DWARFDebugNames::Entry> EntryOr = NI.getEntry(&NextEntryID);
for (; EntryOr; ++NumEntries, EntryID = NextEntryID,
EntryOr = NI.getEntry(&NextEntryID)) {
@@ -1218,7 +1224,7 @@ unsigned DWARFVerifier::verifyNameIndexEntries(
++NumErrors;
continue;
}
- uint32_t CUOffset = NI.getCUOffset(CUIndex);
+ uint64_t CUOffset = NI.getCUOffset(CUIndex);
uint64_t DIEOffset = CUOffset + *EntryOr->getDIEUnitOffset();
DWARFDie DIE = DCtx.getDIEForOffset(DIEOffset);
if (!DIE) {
@@ -1276,9 +1282,9 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
if (!Location)
return false;
- auto ContainsInterestingOperators = [&](StringRef D) {
+ auto ContainsInterestingOperators = [&](ArrayRef<uint8_t> D) {
DWARFUnit *U = Die.getDwarfUnit();
- DataExtractor Data(D, DCtx.isLittleEndian(), U->getAddressByteSize());
+ DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), U->getAddressByteSize());
DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize());
return any_of(Expression, [](DWARFExpression::Operation &Op) {
return !Op.isError() && (Op.getCode() == DW_OP_addr ||
@@ -1289,7 +1295,7 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
if (Optional<ArrayRef<uint8_t>> Expr = Location->getAsBlock()) {
// Inlined location.
- if (ContainsInterestingOperators(toStringRef(*Expr)))
+ if (ContainsInterestingOperators(*Expr))
return true;
} else if (Optional<uint64_t> Offset = Location->getAsSectionOffset()) {
// Location list.
@@ -1297,7 +1303,7 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
if (const DWARFDebugLoc::LocationList *LocList =
DebugLoc->getLocationListAtOffset(*Offset)) {
if (any_of(LocList->Entries, [&](const DWARFDebugLoc::Entry &E) {
- return ContainsInterestingOperators({E.Loc.data(), E.Loc.size()});
+ return ContainsInterestingOperators(E.Loc);
}))
return true;
}
@@ -1455,7 +1461,7 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
bool DWARFVerifier::handleAccelTables() {
const DWARFObject &D = DCtx.getDWARFObj();
- DataExtractor StrData(D.getStringSection(), DCtx.isLittleEndian(), 0);
+ DataExtractor StrData(D.getStrSection(), DCtx.isLittleEndian(), 0);
unsigned NumErrors = 0;
if (!D.getAppleNamesSection().Data.empty())
NumErrors += verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData,
@@ -1470,8 +1476,8 @@ bool DWARFVerifier::handleAccelTables() {
NumErrors += verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData,
".apple_objc");
- if (!D.getDebugNamesSection().Data.empty())
- NumErrors += verifyDebugNames(D.getDebugNamesSection(), StrData);
+ if (!D.getNamesSection().Data.empty())
+ NumErrors += verifyDebugNames(D.getNamesSection(), StrData);
return NumErrors == 0;
}
diff --git a/lib/DebugInfo/GSYM/FileWriter.cpp b/lib/DebugInfo/GSYM/FileWriter.cpp
new file mode 100644
index 000000000000..4b30dcb60a7b
--- /dev/null
+++ b/lib/DebugInfo/GSYM/FileWriter.cpp
@@ -0,0 +1,78 @@
+//===- FileWriter.cpp -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+using namespace gsym;
+
+FileWriter::~FileWriter() { OS.flush(); }
+
+void FileWriter::writeSLEB(int64_t S) {
+ uint8_t Bytes[32];
+ auto Length = encodeSLEB128(S, Bytes);
+ assert(Length < sizeof(Bytes));
+ OS.write(reinterpret_cast<const char *>(Bytes), Length);
+}
+
+void FileWriter::writeULEB(uint64_t U) {
+ uint8_t Bytes[32];
+ auto Length = encodeULEB128(U, Bytes);
+ assert(Length < sizeof(Bytes));
+ OS.write(reinterpret_cast<const char *>(Bytes), Length);
+}
+
+void FileWriter::writeU8(uint8_t U) {
+ OS.write(reinterpret_cast<const char *>(&U), sizeof(U));
+}
+
+void FileWriter::writeU16(uint16_t U) {
+ const uint16_t Swapped = support::endian::byte_swap(U, ByteOrder);
+ OS.write(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped));
+}
+
+void FileWriter::writeU32(uint32_t U) {
+ const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder);
+ OS.write(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped));
+}
+
+void FileWriter::writeU64(uint64_t U) {
+ const uint64_t Swapped = support::endian::byte_swap(U, ByteOrder);
+ OS.write(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped));
+}
+
+void FileWriter::fixup32(uint32_t U, uint64_t Offset) {
+ const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder);
+ OS.pwrite(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped),
+ Offset);
+}
+
+void FileWriter::writeData(llvm::ArrayRef<uint8_t> Data) {
+ OS.write(reinterpret_cast<const char *>(Data.data()), Data.size());
+}
+
+void FileWriter::writeNullTerminated(llvm::StringRef Str) {
+ OS << Str << '\0';
+}
+
+uint64_t FileWriter::tell() {
+ return OS.tell();
+}
+
+void FileWriter::alignTo(size_t Align) {
+ off_t Offset = OS.tell();
+ off_t AlignedOffset = (Offset + Align - 1) / Align * Align;
+ if (AlignedOffset == Offset)
+ return;
+ off_t PadCount = AlignedOffset - Offset;
+ OS.write_zeros(PadCount);
+}
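FileWriter wraps two small encoding idioms: LEB128 via encodeULEB128/encodeSLEB128, and round-up alignment via the divide-multiply form in alignTo. Hedged reference sketches of both, with illustrative names:

#include <cstdint>
#include <vector>

// Unsigned LEB128: 7 data bits per byte, high bit set on every byte except
// the last. This is the encoding encodeULEB128 produces for writeULEB.
static std::vector<uint8_t> encodeULEB(uint64_t U) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = U & 0x7f;
    U >>= 7;
    if (U != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (U != 0);
  return Out;
}

// Round Offset up to the next multiple of Align; FileWriter::alignTo uses
// the same (Offset + Align - 1) / Align * Align form.
static uint64_t roundUp(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;
}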
diff --git a/lib/DebugInfo/GSYM/FunctionInfo.cpp b/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 55c36a55b4be..ad022fec9e32 100644
--- a/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -1,22 +1,147 @@
-//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===//
+//===- FunctionInfo.cpp ---------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/Support/DataExtractor.h"
using namespace llvm;
using namespace gsym;
+/// FunctionInfo information type that is used to encode the optional data
+/// that is associated with a FunctionInfo object.
+enum InfoType : uint32_t {
+ EndOfList = 0u,
+ LineTableInfo = 1u,
+ InlineInfo = 2u
+};
+
raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
OS << '[' << HEX64(FI.Range.Start) << '-' << HEX64(FI.Range.End) << "): "
- << "Name=" << HEX32(FI.Name) << '\n';
- for (const auto &Line : FI.Lines)
- OS << Line << '\n';
- OS << FI.Inline;
+ << "Name=" << HEX32(FI.Name) << '\n' << FI.OptLineTable << FI.Inline;
return OS;
}
+
+llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
+ uint64_t BaseAddr) {
+ FunctionInfo FI;
+ FI.Range.Start = BaseAddr;
+ uint64_t Offset = 0;
+ if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing FunctionInfo Size", Offset);
+ FI.Range.End = FI.Range.Start + Data.getU32(&Offset);
+ if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing FunctionInfo Name", Offset);
+ FI.Name = Data.getU32(&Offset);
+ if (FI.Name == 0)
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x%8.8x",
+ Offset - 4, FI.Name);
+ bool Done = false;
+ while (!Done) {
+ if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value", Offset);
+ const uint32_t IT = Data.getU32(&Offset);
+ if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length", Offset);
+ const uint32_t InfoLength = Data.getU32(&Offset);
+ if (!Data.isValidOffsetForDataOfSize(Offset, InfoLength))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u",
+ Offset, IT);
+ DataExtractor InfoData(Data.getData().substr(Offset, InfoLength),
+ Data.isLittleEndian(),
+ Data.getAddressSize());
+ switch (IT) {
+ case InfoType::EndOfList:
+ Done = true;
+ break;
+
+ case InfoType::LineTableInfo:
+ if (Expected<LineTable> LT = LineTable::decode(InfoData, BaseAddr))
+ FI.OptLineTable = std::move(LT.get());
+ else
+ return LT.takeError();
+ break;
+
+ case InfoType::InlineInfo:
+ if (Expected<InlineInfo> II = InlineInfo::decode(InfoData, BaseAddr))
+ FI.Inline = std::move(II.get());
+ else
+ return II.takeError();
+ break;
+
+ default:
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": unsupported InfoType %u",
+ Offset-8, IT);
+ }
+ Offset += InfoLength;
+ }
+ return std::move(FI);
+}
+
+llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
+ if (!isValid())
+ return createStringError(std::errc::invalid_argument,
+ "attempted to encode invalid FunctionInfo object");
+ // Align FunctionInfo data to a 4 byte alignment.
+ O.alignTo(4);
+ const uint64_t FuncInfoOffset = O.tell();
+ // Write the size in bytes of this function as a uint32_t. This can be zero
+ // if we just have a symbol from a symbol table and that symbol has no size.
+ O.writeU32(size());
+ // Write the name of this function as a uint32_t string table offset.
+ O.writeU32(Name);
+
+ if (OptLineTable.hasValue()) {
+ O.writeU32(InfoType::LineTableInfo);
+ // Write a uint32_t length as zero for now; we will fix this up after
+ // writing the LineTable out with the number of bytes that were written.
+ O.writeU32(0);
+ const auto StartOffset = O.tell();
+ llvm::Error err = OptLineTable->encode(O, Range.Start);
+ if (err)
+ return std::move(err);
+ const off_t Length = O.tell() - StartOffset;
+ if (Length > UINT32_MAX)
+ return createStringError(std::errc::invalid_argument,
+ "LineTable length is greater than UINT32_MAX");
+ // Fixup the size of the LineTable data with the correct size.
+ O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+ }
+
+ // Write out the inline function info if we have any and if it is valid.
+ if (Inline.hasValue()) {
+ O.writeU32(InfoType::InlineInfo);
+ // Write a uint32_t length as zero for now; we will fix this up after
+ // writing the InlineInfo out with the number of bytes that were written.
+ O.writeU32(0);
+ const auto StartOffset = O.tell();
+ llvm::Error err = Inline->encode(O, Range.Start);
+ if (err)
+ return std::move(err);
+ const off_t Length = O.tell() - StartOffset;
+ if (Length > UINT32_MAX)
+ return createStringError(std::errc::invalid_argument,
+ "InlineInfo length is greater than UINT32_MAX");
+ // Fixup the size of the InlineInfo data with the correct size.
+ O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+ }
+
+ // Terminate the data chunks with an end-of-list marker of zero size.
+ O.writeU32(InfoType::EndOfList);
+ O.writeU32(0);
+ return FuncInfoOffset;
+}
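Both optional chunks in FunctionInfo::encode use the same framing: a u32 InfoType, a u32 length written as zero, the payload, then a fixup of the length once the payload size is known. A sketch of that pattern; the Writer interface here merely mirrors gsym::FileWriter's writeU32/tell/fixup32 and is an assumption, not the LLVM API:

#include <cstdint>

// Writes one framed chunk: type, placeholder length, payload, then patches
// the length in place. EncodePayload returns false on failure.
template <typename Writer, typename EncodeFn>
bool writeChunk(Writer &O, uint32_t Type, EncodeFn EncodePayload) {
  O.writeU32(Type);
  O.writeU32(0); // length placeholder, patched below
  const uint64_t Start = O.tell();
  if (!EncodePayload(O))
    return false;
  const uint64_t Length = O.tell() - Start;
  if (Length > UINT32_MAX)
    return false; // length must fit the u32 field
  O.fixup32(static_cast<uint32_t>(Length), Start - 4);
  return true;
}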
diff --git a/lib/DebugInfo/GSYM/GsymCreator.cpp b/lib/DebugInfo/GSYM/GsymCreator.cpp
new file mode 100644
index 000000000000..f371426f2010
--- /dev/null
+++ b/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -0,0 +1,275 @@
+//===- GsymCreator.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/Header.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <vector>
+
+using namespace llvm;
+using namespace gsym;
+
+
+GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) {
+ insertFile(StringRef());
+}
+
+uint32_t GsymCreator::insertFile(StringRef Path,
+ llvm::sys::path::Style Style) {
+ llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
+ llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
+ FileEntry FE(insertString(directory), insertString(filename));
+
+ std::lock_guard<std::recursive_mutex> Guard(Mutex);
+ const auto NextIndex = Files.size();
+ // Find FE in hash map and insert if not present.
+ auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
+ if (R.second)
+ Files.emplace_back(FE);
+ return R.first->second;
+}
+
+llvm::Error GsymCreator::save(StringRef Path,
+ llvm::support::endianness ByteOrder) const {
+ std::error_code EC;
+ raw_fd_ostream OutStrm(Path, EC);
+ if (EC)
+ return llvm::errorCodeToError(EC);
+ FileWriter O(OutStrm, ByteOrder);
+ return encode(O);
+}
+
+llvm::Error GsymCreator::encode(FileWriter &O) const {
+ std::lock_guard<std::recursive_mutex> Guard(Mutex);
+ if (Funcs.empty())
+ return createStringError(std::errc::invalid_argument,
+ "no functions to encode");
+ if (!Finalized)
+ return createStringError(std::errc::invalid_argument,
+ "GsymCreator wasn't finalized prior to encoding");
+
+ if (Funcs.size() > UINT32_MAX)
+ return createStringError(std::errc::invalid_argument,
+ "too many FunctionInfos");
+ const uint64_t MinAddr = Funcs.front().startAddress();
+ const uint64_t MaxAddr = Funcs.back().startAddress();
+ const uint64_t AddrDelta = MaxAddr - MinAddr;
+ Header Hdr;
+ Hdr.Magic = GSYM_MAGIC;
+ Hdr.Version = GSYM_VERSION;
+ Hdr.AddrOffSize = 0;
+ Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
+ Hdr.BaseAddress = MinAddr;
+ Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
+ Hdr.StrtabOffset = 0; // We will fix this up later.
+ Hdr.StrtabSize = 0; // We will fix this up later.
+ memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
+ if (UUID.size() > sizeof(Hdr.UUID))
+ return createStringError(std::errc::invalid_argument,
+ "invalid UUID size %u", (uint32_t)UUID.size());
+ // Set the address offset size correctly in the GSYM header.
+ if (AddrDelta <= UINT8_MAX)
+ Hdr.AddrOffSize = 1;
+ else if (AddrDelta <= UINT16_MAX)
+ Hdr.AddrOffSize = 2;
+ else if (AddrDelta <= UINT32_MAX)
+ Hdr.AddrOffSize = 4;
+ else
+ Hdr.AddrOffSize = 8;
+ // Copy the UUID value if we have one.
+ if (UUID.size() > 0)
+ memcpy(Hdr.UUID, UUID.data(), UUID.size());
+ // Write out the header.
+ llvm::Error Err = Hdr.encode(O);
+ if (Err)
+ return Err;
+
+ // Write out the address offsets.
+ O.alignTo(Hdr.AddrOffSize);
+ for (const auto &FuncInfo : Funcs) {
+ uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
+ switch(Hdr.AddrOffSize) {
+ case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
+ case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
+ case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
+ case 8: O.writeU64(AddrOffset); break;
+ }
+ }
+
+ // Write out all zeros for the AddrInfoOffsets.
+ O.alignTo(4);
+ const off_t AddrInfoOffsetsOffset = O.tell();
+ for (size_t i = 0, n = Funcs.size(); i < n; ++i)
+ O.writeU32(0);
+
+ // Write out the file table
+ O.alignTo(4);
+ assert(!Files.empty());
+ assert(Files[0].Dir == 0);
+ assert(Files[0].Base == 0);
+ size_t NumFiles = Files.size();
+ if (NumFiles > UINT32_MAX)
+ return createStringError(std::errc::invalid_argument,
+ "too many files");
+ O.writeU32(static_cast<uint32_t>(NumFiles));
+ for (auto File: Files) {
+ O.writeU32(File.Dir);
+ O.writeU32(File.Base);
+ }
+
+ // Write out the string table.
+ const off_t StrtabOffset = O.tell();
+ StrTab.write(O.get_stream());
+ const off_t StrtabSize = O.tell() - StrtabOffset;
+ std::vector<uint32_t> AddrInfoOffsets;
+
+ // Write out the address infos for each function info.
+ for (const auto &FuncInfo : Funcs) {
+ if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
+ AddrInfoOffsets.push_back(OffsetOrErr.get());
+ else
+ return OffsetOrErr.takeError();
+ }
+ // Fixup the string table offset and size in the header
+ O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
+ O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
+
+ // Fixup all address info offsets
+ uint64_t Offset = 0;
+ for (auto AddrInfoOffset: AddrInfoOffsets) {
+ O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
+ Offset += 4;
+ }
+ return ErrorSuccess();
+}
+
+llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
+ std::lock_guard<std::recursive_mutex> Guard(Mutex);
+ if (Finalized)
+ return createStringError(std::errc::invalid_argument,
+ "already finalized");
+ Finalized = true;
+
+ // Sort function infos so we can emit sorted functions.
+ llvm::sort(Funcs.begin(), Funcs.end());
+
+ // Don't let the string table indexes change by finalizing in order.
+ StrTab.finalizeInOrder();
+
+ // Remove duplicate function infos that have both entries from debug info
+ // (DWARF or Breakpad) and entries from the SymbolTable.
+ //
+ // Also handle overlapping functions. Usually there shouldn't be any, but they
+ // can and do happen in some rare cases.
+ //
+ // (a)       (b)       (c)
+ // ^  ^      ^         ^
+ // |X |Y     |X  ^     |X
+ // |  |      |   |Y    |   ^
+ // |  |      |   v     v   |Y
+ // v  v      v             v
+ //
+ // In (a) and (b), Y is ignored and X will be reported for the full range.
+ // In (c), both functions will be included in the result and lookups for an
+ // address in the intersection will return Y because of binary search.
+ //
+ // Note that in case of (b), we cannot include Y in the result because then
+ // we wouldn't find any function for range (end of Y, end of X)
+ // with binary search.
+ auto NumBefore = Funcs.size();
+ auto Curr = Funcs.begin();
+ auto Prev = Funcs.end();
+ while (Curr != Funcs.end()) {
+ // Can't check for overlaps or same address ranges if we don't have a
+ // previous entry
+ if (Prev != Funcs.end()) {
+ if (Prev->Range.intersects(Curr->Range)) {
+ // Overlapping address ranges.
+ if (Prev->Range == Curr->Range) {
+ // Same address range. Check if one is from debug info and the other
+ // is from a symbol table. If so, then keep the one with debug info.
+ // Our sorting guarantees that entries with matching address ranges
+ // that have debug info are last in the sort.
+ if (*Prev == *Curr) {
+ // FunctionInfo entries match exactly (range, lines, inlines)
+ OS << "warning: duplicate function info entries, removing "
+ "duplicate:\n"
+ << *Curr << '\n';
+ Curr = Funcs.erase(Prev);
+ } else {
+ if (!Prev->hasRichInfo() && Curr->hasRichInfo()) {
+ // Same address range, one with no debug info (symbol) and the
+ // next with debug info. Keep the latter.
+ Curr = Funcs.erase(Prev);
+ } else {
+ OS << "warning: same address range contains different debug "
+ << "info. Removing:\n"
+ << *Prev << "\nIn favor of this one:\n"
+ << *Curr << "\n";
+ Curr = Funcs.erase(Prev);
+ }
+ }
+ } else {
+ // print warnings about overlaps
+ OS << "warning: function ranges overlap:\n"
+ << *Prev << "\n"
+ << *Curr << "\n";
+ }
+ } else if (Prev->Range.size() == 0 &&
+ Curr->Range.contains(Prev->Range.Start)) {
+ OS << "warning: removing symbol:\n"
+ << *Prev << "\nKeeping:\n"
+ << *Curr << "\n";
+ Curr = Funcs.erase(Prev);
+ }
+ }
+ if (Curr == Funcs.end())
+ break;
+ Prev = Curr++;
+ }
+
+ OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
+ << Funcs.size() << " total\n";
+ return Error::success();
+}
+
+uint32_t GsymCreator::insertString(StringRef S) {
+ std::lock_guard<std::recursive_mutex> Guard(Mutex);
+ if (S.empty())
+ return 0;
+ return StrTab.add(S);
+}
+
+void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
+ std::lock_guard<std::recursive_mutex> Guard(Mutex);
+ Funcs.emplace_back(std::move(FI));
+}
+
+void GsymCreator::forEachFunctionInfo(
+ std::function<bool(FunctionInfo &)> const &Callback) {
+ std::lock_guard<std::recursive_mutex> Guard(Mutex);
+ for (auto &FI : Funcs) {
+ if (!Callback(FI))
+ break;
+ }
+}
+
+void GsymCreator::forEachFunctionInfo(
+ std::function<bool(const FunctionInfo &)> const &Callback) const {
+ std::lock_guard<std::recursive_mutex> Guard(Mutex);
+ for (const auto &FI : Funcs) {
+ if (!Callback(FI))
+ break;
+ }
+}
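GsymCreator::encode picks the smallest byte width that can represent every startAddress() minus BaseAddress delta, which is what keeps the address table compact. A worked check of the thresholds, mirroring the selection above (illustrative helper name):

#include <cstdint>

// Smallest byte width that can hold every FuncAddr - BaseAddress delta.
static uint8_t addrOffSize(uint64_t AddrDelta) {
  if (AddrDelta <= UINT8_MAX)
    return 1;
  if (AddrDelta <= UINT16_MAX)
    return 2;
  if (AddrDelta <= UINT32_MAX)
    return 4;
  return 8;
}
// e.g. addrOffSize(0xff) == 1, addrOffSize(0x100) == 2,
// addrOffSize(0x100000000ULL) == 8.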
diff --git a/lib/DebugInfo/GSYM/GsymReader.cpp b/lib/DebugInfo/GSYM/GsymReader.cpp
new file mode 100644
index 000000000000..1b448cf80b70
--- /dev/null
+++ b/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -0,0 +1,265 @@
+//===- GsymReader.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace gsym;
+
+GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer) :
+ MemBuffer(std::move(Buffer)),
+ Endian(support::endian::system_endianness()) {}
+
+GsymReader::GsymReader(GsymReader &&RHS) = default;
+
+GsymReader::~GsymReader() = default;
+
+llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
+ // Open the input file and return an appropriate error if needed.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
+ MemoryBuffer::getFileOrSTDIN(Filename);
+ auto Err = BuffOrErr.getError();
+ if (Err)
+ return llvm::errorCodeToError(Err);
+ return create(BuffOrErr.get());
+}
+
+llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
+ auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
+ return create(MemBuffer);
+}
+
+llvm::Expected<llvm::gsym::GsymReader>
+GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
+ if (!MemBuffer.get())
+ return createStringError(std::errc::invalid_argument,
+ "invalid memory buffer");
+ GsymReader GR(std::move(MemBuffer));
+ llvm::Error Err = GR.parse();
+ if (Err)
+ return std::move(Err);
+ return std::move(GR);
+}
+
+llvm::Error
+GsymReader::parse() {
+ BinaryStreamReader FileData(MemBuffer->getBuffer(),
+ support::endian::system_endianness());
+ // Check for the magic bytes. This file format is designed to be mmap'ed
+ // into a process and accessed as read only. This is done for performance
+ // and efficiency for symbolicating and parsing GSYM data.
+ if (FileData.readObject(Hdr))
+ return createStringError(std::errc::invalid_argument,
+ "not enough data for a GSYM header");
+
+ const auto HostByteOrder = support::endian::system_endianness();
+ switch (Hdr->Magic) {
+ case GSYM_MAGIC:
+ Endian = HostByteOrder;
+ break;
+ case GSYM_CIGAM:
+ // This is a GSYM file, but not native endianness.
+ Endian = sys::IsBigEndianHost ? support::little : support::big;
+ Swap.reset(new SwappedData);
+ break;
+ default:
+ return createStringError(std::errc::invalid_argument,
+ "not a GSYM file");
+ }
+
+ bool DataIsLittleEndian = HostByteOrder != support::little;
+ // Read a correctly byte swapped header if we need to.
+ if (Swap) {
+ DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
+ if (auto ExpectedHdr = Header::decode(Data))
+ Swap->Hdr = ExpectedHdr.get();
+ else
+ return ExpectedHdr.takeError();
+ Hdr = &Swap->Hdr;
+ }
+
+ // Detect errors in the header and report any that are found. If we make it
+ // past this without errors, we know we have a good magic value, a supported
+ // version number, a verified address offset size, and a valid UUID size.
+ if (Error Err = Hdr->checkForError())
+ return Err;
+
+ if (!Swap) {
+ // This is the native endianness case that is most common and optimized for
+ // efficient lookups. Here we just grab pointers to the native data and
+ // use ArrayRef objects to allow efficient read-only access.
+
+ // Read the address offsets.
+ if (FileData.padToAlignment(Hdr->AddrOffSize) ||
+ FileData.readArray(AddrOffsets,
+ Hdr->NumAddresses * Hdr->AddrOffSize))
+ return createStringError(std::errc::invalid_argument,
+ "failed to read address table");
+
+ // Read the address info offsets.
+ if (FileData.padToAlignment(4) ||
+ FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
+ return createStringError(std::errc::invalid_argument,
+ "failed to read address info offsets table");
+
+ // Read the file table.
+ uint32_t NumFiles = 0;
+ if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
+ return createStringError(std::errc::invalid_argument,
+ "failed to read file table");
+
+ // Get the string table.
+ FileData.setOffset(Hdr->StrtabOffset);
+ if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
+ return createStringError(std::errc::invalid_argument,
+ "failed to read string table");
+ } else {
+ // This is the non-native endianness case that is not common and not
+ // optimized for lookups. Here we decode the important tables into local
+ // storage and then set the ArrayRef objects to point to these swapped
+ // copies of the read-only data so lookups can be as efficient as possible.
+ DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
+
+ // Read the address offsets.
+ uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
+ Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
+ switch (Hdr->AddrOffSize) {
+ case 1:
+ if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
+ return createStringError(std::errc::invalid_argument,
+ "failed to read address table");
+ break;
+ case 2:
+ if (!Data.getU16(&Offset,
+ reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
+ Hdr->NumAddresses))
+ return createStringError(std::errc::invalid_argument,
+ "failed to read address table");
+ break;
+ case 4:
+ if (!Data.getU32(&Offset,
+ reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
+ Hdr->NumAddresses))
+ return createStringError(std::errc::invalid_argument,
+ "failed to read address table");
+ break;
+ case 8:
+ if (!Data.getU64(&Offset,
+ reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
+ Hdr->NumAddresses))
+ return createStringError(std::errc::invalid_argument,
+ "failed to read address table");
+ }
+ AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
+
+ // Read the address info offsets.
+ Offset = alignTo(Offset, 4);
+ Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
+ if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
+ AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
+ else
+ return createStringError(std::errc::invalid_argument,
+ "failed to read address table");
+ // Read the file table.
+ const uint32_t NumFiles = Data.getU32(&Offset);
+ if (NumFiles > 0) {
+ Swap->Files.resize(NumFiles);
+ if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
+ Files = ArrayRef<FileEntry>(Swap->Files);
+ else
+ return createStringError(std::errc::invalid_argument,
+ "failed to read file table");
+ }
+ // Get the string table.
+ StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
+ Hdr->StrtabSize);
+ if (StrTab.Data.empty())
+ return createStringError(std::errc::invalid_argument,
+ "failed to read string table");
+ }
+ return Error::success();
+
+}
+
+const Header &GsymReader::getHeader() const {
+ // The only way to get a GsymReader is from GsymReader::openFile(...) or
+ // GsymReader::copyBuffer(), both of which parse and validate the header,
+ // so Hdr must point to valid data and the assert below should not trigger.
+ assert(Hdr);
+ return *Hdr;
+}
+
+Optional<uint64_t> GsymReader::getAddress(size_t Index) const {
+ switch (Hdr->AddrOffSize) {
+ case 1: return addressForIndex<uint8_t>(Index);
+ case 2: return addressForIndex<uint16_t>(Index);
+ case 4: return addressForIndex<uint32_t>(Index);
+ case 8: return addressForIndex<uint64_t>(Index);
+ }
+ return llvm::None;
+}
+
+Optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
+ const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
+ if (Index < NumAddrInfoOffsets)
+ return AddrInfoOffsets[Index];
+ return llvm::None;
+}
+
+Expected<uint64_t>
+GsymReader::getAddressIndex(const uint64_t Addr) const {
+ if (Addr < Hdr->BaseAddress)
+ return createStringError(std::errc::invalid_argument,
+ "address 0x%" PRIx64 " not in GSYM", Addr);
+ const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
+ switch (Hdr->AddrOffSize) {
+ case 1: return getAddressOffsetIndex<uint8_t>(AddrOffset);
+ case 2: return getAddressOffsetIndex<uint16_t>(AddrOffset);
+ case 4: return getAddressOffsetIndex<uint32_t>(AddrOffset);
+ case 8: return getAddressOffsetIndex<uint64_t>(AddrOffset);
+ default: break;
+ }
+ return createStringError(std::errc::invalid_argument,
+ "unsupported address offset size %u",
+ Hdr->AddrOffSize);
+}
+
+llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
+ Expected<uint64_t> AddressIndex = getAddressIndex(Addr);
+ if (!AddressIndex)
+ return AddressIndex.takeError();
+ // Address info offsets size should have been checked in parse().
+ assert(*AddressIndex < AddrInfoOffsets.size());
+ auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
+ DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
+ if (Optional<uint64_t> OptAddr = getAddress(*AddressIndex)) {
+ auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr);
+ if (ExpectedFI) {
+ if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0)
+ return ExpectedFI;
+ return createStringError(std::errc::invalid_argument,
+ "address 0x%" PRIx64 " not in GSYM", Addr);
+ }
+ }
+ return createStringError(std::errc::invalid_argument,
+ "failed to extract address[%" PRIu64 "]",
+ *AddressIndex);
+}
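Taken together, the lookup flow this new file implements can be sketched as
follows, assuming a GSYM file path in Path and a query address in Addr (both
names are illustrative):

  // Open and parse a GSYM file, then symbolicate one address. openFile()
  // validates the header and loads the tables; getFunctionInfo() finds the
  // address table index for Addr and decodes the FunctionInfo stored at the
  // matching address info offset.
  auto GR = llvm::gsym::GsymReader::openFile(Path);
  if (!GR)
    return GR.takeError();
  llvm::Expected<llvm::gsym::FunctionInfo> FI = GR->getFunctionInfo(Addr);
  if (!FI)
    return FI.takeError();
  // On success, FI->Range contains Addr (or is empty when sizes are unknown).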
diff --git a/lib/DebugInfo/GSYM/Header.cpp b/lib/DebugInfo/GSYM/Header.cpp
new file mode 100644
index 000000000000..0b3fb9c49894
--- /dev/null
+++ b/lib/DebugInfo/GSYM/Header.cpp
@@ -0,0 +1,109 @@
+//===- Header.cpp -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/Header.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define HEX8(v) llvm::format_hex(v, 4)
+#define HEX16(v) llvm::format_hex(v, 6)
+#define HEX32(v) llvm::format_hex(v, 10)
+#define HEX64(v) llvm::format_hex(v, 18)
+
+using namespace llvm;
+using namespace gsym;
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const Header &H) {
+ OS << "Header:\n";
+ OS << " Magic = " << HEX32(H.Magic) << "\n";
+ OS << " Version = " << HEX16(H.Version) << '\n';
+ OS << " AddrOffSize = " << HEX8(H.AddrOffSize) << '\n';
+ OS << " UUIDSize = " << HEX8(H.UUIDSize) << '\n';
+ OS << " BaseAddress = " << HEX64(H.BaseAddress) << '\n';
+ OS << " NumAddresses = " << HEX32(H.NumAddresses) << '\n';
+ OS << " StrtabOffset = " << HEX32(H.StrtabOffset) << '\n';
+ OS << " StrtabSize = " << HEX32(H.StrtabSize) << '\n';
+ OS << " UUID = ";
+ for (uint8_t I = 0; I < H.UUIDSize; ++I)
+ OS << format_hex_no_prefix(H.UUID[I], 2);
+ OS << '\n';
+ return OS;
+}
+
+/// Check the header and detect any errors.
+llvm::Error Header::checkForError() const {
+ if (Magic != GSYM_MAGIC)
+ return createStringError(std::errc::invalid_argument,
+ "invalid GSYM magic 0x%8.8x", Magic);
+ if (Version != GSYM_VERSION)
+ return createStringError(std::errc::invalid_argument,
+ "unsupported GSYM version %u", Version);
+ switch (AddrOffSize) {
+ case 1: break;
+ case 2: break;
+ case 4: break;
+ case 8: break;
+ default:
+ return createStringError(std::errc::invalid_argument,
+ "invalid address offset size %u",
+ AddrOffSize);
+ }
+ if (UUIDSize > GSYM_MAX_UUID_SIZE)
+ return createStringError(std::errc::invalid_argument,
+ "invalid UUID size %u", UUIDSize);
+ return Error::success();
+}
+
+llvm::Expected<Header> Header::decode(DataExtractor &Data) {
+ uint64_t Offset = 0;
+ // The header is stored as a single blob of data that has a fixed byte size.
+ if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Header)))
+ return createStringError(std::errc::invalid_argument,
+ "not enough data for a gsym::Header");
+ Header H;
+ H.Magic = Data.getU32(&Offset);
+ H.Version = Data.getU16(&Offset);
+ H.AddrOffSize = Data.getU8(&Offset);
+ H.UUIDSize = Data.getU8(&Offset);
+ H.BaseAddress = Data.getU64(&Offset);
+ H.NumAddresses = Data.getU32(&Offset);
+ H.StrtabOffset = Data.getU32(&Offset);
+ H.StrtabSize = Data.getU32(&Offset);
+ Data.getU8(&Offset, H.UUID, GSYM_MAX_UUID_SIZE);
+ if (llvm::Error Err = H.checkForError())
+ return std::move(Err);
+ return H;
+}
+
+llvm::Error Header::encode(FileWriter &O) const {
+ // Users must verify the Header is valid prior to calling this function.
+ if (llvm::Error Err = checkForError())
+ return Err;
+ O.writeU32(Magic);
+ O.writeU16(Version);
+ O.writeU8(AddrOffSize);
+ O.writeU8(UUIDSize);
+ O.writeU64(BaseAddress);
+ O.writeU32(NumAddresses);
+ O.writeU32(StrtabOffset);
+ O.writeU32(StrtabSize);
+ O.writeData(llvm::ArrayRef<uint8_t>(UUID));
+ return Error::success();
+}
+
+bool llvm::gsym::operator==(const Header &LHS, const Header &RHS) {
+ return LHS.Magic == RHS.Magic && LHS.Version == RHS.Version &&
+ LHS.AddrOffSize == RHS.AddrOffSize && LHS.UUIDSize == RHS.UUIDSize &&
+ LHS.BaseAddress == RHS.BaseAddress &&
+ LHS.NumAddresses == RHS.NumAddresses &&
+ LHS.StrtabOffset == RHS.StrtabOffset &&
+ LHS.StrtabSize == RHS.StrtabSize &&
+ memcmp(LHS.UUID, RHS.UUID, LHS.UUIDSize) == 0;
+}
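A short sketch of decoding a header from raw GSYM bytes with the API added
here (Bytes is an assumed StringRef holding the file contents):

  // Header::decode() reads the fixed-size header blob and then runs
  // checkForError(), so a returned Header has a good magic value, a
  // supported version, an AddrOffSize of 1, 2, 4, or 8, and a sane UUIDSize.
  llvm::DataExtractor Data(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/4);
  llvm::Expected<llvm::gsym::Header> H = llvm::gsym::Header::decode(Data);
  if (!H)
    return H.takeError();
  llvm::outs() << *H; // Dump via the operator<< defined above.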
diff --git a/lib/DebugInfo/GSYM/InlineInfo.cpp b/lib/DebugInfo/GSYM/InlineInfo.cpp
index 781c1755241d..32ed2c709575 100644
--- a/lib/DebugInfo/GSYM/InlineInfo.cpp
+++ b/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -8,7 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/Support/DataExtractor.h"
#include <algorithm>
#include <inttypes.h>
@@ -57,3 +59,101 @@ llvm::Optional<InlineInfo::InlineArray> InlineInfo::getInlineStack(uint64_t Addr
return Result;
return llvm::None;
}
+
+/// Decode an InlineInfo in Data at the specified offset.
+///
+/// A local helper function to decode InlineInfo objects. This function is
+/// called recursively when parsing child InlineInfo objects.
+///
+/// \param Data The data extractor to decode from.
+/// \param Offset The offset within \a Data to decode from.
+/// \param BaseAddr The base address to use when decoding address ranges.
+/// \returns An InlineInfo or an error describing the issue that was
+/// encountered during decoding.
+static llvm::Expected<InlineInfo> decode(DataExtractor &Data, uint64_t &Offset,
+ uint64_t BaseAddr) {
+ InlineInfo Inline;
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing InlineInfo address ranges data", Offset);
+ Inline.Ranges.decode(Data, BaseAddr, Offset);
+ if (Inline.Ranges.empty())
+ return Inline;
+ if (!Data.isValidOffsetForDataOfSize(Offset, 1))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing InlineInfo uint8_t indicating children",
+ Offset);
+ bool HasChildren = Data.getU8(&Offset) != 0;
+ if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing InlineInfo uint32_t for name", Offset);
+ Inline.Name = Data.getU32(&Offset);
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call file", Offset);
+ Inline.CallFile = (uint32_t)Data.getULEB128(&Offset);
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call line", Offset);
+ Inline.CallLine = (uint32_t)Data.getULEB128(&Offset);
+ if (HasChildren) {
+ // Child address ranges are encoded relative to the first address in the
+ // parent InlineInfo object.
+ const auto ChildBaseAddr = Inline.Ranges[0].Start;
+ while (true) {
+ llvm::Expected<InlineInfo> Child = decode(Data, Offset, ChildBaseAddr);
+ if (!Child)
+ return Child.takeError();
+ // InlineInfo with empty Ranges terminates a child sibling chain.
+ if (Child.get().Ranges.empty())
+ break;
+ Inline.Children.emplace_back(std::move(*Child));
+ }
+ }
+ return Inline;
+}
+
+llvm::Expected<InlineInfo> InlineInfo::decode(DataExtractor &Data,
+ uint64_t BaseAddr) {
+ uint64_t Offset = 0;
+ return ::decode(Data, Offset, BaseAddr);
+}
+
+llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const {
+ // Users must verify the InlineInfo is valid prior to calling this function.
+ // We don't want to emit any InlineInfo objects if they are not valid since
+ // it will waste space in the GSYM file.
+ if (!isValid())
+ return createStringError(std::errc::invalid_argument,
+ "attempted to encode invalid InlineInfo object");
+ Ranges.encode(O, BaseAddr);
+ bool HasChildren = !Children.empty();
+ O.writeU8(HasChildren);
+ O.writeU32(Name);
+ O.writeULEB(CallFile);
+ O.writeULEB(CallLine);
+ if (HasChildren) {
+ // Child address ranges are encoded relative to the first address in this
+ // object's Ranges. This keeps the offsets small and allows for efficient
+ // ULEB128 encoding.
+ const uint64_t ChildBaseAddr = Ranges[0].Start;
+ for (const auto &Child : Children) {
+ // Make sure all child address ranges are contained in the parent address
+ // ranges.
+ for (const auto &ChildRange: Child.Ranges) {
+ if (!Ranges.contains(ChildRange))
+ return createStringError(std::errc::invalid_argument,
+ "child range not contained in parent");
+ }
+ llvm::Error Err = Child.encode(O, ChildBaseAddr);
+ if (Err)
+ return Err;
+ }
+
+ // Terminate the child sibling chain by emitting a zero. This zero causes
+ // the static decode() helper above to return an InlineInfo with empty
+ // Ranges, which stops the decoding of sibling InlineInfo objects.
+ O.writeULEB(0);
+ }
+ return Error::success();
+}
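A hedged sketch of the decode side, assuming InlineBytes holds encoded
InlineInfo data for a function starting at FuncAddr (both names are assumed
for illustration):

  // Decode the inline tree, then ask for the inline stack at an address
  // inside the function. getInlineStack() (see the hunk context above)
  // returns llvm::None when the address is not covered by any inline range.
  llvm::DataExtractor Data(InlineBytes, /*IsLittleEndian=*/true, 4);
  auto II = llvm::gsym::InlineInfo::decode(Data, FuncAddr);
  if (!II)
    return II.takeError();
  if (auto Stack = II->getInlineStack(FuncAddr + 0x10))
    llvm::outs() << "inline depth = " << Stack->size() << '\n';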
diff --git a/lib/DebugInfo/GSYM/LineTable.cpp b/lib/DebugInfo/GSYM/LineTable.cpp
new file mode 100644
index 000000000000..824c0041be9f
--- /dev/null
+++ b/lib/DebugInfo/GSYM/LineTable.cpp
@@ -0,0 +1,287 @@
+//===- LineTable.cpp --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/Support/DataExtractor.h"
+
+using namespace llvm;
+using namespace gsym;
+
+enum LineTableOpCode {
+ EndSequence = 0x00, ///< End of the line table.
+ SetFile = 0x01, ///< Set LineTableRow.file_idx, don't push a row.
+ AdvancePC = 0x02, ///< Increment LineTableRow.address, and push a row.
+ AdvanceLine = 0x03, ///< Set LineTableRow.file_line, don't push a row.
+ FirstSpecial = 0x04, ///< All special opcodes push a row.
+};
+
+struct DeltaInfo {
+ int64_t Delta;
+ uint32_t Count;
+ DeltaInfo(int64_t D, uint32_t C) : Delta(D), Count(C) {}
+};
+
+inline bool operator<(const DeltaInfo &LHS, int64_t Delta) {
+ return LHS.Delta < Delta;
+}
+
+static bool encodeSpecial(int64_t MinLineDelta, int64_t MaxLineDelta,
+ int64_t LineDelta, uint64_t AddrDelta,
+ uint8_t &SpecialOp) {
+ if (LineDelta < MinLineDelta)
+ return false;
+ if (LineDelta > MaxLineDelta)
+ return false;
+ int64_t LineRange = MaxLineDelta - MinLineDelta + 1;
+ int64_t AdjustedOp = ((LineDelta - MinLineDelta) + AddrDelta * LineRange);
+ int64_t Op = AdjustedOp + FirstSpecial;
+ if (Op < 0)
+ return false;
+ if (Op > 255)
+ return false;
+ SpecialOp = (uint8_t)Op;
+ return true;
+}
+
+typedef std::function<bool(const LineEntry &Row)> LineEntryCallback;
+
+static llvm::Error parse(DataExtractor &Data, uint64_t BaseAddr,
+ LineEntryCallback const &Callback) {
+ uint64_t Offset = 0;
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing LineTable MinDelta", Offset);
+ int64_t MinDelta = Data.getSLEB128(&Offset);
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing LineTable MaxDelta", Offset);
+ int64_t MaxDelta = Data.getSLEB128(&Offset);
+ int64_t LineRange = MaxDelta - MinDelta + 1;
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": missing LineTable FirstLine", Offset);
+ const uint32_t FirstLine = (uint32_t)Data.getULEB128(&Offset);
+ LineEntry Row(BaseAddr, 1, FirstLine);
+ bool Done = false;
+ while (!Done) {
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": EOF found before EndSequence", Offset);
+ uint8_t Op = Data.getU8(&Offset);
+ switch (Op) {
+ case EndSequence:
+ Done = true;
+ break;
+ case SetFile:
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": EOF found before SetFile value",
+ Offset);
+ Row.File = (uint32_t)Data.getULEB128(&Offset);
+ break;
+ case AdvancePC:
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": EOF found before AdvancePC value",
+ Offset);
+ Row.Addr += Data.getULEB128(&Offset);
+ // If the function callback returns false, we stop parsing.
+ if (!Callback(Row))
+ return Error::success();
+ break;
+ case AdvanceLine:
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": EOF found before AdvanceLine value",
+ Offset);
+ Row.Line += Data.getSLEB128(&Offset);
+ break;
+ default: {
+ // A byte that contains both address and line increment.
+ uint8_t AdjustedOp = Op - FirstSpecial;
+ int64_t LineDelta = MinDelta + (AdjustedOp % LineRange);
+ uint64_t AddrDelta = (AdjustedOp / LineRange);
+ Row.Line += LineDelta;
+ Row.Addr += AddrDelta;
+ // If the function callback returns false, we stop parsing.
+ if (!Callback(Row))
+ return Error::success();
+ break;
+ }
+ }
+ }
+ return Error::success();
+}
+
+llvm::Error LineTable::encode(FileWriter &Out, uint64_t BaseAddr) const {
+ // Users must verify the LineTable is valid prior to calling this function.
+ // We don't want to emit any LineTable objects if they are not valid since
+ // it will waste space in the GSYM file.
+ if (!isValid())
+ return createStringError(std::errc::invalid_argument,
+ "attempted to encode invalid LineTable object");
+
+ int64_t MinLineDelta = INT64_MAX;
+ int64_t MaxLineDelta = INT64_MIN;
+ std::vector<DeltaInfo> DeltaInfos;
+ if (Lines.size() == 1) {
+ MinLineDelta = 0;
+ MaxLineDelta = 0;
+ } else {
+ int64_t PrevLine = 1;
+ bool First = true;
+ for (const auto &LE : Lines) {
+ if (First)
+ First = false;
+ else {
+ int64_t LineDelta = (int64_t)LE.Line - PrevLine;
+ auto End = DeltaInfos.end();
+ auto Pos = std::lower_bound(DeltaInfos.begin(), End, LineDelta);
+ if (Pos != End && Pos->Delta == LineDelta)
+ ++Pos->Count;
+ else
+ DeltaInfos.insert(Pos, DeltaInfo(LineDelta, 1));
+ if (LineDelta < MinLineDelta)
+ MinLineDelta = LineDelta;
+ if (LineDelta > MaxLineDelta)
+ MaxLineDelta = LineDelta;
+ }
+ PrevLine = (int64_t)LE.Line;
+ }
+ assert(MinLineDelta <= MaxLineDelta);
+ }
+ // If our range of line deltas is too large, set the min and max line
+ // delta intelligently based on the counts of the line deltas.
+ const int64_t MaxLineRange = 14;
+ if (MaxLineDelta - MinLineDelta > MaxLineRange) {
+ uint32_t BestIndex = 0;
+ uint32_t BestEndIndex = 0;
+ uint32_t BestCount = 0;
+ const size_t NumDeltaInfos = DeltaInfos.size();
+ for (uint32_t I = 0; I < NumDeltaInfos; ++I) {
+ const int64_t FirstDelta = DeltaInfos[I].Delta;
+ uint32_t CurrCount = 0;
+ uint32_t J;
+ for (J = I; J < NumDeltaInfos; ++J) {
+ auto LineRange = DeltaInfos[J].Delta - FirstDelta;
+ if (LineRange > MaxLineRange)
+ break;
+ CurrCount += DeltaInfos[J].Count;
+ }
+ if (CurrCount > BestCount) {
+ BestIndex = I;
+ BestEndIndex = J - 1;
+ BestCount = CurrCount;
+ }
+ }
+ MinLineDelta = DeltaInfos[BestIndex].Delta;
+ MaxLineDelta = DeltaInfos[BestEndIndex].Delta;
+ }
+ if (MinLineDelta == MaxLineDelta && MinLineDelta > 0 &&
+ MinLineDelta < MaxLineRange)
+ MinLineDelta = 0;
+ assert(MinLineDelta <= MaxLineDelta);
+
+ // Initialize the line entry state as a starting point. All line entries
+ // will be deltas from this.
+ LineEntry Prev(BaseAddr, 1, Lines.front().Line);
+
+ // Write out the min and max line delta as signed LEB128.
+ Out.writeSLEB(MinLineDelta);
+ Out.writeSLEB(MaxLineDelta);
+ // Write out the starting line number as an unsigned LEB128.
+ Out.writeULEB(Prev.Line);
+
+ for (const auto &Curr : Lines) {
+ if (Curr.Addr < BaseAddr)
+ return createStringError(std::errc::invalid_argument,
+ "LineEntry has address 0x%" PRIx64 " which is "
+ "less than the function start address 0x%"
+ PRIx64, Curr.Addr, BaseAddr);
+ if (Curr.Addr < Prev.Addr)
+ return createStringError(std::errc::invalid_argument,
+ "LineEntry in LineTable not in ascending order");
+ const uint64_t AddrDelta = Curr.Addr - Prev.Addr;
+ int64_t LineDelta = 0;
+ if (Curr.Line > Prev.Line)
+ LineDelta = Curr.Line - Prev.Line;
+ else if (Prev.Line > Curr.Line)
+ LineDelta = -((int32_t)(Prev.Line - Curr.Line));
+
+ // Set the file if it doesn't match the current one.
+ if (Curr.File != Prev.File) {
+ Out.writeU8(SetFile);
+ Out.writeULEB(Curr.File);
+ }
+
+ uint8_t SpecialOp;
+ if (encodeSpecial(MinLineDelta, MaxLineDelta, LineDelta, AddrDelta,
+ SpecialOp)) {
+ // Advance the PC and line and push a row.
+ Out.writeU8(SpecialOp);
+ } else {
+ // We can't encode the address delta and line delta into
+ // a single special opcode, we must do them separately.
+
+ // Advance the line.
+ if (LineDelta != 0) {
+ Out.writeU8(AdvanceLine);
+ Out.writeSLEB(LineDelta);
+ }
+
+ // Advance the PC and push a row.
+ Out.writeU8(AdvancePC);
+ Out.writeULEB(AddrDelta);
+ }
+ Prev = Curr;
+ }
+ Out.writeU8(EndSequence);
+ return Error::success();
+}
+
+// Parse all line table entries into the returned LineTable's Lines vector.
+// Callers can cache that result if needed, or call LineTable::lookup() below
+// to parse on the fly.
+llvm::Expected<LineTable> LineTable::decode(DataExtractor &Data,
+ uint64_t BaseAddr) {
+ LineTable LT;
+ llvm::Error Err = parse(Data, BaseAddr, [&](const LineEntry &Row) -> bool {
+ LT.Lines.push_back(Row);
+ return true; // Keep parsing by returning true.
+ });
+ if (Err)
+ return std::move(Err);
+ return LT;
+}
+
+// Parse the line table on the fly and find the row we are looking for.
+// Callers must decide whether to cache the full line table by calling
+// LineTable::decode(...) or to call this function each time; this is a CPU
+// vs. memory tradeoff that remains to be measured.
+LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr,
+                            uint64_t Addr) {
+ LineEntry Result;
+ llvm::Error Err = parse(Data, BaseAddr,
+ [Addr, &Result](const LineEntry &Row) -> bool {
+ if (Addr < Row.Addr)
+ return false; // Stop parsing; Result already holds the matching row.
+ Result = Row;
+ if (Addr == Row.Addr) {
+ // Stop parsing, this is the row we are looking for since the address
+ // matches.
+ return false;
+ }
+ return true; // Keep parsing till we find the right row.
+ });
+ return Result;
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LineTable &LT) {
+ for (const auto &Entry : LT)
+ OS << Entry << '\n';
+ return OS;
+}
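To make the special-opcode arithmetic concrete: with MinDelta = -4 and
MaxDelta = 10, LineRange = 10 - (-4) + 1 = 15. A row advancing the line by 2
and the address by 3 encodes as Op = (2 - (-4)) + 3 * 15 + FirstSpecial =
6 + 45 + 4 = 55, and the decoder's default case inverts this exactly. A
self-contained check of that round trip (all values illustrative):

  // Round-trip the special-opcode math used by encodeSpecial() and the
  // default case of parse().
  const int64_t MinDelta = -4, MaxDelta = 10;
  const int64_t LineRange = MaxDelta - MinDelta + 1;          // 15
  const int64_t LineDelta = 2;
  const uint64_t AddrDelta = 3;
  const uint8_t Op = (LineDelta - MinDelta) + AddrDelta * LineRange + 0x04;
  assert(Op == 55);
  const uint8_t AdjustedOp = Op - 0x04;                       // FirstSpecial
  assert(MinDelta + (AdjustedOp % LineRange) == LineDelta);   // Line += 2
  assert(AdjustedOp / LineRange == 3);                        // Addr += 3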
diff --git a/lib/DebugInfo/GSYM/Range.cpp b/lib/DebugInfo/GSYM/Range.cpp
index ca61984dacbd..19ab700fdd57 100644
--- a/lib/DebugInfo/GSYM/Range.cpp
+++ b/lib/DebugInfo/GSYM/Range.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/Support/DataExtractor.h"
#include <algorithm>
#include <inttypes.h>
@@ -40,6 +42,17 @@ bool AddressRanges::contains(uint64_t Addr) const {
return It != Ranges.begin() && Addr < It[-1].End;
}
+bool AddressRanges::contains(AddressRange Range) const {
+ if (Range.size() == 0)
+ return false;
+ auto It = std::partition_point(
+ Ranges.begin(), Ranges.end(),
+ [=](const AddressRange &R) { return R.Start <= Range.Start; });
+ if (It == Ranges.begin())
+ return false;
+ return Range.End <= It[-1].End;
+}
+
raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) {
return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")";
}
@@ -53,3 +66,37 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRanges &AR) {
}
return OS;
}
+
+void AddressRange::encode(FileWriter &O, uint64_t BaseAddr) const {
+ assert(Start >= BaseAddr);
+ O.writeULEB(Start - BaseAddr);
+ O.writeULEB(size());
+}
+
+void AddressRange::decode(DataExtractor &Data, uint64_t BaseAddr,
+ uint64_t &Offset) {
+ const uint64_t AddrOffset = Data.getULEB128(&Offset);
+ const uint64_t Size = Data.getULEB128(&Offset);
+ const uint64_t StartAddr = BaseAddr + AddrOffset;
+ Start = StartAddr;
+ End = StartAddr + Size;
+}
+
+void AddressRanges::encode(FileWriter &O, uint64_t BaseAddr) const {
+ O.writeULEB(Ranges.size());
+ if (Ranges.empty())
+ return;
+ for (auto Range : Ranges)
+ Range.encode(O, BaseAddr);
+}
+
+void AddressRanges::decode(DataExtractor &Data, uint64_t BaseAddr,
+ uint64_t &Offset) {
+ clear();
+ uint64_t NumRanges = Data.getULEB128(&Offset);
+ if (NumRanges == 0)
+ return;
+ Ranges.resize(NumRanges);
+ for (auto &Range : Ranges)
+ Range.decode(Data, BaseAddr, Offset);
+}
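The encoding added here stores a ULEB128 range count followed by a
(Start - BaseAddr, size) ULEB128 pair per range. A hedged decode sketch,
assuming RangeBytes holds such data written with BaseAddr = 0x1000 and that
AddressRanges is iterable (its begin()/end() are not part of this patch):

  // Read the count, then each (offset, size) pair, re-basing on BaseAddr:
  // Start = BaseAddr + offset, End = Start + size.
  llvm::DataExtractor Data(RangeBytes, /*IsLittleEndian=*/true, 4);
  uint64_t Offset = 0;
  llvm::gsym::AddressRanges Ranges;
  Ranges.decode(Data, /*BaseAddr=*/0x1000, Offset);
  for (const auto &R : Ranges)
    llvm::outs() << R << '\n'; // AddressRange operator<< defined above.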
diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp
index df925771f0d9..5dc9c86b34fd 100644
--- a/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -52,7 +52,7 @@ MappedBlockStream::MappedBlockStream(uint32_t BlockSize,
std::unique_ptr<MappedBlockStream> MappedBlockStream::createStream(
uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData,
BumpPtrAllocator &Allocator) {
- return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
+ return std::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
BlockSize, Layout, MsfData, Allocator);
}
@@ -63,7 +63,7 @@ std::unique_ptr<MappedBlockStream> MappedBlockStream::createIndexedStream(
MSFStreamLayout SL;
SL.Blocks = Layout.StreamMap[StreamIndex];
SL.Length = Layout.StreamSizes[StreamIndex];
- return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
+ return std::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
Layout.SB->BlockSize, SL, MsfData, Allocator);
}
@@ -318,7 +318,7 @@ WritableMappedBlockStream::createStream(uint32_t BlockSize,
const MSFStreamLayout &Layout,
WritableBinaryStreamRef MsfData,
BumpPtrAllocator &Allocator) {
- return llvm::make_unique<MappedBlockStreamImpl<WritableMappedBlockStream>>(
+ return std::make_unique<MappedBlockStreamImpl<WritableMappedBlockStream>>(
BlockSize, Layout, MsfData, Allocator);
}
diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
index a8ae076e1d6c..c2552f55703c 100644
--- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -405,7 +405,7 @@ DIARawSymbol::findChildren(PDB_SymType Type) const {
return nullptr;
}
- return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+ return std::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -423,7 +423,7 @@ DIARawSymbol::findChildren(PDB_SymType Type, StringRef Name,
Symbol->findChildrenEx(EnumVal, Name16Str, CompareFlags, &DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+ return std::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -443,7 +443,7 @@ DIARawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name,
Section, Offset, &DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+ return std::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -462,7 +462,7 @@ DIARawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name,
&DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+ return std::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -480,7 +480,7 @@ DIARawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
&DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+ return std::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -489,7 +489,7 @@ DIARawSymbol::findInlineFramesByAddr(uint32_t Section, uint32_t Offset) const {
if (S_OK != Symbol->findInlineFramesByAddr(Section, Offset, &DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+ return std::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -498,7 +498,7 @@ DIARawSymbol::findInlineFramesByRVA(uint32_t RVA) const {
if (S_OK != Symbol->findInlineFramesByRVA(RVA, &DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+ return std::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -507,7 +507,7 @@ DIARawSymbol::findInlineFramesByVA(uint64_t VA) const {
if (S_OK != Symbol->findInlineFramesByVA(VA, &DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+ return std::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
}
std::unique_ptr<IPDBEnumLineNumbers> DIARawSymbol::findInlineeLines() const {
@@ -515,7 +515,7 @@ std::unique_ptr<IPDBEnumLineNumbers> DIARawSymbol::findInlineeLines() const {
if (S_OK != Symbol->findInlineeLines(&DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
+ return std::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
}
std::unique_ptr<IPDBEnumLineNumbers>
@@ -526,7 +526,7 @@ DIARawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset,
Symbol->findInlineeLinesByAddr(Section, Offset, Length, &DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
+ return std::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
}
std::unique_ptr<IPDBEnumLineNumbers>
@@ -535,7 +535,7 @@ DIARawSymbol::findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const {
if (S_OK != Symbol->findInlineeLinesByRVA(RVA, Length, &DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
+ return std::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
}
std::unique_ptr<IPDBEnumLineNumbers>
@@ -544,7 +544,7 @@ DIARawSymbol::findInlineeLinesByVA(uint64_t VA, uint32_t Length) const {
if (S_OK != Symbol->findInlineeLinesByVA(VA, Length, &DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
+ return std::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
}
void DIARawSymbol::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) const {
@@ -776,7 +776,7 @@ std::unique_ptr<IPDBLineNumber> DIARawSymbol::getSrcLineOnTypeDefn() const {
if (FAILED(Symbol->getSrcLineOnTypeDefn(&LineNumber)) || !LineNumber)
return nullptr;
- return llvm::make_unique<DIALineNumber>(LineNumber);
+ return std::make_unique<DIALineNumber>(LineNumber);
}
uint32_t DIARawSymbol::getStride() const {
@@ -871,7 +871,7 @@ DIARawSymbol::getVirtualBaseTableType() const {
if (FAILED(Symbol->get_virtualBaseTableType(&TableType)) || !TableType)
return nullptr;
- auto RawVT = llvm::make_unique<DIARawSymbol>(Session, TableType);
+ auto RawVT = std::make_unique<DIARawSymbol>(Session, TableType);
auto Pointer =
PDBSymbol::createAs<PDBSymbolTypePointer>(Session, std::move(RawVT));
return unique_dyn_cast<PDBSymbolTypeBuiltin>(Pointer->getPointeeType());
diff --git a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
index e2d928f2c4b2..4f0e078e6712 100644
--- a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
@@ -23,7 +23,7 @@ std::unique_ptr<PDBSymbolCompiland> DIASectionContrib::getCompiland() const {
if (FAILED(Section->get_compiland(&Symbol)))
return nullptr;
- auto RawSymbol = llvm::make_unique<DIARawSymbol>(Session, Symbol);
+ auto RawSymbol = std::make_unique<DIARawSymbol>(Session, Symbol);
return PDBSymbol::createAs<PDBSymbolCompiland>(Session, std::move(RawSymbol));
}
diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp
index 4e0b8587c613..64ffa776bbd6 100644
--- a/lib/DebugInfo/PDB/DIA/DIASession.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -73,15 +73,7 @@ static Error LoadDIA(CComPtr<IDiaDataSource> &DiaDataSource) {
#if !defined(_MSC_VER)
return llvm::make_error<PDBError>(pdb_error_code::dia_failed_loading);
#else
- const wchar_t *msdia_dll = nullptr;
-#if _MSC_VER >= 1900 && _MSC_VER < 2000
- msdia_dll = L"msdia140.dll"; // VS2015
-#elif _MSC_VER >= 1800
- msdia_dll = L"msdia120.dll"; // VS2013
-#else
-#error "Unknown Visual Studio version."
-#endif
-
+ const wchar_t *msdia_dll = L"msdia140.dll";
HRESULT HR;
if (FAILED(HR = NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource,
reinterpret_cast<LPVOID *>(&DiaDataSource))))
@@ -158,7 +150,7 @@ std::unique_ptr<PDBSymbolExe> DIASession::getGlobalScope() {
if (S_OK != Session->get_globalScope(&GlobalScope))
return nullptr;
- auto RawSymbol = llvm::make_unique<DIARawSymbol>(*this, GlobalScope);
+ auto RawSymbol = std::make_unique<DIARawSymbol>(*this, GlobalScope);
auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
std::unique_ptr<PDBSymbolExe> ExeSymbol(
static_cast<PDBSymbolExe *>(PdbSymbol.release()));
@@ -193,7 +185,7 @@ DIASession::getSymbolById(SymIndexId SymbolId) const {
if (S_OK != Session->symbolById(SymbolId, &LocatedSymbol))
return nullptr;
- auto RawSymbol = llvm::make_unique<DIARawSymbol>(*this, LocatedSymbol);
+ auto RawSymbol = std::make_unique<DIARawSymbol>(*this, LocatedSymbol);
return PDBSymbol::create(*this, std::move(RawSymbol));
}
@@ -210,7 +202,7 @@ DIASession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const {
if (S_OK != Session->findSymbolByRVA(RVA, EnumVal, &Symbol))
return nullptr;
}
- auto RawSymbol = llvm::make_unique<DIARawSymbol>(*this, Symbol);
+ auto RawSymbol = std::make_unique<DIARawSymbol>(*this, Symbol);
return PDBSymbol::create(*this, std::move(RawSymbol));
}
@@ -222,7 +214,7 @@ std::unique_ptr<PDBSymbol> DIASession::findSymbolByRVA(uint32_t RVA,
if (S_OK != Session->findSymbolByRVA(RVA, EnumVal, &Symbol))
return nullptr;
- auto RawSymbol = llvm::make_unique<DIARawSymbol>(*this, Symbol);
+ auto RawSymbol = std::make_unique<DIARawSymbol>(*this, Symbol);
return PDBSymbol::create(*this, std::move(RawSymbol));
}
@@ -235,7 +227,7 @@ DIASession::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset,
if (S_OK != Session->findSymbolByAddr(Sect, Offset, EnumVal, &Symbol))
return nullptr;
- auto RawSymbol = llvm::make_unique<DIARawSymbol>(*this, Symbol);
+ auto RawSymbol = std::make_unique<DIARawSymbol>(*this, Symbol);
return PDBSymbol::create(*this, std::move(RawSymbol));
}
@@ -251,7 +243,7 @@ DIASession::findLineNumbers(const PDBSymbolCompiland &Compiland,
RawFile.getDiaFile(), &LineNumbers))
return nullptr;
- return llvm::make_unique<DIAEnumLineNumbers>(LineNumbers);
+ return std::make_unique<DIAEnumLineNumbers>(LineNumbers);
}
std::unique_ptr<IPDBEnumLineNumbers>
@@ -265,7 +257,7 @@ DIASession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const {
if (S_OK != Session->findLinesByRVA(RVA, Length, &LineNumbers))
return nullptr;
}
- return llvm::make_unique<DIAEnumLineNumbers>(LineNumbers);
+ return std::make_unique<DIAEnumLineNumbers>(LineNumbers);
}
std::unique_ptr<IPDBEnumLineNumbers>
@@ -274,7 +266,7 @@ DIASession::findLineNumbersByRVA(uint32_t RVA, uint32_t Length) const {
if (S_OK != Session->findLinesByRVA(RVA, Length, &LineNumbers))
return nullptr;
- return llvm::make_unique<DIAEnumLineNumbers>(LineNumbers);
+ return std::make_unique<DIAEnumLineNumbers>(LineNumbers);
}
std::unique_ptr<IPDBEnumLineNumbers>
@@ -284,7 +276,7 @@ DIASession::findLineNumbersBySectOffset(uint32_t Section, uint32_t Offset,
if (S_OK != Session->findLinesByAddr(Section, Offset, Length, &LineNumbers))
return nullptr;
- return llvm::make_unique<DIAEnumLineNumbers>(LineNumbers);
+ return std::make_unique<DIAEnumLineNumbers>(LineNumbers);
}
std::unique_ptr<IPDBEnumSourceFiles>
@@ -306,7 +298,7 @@ DIASession::findSourceFiles(const PDBSymbolCompiland *Compiland,
if (S_OK !=
Session->findFile(DiaCompiland, Utf16Pattern.m_str, Flags, &SourceFiles))
return nullptr;
- return llvm::make_unique<DIAEnumSourceFiles>(*this, SourceFiles);
+ return std::make_unique<DIAEnumSourceFiles>(*this, SourceFiles);
}
std::unique_ptr<IPDBSourceFile>
@@ -342,7 +334,7 @@ std::unique_ptr<IPDBEnumSourceFiles> DIASession::getAllSourceFiles() const {
if (S_OK != Session->findFile(nullptr, nullptr, nsNone, &Files))
return nullptr;
- return llvm::make_unique<DIAEnumSourceFiles>(*this, Files);
+ return std::make_unique<DIAEnumSourceFiles>(*this, Files);
}
std::unique_ptr<IPDBEnumSourceFiles> DIASession::getSourceFilesForCompiland(
@@ -355,7 +347,7 @@ std::unique_ptr<IPDBEnumSourceFiles> DIASession::getSourceFilesForCompiland(
Session->findFile(RawSymbol.getDiaSymbol(), nullptr, nsNone, &Files))
return nullptr;
- return llvm::make_unique<DIAEnumSourceFiles>(*this, Files);
+ return std::make_unique<DIAEnumSourceFiles>(*this, Files);
}
std::unique_ptr<IPDBSourceFile>
@@ -364,7 +356,7 @@ DIASession::getSourceFileById(uint32_t FileId) const {
if (S_OK != Session->findFileById(FileId, &LocatedFile))
return nullptr;
- return llvm::make_unique<DIASourceFile>(*this, LocatedFile);
+ return std::make_unique<DIASourceFile>(*this, LocatedFile);
}
std::unique_ptr<IPDBEnumDataStreams> DIASession::getDebugStreams() const {
@@ -372,7 +364,7 @@ std::unique_ptr<IPDBEnumDataStreams> DIASession::getDebugStreams() const {
if (S_OK != Session->getEnumDebugStreams(&DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumDebugStreams>(DiaEnumerator);
+ return std::make_unique<DIAEnumDebugStreams>(DiaEnumerator);
}
std::unique_ptr<IPDBEnumTables> DIASession::getEnumTables() const {
@@ -380,7 +372,7 @@ std::unique_ptr<IPDBEnumTables> DIASession::getEnumTables() const {
if (S_OK != Session->getEnumTables(&DiaEnumerator))
return nullptr;
- return llvm::make_unique<DIAEnumTables>(DiaEnumerator);
+ return std::make_unique<DIAEnumTables>(DiaEnumerator);
}
template <class T> static CComPtr<T> getTableEnumerator(IDiaSession &Session) {
@@ -407,7 +399,7 @@ DIASession::getInjectedSources() const {
if (!Files)
return nullptr;
- return llvm::make_unique<DIAEnumInjectedSources>(Files);
+ return std::make_unique<DIAEnumInjectedSources>(Files);
}
std::unique_ptr<IPDBEnumSectionContribs>
@@ -417,7 +409,7 @@ DIASession::getSectionContribs() const {
if (!Sections)
return nullptr;
- return llvm::make_unique<DIAEnumSectionContribs>(*this, Sections);
+ return std::make_unique<DIAEnumSectionContribs>(*this, Sections);
}
std::unique_ptr<IPDBEnumFrameData>
@@ -427,5 +419,5 @@ DIASession::getFrameData() const {
if (!FD)
return nullptr;
- return llvm::make_unique<DIAEnumFrameData>(FD);
+ return std::make_unique<DIAEnumFrameData>(FD);
}
diff --git a/lib/DebugInfo/PDB/GenericError.cpp b/lib/DebugInfo/PDB/GenericError.cpp
index 70dc094c42ec..0e4cba3174b2 100644
--- a/lib/DebugInfo/PDB/GenericError.cpp
+++ b/lib/DebugInfo/PDB/GenericError.cpp
@@ -34,8 +34,8 @@ public:
return "The PDB file path is an invalid UTF8 sequence.";
case pdb_error_code::signature_out_of_date:
return "The signature does not match; the file(s) might be out of date.";
- case pdb_error_code::external_cmdline_ref:
- return "The path to this file must be provided on the command-line.";
+ case pdb_error_code::no_matching_pch:
+ return "No matching precompiled header could be located.";
}
llvm_unreachable("Unrecognized generic_error_code");
}
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index 20b6c6142547..419734771ccd 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -180,12 +180,12 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
void DbiModuleDescriptorBuilder::addDebugSubsection(
std::shared_ptr<DebugSubsection> Subsection) {
assert(Subsection);
- C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>(
+ C13Builders.push_back(std::make_unique<DebugSubsectionRecordBuilder>(
std::move(Subsection), CodeViewContainer::Pdb));
}
void DbiModuleDescriptorBuilder::addDebugSubsection(
const DebugSubsectionRecord &SubsectionContents) {
- C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>(
+ C13Builders.push_back(std::make_unique<DebugSubsectionRecordBuilder>(
SubsectionContents, CodeViewContainer::Pdb));
}
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index b7ade0072ee5..0e00c2f7ff98 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -114,7 +114,7 @@ Expected<DbiModuleDescriptorBuilder &>
DbiStreamBuilder::addModuleInfo(StringRef ModuleName) {
uint32_t Index = ModiList.size();
ModiList.push_back(
- llvm::make_unique<DbiModuleDescriptorBuilder>(ModuleName, Index, Msf));
+ std::make_unique<DbiModuleDescriptorBuilder>(ModuleName, Index, Msf));
return *ModiList.back();
}
diff --git a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 8ed5b8b44c59..432f1e9b24d3 100644
--- a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -183,8 +183,8 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
}
GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
- : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()),
- GSH(llvm::make_unique<GSIHashStreamBuilder>()) {}
+ : Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()),
+ GSH(std::make_unique<GSIHashStreamBuilder>()) {}
GSIStreamBuilder::~GSIStreamBuilder() {}
diff --git a/lib/DebugInfo/PDB/Native/Hash.cpp b/lib/DebugInfo/PDB/Native/Hash.cpp
index b5c139ecbec0..7fb6b4bd5d31 100644
--- a/lib/DebugInfo/PDB/Native/Hash.cpp
+++ b/lib/DebugInfo/PDB/Native/Hash.cpp
@@ -8,8 +8,8 @@
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/CRC.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/JamCRC.h"
#include <cstdint>
using namespace llvm;
@@ -79,7 +79,6 @@ uint32_t pdb::hashStringV2(StringRef Str) {
// Corresponds to `SigForPbCb` in langapi/shared/crc32.h.
uint32_t pdb::hashBufferV8(ArrayRef<uint8_t> Buf) {
JamCRC JC(/*Init=*/0U);
- JC.update(makeArrayRef<char>(reinterpret_cast<const char *>(Buf.data()),
- Buf.size()));
+ JC.update(Buf);
return JC.getCRC();
}
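This hunk follows JamCRC's move from Support/JamCRC.h into Support/CRC.h,
where update() now takes an ArrayRef<uint8_t> directly. A minimal sketch of
the updated call pattern (the helper name is illustrative):

  #include "llvm/Support/CRC.h"

  static uint32_t hashBytes(llvm::ArrayRef<uint8_t> Buf) {
    llvm::JamCRC JC(/*Init=*/0U);
    JC.update(Buf); // No reinterpret_cast to ArrayRef<char> needed anymore.
    return JC.getCRC();
  }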
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
index f17ff5bb01f2..2f6a5bc3d574 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
@@ -46,30 +46,31 @@ public:
uint64_t getCodeByteSize() const override { return Entry.FileSize; }
std::string getFileName() const override {
- auto Name = Strings.getStringForID(Entry.FileNI);
- assert(Name && "InjectedSourceStream should have rejected this");
- return *Name;
+ StringRef Ret = cantFail(Strings.getStringForID(Entry.FileNI),
+ "InjectedSourceStream should have rejected this");
+ return Ret;
}
std::string getObjectFileName() const override {
- auto ObjName = Strings.getStringForID(Entry.ObjNI);
- assert(ObjName && "InjectedSourceStream should have rejected this");
- return *ObjName;
+ StringRef Ret = cantFail(Strings.getStringForID(Entry.ObjNI),
+ "InjectedSourceStream should have rejected this");
+ return Ret;
}
std::string getVirtualFileName() const override {
- auto VName = Strings.getStringForID(Entry.VFileNI);
- assert(VName && "InjectedSourceStream should have rejected this");
- return *VName;
+ StringRef Ret = cantFail(Strings.getStringForID(Entry.VFileNI),
+ "InjectedSourceStream should have rejected this");
+ return Ret;
}
uint32_t getCompression() const override { return Entry.Compression; }
std::string getCode() const override {
// Get name of stream storing the data.
- auto VName = Strings.getStringForID(Entry.VFileNI);
- assert(VName && "InjectedSourceStream should have rejected this");
- std::string StreamName = ("/src/files/" + *VName).str();
+ StringRef VName =
+ cantFail(Strings.getStringForID(Entry.VFileNI),
+ "InjectedSourceStream should have rejected this");
+ std::string StreamName = ("/src/files/" + VName).str();
// Find stream with that name and read its data.
// FIXME: Consider validating (or even loading) all this in
@@ -104,14 +105,14 @@ std::unique_ptr<IPDBInjectedSource>
NativeEnumInjectedSources::getChildAtIndex(uint32_t N) const {
if (N >= getChildCount())
return nullptr;
- return make_unique<NativeInjectedSource>(std::next(Stream.begin(), N)->second,
+ return std::make_unique<NativeInjectedSource>(std::next(Stream.begin(), N)->second,
File, Strings);
}
std::unique_ptr<IPDBInjectedSource> NativeEnumInjectedSources::getNext() {
if (Cur == Stream.end())
return nullptr;
- return make_unique<NativeInjectedSource>((Cur++)->second, File, Strings);
+ return std::make_unique<NativeInjectedSource>((Cur++)->second, File, Strings);
}
void NativeEnumInjectedSources::reset() { Cur = Stream.begin(); }
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index 8e43cf24495a..2ad552470b61 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -30,68 +30,68 @@ void NativeRawSymbol::dump(raw_ostream &OS, int Indent,
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildren(PDB_SymType Type) const {
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildren(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags) const {
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags, uint32_t Section, uint32_t Offset) const {
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags, uint64_t VA) const {
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags, uint32_t RVA) const {
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findInlineFramesByAddr(uint32_t Section,
uint32_t Offset) const {
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findInlineFramesByRVA(uint32_t RVA) const {
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findInlineFramesByVA(uint64_t VA) const {
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLines() const {
- return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
+ return std::make_unique<NullEnumerator<IPDBLineNumber>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset,
uint32_t Length) const {
- return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
+ return std::make_unique<NullEnumerator<IPDBLineNumber>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const {
- return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
+ return std::make_unique<NullEnumerator<IPDBLineNumber>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLinesByVA(uint64_t VA, uint32_t Length) const {
- return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
+ return std::make_unique<NullEnumerator<IPDBLineNumber>>();
}
void NativeRawSymbol::getDataBytes(SmallVector<uint8_t, 32> &bytes) const {
diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 8a49cb1c5963..b45a5881dcb5 100644
--- a/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -59,18 +59,18 @@ NativeSession::~NativeSession() = default;
Error NativeSession::createFromPdb(std::unique_ptr<MemoryBuffer> Buffer,
std::unique_ptr<IPDBSession> &Session) {
StringRef Path = Buffer->getBufferIdentifier();
- auto Stream = llvm::make_unique<MemoryBufferByteStream>(
+ auto Stream = std::make_unique<MemoryBufferByteStream>(
std::move(Buffer), llvm::support::little);
- auto Allocator = llvm::make_unique<BumpPtrAllocator>();
- auto File = llvm::make_unique<PDBFile>(Path, std::move(Stream), *Allocator);
+ auto Allocator = std::make_unique<BumpPtrAllocator>();
+ auto File = std::make_unique<PDBFile>(Path, std::move(Stream), *Allocator);
if (auto EC = File->parseFileHeaders())
return EC;
if (auto EC = File->parseStreamData())
return EC;
Session =
- llvm::make_unique<NativeSession>(std::move(File), std::move(Allocator));
+ std::make_unique<NativeSession>(std::move(File), std::move(Allocator));
return Error::success();
}
@@ -202,7 +202,7 @@ NativeSession::getInjectedSources() const {
consumeError(Strings.takeError());
return nullptr;
}
- return make_unique<NativeEnumInjectedSources>(*Pdb, *ISS, *Strings);
+ return std::make_unique<NativeEnumInjectedSources>(*Pdb, *ISS, *Strings);
}
std::unique_ptr<IPDBEnumSectionContribs>
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
index 9f5e86281a23..26ccb7daece0 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
@@ -163,14 +163,14 @@ void NativeTypeEnum::dump(raw_ostream &OS, int Indent,
std::unique_ptr<IPDBEnumSymbols>
NativeTypeEnum::findChildren(PDB_SymType Type) const {
if (Type != PDB_SymType::Data)
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
const NativeTypeEnum *ClassParent = nullptr;
if (!Modifiers)
ClassParent = this;
else
ClassParent = UnmodifiedType;
- return llvm::make_unique<NativeEnumEnumEnumerators>(Session, *ClassParent);
+ return std::make_unique<NativeEnumEnumEnumerators>(Session, *ClassParent);
}
PDB_SymType NativeTypeEnum::getSymTag() const { return PDB_SymType::Enum; }
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
index 405303469c18..f98a4c3043eb 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
@@ -65,7 +65,7 @@ private:
std::unique_ptr<PDBSymbol> wrap(std::unique_ptr<PDBSymbol> S) const {
if (!S)
return nullptr;
- auto NTFA = llvm::make_unique<NativeTypeFunctionArg>(Session, std::move(S));
+ auto NTFA = std::make_unique<NativeTypeFunctionArg>(Session, std::move(S));
return PDBSymbol::create(Session, std::move(NTFA));
}
NativeSession &Session;
@@ -133,9 +133,9 @@ void NativeTypeFunctionSig::dump(raw_ostream &OS, int Indent,
std::unique_ptr<IPDBEnumSymbols>
NativeTypeFunctionSig::findChildren(PDB_SymType Type) const {
if (Type != PDB_SymType::FunctionArg)
- return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+ return std::make_unique<NullEnumerator<PDBSymbol>>();
- auto NET = llvm::make_unique<NativeEnumTypes>(Session,
+ auto NET = std::make_unique<NativeEnumTypes>(Session,
/* copy */ ArgList.ArgIndices);
return std::unique_ptr<IPDBEnumSymbols>(
new NativeEnumFunctionArgs(Session, std::move(NET)));
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 983031dfcb78..9ac226b89139 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -264,7 +264,7 @@ Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
if (!GlobalS)
return GlobalS.takeError();
- auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
+ auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));
if (auto EC = TempGlobals->reload())
return std::move(EC);
Globals = std::move(TempGlobals);
@@ -277,7 +277,7 @@ Expected<InfoStream &> PDBFile::getPDBInfoStream() {
auto InfoS = safelyCreateIndexedStream(StreamPDB);
if (!InfoS)
return InfoS.takeError();
- auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
+ auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));
if (auto EC = TempInfo->reload())
return std::move(EC);
Info = std::move(TempInfo);
@@ -290,7 +290,7 @@ Expected<DbiStream &> PDBFile::getPDBDbiStream() {
auto DbiS = safelyCreateIndexedStream(StreamDBI);
if (!DbiS)
return DbiS.takeError();
- auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS));
+ auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));
if (auto EC = TempDbi->reload(this))
return std::move(EC);
Dbi = std::move(TempDbi);
@@ -303,7 +303,7 @@ Expected<TpiStream &> PDBFile::getPDBTpiStream() {
auto TpiS = safelyCreateIndexedStream(StreamTPI);
if (!TpiS)
return TpiS.takeError();
- auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
+ auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));
if (auto EC = TempTpi->reload())
return std::move(EC);
Tpi = std::move(TempTpi);
@@ -319,7 +319,7 @@ Expected<TpiStream &> PDBFile::getPDBIpiStream() {
auto IpiS = safelyCreateIndexedStream(StreamIPI);
if (!IpiS)
return IpiS.takeError();
- auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
+ auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));
if (auto EC = TempIpi->reload())
return std::move(EC);
Ipi = std::move(TempIpi);
@@ -337,7 +337,7 @@ Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
if (!PublicS)
return PublicS.takeError();
- auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS));
+ auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));
if (auto EC = TempPublics->reload())
return std::move(EC);
Publics = std::move(TempPublics);
@@ -356,7 +356,7 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
if (!SymbolS)
return SymbolS.takeError();
- auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
+ auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));
if (auto EC = TempSymbols->reload())
return std::move(EC);
Symbols = std::move(TempSymbols);
@@ -370,7 +370,7 @@ Expected<PDBStringTable &> PDBFile::getStringTable() {
if (!NS)
return NS.takeError();
- auto N = llvm::make_unique<PDBStringTable>();
+ auto N = std::make_unique<PDBStringTable>();
BinaryStreamReader Reader(**NS);
if (auto EC = N->reload(Reader))
return std::move(EC);
@@ -391,7 +391,7 @@ Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
if (!Strings)
return Strings.takeError();
- auto IJ = llvm::make_unique<InjectedSourceStream>(std::move(*IJS));
+ auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));
if (auto EC = IJ->reload(*Strings))
return std::move(EC);
InjectedSources = std::move(IJ);
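Every hunk in this file follows one mechanical pattern, sketched below (illustrative, not part of the patch): with LLVM now building as C++14, the local llvm::make_unique polyfill is replaced by the standard std::make_unique, a drop-in rename at each call site.

    #include <memory>

    struct Stream {
      explicit Stream(int Id) : Id(Id) {}
      int Id;
    };

    // Before: auto S = llvm::make_unique<Stream>(42);
    // After (C++14 standard library, same semantics):
    std::unique_ptr<Stream> makeStream(int Id) {
      return std::make_unique<Stream>(Id);
    }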
diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index 8f5a048ea4b5..aa3288724390 100644
--- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -22,7 +22,7 @@
#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
#include "llvm/Support/BinaryStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/JamCRC.h"
+#include "llvm/Support/CRC.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/xxhash.h"
@@ -42,7 +42,7 @@ Error PDBFileBuilder::initialize(uint32_t BlockSize) {
auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize);
if (!ExpectedMsf)
return ExpectedMsf.takeError();
- Msf = llvm::make_unique<MSFBuilder>(std::move(*ExpectedMsf));
+ Msf = std::make_unique<MSFBuilder>(std::move(*ExpectedMsf));
return Error::success();
}
@@ -50,25 +50,25 @@ MSFBuilder &PDBFileBuilder::getMsfBuilder() { return *Msf; }
InfoStreamBuilder &PDBFileBuilder::getInfoBuilder() {
if (!Info)
- Info = llvm::make_unique<InfoStreamBuilder>(*Msf, NamedStreams);
+ Info = std::make_unique<InfoStreamBuilder>(*Msf, NamedStreams);
return *Info;
}
DbiStreamBuilder &PDBFileBuilder::getDbiBuilder() {
if (!Dbi)
- Dbi = llvm::make_unique<DbiStreamBuilder>(*Msf);
+ Dbi = std::make_unique<DbiStreamBuilder>(*Msf);
return *Dbi;
}
TpiStreamBuilder &PDBFileBuilder::getTpiBuilder() {
if (!Tpi)
- Tpi = llvm::make_unique<TpiStreamBuilder>(*Msf, StreamTPI);
+ Tpi = std::make_unique<TpiStreamBuilder>(*Msf, StreamTPI);
return *Tpi;
}
TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() {
if (!Ipi)
- Ipi = llvm::make_unique<TpiStreamBuilder>(*Msf, StreamIPI);
+ Ipi = std::make_unique<TpiStreamBuilder>(*Msf, StreamIPI);
return *Ipi;
}
@@ -78,7 +78,7 @@ PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() {
GSIStreamBuilder &PDBFileBuilder::getGsiBuilder() {
if (!Gsi)
- Gsi = llvm::make_unique<GSIStreamBuilder>(*Msf);
+ Gsi = std::make_unique<GSIStreamBuilder>(*Msf);
return *Gsi;
}
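Each builder getter above shares the same lazy-construction shape; a minimal sketch with a hypothetical Counter member standing in for the Info/Dbi/Tpi/Gsi builders:

    #include <memory>

    class FileBuilder {
      std::unique_ptr<int> Counter; // stands in for a stream builder

    public:
      int &getCounter() {
        if (!Counter)
          Counter = std::make_unique<int>(0); // construct on first use
        return *Counter;                      // later calls reuse the instance
      }
    };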
@@ -174,8 +174,7 @@ Error PDBFileBuilder::finalizeMsfLayout() {
if (!InjectedSources.empty()) {
for (const auto &IS : InjectedSources) {
JamCRC CRC(0);
- CRC.update(makeArrayRef(IS.Content->getBufferStart(),
- IS.Content->getBufferSize()));
+ CRC.update(arrayRefFromStringRef(IS.Content->getBuffer()));
SrcHeaderBlockEntry Entry;
::memset(&Entry, 0, sizeof(SrcHeaderBlockEntry));
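A minimal sketch of the new checksum call shape, assuming the post-patch llvm/Support/CRC.h interface used above (hashInjectedSource is a hypothetical helper):

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/CRC.h"
    #include <cstdint>

    static uint32_t hashInjectedSource(llvm::StringRef Content) {
      llvm::JamCRC CRC(/*Init=*/0U);
      // arrayRefFromStringRef() replaces the explicit (pointer, size) pair
      // that the old makeArrayRef() call needed.
      CRC.update(llvm::arrayRefFromStringRef(Content));
      return CRC.getCRC();
    }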
diff --git a/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index b21b82bf76fd..b71b2b158144 100644
--- a/lib/DebugInfo/PDB/Native/TpiHashing.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -10,7 +10,7 @@
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
-#include "llvm/Support/JamCRC.h"
+#include "llvm/Support/CRC.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -124,8 +124,6 @@ Expected<uint32_t> llvm::pdb::hashTypeRecord(const CVType &Rec) {
// Run CRC32 over the bytes. This corresponds to `hashBufv8`.
JamCRC JC(/*Init=*/0U);
- ArrayRef<char> Bytes(reinterpret_cast<const char *>(Rec.data().data()),
- Rec.data().size());
- JC.update(Bytes);
+ JC.update(Rec.data());
return JC.getCRC();
}
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index 8ee7f897b8bb..ac19db03fab2 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -112,7 +112,7 @@ Error TpiStream::reload() {
HashStream = std::move(*HS);
}
- Types = llvm::make_unique<LazyRandomTypeCollection>(
+ Types = std::make_unique<LazyRandomTypeCollection>(
TypeRecords, getNumTypeRecords(), getTypeIndexOffsets());
return Error::success();
}
diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index 6b308453c2de..4f10f8524a9b 100644
--- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -135,7 +135,7 @@ Error TpiStreamBuilder::finalizeMsfLayout() {
reinterpret_cast<const uint8_t *>(HashBuffer.data()),
calculateHashBufferSize());
HashValueStream =
- llvm::make_unique<BinaryByteStream>(Bytes, llvm::support::little);
+ std::make_unique<BinaryByteStream>(Bytes, llvm::support::little);
}
return Error::success();
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index 7c3ba981fd6b..cb0329bc0ed7 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -79,7 +79,7 @@ private:
std::unique_ptr<IPDBEnumChildren<PDBSymbolData>>
PDBSymbolFunc::getArguments() const {
- return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
+ return std::make_unique<FunctionArgEnumerator>(Session, *this);
}
void PDBSymbolFunc::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index 292320a6fe6d..1373615522eb 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -63,7 +63,7 @@ private:
std::unique_ptr<IPDBEnumSymbols>
PDBSymbolTypeFunctionSig::getArguments() const {
- return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
+ return std::make_unique<FunctionArgEnumerator>(Session, *this);
}
void PDBSymbolTypeFunctionSig::dump(PDBSymDumper &Dumper) const {
diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp
index acb1599480b0..a8e1d0a619ca 100644
--- a/lib/DebugInfo/PDB/UDTLayout.cpp
+++ b/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -71,7 +71,7 @@ DataMemberLayoutItem::DataMemberLayoutItem(
DataMember(std::move(Member)) {
auto Type = DataMember->getType();
if (auto UDT = unique_dyn_cast<PDBSymbolTypeUDT>(Type)) {
- UdtLayout = llvm::make_unique<ClassLayout>(std::move(UDT));
+ UdtLayout = std::make_unique<ClassLayout>(std::move(UDT));
UsedBytes = UdtLayout->usedBytes();
}
}
@@ -84,7 +84,7 @@ VBPtrLayoutItem::VBPtrLayoutItem(const UDTLayoutBase &Parent,
}
const PDBSymbolData &DataMemberLayoutItem::getDataMember() {
- return *dyn_cast<PDBSymbolData>(Symbol);
+ return *cast<PDBSymbolData>(Symbol);
}
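The dyn_cast-to-cast change above is a correctness fix, not a rename; a sketch of the distinction with a hypothetical Base/Derived hierarchy:

    #include "llvm/Support/Casting.h"

    struct Base {
      enum Kind { K_Base, K_Derived };
      Kind TheKind;
      explicit Base(Kind K) : TheKind(K) {}
    };
    struct Derived : Base {
      Derived() : Base(K_Derived) {}
      static bool classof(const Base *B) { return B->TheKind == K_Derived; }
    };

    Derived &asDerived(Base *B) {
      // cast<> asserts the cast succeeds; dereferencing a failed dyn_cast<>
      // (which returns nullptr) would be undefined behavior.
      return *llvm::cast<Derived>(B);
    }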
bool DataMemberLayoutItem::hasUDTLayout() const { return UdtLayout != nullptr; }
@@ -205,7 +205,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
for (auto &Base : Bases) {
uint32_t Offset = Base->getOffset();
// Non-virtual bases never get elided.
- auto BL = llvm::make_unique<BaseClassLayout>(*this, Offset, false,
+ auto BL = std::make_unique<BaseClassLayout>(*this, Offset, false,
std::move(Base));
AllBases.push_back(BL.get());
@@ -216,7 +216,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
assert(VTables.size() <= 1);
if (!VTables.empty()) {
auto VTLayout =
- llvm::make_unique<VTableLayoutItem>(*this, std::move(VTables[0]));
+ std::make_unique<VTableLayoutItem>(*this, std::move(VTables[0]));
VTable = VTLayout.get();
@@ -224,7 +224,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
}
for (auto &Data : Members) {
- auto DM = llvm::make_unique<DataMemberLayoutItem>(*this, std::move(Data));
+ auto DM = std::make_unique<DataMemberLayoutItem>(*this, std::move(Data));
addChildToLayout(std::move(DM));
}
@@ -236,7 +236,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
int VBPO = VB->getVirtualBasePointerOffset();
if (!hasVBPtrAtOffset(VBPO)) {
if (auto VBP = VB->getRawSymbol().getVirtualBaseTableType()) {
- auto VBPL = llvm::make_unique<VBPtrLayoutItem>(*this, std::move(VBP),
+ auto VBPL = std::make_unique<VBPtrLayoutItem>(*this, std::move(VBP),
VBPO, VBP->getLength());
VBPtr = VBPL.get();
addChildToLayout(std::move(VBPL));
@@ -250,7 +250,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
uint32_t Offset = UsedBytes.find_last() + 1;
bool Elide = (Parent != nullptr);
auto BL =
- llvm::make_unique<BaseClassLayout>(*this, Offset, Elide, std::move(VB));
+ std::make_unique<BaseClassLayout>(*this, Offset, Elide, std::move(VB));
AllBases.push_back(BL.get());
// Only lay this virtual base out directly inside of *this* class if this
diff --git a/lib/DebugInfo/Symbolize/DIPrinter.cpp b/lib/DebugInfo/Symbolize/DIPrinter.cpp
index b2bfef251485..b1a80cbc4580 100644
--- a/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -30,11 +30,6 @@
namespace llvm {
namespace symbolize {
-// By default, DILineInfo contains "<invalid>" for function/filename it
-// cannot fetch. We replace it to "??" to make our output closer to addr2line.
-static const char kDILineInfoBadString[] = "<invalid>";
-static const char kBadString[] = "??";
-
// Prints the source code in FileName around the given Line.
void DIPrinter::printContext(const std::string &FileName, int64_t Line) {
if (PrintSourceContext <= 0)
@@ -68,16 +63,16 @@ void DIPrinter::printContext(const std::string &FileName, int64_t Line) {
void DIPrinter::print(const DILineInfo &Info, bool Inlined) {
if (PrintFunctionNames) {
std::string FunctionName = Info.FunctionName;
- if (FunctionName == kDILineInfoBadString)
- FunctionName = kBadString;
+ if (FunctionName == DILineInfo::BadString)
+ FunctionName = DILineInfo::Addr2LineBadString;
StringRef Delimiter = PrintPretty ? " at " : "\n";
StringRef Prefix = (PrintPretty && Inlined) ? " (inlined by) " : "";
OS << Prefix << FunctionName << Delimiter;
}
std::string Filename = Info.FileName;
- if (Filename == kDILineInfoBadString)
- Filename = kBadString;
+ if (Filename == DILineInfo::BadString)
+ Filename = DILineInfo::Addr2LineBadString;
else if (Basenames)
Filename = llvm::sys::path::filename(Filename);
if (!Verbose) {
@@ -115,8 +110,8 @@ DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) {
DIPrinter &DIPrinter::operator<<(const DIGlobal &Global) {
std::string Name = Global.Name;
- if (Name == kDILineInfoBadString)
- Name = kBadString;
+ if (Name == DILineInfo::BadString)
+ Name = DILineInfo::Addr2LineBadString;
OS << Name << "\n";
OS << Global.Start << " " << Global.Size << "\n";
return *this;
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 2765bf44d504..b4d49d9ff958 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -43,20 +43,22 @@ getDILineInfoSpecifier(FunctionNameKind FNKind) {
ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
SymbolizableObjectFile::create(const object::ObjectFile *Obj,
- std::unique_ptr<DIContext> DICtx) {
+ std::unique_ptr<DIContext> DICtx,
+ bool UntagAddresses) {
assert(DICtx);
std::unique_ptr<SymbolizableObjectFile> res(
- new SymbolizableObjectFile(Obj, std::move(DICtx)));
+ new SymbolizableObjectFile(Obj, std::move(DICtx), UntagAddresses));
std::unique_ptr<DataExtractor> OpdExtractor;
uint64_t OpdAddress = 0;
// Find the .opd (function descriptor) section if any, for big-endian
// PowerPC64 ELF.
if (Obj->getArch() == Triple::ppc64) {
for (section_iterator Section : Obj->sections()) {
- StringRef Name;
- if (auto EC = Section->getName(Name))
- return EC;
- if (Name == ".opd") {
+ Expected<StringRef> NameOrErr = Section->getName();
+ if (!NameOrErr)
+ return errorToErrorCode(NameOrErr.takeError());
+
+ if (*NameOrErr == ".opd") {
Expected<StringRef> E = Section->getContents();
if (!E)
return errorToErrorCode(E.takeError());
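Section::getName() now returns Expected<StringRef>; a sketch of the bridging used above, assuming the post-patch object::SectionRef API (sectionName is a hypothetical helper):

    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/ErrorOr.h"

    static llvm::ErrorOr<llvm::StringRef>
    sectionName(const llvm::object::SectionRef &Sec) {
      llvm::Expected<llvm::StringRef> NameOrErr = Sec.getName();
      if (!NameOrErr) // convert the rich Error into a std::error_code
        return llvm::errorToErrorCode(NameOrErr.takeError());
      return *NameOrErr;
    }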
@@ -103,8 +105,10 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj,
}
SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj,
- std::unique_ptr<DIContext> DICtx)
- : Module(Obj), DebugInfoContext(std::move(DICtx)) {}
+ std::unique_ptr<DIContext> DICtx,
+ bool UntagAddresses)
+ : Module(Obj), DebugInfoContext(std::move(DICtx)),
+ UntagAddresses(UntagAddresses) {}
namespace {
@@ -172,6 +176,12 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
if (!SymbolAddressOrErr)
return errorToErrorCode(SymbolAddressOrErr.takeError());
uint64_t SymbolAddress = *SymbolAddressOrErr;
+ if (UntagAddresses) {
+ // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55
+ // into bits 56-63 instead of masking them out.
+ SymbolAddress &= (1ull << 56) - 1;
+ SymbolAddress = (int64_t(SymbolAddress) << 8) >> 8;
+ }
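The untagging just above is two steps: clear the top byte, then sign-extend bit 55 back over it. A standalone sketch of the same arithmetic (untagAddress is a hypothetical name):

    #include <cstdint>

    static uint64_t untagAddress(uint64_t Addr) {
      Addr &= (1ull << 56) - 1;                   // drop the tag byte (bits 56-63)
      return uint64_t((int64_t(Addr) << 8) >> 8); // sign-extend bit 55 upward
    }
    // A tagged user-space address loses its tag:
    //   untagAddress(0xb400000000001000) == 0x0000000000001000
    // A kernel address keeps its set top bits, because bit 55 is 1:
    //   untagAddress(0xffff000000001000) == 0xffff000000001000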
if (OpdExtractor) {
// For big-endian PowerPC64 ELF, symbols in the .opd section refer to
// function descriptors. The first word of the descriptor is a pointer to
@@ -179,10 +189,8 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
// For the purposes of symbolization, pretend the symbol's address is that
// of the function's code, not the descriptor.
uint64_t OpdOffset = SymbolAddress - OpdAddress;
- uint32_t OpdOffset32 = OpdOffset;
- if (OpdOffset == OpdOffset32 &&
- OpdExtractor->isValidOffsetForAddress(OpdOffset32))
- SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
+ if (OpdExtractor->isValidOffsetForAddress(OpdOffset))
+ SymbolAddress = OpdExtractor->getAddress(&OpdOffset);
}
Expected<StringRef> SymbolNameOrErr = Symbol.getName();
if (!SymbolNameOrErr)
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
index 9cab94178c1b..b5b9793a44d9 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -31,7 +31,8 @@ namespace symbolize {
class SymbolizableObjectFile : public SymbolizableModule {
public:
static ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
- create(const object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx);
+ create(const object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx,
+ bool UntagAddresses);
DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset,
FunctionNameKind FNKind,
@@ -70,6 +71,7 @@ private:
const object::ObjectFile *Module;
std::unique_ptr<DIContext> DebugInfoContext;
+ bool UntagAddresses;
struct SymbolDesc {
uint64_t Addr;
@@ -85,7 +87,8 @@ private:
std::vector<std::pair<SymbolDesc, StringRef>> Objects;
SymbolizableObjectFile(const object::ObjectFile *Obj,
- std::unique_ptr<DIContext> DICtx);
+ std::unique_ptr<DIContext> DICtx,
+ bool UntagAddresses);
};
} // end namespace symbolize
diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp
index 6a619f8f2f37..be79d9e637c1 100644
--- a/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -35,19 +35,6 @@
#include <cassert>
#include <cstring>
-#if defined(_MSC_VER)
-#include <Windows.h>
-
-// This must be included after windows.h.
-#include <DbgHelp.h>
-#pragma comment(lib, "dbghelp.lib")
-
-// Windows.h conflicts with our COFF header definitions.
-#ifdef IMAGE_FILE_MACHINE_I386
-#undef IMAGE_FILE_MACHINE_I386
-#endif
-#endif
-
namespace llvm {
namespace symbolize {
@@ -205,7 +192,7 @@ bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
MemoryBuffer::getFileOrSTDIN(Path);
if (!MB)
return false;
- return CRCHash == llvm::crc32(0, MB.get()->getBuffer());
+ return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer()));
}
bool findDebugBinary(const std::string &OrigPath,
@@ -259,7 +246,11 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
return false;
for (const SectionRef &Section : Obj->sections()) {
StringRef Name;
- Section.getName(Name);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
Name = Name.substr(Name.find_first_not_of("._"));
if (Name == "gnu_debuglink") {
Expected<StringRef> ContentsOrErr = Section.getContents();
@@ -268,7 +259,7 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
return false;
}
DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
if (const char *DebugNameStr = DE.getCStr(&Offset)) {
// 4-byte align the offset.
Offset = (Offset + 3) & ~0x3;
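A sketch of the full .gnu_debuglink layout this function decodes, assuming the DataExtractor API used above (parseDebuglink is a hypothetical helper): the section holds a NUL-terminated filename, padding up to a 4-byte boundary, then a 32-bit CRC of the separate debug file.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/DataExtractor.h"
    #include <cstdint>

    static bool parseDebuglink(llvm::StringRef Contents, bool IsLittleEndian,
                               llvm::StringRef &Name, uint32_t &CRC) {
      llvm::DataExtractor DE(Contents, IsLittleEndian, /*AddressSize=*/0);
      uint64_t Offset = 0; // offsets are now 64-bit, per the change above
      const char *NameStr = DE.getCStr(&Offset);
      if (!NameStr)
        return false;
      Offset = (Offset + 3) & ~uint64_t(3); // 4-byte align the CRC field
      CRC = DE.getU32(&Offset);
      Name = NameStr;
      return true;
    }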
@@ -397,7 +388,7 @@ LLVMSymbolizer::getOrCreateObject(const std::string &Path,
return I->second.get();
Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
- UB->getObjectForArch(ArchName);
+ UB->getMachOObjectForArch(ArchName);
if (!ObjOrErr) {
ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
std::unique_ptr<ObjectFile>());
@@ -418,8 +409,8 @@ Expected<SymbolizableModule *>
LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
std::unique_ptr<DIContext> Context,
StringRef ModuleName) {
- auto InfoOrErr =
- SymbolizableObjectFile::create(Obj, std::move(Context));
+ auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context),
+ Opts.UntagAddresses);
std::unique_ptr<SymbolizableModule> SymMod;
if (InfoOrErr)
SymMod = std::move(*InfoOrErr);
@@ -530,21 +521,20 @@ LLVMSymbolizer::DemangleName(const std::string &Name,
return Result;
}
-#if defined(_MSC_VER)
if (!Name.empty() && Name.front() == '?') {
// Only do MSVC C++ demangling on symbols starting with '?'.
- char DemangledName[1024] = {0};
- DWORD result = ::UnDecorateSymbolName(
- Name.c_str(), DemangledName, 1023,
- UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected
- UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
- UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications
- UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
- UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
- UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
- return (result == 0) ? Name : std::string(DemangledName);
+ int status = 0;
+ char *DemangledName = microsoftDemangle(
+ Name.c_str(), nullptr, nullptr, &status,
+ MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention |
+ MSDF_NoMemberType | MSDF_NoReturnType));
+ if (status != 0)
+ return Name;
+ std::string Result = DemangledName;
+ free(DemangledName);
+ return Result;
}
-#endif
+
if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module())
return std::string(demanglePE32ExternCFunc(Name));
return Name;
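With this change the MSVC demangling path no longer needs DbgHelp and works on every host; a self-contained sketch of the new call, mirroring the flags used above (demangleMSVC is a hypothetical wrapper):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdlib>
    #include <string>

    static std::string demangleMSVC(const std::string &Mangled) {
      if (Mangled.empty() || Mangled.front() != '?')
        return Mangled; // only MSVC C++ names start with '?'
      int Status = 0;
      char *Demangled = llvm::microsoftDemangle(
          Mangled.c_str(), /*Buf=*/nullptr, /*N=*/nullptr, &Status,
          llvm::MSDemangleFlags(llvm::MSDF_NoAccessSpecifier |
                                llvm::MSDF_NoCallingConvention |
                                llvm::MSDF_NoMemberType |
                                llvm::MSDF_NoReturnType));
      if (Status != 0)
        return Mangled;     // fall back to the mangled name on failure
      std::string Result = Demangled;
      std::free(Demangled); // the demangler heap-allocates the result
      return Result;
    }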
diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp
index 5c99c70e3cc6..760d28b3ab9d 100644
--- a/lib/Demangle/ItaniumDemangle.cpp
+++ b/lib/Demangle/ItaniumDemangle.cpp
@@ -174,6 +174,16 @@ struct DumpVisitor {
return printStr("SpecialSubKind::iostream");
}
}
+ void print(TemplateParamKind TPK) {
+ switch (TPK) {
+ case TemplateParamKind::Type:
+ return printStr("TemplateParamKind::Type");
+ case TemplateParamKind::NonType:
+ return printStr("TemplateParamKind::NonType");
+ case TemplateParamKind::Template:
+ return printStr("TemplateParamKind::Template");
+ }
+ }
void newLine() {
printStr("\n");
diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp
index bf7d77638f34..c681d6e25b87 100644
--- a/lib/Demangle/MicrosoftDemangle.cpp
+++ b/lib/Demangle/MicrosoftDemangle.cpp
@@ -783,8 +783,26 @@ SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
return S;
}
+SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) {
+ assert(MangledName.startsWith('.'));
+ MangledName.consumeFront('.');
+
+ TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
+ if (Error || !MangledName.empty()) {
+ Error = true;
+ return nullptr;
+ }
+ return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'");
+}
+
// Parser entry point.
SymbolNode *Demangler::parse(StringView &MangledName) {
+ // Typeinfo names are strings stored in RTTI data. They're not symbol names.
+ // It's still useful to demangle them. They're the only demangled entity
+ // that doesn't start with a "?" but with a ".".
+ if (MangledName.startsWith('.'))
+ return demangleTypeinfoName(MangledName);
+
if (MangledName.startsWith("??@"))
return demangleMD5Name(MangledName);
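A usage sketch of the new '.'-prefixed entry point; the expected output reflects the "`RTTI Type Descriptor Name'" suffix synthesized above:

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      // ".?AVtype_info@@" is the RTTI type-descriptor-name form of a class.
      char *S = llvm::microsoftDemangle(".?AVtype_info@@", nullptr, nullptr,
                                        &Status);
      if (Status == 0 && S)
        std::puts(S); // e.g. class type_info `RTTI Type Descriptor Name'
      std::free(S);
    }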
@@ -2161,7 +2179,7 @@ NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
NodeArrayNode *
Demangler::demangleTemplateParameterList(StringView &MangledName) {
- NodeList *Head;
+ NodeList *Head = nullptr;
NodeList **Current = &Head;
size_t Count = 0;
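The added "= nullptr" above fixes an uninitialized read: with an empty template parameter list, Head was returned without ever being written. The bug class, sketched with a hypothetical list builder:

    struct NodeList {
      void *N = nullptr;
      NodeList *Next = nullptr;
    };

    NodeList *buildList(bool Empty) {
      NodeList *Head = nullptr; // without this, the Empty case reads garbage
      NodeList **Current = &Head;
      if (!Empty) {
        *Current = new NodeList(); // sketch only; ownership elided
        Current = &(*Current)->Next;
      }
      return Head; // safe: nullptr for an empty list
    }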
@@ -2328,12 +2346,22 @@ char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N,
if (Flags & MSDF_DumpBackrefs)
D.dumpBackReferences();
+ OutputFlags OF = OF_Default;
+ if (Flags & MSDF_NoCallingConvention)
+ OF = OutputFlags(OF | OF_NoCallingConvention);
+ if (Flags & MSDF_NoAccessSpecifier)
+ OF = OutputFlags(OF | OF_NoAccessSpecifier);
+ if (Flags & MSDF_NoReturnType)
+ OF = OutputFlags(OF | OF_NoReturnType);
+ if (Flags & MSDF_NoMemberType)
+ OF = OutputFlags(OF | OF_NoMemberType);
+
if (D.Error)
InternalStatus = demangle_invalid_mangled_name;
else if (!initializeOutputStream(Buf, N, S, 1024))
InternalStatus = demangle_memory_alloc_failure;
else {
- AST->output(S, OF_Default);
+ AST->output(S, OF);
S += '\0';
if (N != nullptr)
*N = S.getCurrentPosition();
diff --git a/lib/Demangle/MicrosoftDemangleNodes.cpp b/lib/Demangle/MicrosoftDemangleNodes.cpp
index 63ca475ec1fe..9cee975231a2 100644
--- a/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -120,8 +120,6 @@ std::string Node::toString(OutputFlags Flags) const {
return {OS.getBuffer()};
}
-void TypeNode::outputQuals(bool SpaceBefore, bool SpaceAfter) const {}
-
void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
switch (PrimKind) {
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Void, "void");
@@ -380,24 +378,28 @@ void LiteralOperatorIdentifierNode::output(OutputStream &OS,
void FunctionSignatureNode::outputPre(OutputStream &OS,
OutputFlags Flags) const {
- if (FunctionClass & FC_Public)
- OS << "public: ";
- if (FunctionClass & FC_Protected)
- OS << "protected: ";
- if (FunctionClass & FC_Private)
- OS << "private: ";
-
- if (!(FunctionClass & FC_Global)) {
- if (FunctionClass & FC_Static)
- OS << "static ";
+ if (!(Flags & OF_NoAccessSpecifier)) {
+ if (FunctionClass & FC_Public)
+ OS << "public: ";
+ if (FunctionClass & FC_Protected)
+ OS << "protected: ";
+ if (FunctionClass & FC_Private)
+ OS << "private: ";
}
- if (FunctionClass & FC_Virtual)
- OS << "virtual ";
- if (FunctionClass & FC_ExternC)
- OS << "extern \"C\" ";
+ if (!(Flags & OF_NoMemberType)) {
+ if (!(FunctionClass & FC_Global)) {
+ if (FunctionClass & FC_Static)
+ OS << "static ";
+ }
+ if (FunctionClass & FC_Virtual)
+ OS << "virtual ";
+
+ if (FunctionClass & FC_ExternC)
+ OS << "extern \"C\" ";
+ }
- if (ReturnType) {
+ if (!(Flags & OF_NoReturnType) && ReturnType) {
ReturnType->outputPre(OS, Flags);
OS << " ";
}
@@ -440,7 +442,7 @@ void FunctionSignatureNode::outputPost(OutputStream &OS,
else if (RefQualifier == FunctionRefQualifier::RValueReference)
OS << " &&";
- if (ReturnType)
+ if (!(Flags & OF_NoReturnType) && ReturnType)
ReturnType->outputPost(OS, Flags);
}
@@ -582,19 +584,26 @@ void FunctionSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
}
void VariableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+ const char *AccessSpec = nullptr;
+ bool IsStatic = true;
switch (SC) {
case StorageClass::PrivateStatic:
- OS << "private: static ";
+ AccessSpec = "private";
break;
case StorageClass::PublicStatic:
- OS << "public: static ";
+ AccessSpec = "public";
break;
case StorageClass::ProtectedStatic:
- OS << "protected: static ";
+ AccessSpec = "protected";
break;
default:
+ IsStatic = false;
break;
}
+ if (!(Flags & OF_NoAccessSpecifier) && AccessSpec)
+ OS << AccessSpec << ": ";
+ if (!(Flags & OF_NoMemberType) && IsStatic)
+ OS << "static ";
if (Type) {
Type->outputPre(OS, Flags);
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 1c6c0406d048..ee7a7cb60bc9 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -32,12 +32,12 @@
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
#include <cstring>
+#include <mutex>
using namespace llvm;
#define DEBUG_TYPE "jit"
@@ -191,7 +191,7 @@ uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) {
std::string ExecutionEngine::getMangledName(const GlobalValue *GV) {
assert(GV->hasName() && "Global must have name.");
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
SmallString<128> FullName;
const DataLayout &DL =
@@ -204,12 +204,12 @@ std::string ExecutionEngine::getMangledName(const GlobalValue *GV) {
}
void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
addGlobalMapping(getMangledName(GV), (uint64_t) Addr);
}
void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
assert(!Name.empty() && "Empty GlobalMapping symbol name!");
@@ -228,14 +228,14 @@ void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) {
}
void ExecutionEngine::clearAllGlobalMappings() {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
EEState.getGlobalAddressMap().clear();
EEState.getGlobalAddressReverseMap().clear();
}
void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
for (GlobalObject &GO : M->global_objects())
EEState.RemoveMapping(getMangledName(&GO));
@@ -243,12 +243,12 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV,
void *Addr) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
return updateGlobalMapping(getMangledName(GV), (uint64_t) Addr);
}
uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
ExecutionEngineState::GlobalAddressMapTy &Map =
EEState.getGlobalAddressMap();
@@ -275,7 +275,7 @@ uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) {
}
uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
uint64_t Address = 0;
ExecutionEngineState::GlobalAddressMapTy::iterator I =
EEState.getGlobalAddressMap().find(S);
@@ -286,19 +286,19 @@ uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) {
void *ExecutionEngine::getPointerToGlobalIfAvailable(StringRef S) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
if (void* Address = (void *) getAddressToGlobalIfAvailable(S))
return Address;
return nullptr;
}
void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
return getPointerToGlobalIfAvailable(getMangledName(GV));
}
const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// If we haven't computed the reverse mapping yet, do so first.
if (EEState.getGlobalAddressReverseMap().empty()) {
@@ -340,14 +340,14 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
Values.clear(); // Free the old contents.
Values.reserve(InputArgv.size());
unsigned PtrSize = EE->getDataLayout().getPointerSize();
- Array = make_unique<char[]>((InputArgv.size()+1)*PtrSize);
+ Array = std::make_unique<char[]>((InputArgv.size()+1)*PtrSize);
LLVM_DEBUG(dbgs() << "JIT: ARGV = " << (void *)Array.get() << "\n");
Type *SBytePtr = Type::getInt8PtrTy(C);
for (unsigned i = 0; i != InputArgv.size(); ++i) {
unsigned Size = InputArgv[i].size()+1;
- auto Dest = make_unique<char[]>(Size);
+ auto Dest = std::make_unique<char[]>(Size);
LLVM_DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void *)Dest.get()
<< "\n");
@@ -575,7 +575,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
if (Function *F = const_cast<Function*>(dyn_cast<Function>(GV)))
return getPointerToFunction(F);
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
if (void* P = getPointerToGlobalIfAvailable(GV))
return P;
@@ -626,7 +626,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
break;
case Type::VectorTyID:
// if the whole vector is 'undef' just reserve memory for the value.
- auto* VTy = dyn_cast<VectorType>(C->getType());
+ auto* VTy = cast<VectorType>(C->getType());
Type *ElemTy = VTy->getElementType();
unsigned int elemNum = VTy->getNumElements();
Result.AggregateVal.resize(elemNum);
@@ -925,7 +925,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
elemNum = CDV->getNumElements();
ElemTy = CDV->getElementType();
} else if (CV || CAZ) {
- VectorType* VTy = dyn_cast<VectorType>(C->getType());
+ auto* VTy = cast<VectorType>(C->getType());
elemNum = VTy->getNumElements();
ElemTy = VTy->getElementType();
} else {
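All of the hunks above are the same substitution: llvm::sys::Mutex satisfies the standard BasicLockable requirements, so the custom MutexGuard RAII wrapper gives way to std::lock_guard (and std::unique_lock where early unlocking is needed). A minimal sketch with a hypothetical Registry class:

    #include "llvm/Support/Mutex.h"
    #include <mutex>

    class Registry {
      llvm::sys::Mutex Lock;
      int Count = 0;

    public:
      int increment() {
        std::lock_guard<llvm::sys::Mutex> Locked(Lock); // was: MutexGuard
        return ++Count; // unlocked automatically at scope exit
      }
    };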
diff --git a/lib/ExecutionEngine/GDBRegistrationListener.cpp b/lib/ExecutionEngine/GDBRegistrationListener.cpp
index 08d20156a590..7ed025fbb481 100644
--- a/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -14,7 +14,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Support/MutexGuard.h"
+#include <mutex>
using namespace llvm;
using namespace llvm::object;
@@ -135,7 +135,7 @@ void NotifyDebugger(jit_code_entry* JITCodeEntry) {
GDBJITRegistrationListener::~GDBJITRegistrationListener() {
// Free all registered object files.
- llvm::MutexGuard locked(*JITDebugLock);
+ std::lock_guard<llvm::sys::Mutex> locked(*JITDebugLock);
for (RegisteredObjectBufferMap::iterator I = ObjectBufferMap.begin(),
E = ObjectBufferMap.end();
I != E; ++I) {
@@ -159,7 +159,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded(
const char *Buffer = DebugObj.getBinary()->getMemoryBufferRef().getBufferStart();
size_t Size = DebugObj.getBinary()->getMemoryBufferRef().getBufferSize();
- llvm::MutexGuard locked(*JITDebugLock);
+ std::lock_guard<llvm::sys::Mutex> locked(*JITDebugLock);
assert(ObjectBufferMap.find(K) == ObjectBufferMap.end() &&
"Second attempt to perform debug registration.");
jit_code_entry* JITCodeEntry = new jit_code_entry();
@@ -178,7 +178,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded(
}
void GDBJITRegistrationListener::notifyFreeingObject(ObjectKey K) {
- llvm::MutexGuard locked(*JITDebugLock);
+ std::lock_guard<llvm::sys::Mutex> locked(*JITDebugLock);
RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(K);
if (I != ObjectBufferMap.end()) {
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index c3a2ccc582c9..71b7f893d712 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -32,7 +32,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Support/UniqueLock.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cmath>
@@ -41,6 +40,7 @@
#include <cstdio>
#include <cstring>
#include <map>
+#include <mutex>
#include <string>
#include <utility>
#include <vector>
@@ -258,7 +258,7 @@ GenericValue Interpreter::callExternalFunction(Function *F,
ArrayRef<GenericValue> ArgVals) {
TheInterpreter = this;
- unique_lock<sys::Mutex> Guard(*FunctionsLock);
+ std::unique_lock<sys::Mutex> Guard(*FunctionsLock);
// Do a lookup to see if the function is in our cache... this should just be a
// deferred annotation!
diff --git a/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
index 1271ad962b38..b47a798c7603 100644
--- a/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
+++ b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
@@ -20,24 +20,23 @@ namespace jitlink {
template <typename BuilderImpl> class BasicGOTAndStubsBuilder {
public:
- BasicGOTAndStubsBuilder(AtomGraph &G) : G(G) {}
+ BasicGOTAndStubsBuilder(LinkGraph &G) : G(G) {}
void run() {
- // We're going to be adding new atoms, but we don't want to iterate over
- // the newly added ones, so just copy the existing atoms out.
- std::vector<DefinedAtom *> DAs(G.defined_atoms().begin(),
- G.defined_atoms().end());
+ // We're going to be adding new blocks, but we don't want to iterate over
+ // the newly added ones, so just copy the existing blocks out.
+ std::vector<Block *> Blocks(G.blocks().begin(), G.blocks().end());
- for (auto *DA : DAs)
- for (auto &E : DA->edges())
+ for (auto *B : Blocks)
+ for (auto &E : B->edges())
if (impl().isGOTEdge(E))
- impl().fixGOTEdge(E, getGOTEntryAtom(E.getTarget()));
+ impl().fixGOTEdge(E, getGOTEntrySymbol(E.getTarget()));
else if (impl().isExternalBranchEdge(E))
- impl().fixExternalBranchEdge(E, getStubAtom(E.getTarget()));
+ impl().fixExternalBranchEdge(E, getStubSymbol(E.getTarget()));
}
protected:
- Atom &getGOTEntryAtom(Atom &Target) {
+ Symbol &getGOTEntrySymbol(Symbol &Target) {
assert(Target.hasName() && "GOT edge cannot point to anonymous target");
auto GOTEntryI = GOTEntries.find(Target.getName());
@@ -49,31 +48,31 @@ protected:
GOTEntries.insert(std::make_pair(Target.getName(), &GOTEntry)).first;
}
- assert(GOTEntryI != GOTEntries.end() && "Could not get GOT entry atom");
+ assert(GOTEntryI != GOTEntries.end() && "Could not get GOT entry symbol");
return *GOTEntryI->second;
}
- Atom &getStubAtom(Atom &Target) {
+ Symbol &getStubSymbol(Symbol &Target) {
assert(Target.hasName() &&
"External branch edge can not point to an anonymous target");
auto StubI = Stubs.find(Target.getName());
if (StubI == Stubs.end()) {
- auto &StubAtom = impl().createStub(Target);
- StubI = Stubs.insert(std::make_pair(Target.getName(), &StubAtom)).first;
+ auto &StubSymbol = impl().createStub(Target);
+ StubI = Stubs.insert(std::make_pair(Target.getName(), &StubSymbol)).first;
}
- assert(StubI != Stubs.end() && "Count not get stub atom");
+ assert(StubI != Stubs.end() && "Could not get stub symbol");
return *StubI->second;
}
- AtomGraph &G;
+ LinkGraph &G;
private:
BuilderImpl &impl() { return static_cast<BuilderImpl &>(*this); }
- DenseMap<StringRef, DefinedAtom *> GOTEntries;
- DenseMap<StringRef, DefinedAtom *> Stubs;
+ DenseMap<StringRef, Symbol *> GOTEntries;
+ DenseMap<StringRef, Symbol *> Stubs;
};
} // end namespace jitlink
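Both getters above share a lookup-or-create shape over a DenseMap keyed by symbol name; sketched with a callable factory (getOrCreate is a hypothetical helper):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/StringRef.h"
    #include <utility>

    struct Symbol {};

    template <typename MakeFn>
    Symbol &getOrCreate(llvm::DenseMap<llvm::StringRef, Symbol *> &Map,
                        llvm::StringRef Name, MakeFn Make) {
      auto I = Map.find(Name);
      if (I == Map.end()) // first request for Name: build and cache the entry
        I = Map.insert(std::make_pair(Name, &Make())).first;
      return *I->second;
    }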
diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
index 25f0e9040ffe..f80b0e7f8909 100644
--- a/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
+++ b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -17,18 +17,14 @@
namespace llvm {
namespace jitlink {
-EHFrameParser::EHFrameParser(AtomGraph &G, Section &EHFrameSection,
- StringRef EHFrameContent,
- JITTargetAddress EHFrameAddress,
- Edge::Kind FDEToCIERelocKind,
- Edge::Kind FDEToTargetRelocKind)
- : G(G), EHFrameSection(EHFrameSection), EHFrameContent(EHFrameContent),
- EHFrameAddress(EHFrameAddress),
- EHFrameReader(EHFrameContent, G.getEndianness()),
- FDEToCIERelocKind(FDEToCIERelocKind),
- FDEToTargetRelocKind(FDEToTargetRelocKind) {}
-
-Error EHFrameParser::atomize() {
+EHFrameBinaryParser::EHFrameBinaryParser(JITTargetAddress EHFrameAddress,
+ StringRef EHFrameContent,
+ unsigned PointerSize,
+ support::endianness Endianness)
+ : EHFrameAddress(EHFrameAddress), EHFrameContent(EHFrameContent),
+ PointerSize(PointerSize), EHFrameReader(EHFrameContent, Endianness) {}
+
+Error EHFrameBinaryParser::addToGraph() {
while (!EHFrameReader.empty()) {
size_t RecordOffset = EHFrameReader.getOffset();
@@ -38,44 +34,39 @@ Error EHFrameParser::atomize() {
<< " (offset " << RecordOffset << ")\n";
});
- size_t CIELength = 0;
- uint32_t CIELengthField;
- if (auto Err = EHFrameReader.readInteger(CIELengthField))
+ size_t RecordLength = 0;
+ uint32_t RecordLengthField;
+ if (auto Err = EHFrameReader.readInteger(RecordLengthField))
return Err;
- // Process CIE length/extended-length fields to build the atom.
+ // Process CIE/FDE length/extended-length fields to build the blocks.
//
// The value of these fields describe the length of the *rest* of the CIE
// (not including data up to the end of the field itself) so we have to
- // bump CIELength to include the data up to the end of the field: 4 bytes
+ // bump RecordLength to include the data up to the end of the field: 4 bytes
// for Length, or 12 bytes (4 bytes + 8 bytes) for ExtendedLength.
- if (CIELengthField == 0) // Length 0 means end of __eh_frame section.
+ if (RecordLengthField == 0) // Length 0 means end of __eh_frame section.
break;
// If the regular length field's value is 0xffffffff, use extended length.
- if (CIELengthField == 0xffffffff) {
- uint64_t CIEExtendedLengthField;
- if (auto Err = EHFrameReader.readInteger(CIEExtendedLengthField))
+ if (RecordLengthField == 0xffffffff) {
+ uint64_t ExtendedLengthField;
+ if (auto Err = EHFrameReader.readInteger(ExtendedLengthField))
return Err;
- if (CIEExtendedLengthField > EHFrameReader.bytesRemaining())
+ if (ExtendedLengthField > EHFrameReader.bytesRemaining())
return make_error<JITLinkError>("CIE record extends past the end of "
"the __eh_frame section");
- if (CIEExtendedLengthField + 12 > std::numeric_limits<size_t>::max())
+ if (ExtendedLengthField + 12 > std::numeric_limits<size_t>::max())
return make_error<JITLinkError>("CIE record too large to process");
- CIELength = CIEExtendedLengthField + 12;
+ RecordLength = ExtendedLengthField + 12;
} else {
- if (CIELengthField > EHFrameReader.bytesRemaining())
+ if (RecordLengthField > EHFrameReader.bytesRemaining())
return make_error<JITLinkError>("CIE record extends past the end of "
"the __eh_frame section");
- CIELength = CIELengthField + 4;
+ RecordLength = RecordLengthField + 4;
}
- LLVM_DEBUG(dbgs() << " length: " << CIELength << "\n");
-
- // Add an atom for this record.
- CurRecordAtom = &G.addAnonymousAtom(
- EHFrameSection, EHFrameAddress + RecordOffset, G.getPointerSize());
- CurRecordAtom->setContent(EHFrameContent.substr(RecordOffset, CIELength));
+ LLVM_DEBUG(dbgs() << " length: " << RecordLength << "\n");
// Read the CIE Pointer.
size_t CIEPointerAddress = EHFrameAddress + EHFrameReader.getOffset();
@@ -85,21 +76,24 @@ Error EHFrameParser::atomize() {
// Based on the CIE pointer value, parse this as a CIE or FDE record.
if (CIEPointer == 0) {
- if (auto Err = processCIE())
+ if (auto Err = processCIE(RecordOffset, RecordLength))
return Err;
} else {
- if (auto Err = processFDE(CIEPointerAddress, CIEPointer))
+ if (auto Err = processFDE(RecordOffset, RecordLength, CIEPointerAddress,
+ CIEPointer))
return Err;
}
- EHFrameReader.setOffset(RecordOffset + CIELength);
+ EHFrameReader.setOffset(RecordOffset + RecordLength);
}
return Error::success();
}
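The length handling above follows the eh-frame encoding: the on-disk Length field counts only the bytes after itself, with 0xffffffff escaping to a 64-bit ExtendedLength. A sketch of just that rule (decodeRecordLength is a hypothetical helper):

    #include <cstddef>
    #include <cstdint>

    // Returns false for the zero-length terminator record.
    static bool decodeRecordLength(uint32_t LengthField, uint64_t ExtendedField,
                                   size_t &RecordLength) {
      if (LengthField == 0)
        return false;                      // end of the eh-frame section
      if (LengthField == 0xffffffff)
        RecordLength = ExtendedField + 12; // 4 (Length) + 8 (ExtendedLength)
      else
        RecordLength = LengthField + 4;    // 4 bytes for the Length field
      return true;
    }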
-Expected<EHFrameParser::AugmentationInfo>
-EHFrameParser::parseAugmentationString() {
+void EHFrameBinaryParser::anchor() {}
+
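The out-of-line anchor() defined above is the usual vtable-anchoring idiom; a sketch:

    // In the header: one deliberately out-of-line virtual member.
    struct Parser {
      virtual ~Parser() {}
      virtual void anchor();
    };

    // In exactly one .cpp file: its definition pins the vtable (and RTTI)
    // to that translation unit instead of duplicating them in every user.
    void Parser::anchor() {}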
+Expected<EHFrameBinaryParser::AugmentationInfo>
+EHFrameBinaryParser::parseAugmentationString() {
AugmentationInfo AugInfo;
uint8_t NextChar;
uint8_t *NextField = &AugInfo.Fields[0];
@@ -139,14 +133,14 @@ EHFrameParser::parseAugmentationString() {
return std::move(AugInfo);
}
-Expected<JITTargetAddress> EHFrameParser::readAbsolutePointer() {
+Expected<JITTargetAddress> EHFrameBinaryParser::readAbsolutePointer() {
static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t),
"Result must be able to hold a uint64_t");
JITTargetAddress Addr;
- if (G.getPointerSize() == 8) {
+ if (PointerSize == 8) {
if (auto Err = EHFrameReader.readInteger(Addr))
return std::move(Err);
- } else if (G.getPointerSize() == 4) {
+ } else if (PointerSize == 4) {
uint32_t Addr32;
if (auto Err = EHFrameReader.readInteger(Addr32))
return std::move(Err);
@@ -156,14 +150,19 @@ Expected<JITTargetAddress> EHFrameParser::readAbsolutePointer() {
return Addr;
}
-Error EHFrameParser::processCIE() {
+Error EHFrameBinaryParser::processCIE(size_t RecordOffset,
+ size_t RecordLength) {
// Use the dwarf namespace for convenient access to pointer encoding
// constants.
using namespace dwarf;
LLVM_DEBUG(dbgs() << " Record is CIE\n");
- CIEInformation CIEInfo(*CurRecordAtom);
+ auto &CIESymbol =
+ createCIERecord(EHFrameAddress + RecordOffset,
+ EHFrameContent.substr(RecordOffset, RecordLength));
+
+ CIEInformation CIEInfo(CIESymbol);
uint8_t Version = 0;
if (auto Err = EHFrameReader.readInteger(Version))
@@ -179,7 +178,7 @@ Error EHFrameParser::processCIE() {
// Skip the EH Data field if present.
if (AugInfo->EHDataFieldPresent)
- if (auto Err = EHFrameReader.skip(G.getPointerSize()))
+ if (auto Err = EHFrameReader.skip(PointerSize))
return Err;
// Read and sanity check the code alignment factor.
@@ -226,7 +225,7 @@ Error EHFrameParser::processCIE() {
return make_error<JITLinkError>(
"Unsupported LSDA pointer encoding " +
formatv("{0:x2}", LSDAPointerEncoding) + " in CIE at " +
- formatv("{0:x16}", CurRecordAtom->getAddress()));
+ formatv("{0:x16}", CIESymbol.getAddress()));
break;
}
case 'P': {
@@ -239,7 +238,7 @@ Error EHFrameParser::processCIE() {
"Unspported personality pointer "
"encoding " +
formatv("{0:x2}", PersonalityPointerEncoding) + " in CIE at " +
- formatv("{0:x16}", CurRecordAtom->getAddress()));
+ formatv("{0:x16}", CIESymbol.getAddress()));
uint32_t PersonalityPointerAddress;
if (auto Err = EHFrameReader.readInteger(PersonalityPointerAddress))
return Err;
@@ -254,7 +253,7 @@ Error EHFrameParser::processCIE() {
"Unsupported FDE address pointer "
"encoding " +
formatv("{0:x2}", FDEPointerEncoding) + " in CIE at " +
- formatv("{0:x16}", CurRecordAtom->getAddress()));
+ formatv("{0:x16}", CIESymbol.getAddress()));
break;
}
default:
@@ -267,15 +266,16 @@ Error EHFrameParser::processCIE() {
return make_error<JITLinkError>("Read past the end of the augmentation "
"data while parsing fields");
- assert(!CIEInfos.count(CurRecordAtom->getAddress()) &&
+ assert(!CIEInfos.count(CIESymbol.getAddress()) &&
"Multiple CIEs recorded at the same address?");
- CIEInfos[CurRecordAtom->getAddress()] = std::move(CIEInfo);
+ CIEInfos[CIESymbol.getAddress()] = std::move(CIEInfo);
return Error::success();
}
-Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress,
- uint32_t CIEPointer) {
+Error EHFrameBinaryParser::processFDE(size_t RecordOffset, size_t RecordLength,
+ JITTargetAddress CIEPointerAddress,
+ uint32_t CIEPointer) {
LLVM_DEBUG(dbgs() << " Record is FDE\n");
LLVM_DEBUG({
@@ -286,16 +286,11 @@ Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress,
auto CIEInfoItr = CIEInfos.find(CIEPointerAddress - CIEPointer);
if (CIEInfoItr == CIEInfos.end())
return make_error<JITLinkError>(
- "FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress()) +
+ "FDE at " + formatv("{0:x16}", EHFrameAddress + RecordOffset) +
" points to non-existant CIE at " +
formatv("{0:x16}", CIEPointerAddress - CIEPointer));
auto &CIEInfo = CIEInfoItr->second;
- // The CIEPointer looks good. Add a relocation.
- CurRecordAtom->addEdge(FDEToCIERelocKind,
- CIEPointerAddress - CurRecordAtom->getAddress(),
- *CIEInfo.CIEAtom, 0);
-
// Read and sanity check the PC-start pointer and size.
JITTargetAddress PCBeginAddress = EHFrameAddress + EHFrameReader.getOffset();
@@ -305,83 +300,68 @@ Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress,
JITTargetAddress PCBegin = PCBeginAddress + *PCBeginDelta;
LLVM_DEBUG({
- dbgs() << " PC begin: " << format("0x%016" PRIx64, PCBegin) << "\n";
+ dbgs() << " PC begin: " << format("0x%016" PRIx64, PCBegin) << "\n";
});
- auto *TargetAtom = G.getAtomByAddress(PCBegin);
+ auto *TargetSymbol = getSymbolAtAddress(PCBegin);
- if (!TargetAtom)
+ if (!TargetSymbol)
return make_error<JITLinkError>("FDE PC-begin " +
formatv("{0:x16}", PCBegin) +
- " does not point at atom");
+ " does not point at symbol");
- if (TargetAtom->getAddress() != PCBegin)
+ if (TargetSymbol->getAddress() != PCBegin)
return make_error<JITLinkError>(
"FDE PC-begin " + formatv("{0:x16}", PCBegin) +
- " does not point to start of atom at " +
- formatv("{0:x16}", TargetAtom->getAddress()));
-
- LLVM_DEBUG(dbgs() << " FDE target: " << *TargetAtom << "\n");
+ " does not point to start of symbol at " +
+ formatv("{0:x16}", TargetSymbol->getAddress()));
- // The PC-start pointer and size look good. Add relocations.
- CurRecordAtom->addEdge(FDEToTargetRelocKind,
- PCBeginAddress - CurRecordAtom->getAddress(),
- *TargetAtom, 0);
-
- // Add a keep-alive relocation from the function to the FDE to ensure it is
- // not dead stripped.
- TargetAtom->addEdge(Edge::KeepAlive, 0, *CurRecordAtom, 0);
+ LLVM_DEBUG(dbgs() << " FDE target: " << *TargetSymbol << "\n");
// Skip over the PC range size field.
- if (auto Err = EHFrameReader.skip(G.getPointerSize()))
+ if (auto Err = EHFrameReader.skip(PointerSize))
return Err;
+ Symbol *LSDASymbol = nullptr;
+ JITTargetAddress LSDAAddress = 0;
if (CIEInfo.FDEsHaveLSDAField) {
uint64_t AugmentationDataSize;
if (auto Err = EHFrameReader.readULEB128(AugmentationDataSize))
return Err;
- if (AugmentationDataSize != G.getPointerSize())
+ if (AugmentationDataSize != PointerSize)
return make_error<JITLinkError>(
"Unexpected FDE augmentation data size (expected " +
- Twine(G.getPointerSize()) + ", got " + Twine(AugmentationDataSize) +
- ") for FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress()));
- JITTargetAddress LSDAAddress = EHFrameAddress + EHFrameReader.getOffset();
+ Twine(PointerSize) + ", got " + Twine(AugmentationDataSize) +
+ ") for FDE at " + formatv("{0:x16}", EHFrameAddress + RecordOffset));
+ LSDAAddress = EHFrameAddress + EHFrameReader.getOffset();
auto LSDADelta = readAbsolutePointer();
if (!LSDADelta)
return LSDADelta.takeError();
JITTargetAddress LSDA = LSDAAddress + *LSDADelta;
- auto *LSDAAtom = G.getAtomByAddress(LSDA);
+ LSDASymbol = getSymbolAtAddress(LSDA);
- if (!LSDAAtom)
+ if (!LSDASymbol)
return make_error<JITLinkError>("FDE LSDA " + formatv("{0:x16}", LSDA) +
- " does not point at atom");
+ " does not point at symbol");
- if (LSDAAtom->getAddress() != LSDA)
+ if (LSDASymbol->getAddress() != LSDA)
return make_error<JITLinkError>(
"FDE LSDA " + formatv("{0:x16}", LSDA) +
- " does not point to start of atom at " +
- formatv("{0:x16}", LSDAAtom->getAddress()));
-
- LLVM_DEBUG(dbgs() << " FDE LSDA: " << *LSDAAtom << "\n");
+ " does not point to start of symbol at " +
+ formatv("{0:x16}", LSDASymbol->getAddress()));
- // LSDA looks good. Add relocations.
- CurRecordAtom->addEdge(FDEToTargetRelocKind,
- LSDAAddress - CurRecordAtom->getAddress(), *LSDAAtom,
- 0);
+ LLVM_DEBUG(dbgs() << " FDE LSDA: " << *LSDASymbol << "\n");
}
- return Error::success();
-}
+ JITTargetAddress RecordAddress = EHFrameAddress + RecordOffset;
+ auto FDESymbol = createFDERecord(
+ RecordAddress, EHFrameContent.substr(RecordOffset, RecordLength),
+ *CIEInfo.CIESymbol, CIEPointerAddress - RecordAddress, *TargetSymbol,
+ PCBeginAddress - RecordAddress, LSDASymbol, LSDAAddress - RecordAddress);
-Error addEHFrame(AtomGraph &G, Section &EHFrameSection,
- StringRef EHFrameContent, JITTargetAddress EHFrameAddress,
- Edge::Kind FDEToCIERelocKind,
- Edge::Kind FDEToTargetRelocKind) {
- return EHFrameParser(G, EHFrameSection, EHFrameContent, EHFrameAddress,
- FDEToCIERelocKind, FDEToTargetRelocKind)
- .atomize();
+ return FDESymbol.takeError();
}
// Determine whether we can register EH tables.
@@ -451,11 +431,13 @@ static Error deregisterFrameWrapper(const void *P) {
template <typename HandleFDEFn>
Error walkAppleEHFrameSection(const char *const SectionStart,
+ size_t SectionSize,
HandleFDEFn HandleFDE) {
const char *CurCFIRecord = SectionStart;
+ const char *End = SectionStart + SectionSize;
uint64_t Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
- while (Size != 0) {
+ while (CurCFIRecord != End && Size != 0) {
const char *OffsetField = CurCFIRecord + (Size == 0xffffffff ? 12 : 4);
if (Size == 0xffffffff)
Size = *reinterpret_cast<const uint64_t *>(CurCFIRecord + 4) + 12;
@@ -484,10 +466,12 @@ Error walkAppleEHFrameSection(const char *const SectionStart,
#endif // __APPLE__
-Error registerEHFrameSection(const void *EHFrameSectionAddr) {
+Error registerEHFrameSection(const void *EHFrameSectionAddr,
+ size_t EHFrameSectionSize) {
#ifdef __APPLE__
// On Darwin __register_frame has to be called for each FDE entry.
return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
+ EHFrameSectionSize,
registerFrameWrapper);
#else
// On Linux __register_frame takes a single argument:
@@ -499,9 +483,11 @@ Error registerEHFrameSection(const void *EHFrameSectionAddr) {
#endif
}
-Error deregisterEHFrameSection(const void *EHFrameSectionAddr) {
+Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
+ size_t EHFrameSectionSize) {
#ifdef __APPLE__
return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
+ EHFrameSectionSize,
deregisterFrameWrapper);
#else
return deregisterFrameWrapper(EHFrameSectionAddr);
@@ -517,23 +503,31 @@ InProcessEHFrameRegistrar &InProcessEHFrameRegistrar::getInstance() {
InProcessEHFrameRegistrar::InProcessEHFrameRegistrar() {}
-AtomGraphPassFunction
+LinkGraphPassFunction
createEHFrameRecorderPass(const Triple &TT,
- StoreFrameAddressFunction StoreFrameAddress) {
+ StoreFrameRangeFunction StoreRangeAddress) {
const char *EHFrameSectionName = nullptr;
if (TT.getObjectFormat() == Triple::MachO)
EHFrameSectionName = "__eh_frame";
else
EHFrameSectionName = ".eh_frame";
- auto RecordEHFrame = [EHFrameSectionName,
- StoreFrameAddress](AtomGraph &G) -> Error {
- // Search for a non-empty eh-frame and record the address of the first atom
- // in it.
+ auto RecordEHFrame =
+ [EHFrameSectionName,
+ StoreFrameRange = std::move(StoreRangeAddress)](LinkGraph &G) -> Error {
+ // Search for a non-empty eh-frame and record the address of the first
+ // symbol in it.
JITTargetAddress Addr = 0;
- if (auto *S = G.findSectionByName(EHFrameSectionName))
- Addr = S->getRange().getStart();
- StoreFrameAddress(Addr);
+ size_t Size = 0;
+ if (auto *S = G.findSectionByName(EHFrameSectionName)) {
+ auto R = SectionRange(*S);
+ Addr = R.getStart();
+ Size = R.getSize();
+ }
+ if (Addr == 0 && Size != 0)
+ return make_error<JITLinkError>("__eh_frame section can not have zero "
+ "address with non-zero size");
+ StoreFrameRange(Addr, Size);
return Error::success();
};
diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
index d679edef7ea6..6f9f68ad8382 100644
--- a/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
+++ b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
@@ -21,18 +21,31 @@
namespace llvm {
namespace jitlink {
-/// A generic parser for eh-frame sections.
+/// A generic binary parser for eh-frame sections.
///
-/// Adds atoms representing CIE and FDE entries, using the given FDE-to-CIE and
-/// FDEToTarget relocation kinds.
-class EHFrameParser {
+/// Adds blocks and symbols representing CIE and FDE entries to a JITLink graph.
+///
+/// This parser assumes that the user has already verified that the EH-frame's
+/// address range does not overlap any other section/symbol, so that generated
+/// CIE/FDE records do not overlap other sections/symbols.
+class EHFrameBinaryParser {
public:
- EHFrameParser(AtomGraph &G, Section &EHFrameSection, StringRef EHFrameContent,
- JITTargetAddress EHFrameAddress, Edge::Kind FDEToCIERelocKind,
- Edge::Kind FDEToTargetRelocKind);
- Error atomize();
+ EHFrameBinaryParser(JITTargetAddress EHFrameAddress, StringRef EHFrameContent,
+ unsigned PointerSize, support::endianness Endianness);
+ virtual ~EHFrameBinaryParser() {}
+
+ Error addToGraph();
private:
+ virtual void anchor();
+ virtual Symbol *getSymbolAtAddress(JITTargetAddress Addr) = 0;
+ virtual Symbol &createCIERecord(JITTargetAddress RecordAddr,
+ StringRef RecordContent) = 0;
+ virtual Expected<Symbol &>
+ createFDERecord(JITTargetAddress RecordAddr, StringRef RecordContent,
+ Symbol &CIE, size_t CIEOffset, Symbol &Func,
+ size_t FuncOffset, Symbol *LSDA, size_t LSDAOffset) = 0;
+
struct AugmentationInfo {
bool AugmentationDataPresent = false;
bool EHDataFieldPresent = false;
@@ -41,31 +54,24 @@ private:
Expected<AugmentationInfo> parseAugmentationString();
Expected<JITTargetAddress> readAbsolutePointer();
- Error processCIE();
- Error processFDE(JITTargetAddress CIEPointerAddress, uint32_t CIEPointer);
+ Error processCIE(size_t RecordOffset, size_t RecordLength);
+ Error processFDE(size_t RecordOffset, size_t RecordLength,
+ JITTargetAddress CIEPointerOffset, uint32_t CIEPointer);
struct CIEInformation {
CIEInformation() = default;
- CIEInformation(DefinedAtom &CIEAtom) : CIEAtom(&CIEAtom) {}
- DefinedAtom *CIEAtom = nullptr;
+ CIEInformation(Symbol &CIESymbol) : CIESymbol(&CIESymbol) {}
+ Symbol *CIESymbol = nullptr;
bool FDEsHaveLSDAField = false;
};
- AtomGraph &G;
- Section &EHFrameSection;
- StringRef EHFrameContent;
JITTargetAddress EHFrameAddress;
+ StringRef EHFrameContent;
+ unsigned PointerSize;
BinaryStreamReader EHFrameReader;
- DefinedAtom *CurRecordAtom = nullptr;
DenseMap<JITTargetAddress, CIEInformation> CIEInfos;
- Edge::Kind FDEToCIERelocKind;
- Edge::Kind FDEToTargetRelocKind;
};
-Error addEHFrame(AtomGraph &G, Section &EHFrameSection,
- StringRef EHFrameContent, JITTargetAddress EHFrameAddress,
- Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind);
-
} // end namespace jitlink
} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/JITLink.cpp b/lib/ExecutionEngine/JITLink/JITLink.cpp
index 9d0a7459dc09..1e19038951ac 100644
--- a/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -56,95 +56,151 @@ std::error_code JITLinkError::convertToErrorCode() const {
return std::error_code(GenericJITLinkError, *JITLinkerErrorCategory);
}
-const StringRef getGenericEdgeKindName(Edge::Kind K) {
+const char *getGenericEdgeKindName(Edge::Kind K) {
switch (K) {
case Edge::Invalid:
return "INVALID RELOCATION";
case Edge::KeepAlive:
return "Keep-Alive";
- case Edge::LayoutNext:
- return "Layout-Next";
default:
llvm_unreachable("Unrecognized relocation kind");
}
}
-raw_ostream &operator<<(raw_ostream &OS, const Atom &A) {
+const char *getLinkageName(Linkage L) {
+ switch (L) {
+ case Linkage::Strong:
+ return "strong";
+ case Linkage::Weak:
+ return "weak";
+ }
+ llvm_unreachable("Unrecognized llvm.jitlink.Linkage enum");
+}
+
+const char *getScopeName(Scope S) {
+ switch (S) {
+ case Scope::Default:
+ return "default";
+ case Scope::Hidden:
+ return "hidden";
+ case Scope::Local:
+ return "local";
+ }
+ llvm_unreachable("Unrecognized llvm.jitlink.Scope enum");
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Block &B) {
+ return OS << formatv("{0:x16}", B.getAddress()) << " -- "
+ << formatv("{0:x16}", B.getAddress() + B.getSize()) << ": "
+ << (B.isZeroFill() ? "zero-fill" : "content")
+ << ", align = " << B.getAlignment()
+ << ", align-ofs = " << B.getAlignmentOffset()
+ << ", section = " << B.getSection().getName();
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) {
OS << "<";
- if (A.getName().empty())
- OS << "anon@" << format("0x%016" PRIx64, A.getAddress());
+ if (Sym.getName().empty())
+ OS << "*anon*";
else
- OS << A.getName();
- OS << " [";
- if (A.isDefined()) {
- auto &DA = static_cast<const DefinedAtom &>(A);
- OS << " section=" << DA.getSection().getName();
- if (DA.isLive())
- OS << " live";
- if (DA.shouldDiscard())
- OS << " should-discard";
- } else
- OS << " external";
- OS << " ]>";
+ OS << Sym.getName();
+ OS << ": flags = ";
+ switch (Sym.getLinkage()) {
+ case Linkage::Strong:
+ OS << 'S';
+ break;
+ case Linkage::Weak:
+ OS << 'W';
+ break;
+ }
+ switch (Sym.getScope()) {
+ case Scope::Default:
+ OS << 'D';
+ break;
+ case Scope::Hidden:
+ OS << 'H';
+ break;
+ case Scope::Local:
+ OS << 'L';
+ break;
+ }
+ OS << (Sym.isLive() ? '+' : '-')
+ << ", size = " << formatv("{0:x8}", Sym.getSize())
+ << ", addr = " << formatv("{0:x16}", Sym.getAddress()) << " ("
+ << formatv("{0:x16}", Sym.getAddressable().getAddress()) << " + "
+ << formatv("{0:x8}", Sym.getOffset());
+ if (Sym.isDefined())
+ OS << " " << Sym.getBlock().getSection().getName();
+ OS << ")>";
return OS;
}
-void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E,
+void printEdge(raw_ostream &OS, const Block &B, const Edge &E,
StringRef EdgeKindName) {
- OS << "edge@" << formatv("{0:x16}", FixupAtom.getAddress() + E.getOffset())
- << ": " << FixupAtom << " + " << E.getOffset() << " -- " << EdgeKindName
- << " -> " << E.getTarget() << " + " << E.getAddend();
+ OS << "edge@" << formatv("{0:x16}", B.getAddress() + E.getOffset()) << ": "
+ << formatv("{0:x16}", B.getAddress()) << " + " << E.getOffset() << " -- "
+ << EdgeKindName << " -> " << E.getTarget() << " + " << E.getAddend();
}
Section::~Section() {
- for (auto *DA : DefinedAtoms)
- DA->~DefinedAtom();
+ for (auto *Sym : Symbols)
+ Sym->~Symbol();
}
-void AtomGraph::dump(raw_ostream &OS,
+LinkGraph::~LinkGraph() {
+ // Destroy blocks.
+ for (auto *B : Blocks)
+ B->~Block();
+}
+
+void LinkGraph::dump(raw_ostream &OS,
std::function<StringRef(Edge::Kind)> EdgeKindToName) {
if (!EdgeKindToName)
EdgeKindToName = [](Edge::Kind K) { return StringRef(); };
- OS << "Defined atoms:\n";
- for (auto *DA : defined_atoms()) {
- OS << " " << format("0x%016" PRIx64, DA->getAddress()) << ": " << *DA
+ OS << "Symbols:\n";
+ for (auto *Sym : defined_symbols()) {
+ OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym
<< "\n";
- for (auto &E : DA->edges()) {
- OS << " ";
- StringRef EdgeName = (E.getKind() < Edge::FirstRelocation
- ? getGenericEdgeKindName(E.getKind())
- : EdgeKindToName(E.getKind()));
-
- if (!EdgeName.empty())
- printEdge(OS, *DA, E, EdgeName);
- else {
- auto EdgeNumberString = std::to_string(E.getKind());
- printEdge(OS, *DA, E, EdgeNumberString);
+ if (Sym->isDefined()) {
+ for (auto &E : Sym->getBlock().edges()) {
+ OS << " ";
+ StringRef EdgeName = (E.getKind() < Edge::FirstRelocation
+ ? getGenericEdgeKindName(E.getKind())
+ : EdgeKindToName(E.getKind()));
+
+ if (!EdgeName.empty())
+ printEdge(OS, Sym->getBlock(), E, EdgeName);
+ else {
+ auto EdgeNumberString = std::to_string(E.getKind());
+ printEdge(OS, Sym->getBlock(), E, EdgeNumberString);
+ }
+ OS << "\n";
}
- OS << "\n";
}
}
- OS << "Absolute atoms:\n";
- for (auto *A : absolute_atoms())
- OS << " " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A
+ OS << "Absolute symbols:\n";
+ for (auto *Sym : absolute_symbols())
+ OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym
<< "\n";
- OS << "External atoms:\n";
- for (auto *A : external_atoms())
- OS << " " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A
+ OS << "External symbols:\n";
+ for (auto *Sym : external_symbols())
+ OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym
<< "\n";
}
+void JITLinkAsyncLookupContinuation::anchor() {}
+
JITLinkContext::~JITLinkContext() {}
bool JITLinkContext::shouldAddDefaultTargetPasses(const Triple &TT) const {
return true;
}
-AtomGraphPassFunction JITLinkContext::getMarkLivePass(const Triple &TT) const {
- return AtomGraphPassFunction();
+LinkGraphPassFunction JITLinkContext::getMarkLivePass(const Triple &TT) const {
+ return LinkGraphPassFunction();
}
Error JITLinkContext::modifyPassConfig(const Triple &TT,
@@ -152,9 +208,9 @@ Error JITLinkContext::modifyPassConfig(const Triple &TT,
return Error::success();
}
-Error markAllAtomsLive(AtomGraph &G) {
- for (auto *DA : G.defined_atoms())
- DA->setLive(true);
+Error markAllSymbolsLive(LinkGraph &G) {
+ for (auto *Sym : G.defined_symbols())
+ Sym->setLive(true);
return Error::success();
}
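As a usage sketch of the new pass shape (assuming only the LinkGraph APIs visible in this diff; the "_keep_" name prefix is purely illustrative), a client-defined pass mirrors markAllSymbolsLive above and is registered via PassConfiguration:

Error keepPrefixedSymbolsAlive(LinkGraph &G) {
  // Pin any symbol with the illustrative "_keep_" prefix so prune() keeps it.
  for (auto *Sym : G.defined_symbols())
    if (!Sym->getName().empty() && Sym->getName().startswith("_keep_"))
      Sym->setLive(true);
  return Error::success();
}

// Registered alongside the other passes run by runPasses():
// Config.PrePrunePasses.push_back(keepPrefixedSymbolsAlive);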
diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index 96e074da122b..d4270b5aa796 100644
--- a/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "JITLinkGeneric.h"
-#include "EHFrameSupportImpl.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -25,7 +24,7 @@ JITLinkerBase::~JITLinkerBase() {}
void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
- // Build the atom graph.
+ // Build the link graph.
if (auto GraphOrErr = buildGraph(Ctx->getObjectBuffer()))
G = std::move(*GraphOrErr);
else
@@ -33,33 +32,33 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
assert(G && "Graph should have been created by buildGraph above");
// Prune and optimize the graph.
- if (auto Err = runPasses(Passes.PrePrunePasses, *G))
+ if (auto Err = runPasses(Passes.PrePrunePasses))
return Ctx->notifyFailed(std::move(Err));
LLVM_DEBUG({
- dbgs() << "Atom graph \"" << G->getName() << "\" pre-pruning:\n";
+ dbgs() << "Link graph \"" << G->getName() << "\" pre-pruning:\n";
dumpGraph(dbgs());
});
prune(*G);
LLVM_DEBUG({
- dbgs() << "Atom graph \"" << G->getName() << "\" post-pruning:\n";
+ dbgs() << "Link graph \"" << G->getName() << "\" post-pruning:\n";
dumpGraph(dbgs());
});
// Run post-pruning passes.
- if (auto Err = runPasses(Passes.PostPrunePasses, *G))
+ if (auto Err = runPasses(Passes.PostPrunePasses))
return Ctx->notifyFailed(std::move(Err));
- // Sort atoms into segments.
- layOutAtoms();
+ // Sort blocks into segments.
+ auto Layout = layOutBlocks();
// Allocate memory for segments.
if (auto Err = allocateSegments(Layout))
return Ctx->notifyFailed(std::move(Err));
- // Notify client that the defined atoms have been assigned addresses.
+ // Notify client that the defined symbols have been assigned addresses.
Ctx->notifyResolved(*G);
auto ExternalSymbols = getExternalSymbolNames();
@@ -74,42 +73,42 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
// [Self=std::move(Self)](Expected<AsyncLookupResult> Result) {
// Self->linkPhase2(std::move(Self), std::move(Result));
// });
- //
- // FIXME: Use move capture once we have c++14.
auto *TmpCtx = Ctx.get();
- auto *UnownedSelf = Self.release();
- auto Phase2Continuation =
- [UnownedSelf](Expected<AsyncLookupResult> LookupResult) {
- std::unique_ptr<JITLinkerBase> Self(UnownedSelf);
- UnownedSelf->linkPhase2(std::move(Self), std::move(LookupResult));
- };
- TmpCtx->lookup(std::move(ExternalSymbols), std::move(Phase2Continuation));
+ TmpCtx->lookup(std::move(ExternalSymbols),
+ createLookupContinuation(
+ [S = std::move(Self), L = std::move(Layout)](
+ Expected<AsyncLookupResult> LookupResult) mutable {
+ auto &TmpSelf = *S;
+ TmpSelf.linkPhase2(std::move(S), std::move(LookupResult),
+ std::move(L));
+ }));
}
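The init-capture in the continuation above resolves the old "use move capture once we have c++14" FIXME. A minimal standalone sketch of the idiom (names here are illustrative, not JITLink APIs); note that a move-only lambda cannot be stored in a std::function, which is why the diff routes it through createLookupContinuation:

#include <memory>
#include <utility>

struct Task { void run() {} };

// A template parameter accepts move-only callables that std::function cannot.
template <typename Fn> void enqueue(Fn &&F) { std::forward<Fn>(F)(); }

void example() {
  auto T = std::make_unique<Task>();
  // Ownership of T moves into the continuation via a C++14 init-capture.
  enqueue([T = std::move(T)]() mutable { T->run(); });
}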
void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
- Expected<AsyncLookupResult> LR) {
+ Expected<AsyncLookupResult> LR,
+ SegmentLayoutMap Layout) {
// If the lookup failed, bail out.
if (!LR)
return deallocateAndBailOut(LR.takeError());
- // Assign addresses to external atoms.
+ // Assign addresses to external addressables.
applyLookupResult(*LR);
LLVM_DEBUG({
- dbgs() << "Atom graph \"" << G->getName() << "\" before copy-and-fixup:\n";
+ dbgs() << "Link graph \"" << G->getName() << "\" before copy-and-fixup:\n";
dumpGraph(dbgs());
});
- // Copy atom content to working memory and fix up.
- if (auto Err = copyAndFixUpAllAtoms(Layout, *Alloc))
+ // Copy block content to working memory and fix up.
+ if (auto Err = copyAndFixUpBlocks(Layout, *Alloc))
return deallocateAndBailOut(std::move(Err));
LLVM_DEBUG({
- dbgs() << "Atom graph \"" << G->getName() << "\" after copy-and-fixup:\n";
+ dbgs() << "Link graph \"" << G->getName() << "\" after copy-and-fixup:\n";
dumpGraph(dbgs());
});
- if (auto Err = runPasses(Passes.PostFixupPasses, *G))
+ if (auto Err = runPasses(Passes.PostFixupPasses))
return deallocateAndBailOut(std::move(Err));
// FIXME: Use move capture once we have c++14.
@@ -128,82 +127,38 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err) {
Ctx->notifyFinalized(std::move(Alloc));
}
-Error JITLinkerBase::runPasses(AtomGraphPassList &Passes, AtomGraph &G) {
+Error JITLinkerBase::runPasses(LinkGraphPassList &Passes) {
for (auto &P : Passes)
- if (auto Err = P(G))
+ if (auto Err = P(*G))
return Err;
return Error::success();
}
-void JITLinkerBase::layOutAtoms() {
- // Group sections by protections, and whether or not they're zero-fill.
- for (auto &S : G->sections()) {
+JITLinkerBase::SegmentLayoutMap JITLinkerBase::layOutBlocks() {
- // Skip empty sections.
- if (S.atoms_empty())
- continue;
+ SegmentLayoutMap Layout;
- auto &SL = Layout[S.getProtectionFlags()];
- if (S.isZeroFill())
- SL.ZeroFillSections.push_back(SegmentLayout::SectionLayout(S));
+ /// Partition blocks based on permissions and content vs. zero-fill.
+ for (auto *B : G->blocks()) {
+ auto &SegLists = Layout[B->getSection().getProtectionFlags()];
+ if (!B->isZeroFill())
+ SegLists.ContentBlocks.push_back(B);
else
- SL.ContentSections.push_back(SegmentLayout::SectionLayout(S));
+ SegLists.ZeroFillBlocks.push_back(B);
}
- // Sort sections within the layout by ordinal.
- {
- auto CompareByOrdinal = [](const SegmentLayout::SectionLayout &LHS,
- const SegmentLayout::SectionLayout &RHS) {
- return LHS.S->getSectionOrdinal() < RHS.S->getSectionOrdinal();
+ /// Sort blocks within each list.
+ for (auto &KV : Layout) {
+
+ auto CompareBlocks = [](const Block *LHS, const Block *RHS) {
+ if (LHS->getSection().getOrdinal() != RHS->getSection().getOrdinal())
+ return LHS->getSection().getOrdinal() < RHS->getSection().getOrdinal();
+ return LHS->getOrdinal() < RHS->getOrdinal();
};
- for (auto &KV : Layout) {
- auto &SL = KV.second;
- std::sort(SL.ContentSections.begin(), SL.ContentSections.end(),
- CompareByOrdinal);
- std::sort(SL.ZeroFillSections.begin(), SL.ZeroFillSections.end(),
- CompareByOrdinal);
- }
- }
- // Add atoms to the sections.
- for (auto &KV : Layout) {
- auto &SL = KV.second;
- for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections}) {
- for (auto &SI : *SIList) {
- // First build the set of layout-heads (i.e. "heads" of layout-next
- // chains) by copying the section atoms, then eliminating any that
- // appear as layout-next targets.
- DenseSet<DefinedAtom *> LayoutHeads;
- for (auto *DA : SI.S->atoms())
- LayoutHeads.insert(DA);
-
- for (auto *DA : SI.S->atoms())
- if (DA->hasLayoutNext())
- LayoutHeads.erase(&DA->getLayoutNext());
-
- // Next, sort the layout heads by address order.
- std::vector<DefinedAtom *> OrderedLayoutHeads;
- OrderedLayoutHeads.reserve(LayoutHeads.size());
- for (auto *DA : LayoutHeads)
- OrderedLayoutHeads.push_back(DA);
-
- // Now sort the list of layout heads by address.
- std::sort(OrderedLayoutHeads.begin(), OrderedLayoutHeads.end(),
- [](const DefinedAtom *LHS, const DefinedAtom *RHS) {
- return LHS->getAddress() < RHS->getAddress();
- });
-
- // Now populate the SI.Atoms field by appending each of the chains.
- for (auto *DA : OrderedLayoutHeads) {
- SI.Atoms.push_back(DA);
- while (DA->hasLayoutNext()) {
- auto &Next = DA->getLayoutNext();
- SI.Atoms.push_back(&Next);
- DA = &Next;
- }
- }
- }
- }
+ auto &SegLists = KV.second;
+ llvm::sort(SegLists.ContentBlocks, CompareBlocks);
+ llvm::sort(SegLists.ZeroFillBlocks, CompareBlocks);
}
LLVM_DEBUG({
@@ -213,18 +168,16 @@ void JITLinkerBase::layOutAtoms() {
<< static_cast<sys::Memory::ProtectionFlags>(KV.first) << ":\n";
auto &SL = KV.second;
for (auto &SIEntry :
- {std::make_pair(&SL.ContentSections, "content sections"),
- std::make_pair(&SL.ZeroFillSections, "zero-fill sections")}) {
- auto &SIList = *SIEntry.first;
+ {std::make_pair(&SL.ContentBlocks, "content block"),
+ std::make_pair(&SL.ZeroFillBlocks, "zero-fill block")}) {
dbgs() << " " << SIEntry.second << ":\n";
- for (auto &SI : SIList) {
- dbgs() << " " << SI.S->getName() << ":\n";
- for (auto *DA : SI.Atoms)
- dbgs() << " " << *DA << "\n";
- }
+ for (auto *B : *SIEntry.first)
+ dbgs() << " " << *B << "\n";
}
}
});
+
+ return Layout;
}
Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
@@ -234,74 +187,36 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
JITLinkMemoryManager::SegmentsRequestMap Segments;
for (auto &KV : Layout) {
auto &Prot = KV.first;
- auto &SegLayout = KV.second;
+ auto &SegLists = KV.second;
+
+ uint64_t SegAlign = 1;
// Calculate segment content size.
size_t SegContentSize = 0;
- for (auto &SI : SegLayout.ContentSections) {
- assert(!SI.S->atoms_empty() && "Sections in layout must not be empty");
- assert(!SI.Atoms.empty() && "Section layouts must not be empty");
-
- // Bump to section alignment before processing atoms.
- SegContentSize = alignTo(SegContentSize, SI.S->getAlignment());
-
- for (auto *DA : SI.Atoms) {
- SegContentSize = alignTo(SegContentSize, DA->getAlignment());
- SegContentSize += DA->getSize();
- }
+ for (auto *B : SegLists.ContentBlocks) {
+ SegAlign = std::max(SegAlign, B->getAlignment());
+ SegContentSize = alignToBlock(SegContentSize, *B);
+ SegContentSize += B->getSize();
}
- // Get segment content alignment.
- unsigned SegContentAlign = 1;
- if (!SegLayout.ContentSections.empty()) {
- auto &FirstContentSection = SegLayout.ContentSections.front();
- SegContentAlign =
- std::max(FirstContentSection.S->getAlignment(),
- FirstContentSection.Atoms.front()->getAlignment());
- }
+ uint64_t SegZeroFillStart = SegContentSize;
+ uint64_t SegZeroFillEnd = SegZeroFillStart;
- // Calculate segment zero-fill size.
- uint64_t SegZeroFillSize = 0;
- for (auto &SI : SegLayout.ZeroFillSections) {
- assert(!SI.S->atoms_empty() && "Sections in layout must not be empty");
- assert(!SI.Atoms.empty() && "Section layouts must not be empty");
-
- // Bump to section alignment before processing atoms.
- SegZeroFillSize = alignTo(SegZeroFillSize, SI.S->getAlignment());
-
- for (auto *DA : SI.Atoms) {
- SegZeroFillSize = alignTo(SegZeroFillSize, DA->getAlignment());
- SegZeroFillSize += DA->getSize();
- }
- }
-
- // Calculate segment zero-fill alignment.
- uint32_t SegZeroFillAlign = 1;
-
- if (!SegLayout.ZeroFillSections.empty()) {
- auto &FirstZeroFillSection = SegLayout.ZeroFillSections.front();
- SegZeroFillAlign =
- std::max(FirstZeroFillSection.S->getAlignment(),
- FirstZeroFillSection.Atoms.front()->getAlignment());
+ for (auto *B : SegLists.ZeroFillBlocks) {
+ SegAlign = std::max(SegAlign, B->getAlignment());
+ SegZeroFillEnd = alignToBlock(SegZeroFillEnd, *B);
+ SegZeroFillEnd += B->getSize();
}
- if (SegContentSize == 0)
- SegContentAlign = SegZeroFillAlign;
-
- if (SegContentAlign % SegZeroFillAlign != 0)
- return make_error<JITLinkError>("First content atom alignment does not "
- "accommodate first zero-fill atom "
- "alignment");
-
- Segments[Prot] = {SegContentSize, SegContentAlign, SegZeroFillSize,
- SegZeroFillAlign};
+ Segments[Prot] = {SegAlign, SegContentSize,
+ SegZeroFillEnd - SegZeroFillStart};
LLVM_DEBUG({
dbgs() << (&KV == &*Layout.begin() ? "" : "; ")
- << static_cast<sys::Memory::ProtectionFlags>(Prot) << ": "
- << SegContentSize << " content bytes (alignment "
- << SegContentAlign << ") + " << SegZeroFillSize
- << " zero-fill bytes (alignment " << SegZeroFillAlign << ")";
+ << static_cast<sys::Memory::ProtectionFlags>(Prot)
+ << ": alignment = " << SegAlign
+ << ", content size = " << SegContentSize
+ << ", zero-fill size = " << (SegZeroFillEnd - SegZeroFillStart);
});
}
LLVM_DEBUG(dbgs() << " }\n");
@@ -320,22 +235,19 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
}
});
- // Update atom target addresses.
+ // Update block target addresses.
for (auto &KV : Layout) {
auto &Prot = KV.first;
auto &SL = KV.second;
- JITTargetAddress AtomTargetAddr =
+ JITTargetAddress NextBlockAddr =
Alloc->getTargetMemory(static_cast<sys::Memory::ProtectionFlags>(Prot));
- for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections})
- for (auto &SI : *SIList) {
- AtomTargetAddr = alignTo(AtomTargetAddr, SI.S->getAlignment());
- for (auto *DA : SI.Atoms) {
- AtomTargetAddr = alignTo(AtomTargetAddr, DA->getAlignment());
- DA->setAddress(AtomTargetAddr);
- AtomTargetAddr += DA->getSize();
- }
+ for (auto *SIList : {&SL.ContentBlocks, &SL.ZeroFillBlocks})
+ for (auto *B : *SIList) {
+ NextBlockAddr = alignToBlock(NextBlockAddr, *B);
+ B->setAddress(NextBlockAddr);
+ NextBlockAddr += B->getSize();
}
}
@@ -343,34 +255,35 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
}
DenseSet<StringRef> JITLinkerBase::getExternalSymbolNames() const {
- // Identify unresolved external atoms.
+ // Identify unresolved external symbols.
DenseSet<StringRef> UnresolvedExternals;
- for (auto *DA : G->external_atoms()) {
- assert(DA->getAddress() == 0 &&
+ for (auto *Sym : G->external_symbols()) {
+ assert(Sym->getAddress() == 0 &&
"External has already been assigned an address");
- assert(DA->getName() != StringRef() && DA->getName() != "" &&
+ assert(Sym->getName() != StringRef() && Sym->getName() != "" &&
"Externals must be named");
- UnresolvedExternals.insert(DA->getName());
+ UnresolvedExternals.insert(Sym->getName());
}
return UnresolvedExternals;
}
void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
- for (auto &KV : Result) {
- Atom &A = G->getAtomByName(KV.first);
- assert(A.getAddress() == 0 && "Atom already resolved");
- A.setAddress(KV.second.getAddress());
+ for (auto *Sym : G->external_symbols()) {
+ assert(Sym->getAddress() == 0 && "Symbol already resolved");
+ assert(!Sym->isDefined() && "Symbol being resolved is already defined");
+ assert(Result.count(Sym->getName()) && "Missing resolution for symbol");
+ Sym->getAddressable().setAddress(Result[Sym->getName()].getAddress());
}
LLVM_DEBUG({
dbgs() << "Externals after applying lookup result:\n";
- for (auto *A : G->external_atoms())
- dbgs() << " " << A->getName() << ": "
- << formatv("{0:x16}", A->getAddress()) << "\n";
+ for (auto *Sym : G->external_symbols())
+ dbgs() << " " << Sym->getName() << ": "
+ << formatv("{0:x16}", Sym->getAddress()) << "\n";
});
- assert(llvm::all_of(G->external_atoms(),
- [](Atom *A) { return A->getAddress() != 0; }) &&
- "All atoms should have been resolved by this point");
+ assert(llvm::all_of(G->external_symbols(),
+ [](Symbol *Sym) { return Sym->getAddress() != 0; }) &&
+ "All symbols should have been resolved by this point");
}
void JITLinkerBase::deallocateAndBailOut(Error Err) {
@@ -384,96 +297,60 @@ void JITLinkerBase::dumpGraph(raw_ostream &OS) {
G->dump(dbgs(), [this](Edge::Kind K) { return getEdgeKindName(K); });
}
-void prune(AtomGraph &G) {
- std::vector<DefinedAtom *> Worklist;
- DenseMap<DefinedAtom *, std::vector<Edge *>> EdgesToUpdate;
-
- // Build the initial worklist from all atoms initially live.
- for (auto *DA : G.defined_atoms()) {
- if (!DA->isLive() || DA->shouldDiscard())
- continue;
-
- for (auto &E : DA->edges()) {
- if (!E.getTarget().isDefined())
- continue;
+void prune(LinkGraph &G) {
+ std::vector<Symbol *> Worklist;
+ DenseSet<Block *> VisitedBlocks;
- auto &EDT = static_cast<DefinedAtom &>(E.getTarget());
+ // Build the initial worklist from all symbols initially live.
+ for (auto *Sym : G.defined_symbols())
+ if (Sym->isLive())
+ Worklist.push_back(Sym);
- if (EDT.shouldDiscard())
- EdgesToUpdate[&EDT].push_back(&E);
- else if (E.isKeepAlive() && !EDT.isLive())
- Worklist.push_back(&EDT);
- }
- }
-
- // Propagate live flags to all atoms reachable from the initial live set.
+ // Propagate live flags to all symbols reachable from the initial live set.
while (!Worklist.empty()) {
- DefinedAtom &NextLive = *Worklist.back();
+ auto *Sym = Worklist.back();
Worklist.pop_back();
- assert(!NextLive.shouldDiscard() &&
- "should-discard nodes should never make it into the worklist");
+ auto &B = Sym->getBlock();
- // If this atom has already been marked as live, or is marked to be
- // discarded, then skip it.
- if (NextLive.isLive())
+ // Skip addressables that we've visited before.
+ if (VisitedBlocks.count(&B))
continue;
- // Otherwise set it as live and add any non-live atoms that it points to
- // to the worklist.
- NextLive.setLive(true);
-
- for (auto &E : NextLive.edges()) {
- if (!E.getTarget().isDefined())
- continue;
+ VisitedBlocks.insert(&B);
- auto &EDT = static_cast<DefinedAtom &>(E.getTarget());
-
- if (EDT.shouldDiscard())
- EdgesToUpdate[&EDT].push_back(&E);
- else if (E.isKeepAlive() && !EDT.isLive())
- Worklist.push_back(&EDT);
+ for (auto &E : Sym->getBlock().edges()) {
+ if (E.getTarget().isDefined() && !E.getTarget().isLive()) {
+ E.getTarget().setLive(true);
+ Worklist.push_back(&E.getTarget());
+ }
}
}
- // Collect atoms to remove, then remove them from the graph.
- std::vector<DefinedAtom *> AtomsToRemove;
- for (auto *DA : G.defined_atoms())
- if (DA->shouldDiscard() || !DA->isLive())
- AtomsToRemove.push_back(DA);
-
- LLVM_DEBUG(dbgs() << "Pruning atoms:\n");
- for (auto *DA : AtomsToRemove) {
- LLVM_DEBUG(dbgs() << " " << *DA << "... ");
-
- // Check whether we need to replace this atom with an external atom.
- //
- // We replace if all of the following hold:
- // (1) The atom is marked should-discard,
- // (2) it has live edges (i.e. edges from live atoms) pointing to it.
- //
- // Otherwise we simply delete the atom.
-
- G.removeDefinedAtom(*DA);
-
- auto EdgesToUpdateItr = EdgesToUpdate.find(DA);
- if (EdgesToUpdateItr != EdgesToUpdate.end()) {
- auto &ExternalReplacement = G.addExternalAtom(DA->getName());
- for (auto *EdgeToUpdate : EdgesToUpdateItr->second)
- EdgeToUpdate->setTarget(ExternalReplacement);
- LLVM_DEBUG(dbgs() << "replaced with " << ExternalReplacement << "\n");
- } else
- LLVM_DEBUG(dbgs() << "deleted\n");
+ // Collect all the symbols to remove, then remove them.
+ {
+ LLVM_DEBUG(dbgs() << "Dead-stripping symbols:\n");
+ std::vector<Symbol *> SymbolsToRemove;
+ for (auto *Sym : G.defined_symbols())
+ if (!Sym->isLive())
+ SymbolsToRemove.push_back(Sym);
+ for (auto *Sym : SymbolsToRemove) {
+ LLVM_DEBUG(dbgs() << " " << *Sym << "...\n");
+ G.removeDefinedSymbol(*Sym);
+ }
}
- // Finally, discard any absolute symbols that were marked should-discard.
+ // Delete any unused blocks.
{
- std::vector<Atom *> AbsoluteAtomsToRemove;
- for (auto *A : G.absolute_atoms())
- if (A->shouldDiscard() || A->isLive())
- AbsoluteAtomsToRemove.push_back(A);
- for (auto *A : AbsoluteAtomsToRemove)
- G.removeAbsoluteAtom(*A);
+ LLVM_DEBUG(dbgs() << "Dead-stripping blocks:\n");
+ std::vector<Block *> BlocksToRemove;
+ for (auto *B : G.blocks())
+ if (!VisitedBlocks.count(B))
+ BlocksToRemove.push_back(B);
+ for (auto *B : BlocksToRemove) {
+ LLVM_DEBUG(dbgs() << " " << *B << "...\n");
+ G.removeBlock(*B);
+ }
}
}
diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
index e6fd6e38f7a6..07dee6cee200 100644
--- a/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
+++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -41,39 +41,32 @@ public:
protected:
struct SegmentLayout {
- using SectionAtomsList = std::vector<DefinedAtom *>;
- struct SectionLayout {
- SectionLayout(Section &S) : S(&S) {}
+ using BlocksList = std::vector<Block *>;
- Section *S;
- SectionAtomsList Atoms;
- };
-
- using SectionLayoutList = std::vector<SectionLayout>;
-
- SectionLayoutList ContentSections;
- SectionLayoutList ZeroFillSections;
+ BlocksList ContentBlocks;
+ BlocksList ZeroFillBlocks;
};
using SegmentLayoutMap = DenseMap<unsigned, SegmentLayout>;
// Phase 1:
- // 1.1: Build atom graph
+ // 1.1: Build link graph
// 1.2: Run pre-prune passes
// 1.2: Prune graph
// 1.3: Run post-prune passes
- // 1.4: Sort atoms into segments
+ // 1.4: Sort blocks into segments
// 1.5: Allocate segment memory
// 1.6: Identify externals and make an async call to resolve function
void linkPhase1(std::unique_ptr<JITLinkerBase> Self);
// Phase 2:
// 2.1: Apply resolution results
- // 2.2: Fix up atom contents
+ // 2.2: Fix up block contents
// 2.3: Call OnResolved callback
// 2.3: Make an async call to transfer and finalize memory.
void linkPhase2(std::unique_ptr<JITLinkerBase> Self,
- Expected<AsyncLookupResult> LookupResult);
+ Expected<AsyncLookupResult> LookupResult,
+ SegmentLayoutMap Layout);
// Phase 3:
// 3.1: Call OnFinalized callback, handing off allocation.
@@ -81,24 +74,37 @@ protected:
// Build a graph from the given object buffer.
// To be implemented by the client.
- virtual Expected<std::unique_ptr<AtomGraph>>
+ virtual Expected<std::unique_ptr<LinkGraph>>
buildGraph(MemoryBufferRef ObjBuffer) = 0;
- // For debug dumping of the atom graph.
+ // For debug dumping of the link graph.
virtual StringRef getEdgeKindName(Edge::Kind K) const = 0;
+ // Align a JITTargetAddress to conform with block alignment requirements.
+ static JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) {
+ uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment();
+ return Addr + Delta;
+ }
+
+ // Align a pointer to conform with block alignment requirements.
+ static char *alignToBlock(char *P, Block &B) {
+ uint64_t PAddr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(P));
+ uint64_t Delta = (B.getAlignmentOffset() - PAddr) % B.getAlignment();
+ return P + Delta;
+ }
+
private:
// Run all passes in the given pass list, bailing out immediately if any pass
// returns an error.
- Error runPasses(AtomGraphPassList &Passes, AtomGraph &G);
+ Error runPasses(LinkGraphPassList &Passes);
- // Copy atom contents and apply relocations.
+ // Copy block contents and apply relocations.
// Implemented in JITLinker.
virtual Error
- copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout,
- JITLinkMemoryManager::Allocation &Alloc) const = 0;
+ copyAndFixUpBlocks(const SegmentLayoutMap &Layout,
+ JITLinkMemoryManager::Allocation &Alloc) const = 0;
- void layOutAtoms();
+ SegmentLayoutMap layOutBlocks();
Error allocateSegments(const SegmentLayoutMap &Layout);
DenseSet<StringRef> getExternalSymbolNames() const;
void applyLookupResult(AsyncLookupResult LR);
@@ -108,8 +114,7 @@ private:
std::unique_ptr<JITLinkContext> Ctx;
PassConfiguration Passes;
- std::unique_ptr<AtomGraph> G;
- SegmentLayoutMap Layout;
+ std::unique_ptr<LinkGraph> G;
std::unique_ptr<JITLinkMemoryManager::Allocation> Alloc;
};
@@ -121,7 +126,7 @@ public:
/// Link should be called with the constructor arguments for LinkerImpl, which
/// will be forwarded to the constructor.
template <typename... ArgTs> static void link(ArgTs &&... Args) {
- auto L = llvm::make_unique<LinkerImpl>(std::forward<ArgTs>(Args)...);
+ auto L = std::make_unique<LinkerImpl>(std::forward<ArgTs>(Args)...);
// Ownership of the linker is passed into the linker's doLink function to
// allow it to be passed on to async continuations.
@@ -140,17 +145,17 @@ private:
}
Error
- copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout,
- JITLinkMemoryManager::Allocation &Alloc) const override {
- LLVM_DEBUG(dbgs() << "Copying and fixing up atoms:\n");
+ copyAndFixUpBlocks(const SegmentLayoutMap &Layout,
+ JITLinkMemoryManager::Allocation &Alloc) const override {
+ LLVM_DEBUG(dbgs() << "Copying and fixing up blocks:\n");
for (auto &KV : Layout) {
auto &Prot = KV.first;
auto &SegLayout = KV.second;
auto SegMem = Alloc.getWorkingMemory(
static_cast<sys::Memory::ProtectionFlags>(Prot));
- char *LastAtomEnd = SegMem.data();
- char *AtomDataPtr = LastAtomEnd;
+ char *LastBlockEnd = SegMem.data();
+ char *BlockDataPtr = LastBlockEnd;
LLVM_DEBUG({
dbgs() << " Processing segment "
@@ -160,93 +165,79 @@ private:
<< " ]\n Processing content sections:\n";
});
- for (auto &SI : SegLayout.ContentSections) {
- LLVM_DEBUG(dbgs() << " " << SI.S->getName() << ":\n");
+ for (auto *B : SegLayout.ContentBlocks) {
+ LLVM_DEBUG(dbgs() << " " << *B << ":\n");
+
+ // Pad to alignment/alignment-offset.
+ BlockDataPtr = alignToBlock(BlockDataPtr, *B);
- AtomDataPtr += alignmentAdjustment(AtomDataPtr, SI.S->getAlignment());
+ LLVM_DEBUG({
+ dbgs() << " Bumped block pointer to "
+ << (const void *)BlockDataPtr << " to meet block alignment "
+ << B->getAlignment() << " and alignment offset "
+ << B->getAlignmentOffset() << "\n";
+ });
+ // Zero pad up to alignment.
LLVM_DEBUG({
- dbgs() << " Bumped atom pointer to " << (const void *)AtomDataPtr
- << " to meet section alignment "
- << " of " << SI.S->getAlignment() << "\n";
+ if (LastBlockEnd != BlockDataPtr)
+ dbgs() << " Zero padding from " << (const void *)LastBlockEnd
+ << " to " << (const void *)BlockDataPtr << "\n";
});
- for (auto *DA : SI.Atoms) {
-
- // Align.
- AtomDataPtr += alignmentAdjustment(AtomDataPtr, DA->getAlignment());
- LLVM_DEBUG({
- dbgs() << " Bumped atom pointer to "
- << (const void *)AtomDataPtr << " to meet alignment of "
- << DA->getAlignment() << "\n";
- });
-
- // Zero pad up to alignment.
- LLVM_DEBUG({
- if (LastAtomEnd != AtomDataPtr)
- dbgs() << " Zero padding from " << (const void *)LastAtomEnd
- << " to " << (const void *)AtomDataPtr << "\n";
- });
- while (LastAtomEnd != AtomDataPtr)
- *LastAtomEnd++ = 0;
-
- // Copy initial atom content.
- LLVM_DEBUG({
- dbgs() << " Copying atom " << *DA << " content, "
- << DA->getContent().size() << " bytes, from "
- << (const void *)DA->getContent().data() << " to "
- << (const void *)AtomDataPtr << "\n";
- });
- memcpy(AtomDataPtr, DA->getContent().data(), DA->getContent().size());
-
- // Copy atom data and apply fixups.
- LLVM_DEBUG(dbgs() << " Applying fixups.\n");
- for (auto &E : DA->edges()) {
-
- // Skip non-relocation edges.
- if (!E.isRelocation())
- continue;
-
- // Dispatch to LinkerImpl for fixup.
- if (auto Err = impl().applyFixup(*DA, E, AtomDataPtr))
- return Err;
- }
-
- // Point the atom's content to the fixed up buffer.
- DA->setContent(StringRef(AtomDataPtr, DA->getContent().size()));
-
- // Update atom end pointer.
- LastAtomEnd = AtomDataPtr + DA->getContent().size();
- AtomDataPtr = LastAtomEnd;
+ while (LastBlockEnd != BlockDataPtr)
+ *LastBlockEnd++ = 0;
+
+ // Copy initial block content.
+ LLVM_DEBUG({
+ dbgs() << " Copying block " << *B << " content, "
+ << B->getContent().size() << " bytes, from "
+ << (const void *)B->getContent().data() << " to "
+ << (const void *)BlockDataPtr << "\n";
+ });
+ memcpy(BlockDataPtr, B->getContent().data(), B->getContent().size());
+
+ // Copy Block data and apply fixups.
+ LLVM_DEBUG(dbgs() << " Applying fixups.\n");
+ for (auto &E : B->edges()) {
+
+ // Skip non-relocation edges.
+ if (!E.isRelocation())
+ continue;
+
+ // Dispatch to LinkerImpl for fixup.
+ if (auto Err = impl().applyFixup(*B, E, BlockDataPtr))
+ return Err;
}
+
+ // Point the block's content to the fixed up buffer.
+ B->setContent(StringRef(BlockDataPtr, B->getContent().size()));
+
+ // Update block end pointer.
+ LastBlockEnd = BlockDataPtr + B->getContent().size();
+ BlockDataPtr = LastBlockEnd;
}
// Zero pad the rest of the segment.
LLVM_DEBUG({
dbgs() << " Zero padding end of segment from "
- << (const void *)LastAtomEnd << " to "
+ << (const void *)LastBlockEnd << " to "
<< (const void *)((char *)SegMem.data() + SegMem.size()) << "\n";
});
- while (LastAtomEnd != SegMem.data() + SegMem.size())
- *LastAtomEnd++ = 0;
+ while (LastBlockEnd != SegMem.data() + SegMem.size())
+ *LastBlockEnd++ = 0;
}
return Error::success();
}
};
-/// Dead strips and replaces discarded definitions with external atoms.
+/// Removes dead symbols/blocks/addressables.
///
-/// Finds the set of nodes reachable from any node initially marked live
-/// (nodes marked should-discard are treated as not live, even if they are
-/// reachable). All nodes not marked as live at the end of this process,
-/// are deleted. Nodes that are live, but marked should-discard are replaced
-/// with external atoms and all edges to them are re-written.
-void prune(AtomGraph &G);
-
-Error addEHFrame(AtomGraph &G, Section &EHFrameSection,
- StringRef EHFrameContent, JITTargetAddress EHFrameAddress,
- Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind);
+/// Finds the set of symbols and addressables reachable from any symbol
+/// initially marked live. All symbols/addressables not marked live at the end
+/// of this process are removed.
+void prune(LinkGraph &G);
} // end namespace jitlink
} // end namespace llvm
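The alignToBlock helpers in this header lean on unsigned wrap-around; a self-contained sketch of the arithmetic (values hypothetical):

#include <cassert>
#include <cstdint>

// Mirrors the address flavour of alignToBlock above.
uint64_t alignToBlock(uint64_t Addr, uint64_t Alignment,
                      uint64_t AlignmentOffset) {
  // Unsigned subtraction wraps, so the modulo yields the smallest
  // non-negative delta with (Addr + Delta) % Alignment == AlignmentOffset.
  uint64_t Delta = (AlignmentOffset - Addr) % Alignment;
  return Addr + Delta;
}

int main() {
  // A block that must sit 4 bytes past a 16-byte boundary:
  uint64_t A = alignToBlock(0x100a, 16, 4);
  assert(A == 0x1014 && A % 16 == 4);
  return 0;
}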
diff --git a/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index 267307cfde05..9e0d207e8bdb 100644
--- a/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -38,9 +38,21 @@ InProcessMemoryManager::allocate(const SegmentsRequestMap &Request) {
OnFinalize(applyProtections());
}
Error deallocate() override {
- for (auto &KV : SegBlocks)
- if (auto EC = sys::Memory::releaseMappedMemory(KV.second))
- return errorCodeToError(EC);
+ if (SegBlocks.empty())
+ return Error::success();
+ void *SlabStart = SegBlocks.begin()->second.base();
+ char *SlabEnd = (char *)SlabStart;
+ for (auto &KV : SegBlocks) {
+ SlabStart = std::min(SlabStart, KV.second.base());
+ SlabEnd = std::max(SlabEnd, (char *)(KV.second.base()) +
+ KV.second.allocatedSize());
+ }
+ size_t SlabSize = SlabEnd - (char *)SlabStart;
+ assert((SlabSize % sys::Process::getPageSizeEstimate()) == 0 &&
+ "Slab size is not a multiple of page size");
+ sys::MemoryBlock Slab(SlabStart, SlabSize);
+ if (auto EC = sys::Memory::releaseMappedMemory(Slab))
+ return errorCodeToError(EC);
return Error::success();
}
@@ -61,37 +73,52 @@ InProcessMemoryManager::allocate(const SegmentsRequestMap &Request) {
AllocationMap SegBlocks;
};
+ if (!isPowerOf2_64((uint64_t)sys::Process::getPageSizeEstimate()))
+ return make_error<StringError>("Page size is not a power of 2",
+ inconvertibleErrorCode());
+
AllocationMap Blocks;
const sys::Memory::ProtectionFlags ReadWrite =
static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
sys::Memory::MF_WRITE);
+ // Compute the total number of pages to allocate.
+ size_t TotalSize = 0;
for (auto &KV : Request) {
- auto &Seg = KV.second;
+ const auto &Seg = KV.second;
- if (Seg.getContentAlignment() > sys::Process::getPageSizeEstimate())
+ if (Seg.getAlignment() > sys::Process::getPageSizeEstimate())
return make_error<StringError>("Cannot request higher than page "
"alignment",
inconvertibleErrorCode());
- if (sys::Process::getPageSizeEstimate() % Seg.getContentAlignment() != 0)
- return make_error<StringError>("Page size is not a multiple of "
- "alignment",
- inconvertibleErrorCode());
+ TotalSize = alignTo(TotalSize, sys::Process::getPageSizeEstimate());
+ TotalSize += Seg.getContentSize();
+ TotalSize += Seg.getZeroFillSize();
+ }
+
+ // Allocate one slab to cover all the segments.
+ std::error_code EC;
+ auto SlabRemaining =
+ sys::Memory::allocateMappedMemory(TotalSize, nullptr, ReadWrite, EC);
+
+ if (EC)
+ return errorCodeToError(EC);
+
+ // Allocate segment memory from the slab.
+ for (auto &KV : Request) {
- uint64_t ZeroFillStart =
- alignTo(Seg.getContentSize(), Seg.getZeroFillAlignment());
- uint64_t SegmentSize = ZeroFillStart + Seg.getZeroFillSize();
+ const auto &Seg = KV.second;
- std::error_code EC;
- auto SegMem =
- sys::Memory::allocateMappedMemory(SegmentSize, nullptr, ReadWrite, EC);
+ uint64_t SegmentSize = alignTo(Seg.getContentSize() + Seg.getZeroFillSize(),
+ sys::Process::getPageSizeEstimate());
- if (EC)
- return errorCodeToError(EC);
+ sys::MemoryBlock SegMem(SlabRemaining.base(), SegmentSize);
+ SlabRemaining = sys::MemoryBlock((char *)SlabRemaining.base() + SegmentSize,
+ SegmentSize);
// Zero out the zero-fill memory.
- memset(static_cast<char *>(SegMem.base()) + ZeroFillStart, 0,
+ memset(static_cast<char *>(SegMem.base()) + Seg.getContentSize(), 0,
Seg.getZeroFillSize());
// Record the block for this segment.
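A worked sketch of the TotalSize computation above (page size and segment sizes hypothetical):

#include <cassert>
#include <cstddef>

constexpr size_t alignTo(size_t V, size_t A) { return (V + A - 1) / A * A; }

int main() {
  const size_t PageSize = 0x1000; // stand-in for getPageSizeEstimate()
  const size_t SegSizes[] = {0x1234, 0x20, 0x5000}; // content + zero-fill
  size_t TotalSize = 0;
  for (size_t S : SegSizes) {
    TotalSize = alignTo(TotalSize, PageSize); // each segment starts on a page
    TotalSize += S;
  }
  // 0x1234 | pad to 0x2000 | 0x20 | pad to 0x3000 | 0x5000 -> 0x8000 total.
  assert(TotalSize == 0x8000);
  return 0;
}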
diff --git a/lib/ExecutionEngine/JITLink/MachO.cpp b/lib/ExecutionEngine/JITLink/MachO.cpp
index 15995b8ce98f..58bc0f56e155 100644
--- a/lib/ExecutionEngine/JITLink/MachO.cpp
+++ b/lib/ExecutionEngine/JITLink/MachO.cpp
@@ -14,6 +14,7 @@
#include "llvm/ExecutionEngine/JITLink/MachO.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h"
#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
@@ -64,6 +65,8 @@ void jitLink_MachO(std::unique_ptr<JITLinkContext> Ctx) {
});
switch (Header.cputype) {
+ case MachO::CPU_TYPE_ARM64:
+ return jitLink_MachO_arm64(std::move(Ctx));
case MachO::CPU_TYPE_X86_64:
return jitLink_MachO_x86_64(std::move(Ctx));
}
diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp
deleted file mode 100644
index 1501c7ad0bc5..000000000000
--- a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp
+++ /dev/null
@@ -1,411 +0,0 @@
-//=--------- MachOAtomGraphBuilder.cpp - MachO AtomGraph builder ----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Generic MachO AtomGraph building code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MachOAtomGraphBuilder.h"
-
-#define DEBUG_TYPE "jitlink"
-
-namespace llvm {
-namespace jitlink {
-
-MachOAtomGraphBuilder::~MachOAtomGraphBuilder() {}
-
-Expected<std::unique_ptr<AtomGraph>> MachOAtomGraphBuilder::buildGraph() {
- if (auto Err = parseSections())
- return std::move(Err);
-
- if (auto Err = addAtoms())
- return std::move(Err);
-
- if (auto Err = addRelocations())
- return std::move(Err);
-
- return std::move(G);
-}
-
-MachOAtomGraphBuilder::MachOAtomGraphBuilder(const object::MachOObjectFile &Obj)
- : Obj(Obj),
- G(llvm::make_unique<AtomGraph>(Obj.getFileName(), getPointerSize(Obj),
- getEndianness(Obj))) {}
-
-void MachOAtomGraphBuilder::addCustomAtomizer(StringRef SectionName,
- CustomAtomizeFunction Atomizer) {
- assert(!CustomAtomizeFunctions.count(SectionName) &&
- "Custom atomizer for this section already exists");
- CustomAtomizeFunctions[SectionName] = std::move(Atomizer);
-}
-
-bool MachOAtomGraphBuilder::areLayoutLocked(const Atom &A, const Atom &B) {
- // If these atoms are the same then they're trivially "locked".
- if (&A == &B)
- return true;
-
- // If A and B are different, check whether either is undefined. (in which
- // case they are not locked).
- if (!A.isDefined() || !B.isDefined())
- return false;
-
- // A and B are different, but they're both defined atoms. We need to check
- // whether they're part of the same alt_entry chain.
- auto &DA = static_cast<const DefinedAtom &>(A);
- auto &DB = static_cast<const DefinedAtom &>(B);
-
- auto AStartItr = AltEntryStarts.find(&DA);
- if (AStartItr == AltEntryStarts.end()) // If A is not in a chain bail out.
- return false;
-
- auto BStartItr = AltEntryStarts.find(&DB);
- if (BStartItr == AltEntryStarts.end()) // If B is not in a chain bail out.
- return false;
-
- // A and B are layout locked if they're in the same chain.
- return AStartItr->second == BStartItr->second;
-}
-
-unsigned
-MachOAtomGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
- return Obj.is64Bit() ? 8 : 4;
-}
-
-support::endianness
-MachOAtomGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
- return Obj.isLittleEndian() ? support::little : support::big;
-}
-
-MachOAtomGraphBuilder::MachOSection &MachOAtomGraphBuilder::getCommonSection() {
- if (!CommonSymbolsSection) {
- auto Prot = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_WRITE);
- auto &GenericSection = G->createSection("<common>", 1, Prot, true);
- CommonSymbolsSection = MachOSection(GenericSection);
- }
- return *CommonSymbolsSection;
-}
-
-Error MachOAtomGraphBuilder::parseSections() {
- for (auto &SecRef : Obj.sections()) {
- assert((SecRef.getAlignment() <= std::numeric_limits<uint32_t>::max()) &&
- "Section alignment does not fit in 32 bits");
-
- StringRef Name;
- if (auto EC = SecRef.getName(Name))
- return errorCodeToError(EC);
-
- unsigned SectionIndex = SecRef.getIndex() + 1;
-
- uint32_t Align = SecRef.getAlignment();
- if (!isPowerOf2_32(Align))
- return make_error<JITLinkError>("Section " + Name +
- " has non-power-of-2 "
- "alignment");
-
- // FIXME: Get real section permissions
- // How, exactly, on MachO?
- sys::Memory::ProtectionFlags Prot;
- if (SecRef.isText())
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
- else
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
-
- auto &GenericSection = G->createSection(Name, Align, Prot, SecRef.isBSS());
-
- LLVM_DEBUG({
- dbgs() << "Adding section " << Name << ": "
- << format("0x%016" PRIx64, SecRef.getAddress())
- << ", align: " << SecRef.getAlignment() << "\n";
- });
-
- assert(!Sections.count(SectionIndex) && "Section index already in use");
-
- auto &MachOSec =
- Sections
- .try_emplace(SectionIndex, GenericSection, SecRef.getAddress(),
- SecRef.getAlignment())
- .first->second;
-
- if (!SecRef.isVirtual()) {
- // If this section has content then record it.
- Expected<StringRef> Content = SecRef.getContents();
- if (!Content)
- return Content.takeError();
- if (Content->size() != SecRef.getSize())
- return make_error<JITLinkError>("Section content size does not match "
- "declared size for " +
- Name);
- MachOSec.setContent(*Content);
- } else {
- // If this is a zero-fill section then just record the size.
- MachOSec.setZeroFill(SecRef.getSize());
- }
-
- uint32_t SectionFlags =
- Obj.is64Bit() ? Obj.getSection64(SecRef.getRawDataRefImpl()).flags
- : Obj.getSection(SecRef.getRawDataRefImpl()).flags;
-
- MachOSec.setNoDeadStrip(SectionFlags & MachO::S_ATTR_NO_DEAD_STRIP);
- }
-
- return Error::success();
-}
-
-// Adds atoms with identified start addresses (but not lengths) for all named
-// atoms.
-// Also, for every section that contains named atoms, but does not have an
-// atom at offset zero of that section, constructs an anonymous atom covering
-// that range.
-Error MachOAtomGraphBuilder::addNonCustomAtoms() {
- using AddrToAtomMap = std::map<JITTargetAddress, DefinedAtom *>;
- DenseMap<MachOSection *, AddrToAtomMap> SecToAtoms;
-
- DenseMap<MachOSection *, unsigned> FirstOrdinal;
- std::vector<DefinedAtom *> AltEntryAtoms;
-
- DenseSet<StringRef> ProcessedSymbols; // Used to check for duplicate defs.
-
- for (auto SymI = Obj.symbol_begin(), SymE = Obj.symbol_end(); SymI != SymE;
- ++SymI) {
- object::SymbolRef Sym(SymI->getRawDataRefImpl(), &Obj);
-
- auto Name = Sym.getName();
- if (!Name)
- return Name.takeError();
-
- // Bail out on duplicate definitions: There should never be more than one
- // definition for a symbol in a given object file.
- if (ProcessedSymbols.count(*Name))
- return make_error<JITLinkError>("Duplicate definition within object: " +
- *Name);
- else
- ProcessedSymbols.insert(*Name);
-
- auto Addr = Sym.getAddress();
- if (!Addr)
- return Addr.takeError();
-
- auto SymType = Sym.getType();
- if (!SymType)
- return SymType.takeError();
-
- auto Flags = Sym.getFlags();
-
- if (Flags & object::SymbolRef::SF_Undefined) {
- LLVM_DEBUG(dbgs() << "Adding undef atom \"" << *Name << "\"\n");
- G->addExternalAtom(*Name);
- continue;
- } else if (Flags & object::SymbolRef::SF_Absolute) {
- LLVM_DEBUG(dbgs() << "Adding absolute \"" << *Name << "\" addr: "
- << format("0x%016" PRIx64, *Addr) << "\n");
- auto &A = G->addAbsoluteAtom(*Name, *Addr);
- A.setGlobal(Flags & object::SymbolRef::SF_Global);
- A.setExported(Flags & object::SymbolRef::SF_Exported);
- A.setWeak(Flags & object::SymbolRef::SF_Weak);
- continue;
- } else if (Flags & object::SymbolRef::SF_Common) {
- LLVM_DEBUG({
- dbgs() << "Adding common \"" << *Name
- << "\" addr: " << format("0x%016" PRIx64, *Addr) << "\n";
- });
- auto &A =
- G->addCommonAtom(getCommonSection().getGenericSection(), *Name, *Addr,
- std::max(Sym.getAlignment(), 1U),
- Obj.getCommonSymbolSize(Sym.getRawDataRefImpl()));
- A.setGlobal(Flags & object::SymbolRef::SF_Global);
- A.setExported(Flags & object::SymbolRef::SF_Exported);
- continue;
- }
-
- LLVM_DEBUG(dbgs() << "Adding defined atom \"" << *Name << "\"\n");
-
- // This atom is neither undefined nor absolute, so it must be defined in
- // this object. Get its section index.
- auto SecItr = Sym.getSection();
- if (!SecItr)
- return SecItr.takeError();
-
- uint64_t SectionIndex = (*SecItr)->getIndex() + 1;
-
- LLVM_DEBUG(dbgs() << " to section index " << SectionIndex << "\n");
-
- auto SecByIndexItr = Sections.find(SectionIndex);
- if (SecByIndexItr == Sections.end())
- return make_error<JITLinkError>("Unrecognized section index in macho");
-
- auto &Sec = SecByIndexItr->second;
-
- auto &DA = G->addDefinedAtom(Sec.getGenericSection(), *Name, *Addr,
- std::max(Sym.getAlignment(), 1U));
-
- DA.setGlobal(Flags & object::SymbolRef::SF_Global);
- DA.setExported(Flags & object::SymbolRef::SF_Exported);
- DA.setWeak(Flags & object::SymbolRef::SF_Weak);
-
- DA.setCallable(*SymType & object::SymbolRef::ST_Function);
-
- // Check NDesc flags.
- {
- uint16_t NDesc = 0;
- if (Obj.is64Bit())
- NDesc = Obj.getSymbol64TableEntry(SymI->getRawDataRefImpl()).n_desc;
- else
- NDesc = Obj.getSymbolTableEntry(SymI->getRawDataRefImpl()).n_desc;
-
- // Record atom for alt-entry post-processing (where the layout-next
- // constraints will be added).
- if (NDesc & MachO::N_ALT_ENTRY)
- AltEntryAtoms.push_back(&DA);
-
- // If this atom has a no-dead-strip attr attached then mark it live.
- if (NDesc & MachO::N_NO_DEAD_STRIP)
- DA.setLive(true);
- }
-
- LLVM_DEBUG({
- dbgs() << " Added " << *Name
- << " addr: " << format("0x%016" PRIx64, *Addr)
- << ", align: " << DA.getAlignment()
- << ", section: " << Sec.getGenericSection().getName() << "\n";
- });
-
- auto &SecAtoms = SecToAtoms[&Sec];
- SecAtoms[DA.getAddress() - Sec.getAddress()] = &DA;
- }
-
- // Add anonymous atoms.
- for (auto &KV : Sections) {
- auto &S = KV.second;
-
- // Skip empty sections.
- if (S.empty())
- continue;
-
- // Skip sections with custom handling.
- if (CustomAtomizeFunctions.count(S.getName()))
- continue;
-
- auto SAI = SecToAtoms.find(&S);
-
- // If S is not in the SecToAtoms map then it contained no named atom. Add
- // one anonymous atom to cover the whole section.
- if (SAI == SecToAtoms.end()) {
- SecToAtoms[&S][0] = &G->addAnonymousAtom(
- S.getGenericSection(), S.getAddress(), S.getAlignment());
- continue;
- }
-
- // Otherwise, check whether this section had an atom covering offset zero.
- // If not, add one.
- auto &SecAtoms = SAI->second;
- if (!SecAtoms.count(0))
- SecAtoms[0] = &G->addAnonymousAtom(S.getGenericSection(), S.getAddress(),
- S.getAlignment());
- }
-
- LLVM_DEBUG(dbgs() << "MachOGraphBuilder setting atom content\n");
-
- // Set atom contents and any section-based flags.
- for (auto &KV : SecToAtoms) {
- auto &S = *KV.first;
- auto &SecAtoms = KV.second;
-
- // Iterate the atoms in reverse order and set up their contents.
- JITTargetAddress LastAtomAddr = S.getSize();
- for (auto I = SecAtoms.rbegin(), E = SecAtoms.rend(); I != E; ++I) {
- auto Offset = I->first;
- auto &A = *I->second;
- LLVM_DEBUG({
- dbgs() << " " << A << " to [ " << S.getAddress() + Offset << " .. "
- << S.getAddress() + LastAtomAddr << " ]\n";
- });
-
- if (S.isZeroFill())
- A.setZeroFill(LastAtomAddr - Offset);
- else
- A.setContent(S.getContent().substr(Offset, LastAtomAddr - Offset));
-
- // If the section has no-dead-strip set then mark the atom as live.
- if (S.isNoDeadStrip())
- A.setLive(true);
-
- LastAtomAddr = Offset;
- }
- }
-
- LLVM_DEBUG(dbgs() << "Adding alt-entry starts\n");
-
- // Sort alt-entry atoms by address in ascending order.
- llvm::sort(AltEntryAtoms.begin(), AltEntryAtoms.end(),
- [](const DefinedAtom *LHS, const DefinedAtom *RHS) {
- return LHS->getAddress() < RHS->getAddress();
- });
-
- // Process alt-entry atoms in address order to build the table of alt-entry
- // atoms to alt-entry chain starts.
- for (auto *DA : AltEntryAtoms) {
- assert(!AltEntryStarts.count(DA) && "Duplicate entry in AltEntryStarts");
-
- // DA is an alt-entry atom. Look for the predecessor atom that it is locked
- // to, bailing out if we do not find one.
- auto AltEntryPred = G->findAtomByAddress(DA->getAddress() - 1);
- if (!AltEntryPred)
- return AltEntryPred.takeError();
-
- // Add a LayoutNext edge from the predecessor to this atom.
- AltEntryPred->setLayoutNext(*DA);
-
- // Check to see whether the predecessor itself is an alt-entry atom.
- auto AltEntryStartItr = AltEntryStarts.find(&*AltEntryPred);
- if (AltEntryStartItr != AltEntryStarts.end()) {
- // If the predecessor was an alt-entry atom then re-use its value.
- LLVM_DEBUG({
- dbgs() << " " << *DA << " -> " << *AltEntryStartItr->second
- << " (based on existing entry for " << *AltEntryPred << ")\n";
- });
- AltEntryStarts[DA] = AltEntryStartItr->second;
- } else {
- // If the predecessor does not have an entry then add an entry for this
- // atom (i.e. the alt_entry atom) and a self-reference entry for the
- // predecessor atom that is the start of this chain.
- LLVM_DEBUG({
- dbgs() << " " << *AltEntryPred << " -> " << *AltEntryPred << "\n"
- << " " << *DA << " -> " << *AltEntryPred << "\n";
- });
- AltEntryStarts[&*AltEntryPred] = &*AltEntryPred;
- AltEntryStarts[DA] = &*AltEntryPred;
- }
- }
-
- return Error::success();
-}
-
-Error MachOAtomGraphBuilder::addAtoms() {
- // Add all named atoms.
- if (auto Err = addNonCustomAtoms())
- return Err;
-
- // Process special sections.
- for (auto &KV : Sections) {
- auto &S = KV.second;
- auto HI = CustomAtomizeFunctions.find(S.getGenericSection().getName());
- if (HI != CustomAtomizeFunctions.end()) {
- auto &Atomize = HI->second;
- if (auto Err = Atomize(S))
- return Err;
- }
- }
-
- return Error::success();
-}
-
-} // end namespace jitlink
-} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h
deleted file mode 100644
index 72d441b24d06..000000000000
--- a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h
+++ /dev/null
@@ -1,138 +0,0 @@
-//===----- MachOAtomGraphBuilder.h - MachO AtomGraph builder ----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Generic MachO AtomGraph building code.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H
-#define LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H
-
-#include "llvm/ExecutionEngine/JITLink/JITLink.h"
-
-#include "JITLinkGeneric.h"
-
-#include "llvm/Object/MachO.h"
-
-namespace llvm {
-namespace jitlink {
-
-class MachOAtomGraphBuilder {
-public:
- virtual ~MachOAtomGraphBuilder();
- Expected<std::unique_ptr<AtomGraph>> buildGraph();
-
-protected:
- using OffsetToAtomMap = std::map<JITTargetAddress, DefinedAtom *>;
-
- class MachOSection {
- public:
- MachOSection() = default;
-
- /// Create a MachO section with the given address and alignment.
- MachOSection(Section &GenericSection, JITTargetAddress Address,
- unsigned Alignment)
- : Address(Address), GenericSection(&GenericSection),
- Alignment(Alignment) {}
-
- /// Create a section without address, content or size (used for common
- /// symbol sections).
- MachOSection(Section &GenericSection) : GenericSection(&GenericSection) {}
-
- Section &getGenericSection() const {
- assert(GenericSection && "Section is null");
- return *GenericSection;
- }
-
- StringRef getName() const {
- assert(GenericSection && "No generic section attached");
- return GenericSection->getName();
- }
-
- MachOSection &setContent(StringRef Content) {
- assert(!ContentPtr && !Size && "Content/zeroFill already set");
- ContentPtr = Content.data();
- Size = Content.size();
- return *this;
- }
-
- MachOSection &setZeroFill(uint64_t Size) {
- assert(!ContentPtr && !this->Size && "Content/zeroFill already set");
- this->Size = Size;
- return *this;
- }
-
- bool isZeroFill() const { return !ContentPtr; }
-
- bool empty() const { return getSize() == 0; }
-
- size_t getSize() const { return Size; }
-
- StringRef getContent() const {
- assert(ContentPtr && "getContent() called on zero-fill section");
- return {ContentPtr, static_cast<size_t>(Size)};
- }
-
- JITTargetAddress getAddress() const { return Address; }
-
- unsigned getAlignment() const { return Alignment; }
-
- MachOSection &setNoDeadStrip(bool NoDeadStrip) {
- this->NoDeadStrip = NoDeadStrip;
- return *this;
- }
-
- bool isNoDeadStrip() const { return NoDeadStrip; }
-
- private:
- JITTargetAddress Address = 0;
- Section *GenericSection = nullptr;
- const char *ContentPtr = nullptr;
- uint64_t Size = 0;
- unsigned Alignment = 0;
- bool NoDeadStrip = false;
- };
-
- using CustomAtomizeFunction = std::function<Error(MachOSection &S)>;
-
- MachOAtomGraphBuilder(const object::MachOObjectFile &Obj);
-
- AtomGraph &getGraph() const { return *G; }
-
- const object::MachOObjectFile &getObject() const { return Obj; }
-
- void addCustomAtomizer(StringRef SectionName, CustomAtomizeFunction Atomizer);
-
- virtual Error addRelocations() = 0;
-
- /// Returns true if Atom A and Atom B are at a fixed offset from one another
- /// (i.e. if they're part of the same alt-entry chain).
- bool areLayoutLocked(const Atom &A, const Atom &B);
-
-private:
- static unsigned getPointerSize(const object::MachOObjectFile &Obj);
- static support::endianness getEndianness(const object::MachOObjectFile &Obj);
-
- MachOSection &getCommonSection();
-
- Error parseSections();
- Error addNonCustomAtoms();
- Error addAtoms();
-
- const object::MachOObjectFile &Obj;
- std::unique_ptr<AtomGraph> G;
- DenseMap<const DefinedAtom *, const DefinedAtom *> AltEntryStarts;
- DenseMap<unsigned, MachOSection> Sections;
- StringMap<CustomAtomizeFunction> CustomAtomizeFunctions;
- Optional<MachOSection> CommonSymbolsSection;
-};
-
-} // end namespace jitlink
-} // end namespace llvm
-
-#endif // LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H
diff --git a/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
new file mode 100644
index 000000000000..7366f53ebf36
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
@@ -0,0 +1,535 @@
+//=--------- MachOLinkGraphBuilder.cpp - MachO LinkGraph builder ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic MachO LinkGraph building code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOLinkGraphBuilder.h"
+
+#define DEBUG_TYPE "jitlink"
+
+static const char *CommonSectionName = "__common";
+
+namespace llvm {
+namespace jitlink {
+
+MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {}
+
+Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
+
+ // Sanity check: we only operate on relocatable objects.
+ if (!Obj.isRelocatableObject())
+ return make_error<JITLinkError>("Object is not a relocatable MachO");
+
+ if (auto Err = createNormalizedSections())
+ return std::move(Err);
+
+ if (auto Err = createNormalizedSymbols())
+ return std::move(Err);
+
+ if (auto Err = graphifyRegularSymbols())
+ return std::move(Err);
+
+ if (auto Err = graphifySectionsWithCustomParsers())
+ return std::move(Err);
+
+ if (auto Err = addRelocations())
+ return std::move(Err);
+
+ return std::move(G);
+}
+
+MachOLinkGraphBuilder::MachOLinkGraphBuilder(const object::MachOObjectFile &Obj)
+ : Obj(Obj),
+ G(std::make_unique<LinkGraph>(Obj.getFileName(), getPointerSize(Obj),
+ getEndianness(Obj))) {}
+
+void MachOLinkGraphBuilder::addCustomSectionParser(
+ StringRef SectionName, SectionParserFunction Parser) {
+ assert(!CustomSectionParserFunctions.count(SectionName) &&
+ "Custom parser for this section already exists");
+ CustomSectionParserFunctions[SectionName] = std::move(Parser);
+}
+
+Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) {
+ if ((Desc & MachO::N_WEAK_DEF) || (Desc & MachO::N_WEAK_REF))
+ return Linkage::Weak;
+ return Linkage::Strong;
+}
+
+Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) {
+ if (Name.startswith("l"))
+ return Scope::Local;
+ if (Type & MachO::N_PEXT)
+ return Scope::Hidden;
+ if (Type & MachO::N_EXT)
+ return Scope::Default;
+ return Scope::Local;
+}
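Illustrative results of the two classification helpers above on hypothetical symbol-table entries:

// getLinkage(MachO::N_WEAK_DEF)                   -> Linkage::Weak
// getLinkage(0)                                   -> Linkage::Strong
// getScope("ltmp0", MachO::N_EXT)                 -> Scope::Local ("l" prefix wins)
// getScope("_foo", MachO::N_PEXT | MachO::N_EXT)  -> Scope::Hidden
// getScope("_foo", MachO::N_EXT)                  -> Scope::Default
// getScope("_foo", 0)                             -> Scope::Local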
+
+bool MachOLinkGraphBuilder::isAltEntry(const NormalizedSymbol &NSym) {
+ return NSym.Desc & MachO::N_ALT_ENTRY;
+}
+
+unsigned
+MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
+ return Obj.is64Bit() ? 8 : 4;
+}
+
+support::endianness
+MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
+ return Obj.isLittleEndian() ? support::little : support::big;
+}
+
+Section &MachOLinkGraphBuilder::getCommonSection() {
+ if (!CommonSection) {
+ auto Prot = static_cast<sys::Memory::ProtectionFlags>(
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE);
+ CommonSection = &G->createSection(CommonSectionName, Prot);
+ }
+ return *CommonSection;
+}
+
+Error MachOLinkGraphBuilder::createNormalizedSections() {
+ // Build normalized sections. Verifies that section data is in-range (for
+ // sections with content) and that address ranges are non-overlapping.
+
+ LLVM_DEBUG(dbgs() << "Creating normalized sections...\n");
+
+ for (auto &SecRef : Obj.sections()) {
+ NormalizedSection NSec;
+ uint32_t DataOffset = 0;
+
+ auto SecIndex = Obj.getSectionIndex(SecRef.getRawDataRefImpl());
+
+ auto Name = SecRef.getName();
+ if (!Name)
+ return Name.takeError();
+
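+    // Note: the MachO 'align' field stores log2(alignment), hence the shifts
+    // below.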
+ if (Obj.is64Bit()) {
+ const MachO::section_64 &Sec64 =
+ Obj.getSection64(SecRef.getRawDataRefImpl());
+
+ NSec.Address = Sec64.addr;
+ NSec.Size = Sec64.size;
+ NSec.Alignment = 1ULL << Sec64.align;
+ NSec.Flags = Sec64.flags;
+ DataOffset = Sec64.offset;
+ } else {
+ const MachO::section &Sec32 = Obj.getSection(SecRef.getRawDataRefImpl());
+ NSec.Address = Sec32.addr;
+ NSec.Size = Sec32.size;
+ NSec.Alignment = 1ULL << Sec32.align;
+ NSec.Flags = Sec32.flags;
+ DataOffset = Sec32.offset;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << " " << *Name << ": " << formatv("{0:x16}", NSec.Address)
+ << " -- " << formatv("{0:x16}", NSec.Address + NSec.Size)
+ << ", align: " << NSec.Alignment << ", index: " << SecIndex
+ << "\n";
+ });
+
+ // Get the section data if any.
+ {
+ unsigned SectionType = NSec.Flags & MachO::SECTION_TYPE;
+ if (SectionType != MachO::S_ZEROFILL &&
+ SectionType != MachO::S_GB_ZEROFILL) {
+
+ if (DataOffset + NSec.Size > Obj.getData().size())
+ return make_error<JITLinkError>(
+ "Section data extends past end of file");
+
+ NSec.Data = Obj.getData().data() + DataOffset;
+ }
+ }
+
+ // Get prot flags.
+ // FIXME: Make sure this test is correct (it's probably missing cases
+ // as-is).
+ sys::Memory::ProtectionFlags Prot;
+ if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS)
+ Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC);
+ else
+ Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+
+ NSec.GraphSection = &G->createSection(*Name, Prot);
+ IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
+ }
+
+ std::vector<NormalizedSection *> Sections;
+ Sections.reserve(IndexToSection.size());
+ for (auto &KV : IndexToSection)
+ Sections.push_back(&KV.second);
+
+ // If we didn't end up creating any sections then bail out. The code below
+ // assumes that we have at least one section.
+ if (Sections.empty())
+ return Error::success();
+
+ llvm::sort(Sections,
+ [](const NormalizedSection *LHS, const NormalizedSection *RHS) {
+ assert(LHS && RHS && "Null section?");
+ return LHS->Address < RHS->Address;
+ });
+
+ for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) {
+ auto &Cur = *Sections[I];
+ auto &Next = *Sections[I + 1];
+ if (Next.Address < Cur.Address + Cur.Size)
+ return make_error<JITLinkError>(
+ "Address range for section " + Cur.GraphSection->getName() +
+ formatv(" [ {0:x16} -- {1:x16} ] ", Cur.Address,
+ Cur.Address + Cur.Size) +
+ "overlaps " +
+ formatv(" [ {0:x16} -- {1:x16} ] ", Next.Address,
+ Next.Address + Next.Size));
+ }
+
+ return Error::success();
+}
+
+Error MachOLinkGraphBuilder::createNormalizedSymbols() {
+ LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n");
+
+ for (auto &SymRef : Obj.symbols()) {
+
+ unsigned SymbolIndex = Obj.getSymbolIndex(SymRef.getRawDataRefImpl());
+ uint64_t Value;
+ uint32_t NStrX;
+ uint8_t Type;
+ uint8_t Sect;
+ uint16_t Desc;
+
+ if (Obj.is64Bit()) {
+ const MachO::nlist_64 &NL64 =
+ Obj.getSymbol64TableEntry(SymRef.getRawDataRefImpl());
+ Value = NL64.n_value;
+ NStrX = NL64.n_strx;
+ Type = NL64.n_type;
+ Sect = NL64.n_sect;
+ Desc = NL64.n_desc;
+ } else {
+ const MachO::nlist &NL32 =
+ Obj.getSymbolTableEntry(SymRef.getRawDataRefImpl());
+ Value = NL32.n_value;
+ NStrX = NL32.n_strx;
+ Type = NL32.n_type;
+ Sect = NL32.n_sect;
+ Desc = NL32.n_desc;
+ }
+
+ // Skip stabs.
+ // FIXME: Are there other symbols we should be skipping?
+ if (Type & MachO::N_STAB)
+ continue;
+
+ Optional<StringRef> Name;
+ if (NStrX) {
+ if (auto NameOrErr = SymRef.getName())
+ Name = *NameOrErr;
+ else
+ return NameOrErr.takeError();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << " ";
+ if (!Name)
+ dbgs() << "<anonymous symbol>";
+ else
+ dbgs() << *Name;
+ dbgs() << ": value = " << formatv("{0:x16}", Value)
+ << ", type = " << formatv("{0:x2}", Type)
+ << ", desc = " << formatv("{0:x4}", Desc) << ", sect = ";
+ if (Sect)
+ dbgs() << static_cast<unsigned>(Sect - 1);
+ else
+ dbgs() << "none";
+ dbgs() << "\n";
+ });
+
+ // If this symbol has a section, sanity check that the addresses line up.
+ NormalizedSection *NSec = nullptr;
+ if (Sect != 0) {
+ if (auto NSecOrErr = findSectionByIndex(Sect - 1))
+ NSec = &*NSecOrErr;
+ else
+ return NSecOrErr.takeError();
+
+ if (Value < NSec->Address || Value > NSec->Address + NSec->Size)
+ return make_error<JITLinkError>("Symbol address does not fall within "
+ "section");
+ }
+
+ IndexToSymbol[SymbolIndex] =
+ &createNormalizedSymbol(*Name, Value, Type, Sect, Desc,
+                                getLinkage(Desc), getScope(*Name, Type));
+ }
+
+ return Error::success();
+}
+
+void MachOLinkGraphBuilder::addSectionStartSymAndBlock(
+ Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size,
+ uint32_t Alignment, bool IsLive) {
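+  // Create a single block covering [Address, Address + Size) (content-backed
+  // if Data is non-null, zero-fill otherwise) and make an anonymous symbol at
+  // offset zero the canonical symbol for the block's start address.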
+ Block &B =
+ Data ? G->createContentBlock(GraphSec, StringRef(Data, Size), Address,
+ Alignment, 0)
+ : G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0);
+ auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive);
+ assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) &&
+ "Anonymous block start symbol clashes with existing symbol address");
+ AddrToCanonicalSymbol[Sym.getAddress()] = &Sym;
+}
+
+Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
+
+ LLVM_DEBUG(dbgs() << "Creating graph symbols...\n");
+
+  // MachO section numbers are stored in a uint8_t, so there are at most 256
+  // section indexes: use a vector rather than a map.
+ std::vector<std::vector<NormalizedSymbol *>> SecIndexToSymbols;
+ SecIndexToSymbols.resize(256);
+
+ // Create commons, externs, and absolutes, and partition all other symbols by
+ // section.
+ for (auto &KV : IndexToSymbol) {
+ auto &NSym = *KV.second;
+
+ switch (NSym.Type & MachO::N_TYPE) {
+ case MachO::N_UNDF:
+ if (NSym.Value) {
+ if (!NSym.Name)
+ return make_error<JITLinkError>("Anonymous common symbol at index " +
+ Twine(KV.first));
+ NSym.GraphSymbol = &G->addCommonSymbol(
+ *NSym.Name, NSym.S, getCommonSection(), NSym.Value, 0,
+ 1ull << MachO::GET_COMM_ALIGN(NSym.Desc),
+ NSym.Desc & MachO::N_NO_DEAD_STRIP);
+ } else {
+ if (!NSym.Name)
+ return make_error<JITLinkError>("Anonymous external symbol at "
+ "index " +
+ Twine(KV.first));
+ NSym.GraphSymbol = &G->addExternalSymbol(*NSym.Name, 0);
+ }
+ break;
+ case MachO::N_ABS:
+ if (!NSym.Name)
+ return make_error<JITLinkError>("Anonymous absolute symbol at index " +
+ Twine(KV.first));
+ NSym.GraphSymbol = &G->addAbsoluteSymbol(
+ *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default,
+ NSym.Desc & MachO::N_NO_DEAD_STRIP);
+ break;
+ case MachO::N_SECT:
+ SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym);
+ break;
+ case MachO::N_PBUD:
+ return make_error<JITLinkError>(
+ "Unupported N_PBUD symbol " +
+ (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
+ " at index " + Twine(KV.first));
+ case MachO::N_INDR:
+ return make_error<JITLinkError>(
+ "Unupported N_INDR symbol " +
+ (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
+ " at index " + Twine(KV.first));
+ default:
+ return make_error<JITLinkError>(
+ "Unrecognized symbol type " + Twine(NSym.Type & MachO::N_TYPE) +
+ " for symbol " +
+ (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
+ " at index " + Twine(KV.first));
+ }
+ }
+
+ // Loop over sections performing regular graphification for those that
+ // don't have custom parsers.
+ for (auto &KV : IndexToSection) {
+ auto SecIndex = KV.first;
+ auto &NSec = KV.second;
+
+ // Skip sections with custom parsers.
+ if (CustomSectionParserFunctions.count(NSec.GraphSection->getName())) {
+ LLVM_DEBUG({
+ dbgs() << " Skipping section " << NSec.GraphSection->getName()
+ << " as it has a custom parser.\n";
+ });
+ continue;
+ } else
+ LLVM_DEBUG({
+ dbgs() << " Processing section " << NSec.GraphSection->getName()
+ << "...\n";
+ });
+
+ bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP;
+ bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
+
+ auto &SecNSymStack = SecIndexToSymbols[SecIndex];
+
+ // If this section is non-empty but there are no symbols covering it then
+ // create one block and anonymous symbol to cover the entire section.
+ if (SecNSymStack.empty()) {
+ if (NSec.Size > 0) {
+ LLVM_DEBUG({
+ dbgs() << " Section non-empty, but contains no symbols. "
+ "Creating anonymous block to cover "
+ << formatv("{0:x16}", NSec.Address) << " -- "
+ << formatv("{0:x16}", NSec.Address + NSec.Size) << "\n";
+ });
+ addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
+ NSec.Size, NSec.Alignment,
+ SectionIsNoDeadStrip);
+ } else
+ LLVM_DEBUG({
+ dbgs() << " Section empty and contains no symbols. Skipping.\n";
+ });
+ continue;
+ }
+
+    // Sort the symbol stack by address, alt-entry status, scope, and name.
+ // We sort in reverse order so that symbols will be visited in the right
+ // order when we pop off the stack below.
+ llvm::sort(SecNSymStack, [](const NormalizedSymbol *LHS,
+ const NormalizedSymbol *RHS) {
+ if (LHS->Value != RHS->Value)
+ return LHS->Value > RHS->Value;
+ if (isAltEntry(*LHS) != isAltEntry(*RHS))
+ return isAltEntry(*RHS);
+ if (LHS->S != RHS->S)
+ return static_cast<uint8_t>(LHS->S) < static_cast<uint8_t>(RHS->S);
+ return LHS->Name < RHS->Name;
+ });
+
+    // The first symbol in a section cannot be an alt-entry symbol.
+ if (!SecNSymStack.empty() && isAltEntry(*SecNSymStack.back()))
+ return make_error<JITLinkError>(
+ "First symbol in " + NSec.GraphSection->getName() + " is alt-entry");
+
+ // If the section is non-empty but there is no symbol covering the start
+ // address then add an anonymous one.
+ if (SecNSymStack.back()->Value != NSec.Address) {
+ auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address;
+ LLVM_DEBUG({
+ dbgs() << " Section start not covered by symbol. "
+ << "Creating anonymous block to cover [ "
+ << formatv("{0:x16}", NSec.Address) << " -- "
+ << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n";
+ });
+ addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
+ AnonBlockSize, NSec.Alignment,
+ SectionIsNoDeadStrip);
+ }
+
+ // Visit section symbols in order by popping off the reverse-sorted stack,
+ // building blocks for each alt-entry chain and creating symbols as we go.
+ while (!SecNSymStack.empty()) {
+ SmallVector<NormalizedSymbol *, 8> BlockSyms;
+
+ BlockSyms.push_back(SecNSymStack.back());
+ SecNSymStack.pop_back();
+ while (!SecNSymStack.empty() &&
+ (isAltEntry(*SecNSymStack.back()) ||
+ SecNSymStack.back()->Value == BlockSyms.back()->Value)) {
+ BlockSyms.push_back(SecNSymStack.back());
+ SecNSymStack.pop_back();
+ }
+
+      // BlockSyms now contains the block symbols in reverse canonical order.
+ JITTargetAddress BlockStart = BlockSyms.front()->Value;
+ JITTargetAddress BlockEnd = SecNSymStack.empty()
+ ? NSec.Address + NSec.Size
+ : SecNSymStack.back()->Value;
+ JITTargetAddress BlockOffset = BlockStart - NSec.Address;
+ JITTargetAddress BlockSize = BlockEnd - BlockStart;
+
+ LLVM_DEBUG({
+ dbgs() << " Creating block for " << formatv("{0:x16}", BlockStart)
+ << " -- " << formatv("{0:x16}", BlockEnd) << ": "
+ << NSec.GraphSection->getName() << " + "
+ << formatv("{0:x16}", BlockOffset) << " with "
+ << BlockSyms.size() << " symbol(s)...\n";
+ });
+
+ Block &B =
+ NSec.Data
+ ? G->createContentBlock(
+ *NSec.GraphSection,
+ StringRef(NSec.Data + BlockOffset, BlockSize), BlockStart,
+ NSec.Alignment, BlockStart % NSec.Alignment)
+ : G->createZeroFillBlock(*NSec.GraphSection, BlockSize,
+ BlockStart, NSec.Alignment,
+ BlockStart % NSec.Alignment);
+
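+      // Visit the block's symbols from highest address to lowest: symbols at
+      // the same address share an end point, and SymEnd shrinks to the
+      // previous canonical address each time we step down to a new address.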
+ Optional<JITTargetAddress> LastCanonicalAddr;
+ JITTargetAddress SymEnd = BlockEnd;
+ while (!BlockSyms.empty()) {
+ auto &NSym = *BlockSyms.back();
+ BlockSyms.pop_back();
+
+ bool SymLive =
+ (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip;
+
+ LLVM_DEBUG({
+ dbgs() << " " << formatv("{0:x16}", NSym.Value) << " -- "
+ << formatv("{0:x16}", SymEnd) << ": ";
+ if (!NSym.Name)
+ dbgs() << "<anonymous symbol>";
+ else
+ dbgs() << NSym.Name;
+ if (SymLive)
+ dbgs() << " [no-dead-strip]";
+ if (LastCanonicalAddr == NSym.Value)
+ dbgs() << " [non-canonical]";
+ dbgs() << "\n";
+ });
+
+ auto &Sym =
+ NSym.Name
+ ? G->addDefinedSymbol(B, NSym.Value - BlockStart, *NSym.Name,
+ SymEnd - NSym.Value, NSym.L, NSym.S,
+ SectionIsText, SymLive)
+ : G->addAnonymousSymbol(B, NSym.Value - BlockStart,
+ SymEnd - NSym.Value, SectionIsText,
+ SymLive);
+ NSym.GraphSymbol = &Sym;
+ if (LastCanonicalAddr != Sym.getAddress()) {
+ if (LastCanonicalAddr)
+ SymEnd = *LastCanonicalAddr;
+ LastCanonicalAddr = Sym.getAddress();
+ setCanonicalSymbol(Sym);
+ }
+ }
+ }
+ }
+
+ return Error::success();
+}
+
+Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() {
+ // Graphify special sections.
+ for (auto &KV : IndexToSection) {
+ auto &NSec = KV.second;
+
+ auto HI = CustomSectionParserFunctions.find(NSec.GraphSection->getName());
+ if (HI != CustomSectionParserFunctions.end()) {
+ auto &Parse = HI->second;
+ if (auto Err = Parse(NSec))
+ return Err;
+ }
+ }
+
+ return Error::success();
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
new file mode 100644
index 000000000000..e1123cd11048
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -0,0 +1,269 @@
+//===----- MachOLinkGraphBuilder.h - MachO LinkGraph builder ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic MachO LinkGraph building code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_EXECUTIONENGINE_JITLINK_MACHOLINKGRAPHBUILDER_H
+#define LIB_EXECUTIONENGINE_JITLINK_MACHOLINKGRAPHBUILDER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+#include "EHFrameSupportImpl.h"
+#include "JITLinkGeneric.h"
+#include "llvm/Object/MachO.h"
+
+#include <list>
+
+namespace llvm {
+namespace jitlink {
+
+class MachOLinkGraphBuilder {
+public:
+ virtual ~MachOLinkGraphBuilder();
+ Expected<std::unique_ptr<LinkGraph>> buildGraph();
+
+protected:
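+  /// Parses raw __eh_frame content into CIE and FDE records, adding blocks,
+  /// anonymous symbols, and keep-alive edges to the builder's LinkGraph.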
+ class MachOEHFrameBinaryParser : public EHFrameBinaryParser {
+ public:
+ MachOEHFrameBinaryParser(MachOLinkGraphBuilder &Builder,
+ JITTargetAddress EHFrameAddress,
+ StringRef EHFrameContent, Section &EHFrameSection,
+ uint64_t CIEAlignment, uint64_t FDEAlignment,
+ Edge::Kind FDEToCIERelocKind,
+ Edge::Kind FDEToTargetRelocKind)
+ : EHFrameBinaryParser(EHFrameAddress, EHFrameContent,
+ Builder.getGraph().getPointerSize(),
+ Builder.getGraph().getEndianness()),
+ Builder(Builder), EHFrameSection(EHFrameSection),
+ CIEAlignment(CIEAlignment), FDEAlignment(FDEAlignment),
+ FDEToCIERelocKind(FDEToCIERelocKind),
+ FDEToTargetRelocKind(FDEToTargetRelocKind) {}
+
+ Symbol *getSymbolAtAddress(JITTargetAddress Address) override {
+ if (auto *Sym = Builder.getSymbolByAddress(Address))
+ if (Sym->getAddress() == Address)
+ return Sym;
+ return nullptr;
+ }
+
+ Symbol &createCIERecord(JITTargetAddress RecordAddr,
+ StringRef RecordContent) override {
+ auto &G = Builder.getGraph();
+ auto &B = G.createContentBlock(EHFrameSection, RecordContent, RecordAddr,
+ CIEAlignment, 0);
+ auto &CIESymbol =
+ G.addAnonymousSymbol(B, 0, RecordContent.size(), false, false);
+ Builder.setCanonicalSymbol(CIESymbol);
+ return CIESymbol;
+ }
+
+ Expected<Symbol &> createFDERecord(JITTargetAddress RecordAddr,
+ StringRef RecordContent, Symbol &CIE,
+ size_t CIEOffset, Symbol &Func,
+ size_t FuncOffset, Symbol *LSDA,
+ size_t LSDAOffset) override {
+ auto &G = Builder.getGraph();
+ auto &B = G.createContentBlock(EHFrameSection, RecordContent, RecordAddr,
+ FDEAlignment, 0);
+
+ // Add edges to CIE, Func, and (conditionally) LSDA.
+ B.addEdge(FDEToCIERelocKind, CIEOffset, CIE, 0);
+ B.addEdge(FDEToTargetRelocKind, FuncOffset, Func, 0);
+
+ if (LSDA)
+ B.addEdge(FDEToTargetRelocKind, LSDAOffset, *LSDA, 0);
+
+ auto &FDESymbol =
+ G.addAnonymousSymbol(B, 0, RecordContent.size(), false, false);
+
+ // Add a keep-alive relocation from the function to the FDE to ensure it
+ // is not dead stripped.
+ Func.getBlock().addEdge(Edge::KeepAlive, 0, FDESymbol, 0);
+
+ return FDESymbol;
+ }
+
+ private:
+ MachOLinkGraphBuilder &Builder;
+ Section &EHFrameSection;
+ uint64_t CIEAlignment;
+ uint64_t FDEAlignment;
+ Edge::Kind FDEToCIERelocKind;
+ Edge::Kind FDEToTargetRelocKind;
+ };
+
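+  /// A unified view of one nlist/nlist_64 symbol table entry, plus the graph
+  /// Symbol (if any) created for it.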
+ struct NormalizedSymbol {
+ friend class MachOLinkGraphBuilder;
+
+ private:
+ NormalizedSymbol(Optional<StringRef> Name, uint64_t Value, uint8_t Type,
+ uint8_t Sect, uint16_t Desc, Linkage L, Scope S)
+ : Name(Name), Value(Value), Type(Type), Sect(Sect), Desc(Desc), L(L),
+ S(S) {
+ assert((!Name || !Name->empty()) && "Name must be none or non-empty");
+ }
+
+ public:
+ NormalizedSymbol(const NormalizedSymbol &) = delete;
+ NormalizedSymbol &operator=(const NormalizedSymbol &) = delete;
+ NormalizedSymbol(NormalizedSymbol &&) = delete;
+ NormalizedSymbol &operator=(NormalizedSymbol &&) = delete;
+
+ Optional<StringRef> Name;
+ uint64_t Value = 0;
+ uint8_t Type = 0;
+ uint8_t Sect = 0;
+ uint16_t Desc = 0;
+ Linkage L = Linkage::Strong;
+ Scope S = Scope::Default;
+ Symbol *GraphSymbol = nullptr;
+ };
+
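+  /// A unified view of a MachO section header (section/section_64), plus the
+  /// graph Section created for it. Data is null for zero-fill sections.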
+ class NormalizedSection {
+ friend class MachOLinkGraphBuilder;
+
+ private:
+ NormalizedSection() = default;
+
+ public:
+ Section *GraphSection = nullptr;
+ uint64_t Address = 0;
+ uint64_t Size = 0;
+ uint64_t Alignment = 0;
+ uint32_t Flags = 0;
+ const char *Data = nullptr;
+ };
+
+ using SectionParserFunction = std::function<Error(NormalizedSection &S)>;
+
+ MachOLinkGraphBuilder(const object::MachOObjectFile &Obj);
+
+ LinkGraph &getGraph() const { return *G; }
+
+ const object::MachOObjectFile &getObject() const { return Obj; }
+
+ void addCustomSectionParser(StringRef SectionName,
+ SectionParserFunction Parse);
+
+ virtual Error addRelocations() = 0;
+
+ /// Create a symbol.
+ template <typename... ArgTs>
+ NormalizedSymbol &createNormalizedSymbol(ArgTs &&... Args) {
+ NormalizedSymbol *Sym = reinterpret_cast<NormalizedSymbol *>(
+ Allocator.Allocate<NormalizedSymbol>());
+ new (Sym) NormalizedSymbol(std::forward<ArgTs>(Args)...);
+ return *Sym;
+ }
+
+ /// Index is zero-based (MachO section indexes are usually one-based) and
+ /// assumed to be in-range. Client is responsible for checking.
+ NormalizedSection &getSectionByIndex(unsigned Index) {
+ auto I = IndexToSection.find(Index);
+ assert(I != IndexToSection.end() && "No section recorded at index");
+ return I->second;
+ }
+
+ /// Try to get the section at the given index. Will return an error if the
+ /// given index is out of range, or if no section has been added for the given
+ /// index.
+ Expected<NormalizedSection &> findSectionByIndex(unsigned Index) {
+ auto I = IndexToSection.find(Index);
+ if (I == IndexToSection.end())
+ return make_error<JITLinkError>("No section recorded for index " +
+ formatv("{0:u}", Index));
+ return I->second;
+ }
+
+ /// Try to get the symbol at the given index. Will return an error if the
+ /// given index is out of range, or if no symbol has been added for the given
+ /// index.
+ Expected<NormalizedSymbol &> findSymbolByIndex(uint64_t Index) {
+ if (Index >= IndexToSymbol.size())
+ return make_error<JITLinkError>("Symbol index out of range");
+ auto *Sym = IndexToSymbol[Index];
+ if (!Sym)
+ return make_error<JITLinkError>("No symbol at index " +
+ formatv("{0:u}", Index));
+ return *Sym;
+ }
+
+ /// Returns the symbol with the highest address not greater than the search
+ /// address, or null if no such symbol exists.
+ Symbol *getSymbolByAddress(JITTargetAddress Address) {
+ auto I = AddrToCanonicalSymbol.upper_bound(Address);
+ if (I == AddrToCanonicalSymbol.begin())
+ return nullptr;
+ return std::prev(I)->second;
+ }
+
+ /// Returns the symbol with the highest address not greater than the search
+ /// address, or an error if no such symbol exists.
+ Expected<Symbol &> findSymbolByAddress(JITTargetAddress Address) {
+ auto *Sym = getSymbolByAddress(Address);
+ if (Sym)
+ if (Address < Sym->getAddress() + Sym->getSize())
+ return *Sym;
+ return make_error<JITLinkError>("No symbol covering address " +
+ formatv("{0:x16}", Address));
+ }
+
+ static Linkage getLinkage(uint16_t Desc);
+ static Scope getScope(StringRef Name, uint8_t Type);
+ static bool isAltEntry(const NormalizedSymbol &NSym);
+
+private:
+ static unsigned getPointerSize(const object::MachOObjectFile &Obj);
+ static support::endianness getEndianness(const object::MachOObjectFile &Obj);
+
+ void setCanonicalSymbol(Symbol &Sym) {
+ auto *&CanonicalSymEntry = AddrToCanonicalSymbol[Sym.getAddress()];
+ // There should be no symbol at this address, or, if there is,
+ // it should be a zero-sized symbol from an empty section (which
+ // we can safely override).
+ assert((!CanonicalSymEntry || CanonicalSymEntry->getSize() == 0) &&
+ "Duplicate canonical symbol at address");
+ CanonicalSymEntry = &Sym;
+ }
+
+ Section &getCommonSection();
+ void addSectionStartSymAndBlock(Section &GraphSec, uint64_t Address,
+ const char *Data, uint64_t Size,
+ uint32_t Alignment, bool IsLive);
+
+ Error createNormalizedSections();
+ Error createNormalizedSymbols();
+
+ /// Create graph blocks and symbols for externals, absolutes, commons and
+ /// all defined symbols in sections without custom parsers.
+ Error graphifyRegularSymbols();
+
+ /// Create graph blocks and symbols for all sections.
+ Error graphifySectionsWithCustomParsers();
+
+ // Put the BumpPtrAllocator first so that we don't free any of the underlying
+ // memory until the Symbol/Addressable destructors have been run.
+ BumpPtrAllocator Allocator;
+
+ const object::MachOObjectFile &Obj;
+ std::unique_ptr<LinkGraph> G;
+
+ DenseMap<unsigned, NormalizedSection> IndexToSection;
+ Section *CommonSection = nullptr;
+
+ DenseMap<uint32_t, NormalizedSymbol *> IndexToSymbol;
+ std::map<JITTargetAddress, Symbol *> AddrToCanonicalSymbol;
+ StringMap<SectionParserFunction> CustomSectionParserFunctions;
+};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LIB_EXECUTIONENGINE_JITLINK_MACHOLINKGRAPHBUILDER_H
diff --git a/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
new file mode 100644
index 000000000000..945343bff89d
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -0,0 +1,736 @@
+//===---- MachO_arm64.cpp - JIT linker implementation for MachO/arm64 -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MachO/arm64 jit-link implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h"
+
+#include "BasicGOTAndStubsBuilder.h"
+#include "MachOLinkGraphBuilder.h"
+
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::jitlink::MachO_arm64_Edges;
+
+namespace {
+
+class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder {
+public:
+ MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj)
+      : MachOLinkGraphBuilder(Obj) {
+ addCustomSectionParser(
+ "__eh_frame", [this](NormalizedSection &EHFrameSection) {
+ if (!EHFrameSection.Data)
+ return make_error<JITLinkError>(
+ "__eh_frame section is marked zero-fill");
+ return MachOEHFrameBinaryParser(
+ *this, EHFrameSection.Address,
+ StringRef(EHFrameSection.Data, EHFrameSection.Size),
+ *EHFrameSection.GraphSection, 8, 4, NegDelta32, Delta64)
+ .addToGraph();
+ });
+ }
+
+private:
+ static Expected<MachOARM64RelocationKind>
+ getRelocationKind(const MachO::relocation_info &RI) {
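+    // Map a raw MachO relocation record onto a JITLink edge kind, accepting
+    // only the pcrel/extern/length combinations that this linker handles.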
+ switch (RI.r_type) {
+ case MachO::ARM64_RELOC_UNSIGNED:
+ if (!RI.r_pcrel) {
+ if (RI.r_length == 3)
+ return RI.r_extern ? Pointer64 : Pointer64Anon;
+ else if (RI.r_length == 2)
+ return Pointer32;
+ }
+ break;
+ case MachO::ARM64_RELOC_SUBTRACTOR:
+ // SUBTRACTOR must be non-pc-rel, extern, with length 2 or 3.
+ // Initially represent SUBTRACTOR relocations with 'Delta<W>'.
+ // They may be turned into NegDelta<W> by parsePairRelocation.
+ if (!RI.r_pcrel && RI.r_extern) {
+ if (RI.r_length == 2)
+ return Delta32;
+ else if (RI.r_length == 3)
+ return Delta64;
+ }
+ break;
+ case MachO::ARM64_RELOC_BRANCH26:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return Branch26;
+ break;
+ case MachO::ARM64_RELOC_PAGE21:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return Page21;
+ break;
+ case MachO::ARM64_RELOC_PAGEOFF12:
+ if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return PageOffset12;
+ break;
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return GOTPage21;
+ break;
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
+ if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return GOTPageOffset12;
+ break;
+ case MachO::ARM64_RELOC_POINTER_TO_GOT:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return PointerToGOT;
+ break;
+ case MachO::ARM64_RELOC_ADDEND:
+ if (!RI.r_pcrel && !RI.r_extern && RI.r_length == 2)
+ return PairedAddend;
+ break;
+ }
+
+ return make_error<JITLinkError>(
+ "Unsupported arm64 relocation: address=" +
+ formatv("{0:x8}", RI.r_address) +
+ ", symbolnum=" + formatv("{0:x6}", RI.r_symbolnum) +
+ ", kind=" + formatv("{0:x1}", RI.r_type) +
+ ", pc_rel=" + (RI.r_pcrel ? "true" : "false") +
+ ", extern=" + (RI.r_extern ? "true" : "false") +
+ ", length=" + formatv("{0:d}", RI.r_length));
+ }
+
+ MachO::relocation_info
+ getRelocationInfo(const object::relocation_iterator RelItr) {
+ MachO::any_relocation_info ARI =
+ getObject().getRelocation(RelItr->getRawDataRefImpl());
+ MachO::relocation_info RI;
+ memcpy(&RI, &ARI, sizeof(MachO::relocation_info));
+ return RI;
+ }
+
+ using PairRelocInfo =
+ std::tuple<MachOARM64RelocationKind, Symbol *, uint64_t>;
+
+ // Parses paired SUBTRACTOR/UNSIGNED relocations and, on success,
+ // returns the edge kind and addend to be used.
+ Expected<PairRelocInfo>
+ parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind,
+ const MachO::relocation_info &SubRI,
+ JITTargetAddress FixupAddress, const char *FixupContent,
+ object::relocation_iterator &UnsignedRelItr,
+ object::relocation_iterator &RelEnd) {
+ using namespace support;
+
+ assert(((SubtractorKind == Delta32 && SubRI.r_length == 2) ||
+ (SubtractorKind == Delta64 && SubRI.r_length == 3)) &&
+ "Subtractor kind should match length");
+ assert(SubRI.r_extern && "SUBTRACTOR reloc symbol should be extern");
+ assert(!SubRI.r_pcrel && "SUBTRACTOR reloc should not be PCRel");
+
+ if (UnsignedRelItr == RelEnd)
+ return make_error<JITLinkError>("arm64 SUBTRACTOR without paired "
+ "UNSIGNED relocation");
+
+ auto UnsignedRI = getRelocationInfo(UnsignedRelItr);
+
+ if (SubRI.r_address != UnsignedRI.r_address)
+ return make_error<JITLinkError>("arm64 SUBTRACTOR and paired UNSIGNED "
+ "point to different addresses");
+
+ if (SubRI.r_length != UnsignedRI.r_length)
+ return make_error<JITLinkError>("length of arm64 SUBTRACTOR and paired "
+ "UNSIGNED reloc must match");
+
+ Symbol *FromSymbol;
+ if (auto FromSymbolOrErr = findSymbolByIndex(SubRI.r_symbolnum))
+ FromSymbol = FromSymbolOrErr->GraphSymbol;
+ else
+ return FromSymbolOrErr.takeError();
+
+ // Read the current fixup value.
+ uint64_t FixupValue = 0;
+ if (SubRI.r_length == 3)
+ FixupValue = *(const little64_t *)FixupContent;
+ else
+ FixupValue = *(const little32_t *)FixupContent;
+
+ // Find 'ToSymbol' using symbol number or address, depending on whether the
+ // paired UNSIGNED relocation is extern.
+ Symbol *ToSymbol = nullptr;
+ if (UnsignedRI.r_extern) {
+ // Find target symbol by symbol index.
+ if (auto ToSymbolOrErr = findSymbolByIndex(UnsignedRI.r_symbolnum))
+ ToSymbol = ToSymbolOrErr->GraphSymbol;
+ else
+ return ToSymbolOrErr.takeError();
+ } else {
+ if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue))
+ ToSymbol = &*ToSymbolOrErr;
+ else
+ return ToSymbolOrErr.takeError();
+ FixupValue -= ToSymbol->getAddress();
+ }
+
+ MachOARM64RelocationKind DeltaKind;
+ Symbol *TargetSymbol;
+ uint64_t Addend;
+ if (&BlockToFix == &FromSymbol->getAddressable()) {
+ TargetSymbol = ToSymbol;
+ DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32;
+ Addend = FixupValue + (FixupAddress - FromSymbol->getAddress());
+ // FIXME: handle extern 'from'.
+ } else if (&BlockToFix == &ToSymbol->getAddressable()) {
+ TargetSymbol = &*FromSymbol;
+ DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32;
+ Addend = FixupValue - (FixupAddress - ToSymbol->getAddress());
+ } else {
+ // BlockToFix was neither FromSymbol nor ToSymbol.
+ return make_error<JITLinkError>("SUBTRACTOR relocation must fix up "
+ "either 'A' or 'B' (or a symbol in one "
+ "of their alt-entry groups)");
+ }
+
+ return PairRelocInfo(DeltaKind, TargetSymbol, Addend);
+ }
+
+ Error addRelocations() override {
+ using namespace support;
+ auto &Obj = getObject();
+
+ for (auto &S : Obj.sections()) {
+
+ JITTargetAddress SectionAddress = S.getAddress();
+
+ for (auto RelItr = S.relocation_begin(), RelEnd = S.relocation_end();
+ RelItr != RelEnd; ++RelItr) {
+
+ MachO::relocation_info RI = getRelocationInfo(RelItr);
+
+ // Sanity check the relocation kind.
+ auto Kind = getRelocationKind(RI);
+ if (!Kind)
+ return Kind.takeError();
+
+ // Find the address of the value to fix up.
+ JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
+
+ LLVM_DEBUG({
+ dbgs() << "Processing " << getMachOARM64RelocationKindName(*Kind)
+ << " relocation at " << format("0x%016" PRIx64, FixupAddress)
+ << "\n";
+ });
+
+ // Find the block that the fixup points to.
+ Block *BlockToFix = nullptr;
+ {
+ auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress);
+ if (!SymbolToFixOrErr)
+ return SymbolToFixOrErr.takeError();
+ BlockToFix = &SymbolToFixOrErr->getBlock();
+ }
+
+ if (FixupAddress + static_cast<JITTargetAddress>(1ULL << RI.r_length) >
+ BlockToFix->getAddress() + BlockToFix->getContent().size())
+ return make_error<JITLinkError>(
+ "Relocation content extends past end of fixup block");
+
+ // Get a pointer to the fixup content.
+ const char *FixupContent = BlockToFix->getContent().data() +
+ (FixupAddress - BlockToFix->getAddress());
+
+ // The target symbol and addend will be populated by the switch below.
+ Symbol *TargetSymbol = nullptr;
+ uint64_t Addend = 0;
+
+ if (*Kind == PairedAddend) {
+ // If this is an Addend relocation then process it and move to the
+ // paired reloc.
+
+ Addend = RI.r_symbolnum;
+
+          ++RelItr;
+          if (RelItr == RelEnd)
+            return make_error<JITLinkError>("Unpaired Addend reloc at " +
+                                            formatv("{0:x16}", FixupAddress));
+          RI = getRelocationInfo(RelItr);
+
+ Kind = getRelocationKind(RI);
+ if (!Kind)
+ return Kind.takeError();
+
+ if (*Kind != Branch26 && *Kind != Page21 && *Kind != PageOffset12)
+ return make_error<JITLinkError>(
+ "Invalid relocation pair: Addend + " +
+ getMachOARM64RelocationKindName(*Kind));
+ else
+ LLVM_DEBUG({
+ dbgs() << " pair is " << getMachOARM64RelocationKindName(*Kind)
+                   << "\n";
+ });
+
+ // Find the address of the value to fix up.
+ JITTargetAddress PairedFixupAddress =
+ SectionAddress + (uint32_t)RI.r_address;
+ if (PairedFixupAddress != FixupAddress)
+ return make_error<JITLinkError>("Paired relocation points at "
+ "different target");
+ }
+
+ switch (*Kind) {
+ case Branch26: {
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+ else
+ return TargetSymbolOrErr.takeError();
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if ((Instr & 0x7fffffff) != 0x14000000)
+ return make_error<JITLinkError>("BRANCH26 target is not a B or BL "
+ "instruction with a zero addend");
+ break;
+ }
+ case Pointer32:
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+ else
+ return TargetSymbolOrErr.takeError();
+ Addend = *(const ulittle32_t *)FixupContent;
+ break;
+ case Pointer64:
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+ else
+ return TargetSymbolOrErr.takeError();
+ Addend = *(const ulittle64_t *)FixupContent;
+ break;
+ case Pointer64Anon: {
+ JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent;
+ if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ TargetSymbol = &*TargetSymbolOrErr;
+ else
+ return TargetSymbolOrErr.takeError();
+ Addend = TargetAddress - TargetSymbol->getAddress();
+ break;
+ }
+ case Page21:
+ case GOTPage21: {
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+ else
+ return TargetSymbolOrErr.takeError();
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if ((Instr & 0xffffffe0) != 0x90000000)
+ return make_error<JITLinkError>("PAGE21/GOTPAGE21 target is not an "
+ "ADRP instruction with a zero "
+ "addend");
+ break;
+ }
+ case PageOffset12: {
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+ else
+ return TargetSymbolOrErr.takeError();
+ break;
+ }
+ case GOTPageOffset12: {
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+ else
+ return TargetSymbolOrErr.takeError();
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if ((Instr & 0xfffffc00) != 0xf9400000)
+ return make_error<JITLinkError>("GOTPAGEOFF12 target is not an LDR "
+ "immediate instruction with a zero "
+ "addend");
+ break;
+ }
+ case PointerToGOT:
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+ else
+ return TargetSymbolOrErr.takeError();
+ break;
+ case Delta32:
+ case Delta64: {
+ // We use Delta32/Delta64 to represent SUBTRACTOR relocations.
+ // parsePairRelocation handles the paired reloc, and returns the
+ // edge kind to be used (either Delta32/Delta64, or
+ // NegDelta32/NegDelta64, depending on the direction of the
+ // subtraction) along with the addend.
+ auto PairInfo =
+ parsePairRelocation(*BlockToFix, *Kind, RI, FixupAddress,
+ FixupContent, ++RelItr, RelEnd);
+ if (!PairInfo)
+ return PairInfo.takeError();
+ std::tie(*Kind, TargetSymbol, Addend) = *PairInfo;
+ assert(TargetSymbol && "No target symbol from parsePairRelocation?");
+ break;
+ }
+ default:
+ llvm_unreachable("Special relocation kind should not appear in "
+ "mach-o file");
+ }
+
+ LLVM_DEBUG({
+ Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
+ Addend);
+ printEdge(dbgs(), *BlockToFix, GE,
+ getMachOARM64RelocationKindName(*Kind));
+ dbgs() << "\n";
+ });
+ BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(),
+ *TargetSymbol, Addend);
+ }
+ }
+ return Error::success();
+ }
+};
+
+class MachO_arm64_GOTAndStubsBuilder
+ : public BasicGOTAndStubsBuilder<MachO_arm64_GOTAndStubsBuilder> {
+public:
+ MachO_arm64_GOTAndStubsBuilder(LinkGraph &G)
+ : BasicGOTAndStubsBuilder<MachO_arm64_GOTAndStubsBuilder>(G) {}
+
+ bool isGOTEdge(Edge &E) const {
+ return E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 ||
+ E.getKind() == PointerToGOT;
+ }
+
+ Symbol &createGOTEntry(Symbol &Target) {
+ auto &GOTEntryBlock = G.createContentBlock(
+ getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0);
+ GOTEntryBlock.addEdge(Pointer64, 0, Target, 0);
+ return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false);
+ }
+
+ void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
+ if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12) {
+ // Update the target, but leave the edge addend as-is.
+ E.setTarget(GOTEntry);
+ } else if (E.getKind() == PointerToGOT) {
+ E.setTarget(GOTEntry);
+ E.setKind(Delta32);
+ } else
+ llvm_unreachable("Not a GOT edge?");
+ }
+
+ bool isExternalBranchEdge(Edge &E) {
+ return E.getKind() == Branch26 && !E.getTarget().isDefined();
+ }
+
+ Symbol &createStub(Symbol &Target) {
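+    // Stubs are two instructions (LDR x16, <literal>; BR x16); the LDR's
+    // 19-bit literal field is pointed at the GOT entry via an LDRLiteral19
+    // edge.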
+ auto &StubContentBlock =
+ G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0);
+ // Re-use GOT entries for stub targets.
+ auto &GOTEntrySymbol = getGOTEntrySymbol(Target);
+ StubContentBlock.addEdge(LDRLiteral19, 0, GOTEntrySymbol, 0);
+ return G.addAnonymousSymbol(StubContentBlock, 0, 8, true, false);
+ }
+
+ void fixExternalBranchEdge(Edge &E, Symbol &Stub) {
+    assert(E.getKind() == Branch26 && "Not a Branch26 edge?");
+    assert(E.getAddend() == 0 && "Branch26 edge has non-zero addend?");
+ E.setTarget(Stub);
+ }
+
+private:
+ Section &getGOTSection() {
+ if (!GOTSection)
+ GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ);
+ return *GOTSection;
+ }
+
+ Section &getStubsSection() {
+ if (!StubsSection) {
+ auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
+ sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+ StubsSection = &G.createSection("$__STUBS", StubsProt);
+ }
+ return *StubsSection;
+ }
+
+ StringRef getGOTEntryBlockContent() {
+ return StringRef(reinterpret_cast<const char *>(NullGOTEntryContent),
+ sizeof(NullGOTEntryContent));
+ }
+
+ StringRef getStubBlockContent() {
+ return StringRef(reinterpret_cast<const char *>(StubContent),
+ sizeof(StubContent));
+ }
+
+ static const uint8_t NullGOTEntryContent[8];
+ static const uint8_t StubContent[8];
+ Section *GOTSection = nullptr;
+ Section *StubsSection = nullptr;
+};
+
+const uint8_t MachO_arm64_GOTAndStubsBuilder::NullGOTEntryContent[8] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+const uint8_t MachO_arm64_GOTAndStubsBuilder::StubContent[8] = {
+ 0x10, 0x00, 0x00, 0x58, // LDR x16, <literal>
+ 0x00, 0x02, 0x1f, 0xd6 // BR x16
+};
+
+} // namespace
+
+namespace llvm {
+namespace jitlink {
+
+class MachOJITLinker_arm64 : public JITLinker<MachOJITLinker_arm64> {
+ friend class JITLinker<MachOJITLinker_arm64>;
+
+public:
+ MachOJITLinker_arm64(std::unique_ptr<JITLinkContext> Ctx,
+ PassConfiguration PassConfig)
+ : JITLinker(std::move(Ctx), std::move(PassConfig)) {}
+
+private:
+ StringRef getEdgeKindName(Edge::Kind R) const override {
+ return getMachOARM64RelocationKindName(R);
+ }
+
+ Expected<std::unique_ptr<LinkGraph>>
+ buildGraph(MemoryBufferRef ObjBuffer) override {
+ auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer);
+ if (!MachOObj)
+ return MachOObj.takeError();
+ return MachOLinkGraphBuilder_arm64(**MachOObj).buildGraph();
+ }
+
+ static Error targetOutOfRangeError(const Block &B, const Edge &E) {
+ std::string ErrMsg;
+ {
+ raw_string_ostream ErrStream(ErrMsg);
+ ErrStream << "Relocation target out of range: ";
+ printEdge(ErrStream, B, E, getMachOARM64RelocationKindName(E.getKind()));
+ ErrStream << "\n";
+ }
+ return make_error<JITLinkError>(std::move(ErrMsg));
+ }
+
+ static unsigned getPageOffset12Shift(uint32_t Instr) {
+ constexpr uint32_t LDRLiteralMask = 0x3ffffc00;
+
+ // Check for a GPR LDR immediate with a zero embedded literal.
+ // If found, the top two bits contain the shift.
+ if ((Instr & LDRLiteralMask) == 0x39400000)
+ return Instr >> 30;
+
+ // Check for a Neon LDR immediate of size 64-bit or less with a zero
+ // embedded literal. If found, the top two bits contain the shift.
+ if ((Instr & LDRLiteralMask) == 0x3d400000)
+ return Instr >> 30;
+
+ // Check for a Neon LDR immediate of size 128-bit with a zero embedded
+ // literal.
+ constexpr uint32_t SizeBitsMask = 0xc0000000;
+ if ((Instr & (LDRLiteralMask | SizeBitsMask)) == 0x3dc00000)
+ return 4;
+
+ return 0;
+ }
+
+ Error applyFixup(Block &B, const Edge &E, char *BlockWorkingMem) const {
+ using namespace support;
+
+ char *FixupPtr = BlockWorkingMem + E.getOffset();
+ JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
+
+ switch (E.getKind()) {
+ case Branch26: {
+ assert((FixupAddress & 0x3) == 0 && "Branch-inst is not 32-bit aligned");
+
+ int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+
+ if (static_cast<uint64_t>(Value) & 0x3)
+ return make_error<JITLinkError>("Branch26 target is not 32-bit "
+ "aligned");
+
+ if (Value < -(1 << 27) || Value > ((1 << 27) - 1))
+ return targetOutOfRangeError(B, E);
+
+ uint32_t RawInstr = *(little32_t *)FixupPtr;
+ assert((RawInstr & 0x7fffffff) == 0x14000000 &&
+ "RawInstr isn't a B or BR immediate instruction");
+ uint32_t Imm = (static_cast<uint32_t>(Value) & ((1 << 28) - 1)) >> 2;
+ uint32_t FixedInstr = RawInstr | Imm;
+ *(little32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case Pointer32: {
+ uint64_t Value = E.getTarget().getAddress() + E.getAddend();
+ if (Value > std::numeric_limits<uint32_t>::max())
+ return targetOutOfRangeError(B, E);
+ *(ulittle32_t *)FixupPtr = Value;
+ break;
+ }
+ case Pointer64: {
+ uint64_t Value = E.getTarget().getAddress() + E.getAddend();
+ *(ulittle64_t *)FixupPtr = Value;
+ break;
+ }
+ case Page21:
+ case GOTPage21: {
+ assert(E.getAddend() == 0 && "PAGE21/GOTPAGE21 with non-zero addend");
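+    // ADRP materializes a PC-page-relative address: compute the 4KiB page
+    // delta and split it into the instruction's immlo (bits 30:29) and immhi
+    // (bits 23:5) fields.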
+ uint64_t TargetPage =
+ E.getTarget().getAddress() & ~static_cast<uint64_t>(4096 - 1);
+ uint64_t PCPage = B.getAddress() & ~static_cast<uint64_t>(4096 - 1);
+
+ int64_t PageDelta = TargetPage - PCPage;
+ if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1))
+ return targetOutOfRangeError(B, E);
+
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ assert((RawInstr & 0xffffffe0) == 0x90000000 &&
+ "RawInstr isn't an ADRP instruction");
+ uint32_t ImmLo = (static_cast<uint64_t>(PageDelta) >> 12) & 0x3;
+ uint32_t ImmHi = (static_cast<uint64_t>(PageDelta) >> 14) & 0x7ffff;
+ uint32_t FixedInstr = RawInstr | (ImmLo << 29) | (ImmHi << 5);
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case PageOffset12: {
+ assert(E.getAddend() == 0 && "PAGEOFF12 with non-zero addend");
+ uint64_t TargetOffset = E.getTarget().getAddress() & 0xfff;
+
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ unsigned ImmShift = getPageOffset12Shift(RawInstr);
+
+ if (TargetOffset & ((1 << ImmShift) - 1))
+ return make_error<JITLinkError>("PAGEOFF12 target is not aligned");
+
+ uint32_t EncodedImm = (TargetOffset >> ImmShift) << 10;
+ uint32_t FixedInstr = RawInstr | EncodedImm;
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case GOTPageOffset12: {
+ assert(E.getAddend() == 0 && "GOTPAGEOF12 with non-zero addend");
+
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ assert((RawInstr & 0xfffffc00) == 0xf9400000 &&
+ "RawInstr isn't a 64-bit LDR immediate");
+
+ uint32_t TargetOffset = E.getTarget().getAddress() & 0xfff;
+ assert((TargetOffset & 0x7) == 0 && "GOT entry is not 8-byte aligned");
+ uint32_t EncodedImm = (TargetOffset >> 3) << 10;
+ uint32_t FixedInstr = RawInstr | EncodedImm;
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case LDRLiteral19: {
+ assert((FixupAddress & 0x3) == 0 && "LDR is not 32-bit aligned");
+ assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend");
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal");
+ int64_t Delta = E.getTarget().getAddress() - FixupAddress;
+ if (Delta & 0x3)
+ return make_error<JITLinkError>("LDR literal target is not 32-bit "
+ "aligned");
+ if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1))
+ return targetOutOfRangeError(B, E);
+
+ uint32_t EncodedImm = (static_cast<uint32_t>(Delta) >> 2) << 5;
+ uint32_t FixedInstr = RawInstr | EncodedImm;
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case Delta32:
+ case Delta64:
+ case NegDelta32:
+ case NegDelta64: {
+ int64_t Value;
+ if (E.getKind() == Delta32 || E.getKind() == Delta64)
+ Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+ else
+ Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
+
+ if (E.getKind() == Delta32 || E.getKind() == NegDelta32) {
+ if (Value < std::numeric_limits<int32_t>::min() ||
+ Value > std::numeric_limits<int32_t>::max())
+ return targetOutOfRangeError(B, E);
+ *(little32_t *)FixupPtr = Value;
+ } else
+ *(little64_t *)FixupPtr = Value;
+ break;
+ }
+ default:
+ llvm_unreachable("Unrecognized edge kind");
+ }
+
+ return Error::success();
+ }
+
+ uint64_t NullValue = 0;
+};
+
+void jitLink_MachO_arm64(std::unique_ptr<JITLinkContext> Ctx) {
+ PassConfiguration Config;
+ Triple TT("arm64-apple-ios");
+
+ if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+ // Add a mark-live pass.
+ if (auto MarkLive = Ctx->getMarkLivePass(TT))
+ Config.PrePrunePasses.push_back(std::move(MarkLive));
+ else
+ Config.PrePrunePasses.push_back(markAllSymbolsLive);
+
+ // Add an in-place GOT/Stubs pass.
+ Config.PostPrunePasses.push_back([](LinkGraph &G) -> Error {
+ MachO_arm64_GOTAndStubsBuilder(G).run();
+ return Error::success();
+ });
+ }
+
+ if (auto Err = Ctx->modifyPassConfig(TT, Config))
+ return Ctx->notifyFailed(std::move(Err));
+
+ // Construct a JITLinker and run the link function.
+ MachOJITLinker_arm64::link(std::move(Ctx), std::move(Config));
+}
+
+StringRef getMachOARM64RelocationKindName(Edge::Kind R) {
+ switch (R) {
+ case Branch26:
+ return "Branch26";
+ case Pointer64:
+ return "Pointer64";
+ case Pointer64Anon:
+ return "Pointer64Anon";
+ case Page21:
+ return "Page21";
+ case PageOffset12:
+ return "PageOffset12";
+ case GOTPage21:
+ return "GOTPage21";
+ case GOTPageOffset12:
+ return "GOTPageOffset12";
+ case PointerToGOT:
+ return "PointerToGOT";
+ case PairedAddend:
+ return "PairedAddend";
+ case LDRLiteral19:
+ return "LDRLiteral19";
+ case Delta32:
+ return "Delta32";
+ case Delta64:
+ return "Delta64";
+ case NegDelta32:
+ return "NegDelta32";
+ case NegDelta64:
+ return "NegDelta64";
+ default:
+ return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
+ }
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index 4010678c6d33..d83787ffd598 100644
--- a/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -13,7 +13,7 @@
#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
#include "BasicGOTAndStubsBuilder.h"
-#include "MachOAtomGraphBuilder.h"
+#include "MachOLinkGraphBuilder.h"
#define DEBUG_TYPE "jitlink"
@@ -23,16 +23,21 @@ using namespace llvm::jitlink::MachO_x86_64_Edges;
namespace {
-class MachOAtomGraphBuilder_x86_64 : public MachOAtomGraphBuilder {
+class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder {
public:
- MachOAtomGraphBuilder_x86_64(const object::MachOObjectFile &Obj)
- : MachOAtomGraphBuilder(Obj),
- NumSymbols(Obj.getSymtabLoadCommand().nsyms) {
- addCustomAtomizer("__eh_frame", [this](MachOSection &EHFrameSection) {
- return addEHFrame(getGraph(), EHFrameSection.getGenericSection(),
- EHFrameSection.getContent(),
- EHFrameSection.getAddress(), NegDelta32, Delta64);
- });
+ MachOLinkGraphBuilder_x86_64(const object::MachOObjectFile &Obj)
+ : MachOLinkGraphBuilder(Obj) {
+ addCustomSectionParser(
+ "__eh_frame", [this](NormalizedSection &EHFrameSection) {
+ if (!EHFrameSection.Data)
+ return make_error<JITLinkError>(
+ "__eh_frame section is marked zero-fill");
+ return MachOEHFrameBinaryParser(
+ *this, EHFrameSection.Address,
+ StringRef(EHFrameSection.Data, EHFrameSection.Size),
+ *EHFrameSection.GraphSection, 8, 4, NegDelta32, Delta64)
+ .addToGraph();
+ });
}
private:
@@ -40,8 +45,12 @@ private:
getRelocationKind(const MachO::relocation_info &RI) {
switch (RI.r_type) {
case MachO::X86_64_RELOC_UNSIGNED:
- if (!RI.r_pcrel && RI.r_length == 3)
- return RI.r_extern ? Pointer64 : Pointer64Anon;
+ if (!RI.r_pcrel) {
+ if (RI.r_length == 3)
+ return RI.r_extern ? Pointer64 : Pointer64Anon;
+ else if (RI.r_extern && RI.r_length == 2)
+ return Pointer32;
+ }
break;
case MachO::X86_64_RELOC_SIGNED:
if (RI.r_pcrel && RI.r_length == 2)
@@ -94,21 +103,10 @@ private:
", symbolnum=" + formatv("{0:x6}", RI.r_symbolnum) +
", kind=" + formatv("{0:x1}", RI.r_type) +
", pc_rel=" + (RI.r_pcrel ? "true" : "false") +
- ", extern= " + (RI.r_extern ? "true" : "false") +
+ ", extern=" + (RI.r_extern ? "true" : "false") +
", length=" + formatv("{0:d}", RI.r_length));
}
- Expected<Atom &> findAtomBySymbolIndex(const MachO::relocation_info &RI) {
- auto &Obj = getObject();
- if (RI.r_symbolnum >= NumSymbols)
- return make_error<JITLinkError>("Symbol index out of range");
- auto SymI = Obj.getSymbolByIndex(RI.r_symbolnum);
- auto Name = SymI->getName();
- if (!Name)
- return Name.takeError();
- return getGraph().getAtomByName(*Name);
- }
-
MachO::relocation_info
getRelocationInfo(const object::relocation_iterator RelItr) {
MachO::any_relocation_info ARI =
@@ -118,12 +116,12 @@ private:
return RI;
}
- using PairRelocInfo = std::tuple<MachOX86RelocationKind, Atom *, uint64_t>;
+ using PairRelocInfo = std::tuple<MachOX86RelocationKind, Symbol *, uint64_t>;
// Parses paired SUBTRACTOR/UNSIGNED relocations and, on success,
// returns the edge kind and addend to be used.
Expected<PairRelocInfo>
- parsePairRelocation(DefinedAtom &AtomToFix, Edge::Kind SubtractorKind,
+ parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind,
const MachO::relocation_info &SubRI,
JITTargetAddress FixupAddress, const char *FixupContent,
object::relocation_iterator &UnsignedRelItr,
@@ -150,9 +148,11 @@ private:
return make_error<JITLinkError>("length of x86_64 SUBTRACTOR and paired "
"UNSIGNED reloc must match");
- auto FromAtom = findAtomBySymbolIndex(SubRI);
- if (!FromAtom)
- return FromAtom.takeError();
+ Symbol *FromSymbol;
+ if (auto FromSymbolOrErr = findSymbolByIndex(SubRI.r_symbolnum))
+ FromSymbol = FromSymbolOrErr->GraphSymbol;
+ else
+ return FromSymbolOrErr.takeError();
// Read the current fixup value.
uint64_t FixupValue = 0;
@@ -161,54 +161,60 @@ private:
else
FixupValue = *(const little32_t *)FixupContent;
- // Find 'ToAtom' using symbol number or address, depending on whether the
+ // Find 'ToSymbol' using symbol number or address, depending on whether the
// paired UNSIGNED relocation is extern.
- Atom *ToAtom = nullptr;
+ Symbol *ToSymbol = nullptr;
if (UnsignedRI.r_extern) {
- // Find target atom by symbol index.
- if (auto ToAtomOrErr = findAtomBySymbolIndex(UnsignedRI))
- ToAtom = &*ToAtomOrErr;
+ // Find target symbol by symbol index.
+ if (auto ToSymbolOrErr = findSymbolByIndex(UnsignedRI.r_symbolnum))
+ ToSymbol = ToSymbolOrErr->GraphSymbol;
else
- return ToAtomOrErr.takeError();
+ return ToSymbolOrErr.takeError();
} else {
- if (auto ToAtomOrErr = getGraph().findAtomByAddress(FixupValue))
- ToAtom = &*ToAtomOrErr;
+ if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue))
+ ToSymbol = &*ToSymbolOrErr;
else
- return ToAtomOrErr.takeError();
- FixupValue -= ToAtom->getAddress();
+ return ToSymbolOrErr.takeError();
+ FixupValue -= ToSymbol->getAddress();
}
MachOX86RelocationKind DeltaKind;
- Atom *TargetAtom;
+ Symbol *TargetSymbol;
uint64_t Addend;
- if (areLayoutLocked(AtomToFix, *FromAtom)) {
- TargetAtom = ToAtom;
+ if (&BlockToFix == &FromSymbol->getAddressable()) {
+ TargetSymbol = ToSymbol;
DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32;
- Addend = FixupValue + (FixupAddress - FromAtom->getAddress());
+ Addend = FixupValue + (FixupAddress - FromSymbol->getAddress());
// FIXME: handle extern 'from'.
- } else if (areLayoutLocked(AtomToFix, *ToAtom)) {
- TargetAtom = &*FromAtom;
+ } else if (&BlockToFix == &ToSymbol->getAddressable()) {
+ TargetSymbol = FromSymbol;
DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32;
- Addend = FixupValue - (FixupAddress - ToAtom->getAddress());
+ Addend = FixupValue - (FixupAddress - ToSymbol->getAddress());
} else {
- // AtomToFix was neither FromAtom nor ToAtom.
+ // BlockToFix was neither FromSymbol nor ToSymbol.
return make_error<JITLinkError>("SUBTRACTOR relocation must fix up "
- "either 'A' or 'B' (or an atom in one "
- "of their alt-entry groups)");
+ "either 'A' or 'B' (or a symbol in one "
+ "of their alt-entry chains)");
}
- return PairRelocInfo(DeltaKind, TargetAtom, Addend);
+ return PairRelocInfo(DeltaKind, TargetSymbol, Addend);
}
Error addRelocations() override {
using namespace support;
- auto &G = getGraph();
auto &Obj = getObject();
for (auto &S : Obj.sections()) {
JITTargetAddress SectionAddress = S.getAddress();
+ if (S.isVirtual()) {
+ if (S.relocation_begin() != S.relocation_end())
+ return make_error<JITLinkError>("Virtual section contains "
+ "relocations");
+ continue;
+ }
+
for (auto RelItr = S.relocation_begin(), RelEnd = S.relocation_end();
RelItr != RelEnd; ++RelItr) {
@@ -227,26 +233,26 @@ private:
<< format("0x%016" PRIx64, FixupAddress) << "\n";
});
- // Find the atom that the fixup points to.
- DefinedAtom *AtomToFix = nullptr;
+ // Find the block that the fixup points to.
+ Block *BlockToFix = nullptr;
{
- auto AtomToFixOrErr = G.findAtomByAddress(FixupAddress);
- if (!AtomToFixOrErr)
- return AtomToFixOrErr.takeError();
- AtomToFix = &*AtomToFixOrErr;
+ auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress);
+ if (!SymbolToFixOrErr)
+ return SymbolToFixOrErr.takeError();
+ BlockToFix = &SymbolToFixOrErr->getBlock();
}
if (FixupAddress + static_cast<JITTargetAddress>(1ULL << RI.r_length) >
- AtomToFix->getAddress() + AtomToFix->getContent().size())
+ BlockToFix->getAddress() + BlockToFix->getContent().size())
return make_error<JITLinkError>(
- "Relocation content extends past end of fixup atom");
+ "Relocation extends past end of fixup block");
// Get a pointer to the fixup content.
- const char *FixupContent = AtomToFix->getContent().data() +
- (FixupAddress - AtomToFix->getAddress());
+ const char *FixupContent = BlockToFix->getContent().data() +
+ (FixupAddress - BlockToFix->getAddress());
- // The target atom and addend will be populated by the switch below.
- Atom *TargetAtom = nullptr;
+ // The target symbol and addend will be populated by the switch below.
+ Symbol *TargetSymbol = nullptr;
uint64_t Addend = 0;
switch (*Kind) {
@@ -254,46 +260,53 @@ private:
case PCRel32:
case PCRel32GOTLoad:
case PCRel32GOT:
- if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI))
- TargetAtom = &*TargetAtomOrErr;
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+ else
+ return TargetSymbolOrErr.takeError();
+ Addend = *(const ulittle32_t *)FixupContent;
+ break;
+ case Pointer32:
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
- return TargetAtomOrErr.takeError();
+ return TargetSymbolOrErr.takeError();
Addend = *(const ulittle32_t *)FixupContent;
break;
case Pointer64:
- if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI))
- TargetAtom = &*TargetAtomOrErr;
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
- return TargetAtomOrErr.takeError();
+ return TargetSymbolOrErr.takeError();
Addend = *(const ulittle64_t *)FixupContent;
break;
case Pointer64Anon: {
JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent;
- if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress))
- TargetAtom = &*TargetAtomOrErr;
+ if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ TargetSymbol = &*TargetSymbolOrErr;
else
- return TargetAtomOrErr.takeError();
- Addend = TargetAddress - TargetAtom->getAddress();
+ return TargetSymbolOrErr.takeError();
+ Addend = TargetAddress - TargetSymbol->getAddress();
break;
}
case PCRel32Minus1:
case PCRel32Minus2:
case PCRel32Minus4:
- if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI))
- TargetAtom = &*TargetAtomOrErr;
+ if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+ TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
- return TargetAtomOrErr.takeError();
+ return TargetSymbolOrErr.takeError();
Addend = *(const ulittle32_t *)FixupContent +
(1 << (*Kind - PCRel32Minus1));
break;
case PCRel32Anon: {
JITTargetAddress TargetAddress =
FixupAddress + 4 + *(const ulittle32_t *)FixupContent;
- if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress))
- TargetAtom = &*TargetAtomOrErr;
+ if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ TargetSymbol = &*TargetSymbolOrErr;
else
- return TargetAtomOrErr.takeError();
- Addend = TargetAddress - TargetAtom->getAddress();
+ return TargetSymbolOrErr.takeError();
+ Addend = TargetAddress - TargetSymbol->getAddress();
break;
}
case PCRel32Minus1Anon:
@@ -303,11 +316,11 @@ private:
static_cast<JITTargetAddress>(1ULL << (*Kind - PCRel32Minus1Anon));
JITTargetAddress TargetAddress =
FixupAddress + 4 + Delta + *(const ulittle32_t *)FixupContent;
- if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress))
- TargetAtom = &*TargetAtomOrErr;
+ if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ TargetSymbol = &*TargetSymbolOrErr;
else
- return TargetAtomOrErr.takeError();
- Addend = TargetAddress - TargetAtom->getAddress();
+ return TargetSymbolOrErr.takeError();
+ Addend = TargetAddress - TargetSymbol->getAddress();
break;
}
case Delta32:
@@ -318,12 +331,12 @@ private:
// NegDelta32/NegDelta64, depending on the direction of the
// subtraction) along with the addend.
auto PairInfo =
- parsePairRelocation(*AtomToFix, *Kind, RI, FixupAddress,
+ parsePairRelocation(*BlockToFix, *Kind, RI, FixupAddress,
FixupContent, ++RelItr, RelEnd);
if (!PairInfo)
return PairInfo.takeError();
- std::tie(*Kind, TargetAtom, Addend) = *PairInfo;
- assert(TargetAtom && "No target atom from parsePairRelocation?");
+ std::tie(*Kind, TargetSymbol, Addend) = *PairInfo;
+ assert(TargetSymbol && "No target symbol from parsePairRelocation?");
break;
}
default:
@@ -332,41 +345,38 @@ private:
}
LLVM_DEBUG({
- Edge GE(*Kind, FixupAddress - AtomToFix->getAddress(), *TargetAtom,
+ Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
Addend);
- printEdge(dbgs(), *AtomToFix, GE,
+ printEdge(dbgs(), *BlockToFix, GE,
getMachOX86RelocationKindName(*Kind));
dbgs() << "\n";
});
- AtomToFix->addEdge(*Kind, FixupAddress - AtomToFix->getAddress(),
- *TargetAtom, Addend);
+ BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(),
+ *TargetSymbol, Addend);
}
}
return Error::success();
}
-
- unsigned NumSymbols = 0;
};
class MachO_x86_64_GOTAndStubsBuilder
: public BasicGOTAndStubsBuilder<MachO_x86_64_GOTAndStubsBuilder> {
public:
- MachO_x86_64_GOTAndStubsBuilder(AtomGraph &G)
+ MachO_x86_64_GOTAndStubsBuilder(LinkGraph &G)
: BasicGOTAndStubsBuilder<MachO_x86_64_GOTAndStubsBuilder>(G) {}
bool isGOTEdge(Edge &E) const {
return E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad;
}
- DefinedAtom &createGOTEntry(Atom &Target) {
- auto &GOTEntryAtom = G.addAnonymousAtom(getGOTSection(), 0x0, 8);
- GOTEntryAtom.setContent(
- StringRef(reinterpret_cast<const char *>(NullGOTEntryContent), 8));
- GOTEntryAtom.addEdge(Pointer64, 0, Target, 0);
- return GOTEntryAtom;
+ Symbol &createGOTEntry(Symbol &Target) {
+ auto &GOTEntryBlock = G.createContentBlock(
+ getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0);
+ GOTEntryBlock.addEdge(Pointer64, 0, Target, 0);
+ return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false);
}
- void fixGOTEdge(Edge &E, Atom &GOTEntry) {
+ void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
assert((E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad) &&
"Not a GOT edge?");
E.setKind(PCRel32);
@@ -378,19 +388,16 @@ public:
return E.getKind() == Branch32 && !E.getTarget().isDefined();
}
- DefinedAtom &createStub(Atom &Target) {
- auto &StubAtom = G.addAnonymousAtom(getStubsSection(), 0x0, 2);
- StubAtom.setContent(
- StringRef(reinterpret_cast<const char *>(StubContent), 6));
-
+ Symbol &createStub(Symbol &Target) {
+ auto &StubContentBlock =
+ G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0);
// Re-use GOT entries for stub targets.
- auto &GOTEntryAtom = getGOTEntryAtom(Target);
- StubAtom.addEdge(PCRel32, 2, GOTEntryAtom, 0);
-
- return StubAtom;
+ auto &GOTEntrySymbol = getGOTEntrySymbol(Target);
+ StubContentBlock.addEdge(PCRel32, 2, GOTEntrySymbol, 0);
+ return G.addAnonymousSymbol(StubContentBlock, 0, 6, true, false);
}
- void fixExternalBranchEdge(Edge &E, Atom &Stub) {
+ void fixExternalBranchEdge(Edge &E, Symbol &Stub) {
assert(E.getKind() == Branch32 && "Not a Branch32 edge?");
assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?");
E.setTarget(Stub);
@@ -399,7 +406,7 @@ public:
private:
Section &getGOTSection() {
if (!GOTSection)
- GOTSection = &G.createSection("$__GOT", 8, sys::Memory::MF_READ, false);
+ GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ);
return *GOTSection;
}
@@ -407,11 +414,21 @@ private:
if (!StubsSection) {
auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
sys::Memory::MF_READ | sys::Memory::MF_EXEC);
- StubsSection = &G.createSection("$__STUBS", 8, StubsProt, false);
+ StubsSection = &G.createSection("$__STUBS", StubsProt);
}
return *StubsSection;
}
+ StringRef getGOTEntryBlockContent() {
+ return StringRef(reinterpret_cast<const char *>(NullGOTEntryContent),
+ sizeof(NullGOTEntryContent));
+ }
+
+ StringRef getStubBlockContent() {
+ return StringRef(reinterpret_cast<const char *>(StubContent),
+ sizeof(StubContent));
+ }
+
static const uint8_t NullGOTEntryContent[8];
static const uint8_t StubContent[6];
Section *GOTSection = nullptr;
@@ -440,30 +457,31 @@ private:
return getMachOX86RelocationKindName(R);
}
- Expected<std::unique_ptr<AtomGraph>>
+ Expected<std::unique_ptr<LinkGraph>>
buildGraph(MemoryBufferRef ObjBuffer) override {
auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer);
if (!MachOObj)
return MachOObj.takeError();
- return MachOAtomGraphBuilder_x86_64(**MachOObj).buildGraph();
+ return MachOLinkGraphBuilder_x86_64(**MachOObj).buildGraph();
}
- static Error targetOutOfRangeError(const Atom &A, const Edge &E) {
+ static Error targetOutOfRangeError(const Block &B, const Edge &E) {
std::string ErrMsg;
{
raw_string_ostream ErrStream(ErrMsg);
ErrStream << "Relocation target out of range: ";
- printEdge(ErrStream, A, E, getMachOX86RelocationKindName(E.getKind()));
+ printEdge(ErrStream, B, E, getMachOX86RelocationKindName(E.getKind()));
ErrStream << "\n";
}
return make_error<JITLinkError>(std::move(ErrMsg));
}
- Error applyFixup(DefinedAtom &A, const Edge &E, char *AtomWorkingMem) const {
+ Error applyFixup(Block &B, const Edge &E, char *BlockWorkingMem) const {
+
using namespace support;
- char *FixupPtr = AtomWorkingMem + E.getOffset();
- JITTargetAddress FixupAddress = A.getAddress() + E.getOffset();
+ char *FixupPtr = BlockWorkingMem + E.getOffset();
+ JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
switch (E.getKind()) {
case Branch32:
@@ -473,7 +491,7 @@ private:
E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend();
if (Value < std::numeric_limits<int32_t>::min() ||
Value > std::numeric_limits<int32_t>::max())
- return targetOutOfRangeError(A, E);
+ return targetOutOfRangeError(B, E);
*(little32_t *)FixupPtr = Value;
break;
}
@@ -491,7 +509,7 @@ private:
E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend();
if (Value < std::numeric_limits<int32_t>::min() ||
Value > std::numeric_limits<int32_t>::max())
- return targetOutOfRangeError(A, E);
+ return targetOutOfRangeError(B, E);
*(little32_t *)FixupPtr = Value;
break;
}
@@ -503,7 +521,7 @@ private:
E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend();
if (Value < std::numeric_limits<int32_t>::min() ||
Value > std::numeric_limits<int32_t>::max())
- return targetOutOfRangeError(A, E);
+ return targetOutOfRangeError(B, E);
*(little32_t *)FixupPtr = Value;
break;
}
@@ -520,12 +538,19 @@ private:
if (E.getKind() == Delta32 || E.getKind() == NegDelta32) {
if (Value < std::numeric_limits<int32_t>::min() ||
Value > std::numeric_limits<int32_t>::max())
- return targetOutOfRangeError(A, E);
+ return targetOutOfRangeError(B, E);
*(little32_t *)FixupPtr = Value;
} else
*(little64_t *)FixupPtr = Value;
break;
}
+ case Pointer32: {
+ uint64_t Value = E.getTarget().getAddress() + E.getAddend();
+ if (Value > std::numeric_limits<uint32_t>::max())
+ return targetOutOfRangeError(B, E);
+ *(ulittle32_t *)FixupPtr = Value;
+ break;
+ }
default:
llvm_unreachable("Unrecognized edge kind");
}
@@ -545,10 +570,10 @@ void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx) {
if (auto MarkLive = Ctx->getMarkLivePass(TT))
Config.PrePrunePasses.push_back(std::move(MarkLive));
else
- Config.PrePrunePasses.push_back(markAllAtomsLive);
+ Config.PrePrunePasses.push_back(markAllSymbolsLive);
// Add an in-place GOT/Stubs pass.
- Config.PostPrunePasses.push_back([](AtomGraph &G) -> Error {
+ Config.PostPrunePasses.push_back([](LinkGraph &G) -> Error {
MachO_x86_64_GOTAndStubsBuilder(G).run();
return Error::success();
});
@@ -565,6 +590,8 @@ StringRef getMachOX86RelocationKindName(Edge::Kind R) {
switch (R) {
case Branch32:
return "Branch32";
+ case Pointer32:
+ return "Pointer32";
case Pointer64:
return "Pointer64";
case Pointer64Anon:
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 08815b7a80ae..94741f5f01d5 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -23,7 +23,7 @@
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/MutexGuard.h"
+#include <mutex>
using namespace llvm;
@@ -88,7 +88,7 @@ MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> TM,
}
MCJIT::~MCJIT() {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
Dyld.deregisterEHFrames();
@@ -100,7 +100,7 @@ MCJIT::~MCJIT() {
}
void MCJIT::addModule(std::unique_ptr<Module> M) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
if (M->getDataLayout().isDefault())
M->setDataLayout(getDataLayout());
@@ -109,7 +109,7 @@ void MCJIT::addModule(std::unique_ptr<Module> M) {
}
bool MCJIT::removeModule(Module *M) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
return OwnedModules.removeModule(M);
}
@@ -136,14 +136,14 @@ void MCJIT::addArchive(object::OwningBinary<object::Archive> A) {
}
void MCJIT::setObjectCache(ObjectCache* NewCache) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
ObjCache = NewCache;
}
std::unique_ptr<MemoryBuffer> MCJIT::emitObject(Module *M) {
assert(M && "Can not emit a null module");
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// Materialize all globals in the module if they have not been
// materialized already.
@@ -185,7 +185,7 @@ std::unique_ptr<MemoryBuffer> MCJIT::emitObject(Module *M) {
void MCJIT::generateCodeForModule(Module *M) {
// Get a thread lock to make sure we aren't trying to load multiple times
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// This must be a module which has already been added to this MCJIT instance.
assert(OwnedModules.ownsModule(M) &&
@@ -234,7 +234,7 @@ void MCJIT::generateCodeForModule(Module *M) {
}
void MCJIT::finalizeLoadedModules() {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// Resolve any outstanding relocations.
Dyld.resolveRelocations();
@@ -250,7 +250,7 @@ void MCJIT::finalizeLoadedModules() {
// FIXME: Rename this.
void MCJIT::finalizeObject() {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// generateCodeForModule is going to move objects out of the 'added' list,
// so we need to copy that out before using it:
@@ -265,7 +265,7 @@ void MCJIT::finalizeObject() {
}
void MCJIT::finalizeModule(Module *M) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// This must be a module which has already been added to this MCJIT instance.
assert(OwnedModules.ownsModule(M) && "MCJIT::finalizeModule: Unknown module.");
@@ -292,7 +292,7 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name,
if (DemangledName[0] == getDataLayout().getGlobalPrefix())
DemangledName = DemangledName.substr(1);
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// If it hasn't already been generated, see if it's in one of our modules.
for (ModulePtrSet::iterator I = OwnedModules.begin_added(),
@@ -332,7 +332,7 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
JITSymbol MCJIT::findSymbol(const std::string &Name,
bool CheckFunctionsOnly) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// First, check to see if we already have this symbol.
if (auto Sym = findExistingSymbol(Name))
@@ -388,7 +388,7 @@ JITSymbol MCJIT::findSymbol(const std::string &Name,
}
uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
uint64_t Result = getSymbolAddress(Name, false);
if (Result != 0)
finalizeLoadedModules();
@@ -396,7 +396,7 @@ uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) {
}
uint64_t MCJIT::getFunctionAddress(const std::string &Name) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
uint64_t Result = getSymbolAddress(Name, true);
if (Result != 0)
finalizeLoadedModules();
@@ -405,7 +405,7 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) {
// Deprecated. Use getFunctionAddress instead.
void *MCJIT::getPointerToFunction(Function *F) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
Mangler Mang;
SmallString<128> Name;
@@ -632,14 +632,14 @@ void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) {
void MCJIT::RegisterJITEventListener(JITEventListener *L) {
if (!L)
return;
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
EventListeners.push_back(L);
}
void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
if (!L)
return;
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
auto I = find(reverse(EventListeners), L);
if (I != EventListeners.rend()) {
std::swap(*I, EventListeners.back());
@@ -651,7 +651,7 @@ void MCJIT::notifyObjectLoaded(const object::ObjectFile &Obj,
const RuntimeDyld::LoadedObjectInfo &L) {
uint64_t Key =
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Obj.getData().data()));
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
MemMgr->notifyObjectLoaded(this, Obj);
for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
EventListeners[I]->notifyObjectLoaded(Key, Obj, L);
@@ -661,7 +661,7 @@ void MCJIT::notifyObjectLoaded(const object::ObjectFile &Obj,
void MCJIT::notifyFreeingObject(const object::ObjectFile &Obj) {
uint64_t Key =
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Obj.getData().data()));
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
for (JITEventListener *L : EventListeners)
L->notifyFreeingObject(Key);
}
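
The MCJIT changes above are a mechanical migration from the removed llvm/Support/MutexGuard.h RAII wrapper to std::lock_guard over the same sys::Mutex. A minimal sketch of the pattern, assuming only that sys::Mutex models BasicLockable (lock()/unlock()), which is what makes the standard guard a drop-in replacement:

#include "llvm/Support/Mutex.h"
#include <mutex>

struct Guarded {
  llvm::sys::Mutex lock;
  int Counter = 0;

  void increment() {
    // Was: MutexGuard locked(lock);
    std::lock_guard<llvm::sys::Mutex> locked(lock);
    ++Counter; // critical section
  }
};
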
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 2ad9d24555f3..bb5d96051da9 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -177,7 +177,7 @@ void OProfileJITEventListener::notifyFreeingObject(ObjectKey Key) {
namespace llvm {
JITEventListener *JITEventListener::createOProfileJITEventListener() {
- return new OProfileJITEventListener(llvm::make_unique<OProfileWrapper>());
+ return new OProfileJITEventListener(std::make_unique<OProfileWrapper>());
}
} // namespace llvm
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
index 1a2667736926..b78d2531382d 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -17,11 +17,11 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <dirent.h>
#include <fcntl.h>
+#include <mutex>
#include <stddef.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -54,7 +54,7 @@ bool OProfileWrapper::initialize() {
using namespace llvm;
using namespace llvm::sys;
- MutexGuard Guard(OProfileInitializationMutex);
+ std::lock_guard<sys::Mutex> Guard(OProfileInitializationMutex);
if (Initialized)
return OpenAgentFunc != 0;
diff --git a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index 99bf53bc3afa..75ddbc30445d 100644
--- a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -54,11 +54,12 @@ static ThreadSafeModule extractSubModule(ThreadSafeModule &TSM,
llvm_unreachable("Unsupported global type");
};
- auto NewTSMod = cloneToNewContext(TSM, ShouldExtract, DeleteExtractedDefs);
- auto &M = *NewTSMod.getModule();
- M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str());
+ auto NewTSM = cloneToNewContext(TSM, ShouldExtract, DeleteExtractedDefs);
+ NewTSM.withModuleDo([&](Module &M) {
+ M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str());
+ });
- return NewTSMod;
+ return NewTSM;
}
namespace llvm {
@@ -117,39 +118,44 @@ void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) {
this->Partition = std::move(Partition);
}
+void CompileOnDemandLayer::setImplMap(ImplSymbolMap *Imp) {
+ this->AliaseeImpls = Imp;
+}
void CompileOnDemandLayer::emit(MaterializationResponsibility R,
ThreadSafeModule TSM) {
- assert(TSM.getModule() && "Null module");
+ assert(TSM && "Null module");
auto &ES = getExecutionSession();
- auto &M = *TSM.getModule();
-
- // First, do some cleanup on the module:
- cleanUpModule(M);
- // Now sort the callables and non-callables, build re-exports and lodge the
+ // Sort the callables and non-callables, build re-exports and lodge the
// actual module with the implementation dylib.
auto &PDR = getPerDylibResources(R.getTargetJITDylib());
- MangleAndInterner Mangle(ES, M.getDataLayout());
SymbolAliasMap NonCallables;
SymbolAliasMap Callables;
- for (auto &GV : M.global_values()) {
- if (GV.isDeclaration() || GV.hasLocalLinkage() || GV.hasAppendingLinkage())
- continue;
-
- auto Name = Mangle(GV.getName());
- auto Flags = JITSymbolFlags::fromGlobalValue(GV);
- if (Flags.isCallable())
- Callables[Name] = SymbolAliasMapEntry(Name, Flags);
- else
- NonCallables[Name] = SymbolAliasMapEntry(Name, Flags);
- }
+ TSM.withModuleDo([&](Module &M) {
+ // First, do some cleanup on the module:
+ cleanUpModule(M);
+
+ MangleAndInterner Mangle(ES, M.getDataLayout());
+ for (auto &GV : M.global_values()) {
+ if (GV.isDeclaration() || GV.hasLocalLinkage() ||
+ GV.hasAppendingLinkage())
+ continue;
+
+ auto Name = Mangle(GV.getName());
+ auto Flags = JITSymbolFlags::fromGlobalValue(GV);
+ if (Flags.isCallable())
+ Callables[Name] = SymbolAliasMapEntry(Name, Flags);
+ else
+ NonCallables[Name] = SymbolAliasMapEntry(Name, Flags);
+ }
+ });
// Create a partitioning materialization unit and lodge it with the
// implementation dylib.
if (auto Err = PDR.getImplDylib().define(
- llvm::make_unique<PartitioningIRMaterializationUnit>(
+ std::make_unique<PartitioningIRMaterializationUnit>(
ES, std::move(TSM), R.getVModuleKey(), *this))) {
ES.reportError(std::move(Err));
R.failMaterialization();
@@ -158,7 +164,7 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R,
R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), true));
R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(),
- std::move(Callables)));
+ std::move(Callables), AliaseeImpls));
}
CompileOnDemandLayer::PerDylibResources &
@@ -239,14 +245,16 @@ void CompileOnDemandLayer::emitPartition(
// memory manager instance to the linking layer.
auto &ES = getExecutionSession();
-
GlobalValueSet RequestedGVs;
for (auto &Name : R.getRequestedSymbols()) {
assert(Defs.count(Name) && "No definition for symbol");
RequestedGVs.insert(Defs[Name]);
}
- auto GVsToExtract = Partition(RequestedGVs);
+ // Perform partitioning with the context lock held, since the partition
+ // function is allowed to access the globals to compute the partition.
+ auto GVsToExtract =
+ TSM.withModuleDo([&](Module &M) { return Partition(RequestedGVs); });
// Take a 'None' partition to mean the whole module (as opposed to an empty
// partition, which means "materialize nothing"). Emit the whole module
@@ -259,43 +267,52 @@ void CompileOnDemandLayer::emitPartition(
// If the partition is empty, return the whole module to the symbol table.
if (GVsToExtract->empty()) {
- R.replace(llvm::make_unique<PartitioningIRMaterializationUnit>(
+ R.replace(std::make_unique<PartitioningIRMaterializationUnit>(
std::move(TSM), R.getSymbols(), std::move(Defs), *this));
return;
}
// Ok -- we actually need to partition the symbols. Promote the symbol
- // linkages/names.
- // FIXME: We apply this once per partitioning. It's safe, but overkill.
- {
- auto PromotedGlobals = PromoteSymbols(*TSM.getModule());
- if (!PromotedGlobals.empty()) {
- MangleAndInterner Mangle(ES, TSM.getModule()->getDataLayout());
- SymbolFlagsMap SymbolFlags;
- for (auto &GV : PromotedGlobals)
- SymbolFlags[Mangle(GV->getName())] =
- JITSymbolFlags::fromGlobalValue(*GV);
- if (auto Err = R.defineMaterializing(SymbolFlags)) {
- ES.reportError(std::move(Err));
- R.failMaterialization();
- return;
- }
- }
+ // linkages/names, expand the partition to include any required symbols
+ // (i.e. symbols that can't be separated from our partition), and
+ // then extract the partition.
+ //
+ // FIXME: We apply this promotion once per partitioning. It's safe, but
+ // overkill.
+
+ auto ExtractedTSM =
+ TSM.withModuleDo([&](Module &M) -> Expected<ThreadSafeModule> {
+ auto PromotedGlobals = PromoteSymbols(M);
+ if (!PromotedGlobals.empty()) {
+ MangleAndInterner Mangle(ES, M.getDataLayout());
+ SymbolFlagsMap SymbolFlags;
+ for (auto &GV : PromotedGlobals)
+ SymbolFlags[Mangle(GV->getName())] =
+ JITSymbolFlags::fromGlobalValue(*GV);
+ if (auto Err = R.defineMaterializing(SymbolFlags))
+ return std::move(Err);
+ }
+
+ expandPartition(*GVsToExtract);
+
+ // Extract the requested partition (plus any necessary aliases) and
+ // put the rest back into the impl dylib.
+ auto ShouldExtract = [&](const GlobalValue &GV) -> bool {
+ return GVsToExtract->count(&GV);
+ };
+
+ return extractSubModule(TSM, ".submodule", ShouldExtract);
+ });
+
+ if (!ExtractedTSM) {
+ ES.reportError(ExtractedTSM.takeError());
+ R.failMaterialization();
+ return;
}
- expandPartition(*GVsToExtract);
-
- // Extract the requested partiton (plus any necessary aliases) and
- // put the rest back into the impl dylib.
- auto ShouldExtract = [&](const GlobalValue &GV) -> bool {
- return GVsToExtract->count(&GV);
- };
-
- auto ExtractedTSM = extractSubModule(TSM, ".submodule", ShouldExtract);
- R.replace(llvm::make_unique<PartitioningIRMaterializationUnit>(
+ R.replace(std::make_unique<PartitioningIRMaterializationUnit>(
ES, std::move(TSM), R.getVModuleKey(), *this));
-
- BaseLayer.emit(std::move(R), std::move(ExtractedTSM));
+ BaseLayer.emit(std::move(R), std::move(*ExtractedTSM));
}
} // end namespace orc
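
The recurring change in the Orc layers above is that module access now goes through ThreadSafeModule::withModuleDo, which runs a callable under the module's context lock and returns the callable's result, instead of handing out a raw Module pointer via getModule(). A minimal usage sketch, assuming the withModuleDo API exactly as it is used in the diff:

#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/IR/Module.h"
#include <string>

// The lambda runs with the underlying LLVMContext locked, so this is
// safe even when other threads share the same ThreadSafeContext.
static std::string getModuleName(llvm::orc::ThreadSafeModule &TSM) {
  return TSM.withModuleDo(
      [](llvm::Module &M) { return M.getModuleIdentifier(); });
}
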
diff --git a/lib/ExecutionEngine/Orc/CompileUtils.cpp b/lib/ExecutionEngine/Orc/CompileUtils.cpp
index d46b6fcf9a5f..f8251627a4ef 100644
--- a/lib/ExecutionEngine/Orc/CompileUtils.cpp
+++ b/lib/ExecutionEngine/Orc/CompileUtils.cpp
@@ -42,7 +42,7 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
PM.run(M);
}
- auto ObjBuffer = llvm::make_unique<SmallVectorMemoryBuffer>(
+ auto ObjBuffer = std::make_unique<SmallVectorMemoryBuffer>(
std::move(ObjBufferSV),
"<in memory object compiled from " + M.getModuleIdentifier() + ">");
diff --git a/lib/ExecutionEngine/Orc/Core.cpp b/lib/ExecutionEngine/Orc/Core.cpp
index dac37e030e0c..5c7d888c2d6e 100644
--- a/lib/ExecutionEngine/Orc/Core.cpp
+++ b/lib/ExecutionEngine/Orc/Core.cpp
@@ -151,6 +151,8 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) {
}
raw_ostream &operator<<(raw_ostream &OS, const JITSymbolFlags &Flags) {
+ if (Flags.hasError())
+ OS << "[*ERROR*]";
if (Flags.isCallable())
OS << "[Callable]";
else
@@ -224,7 +226,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases) {
for (auto &KV : Aliases)
OS << " " << *KV.first << ": " << KV.second.Aliasee << " "
<< KV.second.AliasFlags;
- OS << " }\n";
+ OS << " }";
return OS;
}
@@ -238,15 +240,18 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S) {
return OS << "Materializing";
case SymbolState::Resolved:
return OS << "Resolved";
+ case SymbolState::Emitted:
+ return OS << "Emitted";
case SymbolState::Ready:
return OS << "Ready";
}
llvm_unreachable("Invalid state");
}
-FailedToMaterialize::FailedToMaterialize(SymbolNameSet Symbols)
+FailedToMaterialize::FailedToMaterialize(
+ std::shared_ptr<SymbolDependenceMap> Symbols)
: Symbols(std::move(Symbols)) {
- assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
+ assert(!this->Symbols->empty() && "Can not fail to resolve an empty set");
}
std::error_code FailedToMaterialize::convertToErrorCode() const {
@@ -254,7 +259,7 @@ std::error_code FailedToMaterialize::convertToErrorCode() const {
}
void FailedToMaterialize::log(raw_ostream &OS) const {
- OS << "Failed to materialize symbols: " << Symbols;
+ OS << "Failed to materialize symbols: " << *Symbols;
}
SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols)
@@ -367,35 +372,35 @@ SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const {
return JD.getRequestedSymbols(SymbolFlags);
}
-void MaterializationResponsibility::notifyResolved(const SymbolMap &Symbols) {
+Error MaterializationResponsibility::notifyResolved(const SymbolMap &Symbols) {
LLVM_DEBUG({
dbgs() << "In " << JD.getName() << " resolving " << Symbols << "\n";
});
#ifndef NDEBUG
for (auto &KV : Symbols) {
+ auto WeakFlags = JITSymbolFlags::Weak | JITSymbolFlags::Common;
auto I = SymbolFlags.find(KV.first);
assert(I != SymbolFlags.end() &&
"Resolving symbol outside this responsibility set");
- if (I->second.isWeak())
- assert(I->second == (KV.second.getFlags() | JITSymbolFlags::Weak) &&
- "Resolving symbol with incorrect flags");
- else
- assert(I->second == KV.second.getFlags() &&
- "Resolving symbol with incorrect flags");
+ assert((KV.second.getFlags() & ~WeakFlags) == (I->second & ~WeakFlags) &&
+ "Resolving symbol with incorrect flags");
}
#endif
- JD.resolve(Symbols);
+ return JD.resolve(Symbols);
}
-void MaterializationResponsibility::notifyEmitted() {
+Error MaterializationResponsibility::notifyEmitted() {
LLVM_DEBUG({
dbgs() << "In " << JD.getName() << " emitting " << SymbolFlags << "\n";
});
- JD.emit(SymbolFlags);
+ if (auto Err = JD.emit(SymbolFlags))
+ return Err;
+
SymbolFlags.clear();
+ return Error::success();
}
Error MaterializationResponsibility::defineMaterializing(
@@ -417,12 +422,13 @@ void MaterializationResponsibility::failMaterialization() {
<< SymbolFlags << "\n";
});
- SymbolNameSet FailedSymbols;
- for (auto &KV : SymbolFlags)
- FailedSymbols.insert(KV.first);
+ JITDylib::FailedSymbolsWorklist Worklist;
- JD.notifyFailed(FailedSymbols);
+ for (auto &KV : SymbolFlags)
+ Worklist.push_back(std::make_pair(&JD, KV.first));
SymbolFlags.clear();
+
+ JD.notifyFailed(std::move(Worklist));
}
void MaterializationResponsibility::replace(
@@ -485,8 +491,9 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
void AbsoluteSymbolsMaterializationUnit::materialize(
MaterializationResponsibility R) {
- R.notifyResolved(Symbols);
- R.notifyEmitted();
+ // No dependencies, so these calls can't fail.
+ cantFail(R.notifyResolved(Symbols));
+ cantFail(R.notifyEmitted());
}
void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
@@ -625,6 +632,7 @@ void ReExportsMaterializationUnit::materialize(
};
auto OnComplete = [QueryInfo](Expected<SymbolMap> Result) {
+ auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession();
if (Result) {
SymbolMap ResolutionMap;
for (auto &KV : QueryInfo->Aliases) {
@@ -633,10 +641,17 @@ void ReExportsMaterializationUnit::materialize(
ResolutionMap[KV.first] = JITEvaluatedSymbol(
(*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags);
}
- QueryInfo->R.notifyResolved(ResolutionMap);
- QueryInfo->R.notifyEmitted();
+ if (auto Err = QueryInfo->R.notifyResolved(ResolutionMap)) {
+ ES.reportError(std::move(Err));
+ QueryInfo->R.failMaterialization();
+ return;
+ }
+ if (auto Err = QueryInfo->R.notifyEmitted()) {
+ ES.reportError(std::move(Err));
+ QueryInfo->R.failMaterialization();
+ return;
+ }
} else {
- auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession();
ES.reportError(Result.takeError());
QueryInfo->R.failMaterialization();
}
@@ -694,7 +709,7 @@ ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD,
Allow(std::move(Allow)) {}
Expected<SymbolNameSet>
-ReexportsGenerator::operator()(JITDylib &JD, const SymbolNameSet &Names) {
+ReexportsGenerator::tryToGenerate(JITDylib &JD, const SymbolNameSet &Names) {
orc::SymbolNameSet Added;
orc::SymbolAliasMap AliasMap;
@@ -716,6 +731,19 @@ ReexportsGenerator::operator()(JITDylib &JD, const SymbolNameSet &Names) {
return Added;
}
+JITDylib::DefinitionGenerator::~DefinitionGenerator() {}
+
+void JITDylib::removeGenerator(DefinitionGenerator &G) {
+ ES.runSessionLocked([&]() {
+ auto I = std::find_if(DefGenerators.begin(), DefGenerators.end(),
+ [&](const std::unique_ptr<DefinitionGenerator> &H) {
+ return H.get() == &G;
+ });
+ assert(I != DefGenerators.end() && "Generator not found");
+ DefGenerators.erase(I);
+ });
+}
+
Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
return ES.runSessionLocked([&]() -> Error {
std::vector<SymbolTable::iterator> AddedSyms;
@@ -823,26 +851,52 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name,
assert(Symbols[Name].isInMaterializationPhase() &&
"Can not add dependencies for a symbol that is not materializing");
+ // If Name is already in an error state then just bail out.
+ if (Symbols[Name].getFlags().hasError())
+ return;
+
auto &MI = MaterializingInfos[Name];
- assert(!MI.IsEmitted && "Can not add dependencies to an emitted symbol");
+ assert(Symbols[Name].getState() != SymbolState::Emitted &&
+ "Can not add dependencies to an emitted symbol");
+ bool DependsOnSymbolInErrorState = false;
+
+ // Register dependencies, record whether any dependency is in the error
+ // state.
for (auto &KV : Dependencies) {
assert(KV.first && "Null JITDylib in dependency?");
auto &OtherJITDylib = *KV.first;
auto &DepsOnOtherJITDylib = MI.UnemittedDependencies[&OtherJITDylib];
for (auto &OtherSymbol : KV.second) {
+
+ // Check the sym entry for the dependency.
+ auto OtherSymI = OtherJITDylib.Symbols.find(OtherSymbol);
+
#ifndef NDEBUG
- // Assert that this symbol exists and has not been emitted already.
- auto SymI = OtherJITDylib.Symbols.find(OtherSymbol);
- assert(SymI != OtherJITDylib.Symbols.end() &&
- (SymI->second.getState() != SymbolState::Ready &&
- "Dependency on emitted symbol"));
+ // Assert that this symbol exists and has not reached the ready state
+ // already.
+ assert(OtherSymI != OtherJITDylib.Symbols.end() &&
+ (OtherSymI->second.getState() != SymbolState::Ready &&
+ "Dependency on emitted/ready symbol"));
#endif
+ auto &OtherSymEntry = OtherSymI->second;
+
+ // If the dependency is in an error state then note this and continue,
+ // we will move this symbol to the error state below.
+ if (OtherSymEntry.getFlags().hasError()) {
+ DependsOnSymbolInErrorState = true;
+ continue;
+ }
+
+ // If the dependency was not in the error state then add it to
+ // our list of dependencies.
+ assert(OtherJITDylib.MaterializingInfos.count(OtherSymbol) &&
+ "No MaterializingInfo for dependency");
auto &OtherMI = OtherJITDylib.MaterializingInfos[OtherSymbol];
- if (OtherMI.IsEmitted)
+ if (OtherSymEntry.getState() == SymbolState::Emitted)
transferEmittedNodeDependencies(MI, Name, OtherMI);
else if (&OtherJITDylib != this || OtherSymbol != Name) {
OtherMI.Dependants[this].insert(Name);
@@ -853,63 +907,142 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name,
if (DepsOnOtherJITDylib.empty())
MI.UnemittedDependencies.erase(&OtherJITDylib);
}
+
+ // If this symbol depended on any symbols in the error state then move
+ // this symbol to the error state too.
+ if (DependsOnSymbolInErrorState)
+ Symbols[Name].setFlags(Symbols[Name].getFlags() | JITSymbolFlags::HasError);
}
-void JITDylib::resolve(const SymbolMap &Resolved) {
- auto CompletedQueries = ES.runSessionLocked([&, this]() {
- AsynchronousSymbolQuerySet CompletedQueries;
+Error JITDylib::resolve(const SymbolMap &Resolved) {
+ SymbolNameSet SymbolsInErrorState;
+ AsynchronousSymbolQuerySet CompletedQueries;
+
+ ES.runSessionLocked([&, this]() {
+ struct WorklistEntry {
+ SymbolTable::iterator SymI;
+ JITEvaluatedSymbol ResolvedSym;
+ };
+
+ std::vector<WorklistEntry> Worklist;
+ Worklist.reserve(Resolved.size());
+
+ // Build worklist and check for any symbols in the error state.
for (const auto &KV : Resolved) {
- auto &Name = KV.first;
- auto Sym = KV.second;
- auto I = Symbols.find(Name);
+ assert(!KV.second.getFlags().hasError() &&
+ "Resolution result can not have error flag set");
- assert(I != Symbols.end() && "Symbol not found");
- assert(!I->second.hasMaterializerAttached() &&
+ auto SymI = Symbols.find(KV.first);
+
+ assert(SymI != Symbols.end() && "Symbol not found");
+ assert(!SymI->second.hasMaterializerAttached() &&
"Resolving symbol with materializer attached?");
- assert(I->second.getState() == SymbolState::Materializing &&
+ assert(SymI->second.getState() == SymbolState::Materializing &&
"Symbol should be materializing");
- assert(I->second.getAddress() == 0 && "Symbol has already been resolved");
+ assert(SymI->second.getAddress() == 0 &&
+ "Symbol has already been resolved");
+
+ if (SymI->second.getFlags().hasError())
+ SymbolsInErrorState.insert(KV.first);
+ else {
+ auto Flags = KV.second.getFlags();
+ Flags &= ~(JITSymbolFlags::Weak | JITSymbolFlags::Common);
+ assert(Flags == (SymI->second.getFlags() &
+ ~(JITSymbolFlags::Weak | JITSymbolFlags::Common)) &&
+ "Resolved flags should match the declared flags");
+
+ Worklist.push_back(
+ {SymI, JITEvaluatedSymbol(KV.second.getAddress(), Flags)});
+ }
+ }
+
+ // If any symbols were in the error state then bail out.
+ if (!SymbolsInErrorState.empty())
+ return;
+
+ while (!Worklist.empty()) {
+ auto SymI = Worklist.back().SymI;
+ auto ResolvedSym = Worklist.back().ResolvedSym;
+ Worklist.pop_back();
- assert((Sym.getFlags() & ~JITSymbolFlags::Weak) ==
- (I->second.getFlags() & ~JITSymbolFlags::Weak) &&
- "Resolved flags should match the declared flags");
+ auto &Name = SymI->first;
- // Once resolved, symbols can never be weak.
- JITSymbolFlags ResolvedFlags = Sym.getFlags();
- ResolvedFlags &= ~JITSymbolFlags::Weak;
- I->second.setAddress(Sym.getAddress());
- I->second.setFlags(ResolvedFlags);
- I->second.setState(SymbolState::Resolved);
+ // Resolved symbols can not be weak: discard the weak flag.
+ JITSymbolFlags ResolvedFlags = ResolvedSym.getFlags();
+ SymI->second.setAddress(ResolvedSym.getAddress());
+ SymI->second.setFlags(ResolvedFlags);
+ SymI->second.setState(SymbolState::Resolved);
auto &MI = MaterializingInfos[Name];
for (auto &Q : MI.takeQueriesMeeting(SymbolState::Resolved)) {
- Q->notifySymbolMetRequiredState(Name, Sym);
+ Q->notifySymbolMetRequiredState(Name, ResolvedSym);
+ Q->removeQueryDependence(*this, Name);
if (Q->isComplete())
CompletedQueries.insert(std::move(Q));
}
}
-
- return CompletedQueries;
});
+ assert((SymbolsInErrorState.empty() || CompletedQueries.empty()) &&
+ "Can't fail symbols and completed queries at the same time");
+
+ // If we failed any symbols then return an error.
+ if (!SymbolsInErrorState.empty()) {
+ auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>();
+ (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState);
+ return make_error<FailedToMaterialize>(std::move(FailedSymbolsDepMap));
+ }
+
+ // Otherwise notify all the completed queries.
for (auto &Q : CompletedQueries) {
assert(Q->isComplete() && "Q not completed");
Q->handleComplete();
}
+
+ return Error::success();
}
-void JITDylib::emit(const SymbolFlagsMap &Emitted) {
- auto CompletedQueries = ES.runSessionLocked([&, this]() {
- AsynchronousSymbolQuerySet CompletedQueries;
+Error JITDylib::emit(const SymbolFlagsMap &Emitted) {
+ AsynchronousSymbolQuerySet CompletedQueries;
+ SymbolNameSet SymbolsInErrorState;
+ ES.runSessionLocked([&, this]() {
+ std::vector<SymbolTable::iterator> Worklist;
+
+ // Scan to build worklist, record any symbols in the error state.
for (const auto &KV : Emitted) {
- const auto &Name = KV.first;
+ auto &Name = KV.first;
+
+ auto SymI = Symbols.find(Name);
+ assert(SymI != Symbols.end() && "No symbol table entry for Name");
+
+ if (SymI->second.getFlags().hasError())
+ SymbolsInErrorState.insert(Name);
+ else
+ Worklist.push_back(SymI);
+ }
+
+ // If any symbols were in the error state then bail out.
+ if (!SymbolsInErrorState.empty())
+ return;
+
+ // Otherwise update dependencies and move to the emitted state.
+ while (!Worklist.empty()) {
+ auto SymI = Worklist.back();
+ Worklist.pop_back();
+
+ auto &Name = SymI->first;
+ auto &SymEntry = SymI->second;
+
+ // Move symbol to the emitted state.
+ assert(SymEntry.getState() == SymbolState::Resolved &&
+ "Emitting from state other than Resolved");
+ SymEntry.setState(SymbolState::Emitted);
auto MII = MaterializingInfos.find(Name);
assert(MII != MaterializingInfos.end() &&
"Missing MaterializingInfo entry");
-
auto &MI = MII->second;
// For each dependant, transfer this node's emitted dependencies to
@@ -926,8 +1059,12 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
auto &DependantMI = DependantMII->second;
// Remove the dependant's dependency on this node.
+ assert(DependantMI.UnemittedDependencies.count(this) &&
+ "Dependant does not have an unemitted dependencies record for "
+ "this JITDylib");
assert(DependantMI.UnemittedDependencies[this].count(Name) &&
"Dependant does not count this symbol as a dependency?");
+
DependantMI.UnemittedDependencies[this].erase(Name);
if (DependantMI.UnemittedDependencies[this].empty())
DependantMI.UnemittedDependencies.erase(this);
@@ -936,20 +1073,22 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
DependantJD.transferEmittedNodeDependencies(DependantMI,
DependantName, MI);
+ auto DependantSymI = DependantJD.Symbols.find(DependantName);
+ assert(DependantSymI != DependantJD.Symbols.end() &&
+ "Dependant has no entry in the Symbols table");
+ auto &DependantSymEntry = DependantSymI->second;
+
// If the dependant is emitted and this node was the last of its
// unemitted dependencies then the dependant node is now ready, so
// notify any pending queries on the dependant node.
- if (DependantMI.IsEmitted &&
+ if (DependantSymEntry.getState() == SymbolState::Emitted &&
DependantMI.UnemittedDependencies.empty()) {
assert(DependantMI.Dependants.empty() &&
"Dependants should be empty by now");
// Since this dependant is now ready, we erase its MaterializingInfo
// and update its materializing state.
- auto DependantSymI = DependantJD.Symbols.find(DependantName);
- assert(DependantSymI != DependantJD.Symbols.end() &&
- "Dependant has no entry in the Symbols table");
- DependantSymI->second.setState(SymbolState::Ready);
+ DependantSymEntry.setState(SymbolState::Ready);
for (auto &Q : DependantMI.takeQueriesMeeting(SymbolState::Ready)) {
Q->notifySymbolMetRequiredState(
@@ -963,12 +1102,9 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
}
}
}
- MI.Dependants.clear();
- MI.IsEmitted = true;
+ MI.Dependants.clear();
if (MI.UnemittedDependencies.empty()) {
- auto SymI = Symbols.find(Name);
- assert(SymI != Symbols.end() && "Symbol has no entry in Symbols table");
SymI->second.setState(SymbolState::Ready);
for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) {
Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
@@ -979,80 +1115,138 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
MaterializingInfos.erase(MII);
}
}
-
- return CompletedQueries;
});
+ assert((SymbolsInErrorState.empty() || CompletedQueries.empty()) &&
+ "Can't fail symbols and completed queries at the same time");
+
+ // If we failed any symbols then return an error.
+ if (!SymbolsInErrorState.empty()) {
+ auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>();
+ (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState);
+ return make_error<FailedToMaterialize>(std::move(FailedSymbolsDepMap));
+ }
+
+ // Otherwise notify all the completed queries.
for (auto &Q : CompletedQueries) {
assert(Q->isComplete() && "Q is not complete");
Q->handleComplete();
}
+
+ return Error::success();
}
-void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) {
+void JITDylib::notifyFailed(FailedSymbolsWorklist Worklist) {
+ AsynchronousSymbolQuerySet FailedQueries;
+ auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
- // FIXME: This should fail any transitively dependant symbols too.
+ // Failing no symbols is a no-op.
+ if (Worklist.empty())
+ return;
- auto FailedQueriesToNotify = ES.runSessionLocked([&, this]() {
- AsynchronousSymbolQuerySet FailedQueries;
- std::vector<MaterializingInfosMap::iterator> MIIsToRemove;
+ auto &ES = Worklist.front().first->getExecutionSession();
- for (auto &Name : FailedSymbols) {
- auto I = Symbols.find(Name);
- assert(I != Symbols.end() && "Symbol not present in this JITDylib");
- Symbols.erase(I);
+ ES.runSessionLocked([&]() {
+ while (!Worklist.empty()) {
+ assert(Worklist.back().first && "Failed JITDylib can not be null");
+ auto &JD = *Worklist.back().first;
+ auto Name = std::move(Worklist.back().second);
+ Worklist.pop_back();
- auto MII = MaterializingInfos.find(Name);
+ (*FailedSymbolsMap)[&JD].insert(Name);
+
+ assert(JD.Symbols.count(Name) && "No symbol table entry for Name");
+ auto &Sym = JD.Symbols[Name];
- // If we have not created a MaterializingInfo for this symbol yet then
- // there is nobody to notify.
- if (MII == MaterializingInfos.end())
+ // Move the symbol into the error state.
+ // Note that this may be redundant: The symbol might already have been
+ // moved to this state in response to the failure of a dependence.
+ Sym.setFlags(Sym.getFlags() | JITSymbolFlags::HasError);
+
+ // FIXME: Come up with a sane mapping of state to
+ // presence-of-MaterializingInfo so that we can assert presence / absence
+ // here, rather than testing it.
+ auto MII = JD.MaterializingInfos.find(Name);
+
+ if (MII == JD.MaterializingInfos.end())
continue;
- // Remove this symbol from the dependants list of any dependencies.
- for (auto &KV : MII->second.UnemittedDependencies) {
- auto *DependencyJD = KV.first;
- auto &Dependencies = KV.second;
- for (auto &DependencyName : Dependencies) {
- auto DependencyMII =
- DependencyJD->MaterializingInfos.find(DependencyName);
- assert(DependencyMII != DependencyJD->MaterializingInfos.end() &&
- "Unemitted dependency must have a MaterializingInfo entry");
- assert(DependencyMII->second.Dependants.count(this) &&
- "Dependency's dependants list does not contain this JITDylib");
- assert(DependencyMII->second.Dependants[this].count(Name) &&
- "Dependency's dependants list does not contain dependant");
- DependencyMII->second.Dependants[this].erase(Name);
+ auto &MI = MII->second;
+
+ // Move all dependants to the error state and disconnect from them.
+ for (auto &KV : MI.Dependants) {
+ auto &DependantJD = *KV.first;
+ for (auto &DependantName : KV.second) {
+ assert(DependantJD.Symbols.count(DependantName) &&
+ "No symbol table entry for DependantName");
+ auto &DependantSym = DependantJD.Symbols[DependantName];
+ DependantSym.setFlags(DependantSym.getFlags() |
+ JITSymbolFlags::HasError);
+
+ assert(DependantJD.MaterializingInfos.count(DependantName) &&
+ "No MaterializingInfo for dependant");
+ auto &DependantMI = DependantJD.MaterializingInfos[DependantName];
+
+ auto UnemittedDepI = DependantMI.UnemittedDependencies.find(&JD);
+ assert(UnemittedDepI != DependantMI.UnemittedDependencies.end() &&
+ "No UnemittedDependencies entry for this JITDylib");
+ assert(UnemittedDepI->second.count(Name) &&
+ "No UnemittedDependencies entry for this symbol");
+ UnemittedDepI->second.erase(Name);
+ if (UnemittedDepI->second.empty())
+ DependantMI.UnemittedDependencies.erase(UnemittedDepI);
+
+ // If this symbol is already in the emitted state then we need to
+ // take responsibility for failing its queries, so add it to the
+ // worklist.
+ if (DependantSym.getState() == SymbolState::Emitted) {
+ assert(DependantMI.Dependants.empty() &&
+ "Emitted symbol should not have dependants");
+ Worklist.push_back(std::make_pair(&DependantJD, DependantName));
+ }
}
}
+ MI.Dependants.clear();
- // Copy all the queries to the FailedQueries list, then abandon them.
- // This has to be a copy, and the copy has to come before the abandon
- // operation: Each Q.detach() call will reach back into this
- // PendingQueries list to remove Q.
- for (auto &Q : MII->second.pendingQueries())
- FailedQueries.insert(Q);
-
- MIIsToRemove.push_back(std::move(MII));
- }
-
- // Detach failed queries.
- for (auto &Q : FailedQueries)
- Q->detach();
+ // Disconnect from all unemitted dependencies.
+ for (auto &KV : MI.UnemittedDependencies) {
+ auto &UnemittedDepJD = *KV.first;
+ for (auto &UnemittedDepName : KV.second) {
+ auto UnemittedDepMII =
+ UnemittedDepJD.MaterializingInfos.find(UnemittedDepName);
+ assert(UnemittedDepMII != UnemittedDepJD.MaterializingInfos.end() &&
+ "Missing MII for unemitted dependency");
+ assert(UnemittedDepMII->second.Dependants.count(&JD) &&
+ "JD not listed as a dependant of unemitted dependency");
+ assert(UnemittedDepMII->second.Dependants[&JD].count(Name) &&
+ "Name is not listed as a dependant of unemitted dependency");
+ UnemittedDepMII->second.Dependants[&JD].erase(Name);
+ if (UnemittedDepMII->second.Dependants[&JD].empty())
+ UnemittedDepMII->second.Dependants.erase(&JD);
+ }
+ }
+ MI.UnemittedDependencies.clear();
- // Remove the MaterializingInfos.
- for (auto &MII : MIIsToRemove) {
- assert(!MII->second.hasQueriesPending() &&
- "Queries remain after symbol was failed");
+ // Collect queries to be failed for this MII.
+ for (auto &Q : MII->second.pendingQueries()) {
+ // Add the query to the list to be failed and detach it.
+ FailedQueries.insert(Q);
+ Q->detach();
+ }
- MaterializingInfos.erase(MII);
+ assert(MI.Dependants.empty() &&
+ "Can not delete MaterializingInfo with dependants still attached");
+ assert(MI.UnemittedDependencies.empty() &&
+ "Can not delete MaterializingInfo with unemitted dependencies "
+ "still attached");
+ assert(!MI.hasQueriesPending() &&
+ "Can not delete MaterializingInfo with queries pending");
+ JD.MaterializingInfos.erase(MII);
}
-
- return FailedQueries;
});
- for (auto &Q : FailedQueriesToNotify)
- Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbols));
+ for (auto &Q : FailedQueries)
+ Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbolsMap));
}
void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder,
@@ -1159,10 +1353,18 @@ Expected<SymbolFlagsMap> JITDylib::lookupFlags(const SymbolNameSet &Names) {
if (!Unresolved)
return Unresolved.takeError();
- if (DefGenerator && !Unresolved->empty()) {
- auto NewDefs = DefGenerator(*this, *Unresolved);
+ // Run any definition generators.
+ for (auto &DG : DefGenerators) {
+
+ // Bail out early if we've resolved everything.
+ if (Unresolved->empty())
+ break;
+
+ // Run this generator.
+ auto NewDefs = DG->tryToGenerate(*this, *Unresolved);
if (!NewDefs)
return NewDefs.takeError();
+
if (!NewDefs->empty()) {
auto Unresolved2 = lookupFlagsImpl(Result, *NewDefs);
if (!Unresolved2)
@@ -1171,7 +1373,10 @@ Expected<SymbolFlagsMap> JITDylib::lookupFlags(const SymbolNameSet &Names) {
assert(Unresolved2->empty() &&
"All fallback defs should have been found by lookupFlagsImpl");
}
- };
+
+ for (auto &Name : *NewDefs)
+ Unresolved->erase(Name);
+ }
return Result;
});
}
@@ -1197,15 +1402,34 @@ Error JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
MaterializationUnitList &MUs) {
assert(Q && "Query can not be null");
- lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs);
- if (DefGenerator && !Unresolved.empty()) {
- auto NewDefs = DefGenerator(*this, Unresolved);
+ if (auto Err = lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs))
+ return Err;
+
+ // Run any definition generators.
+ for (auto &DG : DefGenerators) {
+
+ // Bail out early if we have resolved everything.
+ if (Unresolved.empty())
+ break;
+
+ // Run the generator.
+ auto NewDefs = DG->tryToGenerate(*this, Unresolved);
+
+ // If the generator returns an error then bail out.
if (!NewDefs)
return NewDefs.takeError();
+
+ // If the generator was able to generate new definitions for any of the
+ // unresolved symbols then lodge the query against them.
if (!NewDefs->empty()) {
for (auto &D : *NewDefs)
Unresolved.erase(D);
- lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs);
+
+ // Lodge query. This can not fail as any new definitions were added
+ // by the generator under the session lock. Since they can't have
+ // started materializing yet they can not have failed.
+ cantFail(lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs));
+
assert(NewDefs->empty() &&
"All fallback defs should have been found by lookupImpl");
}
@@ -1214,7 +1438,7 @@ Error JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
return Error::success();
}
-void JITDylib::lodgeQueryImpl(
+Error JITDylib::lodgeQueryImpl(
std::shared_ptr<AsynchronousSymbolQuery> &Q, SymbolNameSet &Unresolved,
bool MatchNonExported,
std::vector<std::unique_ptr<MaterializationUnit>> &MUs) {
@@ -1235,6 +1459,14 @@ void JITDylib::lodgeQueryImpl(
// Unresolved set.
ToRemove.push_back(Name);
+ // If we matched against this symbol but it is in the error state then
+ // bail out and treat it as a failure to materialize.
+ if (SymI->second.getFlags().hasError()) {
+ auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
+ (*FailedSymbolsMap)[this] = {Name};
+ return make_error<FailedToMaterialize>(std::move(FailedSymbolsMap));
+ }
+
// If this symbol already meets the required state then notify the
// query and continue.
if (SymI->second.getState() >= Q->getRequiredState()) {
@@ -1277,6 +1509,8 @@ void JITDylib::lodgeQueryImpl(
// Remove any symbols that we found.
for (auto &Name : ToRemove)
Unresolved.erase(Name);
+
+ return Error::success();
}
Expected<SymbolNameSet>
@@ -1292,9 +1526,16 @@ JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
SymbolNameSet Unresolved = std::move(Names);
auto Err = ES.runSessionLocked([&, this]() -> Error {
QueryComplete = lookupImpl(Q, MUs, Unresolved);
- if (DefGenerator && !Unresolved.empty()) {
+
+ // Run any definition generators.
+ for (auto &DG : DefGenerators) {
+
+ // Bail out early if we have resolved everything.
+ if (Unresolved.empty())
+ break;
+
assert(!QueryComplete && "query complete but unresolved symbols remain?");
- auto NewDefs = DefGenerator(*this, Unresolved);
+ auto NewDefs = DG->tryToGenerate(*this, Unresolved);
if (!NewDefs)
return NewDefs.takeError();
if (!NewDefs->empty()) {
@@ -1432,8 +1673,6 @@ void JITDylib::dump(raw_ostream &OS) {
OS << " MaterializingInfos entries:\n";
for (auto &KV : MaterializingInfos) {
OS << " \"" << *KV.first << "\":\n"
- << " IsEmitted = " << (KV.second.IsEmitted ? "true" : "false")
- << "\n"
<< " " << KV.second.pendingQueries().size()
<< " pending queries: { ";
for (const auto &Q : KV.second.pendingQueries())
@@ -1486,13 +1725,6 @@ JITDylib::MaterializingInfo::takeQueriesMeeting(SymbolState RequiredState) {
return Result;
}
-JITDylib::AsynchronousSymbolQueryList
-JITDylib::MaterializingInfo::takeAllQueries() {
- AsynchronousSymbolQueryList Result;
- std::swap(Result, PendingQueries);
- return Result;
-}
-
JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
: ES(ES), JITDylibName(std::move(Name)) {
SearchOrder.push_back({this, true});
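
Because notifyResolved and notifyEmitted now return Error (they fail if any covered symbol has been moved to the error state), materializers must either propagate the error or fail the responsibility explicitly. A minimal sketch of the updated calling convention, with the symbol map construction elided:

#include "llvm/ExecutionEngine/Orc/Core.h"

// Resolve-and-emit under the new Error-returning API. For units with no
// dependencies the calls cannot fail, and cantFail(...) is the idiomatic
// shorthand, as in AbsoluteSymbolsMaterializationUnit above.
static void resolveAndEmit(llvm::orc::ExecutionSession &ES,
                           llvm::orc::MaterializationResponsibility &R,
                           const llvm::orc::SymbolMap &Result) {
  if (auto Err = R.notifyResolved(Result)) {
    ES.reportError(std::move(Err));
    R.failMaterialization();
    return;
  }
  if (auto Err = R.notifyEmitted()) {
    ES.reportError(std::move(Err));
    R.failMaterialization();
    return;
  }
}
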
diff --git a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index f7fc5f8f1797..4a886ac0597c 100644
--- a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -8,6 +8,7 @@
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/Layer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -67,7 +68,7 @@ CtorDtorIterator::Element CtorDtorIterator::operator*() const {
}
}
- ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ auto *Priority = cast<ConstantInt>(CS->getOperand(0));
Value *Data = CS->getNumOperands() == 3 ? CS->getOperand(2) : nullptr;
if (Data && !isa<GlobalValue>(Data))
Data = nullptr;
@@ -87,7 +88,7 @@ iterator_range<CtorDtorIterator> getDestructors(const Module &M) {
}
void CtorDtorRunner::add(iterator_range<CtorDtorIterator> CtorDtors) {
- if (empty(CtorDtors))
+ if (CtorDtors.empty())
return;
MangleAndInterner Mangle(
@@ -178,20 +179,20 @@ DynamicLibrarySearchGenerator::DynamicLibrarySearchGenerator(
: Dylib(std::move(Dylib)), Allow(std::move(Allow)),
GlobalPrefix(GlobalPrefix) {}
-Expected<DynamicLibrarySearchGenerator>
+Expected<std::unique_ptr<DynamicLibrarySearchGenerator>>
DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix,
SymbolPredicate Allow) {
std::string ErrMsg;
auto Lib = sys::DynamicLibrary::getPermanentLibrary(FileName, &ErrMsg);
if (!Lib.isValid())
return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
- return DynamicLibrarySearchGenerator(std::move(Lib), GlobalPrefix,
- std::move(Allow));
+ return std::make_unique<DynamicLibrarySearchGenerator>(
+ std::move(Lib), GlobalPrefix, std::move(Allow));
}
Expected<SymbolNameSet>
-DynamicLibrarySearchGenerator::operator()(JITDylib &JD,
- const SymbolNameSet &Names) {
+DynamicLibrarySearchGenerator::tryToGenerate(JITDylib &JD,
+ const SymbolNameSet &Names) {
orc::SymbolNameSet Added;
orc::SymbolMap NewSymbols;
@@ -226,5 +227,82 @@ DynamicLibrarySearchGenerator::operator()(JITDylib &JD,
return Added;
}
+Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
+StaticLibraryDefinitionGenerator::Load(ObjectLayer &L, const char *FileName) {
+ auto ArchiveBuffer = errorOrToExpected(MemoryBuffer::getFile(FileName));
+
+ if (!ArchiveBuffer)
+ return ArchiveBuffer.takeError();
+
+ return Create(L, std::move(*ArchiveBuffer));
+}
+
+Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
+StaticLibraryDefinitionGenerator::Create(
+ ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer) {
+ Error Err = Error::success();
+
+ std::unique_ptr<StaticLibraryDefinitionGenerator> ADG(
+ new StaticLibraryDefinitionGenerator(L, std::move(ArchiveBuffer), Err));
+
+ if (Err)
+ return std::move(Err);
+
+ return std::move(ADG);
+}
+
+Expected<SymbolNameSet>
+StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD,
+ const SymbolNameSet &Names) {
+
+ DenseSet<std::pair<StringRef, StringRef>> ChildBufferInfos;
+ SymbolNameSet NewDefs;
+
+ for (const auto &Name : Names) {
+ auto Child = Archive.findSym(*Name);
+ if (!Child)
+ return Child.takeError();
+ if (*Child == None)
+ continue;
+ auto ChildBuffer = (*Child)->getMemoryBufferRef();
+ if (!ChildBuffer)
+ return ChildBuffer.takeError();
+ ChildBufferInfos.insert(
+ {ChildBuffer->getBuffer(), ChildBuffer->getBufferIdentifier()});
+ NewDefs.insert(Name);
+ }
+
+ for (auto ChildBufferInfo : ChildBufferInfos) {
+ MemoryBufferRef ChildBufferRef(ChildBufferInfo.first,
+ ChildBufferInfo.second);
+
+ if (auto Err =
+ L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef), VModuleKey()))
+ return std::move(Err);
+
+ --UnrealizedObjects;
+ }
+
+ return NewDefs;
+}
+
+StaticLibraryDefinitionGenerator::StaticLibraryDefinitionGenerator(
+ ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer, Error &Err)
+ : L(L), ArchiveBuffer(std::move(ArchiveBuffer)),
+ Archive(*this->ArchiveBuffer, Err) {
+
+ if (Err)
+ return;
+
+ Error Err2 = Error::success();
+ for (auto _ : Archive.children(Err2)) {
+ (void)_;
+ ++UnrealizedObjects;
+ }
+
+ // No need to check this: We will leave it to the caller.
+ Err = std::move(Err2);
+}
+
} // End namespace orc.
} // End namespace llvm.
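
The new StaticLibraryDefinitionGenerator lets a JITDylib materialize definitions from a static archive on demand: tryToGenerate looks the requested names up in the archive index and adds the matching object files to the object layer. A minimal wiring sketch, assuming addGenerator as the registration counterpart of the removeGenerator shown earlier, and a hypothetical libFoo.a:

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"

// Make the symbols defined in libFoo.a lazily materializable in JD.
static llvm::Error addArchive(llvm::orc::JITDylib &JD,
                              llvm::orc::ObjectLayer &ObjLayer) {
  auto Gen = llvm::orc::StaticLibraryDefinitionGenerator::Load(ObjLayer,
                                                               "libFoo.a");
  if (!Gen)
    return Gen.takeError();
  JD.addGenerator(std::move(*Gen));
  return llvm::Error::success();
}
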
diff --git a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index 81dfc02f55b2..d311f34179c7 100644
--- a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -22,9 +22,9 @@ void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) {
void IRCompileLayer::emit(MaterializationResponsibility R,
ThreadSafeModule TSM) {
- assert(TSM.getModule() && "Module must not be null");
+ assert(TSM && "Module must not be null");
- if (auto Obj = Compile(*TSM.getModule())) {
+ if (auto Obj = TSM.withModuleDo(Compile)) {
{
std::lock_guard<std::mutex> Lock(IRLayerMutex);
if (NotifyCompiled)
diff --git a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
index e3519284613e..845ecc71eb87 100644
--- a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
+++ b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
@@ -19,7 +19,7 @@ IRTransformLayer::IRTransformLayer(ExecutionSession &ES,
void IRTransformLayer::emit(MaterializationResponsibility R,
ThreadSafeModule TSM) {
- assert(TSM.getModule() && "Module must not be null");
+ assert(TSM && "Module must not be null");
if (auto TransformedTSM = Transform(std::move(TSM), R))
BaseLayer.emit(std::move(R), std::move(*TransformedTSM));
diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index cc3656fe5dc5..0295db7633dd 100644
--- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -37,8 +37,9 @@ private:
void materialize(MaterializationResponsibility R) override {
SymbolMap Result;
Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported);
- R.notifyResolved(Result);
- R.notifyEmitted();
+ // No dependencies, so these calls cannot fail.
+ cantFail(R.notifyResolved(Result));
+ cantFail(R.notifyEmitted());
}
void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
@@ -66,7 +67,7 @@ JITCompileCallbackManager::getCompileCallback(CompileFunction Compile) {
std::lock_guard<std::mutex> Lock(CCMgrMutex);
AddrToSymbol[*TrampolineAddr] = CallbackName;
cantFail(CallbacksJD.define(
- llvm::make_unique<CompileCallbackMaterializationUnit>(
+ std::make_unique<CompileCallbackMaterializationUnit>(
std::move(CallbackName), std::move(Compile),
ES.allocateVModule())));
return *TrampolineAddr;
@@ -119,7 +120,8 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
return make_error<StringError>(
std::string("No callback manager available for ") + T.str(),
inconvertibleErrorCode());
- case Triple::aarch64: {
+ case Triple::aarch64:
+ case Triple::aarch64_32: {
typedef orc::LocalJITCompileCallbackManager<orc::OrcAArch64> CCMgrT;
return CCMgrT::Create(ES, ErrorHandlerAddress);
}
@@ -162,50 +164,51 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
switch (T.getArch()) {
default:
return [](){
- return llvm::make_unique<
+ return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcGenericABI>>();
};
case Triple::aarch64:
+ case Triple::aarch64_32:
return [](){
- return llvm::make_unique<
+ return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcAArch64>>();
};
case Triple::x86:
return [](){
- return llvm::make_unique<
+ return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcI386>>();
};
case Triple::mips:
return [](){
- return llvm::make_unique<
+ return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcMips32Be>>();
};
case Triple::mipsel:
return [](){
- return llvm::make_unique<
+ return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcMips32Le>>();
};
case Triple::mips64:
case Triple::mips64el:
return [](){
- return llvm::make_unique<
+ return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcMips64>>();
};
case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32) {
return [](){
- return llvm::make_unique<
+ return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcX86_64_Win32>>();
};
} else {
return [](){
- return llvm::make_unique<
+ return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcX86_64_SysV>>();
};
}
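
The notifyResolved/notifyEmitted calls above now return Error, and call sites that register no dependencies wrap them in cantFail. A self-contained sketch of the cantFail contract, for readers unfamiliar with it:

    #include "llvm/Support/Error.h"

    // cantFail asserts a known-success invariant: it unwraps the value and
    // aborts if the Error/Expected actually holds a failure.
    int unwrap(llvm::Expected<int> KnownGood) {
      return llvm::cantFail(std::move(KnownGood));
    }
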
diff --git a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index df23547a9de3..1d3e6db913e2 100644
--- a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -8,6 +8,7 @@
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
namespace llvm {
@@ -22,7 +23,21 @@ JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT)
Expected<JITTargetMachineBuilder> JITTargetMachineBuilder::detectHost() {
// FIXME: getProcessTriple is bogus. It returns the host LLVM was compiled on,
// rather than a valid triple for the current process.
- return JITTargetMachineBuilder(Triple(sys::getProcessTriple()));
+ JITTargetMachineBuilder TMBuilder((Triple(sys::getProcessTriple())));
+
+ // Retrieve host CPU name and sub-target features and add them to builder.
+ // Relocation model, code model and codegen opt level are kept to default
+ // values.
+ llvm::SubtargetFeatures SubtargetFeatures;
+ llvm::StringMap<bool> FeatureMap;
+ llvm::sys::getHostCPUFeatures(FeatureMap);
+ for (auto &Feature : FeatureMap)
+ SubtargetFeatures.AddFeature(Feature.first(), Feature.second);
+
+ TMBuilder.setCPU(llvm::sys::getHostCPUName());
+ TMBuilder.addFeatures(SubtargetFeatures.getFeatures());
+
+ return TMBuilder;
}
Expected<std::unique_ptr<TargetMachine>>
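
With this change detectHost() also seeds the builder with the host CPU and feature set, so a TargetMachine created from it is tuned to the host. A hedged usage sketch (error handling abbreviated):

    auto JTMB = JITTargetMachineBuilder::detectHost();
    if (!JTMB)
      return JTMB.takeError();
    // The resulting machine now carries the detected CPU and features.
    auto TM = JTMB->createTargetMachine();
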
diff --git a/lib/ExecutionEngine/Orc/LLJIT.cpp b/lib/ExecutionEngine/Orc/LLJIT.cpp
index b120691faf07..a80f78afe80f 100644
--- a/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -41,7 +41,8 @@ Error LLJIT::defineAbsolute(StringRef Name, JITEvaluatedSymbol Sym) {
Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) {
assert(TSM && "Can not add null module");
- if (auto Err = applyDataLayout(*TSM.getModule()))
+ if (auto Err =
+ TSM.withModuleDo([&](Module &M) { return applyDataLayout(M); }))
return Err;
return CompileLayer->add(JD, std::move(TSM), ES->allocateVModule());
@@ -63,12 +64,21 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
// If the config state provided an ObjectLinkingLayer factory then use it.
if (S.CreateObjectLinkingLayer)
- return S.CreateObjectLinkingLayer(ES);
+ return S.CreateObjectLinkingLayer(ES, S.JTMB->getTargetTriple());
// Otherwise default to creating an RTDyldObjectLinkingLayer that constructs
// a new SectionMemoryManager for each object.
- auto GetMemMgr = []() { return llvm::make_unique<SectionMemoryManager>(); };
- return llvm::make_unique<RTDyldObjectLinkingLayer>(ES, std::move(GetMemMgr));
+ auto GetMemMgr = []() { return std::make_unique<SectionMemoryManager>(); };
+ auto ObjLinkingLayer =
+ std::make_unique<RTDyldObjectLinkingLayer>(ES, std::move(GetMemMgr));
+
+ if (S.JTMB->getTargetTriple().isOSBinFormatCOFF())
+ ObjLinkingLayer->setOverrideObjectFlagsWithResponsibilityFlags(true);
+
+ // FIXME: Explicit conversion to std::unique_ptr<ObjectLayer> added to silence
+ // errors from some GCC / libstdc++ bots. Remove this conversion (i.e.
+ // just return ObjLinkingLayer) once those bots are upgraded.
+ return std::unique_ptr<ObjectLayer>(std::move(ObjLinkingLayer));
}
Expected<IRCompileLayer::CompileFunction>
@@ -92,7 +102,7 @@ LLJIT::createCompileFunction(LLJITBuilderState &S,
}
LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
- : ES(S.ES ? std::move(S.ES) : llvm::make_unique<ExecutionSession>()),
+ : ES(S.ES ? std::move(S.ES) : std::make_unique<ExecutionSession>()),
Main(this->ES->getMainJITDylib()), DL(""), CtorRunner(Main),
DtorRunner(Main) {
@@ -113,13 +123,13 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
Err = CompileFunction.takeError();
return;
}
- CompileLayer = llvm::make_unique<IRCompileLayer>(
+ CompileLayer = std::make_unique<IRCompileLayer>(
*ES, *ObjLinkingLayer, std::move(*CompileFunction));
}
if (S.NumCompileThreads > 0) {
CompileLayer->setCloneToNewContextOnEmit(true);
- CompileThreads = llvm::make_unique<ThreadPool>(S.NumCompileThreads);
+ CompileThreads = std::make_unique<ThreadPool>(S.NumCompileThreads);
ES->setDispatchMaterialization(
[this](JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
// FIXME: Switch to move capture once we have c++14.
@@ -166,10 +176,14 @@ Error LLLazyJITBuilderState::prepareForConstruction() {
Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) {
assert(TSM && "Can not add null module");
- if (auto Err = applyDataLayout(*TSM.getModule()))
- return Err;
+ if (auto Err = TSM.withModuleDo([&](Module &M) -> Error {
+ if (auto Err = applyDataLayout(M))
+ return Err;
- recordCtorDtors(*TSM.getModule());
+ recordCtorDtors(M);
+ return Error::success();
+ }))
+ return Err;
return CODLayer->add(JD, std::move(TSM), ES->allocateVModule());
}
@@ -212,10 +226,10 @@ LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) {
}
// Create the transform layer.
- TransformLayer = llvm::make_unique<IRTransformLayer>(*ES, *CompileLayer);
+ TransformLayer = std::make_unique<IRTransformLayer>(*ES, *CompileLayer);
// Create the COD layer.
- CODLayer = llvm::make_unique<CompileOnDemandLayer>(
+ CODLayer = std::make_unique<CompileOnDemandLayer>(
*ES, *TransformLayer, *LCTMgr, std::move(ISMBuilder));
if (S.NumCompileThreads > 0)
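
For orientation, the layers configured above are normally reached through the builder APIs. A minimal sketch, assuming a ThreadSafeModule TSM built elsewhere (not a complete program):

    auto J = LLJITBuilder().create();
    if (!J)
      return J.takeError();
    if (auto Err = (*J)->addIRModule(std::move(TSM)))
      return Err;
    auto MainSym = (*J)->lookup("main"); // Expected<JITEvaluatedSymbol>
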
diff --git a/lib/ExecutionEngine/Orc/Layer.cpp b/lib/ExecutionEngine/Orc/Layer.cpp
index 3ed2dabf4545..580e2682ec8c 100644
--- a/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/lib/ExecutionEngine/Orc/Layer.cpp
@@ -19,7 +19,7 @@ IRLayer::IRLayer(ExecutionSession &ES) : ES(ES) {}
IRLayer::~IRLayer() {}
Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) {
- return JD.define(llvm::make_unique<BasicIRLayerMaterializationUnit>(
+ return JD.define(std::make_unique<BasicIRLayerMaterializationUnit>(
*this, std::move(K), std::move(TSM)));
}
@@ -29,15 +29,17 @@ IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES,
assert(this->TSM && "Module must not be null");
- MangleAndInterner Mangle(ES, this->TSM.getModule()->getDataLayout());
- for (auto &G : this->TSM.getModule()->global_values()) {
- if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() &&
- !G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) {
- auto MangledName = Mangle(G.getName());
- SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G);
- SymbolToDefinition[MangledName] = &G;
+ MangleAndInterner Mangle(ES, this->TSM.getModuleUnlocked()->getDataLayout());
+ this->TSM.withModuleDo([&](Module &M) {
+ for (auto &G : M.global_values()) {
+ if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() &&
+ !G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) {
+ auto MangledName = Mangle(G.getName());
+ SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G);
+ SymbolToDefinition[MangledName] = &G;
+ }
}
- }
+ });
}
IRMaterializationUnit::IRMaterializationUnit(
@@ -47,8 +49,9 @@ IRMaterializationUnit::IRMaterializationUnit(
TSM(std::move(TSM)), SymbolToDefinition(std::move(SymbolToDefinition)) {}
StringRef IRMaterializationUnit::getName() const {
- if (TSM.getModule())
- return TSM.getModule()->getModuleIdentifier();
+ if (TSM)
+ return TSM.withModuleDo(
+ [](const Module &M) -> StringRef { return M.getModuleIdentifier(); });
return "<null module>";
}
@@ -90,7 +93,6 @@ void BasicIRLayerMaterializationUnit::materialize(
auto &N = R.getTargetJITDylib().getName();
#endif // NDEBUG
- auto Lock = TSM.getContextLock();
LLVM_DEBUG(ES.runSessionLocked(
[&]() { dbgs() << "Emitting, for " << N << ", " << *this << "\n"; }););
L.emit(std::move(R), std::move(TSM));
diff --git a/lib/ExecutionEngine/Orc/LazyReexports.cpp b/lib/ExecutionEngine/Orc/LazyReexports.cpp
index fc8205845654..93aabd817d60 100644
--- a/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -50,7 +50,6 @@ LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) {
SourceJD = I->second.first;
SymbolName = I->second.second;
}
-
auto LookupResult =
ES.lookup(JITDylibSearchList({{SourceJD, true}}), SymbolName);
@@ -91,6 +90,7 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
inconvertibleErrorCode());
case Triple::aarch64:
+ case Triple::aarch64_32:
return LocalLazyCallThroughManager::Create<OrcAArch64>(ES,
ErrorHandlerAddr);
@@ -121,7 +121,8 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit(
LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager,
- JITDylib &SourceJD, SymbolAliasMap CallableAliases, VModuleKey K)
+ JITDylib &SourceJD, SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc,
+ VModuleKey K)
: MaterializationUnit(extractFlags(CallableAliases), std::move(K)),
LCTManager(LCTManager), ISManager(ISManager), SourceJD(SourceJD),
CallableAliases(std::move(CallableAliases)),
@@ -129,7 +130,8 @@ LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit(
[&ISManager](JITDylib &JD, const SymbolStringPtr &SymbolName,
JITTargetAddress ResolvedAddr) {
return ISManager.updatePointer(*SymbolName, ResolvedAddr);
- })) {}
+ })),
+ AliaseeTable(SrcJDLoc) {}
StringRef LazyReexportsMaterializationUnit::getName() const {
return "<Lazy Reexports>";
@@ -149,7 +151,7 @@ void LazyReexportsMaterializationUnit::materialize(
if (!CallableAliases.empty())
R.replace(lazyReexports(LCTManager, ISManager, SourceJD,
- std::move(CallableAliases)));
+ std::move(CallableAliases), AliaseeTable));
IndirectStubsManager::StubInitsMap StubInits;
for (auto &Alias : RequestedAliases) {
@@ -168,6 +170,9 @@ void LazyReexportsMaterializationUnit::materialize(
std::make_pair(*CallThroughTrampoline, Alias.second.AliasFlags);
}
+ if (AliaseeTable != nullptr && !RequestedAliases.empty())
+ AliaseeTable->trackImpls(RequestedAliases, &SourceJD);
+
if (auto Err = ISManager.createStubs(StubInits)) {
SourceJD.getExecutionSession().reportError(std::move(Err));
R.failMaterialization();
@@ -178,8 +183,9 @@ void LazyReexportsMaterializationUnit::materialize(
for (auto &Alias : RequestedAliases)
Stubs[Alias.first] = ISManager.findStub(*Alias.first, false);
- R.notifyResolved(Stubs);
- R.notifyEmitted();
+ // No registered dependencies, so these calls cannot fail.
+ cantFail(R.notifyResolved(Stubs));
+ cantFail(R.notifyEmitted());
}
void LazyReexportsMaterializationUnit::discard(const JITDylib &JD,
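
The new ImplSymbolMap* parameter threaded through above is optional; when non-null it lets the speculator record where each aliasee is actually defined. A hedged sketch, with all names (LCTM, ISM, SourceJD, TargetJD, Aliases) assumed to be in scope:

    ImplSymbolMap SpeculationMap;
    cantFail(TargetJD.define(lazyReexports(
        LCTM, ISM, SourceJD, std::move(Aliases), &SpeculationMap)));
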
diff --git a/lib/ExecutionEngine/Orc/Legacy.cpp b/lib/ExecutionEngine/Orc/Legacy.cpp
index ce6368b57a89..9f9a6730b2c3 100644
--- a/lib/ExecutionEngine/Orc/Legacy.cpp
+++ b/lib/ExecutionEngine/Orc/Legacy.cpp
@@ -23,7 +23,8 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols,
for (auto &S : Symbols)
InternedSymbols.insert(ES.intern(S));
- auto OnResolvedWithUnwrap = [OnResolved](Expected<SymbolMap> InternedResult) {
+ auto OnResolvedWithUnwrap = [OnResolved = std::move(OnResolved)](
+ Expected<SymbolMap> InternedResult) mutable {
if (!InternedResult) {
OnResolved(InternedResult.takeError());
return;
@@ -36,7 +37,7 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols,
};
auto Q = std::make_shared<AsynchronousSymbolQuery>(
- InternedSymbols, SymbolState::Resolved, OnResolvedWithUnwrap);
+ InternedSymbols, SymbolState::Resolved, std::move(OnResolvedWithUnwrap));
auto Unresolved = R.lookup(Q, InternedSymbols);
if (Unresolved.empty()) {
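
This is an instance of the cleanup recurring throughout the import: C++14 init-capture moves a move-only callback directly into the lambda, replacing the old copy-capture (or shared_ptr) workaround. A self-contained illustration:

    #include <utility>

    // The returned lambda owns OnDone; no shared_ptr wrapper is needed.
    template <typename Callback>
    auto wrapMoveOnly(Callback OnDone) {
      return [OnDone = std::move(OnDone)](int R) mutable { OnDone(R); };
    }
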
diff --git a/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
index def0b300eca1..874decb2ade0 100644
--- a/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
+++ b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -29,6 +29,13 @@ public:
std::unique_ptr<MemoryBuffer> ObjBuffer)
: Layer(Layer), MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {}
+ ~ObjectLinkingLayerJITLinkContext() {
+ // If there is an object buffer return function then use it to
+ // return ownership of the buffer.
+ if (Layer.ReturnObjectBuffer)
+ Layer.ReturnObjectBuffer(std::move(ObjBuffer));
+ }
+
JITLinkMemoryManager &getMemoryManager() override { return Layer.MemMgr; }
MemoryBufferRef getObjectBuffer() const override {
@@ -41,7 +48,7 @@ public:
}
void lookup(const DenseSet<StringRef> &Symbols,
- JITLinkAsyncLookupContinuation LookupContinuation) override {
+ std::unique_ptr<JITLinkAsyncLookupContinuation> LC) override {
JITDylibSearchList SearchOrder;
MR.getTargetJITDylib().withSearchOrderDo(
@@ -54,18 +61,16 @@ public:
InternedSymbols.insert(ES.intern(S));
// OnResolve -- De-intern the symbols and pass the result to the linker.
- // FIXME: Capture LookupContinuation by move once we have c++14.
- auto SharedLookupContinuation =
- std::make_shared<JITLinkAsyncLookupContinuation>(
- std::move(LookupContinuation));
- auto OnResolve = [SharedLookupContinuation](Expected<SymbolMap> Result) {
+ auto OnResolve = [this, LookupContinuation = std::move(LC)](
+ Expected<SymbolMap> Result) mutable {
+ auto Main = Layer.getExecutionSession().intern("_main");
if (!Result)
- (*SharedLookupContinuation)(Result.takeError());
+ LookupContinuation->run(Result.takeError());
else {
AsyncLookupResult LR;
for (auto &KV : *Result)
LR[*KV.first] = KV.second;
- (*SharedLookupContinuation)(std::move(LR));
+ LookupContinuation->run(std::move(LR));
}
};
@@ -75,29 +80,25 @@ public:
});
}
- void notifyResolved(AtomGraph &G) override {
+ void notifyResolved(LinkGraph &G) override {
auto &ES = Layer.getExecutionSession();
SymbolFlagsMap ExtraSymbolsToClaim;
bool AutoClaim = Layer.AutoClaimObjectSymbols;
SymbolMap InternedResult;
- for (auto *DA : G.defined_atoms())
- if (DA->hasName() && DA->isGlobal()) {
- auto InternedName = ES.intern(DA->getName());
+ for (auto *Sym : G.defined_symbols())
+ if (Sym->hasName() && Sym->getScope() != Scope::Local) {
+ auto InternedName = ES.intern(Sym->getName());
JITSymbolFlags Flags;
- if (DA->isExported())
- Flags |= JITSymbolFlags::Exported;
- if (DA->isWeak())
- Flags |= JITSymbolFlags::Weak;
- if (DA->isCallable())
+ if (Sym->isCallable())
Flags |= JITSymbolFlags::Callable;
- if (DA->isCommon())
- Flags |= JITSymbolFlags::Common;
+ if (Sym->getScope() == Scope::Default)
+ Flags |= JITSymbolFlags::Exported;
InternedResult[InternedName] =
- JITEvaluatedSymbol(DA->getAddress(), Flags);
+ JITEvaluatedSymbol(Sym->getAddress(), Flags);
if (AutoClaim && !MR.getSymbols().count(InternedName)) {
assert(!ExtraSymbolsToClaim.count(InternedName) &&
"Duplicate symbol to claim?");
@@ -105,17 +106,17 @@ public:
}
}
- for (auto *A : G.absolute_atoms())
- if (A->hasName()) {
- auto InternedName = ES.intern(A->getName());
+ for (auto *Sym : G.absolute_symbols())
+ if (Sym->hasName()) {
+ auto InternedName = ES.intern(Sym->getName());
JITSymbolFlags Flags;
Flags |= JITSymbolFlags::Absolute;
- if (A->isWeak())
- Flags |= JITSymbolFlags::Weak;
- if (A->isCallable())
+ if (Sym->isCallable())
Flags |= JITSymbolFlags::Callable;
+ if (Sym->getLinkage() == Linkage::Weak)
+ Flags |= JITSymbolFlags::Weak;
InternedResult[InternedName] =
- JITEvaluatedSymbol(A->getAddress(), Flags);
+ JITEvaluatedSymbol(Sym->getAddress(), Flags);
if (AutoClaim && !MR.getSymbols().count(InternedName)) {
assert(!ExtraSymbolsToClaim.count(InternedName) &&
"Duplicate symbol to claim?");
@@ -126,35 +127,38 @@ public:
if (!ExtraSymbolsToClaim.empty())
if (auto Err = MR.defineMaterializing(ExtraSymbolsToClaim))
return notifyFailed(std::move(Err));
-
- MR.notifyResolved(InternedResult);
-
+ if (auto Err = MR.notifyResolved(InternedResult)) {
+ Layer.getExecutionSession().reportError(std::move(Err));
+ MR.failMaterialization();
+ return;
+ }
Layer.notifyLoaded(MR);
}
void notifyFinalized(
std::unique_ptr<JITLinkMemoryManager::Allocation> A) override {
-
if (auto Err = Layer.notifyEmitted(MR, std::move(A))) {
Layer.getExecutionSession().reportError(std::move(Err));
MR.failMaterialization();
-
return;
}
- MR.notifyEmitted();
+ if (auto Err = MR.notifyEmitted()) {
+ Layer.getExecutionSession().reportError(std::move(Err));
+ MR.failMaterialization();
+ }
}
- AtomGraphPassFunction getMarkLivePass(const Triple &TT) const override {
- return [this](AtomGraph &G) { return markResponsibilitySymbolsLive(G); };
+ LinkGraphPassFunction getMarkLivePass(const Triple &TT) const override {
+ return [this](LinkGraph &G) { return markResponsibilitySymbolsLive(G); };
}
Error modifyPassConfig(const Triple &TT, PassConfiguration &Config) override {
// Add passes to mark duplicate defs as should-discard, and to walk the
- // atom graph to build the symbol dependence graph.
+ // link graph to build the symbol dependence graph.
Config.PrePrunePasses.push_back(
- [this](AtomGraph &G) { return markSymbolsToDiscard(G); });
+ [this](LinkGraph &G) { return externalizeWeakAndCommonSymbols(G); });
Config.PostPrunePasses.push_back(
- [this](AtomGraph &G) { return computeNamedSymbolDependencies(G); });
+ [this](LinkGraph &G) { return computeNamedSymbolDependencies(G); });
Layer.modifyPassConfig(MR, TT, Config);
@@ -162,65 +166,59 @@ public:
}
private:
- using AnonAtomNamedDependenciesMap =
- DenseMap<const DefinedAtom *, SymbolNameSet>;
+ using AnonToNamedDependenciesMap = DenseMap<const Symbol *, SymbolNameSet>;
- Error markSymbolsToDiscard(AtomGraph &G) {
+ Error externalizeWeakAndCommonSymbols(LinkGraph &G) {
auto &ES = Layer.getExecutionSession();
- for (auto *DA : G.defined_atoms())
- if (DA->isWeak() && DA->hasName()) {
- auto S = ES.intern(DA->getName());
- auto I = MR.getSymbols().find(S);
- if (I == MR.getSymbols().end())
- DA->setShouldDiscard(true);
+ for (auto *Sym : G.defined_symbols())
+ if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) {
+ if (!MR.getSymbols().count(ES.intern(Sym->getName())))
+ G.makeExternal(*Sym);
}
- for (auto *A : G.absolute_atoms())
- if (A->isWeak() && A->hasName()) {
- auto S = ES.intern(A->getName());
- auto I = MR.getSymbols().find(S);
- if (I == MR.getSymbols().end())
- A->setShouldDiscard(true);
+ for (auto *Sym : G.absolute_symbols())
+ if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) {
+ if (!MR.getSymbols().count(ES.intern(Sym->getName())))
+ G.makeExternal(*Sym);
}
return Error::success();
}
- Error markResponsibilitySymbolsLive(AtomGraph &G) const {
+ Error markResponsibilitySymbolsLive(LinkGraph &G) const {
auto &ES = Layer.getExecutionSession();
- for (auto *DA : G.defined_atoms())
- if (DA->hasName() &&
- MR.getSymbols().count(ES.intern(DA->getName())))
- DA->setLive(true);
+ for (auto *Sym : G.defined_symbols())
+ if (Sym->hasName() && MR.getSymbols().count(ES.intern(Sym->getName())))
+ Sym->setLive(true);
return Error::success();
}
- Error computeNamedSymbolDependencies(AtomGraph &G) {
+ Error computeNamedSymbolDependencies(LinkGraph &G) {
auto &ES = MR.getTargetJITDylib().getExecutionSession();
auto AnonDeps = computeAnonDeps(G);
- for (auto *DA : G.defined_atoms()) {
+ for (auto *Sym : G.defined_symbols()) {
// Skip anonymous and non-global atoms: we do not need dependencies for
// these.
- if (!DA->hasName() || !DA->isGlobal())
+ if (Sym->getScope() == Scope::Local)
continue;
- auto DAName = ES.intern(DA->getName());
- SymbolNameSet &DADeps = NamedSymbolDeps[DAName];
+ auto SymName = ES.intern(Sym->getName());
+ SymbolNameSet &SymDeps = NamedSymbolDeps[SymName];
- for (auto &E : DA->edges()) {
- auto &TA = E.getTarget();
+ for (auto &E : Sym->getBlock().edges()) {
+ auto &TargetSym = E.getTarget();
- if (TA.hasName())
- DADeps.insert(ES.intern(TA.getName()));
+ if (TargetSym.getScope() != Scope::Local)
+ SymDeps.insert(ES.intern(TargetSym.getName()));
else {
- assert(TA.isDefined() && "Anonymous atoms must be defined");
- auto &DTA = static_cast<DefinedAtom &>(TA);
- auto I = AnonDeps.find(&DTA);
+ assert(TargetSym.isDefined() &&
+ "Anonymous/local symbols must be defined");
+ auto I = AnonDeps.find(&TargetSym);
if (I != AnonDeps.end())
for (auto &S : I->second)
- DADeps.insert(S);
+ SymDeps.insert(S);
}
}
}
@@ -228,58 +226,59 @@ private:
return Error::success();
}
- AnonAtomNamedDependenciesMap computeAnonDeps(AtomGraph &G) {
+ AnonToNamedDependenciesMap computeAnonDeps(LinkGraph &G) {
auto &ES = MR.getTargetJITDylib().getExecutionSession();
- AnonAtomNamedDependenciesMap DepMap;
+ AnonToNamedDependenciesMap DepMap;
- // For all anonymous atoms:
+ // For all anonymous symbols:
// (1) Add their named dependencies.
// (2) Add them to the worklist for further iteration if they have any
- // depend on any other anonymous atoms.
+  //     dependencies on other anonymous symbols.
struct WorklistEntry {
- WorklistEntry(DefinedAtom *DA, DenseSet<DefinedAtom *> DAAnonDeps)
- : DA(DA), DAAnonDeps(std::move(DAAnonDeps)) {}
+ WorklistEntry(Symbol *Sym, DenseSet<Symbol *> SymAnonDeps)
+ : Sym(Sym), SymAnonDeps(std::move(SymAnonDeps)) {}
- DefinedAtom *DA = nullptr;
- DenseSet<DefinedAtom *> DAAnonDeps;
+ Symbol *Sym = nullptr;
+ DenseSet<Symbol *> SymAnonDeps;
};
std::vector<WorklistEntry> Worklist;
- for (auto *DA : G.defined_atoms())
- if (!DA->hasName()) {
- auto &DANamedDeps = DepMap[DA];
- DenseSet<DefinedAtom *> DAAnonDeps;
-
- for (auto &E : DA->edges()) {
- auto &TA = E.getTarget();
- if (TA.hasName())
- DANamedDeps.insert(ES.intern(TA.getName()));
+ for (auto *Sym : G.defined_symbols())
+ if (!Sym->hasName()) {
+ auto &SymNamedDeps = DepMap[Sym];
+ DenseSet<Symbol *> SymAnonDeps;
+
+ for (auto &E : Sym->getBlock().edges()) {
+ auto &TargetSym = E.getTarget();
+ if (TargetSym.hasName())
+ SymNamedDeps.insert(ES.intern(TargetSym.getName()));
else {
- assert(TA.isDefined() && "Anonymous atoms must be defined");
- DAAnonDeps.insert(static_cast<DefinedAtom *>(&TA));
+ assert(TargetSym.isDefined() &&
+ "Anonymous symbols must be defined");
+ SymAnonDeps.insert(&TargetSym);
}
}
- if (!DAAnonDeps.empty())
- Worklist.push_back(WorklistEntry(DA, std::move(DAAnonDeps)));
+ if (!SymAnonDeps.empty())
+ Worklist.push_back(WorklistEntry(Sym, std::move(SymAnonDeps)));
}
- // Loop over all anonymous atoms with anonymous dependencies, propagating
+ // Loop over all anonymous symbols with anonymous dependencies, propagating
// their respective *named* dependencies. Iterate until we hit a stable
// state.
bool Changed;
do {
Changed = false;
for (auto &WLEntry : Worklist) {
- auto *DA = WLEntry.DA;
- auto &DANamedDeps = DepMap[DA];
- auto &DAAnonDeps = WLEntry.DAAnonDeps;
+ auto *Sym = WLEntry.Sym;
+ auto &SymNamedDeps = DepMap[Sym];
+ auto &SymAnonDeps = WLEntry.SymAnonDeps;
- for (auto *TA : DAAnonDeps) {
- auto I = DepMap.find(TA);
+ for (auto *TargetSym : SymAnonDeps) {
+ auto I = DepMap.find(TargetSym);
if (I != DepMap.end())
for (const auto &S : I->second)
- Changed |= DANamedDeps.insert(S).second;
+ Changed |= SymNamedDeps.insert(S).second;
}
}
} while (Changed);
@@ -330,7 +329,7 @@ ObjectLinkingLayer::~ObjectLinkingLayer() {
void ObjectLinkingLayer::emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) {
assert(O && "Object must not be null");
- jitLink(llvm::make_unique<ObjectLinkingLayerJITLinkContext>(
+ jitLink(std::make_unique<ObjectLinkingLayerJITLinkContext>(
*this, std::move(R), std::move(O)));
}
@@ -410,7 +409,7 @@ Error ObjectLinkingLayer::removeAllModules() {
}
EHFrameRegistrationPlugin::EHFrameRegistrationPlugin(
- jitlink::EHFrameRegistrar &Registrar)
+ EHFrameRegistrar &Registrar)
: Registrar(Registrar) {}
void EHFrameRegistrationPlugin::modifyPassConfig(
@@ -419,61 +418,66 @@ void EHFrameRegistrationPlugin::modifyPassConfig(
assert(!InProcessLinks.count(&MR) && "Link for MR already being tracked?");
PassConfig.PostFixupPasses.push_back(
- createEHFrameRecorderPass(TT, [this, &MR](JITTargetAddress Addr) {
+ createEHFrameRecorderPass(TT, [this, &MR](JITTargetAddress Addr,
+ size_t Size) {
if (Addr)
- InProcessLinks[&MR] = Addr;
+ InProcessLinks[&MR] = { Addr, Size };
}));
}
Error EHFrameRegistrationPlugin::notifyEmitted(
MaterializationResponsibility &MR) {
- auto EHFrameAddrItr = InProcessLinks.find(&MR);
- if (EHFrameAddrItr == InProcessLinks.end())
+ auto EHFrameRangeItr = InProcessLinks.find(&MR);
+ if (EHFrameRangeItr == InProcessLinks.end())
return Error::success();
- auto EHFrameAddr = EHFrameAddrItr->second;
- assert(EHFrameAddr && "eh-frame addr to register can not be null");
+ auto EHFrameRange = EHFrameRangeItr->second;
+ assert(EHFrameRange.Addr &&
+ "eh-frame addr to register can not be null");
- InProcessLinks.erase(EHFrameAddrItr);
+ InProcessLinks.erase(EHFrameRangeItr);
if (auto Key = MR.getVModuleKey())
- TrackedEHFrameAddrs[Key] = EHFrameAddr;
+ TrackedEHFrameRanges[Key] = EHFrameRange;
else
- UntrackedEHFrameAddrs.push_back(EHFrameAddr);
+ UntrackedEHFrameRanges.push_back(EHFrameRange);
- return Registrar.registerEHFrames(EHFrameAddr);
+ return Registrar.registerEHFrames(EHFrameRange.Addr, EHFrameRange.Size);
}
Error EHFrameRegistrationPlugin::notifyRemovingModule(VModuleKey K) {
- auto EHFrameAddrItr = TrackedEHFrameAddrs.find(K);
- if (EHFrameAddrItr == TrackedEHFrameAddrs.end())
+ auto EHFrameRangeItr = TrackedEHFrameRanges.find(K);
+ if (EHFrameRangeItr == TrackedEHFrameRanges.end())
return Error::success();
- auto EHFrameAddr = EHFrameAddrItr->second;
- assert(EHFrameAddr && "Tracked eh-frame addr must not be null");
+ auto EHFrameRange = EHFrameRangeItr->second;
+ assert(EHFrameRange.Addr && "Tracked eh-frame range must not be null");
- TrackedEHFrameAddrs.erase(EHFrameAddrItr);
+ TrackedEHFrameRanges.erase(EHFrameRangeItr);
- return Registrar.deregisterEHFrames(EHFrameAddr);
+ return Registrar.deregisterEHFrames(EHFrameRange.Addr, EHFrameRange.Size);
}
Error EHFrameRegistrationPlugin::notifyRemovingAllModules() {
- std::vector<JITTargetAddress> EHFrameAddrs = std::move(UntrackedEHFrameAddrs);
- EHFrameAddrs.reserve(EHFrameAddrs.size() + TrackedEHFrameAddrs.size());
+ std::vector<EHFrameRange> EHFrameRanges =
+ std::move(UntrackedEHFrameRanges);
+ EHFrameRanges.reserve(EHFrameRanges.size() + TrackedEHFrameRanges.size());
- for (auto &KV : TrackedEHFrameAddrs)
- EHFrameAddrs.push_back(KV.second);
+ for (auto &KV : TrackedEHFrameRanges)
+ EHFrameRanges.push_back(KV.second);
- TrackedEHFrameAddrs.clear();
+ TrackedEHFrameRanges.clear();
Error Err = Error::success();
- while (!EHFrameAddrs.empty()) {
- auto EHFrameAddr = EHFrameAddrs.back();
- assert(EHFrameAddr && "Untracked eh-frame addr must not be null");
- EHFrameAddrs.pop_back();
- Err = joinErrors(std::move(Err), Registrar.deregisterEHFrames(EHFrameAddr));
+ while (!EHFrameRanges.empty()) {
+ auto EHFrameRange = EHFrameRanges.back();
+ assert(EHFrameRange.Addr && "Untracked eh-frame range must not be null");
+ EHFrameRanges.pop_back();
+ Err = joinErrors(std::move(Err),
+ Registrar.deregisterEHFrames(EHFrameRange.Addr,
+ EHFrameRange.Size));
}
return Err;
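
The registrar interface used above now takes an (address, size) pair instead of a bare address. A conforming no-op implementation, purely illustrative:

    #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"

    class NullEHFrameRegistrar : public llvm::jitlink::EHFrameRegistrar {
    public:
      llvm::Error registerEHFrames(llvm::JITTargetAddress Addr,
                                   size_t Size) override {
        return llvm::Error::success(); // A real registrar records the range.
      }
      llvm::Error deregisterEHFrames(llvm::JITTargetAddress Addr,
                                     size_t Size) override {
        return llvm::Error::success();
      }
    };
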
diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index 98129e1690d2..e0af3df9d010 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -97,7 +97,7 @@ public:
template <typename LayerT>
std::unique_ptr<GenericLayerImpl<LayerT>> createGenericLayer(LayerT &Layer) {
- return llvm::make_unique<GenericLayerImpl<LayerT>>(Layer);
+ return std::make_unique<GenericLayerImpl<LayerT>>(Layer);
}
} // end namespace detail
@@ -316,7 +316,8 @@ public:
if (auto Err = CtorRunner.runViaLayer(*this))
return std::move(Err);
- IRStaticDestructorRunners.emplace_back(std::move(DtorNames), K);
+ IRStaticDestructorRunners.emplace_back(AcknowledgeORCv1Deprecation,
+ std::move(DtorNames), K);
return K;
}
@@ -326,7 +327,7 @@ public:
LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
return addIRModule(CompileLayer, std::move(M),
- llvm::make_unique<SectionMemoryManager>(),
+ std::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
@@ -340,7 +341,7 @@ public:
inconvertibleErrorCode());
return addIRModule(*CODLayer, std::move(M),
- llvm::make_unique<SectionMemoryManager>(),
+ std::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
@@ -468,7 +469,7 @@ private:
if (!CCMgr)
return nullptr;
- return llvm::make_unique<CODLayerT>(
+ return std::make_unique<CODLayerT>(
AcknowledgeORCv1Deprecation, ES, CompileLayer,
[&Resolvers](orc::VModuleKey K) {
auto ResolverI = Resolvers.find(K);
diff --git a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index b22ecd5f80a1..939cd539d1fb 100644
--- a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -27,9 +27,9 @@ public:
// Build an OnResolve callback to unwrap the interned strings and pass them
// to the OnResolved callback.
- // FIXME: Switch to move capture of OnResolved once we have c++14.
auto OnResolvedWithUnwrap =
- [OnResolved](Expected<SymbolMap> InternedResult) {
+ [OnResolved = std::move(OnResolved)](
+ Expected<SymbolMap> InternedResult) mutable {
if (!InternedResult) {
OnResolved(InternedResult.takeError());
return;
@@ -50,7 +50,7 @@ public:
MR.getTargetJITDylib().withSearchOrderDo(
[&](const JITDylibSearchList &JDs) { SearchOrder = JDs; });
ES.lookup(SearchOrder, InternedSymbols, SymbolState::Resolved,
- OnResolvedWithUnwrap, RegisterDependencies);
+ std::move(OnResolvedWithUnwrap), RegisterDependencies);
}
Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) {
@@ -133,8 +133,6 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
JITDylibSearchOrderResolver Resolver(*SharedR);
- // FIXME: Switch to move-capture for the 'O' buffer once we have c++14.
- MemoryBuffer *UnownedObjBuffer = O.release();
jitLinkForORC(
**Obj, std::move(O), *MemMgr, Resolver, ProcessAllSections,
[this, K, SharedR, &Obj, InternalSymbols](
@@ -143,9 +141,8 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
return onObjLoad(K, *SharedR, **Obj, std::move(LoadedObjInfo),
ResolvedSymbols, *InternalSymbols);
},
- [this, K, SharedR, UnownedObjBuffer](Error Err) {
- std::unique_ptr<MemoryBuffer> ObjBuffer(UnownedObjBuffer);
- onObjEmit(K, std::move(ObjBuffer), *SharedR, std::move(Err));
+ [this, K, SharedR, O = std::move(O)](Error Err) mutable {
+ onObjEmit(K, std::move(O), *SharedR, std::move(Err));
});
}
@@ -184,7 +181,10 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim))
return Err;
- R.notifyResolved(Symbols);
+ if (auto Err = R.notifyResolved(Symbols)) {
+ R.failMaterialization();
+ return Err;
+ }
if (NotifyLoaded)
NotifyLoaded(K, Obj, *LoadedObjInfo);
@@ -201,7 +201,11 @@ void RTDyldObjectLinkingLayer::onObjEmit(
return;
}
- R.notifyEmitted();
+ if (auto Err = R.notifyEmitted()) {
+ getExecutionSession().reportError(std::move(Err));
+ R.failMaterialization();
+ return;
+ }
if (NotifyEmitted)
NotifyEmitted(K, std::move(ObjBuffer));
diff --git a/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp b/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp
new file mode 100644
index 000000000000..f22acf50419d
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp
@@ -0,0 +1,307 @@
+//===-- SpeculateAnalyses.cpp - Speculation Analyses ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/SpeculateAnalyses.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <algorithm>
+
+namespace {
+using namespace llvm;
+SmallVector<const BasicBlock *, 8> findBBwithCalls(const Function &F,
+ bool IndirectCall = false) {
+ SmallVector<const BasicBlock *, 8> BBs;
+
+ auto findCallInst = [&IndirectCall](const Instruction &I) {
+ if (auto Call = dyn_cast<CallBase>(&I))
+ return Call->isIndirectCall() ? IndirectCall : true;
+ else
+ return false;
+ };
+ for (auto &BB : F)
+ if (findCallInst(*BB.getTerminator()) ||
+ llvm::any_of(BB.instructionsWithoutDebug(), findCallInst))
+ BBs.emplace_back(&BB);
+
+ return BBs;
+}
+} // namespace
+
+// Implementations of Queries shouldn't need to lock resources such as the
+// LLVMContext: each argument (function) has a non-shared LLVMContext.
+// If a Query contains state, a suitable locking scheme must be provided.
+namespace llvm {
+namespace orc {
+
+// Collect direct calls only
+void SpeculateQuery::findCalles(const BasicBlock *BB,
+ DenseSet<StringRef> &CallesNames) {
+ assert(BB != nullptr && "Traversing Null BB to find calls?");
+
+ auto getCalledFunction = [&CallesNames](const CallBase *Call) {
+ auto CalledValue = Call->getCalledOperand()->stripPointerCasts();
+ if (auto DirectCall = dyn_cast<Function>(CalledValue))
+ CallesNames.insert(DirectCall->getName());
+ };
+ for (auto &I : BB->instructionsWithoutDebug())
+ if (auto CI = dyn_cast<CallInst>(&I))
+ getCalledFunction(CI);
+
+ if (auto II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ getCalledFunction(II);
+}
+
+bool SpeculateQuery::isStraightLine(const Function &F) {
+ return llvm::all_of(F.getBasicBlockList(), [](const BasicBlock &BB) {
+ return BB.getSingleSuccessor() != nullptr;
+ });
+}
+
+// BlockFreqQuery Implementations
+
+size_t BlockFreqQuery::numBBToGet(size_t numBB) {
+ // small CFG
+ if (numBB < 4)
+ return numBB;
+ // mid-size CFG
+ else if (numBB < 20)
+ return (numBB / 2);
+ else
+ return (numBB / 2) + (numBB / 4);
+}
+
+BlockFreqQuery::ResultTy BlockFreqQuery::operator()(Function &F) {
+ DenseMap<StringRef, DenseSet<StringRef>> CallerAndCalles;
+ DenseSet<StringRef> Calles;
+ SmallVector<std::pair<const BasicBlock *, uint64_t>, 8> BBFreqs;
+
+ PassBuilder PB;
+ FunctionAnalysisManager FAM;
+ PB.registerFunctionAnalyses(FAM);
+
+ auto IBBs = findBBwithCalls(F);
+
+ if (IBBs.empty())
+ return None;
+
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+
+ for (const auto I : IBBs)
+ BBFreqs.push_back({I, BFI.getBlockFreq(I).getFrequency()});
+
+ assert(IBBs.size() == BBFreqs.size() && "BB Count Mismatch");
+
+ llvm::sort(BBFreqs.begin(), BBFreqs.end(),
+ [](decltype(BBFreqs)::const_reference BBF,
+ decltype(BBFreqs)::const_reference BBS) {
+               return BBF.second > BBS.second;
+ });
+
+  // Ignore the number of direct calls within a BB.
+ auto Topk = numBBToGet(BBFreqs.size());
+
+ for (size_t i = 0; i < Topk; i++)
+ findCalles(BBFreqs[i].first, Calles);
+
+ assert(!Calles.empty() && "Running Analysis on Function with no calls?");
+
+ CallerAndCalles.insert({F.getName(), std::move(Calles)});
+
+ return CallerAndCalles;
+}
+
+// SequenceBBQuery Implementation
+std::size_t SequenceBBQuery::getHottestBlocks(std::size_t TotalBlocks) {
+ if (TotalBlocks == 1)
+ return TotalBlocks;
+ return TotalBlocks / 2;
+}
+
+// FIXME: Find a better implementation.
+SequenceBBQuery::BlockListTy
+SequenceBBQuery::rearrangeBB(const Function &F, const BlockListTy &BBList) {
+ BlockListTy RearrangedBBSet;
+
+ for (auto &Block : F.getBasicBlockList())
+ if (llvm::is_contained(BBList, &Block))
+ RearrangedBBSet.push_back(&Block);
+
+ assert(RearrangedBBSet.size() == BBList.size() &&
+ "BasicBlock missing while rearranging?");
+ return RearrangedBBSet;
+}
+
+void SequenceBBQuery::traverseToEntryBlock(const BasicBlock *AtBB,
+ const BlockListTy &CallerBlocks,
+ const BackEdgesInfoTy &BackEdgesInfo,
+ const BranchProbabilityInfo *BPI,
+ VisitedBlocksInfoTy &VisitedBlocks) {
+ auto Itr = VisitedBlocks.find(AtBB);
+ if (Itr != VisitedBlocks.end()) { // already visited.
+ if (!Itr->second.Upward)
+ return;
+ Itr->second.Upward = false;
+ } else {
+    // Create a hint for newly discovered blocks.
+ WalkDirection BlockHint;
+ BlockHint.Upward = false;
+ // FIXME: Expensive Check
+ if (llvm::is_contained(CallerBlocks, AtBB))
+ BlockHint.CallerBlock = true;
+ VisitedBlocks.insert(std::make_pair(AtBB, BlockHint));
+ }
+
+ const_pred_iterator PIt = pred_begin(AtBB), EIt = pred_end(AtBB);
+  // Move this check to the top: once we have the code setup to launch
+  // speculative compiles for functions in the entry BB, this will trigger
+  // the speculative compiles before the program runs.
+ if (PIt == EIt) // No Preds.
+ return;
+
+ DenseSet<const BasicBlock *> PredSkipNodes;
+
+  // Since we are checking the predecessors' back-edges, this block occurs
+  // in the second position.
+ for (auto &I : BackEdgesInfo)
+ if (I.second == AtBB)
+ PredSkipNodes.insert(I.first);
+
+  // Skip predecessors which are sources of back-edges.
+ for (; PIt != EIt; ++PIt)
+ // checking EdgeHotness is cheaper
+ if (BPI->isEdgeHot(*PIt, AtBB) && !PredSkipNodes.count(*PIt))
+ traverseToEntryBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI,
+ VisitedBlocks);
+}
+
+void SequenceBBQuery::traverseToExitBlock(const BasicBlock *AtBB,
+ const BlockListTy &CallerBlocks,
+ const BackEdgesInfoTy &BackEdgesInfo,
+ const BranchProbabilityInfo *BPI,
+ VisitedBlocksInfoTy &VisitedBlocks) {
+ auto Itr = VisitedBlocks.find(AtBB);
+ if (Itr != VisitedBlocks.end()) { // already visited.
+ if (!Itr->second.Downward)
+ return;
+ Itr->second.Downward = false;
+ } else {
+    // Create a hint for newly discovered blocks.
+ WalkDirection BlockHint;
+ BlockHint.Downward = false;
+ // FIXME: Expensive Check
+ if (llvm::is_contained(CallerBlocks, AtBB))
+ BlockHint.CallerBlock = true;
+ VisitedBlocks.insert(std::make_pair(AtBB, BlockHint));
+ }
+
+ succ_const_iterator PIt = succ_begin(AtBB), EIt = succ_end(AtBB);
+ if (PIt == EIt) // No succs.
+ return;
+
+ // If there are hot edges, then compute SuccSkipNodes.
+ DenseSet<const BasicBlock *> SuccSkipNodes;
+
+  // Since we are checking the successors' back-edges, this block occurs
+  // in the first position.
+ for (auto &I : BackEdgesInfo)
+ if (I.first == AtBB)
+ SuccSkipNodes.insert(I.second);
+
+ for (; PIt != EIt; ++PIt)
+ if (BPI->isEdgeHot(AtBB, *PIt) && !SuccSkipNodes.count(*PIt))
+ traverseToExitBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI,
+ VisitedBlocks);
+}
+
+// Get the block frequencies, take the most frequently executed blocks, walk
+// from those blocks towards the entry block, and discover the basic blocks
+// that contain calls.
+SequenceBBQuery::BlockListTy
+SequenceBBQuery::queryCFG(Function &F, const BlockListTy &CallerBlocks) {
+
+ BlockFreqInfoTy BBFreqs;
+ VisitedBlocksInfoTy VisitedBlocks;
+ BackEdgesInfoTy BackEdgesInfo;
+
+ PassBuilder PB;
+ FunctionAnalysisManager FAM;
+ PB.registerFunctionAnalyses(FAM);
+
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+
+ llvm::FindFunctionBackedges(F, BackEdgesInfo);
+
+ for (const auto I : CallerBlocks)
+ BBFreqs.push_back({I, BFI.getBlockFreq(I).getFrequency()});
+
+ llvm::sort(BBFreqs, [](decltype(BBFreqs)::const_reference Bbf,
+ decltype(BBFreqs)::const_reference Bbs) {
+ return Bbf.second > Bbs.second;
+ });
+
+ ArrayRef<std::pair<const BasicBlock *, uint64_t>> HotBlocksRef(BBFreqs);
+ HotBlocksRef =
+ HotBlocksRef.drop_back(BBFreqs.size() - getHottestBlocks(BBFreqs.size()));
+
+ BranchProbabilityInfo *BPI =
+ FAM.getCachedResult<BranchProbabilityAnalysis>(F);
+
+  // Visit the N hottest blocks, traversing upwards to the entry block and
+  // downwards to the exit blocks.
+
+ for (auto I : HotBlocksRef) {
+ traverseToEntryBlock(I.first, CallerBlocks, BackEdgesInfo, BPI,
+ VisitedBlocks);
+ traverseToExitBlock(I.first, CallerBlocks, BackEdgesInfo, BPI,
+ VisitedBlocks);
+ }
+
+ BlockListTy MinCallerBlocks;
+ for (auto &I : VisitedBlocks)
+ if (I.second.CallerBlock)
+ MinCallerBlocks.push_back(std::move(I.first));
+
+ return rearrangeBB(F, MinCallerBlocks);
+}
+
+SpeculateQuery::ResultTy SequenceBBQuery::operator()(Function &F) {
+ // reduce the number of lists!
+ DenseMap<StringRef, DenseSet<StringRef>> CallerAndCalles;
+ DenseSet<StringRef> Calles;
+ BlockListTy SequencedBlocks;
+ BlockListTy CallerBlocks;
+
+ CallerBlocks = findBBwithCalls(F);
+ if (CallerBlocks.empty())
+ return None;
+
+ if (isStraightLine(F))
+ SequencedBlocks = rearrangeBB(F, CallerBlocks);
+ else
+ SequencedBlocks = queryCFG(F, CallerBlocks);
+
+ for (auto BB : SequencedBlocks)
+ findCalles(BB, Calles);
+
+ CallerAndCalles.insert({F.getName(), std::move(Calles)});
+ return CallerAndCalles;
+}
+
+} // namespace orc
+} // namespace llvm
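
Taken on its own, a query from this file can be run directly on a function. A sketch, assuming F is a Function that contains at least one call:

    BlockFreqQuery Query;
    // ResultTy is Optional<DenseMap<StringRef, DenseSet<StringRef>>>.
    if (auto Result = Query(F))
      for (auto &KV : *Result)
        (void)KV; // KV.first: caller name; KV.second: likely callees.
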
diff --git a/lib/ExecutionEngine/Orc/Speculation.cpp b/lib/ExecutionEngine/Orc/Speculation.cpp
new file mode 100644
index 000000000000..f29201c147a1
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/Speculation.cpp
@@ -0,0 +1,146 @@
+//===---------- Speculation.cpp - Utilities for Speculation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Speculation.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+
+#include <vector>
+
+namespace llvm {
+
+namespace orc {
+
+// ImplSymbolMap methods
+void ImplSymbolMap::trackImpls(SymbolAliasMap ImplMaps, JITDylib *SrcJD) {
+ assert(SrcJD && "Tracking on Null Source .impl dylib");
+ std::lock_guard<std::mutex> Lockit(ConcurrentAccess);
+ for (auto &I : ImplMaps) {
+ auto It = Maps.insert({I.first, {I.second.Aliasee, SrcJD}});
+    // Check the rationale when independent dylibs have the same symbol name.
+ assert(It.second && "ImplSymbols are already tracked for this Symbol?");
+ (void)(It);
+ }
+}
+
+// Trigger Speculative Compiles.
+void Speculator::speculateForEntryPoint(Speculator *Ptr, uint64_t StubId) {
+ assert(Ptr && " Null Address Received in orc_speculate_for ");
+ Ptr->speculateFor(StubId);
+}
+
+Error Speculator::addSpeculationRuntime(JITDylib &JD,
+ MangleAndInterner &Mangle) {
+ JITEvaluatedSymbol ThisPtr(pointerToJITTargetAddress(this),
+ JITSymbolFlags::Exported);
+ JITEvaluatedSymbol SpeculateForEntryPtr(
+ pointerToJITTargetAddress(&speculateForEntryPoint),
+ JITSymbolFlags::Exported);
+ return JD.define(absoluteSymbols({
+ {Mangle("__orc_speculator"), ThisPtr}, // Data Symbol
+ {Mangle("__orc_speculate_for"), SpeculateForEntryPtr} // Callable Symbol
+ }));
+}
+
+// If two modules share the same LLVMContext, different threads must not
+// access them concurrently without locking the associated LLVMContext;
+// this implementation follows that contract.
+void IRSpeculationLayer::emit(MaterializationResponsibility R,
+ ThreadSafeModule TSM) {
+
+ assert(TSM && "Speculation Layer received Null Module ?");
+ assert(TSM.getContext().getContext() != nullptr &&
+ "Module with null LLVMContext?");
+
+  // Instrument the runtime calls while holding the Module's context lock.
+ TSM.withModuleDo([this, &R](Module &M) {
+ auto &MContext = M.getContext();
+ auto SpeculatorVTy = StructType::create(MContext, "Class.Speculator");
+ auto RuntimeCallTy = FunctionType::get(
+ Type::getVoidTy(MContext),
+ {SpeculatorVTy->getPointerTo(), Type::getInt64Ty(MContext)}, false);
+ auto RuntimeCall =
+ Function::Create(RuntimeCallTy, Function::LinkageTypes::ExternalLinkage,
+ "__orc_speculate_for", &M);
+ auto SpeclAddr = new GlobalVariable(
+ M, SpeculatorVTy, false, GlobalValue::LinkageTypes::ExternalLinkage,
+ nullptr, "__orc_speculator");
+
+ IRBuilder<> Mutator(MContext);
+
+    // QueryAnalysis is allowed to transform the IR source; for example,
+    // SimplifyCFG helps the static branch prediction heuristics.
+ for (auto &Fn : M.getFunctionList()) {
+ if (!Fn.isDeclaration()) {
+
+ auto IRNames = QueryAnalysis(Fn);
+ // Instrument and register if Query has result
+ if (IRNames.hasValue()) {
+
+ // Emit globals for each function.
+ auto LoadValueTy = Type::getInt8Ty(MContext);
+ auto SpeculatorGuard = new GlobalVariable(
+ M, LoadValueTy, false, GlobalValue::LinkageTypes::InternalLinkage,
+ ConstantInt::get(LoadValueTy, 0),
+ "__orc_speculate.guard.for." + Fn.getName());
+ SpeculatorGuard->setAlignment(Align::None());
+ SpeculatorGuard->setUnnamedAddr(GlobalValue::UnnamedAddr::Local);
+
+ BasicBlock &ProgramEntry = Fn.getEntryBlock();
+          // Create BasicBlocks before the program's entry basic block.
+ BasicBlock *SpeculateBlock = BasicBlock::Create(
+ MContext, "__orc_speculate.block", &Fn, &ProgramEntry);
+ BasicBlock *SpeculateDecisionBlock = BasicBlock::Create(
+ MContext, "__orc_speculate.decision.block", &Fn, SpeculateBlock);
+
+ assert(SpeculateDecisionBlock == &Fn.getEntryBlock() &&
+ "SpeculateDecisionBlock not updated?");
+ Mutator.SetInsertPoint(SpeculateDecisionBlock);
+
+ auto LoadGuard =
+ Mutator.CreateLoad(LoadValueTy, SpeculatorGuard, "guard.value");
+          // If the just-loaded guard value equals 0, we may speculate.
+ auto CanSpeculate =
+ Mutator.CreateICmpEQ(LoadGuard, ConstantInt::get(LoadValueTy, 0),
+ "compare.to.speculate");
+ Mutator.CreateCondBr(CanSpeculate, SpeculateBlock, &ProgramEntry);
+
+ Mutator.SetInsertPoint(SpeculateBlock);
+ auto ImplAddrToUint =
+ Mutator.CreatePtrToInt(&Fn, Type::getInt64Ty(MContext));
+ Mutator.CreateCall(RuntimeCallTy, RuntimeCall,
+ {SpeclAddr, ImplAddrToUint});
+ Mutator.CreateStore(ConstantInt::get(LoadValueTy, 1),
+ SpeculatorGuard);
+ Mutator.CreateBr(&ProgramEntry);
+
+ assert(Mutator.GetInsertBlock()->getParent() == &Fn &&
+ "IR builder association mismatch?");
+ S.registerSymbols(internToJITSymbols(IRNames.getValue()),
+ &R.getTargetJITDylib());
+ }
+ }
+ }
+ });
+
+ assert(!TSM.withModuleDo([](const Module &M) { return verifyModule(M); }) &&
+ "Speculation Instrumentation breaks IR?");
+
+ NextLayer.emit(std::move(R), std::move(TSM));
+}
+
+} // namespace orc
+} // namespace llvm
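
To close the loop, the __orc_speculator and __orc_speculate_for symbols referenced by the instrumentation must be defined in a JITDylib the instrumented code can see. A hedged wiring sketch, with ES, DL, S, and JD assumed in scope:

    MangleAndInterner Mangle(ES, DL);
    if (auto Err = S.addSpeculationRuntime(JD, Mangle))
      return Err;
    // Instrumented calls to __orc_speculate_for now reach the Speculator.
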
diff --git a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
index 4cb7376758a7..1f4e6f132115 100644
--- a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
+++ b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
@@ -23,41 +23,41 @@ ThreadSafeModule cloneToNewContext(ThreadSafeModule &TSM,
if (!ShouldCloneDef)
ShouldCloneDef = [](const GlobalValue &) { return true; };
- auto Lock = TSM.getContextLock();
+ return TSM.withModuleDo([&](Module &M) {
+ SmallVector<char, 1> ClonedModuleBuffer;
- SmallVector<char, 1> ClonedModuleBuffer;
+ {
+ std::set<GlobalValue *> ClonedDefsInSrc;
+ ValueToValueMapTy VMap;
+ auto Tmp = CloneModule(M, VMap, [&](const GlobalValue *GV) {
+ if (ShouldCloneDef(*GV)) {
+ ClonedDefsInSrc.insert(const_cast<GlobalValue *>(GV));
+ return true;
+ }
+ return false;
+ });
- {
- std::set<GlobalValue *> ClonedDefsInSrc;
- ValueToValueMapTy VMap;
- auto Tmp = CloneModule(*TSM.getModule(), VMap, [&](const GlobalValue *GV) {
- if (ShouldCloneDef(*GV)) {
- ClonedDefsInSrc.insert(const_cast<GlobalValue *>(GV));
- return true;
- }
- return false;
- });
+ if (UpdateClonedDefSource)
+ for (auto *GV : ClonedDefsInSrc)
+ UpdateClonedDefSource(*GV);
- if (UpdateClonedDefSource)
- for (auto *GV : ClonedDefsInSrc)
- UpdateClonedDefSource(*GV);
+ BitcodeWriter BCWriter(ClonedModuleBuffer);
- BitcodeWriter BCWriter(ClonedModuleBuffer);
+ BCWriter.writeModule(*Tmp);
+ BCWriter.writeSymtab();
+ BCWriter.writeStrtab();
+ }
- BCWriter.writeModule(*Tmp);
- BCWriter.writeSymtab();
- BCWriter.writeStrtab();
- }
+ MemoryBufferRef ClonedModuleBufferRef(
+ StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()),
+ "cloned module buffer");
+ ThreadSafeContext NewTSCtx(std::make_unique<LLVMContext>());
- MemoryBufferRef ClonedModuleBufferRef(
- StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()),
- "cloned module buffer");
- ThreadSafeContext NewTSCtx(llvm::make_unique<LLVMContext>());
-
- auto ClonedModule =
- cantFail(parseBitcodeFile(ClonedModuleBufferRef, *NewTSCtx.getContext()));
- ClonedModule->setModuleIdentifier(TSM.getModule()->getName());
- return ThreadSafeModule(std::move(ClonedModule), std::move(NewTSCtx));
+ auto ClonedModule = cantFail(
+ parseBitcodeFile(ClonedModuleBufferRef, *NewTSCtx.getContext()));
+ ClonedModule->setModuleIdentifier(M.getName());
+ return ThreadSafeModule(std::move(ClonedModule), std::move(NewTSCtx));
+ });
}
} // end namespace orc
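
For completeness, a sketch of constructing a ThreadSafeModule for the locked withModuleDo API that this file now uses throughout (illustrative, usual LLVM headers assumed):

    auto Ctx = std::make_unique<llvm::LLVMContext>();
    auto M = std::make_unique<llvm::Module>("demo", *Ctx);
    llvm::orc::ThreadSafeModule TSM(std::move(M), std::move(Ctx));
    // The lambda runs with the module's context lock held.
    size_t NumGlobals =
        TSM.withModuleDo([](llvm::Module &Mod) { return Mod.global_size(); });
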
diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index 5606421a3cb0..184388dc4d7a 100644
--- a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -26,11 +26,11 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/raw_ostream.h"
+#include <mutex>
#include <sys/mman.h> // mmap()
#include <sys/types.h> // getpid()
@@ -203,7 +203,7 @@ PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
return;
}
- Dumpstream = make_unique<raw_fd_ostream>(DumpFd, true);
+ Dumpstream = std::make_unique<raw_fd_ostream>(DumpFd, true);
LLVMPerfJitHeader Header = {0};
if (!FillMachine(Header))
@@ -420,7 +420,7 @@ void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
rec.Tid = get_threadid();
// avoid interspersing output
- MutexGuard Guard(Mutex);
+ std::lock_guard<sys::Mutex> Guard(Mutex);
rec.CodeIndex = CodeGeneration++; // under lock!
@@ -462,7 +462,7 @@ void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
// * char name[n] : source file name in ASCII, including null termination
// avoid interspersing output
- MutexGuard Guard(Mutex);
+ std::lock_guard<sys::Mutex> Guard(Mutex);
Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index e26e6ce45db4..2df71a5e5e74 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -17,10 +17,11 @@
#include "RuntimeDyldMachO.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MutexGuard.h"
+#include <mutex>
#include <future>
@@ -120,7 +121,7 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) {
// Resolve the relocations for all symbols we currently know about.
void RuntimeDyldImpl::resolveRelocations() {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// Print out the sections prior to relocation.
LLVM_DEBUG(for (int i = 0, e = Sections.size(); i != e; ++i)
@@ -156,7 +157,7 @@ void RuntimeDyldImpl::resolveLocalRelocations() {
void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
uint64_t TargetAddress) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
if (Sections[i].getAddress() == LocalAddress) {
reassignSectionAddress(i, TargetAddress);
@@ -177,7 +178,7 @@ static Error getOffset(const SymbolRef &Sym, SectionRef Sec,
Expected<RuntimeDyldImpl::ObjSectionToIDMap>
RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
- MutexGuard locked(lock);
+ std::lock_guard<sys::Mutex> locked(lock);
// Save information about our target
Arch = (Triple::ArchType)Obj.getArch();
@@ -347,8 +348,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end();
SI != SE; ++SI) {
StubMap Stubs;
- section_iterator RelocatedSection = SI->getRelocatedSection();
+ Expected<section_iterator> RelSecOrErr = SI->getRelocatedSection();
+ if (!RelSecOrErr)
+ return RelSecOrErr.takeError();
+
+ section_iterator RelocatedSection = *RelSecOrErr;
if (RelocatedSection == SE)
continue;
@@ -535,9 +540,10 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
bool IsCode = Section.isText();
bool IsReadOnly = isReadOnlyData(Section);
- StringRef Name;
- if (auto EC = Section.getName(Name))
- return errorCodeToError(EC);
+ Expected<StringRef> NameOrErr = Section.getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ StringRef Name = *NameOrErr;
uint64_t StubBufSize = computeSectionStubBufSize(Obj, Section);
@@ -646,7 +652,12 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj,
unsigned StubBufSize = 0;
for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end();
SI != SE; ++SI) {
- section_iterator RelSecI = SI->getRelocatedSection();
+
+ Expected<section_iterator> RelSecOrErr = SI->getRelocatedSection();
+ if (!RelSecOrErr)
+ report_fatal_error(toString(RelSecOrErr.takeError()));
+
+ section_iterator RelSecI = *RelSecOrErr;
if (!(RelSecI == Section))
continue;
@@ -727,16 +738,17 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
// Assign the address of each symbol
for (auto &Sym : SymbolsToAllocate) {
- uint32_t Align = Sym.getAlignment();
+ uint32_t Alignment = Sym.getAlignment();
uint64_t Size = Sym.getCommonSize();
StringRef Name;
if (auto NameOrErr = Sym.getName())
Name = *NameOrErr;
else
return NameOrErr.takeError();
- if (Align) {
+ if (Alignment) {
// This symbol has an alignment requirement.
- uint64_t AlignOffset = OffsetToAlignment((uint64_t)Addr, Align);
+ uint64_t AlignOffset =
+ offsetToAlignment((uint64_t)Addr, Align(Alignment));
Addr += AlignOffset;
Offset += AlignOffset;
}
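offsetToAlignment comes from the newly included llvm/Support/Alignment.h and returns the padding needed to reach the next aligned address, i.e. alignTo(Value, Align) - Value. A tiny worked case (pad13 is illustrative):
#include "llvm/Support/Alignment.h"
#include <cstdint>
static uint64_t pad13() {
  return llvm::offsetToAlignment(13, llvm::Align(8)); // == 3, since 13 + 3 == 16
}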
@@ -777,9 +789,10 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
// anyway, so we should guarantee that the alignment is always at least 1.
Alignment = std::max(1u, Alignment);
- StringRef Name;
- if (auto EC = Section.getName(Name))
- return errorCodeToError(EC);
+ Expected<StringRef> NameOrErr = Section.getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ StringRef Name = *NameOrErr;
StubBufSize = computeSectionStubBufSize(Obj, Section);
@@ -917,7 +930,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr,
unsigned AbiVariant) {
- if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) {
+ if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be ||
+ Arch == Triple::aarch64_32) {
// This stub has to be able to access the full address space,
// since symbol lookup won't necessarily find a handy, in-range,
// PLT stub for functions which could be anywhere.
@@ -1175,17 +1189,15 @@ Error RuntimeDyldImpl::resolveExternalSymbols() {
}
void RuntimeDyldImpl::finalizeAsync(
- std::unique_ptr<RuntimeDyldImpl> This, std::function<void(Error)> OnEmitted,
+ std::unique_ptr<RuntimeDyldImpl> This,
+ unique_function<void(Error)> OnEmitted,
std::unique_ptr<MemoryBuffer> UnderlyingBuffer) {
- // FIXME: Move-capture OnRelocsApplied and UnderlyingBuffer once we have
- // c++14.
- auto SharedUnderlyingBuffer =
- std::shared_ptr<MemoryBuffer>(std::move(UnderlyingBuffer));
auto SharedThis = std::shared_ptr<RuntimeDyldImpl>(std::move(This));
auto PostResolveContinuation =
- [SharedThis, OnEmitted, SharedUnderlyingBuffer](
- Expected<JITSymbolResolver::LookupResult> Result) {
+ [SharedThis, OnEmitted = std::move(OnEmitted),
+ UnderlyingBuffer = std::move(UnderlyingBuffer)](
+ Expected<JITSymbolResolver::LookupResult> Result) mutable {
if (!Result) {
OnEmitted(Result.takeError());
return;
@@ -1219,7 +1231,7 @@ void RuntimeDyldImpl::finalizeAsync(
}
if (!Symbols.empty()) {
- SharedThis->Resolver.lookup(Symbols, PostResolveContinuation);
+ SharedThis->Resolver.lookup(Symbols, std::move(PostResolveContinuation));
} else
PostResolveContinuation(std::map<StringRef, JITEvaluatedSymbol>());
}
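The shared_ptr workaround above existed because C++11 lambdas cannot move-capture; with the C++14 baseline, an init-capture holds move-only state directly, and unique_function (llvm/ADT/FunctionExtras.h) accepts such move-only closures where std::function could not. A sketch under those assumptions (makeContinuation is illustrative):
#include <memory>
#include <utility>
static auto makeContinuation(std::unique_ptr<int> Payload) {
  // Move-only state lives in the closure itself; no shared_ptr shim needed.
  return [Payload = std::move(Payload)]() mutable { return *Payload + 1; };
}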
@@ -1395,11 +1407,11 @@ void jitLinkForORC(object::ObjectFile &Obj,
std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
RuntimeDyld::MemoryManager &MemMgr,
JITSymbolResolver &Resolver, bool ProcessAllSections,
- std::function<Error(
+ unique_function<Error(
std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObj,
std::map<StringRef, JITEvaluatedSymbol>)>
OnLoaded,
- std::function<void(Error)> OnEmitted) {
+ unique_function<void(Error)> OnEmitted) {
RuntimeDyld RTDyld(MemMgr, Resolver);
RTDyld.setProcessAllSections(ProcessAllSections);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index d4e3b0ba7670..27a7690db34f 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -50,18 +50,18 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch,
switch (Arch) {
default: llvm_unreachable("Unsupported target for RuntimeDyldCOFF.");
case Triple::x86:
- return make_unique<RuntimeDyldCOFFI386>(MemMgr, Resolver);
+ return std::make_unique<RuntimeDyldCOFFI386>(MemMgr, Resolver);
case Triple::thumb:
- return make_unique<RuntimeDyldCOFFThumb>(MemMgr, Resolver);
+ return std::make_unique<RuntimeDyldCOFFThumb>(MemMgr, Resolver);
case Triple::x86_64:
- return make_unique<RuntimeDyldCOFFX86_64>(MemMgr, Resolver);
+ return std::make_unique<RuntimeDyldCOFFX86_64>(MemMgr, Resolver);
}
}
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) {
if (auto ObjSectionToIDOrErr = loadObjectImpl(O)) {
- return llvm::make_unique<LoadedCOFFObjectInfo>(*this, *ObjSectionToIDOrErr);
+ return std::make_unique<LoadedCOFFObjectInfo>(*this, *ObjSectionToIDOrErr);
} else {
HasError = true;
raw_string_ostream ErrStream(ErrorStr);
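llvm::make_unique (from llvm/ADT/STLExtras.h) was a pre-C++14 polyfill; these hunks retire it in favor of the standard spelling. The substitution is mechanical:
// Before: auto D = llvm::make_unique<RuntimeDyldCOFFI386>(MemMgr, Resolver);
// After:  auto D = std::make_unique<RuntimeDyldCOFFI386>(MemMgr, Resolver);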
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index ec31ea4e573c..b9c5a12e08d8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -851,7 +851,7 @@ RuntimeDyldChecker::RuntimeDyldChecker(
GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
raw_ostream &ErrStream)
- : Impl(::llvm::make_unique<RuntimeDyldCheckerImpl>(
+ : Impl(::std::make_unique<RuntimeDyldCheckerImpl>(
std::move(IsSymbolValid), std::move(GetSymbolInfo),
std::move(GetSectionInfo), std::move(GetStubInfo),
std::move(GetGOTInfo), Endianness, Disassembler, InstPrinter,
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 60041a45e2b8..440ab4174a56 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -160,9 +160,13 @@ createRTDyldELFObject(MemoryBufferRef Buffer, const ObjectFile &SourceObject,
// Iterate over all sections in the object.
auto SI = SourceObject.section_begin();
for (const auto &Sec : Obj->sections()) {
- StringRef SectionName;
- Sec.getName(SectionName);
- if (SectionName != "") {
+ Expected<StringRef> NameOrErr = Sec.getName();
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
+ continue;
+ }
+
+ if (*NameOrErr != "") {
DataRefImpl ShdrRef = Sec.getRawDataRefImpl();
Elf_Shdr *shdr = const_cast<Elf_Shdr *>(
reinterpret_cast<const Elf_Shdr *>(ShdrRef.p));
@@ -238,19 +242,19 @@ llvm::RuntimeDyldELF::create(Triple::ArchType Arch,
JITSymbolResolver &Resolver) {
switch (Arch) {
default:
- return make_unique<RuntimeDyldELF>(MemMgr, Resolver);
+ return std::make_unique<RuntimeDyldELF>(MemMgr, Resolver);
case Triple::mips:
case Triple::mipsel:
case Triple::mips64:
case Triple::mips64el:
- return make_unique<RuntimeDyldELFMips>(MemMgr, Resolver);
+ return std::make_unique<RuntimeDyldELFMips>(MemMgr, Resolver);
}
}
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyldELF::loadObject(const object::ObjectFile &O) {
if (auto ObjSectionToIDOrErr = loadObjectImpl(O))
- return llvm::make_unique<LoadedELFObjectInfo>(*this, *ObjSectionToIDOrErr);
+ return std::make_unique<LoadedELFObjectInfo>(*this, *ObjSectionToIDOrErr);
else {
HasError = true;
raw_string_ostream ErrStream(ErrorStr);
@@ -567,10 +571,11 @@ Error RuntimeDyldELF::findPPC64TOCSection(const ELFObjectFileBase &Obj,
// The TOC consists of sections .got, .toc, .tocbss, .plt in that
// order. The TOC starts where the first of these sections starts.
- for (auto &Section: Obj.sections()) {
- StringRef SectionName;
- if (auto EC = Section.getName(SectionName))
- return errorCodeToError(EC);
+ for (auto &Section : Obj.sections()) {
+ Expected<StringRef> NameOrErr = Section.getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ StringRef SectionName = *NameOrErr;
if (SectionName == ".got"
|| SectionName == ".toc"
@@ -601,13 +606,19 @@ Error RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj,
// .opd entries
for (section_iterator si = Obj.section_begin(), se = Obj.section_end();
si != se; ++si) {
- section_iterator RelSecI = si->getRelocatedSection();
+
+ Expected<section_iterator> RelSecOrErr = si->getRelocatedSection();
+ if (!RelSecOrErr)
+ report_fatal_error(toString(RelSecOrErr.takeError()));
+
+ section_iterator RelSecI = *RelSecOrErr;
if (RelSecI == Obj.section_end())
continue;
- StringRef RelSectionName;
- if (auto EC = RelSecI->getName(RelSectionName))
- return errorCodeToError(EC);
+ Expected<StringRef> NameOrErr = RelSecI->getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ StringRef RelSectionName = *NameOrErr;
if (RelSectionName != ".opd")
continue;
@@ -1865,7 +1876,12 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end();
SI != SE; ++SI) {
if (SI->relocation_begin() != SI->relocation_end()) {
- section_iterator RelocatedSection = SI->getRelocatedSection();
+ Expected<section_iterator> RelSecOrErr = SI->getRelocatedSection();
+ if (!RelSecOrErr)
+ return make_error<RuntimeDyldError>(
+ toString(RelSecOrErr.takeError()));
+
+ section_iterator RelocatedSection = *RelSecOrErr;
ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection);
assert (i != SectionMap.end());
SectionToGOTMap[i->second] = GOTSectionID;
@@ -1879,8 +1895,14 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap::iterator i, e;
for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
const SectionRef &Section = i->first;
+
StringRef Name;
- Section.getName(Name);
+ Expected<StringRef> NameOrErr = Section.getName();
+ if (NameOrErr)
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
if (Name == ".eh_frame") {
UnregisteredEHFrameSections.push_back(i->second);
break;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 68b3468fbc9d..cec7b92b8c48 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -549,7 +549,7 @@ public:
void resolveLocalRelocations();
static void finalizeAsync(std::unique_ptr<RuntimeDyldImpl> This,
- std::function<void(Error)> OnEmitted,
+ unique_function<void(Error)> OnEmitted,
std::unique_ptr<MemoryBuffer> UnderlyingBuffer);
void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 202c3ca1c507..9ca76602ea18 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -233,7 +233,10 @@ RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj,
for (const auto &Section : Obj.sections()) {
StringRef Name;
- Section.getName(Name);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
// Force emission of the __text, __eh_frame, and __gcc_except_tab sections
// if they're present. Otherwise call down to the impl to handle other
@@ -351,20 +354,22 @@ RuntimeDyldMachO::create(Triple::ArchType Arch,
llvm_unreachable("Unsupported target for RuntimeDyldMachO.");
break;
case Triple::arm:
- return make_unique<RuntimeDyldMachOARM>(MemMgr, Resolver);
+ return std::make_unique<RuntimeDyldMachOARM>(MemMgr, Resolver);
case Triple::aarch64:
- return make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver);
+ return std::make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver);
+ case Triple::aarch64_32:
+ return std::make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver);
case Triple::x86:
- return make_unique<RuntimeDyldMachOI386>(MemMgr, Resolver);
+ return std::make_unique<RuntimeDyldMachOI386>(MemMgr, Resolver);
case Triple::x86_64:
- return make_unique<RuntimeDyldMachOX86_64>(MemMgr, Resolver);
+ return std::make_unique<RuntimeDyldMachOX86_64>(MemMgr, Resolver);
}
}
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyldMachO::loadObject(const object::ObjectFile &O) {
if (auto ObjSectionToIDOrErr = loadObjectImpl(O))
- return llvm::make_unique<LoadedMachOObjectInfo>(*this,
+ return std::make_unique<LoadedMachOObjectInfo>(*this,
*ObjSectionToIDOrErr);
else {
HasError = true;
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index d2d74534cf90..dc4af08583de 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -284,14 +284,14 @@ public:
// Look for and record the EH frame section IDs.
for (const auto &SectionPair : SectionMap) {
const object::SectionRef &Section = SectionPair.first;
- StringRef Name;
- if (auto EC = Section.getName(Name))
- return errorCodeToError(EC);
+ Expected<StringRef> NameOrErr = Section.getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
// Note unwind info is stored in .pdata but often points to .xdata
// with an IMAGE_REL_AMD64_ADDR32NB relocation. Using a memory manager
// that keeps sections ordered in relation to __ImageBase is necessary.
- if (Name == ".pdata")
+ if ((*NameOrErr) == ".pdata")
UnregisteredEHFrameSections.push_back(SectionPair.second);
}
return Error::success();
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 3bec8b979f7d..a76958a9e2c2 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -289,7 +289,10 @@ public:
Error finalizeSection(const ObjectFile &Obj, unsigned SectionID,
const SectionRef &Section) {
StringRef Name;
- Section.getName(Name);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
if (Name == "__nl_symbol_ptr")
return populateIndirectSymbolPointersSection(cast<MachOObjectFile>(Obj),
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index f0de27ba14bb..523deb29b723 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -128,7 +128,10 @@ public:
Error finalizeSection(const ObjectFile &Obj, unsigned SectionID,
const SectionRef &Section) {
StringRef Name;
- Section.getName(Name);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
if (Name == "__jump_table")
return populateJumpTable(cast<MachOObjectFile>(Obj), Section, SectionID);
diff --git a/lib/FuzzMutate/FuzzerCLI.cpp b/lib/FuzzMutate/FuzzerCLI.cpp
index 63d31c035390..f2368ea7f26b 100644
--- a/lib/FuzzMutate/FuzzerCLI.cpp
+++ b/lib/FuzzMutate/FuzzerCLI.cpp
@@ -171,7 +171,7 @@ std::unique_ptr<Module> llvm::parseModule(
if (Size <= 1)
// We get bogus data given an empty corpus - just create a new module.
- return llvm::make_unique<Module>("M", Context);
+ return std::make_unique<Module>("M", Context);
auto Buffer = MemoryBuffer::getMemBuffer(
StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index eb5760daecb3..b0c26e0ecaf5 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -352,6 +352,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::PreserveAll: Out << "preserve_allcc"; break;
case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break;
case CallingConv::GHC: Out << "ghccc"; break;
+ case CallingConv::Tail: Out << "tailcc"; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
@@ -835,7 +836,7 @@ SlotTracker *ModuleSlotTracker::getMachine() {
ShouldCreateStorage = false;
MachineStorage =
- llvm::make_unique<SlotTracker>(M, ShouldInitializeAllMetadata);
+ std::make_unique<SlotTracker>(M, ShouldInitializeAllMetadata);
Machine = MachineStorage.get();
return Machine;
}
@@ -2312,7 +2313,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
std::unique_ptr<SlotTracker> MachineStorage;
if (!Machine) {
- MachineStorage = make_unique<SlotTracker>(Context);
+ MachineStorage = std::make_unique<SlotTracker>(Context);
Machine = MachineStorage.get();
}
int Slot = Machine->getMetadataSlot(N);
@@ -2950,7 +2951,7 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
FunctionSummary::FFlags FFlags = FS->fflags();
if (FFlags.ReadNone | FFlags.ReadOnly | FFlags.NoRecurse |
- FFlags.ReturnDoesNotAlias) {
+ FFlags.ReturnDoesNotAlias | FFlags.NoInline) {
Out << ", funcFlags: (";
Out << "readNone: " << FFlags.ReadNone;
Out << ", readOnly: " << FFlags.ReadOnly;
@@ -3553,6 +3554,10 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
if (Arg->hasName()) {
Out << ' ';
PrintLLVMName(Out, Arg);
+ } else {
+ int Slot = Machine.getLocalSlot(Arg);
+ assert(Slot != -1 && "expect argument in function here");
+ Out << " %" << Slot;
}
}
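With the printArgument change above, unnamed function arguments now render with their implicit slot numbers instead of a bare type. Illustrative printer output (example, not taken from this diff):
before: define i32 @f(i32, i32)
after:  define i32 @f(i32 %0, i32 %1)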
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index f989fa3b910e..15e488bbb13b 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -159,7 +159,7 @@ public:
};
class TypeAttributeImpl : public EnumAttributeImpl {
- virtual void anchor();
+ void anchor() override;
Type *Ty;
@@ -208,8 +208,8 @@ public:
Attribute getAttribute(Attribute::AttrKind Kind) const;
Attribute getAttribute(StringRef Kind) const;
- unsigned getAlignment() const;
- unsigned getStackAlignment() const;
+ MaybeAlign getAlignment() const;
+ MaybeAlign getStackAlignment() const;
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index bb90bcd7dd74..cc370e628e9a 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -142,17 +142,14 @@ Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
return Attribute(PA);
}
-Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
- assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
- assert(Align <= 0x40000000 && "Alignment too large.");
- return get(Context, Alignment, Align);
+Attribute Attribute::getWithAlignment(LLVMContext &Context, Align A) {
+ assert(A <= 0x40000000 && "Alignment too large.");
+ return get(Context, Alignment, A.value());
}
-Attribute Attribute::getWithStackAlignment(LLVMContext &Context,
- uint64_t Align) {
- assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
- assert(Align <= 0x100 && "Alignment too large.");
- return get(Context, StackAlignment, Align);
+Attribute Attribute::getWithStackAlignment(LLVMContext &Context, Align A) {
+ assert(A <= 0x100 && "Alignment too large.");
+ return get(Context, StackAlignment, A.value());
}
Attribute Attribute::getWithDereferenceableBytes(LLVMContext &Context,
@@ -244,16 +241,16 @@ bool Attribute::hasAttribute(StringRef Kind) const {
return pImpl && pImpl->hasAttribute(Kind);
}
-unsigned Attribute::getAlignment() const {
+MaybeAlign Attribute::getAlignment() const {
assert(hasAttribute(Attribute::Alignment) &&
"Trying to get alignment from non-alignment attribute!");
- return pImpl->getValueAsInt();
+ return MaybeAlign(pImpl->getValueAsInt());
}
-unsigned Attribute::getStackAlignment() const {
+MaybeAlign Attribute::getStackAlignment() const {
assert(hasAttribute(Attribute::StackAlignment) &&
"Trying to get alignment from non-alignment attribute!");
- return pImpl->getValueAsInt();
+ return MaybeAlign(pImpl->getValueAsInt());
}
uint64_t Attribute::getDereferenceableBytes() const {
@@ -670,12 +667,12 @@ Attribute AttributeSet::getAttribute(StringRef Kind) const {
return SetNode ? SetNode->getAttribute(Kind) : Attribute();
}
-unsigned AttributeSet::getAlignment() const {
- return SetNode ? SetNode->getAlignment() : 0;
+MaybeAlign AttributeSet::getAlignment() const {
+ return SetNode ? SetNode->getAlignment() : None;
}
-unsigned AttributeSet::getStackAlignment() const {
- return SetNode ? SetNode->getStackAlignment() : 0;
+MaybeAlign AttributeSet::getStackAlignment() const {
+ return SetNode ? SetNode->getStackAlignment() : None;
}
uint64_t AttributeSet::getDereferenceableBytes() const {
@@ -782,10 +779,12 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
Attr = Attribute::getWithByValType(C, B.getByValType());
break;
case Attribute::Alignment:
- Attr = Attribute::getWithAlignment(C, B.getAlignment());
+ assert(B.getAlignment() && "Alignment must be set");
+ Attr = Attribute::getWithAlignment(C, *B.getAlignment());
break;
case Attribute::StackAlignment:
- Attr = Attribute::getWithStackAlignment(C, B.getStackAlignment());
+ assert(B.getStackAlignment() && "StackAlignment must be set");
+ Attr = Attribute::getWithStackAlignment(C, *B.getStackAlignment());
break;
case Attribute::Dereferenceable:
Attr = Attribute::getWithDereferenceableBytes(
@@ -836,18 +835,18 @@ Attribute AttributeSetNode::getAttribute(StringRef Kind) const {
return {};
}
-unsigned AttributeSetNode::getAlignment() const {
+MaybeAlign AttributeSetNode::getAlignment() const {
for (const auto I : *this)
if (I.hasAttribute(Attribute::Alignment))
return I.getAlignment();
- return 0;
+ return None;
}
-unsigned AttributeSetNode::getStackAlignment() const {
+MaybeAlign AttributeSetNode::getStackAlignment() const {
for (const auto I : *this)
if (I.hasAttribute(Attribute::StackAlignment))
return I.getStackAlignment();
- return 0;
+ return None;
}
Type *AttributeSetNode::getByValType() const {
@@ -1164,8 +1163,8 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
#ifndef NDEBUG
// FIXME it is not obvious how this should work for alignment. For now, say
// we can't change a known alignment.
- unsigned OldAlign = getAttributes(Index).getAlignment();
- unsigned NewAlign = B.getAlignment();
+ const MaybeAlign OldAlign = getAttributes(Index).getAlignment();
+ const MaybeAlign NewAlign = B.getAlignment();
assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
"Attempt to change alignment!");
#endif
@@ -1349,11 +1348,11 @@ Attribute AttributeList::getAttribute(unsigned Index, StringRef Kind) const {
return getAttributes(Index).getAttribute(Kind);
}
-unsigned AttributeList::getRetAlignment() const {
+MaybeAlign AttributeList::getRetAlignment() const {
return getAttributes(ReturnIndex).getAlignment();
}
-unsigned AttributeList::getParamAlignment(unsigned ArgNo) const {
+MaybeAlign AttributeList::getParamAlignment(unsigned ArgNo) const {
return getAttributes(ArgNo + FirstArgIndex).getAlignment();
}
@@ -1361,8 +1360,7 @@ Type *AttributeList::getParamByValType(unsigned Index) const {
return getAttributes(Index+FirstArgIndex).getByValType();
}
-
-unsigned AttributeList::getStackAlignment(unsigned Index) const {
+MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
return getAttributes(Index).getStackAlignment();
}
@@ -1438,7 +1436,9 @@ AttrBuilder::AttrBuilder(AttributeSet AS) {
void AttrBuilder::clear() {
Attrs.reset();
TargetDepAttrs.clear();
- Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
+ Alignment.reset();
+ StackAlignment.reset();
+ DerefBytes = DerefOrNullBytes = 0;
AllocSizeArgs = 0;
ByValType = nullptr;
}
@@ -1486,9 +1486,9 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
Attrs[Val] = false;
if (Val == Attribute::Alignment)
- Alignment = 0;
+ Alignment.reset();
else if (Val == Attribute::StackAlignment)
- StackAlignment = 0;
+ StackAlignment.reset();
else if (Val == Attribute::ByVal)
ByValType = nullptr;
else if (Val == Attribute::Dereferenceable)
@@ -1517,23 +1517,23 @@ std::pair<unsigned, Optional<unsigned>> AttrBuilder::getAllocSizeArgs() const {
return unpackAllocSizeArgs(AllocSizeArgs);
}
-AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) {
- if (Align == 0) return *this;
+AttrBuilder &AttrBuilder::addAlignmentAttr(MaybeAlign Align) {
+ if (!Align)
+ return *this;
- assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
- assert(Align <= 0x40000000 && "Alignment too large.");
+ assert(*Align <= 0x40000000 && "Alignment too large.");
Attrs[Attribute::Alignment] = true;
Alignment = Align;
return *this;
}
-AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) {
+AttrBuilder &AttrBuilder::addStackAlignmentAttr(MaybeAlign Align) {
// Default alignment, allow the target to define how to align it.
- if (Align == 0) return *this;
+ if (!Align)
+ return *this;
- assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
- assert(Align <= 0x100 && "Alignment too large.");
+ assert(*Align <= 0x100 && "Alignment too large.");
Attrs[Attribute::StackAlignment] = true;
StackAlignment = Align;
@@ -1610,10 +1610,10 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
// FIXME: What if both have alignments, but they don't match?!
if (B.Alignment)
- Alignment = 0;
+ Alignment.reset();
if (B.StackAlignment)
- StackAlignment = 0;
+ StackAlignment.reset();
if (B.DerefBytes)
DerefBytes = 0;
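MaybeAlign, used throughout the Attributes.cpp hunks, is an Optional<Align> from llvm/Support/Alignment.h: the unset state replaces the old sentinel value 0, and Align itself is a power of two by construction. A sketch of the semantics (maybeAlignDemo is illustrative):
#include "llvm/Support/Alignment.h"
#include <cstdint>
static void maybeAlignDemo() {
  llvm::MaybeAlign A;        // unset: the old "alignment == 0" state
  A = llvm::Align(16);       // set; Align(n) asserts n is a power of two
  if (A) {                   // explicit check replaces `if (Align != 0)`
    uint64_t V = A->value(); // 16
    (void)V;
  }
  A.reset();                 // back to "no alignment specified"
}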
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index a2d820352825..79f580d0e14d 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -490,12 +490,6 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
- // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.".
- if (F->getName() == "clang.arc.use") {
- NewFn = nullptr;
- return true;
- }
-
// Quickly eliminate it, if it's not a candidate.
StringRef Name = F->getName();
if (Name.size() <= 8 || !Name.startswith("llvm."))
@@ -528,7 +522,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
F->arg_begin()->getType());
return true;
}
- Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
+ static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vldRegex.match(Name)) {
auto fArgs = F->getFunctionType()->params();
SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
@@ -539,7 +533,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
"llvm." + Name + ".p0i8", F->getParent());
return true;
}
- Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
+ static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
Intrinsic::arm_neon_vst2,
@@ -604,7 +598,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
case 'e': {
SmallVector<StringRef, 2> Groups;
- Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
+ static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
if (R.match(Name, &Groups)) {
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (Groups[1] == "fadd")
@@ -789,6 +783,19 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
+ case 'p':
+ if (Name == "prefetch") {
+ // Handle address space overloading.
+ Type *Tys[] = {F->arg_begin()->getType()};
+ if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
+ rename(F);
+ NewFn =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
+ return true;
+ }
+ }
+ break;
+
case 's':
if (Name == "stackprotectorcheck") {
NewFn = nullptr;
@@ -1648,14 +1655,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Get the Function's name.
StringRef Name = F->getName();
- // clang.arc.use is an old name for llvm.arc.clang.arc.use. It is dropped
- // from upgrader because the optimizer now only recognizes intrinsics for
- // ARC runtime calls.
- if (Name == "clang.arc.use") {
- CI->eraseFromParent();
- return;
- }
-
assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
Name = Name.substr(5);
@@ -3831,7 +3830,9 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
-bool llvm::UpgradeRetainReleaseMarker(Module &M) {
+/// Check for the objc retain/release marker which should be upgraded. Returns
+/// true if the module is modified.
+static bool UpgradeRetainReleaseMarker(Module &M) {
bool Changed = false;
const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
@@ -3855,6 +3856,106 @@ bool llvm::UpgradeRetainReleaseMarker(Module &M) {
return Changed;
}
+void llvm::UpgradeARCRuntime(Module &M) {
+ // This lambda converts plain calls to ARC runtime functions into calls to
+ // the corresponding intrinsics.
+ auto UpgradeToIntrinsic = [&](const char *OldFunc,
+ llvm::Intrinsic::ID IntrinsicFunc) {
+ Function *Fn = M.getFunction(OldFunc);
+
+ if (!Fn)
+ return;
+
+ Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
+
+ for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
+ CallInst *CI = dyn_cast<CallInst>(*I++);
+ if (!CI || CI->getCalledFunction() != Fn)
+ continue;
+
+ IRBuilder<> Builder(CI->getParent(), CI->getIterator());
+ FunctionType *NewFuncTy = NewFn->getFunctionType();
+ SmallVector<Value *, 2> Args;
+
+ for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+ Value *Arg = CI->getArgOperand(I);
+ // Bitcast argument to the parameter type of the new function if it's
+ // not a variadic argument.
+ if (I < NewFuncTy->getNumParams())
+ Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
+ Args.push_back(Arg);
+ }
+
+ // Create a call instruction that calls the new function.
+ CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
+ NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
+ NewCall->setName(CI->getName());
+
+ // Bitcast the return value back to the type of the old call.
+ Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
+
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewRetVal);
+ CI->eraseFromParent();
+ }
+
+ if (Fn->use_empty())
+ Fn->eraseFromParent();
+ };
+
+ // Unconditionally convert a call to "clang.arc.use" to a call to
+ // "llvm.objc.clang.arc.use".
+ UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
+
+ // Upgrade the retain/release marker. If there is no need to upgrade the
+ // marker, the module is either already new enough to contain the new
+ // intrinsics or it is not ARC; either way there is no need to upgrade the
+ // runtime calls.
+ if (!UpgradeRetainReleaseMarker(M))
+ return;
+
+ std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
+ {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
+ {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
+ {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
+ {"objc_autoreleaseReturnValue",
+ llvm::Intrinsic::objc_autoreleaseReturnValue},
+ {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
+ {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
+ {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
+ {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
+ {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
+ {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
+ {"objc_release", llvm::Intrinsic::objc_release},
+ {"objc_retain", llvm::Intrinsic::objc_retain},
+ {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
+ {"objc_retainAutoreleaseReturnValue",
+ llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
+ {"objc_retainAutoreleasedReturnValue",
+ llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
+ {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
+ {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
+ {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
+ {"objc_unsafeClaimAutoreleasedReturnValue",
+ llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
+ {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
+ {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
+ {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
+ {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
+ {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
+ {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
+ {"objc_arc_annotation_topdown_bbstart",
+ llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
+ {"objc_arc_annotation_topdown_bbend",
+ llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
+ {"objc_arc_annotation_bottomup_bbstart",
+ llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
+ {"objc_arc_annotation_bottomup_bbend",
+ llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
+
+ for (auto &I : RuntimeFuncs)
+ UpgradeToIntrinsic(I.first, I.second);
+}
+
bool llvm::UpgradeModuleFlags(Module &M) {
NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
if (!ModFlags)
@@ -4012,3 +4113,23 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
return MDTuple::get(T->getContext(), Ops);
}
+
+std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
+ std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
+
+ // If the target is X86 and the datalayout matches the expected format, add
+ // the pointer-size address spaces to the datalayout.
+ Triple::ArchType Arch = Triple(TT).getArch();
+ if ((Arch != llvm::Triple::x86 && Arch != llvm::Triple::x86_64) ||
+ DL.contains(AddrSpaces))
+ return DL;
+
+ SmallVector<StringRef, 4> Groups;
+ Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
+ if (!R.match(DL, &Groups))
+ return DL;
+
+ SmallString<1024> Buf;
+ std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
+ return Res;
+}
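For illustration, applied to a typical x86-64 datalayout string (example input, not from this diff), the regex splits after the mangling component and splices the new address-space specifiers in:
in:  "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
out: "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"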
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 34410712645d..bdee6990f932 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -107,6 +107,13 @@ BasicBlock::instructionsWithoutDebug() {
return make_filter_range(*this, Fn);
}
+filter_iterator<BasicBlock::const_iterator,
+ std::function<bool(const Instruction &)>>::difference_type
+BasicBlock::sizeWithoutDebug() const {
+ return std::distance(instructionsWithoutDebug().begin(),
+ instructionsWithoutDebug().end());
+}
+
void BasicBlock::removeFromParent() {
getParent()->getBasicBlockList().remove(getIterator());
}
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 835fbb3443b8..71fa795ec294 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -746,7 +746,7 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
ConstantInt::get(Ty, i));
Constant *V2Element = ConstantExpr::getExtractElement(V2,
ConstantInt::get(Ty, i));
- Constant *Cond = dyn_cast<Constant>(CondV->getOperand(i));
+ auto *Cond = cast<Constant>(CondV->getOperand(i));
if (V1Element == V2Element) {
V = V1Element;
} else if (isa<UndefValue>(Cond)) {
@@ -787,12 +787,9 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
Constant *Idx) {
- if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
- return UndefValue::get(Val->getType()->getVectorElementType());
- if (Val->isNullValue()) // ee(zero, x) -> zero
- return Constant::getNullValue(Val->getType()->getVectorElementType());
- // ee({w,x,y,z}, undef) -> undef
- if (isa<UndefValue>(Idx))
+ // extractelt undef, C -> undef
+ // extractelt C, undef -> undef
+ if (isa<UndefValue>(Val) || isa<UndefValue>(Idx))
return UndefValue::get(Val->getType()->getVectorElementType());
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
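The rewritten guard folds both undef cases up front. Expected foldings, shown in IR syntax for illustration:
extractelement <2 x i32> undef, i64 0               --> i32 undef
extractelement <2 x i32> <i32 1, i32 2>, i64 undef  --> i32 undef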
@@ -1125,7 +1122,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
isa<GlobalValue>(CE1->getOperand(0))) {
GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
- unsigned GVAlign;
+ MaybeAlign GVAlign;
if (Module *TheModule = GV->getParent()) {
GVAlign = GV->getPointerAlignment(TheModule->getDataLayout());
@@ -1139,19 +1136,19 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
// increased code size (see https://reviews.llvm.org/D55115)
// FIXME: This code should be deleted once existing targets have
// appropriate defaults
- if (GVAlign == 0U && isa<Function>(GV))
- GVAlign = 4U;
+ if (!GVAlign && isa<Function>(GV))
+ GVAlign = Align(4);
} else if (isa<Function>(GV)) {
// Without a datalayout we have to assume the worst case: that the
// function pointer isn't aligned at all.
- GVAlign = 0U;
+ GVAlign = llvm::None;
} else {
- GVAlign = GV->getAlignment();
+ GVAlign = MaybeAlign(GV->getAlignment());
}
- if (GVAlign > 1) {
+ if (GVAlign && *GVAlign > 1) {
unsigned DstWidth = CI2->getType()->getBitWidth();
- unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign));
+ unsigned SrcWidth = std::min(DstWidth, Log2(*GVAlign));
APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
// If checking bits we know are clear, return zero.
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp
index 920fdc01a14f..642bf0f39342 100644
--- a/lib/IR/ConstantRange.cpp
+++ b/lib/IR/ConstantRange.cpp
@@ -269,6 +269,27 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
return makeExactMulNSWRegion(Other.getSignedMin())
.intersectWith(makeExactMulNSWRegion(Other.getSignedMax()));
+
+ case Instruction::Shl: {
+ // For the given range of shift amounts, if we ignore all illegal shift
+ // amounts (those always produce poison), what shift amount range is left?
+ ConstantRange ShAmt = Other.intersectWith(
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, (BitWidth - 1) + 1)));
+ if (ShAmt.isEmptySet()) {
+ // If the entire range of shift amounts is already poison-producing,
+ // then we can freely add more poison-producing flags on top of that.
+ return getFull(BitWidth);
+ }
+ // There are some legal shift amounts, so we can compute a conservatively
+ // correct range of no-wrap inputs. Note that by now ShAmtUMax has been
+ // clamped to at most bitwidth-1, which yields the most conservative range.
+ APInt ShAmtUMax = ShAmt.getUnsignedMax();
+ if (Unsigned)
+ return getNonEmpty(APInt::getNullValue(BitWidth),
+ APInt::getMaxValue(BitWidth).lshr(ShAmtUMax) + 1);
+ return getNonEmpty(APInt::getSignedMinValue(BitWidth).ashr(ShAmtUMax),
+ APInt::getSignedMaxValue(BitWidth).ashr(ShAmtUMax) + 1);
+ }
}
}
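A worked instance of the new Shl region, assuming BitWidth = 8 and a shift-amount range of [1, 4), so ShAmtUMax = 3:
// NUW inputs: [0, 0xFF >> 3] = [0, 31], returned as the range [0, 32)
// NSW inputs: [-128 >> 3, 127 >> 3] = [-16, 15], returned as [-16, 16)
// Check: 15 << 3 == 120 fits in i8, while 16 << 3 == 128 would overflow.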
@@ -815,14 +836,55 @@ ConstantRange::add(const ConstantRange &Other) const {
return X;
}
-ConstantRange ConstantRange::addWithNoSignedWrap(const APInt &Other) const {
- // Calculate the subset of this range such that "X + Other" is
- // guaranteed not to wrap (overflow) for all X in this subset.
- auto NSWRange = ConstantRange::makeExactNoWrapRegion(
- BinaryOperator::Add, Other, OverflowingBinaryOperator::NoSignedWrap);
- auto NSWConstrainedRange = intersectWith(NSWRange);
+ConstantRange ConstantRange::addWithNoWrap(const ConstantRange &Other,
+ unsigned NoWrapKind,
+ PreferredRangeType RangeType) const {
+ // Calculate the range for "X + Y" which is guaranteed not to wrap (overflow).
+ // (X is from this, and Y is from Other)
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+ if (isFullSet() && Other.isFullSet())
+ return getFull();
+
+ using OBO = OverflowingBinaryOperator;
+ ConstantRange Result = add(Other);
+
+ auto addWithNoUnsignedWrap = [this](const ConstantRange &Other) {
+ APInt LMin = getUnsignedMin(), LMax = getUnsignedMax();
+ APInt RMin = Other.getUnsignedMin(), RMax = Other.getUnsignedMax();
+ bool Overflow;
+ APInt NewMin = LMin.uadd_ov(RMin, Overflow);
+ if (Overflow)
+ return getEmpty();
+ APInt NewMax = LMax.uadd_sat(RMax);
+ return getNonEmpty(std::move(NewMin), std::move(NewMax) + 1);
+ };
+
+ auto addWithNoSignedWrap = [this](const ConstantRange &Other) {
+ APInt LMin = getSignedMin(), LMax = getSignedMax();
+ APInt RMin = Other.getSignedMin(), RMax = Other.getSignedMax();
+ if (LMin.isNonNegative()) {
+ bool Overflow;
+ APInt Temp = LMin.sadd_ov(RMin, Overflow);
+ if (Overflow)
+ return getEmpty();
+ }
+ if (LMax.isNegative()) {
+ bool Overflow;
+ APInt Temp = LMax.sadd_ov(RMax, Overflow);
+ if (Overflow)
+ return getEmpty();
+ }
+ APInt NewMin = LMin.sadd_sat(RMin);
+ APInt NewMax = LMax.sadd_sat(RMax);
+ return getNonEmpty(std::move(NewMin), std::move(NewMax) + 1);
+ };
- return NSWConstrainedRange.add(ConstantRange(Other));
+ if (NoWrapKind & OBO::NoSignedWrap)
+ Result = Result.intersectWith(addWithNoSignedWrap(Other), RangeType);
+ if (NoWrapKind & OBO::NoUnsignedWrap)
+ Result = Result.intersectWith(addWithNoUnsignedWrap(Other), RangeType);
+ return Result;
}
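A small worked case for the NUW path, assuming BitWidth = 8:
[10, 20) + [5, 10) under NUW:
  NewMin = 10 + 5 = 15 (uadd_ov reports no overflow)
  NewMax = 19 uadd_sat 9 = 28
  --> [15, 29), which is then intersected with the plain add(Other) result.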
ConstantRange
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index ff551da29ae6..f792f01efc1a 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
@@ -250,6 +251,20 @@ bool Constant::isNaN() const {
return true;
}
+bool Constant::isElementWiseEqual(Value *Y) const {
+ // Are they fully identical?
+ if (this == Y)
+ return true;
+ // They may still be identical element-wise (if they have `undef`s).
+ auto *Cy = dyn_cast<Constant>(Y);
+ if (!Cy)
+ return false;
+ return PatternMatch::match(ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_EQ,
+ const_cast<Constant *>(this),
+ Cy),
+ PatternMatch::m_One());
+}
+
bool Constant::containsUndefElement() const {
if (!getType()->isVectorTy())
return false;
@@ -502,22 +517,32 @@ bool Constant::needsRelocation() const {
if (const BlockAddress *BA = dyn_cast<BlockAddress>(this))
return BA->getFunction()->needsRelocation();
- // While raw uses of blockaddress need to be relocated, differences between
- // two of them don't when they are for labels in the same function. This is a
- // common idiom when creating a table for the indirect goto extension, so we
- // handle it efficiently here.
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(this))
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(this)) {
if (CE->getOpcode() == Instruction::Sub) {
ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0));
ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1));
if (LHS && RHS && LHS->getOpcode() == Instruction::PtrToInt &&
- RHS->getOpcode() == Instruction::PtrToInt &&
- isa<BlockAddress>(LHS->getOperand(0)) &&
- isa<BlockAddress>(RHS->getOperand(0)) &&
- cast<BlockAddress>(LHS->getOperand(0))->getFunction() ==
- cast<BlockAddress>(RHS->getOperand(0))->getFunction())
- return false;
+ RHS->getOpcode() == Instruction::PtrToInt) {
+ Constant *LHSOp0 = LHS->getOperand(0);
+ Constant *RHSOp0 = RHS->getOperand(0);
+
+ // While raw uses of blockaddress need to be relocated, differences
+ // between two of them don't when they are for labels in the same
+ // function. This is a common idiom when creating a table for the
+ // indirect goto extension, so we handle it efficiently here.
+ if (isa<BlockAddress>(LHSOp0) && isa<BlockAddress>(RHSOp0) &&
+ cast<BlockAddress>(LHSOp0)->getFunction() ==
+ cast<BlockAddress>(RHSOp0)->getFunction())
+ return false;
+
+ // Relative pointers do not need to be dynamically relocated.
+ if (auto *LHSGV = dyn_cast<GlobalValue>(LHSOp0->stripPointerCasts()))
+ if (auto *RHSGV = dyn_cast<GlobalValue>(RHSOp0->stripPointerCasts()))
+ if (LHSGV->isDSOLocal() && RHSGV->isDSOLocal())
+ return false;
+ }
}
+ }
bool Result = false;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
@@ -563,13 +588,10 @@ void Constant::removeDeadConstantUsers() const {
}
// If the constant was dead, then the iterator is invalidated.
- if (LastNonDeadUser == E) {
+ if (LastNonDeadUser == E)
I = user_begin();
- if (I == E) break;
- } else {
- I = LastNonDeadUser;
- ++I;
- }
+ else
+ I = std::next(LastNonDeadUser);
}
}
diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h
index 7614dab9f15d..1ec9087551f8 100644
--- a/lib/IR/ConstantsContext.h
+++ b/lib/IR/ConstantsContext.h
@@ -480,14 +480,16 @@ struct ConstantExprKeyType {
: Opcode(CE->getOpcode()),
SubclassOptionalData(CE->getRawSubclassOptionalData()),
SubclassData(CE->isCompare() ? CE->getPredicate() : 0), Ops(Operands),
- Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef<unsigned>()) {}
+ Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef<unsigned>()),
+ ExplicitTy(nullptr) {}
ConstantExprKeyType(const ConstantExpr *CE,
SmallVectorImpl<Constant *> &Storage)
: Opcode(CE->getOpcode()),
SubclassOptionalData(CE->getRawSubclassOptionalData()),
SubclassData(CE->isCompare() ? CE->getPredicate() : 0),
- Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef<unsigned>()) {
+ Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef<unsigned>()),
+ ExplicitTy(nullptr) {
assert(Storage.empty() && "Expected empty storage");
for (unsigned I = 0, E = CE->getNumOperands(); I != E; ++I)
Storage.push_back(CE->getOperand(I));
@@ -676,9 +678,9 @@ public:
/// Hash once, and reuse it for the lookup and the insertion if needed.
LookupKeyHashed Lookup(MapInfo::getHashValue(Key), Key);
- auto I = Map.find_as(Lookup);
- if (I != Map.end())
- return *I;
+ auto ItMap = Map.find_as(Lookup);
+ if (ItMap != Map.end())
+ return *ItMap;
// Update to the new value. Optimize for the case when we have a single
// operand that we're changing, but handle bulk updates efficiently.
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 310935b5213a..a5f46b16e600 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -140,7 +140,16 @@ unsigned LLVMGetLastEnumAttributeKind(void) {
LLVMAttributeRef LLVMCreateEnumAttribute(LLVMContextRef C, unsigned KindID,
uint64_t Val) {
- return wrap(Attribute::get(*unwrap(C), (Attribute::AttrKind)KindID, Val));
+ auto &Ctx = *unwrap(C);
+ auto AttrKind = (Attribute::AttrKind)KindID;
+
+ if (AttrKind == Attribute::AttrKind::ByVal) {
+ // After r362128, byval attributes need to have a type attribute. Provide a
+ // NULL one until a proper API is added for this.
+ return wrap(Attribute::getWithByValType(Ctx, NULL));
+ } else {
+ return wrap(Attribute::get(Ctx, AttrKind, Val));
+ }
}
unsigned LLVMGetEnumAttributeKind(LLVMAttributeRef A) {
@@ -386,7 +395,7 @@ void LLVMDumpModule(LLVMModuleRef M) {
LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename,
char **ErrorMessage) {
std::error_code EC;
- raw_fd_ostream dest(Filename, EC, sys::fs::F_Text);
+ raw_fd_ostream dest(Filename, EC, sys::fs::OF_Text);
if (EC) {
*ErrorMessage = strdup(EC.message().c_str());
return true;
@@ -1999,13 +2008,13 @@ unsigned LLVMGetAlignment(LLVMValueRef V) {
void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) {
Value *P = unwrap<Value>(V);
if (GlobalObject *GV = dyn_cast<GlobalObject>(P))
- GV->setAlignment(Bytes);
+ GV->setAlignment(MaybeAlign(Bytes));
else if (AllocaInst *AI = dyn_cast<AllocaInst>(P))
- AI->setAlignment(Bytes);
+ AI->setAlignment(MaybeAlign(Bytes));
else if (LoadInst *LI = dyn_cast<LoadInst>(P))
- LI->setAlignment(Bytes);
+ LI->setAlignment(MaybeAlign(Bytes));
else if (StoreInst *SI = dyn_cast<StoreInst>(P))
- SI->setAlignment(Bytes);
+ SI->setAlignment(MaybeAlign(Bytes));
else
llvm_unreachable(
"only GlobalValue, AllocaInst, LoadInst and StoreInst have alignment");
@@ -2480,7 +2489,7 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
Argument *A = unwrap<Argument>(Arg);
- A->addAttr(Attribute::getWithAlignment(A->getContext(), align));
+ A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(align)));
}
/*--.. Operations on ifuncs ................................................--*/
@@ -2779,7 +2788,8 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align) {
auto *Call = unwrap<CallBase>(Instr);
- Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), align);
+ Attribute AlignAttr =
+ Attribute::getWithAlignment(Call->getContext(), Align(align));
Call->addAttribute(index, AlignAttr);
}
@@ -3518,6 +3528,47 @@ static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) {
llvm_unreachable("Invalid AtomicOrdering value!");
}
+static AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMAtomicRMWBinOp BinOp) {
+ switch (BinOp) {
+ case LLVMAtomicRMWBinOpXchg: return AtomicRMWInst::Xchg;
+ case LLVMAtomicRMWBinOpAdd: return AtomicRMWInst::Add;
+ case LLVMAtomicRMWBinOpSub: return AtomicRMWInst::Sub;
+ case LLVMAtomicRMWBinOpAnd: return AtomicRMWInst::And;
+ case LLVMAtomicRMWBinOpNand: return AtomicRMWInst::Nand;
+ case LLVMAtomicRMWBinOpOr: return AtomicRMWInst::Or;
+ case LLVMAtomicRMWBinOpXor: return AtomicRMWInst::Xor;
+ case LLVMAtomicRMWBinOpMax: return AtomicRMWInst::Max;
+ case LLVMAtomicRMWBinOpMin: return AtomicRMWInst::Min;
+ case LLVMAtomicRMWBinOpUMax: return AtomicRMWInst::UMax;
+ case LLVMAtomicRMWBinOpUMin: return AtomicRMWInst::UMin;
+ case LLVMAtomicRMWBinOpFAdd: return AtomicRMWInst::FAdd;
+ case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub;
+ }
+
+ llvm_unreachable("Invalid LLVMAtomicRMWBinOp value!");
+}
+
+static LLVMAtomicRMWBinOp mapToLLVMRMWBinOp(AtomicRMWInst::BinOp BinOp) {
+ switch (BinOp) {
+ case AtomicRMWInst::Xchg: return LLVMAtomicRMWBinOpXchg;
+ case AtomicRMWInst::Add: return LLVMAtomicRMWBinOpAdd;
+ case AtomicRMWInst::Sub: return LLVMAtomicRMWBinOpSub;
+ case AtomicRMWInst::And: return LLVMAtomicRMWBinOpAnd;
+ case AtomicRMWInst::Nand: return LLVMAtomicRMWBinOpNand;
+ case AtomicRMWInst::Or: return LLVMAtomicRMWBinOpOr;
+ case AtomicRMWInst::Xor: return LLVMAtomicRMWBinOpXor;
+ case AtomicRMWInst::Max: return LLVMAtomicRMWBinOpMax;
+ case AtomicRMWInst::Min: return LLVMAtomicRMWBinOpMin;
+ case AtomicRMWInst::UMax: return LLVMAtomicRMWBinOpUMax;
+ case AtomicRMWInst::UMin: return LLVMAtomicRMWBinOpUMin;
+ case AtomicRMWInst::FAdd: return LLVMAtomicRMWBinOpFAdd;
+ case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub;
+ default: break;
+ }
+
+ llvm_unreachable("Invalid AtomicRMWBinOp value!");
+}
+
// TODO: Should this and other atomic instructions support building with
// "syncscope"?
LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering,
@@ -3593,14 +3644,30 @@ LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) {
Value *P = unwrap<Value>(MemAccessInst);
if (LoadInst *LI = dyn_cast<LoadInst>(P))
return LI->isVolatile();
- return cast<StoreInst>(P)->isVolatile();
+ if (StoreInst *SI = dyn_cast<StoreInst>(P))
+ return SI->isVolatile();
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(P))
+ return AI->isVolatile();
+ return cast<AtomicCmpXchgInst>(P)->isVolatile();
}
void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) {
Value *P = unwrap<Value>(MemAccessInst);
if (LoadInst *LI = dyn_cast<LoadInst>(P))
return LI->setVolatile(isVolatile);
- return cast<StoreInst>(P)->setVolatile(isVolatile);
+ if (StoreInst *SI = dyn_cast<StoreInst>(P))
+ return SI->setVolatile(isVolatile);
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(P))
+ return AI->setVolatile(isVolatile);
+ return cast<AtomicCmpXchgInst>(P)->setVolatile(isVolatile);
+}
+
+LLVMBool LLVMGetWeak(LLVMValueRef CmpXchgInst) {
+ return unwrap<AtomicCmpXchgInst>(CmpXchgInst)->isWeak();
+}
+
+void LLVMSetWeak(LLVMValueRef CmpXchgInst, LLVMBool isWeak) {
+ return unwrap<AtomicCmpXchgInst>(CmpXchgInst)->setWeak(isWeak);
}
LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) {
@@ -3608,8 +3675,10 @@ LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) {
AtomicOrdering O;
if (LoadInst *LI = dyn_cast<LoadInst>(P))
O = LI->getOrdering();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(P))
+ O = SI->getOrdering();
else
- O = cast<StoreInst>(P)->getOrdering();
+ O = cast<AtomicRMWInst>(P)->getOrdering();
return mapToLLVMOrdering(O);
}
@@ -3622,6 +3691,14 @@ void LLVMSetOrdering(LLVMValueRef MemAccessInst, LLVMAtomicOrdering Ordering) {
return cast<StoreInst>(P)->setOrdering(O);
}
+LLVMAtomicRMWBinOp LLVMGetAtomicRMWBinOp(LLVMValueRef Inst) {
+ return mapToLLVMRMWBinOp(unwrap<AtomicRMWInst>(Inst)->getOperation());
+}
+
+void LLVMSetAtomicRMWBinOp(LLVMValueRef Inst, LLVMAtomicRMWBinOp BinOp) {
+ unwrap<AtomicRMWInst>(Inst)->setOperation(mapFromLLVMRMWBinOp(BinOp));
+}
+
/*--.. Casts ...............................................................--*/
LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
@@ -3840,20 +3917,7 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
LLVMValueRef PTR, LLVMValueRef Val,
LLVMAtomicOrdering ordering,
LLVMBool singleThread) {
- AtomicRMWInst::BinOp intop;
- switch (op) {
- case LLVMAtomicRMWBinOpXchg: intop = AtomicRMWInst::Xchg; break;
- case LLVMAtomicRMWBinOpAdd: intop = AtomicRMWInst::Add; break;
- case LLVMAtomicRMWBinOpSub: intop = AtomicRMWInst::Sub; break;
- case LLVMAtomicRMWBinOpAnd: intop = AtomicRMWInst::And; break;
- case LLVMAtomicRMWBinOpNand: intop = AtomicRMWInst::Nand; break;
- case LLVMAtomicRMWBinOpOr: intop = AtomicRMWInst::Or; break;
- case LLVMAtomicRMWBinOpXor: intop = AtomicRMWInst::Xor; break;
- case LLVMAtomicRMWBinOpMax: intop = AtomicRMWInst::Max; break;
- case LLVMAtomicRMWBinOpMin: intop = AtomicRMWInst::Min; break;
- case LLVMAtomicRMWBinOpUMax: intop = AtomicRMWInst::UMax; break;
- case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break;
- }
+ AtomicRMWInst::BinOp intop = mapFromLLVMRMWBinOp(op);
return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val),
mapFromLLVMOrdering(ordering), singleThread ? SyncScope::SingleThread
: SyncScope::System));
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 2493c6cbe532..5d5671227430 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
using namespace llvm::dwarf;
-cl::opt<bool>
+static cl::opt<bool>
UseDbgAddr("use-dbg-addr",
llvm::cl::desc("Use llvm.dbg.addr for all local variables"),
cl::init(false), cl::Hidden);
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index 6e0ebbd4a730..5fe7a2e94b6a 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -44,7 +45,6 @@ using namespace llvm;
StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
- StructAlignment = 0;
StructSize = 0;
IsPadded = false;
NumElements = ST->getNumElements();
@@ -52,10 +52,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
// Loop over each of the elements, placing them in memory.
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Type *Ty = ST->getElementType(i);
- unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty);
+ const Align TyAlign(ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty));
// Add padding if necessary to align the data element properly.
- if ((StructSize & (TyAlign-1)) != 0) {
+ if (!isAligned(TyAlign, StructSize)) {
IsPadded = true;
StructSize = alignTo(StructSize, TyAlign);
}
@@ -67,12 +67,9 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item
}
- // Empty structures have alignment of 1 byte.
- if (StructAlignment == 0) StructAlignment = 1;
-
// Add padding to the end of the struct so that it could be put in an array
// and all array elements would be aligned correctly.
- if ((StructSize & (StructAlignment-1)) != 0) {
+ if (!isAligned(StructAlignment, StructSize)) {
IsPadded = true;
StructSize = alignTo(StructSize, StructAlignment);
}
@@ -102,9 +99,8 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
// LayoutAlignElem, LayoutAlign support
//===----------------------------------------------------------------------===//
-LayoutAlignElem
-LayoutAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width) {
+LayoutAlignElem LayoutAlignElem::get(AlignTypeEnum align_type, Align abi_align,
+ Align pref_align, uint32_t bit_width) {
assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
LayoutAlignElem retval;
retval.AlignType = align_type;
@@ -126,10 +122,9 @@ LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
// PointerAlignElem, PointerAlign support
//===----------------------------------------------------------------------===//
-PointerAlignElem
-PointerAlignElem::get(uint32_t AddressSpace, unsigned ABIAlign,
- unsigned PrefAlign, uint32_t TypeByteWidth,
- uint32_t IndexWidth) {
+PointerAlignElem PointerAlignElem::get(uint32_t AddressSpace, Align ABIAlign,
+ Align PrefAlign, uint32_t TypeByteWidth,
+ uint32_t IndexWidth) {
assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
PointerAlignElem retval;
retval.AddressSpace = AddressSpace;
@@ -162,18 +157,18 @@ const char *DataLayout::getManglingComponent(const Triple &T) {
}
static const LayoutAlignElem DefaultAlignments[] = {
- { INTEGER_ALIGN, 1, 1, 1 }, // i1
- { INTEGER_ALIGN, 8, 1, 1 }, // i8
- { INTEGER_ALIGN, 16, 2, 2 }, // i16
- { INTEGER_ALIGN, 32, 4, 4 }, // i32
- { INTEGER_ALIGN, 64, 4, 8 }, // i64
- { FLOAT_ALIGN, 16, 2, 2 }, // half
- { FLOAT_ALIGN, 32, 4, 4 }, // float
- { FLOAT_ALIGN, 64, 8, 8 }, // double
- { FLOAT_ALIGN, 128, 16, 16 }, // ppcf128, quad, ...
- { VECTOR_ALIGN, 64, 8, 8 }, // v2i32, v1i64, ...
- { VECTOR_ALIGN, 128, 16, 16 }, // v16i8, v8i16, v4i32, ...
- { AGGREGATE_ALIGN, 0, 0, 8 } // struct
+ {INTEGER_ALIGN, 1, Align(1), Align(1)}, // i1
+ {INTEGER_ALIGN, 8, Align(1), Align(1)}, // i8
+ {INTEGER_ALIGN, 16, Align(2), Align(2)}, // i16
+ {INTEGER_ALIGN, 32, Align(4), Align(4)}, // i32
+ {INTEGER_ALIGN, 64, Align(4), Align(8)}, // i64
+ {FLOAT_ALIGN, 16, Align(2), Align(2)}, // half
+ {FLOAT_ALIGN, 32, Align(4), Align(4)}, // float
+ {FLOAT_ALIGN, 64, Align(8), Align(8)}, // double
+ {FLOAT_ALIGN, 128, Align(16), Align(16)}, // ppcf128, quad, ...
+ {VECTOR_ALIGN, 64, Align(8), Align(8)}, // v2i32, v1i64, ...
+ {VECTOR_ALIGN, 128, Align(16), Align(16)}, // v16i8, v8i16, v4i32, ...
+ {AGGREGATE_ALIGN, 0, Align(1), Align(8)} // struct
};
void DataLayout::reset(StringRef Desc) {
@@ -182,9 +177,9 @@ void DataLayout::reset(StringRef Desc) {
LayoutMap = nullptr;
BigEndian = false;
AllocaAddrSpace = 0;
- StackNaturalAlign = 0;
+ StackNaturalAlign.reset();
ProgramAddrSpace = 0;
- FunctionPtrAlign = 0;
+ FunctionPtrAlign.reset();
TheFunctionPtrAlignType = FunctionPtrAlignType::Independent;
ManglingMode = MM_None;
NonIntegralAddressSpaces.clear();
@@ -194,7 +189,7 @@ void DataLayout::reset(StringRef Desc) {
setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign,
E.TypeBitWidth);
}
- setPointerAlignment(0, 8, 8, 8, 8);
+ setPointerAlignment(0, Align(8), Align(8), 8, 8);
parseSpecifier(Desc);
}
@@ -320,8 +315,9 @@ void DataLayout::parseSpecifier(StringRef Desc) {
report_fatal_error("Invalid index size of 0 bytes");
}
}
- setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign,
- PointerMemSize, IndexSize);
+ setPointerAlignment(AddrSpace, assumeAligned(PointerABIAlign),
+ assumeAligned(PointerPrefAlign), PointerMemSize,
+ IndexSize);
break;
}
case 'i':
@@ -349,11 +345,16 @@ void DataLayout::parseSpecifier(StringRef Desc) {
report_fatal_error(
"Missing alignment specification in datalayout string");
Split = split(Rest, ':');
- unsigned ABIAlign = inBytes(getInt(Tok));
+ const unsigned ABIAlign = inBytes(getInt(Tok));
if (AlignType != AGGREGATE_ALIGN && !ABIAlign)
report_fatal_error(
"ABI alignment specification must be >0 for non-aggregate types");
+ if (!isUInt<16>(ABIAlign))
+ report_fatal_error("Invalid ABI alignment, must be a 16bit integer");
+ if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign))
+ report_fatal_error("Invalid ABI alignment, must be a power of 2");
+
// Preferred alignment.
unsigned PrefAlign = ABIAlign;
if (!Rest.empty()) {
@@ -361,7 +362,14 @@ void DataLayout::parseSpecifier(StringRef Desc) {
PrefAlign = inBytes(getInt(Tok));
}
- setAlignment(AlignType, ABIAlign, PrefAlign, Size);
+ if (!isUInt<16>(PrefAlign))
+ report_fatal_error(
+ "Invalid preferred alignment, must be a 16bit integer");
+ if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign))
+ report_fatal_error("Invalid preferred alignment, must be a power of 2");
+
+ setAlignment(AlignType, assumeAligned(ABIAlign), assumeAligned(PrefAlign),
+ Size);
break;
}
@@ -378,7 +386,10 @@ void DataLayout::parseSpecifier(StringRef Desc) {
}
break;
case 'S': { // Stack natural alignment.
- StackNaturalAlign = inBytes(getInt(Tok));
+ uint64_t Alignment = inBytes(getInt(Tok));
+ if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment))
+ report_fatal_error("Alignment is neither 0 nor a power of 2");
+ StackNaturalAlign = MaybeAlign(Alignment);
break;
}
case 'F': {
@@ -394,7 +405,10 @@ void DataLayout::parseSpecifier(StringRef Desc) {
"datalayout string");
}
Tok = Tok.substr(1);
- FunctionPtrAlign = inBytes(getInt(Tok));
+ uint64_t Alignment = inBytes(getInt(Tok));
+ if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment))
+ report_fatal_error("Alignment is neither 0 nor a power of 2");
+ FunctionPtrAlign = MaybeAlign(Alignment);
break;
}
case 'P': { // Function address space.
@@ -468,20 +482,15 @@ DataLayout::findAlignmentLowerBound(AlignTypeEnum AlignType,
});
}
-void
-DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width) {
+void DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align,
+ Align pref_align, uint32_t bit_width) {
+ // AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as
+ // uint16_t; it is unclear whether there are requirements for alignment to
+ // be less than 2^16 other than storage. In the meantime we leave the
+ // restriction as an assert. See D67400 for context.
+ assert(Log2(abi_align) < 16 && Log2(pref_align) < 16 && "Alignment too big");
if (!isUInt<24>(bit_width))
report_fatal_error("Invalid bit width, must be a 24bit integer");
- if (!isUInt<16>(abi_align))
- report_fatal_error("Invalid ABI alignment, must be a 16bit integer");
- if (!isUInt<16>(pref_align))
- report_fatal_error("Invalid preferred alignment, must be a 16bit integer");
- if (abi_align != 0 && !isPowerOf2_64(abi_align))
- report_fatal_error("Invalid ABI alignment, must be a power of 2");
- if (pref_align != 0 && !isPowerOf2_64(pref_align))
- report_fatal_error("Invalid preferred alignment, must be a power of 2");
-
if (pref_align < abi_align)
report_fatal_error(
"Preferred alignment cannot be less than the ABI alignment");
@@ -507,8 +516,8 @@ DataLayout::findPointerLowerBound(uint32_t AddressSpace) {
});
}
-void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
- unsigned PrefAlign, uint32_t TypeByteWidth,
+void DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign,
+ Align PrefAlign, uint32_t TypeByteWidth,
uint32_t IndexWidth) {
if (PrefAlign < ABIAlign)
report_fatal_error(
@@ -528,9 +537,8 @@ void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
/// preferred if ABIInfo = false) the layout wants for the specified datatype.
-unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
- uint32_t BitWidth, bool ABIInfo,
- Type *Ty) const {
+Align DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, uint32_t BitWidth,
+ bool ABIInfo, Type *Ty) const {
AlignmentsTy::const_iterator I = findAlignmentLowerBound(AlignType, BitWidth);
// See if we found an exact match. Or if we are looking for an integer type
// but don't have an exact match, take the next largest integer. This is where
@@ -549,10 +557,11 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
} else if (AlignType == VECTOR_ALIGN) {
// By default, use natural alignment for vector types. This is consistent
// with what clang and llvm-gcc do.
- unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
- Align *= cast<VectorType>(Ty)->getNumElements();
- Align = PowerOf2Ceil(Align);
- return Align;
+ unsigned Alignment =
+ getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ Alignment *= cast<VectorType>(Ty)->getNumElements();
+ Alignment = PowerOf2Ceil(Alignment);
+ return Align(Alignment);
}
// If we still couldn't find a reasonable default alignment, fall back
@@ -561,9 +570,9 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
// approximation of reality, and if the user wanted something less
// conservative, they should have specified it explicitly in the data
// layout.
- unsigned Align = getTypeStoreSize(Ty);
- Align = PowerOf2Ceil(Align);
- return Align;
+ unsigned Alignment = getTypeStoreSize(Ty);
+ Alignment = PowerOf2Ceil(Alignment);
+ return Align(Alignment);
}
namespace {
@@ -624,7 +633,7 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
return L;
}
-unsigned DataLayout::getPointerABIAlignment(unsigned AS) const {
+Align DataLayout::getPointerABIAlignment(unsigned AS) const {
PointersTy::const_iterator I = findPointerLowerBound(AS);
if (I == Pointers.end() || I->AddressSpace != AS) {
I = findPointerLowerBound(0);
@@ -633,7 +642,7 @@ unsigned DataLayout::getPointerABIAlignment(unsigned AS) const {
return I->ABIAlign;
}
-unsigned DataLayout::getPointerPrefAlignment(unsigned AS) const {
+Align DataLayout::getPointerPrefAlignment(unsigned AS) const {
PointersTy::const_iterator I = findPointerLowerBound(AS);
if (I == Pointers.end() || I->AddressSpace != AS) {
I = findPointerLowerBound(0);
@@ -690,21 +699,18 @@ unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const {
Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
== false) for the requested type \a Ty.
*/
-unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
+Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
AlignTypeEnum AlignType;
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
switch (Ty->getTypeID()) {
// Early escape for the non-numeric types.
case Type::LabelTyID:
- return (abi_or_pref
- ? getPointerABIAlignment(0)
- : getPointerPrefAlignment(0));
+ return abi_or_pref ? getPointerABIAlignment(0) : getPointerPrefAlignment(0);
case Type::PointerTyID: {
unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
- return (abi_or_pref
- ? getPointerABIAlignment(AS)
- : getPointerPrefAlignment(AS));
+ return abi_or_pref ? getPointerABIAlignment(AS)
+ : getPointerPrefAlignment(AS);
}
case Type::ArrayTyID:
return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
@@ -712,11 +718,11 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
case Type::StructTyID: {
// Packed structure types always have an ABI alignment of one.
if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
- return 1;
+ return Align::None();
// Get the layout annotation... which is lazily created on demand.
const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
- unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
+ const Align Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
return std::max(Align, Layout->getAlignment());
}
case Type::IntegerTyID:
@@ -740,27 +746,24 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
llvm_unreachable("Bad type for getAlignment!!!");
}
- return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty), abi_or_pref, Ty);
+ // If we're dealing with a scalable vector, we just need the known minimum
+ // size for determining alignment. If not, we'll get the exact size.
+ return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty).getKnownMinSize(),
+ abi_or_pref, Ty);
}
unsigned DataLayout::getABITypeAlignment(Type *Ty) const {
- return getAlignment(Ty, true);
+ return getAlignment(Ty, true).value();
}
/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
/// an integer type of the specified bitwidth.
-unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+Align DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const {
return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, nullptr);
}
unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const {
- return getAlignment(Ty, false);
-}
-
-unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const {
- unsigned Align = getPrefTypeAlignment(Ty);
- assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
- return Log2_32(Align);
+ return getAlignment(Ty, false).value();
}
IntegerType *DataLayout::getIntPtrType(LLVMContext &C,
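
A short sketch of the Align/MaybeAlign API this DataLayout rework migrates to, assuming only llvm/Support/Alignment.h; the values are illustrative:

#include "llvm/Support/Alignment.h"
using namespace llvm;

void alignmentDemo() {
  Align A(16);                      // must be a power of two
  bool Ok = isAligned(A, 48);       // 48 % 16 == 0 -> true
  uint64_t Padded = alignTo(50, A); // rounds 50 up to 64
  MaybeAlign M;                     // unset: "no alignment specified"
  M = Align(8);                     // now carries an 8-byte guarantee
  (void)Ok; (void)Padded; (void)M;
}
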
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index ce47ef207434..1bbe6b85d260 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -279,7 +279,7 @@ bool DebugInfoFinder::addScope(DIScope *Scope) {
}
static MDNode *stripDebugLocFromLoopID(MDNode *N) {
- assert(!empty(N->operands()) && "Missing self reference?");
+ assert(!N->operands().empty() && "Missing self reference?");
// if there is no debug location, we do not have to rewrite this MDNode.
if (std::none_of(N->op_begin() + 1, N->op_end(), [](const MDOperand &Op) {
@@ -929,6 +929,26 @@ const char *LLVMDIFileGetSource(LLVMMetadataRef File, unsigned *Len) {
return "";
}
+LLVMMetadataRef LLVMDIBuilderCreateMacro(LLVMDIBuilderRef Builder,
+ LLVMMetadataRef ParentMacroFile,
+ unsigned Line,
+ LLVMDWARFMacinfoRecordType RecordType,
+ const char *Name, size_t NameLen,
+ const char *Value, size_t ValueLen) {
+ return wrap(
+ unwrap(Builder)->createMacro(unwrapDI<DIMacroFile>(ParentMacroFile), Line,
+ static_cast<MacinfoRecordType>(RecordType),
+ {Name, NameLen}, {Value, ValueLen}));
+}
+
+LLVMMetadataRef
+LLVMDIBuilderCreateTempMacroFile(LLVMDIBuilderRef Builder,
+ LLVMMetadataRef ParentMacroFile, unsigned Line,
+ LLVMMetadataRef File) {
+ return wrap(unwrap(Builder)->createTempMacroFile(
+ unwrapDI<DIMacroFile>(ParentMacroFile), Line, unwrapDI<DIFile>(File)));
+}
+
LLVMMetadataRef LLVMDIBuilderCreateEnumerator(LLVMDIBuilderRef Builder,
const char *Name, size_t NameLen,
int64_t Value,
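
A hedged usage sketch of the two C API entry points added above; Builder and File are assumed to be valid references created elsewhere, and the record-type enumerator name is assumed from the matching llvm-c/DebugInfo.h change:

#include "llvm-c/DebugInfo.h"

void addMacroDebugInfo(LLVMDIBuilderRef Builder, LLVMMetadataRef File) {
  // A temporary macro file scoping the macros that follow.
  LLVMMetadataRef TempFile = LLVMDIBuilderCreateTempMacroFile(
      Builder, /*ParentMacroFile=*/NULL, /*Line=*/0, File);
  // Records "#define PI 3.14" at line 1 of that file.
  LLVMDIBuilderCreateMacro(Builder, TempFile, /*Line=*/1,
                           LLVMDWARFMacinfoRecordTypeDefine,
                           "PI", /*NameLen=*/2, "3.14", /*ValueLen=*/4);
}
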
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index 900df27d1d33..94ec3abfa7a2 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -828,15 +828,23 @@ DIExpression *DIExpression::getImpl(LLVMContext &Context,
}
unsigned DIExpression::ExprOperand::getSize() const {
- switch (getOp()) {
+ uint64_t Op = getOp();
+
+ if (Op >= dwarf::DW_OP_breg0 && Op <= dwarf::DW_OP_breg31)
+ return 2;
+
+ switch (Op) {
case dwarf::DW_OP_LLVM_convert:
case dwarf::DW_OP_LLVM_fragment:
+ case dwarf::DW_OP_bregx:
return 3;
case dwarf::DW_OP_constu:
+ case dwarf::DW_OP_consts:
case dwarf::DW_OP_deref_size:
case dwarf::DW_OP_plus_uconst:
case dwarf::DW_OP_LLVM_tag_offset:
- case dwarf::DW_OP_entry_value:
+ case dwarf::DW_OP_LLVM_entry_value:
+ case dwarf::DW_OP_regx:
return 2;
default:
return 1;
@@ -849,8 +857,13 @@ bool DIExpression::isValid() const {
if (I->get() + I->getSize() > E->get())
return false;
+ uint64_t Op = I->getOp();
+ if ((Op >= dwarf::DW_OP_reg0 && Op <= dwarf::DW_OP_reg31) ||
+ (Op >= dwarf::DW_OP_breg0 && Op <= dwarf::DW_OP_breg31))
+ return true;
+
// Check that the operand is valid.
- switch (I->getOp()) {
+ switch (Op) {
default:
return false;
case dwarf::DW_OP_LLVM_fragment:
@@ -877,10 +890,12 @@ bool DIExpression::isValid() const {
return false;
break;
}
- case dwarf::DW_OP_entry_value: {
- // An entry value operator must appear at the begin and the size
- // of following expression should be 1, because we support only
- // entry values of a simple register location.
+ case dwarf::DW_OP_LLVM_entry_value: {
+ // An entry value operator must appear at the beginning and the number of
+ // operations it covers can currently only be 1, because we support only
+ // entry values of a simple register location. One reason for this is that
+ // we currently can't calculate the size of the resulting DWARF block for
+ // other expressions.
return I->get() == expr_op_begin()->get() && I->getArg(0) == 1 &&
getNumElements() == 2;
}
@@ -905,6 +920,8 @@ bool DIExpression::isValid() const {
case dwarf::DW_OP_lit0:
case dwarf::DW_OP_not:
case dwarf::DW_OP_dup:
+ case dwarf::DW_OP_regx:
+ case dwarf::DW_OP_bregx:
break;
}
}
@@ -1035,7 +1052,7 @@ DIExpression *DIExpression::prependOpcodes(const DIExpression *Expr,
assert(Expr && "Can't prepend ops to this expression");
if (EntryValue) {
- Ops.push_back(dwarf::DW_OP_entry_value);
+ Ops.push_back(dwarf::DW_OP_LLVM_entry_value);
// Add size info needed for entry value expression.
// Add plus one for target register operand.
Ops.push_back(Expr->getNumElements() + 1);
@@ -1146,6 +1163,7 @@ Optional<DIExpression *> DIExpression::createFragmentExpression(
Op.appendToVector(Ops);
}
}
+ assert(Expr && "Unknown DIExpression");
Ops.push_back(dwarf::DW_OP_LLVM_fragment);
Ops.push_back(OffsetInBits);
Ops.push_back(SizeInBits);
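
A small sketch, following the validity rule spelled out above, of building an entry-value expression with the renamed operator: the operator comes first and covers exactly one operation (the register itself):

#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

DIExpression *makeEntryValue(LLVMContext &Ctx) {
  // Two elements total, so isValid()'s getNumElements() == 2 check holds.
  uint64_t Ops[] = {dwarf::DW_OP_LLVM_entry_value, 1};
  return DIExpression::get(Ctx, Ops);
}
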
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index 4a8e3cca3493..99d5aec3f043 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -370,5 +370,16 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
return OS.str();
}
+DiagnosticInfoMisExpect::DiagnosticInfoMisExpect(const Instruction *Inst,
+ Twine &Msg)
+ : DiagnosticInfoWithLocationBase(DK_MisExpect, DS_Warning,
+ *Inst->getParent()->getParent(),
+ Inst->getDebugLoc()),
+ Msg(Msg) {}
+
+void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const {
+ DP << getLocationStr() << ": " << getMsg();
+}
+
void OptimizationRemarkAnalysisFPCommute::anchor() {}
void OptimizationRemarkAnalysisAliasing::anchor() {}
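
A hedged sketch of emitting the new diagnostic through the context's diagnostic machinery; the message text is a placeholder and the instruction is assumed to carry a debug location:

#include "llvm/ADT/Twine.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void warnMisExpect(Instruction *Inst) {
  // The constructor takes a non-const Twine&, hence the named variable.
  Twine Msg("use of __builtin_expect() contradicts profile data");
  Inst->getContext().diagnose(DiagnosticInfoMisExpect(Inst, Msg));
}
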
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index dc28d22548dd..3f70d2c904e5 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -251,7 +251,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
// We only need a symbol table for a function if the context keeps value names
if (!getContext().shouldDiscardValueNames())
- SymTab = make_unique<ValueSymbolTable>();
+ SymTab = std::make_unique<ValueSymbolTable>();
// If the function has arguments, mark them as lazily built.
if (Ty->getNumParams())
@@ -293,7 +293,8 @@ void Function::BuildLazyArguments() const {
// Clear the lazy arguments bit.
unsigned SDC = getSubclassDataFromValue();
- const_cast<Function*>(this)->setValueSubclassData(SDC &= ~(1<<0));
+ SDC &= ~(1 << 0);
+ const_cast<Function*>(this)->setValueSubclassData(SDC);
assert(!hasLazyArguments());
}
@@ -611,9 +612,11 @@ static std::string getMangledTypeStr(Type* Ty) {
Result += "vararg";
// Ensure nested function types are distinguishable.
Result += "f";
- } else if (isa<VectorType>(Ty)) {
- Result += "v" + utostr(Ty->getVectorNumElements()) +
- getMangledTypeStr(Ty->getVectorElementType());
+ } else if (VectorType* VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->isScalable())
+ Result += "nx";
+ Result += "v" + utostr(VTy->getVectorNumElements()) +
+ getMangledTypeStr(VTy->getVectorElementType());
} else if (Ty) {
switch (Ty->getTypeID()) {
default: llvm_unreachable("Unhandled type");
@@ -700,7 +703,11 @@ enum IIT_Info {
IIT_STRUCT7 = 39,
IIT_STRUCT8 = 40,
IIT_F128 = 41,
- IIT_VEC_ELEMENT = 42
+ IIT_VEC_ELEMENT = 42,
+ IIT_SCALABLE_VEC = 43,
+ IIT_SUBDIVIDE2_ARG = 44,
+ IIT_SUBDIVIDE4_ARG = 45,
+ IIT_VEC_OF_BITCASTS_TO_INT = 46
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -865,12 +872,36 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
DecodeIITType(NextElt, Infos, OutputTable);
return;
}
+ case IIT_SUBDIVIDE2_ARG: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide2Argument,
+ ArgInfo));
+ return;
+ }
+ case IIT_SUBDIVIDE4_ARG: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide4Argument,
+ ArgInfo));
+ return;
+ }
case IIT_VEC_ELEMENT: {
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecElementArgument,
ArgInfo));
return;
}
+ case IIT_SCALABLE_VEC: {
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::ScalableVecArgument,
+ 0));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ }
+ case IIT_VEC_OF_BITCASTS_TO_INT: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfBitcastsToInt,
+ ArgInfo));
+ return;
+ }
}
llvm_unreachable("unhandled");
}
@@ -961,6 +992,14 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
assert(ITy->getBitWidth() % 2 == 0);
return IntegerType::get(Context, ITy->getBitWidth() / 2);
}
+ case IITDescriptor::Subdivide2Argument:
+ case IITDescriptor::Subdivide4Argument: {
+ Type *Ty = Tys[D.getArgumentNumber()];
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ assert(VTy && "Expected an argument of Vector Type");
+ int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 1 : 2;
+ return VectorType::getSubdividedVectorType(VTy, SubDivs);
+ }
case IITDescriptor::HalfVecArgument:
return VectorType::getHalfElementsVectorType(cast<VectorType>(
Tys[D.getArgumentNumber()]));
@@ -968,7 +1007,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
Type *EltTy = DecodeFixedType(Infos, Tys, Context);
Type *Ty = Tys[D.getArgumentNumber()];
if (auto *VTy = dyn_cast<VectorType>(Ty))
- return VectorType::get(EltTy, VTy->getNumElements());
+ return VectorType::get(EltTy, VTy->getElementCount());
return EltTy;
}
case IITDescriptor::PtrToArgument: {
@@ -989,9 +1028,20 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
return VTy->getElementType();
llvm_unreachable("Expected an argument of Vector Type");
}
+ case IITDescriptor::VecOfBitcastsToInt: {
+ Type *Ty = Tys[D.getArgumentNumber()];
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ assert(VTy && "Expected an argument of Vector Type");
+ return VectorType::getInteger(VTy);
+ }
case IITDescriptor::VecOfAnyPtrsToElt:
// Return the overloaded type (which determines the pointers address space)
return Tys[D.getOverloadArgNumber()];
+ case IITDescriptor::ScalableVecArgument: {
+ Type *Ty = DecodeFixedType(Infos, Tys, Context);
+ return VectorType::get(Ty->getVectorElementType(),
+ { Ty->getVectorNumElements(), true });
+ }
}
llvm_unreachable("unhandled");
}
@@ -1174,8 +1224,9 @@ static bool matchIntrinsicType(
}
case IITDescriptor::HalfVecArgument:
// If this is a forward reference, defer the check for later.
- return D.getArgumentNumber() >= ArgTys.size() ||
- !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
+ if (D.getArgumentNumber() >= ArgTys.size())
+ return IsDeferredCheck || DeferCheck(Ty);
+ return !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
VectorType::getHalfElementsVectorType(
cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
case IITDescriptor::SameVecWidthArgument: {
@@ -1191,8 +1242,8 @@ static bool matchIntrinsicType(
return true;
Type *EltTy = Ty;
if (ThisArgType) {
- if (ReferenceType->getVectorNumElements() !=
- ThisArgType->getVectorNumElements())
+ if (ReferenceType->getElementCount() !=
+ ThisArgType->getElementCount())
return true;
EltTy = ThisArgType->getVectorElementType();
}
@@ -1255,6 +1306,36 @@ static bool matchIntrinsicType(
auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
return !ReferenceType || Ty != ReferenceType->getElementType();
}
+ case IITDescriptor::Subdivide2Argument:
+ case IITDescriptor::Subdivide4Argument: {
+ // If this is a forward reference, defer the check for later.
+ if (D.getArgumentNumber() >= ArgTys.size())
+ return IsDeferredCheck || DeferCheck(Ty);
+
+ Type *NewTy = ArgTys[D.getArgumentNumber()];
+ if (auto *VTy = dyn_cast<VectorType>(NewTy)) {
+ int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 1 : 2;
+ NewTy = VectorType::getSubdividedVectorType(VTy, SubDivs);
+ return Ty != NewTy;
+ }
+ return true;
+ }
+ case IITDescriptor::ScalableVecArgument: {
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy || !VTy->isScalable())
+ return true;
+ return matchIntrinsicType(VTy, Infos, ArgTys, DeferredChecks,
+ IsDeferredCheck);
+ }
+ case IITDescriptor::VecOfBitcastsToInt: {
+ if (D.getArgumentNumber() >= ArgTys.size())
+ return IsDeferredCheck || DeferCheck(Ty);
+ auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+ auto *ThisArgVecTy = dyn_cast<VectorType>(Ty);
+ if (!ThisArgVecTy || !ReferenceType)
+ return true;
+ return ThisArgVecTy != VectorType::getInteger(ReferenceType);
+ }
}
llvm_unreachable("unhandled");
}
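
A sketch of what the scalable-vector mangling above produces; getMangledTypeStr refers to the static helper in this hunk, and the type below is built the same way the ScalableVecArgument case does:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void manglingExample(LLVMContext &Ctx) {
  // <vscale x 4 x i32>: 4 is the known-minimum element count.
  VectorType *ScalableVTy =
      VectorType::get(Type::getInt32Ty(Ctx), {4, /*Scalable=*/true});
  // getMangledTypeStr would yield "nxv4i32" for this type, versus
  // "v4i32" for a fixed <4 x i32>.
  (void)ScalableVTy;
}
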
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index e2bfc0420bc5..46a9696b2944 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -114,18 +114,22 @@ unsigned GlobalValue::getAddressSpace() const {
}
void GlobalObject::setAlignment(unsigned Align) {
- assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
- assert(Align <= MaximumAlignment &&
+ setAlignment(MaybeAlign(Align));
+}
+
+void GlobalObject::setAlignment(MaybeAlign Align) {
+ assert((!Align || Align <= MaximumAlignment) &&
"Alignment is greater than MaximumAlignment!");
- unsigned AlignmentData = Log2_32(Align) + 1;
+ unsigned AlignmentData = encode(Align);
unsigned OldData = getGlobalValueSubClassData();
setGlobalValueSubClassData((OldData & ~AlignmentMask) | AlignmentData);
- assert(getAlignment() == Align && "Alignment representation error!");
+ assert(MaybeAlign(getAlignment()) == Align &&
+ "Alignment representation error!");
}
void GlobalObject::copyAttributesFrom(const GlobalObject *Src) {
GlobalValue::copyAttributesFrom(Src);
- setAlignment(Src->getAlignment());
+ setAlignment(MaybeAlign(Src->getAlignment()));
setSection(Src->getSection());
}
@@ -427,6 +431,43 @@ GlobalIndirectSymbol::GlobalIndirectSymbol(Type *Ty, ValueTy VTy,
Op<0>() = Symbol;
}
+static const GlobalObject *
+findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) {
+ if (auto *GO = dyn_cast<GlobalObject>(C))
+ return GO;
+ if (auto *GA = dyn_cast<GlobalAlias>(C))
+ if (Aliases.insert(GA).second)
+ return findBaseObject(GA->getOperand(0), Aliases);
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ case Instruction::Add: {
+ auto *LHS = findBaseObject(CE->getOperand(0), Aliases);
+ auto *RHS = findBaseObject(CE->getOperand(1), Aliases);
+ if (LHS && RHS)
+ return nullptr;
+ return LHS ? LHS : RHS;
+ }
+ case Instruction::Sub: {
+ if (findBaseObject(CE->getOperand(1), Aliases))
+ return nullptr;
+ return findBaseObject(CE->getOperand(0), Aliases);
+ }
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ return findBaseObject(CE->getOperand(0), Aliases);
+ default:
+ break;
+ }
+ }
+ return nullptr;
+}
+
+const GlobalObject *GlobalIndirectSymbol::getBaseObject() const {
+ DenseSet<const GlobalAlias *> Aliases;
+ return findBaseObject(getOperand(0), Aliases);
+}
//===----------------------------------------------------------------------===//
// GlobalAlias Implementation
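
A minimal sketch of the new accessor, assuming a GlobalAlias whose aliasee may itself be another alias or a cast expression:

#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
using namespace llvm;

const GlobalObject *resolveAliasee(const GlobalAlias *GA) {
  // Follows alias chains and looks through bitcasts/GEPs via
  // findBaseObject above; returns nullptr when the base is ambiguous
  // (e.g. an Add of two globals).
  return GA->getBaseObject();
}
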
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index 0c6461c9078f..b782012e9731 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -49,7 +49,7 @@ GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str,
nullptr, GlobalVariable::NotThreadLocal,
AddressSpace);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(1);
+ GV->setAlignment(Align::None());
return GV;
}
@@ -289,8 +289,10 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove(
CallInst *CI = createCallHelper(TheFn, Ops, this);
// Set the alignment of the pointer args.
- CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign));
- CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), SrcAlign));
+ CI->addParamAttr(
+ 0, Attribute::getWithAlignment(CI->getContext(), Align(DstAlign)));
+ CI->addParamAttr(
+ 1, Attribute::getWithAlignment(CI->getContext(), Align(SrcAlign)));
// Set the TBAA info if present.
if (TBAATag)
diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp
index 35b06135a828..953cf9410162 100644
--- a/lib/IR/IRPrintingPasses.cpp
+++ b/lib/IR/IRPrintingPasses.cpp
@@ -26,14 +26,22 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner,
ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &) {
- if (!Banner.empty())
- OS << Banner << "\n";
- if (llvm::isFunctionInPrintList("*"))
+ if (llvm::isFunctionInPrintList("*")) {
+ if (!Banner.empty())
+ OS << Banner << "\n";
M.print(OS, nullptr, ShouldPreserveUseListOrder);
+ }
else {
- for(const auto &F : M.functions())
- if (llvm::isFunctionInPrintList(F.getName()))
+ bool BannerPrinted = false;
+ for(const auto &F : M.functions()) {
+ if (llvm::isFunctionInPrintList(F.getName())) {
+ if (!BannerPrinted && !Banner.empty()) {
+ OS << Banner << "\n";
+ BannerPrinted = true;
+ }
F.print(OS);
+ }
+ }
}
return PreservedAnalyses::all();
}
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 99da7caaccf0..fd732f9eda8b 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -181,6 +181,16 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
// FIXME: For now assuming these are 2-character constraints.
pCodes->push_back(StringRef(I+1, 2));
I += 3;
+ } else if (*I == '@') {
+ // Multi-letter constraint
+ ++I;
+ unsigned char C = static_cast<unsigned char>(*I);
+ assert(isdigit(C) && "Expected a digit!");
+ int N = C - '0';
+ assert(N > 0 && "Found a zero letter constraint!");
+ ++I;
+ pCodes->push_back(StringRef(I, N));
+ I += N;
} else {
// Single letter constraint.
pCodes->push_back(StringRef(I, 1));
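
A standalone sketch of the '@'-prefixed multi-letter constraint form parsed above: '@' is followed by one digit N and then exactly N constraint letters (the concrete codes are target-defined; "abc" below is hypothetical):

#include <cassert>
#include <cctype>
#include <string>

std::string parseAtConstraint(const char *&I) {
  assert(*I == '@' && "expected multi-letter constraint marker");
  ++I;                                  // skip '@'
  assert(isdigit(static_cast<unsigned char>(*I)) && "expected a digit");
  int N = *I - '0';                     // e.g. "@3abc" -> N == 3
  assert(N > 0 && "zero-letter constraint");
  ++I;                                  // skip the digit
  std::string Code(I, I + N);           // the N-letter code, e.g. "abc"
  I += N;
  return Code;
}
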
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index ba5629d1662b..b157c7bb34bf 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -524,7 +524,7 @@ bool Instruction::mayReadFromMemory() const {
case Instruction::Call:
case Instruction::Invoke:
case Instruction::CallBr:
- return !cast<CallBase>(this)->doesNotAccessMemory();
+ return !cast<CallBase>(this)->doesNotReadMemory();
case Instruction::Store:
return !cast<StoreInst>(this)->isUnordered();
}
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 2e7cad103c12..245c7628b08e 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -45,12 +46,6 @@
using namespace llvm;
-static cl::opt<bool> SwitchInstProfUpdateWrapperStrict(
- "switch-inst-prof-update-wrapper-strict", cl::Hidden,
- cl::desc("Assert that prof branch_weights metadata is valid when creating "
- "an instance of SwitchInstProfUpdateWrapper"),
- cl::init(false));
-
//===----------------------------------------------------------------------===//
// AllocaInst Class
//===----------------------------------------------------------------------===//
@@ -822,6 +817,17 @@ void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough,
setName(NameStr);
}
+void CallBrInst::updateArgBlockAddresses(unsigned i, BasicBlock *B) {
+ assert(getNumIndirectDests() > i && "IndirectDest # out of range for callbr");
+ if (BasicBlock *OldBB = getIndirectDest(i)) {
+ BlockAddress *Old = BlockAddress::get(OldBB);
+ BlockAddress *New = BlockAddress::get(B);
+ for (unsigned ArgNo = 0, e = getNumArgOperands(); ArgNo != e; ++ArgNo)
+ if (dyn_cast<BlockAddress>(getArgOperand(ArgNo)) == Old)
+ setArgOperand(ArgNo, New);
+ }
+}
+
CallBrInst::CallBrInst(const CallBrInst &CBI)
: CallBase(CBI.Attrs, CBI.FTy, CBI.getType(), Instruction::CallBr,
OperandTraits<CallBase>::op_end(this) - CBI.getNumOperands(),
@@ -1223,7 +1229,7 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
: UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertBefore),
AllocatedType(Ty) {
- setAlignment(Align);
+ setAlignment(MaybeAlign(Align));
assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
@@ -1234,18 +1240,21 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
: UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertAtEnd),
AllocatedType(Ty) {
- setAlignment(Align);
+ setAlignment(MaybeAlign(Align));
assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
-void AllocaInst::setAlignment(unsigned Align) {
- assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
- assert(Align <= MaximumAlignment &&
+void AllocaInst::setAlignment(MaybeAlign Align) {
+ assert((!Align || *Align <= MaximumAlignment) &&
"Alignment is greater than MaximumAlignment!");
setInstructionSubclassData((getSubclassDataFromInstruction() & ~31) |
- (Log2_32(Align) + 1));
- assert(getAlignment() == Align && "Alignment representation error!");
+ encode(Align));
+ if (Align)
+ assert(getAlignment() == Align->value() &&
+ "Alignment representation error!");
+ else
+ assert(getAlignment() == 0 && "Alignment representation error!");
}
bool AllocaInst::isArrayAllocation() const {
@@ -1287,36 +1296,36 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name,
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
Instruction *InsertBef)
- : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertBef) {}
+ : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/None, InsertBef) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
BasicBlock *InsertAE)
- : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {}
+ : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/None, InsertAE) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, Instruction *InsertBef)
+ MaybeAlign Align, Instruction *InsertBef)
: LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
SyncScope::System, InsertBef) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, BasicBlock *InsertAE)
+ MaybeAlign Align, BasicBlock *InsertAE)
: LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
SyncScope::System, InsertAE) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SyncScope::ID SSID, Instruction *InsertBef)
+ MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID,
+ Instruction *InsertBef)
: UnaryInstruction(Ty, Load, Ptr, InsertBef) {
assert(Ty == cast<PointerType>(Ptr->getType())->getElementType());
setVolatile(isVolatile);
- setAlignment(Align);
+ setAlignment(MaybeAlign(Align));
setAtomic(Order, SSID);
AssertOK();
setName(Name);
}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, AtomicOrdering Order, SyncScope::ID SSID,
+ MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID,
BasicBlock *InsertAE)
: UnaryInstruction(Ty, Load, Ptr, InsertAE) {
assert(Ty == cast<PointerType>(Ptr->getType())->getElementType());
@@ -1327,13 +1336,16 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
setName(Name);
}
-void LoadInst::setAlignment(unsigned Align) {
- assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
- assert(Align <= MaximumAlignment &&
+void LoadInst::setAlignment(MaybeAlign Align) {
+ assert((!Align || *Align <= MaximumAlignment) &&
"Alignment is greater than MaximumAlignment!");
setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
- ((Log2_32(Align)+1)<<1));
- assert(getAlignment() == Align && "Alignment representation error!");
+ (encode(Align) << 1));
+ if (Align)
+ assert(getAlignment() == Align->value() &&
+ "Alignment representation error!");
+ else
+ assert(getAlignment() == 0 && "Alignment representation error!");
}
//===----------------------------------------------------------------------===//
@@ -1359,30 +1371,28 @@ StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Instruction *InsertBefore)
- : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertBefore) {}
+ : StoreInst(val, addr, isVolatile, /*Align=*/None, InsertBefore) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
BasicBlock *InsertAtEnd)
- : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertAtEnd) {}
+ : StoreInst(val, addr, isVolatile, /*Align=*/None, InsertAtEnd) {}
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align,
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align,
Instruction *InsertBefore)
: StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic,
SyncScope::System, InsertBefore) {}
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align,
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align,
BasicBlock *InsertAtEnd)
: StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic,
SyncScope::System, InsertAtEnd) {}
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SyncScope::ID SSID,
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align,
+ AtomicOrdering Order, SyncScope::ID SSID,
Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this), InsertBefore) {
Op<0>() = val;
Op<1>() = addr;
setVolatile(isVolatile);
@@ -1391,14 +1401,12 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
AssertOK();
}
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SyncScope::ID SSID,
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align,
+ AtomicOrdering Order, SyncScope::ID SSID,
BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this), InsertAtEnd) {
Op<0>() = val;
Op<1>() = addr;
setVolatile(isVolatile);
@@ -1407,13 +1415,16 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
AssertOK();
}
-void StoreInst::setAlignment(unsigned Align) {
- assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
- assert(Align <= MaximumAlignment &&
+void StoreInst::setAlignment(MaybeAlign Align) {
+ assert((!Align || *Align <= MaximumAlignment) &&
"Alignment is greater than MaximumAlignment!");
setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
- ((Log2_32(Align)+1) << 1));
- assert(getAlignment() == Align && "Alignment representation error!");
+ (encode(Align) << 1));
+ if (Align)
+ assert(getAlignment() == Align->value() &&
+ "Alignment representation error!");
+ else
+ assert(getAlignment() == 0 && "Alignment representation error!");
}
//===----------------------------------------------------------------------===//
@@ -1778,7 +1789,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
const Twine &Name,
Instruction *InsertBefore)
: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
- cast<VectorType>(Mask->getType())->getNumElements()),
+ cast<VectorType>(Mask->getType())->getElementCount()),
ShuffleVector,
OperandTraits<ShuffleVectorInst>::op_begin(this),
OperandTraits<ShuffleVectorInst>::operands(this),
@@ -1795,7 +1806,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
- cast<VectorType>(Mask->getType())->getNumElements()),
+ cast<VectorType>(Mask->getType())->getElementCount()),
ShuffleVector,
OperandTraits<ShuffleVectorInst>::op_begin(this),
OperandTraits<ShuffleVectorInst>::operands(this),
@@ -2968,8 +2979,8 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
}
// Get the bit sizes, we'll need these
- unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
- unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+ TypeSize SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ TypeSize DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
// Run through the possibilities ...
if (DestTy->isIntegerTy()) { // Casting to integral
@@ -3016,7 +3027,7 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) {
if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy)) {
if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy)) {
- if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ if (SrcVecTy->getElementCount() == DestVecTy->getElementCount()) {
// An element by element cast. Valid if casting the elements is valid.
SrcTy = SrcVecTy->getElementType();
DestTy = DestVecTy->getElementType();
@@ -3030,12 +3041,12 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) {
}
}
- unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
- unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+ TypeSize SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ TypeSize DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
// Could still have vectors of pointers if the number of elements doesn't
// match
- if (SrcBits == 0 || DestBits == 0)
+ if (SrcBits.getKnownMinSize() == 0 || DestBits.getKnownMinSize() == 0)
return false;
if (SrcBits != DestBits)
@@ -3886,7 +3897,7 @@ SwitchInstProfUpdateWrapper::getProfBranchWeightsMD(const SwitchInst &SI) {
}
MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() {
- assert(State == Changed && "called only if metadata has changed");
+ assert(Changed && "called only if metadata has changed");
if (!Weights)
return nullptr;
@@ -3905,17 +3916,12 @@ MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() {
void SwitchInstProfUpdateWrapper::init() {
MDNode *ProfileData = getProfBranchWeightsMD(SI);
- if (!ProfileData) {
- State = Initialized;
+ if (!ProfileData)
return;
- }
if (ProfileData->getNumOperands() != SI.getNumSuccessors() + 1) {
- State = Invalid;
- if (SwitchInstProfUpdateWrapperStrict)
- llvm_unreachable("number of prof branch_weights metadata operands does "
- "not correspond to number of succesors");
- return;
+ llvm_unreachable("number of prof branch_weights metadata operands does "
+ "not correspond to number of succesors");
}
SmallVector<uint32_t, 8> Weights;
@@ -3924,7 +3930,6 @@ void SwitchInstProfUpdateWrapper::init() {
uint32_t CW = C->getValue().getZExtValue();
Weights.push_back(CW);
}
- State = Initialized;
this->Weights = std::move(Weights);
}
@@ -3933,7 +3938,7 @@ SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) {
if (Weights) {
assert(SI.getNumSuccessors() == Weights->size() &&
"num of prof branch_weights must accord with num of successors");
- State = Changed;
+ Changed = true;
// Copy the last case to the place of the removed one and shrink.
// This is tightly coupled with the way SwitchInst::removeCase() removes
// the cases in SwitchInst::removeCase(CaseIt).
@@ -3948,15 +3953,12 @@ void SwitchInstProfUpdateWrapper::addCase(
SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
SI.addCase(OnVal, Dest);
- if (State == Invalid)
- return;
-
if (!Weights && W && *W) {
- State = Changed;
+ Changed = true;
Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
Weights.getValue()[SI.getNumSuccessors() - 1] = *W;
} else if (Weights) {
- State = Changed;
+ Changed = true;
Weights.getValue().push_back(W ? *W : 0);
}
if (Weights)
@@ -3967,11 +3969,9 @@ void SwitchInstProfUpdateWrapper::addCase(
SymbolTableList<Instruction>::iterator
SwitchInstProfUpdateWrapper::eraseFromParent() {
// Instruction is erased. Mark as unchanged to not touch it in the destructor.
- if (State != Invalid) {
- State = Initialized;
- if (Weights)
- Weights->resize(0);
- }
+ Changed = false;
+ if (Weights)
+ Weights->resize(0);
return SI.eraseFromParent();
}
@@ -3984,7 +3984,7 @@ SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) {
void SwitchInstProfUpdateWrapper::setSuccessorWeight(
unsigned idx, SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
- if (!W || State == Invalid)
+ if (!W)
return;
if (!Weights && *W)
@@ -3993,7 +3993,7 @@ void SwitchInstProfUpdateWrapper::setSuccessorWeight(
if (Weights) {
auto &OldW = Weights.getValue()[idx];
if (*W != OldW) {
- State = Changed;
+ Changed = true;
OldW = *W;
}
}
@@ -4136,13 +4136,14 @@ AllocaInst *AllocaInst::cloneImpl() const {
LoadInst *LoadInst::cloneImpl() const {
return new LoadInst(getType(), getOperand(0), Twine(), isVolatile(),
- getAlignment(), getOrdering(), getSyncScopeID());
+ MaybeAlign(getAlignment()), getOrdering(),
+ getSyncScopeID());
}
StoreInst *StoreInst::cloneImpl() const {
return new StoreInst(getOperand(0), getOperand(1), isVolatile(),
- getAlignment(), getOrdering(), getSyncScopeID());
-
+ MaybeAlign(getAlignment()), getOrdering(),
+ getSyncScopeID());
}
AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const {
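
A sketch of the migrated constructor signature used throughout this file: alignment is now a MaybeAlign, so "unspecified" is spelled None rather than 0. The 8-byte value is illustrative:

#include "llvm/IR/Instructions.h"
using namespace llvm;

LoadInst *makeAlignedLoad(Type *Ty, Value *Ptr, Instruction *InsertBefore) {
  // MaybeAlign(8) states an 8-byte alignment; passing None instead
  // would mean "alignment unspecified" (the old spelling was 0).
  return new LoadInst(Ty, Ptr, "ld", /*isVolatile=*/false, MaybeAlign(8),
                      InsertBefore);
}
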
diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp
index 7a042326f67f..26ed46a9cd91 100644
--- a/lib/IR/IntrinsicInst.cpp
+++ b/lib/IR/IntrinsicInst.cpp
@@ -67,13 +67,12 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
// size 1. During the search, we can skip the prefix that we already know is
// identical. By using strncmp we consider names with differing suffixes to
// be part of the equal range.
- size_t CmpStart = 0;
size_t CmpEnd = 4; // Skip the "llvm" component.
const char *const *Low = NameTable.begin();
const char *const *High = NameTable.end();
const char *const *LastLow = Low;
while (CmpEnd < Name.size() && High - Low > 0) {
- CmpStart = CmpEnd;
+ size_t CmpStart = CmpEnd;
CmpEnd = Name.find('.', CmpStart + 1);
CmpEnd = CmpEnd == StringRef::npos ? Name.size() : CmpEnd;
auto Cmp = [CmpStart, CmpEnd](const char *LHS, const char *RHS) {
@@ -107,7 +106,7 @@ Optional<ConstrainedFPIntrinsic::RoundingMode>
ConstrainedFPIntrinsic::getRoundingMode() const {
unsigned NumOperands = getNumArgOperands();
Metadata *MD =
- dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
+ cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
if (!MD || !isa<MDString>(MD))
return None;
return StrToRoundingMode(cast<MDString>(MD)->getString());
@@ -143,7 +142,7 @@ ConstrainedFPIntrinsic::RoundingModeToStr(RoundingMode UseRounding) {
RoundingStr = "round.upward";
break;
case ConstrainedFPIntrinsic::rmTowardZero:
- RoundingStr = "round.tozero";
+ RoundingStr = "round.towardzero";
break;
}
return RoundingStr;
@@ -153,7 +152,7 @@ Optional<ConstrainedFPIntrinsic::ExceptionBehavior>
ConstrainedFPIntrinsic::getExceptionBehavior() const {
unsigned NumOperands = getNumArgOperands();
Metadata *MD =
- dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
+ cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
if (!MD || !isa<MDString>(MD))
return None;
return StrToExceptionBehavior(cast<MDString>(MD)->getString());
@@ -189,6 +188,8 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
default:
return false;
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
@@ -199,10 +200,14 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
case Intrinsic::experimental_constrained_log:
case Intrinsic::experimental_constrained_log10:
case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_lrint:
+ case Intrinsic::experimental_constrained_llrint:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_lround:
+ case Intrinsic::experimental_constrained_llround:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
return true;
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index e1cdf6b539db..5e8772186a2a 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -36,34 +36,9 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
// Create the fixed metadata kinds. This is done in the same order as the
// MD_* enum values so that they correspond.
std::pair<unsigned, StringRef> MDKinds[] = {
- {MD_dbg, "dbg"},
- {MD_tbaa, "tbaa"},
- {MD_prof, "prof"},
- {MD_fpmath, "fpmath"},
- {MD_range, "range"},
- {MD_tbaa_struct, "tbaa.struct"},
- {MD_invariant_load, "invariant.load"},
- {MD_alias_scope, "alias.scope"},
- {MD_noalias, "noalias"},
- {MD_nontemporal, "nontemporal"},
- {MD_mem_parallel_loop_access, "llvm.mem.parallel_loop_access"},
- {MD_nonnull, "nonnull"},
- {MD_dereferenceable, "dereferenceable"},
- {MD_dereferenceable_or_null, "dereferenceable_or_null"},
- {MD_make_implicit, "make.implicit"},
- {MD_unpredictable, "unpredictable"},
- {MD_invariant_group, "invariant.group"},
- {MD_align, "align"},
- {MD_loop, "llvm.loop"},
- {MD_type, "type"},
- {MD_section_prefix, "section_prefix"},
- {MD_absolute_symbol, "absolute_symbol"},
- {MD_associated, "associated"},
- {MD_callees, "callees"},
- {MD_irr_loop, "irr_loop"},
- {MD_access_group, "llvm.access.group"},
- {MD_callback, "callback"},
- {MD_preserve_access_index, "llvm.preserve.access.index"},
+#define LLVM_FIXED_MD_KIND(EnumID, Name, Value) {EnumID, Name},
+#include "llvm/IR/FixedMetadataKinds.def"
+#undef LLVM_FIXED_MD_KIND
};
for (auto &MDKind : MDKinds) {
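
The hand-maintained table above is replaced by an X-macro expansion, so the enum and the name list can no longer drift apart. A generic sketch of the technique, with a hypothetical colors.def:

// colors.def (hypothetical) would contain lines like:
//   COLOR_ENTRY(Red, "red")
//   COLOR_ENTRY(Green, "green")
enum Color {
#define COLOR_ENTRY(Id, Name) Id,
#include "colors.def"
#undef COLOR_ENTRY
};

static const char *const ColorNames[] = {
#define COLOR_ENTRY(Id, Name) Name,
#include "colors.def"
#undef COLOR_ENTRY
};
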
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index c6ab2c6f213a..5f9782714170 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
- : DiagHandler(llvm::make_unique<DiagnosticHandler>()),
+ : DiagHandler(std::make_unique<DiagnosticHandler>()),
VoidTy(C, Type::VoidTyID),
LabelTy(C, Type::LabelTyID),
HalfTy(C, Type::HalfTyID),
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index c575d6e782b9..3a03c493100b 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -1680,7 +1680,6 @@ bool FPPassManager::runOnFunction(Function &F) {
bool FPPassManager::runOnModule(Module &M) {
bool Changed = false;
- llvm::TimeTraceScope TimeScope("OptModule", M.getName());
for (Function &F : M)
Changed |= runOnFunction(F);
@@ -1999,10 +1998,28 @@ void FunctionPass::assignPassManager(PMStack &PMS,
FPP->add(this);
}
+void BasicBlockPass::preparePassManager(PMStack &PMS) {
+ // Find BBPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_BasicBlockPassManager)
+ PMS.pop();
+
+ // If this pass is destroying high level information that is used
+ // by other passes that are managed by BBPM then do not insert
+ // this pass in current BBPM. Use new BBPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
/// Find appropriate Basic Pass Manager or Call Graph Pass Manager
/// in the PM Stack and add self into that manager.
void BasicBlockPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_BasicBlockPassManager)
+ PMS.pop();
+
BBPassManager *BBP;
// Basic Pass Manager is a leaf pass manager. It does not handle
@@ -2018,6 +2035,7 @@ void BasicBlockPass::assignPassManager(PMStack &PMS,
// [1] Create new Basic Block Manager
BBP = new BBPassManager();
+ BBP->populateInheritedAnalysis(PMS);
// [2] Set up new manager's top level manager
// Basic Block Pass Manager does not live by itself
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
index 14bcb3a29b07..7bdb85ace522 100644
--- a/lib/IR/MDBuilder.cpp
+++ b/lib/IR/MDBuilder.cpp
@@ -309,3 +309,15 @@ MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) {
};
return MDNode::get(Context, Vals);
}
+
+MDNode *MDBuilder::createMisExpect(uint64_t Index, uint64_t LikelyWeight,
+ uint64_t UnlikelyWeight) {
+ auto *IntType = Type::getInt64Ty(Context);
+ Metadata *Vals[] = {
+ createString("misexpect"),
+ createConstant(ConstantInt::get(IntType, Index)),
+ createConstant(ConstantInt::get(IntType, LikelyWeight)),
+ createConstant(ConstantInt::get(IntType, UnlikelyWeight)),
+ };
+ return MDNode::get(Context, Vals);
+}
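
A hedged usage sketch of the new helper; the weight values are illustrative and the metadata kind string mirrors the node's own first operand:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

void attachMisExpect(Instruction &I, MDBuilder &MDB) {
  // Index 0 names which branch weight the expectation refers to.
  MDNode *N = MDB.createMisExpect(/*Index=*/0, /*LikelyWeight=*/2000,
                                  /*UnlikelyWeight=*/1);
  I.setMetadata("misexpect", N);
}
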
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 748a2238e642..62c2aa86f3b0 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -1497,6 +1497,24 @@ void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID) {
TypeID}));
}
+void GlobalObject::addVCallVisibilityMetadata(VCallVisibility Visibility) {
+ addMetadata(LLVMContext::MD_vcall_visibility,
+ *MDNode::get(getContext(),
+ {ConstantAsMetadata::get(ConstantInt::get(
+ Type::getInt64Ty(getContext()), Visibility))}));
+}
+
+GlobalObject::VCallVisibility GlobalObject::getVCallVisibility() const {
+ if (MDNode *MD = getMetadata(LLVMContext::MD_vcall_visibility)) {
+ uint64_t Val = cast<ConstantInt>(
+ cast<ConstantAsMetadata>(MD->getOperand(0))->getValue())
+ ->getZExtValue();
+ assert(Val <= 2 && "unknown vcall visibility!");
+ return (VCallVisibility)Val;
+ }
+ return VCallVisibility::VCallVisibilityPublic;
+}
+
void Function::setSubprogram(DISubprogram *SP) {
setMetadata(LLVMContext::MD_dbg, SP);
}
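
A short sketch of the new vcall-visibility helpers; the TranslationUnit enumerator name is assumed from the corresponding GlobalObject declaration (the diff above shows only Public explicitly):

#include "llvm/IR/GlobalObject.h"
using namespace llvm;

void restrictVTable(GlobalObject &VTable) {
  // Mark virtual calls through this vtable as visible only inside the
  // current translation unit, then read the value back (defaults to
  // Public when no metadata is attached).
  VTable.addVCallVisibilityMetadata(
      GlobalObject::VCallVisibilityTranslationUnit);
  GlobalObject::VCallVisibility V = VTable.getVCallVisibility();
  (void)V;
}
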
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index dbf4035ac7c1..25efd009194f 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -604,7 +604,7 @@ GlobalVariable *llvm::collectUsedGlobalVariables(
const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
for (Value *Op : Init->operands()) {
- GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
+ GlobalValue *G = cast<GlobalValue>(Op->stripPointerCasts());
Set.insert(G);
}
return GV;
diff --git a/lib/IR/RemarkStreamer.cpp b/lib/IR/RemarkStreamer.cpp
index 5b4c7e72b479..0fcc06b961f3 100644
--- a/lib/IR/RemarkStreamer.cpp
+++ b/lib/IR/RemarkStreamer.cpp
@@ -15,15 +15,17 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Remarks/BitstreamRemarkSerializer.h"
#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Remarks/RemarkSerializer.h"
using namespace llvm;
-RemarkStreamer::RemarkStreamer(StringRef Filename,
- std::unique_ptr<remarks::Serializer> Serializer)
- : Filename(Filename), PassFilter(), Serializer(std::move(Serializer)) {
- assert(!Filename.empty() && "This needs to be a real filename.");
-}
+RemarkStreamer::RemarkStreamer(
+ std::unique_ptr<remarks::RemarkSerializer> RemarkSerializer,
+ Optional<StringRef> FilenameIn)
+ : PassFilter(), RemarkSerializer(std::move(RemarkSerializer)),
+ Filename(FilenameIn ? Optional<std::string>(FilenameIn->str()) : None) {}
Error RemarkStreamer::setFilter(StringRef Filter) {
Regex R = Regex(Filter);
@@ -99,24 +101,13 @@ void RemarkStreamer::emit(const DiagnosticInfoOptimizationBase &Diag) {
// First, convert the diagnostic to a remark.
remarks::Remark R = toRemark(Diag);
// Then, emit the remark through the serializer.
- Serializer->emit(R);
+ RemarkSerializer->emit(R);
}
char RemarkSetupFileError::ID = 0;
char RemarkSetupPatternError::ID = 0;
char RemarkSetupFormatError::ID = 0;
-static std::unique_ptr<remarks::Serializer>
-formatToSerializer(remarks::Format RemarksFormat, raw_ostream &OS) {
- switch (RemarksFormat) {
- default:
- llvm_unreachable("Unknown remark serializer format.");
- return nullptr;
- case remarks::Format::YAML:
- return llvm::make_unique<remarks::YAMLSerializer>(OS);
- };
-}
-
Expected<std::unique_ptr<ToolOutputFile>>
llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
StringRef RemarksPasses, StringRef RemarksFormat,
@@ -131,24 +122,63 @@ llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
if (RemarksFilename.empty())
return nullptr;
+ Expected<remarks::Format> Format = remarks::parseFormat(RemarksFormat);
+ if (Error E = Format.takeError())
+ return make_error<RemarkSetupFormatError>(std::move(E));
+
std::error_code EC;
+ auto Flags = *Format == remarks::Format::YAML ? sys::fs::OF_Text
+ : sys::fs::OF_None;
auto RemarksFile =
- llvm::make_unique<ToolOutputFile>(RemarksFilename, EC, sys::fs::F_None);
+ std::make_unique<ToolOutputFile>(RemarksFilename, EC, Flags);
// We don't use llvm::FileError here because some diagnostics want the file
// name separately.
if (EC)
return make_error<RemarkSetupFileError>(errorCodeToError(EC));
+ Expected<std::unique_ptr<remarks::RemarkSerializer>> RemarkSerializer =
+ remarks::createRemarkSerializer(
+ *Format, remarks::SerializerMode::Separate, RemarksFile->os());
+ if (Error E = RemarkSerializer.takeError())
+ return make_error<RemarkSetupFormatError>(std::move(E));
+
+ Context.setRemarkStreamer(std::make_unique<RemarkStreamer>(
+ std::move(*RemarkSerializer), RemarksFilename));
+
+ if (!RemarksPasses.empty())
+ if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses))
+ return make_error<RemarkSetupPatternError>(std::move(E));
+
+ return std::move(RemarksFile);
+}
+
+Error llvm::setupOptimizationRemarks(LLVMContext &Context, raw_ostream &OS,
+ StringRef RemarksPasses,
+ StringRef RemarksFormat,
+ bool RemarksWithHotness,
+ unsigned RemarksHotnessThreshold) {
+ if (RemarksWithHotness)
+ Context.setDiagnosticsHotnessRequested(true);
+
+ if (RemarksHotnessThreshold)
+ Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold);
+
Expected<remarks::Format> Format = remarks::parseFormat(RemarksFormat);
if (Error E = Format.takeError())
return make_error<RemarkSetupFormatError>(std::move(E));
- Context.setRemarkStreamer(llvm::make_unique<RemarkStreamer>(
- RemarksFilename, formatToSerializer(*Format, RemarksFile->os())));
+ Expected<std::unique_ptr<remarks::RemarkSerializer>> RemarkSerializer =
+ remarks::createRemarkSerializer(*Format,
+ remarks::SerializerMode::Separate, OS);
+ if (Error E = RemarkSerializer.takeError())
+ return make_error<RemarkSetupFormatError>(std::move(E));
+
+ Context.setRemarkStreamer(
+ std::make_unique<RemarkStreamer>(std::move(*RemarkSerializer)));
if (!RemarksPasses.empty())
if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses))
return make_error<RemarkSetupPatternError>(std::move(E));
- return std::move(RemarksFile);
+ return Error::success();
}
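
A hedged usage sketch for the two overloads above: the file-based form hands back a ToolOutputFile the caller must keep alive, while the new stream-based form only reports setup errors. The "remarks.yaml" name and "yaml" format string are illustrative:

LLVMContext Ctx;
Expected<std::unique_ptr<ToolOutputFile>> FileOrErr =
    setupOptimizationRemarks(Ctx, "remarks.yaml", /*RemarksPasses=*/"",
                             /*RemarksFormat=*/"yaml",
                             /*RemarksWithHotness=*/false);
if (!FileOrErr)
  report_fatal_error(toString(FileOrErr.takeError()));
// ... run the pass pipeline; remarks stream into the file ...
if (*FileOrErr)
  (*FileOrErr)->keep(); // persist remarks.yaml on success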
diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp
index 7f3dea5e6a6d..c90347ec48fd 100644
--- a/lib/IR/SafepointIRVerifier.cpp
+++ b/lib/IR/SafepointIRVerifier.cpp
@@ -102,11 +102,11 @@ public:
}
bool isDeadEdge(const Use *U) const {
- assert(dyn_cast<Instruction>(U->getUser())->isTerminator() &&
+ assert(cast<Instruction>(U->getUser())->isTerminator() &&
"edge must be operand of terminator");
assert(cast_or_null<BasicBlock>(U->get()) &&
"edge must refer to basic block");
- assert(!isDeadBlock(dyn_cast<Instruction>(U->getUser())->getParent()) &&
+ assert(!isDeadBlock(cast<Instruction>(U->getUser())->getParent()) &&
"isDeadEdge() must be applied to edge from live block");
return DeadEdges.count(U);
}
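
The cast<> swap above is more than style: dyn_cast returns null on a type mismatch, so dereferencing its result inside an assert converts a type violation into undefined behavior rather than a diagnostic. Illustrative only, with U standing in for any Use*:

// cast<> asserts the cast itself, so a mismatch fails loudly:
auto *I = cast<Instruction>(U->getUser());
// dyn_cast<> would instead yield nullptr on mismatch, and the later
// I->isTerminator() call would be UB in builds without assertions:
// auto *I = dyn_cast<Instruction>(U->getUser());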
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index 8ece7f223dd2..3eab5042b542 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <utility>
@@ -111,18 +112,22 @@ bool Type::isEmptyTy() const {
return false;
}
-unsigned Type::getPrimitiveSizeInBits() const {
+TypeSize Type::getPrimitiveSizeInBits() const {
switch (getTypeID()) {
- case Type::HalfTyID: return 16;
- case Type::FloatTyID: return 32;
- case Type::DoubleTyID: return 64;
- case Type::X86_FP80TyID: return 80;
- case Type::FP128TyID: return 128;
- case Type::PPC_FP128TyID: return 128;
- case Type::X86_MMXTyID: return 64;
- case Type::IntegerTyID: return cast<IntegerType>(this)->getBitWidth();
- case Type::VectorTyID: return cast<VectorType>(this)->getBitWidth();
- default: return 0;
+ case Type::HalfTyID: return TypeSize::Fixed(16);
+ case Type::FloatTyID: return TypeSize::Fixed(32);
+ case Type::DoubleTyID: return TypeSize::Fixed(64);
+ case Type::X86_FP80TyID: return TypeSize::Fixed(80);
+ case Type::FP128TyID: return TypeSize::Fixed(128);
+ case Type::PPC_FP128TyID: return TypeSize::Fixed(128);
+ case Type::X86_MMXTyID: return TypeSize::Fixed(64);
+ case Type::IntegerTyID:
+ return TypeSize::Fixed(cast<IntegerType>(this)->getBitWidth());
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(this);
+ return TypeSize(VTy->getBitWidth(), VTy->isScalable());
+ }
+ default: return TypeSize::Fixed(0);
}
}
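
A sketch of what the new TypeSize return type expresses, assuming the Support/TypeSize.h interface this patch starts using:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

TypeSize scalableV4I32Bits(LLVMContext &Ctx) {
  Type *Ty = VectorType::get(Type::getInt32Ty(Ctx), /*NumElements=*/4,
                             /*Scalable=*/true);
  TypeSize TS = Ty->getPrimitiveSizeInBits();
  // TS.getKnownMinSize() == 128 and TS.isScalable() == true: the real
  // width is an unknown hardware multiple of 128 bits, so callers can
  // no longer treat the result as a plain unsigned.
  return TS;
}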
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index b7f77dc3043e..3c8a5b536695 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -444,15 +444,11 @@ void Value::replaceUsesOutsideBlock(Value *New, BasicBlock *BB) {
"replaceUses of value with new value of different type!");
assert(BB && "Basic block that may contain a use of 'New' must be defined\n");
- use_iterator UI = use_begin(), E = use_end();
- for (; UI != E;) {
- Use &U = *UI;
- ++UI;
- auto *Usr = dyn_cast<Instruction>(U.getUser());
- if (Usr && Usr->getParent() == BB)
- continue;
- U.set(New);
- }
+ replaceUsesWithIf(New, [BB](Use &U) {
+ auto *I = dyn_cast<Instruction>(U.getUser());
+ // Don't replace when the user is an instruction in basic block BB.
+ return !I || I->getParent() != BB;
+ });
}
namespace {
@@ -460,8 +456,8 @@ namespace {
enum PointerStripKind {
PSK_ZeroIndices,
PSK_ZeroIndicesAndAliases,
- PSK_ZeroIndicesAndAliasesSameRepresentation,
- PSK_ZeroIndicesAndAliasesAndInvariantGroups,
+ PSK_ZeroIndicesSameRepresentation,
+ PSK_ZeroIndicesAndInvariantGroups,
PSK_InBoundsConstantIndices,
PSK_InBounds
};
@@ -479,10 +475,10 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
do {
if (auto *GEP = dyn_cast<GEPOperator>(V)) {
switch (StripKind) {
- case PSK_ZeroIndicesAndAliases:
- case PSK_ZeroIndicesAndAliasesSameRepresentation:
- case PSK_ZeroIndicesAndAliasesAndInvariantGroups:
case PSK_ZeroIndices:
+ case PSK_ZeroIndicesAndAliases:
+ case PSK_ZeroIndicesSameRepresentation:
+ case PSK_ZeroIndicesAndInvariantGroups:
if (!GEP->hasAllZeroIndices())
return V;
break;
@@ -498,15 +494,13 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
- } else if (StripKind != PSK_ZeroIndicesAndAliasesSameRepresentation &&
+ } else if (StripKind != PSK_ZeroIndicesSameRepresentation &&
Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
// TODO: If we know an address space cast will not change the
// representation we could look through it here as well.
V = cast<Operator>(V)->getOperand(0);
- } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
- if (StripKind == PSK_ZeroIndices || GA->isInterposable())
- return V;
- V = GA->getAliasee();
+ } else if (StripKind == PSK_ZeroIndicesAndAliases && isa<GlobalAlias>(V)) {
+ V = cast<GlobalAlias>(V)->getAliasee();
} else {
if (const auto *Call = dyn_cast<CallBase>(V)) {
if (const Value *RV = Call->getReturnedArgOperand()) {
@@ -516,7 +510,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
// The result of launder.invariant.group must alias it's argument,
// but it can't be marked with returned attribute, that's why it needs
// special case.
- if (StripKind == PSK_ZeroIndicesAndAliasesAndInvariantGroups &&
+ if (StripKind == PSK_ZeroIndicesAndInvariantGroups &&
(Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
Call->getIntrinsicID() == Intrinsic::strip_invariant_group)) {
V = Call->getArgOperand(0);
@@ -533,16 +527,15 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
} // end anonymous namespace
const Value *Value::stripPointerCasts() const {
- return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this);
+ return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
}
-const Value *Value::stripPointerCastsSameRepresentation() const {
- return stripPointerCastsAndOffsets<
- PSK_ZeroIndicesAndAliasesSameRepresentation>(this);
+const Value *Value::stripPointerCastsAndAliases() const {
+ return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this);
}
-const Value *Value::stripPointerCastsNoFollowAliases() const {
- return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
+const Value *Value::stripPointerCastsSameRepresentation() const {
+ return stripPointerCastsAndOffsets<PSK_ZeroIndicesSameRepresentation>(this);
}
const Value *Value::stripInBoundsConstantOffsets() const {
@@ -550,8 +543,7 @@ const Value *Value::stripInBoundsConstantOffsets() const {
}
const Value *Value::stripPointerCastsAndInvariantGroups() const {
- return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliasesAndInvariantGroups>(
- this);
+ return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndInvariantGroups>(this);
}
const Value *
@@ -650,6 +642,19 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
}
CanBeNull = true;
}
+ } else if (auto *IP = dyn_cast<IntToPtrInst>(this)) {
+ if (MDNode *MD = IP->getMetadata(LLVMContext::MD_dereferenceable)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ DerefBytes = CI->getLimitedValue();
+ }
+ if (DerefBytes == 0) {
+ if (MDNode *MD =
+ IP->getMetadata(LLVMContext::MD_dereferenceable_or_null)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ DerefBytes = CI->getLimitedValue();
+ }
+ CanBeNull = true;
+ }
} else if (auto *AI = dyn_cast<AllocaInst>(this)) {
if (!AI->isArrayAllocation()) {
DerefBytes = DL.getTypeStoreSize(AI->getAllocatedType());
@@ -666,21 +671,21 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
return DerefBytes;
}
-unsigned Value::getPointerAlignment(const DataLayout &DL) const {
+MaybeAlign Value::getPointerAlignment(const DataLayout &DL) const {
assert(getType()->isPointerTy() && "must be pointer");
-
- unsigned Align = 0;
if (auto *GO = dyn_cast<GlobalObject>(this)) {
if (isa<Function>(GO)) {
+ const MaybeAlign FunctionPtrAlign = DL.getFunctionPtrAlign();
switch (DL.getFunctionPtrAlignType()) {
case DataLayout::FunctionPtrAlignType::Independent:
- return DL.getFunctionPtrAlign();
+ return FunctionPtrAlign;
case DataLayout::FunctionPtrAlignType::MultipleOfFunctionAlign:
- return std::max(DL.getFunctionPtrAlign(), GO->getAlignment());
+ return std::max(FunctionPtrAlign, MaybeAlign(GO->getAlignment()));
}
+ llvm_unreachable("Unhandled FunctionPtrAlignType");
}
- Align = GO->getAlignment();
- if (Align == 0) {
+ const MaybeAlign Alignment(GO->getAlignment());
+ if (!Alignment) {
if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
Type *ObjectType = GVar->getValueType();
if (ObjectType->isSized()) {
@@ -688,37 +693,43 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const {
// it the preferred alignment. Otherwise, we have to assume that it
// may only have the minimum ABI alignment.
if (GVar->isStrongDefinitionForLinker())
- Align = DL.getPreferredAlignment(GVar);
+ return MaybeAlign(DL.getPreferredAlignment(GVar));
else
- Align = DL.getABITypeAlignment(ObjectType);
+ return Align(DL.getABITypeAlignment(ObjectType));
}
}
}
+ return Alignment;
} else if (const Argument *A = dyn_cast<Argument>(this)) {
- Align = A->getParamAlignment();
-
- if (!Align && A->hasStructRetAttr()) {
+ const MaybeAlign Alignment(A->getParamAlignment());
+ if (!Alignment && A->hasStructRetAttr()) {
// An sret parameter has at least the ABI alignment of the return type.
Type *EltTy = cast<PointerType>(A->getType())->getElementType();
if (EltTy->isSized())
- Align = DL.getABITypeAlignment(EltTy);
+ return Align(DL.getABITypeAlignment(EltTy));
}
+ return Alignment;
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(this)) {
- Align = AI->getAlignment();
- if (Align == 0) {
+ const MaybeAlign Alignment(AI->getAlignment());
+ if (!Alignment) {
Type *AllocatedType = AI->getAllocatedType();
if (AllocatedType->isSized())
- Align = DL.getPrefTypeAlignment(AllocatedType);
+ return MaybeAlign(DL.getPrefTypeAlignment(AllocatedType));
}
- } else if (const auto *Call = dyn_cast<CallBase>(this))
- Align = Call->getAttributes().getRetAlignment();
- else if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return Alignment;
+ } else if (const auto *Call = dyn_cast<CallBase>(this)) {
+ const MaybeAlign Alignment(Call->getRetAlignment());
+ if (!Alignment && Call->getCalledFunction())
+ return MaybeAlign(
+ Call->getCalledFunction()->getAttributes().getRetAlignment());
+ return Alignment;
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(this)) {
if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) {
ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
- Align = CI->getLimitedValue();
+ return MaybeAlign(CI->getLimitedValue());
}
-
- return Align;
+ }
+ return llvm::None;
}
const Value *Value::DoPHITranslation(const BasicBlock *CurBB,
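
Two caller-facing shifts land above: use replacement is now predicate-driven, and pointer alignment is optional instead of 0-for-unknown. A compile-only sketch under those assumptions, with V, NewV, and DL as placeholders:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

using namespace llvm;

void adaptCaller(Value *V, Value *NewV, const DataLayout &DL) {
  // Replace only those uses whose user is an instruction.
  V->replaceUsesWithIf(NewV,
                       [](Use &U) { return isa<Instruction>(U.getUser()); });

  // Old style: unsigned Align = V->getPointerAlignment(DL); 0 == unknown.
  if (MaybeAlign MA = V->getPointerAlignment(DL))
    (void)MA->value(); // always a power of two >= 1 when present
}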
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 9346c8bda75d..b17fc433ed74 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -119,6 +119,7 @@ struct VerifierSupport {
raw_ostream *OS;
const Module &M;
ModuleSlotTracker MST;
+ Triple TT;
const DataLayout &DL;
LLVMContext &Context;
@@ -130,7 +131,8 @@ struct VerifierSupport {
bool TreatBrokenDebugInfoAsError = true;
explicit VerifierSupport(raw_ostream *OS, const Module &M)
- : OS(OS), M(M), MST(&M), DL(M.getDataLayout()), Context(M.getContext()) {}
+ : OS(OS), M(M), MST(&M), TT(M.getTargetTriple()), DL(M.getDataLayout()),
+ Context(M.getContext()) {}
private:
void Write(const Module *M) {
@@ -416,6 +418,7 @@ private:
void visitBasicBlock(BasicBlock &BB);
void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
+ void visitProfMetadata(Instruction &I, MDNode *MD);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -515,6 +518,7 @@ private:
DIExpression::FragmentInfo Fragment,
ValueOrMetadata *Desc);
void verifyFnArgs(const DbgVariableIntrinsic &I);
+ void verifyNotEntryValue(const DbgVariableIntrinsic &I);
/// Module-level debug info verification...
void verifyCompileUnits();
@@ -670,7 +674,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
Assert(InitArray, "wrong initalizer for intrinsic global variable",
Init);
for (Value *Op : InitArray->operands()) {
- Value *V = Op->stripPointerCastsNoFollowAliases();
+ Value *V = Op->stripPointerCasts();
Assert(isa<GlobalVariable>(V) || isa<Function>(V) ||
isa<GlobalAlias>(V),
"invalid llvm.used member", V);
@@ -979,6 +983,9 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
N.getRawVTableHolder());
AssertDI(!hasConflictingReferenceFlags(N.getFlags()),
"invalid reference flags", &N);
+ unsigned DIBlockByRefStruct = 1 << 4;
+ AssertDI((N.getFlags() & DIBlockByRefStruct) == 0,
+ "DIBlockByRefStruct on DICompositeType is no longer supported", &N);
if (N.isVector()) {
const DINodeArray Elements = N.getElements();
@@ -1306,11 +1313,12 @@ void Verifier::visitDIImportedEntity(const DIImportedEntity &N) {
}
void Verifier::visitComdat(const Comdat &C) {
- // The Module is invalid if the GlobalValue has private linkage. Entities
- // with private linkage don't have entries in the symbol table.
- if (const GlobalValue *GV = M.getNamedValue(C.getName()))
- Assert(!GV->hasPrivateLinkage(), "comdat global value has private linkage",
- GV);
+ // In COFF the Module is invalid if the GlobalValue has private linkage.
+ // Entities with private linkage don't have entries in the symbol table.
+ if (TT.isOSBinFormatCOFF())
+ if (const GlobalValue *GV = M.getNamedValue(C.getName()))
+ Assert(!GV->hasPrivateLinkage(),
+ "comdat global value has private linkage", GV);
}
void Verifier::visitModuleIdents(const Module &M) {
@@ -2497,6 +2505,15 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
Assert(CBI.getOperand(i) != CBI.getOperand(j),
"Duplicate callbr destination!", &CBI);
}
+ {
+ SmallPtrSet<BasicBlock *, 4> ArgBBs;
+ for (Value *V : CBI.args())
+ if (auto *BA = dyn_cast<BlockAddress>(V))
+ ArgBBs.insert(BA->getBasicBlock());
+ for (BasicBlock *BB : CBI.getIndirectDests())
+ Assert(ArgBBs.find(BB) != ArgBBs.end(),
+ "Indirect label missing from arglist.", &CBI);
+ }
visitTerminator(CBI);
}
@@ -2715,8 +2732,8 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
&I);
if (SrcTy->isVectorTy()) {
- VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
- VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ VectorType *VSrc = cast<VectorType>(SrcTy);
+ VectorType *VDest = cast<VectorType>(DestTy);
Assert(VSrc->getNumElements() == VDest->getNumElements(),
"PtrToInt Vector width mismatch", &I);
}
@@ -2740,8 +2757,8 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch",
&I);
if (SrcTy->isVectorTy()) {
- VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
- VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ VectorType *VSrc = cast<VectorType>(SrcTy);
+ VectorType *VDest = cast<VectorType>(DestTy);
Assert(VSrc->getNumElements() == VDest->getNumElements(),
"IntToPtr Vector width mismatch", &I);
}
@@ -3983,9 +4000,9 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) {
Assert(I.getType()->isPointerTy(), "dereferenceable, dereferenceable_or_null "
"apply only to pointer types", &I);
- Assert(isa<LoadInst>(I),
+ Assert((isa<LoadInst>(I) || isa<IntToPtrInst>(I)),
"dereferenceable, dereferenceable_or_null apply only to load"
- " instructions, use attributes for calls or invokes", &I);
+ " and inttoptr instructions, use attributes for calls or invokes", &I);
Assert(MD->getNumOperands() == 1, "dereferenceable, dereferenceable_or_null "
"take one operand!", &I);
ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0));
@@ -3993,6 +4010,45 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) {
"dereferenceable_or_null metadata value must be an i64!", &I);
}
+void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
+ Assert(MD->getNumOperands() >= 2,
+ "!prof annotations should have no less than 2 operands", MD);
+
+ // Check first operand.
+ Assert(MD->getOperand(0) != nullptr, "first operand should not be null", MD);
+ Assert(isa<MDString>(MD->getOperand(0)),
+ "expected string with name of the !prof annotation", MD);
+ MDString *MDS = cast<MDString>(MD->getOperand(0));
+ StringRef ProfName = MDS->getString();
+
+ // Check consistency of !prof branch_weights metadata.
+ if (ProfName.equals("branch_weights")) {
+ unsigned ExpectedNumOperands = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(&I))
+ ExpectedNumOperands = BI->getNumSuccessors();
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I))
+ ExpectedNumOperands = SI->getNumSuccessors();
+ else if (isa<CallInst>(&I) || isa<InvokeInst>(&I))
+ ExpectedNumOperands = 1;
+ else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(&I))
+ ExpectedNumOperands = IBI->getNumDestinations();
+ else if (isa<SelectInst>(&I))
+ ExpectedNumOperands = 2;
+ else
+ CheckFailed("!prof branch_weights are not allowed for this instruction",
+ MD);
+
+ Assert(MD->getNumOperands() == 1 + ExpectedNumOperands,
+ "Wrong number of operands", MD);
+ for (unsigned i = 1; i < MD->getNumOperands(); ++i) {
+ auto &MDO = MD->getOperand(i);
+ Assert(MDO, "second operand should not be null", MD);
+ Assert(mdconst::dyn_extract<ConstantInt>(MDO),
+ "!prof brunch_weights operand is not a const int");
+ }
+ }
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
@@ -4150,13 +4206,18 @@ void Verifier::visitInstruction(Instruction &I) {
"alignment is larger that implementation defined limit", &I);
}
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_prof))
+ visitProfMetadata(I, MD);
+
if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
AssertDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
visitMDNode(*N);
}
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
verifyFragmentExpression(*DII);
+ verifyNotEntryValue(*DII);
+ }
InstsInThisBlock.insert(&I);
}
@@ -4236,6 +4297,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
@@ -4248,12 +4311,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::experimental_constrained_log:
case Intrinsic::experimental_constrained_log10:
case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_lrint:
+ case Intrinsic::experimental_constrained_llrint:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_maxnum:
case Intrinsic::experimental_constrained_minnum:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_lround:
+ case Intrinsic::experimental_constrained_llround:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(Call));
@@ -4623,7 +4690,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
}
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
- case Intrinsic::umul_fix: {
+ case Intrinsic::umul_fix:
+ case Intrinsic::umul_fix_sat: {
Value *Op1 = Call.getArgOperand(0);
Value *Op2 = Call.getArgOperand(1);
Assert(Op1->getType()->isIntOrIntVectorTy(),
@@ -4705,6 +4773,31 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
HasRoundingMD = true;
break;
+ case Intrinsic::experimental_constrained_lrint:
+ case Intrinsic::experimental_constrained_llrint: {
+ Assert((NumOperands == 3), "invalid arguments for constrained FP intrinsic",
+ &FPI);
+ Type *ValTy = FPI.getArgOperand(0)->getType();
+ Type *ResultTy = FPI.getType();
+ Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
+ "Intrinsic does not support vectors", &FPI);
+ HasExceptionMD = true;
+ HasRoundingMD = true;
+ }
+ break;
+
+ case Intrinsic::experimental_constrained_lround:
+ case Intrinsic::experimental_constrained_llround: {
+ Assert((NumOperands == 2), "invalid arguments for constrained FP intrinsic",
+ &FPI);
+ Type *ValTy = FPI.getArgOperand(0)->getType();
+ Type *ResultTy = FPI.getType();
+ Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
+ "Intrinsic does not support vectors", &FPI);
+ HasExceptionMD = true;
+ break;
+ }
+
case Intrinsic::experimental_constrained_fma:
Assert((NumOperands == 5), "invalid arguments for constrained FP intrinsic",
&FPI);
@@ -4727,6 +4820,33 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
HasRoundingMD = true;
break;
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui: {
+ Assert((NumOperands == 2),
+ "invalid arguments for constrained FP intrinsic", &FPI);
+ HasExceptionMD = true;
+
+ Value *Operand = FPI.getArgOperand(0);
+ uint64_t NumSrcElem = 0;
+ Assert(Operand->getType()->isFPOrFPVectorTy(),
+ "Intrinsic first argument must be floating point", &FPI);
+ if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
+ NumSrcElem = OperandT->getNumElements();
+ }
+
+ Operand = &FPI;
+ Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
+ "Intrinsic first argument and result disagree on vector use", &FPI);
+ Assert(Operand->getType()->isIntOrIntVectorTy(),
+ "Intrinsic result must be an integer", &FPI);
+ if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
+ Assert(NumSrcElem == OperandT->getNumElements(),
+ "Intrinsic first argument and result vector lengths must be equal",
+ &FPI);
+ }
+ }
+ break;
+
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext: {
if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
@@ -4826,11 +4946,6 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) {
// This check is redundant with one in visitLocalVariable().
AssertDI(isType(Var->getRawType()), "invalid type ref", Var,
Var->getRawType());
- if (auto *Type = dyn_cast_or_null<DIType>(Var->getRawType()))
- if (Type->isBlockByrefStruct())
- AssertDI(DII.getExpression() && DII.getExpression()->getNumElements(),
- "BlockByRef variable without complex expression", Var, &DII);
-
verifyFnArgs(DII);
}
@@ -4935,6 +5050,16 @@ void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) {
Prev, Var);
}
+void Verifier::verifyNotEntryValue(const DbgVariableIntrinsic &I) {
+ DIExpression *E = dyn_cast_or_null<DIExpression>(I.getRawExpression());
+
+ // We don't know whether this intrinsic verified correctly.
+ if (!E || !E->isValid())
+ return;
+
+ AssertDI(!E->isEntryValue(), "Entry values are only allowed in MIR", &I);
+}
+
void Verifier::verifyCompileUnits() {
// When more than one Module is imported into the same context, such as during
// an LTO build before linking the modules, ODR type uniquing may cause types
@@ -5021,7 +5146,7 @@ struct VerifierLegacyPass : public FunctionPass {
}
bool doInitialization(Module &M) override {
- V = llvm::make_unique<Verifier>(
+ V = std::make_unique<Verifier>(
&dbgs(), /*ShouldTreatBrokenDebugInfoAsError=*/false, M);
return false;
}
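
For the new visitProfMetadata check, a well-formed !prof attachment pairs one weight with each successor. A sketch of producing compliant metadata through MDBuilder; BI is a placeholder conditional branch:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

void annotateBranch(BranchInst *BI) {
  MDBuilder MDB(BI->getContext());
  // Two successors -> "branch_weights" plus exactly two integer operands,
  // which is what the verifier above now enforces.
  BI->setMetadata(LLVMContext::MD_prof,
                  MDB.createBranchWeights(/*TrueWeight=*/90,
                                          /*FalseWeight=*/10));
}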
diff --git a/lib/LTO/Caching.cpp b/lib/LTO/Caching.cpp
index 000ab91dba7c..12dcd182de2d 100644
--- a/lib/LTO/Caching.cpp
+++ b/lib/LTO/Caching.cpp
@@ -142,8 +142,8 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
}
// This CacheStream will move the temporary file into the cache when done.
- return llvm::make_unique<CacheStream>(
- llvm::make_unique<raw_fd_ostream>(Temp->FD, /* ShouldClose */ false),
+ return std::make_unique<CacheStream>(
+ std::make_unique<raw_fd_ostream>(Temp->FD, /* ShouldClose */ false),
AddBuffer, std::move(*Temp), EntryPath.str(), Task);
};
};
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 64506890956a..1e345e7dd89e 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -44,6 +44,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
@@ -383,7 +384,9 @@ static bool isWeakObjectWithRWAccess(GlobalValueSummary *GVS) {
static void thinLTOInternalizeAndPromoteGUID(
GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID,
- function_ref<bool(StringRef, GlobalValue::GUID)> isExported) {
+ function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing) {
for (auto &S : GVSummaryList) {
if (isExported(S->modulePath(), GUID)) {
if (GlobalValue::isLocalLinkage(S->linkage()))
@@ -392,6 +395,8 @@ static void thinLTOInternalizeAndPromoteGUID(
// Ignore local and appending linkage values since the linker
// doesn't resolve them.
!GlobalValue::isLocalLinkage(S->linkage()) &&
+ (!GlobalValue::isInterposableLinkage(S->linkage()) ||
+ isPrevailing(GUID, S.get())) &&
S->linkage() != GlobalValue::AppendingLinkage &&
// We can't internalize available_externally globals because this
// can break function pointer equality.
@@ -410,9 +415,12 @@ static void thinLTOInternalizeAndPromoteGUID(
// as external and non-exported values as internal.
void llvm::thinLTOInternalizeAndPromoteInIndex(
ModuleSummaryIndex &Index,
- function_ref<bool(StringRef, GlobalValue::GUID)> isExported) {
+ function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing) {
for (auto &I : Index)
- thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported);
+ thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported,
+ isPrevailing);
}
// Requires a destructor for std::vector<InputModule>.
@@ -459,8 +467,8 @@ BitcodeModule &InputFile::getSingleBitcodeModule() {
LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
Config &Conf)
: ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
- Ctx(Conf), CombinedModule(llvm::make_unique<Module>("ld-temp.o", Ctx)),
- Mover(llvm::make_unique<IRMover>(*CombinedModule)) {}
+ Ctx(Conf), CombinedModule(std::make_unique<Module>("ld-temp.o", Ctx)),
+ Mover(std::make_unique<IRMover>(*CombinedModule)) {}
LTO::ThinLTOState::ThinLTOState(ThinBackend Backend)
: Backend(Backend), CombinedIndex(/*HaveGVs*/ false) {
@@ -754,7 +762,8 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
// For now they aren't reported correctly by ModuleSymbolTable.
auto &CommonRes = RegularLTO.Commons[Sym.getIRName()];
CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
- CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment());
+ CommonRes.Align =
+ std::max(CommonRes.Align, MaybeAlign(Sym.getCommonAlignment()));
CommonRes.Prevailing |= Res.Prevailing;
}
@@ -899,8 +908,7 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
GlobalValue::dropLLVMManglingEscape(Res.second.IRName));
if (Res.second.VisibleOutsideSummary && Res.second.Prevailing)
- GUIDPreservedSymbols.insert(GlobalValue::getGUID(
- GlobalValue::dropLLVMManglingEscape(Res.second.IRName)));
+ GUIDPreservedSymbols.insert(GUID);
GUIDPrevailingResolutions[GUID] =
Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No;
@@ -996,6 +1004,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
GV->setLinkage(GlobalValue::InternalLinkage);
}
+ RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
+
if (Conf.PostInternalizeModuleHook &&
!Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
return Error::success();
@@ -1004,6 +1014,16 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex);
}
+static const char *libcallRoutineNames[] = {
+#define HANDLE_LIBCALL(code, name) name,
+#include "llvm/IR/RuntimeLibcalls.def"
+#undef HANDLE_LIBCALL
+};
+
+ArrayRef<const char*> LTO::getRuntimeLibcallSymbols() {
+ return makeArrayRef(libcallRoutineNames);
+}
+
/// This class defines the interface to the ThinLTO backend.
class lto::ThinBackendProc {
protected:
@@ -1141,7 +1161,7 @@ ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) {
return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, NativeObjectCache Cache) {
- return llvm::make_unique<InProcessThinBackend>(
+ return std::make_unique<InProcessThinBackend>(
Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries,
AddStream, Cache);
};
@@ -1204,7 +1224,7 @@ public:
std::error_code EC;
raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
- sys::fs::OpenFlags::F_None);
+ sys::fs::OpenFlags::OF_None);
if (EC)
return errorCodeToError(EC);
WriteIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
@@ -1231,7 +1251,7 @@ ThinBackend lto::createWriteIndexesThinBackend(
return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, NativeObjectCache Cache) {
- return llvm::make_unique<WriteIndexesThinBackend>(
+ return std::make_unique<WriteIndexesThinBackend>(
Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix,
ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite);
};
@@ -1274,6 +1294,15 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
if (DumpThinCGSCCs)
ThinLTO.CombinedIndex.dumpSCCs(outs());
+ std::set<GlobalValue::GUID> ExportedGUIDs;
+
+ // Perform index-based WPD. This will return immediately if there are
+ // no index entries in the typeIdMetadata map (e.g. if we are instead
+ // performing IR-based WPD in hybrid regular/thin LTO mode).
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
+ runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs,
+ LocalWPDTargetsMap);
+
if (Conf.OptLevel > 0)
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
ImportLists, ExportLists);
@@ -1282,7 +1311,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
// at -O0 because summary-based DCE is implemented using internalization, and
// we must apply DCE consistently with the full LTO module in order to avoid
// undefined references during the final link.
- std::set<GlobalValue::GUID> ExportedGUIDs;
for (auto &Res : GlobalResolutions) {
// If the symbol does not have external references or it is not prevailing,
// then there is no need to mark it as exported from a ThinLTO partition.
@@ -1308,12 +1336,19 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
ExportList->second.count(GUID)) ||
ExportedGUIDs.count(GUID);
};
- thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported);
+
+ // Update local devirtualized targets that were exported by cross-module
+ // importing or by other devirtualizations marked in the ExportedGUIDs set.
+ updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported,
+ LocalWPDTargetsMap);
auto isPrevailing = [&](GlobalValue::GUID GUID,
const GlobalValueSummary *S) {
return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
};
+ thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported,
+ isPrevailing);
+
auto recordNewLinkage = [&](StringRef ModuleIdentifier,
GlobalValue::GUID GUID,
GlobalValue::LinkageTypes NewLinkage) {
@@ -1368,7 +1403,7 @@ lto::setupStatsFile(StringRef StatsFilename) {
llvm::EnableStatistics(false);
std::error_code EC;
auto StatsFile =
- llvm::make_unique<ToolOutputFile>(StatsFilename, EC, sys::fs::F_None);
+ std::make_unique<ToolOutputFile>(StatsFilename, EC, sys::fs::OF_None);
if (EC)
return errorCodeToError(EC);
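
The isPrevailing parameter threaded through above keeps interposable (e.g. weak) symbols from being internalized in a module whose copy loses at link time. A sketch of the callback pair a caller now supplies; ExportedGUIDs and PrevailingModuleForGUID are placeholders:

auto isExported = [&](StringRef ModulePath, GlobalValue::GUID GUID) {
  return ExportedGUIDs.count(GUID) != 0;
};
auto isPrevailing = [&](GlobalValue::GUID GUID,
                        const GlobalValueSummary *S) {
  // Only the prevailing copy of an interposable symbol may be internalized.
  return PrevailingModuleForGUID[GUID] == S->modulePath();
};
thinLTOInternalizeAndPromoteInIndex(Index, isExported, isPrevailing);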
diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp
index 7456e7175163..2761f8367b0d 100644
--- a/lib/LTO/LTOBackend.cpp
+++ b/lib/LTO/LTOBackend.cpp
@@ -28,6 +28,7 @@
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -57,8 +58,8 @@ Error Config::addSaveTemps(std::string OutputFileName,
ShouldDiscardValueNames = false;
std::error_code EC;
- ResolutionFile = llvm::make_unique<raw_fd_ostream>(
- OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text);
+ ResolutionFile = std::make_unique<raw_fd_ostream>(
+ OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::OF_Text);
if (EC)
return errorCodeToError(EC);
@@ -83,7 +84,7 @@ Error Config::addSaveTemps(std::string OutputFileName,
PathPrefix = M.getModuleIdentifier() + ".";
std::string Path = PathPrefix + PathSuffix + ".bc";
std::error_code EC;
- raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
// Because -save-temps is a debugging feature, we report the error
// directly and exit.
if (EC)
@@ -103,7 +104,7 @@ Error Config::addSaveTemps(std::string OutputFileName,
CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
std::string Path = OutputFileName + "index.bc";
std::error_code EC;
- raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
// Because -save-temps is a debugging feature, we report the error
// directly and exit.
if (EC)
@@ -111,7 +112,7 @@ Error Config::addSaveTemps(std::string OutputFileName,
WriteIndexToFile(Index, OS);
Path = OutputFileName + "index.dot";
- raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None);
if (EC)
reportOpenError(Path, EC.message());
Index.exportToDot(OSDot);
@@ -165,7 +166,10 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
PGOOptions::IRUse, PGOOptions::CSIRUse);
}
- PassBuilder PB(TM, PipelineTuningOptions(), PGOOpt);
+ PassInstrumentationCallbacks PIC;
+ StandardInstrumentations SI;
+ SI.registerCallbacks(PIC);
+ PassBuilder PB(TM, PipelineTuningOptions(), PGOOpt, &PIC);
AAManager AA;
// Parse a custom AA pipeline if asked to.
@@ -329,7 +333,7 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
if (!DwoFile.empty()) {
std::error_code EC;
- DwoOut = llvm::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::F_None);
+ DwoOut = std::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::OF_None);
if (EC)
report_fatal_error("Failed to open " + DwoFile + ": " + EC.message());
}
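
The hunk above wires new-pass-manager instrumentation into the LTO backend. In isolation the pattern is the standard new-PM setup, not verbatim from the patch:

PassInstrumentationCallbacks PIC;
StandardInstrumentations SI;
SI.registerCallbacks(PIC); // IR printing, pass timing, etc.
PassBuilder PB(TM, PipelineTuningOptions(), /*PGOOpt=*/None, &PIC);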
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 6bb3bfaefc9c..882192892867 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -151,7 +151,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) {
const std::vector<StringRef> &undefs = Mod->getAsmUndefinedRefs();
for (int i = 0, e = undefs.size(); i != e; ++i)
- AsmUndefinedRefs[undefs[i]] = 1;
+ AsmUndefinedRefs.insert(undefs[i]);
}
bool LTOCodeGenerator::addModule(LTOModule *Mod) {
@@ -174,7 +174,7 @@ void LTOCodeGenerator::setModule(std::unique_ptr<LTOModule> Mod) {
AsmUndefinedRefs.clear();
MergedModule = Mod->takeModule();
- TheLinker = make_unique<Linker>(*MergedModule);
+ TheLinker = std::make_unique<Linker>(*MergedModule);
setAsmUndefinedRefs(&*Mod);
// We've just changed the input, so let's make sure we verify it.
@@ -229,7 +229,7 @@ bool LTOCodeGenerator::writeMergedModules(StringRef Path) {
// create output file
std::error_code EC;
- ToolOutputFile Out(Path, EC, sys::fs::F_None);
+ ToolOutputFile Out(Path, EC, sys::fs::OF_None);
if (EC) {
std::string ErrMsg = "could not open bitcode file for writing: ";
ErrMsg += Path.str() + ": " + EC.message();
@@ -365,7 +365,8 @@ bool LTOCodeGenerator::determineTarget() {
MCpu = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
MCpu = "yonah";
- else if (Triple.getArch() == llvm::Triple::aarch64)
+ else if (Triple.getArch() == llvm::Triple::aarch64 ||
+ Triple.getArch() == llvm::Triple::aarch64_32)
MCpu = "cyclone";
}
@@ -462,6 +463,8 @@ void LTOCodeGenerator::applyScopeRestrictions() {
internalizeModule(*MergedModule, mustPreserveGV);
+ MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
+
ScopeRestrictionsDone = true;
}
@@ -690,7 +693,7 @@ LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler,
return Context.setDiagnosticHandler(nullptr);
// Register the LTOCodeGenerator stub in the LLVMContext to forward the
// diagnostic to the external DiagHandler.
- Context.setDiagnosticHandler(llvm::make_unique<LTODiagnosticHandler>(this),
+ Context.setDiagnosticHandler(std::make_unique<LTODiagnosticHandler>(this),
true);
}
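
Both legacy LTO paths now tag the merged module with an "LTOPostLink" flag so passes can tell they are running post-link. A hypothetical consumer, not part of the patch:

#include "llvm/IR/Module.h"

// getModuleFlag returns null when the flag was never set.
static bool isLTOPostLink(const llvm::Module &M) {
  return M.getModuleFlag("LTOPostLink") != nullptr;
}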
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 7ffe7bf84ba8..587b332e7064 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -220,7 +220,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options,
CPU = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
CPU = "yonah";
- else if (Triple.getArch() == llvm::Triple::aarch64)
+ else if (Triple.getArch() == llvm::Triple::aarch64 ||
+ Triple.getArch() == llvm::Triple::aarch64_32)
CPU = "cyclone";
}
diff --git a/lib/LTO/SummaryBasedOptimizations.cpp b/lib/LTO/SummaryBasedOptimizations.cpp
index e919fd530fb0..6db495de003b 100644
--- a/lib/LTO/SummaryBasedOptimizations.cpp
+++ b/lib/LTO/SummaryBasedOptimizations.cpp
@@ -18,7 +18,7 @@
using namespace llvm;
-cl::opt<bool> ThinLTOSynthesizeEntryCounts(
+static cl::opt<bool> ThinLTOSynthesizeEntryCounts(
"thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden,
cl::desc("Synthesize entry counts based on the summary"));
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 1c52218836ca..d151de17896f 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
@@ -52,6 +53,7 @@
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
@@ -89,7 +91,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
// User asked to save temps, let's dump the bitcode file after import.
std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str();
std::error_code EC;
- raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
+ raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None);
if (EC)
report_fatal_error(Twine("Failed to open ") + SaveTempPath +
" to save optimized bitcode\n");
@@ -224,7 +226,8 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
}
static void optimizeModule(Module &TheModule, TargetMachine &TM,
- unsigned OptLevel, bool Freestanding) {
+ unsigned OptLevel, bool Freestanding,
+ ModuleSummaryIndex *Index) {
// Populate the PassManager
PassManagerBuilder PMB;
PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
@@ -238,6 +241,7 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM,
// Already did this in verifyLoadedModule().
PMB.VerifyInput = false;
PMB.VerifyOutput = false;
+ PMB.ImportSummary = Index;
legacy::PassManager PM;
@@ -295,7 +299,7 @@ std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
// Run codegen now. resulting binary is in OutputBuffer.
PM.run(TheModule);
}
- return make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
+ return std::make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
}
/// Manage caching for a single Module.
@@ -368,23 +372,26 @@ public:
// Write to a temporary to avoid race condition
SmallString<128> TempFilename;
SmallString<128> CachePath(EntryPath);
- int TempFD;
llvm::sys::path::remove_filename(CachePath);
sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o");
- std::error_code EC =
- sys::fs::createUniqueFile(TempFilename, TempFD, TempFilename);
- if (EC) {
- errs() << "Error: " << EC.message() << "\n";
- report_fatal_error("ThinLTO: Can't get a temporary file");
- }
- {
- raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
- OS << OutputBuffer.getBuffer();
+
+ if (auto Err = handleErrors(
+ llvm::writeFileAtomically(TempFilename, EntryPath,
+ OutputBuffer.getBuffer()),
+ [](const llvm::AtomicFileWriteError &E) {
+ std::string ErrorMsgBuffer;
+ llvm::raw_string_ostream S(ErrorMsgBuffer);
+ E.log(S);
+
+ if (E.Error ==
+ llvm::atomic_write_error::failed_to_create_uniq_file) {
+ errs() << "Error: " << ErrorMsgBuffer << "\n";
+ report_fatal_error("ThinLTO: Can't get a temporary file");
+ }
+ })) {
+ // FIXME
+ consumeError(std::move(Err));
}
- // Rename temp file to final destination; rename is atomic
- EC = sys::fs::rename(TempFilename, EntryPath);
- if (EC)
- sys::fs::remove(TempFilename);
}
};
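
The rewrite above delegates the temp-file dance to writeFileAtomically (Support/FileUtilities.h), which creates a unique temporary from the model string and renames it over the destination. A trimmed sketch, with Buf as a placeholder payload:

#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileUtilities.h"

using namespace llvm;

void writeObjectAtomically(StringRef Buf) {
  if (Error E = writeFileAtomically("out-%%%%%%.tmp", "out.o", Buf))
    report_fatal_error(Twine("atomic write failed: ") +
                       toString(std::move(E)));
}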
@@ -429,7 +436,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
}
- optimizeModule(TheModule, TM, OptLevel, Freestanding);
+ optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index);
saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
@@ -442,7 +449,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI);
WriteBitcodeToFile(TheModule, OS, true, &Index);
}
- return make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
+ return std::make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
}
return codegenModule(TheModule, TM);
@@ -457,10 +464,9 @@ static void resolvePrevailingInIndex(
ModuleSummaryIndex &Index,
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
&ResolvedODR,
- const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
-
- DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
- computePrevailingCopies(Index, PrevailingCopy);
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
+ const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
+ &PrevailingCopy) {
auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
const auto &Prevailing = PrevailingCopy.find(GUID);
@@ -490,7 +496,8 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder,
TMBuilder.MCpu = "core2";
else if (TheTriple.getArch() == llvm::Triple::x86)
TMBuilder.MCpu = "yonah";
- else if (TheTriple.getArch() == llvm::Triple::aarch64)
+ else if (TheTriple.getArch() == llvm::Triple::aarch64 ||
+ TheTriple.getArch() == llvm::Triple::aarch64_32)
TMBuilder.MCpu = "cyclone";
}
TMBuilder.TheTriple = std::move(TheTriple);
@@ -557,7 +564,7 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
*/
std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
std::unique_ptr<ModuleSummaryIndex> CombinedIndex =
- llvm::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
+ std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
uint64_t NextModuleId = 0;
for (auto &Mod : Modules) {
auto &M = Mod->getSingleBitcodeModule();
@@ -573,19 +580,36 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
return CombinedIndex;
}
-static void internalizeAndPromoteInIndex(
- const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
- const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
- ModuleSummaryIndex &Index) {
- auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
+struct IsExported {
+ const StringMap<FunctionImporter::ExportSetTy> &ExportLists;
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols;
+
+ IsExported(const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols)
+ : ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {}
+
+ bool operator()(StringRef ModuleIdentifier, GlobalValue::GUID GUID) const {
const auto &ExportList = ExportLists.find(ModuleIdentifier);
return (ExportList != ExportLists.end() &&
ExportList->second.count(GUID)) ||
GUIDPreservedSymbols.count(GUID);
- };
+ }
+};
- thinLTOInternalizeAndPromoteInIndex(Index, isExported);
-}
+struct IsPrevailing {
+ const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy;
+ IsPrevailing(const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
+ &PrevailingCopy)
+ : PrevailingCopy(PrevailingCopy) {}
+
+ bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const {
+ const auto &Prevailing = PrevailingCopy.find(GUID);
+ // Not in map means that there was only one copy, which must be prevailing.
+ if (Prevailing == PrevailingCopy.end())
+ return true;
+ return Prevailing->second == S;
+ };
+};
static void computeDeadSymbolsInIndex(
ModuleSummaryIndex &Index,
@@ -629,16 +653,22 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
+ DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+ computePrevailingCopies(Index, PrevailingCopy);
+
// Resolve prevailing symbols
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
- resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols);
+ resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols,
+ PrevailingCopy);
thinLTOResolvePrevailingInModule(
TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
// Promote the exported values in the index, so that they are promoted
// in the module.
- internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index);
+ thinLTOInternalizeAndPromoteInIndex(
+ Index, IsExported(ExportLists, GUIDPreservedSymbols),
+ IsPrevailing(PrevailingCopy));
promoteModule(TheModule, Index);
}
@@ -785,13 +815,19 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
if (ExportList.empty() && GUIDPreservedSymbols.empty())
return;
+ DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+ computePrevailingCopies(Index, PrevailingCopy);
+
// Resolve prevailing symbols
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
- resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols);
+ resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols,
+ PrevailingCopy);
// Promote the exported values in the index, so that they are promoted
// in the module.
- internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index);
+ thinLTOInternalizeAndPromoteInIndex(
+ Index, IsExported(ExportLists, GUIDPreservedSymbols),
+ IsPrevailing(PrevailingCopy));
promoteModule(TheModule, Index);
@@ -810,7 +846,8 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
// Optimize now
- optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
+ optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding,
+ nullptr);
}
/// Write out the generated object file, either from CacheEntryPath or from
@@ -845,7 +882,7 @@ ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath,
}
// No cache entry, just write out the buffer.
std::error_code Err;
- raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None);
+ raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None);
if (Err)
report_fatal_error("Can't open output '" + OutputPath + "'\n");
OS << OutputBuffer.getBuffer();
@@ -900,7 +937,7 @@ void ThinLTOCodeGenerator::run() {
if (!SaveTempsDir.empty()) {
auto SaveTempPath = SaveTempsDir + "index.bc";
std::error_code EC;
- raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
+ raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None);
if (EC)
report_fatal_error(Twine("Failed to open ") + SaveTempPath +
" to save optimized bitcode\n");
@@ -931,6 +968,15 @@ void ThinLTOCodeGenerator::run() {
// Synthesize entry counts for functions in the combined index.
computeSyntheticCounts(*Index);
+ // Perform index-based WPD. This will return immediately if there are
+ // no index entries in the typeIdMetadata map (e.g. if we are instead
+ // performing IR-based WPD in hybrid regular/thin LTO mode).
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
+ std::set<GlobalValue::GUID> ExportedGUIDs;
+ runWholeProgramDevirtOnIndex(*Index, ExportedGUIDs, LocalWPDTargetsMap);
+ for (auto GUID : ExportedGUIDs)
+ GUIDPreservedSymbols.insert(GUID);
+
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
@@ -944,14 +990,23 @@ void ThinLTOCodeGenerator::run() {
// on the index, and nuke this map.
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
+ DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+ computePrevailingCopies(*Index, PrevailingCopy);
+
// Resolve prevailing symbols, this has to be computed early because it
// impacts the caching.
- resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols);
+ resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols,
+ PrevailingCopy);
// Use global summary-based analysis to identify symbols that can be
// internalized (because they aren't exported or preserved as per callback).
// Changes are made in the index, consumed in the ThinLTO backends.
- internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, *Index);
+ updateIndexWPDForExports(*Index,
+ IsExported(ExportLists, GUIDPreservedSymbols),
+ LocalWPDTargetsMap);
+ thinLTOInternalizeAndPromoteInIndex(
+ *Index, IsExported(ExportLists, GUIDPreservedSymbols),
+ IsPrevailing(PrevailingCopy));
// Make sure that every module has an entry in the ExportLists, ImportList,
// GVSummary and ResolvedODR maps to enable threaded access to these maps
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index 37515d93ed50..6784d81595e5 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -398,7 +398,7 @@ class IRLinker {
/// due to the use of Value handles which the Linker doesn't actually need,
/// but this allows us to reuse the ValueMapper code.
ValueToValueMapTy ValueMap;
- ValueToValueMapTy AliasValueMap;
+ ValueToValueMapTy IndirectSymbolValueMap;
DenseSet<GlobalValue *> ValuesToLink;
std::vector<GlobalValue *> Worklist;
@@ -437,7 +437,7 @@ class IRLinker {
/// Entry point for mapping values and alternate context for mapping aliases.
ValueMapper Mapper;
- unsigned AliasMCID;
+ unsigned IndirectSymbolMCID;
/// Handles cloning of a global values from the source module into
/// the destination module, including setting the attributes and visibility.
@@ -480,13 +480,15 @@ class IRLinker {
///
/// Note this code may call the client-provided \p AddLazyFor.
bool shouldLink(GlobalValue *DGV, GlobalValue &SGV);
- Expected<Constant *> linkGlobalValueProto(GlobalValue *GV, bool ForAlias);
+ Expected<Constant *> linkGlobalValueProto(GlobalValue *GV,
+ bool ForIndirectSymbol);
Error linkModuleFlagsMetadata();
void linkGlobalVariable(GlobalVariable &Dst, GlobalVariable &Src);
Error linkFunctionBody(Function &Dst, Function &Src);
- void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src);
+ void linkIndirectSymbolBody(GlobalIndirectSymbol &Dst,
+ GlobalIndirectSymbol &Src);
Error linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src);
/// Replace all types in the source AttributeList with the
@@ -497,7 +499,7 @@ class IRLinker {
/// into the destination module.
GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar);
Function *copyFunctionProto(const Function *SF);
- GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA);
+ GlobalValue *copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS);
/// Perform "replace all uses with" operations. These work items need to be
/// performed as part of materialization, but we postpone them to happen after
@@ -524,8 +526,8 @@ public:
SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport),
Mapper(ValueMap, RF_MoveDistinctMDs | RF_IgnoreMissingLocals, &TypeMap,
&GValMaterializer),
- AliasMCID(Mapper.registerAlternateMappingContext(AliasValueMap,
- &LValMaterializer)) {
+ IndirectSymbolMCID(Mapper.registerAlternateMappingContext(
+ IndirectSymbolValueMap, &LValMaterializer)) {
ValueMap.getMDMap() = std::move(SharedMDs);
for (GlobalValue *GV : ValuesToLink)
maybeAdd(GV);
@@ -535,7 +537,7 @@ public:
~IRLinker() { SharedMDs = std::move(*ValueMap.getMDMap()); }
Error run();
- Value *materialize(Value *V, bool ForAlias);
+ Value *materialize(Value *V, bool ForIndirectSymbol);
};
}
@@ -568,12 +570,12 @@ Value *LocalValueMaterializer::materialize(Value *SGV) {
return TheIRLinker.materialize(SGV, true);
}
-Value *IRLinker::materialize(Value *V, bool ForAlias) {
+Value *IRLinker::materialize(Value *V, bool ForIndirectSymbol) {
auto *SGV = dyn_cast<GlobalValue>(V);
if (!SGV)
return nullptr;
- Expected<Constant *> NewProto = linkGlobalValueProto(SGV, ForAlias);
+ Expected<Constant *> NewProto = linkGlobalValueProto(SGV, ForIndirectSymbol);
if (!NewProto) {
setError(NewProto.takeError());
return nullptr;
@@ -593,23 +595,23 @@ Value *IRLinker::materialize(Value *V, bool ForAlias) {
if (V->hasInitializer() || V->hasAppendingLinkage())
return New;
} else {
- auto *A = cast<GlobalAlias>(New);
- if (A->getAliasee())
+ auto *IS = cast<GlobalIndirectSymbol>(New);
+ if (IS->getIndirectSymbol())
return New;
}
- // When linking a global for an alias, it will always be linked. However we
- // need to check if it was not already scheduled to satisfy a reference from a
- // regular global value initializer. We know if it has been schedule if the
- // "New" GlobalValue that is mapped here for the alias is the same as the one
- // already mapped. If there is an entry in the ValueMap but the value is
- // different, it means that the value already had a definition in the
- // destination module (linkonce for instance), but we need a new definition
- // for the alias ("New" will be different.
- if (ForAlias && ValueMap.lookup(SGV) == New)
+ // When linking a global for an indirect symbol, it will always be linked.
+ // However we need to check if it was not already scheduled to satisfy a
+ // reference from a regular global value initializer. We know if it has been
+ // scheduled if the "New" GlobalValue that is mapped here for the indirect
+ // symbol is the same as the one already mapped. If there is an entry in the
+ // ValueMap but the value is different, it means that the value already had a
+ // definition in the destination module (linkonce for instance), but we need a
+ // new definition for the indirect symbol ("New" will be different).
+ if (ForIndirectSymbol && ValueMap.lookup(SGV) == New)
return New;
- if (ForAlias || shouldLink(New, *SGV))
+ if (ForIndirectSymbol || shouldLink(New, *SGV))
setError(linkGlobalValueBody(*New, *SGV));
return New;
@@ -627,7 +629,7 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
/*init*/ nullptr, SGVar->getName(),
/*insertbefore*/ nullptr, SGVar->getThreadLocalMode(),
SGVar->getType()->getAddressSpace());
- NewDGV->setAlignment(SGVar->getAlignment());
+ NewDGV->setAlignment(MaybeAlign(SGVar->getAlignment()));
NewDGV->copyAttributesFrom(SGVar);
return NewDGV;
}
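
The MaybeAlign wrapper used above comes from llvm/Support/Alignment.h, part of this tree-wide migration away from raw unsigned alignments: Align is a guaranteed power of two, while MaybeAlign preserves the legacy "0 means unspecified" encoding returned by getAlignment(). A minimal standalone sketch of those semantics (the main() harness is illustrative, not part of the patch):

#include "llvm/Support/Alignment.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // getAlignment() returns 0 for "no alignment specified"; MaybeAlign keeps
  // that distinction instead of treating 0 as a valid alignment.
  MaybeAlign Unset(0);
  MaybeAlign Sixteen(16);
  errs() << (Unset ? "set" : "unset") << "\n"; // prints "unset"
  errs() << Sixteen->value() << "\n";          // prints 16
  return 0;
}
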
@@ -660,16 +662,24 @@ Function *IRLinker::copyFunctionProto(const Function *SF) {
return F;
}
-/// Set up prototypes for any aliases that come over from the source module.
-GlobalValue *IRLinker::copyGlobalAliasProto(const GlobalAlias *SGA) {
+/// Set up prototypes for any indirect symbols that come over from the source
+/// module.
+GlobalValue *
+IRLinker::copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS) {
// If there is no linkage to be performed or we're linking from the source,
  // bring over SGIS.
- auto *Ty = TypeMap.get(SGA->getValueType());
- auto *GA =
- GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(),
- GlobalValue::ExternalLinkage, SGA->getName(), &DstM);
- GA->copyAttributesFrom(SGA);
- return GA;
+ auto *Ty = TypeMap.get(SGIS->getValueType());
+ GlobalIndirectSymbol *GIS;
+ if (isa<GlobalAlias>(SGIS))
+ GIS = GlobalAlias::create(Ty, SGIS->getType()->getPointerAddressSpace(),
+ GlobalValue::ExternalLinkage, SGIS->getName(),
+ &DstM);
+ else
+ GIS = GlobalIFunc::create(Ty, SGIS->getType()->getPointerAddressSpace(),
+ GlobalValue::ExternalLinkage, SGIS->getName(),
+ nullptr, &DstM);
+ GIS->copyAttributesFrom(SGIS);
+ return GIS;
}
GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
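
copyGlobalIndirectSymbolProto dispatches on the concrete subclass because GlobalAlias and GlobalIFunc both derive from GlobalIndirectSymbol (a base class later removed from LLVM): an alias names another global directly, while an ifunc names a resolver chosen at load time. A hedged standalone sketch of the two create calls and the shared accessor; the module contents and names are invented for illustration:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *VoidFnTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *Impl =
      Function::Create(VoidFnTy, GlobalValue::ExternalLinkage, "impl", &M);

  // An alias names another global directly.
  GlobalAlias *GA =
      GlobalAlias::create(VoidFnTy, /*AddressSpace=*/0,
                          GlobalValue::ExternalLinkage, "impl_alias", Impl, &M);

  // An ifunc names a resolver that picks the implementation at load time.
  auto *ResolverTy = FunctionType::get(VoidFnTy->getPointerTo(), false);
  Function *Resolver = Function::Create(
      ResolverTy, GlobalValue::ExternalLinkage, "impl_resolver", &M);
  GlobalIFunc *GI =
      GlobalIFunc::create(VoidFnTy, 0, GlobalValue::ExternalLinkage,
                          "impl_ifunc", Resolver, &M);

  // Both share the GlobalIndirectSymbol base class and accessor.
  for (GlobalIndirectSymbol *GIS : {static_cast<GlobalIndirectSymbol *>(GA),
                                    static_cast<GlobalIndirectSymbol *>(GI)})
    errs() << GIS->getName() << " -> " << GIS->getIndirectSymbol()->getName()
           << "\n";
  return 0;
}
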
@@ -681,7 +691,7 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
NewGV = copyFunctionProto(SF);
} else {
if (ForDefinition)
- NewGV = copyGlobalAliasProto(cast<GlobalAlias>(SGV));
+ NewGV = copyGlobalIndirectSymbolProto(cast<GlobalIndirectSymbol>(SGV));
else if (SGV->getValueType()->isFunctionTy())
NewGV =
Function::Create(cast<FunctionType>(TypeMap.get(SGV->getValueType())),
@@ -748,8 +758,18 @@ void IRLinker::computeTypeMapping() {
}
for (GlobalValue &SGV : *SrcM)
- if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
+ if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) {
+ if (DGV->getType() == SGV.getType()) {
+ // If the types of DGV and SGV are the same, it means that DGV is from
+ // the source module and got added to DstM from a shared metadata. We
+ // shouldn't map this type to itself in case the type's components get
+ // remapped to a new type from DstM (for instance, during the loop over
+ // SrcM->getIdentifiedStructTypes() below).
+ continue;
+ }
+
TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+ }
for (GlobalValue &SGV : SrcM->aliases())
if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
@@ -940,7 +960,7 @@ bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) {
}
Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
- bool ForAlias) {
+ bool ForIndirectSymbol) {
GlobalValue *DGV = getLinkedToGlobal(SGV);
bool ShouldLink = shouldLink(DGV, *SGV);
@@ -951,12 +971,12 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
if (I != ValueMap.end())
return cast<Constant>(I->second);
- I = AliasValueMap.find(SGV);
- if (I != AliasValueMap.end())
+ I = IndirectSymbolValueMap.find(SGV);
+ if (I != IndirectSymbolValueMap.end())
return cast<Constant>(I->second);
}
- if (!ShouldLink && ForAlias)
+ if (!ShouldLink && ForIndirectSymbol)
DGV = nullptr;
// Handle the ultra special appending linkage case first.
@@ -975,8 +995,8 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
if (DoneLinkingBodies)
return nullptr;
- NewGV = copyGlobalValueProto(SGV, ShouldLink || ForAlias);
- if (ShouldLink || !ForAlias)
+ NewGV = copyGlobalValueProto(SGV, ShouldLink || ForIndirectSymbol);
+ if (ShouldLink || !ForIndirectSymbol)
forceRenaming(NewGV, SGV->getName());
}
@@ -987,7 +1007,7 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F))
NewGV = Remangled.getValue();
- if (ShouldLink || ForAlias) {
+ if (ShouldLink || ForIndirectSymbol) {
if (const Comdat *SC = SGV->getComdat()) {
if (auto *GO = dyn_cast<GlobalObject>(NewGV)) {
Comdat *DC = DstM.getOrInsertComdat(SC->getName());
@@ -997,7 +1017,7 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
}
}
- if (!ShouldLink && ForAlias)
+ if (!ShouldLink && ForIndirectSymbol)
NewGV->setLinkage(GlobalValue::InternalLinkage);
Constant *C = NewGV;
@@ -1060,8 +1080,10 @@ Error IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
return Error::success();
}
-void IRLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) {
- Mapper.scheduleMapGlobalAliasee(Dst, *Src.getAliasee(), AliasMCID);
+void IRLinker::linkIndirectSymbolBody(GlobalIndirectSymbol &Dst,
+ GlobalIndirectSymbol &Src) {
+ Mapper.scheduleMapGlobalIndirectSymbol(Dst, *Src.getIndirectSymbol(),
+ IndirectSymbolMCID);
}
Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
@@ -1071,7 +1093,7 @@ Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
linkGlobalVariable(cast<GlobalVariable>(Dst), *GVar);
return Error::success();
}
- linkAliasBody(cast<GlobalAlias>(Dst), cast<GlobalAlias>(Src));
+ linkIndirectSymbolBody(cast<GlobalIndirectSymbol>(Dst), cast<GlobalIndirectSymbol>(Src));
return Error::success();
}
@@ -1411,7 +1433,7 @@ Error IRLinker::run() {
// Already mapped.
if (ValueMap.find(GV) != ValueMap.end() ||
- AliasValueMap.find(GV) != AliasValueMap.end())
+ IndirectSymbolValueMap.find(GV) != IndirectSymbolValueMap.end())
continue;
assert(!GV->isDeclaration());
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index a18f4cc25bcc..35d6290e901b 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -351,7 +351,8 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
SGVar->setConstant(false);
}
if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) {
- unsigned Align = std::max(DGVar->getAlignment(), SGVar->getAlignment());
+ MaybeAlign Align(
+ std::max(DGVar->getAlignment(), SGVar->getAlignment()));
SGVar->setAlignment(Align);
DGVar->setAlignment(Align);
}
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 2c68723a12f8..6f160e491cea 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -36,6 +36,7 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
@@ -336,7 +337,7 @@ public:
} // end anonymous namespace
void ELFWriter::align(unsigned Alignment) {
- uint64_t Padding = OffsetToAlignment(W.OS.tell(), Alignment);
+ uint64_t Padding = offsetToAlignment(W.OS.tell(), Align(Alignment));
W.OS.write_zeros(Padding);
}
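
offsetToAlignment, the renamed lower-case spelling used above, returns the padding needed to reach the next Align boundary. A quick sketch of the arithmetic, assuming a standalone harness:

#include "llvm/Support/Alignment.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // A stream currently at offset 10 needs 6 bytes of padding to reach the
  // next 8-byte boundary; if the offset is already aligned, padding is 0.
  errs() << offsetToAlignment(10, Align(8)) << "\n"; // prints 6
  errs() << offsetToAlignment(16, Align(8)) << "\n"; // prints 0
  return 0;
}
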
@@ -511,6 +512,19 @@ static uint8_t mergeTypeForSet(uint8_t origType, uint8_t newType) {
return Type;
}
+static bool isIFunc(const MCSymbolELF *Symbol) {
+ while (Symbol->getType() != ELF::STT_GNU_IFUNC) {
+ const MCSymbolRefExpr *Value;
+ if (!Symbol->isVariable() ||
+ !(Value = dyn_cast<MCSymbolRefExpr>(Symbol->getVariableValue())) ||
+ Value->getKind() != MCSymbolRefExpr::VK_None ||
+ mergeTypeForSet(Symbol->getType(), ELF::STT_GNU_IFUNC) != ELF::STT_GNU_IFUNC)
+ return false;
+ Symbol = &cast<MCSymbolELF>(Value->getSymbol());
+ }
+ return true;
+}
+
void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex,
ELFSymbolData &MSD, const MCAsmLayout &Layout) {
const auto &Symbol = cast<MCSymbolELF>(*MSD.Symbol);
@@ -524,6 +538,8 @@ void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex,
// Binding and Type share the same byte as upper and lower nibbles
uint8_t Binding = Symbol.getBinding();
uint8_t Type = Symbol.getType();
+ if (isIFunc(&Symbol))
+ Type = ELF::STT_GNU_IFUNC;
if (Base) {
Type = mergeTypeForSet(Type, Base->getType());
}
@@ -622,7 +638,7 @@ void ELFWriter::computeSymbolTable(
unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
MCSectionELF *SymtabSection =
Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0, EntrySize, "");
- SymtabSection->setAlignment(is64Bit() ? 8 : 4);
+ SymtabSection->setAlignment(is64Bit() ? Align(8) : Align(4));
SymbolTableIndex = addToSectionTable(SymtabSection);
align(SymtabSection->getAlignment());
@@ -720,7 +736,7 @@ void ELFWriter::computeSymbolTable(
MCSectionELF *SymtabShndxSection =
Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0, 4, "");
SymtabShndxSectionIndex = addToSectionTable(SymtabShndxSection);
- SymtabShndxSection->setAlignment(4);
+ SymtabShndxSection->setAlignment(Align(4));
}
ArrayRef<std::string> FileNames = Asm.getFileNames();
@@ -808,7 +824,7 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
MCSectionELF *RelaSection = Ctx.createELFRelSection(
RelaSectionName, hasRelocationAddend() ? ELF::SHT_RELA : ELF::SHT_REL,
Flags, EntrySize, Sec.getGroup(), &Sec);
- RelaSection->setAlignment(is64Bit() ? 8 : 4);
+ RelaSection->setAlignment(is64Bit() ? Align(8) : Align(4));
return RelaSection;
}
@@ -895,7 +911,7 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED);
// Alignment field should reflect the requirements of
// the compressed section header.
- Section.setAlignment(is64Bit() ? 8 : 4);
+ Section.setAlignment(is64Bit() ? Align(8) : Align(4));
} else {
// Add "z" prefix to section name. This is zlib-gnu style.
MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str());
@@ -1119,7 +1135,7 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
if (!GroupIdx) {
MCSectionELF *Group = Ctx.createELFGroupSection(SignatureSymbol);
GroupIdx = addToSectionTable(Group);
- Group->setAlignment(4);
+ Group->setAlignment(Align(4));
Groups.push_back(Group);
}
std::vector<const MCSectionELF *> &Members =
@@ -1437,22 +1453,7 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
MCContext &Ctx = Asm.getContext();
if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
- // Let A, B and C being the components of Target and R be the location of
- // the fixup. If the fixup is not pcrel, we want to compute (A - B + C).
- // If it is pcrel, we want to compute (A - B + C - R).
-
- // In general, ELF has no relocations for -B. It can only represent (A + C)
- // or (A + C - R). If B = R + K and the relocation is not pcrel, we can
- // replace B to implement it: (A - R - K + C)
- if (IsPCRel) {
- Ctx.reportError(
- Fixup.getLoc(),
- "No relocation available to represent this relative expression");
- return;
- }
-
const auto &SymB = cast<MCSymbolELF>(RefB->getSymbol());
-
if (SymB.isUndefined()) {
Ctx.reportError(Fixup.getLoc(),
Twine("symbol '") + SymB.getName() +
@@ -1468,10 +1469,9 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
return;
}
- uint64_t SymBOffset = Layout.getSymbolOffset(SymB);
- uint64_t K = SymBOffset - FixupOffset;
+ assert(!IsPCRel && "should have been folded");
IsPCRel = true;
- C -= K;
+ C += FixupOffset - Layout.getSymbolOffset(SymB);
}
// We either rejected the fixup or folded B into C at this point.
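
To spell out the folding that replaces the old rejection path: the target value is A - B + C and the fixup sits at location R. ELF has no relocation for -B, but when B is defined at offset(B) in the same section as the fixup, the expression can be rewritten into the PC-relative form

  A - B + C = A + (C + (R - B)) - R

and R - B reduces to FixupOffset - Layout.getSymbolOffset(SymB), which is exactly what the new code adds to C before setting IsPCRel.
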
@@ -1489,38 +1489,35 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
}
}
- unsigned Type = TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel);
- uint64_t OriginalC = C;
- bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type);
- if (!RelocateWithSymbol && SymA && !SymA->isUndefined())
- C += Layout.getSymbolOffset(*SymA);
-
- uint64_t Addend = 0;
- if (hasRelocationAddend()) {
- Addend = C;
- C = 0;
- }
-
- FixedValue = C;
-
const MCSectionELF *SecA = (SymA && SymA->isInSection())
? cast<MCSectionELF>(&SymA->getSection())
: nullptr;
if (!checkRelocation(Ctx, Fixup.getLoc(), &FixupSection, SecA))
return;
+ unsigned Type = TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel);
+ bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type);
+ uint64_t Addend = 0;
+
+ FixedValue = !RelocateWithSymbol && SymA && !SymA->isUndefined()
+ ? C + Layout.getSymbolOffset(*SymA)
+ : C;
+ if (hasRelocationAddend()) {
+ Addend = FixedValue;
+ FixedValue = 0;
+ }
+
if (!RelocateWithSymbol) {
const auto *SectionSymbol =
SecA ? cast<MCSymbolELF>(SecA->getBeginSymbol()) : nullptr;
if (SectionSymbol)
SectionSymbol->setUsedInReloc();
- ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA,
- OriginalC);
+ ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, C);
Relocations[&FixupSection].push_back(Rec);
return;
}
- const auto *RenamedSymA = SymA;
+ const MCSymbolELF *RenamedSymA = SymA;
if (SymA) {
if (const MCSymbolELF *R = Renames.lookup(SymA))
RenamedSymA = R;
@@ -1530,8 +1527,7 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
else
RenamedSymA->setUsedInReloc();
}
- ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA,
- OriginalC);
+ ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, C);
Relocations[&FixupSection].push_back(Rec);
}
@@ -1551,7 +1547,7 @@ bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
std::unique_ptr<MCObjectWriter>
llvm::createELFObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, bool IsLittleEndian) {
- return llvm::make_unique<ELFSingleObjectWriter>(std::move(MOTW), OS,
+ return std::make_unique<ELFSingleObjectWriter>(std::move(MOTW), OS,
IsLittleEndian);
}
@@ -1559,6 +1555,6 @@ std::unique_ptr<MCObjectWriter>
llvm::createELFDwoObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS,
bool IsLittleEndian) {
- return llvm::make_unique<ELFDwoObjectWriter>(std::move(MOTW), OS, DwoOS,
+ return std::make_unique<ELFDwoObjectWriter>(std::move(MOTW), OS, DwoOS,
IsLittleEndian);
}
diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp
index 9b1102cbe7d1..b800e9caee22 100644
--- a/lib/MC/MCAsmBackend.cpp
+++ b/lib/MC/MCAsmBackend.cpp
@@ -73,6 +73,7 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"FK_Data_2", 0, 16, 0},
{"FK_Data_4", 0, 32, 0},
{"FK_Data_8", 0, 64, 0},
+ {"FK_Data_6b", 0, 6, 0},
{"FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel},
{"FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
{"FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
@@ -93,10 +94,12 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"FK_Data_Add_2", 0, 16, 0},
{"FK_Data_Add_4", 0, 32, 0},
{"FK_Data_Add_8", 0, 64, 0},
+ {"FK_Data_Add_6b", 0, 6, 0},
{"FK_Data_Sub_1", 0, 8, 0},
{"FK_Data_Sub_2", 0, 16, 0},
{"FK_Data_Sub_4", 0, 32, 0},
- {"FK_Data_Sub_8", 0, 64, 0}};
+ {"FK_Data_Sub_8", 0, 64, 0},
+ {"FK_Data_Sub_6b", 0, 6, 0}};
assert((size_t)Kind <= array_lengthof(Builtins) && "Unknown fixup kind");
return Builtins[Kind];
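
The odd-looking 6-bit kinds back the payload bits embedded in DW_CFA_advance_loc opcodes (see the MCDwarf changes further down); targets that postpone address deltas to relocations attach a fixup to those bits. A small sanity sketch, assuming the getKindForSizeInBits mapping that this patch series uses:

#include "llvm/MC/MCFixup.h"
#include <cassert>

int main() {
  // 6 bits is not a byte-sized fixup, so it gets its own FK_Data_6b kind.
  assert(llvm::MCFixup::getKindForSizeInBits(6, /*isPCRel=*/false) ==
         llvm::FK_Data_6b);
  return 0;
}
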
diff --git a/lib/MC/MCAsmInfoXCOFF.cpp b/lib/MC/MCAsmInfoXCOFF.cpp
index 74c21f0c9e6d..65fe8848e20f 100644
--- a/lib/MC/MCAsmInfoXCOFF.cpp
+++ b/lib/MC/MCAsmInfoXCOFF.cpp
@@ -15,4 +15,21 @@ void MCAsmInfoXCOFF::anchor() {}
MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
IsLittleEndian = false;
HasDotTypeDotSizeDirective = false;
+ COMMDirectiveAlignmentIsInBytes = false;
+ LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
+ UseDotAlignForAlignment = true;
+ AsciiDirective = nullptr; // not supported
+ AscizDirective = nullptr; // not supported
+ NeedsFunctionDescriptors = true;
+ HasDotLGloblDirective = true;
+ Data64bitsDirective = "\t.llong\t";
+ SupportsQuotedNames = false;
+}
+
+bool MCAsmInfoXCOFF::isValidUnquotedName(StringRef Name) const {
+ // FIXME: Remove this function when we stop using "TOC[TC0]" as a symbol name.
+ if (Name.equals("TOC[TC0]"))
+ return true;
+
+ return MCAsmInfo::isValidUnquotedName(Name);
}
diff --git a/lib/MC/MCAsmMacro.cpp b/lib/MC/MCAsmMacro.cpp
index ba4fb7d4f387..186a68b02a29 100644
--- a/lib/MC/MCAsmMacro.cpp
+++ b/lib/MC/MCAsmMacro.cpp
@@ -11,6 +11,7 @@
using namespace llvm;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MCAsmMacroParameter::dump(raw_ostream &OS) const {
OS << "\"" << Name << "\"";
if (Required)
@@ -39,3 +40,4 @@ void MCAsmMacro::dump(raw_ostream &OS) const {
}
OS << " (BEGIN BODY)" << Body << "(END BODY)\n";
}
+#endif
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 7a2b0b8a1220..2d9c2cb21255 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -23,6 +24,7 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
@@ -66,7 +68,7 @@ public:
std::unique_ptr<MCAsmBackend> asmbackend, bool showInst)
: MCStreamer(Context), OSOwner(std::move(os)), OS(*OSOwner),
MAI(Context.getAsmInfo()), InstPrinter(printer),
- Assembler(llvm::make_unique<MCAssembler>(
+ Assembler(std::make_unique<MCAssembler>(
Context, std::move(asmbackend), std::move(emitter),
(asmbackend) ? asmbackend->createObjectWriter(NullStream)
: nullptr)),
@@ -162,6 +164,8 @@ public:
void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
+ void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlign) override;
void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
@@ -254,9 +258,26 @@ public:
unsigned SourceLineNum,
const MCSymbol *FnStartSym,
const MCSymbol *FnEndSym) override;
+
+ void PrintCVDefRangePrefix(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges);
+
+ void EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeRegisterRelHeader DRHdr) override;
+
+ void EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeSubfieldRegisterHeader DRHdr) override;
+
+ void EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeRegisterHeader DRHdr) override;
+
void EmitCVDefRangeDirective(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
- StringRef FixedSizePortion) override;
+ codeview::DefRangeFramePointerRelHeader DRHdr) override;
+
void EmitCVStringTableDirective() override;
void EmitCVFileChecksumsDirective() override;
void EmitCVFileChecksumOffsetDirective(unsigned FileNo) override;
@@ -291,13 +312,13 @@ public:
void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) override;
void EmitWinCFIStartChained(SMLoc Loc) override;
void EmitWinCFIEndChained(SMLoc Loc) override;
- void EmitWinCFIPushReg(unsigned Register, SMLoc Loc) override;
- void EmitWinCFISetFrame(unsigned Register, unsigned Offset,
+ void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) override;
+ void EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
SMLoc Loc) override;
void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) override;
- void EmitWinCFISaveReg(unsigned Register, unsigned Offset,
+ void EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
SMLoc Loc) override;
- void EmitWinCFISaveXMM(unsigned Register, unsigned Offset,
+ void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
SMLoc Loc) override;
void EmitWinCFIPushFrame(bool Code, SMLoc Loc) override;
void EmitWinCFIEndProlog(SMLoc Loc) override;
@@ -630,6 +651,7 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_Global: // .globl/.global
OS << MAI->getGlobalDirective();
break;
+ case MCSA_LGlobal: OS << "\t.lglobl\t"; break;
case MCSA_Hidden: OS << "\t.hidden\t"; break;
case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break;
case MCSA_Internal: OS << "\t.internal\t"; break;
@@ -740,6 +762,24 @@ void MCAsmStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {
EmitEOL();
}
+// We need an XCOFF-specific version of this directive as the AIX syntax
+// requires a QualName argument identifying the csect name and storage mapping
+// class to appear before the alignment if we are specifying it.
+void MCAsmStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ assert(MAI->getLCOMMDirectiveAlignmentType() == LCOMM::Log2Alignment &&
+ "We only support writing log base-2 alignment format with XCOFF.");
+ assert(isPowerOf2_32(ByteAlignment) && "Alignment must be a power of 2.");
+
+ OS << "\t.lcomm\t";
+ Symbol->print(OS, MAI);
+ OS << ',' << Size;
+ OS << ',' << Symbol->getName();
+ OS << ',' << Log2_32(ByteAlignment);
+
+ EmitEOL();
+}
+
void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
assert(MAI->hasDotTypeDotSizeDirective());
OS << "\t.size\t";
@@ -1082,6 +1122,16 @@ void MCAsmStreamer::emitFill(const MCExpr &NumValues, int64_t Size,
void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
unsigned ValueSize,
unsigned MaxBytesToEmit) {
+ if (MAI->useDotAlignForAlignment()) {
+ if (!isPowerOf2_32(ByteAlignment))
+ report_fatal_error("Only power-of-two alignments are supported "
+ "with .align.");
+ OS << "\t.align\t";
+ OS << Log2_32(ByteAlignment);
+ EmitEOL();
+ return;
+ }
+
// Some assemblers don't support non-power of two alignments, so we always
// emit alignments as a power of two if possible.
if (isPowerOf2_32(ByteAlignment)) {
@@ -1376,9 +1426,8 @@ void MCAsmStreamer::EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId,
PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym);
}
-void MCAsmStreamer::EmitCVDefRangeDirective(
- ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
- StringRef FixedSizePortion) {
+void MCAsmStreamer::PrintCVDefRangePrefix(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges) {
OS << "\t.cv_def_range\t";
for (std::pair<const MCSymbol *, const MCSymbol *> Range : Ranges) {
OS << ' ';
@@ -1386,10 +1435,43 @@ void MCAsmStreamer::EmitCVDefRangeDirective(
OS << ' ';
Range.second->print(OS, MAI);
}
- OS << ", ";
- PrintQuotedString(FixedSizePortion, OS);
+}
+
+void MCAsmStreamer::EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeRegisterRelHeader DRHdr) {
+ PrintCVDefRangePrefix(Ranges);
+ OS << ", reg_rel, ";
+ OS << DRHdr.Register << ", " << DRHdr.Flags << ", "
+ << DRHdr.BasePointerOffset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeSubfieldRegisterHeader DRHdr) {
+ PrintCVDefRangePrefix(Ranges);
+ OS << ", subfield_reg, ";
+ OS << DRHdr.Register << ", " << DRHdr.OffsetInParent;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeRegisterHeader DRHdr) {
+ PrintCVDefRangePrefix(Ranges);
+ OS << ", reg, ";
+ OS << DRHdr.Register;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeFramePointerRelHeader DRHdr) {
+ PrintCVDefRangePrefix(Ranges);
+ OS << ", frame_ptr_rel, ";
+ OS << DRHdr.Offset;
EmitEOL();
- this->MCStreamer::EmitCVDefRangeDirective(Ranges, FixedSizePortion);
}
void MCAsmStreamer::EmitCVStringTableDirective() {
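
The four overloads above replace the old opaque FixedSizePortion byte string with the typed CodeView headers from llvm/DebugInfo/CodeView/SymbolRecord.h. A hedged sketch of filling one of them; the register number and offsets are made-up values:

#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  codeview::DefRangeRegisterRelHeader Hdr;
  Hdr.Register = 330;         // a CodeView register number (illustrative)
  Hdr.Flags = 0;
  Hdr.BasePointerOffset = 16;
  // MCAsmStreamer appends these fields after the range list:
  //   .cv_def_range <start> <end>, reg_rel, 330, 0, 16
  errs() << Hdr.Register << ", " << Hdr.Flags << ", " << Hdr.BasePointerOffset
         << "\n";
  return 0;
}
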
@@ -1453,9 +1535,8 @@ void MCAsmStreamer::EmitRegisterName(int64_t Register) {
// just ones that map to LLVM register numbers and have known names.
// Fall back to using the original number directly if no name is known.
const MCRegisterInfo *MRI = getContext().getRegisterInfo();
- int LLVMRegister = MRI->getLLVMRegNumFromEH(Register);
- if (LLVMRegister != -1) {
- InstPrinter->printRegName(OS, LLVMRegister);
+ if (Optional<unsigned> LLVMRegister = MRI->getLLVMRegNum(Register, true)) {
+ InstPrinter->printRegName(OS, *LLVMRegister);
return;
}
}
@@ -1668,6 +1749,12 @@ void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) {
// We only do this so the section switch that terminates the handler
// data block is visible.
WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo();
+
+ // Do nothing if no frame is open. MCStreamer should've already reported an
+ // error.
+ if (!CurFrame)
+ return;
+
MCSection *TextSec = &CurFrame->Function->getSection();
MCSection *XData = getAssociatedXDataSection(TextSec);
SwitchSectionNoChange(XData);
@@ -1676,18 +1763,21 @@ void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) {
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIPushReg(unsigned Register, SMLoc Loc) {
+void MCAsmStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) {
MCStreamer::EmitWinCFIPushReg(Register, Loc);
- OS << "\t.seh_pushreg " << Register;
+ OS << "\t.seh_pushreg ";
+ InstPrinter->printRegName(OS, Register);
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset,
+void MCAsmStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
SMLoc Loc) {
MCStreamer::EmitWinCFISetFrame(Register, Offset, Loc);
- OS << "\t.seh_setframe " << Register << ", " << Offset;
+ OS << "\t.seh_setframe ";
+ InstPrinter->printRegName(OS, Register);
+ OS << ", " << Offset;
EmitEOL();
}
@@ -1698,19 +1788,23 @@ void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) {
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset,
+void MCAsmStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
SMLoc Loc) {
MCStreamer::EmitWinCFISaveReg(Register, Offset, Loc);
- OS << "\t.seh_savereg " << Register << ", " << Offset;
+ OS << "\t.seh_savereg ";
+ InstPrinter->printRegName(OS, Register);
+ OS << ", " << Offset;
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset,
+void MCAsmStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
SMLoc Loc) {
MCStreamer::EmitWinCFISaveXMM(Register, Offset, Loc);
- OS << "\t.seh_savexmm " << Register << ", " << Offset;
+ OS << "\t.seh_savexmm ";
+ InstPrinter->printRegName(OS, Register);
+ OS << ", " << Offset;
EmitEOL();
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index c4f4d4c2870e..cf42fe85b8e5 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -30,6 +30,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -321,7 +322,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
case MCFragment::FT_Align: {
const MCAlignFragment &AF = cast<MCAlignFragment>(F);
unsigned Offset = Layout.getFragmentOffset(&AF);
- unsigned Size = OffsetToAlignment(Offset, AF.getAlignment());
+ unsigned Size = offsetToAlignment(Offset, Align(AF.getAlignment()));
    // Insert extra Nops for code alignment if the target defines the
    // shouldInsertExtraNopBytesForCodeAlign target hook.
@@ -840,6 +841,10 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
getBackend().shouldInsertFixupForCodeAlign(*this, Layout, *AF);
}
continue;
+ } else if (auto *FragWithFixups =
+ dyn_cast<MCDwarfCallFrameFragment>(&Frag)) {
+ Fixups = FragWithFixups->getFixups();
+ Contents = FragWithFixups->getContents();
} else
llvm_unreachable("Unknown fragment with fixups!");
for (const MCFixup &Fixup : Fixups) {
@@ -969,13 +974,9 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
MCContext &Context = Layout.getAssembler().getContext();
uint64_t OldSize = DF.getContents().size();
int64_t AddrDelta;
- bool Abs;
- if (getBackend().requiresDiffExpressionRelocations())
- Abs = DF.getAddrDelta().evaluateAsAbsolute(AddrDelta, Layout);
- else {
- Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
- assert(Abs && "We created a line delta with an invalid expression");
- }
+ bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
+ assert(Abs && "We created a line delta with an invalid expression");
+ (void)Abs;
int64_t LineDelta;
LineDelta = DF.getLineDelta();
SmallVectorImpl<char> &Data = DF.getContents();
@@ -983,7 +984,7 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
raw_svector_ostream OSE(Data);
DF.getFixups().clear();
- if (Abs) {
+ if (!getBackend().requiresDiffExpressionRelocations()) {
MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta,
AddrDelta, OSE);
} else {
@@ -1017,10 +1018,25 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
assert(Abs && "We created call frame with an invalid expression");
(void) Abs;
- SmallString<8> &Data = DF.getContents();
+ SmallVectorImpl<char> &Data = DF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
- MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE);
+ DF.getFixups().clear();
+
+ if (getBackend().requiresDiffExpressionRelocations()) {
+ uint32_t Offset;
+ uint32_t Size;
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE, &Offset,
+ &Size);
+ if (Size) {
+ DF.getFixups().push_back(MCFixup::create(
+ Offset, &DF.getAddrDelta(),
+ MCFixup::getKindForSizeInBits(Size /*In bits.*/, false /*isPCRel*/)));
+ }
+ } else {
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE);
+ }
+
return OldSize != Data.size();
}
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 0dc2e2d37caf..a69ee19e1a1a 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -58,11 +58,12 @@ AsSecureLogFileName("as-secure-log-file-name",
MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
const MCObjectFileInfo *mofi, const SourceMgr *mgr,
- bool DoAutoReset)
+ MCTargetOptions const *TargetOpts, bool DoAutoReset)
: SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi),
Symbols(Allocator), UsedNames(Allocator),
+ InlineAsmUsedLabelNames(Allocator),
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
- AutoReset(DoAutoReset) {
+ AutoReset(DoAutoReset), TargetOptions(TargetOpts) {
SecureLogFile = AsSecureLogFileName;
if (SrcMgr && SrcMgr->getNumBuffers())
@@ -90,6 +91,7 @@ void MCContext::reset() {
XCOFFAllocator.DestroyAll();
MCSubtargetAllocator.DestroyAll();
+ InlineAsmUsedLabelNames.clear();
UsedNames.clear();
Symbols.clear();
Allocator.Reset();
@@ -272,6 +274,10 @@ void MCContext::setSymbolValue(MCStreamer &Streamer,
Streamer.EmitAssignment(Symbol, MCConstantExpr::create(Val, *this));
}
+void MCContext::registerInlineAsmLabel(MCSymbol *Sym) {
+ InlineAsmUsedLabelNames[Sym->getName()] = Sym;
+}
+
//===----------------------------------------------------------------------===//
// Section Management
//===----------------------------------------------------------------------===//
@@ -531,6 +537,8 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section,
XCOFF::StorageMappingClass SMC,
+ XCOFF::SymbolType Type,
+ XCOFF::StorageClass SC,
SectionKind Kind,
const char *BeginSymName) {
// Do the lookup. If we have a hit, return it.
@@ -548,7 +556,7 @@ MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section,
Begin = createTempSymbol(BeginSymName, false);
MCSectionXCOFF *Result = new (XCOFFAllocator.Allocate())
- MCSectionXCOFF(CachedName, SMC, Kind, Begin);
+ MCSectionXCOFF(CachedName, SMC, Type, SC, Kind, Begin);
Entry.second = Result;
auto *F = new MCDataFragment();
@@ -690,6 +698,21 @@ void MCContext::reportError(SMLoc Loc, const Twine &Msg) {
report_fatal_error(Msg, false);
}
+void MCContext::reportWarning(SMLoc Loc, const Twine &Msg) {
+ if (TargetOptions && TargetOptions->MCNoWarn)
+ return;
+ if (TargetOptions && TargetOptions->MCFatalWarnings)
+ reportError(Loc, Msg);
+ else {
+ // If we have a source manager use it. Otherwise, try using the inline
+ // source manager.
+ if (SrcMgr)
+ SrcMgr->PrintMessage(Loc, SourceMgr::DK_Warning, Msg);
+ else if (InlineSrcMgr)
+ InlineSrcMgr->PrintMessage(Loc, SourceMgr::DK_Warning, Msg);
+ }
+}
+
void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) {
reportError(Loc, Msg);
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index aae6fdf90931..bcc7c45afc01 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -544,8 +544,8 @@ Expected<unsigned> MCDwarfLineTable::tryGetFile(StringRef &Directory,
FileNumber);
}
-bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory,
- StringRef &FileName, Optional<MD5::MD5Result> Checksum) {
+static bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory,
+ StringRef &FileName, Optional<MD5::MD5Result> Checksum) {
if (RootFile.Name.empty() || RootFile.Name != FileName.data())
return false;
return RootFile.Checksum == Checksum;
@@ -1897,26 +1897,54 @@ void MCDwarfFrameEmitter::EmitAdvanceLoc(MCObjectStreamer &Streamer,
}
void MCDwarfFrameEmitter::EncodeAdvanceLoc(MCContext &Context,
- uint64_t AddrDelta,
- raw_ostream &OS) {
+ uint64_t AddrDelta, raw_ostream &OS,
+ uint32_t *Offset, uint32_t *Size) {
// Scale the address delta by the minimum instruction length.
AddrDelta = ScaleAddrDelta(Context, AddrDelta);
+ bool WithFixups = false;
+ if (Offset && Size)
+ WithFixups = true;
+
support::endianness E =
Context.getAsmInfo()->isLittleEndian() ? support::little : support::big;
if (AddrDelta == 0) {
+ if (WithFixups) {
+ *Offset = 0;
+ *Size = 0;
+ }
} else if (isUIntN(6, AddrDelta)) {
uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
- OS << Opcode;
+ if (WithFixups) {
+ *Offset = OS.tell();
+ *Size = 6;
+ OS << uint8_t(dwarf::DW_CFA_advance_loc);
+ } else
+ OS << Opcode;
} else if (isUInt<8>(AddrDelta)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc1);
- OS << uint8_t(AddrDelta);
+ if (WithFixups) {
+ *Offset = OS.tell();
+ *Size = 8;
+ OS.write_zeros(1);
+ } else
+ OS << uint8_t(AddrDelta);
} else if (isUInt<16>(AddrDelta)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc2);
- support::endian::write<uint16_t>(OS, AddrDelta, E);
+ if (WithFixups) {
+ *Offset = OS.tell();
+ *Size = 16;
+ OS.write_zeros(2);
+ } else
+ support::endian::write<uint16_t>(OS, AddrDelta, E);
} else {
assert(isUInt<32>(AddrDelta));
OS << uint8_t(dwarf::DW_CFA_advance_loc4);
- support::endian::write<uint32_t>(OS, AddrDelta, E);
+ if (WithFixups) {
+ *Offset = OS.tell();
+ *Size = 32;
+ OS.write_zeros(4);
+ } else
+ support::endian::write<uint32_t>(OS, AddrDelta, E);
}
}
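
Concretely: a scaled delta below 64 is folded into the DW_CFA_advance_loc opcode byte itself, while larger deltas select a one-, two-, or four-byte operand form; in the new fixup path the operand bits are left zeroed and their position and width (6, 8, 16, or 32 bits) are reported through Offset and Size. A standalone sketch of the two smallest encodings, assuming a minimum instruction length of 1:

#include "llvm/BinaryFormat/Dwarf.h"
#include <cstdint>
#include <cstdio>

int main() {
  // Delta 5 fits the 6-bit payload: a single byte 0x40 | 5 = 0x45.
  std::printf("%02x\n", unsigned(llvm::dwarf::DW_CFA_advance_loc | 5));
  // Delta 70 needs DW_CFA_advance_loc1: opcode 0x02 followed by 0x46.
  uint64_t AddrDelta = 70;
  std::printf("%02x %02x\n", unsigned(llvm::dwarf::DW_CFA_advance_loc1),
              unsigned(AddrDelta));
  return 0;
}
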
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 245dd063004f..fa2133078bfe 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -139,7 +139,7 @@ static void setSectionAlignmentForBundling(const MCAssembler &Assembler,
MCSection *Section) {
if (Section && Assembler.isBundlingEnabled() && Section->hasInstructions() &&
Section->getAlignment() < Assembler.getBundleAlignSize())
- Section->setAlignment(Assembler.getBundleAlignSize());
+ Section->setAlignment(Align(Assembler.getBundleAlignSize()));
}
void MCELFStreamer::ChangeSection(MCSection *Section,
@@ -277,6 +277,9 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
case MCSA_AltEntry:
llvm_unreachable("ELF doesn't support the .alt_entry attribute");
+
+ case MCSA_LGlobal:
+ llvm_unreachable("ELF doesn't support the .lglobl attribute");
}
return true;
@@ -306,7 +309,7 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
// Update the maximum alignment of the section if necessary.
if (ByteAlignment > Section.getAlignment())
- Section.setAlignment(ByteAlignment);
+ Section.setAlignment(Align(ByteAlignment));
SwitchSection(P.first, P.second);
} else {
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index ab53ed42778e..813c00f6f3bb 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -259,6 +259,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_TOC_LO: return "toc@l";
case VK_PPC_TOC_HI: return "toc@h";
case VK_PPC_TOC_HA: return "toc@ha";
+ case VK_PPC_U: return "u";
+ case VK_PPC_L: return "l";
case VK_PPC_DTPMOD: return "dtpmod";
case VK_PPC_TPREL_LO: return "tprel@l";
case VK_PPC_TPREL_HI: return "tprel@h";
@@ -373,6 +375,8 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("toc@l", VK_PPC_TOC_LO)
.Case("toc@h", VK_PPC_TOC_HI)
.Case("toc@ha", VK_PPC_TOC_HA)
+ .Case("u", VK_PPC_U)
+ .Case("l", VK_PPC_L)
.Case("tls", VK_PPC_TLS)
.Case("dtpmod", VK_PPC_DTPMOD)
.Case("tprel@l", VK_PPC_TPREL_LO)
@@ -453,26 +457,28 @@ void MCTargetExpr::anchor() {}
/* *** */
bool MCExpr::evaluateAsAbsolute(int64_t &Res) const {
- return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr);
+ return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr, false);
}
bool MCExpr::evaluateAsAbsolute(int64_t &Res,
const MCAsmLayout &Layout) const {
- return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr);
+ return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr, false);
}
bool MCExpr::evaluateAsAbsolute(int64_t &Res,
const MCAsmLayout &Layout,
const SectionAddrMap &Addrs) const {
- return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
+ // Setting InSet causes us to absolutize differences across sections and that
+ // is what the MachO writer uses Addrs for.
+ return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs, true);
}
bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
- return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
+ return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr, false);
}
bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm) const {
- return evaluateAsAbsolute(Res, Asm, nullptr, nullptr);
+ return evaluateAsAbsolute(Res, Asm, nullptr, nullptr, false);
}
bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
@@ -483,15 +489,6 @@ bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
- const SectionAddrMap *Addrs) const {
- // FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us
- // absolutize differences across sections and that is what the MachO writer
- // uses Addrs for.
- return evaluateAsAbsolute(Res, Asm, Layout, Addrs, Addrs);
-}
-
-bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
- const MCAsmLayout *Layout,
const SectionAddrMap *Addrs, bool InSet) const {
MCValue Value;
@@ -577,6 +574,24 @@ static void AttemptToFoldSymbolOffsetDifference(
A = B = nullptr;
}
+static bool canFold(const MCAssembler *Asm, const MCSymbolRefExpr *A,
+ const MCSymbolRefExpr *B, bool InSet) {
+ if (InSet)
+ return true;
+
+ if (!Asm->getBackend().requiresDiffExpressionRelocations())
+ return true;
+
+ const MCSymbol &CheckSym = A ? A->getSymbol() : B->getSymbol();
+ if (!CheckSym.isInSection())
+ return true;
+
+ if (!CheckSym.getSection().hasInstructions())
+ return true;
+
+ return false;
+}
+
/// Evaluate the result of an add between (conceptually) two MCValues.
///
/// This routine conceptually attempts to construct an MCValue:
@@ -617,8 +632,7 @@ EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
// the backend requires this to be emitted as individual relocations, unless
// the InSet flag is set to get the current difference anyway (used for
// example to calculate symbol sizes).
- if (Asm &&
- (InSet || !Asm->getBackend().requiresDiffExpressionRelocations())) {
+ if (Asm && canFold(Asm, LHS_A, LHS_B, InSet)) {
// First, fold out any differences which are fully resolved. By
// reassociating terms in
// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 159f4070fe9f..c5c06f323e68 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -64,12 +64,6 @@ StringRef MCInstPrinter::markup(StringRef s) const {
else
return "";
}
-StringRef MCInstPrinter::markup(StringRef a, StringRef b) const {
- if (getUseMarkup())
- return a;
- else
- return b;
-}
// For asm-style hex (e.g. 0ffh) the first digit always has to be a number.
static bool needsLeadingZero(uint64_t Value)
@@ -89,24 +83,25 @@ format_object<int64_t> MCInstPrinter::formatDec(int64_t Value) const {
}
format_object<int64_t> MCInstPrinter::formatHex(int64_t Value) const {
- switch(PrintHexStyle) {
+ switch (PrintHexStyle) {
case HexStyle::C:
- if (Value < 0)
+ if (Value < 0) {
+ if (Value == std::numeric_limits<int64_t>::min())
+ return format<int64_t>("-0x8000000000000000", Value);
return format("-0x%" PRIx64, -Value);
- else
- return format("0x%" PRIx64, Value);
+ }
+ return format("0x%" PRIx64, Value);
case HexStyle::Asm:
if (Value < 0) {
- if (needsLeadingZero((uint64_t)(-Value)))
+ if (Value == std::numeric_limits<int64_t>::min())
+ return format<int64_t>("-8000000000000000h", Value);
+ if (needsLeadingZero(-(uint64_t)(Value)))
return format("-0%" PRIx64 "h", -Value);
- else
- return format("-%" PRIx64 "h", -Value);
- } else {
- if (needsLeadingZero((uint64_t)(Value)))
- return format("0%" PRIx64 "h", Value);
- else
- return format("%" PRIx64 "h", Value);
+ return format("-%" PRIx64 "h", -Value);
}
+ if (needsLeadingZero((uint64_t)(Value)))
+ return format("0%" PRIx64 "h", Value);
+ return format("%" PRIx64 "h", Value);
}
llvm_unreachable("unsupported print style");
}
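
The new early returns exist because negating INT64_MIN overflows a signed 64-bit integer, which is undefined behavior, so the printer pins that single value to a literal string. A small standalone illustration:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  int64_t V = INT64_MIN;
  // printf("-0x%" PRIx64, -V) would negate INT64_MIN first: UB. Pinning the
  // string, as the patch does, sidesteps the negation entirely.
  (void)V;
  std::printf("-0x8000000000000000\n");
  // Every other negative value can safely go through the generic path:
  int64_t W = -42;
  std::printf("-0x%" PRIx64 "\n", uint64_t(-W)); // prints -0x2a
  return 0;
}
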
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index eca87f940bf5..54741fdd686d 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -33,3 +33,9 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
Target = Addr+Size+Imm;
return true;
}
+
+Optional<uint64_t>
+MCInstrAnalysis::evaluateMemoryOperandAddress(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ return None;
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 613f255a4ea4..8e558a36b7a1 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -330,6 +330,7 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Sym,
case MCSA_Protected:
case MCSA_Weak:
case MCSA_Local:
+ case MCSA_LGlobal:
return false;
case MCSA_Global:
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 9f555abe1404..70c0409ece7a 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -28,7 +28,7 @@ static bool useCompactUnwind(const Triple &T) {
return false;
// aarch64 always has it.
- if (T.getArch() == Triple::aarch64)
+ if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)
return true;
// armv7k always has it.
@@ -57,7 +57,8 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT,
SectionKind::getReadOnly());
- if (T.isOSDarwin() && T.getArch() == Triple::aarch64)
+ if (T.isOSDarwin() &&
+ (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32))
SupportsCompactUnwindWithoutEHFrame = true;
if (T.isWatchABI())
@@ -193,7 +194,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_X86_64_MODE_DWARF
- else if (T.getArch() == Triple::aarch64)
+ else if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)
CompactUnwindDwarfEHFrameOnly = 0x03000000; // UNWIND_ARM64_MODE_DWARF
else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb)
CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_ARM_MODE_DWARF
@@ -768,7 +769,12 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
// the ABI or object file format. For example, the XL compiler uses an unnamed
// csect for program code.
TextSection = Ctx->getXCOFFSection(
- ".text", XCOFF::StorageMappingClass::XMC_PR, SectionKind::getText());
+ ".text", XCOFF::StorageMappingClass::XMC_PR, XCOFF::XTY_SD,
+ XCOFF::C_HIDEXT, SectionKind::getText());
+
+ DataSection = Ctx->getXCOFFSection(
+ ".data", XCOFF::StorageMappingClass::XMC_RW, XCOFF::XTY_SD,
+ XCOFF::C_HIDEXT, SectionKind::getData());
}
void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 1587d8498666..83f6ab8fe332 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -27,7 +27,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context,
std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter)
: MCStreamer(Context),
- Assembler(llvm::make_unique<MCAssembler>(
+ Assembler(std::make_unique<MCAssembler>(
Context, std::move(TAB), std::move(Emitter), std::move(OW))),
EmitEHFrame(true), EmitDebugFrame(false) {}
@@ -539,7 +539,7 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
// Update the maximum alignment on the current section if necessary.
MCSection *CurSec = getCurrentSectionOnly();
if (ByteAlignment > CurSec->getAlignment())
- CurSec->setAlignment(ByteAlignment);
+ CurSec->setAlignment(Align(ByteAlignment));
}
void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment,
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 084f6a7a2e14..b59ac08ad6cc 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCContext.h"
@@ -524,6 +525,19 @@ private:
/// directives parsed by this class.
StringMap<DirectiveKind> DirectiveKindMap;
+ // Codeview def_range type parsing.
+ enum CVDefRangeType {
+ CVDR_DEFRANGE = 0, // Placeholder
+ CVDR_DEFRANGE_REGISTER,
+ CVDR_DEFRANGE_FRAMEPOINTER_REL,
+ CVDR_DEFRANGE_SUBFIELD_REGISTER,
+ CVDR_DEFRANGE_REGISTER_REL
+ };
+
+ /// Maps Codeview def_range types --> CVDefRangeType enum, for
+ /// Codeview def_range types parsed by this class.
+ StringMap<CVDefRangeType> CVDefRangeTypeMap;
+
// ".ascii", ".asciz", ".string"
bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc"
@@ -671,6 +685,7 @@ private:
bool parseDirectiveAddrsigSym();
void initializeDirectiveKindMap();
+ void initializeCVDefRangeTypeMap();
};
} // end anonymous namespace
@@ -714,12 +729,14 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
PlatformParser.reset(createWasmAsmParser());
break;
case MCObjectFileInfo::IsXCOFF:
- // TODO: Need to implement createXCOFFAsmParser for XCOFF format.
+ report_fatal_error(
+ "Need to implement createXCOFFAsmParser for XCOFF format.");
break;
}
PlatformParser->Initialize(*this);
initializeDirectiveKindMap();
+ initializeCVDefRangeTypeMap();
NumOfMacroInstantiations = 0;
}
@@ -1142,7 +1159,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
}
- MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
+ MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
+ if (!Sym)
+ Sym = getContext().getOrCreateSymbol(SymbolName);
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
@@ -1737,6 +1756,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
StringMap<DirectiveKind>::const_iterator DirKindIt =
DirectiveKindMap.find(IDVal);
DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
? DK_NO_DIRECTIVE
: DirKindIt->getValue();
switch (DirKind) {
@@ -2895,11 +2915,27 @@ bool AsmParser::parseEscapedString(std::string &Data) {
}
// Recognize escaped characters. Note that this escape semantics currently
- // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes.
+ // loosely follows Darwin 'as'.
++i;
if (i == e)
return TokError("unexpected backslash at end of string");
+ // Recognize hex sequences similarly to GNU 'as'.
+ if (Str[i] == 'x' || Str[i] == 'X') {
+ size_t length = Str.size();
+ if (i + 1 >= length || !isHexDigit(Str[i + 1]))
+ return TokError("invalid hexadecimal escape sequence");
+
+ // Consume hex characters. Like GNU 'as', read all hexadecimal digits;
+ // the value is then truncated to the lowest byte when appended.
+ unsigned Value = 0;
+ while (i + 1 < length && isHexDigit(Str[i + 1]))
+ Value = Value * 16 + hexDigitValue(Str[++i]);
+
+ Data += (unsigned char)(Value & 0xFF);
+ continue;
+ }
+
// Recognize octal sequences.
if ((unsigned)(Str[i] - '0') <= 7) {
// Consume up to three octal characters.
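
A standalone restatement of the new \x rule (hexDigitValue and the parser state are simplified away): every hex digit after the escape is consumed, and only the lowest byte of the accumulated value survives:

#include <cctype>
#include <cstdio>
#include <string>

int main() {
  // The escape \x7A41 consumes all four digits, but only the low byte is
  // appended: 0x7A41 & 0xFF == 0x41 == 'A'.
  std::string Str = "\\x7A41";
  size_t i = 1; // position of 'x'
  unsigned Value = 0;
  while (i + 1 < Str.size() && isxdigit((unsigned char)Str[i + 1])) {
    ++i;
    Value = Value * 16 + (isdigit((unsigned char)Str[i])
                              ? unsigned(Str[i] - '0')
                              : unsigned(tolower(Str[i]) - 'a' + 10));
  }
  std::printf("%c\n", (unsigned char)(Value & 0xFF)); // prints A
  return 0;
}
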
@@ -3825,6 +3861,13 @@ bool AsmParser::parseDirectiveCVInlineLinetable() {
return false;
}
+void AsmParser::initializeCVDefRangeTypeMap() {
+ CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
+ CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
+ CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
+ CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
+}
+
/// parseDirectiveCVDefRange
/// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
bool AsmParser::parseDirectiveCVDefRange() {
@@ -3846,13 +3889,92 @@ bool AsmParser::parseDirectiveCVDefRange() {
Ranges.push_back({GapStartSym, GapEndSym});
}
- std::string FixedSizePortion;
- if (parseToken(AsmToken::Comma, "unexpected token in directive") ||
- parseEscapedString(FixedSizePortion))
- return true;
-
- getStreamer().EmitCVDefRangeDirective(Ranges, FixedSizePortion);
- return false;
+ StringRef CVDefRangeTypeStr;
+ if (parseToken(
+ AsmToken::Comma,
+ "expected comma before def_range type in .cv_def_range directive") ||
+ parseIdentifier(CVDefRangeTypeStr))
+ return Error(Loc, "expected def_range type in directive");
+
+ StringMap<CVDefRangeType>::const_iterator CVTypeIt =
+ CVDefRangeTypeMap.find(CVDefRangeTypeStr);
+ CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
+ ? CVDR_DEFRANGE
+ : CVTypeIt->getValue();
+ switch (CVDRType) {
+ case CVDR_DEFRANGE_REGISTER: {
+ int64_t DRRegister;
+ if (parseToken(AsmToken::Comma, "expected comma before register number in "
+ ".cv_def_range directive") ||
+ parseAbsoluteExpression(DRRegister))
+ return Error(Loc, "expected register number");
+
+ codeview::DefRangeRegisterHeader DRHdr;
+ DRHdr.Register = DRRegister;
+ DRHdr.MayHaveNoName = 0;
+ getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr);
+ break;
+ }
+ case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
+ int64_t DROffset;
+ if (parseToken(AsmToken::Comma,
+ "expected comma before offset in .cv_def_range directive") ||
+ parseAbsoluteExpression(DROffset))
+ return Error(Loc, "expected offset value");
+
+ codeview::DefRangeFramePointerRelHeader DRHdr;
+ DRHdr.Offset = DROffset;
+ getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr);
+ break;
+ }
+ case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
+ int64_t DRRegister;
+ int64_t DROffsetInParent;
+ if (parseToken(AsmToken::Comma, "expected comma before register number in "
+ ".cv_def_range directive") ||
+ parseAbsoluteExpression(DRRegister))
+ return Error(Loc, "expected register number");
+ if (parseToken(AsmToken::Comma,
+ "expected comma before offset in .cv_def_range directive") ||
+ parseAbsoluteExpression(DROffsetInParent))
+ return Error(Loc, "expected offset value");
+
+ codeview::DefRangeSubfieldRegisterHeader DRHdr;
+ DRHdr.Register = DRRegister;
+ DRHdr.MayHaveNoName = 0;
+ DRHdr.OffsetInParent = DROffsetInParent;
+ getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr);
+ break;
+ }
+ case CVDR_DEFRANGE_REGISTER_REL: {
+ int64_t DRRegister;
+ int64_t DRFlags;
+ int64_t DRBasePointerOffset;
+ if (parseToken(AsmToken::Comma, "expected comma before register number in "
+ ".cv_def_range directive") ||
+ parseAbsoluteExpression(DRRegister))
+ return Error(Loc, "expected register value");
+ if (parseToken(
+ AsmToken::Comma,
+ "expected comma before flag value in .cv_def_range directive") ||
+ parseAbsoluteExpression(DRFlags))
+ return Error(Loc, "expected flag value");
+ if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
+ "in .cv_def_range directive") ||
+ parseAbsoluteExpression(DRBasePointerOffset))
+ return Error(Loc, "expected base pointer offset value");
+
+ codeview::DefRangeRegisterRelHeader DRHdr;
+ DRHdr.Register = DRRegister;
+ DRHdr.Flags = DRFlags;
+ DRHdr.BasePointerOffset = DRBasePointerOffset;
+ getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr);
+ break;
+ }
+ default:
+ return Error(Loc, "unexpected def_range type in .cv_def_range directive");
+ }
+ return true;
}
/// parseDirectiveCVString
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 1217ea99e465..06f8310ae061 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -69,6 +69,7 @@ class COFFAsmParser : public MCAsmParserExtension {
addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecIdx>(".secidx");
addDirectiveHandler<&COFFAsmParser::ParseDirectiveLinkOnce>(".linkonce");
addDirectiveHandler<&COFFAsmParser::ParseDirectiveRVA>(".rva");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
// Win64 EH directives.
addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>(
@@ -83,21 +84,10 @@ class COFFAsmParser : public MCAsmParserExtension {
".seh_handler");
addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>(
".seh_handlerdata");
- addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>(
- ".seh_pushreg");
- addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>(
- ".seh_setframe");
addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>(
".seh_stackalloc");
- addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>(
- ".seh_savereg");
- addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>(
- ".seh_savexmm");
- addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>(
- ".seh_pushframe");
addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>(
".seh_endprologue");
- addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
}
bool ParseSectionDirectiveText(StringRef, SMLoc) {
@@ -143,12 +133,7 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseSEHDirectiveEndChained(StringRef, SMLoc);
bool ParseSEHDirectiveHandler(StringRef, SMLoc);
bool ParseSEHDirectiveHandlerData(StringRef, SMLoc);
- bool ParseSEHDirectivePushReg(StringRef, SMLoc);
- bool ParseSEHDirectiveSetFrame(StringRef, SMLoc);
bool ParseSEHDirectiveAllocStack(StringRef, SMLoc);
- bool ParseSEHDirectiveSaveReg(StringRef, SMLoc);
- bool ParseSEHDirectiveSaveXMM(StringRef, SMLoc);
- bool ParseSEHDirectivePushFrame(StringRef, SMLoc);
bool ParseSEHDirectiveEndProlog(StringRef, SMLoc);
bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except);
@@ -682,39 +667,6 @@ bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc Loc) {
return false;
}
-bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc Loc) {
- unsigned Reg = 0;
- if (ParseSEHRegisterNumber(Reg))
- return true;
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
-
- Lex();
- getStreamer().EmitWinCFIPushReg(Reg, Loc);
- return false;
-}
-
-bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc Loc) {
- unsigned Reg = 0;
- int64_t Off;
- if (ParseSEHRegisterNumber(Reg))
- return true;
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("you must specify a stack pointer offset");
-
- Lex();
- if (getParser().parseAbsoluteExpression(Off))
- return true;
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
-
- Lex();
- getStreamer().EmitWinCFISetFrame(Reg, Off, Loc);
- return false;
-}
-
bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) {
int64_t Size;
if (getParser().parseAbsoluteExpression(Size))
@@ -728,71 +680,6 @@ bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) {
return false;
}
-bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc Loc) {
- unsigned Reg = 0;
- int64_t Off;
- if (ParseSEHRegisterNumber(Reg))
- return true;
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("you must specify an offset on the stack");
-
- Lex();
- if (getParser().parseAbsoluteExpression(Off))
- return true;
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
-
- Lex();
- // FIXME: Err on %xmm* registers
- getStreamer().EmitWinCFISaveReg(Reg, Off, Loc);
- return false;
-}
-
-// FIXME: This method is inherently x86-specific. It should really be in the
-// x86 backend.
-bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc Loc) {
- unsigned Reg = 0;
- int64_t Off;
- if (ParseSEHRegisterNumber(Reg))
- return true;
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("you must specify an offset on the stack");
-
- Lex();
- if (getParser().parseAbsoluteExpression(Off))
- return true;
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
-
- Lex();
- // FIXME: Err on non-%xmm* registers
- getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc);
- return false;
-}
-
-bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc Loc) {
- bool Code = false;
- StringRef CodeID;
- if (getLexer().is(AsmToken::At)) {
- SMLoc startLoc = getLexer().getLoc();
- Lex();
- if (!getParser().parseIdentifier(CodeID)) {
- if (CodeID != "code")
- return Error(startLoc, "expected @code");
- Code = true;
- }
- }
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
-
- Lex();
- getStreamer().EmitWinCFIPushFrame(Code, Loc);
- return false;
-}
-
bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc Loc) {
Lex();
getStreamer().EmitWinCFIEndProlog(Loc);
@@ -816,46 +703,6 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
return false;
}
-bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
- SMLoc startLoc = getLexer().getLoc();
- if (getLexer().is(AsmToken::Percent)) {
- const MCRegisterInfo *MRI = getContext().getRegisterInfo();
- SMLoc endLoc;
- unsigned LLVMRegNo;
- if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc))
- return true;
-
-#if 0
- // FIXME: TargetAsmInfo::getCalleeSavedRegs() commits a serious layering
- // violation so this validation code is disabled.
-
- // Check that this is a non-volatile register.
- const unsigned *NVRegs = TAI.getCalleeSavedRegs();
- unsigned i;
- for (i = 0; NVRegs[i] != 0; ++i)
- if (NVRegs[i] == LLVMRegNo)
- break;
- if (NVRegs[i] == 0)
- return Error(startLoc, "expected non-volatile register");
-#endif
-
- int SEHRegNo = MRI->getSEHRegNum(LLVMRegNo);
- if (SEHRegNo < 0)
- return Error(startLoc,"register can't be represented in SEH unwind info");
- RegNo = SEHRegNo;
- }
- else {
- int64_t n;
- if (getParser().parseAbsoluteExpression(n))
- return true;
- if (n > 15)
- return Error(startLoc, "register number is too high");
- RegNo = n;
- }
-
- return false;
-}
-
namespace llvm {
MCAsmParserExtension *createCOFFAsmParser() {
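
The removed handlers cover directives that are inherently x86-specific (as the in-tree FIXME already noted), so the expectation is that the x86 target assembly parser registers them instead. A minimal sketch of that receiving end, assuming a handler table in a class along the lines of X86AsmParser (names illustrative, not confirmed by this patch):

    // Hypothetical registration inside an x86 target assembly parser; the
    // generic COFF parser above keeps only the target-independent directives.
    addDirectiveHandler<&X86AsmParser::parseSEHDirectivePushReg>(".seh_pushreg");
    addDirectiveHandler<&X86AsmParser::parseSEHDirectiveSetFrame>(".seh_setframe");
    addDirectiveHandler<&X86AsmParser::parseSEHDirectiveSaveReg>(".seh_savereg");
    addDirectiveHandler<&X86AsmParser::parseSEHDirectiveSaveXMM>(".seh_savexmm");
    addDirectiveHandler<&X86AsmParser::parseSEHDirectivePushFrame>(".seh_pushframe");
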
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 1160934dc62c..bd66e5f39c0d 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -778,8 +778,8 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
raw_fd_ostream *OS = getContext().getSecureLog();
if (!OS) {
std::error_code EC;
- auto NewOS = llvm::make_unique<raw_fd_ostream>(
- StringRef(SecureLogFile), EC, sys::fs::F_Append | sys::fs::F_Text);
+ auto NewOS = std::make_unique<raw_fd_ostream>(
+ StringRef(SecureLogFile), EC, sys::fs::OF_Append | sys::fs::OF_Text);
if (EC)
return Error(IDLoc, Twine("can't open secure log file: ") +
SecureLogFile + " (" + EC.message() + ")");
diff --git a/lib/MC/MCParser/WasmAsmParser.cpp b/lib/MC/MCParser/WasmAsmParser.cpp
index 28d4459fecd4..0c242aed706d 100644
--- a/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/lib/MC/MCParser/WasmAsmParser.cpp
@@ -123,6 +123,7 @@ public:
// See use of .init_array in WasmObjectWriter and
// TargetLoweringObjectFileWasm
.StartsWith(".init_array", SectionKind::getData())
+ .StartsWith(".debug_", SectionKind::getMetadata())
.Default(Optional<SectionKind>());
if (!Kind.hasValue())
return Parser->Error(Lexer->getLoc(), "unknown section kind: " + Name);
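
With the added line, any section whose name starts with ".debug_" is classified as metadata rather than rejected as unknown. A reduced sketch of the classifier, showing only two of the real prefixes:

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/StringSwitch.h"
    #include "llvm/MC/SectionKind.h"

    llvm::Optional<llvm::SectionKind> classify(llvm::StringRef Name) {
      // DWARF sections (.debug_info, .debug_line, ...) now map to metadata.
      return llvm::StringSwitch<llvm::Optional<llvm::SectionKind>>(Name)
          .StartsWith(".init_array", llvm::SectionKind::getData())
          .StartsWith(".debug_", llvm::SectionKind::getMetadata())
          .Default(llvm::Optional<llvm::SectionKind>());
    }
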
diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp
index 4273b876b7bb..d491c0eb7e06 100644
--- a/lib/MC/MCRegisterInfo.cpp
+++ b/lib/MC/MCRegisterInfo.cpp
@@ -20,15 +20,16 @@
using namespace llvm;
-unsigned MCRegisterInfo::getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
- const MCRegisterClass *RC) const {
+MCRegister
+MCRegisterInfo::getMatchingSuperReg(MCRegister Reg, unsigned SubIdx,
+ const MCRegisterClass *RC) const {
for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers)
if (RC->contains(*Supers) && Reg == getSubReg(*Supers, SubIdx))
return *Supers;
return 0;
}
-unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const {
+MCRegister MCRegisterInfo::getSubReg(MCRegister Reg, unsigned Idx) const {
assert(Idx && Idx < getNumSubRegIndices() &&
"This is not a subregister index");
// Get a pointer to the corresponding SubRegIndices list. This list has the
@@ -40,7 +41,8 @@ unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const {
return 0;
}
-unsigned MCRegisterInfo::getSubRegIndex(unsigned Reg, unsigned SubReg) const {
+unsigned MCRegisterInfo::getSubRegIndex(MCRegister Reg,
+ MCRegister SubReg) const {
assert(SubReg && SubReg < getNumRegs() && "This is not a register");
// Get a pointer to the corresponding SubRegIndices list. This list has the
// name of each sub-register in the same order as MCSubRegIterator.
@@ -63,7 +65,7 @@ unsigned MCRegisterInfo::getSubRegIdxOffset(unsigned Idx) const {
return SubRegIdxRanges[Idx].Offset;
}
-int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+int MCRegisterInfo::getDwarfRegNum(MCRegister RegNum, bool isEH) const {
const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs;
unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize;
@@ -76,29 +78,18 @@ int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return I->ToReg;
}
-int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
+Optional<unsigned> MCRegisterInfo::getLLVMRegNum(unsigned RegNum,
+ bool isEH) const {
const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs;
unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize;
if (!M)
- return -1;
- DwarfLLVMRegPair Key = { RegNum, 0 };
- const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
- assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum");
- return I->ToReg;
-}
-
-int MCRegisterInfo::getLLVMRegNumFromEH(unsigned RegNum) const {
- const DwarfLLVMRegPair *M = EHDwarf2LRegs;
- unsigned Size = EHDwarf2LRegsSize;
-
- if (!M)
- return -1;
+ return None;
DwarfLLVMRegPair Key = { RegNum, 0 };
const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
- if (I == M+Size || I->FromReg != RegNum)
- return -1;
- return I->ToReg;
+ if (I != M + Size && I->FromReg == RegNum)
+ return I->ToReg;
+ return None;
}
int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const {
@@ -110,22 +101,21 @@ int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const {
// a corresponding LLVM register number at all. So if we can't map the
// EH register number to an LLVM register number, assume it's just a
// valid DWARF register number as is.
- int LRegNum = getLLVMRegNumFromEH(RegNum);
- if (LRegNum != -1)
- return getDwarfRegNum(LRegNum, false);
+ if (Optional<unsigned> LRegNum = getLLVMRegNum(RegNum, true))
+ return getDwarfRegNum(*LRegNum, false);
return RegNum;
}
-int MCRegisterInfo::getSEHRegNum(unsigned RegNum) const {
- const DenseMap<unsigned, int>::const_iterator I = L2SEHRegs.find(RegNum);
+int MCRegisterInfo::getSEHRegNum(MCRegister RegNum) const {
+ const DenseMap<MCRegister, int>::const_iterator I = L2SEHRegs.find(RegNum);
if (I == L2SEHRegs.end()) return (int)RegNum;
return I->second;
}
-int MCRegisterInfo::getCodeViewRegNum(unsigned RegNum) const {
+int MCRegisterInfo::getCodeViewRegNum(MCRegister RegNum) const {
if (L2CVRegs.empty())
report_fatal_error("target does not implement codeview register mapping");
- const DenseMap<unsigned, int>::const_iterator I = L2CVRegs.find(RegNum);
+ const DenseMap<MCRegister, int>::const_iterator I = L2CVRegs.find(RegNum);
if (I == L2CVRegs.end())
report_fatal_error("unknown codeview register " + (RegNum < getNumRegs()
? getName(RegNum)
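
The API change worth calling out here is getLLVMRegNum: the -1 failure sentinel is replaced by an empty Optional, and the separate getLLVMRegNumFromEH entry point folds into the isEH flag. A caller-side migration sketch, where useDwarfReg and reportUnmapped are placeholders for the caller's own logic:

    // Before: int R = MRI->getLLVMRegNum(DwarfReg, IsEH); if (R != -1) ...
    if (llvm::Optional<unsigned> LLVMReg = MRI->getLLVMRegNum(DwarfReg, IsEH))
      useDwarfReg(*LLVMReg);      // mapping found
    else
      reportUnmapped(DwarfReg);   // no LLVM register for this DWARF number
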
diff --git a/lib/MC/MCSectionXCOFF.cpp b/lib/MC/MCSectionXCOFF.cpp
index d1a637345024..d52959f15f92 100644
--- a/lib/MC/MCSectionXCOFF.cpp
+++ b/lib/MC/MCSectionXCOFF.cpp
@@ -15,19 +15,65 @@ using namespace llvm;
MCSectionXCOFF::~MCSectionXCOFF() = default;
+static StringRef getMappingClassString(XCOFF::StorageMappingClass SMC) {
+ switch (SMC) {
+ case XCOFF::XMC_DS:
+ return "DS";
+ case XCOFF::XMC_RW:
+ return "RW";
+ case XCOFF::XMC_PR:
+ return "PR";
+ default:
+ report_fatal_error("Unhandled storage-mapping class.");
+ }
+}
+
void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
if (getKind().isText()) {
+ if (getMappingClass() != XCOFF::XMC_PR)
+ report_fatal_error("Unhandled storage-mapping class for .text csect");
+
OS << "\t.csect " << getSectionName() << "["
- << "PR"
+ << getMappingClassString(getMappingClass())
<< "]" << '\n';
return;
}
+ if (getKind().isData()) {
+ switch (getMappingClass()) {
+ case XCOFF::XMC_RW:
+ case XCOFF::XMC_DS:
+ OS << "\t.csect " << getSectionName() << "["
+ << getMappingClassString(getMappingClass()) << "]" << '\n';
+ break;
+ case XCOFF::XMC_TC0:
+ OS << "\t.toc\n";
+ break;
+ default:
+ report_fatal_error(
+ "Unhandled storage-mapping class for .data csect.");
+ }
+ return;
+ }
+
+ if (getKind().isBSSLocal() || getKind().isCommon()) {
+ assert((getMappingClass() == XCOFF::XMC_RW ||
+ getMappingClass() == XCOFF::XMC_BS) &&
+ "Generated a storage-mapping class for a common/bss csect we don't "
+ "understand how to switch to.");
+ assert(getCSectType() == XCOFF::XTY_CM &&
+ "wrong csect type for .bss csect");
+ // No section-switching directive is needed for commons: the '.comm' and
+ // '.lcomm' directives for the variable will create the needed csect.
+ return;
+ }
+
report_fatal_error("Printing for this SectionKind is unimplemented.");
}
bool MCSectionXCOFF::UseCodeAlign() const { return getKind().isText(); }
-bool MCSectionXCOFF::isVirtualSection() const { return !getKind().isCommon(); }
+bool MCSectionXCOFF::isVirtualSection() const { return XCOFF::XTY_CM == Type; }
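
In summary, the directives the updated printer produces for each SectionKind and storage-mapping class combination (a condensed view of the code above, not an exhaustive table):

    // kind          mapping class      emitted directive
    // text          XMC_PR             \t.csect name[PR]
    // data          XMC_RW / XMC_DS    \t.csect name[RW] or name[DS]
    // data          XMC_TC0            \t.toc
    // bss/common    XMC_RW / XMC_BS    nothing; .comm/.lcomm create the csect
    // (any other combination)          report_fatal_error
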
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index decbb96817e3..b8278cb11079 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeView.h"
@@ -21,6 +22,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
@@ -327,10 +330,56 @@ void MCStreamer::EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId,
const MCSymbol *FnStartSym,
const MCSymbol *FnEndSym) {}
+/// Only call this on endian-specific types like ulittle16_t and little32_t, or
+/// structs composed of them.
+template <typename T>
+static void copyBytesForDefRange(SmallString<20> &BytePrefix,
+ codeview::SymbolKind SymKind,
+ const T &DefRangeHeader) {
+ BytePrefix.resize(2 + sizeof(T));
+ codeview::ulittle16_t SymKindLE = codeview::ulittle16_t(SymKind);
+ memcpy(&BytePrefix[0], &SymKindLE, 2);
+ memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T));
+}
+
void MCStreamer::EmitCVDefRangeDirective(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion) {}
+void MCStreamer::EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeRegisterRelHeader DRHdr) {
+ SmallString<20> BytePrefix;
+ copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_REGISTER_REL, DRHdr);
+ EmitCVDefRangeDirective(Ranges, BytePrefix);
+}
+
+void MCStreamer::EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeSubfieldRegisterHeader DRHdr) {
+ SmallString<20> BytePrefix;
+ copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_SUBFIELD_REGISTER,
+ DRHdr);
+ EmitCVDefRangeDirective(Ranges, BytePrefix);
+}
+
+void MCStreamer::EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeRegisterHeader DRHdr) {
+ SmallString<20> BytePrefix;
+ copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_REGISTER, DRHdr);
+ EmitCVDefRangeDirective(Ranges, BytePrefix);
+}
+
+void MCStreamer::EmitCVDefRangeDirective(
+ ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
+ codeview::DefRangeFramePointerRelHeader DRHdr) {
+ SmallString<20> BytePrefix;
+ copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_FRAMEPOINTER_REL,
+ DRHdr);
+ EmitCVDefRangeDirective(Ranges, BytePrefix);
+}
+
void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol) {
}
@@ -631,7 +680,7 @@ void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) {
MCSymbol *StartProc = EmitCFILabel();
WinFrameInfos.emplace_back(
- llvm::make_unique<WinEH::FrameInfo>(Symbol, StartProc));
+ std::make_unique<WinEH::FrameInfo>(Symbol, StartProc));
CurrentWinFrameInfo = WinFrameInfos.back().get();
CurrentWinFrameInfo->TextSection = getCurrentSectionOnly();
}
@@ -665,7 +714,7 @@ void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) {
MCSymbol *StartProc = EmitCFILabel();
- WinFrameInfos.emplace_back(llvm::make_unique<WinEH::FrameInfo>(
+ WinFrameInfos.emplace_back(std::make_unique<WinEH::FrameInfo>(
CurFrame->Function, StartProc, CurFrame));
CurrentWinFrameInfo = WinFrameInfos.back().get();
CurrentWinFrameInfo->TextSection = getCurrentSectionOnly();
@@ -763,18 +812,23 @@ MCSection *MCStreamer::getAssociatedXDataSection(const MCSection *TextSec) {
void MCStreamer::EmitSyntaxDirective() {}
-void MCStreamer::EmitWinCFIPushReg(unsigned Register, SMLoc Loc) {
+static unsigned encodeSEHRegNum(MCContext &Ctx, MCRegister Reg) {
+ return Ctx.getRegisterInfo()->getSEHRegNum(Reg);
+}
+
+void MCStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
MCSymbol *Label = EmitCFILabel();
- WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol(Label, Register);
+ WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol(
+ Label, encodeSEHRegNum(Context, Register));
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset,
+void MCStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
@@ -790,8 +844,8 @@ void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset,
MCSymbol *Label = EmitCFILabel();
- WinEH::Instruction Inst =
- Win64EH::Instruction::SetFPReg(Label, Register, Offset);
+ WinEH::Instruction Inst = Win64EH::Instruction::SetFPReg(
+ Label, encodeSEHRegNum(getContext(), Register), Offset);
CurFrame->LastFrameInst = CurFrame->Instructions.size();
CurFrame->Instructions.push_back(Inst);
}
@@ -813,7 +867,7 @@ void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset,
+void MCStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
@@ -825,12 +879,12 @@ void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset,
MCSymbol *Label = EmitCFILabel();
- WinEH::Instruction Inst =
- Win64EH::Instruction::SaveNonVol(Label, Register, Offset);
+ WinEH::Instruction Inst = Win64EH::Instruction::SaveNonVol(
+ Label, encodeSEHRegNum(Context, Register), Offset);
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset,
+void MCStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
@@ -840,8 +894,8 @@ void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset,
MCSymbol *Label = EmitCFILabel();
- WinEH::Instruction Inst =
- Win64EH::Instruction::SaveXMM(Label, Register, Offset);
+ WinEH::Instruction Inst = Win64EH::Instruction::SaveXMM(
+ Label, encodeSEHRegNum(Context, Register), Offset);
CurFrame->Instructions.push_back(Inst);
}
@@ -1009,6 +1063,10 @@ void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
void MCStreamer::EmitCOFFSymbolType(int Type) {
llvm_unreachable("this directive only supported on COFF targets");
}
+void MCStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlign) {
+ llvm_unreachable("this directive only supported on XCOFF targets");
+}
void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
void MCStreamer::emitELFSymverDirective(StringRef AliasName,
const MCSymbol *Aliasee) {}
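
A usage sketch for the new typed EmitCVDefRangeDirective overloads: the streamer serializes the little-endian record kind plus the header bytes and forwards to the StringRef overload. Field names follow codeview::DefRangeRegisterHeader in SymbolRecord.h; emitRegisterDefRange is a made-up wrapper:

    #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
    #include "llvm/MC/MCStreamer.h"

    void emitRegisterDefRange(llvm::MCStreamer &OS, const llvm::MCSymbol *Begin,
                              const llvm::MCSymbol *End, uint16_t CVReg) {
      llvm::codeview::DefRangeRegisterHeader DRHdr;
      DRHdr.Register = CVReg;   // CodeView register id, cf. getCodeViewRegNum()
      DRHdr.MayHaveNoName = 0;
      // Serialized as S_DEFRANGE_REGISTER followed by the header, then ranges.
      OS.EmitCVDefRangeDirective({{Begin, End}}, DRHdr);
    }
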
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 5fd48d9e1010..c8678df02bfd 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -315,3 +315,28 @@ void MCSubtargetInfo::initInstrItins(InstrItineraryData &InstrItins) const {
InstrItins = InstrItineraryData(getSchedModel(), Stages, OperandCycles,
ForwardingPaths);
}
+
+Optional<unsigned> MCSubtargetInfo::getCacheSize(unsigned Level) const {
+ return Optional<unsigned>();
+}
+
+Optional<unsigned>
+MCSubtargetInfo::getCacheAssociativity(unsigned Level) const {
+ return Optional<unsigned>();
+}
+
+Optional<unsigned> MCSubtargetInfo::getCacheLineSize(unsigned Level) const {
+ return Optional<unsigned>();
+}
+
+unsigned MCSubtargetInfo::getPrefetchDistance() const {
+ return 0;
+}
+
+unsigned MCSubtargetInfo::getMaxPrefetchIterationsAhead() const {
+ return UINT_MAX;
+}
+
+unsigned MCSubtargetInfo::getMinPrefetchStride() const {
+ return 1;
+}
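
These base-class defaults are deliberately conservative: unknown cache geometry, a prefetch distance of 0, and a minimum stride of 1. A hypothetical target override (MyTargetSubtargetInfo is illustrative) might report a 32 KiB, 8-way L1 with 64-byte lines:

    llvm::Optional<unsigned>
    MyTargetSubtargetInfo::getCacheSize(unsigned Level) const {
      return Level == 1 ? llvm::Optional<unsigned>(32 * 1024)
                        : llvm::Optional<unsigned>();
    }

    llvm::Optional<unsigned>
    MyTargetSubtargetInfo::getCacheAssociativity(unsigned Level) const {
      return Level == 1 ? llvm::Optional<unsigned>(8)
                        : llvm::Optional<unsigned>();
    }

    llvm::Optional<unsigned>
    MyTargetSubtargetInfo::getCacheLineSize(unsigned Level) const {
      return Level == 1 ? llvm::Optional<unsigned>(64)
                        : llvm::Optional<unsigned>();
    }
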
diff --git a/lib/MC/MCWasmObjectTargetWriter.cpp b/lib/MC/MCWasmObjectTargetWriter.cpp
index e46257823e34..1ccb3a58d5c1 100644
--- a/lib/MC/MCWasmObjectTargetWriter.cpp
+++ b/lib/MC/MCWasmObjectTargetWriter.cpp
@@ -10,8 +10,9 @@
using namespace llvm;
-MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit)
- : Is64Bit(Is64Bit) {}
+MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit,
+ bool IsEmscripten)
+ : Is64Bit(Is64Bit), IsEmscripten(IsEmscripten) {}
// Pin the vtable to this object file
MCWasmObjectTargetWriter::~MCWasmObjectTargetWriter() = default;
diff --git a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp
index 86fa72197855..e7e96ecbb3a0 100644
--- a/lib/MC/MCWasmStreamer.cpp
+++ b/lib/MC/MCWasmStreamer.cpp
@@ -122,7 +122,7 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
break;
case MCSA_NoDeadStrip:
- Symbol->setExported();
+ Symbol->setNoStrip();
break;
default:
diff --git a/lib/MC/MCWinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp
index 04d5f100a2ff..c5a21312140b 100644
--- a/lib/MC/MCWinCOFFStreamer.cpp
+++ b/lib/MC/MCWinCOFFStreamer.cpp
@@ -88,7 +88,19 @@ void MCWinCOFFStreamer::EmitLabel(MCSymbol *S, SMLoc Loc) {
}
void MCWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
- llvm_unreachable("not implemented");
+ // Let the target do whatever target-specific handling it needs to do.
+ getAssembler().getBackend().handleAssemblerFlag(Flag);
+
+ switch (Flag) {
+ // None of these require COFF-specific handling.
+ case MCAF_SyntaxUnified:
+ case MCAF_Code16:
+ case MCAF_Code32:
+ case MCAF_Code64:
+ break;
+ case MCAF_SubsectionsViaSymbols:
+ llvm_unreachable("COFF doesn't support .subsections_via_symbols");
+ }
}
void MCWinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
@@ -180,7 +192,7 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
MCSection *SXData = getContext().getObjectFileInfo()->getSXDataSection();
getAssembler().registerSection(*SXData);
if (SXData->getAlignment() < 4)
- SXData->setAlignment(4);
+ SXData->setAlignment(Align(4));
new MCSymbolIdFragment(Symbol, SXData);
@@ -197,7 +209,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) {
MCSection *Sec = getCurrentSectionOnly();
getAssembler().registerSection(*Sec);
if (Sec->getAlignment() < 4)
- Sec->setAlignment(4);
+ Sec->setAlignment(Align(4));
new MCSymbolIdFragment(Symbol, getCurrentSectionOnly());
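
The setAlignment changes are part of the migration from raw unsigned alignments to the strong llvm::Align type from Support/Alignment.h, which can only hold a power of two. A quick sketch of its invariants:

    #include "llvm/Support/Alignment.h"

    void alignDemo() {
      llvm::Align A(4);                     // constructor asserts power of two
      unsigned ShiftAmount = llvm::Log2(A); // == 2
      (void)ShiftAmount;
      // llvm::Align B(6);                  // would assert: not a power of two
    }
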
diff --git a/lib/MC/MCXCOFFStreamer.cpp b/lib/MC/MCXCOFFStreamer.cpp
index 071de024a3fa..50937d6adc0c 100644
--- a/lib/MC/MCXCOFFStreamer.cpp
+++ b/lib/MC/MCXCOFFStreamer.cpp
@@ -10,10 +10,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCXCOFFStreamer.h"
+#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
+#include "llvm/MC/MCXCOFFStreamer.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -25,14 +27,38 @@ MCXCOFFStreamer::MCXCOFFStreamer(MCContext &Context,
: MCObjectStreamer(Context, std::move(MAB), std::move(OW),
std::move(Emitter)) {}
-bool MCXCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+bool MCXCOFFStreamer::EmitSymbolAttribute(MCSymbol *Sym,
MCSymbolAttr Attribute) {
- report_fatal_error("Symbol attributes not implemented for XCOFF.");
+ auto *Symbol = cast<MCSymbolXCOFF>(Sym);
+ getAssembler().registerSymbol(*Symbol);
+
+ switch (Attribute) {
+ case MCSA_Global:
+ Symbol->setStorageClass(XCOFF::C_EXT);
+ Symbol->setExternal(true);
+ break;
+ default:
+ report_fatal_error("Not implemented yet.");
+ }
+ return true;
}
void MCXCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
- report_fatal_error("Emiting common symbols not implemented for XCOFF.");
+ getAssembler().registerSymbol(*Symbol);
+ Symbol->setExternal(cast<MCSymbolXCOFF>(Symbol)->getStorageClass() !=
+ XCOFF::C_HIDEXT);
+ Symbol->setCommon(Size, ByteAlignment);
+
+ // We need to add this symbol to the current fragment, which will belong to
+ // the containing csect.
+ auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ assert(F && "Expected a valid section with a fragment set.");
+ Symbol->setFragment(F);
+
+ // Emit the alignment and storage for the variable to the section.
+ EmitValueToAlignment(ByteAlignment);
+ EmitZeros(Size);
}
void MCXCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
@@ -42,8 +68,18 @@ void MCXCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
}
void MCXCOFFStreamer::EmitInstToData(const MCInst &Inst,
- const MCSubtargetInfo &) {
- report_fatal_error("Instruction emission not implemented for XCOFF.");
+ const MCSubtargetInfo &STI) {
+ MCAssembler &Assembler = getAssembler();
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+
+ // TODO: Handle Fixups later
+
+ MCDataFragment *DF = getOrCreateDataFragment(&STI);
+ DF->setHasInstructions(STI);
+ DF->getContents().append(Code.begin(), Code.end());
}
MCStreamer *llvm::createXCOFFStreamer(MCContext &Context,
@@ -57,3 +93,9 @@ MCStreamer *llvm::createXCOFFStreamer(MCContext &Context,
S->getAssembler().setRelaxAll(true);
return S;
}
+
+void MCXCOFFStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol,
+ uint64_t Size,
+ unsigned ByteAlignment) {
+ EmitCommonSymbol(Symbol, Size, ByteAlignment);
+}
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index f0ceb86b25af..9f6af981aca1 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -25,6 +25,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -126,7 +127,7 @@ uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
const MCSection &NextSec = *Layout.getSectionOrder()[Next];
if (NextSec.isVirtualSection())
return 0;
- return OffsetToAlignment(EndAddr, NextSec.getAlignment());
+ return offsetToAlignment(EndAddr, Align(NextSec.getAlignment()));
}
void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
@@ -444,7 +445,8 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand(
}
// Pad to a multiple of the pointer size.
- W.OS.write_zeros(OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4));
+ W.OS.write_zeros(
+ offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4)));
assert(W.OS.tell() - Start == Size);
}
@@ -832,7 +834,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
// The section data is padded to 4 bytes.
//
// FIXME: Is this machine dependent?
- unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+ unsigned SectionDataPadding =
+ offsetToAlignment(SectionDataFileSize, Align(4));
SectionDataFileSize += SectionDataPadding;
// Write the prolog, starting with the header and load command...
@@ -997,7 +1000,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
#endif
Asm.getLOHContainer().emit(*this, Layout);
// Pad to a multiple of the pointer size.
- W.OS.write_zeros(OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4));
+ W.OS.write_zeros(
+ offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4)));
assert(W.OS.tell() - Start == LOHSize);
}
@@ -1043,6 +1047,6 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
std::unique_ptr<MCObjectWriter>
llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, bool IsLittleEndian) {
- return llvm::make_unique<MachObjectWriter>(std::move(MOTW), OS,
+ return std::make_unique<MachObjectWriter>(std::move(MOTW), OS,
IsLittleEndian);
}
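
offsetToAlignment(Value, Align) returns how many padding bytes are needed to reach the next boundary: offsetToAlignment(10, Align(8)) == 6 and offsetToAlignment(8, Align(8)) == 0. The padding pattern used throughout this writer, as a small helper sketch:

    #include "llvm/Support/Alignment.h"

    uint64_t pointerSizePadding(uint64_t StreamPos, bool Is64Bit) {
      // Zero bytes to emit so the next write starts pointer-aligned.
      return llvm::offsetToAlignment(StreamPos,
                                     Is64Bit ? llvm::Align(8) : llvm::Align(4));
    }
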
diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp
index cb3db8e2268c..c9c88ec58432 100644
--- a/lib/MC/StringTableBuilder.cpp
+++ b/lib/MC/StringTableBuilder.cpp
@@ -38,6 +38,7 @@ void StringTableBuilder::initSize() {
// Start the table with a NUL byte.
Size = 1;
break;
+ case XCOFF:
case WinCOFF:
// Make room to write the table size later.
Size = 4;
@@ -67,9 +68,12 @@ void StringTableBuilder::write(uint8_t *Buf) const {
if (!Data.empty())
memcpy(Buf + P.second, Data.data(), Data.size());
}
- if (K != WinCOFF)
- return;
- support::endian::write32le(Buf, Size);
+ // The COFF formats store the size of the string table in the first 4 bytes.
+ // For Windows, the format is little-endian; for AIX, it is big-endian.
+ if (K == WinCOFF)
+ support::endian::write32le(Buf, Size);
+ else if (K == XCOFF)
+ support::endian::write32be(Buf, Size);
}
// Returns the character at Pos from end of a string.
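
Both COFF flavours reserve four bytes at the front of the string table (initSize starts at 4) and patch the final size in afterwards; only the byte order differs. For a 0x24-byte table the prefix is 24 00 00 00 on Windows and 00 00 00 24 on AIX:

    #include "llvm/Support/Endian.h"
    #include <cstdint>

    void writeSizePrefix(uint8_t *Buf, uint32_t Size, bool IsXCOFF) {
      if (IsXCOFF)
        llvm::support::endian::write32be(Buf, Size); // AIX/Power is big-endian
      else
        llvm::support::endian::write32le(Buf, Size); // Windows is little-endian
    }
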
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 098343cd0107..c1ff3cc2480c 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -258,6 +258,7 @@ class WasmObjectWriter : public MCObjectWriter {
// TargetObjectWriter wrappers.
bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool isEmscripten() const { return TargetObjectWriter->isEmscripten(); }
void startSection(SectionBookkeeping &Section, unsigned SectionId);
void startCustomSection(SectionBookkeeping &Section, StringRef Name);
@@ -426,9 +427,10 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
- MCAsmBackend &Backend = Asm.getBackend();
- bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
- MCFixupKindInfo::FKF_IsPCRel;
+ // The WebAssembly backend should never generate FKF_IsPCRel fixups
+ assert(!(Asm.getBackend().getFixupKindInfo(Fixup.getKind()).Flags &
+ MCFixupKindInfo::FKF_IsPCRel));
+
const auto &FixupSection = cast<MCSectionWasm>(*Fragment->getParent());
uint64_t C = Target.getConstant();
uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
@@ -439,51 +441,22 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
return;
if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
- assert(RefB->getKind() == MCSymbolRefExpr::VK_None &&
- "Should not have constructed this");
-
- // Let A, B and C being the components of Target and R be the location of
- // the fixup. If the fixup is not pcrel, we want to compute (A - B + C).
- // If it is pcrel, we want to compute (A - B + C - R).
-
- // In general, Wasm has no relocations for -B. It can only represent (A + C)
- // or (A + C - R). If B = R + K and the relocation is not pcrel, we can
- // replace B to implement it: (A - R - K + C)
- if (IsPCRel) {
- Ctx.reportError(
- Fixup.getLoc(),
- "No relocation available to represent this relative expression");
- return;
- }
-
+ // To get here, the A - B expression must have failed evaluateAsRelocatable.
+ // This means either A or B is undefined, and WebAssembly can't represent
+ // either of those cases.
const auto &SymB = cast<MCSymbolWasm>(RefB->getSymbol());
-
- if (SymB.isUndefined()) {
- Ctx.reportError(Fixup.getLoc(),
- Twine("symbol '") + SymB.getName() +
- "' can not be undefined in a subtraction expression");
- return;
- }
-
- assert(!SymB.isAbsolute() && "Should have been folded");
- const MCSection &SecB = SymB.getSection();
- if (&SecB != &FixupSection) {
- Ctx.reportError(Fixup.getLoc(),
- "Cannot represent a difference across sections");
- return;
- }
-
- uint64_t SymBOffset = Layout.getSymbolOffset(SymB);
- uint64_t K = SymBOffset - FixupOffset;
- IsPCRel = true;
- C -= K;
+ Ctx.reportError(
+ Fixup.getLoc(),
+ Twine("symbol '") + SymB.getName() +
+ "': unsupported subtraction expression used in relocation.");
+ return;
}
// We either rejected the fixup or folded B into C at this point.
const MCSymbolRefExpr *RefA = Target.getSymA();
- const auto *SymA = RefA ? cast<MCSymbolWasm>(&RefA->getSymbol()) : nullptr;
+ const auto *SymA = cast<MCSymbolWasm>(&RefA->getSymbol());
- if (SymA && SymA->isVariable()) {
+ if (SymA->isVariable()) {
const MCExpr *Expr = SymA->getVariableValue();
const auto *Inner = cast<MCSymbolRefExpr>(Expr);
if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
@@ -496,8 +469,6 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
FixedValue = 0;
unsigned Type = TargetObjectWriter->getRelocType(Target, Fixup);
- assert(!IsPCRel);
- assert(SymA);
// Absolute offset within a section or a function.
// Currently only supported for metadata sections.
@@ -1296,12 +1267,12 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// Separate out the producers and target features sections
if (Name == "producers") {
- ProducersSection = llvm::make_unique<WasmCustomSection>(Name, &Section);
+ ProducersSection = std::make_unique<WasmCustomSection>(Name, &Section);
continue;
}
if (Name == "target_features") {
TargetFeaturesSection =
- llvm::make_unique<WasmCustomSection>(Name, &Section);
+ std::make_unique<WasmCustomSection>(Name, &Section);
continue;
}
@@ -1379,7 +1350,9 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
report_fatal_error(".size expression must be evaluatable");
auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection());
- assert(DataSection.isWasmData());
+ if (!DataSection.isWasmData())
+ report_fatal_error("data symbols must live in a data section: " +
+ WS.getName());
// For each data symbol, export it in the symtab as a reference to the
// corresponding Wasm data segment.
@@ -1473,8 +1446,12 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
Flags |= wasm::WASM_SYMBOL_BINDING_LOCAL;
if (WS.isUndefined())
Flags |= wasm::WASM_SYMBOL_UNDEFINED;
- if (WS.isExported())
- Flags |= wasm::WASM_SYMBOL_EXPORTED;
+ if (WS.isNoStrip()) {
+ Flags |= wasm::WASM_SYMBOL_NO_STRIP;
+ if (isEmscripten()) {
+ Flags |= wasm::WASM_SYMBOL_EXPORTED;
+ }
+ }
if (WS.getName() != WS.getImportName())
Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME;
@@ -1618,5 +1595,5 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
std::unique_ptr<MCObjectWriter>
llvm::createWasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW,
raw_pwrite_stream &OS) {
- return llvm::make_unique<WasmObjectWriter>(std::move(MOTW), OS);
+ return std::make_unique<WasmObjectWriter>(std::move(MOTW), OS);
}
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 0e6c05bc726d..749ed8badfaa 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -31,10 +31,10 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/JamCRC.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -239,7 +239,7 @@ WinCOFFObjectWriter::WinCOFFObjectWriter(
}
COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
- Symbols.push_back(make_unique<COFFSymbol>(Name));
+ Symbols.push_back(std::make_unique<COFFSymbol>(Name));
return Symbols.back().get();
}
@@ -251,7 +251,7 @@ COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol *Symbol) {
}
COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) {
- Sections.emplace_back(make_unique<COFFSection>(Name));
+ Sections.emplace_back(std::make_unique<COFFSection>(Name));
return Sections.back().get();
}
@@ -605,7 +605,7 @@ uint32_t WinCOFFObjectWriter::writeSectionContents(MCAssembler &Asm,
// Calculate our CRC with an initial value of '0', this is not how
// JamCRC is specified but it aligns with the expected output.
JamCRC JC(/*Init=*/0);
- JC.update(Buf);
+ JC.update(makeArrayRef(reinterpret_cast<uint8_t*>(Buf.data()), Buf.size()));
return JC.getCRC();
}
@@ -1098,5 +1098,5 @@ void MCWinCOFFObjectTargetWriter::anchor() {}
std::unique_ptr<MCObjectWriter> llvm::createWinCOFFObjectWriter(
std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS) {
- return llvm::make_unique<WinCOFFObjectWriter>(std::move(MOTW), OS);
+ return std::make_unique<WinCOFFObjectWriter>(std::move(MOTW), OS);
}
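
JamCRC moved from Support/JamCRC.h into Support/CRC.h, and update() now takes ArrayRef<uint8_t> instead of ArrayRef<char>, hence the makeArrayRef cast in the hunk above. Standalone usage sketch:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/CRC.h"

    uint32_t coffSectionChecksum(llvm::StringRef Data) {
      // An initial value of 0 is not standard JamCRC, but matches the
      // checksum expected in COFF section headers.
      llvm::JamCRC JC(/*Init=*/0);
      JC.update(llvm::makeArrayRef(
          reinterpret_cast<const uint8_t *>(Data.data()), Data.size()));
      return JC.getCRC();
    }
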
diff --git a/lib/MC/XCOFFObjectWriter.cpp b/lib/MC/XCOFFObjectWriter.cpp
index 9b9a7b6c118c..353c21068735 100644
--- a/lib/MC/XCOFFObjectWriter.cpp
+++ b/lib/MC/XCOFFObjectWriter.cpp
@@ -10,18 +10,135 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionXCOFF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCXCOFFObjectWriter.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <deque>
using namespace llvm;
+// An XCOFF object file has a limited set of predefined sections. The most
+// important ones for us (right now) are:
+// .text --> contains program code and read-only data.
+// .data --> contains initialized data, function descriptors, and the TOC.
+// .bss --> contains uninitialized data.
+// Each of these sections is composed of 'Control Sections'. A Control Section
+// is more commonly referred to as a csect. A csect is an indivisible unit of
+// code or data, and acts as a container for symbols. A csect is mapped
+// into a section based on its storage-mapping class, with the exception of
+// XMC_RW which gets mapped to either .data or .bss based on whether it's
+// explicitly initialized or not.
+//
+// We don't represent the sections in the MC layer as there is nothing
+// interesting about them at that level: they carry information that is
+// only relevant to the ObjectWriter, so we materialize them in this class.
namespace {
+constexpr unsigned DefaultSectionAlign = 4;
+
+// Packs the csect's alignment and type into a byte.
+uint8_t getEncodedType(const MCSectionXCOFF *);
+
+// Wrapper around an MCSymbolXCOFF.
+struct Symbol {
+ const MCSymbolXCOFF *const MCSym;
+ uint32_t SymbolTableIndex;
+
+ XCOFF::StorageClass getStorageClass() const {
+ return MCSym->getStorageClass();
+ }
+ StringRef getName() const { return MCSym->getName(); }
+ Symbol(const MCSymbolXCOFF *MCSym) : MCSym(MCSym), SymbolTableIndex(-1) {}
+};
+
+// Wrapper for an MCSectionXCOFF.
+struct ControlSection {
+ const MCSectionXCOFF *const MCCsect;
+ uint32_t SymbolTableIndex;
+ uint32_t Address;
+ uint32_t Size;
+
+ SmallVector<Symbol, 1> Syms;
+ StringRef getName() const { return MCCsect->getSectionName(); }
+ ControlSection(const MCSectionXCOFF *MCSec)
+ : MCCsect(MCSec), SymbolTableIndex(-1), Address(-1), Size(0) {}
+};
+
+// Represents the data related to a section excluding the csects that make up
+// the raw data of the section. The csects are stored separately as not all
+// sections contain csects, and some sections contain csects which are better
+// stored separately, e.g. the .data section containing read-write, descriptor,
+// TOCBase and TOC-entry csects.
+struct Section {
+ char Name[XCOFF::NameSize];
+ // The physical/virtual address of the section. For an object file
+ // these values are equivalent.
+ uint32_t Address;
+ uint32_t Size;
+ uint32_t FileOffsetToData;
+ uint32_t FileOffsetToRelocations;
+ uint32_t RelocationCount;
+ int32_t Flags;
+
+ int16_t Index;
+
+ // Virtual sections do not need storage allocated in the object file.
+ const bool IsVirtual;
+
+ void reset() {
+ Address = 0;
+ Size = 0;
+ FileOffsetToData = 0;
+ FileOffsetToRelocations = 0;
+ RelocationCount = 0;
+ Index = -1;
+ }
+
+ Section(const char *N, XCOFF::SectionTypeFlags Flags, bool IsVirtual)
+ : Address(0), Size(0), FileOffsetToData(0), FileOffsetToRelocations(0),
+ RelocationCount(0), Flags(Flags), Index(-1), IsVirtual(IsVirtual) {
+ strncpy(Name, N, XCOFF::NameSize);
+ }
+};
+
class XCOFFObjectWriter : public MCObjectWriter {
+ // Type to be used for a container representing a set of csects with
+ // (approximately) the same storage mapping class. For example, all the
+ // csects with a storage mapping class of XMC_PR get placed into the same
+ // container.
+ using CsectGroup = std::deque<ControlSection>;
+
support::endian::Writer W;
std::unique_ptr<MCXCOFFObjectTargetWriter> TargetObjectWriter;
+ StringTableBuilder Strings;
+
+ // The non-empty sections, in the order they will appear in the section header
+ // table.
+ std::vector<Section *> Sections;
+
+ // The predefined sections.
+ Section Text;
+ Section BSS;
+
+ // CsectGroups. These store the csects which make up different parts of
+ // the sections. Should have one for each set of csects that get mapped into
+ // the same section and get handled in a 'similar' way.
+ CsectGroup ProgramCodeCsects;
+ CsectGroup BSSCsects;
+
+ uint32_t SymbolTableEntryCount = 0;
+ uint32_t SymbolTableOffset = 0;
+
+ virtual void reset() override;
void executePostLayoutBinding(MCAssembler &, const MCAsmLayout &) override;
@@ -30,6 +147,40 @@ class XCOFFObjectWriter : public MCObjectWriter {
uint64_t writeObject(MCAssembler &, const MCAsmLayout &) override;
+ static bool nameShouldBeInStringTable(const StringRef &);
+ void writeSymbolName(const StringRef &);
+ void writeSymbolTableEntryForCsectMemberLabel(const Symbol &,
+ const ControlSection &, int16_t,
+ uint64_t);
+ void writeSymbolTableEntryForControlSection(const ControlSection &, int16_t,
+ XCOFF::StorageClass);
+ void writeFileHeader();
+ void writeSectionHeaderTable();
+ void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ void writeSymbolTable(const MCAsmLayout &Layout);
+
+ // Called after all the csects and symbols have been processed by
+ // `executePostLayoutBinding`, this function handles building up the majority
+ // of the structures in the object file representation. Namely:
+ // *) Calculates physical/virtual addresses, raw-pointer offsets, and section
+ // sizes.
+ // *) Assigns symbol table indices.
+ // *) Builds up the section header table by adding any non-empty sections to
+ // `Sections`.
+ void assignAddressesAndIndices(const MCAsmLayout &);
+
+ // TODO: Auxiliary header support is not implemented yet.
+ bool needsAuxiliaryHeader() const { return false; }
+
+ // Returns the size of the auxiliary header to be written to the object file.
+ size_t auxiliaryHeaderSize() const {
+ assert(!needsAuxiliaryHeader() &&
+ "Auxiliary header support not implemented.");
+ return 0;
+ }
+
public:
XCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS);
@@ -37,11 +188,100 @@ public:
XCOFFObjectWriter::XCOFFObjectWriter(
std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
- : W(OS, support::big), TargetObjectWriter(std::move(MOTW)) {}
+ : W(OS, support::big), TargetObjectWriter(std::move(MOTW)),
+ Strings(StringTableBuilder::XCOFF),
+ Text(".text", XCOFF::STYP_TEXT, /* IsVirtual */ false),
+ BSS(".bss", XCOFF::STYP_BSS, /* IsVirtual */ true) {}
+
+void XCOFFObjectWriter::reset() {
+ // Reset any sections we have written to, and empty the section header table.
+ for (auto *Sec : Sections)
+ Sec->reset();
+ Sections.clear();
+
+ // Clear any csects we have stored.
+ ProgramCodeCsects.clear();
+ BSSCsects.clear();
+
+ // Reset the symbol table and string table.
+ SymbolTableEntryCount = 0;
+ SymbolTableOffset = 0;
+ Strings.clear();
+
+ MCObjectWriter::reset();
+}
+
+void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ if (TargetObjectWriter->is64Bit())
+ report_fatal_error("64-bit XCOFF object files are not supported yet.");
+
+ // Maps the MC Section representation to its corresponding ControlSection
+ // wrapper. Needed for finding the ControlSection to insert an MCSymbol into
+ // from its containing MCSectionXCOFF.
+ DenseMap<const MCSectionXCOFF *, ControlSection *> WrapperMap;
+
+ for (const auto &S : Asm) {
+ const auto *MCSec = cast<const MCSectionXCOFF>(&S);
+ assert(WrapperMap.find(MCSec) == WrapperMap.end() &&
+ "Cannot add a csect twice.");
-void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &,
- const MCAsmLayout &) {
- // TODO Implement once we have sections and symbols to handle.
+ // If the name does not fit in the storage provided in the symbol table
+ // entry, add it to the string table.
+ if (nameShouldBeInStringTable(MCSec->getSectionName()))
+ Strings.add(MCSec->getSectionName());
+
+ switch (MCSec->getMappingClass()) {
+ case XCOFF::XMC_PR:
+ assert(XCOFF::XTY_SD == MCSec->getCSectType() &&
+ "Only an initialized csect can contain program code.");
+ ProgramCodeCsects.emplace_back(MCSec);
+ WrapperMap[MCSec] = &ProgramCodeCsects.back();
+ break;
+ case XCOFF::XMC_RW:
+ if (XCOFF::XTY_CM == MCSec->getCSectType()) {
+ BSSCsects.emplace_back(MCSec);
+ WrapperMap[MCSec] = &BSSCsects.back();
+ break;
+ }
+ report_fatal_error("Unhandled mapping of read-write csect to section.");
+ case XCOFF::XMC_TC0:
+ // TODO FIXME Handle emitting the TOC base.
+ break;
+ case XCOFF::XMC_BS:
+ assert(XCOFF::XTY_CM == MCSec->getCSectType() &&
+ "Mapping invalid csect. CSECT with bss storage class must be "
+ "common type.");
+ BSSCsects.emplace_back(MCSec);
+ WrapperMap[MCSec] = &BSSCsects.back();
+ break;
+ default:
+ report_fatal_error("Unhandled mapping of csect to section.");
+ }
+ }
+
+ for (const MCSymbol &S : Asm.symbols()) {
+ // Nothing to do for temporary symbols.
+ if (S.isTemporary())
+ continue;
+ const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(&S);
+
+ // Map the symbol into its containing csect.
+ const MCSectionXCOFF *ContainingCsect = XSym->getContainingCsect();
+ assert(WrapperMap.find(ContainingCsect) != WrapperMap.end() &&
+ "Expected containing csect to exist in map");
+
+ // Lookup the containing csect and add the symbol to it.
+ WrapperMap[ContainingCsect]->Syms.emplace_back(XSym);
+
+ // If the name does not fit in the storage provided in the symbol table
+ // entry, add it to the string table.
+ if (nameShouldBeInStringTable(XSym->getName()))
+ Strings.add(XSym->getName());
+ }
+
+ Strings.finalize();
+ assignAddressesAndIndices(Layout);
}
void XCOFFObjectWriter::recordRelocation(MCAssembler &, const MCAsmLayout &,
@@ -50,7 +290,29 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &, const MCAsmLayout &,
report_fatal_error("XCOFF relocations not supported.");
}
-uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) {
+void XCOFFObjectWriter::writeSections(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // Write the program code control sections one at a time.
+ uint32_t CurrentAddressLocation = Text.Address;
+ for (const auto &Csect : ProgramCodeCsects) {
+ if (uint32_t PaddingSize = Csect.Address - CurrentAddressLocation)
+ W.OS.write_zeros(PaddingSize);
+ Asm.writeSectionData(W.OS, Csect.MCCsect, Layout);
+ CurrentAddressLocation = Csect.Address + Csect.Size;
+ }
+
+ if (Text.Index != -1) {
+ // The size of the tail padding in a section is the end virtual address of
+ // the current section minus the end virtual address of the last csect
+ // in that section.
+ if (uint32_t PaddingSize =
+ Text.Address + Text.Size - CurrentAddressLocation)
+ W.OS.write_zeros(PaddingSize);
+ }
+}
+
+uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
// We always emit a timestamp of 0 for reproducibility, so ensure incremental
// linking is not enabled, in case, like with Windows COFF, such a timestamp
// is incompatible with incremental linking of XCOFF.
@@ -62,27 +324,274 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) {
uint64_t StartOffset = W.OS.tell();
- // TODO FIXME Assign section numbers/finalize sections.
+ writeFileHeader();
+ writeSectionHeaderTable();
+ writeSections(Asm, Layout);
+ // TODO writeRelocations();
- // TODO FIXME Finalize symbols.
+ writeSymbolTable(Layout);
+ // Write the string table.
+ Strings.write(W.OS);
+
+ return W.OS.tell() - StartOffset;
+}
+bool XCOFFObjectWriter::nameShouldBeInStringTable(const StringRef &SymbolName) {
+ return SymbolName.size() > XCOFF::NameSize;
+}
+
+void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) {
+ if (nameShouldBeInStringTable(SymbolName)) {
+ W.write<int32_t>(0);
+ W.write<uint32_t>(Strings.getOffset(SymbolName));
+ } else {
+ char Name[XCOFF::NameSize];
+ std::strncpy(Name, SymbolName.data(), XCOFF::NameSize);
+ ArrayRef<char> NameRef(Name, XCOFF::NameSize);
+ W.write(NameRef);
+ }
+}
+
+void XCOFFObjectWriter::writeSymbolTableEntryForCsectMemberLabel(
+ const Symbol &SymbolRef, const ControlSection &CSectionRef,
+ int16_t SectionIndex, uint64_t SymbolOffset) {
+ // Name or Zeros and string table offset
+ writeSymbolName(SymbolRef.getName());
+ assert(SymbolOffset <= UINT32_MAX - CSectionRef.Address &&
+ "Symbol address overflows.");
+ W.write<uint32_t>(CSectionRef.Address + SymbolOffset);
+ W.write<int16_t>(SectionIndex);
+ // Basic/Derived type. See the description of the n_type field for symbol
+ // table entries for a detailed description. Since we don't yet support
+ // visibility, and all other bits are either optionally set or reserved, this
+ // is always zero.
+ // TODO FIXME How to assert a symbol's visibility is default?
+ // TODO Set the function indicator (bit 10, 0x0020) for functions
+ // when debugging is enabled.
+ W.write<uint16_t>(0);
+ W.write<uint8_t>(SymbolRef.getStorageClass());
+ // Always 1 aux entry for now.
+ W.write<uint8_t>(1);
+
+ // Now output the auxiliary entry.
+ W.write<uint32_t>(CSectionRef.SymbolTableIndex);
+ // Parameter typecheck hash. Not supported.
+ W.write<uint32_t>(0);
+ // Typecheck section number. Not supported.
+ W.write<uint16_t>(0);
+ // Symbol type: Label
+ W.write<uint8_t>(XCOFF::XTY_LD);
+ // Storage mapping class.
+ W.write<uint8_t>(CSectionRef.MCCsect->getMappingClass());
+ // Reserved (x_stab).
+ W.write<uint32_t>(0);
+ // Reserved (x_snstab).
+ W.write<uint16_t>(0);
+}
+
+void XCOFFObjectWriter::writeSymbolTableEntryForControlSection(
+ const ControlSection &CSectionRef, int16_t SectionIndex,
+ XCOFF::StorageClass StorageClass) {
+ // n_name, n_zeros, n_offset
+ writeSymbolName(CSectionRef.getName());
+ // n_value
+ W.write<uint32_t>(CSectionRef.Address);
+ // n_scnum
+ W.write<int16_t>(SectionIndex);
+ // Basic/Derived type. See the description of the n_type field for symbol
+ // table entries for a detailed description. Since we don't yet support
+ // visibility, and all other bits are either optionally set or reserved, this
+ // is always zero.
+ // TODO FIXME How to assert a symbol's visibility is default?
+ // TODO Set the function indicator (bit 10, 0x0020) for functions
+ // when debugging is enabled.
+ W.write<uint16_t>(0);
+ // n_sclass
+ W.write<uint8_t>(StorageClass);
+ // Always 1 aux entry for now.
+ W.write<uint8_t>(1);
+
+ // Now output the auxiliary entry.
+ W.write<uint32_t>(CSectionRef.Size);
+ // Parameter typecheck hash. Not supported.
+ W.write<uint32_t>(0);
+ // Typecheck section number. Not supported.
+ W.write<uint16_t>(0);
+ // Symbol type.
+ W.write<uint8_t>(getEncodedType(CSectionRef.MCCsect));
+ // Storage mapping class.
+ W.write<uint8_t>(CSectionRef.MCCsect->getMappingClass());
+ // Reserved (x_stab).
+ W.write<uint32_t>(0);
+ // Reserved (x_snstab).
+ W.write<uint16_t>(0);
+}
+
+void XCOFFObjectWriter::writeFileHeader() {
// Magic.
W.write<uint16_t>(0x01df);
// Number of sections.
- W.write<uint16_t>(0);
+ W.write<uint16_t>(Sections.size());
// Timestamp field. For reproducible output we write a 0, which represents no
// timestamp.
W.write<int32_t>(0);
// Byte Offset to the start of the symbol table.
- W.write<uint32_t>(0);
+ W.write<uint32_t>(SymbolTableOffset);
// Number of entries in the symbol table.
- W.write<int32_t>(0);
+ W.write<int32_t>(SymbolTableEntryCount);
// Size of the optional header.
W.write<uint16_t>(0);
// Flags.
W.write<uint16_t>(0);
+}
- return W.OS.tell() - StartOffset;
+void XCOFFObjectWriter::writeSectionHeaderTable() {
+ for (const auto *Sec : Sections) {
+ // Write Name.
+ ArrayRef<char> NameRef(Sec->Name, XCOFF::NameSize);
+ W.write(NameRef);
+
+ // Write the Physical Address and Virtual Address. In an object file these
+ // are the same.
+ W.write<uint32_t>(Sec->Address);
+ W.write<uint32_t>(Sec->Address);
+
+ W.write<uint32_t>(Sec->Size);
+ W.write<uint32_t>(Sec->FileOffsetToData);
+
+ // Relocation pointer and Lineno pointer. Not supported yet.
+ W.write<uint32_t>(0);
+ W.write<uint32_t>(0);
+
+ // Relocation and line-number counts. Not supported yet.
+ W.write<uint16_t>(0);
+ W.write<uint16_t>(0);
+
+ W.write<int32_t>(Sec->Flags);
+ }
+}
+
+void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
+ // Print out symbol table for the program code.
+ for (const auto &Csect : ProgramCodeCsects) {
+ // Write out the control section first and then each symbol in it.
+ writeSymbolTableEntryForControlSection(Csect, Text.Index,
+ Csect.MCCsect->getStorageClass());
+ for (const auto &Sym : Csect.Syms)
+ writeSymbolTableEntryForCsectMemberLabel(
+ Sym, Csect, Text.Index, Layout.getSymbolOffset(*Sym.MCSym));
+ }
+
+ // The BSS Section is special in that the csects must contain a single symbol,
+ // and the contained symbol cannot be represented in the symbol table as a
+ // label definition.
+ for (auto &Csect : BSSCsects) {
+ assert(Csect.Syms.size() == 1 &&
+ "Uninitialized csect cannot contain more then 1 symbol.");
+ Symbol &Sym = Csect.Syms.back();
+ writeSymbolTableEntryForControlSection(Csect, BSS.Index,
+ Sym.getStorageClass());
+ }
+}
+
+void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
+ // The address corresponds to the address of sections and symbols in the
+ // object file. We place the shared address 0 immediately after the
+ // section header table.
+ uint32_t Address = 0;
+ // Section indices are 1-based in XCOFF.
+ int16_t SectionIndex = 1;
+ // The first symbol table entry is for the file name. We are not emitting it
+ // yet, so start at index 0.
+ uint32_t SymbolTableIndex = 0;
+
+ // Text section comes first.
+ if (!ProgramCodeCsects.empty()) {
+ Sections.push_back(&Text);
+ Text.Index = SectionIndex++;
+ for (auto &Csect : ProgramCodeCsects) {
+ const MCSectionXCOFF *MCSec = Csect.MCCsect;
+ Csect.Address = alignTo(Address, MCSec->getAlignment());
+ Csect.Size = Layout.getSectionAddressSize(MCSec);
+ Address = Csect.Address + Csect.Size;
+ Csect.SymbolTableIndex = SymbolTableIndex;
+ // 1 main and 1 auxiliary symbol table entry for the csect.
+ SymbolTableIndex += 2;
+ for (auto &Sym : Csect.Syms) {
+ Sym.SymbolTableIndex = SymbolTableIndex;
+ // 1 main and 1 auxiliary symbol table entry for each contained symbol
+ SymbolTableIndex += 2;
+ }
+ }
+ Address = alignTo(Address, DefaultSectionAlign);
+
+ // The first csect of a section can be aligned by adjusting the virtual
+ // address of its containing section instead of writing zeroes into the
+ // object file.
+ Text.Address = ProgramCodeCsects.front().Address;
+
+ Text.Size = Address - Text.Address;
+ }
+
+ // Data section second. TODO
+
+ // BSS section third.
+ if (!BSSCsects.empty()) {
+ Sections.push_back(&BSS);
+ BSS.Index = SectionIndex++;
+ for (auto &Csect : BSSCsects) {
+ const MCSectionXCOFF *MCSec = Csect.MCCsect;
+ Csect.Address = alignTo(Address, MCSec->getAlignment());
+ Csect.Size = Layout.getSectionAddressSize(MCSec);
+ Address = Csect.Address + Csect.Size;
+ Csect.SymbolTableIndex = SymbolTableIndex;
+ // 1 main and 1 auxiliary symbol table entry for the csect.
+ SymbolTableIndex += 2;
+
+ assert(Csect.Syms.size() == 1 &&
+ "csect in the BSS can only contain a single symbol.");
+ Csect.Syms[0].SymbolTableIndex = Csect.SymbolTableIndex;
+ }
+ // Pad out Address to the default alignment. This is to match how the system
+ // assembler handles the .bss section. Its size is always a multiple of 4.
+ Address = alignTo(Address, DefaultSectionAlign);
+
+ BSS.Address = BSSCsects.front().Address;
+ BSS.Size = Address - BSS.Address;
+ }
+
+ SymbolTableEntryCount = SymbolTableIndex;
+
+ // Calculate the RawPointer value for each section.
+ uint64_t RawPointer = sizeof(XCOFF::FileHeader32) + auxiliaryHeaderSize() +
+ Sections.size() * sizeof(XCOFF::SectionHeader32);
+ for (auto *Sec : Sections) {
+ if (!Sec->IsVirtual) {
+ Sec->FileOffsetToData = RawPointer;
+ RawPointer += Sec->Size;
+ }
+ }
+
+ // TODO Add in Relocation storage to the RawPointer Calculation.
+ // TODO What to align the SymbolTable to?
+ // TODO Error check that the number of symbol table entries fits in 32-bits
+ // signed ...
+ if (SymbolTableEntryCount)
+ SymbolTableOffset = RawPointer;
+}
+
+// Takes the log base 2 of the alignment and shifts the result into the 5 most
+// significant bits of a byte, then ORs the csect type into the least
+// significant 3 bits.
+uint8_t getEncodedType(const MCSectionXCOFF *Sec) {
+ unsigned Align = Sec->getAlignment();
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of 2.");
+ unsigned Log2Align = Log2_32(Align);
+ // Result is a number in the range [0, 31] which fits in the 5 least
+ // significant bits. Shift this value into the 5 most significant bits, and
+ // bitwise-or in the csect type.
+ uint8_t EncodedAlign = Log2Align << 3;
+ return EncodedAlign | Sec->getCSectType();
}
} // end anonymous namespace
@@ -90,5 +599,5 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) {
std::unique_ptr<MCObjectWriter>
llvm::createXCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS) {
- return llvm::make_unique<XCOFFObjectWriter>(std::move(MOTW), OS);
+ return std::make_unique<XCOFFObjectWriter>(std::move(MOTW), OS);
}
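
A worked example for getEncodedType: a 16-byte-aligned, initialized (XTY_SD) csect encodes as Log2_32(16) == 4, shifted into the top five bits (4 << 3 == 0x20), OR'd with XTY_SD == 1:

    #include "llvm/BinaryFormat/XCOFF.h"
    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // Log2_32(16) == 4; 4 << 3 == 0x20; XTY_SD == 1; encoded byte == 0x21.
    const uint8_t EncodedExample =
        (llvm::Log2_32(16) << 3) | llvm::XCOFF::XTY_SD;
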
diff --git a/lib/MCA/CodeEmitter.cpp b/lib/MCA/CodeEmitter.cpp
new file mode 100644
index 000000000000..294107219cb0
--- /dev/null
+++ b/lib/MCA/CodeEmitter.cpp
@@ -0,0 +1,37 @@
+//===--------------------- CodeEmitter.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CodeEmitter API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/CodeEmitter.h"
+
+namespace llvm {
+namespace mca {
+
+CodeEmitter::EncodingInfo
+CodeEmitter::getOrCreateEncodingInfo(unsigned MCID) {
+ EncodingInfo &EI = Encodings[MCID];
+ if (EI.second)
+ return EI;
+
+ SmallVector<llvm::MCFixup, 2> Fixups;
+ const MCInst &Inst = Sequence[MCID];
+ MCInst Relaxed(Sequence[MCID]);
+ if (MAB.mayNeedRelaxation(Inst, STI))
+ MAB.relaxInstruction(Inst, STI, Relaxed);
+
+ EI.first = Code.size();
+ MCE.encodeInstruction(Relaxed, VecOS, Fixups, STI);
+ EI.second = Code.size() - EI.first;
+ return EI;
+}
+
+} // namespace mca
+} // namespace llvm
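
getOrCreateEncodingInfo caches an (offset, size) pair per instruction index so each MCInst in the sequence is encoded at most once, with every encoding appended to one shared byte buffer. A simplified sketch of that lazy caching shape, using a std::map and plain strings in place of the MC machinery (purely illustrative):

#include <cassert>
#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Caches the (offset, size) of each item's encoding inside a shared buffer,
// encoding lazily on first request.
class EncodingCache {
  std::vector<std::string> Items; // stand-in for the MCInst sequence
  std::string Buffer;             // shared encoded-bytes buffer
  std::map<unsigned, std::pair<std::size_t, std::size_t>> Encodings;

public:
  explicit EncodingCache(std::vector<std::string> I) : Items(std::move(I)) {}

  std::pair<std::size_t, std::size_t> getOrCreate(unsigned Index) {
    auto &EI = Encodings[Index];
    if (EI.second) // nonzero size means already encoded
      return EI;
    EI.first = Buffer.size();             // offset of the new encoding
    Buffer += Items[Index];               // "encode" by appending bytes
    EI.second = Buffer.size() - EI.first; // size of what was appended
    return EI;
  }
};

int main() {
  EncodingCache C({"\x90", "\x48\x89\xc8"});
  auto A = C.getOrCreate(1);
  assert(A.first == 0 && A.second == 3);
  assert(C.getOrCreate(1) == A); // second lookup hits the cache
  return 0;
}

As in the code above, a zero size doubles as the "not yet encoded" sentinel, which quietly assumes no instruction encodes to zero bytes.
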
diff --git a/lib/MCA/Context.cpp b/lib/MCA/Context.cpp
index f0e8dfab8680..0160e1f9f787 100644
--- a/lib/MCA/Context.cpp
+++ b/lib/MCA/Context.cpp
@@ -28,24 +28,23 @@ namespace llvm {
namespace mca {
std::unique_ptr<Pipeline>
-Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
- SourceMgr &SrcMgr) {
+Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
const MCSchedModel &SM = STI.getSchedModel();
// Create the hardware units defining the backend.
- auto RCU = llvm::make_unique<RetireControlUnit>(SM);
- auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
- auto LSU = llvm::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
+ auto RCU = std::make_unique<RetireControlUnit>(SM);
+ auto PRF = std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
+ auto LSU = std::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
Opts.StoreQueueSize, Opts.AssumeNoAlias);
- auto HWS = llvm::make_unique<Scheduler>(SM, *LSU);
+ auto HWS = std::make_unique<Scheduler>(SM, *LSU);
// Create the pipeline stages.
- auto Fetch = llvm::make_unique<EntryStage>(SrcMgr);
- auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth,
+ auto Fetch = std::make_unique<EntryStage>(SrcMgr);
+ auto Dispatch = std::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth,
*RCU, *PRF);
auto Execute =
- llvm::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis);
- auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF);
+ std::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis);
+ auto Retire = std::make_unique<RetireStage>(*RCU, *PRF, *LSU);
// Pass the ownership of all the hardware units to this Context.
addHardwareUnit(std::move(RCU));
@@ -54,10 +53,10 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
addHardwareUnit(std::move(HWS));
// Build the pipeline.
- auto StagePipeline = llvm::make_unique<Pipeline>();
+ auto StagePipeline = std::make_unique<Pipeline>();
StagePipeline->appendStage(std::move(Fetch));
if (Opts.MicroOpQueueSize)
- StagePipeline->appendStage(llvm::make_unique<MicroOpQueueStage>(
+ StagePipeline->appendStage(std::make_unique<MicroOpQueueStage>(
Opts.MicroOpQueueSize, Opts.DecodersThroughput));
StagePipeline->appendStage(std::move(Dispatch));
StagePipeline->appendStage(std::move(Execute));
diff --git a/lib/MCA/HardwareUnits/LSUnit.cpp b/lib/MCA/HardwareUnits/LSUnit.cpp
index ac1a6a36547b..0ee084c7ce1a 100644
--- a/lib/MCA/HardwareUnits/LSUnit.cpp
+++ b/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -29,12 +29,12 @@ LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
if (!LQSize && EPI.LoadQueueID) {
const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID);
- LQSize = LdQDesc.BufferSize;
+ LQSize = std::max(0, LdQDesc.BufferSize);
}
if (!SQSize && EPI.StoreQueueID) {
const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID);
- SQSize = StQDesc.BufferSize;
+ SQSize = std::max(0, StQDesc.BufferSize);
}
}
}
@@ -72,9 +72,9 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
if (Desc.MayLoad)
- assignLQSlot();
+ acquireLQSlot();
if (Desc.MayStore)
- assignSQSlot();
+ acquireSQSlot();
if (Desc.MayStore) {
// Always create a new group for store operations.
@@ -160,26 +160,28 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
}
void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- bool IsALoad = Desc.MayLoad;
- bool IsAStore = Desc.MayStore;
- assert((IsALoad || IsAStore) && "Expected a memory operation!");
-
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
auto It = Groups.find(GroupID);
+ assert(It != Groups.end() && "Instruction not dispatched to the LS unit");
It->second->onInstructionExecuted();
- if (It->second->isExecuted()) {
+ if (It->second->isExecuted())
Groups.erase(It);
- }
+}
+
+void LSUnitBase::onInstructionRetired(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ bool IsALoad = Desc.MayLoad;
+ bool IsAStore = Desc.MayStore;
+ assert((IsALoad || IsAStore) && "Expected a memory operation!");
if (IsALoad) {
- UsedLQEntries--;
+ releaseLQSlot();
LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
<< " has been removed from the load queue.\n");
}
if (IsAStore) {
- UsedSQEntries--;
+ releaseSQSlot();
LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
<< " has been removed from the store queue.\n");
}
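
The queue accounting above changes when entries are released: acquireLQSlot/acquireSQSlot are taken at dispatch, while the new onInstructionRetired frees them only at retirement (wired up in the RetireStage change later in this diff) rather than when execution finishes. A minimal sketch of that acquire-at-dispatch, release-at-retire discipline (names and sizes are illustrative):

#include <cassert>

// Tracks occupancy of a fixed-size queue whose entries are acquired when an
// operation is dispatched and released only when it retires.
class QueueTracker {
  unsigned Size;
  unsigned Used = 0;

public:
  explicit QueueTracker(unsigned S) : Size(S) {}

  bool isFull() const { return Used == Size; }
  void acquireSlot() { assert(!isFull()); ++Used; }
  void releaseSlot() { assert(Used); --Used; }
};

int main() {
  QueueTracker LoadQueue(2);
  LoadQueue.acquireSlot();    // load A dispatched
  LoadQueue.acquireSlot();    // load B dispatched
  assert(LoadQueue.isFull()); // a third load would stall at dispatch
  LoadQueue.releaseSlot();    // load A retires; its slot becomes free
  assert(!LoadQueue.isFull());
  return 0;
}
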
diff --git a/lib/MCA/HardwareUnits/RegisterFile.cpp b/lib/MCA/HardwareUnits/RegisterFile.cpp
index 86a888ea8cae..7ea5506f11d6 100644
--- a/lib/MCA/HardwareUnits/RegisterFile.cpp
+++ b/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -147,7 +147,7 @@ void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry,
void RegisterFile::addRegisterWrite(WriteRef Write,
MutableArrayRef<unsigned> UsedPhysRegs) {
WriteState &WS = *Write.getWriteState();
- unsigned RegID = WS.getRegisterID();
+ MCPhysReg RegID = WS.getRegisterID();
assert(RegID && "Adding an invalid register definition?");
LLVM_DEBUG({
@@ -194,7 +194,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
}
// Update zero registers.
- unsigned ZeroRegisterID =
+ MCPhysReg ZeroRegisterID =
WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
if (IsWriteZero) {
ZeroRegisters.setBit(ZeroRegisterID);
@@ -247,7 +247,7 @@ void RegisterFile::removeRegisterWrite(
if (WS.isEliminated())
return;
- unsigned RegID = WS.getRegisterID();
+ MCPhysReg RegID = WS.getRegisterID();
assert(RegID != 0 && "Invalidating an already invalid register?");
assert(WS.getCyclesLeft() != UNKNOWN_CYCLES &&
@@ -255,7 +255,7 @@ void RegisterFile::removeRegisterWrite(
assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
bool ShouldFreePhysRegs = !WS.isWriteZero();
- unsigned RenameAs = RegisterMappings[RegID].second.RenameAs;
+ MCPhysReg RenameAs = RegisterMappings[RegID].second.RenameAs;
if (RenameAs && RenameAs != RegID) {
RegID = RenameAs;
@@ -355,7 +355,7 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
void RegisterFile::collectWrites(const ReadState &RS,
SmallVectorImpl<WriteRef> &Writes) const {
- unsigned RegID = RS.getRegisterID();
+ MCPhysReg RegID = RS.getRegisterID();
assert(RegID && RegID < RegisterMappings.size());
LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
<< MRI.getName(RegID) << '\n');
@@ -397,7 +397,7 @@ void RegisterFile::collectWrites(const ReadState &RS,
void RegisterFile::addRegisterRead(ReadState &RS,
const MCSubtargetInfo &STI) const {
- unsigned RegID = RS.getRegisterID();
+ MCPhysReg RegID = RS.getRegisterID();
const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
RS.setPRF(RRI.IndexPlusCost.first);
if (RS.isIndependentFromDef())
@@ -424,11 +424,11 @@ void RegisterFile::addRegisterRead(ReadState &RS,
}
}
-unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
+unsigned RegisterFile::isAvailable(ArrayRef<MCPhysReg> Regs) const {
SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles());
// Find how many new mappings must be created for each register file.
- for (const unsigned RegID : Regs) {
+ for (const MCPhysReg RegID : Regs) {
const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost;
if (Entry.first)
diff --git a/lib/MCA/HardwareUnits/ResourceManager.cpp b/lib/MCA/HardwareUnits/ResourceManager.cpp
index 06f2476353d6..088aea3e23c6 100644
--- a/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -104,7 +104,7 @@ void ResourceState::dump() const {
static std::unique_ptr<ResourceStrategy>
getStrategyFor(const ResourceState &RS) {
if (RS.isAResourceGroup() || RS.getNumUnits() > 1)
- return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask());
+ return std::make_unique<DefaultResourceStrategy>(RS.getReadyMask());
return std::unique_ptr<ResourceStrategy>(nullptr);
}
@@ -114,7 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
- ProcResUnitMask(0), ReservedResourceGroups(0) {
+ ProcResUnitMask(0), ReservedResourceGroups(0),
+ AvailableBuffers(~0ULL), ReservedBuffers(0) {
computeProcResourceMasks(SM, ProcResID2Mask);
// initialize vector ResIndex2ProcResID.
@@ -127,7 +128,7 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
uint64_t Mask = ProcResID2Mask[I];
unsigned Index = getResourceStateIndex(Mask);
Resources[Index] =
- llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask);
+ std::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask);
Strategies[Index] = getStrategyFor(*Resources[Index]);
}
@@ -241,33 +242,41 @@ void ResourceManager::release(const ResourceRef &RR) {
}
ResourceStateEvent
-ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const {
- ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE;
- for (uint64_t Buffer : Buffers) {
- ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
- Result = RS.isBufferAvailable();
- if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE)
- break;
- }
- return Result;
+ResourceManager::canBeDispatched(uint64_t ConsumedBuffers) const {
+ if (ConsumedBuffers & ReservedBuffers)
+ return ResourceStateEvent::RS_RESERVED;
+ if (ConsumedBuffers & (~AvailableBuffers))
+ return ResourceStateEvent::RS_BUFFER_UNAVAILABLE;
+ return ResourceStateEvent::RS_BUFFER_AVAILABLE;
}
-void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) {
- for (const uint64_t Buffer : Buffers) {
- ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
+void ResourceManager::reserveBuffers(uint64_t ConsumedBuffers) {
+ while (ConsumedBuffers) {
+ uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers);
+ ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)];
+ ConsumedBuffers ^= CurrentBuffer;
assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE);
- RS.reserveBuffer();
-
+ if (!RS.reserveBuffer())
+ AvailableBuffers ^= CurrentBuffer;
if (RS.isADispatchHazard()) {
- assert(!RS.isReserved());
- RS.setReserved();
+ // Reserve this buffer now, and release it once pipeline resources
+ // consumed by the instruction become available again.
+      // We do this to simulate in-order dispatch/issue of instructions.
+ ReservedBuffers ^= CurrentBuffer;
}
}
}
-void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) {
- for (const uint64_t R : Buffers)
- Resources[getResourceStateIndex(R)]->releaseBuffer();
+void ResourceManager::releaseBuffers(uint64_t ConsumedBuffers) {
+ AvailableBuffers |= ConsumedBuffers;
+ while (ConsumedBuffers) {
+ uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers);
+ ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)];
+ ConsumedBuffers ^= CurrentBuffer;
+ RS.releaseBuffer();
+ // Do not unreserve dispatch hazard resource buffers. Wait until all
+ // pipeline resources have been freed too.
+ }
}
uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
@@ -322,7 +331,6 @@ void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) {
if (countPopulation(RR.first) == 1)
release(RR);
-
releaseResource(RR.first);
ResourcesFreed.push_back(RR);
}
@@ -336,7 +344,7 @@ void ResourceManager::reserveResource(uint64_t ResourceID) {
const unsigned Index = getResourceStateIndex(ResourceID);
ResourceState &Resource = *Resources[Index];
assert(Resource.isAResourceGroup() && !Resource.isReserved() &&
- "Unexpected resource found!");
+ "Unexpected resource state found!");
Resource.setReserved();
ReservedResourceGroups ^= 1ULL << Index;
}
@@ -347,6 +355,9 @@ void ResourceManager::releaseResource(uint64_t ResourceID) {
Resource.clearReserved();
if (Resource.isAResourceGroup())
ReservedResourceGroups ^= 1ULL << Index;
+ // Now it is safe to release dispatch/issue resources.
+ if (Resource.isADispatchHazard())
+ ReservedBuffers ^= 1ULL << Index;
}
} // namespace mca
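
The rewritten buffer management above operates on 64-bit masks and repeatedly peels off the lowest set bit with ConsumedBuffers & (-ConsumedBuffers): in two's complement, negation flips every bit above the lowest set bit, so the AND isolates exactly that bit. A small demonstration of the idiom:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Mask = 0b101100; // bits 2, 3, and 5 set
  uint64_t Seen = 0;
  // Visit each set bit from lowest to highest, clearing it as we go.
  while (Mask) {
    uint64_t Lowest = Mask & (~Mask + 1); // == Mask & -Mask: lowest set bit
    Seen |= Lowest;
    Mask ^= Lowest;
  }
  assert(Seen == 0b101100);
  return 0;
}
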
diff --git a/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/lib/MCA/HardwareUnits/RetireControlUnit.cpp
index 068c5062ccdf..de519d7fd94a 100644
--- a/lib/MCA/HardwareUnits/RetireControlUnit.cpp
+++ b/lib/MCA/HardwareUnits/RetireControlUnit.cpp
@@ -21,65 +21,78 @@ namespace mca {
RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
: NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
- AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) {
+ NumROBEntries(SM.MicroOpBufferSize),
+ AvailableEntries(SM.MicroOpBufferSize), MaxRetirePerCycle(0) {
// Check if the scheduling model provides extra information about the machine
// processor. If so, then use that information to set the reorder buffer size
// and the maximum number of instructions retired per cycle.
if (SM.hasExtraProcessorInfo()) {
const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
if (EPI.ReorderBufferSize)
- AvailableSlots = EPI.ReorderBufferSize;
+ AvailableEntries = EPI.ReorderBufferSize;
MaxRetirePerCycle = EPI.MaxRetirePerCycle;
}
-
- assert(AvailableSlots && "Invalid reorder buffer size!");
- Queue.resize(AvailableSlots);
+ NumROBEntries = AvailableEntries;
+ assert(NumROBEntries && "Invalid reorder buffer size!");
+ Queue.resize(2 * NumROBEntries);
}
// Reserves a number of slots, and returns a new token.
-unsigned RetireControlUnit::reserveSlot(const InstRef &IR,
- unsigned NumMicroOps) {
- assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!");
- unsigned NormalizedQuantity =
- std::min(NumMicroOps, static_cast<unsigned>(Queue.size()));
- // Zero latency instructions may have zero uOps. Artificially bump this
- // value to 1. Although zero latency instructions don't consume scheduler
- // resources, they still consume one slot in the retire queue.
- NormalizedQuantity = std::max(NormalizedQuantity, 1U);
+unsigned RetireControlUnit::dispatch(const InstRef &IR) {
+ const Instruction &Inst = *IR.getInstruction();
+ unsigned Entries = normalizeQuantity(Inst.getNumMicroOps());
+ assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!");
+
unsigned TokenID = NextAvailableSlotIdx;
- Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false};
- NextAvailableSlotIdx += NormalizedQuantity;
+ Queue[NextAvailableSlotIdx] = {IR, Entries, false};
+ NextAvailableSlotIdx += std::max(1U, Entries);
NextAvailableSlotIdx %= Queue.size();
- AvailableSlots -= NormalizedQuantity;
+
+ AvailableEntries -= Entries;
return TokenID;
}
-const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const {
- return Queue[CurrentInstructionSlotIdx];
+const RetireControlUnit::RUToken &RetireControlUnit::getCurrentToken() const {
+ const RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx];
+#ifndef NDEBUG
+ const Instruction *Inst = Current.IR.getInstruction();
+ assert(Inst && "Invalid RUToken in the RCU queue.");
+#endif
+ return Current;
+}
+
+unsigned RetireControlUnit::computeNextSlotIdx() const {
+ const RetireControlUnit::RUToken &Current = getCurrentToken();
+  unsigned NextSlotIdx =
+      CurrentInstructionSlotIdx + std::max(1U, Current.NumSlots);
+ return NextSlotIdx % Queue.size();
+}
+
+const RetireControlUnit::RUToken &RetireControlUnit::peekNextToken() const {
+ return Queue[computeNextSlotIdx()];
}
void RetireControlUnit::consumeCurrentToken() {
RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx];
- assert(Current.NumSlots && "Reserved zero slots?");
- assert(Current.IR && "Invalid RUToken in the RCU queue.");
Current.IR.getInstruction()->retire();
// Update the slot index to be the next item in the circular queue.
- CurrentInstructionSlotIdx += Current.NumSlots;
+ CurrentInstructionSlotIdx += std::max(1U, Current.NumSlots);
CurrentInstructionSlotIdx %= Queue.size();
- AvailableSlots += Current.NumSlots;
+ AvailableEntries += Current.NumSlots;
+ Current = { InstRef(), 0U, false };
}
void RetireControlUnit::onInstructionExecuted(unsigned TokenID) {
assert(Queue.size() > TokenID);
- assert(Queue[TokenID].Executed == false && Queue[TokenID].IR);
+ assert(Queue[TokenID].IR.getInstruction() && "Instruction was not dispatched!");
+ assert(Queue[TokenID].Executed == false && "Instruction already executed!");
Queue[TokenID].Executed = true;
}
#ifndef NDEBUG
void RetireControlUnit::dump() const {
- dbgs() << "Retire Unit: { Total Slots=" << Queue.size()
- << ", Available Slots=" << AvailableSlots << " }\n";
+ dbgs() << "Retire Unit: { Total ROB Entries =" << NumROBEntries
+ << ", Available ROB entries=" << AvailableEntries << " }\n";
}
#endif
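
The reworked RCU treats the reorder buffer as a circular array: dispatch stamps a token at NextAvailableSlotIdx and advances it modulo Queue.size(), and retirement walks CurrentInstructionSlotIdx the same way; the queue is sized at twice NumROBEntries, presumably to leave room for zero-uop tokens that advance the cursor without consuming ROB entries. A sketch of such a circular token queue, simplified to one slot per token (illustrative, not the RCU's exact bookkeeping):

#include <cassert>
#include <cstddef>
#include <vector>

// A fixed-capacity circular queue with separate dispatch (write) and retire
// (read) cursors that wrap modulo the queue size.
class CircularTokens {
  std::vector<int> Queue;
  std::size_t WriteIdx = 0;
  std::size_t ReadIdx = 0;
  std::size_t Available;

public:
  explicit CircularTokens(std::size_t N) : Queue(N), Available(N) {}

  std::size_t dispatch(int Token) {
    assert(Available && "queue is full");
    std::size_t Slot = WriteIdx;
    Queue[Slot] = Token;
    WriteIdx = (WriteIdx + 1) % Queue.size(); // wrap around
    --Available;
    return Slot; // token ID handed back to the instruction
  }

  int retire() {
    assert(Available < Queue.size() && "queue is empty");
    int Token = Queue[ReadIdx];
    ReadIdx = (ReadIdx + 1) % Queue.size();
    ++Available;
    return Token;
  }
};

int main() {
  CircularTokens RCU(2);
  RCU.dispatch(10);
  RCU.dispatch(11);
  assert(RCU.retire() == 10); // retirement is strictly in program order
  RCU.dispatch(12);           // reuses the slot freed by the retire above
  assert(RCU.retire() == 11 && RCU.retire() == 12);
  return 0;
}
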
diff --git a/lib/MCA/HardwareUnits/Scheduler.cpp b/lib/MCA/HardwareUnits/Scheduler.cpp
index 0f0f2ffb8325..8730336c6669 100644
--- a/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -21,7 +21,7 @@ namespace mca {
void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) {
// Ensure we have a valid (non-null) strategy object.
- Strategy = S ? std::move(S) : llvm::make_unique<DefaultSchedulerStrategy>();
+ Strategy = S ? std::move(S) : std::make_unique<DefaultSchedulerStrategy>();
}
// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy.
@@ -38,9 +38,8 @@ void Scheduler::dump() const {
#endif
Scheduler::Status Scheduler::isAvailable(const InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
-
- ResourceStateEvent RSE = Resources->canBeDispatched(Desc.Buffers);
+ ResourceStateEvent RSE =
+ Resources->canBeDispatched(IR.getInstruction()->getUsedBuffers());
HadTokenStall = RSE != RS_BUFFER_AVAILABLE;
switch (RSE) {
@@ -106,7 +105,7 @@ void Scheduler::issueInstruction(
bool HasDependentUsers = Inst.hasDependentUsers();
HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR);
- Resources->releaseBuffers(Inst.getDesc().Buffers);
+ Resources->releaseBuffers(Inst.getUsedBuffers());
issueInstructionImpl(IR, UsedResources);
// Instructions that have been issued during this cycle might have unblocked
// other dependent instructions. Dependent instructions may be issued during
@@ -300,8 +299,7 @@ bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
bool Scheduler::dispatch(InstRef &IR) {
Instruction &IS = *IR.getInstruction();
- const InstrDesc &Desc = IS.getDesc();
- Resources->reserveBuffers(Desc.Buffers);
+ Resources->reserveBuffers(IS.getUsedBuffers());
// If necessary, reserve queue entries in the load-store unit (LSU).
if (IS.isMemOp())
diff --git a/lib/MCA/InstrBuilder.cpp b/lib/MCA/InstrBuilder.cpp
index 829920366c90..bd28c733535c 100644
--- a/lib/MCA/InstrBuilder.cpp
+++ b/lib/MCA/InstrBuilder.cpp
@@ -80,7 +80,7 @@ static void initializeUsedResources(InstrDesc &ID,
if (PR.BufferSize < 0) {
AllInOrderResources = false;
} else {
- Buffers.setBit(PRE->ProcResourceIdx);
+ Buffers.setBit(getResourceStateIndex(Mask));
AnyDispatchHazards |= (PR.BufferSize == 0);
AllInOrderResources &= (PR.BufferSize <= 1);
}
@@ -139,9 +139,6 @@ static void initializeUsedResources(InstrDesc &ID,
}
}
- ID.UsedProcResUnits = UsedResourceUnits;
- ID.UsedProcResGroups = UsedResourceGroups;
-
// A SchedWrite may specify a number of cycles in which a resource group
// is reserved. For example (on target x86; cpu Haswell):
//
@@ -177,20 +174,13 @@ static void initializeUsedResources(InstrDesc &ID,
uint64_t Mask = ProcResourceMasks[I];
if (Mask != SR.first && ((Mask & SR.first) == SR.first))
- Buffers.setBit(I);
+ Buffers.setBit(getResourceStateIndex(Mask));
}
}
- // Now set the buffers.
- if (unsigned NumBuffers = Buffers.countPopulation()) {
- ID.Buffers.resize(NumBuffers);
- for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) {
- if (Buffers[I]) {
- --NumBuffers;
- ID.Buffers[NumBuffers] = ProcResourceMasks[I];
- }
- }
- }
+ ID.UsedBuffers = Buffers.getZExtValue();
+ ID.UsedProcResUnits = UsedResourceUnits;
+ ID.UsedProcResGroups = UsedResourceGroups;
LLVM_DEBUG({
for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
@@ -198,8 +188,12 @@ static void initializeUsedResources(InstrDesc &ID,
<< "Reserved=" << R.second.isReserved() << ", "
<< "#Units=" << R.second.NumUnits << ", "
<< "cy=" << R.second.size() << '\n';
- for (const uint64_t R : ID.Buffers)
- dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
+ uint64_t BufferIDs = ID.UsedBuffers;
+ while (BufferIDs) {
+ uint64_t Current = BufferIDs & (-BufferIDs);
+ dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
+ BufferIDs ^= Current;
+ }
dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
<< '\n';
@@ -464,9 +458,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
// FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
// "unmodeledSideEffects", then this logic optimistically assumes that any
- // extra register operands in the variadic sequence are not register
+ // extra register operand in the variadic sequence is not a register
// definition.
-
bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
!MCDesc.hasUnmodeledSideEffects();
for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
@@ -493,7 +486,7 @@ Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
return ErrorSuccess();
bool UsesMemory = ID.MayLoad || ID.MayStore;
- bool UsesBuffers = !ID.Buffers.empty();
+ bool UsesBuffers = ID.UsedBuffers;
bool UsesResources = !ID.Resources.empty();
if (!UsesMemory && !UsesBuffers && !UsesResources)
return ErrorSuccess();
@@ -550,7 +543,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
// Create a new empty descriptor.
- std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
+ std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
ID->NumMicroOps = SCDesc.NumMicroOps;
ID->SchedClassID = SchedClassID;
@@ -619,7 +612,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
if (!DescOrErr)
return DescOrErr.takeError();
const InstrDesc &D = *DescOrErr;
- std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
+ std::unique_ptr<Instruction> NewIS = std::make_unique<Instruction>(D);
// Check if this is a dependency breaking instruction.
APInt Mask;
@@ -636,8 +629,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
}
// Initialize Reads first.
+ MCPhysReg RegID = 0;
for (const ReadDescriptor &RD : D.Reads) {
- int RegID = -1;
if (!RD.isImplicitRead()) {
// explicit read.
const MCOperand &Op = MCI.getOperand(RD.OpIndex);
@@ -655,7 +648,6 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
continue;
// Okay, this is a register operand. Create a ReadState for it.
- assert(RegID > 0 && "Invalid register ID found!");
NewIS->getUses().emplace_back(RD, RegID);
ReadState &RS = NewIS->getUses().back();
@@ -696,8 +688,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
// Initialize writes.
unsigned WriteIndex = 0;
for (const WriteDescriptor &WD : D.Writes) {
- unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID
- : MCI.getOperand(WD.OpIndex).getReg();
+ RegID = WD.isImplicitWrite() ? WD.RegisterID
+ : MCI.getOperand(WD.OpIndex).getReg();
// Check if this is a optional definition that references NoReg.
if (WD.IsOptionalDef && !RegID) {
++WriteIndex;
diff --git a/lib/MCA/Instruction.cpp b/lib/MCA/Instruction.cpp
index 001842bca318..e5f2c4fd1eec 100644
--- a/lib/MCA/Instruction.cpp
+++ b/lib/MCA/Instruction.cpp
@@ -18,7 +18,7 @@
namespace llvm {
namespace mca {
-void WriteState::writeStartEvent(unsigned IID, unsigned RegID,
+void WriteState::writeStartEvent(unsigned IID, MCPhysReg RegID,
unsigned Cycles) {
CRD.IID = IID;
CRD.RegID = RegID;
@@ -27,7 +27,7 @@ void WriteState::writeStartEvent(unsigned IID, unsigned RegID,
DependentWrite = nullptr;
}
-void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
+void ReadState::writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles) {
assert(DependentWrites);
assert(CyclesLeft == UNKNOWN_CYCLES);
diff --git a/lib/MCA/Stages/DispatchStage.cpp b/lib/MCA/Stages/DispatchStage.cpp
index 7334a268e9a6..3a3d82259160 100644
--- a/lib/MCA/Stages/DispatchStage.cpp
+++ b/lib/MCA/Stages/DispatchStage.cpp
@@ -44,7 +44,7 @@ void DispatchStage::notifyInstructionDispatched(const InstRef &IR,
}
bool DispatchStage::checkPRF(const InstRef &IR) const {
- SmallVector<unsigned, 4> RegDefs;
+ SmallVector<MCPhysReg, 4> RegDefs;
for (const WriteState &RegDef : IR.getInstruction()->getDefs())
RegDefs.emplace_back(RegDef.getRegisterID());
@@ -60,7 +60,7 @@ bool DispatchStage::checkPRF(const InstRef &IR) const {
}
bool DispatchStage::checkRCU(const InstRef &IR) const {
- const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps;
+ const unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps();
if (RCU.isAvailable(NumMicroOps))
return true;
notifyEvent<HWStallEvent>(
@@ -79,7 +79,7 @@ Error DispatchStage::dispatch(InstRef IR) {
assert(!CarryOver && "Cannot dispatch another instruction!");
Instruction &IS = *IR.getInstruction();
const InstrDesc &Desc = IS.getDesc();
- const unsigned NumMicroOps = Desc.NumMicroOps;
+ const unsigned NumMicroOps = IS.getNumMicroOps();
if (NumMicroOps > DispatchWidth) {
assert(AvailableEntries == DispatchWidth);
AvailableEntries = 0;
@@ -123,9 +123,10 @@ Error DispatchStage::dispatch(InstRef IR) {
for (WriteState &WS : IS.getDefs())
PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles);
- // Reserve slots in the RCU, and notify the instruction that it has been
- // dispatched to the schedulers for execution.
- IS.dispatch(RCU.reserveSlot(IR, NumMicroOps));
+ // Reserve entries in the reorder buffer.
+ unsigned RCUTokenID = RCU.dispatch(IR);
+ // Notify the instruction that it has been dispatched.
+ IS.dispatch(RCUTokenID);
// Notify listeners of the "instruction dispatched" event,
// and move IR to the next stage.
@@ -155,8 +156,10 @@ Error DispatchStage::cycleStart() {
}
bool DispatchStage::isAvailable(const InstRef &IR) const {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth);
+ const Instruction &Inst = *IR.getInstruction();
+ unsigned NumMicroOps = Inst.getNumMicroOps();
+ const InstrDesc &Desc = Inst.getDesc();
+ unsigned Required = std::min(NumMicroOps, DispatchWidth);
if (Required > AvailableEntries)
return false;
diff --git a/lib/MCA/Stages/EntryStage.cpp b/lib/MCA/Stages/EntryStage.cpp
index d2f5613a0fb6..66135790a4cd 100644
--- a/lib/MCA/Stages/EntryStage.cpp
+++ b/lib/MCA/Stages/EntryStage.cpp
@@ -33,7 +33,7 @@ void EntryStage::getNextInstruction() {
if (!SM.hasNext())
return;
SourceRef SR = SM.peekNext();
- std::unique_ptr<Instruction> Inst = llvm::make_unique<Instruction>(SR.second);
+ std::unique_ptr<Instruction> Inst = std::make_unique<Instruction>(SR.second);
CurrentInstruction = InstRef(SR.first, Inst.get());
Instructions.emplace_back(std::move(Inst));
SM.updateNext();
diff --git a/lib/MCA/Stages/ExecuteStage.cpp b/lib/MCA/Stages/ExecuteStage.cpp
index a2b361fcd1bf..2284ed7f2816 100644
--- a/lib/MCA/Stages/ExecuteStage.cpp
+++ b/lib/MCA/Stages/ExecuteStage.cpp
@@ -56,12 +56,13 @@ Error ExecuteStage::issueInstruction(InstRef &IR) {
SmallVector<InstRef, 4> Ready;
HWS.issueInstruction(IR, Used, Pending, Ready);
- NumIssuedOpcodes += IR.getInstruction()->getDesc().NumMicroOps;
+ Instruction &IS = *IR.getInstruction();
+ NumIssuedOpcodes += IS.getNumMicroOps();
notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
notifyInstructionIssued(IR, Used);
- if (IR.getInstruction()->isExecuted()) {
+ if (IS.isExecuted()) {
notifyInstructionExecuted(IR);
// FIXME: add a buffer of executed instructions.
if (Error S = moveToTheNextStage(IR))
@@ -199,7 +200,8 @@ Error ExecuteStage::execute(InstRef &IR) {
// units have been consumed.
bool IsReadyInstruction = HWS.dispatch(IR);
const Instruction &Inst = *IR.getInstruction();
- NumDispatchedOpcodes += Inst.getDesc().NumMicroOps;
+ unsigned NumMicroOps = Inst.getNumMicroOps();
+ NumDispatchedOpcodes += NumMicroOps;
notifyReservedOrReleasedBuffers(IR, /* Reserved */ true);
if (!IsReadyInstruction) {
@@ -269,13 +271,17 @@ void ExecuteStage::notifyInstructionIssued(
void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR,
bool Reserved) const {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- if (Desc.Buffers.empty())
+ uint64_t UsedBuffers = IR.getInstruction()->getDesc().UsedBuffers;
+ if (!UsedBuffers)
return;
- SmallVector<unsigned, 4> BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end());
- std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(),
- [&](uint64_t Op) { return HWS.getResourceID(Op); });
+ SmallVector<unsigned, 4> BufferIDs(countPopulation(UsedBuffers), 0);
+ for (unsigned I = 0, E = BufferIDs.size(); I < E; ++I) {
+ uint64_t CurrentBufferMask = UsedBuffers & (-UsedBuffers);
+ BufferIDs[I] = HWS.getResourceID(CurrentBufferMask);
+ UsedBuffers ^= CurrentBufferMask;
+ }
+
if (Reserved) {
for (HWEventListener *Listener : getListeners())
Listener->onReservedBuffers(IR, BufferIDs);
diff --git a/lib/MCA/Stages/RetireStage.cpp b/lib/MCA/Stages/RetireStage.cpp
index e1789dd7fa2a..f792af748bce 100644
--- a/lib/MCA/Stages/RetireStage.cpp
+++ b/lib/MCA/Stages/RetireStage.cpp
@@ -31,11 +31,11 @@ llvm::Error RetireStage::cycleStart() {
while (!RCU.isEmpty()) {
if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle)
break;
- const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken();
+ const RetireControlUnit::RUToken &Current = RCU.getCurrentToken();
if (!Current.Executed)
break;
- RCU.consumeCurrentToken();
notifyInstructionRetired(Current.IR);
+ RCU.consumeCurrentToken();
NumRetired++;
}
@@ -52,6 +52,10 @@ void RetireStage::notifyInstructionRetired(const InstRef &IR) const {
llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
const Instruction &Inst = *IR.getInstruction();
+ // Release the load/store queue entries.
+ if (Inst.isMemOp())
+ LSU.onInstructionRetired(IR);
+
for (const WriteState &WS : Inst.getDefs())
PRF.removeRegisterWrite(WS, FreedRegs);
notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 49e66f46ab3f..148c011d9cd4 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -223,8 +223,8 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
return Name.drop_back(1);
}
-Expected<uint32_t> ArchiveMemberHeader::getSize() const {
- uint32_t Ret;
+Expected<uint64_t> ArchiveMemberHeader::getSize() const {
+ uint64_t Ret;
if (StringRef(ArMemHdr->Size,
sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
std::string Buf;
@@ -550,7 +550,7 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
} else if (Buffer.startswith(Magic)) {
IsThin = false;
} else {
- Err = make_error<GenericBinaryError>("File too small to be an archive",
+ Err = make_error<GenericBinaryError>("file too small to be an archive",
object_error::invalid_file_type);
return;
}
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
index 228f6b40c5ec..5234b0e18233 100644
--- a/lib/Object/ArchiveWriter.cpp
+++ b/lib/Object/ArchiveWriter.cpp
@@ -16,8 +16,10 @@
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Object/Error.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
@@ -147,7 +149,7 @@ static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) {
static void printRestOfMemberHeader(
raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime,
- unsigned UID, unsigned GID, unsigned Perms, unsigned Size) {
+ unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) {
printWithSpacePadding(Out, sys::toTimeT(ModTime), 12);
// The format has only 6 chars for uid and gid. Truncate if the provided
@@ -164,7 +166,7 @@ static void
printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name,
const sys::TimePoint<std::chrono::seconds> &ModTime,
unsigned UID, unsigned GID, unsigned Perms,
- unsigned Size) {
+ uint64_t Size) {
printWithSpacePadding(Out, Twine(Name) + "/", 16);
printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
}
@@ -172,11 +174,10 @@ printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name,
static void
printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name,
const sys::TimePoint<std::chrono::seconds> &ModTime,
- unsigned UID, unsigned GID, unsigned Perms,
- unsigned Size) {
+ unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) {
uint64_t PosAfterHeader = Pos + 60 + Name.size();
// Pad so that even 64 bit object files are aligned.
- unsigned Pad = OffsetToAlignment(PosAfterHeader, 8);
+ unsigned Pad = offsetToAlignment(PosAfterHeader, Align(8));
unsigned NameWithPadding = Name.size() + Pad;
printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16);
printRestOfMemberHeader(Out, ModTime, UID, GID, Perms,
@@ -208,7 +209,7 @@ static void
printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable,
StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind,
bool Thin, const NewArchiveMember &M,
- sys::TimePoint<std::chrono::seconds> ModTime, unsigned Size) {
+ sys::TimePoint<std::chrono::seconds> ModTime, uint64_t Size) {
if (isBSDLike(Kind))
return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size);
@@ -243,7 +244,7 @@ struct MemberData {
static MemberData computeStringTable(StringRef Names) {
unsigned Size = Names.size();
- unsigned Pad = OffsetToAlignment(Size, 2);
+ unsigned Pad = offsetToAlignment(Size, Align(2));
std::string Header;
raw_string_ostream Out(Header);
printWithSpacePadding(Out, "//", 48);
@@ -307,8 +308,8 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly.
// We do this for all bsd formats because it simplifies aligning members.
- unsigned Alignment = isBSDLike(Kind) ? 8 : 2;
- unsigned Pad = OffsetToAlignment(Size, Alignment);
+ const Align Alignment(isBSDLike(Kind) ? 8 : 2);
+ unsigned Pad = offsetToAlignment(Size, Alignment);
Size += Pad;
if (isBSDLike(Kind)) {
@@ -464,8 +465,9 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
// uniformly. This matches the behaviour with cctools and ensures that ld64
// is happy with archives that we generate.
unsigned MemberPadding =
- isDarwin(Kind) ? OffsetToAlignment(Data.size(), 8) : 0;
- unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2);
+ isDarwin(Kind) ? offsetToAlignment(Data.size(), Align(8)) : 0;
+ unsigned TailPadding =
+ offsetToAlignment(Data.size() + MemberPadding, Align(2));
StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding);
sys::TimePoint<std::chrono::seconds> ModTime;
@@ -474,8 +476,17 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
else
ModTime = M.ModTime;
+
+ uint64_t Size = Buf.getBufferSize() + MemberPadding;
+ if (Size > object::Archive::MaxMemberSize) {
+ std::string StringMsg =
+ "File " + M.MemberName.str() + " exceeds size limit";
+ return make_error<object::GenericBinaryError>(
+ std::move(StringMsg), object::object_error::parse_failed);
+ }
+
printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M,
- ModTime, Buf.getBufferSize() + MemberPadding);
+ ModTime, Size);
Out.flush();
Expected<std::vector<unsigned>> Symbols =
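
ArchiveWriter now calls offsetToAlignment(Value, Align(N)), which yields the number of padding bytes needed to lift an offset to the next N-byte boundary (zero when already aligned). A sketch of equivalent power-of-two padding arithmetic (the helper name is invented here):

#include <cassert>
#include <cstdint>

// Number of bytes to add to Offset to reach the next multiple of Alignment
// (zero if Offset is already aligned); Alignment must be a power of two.
static uint64_t paddingToAlignment(uint64_t Offset, uint64_t Alignment) {
  assert((Alignment & (Alignment - 1)) == 0 && "alignment must be a power of 2");
  return (Alignment - (Offset & (Alignment - 1))) & (Alignment - 1);
}

int main() {
  assert(paddingToAlignment(60, 8) == 4); // 60 -> 64
  assert(paddingToAlignment(64, 8) == 0); // already aligned
  assert(paddingToAlignment(13, 2) == 1); // 13 -> 14
  return 0;
}
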
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
index a953c1d8cb80..944d2bc1bca7 100644
--- a/lib/Object/Binary.cpp
+++ b/lib/Object/Binary.cpp
@@ -18,6 +18,7 @@
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/Minidump.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/TapiUniversal.h"
#include "llvm/Object/WindowsResource.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
@@ -86,6 +87,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
return errorCodeToError(object_error::invalid_file_type);
case file_magic::minidump:
return MinidumpFile::create(Buffer);
+ case file_magic::tapi_file:
+ return TapiUniversal::create(Buffer);
}
llvm_unreachable("Unexpected Binary File Type");
}
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 854664e679df..2c0f6dc2b1e9 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -937,29 +937,6 @@ iterator_range<base_reloc_iterator> COFFObjectFile::base_relocs() const {
}
std::error_code
-COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const {
- Res = COFFHeader;
- return std::error_code();
-}
-
-std::error_code
-COFFObjectFile::getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const {
- Res = COFFBigObjHeader;
- return std::error_code();
-}
-
-std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const {
- Res = PE32Header;
- return std::error_code();
-}
-
-std::error_code
-COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const {
- Res = PE32PlusHeader;
- return std::error_code();
-}
-
-std::error_code
COFFObjectFile::getDataDirectory(uint32_t Index,
const data_directory *&Res) const {
// Error if there's no data directory or the index is out of range.
@@ -994,11 +971,12 @@ std::error_code COFFObjectFile::getSection(int32_t Index,
std::error_code COFFObjectFile::getSection(StringRef SectionName,
const coff_section *&Result) const {
Result = nullptr;
- StringRef SecName;
for (const SectionRef &Section : sections()) {
- if (std::error_code E = Section.getName(SecName))
- return E;
- if (SecName == SectionName) {
+ auto NameOrErr = Section.getName();
+ if (!NameOrErr)
+ return errorToErrorCode(NameOrErr.takeError());
+
+ if (*NameOrErr == SectionName) {
Result = getCOFFSection(Section);
return std::error_code();
}
@@ -1684,9 +1662,12 @@ std::error_code BaseRelocRef::getRVA(uint32_t &Result) const {
return std::error_code();
}
-#define RETURN_IF_ERROR(E) \
- if (E) \
- return E;
+#define RETURN_IF_ERROR(Expr) \
+ do { \
+ Error E = (Expr); \
+ if (E) \
+ return std::move(E); \
+ } while (0)
Expected<ArrayRef<UTF16>>
ResourceSectionRef::getDirStringAtOffset(uint32_t Offset) {
@@ -1715,11 +1696,168 @@ ResourceSectionRef::getTableAtOffset(uint32_t Offset) {
return *Table;
}
+Expected<const coff_resource_dir_entry &>
+ResourceSectionRef::getTableEntryAtOffset(uint32_t Offset) {
+ const coff_resource_dir_entry *Entry = nullptr;
+
+ BinaryStreamReader Reader(BBS);
+ Reader.setOffset(Offset);
+ RETURN_IF_ERROR(Reader.readObject(Entry));
+ assert(Entry != nullptr);
+ return *Entry;
+}
+
+Expected<const coff_resource_data_entry &>
+ResourceSectionRef::getDataEntryAtOffset(uint32_t Offset) {
+ const coff_resource_data_entry *Entry = nullptr;
+
+ BinaryStreamReader Reader(BBS);
+ Reader.setOffset(Offset);
+ RETURN_IF_ERROR(Reader.readObject(Entry));
+ assert(Entry != nullptr);
+ return *Entry;
+}
+
Expected<const coff_resource_dir_table &>
ResourceSectionRef::getEntrySubDir(const coff_resource_dir_entry &Entry) {
+ assert(Entry.Offset.isSubDir());
return getTableAtOffset(Entry.Offset.value());
}
+Expected<const coff_resource_data_entry &>
+ResourceSectionRef::getEntryData(const coff_resource_dir_entry &Entry) {
+ assert(!Entry.Offset.isSubDir());
+ return getDataEntryAtOffset(Entry.Offset.value());
+}
+
Expected<const coff_resource_dir_table &> ResourceSectionRef::getBaseTable() {
return getTableAtOffset(0);
}
+
+Expected<const coff_resource_dir_entry &>
+ResourceSectionRef::getTableEntry(const coff_resource_dir_table &Table,
+ uint32_t Index) {
+ if (Index >= (uint32_t)(Table.NumberOfNameEntries + Table.NumberOfIDEntries))
+ return createStringError(object_error::parse_failed, "index out of range");
+ const uint8_t *TablePtr = reinterpret_cast<const uint8_t *>(&Table);
+ ptrdiff_t TableOffset = TablePtr - BBS.data().data();
+ return getTableEntryAtOffset(TableOffset + sizeof(Table) +
+ Index * sizeof(coff_resource_dir_entry));
+}
+
+Error ResourceSectionRef::load(const COFFObjectFile *O) {
+ for (const SectionRef &S : O->sections()) {
+ Expected<StringRef> Name = S.getName();
+ if (!Name)
+ return Name.takeError();
+
+ if (*Name == ".rsrc" || *Name == ".rsrc$01")
+ return load(O, S);
+ }
+ return createStringError(object_error::parse_failed,
+ "no resource section found");
+}
+
+Error ResourceSectionRef::load(const COFFObjectFile *O, const SectionRef &S) {
+ Obj = O;
+ Section = S;
+ Expected<StringRef> Contents = Section.getContents();
+ if (!Contents)
+ return Contents.takeError();
+ BBS = BinaryByteStream(*Contents, support::little);
+ const coff_section *COFFSect = Obj->getCOFFSection(Section);
+ ArrayRef<coff_relocation> OrigRelocs = Obj->getRelocations(COFFSect);
+ Relocs.reserve(OrigRelocs.size());
+ for (const coff_relocation &R : OrigRelocs)
+ Relocs.push_back(&R);
+ std::sort(Relocs.begin(), Relocs.end(),
+ [](const coff_relocation *A, const coff_relocation *B) {
+ return A->VirtualAddress < B->VirtualAddress;
+ });
+ return Error::success();
+}
+
+Expected<StringRef>
+ResourceSectionRef::getContents(const coff_resource_data_entry &Entry) {
+ if (!Obj)
+ return createStringError(object_error::parse_failed, "no object provided");
+
+ // Find a potential relocation at the DataRVA field (first member of
+ // the coff_resource_data_entry struct).
+ const uint8_t *EntryPtr = reinterpret_cast<const uint8_t *>(&Entry);
+ ptrdiff_t EntryOffset = EntryPtr - BBS.data().data();
+ coff_relocation RelocTarget{ulittle32_t(EntryOffset), ulittle32_t(0),
+ ulittle16_t(0)};
+ auto RelocsForOffset =
+ std::equal_range(Relocs.begin(), Relocs.end(), &RelocTarget,
+ [](const coff_relocation *A, const coff_relocation *B) {
+ return A->VirtualAddress < B->VirtualAddress;
+ });
+
+ if (RelocsForOffset.first != RelocsForOffset.second) {
+ // We found a relocation with the right offset. Check that it does have
+ // the expected type.
+ const coff_relocation &R = **RelocsForOffset.first;
+ uint16_t RVAReloc;
+ switch (Obj->getMachine()) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ RVAReloc = COFF::IMAGE_REL_I386_DIR32NB;
+ break;
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ RVAReloc = COFF::IMAGE_REL_AMD64_ADDR32NB;
+ break;
+ case COFF::IMAGE_FILE_MACHINE_ARMNT:
+ RVAReloc = COFF::IMAGE_REL_ARM_ADDR32NB;
+ break;
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ RVAReloc = COFF::IMAGE_REL_ARM64_ADDR32NB;
+ break;
+ default:
+ return createStringError(object_error::parse_failed,
+ "unsupported architecture");
+ }
+ if (R.Type != RVAReloc)
+ return createStringError(object_error::parse_failed,
+ "unexpected relocation type");
+ // Get the relocation's symbol
+ Expected<COFFSymbolRef> Sym = Obj->getSymbol(R.SymbolTableIndex);
+ if (!Sym)
+ return Sym.takeError();
+ const coff_section *Section = nullptr;
+ // And the symbol's section
+ if (std::error_code EC = Obj->getSection(Sym->getSectionNumber(), Section))
+ return errorCodeToError(EC);
+ // Add the initial value of DataRVA to the symbol's offset to find the
+ // data it points at.
+ uint64_t Offset = Entry.DataRVA + Sym->getValue();
+ ArrayRef<uint8_t> Contents;
+ if (Error E = Obj->getSectionContents(Section, Contents))
+ return std::move(E);
+ if (Offset + Entry.DataSize > Contents.size())
+ return createStringError(object_error::parse_failed,
+ "data outside of section");
+ // Return a reference to the data inside the section.
+ return StringRef(reinterpret_cast<const char *>(Contents.data()) + Offset,
+ Entry.DataSize);
+ } else {
+ // Relocatable objects need a relocation for the DataRVA field.
+ if (Obj->isRelocatableObject())
+ return createStringError(object_error::parse_failed,
+ "no relocation found for DataRVA");
+
+ // Locate the section that contains the address that DataRVA points at.
+ uint64_t VA = Entry.DataRVA + Obj->getImageBase();
+ for (const SectionRef &S : Obj->sections()) {
+ if (VA >= S.getAddress() &&
+ VA + Entry.DataSize <= S.getAddress() + S.getSize()) {
+ uint64_t Offset = VA - S.getAddress();
+ Expected<StringRef> Contents = S.getContents();
+ if (!Contents)
+ return Contents.takeError();
+ return Contents->slice(Offset, Offset + Entry.DataSize);
+ }
+ }
+ return createStringError(object_error::parse_failed,
+ "address not found in image");
+ }
+}
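
The RETURN_IF_ERROR rewrite earlier in this file wraps the expansion in do { ... } while (0), the standard trick for making a multi-statement macro behave as a single statement, so it composes safely with un-braced if/else and demands a trailing semicolon; the std::move is needed because llvm::Error is move-only. A toy illustration of the wrapper, with a plain integer standing in for Error:

#include <cassert>

// Toy stand-in for an error code; 0 means success.
using Err = int;

// Without do/while(0), the two inner statements would escape an un-braced
// `if`, silently changing control flow. With it, the macro is one statement.
#define RETURN_IF_ERROR(Expr)                                                  \
  do {                                                                         \
    Err E = (Expr);                                                            \
    if (E)                                                                     \
      return E;                                                                \
  } while (0)

static Err step(bool Fail) { return Fail ? 42 : 0; }

static Err runBoth(bool FailFirst) {
  if (FailFirst)
    RETURN_IF_ERROR(step(true)); // expands safely even without braces
  RETURN_IF_ERROR(step(false));
  return 0;
}

int main() {
  assert(runBoth(true) == 42);
  assert(runBoth(false) == 0);
  return 0;
}
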
diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp
index ec15e6f69ada..11efd857d1a1 100644
--- a/lib/Object/Decompressor.cpp
+++ b/lib/Object/Decompressor.cpp
@@ -56,7 +56,7 @@ Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit,
return createError("corrupted compressed section header");
DataExtractor Extractor(SectionData, IsLittleEndian, 0);
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
if (Extractor.getUnsigned(&Offset, Is64Bit ? sizeof(Elf64_Word)
: sizeof(Elf32_Word)) !=
ELFCOMPRESS_ZLIB)
@@ -77,10 +77,15 @@ bool Decompressor::isGnuStyle(StringRef Name) {
}
bool Decompressor::isCompressed(const object::SectionRef &Section) {
- StringRef Name;
- if (Section.getName(Name))
- return false;
- return Section.isCompressed() || isGnuStyle(Name);
+ if (Section.isCompressed())
+ return true;
+
+ Expected<StringRef> SecNameOrErr = Section.getName();
+ if (SecNameOrErr)
+ return isGnuStyle(*SecNameOrErr);
+
+ consumeError(SecNameOrErr.takeError());
+ return false;
}
bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) {
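
A recurring change in these Object hunks is the migration of Section.getName from an out-parameter returning std::error_code to a return of Expected<StringRef>, where callers either propagate the failure with takeError() or deliberately drop it with consumeError(), as isCompressed does above to fall back to false. A condensed sketch of both calling patterns around a homemade Expected-like type (llvm::Expected itself additionally aborts if an error is destroyed unchecked):

#include <cassert>
#include <string>
#include <utility>

// A much-simplified stand-in for llvm::Expected<StringRef>: holds either a
// value or an error message.
class ExpectedName {
  std::string Value;
  std::string Err;
  bool HasValue;

  ExpectedName(std::string V, std::string E, bool H)
      : Value(std::move(V)), Err(std::move(E)), HasValue(H) {}

public:
  static ExpectedName success(std::string V) {
    return {std::move(V), "", true};
  }
  static ExpectedName failure(std::string E) {
    return {"", std::move(E), false};
  }

  explicit operator bool() const { return HasValue; }
  const std::string &operator*() const {
    assert(HasValue);
    return Value;
  }
  std::string takeError() { return std::move(Err); } // propagate to caller
  void consumeError() { Err.clear(); }               // deliberately discard
};

static ExpectedName getName(bool Ok) {
  return Ok ? ExpectedName::success(".text")
            : ExpectedName::failure("malformed string table");
}

// The fallback pattern from Decompressor::isCompressed: on error, consume it
// and return a conservative default instead of propagating it upward.
static bool isInteresting(bool Ok) {
  ExpectedName N = getName(Ok);
  if (N)
    return *N == ".text";
  N.consumeError();
  return false;
}

int main() {
  assert(isInteresting(true));
  assert(!isInteresting(false));
  return 0;
}
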
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index 8660b1a64bdd..d491288579df 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -255,6 +255,8 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ADDRSIG);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_DEPENDENT_LIBRARIES);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART);
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_EHDR);
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_PHDR);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef);
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index c7b715793048..bf6ffd6c37b9 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -43,7 +43,16 @@ const EnumEntry<unsigned> llvm::object::ElfSymbolTypes[NumElfSymbolTypes] = {
{"File", "FILE", ELF::STT_FILE},
{"Common", "COMMON", ELF::STT_COMMON},
{"TLS", "TLS", ELF::STT_TLS},
- {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC}};
+ {"Unknown", "<unknown>: 7", 7},
+ {"Unknown", "<unknown>: 8", 8},
+ {"Unknown", "<unknown>: 9", 9},
+ {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC},
+ {"OS Specific", "<OS specific>: 11", 11},
+ {"OS Specific", "<OS specific>: 12", 12},
+ {"Proc Specific", "<processor specific>: 13", 13},
+ {"Proc Specific", "<processor specific>: 14", 14},
+ {"Proc Specific", "<processor specific>: 15", 15}
+};
ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source)
: ObjectFile(Type, Source) {}
@@ -54,7 +63,7 @@ createPtr(MemoryBufferRef Object) {
auto Ret = ELFObjectFile<ELFT>::create(Object);
if (Error E = Ret.takeError())
return std::move(E);
- return make_unique<ELFObjectFile<ELFT>>(std::move(*Ret));
+ return std::make_unique<ELFObjectFile<ELFT>>(std::move(*Ret));
}
Expected<std::unique_ptr<ObjectFile>>
@@ -194,7 +203,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
default:
break;
case ARMBuildAttrs::Not_Allowed:
- Features.AddFeature("vfp2d16sp", false);
+ Features.AddFeature("vfp2sp", false);
Features.AddFeature("vfp3d16sp", false);
Features.AddFeature("vfp4d16sp", false);
break;
@@ -347,6 +356,21 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
case ARMBuildAttrs::v7E_M:
Triple += "v7em";
break;
+ case ARMBuildAttrs::v8_A:
+ Triple += "v8a";
+ break;
+ case ARMBuildAttrs::v8_R:
+ Triple += "v8r";
+ break;
+ case ARMBuildAttrs::v8_M_Base:
+ Triple += "v8m.base";
+ break;
+ case ARMBuildAttrs::v8_M_Main:
+ Triple += "v8m.main";
+ break;
+ case ARMBuildAttrs::v8_1_M_Main:
+ Triple += "v8.1m.main";
+ break;
}
}
if (!isLittleEndian())
@@ -383,9 +407,13 @@ ELFObjectFileBase::getPltAddresses() const {
return {};
Optional<SectionRef> Plt = None, RelaPlt = None, GotPlt = None;
for (const SectionRef &Section : sections()) {
- StringRef Name;
- if (Section.getName(Name))
+ Expected<StringRef> NameOrErr = Section.getName();
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
continue;
+ }
+ StringRef Name = *NameOrErr;
+
if (Name == ".plt")
Plt = Section;
else if (Name == ".rela.plt" || Name == ".rel.plt")
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 5aec844003c0..c0c873f97354 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -57,12 +57,6 @@ namespace {
} // end anonymous namespace
-static const std::array<StringRef, 17> validArchs = {
- "i386", "x86_64", "x86_64h", "armv4t", "arm", "armv5e",
- "armv6", "armv6m", "armv7", "armv7em", "armv7k", "armv7m",
- "armv7s", "arm64", "arm64_32", "ppc", "ppc64",
-};
-
static Error malformedError(const Twine &Msg) {
return make_error<GenericBinaryError>("truncated or malformed object (" +
Msg + ")",
@@ -1951,6 +1945,11 @@ uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
return SectSize;
}
+ArrayRef<uint8_t> MachOObjectFile::getSectionContents(uint32_t Offset,
+ uint64_t Size) const {
+ return arrayRefFromStringRef(getData().substr(Offset, Size));
+}
+
Expected<ArrayRef<uint8_t>>
MachOObjectFile::getSectionContents(DataRefImpl Sec) const {
uint32_t Offset;
@@ -1966,7 +1965,7 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec) const {
Size = Sect.size;
}
- return arrayRefFromStringRef(getData().substr(Offset, Size));
+ return getSectionContents(Offset, Size);
}
uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const {
@@ -1992,13 +1991,12 @@ Expected<SectionRef> MachOObjectFile::getSection(unsigned SectionIndex) const {
}
Expected<SectionRef> MachOObjectFile::getSection(StringRef SectionName) const {
- StringRef SecName;
for (const SectionRef &Section : sections()) {
- if (std::error_code E = Section.getName(SecName))
- return errorCodeToError(E);
- if (SecName == SectionName) {
+ auto NameOrErr = Section.getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ if (*NameOrErr == SectionName)
return Section;
- }
}
return errorCodeToError(object_error::parse_failed);
}
@@ -2724,11 +2722,19 @@ Triple MachOObjectFile::getHostArch() {
}
bool MachOObjectFile::isValidArch(StringRef ArchFlag) {
- return std::find(validArchs.cbegin(), validArchs.cend(), ArchFlag) !=
- validArchs.cend();
+ auto validArchs = getValidArchs();
+ return llvm::find(validArchs, ArchFlag) != validArchs.end();
}
-ArrayRef<StringRef> MachOObjectFile::getValidArchs() { return validArchs; }
+ArrayRef<StringRef> MachOObjectFile::getValidArchs() {
+ static const std::array<StringRef, 17> validArchs = {{
+ "i386", "x86_64", "x86_64h", "armv4t", "arm", "armv5e",
+ "armv6", "armv6m", "armv7", "armv7em", "armv7k", "armv7m",
+ "armv7s", "arm64", "arm64_32", "ppc", "ppc64",
+ }};
+
+ return validArchs;
+}
Triple::ArchType MachOObjectFile::getArch() const {
return getArch(getCPUType(*this));
@@ -3427,7 +3433,7 @@ iterator_range<rebase_iterator>
MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O,
ArrayRef<uint8_t> Opcodes, bool is64) {
if (O->BindRebaseSectionTable == nullptr)
- O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O);
+ O->BindRebaseSectionTable = std::make_unique<BindRebaseSegInfo>(O);
MachORebaseEntry Start(&Err, O, Opcodes, is64);
Start.moveToFirst();
@@ -3993,7 +3999,11 @@ BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) {
uint64_t CurSegAddress;
for (const SectionRef &Section : Obj->sections()) {
SectionInfo Info;
- Section.getName(Info.SectionName);
+ Expected<StringRef> NameOrErr = Section.getName();
+ if (!NameOrErr)
+ consumeError(NameOrErr.takeError());
+ else
+ Info.SectionName = *NameOrErr;
Info.Address = Section.getAddress();
Info.Size = Section.getSize();
Info.SegmentName =
@@ -4094,7 +4104,7 @@ MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O,
ArrayRef<uint8_t> Opcodes, bool is64,
MachOBindEntry::Kind BKind) {
if (O->BindRebaseSectionTable == nullptr)
- O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O);
+ O->BindRebaseSectionTable = std::make_unique<BindRebaseSegInfo>(O);
MachOBindEntry Start(&Err, O, Opcodes, is64, BKind);
Start.moveToFirst();
@@ -4610,7 +4620,7 @@ void MachOObjectFile::ReadULEB128s(uint64_t Index,
SmallVectorImpl<uint64_t> &Out) const {
DataExtractor extractor(ObjectFile::getData(), true, 0);
- uint32_t offset = Index;
+ uint64_t offset = Index;
uint64_t data = 0;
while (uint64_t delta = extractor.getULEB128(&offset)) {
data += delta;
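
The ReadULEB128s fix widens the extractor offset to 64 bits; the surrounding loop decodes a ULEB128 stream, where each value is stored 7 bits per byte, least-significant group first, with the top bit of a byte marking continuation. A self-contained decoder sketch:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Decode one ULEB128 value starting at Offset; advances Offset past it.
static uint64_t decodeULEB128(const std::vector<uint8_t> &Data,
                              std::size_t &Offset) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  uint8_t Byte;
  do {
    Byte = Data[Offset++];
    Value |= uint64_t(Byte & 0x7f) << Shift; // 7 payload bits per byte
    Shift += 7;
  } while (Byte & 0x80); // high bit set means another byte follows
  return Value;
}

int main() {
  // 624485 == 0x98765 encodes as e5 8e 26.
  std::vector<uint8_t> Data = {0xe5, 0x8e, 0x26};
  std::size_t Offset = 0;
  assert(decodeULEB128(Data, Offset) == 624485);
  assert(Offset == 3);
  return 0;
}
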
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index b3f0993412c6..a178ecde949e 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -155,15 +155,16 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err)
") extends past the end of the file");
return;
}
-#define MAXSECTALIGN 15 /* 2**15 or 0x8000 */
- if (A.getAlign() > MAXSECTALIGN) {
- Err = malformedError("align (2^" + Twine(A.getAlign()) + ") too large "
- "for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" +
- Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) +
- ") (maximum 2^" + Twine(MAXSECTALIGN) + ")");
+
+ if (A.getAlign() > MaxSectionAlignment) {
+ Err = malformedError("align (2^" + Twine(A.getAlign()) +
+ ") too large for cputype (" + Twine(A.getCPUType()) +
+ ") cpusubtype (" +
+ Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) +
+ ") (maximum 2^" + Twine(MaxSectionAlignment) + ")");
return;
}
- if(A.getOffset() % (1 << A.getAlign()) != 0){
+ if(A.getOffset() % (1ull << A.getAlign()) != 0){
Err = malformedError("offset: " + Twine(A.getOffset()) +
" for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" +
Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) +
@@ -209,19 +210,34 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err)
Err = Error::success();
}
-Expected<std::unique_ptr<MachOObjectFile>>
+Expected<MachOUniversalBinary::ObjectForArch>
MachOUniversalBinary::getObjectForArch(StringRef ArchName) const {
if (Triple(ArchName).getArch() == Triple::ArchType::UnknownArch)
return make_error<GenericBinaryError>("Unknown architecture "
"named: " +
ArchName,
object_error::arch_not_found);
-
- for (auto &Obj : objects())
+ for (const auto &Obj : objects())
if (Obj.getArchFlagName() == ArchName)
- return Obj.getAsObjectFile();
+ return Obj;
return make_error<GenericBinaryError>("fat file does not "
"contain " +
ArchName,
object_error::arch_not_found);
}
+
+Expected<std::unique_ptr<MachOObjectFile>>
+MachOUniversalBinary::getMachOObjectForArch(StringRef ArchName) const {
+ Expected<ObjectForArch> O = getObjectForArch(ArchName);
+ if (!O)
+ return O.takeError();
+ return O->getAsObjectFile();
+}
+
+Expected<std::unique_ptr<Archive>>
+MachOUniversalBinary::getArchiveForArch(StringRef ArchName) const {
+ Expected<ObjectForArch> O = getObjectForArch(ArchName);
+ if (!O)
+ return O.takeError();
+ return O->getAsArchive();
+}
diff --git a/lib/Object/Minidump.cpp b/lib/Object/Minidump.cpp
index 7b5b21558699..3e932fe7be28 100644
--- a/lib/Object/Minidump.cpp
+++ b/lib/Object/Minidump.cpp
@@ -53,13 +53,30 @@ Expected<std::string> MinidumpFile::getString(size_t Offset) const {
return Result;
}
+Expected<iterator_range<MinidumpFile::MemoryInfoIterator>>
+MinidumpFile::getMemoryInfoList() const {
+ Optional<ArrayRef<uint8_t>> Stream = getRawStream(StreamType::MemoryInfoList);
+ if (!Stream)
+ return createError("No such stream");
+ auto ExpectedHeader =
+ getDataSliceAs<minidump::MemoryInfoListHeader>(*Stream, 0, 1);
+ if (!ExpectedHeader)
+ return ExpectedHeader.takeError();
+ const minidump::MemoryInfoListHeader &H = ExpectedHeader.get()[0];
+ Expected<ArrayRef<uint8_t>> Data =
+ getDataSlice(*Stream, H.SizeOfHeader, H.SizeOfEntry * H.NumberOfEntries);
+ if (!Data)
+ return Data.takeError();
+ return make_range(MemoryInfoIterator(*Data, H.SizeOfEntry),
+ MemoryInfoIterator({}, H.SizeOfEntry));
+}
+
template <typename T>
-Expected<ArrayRef<T>> MinidumpFile::getListStream(StreamType Stream) const {
- auto OptionalStream = getRawStream(Stream);
- if (!OptionalStream)
+Expected<ArrayRef<T>> MinidumpFile::getListStream(StreamType Type) const {
+ Optional<ArrayRef<uint8_t>> Stream = getRawStream(Type);
+ if (!Stream)
return createError("No such stream");
- auto ExpectedSize =
- getDataSliceAs<support::ulittle32_t>(*OptionalStream, 0, 1);
+ auto ExpectedSize = getDataSliceAs<support::ulittle32_t>(*Stream, 0, 1);
if (!ExpectedSize)
return ExpectedSize.takeError();
@@ -69,10 +86,10 @@ Expected<ArrayRef<T>> MinidumpFile::getListStream(StreamType Stream) const {
// Some producers insert additional padding bytes to align the list to an
// 8-byte boundary. Check for that by comparing the list size with the overall
// stream size.
- if (ListOffset + sizeof(T) * ListSize < OptionalStream->size())
+ if (ListOffset + sizeof(T) * ListSize < Stream->size())
ListOffset = 8;
- return getDataSliceAs<T>(*OptionalStream, ListOffset, ListSize);
+ return getDataSliceAs<T>(*Stream, ListOffset, ListSize);
}
template Expected<ArrayRef<Module>>
MinidumpFile::getListStream(StreamType) const;
@@ -109,13 +126,14 @@ MinidumpFile::create(MemoryBufferRef Source) {
return ExpectedStreams.takeError();
DenseMap<StreamType, std::size_t> StreamMap;
- for (const auto &Stream : llvm::enumerate(*ExpectedStreams)) {
- StreamType Type = Stream.value().Type;
- const LocationDescriptor &Loc = Stream.value().Location;
+ for (const auto &StreamDescriptor : llvm::enumerate(*ExpectedStreams)) {
+ StreamType Type = StreamDescriptor.value().Type;
+ const LocationDescriptor &Loc = StreamDescriptor.value().Location;
- auto ExpectedStream = getDataSlice(Data, Loc.RVA, Loc.DataSize);
- if (!ExpectedStream)
- return ExpectedStream.takeError();
+ Expected<ArrayRef<uint8_t>> Stream =
+ getDataSlice(Data, Loc.RVA, Loc.DataSize);
+ if (!Stream)
+ return Stream.takeError();
if (Type == StreamType::Unused && Loc.DataSize == 0) {
// Ignore dummy streams. This is technically ill-formed, but a number of
@@ -128,7 +146,7 @@ MinidumpFile::create(MemoryBufferRef Source) {
return createError("Cannot handle one of the minidump streams");
// Update the directory map, checking for duplicate stream types.
- if (!StreamMap.try_emplace(Type, Stream.index()).second)
+ if (!StreamMap.try_emplace(Type, StreamDescriptor.index()).second)
return createError("Duplicate stream type");
}
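A sketch of consuming the new getMemoryInfoList accessor. The minidump::MemoryInfo field names (BaseAddress, RegionSize) are assumptions based on the Windows MINIDUMP_MEMORY_INFO layout.

#include "llvm/Object/Minidump.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static Error dumpRegions(MemoryBufferRef Buf) {
  auto FileOrErr = MinidumpFile::create(Buf);
  if (!FileOrErr)
    return FileOrErr.takeError();
  auto RangeOrErr = (*FileOrErr)->getMemoryInfoList();
  if (!RangeOrErr)
    return RangeOrErr.takeError();
  // SizeOfEntry-aware stepping is handled by MemoryInfoIterator itself.
  for (const minidump::MemoryInfo &Info : *RangeOrErr)
    outs() << format_hex(Info.BaseAddress, 18) << " size " << Info.RegionSize
           << "\n";
  return Error::success();
}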
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index d84798cc6dd0..b486e9f5c9a8 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -138,7 +138,7 @@ LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR,
char **ErrorMessage) {
auto universal = cast<MachOUniversalBinary>(unwrap(BR));
Expected<std::unique_ptr<ObjectFile>> ObjOrErr(
- universal->getObjectForArch({Arch, ArchLen}));
+ universal->getMachOObjectForArch({Arch, ArchLen}));
if (!ObjOrErr) {
*ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str());
return nullptr;
@@ -251,10 +251,10 @@ void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) {
// SectionRef accessors
const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
- StringRef ret;
- if (std::error_code ec = (*unwrap(SI))->getName(ret))
- report_fatal_error(ec.message());
- return ret.data();
+ auto NameOrErr = (*unwrap(SI))->getName();
+ if (!NameOrErr)
+ report_fatal_error(NameOrErr.takeError());
+ return NameOrErr->data();
}
uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
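For reference, a sketch of the C API path exercised by the change above; LLVMGetSectionName now routes through the Expected-based getName and still aborts via report_fatal_error on malformed input.

#include "llvm-c/Object.h"
#include <stdio.h>

static void listSections(LLVMBinaryRef BinRef) {
  LLVMSectionIteratorRef It = LLVMObjectFileCopySectionIterator(BinRef);
  while (!LLVMObjectFileIsSectionIteratorAtEnd(BinRef, It)) {
    // Fatal-errors (rather than returning) if the name cannot be recovered.
    printf("%s\n", LLVMGetSectionName(It));
    LLVMMoveToNextSection(It);
  }
  LLVMDisposeSectionIterator(It);
}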
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 101f5dcc0821..e0e63a5a7d76 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -67,8 +67,10 @@ Error ObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const {
uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; }
bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const {
- if (Expected<StringRef> NameOrErr = getSectionName(Sec))
+ Expected<StringRef> NameOrErr = getSectionName(Sec);
+ if (NameOrErr)
return *NameOrErr == ".llvmbc";
+ consumeError(NameOrErr.takeError());
return false;
}
@@ -82,7 +84,8 @@ bool ObjectFile::isBerkeleyData(DataRefImpl Sec) const {
return isSectionData(Sec);
}
-section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const {
+Expected<section_iterator>
+ObjectFile::getRelocatedSection(DataRefImpl Sec) const {
return section_iterator(SectionRef(Sec, this));
}
@@ -103,7 +106,7 @@ Triple ObjectFile::makeTriple() const {
TheTriple.setObjectFormat(Triple::MachO);
if (isCOFF()) {
- const auto COFFObj = dyn_cast<COFFObjectFile>(this);
+ const auto COFFObj = cast<COFFObjectFile>(this);
if (COFFObj->getArch() == Triple::thumb)
TheTriple.setTriple("thumbv7-windows");
}
@@ -127,6 +130,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) {
case file_magic::pdb:
case file_magic::minidump:
return errorCodeToError(object_error::invalid_file_type);
+ case file_magic::tapi_file:
+ return errorCodeToError(object_error::invalid_file_type);
case file_magic::elf:
case file_magic::elf_relocatable:
case file_magic::elf_executable:
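The isSectionBitcode change illustrates the Expected<T> discipline: a failed Expected must be consumed before destruction, or it asserts in debug builds. A generic sketch of the pattern:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"

template <typename GetNameFn>
static bool sectionHasName(GetNameFn GetName, llvm::StringRef Wanted) {
  llvm::Expected<llvm::StringRef> NameOrErr = GetName();
  if (NameOrErr)
    return *NameOrErr == Wanted;
  // Deliberately ignore the failure: no name simply means no match.
  llvm::consumeError(NameOrErr.takeError());
  return false;
}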
diff --git a/lib/Object/RelocationResolver.cpp b/lib/Object/RelocationResolver.cpp
index 0a243f32e12c..ca89f5671b8a 100644
--- a/lib/Object/RelocationResolver.cpp
+++ b/lib/Object/RelocationResolver.cpp
@@ -30,6 +30,7 @@ static bool supportsX86_64(uint64_t Type) {
case ELF::R_X86_64_DTPOFF32:
case ELF::R_X86_64_DTPOFF64:
case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_PC64:
case ELF::R_X86_64_32:
case ELF::R_X86_64_32S:
return true;
@@ -47,6 +48,7 @@ static uint64_t resolveX86_64(RelocationRef R, uint64_t S, uint64_t A) {
case ELF::R_X86_64_DTPOFF64:
return S + getELFAddend(R);
case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_PC64:
return S + getELFAddend(R) - R.getOffset();
case ELF::R_X86_64_32:
case ELF::R_X86_64_32S:
@@ -90,9 +92,9 @@ static bool supportsBPF(uint64_t Type) {
static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) {
switch (R.getType()) {
case ELF::R_BPF_64_32:
- return S & 0xFFFFFFFF;
+ return (S + A) & 0xFFFFFFFF;
case ELF::R_BPF_64_64:
- return S;
+ return S + A;
default:
llvm_unreachable("Invalid relocation type");
}
@@ -335,6 +337,8 @@ static bool supportsRISCV(uint64_t Type) {
case ELF::R_RISCV_NONE:
case ELF::R_RISCV_32:
case ELF::R_RISCV_64:
+ case ELF::R_RISCV_SET6:
+ case ELF::R_RISCV_SUB6:
case ELF::R_RISCV_ADD8:
case ELF::R_RISCV_SUB8:
case ELF::R_RISCV_ADD16:
@@ -358,6 +362,10 @@ static uint64_t resolveRISCV(RelocationRef R, uint64_t S, uint64_t A) {
return (S + RA) & 0xFFFFFFFF;
case ELF::R_RISCV_64:
return S + RA;
+ case ELF::R_RISCV_SET6:
+ return (A + (S + RA)) & 0xFF;
+ case ELF::R_RISCV_SUB6:
+ return (A - (S + RA)) & 0xFF;
case ELF::R_RISCV_ADD8:
return (A + (S + RA)) & 0xFF;
case ELF::R_RISCV_SUB8:
@@ -420,6 +428,47 @@ static uint64_t resolveCOFFX86_64(RelocationRef R, uint64_t S, uint64_t A) {
}
}
+static bool supportsCOFFARM(uint64_t Type) {
+ switch (Type) {
+ case COFF::IMAGE_REL_ARM_SECREL:
+ case COFF::IMAGE_REL_ARM_ADDR32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveCOFFARM(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case COFF::IMAGE_REL_ARM_SECREL:
+ case COFF::IMAGE_REL_ARM_ADDR32:
+ return (S + A) & 0xFFFFFFFF;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsCOFFARM64(uint64_t Type) {
+ switch (Type) {
+ case COFF::IMAGE_REL_ARM64_SECREL:
+ case COFF::IMAGE_REL_ARM64_ADDR64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveCOFFARM64(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case COFF::IMAGE_REL_ARM64_SECREL:
+ return (S + A) & 0xFFFFFFFF;
+ case COFF::IMAGE_REL_ARM64_ADDR64:
+ return S + A;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
static bool supportsMachOX86_64(uint64_t Type) {
return Type == MachO::X86_64_RELOC_UNSIGNED;
}
@@ -472,9 +521,19 @@ static uint64_t resolveWasm32(RelocationRef R, uint64_t S, uint64_t A) {
std::pair<bool (*)(uint64_t), RelocationResolver>
getRelocationResolver(const ObjectFile &Obj) {
if (Obj.isCOFF()) {
- if (Obj.getBytesInAddress() == 8)
+ switch (Obj.getArch()) {
+ case Triple::x86_64:
return {supportsCOFFX86_64, resolveCOFFX86_64};
- return {supportsCOFFX86, resolveCOFFX86};
+ case Triple::x86:
+ return {supportsCOFFX86, resolveCOFFX86};
+ case Triple::arm:
+ case Triple::thumb:
+ return {supportsCOFFARM, resolveCOFFARM};
+ case Triple::aarch64:
+ return {supportsCOFFARM64, resolveCOFFARM64};
+ default:
+ return {nullptr, nullptr};
+ }
} else if (Obj.isELF()) {
if (Obj.getBytesInAddress() == 8) {
switch (Obj.getArch()) {
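A sketch of how a consumer applies the per-architecture resolver table extended above, now covering COFF ARM and ARM64:

#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/RelocationResolver.h"

using namespace llvm;
using namespace llvm::object;

static uint64_t applyReloc(const ObjectFile &Obj, RelocationRef R,
                           uint64_t SymValue, uint64_t Addend) {
  std::pair<bool (*)(uint64_t), RelocationResolver> P =
      getRelocationResolver(Obj);
  // Unsupported targets return {nullptr, nullptr}; check before calling.
  if (P.first && P.first(R.getType()))
    return P.second(R, SymValue, Addend);
  return Addend; // Unknown relocation: leave the value untouched.
}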
diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp
index 2b152b7d8da3..3db4ad9ed14b 100644
--- a/lib/Object/SymbolicFile.cpp
+++ b/lib/Object/SymbolicFile.cpp
@@ -53,6 +53,7 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
case file_magic::windows_resource:
case file_magic::pdb:
case file_magic::minidump:
+ case file_magic::tapi_file:
return errorCodeToError(object_error::invalid_file_type);
case file_magic::elf:
case file_magic::elf_executable:
diff --git a/lib/Object/TapiFile.cpp b/lib/Object/TapiFile.cpp
new file mode 100644
index 000000000000..c409bd8e5995
--- /dev/null
+++ b/lib/Object/TapiFile.cpp
@@ -0,0 +1,104 @@
+//===- TapiFile.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Text-based Dynamic Library Stub format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/TapiFile.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace MachO;
+using namespace object;
+
+static constexpr StringLiteral ObjC1ClassNamePrefix = ".objc_class_name_";
+static constexpr StringLiteral ObjC2ClassNamePrefix = "_OBJC_CLASS_$_";
+static constexpr StringLiteral ObjC2MetaClassNamePrefix = "_OBJC_METACLASS_$_";
+static constexpr StringLiteral ObjC2EHTypePrefix = "_OBJC_EHTYPE_$_";
+static constexpr StringLiteral ObjC2IVarPrefix = "_OBJC_IVAR_$_";
+
+static uint32_t getFlags(const Symbol *Sym) {
+ uint32_t Flags = BasicSymbolRef::SF_Global;
+ if (Sym->isUndefined())
+ Flags |= BasicSymbolRef::SF_Undefined;
+ else
+ Flags |= BasicSymbolRef::SF_Exported;
+
+ if (Sym->isWeakDefined() || Sym->isWeakReferenced())
+ Flags |= BasicSymbolRef::SF_Weak;
+
+ return Flags;
+}
+
+TapiFile::TapiFile(MemoryBufferRef Source, const InterfaceFile &interface,
+ Architecture Arch)
+ : SymbolicFile(ID_TapiFile, Source) {
+ for (const auto *Symbol : interface.symbols()) {
+ if (!Symbol->getArchitectures().has(Arch))
+ continue;
+
+ switch (Symbol->getKind()) {
+ case SymbolKind::GlobalSymbol:
+ Symbols.emplace_back(StringRef(), Symbol->getName(), getFlags(Symbol));
+ break;
+ case SymbolKind::ObjectiveCClass:
+ if (interface.getPlatforms().count(PlatformKind::macOS) &&
+ Arch == AK_i386) {
+ Symbols.emplace_back(ObjC1ClassNamePrefix, Symbol->getName(),
+ getFlags(Symbol));
+ } else {
+ Symbols.emplace_back(ObjC2ClassNamePrefix, Symbol->getName(),
+ getFlags(Symbol));
+ Symbols.emplace_back(ObjC2MetaClassNamePrefix, Symbol->getName(),
+ getFlags(Symbol));
+ }
+ break;
+ case SymbolKind::ObjectiveCClassEHType:
+ Symbols.emplace_back(ObjC2EHTypePrefix, Symbol->getName(),
+ getFlags(Symbol));
+ break;
+ case SymbolKind::ObjectiveCInstanceVariable:
+ Symbols.emplace_back(ObjC2IVarPrefix, Symbol->getName(),
+ getFlags(Symbol));
+ break;
+ }
+ }
+}
+
+TapiFile::~TapiFile() = default;
+
+void TapiFile::moveSymbolNext(DataRefImpl &DRI) const {
+ const auto *Sym = reinterpret_cast<const Symbol *>(DRI.p);
+ DRI.p = reinterpret_cast<uintptr_t>(++Sym);
+}
+
+Error TapiFile::printSymbolName(raw_ostream &OS, DataRefImpl DRI) const {
+ const auto *Sym = reinterpret_cast<const Symbol *>(DRI.p);
+ OS << Sym->Prefix << Sym->Name;
+ return Error::success();
+}
+
+uint32_t TapiFile::getSymbolFlags(DataRefImpl DRI) const {
+ const auto *Sym = reinterpret_cast<const Symbol *>(DRI.p);
+ return Sym->Flags;
+}
+
+basic_symbol_iterator TapiFile::symbol_begin() const {
+ DataRefImpl DRI;
+ DRI.p = reinterpret_cast<uintptr_t>(&*Symbols.begin());
+ return BasicSymbolRef{DRI, this};
+}
+
+basic_symbol_iterator TapiFile::symbol_end() const {
+ DataRefImpl DRI;
+ DRI.p = reinterpret_cast<uintptr_t>(&*Symbols.end());
+ return BasicSymbolRef{DRI, this};
+}
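Since TapiFile is a SymbolicFile, generic symbol iteration applies; printSymbolName prepends the Objective-C prefixes chosen in the constructor above. A sketch:

#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static void listSymbols(const SymbolicFile &File) {
  for (const BasicSymbolRef &Sym : File.symbols()) {
    // printName forwards to printSymbolName, i.e. Prefix + Name for TAPI.
    if (Error E = Sym.printName(outs()))
      consumeError(std::move(E));
    outs() << "\n";
  }
}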
diff --git a/lib/Object/TapiUniversal.cpp b/lib/Object/TapiUniversal.cpp
new file mode 100644
index 000000000000..b3273e345a61
--- /dev/null
+++ b/lib/Object/TapiUniversal.cpp
@@ -0,0 +1,54 @@
+//===- TapiUniversal.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Text-based Dynamic Library Stub format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/TapiUniversal.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TextAPI/MachO/TextAPIReader.h"
+
+using namespace llvm;
+using namespace MachO;
+using namespace object;
+
+TapiUniversal::TapiUniversal(MemoryBufferRef Source, Error &Err)
+ : Binary(ID_TapiUniversal, Source) {
+ auto Result = TextAPIReader::get(Source);
+ ErrorAsOutParameter ErrAsOutParam(&Err);
+ if (!Result) {
+ Err = Result.takeError();
+ return;
+ }
+ ParsedFile = std::move(Result.get());
+
+ auto Archs = ParsedFile->getArchitectures();
+ for (auto Arch : Archs)
+ Architectures.emplace_back(Arch);
+}
+
+TapiUniversal::~TapiUniversal() = default;
+
+Expected<std::unique_ptr<TapiFile>>
+TapiUniversal::ObjectForArch::getAsObjectFile() const {
+ return std::unique_ptr<TapiFile>(new TapiFile(Parent->getMemoryBufferRef(),
+ *Parent->ParsedFile.get(),
+ Parent->Architectures[Index]));
+}
+
+Expected<std::unique_ptr<TapiUniversal>>
+TapiUniversal::create(MemoryBufferRef Source) {
+ Error Err = Error::success();
+ std::unique_ptr<TapiUniversal> Ret(new TapiUniversal(Source, Err));
+ if (Err)
+ return std::move(Err);
+ return std::move(Ret);
+}
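A hedged sketch of loading a text-based stub through the new TapiUniversal entry point; the objects() range accessor is assumed to mirror MachOUniversalBinary's shape.

#include "llvm/Object/TapiUniversal.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::object;

static Error visitSlices(MemoryBufferRef Buf) {
  auto UnivOrErr = TapiUniversal::create(Buf);
  if (!UnivOrErr)
    return UnivOrErr.takeError();
  for (const auto &Slice : (*UnivOrErr)->objects()) {
    auto FileOrErr = Slice.getAsObjectFile(); // One TapiFile per architecture.
    if (!FileOrErr)
      return FileOrErr.takeError();
  }
  return Error::success();
}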
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index 82aa1830dced..014b403556df 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -56,7 +56,7 @@ LLVM_DUMP_METHOD void WasmSymbol::dump() const { print(dbgs()); }
Expected<std::unique_ptr<WasmObjectFile>>
ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) {
Error Err = Error::success();
- auto ObjectFile = llvm::make_unique<WasmObjectFile>(Buffer, Err);
+ auto ObjectFile = std::make_unique<WasmObjectFile>(Buffer, Err);
if (Err)
return std::move(Err);
@@ -781,7 +781,7 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
break;
case wasm::R_WASM_GLOBAL_INDEX_LEB:
// R_WASM_GLOBAL_INDEX_LEB can be used against function and data
- // symbols to refer to thier GOT enties.
+ // symbols to refer to their GOT entries.
if (!isValidGlobalSymbol(Reloc.Index) &&
!isValidDataSymbol(Reloc.Index) &&
!isValidFunctionSymbol(Reloc.Index))
@@ -881,12 +881,9 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
Sig.Params.push_back(wasm::ValType(ParamType));
}
uint32_t ReturnCount = readVaruint32(Ctx);
- if (ReturnCount) {
- if (ReturnCount != 1) {
- return make_error<GenericBinaryError>(
- "Multiple return types not supported", object_error::parse_failed);
- }
- Sig.Returns.push_back(wasm::ValType(readUint8(Ctx)));
+ while (ReturnCount--) {
+ uint32_t ReturnType = readUint8(Ctx);
+ Sig.Returns.push_back(wasm::ValType(ReturnType));
}
Signatures.push_back(std::move(Sig));
}
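The type-section change above replaces the 0-or-1 return check with a counted loop, matching the wasm multi-value encoding: a varuint32 result count followed by one value-type byte per result. A minimal sketch of that shape:

#include <cstdint>
#include <vector>

// Read Count value-type bytes; the caller has already decoded Count as a
// varuint32, as readVaruint32 does above.
static std::vector<uint8_t> readResultTypes(const uint8_t *&Ptr,
                                            uint32_t Count) {
  std::vector<uint8_t> Returns;
  Returns.reserve(Count);
  while (Count--)
    Returns.push_back(*Ptr++); // One value-type byte per result.
  return Returns;
}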
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index d76e1231684c..10717718b201 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -30,15 +30,24 @@ namespace object {
if (auto EC = X) \
return EC;
+#define UNWRAP_REF_OR_RETURN(Name, Expr) \
+ auto Name##OrErr = Expr; \
+ if (!Name##OrErr) \
+ return Name##OrErr.takeError(); \
+ const auto &Name = *Name##OrErr;
+
+#define UNWRAP_OR_RETURN(Name, Expr) \
+ auto Name##OrErr = Expr; \
+ if (!Name##OrErr) \
+ return Name##OrErr.takeError(); \
+ auto Name = *Name##OrErr;
+
const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t);
// COFF files seem to be inconsistent with alignment between sections; just use
// 8-byte alignment because it makes everyone happy.
const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t);
-uint32_t WindowsResourceParser::TreeNode::StringCount = 0;
-uint32_t WindowsResourceParser::TreeNode::DataCount = 0;
-
WindowsResource::WindowsResource(MemoryBufferRef Source)
: Binary(Binary::ID_WinRes, Source) {
size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE;
@@ -128,7 +137,8 @@ Error ResourceEntryRef::loadNext() {
return Error::success();
}
-WindowsResourceParser::WindowsResourceParser() : Root(false) {}
+WindowsResourceParser::WindowsResourceParser(bool MinGW)
+ : Root(false), MinGW(MinGW) {}
void printResourceTypeName(uint16_t TypeID, raw_ostream &OS) {
switch (TypeID) {
@@ -200,6 +210,122 @@ static std::string makeDuplicateResourceError(
return OS.str();
}
+static void printStringOrID(const WindowsResourceParser::StringOrID &S,
+ raw_string_ostream &OS, bool IsType, bool IsID) {
+ if (S.IsString) {
+ std::string UTF8;
+ if (!convertUTF16LEToUTF8String(S.String, UTF8))
+ UTF8 = "(failed conversion from UTF16)";
+ OS << '\"' << UTF8 << '\"';
+ } else if (IsType)
+ printResourceTypeName(S.ID, OS);
+ else if (IsID)
+ OS << "ID " << S.ID;
+ else
+ OS << S.ID;
+}
+
+static std::string makeDuplicateResourceError(
+ const std::vector<WindowsResourceParser::StringOrID> &Context,
+ StringRef File1, StringRef File2) {
+ std::string Ret;
+ raw_string_ostream OS(Ret);
+
+ OS << "duplicate resource:";
+
+ if (Context.size() >= 1) {
+ OS << " type ";
+ printStringOrID(Context[0], OS, /* IsType */ true, /* IsID */ true);
+ }
+
+ if (Context.size() >= 2) {
+ OS << "/name ";
+ printStringOrID(Context[1], OS, /* IsType */ false, /* IsID */ true);
+ }
+
+ if (Context.size() >= 3) {
+ OS << "/language ";
+ printStringOrID(Context[2], OS, /* IsType */ false, /* IsID */ false);
+ }
+ OS << ", in " << File1 << " and in " << File2;
+
+ return OS.str();
+}
+
+// MinGW specific. Remove default manifests (with language zero) if there are
+// other manifests present, and report an error if there is more than one
+// manifest with a non-zero language code.
+// GCC has the concept of a default manifest resource object, which gets
+// linked in implicitly if present. This default manifest has language
+// id zero, and should be dropped silently if there's another manifest present.
+// If the user's resources surprisingly include a manifest with language id
+// zero, we should also ignore the duplicate default manifest.
+void WindowsResourceParser::cleanUpManifests(
+ std::vector<std::string> &Duplicates) {
+ auto TypeIt = Root.IDChildren.find(/* RT_MANIFEST */ 24);
+ if (TypeIt == Root.IDChildren.end())
+ return;
+
+ TreeNode *TypeNode = TypeIt->second.get();
+ auto NameIt =
+ TypeNode->IDChildren.find(/* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1);
+ if (NameIt == TypeNode->IDChildren.end())
+ return;
+
+ TreeNode *NameNode = NameIt->second.get();
+ if (NameNode->IDChildren.size() <= 1)
+ return; // None or one manifest present, all good.
+
+ // If we have more than one manifest, drop the language zero one if present,
+ // and check again.
+ auto LangZeroIt = NameNode->IDChildren.find(0);
+ if (LangZeroIt != NameNode->IDChildren.end() &&
+ LangZeroIt->second->IsDataNode) {
+ uint32_t RemovedIndex = LangZeroIt->second->DataIndex;
+ NameNode->IDChildren.erase(LangZeroIt);
+ Data.erase(Data.begin() + RemovedIndex);
+ Root.shiftDataIndexDown(RemovedIndex);
+
+ // If we're now down to one manifest, all is good.
+ if (NameNode->IDChildren.size() <= 1)
+ return;
+ }
+
+ // More than one non-language-zero manifest
+ auto FirstIt = NameNode->IDChildren.begin();
+ uint32_t FirstLang = FirstIt->first;
+ TreeNode *FirstNode = FirstIt->second.get();
+ auto LastIt = NameNode->IDChildren.rbegin();
+ uint32_t LastLang = LastIt->first;
+ TreeNode *LastNode = LastIt->second.get();
+ Duplicates.push_back(
+ ("duplicate non-default manifests with languages " + Twine(FirstLang) +
+ " in " + InputFilenames[FirstNode->Origin] + " and " + Twine(LastLang) +
+ " in " + InputFilenames[LastNode->Origin])
+ .str());
+}
+
+// Ignore duplicates of manifests with language zero (the default manifest),
+// in case the user has provided a manifest with that language id. See
+// the function comment above for context. Only returns true if MinGW is set
+// to true.
+bool WindowsResourceParser::shouldIgnoreDuplicate(
+ const ResourceEntryRef &Entry) const {
+ return MinGW && !Entry.checkTypeString() &&
+ Entry.getTypeID() == /* RT_MANIFEST */ 24 &&
+ !Entry.checkNameString() &&
+ Entry.getNameID() == /* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1 &&
+ Entry.getLanguage() == 0;
+}
+
+bool WindowsResourceParser::shouldIgnoreDuplicate(
+ const std::vector<StringOrID> &Context) const {
+ return MinGW && Context.size() == 3 && !Context[0].IsString &&
+ Context[0].ID == /* RT_MANIFEST */ 24 && !Context[1].IsString &&
+ Context[1].ID == /* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1 &&
+ !Context[2].IsString && Context[2].ID == 0;
+}
+
Error WindowsResourceParser::parse(WindowsResource *WR,
std::vector<std::string> &Duplicates) {
auto EntryOrErr = WR->getHeadEntry();
@@ -219,112 +345,176 @@ Error WindowsResourceParser::parse(WindowsResource *WR,
}
ResourceEntryRef Entry = EntryOrErr.get();
+ uint32_t Origin = InputFilenames.size();
+ InputFilenames.push_back(WR->getFileName());
bool End = false;
while (!End) {
- Data.push_back(Entry.getData());
- bool IsNewTypeString = false;
- bool IsNewNameString = false;
-
- TreeNode* Node;
- bool IsNewNode = Root.addEntry(Entry, InputFilenames.size(),
- IsNewTypeString, IsNewNameString, Node);
- InputFilenames.push_back(WR->getFileName());
+ TreeNode *Node;
+ bool IsNewNode = Root.addEntry(Entry, Origin, Data, StringTable, Node);
if (!IsNewNode) {
- Duplicates.push_back(makeDuplicateResourceError(
- Entry, InputFilenames[Node->Origin], WR->getFileName()));
+ if (!shouldIgnoreDuplicate(Entry))
+ Duplicates.push_back(makeDuplicateResourceError(
+ Entry, InputFilenames[Node->Origin], WR->getFileName()));
}
- if (IsNewTypeString)
- StringTable.push_back(Entry.getTypeString());
-
- if (IsNewNameString)
- StringTable.push_back(Entry.getNameString());
-
RETURN_IF_ERROR(Entry.moveNext(End));
}
return Error::success();
}
+Error WindowsResourceParser::parse(ResourceSectionRef &RSR, StringRef Filename,
+ std::vector<std::string> &Duplicates) {
+ UNWRAP_REF_OR_RETURN(BaseTable, RSR.getBaseTable());
+ uint32_t Origin = InputFilenames.size();
+ InputFilenames.push_back(Filename);
+ std::vector<StringOrID> Context;
+ return addChildren(Root, RSR, BaseTable, Origin, Context, Duplicates);
+}
+
void WindowsResourceParser::printTree(raw_ostream &OS) const {
ScopedPrinter Writer(OS);
Root.print(Writer, "Resource Tree");
}
-bool WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry,
- uint32_t Origin,
- bool &IsNewTypeString,
- bool &IsNewNameString,
- TreeNode *&Result) {
- TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString);
- TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString);
- return NameNode.addLanguageNode(Entry, Origin, Result);
+bool WindowsResourceParser::TreeNode::addEntry(
+ const ResourceEntryRef &Entry, uint32_t Origin,
+ std::vector<std::vector<uint8_t>> &Data,
+ std::vector<std::vector<UTF16>> &StringTable, TreeNode *&Result) {
+ TreeNode &TypeNode = addTypeNode(Entry, StringTable);
+ TreeNode &NameNode = TypeNode.addNameNode(Entry, StringTable);
+ return NameNode.addLanguageNode(Entry, Origin, Data, Result);
}
-WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) {
- if (IsStringNode)
- StringIndex = StringCount++;
+Error WindowsResourceParser::addChildren(TreeNode &Node,
+ ResourceSectionRef &RSR,
+ const coff_resource_dir_table &Table,
+ uint32_t Origin,
+ std::vector<StringOrID> &Context,
+ std::vector<std::string> &Duplicates) {
+
+ for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries;
+ i++) {
+ UNWRAP_REF_OR_RETURN(Entry, RSR.getTableEntry(Table, i));
+ TreeNode *Child;
+
+ if (Entry.Offset.isSubDir()) {
+
+ // Create a new subdirectory and recurse
+ if (i < Table.NumberOfNameEntries) {
+ UNWRAP_OR_RETURN(NameString, RSR.getEntryNameString(Entry));
+ Child = &Node.addNameChild(NameString, StringTable);
+ Context.push_back(StringOrID(NameString));
+ } else {
+ Child = &Node.addIDChild(Entry.Identifier.ID);
+ Context.push_back(StringOrID(Entry.Identifier.ID));
+ }
+
+ UNWRAP_REF_OR_RETURN(NextTable, RSR.getEntrySubDir(Entry));
+ Error E =
+ addChildren(*Child, RSR, NextTable, Origin, Context, Duplicates);
+ if (E)
+ return E;
+ Context.pop_back();
+
+ } else {
+
+ // Data leaves are supposed to have a numeric ID as identifier (language).
+ if (Table.NumberOfNameEntries > 0)
+ return createStringError(object_error::parse_failed,
+ "unexpected string key for data object");
+
+ // Try adding a data leaf
+ UNWRAP_REF_OR_RETURN(DataEntry, RSR.getEntryData(Entry));
+ TreeNode *Child;
+ Context.push_back(StringOrID(Entry.Identifier.ID));
+ bool Added = Node.addDataChild(Entry.Identifier.ID, Table.MajorVersion,
+ Table.MinorVersion, Table.Characteristics,
+ Origin, Data.size(), Child);
+ if (Added) {
+ UNWRAP_OR_RETURN(Contents, RSR.getContents(DataEntry));
+ Data.push_back(ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(Contents.data()),
+ Contents.size()));
+ } else {
+ if (!shouldIgnoreDuplicate(Context))
+ Duplicates.push_back(makeDuplicateResourceError(
+ Context, InputFilenames[Child->Origin], InputFilenames.back()));
+ }
+ Context.pop_back();
+
+ }
+ }
+ return Error::success();
}
+WindowsResourceParser::TreeNode::TreeNode(uint32_t StringIndex)
+ : StringIndex(StringIndex) {}
+
WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion,
uint16_t MinorVersion,
uint32_t Characteristics,
- uint32_t Origin)
- : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion),
- Characteristics(Characteristics), Origin(Origin) {
- DataIndex = DataCount++;
-}
+ uint32_t Origin, uint32_t DataIndex)
+ : IsDataNode(true), DataIndex(DataIndex), MajorVersion(MajorVersion),
+ MinorVersion(MinorVersion), Characteristics(Characteristics),
+ Origin(Origin) {}
std::unique_ptr<WindowsResourceParser::TreeNode>
-WindowsResourceParser::TreeNode::createStringNode() {
- return std::unique_ptr<TreeNode>(new TreeNode(true));
+WindowsResourceParser::TreeNode::createStringNode(uint32_t Index) {
+ return std::unique_ptr<TreeNode>(new TreeNode(Index));
}
std::unique_ptr<WindowsResourceParser::TreeNode>
WindowsResourceParser::TreeNode::createIDNode() {
- return std::unique_ptr<TreeNode>(new TreeNode(false));
+ return std::unique_ptr<TreeNode>(new TreeNode(0));
}
std::unique_ptr<WindowsResourceParser::TreeNode>
WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion,
uint16_t MinorVersion,
uint32_t Characteristics,
- uint32_t Origin) {
- return std::unique_ptr<TreeNode>(
- new TreeNode(MajorVersion, MinorVersion, Characteristics, Origin));
+ uint32_t Origin,
+ uint32_t DataIndex) {
+ return std::unique_ptr<TreeNode>(new TreeNode(
+ MajorVersion, MinorVersion, Characteristics, Origin, DataIndex));
}
-WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry,
- bool &IsNewTypeString) {
+WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addTypeNode(
+ const ResourceEntryRef &Entry,
+ std::vector<std::vector<UTF16>> &StringTable) {
if (Entry.checkTypeString())
- return addNameChild(Entry.getTypeString(), IsNewTypeString);
+ return addNameChild(Entry.getTypeString(), StringTable);
else
return addIDChild(Entry.getTypeID());
}
-WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry,
- bool &IsNewNameString) {
+WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addNameNode(
+ const ResourceEntryRef &Entry,
+ std::vector<std::vector<UTF16>> &StringTable) {
if (Entry.checkNameString())
- return addNameChild(Entry.getNameString(), IsNewNameString);
+ return addNameChild(Entry.getNameString(), StringTable);
else
return addIDChild(Entry.getNameID());
}
bool WindowsResourceParser::TreeNode::addLanguageNode(
- const ResourceEntryRef &Entry, uint32_t Origin, TreeNode *&Result) {
- return addDataChild(Entry.getLanguage(), Entry.getMajorVersion(),
- Entry.getMinorVersion(), Entry.getCharacteristics(),
- Origin, Result);
+ const ResourceEntryRef &Entry, uint32_t Origin,
+ std::vector<std::vector<uint8_t>> &Data, TreeNode *&Result) {
+ bool Added = addDataChild(Entry.getLanguage(), Entry.getMajorVersion(),
+ Entry.getMinorVersion(), Entry.getCharacteristics(),
+ Origin, Data.size(), Result);
+ if (Added)
+ Data.push_back(Entry.getData());
+ return Added;
}
bool WindowsResourceParser::TreeNode::addDataChild(
uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion,
- uint32_t Characteristics, uint32_t Origin, TreeNode *&Result) {
- auto NewChild =
- createDataNode(MajorVersion, MinorVersion, Characteristics, Origin);
+ uint32_t Characteristics, uint32_t Origin, uint32_t DataIndex,
+ TreeNode *&Result) {
+ auto NewChild = createDataNode(MajorVersion, MinorVersion, Characteristics,
+ Origin, DataIndex);
auto ElementInserted = IDChildren.emplace(ID, std::move(NewChild));
Result = ElementInserted.first->second.get();
return ElementInserted.second;
@@ -342,16 +532,15 @@ WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addIDChild(
return *(Child->second);
}
-WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addNameChild(ArrayRef<UTF16> NameRef,
- bool &IsNewString) {
+WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addNameChild(
+ ArrayRef<UTF16> NameRef, std::vector<std::vector<UTF16>> &StringTable) {
std::string NameString;
convertUTF16LEToUTF8String(NameRef, NameString);
auto Child = StringChildren.find(NameString);
if (Child == StringChildren.end()) {
- auto NewChild = createStringNode();
- IsNewString = true;
+ auto NewChild = createStringNode(StringTable.size());
+ StringTable.push_back(NameRef);
WindowsResourceParser::TreeNode &Node = *NewChild;
StringChildren.emplace(NameString, std::move(NewChild));
return Node;
@@ -396,6 +585,19 @@ uint32_t WindowsResourceParser::TreeNode::getTreeSize() const {
return Size;
}
+// Shift the DataIndex of all data children with an index greater than or equal
+// to the given one, to fill the gap left by removing an entry from the Data
+// vector.
+void WindowsResourceParser::TreeNode::shiftDataIndexDown(uint32_t Index) {
+ if (IsDataNode && DataIndex >= Index) {
+ DataIndex--;
+ } else {
+ for (auto &Child : IDChildren)
+ Child.second->shiftDataIndexDown(Index);
+ for (auto &Child : StringChildren)
+ Child.second->shiftDataIndexDown(Index);
+ }
+}
+
class WindowsResourceCOFFWriter {
public:
WindowsResourceCOFFWriter(COFF::MachineTypes MachineType,
@@ -515,6 +717,14 @@ WindowsResourceCOFFWriter::write(uint32_t TimeDateStamp) {
return std::move(OutputBuffer);
}
+// According to the COFF specification, if Src has the same size as Dest,
+// it's okay to *not* copy the trailing zero.
+static void coffnamecpy(char (&Dest)[COFF::NameSize], StringRef Src) {
+ assert(Src.size() <= COFF::NameSize &&
+ "Src is not larger than COFF::NameSize");
+ strncpy(Dest, Src.data(), (size_t)COFF::NameSize);
+}
+
void WindowsResourceCOFFWriter::writeCOFFHeader(uint32_t TimeDateStamp) {
// Write the COFF header.
auto *Header = reinterpret_cast<coff_file_header *>(BufferStart);
@@ -534,7 +744,7 @@ void WindowsResourceCOFFWriter::writeFirstSectionHeader() {
CurrentOffset += sizeof(coff_file_header);
auto *SectionOneHeader =
reinterpret_cast<coff_section *>(BufferStart + CurrentOffset);
- strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)COFF::NameSize);
+ coffnamecpy(SectionOneHeader->Name, ".rsrc$01");
SectionOneHeader->VirtualSize = 0;
SectionOneHeader->VirtualAddress = 0;
SectionOneHeader->SizeOfRawData = SectionOneSize;
@@ -552,7 +762,7 @@ void WindowsResourceCOFFWriter::writeSecondSectionHeader() {
CurrentOffset += sizeof(coff_section);
auto *SectionTwoHeader =
reinterpret_cast<coff_section *>(BufferStart + CurrentOffset);
- strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)COFF::NameSize);
+ coffnamecpy(SectionTwoHeader->Name, ".rsrc$02");
SectionTwoHeader->VirtualSize = 0;
SectionTwoHeader->VirtualAddress = 0;
SectionTwoHeader->SizeOfRawData = SectionTwoSize;
@@ -590,7 +800,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() {
// Now write the symbol table.
// First, the feat symbol.
auto *Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
- strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)COFF::NameSize);
+ coffnamecpy(Symbol->Name.ShortName, "@feat.00");
Symbol->Value = 0x11;
Symbol->SectionNumber = 0xffff;
Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
@@ -600,7 +810,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() {
// Now write the .rsrc1 symbol + aux.
Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
- strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)COFF::NameSize);
+ coffnamecpy(Symbol->Name.ShortName, ".rsrc$01");
Symbol->Value = 0;
Symbol->SectionNumber = 1;
Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
@@ -619,7 +829,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() {
// Now write the .rsrc2 symbol + aux.
Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
- strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)COFF::NameSize);
+ coffnamecpy(Symbol->Name.ShortName, ".rsrc$02");
Symbol->Value = 0;
Symbol->SectionNumber = 2;
Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
@@ -640,7 +850,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() {
for (unsigned i = 0; i < Data.size(); i++) {
auto RelocationName = formatv("$R{0:X-6}", i & 0xffffff).sstr<COFF::NameSize>();
Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
- memcpy(Symbol->Name.ShortName, RelocationName.data(), (size_t) COFF::NameSize);
+ coffnamecpy(Symbol->Name.ShortName, RelocationName);
Symbol->Value = DataOffsets[i];
Symbol->SectionNumber = 2;
Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
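A sketch of driving the parser in the new MinGW mode, where duplicate default manifests are filtered instead of reported:

#include "llvm/Object/WindowsResource.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>

using namespace llvm;
using namespace llvm::object;

static Error mergeResources(WindowsResource *WR) {
  WindowsResourceParser Parser(/*MinGW=*/true);
  std::vector<std::string> Duplicates;
  if (Error E = Parser.parse(WR, Duplicates))
    return E;
  // Non-manifest duplicates (and conflicting non-default manifests) still
  // surface here as diagnostics.
  for (const std::string &Dup : Duplicates)
    errs() << Dup << "\n";
  return Error::success();
}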
diff --git a/lib/Object/XCOFFObjectFile.cpp b/lib/Object/XCOFFObjectFile.cpp
index 602b7357986a..98782c2701c1 100644
--- a/lib/Object/XCOFFObjectFile.cpp
+++ b/lib/Object/XCOFFObjectFile.cpp
@@ -11,17 +11,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/XCOFFObjectFile.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include <cstddef>
#include <cstring>
namespace llvm {
namespace object {
+enum { FUNCTION_SYM = 0x20, SYM_TYPE_MASK = 0x07, RELOC_OVERFLOW = 65535 };
+
// Checks that [Ptr, Ptr + Size) bytes fall inside the memory buffer
// 'M'. Returns a pointer to the underlying object on success.
template <typename T>
@@ -42,10 +39,25 @@ template <typename T> static const T *viewAs(uintptr_t in) {
return reinterpret_cast<const T *>(in);
}
-static StringRef generateStringRef(const char *Name, uint64_t Size) {
- auto NulCharPtr = static_cast<const char *>(memchr(Name, '\0', Size));
+static StringRef generateXCOFFFixedNameStringRef(const char *Name) {
+ auto NulCharPtr =
+ static_cast<const char *>(memchr(Name, '\0', XCOFF::NameSize));
return NulCharPtr ? StringRef(Name, NulCharPtr - Name)
- : StringRef(Name, Size);
+ : StringRef(Name, XCOFF::NameSize);
+}
+
+bool XCOFFRelocation32::isRelocationSigned() const {
+ return Info & XR_SIGN_INDICATOR_MASK;
+}
+
+bool XCOFFRelocation32::isFixupIndicated() const {
+ return Info & XR_FIXUP_INDICATOR_MASK;
+}
+
+uint8_t XCOFFRelocation32::getRelocatedLength() const {
+ // The relocation encodes the bit length being relocated minus 1. Add back
+ // the 1 to get the actual length being relocated.
+ return (Info & XR_BIASED_LENGTH_MASK) + 1;
}
void XCOFFObjectFile::checkSectionAddress(uintptr_t Addr,
@@ -83,6 +95,9 @@ XCOFFObjectFile::toSection64(DataRefImpl Ref) const {
const XCOFFSymbolEntry *XCOFFObjectFile::toSymbolEntry(DataRefImpl Ref) const {
assert(!is64Bit() && "Symbol table support not implemented for 64-bit.");
assert(Ref.p != 0 && "Symbol table pointer can not be nullptr!");
+#ifndef NDEBUG
+ checkSymbolEntryPointer(Ref.p);
+#endif
auto SymEntPtr = viewAs<XCOFFSymbolEntry>(Ref.p);
return SymEntPtr;
}
@@ -112,23 +127,19 @@ XCOFFObjectFile::sectionHeaderTable64() const {
void XCOFFObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
SymEntPtr += SymEntPtr->NumberOfAuxEntries + 1;
+#ifndef NDEBUG
+ // This function is used by basic_symbol_iterator, which is allowed to
+ // point at the end-of-symbol-table address.
+ if (reinterpret_cast<uintptr_t>(SymEntPtr) != getEndOfSymbolTableAddress())
+ checkSymbolEntryPointer(reinterpret_cast<uintptr_t>(SymEntPtr));
+#endif
Symb.p = reinterpret_cast<uintptr_t>(SymEntPtr);
}
-Expected<StringRef> XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const {
- const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
-
- if (SymEntPtr->NameInStrTbl.Magic != XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC)
- return generateStringRef(SymEntPtr->SymbolName, XCOFF::SymbolNameSize);
-
- // A storage class value with the high-order bit on indicates that the name is
- // a symbolic debugger stabstring.
- if (SymEntPtr->StorageClass & 0x80)
- return StringRef("Unimplemented Debug Name");
-
- uint32_t Offset = SymEntPtr->NameInStrTbl.Offset;
- // The byte offset is relative to the start of the string table
- // or .debug section. A byte offset value of 0 is a null or zero-length symbol
+Expected<StringRef>
+XCOFFObjectFile::getStringTableEntry(uint32_t Offset) const {
+ // The byte offset is relative to the start of the string table.
+ // A byte offset value of 0 is a null or zero-length symbol
// name. A byte offset in the range 1 to 3 (inclusive) points into the length
// field; as a soft-error recovery mechanism, we treat such cases as having an
// offset of 0.
@@ -138,10 +149,32 @@ Expected<StringRef> XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const {
if (StringTable.Data != nullptr && StringTable.Size > Offset)
return (StringTable.Data + Offset);
- return make_error<GenericBinaryError>("Symbol Name parse failed",
+ return make_error<GenericBinaryError>("Bad offset for string table entry",
object_error::parse_failed);
}
+Expected<StringRef>
+XCOFFObjectFile::getCFileName(const XCOFFFileAuxEnt *CFileEntPtr) const {
+ if (CFileEntPtr->NameInStrTbl.Magic !=
+ XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC)
+ return generateXCOFFFixedNameStringRef(CFileEntPtr->Name);
+ return getStringTableEntry(CFileEntPtr->NameInStrTbl.Offset);
+}
+
+Expected<StringRef> XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const {
+ const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
+
+ // A storage class value with the high-order bit on indicates that the name is
+ // a symbolic debugger stabstring.
+ if (SymEntPtr->StorageClass & 0x80)
+ return StringRef("Unimplemented Debug Name");
+
+ if (SymEntPtr->NameInStrTbl.Magic != XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC)
+ return generateXCOFFFixedNameStringRef(SymEntPtr->SymbolName);
+
+ return getStringTableEntry(SymEntPtr->NameInStrTbl.Offset);
+}
+
Expected<uint64_t> XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
uint64_t Result = 0;
llvm_unreachable("Not yet implemented!");
@@ -149,6 +182,7 @@ Expected<uint64_t> XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
}
uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
+ assert(!is64Bit() && "Symbol table support not implemented for 64-bit.");
return toSymbolEntry(Symb)->Value;
}
@@ -185,7 +219,7 @@ void XCOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const {
}
Expected<StringRef> XCOFFObjectFile::getSectionName(DataRefImpl Sec) const {
- return generateStringRef(getSectionNameInternal(Sec), XCOFF::SectionNameSize);
+ return generateXCOFFFixedNameStringRef(getSectionNameInternal(Sec));
}
uint64_t XCOFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
@@ -393,8 +427,8 @@ XCOFFObjectFile::getSymbolSectionName(const XCOFFSymbolEntry *SymEntPtr) const {
default:
Expected<DataRefImpl> SecRef = getSectionByNum(SectionNum);
if (SecRef)
- return generateStringRef(getSectionNameInternal(SecRef.get()),
- XCOFF::SectionNameSize);
+ return generateXCOFFFixedNameStringRef(
+ getSectionNameInternal(SecRef.get()));
return SecRef.takeError();
}
}
@@ -442,6 +476,48 @@ uint32_t XCOFFObjectFile::getNumberOfSymbolTableEntries64() const {
return fileHeader64()->NumberOfSymTableEntries;
}
+uintptr_t XCOFFObjectFile::getEndOfSymbolTableAddress() const {
+ uint32_t NumberOfSymTableEntries =
+ is64Bit() ? getNumberOfSymbolTableEntries64()
+ : getLogicalNumberOfSymbolTableEntries32();
+ return getWithOffset(reinterpret_cast<uintptr_t>(SymbolTblPtr),
+ XCOFF::SymbolTableEntrySize * NumberOfSymTableEntries);
+}
+
+void XCOFFObjectFile::checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const {
+ if (SymbolEntPtr < reinterpret_cast<uintptr_t>(SymbolTblPtr))
+ report_fatal_error("Symbol table entry is outside of symbol table.");
+
+ if (SymbolEntPtr >= getEndOfSymbolTableAddress())
+ report_fatal_error("Symbol table entry is outside of symbol table.");
+
+ ptrdiff_t Offset = reinterpret_cast<const char *>(SymbolEntPtr) -
+ reinterpret_cast<const char *>(SymbolTblPtr);
+
+ if (Offset % XCOFF::SymbolTableEntrySize != 0)
+ report_fatal_error(
+ "Symbol table entry position is not valid inside of symbol table.");
+}
+
+uint32_t XCOFFObjectFile::getSymbolIndex(uintptr_t SymbolEntPtr) const {
+ return (reinterpret_cast<const char *>(SymbolEntPtr) -
+ reinterpret_cast<const char *>(SymbolTblPtr)) /
+ XCOFF::SymbolTableEntrySize;
+}
+
+Expected<StringRef>
+XCOFFObjectFile::getSymbolNameByIndex(uint32_t Index) const {
+ if (is64Bit())
+ report_fatal_error("64-bit symbol table support not implemented yet.");
+
+ if (Index >= getLogicalNumberOfSymbolTableEntries32())
+ return errorCodeToError(object_error::invalid_symbol_index);
+
+ DataRefImpl SymDRI;
+ SymDRI.p = reinterpret_cast<uintptr_t>(getPointerToSymbolTable() + Index);
+ return getSymbolName(SymDRI);
+}
+
uint16_t XCOFFObjectFile::getFlags() const {
return is64Bit() ? fileHeader64()->Flags : fileHeader32()->Flags;
}
@@ -477,6 +553,46 @@ ArrayRef<XCOFFSectionHeader32> XCOFFObjectFile::sections32() const {
TablePtr + getNumberOfSections());
}
+// In an XCOFF32 file, when a section's NumberOfRelocations field is 65535, an
+// STYP_OVRFLO section header contains the actual count of relocation entries
+// in its s_paddr field. STYP_OVRFLO headers contain the section index of their
+// corresponding sections as their raw "NumberOfRelocations" field value.
+Expected<uint32_t> XCOFFObjectFile::getLogicalNumberOfRelocationEntries(
+ const XCOFFSectionHeader32 &Sec) const {
+
+ uint16_t SectionIndex = &Sec - sectionHeaderTable32() + 1;
+
+ if (Sec.NumberOfRelocations < RELOC_OVERFLOW)
+ return Sec.NumberOfRelocations;
+ for (const auto &Sec : sections32()) {
+ if (Sec.Flags == XCOFF::STYP_OVRFLO &&
+ Sec.NumberOfRelocations == SectionIndex)
+ return Sec.PhysicalAddress;
+ }
+ return errorCodeToError(object_error::parse_failed);
+}
+
+Expected<ArrayRef<XCOFFRelocation32>>
+XCOFFObjectFile::relocations(const XCOFFSectionHeader32 &Sec) const {
+ uintptr_t RelocAddr = getWithOffset(reinterpret_cast<uintptr_t>(FileHeader),
+ Sec.FileOffsetToRelocationInfo);
+ auto NumRelocEntriesOrErr = getLogicalNumberOfRelocationEntries(Sec);
+ if (Error E = NumRelocEntriesOrErr.takeError())
+ return std::move(E);
+
+ uint32_t NumRelocEntries = NumRelocEntriesOrErr.get();
+
+ auto RelocationOrErr =
+ getObject<XCOFFRelocation32>(Data, reinterpret_cast<void *>(RelocAddr),
+ NumRelocEntries * sizeof(XCOFFRelocation32));
+ if (Error E = RelocationOrErr.takeError())
+ return std::move(E);
+
+ const XCOFFRelocation32 *StartReloc = RelocationOrErr.get();
+
+ return ArrayRef<XCOFFRelocation32>(StartReloc, StartReloc + NumRelocEntries);
+}
+
Expected<XCOFFStringTable>
XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) {
// If there is a string table, then the buffer must contain at least 4 bytes
@@ -507,7 +623,7 @@ XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) {
Expected<std::unique_ptr<XCOFFObjectFile>>
XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) {
- // Can't use make_unique because of the private constructor.
+ // Can't use std::make_unique because of the private constructor.
std::unique_ptr<XCOFFObjectFile> Obj;
Obj.reset(new XCOFFObjectFile(Type, MBR));
@@ -573,11 +689,77 @@ ObjectFile::createXCOFFObjectFile(MemoryBufferRef MemBufRef,
}
StringRef XCOFFSectionHeader32::getName() const {
- return generateStringRef(Name, XCOFF::SectionNameSize);
+ return generateXCOFFFixedNameStringRef(Name);
}
StringRef XCOFFSectionHeader64::getName() const {
- return generateStringRef(Name, XCOFF::SectionNameSize);
+ return generateXCOFFFixedNameStringRef(Name);
+}
+
+XCOFF::StorageClass XCOFFSymbolRef::getStorageClass() const {
+ return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->StorageClass;
+}
+
+uint8_t XCOFFSymbolRef::getNumberOfAuxEntries() const {
+ return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->NumberOfAuxEntries;
+}
+
+const XCOFFCsectAuxEnt32 *XCOFFSymbolRef::getXCOFFCsectAuxEnt32() const {
+ assert(!OwningObjectPtr->is64Bit() &&
+ "32-bit interface called on 64-bit object file.");
+ assert(hasCsectAuxEnt() && "No Csect Auxiliary Entry is found.");
+
+ // In XCOFF32, the csect auxiliary entry is always the last auxiliary
+ // entry for the symbol.
+ uintptr_t AuxAddr = getWithOffset(
+ SymEntDataRef.p, XCOFF::SymbolTableEntrySize * getNumberOfAuxEntries());
+
+#ifndef NDEBUG
+ OwningObjectPtr->checkSymbolEntryPointer(AuxAddr);
+#endif
+
+ return reinterpret_cast<const XCOFFCsectAuxEnt32 *>(AuxAddr);
+}
+
+uint16_t XCOFFSymbolRef::getType() const {
+ return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->SymbolType;
+}
+
+int16_t XCOFFSymbolRef::getSectionNumber() const {
+ return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->SectionNumber;
+}
+
+bool XCOFFSymbolRef::hasCsectAuxEnt() const {
+ XCOFF::StorageClass SC = getStorageClass();
+ return (SC == XCOFF::C_EXT || SC == XCOFF::C_WEAKEXT ||
+ SC == XCOFF::C_HIDEXT);
+}
+
+bool XCOFFSymbolRef::isFunction() const {
+ if (OwningObjectPtr->is64Bit())
+ report_fatal_error("64-bit support is unimplemented yet.");
+
+ if (getType() & FUNCTION_SYM)
+ return true;
+
+ if (!hasCsectAuxEnt())
+ return false;
+
+ const XCOFFCsectAuxEnt32 *CsectAuxEnt = getXCOFFCsectAuxEnt32();
+
+ // A function definition should be a label definition.
+ if ((CsectAuxEnt->SymbolAlignmentAndType & SYM_TYPE_MASK) != XCOFF::XTY_LD)
+ return false;
+
+ if (CsectAuxEnt->StorageMappingClass != XCOFF::XMC_PR)
+ return false;
+
+ int16_t SectNum = getSectionNumber();
+ Expected<DataRefImpl> SI = OwningObjectPtr->getSectionByNum(SectNum);
+ if (!SI)
+ return false;
+
+ return (OwningObjectPtr->getSectionFlags(SI.get()) & XCOFF::STYP_TEXT);
}
} // namespace object
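A sketch of consuming the new 32-bit XCOFF relocation accessors; the bias and indicator-bit semantics follow the comments above:

#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static void dumpRelocs(const XCOFFObjectFile &Obj,
                       const XCOFFSectionHeader32 &Sec) {
  auto RelocsOrErr = Obj.relocations(Sec); // Handles STYP_OVRFLO counts.
  if (!RelocsOrErr) {
    consumeError(RelocsOrErr.takeError());
    return;
  }
  for (const XCOFFRelocation32 &R : *RelocsOrErr)
    outs() << "len=" << unsigned(R.getRelocatedLength())
           << " signed=" << R.isRelocationSigned() << "\n";
}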
diff --git a/lib/ObjectYAML/COFFEmitter.cpp b/lib/ObjectYAML/COFFEmitter.cpp
new file mode 100644
index 000000000000..efcdc51e1670
--- /dev/null
+++ b/lib/ObjectYAML/COFFEmitter.cpp
@@ -0,0 +1,622 @@
+//===- yaml2coff - Convert YAML to a COFF object file ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The COFF component of yaml2obj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/ObjectYAML/ObjectYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+/// This parses a yaml stream that represents a COFF object file.
+/// See docs/yaml2obj for the YAML schema.
+struct COFFParser {
+ COFFParser(COFFYAML::Object &Obj, yaml::ErrorHandler EH)
+ : Obj(Obj), SectionTableStart(0), SectionTableSize(0), ErrHandler(EH) {
+ // A COFF string table always starts with a 4 byte size field. Offsets into
+ // it include this size, so allocate it now.
+ StringTable.append(4, char(0));
+ }
+
+ bool useBigObj() const {
+ return static_cast<int32_t>(Obj.Sections.size()) >
+ COFF::MaxNumberOfSections16;
+ }
+
+ bool isPE() const { return Obj.OptionalHeader.hasValue(); }
+ bool is64Bit() const {
+ return Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 ||
+ Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64;
+ }
+
+ uint32_t getFileAlignment() const {
+ return Obj.OptionalHeader->Header.FileAlignment;
+ }
+
+ unsigned getHeaderSize() const {
+ return useBigObj() ? COFF::Header32Size : COFF::Header16Size;
+ }
+
+ unsigned getSymbolSize() const {
+ return useBigObj() ? COFF::Symbol32Size : COFF::Symbol16Size;
+ }
+
+ bool parseSections() {
+ for (std::vector<COFFYAML::Section>::iterator i = Obj.Sections.begin(),
+ e = Obj.Sections.end();
+ i != e; ++i) {
+ COFFYAML::Section &Sec = *i;
+
+ // If the name fits in 8 bytes, store it in place; otherwise
+ // store it in the string table.
+ StringRef Name = Sec.Name;
+
+ if (Name.size() <= COFF::NameSize) {
+ std::copy(Name.begin(), Name.end(), Sec.Header.Name);
+ } else {
+ // Add string to the string table and format the index for output.
+ unsigned Index = getStringIndex(Name);
+ std::string str = utostr(Index);
+ if (str.size() > 7) {
+ ErrHandler("string table got too large");
+ return false;
+ }
+ Sec.Header.Name[0] = '/';
+ std::copy(str.begin(), str.end(), Sec.Header.Name + 1);
+ }
+
+ if (Sec.Alignment) {
+ if (Sec.Alignment > 8192) {
+ ErrHandler("section alignment is too large");
+ return false;
+ }
+ if (!isPowerOf2_32(Sec.Alignment)) {
+ ErrHandler("section alignment is not a power of 2");
+ return false;
+ }
+ Sec.Header.Characteristics |= (Log2_32(Sec.Alignment) + 1) << 20;
+ }
+ }
+ return true;
+ }
+
+ bool parseSymbols() {
+ for (std::vector<COFFYAML::Symbol>::iterator i = Obj.Symbols.begin(),
+ e = Obj.Symbols.end();
+ i != e; ++i) {
+ COFFYAML::Symbol &Sym = *i;
+
+ // If the name fits in 8 bytes, store it in place; otherwise
+ // store it in the string table.
+ StringRef Name = Sym.Name;
+ if (Name.size() <= COFF::NameSize) {
+ std::copy(Name.begin(), Name.end(), Sym.Header.Name);
+ } else {
+ // Add string to the string table and format the index for output.
+ unsigned Index = getStringIndex(Name);
+ *reinterpret_cast<support::aligned_ulittle32_t *>(Sym.Header.Name + 4) =
+ Index;
+ }
+
+ Sym.Header.Type = Sym.SimpleType;
+ Sym.Header.Type |= Sym.ComplexType << COFF::SCT_COMPLEX_TYPE_SHIFT;
+ }
+ return true;
+ }
+
+ bool parse() {
+ if (!parseSections())
+ return false;
+ if (!parseSymbols())
+ return false;
+ return true;
+ }
+
+ unsigned getStringIndex(StringRef Str) {
+ StringMap<unsigned>::iterator i = StringTableMap.find(Str);
+ if (i == StringTableMap.end()) {
+ unsigned Index = StringTable.size();
+ StringTable.append(Str.begin(), Str.end());
+ StringTable.push_back(0);
+ StringTableMap[Str] = Index;
+ return Index;
+ }
+ return i->second;
+ }
+
+ COFFYAML::Object &Obj;
+
+ codeview::StringsAndChecksums StringsAndChecksums;
+ BumpPtrAllocator Allocator;
+ StringMap<unsigned> StringTableMap;
+ std::string StringTable;
+ uint32_t SectionTableStart;
+ uint32_t SectionTableSize;
+
+ yaml::ErrorHandler ErrHandler;
+};
+
+enum { DOSStubSize = 128 };
+
+} // end anonymous namespace
+
+// Take a CP and assign addresses and sizes to everything. Returns false if the
+// layout is not valid.
+static bool layoutOptionalHeader(COFFParser &CP) {
+ if (!CP.isPE())
+ return true;
+ unsigned PEHeaderSize = CP.is64Bit() ? sizeof(object::pe32plus_header)
+ : sizeof(object::pe32_header);
+ CP.Obj.Header.SizeOfOptionalHeader =
+ PEHeaderSize +
+ sizeof(object::data_directory) * (COFF::NUM_DATA_DIRECTORIES + 1);
+ return true;
+}
+
+static yaml::BinaryRef
+toDebugS(ArrayRef<CodeViewYAML::YAMLDebugSubsection> Subsections,
+ const codeview::StringsAndChecksums &SC, BumpPtrAllocator &Allocator) {
+ using namespace codeview;
+ ExitOnError Err("Error occurred writing .debug$S section");
+ auto CVSS =
+ Err(CodeViewYAML::toCodeViewSubsectionList(Allocator, Subsections, SC));
+
+ std::vector<DebugSubsectionRecordBuilder> Builders;
+ uint32_t Size = sizeof(uint32_t);
+ for (auto &SS : CVSS) {
+ DebugSubsectionRecordBuilder B(SS, CodeViewContainer::ObjectFile);
+ Size += B.calculateSerializedLength();
+ Builders.push_back(std::move(B));
+ }
+ uint8_t *Buffer = Allocator.Allocate<uint8_t>(Size);
+ MutableArrayRef<uint8_t> Output(Buffer, Size);
+ BinaryStreamWriter Writer(Output, support::little);
+
+ Err(Writer.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC));
+ for (const auto &B : Builders) {
+ Err(B.commit(Writer));
+ }
+ return {Output};
+}
+
+// Take a CP and assign addresses and sizes to everything. Returns false if the
+// layout is not valid.
+static bool layoutCOFF(COFFParser &CP) {
+ // The section table starts immediately after the header, including the
+ // optional header.
+ CP.SectionTableStart =
+ CP.getHeaderSize() + CP.Obj.Header.SizeOfOptionalHeader;
+ if (CP.isPE())
+ CP.SectionTableStart += DOSStubSize + sizeof(COFF::PEMagic);
+ CP.SectionTableSize = COFF::SectionSize * CP.Obj.Sections.size();
+
+ uint32_t CurrentSectionDataOffset =
+ CP.SectionTableStart + CP.SectionTableSize;
+
+ for (COFFYAML::Section &S : CP.Obj.Sections) {
+ // We support specifying exactly one of SectionData or Subsections. So if
+ // there is already some SectionData, then we don't need to do any of this.
+ if (S.Name == ".debug$S" && S.SectionData.binary_size() == 0) {
+ CodeViewYAML::initializeStringsAndChecksums(S.DebugS,
+ CP.StringsAndChecksums);
+ if (CP.StringsAndChecksums.hasChecksums() &&
+ CP.StringsAndChecksums.hasStrings())
+ break;
+ }
+ }
+
+ // Assign each section data address consecutively.
+ for (COFFYAML::Section &S : CP.Obj.Sections) {
+ if (S.Name == ".debug$S") {
+ if (S.SectionData.binary_size() == 0) {
+ assert(CP.StringsAndChecksums.hasStrings() &&
+ "Object file does not have debug string table!");
+
+ S.SectionData =
+ toDebugS(S.DebugS, CP.StringsAndChecksums, CP.Allocator);
+ }
+ } else if (S.Name == ".debug$T") {
+ if (S.SectionData.binary_size() == 0)
+ S.SectionData = CodeViewYAML::toDebugT(S.DebugT, CP.Allocator, S.Name);
+ } else if (S.Name == ".debug$P") {
+ if (S.SectionData.binary_size() == 0)
+ S.SectionData = CodeViewYAML::toDebugT(S.DebugP, CP.Allocator, S.Name);
+ } else if (S.Name == ".debug$H") {
+ if (S.DebugH.hasValue() && S.SectionData.binary_size() == 0)
+ S.SectionData = CodeViewYAML::toDebugH(*S.DebugH, CP.Allocator);
+ }
+
+ if (S.SectionData.binary_size() > 0) {
+ CurrentSectionDataOffset = alignTo(CurrentSectionDataOffset,
+ CP.isPE() ? CP.getFileAlignment() : 4);
+ S.Header.SizeOfRawData = S.SectionData.binary_size();
+ if (CP.isPE())
+ S.Header.SizeOfRawData =
+ alignTo(S.Header.SizeOfRawData, CP.getFileAlignment());
+ S.Header.PointerToRawData = CurrentSectionDataOffset;
+ CurrentSectionDataOffset += S.Header.SizeOfRawData;
+ if (!S.Relocations.empty()) {
+ S.Header.PointerToRelocations = CurrentSectionDataOffset;
+ S.Header.NumberOfRelocations = S.Relocations.size();
+ CurrentSectionDataOffset +=
+ S.Header.NumberOfRelocations * COFF::RelocationSize;
+ }
+ } else {
+ // Leave SizeOfRawData unaltered. For .bss sections in object files, it
+ // carries the section size.
+ S.Header.PointerToRawData = 0;
+ }
+ }
+
+ uint32_t SymbolTableStart = CurrentSectionDataOffset;
+
+ // Calculate number of symbols.
+ uint32_t NumberOfSymbols = 0;
+ for (COFFYAML::Symbol &Sym : CP.Obj.Symbols) {
+ uint32_t NumberOfAuxSymbols = 0;
+ if (Sym.FunctionDefinition)
+ NumberOfAuxSymbols += 1;
+ if (Sym.bfAndefSymbol)
+ NumberOfAuxSymbols += 1;
+ if (Sym.WeakExternal)
+ NumberOfAuxSymbols += 1;
+ if (!Sym.File.empty())
+ NumberOfAuxSymbols +=
+ (Sym.File.size() + CP.getSymbolSize() - 1) / CP.getSymbolSize();
+ if (Sym.SectionDefinition)
+ NumberOfAuxSymbols += 1;
+ if (Sym.CLRToken)
+ NumberOfAuxSymbols += 1;
+ Sym.Header.NumberOfAuxSymbols = NumberOfAuxSymbols;
+ NumberOfSymbols += 1 + NumberOfAuxSymbols;
+ }
+
+ // Store all the allocated start addresses in the header.
+ CP.Obj.Header.NumberOfSections = CP.Obj.Sections.size();
+ CP.Obj.Header.NumberOfSymbols = NumberOfSymbols;
+ if (NumberOfSymbols > 0 || CP.StringTable.size() > 4)
+ CP.Obj.Header.PointerToSymbolTable = SymbolTableStart;
+ else
+ CP.Obj.Header.PointerToSymbolTable = 0;
+
+ *reinterpret_cast<support::ulittle32_t *>(&CP.StringTable[0]) =
+ CP.StringTable.size();
+
+ return true;
+}
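+
+// Worked example for the layout above (a sketch; it assumes a plain object
+// file, i.e. !isPE() and no bigobj, so getHeaderSize() == 20 and there is no
+// optional header): with two sections,
+//   SectionTableStart = 20
+//   SectionTableSize  = 2 * COFF::SectionSize = 80
+// so the first section's raw data starts at alignTo(100, 4) == 100, and each
+// following section is placed after the previous one's raw data and
+// relocations (COFF::RelocationSize == 10 bytes per relocation).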
+
+template <typename value_type> struct binary_le_impl {
+ value_type Value;
+ binary_le_impl(value_type V) : Value(V) {}
+};
+
+template <typename value_type>
+raw_ostream &operator<<(raw_ostream &OS,
+ const binary_le_impl<value_type> &BLE) {
+ char Buffer[sizeof(BLE.Value)];
+ support::endian::write<value_type, support::little, support::unaligned>(
+ Buffer, BLE.Value);
+ OS.write(Buffer, sizeof(BLE.Value));
+ return OS;
+}
+
+template <typename value_type>
+binary_le_impl<value_type> binary_le(value_type V) {
+ return binary_le_impl<value_type>(V);
+}
+
+template <size_t NumBytes> struct zeros_impl {};
+
+template <size_t NumBytes>
+raw_ostream &operator<<(raw_ostream &OS, const zeros_impl<NumBytes> &) {
+ char Buffer[NumBytes];
+ memset(Buffer, 0, sizeof(Buffer));
+ OS.write(Buffer, sizeof(Buffer));
+ return OS;
+}
+
+template <typename T> zeros_impl<sizeof(T)> zeros(const T &) {
+ return zeros_impl<sizeof(T)>();
+}
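+
+// Usage sketch for the two stream helpers above (illustrative only):
+//
+//   OS << binary_le(uint32_t(0x11223344)); // emits the bytes 44 33 22 11
+//   OS << zeros(uint32_t(0));              // emits four zero bytes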
+
+template <typename T>
+static uint32_t initializeOptionalHeader(COFFParser &CP, uint16_t Magic,
+ T Header) {
+ memset(Header, 0, sizeof(*Header));
+ Header->Magic = Magic;
+ Header->SectionAlignment = CP.Obj.OptionalHeader->Header.SectionAlignment;
+ Header->FileAlignment = CP.Obj.OptionalHeader->Header.FileAlignment;
+ uint32_t SizeOfCode = 0, SizeOfInitializedData = 0,
+ SizeOfUninitializedData = 0;
+ uint32_t SizeOfHeaders = alignTo(CP.SectionTableStart + CP.SectionTableSize,
+ Header->FileAlignment);
+ uint32_t SizeOfImage = alignTo(SizeOfHeaders, Header->SectionAlignment);
+ uint32_t BaseOfData = 0;
+ for (const COFFYAML::Section &S : CP.Obj.Sections) {
+ if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_CODE)
+ SizeOfCode += S.Header.SizeOfRawData;
+ if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+ SizeOfInitializedData += S.Header.SizeOfRawData;
+ if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ SizeOfUninitializedData += S.Header.SizeOfRawData;
+ if (S.Name.equals(".text"))
+ Header->BaseOfCode = S.Header.VirtualAddress; // RVA
+ else if (S.Name.equals(".data"))
+ BaseOfData = S.Header.VirtualAddress; // RVA
+ if (S.Header.VirtualAddress)
+ SizeOfImage += alignTo(S.Header.VirtualSize, Header->SectionAlignment);
+ }
+ Header->SizeOfCode = SizeOfCode;
+ Header->SizeOfInitializedData = SizeOfInitializedData;
+ Header->SizeOfUninitializedData = SizeOfUninitializedData;
+ Header->AddressOfEntryPoint =
+ CP.Obj.OptionalHeader->Header.AddressOfEntryPoint; // RVA
+ Header->ImageBase = CP.Obj.OptionalHeader->Header.ImageBase;
+ Header->MajorOperatingSystemVersion =
+ CP.Obj.OptionalHeader->Header.MajorOperatingSystemVersion;
+ Header->MinorOperatingSystemVersion =
+ CP.Obj.OptionalHeader->Header.MinorOperatingSystemVersion;
+ Header->MajorImageVersion = CP.Obj.OptionalHeader->Header.MajorImageVersion;
+ Header->MinorImageVersion = CP.Obj.OptionalHeader->Header.MinorImageVersion;
+ Header->MajorSubsystemVersion =
+ CP.Obj.OptionalHeader->Header.MajorSubsystemVersion;
+ Header->MinorSubsystemVersion =
+ CP.Obj.OptionalHeader->Header.MinorSubsystemVersion;
+ Header->SizeOfImage = SizeOfImage;
+ Header->SizeOfHeaders = SizeOfHeaders;
+ Header->Subsystem = CP.Obj.OptionalHeader->Header.Subsystem;
+ Header->DLLCharacteristics = CP.Obj.OptionalHeader->Header.DLLCharacteristics;
+ Header->SizeOfStackReserve = CP.Obj.OptionalHeader->Header.SizeOfStackReserve;
+ Header->SizeOfStackCommit = CP.Obj.OptionalHeader->Header.SizeOfStackCommit;
+ Header->SizeOfHeapReserve = CP.Obj.OptionalHeader->Header.SizeOfHeapReserve;
+ Header->SizeOfHeapCommit = CP.Obj.OptionalHeader->Header.SizeOfHeapCommit;
+ Header->NumberOfRvaAndSize = COFF::NUM_DATA_DIRECTORIES + 1;
+ return BaseOfData;
+}
+
+static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
+ if (CP.isPE()) {
+ // PE files start with a DOS stub.
+ object::dos_header DH;
+ memset(&DH, 0, sizeof(DH));
+
+ // DOS EXEs start with "MZ" magic.
+ DH.Magic[0] = 'M';
+ DH.Magic[1] = 'Z';
+ // Initializing the AddressOfRelocationTable is strictly optional but
+ // mollifies certain tools which expect it to have a value greater than
+ // 0x40.
+ DH.AddressOfRelocationTable = sizeof(DH);
+ // This is the address of the PE signature.
+ DH.AddressOfNewExeHeader = DOSStubSize;
+
+ // Write out our DOS stub.
+ OS.write(reinterpret_cast<char *>(&DH), sizeof(DH));
+ // Write padding until we reach the position of where our PE signature
+ // should live.
+ OS.write_zeros(DOSStubSize - sizeof(DH));
+ // Write out the PE signature.
+ OS.write(COFF::PEMagic, sizeof(COFF::PEMagic));
+ }
+ if (CP.useBigObj()) {
+ OS << binary_le(static_cast<uint16_t>(COFF::IMAGE_FILE_MACHINE_UNKNOWN))
+ << binary_le(static_cast<uint16_t>(0xffff))
+ << binary_le(
+ static_cast<uint16_t>(COFF::BigObjHeader::MinBigObjectVersion))
+ << binary_le(CP.Obj.Header.Machine)
+ << binary_le(CP.Obj.Header.TimeDateStamp);
+ OS.write(COFF::BigObjMagic, sizeof(COFF::BigObjMagic));
+ OS << zeros(uint32_t(0)) << zeros(uint32_t(0)) << zeros(uint32_t(0))
+ << zeros(uint32_t(0)) << binary_le(CP.Obj.Header.NumberOfSections)
+ << binary_le(CP.Obj.Header.PointerToSymbolTable)
+ << binary_le(CP.Obj.Header.NumberOfSymbols);
+ } else {
+ OS << binary_le(CP.Obj.Header.Machine)
+ << binary_le(static_cast<int16_t>(CP.Obj.Header.NumberOfSections))
+ << binary_le(CP.Obj.Header.TimeDateStamp)
+ << binary_le(CP.Obj.Header.PointerToSymbolTable)
+ << binary_le(CP.Obj.Header.NumberOfSymbols)
+ << binary_le(CP.Obj.Header.SizeOfOptionalHeader)
+ << binary_le(CP.Obj.Header.Characteristics);
+ }
+ if (CP.isPE()) {
+ if (CP.is64Bit()) {
+ object::pe32plus_header PEH;
+ initializeOptionalHeader(CP, COFF::PE32Header::PE32_PLUS, &PEH);
+ OS.write(reinterpret_cast<char *>(&PEH), sizeof(PEH));
+ } else {
+ object::pe32_header PEH;
+ uint32_t BaseOfData =
+ initializeOptionalHeader(CP, COFF::PE32Header::PE32, &PEH);
+ PEH.BaseOfData = BaseOfData;
+ OS.write(reinterpret_cast<char *>(&PEH), sizeof(PEH));
+ }
+ for (const Optional<COFF::DataDirectory> &DD :
+ CP.Obj.OptionalHeader->DataDirectories) {
+ if (!DD.hasValue()) {
+ OS << zeros(uint32_t(0));
+ OS << zeros(uint32_t(0));
+ } else {
+ OS << binary_le(DD->RelativeVirtualAddress);
+ OS << binary_le(DD->Size);
+ }
+ }
+ OS << zeros(uint32_t(0));
+ OS << zeros(uint32_t(0));
+ }
+
+ assert(OS.tell() == CP.SectionTableStart);
+ // Output section table.
+ for (const COFFYAML::Section &S : CP.Obj.Sections) {
+ OS.write(S.Header.Name, COFF::NameSize);
+ OS << binary_le(S.Header.VirtualSize)
+ << binary_le(S.Header.VirtualAddress)
+ << binary_le(S.Header.SizeOfRawData)
+ << binary_le(S.Header.PointerToRawData)
+ << binary_le(S.Header.PointerToRelocations)
+ << binary_le(S.Header.PointerToLineNumbers)
+ << binary_le(S.Header.NumberOfRelocations)
+ << binary_le(S.Header.NumberOfLineNumbers)
+ << binary_le(S.Header.Characteristics);
+ }
+ assert(OS.tell() == CP.SectionTableStart + CP.SectionTableSize);
+
+ unsigned CurSymbol = 0;
+ StringMap<unsigned> SymbolTableIndexMap;
+ for (const COFFYAML::Symbol &Sym : CP.Obj.Symbols) {
+ SymbolTableIndexMap[Sym.Name] = CurSymbol;
+ CurSymbol += 1 + Sym.Header.NumberOfAuxSymbols;
+ }
+
+ // Output section data.
+ for (const COFFYAML::Section &S : CP.Obj.Sections) {
+ if (S.Header.SizeOfRawData == 0 || S.Header.PointerToRawData == 0)
+ continue;
+ assert(S.Header.PointerToRawData >= OS.tell());
+ OS.write_zeros(S.Header.PointerToRawData - OS.tell());
+ S.SectionData.writeAsBinary(OS);
+ assert(S.Header.SizeOfRawData >= S.SectionData.binary_size());
+ OS.write_zeros(S.Header.SizeOfRawData - S.SectionData.binary_size());
+ for (const COFFYAML::Relocation &R : S.Relocations) {
+ uint32_t SymbolTableIndex;
+ if (R.SymbolTableIndex) {
+ if (!R.SymbolName.empty())
+ WithColor::error()
+ << "Both SymbolName and SymbolTableIndex specified\n";
+ SymbolTableIndex = *R.SymbolTableIndex;
+ } else {
+ SymbolTableIndex = SymbolTableIndexMap[R.SymbolName];
+ }
+ OS << binary_le(R.VirtualAddress) << binary_le(SymbolTableIndex)
+ << binary_le(R.Type);
+ }
+ }
+
+ // Output symbol table.
+
+ for (const COFFYAML::Symbol &Sym : CP.Obj.Symbols) {
+ OS.write(Sym.Header.Name, COFF::NameSize);
+ OS << binary_le(Sym.Header.Value);
+ if (CP.useBigObj())
+ OS << binary_le(Sym.Header.SectionNumber);
+ else
+ OS << binary_le(static_cast<int16_t>(Sym.Header.SectionNumber));
+ OS << binary_le(Sym.Header.Type) << binary_le(Sym.Header.StorageClass)
+ << binary_le(Sym.Header.NumberOfAuxSymbols);
+
+ if (Sym.FunctionDefinition) {
+ OS << binary_le(Sym.FunctionDefinition->TagIndex)
+ << binary_le(Sym.FunctionDefinition->TotalSize)
+ << binary_le(Sym.FunctionDefinition->PointerToLinenumber)
+ << binary_le(Sym.FunctionDefinition->PointerToNextFunction)
+ << zeros(Sym.FunctionDefinition->unused);
+ OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
+ }
+ if (Sym.bfAndefSymbol) {
+ OS << zeros(Sym.bfAndefSymbol->unused1)
+ << binary_le(Sym.bfAndefSymbol->Linenumber)
+ << zeros(Sym.bfAndefSymbol->unused2)
+ << binary_le(Sym.bfAndefSymbol->PointerToNextFunction)
+ << zeros(Sym.bfAndefSymbol->unused3);
+ OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
+ }
+ if (Sym.WeakExternal) {
+ OS << binary_le(Sym.WeakExternal->TagIndex)
+ << binary_le(Sym.WeakExternal->Characteristics)
+ << zeros(Sym.WeakExternal->unused);
+ OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
+ }
+ if (!Sym.File.empty()) {
+ unsigned SymbolSize = CP.getSymbolSize();
+ uint32_t NumberOfAuxRecords =
+ (Sym.File.size() + SymbolSize - 1) / SymbolSize;
+ uint32_t NumberOfAuxBytes = NumberOfAuxRecords * SymbolSize;
+ uint32_t NumZeros = NumberOfAuxBytes - Sym.File.size();
+ OS.write(Sym.File.data(), Sym.File.size());
+ OS.write_zeros(NumZeros);
+ }
+ if (Sym.SectionDefinition) {
+ OS << binary_le(Sym.SectionDefinition->Length)
+ << binary_le(Sym.SectionDefinition->NumberOfRelocations)
+ << binary_le(Sym.SectionDefinition->NumberOfLinenumbers)
+ << binary_le(Sym.SectionDefinition->CheckSum)
+ << binary_le(static_cast<int16_t>(Sym.SectionDefinition->Number))
+ << binary_le(Sym.SectionDefinition->Selection)
+ << zeros(Sym.SectionDefinition->unused)
+ << binary_le(static_cast<int16_t>(Sym.SectionDefinition->Number >> 16));
+ OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
+ }
+ if (Sym.CLRToken) {
+ OS << binary_le(Sym.CLRToken->AuxType) << zeros(Sym.CLRToken->unused1)
+ << binary_le(Sym.CLRToken->SymbolTableIndex)
+ << zeros(Sym.CLRToken->unused2);
+ OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
+ }
+ }
+
+ // Output string table.
+ if (CP.Obj.Header.PointerToSymbolTable)
+ OS.write(&CP.StringTable[0], CP.StringTable.size());
+ return true;
+}
+
+namespace llvm {
+namespace yaml {
+
+bool yaml2coff(llvm::COFFYAML::Object &Doc, raw_ostream &Out,
+ ErrorHandler ErrHandler) {
+ COFFParser CP(Doc, ErrHandler);
+ if (!CP.parse()) {
+ ErrHandler("failed to parse YAML file");
+ return false;
+ }
+
+ if (!layoutOptionalHeader(CP)) {
+ ErrHandler("failed to layout optional header for COFF file");
+ return false;
+ }
+
+ if (!layoutCOFF(CP)) {
+ ErrHandler("failed to layout COFF file");
+ return false;
+ }
+ if (!writeCOFF(CP, Out)) {
+ ErrHandler("failed to write COFF file");
+ return false;
+ }
+ return true;
+}
+
+} // namespace yaml
+} // namespace llvm
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 227107c051dd..95409fdc3300 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -391,7 +391,7 @@ template <> void SymbolRecordImpl<DefRangeRegisterSym>::map(IO &IO) {
}
template <> void SymbolRecordImpl<DefRangeFramePointerRelSym>::map(IO &IO) {
- IO.mapRequired("Offset", Symbol.Offset);
+ IO.mapRequired("Offset", Symbol.Hdr.Offset);
IO.mapRequired("Range", Symbol.Range);
IO.mapRequired("Gaps", Symbol.Gaps);
}
diff --git a/lib/ObjectYAML/ELFEmitter.cpp b/lib/ObjectYAML/ELFEmitter.cpp
new file mode 100644
index 000000000000..e0faed256f6b
--- /dev/null
+++ b/lib/ObjectYAML/ELFEmitter.cpp
@@ -0,0 +1,1152 @@
+//===- yaml2elf - Convert YAML to an ELF object file ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The ELF component of yaml2obj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/ObjectYAML/ELFYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// This class is used to build up a contiguous binary blob while keeping
+// track of an offset in the output (which notionally begins at
+// `InitialOffset`).
+namespace {
+class ContiguousBlobAccumulator {
+ const uint64_t InitialOffset;
+ SmallVector<char, 128> Buf;
+ raw_svector_ostream OS;
+
+ /// \returns The new offset.
+ uint64_t padToAlignment(unsigned Align) {
+ if (Align == 0)
+ Align = 1;
+ uint64_t CurrentOffset = InitialOffset + OS.tell();
+ uint64_t AlignedOffset = alignTo(CurrentOffset, Align);
+ OS.write_zeros(AlignedOffset - CurrentOffset);
+ return AlignedOffset; // The current offset after padding.
+ }
+
+public:
+ ContiguousBlobAccumulator(uint64_t InitialOffset_)
+ : InitialOffset(InitialOffset_), Buf(), OS(Buf) {}
+ template <class Integer>
+ raw_ostream &getOSAndAlignedOffset(Integer &Offset, unsigned Align) {
+ Offset = padToAlignment(Align);
+ return OS;
+ }
+ void writeBlobToStream(raw_ostream &Out) { Out << OS.str(); }
+};
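+
+// Usage sketch (illustrative; the offsets assume InitialOffset == 64):
+//
+//   ContiguousBlobAccumulator CBA(/*InitialOffset=*/64);
+//   uint64_t Off;
+//   CBA.getOSAndAlignedOffset(Off, 16) << "payload"; // Off == 64
+//   CBA.getOSAndAlignedOffset(Off, 16);              // pads, Off == 80
+//   CBA.writeBlobToStream(Out); // "payload" followed by 9 bytes of padding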
+
+// Used to keep track of section and symbol names, so that in the YAML file
+// sections and symbols can be referenced by name instead of by index.
+class NameToIdxMap {
+ StringMap<unsigned> Map;
+
+public:
+ /// \returns false if name is already present in the map.
+ bool addName(StringRef Name, unsigned Ndx) {
+ return Map.insert({Name, Ndx}).second;
+ }
+ /// \returns false if name is not present in the map.
+ bool lookup(StringRef Name, unsigned &Idx) const {
+ auto I = Map.find(Name);
+ if (I == Map.end())
+ return false;
+ Idx = I->getValue();
+ return true;
+ }
+ /// Asserts if name is not present in the map.
+ unsigned get(StringRef Name) const {
+ unsigned Idx;
+ if (lookup(Name, Idx))
+ return Idx;
+ assert(false && "Expected section not found in index");
+ return 0;
+ }
+ unsigned size() const { return Map.size(); }
+};
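+
+// Usage sketch (illustrative):
+//
+//   NameToIdxMap Map;
+//   Map.addName(".text", 1);  // true: newly inserted
+//   Map.addName(".text", 2);  // false: already present, the index 1 is kept
+//   unsigned Idx;
+//   Map.lookup(".text", Idx); // true, Idx == 1
+//   Map.lookup(".data", Idx); // false, Idx is left untouched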
+
+/// "Single point of truth" for the ELF file construction.
+/// TODO: This class still has a ways to go before it is truly a "single
+/// point of truth".
+template <class ELFT> class ELFState {
+ typedef typename ELFT::Ehdr Elf_Ehdr;
+ typedef typename ELFT::Phdr Elf_Phdr;
+ typedef typename ELFT::Shdr Elf_Shdr;
+ typedef typename ELFT::Sym Elf_Sym;
+ typedef typename ELFT::Rel Elf_Rel;
+ typedef typename ELFT::Rela Elf_Rela;
+ typedef typename ELFT::Relr Elf_Relr;
+ typedef typename ELFT::Dyn Elf_Dyn;
+
+ enum class SymtabType { Static, Dynamic };
+
+ /// The future ".strtab" section.
+ StringTableBuilder DotStrtab{StringTableBuilder::ELF};
+
+ /// The future ".shstrtab" section.
+ StringTableBuilder DotShStrtab{StringTableBuilder::ELF};
+
+ /// The future ".dynstr" section.
+ StringTableBuilder DotDynstr{StringTableBuilder::ELF};
+
+ NameToIdxMap SN2I;
+ NameToIdxMap SymN2I;
+ NameToIdxMap DynSymN2I;
+ ELFYAML::Object &Doc;
+
+ bool HasError = false;
+ yaml::ErrorHandler ErrHandler;
+ void reportError(const Twine &Msg);
+
+ std::vector<Elf_Sym> toELFSymbols(ArrayRef<ELFYAML::Symbol> Symbols,
+ const StringTableBuilder &Strtab);
+ unsigned toSectionIndex(StringRef S, StringRef LocSec, StringRef LocSym = "");
+ unsigned toSymbolIndex(StringRef S, StringRef LocSec, bool IsDynamic);
+
+ void buildSectionIndex();
+ void buildSymbolIndexes();
+ void initProgramHeaders(std::vector<Elf_Phdr> &PHeaders);
+ bool initImplicitHeader(ContiguousBlobAccumulator &CBA, Elf_Shdr &Header,
+ StringRef SecName, ELFYAML::Section *YAMLSec);
+ void initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
+ ContiguousBlobAccumulator &CBA);
+ void initSymtabSectionHeader(Elf_Shdr &SHeader, SymtabType STType,
+ ContiguousBlobAccumulator &CBA,
+ ELFYAML::Section *YAMLSec);
+ void initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name,
+ StringTableBuilder &STB,
+ ContiguousBlobAccumulator &CBA,
+ ELFYAML::Section *YAMLSec);
+ void setProgramHeaderLayout(std::vector<Elf_Phdr> &PHeaders,
+ std::vector<Elf_Shdr> &SHeaders);
+ void finalizeStrings();
+ void writeELFHeader(ContiguousBlobAccumulator &CBA, raw_ostream &OS);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::RawContentSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::RelocationSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::Group &Group,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::SymtabShndxSection &Shndx,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::SymverSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::VerneedSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::VerdefSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::MipsABIFlags &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::DynamicSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::StackSizesSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::HashSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::AddrsigSection &Section,
+ ContiguousBlobAccumulator &CBA);
+
+ ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH);
+
+public:
+ static bool writeELF(raw_ostream &OS, ELFYAML::Object &Doc,
+ yaml::ErrorHandler EH);
+};
+} // end anonymous namespace
+
+template <class T> static size_t arrayDataSize(ArrayRef<T> A) {
+ return A.size() * sizeof(T);
+}
+
+template <class T> static void writeArrayData(raw_ostream &OS, ArrayRef<T> A) {
+ OS.write((const char *)A.data(), arrayDataSize(A));
+}
+
+template <class T> static void zero(T &Obj) { memset(&Obj, 0, sizeof(Obj)); }
+
+template <class ELFT>
+ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH)
+ : Doc(D), ErrHandler(EH) {
+ StringSet<> DocSections;
+ for (std::unique_ptr<ELFYAML::Section> &D : Doc.Sections) {
+ if (!D->Name.empty())
+ DocSections.insert(D->Name);
+
+ // Some sections want to link to .symtab by default, which means we want
+ // to create the symbol table for them.
+ if (D->Type == llvm::ELF::SHT_REL || D->Type == llvm::ELF::SHT_RELA)
+ if (!Doc.Symbols && D->Link.empty())
+ Doc.Symbols.emplace();
+ }
+
+ // Insert SHT_NULL section implicitly when it is not defined in YAML.
+ if (Doc.Sections.empty() || Doc.Sections.front()->Type != ELF::SHT_NULL)
+ Doc.Sections.insert(
+ Doc.Sections.begin(),
+ std::make_unique<ELFYAML::Section>(
+ ELFYAML::Section::SectionKind::RawContent, /*IsImplicit=*/true));
+
+ std::vector<StringRef> ImplicitSections;
+ if (Doc.Symbols)
+ ImplicitSections.push_back(".symtab");
+ ImplicitSections.insert(ImplicitSections.end(), {".strtab", ".shstrtab"});
+
+ if (!Doc.DynamicSymbols.empty())
+ ImplicitSections.insert(ImplicitSections.end(), {".dynsym", ".dynstr"});
+
+ // Insert placeholders for implicit sections that are not
+ // defined explicitly in YAML.
+ for (StringRef SecName : ImplicitSections) {
+ if (DocSections.count(SecName))
+ continue;
+
+ std::unique_ptr<ELFYAML::Section> Sec = std::make_unique<ELFYAML::Section>(
+ ELFYAML::Section::SectionKind::RawContent, /*IsImplicit=*/true);
+ Sec->Name = SecName;
+ Doc.Sections.push_back(std::move(Sec));
+ }
+}
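+
+// For example, a document like the following minimal sketch gets an implicit
+// null section plus .symtab, .strtab and .shstrtab appended by the
+// constructor above, because the SHT_RELA section has no explicit Link:
+//
+//   --- !ELF
+//   FileHeader:
+//     Class:   ELFCLASS64
+//     Data:    ELFDATA2LSB
+//     Type:    ET_REL
+//     Machine: EM_X86_64
+//   Sections:
+//     - Name: .rela.text
+//       Type: SHT_RELA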
+
+template <class ELFT>
+void ELFState<ELFT>::writeELFHeader(ContiguousBlobAccumulator &CBA,
+ raw_ostream &OS) {
+ using namespace llvm::ELF;
+
+ Elf_Ehdr Header;
+ zero(Header);
+ Header.e_ident[EI_MAG0] = 0x7f;
+ Header.e_ident[EI_MAG1] = 'E';
+ Header.e_ident[EI_MAG2] = 'L';
+ Header.e_ident[EI_MAG3] = 'F';
+ Header.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32;
+ Header.e_ident[EI_DATA] = Doc.Header.Data;
+ Header.e_ident[EI_VERSION] = EV_CURRENT;
+ Header.e_ident[EI_OSABI] = Doc.Header.OSABI;
+ Header.e_ident[EI_ABIVERSION] = Doc.Header.ABIVersion;
+ Header.e_type = Doc.Header.Type;
+ Header.e_machine = Doc.Header.Machine;
+ Header.e_version = EV_CURRENT;
+ Header.e_entry = Doc.Header.Entry;
+ Header.e_phoff = Doc.ProgramHeaders.size() ? sizeof(Header) : 0;
+ Header.e_flags = Doc.Header.Flags;
+ Header.e_ehsize = sizeof(Elf_Ehdr);
+ Header.e_phentsize = Doc.ProgramHeaders.size() ? sizeof(Elf_Phdr) : 0;
+ Header.e_phnum = Doc.ProgramHeaders.size();
+
+ Header.e_shentsize =
+ Doc.Header.SHEntSize ? (uint16_t)*Doc.Header.SHEntSize : sizeof(Elf_Shdr);
+ // The section header table is placed at the current end of the section
+ // content blob, suitably aligned; that offset becomes e_shoff unless SHOff
+ // overrides it.
+ uint64_t SHOff;
+ CBA.getOSAndAlignedOffset(SHOff, sizeof(typename ELFT::uint));
+ Header.e_shoff =
+ Doc.Header.SHOff ? typename ELFT::uint(*Doc.Header.SHOff) : SHOff;
+ Header.e_shnum =
+ Doc.Header.SHNum ? (uint16_t)*Doc.Header.SHNum : Doc.Sections.size();
+ Header.e_shstrndx = Doc.Header.SHStrNdx ? (uint16_t)*Doc.Header.SHStrNdx
+ : SN2I.get(".shstrtab");
+
+ OS.write((const char *)&Header, sizeof(Header));
+}
+
+template <class ELFT>
+void ELFState<ELFT>::initProgramHeaders(std::vector<Elf_Phdr> &PHeaders) {
+ for (const auto &YamlPhdr : Doc.ProgramHeaders) {
+ Elf_Phdr Phdr;
+ Phdr.p_type = YamlPhdr.Type;
+ Phdr.p_flags = YamlPhdr.Flags;
+ Phdr.p_vaddr = YamlPhdr.VAddr;
+ Phdr.p_paddr = YamlPhdr.PAddr;
+ PHeaders.push_back(Phdr);
+ }
+}
+
+template <class ELFT>
+unsigned ELFState<ELFT>::toSectionIndex(StringRef S, StringRef LocSec,
+ StringRef LocSym) {
+ unsigned Index;
+ if (SN2I.lookup(S, Index) || to_integer(S, Index))
+ return Index;
+
+ assert(LocSec.empty() || LocSym.empty());
+ if (!LocSym.empty())
+ reportError("unknown section referenced: '" + S + "' by YAML symbol '" +
+ LocSym + "'");
+ else
+ reportError("unknown section referenced: '" + S + "' by YAML section '" +
+ LocSec + "'");
+ return 0;
+}
+
+template <class ELFT>
+unsigned ELFState<ELFT>::toSymbolIndex(StringRef S, StringRef LocSec,
+ bool IsDynamic) {
+ const NameToIdxMap &SymMap = IsDynamic ? DynSymN2I : SymN2I;
+ unsigned Index;
+ // Here we try to look up S in the symbol table. If it is not there,
+ // treat its value as a symbol index.
+ if (!SymMap.lookup(S, Index) && !to_integer(S, Index)) {
+ reportError("unknown symbol referenced: '" + S + "' by YAML section '" +
+ LocSec + "'");
+ return 0;
+ }
+ return Index;
+}
+
+template <class ELFT>
+bool ELFState<ELFT>::initImplicitHeader(ContiguousBlobAccumulator &CBA,
+ Elf_Shdr &Header, StringRef SecName,
+ ELFYAML::Section *YAMLSec) {
+ // Check if the header was already initialized.
+ if (Header.sh_offset)
+ return false;
+
+ if (SecName == ".symtab")
+ initSymtabSectionHeader(Header, SymtabType::Static, CBA, YAMLSec);
+ else if (SecName == ".strtab")
+ initStrtabSectionHeader(Header, SecName, DotStrtab, CBA, YAMLSec);
+ else if (SecName == ".shstrtab")
+ initStrtabSectionHeader(Header, SecName, DotShStrtab, CBA, YAMLSec);
+ else if (SecName == ".dynsym")
+ initSymtabSectionHeader(Header, SymtabType::Dynamic, CBA, YAMLSec);
+ else if (SecName == ".dynstr")
+ initStrtabSectionHeader(Header, SecName, DotDynstr, CBA, YAMLSec);
+ else
+ return false;
+
+ // Override the fields if requested.
+ if (YAMLSec) {
+ if (YAMLSec->ShName)
+ Header.sh_name = *YAMLSec->ShName;
+ if (YAMLSec->ShOffset)
+ Header.sh_offset = *YAMLSec->ShOffset;
+ if (YAMLSec->ShSize)
+ Header.sh_size = *YAMLSec->ShSize;
+ }
+
+ return true;
+}
+
+StringRef llvm::ELFYAML::dropUniqueSuffix(StringRef S) {
+ size_t SuffixPos = S.rfind(" [");
+ if (SuffixPos == StringRef::npos)
+ return S;
+ return S.substr(0, SuffixPos);
+}
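+
+// E.g. dropUniqueSuffix(".text [1]") == ".text". The " [N]" suffix lets a
+// YAML document describe multiple sections or symbols that share one final
+// name; names without the marker are returned unchanged.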
+
+template <class ELFT>
+void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
+ ContiguousBlobAccumulator &CBA) {
+ // Ensure SHN_UNDEF entry is present. An all-zero section header is a
+ // valid SHN_UNDEF entry since SHT_NULL == 0.
+ SHeaders.resize(Doc.Sections.size());
+
+ for (size_t I = 0; I < Doc.Sections.size(); ++I) {
+ ELFYAML::Section *Sec = Doc.Sections[I].get();
+ if (I == 0 && Sec->IsImplicit)
+ continue;
+
+ // We have a few sections like string or symbol tables that are usually
+ // added implicitly to the end. However, if they are explicitly specified
+ // in the YAML, we need to write them here. This ensures the file offset
+ // remains correct.
+ Elf_Shdr &SHeader = SHeaders[I];
+ if (initImplicitHeader(CBA, SHeader, Sec->Name,
+ Sec->IsImplicit ? nullptr : Sec))
+ continue;
+
+ assert(Sec && "It can't be null unless it is an implicit section. But all "
+ "implicit sections should already have been handled above.");
+
+ SHeader.sh_name =
+ DotShStrtab.getOffset(ELFYAML::dropUniqueSuffix(Sec->Name));
+ SHeader.sh_type = Sec->Type;
+ if (Sec->Flags)
+ SHeader.sh_flags = *Sec->Flags;
+ SHeader.sh_addr = Sec->Address;
+ SHeader.sh_addralign = Sec->AddressAlign;
+
+ if (!Sec->Link.empty())
+ SHeader.sh_link = toSectionIndex(Sec->Link, Sec->Name);
+
+ if (I == 0) {
+ if (auto RawSec = dyn_cast<ELFYAML::RawContentSection>(Sec)) {
+ // We do not write any content for special SHN_UNDEF section.
+ if (RawSec->Size)
+ SHeader.sh_size = *RawSec->Size;
+ if (RawSec->Info)
+ SHeader.sh_info = *RawSec->Info;
+ }
+ if (Sec->EntSize)
+ SHeader.sh_entsize = *Sec->EntSize;
+ } else if (auto S = dyn_cast<ELFYAML::RawContentSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::SymtabShndxSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::RelocationSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::Group>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::MipsABIFlags>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::NoBitsSection>(Sec)) {
+ SHeader.sh_entsize = 0;
+ SHeader.sh_size = S->Size;
+ // A SHT_NOBITS section has no content to write, so we only need to set
+ // up the section offset.
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ } else if (auto S = dyn_cast<ELFYAML::DynamicSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::SymverSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::VerneedSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::VerdefSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::StackSizesSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::HashSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::AddrsigSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else {
+ llvm_unreachable("Unknown section type");
+ }
+
+ // Override the fields if requested.
+ if (Sec) {
+ if (Sec->ShName)
+ SHeader.sh_name = *Sec->ShName;
+ if (Sec->ShOffset)
+ SHeader.sh_offset = *Sec->ShOffset;
+ if (Sec->ShSize)
+ SHeader.sh_size = *Sec->ShSize;
+ }
+ }
+}
+
+static size_t findFirstNonGlobal(ArrayRef<ELFYAML::Symbol> Symbols) {
+ for (size_t I = 0; I < Symbols.size(); ++I)
+ if (Symbols[I].Binding.value != ELF::STB_LOCAL)
+ return I;
+ return Symbols.size();
+}
+
+static uint64_t writeContent(raw_ostream &OS,
+ const Optional<yaml::BinaryRef> &Content,
+ const Optional<llvm::yaml::Hex64> &Size) {
+ size_t ContentSize = 0;
+ if (Content) {
+ Content->writeAsBinary(OS);
+ ContentSize = Content->binary_size();
+ }
+
+ if (!Size)
+ return ContentSize;
+
+ OS.write_zeros(*Size - ContentSize);
+ return *Size;
+}
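+
+// E.g. with Content holding two bytes and Size == 8, writeContent() emits the
+// two content bytes followed by six zero bytes and returns 8. With only
+// Content it returns the content size; with only Size it emits Size zero
+// bytes.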
+
+template <class ELFT>
+std::vector<typename ELFT::Sym>
+ELFState<ELFT>::toELFSymbols(ArrayRef<ELFYAML::Symbol> Symbols,
+ const StringTableBuilder &Strtab) {
+ std::vector<Elf_Sym> Ret;
+ Ret.resize(Symbols.size() + 1);
+
+ size_t I = 0;
+ for (const auto &Sym : Symbols) {
+ Elf_Sym &Symbol = Ret[++I];
+
+ // If NameIndex, which contains the name offset, is explicitly specified, we
+ // use it. This is useful for preparing broken objects. Otherwise, we add
+ // the specified Name to the string table builder to get its offset.
+ if (Sym.NameIndex)
+ Symbol.st_name = *Sym.NameIndex;
+ else if (!Sym.Name.empty())
+ Symbol.st_name = Strtab.getOffset(ELFYAML::dropUniqueSuffix(Sym.Name));
+
+ Symbol.setBindingAndType(Sym.Binding, Sym.Type);
+ if (!Sym.Section.empty())
+ Symbol.st_shndx = toSectionIndex(Sym.Section, "", Sym.Name);
+ else if (Sym.Index)
+ Symbol.st_shndx = *Sym.Index;
+
+ Symbol.st_value = Sym.Value;
+ Symbol.st_other = Sym.Other ? *Sym.Other : 0;
+ Symbol.st_size = Sym.Size;
+ }
+
+ return Ret;
+}
+
+template <class ELFT>
+void ELFState<ELFT>::initSymtabSectionHeader(Elf_Shdr &SHeader,
+ SymtabType STType,
+ ContiguousBlobAccumulator &CBA,
+ ELFYAML::Section *YAMLSec) {
+
+ bool IsStatic = STType == SymtabType::Static;
+ ArrayRef<ELFYAML::Symbol> Symbols;
+ if (IsStatic && Doc.Symbols)
+ Symbols = *Doc.Symbols;
+ else if (!IsStatic)
+ Symbols = Doc.DynamicSymbols;
+
+ ELFYAML::RawContentSection *RawSec =
+ dyn_cast_or_null<ELFYAML::RawContentSection>(YAMLSec);
+ if (RawSec && !Symbols.empty() && (RawSec->Content || RawSec->Size)) {
+ if (RawSec->Content)
+ reportError("cannot specify both `Content` and " +
+ (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) +
+ " for symbol table section '" + RawSec->Name + "'");
+ if (RawSec->Size)
+ reportError("cannot specify both `Size` and " +
+ (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) +
+ " for symbol table section '" + RawSec->Name + "'");
+ return;
+ }
+
+ zero(SHeader);
+ SHeader.sh_name = DotShStrtab.getOffset(IsStatic ? ".symtab" : ".dynsym");
+
+ if (YAMLSec)
+ SHeader.sh_type = YAMLSec->Type;
+ else
+ SHeader.sh_type = IsStatic ? ELF::SHT_SYMTAB : ELF::SHT_DYNSYM;
+
+ if (RawSec && !RawSec->Link.empty()) {
+ // If the Link field is explicitly defined in the document,
+ // we should use it.
+ SHeader.sh_link = toSectionIndex(RawSec->Link, RawSec->Name);
+ } else {
+ // When we describe the .dynsym section in the document explicitly, it is
+ // allowed to omit the "DynamicSymbols" tag. In this case .dynstr is not
+ // added implicitly and we should be able to leave the Link zeroed if
+ // .dynstr is not defined.
+ unsigned Link = 0;
+ if (IsStatic)
+ Link = SN2I.get(".strtab");
+ else
+ SN2I.lookup(".dynstr", Link);
+ SHeader.sh_link = Link;
+ }
+
+ if (YAMLSec && YAMLSec->Flags)
+ SHeader.sh_flags = *YAMLSec->Flags;
+ else if (!IsStatic)
+ SHeader.sh_flags = ELF::SHF_ALLOC;
+
+ // If the symbol table section is explicitly described in the YAML
+ // then we should set the fields requested.
+ SHeader.sh_info = (RawSec && RawSec->Info) ? (unsigned)(*RawSec->Info)
+ : findFirstNonGlobal(Symbols) + 1;
+ SHeader.sh_entsize = (YAMLSec && YAMLSec->EntSize)
+ ? (uint64_t)(*YAMLSec->EntSize)
+ : sizeof(Elf_Sym);
+ SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 8;
+ SHeader.sh_addr = YAMLSec ? (uint64_t)YAMLSec->Address : 0;
+
+ auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ if (RawSec && (RawSec->Content || RawSec->Size)) {
+ assert(Symbols.empty());
+ SHeader.sh_size = writeContent(OS, RawSec->Content, RawSec->Size);
+ return;
+ }
+
+ std::vector<Elf_Sym> Syms =
+ toELFSymbols(Symbols, IsStatic ? DotStrtab : DotDynstr);
+ writeArrayData(OS, makeArrayRef(Syms));
+ SHeader.sh_size = arrayDataSize(makeArrayRef(Syms));
+}
+
+template <class ELFT>
+void ELFState<ELFT>::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name,
+ StringTableBuilder &STB,
+ ContiguousBlobAccumulator &CBA,
+ ELFYAML::Section *YAMLSec) {
+ zero(SHeader);
+ SHeader.sh_name = DotShStrtab.getOffset(Name);
+ SHeader.sh_type = YAMLSec ? YAMLSec->Type : ELF::SHT_STRTAB;
+ SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 1;
+
+ ELFYAML::RawContentSection *RawSec =
+ dyn_cast_or_null<ELFYAML::RawContentSection>(YAMLSec);
+
+ auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ if (RawSec && (RawSec->Content || RawSec->Size)) {
+ SHeader.sh_size = writeContent(OS, RawSec->Content, RawSec->Size);
+ } else {
+ STB.write(OS);
+ SHeader.sh_size = STB.getSize();
+ }
+
+ if (YAMLSec && YAMLSec->EntSize)
+ SHeader.sh_entsize = *YAMLSec->EntSize;
+
+ if (RawSec && RawSec->Info)
+ SHeader.sh_info = *RawSec->Info;
+
+ if (YAMLSec && YAMLSec->Flags)
+ SHeader.sh_flags = *YAMLSec->Flags;
+ else if (Name == ".dynstr")
+ SHeader.sh_flags = ELF::SHF_ALLOC;
+
+ // If the section is explicitly described in the YAML
+ // then we want to use its section address.
+ if (YAMLSec)
+ SHeader.sh_addr = YAMLSec->Address;
+}
+
+template <class ELFT> void ELFState<ELFT>::reportError(const Twine &Msg) {
+ ErrHandler(Msg);
+ HasError = true;
+}
+
+template <class ELFT>
+void ELFState<ELFT>::setProgramHeaderLayout(std::vector<Elf_Phdr> &PHeaders,
+ std::vector<Elf_Shdr> &SHeaders) {
+ uint32_t PhdrIdx = 0;
+ for (auto &YamlPhdr : Doc.ProgramHeaders) {
+ Elf_Phdr &PHeader = PHeaders[PhdrIdx++];
+
+ std::vector<Elf_Shdr *> Sections;
+ for (const ELFYAML::SectionName &SecName : YamlPhdr.Sections) {
+ unsigned Index;
+ if (!SN2I.lookup(SecName.Section, Index)) {
+ reportError("unknown section referenced: '" + SecName.Section +
+ "' by program header");
+ continue;
+ }
+ Sections.push_back(&SHeaders[Index]);
+ }
+
+ if (YamlPhdr.Offset) {
+ PHeader.p_offset = *YamlPhdr.Offset;
+ } else {
+ if (!YamlPhdr.Sections.empty())
+ PHeader.p_offset = UINT32_MAX;
+ else
+ PHeader.p_offset = 0;
+
+ // Find the minimum offset for the program header.
+ for (Elf_Shdr *SHeader : Sections)
+ PHeader.p_offset = std::min(PHeader.p_offset, SHeader->sh_offset);
+ }
+
+ // Find the maximum offset of the end of a section in order to set p_filesz
+ // and p_memsz. When setting p_filesz, trailing SHT_NOBITS sections are not
+ // counted.
+ uint64_t FileOffset = PHeader.p_offset, MemOffset = PHeader.p_offset;
+ for (Elf_Shdr *SHeader : Sections) {
+ uint64_t End = SHeader->sh_offset + SHeader->sh_size;
+ MemOffset = std::max(MemOffset, End);
+
+ if (SHeader->sh_type != llvm::ELF::SHT_NOBITS)
+ FileOffset = std::max(FileOffset, End);
+ }
+
+ // Set the file size and the memory size if not set explicitly.
+ PHeader.p_filesz = YamlPhdr.FileSize ? uint64_t(*YamlPhdr.FileSize)
+ : FileOffset - PHeader.p_offset;
+ PHeader.p_memsz = YamlPhdr.MemSize ? uint64_t(*YamlPhdr.MemSize)
+ : MemOffset - PHeader.p_offset;
+
+ if (YamlPhdr.Align) {
+ PHeader.p_align = *YamlPhdr.Align;
+ } else {
+ // Set the alignment of the segment to be the maximum alignment of the
+ // sections so that by default the segment has a valid and sensible
+ // alignment.
+ PHeader.p_align = 1;
+ for (Elf_Shdr *SHeader : Sections)
+ PHeader.p_align = std::max(PHeader.p_align, SHeader->sh_addralign);
+ }
+ }
+}
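+
+// Worked example for the layout above (the offsets are illustrative): a
+// segment covering a SHT_PROGBITS section (sh_offset 0x1000, sh_size 0x100)
+// and a trailing SHT_NOBITS section (sh_offset 0x1100, sh_size 0x200) gets
+//   p_offset == 0x1000
+//   p_filesz == 0x100  (the SHT_NOBITS tail occupies no file space)
+//   p_memsz  == 0x300
+// unless Offset, FileSize or MemSize override these in the YAML document.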
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(
+ Elf_Shdr &SHeader, const ELFYAML::RawContentSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ SHeader.sh_size = writeContent(OS, Section.Content, Section.Size);
+
+ if (Section.EntSize)
+ SHeader.sh_entsize = *Section.EntSize;
+ else if (Section.Type == llvm::ELF::SHT_RELR)
+ SHeader.sh_entsize = sizeof(Elf_Relr);
+ else
+ SHeader.sh_entsize = 0;
+
+ if (Section.Info)
+ SHeader.sh_info = *Section.Info;
+}
+
+static bool isMips64EL(const ELFYAML::Object &Doc) {
+ return Doc.Header.Machine == ELFYAML::ELF_EM(llvm::ELF::EM_MIPS) &&
+ Doc.Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64) &&
+ Doc.Header.Data == ELFYAML::ELF_ELFDATA(ELF::ELFDATA2LSB);
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(
+ Elf_Shdr &SHeader, const ELFYAML::RelocationSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ assert((Section.Type == llvm::ELF::SHT_REL ||
+ Section.Type == llvm::ELF::SHT_RELA) &&
+ "Section type is not SHT_REL nor SHT_RELA");
+
+ bool IsRela = Section.Type == llvm::ELF::SHT_RELA;
+ SHeader.sh_entsize = IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
+ SHeader.sh_size = SHeader.sh_entsize * Section.Relocations.size();
+
+ // For relocation sections, set the link to .symtab by default.
+ if (Section.Link.empty())
+ SHeader.sh_link = SN2I.get(".symtab");
+
+ if (!Section.RelocatableSec.empty())
+ SHeader.sh_info = toSectionIndex(Section.RelocatableSec, Section.Name);
+
+ auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ for (const auto &Rel : Section.Relocations) {
+ unsigned SymIdx = Rel.Symbol ? toSymbolIndex(*Rel.Symbol, Section.Name,
+ Section.Link == ".dynsym")
+ : 0;
+ if (IsRela) {
+ Elf_Rela REntry;
+ zero(REntry);
+ REntry.r_offset = Rel.Offset;
+ REntry.r_addend = Rel.Addend;
+ REntry.setSymbolAndType(SymIdx, Rel.Type, isMips64EL(Doc));
+ OS.write((const char *)&REntry, sizeof(REntry));
+ } else {
+ Elf_Rel REntry;
+ zero(REntry);
+ REntry.r_offset = Rel.Offset;
+ REntry.setSymbolAndType(SymIdx, Rel.Type, isMips64EL(Doc));
+ OS.write((const char *)&REntry, sizeof(REntry));
+ }
+ }
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(
+ Elf_Shdr &SHeader, const ELFYAML::SymtabShndxSection &Shndx,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ for (uint32_t E : Shndx.Entries)
+ support::endian::write<uint32_t>(OS, E, ELFT::TargetEndianness);
+
+ SHeader.sh_entsize = Shndx.EntSize ? (uint64_t)*Shndx.EntSize : 4;
+ SHeader.sh_size = Shndx.Entries.size() * SHeader.sh_entsize;
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::Group &Section,
+ ContiguousBlobAccumulator &CBA) {
+ assert(Section.Type == llvm::ELF::SHT_GROUP &&
+ "Section type is not SHT_GROUP");
+
+ SHeader.sh_entsize = 4;
+ SHeader.sh_size = SHeader.sh_entsize * Section.Members.size();
+ SHeader.sh_info =
+ toSymbolIndex(Section.Signature, Section.Name, /*IsDynamic=*/false);
+
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ for (const ELFYAML::SectionOrType &Member : Section.Members) {
+ unsigned int SectionIndex = 0;
+ if (Member.sectionNameOrType == "GRP_COMDAT")
+ SectionIndex = llvm::ELF::GRP_COMDAT;
+ else
+ SectionIndex = toSectionIndex(Member.sectionNameOrType, Section.Name);
+ support::endian::write<uint32_t>(OS, SectionIndex, ELFT::TargetEndianness);
+ }
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::SymverSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ for (uint16_t Version : Section.Entries)
+ support::endian::write<uint16_t>(OS, Version, ELFT::TargetEndianness);
+
+ SHeader.sh_entsize = Section.EntSize ? (uint64_t)*Section.EntSize : 2;
+ SHeader.sh_size = Section.Entries.size() * SHeader.sh_entsize;
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(
+ Elf_Shdr &SHeader, const ELFYAML::StackSizesSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ using uintX_t = typename ELFT::uint;
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ if (Section.Content || Section.Size) {
+ SHeader.sh_size = writeContent(OS, Section.Content, Section.Size);
+ return;
+ }
+
+ for (const ELFYAML::StackSizeEntry &E : *Section.Entries) {
+ support::endian::write<uintX_t>(OS, E.Address, ELFT::TargetEndianness);
+ SHeader.sh_size += sizeof(uintX_t) + encodeULEB128(E.Size, OS);
+ }
+}
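+
+// Encoding sketch: on a 64-bit target an entry with Address 0x10 and Size
+// 0x80 contributes ten bytes: an 8-byte address in the target endianness
+// followed by the ULEB128 encoding of the size (the bytes 0x80 0x01).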
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::HashSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ unsigned Link = 0;
+ if (Section.Link.empty() && SN2I.lookup(".dynsym", Link))
+ SHeader.sh_link = Link;
+
+ if (Section.Content || Section.Size) {
+ SHeader.sh_size = writeContent(OS, Section.Content, Section.Size);
+ return;
+ }
+
+ support::endian::write<uint32_t>(OS, Section.Bucket->size(),
+ ELFT::TargetEndianness);
+ support::endian::write<uint32_t>(OS, Section.Chain->size(),
+ ELFT::TargetEndianness);
+ for (uint32_t Val : *Section.Bucket)
+ support::endian::write<uint32_t>(OS, Val, ELFT::TargetEndianness);
+ for (uint32_t Val : *Section.Chain)
+ support::endian::write<uint32_t>(OS, Val, ELFT::TargetEndianness);
+
+ SHeader.sh_size = (2 + Section.Bucket->size() + Section.Chain->size()) * 4;
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::VerdefSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ typedef typename ELFT::Verdef Elf_Verdef;
+ typedef typename ELFT::Verdaux Elf_Verdaux;
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ uint64_t AuxCnt = 0;
+ for (size_t I = 0; I < Section.Entries.size(); ++I) {
+ const ELFYAML::VerdefEntry &E = Section.Entries[I];
+
+ Elf_Verdef VerDef;
+ VerDef.vd_version = E.Version;
+ VerDef.vd_flags = E.Flags;
+ VerDef.vd_ndx = E.VersionNdx;
+ VerDef.vd_hash = E.Hash;
+ VerDef.vd_aux = sizeof(Elf_Verdef);
+ VerDef.vd_cnt = E.VerNames.size();
+ if (I == Section.Entries.size() - 1)
+ VerDef.vd_next = 0;
+ else
+ VerDef.vd_next =
+ sizeof(Elf_Verdef) + E.VerNames.size() * sizeof(Elf_Verdaux);
+ OS.write((const char *)&VerDef, sizeof(Elf_Verdef));
+
+ for (size_t J = 0; J < E.VerNames.size(); ++J, ++AuxCnt) {
+ Elf_Verdaux VernAux;
+ VernAux.vda_name = DotDynstr.getOffset(E.VerNames[J]);
+ if (J == E.VerNames.size() - 1)
+ VernAux.vda_next = 0;
+ else
+ VernAux.vda_next = sizeof(Elf_Verdaux);
+ OS.write((const char *)&VernAux, sizeof(Elf_Verdaux));
+ }
+ }
+
+ SHeader.sh_size = Section.Entries.size() * sizeof(Elf_Verdef) +
+ AuxCnt * sizeof(Elf_Verdaux);
+ SHeader.sh_info = Section.Info;
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::VerneedSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ typedef typename ELFT::Verneed Elf_Verneed;
+ typedef typename ELFT::Vernaux Elf_Vernaux;
+
+ auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ uint64_t AuxCnt = 0;
+ for (size_t I = 0; I < Section.VerneedV.size(); ++I) {
+ const ELFYAML::VerneedEntry &VE = Section.VerneedV[I];
+
+ Elf_Verneed VerNeed;
+ VerNeed.vn_version = VE.Version;
+ VerNeed.vn_file = DotDynstr.getOffset(VE.File);
+ if (I == Section.VerneedV.size() - 1)
+ VerNeed.vn_next = 0;
+ else
+ VerNeed.vn_next =
+ sizeof(Elf_Verneed) + VE.AuxV.size() * sizeof(Elf_Vernaux);
+ VerNeed.vn_cnt = VE.AuxV.size();
+ VerNeed.vn_aux = sizeof(Elf_Verneed);
+ OS.write((const char *)&VerNeed, sizeof(Elf_Verneed));
+
+ for (size_t J = 0; J < VE.AuxV.size(); ++J, ++AuxCnt) {
+ const ELFYAML::VernauxEntry &VAuxE = VE.AuxV[J];
+
+ Elf_Vernaux VernAux;
+ VernAux.vna_hash = VAuxE.Hash;
+ VernAux.vna_flags = VAuxE.Flags;
+ VernAux.vna_other = VAuxE.Other;
+ VernAux.vna_name = DotDynstr.getOffset(VAuxE.Name);
+ if (J == VE.AuxV.size() - 1)
+ VernAux.vna_next = 0;
+ else
+ VernAux.vna_next = sizeof(Elf_Vernaux);
+ OS.write((const char *)&VernAux, sizeof(Elf_Vernaux));
+ }
+ }
+
+ SHeader.sh_size = Section.VerneedV.size() * sizeof(Elf_Verneed) +
+ AuxCnt * sizeof(Elf_Vernaux);
+ SHeader.sh_info = Section.Info;
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::MipsABIFlags &Section,
+ ContiguousBlobAccumulator &CBA) {
+ assert(Section.Type == llvm::ELF::SHT_MIPS_ABIFLAGS &&
+ "Section type is not SHT_MIPS_ABIFLAGS");
+
+ object::Elf_Mips_ABIFlags<ELFT> Flags;
+ zero(Flags);
+ SHeader.sh_entsize = sizeof(Flags);
+ SHeader.sh_size = SHeader.sh_entsize;
+
+ auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ Flags.version = Section.Version;
+ Flags.isa_level = Section.ISALevel;
+ Flags.isa_rev = Section.ISARevision;
+ Flags.gpr_size = Section.GPRSize;
+ Flags.cpr1_size = Section.CPR1Size;
+ Flags.cpr2_size = Section.CPR2Size;
+ Flags.fp_abi = Section.FpABI;
+ Flags.isa_ext = Section.ISAExtension;
+ Flags.ases = Section.ASEs;
+ Flags.flags1 = Section.Flags1;
+ Flags.flags2 = Section.Flags2;
+ OS.write((const char *)&Flags, sizeof(Flags));
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::DynamicSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ typedef typename ELFT::uint uintX_t;
+
+ assert(Section.Type == llvm::ELF::SHT_DYNAMIC &&
+ "Section type is not SHT_DYNAMIC");
+
+ if (!Section.Entries.empty() && Section.Content)
+ reportError("cannot specify both raw content and explicit entries "
+ "for dynamic section '" +
+ Section.Name + "'");
+
+ if (Section.Content)
+ SHeader.sh_size = Section.Content->binary_size();
+ else
+ SHeader.sh_size = 2 * sizeof(uintX_t) * Section.Entries.size();
+ if (Section.EntSize)
+ SHeader.sh_entsize = *Section.EntSize;
+ else
+ SHeader.sh_entsize = sizeof(Elf_Dyn);
+
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ for (const ELFYAML::DynamicEntry &DE : Section.Entries) {
+ support::endian::write<uintX_t>(OS, DE.Tag, ELFT::TargetEndianness);
+ support::endian::write<uintX_t>(OS, DE.Val, ELFT::TargetEndianness);
+ }
+ if (Section.Content)
+ Section.Content->writeAsBinary(OS);
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::AddrsigSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ unsigned Link = 0;
+ if (Section.Link.empty() && SN2I.lookup(".symtab", Link))
+ SHeader.sh_link = Link;
+
+ if (Section.Content || Section.Size) {
+ SHeader.sh_size = writeContent(OS, Section.Content, Section.Size);
+ return;
+ }
+
+ for (const ELFYAML::AddrsigSymbol &Sym : *Section.Symbols) {
+ uint64_t Val =
+ Sym.Name ? toSymbolIndex(*Sym.Name, Section.Name, /*IsDynamic=*/false)
+ : (uint32_t)*Sym.Index;
+ SHeader.sh_size += encodeULEB128(Val, OS);
+ }
+}
+
+template <class ELFT> void ELFState<ELFT>::buildSectionIndex() {
+ for (unsigned I = 0, E = Doc.Sections.size(); I != E; ++I) {
+ StringRef Name = Doc.Sections[I]->Name;
+ if (Name.empty())
+ continue;
+
+ DotShStrtab.add(ELFYAML::dropUniqueSuffix(Name));
+ if (!SN2I.addName(Name, I))
+ reportError("repeated section name: '" + Name +
+ "' at YAML section number " + Twine(I));
+ }
+
+ DotShStrtab.finalize();
+}
+
+template <class ELFT> void ELFState<ELFT>::buildSymbolIndexes() {
+ auto Build = [this](ArrayRef<ELFYAML::Symbol> V, NameToIdxMap &Map) {
+ for (size_t I = 0, S = V.size(); I < S; ++I) {
+ const ELFYAML::Symbol &Sym = V[I];
+ if (!Sym.Name.empty() && !Map.addName(Sym.Name, I + 1))
+ reportError("repeated symbol name: '" + Sym.Name + "'");
+ }
+ };
+
+ if (Doc.Symbols)
+ Build(*Doc.Symbols, SymN2I);
+ Build(Doc.DynamicSymbols, DynSymN2I);
+}
+
+template <class ELFT> void ELFState<ELFT>::finalizeStrings() {
+ // Add the regular symbol names to the .strtab section.
+ if (Doc.Symbols)
+ for (const ELFYAML::Symbol &Sym : *Doc.Symbols)
+ DotStrtab.add(ELFYAML::dropUniqueSuffix(Sym.Name));
+ DotStrtab.finalize();
+
+ // Add the dynamic symbol names to the .dynstr section.
+ for (const ELFYAML::Symbol &Sym : Doc.DynamicSymbols)
+ DotDynstr.add(ELFYAML::dropUniqueSuffix(Sym.Name));
+
+ // SHT_GNU_verdef and SHT_GNU_verneed sections might also
+ // add strings to the .dynstr section.
+ for (const std::unique_ptr<ELFYAML::Section> &Sec : Doc.Sections) {
+ if (auto VerNeed = dyn_cast<ELFYAML::VerneedSection>(Sec.get())) {
+ for (const ELFYAML::VerneedEntry &VE : VerNeed->VerneedV) {
+ DotDynstr.add(VE.File);
+ for (const ELFYAML::VernauxEntry &Aux : VE.AuxV)
+ DotDynstr.add(Aux.Name);
+ }
+ } else if (auto VerDef = dyn_cast<ELFYAML::VerdefSection>(Sec.get())) {
+ for (const ELFYAML::VerdefEntry &E : VerDef->Entries)
+ for (StringRef Name : E.VerNames)
+ DotDynstr.add(Name);
+ }
+ }
+
+ DotDynstr.finalize();
+}
+
+template <class ELFT>
+bool ELFState<ELFT>::writeELF(raw_ostream &OS, ELFYAML::Object &Doc,
+ yaml::ErrorHandler EH) {
+ ELFState<ELFT> State(Doc, EH);
+
+ // Finalize .strtab and .dynstr sections. We do that early because we want to
+ // finalize the string table builders before writing the content of the
+ // sections that might want to use them.
+ State.finalizeStrings();
+
+ State.buildSectionIndex();
+ State.buildSymbolIndexes();
+
+ std::vector<Elf_Phdr> PHeaders;
+ State.initProgramHeaders(PHeaders);
+
+ // XXX: This offset is tightly coupled with the order that we write
+ // things to `OS`.
+ const size_t SectionContentBeginOffset =
+ sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * Doc.ProgramHeaders.size();
+ ContiguousBlobAccumulator CBA(SectionContentBeginOffset);
+
+ std::vector<Elf_Shdr> SHeaders;
+ State.initSectionHeaders(SHeaders, CBA);
+
+ // Now we can decide segment offsets
+ State.setProgramHeaderLayout(PHeaders, SHeaders);
+
+ if (State.HasError)
+ return false;
+
+ State.writeELFHeader(CBA, OS);
+ writeArrayData(OS, makeArrayRef(PHeaders));
+ CBA.writeBlobToStream(OS);
+ writeArrayData(OS, makeArrayRef(SHeaders));
+ return true;
+}
+
+namespace llvm {
+namespace yaml {
+
+bool yaml2elf(llvm::ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH) {
+ bool IsLE = Doc.Header.Data == ELFYAML::ELF_ELFDATA(ELF::ELFDATA2LSB);
+ bool Is64Bit = Doc.Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64);
+ if (Is64Bit) {
+ if (IsLE)
+ return ELFState<object::ELF64LE>::writeELF(Out, Doc, EH);
+ return ELFState<object::ELF64BE>::writeELF(Out, Doc, EH);
+ }
+ if (IsLE)
+ return ELFState<object::ELF32LE>::writeELF(Out, Doc, EH);
+ return ELFState<object::ELF32BE>::writeELF(Out, Doc, EH);
+}
+
+} // namespace yaml
+} // namespace llvm
diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp
index 7497154c757d..29585abe6e80 100644
--- a/lib/ObjectYAML/ELFYAML.cpp
+++ b/lib/ObjectYAML/ELFYAML.cpp
@@ -11,12 +11,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/ELFYAML.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MipsABIFlags.h"
#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/WithColor.h"
#include <cassert>
#include <cstdint>
@@ -50,6 +52,8 @@ void ScalarEnumerationTraits<ELFYAML::ELF_PT>::enumeration(
ECase(PT_PHDR);
ECase(PT_TLS);
ECase(PT_GNU_EH_FRAME);
+ ECase(PT_GNU_STACK);
+ ECase(PT_GNU_RELRO);
#undef ECase
IO.enumFallback<Hex32>(Value);
}
@@ -217,6 +221,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(
ECase(EM_LANAI);
ECase(EM_BPF);
#undef ECase
+ IO.enumFallback<Hex16>(Value);
}
void ScalarEnumerationTraits<ELFYAML::ELF_ELFCLASS>::enumeration(
@@ -459,6 +464,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_LLVM_CALL_GRAPH_PROFILE);
ECase(SHT_LLVM_ADDRSIG);
ECase(SHT_LLVM_DEPENDENT_LIBRARIES);
+ ECase(SHT_LLVM_SYMPART);
+ ECase(SHT_LLVM_PART_EHDR);
+ ECase(SHT_LLVM_PART_PHDR);
ECase(SHT_GNU_ATTRIBUTES);
ECase(SHT_GNU_HASH);
ECase(SHT_GNU_verdef);
@@ -563,7 +571,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHN>::enumeration(
ECase(SHN_HEXAGON_SCOMMON_4);
ECase(SHN_HEXAGON_SCOMMON_8);
#undef ECase
- IO.enumFallback<Hex32>(Value);
+ IO.enumFallback<Hex16>(Value);
}
void ScalarEnumerationTraits<ELFYAML::ELF_STB>::enumeration(
@@ -592,34 +600,6 @@ void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration(
IO.enumFallback<Hex8>(Value);
}
-void ScalarEnumerationTraits<ELFYAML::ELF_STV>::enumeration(
- IO &IO, ELFYAML::ELF_STV &Value) {
-#define ECase(X) IO.enumCase(Value, #X, ELF::X)
- ECase(STV_DEFAULT);
- ECase(STV_INTERNAL);
- ECase(STV_HIDDEN);
- ECase(STV_PROTECTED);
-#undef ECase
-}
-
-void ScalarBitSetTraits<ELFYAML::ELF_STO>::bitset(IO &IO,
- ELFYAML::ELF_STO &Value) {
- const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
- assert(Object && "The IO context is not initialized");
-#define BCase(X) IO.bitSetCase(Value, #X, ELF::X)
- switch (Object->Header.Machine) {
- case ELF::EM_MIPS:
- BCase(STO_MIPS_OPTIONAL);
- BCase(STO_MIPS_PLT);
- BCase(STO_MIPS_PIC);
- BCase(STO_MIPS_MICROMIPS);
- break;
- default:
- break; // Nothing to do
- }
-#undef BCase
-#undef BCaseMask
-}
void ScalarEnumerationTraits<ELFYAML::ELF_RSS>::enumeration(
IO &IO, ELFYAML::ELF_RSS &Value) {
@@ -671,8 +651,12 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
case ELF::EM_BPF:
#include "llvm/BinaryFormat/ELFRelocs/BPF.def"
break;
+ case ELF::EM_PPC64:
+#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def"
+ break;
default:
- llvm_unreachable("Unsupported architecture");
+ // Nothing to do.
+ break;
}
#undef ELF_RELOC
IO.enumFallback<Hex32>(Value);
@@ -845,7 +829,7 @@ void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
IO.mapOptional("Entry", FileHdr.Entry, Hex64(0));
IO.mapOptional("SHEntSize", FileHdr.SHEntSize);
- IO.mapOptional("SHOffset", FileHdr.SHOffset);
+ IO.mapOptional("SHOff", FileHdr.SHOff);
IO.mapOptional("SHNum", FileHdr.SHNum);
IO.mapOptional("SHStrNdx", FileHdr.SHStrNdx);
}
@@ -863,18 +847,111 @@ void MappingTraits<ELFYAML::ProgramHeader>::mapping(
IO.mapOptional("Offset", Phdr.Offset);
}
+LLVM_YAML_STRONG_TYPEDEF(StringRef, StOtherPiece)
+
+template <> struct ScalarTraits<StOtherPiece> {
+ static void output(const StOtherPiece &Val, void *, raw_ostream &Out) {
+ Out << Val;
+ }
+ static StringRef input(StringRef Scalar, void *, StOtherPiece &Val) {
+ Val = Scalar;
+ return {};
+ }
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+template <> struct SequenceElementTraits<StOtherPiece> {
+ static const bool flow = true;
+};
+
namespace {
struct NormalizedOther {
- NormalizedOther(IO &)
- : Visibility(ELFYAML::ELF_STV(0)), Other(ELFYAML::ELF_STO(0)) {}
- NormalizedOther(IO &, uint8_t Original)
- : Visibility(Original & 0x3), Other(Original & ~0x3) {}
+ NormalizedOther(IO &IO) : YamlIO(IO) {}
+ NormalizedOther(IO &IO, Optional<uint8_t> Original) : YamlIO(IO) {
+ assert(Original && "This constructor is only used for outputting YAML and "
+ "assumes a non-empty Original");
+ std::vector<StOtherPiece> Ret;
+ const auto *Object = static_cast<ELFYAML::Object *>(YamlIO.getContext());
+ for (std::pair<StringRef, uint8_t> &P :
+ getFlags(Object->Header.Machine).takeVector()) {
+ uint8_t FlagValue = P.second;
+ if ((*Original & FlagValue) != FlagValue)
+ continue;
+ *Original &= ~FlagValue;
+ Ret.push_back({P.first});
+ }
+
+ if (*Original != 0) {
+ UnknownFlagsHolder = std::to_string(*Original);
+ Ret.push_back({UnknownFlagsHolder});
+ }
+
+ if (!Ret.empty())
+ Other = std::move(Ret);
+ }
+
+ uint8_t toValue(StringRef Name) {
+ const auto *Object = static_cast<ELFYAML::Object *>(YamlIO.getContext());
+ MapVector<StringRef, uint8_t> Flags = getFlags(Object->Header.Machine);
- uint8_t denormalize(IO &) { return Visibility | Other; }
+ auto It = Flags.find(Name);
+ if (It != Flags.end())
+ return It->second;
+
+ uint8_t Val;
+ if (to_integer(Name, Val))
+ return Val;
+
+ YamlIO.setError("an unknown value is used for symbol's 'Other' field: " +
+ Name);
+ return 0;
+ }
- ELFYAML::ELF_STV Visibility;
- ELFYAML::ELF_STO Other;
+ Optional<uint8_t> denormalize(IO &) {
+ if (!Other)
+ return None;
+ uint8_t Ret = 0;
+ for (StOtherPiece &Val : *Other)
+ Ret |= toValue(Val);
+ return Ret;
+ }
+
+ // st_other field is used to encode symbol visibility and platform-dependent
+ // flags and values. This method returns a name to value map that is used for
+ // parsing and encoding this field.
+ MapVector<StringRef, uint8_t> getFlags(unsigned EMachine) {
+ MapVector<StringRef, uint8_t> Map;
+ // STV_* values are just enumeration values. We add them in reverse order
+ // because, when converting st_other to named constants for YAML output, we
+ // want to consume the maximum number of bits at each step: when st_other ==
+ // 3, we want to print it as STV_PROTECTED (3), not as STV_HIDDEN (2) +
+ // STV_INTERNAL (1).
+ Map["STV_PROTECTED"] = ELF::STV_PROTECTED;
+ Map["STV_HIDDEN"] = ELF::STV_HIDDEN;
+ Map["STV_INTERNAL"] = ELF::STV_INTERNAL;
+ // STV_DEFAULT is used to represent the default visibility and has a value
+ // 0. We want to be able to read it from YAML documents, but there is no
+ // reason to print it.
+ if (!YamlIO.outputting())
+ Map["STV_DEFAULT"] = ELF::STV_DEFAULT;
+
+ // MIPS is not consistent. All of the STO_MIPS_* values are bit flags,
+ // except STO_MIPS_MIPS16, which overlaps them. It must be checked and
+ // consumed first when we print the output, because we do not want to print
+ // any other flags that share the same bits.
+ if (EMachine == ELF::EM_MIPS) {
+ Map["STO_MIPS_MIPS16"] = ELF::STO_MIPS_MIPS16;
+ Map["STO_MIPS_MICROMIPS"] = ELF::STO_MIPS_MICROMIPS;
+ Map["STO_MIPS_PIC"] = ELF::STO_MIPS_PIC;
+ Map["STO_MIPS_PLT"] = ELF::STO_MIPS_PLT;
+ Map["STO_MIPS_OPTIONAL"] = ELF::STO_MIPS_OPTIONAL;
+ }
+ return Map;
+ }
+
+ IO &YamlIO;
+ Optional<std::vector<StOtherPiece>> Other;
+ std::string UnknownFlagsHolder;
};
} // end anonymous namespace
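// A sketch of the round trip this normalization enables (hypothetical YAML,
// not from this patch): with the maps above, an input of
//
//   Other: [ STV_PROTECTED, 0x40 ]
//
// denormalizes to st_other == ELF::STV_PROTECTED | 0x40 == 0x43. When 0x43 is
// printed back, STV_PROTECTED is matched greedily first and the leftover bits
// are emitted as a plain decimal number (64), assuming no machine-specific
// flag claims that bit.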
@@ -888,17 +965,21 @@ void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
IO.mapOptional("Binding", Symbol.Binding, ELFYAML::ELF_STB(0));
IO.mapOptional("Value", Symbol.Value, Hex64(0));
IO.mapOptional("Size", Symbol.Size, Hex64(0));
- MappingNormalization<NormalizedOther, uint8_t> Keys(IO, Symbol.Other);
- IO.mapOptional("Visibility", Keys->Visibility, ELFYAML::ELF_STV(0));
- IO.mapOptional("Other", Keys->Other, ELFYAML::ELF_STO(0));
+
+ // The symbol's Other field is a bit special. It usually represents st_other
+ // and holds the symbol visibility. However, on some platforms it can contain
+ // bit fields and regular values, or even a mix of both (see the comments for
+ // NormalizedOther). Because of this, it needs special handling.
+ MappingNormalization<NormalizedOther, Optional<uint8_t>> Keys(IO,
+ Symbol.Other);
+ IO.mapOptional("Other", Keys->Other);
}
StringRef MappingTraits<ELFYAML::Symbol>::validate(IO &IO,
ELFYAML::Symbol &Symbol) {
if (Symbol.Index && Symbol.Section.data())
return "Index and Section cannot both be specified for Symbol";
- if (Symbol.Index && *Symbol.Index == ELFYAML::ELF_SHN(ELF::SHN_XINDEX))
- return "Large indexes are not supported";
if (Symbol.NameIndex && !Symbol.Name.empty())
return "Name and NameIndex cannot both be specified for Symbol";
return StringRef();
@@ -914,10 +995,11 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
IO.mapOptional("EntSize", Section.EntSize);
// obj2yaml does not dump these fields. They are expected to be empty when we
- // are producing YAML, because yaml2obj sets appropriate values for sh_offset
- // and sh_size automatically when they are not explicitly defined.
+ // are producing YAML, because yaml2obj sets appropriate values for them
+ // automatically when they are not explicitly defined.
assert(!IO.outputting() ||
(!Section.ShOffset.hasValue() && !Section.ShSize.hasValue()));
+ IO.mapOptional("ShName", Section.ShName);
IO.mapOptional("ShOffset", Section.ShOffset);
IO.mapOptional("ShSize", Section.ShSize);
}
@@ -935,6 +1017,21 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
IO.mapOptional("Info", Section.Info);
}
+static void sectionMapping(IO &IO, ELFYAML::StackSizesSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Content", Section.Content);
+ IO.mapOptional("Size", Section.Size);
+ IO.mapOptional("Entries", Section.Entries);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::HashSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Content", Section.Content);
+ IO.mapOptional("Bucket", Section.Bucket);
+ IO.mapOptional("Chain", Section.Chain);
+ IO.mapOptional("Size", Section.Size);
+}
+
static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Size", Section.Size, Hex64(0));
@@ -969,6 +1066,18 @@ static void groupSectionMapping(IO &IO, ELFYAML::Group &Group) {
IO.mapRequired("Members", Group.Members);
}
+static void sectionMapping(IO &IO, ELFYAML::SymtabShndxSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Entries", Section.Entries);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::AddrsigSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Content", Section.Content);
+ IO.mapOptional("Size", Section.Size);
+ IO.mapOptional("Symbols", Section.Symbols);
+}
+
void MappingTraits<ELFYAML::SectionOrType>::mapping(
IO &IO, ELFYAML::SectionOrType &sectionOrType) {
IO.mapRequired("SectionOrType", sectionOrType.sectionNameOrType);
@@ -1029,6 +1138,11 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
Section.reset(new ELFYAML::NoBitsSection());
sectionMapping(IO, *cast<ELFYAML::NoBitsSection>(Section.get()));
break;
+ case ELF::SHT_HASH:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::HashSection());
+ sectionMapping(IO, *cast<ELFYAML::HashSection>(Section.get()));
+ break;
case ELF::SHT_MIPS_ABIFLAGS:
if (!IO.outputting())
Section.reset(new ELFYAML::MipsABIFlags());
@@ -1049,21 +1163,113 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
Section.reset(new ELFYAML::VerneedSection());
sectionMapping(IO, *cast<ELFYAML::VerneedSection>(Section.get()));
break;
- default:
+ case ELF::SHT_SYMTAB_SHNDX:
if (!IO.outputting())
- Section.reset(new ELFYAML::RawContentSection());
- sectionMapping(IO, *cast<ELFYAML::RawContentSection>(Section.get()));
+ Section.reset(new ELFYAML::SymtabShndxSection());
+ sectionMapping(IO, *cast<ELFYAML::SymtabShndxSection>(Section.get()));
+ break;
+ case ELF::SHT_LLVM_ADDRSIG:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::AddrsigSection());
+ sectionMapping(IO, *cast<ELFYAML::AddrsigSection>(Section.get()));
+ break;
+ default:
+ if (!IO.outputting()) {
+ StringRef Name;
+ IO.mapOptional("Name", Name, StringRef());
+ Name = ELFYAML::dropUniqueSuffix(Name);
+
+ if (ELFYAML::StackSizesSection::nameMatches(Name))
+ Section = std::make_unique<ELFYAML::StackSizesSection>();
+ else
+ Section = std::make_unique<ELFYAML::RawContentSection>();
+ }
+
+ if (auto S = dyn_cast<ELFYAML::RawContentSection>(Section.get()))
+ sectionMapping(IO, *S);
+ else
+ sectionMapping(IO, *cast<ELFYAML::StackSizesSection>(Section.get()));
}
}
StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
IO &io, std::unique_ptr<ELFYAML::Section> &Section) {
- const auto *RawSection = dyn_cast<ELFYAML::RawContentSection>(Section.get());
- if (!RawSection)
+ if (const auto *RawSection =
+ dyn_cast<ELFYAML::RawContentSection>(Section.get())) {
+ if (RawSection->Size && RawSection->Content &&
+ (uint64_t)(*RawSection->Size) < RawSection->Content->binary_size())
+ return "Section size must be greater than or equal to the content size";
return {};
- if (RawSection->Size && RawSection->Content &&
- (uint64_t)(*RawSection->Size) < RawSection->Content->binary_size())
- return "Section size must be greater than or equal to the content size";
+ }
+
+ if (const auto *SS = dyn_cast<ELFYAML::StackSizesSection>(Section.get())) {
+ if (!SS->Entries && !SS->Content && !SS->Size)
+ return ".stack_sizes: one of Content, Entries and Size must be specified";
+
+ if (SS->Size && SS->Content &&
+ (uint64_t)(*SS->Size) < SS->Content->binary_size())
+ return ".stack_sizes: Size must be greater than or equal to the content "
+ "size";
+
+ // We accept Content, Size or both together when there are no Entries.
+ if (!SS->Entries)
+ return {};
+
+ if (SS->Size)
+ return ".stack_sizes: Size and Entries cannot be used together";
+ if (SS->Content)
+ return ".stack_sizes: Content and Entries cannot be used together";
+ return {};
+ }
+
+ if (const auto *HS = dyn_cast<ELFYAML::HashSection>(Section.get())) {
+ if (!HS->Content && !HS->Bucket && !HS->Chain && !HS->Size)
+ return "one of \"Content\", \"Size\", \"Bucket\" or \"Chain\" must be "
+ "specified";
+
+ if (HS->Content || HS->Size) {
+ if (HS->Size && HS->Content &&
+ (uint64_t)*HS->Size < HS->Content->binary_size())
+ return "\"Size\" must be greater than or equal to the content "
+ "size";
+
+ if (HS->Bucket)
+ return "\"Bucket\" cannot be used with \"Content\" or \"Size\"";
+ if (HS->Chain)
+ return "\"Chain\" cannot be used with \"Content\" or \"Size\"";
+ return {};
+ }
+
+ if ((HS->Bucket && !HS->Chain) || (!HS->Bucket && HS->Chain))
+ return "\"Bucket\" and \"Chain\" must be used together";
+ return {};
+ }
+
+ if (const auto *Sec = dyn_cast<ELFYAML::AddrsigSection>(Section.get())) {
+ if (!Sec->Symbols && !Sec->Content && !Sec->Size)
+ return "one of \"Content\", \"Size\" or \"Symbols\" must be specified";
+
+ if (Sec->Content || Sec->Size) {
+ if (Sec->Size && Sec->Content &&
+ (uint64_t)*Sec->Size < Sec->Content->binary_size())
+ return "\"Size\" must be greater than or equal to the content "
+ "size";
+
+ if (Sec->Symbols)
+ return "\"Symbols\" cannot be used with \"Content\" or \"Size\"";
+ return {};
+ }
+
+ if (!Sec->Symbols)
+ return {};
+
+ for (const ELFYAML::AddrsigSymbol &AS : *Sec->Symbols)
+ if (AS.Index && AS.Name)
+ return "\"Index\" and \"Name\" cannot be used together when defining a "
+ "symbol";
+ return {};
+ }
+
return {};
}
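// A sketch of the SHT_HASH forms that the validation above accepts
// (hypothetical YAML documents, not from this patch):
//
//   - Name: .hash              # table form; Bucket and Chain go together
//     Type: SHT_HASH
//     Bucket: [ 1, 2 ]
//     Chain:  [ 0, 1, 2 ]
//
//   - Name: .hash              # raw form; Bucket/Chain are then rejected
//     Type: SHT_HASH
//     Content: "01000000"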
@@ -1092,6 +1298,13 @@ struct NormalizedMips64RelType {
} // end anonymous namespace
+void MappingTraits<ELFYAML::StackSizeEntry>::mapping(
+ IO &IO, ELFYAML::StackSizeEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapOptional("Address", E.Address, Hex64(0));
+ IO.mapRequired("Size", E.Size);
+}
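+ // For example (a hypothetical document, not from this patch), the Entries
+ // form of a stack-sizes section could look like:
+ //
+ //   - Name: .stack_sizes
+ //     Type: SHT_PROGBITS
+ //     Entries:
+ //       - Address: 0x10
+ //         Size:    0x20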
+
void MappingTraits<ELFYAML::DynamicEntry>::mapping(IO &IO,
ELFYAML::DynamicEntry &Rel) {
assert(IO.getContext() && "The IO context is not initialized");
@@ -1164,6 +1377,12 @@ void MappingTraits<ELFYAML::Object>::mapping(IO &IO, ELFYAML::Object &Object) {
IO.setContext(nullptr);
}
+void MappingTraits<ELFYAML::AddrsigSymbol>::mapping(IO &IO, ELFYAML::AddrsigSymbol &Sym) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapOptional("Name", Sym.Name);
+ IO.mapOptional("Index", Sym.Index);
+}
+
LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_AFL_REG)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_ABI_FP)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_EXT)
diff --git a/lib/ObjectYAML/MachOEmitter.cpp b/lib/ObjectYAML/MachOEmitter.cpp
new file mode 100644
index 000000000000..b56f811ce67d
--- /dev/null
+++ b/lib/ObjectYAML/MachOEmitter.cpp
@@ -0,0 +1,580 @@
+//===- yaml2macho - Convert YAML to a Mach object file --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The Mach component of yaml2obj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ObjectYAML/DWARFEmitter.h"
+#include "llvm/ObjectYAML/ObjectYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+namespace {
+
+class MachOWriter {
+public:
+ MachOWriter(MachOYAML::Object &Obj) : Obj(Obj), is64Bit(true), fileStart(0) {
+ is64Bit = Obj.Header.magic == MachO::MH_MAGIC_64 ||
+ Obj.Header.magic == MachO::MH_CIGAM_64;
+ memset(reinterpret_cast<void *>(&Header), 0, sizeof(MachO::mach_header_64));
+ }
+
+ void writeMachO(raw_ostream &OS);
+
+private:
+ void writeHeader(raw_ostream &OS);
+ void writeLoadCommands(raw_ostream &OS);
+ void writeSectionData(raw_ostream &OS);
+ void writeLinkEditData(raw_ostream &OS);
+
+ void writeBindOpcodes(raw_ostream &OS,
+ std::vector<MachOYAML::BindOpcode> &BindOpcodes);
+ // LinkEdit writers
+ void writeRebaseOpcodes(raw_ostream &OS);
+ void writeBasicBindOpcodes(raw_ostream &OS);
+ void writeWeakBindOpcodes(raw_ostream &OS);
+ void writeLazyBindOpcodes(raw_ostream &OS);
+ void writeNameList(raw_ostream &OS);
+ void writeStringTable(raw_ostream &OS);
+ void writeExportTrie(raw_ostream &OS);
+
+ void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry);
+ void ZeroToOffset(raw_ostream &OS, size_t offset);
+
+ MachOYAML::Object &Obj;
+ bool is64Bit;
+ uint64_t fileStart;
+
+ MachO::mach_header_64 Header;
+};
+
+void MachOWriter::writeMachO(raw_ostream &OS) {
+ fileStart = OS.tell();
+ writeHeader(OS);
+ writeLoadCommands(OS);
+ writeSectionData(OS);
+}
+
+void MachOWriter::writeHeader(raw_ostream &OS) {
+ Header.magic = Obj.Header.magic;
+ Header.cputype = Obj.Header.cputype;
+ Header.cpusubtype = Obj.Header.cpusubtype;
+ Header.filetype = Obj.Header.filetype;
+ Header.ncmds = Obj.Header.ncmds;
+ Header.sizeofcmds = Obj.Header.sizeofcmds;
+ Header.flags = Obj.Header.flags;
+ Header.reserved = Obj.Header.reserved;
+
+ if (Obj.IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(Header);
+
+ auto header_size =
+ is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+ OS.write((const char *)&Header, header_size);
+}
+
+template <typename SectionType>
+SectionType constructSection(MachOYAML::Section Sec) {
+ SectionType TempSec;
+ memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
+ memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
+ TempSec.addr = Sec.addr;
+ TempSec.size = Sec.size;
+ TempSec.offset = Sec.offset;
+ TempSec.align = Sec.align;
+ TempSec.reloff = Sec.reloff;
+ TempSec.nreloc = Sec.nreloc;
+ TempSec.flags = Sec.flags;
+ TempSec.reserved1 = Sec.reserved1;
+ TempSec.reserved2 = Sec.reserved2;
+ return TempSec;
+}
+
+template <typename StructType>
+size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, raw_ostream &OS,
+ bool IsLittleEndian) {
+ return 0;
+}
+
+template <>
+size_t writeLoadCommandData<MachO::segment_command>(MachOYAML::LoadCommand &LC,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ size_t BytesWritten = 0;
+ for (const auto &Sec : LC.Sections) {
+ auto TempSec = constructSection<MachO::section>(Sec);
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(TempSec);
+ OS.write(reinterpret_cast<const char *>(&(TempSec)),
+ sizeof(MachO::section));
+ BytesWritten += sizeof(MachO::section);
+ }
+ return BytesWritten;
+}
+
+template <>
+size_t writeLoadCommandData<MachO::segment_command_64>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ size_t BytesWritten = 0;
+ for (const auto &Sec : LC.Sections) {
+ auto TempSec = constructSection<MachO::section_64>(Sec);
+ TempSec.reserved3 = Sec.reserved3;
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(TempSec);
+ OS.write(reinterpret_cast<const char *>(&(TempSec)),
+ sizeof(MachO::section_64));
+ BytesWritten += sizeof(MachO::section_64);
+ }
+ return BytesWritten;
+}
+
+size_t writePayloadString(MachOYAML::LoadCommand &LC, raw_ostream &OS) {
+ size_t BytesWritten = 0;
+ if (!LC.PayloadString.empty()) {
+ OS.write(LC.PayloadString.c_str(), LC.PayloadString.length());
+ BytesWritten = LC.PayloadString.length();
+ }
+ return BytesWritten;
+}
+
+template <>
+size_t writeLoadCommandData<MachO::dylib_command>(MachOYAML::LoadCommand &LC,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
+size_t writeLoadCommandData<MachO::dylinker_command>(MachOYAML::LoadCommand &LC,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
+size_t writeLoadCommandData<MachO::rpath_command>(MachOYAML::LoadCommand &LC,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
+size_t writeLoadCommandData<MachO::build_version_command>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ size_t BytesWritten = 0;
+ for (const auto &T : LC.Tools) {
+ struct MachO::build_tool_version tool = T;
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(tool);
+ OS.write(reinterpret_cast<const char *>(&tool),
+ sizeof(MachO::build_tool_version));
+ BytesWritten += sizeof(MachO::build_tool_version);
+ }
+ return BytesWritten;
+}
+
+void ZeroFillBytes(raw_ostream &OS, size_t Size) {
+ std::vector<uint8_t> FillData;
+ FillData.insert(FillData.begin(), Size, 0);
+ OS.write(reinterpret_cast<char *>(FillData.data()), Size);
+}
+
+void Fill(raw_ostream &OS, size_t Size, uint32_t Data) {
+ std::vector<uint32_t> FillData;
+ FillData.insert(FillData.begin(), (Size / 4) + 1, Data);
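+ // Note: one extra element is reserved so the buffer also covers a trailing
+ // partial word when Size is not a multiple of 4; only Size bytes of it are
+ // actually written below.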
+ OS.write(reinterpret_cast<char *>(FillData.data()), Size);
+}
+
+void MachOWriter::ZeroToOffset(raw_ostream &OS, size_t Offset) {
+ auto currOffset = OS.tell() - fileStart;
+ if (currOffset < Offset)
+ ZeroFillBytes(OS, Offset - currOffset);
+}
+
+void MachOWriter::writeLoadCommands(raw_ostream &OS) {
+ for (auto &LC : Obj.LoadCommands) {
+ size_t BytesWritten = 0;
+ llvm::MachO::macho_load_command Data = LC.Data;
+
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
+ case MachO::LCName: \
+ if (Obj.IsLittleEndian != sys::IsLittleEndianHost) \
+ MachO::swapStruct(Data.LCStruct##_data); \
+ OS.write(reinterpret_cast<const char *>(&(Data.LCStruct##_data)), \
+ sizeof(MachO::LCStruct)); \
+ BytesWritten = sizeof(MachO::LCStruct); \
+ BytesWritten += \
+ writeLoadCommandData<MachO::LCStruct>(LC, OS, Obj.IsLittleEndian); \
+ break;
+
+ switch (LC.Data.load_command_data.cmd) {
+ default:
+ if (Obj.IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(Data.load_command_data);
+ OS.write(reinterpret_cast<const char *>(&(Data.load_command_data)),
+ sizeof(MachO::load_command));
+ BytesWritten = sizeof(MachO::load_command);
+ BytesWritten +=
+ writeLoadCommandData<MachO::load_command>(LC, OS, Obj.IsLittleEndian);
+ break;
+#include "llvm/BinaryFormat/MachO.def"
+ }
+
+ if (LC.PayloadBytes.size() > 0) {
+ OS.write(reinterpret_cast<const char *>(LC.PayloadBytes.data()),
+ LC.PayloadBytes.size());
+ BytesWritten += LC.PayloadBytes.size();
+ }
+
+ if (LC.ZeroPadBytes > 0) {
+ ZeroFillBytes(OS, LC.ZeroPadBytes);
+ BytesWritten += LC.ZeroPadBytes;
+ }
+
+ // Fill remaining bytes with 0. This will only get hit in partially
+ // specified test cases.
+ auto BytesRemaining = LC.Data.load_command_data.cmdsize - BytesWritten;
+ if (BytesRemaining > 0) {
+ ZeroFillBytes(OS, BytesRemaining);
+ }
+ }
+}
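+ // The switch above is filled in by the X-macro include of MachO.def. For a
+ // def entry such as HANDLE_LOAD_COMMAND(LC_SEGMENT, 0x1u, segment_command),
+ // the case roughly expands to (a sketch, not literal preprocessor output):
+ //
+ //   case MachO::LC_SEGMENT:
+ //     if (Obj.IsLittleEndian != sys::IsLittleEndianHost)
+ //       MachO::swapStruct(Data.segment_command_data);
+ //     OS.write(reinterpret_cast<const char *>(&(Data.segment_command_data)),
+ //              sizeof(MachO::segment_command));
+ //     BytesWritten = sizeof(MachO::segment_command);
+ //     BytesWritten += writeLoadCommandData<MachO::segment_command>(
+ //         LC, OS, Obj.IsLittleEndian);
+ //     break;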
+
+void MachOWriter::writeSectionData(raw_ostream &OS) {
+ bool FoundLinkEditSeg = false;
+ for (auto &LC : Obj.LoadCommands) {
+ switch (LC.Data.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ case MachO::LC_SEGMENT_64:
+ uint64_t segOff = is64Bit ? LC.Data.segment_command_64_data.fileoff
+ : LC.Data.segment_command_data.fileoff;
+ if (0 ==
+ strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) {
+ FoundLinkEditSeg = true;
+ writeLinkEditData(OS);
+ }
+ for (auto &Sec : LC.Sections) {
+ // Zero-fill any data between the end of the last thing we wrote and the
+ // start of this section.
+ ZeroToOffset(OS, Sec.offset);
+ assert((OS.tell() - fileStart <= Sec.offset ||
+ Sec.offset == (uint32_t)0) &&
+ "Wrote too much data somewhere, section offsets don't line up.");
+ if (0 == strncmp(&Sec.segname[0], "__DWARF", 16)) {
+ if (0 == strncmp(&Sec.sectname[0], "__debug_str", 16)) {
+ DWARFYAML::EmitDebugStr(OS, Obj.DWARF);
+ } else if (0 == strncmp(&Sec.sectname[0], "__debug_abbrev", 16)) {
+ DWARFYAML::EmitDebugAbbrev(OS, Obj.DWARF);
+ } else if (0 == strncmp(&Sec.sectname[0], "__debug_aranges", 16)) {
+ DWARFYAML::EmitDebugAranges(OS, Obj.DWARF);
+ } else if (0 == strncmp(&Sec.sectname[0], "__debug_pubnames", 16)) {
+ DWARFYAML::EmitPubSection(OS, Obj.DWARF.PubNames,
+ Obj.IsLittleEndian);
+ } else if (0 == strncmp(&Sec.sectname[0], "__debug_pubtypes", 16)) {
+ DWARFYAML::EmitPubSection(OS, Obj.DWARF.PubTypes,
+ Obj.IsLittleEndian);
+ } else if (0 == strncmp(&Sec.sectname[0], "__debug_info", 16)) {
+ DWARFYAML::EmitDebugInfo(OS, Obj.DWARF);
+ } else if (0 == strncmp(&Sec.sectname[0], "__debug_line", 16)) {
+ DWARFYAML::EmitDebugLine(OS, Obj.DWARF);
+ }
+
+ continue;
+ }
+
+ // Skip if it's a virtual section.
+ if (MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE))
+ continue;
+
+ if (Sec.content) {
+ yaml::BinaryRef Content = *Sec.content;
+ Content.writeAsBinary(OS);
+ ZeroFillBytes(OS, Sec.size - Content.binary_size());
+ } else {
+ // Fill section data with 0xDEADBEEF.
+ Fill(OS, Sec.size, 0xDEADBEEFu);
+ }
+ }
+ uint64_t segSize = is64Bit ? LC.Data.segment_command_64_data.filesize
+ : LC.Data.segment_command_data.filesize;
+ ZeroToOffset(OS, segOff + segSize);
+ break;
+ }
+ }
+ // Old PPC object files didn't have __LINKEDIT segments; the data was just
+ // placed at the end of the file.
+ if (!FoundLinkEditSeg)
+ writeLinkEditData(OS);
+}
+
+void MachOWriter::writeBindOpcodes(
+ raw_ostream &OS, std::vector<MachOYAML::BindOpcode> &BindOpcodes) {
+
+ for (auto Opcode : BindOpcodes) {
+ uint8_t OpByte = Opcode.Opcode | Opcode.Imm;
+ OS.write(reinterpret_cast<char *>(&OpByte), 1);
+ for (auto Data : Opcode.ULEBExtraData) {
+ encodeULEB128(Data, OS);
+ }
+ for (auto Data : Opcode.SLEBExtraData) {
+ encodeSLEB128(Data, OS);
+ }
+ if (!Opcode.Symbol.empty()) {
+ OS.write(Opcode.Symbol.data(), Opcode.Symbol.size());
+ OS.write('\0');
+ }
+ }
+}
+
+void MachOWriter::dumpExportEntry(raw_ostream &OS,
+ MachOYAML::ExportEntry &Entry) {
+ encodeSLEB128(Entry.TerminalSize, OS);
+ if (Entry.TerminalSize > 0) {
+ encodeSLEB128(Entry.Flags, OS);
+ if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
+ encodeSLEB128(Entry.Other, OS);
+ OS << Entry.ImportName;
+ OS.write('\0');
+ } else {
+ encodeSLEB128(Entry.Address, OS);
+ if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
+ encodeSLEB128(Entry.Other, OS);
+ }
+ }
+ OS.write(static_cast<uint8_t>(Entry.Children.size()));
+ for (auto EE : Entry.Children) {
+ OS << EE.Name;
+ OS.write('\0');
+ encodeSLEB128(EE.NodeOffset, OS);
+ }
+ for (auto EE : Entry.Children)
+ dumpExportEntry(OS, EE);
+}
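+ // Note: each node's terminal data and full child edge list (name plus
+ // NodeOffset) are written first, and only then are the child nodes
+ // themselves serialized, recursively. The NodeOffset values are taken from
+ // the YAML as-is and are not recomputed here.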
+
+void MachOWriter::writeExportTrie(raw_ostream &OS) {
+ dumpExportEntry(OS, Obj.LinkEdit.ExportTrie);
+}
+
+template <typename NListType>
+void writeNListEntry(MachOYAML::NListEntry &NLE, raw_ostream &OS,
+ bool IsLittleEndian) {
+ NListType ListEntry;
+ ListEntry.n_strx = NLE.n_strx;
+ ListEntry.n_type = NLE.n_type;
+ ListEntry.n_sect = NLE.n_sect;
+ ListEntry.n_desc = NLE.n_desc;
+ ListEntry.n_value = NLE.n_value;
+
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(ListEntry);
+ OS.write(reinterpret_cast<const char *>(&ListEntry), sizeof(NListType));
+}
+
+void MachOWriter::writeLinkEditData(raw_ostream &OS) {
+ typedef void (MachOWriter::*writeHandler)(raw_ostream &);
+ typedef std::pair<uint64_t, writeHandler> writeOperation;
+ std::vector<writeOperation> WriteQueue;
+
+ MachO::dyld_info_command *DyldInfoOnlyCmd = nullptr;
+ MachO::symtab_command *SymtabCmd = nullptr;
+ for (auto &LC : Obj.LoadCommands) {
+ switch (LC.Data.load_command_data.cmd) {
+ case MachO::LC_SYMTAB:
+ SymtabCmd = &LC.Data.symtab_command_data;
+ WriteQueue.push_back(
+ std::make_pair(SymtabCmd->symoff, &MachOWriter::writeNameList));
+ WriteQueue.push_back(
+ std::make_pair(SymtabCmd->stroff, &MachOWriter::writeStringTable));
+ break;
+ case MachO::LC_DYLD_INFO_ONLY:
+ DyldInfoOnlyCmd = &LC.Data.dyld_info_command_data;
+ WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->rebase_off,
+ &MachOWriter::writeRebaseOpcodes));
+ WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->bind_off,
+ &MachOWriter::writeBasicBindOpcodes));
+ WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->weak_bind_off,
+ &MachOWriter::writeWeakBindOpcodes));
+ WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->lazy_bind_off,
+ &MachOWriter::writeLazyBindOpcodes));
+ WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->export_off,
+ &MachOWriter::writeExportTrie));
+ break;
+ }
+ }
+
+ llvm::sort(WriteQueue, [](const writeOperation &a, const writeOperation &b) {
+ return a.first < b.first;
+ });
+
+ for (auto writeOp : WriteQueue) {
+ ZeroToOffset(OS, writeOp.first);
+ (this->*writeOp.second)(OS);
+ }
+}
+
+void MachOWriter::writeRebaseOpcodes(raw_ostream &OS) {
+ MachOYAML::LinkEditData &LinkEdit = Obj.LinkEdit;
+
+ for (auto Opcode : LinkEdit.RebaseOpcodes) {
+ uint8_t OpByte = Opcode.Opcode | Opcode.Imm;
+ OS.write(reinterpret_cast<char *>(&OpByte), 1);
+ for (auto Data : Opcode.ExtraData)
+ encodeULEB128(Data, OS);
+ }
+}
+
+void MachOWriter::writeBasicBindOpcodes(raw_ostream &OS) {
+ writeBindOpcodes(OS, Obj.LinkEdit.BindOpcodes);
+}
+
+void MachOWriter::writeWeakBindOpcodes(raw_ostream &OS) {
+ writeBindOpcodes(OS, Obj.LinkEdit.WeakBindOpcodes);
+}
+
+void MachOWriter::writeLazyBindOpcodes(raw_ostream &OS) {
+ writeBindOpcodes(OS, Obj.LinkEdit.LazyBindOpcodes);
+}
+
+void MachOWriter::writeNameList(raw_ostream &OS) {
+ for (auto NLE : Obj.LinkEdit.NameList) {
+ if (is64Bit)
+ writeNListEntry<MachO::nlist_64>(NLE, OS, Obj.IsLittleEndian);
+ else
+ writeNListEntry<MachO::nlist>(NLE, OS, Obj.IsLittleEndian);
+ }
+}
+
+void MachOWriter::writeStringTable(raw_ostream &OS) {
+ for (auto Str : Obj.LinkEdit.StringTable) {
+ OS.write(Str.data(), Str.size());
+ OS.write('\0');
+ }
+}
+
+class UniversalWriter {
+public:
+ UniversalWriter(yaml::YamlObjectFile &ObjectFile)
+ : ObjectFile(ObjectFile), fileStart(0) {}
+
+ void writeMachO(raw_ostream &OS);
+
+private:
+ void writeFatHeader(raw_ostream &OS);
+ void writeFatArchs(raw_ostream &OS);
+
+ void ZeroToOffset(raw_ostream &OS, size_t offset);
+
+ yaml::YamlObjectFile &ObjectFile;
+ uint64_t fileStart;
+};
+
+void UniversalWriter::writeMachO(raw_ostream &OS) {
+ fileStart = OS.tell();
+ if (ObjectFile.MachO) {
+ MachOWriter Writer(*ObjectFile.MachO);
+ Writer.writeMachO(OS);
+ return;
+ }
+
+ writeFatHeader(OS);
+ writeFatArchs(OS);
+
+ auto &FatFile = *ObjectFile.FatMachO;
+ assert(FatFile.FatArchs.size() == FatFile.Slices.size());
+ for (size_t i = 0; i < FatFile.Slices.size(); i++) {
+ ZeroToOffset(OS, FatFile.FatArchs[i].offset);
+ MachOWriter Writer(FatFile.Slices[i]);
+ Writer.writeMachO(OS);
+
+ auto SliceEnd = FatFile.FatArchs[i].offset + FatFile.FatArchs[i].size;
+ ZeroToOffset(OS, SliceEnd);
+ }
+}
+
+void UniversalWriter::writeFatHeader(raw_ostream &OS) {
+ auto &FatFile = *ObjectFile.FatMachO;
+ MachO::fat_header header;
+ header.magic = FatFile.Header.magic;
+ header.nfat_arch = FatFile.Header.nfat_arch;
+ if (sys::IsLittleEndianHost)
+ swapStruct(header);
+ OS.write(reinterpret_cast<const char *>(&header), sizeof(MachO::fat_header));
+}
+
+template <typename FatArchType>
+FatArchType constructFatArch(MachOYAML::FatArch &Arch) {
+ FatArchType FatArch;
+ FatArch.cputype = Arch.cputype;
+ FatArch.cpusubtype = Arch.cpusubtype;
+ FatArch.offset = Arch.offset;
+ FatArch.size = Arch.size;
+ FatArch.align = Arch.align;
+ return FatArch;
+}
+
+template <typename StructType>
+void writeFatArch(MachOYAML::FatArch &LC, raw_ostream &OS) {}
+
+template <>
+void writeFatArch<MachO::fat_arch>(MachOYAML::FatArch &Arch, raw_ostream &OS) {
+ auto FatArch = constructFatArch<MachO::fat_arch>(Arch);
+ if (sys::IsLittleEndianHost)
+ swapStruct(FatArch);
+ OS.write(reinterpret_cast<const char *>(&FatArch), sizeof(MachO::fat_arch));
+}
+
+template <>
+void writeFatArch<MachO::fat_arch_64>(MachOYAML::FatArch &Arch,
+ raw_ostream &OS) {
+ auto FatArch = constructFatArch<MachO::fat_arch_64>(Arch);
+ FatArch.reserved = Arch.reserved;
+ if (sys::IsLittleEndianHost)
+ swapStruct(FatArch);
+ OS.write(reinterpret_cast<const char *>(&FatArch),
+ sizeof(MachO::fat_arch_64));
+}
+
+void UniversalWriter::writeFatArchs(raw_ostream &OS) {
+ auto &FatFile = *ObjectFile.FatMachO;
+ bool is64Bit = FatFile.Header.magic == MachO::FAT_MAGIC_64;
+ for (auto Arch : FatFile.FatArchs) {
+ if (is64Bit)
+ writeFatArch<MachO::fat_arch_64>(Arch, OS);
+ else
+ writeFatArch<MachO::fat_arch>(Arch, OS);
+ }
+}
+
+void UniversalWriter::ZeroToOffset(raw_ostream &OS, size_t Offset) {
+ auto currOffset = OS.tell() - fileStart;
+ if (currOffset < Offset)
+ ZeroFillBytes(OS, Offset - currOffset);
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+namespace yaml {
+
+bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler /*EH*/) {
+ UniversalWriter Writer(Doc);
+ Writer.writeMachO(Out);
+ return true;
+}
+
+} // namespace yaml
+} // namespace llvm
diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp
index d12f12cf4435..0f7cd1e1495c 100644
--- a/lib/ObjectYAML/MachOYAML.cpp
+++ b/lib/ObjectYAML/MachOYAML.cpp
@@ -287,6 +287,15 @@ void MappingTraits<MachOYAML::Section>::mapping(IO &IO,
IO.mapRequired("reserved1", Section.reserved1);
IO.mapRequired("reserved2", Section.reserved2);
IO.mapOptional("reserved3", Section.reserved3);
+ IO.mapOptional("content", Section.content);
+}
+
+StringRef
+MappingTraits<MachOYAML::Section>::validate(IO &IO,
+ MachOYAML::Section &Section) {
+ if (Section.content && Section.size < Section.content->binary_size())
+ return "Section size must be greater than or equal to the content size";
+ return {};
}
void MappingTraits<MachO::build_tool_version>::mapping(
diff --git a/lib/ObjectYAML/MinidumpEmitter.cpp b/lib/ObjectYAML/MinidumpEmitter.cpp
new file mode 100644
index 000000000000..bbfd2cd8cbab
--- /dev/null
+++ b/lib/ObjectYAML/MinidumpEmitter.cpp
@@ -0,0 +1,247 @@
+//===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/MinidumpYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::minidump;
+using namespace llvm::MinidumpYAML;
+
+namespace {
+/// A helper class to manage the placement of various structures into the final
+/// minidump binary. Space for objects can be allocated via various allocate***
+/// methods, while the final minidump file is written by calling the writeTo
+/// method. The plain versions of allocation functions take a reference to the
+/// data which is to be written (and hence the data must be available until
+/// writeTo is called), while the "New" versions allocate the data in an
+/// allocator-managed buffer, which is available until the allocator object is
+/// destroyed. For both kinds of functions, it is possible to modify the
+/// data for which the space has been "allocated" until the final writeTo call.
+/// This is useful for "linking" the allocated structures via their offsets.
+class BlobAllocator {
+public:
+ size_t tell() const { return NextOffset; }
+
+ size_t allocateCallback(size_t Size,
+ std::function<void(raw_ostream &)> Callback) {
+ size_t Offset = NextOffset;
+ NextOffset += Size;
+ Callbacks.push_back(std::move(Callback));
+ return Offset;
+ }
+
+ size_t allocateBytes(ArrayRef<uint8_t> Data) {
+ return allocateCallback(
+ Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); });
+ }
+
+ size_t allocateBytes(yaml::BinaryRef Data) {
+ return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) {
+ Data.writeAsBinary(OS);
+ });
+ }
+
+ template <typename T> size_t allocateArray(ArrayRef<T> Data) {
+ return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()),
+ sizeof(T) * Data.size()});
+ }
+
+ template <typename T, typename RangeType>
+ std::pair<size_t, MutableArrayRef<T>>
+ allocateNewArray(const iterator_range<RangeType> &Range);
+
+ template <typename T> size_t allocateObject(const T &Data) {
+ return allocateArray(makeArrayRef(Data));
+ }
+
+ template <typename T, typename... Types>
+ std::pair<size_t, T *> allocateNewObject(Types &&... Args) {
+ T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...);
+ return {allocateObject(*Object), Object};
+ }
+
+ size_t allocateString(StringRef Str);
+
+ void writeTo(raw_ostream &OS) const;
+
+private:
+ size_t NextOffset = 0;
+
+ BumpPtrAllocator Temporaries;
+ std::vector<std::function<void(raw_ostream &)>> Callbacks;
+};
+} // namespace
+
+template <typename T, typename RangeType>
+std::pair<size_t, MutableArrayRef<T>>
+BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
+ size_t Num = std::distance(Range.begin(), Range.end());
+ MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num);
+ std::uninitialized_copy(Range.begin(), Range.end(), Array.begin());
+ return {allocateArray(Array), Array};
+}
+
+size_t BlobAllocator::allocateString(StringRef Str) {
+ SmallVector<UTF16, 32> WStr;
+ bool OK = convertUTF8ToUTF16String(Str, WStr);
+ assert(OK && "Invalid UTF8 in Str?");
+ (void)OK;
+
+ // The utf16 string is null-terminated, but the terminator is not counted in
+ // the string size.
+ WStr.push_back(0);
+ size_t Result =
+ allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first;
+ allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end()));
+ return Result;
+}
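+ // For example (a sketch), allocateString("ab") lays out a ulittle32_t
+ // length of 4 (2 code units * 2 bytes, terminator excluded) followed by
+ // three ulittle16_t code units: 'a', 'b', and the trailing NUL.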
+
+void BlobAllocator::writeTo(raw_ostream &OS) const {
+ size_t BeginOffset = OS.tell();
+ for (const auto &Callback : Callbacks)
+ Callback(OS);
+ assert(OS.tell() == BeginOffset + NextOffset &&
+ "Callbacks wrote an unexpected number of bytes.");
+ (void)BeginOffset;
+}
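+ // A minimal usage sketch of the offset-linking pattern described in the
+ // class comment (hypothetical caller, not from this patch). The plain
+ // allocateObject captures a reference to the caller's data, so the field
+ // can still be patched after its space has been reserved:
+ //
+ //   BlobAllocator File;
+ //   support::ulittle32_t NameRVA(0);
+ //   File.allocateObject(NameRVA);            // reserve space first
+ //   NameRVA = File.allocateString("a.out");  // "link" via the offset
+ //   File.writeTo(outs());                    // bytes are emitted only here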
+
+static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) {
+ return {support::ulittle32_t(Data.binary_size()),
+ support::ulittle32_t(File.allocateBytes(Data))};
+}
+
+static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) {
+ File.allocateObject(S.MDExceptionStream);
+
+ size_t DataEnd = File.tell();
+
+ // Lay out the thread context data (which is not a part of the stream).
+ // TODO: This usually (always?) matches the thread context of the
+ // corresponding thread, and may overlap memory regions as well. We could
+ // add a level of indirection to the MinidumpYAML format (like an array of
+ // Blobs that the LocationDescriptors index into) to be able to distinguish
+ // the cases where location descriptions overlap vs happen to reference
+ // identical data.
+ S.MDExceptionStream.ThreadContext = layout(File, S.ThreadContext);
+
+ return DataEnd;
+}
+
+static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) {
+ Range.Entry.Memory = layout(File, Range.Content);
+}
+
+static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) {
+ M.Entry.ModuleNameRVA = File.allocateString(M.Name);
+
+ M.Entry.CvRecord = layout(File, M.CvRecord);
+ M.Entry.MiscRecord = layout(File, M.MiscRecord);
+}
+
+static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) {
+ T.Entry.Stack.Memory = layout(File, T.Stack);
+ T.Entry.Context = layout(File, T.Context);
+}
+
+template <typename EntryT>
+static size_t layout(BlobAllocator &File,
+ MinidumpYAML::detail::ListStream<EntryT> &S) {
+
+ File.allocateNewObject<support::ulittle32_t>(S.Entries.size());
+ for (auto &E : S.Entries)
+ File.allocateObject(E.Entry);
+
+ size_t DataEnd = File.tell();
+
+ // Lay out the auxiliary data (which is not a part of the stream).
+ for (auto &E : S.Entries)
+ layout(File, E);
+
+ return DataEnd;
+}
+
+static Directory layout(BlobAllocator &File, Stream &S) {
+ Directory Result;
+ Result.Type = S.Type;
+ Result.Location.RVA = File.tell();
+ Optional<size_t> DataEnd;
+ switch (S.Kind) {
+ case Stream::StreamKind::Exception:
+ DataEnd = layout(File, cast<MinidumpYAML::ExceptionStream>(S));
+ break;
+ case Stream::StreamKind::MemoryInfoList: {
+ MemoryInfoListStream &InfoList = cast<MemoryInfoListStream>(S);
+ File.allocateNewObject<minidump::MemoryInfoListHeader>(
+ sizeof(minidump::MemoryInfoListHeader), sizeof(minidump::MemoryInfo),
+ InfoList.Infos.size());
+ File.allocateArray(makeArrayRef(InfoList.Infos));
+ break;
+ }
+ case Stream::StreamKind::MemoryList:
+ DataEnd = layout(File, cast<MemoryListStream>(S));
+ break;
+ case Stream::StreamKind::ModuleList:
+ DataEnd = layout(File, cast<ModuleListStream>(S));
+ break;
+ case Stream::StreamKind::RawContent: {
+ RawContentStream &Raw = cast<RawContentStream>(S);
+ File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) {
+ Raw.Content.writeAsBinary(OS);
+ assert(Raw.Content.binary_size() <= Raw.Size);
+ OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0');
+ });
+ break;
+ }
+ case Stream::StreamKind::SystemInfo: {
+ SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
+ File.allocateObject(SystemInfo.Info);
+ // The CSD string is not a part of the stream.
+ DataEnd = File.tell();
+ SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
+ break;
+ }
+ case Stream::StreamKind::TextContent:
+ File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
+ break;
+ case Stream::StreamKind::ThreadList:
+ DataEnd = layout(File, cast<ThreadListStream>(S));
+ break;
+ }
+ // If DataEnd is not set, we assume everything we generated is a part of the
+ // stream.
+ Result.Location.DataSize =
+ DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
+ return Result;
+}
+
+namespace llvm {
+namespace yaml {
+
+bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out,
+ ErrorHandler /*EH*/) {
+ BlobAllocator File;
+ File.allocateObject(Obj.Header);
+
+ std::vector<Directory> StreamDirectory(Obj.Streams.size());
+ Obj.Header.StreamDirectoryRVA =
+ File.allocateArray(makeArrayRef(StreamDirectory));
+ Obj.Header.NumberOfStreams = StreamDirectory.size();
+
+ for (auto &Stream : enumerate(Obj.Streams))
+ StreamDirectory[Stream.index()] = layout(File, *Stream.value());
+
+ File.writeTo(Out);
+ return true;
+}
+
+} // namespace yaml
+} // namespace llvm
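+ // A minimal caller sketch (assumed usage; this mirrors the old
+ // MinidumpYAML::writeAsBinary helper that this emitter replaces):
+ //
+ //   yaml::Input Input(YamlString);
+ //   MinidumpYAML::Object Obj;
+ //   Input >> Obj;
+ //   if (!Input.error())
+ //     yaml::yaml2minidump(Obj, outs(),
+ //                         [](const Twine &Msg) { errs() << Msg << "\n"; });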
diff --git a/lib/ObjectYAML/MinidumpYAML.cpp b/lib/ObjectYAML/MinidumpYAML.cpp
index f5f2acd0cc4b..21b2a4d78629 100644
--- a/lib/ObjectYAML/MinidumpYAML.cpp
+++ b/lib/ObjectYAML/MinidumpYAML.cpp
@@ -8,110 +8,11 @@
#include "llvm/ObjectYAML/MinidumpYAML.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/ConvertUTF.h"
using namespace llvm;
using namespace llvm::MinidumpYAML;
using namespace llvm::minidump;
-namespace {
-/// A helper class to manage the placement of various structures into the final
-/// minidump binary. Space for objects can be allocated via various allocate***
-/// methods, while the final minidump file is written by calling the writeTo
-/// method. The plain versions of allocation functions take a reference to the
-/// data which is to be written (and hence the data must be available until
-/// writeTo is called), while the "New" versions allocate the data in an
-/// allocator-managed buffer, which is available until the allocator object is
-/// destroyed. For both kinds of functions, it is possible to modify the
-/// data for which the space has been "allocated" until the final writeTo call.
-/// This is useful for "linking" the allocated structures via their offsets.
-class BlobAllocator {
-public:
- size_t tell() const { return NextOffset; }
-
- size_t allocateCallback(size_t Size,
- std::function<void(raw_ostream &)> Callback) {
- size_t Offset = NextOffset;
- NextOffset += Size;
- Callbacks.push_back(std::move(Callback));
- return Offset;
- }
-
- size_t allocateBytes(ArrayRef<uint8_t> Data) {
- return allocateCallback(
- Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); });
- }
-
- size_t allocateBytes(yaml::BinaryRef Data) {
- return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) {
- Data.writeAsBinary(OS);
- });
- }
-
- template <typename T> size_t allocateArray(ArrayRef<T> Data) {
- return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()),
- sizeof(T) * Data.size()});
- }
-
- template <typename T, typename RangeType>
- std::pair<size_t, MutableArrayRef<T>>
- allocateNewArray(const iterator_range<RangeType> &Range);
-
- template <typename T> size_t allocateObject(const T &Data) {
- return allocateArray(makeArrayRef(Data));
- }
-
- template <typename T, typename... Types>
- std::pair<size_t, T *> allocateNewObject(Types &&... Args) {
- T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...);
- return {allocateObject(*Object), Object};
- }
-
- size_t allocateString(StringRef Str);
-
- void writeTo(raw_ostream &OS) const;
-
-private:
- size_t NextOffset = 0;
-
- BumpPtrAllocator Temporaries;
- std::vector<std::function<void(raw_ostream &)>> Callbacks;
-};
-} // namespace
-
-template <typename T, typename RangeType>
-std::pair<size_t, MutableArrayRef<T>>
-BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
- size_t Num = std::distance(Range.begin(), Range.end());
- MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num);
- std::uninitialized_copy(Range.begin(), Range.end(), Array.begin());
- return {allocateArray(Array), Array};
-}
-
-size_t BlobAllocator::allocateString(StringRef Str) {
- SmallVector<UTF16, 32> WStr;
- bool OK = convertUTF8ToUTF16String(Str, WStr);
- assert(OK && "Invalid UTF8 in Str?");
- (void)OK;
-
- // The utf16 string is null-terminated, but the terminator is not counted in
- // the string size.
- WStr.push_back(0);
- size_t Result =
- allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first;
- allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end()));
- return Result;
-}
-
-void BlobAllocator::writeTo(raw_ostream &OS) const {
- size_t BeginOffset = OS.tell();
- for (const auto &Callback : Callbacks)
- Callback(OS);
- assert(OS.tell() == BeginOffset + NextOffset &&
- "Callbacks wrote an unexpected number of bytes.");
- (void)BeginOffset;
-}
-
/// Perform an optional yaml-mapping of an endian-aware type EndianType. The
/// only purpose of this function is to avoid casting the Default value to the
/// endian type.
@@ -168,6 +69,10 @@ Stream::~Stream() = default;
Stream::StreamKind Stream::getKind(StreamType Type) {
switch (Type) {
+ case StreamType::Exception:
+ return StreamKind::Exception;
+ case StreamType::MemoryInfoList:
+ return StreamKind::MemoryInfoList;
case StreamType::MemoryList:
return StreamKind::MemoryList;
case StreamType::ModuleList:
@@ -192,22 +97,45 @@ Stream::StreamKind Stream::getKind(StreamType Type) {
std::unique_ptr<Stream> Stream::create(StreamType Type) {
StreamKind Kind = getKind(Type);
switch (Kind) {
+ case StreamKind::Exception:
+ return std::make_unique<ExceptionStream>();
+ case StreamKind::MemoryInfoList:
+ return std::make_unique<MemoryInfoListStream>();
case StreamKind::MemoryList:
- return llvm::make_unique<MemoryListStream>();
+ return std::make_unique<MemoryListStream>();
case StreamKind::ModuleList:
- return llvm::make_unique<ModuleListStream>();
+ return std::make_unique<ModuleListStream>();
case StreamKind::RawContent:
- return llvm::make_unique<RawContentStream>(Type);
+ return std::make_unique<RawContentStream>(Type);
case StreamKind::SystemInfo:
- return llvm::make_unique<SystemInfoStream>();
+ return std::make_unique<SystemInfoStream>();
case StreamKind::TextContent:
- return llvm::make_unique<TextContentStream>(Type);
+ return std::make_unique<TextContentStream>(Type);
case StreamKind::ThreadList:
- return llvm::make_unique<ThreadListStream>();
+ return std::make_unique<ThreadListStream>();
}
llvm_unreachable("Unhandled stream kind!");
}
+void yaml::ScalarBitSetTraits<MemoryProtection>::bitset(
+ IO &IO, MemoryProtection &Protect) {
+#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) \
+ IO.bitSetCase(Protect, #NATIVENAME, MemoryProtection::NAME);
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+}
+
+void yaml::ScalarBitSetTraits<MemoryState>::bitset(IO &IO, MemoryState &State) {
+#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) \
+ IO.bitSetCase(State, #NATIVENAME, MemoryState::NAME);
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+}
+
+void yaml::ScalarBitSetTraits<MemoryType>::bitset(IO &IO, MemoryType &Type) {
+#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) \
+ IO.bitSetCase(Type, #NATIVENAME, MemoryType::NAME);
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+}
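+ // Each of the bitsets above is populated from MinidumpConstants.def. For a
+ // hypothetical def entry HANDLE_MDMP_MEMSTATE(0x1000, Commit, MEM_COMMIT),
+ // the include expands to roughly:
+ //
+ //   IO.bitSetCase(State, "MEM_COMMIT", MemoryState::Commit);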
+
void yaml::ScalarEnumerationTraits<ProcessorArchitecture>::enumeration(
IO &IO, ProcessorArchitecture &Arch) {
#define HANDLE_MDMP_ARCH(CODE, NAME) \
@@ -314,6 +242,20 @@ void yaml::MappingTraits<CPUInfo::X86Info>::mapping(IO &IO,
mapOptionalHex(IO, "AMD Extended Features", Info.AMDExtendedFeatures, 0);
}
+void yaml::MappingTraits<MemoryInfo>::mapping(IO &IO, MemoryInfo &Info) {
+ mapRequiredHex(IO, "Base Address", Info.BaseAddress);
+ mapOptionalHex(IO, "Allocation Base", Info.AllocationBase, Info.BaseAddress);
+ mapRequiredAs<MemoryProtection>(IO, "Allocation Protect",
+ Info.AllocationProtect);
+ mapOptionalHex(IO, "Reserved0", Info.Reserved0, 0);
+ mapRequiredHex(IO, "Region Size", Info.RegionSize);
+ mapRequiredAs<MemoryState>(IO, "State", Info.State);
+ mapOptionalAs<MemoryProtection>(IO, "Protect", Info.Protect,
+ Info.AllocationProtect);
+ mapRequiredAs<MemoryType>(IO, "Type", Info.Type);
+ mapOptionalHex(IO, "Reserved1", Info.Reserved1, 0);
+}
+
void yaml::MappingTraits<VSFixedFileInfo>::mapping(IO &IO,
VSFixedFileInfo &Info) {
mapOptionalHex(IO, "Signature", Info.Signature, 0);
@@ -336,8 +278,7 @@ void yaml::MappingTraits<ModuleListStream::entry_type>::mapping(
mapRequiredHex(IO, "Base of Image", M.Entry.BaseOfImage);
mapRequiredHex(IO, "Size of Image", M.Entry.SizeOfImage);
mapOptionalHex(IO, "Checksum", M.Entry.Checksum, 0);
- IO.mapOptional("Time Date Stamp", M.Entry.TimeDateStamp,
- support::ulittle32_t(0));
+ mapOptional(IO, "Time Date Stamp", M.Entry.TimeDateStamp, 0);
IO.mapRequired("Module Name", M.Name);
IO.mapOptional("Version Info", M.Entry.VersionInfo, VSFixedFileInfo());
IO.mapRequired("CodeView Record", M.CvRecord);
@@ -363,6 +304,10 @@ void yaml::MappingTraits<MemoryListStream::entry_type>::mapping(
IO, Range.Entry, Range.Content);
}
+static void streamMapping(yaml::IO &IO, MemoryInfoListStream &Stream) {
+ IO.mapRequired("Memory Ranges", Stream.Infos);
+}
+
static void streamMapping(yaml::IO &IO, MemoryListStream &Stream) {
IO.mapRequired("Memory Ranges", Stream.Entries);
}
@@ -425,6 +370,32 @@ static void streamMapping(yaml::IO &IO, ThreadListStream &Stream) {
IO.mapRequired("Threads", Stream.Entries);
}
+static void streamMapping(yaml::IO &IO, MinidumpYAML::ExceptionStream &Stream) {
+ mapRequiredHex(IO, "Thread ID", Stream.MDExceptionStream.ThreadId);
+ IO.mapRequired("Exception Record", Stream.MDExceptionStream.ExceptionRecord);
+ IO.mapRequired("Thread Context", Stream.ThreadContext);
+}
+
+void yaml::MappingTraits<minidump::Exception>::mapping(
+ yaml::IO &IO, minidump::Exception &Exception) {
+ mapRequiredHex(IO, "Exception Code", Exception.ExceptionCode);
+ mapOptionalHex(IO, "Exception Flags", Exception.ExceptionFlags, 0);
+ mapOptionalHex(IO, "Exception Record", Exception.ExceptionRecord, 0);
+ mapOptionalHex(IO, "Exception Address", Exception.ExceptionAddress, 0);
+ mapOptional(IO, "Number of Parameters", Exception.NumberParameters, 0);
+
+ for (size_t Index = 0; Index < Exception.MaxParameters; ++Index) {
+ SmallString<16> Name("Parameter ");
+ Twine(Index).toVector(Name);
+ support::ulittle64_t &Field = Exception.ExceptionInformation[Index];
+
+ if (Index < Exception.NumberParameters)
+ mapRequiredHex(IO, Name.c_str(), Field);
+ else
+ mapOptionalHex(IO, Name.c_str(), Field, 0);
+ }
+}
+
void yaml::MappingTraits<std::unique_ptr<Stream>>::mapping(
yaml::IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S) {
StreamType Type;
@@ -435,6 +406,12 @@ void yaml::MappingTraits<std::unique_ptr<Stream>>::mapping(
if (!IO.outputting())
S = MinidumpYAML::Stream::create(Type);
switch (S->Kind) {
+ case MinidumpYAML::Stream::StreamKind::Exception:
+ streamMapping(IO, llvm::cast<MinidumpYAML::ExceptionStream>(*S));
+ break;
+ case MinidumpYAML::Stream::StreamKind::MemoryInfoList:
+ streamMapping(IO, llvm::cast<MemoryInfoListStream>(*S));
+ break;
case MinidumpYAML::Stream::StreamKind::MemoryList:
streamMapping(IO, llvm::cast<MemoryListStream>(*S));
break;
@@ -461,6 +438,8 @@ StringRef yaml::MappingTraits<std::unique_ptr<Stream>>::validate(
switch (S->Kind) {
case MinidumpYAML::Stream::StreamKind::RawContent:
return streamValidate(cast<RawContentStream>(*S));
+ case MinidumpYAML::Stream::StreamKind::Exception:
+ case MinidumpYAML::Stream::StreamKind::MemoryInfoList:
case MinidumpYAML::Stream::StreamKind::MemoryList:
case MinidumpYAML::Stream::StreamKind::ModuleList:
case MinidumpYAML::Stream::StreamKind::SystemInfo:
@@ -479,118 +458,28 @@ void yaml::MappingTraits<Object>::mapping(IO &IO, Object &O) {
IO.mapRequired("Streams", O.Streams);
}
-static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) {
- return {support::ulittle32_t(Data.binary_size()),
- support::ulittle32_t(File.allocateBytes(Data))};
-}
-
-static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) {
- Range.Entry.Memory = layout(File, Range.Content);
-}
-
-static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) {
- M.Entry.ModuleNameRVA = File.allocateString(M.Name);
-
- M.Entry.CvRecord = layout(File, M.CvRecord);
- M.Entry.MiscRecord = layout(File, M.MiscRecord);
-}
-
-static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) {
- T.Entry.Stack.Memory = layout(File, T.Stack);
- T.Entry.Context = layout(File, T.Context);
-}
-
-template <typename EntryT>
-static size_t layout(BlobAllocator &File,
- MinidumpYAML::detail::ListStream<EntryT> &S) {
-
- File.allocateNewObject<support::ulittle32_t>(S.Entries.size());
- for (auto &E : S.Entries)
- File.allocateObject(E.Entry);
-
- size_t DataEnd = File.tell();
-
- // Lay out the auxiliary data, (which is not a part of the stream).
- DataEnd = File.tell();
- for (auto &E : S.Entries)
- layout(File, E);
-
- return DataEnd;
-}
-
-static Directory layout(BlobAllocator &File, Stream &S) {
- Directory Result;
- Result.Type = S.Type;
- Result.Location.RVA = File.tell();
- Optional<size_t> DataEnd;
- switch (S.Kind) {
- case Stream::StreamKind::MemoryList:
- DataEnd = layout(File, cast<MemoryListStream>(S));
- break;
- case Stream::StreamKind::ModuleList:
- DataEnd = layout(File, cast<ModuleListStream>(S));
- break;
- case Stream::StreamKind::RawContent: {
- RawContentStream &Raw = cast<RawContentStream>(S);
- File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) {
- Raw.Content.writeAsBinary(OS);
- assert(Raw.Content.binary_size() <= Raw.Size);
- OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0');
- });
- break;
- }
- case Stream::StreamKind::SystemInfo: {
- SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
- File.allocateObject(SystemInfo.Info);
- // The CSD string is not a part of the stream.
- DataEnd = File.tell();
- SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
- break;
- }
- case Stream::StreamKind::TextContent:
- File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
- break;
- case Stream::StreamKind::ThreadList:
- DataEnd = layout(File, cast<ThreadListStream>(S));
- break;
- }
- // If DataEnd is not set, we assume everything we generated is a part of the
- // stream.
- Result.Location.DataSize =
- DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
- return Result;
-}
-
-void MinidumpYAML::writeAsBinary(Object &Obj, raw_ostream &OS) {
- BlobAllocator File;
- File.allocateObject(Obj.Header);
-
- std::vector<Directory> StreamDirectory(Obj.Streams.size());
- Obj.Header.StreamDirectoryRVA =
- File.allocateArray(makeArrayRef(StreamDirectory));
- Obj.Header.NumberOfStreams = StreamDirectory.size();
-
- for (auto &Stream : enumerate(Obj.Streams))
- StreamDirectory[Stream.index()] = layout(File, *Stream.value());
-
- File.writeTo(OS);
-}
-
-Error MinidumpYAML::writeAsBinary(StringRef Yaml, raw_ostream &OS) {
- yaml::Input Input(Yaml);
- Object Obj;
- Input >> Obj;
- if (std::error_code EC = Input.error())
- return errorCodeToError(EC);
-
- writeAsBinary(Obj, OS);
- return Error::success();
-}
-
Expected<std::unique_ptr<Stream>>
Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
StreamKind Kind = getKind(StreamDesc.Type);
switch (Kind) {
+ case StreamKind::Exception: {
+ Expected<const minidump::ExceptionStream &> ExpectedExceptionStream =
+ File.getExceptionStream();
+ if (!ExpectedExceptionStream)
+ return ExpectedExceptionStream.takeError();
+ Expected<ArrayRef<uint8_t>> ExpectedThreadContext =
+ File.getRawData(ExpectedExceptionStream->ThreadContext);
+ if (!ExpectedThreadContext)
+ return ExpectedThreadContext.takeError();
+ return std::make_unique<ExceptionStream>(*ExpectedExceptionStream,
+ *ExpectedThreadContext);
+ }
+ case StreamKind::MemoryInfoList: {
+ if (auto ExpectedList = File.getMemoryInfoList())
+ return std::make_unique<MemoryInfoListStream>(*ExpectedList);
+ else
+ return ExpectedList.takeError();
+ }
case StreamKind::MemoryList: {
auto ExpectedList = File.getMemoryList();
if (!ExpectedList)
@@ -602,7 +491,7 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
return ExpectedContent.takeError();
Ranges.push_back({MD, *ExpectedContent});
}
- return llvm::make_unique<MemoryListStream>(std::move(Ranges));
+ return std::make_unique<MemoryListStream>(std::move(Ranges));
}
case StreamKind::ModuleList: {
auto ExpectedList = File.getModuleList();
@@ -622,10 +511,10 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
Modules.push_back(
{M, std::move(*ExpectedName), *ExpectedCv, *ExpectedMisc});
}
- return llvm::make_unique<ModuleListStream>(std::move(Modules));
+ return std::make_unique<ModuleListStream>(std::move(Modules));
}
case StreamKind::RawContent:
- return llvm::make_unique<RawContentStream>(StreamDesc.Type,
+ return std::make_unique<RawContentStream>(StreamDesc.Type,
File.getRawStream(StreamDesc));
case StreamKind::SystemInfo: {
auto ExpectedInfo = File.getSystemInfo();
@@ -634,11 +523,11 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA);
if (!ExpectedCSDVersion)
return ExpectedInfo.takeError();
- return llvm::make_unique<SystemInfoStream>(*ExpectedInfo,
+ return std::make_unique<SystemInfoStream>(*ExpectedInfo,
std::move(*ExpectedCSDVersion));
}
case StreamKind::TextContent:
- return llvm::make_unique<TextContentStream>(
+ return std::make_unique<TextContentStream>(
StreamDesc.Type, toStringRef(File.getRawStream(StreamDesc)));
case StreamKind::ThreadList: {
auto ExpectedList = File.getThreadList();
@@ -654,7 +543,7 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
return ExpectedContext.takeError();
Threads.push_back({T, *ExpectedStack, *ExpectedContext});
}
- return llvm::make_unique<ThreadListStream>(std::move(Threads));
+ return std::make_unique<ThreadListStream>(std::move(Threads));
}
}
llvm_unreachable("Unhandled stream kind!");
diff --git a/lib/ObjectYAML/WasmEmitter.cpp b/lib/ObjectYAML/WasmEmitter.cpp
new file mode 100644
index 000000000000..debc040587a8
--- /dev/null
+++ b/lib/ObjectYAML/WasmEmitter.cpp
@@ -0,0 +1,633 @@
+//===- yaml2wasm - Convert YAML to a Wasm object file --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The Wasm component of yaml2obj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Wasm.h"
+#include "llvm/ObjectYAML/ObjectYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+
+namespace {
+/// This parses a YAML stream that represents a Wasm object file.
+/// See docs/yaml2obj for the YAML schema.
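+/// A minimal input document might look like this (illustrative sketch, not
+/// normative):
+///
+///   --- !WASM
+///   FileHeader:
+///     Version: 0x00000001
+///   Sections:
+///     - Type: TYPE
+///       Signatures:
+///         - Index: 0
+///           ParamTypes: [ I32 ]
+///           ReturnTypes: [ I32 ]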
+class WasmWriter {
+public:
+ WasmWriter(WasmYAML::Object &Obj, yaml::ErrorHandler EH)
+ : Obj(Obj), ErrHandler(EH) {}
+ bool writeWasm(raw_ostream &OS);
+
+private:
+ void writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec,
+ uint32_t SectionIndex);
+
+ void writeInitExpr(raw_ostream &OS, const wasm::WasmInitExpr &InitExpr);
+
+ void writeSectionContent(raw_ostream &OS, WasmYAML::CustomSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::TypeSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::ImportSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::FunctionSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::TableSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::MemorySection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::GlobalSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::EventSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::ExportSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::StartSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::ElemSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::CodeSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::DataSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::DataCountSection &Section);
+
+ // Custom section types
+ void writeSectionContent(raw_ostream &OS, WasmYAML::DylinkSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::NameSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::LinkingSection &Section);
+ void writeSectionContent(raw_ostream &OS, WasmYAML::ProducersSection &Section);
+ void writeSectionContent(raw_ostream &OS,
+ WasmYAML::TargetFeaturesSection &Section);
+ WasmYAML::Object &Obj;
+ uint32_t NumImportedFunctions = 0;
+ uint32_t NumImportedGlobals = 0;
+ uint32_t NumImportedEvents = 0;
+
+ bool HasError = false;
+ yaml::ErrorHandler ErrHandler;
+ void reportError(const Twine &Msg);
+};
+
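+// Buffers a subsection of the "linking"/"name" custom sections so that its
+// ULEB128-encoded size can be emitted before its payload. Usage sketch:
+//
+//   SubSectionWriter SS(OS);
+//   encodeULEB128(Count, SS.getStream()); // payload is buffered
+//   SS.done();                            // emits size prefix, then payload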
+class SubSectionWriter {
+ raw_ostream &OS;
+ std::string OutString;
+ raw_string_ostream StringStream;
+
+public:
+ SubSectionWriter(raw_ostream &OS) : OS(OS), StringStream(OutString) {}
+
+ void done() {
+ StringStream.flush();
+ encodeULEB128(OutString.size(), OS);
+ OS << OutString;
+ OutString.clear();
+ }
+
+ raw_ostream &getStream() { return StringStream; }
+};
+
+} // end anonymous namespace
+
+static int writeUint64(raw_ostream &OS, uint64_t Value) {
+ char Data[sizeof(Value)];
+ support::endian::write64le(Data, Value);
+ OS.write(Data, sizeof(Data));
+ return 0;
+}
+
+static int writeUint32(raw_ostream &OS, uint32_t Value) {
+ char Data[sizeof(Value)];
+ support::endian::write32le(Data, Value);
+ OS.write(Data, sizeof(Data));
+ return 0;
+}
+
+static int writeUint8(raw_ostream &OS, uint8_t Value) {
+ char Data[sizeof(Value)];
+ memcpy(Data, &Value, sizeof(Data));
+ OS.write(Data, sizeof(Data));
+ return 0;
+}
+
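+// Most integers in the wasm binary format are LEB128-encoded (see
+// llvm/Support/LEB128.h); e.g. encodeULEB128(624485, OS) emits the bytes
+// 0xe5 0x8e 0x26. Strings are length-prefixed, so writeStringRef("env", OS)
+// emits 0x03 'e' 'n' 'v'.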
+static int writeStringRef(const StringRef &Str, raw_ostream &OS) {
+ encodeULEB128(Str.size(), OS);
+ OS << Str;
+ return 0;
+}
+
+static int writeLimits(const WasmYAML::Limits &Lim, raw_ostream &OS) {
+ writeUint8(OS, Lim.Flags);
+ encodeULEB128(Lim.Initial, OS);
+ if (Lim.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX)
+ encodeULEB128(Lim.Maximum, OS);
+ return 0;
+}
+
+void WasmWriter::reportError(const Twine &Msg) {
+ ErrHandler(Msg);
+ HasError = true;
+}
+
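+// An init expr is a single constant instruction followed by an end opcode;
+// e.g. "i32.const 1" is emitted as the three bytes 0x41 0x01 0x0b.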
+void WasmWriter::writeInitExpr(raw_ostream &OS,
+ const wasm::WasmInitExpr &InitExpr) {
+ writeUint8(OS, InitExpr.Opcode);
+ switch (InitExpr.Opcode) {
+ case wasm::WASM_OPCODE_I32_CONST:
+ encodeSLEB128(InitExpr.Value.Int32, OS);
+ break;
+ case wasm::WASM_OPCODE_I64_CONST:
+ encodeSLEB128(InitExpr.Value.Int64, OS);
+ break;
+ case wasm::WASM_OPCODE_F32_CONST:
+ writeUint32(OS, InitExpr.Value.Float32);
+ break;
+ case wasm::WASM_OPCODE_F64_CONST:
+ writeUint64(OS, InitExpr.Value.Float64);
+ break;
+ case wasm::WASM_OPCODE_GLOBAL_GET:
+ encodeULEB128(InitExpr.Value.Global, OS);
+ break;
+ default:
+ reportError("unknown opcode in init_expr: " + Twine(InitExpr.Opcode));
+ return;
+ }
+ writeUint8(OS, wasm::WASM_OPCODE_END);
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::DylinkSection &Section) {
+ writeStringRef(Section.Name, OS);
+ encodeULEB128(Section.MemorySize, OS);
+ encodeULEB128(Section.MemoryAlignment, OS);
+ encodeULEB128(Section.TableSize, OS);
+ encodeULEB128(Section.TableAlignment, OS);
+ encodeULEB128(Section.Needed.size(), OS);
+ for (StringRef Needed : Section.Needed)
+ writeStringRef(Needed, OS);
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::LinkingSection &Section) {
+ writeStringRef(Section.Name, OS);
+ encodeULEB128(Section.Version, OS);
+
+ SubSectionWriter SubSection(OS);
+
+ // SYMBOL_TABLE subsection
+ if (Section.SymbolTable.size()) {
+ writeUint8(OS, wasm::WASM_SYMBOL_TABLE);
+
+ encodeULEB128(Section.SymbolTable.size(), SubSection.getStream());
+#ifndef NDEBUG
+ uint32_t SymbolIndex = 0;
+#endif
+ for (const WasmYAML::SymbolInfo &Info : Section.SymbolTable) {
+ assert(Info.Index == SymbolIndex++);
+ writeUint8(SubSection.getStream(), Info.Kind);
+ encodeULEB128(Info.Flags, SubSection.getStream());
+ switch (Info.Kind) {
+ case wasm::WASM_SYMBOL_TYPE_FUNCTION:
+ case wasm::WASM_SYMBOL_TYPE_GLOBAL:
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
+ encodeULEB128(Info.ElementIndex, SubSection.getStream());
+ if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 ||
+ (Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+ writeStringRef(Info.Name, SubSection.getStream());
+ break;
+ case wasm::WASM_SYMBOL_TYPE_DATA:
+ writeStringRef(Info.Name, SubSection.getStream());
+ if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) {
+ encodeULEB128(Info.DataRef.Segment, SubSection.getStream());
+ encodeULEB128(Info.DataRef.Offset, SubSection.getStream());
+ encodeULEB128(Info.DataRef.Size, SubSection.getStream());
+ }
+ break;
+ case wasm::WASM_SYMBOL_TYPE_SECTION:
+ encodeULEB128(Info.ElementIndex, SubSection.getStream());
+ break;
+ default:
+ llvm_unreachable("unexpected kind");
+ }
+ }
+
+ SubSection.done();
+ }
+
+ // SEGMENT_INFO subsection
+ if (Section.SegmentInfos.size()) {
+ writeUint8(OS, wasm::WASM_SEGMENT_INFO);
+ encodeULEB128(Section.SegmentInfos.size(), SubSection.getStream());
+ for (const WasmYAML::SegmentInfo &SegmentInfo : Section.SegmentInfos) {
+ writeStringRef(SegmentInfo.Name, SubSection.getStream());
+ encodeULEB128(SegmentInfo.Alignment, SubSection.getStream());
+ encodeULEB128(SegmentInfo.Flags, SubSection.getStream());
+ }
+ SubSection.done();
+ }
+
+ // INIT_FUNCS subsection
+ if (Section.InitFunctions.size()) {
+ writeUint8(OS, wasm::WASM_INIT_FUNCS);
+ encodeULEB128(Section.InitFunctions.size(), SubSection.getStream());
+ for (const WasmYAML::InitFunction &Func : Section.InitFunctions) {
+ encodeULEB128(Func.Priority, SubSection.getStream());
+ encodeULEB128(Func.Symbol, SubSection.getStream());
+ }
+ SubSection.done();
+ }
+
+ // COMDAT_INFO subsection
+ if (Section.Comdats.size()) {
+ writeUint8(OS, wasm::WASM_COMDAT_INFO);
+ encodeULEB128(Section.Comdats.size(), SubSection.getStream());
+ for (const auto &C : Section.Comdats) {
+ writeStringRef(C.Name, SubSection.getStream());
+ encodeULEB128(0, SubSection.getStream()); // flags for future use
+ encodeULEB128(C.Entries.size(), SubSection.getStream());
+ for (const WasmYAML::ComdatEntry &Entry : C.Entries) {
+ writeUint8(SubSection.getStream(), Entry.Kind);
+ encodeULEB128(Entry.Index, SubSection.getStream());
+ }
+ }
+ SubSection.done();
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::NameSection &Section) {
+ writeStringRef(Section.Name, OS);
+ if (Section.FunctionNames.size()) {
+ writeUint8(OS, wasm::WASM_NAMES_FUNCTION);
+
+ SubSectionWriter SubSection(OS);
+
+ encodeULEB128(Section.FunctionNames.size(), SubSection.getStream());
+ for (const WasmYAML::NameEntry &NameEntry : Section.FunctionNames) {
+ encodeULEB128(NameEntry.Index, SubSection.getStream());
+ writeStringRef(NameEntry.Name, SubSection.getStream());
+ }
+
+ SubSection.done();
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::ProducersSection &Section) {
+ writeStringRef(Section.Name, OS);
+ int Fields = int(!Section.Languages.empty()) + int(!Section.Tools.empty()) +
+ int(!Section.SDKs.empty());
+ if (Fields == 0)
+ return;
+ encodeULEB128(Fields, OS);
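+ // Each non-empty field is emitted as its name, a count, and that many
+ // name/version pairs, e.g. "processed-by", 1, ("clang", "9.0.0").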
+ for (auto &Field : {std::make_pair(StringRef("language"), &Section.Languages),
+ std::make_pair(StringRef("processed-by"), &Section.Tools),
+ std::make_pair(StringRef("sdk"), &Section.SDKs)}) {
+ if (Field.second->empty())
+ continue;
+ writeStringRef(Field.first, OS);
+ encodeULEB128(Field.second->size(), OS);
+ for (auto &Entry : *Field.second) {
+ writeStringRef(Entry.Name, OS);
+ writeStringRef(Entry.Version, OS);
+ }
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::TargetFeaturesSection &Section) {
+ writeStringRef(Section.Name, OS);
+ encodeULEB128(Section.Features.size(), OS);
+ for (auto &E : Section.Features) {
+ writeUint8(OS, E.Prefix);
+ writeStringRef(E.Name, OS);
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::CustomSection &Section) {
+ if (auto S = dyn_cast<WasmYAML::DylinkSection>(&Section)) {
+ writeSectionContent(OS, *S);
+ } else if (auto S = dyn_cast<WasmYAML::NameSection>(&Section)) {
+ writeSectionContent(OS, *S);
+ } else if (auto S = dyn_cast<WasmYAML::LinkingSection>(&Section)) {
+ writeSectionContent(OS, *S);
+ } else if (auto S = dyn_cast<WasmYAML::ProducersSection>(&Section)) {
+ writeSectionContent(OS, *S);
+ } else if (auto S = dyn_cast<WasmYAML::TargetFeaturesSection>(&Section)) {
+ writeSectionContent(OS, *S);
+ } else {
+ writeStringRef(Section.Name, OS);
+ Section.Payload.writeAsBinary(OS);
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::TypeSection &Section) {
+ encodeULEB128(Section.Signatures.size(), OS);
+ uint32_t ExpectedIndex = 0;
+ for (const WasmYAML::Signature &Sig : Section.Signatures) {
+ if (Sig.Index != ExpectedIndex) {
+ reportError("unexpected type index: " + Twine(Sig.Index));
+ return;
+ }
+ ++ExpectedIndex;
+ writeUint8(OS, Sig.Form);
+ encodeULEB128(Sig.ParamTypes.size(), OS);
+ for (auto ParamType : Sig.ParamTypes)
+ writeUint8(OS, ParamType);
+ encodeULEB128(Sig.ReturnTypes.size(), OS);
+ for (auto ReturnType : Sig.ReturnTypes)
+ writeUint8(OS, ReturnType);
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::ImportSection &Section) {
+ encodeULEB128(Section.Imports.size(), OS);
+ for (const WasmYAML::Import &Import : Section.Imports) {
+ writeStringRef(Import.Module, OS);
+ writeStringRef(Import.Field, OS);
+ writeUint8(OS, Import.Kind);
+ switch (Import.Kind) {
+ case wasm::WASM_EXTERNAL_FUNCTION:
+ encodeULEB128(Import.SigIndex, OS);
+ NumImportedFunctions++;
+ break;
+ case wasm::WASM_EXTERNAL_GLOBAL:
+ writeUint8(OS, Import.GlobalImport.Type);
+ writeUint8(OS, Import.GlobalImport.Mutable);
+ NumImportedGlobals++;
+ break;
+ case wasm::WASM_EXTERNAL_EVENT:
+ writeUint32(OS, Import.EventImport.Attribute);
+ writeUint32(OS, Import.EventImport.SigIndex);
+ NumImportedEvents++;
+ break;
+ case wasm::WASM_EXTERNAL_MEMORY:
+ writeLimits(Import.Memory, OS);
+ break;
+ case wasm::WASM_EXTERNAL_TABLE:
+ writeUint8(OS, Import.TableImport.ElemType);
+ writeLimits(Import.TableImport.TableLimits, OS);
+ break;
+ default:
+ reportError("unknown import type: " +Twine(Import.Kind));
+ return;
+ }
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::FunctionSection &Section) {
+ encodeULEB128(Section.FunctionTypes.size(), OS);
+ for (uint32_t FuncType : Section.FunctionTypes)
+ encodeULEB128(FuncType, OS);
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::ExportSection &Section) {
+ encodeULEB128(Section.Exports.size(), OS);
+ for (const WasmYAML::Export &Export : Section.Exports) {
+ writeStringRef(Export.Name, OS);
+ writeUint8(OS, Export.Kind);
+ encodeULEB128(Export.Index, OS);
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::StartSection &Section) {
+ encodeULEB128(Section.StartFunction, OS);
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::TableSection &Section) {
+ encodeULEB128(Section.Tables.size(), OS);
+ for (auto &Table : Section.Tables) {
+ writeUint8(OS, Table.ElemType);
+ writeLimits(Table.TableLimits, OS);
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::MemorySection &Section) {
+ encodeULEB128(Section.Memories.size(), OS);
+ for (const WasmYAML::Limits &Mem : Section.Memories)
+ writeLimits(Mem, OS);
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::GlobalSection &Section) {
+ encodeULEB128(Section.Globals.size(), OS);
+ uint32_t ExpectedIndex = NumImportedGlobals;
+ for (auto &Global : Section.Globals) {
+ if (Global.Index != ExpectedIndex) {
+ reportError("unexpected global index: " + Twine(Global.Index));
+ return;
+ }
+ ++ExpectedIndex;
+ writeUint8(OS, Global.Type);
+ writeUint8(OS, Global.Mutable);
+ writeInitExpr(OS, Global.InitExpr);
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::EventSection &Section) {
+ encodeULEB128(Section.Events.size(), OS);
+ uint32_t ExpectedIndex = NumImportedEvents;
+ for (auto &Event : Section.Events) {
+ if (Event.Index != ExpectedIndex) {
+ reportError("unexpected event index: " + Twine(Event.Index));
+ return;
+ }
+ ++ExpectedIndex;
+ encodeULEB128(Event.Attribute, OS);
+ encodeULEB128(Event.SigIndex, OS);
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::ElemSection &Section) {
+ encodeULEB128(Section.Segments.size(), OS);
+ for (auto &Segment : Section.Segments) {
+ encodeULEB128(Segment.TableIndex, OS);
+ writeInitExpr(OS, Segment.Offset);
+
+ encodeULEB128(Segment.Functions.size(), OS);
+ for (auto &Function : Segment.Functions)
+ encodeULEB128(Function, OS);
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::CodeSection &Section) {
+ encodeULEB128(Section.Functions.size(), OS);
+ uint32_t ExpectedIndex = NumImportedFunctions;
+ for (auto &Func : Section.Functions) {
+ std::string OutString;
+ raw_string_ostream StringStream(OutString);
+ if (Func.Index != ExpectedIndex) {
+ reportError("unexpected function index: " + Twine(Func.Index));
+ return;
+ }
+ ++ExpectedIndex;
+
+ encodeULEB128(Func.Locals.size(), StringStream);
+ for (auto &LocalDecl : Func.Locals) {
+ encodeULEB128(LocalDecl.Count, StringStream);
+ writeUint8(StringStream, LocalDecl.Type);
+ }
+
+ Func.Body.writeAsBinary(StringStream);
+
+ // Write the function body size followed by the body content.
+ StringStream.flush();
+ encodeULEB128(OutString.size(), OS);
+ OS << OutString;
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::DataSection &Section) {
+ encodeULEB128(Section.Segments.size(), OS);
+ for (auto &Segment : Section.Segments) {
+ encodeULEB128(Segment.InitFlags, OS);
+ if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX)
+ encodeULEB128(Segment.MemoryIndex, OS);
+ if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0)
+ writeInitExpr(OS, Segment.Offset);
+ encodeULEB128(Segment.Content.binary_size(), OS);
+ Segment.Content.writeAsBinary(OS);
+ }
+}
+
+void WasmWriter::writeSectionContent(raw_ostream &OS,
+ WasmYAML::DataCountSection &Section) {
+ encodeULEB128(Section.Count, OS);
+}
+
+void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec,
+ uint32_t SectionIndex) {
+ switch (Sec.Type) {
+ case wasm::WASM_SEC_CODE:
+ writeStringRef("reloc.CODE", OS);
+ break;
+ case wasm::WASM_SEC_DATA:
+ writeStringRef("reloc.DATA", OS);
+ break;
+ case wasm::WASM_SEC_CUSTOM: {
+ auto *CustomSection = cast<WasmYAML::CustomSection>(&Sec);
+ writeStringRef(("reloc." + CustomSection->Name).str(), OS);
+ break;
+ }
+ default:
+ llvm_unreachable("not yet implemented");
+ }
+
+ encodeULEB128(SectionIndex, OS);
+ encodeULEB128(Sec.Relocations.size(), OS);
+
+ for (auto Reloc : Sec.Relocations) {
+ writeUint8(OS, Reloc.Type);
+ encodeULEB128(Reloc.Offset, OS);
+ encodeULEB128(Reloc.Index, OS);
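+ // Only memory-address and function/section-offset relocations carry an
+ // explicit addend.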
+ switch (Reloc.Type) {
+ case wasm::R_WASM_MEMORY_ADDR_LEB:
+ case wasm::R_WASM_MEMORY_ADDR_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_I32:
+ case wasm::R_WASM_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_SECTION_OFFSET_I32:
+ encodeULEB128(Reloc.Addend, OS);
+ }
+ }
+}
+
+bool WasmWriter::writeWasm(raw_ostream &OS) {
+ // Write the file header (magic number and version).
+ OS.write(wasm::WasmMagic, sizeof(wasm::WasmMagic));
+ writeUint32(OS, Obj.Header.Version);
+
+ // Write each section
+ llvm::object::WasmSectionOrderChecker Checker;
+ for (const std::unique_ptr<WasmYAML::Section> &Sec : Obj.Sections) {
+ StringRef SecName = "";
+ if (auto S = dyn_cast<WasmYAML::CustomSection>(Sec.get()))
+ SecName = S->Name;
+ if (!Checker.isValidSectionOrder(Sec->Type, SecName)) {
+ reportError("out of order section type: " + Twine(Sec->Type));
+ return false;
+ }
+ encodeULEB128(Sec->Type, OS);
+ std::string OutString;
+ raw_string_ostream StringStream(OutString);
+ if (auto S = dyn_cast<WasmYAML::CustomSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::TypeSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::ImportSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::FunctionSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::TableSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::MemorySection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::GlobalSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::EventSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::ExportSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::StartSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::ElemSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::CodeSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::DataSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else if (auto S = dyn_cast<WasmYAML::DataCountSection>(Sec.get()))
+ writeSectionContent(StringStream, *S);
+ else
+ reportError("unknown section type: " + Twine(Sec->Type));
+
+ if (HasError)
+ return false;
+
+ StringStream.flush();
+
+ // Write the section size followed by the content
+ encodeULEB128(OutString.size(), OS);
+ OS << OutString;
+ }
+
+ // Write a reloc section for each section that has relocations.
+ uint32_t SectionIndex = 0;
+ for (const std::unique_ptr<WasmYAML::Section> &Sec : Obj.Sections) {
+ if (Sec->Relocations.empty()) {
+ SectionIndex++;
+ continue;
+ }
+
+ writeUint8(OS, wasm::WASM_SEC_CUSTOM);
+ std::string OutString;
+ raw_string_ostream StringStream(OutString);
+ writeRelocSection(StringStream, *Sec, SectionIndex++);
+ StringStream.flush();
+
+ encodeULEB128(OutString.size(), OS);
+ OS << OutString;
+ }
+
+ return true;
+}
+
+namespace llvm {
+namespace yaml {
+
+bool yaml2wasm(WasmYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH) {
+ WasmWriter Writer(Doc, EH);
+ return Writer.writeWasm(Out);
+}
+
+} // namespace yaml
+} // namespace llvm
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 88491d955c49..232d5122004a 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -295,8 +295,8 @@ void ScalarEnumerationTraits<WasmYAML::SectionType>::enumeration(
void MappingTraits<WasmYAML::Signature>::mapping(
IO &IO, WasmYAML::Signature &Signature) {
IO.mapRequired("Index", Signature.Index);
- IO.mapRequired("ReturnType", Signature.ReturnType);
IO.mapRequired("ParamTypes", Signature.ParamTypes);
+ IO.mapRequired("ReturnTypes", Signature.ReturnTypes);
}
void MappingTraits<WasmYAML::Table>::mapping(IO &IO, WasmYAML::Table &Table) {
@@ -535,6 +535,7 @@ void ScalarBitSetTraits<WasmYAML::SymbolFlags>::bitset(
BCaseMask(UNDEFINED, UNDEFINED);
BCaseMask(EXPORTED, EXPORTED);
BCaseMask(EXPLICIT_NAME, EXPLICIT_NAME);
+ BCaseMask(NO_STRIP, NO_STRIP);
#undef BCaseMask
}
@@ -559,7 +560,6 @@ void ScalarEnumerationTraits<WasmYAML::ValueType>::enumeration(
ECase(V128);
ECase(FUNCREF);
ECase(FUNC);
- ECase(NORESULT);
#undef ECase
}
diff --git a/lib/ObjectYAML/yaml2obj.cpp b/lib/ObjectYAML/yaml2obj.cpp
new file mode 100644
index 000000000000..c18fa5cfdb5e
--- /dev/null
+++ b/lib/ObjectYAML/yaml2obj.cpp
@@ -0,0 +1,77 @@
+//===-- yaml2obj.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ObjectYAML/ObjectYAML.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/YAMLTraits.h"
+
+namespace llvm {
+namespace yaml {
+
+bool convertYAML(yaml::Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler,
+ unsigned DocNum) {
+ unsigned CurDocNum = 0;
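+ // DocNum is 1-based: DocNum == 1 selects the first document in the stream.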
+ do {
+ if (++CurDocNum != DocNum)
+ continue;
+
+ yaml::YamlObjectFile Doc;
+ YIn >> Doc;
+ if (std::error_code EC = YIn.error()) {
+ ErrHandler("failed to parse YAML input: " + EC.message());
+ return false;
+ }
+
+ if (Doc.Elf)
+ return yaml2elf(*Doc.Elf, Out, ErrHandler);
+ if (Doc.Coff)
+ return yaml2coff(*Doc.Coff, Out, ErrHandler);
+ if (Doc.MachO || Doc.FatMachO)
+ return yaml2macho(Doc, Out, ErrHandler);
+ if (Doc.Minidump)
+ return yaml2minidump(*Doc.Minidump, Out, ErrHandler);
+ if (Doc.Wasm)
+ return yaml2wasm(*Doc.Wasm, Out, ErrHandler);
+
+ ErrHandler("unknown document type");
+ return false;
+
+ } while (YIn.nextDocument());
+
+ ErrHandler("cannot find the " + Twine(DocNum) +
+ getOrdinalSuffix(DocNum).data() + " document");
+ return false;
+}
+
+std::unique_ptr<object::ObjectFile>
+yaml2ObjectFile(SmallVectorImpl<char> &Storage, StringRef Yaml,
+ ErrorHandler ErrHandler) {
+ Storage.clear();
+ raw_svector_ostream OS(Storage);
+
+ yaml::Input YIn(Yaml);
+ if (!convertYAML(YIn, OS, ErrHandler))
+ return {};
+
+ Expected<std::unique_ptr<object::ObjectFile>> ObjOrErr =
+ object::ObjectFile::createObjectFile(
+ MemoryBufferRef(OS.str(), "YamlObject"));
+ if (ObjOrErr)
+ return std::move(*ObjOrErr);
+
+ ErrHandler(toString(ObjOrErr.takeError()));
+ return {};
+}
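+// Usage sketch (hypothetical caller, e.g. a unit test):
+//
+//   SmallString<0> Storage;
+//   std::unique_ptr<object::ObjectFile> Obj = yaml2ObjectFile(
+//       Storage, YamlDoc, [](const Twine &Msg) { errs() << Msg << "\n"; });
+//   // Obj is null if parsing or object-file creation failed.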
+
+} // namespace yaml
+} // namespace llvm
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index f37c142da69b..09e921502eb6 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -241,7 +241,7 @@ void DerivedArgList::AddSynthesizedArg(Arg *A) {
Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const {
SynthesizedArgs.push_back(
- make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()),
+ std::make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()),
BaseArgs.MakeIndex(Opt.getName()), BaseArg));
return SynthesizedArgs.back().get();
}
@@ -250,7 +250,7 @@ Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
unsigned Index = BaseArgs.MakeIndex(Value);
SynthesizedArgs.push_back(
- make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()),
+ std::make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()),
Index, BaseArgs.getArgString(Index), BaseArg));
return SynthesizedArgs.back().get();
}
@@ -259,7 +259,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
SynthesizedArgs.push_back(
- make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()),
+ std::make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()),
Index, BaseArgs.getArgString(Index + 1), BaseArg));
return SynthesizedArgs.back().get();
}
@@ -267,7 +267,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
unsigned Index = BaseArgs.MakeIndex((Opt.getName() + Value).str());
- SynthesizedArgs.push_back(make_unique<Arg>(
+ SynthesizedArgs.push_back(std::make_unique<Arg>(
Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index,
BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg));
return SynthesizedArgs.back().get();
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index e2b2a2b25268..1aaccb510f8c 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
@@ -35,6 +36,7 @@
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -51,6 +53,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/CodeGen/UnreachableBlockElim.h"
#include "llvm/IR/Dominators.h"
@@ -101,6 +104,7 @@
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
@@ -138,13 +142,14 @@
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
+#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
-#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/MergeICmps.h"
+#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
@@ -206,7 +211,7 @@ static cl::opt<bool> EnableSyntheticCounts(
cl::desc("Run synthetic function entry count generation "
"pass"));
-static Regex DefaultAliasRegex(
+static const Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
// This option is used in simplifying testing SampleFDO optimizations for
@@ -466,8 +471,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
PGOOpt->Action != PGOOptions::SampleUse) &&
PTO.LoopUnrolling)
- LPM2.addPass(
- LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll));
+ LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false,
+ PTO.ForgetAllSCEVInLoopUnroll));
for (auto &C : LoopOptimizerEndEPCallbacks)
C(LPM2, Level);
@@ -475,10 +480,15 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging));
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM1), EnableMSSALoopDependency, DebugLogging));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging));
+ // The loop passes in LPM2 (IndVarSimplifyPass, LoopIdiomRecognizePass,
+ // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
+ // *All* loop passes must preserve it, in order to be able to use it.
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging));
// Eliminate redundancies.
if (Level != O1) {
@@ -515,7 +525,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(DSEPass());
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- DebugLogging));
+ EnableMSSALoopDependency, DebugLogging));
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
@@ -540,6 +550,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
bool RunProfileGen, bool IsCS,
std::string ProfileFile,
std::string ProfileRemappingFile) {
+ assert(Level != O0 && "Not expecting O0 here!");
// Generally running simplification passes and the inliner with a high
// threshold results in smaller executables, but there may be cases where
// the size grows, so let's be conservative here and skip this simplification
@@ -570,34 +581,63 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline)));
+
+ // Delete anything that is now dead to make sure that we don't instrument
+ // dead code. Instrumentation can end up keeping dead code around and
+ // dramatically increase code size.
+ MPM.addPass(GlobalDCEPass());
}
- // Delete anything that is now dead to make sure that we don't instrument
- // dead code. Instrumentation can end up keeping dead code around and
- // dramatically increase code size.
- MPM.addPass(GlobalDCEPass());
+ if (!RunProfileGen) {
+ assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
+ MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ return;
+ }
- if (RunProfileGen) {
- MPM.addPass(PGOInstrumentationGen(IsCS));
+ // Perform PGO instrumentation.
+ MPM.addPass(PGOInstrumentationGen(IsCS));
- FunctionPassManager FPM;
- FPM.addPass(
- createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-
- // Add the profile lowering pass.
- InstrProfOptions Options;
- if (!ProfileFile.empty())
- Options.InstrProfileOutput = ProfileFile;
- Options.DoCounterPromotion = true;
- Options.UseBFIInPromotion = IsCS;
- MPM.addPass(InstrProfiling(Options, IsCS));
- } else if (!ProfileFile.empty()) {
+ FunctionPassManager FPM;
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LoopRotatePass(), EnableMSSALoopDependency, DebugLogging));
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+ // Add the profile lowering pass.
+ InstrProfOptions Options;
+ if (!ProfileFile.empty())
+ Options.InstrProfileOutput = ProfileFile;
+ // Do counter promotion at Level greater than O0.
+ Options.DoCounterPromotion = true;
+ Options.UseBFIInPromotion = IsCS;
+ MPM.addPass(InstrProfiling(Options, IsCS));
+}
+
+void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
+ bool DebugLogging, bool RunProfileGen,
+ bool IsCS, std::string ProfileFile,
+ std::string ProfileRemappingFile) {
+ if (!RunProfileGen) {
+ assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ return;
}
+
+ // Perform PGO instrumentation.
+ MPM.addPass(PGOInstrumentationGen(IsCS));
+ // Add the profile lowering pass.
+ InstrProfOptions Options;
+ if (!ProfileFile.empty())
+ Options.InstrProfileOutput = ProfileFile;
+ // Do not do counter promotion at O0.
+ Options.DoCounterPromotion = false;
+ Options.UseBFIInPromotion = IsCS;
+ MPM.addPass(InstrProfiling(Options, IsCS));
}
static InlineParams
@@ -852,6 +892,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
FunctionPassManager OptimizePM(DebugLogging);
OptimizePM.addPass(Float2IntPass());
+ OptimizePM.addPass(LowerConstantIntrinsicsPass());
+
// FIXME: We need to run some loop optimizations to re-rotate loops after
// simplify-cfg and others undo their rotation.
@@ -863,8 +905,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
C(OptimizePM, Level);
// First rotate loops that may have been un-rotated by prior passes.
- OptimizePM.addPass(
- createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+ LoopRotatePass(), EnableMSSALoopDependency, DebugLogging));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
@@ -911,19 +953,19 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// combiner for cleanup here so that the unrolling and LICM can be pipelined
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
- if (EnableUnrollAndJam) {
+ if (EnableUnrollAndJam && PTO.LoopUnrolling) {
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
}
- if (PTO.LoopUnrolling)
- OptimizePM.addPass(LoopUnrollPass(
- LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll)));
+ OptimizePM.addPass(LoopUnrollPass(
+ LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll)));
OptimizePM.addPass(WarnMissedTransformationsPass());
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- DebugLogging));
+ EnableMSSALoopDependency, DebugLogging));
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
@@ -1422,12 +1464,23 @@ Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
UnrollOpts.setOptLevel(OptLevel);
continue;
}
+ if (ParamName.consume_front("full-unroll-max=")) {
+ int Count;
+ if (ParamName.getAsInteger(0, Count))
+ return make_error<StringError>(
+ formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ UnrollOpts.setFullUnrollMaxCount(Count);
+ continue;
+ }
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "partial") {
UnrollOpts.setPartial(Enable);
} else if (ParamName == "peeling") {
UnrollOpts.setPeeling(Enable);
+ } else if (ParamName == "profile-peeling") {
+ UnrollOpts.setProfileBasedPeeling(Enable);
} else if (ParamName == "runtime") {
UnrollOpts.setRuntime(Enable);
} else if (ParamName == "upperbound") {
@@ -1542,6 +1595,26 @@ Expected<bool> parseLoopUnswitchOptions(StringRef Params) {
}
return Result;
}
+
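+// Parses mldst-motion parameters, e.g. the pipeline fragment
+// "mldst-motion<split-footer-bb>" enables splitting of the footer block.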
+Expected<bool> parseMergedLoadStoreMotionOptions(StringRef Params) {
+ bool Result = false;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ bool Enable = !ParamName.consume_front("no-");
+ if (ParamName == "split-footer-bb") {
+ Result = Enable;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid MergedLoadStoreMotion pass parameter '{0}' ",
+ ParamName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
@@ -1629,7 +1702,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "function")
return true;
- if (Name == "loop")
+ if (Name == "loop" || Name == "loop-mssa")
return true;
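+ // "loop-mssa" selects the MemorySSA-preserving loop pass manager, e.g.
+ // -passes='function(loop-mssa(licm))'.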
// Explicitly handle custom-parsed pass names.
@@ -1653,7 +1726,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
template <typename CallbacksT>
static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
- if (Name == "loop")
+ if (Name == "loop" || Name == "loop-mssa")
return true;
// Explicitly handle custom-parsed pass names.
@@ -1800,9 +1873,19 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
.Case("O3", O3)
.Case("Os", Os)
.Case("Oz", Oz);
- if (L == O0)
- // At O0 we do nothing at all!
+ if (L == O0) {
+ // Add instrumentation PGO passes -- at O0 we can still do PGO.
+ if (PGOOpt && Matches[1] != "thinlto" &&
+ (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse))
+ addPGOInstrPassesForO0(
+ MPM, DebugLogging,
+ /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
+ /* IsCS */ false, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+ // Do nothing else at all!
return Error::success();
+ }
if (Matches[1] == "default") {
MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
@@ -1947,14 +2030,15 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
FPM.addPass(std::move(NestedFPM));
return Error::success();
}
- if (Name == "loop") {
+ if (Name == "loop" || Name == "loop-mssa") {
LoopPassManager LPM(DebugLogging);
if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass,
DebugLogging))
return Err;
// Add the nested pass manager with the appropriate adaptor.
- FPM.addPass(
- createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging));
+ bool UseMemorySSA = (Name == "loop-mssa");
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
+ DebugLogging));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def
index 347f75870eb3..1fa274d172b1 100644
--- a/lib/Passes/PassRegistry.def
+++ b/lib/Passes/PassRegistry.def
@@ -24,7 +24,6 @@ MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis())
MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis())
MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
-MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
MODULE_ANALYSIS("verify", VerifierAnalysis())
MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis())
@@ -87,7 +86,10 @@ MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
MODULE_PASS("verify", VerifierPass())
MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
+MODULE_PASS("msan-module", MemorySanitizerPass({}))
+MODULE_PASS("tsan-module", ThreadSanitizerPass())
MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
+MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
MODULE_PASS("poison-checking", PoisonCheckingPass())
#undef MODULE_PASS
@@ -185,6 +187,7 @@ FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
+FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("gvn", GVN())
@@ -195,7 +198,6 @@ FUNCTION_PASS("lowerinvoke", LowerInvokePass())
FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
-FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
FUNCTION_PASS("newgvn", NewGVNPass())
FUNCTION_PASS("jump-threading", JumpThreadingPass())
@@ -270,6 +272,11 @@ FUNCTION_PASS_WITH_PARAMS("loop-vectorize",
return LoopVectorizePass(Opts);
},
parseLoopVectorizeOptions)
+FUNCTION_PASS_WITH_PARAMS("mldst-motion",
+ [](MergedLoadStoreMotionOptions Opts) {
+ return MergedLoadStoreMotionPass(Opts);
+ },
+ parseMergedLoadStoreMotionOptions)
#undef FUNCTION_PASS_WITH_PARAMS
#ifndef LOOP_ANALYSIS
@@ -277,6 +284,7 @@ FUNCTION_PASS_WITH_PARAMS("loop-vectorize",
#endif
LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("access-info", LoopAccessAnalysis())
+LOOP_ANALYSIS("ddg", DDGAnalysis())
LOOP_ANALYSIS("ivusers", IVUsersAnalysis())
LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#undef LOOP_ANALYSIS
@@ -299,7 +307,9 @@ LOOP_PASS("irce", IRCEPass())
LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass())
LOOP_PASS("unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
+LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))
LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
+LOOP_PASS("print<loop-cache-cost>", LoopCachePrinterPass(dbgs()))
LOOP_PASS("loop-predication", LoopPredicationPass())
LOOP_PASS("guard-widening", GuardWideningPass())
#undef LOOP_PASS
diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp
index afd6618e7cb3..8d5e56e26c0f 100644
--- a/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -194,6 +194,15 @@ void FunctionRecordIterator::skipOtherFiles() {
*this = FunctionRecordIterator();
}
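+// The returned indices are "imprecise" because filenames are keyed by hash:
+// a collision may map records from an unrelated file to this filename.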
+ArrayRef<unsigned> CoverageMapping::getImpreciseRecordIndicesForFilename(
+ StringRef Filename) const {
+ size_t FilenameHash = hash_value(Filename);
+ auto RecordIt = FilenameHash2RecordIndices.find(FilenameHash);
+ if (RecordIt == FilenameHash2RecordIndices.end())
+ return {};
+ return RecordIt->second;
+}
+
Error CoverageMapping::loadFunctionRecord(
const CoverageMappingRecord &Record,
IndexedInstrProfReader &ProfileReader) {
@@ -249,6 +258,20 @@ Error CoverageMapping::loadFunctionRecord(
return Error::success();
Functions.push_back(std::move(Function));
+
+ // Performance optimization: keep track of the indices of the function records
+ // which correspond to each filename. This can be used to substantially speed
+ // up queries for coverage info in a file.
+ unsigned RecordIndex = Functions.size() - 1;
+ for (StringRef Filename : Record.Filenames) {
+ auto &RecordIndices = FilenameHash2RecordIndices[hash_value(Filename)];
+ // Note that the filename set for a function record may contain duplicates,
+ // e.g. when a macro is expanded within a function and both the macro and
+ // the function are defined in the same file.
+ if (RecordIndices.empty() || RecordIndices.back() != RecordIndex)
+ RecordIndices.push_back(RecordIndex);
+ }
+
return Error::success();
}
@@ -270,6 +293,16 @@ Expected<std::unique_ptr<CoverageMapping>> CoverageMapping::load(
return std::move(Coverage);
}
+// If E is a no_data_found error, returns success. Otherwise returns E.
+static Error handleMaybeNoDataFoundError(Error E) {
+ return handleErrors(
+ std::move(E), [](const CoverageMapError &CME) {
+ if (CME.get() == coveragemap_error::no_data_found)
+ return static_cast<Error>(Error::success());
+ return make_error<CoverageMapError>(CME.get());
+ });
+}
+
Expected<std::unique_ptr<CoverageMapping>>
CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
StringRef ProfileFilename, ArrayRef<StringRef> Arches) {
@@ -289,12 +322,21 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
CovMappingBufOrErr.get()->getMemBufferRef();
auto CoverageReadersOrErr =
BinaryCoverageReader::create(CovMappingBufRef, Arch, Buffers);
- if (Error E = CoverageReadersOrErr.takeError())
- return std::move(E);
+ if (Error E = CoverageReadersOrErr.takeError()) {
+ E = handleMaybeNoDataFoundError(std::move(E));
+ if (E)
+ return std::move(E);
+ // E == success (originally a no_data_found error).
+ continue;
+ }
for (auto &Reader : CoverageReadersOrErr.get())
Readers.push_back(std::move(Reader));
Buffers.push_back(std::move(CovMappingBufOrErr.get()));
}
+ // If no readers were created, either no objects were provided or none of them
+ // had coverage data. Return an error in the latter case.
+ if (Readers.empty() && !ObjectFilenames.empty())
+ return make_error<CoverageMapError>(coveragemap_error::no_data_found);
return load(Readers, *ProfileReader);
}
@@ -607,7 +649,12 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const {
CoverageData FileCoverage(Filename);
std::vector<CountedRegion> Regions;
- for (const auto &Function : Functions) {
+ // Look up the function records in the given file. Due to hash collisions on
+ // the filename, we may get back some records that are not in the file.
+ ArrayRef<unsigned> RecordIndices =
+ getImpreciseRecordIndicesForFilename(Filename);
+ for (unsigned RecordIndex : RecordIndices) {
+ const FunctionRecord &Function = Functions[RecordIndex];
auto MainFileID = findMainViewFileID(Filename, Function);
auto FileIDs = gatherFileIDs(Filename, Function);
for (const auto &CR : Function.CountedRegions)
@@ -627,7 +674,12 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const {
std::vector<InstantiationGroup>
CoverageMapping::getInstantiationGroups(StringRef Filename) const {
FunctionInstantiationSetCollector InstantiationSetCollector;
- for (const auto &Function : Functions) {
+ // Look up the function records in the given file. Due to hash collisions on
+ // the filename, we may get back some records that are not in the file.
+ ArrayRef<unsigned> RecordIndices =
+ getImpreciseRecordIndicesForFilename(Filename);
+ for (unsigned RecordIndex : RecordIndices) {
+ const FunctionRecord &Function = Functions[RecordIndex];
auto MainFileID = findMainViewFileID(Filename, Function);
if (!MainFileID)
continue;
diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index e193e10f91d9..679ff3525eeb 100644
--- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -506,7 +506,7 @@ public:
return make_error<CoverageMapError>(coveragemap_error::malformed);
// Each coverage map has an alignment of 8, so we need to adjust alignment
// before reading the next map.
- Buf += alignmentAdjustment(Buf, 8);
+ Buf += offsetToAlignedAddr(Buf, Align(8));
auto CFR = reinterpret_cast<const FuncRecordType *>(FunBuf);
while ((const char *)CFR < FunEnd) {
@@ -539,7 +539,7 @@ Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
switch (Version) {
case CovMapVersion::Version1:
- return llvm::make_unique<VersionedCovMapFuncRecordReader<
+ return std::make_unique<VersionedCovMapFuncRecordReader<
CovMapVersion::Version1, IntPtrT, Endian>>(P, R, F);
case CovMapVersion::Version2:
case CovMapVersion::Version3:
@@ -547,10 +547,10 @@ Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
if (Error E = P.create(P.getNameData()))
return std::move(E);
if (Version == CovMapVersion::Version2)
- return llvm::make_unique<VersionedCovMapFuncRecordReader<
+ return std::make_unique<VersionedCovMapFuncRecordReader<
CovMapVersion::Version2, IntPtrT, Endian>>(P, R, F);
else
- return llvm::make_unique<VersionedCovMapFuncRecordReader<
+ return std::make_unique<VersionedCovMapFuncRecordReader<
CovMapVersion::Version3, IntPtrT, Endian>>(P, R, F);
}
llvm_unreachable("Unsupported version");
@@ -648,7 +648,7 @@ loadTestingFormat(StringRef Data) {
// Skip the padding bytes because coverage map data has an alignment of 8.
if (CoverageMapping.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
- size_t Pad = alignmentAdjustment(CoverageMapping.data(), 8);
+ size_t Pad = offsetToAlignedAddr(CoverageMapping.data(), Align(8));
if (CoverageMapping.size() < Pad)
return make_error<CoverageMapError>(coveragemap_error::malformed);
CoverageMapping = CoverageMapping.substr(Pad);
@@ -666,11 +666,11 @@ static Expected<SectionRef> lookupSection(ObjectFile &OF, StringRef Name) {
};
Name = stripSuffix(Name);
- StringRef FoundName;
for (const auto &Section : OF.sections()) {
- if (auto EC = Section.getName(FoundName))
- return errorCodeToError(EC);
- if (stripSuffix(FoundName) == Name)
+ Expected<StringRef> NameOrErr = Section.getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ if (stripSuffix(*NameOrErr) == Name)
return Section;
}
return make_error<CoverageMapError>(coveragemap_error::no_data_found);
@@ -682,7 +682,7 @@ loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch) {
if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
// If we have a universal binary, try to look up the object for the
// appropriate architecture.
- auto ObjectFileOrErr = Universal->getObjectForArch(Arch);
+ auto ObjectFileOrErr = Universal->getMachOObjectForArch(Arch);
if (!ObjectFileOrErr)
return ObjectFileOrErr.takeError();
OF = std::move(ObjectFileOrErr.get());
diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index 432b20f217ca..d75854a60d1e 100644
--- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -24,6 +24,16 @@
using namespace llvm;
using namespace coverage;
+CoverageFilenamesSectionWriter::CoverageFilenamesSectionWriter(
+ ArrayRef<StringRef> Filenames)
+ : Filenames(Filenames) {
+#ifndef NDEBUG
+ StringSet<> NameSet;
+ for (StringRef Name : Filenames)
+ assert(NameSet.insert(Name).second && "Duplicate filename");
+#endif
+}
+
void CoverageFilenamesSectionWriter::write(raw_ostream &OS) {
encodeULEB128(Filenames.size(), OS);
for (const auto &Filename : Filenames) {
diff --git a/lib/ProfileData/GCOV.cpp b/lib/ProfileData/GCOV.cpp
index fa4e433d7aa6..00e6294c57a6 100644
--- a/lib/ProfileData/GCOV.cpp
+++ b/lib/ProfileData/GCOV.cpp
@@ -40,7 +40,7 @@ bool GCOVFile::readGCNO(GCOVBuffer &Buffer) {
while (true) {
if (!Buffer.readFunctionTag())
break;
- auto GFun = make_unique<GCOVFunction>(*this);
+ auto GFun = std::make_unique<GCOVFunction>(*this);
if (!GFun->readGCNO(Buffer, Version))
return false;
Functions.push_back(std::move(GFun));
@@ -164,7 +164,7 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
for (uint32_t i = 0, e = BlockCount; i != e; ++i) {
if (!Buff.readInt(Dummy))
return false; // Block flags;
- Blocks.push_back(make_unique<GCOVBlock>(*this, i));
+ Blocks.push_back(std::make_unique<GCOVBlock>(*this, i));
}
// read edges.
@@ -185,7 +185,7 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
uint32_t Dst;
if (!Buff.readInt(Dst))
return false;
- Edges.push_back(make_unique<GCOVEdge>(*Blocks[BlockNo], *Blocks[Dst]));
+ Edges.push_back(std::make_unique<GCOVEdge>(*Blocks[BlockNo], *Blocks[Dst]));
GCOVEdge *Edge = Edges.back().get();
Blocks[BlockNo]->addDstEdge(Edge);
Blocks[Dst]->addSrcEdge(Edge);
@@ -702,14 +702,14 @@ std::string FileInfo::getCoveragePath(StringRef Filename,
std::unique_ptr<raw_ostream>
FileInfo::openCoveragePath(StringRef CoveragePath) {
if (Options.NoOutput)
- return llvm::make_unique<raw_null_ostream>();
+ return std::make_unique<raw_null_ostream>();
std::error_code EC;
auto OS =
- llvm::make_unique<raw_fd_ostream>(CoveragePath, EC, sys::fs::F_Text);
+ std::make_unique<raw_fd_ostream>(CoveragePath, EC, sys::fs::OF_Text);
if (EC) {
errs() << EC.message() << "\n";
- return llvm::make_unique<raw_null_ostream>();
+ return std::make_unique<raw_null_ostream>();
}
return std::move(OS);
}
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 510fd9887d9a..57d4fbc59f83 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -478,7 +478,7 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
return Error::success();
}
-void InstrProfRecord::accumuateCounts(CountSumOrPercent &Sum) const {
+void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const {
uint64_t FuncSum = 0;
Sum.NumEntries += Counts.size();
for (size_t F = 0, E = Counts.size(); F < E; ++F)
@@ -552,7 +552,7 @@ void InstrProfRecord::overlap(InstrProfRecord &Other, OverlapStats &Overlap,
uint64_t ValueCutoff) {
// FuncLevel CountSum for other should already computed and nonzero.
assert(FuncLevelOverlap.Test.CountSum >= 1.0f);
- accumuateCounts(FuncLevelOverlap.Base);
+ accumulateCounts(FuncLevelOverlap.Base);
bool Mismatch = (Counts.size() != Other.Counts.size());
// Check if the value profiles mismatch.
@@ -1078,12 +1078,10 @@ bool isIRPGOFlagSet(const Module *M) {
if (!IRInstrVar->hasInitializer())
return false;
- const Constant *InitVal = IRInstrVar->getInitializer();
+ auto *InitVal = dyn_cast_or_null<ConstantInt>(IRInstrVar->getInitializer());
if (!InitVal)
return false;
-
- return (dyn_cast<ConstantInt>(InitVal)->getZExtValue() &
- VARIANT_MASK_IR_PROF) != 0;
+ return (InitVal->getZExtValue() & VARIANT_MASK_IR_PROF) != 0;
}
// Check if we can safely rename this Comdat function.
@@ -1166,9 +1164,9 @@ void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
}
}
-Error OverlapStats::accumuateCounts(const std::string &BaseFilename,
- const std::string &TestFilename,
- bool IsCS) {
+Error OverlapStats::accumulateCounts(const std::string &BaseFilename,
+ const std::string &TestFilename,
+ bool IsCS) {
auto getProfileSum = [IsCS](const std::string &Filename,
CountSumOrPercent &Sum) -> Error {
auto ReaderOrErr = InstrProfReader::create(Filename);
@@ -1176,7 +1174,7 @@ Error OverlapStats::accumuateCounts(const std::string &BaseFilename,
return E;
}
auto Reader = std::move(ReaderOrErr.get());
- Reader->accumuateCounts(Sum, IsCS);
+ Reader->accumulateCounts(Sum, IsCS);
return Error::success();
};
auto Ret = getProfileSum(BaseFilename, Base);
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index fec1c152991c..23d078a3ddee 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -119,7 +119,7 @@ IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
// Create the reader.
if (!IndexedInstrProfReader::hasFormat(*Buffer))
return make_error<InstrProfError>(instrprof_error::bad_magic);
- auto Result = llvm::make_unique<IndexedInstrProfReader>(
+ auto Result = std::make_unique<IndexedInstrProfReader>(
std::move(Buffer), std::move(RemappingBuffer));
// Initialize the reader and return the result.
@@ -385,7 +385,7 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
NamesStart = Start + NamesOffset;
ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
- std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
+ std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
if (Error E = createSymtab(*NewSymtab.get()))
return E;
@@ -413,13 +413,19 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
if (NumCounters == 0)
return error(instrprof_error::malformed);
- auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters);
auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
+ ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart;
- // Check bounds.
- if (RawCounts.data() < CountersStart ||
- RawCounts.data() + RawCounts.size() > NamesStartAsCounter)
+ // Check bounds. Note that the counter pointer embedded in the data record
+ // may itself be corrupt.
+ if (NumCounters > MaxNumCounters)
return error(instrprof_error::malformed);
+ ptrdiff_t CounterOffset = getCounterOffset(CounterPtr);
+ if (CounterOffset < 0 || CounterOffset > MaxNumCounters ||
+ (CounterOffset + NumCounters) > MaxNumCounters)
+ return error(instrprof_error::malformed);
+
+ auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters);
if (ShouldSwapBytes) {
Record.Counts.clear();
@@ -767,7 +773,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
UseCS ? this->CS_Summary : this->Summary;
// initialize InstrProfSummary using the SummaryData from disk.
- Summary = llvm::make_unique<ProfileSummary>(
+ Summary = std::make_unique<ProfileSummary>(
UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
SummaryData->get(Summary::MaxBlockCount),
@@ -777,13 +783,13 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
SummaryData->get(Summary::TotalNumFunctions));
return Cur + SummarySize;
} else {
- // For older version of profile data, we need to compute on the fly:
- using namespace IndexedInstrProf;
-
+ // The older versions do not support a profile summary. This just computes
+ // an empty summary, which will not result in accurate hot/cold detection.
+ // We would need to call addRecord for all NamedInstrProfRecords to get the
+ // correct summary. However, this version is old (prior to early 2016) and
+ // has not supported an accurate summary for several years.
InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
- // FIXME: This only computes an empty summary. Need to call addRecord for
- // all NamedInstrProfRecords to get the correct summary.
- this->Summary = Builder.getSummary();
+ Summary = Builder.getSummary();
return Cur;
}
}
@@ -827,18 +833,18 @@ Error IndexedInstrProfReader::readHeader() {
// The rest of the file is an on disk hash table.
auto IndexPtr =
- llvm::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
+ std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
Start + HashOffset, Cur, Start, HashType, FormatVersion);
// Load the remapping table now if requested.
if (RemappingBuffer) {
- Remapper = llvm::make_unique<
+ Remapper = std::make_unique<
InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
std::move(RemappingBuffer), *IndexPtr);
if (Error E = Remapper->populateRemappings())
return E;
} else {
- Remapper = llvm::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
+ Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
}
Index = std::move(IndexPtr);
@@ -849,7 +855,7 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
if (Symtab.get())
return *Symtab.get();
- std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
+ std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
if (Error E = Index->populateSymtab(*NewSymtab.get())) {
consumeError(error(InstrProfError::take(std::move(E))));
}
@@ -901,7 +907,7 @@ Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
return success();
}
-void InstrProfReader::accumuateCounts(CountSumOrPercent &Sum, bool IsCS) {
+void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
uint64_t NumFuncs = 0;
for (const auto &Func : *this) {
if (isIRLevelProfile()) {
@@ -909,7 +915,7 @@ void InstrProfReader::accumuateCounts(CountSumOrPercent &Sum, bool IsCS) {
if (FuncIsCS != IsCS)
continue;
}
- Func.accumuateCounts(Sum);
+ Func.accumulateCounts(Sum);
++NumFuncs;
}
Sum.NumEntries = NumFuncs;
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 4ca2defd26da..ccb270e0b719 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -193,7 +193,7 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
const OverlapFuncFilters &FuncFilter) {
auto Name = Other.Name;
auto Hash = Other.Hash;
- Other.accumuateCounts(FuncLevelOverlap.Test);
+ Other.accumulateCounts(FuncLevelOverlap.Test);
if (FunctionData.find(Name) == FunctionData.end()) {
Overlap.addOneUnique(FuncLevelOverlap.Test);
return;
diff --git a/lib/ProfileData/ProfileSummaryBuilder.cpp b/lib/ProfileData/ProfileSummaryBuilder.cpp
index 4d5b00935742..3299b5f92069 100644
--- a/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -93,14 +93,14 @@ void ProfileSummaryBuilder::computeDetailedSummary() {
std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
computeDetailedSummary();
- return llvm::make_unique<ProfileSummary>(
+ return std::make_unique<ProfileSummary>(
ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0,
MaxFunctionCount, NumCounts, NumFunctions);
}
std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
computeDetailedSummary();
- return llvm::make_unique<ProfileSummary>(
+ return std::make_unique<ProfileSummary>(
ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount,
MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions);
}
diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp
index e17865cd15a4..003e8d4d4296 100644
--- a/lib/ProfileData/SampleProf.cpp
+++ b/lib/ProfileData/SampleProf.cpp
@@ -16,7 +16,9 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
@@ -28,8 +30,6 @@ using namespace sampleprof;
namespace llvm {
namespace sampleprof {
SampleProfileFormat FunctionSamples::Format;
-DenseMap<uint64_t, StringRef> FunctionSamples::GUIDToFuncNameMap;
-Module *FunctionSamples::CurrentModule;
} // namespace sampleprof
} // namespace llvm
@@ -68,6 +68,12 @@ class SampleProfErrorCategoryType : public std::error_category {
return "Counter overflow";
case sampleprof_error::ostream_seek_unsupported:
return "Ostream does not support seek";
+ case sampleprof_error::compress_failed:
+ return "Compress failure";
+ case sampleprof_error::uncompress_failed:
+ return "Uncompress failure";
+ case sampleprof_error::zlib_unavailable:
+ return "Zlib is unavailable";
}
llvm_unreachable("A value of sampleprof_error has no message.");
}
@@ -102,8 +108,8 @@ void SampleRecord::print(raw_ostream &OS, unsigned Indent) const {
OS << NumSamples;
if (hasCalls()) {
OS << ", calls:";
- for (const auto &I : getCallTargets())
- OS << " " << I.first() << ":" << I.second;
+ for (const auto &I : getSortedCallTargets())
+ OS << " " << I.first << ":" << I.second;
}
OS << "\n";
}
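
getSortedCallTargets() replaces direct StringMap iteration so the printed order is deterministic across runs. A standalone sketch of the copy-then-sort idea (the descending-by-count rule is an assumption for the demo; std::map stands in for the profile's call-target map):

#include <algorithm>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, uint64_t>>
sortedTargets(const std::map<std::string, uint64_t> &Targets) {
  std::vector<std::pair<std::string, uint64_t>> V(Targets.begin(),
                                                  Targets.end());
  // Hash-map iteration order is nondeterministic; sort for stable output.
  std::sort(V.begin(), V.end(),
            [](const auto &L, const auto &R) { return L.second > R.second; });
  return V;
}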
@@ -149,6 +155,7 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
FS.second.print(OS, Indent + 4);
}
}
+ OS.indent(Indent);
OS << "}\n";
} else {
OS << "No inlined callsites in this function\n";
@@ -190,3 +197,44 @@ FunctionSamples::findFunctionSamples(const DILocation *DIL) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); }
#endif
+
+std::error_code ProfileSymbolList::read(const uint8_t *Data,
+ uint64_t ListSize) {
+ const char *ListStart = reinterpret_cast<const char *>(Data);
+ uint64_t Size = 0;
+ while (Size < ListSize) {
+ StringRef Str(ListStart + Size);
+ add(Str);
+ Size += Str.size() + 1;
+ }
+ if (Size != ListSize)
+ return sampleprof_error::malformed;
+ return sampleprof_error::success;
+}
+
+std::error_code ProfileSymbolList::write(raw_ostream &OS) {
+ // Sort the symbols before output. If compression is enabled, sorting
+ // makes it much more effective.
+ std::vector<StringRef> SortedList;
+ SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end());
+ llvm::sort(SortedList);
+
+ std::string OutputString;
+ for (auto &Sym : SortedList) {
+ OutputString.append(Sym.str());
+ OutputString.append(1, '\0');
+ }
+
+ OS << OutputString;
+ return sampleprof_error::success;
+}
+
+void ProfileSymbolList::dump(raw_ostream &OS) const {
+ OS << "======== Dump profile symbol list ========\n";
+ std::vector<StringRef> SortedList;
+ SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end());
+ llvm::sort(SortedList);
+
+ for (auto &Sym : SortedList)
+ OS << Sym << "\n";
+}
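
The list format above is simply the sorted symbol names concatenated with NUL terminators, which read() reverses by scanning for '\0'. A self-contained round-trip sketch (std::string/std::vector stand in for the LLVM types):

#include <algorithm>
#include <string>
#include <vector>

std::string packSymbols(std::vector<std::string> Syms) {
  std::sort(Syms.begin(), Syms.end()); // sorted input compresses better
  std::string Out;
  for (const auto &S : Syms) {
    Out += S;
    Out += '\0';
  }
  return Out;
}

std::vector<std::string> unpackSymbols(const std::string &Buf) {
  std::vector<std::string> Syms;
  size_t Pos = 0;
  while (Pos < Buf.size()) {
    size_t End = Buf.find('\0', Pos);
    if (End == std::string::npos)
      break; // malformed: missing terminator (read() reports this case)
    Syms.emplace_back(Buf.substr(Pos, End - Pos));
    Pos = End + 1; // skip the NUL
  }
  return Syms;
}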
diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp
index 192b6c711562..001aafce7bfd 100644
--- a/lib/ProfileData/SampleProfReader.cpp
+++ b/lib/ProfileData/SampleProfReader.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
@@ -190,7 +191,7 @@ static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
/// the expected format.
///
/// \returns true if the file was loaded successfully, false otherwise.
-std::error_code SampleProfileReaderText::read() {
+std::error_code SampleProfileReaderText::readImpl() {
line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
sampleprof_error Result = sampleprof_error::success;
@@ -345,7 +346,7 @@ inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
return *Idx;
}
-ErrorOr<StringRef> SampleProfileReaderRawBinary::readStringFromTable() {
+ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
auto Idx = readStringIndex(NameTable);
if (std::error_code EC = Idx.getError())
return EC;
@@ -438,7 +439,9 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderBinary::readFuncProfile() {
+std::error_code
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+ Data = Start;
auto NumHeadSamples = readNumber<uint64_t>();
if (std::error_code EC = NumHeadSamples.getError())
return EC;
@@ -458,25 +461,210 @@ std::error_code SampleProfileReaderBinary::readFuncProfile() {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderBinary::read() {
+std::error_code SampleProfileReaderBinary::readImpl() {
while (!at_eof()) {
- if (std::error_code EC = readFuncProfile())
+ if (std::error_code EC = readFuncProfile(Data))
+ return EC;
+ }
+
+ return sampleprof_error::success;
+}
+
+std::error_code
+SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start,
+ uint64_t Size, SecType Type) {
+ Data = Start;
+ End = Start + Size;
+ switch (Type) {
+ case SecProfSummary:
+ if (std::error_code EC = readSummary())
+ return EC;
+ break;
+ case SecNameTable:
+ if (std::error_code EC = readNameTable())
+ return EC;
+ break;
+ case SecLBRProfile:
+ if (std::error_code EC = readFuncProfiles())
+ return EC;
+ break;
+ case SecProfileSymbolList:
+ if (std::error_code EC = readProfileSymbolList())
return EC;
+ break;
+ case SecFuncOffsetTable:
+ if (std::error_code EC = readFuncOffsetTable())
+ return EC;
+ break;
+ default:
+ break;
}
+ return sampleprof_error::success;
+}
+
+void SampleProfileReaderExtBinary::collectFuncsFrom(const Module &M) {
+ UseAllFuncs = false;
+ FuncsToUse.clear();
+ for (auto &F : M)
+ FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
+}
+
+std::error_code SampleProfileReaderExtBinary::readFuncOffsetTable() {
+ auto Size = readNumber<uint64_t>();
+ if (std::error_code EC = Size.getError())
+ return EC;
+
+ FuncOffsetTable.reserve(*Size);
+ for (uint32_t I = 0; I < *Size; ++I) {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+ auto Offset = readNumber<uint64_t>();
+ if (std::error_code EC = Offset.getError())
+ return EC;
+
+ FuncOffsetTable[*FName] = *Offset;
+ }
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderCompactBinary::read() {
- for (auto Name : FuncsToUse) {
- auto GUID = std::to_string(MD5Hash(Name));
- auto iter = FuncOffsetTable.find(StringRef(GUID));
- if (iter == FuncOffsetTable.end())
+std::error_code SampleProfileReaderExtBinary::readFuncProfiles() {
+ const uint8_t *Start = Data;
+ if (UseAllFuncs) {
+ while (Data < End) {
+ if (std::error_code EC = readFuncProfile(Data))
+ return EC;
+ }
+ assert(Data == End && "More data is read than expected");
+ return sampleprof_error::success;
+ }
+
+ if (Remapper) {
+ for (auto Name : FuncsToUse) {
+ Remapper->insert(Name);
+ }
+ }
+
+ for (auto NameOffset : FuncOffsetTable) {
+ auto FuncName = NameOffset.first;
+ if (!FuncsToUse.count(FuncName) &&
+ (!Remapper || !Remapper->exist(FuncName)))
continue;
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
+
+ Data = End;
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderExtBinary::readProfileSymbolList() {
+ if (!ProfSymList)
+ ProfSymList = std::make_unique<ProfileSymbolList>();
+
+ if (std::error_code EC = ProfSymList->read(Data, End - Data))
+ return EC;
+
+ Data = End;
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
+ const uint8_t *SecStart, const uint64_t SecSize,
+ const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
+ Data = SecStart;
+ End = SecStart + SecSize;
+ auto DecompressSize = readNumber<uint64_t>();
+ if (std::error_code EC = DecompressSize.getError())
+ return EC;
+ DecompressBufSize = *DecompressSize;
+
+ auto CompressSize = readNumber<uint64_t>();
+ if (std::error_code EC = CompressSize.getError())
+ return EC;
+
+ if (!llvm::zlib::isAvailable())
+ return sampleprof_error::zlib_unavailable;
+
+ StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
+ *CompressSize);
+ char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
+ size_t UCSize = DecompressBufSize;
+ llvm::Error E =
+ zlib::uncompress(CompressedStrings, Buffer, UCSize);
+ if (E)
+ return sampleprof_error::uncompress_failed;
+ DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
+ const uint8_t *BufStart =
+ reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
+
+ for (auto &Entry : SecHdrTable) {
+ // Skip empty section.
+ if (!Entry.Size)
+ continue;
+
+ const uint8_t *SecStart = BufStart + Entry.Offset;
+ uint64_t SecSize = Entry.Size;
+
+ // If the section is compressed, decompress it into the buffer
+ // DecompressBuf before reading the actual data. 'Data' is pointed at
+ // the buffer held by DecompressBuf temporarily while the actual data
+ // is read.
+ bool isCompressed = hasSecFlag(Entry, SecFlagCompress);
+ if (isCompressed) {
+ const uint8_t *DecompressBuf;
+ uint64_t DecompressBufSize;
+ if (std::error_code EC = decompressSection(
+ SecStart, SecSize, DecompressBuf, DecompressBufSize))
+ return EC;
+ SecStart = DecompressBuf;
+ SecSize = DecompressBufSize;
+ }
+
+ if (std::error_code EC = readOneSection(SecStart, SecSize, Entry.Type))
+ return EC;
+ if (Data != SecStart + SecSize)
+ return sampleprof_error::malformed;
+
+ // Point 'Data' back from DecompressBuf to the original Buffer.
+ if (isCompressed) {
+ Data = BufStart + Entry.Offset;
+ End = BufStart + Buffer->getBufferSize();
+ }
+ }
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderCompactBinary::readImpl() {
+ std::vector<uint64_t> OffsetsToUse;
+ if (UseAllFuncs) {
+ for (auto FuncEntry : FuncOffsetTable) {
+ OffsetsToUse.push_back(FuncEntry.second);
+ }
+ } else {
+ for (auto Name : FuncsToUse) {
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ OffsetsToUse.push_back(iter->second);
+ }
+ }
+
+ for (auto Offset : OffsetsToUse) {
const uint8_t *SavedData = Data;
- Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
- iter->second;
- if (std::error_code EC = readFuncProfile())
+ if (std::error_code EC = readFuncProfile(
+ reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
+ Offset))
return EC;
Data = SavedData;
}
@@ -489,6 +677,12 @@ std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
return sampleprof_error::bad_magic;
}
+std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
+ if (Magic == SPMagic(SPF_Ext_Binary))
+ return sampleprof_error::success;
+ return sampleprof_error::bad_magic;
+}
+
std::error_code
SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
if (Magic == SPMagic(SPF_Compact_Binary))
@@ -496,7 +690,7 @@ SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
return sampleprof_error::bad_magic;
}
-std::error_code SampleProfileReaderRawBinary::readNameTable() {
+std::error_code SampleProfileReaderBinary::readNameTable() {
auto Size = readNumber<uint32_t>();
if (std::error_code EC = Size.getError())
return EC;
@@ -525,10 +719,98 @@ std::error_code SampleProfileReaderCompactBinary::readNameTable() {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderBinary::readHeader() {
- Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
- End = Data + Buffer->getBufferSize();
+std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTableEntry() {
+ SecHdrTableEntry Entry;
+ auto Type = readUnencodedNumber<uint64_t>();
+ if (std::error_code EC = Type.getError())
+ return EC;
+ Entry.Type = static_cast<SecType>(*Type);
+ auto Flags = readUnencodedNumber<uint64_t>();
+ if (std::error_code EC = Flags.getError())
+ return EC;
+ Entry.Flags = *Flags;
+
+ auto Offset = readUnencodedNumber<uint64_t>();
+ if (std::error_code EC = Offset.getError())
+ return EC;
+ Entry.Offset = *Offset;
+
+ auto Size = readUnencodedNumber<uint64_t>();
+ if (std::error_code EC = Size.getError())
+ return EC;
+ Entry.Size = *Size;
+
+ SecHdrTable.push_back(std::move(Entry));
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
+ auto EntryNum = readUnencodedNumber<uint64_t>();
+ if (std::error_code EC = EntryNum.getError())
+ return EC;
+
+ for (uint32_t i = 0; i < (*EntryNum); i++)
+ if (std::error_code EC = readSecHdrTableEntry())
+ return EC;
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
+ const uint8_t *BufStart =
+ reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
+ Data = BufStart;
+ End = BufStart + Buffer->getBufferSize();
+
+ if (std::error_code EC = readMagicIdent())
+ return EC;
+
+ if (std::error_code EC = readSecHdrTable())
+ return EC;
+
+ return sampleprof_error::success;
+}
+
+uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
+ for (auto &Entry : SecHdrTable) {
+ if (Entry.Type == Type)
+ return Entry.Size;
+ }
+ return 0;
+}
+
+uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
+ // Sections in SecHdrTable are not necessarily in the same order as
+ // sections in the profile: a section like FuncOffsetTable needs to be
+ // written after the LBRProfile section but read before it, so we
+ // cannot simply use the last entry in SecHdrTable to calculate the
+ // file size.
+ uint64_t FileSize = 0;
+ for (auto &Entry : SecHdrTable) {
+ FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
+ }
+ return FileSize;
+}
+
+bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
+ uint64_t TotalSecsSize = 0;
+ for (auto &Entry : SecHdrTable) {
+ OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
+ << ", Size: " << Entry.Size << "\n";
+ TotalSecsSize += getSectionSize(Entry.Type);
+ }
+ uint64_t HeaderSize = SecHdrTable.front().Offset;
+ assert(HeaderSize + TotalSecsSize == getFileSize() &&
+ "Size of 'header + sections' doesn't match the total size of profile");
+
+ OS << "Header Size: " << HeaderSize << "\n";
+ OS << "Total Sections Size: " << TotalSecsSize << "\n";
+ OS << "File Size: " << getFileSize() << "\n";
+ return true;
+}
+
+std::error_code SampleProfileReaderBinary::readMagicIdent() {
// Read and check the magic identifier.
auto Magic = readNumber<uint64_t>();
if (std::error_code EC = Magic.getError())
@@ -543,6 +825,16 @@ std::error_code SampleProfileReaderBinary::readHeader() {
else if (*Version != SPVersion())
return sampleprof_error::unsupported_version;
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderBinary::readHeader() {
+ Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
+ End = Data + Buffer->getBufferSize();
+
+ if (std::error_code EC = readMagicIdent())
+ return EC;
+
if (std::error_code EC = readSummary())
return EC;
@@ -590,12 +882,11 @@ std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
return sampleprof_error::success;
}
-void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) {
+void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) {
+ UseAllFuncs = false;
FuncsToUse.clear();
- for (auto &F : M) {
- StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
- FuncsToUse.insert(CanonName);
- }
+ for (auto &F : M)
+ FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
}
std::error_code SampleProfileReaderBinary::readSummaryEntry(
@@ -647,7 +938,7 @@ std::error_code SampleProfileReaderBinary::readSummary() {
if (EC != sampleprof_error::success)
return EC;
}
- Summary = llvm::make_unique<ProfileSummary>(
+ Summary = std::make_unique<ProfileSummary>(
ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
*MaxFunctionCount, *NumBlocks, *NumFunctions);
@@ -661,6 +952,13 @@ bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
return Magic == SPMagic();
}
+bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
+ const uint8_t *Data =
+ reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
+ uint64_t Magic = decodeULEB128(Data);
+ return Magic == SPMagic(SPF_Ext_Binary);
+}
+
bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
const uint8_t *Data =
reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
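
Each hasFormat() above sniffs a ULEB128-encoded magic number from the head of the buffer. For reference, a standalone reimplementation of the LEB128 scheme these readers and writers rely on (7 payload bits per byte, high bit flags continuation; names chosen to avoid clashing with the llvm::encodeULEB128/decodeULEB128 helpers):

#include <cstdint>
#include <vector>

void writeULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (V);
}

uint64_t readULEB128(const uint8_t *P) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  do {
    Value |= uint64_t(*P & 0x7f) << Shift;
    Shift += 7;
  } while (*P++ & 0x80);
  return Value;
}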
@@ -894,7 +1192,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
///
/// This format is generated by the Linux Perf conversion tool at
/// https://github.com/google/autofdo.
-std::error_code SampleProfileReaderGCC::read() {
+std::error_code SampleProfileReaderGCC::readImpl() {
// Read the string table.
if (std::error_code EC = readNameTable())
return EC;
@@ -911,38 +1209,31 @@ bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
return Magic == "adcg*704";
}
-std::error_code SampleProfileReaderItaniumRemapper::read() {
- // If the underlying data is in compact format, we can't remap it because
+void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
+ // If the reader is in compact format, we can't remap it because
// we don't know what the original function names were.
- if (getFormat() == SPF_Compact_Binary) {
+ if (Reader.getFormat() == SPF_Compact_Binary) {
Ctx.diagnose(DiagnosticInfoSampleProfile(
- Buffer->getBufferIdentifier(),
+ Reader.getBuffer()->getBufferIdentifier(),
"Profile data remapping cannot be applied to profile data "
"in compact format (original mangled names are not available).",
DS_Warning));
- return sampleprof_error::success;
- }
-
- if (Error E = Remappings.read(*Buffer)) {
- handleAllErrors(
- std::move(E), [&](const SymbolRemappingParseError &ParseError) {
- reportError(ParseError.getLineNum(), ParseError.getMessage());
- });
- return sampleprof_error::malformed;
+ return;
}
- for (auto &Sample : getProfiles())
- if (auto Key = Remappings.insert(Sample.first()))
+ assert(Remappings && "should be initialized while creating remapper");
+ for (auto &Sample : Reader.getProfiles())
+ if (auto Key = Remappings->insert(Sample.first()))
SampleMap.insert({Key, &Sample.second});
- return sampleprof_error::success;
+ RemappingApplied = true;
}
FunctionSamples *
SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname) {
- if (auto Key = Remappings.lookup(Fname))
+ if (auto Key = Remappings->lookup(Fname))
return SampleMap.lookup(Key);
- return SampleProfileReader::getSamplesFor(Fname);
+ return nullptr;
}
/// Prepare a memory buffer for the contents of \p Filename.
@@ -968,13 +1259,16 @@ setupMemoryBuffer(const Twine &Filename) {
///
/// \param C The LLVM context to use to emit diagnostics.
///
+/// \param RemapFilename The file used for profile remapping.
+///
/// \returns an error code indicating the status of the created reader.
ErrorOr<std::unique_ptr<SampleProfileReader>>
-SampleProfileReader::create(const Twine &Filename, LLVMContext &C) {
+SampleProfileReader::create(const std::string Filename, LLVMContext &C,
+ const std::string RemapFilename) {
auto BufferOrError = setupMemoryBuffer(Filename);
if (std::error_code EC = BufferOrError.getError())
return EC;
- return create(BufferOrError.get(), C);
+ return create(BufferOrError.get(), C, RemapFilename);
}
/// Create a sample profile remapper from the given input, to remap the
@@ -982,20 +1276,48 @@ SampleProfileReader::create(const Twine &Filename, LLVMContext &C) {
///
/// \param Filename The file to open.
///
-/// \param C The LLVM context to use to emit diagnostics.
+/// \param Reader The profile reader the remapper is going to be applied to.
///
-/// \param Underlying The underlying profile data reader to remap.
+/// \param C The LLVM context to use to emit diagnostics.
///
/// \returns an error code indicating the status of the created reader.
-ErrorOr<std::unique_ptr<SampleProfileReader>>
-SampleProfileReaderItaniumRemapper::create(
- const Twine &Filename, LLVMContext &C,
- std::unique_ptr<SampleProfileReader> Underlying) {
+ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
+SampleProfileReaderItaniumRemapper::create(const std::string Filename,
+ SampleProfileReader &Reader,
+ LLVMContext &C) {
auto BufferOrError = setupMemoryBuffer(Filename);
if (std::error_code EC = BufferOrError.getError())
return EC;
- return llvm::make_unique<SampleProfileReaderItaniumRemapper>(
- std::move(BufferOrError.get()), C, std::move(Underlying));
+ return create(BufferOrError.get(), Reader, C);
+}
+
+/// Create a sample profile remapper from the given input, to remap the
+/// function names in the given profile data.
+///
+/// \param B The memory buffer to create the reader from (assumes ownership).
+///
+/// \param C The LLVM context to use to emit diagnostics.
+///
+/// \param Reader The profile reader the remapper is going to be applied to.
+///
+/// \returns an error code indicating the status of the created reader.
+ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
+SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
+ SampleProfileReader &Reader,
+ LLVMContext &C) {
+ auto Remappings = std::make_unique<SymbolRemappingReader>();
+ if (Error E = Remappings->read(*B.get())) {
+ handleAllErrors(
+ std::move(E), [&](const SymbolRemappingParseError &ParseError) {
+ C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
+ ParseError.getLineNum(),
+ ParseError.getMessage()));
+ });
+ return sampleprof_error::malformed;
+ }
+
+ return std::make_unique<SampleProfileReaderItaniumRemapper>(
+ std::move(B), std::move(Remappings), Reader);
}
/// Create a sample profile reader based on the format of the input data.
@@ -1004,12 +1326,17 @@ SampleProfileReaderItaniumRemapper::create(
///
/// \param C The LLVM context to use to emit diagnostics.
///
+/// \param RemapFilename The file used for profile remapping.
+///
/// \returns an error code indicating the status of the created reader.
ErrorOr<std::unique_ptr<SampleProfileReader>>
-SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) {
+SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
+ const std::string RemapFilename) {
std::unique_ptr<SampleProfileReader> Reader;
if (SampleProfileReaderRawBinary::hasFormat(*B))
Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
+ else if (SampleProfileReaderExtBinary::hasFormat(*B))
+ Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
else if (SampleProfileReaderCompactBinary::hasFormat(*B))
Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
else if (SampleProfileReaderGCC::hasFormat(*B))
@@ -1019,9 +1346,21 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) {
else
return sampleprof_error::unrecognized_format;
+ if (!RemapFilename.empty()) {
+ auto ReaderOrErr =
+ SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not create remapper: " + EC.message();
+ C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
+ return EC;
+ }
+ Reader->Remapper = std::move(ReaderOrErr.get());
+ }
+
FunctionSamples::Format = Reader->getFormat();
- if (std::error_code EC = Reader->readHeader())
+ if (std::error_code EC = Reader->readHeader()) {
return EC;
+ }
return std::move(Reader);
}
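
create() above is a classic sniff-and-dispatch factory: each reader class gets a chance to recognize the buffer's magic before the remapper and header logic run. A toy version of the shape (formats and magic strings here are invented for illustration):

#include <cctype>
#include <memory>
#include <string>

struct Reader {
  virtual ~Reader() = default;
};
struct RawBinaryReader : Reader {
  static bool hasFormat(const std::string &Buf) {
    return Buf.rfind("RAWB", 0) == 0; // pretend 4-byte magic
  }
};
struct TextReader : Reader {
  static bool hasFormat(const std::string &Buf) {
    return !Buf.empty() &&
           (std::isalpha(static_cast<unsigned char>(Buf[0])) || Buf[0] == '_');
  }
};

std::unique_ptr<Reader> createReader(const std::string &Buf) {
  if (RawBinaryReader::hasFormat(Buf))
    return std::make_unique<RawBinaryReader>();
  if (TextReader::hasFormat(Buf))
    return std::make_unique<TextReader>();
  return nullptr; // unrecognized format
}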
diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp
index 8b876e0aa5d9..8d09af31f94b 100644
--- a/lib/ProfileData/SampleProfWriter.cpp
+++ b/lib/ProfileData/SampleProfWriter.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorOr.h"
@@ -39,11 +40,8 @@
using namespace llvm;
using namespace sampleprof;
-std::error_code
-SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
- if (std::error_code EC = writeHeader(ProfileMap))
- return EC;
-
+std::error_code SampleProfileWriter::writeFuncProfiles(
+ const StringMap<FunctionSamples> &ProfileMap) {
// Sort the ProfileMap by total samples.
typedef std::pair<StringRef, const FunctionSamples *> NameFunctionSamples;
std::vector<NameFunctionSamples> V;
@@ -58,12 +56,161 @@ SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
});
for (const auto &I : V) {
- if (std::error_code EC = write(*I.second))
+ if (std::error_code EC = writeSample(*I.second))
return EC;
}
return sampleprof_error::success;
}
+std::error_code
+SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
+ if (std::error_code EC = writeHeader(ProfileMap))
+ return EC;
+
+ if (std::error_code EC = writeFuncProfiles(ProfileMap))
+ return EC;
+
+ return sampleprof_error::success;
+}
+
+SecHdrTableEntry &
+SampleProfileWriterExtBinaryBase::getEntryInLayout(SecType Type) {
+ auto SecIt = std::find_if(
+ SectionHdrLayout.begin(), SectionHdrLayout.end(),
+ [=](const auto &Entry) -> bool { return Entry.Type == Type; });
+ return *SecIt;
+}
+
+/// Return the current position and prepare to use it as the start
+/// position of a section.
+uint64_t SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type) {
+ uint64_t SectionStart = OutputStream->tell();
+ auto &Entry = getEntryInLayout(Type);
+ // Use LocalBuf as a temporary output for writing data.
+ if (hasSecFlag(Entry, SecFlagCompress))
+ LocalBufStream.swap(OutputStream);
+ return SectionStart;
+}
+
+std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() {
+ if (!llvm::zlib::isAvailable())
+ return sampleprof_error::zlib_unavailable;
+ std::string &UncompressedStrings =
+ static_cast<raw_string_ostream *>(LocalBufStream.get())->str();
+ if (UncompressedStrings.size() == 0)
+ return sampleprof_error::success;
+ auto &OS = *OutputStream;
+ SmallString<128> CompressedStrings;
+ llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings,
+ zlib::BestSizeCompression);
+ if (E)
+ return sampleprof_error::compress_failed;
+ encodeULEB128(UncompressedStrings.size(), OS);
+ encodeULEB128(CompressedStrings.size(), OS);
+ OS << CompressedStrings.str();
+ UncompressedStrings.clear();
+ return sampleprof_error::success;
+}
+
+/// Add a new section into section header table.
+std::error_code
+SampleProfileWriterExtBinaryBase::addNewSection(SecType Type,
+ uint64_t SectionStart) {
+ auto Entry = getEntryInLayout(Type);
+ if (hasSecFlag(Entry, SecFlagCompress)) {
+ LocalBufStream.swap(OutputStream);
+ if (std::error_code EC = compressAndOutput())
+ return EC;
+ }
+ SecHdrTable.push_back({Type, Entry.Flags, SectionStart - FileStart,
+ OutputStream->tell() - SectionStart});
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileWriterExtBinaryBase::write(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ if (std::error_code EC = writeHeader(ProfileMap))
+ return EC;
+
+ std::string LocalBuf;
+ LocalBufStream = std::make_unique<raw_string_ostream>(LocalBuf);
+ if (std::error_code EC = writeSections(ProfileMap))
+ return EC;
+
+ if (std::error_code EC = writeSecHdrTable())
+ return EC;
+
+ return sampleprof_error::success;
+}
+
+std::error_code
+SampleProfileWriterExtBinary::writeSample(const FunctionSamples &S) {
+ uint64_t Offset = OutputStream->tell();
+ StringRef Name = S.getName();
+ FuncOffsetTable[Name] = Offset - SecLBRProfileStart;
+ encodeULEB128(S.getHeadSamples(), *OutputStream);
+ return writeBody(S);
+}
+
+std::error_code SampleProfileWriterExtBinary::writeFuncOffsetTable() {
+ auto &OS = *OutputStream;
+
+ // Write out the table size.
+ encodeULEB128(FuncOffsetTable.size(), OS);
+
+ // Write out FuncOffsetTable.
+ for (auto entry : FuncOffsetTable) {
+ writeNameIdx(entry.first);
+ encodeULEB128(entry.second, OS);
+ }
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileWriterExtBinary::writeSections(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ uint64_t SectionStart = markSectionStart(SecProfSummary);
+ computeSummary(ProfileMap);
+ if (auto EC = writeSummary())
+ return EC;
+ if (std::error_code EC = addNewSection(SecProfSummary, SectionStart))
+ return EC;
+
+ // Generate the name table for all the functions referenced in the profile.
+ SectionStart = markSectionStart(SecNameTable);
+ for (const auto &I : ProfileMap) {
+ addName(I.first());
+ addNames(I.second);
+ }
+ writeNameTable();
+ if (std::error_code EC = addNewSection(SecNameTable, SectionStart))
+ return EC;
+
+ SectionStart = markSectionStart(SecLBRProfile);
+ SecLBRProfileStart = OutputStream->tell();
+ if (std::error_code EC = writeFuncProfiles(ProfileMap))
+ return EC;
+ if (std::error_code EC = addNewSection(SecLBRProfile, SectionStart))
+ return EC;
+
+ if (ProfSymList && ProfSymList->toCompress())
+ setToCompressSection(SecProfileSymbolList);
+
+ SectionStart = markSectionStart(SecProfileSymbolList);
+ if (ProfSymList && ProfSymList->size() > 0)
+ if (std::error_code EC = ProfSymList->write(*OutputStream))
+ return EC;
+ if (std::error_code EC = addNewSection(SecProfileSymbolList, SectionStart))
+ return EC;
+
+ SectionStart = markSectionStart(SecFuncOffsetTable);
+ if (std::error_code EC = writeFuncOffsetTable())
+ return EC;
+ if (std::error_code EC = addNewSection(SecFuncOffsetTable, SectionStart))
+ return EC;
+
+ return sampleprof_error::success;
+}
+
std::error_code SampleProfileWriterCompactBinary::write(
const StringMap<FunctionSamples> &ProfileMap) {
if (std::error_code EC = SampleProfileWriter::write(ProfileMap))
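
compressAndOutput() above frames each compressed section as <uncompressed size><compressed size><payload>, with both sizes ULEB128-encoded, so decompressSection() can size its buffer before inflating. A sketch of that framing against plain zlib (fixed-width host-endian sizes here instead of ULEB128, for brevity):

#include <cstdint>
#include <string>
#include <vector>
#include <zlib.h>

bool compressSection(const std::string &In, std::string &Out) {
  uLongf Bound = compressBound(In.size());
  std::vector<unsigned char> Buf(Bound);
  if (compress2(Buf.data(), &Bound,
                reinterpret_cast<const unsigned char *>(In.data()), In.size(),
                Z_BEST_COMPRESSION) != Z_OK)
    return false;
  uint64_t USize = In.size(), CSize = Bound;
  Out.assign(reinterpret_cast<const char *>(&USize), sizeof(USize));
  Out.append(reinterpret_cast<const char *>(&CSize), sizeof(CSize));
  Out.append(reinterpret_cast<const char *>(Buf.data()), CSize);
  return true; // reader side: read both sizes, allocate, then uncompress()
}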
@@ -81,7 +228,7 @@ std::error_code SampleProfileWriterCompactBinary::write(
///
/// The format used here is more structured and deliberate because
/// it needs to be parsed by the SampleProfileReaderText class.
-std::error_code SampleProfileWriterText::write(const FunctionSamples &S) {
+std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
auto &OS = *OutputStream;
OS << S.getName() << ":" << S.getTotalSamples();
if (Indent == 0)
@@ -100,8 +247,8 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) {
OS << Sample.getSamples();
- for (const auto &J : Sample.getCallTargets())
- OS << " " << J.first() << ":" << J.second;
+ for (const auto &J : Sample.getSortedCallTargets())
+ OS << " " << J.first << ":" << J.second;
OS << "\n";
}
@@ -117,7 +264,7 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) {
OS << Loc.LineOffset << ": ";
else
OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
- if (std::error_code EC = write(CalleeSamples))
+ if (std::error_code EC = writeSample(CalleeSamples))
return EC;
}
Indent -= 1;
@@ -163,7 +310,7 @@ void SampleProfileWriterBinary::stablizeNameTable(std::set<StringRef> &V) {
NameTable[N] = i++;
}
-std::error_code SampleProfileWriterRawBinary::writeNameTable() {
+std::error_code SampleProfileWriterBinary::writeNameTable() {
auto &OS = *OutputStream;
std::set<StringRef> V;
stablizeNameTable(V);
@@ -214,25 +361,18 @@ std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterRawBinary::writeMagicIdent() {
- auto &OS = *OutputStream;
- // Write file magic identifier.
- encodeULEB128(SPMagic(), OS);
- encodeULEB128(SPVersion(), OS);
- return sampleprof_error::success;
-}
-
-std::error_code SampleProfileWriterCompactBinary::writeMagicIdent() {
+std::error_code
+SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
auto &OS = *OutputStream;
// Write file magic identifier.
- encodeULEB128(SPMagic(SPF_Compact_Binary), OS);
+ encodeULEB128(SPMagic(Format), OS);
encodeULEB128(SPVersion(), OS);
return sampleprof_error::success;
}
std::error_code SampleProfileWriterBinary::writeHeader(
const StringMap<FunctionSamples> &ProfileMap) {
- writeMagicIdent();
+ writeMagicIdent(Format);
computeSummary(ProfileMap);
if (auto EC = writeSummary())
@@ -248,6 +388,82 @@ std::error_code SampleProfileWriterBinary::writeHeader(
return sampleprof_error::success;
}
+void SampleProfileWriterExtBinaryBase::setToCompressAllSections() {
+ for (auto &Entry : SectionHdrLayout)
+ addSecFlags(Entry, SecFlagCompress);
+}
+
+void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) {
+ addSectionFlags(Type, SecFlagCompress);
+}
+
+void SampleProfileWriterExtBinaryBase::addSectionFlags(SecType Type,
+ SecFlags Flags) {
+ for (auto &Entry : SectionHdrLayout) {
+ if (Entry.Type == Type)
+ addSecFlags(Entry, Flags);
+ }
+}
+
+void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {
+ support::endian::Writer Writer(*OutputStream, support::little);
+
+ Writer.write(static_cast<uint64_t>(SectionHdrLayout.size()));
+ SecHdrTableOffset = OutputStream->tell();
+ for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) {
+ Writer.write(static_cast<uint64_t>(-1));
+ Writer.write(static_cast<uint64_t>(-1));
+ Writer.write(static_cast<uint64_t>(-1));
+ Writer.write(static_cast<uint64_t>(-1));
+ }
+}
+
+std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
+ auto &OFS = static_cast<raw_fd_ostream &>(*OutputStream);
+ uint64_t Saved = OutputStream->tell();
+
+ // Set OutputStream to the location saved in SecHdrTableOffset.
+ if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1)
+ return sampleprof_error::ostream_seek_unsupported;
+ support::endian::Writer Writer(*OutputStream, support::little);
+
+ DenseMap<uint32_t, uint32_t> IndexMap;
+ for (uint32_t i = 0; i < SecHdrTable.size(); i++) {
+ IndexMap.insert({static_cast<uint32_t>(SecHdrTable[i].Type), i});
+ }
+
+ // Write the section header table in the order specified in
+ // SectionHdrLayout. That is the section order the reader will see.
+ // Note that the order in which the reader expects to read sections
+ // may differ from the order in which the writer is able to write
+ // them, so we adjust the order in SecHdrTable to be consistent with
+ // SectionHdrLayout before writing the table out.
+ for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) {
+ uint32_t idx = IndexMap[static_cast<uint32_t>(SectionHdrLayout[i].Type)];
+ Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Type));
+ Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Flags));
+ Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Offset));
+ Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Size));
+ }
+
+ // Reset OutputStream.
+ if (OFS.seek(Saved) == (uint64_t)-1)
+ return sampleprof_error::ostream_seek_unsupported;
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ auto &OS = *OutputStream;
+ FileStart = OS.tell();
+ writeMagicIdent(Format);
+
+ allocSecHdrTable();
+ return sampleprof_error::success;
+}
+
std::error_code SampleProfileWriterCompactBinary::writeHeader(
const StringMap<FunctionSamples> &ProfileMap) {
support::endian::Writer Writer(*OutputStream, support::little);
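
allocSecHdrTable()/writeSecHdrTable() above implement the reserve-then-backpatch idiom: write placeholder entries, emit all sections, then seek back and fill in the real offsets and sizes (hence the ostream_seek_unsupported error for non-seekable streams). A standalone sketch with std::fstream standing in for raw_fd_ostream:

#include <cstdint>
#include <fstream>

// Assumes OS was opened with in | out | binary so seekp() can rewind.
void writeWithBackpatch(std::fstream &OS) {
  uint64_t Placeholder = ~uint64_t(0);
  std::fstream::pos_type HdrPos = OS.tellp();
  OS.write(reinterpret_cast<const char *>(&Placeholder), 8); // reserve slot
  // ... emit the section payload here ...
  uint64_t SectionSize = 42; // in practice, derived from tellp() arithmetic
  std::fstream::pos_type End = OS.tellp();
  OS.seekp(HdrPos);
  OS.write(reinterpret_cast<const char *>(&SectionSize), 8); // backpatch
  OS.seekp(End); // restore the write position
}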
@@ -294,8 +510,8 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
encodeULEB128(Loc.Discriminator, OS);
encodeULEB128(Sample.getSamples(), OS);
encodeULEB128(Sample.getCallTargets().size(), OS);
- for (const auto &J : Sample.getCallTargets()) {
- StringRef Callee = J.first();
+ for (const auto &J : Sample.getSortedCallTargets()) {
+ StringRef Callee = J.first;
uint64_t CalleeSamples = J.second;
if (std::error_code EC = writeNameIdx(Callee))
return EC;
@@ -324,13 +540,14 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
/// Write samples of a top-level function to a binary file.
///
/// \returns true if the samples were written successfully, false otherwise.
-std::error_code SampleProfileWriterBinary::write(const FunctionSamples &S) {
+std::error_code
+SampleProfileWriterBinary::writeSample(const FunctionSamples &S) {
encodeULEB128(S.getHeadSamples(), *OutputStream);
return writeBody(S);
}
std::error_code
-SampleProfileWriterCompactBinary::write(const FunctionSamples &S) {
+SampleProfileWriterCompactBinary::writeSample(const FunctionSamples &S) {
uint64_t Offset = OutputStream->tell();
StringRef Name = S.getName();
FuncOffsetTable[Name] = Offset;
@@ -349,10 +566,11 @@ ErrorOr<std::unique_ptr<SampleProfileWriter>>
SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
std::error_code EC;
std::unique_ptr<raw_ostream> OS;
- if (Format == SPF_Binary || Format == SPF_Compact_Binary)
- OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None));
+ if (Format == SPF_Binary || Format == SPF_Ext_Binary ||
+ Format == SPF_Compact_Binary)
+ OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_None));
else
- OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text));
+ OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_Text));
if (EC)
return EC;
@@ -374,6 +592,8 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
if (Format == SPF_Binary)
Writer.reset(new SampleProfileWriterRawBinary(OS));
+ else if (Format == SPF_Ext_Binary)
+ Writer.reset(new SampleProfileWriterExtBinary(OS));
else if (Format == SPF_Compact_Binary)
Writer.reset(new SampleProfileWriterCompactBinary(OS));
else if (Format == SPF_Text)
@@ -386,6 +606,7 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
if (EC)
return EC;
+ Writer->Format = Format;
return std::move(Writer);
}
diff --git a/lib/Remarks/BitstreamRemarkParser.cpp b/lib/Remarks/BitstreamRemarkParser.cpp
new file mode 100644
index 000000000000..99a82e1ee3af
--- /dev/null
+++ b/lib/Remarks/BitstreamRemarkParser.cpp
@@ -0,0 +1,597 @@
+//===- BitstreamRemarkParser.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility methods used by clients that want to use the
+// parser for remark diagnostics in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/BitstreamRemarkParser.h"
+#include "BitstreamRemarkParser.h"
+#include "llvm/Remarks/BitstreamRemarkContainer.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+static Error unknownRecord(const char *BlockName, unsigned RecordID) {
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing %s: unknown record entry (%lu).", BlockName,
+ RecordID);
+}
+
+static Error malformedRecord(const char *BlockName, const char *RecordName) {
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing %s: malformed record entry (%s).", BlockName,
+ RecordName);
+}
+
+BitstreamMetaParserHelper::BitstreamMetaParserHelper(
+ BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo)
+ : Stream(Stream), BlockInfo(BlockInfo) {}
+
+/// Parse a record and fill in the fields in the parser.
+static Error parseRecord(BitstreamMetaParserHelper &Parser, unsigned Code) {
+ BitstreamCursor &Stream = Parser.Stream;
+ // Note: 2 is used here because it's the max number of fields we have per
+ // record.
+ SmallVector<uint64_t, 2> Record;
+ StringRef Blob;
+ Expected<unsigned> RecordID = Stream.readRecord(Code, Record, &Blob);
+ if (!RecordID)
+ return RecordID.takeError();
+
+ switch (*RecordID) {
+ case RECORD_META_CONTAINER_INFO: {
+ if (Record.size() != 2)
+ return malformedRecord("BLOCK_META", "RECORD_META_CONTAINER_INFO");
+ Parser.ContainerVersion = Record[0];
+ Parser.ContainerType = Record[1];
+ break;
+ }
+ case RECORD_META_REMARK_VERSION: {
+ if (Record.size() != 1)
+ return malformedRecord("BLOCK_META", "RECORD_META_REMARK_VERSION");
+ Parser.RemarkVersion = Record[0];
+ break;
+ }
+ case RECORD_META_STRTAB: {
+ if (Record.size() != 0)
+ return malformedRecord("BLOCK_META", "RECORD_META_STRTAB");
+ Parser.StrTabBuf = Blob;
+ break;
+ }
+ case RECORD_META_EXTERNAL_FILE: {
+ if (Record.size() != 0)
+ return malformedRecord("BLOCK_META", "RECORD_META_EXTERNAL_FILE");
+ Parser.ExternalFilePath = Blob;
+ break;
+ }
+ default:
+ return unknownRecord("BLOCK_META", *RecordID);
+ }
+ return Error::success();
+}
+
+BitstreamRemarkParserHelper::BitstreamRemarkParserHelper(
+ BitstreamCursor &Stream)
+ : Stream(Stream) {}
+
+/// Parse a record and fill in the fields in the parser.
+static Error parseRecord(BitstreamRemarkParserHelper &Parser, unsigned Code) {
+ BitstreamCursor &Stream = Parser.Stream;
+ // Note: 5 is used here because it's the max number of fields we have per
+ // record.
+ SmallVector<uint64_t, 5> Record;
+ StringRef Blob;
+ Expected<unsigned> RecordID = Stream.readRecord(Code, Record, &Blob);
+ if (!RecordID)
+ return RecordID.takeError();
+
+ switch (*RecordID) {
+ case RECORD_REMARK_HEADER: {
+ if (Record.size() != 4)
+ return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HEADER");
+ Parser.Type = Record[0];
+ Parser.RemarkNameIdx = Record[1];
+ Parser.PassNameIdx = Record[2];
+ Parser.FunctionNameIdx = Record[3];
+ break;
+ }
+ case RECORD_REMARK_DEBUG_LOC: {
+ if (Record.size() != 3)
+ return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_DEBUG_LOC");
+ Parser.SourceFileNameIdx = Record[0];
+ Parser.SourceLine = Record[1];
+ Parser.SourceColumn = Record[2];
+ break;
+ }
+ case RECORD_REMARK_HOTNESS: {
+ if (Record.size() != 1)
+ return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HOTNESS");
+ Parser.Hotness = Record[0];
+ break;
+ }
+ case RECORD_REMARK_ARG_WITH_DEBUGLOC: {
+ if (Record.size() != 5)
+ return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_ARG_WITH_DEBUGLOC");
+ // Create a temporary argument. Use that as a valid memory location for this
+ // argument entry.
+ Parser.TmpArgs.emplace_back();
+ Parser.TmpArgs.back().KeyIdx = Record[0];
+ Parser.TmpArgs.back().ValueIdx = Record[1];
+ Parser.TmpArgs.back().SourceFileNameIdx = Record[2];
+ Parser.TmpArgs.back().SourceLine = Record[3];
+ Parser.TmpArgs.back().SourceColumn = Record[4];
+ Parser.Args =
+ ArrayRef<BitstreamRemarkParserHelper::Argument>(Parser.TmpArgs);
+ break;
+ }
+ case RECORD_REMARK_ARG_WITHOUT_DEBUGLOC: {
+ if (Record.size() != 2)
+ return malformedRecord("BLOCK_REMARK",
+ "RECORD_REMARK_ARG_WITHOUT_DEBUGLOC");
+ // Create a temporary argument. Use that as a valid memory location for this
+ // argument entry.
+ Parser.TmpArgs.emplace_back();
+ Parser.TmpArgs.back().KeyIdx = Record[0];
+ Parser.TmpArgs.back().ValueIdx = Record[1];
+ Parser.Args =
+ ArrayRef<BitstreamRemarkParserHelper::Argument>(Parser.TmpArgs);
+ break;
+ }
+ default:
+ return unknownRecord("BLOCK_REMARK", *RecordID);
+ }
+ return Error::success();
+}
+
+template <typename T>
+static Error parseBlock(T &ParserHelper, unsigned BlockID,
+ const char *BlockName) {
+ BitstreamCursor &Stream = ParserHelper.Stream;
+ Expected<BitstreamEntry> Next = Stream.advance();
+ if (!Next)
+ return Next.takeError();
+ if (Next->Kind != BitstreamEntry::SubBlock || Next->ID != BlockID)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing %s: expecting [ENTER_SUBBLOCK, %s, ...].",
+ BlockName, BlockName);
+ if (Stream.EnterSubBlock(BlockID))
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while entering %s.", BlockName);
+
+ // Stop when there is nothing to read anymore or when we encounter an
+ // END_BLOCK.
+ while (!Stream.AtEndOfStream()) {
+ Expected<BitstreamEntry> Next = Stream.advance();
+ if (!Next)
+ return Next.takeError();
+ switch (Next->Kind) {
+ case BitstreamEntry::EndBlock:
+ return Error::success();
+ case BitstreamEntry::Error:
+ case BitstreamEntry::SubBlock:
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing %s: expecting records.", BlockName);
+ case BitstreamEntry::Record:
+ if (Error E = parseRecord(ParserHelper, Next->ID))
+ return E;
+ continue;
+ }
+ }
+ // If we're here, it means we didn't get an END_BLOCK yet, but we're at the
+ // end of the stream. In this case, error.
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing %s: unterminated block.", BlockName);
+}
+
+Error BitstreamMetaParserHelper::parse() {
+ return parseBlock(*this, META_BLOCK_ID, "META_BLOCK");
+}
+
+Error BitstreamRemarkParserHelper::parse() {
+ return parseBlock(*this, REMARK_BLOCK_ID, "REMARK_BLOCK");
+}
+
+BitstreamParserHelper::BitstreamParserHelper(StringRef Buffer)
+ : Stream(Buffer) {}
+
+Expected<std::array<char, 4>> BitstreamParserHelper::parseMagic() {
+ std::array<char, 4> Result;
+ for (unsigned i = 0; i < 4; ++i)
+ if (Expected<unsigned> R = Stream.Read(8))
+ Result[i] = *R;
+ else
+ return R.takeError();
+ return Result;
+}
+
+Error BitstreamParserHelper::parseBlockInfoBlock() {
+ Expected<BitstreamEntry> Next = Stream.advance();
+ if (!Next)
+ return Next.takeError();
+ if (Next->Kind != BitstreamEntry::SubBlock ||
+ Next->ID != llvm::bitc::BLOCKINFO_BLOCK_ID)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCKINFO_BLOCK: expecting [ENTER_SUBBLOCK, "
+ "BLOCKINFO_BLOCK, ...].");
+
+ Expected<Optional<BitstreamBlockInfo>> MaybeBlockInfo =
+ Stream.ReadBlockInfoBlock();
+ if (!MaybeBlockInfo)
+ return MaybeBlockInfo.takeError();
+
+ if (!*MaybeBlockInfo)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCKINFO_BLOCK.");
+
+ BlockInfo = **MaybeBlockInfo;
+
+ Stream.setBlockInfo(&BlockInfo);
+ return Error::success();
+}
+
+static Expected<bool> isBlock(BitstreamCursor &Stream, unsigned BlockID) {
+ bool Result = false;
+ uint64_t PreviousBitNo = Stream.GetCurrentBitNo();
+ Expected<BitstreamEntry> Next = Stream.advance();
+ if (!Next)
+ return Next.takeError();
+ switch (Next->Kind) {
+ case BitstreamEntry::SubBlock:
+ // Check for the block id.
+ Result = Next->ID == BlockID;
+ break;
+ case BitstreamEntry::Error:
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Unexpected error while parsing bitstream.");
+ default:
+ Result = false;
+ break;
+ }
+ if (Error E = Stream.JumpToBit(PreviousBitNo))
+ return std::move(E);
+ return Result;
+}
+
+Expected<bool> BitstreamParserHelper::isMetaBlock() {
+ return isBlock(Stream, META_BLOCK_ID);
+}
+
+Expected<bool> BitstreamParserHelper::isRemarkBlock() {
+ return isBlock(Stream, REMARK_BLOCK_ID);
+}
+
+static Error validateMagicNumber(StringRef Magic) {
+ if (Magic != remarks::ContainerMagic)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown magic number: expecting %s, got %.4s.",
+ remarks::ContainerMagic.data(), Magic.data());
+ return Error::success();
+}
+
+static Error advanceToMetaBlock(BitstreamParserHelper &Helper) {
+ Expected<std::array<char, 4>> Magic = Helper.parseMagic();
+ if (!Magic)
+ return Magic.takeError();
+ if (Error E = validateMagicNumber(StringRef(Magic->data(), Magic->size())))
+ return E;
+ if (Error E = Helper.parseBlockInfoBlock())
+ return E;
+ Expected<bool> isMetaBlock = Helper.isMetaBlock();
+ if (!isMetaBlock)
+ return isMetaBlock.takeError();
+ if (!*isMetaBlock)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Expecting META_BLOCK after the BLOCKINFO_BLOCK.");
+ return Error::success();
+}
+
+Expected<std::unique_ptr<BitstreamRemarkParser>>
+remarks::createBitstreamParserFromMeta(
+ StringRef Buf, Optional<ParsedStringTable> StrTab,
+ Optional<StringRef> ExternalFilePrependPath) {
+ BitstreamParserHelper Helper(Buf);
+ Expected<std::array<char, 4>> Magic = Helper.parseMagic();
+ if (!Magic)
+ return Magic.takeError();
+
+ if (Error E = validateMagicNumber(StringRef(Magic->data(), Magic->size())))
+ return std::move(E);
+
+ auto Parser =
+ StrTab ? std::make_unique<BitstreamRemarkParser>(Buf, std::move(*StrTab))
+ : std::make_unique<BitstreamRemarkParser>(Buf);
+
+ if (ExternalFilePrependPath)
+ Parser->ExternalFilePrependPath = *ExternalFilePrependPath;
+
+ return std::move(Parser);
+}
+
+Expected<std::unique_ptr<Remark>> BitstreamRemarkParser::next() {
+ if (ParserHelper.atEndOfStream())
+ return make_error<EndOfFileError>();
+
+ if (!ReadyToParseRemarks) {
+ if (Error E = parseMeta())
+ return std::move(E);
+ ReadyToParseRemarks = true;
+ }
+
+ return parseRemark();
+}
+
+Error BitstreamRemarkParser::parseMeta() {
+ // Advance to the meta block.
+ if (Error E = advanceToMetaBlock(ParserHelper))
+ return E;
+
+ BitstreamMetaParserHelper MetaHelper(ParserHelper.Stream,
+ ParserHelper.BlockInfo);
+ if (Error E = MetaHelper.parse())
+ return E;
+
+ if (Error E = processCommonMeta(MetaHelper))
+ return E;
+
+ switch (ContainerType) {
+ case BitstreamRemarkContainerType::Standalone:
+ return processStandaloneMeta(MetaHelper);
+ case BitstreamRemarkContainerType::SeparateRemarksFile:
+ return processSeparateRemarksFileMeta(MetaHelper);
+ case BitstreamRemarkContainerType::SeparateRemarksMeta:
+ return processSeparateRemarksMetaMeta(MetaHelper);
+ }
+ llvm_unreachable("Unknown BitstreamRemarkContainerType enum");
+}
+
+Error BitstreamRemarkParser::processCommonMeta(
+ BitstreamMetaParserHelper &MetaHelper) {
+ if (Optional<uint64_t> Version = MetaHelper.ContainerVersion)
+ ContainerVersion = *Version;
+ else
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_META: missing container version.");
+
+ if (Optional<uint8_t> Type = MetaHelper.ContainerType) {
+ // Always >= BitstreamRemarkContainerType::First since it's unsigned.
+ if (*Type > static_cast<uint8_t>(BitstreamRemarkContainerType::Last))
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_META: invalid container type.");
+
+ ContainerType = static_cast<BitstreamRemarkContainerType>(*Type);
+ } else
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_META: missing container type.");
+
+ return Error::success();
+}
+
+static Error processStrTab(BitstreamRemarkParser &P,
+ Optional<StringRef> StrTabBuf) {
+ if (!StrTabBuf)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_META: missing string table.");
+ // Parse and assign the string table.
+ P.StrTab.emplace(*StrTabBuf);
+ return Error::success();
+}
+
+static Error processRemarkVersion(BitstreamRemarkParser &P,
+ Optional<uint64_t> RemarkVersion) {
+ if (!RemarkVersion)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_META: missing remark version.");
+ P.RemarkVersion = *RemarkVersion;
+ return Error::success();
+}
+
+Error BitstreamRemarkParser::processExternalFilePath(
+ Optional<StringRef> ExternalFilePath) {
+ if (!ExternalFilePath)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_META: missing external file path.");
+
+ SmallString<80> FullPath(ExternalFilePrependPath);
+ sys::path::append(FullPath, *ExternalFilePath);
+
+ // External file: open the external file, parse it, check that its metadata
+ // matches the metadata from the separate meta file, then replace the current
+ // parser with the one parsing the remarks.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(FullPath);
+ if (std::error_code EC = BufferOrErr.getError())
+ return createFileError(FullPath, EC);
+ TmpRemarkBuffer = std::move(*BufferOrErr);
+
+ // Create a separate parser used for parsing the separate file.
+ ParserHelper = BitstreamParserHelper(TmpRemarkBuffer->getBuffer());
+ // Advance and check until we can parse the meta block.
+ if (Error E = advanceToMetaBlock(ParserHelper))
+ return E;
+ // Parse the meta from the separate file.
+ // Note: here we overwrite the BlockInfo with the one from the file. This will
+ // be used to parse the rest of the file.
+ BitstreamMetaParserHelper SeparateMetaHelper(ParserHelper.Stream,
+ ParserHelper.BlockInfo);
+ if (Error E = SeparateMetaHelper.parse())
+ return E;
+
+ uint64_t PreviousContainerVersion = ContainerVersion;
+ if (Error E = processCommonMeta(SeparateMetaHelper))
+ return E;
+
+ if (ContainerType != BitstreamRemarkContainerType::SeparateRemarksFile)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing external file's BLOCK_META: wrong container "
+ "type.");
+
+ if (PreviousContainerVersion != ContainerVersion)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing external file's BLOCK_META: mismatching versions: "
+ "original meta: %lu, external file meta: %lu.",
+ PreviousContainerVersion, ContainerVersion);
+
+ // Process the meta from the separate file.
+ return processSeparateRemarksFileMeta(SeparateMetaHelper);
+}
+
+Error BitstreamRemarkParser::processStandaloneMeta(
+ BitstreamMetaParserHelper &Helper) {
+ if (Error E = processStrTab(*this, Helper.StrTabBuf))
+ return E;
+ return processRemarkVersion(*this, Helper.RemarkVersion);
+}
+
+Error BitstreamRemarkParser::processSeparateRemarksFileMeta(
+ BitstreamMetaParserHelper &Helper) {
+ return processRemarkVersion(*this, Helper.RemarkVersion);
+}
+
+Error BitstreamRemarkParser::processSeparateRemarksMetaMeta(
+ BitstreamMetaParserHelper &Helper) {
+ if (Error E = processStrTab(*this, Helper.StrTabBuf))
+ return E;
+ return processExternalFilePath(Helper.ExternalFilePath);
+}
+
+Expected<std::unique_ptr<Remark>> BitstreamRemarkParser::parseRemark() {
+ BitstreamRemarkParserHelper RemarkHelper(ParserHelper.Stream);
+ if (Error E = RemarkHelper.parse())
+ return std::move(E);
+
+ return processRemark(RemarkHelper);
+}
+
+Expected<std::unique_ptr<Remark>>
+BitstreamRemarkParser::processRemark(BitstreamRemarkParserHelper &Helper) {
+ std::unique_ptr<Remark> Result = std::make_unique<Remark>();
+ Remark &R = *Result;
+
+ if (StrTab == None)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Error while parsing BLOCK_REMARK: missing string table.");
+
+ if (!Helper.Type)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_REMARK: missing remark type.");
+
+ // Always >= Type::First since it's unsigned.
+ if (*Helper.Type > static_cast<uint8_t>(Type::Last))
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_REMARK: unknown remark type.");
+
+ R.RemarkType = static_cast<Type>(*Helper.Type);
+
+ if (!Helper.RemarkNameIdx)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_REMARK: missing remark name.");
+
+ if (Expected<StringRef> RemarkName = (*StrTab)[*Helper.RemarkNameIdx])
+ R.RemarkName = *RemarkName;
+ else
+ return RemarkName.takeError();
+
+ if (!Helper.PassNameIdx)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_REMARK: missing remark pass.");
+
+ if (Expected<StringRef> PassName = (*StrTab)[*Helper.PassNameIdx])
+ R.PassName = *PassName;
+ else
+ return PassName.takeError();
+
+ if (!Helper.FunctionNameIdx)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_REMARK: missing remark function name.");
+ if (Expected<StringRef> FunctionName = (*StrTab)[*Helper.FunctionNameIdx])
+ R.FunctionName = *FunctionName;
+ else
+ return FunctionName.takeError();
+
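+ // A debug location is optional; only attach one when the file, the line and
+ // the column are all present.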
+ if (Helper.SourceFileNameIdx && Helper.SourceLine && Helper.SourceColumn) {
+ Expected<StringRef> SourceFileName = (*StrTab)[*Helper.SourceFileNameIdx];
+ if (!SourceFileName)
+ return SourceFileName.takeError();
+ R.Loc.emplace();
+ R.Loc->SourceFilePath = *SourceFileName;
+ R.Loc->SourceLine = *Helper.SourceLine;
+ R.Loc->SourceColumn = *Helper.SourceColumn;
+ }
+
+ if (Helper.Hotness)
+ R.Hotness = *Helper.Hotness;
+
+ if (!Helper.Args)
+ return std::move(Result);
+
+ for (const BitstreamRemarkParserHelper::Argument &Arg : *Helper.Args) {
+ if (!Arg.KeyIdx)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_REMARK: missing key in remark argument.");
+ if (!Arg.ValueIdx)
+ return createStringError(
+ std::make_error_code(std::errc::illegal_byte_sequence),
+ "Error while parsing BLOCK_REMARK: missing value in remark "
+ "argument.");
+
+ // We have at least a key and a value, create an entry.
+ R.Args.emplace_back();
+
+ if (Expected<StringRef> Key = (*StrTab)[*Arg.KeyIdx])
+ R.Args.back().Key = *Key;
+ else
+ return Key.takeError();
+
+ if (Expected<StringRef> Value = (*StrTab)[*Arg.ValueIdx])
+ R.Args.back().Val = *Value;
+ else
+ return Value.takeError();
+
+ if (Arg.SourceFileNameIdx && Arg.SourceLine && Arg.SourceColumn) {
+ if (Expected<StringRef> SourceFileName =
+ (*StrTab)[*Arg.SourceFileNameIdx]) {
+ R.Args.back().Loc.emplace();
+ R.Args.back().Loc->SourceFilePath = *SourceFileName;
+ R.Args.back().Loc->SourceLine = *Arg.SourceLine;
+ R.Args.back().Loc->SourceColumn = *Arg.SourceColumn;
+ } else
+ return SourceFileName.takeError();
+ }
+ }
+
+ return std::move(Result);
+}
diff --git a/lib/Remarks/BitstreamRemarkParser.h b/lib/Remarks/BitstreamRemarkParser.h
new file mode 100644
index 000000000000..7c9cc2f1e7db
--- /dev/null
+++ b/lib/Remarks/BitstreamRemarkParser.h
@@ -0,0 +1,83 @@
+//===-- BitstreamRemarkParser.h - Parser for Bitstream remarks -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the Bitstream remark parser.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H
+#define LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Remarks/BitstreamRemarkParser.h"
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <string>
+
+namespace llvm {
+namespace remarks {
+/// Parses and holds the state of the latest parsed remark.
+struct BitstreamRemarkParser : public RemarkParser {
+ /// The helper wrapping the buffer to parse.
+ BitstreamParserHelper ParserHelper;
+ /// The string table used for parsing strings.
+ Optional<ParsedStringTable> StrTab;
+ /// Temporary remark buffer used when the remarks are stored separately.
+ std::unique_ptr<MemoryBuffer> TmpRemarkBuffer;
+ /// The common metadata used to decide how to parse the buffer.
+ /// This is filled when parsing the metadata block.
+ uint64_t ContainerVersion;
+ uint64_t RemarkVersion;
+ BitstreamRemarkContainerType ContainerType;
+ /// Whether the parser is ready to parse remarks.
+ bool ReadyToParseRemarks = false;
+
+ /// Create a parser that expects to find a string table embedded in the
+ /// stream.
+ BitstreamRemarkParser(StringRef Buf)
+ : RemarkParser(Format::Bitstream), ParserHelper(Buf) {}
+
+ /// Create a parser that uses a pre-parsed string table.
+ BitstreamRemarkParser(StringRef Buf, ParsedStringTable StrTab)
+ : RemarkParser(Format::Bitstream), ParserHelper(Buf),
+ StrTab(std::move(StrTab)) {}
+
+ Expected<std::unique_ptr<Remark>> next() override;
+
+ static bool classof(const RemarkParser *P) {
+ return P->ParserFormat == Format::Bitstream;
+ }
+
+ /// Parse and process the metadata of the buffer.
+ Error parseMeta();
+
+ /// Parse a Bitstream remark.
+ Expected<std::unique_ptr<Remark>> parseRemark();
+
+private:
+ /// Helper functions.
+ Error processCommonMeta(BitstreamMetaParserHelper &Helper);
+ Error processStandaloneMeta(BitstreamMetaParserHelper &Helper);
+ Error processSeparateRemarksFileMeta(BitstreamMetaParserHelper &Helper);
+ Error processSeparateRemarksMetaMeta(BitstreamMetaParserHelper &Helper);
+ Expected<std::unique_ptr<Remark>>
+ processRemark(BitstreamRemarkParserHelper &Helper);
+ Error processExternalFilePath(Optional<StringRef> ExternalFilePath);
+};
+
+Expected<std::unique_ptr<BitstreamRemarkParser>> createBitstreamParserFromMeta(
+ StringRef Buf, Optional<ParsedStringTable> StrTab = None,
+ Optional<StringRef> ExternalFilePrependPath = None);
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H */
diff --git a/lib/Remarks/BitstreamRemarkSerializer.cpp b/lib/Remarks/BitstreamRemarkSerializer.cpp
new file mode 100644
index 000000000000..d02782c7954d
--- /dev/null
+++ b/lib/Remarks/BitstreamRemarkSerializer.cpp
@@ -0,0 +1,386 @@
+//===- BitstreamRemarkSerializer.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the LLVM bitstream remark serializer
+// using LLVM's bitstream writer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/BitstreamRemarkSerializer.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+BitstreamRemarkSerializerHelper::BitstreamRemarkSerializerHelper(
+ BitstreamRemarkContainerType ContainerType)
+ : Encoded(), R(), Bitstream(Encoded), ContainerType(ContainerType) {}
+
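+// Push each character of Str as a separate operand of the record R.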
+static void push(SmallVectorImpl<uint64_t> &R, StringRef Str) {
+ for (const char C : Str)
+ R.push_back(C);
+}
+
+static void setRecordName(unsigned RecordID, BitstreamWriter &Bitstream,
+ SmallVectorImpl<uint64_t> &R, StringRef Str) {
+ R.clear();
+ R.push_back(RecordID);
+ push(R, Str);
+ Bitstream.EmitRecord(bitc::BLOCKINFO_CODE_SETRECORDNAME, R);
+}
+
+static void initBlock(unsigned BlockID, BitstreamWriter &Bitstream,
+ SmallVectorImpl<uint64_t> &R, StringRef Str) {
+ R.clear();
+ R.push_back(BlockID);
+ Bitstream.EmitRecord(bitc::BLOCKINFO_CODE_SETBID, R);
+
+ R.clear();
+ push(R, Str);
+ Bitstream.EmitRecord(bitc::BLOCKINFO_CODE_BLOCKNAME, R);
+}
+
+void BitstreamRemarkSerializerHelper::setupMetaBlockInfo() {
+ // Set up the metadata block.
+ initBlock(META_BLOCK_ID, Bitstream, R, MetaBlockName);
+
+ // The container information.
+ setRecordName(RECORD_META_CONTAINER_INFO, Bitstream, R,
+ MetaContainerInfoName);
+
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(RECORD_META_CONTAINER_INFO));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Version.
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // Type.
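+ // Note: 2 bits is enough to encode every BitstreamRemarkContainerType value.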
+ RecordMetaContainerInfoAbbrevID =
+ Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev);
+}
+
+void BitstreamRemarkSerializerHelper::setupMetaRemarkVersion() {
+ setRecordName(RECORD_META_REMARK_VERSION, Bitstream, R,
+ MetaRemarkVersionName);
+
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(RECORD_META_REMARK_VERSION));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Version.
+ RecordMetaRemarkVersionAbbrevID =
+ Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev);
+}
+
+void BitstreamRemarkSerializerHelper::emitMetaRemarkVersion(
+ uint64_t RemarkVersion) {
+ // The remark version is emitted only if we emit remarks.
+ R.clear();
+ R.push_back(RECORD_META_REMARK_VERSION);
+ R.push_back(RemarkVersion);
+ Bitstream.EmitRecordWithAbbrev(RecordMetaRemarkVersionAbbrevID, R);
+}
+
+void BitstreamRemarkSerializerHelper::setupMetaStrTab() {
+ setRecordName(RECORD_META_STRTAB, Bitstream, R, MetaStrTabName);
+
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(RECORD_META_STRTAB));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Raw table.
+ RecordMetaStrTabAbbrevID =
+ Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev);
+}
+
+void BitstreamRemarkSerializerHelper::emitMetaStrTab(
+ const StringTable &StrTab) {
+ // The string table is not emitted if we emit remarks separately.
+ R.clear();
+ R.push_back(RECORD_META_STRTAB);
+
+ // Serialize to a blob.
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ StrTab.serialize(OS);
+ StringRef Blob = OS.str();
+ Bitstream.EmitRecordWithBlob(RecordMetaStrTabAbbrevID, R, Blob);
+}
+
+void BitstreamRemarkSerializerHelper::setupMetaExternalFile() {
+ setRecordName(RECORD_META_EXTERNAL_FILE, Bitstream, R, MetaExternalFileName);
+
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(RECORD_META_EXTERNAL_FILE));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Filename.
+ RecordMetaExternalFileAbbrevID =
+ Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev);
+}
+
+void BitstreamRemarkSerializerHelper::emitMetaExternalFile(StringRef Filename) {
+ // The external file is emitted only if we emit the separate metadata.
+ R.clear();
+ R.push_back(RECORD_META_EXTERNAL_FILE);
+ Bitstream.EmitRecordWithBlob(RecordMetaExternalFileAbbrevID, R, Filename);
+}
+
+void BitstreamRemarkSerializerHelper::setupRemarkBlockInfo() {
+ // Set up the remark block.
+ initBlock(REMARK_BLOCK_ID, Bitstream, R, RemarkBlockName);
+
+ // The header of a remark.
+ {
+ setRecordName(RECORD_REMARK_HEADER, Bitstream, R, RemarkHeaderName);
+
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_HEADER));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Type
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Remark Name
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Pass name
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Function name
+ RecordRemarkHeaderAbbrevID =
+ Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev);
+ }
+
+ // The location of a remark.
+ {
+ setRecordName(RECORD_REMARK_DEBUG_LOC, Bitstream, R, RemarkDebugLocName);
+
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_DEBUG_LOC));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // File
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Line
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Column
+ RecordRemarkDebugLocAbbrevID =
+ Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev);
+ }
+
+ // The hotness of a remark.
+ {
+ setRecordName(RECORD_REMARK_HOTNESS, Bitstream, R, RemarkHotnessName);
+
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_HOTNESS));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Hotness
+ RecordRemarkHotnessAbbrevID =
+ Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev);
+ }
+
+ // An argument entry with a debug location attached.
+ {
+ setRecordName(RECORD_REMARK_ARG_WITH_DEBUGLOC, Bitstream, R,
+ RemarkArgWithDebugLocName);
+
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_ARG_WITH_DEBUGLOC));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Key
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Value
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // File
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Line
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Column
+ RecordRemarkArgWithDebugLocAbbrevID =
+ Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev);
+ }
+
+ // An argument entry with no debug location attached.
+ {
+ setRecordName(RECORD_REMARK_ARG_WITHOUT_DEBUGLOC, Bitstream, R,
+ RemarkArgWithoutDebugLocName);
+
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_ARG_WITHOUT_DEBUGLOC));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Key
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Value
+ RecordRemarkArgWithoutDebugLocAbbrevID =
+ Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev);
+ }
+}
+
+void BitstreamRemarkSerializerHelper::setupBlockInfo() {
+ // Emit magic number.
+ for (const char C : ContainerMagic)
+ Bitstream.Emit(static_cast<unsigned>(C), 8);
+
+ Bitstream.EnterBlockInfoBlock();
+
+ // Set up the main metadata. Depending on the container type, we'll set up
+ // the required records next.
+ setupMetaBlockInfo();
+
+ switch (ContainerType) {
+ case BitstreamRemarkContainerType::SeparateRemarksMeta:
+ // Needs a string table that the separate remark file is using.
+ setupMetaStrTab();
+ // Needs to know where the external remarks file is.
+ setupMetaExternalFile();
+ break;
+ case BitstreamRemarkContainerType::SeparateRemarksFile:
+ // Contains remarks: emit the version.
+ setupMetaRemarkVersion();
+ // Contains remarks: emit the remark abbrevs.
+ setupRemarkBlockInfo();
+ break;
+ case BitstreamRemarkContainerType::Standalone:
+ // Contains remarks: emit the version.
+ setupMetaRemarkVersion();
+ // Needs a string table.
+ setupMetaStrTab();
+ // Contains remarks: emit the remark abbrevs.
+ setupRemarkBlockInfo();
+ break;
+ }
+
+ Bitstream.ExitBlock();
+}
+
+void BitstreamRemarkSerializerHelper::emitMetaBlock(
+ uint64_t ContainerVersion, Optional<uint64_t> RemarkVersion,
+ Optional<const StringTable *> StrTab, Optional<StringRef> Filename) {
+ // Emit the meta block.
+ Bitstream.EnterSubblock(META_BLOCK_ID, 3);
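+ // The second argument is the abbreviation ID width used inside the block.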
+
+ // The container version and type.
+ R.clear();
+ R.push_back(RECORD_META_CONTAINER_INFO);
+ R.push_back(ContainerVersion);
+ R.push_back(static_cast<uint64_t>(ContainerType));
+ Bitstream.EmitRecordWithAbbrev(RecordMetaContainerInfoAbbrevID, R);
+
+ switch (ContainerType) {
+ case BitstreamRemarkContainerType::SeparateRemarksMeta:
+ assert(StrTab != None && *StrTab != nullptr);
+ emitMetaStrTab(**StrTab);
+ assert(Filename != None);
+ emitMetaExternalFile(*Filename);
+ break;
+ case BitstreamRemarkContainerType::SeparateRemarksFile:
+ assert(RemarkVersion != None);
+ emitMetaRemarkVersion(*RemarkVersion);
+ break;
+ case BitstreamRemarkContainerType::Standalone:
+ assert(RemarkVersion != None);
+ emitMetaRemarkVersion(*RemarkVersion);
+ assert(StrTab != None && *StrTab != nullptr);
+ emitMetaStrTab(**StrTab);
+ break;
+ }
+
+ Bitstream.ExitBlock();
+}
+
+void BitstreamRemarkSerializerHelper::emitRemarkBlock(const Remark &Remark,
+ StringTable &StrTab) {
+ Bitstream.EnterSubblock(REMARK_BLOCK_ID, 4);
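+ // Use a wider abbrev ID width here: the remark block declares more
+ // abbreviations than the meta block.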
+
+ R.clear();
+ R.push_back(RECORD_REMARK_HEADER);
+ R.push_back(static_cast<uint64_t>(Remark.RemarkType));
+ R.push_back(StrTab.add(Remark.RemarkName).first);
+ R.push_back(StrTab.add(Remark.PassName).first);
+ R.push_back(StrTab.add(Remark.FunctionName).first);
+ Bitstream.EmitRecordWithAbbrev(RecordRemarkHeaderAbbrevID, R);
+
+ if (const Optional<RemarkLocation> &Loc = Remark.Loc) {
+ R.clear();
+ R.push_back(RECORD_REMARK_DEBUG_LOC);
+ R.push_back(StrTab.add(Loc->SourceFilePath).first);
+ R.push_back(Loc->SourceLine);
+ R.push_back(Loc->SourceColumn);
+ Bitstream.EmitRecordWithAbbrev(RecordRemarkDebugLocAbbrevID, R);
+ }
+
+ if (Optional<uint64_t> Hotness = Remark.Hotness) {
+ R.clear();
+ R.push_back(RECORD_REMARK_HOTNESS);
+ R.push_back(*Hotness);
+ Bitstream.EmitRecordWithAbbrev(RecordRemarkHotnessAbbrevID, R);
+ }
+
+ for (const Argument &Arg : Remark.Args) {
+ R.clear();
+ unsigned Key = StrTab.add(Arg.Key).first;
+ unsigned Val = StrTab.add(Arg.Val).first;
+ bool HasDebugLoc = Arg.Loc != None;
+ R.push_back(HasDebugLoc ? RECORD_REMARK_ARG_WITH_DEBUGLOC
+ : RECORD_REMARK_ARG_WITHOUT_DEBUGLOC);
+ R.push_back(Key);
+ R.push_back(Val);
+ if (HasDebugLoc) {
+ R.push_back(StrTab.add(Arg.Loc->SourceFilePath).first);
+ R.push_back(Arg.Loc->SourceLine);
+ R.push_back(Arg.Loc->SourceColumn);
+ }
+ Bitstream.EmitRecordWithAbbrev(HasDebugLoc
+ ? RecordRemarkArgWithDebugLocAbbrevID
+ : RecordRemarkArgWithoutDebugLocAbbrevID,
+ R);
+ }
+ Bitstream.ExitBlock();
+}
+
+void BitstreamRemarkSerializerHelper::flushToStream(raw_ostream &OS) {
+ OS.write(Encoded.data(), Encoded.size());
+ Encoded.clear();
+}
+
+StringRef BitstreamRemarkSerializerHelper::getBuffer() {
+ return StringRef(Encoded.data(), Encoded.size());
+}
+
+BitstreamRemarkSerializer::BitstreamRemarkSerializer(raw_ostream &OS,
+ SerializerMode Mode)
+ : RemarkSerializer(Format::Bitstream, OS, Mode),
+ Helper(BitstreamRemarkContainerType::SeparateRemarksFile) {
+ assert(Mode == SerializerMode::Separate &&
+ "For SerializerMode::Standalone, a pre-filled string table needs to "
+ "be provided.");
+ // We always use a string table with bitstream.
+ StrTab.emplace();
+}
+
+BitstreamRemarkSerializer::BitstreamRemarkSerializer(raw_ostream &OS,
+ SerializerMode Mode,
+ StringTable StrTabIn)
+ : RemarkSerializer(Format::Bitstream, OS, Mode),
+ Helper(Mode == SerializerMode::Separate
+ ? BitstreamRemarkContainerType::SeparateRemarksFile
+ : BitstreamRemarkContainerType::Standalone) {
+ StrTab = std::move(StrTabIn);
+}
+
+void BitstreamRemarkSerializer::emit(const Remark &Remark) {
+ if (!DidSetUp) {
+ // Emit the metadata that is embedded in the remark file.
+ // If we're in standalone mode, serialize the string table as well.
+ bool IsStandalone =
+ Helper.ContainerType == BitstreamRemarkContainerType::Standalone;
+ BitstreamMetaSerializer MetaSerializer(
+ OS, Helper,
+ IsStandalone ? &*StrTab : Optional<const StringTable *>(None));
+ MetaSerializer.emit();
+ DidSetUp = true;
+ }
+
+ assert(DidSetUp &&
+ "The Block info block and the meta block were not emitted yet.");
+ Helper.emitRemarkBlock(Remark, *StrTab);
+
+ Helper.flushToStream(OS);
+}
+
+std::unique_ptr<MetaSerializer> BitstreamRemarkSerializer::metaSerializer(
+ raw_ostream &OS, Optional<StringRef> ExternalFilename) {
+ assert(Helper.ContainerType !=
+ BitstreamRemarkContainerType::SeparateRemarksMeta);
+ bool IsStandalone =
+ Helper.ContainerType == BitstreamRemarkContainerType::Standalone;
+ return std::make_unique<BitstreamMetaSerializer>(
+ OS,
+ IsStandalone ? BitstreamRemarkContainerType::Standalone
+ : BitstreamRemarkContainerType::SeparateRemarksMeta,
+ &*StrTab, ExternalFilename);
+}
+
+void BitstreamMetaSerializer::emit() {
+ Helper->setupBlockInfo();
+ Helper->emitMetaBlock(CurrentContainerVersion, CurrentRemarkVersion, StrTab,
+ ExternalFilename);
+ Helper->flushToStream(OS);
+}
diff --git a/lib/Remarks/RemarkFormat.cpp b/lib/Remarks/RemarkFormat.cpp
index bcd0f753ff64..f2d0331ec6a8 100644
--- a/lib/Remarks/RemarkFormat.cpp
+++ b/lib/Remarks/RemarkFormat.cpp
@@ -19,11 +19,13 @@ using namespace llvm::remarks;
Expected<Format> llvm::remarks::parseFormat(StringRef FormatStr) {
auto Result = StringSwitch<Format>(FormatStr)
.Cases("", "yaml", Format::YAML)
+ .Case("yaml-strtab", Format::YAMLStrTab)
+ .Case("bitstream", Format::Bitstream)
.Default(Format::Unknown);
if (Result == Format::Unknown)
return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Unknown remark serializer format: '%s'",
+ "Unknown remark format: '%s'",
FormatStr.data());
return Result;
diff --git a/lib/Remarks/RemarkParser.cpp b/lib/Remarks/RemarkParser.cpp
index f67464073bd1..c5c3d0badd3e 100644
--- a/lib/Remarks/RemarkParser.cpp
+++ b/lib/Remarks/RemarkParser.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Remarks/RemarkParser.h"
+#include "BitstreamRemarkParser.h"
#include "YAMLRemarkParser.h"
#include "llvm-c/Remarks.h"
#include "llvm/ADT/STLExtras.h"
@@ -47,32 +48,81 @@ Expected<StringRef> ParsedStringTable::operator[](size_t Index) const {
return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1);
}
-Expected<std::unique_ptr<Parser>>
+Expected<std::unique_ptr<RemarkParser>>
+llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf) {
+ switch (ParserFormat) {
+ case Format::YAML:
+ return std::make_unique<YAMLRemarkParser>(Buf);
+ case Format::YAMLStrTab:
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "The YAML with string table format requires a parsed string table.");
+ case Format::Bitstream:
+ return std::make_unique<BitstreamRemarkParser>(Buf);
+ case Format::Unknown:
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown remark parser format.");
+ }
+ llvm_unreachable("unhandled ParseFormat");
+}
+
+Expected<std::unique_ptr<RemarkParser>>
llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf,
- Optional<const ParsedStringTable *> StrTab) {
+ ParsedStringTable StrTab) {
+ switch (ParserFormat) {
+ case Format::YAML:
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "The YAML format can't be used with a string "
+ "table. Use yaml-strtab instead.");
+ case Format::YAMLStrTab:
+ return std::make_unique<YAMLStrTabRemarkParser>(Buf, std::move(StrTab));
+ case Format::Bitstream:
+ return std::make_unique<BitstreamRemarkParser>(Buf, std::move(StrTab));
+ case Format::Unknown:
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown remark parser format.");
+ }
+ llvm_unreachable("unhandled ParseFormat");
+}
+
+Expected<std::unique_ptr<RemarkParser>>
+llvm::remarks::createRemarkParserFromMeta(
+ Format ParserFormat, StringRef Buf, Optional<ParsedStringTable> StrTab,
+ Optional<StringRef> ExternalFilePrependPath) {
switch (ParserFormat) {
+ // Depending on the metadata, the format can be either yaml or yaml-strtab,
+ // regardless of the input argument.
case Format::YAML:
- return llvm::make_unique<YAMLRemarkParser>(Buf, StrTab);
+ case Format::YAMLStrTab:
+ return createYAMLParserFromMeta(Buf, std::move(StrTab),
+ std::move(ExternalFilePrependPath));
+ case Format::Bitstream:
+ return createBitstreamParserFromMeta(Buf, std::move(StrTab),
+ std::move(ExternalFilePrependPath));
case Format::Unknown:
return createStringError(std::make_error_code(std::errc::invalid_argument),
"Unknown remark parser format.");
}
- llvm_unreachable("unknown format");
+ llvm_unreachable("unhandled ParseFormat");
}
+namespace {
// Wrapper that holds the state needed to interact with the C API.
struct CParser {
- std::unique_ptr<Parser> TheParser;
+ std::unique_ptr<RemarkParser> TheParser;
Optional<std::string> Err;
CParser(Format ParserFormat, StringRef Buf,
- Optional<const ParsedStringTable *> StrTab = None)
- : TheParser(cantFail(createRemarkParser(ParserFormat, Buf, StrTab))) {}
+ Optional<ParsedStringTable> StrTab = None)
+ : TheParser(cantFail(
+ StrTab ? createRemarkParser(ParserFormat, Buf, std::move(*StrTab))
+ : createRemarkParser(ParserFormat, Buf))) {}
void handleError(Error E) { Err.emplace(toString(std::move(E))); }
bool hasError() const { return Err.hasValue(); }
const char *getMessage() const { return Err ? Err->c_str() : nullptr; };
};
+} // namespace
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(CParser, LLVMRemarkParserRef)
@@ -83,10 +133,16 @@ extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
StringRef(static_cast<const char *>(Buf), Size)));
}
+extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateBitstream(const void *Buf,
+ uint64_t Size) {
+ return wrap(new CParser(Format::Bitstream,
+ StringRef(static_cast<const char *>(Buf), Size)));
+}
+
extern "C" LLVMRemarkEntryRef
LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser) {
CParser &TheCParser = *unwrap(Parser);
- remarks::Parser &TheParser = *TheCParser.TheParser;
+ remarks::RemarkParser &TheParser = *TheCParser.TheParser;
Expected<std::unique_ptr<Remark>> MaybeRemark = TheParser.next();
if (Error E = MaybeRemark.takeError()) {
diff --git a/lib/Remarks/RemarkSerializer.cpp b/lib/Remarks/RemarkSerializer.cpp
new file mode 100644
index 000000000000..ab19c84bbadb
--- /dev/null
+++ b/lib/Remarks/RemarkSerializer.cpp
@@ -0,0 +1,54 @@
+//===- RemarkSerializer.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides tools for serializing remarks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Remarks/BitstreamRemarkSerializer.h"
+#include "llvm/Remarks/YAMLRemarkSerializer.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+Expected<std::unique_ptr<RemarkSerializer>>
+remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode,
+ raw_ostream &OS) {
+ switch (RemarksFormat) {
+ case Format::Unknown:
+ return createStringError(std::errc::invalid_argument,
+ "Unknown remark serializer format.");
+ case Format::YAML:
+ return std::make_unique<YAMLRemarkSerializer>(OS, Mode);
+ case Format::YAMLStrTab:
+ return std::make_unique<YAMLStrTabRemarkSerializer>(OS, Mode);
+ case Format::Bitstream:
+ return std::make_unique<BitstreamRemarkSerializer>(OS, Mode);
+ }
+ llvm_unreachable("Unknown remarks::Format enum");
+}
+
+Expected<std::unique_ptr<RemarkSerializer>>
+remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode,
+ raw_ostream &OS, remarks::StringTable StrTab) {
+ switch (RemarksFormat) {
+ case Format::Unknown:
+ return createStringError(std::errc::invalid_argument,
+ "Unknown remark serializer format.");
+ case Format::YAML:
+ return std::make_unique<YAMLRemarkSerializer>(OS, Mode, std::move(StrTab));
+ case Format::YAMLStrTab:
+ return std::make_unique<YAMLStrTabRemarkSerializer>(OS, Mode,
+ std::move(StrTab));
+ case Format::Bitstream:
+ return std::make_unique<BitstreamRemarkSerializer>(OS, Mode,
+ std::move(StrTab));
+ }
+ llvm_unreachable("Unknown remarks::Format enum");
+}
diff --git a/lib/Remarks/RemarkStringTable.cpp b/lib/Remarks/RemarkStringTable.cpp
index 984aa5b33b48..51156465be51 100644
--- a/lib/Remarks/RemarkStringTable.cpp
+++ b/lib/Remarks/RemarkStringTable.cpp
@@ -11,6 +11,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include <vector>
@@ -18,6 +20,14 @@
using namespace llvm;
using namespace llvm::remarks;
+StringTable::StringTable(const ParsedStringTable &Other) : StrTab() {
+ for (unsigned i = 0, e = Other.size(); i < e; ++i)
+ if (Expected<StringRef> MaybeStr = Other[i])
+ add(*MaybeStr);
+ else
+ llvm_unreachable("Unexpected error while building remarks string table.");
+}
+
std::pair<unsigned, StringRef> StringTable::add(StringRef Str) {
size_t NextID = StrTab.size();
auto KV = StrTab.insert({Str, NextID});
@@ -28,10 +38,22 @@ std::pair<unsigned, StringRef> StringTable::add(StringRef Str) {
return {KV.first->second, KV.first->first()};
}
+void StringTable::internalize(Remark &R) {
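+ // Replace each string in the remark with the copy owned by this table, so
+ // that the remark no longer references its original buffer.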
+ auto Impl = [&](StringRef &S) { S = add(S).second; };
+ Impl(R.PassName);
+ Impl(R.RemarkName);
+ Impl(R.FunctionName);
+ if (R.Loc)
+ Impl(R.Loc->SourceFilePath);
+ for (Argument &Arg : R.Args) {
+ Impl(Arg.Key);
+ Impl(Arg.Val);
+ if (Arg.Loc)
+ Impl(Arg.Loc->SourceFilePath);
+ }
+}
+
void StringTable::serialize(raw_ostream &OS) const {
- // Emit the number of strings.
- uint64_t StrTabSize = SerializedSize;
- support::endian::write(OS, StrTabSize, support::little);
// Emit the sequence of strings.
for (StringRef Str : serialize()) {
OS << Str;
diff --git a/lib/Remarks/YAMLRemarkParser.cpp b/lib/Remarks/YAMLRemarkParser.cpp
index ed78b7ba5d95..dd834d85676e 100644
--- a/lib/Remarks/YAMLRemarkParser.cpp
+++ b/lib/Remarks/YAMLRemarkParser.cpp
@@ -14,6 +14,8 @@
#include "YAMLRemarkParser.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Remarks/RemarkParser.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Path.h"
using namespace llvm;
using namespace llvm::remarks;
@@ -54,9 +56,123 @@ static SourceMgr setupSM(std::string &LastErrorMessage) {
return SM;
}
+// Parse the magic number. This function returns true if this represents remark
+// metadata, false otherwise.
+static Expected<bool> parseMagic(StringRef &Buf) {
+ if (!Buf.consume_front(remarks::Magic))
+ return false;
+
+ if (Buf.size() < 1 || !Buf.consume_front(StringRef("\0", 1)))
+ return createStringError(std::errc::illegal_byte_sequence,
+ "Expecting \\0 after magic number.");
+ return true;
+}
+
+static Expected<uint64_t> parseVersion(StringRef &Buf) {
+ if (Buf.size() < sizeof(uint64_t))
+ return createStringError(std::errc::illegal_byte_sequence,
+ "Expecting version number.");
+
+ uint64_t Version =
+ support::endian::read<uint64_t, support::little, support::unaligned>(
+ Buf.data());
+ if (Version != remarks::CurrentRemarkVersion)
+ return createStringError(std::errc::illegal_byte_sequence,
+ "Mismatching remark version. Got %" PRId64
+ ", expected %" PRId64 ".",
+ Version, remarks::CurrentRemarkVersion);
+ Buf = Buf.drop_front(sizeof(uint64_t));
+ return Version;
+}
+
+static Expected<uint64_t> parseStrTabSize(StringRef &Buf) {
+ if (Buf.size() < sizeof(uint64_t))
+ return createStringError(std::errc::illegal_byte_sequence,
+ "Expecting string table size.");
+ uint64_t StrTabSize =
+ support::endian::read<uint64_t, support::little, support::unaligned>(
+ Buf.data());
+ Buf = Buf.drop_front(sizeof(uint64_t));
+ return StrTabSize;
+}
+
+static Expected<ParsedStringTable> parseStrTab(StringRef &Buf,
+ uint64_t StrTabSize) {
+ if (Buf.size() < StrTabSize)
+ return createStringError(std::errc::illegal_byte_sequence,
+ "Expecting string table.");
+
+ // Attach the string table to the parser.
+ ParsedStringTable Result(StringRef(Buf.data(), StrTabSize));
+ Buf = Buf.drop_front(StrTabSize);
+ return Expected<ParsedStringTable>(std::move(Result));
+}
+
+Expected<std::unique_ptr<YAMLRemarkParser>>
+remarks::createYAMLParserFromMeta(StringRef Buf,
+ Optional<ParsedStringTable> StrTab,
+ Optional<StringRef> ExternalFilePrependPath) {
+ // Check for the metadata magic number; once it is found, the metadata that
+ // follows has to be correct.
+ Expected<bool> isMeta = parseMagic(Buf);
+ if (!isMeta)
+ return isMeta.takeError();
+ // If there is no metadata magic number, the buffer is parsed as plain YAML.
+ std::unique_ptr<MemoryBuffer> SeparateBuf;
+ if (*isMeta) {
+ Expected<uint64_t> Version = parseVersion(Buf);
+ if (!Version)
+ return Version.takeError();
+
+ Expected<uint64_t> StrTabSize = parseStrTabSize(Buf);
+ if (!StrTabSize)
+ return StrTabSize.takeError();
+
+ // If the size of the string table is not 0, try to build one.
+ if (*StrTabSize != 0) {
+ if (StrTab)
+ return createStringError(std::errc::illegal_byte_sequence,
+ "String table already provided.");
+ Expected<ParsedStringTable> MaybeStrTab = parseStrTab(Buf, *StrTabSize);
+ if (!MaybeStrTab)
+ return MaybeStrTab.takeError();
+ StrTab = std::move(*MaybeStrTab);
+ }
+ // If it starts with "---", there is no external file.
+ if (!Buf.startswith("---")) {
+ // At this point, we expect Buf to contain the external file path.
+ StringRef ExternalFilePath = Buf;
+ SmallString<80> FullPath;
+ if (ExternalFilePrependPath)
+ FullPath = *ExternalFilePrependPath;
+ sys::path::append(FullPath, ExternalFilePath);
+
+ // Try to open the file and start parsing from there.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(FullPath);
+ if (std::error_code EC = BufferOrErr.getError())
+ return createFileError(FullPath, EC);
+
+ // Keep the buffer alive.
+ SeparateBuf = std::move(*BufferOrErr);
+ Buf = SeparateBuf->getBuffer();
+ }
+ }
+
+ std::unique_ptr<YAMLRemarkParser> Result =
+ StrTab
+ ? std::make_unique<YAMLStrTabRemarkParser>(Buf, std::move(*StrTab))
+ : std::make_unique<YAMLRemarkParser>(Buf);
+ if (SeparateBuf)
+ Result->SeparateBuf = std::move(SeparateBuf);
+ return std::move(Result);
+}
+
+YAMLRemarkParser::YAMLRemarkParser(StringRef Buf)
+ : YAMLRemarkParser(Buf, None) {}
+
YAMLRemarkParser::YAMLRemarkParser(StringRef Buf,
- Optional<const ParsedStringTable *> StrTab)
- : Parser{Format::YAML}, StrTab(StrTab), LastErrorMessage(),
+ Optional<ParsedStringTable> StrTab)
+ : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), LastErrorMessage(),
SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {}
Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) {
@@ -86,7 +202,7 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
if (!Root)
return error("document root is not of mapping type.", *YAMLRoot);
- std::unique_ptr<Remark> Result = llvm::make_unique<Remark>();
+ std::unique_ptr<Remark> Result = std::make_unique<Remark>();
Remark &TheRemark = *Result;
// First, the type. It needs special handling since is not part of the
@@ -179,22 +295,7 @@ Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
if (!Value)
return error("expected a value of scalar type.", Node);
- StringRef Result;
- if (!StrTab) {
- Result = Value->getRawValue();
- } else {
- // If we have a string table, parse it as an unsigned.
- unsigned StrID = 0;
- if (Expected<unsigned> MaybeStrID = parseUnsigned(Node))
- StrID = *MaybeStrID;
- else
- return MaybeStrID.takeError();
-
- if (Expected<StringRef> Str = (**StrTab)[StrID])
- Result = *Str;
- else
- return Str.takeError();
- }
+ StringRef Result = Value->getRawValue();
if (Result.front() == '\'')
Result = Result.drop_front();
@@ -325,3 +426,29 @@ Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
return std::move(*MaybeResult);
}
+
+Expected<StringRef> YAMLStrTabRemarkParser::parseStr(yaml::KeyValueNode &Node) {
+ auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+ if (!Value)
+ return error("expected a value of scalar type.", Node);
+ StringRef Result;
+ // If we have a string table, parse it as an unsigned.
+ unsigned StrID = 0;
+ if (Expected<unsigned> MaybeStrID = parseUnsigned(Node))
+ StrID = *MaybeStrID;
+ else
+ return MaybeStrID.takeError();
+
+ if (Expected<StringRef> Str = (*StrTab)[StrID])
+ Result = *Str;
+ else
+ return Str.takeError();
+
+ if (Result.front() == '\'')
+ Result = Result.drop_front();
+
+ if (Result.back() == '\'')
+ Result = Result.drop_back();
+
+ return Result;
+}
diff --git a/lib/Remarks/YAMLRemarkParser.h b/lib/Remarks/YAMLRemarkParser.h
index cea76e63e75c..03707433bc03 100644
--- a/lib/Remarks/YAMLRemarkParser.h
+++ b/lib/Remarks/YAMLRemarkParser.h
@@ -18,6 +18,7 @@
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/YAMLTraits.h"
@@ -46,9 +47,9 @@ private:
};
/// Regular YAML to Remark parser.
-struct YAMLRemarkParser : public Parser {
+struct YAMLRemarkParser : public RemarkParser {
/// The string table used for parsing strings.
- Optional<const ParsedStringTable *> StrTab;
+ Optional<ParsedStringTable> StrTab;
/// Last error message that can come from the YAML parser diagnostics.
/// We need this for catching errors in the constructor.
std::string LastErrorMessage;
@@ -58,17 +59,20 @@ struct YAMLRemarkParser : public Parser {
yaml::Stream Stream;
/// Iterator in the YAML stream.
yaml::document_iterator YAMLIt;
+ /// If we parse remark metadata in separate mode, we need to open a new file
+ /// and parse that.
+ std::unique_ptr<MemoryBuffer> SeparateBuf;
- YAMLRemarkParser(StringRef Buf,
- Optional<const ParsedStringTable *> StrTab = None);
+ YAMLRemarkParser(StringRef Buf);
Expected<std::unique_ptr<Remark>> next() override;
- static bool classof(const Parser *P) {
+ static bool classof(const RemarkParser *P) {
return P->ParserFormat == Format::YAML;
}
-private:
+protected:
+ YAMLRemarkParser(StringRef Buf, Optional<ParsedStringTable> StrTab);
/// Create a YAMLParseError error from an existing error generated by the YAML
/// parser.
/// If there is no error, this returns Success.
@@ -82,7 +86,7 @@ private:
/// Parse one key to a string.
Expected<StringRef> parseKey(yaml::KeyValueNode &Node);
/// Parse one value to a string.
- Expected<StringRef> parseStr(yaml::KeyValueNode &Node);
+ virtual Expected<StringRef> parseStr(yaml::KeyValueNode &Node);
/// Parse one value to an unsigned.
Expected<unsigned> parseUnsigned(yaml::KeyValueNode &Node);
/// Parse a debug location.
@@ -90,6 +94,26 @@ private:
/// Parse an argument.
Expected<Argument> parseArg(yaml::Node &Node);
};
+
+/// YAML with a string table to Remark parser.
+struct YAMLStrTabRemarkParser : public YAMLRemarkParser {
+ YAMLStrTabRemarkParser(StringRef Buf, ParsedStringTable StrTab)
+ : YAMLRemarkParser(Buf, std::move(StrTab)) {}
+
+ static bool classof(const RemarkParser *P) {
+ return P->ParserFormat == Format::YAMLStrTab;
+ }
+
+protected:
+ /// Parse one value to a string.
+ Expected<StringRef> parseStr(yaml::KeyValueNode &Node) override;
+};
+
+Expected<std::unique_ptr<YAMLRemarkParser>>
+createYAMLParserFromMeta(StringRef Buf,
+ Optional<ParsedStringTable> StrTab = None,
+ Optional<StringRef> ExternalFilePrependPath = None);
+
} // end namespace remarks
} // end namespace llvm
diff --git a/lib/Remarks/YAMLRemarkSerializer.cpp b/lib/Remarks/YAMLRemarkSerializer.cpp
index d64ae8e12ab0..3a42fe0678eb 100644
--- a/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -11,16 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Remarks/YAMLRemarkSerializer.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
using namespace llvm::remarks;
-cl::opt<bool> RemarksYAMLStringTable(
- "remarks-yaml-string-table", cl::init(false), cl::Hidden,
- cl::desc("Enable the usage of a string table with YAML remarks."));
-
// Use the same keys whether we use a string table or not (respectively, T is an
// unsigned or a StringRef).
template <typename T>
@@ -60,11 +56,14 @@ template <> struct MappingTraits<remarks::Remark *> {
else
llvm_unreachable("Unknown remark type");
- if (Optional<StringTable> &StrTab =
- reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
- unsigned PassID = StrTab->add(Remark->PassName).first;
- unsigned NameID = StrTab->add(Remark->RemarkName).first;
- unsigned FunctionID = StrTab->add(Remark->FunctionName).first;
+ if (auto *Serializer = dyn_cast<YAMLStrTabRemarkSerializer>(
+ reinterpret_cast<RemarkSerializer *>(io.getContext()))) {
+ assert(Serializer->StrTab.hasValue() &&
+ "YAMLStrTabSerializer with no StrTab.");
+ StringTable &StrTab = *Serializer->StrTab;
+ unsigned PassID = StrTab.add(Remark->PassName).first;
+ unsigned NameID = StrTab.add(Remark->RemarkName).first;
+ unsigned FunctionID = StrTab.add(Remark->FunctionName).first;
mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID,
Remark->Hotness, Remark->Args);
} else {
@@ -82,9 +81,12 @@ template <> struct MappingTraits<RemarkLocation> {
unsigned Line = RL.SourceLine;
unsigned Col = RL.SourceColumn;
- if (Optional<StringTable> &StrTab =
- reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
- unsigned FileID = StrTab->add(File).first;
+ if (auto *Serializer = dyn_cast<YAMLStrTabRemarkSerializer>(
+ reinterpret_cast<RemarkSerializer *>(io.getContext()))) {
+ assert(Serializer->StrTab.hasValue() &&
+ "YAMLStrTabSerializer with no StrTab.");
+ StringTable &StrTab = *Serializer->StrTab;
+ unsigned FileID = StrTab.add(File).first;
io.mapRequired("File", FileID);
} else {
io.mapRequired("File", File);
@@ -101,7 +103,7 @@ template <> struct MappingTraits<RemarkLocation> {
/// newlines in strings.
struct StringBlockVal {
StringRef Value;
- StringBlockVal(const std::string &Value) : Value(Value) {}
+ StringBlockVal(StringRef R) : Value(R) {}
};
template <> struct BlockScalarTraits<StringBlockVal> {
@@ -134,9 +136,12 @@ template <> struct MappingTraits<Argument> {
static void mapping(IO &io, Argument &A) {
assert(io.outputting() && "input not yet implemented");
- if (Optional<StringTable> &StrTab =
- reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
- auto ValueID = StrTab->add(A.Val).first;
+ if (auto *Serializer = dyn_cast<YAMLStrTabRemarkSerializer>(
+ reinterpret_cast<RemarkSerializer *>(io.getContext()))) {
+ assert(Serializer->StrTab.hasValue() &&
+ "YAMLStrTabSerializer with no StrTab.");
+ StringTable &StrTab = *Serializer->StrTab;
+ auto ValueID = StrTab.add(A.Val).first;
io.mapRequired(A.Key.data(), ValueID);
} else if (StringRef(A.Val).count('\n') > 1) {
StringBlockVal S(A.Val);
@@ -153,15 +158,100 @@ template <> struct MappingTraits<Argument> {
LLVM_YAML_IS_SEQUENCE_VECTOR(Argument)
-YAMLSerializer::YAMLSerializer(raw_ostream &OS, UseStringTable UseStringTable)
- : Serializer(OS), YAMLOutput(OS, reinterpret_cast<void *>(this)) {
- if (UseStringTable == remarks::UseStringTable::Yes || RemarksYAMLStringTable)
- StrTab.emplace();
+YAMLRemarkSerializer::YAMLRemarkSerializer(raw_ostream &OS, SerializerMode Mode,
+ Optional<StringTable> StrTabIn)
+ : YAMLRemarkSerializer(Format::YAML, OS, Mode, std::move(StrTabIn)) {}
+
+YAMLRemarkSerializer::YAMLRemarkSerializer(Format SerializerFormat,
+ raw_ostream &OS, SerializerMode Mode,
+ Optional<StringTable> StrTabIn)
+ : RemarkSerializer(SerializerFormat, OS, Mode),
+ YAMLOutput(OS, reinterpret_cast<void *>(this)) {
+ StrTab = std::move(StrTabIn);
}
-void YAMLSerializer::emit(const Remark &Remark) {
+void YAMLRemarkSerializer::emit(const Remark &Remark) {
// Again, YAMLTraits expect a non-const object for inputting, but we're not
// using that here.
auto R = const_cast<remarks::Remark *>(&Remark);
YAMLOutput << R;
}
+
+std::unique_ptr<MetaSerializer>
+YAMLRemarkSerializer::metaSerializer(raw_ostream &OS,
+ Optional<StringRef> ExternalFilename) {
+ return std::make_unique<YAMLMetaSerializer>(OS, ExternalFilename);
+}
+
+void YAMLStrTabRemarkSerializer::emit(const Remark &Remark) {
+ // In standalone mode, for the serializer with a string table, emit the
+ // metadata first and set DidEmitMeta to avoid emitting it again.
+ if (Mode == SerializerMode::Standalone && !DidEmitMeta) {
+ std::unique_ptr<MetaSerializer> MetaSerializer =
+ metaSerializer(OS, /*ExternalFilename=*/None);
+ MetaSerializer->emit();
+ DidEmitMeta = true;
+ }
+
+ // Then do the usual remark emission.
+ YAMLRemarkSerializer::emit(Remark);
+}
+
+std::unique_ptr<MetaSerializer> YAMLStrTabRemarkSerializer::metaSerializer(
+ raw_ostream &OS, Optional<StringRef> ExternalFilename) {
+ assert(StrTab);
+ return std::make_unique<YAMLStrTabMetaSerializer>(OS, ExternalFilename,
+ *StrTab);
+}
+
+static void emitMagic(raw_ostream &OS) {
+ // Emit the magic number.
+ OS << remarks::Magic;
+ // Explicitly emit a '\0'.
+ OS.write('\0');
+}
+
+static void emitVersion(raw_ostream &OS) {
+ // Emit the version number: little-endian uint64_t.
+ std::array<char, 8> Version;
+ support::endian::write64le(Version.data(), remarks::CurrentRemarkVersion);
+ OS.write(Version.data(), Version.size());
+}
+
+static void emitStrTab(raw_ostream &OS, Optional<const StringTable *> StrTab) {
+ // Emit the string table in the section.
+ uint64_t StrTabSize = StrTab ? (*StrTab)->SerializedSize : 0;
+ // Emit the total size of the string table (the size itself excluded):
+ // little-endian uint64_t.
+ // Note: even if no string table is used, emit 0.
+ std::array<char, 8> StrTabSizeBuf;
+ support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
+ OS.write(StrTabSizeBuf.data(), StrTabSizeBuf.size());
+ if (StrTab)
+ (*StrTab)->serialize(OS);
+}
+
+static void emitExternalFile(raw_ostream &OS, StringRef Filename) {
+ // Emit the null-terminated absolute path to the remark file.
+ SmallString<128> FilenameBuf = Filename;
+ sys::fs::make_absolute(FilenameBuf);
+ assert(!FilenameBuf.empty() && "The filename can't be empty.");
+ OS.write(FilenameBuf.data(), FilenameBuf.size());
+ OS.write('\0');
+}
+
+void YAMLMetaSerializer::emit() {
+ emitMagic(OS);
+ emitVersion(OS);
+ emitStrTab(OS, None);
+ if (ExternalFilename)
+ emitExternalFile(OS, *ExternalFilename);
+}
+
+void YAMLStrTabMetaSerializer::emit() {
+ emitMagic(OS);
+ emitVersion(OS);
+ emitStrTab(OS, &StrTab);
+ if (ExternalFilename)
+ emitExternalFile(OS, *ExternalFilename);
+}
diff --git a/lib/Support/AArch64TargetParser.cpp b/lib/Support/AArch64TargetParser.cpp
index df4caa1f07fd..6f1d6d50eee2 100644
--- a/lib/Support/AArch64TargetParser.cpp
+++ b/lib/Support/AArch64TargetParser.cpp
@@ -96,8 +96,8 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
Features.push_back("+sve2-sm4");
if (Extensions & AEK_SVE2SHA3)
Features.push_back("+sve2-sha3");
- if (Extensions & AEK_BITPERM)
- Features.push_back("+bitperm");
+ if (Extensions & AEK_SVE2BITPERM)
+ Features.push_back("+sve2-bitperm");
if (Extensions & AEK_RCPC)
Features.push_back("+rcpc");
diff --git a/lib/Support/ABIBreak.cpp b/lib/Support/ABIBreak.cpp
new file mode 100644
index 000000000000..247b635e02b8
--- /dev/null
+++ b/lib/Support/ABIBreak.cpp
@@ -0,0 +1,24 @@
+//===----- lib/Support/ABIBreak.cpp - EnableABIBreakingChecks -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/abi-breaking.h"
+
+#ifndef _MSC_VER
+namespace llvm {
+
+// One of these two variables will be referenced by a symbol defined in
+// llvm-config.h. We provide a link-time (or load time for DSO) failure when
+// there is a mismatch in the build configuration of the API client and LLVM.
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+int EnableABIBreakingChecks;
+#else
+int DisableABIBreakingChecks;
+#endif
+
+} // end namespace llvm
+#endif
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 43173311cd80..758fe8b4f866 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -401,6 +401,33 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
}
}
+void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
+ uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
+ subBits &= maskBits;
+ if (isSingleWord()) {
+ U.VAL &= ~(maskBits << bitPosition);
+ U.VAL |= subBits << bitPosition;
+ return;
+ }
+
+ unsigned loBit = whichBit(bitPosition);
+ unsigned loWord = whichWord(bitPosition);
+ unsigned hiWord = whichWord(bitPosition + numBits - 1);
+ if (loWord == hiWord) {
+ U.pVal[loWord] &= ~(maskBits << loBit);
+ U.pVal[loWord] |= subBits << loBit;
+ return;
+ }
+
+ static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
+ unsigned wordBits = 8 * sizeof(WordType);
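+ // The insertion spans two words: the low (wordBits - loBit) bits of subBits
+ // go into loWord starting at loBit; the remaining high bits land at the
+ // bottom of hiWord.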
+ U.pVal[loWord] &= ~(maskBits << loBit);
+ U.pVal[loWord] |= subBits << loBit;
+
+ U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
+ U.pVal[hiWord] |= subBits >> (wordBits - loBit);
+}
+
APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
assert(numBits > 0 && "Can't extract zero bits");
assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
@@ -438,6 +465,31 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
return Result.clearUnusedBits();
}
+uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
+ unsigned bitPosition) const {
+ assert(numBits > 0 && "Can't extract zero bits");
+ assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
+ "Illegal bit extraction");
+ assert(numBits <= 64 && "Illegal bit extraction");
+
+ uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
+ if (isSingleWord())
+ return (U.VAL >> bitPosition) & maskBits;
+
+ unsigned loBit = whichBit(bitPosition);
+ unsigned loWord = whichWord(bitPosition);
+ unsigned hiWord = whichWord(bitPosition + numBits - 1);
+ if (loWord == hiWord)
+ return (U.pVal[loWord] >> loBit) & maskBits;
+
+ static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
+ unsigned wordBits = 8 * sizeof(WordType);
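+ // Combine the high bits of loWord with the low bits of hiWord, then mask
+ // the result down to numBits.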
+ uint64_t retBits = U.pVal[loWord] >> loBit;
+ retBits |= U.pVal[hiWord] << (wordBits - loBit);
+ retBits &= maskBits;
+ return retBits;
+}
+
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
assert(!str.empty() && "Invalid string length");
assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
diff --git a/lib/Support/ARMTargetParser.cpp b/lib/Support/ARMTargetParser.cpp
index be948cfc95d4..ce5daa7fe58c 100644
--- a/lib/Support/ARMTargetParser.cpp
+++ b/lib/Support/ARMTargetParser.cpp
@@ -176,10 +176,8 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
// exist).
{"+fpregs", "-fpregs", FPUVersion::VFPV2, FPURestriction::SP_D16},
- {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::None},
- {"+vfp2d16", "-vfp2d16", FPUVersion::VFPV2, FPURestriction::D16},
- {"+vfp2d16sp", "-vfp2d16sp", FPUVersion::VFPV2, FPURestriction::SP_D16},
- {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::None},
+ {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::D16},
+ {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::SP_D16},
{"+vfp3", "-vfp3", FPUVersion::VFPV3, FPURestriction::None},
{"+vfp3d16", "-vfp3d16", FPUVersion::VFPV3, FPURestriction::D16},
{"+vfp3d16sp", "-vfp3d16sp", FPUVersion::VFPV3, FPURestriction::SP_D16},
@@ -195,7 +193,7 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
{"+fp-armv8sp", "-fp-armv8sp", FPUVersion::VFPV5, FPURestriction::None},
{"+fullfp16", "-fullfp16", FPUVersion::VFPV5_FULLFP16, FPURestriction::SP_D16},
{"+fp64", "-fp64", FPUVersion::VFPV2, FPURestriction::D16},
- {"+d32", "-d32", FPUVersion::VFPV2, FPURestriction::None},
+ {"+d32", "-d32", FPUVersion::VFPV3, FPURestriction::None},
};
for (const auto &Info: FPUFeatureInfoList) {
diff --git a/lib/Support/CRC.cpp b/lib/Support/CRC.cpp
index fd98f3a24003..7c008d3b599d 100644
--- a/lib/Support/CRC.cpp
+++ b/lib/Support/CRC.cpp
@@ -6,63 +6,94 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements llvm::crc32 function.
+// This file contains implementations of CRC functions.
+//
+// The implementation technique is the one mentioned in:
+// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table
+// look-up. Commun. ACM 31, 8 (August 1988)
+//
+// See also Ross N. Williams "A Painless Guide to CRC Error Detection
+// Algorithms" (https://zlib.net/crc_v3.txt) or Hacker's Delight (2nd ed.)
+// Chapter 14 (Figure 14-7 in particular) for how the algorithm works.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/CRC.h"
+
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Config/config.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Threading.h"
-#include <array>
using namespace llvm;
#if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H
-using CRC32Table = std::array<uint32_t, 256>;
-
-static void initCRC32Table(CRC32Table *Tbl) {
- auto Shuffle = [](uint32_t V) {
- return (V & 1) ? (V >> 1) ^ 0xEDB88320U : V >> 1;
- };
-
- for (size_t I = 0; I < Tbl->size(); ++I) {
- uint32_t V = Shuffle(I);
- V = Shuffle(V);
- V = Shuffle(V);
- V = Shuffle(V);
- V = Shuffle(V);
- V = Shuffle(V);
- V = Shuffle(V);
- (*Tbl)[I] = Shuffle(V);
- }
-}
-uint32_t llvm::crc32(uint32_t CRC, StringRef S) {
- static llvm::once_flag InitFlag;
- static CRC32Table Tbl;
- llvm::call_once(InitFlag, initCRC32Table, &Tbl);
+static const uint32_t CRCTable[256] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+ 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+ 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
+ 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+ 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
+ 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+ 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+ 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+ 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+ 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+ 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+ 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+ 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+ 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+ 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+ 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+ 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+ 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d};
- const uint8_t *P = reinterpret_cast<const uint8_t *>(S.data());
- size_t Len = S.size();
+uint32_t llvm::crc32(uint32_t CRC, ArrayRef<uint8_t> Data) {
CRC ^= 0xFFFFFFFFU;
- for (; Len >= 8; Len -= 8) {
- CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
- CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
- CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
- CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
- CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
- CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
- CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
- CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ for (uint8_t Byte : Data) {
+ int TableIdx = (CRC ^ Byte) & 0xff;
+ CRC = CRCTable[TableIdx] ^ (CRC >> 8);
}
- while (Len--)
- CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
return CRC ^ 0xFFFFFFFFU;
}
+
#else
+
#include <zlib.h>
-uint32_t llvm::crc32(uint32_t CRC, StringRef S) {
- return ::crc32(CRC, (const Bytef *)S.data(), S.size());
+uint32_t llvm::crc32(uint32_t CRC, ArrayRef<uint8_t> Data) {
+ return ::crc32(CRC, (const Bytef *)Data.data(), Data.size());
}
+
#endif
+
+uint32_t llvm::crc32(ArrayRef<uint8_t> Data) { return crc32(0, Data); }
+
+void JamCRC::update(ArrayRef<uint8_t> Data) {
+ CRC ^= 0xFFFFFFFFU; // Undo CRC-32 Init.
+ CRC = crc32(CRC, Data);
+ CRC ^= 0xFFFFFFFFU; // Undo CRC-32 XorOut.
+}
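For reference, the 256-entry table introduced above is exactly what the deleted initCRC32Table computed at startup: each entry is its index run through eight iterations of the reflected CRC-32 step with polynomial 0xEDB88320. A sketch that regenerates one entry and exercises the new byte-oriented API (buffer contents are illustrative):

```cpp
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/CRC.h"
#include <cassert>
#include <cstdint>

void crcExample() {
  // Recompute one table entry the way the old initCRC32Table did.
  auto TableEntry = [](uint32_t Index) {
    uint32_t V = Index;
    for (int I = 0; I < 8; ++I)
      V = (V & 1) ? (V >> 1) ^ 0xEDB88320U : V >> 1;
    return V;
  };
  assert(TableEntry(1) == 0x77073096 && "matches CRCTable[1] above");

  const uint8_t Buf[] = {'h', 'i'};
  // One-shot helper added above; equivalent to crc32(0, Buf).
  uint32_t C = llvm::crc32(llvm::ArrayRef<uint8_t>(Buf));

  // JamCRC keeps the ~0 init but skips the final xor-out, hence the
  // Init/XorOut undo dance in update(); the result is the complement.
  llvm::JamCRC J;
  J.update(llvm::ArrayRef<uint8_t>(Buf));
  assert(J.getCRC() == ~C && "JamCRC is CRC-32 without the final xor");
}
```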
diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp
index 9813eec0e433..7a2f6c53435a 100644
--- a/lib/Support/CachePruning.cpp
+++ b/lib/Support/CachePruning.cpp
@@ -45,7 +45,7 @@ struct FileInfo {
/// interval option.
static void writeTimestampFile(StringRef TimestampFile) {
std::error_code EC;
- raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::F_None);
+ raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None);
}
static Expected<std::chrono::seconds> parseDuration(StringRef Duration) {
diff --git a/lib/Support/CodeGenCoverage.cpp b/lib/Support/CodeGenCoverage.cpp
index f39eb7533b43..2db4193ce382 100644
--- a/lib/Support/CodeGenCoverage.cpp
+++ b/lib/Support/CodeGenCoverage.cpp
@@ -101,9 +101,9 @@ bool CodeGenCoverage::emit(StringRef CoveragePrefix,
std::string CoverageFilename = (CoveragePrefix + Pid).str();
std::error_code EC;
- sys::fs::OpenFlags OpenFlags = sys::fs::F_Append;
+ sys::fs::OpenFlags OpenFlags = sys::fs::OF_Append;
std::unique_ptr<ToolOutputFile> CoverageFile =
- llvm::make_unique<ToolOutputFile>(CoverageFilename, EC, OpenFlags);
+ std::make_unique<ToolOutputFile>(CoverageFilename, EC, OpenFlags);
if (EC)
return false;
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 25510fa58ff5..620f7ffd4c9f 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -692,7 +692,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
return false;
}
-static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) {
+bool llvm::cl::ProvidePositionalOption(Option *Handler, StringRef Arg, int i) {
int Dummy = i;
return ProvideOption(Handler, Handler->ArgStr, Arg, 0, nullptr, Dummy);
}
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index c2459256f8fe..9d13fce9cc52 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -10,8 +10,8 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
#include "llvm/Support/ThreadLocal.h"
+#include <mutex>
#include <setjmp.h>
using namespace llvm;
@@ -71,7 +71,7 @@ public:
}
-static ManagedStatic<sys::Mutex> gCrashRecoveryContextMutex;
+static ManagedStatic<std::mutex> gCrashRecoveryContextMutex;
static bool gCrashRecoveryEnabled = false;
static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContext>>
@@ -116,7 +116,7 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
}
void CrashRecoveryContext::Enable() {
- sys::ScopedLock L(*gCrashRecoveryContextMutex);
+ std::lock_guard<std::mutex> L(*gCrashRecoveryContextMutex);
// FIXME: Shouldn't this be a refcount or something?
if (gCrashRecoveryEnabled)
return;
@@ -125,7 +125,7 @@ void CrashRecoveryContext::Enable() {
}
void CrashRecoveryContext::Disable() {
- sys::ScopedLock L(*gCrashRecoveryContextMutex);
+ std::lock_guard<std::mutex> L(*gCrashRecoveryContextMutex);
if (!gCrashRecoveryEnabled)
return;
gCrashRecoveryEnabled = false;
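The same substitution is applied at every locking site in this file; the mutex is still constructed lazily through ManagedStatic, only the lock type changes. A minimal sketch of the idiom, with hypothetical names:

```cpp
#include "llvm/Support/ManagedStatic.h"
#include <mutex>

static llvm::ManagedStatic<std::mutex> GExampleMutex;
static bool GFlagGuardedByMutex = false;

void setFlagOnce() {
  // operator* constructs the mutex on first use, exactly as sys::Mutex
  // was constructed before this change.
  std::lock_guard<std::mutex> L(*GExampleMutex);
  GFlagGuardedByMutex = true;
}
```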
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index 673bbb4d06f4..a98297cdb35f 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -7,111 +7,137 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/SwapByteOrder.h"
+
using namespace llvm;
+static void unexpectedEndReached(Error *E) {
+ if (E)
+ *E = createStringError(errc::illegal_byte_sequence,
+ "unexpected end of data");
+}
+
+static bool isError(Error *E) { return E && *E; }
+
template <typename T>
-static T getU(uint32_t *offset_ptr, const DataExtractor *de,
- bool isLittleEndian, const char *Data) {
+static T getU(uint64_t *offset_ptr, const DataExtractor *de,
+ bool isLittleEndian, const char *Data, llvm::Error *Err) {
+ ErrorAsOutParameter ErrAsOut(Err);
T val = 0;
- uint32_t offset = *offset_ptr;
- if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) {
- std::memcpy(&val, &Data[offset], sizeof(val));
- if (sys::IsLittleEndianHost != isLittleEndian)
- sys::swapByteOrder(val);
-
- // Advance the offset
- *offset_ptr += sizeof(val);
+ if (isError(Err))
+ return val;
+
+ uint64_t offset = *offset_ptr;
+ if (!de->isValidOffsetForDataOfSize(offset, sizeof(T))) {
+ unexpectedEndReached(Err);
+ return val;
}
+ std::memcpy(&val, &Data[offset], sizeof(val));
+ if (sys::IsLittleEndianHost != isLittleEndian)
+ sys::swapByteOrder(val);
+
+ // Advance the offset
+ *offset_ptr += sizeof(val);
return val;
}
template <typename T>
-static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count,
- const DataExtractor *de, bool isLittleEndian, const char *Data){
- uint32_t offset = *offset_ptr;
-
- if (count > 0 && de->isValidOffsetForDataOfSize(offset, sizeof(*dst)*count)) {
- for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
- ++value_ptr, offset += sizeof(*dst))
- *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data);
- // Advance the offset
- *offset_ptr = offset;
- // Return a non-NULL pointer to the converted data as an indicator of
- // success
- return dst;
+static T *getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
+ const DataExtractor *de, bool isLittleEndian, const char *Data,
+ llvm::Error *Err) {
+ ErrorAsOutParameter ErrAsOut(Err);
+ if (isError(Err))
+ return nullptr;
+
+ uint64_t offset = *offset_ptr;
+
+ if (!de->isValidOffsetForDataOfSize(offset, sizeof(*dst) * count)) {
+ unexpectedEndReached(Err);
+ return nullptr;
}
- return nullptr;
+ for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
+ ++value_ptr, offset += sizeof(*dst))
+ *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data, Err);
+ // Advance the offset
+ *offset_ptr = offset;
+ // Return a non-NULL pointer to the converted data as an indicator of
+ // success
+ return dst;
}
-uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const {
- return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data());
+uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const {
+ return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
}
uint8_t *
-DataExtractor::getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const {
+DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const {
return getUs<uint8_t>(offset_ptr, dst, count, this, IsLittleEndian,
- Data.data());
+ Data.data(), nullptr);
}
+uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const {
+ return getUs<uint8_t>(&C.Offset, Dst, Count, this, IsLittleEndian,
+ Data.data(), &C.Err);
+}
-uint16_t DataExtractor::getU16(uint32_t *offset_ptr) const {
- return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data());
+uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const {
+ return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
}
-uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst,
+uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst,
uint32_t count) const {
return getUs<uint16_t>(offset_ptr, dst, count, this, IsLittleEndian,
- Data.data());
+ Data.data(), nullptr);
}
-uint32_t DataExtractor::getU24(uint32_t *offset_ptr) const {
+uint32_t DataExtractor::getU24(uint64_t *offset_ptr) const {
uint24_t ExtractedVal =
- getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data());
+ getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data(), nullptr);
// The 3 bytes are in the correct byte order for the host.
return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
}
-uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const {
- return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data());
+uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const {
+ return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
}
-uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst,
+uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst,
uint32_t count) const {
return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian,
- Data.data());
+ Data.data(), nullptr);
}
-uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const {
- return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data());
+uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const {
+ return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
}
-uint64_t *DataExtractor::getU64(uint32_t *offset_ptr, uint64_t *dst,
+uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst,
uint32_t count) const {
return getUs<uint64_t>(offset_ptr, dst, count, this, IsLittleEndian,
- Data.data());
+ Data.data(), nullptr);
}
-uint64_t
-DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const {
+uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
+ llvm::Error *Err) const {
switch (byte_size) {
case 1:
- return getU8(offset_ptr);
+ return getU8(offset_ptr, Err);
case 2:
- return getU16(offset_ptr);
+ return getU16(offset_ptr, Err);
case 4:
- return getU32(offset_ptr);
+ return getU32(offset_ptr, Err);
case 8:
- return getU64(offset_ptr);
+ return getU64(offset_ptr, Err);
}
llvm_unreachable("getUnsigned unhandled case!");
}
int64_t
-DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const {
+DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
switch (byte_size) {
case 1:
return (int8_t)getU8(offset_ptr);
@@ -125,8 +151,8 @@ DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const {
llvm_unreachable("getSigned unhandled case!");
}
-const char *DataExtractor::getCStr(uint32_t *offset_ptr) const {
- uint32_t offset = *offset_ptr;
+const char *DataExtractor::getCStr(uint64_t *offset_ptr) const {
+ uint64_t offset = *offset_ptr;
StringRef::size_type pos = Data.find('\0', offset);
if (pos != StringRef::npos) {
*offset_ptr = pos + 1;
@@ -135,31 +161,38 @@ const char *DataExtractor::getCStr(uint32_t *offset_ptr) const {
return nullptr;
}
-StringRef DataExtractor::getCStrRef(uint32_t *OffsetPtr) const {
- uint32_t Start = *OffsetPtr;
+StringRef DataExtractor::getCStrRef(uint64_t *offset_ptr) const {
+ uint64_t Start = *offset_ptr;
StringRef::size_type Pos = Data.find('\0', Start);
if (Pos != StringRef::npos) {
- *OffsetPtr = Pos + 1;
+ *offset_ptr = Pos + 1;
return StringRef(Data.data() + Start, Pos - Start);
}
return StringRef();
}
-uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const {
+uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr,
+ llvm::Error *Err) const {
assert(*offset_ptr <= Data.size());
+ ErrorAsOutParameter ErrAsOut(Err);
+ if (isError(Err))
+ return 0;
const char *error;
unsigned bytes_read;
uint64_t result = decodeULEB128(
reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read,
reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error);
- if (error)
+ if (error) {
+ if (Err)
+ *Err = createStringError(errc::illegal_byte_sequence, error);
return 0;
+ }
*offset_ptr += bytes_read;
return result;
}
-int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const {
+int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr) const {
assert(*offset_ptr <= Data.size());
const char *error;
@@ -172,3 +205,14 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const {
*offset_ptr += bytes_read;
return result;
}
+
+void DataExtractor::skip(Cursor &C, uint64_t Length) const {
+ ErrorAsOutParameter ErrAsOut(&C.Err);
+ if (isError(&C.Err))
+ return;
+
+ if (isValidOffsetForDataOfSize(C.Offset, Length))
+ C.Offset += Length;
+ else
+ unexpectedEndReached(&C.Err);
+}
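The Error out-parameter threading above gives the extractor "sticky" failure semantics: once *Err is set, every later accessor returns 0 without advancing the offset, so a run of reads can be checked once at the end (the Cursor overloads build on the same mechanism via C.Err). A usage sketch against the new 64-bit-offset API; the helper name and buffer contents are illustrative:

```cpp
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

void readExample() {
  const char Buf[] = {0x01, 0x02, 0x03, 0x04};
  llvm::DataExtractor DE(llvm::StringRef(Buf, sizeof(Buf)),
                         /*IsLittleEndian=*/true, /*AddressSize=*/4);

  uint64_t Offset = 0; // offsets are now 64-bit throughout
  llvm::Error Err = llvm::Error::success();
  uint16_t A = DE.getU16(&Offset, &Err); // 0x0201
  uint16_t B = DE.getU16(&Offset, &Err); // 0x0403
  uint16_t C = DE.getU16(&Offset, &Err); // past the end: returns 0, sets Err

  if (Err) {
    // Prints "unexpected end of data", from unexpectedEndReached() above.
    llvm::logAllUnhandledErrors(std::move(Err), llvm::errs());
    return;
  }
  (void)A; (void)B; (void)C;
}
```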
diff --git a/lib/Support/Error.cpp b/lib/Support/Error.cpp
index 72bc08af2ddb..9ea08c37478e 100644
--- a/lib/Support/Error.cpp
+++ b/lib/Support/Error.cpp
@@ -87,7 +87,7 @@ std::error_code FileError::convertToErrorCode() const {
Error errorCodeToError(std::error_code EC) {
if (!EC)
return Error::success();
- return Error(llvm::make_unique<ECError>(ECError(EC)));
+ return Error(std::make_unique<ECError>(ECError(EC)));
}
std::error_code errorToErrorCode(Error Err) {
@@ -167,18 +167,3 @@ void LLVMDisposeErrorMessage(char *ErrMsg) { delete[] ErrMsg; }
LLVMErrorTypeId LLVMGetStringErrorTypeId() {
return reinterpret_cast<void *>(&StringError::ID);
}
-
-#ifndef _MSC_VER
-namespace llvm {
-
-// One of these two variables will be referenced by a symbol defined in
-// llvm-config.h. We provide a link-time (or load time for DSO) failure when
-// there is a mismatch in the build configuration of the API client and LLVM.
-#if LLVM_ENABLE_ABI_BREAKING_CHECKS
-int EnableABIBreakingChecks;
-#else
-int DisableABIBreakingChecks;
-#endif
-
-} // end namespace llvm
-#endif
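errorCodeToError and errorToErrorCode remain the round-trip pair for bridging std::error_code-based and Error-based code; only the make_unique spelling changed here (the ABI-breaking-check symbols moved elsewhere). A small round-trip sketch:

```cpp
#include "llvm/Support/Error.h"
#include <cerrno>
#include <system_error>

std::error_code roundTrip() {
  std::error_code EC(ENOENT, std::generic_category());
  llvm::Error E = llvm::errorCodeToError(EC); // a success EC yields Error::success()
  return llvm::errorToErrorCode(std::move(E)); // back to the same value
}
```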
diff --git a/lib/Support/FileCheck.cpp b/lib/Support/FileCheck.cpp
index e0f17787bdf8..841e406a7b69 100644
--- a/lib/Support/FileCheck.cpp
+++ b/lib/Support/FileCheck.cpp
@@ -14,31 +14,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/FileCheck.h"
+#include "FileCheckImpl.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdint>
#include <list>
-#include <map>
#include <tuple>
#include <utility>
using namespace llvm;
-void FileCheckNumericVariable::setValue(uint64_t NewValue) {
- assert(!Value && "Overwriting numeric variable's value is not allowed");
- Value = NewValue;
-}
-
-void FileCheckNumericVariable::clearValue() {
- if (!Value)
- return;
- Value = None;
-}
-
Expected<uint64_t> FileCheckNumericVariableUse::eval() const {
Optional<uint64_t> Value = NumericVariable->getValue();
if (Value)
return *Value;
+
return make_error<FileCheckUndefVarError>(Name);
}
@@ -109,7 +100,7 @@ FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
// StringRef holding all characters considered as horizontal whitespaces by
// FileCheck input canonicalization.
-StringRef SpaceChars = " \t";
+constexpr StringLiteral SpaceChars = " \t";
// Parsing helper function that strips the first character in S and returns it.
static char popFront(StringRef &S) {
@@ -159,7 +150,9 @@ FileCheckPattern::parseNumericVariableDefinition(
Expected<std::unique_ptr<FileCheckNumericVariableUse>>
FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo,
- const SourceMgr &SM) const {
+ Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context,
+ const SourceMgr &SM) {
if (IsPseudo && !Name.equals("@LINE"))
return FileCheckErrorDiagnostic::get(
SM, Name, "invalid pseudo numeric variable '" + Name + "'");
@@ -185,21 +178,25 @@ FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo,
if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
return FileCheckErrorDiagnostic::get(
SM, Name,
- "numeric variable '" + Name + "' defined on the same line as used");
+ "numeric variable '" + Name +
+ "' defined earlier in the same CHECK directive");
- return llvm::make_unique<FileCheckNumericVariableUse>(Name, NumericVariable);
+ return std::make_unique<FileCheckNumericVariableUse>(Name, NumericVariable);
}
Expected<std::unique_ptr<FileCheckExpressionAST>>
FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO,
- const SourceMgr &SM) const {
+ Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context,
+ const SourceMgr &SM) {
if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
// Try to parse as a numeric variable use.
Expected<FileCheckPattern::VariableProperties> ParseVarResult =
parseVariable(Expr, SM);
if (ParseVarResult)
return parseNumericVariableUse(ParseVarResult->Name,
- ParseVarResult->IsPseudo, SM);
+ ParseVarResult->IsPseudo, LineNumber,
+ Context, SM);
if (AO == AllowedOperand::LineVar)
return ParseVarResult.takeError();
// Ignore the error and retry parsing as a literal.
@@ -209,7 +206,7 @@ FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO,
// Otherwise, parse it as a literal.
uint64_t LiteralValue;
if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue))
- return llvm::make_unique<FileCheckExpressionLiteral>(LiteralValue);
+ return std::make_unique<FileCheckExpressionLiteral>(LiteralValue);
return FileCheckErrorDiagnostic::get(SM, Expr,
"invalid operand format '" + Expr + "'");
@@ -223,10 +220,10 @@ static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) {
return LeftOp - RightOp;
}
-Expected<std::unique_ptr<FileCheckExpressionAST>>
-FileCheckPattern::parseBinop(StringRef &Expr,
- std::unique_ptr<FileCheckExpressionAST> LeftOp,
- bool IsLegacyLineExpr, const SourceMgr &SM) const {
+Expected<std::unique_ptr<FileCheckExpressionAST>> FileCheckPattern::parseBinop(
+ StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp,
+ bool IsLegacyLineExpr, Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context, const SourceMgr &SM) {
Expr = Expr.ltrim(SpaceChars);
if (Expr.empty())
return std::move(LeftOp);
@@ -257,12 +254,12 @@ FileCheckPattern::parseBinop(StringRef &Expr,
AllowedOperand AO =
IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any;
Expected<std::unique_ptr<FileCheckExpressionAST>> RightOpResult =
- parseNumericOperand(Expr, AO, SM);
+ parseNumericOperand(Expr, AO, LineNumber, Context, SM);
if (!RightOpResult)
return RightOpResult;
Expr = Expr.ltrim(SpaceChars);
- return llvm::make_unique<FileCheckASTBinop>(EvalBinop, std::move(LeftOp),
+ return std::make_unique<FileCheckASTBinop>(EvalBinop, std::move(LeftOp),
std::move(*RightOpResult));
}
@@ -270,56 +267,60 @@ Expected<std::unique_ptr<FileCheckExpressionAST>>
FileCheckPattern::parseNumericSubstitutionBlock(
StringRef Expr,
Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
- bool IsLegacyLineExpr, const SourceMgr &SM) const {
- // Parse the numeric variable definition.
+ bool IsLegacyLineExpr, Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context, const SourceMgr &SM) {
+ std::unique_ptr<FileCheckExpressionAST> ExpressionAST = nullptr;
+ StringRef DefExpr = StringRef();
DefinedNumericVariable = None;
+ // Save variable definition expression if any.
size_t DefEnd = Expr.find(':');
if (DefEnd != StringRef::npos) {
- StringRef DefExpr = Expr.substr(0, DefEnd);
- StringRef UseExpr = Expr.substr(DefEnd + 1);
+ DefExpr = Expr.substr(0, DefEnd);
+ Expr = Expr.substr(DefEnd + 1);
+ }
- UseExpr = UseExpr.ltrim(SpaceChars);
- if (!UseExpr.empty())
- return FileCheckErrorDiagnostic::get(
- SM, UseExpr,
- "unexpected string after variable definition: '" + UseExpr + "'");
+ // Parse the expression itself.
+ Expr = Expr.ltrim(SpaceChars);
+ if (!Expr.empty()) {
+ // The first operand in a legacy @LINE expression is always the @LINE
+ // pseudo variable.
+ AllowedOperand AO =
+ IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
+ Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
+ parseNumericOperand(Expr, AO, LineNumber, Context, SM);
+ while (ParseResult && !Expr.empty()) {
+ ParseResult = parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr,
+ LineNumber, Context, SM);
+ // Legacy @LINE expressions only allow 2 operands.
+ if (ParseResult && IsLegacyLineExpr && !Expr.empty())
+ return FileCheckErrorDiagnostic::get(
+ SM, Expr,
+ "unexpected characters at end of expression '" + Expr + "'");
+ }
+ if (!ParseResult)
+ return ParseResult;
+ ExpressionAST = std::move(*ParseResult);
+ }
+ // Parse the numeric variable definition.
+ if (DefEnd != StringRef::npos) {
DefExpr = DefExpr.ltrim(SpaceChars);
Expected<FileCheckNumericVariable *> ParseResult =
parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM);
+
if (!ParseResult)
return ParseResult.takeError();
DefinedNumericVariable = *ParseResult;
-
- return nullptr;
}
- // Parse the expression itself.
- Expr = Expr.ltrim(SpaceChars);
- // The first operand in a legacy @LINE expression is always the @LINE pseudo
- // variable.
- AllowedOperand AO =
- IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
- Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
- parseNumericOperand(Expr, AO, SM);
- while (ParseResult && !Expr.empty()) {
- ParseResult =
- parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, SM);
- // Legacy @LINE expressions only allow 2 operands.
- if (ParseResult && IsLegacyLineExpr && !Expr.empty())
- return FileCheckErrorDiagnostic::get(
- SM, Expr,
- "unexpected characters at end of expression '" + Expr + "'");
- }
- if (!ParseResult)
- return ParseResult;
- return std::move(*ParseResult);
+ return std::move(ExpressionAST);
}
bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
SourceMgr &SM,
const FileCheckRequest &Req) {
bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
+ IgnoreCase = Req.IgnoreCase;
PatternLoc = SMLoc::getFromPointer(PatternStr.data());
@@ -396,14 +397,15 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
continue;
}
- // String and numeric substitution blocks. String substitution blocks come
+ // String and numeric substitution blocks. Pattern substitution blocks come
// in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
// other regex) and assigns it to the string variable 'foo'. The latter
- // substitutes foo's value. Numeric substitution blocks work the same way
- // as string ones, but start with a '#' sign after the double brackets.
- // Both string and numeric variable names must satisfy the regular
- // expression "[a-zA-Z_][0-9a-zA-Z_]*" to be valid, as this helps catch
- // some common errors.
+ // substitutes foo's value. Numeric substitution blocks recognize the same
+ // form as string ones, but start with a '#' sign after the double
+ // brackets. They also accept a combined form which sets a numeric variable
+ // to the evaluation of an expression. Both string and numeric variable
+ // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be
+ // valid, as this helps catch some common errors.
if (PatternStr.startswith("[[")) {
StringRef UnparsedPatternStr = PatternStr.substr(2);
// Find the closing bracket pair ending the match. End is going to be an
@@ -424,6 +426,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
PatternStr = UnparsedPatternStr.substr(End + 2);
bool IsDefinition = false;
+ bool SubstNeeded = false;
// Whether the substitution block is a legacy use of @LINE with string
// substitution block syntax.
bool IsLegacyLineExpr = false;
@@ -454,6 +457,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
bool IsPseudo = ParseVarResult->IsPseudo;
IsDefinition = (VarEndIdx != StringRef::npos);
+ SubstNeeded = !IsDefinition;
if (IsDefinition) {
if ((IsPseudo || !MatchStr.consume_front(":"))) {
SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
@@ -488,22 +492,61 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
if (IsNumBlock) {
Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
- IsLegacyLineExpr, SM);
+ IsLegacyLineExpr, LineNumber, Context,
+ SM);
if (!ParseResult) {
logAllUnhandledErrors(ParseResult.takeError(), errs());
return true;
}
ExpressionAST = std::move(*ParseResult);
+ SubstNeeded = ExpressionAST != nullptr;
if (DefinedNumericVariable) {
IsDefinition = true;
DefName = (*DefinedNumericVariable)->getName();
- MatchRegexp = StringRef("[0-9]+");
- } else
+ }
+ if (SubstNeeded)
SubstStr = MatchStr;
+ else
+ MatchRegexp = "[0-9]+";
}
+ // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]].
+ if (IsDefinition) {
+ RegExStr += '(';
+ ++SubstInsertIdx;
+
+ if (IsNumBlock) {
+ FileCheckNumericVariableMatch NumericVariableDefinition = {
+ *DefinedNumericVariable, CurParen};
+ NumericVariableDefs[DefName] = NumericVariableDefinition;
+ // This store is done here rather than in match() to allow
+ // parseNumericVariableUse() to get the pointer to the class instance
+ // of the right variable definition corresponding to a given numeric
+ // variable use.
+ Context->GlobalNumericVariableTable[DefName] =
+ *DefinedNumericVariable;
+ } else {
+ VariableDefs[DefName] = CurParen;
+ // Mark string variable as defined to detect collisions between
+ // string and numeric variables in parseNumericVariableUse() and
+ // defineCmdlineVariables() when the latter is created later than the
+ // former. We cannot reuse GlobalVariableTable for this by populating
+ // it with an empty string since we would then lose the ability to
+ // detect the use of an undefined variable in match().
+ Context->DefinedVariableTable[DefName] = true;
+ }
+
+ ++CurParen;
+ }
+
+ if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM))
+ return true;
+
+ if (IsDefinition)
+ RegExStr += ')';
+
// Handle substitutions: [[foo]] and [[#<foo expr>]].
- if (!IsDefinition) {
+ if (SubstNeeded) {
// Handle substitution of string variables that were defined earlier on
// the same line by emitting a backreference. Expressions do not
// support substituting a numeric variable defined on the same line.
@@ -526,37 +569,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
: Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
Substitutions.push_back(Substitution);
}
- continue;
- }
-
- // Handle variable definitions: [[<def>:(...)]] and
- // [[#(...)<def>:(...)]].
- if (IsNumBlock) {
- FileCheckNumericVariableMatch NumericVariableDefinition = {
- *DefinedNumericVariable, CurParen};
- NumericVariableDefs[DefName] = NumericVariableDefinition;
- // This store is done here rather than in match() to allow
- // parseNumericVariableUse() to get the pointer to the class instance
- // of the right variable definition corresponding to a given numeric
- // variable use.
- Context->GlobalNumericVariableTable[DefName] = *DefinedNumericVariable;
- } else {
- VariableDefs[DefName] = CurParen;
- // Mark the string variable as defined to detect collisions between
- // string and numeric variables in parseNumericVariableUse() and
- // DefineCmdlineVariables() when the latter is created later than the
- // former. We cannot reuse GlobalVariableTable for this by populating
- // it with an empty string since we would then lose the ability to
- // detect the use of an undefined variable in match().
- Context->DefinedVariableTable[DefName] = true;
}
- RegExStr += '(';
- ++CurParen;
-
- if (AddRegExToRegEx(MatchRegexp, CurParen, SM))
- return true;
-
- RegExStr += ')';
}
// Handle fixed string matches.
@@ -607,7 +620,8 @@ Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
// If this is a fixed string pattern, just match it now.
if (!FixedStr.empty()) {
MatchLen = FixedStr.size();
- size_t Pos = Buffer.find(FixedStr);
+ size_t Pos = IgnoreCase ? Buffer.find_lower(FixedStr)
+ : Buffer.find(FixedStr);
if (Pos == StringRef::npos)
return make_error<FileCheckNotFoundError>();
return Pos;
@@ -631,10 +645,8 @@ Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
for (const auto &Substitution : Substitutions) {
// Substitute and check for failure (e.g. use of undefined variable).
Expected<std::string> Value = Substitution->getResult();
- if (!Value) {
- Context->LineVariable->clearValue();
+ if (!Value)
return Value.takeError();
- }
// Plop it into the regex at the adjusted offset.
TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset,
@@ -644,11 +656,13 @@ Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
// Match the newly constructed regex.
RegExToMatch = TmpStr;
- Context->LineVariable->clearValue();
}
SmallVector<StringRef, 4> MatchInfo;
- if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
+ unsigned int Flags = Regex::Newline;
+ if (IgnoreCase)
+ Flags |= Regex::IgnoreCase;
+ if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo))
return make_error<FileCheckNotFoundError>();
// Successful regex match.
@@ -824,7 +838,7 @@ template <class... Types>
FileCheckNumericVariable *
FileCheckPatternContext::makeNumericVariable(Types... args) {
NumericVariables.push_back(
- llvm::make_unique<FileCheckNumericVariable>(args...));
+ std::make_unique<FileCheckNumericVariable>(args...));
return NumericVariables.back().get();
}
@@ -832,14 +846,14 @@ FileCheckSubstitution *
FileCheckPatternContext::makeStringSubstitution(StringRef VarName,
size_t InsertIdx) {
Substitutions.push_back(
- llvm::make_unique<FileCheckStringSubstitution>(this, VarName, InsertIdx));
+ std::make_unique<FileCheckStringSubstitution>(this, VarName, InsertIdx));
return Substitutions.back().get();
}
FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution(
StringRef ExpressionStr,
std::unique_ptr<FileCheckExpressionAST> ExpressionAST, size_t InsertIdx) {
- Substitutions.push_back(llvm::make_unique<FileCheckNumericSubstitution>(
+ Substitutions.push_back(std::make_unique<FileCheckNumericSubstitution>(
this, ExpressionStr, std::move(ExpressionAST), InsertIdx));
return Substitutions.back().get();
}
@@ -1108,16 +1122,22 @@ void FileCheckPatternContext::createLineVariable() {
GlobalNumericVariableTable[LineName] = LineVariable;
}
-bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
- std::vector<FileCheckString> &CheckStrings) {
+FileCheck::FileCheck(FileCheckRequest Req)
+ : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()),
+ CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {}
+
+FileCheck::~FileCheck() = default;
+
+bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer,
+ Regex &PrefixRE) {
Error DefineError =
- PatternContext.defineCmdlineVariables(Req.GlobalDefines, SM);
+ PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM);
if (DefineError) {
logAllUnhandledErrors(std::move(DefineError), errs());
return true;
}
- PatternContext.createLineVariable();
+ PatternContext->createLineVariable();
std::vector<FileCheckPattern> ImplicitNegativeChecks;
for (const auto &PatternString : Req.ImplicitCheckNot) {
@@ -1133,7 +1153,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
ImplicitNegativeChecks.push_back(
- FileCheckPattern(Check::CheckNot, &PatternContext));
+ FileCheckPattern(Check::CheckNot, PatternContext.get()));
ImplicitNegativeChecks.back().parsePattern(PatternInBuffer,
"IMPLICIT-CHECK", SM, Req);
}
@@ -1196,7 +1216,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
// Parse the pattern.
- FileCheckPattern P(CheckTy, &PatternContext, LineNumber);
+ FileCheckPattern P(CheckTy, PatternContext.get(), LineNumber);
if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req))
return true;
@@ -1214,7 +1234,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
// Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
CheckTy == Check::CheckEmpty) &&
- CheckStrings.empty()) {
+ CheckStrings->empty()) {
StringRef Type = CheckTy == Check::CheckNext
? "NEXT"
: CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
@@ -1232,21 +1252,21 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
}
// Okay, add the string we captured to the output vector and move on.
- CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
- std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
+ CheckStrings->emplace_back(P, UsedPrefix, PatternLoc);
+ std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
DagNotMatches = ImplicitNegativeChecks;
}
// Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
// prefix as a filler for the error message.
if (!DagNotMatches.empty()) {
- CheckStrings.emplace_back(
- FileCheckPattern(Check::CheckEOF, &PatternContext, LineNumber + 1),
+ CheckStrings->emplace_back(
+ FileCheckPattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1),
*Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
- std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
+ std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
}
- if (CheckStrings.empty()) {
+ if (CheckStrings->empty()) {
errs() << "error: no check strings found with prefix"
<< (Req.CheckPrefixes.size() > 1 ? "es " : " ");
auto I = Req.CheckPrefixes.begin();
@@ -1704,7 +1724,7 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
// A check prefix must contain only alphanumeric, hyphens and underscores.
static bool ValidateCheckPrefix(StringRef CheckPrefix) {
- Regex Validator("^[a-zA-Z0-9_-]*$");
+ static const Regex Validator("^[a-zA-Z0-9_-]*$");
return Validator.match(CheckPrefix);
}
@@ -1759,11 +1779,32 @@ Error FileCheckPatternContext::defineCmdlineVariables(
unsigned I = 0;
Error Errs = Error::success();
std::string CmdlineDefsDiag;
- StringRef Prefix1 = "Global define #";
- StringRef Prefix2 = ": ";
- for (StringRef CmdlineDef : CmdlineDefines)
- CmdlineDefsDiag +=
- (Prefix1 + Twine(++I) + Prefix2 + CmdlineDef + "\n").str();
+ SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices;
+ for (StringRef CmdlineDef : CmdlineDefines) {
+ std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str();
+ size_t EqIdx = CmdlineDef.find('=');
+ if (EqIdx == StringRef::npos) {
+ CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0));
+ continue;
+ }
+ // Numeric variable definition.
+ if (CmdlineDef[0] == '#') {
+ // Append a copy of the command-line definition adapted to use the same
+ // format as in the input file to be able to reuse
+ // parseNumericSubstitutionBlock.
+ CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str();
+ std::string SubstitutionStr = CmdlineDef;
+ SubstitutionStr[EqIdx] = ':';
+ CmdlineDefsIndices.push_back(
+ std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size()));
+ CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str();
+ } else {
+ CmdlineDefsDiag += DefPrefix;
+ CmdlineDefsIndices.push_back(
+ std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size()));
+ CmdlineDefsDiag += (CmdlineDef + "\n").str();
+ }
+ }
// Create a buffer with fake command line content in order to display
// parsing diagnostic with location information and point to the
@@ -1773,14 +1814,10 @@ Error FileCheckPatternContext::defineCmdlineVariables(
StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());
- SmallVector<StringRef, 4> CmdlineDefsDiagVec;
- CmdlineDefsDiagRef.split(CmdlineDefsDiagVec, '\n', -1 /*MaxSplit*/,
- false /*KeepEmpty*/);
- for (StringRef CmdlineDefDiag : CmdlineDefsDiagVec) {
- unsigned DefStart = CmdlineDefDiag.find(Prefix2) + Prefix2.size();
- StringRef CmdlineDef = CmdlineDefDiag.substr(DefStart);
- size_t EqIdx = CmdlineDef.find('=');
- if (EqIdx == StringRef::npos) {
+ for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) {
+ StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first,
+ CmdlineDefIndices.second);
+ if (CmdlineDef.empty()) {
Errs = joinErrors(
std::move(Errs),
FileCheckErrorDiagnostic::get(
@@ -1790,31 +1827,35 @@ Error FileCheckPatternContext::defineCmdlineVariables(
// Numeric variable definition.
if (CmdlineDef[0] == '#') {
- StringRef CmdlineName = CmdlineDef.substr(1, EqIdx - 1);
- Expected<FileCheckNumericVariable *> ParseResult =
- FileCheckPattern::parseNumericVariableDefinition(CmdlineName, this,
- None, SM);
- if (!ParseResult) {
- Errs = joinErrors(std::move(Errs), ParseResult.takeError());
+ // Now parse the definition both to check that the syntax is correct and
+ // to create the necessary class instance.
+ StringRef CmdlineDefExpr = CmdlineDef.substr(1);
+ Optional<FileCheckNumericVariable *> DefinedNumericVariable;
+ Expected<std::unique_ptr<FileCheckExpressionAST>> ExpressionASTResult =
+ FileCheckPattern::parseNumericSubstitutionBlock(
+ CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM);
+ if (!ExpressionASTResult) {
+ Errs = joinErrors(std::move(Errs), ExpressionASTResult.takeError());
continue;
}
-
- StringRef CmdlineVal = CmdlineDef.substr(EqIdx + 1);
- uint64_t Val;
- if (CmdlineVal.getAsInteger(10, Val)) {
- Errs = joinErrors(std::move(Errs),
- FileCheckErrorDiagnostic::get(
- SM, CmdlineVal,
- "invalid value in numeric variable definition '" +
- CmdlineVal + "'"));
+ std::unique_ptr<FileCheckExpressionAST> ExpressionAST =
+ std::move(*ExpressionASTResult);
+ // Now evaluate the expression whose value this variable should be set
+ // to, since the expression of a command-line variable definition should
+ // only use variables defined earlier on the command-line. If not, this
+ // is an error and we report it.
+ Expected<uint64_t> Value = ExpressionAST->eval();
+ if (!Value) {
+ Errs = joinErrors(std::move(Errs), Value.takeError());
continue;
}
- FileCheckNumericVariable *DefinedNumericVariable = *ParseResult;
- DefinedNumericVariable->setValue(Val);
+
+ assert(DefinedNumericVariable && "No variable defined");
+ (*DefinedNumericVariable)->setValue(*Value);
// Record this variable definition.
- GlobalNumericVariableTable[DefinedNumericVariable->getName()] =
- DefinedNumericVariable;
+ GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] =
+ *DefinedNumericVariable;
} else {
// String variable definition.
std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
@@ -1851,7 +1892,7 @@ Error FileCheckPatternContext::defineCmdlineVariables(
}
GlobalVariableTable.insert(CmdlineNameVal);
// Mark the string variable as defined to detect collisions between
- // string and numeric variables in DefineCmdlineVariables when the latter
+ // string and numeric variables in defineCmdlineVariables when the latter
// is created later than the former. We cannot reuse GlobalVariableTable
// for this by populating it with an empty string since we would then
// lose the ability to detect the use of an undefined variable in
@@ -1887,18 +1928,17 @@ void FileCheckPatternContext::clearLocalVars() {
GlobalNumericVariableTable.erase(Var);
}
-bool FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
- ArrayRef<FileCheckString> CheckStrings,
+bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer,
std::vector<FileCheckDiag> *Diags) {
bool ChecksFailed = false;
- unsigned i = 0, j = 0, e = CheckStrings.size();
+ unsigned i = 0, j = 0, e = CheckStrings->size();
while (true) {
StringRef CheckRegion;
if (j == e) {
CheckRegion = Buffer;
} else {
- const FileCheckString &CheckLabelStr = CheckStrings[j];
+ const FileCheckString &CheckLabelStr = (*CheckStrings)[j];
if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
++j;
continue;
@@ -1921,10 +1961,10 @@ bool FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
// CHECK-LABEL and it would clear variables defined on the command-line
// before they get used.
if (i != 0 && Req.EnableVarScope)
- PatternContext.clearLocalVars();
+ PatternContext->clearLocalVars();
for (; i != j; ++i) {
- const FileCheckString &CheckStr = CheckStrings[i];
+ const FileCheckString &CheckStr = (*CheckStrings)[i];
// Check each string within the scanned region, including a second check
// of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
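The hunks above complete the move to a pimpl-style FileCheck class: readCheckFile and checkInput now operate on state owned by the object rather than on caller-provided vectors. A hedged sketch of driving it end to end; the helper name is hypothetical, and the real FileCheck tool additionally canonicalizes and registers the input buffer before checking:

```cpp
#include "llvm/Support/FileCheck.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"

// Returns true iff all checks in CheckFileText match InputText.
bool runFileCheck(llvm::StringRef CheckFileText, llvm::StringRef InputText) {
  llvm::FileCheckRequest Req;
  Req.CheckPrefixes.push_back("CHECK");
  // -D style defines; a leading '#' marks a numeric variable definition.
  Req.GlobalDefines.push_back("VAR=value");
  Req.GlobalDefines.push_back("#NUM=42");

  llvm::FileCheck FC(Req);
  llvm::SourceMgr SM;

  // Diagnostics point into buffers owned by the SourceMgr, so register the
  // check file there and parse the registered copy.
  auto CheckBuf = llvm::MemoryBuffer::getMemBufferCopy(CheckFileText, "checks");
  llvm::StringRef CheckText = CheckBuf->getBuffer();
  SM.AddNewSourceBuffer(std::move(CheckBuf), llvm::SMLoc());

  llvm::Regex PrefixRE = FC.buildCheckPrefixRegex();
  if (FC.readCheckFile(SM, CheckText, PrefixRE))
    return false; // malformed check file
  return FC.checkInput(SM, InputText);
}
```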
diff --git a/lib/Support/FileCheckImpl.h b/lib/Support/FileCheckImpl.h
new file mode 100644
index 000000000000..06ce8301cec4
--- /dev/null
+++ b/lib/Support/FileCheckImpl.h
@@ -0,0 +1,624 @@
+//===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the private interfaces of FileCheck. Its purpose is to
+// allow unit testing of FileCheck and to separate the interface from the
+// implementation. It is only meant to be used by FileCheck.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_SUPPORT_FILECHECKIMPL_H
+#define LLVM_LIB_SUPPORT_FILECHECKIMPL_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SourceMgr.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Numeric substitution handling code.
+//===----------------------------------------------------------------------===//
+
+/// Base class representing the AST of a given expression.
+class FileCheckExpressionAST {
+public:
+ virtual ~FileCheckExpressionAST() = default;
+
+ /// Evaluates and \returns the value of the expression represented by this
+ /// AST or an error if evaluation fails.
+ virtual Expected<uint64_t> eval() const = 0;
+};
+
+/// Class representing an unsigned literal in the AST of an expression.
+class FileCheckExpressionLiteral : public FileCheckExpressionAST {
+private:
+ /// Actual value of the literal.
+ uint64_t Value;
+
+public:
+ /// Constructs a literal with the specified value.
+ FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {}
+
+ /// \returns the literal's value.
+ Expected<uint64_t> eval() const { return Value; }
+};
+
+/// Class to represent an undefined variable error, which quotes that
+/// variable's name when printed.
+class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> {
+private:
+ StringRef VarName;
+
+public:
+ static char ID;
+
+ FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {}
+
+ StringRef getVarName() const { return VarName; }
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+ /// Print name of variable associated with this error.
+ void log(raw_ostream &OS) const override {
+ OS << "\"";
+ OS.write_escaped(VarName) << "\"";
+ }
+};
+
+/// Class representing a numeric variable and its associated current value.
+class FileCheckNumericVariable {
+private:
+ /// Name of the numeric variable.
+ StringRef Name;
+
+ /// Value of numeric variable, if defined, or None otherwise.
+ Optional<uint64_t> Value;
+
+ /// Line number where this variable is defined, or None if defined before
+ /// input is parsed. Used to determine whether a variable is defined on the
+ /// same line as a given use.
+ Optional<size_t> DefLineNumber;
+
+public:
+ /// Constructor for a variable \p Name defined at line \p DefLineNumber or
+ /// defined before input is parsed if \p DefLineNumber is None.
+ explicit FileCheckNumericVariable(StringRef Name,
+ Optional<size_t> DefLineNumber = None)
+ : Name(Name), DefLineNumber(DefLineNumber) {}
+
+ /// \returns name of this numeric variable.
+ StringRef getName() const { return Name; }
+
+ /// \returns this variable's value.
+ Optional<uint64_t> getValue() const { return Value; }
+
+ /// Sets value of this numeric variable to \p NewValue.
+ void setValue(uint64_t NewValue) { Value = NewValue; }
+
+ /// Clears value of this numeric variable, regardless of whether it is
+ /// currently defined or not.
+ void clearValue() { Value = None; }
+
+ /// \returns the line number where this variable is defined, if any, or None
+ /// if defined before input is parsed.
+ Optional<size_t> getDefLineNumber() { return DefLineNumber; }
+};
+
+/// Class representing the use of a numeric variable in the AST of an
+/// expression.
+class FileCheckNumericVariableUse : public FileCheckExpressionAST {
+private:
+ /// Name of the numeric variable.
+ StringRef Name;
+
+ /// Pointer to the class instance for the variable this use is about.
+ FileCheckNumericVariable *NumericVariable;
+
+public:
+ FileCheckNumericVariableUse(StringRef Name,
+ FileCheckNumericVariable *NumericVariable)
+ : Name(Name), NumericVariable(NumericVariable) {}
+
+ /// \returns the value of the variable referenced by this instance.
+ Expected<uint64_t> eval() const;
+};
+
+/// Type of functions evaluating a given binary operation.
+using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);
+
+/// Class representing a single binary operation in the AST of an expression.
+class FileCheckASTBinop : public FileCheckExpressionAST {
+private:
+ /// Left operand.
+ std::unique_ptr<FileCheckExpressionAST> LeftOperand;
+
+ /// Right operand.
+ std::unique_ptr<FileCheckExpressionAST> RightOperand;
+
+ /// Pointer to function that can evaluate this binary operation.
+ binop_eval_t EvalBinop;
+
+public:
+ FileCheckASTBinop(binop_eval_t EvalBinop,
+ std::unique_ptr<FileCheckExpressionAST> LeftOp,
+ std::unique_ptr<FileCheckExpressionAST> RightOp)
+ : EvalBinop(EvalBinop) {
+ LeftOperand = std::move(LeftOp);
+ RightOperand = std::move(RightOp);
+ }
+
+ /// Evaluates the value of the binary operation represented by this AST,
+ /// using EvalBinop on the result of recursively evaluating the operands.
+ /// \returns the expression value or an error if an undefined numeric
+ /// variable is used in one of the operands.
+ Expected<uint64_t> eval() const;
+};
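Taken together, the three node types above form the entire expression language: literals, numeric variable uses, and binary operations. A sketch of building and evaluating "N+2" by hand, mirroring the static add/sub helpers in FileCheck.cpp; this header is private, so code like this only makes sense inside FileCheck or its unit tests:

```cpp
#include "llvm/Support/Error.h"
#include <cassert>
#include <memory>
// Assumes "FileCheckImpl.h" is reachable, as in FileCheck's unit tests.

static uint64_t addOp(uint64_t L, uint64_t R) { return L + R; }

void evalExample() {
  llvm::FileCheckNumericVariable N("N");
  N.setValue(40);

  auto Use = std::make_unique<llvm::FileCheckNumericVariableUse>("N", &N);
  auto Two = std::make_unique<llvm::FileCheckExpressionLiteral>(2);
  llvm::FileCheckASTBinop Sum(addOp, std::move(Use), std::move(Two));

  llvm::Expected<uint64_t> V = Sum.eval();
  assert(V && *V == 42 && "N + 2");

  N.clearValue();
  // With N undefined, eval() now yields a FileCheckUndefVarError.
  llvm::consumeError(Sum.eval().takeError());
}
```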
+
+class FileCheckPatternContext;
+
+/// Class representing a substitution to perform in the RegExStr string.
+class FileCheckSubstitution {
+protected:
+ /// Pointer to a class instance holding, among other things, the table with
+ /// the values of live string variables at the start of any given CHECK line.
+ /// Used for substituting string variables with the text they were defined
+ /// as. Expressions are linked to the numeric variables they use at
+ /// parse time and directly access the value of the numeric variable to
+ /// evaluate their value.
+ FileCheckPatternContext *Context;
+
+ /// The string that needs to be substituted for something else. For a
+ /// string variable this is its name, otherwise this is the whole expression.
+ StringRef FromStr;
+
+ // Index in RegExStr of where to do the substitution.
+ size_t InsertIdx;
+
+public:
+ FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName,
+ size_t InsertIdx)
+ : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
+
+ virtual ~FileCheckSubstitution() = default;
+
+ /// \returns the string to be substituted for something else.
+ StringRef getFromString() const { return FromStr; }
+
+ /// \returns the index where the substitution is to be performed in RegExStr.
+ size_t getIndex() const { return InsertIdx; }
+
+ /// \returns a string containing the result of the substitution represented
+ /// by this class instance or an error if substitution failed.
+ virtual Expected<std::string> getResult() const = 0;
+};
+
+class FileCheckStringSubstitution : public FileCheckSubstitution {
+public:
+ FileCheckStringSubstitution(FileCheckPatternContext *Context,
+ StringRef VarName, size_t InsertIdx)
+ : FileCheckSubstitution(Context, VarName, InsertIdx) {}
+
+ /// \returns the text that the string variable in this substitution matched
+ /// when defined, or an error if the variable is undefined.
+ Expected<std::string> getResult() const override;
+};
+
+class FileCheckNumericSubstitution : public FileCheckSubstitution {
+private:
+ /// Pointer to the class representing the expression whose value is to be
+ /// substituted.
+ std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
+
+public:
+ FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr,
+ std::unique_ptr<FileCheckExpressionAST> ExprAST,
+ size_t InsertIdx)
+ : FileCheckSubstitution(Context, Expr, InsertIdx) {
+ ExpressionAST = std::move(ExprAST);
+ }
+
+ /// \returns a string containing the result of evaluating the expression in
+ /// this substitution, or an error if evaluation failed.
+ Expected<std::string> getResult() const override;
+};
+
+//===----------------------------------------------------------------------===//
+// Pattern handling code.
+//===----------------------------------------------------------------------===//
+
+struct FileCheckDiag;
+
+/// Class holding the FileCheckPattern global state, shared by all patterns:
+/// tables holding values of variables and whether they are defined or not at
+/// any given time in the matching process.
+class FileCheckPatternContext {
+ friend class FileCheckPattern;
+
+private:
+ /// When matching a given pattern, this holds the value of all the string
+ /// variables defined in previous patterns. In a pattern, only the last
+ /// definition for a given variable is recorded in this table.
+ /// Back-references are used for uses after any other definition.
+ StringMap<StringRef> GlobalVariableTable;
+
+ /// Map of all string variables defined so far. Used at parse time to detect
+ /// a name conflict between a numeric variable and a string variable when
+ /// the former is defined on a later line than the latter.
+ StringMap<bool> DefinedVariableTable;
+
+ /// When matching a given pattern, this holds the pointers to the classes
+ /// representing the numeric variables defined in previous patterns. When
+ /// matching a pattern all definitions for that pattern are recorded in the
+ /// NumericVariableDefs table in the FileCheckPattern instance of that
+ /// pattern.
+ StringMap<FileCheckNumericVariable *> GlobalNumericVariableTable;
+
+ /// Pointer to the class instance representing the @LINE pseudo variable for
+ /// easily updating its value.
+ FileCheckNumericVariable *LineVariable = nullptr;
+
+ /// Vector holding pointers to all parsed numeric variables. Used to
+ /// automatically free them once they are guaranteed to no longer be used.
+ std::vector<std::unique_ptr<FileCheckNumericVariable>> NumericVariables;
+
+ /// Vector holding pointers to all substitutions. Used to automatically free
+ /// them once they are guaranteed to no longer be used.
+ std::vector<std::unique_ptr<FileCheckSubstitution>> Substitutions;
+
+public:
+ /// \returns the value of string variable \p VarName or an error if no such
+ /// variable has been defined.
+ Expected<StringRef> getPatternVarValue(StringRef VarName);
+
+ /// Defines string and numeric variables from definitions given on the
+ /// command line, passed as a vector of [#]VAR=VAL strings in
+ /// \p CmdlineDefines. \returns an error list containing diagnostics against
+ /// \p SM for all definition parsing failures, if any, or Success otherwise.
+ Error defineCmdlineVariables(std::vector<std::string> &CmdlineDefines,
+ SourceMgr &SM);
+
+ /// Create @LINE pseudo variable. Value is set when patterns are being
+ /// matched.
+ void createLineVariable();
+
+ /// Undefines local variables (variables whose name does not start with a '$'
+ /// sign), i.e. removes them from GlobalVariableTable and from
+ /// GlobalNumericVariableTable and also clears the value of numeric
+ /// variables.
+ void clearLocalVars();
+
+private:
+ /// Makes a new numeric variable and registers it for destruction when the
+ /// context is destroyed.
+ template <class... Types>
+ FileCheckNumericVariable *makeNumericVariable(Types... args);
+
+ /// Makes a new string substitution and registers it for destruction when the
+ /// context is destroyed.
+ FileCheckSubstitution *makeStringSubstitution(StringRef VarName,
+ size_t InsertIdx);
+
+ /// Makes a new numeric substitution and registers it for destruction when
+ /// the context is destroyed.
+ FileCheckSubstitution *
+ makeNumericSubstitution(StringRef ExpressionStr,
+ std::unique_ptr<FileCheckExpressionAST> ExpressionAST,
+ size_t InsertIdx);
+};
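+
+// Illustrative sketch only, not exhaustive: given directives such as
+//   CHECK: [[STR:a.*]]       (defines string variable STR)
+//   CHECK: [[#NUM:]]         (defines numeric variable NUM)
+//   CHECK: [[STR]][[#NUM+1]] (uses both)
+// GlobalVariableTable would hold the text matched for STR, and
+// GlobalNumericVariableTable would point at the FileCheckNumericVariable
+// for NUM.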
+
+/// Class to represent an error holding a diagnostic with location information
+/// used when printing it.
+class FileCheckErrorDiagnostic : public ErrorInfo<FileCheckErrorDiagnostic> {
+private:
+ SMDiagnostic Diagnostic;
+
+public:
+ static char ID;
+
+ FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {}
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+ /// Print diagnostic associated with this error when printing the error.
+ void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
+
+ static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) {
+ return make_error<FileCheckErrorDiagnostic>(
+ SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg));
+ }
+
+ static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) {
+ return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg);
+ }
+};
+
+class FileCheckNotFoundError : public ErrorInfo<FileCheckNotFoundError> {
+public:
+ static char ID;
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+ /// Print diagnostic associated with this error when printing the error.
+ void log(raw_ostream &OS) const override {
+ OS << "String not found in input";
+ }
+};
+
+class FileCheckPattern {
+ SMLoc PatternLoc;
+
+ /// A fixed string to match as the pattern or empty if this pattern requires
+ /// a regex match.
+ StringRef FixedStr;
+
+ /// A regex string to match as the pattern or empty if this pattern requires
+ /// a fixed string to match.
+ std::string RegExStr;
+
+ /// Entries in this vector represent a substitution of a string variable or
+ /// an expression in the RegExStr regex at match time. For example, in the
+ /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]",
+ /// RegExStr will contain "foobaz" and we'll get two entries in this vector
+ /// that tell us to insert the value of string variable "bar" at offset 3
+ /// and the value of expression "N+1" at offset 6.
+ std::vector<FileCheckSubstitution *> Substitutions;
+
+ /// Maps names of string variables defined in a pattern to the number of
+ /// their parenthesis group in RegExStr capturing their last definition.
+ ///
+ /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])",
+ /// RegExStr will be "foo(.*)baz(\1<quux value>(.*))" where <quux value> is
+ /// the value captured for QUUX on the earlier line where it was defined, and
+ /// VariableDefs will map "bar" to the third parenthesis group which captures
+ /// the second definition of "bar".
+ ///
+ /// Note: uses std::map rather than StringMap to be able to get the key when
+ /// iterating over values.
+ std::map<StringRef, unsigned> VariableDefs;
+
+ /// Structure representing the definition of a numeric variable in a pattern.
+ /// It holds the pointer to the class representing the numeric variable whose
+ /// value is being defined and the number of the parenthesis group in
+ /// RegExStr to capture that value.
+ struct FileCheckNumericVariableMatch {
+ /// Pointer to class representing the numeric variable whose value is being
+ /// defined.
+ FileCheckNumericVariable *DefinedNumericVariable;
+
+ /// Number of the parenthesis group in RegExStr that captures the value of
+ /// this numeric variable definition.
+ unsigned CaptureParenGroup;
+ };
+
+ /// Holds, for each numeric variable definition, the number of the
+ /// parenthesis group in RegExStr and a pointer to the corresponding
+ /// FileCheckNumericVariable class instance. Used to set the matched value
+ /// of all those variables.
+ StringMap<FileCheckNumericVariableMatch> NumericVariableDefs;
+
+ /// Pointer to a class instance holding the global state shared by all
+ /// patterns:
+ /// - separate tables with the values of live string and numeric variables
+ /// respectively at the start of any given CHECK line;
+ /// - table holding whether a string variable has been defined at any given
+ /// point during the parsing phase.
+ FileCheckPatternContext *Context;
+
+ Check::FileCheckType CheckTy;
+
+ /// Line number for this CHECK pattern or None if it is an implicit pattern.
+ /// Used to determine whether a variable definition is made on an earlier
+ /// line than the one with this CHECK.
+ Optional<size_t> LineNumber;
+
+ /// Ignore case while matching if set to true.
+ bool IgnoreCase = false;
+
+public:
+ FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
+ Optional<size_t> Line = None)
+ : Context(Context), CheckTy(Ty), LineNumber(Line) {}
+
+ /// \returns the location in source code.
+ SMLoc getLoc() const { return PatternLoc; }
+
+ /// \returns the pointer to the global state for all patterns in this
+ /// FileCheck instance.
+ FileCheckPatternContext *getContext() const { return Context; }
+
+ /// \returns whether \p C is a valid first character for a variable name.
+ static bool isValidVarNameStart(char C);
+
+ /// Parsing information about a variable.
+ struct VariableProperties {
+ StringRef Name;
+ bool IsPseudo;
+ };
+
+ /// Parses the string at the start of \p Str for a variable name. \returns
+ /// a VariableProperties structure holding the variable name and whether it
+ /// is the name of a pseudo variable, or an error holding a diagnostic
+ /// against \p SM if parsing fails. If parsing was successful, also strips
+ /// the variable name from \p Str.
+ static Expected<VariableProperties> parseVariable(StringRef &Str,
+ const SourceMgr &SM);
+ /// Parses \p Expr for a numeric substitution block at line \p LineNumber,
+ /// or before input is parsed if \p LineNumber is None. Parameter
+ /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
+ /// expression and \p Context points to the class instance holding the live
+ /// string and numeric variables. \returns a pointer to the class instance
+ /// representing the AST of the expression whose value must be substituted, or
+ /// an error holding a diagnostic against \p SM if parsing fails. If
+ /// substitution was successful, sets \p DefinedNumericVariable to point to
+ /// the class representing the numeric variable defined in this numeric
+ /// substitution block, or None if this block does not define any variable.
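+ /// For example (a sketch, not exhaustive): for the block "[[#N+1]]" this is
+ /// called with \p Expr == "N+1" and no variable is defined, while for
+ /// "[[#N:]]" a definition of N is returned in \p DefinedNumericVariable.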
+ static Expected<std::unique_ptr<FileCheckExpressionAST>>
+ parseNumericSubstitutionBlock(
+ StringRef Expr,
+ Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
+ bool IsLegacyLineExpr, Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context, const SourceMgr &SM);
+ /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern
+ /// instance accordingly.
+ ///
+ /// \p Prefix provides which prefix is being matched, \p Req describes the
+ /// global options that influence the parsing such as whitespace
+ /// canonicalization, and \p SM provides the SourceMgr used for error reports.
+ /// \returns true in case of an error, false otherwise.
+ bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
+ const FileCheckRequest &Req);
+ /// Matches the pattern string against the input buffer \p Buffer
+ ///
+ /// \returns the position that is matched or an error indicating why matching
+ /// failed. If there is a match, updates \p MatchLen with the size of the
+ /// matched string.
+ ///
+ /// The GlobalVariableTable StringMap in the FileCheckPatternContext class
+ /// instance provides the current values of FileCheck string variables and
+ /// is updated if this match defines new values. Likewise, the
+ /// GlobalNumericVariableTable StringMap in the same class provides the
+ /// current values of FileCheck numeric variables and is updated if this
+ /// match defines new numeric values.
+ Expected<size_t> match(StringRef Buffer, size_t &MatchLen,
+ const SourceMgr &SM) const;
+ /// Prints the value of successful substitutions or the name of the undefined
+ /// string or numeric variables preventing a successful substitution.
+ void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
+ SMRange MatchRange = None) const;
+ void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
+ std::vector<FileCheckDiag> *Diags) const;
+
+ bool hasVariable() const {
+ return !(Substitutions.empty() && VariableDefs.empty());
+ }
+
+ Check::FileCheckType getCheckTy() const { return CheckTy; }
+
+ int getCount() const { return CheckTy.getCount(); }
+
+private:
+ bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
+ void AddBackrefToRegEx(unsigned BackrefNum);
+ /// Computes an arbitrary estimate for the quality of matching this pattern
+ /// at the start of \p Buffer; a distance of zero should correspond to a
+ /// perfect match.
+ unsigned computeMatchDistance(StringRef Buffer) const;
+ /// Finds the closing sequence of a regex variable usage or definition.
+ ///
+ /// \p Str has to point at the beginning of the definition (right after the
+ /// opening sequence). \p SM holds the SourceMgr used for error reporting.
+ /// \returns the offset of the closing sequence within Str, or npos if it
+ /// was not found.
+ size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
+
+ /// Parses \p Expr for the name of a numeric variable to be defined at line
+ /// \p LineNumber, or before input is parsed if \p LineNumber is None.
+ /// \returns a pointer to the class instance representing that variable,
+ /// creating it if needed, or an error holding a diagnostic against \p SM
+ /// should defining such a variable be invalid.
+ static Expected<FileCheckNumericVariable *> parseNumericVariableDefinition(
+ StringRef &Expr, FileCheckPatternContext *Context,
+ Optional<size_t> LineNumber, const SourceMgr &SM);
+ /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use
+ /// at line \p LineNumber, or before input is parsed if \p LineNumber is
+ /// None. Parameter \p Context points to the class instance holding the live
+ /// string and numeric variables. \returns the pointer to the class instance
+ /// representing that variable if successful, or an error holding a
+ /// diagnostic against \p SM otherwise.
+ static Expected<std::unique_ptr<FileCheckNumericVariableUse>>
+ parseNumericVariableUse(StringRef Name, bool IsPseudo,
+ Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context,
+ const SourceMgr &SM);
+ enum class AllowedOperand { LineVar, Literal, Any };
+ /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or
+ /// before input is parsed if \p LineNumber is None. Accepts both literal
+ /// values and numeric variables, depending on the value of \p AO. Parameter
+ /// \p Context points to the class instance holding the live string and
+ /// numeric variables. \returns the class representing that operand in the
+ /// AST of the expression or an error holding a diagnostic against \p SM
+ /// otherwise.
+ static Expected<std::unique_ptr<FileCheckExpressionAST>>
+ parseNumericOperand(StringRef &Expr, AllowedOperand AO,
+ Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context, const SourceMgr &SM);
+ /// Parses \p Expr for a binary operation at line \p LineNumber, or before
+ /// input is parsed if \p LineNumber is None. The left operand of this binary
+ /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether
+ /// we are parsing a legacy @LINE expression. Parameter \p Context points to
+ /// the class instance holding the live string and numeric variables.
+ /// \returns the class representing the binary operation in the AST of the
+ /// expression, or an error holding a diagnostic against \p SM otherwise.
+ static Expected<std::unique_ptr<FileCheckExpressionAST>>
+ parseBinop(StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp,
+ bool IsLegacyLineExpr, Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context, const SourceMgr &SM);
+};
+
+//===----------------------------------------------------------------------===//
+// Check Strings.
+//===----------------------------------------------------------------------===//
+
+/// A check that we found in the input file.
+struct FileCheckString {
+ /// The pattern to match.
+ FileCheckPattern Pat;
+
+ /// Which prefix name this check matched.
+ StringRef Prefix;
+
+ /// The location in the match file that the check string was specified.
+ SMLoc Loc;
+
+ /// All of the strings that are disallowed from occurring between this match
+ /// string and the previous one (or start of file).
+ std::vector<FileCheckPattern> DagNotStrings;
+
+ FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L)
+ : Pat(P), Prefix(S), Loc(L) {}
+
+ /// Matches check string and its "not strings" and/or "dag strings".
+ size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
+ size_t &MatchLen, FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const;
+
+ /// Verifies that there is a single line in the given \p Buffer. Errors are
+ /// reported against \p SM.
+ bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
+ /// Verifies that there is no newline in the given \p Buffer. Errors are
+ /// reported against \p SM.
+ bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
+ /// Verifies that none of the strings in \p NotStrings are found in the given
+ /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in
+ /// \p Diags according to the verbosity level set in \p Req.
+ bool CheckNot(const SourceMgr &SM, StringRef Buffer,
+ const std::vector<const FileCheckPattern *> &NotStrings,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const;
+ /// Matches "dag strings" and their mixed "not strings".
+ size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
+ std::vector<const FileCheckPattern *> &NotStrings,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Support/FileCollector.cpp b/lib/Support/FileCollector.cpp
new file mode 100644
index 000000000000..47fca6413722
--- /dev/null
+++ b/lib/Support/FileCollector.cpp
@@ -0,0 +1,268 @@
+//===-- FileCollector.cpp ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileCollector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+
+using namespace llvm;
+
+static bool isCaseSensitivePath(StringRef Path) {
+ SmallString<256> TmpDest = Path, UpperDest, RealDest;
+
+ // Remove component traversals, links, etc.
+ if (sys::fs::real_path(Path, TmpDest))
+ return true; // Current default value in vfs.yaml
+ Path = TmpDest;
+
+ // Change path to all upper case and ask for its real path. If the latter
+ // exists and is equal to path, it's not case sensitive. Default to case
+ // sensitive in the absence of real_path, since this is the YAMLVFSWriter
+ // default.
+ UpperDest = Path.upper();
+ if (!sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest))
+ return false;
+ return true;
+}
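+
+// A sketch of the intent, assuming real_path canonicalizes on-disk case: on
+// a case-insensitive file system real_path("/USR") yields "/usr", which
+// equals the original path, so the function reports case insensitive; on a
+// case-sensitive system "/USR" usually fails to resolve and the default of
+// case sensitive is kept.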
+
+FileCollector::FileCollector(std::string Root, std::string OverlayRoot)
+ : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) {
+ sys::fs::create_directories(this->Root, true);
+}
+
+bool FileCollector::getRealPath(StringRef SrcPath,
+ SmallVectorImpl<char> &Result) {
+ SmallString<256> RealPath;
+ StringRef FileName = sys::path::filename(SrcPath);
+ std::string Directory = sys::path::parent_path(SrcPath).str();
+ auto DirWithSymlink = SymlinkMap.find(Directory);
+
+ // Use real_path to fix any symbolic link component present in a path.
+ // Computing the real path is expensive, so cache the search through the
+ // parent path Directory.
+ if (DirWithSymlink == SymlinkMap.end()) {
+ auto EC = sys::fs::real_path(Directory, RealPath);
+ if (EC)
+ return false;
+ SymlinkMap[Directory] = RealPath.str();
+ } else {
+ RealPath = DirWithSymlink->second;
+ }
+
+ sys::path::append(RealPath, FileName);
+ Result.swap(RealPath);
+ return true;
+}
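+
+// For example (paths hypothetical): if /tmp/link is a symlink to /data,
+// resolving /tmp/link/a.h and then /tmp/link/b.h computes
+// real_path("/tmp/link") once and reuses SymlinkMap["/tmp/link"] == "/data"
+// for the second lookup.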
+
+void FileCollector::addFile(const Twine &file) {
+ std::lock_guard<std::mutex> lock(Mutex);
+ std::string FileStr = file.str();
+ if (markAsSeen(FileStr))
+ addFileImpl(FileStr);
+}
+
+void FileCollector::addFileImpl(StringRef SrcPath) {
+ // We need an absolute src path to append to the root.
+ SmallString<256> AbsoluteSrc = SrcPath;
+ sys::fs::make_absolute(AbsoluteSrc);
+
+ // Canonicalize src to a native path to avoid mixed separator styles.
+ sys::path::native(AbsoluteSrc);
+
+ // Remove redundant leading "./" pieces and consecutive separators.
+ AbsoluteSrc = sys::path::remove_leading_dotslash(AbsoluteSrc);
+
+ // Canonicalize the source path by removing "..", "." components.
+ SmallString<256> VirtualPath = AbsoluteSrc;
+ sys::path::remove_dots(VirtualPath, /*remove_dot_dot=*/true);
+
+ // If a ".." component is present after a symlink component, remove_dots may
+ // lead to the wrong real destination path. Let the source be canonicalized
+ // like that but make sure we always use the real path for the destination.
+ SmallString<256> CopyFrom;
+ if (!getRealPath(AbsoluteSrc, CopyFrom))
+ CopyFrom = VirtualPath;
+
+ SmallString<256> DstPath = StringRef(Root);
+ sys::path::append(DstPath, sys::path::relative_path(CopyFrom));
+
+ // Always map a canonical src path to its real path in the YAML; this way we
+ // map different virtual src paths to the same entry in the VFS overlay,
+ // which is a way to emulate symlinks inside the VFS. This is also needed
+ // for correctness, since not doing so can lead to module redefinition
+ // errors.
+ addFileToMapping(VirtualPath, DstPath);
+}
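+
+// For example (paths hypothetical): with Root == "/repro", adding
+// "/work/src/../include/./a.h" yields VirtualPath "/work/include/a.h" and a
+// destination of "/repro/work/include/a.h", or the symlink-free real path
+// when getRealPath succeeds.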
+
+/// Set the access and modification time for the given file from the given
+/// status object.
+static std::error_code
+copyAccessAndModificationTime(StringRef Filename,
+ const sys::fs::file_status &Stat) {
+ int FD;
+
+ if (auto EC =
+ sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
+ return EC;
+
+ if (auto EC = sys::fs::setLastAccessAndModificationTime(
+ FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
+ return EC;
+
+ if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
+ return EC;
+
+ return {};
+}
+
+std::error_code FileCollector::copyFiles(bool StopOnError) {
+ for (auto &entry : VFSWriter.getMappings()) {
+ // Create directory tree.
+ if (std::error_code EC =
+ sys::fs::create_directories(sys::path::parent_path(entry.RPath),
+ /*IgnoreExisting=*/true)) {
+ if (StopOnError)
+ return EC;
+ }
+
+ // Get the status of the original file/directory.
+ sys::fs::file_status Stat;
+ if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) {
+ if (StopOnError)
+ return EC;
+ continue;
+ }
+
+ if (Stat.type() == sys::fs::file_type::directory_file) {
+ // Construct a directory when it's just a directory entry.
+ if (std::error_code EC =
+ sys::fs::create_directories(entry.RPath,
+ /*IgnoreExisting=*/true)) {
+ if (StopOnError)
+ return EC;
+ }
+ continue;
+ }
+
+ // Copy file over.
+ if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) {
+ if (StopOnError)
+ return EC;
+ }
+
+ // Copy over permissions.
+ if (auto perms = sys::fs::getPermissions(entry.VPath)) {
+ if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) {
+ if (StopOnError)
+ return EC;
+ }
+ }
+
+ // Copy over modification time.
+ copyAccessAndModificationTime(entry.RPath, Stat);
+ }
+ return {};
+}
+
+std::error_code FileCollector::writeMapping(StringRef mapping_file) {
+ std::lock_guard<std::mutex> lock(Mutex);
+
+ VFSWriter.setOverlayDir(OverlayRoot);
+ VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot));
+ VFSWriter.setUseExternalNames(false);
+
+ std::error_code EC;
+ raw_fd_ostream os(mapping_file, EC, sys::fs::OF_Text);
+ if (EC)
+ return EC;
+
+ VFSWriter.write(os);
+
+ return {};
+}
+
+namespace {
+
+class FileCollectorFileSystem : public vfs::FileSystem {
+public:
+ explicit FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,
+ std::shared_ptr<FileCollector> Collector)
+ : FS(std::move(FS)), Collector(std::move(Collector)) {}
+
+ llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
+ auto Result = FS->status(Path);
+ if (Result && Result->exists())
+ Collector->addFile(Path);
+ return Result;
+ }
+
+ llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
+ openFileForRead(const Twine &Path) override {
+ auto Result = FS->openFileForRead(Path);
+ if (Result && *Result)
+ Collector->addFile(Path);
+ return Result;
+ }
+
+ llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir,
+ std::error_code &EC) override {
+ auto It = FS->dir_begin(Dir, EC);
+ if (EC)
+ return It;
+ // Collect everything that's listed in case the user needs it.
+ Collector->addFile(Dir);
+ for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) {
+ if (It->type() == sys::fs::file_type::regular_file ||
+ It->type() == sys::fs::file_type::directory_file ||
+ It->type() == sys::fs::file_type::symlink_file) {
+ Collector->addFile(It->path());
+ }
+ }
+ if (EC)
+ return It;
+ // Return a new iterator.
+ return FS->dir_begin(Dir, EC);
+ }
+
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override {
+ auto EC = FS->getRealPath(Path, Output);
+ if (!EC) {
+ Collector->addFile(Path);
+ if (Output.size() > 0)
+ Collector->addFile(Output);
+ }
+ return EC;
+ }
+
+ std::error_code isLocal(const Twine &Path, bool &Result) override {
+ return FS->isLocal(Path, Result);
+ }
+
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
+ return FS->getCurrentWorkingDirectory();
+ }
+
+ std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override {
+ return FS->setCurrentWorkingDirectory(Path);
+ }
+
+private:
+ IntrusiveRefCntPtr<vfs::FileSystem> FS;
+ std::shared_ptr<FileCollector> Collector;
+};
+
+} // end anonymous namespace
+
+IntrusiveRefCntPtr<vfs::FileSystem>
+FileCollector::createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
+ std::shared_ptr<FileCollector> Collector) {
+ return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector));
+}
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index 3d6b569f2993..024dd3e57a40 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -121,7 +121,7 @@ createInMemoryBuffer(StringRef Path, size_t Size, unsigned Mode) {
Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
if (EC)
return errorCodeToError(EC);
- return llvm::make_unique<InMemoryBuffer>(Path, MB, Size, Mode);
+ return std::make_unique<InMemoryBuffer>(Path, MB, Size, Mode);
}
static Expected<std::unique_ptr<FileOutputBuffer>>
@@ -146,7 +146,7 @@ createOnDiskBuffer(StringRef Path, size_t Size, unsigned Mode) {
// Mmap it.
std::error_code EC;
- auto MappedFile = llvm::make_unique<fs::mapped_file_region>(
+ auto MappedFile = std::make_unique<fs::mapped_file_region>(
fs::convertFDToNativeFile(File.FD), fs::mapped_file_region::readwrite,
Size, 0, EC);
@@ -157,7 +157,7 @@ createOnDiskBuffer(StringRef Path, size_t Size, unsigned Mode) {
return createInMemoryBuffer(Path, Size, Mode);
}
- return llvm::make_unique<OnDiskBuffer>(Path, std::move(File),
+ return std::make_unique<OnDiskBuffer>(Path, std::move(File),
std::move(MappedFile));
}
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index 62eb7bfda195..d11fbb54dc0d 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -12,9 +12,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/FileUtilities.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <cmath>
@@ -264,3 +267,66 @@ int llvm::DiffFilesWithTolerance(StringRef NameA,
return CompareFailed;
}
+
+void llvm::AtomicFileWriteError::log(raw_ostream &OS) const {
+ OS << "atomic_write_error: ";
+ switch (Error) {
+ case atomic_write_error::failed_to_create_uniq_file:
+ OS << "failed_to_create_uniq_file";
+ return;
+ case atomic_write_error::output_stream_error:
+ OS << "output_stream_error";
+ return;
+ case atomic_write_error::failed_to_rename_temp_file:
+ OS << "failed_to_rename_temp_file";
+ return;
+ }
+ llvm_unreachable("unknown atomic_write_error value in "
+ "AtomicFileWriteError::log()");
+}
+
+llvm::Error llvm::writeFileAtomically(StringRef TempPathModel,
+ StringRef FinalPath, StringRef Buffer) {
+ return writeFileAtomically(TempPathModel, FinalPath,
+ [&Buffer](llvm::raw_ostream &OS) {
+ OS.write(Buffer.data(), Buffer.size());
+ return llvm::Error::success();
+ });
+}
+
+llvm::Error llvm::writeFileAtomically(
+ StringRef TempPathModel, StringRef FinalPath,
+ std::function<llvm::Error(llvm::raw_ostream &)> Writer) {
+ SmallString<128> GeneratedUniqPath;
+ int TempFD;
+ if (sys::fs::createUniqueFile(TempPathModel.str(), TempFD,
+ GeneratedUniqPath)) {
+ return llvm::make_error<AtomicFileWriteError>(
+ atomic_write_error::failed_to_create_uniq_file);
+ }
+ llvm::FileRemover RemoveTmpFileOnFail(GeneratedUniqPath);
+
+ raw_fd_ostream OS(TempFD, /*shouldClose=*/true);
+ if (llvm::Error Err = Writer(OS)) {
+ return Err;
+ }
+
+ OS.close();
+ if (OS.has_error()) {
+ OS.clear_error();
+ return llvm::make_error<AtomicFileWriteError>(
+ atomic_write_error::output_stream_error);
+ }
+
+ if (const std::error_code Error =
+ sys::fs::rename(/*from=*/GeneratedUniqPath.c_str(),
+ /*to=*/FinalPath.str().c_str())) {
+ return llvm::make_error<AtomicFileWriteError>(
+ atomic_write_error::failed_to_rename_temp_file);
+ }
+
+ RemoveTmpFileOnFail.releaseFile();
+ return Error::success();
+}
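+
+// Illustrative use (paths hypothetical): each '%' in the model is replaced
+// to form a unique temporary file, which is then renamed over the final
+// path on success:
+//   if (llvm::Error E = llvm::writeFileAtomically(
+//           "/tmp/out-%%%%%%.tmp", "/tmp/out", "payload"))
+//     llvm::logAllUnhandledErrors(std::move(E), llvm::errs());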
+
+char llvm::AtomicFileWriteError::ID;
diff --git a/lib/Support/GlobPattern.cpp b/lib/Support/GlobPattern.cpp
index 6011be86d77f..8dae6941ec77 100644
--- a/lib/Support/GlobPattern.cpp
+++ b/lib/Support/GlobPattern.cpp
@@ -19,7 +19,7 @@
using namespace llvm;
static bool hasWildcard(StringRef S) {
- return S.find_first_of("?*[") != StringRef::npos;
+ return S.find_first_of("?*[\\") != StringRef::npos;
}
// Expands character ranges and returns a bitmap.
@@ -60,8 +60,9 @@ static Expected<BitVector> expand(StringRef S, StringRef Original) {
}
// This is a scanner for the glob pattern.
-// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
-// (which is a negative form of "[<chars>]"), or a non-meta character.
+// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
+// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
+// equivalent to "[^<chars>]"), or a non-meta character.
// This function returns the first token in S.
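+// For example, in the pattern "a\*[!0-9]" the tokens are 'a', a literal '*'
+// (the backslash escapes it), and a character class matching any byte that
+// is not a digit.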
static Expected<BitVector> scan(StringRef &S, StringRef Original) {
switch (S[0]) {
@@ -74,14 +75,16 @@ static Expected<BitVector> scan(StringRef &S, StringRef Original) {
S = S.substr(1);
return BitVector(256, true);
case '[': {
- size_t End = S.find(']', 1);
+ // ']' is allowed as the first character of a character class. '[]' is
+ // invalid. So, just skip the first character.
+ size_t End = S.find(']', 2);
if (End == StringRef::npos)
return make_error<StringError>("invalid glob pattern: " + Original,
errc::invalid_argument);
StringRef Chars = S.substr(1, End - 1);
S = S.substr(End + 1);
- if (Chars.startswith("^")) {
+ if (Chars.startswith("^") || Chars.startswith("!")) {
Expected<BitVector> BV = expand(Chars.substr(1), Original);
if (!BV)
return BV.takeError();
@@ -89,6 +92,11 @@ static Expected<BitVector> scan(StringRef &S, StringRef Original) {
}
return expand(Chars, Original);
}
+ case '\\':
+ // Eat this character and fall through below to treat it like a non-meta
+ // character.
+ S = S.substr(1);
+ LLVM_FALLTHROUGH;
default:
BitVector BV(256, false);
BV[(uint8_t)S[0]] = true;
@@ -107,8 +115,9 @@ Expected<GlobPattern> GlobPattern::create(StringRef S) {
return Pat;
}
- // S is something like "foo*". We can use startswith().
- if (S.endswith("*") && !hasWildcard(S.drop_back())) {
+ // S is something like "foo*", and the "*" is not escaped. We can use
+ // startswith().
+ if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
Pat.Prefix = S.drop_back();
return Pat;
}
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index d491912bdc0c..2a473a1994c2 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -316,7 +316,7 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
unsigned int Id;
if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
if (Id >= 8561 && HaveVectorSupport)
- return "arch13";
+ return "z15";
if (Id >= 3906 && HaveVectorSupport)
return "z14";
if (Id >= 2964 && HaveVectorSupport)
@@ -680,7 +680,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// Skylake Xeon:
case 0x55:
*Type = X86::INTEL_COREI7;
- if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64)))
+ if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32)))
*Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake"
else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32)))
*Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake"
@@ -746,6 +746,13 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
default: // Unknown family 6 CPU, try to guess.
+ // TODO detect tigerlake host
+ if (Features3 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 64))) {
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_TIGERLAKE;
+ break;
+ }
+
if (Features & (1 << X86::FEATURE_AVX512VBMI2)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
@@ -758,7 +765,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
}
- if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64))) {
+ if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_COOPERLAKE;
break;
@@ -1034,7 +1041,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_BMI);
if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
setFeature(X86::FEATURE_AVX2);
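+ // CPUID leaf 0x7 EBX bit 8 is BMI2; bit 9 is ERMS, so the old shift tested
+ // the wrong feature bit.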
- if (HasLeaf7 && ((EBX >> 9) & 1))
+ if (HasLeaf7 && ((EBX >> 8) & 1))
setFeature(X86::FEATURE_BMI2);
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512F);
@@ -1078,6 +1085,13 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX5124FMAPS);
+ if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
+ setFeature(X86::FEATURE_AVX512VP2INTERSECT);
+
+ bool HasLeaf7Subleaf1 =
+ MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
+ setFeature(X86::FEATURE_AVX512BF16);
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -1369,7 +1383,6 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
- Features["mpx"] = HasLeaf7 && ((EBX >> 14) & 1);
// AVX512 is only supported if the OS supports the context save for it.
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
@@ -1499,6 +1512,17 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
return true;
}
+#elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
+bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
+ if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
+ Features["neon"] = true;
+ if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
+ Features["crc"] = true;
+ if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
+ Features["crypto"] = true;
+
+ return true;
+}
#else
bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
#endif
diff --git a/lib/Support/JSON.cpp b/lib/Support/JSON.cpp
index 95e5ed654277..16b1d11efd08 100644
--- a/lib/Support/JSON.cpp
+++ b/lib/Support/JSON.cpp
@@ -502,7 +502,7 @@ bool Parser::parseError(const char *Msg) {
}
}
Err.emplace(
- llvm::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
+ std::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
return false;
}
} // namespace
diff --git a/lib/Support/JamCRC.cpp b/lib/Support/JamCRC.cpp
deleted file mode 100644
index e043a3c33c28..000000000000
--- a/lib/Support/JamCRC.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-//===-- JamCRC.cpp - Cyclic Redundancy Check --------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains an implementation of JamCRC.
-//
-//===----------------------------------------------------------------------===//
-//
-// The implementation technique is the one mentioned in:
-// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table
-// look-up. Commun. ACM 31, 8 (August 1988)
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/JamCRC.h"
-#include "llvm/ADT/ArrayRef.h"
-
-using namespace llvm;
-
-static const uint32_t CRCTable[256] = {
- 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
- 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
- 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
- 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
- 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
- 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
- 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
- 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
- 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
- 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
- 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
- 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
- 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
- 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
- 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
- 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
- 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
- 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
- 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
- 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
- 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
- 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
- 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
- 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
- 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
- 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
- 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
- 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
- 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
- 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
- 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
- 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
- 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
- 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
- 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
- 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
- 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
- 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
- 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
- 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
- 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
- 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
- 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
- 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
- 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
- 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
- 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
- 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
- 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
- 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
- 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
- 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
- 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
- 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
- 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
- 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
- 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
- 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
- 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
- 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
- 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
- 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
- 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
- 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
-};
-
-void JamCRC::update(ArrayRef<char> Data) {
- for (char Byte : Data) {
- int TableIdx = (CRC ^ Byte) & 0xff;
- CRC = CRCTable[TableIdx] ^ (CRC >> 8);
- }
-}
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 28ceb1a70e42..053493f72fb5 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -12,21 +12,20 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/Threading.h"
#include <cassert>
+#include <mutex>
using namespace llvm;
static const ManagedStaticBase *StaticList = nullptr;
-static sys::Mutex *ManagedStaticMutex = nullptr;
+static std::recursive_mutex *ManagedStaticMutex = nullptr;
static llvm::once_flag mutex_init_flag;
static void initializeMutex() {
- ManagedStaticMutex = new sys::Mutex();
+ ManagedStaticMutex = new std::recursive_mutex();
}
-static sys::Mutex* getManagedStaticMutex() {
+static std::recursive_mutex *getManagedStaticMutex() {
llvm::call_once(mutex_init_flag, initializeMutex);
return ManagedStaticMutex;
}
@@ -35,7 +34,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
void (*Deleter)(void*)) const {
assert(Creator);
if (llvm_is_multithreaded()) {
- MutexGuard Lock(*getManagedStaticMutex());
+ std::lock_guard<std::recursive_mutex> Lock(*getManagedStaticMutex());
if (!Ptr.load(std::memory_order_relaxed)) {
void *Tmp = Creator();
@@ -77,7 +76,7 @@ void ManagedStaticBase::destroy() const {
/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
void llvm::llvm_shutdown() {
- MutexGuard Lock(*getManagedStaticMutex());
+ std::lock_guard<std::recursive_mutex> Lock(*getManagedStaticMutex());
while (StaticList)
StaticList->destroy();
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index d0e5bb154c1a..e4027ca7bbfd 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -211,15 +211,17 @@ static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) {
const ssize_t ChunkSize = 4096*4;
SmallString<ChunkSize> Buffer;
- size_t ReadBytes;
// Read into Buffer until we hit EOF.
- do {
+ for (;;) {
Buffer.reserve(Buffer.size() + ChunkSize);
- if (auto EC = sys::fs::readNativeFile(
- FD, makeMutableArrayRef(Buffer.end(), ChunkSize), &ReadBytes))
- return EC;
- Buffer.set_size(Buffer.size() + ReadBytes);
- } while (ReadBytes != 0);
+ Expected<size_t> ReadBytes = sys::fs::readNativeFile(
+ FD, makeMutableArrayRef(Buffer.end(), ChunkSize));
+ if (!ReadBytes)
+ return errorToErrorCode(ReadBytes.takeError());
+ if (*ReadBytes == 0)
+ break;
+ Buffer.set_size(Buffer.size() + *ReadBytes);
+ }
return getMemBufferCopyImpl(Buffer, BufferName);
}
@@ -458,7 +460,20 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
return make_error_code(errc::not_enough_memory);
}
- sys::fs::readNativeFileSlice(FD, Buf->getBuffer(), Offset);
+ // Read until EOF, zero-initialize the rest.
+ MutableArrayRef<char> ToRead = Buf->getBuffer();
+ while (!ToRead.empty()) {
+ Expected<size_t> ReadBytes =
+ sys::fs::readNativeFileSlice(FD, ToRead, Offset);
+ if (!ReadBytes)
+ return errorToErrorCode(ReadBytes.takeError());
+ if (*ReadBytes == 0) {
+ std::memset(ToRead.data(), 0, ToRead.size());
+ break;
+ }
+ ToRead = ToRead.drop_front(*ReadBytes);
+ Offset += *ReadBytes;
+ }
return std::move(Buf);
}
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
deleted file mode 100644
index 69b7b8126ab1..000000000000
--- a/lib/Support/Mutex.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the llvm::sys::Mutex class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Mutex.h"
-#include "llvm/Config/config.h"
-#include "llvm/Support/ErrorHandling.h"
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//=== independent code.
-//===----------------------------------------------------------------------===//
-
-#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
-// Define all methods as no-ops if threading is explicitly disabled
-namespace llvm {
-using namespace sys;
-MutexImpl::MutexImpl( bool recursive) { }
-MutexImpl::~MutexImpl() { }
-bool MutexImpl::acquire() { return true; }
-bool MutexImpl::release() { return true; }
-bool MutexImpl::tryacquire() { return true; }
-}
-#else
-
-#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK)
-
-#include <cassert>
-#include <pthread.h>
-#include <stdlib.h>
-
-namespace llvm {
-using namespace sys;
-
-// Construct a Mutex using pthread calls
-MutexImpl::MutexImpl( bool recursive)
- : data_(nullptr)
-{
- // Declare the pthread_mutex data structures
- pthread_mutex_t* mutex =
- static_cast<pthread_mutex_t*>(safe_malloc(sizeof(pthread_mutex_t)));
-
- pthread_mutexattr_t attr;
-
- // Initialize the mutex attributes
- int errorcode = pthread_mutexattr_init(&attr);
- assert(errorcode == 0); (void)errorcode;
-
- // Initialize the mutex as a recursive mutex, if requested, or normal
- // otherwise.
- int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
- errorcode = pthread_mutexattr_settype(&attr, kind);
- assert(errorcode == 0);
-
- // Initialize the mutex
- errorcode = pthread_mutex_init(mutex, &attr);
- assert(errorcode == 0);
-
- // Destroy the attributes
- errorcode = pthread_mutexattr_destroy(&attr);
- assert(errorcode == 0);
-
- // Assign the data member
- data_ = mutex;
-}
-
-// Destruct a Mutex
-MutexImpl::~MutexImpl()
-{
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != nullptr);
- pthread_mutex_destroy(mutex);
- free(mutex);
-}
-
-bool
-MutexImpl::acquire()
-{
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != nullptr);
-
- int errorcode = pthread_mutex_lock(mutex);
- return errorcode == 0;
-}
-
-bool
-MutexImpl::release()
-{
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != nullptr);
-
- int errorcode = pthread_mutex_unlock(mutex);
- return errorcode == 0;
-}
-
-bool
-MutexImpl::tryacquire()
-{
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != nullptr);
-
- int errorcode = pthread_mutex_trylock(mutex);
- return errorcode == 0;
-}
-
-}
-
-#elif defined(LLVM_ON_UNIX)
-#include "Unix/Mutex.inc"
-#elif defined( _WIN32)
-#include "Windows/Mutex.inc"
-#else
-#warning Neither LLVM_ON_UNIX nor _WIN32 was set in Support/Mutex.cpp
-#endif
-#endif
diff --git a/lib/Support/Parallel.cpp b/lib/Support/Parallel.cpp
index 621bccbf2a4c..355c64b7d079 100644
--- a/lib/Support/Parallel.cpp
+++ b/lib/Support/Parallel.cpp
@@ -32,34 +32,6 @@ public:
static Executor *getDefaultExecutor();
};
-#if defined(_MSC_VER)
-/// An Executor that runs tasks via ConcRT.
-class ConcRTExecutor : public Executor {
- struct Taskish {
- Taskish(std::function<void()> Task) : Task(Task) {}
-
- std::function<void()> Task;
-
- static void run(void *P) {
- Taskish *Self = static_cast<Taskish *>(P);
- Self->Task();
- concurrency::Free(Self);
- }
- };
-
-public:
- virtual void add(std::function<void()> F) {
- Concurrency::CurrentScheduler::ScheduleTask(
- Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(F));
- }
-};
-
-Executor *Executor::getDefaultExecutor() {
- static ConcRTExecutor exec;
- return &exec;
-}
-
-#else
/// An implementation of an Executor that runs closures on a thread pool
/// in FILO order.
class ThreadPoolExecutor : public Executor {
@@ -117,8 +89,7 @@ Executor *Executor::getDefaultExecutor() {
static ThreadPoolExecutor exec;
return &exec;
}
-#endif
-}
+} // namespace
static std::atomic<int> TaskGroupInstances;
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index c49260125dba..14def83802da 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -855,11 +855,11 @@ void make_absolute(const Twine &current_directory,
StringRef p(path.data(), path.size());
bool rootDirectory = path::has_root_directory(p);
- bool rootName =
- (real_style(Style::native) != Style::windows) || path::has_root_name(p);
+ bool rootName = path::has_root_name(p);
// Already absolute.
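+ // On Windows a path is absolute only with both a root name and a root
+ // directory ("C:\foo"); "\foo" and "C:foo" are not fully absolute. On other
+ // systems a root directory alone is enough.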
- if (rootName && rootDirectory)
+ if ((rootName || real_style(Style::native) != Style::windows) &&
+ rootDirectory)
return;
// All of the following conditions will need the current directory.
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index aec00baec0e3..bfb238cc8539 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -121,31 +121,63 @@ extern "C" const char *__crashreporter_info__
asm(".desc ___crashreporter_info__, 0x10");
#endif
-/// CrashHandler - This callback is run if a fatal signal is delivered to the
-/// process, it prints the pretty stack trace.
+static void setCrashLogMessage(const char *msg) LLVM_ATTRIBUTE_UNUSED;
+static void setCrashLogMessage(const char *msg) {
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+ (void)CRSetCrashLogMessage(msg);
+#elif HAVE_CRASHREPORTER_INFO
+ __crashreporter_info__ = msg;
+#endif
+ // Don't reorder subsequent operations: whatever comes after might crash and
+ // we want the system crash handling to see the message we just set.
+ std::atomic_signal_fence(std::memory_order_seq_cst);
+}
+
+#ifdef __APPLE__
+using CrashHandlerString = SmallString<2048>;
+using CrashHandlerStringStorage =
+ std::aligned_storage<sizeof(CrashHandlerString),
+ alignof(CrashHandlerString)>::type;
+static CrashHandlerStringStorage crashHandlerStringStorage;
+#endif
+
+/// This callback is run if a fatal signal is delivered to the process, it
+/// prints the pretty stack trace.
static void CrashHandler(void *) {
#ifndef __APPLE__
// On non-apple systems, just emit the crash stack trace to stderr.
PrintCurStackTrace(errs());
#else
- // Otherwise, emit to a smallvector of chars, send *that* to stderr, but also
- // put it into __crashreporter_info__.
- SmallString<2048> TmpStr;
+ // Emit the crash stack trace to a SmallString, put it where the system crash
+ // handling will find it, and also send it to stderr.
+ //
+ // The SmallString is fairly large in the hope that we don't allocate (we're
+ // handling a fatal signal, something is already pretty wrong, allocation
+ // might not work). Further, we don't use a magic static in case that's also
+ // borked. We leak any allocation that does occur because the program is about
+ // to die anyway. This is technically racy if we were handling two fatal
+ // signals, however if we're in that situation a race is the least of our
+ // worries.
+ auto &crashHandlerString =
+ *new (&crashHandlerStringStorage) CrashHandlerString;
+
+ // If we crash while trying to print the stack trace, we still want the system
+ // crash handling to have some partial information. That'll work out as long
+ // as the SmallString doesn't allocate. If it does allocate then the system
+ // crash handling will see some garbage because the inline buffer now contains
+ // a pointer.
+ setCrashLogMessage(crashHandlerString.c_str());
+
{
- raw_svector_ostream Stream(TmpStr);
+ raw_svector_ostream Stream(crashHandlerString);
PrintCurStackTrace(Stream);
}
- if (!TmpStr.empty()) {
-#ifdef HAVE_CRASHREPORTERCLIENT_H
- // Cast to void to avoid warning.
- (void)CRSetCrashLogMessage(TmpStr.c_str());
-#elif HAVE_CRASHREPORTER_INFO
- __crashreporter_info__ = strdup(TmpStr.c_str());
-#endif
- errs() << TmpStr.str();
- }
-
+ if (!crashHandlerString.empty()) {
+ setCrashLogMessage(crashHandlerString.c_str());
+ errs() << crashHandlerString.str();
+ } else
+ setCrashLogMessage("No crash information.");
#endif
}
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index 7ce856b716c6..5accf73e5f94 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -14,24 +14,20 @@
#include "llvm/Support/RWMutex.h"
#include "llvm/Config/config.h"
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//=== independent code.
-//===----------------------------------------------------------------------===//
+#if defined(LLVM_USE_RW_MUTEX_IMPL)
+using namespace llvm;
+using namespace sys;
#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
-using namespace llvm;
-using namespace sys;
-
RWMutexImpl::RWMutexImpl() = default;
RWMutexImpl::~RWMutexImpl() = default;
-bool RWMutexImpl::reader_acquire() { return true; }
-bool RWMutexImpl::reader_release() { return true; }
-bool RWMutexImpl::writer_acquire() { return true; }
-bool RWMutexImpl::writer_release() { return true; }
+bool RWMutexImpl::lock_shared() { return true; }
+bool RWMutexImpl::unlock_shared() { return true; }
+bool RWMutexImpl::lock() { return true; }
+bool RWMutexImpl::unlock() { return true; }
#else
@@ -41,9 +37,6 @@ bool RWMutexImpl::writer_release() { return true; }
#include <cstdlib>
#include <pthread.h>
-using namespace llvm;
-using namespace sys;
-
// Construct a RWMutex using pthread calls
RWMutexImpl::RWMutexImpl()
{
@@ -75,7 +68,7 @@ RWMutexImpl::~RWMutexImpl()
}
bool
-RWMutexImpl::reader_acquire()
+RWMutexImpl::lock_shared()
{
pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
assert(rwlock != nullptr);
@@ -85,7 +78,7 @@ RWMutexImpl::reader_acquire()
}
bool
-RWMutexImpl::reader_release()
+RWMutexImpl::unlock_shared()
{
pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
assert(rwlock != nullptr);
@@ -95,7 +88,7 @@ RWMutexImpl::reader_release()
}
bool
-RWMutexImpl::writer_acquire()
+RWMutexImpl::lock()
{
pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
assert(rwlock != nullptr);
@@ -105,7 +98,7 @@ RWMutexImpl::writer_acquire()
}
bool
-RWMutexImpl::writer_release()
+RWMutexImpl::unlock()
{
pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
assert(rwlock != nullptr);
@@ -114,11 +107,30 @@ RWMutexImpl::writer_release()
return errorcode == 0;
}
-#elif defined(LLVM_ON_UNIX)
-#include "Unix/RWMutex.inc"
-#elif defined( _WIN32)
-#include "Windows/RWMutex.inc"
#else
-#warning Neither LLVM_ON_UNIX nor _WIN32 was set in Support/Mutex.cpp
+
+RWMutexImpl::RWMutexImpl() : data_(new MutexImpl(false)) { }
+
+RWMutexImpl::~RWMutexImpl() {
+ delete static_cast<MutexImpl *>(data_);
+}
+
+bool RWMutexImpl::lock_shared() {
+ return static_cast<MutexImpl *>(data_)->acquire();
+}
+
+bool RWMutexImpl::unlock_shared() {
+ return static_cast<MutexImpl *>(data_)->release();
+}
+
+bool RWMutexImpl::lock() {
+ return static_cast<MutexImpl *>(data_)->acquire();
+}
+
+bool RWMutexImpl::unlock() {
+ return static_cast<MutexImpl *>(data_)->release();
+}
+
+#endif
#endif
#endif
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index 4c1b07038024..8da345d4f140 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -52,14 +52,24 @@ Regex::~Regex() {
}
}
-bool Regex::isValid(std::string &Error) const {
- if (!error)
- return true;
+namespace {
+/// Utility to convert a regex error code into a human-readable string.
+void RegexErrorToString(int error, struct llvm_regex *preg,
+ std::string &Error) {
size_t len = llvm_regerror(error, preg, nullptr, 0);
Error.resize(len - 1);
llvm_regerror(error, preg, &Error[0], len);
+}
+
+} // namespace
+
+bool Regex::isValid(std::string &Error) const {
+ if (!error)
+ return true;
+
+ RegexErrorToString(error, preg, Error);
return false;
}
@@ -69,8 +79,14 @@ unsigned Regex::getNumMatches() const {
return preg->re_nsub;
}
-bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
- if (error)
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
+ std::string *Error) const {
+ // Reset error, if given.
+ if (Error && !Error->empty())
+ *Error = "";
+
+ // Check if the regex itself didn't successfully compile.
+ if (Error ? !isValid(*Error) : !isValid())
return false;
unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
@@ -83,11 +99,13 @@ bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
+ // Failure to match is not an error; it's just a normal return value.
+ // Any other error code is considered abnormal, and is logged in the Error.
if (rc == REG_NOMATCH)
return false;
if (rc != 0) {
- // regexec can fail due to invalid pattern or running out of memory.
- error = rc;
+ if (Error)
+ RegexErrorToString(rc, preg, *Error);
return false;
}
@@ -112,14 +130,11 @@ bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
}
std::string Regex::sub(StringRef Repl, StringRef String,
- std::string *Error) {
+ std::string *Error) const {
SmallVector<StringRef, 8> Matches;
- // Reset error, if given.
- if (Error && !Error->empty()) *Error = "";
-
// Return the input if there was no match.
- if (!match(String, &Matches))
+ if (!match(String, &Matches, Error))
return String;
// Otherwise splice in the replacement string, starting with the prefix before
diff --git a/lib/Support/Signposts.cpp b/lib/Support/Signposts.cpp
index d456f41d2fa6..aa159e1da2ae 100644
--- a/lib/Support/Signposts.cpp
+++ b/lib/Support/Signposts.cpp
@@ -78,6 +78,8 @@ public:
#if LLVM_SUPPORT_XCODE_SIGNPOSTS
#define HAVE_ANY_SIGNPOST_IMPL 1
+#else
+#define HAVE_ANY_SIGNPOST_IMPL 0
#endif
SignpostEmitter::SignpostEmitter() {
diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp
index 96e09f9552bb..9bd1f18a4ee7 100644
--- a/lib/Support/SpecialCaseList.cpp
+++ b/lib/Support/SpecialCaseList.cpp
@@ -53,7 +53,7 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp,
return false;
RegExes.emplace_back(
- std::make_pair(make_unique<Regex>(std::move(CheckRE)), LineNumber));
+ std::make_pair(std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
return true;
}
@@ -175,7 +175,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
// Create this section if it has not been seen before.
if (SectionsMap.find(Section) == SectionsMap.end()) {
- std::unique_ptr<Matcher> M = make_unique<Matcher>();
+ std::unique_ptr<Matcher> M = std::make_unique<Matcher>();
std::string REError;
if (!M->insert(Section, LineNo, REError)) {
Error = (Twine("malformed section ") + Section + ": '" + REError).str();
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index e4f0535d21aa..8b4177c7fba6 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -57,7 +57,7 @@ namespace {
/// This class is also used to look up statistic values from applications that
/// use LLVM.
class StatisticInfo {
- std::vector<Statistic*> Stats;
+ std::vector<TrackingStatistic *> Stats;
friend void llvm::PrintStatistics();
friend void llvm::PrintStatistics(raw_ostream &OS);
@@ -66,14 +66,12 @@ class StatisticInfo {
/// Sort statistics by debugtype,name,description.
void sort();
public:
- using const_iterator = std::vector<Statistic *>::const_iterator;
+ using const_iterator = std::vector<TrackingStatistic *>::const_iterator;
StatisticInfo();
~StatisticInfo();
- void addStatistic(Statistic *S) {
- Stats.push_back(S);
- }
+ void addStatistic(TrackingStatistic *S) { Stats.push_back(S); }
const_iterator begin() const { return Stats.begin(); }
const_iterator end() const { return Stats.end(); }
@@ -90,7 +88,7 @@ static ManagedStatic<sys::SmartMutex<true> > StatLock;
/// RegisterStatistic - The first time a statistic is bumped, this method is
/// called.
-void Statistic::RegisterStatistic() {
+void TrackingStatistic::RegisterStatistic() {
// If stats are enabled, inform StatInfo that this statistic should be
// printed.
// llvm_shutdown calls destructors while holding the ManagedStatic mutex.
@@ -135,15 +133,16 @@ bool llvm::AreStatisticsEnabled() {
}
void StatisticInfo::sort() {
- llvm::stable_sort(Stats, [](const Statistic *LHS, const Statistic *RHS) {
- if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType()))
- return Cmp < 0;
+ llvm::stable_sort(
+ Stats, [](const TrackingStatistic *LHS, const TrackingStatistic *RHS) {
+ if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType()))
+ return Cmp < 0;
- if (int Cmp = std::strcmp(LHS->getName(), RHS->getName()))
- return Cmp < 0;
+ if (int Cmp = std::strcmp(LHS->getName(), RHS->getName()))
+ return Cmp < 0;
- return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
- });
+ return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
+ });
}
void StatisticInfo::reset() {
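
The reformatted comparator keeps the same three-level ordering; a compilable sketch of the technique, with Stat as a simplified stand-in for TrackingStatistic:

    #include "llvm/ADT/STLExtras.h"
    #include <cstring>
    #include <vector>

    struct Stat { const char *DebugType, *Name, *Desc; };

    void sortStats(std::vector<Stat *> &Stats) {
      // Tiered strcmp: order by debug type, then name, then description;
      // stable_sort preserves registration order for exact ties.
      llvm::stable_sort(Stats, [](const Stat *LHS, const Stat *RHS) {
        if (int Cmp = std::strcmp(LHS->DebugType, RHS->DebugType))
          return Cmp < 0;
        if (int Cmp = std::strcmp(LHS->Name, RHS->Name))
          return Cmp < 0;
        return std::strcmp(LHS->Desc, RHS->Desc) < 0;
      });
    }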
@@ -207,7 +206,7 @@ void llvm::PrintStatisticsJSON(raw_ostream &OS) {
// Print all of the statistics.
OS << "{\n";
const char *delim = "";
- for (const Statistic *Stat : Stats.Stats) {
+ for (const TrackingStatistic *Stat : Stats.Stats) {
OS << delim;
assert(yaml::needsQuotes(Stat->getDebugType()) == yaml::QuotingType::None &&
"Statistic group/type name is simple.");
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index bf28b2be5657..af8dd463e125 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -60,7 +60,9 @@ void llvm::SplitString(StringRef Source,
void llvm::printEscapedString(StringRef Name, raw_ostream &Out) {
for (unsigned i = 0, e = Name.size(); i != e; ++i) {
unsigned char C = Name[i];
- if (isPrint(C) && C != '\\' && C != '"')
+ if (C == '\\')
+ Out << '\\' << C;
+ else if (isPrint(C) && C != '"')
Out << C;
else
Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
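
Worked examples of the new escaping, derived from the code above rather than from the patch's tests:

    byte          old output   new output
    '\'           \5C          \\      (backslash is now doubled, not hex)
    '"'           \22          \22     (unchanged)
    'a'           a            a       (printable bytes pass through)
    0x07 (BEL)    \07          \07     (non-printables stay hex-escaped)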
diff --git a/lib/Support/TimeProfiler.cpp b/lib/Support/TimeProfiler.cpp
index bc2340815645..ca9119e30b65 100644
--- a/lib/Support/TimeProfiler.cpp
+++ b/lib/Support/TimeProfiler.cpp
@@ -24,29 +24,38 @@ using namespace std::chrono;
namespace llvm {
-static cl::opt<unsigned> TimeTraceGranularity(
- "time-trace-granularity",
- cl::desc(
- "Minimum time granularity (in microseconds) traced by time profiler"),
- cl::init(500));
-
TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
typedef duration<steady_clock::rep, steady_clock::period> DurationType;
+typedef time_point<steady_clock> TimePointType;
typedef std::pair<size_t, DurationType> CountAndDurationType;
typedef std::pair<std::string, CountAndDurationType>
NameAndCountAndDurationType;
struct Entry {
- time_point<steady_clock> Start;
- DurationType Duration;
+ TimePointType Start;
+ TimePointType End;
std::string Name;
std::string Detail;
- Entry(time_point<steady_clock> &&S, DurationType &&D, std::string &&N,
- std::string &&Dt)
- : Start(std::move(S)), Duration(std::move(D)), Name(std::move(N)),
+ Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
+ : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
Detail(std::move(Dt)){};
+
+  // Calculate timings for FlameGraph. Cast time points to microsecond
+  // precision rather than casting the duration. This avoids truncation
+  // issues that cause inner scopes to overrun outer scopes.
+ steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
+ return (time_point_cast<microseconds>(Start) -
+ time_point_cast<microseconds>(StartTime))
+ .count();
+ }
+
+ steady_clock::rep getFlameGraphDurUs() const {
+ return (time_point_cast<microseconds>(End) -
+ time_point_cast<microseconds>(Start))
+ .count();
+ }
};
struct TimeTraceProfiler {
@@ -55,17 +64,27 @@ struct TimeTraceProfiler {
}
void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
- Stack.emplace_back(steady_clock::now(), DurationType{}, std::move(Name),
+ Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
Detail());
}
void end() {
assert(!Stack.empty() && "Must call begin() first");
auto &E = Stack.back();
- E.Duration = steady_clock::now() - E.Start;
+ E.End = steady_clock::now();
+
+ // Check that end times monotonically increase.
+ assert((Entries.empty() ||
+ (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
+ Entries.back().getFlameGraphStartUs(StartTime) +
+ Entries.back().getFlameGraphDurUs())) &&
+ "TimeProfiler scope ended earlier than previous scope");
- // Only include sections longer than TimeTraceGranularity msec.
- if (duration_cast<microseconds>(E.Duration).count() > TimeTraceGranularity)
+ // Calculate duration at full precision for overall counts.
+ DurationType Duration = E.End - E.Start;
+
+    // Only include sections of at least TimeTraceGranularity microseconds.
+ if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
Entries.emplace_back(E);
// Track total time taken by each "name", but only the topmost levels of
@@ -78,7 +97,7 @@ struct TimeTraceProfiler {
}) == Stack.rend()) {
auto &CountAndTotal = CountAndTotalPerName[E.Name];
CountAndTotal.first++;
- CountAndTotal.second += E.Duration;
+ CountAndTotal.second += Duration;
}
Stack.pop_back();
@@ -94,8 +113,8 @@ struct TimeTraceProfiler {
// Emit all events for the main flame graph.
for (const auto &E : Entries) {
- auto StartUs = duration_cast<microseconds>(E.Start - StartTime).count();
- auto DurUs = duration_cast<microseconds>(E.Duration).count();
+ auto StartUs = E.getFlameGraphStartUs(StartTime);
+ auto DurUs = E.getFlameGraphDurUs();
J.object([&]{
J.attribute("pid", 1);
@@ -160,13 +179,17 @@ struct TimeTraceProfiler {
SmallVector<Entry, 16> Stack;
SmallVector<Entry, 128> Entries;
StringMap<CountAndDurationType> CountAndTotalPerName;
- time_point<steady_clock> StartTime;
+ TimePointType StartTime;
+
+ // Minimum time granularity (in microseconds)
+ unsigned TimeTraceGranularity;
};
-void timeTraceProfilerInitialize() {
+void timeTraceProfilerInitialize(unsigned TimeTraceGranularity) {
assert(TimeTraceProfilerInstance == nullptr &&
"Profiler should not be initialized");
TimeTraceProfilerInstance = new TimeTraceProfiler();
+ TimeTraceProfilerInstance->TimeTraceGranularity = TimeTraceGranularity;
}
void timeTraceProfilerCleanup() {
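
A worked example of the truncation problem this fixes (illustrative numbers): with an outer scope spanning 100.9-102.1 us and an inner scope spanning 101.0-102.0 us, the old code truncated each duration independently, emitting the outer scope as start 100, duration 1 (covering 100-101) and the inner one as start 101, duration 1 (covering 101-102), so the child overran its parent in the trace. Casting the time points first yields outer start 100, end 102 (duration 2) and inner start 101, end 102 (duration 1), which nest correctly.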
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 2a7ff1eaaf63..10c9b8e0b329 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -58,23 +58,23 @@ namespace {
std::unique_ptr<raw_fd_ostream> llvm::CreateInfoOutputFile() {
const std::string &OutputFilename = getLibSupportInfoOutputFilename();
if (OutputFilename.empty())
- return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
+ return std::make_unique<raw_fd_ostream>(2, false); // stderr.
if (OutputFilename == "-")
- return llvm::make_unique<raw_fd_ostream>(1, false); // stdout.
+ return std::make_unique<raw_fd_ostream>(1, false); // stdout.
// Append mode is used because the info output file is opened and closed
// each time -stats or -time-passes wants to print output to it. To
// compensate for this, the test-suite Makefiles have code to delete the
// info output file before running commands which write to it.
std::error_code EC;
- auto Result = llvm::make_unique<raw_fd_ostream>(
- OutputFilename, EC, sys::fs::F_Append | sys::fs::F_Text);
+ auto Result = std::make_unique<raw_fd_ostream>(
+ OutputFilename, EC, sys::fs::OF_Append | sys::fs::OF_Text);
if (!EC)
return Result;
errs() << "Error opening info-output-file '"
          << OutputFilename << "' for appending!\n";
- return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
+ return std::make_unique<raw_fd_ostream>(2, false); // stderr.
}
namespace {
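
A sketch of the updated open call in isolation (openInfoLog is an illustrative name; the real code falls back to stderr on error):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include <memory>
    #include <system_error>

    std::unique_ptr<llvm::raw_fd_ostream> openInfoLog(llvm::StringRef Path) {
      std::error_code EC;
      // OF_* is the renamed spelling of the old F_* OpenFlags; append
      // mode lets repeated -stats/-time-passes dumps accumulate.
      auto OS = std::make_unique<llvm::raw_fd_ostream>(
          Path, EC, llvm::sys::fs::OF_Append | llvm::sys::fs::OF_Text);
      return EC ? nullptr : std::move(OS);
    }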
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index a0927da50e48..05f8e32896fa 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -176,7 +176,7 @@ Memory::releaseMappedMemory(MemoryBlock &M) {
std::error_code
Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
- static const size_t PageSize = Process::getPageSizeEstimate();
+ static const Align PageSize = Align(Process::getPageSizeEstimate());
if (M.Address == nullptr || M.AllocatedSize == 0)
return std::error_code();
@@ -184,8 +184,8 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
return std::error_code(EINVAL, std::generic_category());
int Protect = getPosixProtectionFlags(Flags);
- uintptr_t Start = alignAddr((uint8_t *)M.Address - PageSize + 1, PageSize);
- uintptr_t End = alignAddr((uint8_t *)M.Address + M.AllocatedSize, PageSize);
+ uintptr_t Start = alignAddr((const uint8_t *)M.Address - PageSize.value() + 1, PageSize);
+ uintptr_t End = alignAddr((const uint8_t *)M.Address + M.AllocatedSize, PageSize);
bool InvalidateCache = (Flags & MF_EXEC);
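
A sketch of the page-rounding arithmetic with the new Align type (the helper names are illustrative):

    #include "llvm/Support/Alignment.h"
    #include <cstddef>
    #include <cstdint>

    // Round the start of a range down to its page (via "subtract
    // PageSize - 1, then align up") and the end up to the next page
    // boundary, as protectMappedMemory does above.
    uintptr_t pageStart(const void *Addr, llvm::Align PageSize) {
      return llvm::alignAddr((const uint8_t *)Addr - PageSize.value() + 1,
                             PageSize);
    }
    uintptr_t pageEnd(const void *Addr, size_t Size, llvm::Align PageSize) {
      return llvm::alignAddr((const uint8_t *)Addr + Size, PageSize);
    }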
diff --git a/lib/Support/Unix/Mutex.inc b/lib/Support/Unix/Mutex.inc
deleted file mode 100644
index 2c982b38d6ff..000000000000
--- a/lib/Support/Unix/Mutex.inc
+++ /dev/null
@@ -1,42 +0,0 @@
-//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Unix specific (non-pthread) Mutex class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//=== is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-namespace llvm
-{
-using namespace sys;
-
-MutexImpl::MutexImpl( bool recursive)
-{
-}
-
-MutexImpl::~MutexImpl()
-{
-}
-
-bool
-MutexImpl::release()
-{
- return true;
-}
-
-bool
-MutexImpl::tryacquire( void )
-{
- return true;
-}
-
-}
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index e80880c6b3cb..a617eca3566a 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -186,12 +186,12 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__minix) || defined(__DragonFly__) || \
defined(__FreeBSD_kernel__) || defined(_AIX)
- StringRef curproc("/proc/curproc/file");
+ const char *curproc = "/proc/curproc/file";
char exe_path[PATH_MAX];
// /proc is not mounted by default under FreeBSD, but gives more accurate
// information than argv[0] when it is.
if (sys::fs::exists(curproc)) {
- ssize_t len = readlink(curproc.str().c_str(), exe_path, sizeof(exe_path));
+ ssize_t len = readlink(curproc, exe_path, sizeof(exe_path));
if (len > 0) {
// Null terminate the string for realpath. readlink never null
// terminates its output.
@@ -205,10 +205,10 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
return exe_path;
#elif defined(__linux__) || defined(__CYGWIN__)
char exe_path[MAXPATHLEN];
- StringRef aPath("/proc/self/exe");
+ const char *aPath = "/proc/self/exe";
if (sys::fs::exists(aPath)) {
// /proc is not always mounted under Linux (chroot for example).
- ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path));
+ ssize_t len = readlink(aPath, exe_path, sizeof(exe_path));
if (len < 0)
return "";
@@ -443,7 +443,7 @@ static bool is_local_impl(struct STATVFS &Vfs) {
std::unique_ptr<char[]> Buf;
int Tries = 3;
while (Tries--) {
- Buf = llvm::make_unique<char[]>(BufSize);
+ Buf = std::make_unique<char[]>(BufSize);
Ret = mntctl(MCTL_QUERY, BufSize, Buf.get());
if (Ret != 0)
break;
@@ -833,7 +833,10 @@ std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
static file_type direntType(dirent* Entry) {
// Most platforms provide the file type in the dirent: Linux/BSD/Mac.
// The DTTOIF macro lets us reuse our status -> type conversion.
-#if defined(_DIRENT_HAVE_D_TYPE) && defined(DTTOIF)
+ // Note that while glibc provides a macro to see if this is supported,
+ // _DIRENT_HAVE_D_TYPE, it's not defined on BSD/Mac, so we test for the
+ // d_type-to-mode_t conversion macro instead.
+#if defined(DTTOIF)
return typeForMode(DTTOIF(Entry->d_type));
#else
// Other platforms such as Solaris require a stat() to get the type.
@@ -884,9 +887,9 @@ static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags,
else if (Access == (FA_Read | FA_Write))
Result |= O_RDWR;
- // This is for compatibility with old code that assumed F_Append implied
+ // This is for compatibility with old code that assumed OF_Append implied
   // it would open an existing file. See Windows/Path.inc for a longer comment.
- if (Flags & F_Append)
+ if (Flags & OF_Append)
Disp = CD_OpenAlways;
if (Disp == CD_CreateNew) {
@@ -901,7 +904,7 @@ static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags,
// Nothing special, just don't add O_CREAT and we get these semantics.
}
- if (Flags & F_Append)
+ if (Flags & OF_Append)
Result |= O_APPEND;
#ifdef O_CLOEXEC
@@ -996,44 +999,28 @@ file_t getStdinHandle() { return 0; }
file_t getStdoutHandle() { return 1; }
file_t getStderrHandle() { return 2; }
-std::error_code readNativeFile(file_t FD, MutableArrayRef<char> Buf,
- size_t *BytesRead) {
- *BytesRead = sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size());
- if (ssize_t(*BytesRead) == -1)
- return std::error_code(errno, std::generic_category());
- return std::error_code();
+Expected<size_t> readNativeFile(file_t FD, MutableArrayRef<char> Buf) {
+ ssize_t NumRead =
+ sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size());
+ if (ssize_t(NumRead) == -1)
+ return errorCodeToError(std::error_code(errno, std::generic_category()));
+ return NumRead;
}
-std::error_code readNativeFileSlice(file_t FD, MutableArrayRef<char> Buf,
- size_t Offset) {
- char *BufPtr = Buf.data();
- size_t BytesLeft = Buf.size();
-
-#ifndef HAVE_PREAD
- // If we don't have pread, seek to Offset.
- if (lseek(FD, Offset, SEEK_SET) == -1)
- return std::error_code(errno, std::generic_category());
-#endif
-
- while (BytesLeft) {
+Expected<size_t> readNativeFileSlice(file_t FD, MutableArrayRef<char> Buf,
+ uint64_t Offset) {
#ifdef HAVE_PREAD
- ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft,
- Buf.size() - BytesLeft + Offset);
+ ssize_t NumRead =
+ sys::RetryAfterSignal(-1, ::pread, FD, Buf.data(), Buf.size(), Offset);
#else
- ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft);
+ if (lseek(FD, Offset, SEEK_SET) == -1)
+ return errorCodeToError(std::error_code(errno, std::generic_category()));
+ ssize_t NumRead =
+ sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size());
#endif
- if (NumRead == -1) {
- // Error while reading.
- return std::error_code(errno, std::generic_category());
- }
- if (NumRead == 0) {
- memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer.
- break;
- }
- BytesLeft -= NumRead;
- BufPtr += NumRead;
- }
- return std::error_code();
+ if (NumRead == -1)
+ return errorCodeToError(std::error_code(errno, std::generic_category()));
+ return NumRead;
}
std::error_code closeFile(file_t &F) {
@@ -1200,7 +1187,7 @@ namespace fs {
/// implementation.
std::error_code copy_file(const Twine &From, const Twine &To) {
uint32_t Flag = COPYFILE_DATA;
-#if __has_builtin(__builtin_available)
+#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE)
if (__builtin_available(macos 10.12, *)) {
bool IsSymlink;
if (std::error_code Error = is_symlink_file(From, IsSymlink))
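
A minimal sketch of consuming the new Expected-based read API (readSome is an illustrative wrapper):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/FileSystem.h"

    llvm::Expected<size_t> readSome(llvm::sys::fs::file_t FD,
                                    llvm::MutableArrayRef<char> Buf) {
      llvm::Expected<size_t> NumRead = llvm::sys::fs::readNativeFile(FD, Buf);
      if (!NumRead)
        return NumRead.takeError(); // rich Error instead of errno codes
      // Behavior change worth noting: a short read is returned as-is;
      // the read-until-full loop and zero-fill moved to the callers.
      return NumRead;
    }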
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 4115ee396582..dfe81d7e2833 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -15,8 +15,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/MutexGuard.h"
+#include <mutex>
#if HAVE_FCNTL_H
#include <fcntl.h>
#endif
@@ -327,13 +326,13 @@ extern "C" int tigetnum(char *capname);
#endif
#ifdef HAVE_TERMINFO
-static ManagedStatic<sys::Mutex> TermColorMutex;
+static ManagedStatic<std::mutex> TermColorMutex;
#endif
static bool terminalHasColors(int fd) {
#ifdef HAVE_TERMINFO
// First, acquire a global lock because these C routines are thread hostile.
- MutexGuard G(*TermColorMutex);
+ std::lock_guard<std::mutex> G(*TermColorMutex);
int errret = 0;
if (setupterm(nullptr, fd, &errret) != 0)
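
The same locking pattern in isolation (queryTerminal is illustrative):

    #include "llvm/Support/ManagedStatic.h"
    #include <mutex>

    static llvm::ManagedStatic<std::mutex> TermLock;

    int queryTerminal() {
      // std::lock_guard replaces llvm::MutexGuard; ManagedStatic still
      // defers construction to first use and destroys at llvm_shutdown().
      std::lock_guard<std::mutex> G(*TermLock);
      // ... call the thread-hostile terminfo routines here ...
      return 0;
    }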
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index c4123a64046f..520685a0e987 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -136,7 +136,7 @@ static bool RedirectIO_PS(const std::string *Path, int FD, std::string *ErrMsg,
if (int Err = posix_spawn_file_actions_addopen(
FileActions, FD, File,
FD == 0 ? O_RDONLY : O_WRONLY | O_CREAT, 0666))
- return MakeErrMsg(ErrMsg, "Cannot dup2", Err);
+ return MakeErrMsg(ErrMsg, "Cannot posix_spawn_file_actions_addopen", Err);
return false;
}
#endif
@@ -444,7 +444,7 @@ std::error_code
llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
WindowsEncodingMethod Encoding /*unused*/) {
std::error_code EC;
- llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::F_Text);
+ llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::OF_Text);
if (EC)
return EC;
diff --git a/lib/Support/Unix/RWMutex.inc b/lib/Support/Unix/RWMutex.inc
deleted file mode 100644
index 8b47dfa0f85c..000000000000
--- a/lib/Support/Unix/RWMutex.inc
+++ /dev/null
@@ -1,50 +0,0 @@
-//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Unix specific (non-pthread) RWMutex class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//=== is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Mutex.h"
-
-namespace llvm {
-
-using namespace sys;
-
-// This naive implementation treats readers the same as writers. This
-// will therefore deadlock if a thread tries to acquire a read lock
-// multiple times.
-
-RWMutexImpl::RWMutexImpl() : data_(new MutexImpl(false)) { }
-
-RWMutexImpl::~RWMutexImpl() {
- delete static_cast<MutexImpl *>(data_);
-}
-
-bool RWMutexImpl::reader_acquire() {
- return static_cast<MutexImpl *>(data_)->acquire();
-}
-
-bool RWMutexImpl::reader_release() {
- return static_cast<MutexImpl *>(data_)->release();
-}
-
-bool RWMutexImpl::writer_acquire() {
- return static_cast<MutexImpl *>(data_)->acquire();
-}
-
-bool RWMutexImpl::writer_release() {
- return static_cast<MutexImpl *>(data_)->release();
-}
-
-}
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 634c16aa36c7..5e0cde4a81ed 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -43,7 +43,6 @@
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SaveAndRestore.h"
-#include "llvm/Support/UniqueLock.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
@@ -83,12 +82,18 @@ using namespace llvm;
static RETSIGTYPE SignalHandler(int Sig); // defined below.
static RETSIGTYPE InfoSignalHandler(int Sig); // defined below.
+static void DefaultPipeSignalFunction() {
+ exit(EX_IOERR);
+}
+
using SignalHandlerFunctionType = void (*)();
/// The function to call if ctrl-c is pressed.
static std::atomic<SignalHandlerFunctionType> InterruptFunction =
ATOMIC_VAR_INIT(nullptr);
static std::atomic<SignalHandlerFunctionType> InfoSignalFunction =
ATOMIC_VAR_INIT(nullptr);
+static std::atomic<SignalHandlerFunctionType> PipeSignalFunction =
+ ATOMIC_VAR_INIT(DefaultPipeSignalFunction);
namespace {
/// Signal-safe removal of files.
@@ -364,7 +369,8 @@ static RETSIGTYPE SignalHandler(int Sig) {
// Send a special return code that drivers can check for, from sysexits.h.
if (Sig == SIGPIPE)
- exit(EX_IOERR);
+ if (SignalHandlerFunctionType CurrentPipeFunction = PipeSignalFunction)
+ CurrentPipeFunction();
raise(Sig); // Execute the default handler.
return;
@@ -404,6 +410,11 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
RegisterHandlers();
}
+void llvm::sys::SetPipeSignalFunction(void (*Handler)()) {
+ PipeSignalFunction.exchange(Handler);
+ RegisterHandlers();
+}
+
// The public API
bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
std::string* ErrMsg) {
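
A sketch of the new hook from a tool's point of view: a program whose output is routinely piped (e.g. into `head`) can now exit cleanly on SIGPIPE instead of the default exit(EX_IOERR):

    #include "llvm/Support/Signals.h"
    #include <cstdlib>

    void installQuietSigpipe() {
      // A captureless lambda converts to the void(*)() the API expects.
      llvm::sys::SetPipeSignalFunction([] { std::exit(0); });
    }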
diff --git a/lib/Support/VirtualFileSystem.cpp b/lib/Support/VirtualFileSystem.cpp
index 5d3480e97148..c390cb1b2227 100644
--- a/lib/Support/VirtualFileSystem.cpp
+++ b/lib/Support/VirtualFileSystem.cpp
@@ -176,9 +176,9 @@ class RealFile : public File {
Status S;
std::string RealName;
- RealFile(file_t FD, StringRef NewName, StringRef NewRealPathName)
- : FD(FD), S(NewName, {}, {}, {}, {}, {},
- llvm::sys::fs::file_type::status_error, {}),
+ RealFile(file_t RawFD, StringRef NewName, StringRef NewRealPathName)
+ : FD(RawFD), S(NewName, {}, {}, {}, {}, {},
+ llvm::sys::fs::file_type::status_error, {}),
RealName(NewRealPathName.str()) {
assert(FD != kInvalidFile && "Invalid or inactive file descriptor");
}
@@ -349,7 +349,7 @@ IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
}
std::unique_ptr<FileSystem> vfs::createPhysicalFileSystem() {
- return llvm::make_unique<RealFileSystem>(false);
+ return std::make_unique<RealFileSystem>(false);
}
namespace {
@@ -754,7 +754,7 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file,
NewDirectoryPerms);
Dir = cast<detail::InMemoryDirectory>(Dir->addChild(
- Name, llvm::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
+ Name, std::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
continue;
}
@@ -989,6 +989,16 @@ std::error_code InMemoryFileSystem::isLocal(const Twine &Path, bool &Result) {
// RedirectingFileSystem implementation
//===-----------------------------------------------------------------------===/
+RedirectingFileSystem::RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> FS)
+ : ExternalFS(std::move(FS)) {
+ if (ExternalFS)
+ if (auto ExternalWorkingDirectory =
+ ExternalFS->getCurrentWorkingDirectory()) {
+ WorkingDirectory = *ExternalWorkingDirectory;
+ ExternalFSValidWD = true;
+ }
+}
+
// FIXME: reuse implementation common with OverlayFSDirIterImpl as these
// iterators are conceptually similar.
class llvm::vfs::VFSFromYamlDirIterImpl
@@ -1035,12 +1045,27 @@ public:
llvm::ErrorOr<std::string>
RedirectingFileSystem::getCurrentWorkingDirectory() const {
- return ExternalFS->getCurrentWorkingDirectory();
+ return WorkingDirectory;
}
std::error_code
RedirectingFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
- return ExternalFS->setCurrentWorkingDirectory(Path);
+ // Don't change the working directory if the path doesn't exist.
+ if (!exists(Path))
+ return errc::no_such_file_or_directory;
+
+ // Always change the external FS but ignore its result.
+ if (ExternalFS) {
+ auto EC = ExternalFS->setCurrentWorkingDirectory(Path);
+ ExternalFSValidWD = !static_cast<bool>(EC);
+ }
+
+ SmallString<128> AbsolutePath;
+ Path.toVector(AbsolutePath);
+ if (std::error_code EC = makeAbsolute(AbsolutePath))
+ return EC;
+ WorkingDirectory = AbsolutePath.str();
+ return {};
}
std::error_code RedirectingFileSystem::isLocal(const Twine &Path,
@@ -1053,7 +1078,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Dir);
if (!E) {
EC = E.getError();
- if (IsFallthrough && EC == errc::no_such_file_or_directory)
+ if (shouldUseExternalFS() && EC == errc::no_such_file_or_directory)
return ExternalFS->dir_begin(Dir, EC);
return {};
}
@@ -1071,7 +1096,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
auto *D = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(*E);
return directory_iterator(std::make_shared<VFSFromYamlDirIterImpl>(
Dir, D->contents_begin(), D->contents_end(),
- /*IterateExternalFS=*/IsFallthrough, *ExternalFS, EC));
+ /*IterateExternalFS=*/shouldUseExternalFS(), *ExternalFS, EC));
}
void RedirectingFileSystem::setExternalContentsPrefixDir(StringRef PrefixDir) {
@@ -1082,20 +1107,19 @@ StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const {
return ExternalContentsPrefixDir;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const {
+void RedirectingFileSystem::dump(raw_ostream &OS) const {
for (const auto &Root : Roots)
- dumpEntry(Root.get());
+ dumpEntry(OS, Root.get());
}
-LLVM_DUMP_METHOD void
-RedirectingFileSystem::dumpEntry(RedirectingFileSystem::Entry *E,
- int NumSpaces) const {
+void RedirectingFileSystem::dumpEntry(raw_ostream &OS,
+ RedirectingFileSystem::Entry *E,
+ int NumSpaces) const {
StringRef Name = E->getName();
for (int i = 0, e = NumSpaces; i < e; ++i)
- dbgs() << " ";
- dbgs() << "'" << Name.str().c_str() << "'"
- << "\n";
+ OS << " ";
+ OS << "'" << Name.str().c_str() << "'"
+ << "\n";
if (E->getKind() == RedirectingFileSystem::EK_Directory) {
auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(E);
@@ -1103,9 +1127,12 @@ RedirectingFileSystem::dumpEntry(RedirectingFileSystem::Entry *E,
for (std::unique_ptr<Entry> &SubEntry :
llvm::make_range(DE->contents_begin(), DE->contents_end()))
- dumpEntry(SubEntry.get(), NumSpaces + 2);
+ dumpEntry(OS, SubEntry.get(), NumSpaces + 2);
}
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const { dump(dbgs()); }
#endif
/// A helper class to hold the common YAML parsing state.
@@ -1209,7 +1236,7 @@ class llvm::vfs::RedirectingFileSystemParser {
// ... or create a new one
std::unique_ptr<RedirectingFileSystem::Entry> E =
- llvm::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ std::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
Name, Status("", getNextVirtualUniqueID(),
std::chrono::system_clock::now(), 0, 0, 0,
file_type::directory_file, sys::fs::all_all));
@@ -1221,7 +1248,7 @@ class llvm::vfs::RedirectingFileSystemParser {
}
auto *DE =
- dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(ParentEntry);
+ cast<RedirectingFileSystem::RedirectingDirectoryEntry>(ParentEntry);
DE->addContent(std::move(E));
return DE->getLastContent();
}
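
The dyn_cast-to-cast cleanups in this file all follow one rule, sketched here with illustrative types: cast<> already asserts the dynamic type in +Asserts builds, so a dyn_cast<> followed by assert(X && "Must be ...") is redundant.

    #include "llvm/Support/Casting.h"

    struct Base { enum Kind { K_Derived } TheKind = K_Derived; };
    struct Derived : Base {
      static bool classof(const Base *B) { return B->TheKind == K_Derived; }
    };

    void example(Base *ParentEntry) {
      // One call expresses "checked downcast"; keep dyn_cast<> only
      // where a null result is genuinely handled.
      auto *DE = llvm::cast<Derived>(ParentEntry);
      (void)DE;
    }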
@@ -1232,9 +1259,7 @@ class llvm::vfs::RedirectingFileSystemParser {
StringRef Name = SrcE->getName();
switch (SrcE->getKind()) {
case RedirectingFileSystem::EK_Directory: {
- auto *DE =
- dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(SrcE);
- assert(DE && "Must be a directory");
+ auto *DE = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(SrcE);
       // Empty directories can be present in the YAML as a way to
       // describe a file for a current directory after some of its
       // subdirectories are parsed. This only leads to redundant walks;
       // ignore it.
@@ -1246,13 +1271,12 @@ class llvm::vfs::RedirectingFileSystemParser {
break;
}
case RedirectingFileSystem::EK_File: {
- auto *FE = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(SrcE);
- assert(FE && "Must be a file");
assert(NewParentE && "Parent entry must exist");
- auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(
- NewParentE);
+ auto *FE = cast<RedirectingFileSystem::RedirectingFileEntry>(SrcE);
+ auto *DE =
+ cast<RedirectingFileSystem::RedirectingDirectoryEntry>(NewParentE);
DE->addContent(
- llvm::make_unique<RedirectingFileSystem::RedirectingFileEntry>(
+ std::make_unique<RedirectingFileSystem::RedirectingFileEntry>(
Name, FE->getExternalContentsPath(), FE->getUseName()));
break;
}
@@ -1423,12 +1447,12 @@ class llvm::vfs::RedirectingFileSystemParser {
std::unique_ptr<RedirectingFileSystem::Entry> Result;
switch (Kind) {
case RedirectingFileSystem::EK_File:
- Result = llvm::make_unique<RedirectingFileSystem::RedirectingFileEntry>(
+ Result = std::make_unique<RedirectingFileSystem::RedirectingFileEntry>(
LastComponent, std::move(ExternalContentsPath), UseExternalName);
break;
case RedirectingFileSystem::EK_Directory:
Result =
- llvm::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ std::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
LastComponent, std::move(EntryArrayContents),
Status("", getNextVirtualUniqueID(),
std::chrono::system_clock::now(), 0, 0, 0,
@@ -1447,7 +1471,7 @@ class llvm::vfs::RedirectingFileSystemParser {
std::vector<std::unique_ptr<RedirectingFileSystem::Entry>> Entries;
Entries.push_back(std::move(Result));
Result =
- llvm::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ std::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
*I, std::move(Entries),
Status("", getNextVirtualUniqueID(),
std::chrono::system_clock::now(), 0, 0, 0,
@@ -1573,7 +1597,7 @@ RedirectingFileSystem::create(std::unique_ptr<MemoryBuffer> Buffer,
RedirectingFileSystemParser P(Stream);
std::unique_ptr<RedirectingFileSystem> FS(
- new RedirectingFileSystem(std::move(ExternalFS)));
+ new RedirectingFileSystem(ExternalFS));
if (!YAMLFilePath.empty()) {
// Use the YAML path from -ivfsoverlay to compute the dir to be prefixed
@@ -1702,7 +1726,7 @@ ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path,
ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path) {
ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path);
if (!Result) {
- if (IsFallthrough &&
+ if (shouldUseExternalFS() &&
Result.getError() == llvm::errc::no_such_file_or_directory) {
return ExternalFS->status(Path);
}
@@ -1740,7 +1764,7 @@ ErrorOr<std::unique_ptr<File>>
RedirectingFileSystem::openFileForRead(const Twine &Path) {
ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Path);
if (!E) {
- if (IsFallthrough &&
+ if (shouldUseExternalFS() &&
E.getError() == llvm::errc::no_such_file_or_directory) {
return ExternalFS->openFileForRead(Path);
}
@@ -1763,7 +1787,7 @@ RedirectingFileSystem::openFileForRead(const Twine &Path) {
Status S = getRedirectedFileStatus(Path, F->useExternalName(UseExternalNames),
*ExternalStatus);
return std::unique_ptr<File>(
- llvm::make_unique<FileWithFixedStatus>(std::move(*Result), S));
+ std::make_unique<FileWithFixedStatus>(std::move(*Result), S));
}
std::error_code
@@ -1771,7 +1795,7 @@ RedirectingFileSystem::getRealPath(const Twine &Path,
SmallVectorImpl<char> &Output) const {
ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path);
if (!Result) {
- if (IsFallthrough &&
+ if (shouldUseExternalFS() &&
Result.getError() == llvm::errc::no_such_file_or_directory) {
return ExternalFS->getRealPath(Path, Output);
}
@@ -1784,8 +1808,8 @@ RedirectingFileSystem::getRealPath(const Twine &Path,
}
// Even if there is a directory entry, fall back to ExternalFS if allowed,
// because directories don't have a single external contents path.
- return IsFallthrough ? ExternalFS->getRealPath(Path, Output)
- : llvm::errc::invalid_argument;
+ return shouldUseExternalFS() ? ExternalFS->getRealPath(Path, Output)
+ : llvm::errc::invalid_argument;
}
IntrusiveRefCntPtr<FileSystem>
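
shouldUseExternalFS() itself is defined in the header, so its body does not appear in this diff; reading the call sites suggests the policy it encodes is approximately the following sketch, not the verbatim helper:

    // Only fall through to the external FS when fallthrough is enabled
    // and the external FS accepted the redirecting FS's working
    // directory, keeping relative lookups consistent between the two.
    bool shouldFallThrough(bool IsFallthrough, bool ExternalFSValidWD) {
      return IsFallthrough && ExternalFSValidWD;
    }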
diff --git a/lib/Support/Windows/Mutex.inc b/lib/Support/Windows/Mutex.inc
deleted file mode 100644
index b55b14febf2c..000000000000
--- a/lib/Support/Windows/Mutex.inc
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Win32 specific (non-pthread) Mutex class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic Win32 code that
-//=== is guaranteed to work on *all* Win32 variants.
-//===----------------------------------------------------------------------===//
-
-#include "WindowsSupport.h"
-#include "llvm/Support/Mutex.h"
-
-namespace llvm {
-
-sys::MutexImpl::MutexImpl(bool /*recursive*/)
-{
- data_ = new CRITICAL_SECTION;
- InitializeCriticalSection((LPCRITICAL_SECTION)data_);
-}
-
-sys::MutexImpl::~MutexImpl()
-{
- DeleteCriticalSection((LPCRITICAL_SECTION)data_);
- delete (LPCRITICAL_SECTION)data_;
- data_ = 0;
-}
-
-bool
-sys::MutexImpl::acquire()
-{
- EnterCriticalSection((LPCRITICAL_SECTION)data_);
- return true;
-}
-
-bool
-sys::MutexImpl::release()
-{
- LeaveCriticalSection((LPCRITICAL_SECTION)data_);
- return true;
-}
-
-bool
-sys::MutexImpl::tryacquire()
-{
- return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
-}
-
-}
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 5704930aeecc..c3b13abef5de 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -371,13 +371,19 @@ static std::error_code realPathFromHandle(HANDLE H,
if (std::error_code EC = realPathFromHandle(H, Buffer))
return EC;
- const wchar_t *Data = Buffer.data();
+ // Strip the \\?\ prefix. We don't want it ending up in output, and such
+ // paths don't get canonicalized by file APIs.
+ wchar_t *Data = Buffer.data();
DWORD CountChars = Buffer.size();
- if (CountChars >= 4) {
- if (0 == ::memcmp(Data, L"\\\\?\\", 8)) {
- CountChars -= 4;
- Data += 4;
- }
+ if (CountChars >= 8 && ::memcmp(Data, L"\\\\?\\UNC\\", 16) == 0) {
+ // Convert \\?\UNC\foo\bar to \\foo\bar
+ CountChars -= 6;
+ Data += 6;
+ Data[0] = '\\';
+ } else if (CountChars >= 4 && ::memcmp(Data, L"\\\\?\\", 8) == 0) {
+ // Convert \\?\c:\foo to c:\foo
+ CountChars -= 4;
+ Data += 4;
}
// Convert the result from UTF-16 to UTF-8.
@@ -1217,57 +1223,34 @@ file_t getStdinHandle() { return ::GetStdHandle(STD_INPUT_HANDLE); }
file_t getStdoutHandle() { return ::GetStdHandle(STD_OUTPUT_HANDLE); }
file_t getStderrHandle() { return ::GetStdHandle(STD_ERROR_HANDLE); }
-std::error_code readNativeFileImpl(file_t FileHandle, char *BufPtr, size_t BytesToRead,
- size_t *BytesRead, OVERLAPPED *Overlap) {
+Expected<size_t> readNativeFileImpl(file_t FileHandle,
+ MutableArrayRef<char> Buf,
+ OVERLAPPED *Overlap) {
// ReadFile can only read 2GB at a time. The caller should check the number of
// bytes and read in a loop until termination.
- DWORD BytesToRead32 =
- std::min(size_t(std::numeric_limits<DWORD>::max()), BytesToRead);
- DWORD BytesRead32 = 0;
- bool Success =
- ::ReadFile(FileHandle, BufPtr, BytesToRead32, &BytesRead32, Overlap);
- *BytesRead = BytesRead32;
- if (!Success) {
- DWORD Err = ::GetLastError();
- // Pipe EOF is not an error.
- if (Err == ERROR_BROKEN_PIPE)
- return std::error_code();
- return mapWindowsError(Err);
- }
- return std::error_code();
-}
-
-std::error_code readNativeFile(file_t FileHandle, MutableArrayRef<char> Buf,
- size_t *BytesRead) {
- return readNativeFileImpl(FileHandle, Buf.data(), Buf.size(), BytesRead,
- /*Overlap=*/nullptr);
-}
-
-std::error_code readNativeFileSlice(file_t FileHandle,
- MutableArrayRef<char> Buf, size_t Offset) {
- char *BufPtr = Buf.data();
- size_t BytesLeft = Buf.size();
-
- while (BytesLeft) {
- uint64_t CurOff = Buf.size() - BytesLeft + Offset;
- OVERLAPPED Overlapped = {};
- Overlapped.Offset = uint32_t(CurOff);
- Overlapped.OffsetHigh = uint32_t(uint64_t(CurOff) >> 32);
-
- size_t BytesRead = 0;
- if (auto EC = readNativeFileImpl(FileHandle, BufPtr, BytesLeft, &BytesRead,
- &Overlapped))
- return EC;
-
- // Once we reach EOF, zero the remaining bytes in the buffer.
- if (BytesRead == 0) {
- memset(BufPtr, 0, BytesLeft);
- break;
- }
- BytesLeft -= BytesRead;
- BufPtr += BytesRead;
- }
- return std::error_code();
+ DWORD BytesToRead =
+ std::min(size_t(std::numeric_limits<DWORD>::max()), Buf.size());
+ DWORD BytesRead = 0;
+ if (::ReadFile(FileHandle, Buf.data(), BytesToRead, &BytesRead, Overlap))
+ return BytesRead;
+ DWORD Err = ::GetLastError();
+ // EOF is not an error.
+ if (Err == ERROR_BROKEN_PIPE || Err == ERROR_HANDLE_EOF)
+ return BytesRead;
+ return errorCodeToError(mapWindowsError(Err));
+}
+
+Expected<size_t> readNativeFile(file_t FileHandle, MutableArrayRef<char> Buf) {
+ return readNativeFileImpl(FileHandle, Buf, /*Overlap=*/nullptr);
+}
+
+Expected<size_t> readNativeFileSlice(file_t FileHandle,
+ MutableArrayRef<char> Buf,
+ uint64_t Offset) {
+ OVERLAPPED Overlapped = {};
+ Overlapped.Offset = uint32_t(Offset);
+ Overlapped.OffsetHigh = uint32_t(Offset >> 32);
+ return readNativeFileImpl(FileHandle, Buf, &Overlapped);
}
std::error_code closeFile(file_t &F) {
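
Worked examples of the new prefix handling (the memcmp byte counts, 16 and 8, are twice the character counts because the buffer is UTF-16):

    \\?\UNC\server\share\f.txt  ->  \\server\share\f.txt
        (advance 6 chars, then rewrite the leading 'C' slot to '\')
    \\?\C:\Users\me\f.txt       ->  C:\Users\me\f.txt
        (advance 4 chars)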
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 0f54e59ee55b..a23ed95fc390 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -470,7 +470,7 @@ std::error_code
llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
WindowsEncodingMethod Encoding) {
std::error_code EC;
- llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::F_Text);
+ llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OF_Text);
if (EC)
return EC;
diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc
deleted file mode 100644
index 8df9bc394160..000000000000
--- a/lib/Support/Windows/RWMutex.inc
+++ /dev/null
@@ -1,128 +0,0 @@
-//= llvm/Support/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Win32 specific (non-pthread) RWMutex class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic Win32 code that
-//=== is guaranteed to work on *all* Win32 variants.
-//===----------------------------------------------------------------------===//
-
-#include "WindowsSupport.h"
-
-namespace llvm {
-
-// Windows has slim read-writer lock support on Vista and higher, so we
-// will attempt to load the APIs. If they exist, we will use them, and
-// if not, we will fall back on critical sections. When we drop support
-// for XP, we can stop lazy-loading these APIs and just use them directly.
-#if defined(__MINGW32__)
- // Taken from WinNT.h
- typedef struct _RTL_SRWLOCK {
- PVOID Ptr;
- } RTL_SRWLOCK, *PRTL_SRWLOCK;
-
- // Taken from WinBase.h
- typedef RTL_SRWLOCK SRWLOCK, *PSRWLOCK;
-#endif
-
-static VOID (WINAPI *fpInitializeSRWLock)(PSRWLOCK lock) = NULL;
-static VOID (WINAPI *fpAcquireSRWLockExclusive)(PSRWLOCK lock) = NULL;
-static VOID (WINAPI *fpAcquireSRWLockShared)(PSRWLOCK lock) = NULL;
-static VOID (WINAPI *fpReleaseSRWLockExclusive)(PSRWLOCK lock) = NULL;
-static VOID (WINAPI *fpReleaseSRWLockShared)(PSRWLOCK lock) = NULL;
-
-static bool sHasSRW = false;
-
-static bool loadSRW() {
- static bool sChecked = false;
- if (!sChecked) {
- sChecked = true;
-
- if (HMODULE hLib = ::GetModuleHandleW(L"Kernel32.dll")) {
- fpInitializeSRWLock =
- (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
- "InitializeSRWLock");
- fpAcquireSRWLockExclusive =
- (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
- "AcquireSRWLockExclusive");
- fpAcquireSRWLockShared =
- (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
- "AcquireSRWLockShared");
- fpReleaseSRWLockExclusive =
- (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
- "ReleaseSRWLockExclusive");
- fpReleaseSRWLockShared =
- (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
- "ReleaseSRWLockShared");
-
- if (fpInitializeSRWLock != NULL) {
- sHasSRW = true;
- }
- }
- }
- return sHasSRW;
-}
-
-sys::RWMutexImpl::RWMutexImpl() {
- if (loadSRW()) {
- data_ = safe_calloc(1, sizeof(SRWLOCK));
- fpInitializeSRWLock(static_cast<PSRWLOCK>(data_));
- } else {
- data_ = safe_calloc(1, sizeof(CRITICAL_SECTION));
- InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
- }
-}
-
-sys::RWMutexImpl::~RWMutexImpl() {
- if (!sHasSRW)
- DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
- // Nothing to do in the case of slim reader/writers except free the memory.
- free(data_);
-}
-
-bool sys::RWMutexImpl::reader_acquire() {
- if (sHasSRW) {
- fpAcquireSRWLockShared(static_cast<PSRWLOCK>(data_));
- } else {
- EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
- }
- return true;
-}
-
-bool sys::RWMutexImpl::reader_release() {
- if (sHasSRW) {
- fpReleaseSRWLockShared(static_cast<PSRWLOCK>(data_));
- } else {
- LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
- }
- return true;
-}
-
-bool sys::RWMutexImpl::writer_acquire() {
- if (sHasSRW) {
- fpAcquireSRWLockExclusive(static_cast<PSRWLOCK>(data_));
- } else {
- EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
- }
- return true;
-}
-
-bool sys::RWMutexImpl::writer_release() {
- if (sHasSRW) {
- fpReleaseSRWLockExclusive(static_cast<PSRWLOCK>(data_));
- } else {
- LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
- }
- return true;
-}
-
-
-}
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 6a820ef22b1e..d962daf79348 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -560,6 +560,9 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
// Unimplemented.
}
+void llvm::sys::SetPipeSignalFunction(void (*Handler)()) {
+ // Unimplemented.
+}
/// Add a function to be called when a signal is delivered to the process. The
/// handler can have a cookie passed to it to identify what instance of the
diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h
index fed9b2f462ef..2e2e97430b76 100644
--- a/lib/Support/Windows/WindowsSupport.h
+++ b/lib/Support/Windows/WindowsSupport.h
@@ -38,6 +38,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h" // Get build system configuration settings
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/VersionTuple.h"
diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc
index bbbf7ea6a777..0a4fda1d4e8c 100644
--- a/lib/Support/Windows/explicit_symbols.inc
+++ b/lib/Support/Windows/explicit_symbols.inc
@@ -90,12 +90,6 @@
INLINE_DEF_FLOAT_SYMBOL(tanf, 1)
INLINE_DEF_FLOAT_SYMBOL(tanhf, 1)
- // These were added in VS 2013.
-#if (1800 <= _MSC_VER && _MSC_VER < 1900)
- INLINE_DEF_FLOAT_SYMBOL(copysignf, 2)
- INLINE_DEF_FLOAT_SYMBOL(fminf, 2)
- INLINE_DEF_FLOAT_SYMBOL(fmaxf, 2)
-#endif
#undef INLINE_DEF_FLOAT_SYMBOL
#endif
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index 09eb36943de9..eba22fd14725 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -40,7 +40,7 @@ IO::IO(void *Context) : Ctxt(Context) {}
IO::~IO() = default;
-void *IO::getContext() {
+void *IO::getContext() const {
return Ctxt;
}
@@ -79,7 +79,7 @@ void Input::ScalarHNode::anchor() {}
void Input::MapHNode::anchor() {}
void Input::SequenceHNode::anchor() {}
-bool Input::outputting() {
+bool Input::outputting() const {
return false;
}
@@ -377,12 +377,12 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
// Copy string to permanent storage
KeyStr = StringStorage.str().copy(StringAllocator);
}
- return llvm::make_unique<ScalarHNode>(N, KeyStr);
+ return std::make_unique<ScalarHNode>(N, KeyStr);
} else if (BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N)) {
StringRef ValueCopy = BSN->getValue().copy(StringAllocator);
- return llvm::make_unique<ScalarHNode>(N, ValueCopy);
+ return std::make_unique<ScalarHNode>(N, ValueCopy);
} else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
- auto SQHNode = llvm::make_unique<SequenceHNode>(N);
+ auto SQHNode = std::make_unique<SequenceHNode>(N);
for (Node &SN : *SQ) {
auto Entry = createHNodes(&SN);
if (EC)
@@ -391,7 +391,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
}
return std::move(SQHNode);
} else if (MappingNode *Map = dyn_cast<MappingNode>(N)) {
- auto mapHNode = llvm::make_unique<MapHNode>(N);
+ auto mapHNode = std::make_unique<MapHNode>(N);
for (KeyValueNode &KVN : *Map) {
Node *KeyNode = KVN.getKey();
ScalarNode *Key = dyn_cast<ScalarNode>(KeyNode);
@@ -416,7 +416,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
}
return std::move(mapHNode);
} else if (isa<NullNode>(N)) {
- return llvm::make_unique<EmptyHNode>(N);
+ return std::make_unique<EmptyHNode>(N);
} else {
setError(N, "unknown node kind");
return nullptr;
@@ -440,7 +440,7 @@ Output::Output(raw_ostream &yout, void *context, int WrapColumn)
Output::~Output() = default;
-bool Output::outputting() {
+bool Output::outputting() const {
return true;
}
diff --git a/lib/Support/Z3Solver.cpp b/lib/Support/Z3Solver.cpp
index f1a6fdf87cf2..a83d0f441a4b 100644
--- a/lib/Support/Z3Solver.cpp
+++ b/lib/Support/Z3Solver.cpp
@@ -886,7 +886,7 @@ public:
llvm::SMTSolverRef llvm::CreateZ3Solver() {
#if LLVM_WITH_Z3
- return llvm::make_unique<Z3Solver>();
+ return std::make_unique<Z3Solver>();
#else
llvm::report_fatal_error("LLVM was not compiled with Z3 support, rebuild "
"with -DLLVM_ENABLE_Z3_SOLVER=ON",
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 2baccaa0cbd7..b9989371f5ea 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -65,6 +65,17 @@
using namespace llvm;
+const raw_ostream::Colors raw_ostream::BLACK;
+const raw_ostream::Colors raw_ostream::RED;
+const raw_ostream::Colors raw_ostream::GREEN;
+const raw_ostream::Colors raw_ostream::YELLOW;
+const raw_ostream::Colors raw_ostream::BLUE;
+const raw_ostream::Colors raw_ostream::MAGENTA;
+const raw_ostream::Colors raw_ostream::CYAN;
+const raw_ostream::Colors raw_ostream::WHITE;
+const raw_ostream::Colors raw_ostream::SAVEDCOLOR;
+const raw_ostream::Colors raw_ostream::RESET;
+
raw_ostream::~raw_ostream() {
// raw_ostream's subclasses should take care to flush the buffer
// in their destructors.
@@ -133,6 +144,14 @@ raw_ostream &raw_ostream::write_hex(unsigned long long N) {
return *this;
}
+raw_ostream &raw_ostream::operator<<(Colors C) {
+ if (C == Colors::RESET)
+ resetColor();
+ else
+ changeColor(C);
+ return *this;
+}
+
raw_ostream &raw_ostream::write_uuid(const uuid_t UUID) {
for (int Idx = 0; Idx < 16; ++Idx) {
*this << format("%02" PRIX32, UUID[Idx]);
@@ -784,11 +803,15 @@ size_t raw_fd_ostream::preferred_buffer_size() const {
raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
bool bg) {
+ if (!ColorEnabled)
+ return *this;
+
if (sys::Process::ColorNeedsFlush())
flush();
const char *colorcode =
- (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg)
- : sys::Process::OutputColor(colors, bold, bg);
+ (colors == SAVEDCOLOR)
+ ? sys::Process::OutputBold(bg)
+ : sys::Process::OutputColor(static_cast<char>(colors), bold, bg);
if (colorcode) {
size_t len = strlen(colorcode);
write(colorcode, len);
@@ -799,6 +822,9 @@ raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
}
raw_ostream &raw_fd_ostream::resetColor() {
+ if (!ColorEnabled)
+ return *this;
+
if (sys::Process::ColorNeedsFlush())
flush();
const char *colorcode = sys::Process::ResetColor();
@@ -812,6 +838,9 @@ raw_ostream &raw_fd_ostream::resetColor() {
}
raw_ostream &raw_fd_ostream::reverseColor() {
+ if (!ColorEnabled)
+ return *this;
+
if (sys::Process::ColorNeedsFlush())
flush();
const char *colorcode = sys::Process::OutputReverse();
@@ -843,7 +872,7 @@ void raw_fd_ostream::anchor() {}
raw_ostream &llvm::outs() {
// Set buffer settings to model stdout behavior.
std::error_code EC;
- static raw_fd_ostream S("-", EC, sys::fs::F_None);
+ static raw_fd_ostream S("-", EC, sys::fs::OF_None);
assert(!EC);
return S;
}
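
With the streaming operator added above, callers can change colors inline instead of pairing changeColor()/resetColor() by hand; a minimal usage sketch:

    #include "llvm/Support/raw_ostream.h"

    void warnInColor() {
      // RESET is special-cased by operator<< to restore terminal state;
      // the guards added above make all of this a no-op when color is
      // disabled on the stream.
      llvm::errs() << llvm::raw_ostream::RED << "warning:"
                   << llvm::raw_ostream::RESET << " something looks off\n";
    }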
diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c
index 12669ab75d1a..ee2a1d87a267 100644
--- a/lib/Support/regcomp.c
+++ b/lib/Support/regcomp.c
@@ -48,6 +48,7 @@
#include "regex2.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
/* character-class table */
static struct cclass {
@@ -537,7 +538,7 @@ p_ere_exp(struct parse *p)
break;
case '{': /* okay as ordinary except if digit follows */
REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
- /* FALLTHROUGH */
+ LLVM_FALLTHROUGH;
default:
ordinary(p, c);
break;
@@ -733,7 +734,7 @@ p_simp_re(struct parse *p,
break;
case '*':
REQUIRE(starordinary, REG_BADRPT);
- /* FALLTHROUGH */
+ LLVM_FALLTHROUGH;
default:
ordinary(p, (char)c);
break;
@@ -1635,7 +1636,7 @@ findmust(struct parse *p, struct re_guts *g)
return;
}
} while (OP(s) != O_QUEST && OP(s) != O_CH);
- /* fallthrough */
+ LLVM_FALLTHROUGH;
default: /* things that break a sequence */
if (newlen > g->mlen) { /* ends one */
start = newstart;
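
The macro swap in regcomp.c, shown standalone (classify is an illustrative function):

    #include "llvm/Support/Compiler.h"

    int classify(int C) {
      switch (C) {
      case '{':
        // extra validation for '{' runs first, then falls through
        LLVM_FALLTHROUGH; // expands to [[fallthrough]] (or a no-op),
                          // keeping -Wimplicit-fallthrough quiet where
                          // a bare /* FALLTHROUGH */ comment may not.
      default:
        return C;
      }
    }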
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index 7523b32ca0e5..54b063cb4f8d 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -39,6 +39,8 @@ static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind,
"instantiated from multiclass");
}
+void PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; }
+
void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg);
}
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index bcd39584e450..48ded6c45a46 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -49,6 +49,9 @@ static cl::list<std::string>
MacroNames("D", cl::desc("Name of the macro to be defined"),
cl::value_desc("macro name"), cl::Prefix);
+static cl::opt<bool>
+WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
+
static int reportError(const char *ProgName, Twine Msg) {
errs() << ProgName << ": " << Msg;
errs().flush();
@@ -64,7 +67,7 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) {
return reportError(argv0, "the option -d must be used together with -o\n");
std::error_code EC;
- ToolOutputFile DepOut(DependFilename, EC, sys::fs::F_Text);
+ ToolOutputFile DepOut(DependFilename, EC, sys::fs::OF_None);
if (EC)
return reportError(argv0, "error opening " + DependFilename + ":" +
EC.message() + "\n");
@@ -114,15 +117,17 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
return Ret;
}
- // Only updates the real output file if there are any differences.
- // This prevents recompilation of all the files depending on it if there
- // aren't any.
- if (auto ExistingOrErr = MemoryBuffer::getFile(OutputFilename))
- if (std::move(ExistingOrErr.get())->getBuffer() == Out.str())
- return 0;
+ if (WriteIfChanged) {
+ // Only updates the real output file if there are any differences.
+ // This prevents recompilation of all the files depending on it if there
+ // aren't any.
+ if (auto ExistingOrErr = MemoryBuffer::getFile(OutputFilename))
+ if (std::move(ExistingOrErr.get())->getBuffer() == Out.str())
+ return 0;
+ }
std::error_code EC;
- ToolOutputFile OutFile(OutputFilename, EC, sys::fs::F_Text);
+ ToolOutputFile OutFile(OutputFilename, EC, sys::fs::OF_None);
if (EC)
return reportError(argv0, "error opening " + OutputFilename + ":" +
EC.message() + "\n");
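
The comparison now gated behind -write-if-changed, sketched standalone (outputUnchanged is an illustrative name):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/MemoryBuffer.h"

    // Skip rewriting the output when the new contents byte-match the
    // existing file, preserving its mtime so build systems do not
    // rebuild everything that depends on the generated file.
    bool outputUnchanged(llvm::StringRef Path, llvm::StringRef NewContents) {
      auto ExistingOrErr = llvm::MemoryBuffer::getFile(Path);
      return ExistingOrErr && (*ExistingOrErr)->getBuffer() == NewContents;
    }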
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 27d1bdc7f4c3..835ef8c7141b 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -438,7 +438,7 @@ Init *BitsInit::resolveReferences(Resolver &R) const {
CachedBitVarRef = CurBitVar->getBitVar();
CachedBitVarResolved = CachedBitVarRef->resolveReferences(R);
}
-
+ assert(CachedBitVarResolved && "Unresolved bitvar reference");
NewBit = CachedBitVarResolved->getBit(CurBitVar->getBitNum());
} else {
// getBit(0) implicitly converts int and bits<1> values to bit.
@@ -1616,7 +1616,7 @@ void VarDefInit::Profile(FoldingSetNodeID &ID) const {
DefInit *VarDefInit::instantiate() {
if (!Def) {
RecordKeeper &Records = Class->getRecords();
- auto NewRecOwner = make_unique<Record>(Records.getNewAnonymousName(),
+ auto NewRecOwner = std::make_unique<Record>(Records.getNewAnonymousName(),
Class->getLoc(), Records,
/*IsAnonymous=*/true);
Record *NewRec = NewRecOwner.get();
@@ -1930,6 +1930,13 @@ void DagInit::Profile(FoldingSetNodeID &ID) const {
ProfileDagInit(ID, Val, ValName, makeArrayRef(getTrailingObjects<Init *>(), NumArgs), makeArrayRef(getTrailingObjects<StringInit *>(), NumArgNames));
}
+Record *DagInit::getOperatorAsDef(ArrayRef<SMLoc> Loc) const {
+ if (DefInit *DefI = dyn_cast<DefInit>(Val))
+ return DefI->getDef();
+ PrintFatalError(Loc, "Expected record as operator");
+ return nullptr;
+}
+
Init *DagInit::resolveReferences(Resolver &R) const {
SmallVector<Init*, 8> NewArgs;
NewArgs.reserve(arg_size());
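
A hypothetical backend snippet using the new accessor (names illustrative): it replaces a per-caller dyn_cast plus null check with one located fatal error.

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/Record.h"

    void inspectDag(const llvm::DagInit *Dag, const llvm::Record *Rec) {
      // For `(ops GPR:$a, GPR:$b)` this yields the `ops` record, or
      // emits "Expected record as operator" at Rec's location.
      llvm::Record *Op = Dag->getOperatorAsDef(Rec->getLoc());
      llvm::outs() << "operator: " << Op->getName() << "\n";
    }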
diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp
index a870e41d58f8..5a30ee98cce9 100644
--- a/lib/TableGen/SetTheory.cpp
+++ b/lib/TableGen/SetTheory.cpp
@@ -255,16 +255,16 @@ void SetTheory::Operator::anchor() {}
void SetTheory::Expander::anchor() {}
SetTheory::SetTheory() {
- addOperator("add", llvm::make_unique<AddOp>());
- addOperator("sub", llvm::make_unique<SubOp>());
- addOperator("and", llvm::make_unique<AndOp>());
- addOperator("shl", llvm::make_unique<ShlOp>());
- addOperator("trunc", llvm::make_unique<TruncOp>());
- addOperator("rotl", llvm::make_unique<RotOp>(false));
- addOperator("rotr", llvm::make_unique<RotOp>(true));
- addOperator("decimate", llvm::make_unique<DecimateOp>());
- addOperator("interleave", llvm::make_unique<InterleaveOp>());
- addOperator("sequence", llvm::make_unique<SequenceOp>());
+ addOperator("add", std::make_unique<AddOp>());
+ addOperator("sub", std::make_unique<SubOp>());
+ addOperator("and", std::make_unique<AndOp>());
+ addOperator("shl", std::make_unique<ShlOp>());
+ addOperator("trunc", std::make_unique<TruncOp>());
+ addOperator("rotl", std::make_unique<RotOp>(false));
+ addOperator("rotr", std::make_unique<RotOp>(true));
+ addOperator("decimate", std::make_unique<DecimateOp>());
+ addOperator("interleave", std::make_unique<InterleaveOp>());
+ addOperator("sequence", std::make_unique<SequenceOp>());
}
void SetTheory::addOperator(StringRef Name, std::unique_ptr<Operator> Op) {
@@ -276,7 +276,7 @@ void SetTheory::addExpander(StringRef ClassName, std::unique_ptr<Expander> E) {
}
void SetTheory::addFieldExpander(StringRef ClassName, StringRef FieldName) {
- addExpander(ClassName, llvm::make_unique<FieldExpander>(FieldName));
+ addExpander(ClassName, std::make_unique<FieldExpander>(FieldName));
}
void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index d28c62b3133d..da2286e41fe5 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -51,7 +51,7 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
// Pretend that we enter the "top-level" include file.
PrepIncludeStack.push_back(
- make_unique<std::vector<PreprocessorControlDesc>>());
+ std::make_unique<std::vector<PreprocessorControlDesc>>());
// Put all macros defined in the command line into the DefinedMacros set.
std::for_each(Macros.begin(), Macros.end(),
@@ -393,7 +393,7 @@ bool TGLexer::LexInclude() {
CurPtr = CurBuf.begin();
PrepIncludeStack.push_back(
- make_unique<std::vector<PreprocessorControlDesc>>());
+ std::make_unique<std::vector<PreprocessorControlDesc>>());
return false;
}
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index a9ace152d59e..c373e2899a5d 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -378,7 +378,7 @@ bool TGParser::resolve(const ForeachLoop &Loop, SubstStack &Substs,
auto LI = dyn_cast<ListInit>(List);
if (!LI) {
if (!Final) {
- Dest->emplace_back(make_unique<ForeachLoop>(Loop.Loc, Loop.IterVar,
+ Dest->emplace_back(std::make_unique<ForeachLoop>(Loop.Loc, Loop.IterVar,
List));
return resolve(Loop.Entries, Substs, Final, &Dest->back().Loop->Entries,
Loc);
@@ -413,7 +413,7 @@ bool TGParser::resolve(const std::vector<RecordsEntry> &Source,
if (E.Loop) {
Error = resolve(*E.Loop, Substs, Final, Dest);
} else {
- auto Rec = make_unique<Record>(*E.Rec);
+ auto Rec = std::make_unique<Record>(*E.Rec);
if (Loc)
Rec->appendLoc(*Loc);
@@ -1147,9 +1147,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
if (!InitList.back()) return nullptr;
// All BinOps require their arguments to be of compatible types.
- TypedInit *TI = dyn_cast<TypedInit>(InitList.back());
+ RecTy *ListType = cast<TypedInit>(InitList.back())->getType();
if (!ArgType) {
- ArgType = TI->getType();
+ ArgType = ListType;
switch (Code) {
case BinOpInit::LISTCONCAT:
@@ -1198,11 +1198,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
default: llvm_unreachable("other ops have fixed argument types");
}
} else {
- RecTy *Resolved = resolveTypes(ArgType, TI->getType());
+ RecTy *Resolved = resolveTypes(ArgType, ListType);
if (!Resolved) {
Error(InitLoc, Twine("expected value of type '") +
- ArgType->getAsString() + "', got '" +
- TI->getType()->getAsString() + "'");
+ ArgType->getAsString() + "', got '" +
+ ListType->getAsString() + "'");
return nullptr;
}
if (Code != BinOpInit::ADD && Code != BinOpInit::AND &&
@@ -1330,7 +1330,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
std::unique_ptr<Record> ParseRecTmp;
Record *ParseRec = CurRec;
if (!ParseRec) {
- ParseRecTmp = make_unique<Record>(".parse", ArrayRef<SMLoc>{}, Records);
+ ParseRecTmp = std::make_unique<Record>(".parse", ArrayRef<SMLoc>{}, Records);
ParseRec = ParseRecTmp.get();
}
@@ -1597,7 +1597,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
std::unique_ptr<Record> ParseRecTmp;
Record *ParseRec = CurRec;
if (!ParseRec) {
- ParseRecTmp = make_unique<Record>(".parse", ArrayRef<SMLoc>{}, Records);
+ ParseRecTmp = std::make_unique<Record>(".parse", ArrayRef<SMLoc>{}, Records);
ParseRec = ParseRecTmp.get();
}
@@ -2702,10 +2702,10 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
return true;
if (isa<UnsetInit>(Name))
- CurRec = make_unique<Record>(Records.getNewAnonymousName(), DefLoc, Records,
+ CurRec = std::make_unique<Record>(Records.getNewAnonymousName(), DefLoc, Records,
/*Anonymous=*/true);
else
- CurRec = make_unique<Record>(Name, DefLoc, Records);
+ CurRec = std::make_unique<Record>(Name, DefLoc, Records);
if (ParseObjectBody(CurRec.get()))
return true;
@@ -2783,7 +2783,7 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
Lex.Lex(); // Eat the in
// Create a loop object and remember it.
- Loops.push_back(llvm::make_unique<ForeachLoop>(Loc, IterName, ListValue));
+ Loops.push_back(std::make_unique<ForeachLoop>(Loc, IterName, ListValue));
if (Lex.getCode() != tgtok::l_brace) {
// FOREACH Declaration IN Object
@@ -2834,7 +2834,7 @@ bool TGParser::ParseClass() {
} else {
// If this is the first reference to this class, create and add it.
auto NewRec =
- llvm::make_unique<Record>(Lex.getCurStrVal(), Lex.getLoc(), Records,
+ std::make_unique<Record>(Lex.getCurStrVal(), Lex.getLoc(), Records,
/*Class=*/true);
CurRec = NewRec.get();
Records.addClass(std::move(NewRec));
@@ -2963,7 +2963,7 @@ bool TGParser::ParseMultiClass() {
auto Result =
MultiClasses.insert(std::make_pair(Name,
- llvm::make_unique<MultiClass>(Name, Lex.getLoc(),Records)));
+ std::make_unique<MultiClass>(Name, Lex.getLoc(),Records)));
if (!Result.second)
return TokError("multiclass '" + Name + "' already defined");
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index 6965403a25ab..ac765ebcddc0 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -55,8 +55,9 @@ FunctionPass *createAArch64CollectLOHPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
-FunctionPass *createAArch64PreLegalizeCombiner();
-FunctionPass *createAArch64StackTaggingPass();
+FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone);
+FunctionPass *createAArch64StackTaggingPass(bool MergeInit);
+FunctionPass *createAArch64StackTaggingPreRAPass();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
@@ -80,6 +81,7 @@ void initializeFalkorHWPFFixPass(PassRegistry&);
void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
void initializeLDTLSCleanupPass(PassRegistry&);
void initializeAArch64StackTaggingPass(PassRegistry&);
+void initializeAArch64StackTaggingPreRAPass(PassRegistry&);
} // end namespace llvm
#endif
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index e39c6995e367..5b4c9e2149da 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -115,11 +115,12 @@ def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true",
def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true",
"Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>;
-def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true",
+def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true",
"Enable bit permutation SVE2 instructions", [FeatureSVE2]>;
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
+
def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
"Has zero-cycle zeroing instructions for generic registers">;
@@ -284,6 +285,10 @@ def FeatureSEL2 : SubtargetFeature<
"sel2", "HasSEL2", "true",
"Enable v8.4-A Secure Exception Level 2 extension">;
+def FeaturePMU : SubtargetFeature<
+ "pmu", "HasPMU", "true",
+ "Enable v8.4-A PMU extension">;
+
def FeatureTLB_RMI : SubtargetFeature<
"tlb-rmi", "HasTLB_RMI", "true",
"Enable v8.4-A TLB Range and Maintenance Instructions">;
@@ -345,6 +350,21 @@ def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen",
def FeatureMTE : SubtargetFeature<"mte", "HasMTE",
"true", "Enable Memory Tagging Extension" >;
+def FeatureTRBE : SubtargetFeature<"trbe", "HasTRBE",
+ "true", "Enable Trace Buffer Extension">;
+
+def FeatureETE : SubtargetFeature<"ete", "HasETE",
+ "true", "Enable Embedded Trace Extension",
+ [FeatureTRBE]>;
+
+def FeatureTME : SubtargetFeature<"tme", "HasTME",
+ "true", "Enable Transactional Memory Extension" >;
+
+def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
+ "AllowTaggedGlobals",
+ "true", "Use an instruction sequence for taking the address of a global "
+ "that allows a memory tag in the upper address bits">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -354,7 +374,7 @@ def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
FeaturePAN, FeatureLOR, FeatureVH]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
- "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO,
+ "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO,
FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>;
def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
@@ -364,7 +384,7 @@ def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
"Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd,
FeatureNV, FeatureRASv8_4, FeatureMPAM, FeatureDIT,
- FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI,
+ FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeaturePMU, FeatureTLB_RMI,
FeatureFMI, FeatureRCPC_IMMO]>;
def HasV8_5aOps : SubtargetFeature<
@@ -390,6 +410,7 @@ include "AArch64Schedule.td"
include "AArch64InstrInfo.td"
include "AArch64SchedPredicates.td"
include "AArch64SchedPredExynos.td"
+include "AArch64Combine.td"
def AArch64InstrInfo : InstrInfo;
@@ -484,6 +505,19 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeaturePredictableSelectIsExpensive
]>;
+def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
+ "Cortex-A65 ARM processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureDotProd,
+ FeatureFPARMv8,
+ FeatureFullFP16,
+ FeatureNEON,
+ FeatureRAS,
+ FeatureRCPC,
+ FeatureSSBS,
+ ]>;
+
def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
"Cortex-A72 ARM processors", [
FeatureCRC,
@@ -641,6 +675,33 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
FeatureSlowSTRQro
]>;
+def ProcNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily",
+ "NeoverseE1",
+ "Neoverse E1 ARM processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureDotProd,
+ FeatureFPARMv8,
+ FeatureFullFP16,
+ FeatureNEON,
+ FeatureRCPC,
+ FeatureSSBS,
+ ]>;
+
+def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily",
+ "NeoverseN1",
+ "Neoverse N1 ARM processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureDotProd,
+ FeatureFPARMv8,
+ FeatureFullFP16,
+ FeatureNEON,
+ FeatureRCPC,
+ FeatureSPE,
+ FeatureSSBS,
+ ]>;
+
def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
"Qualcomm Saphira processors", [
FeatureCrypto,
@@ -732,19 +793,28 @@ def : ProcessorModel<"generic", NoSchedModel, [
FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
- FeaturePostRAScheduler
+ FeaturePostRAScheduler,
+// ETE and TRBE are future architecture extensions. We temporarily enable them
+// by default for users targeting generic AArch64, until it is decided in which
+// armv8.x-a architecture revision they will end up. The extensions do not
+// affect code generated by the compiler and can be used only by explicitly
+// mentioning the new system register names in assembly.
+ FeatureETE
]>;
-// FIXME: Cortex-A35 and Cortex-A55 are currently modeled as a Cortex-A53.
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
+def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>;
+def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>;
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>;
def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
+def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>;
+def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 92c8c4955d50..13d389cec7a0 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -552,7 +552,7 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
std::vector<unsigned> ToErase;
for (auto &U : I.operands()) {
if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) {
- unsigned OrigReg = U.getReg();
+ Register OrigReg = U.getReg();
U.setReg(Substs[OrigReg]);
if (U.isKill())
// Don't erase straight away, because there may be other operands
@@ -611,12 +611,12 @@ void AArch64A57FPLoadBalancing::scanInstruction(
// Create a new chain. Multiplies don't require forwarding so can go on any
// unit.
- unsigned DestReg = MI->getOperand(0).getReg();
+ Register DestReg = MI->getOperand(0).getReg();
LLVM_DEBUG(dbgs() << "New chain started for register "
<< printReg(DestReg, TRI) << " at " << *MI);
- auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
+ auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
AllChains.push_back(std::move(G));
@@ -624,8 +624,8 @@ void AArch64A57FPLoadBalancing::scanInstruction(
// It is beneficial to keep MLAs on the same functional unit as their
// accumulator operand.
- unsigned DestReg = MI->getOperand(0).getReg();
- unsigned AccumReg = MI->getOperand(3).getReg();
+ Register DestReg = MI->getOperand(0).getReg();
+ Register AccumReg = MI->getOperand(3).getReg();
maybeKillChain(MI->getOperand(1), Idx, ActiveChains);
maybeKillChain(MI->getOperand(2), Idx, ActiveChains);
@@ -661,7 +661,7 @@ void AArch64A57FPLoadBalancing::scanInstruction(
LLVM_DEBUG(dbgs() << "Creating new chain for dest register "
<< printReg(DestReg, TRI) << "\n");
- auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
+ auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
AllChains.push_back(std::move(G));
diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 89404463e1f0..981b366c14b1 100644
--- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -105,14 +105,14 @@ static bool isGPR64(unsigned Reg, unsigned SubReg,
const MachineRegisterInfo *MRI) {
if (SubReg)
return false;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass);
return AArch64::GPR64RegClass.contains(Reg);
}
static bool isFPR64(unsigned Reg, unsigned SubReg,
const MachineRegisterInfo *MRI) {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) &&
SubReg == 0) ||
(MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) &&
@@ -201,8 +201,8 @@ bool AArch64AdvSIMDScalar::isProfitableToTransform(
unsigned NumNewCopies = 3;
unsigned NumRemovableCopies = 0;
- unsigned OrigSrc0 = MI.getOperand(1).getReg();
- unsigned OrigSrc1 = MI.getOperand(2).getReg();
+ Register OrigSrc0 = MI.getOperand(1).getReg();
+ Register OrigSrc1 = MI.getOperand(2).getReg();
unsigned SubReg0;
unsigned SubReg1;
if (!MRI->def_empty(OrigSrc0)) {
@@ -236,7 +236,7 @@ bool AArch64AdvSIMDScalar::isProfitableToTransform(
// any of the uses is a transformable instruction, it's likely the transforms
// will chain, enabling us to save a copy there, too. This is an aggressive
// heuristic that approximates the graph-based cost analysis described above.
- unsigned Dst = MI.getOperand(0).getReg();
+ Register Dst = MI.getOperand(0).getReg();
bool AllUsesAreCopies = true;
for (MachineRegisterInfo::use_instr_nodbg_iterator
Use = MRI->use_instr_nodbg_begin(Dst),
@@ -293,8 +293,8 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) {
assert(OldOpc != NewOpc && "transform an instruction to itself?!");
// Check if we need a copy for the source registers.
- unsigned OrigSrc0 = MI.getOperand(1).getReg();
- unsigned OrigSrc1 = MI.getOperand(2).getReg();
+ Register OrigSrc0 = MI.getOperand(1).getReg();
+ Register OrigSrc1 = MI.getOperand(2).getReg();
unsigned Src0 = 0, SubReg0;
unsigned Src1 = 0, SubReg1;
bool KillSrc0 = false, KillSrc1 = false;
@@ -354,7 +354,7 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) {
// Create a vreg for the destination.
// FIXME: No need to do this if the ultimate user expects an FPR64.
// Check for that and avoid the copy if possible.
- unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
+ Register Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
// For now, all of the new instructions have the same simple three-register
// form, so no need to special case based on what instruction we're
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 094fbd999523..7ea7915c2ca6 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -99,7 +99,8 @@ public:
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
- std::map<std::pair<unsigned, uint32_t>, MCSymbol *> HwasanMemaccessSymbols;
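+  // Key fields: the register holding the checked pointer, whether the check
+  // uses short-granule handling, and the access-info immediate.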
+ typedef std::tuple<unsigned, bool, uint32_t> HwasanMemaccessTuple;
+ std::map<HwasanMemaccessTuple, MCSymbol *> HwasanMemaccessSymbols;
void LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI);
void EmitHwasanMemaccessSymbols(Module &M);
@@ -150,7 +151,7 @@ private:
void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O);
bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
bool printAsmRegInClass(const MachineOperand &MO,
- const TargetRegisterClass *RC, bool isVector,
+ const TargetRegisterClass *RC, unsigned AltName,
raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
@@ -236,9 +237,12 @@ void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
}
void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
+ bool IsShort =
+ MI.getOpcode() == AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES;
uint32_t AccessInfo = MI.getOperand(1).getImm();
- MCSymbol *&Sym = HwasanMemaccessSymbols[{Reg, AccessInfo}];
+ MCSymbol *&Sym =
+ HwasanMemaccessSymbols[HwasanMemaccessTuple(Reg, IsShort, AccessInfo)];
if (!Sym) {
// FIXME: Make this work on non-ELF.
if (!TM.getTargetTriple().isOSBinFormatELF())
@@ -246,6 +250,8 @@ void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" +
utostr(AccessInfo);
+ if (IsShort)
+ SymName += "_short";
Sym = OutContext.getOrCreateSymbol(SymName);
}
@@ -263,15 +269,22 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
std::unique_ptr<MCSubtargetInfo> STI(
TM.getTarget().createMCSubtargetInfo(TT.str(), "", ""));
- MCSymbol *HwasanTagMismatchSym =
+ MCSymbol *HwasanTagMismatchV1Sym =
OutContext.getOrCreateSymbol("__hwasan_tag_mismatch");
+ MCSymbol *HwasanTagMismatchV2Sym =
+ OutContext.getOrCreateSymbol("__hwasan_tag_mismatch_v2");
- const MCSymbolRefExpr *HwasanTagMismatchRef =
- MCSymbolRefExpr::create(HwasanTagMismatchSym, OutContext);
+ const MCSymbolRefExpr *HwasanTagMismatchV1Ref =
+ MCSymbolRefExpr::create(HwasanTagMismatchV1Sym, OutContext);
+ const MCSymbolRefExpr *HwasanTagMismatchV2Ref =
+ MCSymbolRefExpr::create(HwasanTagMismatchV2Sym, OutContext);
for (auto &P : HwasanMemaccessSymbols) {
- unsigned Reg = P.first.first;
- uint32_t AccessInfo = P.first.second;
+ unsigned Reg = std::get<0>(P.first);
+ bool IsShort = std::get<1>(P.first);
+ uint32_t AccessInfo = std::get<2>(P.first);
+ const MCSymbolRefExpr *HwasanTagMismatchRef =
+ IsShort ? HwasanTagMismatchV2Ref : HwasanTagMismatchV1Ref;
MCSymbol *Sym = P.second;
OutStreamer->SwitchSection(OutContext.getELFSection(
@@ -304,82 +317,86 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
.addReg(Reg)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)),
*STI);
- MCSymbol *HandlePartialSym = OutContext.createTempSymbol();
+ MCSymbol *HandleMismatchOrPartialSym = OutContext.createTempSymbol();
OutStreamer->EmitInstruction(
MCInstBuilder(AArch64::Bcc)
.addImm(AArch64CC::NE)
- .addExpr(MCSymbolRefExpr::create(HandlePartialSym, OutContext)),
+ .addExpr(MCSymbolRefExpr::create(HandleMismatchOrPartialSym,
+ OutContext)),
*STI);
MCSymbol *ReturnSym = OutContext.createTempSymbol();
OutStreamer->EmitLabel(ReturnSym);
OutStreamer->EmitInstruction(
MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI);
+ OutStreamer->EmitLabel(HandleMismatchOrPartialSym);
- OutStreamer->EmitLabel(HandlePartialSym);
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWri)
- .addReg(AArch64::WZR)
- .addReg(AArch64::W16)
- .addImm(15)
- .addImm(0),
- *STI);
- MCSymbol *HandleMismatchSym = OutContext.createTempSymbol();
- OutStreamer->EmitInstruction(
- MCInstBuilder(AArch64::Bcc)
- .addImm(AArch64CC::HI)
- .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
- *STI);
-
- OutStreamer->EmitInstruction(
- MCInstBuilder(AArch64::ANDXri)
- .addReg(AArch64::X17)
- .addReg(Reg)
- .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
- *STI);
- unsigned Size = 1 << (AccessInfo & 0xf);
- if (Size != 1)
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ADDXri)
- .addReg(AArch64::X17)
- .addReg(AArch64::X17)
- .addImm(Size - 1)
+ if (IsShort) {
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWri)
+ .addReg(AArch64::WZR)
+ .addReg(AArch64::W16)
+ .addImm(15)
.addImm(0),
*STI);
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs)
- .addReg(AArch64::WZR)
- .addReg(AArch64::W16)
- .addReg(AArch64::W17)
- .addImm(0),
- *STI);
- OutStreamer->EmitInstruction(
- MCInstBuilder(AArch64::Bcc)
- .addImm(AArch64CC::LS)
- .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
- *STI);
-
- OutStreamer->EmitInstruction(
- MCInstBuilder(AArch64::ORRXri)
- .addReg(AArch64::X16)
- .addReg(Reg)
- .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
- *STI);
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBui)
- .addReg(AArch64::W16)
- .addReg(AArch64::X16)
- .addImm(0),
- *STI);
- OutStreamer->EmitInstruction(
- MCInstBuilder(AArch64::SUBSXrs)
- .addReg(AArch64::XZR)
- .addReg(AArch64::X16)
- .addReg(Reg)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)),
- *STI);
- OutStreamer->EmitInstruction(
- MCInstBuilder(AArch64::Bcc)
- .addImm(AArch64CC::EQ)
- .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)),
- *STI);
+ MCSymbol *HandleMismatchSym = OutContext.createTempSymbol();
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::Bcc)
+ .addImm(AArch64CC::HI)
+ .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
+ *STI);
+
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::ANDXri)
+ .addReg(AArch64::X17)
+ .addReg(Reg)
+ .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
+ *STI);
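+ // The low four bits of AccessInfo encode log2 of the access size, so Size
+ // is the access width in bytes.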
+ unsigned Size = 1 << (AccessInfo & 0xf);
+ if (Size != 1)
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ADDXri)
+ .addReg(AArch64::X17)
+ .addReg(AArch64::X17)
+ .addImm(Size - 1)
+ .addImm(0),
+ *STI);
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs)
+ .addReg(AArch64::WZR)
+ .addReg(AArch64::W16)
+ .addReg(AArch64::W17)
+ .addImm(0),
+ *STI);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::Bcc)
+ .addImm(AArch64CC::LS)
+ .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
+ *STI);
+
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::ORRXri)
+ .addReg(AArch64::X16)
+ .addReg(Reg)
+ .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
+ *STI);
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBui)
+ .addReg(AArch64::W16)
+ .addReg(AArch64::X16)
+ .addImm(0),
+ *STI);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::SUBSXrs)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X16)
+ .addReg(Reg)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)),
+ *STI);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::Bcc)
+ .addImm(AArch64CC::EQ)
+ .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)),
+ *STI);
+
+ OutStreamer->EmitLabel(HandleMismatchSym);
+ }
- OutStreamer->EmitLabel(HandleMismatchSym);
OutStreamer->EmitInstruction(MCInstBuilder(AArch64::STPXpre)
.addReg(AArch64::SP)
.addReg(AArch64::X0)
@@ -414,16 +431,16 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
MCInstBuilder(AArch64::ADRP)
.addReg(AArch64::X16)
.addExpr(AArch64MCExpr::create(
- HwasanTagMismatchRef,
- AArch64MCExpr::VariantKind::VK_GOT_PAGE, OutContext)),
+ HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE,
+ OutContext)),
*STI);
OutStreamer->EmitInstruction(
MCInstBuilder(AArch64::LDRXui)
.addReg(AArch64::X16)
.addReg(AArch64::X16)
.addExpr(AArch64MCExpr::create(
- HwasanTagMismatchRef,
- AArch64MCExpr::VariantKind::VK_GOT_LO12, OutContext)),
+ HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12,
+ OutContext)),
*STI);
OutStreamer->EmitInstruction(
MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI);
@@ -485,15 +502,14 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
default:
llvm_unreachable("<unknown operand type>");
case MachineOperand::MO_Register: {
- unsigned Reg = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ Register Reg = MO.getReg();
+ assert(Register::isPhysicalRegister(Reg));
assert(!MO.getSubReg() && "Subregs should be eliminated!");
O << AArch64InstPrinter::getRegisterName(Reg);
break;
}
case MachineOperand::MO_Immediate: {
- int64_t Imm = MO.getImm();
- O << '#' << Imm;
+ O << MO.getImm();
break;
}
case MachineOperand::MO_GlobalAddress: {
@@ -510,7 +526,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
raw_ostream &O) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
switch (Mode) {
default:
return true; // Unknown mode.
@@ -531,14 +547,13 @@ bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
// printing.
bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterClass *RC,
- bool isVector, raw_ostream &O) {
+ unsigned AltName, raw_ostream &O) {
assert(MO.isReg() && "Should only get here with a register!");
const TargetRegisterInfo *RI = STI->getRegisterInfo();
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
assert(RI->regsOverlap(RegToPrint, Reg));
- O << AArch64InstPrinter::getRegisterName(
- RegToPrint, isVector ? AArch64::vreg : AArch64::NoRegAltName);
+ O << AArch64InstPrinter::getRegisterName(RegToPrint, AltName);
return false;
}
@@ -574,6 +589,7 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
case 's': // Print S register.
case 'd': // Print D register.
case 'q': // Print Q register.
+ case 'z': // Print Z register.
if (MO.isReg()) {
const TargetRegisterClass *RC;
switch (ExtraCode[0]) {
@@ -592,10 +608,13 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
case 'q':
RC = &AArch64::FPR128RegClass;
break;
+ case 'z':
+ RC = &AArch64::ZPRRegClass;
+ break;
default:
return true;
}
- return printAsmRegInClass(MO, RC, false /* vector */, O);
+ return printAsmRegInClass(MO, RC, AArch64::NoRegAltName, O);
}
printOperand(MI, OpNum, O);
return false;
@@ -605,16 +624,26 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// According to ARM, we should emit x and v registers unless we have a
// modifier.
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// If this is a w or x register, print an x register.
if (AArch64::GPR32allRegClass.contains(Reg) ||
AArch64::GPR64allRegClass.contains(Reg))
return printAsmMRegister(MO, 'x', O);
+ unsigned AltName = AArch64::NoRegAltName;
+ const TargetRegisterClass *RegClass;
+ if (AArch64::ZPRRegClass.contains(Reg)) {
+ RegClass = &AArch64::ZPRRegClass;
+ } else if (AArch64::PPRRegClass.contains(Reg)) {
+ RegClass = &AArch64::PPRRegClass;
+ } else {
+ RegClass = &AArch64::FPR128RegClass;
+ AltName = AArch64::vreg;
+ }
+
// If this is a b, h, s, d, or q register, print it as a v register.
- return printAsmRegInClass(MO, &AArch64::FPR128RegClass, true /* vector */,
- O);
+ return printAsmRegInClass(MO, RegClass, AltName, O);
}
printOperand(MI, OpNum, O);
@@ -682,7 +711,7 @@ void AArch64AsmPrinter::EmitJumpTableInfo() {
if (JTBBs.empty()) continue;
unsigned Size = AFI->getJumpTableEntrySize(JTI);
- EmitAlignment(Log2_32(Size));
+ EmitAlignment(Align(Size));
OutStreamer->EmitLabel(GetJTISymbol(JTI));
for (auto *JTBB : JTBBs)
@@ -725,12 +754,12 @@ void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
/// add xDest, xDest, xScratch, lsl #2
void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
const llvm::MachineInstr &MI) {
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned ScratchReg = MI.getOperand(1).getReg();
- unsigned ScratchRegW =
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register ScratchRegW =
STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32);
- unsigned TableReg = MI.getOperand(2).getReg();
- unsigned EntryReg = MI.getOperand(3).getReg();
+ Register TableReg = MI.getOperand(2).getReg();
+ Register EntryReg = MI.getOperand(3).getReg();
int JTIdx = MI.getOperand(4).getIndex();
bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
@@ -800,7 +829,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
- unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+ Register ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 16;
// Materialize the jump address:
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVZXi)
@@ -830,7 +859,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
}
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
// Convert H/S/D register to corresponding Q register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
@@ -894,32 +923,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
default:
break;
case AArch64::MOVMCSym: {
- unsigned DestReg = MI->getOperand(0).getReg();
- const MachineOperand &MO_Sym = MI->getOperand(1);
- MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym);
- MCOperand Hi_MCSym, Lo_MCSym;
-
- Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S);
- Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC);
-
- MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym);
- MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym);
-
- MCInst MovZ;
- MovZ.setOpcode(AArch64::MOVZXi);
- MovZ.addOperand(MCOperand::createReg(DestReg));
- MovZ.addOperand(Hi_MCSym);
- MovZ.addOperand(MCOperand::createImm(16));
- EmitToStreamer(*OutStreamer, MovZ);
-
- MCInst MovK;
- MovK.setOpcode(AArch64::MOVKXi);
- MovK.addOperand(MCOperand::createReg(DestReg));
- MovK.addOperand(MCOperand::createReg(DestReg));
- MovK.addOperand(Lo_MCSym);
- MovK.addOperand(MCOperand::createImm(0));
- EmitToStreamer(*OutStreamer, MovK);
- return;
+ Register DestReg = MI->getOperand(0).getReg();
+ const MachineOperand &MO_Sym = MI->getOperand(1);
+ MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym);
+ MCOperand Hi_MCSym, Lo_MCSym;
+
+ Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S);
+ Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC);
+
+ MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym);
+ MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym);
+
+ MCInst MovZ;
+ MovZ.setOpcode(AArch64::MOVZXi);
+ MovZ.addOperand(MCOperand::createReg(DestReg));
+ MovZ.addOperand(Hi_MCSym);
+ MovZ.addOperand(MCOperand::createImm(16));
+ EmitToStreamer(*OutStreamer, MovZ);
+
+ MCInst MovK;
+ MovK.setOpcode(AArch64::MOVKXi);
+ MovK.addOperand(MCOperand::createReg(DestReg));
+ MovK.addOperand(MCOperand::createReg(DestReg));
+ MovK.addOperand(Lo_MCSym);
+ MovK.addOperand(MCOperand::createImm(0));
+ EmitToStreamer(*OutStreamer, MovK);
+ return;
}
case AArch64::MOVIv2d_ns:
// If the target has <rdar://problem/16473581>, lower this
@@ -1084,6 +1113,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
case AArch64::HWASAN_CHECK_MEMACCESS:
+ case AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES:
LowerHWASAN_CHECK_MEMACCESS(*MI);
return;
@@ -1193,4 +1223,6 @@ extern "C" void LLVMInitializeAArch64AsmPrinter() {
RegisterAsmPrinter<AArch64AsmPrinter> X(getTheAArch64leTarget());
RegisterAsmPrinter<AArch64AsmPrinter> Y(getTheAArch64beTarget());
RegisterAsmPrinter<AArch64AsmPrinter> Z(getTheARM64Target());
+ RegisterAsmPrinter<AArch64AsmPrinter> W(getTheARM64_32Target());
+ RegisterAsmPrinter<AArch64AsmPrinter> V(getTheAArch64_32Target());
}
diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp
index 59757769c89a..ed93d02aa615 100644
--- a/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -99,7 +99,7 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
/// (it's an implicit-def of the BL).
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
- bool isArgumentHandler() const override { return true; }
+ bool isIncomingArgumentHandler() const override { return true; }
uint64_t StackUsed;
};
@@ -110,6 +110,7 @@ struct FormalArgHandler : public IncomingArgHandler {
: IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
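+ // Register the physreg as a function live-in in addition to the block
+ // live-in.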
+ MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
};
@@ -129,14 +130,29 @@ struct CallReturnHandler : public IncomingArgHandler {
struct OutgoingArgHandler : public CallLowering::ValueHandler {
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn,
- CCAssignFn *AssignFnVarArg)
+ CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
+ int FPDiff = 0)
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
- AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
+ AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
+ StackSize(0) {}
+
+ bool isIncomingArgumentHandler() const override { return false; }
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
+ MachineFunction &MF = MIRBuilder.getMF();
LLT p0 = LLT::pointer(0, 64);
LLT s64 = LLT::scalar(64);
+
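+ // For tail calls, write the argument to a fixed stack object offset by
+ // FPDiff from the callee's argument area instead of addressing it via SP.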
+ if (IsTailCall) {
+ Offset += FPDiff;
+ int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
+ Register FIReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildFrameIndex(FIReg, FI);
+ MPO = MachinePointerInfo::getFixedStack(MF, FI);
+ return FIReg;
+ }
+
Register SPReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));
@@ -146,7 +162,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
Register AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
- MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ MPO = MachinePointerInfo::getStack(MF, Offset);
return AddrReg;
}
@@ -173,12 +189,13 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
const CallLowering::ArgInfo &Info,
+ ISD::ArgFlagsTy Flags,
CCState &State) override {
bool Res;
if (Info.IsFixed)
- Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
else
- Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
StackSize = State.getNextStackOffset();
return Res;
@@ -186,10 +203,19 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
MachineInstrBuilder MIB;
CCAssignFn *AssignFnVarArg;
+ bool IsTailCall;
+
+ /// For tail calls, the byte offset of the call's argument area from the
+ /// callee's. Unused elsewhere.
+ int FPDiff;
uint64_t StackSize;
};
} // namespace
+static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
+ return CallConv == CallingConv::Fast && TailCallOpt;
+}
+
void AArch64CallLowering::splitToValueTypes(
const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
@@ -207,7 +233,7 @@ void AArch64CallLowering::splitToValueTypes(
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
- OrigArg.Flags, OrigArg.IsFixed);
+ OrigArg.Flags[0], OrigArg.IsFixed);
return;
}
@@ -218,13 +244,13 @@ void AArch64CallLowering::splitToValueTypes(
OrigArg.Ty, CallConv, false);
for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
- SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags,
+ SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
OrigArg.IsFixed);
if (NeedsRegBlock)
- SplitArgs.back().Flags.setInConsecutiveRegs();
+ SplitArgs.back().Flags[0].setInConsecutiveRegs();
}
- SplitArgs.back().Flags.setInConsecutiveRegsLast();
+ SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
}
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -344,6 +370,49 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
return Success;
}
+/// Helper function to compute forwarded registers for musttail calls. Computes
+/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
+/// can be used to save + restore registers later.
+static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
+ CCAssignFn *AssignFn) {
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ if (!MFI.hasMustTailInVarArgFunc())
+ return;
+
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ const Function &F = MF.getFunction();
+ assert(F.isVarArg() && "Expected F to be vararg?");
+
+ // Compute the set of forwarded registers. The rest are scratch.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
+ F.getContext());
+ SmallVector<MVT, 2> RegParmTypes;
+ RegParmTypes.push_back(MVT::i64);
+ RegParmTypes.push_back(MVT::f128);
+
+ // Later on, we can use this vector to restore the registers if necessary.
+ SmallVectorImpl<ForwardedRegister> &Forwards =
+ FuncInfo->getForwardedMustTailRegParms();
+ CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
+
+ // Conservatively forward X8, since it might be used for an aggregate
+ // return.
+ if (!CCInfo.isAllocated(AArch64::X8)) {
+ unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
+ Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
+ }
+
+ // Add the forwards to the MachineBasicBlock and MachineFunction.
+ for (const auto &F : Forwards) {
+ MBB.addLiveIn(F.PReg);
+ MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
+ }
+}
+
bool AArch64CallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
@@ -376,64 +445,530 @@ bool AArch64CallLowering::lowerFormalArguments(
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ uint64_t StackOffset = Handler.StackUsed;
if (F.isVarArg()) {
- if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
- // FIXME: we need to reimplement saveVarArgsRegisters from
+ auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ if (!Subtarget.isTargetDarwin()) {
+ // FIXME: we need to reimplement saveVarArgsRegisters from
// AArch64ISelLowering.
return false;
}
- // We currently pass all varargs at 8-byte alignment.
- uint64_t StackOffset = alignTo(Handler.StackUsed, 8);
+ // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
+ StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
auto &MFI = MIRBuilder.getMF().getFrameInfo();
- AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
}
+ if (doesCalleeRestoreStack(F.getCallingConv(),
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
+ // We have a non-standard ABI, so why not make full use of the stack that
+ // we're going to pop? It must be aligned to 16 B in any case.
+ StackOffset = alignTo(StackOffset, 16);
+
+ // If we're expected to restore the stack (e.g. fastcc), then we'll be
+ // adding a multiple of 16.
+ FuncInfo->setArgumentStackToRestore(StackOffset);
+
+ // Our own callers will guarantee that the space is free by giving an
+ // aligned value to CALLSEQ_START.
+ }
+
+ // When we tail call, we need to check if the callee's arguments
+ // will fit on the caller's stack. So, whenever we lower formal arguments,
+ // we should keep track of this information, since we might lower a tail call
+ // in this function later.
+ FuncInfo->setBytesInStackArgArea(StackOffset);
+
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (Subtarget.hasCustomCallingConv())
Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
+ handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
+
// Move back to the end of the basic block.
MIRBuilder.setMBB(MBB);
return true;
}
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
+ return CC == CallingConv::Fast;
+}
+
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::PreserveMost:
+ case CallingConv::Swift:
+ return true;
+ default:
+ return canGuaranteeTCO(CC);
+ }
+}
+
+/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
+/// CC.
+static std::pair<CCAssignFn *, CCAssignFn *>
+getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
+ return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
+}
+
+bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
+ CallLoweringInfo &Info, MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &InArgs) const {
+ const Function &CallerF = MF.getFunction();
+ CallingConv::ID CalleeCC = Info.CallConv;
+ CallingConv::ID CallerCC = CallerF.getCallingConv();
+
+ // If the calling conventions match, then everything must be the same.
+ if (CalleeCC == CallerCC)
+ return true;
+
+ // Check if the caller and callee will handle arguments in the same way.
+ const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+ CCAssignFn *CalleeAssignFnFixed;
+ CCAssignFn *CalleeAssignFnVarArg;
+ std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
+ getAssignFnsForCC(CalleeCC, TLI);
+
+ CCAssignFn *CallerAssignFnFixed;
+ CCAssignFn *CallerAssignFnVarArg;
+ std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
+ getAssignFnsForCC(CallerCC, TLI);
+
+ if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed,
+ *CalleeAssignFnVarArg, *CallerAssignFnFixed,
+ *CallerAssignFnVarArg))
+ return false;
+
+ // Make sure that the caller and callee preserve all of the same registers.
+ auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
+ const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+ if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
+ TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
+ TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
+ }
+
+ return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
+}
+
+bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
+ CallLoweringInfo &Info, MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &OutArgs) const {
+ // If there are no outgoing arguments, then we are done.
+ if (OutArgs.empty())
+ return true;
+
+ const Function &CallerF = MF.getFunction();
+ CallingConv::ID CalleeCC = Info.CallConv;
+ CallingConv::ID CallerCC = CallerF.getCallingConv();
+ const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+
+ CCAssignFn *AssignFnFixed;
+ CCAssignFn *AssignFnVarArg;
+ std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
+
+ // We have outgoing arguments. Make sure that we can tail call with them.
+ SmallVector<CCValAssign, 16> OutLocs;
+ CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
+
+ if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) {
+ LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
+ return false;
+ }
+
+ // Make sure that they can fit on the caller's stack.
+ const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
+ LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
+ return false;
+ }
+
+ // Verify that the parameters in callee-saved registers match.
+ // TODO: Port this over to CallLowering as general code once swiftself is
+ // supported.
+ auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (unsigned i = 0; i < OutLocs.size(); ++i) {
+ auto &ArgLoc = OutLocs[i];
+ // If it's not a register, it's fine.
+ if (!ArgLoc.isRegLoc()) {
+ if (Info.IsVarArg) {
+ // Be conservative and disallow variadic memory operands to match SDAG's
+ // behaviour.
+ // FIXME: If the caller's calling convention is C, then we can
+ // potentially use its argument area. However, for cases like fastcc,
+ // we can't do anything.
+ LLVM_DEBUG(
+ dbgs()
+ << "... Cannot tail call vararg function with stack arguments\n");
+ return false;
+ }
+ continue;
+ }
+
+ Register Reg = ArgLoc.getLocReg();
+
+ // Only look at callee-saved registers.
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
+ continue;
+
+ LLVM_DEBUG(
+ dbgs()
+ << "... Call has an argument passed in a callee-saved register.\n");
+
+ // Check if it was copied from.
+ ArgInfo &OutInfo = OutArgs[i];
+
+ if (OutInfo.Regs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs() << "... Cannot handle arguments in multiple registers.\n");
+ return false;
+ }
+
+ // Check if we copy the register, walking through copies from virtual
+ // registers. Note that getDefIgnoringCopies does not ignore copies from
+ // physical registers.
+ MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
+ if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Parameter was not copied into a VReg, cannot tail call.\n");
+ return false;
+ }
+
+ // Got a copy. Verify that it's the same as the register we want.
+ Register CopyRHS = RegDef->getOperand(1).getReg();
+ if (CopyRHS != Reg) {
+ LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
+ "VReg, cannot tail call.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool AArch64CallLowering::isEligibleForTailCallOptimization(
+ MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
+ SmallVectorImpl<ArgInfo> &InArgs,
+ SmallVectorImpl<ArgInfo> &OutArgs) const {
+
+ // Must pass all target-independent checks in order to tail call optimize.
+ if (!Info.IsTailCall)
+ return false;
+
+ CallingConv::ID CalleeCC = Info.CallConv;
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &CallerF = MF.getFunction();
+
+ LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
+
+ if (Info.SwiftErrorVReg) {
+ // TODO: We should handle this.
+ // Note that this is also handled by the check for no outgoing arguments.
+ // Proactively disabling this though, because the swifterror handling in
+ // lowerCall inserts a COPY *after* the location of the call.
+ LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
+ return false;
+ }
+
+ if (!mayTailCallThisCC(CalleeCC)) {
+ LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
+ return false;
+ }
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call. Working around this *is* possible (see
+ // X86).
+ //
+ // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
+ // it?
+ //
+ // On Windows, "inreg" attributes signify non-aggregate indirect returns.
+ // In this case, it is necessary to save/restore X0 in the callee. Tail
+ // call opt interferes with this. So we disable tail call opt when the
+ // caller has an argument with "inreg" attribute.
+ //
+ // FIXME: Check whether the callee also has an "inreg" argument.
+ //
+ // When the caller has a swifterror argument, we don't want to tail call
+ // because would have to move into the swifterror register before the
+ // tail call.
+ if (any_of(CallerF.args(), [](const Argument &A) {
+ return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
+ })) {
+ LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
+ "inreg, or swifterror arguments\n");
+ return false;
+ }
+
+ // Externally-defined functions with weak linkage should not be
+ // tail-called on AArch64 when the OS does not support dynamic
+ // pre-emption of symbols, as the AAELF spec requires normal calls
+ // to undefined weak functions to be replaced with a NOP or jump to the
+ // next instruction. The behaviour of branch instructions in this
+ // situation (as used for tail calls) is implementation-defined, so we
+ // cannot rely on the linker replacing the tail call with a return.
+ if (Info.Callee.isGlobal()) {
+ const GlobalValue *GV = Info.Callee.getGlobal();
+ const Triple &TT = MF.getTarget().getTargetTriple();
+ if (GV->hasExternalWeakLinkage() &&
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+ TT.isOSBinFormatMachO())) {
+ LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
+ "with weak linkage for this OS.\n");
+ return false;
+ }
+ }
+
+ // If we have -tailcallopt, then we're done.
+ if (MF.getTarget().Options.GuaranteedTailCallOpt)
+ return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
+
+ // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
+ // Try to find cases where we can do that.
+
+ // I want anyone implementing a new calling convention to think long and hard
+ // about this assert.
+ assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
+ "Unexpected variadic calling convention");
+
+ // Verify that the incoming and outgoing arguments from the callee are
+ // safe to tail call.
+ if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Caller and callee have incompatible calling conventions.\n");
+ return false;
+ }
+
+ if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
+ return false;
+
+ LLVM_DEBUG(
+ dbgs() << "... Call is eligible for tail call optimization.\n");
+ return true;
+}
+
+static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
+ bool IsTailCall) {
+ if (!IsTailCall)
+ return IsIndirect ? AArch64::BLR : AArch64::BL;
+
+ if (!IsIndirect)
+ return AArch64::TCRETURNdi;
+
+ // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
+ // x16 or x17.
+ if (CallerF.hasFnAttribute("branch-target-enforcement"))
+ return AArch64::TCRETURNriBTI;
+
+ return AArch64::TCRETURNri;
+}
+
+bool AArch64CallLowering::lowerTailCall(
+ MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
+ SmallVectorImpl<ArgInfo> &OutArgs) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+
+ // True when we're tail calling, but without -tailcallopt.
+ bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
+
+ // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
+ // register class. Until we can do that, we should fall back here.
+ if (F.hasFnAttribute("branch-target-enforcement")) {
+ LLVM_DEBUG(
+ dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
+ return false;
+ }
+
+ // Find out which ABI gets to decide where things go.
+ CallingConv::ID CalleeCC = Info.CallConv;
+ CCAssignFn *AssignFnFixed;
+ CCAssignFn *AssignFnVarArg;
+ std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
+
+ MachineInstrBuilder CallSeqStart;
+ if (!IsSibCall)
+ CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
+
+ unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
+ auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
+ MIB.add(Info.Callee);
+
+ // Byte offset for the tail call. When we are sibcalling, this will always
+ // be 0.
+ MIB.addImm(0);
+
+ // Tell the call which registers are clobbered.
+ auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
+ if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(MF, &Mask);
+ MIB.addRegMask(Mask);
+
+ if (TRI->isAnyArgRegReserved(MF))
+ TRI->emitReservedArgRegCallError(MF);
+
+ // FPDiff is the byte offset of the call's argument area from the callee's.
+ // Stores to callee stack arguments will be placed in FixedStackSlots offset
+ // by this amount for a tail call. In a sibling call it must be 0 because the
+ // caller will deallocate the entire stack and the callee still expects its
+ // arguments to begin at SP+0.
+ int FPDiff = 0;
+
+ // This will be 0 for sibcalls, potentially nonzero for tail calls produced
+ // by -tailcallopt. For sibcalls, the memory operands for the call are
+ // already available in the caller's incoming argument space.
+ unsigned NumBytes = 0;
+ if (!IsSibCall) {
+ // We aren't sibcalling, so we need to compute FPDiff. We need to do this
+ // before handling assignments, because FPDiff must be known for memory
+ // arguments.
+ unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+ SmallVector<CCValAssign, 16> OutLocs;
+ CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
+ analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg);
+
+ // The callee will pop the argument stack as a tail call. Thus, we must
+ // keep it 16-byte aligned.
+ NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
+
+ // FPDiff will be negative if this tail call requires more space than we
+ // would automatically have in our incoming argument space. Positive if we
+ // actually shrink the stack.
+ FPDiff = NumReusableBytes - NumBytes;
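+ // E.g. a caller with 32 bytes of reusable incoming argument space calling a
+ // callee that needs 48 bytes gives FPDiff = -16, so the argument area grows.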
+
+ // The stack pointer must be 16-byte aligned at all times it's used for a
+ // memory operation, which in practice means at *all* times and in
+ // particular across call boundaries. Therefore our own arguments started at
+ // a 16-byte aligned SP and the delta applied for the tail call should
+ // satisfy the same constraint.
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+ }
+
+ const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
+
+ // Do the actual argument marshalling.
+ SmallVector<unsigned, 8> PhysRegs;
+ OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
+ AssignFnVarArg, true, FPDiff);
+ if (!handleAssignments(MIRBuilder, OutArgs, Handler))
+ return false;
+
+ if (Info.IsVarArg && Info.IsMustTailCall) {
+ // Now we know what's being passed to the function. Add uses to the call for
+ // the forwarded registers that we *aren't* passing as parameters. This will
+ // preserve the copies we build earlier.
+ for (const auto &F : Forwards) {
+ Register ForwardedReg = F.PReg;
+ // If the register is already passed, or aliases a register which is
+ // already being passed, then skip it.
+ if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
+ if (!Use.isReg())
+ return false;
+ return TRI->regsOverlap(Use.getReg(), ForwardedReg);
+ }))
+ continue;
+
+ // We aren't passing it already, so we should add it to the call.
+ MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
+ MIB.addReg(ForwardedReg, RegState::Implicit);
+ }
+ }
+
+ // If we have -tailcallopt, we need to adjust the stack. We'll do the call
+ // sequence start and end here.
+ if (!IsSibCall) {
+ MIB->getOperand(1).setImm(FPDiff);
+ CallSeqStart.addImm(NumBytes).addImm(0);
+ // End the call sequence *before* emitting the call. Normally, we would
+ // tidy the frame up after the call. However, here, we've laid out the
+ // parameters so that when SP is reset, they will be in the correct
+ // location.
+ MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
+ }
+
+ // Now we can add the actual call instruction to the correct basic block.
+ MIRBuilder.insertInstr(MIB);
+
+ // If Callee is a reg, since it is used by a target specific instruction,
+ // it must have a register class matching the constraint of that instruction.
+ if (Info.Callee.isReg())
+ MIB->getOperand(0).setReg(constrainOperandRegClass(
+ MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
+ *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
+ 0));
+
+ MF.getFrameInfo().setHasTailCall();
+ Info.LoweredTailCall = true;
+ return true;
+}
+
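As a rough illustration of the FPDiff bookkeeping in lowerTailCall above, here
is a standalone sketch of the arithmetic (plain C++; the helper name is
illustrative only, the real code works on CCState and AArch64FunctionInfo):

    #include <cassert>
    #include <cstdint>

    // NumReusableBytes: the caller's incoming stack-argument area.
    // NextStackOffset:  bytes needed by the callee's stack arguments.
    // The callee pops its argument area, so it is kept 16-byte aligned;
    // a negative result means the tail call needs more stack than the
    // incoming area provides, a positive one means the stack shrinks.
    static int64_t computeFPDiff(uint64_t NumReusableBytes,
                                 uint64_t NextStackOffset) {
      uint64_t NumBytes = (NextStackOffset + 15) & ~uint64_t(15);
      int64_t FPDiff = int64_t(NumReusableBytes) - int64_t(NumBytes);
      // Both areas start at a 16-byte aligned SP, so the delta must too.
      assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
      return FPDiff;
    }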
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
- CallingConv::ID CallConv,
- const MachineOperand &Callee,
- const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs,
- Register SwiftErrorVReg) const {
+ CallLoweringInfo &Info) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto &DL = F.getParent()->getDataLayout();
+ const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
- SmallVector<ArgInfo, 8> SplitArgs;
- for (auto &OrigArg : OrigArgs) {
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv);
+ SmallVector<ArgInfo, 8> OutArgs;
+ for (auto &OrigArg : Info.OrigArgs) {
+ splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
// AAPCS requires that we zero-extend i1 to 8 bits by the caller.
if (OrigArg.Ty->isIntegerTy(1))
- SplitArgs.back().Flags.setZExt();
+ OutArgs.back().Flags[0].setZExt();
+ }
+
+ SmallVector<ArgInfo, 8> InArgs;
+ if (!Info.OrigRet.Ty->isVoidTy())
+ splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());
+
+ // If we can lower as a tail call, do that instead.
+ bool CanTailCallOpt =
+ isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
+
+ // We must emit a tail call if we have musttail.
+ if (Info.IsMustTailCall && !CanTailCallOpt) {
+ // There are types of incoming/outgoing arguments we can't handle yet, so
+ // it doesn't make sense to actually die here like in ISelLowering. Instead,
+ // fall back to SelectionDAG and let it try to handle this.
+ LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
+ return false;
}
+ if (CanTailCallOpt)
+ return lowerTailCall(MIRBuilder, Info, OutArgs);
+
// Find out which ABI gets to decide where things go.
- const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
- CCAssignFn *AssignFnFixed =
- TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
- CCAssignFn *AssignFnVarArg =
- TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/true);
+ CCAssignFn *AssignFnFixed;
+ CCAssignFn *AssignFnVarArg;
+ std::tie(AssignFnFixed, AssignFnVarArg) =
+ getAssignFnsForCC(Info.CallConv, TLI);
- auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
+ MachineInstrBuilder CallSeqStart;
+ CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
- auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR
- : AArch64::BL);
- MIB.add(Callee);
+ unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), false);
+
+ auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
+ MIB.add(Info.Callee);
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
@@ -448,8 +983,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
- AssignFnVarArg);
- if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+ AssignFnVarArg, false);
+ if (!handleAssignments(MIRBuilder, OutArgs, Handler))
return false;
// Now we can add the actual call instruction to the correct basic block.
@@ -458,34 +993,37 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// If Callee is a reg, since it is used by a target specific
// instruction, it must have a register class matching the
// constraint of that instruction.
- if (Callee.isReg())
+ if (Info.Callee.isReg())
MIB->getOperand(0).setReg(constrainOperandRegClass(
MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
- *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0));
+ *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
+ 0));
// Finally we can copy the returned value back into its virtual-register. In
   // symmetry with the arguments, the physical register must be an
// implicit-define of the call instruction.
- CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
- if (!OrigRet.Ty->isVoidTy()) {
- SplitArgs.clear();
-
- splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv());
-
+ if (!Info.OrigRet.Ty->isVoidTy()) {
+ CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
- if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+ if (!handleAssignments(MIRBuilder, InArgs, Handler))
return false;
}
- if (SwiftErrorVReg) {
+ if (Info.SwiftErrorVReg) {
MIB.addDef(AArch64::X21, RegState::Implicit);
- MIRBuilder.buildCopy(SwiftErrorVReg, Register(AArch64::X21));
+ MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
}
+ uint64_t CalleePopBytes =
+ doesCalleeRestoreStack(Info.CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt)
+ ? alignTo(Handler.StackSize, 16)
+ : 0;
+
CallSeqStart.addImm(Handler.StackSize).addImm(0);
MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
.addImm(Handler.StackSize)
- .addImm(0);
+ .addImm(CalleePopBytes);
return true;
}
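The callee-popped byte count fed to ADJCALLSTACKUP above reduces to a small
rounding rule; a compile-checkable sketch, assuming only that the convention
restores the stack in the callee (e.g. under GuaranteedTailCallOpt):

    #include <cstdint>

    // The popped amount is the outgoing argument area rounded up to 16
    // bytes when the callee restores the stack, and zero otherwise.
    constexpr uint64_t calleePopBytes(bool CalleeRestoresStack,
                                      uint64_t StackSize) {
      return CalleeRestoresStack ? ((StackSize + 15) & ~uint64_t(15)) : 0;
    }
    static_assert(calleePopBytes(true, 20) == 32, "rounded up to 16");
    static_assert(calleePopBytes(false, 20) == 0, "caller cleans up");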
diff --git a/lib/Target/AArch64/AArch64CallLowering.h b/lib/Target/AArch64/AArch64CallLowering.h
index 4f428f254537..b0c601c7062c 100644
--- a/lib/Target/AArch64/AArch64CallLowering.h
+++ b/lib/Target/AArch64/AArch64CallLowering.h
@@ -40,16 +40,15 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const override;
- bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
- const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs,
- Register SwiftErrorVReg) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
- bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
- const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const override {
- return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs, 0);
- }
+ /// Returns true if the call can be lowered as a tail call.
+ bool
+ isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info,
+ SmallVectorImpl<ArgInfo> &InArgs,
+ SmallVectorImpl<ArgInfo> &OutArgs) const;
bool supportSwiftError() const override { return true; }
@@ -64,6 +63,18 @@ private:
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI,
CallingConv::ID CallConv) const;
+
+ bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
+ SmallVectorImpl<ArgInfo> &OutArgs) const;
+
+ bool
+ doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info,
+ MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &InArgs) const;
+
+ bool
+ areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &OutArgs) const;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64CallingConvention.cpp b/lib/Target/AArch64/AArch64CallingConvention.cpp
index 02538a187611..a0695cef615f 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ b/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -40,12 +40,14 @@ static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
CCState &State, unsigned SlotAlign) {
unsigned Size = LocVT.getSizeInBits() / 8;
- unsigned StackAlign =
+ const Align StackAlign =
State.getMachineFunction().getDataLayout().getStackAlignment();
- unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
+ const Align OrigAlign(ArgFlags.getOrigAlign());
+ const Align Align = std::min(OrigAlign, StackAlign);
for (auto &It : PendingMembers) {
- It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
+ It.convertToMem(State.AllocateStack(
+ Size, std::max((unsigned)Align.value(), SlotAlign)));
State.addLoc(It);
SlotAlign = 1;
}
@@ -79,10 +81,14 @@ static bool CC_AArch64_Custom_Stack_Block(
static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+ State.getMachineFunction().getSubtarget());
+ bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();
+
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
ArrayRef<MCPhysReg> RegList;
- if (LocVT.SimpleTy == MVT::i64)
+ if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
RegList = XRegList;
else if (LocVT.SimpleTy == MVT::f16)
RegList = HRegList;
@@ -107,8 +113,12 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
if (!ArgFlags.isInConsecutiveRegsLast())
return true;
- unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
- if (RegResult) {
+ // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
+ // because that's how the armv7k Clang front-end emits small structs.
+ unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
+ unsigned RegResult = State.AllocateRegBlock(
+ RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
+ if (RegResult && EltsPerReg == 1) {
for (auto &It : PendingMembers) {
It.convertToReg(RegResult);
State.addLoc(It);
@@ -116,14 +126,26 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
}
PendingMembers.clear();
return true;
+ } else if (RegResult) {
+ assert(EltsPerReg == 2 && "unexpected ABI");
+ bool UseHigh = false;
+ CCValAssign::LocInfo Info;
+ for (auto &It : PendingMembers) {
+ Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
+ State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult,
+ MVT::i64, Info));
+ UseHigh = !UseHigh;
+ if (!UseHigh)
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
}
// Mark all regs in the class as unavailable
for (auto Reg : RegList)
State.AllocateReg(Reg);
- const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
- State.getMachineFunction().getSubtarget());
unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
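A toy model of the EltsPerReg == 2 path above, showing how consecutive i32
block members alternate between the low (ZExt) and high (AExtUpper) halves of
x-registers on Darwin arm64_32; the register numbers are illustrative:

    #include <cstdio>

    int main() {
      unsigned NumMembers = 5; // e.g. [5 x i32] from a small armv7k struct
      unsigned Reg = 0;        // first x-register of the allocated block
      bool UseHigh = false;
      for (unsigned I = 0; I != NumMembers; ++I) {
        std::printf("member %u -> x%u %s half (%s)\n", I, Reg,
                    UseHigh ? "high" : "low",
                    UseHigh ? "AExtUpper" : "ZExt");
        UseHigh = !UseHigh;
        if (!UseHigh) // both halves used, move to the next register
          ++Reg;
      }
      return 0;
    }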
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
index 13cc0c583fd2..5a55d090d7c8 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -25,6 +25,9 @@ bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index d969a9e1ab3a..bccbbd4591ed 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -17,6 +17,10 @@ class CCIfAlign<string Align, CCAction A> :
class CCIfBigEndian<CCAction A> :
CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
+class CCIfILP32<CCAction A> :
+ CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;
+
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
@@ -70,6 +74,18 @@ def CC_AArch64_AAPCS : CallingConv<[
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+ CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+ CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
+ CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+ CCPassIndirect<i64>>,
+
+ CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCAssignToReg<[P0, P1, P2, P3]>>,
+ CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCPassIndirect<i64>>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -111,6 +127,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
// Big endian vectors must be passed as if they were 1-element vectors so that
@@ -135,7 +152,14 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
- CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
+ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+ CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+ CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
+
+ CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCAssignToReg<[P0, P1, P2, P3]>>
]>;
// Vararg functions on windows pass floats in integer registers
@@ -202,6 +226,12 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+  // Re-demote pointers to 32 bits so we don't end up storing 64-bit
+ // values and clobbering neighbouring stack locations. Not very pretty.
+ CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
+ CCIfPtr<CCIfILP32<CCAssignToStack<4, 4>>>,
+
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
CCAssignToStack<8, 8>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
@@ -229,6 +259,29 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
CCAssignToStack<16, 16>>
]>;
+// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the
+// same as the normal Darwin VarArgs handling.
+let Entry = 1 in
+def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
+ CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
+ CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+
+ // Handle all scalar types as either i32 or f32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[f16], CCPromoteToType<f32>>,
+
+ // Everything is on the stack.
+  // i128 is split into two i64s, and its stack alignment is 16 bytes.
+ CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCAssignToStack<8, 8>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
+]>;
+
// The WebKit_JS calling convention only passes the first argument (the callee)
 // in a register and the remaining arguments on the stack. We allow 32-bit stack slots,
// so that WebKit can write partial values in the stack and define the other
@@ -298,6 +351,12 @@ def CC_AArch64_GHC : CallingConv<[
CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
]>;
+// The order of the callee-saves in this file is important, because the
+// FrameLowering code will use this order to determine the layout of the
+// callee-save area in the stack frame. As can be observed below, Darwin
+// requires the frame-record (LR, FP) to be at the top of the callee-save
+// area, whereas on other platforms it is at the bottom.
+
// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
// presumably a callee to someone. External functions may not do so, but this
// is currently safe since BL has LR as an implicit-def and what happens after a
@@ -306,7 +365,13 @@ def CC_AArch64_GHC : CallingConv<[
// It would be better to model its preservation semantics properly (create a
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
// end up saving LR as part of a call frame). Watch this space...
-def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP,
+ D8, D9, D10, D11,
+ D12, D13, D14, D15)>;
+
+// Darwin puts the frame-record at the top of the callee-save area.
+def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
X23, X24, X25, X26, X27, X28,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
@@ -314,17 +379,24 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
// and not (LR,FP) pairs.
-def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22,
- X23, X24, X25, X26, X27, X28,
+def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, FP, LR,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
// AArch64 PCS for vector functions (VPCS)
// must (additionally) preserve full Q8-Q23 registers
-def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
- X23, X24, X25, X26, X27, X28,
+def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP,
(sequence "Q%u", 8, 23))>;
+// Functions taking SVE arguments or returning an SVE type
+// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
+def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP,
+ (sequence "Z%u", 8, 23),
+ (sequence "P%u", 4, 15))>;
+
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
// this can be partially modelled by treating X0 as a callee-saved register;
@@ -336,7 +408,7 @@ def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
def CSR_AArch64_AAPCS_SwiftError
- : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
+ : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>;
// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
@@ -352,7 +424,7 @@ def CSR_AArch64_TLS_Darwin
// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
def CSR_AArch64_CXX_TLS_Darwin
- : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
+ : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS,
(sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
(sequence "D%u", 0, 31))>;
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 9f324b433209..35e6fef24363 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -103,6 +103,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -181,6 +182,7 @@ static bool canDefBePartOfLOH(const MachineInstr &MI) {
case AArch64::ADDXri:
return canAddBePartOfLOH(MI);
case AArch64::LDRXui:
+ case AArch64::LDRWui:
// Check immediate to see if the immediate is an address.
switch (MI.getOperand(2).getType()) {
default:
@@ -312,7 +314,8 @@ static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
Info.Type = MCLOH_AdrpAdd;
Info.IsCandidate = true;
Info.MI0 = &MI;
- } else if (MI.getOpcode() == AArch64::LDRXui &&
+ } else if ((MI.getOpcode() == AArch64::LDRXui ||
+ MI.getOpcode() == AArch64::LDRWui) &&
MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
Info.Type = MCLOH_AdrpLdrGot;
Info.IsCandidate = true;
@@ -357,7 +360,9 @@ static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
return true;
}
} else {
- assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui");
+ assert((MI.getOpcode() == AArch64::LDRXui ||
+ MI.getOpcode() == AArch64::LDRWui) &&
+ "Expect LDRXui or LDRWui");
assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
"Expected GOT relocation");
if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
@@ -474,13 +479,23 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
handleClobber(LOHInfos[Idx]);
}
// Handle uses.
+
+ SmallSet<int, 4> UsesSeen;
for (const MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.readsReg())
continue;
int Idx = mapRegToGPRIndex(MO.getReg());
if (Idx < 0)
continue;
- handleUse(MI, MO, LOHInfos[Idx]);
+
+ // Multiple uses of the same register within a single instruction don't
+ // count as MultiUser or block optimization. This is especially important on
+ // arm64_32, where any memory operation is likely to be an explicit use of
+ // xN and an implicit use of wN (the base address register).
+ if (!UsesSeen.count(Idx)) {
+ handleUse(MI, MO, LOHInfos[Idx]);
+ UsesSeen.insert(Idx);
+ }
}
}
@@ -512,6 +527,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
switch (Opcode) {
case AArch64::ADDXri:
case AArch64::LDRXui:
+ case AArch64::LDRWui:
if (canDefBePartOfLOH(MI)) {
const MachineOperand &Def = MI.getOperand(0);
const MachineOperand &Op = MI.getOperand(1);
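The UsesSeen set above only suppresses repeat sightings of the same GPR index
within one instruction; a minimal model of that dedup, with std::set standing
in for llvm::SmallSet:

    #include <cstdio>
    #include <set>
    #include <vector>

    int main() {
      // e.g. a load on arm64_32 using x8 explicitly and w8 implicitly:
      // both map to GPR index 8, plus an unrelated use of x3.
      std::vector<int> UseIndices = {8, 8, 3};
      std::set<int> UsesSeen;
      for (int Idx : UseIndices)
        if (UsesSeen.insert(Idx).second) // first sighting only
          std::printf("handleUse on GPR index %d\n", Idx);
      return 0;
    }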
diff --git a/lib/Target/AArch64/AArch64Combine.td b/lib/Target/AArch64/AArch64Combine.td
new file mode 100644
index 000000000000..bb99f2516ecf
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Combine.td
@@ -0,0 +1,18 @@
+//=- AArch64Combine.td - Define AArch64 Combine Rules --------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/GlobalISel/Combine.td"
+
+def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
+ "AArch64GenPreLegalizerCombinerHelper", [all_combines,
+ elide_br_by_inverting_cond]> {
+ let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
+}
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp
index 453132e09669..25e23e4623de 100644
--- a/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -78,7 +78,7 @@ void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const {
}
MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) {
- if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!Register::isVirtualRegister(MO.getReg()))
return nullptr;
return MRI->getUniqueVRegDef(MO.getReg());
}
@@ -98,7 +98,7 @@ MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
}
bool Is64Bit;
unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit);
- unsigned NewDestReg = MI.getOperand(0).getReg();
+ Register NewDestReg = MI.getOperand(0).getReg();
if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg()))
NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 2cfbcc592d6a..43ae9f8ec47f 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -220,7 +220,7 @@ bool SSACCmpConv::trivialTailPHIs() {
// PHI operands come in (VReg, MBB) pairs.
for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
- unsigned Reg = I.getOperand(oi).getReg();
+ Register Reg = I.getOperand(oi).getReg();
if (MBB == Head) {
assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
HeadReg = Reg;
@@ -259,7 +259,7 @@ bool SSACCmpConv::isDeadDef(unsigned DstReg) {
// Writes to the zero register are dead.
if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
return true;
- if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (!Register::isVirtualRegister(DstReg))
return false;
// A virtual register def without any uses will be marked dead later, and
// eventually replaced by the zero register.
@@ -631,7 +631,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
}
const MCInstrDesc &MCID = TII->get(Opc);
// Create a dummy virtual register for the SUBS def.
- unsigned DestReg =
+ Register DestReg =
MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
// Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
BuildMI(*Head, Head->end(), TermDL, MCID)
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index a43077cb88ec..bc3808df1dbc 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -145,8 +145,8 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
continue;
      // We should not have any relevant physreg defs that are replaceable by
// zero before register allocation. So we just check for dead vreg defs.
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg) ||
(!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
continue;
assert(!MO.isImplicit() && "Unexpected implicit def!");
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 210c10eb1842..082e17e44d04 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -109,7 +109,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned BitSize) {
MachineInstr &MI = *MBBI;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
uint64_t Imm = MI.getOperand(1).getImm();
if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
@@ -150,7 +150,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
} break;
case AArch64::MOVKWi:
case AArch64::MOVKXi: {
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
.addReg(DstReg,
@@ -174,14 +174,14 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
const MachineOperand &Dest = MI.getOperand(0);
- unsigned StatusReg = MI.getOperand(1).getReg();
+ Register StatusReg = MI.getOperand(1).getReg();
bool StatusDead = MI.getOperand(1).isDead();
// Duplicating undef operands into 2 instructions does not guarantee the same
   // value on both; however, undef should be replaced by xzr anyway.
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
- unsigned AddrReg = MI.getOperand(2).getReg();
- unsigned DesiredReg = MI.getOperand(3).getReg();
- unsigned NewReg = MI.getOperand(4).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register DesiredReg = MI.getOperand(3).getReg();
+ Register NewReg = MI.getOperand(4).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -254,16 +254,16 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
DebugLoc DL = MI.getDebugLoc();
MachineOperand &DestLo = MI.getOperand(0);
MachineOperand &DestHi = MI.getOperand(1);
- unsigned StatusReg = MI.getOperand(2).getReg();
+ Register StatusReg = MI.getOperand(2).getReg();
bool StatusDead = MI.getOperand(2).isDead();
// Duplicating undef operands into 2 instructions does not guarantee the same
   // value on both; however, undef should be replaced by xzr anyway.
assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
- unsigned AddrReg = MI.getOperand(3).getReg();
- unsigned DesiredLoReg = MI.getOperand(4).getReg();
- unsigned DesiredHiReg = MI.getOperand(5).getReg();
- unsigned NewLoReg = MI.getOperand(6).getReg();
- unsigned NewHiReg = MI.getOperand(7).getReg();
+ Register AddrReg = MI.getOperand(3).getReg();
+ Register DesiredLoReg = MI.getOperand(4).getReg();
+ Register DesiredHiReg = MI.getOperand(5).getReg();
+ Register NewLoReg = MI.getOperand(6).getReg();
+ Register NewHiReg = MI.getOperand(7).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -475,7 +475,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case AArch64::LOADgot: {
MachineFunction *MF = MBB.getParent();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
const MachineOperand &MO1 = MI.getOperand(1);
unsigned Flags = MO1.getTargetFlags();
@@ -495,12 +495,26 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
}
} else {
// Small codemodel expand into ADRP + LDR.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ DebugLoc DL = MI.getDebugLoc();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
- MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
- .add(MI.getOperand(0))
- .addReg(DstReg);
+
+ MachineInstrBuilder MIB2;
+ if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
+ auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
+ unsigned DstFlags = MI.getOperand(0).getTargetFlags();
+ MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
+ .addDef(Reg32)
+ .addReg(DstReg, RegState::Kill)
+ .addReg(DstReg, DstFlags | RegState::Implicit);
+ } else {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
+ .add(MI.getOperand(0))
+ .addUse(DstReg, RegState::Kill);
+ }
if (MO1.isGlobal()) {
MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
@@ -534,11 +548,28 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case AArch64::MOVaddrTLS:
case AArch64::MOVaddrEXT: {
// Expand into ADRP + ADD.
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
.add(MI.getOperand(1));
+ if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
+ // MO_TAGGED on the page indicates a tagged address. Set the tag now.
+ // We do so by creating a MOVK that sets bits 48-63 of the register to
+ // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
+ // the small code model so we can assume a binary size of <= 4GB, which
+ // makes the untagged PC relative offset positive. The binary must also be
+ // loaded into address range [0, 2^48). Both of these properties need to
+ // be ensured at runtime when using tagged addresses.
+ auto Tag = MI.getOperand(1);
+ Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
+ Tag.setOffset(0x100000000);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
+ .addReg(DstReg)
+ .add(Tag)
+ .addImm(48);
+ }
+
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
.add(MI.getOperand(0))
@@ -561,7 +592,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return true;
case AArch64::MOVbaseTLS: {
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
auto SysReg = AArch64SysReg::TPIDR_EL0;
MachineFunction *MF = MBB.getParent();
if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
@@ -642,11 +673,12 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
// instruction sequence.
int BaseOffset = -AFI->getTaggedBasePointerOffset();
unsigned FrameReg;
- int FrameRegOffset = TFI->resolveFrameOffsetReference(
- MF, BaseOffset, false /*isFixed*/, FrameReg, /*PreferFP=*/false,
+ StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
+ MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
+ /*PreferFP=*/false,
/*ForSimm=*/true);
Register SrcReg = FrameReg;
- if (FrameRegOffset != 0) {
+ if (FrameRegOffset) {
// Use output register as temporary.
SrcReg = MI.getOperand(0).getReg();
emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
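The MOVK-based tagging in the MOVaddr expansion above is plain address
arithmetic; a back-of-the-envelope check with made-up addresses (assumptions:
image <= 4GB, loaded below 2^48, tag assigned in bits 48-63):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // The global's address carries a tag in its top bits, while ADRP+ADD
      // materialize only the untagged, PC-relative part of the address.
      uint64_t GlobalAddr = 0x2d00123456789000ULL; // tag 0x2d00 in bits 48..63
      uint64_t PC         = 0x0000123450000000ULL; // ADRP site, untagged
      // The +2^32 bias keeps the low 48 bits positive for any <= 4GB image,
      // so no borrow corrupts the tag before the shift.
      uint64_t Rel = GlobalAddr + 0x100000000ULL - PC;
      uint64_t Tag = Rel >> 48; // MO_G3 selects bits 48..63
      std::printf("movk xN, #0x%04llx, lsl #48\n", (unsigned long long)Tag);
      return 0;
    }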
diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index 3b3182128c4c..b54fc2e51bac 100644
--- a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -642,7 +642,7 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
}
// Loads from the stack pointer don't get prefetched.
- unsigned BaseReg = MI.getOperand(BaseRegIdx).getReg();
+ Register BaseReg = MI.getOperand(BaseRegIdx).getReg();
if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP)
return None;
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 8dc2768b9597..277a3052f1e5 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -459,7 +459,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
return 0;
- unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+ unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
if (!DestEVT.isSimple())
@@ -474,12 +474,32 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
ADRPReg)
.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
- ResultReg = createResultReg(&AArch64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
+ unsigned LdrOpc;
+ if (Subtarget->isTargetILP32()) {
+ ResultReg = createResultReg(&AArch64::GPR32RegClass);
+ LdrOpc = AArch64::LDRWui;
+ } else {
+ ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ LdrOpc = AArch64::LDRXui;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0,
- AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC | OpFlags);
+ if (!Subtarget->isTargetILP32())
+ return ResultReg;
+
+  // LDRWui produces a 32-bit register, but pointers in-register are 64 bits
+  // wide, so we must extend the result on ILP32.
+ unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(Result64)
+ .addImm(0)
+ .addReg(ResultReg, RegState::Kill)
+ .addImm(AArch64::sub_32);
+ return Result64;
} else {
// ADRP + ADDX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
@@ -504,6 +524,15 @@ unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
if (!CEVT.isSimple())
return 0;
MVT VT = CEVT.getSimpleVT();
+ // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
+  // 'null' pointers need somewhat special treatment.
+ if (const auto *CPN = dyn_cast<ConstantPointerNull>(C)) {
+ (void)CPN;
+ assert(CPN->getType()->getPointerAddressSpace() == 0 &&
+ "Unexpected address space");
+ assert(VT == MVT::i64 && "Expected 64-bit pointers");
+ return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
+ }
if (const auto *CI = dyn_cast<ConstantInt>(C))
return materializeInt(CI, VT);
@@ -946,6 +975,9 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(DL, Ty, true);
+ if (Subtarget->isTargetILP32() && Ty->isPointerTy())
+ return false;
+
// Only handle simple types.
if (evt == MVT::Other || !evt.isSimple())
return false;
@@ -988,6 +1020,9 @@ bool AArch64FastISel::isValueAvailable(const Value *V) const {
}
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
+ if (Subtarget->isTargetILP32())
+ return false;
+
unsigned ScaleFactor = getImplicitScaleFactor(VT);
if (!ScaleFactor)
return false;
@@ -3165,6 +3200,11 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (IsTailCall)
return false;
+ // FIXME: we could and should support this, but for now correctness at -O0 is
+ // more important.
+ if (Subtarget->isTargetILP32())
+ return false;
+
CodeModel::Model CM = TM.getCodeModel();
// Only support the small-addressing and large code models.
if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
@@ -3434,8 +3474,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
MFI.setFrameAddressIsTaken(true);
const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
- unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
- unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
// Recursively load frame address
@@ -3796,6 +3836,11 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ // FIXME: in principle it could. Mostly just a case of zero extending outgoing
+ // pointers.
+ if (Subtarget->isTargetILP32())
+ return false;
+
if (F.isVarArg())
return false;
@@ -3842,7 +3887,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- unsigned DestReg = VA.getLocReg();
+ Register DestReg = VA.getLocReg();
// Avoid a cross-class copy. This is very unlikely.
if (!MRI.getRegClass(SrcReg)->contains(DestReg))
return false;
@@ -3970,7 +4015,7 @@ unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
if (DestVT == MVT::i64) {
// We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
// upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
- unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), Reg64)
.addImm(0)
@@ -4123,7 +4168,7 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
};
unsigned Opc = OpcTable[IsZExt][Is64Bit];
if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
- unsigned TmpReg = MRI.createVirtualRegister(RC);
+ Register TmpReg = MRI.createVirtualRegister(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), TmpReg)
.addImm(0)
@@ -4244,7 +4289,7 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
};
unsigned Opc = OpcTable[IsZExt][Is64Bit];
if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
- unsigned TmpReg = MRI.createVirtualRegister(RC);
+ Register TmpReg = MRI.createVirtualRegister(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), TmpReg)
.addImm(0)
@@ -4353,7 +4398,7 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
};
unsigned Opc = OpcTable[IsZExt][Is64Bit];
if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
- unsigned TmpReg = MRI.createVirtualRegister(RC);
+ Register TmpReg = MRI.createVirtualRegister(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), TmpReg)
.addImm(0)
@@ -4412,7 +4457,7 @@ unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
if (DestVT == MVT::i8 || DestVT == MVT::i16)
DestVT = MVT::i32;
else if (DestVT == MVT::i64) {
- unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), Src64)
.addImm(0)
@@ -4495,7 +4540,7 @@ bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
const auto *LoadMI = MI;
if (LoadMI->getOpcode() == TargetOpcode::COPY &&
LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
- unsigned LoadReg = MI->getOperand(1).getReg();
+ Register LoadReg = MI->getOperand(1).getReg();
LoadMI = MRI.getUniqueVRegDef(LoadReg);
assert(LoadMI && "Expected valid instruction");
}
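The SUBREG_TO_REG dance in materializeGV above exists because ILP32 pointers
are 32 bits in memory but live in 64-bit registers; semantically it is just a
zero extension (a sketch of the semantics, not target code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t LoadedW = 0x80000000u;      // value the LDRWui puts in wN
      uint64_t InRegX = uint64_t(LoadedW); // SUBREG_TO_REG 0, wN, sub_32
      assert(InRegX == 0x80000000ULL && "upper 32 bits must be zero");
      return 0;
    }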
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 8c6e5cbd5c13..68e1e6a30224 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -44,11 +44,19 @@
 // |                                   |
 // |-----------------------------------|
 // |                                   |
-// | prev_fp, prev_lr                  |
+// | callee-saved gpr registers        | <--.
+// |                                   |    | On Darwin platforms these
+// |- - - - - - - - - - - - - - - - - -|    | callee saves are swapped,
+// |                                   |    |  (frame record first)
+// | prev_fp, prev_lr                  | <--'
 // | (a.k.a. "frame record")           |
 // |-----------------------------------| <- fp(=x29)
 // |                                   |
-// | other callee-saved registers      |
+// | callee-saved fp/simd/SVE regs     |
+// |                                   |
+// |-----------------------------------|
+// |                                   |
+// |        SVE stack objects          |
+// |                                   |
 // |-----------------------------------|
 // |.empty.space.to.make.part.below....|
@@ -80,6 +88,20 @@
// * A frame pointer is definitely needed when there are local variables with
// more-than-default alignment requirements.
//
+// For Darwin platforms the frame-record (fp, lr) is stored at the top of the
+// callee-saved area, since the unwind encoding does not allow for encoding
+// this dynamically and existing tools depend on this layout. For other
+// platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
+// area to allow SVE stack objects (allocated directly below the callee-saves,
+// if available) to be accessed directly from the framepointer.
+// The SVE spill/fill instructions have VL-scaled addressing modes such
+// as:
+// ldr z8, [fp, #-7 mul vl]
+// For SVE the size of the vector length (VL) is not known at compile-time, so
+// '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
+// layout, we don't need to add an unscaled offset to the framepointer before
+// accessing the SVE object in the frame.
+//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
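To make the VL-scaled addressing point above concrete, the byte offset of a
"#k mul vl" slot depends on the runtime vector length; a toy printout over a
few legal VL values (SVE permits 16 to 256 bytes, in 16-byte steps):

    #include <cstdio>

    int main() {
      int K = -7; // slot index in "mul vl" units, as in: ldr z8, [fp, #-7 mul vl]
      for (unsigned VL : {16u, 32u, 64u})
        std::printf("VL=%3u bytes: fp%+d\n", VL, K * (int)VL);
      return 0;
    }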
@@ -94,6 +116,7 @@
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
+#include "AArch64StackOffset.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -173,7 +196,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
if (!MO.isFI())
continue;
- int Offset = 0;
+ StackOffset Offset;
if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
AArch64FrameOffsetCannotUpdate)
return 0;
@@ -183,6 +206,12 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
return DefaultSafeSPDisplacement;
}
+/// Returns the size of the entire SVE stackframe (calleesaves + spills).
+static StackOffset getSVEStackSize(const MachineFunction &MF) {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8};
+}
+
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -195,7 +224,8 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned NumBytes = AFI->getLocalStackSize();
- return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
+ return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 ||
+ getSVEStackSize(MF));
}
/// hasFP - Return true if the specified function should have a dedicated frame
@@ -273,14 +303,15 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
// Most call frames will be allocated at the start of a function so
// this is OK, but it is a limitation that needs dealing with.
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
- emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8},
+ TII);
}
} else if (CalleePopAmount != 0) {
// If the calling convention demands that the callee pops arguments from the
// stack, we want to add it back if we have a reserved call frame.
assert(CalleePopAmount < 0xffffff && "call frame too large");
- emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
- TII);
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
+ {-(int64_t)CalleePopAmount, MVT::i8}, TII);
}
return MBB.erase(I);
}
@@ -416,6 +447,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ if (MF.getFunction().hasOptSize())
+ return false;
+
if (AFI->getLocalStackSize() == 0)
return false;
@@ -436,6 +470,11 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
if (canUseRedZone(MF))
return false;
+ // When there is an SVE area on the stack, always allocate the
+ // callee-saves and spills/locals separately.
+ if (getSVEStackSize(MF))
+ return false;
+
return true;
}
@@ -474,8 +513,8 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
Imm = -Imm;
LLVM_FALLTHROUGH;
case AArch64::STPXpre: {
- unsigned Reg0 = MBBI->getOperand(1).getReg();
- unsigned Reg1 = MBBI->getOperand(2).getReg();
+ Register Reg0 = MBBI->getOperand(1).getReg();
+ Register Reg1 = MBBI->getOperand(2).getReg();
if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
.addImm(Imm * 8)
@@ -523,8 +562,8 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
}
case AArch64::STPXi:
case AArch64::LDPXi: {
- unsigned Reg0 = MBBI->getOperand(0).getReg();
- unsigned Reg1 = MBBI->getOperand(1).getReg();
+ Register Reg0 = MBBI->getOperand(0).getReg();
+ Register Reg1 = MBBI->getOperand(1).getReg();
if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
.addImm(Imm * 8)
@@ -791,6 +830,10 @@ static bool needsWinCFI(const MachineFunction &MF) {
F.needsUnwindTableEntry();
}
+static bool isTargetDarwin(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -846,6 +889,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Ideally it should match SP value after prologue.
AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+ const StackOffset &SVEStackSize = getSVEStackSize(MF);
+
// getStackSize() includes all the locals in its size calculation. We don't
// include these locals when computing the stack size of a funclet, as they
// are allocated in the parent's stack frame and accessed via the frame
@@ -856,6 +901,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
: (int)MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
+ assert(!SVEStackSize &&
+ "unexpected function without stack frame but with SVE objects");
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
if (!NumBytes)
@@ -866,8 +913,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
AFI->setHasRedZone(true);
++NumRedZoneFunctions;
} else {
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
+ false, NeedsWinCFI, &HasWinCFI);
if (!NeedsWinCFI) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
@@ -901,8 +949,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false,
+ NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
@@ -948,9 +998,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
if (HasFP) {
- // Only set up FP if we actually need to. Frame pointer is fp =
- // sp - fixedobject - 16.
- int FPOffset = AFI->getCalleeSavedStackSize() - 16;
+ // Only set up FP if we actually need to.
+ int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
+
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
@@ -958,8 +1008,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// mov fp,sp when FPOffset is zero.
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
- emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
+ {FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false,
+ NeedsWinCFI, &HasWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
@@ -1056,6 +1107,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = 0;
}
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII,
+ MachineInstr::FrameSetup);
+
// Allocate space for the rest of the frame.
if (NumBytes) {
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
@@ -1071,8 +1125,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
- emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
+ {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
+ false, NeedsWinCFI, &HasWinCFI);
if (NeedsRealignment) {
const unsigned Alignment = MFI.getMaxAlignment();
@@ -1130,8 +1185,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
- const int StackGrowth = -TD.getPointerSize(0);
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ const int StackGrowth = isTargetDarwin(MF)
+ ? (2 * -TD.getPointerSize(0))
+ : -AFI->getCalleeSavedStackSize();
+ Register FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
// .globl __foo
@@ -1202,7 +1259,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
- nullptr, Reg, 2 * StackGrowth - FixedObject));
+ nullptr, Reg, StackGrowth - FixedObject));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -1401,11 +1458,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameDestroy);
}
+ const StackOffset &SVEStackSize = getSVEStackSize(MF);
+
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
+ assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
- NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
- false, NeedsWinCFI, &HasWinCFI);
+ {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
if (NeedsWinCFI && HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
@@ -1416,6 +1476,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
+ // Deallocate the SVE area.
+ if (SVEStackSize)
+ if (!AFI->isStackRealigned())
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize,
+ TII, MachineInstr::FrameDestroy);
+
if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
// If this was a redzone leaf function, we don't need to restore the
@@ -1437,8 +1503,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ {StackRestoreBytes, MVT::i8}, TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
if (Done) {
if (NeedsWinCFI) {
HasWinCFI = true;
@@ -1456,13 +1522,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
- if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned()))
+ if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
+ int64_t OffsetToFrameRecord =
+ isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
- -AFI->getCalleeSavedStackSize() + 16, TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI);
- else if (NumBytes)
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ {OffsetToFrameRecord, MVT::i8},
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ } else if (NumBytes)
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+ {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
@@ -1483,8 +1552,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
- AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ {(int64_t)AfterCSRPopSize, MVT::i8}, TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
}
if (NeedsWinCFI && HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
@@ -1501,10 +1570,11 @@ int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
unsigned &FrameReg) const {
return resolveFrameIndexReference(
- MF, FI, FrameReg,
- /*PreferFP=*/
- MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
- /*ForSimm=*/false);
+ MF, FI, FrameReg,
+ /*PreferFP=*/
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
+ /*ForSimm=*/false)
+ .getBytes();
}
int AArch64FrameLowering::getNonLocalFrameIndexReference(
@@ -1512,18 +1582,19 @@ int AArch64FrameLowering::getNonLocalFrameIndexReference(
return getSEHFrameIndexOffset(MF, FI);
}
-static int getFPOffset(const MachineFunction &MF, int ObjectOffset) {
+static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
- return ObjectOffset + FixedObject + 16;
+ unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize();
+ return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
}
-static int getStackOffset(const MachineFunction &MF, int ObjectOffset) {
+static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) {
const auto &MFI = MF.getFrameInfo();
- return ObjectOffset + MFI.getStackSize();
+ return {ObjectOffset + (int)MFI.getStackSize(), MVT::i8};
}
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
@@ -1532,23 +1603,23 @@ int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
MF.getSubtarget().getRegisterInfo());
int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
- ? getFPOffset(MF, ObjectOffset)
- : getStackOffset(MF, ObjectOffset);
+ ? getFPOffset(MF, ObjectOffset).getBytes()
+ : getStackOffset(MF, ObjectOffset).getBytes();
}
-int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
- int FI, unsigned &FrameReg,
- bool PreferFP,
- bool ForSimm) const {
+StackOffset AArch64FrameLowering::resolveFrameIndexReference(
+ const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP,
+ bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
int ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
+ bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector;
+ return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
PreferFP, ForSimm);
}
-int AArch64FrameLowering::resolveFrameOffsetReference(
- const MachineFunction &MF, int ObjectOffset, bool isFixed,
+StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
+ const MachineFunction &MF, int ObjectOffset, bool isFixed, bool isSVE,
unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
@@ -1556,17 +1627,23 @@ int AArch64FrameLowering::resolveFrameOffsetReference(
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- int FPOffset = getFPOffset(MF, ObjectOffset);
- int Offset = getStackOffset(MF, ObjectOffset);
+ int FPOffset = getFPOffset(MF, ObjectOffset).getBytes();
+ int Offset = getStackOffset(MF, ObjectOffset).getBytes();
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
+ const StackOffset &SVEStackSize = getSVEStackSize(MF);
+
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
// reliable as a base). Make sure useFPForScavengingIndex() does the
// right thing for the emergency spill slot.
bool UseFP = false;
- if (AFI->hasStackFrame()) {
+ if (AFI->hasStackFrame() && !isSVE) {
+ // We shouldn't prefer using the FP when there is an SVE area
+ // in between the FP and the non-SVE locals/spills.
+ PreferFP &= !SVEStackSize;
+
// Note: Keeping the following as multiple 'if' statements rather than
// merging to a single expression for readability.
//
@@ -1594,8 +1671,10 @@ int AArch64FrameLowering::resolveFrameOffsetReference(
bool CanUseBP = RegInfo->hasBasePointer(MF);
if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
UseFP = PreferFP;
- else if (!CanUseBP) // Can't use BP. Forced to use FP.
+ else if (!CanUseBP) { // Can't use BP. Forced to use FP.
+ assert(!SVEStackSize && "Expected BP to be available");
UseFP = true;
+ }
// else we can use BP and FP, but the offset from FP won't fit.
// That will make us scavenge registers which we can probably avoid by
// using BP. If it won't fit for BP either, we'll scavenge anyway.
@@ -1625,9 +1704,36 @@ int AArch64FrameLowering::resolveFrameOffsetReference(
"In the presence of dynamic stack pointer realignment, "
"non-argument/CSR objects cannot be accessed through the frame pointer");
+ if (isSVE) {
+ int64_t OffsetToSVEArea =
+ MFI.getStackSize() - AFI->getCalleeSavedStackSize();
+ StackOffset FPOffset = {ObjectOffset, MVT::nxv1i8};
+ StackOffset SPOffset = SVEStackSize +
+ StackOffset(ObjectOffset, MVT::nxv1i8) +
+ StackOffset(OffsetToSVEArea, MVT::i8);
+ // Always use the FP for SVE spills if available and beneficial.
+ if (hasFP(MF) &&
+ (SPOffset.getBytes() ||
+ FPOffset.getScalableBytes() < SPOffset.getScalableBytes() ||
+ RegInfo->needsStackRealignment(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+
+ FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
+ : (unsigned)AArch64::SP;
+ return SPOffset;
+ }
+
+ StackOffset ScalableOffset = {};
+ if (UseFP && !(isFixed || isCSR))
+ ScalableOffset = -SVEStackSize;
+ if (!UseFP && (isFixed || isCSR))
+ ScalableOffset = SVEStackSize;
+
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return FPOffset;
+ return StackOffset(FPOffset, MVT::i8) + ScalableOffset;
}
// Use the base pointer if we have one.
@@ -1644,7 +1750,7 @@ int AArch64FrameLowering::resolveFrameOffsetReference(
Offset -= AFI->getLocalStackSize();
}
- return Offset;
+ return StackOffset(Offset, MVT::i8) + ScalableOffset;
}
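// Illustrative sketch, not part of the patch: how the StackOffset values
// returned above compose. The interface (an MVT-tagged constructor plus
// getBytes()/getScalableBytes()) is inferred from its uses in this diff,
// not quoted from AArch64StackOffset.h.
//
//   StackOffset Fixed(16, MVT::i8);          // 16 fixed bytes
//   StackOffset Scalable(32, MVT::nxv1i8);   // 32 bytes, scaled by VL/128
//   StackOffset Sum = Fixed + Scalable;
//   int64_t F = Sum.getBytes();              // 16, known at compile time
//   int64_t S = Sum.getScalableBytes();      // 32, multiplied at runtime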
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@@ -1682,6 +1788,23 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
return true;
}
+/// Returns true if Reg1 and Reg2 cannot be paired using an ldp/stp instruction.
+/// WindowsCFI requires that only consecutive registers can be paired.
+/// LR and FP need to be allocated together when the frame needs to save
+/// the frame-record. This means any other register pairing with LR is invalid.
+static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
+ bool NeedsWinCFI, bool NeedsFrameRecord) {
+ if (NeedsWinCFI)
+ return invalidateWindowsRegisterPairing(Reg1, Reg2, true);
+
+ // If we need to store the frame record, don't pair any register
+ // with LR other than FP.
+ if (NeedsFrameRecord)
+ return Reg2 == AArch64::LR;
+
+ return false;
+}
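// Behavior sketch for the predicate above (hand-checked against the code,
// not taken from a test in this patch), with NeedsWinCFI=false and
// NeedsFrameRecord=true:
//   invalidateRegisterPairing(AArch64::LR,  AArch64::FP,  false, true)  // false: the frame record pair
//   invalidateRegisterPairing(AArch64::X21, AArch64::LR,  false, true)  // true: LR may only pair with FP
//   invalidateRegisterPairing(AArch64::X20, AArch64::X21, false, true)  // false: LR not involved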
+
namespace {
struct RegPairInfo {
@@ -1701,7 +1824,7 @@ struct RegPairInfo {
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
- bool &NeedShadowCallStackProlog) {
+ bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) {
if (CSI.empty())
return;
@@ -1743,7 +1866,8 @@ static void computeCalleeSaveRegisterPairs(
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
- !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
+ NeedsFrameRecord))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
@@ -1777,6 +1901,10 @@ static void computeCalleeSaveRegisterPairs(
(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
"Out of order callee saved regs!");
+ assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
+ RPI.Reg1 == AArch64::LR) &&
+ "FrameRecord must be allocated together with LR");
+
// MachO's compact unwind format relies on all registers being stored in
// adjacent register pairs.
assert((!produceCompactUnwindFrame(MF) ||
@@ -1825,7 +1953,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
bool NeedShadowCallStackProlog = false;
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
- NeedShadowCallStackProlog);
+ NeedShadowCallStackProlog, hasFP(MF));
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (NeedShadowCallStackProlog) {
@@ -1955,7 +2083,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
bool NeedShadowCallStackProlog = false;
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
- NeedShadowCallStackProlog);
+ NeedShadowCallStackProlog, hasFP(MF));
auto EmitMI = [&](const RegPairInfo &RPI) {
unsigned Reg1 = RPI.Reg1;
@@ -2113,19 +2241,26 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(AArch64::LR);
}
- LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
+ LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
for (unsigned Reg
: SavedRegs.set_bits()) dbgs()
<< ' ' << printReg(Reg, RegInfo);
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
- bool CanEliminateFrame = SavedRegs.count() == 0;
+ unsigned MaxAlign = getStackAlignment();
+ int64_t SVEStackSize =
+ alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
+ assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+ bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
- bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
+
+ // Conservatively always assume BigStack when there are SVE spills.
+ bool BigStack = SVEStackSize ||
+ (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
@@ -2145,7 +2280,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// store the pair.
if (produceCompactUnwindFrame(MF))
SavedRegs.set(UnspilledCSGPRPaired);
- ExtraCSSpill = UnspilledCSGPRPaired;
+ ExtraCSSpill = UnspilledCSGPR;
}
// If we didn't find an extra callee-saved register to spill, create
@@ -2181,14 +2316,42 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
return AFI->hasCalleeSaveStackFreeSpace();
}
+int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
+ unsigned &MaxAlign) const {
+ // Process all fixed stack objects.
+ int64_t Offset = 0;
+ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
+ if (MFI.getStackID(I) == TargetStackID::SVEVector) {
+ int64_t FixedOffset = -MFI.getObjectOffset(I);
+ if (FixedOffset > Offset)
+ Offset = FixedOffset;
+ }
+
+ // Note: Allocatable stack objects are not taken into account yet,
+ // because allocation for those is not implemented.
+ return Offset;
+}
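// Hand-worked instance of the scan above (hypothetical frame, not from a
// test in this patch). Fixed SVE object offsets are negative scalable bytes:
//   getObjectOffset(FI=-2) == -32  ->  candidate 32
//   getObjectOffset(FI=-1) == -16  ->  candidate 16
// The running maximum yields Offset == 32; callers then round up with
// alignTo(Offset, MaxAlign).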
+
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
+ "Upwards growing stack unsupported");
+
+ unsigned MaxAlign = getStackAlignment();
+ int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
+
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
+ assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
if (!MF.hasEHFunclets())
return;
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineFrameInfo &MFI = MF.getFrameInfo();
WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
MachineBasicBlock &MBB = MF.front();
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 6dbd34b2189f..ac150e86c9eb 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
+#include "AArch64StackOffset.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
@@ -20,7 +21,7 @@ namespace llvm {
class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
- : TargetFrameLowering(StackGrowsDown, 16, 0, 16,
+ : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16),
true /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
@@ -39,12 +40,13 @@ public:
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
- int resolveFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg, bool PreferFP,
- bool ForSimm) const;
- int resolveFrameOffsetReference(const MachineFunction &MF, int ObjectOffset,
- bool isFixed, unsigned &FrameReg,
- bool PreferFP, bool ForSimm) const;
+ StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg, bool PreferFP,
+ bool ForSimm) const;
+ StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
+ int ObjectOffset, bool isFixed,
+ bool isSVE, unsigned &FrameReg,
+ bool PreferFP, bool ForSimm) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -85,9 +87,21 @@ public:
int FI) const override;
int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ bool isSupportedStackID(TargetStackID::Value ID) const override {
+ switch (ID) {
+ default:
+ return false;
+ case TargetStackID::Default:
+ case TargetStackID::SVEVector:
+ case TargetStackID::NoAlloc:
+ return true;
+ }
+ }
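// Usage sketch (setStackID/CreateStackObject are existing MachineFrameInfo
// API, but this pairing is illustrative, not code from this patch): tagging
// a slot so the SVE path in resolveFrameIndexReference() is taken.
//   int FI = MFI.CreateStackObject(Size, /*Alignment=*/16, /*isSpillSlot=*/true);
//   MFI.setStackID(FI, TargetStackID::SVEVector);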
+
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
+ int64_t determineSVEStackSize(MachineFrameInfo &MFI, unsigned &MaxAlign) const;
};
} // End llvm namespace
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index cd7e927ac80c..1f08505f37e7 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2053,7 +2053,7 @@ static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
}
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
- if (Depth >= 6)
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
return;
// Initialize UsefulBits
if (!Depth) {
@@ -2913,49 +2913,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
break;
- case ISD::EXTRACT_VECTOR_ELT: {
- // Extracting lane zero is a special case where we can just use a plain
- // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
- // the rest of the compiler, especially the register allocator and copy
- // propagation, to reason about, so is preferred when it's possible to
- // use it.
- ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
- // Bail and use the default Select() for non-zero lanes.
- if (LaneNode->getZExtValue() != 0)
- break;
- // If the element type is not the same as the result type, likewise
- // bail and use the default Select(), as there's more to do than just
- // a cross-class COPY. This catches extracts of i8 and i16 elements
- // since they will need an explicit zext.
- if (VT != Node->getOperand(0).getValueType().getVectorElementType())
- break;
- unsigned SubReg;
- switch (Node->getOperand(0)
- .getValueType()
- .getVectorElementType()
- .getSizeInBits()) {
- default:
- llvm_unreachable("Unexpected vector element type!");
- case 64:
- SubReg = AArch64::dsub;
- break;
- case 32:
- SubReg = AArch64::ssub;
- break;
- case 16:
- SubReg = AArch64::hsub;
- break;
- case 8:
- llvm_unreachable("unexpected zext-requiring extract element!");
- }
- SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
- Node->getOperand(0));
- LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
- LLVM_DEBUG(Extract->dumpr(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
- ReplaceNode(Node, Extract.getNode());
- return;
- }
case ISD::Constant: {
// Materialize zero constants as copies from WZR/XZR. This allows
// the coalescer to propagate these into other instructions.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7becc99fb5c7..2746117e8ee5 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -161,6 +162,29 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addQRTypeForNEON(MVT::v8f16);
}
+ if (Subtarget->hasSVE()) {
+ // Add legal SVE predicate types
+ addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
+ addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
+ addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
+ addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
+
+ // Add legal SVE data types
+ addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
+
+ addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv1f32, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv1f64, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
+ }
+
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -283,7 +307,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// AArch64 lacks both left-rotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
@@ -297,7 +321,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
@@ -606,6 +630,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
+ MaxLoadsPerMemcmpOptSize = 4;
+ MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
+ ? MaxLoadsPerMemcmpOptSize : 8;
+
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
@@ -613,10 +641,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
EnableExtLdPromotion = true;
// Set required alignment.
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(Align(4));
// Set preferred alignments.
- setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
- setPrefLoopAlignment(STI.getPrefLoopAlignment());
+ setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
+ setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
// Only change the limit for entries in a jump table if specified by
// the subtarget, but not at the command line.
@@ -725,7 +753,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
@@ -741,7 +769,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
- for (MVT InnerVT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
@@ -773,6 +801,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
}
+ if (Subtarget->hasSVE()) {
+ for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
+ if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1)
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ }
+ }
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
@@ -1025,6 +1060,14 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Known.One &= Known2.One;
break;
}
+ case AArch64ISD::LOADgot:
+ case AArch64ISD::ADDlow: {
+ if (!Subtarget->isTargetILP32())
+ break;
+ // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
+ Known.Zero = APInt::getHighBitsSet(64, 32);
+ break;
+ }
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
@@ -1100,6 +1143,32 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
return true;
}
+// Same as above but handling LLTs instead.
+bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
+ LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+ bool *Fast) const {
+ if (Subtarget->requiresStrictAlign())
+ return false;
+
+ if (Fast) {
+ // Some CPUs are fine with unaligned stores except for 128-bit ones.
+ *Fast = !Subtarget->isMisaligned128StoreSlow() ||
+ Ty.getSizeInBytes() != 16 ||
+ // See comments in performSTORECombine() for more details about
+ // these conditions.
+
+ // Code that uses clang vector extensions can mark that it
+ // wants unaligned accesses to be treated as fast by
+ // underspecifying alignment to be 1 or 2.
+ Align <= 2 ||
+
+ // Disregard v2i64. Memcpy lowering produces those and splitting
+ // them regresses performance on micro-benchmarks and olden/bh.
+ Ty == LLT::vector(2, 64);
+ }
+ return true;
+}
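// Call sketch (illustrative, not from this patch): querying the LLT variant
// from GlobalISel code for an unaligned 16-byte vector access.
//   bool Fast = false;
//   bool OK = TLI.allowsMisalignedMemoryAccesses(
//       LLT::vector(2, 64), /*AddrSpace=*/0, /*Align=*/1,
//       MachineMemOperand::MONone, &Fast);
//   // OK is false only under strict alignment; Fast follows the
//   // isMisaligned128StoreSlow() reasoning above.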
+
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
@@ -1238,6 +1307,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::STZG: return "AArch64ISD::STZG";
case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
+ case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI";
+ case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
+ case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
+ case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
}
return nullptr;
}
@@ -1263,9 +1336,9 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator It = ++MBB->getIterator();
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned IfTrueReg = MI.getOperand(1).getReg();
- unsigned IfFalseReg = MI.getOperand(2).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
+ Register IfTrueReg = MI.getOperand(1).getReg();
+ Register IfFalseReg = MI.getOperand(2).getReg();
unsigned CondCode = MI.getOperand(3).getImm();
bool NZCVKilled = MI.getOperand(4).isKill();
@@ -2140,7 +2213,8 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
- return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
}
// Returns true if the given Op is the overflow flag result of an overflow
@@ -2349,7 +2423,8 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
// precise. That doesn't take part in the LibCall so we can't directly use
// LowerF128Call.
SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, CallOptions,
SDLoc(Op)).first;
}
@@ -2419,7 +2494,8 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
- return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -2773,6 +2849,19 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_sunpkhi:
+ return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_sunpklo:
+ return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_uunpkhi:
+ return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_uunpklo:
+ return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
+ Op.getOperand(1));
+
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();
@@ -2937,6 +3026,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SPLAT_VECTOR:
+ return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::SRA:
@@ -3014,8 +3105,11 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
return CC_AArch64_Win64_VarArg;
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
- return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
- case CallingConv::Win64:
+ if (!IsVarArg)
+ return CC_AArch64_DarwinPCS;
+ return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
+ : CC_AArch64_DarwinPCS_VarArg;
+ case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
case CallingConv::AArch64_VectorCall:
return CC_AArch64_AAPCS;
@@ -3038,6 +3132,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
+ DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
@@ -3094,11 +3189,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
continue;
}
+ SDValue ArgValue;
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
-
- SDValue ArgValue;
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
@@ -3113,6 +3207,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
RC = &AArch64::FPR64RegClass;
else if (RegVT == MVT::f128 || RegVT.is128BitVector())
RC = &AArch64::FPR128RegClass;
+ else if (RegVT.isScalableVector() &&
+ RegVT.getVectorElementType() == MVT::i1)
+ RC = &AArch64::PPRRegClass;
+ else if (RegVT.isScalableVector())
+ RC = &AArch64::ZPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
@@ -3128,20 +3227,23 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
+ case CCValAssign::Indirect:
+ assert(VA.getValVT().isScalableVector() &&
+ "Only scalable vectors can be passed indirectly");
+ llvm_unreachable("Spilling of SVE vectors not yet implemented");
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
break;
case CCValAssign::AExt:
case CCValAssign::SExt:
case CCValAssign::ZExt:
- // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
- // nodes after our lowering.
- assert(RegVT == Ins[i].VT && "incorrect register location selected");
+ break;
+ case CCValAssign::AExtUpper:
+ ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
+ DAG.getConstant(32, DL, RegVT));
+ ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
break;
}
-
- InVals.push_back(ArgValue);
-
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
@@ -3156,7 +3258,6 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue ArgValue;
// For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
@@ -3165,9 +3266,14 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
switch (VA.getLocInfo()) {
default:
break;
+ case CCValAssign::Trunc:
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
+ case CCValAssign::Indirect:
+ assert(VA.getValVT().isScalableVector() &&
+ "Only scalable vectors can be passed indirectly");
+ llvm_unreachable("Spilling of SVE vectors not yet implemented");
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
@@ -3184,8 +3290,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MemVT);
- InVals.push_back(ArgValue);
}
+ if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
+ ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
+ ArgValue, DAG.getValueType(MVT::i32));
+ InVals.push_back(ArgValue);
}
// varargs
@@ -3202,8 +3311,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// This will point to the next argument passed via stack.
unsigned StackOffset = CCInfo.getNextStackOffset();
- // We currently pass all varargs at 8-byte alignment.
- StackOffset = ((StackOffset + 7) & ~7);
+ // We currently pass all varargs at 8-byte alignment, or 4 bytes for ILP32.
+ StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
@@ -3233,8 +3342,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
assert(!FuncInfo->getSRetReturnReg());
MVT PtrTy = getPointerTy(DAG.getDataLayout());
- unsigned Reg =
- MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+ Register Reg =
+ MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
@@ -3366,6 +3475,7 @@ SDValue AArch64TargetLowering::LowerCallResult(
: RetCC_AArch64_AAPCS;
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
+ DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC);
@@ -3383,10 +3493,16 @@ SDValue AArch64TargetLowering::LowerCallResult(
continue;
}
- SDValue Val =
- DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
- Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
+ // Avoid copying a physreg twice since RegAllocFast is incompetent and only
+ // allows one use of a physreg per block.
+ SDValue Val = CopiedRegs.lookup(VA.getLocReg());
+ if (!Val) {
+ Val =
+ DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ CopiedRegs[VA.getLocReg()] = Val;
+ }
switch (VA.getLocInfo()) {
default:
@@ -3396,6 +3512,15 @@ SDValue AArch64TargetLowering::LowerCallResult(
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
+ case CCValAssign::AExtUpper:
+ Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
+ DAG.getConstant(32, DL, VA.getLocVT()));
+ LLVM_FALLTHROUGH;
+ case CCValAssign::AExt:
+ LLVM_FALLTHROUGH;
+ case CCValAssign::ZExt:
+ Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
+ break;
}
InVals.push_back(Val);
@@ -3593,6 +3718,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
+ MachineFunction::CallSiteInfo CSInfo;
bool IsThisReturn = false;
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
@@ -3709,6 +3835,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallSet<unsigned, 8> RegsUsed;
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -3716,7 +3843,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
- RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
+ RegsToPass.emplace_back(F.PReg, Val);
}
}
@@ -3747,12 +3874,25 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
+ case CCValAssign::AExtUpper:
+ assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
+ DAG.getConstant(32, DL, VA.getLocVT()));
+ break;
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+ Arg = DAG.getBitcast(VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::Trunc:
+ Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::FPExt:
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
+ case CCValAssign::Indirect:
+ assert(VA.getValVT().isScalableVector() &&
+ "Only scalable vectors can be passed indirectly");
+ llvm_unreachable("Spilling of SVE vectors not yet implemented");
}
if (VA.isRegLoc()) {
@@ -3764,7 +3904,33 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
"unexpected use of 'returned'");
IsThisReturn = true;
}
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (RegsUsed.count(VA.getLocReg())) {
+ // If this register has already been used then we're trying to pack
+ // parts of an [N x i32] into an X-register. The extension type will
+ // take care of putting the two halves in the right place but we have to
+ // combine them.
+ SDValue &Bits =
+ std::find_if(RegsToPass.begin(), RegsToPass.end(),
+ [=](const std::pair<unsigned, SDValue> &Elt) {
+ return Elt.first == VA.getLocReg();
+ })
+ ->second;
+ Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
+ // Call site info is used for function's parameter entry value
+ // tracking. For now we track only simple cases when parameter
+ // is transferred through whole register.
+ CSInfo.erase(std::remove_if(CSInfo.begin(), CSInfo.end(),
+ [&VA](MachineFunction::ArgRegPair ArgReg) {
+ return ArgReg.Reg == VA.getLocReg();
+ }),
+ CSInfo.end());
+ } else {
+ RegsToPass.emplace_back(VA.getLocReg(), Arg);
+ RegsUsed.insert(VA.getLocReg());
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.EnableDebugEntryValues)
+ CSInfo.emplace_back(VA.getLocReg(), i);
+ }
} else {
assert(VA.isMemLoc());
@@ -3899,6 +4065,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegToPass.first,
RegToPass.second.getValueType()));
+ // Check callee args/returns for SVE registers and set calling convention
+ // accordingly.
+ if (CallConv == CallingConv::C) {
+ bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
+ return Out.VT.isScalableVector();
+ });
+ bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
+ return In.VT.isScalableVector();
+ });
+
+ if (CalleeInSVE || CalleeOutSVE)
+ CallConv = CallingConv::AArch64_SVE_VectorCall;
+ }
+
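// Effect sketch (hand-derived, not from this patch's tests): for a C-level
// call such as
//   svint32_t f(svint32_t x);   // SVE ACLE type, assumed front-end support
// both In.VT and Out.VT are scalable, so CallConv becomes
// AArch64_SVE_VectorCall and the SVE call-preserved mask (z8-z23, p4-p15
// callee-saved under the SVE PCS) is selected below.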
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -3930,12 +4110,15 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// actual call instruction.
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
- return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
+ SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
+ DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
+ return Ret;
}
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
+ DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
uint64_t CalleePopBytes =
DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
@@ -3983,7 +4166,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Copy the result values into the output registers.
SDValue Flag;
- SmallVector<SDValue, 4> RetOps(1, Chain);
+ SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
+ SmallSet<unsigned, 4> RegsUsed;
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
@@ -4005,11 +4189,38 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt:
+ Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
+ break;
+ case CCValAssign::AExtUpper:
+ assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
+ Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
+ Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
+ DAG.getConstant(32, DL, VA.getLocVT()));
+ break;
}
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
+ if (RegsUsed.count(VA.getLocReg())) {
+ SDValue &Bits =
+ std::find_if(RetVals.begin(), RetVals.end(),
+ [=](const std::pair<unsigned, SDValue> &Elt) {
+ return Elt.first == VA.getLocReg();
+ })
+ ->second;
+ Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
+ } else {
+ RetVals.emplace_back(VA.getLocReg(), Arg);
+ RegsUsed.insert(VA.getLocReg());
+ }
+ }
+
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ for (auto &RetVal : RetVals) {
+ Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ RetOps.push_back(
+ DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
// Windows AArch64 ABIs require that for returning structs by value we copy
@@ -4139,8 +4350,7 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
- unsigned char OpFlags =
- Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+ unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
if (OpFlags != AArch64II::MO_NO_FLAG)
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
@@ -4204,6 +4414,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SDLoc DL(Op);
MVT PtrVT = getPointerTy(DAG.getDataLayout());
+ MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
@@ -4214,13 +4425,15 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
- MVT::i64, DL, Chain, DescAddr,
+ PtrMemVT, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ 8,
- MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
- MachineMemOperand::MODereferenceable);
+ /* Alignment = */ PtrMemVT.getSizeInBits() / 8,
+ MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
+ // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
+ FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
+
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
@@ -4470,7 +4683,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
if (LHS.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -4736,7 +4949,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
if (LHS.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, use it.
if (!RHS.getNode()) {
@@ -4798,7 +5011,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -5096,6 +5309,7 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
+ FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
@@ -5202,15 +5416,15 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
// AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
// pointer.
SDLoc DL(Op);
- unsigned VaListSize =
- Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
+ unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
+ unsigned VaListSize = (Subtarget->isTargetDarwin() ||
+ Subtarget->isTargetWindows()) ? PtrSize : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
- Op.getOperand(2),
- DAG.getConstant(VaListSize, DL, MVT::i32),
- 8, false, false, false, MachinePointerInfo(DestSV),
+ return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(VaListSize, DL, MVT::i32), PtrSize,
+ false, false, false, MachinePointerInfo(DestSV),
MachinePointerInfo(SrcSV));
}
@@ -5224,12 +5438,15 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
unsigned Align = Op.getConstantOperandVal(3);
+ unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrVT = getPointerTy(DAG.getDataLayout());
-
- SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
+ auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
+ SDValue VAList =
+ DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
Chain = VAList.getValue(1);
+ VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
- if (Align > 8) {
+ if (Align > MinSlotSize) {
assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Align - 1, DL, PtrVT));
@@ -5238,14 +5455,14 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
- uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
+ unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
// vaargs list to match this, and for FP values we need to introduce
// FP_ROUND nodes as well.
if (VT.isInteger() && !VT.isVector())
- ArgSize = 8;
+ ArgSize = std::max(ArgSize, MinSlotSize);
bool NeedFPTrunc = false;
if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
ArgSize = 8;
@@ -5255,6 +5472,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(ArgSize, DL, PtrVT));
+ VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
+
// Store the incremented VAList to the legalized pointer
SDValue APStore =
DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
@@ -5284,10 +5503,15 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr =
- DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
while (Depth--)
FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
+
+ if (Subtarget->isTargetILP32())
+ FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
+ DAG.getValueType(VT));
+
return FrameAddr;
}
@@ -5306,9 +5530,9 @@ SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
- unsigned Reg = MatchRegisterName(RegName);
+Register AArch64TargetLowering::
+getRegisterByName(const char* RegName, EVT VT, const MachineFunction &MF) const {
+ Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
@@ -5653,6 +5877,21 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
return "r";
}
+enum PredicateConstraint {
+ Upl,
+ Upa,
+ Invalid
+};
+
+static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
+ PredicateConstraint P = PredicateConstraint::Invalid;
+ if (Constraint == "Upa")
+ P = PredicateConstraint::Upa;
+ if (Constraint == "Upl")
+ P = PredicateConstraint::Upl;
+ return P;
+}
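// Source-level sketch of the new constraints (assumed SVE ACLE inline-asm
// usage, not code from this patch): "Upa" allows any of p0-p15, while
// "Upl" restricts the operand to p0-p7.
//   svbool_t copy_pred(svbool_t in) {
//     svbool_t out;
//     asm("mov %0.b, %1.b" : "=Upa"(out) : "Upl"(in));
//     return out;
//   }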
+
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
@@ -5661,19 +5900,30 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
switch (Constraint[0]) {
default:
break;
- case 'z':
- return C_Other;
case 'x':
case 'w':
+ case 'y':
return C_RegisterClass;
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as 'r'.
case 'Q':
return C_Memory;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'Y':
+ case 'Z':
+ return C_Immediate;
+ case 'z':
case 'S': // A symbolic address
return C_Other;
}
- }
+ } else if (parsePredicateConstraint(Constraint) !=
+ PredicateConstraint::Invalid)
+ return C_RegisterClass;
return TargetLowering::getConstraintType(Constraint);
}
@@ -5697,12 +5947,17 @@ AArch64TargetLowering::getSingleConstraintMatchWeight(
break;
case 'x':
case 'w':
+ case 'y':
if (type->isFloatingPointTy() || type->isVectorTy())
weight = CW_Register;
break;
case 'z':
weight = CW_Constant;
break;
+ case 'U':
+ if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
+ weight = CW_Register;
+ break;
}
return weight;
}
@@ -5719,6 +5974,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
case 'w':
if (!Subtarget->hasFPARMv8())
break;
+ if (VT.isScalableVector())
+ return std::make_pair(0U, &AArch64::ZPRRegClass);
if (VT.getSizeInBits() == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
if (VT.getSizeInBits() == 32)
@@ -5733,9 +5990,25 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
case 'x':
if (!Subtarget->hasFPARMv8())
break;
+ if (VT.isScalableVector())
+ return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
+ case 'y':
+ if (!Subtarget->hasFPARMv8())
+ break;
+ if (VT.isScalableVector())
+ return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
+ break;
+ }
+ } else {
+ PredicateConstraint PC = parsePredicateConstraint(Constraint);
+ if (PC != PredicateConstraint::Invalid) {
+ assert(VT.isScalableVector());
+ bool restricted = (PC == PredicateConstraint::Upl);
+ return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
+ : std::make_pair(0U, &AArch64::PPRRegClass);
}
}
if (StringRef("{cc}").equals_lower(Constraint))
@@ -6279,6 +6552,8 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
@@ -6446,8 +6721,7 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
if (!isConcatMask(Mask, VT, SplitV0))
return SDValue();
- EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
+ EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
if (SplitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
DAG.getConstant(0, DL, MVT::i64));
@@ -6790,6 +7064,41 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return GenerateTBL(Op, ShuffleMask, DAG);
}
+SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ EVT ElemVT = VT.getScalarType();
+
+ SDValue SplatVal = Op.getOperand(0);
+
+ // Extend input splat value where needed to fit into a GPR (32b or 64b only)
+ // FPRs don't have this restriction.
+ switch (ElemVT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ case MVT::i16:
+ SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
+ break;
+ case MVT::i64:
+ SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
+ break;
+ case MVT::i32:
+ // Fine as is
+ break;
+ // TODO: we can support splats of i1s and float types, but haven't added
+ // patterns yet.
+ case MVT::i1:
+ case MVT::f16:
+ case MVT::f32:
+ case MVT::f64:
+ default:
+ llvm_unreachable("Unsupported SPLAT_VECTOR input operand type");
+ break;
+ }
+
+ return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
+}
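// Lowering sketch (hand-derived, not a test from this patch): a scalable
// i32 splat becomes a single AArch64ISD::DUP, e.g.
//   t1: nxv4i32 = splat_vector Constant:i32<7>
//     -> t1: nxv4i32 = AArch64ISD::DUP Constant:i32<7>
// which instruction selection can then match to an SVE dup/mov of a Z-reg.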
+
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
@@ -8063,7 +8372,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
- Info.align = 0;
+ Info.align.reset();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
@@ -8089,7 +8398,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
- Info.align = 0;
+ Info.align.reset();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
@@ -8101,7 +8410,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
+ Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
@@ -8112,7 +8421,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
+ Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
@@ -8122,7 +8431,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = 16;
+ Info.align = Align(16);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_stlxp:
@@ -8131,7 +8440,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
- Info.align = 16;
+ Info.align = Align(16);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
default:
@@ -8278,7 +8587,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
uint64_t ShiftAmt =
- countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3;
+ countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
if (ShiftAmt == 0 || ShiftAmt > 4)
@@ -8739,6 +9048,39 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
return MVT::Other;
}
+LLT AArch64TargetLowering::getOptimalMemOpLLT(
+ uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset, bool MemcpyStrSrc,
+ const AttributeList &FuncAttributes) const {
+ bool CanImplicitFloat =
+ !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
+ bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
+ bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
+ // Only use AdvSIMD to implement memset of 32-byte and above. It would have
+ // taken one instruction to materialize the v2i64 zero and one store (with
+ // restrictive addressing mode). Just do i64 stores.
+ bool IsSmallMemset = IsMemset && Size < 32;
+ auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
+ if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
+ return true;
+ bool Fast;
+ return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
+ &Fast) &&
+ Fast;
+ };
+
+ if (CanUseNEON && IsMemset && !IsSmallMemset &&
+ AlignmentIsAcceptable(MVT::v2i64, 16))
+ return LLT::vector(2, 64);
+ if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+ return LLT::scalar(128);
+ if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+ return LLT::scalar(64);
+ if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+ return LLT::scalar(32);
+ return LLT();
+}
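// Selection sketch for the ladder above (assuming each alignment check
// passes): a 64-byte memset with NEON available yields LLT::vector(2, 64),
// i.e. 16-byte q-register stores; under noimplicitfloat it falls through
// to LLT::scalar(64) and plain i64 stores.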
+
// 12-bit optionally shifted immediates are legal for adds.
bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
if (Immed == std::numeric_limits<int64_t>::min()) {
@@ -10065,6 +10407,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
Opcode = AArch64ISD::SQSHLU_I;
IsRightShift = false;
break;
+ case Intrinsic::aarch64_neon_sshl:
+ case Intrinsic::aarch64_neon_ushl:
+ // For positive shift amounts we can use SHL, since ushl/sshl perform a
+ // regular left shift in that case. Below, we only replace the current
+ // node with VSHL if this condition is met.
+ Opcode = AArch64ISD::VSHL;
+ IsRightShift = false;
+ break;
}
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
@@ -10151,6 +10501,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_sqshlu:
case Intrinsic::aarch64_neon_srshl:
case Intrinsic::aarch64_neon_urshl:
+ case Intrinsic::aarch64_neon_sshl:
+ case Intrinsic::aarch64_neon_ushl:
return tryCombineShiftImm(IID, N, DAG);
case Intrinsic::aarch64_crc32b:
case Intrinsic::aarch64_crc32cb:
@@ -10482,10 +10834,10 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return ReplacedSplat;
SDLoc DL(S);
- unsigned NumElts = VT.getVectorNumElements() / 2;
+
// Split VT into two.
- EVT HalfVT =
- EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
+ EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned NumElts = HalfVT.getVectorNumElements();
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(0, DL, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
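
// Equivalence sketch for the change above (assumptions: VT is a fixed-width
// vector such as v4i32 and Ctx is the DAG's LLVMContext; requires
// llvm/CodeGen/ValueTypes.h). Both forms name the same half-width type.
EVT halfOf(EVT VT, LLVMContext &Ctx) {
  EVT Half = VT.getHalfNumVectorElementsVT(Ctx); // e.g. v4i32 -> v2i32
  assert(Half == EVT::getVectorVT(Ctx, VT.getVectorElementType(),
                                  VT.getVectorNumElements() / 2) &&
         "matches the old manual construction");
  return Half;
}
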
@@ -10567,7 +10919,7 @@ static SDValue performPostLD1Combine(SDNode *N,
// are predecessors to each other or the Vector.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
- Visited.insert(N);
+ Visited.insert(Addr.getNode());
Worklist.push_back(User);
Worklist.push_back(LD);
Worklist.push_back(Vector.getNode());
@@ -11983,6 +12335,27 @@ bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
return Mask->getValue().isPowerOf2();
}
+bool AArch64TargetLowering::
+ shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const {
+  // Does the baseline recommend against performing the fold by default?
+ if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
+ return false;
+ // Else, if this is a vector shift, prefer 'shl'.
+ return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
+}
+
+bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
+ SDNode *N) const {
+ if (DAG.getMachineFunction().getFunction().hasMinSize() &&
+ !Subtarget->isTargetWindows())
+ return false;
+ return true;
+}
+
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in AArch64FunctionInfo.
AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
@@ -12009,7 +12382,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 4421c31f65c9..00fa96bc4e6d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -191,6 +191,11 @@ enum NodeType : unsigned {
FRECPE, FRECPS,
FRSQRTE, FRSQRTS,
+ SUNPKHI,
+ SUNPKLO,
+ UUNPKHI,
+ UUNPKLO,
+
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
@@ -261,6 +266,14 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
+ // Returning i64 unconditionally here (i.e. even for ILP32) means that the
+  // *DAG* representation of pointers will always be 64 bits. They will be
+ // truncated and extended when transferred to memory, but the 64-bit DAG
+ // allows us to use AArch64's addressing modes much more easily.
+ return MVT::getIntegerVT(64);
+ }
+
bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
TargetLoweringOpt &TLO) const override;
@@ -272,6 +285,10 @@ public:
EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;
+ /// LLT variant.
+ bool allowsMisalignedMemoryAccesses(
+ LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+ bool *Fast = nullptr) const override;
/// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -358,6 +375,10 @@ public:
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
const AttributeList &FuncAttributes) const override;
+ LLT getOptimalMemOpLLT(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ const AttributeList &FuncAttributes) const override;
+
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -480,11 +501,12 @@ public:
return VT.getSizeInBits() >= 64; // vector 'bic'
}
- bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
- if (DAG.getMachineFunction().getFunction().hasMinSize())
- return false;
- return true;
- }
+ bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const override;
+
+ bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
bool shouldTransformSignedTruncationCheck(EVT XVT,
unsigned KeptBits) const override {
@@ -655,6 +677,7 @@ private:
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
@@ -690,8 +713,8 @@ private:
unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
index e22cb44d81ae..459b53923625 100644
--- a/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -204,19 +204,27 @@ def : Pat<(relaxed_store<atomic_store_64>
def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
+}
def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
+}
def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
+}
def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
+}
def : Pat<(ldxr_1 GPR64sp:$addr),
(SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
@@ -237,19 +245,27 @@ def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff),
def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
+}
def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
+}
def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
+}
def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
+}
def : Pat<(ldaxr_1 GPR64sp:$addr),
(SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
@@ -271,22 +287,30 @@ def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff),
def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
(int_aarch64_stxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
+}
def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
(int_aarch64_stxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
+}
def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
(int_aarch64_stxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
+}
def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
(int_aarch64_stxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
+}
def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr),
@@ -317,22 +341,30 @@ def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
def stlxr_1 : PatFrag<(ops node:$val, node:$ptr),
(int_aarch64_stlxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
+}
def stlxr_2 : PatFrag<(ops node:$val, node:$ptr),
(int_aarch64_stlxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
+}
def stlxr_4 : PatFrag<(ops node:$val, node:$ptr),
(int_aarch64_stlxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
+}
def stlxr_8 : PatFrag<(ops node:$val, node:$ptr),
(int_aarch64_stlxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
+}
def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr),
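
// The GISelPredicateCode hooks above all reuse one helper across widths. A
// plausible shape for such a predicate is sketched below; the real
// isLoadStoreOfNumBytes lives in the AArch64 GlobalISel selector, and this
// fragment only illustrates the idea (assumes llvm/CodeGen/MachineInstr.h):
static bool isLoadStoreOfNumBytesSketch(const llvm::MachineInstr &MI,
                                        unsigned NumBytes) {
  // Exactly one memory operand whose size matches the requested width.
  if (!MI.hasOneMemOperand())
    return false;
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}
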
@@ -422,4 +454,3 @@ let Predicates = [HasLSE] in {
defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
}
-
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index d619137b55c5..f555e4123307 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -480,76 +480,40 @@ def BranchTarget14Operand : BranchTarget<14>;
def BranchTarget26Operand : BranchTarget<26>;
def PCRelLabel19Operand : PCRelLabel<19>;
-def MovZSymbolG3AsmOperand : AsmOperandClass {
- let Name = "MovZSymbolG3";
+def MovWSymbolG3AsmOperand : AsmOperandClass {
+ let Name = "MovWSymbolG3";
let RenderMethod = "addImmOperands";
}
-def movz_symbol_g3 : Operand<i32> {
- let ParserMatchClass = MovZSymbolG3AsmOperand;
+def movw_symbol_g3 : Operand<i32> {
+ let ParserMatchClass = MovWSymbolG3AsmOperand;
}
-def MovZSymbolG2AsmOperand : AsmOperandClass {
- let Name = "MovZSymbolG2";
+def MovWSymbolG2AsmOperand : AsmOperandClass {
+ let Name = "MovWSymbolG2";
let RenderMethod = "addImmOperands";
}
-def movz_symbol_g2 : Operand<i32> {
- let ParserMatchClass = MovZSymbolG2AsmOperand;
+def movw_symbol_g2 : Operand<i32> {
+ let ParserMatchClass = MovWSymbolG2AsmOperand;
}
-def MovZSymbolG1AsmOperand : AsmOperandClass {
- let Name = "MovZSymbolG1";
+def MovWSymbolG1AsmOperand : AsmOperandClass {
+ let Name = "MovWSymbolG1";
let RenderMethod = "addImmOperands";
}
-def movz_symbol_g1 : Operand<i32> {
- let ParserMatchClass = MovZSymbolG1AsmOperand;
+def movw_symbol_g1 : Operand<i32> {
+ let ParserMatchClass = MovWSymbolG1AsmOperand;
}
-def MovZSymbolG0AsmOperand : AsmOperandClass {
- let Name = "MovZSymbolG0";
+def MovWSymbolG0AsmOperand : AsmOperandClass {
+ let Name = "MovWSymbolG0";
let RenderMethod = "addImmOperands";
}
-def movz_symbol_g0 : Operand<i32> {
- let ParserMatchClass = MovZSymbolG0AsmOperand;
-}
-
-def MovKSymbolG3AsmOperand : AsmOperandClass {
- let Name = "MovKSymbolG3";
- let RenderMethod = "addImmOperands";
-}
-
-def movk_symbol_g3 : Operand<i32> {
- let ParserMatchClass = MovKSymbolG3AsmOperand;
-}
-
-def MovKSymbolG2AsmOperand : AsmOperandClass {
- let Name = "MovKSymbolG2";
- let RenderMethod = "addImmOperands";
-}
-
-def movk_symbol_g2 : Operand<i32> {
- let ParserMatchClass = MovKSymbolG2AsmOperand;
-}
-
-def MovKSymbolG1AsmOperand : AsmOperandClass {
- let Name = "MovKSymbolG1";
- let RenderMethod = "addImmOperands";
-}
-
-def movk_symbol_g1 : Operand<i32> {
- let ParserMatchClass = MovKSymbolG1AsmOperand;
-}
-
-def MovKSymbolG0AsmOperand : AsmOperandClass {
- let Name = "MovKSymbolG0";
- let RenderMethod = "addImmOperands";
-}
-
-def movk_symbol_g0 : Operand<i32> {
- let ParserMatchClass = MovKSymbolG0AsmOperand;
+def movw_symbol_g0 : Operand<i32> {
+ let ParserMatchClass = MovWSymbolG0AsmOperand;
}
class fixedpoint_i32<ValueType FloatVT>
@@ -673,6 +637,11 @@ def logical_imm64_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>;
+def gi_logical_imm32_XFORM : GICustomOperandRenderer<"renderLogicalImm32">,
+ GISDNodeXFormEquiv<logical_imm32_XFORM>;
+def gi_logical_imm64_XFORM : GICustomOperandRenderer<"renderLogicalImm64">,
+ GISDNodeXFormEquiv<logical_imm64_XFORM>;
+
let DiagnosticType = "LogicalSecondSource" in {
def LogicalImm32Operand : AsmOperandClass {
let Name = "LogicalImm32";
@@ -714,12 +683,15 @@ def logical_imm64_not : Operand<i64> {
let ParserMatchClass = LogicalImm64NotOperand;
}
-// imm0_65535 predicate - True if the immediate is in the range [0,65535].
-def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+// iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535].
+let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in {
+def i32_imm0_65535 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 65536;
-}]> {
- let ParserMatchClass = AsmImmRange<0, 65535>;
- let PrintMethod = "printImmHex";
+}]>;
+
+def i64_imm0_65535 : Operand<i64>, TImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 65536;
+}]>;
}
// imm0_255 predicate - True if the immediate is in the range [0,255].
@@ -815,6 +787,14 @@ class arith_shifted_reg<ValueType Ty, RegisterClass regclass, int width>
def arith_shifted_reg32 : arith_shifted_reg<i32, GPR32, 32>;
def arith_shifted_reg64 : arith_shifted_reg<i64, GPR64, 64>;
+def gi_arith_shifted_reg32 :
+ GIComplexOperandMatcher<s32, "selectArithShiftedRegister">,
+ GIComplexPatternEquiv<arith_shifted_reg32>;
+
+def gi_arith_shifted_reg64 :
+ GIComplexOperandMatcher<s64, "selectArithShiftedRegister">,
+ GIComplexPatternEquiv<arith_shifted_reg64>;
+
// An arithmetic shifter operand:
// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror
// {5-0} - imm6
@@ -837,6 +817,14 @@ class logical_shifted_reg<ValueType Ty, RegisterClass regclass, Operand shiftop>
def logical_shifted_reg32 : logical_shifted_reg<i32, GPR32, logical_shift32>;
def logical_shifted_reg64 : logical_shifted_reg<i64, GPR64, logical_shift64>;
+def gi_logical_shifted_reg32 :
+ GIComplexOperandMatcher<s32, "selectLogicalShiftedRegister">,
+ GIComplexPatternEquiv<logical_shifted_reg32>;
+
+def gi_logical_shifted_reg64 :
+ GIComplexOperandMatcher<s64, "selectLogicalShiftedRegister">,
+ GIComplexPatternEquiv<logical_shifted_reg64>;
+
// A logical vector shifter operand:
// {7-6} - shift type: 00 = lsl
// {5-0} - imm6: #0, #8, #16, or #24
@@ -918,6 +906,14 @@ class neg_addsub_shifted_imm<ValueType Ty>
def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm<i32>;
def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm<i64>;
+def gi_neg_addsub_shifted_imm32 :
+ GIComplexOperandMatcher<s32, "selectNegArithImmed">,
+ GIComplexPatternEquiv<neg_addsub_shifted_imm32>;
+
+def gi_neg_addsub_shifted_imm64 :
+ GIComplexOperandMatcher<s64, "selectNegArithImmed">,
+ GIComplexPatternEquiv<neg_addsub_shifted_imm64>;
+
// An extend operand:
// {5-3} - extend type
// {2-0} - imm3
@@ -948,6 +944,21 @@ class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>,
let MIOperandInfo = (ops GPR32, arith_extend64);
}
+def arith_extended_reg32_i32 : arith_extended_reg32<i32>;
+def gi_arith_extended_reg32_i32 :
+ GIComplexOperandMatcher<s32, "selectArithExtendedRegister">,
+ GIComplexPatternEquiv<arith_extended_reg32_i32>;
+
+def arith_extended_reg32_i64 : arith_extended_reg32<i64>;
+def gi_arith_extended_reg32_i64 :
+ GIComplexOperandMatcher<s64, "selectArithExtendedRegister">,
+ GIComplexPatternEquiv<arith_extended_reg32_i64>;
+
+def arith_extended_reg32to64_i64 : arith_extended_reg32to64<i64>;
+def gi_arith_extended_reg32to64_i64 :
+ GIComplexOperandMatcher<s64, "selectArithExtendedRegister">,
+ GIComplexPatternEquiv<arith_extended_reg32to64_i64>;
+
// Floating-point immediate.
def fpimm16 : Operand<f16>,
FPImmLeaf<f16, [{
@@ -1000,8 +1011,8 @@ class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass {
let RenderMethod = "addVectorIndexOperands";
}
-class AsmVectorIndexOpnd<AsmOperandClass mc, code pred>
- : Operand<i64>, ImmLeaf<i64, pred> {
+class AsmVectorIndexOpnd<ValueType ty, AsmOperandClass mc, code pred>
+ : Operand<ty>, ImmLeaf<ty, pred> {
let ParserMatchClass = mc;
let PrintMethod = "printVectorIndex";
}
@@ -1012,11 +1023,17 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>;
def VectorIndexSOperand : AsmVectorIndex<0, 3>;
def VectorIndexDOperand : AsmVectorIndex<0, 1>;
-def VectorIndex1 : AsmVectorIndexOpnd<VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
-def VectorIndexB : AsmVectorIndexOpnd<VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
-def VectorIndexH : AsmVectorIndexOpnd<VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
-def VectorIndexS : AsmVectorIndexOpnd<VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
-def VectorIndexD : AsmVectorIndexOpnd<VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
+def VectorIndex1 : AsmVectorIndexOpnd<i64, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
+def VectorIndexB : AsmVectorIndexOpnd<i64, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+def VectorIndexH : AsmVectorIndexOpnd<i64, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+def VectorIndexS : AsmVectorIndexOpnd<i64, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+def VectorIndexD : AsmVectorIndexOpnd<i64, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
+
+def VectorIndex132b : AsmVectorIndexOpnd<i32, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
+def VectorIndexB32b : AsmVectorIndexOpnd<i32, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+def VectorIndexH32b : AsmVectorIndexOpnd<i32, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+def VectorIndexS32b : AsmVectorIndexOpnd<i32, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+def VectorIndexD32b : AsmVectorIndexOpnd<i32, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">;
def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">;
@@ -1025,15 +1042,15 @@ def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">;
def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">;
def sve_elm_idx_extdup_b
- : AsmVectorIndexOpnd<SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
+ : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
def sve_elm_idx_extdup_h
- : AsmVectorIndexOpnd<SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
+ : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
def sve_elm_idx_extdup_s
- : AsmVectorIndexOpnd<SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+ : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
def sve_elm_idx_extdup_d
- : AsmVectorIndexOpnd<SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+ : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
def sve_elm_idx_extdup_q
- : AsmVectorIndexOpnd<SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+ : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
// 8-bit immediate for AdvSIMD where 64-bit values of the form:
// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
@@ -1082,6 +1099,45 @@ class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
let Inst{4-0} = Rt;
}
+// System instructions for transactional memory extension
+class TMBaseSystemI<bit L, bits<4> CRm, bits<3> op2, dag oops, dag iops,
+ string asm, string operands, list<dag> pattern>
+ : BaseSystemI<L, oops, iops, asm, operands, pattern>,
+ Sched<[WriteSys]> {
+ let Inst{20-12} = 0b000110011;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = op2;
+ let DecoderMethod = "";
+
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+// System instructions for transactional memory - single input operand
+class TMSystemI<bits<4> CRm, string asm, list<dag> pattern>
+ : TMBaseSystemI<0b1, CRm, 0b011,
+ (outs GPR64:$Rt), (ins), asm, "\t$Rt", pattern> {
+ bits<5> Rt;
+ let Inst{4-0} = Rt;
+}
+
+// System instructions for transactional memory - no operand
+class TMSystemINoOperand<bits<4> CRm, string asm, list<dag> pattern>
+ : TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> {
+ let Inst{4-0} = 0b11111;
+}
+
+// System instructions for exit from transactions
+class TMSystemException<bits<3> op1, string asm, list<dag> pattern>
+ : I<(outs), (ins i64_imm0_65535:$imm), asm, "\t$imm", "", pattern>,
+ Sched<[WriteSys]> {
+ bits<16> imm;
+ let Inst{31-24} = 0b11010100;
+ let Inst{23-21} = op1;
+ let Inst{20-5} = imm;
+ let Inst{4-0} = 0b00000;
+}
+
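
// Standalone illustration of the TMSystemException bit layout above
// (assumption: fields are already range-checked). It packs 0b11010100 into
// bits [31:24], op1 into [23:21], the 16-bit immediate into [20:5], and
// zeros into [4:0], mirroring the Inst{...} assignments.
#include <cstdint>

uint32_t encodeTMException(uint32_t Op1, uint32_t Imm16) {
  return (0b11010100u << 24) | ((Op1 & 0x7u) << 21) | ((Imm16 & 0xFFFFu) << 5);
}
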
// Hint instructions that take both a CRm and a 3-bit immediate.
// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity
@@ -2180,11 +2236,11 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
// Add/Subtract extended register
let AddedComplexity = 1, hasSideEffects = 0 in {
def Wrx : BaseAddSubEReg<isSub, 0, GPR32sp, GPR32sp,
- arith_extended_reg32<i32>, mnemonic, OpNode> {
+ arith_extended_reg32_i32, mnemonic, OpNode> {
let Inst{31} = 0;
}
def Xrx : BaseAddSubEReg<isSub, 0, GPR64sp, GPR64sp,
- arith_extended_reg32to64<i64>, mnemonic, OpNode> {
+ arith_extended_reg32to64_i64, mnemonic, OpNode> {
let Inst{31} = 1;
}
}
@@ -2254,11 +2310,11 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
// Add/Subtract extended register
let AddedComplexity = 1 in {
def Wrx : BaseAddSubEReg<isSub, 1, GPR32, GPR32sp,
- arith_extended_reg32<i32>, mnemonic, OpNode> {
+ arith_extended_reg32_i32, mnemonic, OpNode> {
let Inst{31} = 0;
}
def Xrx : BaseAddSubEReg<isSub, 1, GPR64, GPR64sp,
- arith_extended_reg32<i64>, mnemonic, OpNode> {
+ arith_extended_reg32_i64, mnemonic, OpNode> {
let Inst{31} = 1;
}
}
@@ -2969,6 +3025,22 @@ def ro_Xindexed32 : ComplexPattern<i64, 4, "SelectAddrModeXRO<32>", []>;
def ro_Xindexed64 : ComplexPattern<i64, 4, "SelectAddrModeXRO<64>", []>;
def ro_Xindexed128 : ComplexPattern<i64, 4, "SelectAddrModeXRO<128>", []>;
+def gi_ro_Xindexed8 :
+ GIComplexOperandMatcher<s64, "selectAddrModeXRO<8>">,
+ GIComplexPatternEquiv<ro_Xindexed8>;
+def gi_ro_Xindexed16 :
+ GIComplexOperandMatcher<s64, "selectAddrModeXRO<16>">,
+ GIComplexPatternEquiv<ro_Xindexed16>;
+def gi_ro_Xindexed32 :
+ GIComplexOperandMatcher<s64, "selectAddrModeXRO<32>">,
+ GIComplexPatternEquiv<ro_Xindexed32>;
+def gi_ro_Xindexed64 :
+ GIComplexOperandMatcher<s64, "selectAddrModeXRO<64>">,
+ GIComplexPatternEquiv<ro_Xindexed64>;
+def gi_ro_Xindexed128 :
+ GIComplexOperandMatcher<s64, "selectAddrModeXRO<128>">,
+ GIComplexPatternEquiv<ro_Xindexed128>;
+
def ro_Windexed8 : ComplexPattern<i64, 4, "SelectAddrModeWRO<8>", []>;
def ro_Windexed16 : ComplexPattern<i64, 4, "SelectAddrModeWRO<16>", []>;
def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>;
@@ -4086,7 +4158,7 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
- : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>,
+ : I<(outs), (ins i32_imm0_65535:$imm), asm, "\t$imm", "", []>,
Sched<[WriteSys]> {
bits<16> imm;
let Inst{31-24} = 0b11010100;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 215e96a82d0e..5c35e5bcdd30 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
@@ -82,6 +83,10 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
}
+ // Meta-instructions emit no code.
+ if (MI.isMetaInstruction())
+ return 0;
+
// FIXME: We currently only handle pseudoinstructions that don't get expanded
// before the assembly printer.
unsigned NumBytes = 0;
@@ -91,12 +96,6 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// Anything not explicitly designated otherwise is a normal 4-byte insn.
NumBytes = 4;
break;
- case TargetOpcode::DBG_VALUE:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- NumBytes = 0;
- break;
case TargetOpcode::STACKMAP:
// The upper bound for a stackmap intrinsic is the full length of its shadow
NumBytes = StackMapOpers(&MI).getNumPatchBytes();
@@ -416,7 +415,7 @@ unsigned AArch64InstrInfo::insertBranch(
// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
- while (TargetRegisterInfo::isVirtualRegister(VReg)) {
+ while (Register::isVirtualRegister(VReg)) {
const MachineInstr *DefMI = MRI.getVRegDef(VReg);
if (!DefMI->isFullCopy())
return VReg;
@@ -431,7 +430,7 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned *NewVReg = nullptr) {
VReg = removeCopies(MRI, VReg);
- if (!TargetRegisterInfo::isVirtualRegister(VReg))
+ if (!Register::isVirtualRegister(VReg))
return 0;
bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
@@ -574,7 +573,7 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
CC = AArch64CC::NE;
break;
}
- unsigned SrcReg = Cond[2].getReg();
+ Register SrcReg = Cond[2].getReg();
if (Is64Bit) {
// cmp reg, #0 is actually subs xzr, reg, #0.
MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
@@ -930,7 +929,7 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
}
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
- const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const {
+ const MachineInstr &MIa, const MachineInstr &MIb) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
@@ -1071,8 +1070,8 @@ static bool UpdateOperandRegClass(MachineInstr &Instr) {
assert(MO.isReg() &&
"Operand has register constraints without being a register!");
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
if (!OpRegCstraints->contains(Reg))
return false;
} else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
@@ -1472,6 +1471,8 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
MachineBasicBlock &MBB = *MI.getParent();
+ auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
+ auto TRI = Subtarget.getRegisterInfo();
DebugLoc DL = MI.getDebugLoc();
if (MI.getOpcode() == AArch64::CATCHRET) {
@@ -1497,21 +1498,32 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
const GlobalValue *GV =
cast<GlobalValue>((*MI.memoperands_begin())->getValue());
const TargetMachine &TM = MBB.getParent()->getTarget();
- unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
+ unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
const unsigned char MO_NC = AArch64II::MO_NC;
if ((OpFlags & AArch64II::MO_GOT) != 0) {
BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
.addGlobalAddress(GV, 0, OpFlags);
- BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(0)
- .addMemOperand(*MI.memoperands_begin());
+ if (Subtarget.isTargetILP32()) {
+ unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+ .addDef(Reg32, RegState::Dead)
+ .addUse(Reg, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(*MI.memoperands_begin())
+ .addDef(Reg, RegState::Implicit);
+ } else {
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(*MI.memoperands_begin());
+ }
} else if (TM.getCodeModel() == CodeModel::Large) {
+ assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
.addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
.addImm(0);
@@ -1538,10 +1550,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
.addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
- BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
- .addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, LoFlags)
- .addMemOperand(*MI.memoperands_begin());
+ if (Subtarget.isTargetILP32()) {
+ unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+ .addDef(Reg32, RegState::Dead)
+ .addUse(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, LoFlags)
+ .addMemOperand(*MI.memoperands_begin())
+ .addDef(Reg, RegState::Implicit);
+ } else {
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, LoFlags)
+ .addMemOperand(*MI.memoperands_begin());
+ }
}
MBB.erase(MI);
@@ -1581,7 +1603,7 @@ bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
break;
case TargetOpcode::COPY: {
// GPR32 copies will be lowered to ORRXrs
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
return (AArch64::GPR32RegClass.contains(DstReg) ||
AArch64::GPR64RegClass.contains(DstReg));
}
@@ -1611,7 +1633,7 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
break;
case TargetOpcode::COPY: {
// FPR64 copies will be lowered to ORR.16b
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
return (AArch64::FPR64RegClass.contains(DstReg) ||
AArch64::FPR128RegClass.contains(DstReg));
}
@@ -1917,7 +1939,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
// e.g., ldr x0, [x0]
// This case will never occur with an FI base.
if (MI.getOperand(1).isReg()) {
- unsigned BaseReg = MI.getOperand(1).getReg();
+ Register BaseReg = MI.getOperand(1).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (MI.modifiesRegister(BaseReg, TRI))
return false;
@@ -1928,6 +1950,17 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
if (isLdStPairSuppressed(MI))
return false;
+ // Do not pair any callee-save store/reload instructions in the
+ // prologue/epilogue if the CFI information encoded the operations as separate
+  // instructions, as that would make the size of the actual prologue differ
+  // from the prologue size recorded in the Windows CFI.
+ const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
+ bool NeedsWinCFI = MAI->usesWindowsCFI() &&
+ MI.getMF()->getFunction().needsUnwindTableEntry();
+ if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
+ MI.getFlag(MachineInstr::FrameDestroy)))
+ return false;
+
// On some CPUs quad load/store pairs are slower than two single load/stores.
if (Subtarget.isPaired128Slow()) {
switch (MI.getOpcode()) {
@@ -2165,6 +2198,18 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
MinOffset = -256;
MaxOffset = 255;
break;
+ case AArch64::LDR_PXI:
+ case AArch64::STR_PXI:
+ Scale = Width = 2;
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDR_ZXI:
+ case AArch64::STR_ZXI:
+ Scale = Width = 16;
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:
Scale = 16;
@@ -2350,7 +2395,7 @@ static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
if (!SubIdx)
return MIB.addReg(Reg, State);
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
return MIB.addReg(Reg, State, SubIdx);
}
@@ -2474,6 +2519,27 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // Copy a Predicate register by ORRing with itself.
+ if (AArch64::PPRRegClass.contains(DestReg) &&
+ AArch64::PPRRegClass.contains(SrcReg)) {
+ assert(Subtarget.hasSVE() && "Unexpected SVE register.");
+ BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
+ .addReg(SrcReg) // Pg
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // Copy a Z register by ORRing with itself.
+ if (AArch64::ZPRRegClass.contains(DestReg) &&
+ AArch64::ZPRRegClass.contains(SrcReg)) {
+ assert(Subtarget.hasSVE() && "Unexpected SVE register.");
+ BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
if (AArch64::GPR64spRegClass.contains(DestReg) &&
(AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
@@ -2722,7 +2788,7 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
MachineMemOperand *MMO) {
unsigned SrcReg0 = SrcReg;
unsigned SrcReg1 = SrcReg;
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ if (Register::isPhysicalRegister(SrcReg)) {
SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
SubIdx0 = 0;
SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
@@ -2761,7 +2827,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRWui;
- if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+ if (Register::isVirtualRegister(SrcReg))
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
else
assert(SrcReg != AArch64::WSP);
@@ -2771,7 +2837,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
case 8:
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRXui;
- if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+ if (Register::isVirtualRegister(SrcReg))
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
else
assert(SrcReg != AArch64::SP);
@@ -2852,7 +2918,7 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
unsigned DestReg0 = DestReg;
unsigned DestReg1 = DestReg;
bool IsUndef = true;
- if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+ if (Register::isPhysicalRegister(DestReg)) {
DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
SubIdx0 = 0;
DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
@@ -2892,7 +2958,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRWui;
- if (TargetRegisterInfo::isVirtualRegister(DestReg))
+ if (Register::isVirtualRegister(DestReg))
MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
else
assert(DestReg != AArch64::WSP);
@@ -2902,7 +2968,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
case 8:
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRXui;
- if (TargetRegisterInfo::isVirtualRegister(DestReg))
+ if (Register::isVirtualRegister(DestReg))
MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
else
assert(DestReg != AArch64::SP);
@@ -2972,21 +3038,39 @@ void AArch64InstrInfo::loadRegFromStackSlot(
MI.addMemOperand(MMO);
}
-void llvm::emitFrameOffset(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- unsigned DestReg, unsigned SrcReg, int Offset,
- const TargetInstrInfo *TII,
- MachineInstr::MIFlag Flag, bool SetNZCV,
- bool NeedsWinCFI, bool *HasWinCFI) {
- if (DestReg == SrcReg && Offset == 0)
- return;
-
- assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
- "SP increment/decrement not 16-byte aligned");
-
- bool isSub = Offset < 0;
- if (isSub)
- Offset = -Offset;
+// Helper function to emit a frame offset adjustment from a given
+// pointer (SrcReg) into DestReg. Unlike emitFrameOffset below, the
+// caller must supply the exact opcode to emit.
+static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, int64_t Offset, unsigned Opc,
+ const TargetInstrInfo *TII,
+ MachineInstr::MIFlag Flag, bool NeedsWinCFI,
+ bool *HasWinCFI) {
+ int Sign = 1;
+ unsigned MaxEncoding, ShiftSize;
+ switch (Opc) {
+ case AArch64::ADDXri:
+ case AArch64::ADDSXri:
+ case AArch64::SUBXri:
+ case AArch64::SUBSXri:
+ MaxEncoding = 0xfff;
+ ShiftSize = 12;
+ break;
+ case AArch64::ADDVL_XXI:
+ case AArch64::ADDPL_XXI:
+ MaxEncoding = 31;
+ ShiftSize = 0;
+ if (Offset < 0) {
+ MaxEncoding = 32;
+ Sign = -1;
+ Offset = -Offset;
+ }
+ break;
+ default:
+ llvm_unreachable("Unsupported opcode");
+ }
// FIXME: If the offset won't fit in 24-bits, compute the offset into a
// scratch register. If DestReg is a virtual register, use it as the
@@ -2999,65 +3083,94 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
// of code.
// assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
- unsigned Opc;
- if (SetNZCV)
- Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
- else
- Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
- const unsigned MaxEncoding = 0xfff;
- const unsigned ShiftSize = 12;
const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
- while (((unsigned)Offset) >= (1 << ShiftSize)) {
- unsigned ThisVal;
- if (((unsigned)Offset) > MaxEncodableValue) {
- ThisVal = MaxEncodableValue;
- } else {
- ThisVal = Offset & MaxEncodableValue;
+ do {
+ unsigned ThisVal = std::min<unsigned>(Offset, MaxEncodableValue);
+ unsigned LocalShiftSize = 0;
+ if (ThisVal > MaxEncoding) {
+ ThisVal = ThisVal >> ShiftSize;
+ LocalShiftSize = ShiftSize;
}
assert((ThisVal >> ShiftSize) <= MaxEncoding &&
"Encoding cannot handle value that big");
- BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
- .addReg(SrcReg)
- .addImm(ThisVal >> ShiftSize)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
- .setMIFlag(Flag);
-
- if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) {
+ auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
+ .addReg(SrcReg)
+ .addImm(Sign * (int)ThisVal);
+ if (ShiftSize)
+ MBI = MBI.addImm(
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
+ MBI = MBI.setMIFlag(Flag);
+
+ if (NeedsWinCFI) {
+ assert(Sign == 1 && "SEH directives should always have a positive sign");
+ int Imm = (int)(ThisVal << LocalShiftSize);
+ if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
+ (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
+ if (HasWinCFI)
+ *HasWinCFI = true;
+ if (Imm == 0)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
+ else
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to "
+ "emit a single SEH directive");
+ } else if (DestReg == AArch64::SP) {
+ if (HasWinCFI)
+ *HasWinCFI = true;
+ assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ }
if (HasWinCFI)
*HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
- .addImm(ThisVal)
- .setMIFlag(Flag);
}
SrcReg = DestReg;
- Offset -= ThisVal;
- if (Offset == 0)
- return;
- }
- BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
- .addReg(SrcReg)
- .addImm(Offset)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
- .setMIFlag(Flag);
+ Offset -= ThisVal << LocalShiftSize;
+ } while (Offset);
+}
- if (NeedsWinCFI) {
- if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
- (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
- if (HasWinCFI)
- *HasWinCFI = true;
- if (Offset == 0)
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
- setMIFlag(Flag);
- else
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
- addImm(Offset).setMIFlag(Flag);
- } else if (DestReg == AArch64::SP) {
- if (HasWinCFI)
- *HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
- addImm(Offset).setMIFlag(Flag);
+void llvm::emitFrameOffset(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ unsigned DestReg, unsigned SrcReg,
+ StackOffset Offset, const TargetInstrInfo *TII,
+ MachineInstr::MIFlag Flag, bool SetNZCV,
+ bool NeedsWinCFI, bool *HasWinCFI) {
+ int64_t Bytes, NumPredicateVectors, NumDataVectors;
+ Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
+
+ // First emit non-scalable frame offsets, or a simple 'mov'.
+ if (Bytes || (!Offset && SrcReg != DestReg)) {
+ assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
+ "SP increment/decrement not 16-byte aligned");
+ unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
+ if (Bytes < 0) {
+ Bytes = -Bytes;
+ Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
}
+ emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
+ NeedsWinCFI, HasWinCFI);
+ SrcReg = DestReg;
+ }
+
+ assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
+ "SetNZCV not supported with SVE vectors");
+ assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
+ "WinCFI not supported with SVE vectors");
+
+ if (NumDataVectors) {
+ emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
+ AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
+ SrcReg = DestReg;
+ }
+
+ if (NumPredicateVectors) {
+ assert(DestReg != AArch64::SP && "Unaligned access to SP");
+ emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
+ AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
}
}
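
// Standalone sketch of the chunking loop in emitFrameOffsetAdj for the
// ADD/SUB Xri case (MaxEncoding = 0xfff, ShiftSize = 12): each iteration
// peels off the largest piece encodable as a 12-bit immediate, optionally
// shifted left by 12. Printing stands in for BuildMI; illustrative only.
#include <algorithm>
#include <cstdint>
#include <cstdio>

void emitAddChunks(uint64_t Offset) {
  const unsigned MaxEncoding = 0xfff, ShiftSize = 12;
  const uint64_t MaxEncodableValue = (uint64_t)MaxEncoding << ShiftSize;
  do {
    uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
    unsigned LocalShiftSize = 0;
    if (ThisVal > MaxEncoding) {
      ThisVal >>= ShiftSize; // encode the high 12 bits with LSL #12
      LocalShiftSize = ShiftSize;
    }
    std::printf("add dst, src, #%llu, lsl #%u\n",
                (unsigned long long)ThisVal, LocalShiftSize);
    Offset -= ThisVal << LocalShiftSize;
  } while (Offset);
}
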
@@ -3079,15 +3192,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// <rdar://problem/11522048>
//
if (MI.isFullCopy()) {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (SrcReg == AArch64::SP &&
- TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
return nullptr;
}
- if (DstReg == AArch64::SP &&
- TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
return nullptr;
}
@@ -3127,14 +3238,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
MachineBasicBlock &MBB = *MI.getParent();
const MachineOperand &DstMO = MI.getOperand(0);
const MachineOperand &SrcMO = MI.getOperand(1);
- unsigned DstReg = DstMO.getReg();
- unsigned SrcReg = SrcMO.getReg();
+ Register DstReg = DstMO.getReg();
+ Register SrcReg = SrcMO.getReg();
// This is slightly expensive to compute for physical regs since
// getMinimalPhysRegClass is slow.
auto getRegClass = [&](unsigned Reg) {
- return TargetRegisterInfo::isVirtualRegister(Reg)
- ? MRI.getRegClass(Reg)
- : TRI.getMinimalPhysRegClass(Reg);
+ return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
+ : TRI.getMinimalPhysRegClass(Reg);
};
if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
@@ -3159,8 +3269,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// STRXui %xzr, %stack.0
//
- if (IsSpill && DstMO.isUndef() &&
- TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
assert(SrcMO.getSubReg() == 0 &&
"Unexpected subreg on physical register");
const TargetRegisterClass *SpillRC;
@@ -3243,10 +3352,23 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
return nullptr;
}
-int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
+static bool isSVEScaledImmInstruction(unsigned Opcode) {
+ switch (Opcode) {
+ case AArch64::LDR_ZXI:
+ case AArch64::STR_ZXI:
+ case AArch64::LDR_PXI:
+ case AArch64::STR_PXI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
+ StackOffset &SOffset,
bool *OutUseUnscaledOp,
unsigned *OutUnscaledOp,
- int *EmittableOffset) {
+ int64_t *EmittableOffset) {
// Set output values in case of early exit.
if (EmittableOffset)
*EmittableOffset = 0;
@@ -3285,6 +3407,10 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
// Construct the complete offset.
+ bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
+ int64_t Offset =
+ IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
+
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
Offset += ImmOpnd.getImm() * Scale;
@@ -3304,7 +3430,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
"Cannot have remainder when using unscaled op");
assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
- int NewOffset = Offset / Scale;
+ int64_t NewOffset = Offset / Scale;
if (MinOff <= NewOffset && NewOffset <= MaxOff)
Offset = Remainder;
else {
@@ -3319,27 +3445,33 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
if (OutUnscaledOp && UnscaledOp)
*OutUnscaledOp = *UnscaledOp;
+ if (IsMulVL)
+ SOffset = StackOffset(Offset, MVT::nxv1i8) +
+ StackOffset(SOffset.getBytes(), MVT::i8);
+ else
+ SOffset = StackOffset(Offset, MVT::i8) +
+ StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
return AArch64FrameOffsetCanUpdate |
- (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
+ (SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
+ unsigned FrameReg, StackOffset &Offset,
const AArch64InstrInfo *TII) {
unsigned Opcode = MI.getOpcode();
unsigned ImmIdx = FrameRegIdx + 1;
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
- Offset += MI.getOperand(ImmIdx).getImm();
+ Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
MI.getOperand(0).getReg(), FrameReg, Offset, TII,
MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
MI.eraseFromParent();
- Offset = 0;
+ Offset = StackOffset();
return true;
}
- int NewOffset;
+ int64_t NewOffset;
unsigned UnscaledOp;
bool UseUnscaledOp;
int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
@@ -3352,7 +3484,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
MI.setDesc(TII->get(UnscaledOp));
MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
- return Offset == 0;
+ return !Offset;
}
return false;
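
// Hedged illustration of the StackOffset convention used above: MVT::i8
// counts fixed bytes and MVT::nxv1i8 counts vector-length-scaled ("mul vl")
// bytes, and the two components are tracked independently. A fragment only,
// assuming the StackOffset helper introduced alongside this patch.
void stackOffsetExample() {
  StackOffset Off = StackOffset(16, MVT::i8)     // 16 fixed bytes
                  + StackOffset(2, MVT::nxv1i8); // 2 x VL scalable bytes
  int64_t Fixed = Off.getBytes();                // 16
  int64_t Scalable = Off.getScalableBytes();     // 2
  (void)Fixed; (void)Scalable;
}
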
@@ -3428,13 +3560,19 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
default:
break;
+ case AArch64::FADDHrr:
case AArch64::FADDSrr:
case AArch64::FADDDrr:
+ case AArch64::FADDv4f16:
+ case AArch64::FADDv8f16:
case AArch64::FADDv2f32:
case AArch64::FADDv2f64:
case AArch64::FADDv4f32:
+ case AArch64::FSUBHrr:
case AArch64::FSUBSrr:
case AArch64::FSUBDrr:
+ case AArch64::FSUBv4f16:
+ case AArch64::FSUBv8f16:
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
@@ -3459,7 +3597,7 @@ static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineInstr *MI = nullptr;
- if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
MI = MRI.getUniqueVRegDef(MO.getReg());
// And it needs to be in the trace (otherwise, it won't have a depth).
if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
@@ -3544,86 +3682,48 @@ static bool getMaddPatterns(MachineInstr &Root,
Opc = NewOpc;
}
+ auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
+ MachineCombinerPattern Pattern) {
+ if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
+ Patterns.push_back(Pattern);
+ Found = true;
+ }
+ };
+
+ typedef MachineCombinerPattern MCP;
+
switch (Opc) {
default:
break;
case AArch64::ADDWrr:
assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
"ADDWrr does not have register operands");
- if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
- AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
- Found = true;
- }
- if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
- AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
- Found = true;
- }
+ setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
+ setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
break;
case AArch64::ADDXrr:
- if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
- AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
- Found = true;
- }
- if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
- AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
- Found = true;
- }
+ setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
+ setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
break;
case AArch64::SUBWrr:
- if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
- AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
- Found = true;
- }
- if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
- AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
- Found = true;
- }
+ setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
+ setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
break;
case AArch64::SUBXrr:
- if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
- AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
- Found = true;
- }
- if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
- AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
- Found = true;
- }
+ setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
+ setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
break;
case AArch64::ADDWri:
- if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
- AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
- Found = true;
- }
+ setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
break;
case AArch64::ADDXri:
- if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
- AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
- Found = true;
- }
+ setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
break;
case AArch64::SUBWri:
- if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
- AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
- Found = true;
- }
+ setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
break;
case AArch64::SUBXri:
- if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
- AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
- Found = true;
- }
+ setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
break;
}
return Found;
@@ -3640,204 +3740,135 @@ static bool getFMAPatterns(MachineInstr &Root,
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
+ auto Match = [&](int Opcode, int Operand,
+ MachineCombinerPattern Pattern) -> bool {
+ if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
+ Patterns.push_back(Pattern);
+ return true;
+ }
+ return false;
+ };
+
+ typedef MachineCombinerPattern MCP;
+
switch (Root.getOpcode()) {
default:
assert(false && "Unsupported FP instruction in combiner\n");
break;
+ case AArch64::FADDHrr:
+ assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
+ "FADDHrr does not have register operands");
+
+ Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
+ Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
+ break;
case AArch64::FADDSrr:
assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
- "FADDWrr does not have register operands");
- if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
- Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv1i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
- Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv1i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
- Found = true;
- }
+ "FADDSrr does not have register operands");
+
+ Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
+ Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
+
+ Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
+ Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
break;
case AArch64::FADDDrr:
- if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
- Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv1i64_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
- Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv1i64_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
- Found = true;
- }
+ Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
+ Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
+
+ Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
+ Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
+ break;
+ case AArch64::FADDv4f16:
+ Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
+ Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
+
+ Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
+ Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
+ break;
+ case AArch64::FADDv8f16:
+ Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
+ Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
+
+ Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
+ Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
break;
case AArch64::FADDv2f32:
- if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv2i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv2f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
- Found = true;
- }
+ Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
+ Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
+
+ Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
+ Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
break;
case AArch64::FADDv2f64:
- if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2i64_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2f64)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv2i64_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv2f64)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
- Found = true;
- }
+ Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
+ Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
+
+ Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
+ Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
break;
case AArch64::FADDv4f32:
- if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv4i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv4f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv4i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv4f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
- Found = true;
- }
- break;
+ Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
+ Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
+ Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
+ Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
+ break;
+ case AArch64::FSUBHrr:
+ Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
+ Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
+ Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
+ break;
case AArch64::FSUBSrr:
- if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
- Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
- Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv1i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
- Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
- Found = true;
- }
+ Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
+
+ Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
+ Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
+
+ Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
break;
case AArch64::FSUBDrr:
- if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
- Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
- Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv1i64_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
- Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
- Found = true;
- }
+ Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
+
+ Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
+ Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
+
+ Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
+ break;
+ case AArch64::FSUBv4f16:
+ Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
+ Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
+
+ Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
+ Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
+ break;
+ case AArch64::FSUBv8f16:
+ Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
+ Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
+
+ Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
+ Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
break;
case AArch64::FSUBv2f32:
- if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv2i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv2f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
- Found = true;
- }
+ Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
+ Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
+
+ Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
+ Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
break;
case AArch64::FSUBv2f64:
- if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv2i64_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv2f64)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2i64_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv2f64)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
- Found = true;
- }
+ Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
+ Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
+
+ Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
+ Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
break;
case AArch64::FSUBv4f32:
- if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv4i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
- AArch64::FMULv4f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
- Found = true;
- }
- if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv4i32_indexed)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
- Found = true;
- } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
- AArch64::FMULv4f32)) {
- Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
- Found = true;
- }
+ Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
+ Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
+
+ Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
+ Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
break;
}
return Found;
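
The rewritten switch above folds each repeated canCombineWithFMUL/push_back block into a single Match helper (defined earlier in the function, outside this hunk) combined with Found |= ... || ... . A minimal standalone sketch of that accumulation idiom, with hypothetical opcode and pattern enums standing in for the AArch64 definitions:

// Sketch of the Found |= Match(...) idiom used above; the Opcode and
// Pattern enums are hypothetical stand-ins for the AArch64 definitions.
#include <iostream>
#include <vector>

enum class Opcode { FMULSrr, FMULv1i32_indexed };
enum class Pattern { FMULADDS_OP1, FMLAv1i32_indexed_OP1 };

int main() {
  std::vector<Pattern> Patterns;
  // Stand-in for canCombineWithFMUL: pretend only plain FMULSrr feeds
  // operand 1 of the root instruction.
  auto CanCombine = [](Opcode Opc, unsigned Operand) {
    return Opc == Opcode::FMULSrr && Operand == 1;
  };
  auto Match = [&](Opcode Opc, unsigned Operand, Pattern P) {
    if (!CanCombine(Opc, Operand))
      return false;
    Patterns.push_back(P);
    return true;
  };
  bool Found = false;
  // The || short-circuits exactly like the original if/else-if chain:
  // the indexed form is only tried when the plain form did not match.
  Found |= Match(Opcode::FMULSrr, 1, Pattern::FMULADDS_OP1) ||
           Match(Opcode::FMULv1i32_indexed, 1, Pattern::FMLAv1i32_indexed_OP1);
  std::cout << Found << ' ' << Patterns.size() << '\n'; // 1 1
}
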
@@ -3851,6 +3882,10 @@ bool AArch64InstrInfo::isThroughputPattern(
switch (Pattern) {
default:
break;
+ case MachineCombinerPattern::FMULADDH_OP1:
+ case MachineCombinerPattern::FMULADDH_OP2:
+ case MachineCombinerPattern::FMULSUBH_OP1:
+ case MachineCombinerPattern::FMULSUBH_OP2:
case MachineCombinerPattern::FMULADDS_OP1:
case MachineCombinerPattern::FMULADDS_OP2:
case MachineCombinerPattern::FMULSUBS_OP1:
@@ -3859,12 +3894,21 @@ bool AArch64InstrInfo::isThroughputPattern(
case MachineCombinerPattern::FMULADDD_OP2:
case MachineCombinerPattern::FMULSUBD_OP1:
case MachineCombinerPattern::FMULSUBD_OP2:
+ case MachineCombinerPattern::FNMULSUBH_OP1:
case MachineCombinerPattern::FNMULSUBS_OP1:
case MachineCombinerPattern::FNMULSUBD_OP1:
+ case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
+ case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
+ case MachineCombinerPattern::FMLAv4f16_OP2:
+ case MachineCombinerPattern::FMLAv4f16_OP1:
+ case MachineCombinerPattern::FMLAv8f16_OP1:
+ case MachineCombinerPattern::FMLAv8f16_OP2:
case MachineCombinerPattern::FMLAv2f32_OP2:
case MachineCombinerPattern::FMLAv2f32_OP1:
case MachineCombinerPattern::FMLAv2f64_OP1:
@@ -3877,10 +3921,18 @@ bool AArch64InstrInfo::isThroughputPattern(
case MachineCombinerPattern::FMLAv4f32_OP2:
case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
+ case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
+ case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
+ case MachineCombinerPattern::FMLSv4f16_OP1:
+ case MachineCombinerPattern::FMLSv4f16_OP2:
+ case MachineCombinerPattern::FMLSv8f16_OP1:
+ case MachineCombinerPattern::FMLSv8f16_OP2:
case MachineCombinerPattern::FMLSv2f32_OP2:
case MachineCombinerPattern::FMLSv2f64_OP2:
case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
@@ -3933,15 +3985,15 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
unsigned MaddOpc, const TargetRegisterClass *RC,
FMAInstKind kind = FMAInstKind::Default,
- const unsigned *ReplacedAddend = nullptr) {
+ const Register *ReplacedAddend = nullptr) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
- unsigned ResultReg = Root.getOperand(0).getReg();
- unsigned SrcReg0 = MUL->getOperand(1).getReg();
+ Register ResultReg = Root.getOperand(0).getReg();
+ Register SrcReg0 = MUL->getOperand(1).getReg();
bool Src0IsKill = MUL->getOperand(1).isKill();
- unsigned SrcReg1 = MUL->getOperand(2).getReg();
+ Register SrcReg1 = MUL->getOperand(2).getReg();
bool Src1IsKill = MUL->getOperand(2).isKill();
unsigned SrcReg2;
@@ -3955,13 +4007,13 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
}
- if (TargetRegisterInfo::isVirtualRegister(ResultReg))
+ if (Register::isVirtualRegister(ResultReg))
MRI.constrainRegClass(ResultReg, RC);
- if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
+ if (Register::isVirtualRegister(SrcReg0))
MRI.constrainRegClass(SrcReg0, RC);
- if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
+ if (Register::isVirtualRegister(SrcReg1))
MRI.constrainRegClass(SrcReg1, RC);
- if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
+ if (Register::isVirtualRegister(SrcReg2))
MRI.constrainRegClass(SrcReg2, RC);
MachineInstrBuilder MIB;
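
Much of the churn in these hunks is the mechanical unsigned-to-Register migration. A rough self-contained sketch of what such a typed handle looks like; the real class is llvm::Register, and the high-bit tagging scheme below is only an assumption for illustration:

#include <iostream>

// Toy Register handle mirroring the shape of llvm::Register: an implicit
// wrapper over unsigned with static classification helpers.
class Register {
  unsigned Reg = 0;
  static constexpr unsigned VirtualFlag = 1u << 31; // assumed encoding

public:
  Register() = default;
  Register(unsigned R) : Reg(R) {}           // implicit, like llvm::Register
  operator unsigned() const { return Reg; }  // keeps old unsigned code working
  static bool isVirtualRegister(unsigned R) { return (R & VirtualFlag) != 0; }
  static bool isPhysicalRegister(unsigned R) {
    return R != 0 && (R & VirtualFlag) == 0;
  }
};

int main() {
  Register Phys = 3;              // e.g. a physical register number
  Register Virt = (1u << 31) | 0; // first virtual register under this scheme
  std::cout << Register::isPhysicalRegister(Phys) << ' '
            << Register::isVirtualRegister(Virt) << '\n'; // 1 1
}
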
@@ -4015,19 +4067,19 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
- unsigned ResultReg = Root.getOperand(0).getReg();
- unsigned SrcReg0 = MUL->getOperand(1).getReg();
+ Register ResultReg = Root.getOperand(0).getReg();
+ Register SrcReg0 = MUL->getOperand(1).getReg();
bool Src0IsKill = MUL->getOperand(1).isKill();
- unsigned SrcReg1 = MUL->getOperand(2).getReg();
+ Register SrcReg1 = MUL->getOperand(2).getReg();
bool Src1IsKill = MUL->getOperand(2).isKill();
- if (TargetRegisterInfo::isVirtualRegister(ResultReg))
+ if (Register::isVirtualRegister(ResultReg))
MRI.constrainRegClass(ResultReg, RC);
- if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
+ if (Register::isVirtualRegister(SrcReg0))
MRI.constrainRegClass(SrcReg0, RC);
- if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
+ if (Register::isVirtualRegister(SrcReg1))
MRI.constrainRegClass(SrcReg1, RC);
- if (TargetRegisterInfo::isVirtualRegister(VR))
+ if (Register::isVirtualRegister(VR))
MRI.constrainRegClass(VR, RC);
MachineInstrBuilder MIB =
@@ -4116,7 +4168,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
- unsigned NewVR = MRI.createVirtualRegister(OrrRC);
+ Register NewVR = MRI.createVirtualRegister(OrrRC);
uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
@@ -4158,7 +4210,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
- unsigned NewVR = MRI.createVirtualRegister(SubRC);
+ Register NewVR = MRI.createVirtualRegister(SubRC);
// SUB NewVR, 0, C
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
@@ -4208,7 +4260,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
- unsigned NewVR = MRI.createVirtualRegister(OrrRC);
+ Register NewVR = MRI.createVirtualRegister(OrrRC);
uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
@@ -4228,34 +4280,35 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
break;
}
// Floating Point Support
+ case MachineCombinerPattern::FMULADDH_OP1:
+ Opc = AArch64::FMADDHrrr;
+ RC = &AArch64::FPR16RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
case MachineCombinerPattern::FMULADDS_OP1:
+ Opc = AArch64::FMADDSrrr;
+ RC = &AArch64::FPR32RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
case MachineCombinerPattern::FMULADDD_OP1:
- // MUL I=A,B,0
- // ADD R,I,C
- // ==> MADD R,A,B,C
- // --- Create(MADD);
- if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
- Opc = AArch64::FMADDSrrr;
- RC = &AArch64::FPR32RegClass;
- } else {
- Opc = AArch64::FMADDDrrr;
- RC = &AArch64::FPR64RegClass;
- }
+ Opc = AArch64::FMADDDrrr;
+ RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
+
+ case MachineCombinerPattern::FMULADDH_OP2:
+ Opc = AArch64::FMADDHrrr;
+ RC = &AArch64::FPR16RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
case MachineCombinerPattern::FMULADDS_OP2:
+ Opc = AArch64::FMADDSrrr;
+ RC = &AArch64::FPR32RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
case MachineCombinerPattern::FMULADDD_OP2:
- // FMUL I=A,B,0
- // FADD R,C,I
- // ==> FMADD R,A,B,C
- // --- Create(FMADD);
- if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
- Opc = AArch64::FMADDSrrr;
- RC = &AArch64::FPR32RegClass;
- } else {
- Opc = AArch64::FMADDDrrr;
- RC = &AArch64::FPR64RegClass;
- }
+ Opc = AArch64::FMADDDrrr;
+ RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
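
The comments deleted here documented the transform being performed: FMUL I=A,B followed by FADD R,I,C (OP1) or FADD R,C,I (OP2) becomes FMADD R,A,B,C. A scalar check of that identity, up to the single rounding the fused form gains:

#include <cmath>
#include <iostream>

int main() {
  double A = 1.5, B = -2.25, C = 0.75;
  double Mul = A * B;
  double Op1 = Mul + C;             // FMULADD*_OP1: multiply feeds operand 1
  double Op2 = C + Mul;             // FMULADD*_OP2: multiply feeds operand 2
  double Fmadd = std::fma(A, B, C); // single-rounding A*B + C, like FMADD
  std::cout << Op1 << ' ' << Op2 << ' ' << Fmadd << '\n'; // all equal here
}
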
@@ -4285,6 +4338,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
FMAInstKind::Indexed);
break;
+ case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
+ RC = &AArch64::FPR64RegClass;
+ Opc = AArch64::FMLAv4i16_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+ case MachineCombinerPattern::FMLAv4f16_OP1:
+ RC = &AArch64::FPR64RegClass;
+ Opc = AArch64::FMLAv4f16;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator);
+ break;
+ case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
+ RC = &AArch64::FPR64RegClass;
+ Opc = AArch64::FMLAv4i16_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+ case MachineCombinerPattern::FMLAv4f16_OP2:
+ RC = &AArch64::FPR64RegClass;
+ Opc = AArch64::FMLAv4f16;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ break;
+
case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
case MachineCombinerPattern::FMLAv2f32_OP1:
RC = &AArch64::FPR64RegClass;
@@ -4312,6 +4390,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
+ case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
+ RC = &AArch64::FPR128RegClass;
+ Opc = AArch64::FMLAv8i16_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+ case MachineCombinerPattern::FMLAv8f16_OP1:
+ RC = &AArch64::FPR128RegClass;
+ Opc = AArch64::FMLAv8f16;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator);
+ break;
+ case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
+ RC = &AArch64::FPR128RegClass;
+ Opc = AArch64::FMLAv8i16_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+ case MachineCombinerPattern::FMLAv8f16_OP2:
+ RC = &AArch64::FPR128RegClass;
+ Opc = AArch64::FMLAv8f16;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ break;
+
case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
case MachineCombinerPattern::FMLAv2f64_OP1:
RC = &AArch64::FPR128RegClass;
@@ -4367,56 +4470,53 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
+ case MachineCombinerPattern::FMULSUBH_OP1:
+ Opc = AArch64::FNMSUBHrrr;
+ RC = &AArch64::FPR16RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
case MachineCombinerPattern::FMULSUBS_OP1:
- case MachineCombinerPattern::FMULSUBD_OP1: {
- // FMUL I=A,B,0
- // FSUB R,I,C
- // ==> FNMSUB R,A,B,C // = -C + A*B
- // --- Create(FNMSUB);
- if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
- Opc = AArch64::FNMSUBSrrr;
- RC = &AArch64::FPR32RegClass;
- } else {
- Opc = AArch64::FNMSUBDrrr;
- RC = &AArch64::FPR64RegClass;
- }
+ Opc = AArch64::FNMSUBSrrr;
+ RC = &AArch64::FPR32RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::FMULSUBD_OP1:
+ Opc = AArch64::FNMSUBDrrr;
+ RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- }
+ case MachineCombinerPattern::FNMULSUBH_OP1:
+ Opc = AArch64::FNMADDHrrr;
+ RC = &AArch64::FPR16RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
case MachineCombinerPattern::FNMULSUBS_OP1:
- case MachineCombinerPattern::FNMULSUBD_OP1: {
- // FNMUL I=A,B,0
- // FSUB R,I,C
- // ==> FNMADD R,A,B,C // = -A*B - C
- // --- Create(FNMADD);
- if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
- Opc = AArch64::FNMADDSrrr;
- RC = &AArch64::FPR32RegClass;
- } else {
- Opc = AArch64::FNMADDDrrr;
- RC = &AArch64::FPR64RegClass;
- }
+ Opc = AArch64::FNMADDSrrr;
+ RC = &AArch64::FPR32RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::FNMULSUBD_OP1:
+ Opc = AArch64::FNMADDDrrr;
+ RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- }
+ case MachineCombinerPattern::FMULSUBH_OP2:
+ Opc = AArch64::FMSUBHrrr;
+ RC = &AArch64::FPR16RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
case MachineCombinerPattern::FMULSUBS_OP2:
- case MachineCombinerPattern::FMULSUBD_OP2: {
- // FMUL I=A,B,0
- // FSUB R,C,I
- // ==> FMSUB R,A,B,C (computes C - A*B)
- // --- Create(FMSUB);
- if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
- Opc = AArch64::FMSUBSrrr;
- RC = &AArch64::FPR32RegClass;
- } else {
- Opc = AArch64::FMSUBDrrr;
- RC = &AArch64::FPR64RegClass;
- }
+ Opc = AArch64::FMSUBSrrr;
+ RC = &AArch64::FPR32RegClass;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::FMULSUBD_OP2:
+ Opc = AArch64::FMSUBDrrr;
+ RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- }
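
The deleted comments in this hunk spelled out the three FSUB rewrites: FNMSUB R,A,B,C = -C + A*B, FNMADD R,A,B,C = -A*B - C, and FMSUB R,A,B,C = C - A*B. A quick scalar sanity check of those identities, exact for the small integer values chosen:

#include <cmath>
#include <iostream>

int main() {
  double A = 2.0, B = 3.0, C = 5.0;
  double Mul = A * B;
  double Fnmsub = std::fma(A, B, -C);  // A*B - C : FSUB R,(FMUL A,B),C
  double Fmsub  = std::fma(-A, B, C);  // C - A*B : FSUB R,C,(FMUL A,B)
  double Fnmadd = -std::fma(A, B, C);  // -A*B - C: FSUB R,(FNMUL A,B),C
  std::cout << (Fnmsub == Mul - C) << (Fmsub == C - Mul)
            << (Fnmadd == -Mul - C) << '\n'; // prints 111
}
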
case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
Opc = AArch64::FMLSv1i32_indexed;
@@ -4432,6 +4532,39 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
FMAInstKind::Indexed);
break;
+ case MachineCombinerPattern::FMLSv4f16_OP1:
+ case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
+ RC = &AArch64::FPR64RegClass;
+ Register NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
+ Opc = AArch64::FMLAv4f16;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv4i16_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ }
+ break;
+ }
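
The new OP1 FMLS cases lean on an algebraic rewrite rather than a direct instruction: there is no vector FMLS form with the multiply in the first operand, so the accumulator is negated with FNEG (the NewVR above) and an FMLA finishes the job, since mul - acc == fma(a, b, -acc). A scalar sketch of the rewrite:

#include <cmath>
#include <iostream>

int main() {
  float A = 1.25f, B = -4.0f, Acc = 2.5f;
  float Direct = A * B - Acc;               // FSUB v, (FMUL a, b), acc
  float NegAcc = -Acc;                      // the inserted FNEG (NewVR)
  float Rewritten = std::fma(A, B, NegAcc); // FMLA NewVR, a, b
  std::cout << Direct << ' ' << Rewritten << '\n'; // equal up to fusion
}
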
+ case MachineCombinerPattern::FMLSv4f16_OP2:
+ RC = &AArch64::FPR64RegClass;
+ Opc = AArch64::FMLSv4f16;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ break;
+ case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
+ RC = &AArch64::FPR64RegClass;
+ Opc = AArch64::FMLSv4i16_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+
case MachineCombinerPattern::FMLSv2f32_OP2:
case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
RC = &AArch64::FPR64RegClass;
@@ -4446,6 +4579,39 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
+ case MachineCombinerPattern::FMLSv8f16_OP1:
+ case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
+ RC = &AArch64::FPR128RegClass;
+ Register NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
+ Opc = AArch64::FMLAv8f16;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv8i16_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ }
+ break;
+ }
+ case MachineCombinerPattern::FMLSv8f16_OP2:
+ RC = &AArch64::FPR128RegClass;
+ Opc = AArch64::FMLSv8f16;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Accumulator);
+ break;
+ case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
+ RC = &AArch64::FPR128RegClass;
+ Opc = AArch64::FMLSv8i16_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
+ FMAInstKind::Indexed);
+ break;
+
case MachineCombinerPattern::FMLSv2f64_OP2:
case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
RC = &AArch64::FPR128RegClass;
@@ -4476,7 +4642,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
case MachineCombinerPattern::FMLSv2f32_OP1:
case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
RC = &AArch64::FPR64RegClass;
- unsigned NewVR = MRI.createVirtualRegister(RC);
+ Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
.add(Root.getOperand(2));
@@ -4496,7 +4662,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
case MachineCombinerPattern::FMLSv4f32_OP1:
case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
- unsigned NewVR = MRI.createVirtualRegister(RC);
+ Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
.add(Root.getOperand(2));
@@ -4516,7 +4682,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
case MachineCombinerPattern::FMLSv2f64_OP1:
case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
- unsigned NewVR = MRI.createVirtualRegister(RC);
+ Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
.add(Root.getOperand(2));
@@ -4617,15 +4783,15 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- unsigned VReg = MI.getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(VReg))
+ Register VReg = MI.getOperand(0).getReg();
+ if (!Register::isVirtualRegister(VReg))
return false;
MachineInstr *DefMI = MRI->getVRegDef(VReg);
// Look through COPY instructions to find definition.
while (DefMI->isCopy()) {
- unsigned CopyVReg = DefMI->getOperand(1).getReg();
+ Register CopyVReg = DefMI->getOperand(1).getReg();
if (!MRI->hasOneNonDBGUse(CopyVReg))
return false;
if (!MRI->hasOneDef(CopyVReg))
@@ -4653,8 +4819,8 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
return false;
MachineOperand &MO = DefMI->getOperand(1);
- unsigned NewReg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(NewReg))
+ Register NewReg = MO.getReg();
+ if (!Register::isVirtualRegister(NewReg))
return false;
assert(!MRI->def_empty(NewReg) && "Register must be defined.");
@@ -4737,9 +4903,13 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_COFFSTUB, "aarch64-coffstub"},
- {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
- {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"},
- {MO_DLLIMPORT, "aarch64-dllimport"}};
+ {MO_GOT, "aarch64-got"},
+ {MO_NC, "aarch64-nc"},
+ {MO_S, "aarch64-s"},
+ {MO_TLS, "aarch64-tls"},
+ {MO_DLLIMPORT, "aarch64-dllimport"},
+ {MO_PREL, "aarch64-prel"},
+ {MO_TAGGED, "aarch64-tagged"}};
return makeArrayRef(TargetFlags);
}
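
Besides reflowing the initializer, this hunk adds MO_PREL and MO_TAGGED to the name table, which is what lets the MIR printer and parser round-trip those operand flags by name. A toy version of the bitmask-to-name scheme; the flag values here are made up for illustration:

#include <cstdio>
#include <utility>

// Hypothetical flag bits; the real MO_* values live in AArch64InstrInfo.h.
enum : unsigned { MO_GOT = 1u << 0, MO_PREL = 1u << 1, MO_TAGGED = 1u << 2 };

int main() {
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"},
      {MO_PREL, "aarch64-prel"},
      {MO_TAGGED, "aarch64-tagged"}};
  unsigned Flags = MO_PREL | MO_TAGGED;
  for (const auto &F : TargetFlags) // print the name of every set flag
    if (Flags & F.first)
      std::printf("%s ", F.second);
  std::printf("\n"); // aarch64-prel aarch64-tagged
}
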
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 7be4daba7dc4..1688045e4fb8 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -15,6 +15,7 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
+#include "AArch64StackOffset.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -55,8 +56,7 @@ public:
bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA = nullptr) const override;
+ const MachineInstr &MIb) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
@@ -299,7 +299,7 @@ private:
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
- int Offset, const TargetInstrInfo *TII,
+ StackOffset Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
bool SetNZCV = false, bool NeedsWinCFI = false,
bool *HasWinCFI = nullptr);
@@ -308,7 +308,7 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
/// FP. Return false if the offset could not be handled directly in MI, and
/// return the left-over portion by reference.
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
+ unsigned FrameReg, StackOffset &Offset,
const AArch64InstrInfo *TII);
/// Use to report the frame offset status in isAArch64FrameOffsetLegal.
@@ -332,10 +332,10 @@ enum AArch64FrameOffsetStatus {
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
/// is a legal offset.
-int isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
+int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
bool *OutUseUnscaledOp = nullptr,
unsigned *OutUnscaledOp = nullptr,
- int *EmittableOffset = nullptr);
+ int64_t *EmittableOffset = nullptr);
static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index eed53f36d574..1981bd5d3bf0 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -62,6 +62,9 @@ def HasAM : Predicate<"Subtarget->hasAM()">,
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
AssemblerPredicate<"FeatureSEL2", "sel2">;
+def HasPMU : Predicate<"Subtarget->hasPMU()">,
+ AssemblerPredicate<"FeaturePMU", "pmu">;
+
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">;
@@ -116,7 +119,7 @@ def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
- AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">;
+ AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<"FeatureRCPC", "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -133,6 +136,12 @@ def HasBTI : Predicate<"Subtarget->hasBTI()">,
AssemblerPredicate<"FeatureBranchTargetId", "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
AssemblerPredicate<"FeatureMTE", "mte">;
+def HasTME : Predicate<"Subtarget->hasTME()">,
+ AssemblerPredicate<"FeatureTME", "tme">;
+def HasETE : Predicate<"Subtarget->hasETE()">,
+ AssemblerPredicate<"FeatureETE", "ete">;
+def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
+ AssemblerPredicate<"FeatureTRBE", "trbe">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -415,6 +424,14 @@ def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, S
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def SDT_AArch64unpk : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
+]>;
+def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
+def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
+def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
+def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -431,6 +448,13 @@ let RecomputePerFunction = 1 in {
def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
+
+ // Toggles patterns which aren't beneficial in GlobalISel when we aren't
+ // optimizing. This allows us to selectively use patterns without impacting
+ // SelectionDAG's behaviour.
+ // FIXME: One day there will probably be a nicer way to check for this, but
+ // today is not that day.
+ def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}
include "AArch64InstrFormats.td"
@@ -785,7 +809,11 @@ def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS : Pseudo<
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
- [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>,
+ [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
+ Sched<[]>;
+def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
+ (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
+ [(int_hwasan_check_memaccess_shortgranules X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
Sched<[]>;
}
@@ -804,6 +832,23 @@ def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
(SYSxt imm0_7:$op1, sys_cr_op:$Cn,
sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
+
+let Predicates = [HasTME] in {
+
+def TSTART : TMSystemI<0b0000, "tstart",
+ [(set GPR64:$Rt, (int_aarch64_tstart))]>;
+
+def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
+
+def TCANCEL : TMSystemException<0b011, "tcancel",
+ [(int_aarch64_tcancel i64_imm0_65535:$imm)]>;
+
+def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
+ let mayLoad = 0;
+ let mayStore = 0;
+}
+} // HasTME
+
//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//
@@ -815,37 +860,37 @@ let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;
// First group of aliases covers an implicit "lsl #0".
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, i32_imm0_65535:$imm, 0), 0>;
+def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, i32_imm0_65535:$imm, 0), 0>;
+def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48), 0>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32), 0>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16), 0>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16), 0>;
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;
// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
@@ -917,8 +962,12 @@ def trunc_imm : SDNodeXForm<imm, [{
def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
GISDNodeXFormEquiv<trunc_imm>;
+let Predicates = [OptimizedGISelOrOtherSelector] in {
+// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
+// copies.
def : Pat<(i64 i64imm_32bit:$src),
(SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
+}
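
The predicate string added above is raw C++ evaluated by the generated matcher. Restated as a plain function, with three booleans standing in for hasOptNone() and the FailedISel/Legalized property queries it names:

#include <iostream>

// Standalone restatement of OptimizedGISelOrOtherSelector.
static bool optimizedGISelOrOtherSelector(bool HasOptNone, bool FailedISel,
                                          bool Legalized) {
  // Use the pattern when optimizing, when GlobalISel has already fallen
  // back to SelectionDAG, or when this is not a legalized GlobalISel run.
  return !HasOptNone || FailedISel || !Legalized;
}

int main() {
  // -O0 GlobalISel run with no fallback: pattern disabled.
  std::cout << optimizedGISelOrOtherSelector(true, false, true) << '\n';  // 0
  // Optimized build: pattern enabled.
  std::cout << optimizedGISelOrOtherSelector(false, false, true) << '\n'; // 1
}
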
// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
@@ -1012,10 +1061,10 @@ def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
(SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
let AddedComplexity = 1 in {
-def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
- (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
-def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
- (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
+def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
+ (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
+def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
+ (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
}
// Because of the immediate format for add/sub-imm instructions, the
@@ -2165,8 +2214,8 @@ def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
const DataLayout &DL = MF->getDataLayout();
- unsigned Align = G->getGlobal()->getPointerAlignment(DL);
- return Align >= 4 && G->getOffset() % 4 == 0;
+ MaybeAlign Align = G->getGlobal()->getPointerAlignment(DL);
+ return Align && *Align >= 4 && G->getOffset() % 4 == 0;
}
if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
return C->getAlignment() >= 4 && C->getOffset() % 4 == 0;
@@ -3281,20 +3330,37 @@ defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
// the NEON variant.
+
+// Here we first handle -(a + b*c) for FNMADD:
+
+let Predicates = [HasNEON, HasFullFP16] in
+def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
+ (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
+
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
(FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
(FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and
-// "(-a) + b*(-c)".
+// Now it's time for "(-a) + (-b)*c"
+
+let Predicates = [HasNEON, HasFullFP16] in
+def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
+ (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
+
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
(FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
(FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+// And here "(-a) + b*(-c)"
+
+let Predicates = [HasNEON, HasFullFP16] in
+def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))),
+ (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
+
def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
(FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
@@ -6939,5 +7005,124 @@ def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
+// Extracting lane zero is a special case where we can just use a plain
+// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
+// rest of the compiler, especially the register allocator and copy
+// propagation, to reason about, and is therefore preferred whenever possible.
+let AddedComplexity = 10 in {
+ def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
+ def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
+ def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
+}
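
The rationale in that comment boils down to a layout fact: element 0 of a NEON vector already occupies the low bits of the register, so reading it is a subregister copy, not a shuffle. The same fact at the C++ level, assuming AArch64's little-endian lane order:

#include <cstdint>
#include <cstring>
#include <iostream>

int main() {
  uint32_t Lanes[4] = {0xdeadbeef, 1, 2, 3}; // a v4i32 value in memory
  // "EXTRACT_SUBREG ssub": reinterpret the low 32 bits, no data movement.
  uint32_t Lane0;
  std::memcpy(&Lane0, &Lanes[0], sizeof(Lane0));
  std::cout << std::hex << Lane0 << '\n'; // deadbeef
}
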
+
+// dot_v4i8
+class mul_v4i8<SDPatternOperator ldop> :
+ PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
+ (mul (ldop (add node:$Rn, node:$offset)),
+ (ldop (add node:$Rm, node:$offset)))>;
+class mulz_v4i8<SDPatternOperator ldop> :
+ PatFrag<(ops node:$Rn, node:$Rm),
+ (mul (ldop node:$Rn), (ldop node:$Rm))>;
+
+def load_v4i8 :
+ OutPatFrag<(ops node:$R),
+ (INSERT_SUBREG
+ (v2i32 (IMPLICIT_DEF)),
+ (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
+ ssub)>;
+
+class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
+ Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
+ (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
+ (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
+ (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
+ (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
+ (load_v4i8 GPR64sp:$Rn),
+ (load_v4i8 GPR64sp:$Rm))),
+ sub_32)>, Requires<[HasDotProd]>;
+
+// dot_v8i8
+class ee_v8i8<SDPatternOperator extend> :
+ PatFrag<(ops node:$V, node:$K),
+ (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;
+
+class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
+ PatFrag<(ops node:$M, node:$N, node:$K),
+ (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
+ (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;
+
+class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
+ PatFrag<(ops node:$M, node:$N),
+ (i32 (extractelt
+ (v4i32 (AArch64uaddv
+ (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
+ (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
+ (i64 0)))>;
+
+// vaddv_[su]32 is special: it lowers to "ADDP Vd.2S, Vn.2S, Vm.2S;
+// return Vd.s[0]" with Vn == Vm.
+def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;
+
+class odot_v8i8<Instruction DOT> :
+ OutPatFrag<(ops node:$Vm, node:$Vn),
+ (EXTRACT_SUBREG
+ (VADDV_32
+ (i64 (DOT (DUPv2i32gpr WZR),
+ (v8i8 node:$Vm),
+ (v8i8 node:$Vn)))),
+ sub_32)>;
+
+class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
+ SDPatternOperator extend> :
+ Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
+ (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
+ Requires<[HasDotProd]>;
+
+// dot_v16i8
+class ee_v16i8<SDPatternOperator extend> :
+ PatFrag<(ops node:$V, node:$K1, node:$K2),
+ (v4i16 (extract_subvector
+ (v8i16 (extend
+ (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;
+
+class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
+ PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
+ (v4i32
+ (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
+ (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;
+
+class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
+ PatFrag<(ops node:$M, node:$N),
+ (i32 (extractelt
+ (v4i32 (AArch64uaddv
+ (add
+ (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
+ (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
+ (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
+ (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
+ (i64 0)))>;
+
+class odot_v16i8<Instruction DOT> :
+ OutPatFrag<(ops node:$Vm, node:$Vn),
+ (i32 (ADDVv4i32v
+ (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;
+
+class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
+ SDPatternOperator extend> :
+ Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
+ (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
+ Requires<[HasDotProd]>;
+
+let AddedComplexity = 10 in {
+ def : dot_v4i8<SDOTv8i8, sextloadi8>;
+ def : dot_v4i8<UDOTv8i8, zextloadi8>;
+ def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
+ def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
+ def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
+ def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;
+
+ // FIXME: add patterns to generate vector by element dot product.
+ // FIXME: add SVE dot-product patterns.
+}
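
The idot_* PatFrags above describe, in DAG form, an ordinary integer dot product over extended 8-bit lanes, which is exactly what SDOT/UDOT compute natively. A reference scalar implementation of the v8i8 case the patterns match (signed shown; the unsigned variant zero-extends instead):

#include <cstdint>
#include <iostream>

// Reference semantics for the dot_v8i8 pattern: sign-extend each byte,
// multiply pairwise, and accumulate all eight products into one i32.
static int32_t sdot_v8i8(const int8_t A[8], const int8_t B[8]) {
  int32_t Acc = 0;
  for (int I = 0; I < 8; ++I)
    Acc += int32_t(A[I]) * int32_t(B[I]);
  return Acc;
}

int main() {
  int8_t A[8] = {1, -2, 3, -4, 5, -6, 7, -8};
  int8_t B[8] = {8, 7, 6, 5, 4, 3, 2, 1};
  std::cout << sdot_v8i8(A, B) << '\n'; // 0 for these inputs
}
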
+
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 4e13fb8e2027..961f38cad1e4 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -51,9 +51,19 @@ public:
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
- bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
+ bool select(MachineInstr &I) override;
static const char *getName() { return DEBUG_TYPE; }
+ void setupMF(MachineFunction &MF, GISelKnownBits &KB,
+ CodeGenCoverage &CoverageInfo) override {
+ InstructionSelector::setupMF(MF, KB, CoverageInfo);
+
+ // hasFnAttribute() is expensive to call on every BRCOND selection, so
+ // cache it here for each run of the selector.
+ ProduceNonFlagSettingCondBr =
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+ }
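
setupMF runs once per machine function, making it the natural place to memoize anything too costly to query per instruction, as the comment notes for hasFnAttribute. A stripped-down sketch of that caching shape; the types here are hypothetical stand-ins, not the LLVM classes:

#include <iostream>
#include <string>

struct Function {
  bool hasFnAttribute(const std::string &A) const { return A == "slh"; }
};
struct MachineFunction {
  Function F;
  const Function &getFunction() const { return F; }
};

class Selector {
  bool ProduceNonFlagSettingCondBr = false;

public:
  void setupMF(MachineFunction &MF) {
    // Query once per function instead of once per BRCOND selection.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute("speculative-load-hardening");
  }
  bool canUseNonFlagSettingBranch() const {
    return ProduceNonFlagSettingCondBr;
  }
};

int main() {
  MachineFunction MF;
  Selector S;
  S.setupMF(MF);
  std::cout << S.canUseNonFlagSettingBranch() << '\n'; // 1: attribute absent
}
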
+
private:
/// tblgen-erated 'select' implementation, used as the initial selector for
/// the patterns that don't require complex C++.
@@ -68,6 +78,10 @@ private:
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ /// Eliminate same-sized cross-bank copies into stores before selectImpl().
+ void contractCrossBankCopyIntoStore(MachineInstr &I,
+ MachineRegisterInfo &MRI) const;
+
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
@@ -101,8 +115,6 @@ private:
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
- void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
- SmallVectorImpl<Optional<int>> &Idxs) const;
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -116,6 +128,7 @@ private:
bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
@@ -128,6 +141,8 @@ private:
MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitTST(const Register &LHS, const Register &RHS,
@@ -155,7 +170,9 @@ private:
ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
+ ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
+ ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
unsigned Size) const;
@@ -183,11 +200,48 @@ private:
return selectAddrModeIndexed(Root, Width / 8);
}
+ bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const;
+ ComplexRendererFns
+ selectAddrModeShiftedExtendXReg(MachineOperand &Root,
+ unsigned SizeInBytes) const;
+ ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
+ ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
+ unsigned SizeInBytes) const;
+ template <int Width>
+ ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
+ return selectAddrModeXRO(Root, Width / 8);
+ }
+
+ ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
+
+ ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
+ return selectShiftedRegister(Root);
+ }
+
+ ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
+ // TODO: selectShiftedRegister should allow for rotates on logical shifts.
+ // For now the two selectors are identical; the only difference is that
+ // logical shifts are additionally allowed to fold in rotates.
+ return selectShiftedRegister(Root);
+ }
+
+ /// Instructions that accept extend modifiers like UXTW expect the register
+ /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
+ /// subregister copy if necessary. Return either ExtReg, or the result of the
+ /// new copy.
+ Register narrowExtendRegIfNeeded(Register ExtReg,
+ MachineIRBuilder &MIB) const;
+ ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
+
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
+ void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
+ void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
void materializeLargeCMVal(MachineInstr &I, const Value *V,
- unsigned char OpFlags) const;
+ unsigned OpFlags) const;
// Optimization methods.
bool tryOptVectorShuffle(MachineInstr &I) const;
@@ -197,12 +251,22 @@ private:
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
+ /// Return true if \p MI is a load or store of \p NumBytes bytes.
+ bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
+
+ /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
+ /// register zeroed out. In other words, the result of MI has been explicitly
+ /// zero extended.
+ bool isDef32(const MachineInstr &MI) const;
+
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
const AArch64RegisterInfo &TRI;
const AArch64RegisterBankInfo &RBI;
+ bool ProduceNonFlagSettingCondBr = false;
+
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
@@ -312,7 +376,7 @@ static bool getSubRegForClass(const TargetRegisterClass *RC,
SubReg = AArch64::hsub;
break;
case 32:
- if (RC == &AArch64::GPR32RegClass)
+ if (RC != &AArch64::FPR32RegClass)
SubReg = AArch64::sub_32;
else
SubReg = AArch64::ssub;
@@ -357,7 +421,7 @@ static bool unsupportedBinOp(const MachineInstr &I,
// so, this will need to be taught about that, and we'll need to get the
// bank out of the minimal class for the register.
// Either way, this needs to be documented (and possibly verified).
- if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (!Register::isVirtualRegister(MO.getReg())) {
LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
return true;
}
@@ -492,8 +556,8 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
@@ -502,7 +566,7 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
(DstSize == SrcSize ||
// Copies are a mean to setup initial types, the number of
// bits may not exactly match.
- (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
+ (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
// Copies are a mean to copy bits around, as long as we are
// on the same register class, that's fine. Otherwise, that
// means we need some SUBREG_TO_REG or AND & co.
@@ -526,7 +590,7 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
- const RegisterBankInfo &RBI, unsigned SrcReg,
+ const RegisterBankInfo &RBI, Register SrcReg,
const TargetRegisterClass *From,
const TargetRegisterClass *To,
unsigned SubReg) {
@@ -539,7 +603,7 @@ static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
// It's possible that the destination register won't be constrained. Make
// sure that happens.
- if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
+ if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
return true;
@@ -553,8 +617,8 @@ static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
- unsigned DstReg = I.getOperand(0).getReg();
- unsigned SrcReg = I.getOperand(1).getReg();
+ Register DstReg = I.getOperand(0).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
@@ -579,8 +643,8 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
- unsigned DstReg = I.getOperand(0).getReg();
- unsigned SrcReg = I.getOperand(1).getReg();
+ Register DstReg = I.getOperand(0).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
@@ -607,11 +671,10 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
// result.
auto CheckCopy = [&]() {
// If we have a bitcast or something, we can't have physical registers.
- assert(
- (I.isCopy() ||
- (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
- !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
- "No phys reg on generic operator!");
+ assert((I.isCopy() ||
+ (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
+ !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
+ "No phys reg on generic operator!");
assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
(void)KnownValid;
return true;
@@ -626,38 +689,38 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
return false;
}
- // Is this a cross-bank copy?
- if (DstRegBank.getID() != SrcRegBank.getID()) {
- // If we're doing a cross-bank copy on different-sized registers, we need
- // to do a bit more work.
- unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
- unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
-
- if (SrcSize > DstSize) {
- // We're doing a cross-bank copy into a smaller register. We need a
- // subregister copy. First, get a register class that's on the same bank
- // as the destination, but the same size as the source.
- const TargetRegisterClass *SubregRC =
- getMinClassForRegBank(DstRegBank, SrcSize, true);
- assert(SubregRC && "Didn't get a register class for subreg?");
-
- // Get the appropriate subregister for the destination.
- unsigned SubReg = 0;
- if (!getSubRegForClass(DstRC, TRI, SubReg)) {
- LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
- return false;
- }
-
- // Now, insert a subregister copy using the new register class.
- selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
- return CheckCopy();
+ unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
+ unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
+
+ // If we're doing a cross-bank copy on different-sized registers, we need
+ // to do a bit more work.
+ if (SrcSize > DstSize) {
+ // We're doing a cross-bank copy into a smaller register. We need a
+ // subregister copy. First, get a register class that's on the same bank
+ // as the destination, but the same size as the source.
+ const TargetRegisterClass *SubregRC =
+ getMinClassForRegBank(DstRegBank, SrcSize, true);
+ assert(SubregRC && "Didn't get a register class for subreg?");
+
+ // Get the appropriate subregister for the destination.
+ unsigned SubReg = 0;
+ if (!getSubRegForClass(DstRC, TRI, SubReg)) {
+ LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
+ return false;
}
- else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
- SrcSize == 16) {
+ // Now, insert a subregister copy using the new register class.
+ selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
+ return CheckCopy();
+ }
+
+ // Is this a cross-bank copy?
+ if (DstRegBank.getID() != SrcRegBank.getID()) {
+ if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
+ SrcSize == 16) {
// Special case for FPR16 to GPR32.
// FIXME: This can probably be generalized like the above case.
- unsigned PromoteReg =
+ Register PromoteReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
@@ -674,7 +737,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
// If the destination is a physical register, then there's nothing to
// change, so we're done.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ if (Register::isPhysicalRegister(DstReg))
return CheckCopy();
}
@@ -955,7 +1018,9 @@ bool AArch64InstructionSelector::selectVectorSHL(
return false;
unsigned Opc = 0;
- if (Ty == LLT::vector(4, 32)) {
+ if (Ty == LLT::vector(2, 64)) {
+ Opc = AArch64::USHLv2i64;
+ } else if (Ty == LLT::vector(4, 32)) {
Opc = AArch64::USHLv4i32;
} else if (Ty == LLT::vector(2, 32)) {
Opc = AArch64::USHLv2i32;
@@ -989,7 +1054,11 @@ bool AArch64InstructionSelector::selectVectorASHR(
unsigned Opc = 0;
unsigned NegOpc = 0;
const TargetRegisterClass *RC = nullptr;
- if (Ty == LLT::vector(4, 32)) {
+ if (Ty == LLT::vector(2, 64)) {
+ Opc = AArch64::SSHLv2i64;
+ NegOpc = AArch64::NEGv2i64;
+ RC = &AArch64::FPR128RegClass;
+ } else if (Ty == LLT::vector(4, 32)) {
Opc = AArch64::SSHLv4i32;
NegOpc = AArch64::NEGv4i32;
RC = &AArch64::FPR128RegClass;
@@ -1044,7 +1113,7 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
}
void AArch64InstructionSelector::materializeLargeCMVal(
- MachineInstr &I, const Value *V, unsigned char OpFlags) const {
+ MachineInstr &I, const Value *V, unsigned OpFlags) const {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1097,8 +1166,8 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
// some reason we receive input GMIR that has an s64 shift amount that's not
// a G_CONSTANT, insert a truncate so that we can still select the s32
// register-register variant.
- unsigned SrcReg = I.getOperand(1).getReg();
- unsigned ShiftReg = I.getOperand(2).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
+ Register ShiftReg = I.getOperand(2).getReg();
const LLT ShiftTy = MRI.getType(ShiftReg);
const LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
@@ -1118,6 +1187,9 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
}
return;
}
+ case TargetOpcode::G_STORE:
+ contractCrossBankCopyIntoStore(I, MRI);
+ return;
default:
return;
}
@@ -1158,6 +1230,48 @@ bool AArch64InstructionSelector::earlySelectSHL(
return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}
+void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
+ // If we're storing a scalar, it doesn't matter what register bank that
+ // scalar is on. All that matters is the size.
+ //
+ // So, if we see something like this (with a 32-bit scalar as an example):
+ //
+ // %x:gpr(s32) = ... something ...
+ // %y:fpr(s32) = COPY %x:gpr(s32)
+ // G_STORE %y:fpr(s32)
+ //
+ // We can fix this up into something like this:
+ //
+ // G_STORE %x:gpr(s32)
+ //
+ // And then continue the selection process normally.
+ MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
+ if (!Def)
+ return;
+ Register DefDstReg = Def->getOperand(0).getReg();
+ LLT DefDstTy = MRI.getType(DefDstReg);
+ Register StoreSrcReg = I.getOperand(0).getReg();
+ LLT StoreSrcTy = MRI.getType(StoreSrcReg);
+
+ // If we get something strange like a physical register, then we shouldn't
+ // go any further.
+ if (!DefDstTy.isValid())
+ return;
+
+ // Are the source and dst types the same size?
+ if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
+ return;
+
+ if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
+ RBI.getRegBank(DefDstReg, MRI, TRI))
+ return;
+
+ // We have a cross-bank copy, which is entering a store. Let's fold it.
+ I.getOperand(0).setReg(DefDstReg);
+}
+
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -1169,13 +1283,37 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
switch (I.getOpcode()) {
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
+ case TargetOpcode::G_CONSTANT: {
+ bool IsZero = false;
+ if (I.getOperand(1).isCImm())
+ IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
+ else if (I.getOperand(1).isImm())
+ IsZero = I.getOperand(1).getImm() == 0;
+
+ if (!IsZero)
+ return false;
+
+ Register DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+ if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
+ return false;
+
+ if (Ty == LLT::scalar(64)) {
+ I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
+ RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
+ } else {
+ I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
+ RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
+ }
+ I.setDesc(TII.get(TargetOpcode::COPY));
+ return true;
+ }
default:
return false;
}
}
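
// The G_CONSTANT case above turns materializations of zero into plain copies
// from the AArch64 zero registers. A rough before/after sketch in generic
// MIR (virtual register names are illustrative):
//
//   %0:gpr(s64) = G_CONSTANT i64 0   --->   %0:gpr64 = COPY $xzr
//   %1:gpr(s32) = G_CONSTANT i32 0   --->   %1:gpr32 = COPY $wzr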
-bool AArch64InstructionSelector::select(MachineInstr &I,
- CodeGenCoverage &CoverageInfo) const {
+bool AArch64InstructionSelector::select(MachineInstr &I) {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -1244,7 +1382,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
if (earlySelect(I))
return true;
- if (selectImpl(I, CoverageInfo))
+ if (selectImpl(I, *CoverageInfo))
return true;
LLT Ty =
@@ -1439,14 +1577,43 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return true;
}
case TargetOpcode::G_EXTRACT: {
- LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
- LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ Register DstReg = I.getOperand(0).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(DstReg);
(void)DstTy;
unsigned SrcSize = SrcTy.getSizeInBits();
- // Larger extracts are vectors, same-size extracts should be something else
- // by now (either split up or simplified to a COPY).
- if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
- return false;
+
+ if (SrcTy.getSizeInBits() > 64) {
+ // This should be an extract of an s128, which is like a vector extract.
+ if (SrcTy.getSizeInBits() != 128)
+ return false;
+ // Only support extracting 64 bits from an s128 at the moment.
+ if (DstTy.getSizeInBits() != 64)
+ return false;
+
+ const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+ const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ // Check we have the right regbank always.
+ assert(SrcRB.getID() == AArch64::FPRRegBankID &&
+ DstRB.getID() == AArch64::FPRRegBankID &&
+ "Wrong extract regbank!");
+ (void)SrcRB;
+
+ // Emit the same code as a vector extract.
+ // Offset must be a multiple of 64.
+ unsigned Offset = I.getOperand(2).getImm();
+ if (Offset % 64 != 0)
+ return false;
+ unsigned LaneIdx = Offset / 64;
+ MachineIRBuilder MIB(I);
+ MachineInstr *Extract = emitExtractVectorElt(
+ DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
+ if (!Extract)
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
@@ -1458,7 +1625,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
.addReg(DstReg, 0, AArch64::sub_32);
@@ -1521,11 +1688,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_GLOBAL_VALUE: {
auto GV = I.getOperand(1).getGlobal();
- if (GV->isThreadLocal()) {
- // FIXME: we don't support TLS yet.
- return false;
- }
- unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
+ if (GV->isThreadLocal())
+ return selectTLSGlobalValue(I, MRI);
+
+ unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
@@ -1562,8 +1728,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
}
auto &MemOp = **I.memoperands_begin();
- if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
- LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+ if (MemOp.isAtomic()) {
+ // For now we just support s8 acquire loads to be able to compile stack
+ // protector code.
+ if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
+ MemOp.getSize() == 1) {
+ I.setDesc(TII.get(AArch64::LDARB));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+ LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
return false;
}
unsigned MemSizeInBits = MemOp.getSize() * 8;
@@ -1598,7 +1771,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
const unsigned Size = MemSizeInBits / 8;
const unsigned Scale = Log2_32(Size);
if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
- unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
+ Register Ptr2Reg = PtrMI->getOperand(1).getReg();
I.getOperand(1).setReg(Ptr2Reg);
PtrMI = MRI.getVRegDef(Ptr2Reg);
Offset = Imm / Size;
@@ -1688,8 +1861,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return selectVectorSHL(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_OR:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_GEP: {
+ case TargetOpcode::G_LSHR: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
@@ -1711,6 +1883,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+ case TargetOpcode::G_GEP: {
+ MachineIRBuilder MIRBuilder(I);
+ emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
+ MIRBuilder);
+ I.eraseFromParent();
+ return true;
+ }
case TargetOpcode::G_UADDO: {
// TODO: Support other types.
unsigned OpSize = Ty.getSizeInBits();
@@ -1816,6 +1995,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
+
+ if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
+ MachineIRBuilder MIB(I);
+ MachineInstr *Extract = emitExtractVectorElt(
+ DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
+ if (!Extract)
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
}
return false;
@@ -1868,21 +2057,41 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT: {
unsigned Opcode = I.getOpcode();
- const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
- SrcTy = MRI.getType(I.getOperand(1).getReg());
- const bool isSigned = Opcode == TargetOpcode::G_SEXT;
+ const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
const Register DefReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
- const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+ const LLT DstTy = MRI.getType(DefReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned SrcSize = SrcTy.getSizeInBits();
- if (RB.getID() != AArch64::GPRRegBankID) {
- LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
- << ", expected: GPR\n");
- return false;
- }
+ assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
+ AArch64::GPRRegBankID &&
+ "Unexpected ext regbank");
+ MachineIRBuilder MIB(I);
MachineInstr *ExtI;
- if (DstTy == LLT::scalar(64)) {
+ if (DstTy.isVector())
+ return false; // Should be handled by imported patterns.
+
+ // First, check whether we're extending the result of a load whose destination
+ // type is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
+ // smallest GPR register on AArch64, and all smaller loads automatically
+ // zero-extend the upper bits. E.g.
+ // %v(s8) = G_LOAD %p, :: (load 1)
+ // %v2(s32) = G_ZEXT %v(s8)
+ if (!IsSigned) {
+ auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
+ if (LoadMI &&
+ RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
+ const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
+ unsigned BytesLoaded = MemOp->getSize();
+ if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
+ return selectCopy(I, TII, MRI, TRI, RBI);
+ }
+ }
+
+ if (DstSize == 64) {
// FIXME: Can we avoid manually doing this?
if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
@@ -1890,33 +2099,26 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return false;
}
- const Register SrcXReg =
- MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
- .addDef(SrcXReg)
- .addImm(0)
- .addUse(SrcReg)
- .addImm(AArch64::sub_32);
-
- const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
- ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
- .addDef(DefReg)
- .addUse(SrcXReg)
- .addImm(0)
- .addImm(SrcTy.getSizeInBits() - 1);
- } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
- const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
- ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
- .addDef(DefReg)
- .addUse(SrcReg)
- .addImm(0)
- .addImm(SrcTy.getSizeInBits() - 1);
+ auto SubregToReg =
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
+ .addImm(0)
+ .addUse(SrcReg)
+ .addImm(AArch64::sub_32);
+
+ ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
+ {DefReg}, {SubregToReg})
+ .addImm(0)
+ .addImm(SrcSize - 1);
+ } else if (DstSize <= 32) {
+ ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
+ {DefReg}, {SrcReg})
+ .addImm(0)
+ .addImm(SrcSize - 1);
} else {
return false;
}
constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
-
I.eraseFromParent();
return true;
}
@@ -2163,6 +2365,37 @@ bool AArch64InstructionSelector::selectJumpTable(
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
+bool AArch64InstructionSelector::selectTLSGlobalValue(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ if (!STI.isTargetMachO())
+ return false;
+ MachineFunction &MF = *I.getParent()->getParent();
+ MF.getFrameInfo().setAdjustsStack(true);
+
+ const GlobalValue &GV = *I.getOperand(1).getGlobal();
+ MachineIRBuilder MIB(I);
+
+ MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
+ .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
+
+ auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
+ {Register(AArch64::X0)})
+ .addImm(0);
+
+ // TLS calls preserve all registers except those that absolutely must be
+ // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
+ // silly).
+ MIB.buildInstr(AArch64::BLR, {}, {Load})
+ .addDef(AArch64::X0, RegState::Implicit)
+ .addRegMask(TRI.getTLSCallPreservedMask());
+
+ MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
+ RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
+ MRI);
+ I.eraseFromParent();
+ return true;
+}
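+
+// Under Darwin's TLV ABI, the sequence built above amounts to, roughly: load
+// the variable's thread-local descriptor address from the GOT into X0, load
+// the descriptor's first word (a pointer to the accessor function), and call
+// it indirectly; the variable's address comes back in X0. An assembly sketch
+// (register choices illustrative):
+//
+//   adrp x0, _var@TLVPPAGE
+//   ldr  x0, [x0, _var@TLVPPAGEOFF]   ; LOADgot with MO_TLS
+//   ldr  x8, [x0]                     ; accessor from the descriptor
+//   blr  x8                           ; result in x0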
+
bool AArch64InstructionSelector::selectIntrinsicTrunc(
MachineInstr &I, MachineRegisterInfo &MRI) const {
const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
@@ -2478,16 +2711,40 @@ bool AArch64InstructionSelector::selectMergeValues(
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
+ const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
- // At the moment we only support merging two s32s into an s64.
if (I.getNumOperands() != 3)
return false;
- if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
- return false;
- const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
+
+ // Merging 2 s64s into an s128.
+ if (DstTy == LLT::scalar(128)) {
+ if (SrcTy.getSizeInBits() != 64)
+ return false;
+ MachineIRBuilder MIB(I);
+ Register DstReg = I.getOperand(0).getReg();
+ Register Src1Reg = I.getOperand(1).getReg();
+ Register Src2Reg = I.getOperand(2).getReg();
+ auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
+ MachineInstr *InsMI =
+ emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
+ if (!InsMI)
+ return false;
+ MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
+ Src2Reg, /* LaneIdx */ 1, RB, MIB);
+ if (!Ins2MI)
+ return false;
+ constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+ }
+
if (RB.getID() != AArch64::GPRRegBankID)
return false;
+ if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
+ return false;
+
auto *DstRC = &AArch64::GPR64RegClass;
Register SubToRegDef = MRI.createVirtualRegister(DstRC);
MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
@@ -2695,7 +2952,8 @@ bool AArch64InstructionSelector::selectUnmergeValues(
const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
const LLT WideTy = MRI.getType(SrcReg);
(void)WideTy;
- assert(WideTy.isVector() && "can only unmerge from vector types!");
+ assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
+ "can only unmerge from vector or s128 types!");
assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
"source register size too small!");
@@ -2802,29 +3060,6 @@ bool AArch64InstructionSelector::selectConcatVectors(
return true;
}
-void AArch64InstructionSelector::collectShuffleMaskIndices(
- MachineInstr &I, MachineRegisterInfo &MRI,
- SmallVectorImpl<Optional<int>> &Idxs) const {
- MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
- assert(
- MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
- "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
- // Find the constant indices.
- for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
- // Look through copies.
- MachineInstr *ScalarDef =
- getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI);
- assert(ScalarDef && "Could not find vreg def of shufflevec index op");
- if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
- // This be an undef if not a constant.
- assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
- Idxs.push_back(None);
- } else {
- Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
- }
- }
-}
-
unsigned
AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
MachineFunction &MF) const {
@@ -2906,6 +3141,31 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
}
MachineInstr *
+AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
+ MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
+ {AArch64::ADDWrr, AArch64::ADDWri}};
+ bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
+ auto ImmFns = selectArithImmed(RHS);
+ unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
+ auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
+
+ // If we matched a valid constant immediate, add those operands.
+ if (ImmFns) {
+ for (auto &RenderFn : *ImmFns)
+ RenderFn(AddMI);
+ } else {
+ AddMI.addUse(RHS.getReg());
+ }
+
+ constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
+ return &*AddMI;
+}
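+
+// For example (a sketch): emitADD(%d, %x, %c) where %c is defined by
+// "G_CONSTANT i64 4095" matches selectArithImmed and so builds
+// "%d = ADDXri %x, 4095, lsl #0"; an RHS that is not an encodable arithmetic
+// immediate falls back to the register-register form (ADDXrr/ADDWrr).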
+
+MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
@@ -3151,7 +3411,7 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
// Can't see past copies from physregs.
if (Opc == TargetOpcode::COPY &&
- TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
+ Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
return false;
CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
@@ -3342,16 +3602,9 @@ bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
return false;
// The shuffle's second operand doesn't matter if the mask is all zero.
- auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
- if (!ZeroVec)
+ const Constant *Mask = I.getOperand(3).getShuffleMask();
+ if (!isa<ConstantAggregateZero>(Mask))
return false;
- int64_t Zero = 0;
- if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
- return false;
- for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
- if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
- return false; // This wasn't an all zeros vector.
- }
// We're done, now find out what kind of splat we need.
LLT VecTy = MRI.getType(I.getOperand(0).getReg());
@@ -3399,19 +3652,14 @@ bool AArch64InstructionSelector::selectShuffleVector(
const LLT Src1Ty = MRI.getType(Src1Reg);
Register Src2Reg = I.getOperand(2).getReg();
const LLT Src2Ty = MRI.getType(Src2Reg);
+ const Constant *ShuffleMask = I.getOperand(3).getShuffleMask();
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
LLVMContext &Ctx = MF.getFunction().getContext();
- // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
- // operand, it comes in as a normal vector value which we have to analyze to
- // find the mask indices. If the mask element is undef, then
- // collectShuffleMaskIndices() will add a None entry for that index into
- // the list.
- SmallVector<Optional<int>, 8> Mask;
- collectShuffleMaskIndices(I, MRI, Mask);
- assert(!Mask.empty() && "Expected to find mask indices");
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask);
// G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
// it's originated from a <1 x T> type. Those should have been lowered into
@@ -3424,10 +3672,10 @@ bool AArch64InstructionSelector::selectShuffleVector(
unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
SmallVector<Constant *, 64> CstIdxs;
- for (auto &MaybeVal : Mask) {
+ for (int Val : Mask) {
// For now, we'll just assume any undef indexes are 0. This should be
// optimized in the future, e.g. to select DUP etc.
- int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
+ Val = Val < 0 ? 0 : Val;
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
@@ -3684,21 +3932,6 @@ static unsigned findIntrinsicID(MachineInstr &I) {
return IntrinOp->getIntrinsicID();
}
-/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
-/// intrinsic.
-static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
- switch (NumBytesToStore) {
- // TODO: 1, 2, and 4 byte stores.
- case 8:
- return AArch64::STLXRX;
- default:
- LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
- << NumBytesToStore << ")\n");
- break;
- }
- return 0;
-}
-
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineRegisterInfo &MRI) const {
// Find the intrinsic ID.
@@ -3719,32 +3952,6 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
return false;
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
break;
- case Intrinsic::aarch64_stlxr:
- Register StatReg = I.getOperand(0).getReg();
- assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
- "Status register must be 32 bits!");
- Register SrcReg = I.getOperand(2).getReg();
-
- if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
- LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
- return false;
- }
-
- Register PtrReg = I.getOperand(3).getReg();
- assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
-
- // Expect only one memory operand.
- if (!I.hasOneMemOperand())
- return false;
-
- const MachineMemOperand *MemOp = *I.memoperands_begin();
- unsigned NumBytesToStore = MemOp->getSize();
- unsigned Opc = getStlxrOpcode(NumBytesToStore);
- if (!Opc)
- return false;
-
- auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
- constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
}
I.eraseFromParent();
@@ -3860,6 +4067,30 @@ AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
+/// Helper to select an immediate value that can be represented as a 12-bit
+/// value shifted left by either 0 or 12. If it is possible to do so, return
+/// the immediate and shift value. If not, return None.
+///
+/// Used by selectArithImmed and selectNegArithImmed.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::select12BitValueWithLeftShift(
+ uint64_t Immed) const {
+ unsigned ShiftAmt;
+ if (Immed >> 12 == 0) {
+ ShiftAmt = 0;
+ } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
+ ShiftAmt = 12;
+ Immed = Immed >> 12;
+ } else
+ return None;
+
+ unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
+ }};
+}
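+
+// Restating the rule above as a standalone sketch (hypothetical helper, not
+// tied to the selector plumbing; assumes <cstdint>, <optional>, <utility>):
+// an immediate is encodable iff it is a 12-bit value optionally shifted left
+// by 12.
+//
+//   using ImmShift = std::pair<uint64_t, unsigned>; // {imm12, shift amount}
+//   std::optional<ImmShift> matchArithImm(uint64_t V) {
+//     if (V >> 12 == 0)
+//       return ImmShift{V, 0};        // e.g. 0xfff    -> {0xfff, lsl #0}
+//     if ((V & 0xfff) == 0 && V >> 24 == 0)
+//       return ImmShift{V >> 12, 12}; // e.g. 0x123000 -> {0x123, lsl #12}
+//     return std::nullopt;            // anything else is not encodable
+//   }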
+
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return the renderer
/// functions for the 12-bit value and the shifter operand; otherwise None.
@@ -3873,22 +4104,229 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
auto MaybeImmed = getImmedFromMO(Root);
if (MaybeImmed == None)
return None;
+ return select12BitValueWithLeftShift(*MaybeImmed);
+}
+
+/// SelectNegArithImmed - As above, but negates the value before trying to
+/// select it.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
+ // We need a register here, because we need to know if we have a 64 or 32
+ // bit immediate.
+ if (!Root.isReg())
+ return None;
+ auto MaybeImmed = getImmedFromMO(Root);
+ if (MaybeImmed == None)
+ return None;
uint64_t Immed = *MaybeImmed;
- unsigned ShiftAmt;
- if (Immed >> 12 == 0) {
- ShiftAmt = 0;
- } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
- ShiftAmt = 12;
- Immed = Immed >> 12;
- } else
+ // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
+ // have the opposite effect on the C flag, so this pattern mustn't match under
+ // those circumstances.
+ if (Immed == 0)
return None;
- unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
- }};
+ // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
+ // the root.
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+ if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
+ Immed = ~((uint32_t)Immed) + 1;
+ else
+ Immed = ~Immed + 1ULL;
+
+ if (Immed & 0xFFFFFFFFFF000000ULL)
+ return None;
+
+ Immed &= 0xFFFFFFULL;
+ return select12BitValueWithLeftShift(Immed);
+}
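+
+// A worked example for the negation above, assuming a 32-bit root register:
+// for Immed = 0xfffff000 (-4096 as an i32), ~uint32_t(Immed) + 1 == 0x1000,
+// which survives the 24-bit test and selects as {imm = 1, lsl #12}; e.g. a
+// compare against -4096 can become "cmn wN, #1, lsl #12". By contrast,
+// Immed = 5 negates to 0xfffffffb and is rejected.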
+
+/// Return true if it is worth folding MI into an extended register. That is,
+/// if it's safe to pull it into the addressing mode of a load or store as a
+/// shift.
+bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
+ MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+ // Always fold if there is one use, or if we're optimizing for size.
+ Register DefReg = MI.getOperand(0).getReg();
+ if (MRI.hasOneUse(DefReg) ||
+ MI.getParent()->getParent()->getFunction().hasMinSize())
+ return true;
+
+ // It's better to avoid folding and recomputing shifts when we don't have a
+ // fastpath.
+ if (!STI.hasLSLFast())
+ return false;
+
+ // We have a fastpath, so folding a shift in and potentially computing it
+ // many times may be beneficial. Check if this is only used in memory ops.
+ // If it is, then we should fold.
+ return all_of(MRI.use_instructions(DefReg),
+ [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
+}
+
+/// This is used for computing addresses like this:
+///
+/// ldr x1, [x2, x3, lsl #3]
+///
+/// Where x2 is the base register, and x3 is an offset register. The shift-left
+/// is a constant value specific to this load instruction. That is, we'll never
+/// see anything other than a 3 here (which corresponds to the size of the
+/// element being loaded).
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
+ MachineOperand &Root, unsigned SizeInBytes) const {
+ if (!Root.isReg())
+ return None;
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ // Make sure that the memory op is a valid size.
+ int64_t LegalShiftVal = Log2_32(SizeInBytes);
+ if (LegalShiftVal == 0)
+ return None;
+
+ // We want to find something like this:
+ //
+ // val = G_CONSTANT LegalShiftVal
+ // shift = G_SHL off_reg val
+ // ptr = G_GEP base_reg shift
+ // x = G_LOAD ptr
+ //
+ // And fold it into this addressing mode:
+ //
+ // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
+
+ // Check if we can find the G_GEP.
+ MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
+ if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
+ return None;
+
+ // Now, try to match an opcode which will match our specific offset.
+ // We want a G_SHL or a G_MUL.
+ MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
+ if (!OffsetInst)
+ return None;
+
+ unsigned OffsetOpc = OffsetInst->getOpcode();
+ if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
+ return None;
+
+ if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+ return None;
+
+ // Now, try to find the specific G_CONSTANT. Start by assuming that the
+ // register we will offset is the LHS, and the register containing the
+ // constant is the RHS.
+ Register OffsetReg = OffsetInst->getOperand(1).getReg();
+ Register ConstantReg = OffsetInst->getOperand(2).getReg();
+ auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ if (!ValAndVReg) {
+ // We didn't get a constant on the RHS. If the opcode is a shift, then
+ // we're done.
+ if (OffsetOpc == TargetOpcode::G_SHL)
+ return None;
+
+ // If we have a G_MUL, we can use either register. Try looking at the RHS.
+ std::swap(OffsetReg, ConstantReg);
+ ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ if (!ValAndVReg)
+ return None;
+ }
+
+ // The value must fit into 3 bits, and must be positive. Make sure that is
+ // true.
+ int64_t ImmVal = ValAndVReg->Value;
+
+ // Since we're going to pull this into a shift, the constant value must be
+ // a power of 2. If we got a multiply, then we need to check this.
+ if (OffsetOpc == TargetOpcode::G_MUL) {
+ if (!isPowerOf2_32(ImmVal))
+ return None;
+
+ // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
+ ImmVal = Log2_32(ImmVal);
+ }
+
+ if ((ImmVal & 0x7) != ImmVal)
+ return None;
+
+ // We are only allowed to shift by LegalShiftVal. This shift value is built
+ // into the instruction, so we can't just use whatever we want.
+ if (ImmVal != LegalShiftVal)
+ return None;
+
+ // We can use the LHS of the GEP as the base, and the LHS of the shift as an
+ // offset. Signify that we are shifting by setting the shift flag to 1.
+ return {{[=](MachineInstrBuilder &MIB) {
+ MIB.addUse(Gep->getOperand(1).getReg());
+ },
+ [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
+ [=](MachineInstrBuilder &MIB) {
+ // Need to add both immediates here to make sure that they are both
+ // added to the instruction.
+ MIB.addImm(0);
+ MIB.addImm(1);
+ }}};
+}
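+
+// A concrete instance of the fold above (a sketch): for an 8-byte access,
+// SizeInBytes == 8 and LegalShiftVal == 3, so
+//
+//   %c   = G_CONSTANT i64 3
+//   %off = G_SHL %idx, %c        ; or a G_MUL of %idx by 8
+//   %ptr = G_GEP %base, %off
+//   %val = G_LOAD %ptr
+//
+// collapses into "ldr x, [base, idx, lsl #3]". The G_MUL form qualifies
+// because 8 is a power of two whose log2 equals the legal shift amount.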
+
+/// This is used for computing addresses like this:
+///
+/// ldr x1, [x2, x3]
+///
+/// Where x2 is the base register, and x3 is an offset register.
+///
+/// When possible (or profitable) to fold a G_GEP into the address calculation,
+/// this will do so. Otherwise, it will return None.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeRegisterOffset(
+ MachineOperand &Root) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ // We need a GEP.
+ MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
+ if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
+ return None;
+
+ // If this is used more than once, let's not bother folding.
+ // TODO: Check if they are memory ops. If they are, then we can still fold
+ // without having to recompute anything.
+ if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
+ return None;
+
+ // Base is the GEP's LHS, offset is its RHS.
+ return {{[=](MachineInstrBuilder &MIB) {
+ MIB.addUse(Gep->getOperand(1).getReg());
+ },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addUse(Gep->getOperand(2).getReg());
+ },
+ [=](MachineInstrBuilder &MIB) {
+ // Need to add both immediates here to make sure that they are both
+ // added to the instruction.
+ MIB.addImm(0);
+ MIB.addImm(0);
+ }}};
+}
+
+/// This is intended to be equivalent to selectAddrModeXRO in
+/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
+ unsigned SizeInBytes) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ // If we have a constant offset, then we probably don't want to match a
+ // register offset.
+ if (isBaseWithConstantOffset(Root, MRI))
+ return None;
+
+ // Try to fold shifts into the addressing mode.
+ auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
+ if (AddrModeFns)
+ return AddrModeFns;
+
+ // If that doesn't work, see if it's possible to fold in registers from
+ // a GEP.
+ return selectAddrModeRegisterOffset(Root);
}
/// Select a "register plus unscaled signed 9-bit immediate" address. This
@@ -3994,6 +4432,205 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
}};
}
+/// Given a shift instruction, return the correct shift type for that
+/// instruction.
+static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
+ // TODO: Handle AArch64_AM::ROR
+ switch (MI.getOpcode()) {
+ default:
+ return AArch64_AM::InvalidShiftExtend;
+ case TargetOpcode::G_SHL:
+ return AArch64_AM::LSL;
+ case TargetOpcode::G_LSHR:
+ return AArch64_AM::LSR;
+ case TargetOpcode::G_ASHR:
+ return AArch64_AM::ASR;
+ }
+}
+
+/// Select a "shifted register" operand. If the value is not shifted, set the
+/// shift operand to a default value of "lsl 0".
+///
+/// TODO: Allow shifted register to be rotated in logical instructions.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
+ if (!Root.isReg())
+ return None;
+ MachineRegisterInfo &MRI =
+ Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ // Check if the operand is defined by an instruction which corresponds to
+ // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
+ //
+ // TODO: Handle AArch64_AM::ROR for logical instructions.
+ MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
+ if (!ShiftInst)
+ return None;
+ AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
+ if (ShType == AArch64_AM::InvalidShiftExtend)
+ return None;
+ if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
+ return None;
+
+ // Need an immediate on the RHS.
+ MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
+ auto Immed = getImmedFromMO(ShiftRHS);
+ if (!Immed)
+ return None;
+
+ // We have something that we can fold. Fold in the shift's LHS and RHS into
+ // the instruction.
+ MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
+ Register ShiftReg = ShiftLHS.getReg();
+
+ unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
+ unsigned Val = *Immed & (NumBits - 1);
+ unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
+}
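+
+// An example of the fold (a sketch): given
+//
+//   %2:gpr(s64) = G_CONSTANT i64 3
+//   %3:gpr(s64) = G_SHL %1, %2
+//   %4:gpr(s64) = G_ADD %0, %3
+//
+// the shift folds into its user and selects as "add x4, x0, x1, lsl #3".
+// Note the immediate is masked to the register width (NumBits - 1), so a
+// shift amount of 67 on an s64 also folds as "lsl #3".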
+
+/// Get the correct ShiftExtendType for an extend instruction.
+static AArch64_AM::ShiftExtendType
+getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ unsigned Opc = MI.getOpcode();
+
+ // Handle explicit extend instructions first.
+ if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
+ unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ assert(Size != 64 && "Extend from 64 bits?");
+ switch (Size) {
+ case 8:
+ return AArch64_AM::SXTB;
+ case 16:
+ return AArch64_AM::SXTH;
+ case 32:
+ return AArch64_AM::SXTW;
+ default:
+ return AArch64_AM::InvalidShiftExtend;
+ }
+ }
+
+ if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
+ unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ assert(Size != 64 && "Extend from 64 bits?");
+ switch (Size) {
+ case 8:
+ return AArch64_AM::UXTB;
+ case 16:
+ return AArch64_AM::UXTH;
+ case 32:
+ return AArch64_AM::UXTW;
+ default:
+ return AArch64_AM::InvalidShiftExtend;
+ }
+ }
+
+ // Don't have an explicit extend. Try to handle a G_AND with a constant mask
+ // on the RHS.
+ if (Opc != TargetOpcode::G_AND)
+ return AArch64_AM::InvalidShiftExtend;
+
+ Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
+ if (!MaybeAndMask)
+ return AArch64_AM::InvalidShiftExtend;
+ uint64_t AndMask = *MaybeAndMask;
+ switch (AndMask) {
+ default:
+ return AArch64_AM::InvalidShiftExtend;
+ case 0xFF:
+ return AArch64_AM::UXTB;
+ case 0xFFFF:
+ return AArch64_AM::UXTH;
+ case 0xFFFFFFFF:
+ return AArch64_AM::UXTW;
+ }
+}
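+
+// The G_AND cases let a masking AND stand in for a zero-extend: e.g.
+// "%1 = G_AND %0, 0xffff" is treated as UXTH of %0, so an add using it can
+// select as "add x2, x3, w0, uxth" with no separate mask instruction.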
+
+Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
+ Register ExtReg, MachineIRBuilder &MIB) const {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ if (MRI.getType(ExtReg).getSizeInBits() == 32)
+ return ExtReg;
+
+ // Insert a copy to move ExtReg to GPR32.
+ Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
+
+ // Select the copy into a subregister copy.
+ selectCopy(*Copy, TII, MRI, TRI, RBI);
+ return Copy.getReg(0);
+}
+
+/// Select an "extended register" operand. This operand folds in an extend
+/// followed by an optional left shift.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectArithExtendedRegister(
+ MachineOperand &Root) const {
+ if (!Root.isReg())
+ return None;
+ MachineRegisterInfo &MRI =
+ Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ uint64_t ShiftVal = 0;
+ Register ExtReg;
+ AArch64_AM::ShiftExtendType Ext;
+ MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
+ if (!RootDef)
+ return None;
+
+ if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
+ return None;
+
+ // Check if we can fold a shift and an extend.
+ if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
+ // Look for a constant on the RHS of the shift.
+ MachineOperand &RHS = RootDef->getOperand(2);
+ Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
+ if (!MaybeShiftVal)
+ return None;
+ ShiftVal = *MaybeShiftVal;
+ if (ShiftVal > 4)
+ return None;
+ // Look for a valid extend instruction on the LHS of the shift.
+ MachineOperand &LHS = RootDef->getOperand(1);
+ MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
+ if (!ExtDef)
+ return None;
+ Ext = getExtendTypeForInst(*ExtDef, MRI);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return None;
+ ExtReg = ExtDef->getOperand(1).getReg();
+ } else {
+ // Didn't get a shift. Try just folding an extend.
+ Ext = getExtendTypeForInst(*RootDef, MRI);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return None;
+ ExtReg = RootDef->getOperand(1).getReg();
+
+    // If we have a 32-bit instruction which zeroes out the high half of a
+ // register, we get an implicit zero extend for free. Check if we have one.
+ // FIXME: We actually emit the extend right now even though we don't have
+ // to.
+ if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
+ MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
+ if (ExtInst && isDef32(*ExtInst))
+ return None;
+ }
+ }
+
+ // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
+ // copy.
+ MachineIRBuilder MIB(*RootDef);
+ ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addImm(getArithExtendImm(Ext, ShiftVal));
+ }}};
+}
+
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
@@ -4003,6 +4640,51 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
MIB.addImm(CstVal.getValue());
}
+void AArch64InstructionSelector::renderLogicalImm32(
+ MachineInstrBuilder &MIB, const MachineInstr &I) const {
+ assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
+ uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
+ MIB.addImm(Enc);
+}
+
+void AArch64InstructionSelector::renderLogicalImm64(
+ MachineInstrBuilder &MIB, const MachineInstr &I) const {
+ assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
+ uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
+ MIB.addImm(Enc);
+}
+
+bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
+ const MachineInstr &MI, unsigned NumBytes) const {
+ if (!MI.mayLoadOrStore())
+ return false;
+ assert(MI.hasOneMemOperand() &&
+ "Expected load/store to have only one mem op!");
+ return (*MI.memoperands_begin())->getSize() == NumBytes;
+}
+
+bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
+ return false;
+
+  // Only return true if we know the operation will zero out the high half of
+ // the 64-bit register. Truncates can be subregister copies, which don't
+ // zero out the high bits. Copies and other copy-like instructions can be
+ // fed by truncates, or could be lowered as subregister copies.
+ switch (MI.getOpcode()) {
+ default:
+ return true;
+ case TargetOpcode::COPY:
+ case TargetOpcode::G_BITCAST:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_PHI:
+ return false;
+ }
+}
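+
+// For instance, a 32-bit G_ADD qualifies: AArch64 w-register writes always
+// zero bits [63:32] of the full register. A G_TRUNC from s64 does not, since
+// it may lower to a plain subregister copy that leaves the high bits of the
+// source x-register live.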
+
namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index a985b330eafa..7a1901bd5b1e 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -13,7 +13,9 @@
#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -50,6 +52,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
const LLT v2s64 = LLT::vector(2, 64);
const LLT v2p0 = LLT::vector(2, p0);
+ // FIXME: support subtargets which have neon/fp-armv8 disabled.
+ if (!ST.hasNEON() || !ST.hasFPARMv8()) {
+ computeTables();
+ return;
+ }
+
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
.legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
.clampScalar(0, s1, s64)
@@ -74,7 +82,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder(G_BSWAP)
.legalFor({s32, s64, v4s32, v2s32, v2s64})
- .clampScalar(0, s16, s64)
+ .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
@@ -104,6 +112,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder({G_SDIV, G_UDIV})
.legalFor({s32, s64})
+ .libcallFor({s128})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
.scalarize(0);
@@ -115,8 +124,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
AmtTy.getSizeInBits() == 32;
})
- .legalFor(
- {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
+ .legalFor({{s32, s32},
+ {s32, s64},
+ {s64, s64},
+ {v2s32, v2s32},
+ {v4s32, v4s32},
+ {v2s64, v2s64}})
.clampScalar(1, s32, s64)
.clampScalar(0, s32, s64)
.minScalarSameAs(1, 0);
@@ -191,14 +204,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.legalIf([=](const LegalityQuery &Query) {
const LLT &Ty0 = Query.Types[0];
const LLT &Ty1 = Query.Types[1];
- if (Ty1 != s32 && Ty1 != s64)
+ if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
return false;
if (Ty1 == p0)
return true;
return isPowerOf2_32(Ty0.getSizeInBits()) &&
(Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
})
- .clampScalar(1, s32, s64)
+ .clampScalar(1, s32, s128)
.widenScalarToNextPow2(1)
.maxScalarIf(typeInSet(1, {s32}), 0, s16)
.maxScalarIf(typeInSet(1, {s64}), 0, s32)
@@ -236,6 +249,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
{s32, p0, 32, 8},
{s64, p0, 64, 8},
{p0, p0, 64, 8},
+ {s128, p0, 128, 8},
{v8s8, p0, 64, 8},
{v16s8, p0, 128, 8},
{v4s16, p0, 64, 8},
@@ -247,14 +261,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.legalForTypesWithMemDesc({{s32, p0, 8, 8},
{s32, p0, 16, 8}})
.clampScalar(0, s8, s64)
- .widenScalarToNextPow2(0)
- // TODO: We could support sum-of-pow2's but the lowering code doesn't know
- // how to do that yet.
- .unsupportedIfMemSizeNotPow2()
+ .lowerIfMemSizeNotPow2()
// Lower any any-extending loads left into G_ANYEXT and G_LOAD
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
+ .widenScalarToNextPow2(0)
.clampMaxNumElements(0, s32, 2)
.clampMaxNumElements(0, s64, 1)
.customIf(IsPtrVecPred);
@@ -262,9 +274,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder(G_STORE)
.legalForTypesWithMemDesc({{s8, p0, 8, 8},
{s16, p0, 16, 8},
+ {s32, p0, 8, 8},
+ {s32, p0, 16, 8},
{s32, p0, 32, 8},
{s64, p0, 64, 8},
{p0, p0, 64, 8},
+ {s128, p0, 128, 8},
{v16s8, p0, 128, 8},
{v4s16, p0, 64, 8},
{v8s16, p0, 128, 8},
@@ -272,10 +287,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
{v4s32, p0, 128, 8},
{v2s64, p0, 128, 8}})
.clampScalar(0, s8, s64)
- .widenScalarToNextPow2(0)
- // TODO: We could support sum-of-pow2's but the lowering code doesn't know
- // how to do that yet.
- .unsupportedIfMemSizeNotPow2()
+ .lowerIfMemSizeNotPow2()
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
@@ -305,8 +317,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
{v8s16, v8s16},
{v8s8, v8s8},
{v16s8, v16s8}})
- .clampScalar(0, s32, s32)
.clampScalar(1, s32, s64)
+ .clampScalar(0, s32, s32)
.minScalarEltSameAsIf(
[=](const LegalityQuery &Query) {
const LLT &Ty = Query.Types[0];
@@ -330,33 +342,40 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.widenScalarToNextPow2(1);
// Extensions
- getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
- .legalIf([=](const LegalityQuery &Query) {
- unsigned DstSize = Query.Types[0].getSizeInBits();
-
- // Make sure that we have something that will fit in a register, and
- // make sure it's a power of 2.
- if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
- return false;
+ auto ExtLegalFunc = [=](const LegalityQuery &Query) {
+ unsigned DstSize = Query.Types[0].getSizeInBits();
+
+ if (DstSize == 128 && !Query.Types[0].isVector())
+ return false; // Extending to a scalar s128 needs narrowing.
+
+ // Make sure that we have something that will fit in a register, and
+ // make sure it's a power of 2.
+ if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
+ return false;
- const LLT &SrcTy = Query.Types[1];
+ const LLT &SrcTy = Query.Types[1];
- // Special case for s1.
- if (SrcTy == s1)
- return true;
+ // Special case for s1.
+ if (SrcTy == s1)
+ return true;
- // Make sure we fit in a register otherwise. Don't bother checking that
- // the source type is below 128 bits. We shouldn't be allowing anything
- // through which is wider than the destination in the first place.
- unsigned SrcSize = SrcTy.getSizeInBits();
- if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
- return false;
+ // Make sure we fit in a register otherwise. Don't bother checking that
+ // the source type is below 128 bits. We shouldn't be allowing anything
+ // through which is wider than the destination in the first place.
+ unsigned SrcSize = SrcTy.getSizeInBits();
+ if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
+ return false;
- return true;
- });
+ return true;
+ };
+ getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
+ .legalIf(ExtLegalFunc)
+ .clampScalar(0, s64, s64); // Just for s128, others are handled above.
getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
+ getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+
// FP conversions
getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
{{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
@@ -591,6 +610,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
return Query.Types[0] == p0 && Query.Types[1] == s64;
});
+ getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
+
computeTables();
verify(*ST.getInstrInfo());
}
@@ -617,6 +638,24 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
llvm_unreachable("expected switch to return");
}
+bool AArch64LegalizerInfo::legalizeIntrinsic(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ switch (MI.getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ if (createMemLibcall(MIRBuilder, MRI, MI) ==
+ LegalizerHelper::UnableToLegalize)
+ return false;
+ MI.eraseFromParent();
+ return true;
+ default:
+ break;
+ }
+ return true;
+}
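+
+// E.g. a G_INTRINSIC_W_SIDE_EFFECTS for llvm.memcpy is rewritten here into a
+// call to the "memcpy" libcall via createMemLibcall and then erased; unknown
+// intrinsic IDs fall through and return true so that the rest of
+// legalization can still handle them.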
+
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const {
@@ -655,7 +694,7 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
// legalized. In order to allow further legalization of the inst, we create
// a new instruction and erase the existing one.
- unsigned ValReg = MI.getOperand(0).getReg();
+ Register ValReg = MI.getOperand(0).getReg();
const LLT ValTy = MRI.getType(ValReg);
if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
@@ -672,7 +711,7 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
} else {
- unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
+ Register NewReg = MRI.createGenericVirtualRegister(NewTy);
auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
MIRBuilder.buildBitcast({ValReg}, {NewLoad});
}
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.h b/lib/Target/AArch64/AArch64LegalizerInfo.h
index f3362a18620f..15161bab466c 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -31,6 +31,9 @@ public:
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const override;
+ bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const override;
+
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 65b5f906e3f6..a0c4a25bb5b9 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -201,8 +201,22 @@ static bool isNarrowStore(unsigned Opc) {
}
}
+// These instructions set the memory tag and either keep the memory contents
+// unchanged or set them to zero, ignoring the address part of the source register.
+static bool isTagStore(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case AArch64::STGOffset:
+ case AArch64::STZGOffset:
+ case AArch64::ST2GOffset:
+ case AArch64::STZ2GOffset:
+ return true;
+ }
+}
+
// Scaling factor for unscaled load or store.
-static int getMemScale(MachineInstr &MI) {
+static int getMemScale(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
llvm_unreachable("Opcode has unknown scale!");
@@ -255,6 +269,11 @@ static int getMemScale(MachineInstr &MI) {
case AArch64::STURQi:
case AArch64::LDPQi:
case AArch64::STPQi:
+ case AArch64::STGOffset:
+ case AArch64::STZGOffset:
+ case AArch64::ST2GOffset:
+ case AArch64::STZ2GOffset:
+ case AArch64::STGPi:
return 16;
}
}
@@ -449,6 +468,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
return AArch64::STPWpre;
case AArch64::STPXi:
return AArch64::STPXpre;
+ case AArch64::STGOffset:
+ return AArch64::STGPreIndex;
+ case AArch64::STZGOffset:
+ return AArch64::STZGPreIndex;
+ case AArch64::ST2GOffset:
+ return AArch64::ST2GPreIndex;
+ case AArch64::STZ2GOffset:
+ return AArch64::STZ2GPreIndex;
+ case AArch64::STGPi:
+ return AArch64::STGPpre;
}
}
@@ -518,6 +547,16 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
return AArch64::STPWpost;
case AArch64::STPXi:
return AArch64::STPXpost;
+ case AArch64::STGOffset:
+ return AArch64::STGPostIndex;
+ case AArch64::STZGOffset:
+ return AArch64::STZGPostIndex;
+ case AArch64::ST2GOffset:
+ return AArch64::ST2GPostIndex;
+ case AArch64::STZ2GOffset:
+ return AArch64::STZ2GPostIndex;
+ case AArch64::STGPi:
+ return AArch64::STGPpost;
}
}
@@ -536,10 +575,30 @@ static bool isPairedLdSt(const MachineInstr &MI) {
case AArch64::STPQi:
case AArch64::STPWi:
case AArch64::STPXi:
+ case AArch64::STGPi:
return true;
}
}
+// Returns the scale and offset range of pre/post indexed variants of MI.
+static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
+ int &MinOffset, int &MaxOffset) {
+ bool IsPaired = isPairedLdSt(MI);
+ bool IsTagStore = isTagStore(MI);
+ // ST*G and all paired ldst have the same scale in pre/post-indexed variants
+ // as in the "unsigned offset" variant.
+ // All other pre/post indexed ldst instructions are unscaled.
+ Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1;
+
+ if (IsPaired) {
+ MinOffset = -64;
+ MaxOffset = 63;
+ } else {
+ MinOffset = -256;
+ MaxOffset = 255;
+ }
+}
+
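A minimal standalone sketch of the immediate-legality rule that getPrePostIndexedMemOpInfo encodes, as used by isMatchingUpdateInsn further down (illustrative code, not part of the patch; the helper name is invented):

#include <cassert>

// Paired loads/stores and tag stores keep the scale of their "unsigned
// offset" form in the pre/post-indexed variants; everything else is
// unscaled. The update immediate must be a multiple of the scale, and the
// scaled value must fit the signed range: 7 bits for paired instructions,
// 9 bits for the rest.
static bool fitsPrePostIndexedImm(bool IsPaired, bool IsTagStore, int MemScale,
                                  int UpdateOffset) {
  int Scale = (IsTagStore || IsPaired) ? MemScale : 1;
  int MinOffset = IsPaired ? -64 : -256;
  int MaxOffset = IsPaired ? 63 : 255;
  if (UpdateOffset % Scale != 0)
    return false;
  int Scaled = UpdateOffset / Scale;
  return Scaled >= MinOffset && Scaled <= MaxOffset;
}

int main() {
  assert(fitsPrePostIndexedImm(false, true, 16, 256));  // STG: 256/16 = 16
  assert(!fitsPrePostIndexedImm(false, true, 16, 8));   // STG: not a multiple
  assert(fitsPrePostIndexedImm(true, false, 8, 504));   // STPXi: 504/8 = 63
  assert(!fitsPrePostIndexedImm(true, false, 8, 512));  // STPXi: 64 > 63
}

The merged pre/post-indexed instruction encodes the scaled value, which is why mergeUpdateInsn below adds Value / Scale as the immediate.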
static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
unsigned PairedRegOp = 0) {
assert(PairedRegOp < 2 && "Unexpected register operand idx.");
@@ -618,6 +677,11 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
case AArch64::LDRWui:
case AArch64::LDRHHui:
case AArch64::LDRBBui:
+ case AArch64::STGOffset:
+ case AArch64::STZGOffset:
+ case AArch64::ST2GOffset:
+ case AArch64::STZ2GOffset:
+ case AArch64::STGPi:
// Unscaled instructions.
case AArch64::STURSi:
case AArch64::STURDi:
@@ -808,7 +872,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
// STRWui %w1, ...
// USE kill %w1 ; need to clear kill flag when moving STRWui downwards
// STRW %w0
- unsigned Reg = getLdStRegOp(*I).getReg();
+ Register Reg = getLdStRegOp(*I).getReg();
for (MachineInstr &MI : make_range(std::next(I), Paired))
MI.clearRegisterKills(Reg, TRI);
}
@@ -837,9 +901,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineOperand &DstMO = MIB->getOperand(SExtIdx);
// Right now, DstMO has the extended register, since it comes from an
// extended opcode.
- unsigned DstRegX = DstMO.getReg();
+ Register DstRegX = DstMO.getReg();
// Get the W variant of that register.
- unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
+ Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
// Update the result of LDP to use the W instead of the X variant.
DstMO.setReg(DstRegW);
LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
@@ -882,9 +946,9 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
int LoadSize = getMemScale(*LoadI);
int StoreSize = getMemScale(*StoreI);
- unsigned LdRt = getLdStRegOp(*LoadI).getReg();
+ Register LdRt = getLdStRegOp(*LoadI).getReg();
const MachineOperand &StMO = getLdStRegOp(*StoreI);
- unsigned StRt = getLdStRegOp(*StoreI).getReg();
+ Register StRt = getLdStRegOp(*StoreI).getReg();
bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
assert((IsStoreXReg ||
@@ -933,10 +997,10 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
? getLdStOffsetOp(*StoreI).getImm()
: getLdStOffsetOp(*StoreI).getImm() * StoreSize;
int Width = LoadSize * 8;
- unsigned DestReg = IsStoreXReg
- ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
- &AArch64::GPR64RegClass)
- : LdRt;
+ unsigned DestReg =
+ IsStoreXReg ? Register(TRI->getMatchingSuperReg(
+ LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
+ : LdRt;
assert((UnscaledLdOffset >= UnscaledStOffset &&
(UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
@@ -1042,7 +1106,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator MBBI = I;
MachineInstr &LoadMI = *I;
- unsigned BaseReg = getLdStBaseOp(LoadMI).getReg();
+ Register BaseReg = getLdStBaseOp(LoadMI).getReg();
// If the load is the first instruction in the block, there's obviously
// not any matching store.
@@ -1156,8 +1220,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
- unsigned Reg = getLdStRegOp(FirstMI).getReg();
- unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
+ Register Reg = getLdStRegOp(FirstMI).getReg();
+ Register BaseReg = getLdStBaseOp(FirstMI).getReg();
int Offset = getLdStOffsetOp(FirstMI).getImm();
int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
@@ -1188,7 +1252,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// check for +1/-1. Make sure to check the new instruction offset is
// actually an immediate and not a symbolic reference destined for
// a relocation.
- unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
+ Register MIBaseReg = getLdStBaseOp(MI).getReg();
int MIOffset = getLdStOffsetOp(MI).getImm();
bool MIIsUnscaled = TII->isUnscaledLdSt(MI);
if (IsUnscaled != MIIsUnscaled) {
@@ -1328,18 +1392,19 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
: getPostIndexedOpcode(I->getOpcode());
MachineInstrBuilder MIB;
+ int Scale, MinOffset, MaxOffset;
+ getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
if (!isPairedLdSt(*I)) {
// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I))
.add(getLdStBaseOp(*I))
- .addImm(Value)
+ .addImm(Value / Scale)
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
} else {
// Paired instruction.
- int Scale = getMemScale(*I);
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I, 0))
@@ -1395,28 +1460,21 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
MI.getOperand(1).getReg() != BaseReg)
break;
- bool IsPairedInsn = isPairedLdSt(MemMI);
int UpdateOffset = MI.getOperand(2).getImm();
if (MI.getOpcode() == AArch64::SUBXri)
UpdateOffset = -UpdateOffset;
- // For non-paired load/store instructions, the immediate must fit in a
- // signed 9-bit integer.
- if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
+ // The immediate must be a multiple of the scaling factor of the pre/post
+ // indexed instruction.
+ int Scale, MinOffset, MaxOffset;
+ getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
+ if (UpdateOffset % Scale != 0)
break;
- // For paired load/store instructions, the immediate must be a multiple of
- // the scaling factor. The scaled offset must also fit into a signed 7-bit
- // integer.
- if (IsPairedInsn) {
- int Scale = getMemScale(MemMI);
- if (UpdateOffset % Scale != 0)
- break;
-
- int ScaledOffset = UpdateOffset / Scale;
- if (ScaledOffset > 63 || ScaledOffset < -64)
- break;
- }
+ // Scaled offset must fit in the instruction immediate.
+ int ScaledOffset = UpdateOffset / Scale;
+ if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
+ break;
// If we have a non-zero Offset, we check that it matches the amount
// we're adding to the register.
@@ -1433,7 +1491,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;
- unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
+ Register BaseReg = getLdStBaseOp(MemMI).getReg();
int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
// Scan forward looking for post-index opportunities. Updating instructions
@@ -1442,13 +1500,19 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
if (MIUnscaledOffset != UnscaledOffset)
return E;
- // If the base register overlaps a destination register, we can't
- // merge the update.
- bool IsPairedInsn = isPairedLdSt(MemMI);
- for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
- unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
- if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
- return E;
+ // If the base register overlaps a source/destination register, we can't
+ // merge the update. This does not apply to tag store instructions, which
+ // ignore the address part of the source register.
+ // Nor does it apply to STGPi, which, unlike normal stores, has no
+ // unpredictable behavior in this case and always performs the writeback
+ // after reading the source register value.
+ if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
+ bool IsPairedInsn = isPairedLdSt(MemMI);
+ for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
+ Register DestReg = getLdStRegOp(MemMI, i).getReg();
+ if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ return E;
+ }
}
// Track which register units have been modified and used between the first
@@ -1487,7 +1551,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;
- unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
+ Register BaseReg = getLdStBaseOp(MemMI).getReg();
int Offset = getLdStOffsetOp(MemMI).getImm();
// If the load/store is the first instruction in the block, there's obviously
@@ -1496,11 +1560,13 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
return E;
// If the base register overlaps a destination register, we can't
// merge the update.
- bool IsPairedInsn = isPairedLdSt(MemMI);
- for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
- unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
- if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
- return E;
+ if (!isTagStore(MemMI)) {
+ bool IsPairedInsn = isPairedLdSt(MemMI);
+ for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
+ Register DestReg = getLdStRegOp(MemMI, i).getReg();
+ if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ return E;
+ }
}
// Track which register units have been modified and used between the first
@@ -1659,7 +1725,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
// however, is not, so adjust here.
int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
- // Look forward to try to find a post-index instruction. For example,
+ // Look forward to try to find a pre-index instruction. For example,
// ldr x1, [x0, #64]
// add x0, x0, #64
// merged into:
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index e7d4a2789a28..afd5ae6bcbf2 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -148,6 +148,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
RefFlags |= AArch64MCExpr::VK_TLSDESC;
break;
}
+ } else if (MO.getTargetFlags() & AArch64II::MO_PREL) {
+ RefFlags |= AArch64MCExpr::VK_PREL;
} else {
// No modifier means this is a generic reference, classified as absolute for
// the cases where it matters (:abs_g0: etc).
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 0efeeb272ec1..0009fb7b5520 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include <cassert>
@@ -95,6 +96,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// returned struct in a register. This field holds the virtual register into
/// which the sret argument is passed.
unsigned SRetReturnReg = 0;
+ /// The SVE stack size (for predicates and data vectors) is maintained here
+ /// rather than in FrameInfo, as the placement and Stack IDs are
+ /// target-specific.
+ uint64_t StackSizeSVE = 0;
+
+ /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid.
+ bool HasCalculatedStackSizeSVE = false;
/// Has a value when it is known whether or not the function uses a
/// redzone, and no value otherwise.
@@ -131,6 +139,15 @@ public:
ArgumentStackToRestore = bytes;
}
+ bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
+
+ void setStackSizeSVE(uint64_t S) {
+ HasCalculatedStackSizeSVE = true;
+ StackSizeSVE = S;
+ }
+
+ uint64_t getStackSizeSVE() const { return StackSizeSVE; }
+
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index aff861aae6be..d503c39b1f90 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -162,11 +162,11 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
LiveIntervals &LIs = G.getMetadata().LIS;
- if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) {
- LLVM_DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd)
- << '\n');
- LLVM_DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra)
- << '\n');
+ if (Register::isPhysicalRegister(Rd) || Register::isPhysicalRegister(Ra)) {
+ LLVM_DEBUG(dbgs() << "Rd is a physical reg:"
+ << Register::isPhysicalRegister(Rd) << '\n');
+ LLVM_DEBUG(dbgs() << "Ra is a physical reg:"
+ << Register::isPhysicalRegister(Ra) << '\n');
return false;
}
@@ -359,8 +359,8 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) {
case AArch64::FMADDDrrr:
case AArch64::FNMSUBDrrr:
case AArch64::FNMADDDrrr: {
- unsigned Rd = MI.getOperand(0).getReg();
- unsigned Ra = MI.getOperand(3).getReg();
+ Register Rd = MI.getOperand(0).getReg();
+ Register Ra = MI.getOperand(3).getReg();
if (addIntraChainConstraint(G, Rd, Ra))
addInterChainConstraint(G, Rd, Ra);
@@ -369,7 +369,7 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) {
case AArch64::FMLAv2f32:
case AArch64::FMLSv2f32: {
- unsigned Rd = MI.getOperand(0).getReg();
+ Register Rd = MI.getOperand(0).getReg();
addInterChainConstraint(G, Rd, Rd);
break;
}
diff --git a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
index 5f7245bfbd74..d30ea120bae4 100644
--- a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -15,7 +15,9 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@@ -25,12 +27,31 @@
using namespace llvm;
using namespace MIPatternMatch;
+#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#include "AArch64GenGICombiner.inc"
+#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+
namespace {
+#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#include "AArch64GenGICombiner.inc"
+#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+
class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
+ GISelKnownBits *KB;
+ MachineDominatorTree *MDT;
+
public:
- AArch64PreLegalizerCombinerInfo()
+ AArch64GenPreLegalizerCombinerHelper Generated;
+
+ AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
+ GISelKnownBits *KB, MachineDominatorTree *MDT)
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
- /*LegalizerInfo*/ nullptr) {}
+ /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
+ KB(KB), MDT(MDT) {
+ if (!Generated.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+ }
+
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
MachineIRBuilder &B) const override;
};
@@ -38,24 +59,50 @@ public:
bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B);
+ CombinerHelper Helper(Observer, B, KB, MDT);
switch (MI.getOpcode()) {
- default:
- return false;
- case TargetOpcode::COPY:
- return Helper.tryCombineCopy(MI);
- case TargetOpcode::G_BR:
- return Helper.tryCombineBr(MI);
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return Helper.tryCombineConcatVectors(MI);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return Helper.tryCombineShuffleVector(MI);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD:
- return Helper.tryCombineExtendingLoads(MI);
+ case TargetOpcode::G_ZEXTLOAD: {
+ bool Changed = false;
+ Changed |= Helper.tryCombineExtendingLoads(MI);
+ Changed |= Helper.tryCombineIndexedLoadStore(MI);
+ return Changed;
+ }
+ case TargetOpcode::G_STORE:
+ return Helper.tryCombineIndexedLoadStore(MI);
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ switch (MI.getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset: {
+ // If we're at -O0, set a max length of 32 bytes to inline; otherwise let
+ // the other heuristics decide.
+ unsigned MaxLen = EnableOpt ? 0 : 32;
+ // Try to inline memcpy-family calls unless we're optimizing for minimum
+ // size.
+ return (!EnableMinSize) ? Helper.tryCombineMemCpyFamily(MI, MaxLen)
+ : false;
+ }
+ default:
+ break;
+ }
}
+ if (Generated.tryCombineAll(Observer, MI, B))
+ return true;
+
return false;
}
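The intrinsic case above gates the memcpy-family combine on both the optimization level and minimum-size; a compact model of that policy (a sketch with invented names; only EnableOpt, EnableMinSize, and the 32-byte cap come from the patch):

// Decide whether to attempt the memcpy/memmove/memset combine and what
// maximum length to force-inline; MaxLen == 0 defers to the generic
// size heuristics.
struct MemTransferPolicy {
  bool Combine;
  unsigned MaxLen;
};

static MemTransferPolicy memTransferPolicy(bool EnableOpt, bool EnableMinSize) {
  if (EnableMinSize)
    return {false, 0};               // never force-inline when minimizing size
  return {true, EnableOpt ? 0u : 32u}; // at -O0, cap inlined copies at 32 bytes
}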
+#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+#include "AArch64GenGICombiner.inc"
+#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+
// Pass boilerplate
// ================
@@ -63,24 +110,33 @@ class AArch64PreLegalizerCombiner : public MachineFunctionPass {
public:
static char ID;
- AArch64PreLegalizerCombiner();
+ AArch64PreLegalizerCombiner(bool IsOptNone = false);
StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+private:
+ bool IsOptNone;
};
-}
+} // end anonymous namespace
void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.setPreservesCFG();
getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ if (!IsOptNone) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
MachineFunctionPass::getAnalysisUsage(AU);
}
-AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner() : MachineFunctionPass(ID) {
+AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner(bool IsOptNone)
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
@@ -89,7 +145,14 @@ bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
MachineFunctionProperties::Property::FailedISel))
return false;
auto *TPC = &getAnalysis<TargetPassConfig>();
- AArch64PreLegalizerCombinerInfo PCInfo;
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT =
+ IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+ AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
+ F.hasMinSize(), KB, MDT);
Combiner C(PCInfo, TPC);
return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}
@@ -99,13 +162,14 @@ INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
"Combine AArch64 machine instrs before legalization",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
"Combine AArch64 machine instrs before legalization", false,
false)
namespace llvm {
-FunctionPass *createAArch64PreLegalizeCombiner() {
- return new AArch64PreLegalizerCombiner();
+FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone) {
+ return new AArch64PreLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index b52259cc9acd..8ec73aa3c040 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -563,12 +563,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getSameKindOfOperandsMapping(MI);
}
case TargetOpcode::COPY: {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
// Check if one of the register is not a generic register.
- if ((TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ if ((Register::isPhysicalRegister(DstReg) ||
!MRI.getType(DstReg).isValid()) ||
- (TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+ (Register::isPhysicalRegister(SrcReg) ||
!MRI.getType(SrcReg).isValid())) {
const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
@@ -635,6 +635,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Some of the floating-point instructions have mixed GPR and FPR operands:
// fine-tune the computed mapping.
switch (Opc) {
+ case TargetOpcode::G_TRUNC: {
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ break;
+ }
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
@@ -687,7 +693,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_STORE:
// Check if that store is fed by fp instructions.
if (OpRegBankIdx[0] == PMI_FirstGPR) {
- unsigned VReg = MI.getOperand(0).getReg();
+ Register VReg = MI.getOperand(0).getReg();
if (!VReg)
break;
MachineInstr *DefMI = MRI.getVRegDef(VReg);
@@ -702,11 +708,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
// If we're taking in vectors, we have no choice but to put everything on
- // FPRs.
+ // FPRs, except for the condition. The condition must always be on a GPR.
LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
if (SrcTy.isVector()) {
- for (unsigned Idx = 0; Idx < 4; ++Idx)
- OpRegBankIdx[Idx] = PMI_FirstFPR;
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
break;
}
@@ -740,7 +745,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// This doesn't check the condition, since it's just whatever is in NZCV.
// This isn't passed explicitly in a register to fcsel/csel.
for (unsigned Idx = 2; Idx < 4; ++Idx) {
- unsigned VReg = MI.getOperand(Idx).getReg();
+ Register VReg = MI.getOperand(Idx).getReg();
MachineInstr *DefMI = MRI.getVRegDef(VReg);
if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
onlyDefinesFP(*DefMI, MRI, TRI))
@@ -750,8 +755,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// If we have more FP constraints than not, then move everything over to
// FPR.
if (NumFP >= 2)
- for (unsigned Idx = 0; Idx < 4; ++Idx)
- OpRegBankIdx[Idx] = PMI_FirstFPR;
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
break;
}
@@ -764,7 +768,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
// UNMERGE into scalars from a vector should always use FPR.
// Likewise if any of the uses are FP instructions.
- if (SrcTy.isVector() ||
+ if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
[&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
// Set the register bank of every operand to FPR.
@@ -795,12 +799,21 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Index needs to be a GPR.
OpRegBankIdx[3] = PMI_FirstGPR;
break;
+ case TargetOpcode::G_EXTRACT: {
+ // For s128 sources we have to use fpr.
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ if (SrcTy.getSizeInBits() == 128) {
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ OpRegBankIdx[1] = PMI_FirstFPR;
+ }
+ break;
+ }
case TargetOpcode::G_BUILD_VECTOR:
// If the first source operand belongs to a FPR register bank, then make
// sure that we preserve that.
if (OpRegBankIdx[1] != PMI_FirstGPR)
break;
- unsigned VReg = MI.getOperand(1).getReg();
+ Register VReg = MI.getOperand(1).getReg();
if (!VReg)
break;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 6d5a4e3d2f76..de176088595d 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64StackOffset.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
@@ -23,10 +24,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -63,8 +64,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_AAPCS_SwiftError_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_AArch64_RT_MostRegs_SaveList;
- else
- return CSR_AArch64_AAPCS_SaveList;
+ if (MF->getSubtarget<AArch64Subtarget>().isTargetDarwin())
+ return CSR_Darwin_AArch64_AAPCS_SaveList;
+ return CSR_AArch64_AAPCS_SaveList;
}
const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
@@ -120,6 +122,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
: CSR_AArch64_CXX_TLS_Darwin_RegMask;
if (CC == CallingConv::AArch64_VectorCall)
return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
+ if (CC == CallingConv::AArch64_SVE_VectorCall)
+ return CSR_AArch64_SVE_AAPCS_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
@@ -388,7 +392,7 @@ bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
int64_t Offset) const {
assert(Offset <= INT_MAX && "Offset too big to fit in int.");
assert(MI && "Unable to get the legal offset for nil instruction.");
- int SaveOffset = Offset;
+ StackOffset SaveOffset(Offset, MVT::i8);
return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
}
@@ -418,7 +422,9 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
- int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ // AArch64 doesn't need the general 64-bit offsets.
+ StackOffset Off(Offset, MVT::i8);
+
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
@@ -441,40 +447,69 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64InstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const AArch64FrameLowering *TFI = getFrameLowering(MF);
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ bool Tagged =
+ MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
unsigned FrameReg;
- int Offset;
// Special handling of dbg_value, stackmap and patchpoint instructions.
if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
- Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
- /*PreferFP=*/true,
- /*ForSimm=*/false);
- Offset += MI.getOperand(FIOperandNum + 1).getImm();
+ StackOffset Offset =
+ TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
+ /*PreferFP=*/true,
+ /*ForSimm=*/false);
+ Offset += StackOffset(MI.getOperand(FIOperandNum + 1).getImm(), MVT::i8);
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getBytes());
return;
}
if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
- Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
+ int Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
FI.ChangeToImmediate(Offset);
return;
}
+ StackOffset Offset;
if (MI.getOpcode() == AArch64::TAGPstack) {
// TAGPstack must use the virtual frame register in its 3rd operand.
- const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
FrameReg = MI.getOperand(3).getReg();
- Offset =
- MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset();
+ Offset = {MFI.getObjectOffset(FrameIndex) +
+ AFI->getTaggedBasePointerOffset(),
+ MVT::i8};
+ } else if (Tagged) {
+ StackOffset SPOffset = {
+ MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), MVT::i8};
+ if (MFI.hasVarSizedObjects() ||
+ isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) !=
+ (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
+ // Can't update to SP + offset in place. Precalculate the tagged pointer
+ // in a scratch register.
+ Offset = TFI->resolveFrameIndexReference(
+ MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+ Register ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset,
+ TII);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0);
+ MI.getOperand(FIOperandNum)
+ .ChangeToRegister(ScratchReg, false, false, true);
+ return;
+ }
+ FrameReg = AArch64::SP;
+ Offset = {MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(),
+ MVT::i8};
} else {
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
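When the tagged path above can't rewrite the access to SP+offset in place, it computes the tagged address into a scratch register: an address add via emitFrameOffset, then an LDG that re-derives the allocation tag. A sketch of the decision predicate (enumerator values mirror the flags the patch tests; treat the names and values here as illustrative):

enum : unsigned {
  AArch64FrameOffsetCannotUpdate = 0x0, // offset cannot apply to instruction
  AArch64FrameOffsetCanUpdate = 0x1,    // offset can apply, at least partly
  AArch64FrameOffsetIsLegal = 0x2,      // resulting offset is legal
};

// An in-place rewrite requires that the immediate can be fully updated and
// that the resulting SP+offset is legal; anything else (or variable-sized
// objects, which make the SP-relative offset unknown) forces the
// scratch-register sequence: emitFrameOffset(scratch, base, off) followed
// by LDG scratch, [scratch, #0].
static bool needsScratchTagMaterialization(bool HasVarSizedObjects,
                                           unsigned OffsetStatus) {
  return HasVarSizedObjects ||
         OffsetStatus != (AArch64FrameOffsetCanUpdate |
                          AArch64FrameOffsetIsLegal);
}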
@@ -490,7 +525,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above. Handle the rest, providing a register that is
// SP+LargeImm.
- unsigned ScratchReg =
+ Register ScratchReg =
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
diff --git a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index 854670079e40..28a7e680849b 100644
--- a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -426,16 +426,16 @@ bool AArch64SIMDInstrOpt::optimizeVectElement(MachineInstr &MI) {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// Get the operands of the current SIMD arithmetic instruction.
- unsigned MulDest = MI.getOperand(0).getReg();
- unsigned SrcReg0 = MI.getOperand(1).getReg();
+ Register MulDest = MI.getOperand(0).getReg();
+ Register SrcReg0 = MI.getOperand(1).getReg();
unsigned Src0IsKill = getKillRegState(MI.getOperand(1).isKill());
- unsigned SrcReg1 = MI.getOperand(2).getReg();
+ Register SrcReg1 = MI.getOperand(2).getReg();
unsigned Src1IsKill = getKillRegState(MI.getOperand(2).isKill());
unsigned DupDest;
// Instructions of interest have either 4 or 5 operands.
if (MI.getNumOperands() == 5) {
- unsigned SrcReg2 = MI.getOperand(3).getReg();
+ Register SrcReg2 = MI.getOperand(3).getReg();
unsigned Src2IsKill = getKillRegState(MI.getOperand(3).isKill());
unsigned LaneNumber = MI.getOperand(4).getImm();
// Create a new DUP instruction. Note that if an equivalent DUP instruction
diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 79ab42f4c080..b573eac76754 100644
--- a/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -82,11 +82,11 @@ let Predicates = [HasSVE] in {
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">;
defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">;
- defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">;
- defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">;
+ defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
+ defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
- defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">;
- defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">;
+ defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
+ defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">;
defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">;
@@ -94,14 +94,14 @@ let Predicates = [HasSVE] in {
defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth">;
defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw">;
defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw">;
- defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs">;
- defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg">;
-
- defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls">;
- defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz">;
- defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt">;
- defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot">;
- defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not">;
+ defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", int_aarch64_sve_abs>;
+ defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", int_aarch64_sve_neg>;
+
+ defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", null_frag>;
+ defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", null_frag>;
+ defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", int_aarch64_sve_cnt>;
+ defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", null_frag>;
+ defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", null_frag>;
defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">;
defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">;
@@ -138,12 +138,12 @@ let Predicates = [HasSVE] in {
defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr">;
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">;
- defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd">;
- defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub">;
- defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul">;
- defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul">;
- defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps">;
- defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts">;
+ defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
+ defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", null_frag>;
+ defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", null_frag>;
+ defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul", null_frag>;
+ defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", null_frag>;
+ defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", null_frag>;
defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
@@ -187,7 +187,7 @@ let Predicates = [HasSVE] in {
defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">;
// Splat scalar register (unpredicated, GPR or vector + element index)
- defm DUP_ZR : sve_int_perm_dup_r<"dup">;
+ defm DUP_ZR : sve_int_perm_dup_r<"dup", AArch64dup>;
defm DUP_ZZI : sve_int_perm_dup_i<"dup">;
// Splat scalar register (predicated)
@@ -211,13 +211,13 @@ let Predicates = [HasSVE] in {
defm REV_PP : sve_int_perm_reverse_p<"rev">;
defm REV_ZZ : sve_int_perm_reverse_z<"rev">;
- defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo">;
- defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi">;
- defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">;
- defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">;
+ defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
+ defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;
+ defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>;
+ defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi", AArch64uunpkhi>;
- def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">;
- def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">;
+ defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>;
+ defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>;
defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
@@ -1020,6 +1020,56 @@ let Predicates = [HasSVE] in {
(FCMGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv8f16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv4f32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv2f64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+
+ def : Pat<(nxv8i16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+
+ def : Pat<(nxv4i32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+
+ def : Pat<(nxv2i64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+
+ def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+
+ def : Pat<(nxv4f32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+
+ def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+
}
let Predicates = [HasSVE2] in {
@@ -1164,6 +1214,13 @@ let Predicates = [HasSVE2] in {
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
+ // SVE2 predicated shifts
+ defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
+ defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
+ defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
+ defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
+ defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
+
// SVE2 integer add/subtract long
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
@@ -1199,14 +1256,14 @@ let Predicates = [HasSVE2] in {
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
// SVE2 bitwise shift and insert
- defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">;
- defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">;
+ defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">;
+ defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">;
// SVE2 bitwise shift right and accumulate
- defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">;
- defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">;
- defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">;
- defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">;
+ defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">;
+ defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">;
+ defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">;
+ defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">;
// SVE2 complex integer add
defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
@@ -1228,41 +1285,47 @@ let Predicates = [HasSVE2] in {
defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
- // SVE2 bitwise shift right narrow
- defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">;
- defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">;
- defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">;
- defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">;
- defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">;
- defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">;
- defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">;
- defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">;
- defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">;
- defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">;
- defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">;
- defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">;
- defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">;
- defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">;
- defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">;
- defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">;
-
- // SVE2 integer add/subtract narrow high part
- defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">;
- defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b001, "addhnt">;
- defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">;
- defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">;
- defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">;
- defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">;
- defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">;
- defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">;
-
- // SVE2 saturating extract narrow
- defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">;
- defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">;
- defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">;
- defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">;
- defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">;
- defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">;
+ // SVE2 bitwise shift right narrow (bottom)
+ defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
+ defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
+ defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
+ defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
+ defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
+ defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
+ defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
+ defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
+
+ // SVE2 bitwise shift right narrow (top)
+ defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
+ defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
+ defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
+ defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
+ defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
+ defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
+ defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
+ defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
+
+ // SVE2 integer add/subtract narrow high part (bottom)
+ defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">;
+ defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">;
+ defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">;
+ defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">;
+
+ // SVE2 integer add/subtract narrow high part (top)
+ defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">;
+ defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">;
+ defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">;
+ defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">;
+
+ // SVE2 saturating extract narrow (bottom)
+ defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">;
+ defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">;
+ defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">;
+
+ // SVE2 saturating extract narrow (top)
+ defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">;
+ defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">;
+ defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">;
// SVE2 character match
defm MATCH_PPzZZ : sve2_char_match<0b0, "match">;
@@ -1289,10 +1352,14 @@ let Predicates = [HasSVE2] in {
// SVE2 histogram generation (vector)
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
+ // SVE2 floating-point base 2 logarithm as integer
+ defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
+
// SVE2 floating-point convert precision
defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">;
defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">;
+ def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
// SVE2 floating-point pairwise operations
defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">;
@@ -1321,58 +1388,45 @@ let Predicates = [HasSVE2] in {
def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
- // sve_int_rotate_imm
+ // SVE2 bitwise xor and rotate right by immediate
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
- // SVE floating-point convert precision
- def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
-
- // SVE floating-point convert to integer
- defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
-
- // Non-temporal contiguous loads (vector + register)
- defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
- defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
- defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
- defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
- defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
-
- defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
- defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
- defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
- defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
- defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
- defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
- defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
+ // SVE2 non-temporal gather loads
+ defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
+ defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
+ defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
+ defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
+ defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
+
+ defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
+ defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
+ defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
+ defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
+ defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
+ defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
+ defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
- // Predicated shifts
- defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
- defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
- defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
- defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
- defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
-
- // Non-temporal contiguous stores (vector + register)
- defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
- defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
- defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
+ // SVE2 non-temporal scatter stores
+ defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
+ defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
+ defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
- defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
- defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
- defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
- defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
+ defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
+ defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
+ defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
+ defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
- // SVE table lookup (three sources)
+ // SVE2 table lookup (three sources)
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">;
- // SVE integer compare scalar count and limit
+ // SVE2 integer compare scalar count and limit
defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
@@ -1383,7 +1437,7 @@ let Predicates = [HasSVE2] in {
defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
- // SVE pointer conflict compare
+ // SVE2 pointer conflict compare
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
}
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 60dbace03ca6..ba61ed726e84 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -32,7 +32,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
const AArch64TargetLowering &TLI = *STI.getTargetLowering();
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
- Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
diff --git a/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/lib/Target/AArch64/AArch64SpeculationHardening.cpp
index 3087e6ce441d..7307961ddb5f 100644
--- a/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -106,6 +106,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
@@ -115,9 +116,9 @@ using namespace llvm;
#define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
-cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
- cl::desc("Sanitize loads from memory."),
- cl::init(true));
+static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
+ cl::desc("Sanitize loads from memory."),
+ cl::init(true));
namespace {
@@ -521,7 +522,7 @@ bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
for (auto Use : MI.uses()) {
if (!Use.isReg())
continue;
- unsigned Reg = Use.getReg();
+ Register Reg = Use.getReg();
// Some loads of floating point data have implicit defs/uses on a
// super register of that floating point data. Some examples:
// $s0 = LDRSui $sp, 22, implicit-def $q0
@@ -561,8 +562,8 @@ bool AArch64SpeculationHardening::expandSpeculationSafeValue(
// miss-speculation isn't happening because we're already inserting barriers
// to guarantee that.
if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
// Mark this register and all its aliasing registers as needing to be
// value speculation hardened before its next use, by using a CSDB
// barrier instruction.
diff --git a/lib/Target/AArch64/AArch64StackOffset.h b/lib/Target/AArch64/AArch64StackOffset.h
new file mode 100644
index 000000000000..13f12a6c9c30
--- /dev/null
+++ b/lib/Target/AArch64/AArch64StackOffset.h
@@ -0,0 +1,138 @@
+//===- AArch64StackOffset.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the StackOffset class, which is used to
+// describe scalable and non-scalable offsets during frame lowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H
+
+#include "llvm/Support/MachineValueType.h"
+
+namespace llvm {
+
+/// StackOffset is a wrapper around scalable and non-scalable offsets and is
+/// used in several functions such as 'isAArch64FrameOffsetLegal' and
+/// 'emitFrameOffset()'. StackOffsets are described by MVTs, e.g.
+///
+/// StackOffset(1, MVT::nxv16i8)
+///
+/// would describe an offset as being the size of a single SVE vector.
+///
+/// The class also implements simple arithmetic (addition/subtraction) on these
+/// offsets, e.g.
+///
+/// StackOffset(1, MVT::nxv16i8) + StackOffset(1, MVT::i64)
+///
+/// describes an offset that spans the combined storage required for an SVE
+/// vector and a 64bit GPR.
+class StackOffset {
+ int64_t Bytes;
+ int64_t ScalableBytes;
+
+ explicit operator int() const;
+
+public:
+ using Part = std::pair<int64_t, MVT>;
+
+ StackOffset() : Bytes(0), ScalableBytes(0) {}
+
+ StackOffset(int64_t Offset, MVT::SimpleValueType T) : StackOffset() {
+ assert(MVT(T).getSizeInBits() % 8 == 0 &&
+ "Offset type is not a multiple of bytes");
+ *this += Part(Offset, T);
+ }
+
+ StackOffset(const StackOffset &Other)
+ : Bytes(Other.Bytes), ScalableBytes(Other.ScalableBytes) {}
+
+ StackOffset &operator=(const StackOffset &) = default;
+
+ StackOffset &operator+=(const StackOffset::Part &Other) {
+ int64_t OffsetInBytes = Other.first * (Other.second.getSizeInBits() / 8);
+ if (Other.second.isScalableVector())
+ ScalableBytes += OffsetInBytes;
+ else
+ Bytes += OffsetInBytes;
+ return *this;
+ }
+
+ StackOffset &operator+=(const StackOffset &Other) {
+ Bytes += Other.Bytes;
+ ScalableBytes += Other.ScalableBytes;
+ return *this;
+ }
+
+ StackOffset operator+(const StackOffset &Other) const {
+ StackOffset Res(*this);
+ Res += Other;
+ return Res;
+ }
+
+ StackOffset &operator-=(const StackOffset &Other) {
+ Bytes -= Other.Bytes;
+ ScalableBytes -= Other.ScalableBytes;
+ return *this;
+ }
+
+ StackOffset operator-(const StackOffset &Other) const {
+ StackOffset Res(*this);
+ Res -= Other;
+ return Res;
+ }
+
+ StackOffset operator-() const {
+ StackOffset Res = {};
+ const StackOffset Other(*this);
+ Res -= Other;
+ return Res;
+ }
+
+ /// Returns the scalable part of the offset in bytes.
+ int64_t getScalableBytes() const { return ScalableBytes; }
+
+ /// Returns the non-scalable part of the offset in bytes.
+ int64_t getBytes() const { return Bytes; }
+
+ /// Decomposes this offset into the parts needed to describe a frame
+ /// offset: a fixed byte count plus counts of scalable predicate and data
+ /// vectors. For non-scalable offsets only the byte count is non-zero.
+ void getForFrameOffset(int64_t &NumBytes, int64_t &NumPredicateVectors,
+ int64_t &NumDataVectors) const {
+ assert(isValid() && "Invalid frame offset");
+
+ NumBytes = Bytes;
+ NumDataVectors = 0;
+ NumPredicateVectors = ScalableBytes / 2;
+ // This method computes the parts used to adjust the frame offset.
+ // If the scalable offset is a whole number of data vectors, or would need
+ // more than two ADDPL instructions to materialize, it is (partly) folded
+ // into NumDataVectors so that ADDVL can be used instead of extra ADDPLs.
+ if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
+ NumPredicateVectors > 62) {
+ NumDataVectors = NumPredicateVectors / 8;
+ NumPredicateVectors -= NumDataVectors * 8;
+ }
+ }
+
+ /// Returns true if the offset is known to be non-zero.
+ explicit operator bool() const { return Bytes || ScalableBytes; }
+
+ bool isValid() const {
+ // The smallest scalable element supported by scaled SVE addressing
+ // modes is a predicate, which is 2 scalable bytes in size. So the
+ // scalable byte offset must always be a multiple of 2.
+ return ScalableBytes % 2 == 0;
+ }
+};
+
+} // end namespace llvm
+
+#endif
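
To make getForFrameOffset() above concrete, here is a minimal standalone
sketch (hypothetical code, not part of the patch) of the same bookkeeping:
scalable sizes are tracked in "scalable bytes", where one predicate vector
contributes 2 and one full data vector 16 of them, and whole groups of eight
predicate vectors are folded into data vectors so ADDVL can materialize them.

    // Standalone sketch of the StackOffset decomposition; assumes the same
    // 2-bytes-per-predicate-vector, 16-bytes-per-data-vector accounting.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    struct SketchStackOffset {
      int64_t Bytes = 0;         // fixed-size part
      int64_t ScalableBytes = 0; // vscale-scaled part

      void getForFrameOffset(int64_t &NumBytes, int64_t &NumPredicateVectors,
                             int64_t &NumDataVectors) const {
        assert(ScalableBytes % 2 == 0 && "invalid frame offset");
        NumBytes = Bytes;
        NumDataVectors = 0;
        NumPredicateVectors = ScalableBytes / 2;
        // Fold whole multiples of 8 predicate vectors (= data vectors) into
        // NumDataVectors so they can be materialized with ADDVL, not ADDPL.
        if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
            NumPredicateVectors > 62) {
          NumDataVectors = NumPredicateVectors / 8;
          NumPredicateVectors -= NumDataVectors * 8;
        }
      }
    };

    int main() {
      // One nxv16i8 vector (16 scalable bytes) plus a 16-byte spill slot.
      SketchStackOffset SO{/*Bytes=*/16, /*ScalableBytes=*/16};
      int64_t B, P, D;
      SO.getForFrameOffset(B, P, D);
      std::printf("bytes=%lld predvecs=%lld datavecs=%lld\n", (long long)B,
                  (long long)P, (long long)D); // bytes=16 predvecs=0 datavecs=1
    }
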
diff --git a/lib/Target/AArch64/AArch64StackTagging.cpp b/lib/Target/AArch64/AArch64StackTagging.cpp
index 6e99c48bf1d7..e6dbe01d3807 100644
--- a/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -55,9 +56,215 @@ using namespace llvm;
#define DEBUG_TYPE "stack-tagging"
-static constexpr unsigned kTagGranuleSize = 16;
+static cl::opt<bool> ClMergeInit(
+ "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ cl::desc("merge stack variable initializers with tagging when possible"));
+
+static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
+ cl::init(40), cl::Hidden);
+
+static const Align kTagGranuleSize = Align(16);
namespace {
+
+class InitializerBuilder {
+ uint64_t Size;
+ const DataLayout *DL;
+ Value *BasePtr;
+ Function *SetTagFn;
+ Function *SetTagZeroFn;
+ Function *StgpFn;
+
+ // List of initializers sorted by start offset.
+ struct Range {
+ uint64_t Start, End;
+ Instruction *Inst;
+ };
+ SmallVector<Range, 4> Ranges;
+ // 8-aligned offset => 8-byte initializer
+ // Missing keys are zero initialized.
+ std::map<uint64_t, Value *> Out;
+
+public:
+ InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
+ Function *SetTagFn, Function *SetTagZeroFn,
+ Function *StgpFn)
+ : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
+ SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}
+
+ bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
+ auto I = std::lower_bound(
+ Ranges.begin(), Ranges.end(), Start,
+ [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; });
+ if (I != Ranges.end() && End > I->Start) {
+ // Overlap - bail.
+ return false;
+ }
+ Ranges.insert(I, {Start, End, Inst});
+ return true;
+ }
+
+ bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
+ int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
+ if (!addRange(Offset, Offset + StoreSize, SI))
+ return false;
+ IRBuilder<> IRB(SI);
+ applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
+ return true;
+ }
+
+ bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
+ uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if (!addRange(Offset, Offset + StoreSize, MSI))
+ return false;
+ IRBuilder<> IRB(MSI);
+ applyMemSet(IRB, Offset, Offset + StoreSize,
+ cast<ConstantInt>(MSI->getValue()));
+ return true;
+ }
+
+ void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
+ ConstantInt *V) {
+ // Out[] does not distinguish between zero and undef, and we already know
+ // that this memset does not overlap with any other initializer. Nothing to
+ // do for memset(0).
+ if (V->isZero())
+ return;
+ for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
+ uint64_t Cst = 0x0101010101010101UL;
+ int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
+ if (LowBits)
+ Cst = (Cst >> LowBits) << LowBits;
+ int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
+ if (HighBits)
+ Cst = (Cst << HighBits) >> HighBits;
+ ConstantInt *C =
+ ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());
+
+ Value *&CurrentV = Out[Offset];
+ if (!CurrentV) {
+ CurrentV = C;
+ } else {
+ CurrentV = IRB.CreateOr(CurrentV, C);
+ }
+ }
+ }
+
+ // Take a 64-bit slice of the value starting at the given offset (in bytes).
+ // Offset can be negative. Pad with zeroes on both sides when necessary.
+ Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
+ if (Offset > 0) {
+ V = IRB.CreateLShr(V, Offset * 8);
+ V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
+ } else if (Offset < 0) {
+ V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
+ V = IRB.CreateShl(V, -Offset * 8);
+ } else {
+ V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
+ }
+ return V;
+ }
+
+ void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
+ Value *StoredValue) {
+ StoredValue = flatten(IRB, StoredValue);
+ for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
+ Value *V = sliceValue(IRB, StoredValue, Offset - Start);
+ Value *&CurrentV = Out[Offset];
+ if (!CurrentV) {
+ CurrentV = V;
+ } else {
+ CurrentV = IRB.CreateOr(CurrentV, V);
+ }
+ }
+ }
+
+ void generate(IRBuilder<> &IRB) {
+ LLVM_DEBUG(dbgs() << "Combined initializer\n");
+ // No initializers => the entire allocation is undef.
+ if (Ranges.empty()) {
+ emitUndef(IRB, 0, Size);
+ return;
+ }
+
+ // Walk the 8-byte initializer list 16 bytes at a time;
+ // if either of the two 8-byte halves is non-zero and non-undef, emit STGP.
+ // Otherwise, emit zeroes up to the next available item.
+ uint64_t LastOffset = 0;
+ for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
+ auto I1 = Out.find(Offset);
+ auto I2 = Out.find(Offset + 8);
+ if (I1 == Out.end() && I2 == Out.end())
+ continue;
+
+ if (Offset > LastOffset)
+ emitZeroes(IRB, LastOffset, Offset - LastOffset);
+
+ Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
+ : I1->second;
+ Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
+ : I2->second;
+ emitPair(IRB, Offset, Store1, Store2);
+ LastOffset = Offset + 16;
+ }
+
+ // memset(0) does not update Out[], therefore the tail can be either undef
+ // or zero.
+ if (LastOffset < Size)
+ emitZeroes(IRB, LastOffset, Size - LastOffset);
+
+ for (const auto &R : Ranges) {
+ R.Inst->eraseFromParent();
+ }
+ }
+
+ void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
+ LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
+ << ") zero\n");
+ Value *Ptr = BasePtr;
+ if (Offset)
+ Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
+ IRB.CreateCall(SetTagZeroFn,
+ {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+ }
+
+ void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
+ LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
+ << ") undef\n");
+ Value *Ptr = BasePtr;
+ if (Offset)
+ Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
+ IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+ }
+
+ void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
+ LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + 16 << "):\n");
+ LLVM_DEBUG(dbgs() << " " << *A << "\n " << *B << "\n");
+ Value *Ptr = BasePtr;
+ if (Offset)
+ Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
+ IRB.CreateCall(StgpFn, {Ptr, A, B});
+ }
+
+ Value *flatten(IRBuilder<> &IRB, Value *V) {
+ if (V->getType()->isIntegerTy())
+ return V;
+ // vector of pointers -> vector of ints
+ if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
+ LLVMContext &Ctx = IRB.getContext();
+ Type *EltTy = VecTy->getElementType();
+ if (EltTy->isPointerTy()) {
+ uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
+ Type *NewTy = VectorType::get(IntegerType::get(Ctx, EltSize),
+ VecTy->getNumElements());
+ V = IRB.CreatePointerCast(V, NewTy);
+ }
+ }
+ return IRB.CreateBitOrPointerCast(
+ V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
+ }
+};
+
class AArch64StackTagging : public FunctionPass {
struct AllocaInfo {
AllocaInst *AI;
@@ -67,10 +274,15 @@ class AArch64StackTagging : public FunctionPass {
int Tag; // -1 for non-tagged allocations
};
+ bool MergeInit;
+
public:
static char ID; // Pass ID, replacement for typeid
- AArch64StackTagging() : FunctionPass(ID) {
+ AArch64StackTagging(bool MergeInit = true)
+ : FunctionPass(ID),
+ MergeInit(ClMergeInit.getNumOccurrences() > 0 ? ClMergeInit
+ : MergeInit) {
initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
}
@@ -81,6 +293,9 @@ public:
uint64_t Size);
void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);
+ Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
+ uint64_t Size, InitializerBuilder &IB);
+
Instruction *
insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
const DominatorTree *DT);
@@ -92,9 +307,12 @@ private:
Function *F;
Function *SetTagFunc;
const DataLayout *DL;
+ AAResults *AA;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ if (MergeInit)
+ AU.addRequired<AAResultsWrapperPass>();
}
};
@@ -107,8 +325,68 @@ INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
false, false)
-FunctionPass *llvm::createAArch64StackTaggingPass() {
- return new AArch64StackTagging();
+FunctionPass *llvm::createAArch64StackTaggingPass(bool MergeInit) {
+ return new AArch64StackTagging(MergeInit);
+}
+
+Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
+ Value *StartPtr,
+ uint64_t Size,
+ InitializerBuilder &IB) {
+ MemoryLocation AllocaLoc{StartPtr, Size};
+ Instruction *LastInst = StartInst;
+ BasicBlock::iterator BI(StartInst);
+
+ unsigned Count = 0;
+ for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
+ if (!isa<DbgInfoIntrinsic>(*BI))
+ ++Count;
+
+ if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
+ continue;
+
+ if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+ // If the instruction is readnone, ignore it; otherwise bail out. We
+ // don't even allow readonly here because we don't want something like:
+ // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
+ if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+ break;
+ continue;
+ }
+
+ if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+ if (!NextStore->isSimple())
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ Optional<int64_t> Offset =
+ isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL);
+ if (!Offset)
+ break;
+
+ if (!IB.addStore(*Offset, NextStore, DL))
+ break;
+ LastInst = NextStore;
+ } else {
+ MemSetInst *MSI = cast<MemSetInst>(BI);
+
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
+ break;
+
+ if (!isa<ConstantInt>(MSI->getValue()))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL);
+ if (!Offset)
+ break;
+
+ if (!IB.addMemSet(*Offset, MSI))
+ break;
+ LastInst = MSI;
+ }
+ }
+ return LastInst;
}
bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
@@ -127,8 +405,23 @@ bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
Value *Ptr, uint64_t Size) {
+ auto SetTagZeroFunc =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
+ auto StgpFunc =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);
+
+ InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
+ bool LittleEndian =
+ Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
+ // Current implementation of initializer merging assumes little endianness.
+ if (MergeInit && !F->hasOptNone() && LittleEndian) {
+ LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
+ << ", size = " << Size << "\n");
+ InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
+ }
+
IRBuilder<> IRB(InsertBefore);
- IRB.CreateCall(SetTagFunc, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+ IB.generate(IRB);
}
void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
@@ -166,7 +459,8 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer(
}
void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
- unsigned NewAlignment = std::max(Info.AI->getAlignment(), kTagGranuleSize);
+ const Align NewAlignment =
+ max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize);
Info.AI->setAlignment(NewAlignment);
uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
@@ -179,7 +473,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
Info.AI->isArrayAllocation()
? ArrayType::get(
Info.AI->getAllocatedType(),
- dyn_cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
+ cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
: Info.AI->getAllocatedType();
Type *PaddingType =
ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
@@ -187,7 +481,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
auto *NewAI = new AllocaInst(
TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
NewAI->takeName(Info.AI);
- NewAI->setAlignment(Info.AI->getAlignment());
+ NewAI->setAlignment(MaybeAlign(Info.AI->getAlignment()));
NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
NewAI->setSwiftError(Info.AI->isSwiftError());
NewAI->copyMetadata(*Info.AI);
@@ -198,6 +492,24 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
Info.AI = NewAI;
}
+// Helper function to check for post-dominance.
+static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A,
+ const IntrinsicInst *B) {
+ const BasicBlock *ABB = A->getParent();
+ const BasicBlock *BBB = B->getParent();
+
+ if (ABB != BBB)
+ return PDT->dominates(ABB, BBB);
+
+ for (const Instruction &I : *ABB) {
+ if (&I == B)
+ return true;
+ if (&I == A)
+ return false;
+ }
+ llvm_unreachable("Corrupt instruction list");
+}
+
// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
@@ -205,6 +517,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
F = &Fn;
DL = &Fn.getParent()->getDataLayout();
+ if (MergeInit)
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
SmallVector<Instruction *, 8> RetVec;
@@ -270,23 +584,31 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (NumInterestingAllocas == 0)
return true;
+ std::unique_ptr<DominatorTree> DeleteDT;
+ DominatorTree *DT = nullptr;
+ if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &P->getDomTree();
+
+ if (DT == nullptr && (NumInterestingAllocas > 1 ||
+ !F->hasFnAttribute(Attribute::OptimizeNone))) {
+ DeleteDT = std::make_unique<DominatorTree>(*F);
+ DT = DeleteDT.get();
+ }
+
+ std::unique_ptr<PostDominatorTree> DeletePDT;
+ PostDominatorTree *PDT = nullptr;
+ if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
+ PDT = &P->getPostDomTree();
+
+ if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
+ DeletePDT = std::make_unique<PostDominatorTree>(*F);
+ PDT = DeletePDT.get();
+ }
+
SetTagFunc =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
- // Compute DT only if the function has the attribute, there are more than 1
- // interesting allocas, and it is not available for free.
- Instruction *Base;
- if (NumInterestingAllocas > 1) {
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- if (DTWP) {
- Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree());
- } else {
- DominatorTree DT(*F);
- Base = insertBaseTaggedPointer(Allocas, &DT);
- }
- } else {
- Base = insertBaseTaggedPointer(Allocas, nullptr);
- }
+ Instruction *Base = insertBaseTaggedPointer(Allocas, DT);
for (auto &I : Allocas) {
const AllocaInfo &Info = I.second;
@@ -309,11 +631,37 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
Info.LifetimeEnd.size() == 1) {
IntrinsicInst *Start = Info.LifetimeStart[0];
+ IntrinsicInst *End = Info.LifetimeEnd[0];
uint64_t Size =
dyn_cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
Size = alignTo(Size, kTagGranuleSize);
tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
- untagAlloca(AI, Info.LifetimeEnd[0], Size);
+ // We need to ensure that if we tag some object, we certainly untag it
+ // before the function exits.
+ if (PDT != nullptr && postDominates(PDT, End, Start)) {
+ untagAlloca(AI, End, Size);
+ } else {
+ SmallVector<Instruction *, 8> ReachableRetVec;
+ unsigned NumCoveredExits = 0;
+ for (auto &RI : RetVec) {
+ if (!isPotentiallyReachable(Start, RI, nullptr, DT))
+ continue;
+ ReachableRetVec.push_back(RI);
+ if (DT != nullptr && DT->dominates(End, RI))
+ ++NumCoveredExits;
+ }
+ // If every reachable exit is dominated by the lifetime end, one untag
+ // there suffices; otherwise untag on each reachable exit instead, so we
+ // avoid the redundancy of untagging twice.
+ if (NumCoveredExits == ReachableRetVec.size()) {
+ untagAlloca(AI, End, Size);
+ } else {
+ for (auto &RI : ReachableRetVec)
+ untagAlloca(AI, RI, Size);
+ // We may have inserted untag outside of the lifetime interval.
+ // Remove the lifetime end call for this alloca.
+ End->eraseFromParent();
+ }
+ }
} else {
uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
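
The byte-mask arithmetic in InitializerBuilder::applyMemSet above is easy to
get wrong, so here is a standalone sketch (illustration only, not pass code)
of how a memset value is masked into one 8-aligned, 8-byte slot, zeroing the
byte lanes outside [Start, End):

    #include <cstdint>
    #include <cstdio>

    // Little-endian pattern a memset(V) contributes to the 8-byte slot at
    // SlotOffset, covering only the bytes inside [Start, End).
    static uint64_t memsetSlotValue(int64_t SlotOffset, int64_t Start,
                                    int64_t End, uint8_t V) {
      uint64_t Cst = 0x0101010101010101ULL; // 0x01 in every byte lane
      int LowBits = SlotOffset < Start ? (Start - SlotOffset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits; // clear lanes before Start
      int HighBits = End - SlotOffset < 8 ? (8 - (End - SlotOffset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits; // clear lanes from End onward
      return Cst * V; // replicate V into the surviving byte lanes
    }

    int main() {
      // memset of 0xAB over bytes [3, 6) of the slot at offset 0:
      std::printf("%016llx\n",
                  (unsigned long long)memsetSlotValue(0, 3, 6, 0xAB));
      // prints 0000ababab000000: byte lanes 3..5 hold 0xAB, the rest are 0.
    }
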
diff --git a/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
new file mode 100644
index 000000000000..3cc556f74aea
--- /dev/null
+++ b/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -0,0 +1,209 @@
+//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
+
+enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
+
+static cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
+ "stack-tagging-unchecked-ld-st", cl::Hidden,
+ cl::init(UncheckedSafe),
+ cl::desc(
+ "When to apply the unchecked-ld-st optimization ('always' applies it "
+ "even for large stack frames, or in the presence of variable sized "
+ "allocas)."),
+ cl::values(
+ clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
+ clEnumValN(
+ UncheckedSafe, "safe",
+ "apply unchecked-ld-st when the target is definitely within range"),
+ clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
+
+namespace {
+
+class AArch64StackTaggingPreRA : public MachineFunctionPass {
+ MachineFunction *MF;
+ AArch64FunctionInfo *AFI;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const AArch64RegisterInfo *TRI;
+ const AArch64InstrInfo *TII;
+
+ SmallVector<MachineInstr*, 16> ReTags;
+
+public:
+ static char ID;
+ AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
+ initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool mayUseUncheckedLoadStore();
+ void uncheckUsesOf(unsigned TaggedReg, int FI);
+ void uncheckLoadsAndStores();
+
+ bool runOnMachineFunction(MachineFunction &Func) override;
+ StringRef getPassName() const override {
+ return "AArch64 Stack Tagging PreRA";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // end anonymous namespace
+
+char AArch64StackTaggingPreRA::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
+ "AArch64 Stack Tagging PreRA Pass", false, false)
+INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
+ "AArch64 Stack Tagging PreRA Pass", false, false)
+
+FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
+ return new AArch64StackTaggingPreRA();
+}
+
+static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case AArch64::LDRWui:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRXui:
+ case AArch64::LDRBui:
+ case AArch64::LDRBBui:
+ case AArch64::LDRHui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::STRWui:
+ case AArch64::STRXui:
+ case AArch64::STRBui:
+ case AArch64::STRBBui:
+ case AArch64::STRHui:
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
+ if (ClUncheckedLdSt == UncheckedNever)
+ return false;
+ else if (ClUncheckedLdSt == UncheckedAlways)
+ return true;
+
+ // This estimate could be improved if we had harder guarantees about stack
+ // frame layout. With LocalStackAllocation we can estimate the SP offset to
+ // any preallocated slot. AArch64FrameLowering::orderFrameObjects could put
+ // tagged objects ahead of non-tagged ones, but that's not always desirable.
+ //
+ // Underestimating SP offset here may require the use of LDG to materialize
+ // the tagged address of the stack slot, along with a scratch register
+ // allocation (post-regalloc!).
+ //
+ // For now we do the safe thing here and require that the entire stack frame
+ // is within range of the shortest of the unchecked instructions.
+ unsigned FrameSize = 0;
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
+ FrameSize += MFI->getObjectSize(i);
+ bool EntireFrameReachableFromSP = FrameSize < 0xf00;
+ return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
+}
+
+void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
+ for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end();
+ UI != E;) {
+ MachineInstr *UseI = &*(UI++);
+ if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) {
+ // FI operand is always the one before the immediate offset.
+ unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1;
+ if (UseI->getOperand(OpIdx).isReg() &&
+ UseI->getOperand(OpIdx).getReg() == TaggedReg) {
+ UseI->getOperand(OpIdx).ChangeToFrameIndex(FI);
+ UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
+ }
+ } else if (UseI->isCopy() &&
+ Register::isVirtualRegister(UseI->getOperand(0).getReg())) {
+ uncheckUsesOf(UseI->getOperand(0).getReg(), FI);
+ }
+ }
+}
+
+void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
+ for (auto *I : ReTags) {
+ unsigned TaggedReg = I->getOperand(0).getReg();
+ int FI = I->getOperand(1).getIndex();
+ uncheckUsesOf(TaggedReg, FI);
+ }
+}
+
+bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ MRI = &MF->getRegInfo();
+ AFI = MF->getInfo<AArch64FunctionInfo>();
+ TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
+ TRI = static_cast<const AArch64RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+ MFI = &MF->getFrameInfo();
+ ReTags.clear();
+
+ assert(MRI->isSSA());
+
+ LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
+ << "********** Function: " << MF->getName() << '\n');
+
+ SmallSetVector<int, 8> TaggedSlots;
+ for (auto &BB : *MF) {
+ for (auto &I : BB) {
+ if (I.getOpcode() == AArch64::TAGPstack) {
+ ReTags.push_back(&I);
+ int FI = I.getOperand(1).getIndex();
+ TaggedSlots.insert(FI);
+ // There should be no offsets in TAGP yet.
+ assert(I.getOperand(2).getImm() == 0);
+ }
+ }
+ }
+
+ if (ReTags.empty())
+ return false;
+
+ if (mayUseUncheckedLoadStore())
+ uncheckLoadsAndStores();
+
+ return true;
+}
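
Condensed restatement of the mayUseUncheckedLoadStore() heuristic above (a
sketch under the same assumptions, not the pass itself): unchecked accesses
are only allowed when every stack object is provably addressable from SP
within the range of the shortest unchecked instruction, conservatively capped
at 0xf00 bytes, and never in the presence of variable-sized objects.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };

    static bool mayUseUnchecked(UncheckedLdStMode Mode,
                                const std::vector<uint64_t> &ObjectSizes,
                                bool HasVarSizedObjects) {
      if (Mode == UncheckedNever)
        return false;
      if (Mode == UncheckedAlways)
        return true;
      uint64_t FrameSize = 0;
      for (uint64_t Size : ObjectSizes)
        FrameSize += Size;
      // Underestimating an SP offset would force an LDG plus a post-regalloc
      // scratch register, so require the whole frame to be in range.
      return !HasVarSizedObjects && FrameSize < 0xf00;
    }

    int main() {
      std::printf("%d\n", mayUseUnchecked(UncheckedSafe, {1024}, false)); // 1
      std::printf("%d\n", mayUseUnchecked(UncheckedSafe, {4096}, false)); // 0
    }
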
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 0e84a00df006..5deb601822b8 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -151,7 +151,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
int64_t Offset;
if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) &&
BaseOp->isReg()) {
- unsigned BaseReg = BaseOp->getReg();
+ Register BaseReg = BaseOp->getReg();
if (PrevBaseReg == BaseReg) {
// If this block can take STPs, skip ahead to the next block.
if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent()))
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 3bc89b91c3f7..558bea368eff 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -71,19 +71,22 @@ void AArch64Subtarget::initializeProperties() {
case CortexA35:
break;
case CortexA53:
- PrefFunctionAlignment = 3;
+ PrefFunctionLogAlignment = 3;
break;
case CortexA55:
break;
case CortexA57:
MaxInterleaveFactor = 4;
- PrefFunctionAlignment = 4;
+ PrefFunctionLogAlignment = 4;
+ break;
+ case CortexA65:
+ PrefFunctionLogAlignment = 3;
break;
case CortexA72:
case CortexA73:
case CortexA75:
case CortexA76:
- PrefFunctionAlignment = 4;
+ PrefFunctionLogAlignment = 4;
break;
case Cyclone:
CacheLineSize = 64;
@@ -94,14 +97,14 @@ void AArch64Subtarget::initializeProperties() {
case ExynosM1:
MaxInterleaveFactor = 4;
MaxJumpTableSize = 8;
- PrefFunctionAlignment = 4;
- PrefLoopAlignment = 3;
+ PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 3;
break;
case ExynosM3:
MaxInterleaveFactor = 4;
MaxJumpTableSize = 20;
- PrefFunctionAlignment = 5;
- PrefLoopAlignment = 4;
+ PrefFunctionLogAlignment = 5;
+ PrefLoopLogAlignment = 4;
break;
case Falkor:
MaxInterleaveFactor = 4;
@@ -122,6 +125,12 @@ void AArch64Subtarget::initializeProperties() {
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
+ case NeoverseE1:
+ PrefFunctionLogAlignment = 3;
+ break;
+ case NeoverseN1:
+ PrefFunctionLogAlignment = 4;
+ break;
case Saphira:
MaxInterleaveFactor = 4;
// FIXME: remove this to enable 64-bit SLP if performance looks good.
@@ -129,8 +138,8 @@ void AArch64Subtarget::initializeProperties() {
break;
case ThunderX2T99:
CacheLineSize = 64;
- PrefFunctionAlignment = 3;
- PrefLoopAlignment = 2;
+ PrefFunctionLogAlignment = 3;
+ PrefLoopLogAlignment = 2;
MaxInterleaveFactor = 4;
PrefetchDistance = 128;
MinPrefetchStride = 1024;
@@ -143,15 +152,15 @@ void AArch64Subtarget::initializeProperties() {
case ThunderXT81:
case ThunderXT83:
CacheLineSize = 128;
- PrefFunctionAlignment = 3;
- PrefLoopAlignment = 2;
+ PrefFunctionLogAlignment = 3;
+ PrefLoopLogAlignment = 2;
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
case TSV110:
CacheLineSize = 64;
- PrefFunctionAlignment = 4;
- PrefLoopAlignment = 2;
+ PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 2;
break;
}
}
@@ -187,7 +196,7 @@ const CallLowering *AArch64Subtarget::getCallLowering() const {
return CallLoweringInfo.get();
}
-const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
+InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
return InstSelector.get();
}
@@ -201,7 +210,7 @@ const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
-unsigned char
+unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM) const {
// MachO large model always goes via a GOT, simply to get a single 8-byte
@@ -224,10 +233,17 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
GV->hasExternalWeakLinkage())
return AArch64II::MO_GOT;
+ // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
+ // that their nominal addresses are tagged and outside of the code model. In
+ // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
+ // tag if necessary based on MO_TAGGED.
+ if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
+ return AArch64II::MO_NC | AArch64II::MO_TAGGED;
+
return AArch64II::MO_NO_FLAG;
}
-unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
+unsigned AArch64Subtarget::classifyGlobalFunctionReference(
const GlobalValue *GV, const TargetMachine &TM) const {
// MachO large model always goes via a GOT, because we don't have the
// relocations available to do anything else.
@@ -275,7 +291,7 @@ bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
- return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
+ return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}
void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
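
The PrefFunctionAlignment/PrefLoopAlignment fields renamed to
...LogAlignment in the initializeProperties() hunks above store the log2 of
the byte alignment, in line with the Align/MaybeAlign migration elsewhere in
this import. A trivial sketch of the conversion (illustration only):

    #include <cstdio>

    // A log-alignment of N means "aligned to 1 << N bytes".
    static unsigned logAlignToBytes(unsigned LogAlign) { return 1u << LogAlign; }

    int main() {
      // E.g. Cortex-A57's PrefFunctionLogAlignment of 4 is 16-byte alignment.
      std::printf("log 3 -> %u, log 4 -> %u, log 5 -> %u bytes\n",
                  logAlignToBytes(3), logAlignToBytes(4), logAlignToBytes(5));
      // log 3 -> 8, log 4 -> 16, log 5 -> 32 bytes
    }
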
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 0c84cfb8329a..f3212fae8e5e 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -42,6 +42,7 @@ public:
CortexA53,
CortexA55,
CortexA57,
+ CortexA65,
CortexA72,
CortexA73,
CortexA75,
@@ -51,6 +52,8 @@ public:
ExynosM3,
Falkor,
Kryo,
+ NeoverseE1,
+ NeoverseN1,
Saphira,
ThunderX2T99,
ThunderX,
@@ -113,6 +116,7 @@ protected:
bool HasTRACEV8_4 = false;
bool HasAM = false;
bool HasSEL2 = false;
+ bool HasPMU = false;
bool HasTLB_RMI = false;
bool HasFMI = false;
bool HasRCPC_IMMO = false;
@@ -134,6 +138,7 @@ protected:
bool HasBTI = false;
bool HasRandGen = false;
bool HasMTE = false;
+ bool HasTME = false;
// Arm SVE2 extensions
bool HasSVE2AES = false;
@@ -141,6 +146,10 @@ protected:
bool HasSVE2SHA3 = false;
bool HasSVE2BitPerm = false;
+ // Future architecture extensions.
+ bool HasETE = false;
+ bool HasTRBE = false;
+
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
@@ -183,14 +192,15 @@ protected:
bool UseEL1ForTP = false;
bool UseEL2ForTP = false;
bool UseEL3ForTP = false;
+ bool AllowTaggedGlobals = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
uint16_t PrefetchDistance = 0;
uint16_t MinPrefetchStride = 1;
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
- unsigned PrefFunctionAlignment = 0;
- unsigned PrefLoopAlignment = 0;
+ unsigned PrefFunctionLogAlignment = 0;
+ unsigned PrefLoopLogAlignment = 0;
unsigned MaxJumpTableSize = 0;
unsigned WideningBaseCost = 0;
@@ -247,7 +257,7 @@ public:
return &getInstrInfo()->getRegisterInfo();
}
const CallLowering *getCallLowering() const override;
- const InstructionSelector *getInstructionSelector() const override;
+ InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -344,14 +354,16 @@ public:
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
}
- unsigned getCacheLineSize() const { return CacheLineSize; }
- unsigned getPrefetchDistance() const { return PrefetchDistance; }
- unsigned getMinPrefetchStride() const { return MinPrefetchStride; }
- unsigned getMaxPrefetchIterationsAhead() const {
+ unsigned getCacheLineSize() const override { return CacheLineSize; }
+ unsigned getPrefetchDistance() const override { return PrefetchDistance; }
+ unsigned getMinPrefetchStride() const override { return MinPrefetchStride; }
+ unsigned getMaxPrefetchIterationsAhead() const override {
return MaxPrefetchIterationsAhead;
}
- unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
- unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; }
+ unsigned getPrefFunctionLogAlignment() const {
+ return PrefFunctionLogAlignment;
+ }
+ unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
@@ -380,6 +392,7 @@ public:
bool hasBTI() const { return HasBTI; }
bool hasRandGen() const { return HasRandGen; }
bool hasMTE() const { return HasMTE; }
+ bool hasTME() const { return HasTME; }
// Arm SVE2 extensions
bool hasSVE2AES() const { return HasSVE2AES; }
bool hasSVE2SM4() const { return HasSVE2SM4; }
@@ -399,6 +412,8 @@ public:
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+ bool isTargetILP32() const { return TargetTriple.isArch32Bit(); }
+
bool useAA() const override { return UseAA; }
bool hasVH() const { return HasVH; }
@@ -421,10 +436,17 @@ public:
bool hasTRACEV8_4() const { return HasTRACEV8_4; }
bool hasAM() const { return HasAM; }
bool hasSEL2() const { return HasSEL2; }
+ bool hasPMU() const { return HasPMU; }
bool hasTLB_RMI() const { return HasTLB_RMI; }
bool hasFMI() const { return HasFMI; }
bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
+ bool addrSinkUsingGEPs() const override {
+ // Keeping GEPs inbounds is important for exploiting AArch64
+ // addressing-modes in ILP32 mode.
+ return useAA() || isTargetILP32();
+ }
+
bool useSmallAddressing() const {
switch (TLInfo.getTargetMachine().getCodeModel()) {
case CodeModel::Kernel:
@@ -443,11 +465,11 @@ public:
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
- unsigned char ClassifyGlobalReference(const GlobalValue *GV,
- const TargetMachine &TM) const;
+ unsigned ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM) const;
- unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
- const TargetMachine &TM) const;
+ unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
+ const TargetMachine &TM) const;
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
diff --git a/lib/Target/AArch64/AArch64SystemOperands.td b/lib/Target/AArch64/AArch64SystemOperands.td
index 536a6591478b..05249a4ea6a8 100644
--- a/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/lib/Target/AArch64/AArch64SystemOperands.td
@@ -612,6 +612,7 @@ def : ROSysReg<"ISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b000>;
def : ROSysReg<"CNTPCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b001>;
def : ROSysReg<"CNTVCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b010>;
def : ROSysReg<"ID_MMFR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b110>;
+def : ROSysReg<"ID_MMFR5_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b110>;
// Trace registers
// Op0 Op1 CRn CRm Op2
@@ -1321,6 +1322,12 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>;
def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
} // FeatureSEL2
+// v8.4a PMU registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeaturePMU} }] in {
+def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
+} // FeaturePMU
+
// v8.4a RAS registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::FeatureRASv8_4} }] in {
@@ -1452,14 +1459,37 @@ let Requires = [{ {AArch64::FeatureMTE} }] in {
def : RWSysReg<"TCO", 0b11, 0b011, 0b0100, 0b0010, 0b111>;
def : RWSysReg<"GCR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b110>;
def : RWSysReg<"RGSR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b101>;
-def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0110, 0b0101, 0b000>;
-def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0110, 0b0101, 0b000>;
-def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0110, 0b0110, 0b000>;
-def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0110, 0b0110, 0b000>;
-def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0110, 0b0110, 0b001>;
+def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b000>;
+def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0101, 0b0110, 0b000>;
+def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0101, 0b0110, 0b000>;
+def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0101, 0b0110, 0b000>;
+def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b001>;
def : ROSysReg<"GMID_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b100>;
} // HasMTE
+// Embedded Trace Extension R/W System registers
+let Requires = [{ {AArch64::FeatureETE} }] in {
+// Name Op0 Op1 CRn CRm Op2
+def : RWSysReg<"TRCRSR", 0b10, 0b001, 0b0000, 0b1010, 0b000>;
+// TRCEXTINSELR0 has the same encoding as ETM TRCEXTINSELR
+def : RWSysReg<"TRCEXTINSELR0", 0b10, 0b001, 0b0000, 0b1000, 0b100>;
+def : RWSysReg<"TRCEXTINSELR1", 0b10, 0b001, 0b0000, 0b1001, 0b100>;
+def : RWSysReg<"TRCEXTINSELR2", 0b10, 0b001, 0b0000, 0b1010, 0b100>;
+def : RWSysReg<"TRCEXTINSELR3", 0b10, 0b001, 0b0000, 0b1011, 0b100>;
+} // FeatureETE
+
+// Trace Buffer Extension System registers
+let Requires = [{ {AArch64::FeatureTRBE} }] in {
+// Name Op0 Op1 CRn CRm Op2
+def : RWSysReg<"TRBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b000>;
+def : RWSysReg<"TRBPTR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b001>;
+def : RWSysReg<"TRBBASER_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b010>;
+def : RWSysReg<"TRBSR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b011>;
+def : RWSysReg<"TRBMAR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b100>;
+def : RWSysReg<"TRBTRG_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b110>;
+def : ROSysReg<"TRBIDR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b111>;
+} // FeatureTRBE
+
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::ProcCyclone} }] in
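
For reference, the five fields in each SysReg def above pack into the 16-bit
encoding used by MRS/MSR. The sketch below assumes the standard
op0:op1:CRn:CRm:op2 layout (2+3+4+4+3 bits); the field values are taken from
the PMMIR_EL1 def added above:

    #include <cstdint>
    #include <cstdio>

    static uint16_t encodeSysReg(unsigned Op0, unsigned Op1, unsigned CRn,
                                 unsigned CRm, unsigned Op2) {
      return (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
    }

    int main() {
      // PMMIR_EL1: Op0=0b11, Op1=0b000, CRn=0b1001, CRm=0b1110, Op2=0b110
      std::printf("PMMIR_EL1 = 0x%04x\n",
                  encodeSysReg(0b11, 0b000, 0b1001, 0b1110, 0b110)); // 0xc4f6
    }
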
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 865461480499..b3ed96e815be 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -157,6 +157,8 @@ extern "C" void LLVMInitializeAArch64Target() {
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
+ RegisterTargetMachine<AArch64leTargetMachine> W(getTheARM64_32Target());
+ RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target());
auto PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
initializeAArch64A53Fix835769Pass(*PR);
@@ -180,6 +182,7 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeLDTLSCleanupPass(*PR);
initializeAArch64SpeculationHardeningPass(*PR);
initializeAArch64StackTaggingPass(*PR);
+ initializeAArch64StackTaggingPreRAPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -187,11 +190,11 @@ extern "C" void LLVMInitializeAArch64Target() {
//===----------------------------------------------------------------------===//
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
- return llvm::make_unique<AArch64_MachoTargetObjectFile>();
+ return std::make_unique<AArch64_MachoTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
- return llvm::make_unique<AArch64_COFFTargetObjectFile>();
+ return std::make_unique<AArch64_COFFTargetObjectFile>();
- return llvm::make_unique<AArch64_ELFTargetObjectFile>();
+ return std::make_unique<AArch64_ELFTargetObjectFile>();
}
// Helper function to build a DataLayout string
@@ -200,8 +203,11 @@ static std::string computeDataLayout(const Triple &TT,
bool LittleEndian) {
if (Options.getABIName() == "ilp32")
return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
- if (TT.isOSBinFormatMachO())
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getArch() == Triple::aarch64_32)
+ return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128";
return "e-m:o-i64:64-i128:128-n32:64-S128";
+ }
if (TT.isOSBinFormatCOFF())
return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
if (LittleEndian)
@@ -277,8 +283,11 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
this->Options.TrapUnreachable = true;
}
- // Enable GlobalISel at or below EnableGlobalISelAt0.
- if (getOptLevel() <= EnableGlobalISelAtO) {
+ // Enable GlobalISel at or below EnableGlobalISelAtO, except for arm64_32
+ // and MachO/CodeModel::Large, which GlobalISel does not support yet.
+ if (getOptLevel() <= EnableGlobalISelAtO &&
+ TT.getArch() != Triple::aarch64_32 &&
+ !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) {
setGlobalISel(true);
setGlobalISelAbort(GlobalISelAbortMode::Disable);
}
@@ -310,7 +319,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
+ I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
isLittle);
}
return I.get();
@@ -448,7 +457,8 @@ void AArch64PassConfig::addIRPasses() {
addPass(createLICMPass());
}
- addPass(createAArch64StackTaggingPass());
+ addPass(createAArch64StackTaggingPass(/* MergeInit = */ TM->getOptLevel() !=
+ CodeGenOpt::None));
}
// Pass Pipeline Configuration
@@ -502,7 +512,8 @@ bool AArch64PassConfig::addIRTranslator() {
}
void AArch64PassConfig::addPreLegalizeMachineIR() {
- addPass(createAArch64PreLegalizeCombiner());
+ bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ addPass(createAArch64PreLegalizeCombiner(IsOptNone));
}
bool AArch64PassConfig::addLegalizeMachineIR() {
@@ -516,9 +527,7 @@ bool AArch64PassConfig::addRegBankSelect() {
}
void AArch64PassConfig::addPreGlobalInstructionSelect() {
- // Workaround the deficiency of the fast register allocator.
- if (TM->getOptLevel() == CodeGenOpt::None)
- addPass(new Localizer());
+ addPass(new Localizer());
}
bool AArch64PassConfig::addGlobalInstructionSelect() {
@@ -540,6 +549,8 @@ bool AArch64PassConfig::addILPOpts() {
if (EnableStPairSuppress)
addPass(createAArch64StorePairSuppressPass());
addPass(createAArch64SIMDInstrOptPass());
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64StackTaggingPreRAPass());
return true;
}
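
The pipeline hooks above gate the new stack-tagging work on optimization
level: initializer merging is passed as MergeInit = (OptLevel != None) into
createAArch64StackTaggingPass(), and the pre-RA pass is only added in
addILPOpts() when optimizing. A minimal sketch of that gating pattern
(hypothetical stand-in passes, not LLVM's PassConfig):

    #include <cstdio>

    enum class OptLevel { None, Default, Aggressive };

    // Hypothetical stand-ins for the two passes wired up above.
    static void addStackTagging(bool MergeInit) {
      std::printf("StackTagging(MergeInit=%d)\n", MergeInit);
    }
    static void addStackTaggingPreRA() { std::printf("StackTaggingPreRA\n"); }

    static void buildPipeline(OptLevel OL) {
      addStackTagging(/*MergeInit=*/OL != OptLevel::None);
      if (OL != OptLevel::None)
        addStackTaggingPreRA();
    }

    int main() { buildPipeline(OptLevel::Default); }
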
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 1c3d5d0743ad..54562094fcf5 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -59,8 +59,8 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol(
}
const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
- const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
- MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV,
+ int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const {
assert((Offset+MV.getConstant() == 0) &&
"Arch64 does not support GOT PC rel with extra offset");
// On ARM64 Darwin, we can reference symbols with foo@GOT-., which
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 7ead363d42fe..1cb4c028c80d 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -35,7 +35,8 @@ public:
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
- const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV,
+ const MCSymbol *Sym,
const MCValue &MV, int64_t Offset,
MachineModuleInfo *MMI,
MCStreamer &Streamer) const override;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a4b78f2a7d6b..dc916a7b3407 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -618,6 +618,19 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
+AArch64TTIImpl::TTI::MemCmpExpansionOptions
+AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+ Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ Options.NumLoadsPerBlock = Options.MaxNumLoads;
+ // TODO: Though vector loads usually perform well on AArch64, on some
+ // targets they may wake up the FP unit, which raises the power consumption.
+ // Perhaps they could be used with no holds barred (-O3).
+ Options.LoadSizes = {8, 4, 2, 1};
+ return Options;
+}
+
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
unsigned Alignment, unsigned AddressSpace,
const Instruction *I) {
@@ -879,22 +892,6 @@ bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
return Considerable;
}
-unsigned AArch64TTIImpl::getCacheLineSize() {
- return ST->getCacheLineSize();
-}
-
-unsigned AArch64TTIImpl::getPrefetchDistance() {
- return ST->getPrefetchDistance();
-}
-
-unsigned AArch64TTIImpl::getMinPrefetchStride() {
- return ST->getMinPrefetchStride();
-}
-
-unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
- return ST->getMaxPrefetchIterationsAhead();
-}
-
bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 10c15a139b4c..32c59f41e1c3 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -85,7 +85,8 @@ public:
bool enableInterleavedAccessVectorization() { return true; }
- unsigned getNumberOfRegisters(bool Vector) {
+ unsigned getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (Vector) {
if (ST->hasNEON())
return 32;
@@ -130,6 +131,9 @@ public:
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
+
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
@@ -153,14 +157,6 @@ public:
shouldConsiderAddressTypePromotion(const Instruction &I,
bool &AllowPromotionWithoutCommonHeader);
- unsigned getCacheLineSize();
-
- unsigned getPrefetchDistance();
-
- unsigned getMinPrefetchStride();
-
- unsigned getMaxPrefetchIterationsAhead();
-
bool shouldExpandReduction(const IntrinsicInst *II) const {
return false;
}
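
The new enableMemCmpExpansion() hook above hands the generic memcmp expansion
the allowed load sizes and load counts. Here is a sketch of the resulting
greedy decomposition (illustration only; the real expansion additionally uses
the AllowOverlappingLoads option to cover a ragged tail with one overlapping
wide load instead of several narrow ones):

    #include <cstdio>
    #include <vector>

    // Greedily split an N-byte compare into the allowed load sizes,
    // given in decreasing order as in the hook above: {8, 4, 2, 1}.
    static std::vector<unsigned>
    decompose(unsigned N, const std::vector<unsigned> &LoadSizes) {
      std::vector<unsigned> Loads;
      for (unsigned Size : LoadSizes)
        while (N >= Size) {
          Loads.push_back(Size);
          N -= Size;
        }
      return Loads;
    }

    int main() {
      for (unsigned Size : decompose(15, {8, 4, 2, 1}))
        std::printf("%u ", Size); // 8 4 2 1
      std::printf("\n");
    }
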
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f4c55d48d215..4fb409f020d9 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -935,48 +935,34 @@ public:
return false;
}
- bool isMovZSymbolG3() const {
- return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
+ bool isMovWSymbolG3() const {
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G3, AArch64MCExpr::VK_PREL_G3});
}
- bool isMovZSymbolG2() const {
- return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
- AArch64MCExpr::VK_TPREL_G2,
- AArch64MCExpr::VK_DTPREL_G2});
- }
-
- bool isMovZSymbolG1() const {
- return isMovWSymbol({
- AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
- AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1,
- AArch64MCExpr::VK_DTPREL_G1,
- });
- }
-
- bool isMovZSymbolG0() const {
- return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
- AArch64MCExpr::VK_TPREL_G0,
- AArch64MCExpr::VK_DTPREL_G0});
- }
-
- bool isMovKSymbolG3() const {
- return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
- }
-
- bool isMovKSymbolG2() const {
- return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC);
+ bool isMovWSymbolG2() const {
+ return isMovWSymbol(
+ {AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
+ AArch64MCExpr::VK_ABS_G2_NC, AArch64MCExpr::VK_PREL_G2,
+ AArch64MCExpr::VK_PREL_G2_NC, AArch64MCExpr::VK_TPREL_G2,
+ AArch64MCExpr::VK_DTPREL_G2});
}
- bool isMovKSymbolG1() const {
- return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC,
- AArch64MCExpr::VK_TPREL_G1_NC,
- AArch64MCExpr::VK_DTPREL_G1_NC});
+ bool isMovWSymbolG1() const {
+ return isMovWSymbol(
+ {AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
+ AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_PREL_G1,
+ AArch64MCExpr::VK_PREL_G1_NC, AArch64MCExpr::VK_GOTTPREL_G1,
+ AArch64MCExpr::VK_TPREL_G1, AArch64MCExpr::VK_TPREL_G1_NC,
+ AArch64MCExpr::VK_DTPREL_G1, AArch64MCExpr::VK_DTPREL_G1_NC});
}
- bool isMovKSymbolG0() const {
+ bool isMovWSymbolG0() const {
return isMovWSymbol(
- {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
- AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC});
+ {AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
+ AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_PREL_G0,
+ AArch64MCExpr::VK_PREL_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_TPREL_G0_NC,
+ AArch64MCExpr::VK_DTPREL_G0, AArch64MCExpr::VK_DTPREL_G0_NC});
}
template<int RegWidth, int Shift>
@@ -1814,7 +1800,7 @@ public:
static std::unique_ptr<AArch64Operand>
CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_Token, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_Token, Ctx);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->Tok.IsSuffix = IsSuffix;
@@ -1829,7 +1815,7 @@ public:
AArch64_AM::ShiftExtendType ExtTy = AArch64_AM::LSL,
unsigned ShiftAmount = 0,
unsigned HasExplicitAmount = false) {
- auto Op = make_unique<AArch64Operand>(k_Register, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_Register, Ctx);
Op->Reg.RegNum = RegNum;
Op->Reg.Kind = Kind;
Op->Reg.ElementWidth = 0;
@@ -1861,7 +1847,7 @@ public:
CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements,
unsigned ElementWidth, RegKind RegisterKind, SMLoc S, SMLoc E,
MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_VectorList, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_VectorList, Ctx);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.NumElements = NumElements;
@@ -1874,7 +1860,7 @@ public:
static std::unique_ptr<AArch64Operand>
CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_VectorIndex, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_VectorIndex, Ctx);
Op->VectorIndex.Val = Idx;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -1883,7 +1869,7 @@ public:
static std::unique_ptr<AArch64Operand> CreateImm(const MCExpr *Val, SMLoc S,
SMLoc E, MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_Immediate, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_Immediate, Ctx);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -1894,7 +1880,7 @@ public:
unsigned ShiftAmount,
SMLoc S, SMLoc E,
MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_ShiftedImm, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_ShiftedImm, Ctx);
Op->ShiftedImm.Val = Val;
Op->ShiftedImm.ShiftAmount = ShiftAmount;
Op->StartLoc = S;
@@ -1904,7 +1890,7 @@ public:
static std::unique_ptr<AArch64Operand>
CreateCondCode(AArch64CC::CondCode Code, SMLoc S, SMLoc E, MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_CondCode, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_CondCode, Ctx);
Op->CondCode.Code = Code;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -1913,7 +1899,7 @@ public:
static std::unique_ptr<AArch64Operand>
CreateFPImm(APFloat Val, bool IsExact, SMLoc S, MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_FPImm, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_FPImm, Ctx);
Op->FPImm.Val = Val.bitcastToAPInt().getSExtValue();
Op->FPImm.IsExact = IsExact;
Op->StartLoc = S;
@@ -1925,7 +1911,7 @@ public:
StringRef Str,
SMLoc S,
MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
Op->Barrier.Data = Str.data();
Op->Barrier.Length = Str.size();
@@ -1939,7 +1925,7 @@ public:
uint32_t MSRReg,
uint32_t PStateField,
MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_SysReg, Ctx);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
Op->SysReg.MRSReg = MRSReg;
@@ -1952,7 +1938,7 @@ public:
static std::unique_ptr<AArch64Operand> CreateSysCR(unsigned Val, SMLoc S,
SMLoc E, MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_SysCR, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_SysCR, Ctx);
Op->SysCRImm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -1963,7 +1949,7 @@ public:
StringRef Str,
SMLoc S,
MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
Op->Barrier.Data = Str.data();
Op->Barrier.Length = Str.size();
@@ -1976,7 +1962,7 @@ public:
StringRef Str,
SMLoc S,
MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_PSBHint, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_PSBHint, Ctx);
Op->PSBHint.Val = Val;
Op->PSBHint.Data = Str.data();
Op->PSBHint.Length = Str.size();
@@ -1989,7 +1975,7 @@ public:
StringRef Str,
SMLoc S,
MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_BTIHint, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_BTIHint, Ctx);
Op->BTIHint.Val = Val << 1 | 32;
Op->BTIHint.Data = Str.data();
Op->BTIHint.Length = Str.size();
@@ -2001,7 +1987,7 @@ public:
static std::unique_ptr<AArch64Operand>
CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
- auto Op = make_unique<AArch64Operand>(k_ShiftExtend, Ctx);
+ auto Op = std::make_unique<AArch64Operand>(k_ShiftExtend, Ctx);
Op->ShiftExtend.Type = ShOp;
Op->ShiftExtend.Amount = Val;
Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount;
@@ -2840,7 +2826,7 @@ static const struct Extension {
{"sve2-aes", {AArch64::FeatureSVE2AES}},
{"sve2-sm4", {AArch64::FeatureSVE2SM4}},
{"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
- {"bitperm", {AArch64::FeatureSVE2BitPerm}},
+ {"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}},
// FIXME: Unsupported extensions
{"pan", {}},
{"lor", {}},
@@ -3260,6 +3246,13 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
.Case("abs_g0", AArch64MCExpr::VK_ABS_G0)
.Case("abs_g0_s", AArch64MCExpr::VK_ABS_G0_S)
.Case("abs_g0_nc", AArch64MCExpr::VK_ABS_G0_NC)
+ .Case("prel_g3", AArch64MCExpr::VK_PREL_G3)
+ .Case("prel_g2", AArch64MCExpr::VK_PREL_G2)
+ .Case("prel_g2_nc", AArch64MCExpr::VK_PREL_G2_NC)
+ .Case("prel_g1", AArch64MCExpr::VK_PREL_G1)
+ .Case("prel_g1_nc", AArch64MCExpr::VK_PREL_G1_NC)
+ .Case("prel_g0", AArch64MCExpr::VK_PREL_G0)
+ .Case("prel_g0_nc", AArch64MCExpr::VK_PREL_G0_NC)
.Case("dtprel_g2", AArch64MCExpr::VK_DTPREL_G2)
.Case("dtprel_g1", AArch64MCExpr::VK_DTPREL_G1)
.Case("dtprel_g1_nc", AArch64MCExpr::VK_DTPREL_G1_NC)
@@ -5283,7 +5276,7 @@ bool AArch64AsmParser::parseDirectiveInst(SMLoc Loc) {
auto parseOp = [&]() -> bool {
SMLoc L = getLoc();
- const MCExpr *Expr;
+ const MCExpr *Expr = nullptr;
if (check(getParser().parseExpression(Expr), L, "expected expression"))
return true;
const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
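
Note: initializing Expr to nullptr above is a defensive fix. If parseExpression were ever to report failure without writing its out-parameter, Expr would otherwise be indeterminate; with the initializer, the subsequent dyn_cast_or_null (which is specified for null, not for garbage) can never observe an uninitialized pointer, and static analyzers are likely quieted as well.
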
@@ -5542,43 +5535,43 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
switch (Kind) {
default:
return Match_InvalidOperand;
- case MCK__35_0:
+ case MCK__HASH_0:
ExpectedVal = 0;
break;
- case MCK__35_1:
+ case MCK__HASH_1:
ExpectedVal = 1;
break;
- case MCK__35_12:
+ case MCK__HASH_12:
ExpectedVal = 12;
break;
- case MCK__35_16:
+ case MCK__HASH_16:
ExpectedVal = 16;
break;
- case MCK__35_2:
+ case MCK__HASH_2:
ExpectedVal = 2;
break;
- case MCK__35_24:
+ case MCK__HASH_24:
ExpectedVal = 24;
break;
- case MCK__35_3:
+ case MCK__HASH_3:
ExpectedVal = 3;
break;
- case MCK__35_32:
+ case MCK__HASH_32:
ExpectedVal = 32;
break;
- case MCK__35_4:
+ case MCK__HASH_4:
ExpectedVal = 4;
break;
- case MCK__35_48:
+ case MCK__HASH_48:
ExpectedVal = 48;
break;
- case MCK__35_6:
+ case MCK__HASH_6:
ExpectedVal = 6;
break;
- case MCK__35_64:
+ case MCK__HASH_64:
ExpectedVal = 64;
break;
- case MCK__35_8:
+ case MCK__HASH_8:
ExpectedVal = 8;
break;
}
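
Note: the MCK__35_* to MCK__HASH_* rename is mechanical. These enumerators are generated by TableGen's AsmMatcher emitter for literal '#' tokens; the emitter previously mangled such tokens by ASCII code ('#' is character 35, 0x23) and now emits the readable _HASH_ form. The ExpectedVal logic is unchanged.
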
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 6418211a4f55..21ce5785ea5e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -153,9 +153,8 @@ static unsigned AdrImmBits(unsigned Value) {
static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
uint64_t Value, MCContext &Ctx,
const Triple &TheTriple, bool IsResolved) {
- unsigned Kind = Fixup.getKind();
int64_t SignedValue = static_cast<int64_t>(Value);
- switch (Kind) {
+ switch (Fixup.getTargetKind()) {
default:
llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_pcrel_adr_imm21:
@@ -574,7 +573,7 @@ public:
case MCCFIInstruction::OpDefCfa: {
// Defines a frame pointer.
unsigned XReg =
- getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true));
+ getXRegFromWReg(*MRI.getLLVMRegNum(Inst.getRegister(), true));
// Other CFA registers than FP are not supported by compact unwind.
// Fallback on DWARF.
@@ -593,8 +592,8 @@ public:
assert(FPPush.getOperation() == MCCFIInstruction::OpOffset &&
"Frame pointer not pushed!");
- unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true);
- unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true);
+ unsigned LRReg = *MRI.getLLVMRegNum(LRPush.getRegister(), true);
+ unsigned FPReg = *MRI.getLLVMRegNum(FPPush.getRegister(), true);
LRReg = getXRegFromWReg(LRReg);
FPReg = getXRegFromWReg(FPReg);
@@ -615,14 +614,14 @@ public:
case MCCFIInstruction::OpOffset: {
// Registers are saved in pairs. We expect there to be two consecutive
// `.cfi_offset' instructions with the appropriate registers specified.
- unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ unsigned Reg1 = *MRI.getLLVMRegNum(Inst.getRegister(), true);
if (i + 1 == e)
return CU::UNWIND_ARM64_MODE_DWARF;
const MCCFIInstruction &Inst2 = Instrs[++i];
if (Inst2.getOperation() != MCCFIInstruction::OpOffset)
return CU::UNWIND_ARM64_MODE_DWARF;
- unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true);
+ unsigned Reg2 = *MRI.getLLVMRegNum(Inst2.getRegister(), true);
// N.B. The encodings must be in register number order, and the X
// registers before the D registers.
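
Note: the dereferences added above reflect an MC API change in this import: MCRegisterInfo::getLLVMRegNum now returns Optional<unsigned> instead of asserting internally, so callers that know the DWARF register maps (as this compact-unwind code does) dereference the result directly. A minimal sketch of the checked form, using a hypothetical helper name and only public MC headers:

    #include "llvm/ADT/Optional.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/Support/ErrorHandling.h"

    // Checked variant of the mapping used above: translate a DWARF/EH
    // register number to an LLVM register, failing loudly instead of
    // dereferencing an empty Optional.
    static unsigned getLLVMRegChecked(const llvm::MCRegisterInfo &MRI,
                                      unsigned DwarfReg) {
      if (llvm::Optional<unsigned> R =
              MRI.getLLVMRegNum(DwarfReg, /*isEH=*/true))
        return *R;
      llvm::report_fatal_error("DWARF register has no LLVM mapping");
    }
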
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index c871e2c62eac..0fd1ca187be7 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -57,7 +57,7 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32)
static bool isNonILP32reloc(const MCFixup &Fixup,
AArch64MCExpr::VariantKind RefKind,
MCContext &Ctx) {
- if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw)
+ if (Fixup.getTargetKind() != AArch64::fixup_aarch64_movw)
return false;
switch (RefKind) {
case AArch64MCExpr::VK_ABS_G3:
@@ -120,7 +120,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
"Should only be expression-level modifiers here");
if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
return ELF::R_AARCH64_NONE;
@@ -184,7 +184,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else {
if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx))
return ELF::R_AARCH64_NONE;
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
case FK_NONE:
return ELF::R_AARCH64_NONE;
case FK_Data_1:
@@ -394,6 +394,20 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(MOVW_SABS_G0);
if (RefKind == AArch64MCExpr::VK_ABS_G0_NC)
return R_CLS(MOVW_UABS_G0_NC);
+ if (RefKind == AArch64MCExpr::VK_PREL_G3)
+ return ELF::R_AARCH64_MOVW_PREL_G3;
+ if (RefKind == AArch64MCExpr::VK_PREL_G2)
+ return ELF::R_AARCH64_MOVW_PREL_G2;
+ if (RefKind == AArch64MCExpr::VK_PREL_G2_NC)
+ return ELF::R_AARCH64_MOVW_PREL_G2_NC;
+ if (RefKind == AArch64MCExpr::VK_PREL_G1)
+ return R_CLS(MOVW_PREL_G1);
+ if (RefKind == AArch64MCExpr::VK_PREL_G1_NC)
+ return ELF::R_AARCH64_MOVW_PREL_G1_NC;
+ if (RefKind == AArch64MCExpr::VK_PREL_G0)
+ return R_CLS(MOVW_PREL_G0);
+ if (RefKind == AArch64MCExpr::VK_PREL_G0_NC)
+ return R_CLS(MOVW_PREL_G0_NC);
if (RefKind == AArch64MCExpr::VK_DTPREL_G2)
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
if (RefKind == AArch64MCExpr::VK_DTPREL_G1)
@@ -434,5 +448,5 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
std::unique_ptr<MCObjectTargetWriter>
llvm::createAArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32) {
- return llvm::make_unique<AArch64ELFObjectWriter>(OSABI, IsILP32);
+ return std::make_unique<AArch64ELFObjectWriter>(OSABI, IsILP32);
}
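
Note: together with the parser and printer changes elsewhere in this patch, the new cases above wire the :prel_gN[_nc]: family end to end. As an illustrative example mirroring the existing :abs_gN: forms, materializing a PC-relative address group by group with "movz x0, #:prel_g1:sym" followed by "movk x0, #:prel_g0_nc:sym" should now relocate to R_AARCH64_MOVW_PREL_G1 and R_AARCH64_MOVW_PREL_G0_NC respectively, with R_CLS selecting the LP64 or ILP32 spelling where both exist.
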
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index d0a544273b8b..1a16468484ad 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -172,7 +172,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
int ImmS = MI->getOperand(4).getImm();
if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) &&
- (ImmR == 0 || ImmS < ImmR)) {
+ (ImmR == 0 || ImmS < ImmR) &&
+ STI.getFeatureBits()[AArch64::HasV8_2aOps]) {
// BFC takes precedence over its entire range, slightly differently to BFI.
int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
int LSB = (BitWidth - ImmR) % BitWidth;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index ecff1ab0a8b3..5926a4f81616 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -30,7 +30,7 @@ static cl::opt<AsmWriterVariantTy> AsmWriterVariant(
cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"),
clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly")));
-AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
+AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin(bool IsILP32) {
// We prefer NEON instructions to be printed in the short, Apple-specific
// form when targeting Darwin.
AssemblerDialect = AsmWriterVariant == Default ? Apple : AsmWriterVariant;
@@ -39,7 +39,8 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
PrivateLabelPrefix = "L";
SeparatorString = "%%";
CommentString = ";";
- CodePointerSize = CalleeSaveStackSlotSize = 8;
+ CalleeSaveStackSlotSize = 8;
+ CodePointerSize = IsILP32 ? 4 : 8;
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 36ae92afc8c1..7274ae79f74a 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -23,7 +23,7 @@ class Target;
class Triple;
struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
- explicit AArch64MCAsmInfoDarwin();
+ explicit AArch64MCAsmInfoDarwin(bool IsILP32);
const MCExpr *
getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
MCStreamer &Streamer) const override;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 0a529321edc8..548e399e05a3 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -42,6 +42,13 @@ StringRef AArch64MCExpr::getVariantKindName() const {
case VK_ABS_G0: return ":abs_g0:";
case VK_ABS_G0_S: return ":abs_g0_s:";
case VK_ABS_G0_NC: return ":abs_g0_nc:";
+ case VK_PREL_G3: return ":prel_g3:";
+ case VK_PREL_G2: return ":prel_g2:";
+ case VK_PREL_G2_NC: return ":prel_g2_nc:";
+ case VK_PREL_G1: return ":prel_g1:";
+ case VK_PREL_G1_NC: return ":prel_g1_nc:";
+ case VK_PREL_G0: return ":prel_g0:";
+ case VK_PREL_G0_NC: return ":prel_g0_nc:";
case VK_DTPREL_G2: return ":dtprel_g2:";
case VK_DTPREL_G1: return ":dtprel_g1:";
case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:";
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index ec9c95911628..a82ff2e91426 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -27,12 +27,13 @@ public:
// symbol. E.g. direct, via the GOT, ...
VK_ABS = 0x001,
VK_SABS = 0x002,
- VK_GOT = 0x003,
- VK_DTPREL = 0x004,
- VK_GOTTPREL = 0x005,
- VK_TPREL = 0x006,
- VK_TLSDESC = 0x007,
- VK_SECREL = 0x008,
+ VK_PREL = 0x003,
+ VK_GOT = 0x004,
+ VK_DTPREL = 0x005,
+ VK_GOTTPREL = 0x006,
+ VK_TPREL = 0x007,
+ VK_TLSDESC = 0x008,
+ VK_SECREL = 0x009,
VK_SymLocBits = 0x00f,
// Variants specifying which part of the final address calculation is
@@ -72,6 +73,13 @@ public:
VK_ABS_G0_S = VK_SABS | VK_G0,
VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC,
VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC,
+ VK_PREL_G3 = VK_PREL | VK_G3,
+ VK_PREL_G2 = VK_PREL | VK_G2,
+ VK_PREL_G2_NC = VK_PREL | VK_G2 | VK_NC,
+ VK_PREL_G1 = VK_PREL | VK_G1,
+ VK_PREL_G1_NC = VK_PREL | VK_G1 | VK_NC,
+ VK_PREL_G0 = VK_PREL | VK_G0,
+ VK_PREL_G0_NC = VK_PREL | VK_G0 | VK_NC,
VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC,
VK_GOT_PAGE = VK_GOT | VK_PAGE,
VK_DTPREL_G2 = VK_DTPREL | VK_G2,
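
Note the renumbering above: inserting VK_PREL at 0x003 shifts VK_GOT through VK_SECREL up by one. All nine symbol-location kinds still fit under the VK_SymLocBits mask of 0x00f, and since these values are internal encoding bits rather than a stable ABI, the shift is safe.
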
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index df12274d9470..1d583ec0087b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -241,7 +241,7 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TheTriple) {
MCAsmInfo *MAI;
if (TheTriple.isOSBinFormatMachO())
- MAI = new AArch64MCAsmInfoDarwin();
+ MAI = new AArch64MCAsmInfoDarwin(TheTriple.getArch() == Triple::aarch64_32);
else if (TheTriple.isWindowsMSVCEnvironment())
MAI = new AArch64MCAsmInfoMicrosoftCOFF();
else if (TheTriple.isOSBinFormatCOFF())
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index b3ce5ef22eef..fc04d37eb362 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -54,7 +54,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED);
Log2Size = ~0U;
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default:
return false;
@@ -406,6 +406,6 @@ void AArch64MachObjectWriter::recordRelocation(
std::unique_ptr<MCObjectTargetWriter>
llvm::createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype,
bool IsILP32) {
- return llvm::make_unique<AArch64MachObjectWriter>(CPUType, CPUSubtype,
+ return std::make_unique<AArch64MachObjectWriter>(CPUType, CPUSubtype,
IsILP32);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
index a45880a07427..aa50bd05cb71 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -120,7 +120,7 @@ bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
namespace llvm {
std::unique_ptr<MCObjectTargetWriter> createAArch64WinCOFFObjectWriter() {
- return llvm::make_unique<AArch64WinCOFFObjectWriter>();
+ return std::make_unique<AArch64WinCOFFObjectWriter>();
}
} // end namespace llvm
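
Note: this file, like the AsmParser and the ELF/Mach-O writers above, picks up the tree-wide migration from llvm::make_unique to std::make_unique that accompanied LLVM's move to C++14. A trivial sketch of the pattern, with a hypothetical Widget type and only standard <memory>:

    #include <memory>

    struct Widget {
      explicit Widget(int N) : N(N) {}
      int N;
    };

    std::unique_ptr<Widget> makeWidget() {
      // Before this import: llvm::make_unique<Widget>(42);
      return std::make_unique<Widget>(42); // same semantics, standard spelling
    }
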
diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td
index 808e59467081..8ccf6aa675ba 100644
--- a/lib/Target/AArch64/SVEInstrFormats.td
+++ b/lib/Target/AArch64/SVEInstrFormats.td
@@ -279,6 +279,19 @@ let Predicates = [HasSVE] in {
defm PTRUES : sve_int_ptrue<0b001, "ptrues">;
}
+//===----------------------------------------------------------------------===//
+// SVE pattern match helpers.
+//===----------------------------------------------------------------------===//
+
+class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ Instruction inst>
+: Pat<(vtd (op vt1:$Op1)),
+ (inst $Op1)>;
+
+class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
+ (inst $Op1, $Op2, $Op3)>;
//===----------------------------------------------------------------------===//
// SVE Predicate Misc Group
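
Note: the SVE_1_Op_Pat and SVE_3_Op_Pat helpers introduced above factor out the common anonymous-Pat shape, mapping a one- or three-operand SDPatternOperator node over scalable-vector types onto a single instruction. They are instantiated later in this same patch, e.g. "def : SVE_1_Op_Pat<nxv8i16, op, nxv16i8, !cast<Instruction>(NAME # _H)>;" in sve_int_perm_unpk, so each multiclass only has to state its type combinations.
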
@@ -403,12 +416,12 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm> {
}
class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
- ZPRRegOp zprty>
-: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg),
- asm, "\t$Zdn, $Pg",
+ ZPRRegOp zprty, PPRRegOp pprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm),
+ asm, "\t$Zdn, $Pm",
"",
[]>, Sched<[]> {
- bits<4> Pg;
+ bits<4> Pm;
bits<5> Zdn;
let Inst{31-24} = 0b00100101;
let Inst{23-22} = sz8_64;
@@ -416,7 +429,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let Inst{18-16} = opc{4-2};
let Inst{15-11} = 0b10000;
let Inst{10-9} = opc{1-0};
- let Inst{8-5} = Pg;
+ let Inst{8-5} = Pm;
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
@@ -425,9 +438,16 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
}
multiclass sve_int_count_v<bits<5> opc, string asm> {
- def _H : sve_int_count_v<0b01, opc, asm, ZPR16>;
- def _S : sve_int_count_v<0b10, opc, asm, ZPR32>;
- def _D : sve_int_count_v<0b11, opc, asm, ZPR64>;
+ def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>;
+ def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>;
+ def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>;
+
+ def : InstAlias<asm # "\t$Zdn, $Pm",
+ (!cast<Instruction>(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>;
+ def : InstAlias<asm # "\t$Zdn, $Pm",
+ (!cast<Instruction>(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>;
+ def : InstAlias<asm # "\t$Zdn, $Pm",
+ (!cast<Instruction>(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>;
}
class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
@@ -609,11 +629,12 @@ multiclass sve_int_pred_pattern_b_x64<bits<5> opc, string asm> {
//===----------------------------------------------------------------------===//
class sve_int_perm_dup_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
- RegisterClass srcRegType>
+ ValueType vt, RegisterClass srcRegType,
+ SDPatternOperator op>
: I<(outs zprty:$Zd), (ins srcRegType:$Rn),
asm, "\t$Zd, $Rn",
"",
- []>, Sched<[]> {
+ [(set (vt zprty:$Zd), (op srcRegType:$Rn))]>, Sched<[]> {
bits<5> Rn;
bits<5> Zd;
let Inst{31-24} = 0b00000101;
@@ -623,11 +644,11 @@ class sve_int_perm_dup_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_perm_dup_r<string asm> {
- def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, GPR32sp>;
- def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, GPR32sp>;
- def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, GPR32sp>;
- def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, GPR64sp>;
+multiclass sve_int_perm_dup_r<string asm, SDPatternOperator op> {
+ def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, nxv16i8, GPR32sp, op>;
+ def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, nxv8i16, GPR32sp, op>;
+ def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, nxv4i32, GPR32sp, op>;
+ def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, nxv2i64, GPR64sp, op>;
def : InstAlias<"mov $Zd, $Rn",
(!cast<Instruction>(NAME # _B) ZPR8:$Zd, GPR32sp:$Rn), 1>;
@@ -744,7 +765,7 @@ multiclass sve2_int_perm_tbl<string asm> {
}
class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
-: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm),
asm, "\t$Zd, $Zn, $Zm",
"",
[]>, Sched<[]> {
@@ -758,6 +779,8 @@ class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{15-10} = 0b001011;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
}
multiclass sve2_int_perm_tbx<string asm> {
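
Note: the added $_Zd input operand and the "$Zd = $_Zd" constraint above encode that TBX merges into its destination (out-of-range indices leave the corresponding Zd lanes unchanged), so the register allocator must treat Zd as read as well as written. The same tie is applied below to the predicated convert-precision class, the *_top narrowing forms, and the xor-interleaved and shift-immediate forms, which likewise read their destination.
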
@@ -826,10 +849,14 @@ class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve_int_perm_unpk<bits<2> opc, string asm> {
+multiclass sve_int_perm_unpk<bits<2> opc, string asm, SDPatternOperator op> {
def _H : sve_int_perm_unpk<0b01, opc, asm, ZPR16, ZPR8>;
def _S : sve_int_perm_unpk<0b10, opc, asm, ZPR32, ZPR16>;
def _D : sve_int_perm_unpk<0b11, opc, asm, ZPR64, ZPR32>;
+
+ def : SVE_1_Op_Pat<nxv8i16, op, nxv16i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv4i32, op, nxv8i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Pat<nxv2i64, op, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -1197,10 +1224,12 @@ multiclass sve_fp_ftmad<string asm> {
//===----------------------------------------------------------------------===//
class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm,
- ZPRRegOp zprty>
+ ZPRRegOp zprty,
+ ValueType vt, ValueType vt2, SDPatternOperator op>
: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
asm, "\t$Zd, $Zn, $Zm",
- "", []>, Sched<[]> {
+ "",
+ [(set (vt zprty:$Zd), (op (vt zprty:$Zn), (vt2 zprty:$Zm)))]>, Sched<[]> {
bits<5> Zd;
bits<5> Zm;
bits<5> Zn;
@@ -1214,10 +1243,10 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve_fp_3op_u_zd<bits<3> opc, string asm> {
- def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
- def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
- def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
+multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
+ def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16, nxv8f16, nxv8f16, op>;
+ def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32, nxv4f32, nxv4f32, op>;
+ def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64, nxv2f64, nxv2f64, op>;
}
//===----------------------------------------------------------------------===//
@@ -1489,7 +1518,7 @@ multiclass sve_fp_fcadd<string asm> {
class sve2_fp_convert_precision<bits<4> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
-: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
asm, "\t$Zd, $Pg/m, $Zn",
"",
[]>, Sched<[]> {
@@ -1504,6 +1533,8 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
}
multiclass sve2_fp_convert_down_narrow<string asm> {
@@ -1998,12 +2029,14 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = Destructive;
- let ElementSize = zprty1.ElementSize;
}
-multiclass sve_intx_dot<bit opc, string asm> {
+multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>;
def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>;
+
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -2028,22 +2061,27 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = Destructive;
- let ElementSize = ElementSizeNone;
}
-multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
- def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
+ SDPatternOperator op> {
+ def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
bits<2> iop;
bits<3> Zm;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+ def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
bits<1> iop;
bits<4> Zm;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
+
+ def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b:$idx))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
+ def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv8i16:$Op2, nxv8i16:$Op3, (i32 VectorIndexD32b:$idx))),
+ (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
}
//===----------------------------------------------------------------------===//
@@ -2399,21 +2437,40 @@ multiclass sve2_misc_bitwise<bits<4> opc, string asm> {
def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
}
-multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
- let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in {
- def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>;
- def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>;
- def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>;
- def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>;
- }
-}
-
multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
}
+class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
+ asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15-11} = 0b10010;
+ let Inst{10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
+ def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>;
+ def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>;
+ def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>;
+ def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>;
+}
+
class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2,
Operand immtype>
@@ -2451,9 +2508,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
// SVE2 Accumulate Group
//===----------------------------------------------------------------------===//
-class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
- ZPRRegOp zprty, Operand immtype>
-: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
+class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm),
asm, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
bits<5> Zd;
@@ -2468,38 +2525,40 @@ class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
let Inst{10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> {
- def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
- def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> {
+ def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+ def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{19} = imm{3};
}
- def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+ def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
let Inst{20-19} = imm{4-3};
}
- def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+ def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
}
-multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> {
- def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
- def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> {
+ def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+ def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
}
- def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
}
-class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
- ZPRRegOp zprty, Operand immtype>
+class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm),
asm, "\t$Zda, $Zn, $imm",
"", []>, Sched<[]> {
@@ -2521,15 +2580,15 @@ class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> {
- def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
- def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> {
+ def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+ def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
}
- def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
@@ -2607,9 +2666,9 @@ multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
// SVE2 Narrowing Group
//===----------------------------------------------------------------------===//
-class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
- string asm, ZPRRegOp zprty1,
- ZPRRegOp zprty2, Operand immtype>
+class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
+ string asm, ZPRRegOp zprty1,
+ ZPRRegOp zprty2, Operand immtype>
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
asm, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
@@ -2622,26 +2681,63 @@ class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
let Inst{20-19} = tsz8_64{1-0};
let Inst{18-16} = imm{2-0}; // imm3
let Inst{15-14} = 0b00;
- let Inst{13-10} = opc;
+ let Inst{13-11} = opc;
+ let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
}
-multiclass sve2_int_bin_cons_shift_imm_right_narrow<bits<4> opc, string asm> {
- def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16,
- vecshiftR8>;
- def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32,
- vecshiftR16> {
+multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> {
+ def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
+ vecshiftR8>;
+ def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
+ vecshiftR16> {
let Inst{19} = imm{3};
}
- def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64,
- vecshiftR32> {
+ def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
}
-class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
- ZPRRegOp zprty1, ZPRRegOp zprty2>
+class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
+ string asm, ZPRRegOp zprty1,
+ ZPRRegOp zprty2, Operand immtype>
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm),
+ asm, "\t$Zd, $Zn, $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> imm;
+ let Inst{31-23} = 0b010001010;
+ let Inst{22} = tsz8_64{2};
+ let Inst{21} = 0b1;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-16} = imm{2-0}; // imm3
+ let Inst{15-14} = 0b00;
+ let Inst{13-11} = opc;
+ let Inst{10} = 0b1;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+}
+
+multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> {
+ def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
+ vecshiftR8>;
+ def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
+ vecshiftR16> {
+ let Inst{19} = imm{3};
+ }
+ def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
+ vecshiftR32> {
+ let Inst{20-19} = imm{4-3};
+ }
+}
+
+class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zd;
@@ -2652,19 +2748,46 @@ class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
let Inst{21} = 0b1;
let Inst{20-16} = Zm;
let Inst{15-13} = 0b011;
- let Inst{12-10} = opc; // S, R, T
+ let Inst{12-11} = opc; // S, R
+ let Inst{10} = 0b0; // Top
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
}
-multiclass sve2_int_addsub_narrow_high<bits<3> opc, string asm> {
- def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>;
- def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>;
- def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>;
+multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm> {
+ def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>;
+}
+
+class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
+ asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b011;
+ let Inst{12-11} = opc; // S, R
+ let Inst{10} = 0b1; // Top
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+}
+
+multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm> {
+ def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>;
}
-class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
- ZPRRegOp zprty1, ZPRRegOp zprty2>
+class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins zprty2:$Zn),
asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
bits<5> Zd;
@@ -2674,15 +2797,41 @@ class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
let Inst{21} = 0b1;
let Inst{20-19} = tsz8_64{1-0};
let Inst{18-13} = 0b000010;
- let Inst{12-10} = opc;
+ let Inst{12-11} = opc;
+ let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
}
-multiclass sve2_int_sat_extract_narrow<bits<3> opc, string asm> {
- def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>;
- def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>;
- def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>;
+multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm> {
+ def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>;
+}
+
+class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn),
+ asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-23} = 0b010001010;
+ let Inst{22} = tsz8_64{2};
+ let Inst{21} = 0b1;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-13} = 0b000010;
+ let Inst{12-11} = opc;
+ let Inst{10} = 0b1;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+}
+
+multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm> {
+ def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>;
}
//===----------------------------------------------------------------------===//
@@ -2713,11 +2862,17 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>;
def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm> {
@@ -2735,11 +2890,21 @@ multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm> {
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
}
-multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>;
def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm> {
@@ -3886,9 +4051,9 @@ multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty,
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
}
-class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
- RegisterOperand VecList>
-: I<(outs VecList:$Zt), iops,
+class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
+ RegisterOperand listty, ZPRRegOp zprty>
+: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
asm, "\t$Zt, $Pg, [$Zn, $Rm]",
"",
[]>, Sched<[]> {
@@ -3908,17 +4073,14 @@ class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
let mayStore = 1;
}
-multiclass sve2_mem_cstnt_vs<bits<3> opc, string asm,
+multiclass sve2_mem_sstnt_vs<bits<3> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
- def _REAL : sve2_mem_cstnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
- asm, listty>;
+ def _REAL : sve2_mem_sstnt_vs_base<opc, asm, listty, zprty>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
- def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
}
@@ -4147,6 +4309,14 @@ class sve_int_perm_punpk<bit opc, string asm>
let Inst{3-0} = Pd;
}
+multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {
+ def NAME : sve_int_perm_punpk<opc, asm>;
+
+ def : SVE_1_Op_Pat<nxv8i1, op, nxv16i1, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Pat<nxv4i1, op, nxv8i1, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Pat<nxv2i1, op, nxv4i1, !cast<Instruction>(NAME)>;
+}
+
class sve_int_rdffr_pred<bit s, string asm>
: I<(outs PPR8:$Pd), (ins PPRAny:$Pg),
asm, "\t$Pd, $Pg/z",
@@ -5094,7 +5264,7 @@ multiclass sve_mem_p_fill<string asm> {
(!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
}
-class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
+class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
RegisterOperand VecList>
: I<(outs VecList:$Zt), iops,
asm, "\t$Zt, $Pg/z, [$Zn, $Rm]",
@@ -5119,17 +5289,15 @@ class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
let mayLoad = 1;
}
-multiclass sve2_mem_cldnt_vs<bits<5> opc, string asm,
+multiclass sve2_mem_gldnt_vs<bits<5> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
- def _REAL : sve2_mem_cldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
+ def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
asm, listty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
- def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 7bb075c36e79..c27fc7a112ec 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -125,7 +125,7 @@ namespace llvm {
uint32_t AArch64SysReg::parseGenericRegister(StringRef Name) {
// Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name
- Regex GenericRegPattern("^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$");
+ static const Regex GenericRegPattern("^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$");
std::string UpperName = Name.upper();
SmallVector<StringRef, 5> Ops;
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index e5e2fc2cb0df..7a4fcac09ec4 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -313,9 +313,9 @@ struct SysAlias {
uint16_t Encoding;
FeatureBitset FeaturesRequired;
- SysAlias (const char *N, uint16_t E) : Name(N), Encoding(E) {};
- SysAlias (const char *N, uint16_t E, FeatureBitset F) :
- Name(N), Encoding(E), FeaturesRequired(F) {};
+ constexpr SysAlias(const char *N, uint16_t E) : Name(N), Encoding(E) {}
+ constexpr SysAlias(const char *N, uint16_t E, FeatureBitset F)
+ : Name(N), Encoding(E), FeaturesRequired(F) {}
bool haveFeatures(FeatureBitset ActiveFeatures) const {
return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
@@ -326,9 +326,10 @@ struct SysAlias {
struct SysAliasReg : SysAlias {
bool NeedsReg;
- SysAliasReg(const char *N, uint16_t E, bool R) : SysAlias(N, E), NeedsReg(R) {};
- SysAliasReg(const char *N, uint16_t E, bool R, FeatureBitset F) : SysAlias(N, E, F),
- NeedsReg(R) {};
+ constexpr SysAliasReg(const char *N, uint16_t E, bool R)
+ : SysAlias(N, E), NeedsReg(R) {}
+ constexpr SysAliasReg(const char *N, uint16_t E, bool R, FeatureBitset F)
+ : SysAlias(N, E, F), NeedsReg(R) {}
};
namespace AArch64AT{
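
Note: making the SysAlias and SysAliasReg constructors constexpr (and dropping the stray semicolons after the constructor bodies) is a step toward letting the large TableGen-generated alias tables built from these types be constant-initialized rather than requiring dynamic initializers at load time.
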
@@ -627,6 +628,18 @@ namespace AArch64II {
/// MO_S - Indicates that the bits of the symbol operand represented by
/// MO_G0 etc are signed.
MO_S = 0x100,
+
+ /// MO_PREL - Indicates that the bits of the symbol operand represented by
+ /// MO_G0 etc are PC relative.
+ MO_PREL = 0x200,
+
+ /// MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag
+ /// in bits 56-63.
+ /// On a FrameIndex operand, indicates that the underlying memory is tagged
+ /// with an unknown tag value (MTE); this needs to be lowered either to an
+ /// SP-relative load or store instruction (which do not check tags), or to
+ /// an LDG instruction to obtain the tag value.
+ MO_TAGGED = 0x400,
};
} // end namespace AArch64II
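
Note: MO_PREL extends the existing MO_G0..MO_G3 group selectors the same way MO_S does, as a modifier bit OR'ed onto a machine-operand target flag. A minimal sketch, assuming the target-internal header is reachable and using MO_G1 from the same enum (not shown in this hunk); the helper name is hypothetical:

    #include "Utils/AArch64BaseInfo.h"

    // Target flags for the machine-operand analogue of the ":prel_g1:"
    // assembler modifier: address group 1 (bits 31:16), PC-relative.
    static unsigned prelG1Flags() {
      return llvm::AArch64II::MO_G1 | llvm::AArch64II::MO_PREL;
    }
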
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 19a8bd901629..b64422ae5427 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -188,6 +188,10 @@ ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
ModulePass *createR600OpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();
+ModulePass *createAMDGPUPrintfRuntimeBinding();
+void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
+extern char &AMDGPUPrintfRuntimeBindingID;
+
ModulePass* createAMDGPUUnifyMetadataPass();
void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
extern char &AMDGPUUnifyMetadataID;
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index baeba534012c..42b477e07b3b 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -10,6 +10,15 @@ include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"
+def p0 : PtrValueType<i64, 0>;
+def p1 : PtrValueType<i64, 1>;
+def p2 : PtrValueType<i32, 2>;
+def p3 : PtrValueType<i32, 3>;
+def p4 : PtrValueType<i64, 4>;
+def p5 : PtrValueType<i32, 5>;
+def p6 : PtrValueType<i32, 6>;
+
+
class BoolToList<bit Value> {
list<int> ret = !if(Value, [1]<int>, []<int>);
}
@@ -145,6 +154,12 @@ def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
"Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
>;
+def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug",
+ "HasMFMAInlineLiteralBug",
+ "true",
+ "MFMA cannot use inline literal as SrcC"
+>;
+
def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
"HasVcmpxPermlaneHazard",
"true",
@@ -802,6 +817,7 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeaturePkFmacF16Inst,
FeatureAtomicFaddInsts,
FeatureSRAMECC,
+ FeatureMFMAInlineLiteralBug,
FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_9 : FeatureSet<
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 419ebb2240ad..e72b3f4fde63 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -173,6 +173,9 @@ static StringRef intrinsicToAttrName(Intrinsic::ID ID,
case Intrinsic::amdgcn_implicitarg_ptr:
return "amdgpu-implicitarg-ptr";
case Intrinsic::amdgcn_queue_ptr:
+ case Intrinsic::amdgcn_is_shared:
+ case Intrinsic::amdgcn_is_private:
+ // TODO: Does not require queue ptr on gfx9+
case Intrinsic::trap:
case Intrinsic::debugtrap:
IsQueuePtr = true;
@@ -194,18 +197,12 @@ static bool handleAttr(Function &Parent, const Function &Callee,
static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
bool &NeedQueuePtr) {
// X ids are unnecessarily propagated to kernels.
- static const StringRef AttrNames[] = {
- { "amdgpu-work-item-id-x" },
- { "amdgpu-work-item-id-y" },
- { "amdgpu-work-item-id-z" },
- { "amdgpu-work-group-id-x" },
- { "amdgpu-work-group-id-y" },
- { "amdgpu-work-group-id-z" },
- { "amdgpu-dispatch-ptr" },
- { "amdgpu-dispatch-id" },
- { "amdgpu-kernarg-segment-ptr" },
- { "amdgpu-implicitarg-ptr" }
- };
+ static constexpr StringLiteral AttrNames[] = {
+ "amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
+ "amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
+ "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
+ "amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
+ "amdgpu-kernarg-segment-ptr", "amdgpu-implicitarg-ptr"};
if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
NeedQueuePtr = true;
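
Note: replacing the braced StringRef initializers with constexpr StringLiteral entries removes the per-entry runtime strlen and the dynamic initializer for the table, since StringLiteral captures the literal's length at compile time. A reduced sketch with hypothetical attribute names:

    #include "llvm/ADT/StringRef.h"

    // Constant-initialized: no constructor runs at startup, and each
    // entry's length comes from the literal itself.
    static constexpr llvm::StringLiteral ExampleAttrs[] = {
        "amdgpu-example-attr", "amdgpu-another-attr"};
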
diff --git a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 097730441ed8..f0e7ee910f95 100644
--- a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -48,8 +48,8 @@ public:
return ArgDescriptor(Reg, Mask, false, true);
}
- static ArgDescriptor createStack(Register Reg, unsigned Mask = ~0u) {
- return ArgDescriptor(Reg, Mask, true, true);
+ static ArgDescriptor createStack(unsigned Offset, unsigned Mask = ~0u) {
+ return ArgDescriptor(Offset, Mask, true, true);
}
static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) {
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 743ac64b8f10..f2d903c8e7b1 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -229,7 +229,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
// alignment.
Streamer.EmitValueToAlignment(64, 0, 1, 0);
if (ReadOnlySection.getAlignment() < 64)
- ReadOnlySection.setAlignment(64);
+ ReadOnlySection.setAlignment(Align(64));
const MCSubtargetInfo &STI = MF->getSubtarget();
@@ -273,7 +273,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
AsmPrinter::EmitFunctionEntryLabel();
}
-void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
// Write a line for the basic block label if it is not only fallthrough.
DisasmLines.push_back(
@@ -342,6 +342,8 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) {
// Print comments that apply to both callable functions and entry points.
void AMDGPUAsmPrinter::emitCommonFunctionComments(
uint32_t NumVGPR,
+ Optional<uint32_t> NumAGPR,
+ uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
@@ -349,6 +351,11 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments(
OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
+ if (NumAGPR) {
+ OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
+ OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
+ false);
+ }
OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
false);
@@ -417,7 +424,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// The starting address of all shader programs must be 256 bytes aligned.
// Regular functions just need the basic required instruction alignment.
- MF.setAlignment(MFI->isEntryFunction() ? 8 : 2);
+ MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
SetupMachineFunction(MF);
@@ -474,6 +481,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
emitCommonFunctionComments(
Info.NumVGPR,
+ STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
+ Info.getTotalNumVGPRs(STM),
Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
Info.PrivateSegmentSize,
getFunctionCodeSize(MF), MFI);
@@ -481,7 +490,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
OutStreamer->emitRawComment(" Kernel info:", false);
- emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
+ emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
+ STM.hasMAIInsts()
+ ? CurrentProgramInfo.NumAccVGPR
+ : Optional<uint32_t>(),
+ CurrentProgramInfo.NumVGPR,
CurrentProgramInfo.NumSGPR,
CurrentProgramInfo.ScratchSize,
getFunctionCodeSize(MF), MFI);
@@ -507,6 +520,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
OutStreamer->emitRawComment(
+ " Occupancy: " +
+ Twine(CurrentProgramInfo.Occupancy), false);
+
+ OutStreamer->emitRawComment(
" WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
OutStreamer->emitRawComment(
@@ -588,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
UsesVCC, UsesFlatScratch);
}
+int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
+ const GCNSubtarget &ST) const {
+ return std::max(NumVGPR, NumAGPR);
+}
+
AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
const MachineFunction &MF) const {
SIFunctionResourceInfo Info;
@@ -634,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
HighestVGPRReg = Reg;
break;
}
- MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
- if (MRI.isPhysRegUsed(AReg)) {
- HighestVGPRReg = AReg;
- break;
+ }
+
+ if (ST.hasMAIInsts()) {
+ MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ HighestAGPRReg = Reg;
+ break;
+ }
}
+ Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
+ TRI.getHWRegIndex(HighestAGPRReg) + 1;
}
MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
@@ -660,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
}
int32_t MaxVGPR = -1;
+ int32_t MaxAGPR = -1;
int32_t MaxSGPR = -1;
uint64_t CalleeFrameSize = 0;
@@ -669,11 +699,12 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
for (const MachineOperand &MO : MI.operands()) {
unsigned Width = 0;
bool IsSGPR = false;
+ bool IsAGPR = false;
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
switch (Reg) {
case AMDGPU::EXEC:
case AMDGPU::EXEC_LO:
@@ -744,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 1;
} else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
@@ -755,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 2;
} else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 2;
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
IsSGPR = false;
@@ -771,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 4;
} else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
@@ -790,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 16;
} else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 16;
} else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
IsSGPR = true;
@@ -799,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 32;
} else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 32;
} else {
llvm_unreachable("Unknown register class");
@@ -807,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
int MaxUsed = HWReg + Width - 1;
if (IsSGPR) {
MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+ } else if (IsAGPR) {
+ MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
} else {
MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
}
@@ -828,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
+ MaxAGPR = std::max(MaxAGPR, 23);
CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
Info.UsesVCC = true;
@@ -852,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
+ MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
CalleeFrameSize
= std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
Info.UsesVCC |= I->second.UsesVCC;
@@ -868,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Info.NumExplicitSGPR = MaxSGPR + 1;
Info.NumVGPR = MaxVGPR + 1;
+ Info.NumAGPR = MaxAGPR + 1;
Info.PrivateSegmentSize += CalleeFrameSize;
return Info;
@@ -876,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) {
SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- ProgInfo.NumVGPR = Info.NumVGPR;
+ ProgInfo.NumArchVGPR = Info.NumVGPR;
+ ProgInfo.NumAccVGPR = Info.NumAGPR;
+ ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
ProgInfo.NumSGPR = Info.NumExplicitSGPR;
ProgInfo.ScratchSize = Info.PrivateSegmentSize;
ProgInfo.VCCUsed = Info.UsesVCC;
@@ -890,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
MF.getFunction().getContext().diagnose(DiagStackSize);
}
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
@@ -1057,6 +1100,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
S_00B84C_EXCP_EN(0);
+
+ ProgInfo.Occupancy = STM.computeOccupancy(MF, ProgInfo.LDSSize,
+ ProgInfo.NumSGPRsForWavesPerEU,
+ ProgInfo.NumVGPRsForWavesPerEU);
}
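// Note on the call above (a summary, not part of the upstream change):
// computeOccupancy returns the minimum of the waves-per-EU limits implied
// by LDS usage, the SGPR count, and the VGPR count; e.g. a kernel whose
// registers would allow 10 waves but whose LDS allows only 4 gets
// Occupancy = 4 (hypothetical numbers).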
static unsigned getRsrcReg(CallingConv::ID CallConv) {
@@ -1214,17 +1261,16 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
if (STM.isXNACKEnabled())
Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
- unsigned MaxKernArgAlign;
+ Align MaxKernArgAlign;
Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
- // These alignment values are specified in powers of two, so alignment =
- // 2^n. The minimum alignment is 2^4 = 16.
- Out.kernarg_segment_alignment = std::max<size_t>(4,
- countTrailingZeros(MaxKernArgAlign));
+ // kernarg_segment_alignment is specified as log of the alignment.
+ // The minimum alignment is 16.
+ Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
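// Illustration (hypothetical values, not part of the upstream change):
// the field holds log2 of the alignment, clamped to the 16-byte minimum:
//   MaxKernArgAlign = Align(8)  -> Log2(max(Align(16), Align(8)))  = 4
//   MaxKernArgAlign = Align(64) -> Log2(max(Align(16), Align(64))) = 6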
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index cf77034329ef..c50c19a4609c 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -43,6 +43,7 @@ private:
// Track the number of explicitly used VGPRs. Special registers reserved at
// the end are tracked separately.
int32_t NumVGPR = 0;
+ int32_t NumAGPR = 0;
int32_t NumExplicitSGPR = 0;
uint64_t PrivateSegmentSize = 0;
bool UsesVCC = false;
@@ -51,6 +52,7 @@ private:
bool HasRecursion = false;
int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
+ int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};
SIProgramInfo CurrentProgramInfo;
@@ -77,6 +79,8 @@ private:
void EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);
void emitCommonFunctionComments(uint32_t NumVGPR,
+ Optional<uint32_t> NumAGPR,
+ uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
@@ -125,7 +129,7 @@ public:
void EmitFunctionEntryLabel() override;
- void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
+ void EmitBasicBlockStart(const MachineBasicBlock &MBB) override;
void EmitGlobalVariable(const GlobalVariable *GV) override;
@@ -140,8 +144,8 @@ public:
const char *ExtraCode, raw_ostream &O) override;
protected:
- mutable std::vector<std::string> DisasmLines, HexLines;
- mutable size_t DisasmLineMaxLen;
+ std::vector<std::string> DisasmLines, HexLines;
+ size_t DisasmLineMaxLen;
};
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 8a92e7d923fb..ba8343142c63 100644
--- a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
@@ -24,20 +25,10 @@
#define DEBUG_TYPE "amdgpu-atomic-optimizer"
using namespace llvm;
+using namespace llvm::AMDGPU;
namespace {
-enum DPP_CTRL {
- DPP_ROW_SR1 = 0x111,
- DPP_ROW_SR2 = 0x112,
- DPP_ROW_SR3 = 0x113,
- DPP_ROW_SR4 = 0x114,
- DPP_ROW_SR8 = 0x118,
- DPP_WF_SR1 = 0x138,
- DPP_ROW_BCAST15 = 0x142,
- DPP_ROW_BCAST31 = 0x143
-};
-
struct ReplacementInfo {
Instruction *I;
AtomicRMWInst::BinOp Op;
@@ -52,9 +43,12 @@ private:
const LegacyDivergenceAnalysis *DA;
const DataLayout *DL;
DominatorTree *DT;
- bool HasDPP;
+ const GCNSubtarget *ST;
bool IsPixelShader;
+ Value *buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *V,
+ Value *const Identity) const;
+ Value *buildShiftRight(IRBuilder<> &B, Value *V, Value *const Identity) const;
void optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx,
bool ValDivergent) const;
@@ -93,8 +87,7 @@ bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) {
DT = DTW ? &DTW->getDomTree() : nullptr;
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
const TargetMachine &TM = TPC.getTM<TargetMachine>();
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
- HasDPP = ST.hasDPP();
+ ST = &TM.getSubtarget<GCNSubtarget>(F);
IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;
visit(F);
@@ -142,17 +135,18 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
// If the pointer operand is divergent, then each lane is doing an atomic
// operation on a different address, and we cannot optimize that.
- if (DA->isDivergent(I.getOperand(PtrIdx))) {
+ if (DA->isDivergentUse(&I.getOperandUse(PtrIdx))) {
return;
}
- const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+ const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
// If the value operand is divergent, each lane is contributing a different
// value to the atomic calculation. We can only optimize divergent values if
// we have DPP available on our subtarget, and the atomic operation is 32
// bits.
- if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) {
+ if (ValDivergent &&
+ (!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) {
return;
}
@@ -219,20 +213,21 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
const unsigned ValIdx = 0;
- const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+ const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
// If the value operand is divergent, each lane is contributing a different
// value to the atomic calculation. We can only optimize divergent values if
// we have DPP available on our subtarget, and the atomic operation is 32
// bits.
- if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) {
+ if (ValDivergent &&
+ (!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) {
return;
}
// If any of the other arguments to the intrinsic are divergent, we can't
// optimize the operation.
for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
- if (DA->isDivergent(I.getOperand(Idx))) {
+ if (DA->isDivergentUse(&I.getOperandUse(Idx))) {
return;
}
}
@@ -282,6 +277,111 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
return B.CreateSelect(Cond, LHS, RHS);
}
+// Use the builder to create an inclusive scan of V across the wavefront, with
+// all lanes active.
+Value *AMDGPUAtomicOptimizer::buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
+ Value *V, Value *const Identity) const {
+ Type *const Ty = V->getType();
+ Module *M = B.GetInsertBlock()->getModule();
+ Function *UpdateDPP =
+ Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, Ty);
+ Function *PermLaneX16 =
+ Intrinsic::getDeclaration(M, Intrinsic::amdgcn_permlanex16, {});
+ Function *ReadLane =
+ Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, {});
+
+ for (unsigned Idx = 0; Idx < 4; Idx++) {
+ V = buildNonAtomicBinOp(
+ B, Op, V,
+ B.CreateCall(UpdateDPP,
+ {Identity, V, B.getInt32(DPP::ROW_SHR0 | 1 << Idx),
+ B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}));
+ }
+ if (ST->hasDPPBroadcasts()) {
+ // GFX9 has DPP row broadcast operations.
+ V = buildNonAtomicBinOp(
+ B, Op, V,
+ B.CreateCall(UpdateDPP,
+ {Identity, V, B.getInt32(DPP::BCAST15), B.getInt32(0xa),
+ B.getInt32(0xf), B.getFalse()}));
+ V = buildNonAtomicBinOp(
+ B, Op, V,
+ B.CreateCall(UpdateDPP,
+ {Identity, V, B.getInt32(DPP::BCAST31), B.getInt32(0xc),
+ B.getInt32(0xf), B.getFalse()}));
+ } else {
+ // On GFX10 all DPP operations are confined to a single row. To get cross-
+ // row operations we have to use permlane or readlane.
+
+ // Combine lane 15 into lanes 16..31 (and, for wave 64, lane 47 into lanes
+ // 48..63).
+ Value *const PermX =
+ B.CreateCall(PermLaneX16, {V, V, B.getInt32(-1), B.getInt32(-1),
+ B.getFalse(), B.getFalse()});
+ V = buildNonAtomicBinOp(
+ B, Op, V,
+ B.CreateCall(UpdateDPP,
+ {Identity, PermX, B.getInt32(DPP::QUAD_PERM_ID),
+ B.getInt32(0xa), B.getInt32(0xf), B.getFalse()}));
+ if (!ST->isWave32()) {
+ // Combine lane 31 into lanes 32..63.
+ Value *const Lane31 = B.CreateCall(ReadLane, {V, B.getInt32(31)});
+ V = buildNonAtomicBinOp(
+ B, Op, V,
+ B.CreateCall(UpdateDPP,
+ {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID),
+ B.getInt32(0xc), B.getInt32(0xf), B.getFalse()}));
+ }
+ }
+ return V;
+}
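// Sketch of the schedule above (not part of the upstream change):
// DPP::ROW_SHR0 is 0x110, so the loop issues row_shr:1/2/4/8 (0x111,
// 0x112, 0x114, 0x118) -- a Hillis-Steele inclusive scan within each
// 16-lane row (shifts of 1+2+4+8 cover all 15 predecessors); the
// broadcast/permlane steps then carry each row's total into later rows.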
+
+// Use the builder to create a shift right of V across the wavefront, with all
+// lanes active, to turn an inclusive scan into an exclusive scan.
+Value *AMDGPUAtomicOptimizer::buildShiftRight(IRBuilder<> &B, Value *V,
+ Value *const Identity) const {
+ Type *const Ty = V->getType();
+ Module *M = B.GetInsertBlock()->getModule();
+ Function *UpdateDPP =
+ Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, Ty);
+ Function *ReadLane =
+ Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, {});
+ Function *WriteLane =
+ Intrinsic::getDeclaration(M, Intrinsic::amdgcn_writelane, {});
+
+ if (ST->hasDPPWavefrontShifts()) {
+ // GFX9 has DPP wavefront shift operations.
+ V = B.CreateCall(UpdateDPP,
+ {Identity, V, B.getInt32(DPP::WAVE_SHR1), B.getInt32(0xf),
+ B.getInt32(0xf), B.getFalse()});
+ } else {
+ // On GFX10 all DPP operations are confined to a single row. To get cross-
+ // row operations we have to use permlane or readlane.
+ Value *Old = V;
+ V = B.CreateCall(UpdateDPP,
+ {Identity, V, B.getInt32(DPP::ROW_SHR0 + 1),
+ B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});
+
+ // Copy the old lane 15 to the new lane 16.
+ V = B.CreateCall(WriteLane, {B.CreateCall(ReadLane, {Old, B.getInt32(15)}),
+ B.getInt32(16), V});
+
+ if (!ST->isWave32()) {
+ // Copy the old lane 31 to the new lane 32.
+ V = B.CreateCall(
+ WriteLane,
+ {B.CreateCall(ReadLane, {Old, B.getInt32(31)}), B.getInt32(32), V});
+
+ // Copy the old lane 47 to the new lane 48.
+ V = B.CreateCall(
+ WriteLane,
+ {B.CreateCall(ReadLane, {Old, B.getInt32(47)}), B.getInt32(48), V});
+ }
+ }
+
+ return V;
+}
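// Illustration (wave32 integer add, not part of the upstream change):
// shifting the inclusive scan right by one lane, with Identity (0 for
// add) filling lane 0, yields the exclusive scan: inclusive
// [a0, a0+a1, a0+a1+a2, ...] becomes [0, a0, a0+a1, ...].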
+
static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
unsigned BitWidth) {
switch (Op) {
@@ -345,23 +445,29 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// We need to know how many lanes are active within the wavefront, and we do
// this by doing a ballot of active lanes.
+ Type *const WaveTy = B.getIntNTy(ST->getWavefrontSize());
CallInst *const Ballot = B.CreateIntrinsic(
- Intrinsic::amdgcn_icmp, {B.getInt64Ty(), B.getInt32Ty()},
+ Intrinsic::amdgcn_icmp, {WaveTy, B.getInt32Ty()},
{B.getInt32(1), B.getInt32(0), B.getInt32(CmpInst::ICMP_NE)});
// We need to know how many lanes are active within the wavefront that are
// below us. If we counted each lane linearly starting from 0, a lane is
// below us only if its associated index was less than ours. We do this by
// using the mbcnt intrinsic.
- Value *const BitCast = B.CreateBitCast(Ballot, VecTy);
- Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
- Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
- CallInst *const PartialMbcnt = B.CreateIntrinsic(
- Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)});
- Value *const Mbcnt =
- B.CreateIntCast(B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
- {ExtractHi, PartialMbcnt}),
- Ty, false);
+ Value *Mbcnt;
+ if (ST->isWave32()) {
+ Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
+ {Ballot, B.getInt32(0)});
+ } else {
+ Value *const BitCast = B.CreateBitCast(Ballot, VecTy);
+ Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
+ Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
+ Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
+ {ExtractLo, B.getInt32(0)});
+ Mbcnt =
+ B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, {ExtractHi, Mbcnt});
+ }
+ Mbcnt = B.CreateIntCast(Mbcnt, Ty, false);
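// Illustration (hypothetical ballot, not part of the upstream change):
// mbcnt counts the set ballot bits strictly below the executing lane,
// so lane 5 with a ballot of 0b101011 gets popcount(0b01011) = 3, i.e.
// three active lanes precede it.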
Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth));
@@ -373,47 +479,25 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
if (ValDivergent) {
// First we need to set all inactive invocations to the identity value, so
// that they can correctly contribute to the final result.
- CallInst *const SetInactive =
- B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
-
- CallInst *const FirstDPP =
- B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty,
- {Identity, SetInactive, B.getInt32(DPP_WF_SR1),
- B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});
- ExclScan = FirstDPP;
-
- const unsigned Iters = 7;
- const unsigned DPPCtrl[Iters] = {
- DPP_ROW_SR1, DPP_ROW_SR2, DPP_ROW_SR3, DPP_ROW_SR4,
- DPP_ROW_SR8, DPP_ROW_BCAST15, DPP_ROW_BCAST31};
- const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
- const unsigned BankMask[Iters] = {0xf, 0xf, 0xf, 0xe, 0xc, 0xf, 0xf};
-
- // This loop performs an exclusive scan across the wavefront, with all lanes
- // active (by using the WWM intrinsic).
- for (unsigned Idx = 0; Idx < Iters; Idx++) {
- Value *const UpdateValue = Idx < 3 ? FirstDPP : ExclScan;
- CallInst *const DPP = B.CreateIntrinsic(
- Intrinsic::amdgcn_update_dpp, Ty,
- {Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]),
- B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()});
-
- ExclScan = buildNonAtomicBinOp(B, Op, ExclScan, DPP);
- }
+ NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
- NewV = buildNonAtomicBinOp(B, Op, SetInactive, ExclScan);
+ const AtomicRMWInst::BinOp ScanOp =
+ Op == AtomicRMWInst::Sub ? AtomicRMWInst::Add : Op;
+ NewV = buildScan(B, ScanOp, NewV, Identity);
+ ExclScan = buildShiftRight(B, NewV, Identity);
// Read the value from the last lane, which has accumulated the values of
// each active lane in the wavefront. This will be our new value which we
// will provide to the atomic operation.
+ Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
if (TyBitWidth == 64) {
Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty());
Value *const ExtractHi =
- B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty());
+ B.CreateTrunc(B.CreateLShr(NewV, 32), B.getInt32Ty());
CallInst *const ReadLaneLo = B.CreateIntrinsic(
- Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)});
+ Intrinsic::amdgcn_readlane, {}, {ExtractLo, LastLaneIdx});
CallInst *const ReadLaneHi = B.CreateIntrinsic(
- Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)});
+ Intrinsic::amdgcn_readlane, {}, {ExtractHi, LastLaneIdx});
Value *const PartialInsert = B.CreateInsertElement(
UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0));
Value *const Insert =
@@ -421,7 +505,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
NewV = B.CreateBitCast(Insert, Ty);
} else if (TyBitWidth == 32) {
NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
- {NewV, B.getInt32(63)});
+ {NewV, LastLaneIdx});
} else {
llvm_unreachable("Unhandled atomic bit width");
}
@@ -493,77 +577,80 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// original instruction.
B.SetInsertPoint(&I);
- // Create a PHI node to get our new atomic result into the exit block.
- PHINode *const PHI = B.CreatePHI(Ty, 2);
- PHI->addIncoming(UndefValue::get(Ty), EntryBB);
- PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
-
- // We need to broadcast the value who was the lowest active lane (the first
- // lane) to all other lanes in the wavefront. We use an intrinsic for this,
- // but have to handle 64-bit broadcasts with two calls to this intrinsic.
- Value *BroadcastI = nullptr;
-
- if (TyBitWidth == 64) {
- Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty());
- Value *const ExtractHi =
- B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty());
- CallInst *const ReadFirstLaneLo =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
- CallInst *const ReadFirstLaneHi =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
- Value *const PartialInsert = B.CreateInsertElement(
- UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
- Value *const Insert =
- B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
- BroadcastI = B.CreateBitCast(Insert, Ty);
- } else if (TyBitWidth == 32) {
-
- BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
- } else {
- llvm_unreachable("Unhandled atomic bit width");
- }
+ const bool NeedResult = !I.use_empty();
+ if (NeedResult) {
+ // Create a PHI node to get our new atomic result into the exit block.
+ PHINode *const PHI = B.CreatePHI(Ty, 2);
+ PHI->addIncoming(UndefValue::get(Ty), EntryBB);
+ PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
- // Now that we have the result of our single atomic operation, we need to
- // get our individual lane's slice into the result. We use the lane offset we
- // previously calculated combined with the atomic result value we got from the
- // first lane, to get our lane's index into the atomic result.
- Value *LaneOffset = nullptr;
- if (ValDivergent) {
- LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan);
- } else {
- switch (Op) {
- default:
- llvm_unreachable("Unhandled atomic op");
- case AtomicRMWInst::Add:
- case AtomicRMWInst::Sub:
- LaneOffset = B.CreateMul(V, Mbcnt);
- break;
- case AtomicRMWInst::And:
- case AtomicRMWInst::Or:
- case AtomicRMWInst::Max:
- case AtomicRMWInst::Min:
- case AtomicRMWInst::UMax:
- case AtomicRMWInst::UMin:
- LaneOffset = B.CreateSelect(Cond, Identity, V);
- break;
- case AtomicRMWInst::Xor:
- LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
- break;
+ // We need to broadcast the value from the lowest active lane (the first
+ // lane) to all other lanes in the wavefront. We use an intrinsic for this,
+ // but have to handle 64-bit broadcasts with two calls to this intrinsic.
+ Value *BroadcastI = nullptr;
+
+ if (TyBitWidth == 64) {
+ Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty());
+ Value *const ExtractHi =
+ B.CreateTrunc(B.CreateLShr(PHI, 32), B.getInt32Ty());
+ CallInst *const ReadFirstLaneLo =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
+ CallInst *const ReadFirstLaneHi =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
+ Value *const PartialInsert = B.CreateInsertElement(
+ UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
+ Value *const Insert =
+ B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
+ BroadcastI = B.CreateBitCast(Insert, Ty);
+ } else if (TyBitWidth == 32) {
+
+ BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
+ } else {
+ llvm_unreachable("Unhandled atomic bit width");
}
- }
- Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset);
- if (IsPixelShader) {
- // Need a final PHI to reconverge to above the helper lane branch mask.
- B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
+ // Now that we have the result of our single atomic operation, we need to
+ // get our individual lane's slice into the result. We use the lane offset
+ // we previously calculated combined with the atomic result value we got
+ // from the first lane, to get our lane's index into the atomic result.
+ Value *LaneOffset = nullptr;
+ if (ValDivergent) {
+ LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan);
+ } else {
+ switch (Op) {
+ default:
+ llvm_unreachable("Unhandled atomic op");
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ LaneOffset = B.CreateMul(V, Mbcnt);
+ break;
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ LaneOffset = B.CreateSelect(Cond, Identity, V);
+ break;
+ case AtomicRMWInst::Xor:
+ LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
+ break;
+ }
+ }
+ Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset);
- PHINode *const PHI = B.CreatePHI(Ty, 2);
- PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB);
- PHI->addIncoming(Result, I.getParent());
- I.replaceAllUsesWith(PHI);
- } else {
- // Replace the original atomic instruction with the new one.
- I.replaceAllUsesWith(Result);
+ if (IsPixelShader) {
+ // Need a final PHI to reconverge to above the helper lane branch mask.
+ B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
+
+ PHINode *const PHI = B.CreatePHI(Ty, 2);
+ PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB);
+ PHI->addIncoming(Result, I.getParent());
+ I.replaceAllUsesWith(PHI);
+ } else {
+ // Replace the original atomic instruction with the new one.
+ I.replaceAllUsesWith(Result);
+ }
}
// And delete the original.
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index b107c357196d..58c44acde1a7 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -30,13 +30,15 @@ using namespace llvm;
namespace {
-struct OutgoingArgHandler : public CallLowering::ValueHandler {
- OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder MIB, CCAssignFn *AssignFn)
- : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+struct OutgoingValueHandler : public CallLowering::ValueHandler {
+ OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : ValueHandler(B, MRI, AssignFn), MIB(MIB) {}
MachineInstrBuilder MIB;
+ bool isIncomingArgumentHandler() const override { return false; }
+
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
llvm_unreachable("not implemented");
@@ -49,15 +51,96 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
- MIB.addUse(PhysReg);
- MIRBuilder.buildCopy(PhysReg, ValVReg);
+ Register ExtReg;
+ if (VA.getLocVT().getSizeInBits() < 32) {
+ // 16-bit types are reported as legal for 32-bit registers. We need to
+ // extend and do a 32-bit copy to avoid the verifier complaining about it.
+ ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
+ } else
+ ExtReg = extendRegister(ValVReg, VA);
+
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
+ MIB.addUse(PhysReg, RegState::Implicit);
}
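// Illustration (hypothetical s16 return value, not part of the upstream
// change): the value is widened with G_ANYEXT to s32, that s32 is copied
// into the 32-bit physical register, and the register is attached to the
// return instruction as an implicit use.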
bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
const CallLowering::ArgInfo &Info,
+ ISD::ArgFlagsTy Flags,
CCState &State) override {
- return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
+ }
+};
+
+struct IncomingArgHandler : public CallLowering::ValueHandler {
+ uint64_t StackUsed = 0;
+
+ IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(B, MRI, AssignFn) {}
+
+ Register getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+ int FI = MFI.CreateFixedObject(Size, Offset, true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ Register AddrReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
+ MIRBuilder.buildFrameIndex(AddrReg, FI);
+ StackUsed = std::max(StackUsed, Size + Offset);
+ return AddrReg;
+ }
+
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign &VA) override {
+ markPhysRegUsed(PhysReg);
+
+ if (VA.getLocVT().getSizeInBits() < 32) {
+ // 16-bit types are reported as legal for 32-bit registers. We need to do
+ // a 32-bit copy, and truncate to avoid the verifier complaining about it.
+ auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ return;
+ }
+
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::SExt:
+ case CCValAssign::LocInfo::ZExt:
+ case CCValAssign::LocInfo::AExt: {
+ auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ break;
+ }
+ default:
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ break;
+ }
+ }
+
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ // FIXME: Get alignment
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ }
+
+ /// How the physical register gets marked varies between formal
+ /// parameters (it's a basic-block live-in), and a call instruction
+ /// (it's an implicit-def of the BL).
+ virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+
+ // FIXME: What is the point of this being a callback?
+ bool isIncomingArgumentHandler() const override { return true; }
+};
+
+struct FormalArgHandler : public IncomingArgHandler {
+ FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : IncomingArgHandler(B, MRI, AssignFn) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
}
};
@@ -67,55 +150,198 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
}
-bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+void AMDGPUCallLowering::splitToValueTypes(
+ const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
+ SplitArgTy PerformArgSplit) const {
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+ if (OrigArg.Ty->isVoidTy())
+ return;
+
+ SmallVector<EVT, 4> SplitVTs;
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
+
+ assert(OrigArg.Regs.size() == SplitVTs.size());
+
+ int SplitIdx = 0;
+ for (EVT VT : SplitVTs) {
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+ Type *Ty = VT.getTypeForEVT(Ctx);
+
+ if (NumParts == 1) {
+ // No splitting to do, but we want to replace the original type (e.g. [1 x
+ // double] -> double).
+ SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
+ OrigArg.Flags, OrigArg.IsFixed);
+
+ ++SplitIdx;
+ continue;
+ }
+
+ LLT LLTy = getLLTForType(*Ty, DL);
+
+ SmallVector<Register, 8> SplitRegs;
+
+ EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
+ Type *PartTy = PartVT.getTypeForEVT(Ctx);
+ LLT PartLLT = getLLTForType(*PartTy, DL);
+
+ // FIXME: Should we be reporting all of the part registers for a single
+ // argument, and let handleAssignments take care of the repacking?
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
+ SplitRegs.push_back(PartReg);
+ SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
+ }
+
+ PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);
+
+ ++SplitIdx;
+ }
+}
+
+// Get the appropriate type to make \p OrigTy \p Factor times bigger.
+static LLT getMultipleType(LLT OrigTy, int Factor) {
+ if (OrigTy.isVector()) {
+ return LLT::vector(OrigTy.getNumElements() * Factor,
+ OrigTy.getElementType());
+ }
+
+ return LLT::scalar(OrigTy.getSizeInBits() * Factor);
+}
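// Illustration (derived from the code above): getMultipleType(s32, 3)
// yields s96 and getMultipleType(v2s16, 2) yields v4s16 -- the type is
// simply widened by the requested factor.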
+
+// TODO: Move to generic code
+static void unpackRegsToOrigType(MachineIRBuilder &B,
+ ArrayRef<Register> DstRegs,
+ Register SrcReg,
+ LLT SrcTy,
+ LLT PartTy) {
+ assert(DstRegs.size() > 1 && "Nothing to unpack");
+
+ MachineFunction &MF = B.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ const unsigned SrcSize = SrcTy.getSizeInBits();
+ const unsigned PartSize = PartTy.getSizeInBits();
+
+ if (SrcTy.isVector() && !PartTy.isVector() &&
+ PartSize > SrcTy.getElementType().getSizeInBits()) {
+ // Vector was scalarized, and the elements extended.
+ auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(),
+ SrcReg);
+ for (int i = 0, e = DstRegs.size(); i != e; ++i)
+ B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
+ return;
+ }
+
+ if (SrcSize % PartSize == 0) {
+ B.buildUnmerge(DstRegs, SrcReg);
+ return;
+ }
+
+ const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;
+
+ LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
+ auto ImpDef = B.buildUndef(BigTy);
+
+ Register BigReg = MRI.createGenericVirtualRegister(BigTy);
+ B.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0).getReg(0);
+
+ int64_t Offset = 0;
+ for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
+ B.buildExtract(DstRegs[i], BigReg, Offset);
+}
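// Illustration (hypothetical types, not part of the upstream change):
// unpacking an s96 source into s32 parts takes the even-split path
// (96 % 32 == 0) and emits one G_UNMERGE_VALUES into three registers;
// unpacking s96 into s64 parts instead rounds up to BigTy = s128, inserts
// the s96 into an undef s128, and extracts s64 pieces at offsets 0 and 64.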
+
+/// Lower the return value for the already existing \p Ret. This assumes that
+/// \p B's insertion point is correct.
+bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
+ const Value *Val, ArrayRef<Register> VRegs,
+ MachineInstrBuilder &Ret) const {
+ if (!Val)
+ return true;
+
+ auto &MF = B.getMF();
+ const auto &F = MF.getFunction();
+ const DataLayout &DL = MF.getDataLayout();
+
+ CallingConv::ID CC = F.getCallingConv();
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ ArgInfo OrigRetInfo(VRegs, Val->getType());
+ setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
+ SmallVector<ArgInfo, 4> SplitRetInfos;
+
+ splitToValueTypes(
+ OrigRetInfo, SplitRetInfos, DL, MRI, CC,
+ [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+ unpackRegsToOrigType(B, Regs, VRegs[VTSplitIdx], LLTy, PartLLT);
+ });
+
+ CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
+
+ OutgoingValueHandler RetHandler(B, MF.getRegInfo(), Ret, AssignFn);
+ return handleAssignments(B, SplitRetInfos, RetHandler);
+}
+
+bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
const Value *Val,
ArrayRef<Register> VRegs) const {
- MachineFunction &MF = MIRBuilder.getMF();
+ MachineFunction &MF = B.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MFI->setIfReturnsVoid(!Val);
- if (!Val) {
- MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+ assert(!Val == VRegs.empty() && "Return value without a vreg");
+
+ CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
+ const bool IsShader = AMDGPU::isShader(CC);
+ const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
+ AMDGPU::isKernel(CC);
+ if (IsWaveEnd) {
+ B.buildInstr(AMDGPU::S_ENDPGM)
+ .addImm(0);
return true;
}
- Register VReg = VRegs[0];
-
- const Function &F = MF.getFunction();
- auto &DL = F.getParent()->getDataLayout();
- if (!AMDGPU::isShader(F.getCallingConv()))
- return false;
+ auto const &ST = B.getMF().getSubtarget<GCNSubtarget>();
+ unsigned ReturnOpc =
+ IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;
- const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
- SmallVector<EVT, 4> SplitVTs;
- SmallVector<uint64_t, 4> Offsets;
- ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
- ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
-
- SmallVector<ArgInfo, 8> SplitArgs;
- CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
- for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
- Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
- SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+ auto Ret = B.buildInstrNoInsert(ReturnOpc);
+ Register ReturnAddrVReg;
+ if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
+ ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
+ Ret.addUse(ReturnAddrVReg);
}
- auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
- OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
- if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+
+ if (!lowerReturnVal(B, Val, VRegs, Ret))
return false;
- MIRBuilder.insertInstr(RetInstr);
+ if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
+ &AMDGPU::SGPR_64RegClass);
+ B.buildCopy(ReturnAddrVReg, LiveInReturn);
+ }
+
+ // TODO: Handle CalleeSavedRegsViaCopy.
+
+ B.insertInstr(Ret);
return true;
}
-Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
+Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
Type *ParamTy,
uint64_t Offset) const {
- MachineFunction &MF = MIRBuilder.getMF();
+ MachineFunction &MF = B.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
@@ -128,79 +354,37 @@ Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ B.buildConstant(OffsetReg, Offset);
- MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
+ B.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
return DstReg;
}
-void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
+void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B,
Type *ParamTy, uint64_t Offset,
unsigned Align,
Register DstReg) const {
- MachineFunction &MF = MIRBuilder.getMF();
+ MachineFunction &MF = B.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
- Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
+ Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
- MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
TypeSize, Align);
- MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
-}
-
-static Register findFirstFreeSGPR(CCState &CCInfo) {
- unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
- for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
- if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
- return AMDGPU::SGPR0 + Reg;
- }
- }
- llvm_unreachable("Cannot allocate sgpr");
-}
-
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
- const LLT S32 = LLT::scalar(32);
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- if (Info.hasWorkItemIDX()) {
- Register Reg = AMDGPU::VGPR0;
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
- CCInfo.AllocateReg(Reg);
- Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
- }
-
- if (Info.hasWorkItemIDY()) {
- Register Reg = AMDGPU::VGPR1;
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
- CCInfo.AllocateReg(Reg);
- Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
- }
-
- if (Info.hasWorkItemIDZ()) {
- Register Reg = AMDGPU::VGPR2;
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
- CCInfo.AllocateReg(Reg);
- Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
- }
+ B.buildLoad(DstReg, PtrReg, *MMO);
}
// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
- MachineIRBuilder &MIRBuilder,
+ MachineIRBuilder &B,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) {
@@ -229,8 +413,8 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
Register VReg = MRI.createGenericVirtualRegister(P4);
MRI.addLiveIn(InputPtrReg, VReg);
- MIRBuilder.getMBB().addLiveIn(InputPtrReg);
- MIRBuilder.buildCopy(VReg, InputPtrReg);
+ B.getMBB().addLiveIn(InputPtrReg);
+ B.buildCopy(VReg, InputPtrReg);
CCInfo.AllocateReg(InputPtrReg);
}
@@ -250,74 +434,22 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
// these from the dispatch pointer.
}
-static void allocateSystemSGPRs(CCState &CCInfo,
- MachineFunction &MF,
- SIMachineFunctionInfo &Info,
- CallingConv::ID CallConv,
- bool IsShader) {
- const LLT S32 = LLT::scalar(32);
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- if (Info.hasWorkGroupIDX()) {
- Register Reg = Info.addWorkGroupIDX();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDY()) {
- Register Reg = Info.addWorkGroupIDY();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDZ()) {
- unsigned Reg = Info.addWorkGroupIDZ();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupInfo()) {
- unsigned Reg = Info.addWorkGroupInfo();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasPrivateSegmentWaveByteOffset()) {
- // Scratch wave offset passed in system SGPR.
- unsigned PrivateSegmentWaveByteOffsetReg;
-
- if (IsShader) {
- PrivateSegmentWaveByteOffsetReg =
- Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
-
- // This is true if the scratch wave byte offset doesn't have a fixed
- // location.
- if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
- PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
- Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
- }
- } else
- PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
-
- MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
- CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
- }
-}
-
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
- MachineIRBuilder &MIRBuilder, const Function &F,
+ MachineIRBuilder &B, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
- MachineFunction &MF = MIRBuilder.getMF();
+ MachineFunction &MF = B.getMF();
const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+
const DataLayout &DL = F.getParent()->getDataLayout();
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
- allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
+ allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);
unsigned i = 0;
const unsigned KernArgBaseAlign = 16;
@@ -343,123 +475,242 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
: MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
- lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
+ lowerParameter(B, ArgTy, ArgOffset, Align, ArgReg);
if (OrigArgRegs.size() > 1)
- unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
+ unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
++i;
}
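// Illustration (hypothetical offset, not part of the upstream change):
// MinAlign(16, ArgOffset) is the lowest set bit of (16 | ArgOffset), so an
// argument at byte offset 36 from the 16-byte-aligned kernarg base can
// only be assumed 4-byte aligned (lowest set bit of 16 | 36 = 52 is 4).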
- allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
- allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
+ TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
+ TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
return true;
}
+// TODO: Move this to generic code
+static void packSplitRegsToOrigType(MachineIRBuilder &B,
+ ArrayRef<Register> OrigRegs,
+ ArrayRef<Register> Regs,
+ LLT LLTy,
+ LLT PartLLT) {
+ if (!LLTy.isVector() && !PartLLT.isVector()) {
+ B.buildMerge(OrigRegs[0], Regs);
+ return;
+ }
+
+ if (LLTy.isVector() && PartLLT.isVector()) {
+ assert(LLTy.getElementType() == PartLLT.getElementType());
+
+ int DstElts = LLTy.getNumElements();
+ int PartElts = PartLLT.getNumElements();
+ if (DstElts % PartElts == 0)
+ B.buildConcatVectors(OrigRegs[0], Regs);
+ else {
+ // Deal with v3s16 split into v2s16
+ assert(PartElts == 2 && DstElts % 2 != 0);
+ int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
+
+ LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
+ auto RoundedConcat = B.buildConcatVectors(RoundedDestTy, Regs);
+ B.buildExtract(OrigRegs[0], RoundedConcat, 0);
+ }
+
+ return;
+ }
+
+ assert(LLTy.isVector() && !PartLLT.isVector());
+
+ LLT DstEltTy = LLTy.getElementType();
+ if (DstEltTy == PartLLT) {
+ // Vector was trivially scalarized.
+ B.buildBuildVector(OrigRegs[0], Regs);
+ } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
+ // Deal with vector with 64-bit elements decomposed to 32-bit
+ // registers. Need to create intermediate 64-bit elements.
+ SmallVector<Register, 8> EltMerges;
+ int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
+
+ assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+
+ for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
+ auto Merge = B.buildMerge(DstEltTy,
+ Regs.take_front(PartsPerElt));
+ EltMerges.push_back(Merge.getReg(0));
+ Regs = Regs.drop_front(PartsPerElt);
+ }
+
+ B.buildBuildVector(OrigRegs[0], EltMerges);
+ } else {
+ // Vector was split, and elements promoted to a wider type.
+ LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
+ auto BV = B.buildBuildVector(BVType, Regs);
+ B.buildTrunc(OrigRegs[0], BV);
+ }
+}
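// Illustration (the v3s16 case noted above): given two v2s16 part
// registers, RoundedElts = 2 * ((3 + 1) / 2) = 4, so the parts are
// concatenated into a v4s16 and the v3s16 result is extracted from it at
// offset 0.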
+
bool AMDGPUCallLowering::lowerFormalArguments(
- MachineIRBuilder &MIRBuilder, const Function &F,
+ MachineIRBuilder &B, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
+ CallingConv::ID CC = F.getCallingConv();
+
// The infrastructure for normal calling convention lowering is essentially
// useless for kernels. We want to avoid any kind of legalization or argument
// splitting.
- if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
- return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
+ if (CC == CallingConv::AMDGPU_KERNEL)
+ return lowerFormalArgumentsKernel(B, F, VRegs);
- // AMDGPU_GS and AMDGPU_HS are not supported yet.
- if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
- F.getCallingConv() == CallingConv::AMDGPU_HS)
- return false;
+ const bool IsShader = AMDGPU::isShader(CC);
+ const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
- MachineFunction &MF = MIRBuilder.getMF();
+ MachineFunction &MF = B.getMF();
+ MachineBasicBlock &MBB = B.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
const DataLayout &DL = F.getParent()->getDataLayout();
- bool IsShader = AMDGPU::isShader(F.getCallingConv());
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+ CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
+
+ if (!IsEntryFunc) {
+ Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
+ Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
+ &AMDGPU::SGPR_64RegClass);
+ MBB.addLiveIn(ReturnAddrReg);
+ B.buildCopy(LiveInReturn, ReturnAddrReg);
+ }
if (Info->hasImplicitBufferPtr()) {
- unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
+ Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
- unsigned NumArgs = F.arg_size();
- Function::const_arg_iterator CurOrigArg = F.arg_begin();
- const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+
+ SmallVector<ArgInfo, 32> SplitArgs;
+ unsigned Idx = 0;
unsigned PSInputNum = 0;
- BitVector Skipped(NumArgs);
- for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
- EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
-
- // We can only handle simple value types at the moment.
- ISD::ArgFlagsTy Flags;
- assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
- ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
- setArgFlags(OrigArg, i + 1, DL, F);
- Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
-
- if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
- !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
- PSInputNum <= 15) {
- if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
- Skipped.set(i);
- ++PSInputNum;
+
+ for (auto &Arg : F.args()) {
+ if (DL.getTypeStoreSize(Arg.getType()) == 0)
+ continue;
+
+ const bool InReg = Arg.hasAttribute(Attribute::InReg);
+
+ // SGPR arguments to functions not implemented.
+ if (!IsShader && InReg)
+ return false;
+
+ if (Arg.hasAttribute(Attribute::SwiftSelf) ||
+ Arg.hasAttribute(Attribute::SwiftError) ||
+ Arg.hasAttribute(Attribute::Nest))
+ return false;
+
+ if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
+ const bool ArgUsed = !Arg.use_empty();
+ bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
+
+ if (!SkipArg) {
+ Info->markPSInputAllocated(PSInputNum);
+ if (ArgUsed)
+ Info->markPSInputEnabled(PSInputNum);
+ }
+
+ ++PSInputNum;
+
+ if (SkipArg) {
+ for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
+ B.buildUndef(VRegs[Idx][I]);
+
+ ++Idx;
continue;
}
+ }
- Info->markPSInputAllocated(PSInputNum);
- if (!CurOrigArg->use_empty())
- Info->markPSInputEnabled(PSInputNum);
+ ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+ setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
- ++PSInputNum;
+ splitToValueTypes(
+ OrigArg, SplitArgs, DL, MRI, CC,
+ // FIXME: We should probably be passing multiple registers to
+ // handleAssignments to do this
+ [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+ packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
+ LLTy, PartLLT);
+ });
+
+ ++Idx;
+ }
+
+ // At least one interpolation mode must be enabled or else the GPU will
+ // hang.
+ //
+ // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
+ // set PSInputAddr, the user wants to enable some bits after the compilation
+ // based on run-time states. Since we can't know what the final PSInputEna
+ // will look like, we shouldn't do anything here and the user should take
+ // responsibility for the correct programming.
+ //
+ // Otherwise, the following restrictions apply:
+ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+ // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+ // enabled too.
+ if (CC == CallingConv::AMDGPU_PS) {
+ if ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11))) {
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->markPSInputEnabled(0);
}
- CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
- /*IsVarArg=*/false);
-
- if (ValEVT.isVector()) {
- EVT ElemVT = ValEVT.getVectorElementType();
- if (!ValEVT.isSimple())
- return false;
- MVT ValVT = ElemVT.getSimpleVT();
- bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
- OrigArg.Flags, CCInfo);
- if (!Res)
- return false;
- } else {
- MVT ValVT = ValEVT.getSimpleVT();
- if (!ValEVT.isSimple())
- return false;
- bool Res =
- AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
-
- // Fail if we don't know how to handle this type.
- if (Res)
- return false;
+ if (Subtarget.isAmdPalOS()) {
+ // For isAmdPalOS, the user does not enable some bits after compilation
+ // based on run-time states; the register values being generated here are
+ // the final ones set in hardware. Therefore we need to apply the
+ // workaround to PSInputAddr and PSInputEnable together. (The case where
+ // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
+ // set up an input arg for a particular interpolation mode, but nothing
+ // uses that input arg. Really we should have an earlier pass that removes
+ // such an arg.)
+ unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
+ if ((PsInputBits & 0x7F) == 0 ||
+ ((PsInputBits & 0xF) == 0 &&
+ (PsInputBits >> 11 & 1)))
+ Info->markPSInputEnabled(
+ countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
}
}
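// Illustration (hypothetical mask, not part of the upstream change): a
// pixel shader with PSInputAddr = 0x800 (only POS_W_FLOAT) has
// (PSInputAddr & 0x7F) == 0, so VGPR0 and VGPR1 are allocated and input 0
// is force-enabled to satisfy the hardware requirement described above.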
- Function::const_arg_iterator Arg = F.arg_begin();
-
- if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
- F.getCallingConv() == CallingConv::AMDGPU_PS) {
- for (unsigned i = 0, OrigArgIdx = 0;
- OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
- if (Skipped.test(OrigArgIdx))
- continue;
- assert(VRegs[OrigArgIdx].size() == 1 &&
- "Can't lower into more than 1 reg");
- CCValAssign &VA = ArgLocs[i++];
- MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
- MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
- MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
- }
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+ CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
- allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
- return true;
+ if (!MBB.empty())
+ B.setInstr(*MBB.begin());
+
+ FormalArgHandler Handler(B, MRI, AssignFn);
+ if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
+ return false;
+
+ if (!IsEntryFunc) {
+ // Special inputs come after user arguments.
+ TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
+ }
+
+ // Start adding system SGPRs.
+ if (IsEntryFunc) {
+ TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
+ } else {
+ CCInfo.AllocateReg(Info->getScratchRSrcReg());
+ CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
+ CCInfo.AllocateReg(Info->getFrameOffsetReg());
+ TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
- return false;
+ // Move back to the end of the basic block.
+ B.setMBB(MBB);
+
+ return true;
}
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 3599659cac6a..53a562586bc0 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -20,26 +20,37 @@
namespace llvm {
class AMDGPUTargetLowering;
+class MachineInstrBuilder;
class AMDGPUCallLowering: public CallLowering {
- Register lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
+ Register lowerParameterPtr(MachineIRBuilder &B, Type *ParamTy,
uint64_t Offset) const;
- void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
- uint64_t Offset, unsigned Align,
- Register DstReg) const;
+ void lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset,
+ unsigned Align, Register DstReg) const;
- public:
+ /// A function of this type is used to perform the value-splitting action.
+ using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT, int)>;
+
+ void splitToValueTypes(const ArgInfo &OrigArgInfo,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI,
+ CallingConv::ID CallConv,
+ SplitArgTy SplitArg) const;
+
+ bool lowerReturnVal(MachineIRBuilder &B, const Value *Val,
+ ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const;
+
+public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ bool lowerReturn(MachineIRBuilder &B, const Value *Val,
ArrayRef<Register> VRegs) const override;
- bool lowerFormalArgumentsKernel(MachineIRBuilder &MIRBuilder,
- const Function &F,
+ bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const;
- bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ bool lowerFormalArguments(MachineIRBuilder &B, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 3688cd77542e..f8a54a61aac2 100644
--- a/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -24,22 +24,9 @@ def CC_SI : CallingConv<[
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
- SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47,
- SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55,
- SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63,
- SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71,
- SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79,
- SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87,
- SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95,
- SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103,
- SGPR104, SGPR105
+ SGPR40, SGPR41, SGPR42, SGPR43
]>>>,
- // We have no way of referring to the generated register tuples
- // here, so use a custom function.
- CCIfInReg<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
- CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
-
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
CCIfNotInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
@@ -69,15 +56,7 @@ def RetCC_SI_Shader : CallingConv<[
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
- SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47,
- SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55,
- SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63,
- SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71,
- SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79,
- SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87,
- SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95,
- SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103,
- SGPR104, SGPR105
+ SGPR40, SGPR41, SGPR42, SGPR43
]>>,
// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
@@ -138,7 +117,6 @@ def CC_AMDGPU_Func : CallingConv<[
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
- CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
@@ -157,7 +135,6 @@ def RetCC_AMDGPU_Func : CallingConv<[
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
- CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>
]>;
def CC_AMDGPU : CallingConv<[
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index b750c6b5f6d2..1640a4a59ee2 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -55,6 +55,12 @@ static cl::opt<bool> WidenLoads(
cl::ReallyHidden,
cl::init(true));
+static cl::opt<bool> UseMul24Intrin(
+ "amdgpu-codegenprepare-mul24",
+ cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
+ cl::ReallyHidden,
+ cl::init(true));
+
class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNSubtarget *ST = nullptr;
@@ -509,7 +515,9 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
}
}
- I.replaceAllUsesWith(insertValues(Builder, Ty, ResultVals));
+ Value *NewVal = insertValues(Builder, Ty, ResultVals);
+ NewVal->takeName(&I);
+ I.replaceAllUsesWith(NewVal);
I.eraseFromParent();
return true;
@@ -879,7 +887,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
DA->isUniform(&I) && promoteUniformOpToI32(I))
return true;
- if (replaceMulWithMul24(I))
+ if (UseMul24Intrin && replaceMulWithMul24(I))
return true;
bool Changed = false;
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
index e80797736363..61ce83b30e00 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
@@ -13,9 +13,9 @@
#include "AMDGPUFrameLowering.h"
using namespace llvm;
-AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
- int LAO, unsigned TransAl)
- : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, Align StackAl,
+ int LAO, Align TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl) {}
AMDGPUFrameLowering::~AMDGPUFrameLowering() = default;
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index 48b64488303e..92e256cf2829 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -25,8 +25,8 @@ namespace llvm {
/// See TargetFrameInfo for more comments.
class AMDGPUFrameLowering : public TargetFrameLowering {
public:
- AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
- unsigned TransAl = 1);
+ AMDGPUFrameLowering(StackDirection D, Align StackAl, int LAO,
+ Align TransAl = Align::None());
~AMDGPUFrameLowering() override;
/// \returns The number of 32-bit sub-registers that are used when storing
diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td
index cad4c2ef404c..f2be1ca44d34 100644
--- a/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -12,10 +12,6 @@
include "AMDGPU.td"
-def p0 : PtrValueType<i64, 0>;
-def p1 : PtrValueType<i64, 1>;
-def p4 : PtrValueType<i64, 4>;
-
def sd_vsrc0 : ComplexPattern<i32, 1, "">;
def gi_vsrc0 :
GIComplexOperandMatcher<s32, "selectVSRC0">,
@@ -38,6 +34,18 @@ def gi_vop3omods :
GIComplexOperandMatcher<s32, "selectVOP3OMods">,
GIComplexPatternEquiv<VOP3OMods>;
+def gi_vop3omods0clamp0omod :
+ GIComplexOperandMatcher<s32, "selectVOP3Mods0Clamp0OMod">,
+ GIComplexPatternEquiv<VOP3Mods0Clamp0OMod>;
+
+def gi_vop3opselmods0 :
+ GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">,
+ GIComplexPatternEquiv<VOP3OpSelMods0>;
+
+def gi_vop3opselmods :
+ GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
+ GIComplexPatternEquiv<VOP3OpSelMods>;
+
def gi_smrd_imm :
GIComplexOperandMatcher<s64, "selectSmrdImm">,
GIComplexPatternEquiv<SMRDImm>;
@@ -50,12 +58,19 @@ def gi_smrd_sgpr :
GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
GIComplexPatternEquiv<SMRDSgpr>;
+// FIXME: Why are the atomic versions separated?
def gi_flat_offset :
GIComplexOperandMatcher<s64, "selectFlatOffset">,
GIComplexPatternEquiv<FLATOffset>;
def gi_flat_offset_signed :
GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
GIComplexPatternEquiv<FLATOffsetSigned>;
+def gi_flat_atomic :
+ GIComplexOperandMatcher<s64, "selectFlatOffset">,
+ GIComplexPatternEquiv<FLATAtomic>;
+def gi_flat_signed_atomic :
+ GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
+ GIComplexPatternEquiv<FLATSignedAtomic>;
def gi_mubuf_scratch_offset :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
@@ -64,6 +79,44 @@ def gi_mubuf_scratch_offen :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
GIComplexPatternEquiv<MUBUFScratchOffen>;
+def gi_ds_1addr_1offset :
+ GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
+ GIComplexPatternEquiv<DS1Addr1Offset>;
+
+// Separate load nodes are defined to glue m0 initialization in
+// SelectionDAG. The GISel selector can just insert m0 initialization
+// directly before selecting a glue-less load, so hide this
+// distinction.
+
+def : GINodeEquiv<G_LOAD, AMDGPUld_glue> {
+ let CheckMMOIsNonAtomic = 1;
+}
+
+def : GINodeEquiv<G_STORE, AMDGPUst_glue> {
+ let CheckMMOIsNonAtomic = 1;
+}
+
+def : GINodeEquiv<G_LOAD, AMDGPUatomic_ld_glue> {
+ bit CheckMMOIsAtomic = 1;
+}
+
+def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap_glue>;
+def : GINodeEquiv<G_ATOMICRMW_XCHG, atomic_swap_glue>;
+def : GINodeEquiv<G_ATOMICRMW_ADD, atomic_load_add_glue>;
+def : GINodeEquiv<G_ATOMICRMW_SUB, atomic_load_sub_glue>;
+def : GINodeEquiv<G_ATOMICRMW_AND, atomic_load_and_glue>;
+def : GINodeEquiv<G_ATOMICRMW_OR, atomic_load_or_glue>;
+def : GINodeEquiv<G_ATOMICRMW_XOR, atomic_load_xor_glue>;
+def : GINodeEquiv<G_ATOMICRMW_MIN, atomic_load_min_glue>;
+def : GINodeEquiv<G_ATOMICRMW_MAX, atomic_load_max_glue>;
+def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin_glue>;
+def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
+def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
+
+def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
class GISelSop2Pat <
SDPatternOperator node,
@@ -143,20 +196,6 @@ multiclass GISelVop2IntrPat <
def : GISelSop2Pat <or, S_OR_B32, i32>;
def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
-// FIXME: We can't re-use SelectionDAG patterns here because they match
-// against a custom SDNode and we would need to create a generic machine
-// instruction that is equivalent to the custom SDNode. This would also require
-// us to custom legalize the intrinsic to the new generic machine instruction,
-// but I can't get custom legalizing of intrinsic to work and I'm not sure if
-// this is even supported yet.
-def : GISelVop3Pat2ModsPat <
- int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e64, v2f16, f32>;
-
-defm : GISelVop2IntrPat <int_maxnum, V_MAX_F32_e32, f32>;
-def : GISelVop3Pat2ModsPat <int_maxnum, V_MAX_F64, f64>;
-defm : GISelVop2IntrPat <int_minnum, V_MIN_F32_e32, f32>;
-def : GISelVop3Pat2ModsPat <int_minnum, V_MIN_F64, f64>;
-
// Since GlobalISel is more flexible than SelectionDAG, I think we can get
// away with adding patterns for integer types and not legalizing all
// loads and stores to vector types. This should help simplify the load/store
@@ -164,3 +203,6 @@ def : GISelVop3Pat2ModsPat <int_minnum, V_MIN_F64, f64>;
foreach Ty = [i64, p0, p1, p4] in {
defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>;
}
+
+def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
+ GISDNodeXFormEquiv<as_i32timm>;
diff --git a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 0a1f48231b18..85d1ad349157 100644
--- a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -22,15 +22,17 @@ enum PartialMappingIdx {
PM_SGPR128 = 9,
PM_SGPR256 = 10,
PM_SGPR512 = 11,
- PM_VGPR1 = 12,
- PM_VGPR16 = 16,
- PM_VGPR32 = 17,
- PM_VGPR64 = 18,
- PM_VGPR128 = 19,
- PM_VGPR256 = 20,
- PM_VGPR512 = 21,
- PM_SGPR96 = 22,
- PM_VGPR96 = 23
+ PM_SGPR1024 = 12,
+ PM_VGPR1 = 13,
+ PM_VGPR16 = 17,
+ PM_VGPR32 = 18,
+ PM_VGPR64 = 19,
+ PM_VGPR128 = 20,
+ PM_VGPR256 = 21,
+ PM_VGPR512 = 22,
+ PM_VGPR1024 = 23,
+ PM_SGPR96 = 24,
+ PM_VGPR96 = 25
};
const RegisterBankInfo::PartialMapping PartMappings[] {
@@ -45,6 +47,7 @@ const RegisterBankInfo::PartialMapping PartMappings[] {
{0, 128, SGPRRegBank},
{0, 256, SGPRRegBank},
{0, 512, SGPRRegBank},
+ {0, 1024, SGPRRegBank},
{0, 1, VGPRRegBank}, // VGPR begin
{0, 16, VGPRRegBank},
@@ -53,8 +56,9 @@ const RegisterBankInfo::PartialMapping PartMappings[] {
{0, 128, VGPRRegBank},
{0, 256, VGPRRegBank},
{0, 512, VGPRRegBank},
+ {0, 1024, VGPRRegBank},
{0, 96, SGPRRegBank},
- {0, 96, VGPRRegBank},
+ {0, 96, VGPRRegBank}
};
const RegisterBankInfo::ValueMapping ValMappings[] {
@@ -65,41 +69,43 @@ const RegisterBankInfo::ValueMapping ValMappings[] {
{&PartMappings[1], 1},
// SGPRs
- {&PartMappings[2], 1},
+ {&PartMappings[2], 1}, // 1
{nullptr, 0}, // Illegal power of 2 sizes
{nullptr, 0},
{nullptr, 0},
- {&PartMappings[3], 1},
- {&PartMappings[4], 1},
- {&PartMappings[5], 1},
- {&PartMappings[6], 1},
- {&PartMappings[7], 1},
- {&PartMappings[8], 1},
-
- // VGPRs
- {&PartMappings[9], 1},
+ {&PartMappings[3], 1}, // 16
+ {&PartMappings[4], 1}, // 32
+ {&PartMappings[5], 1}, // 64
+ {&PartMappings[6], 1}, // 128
+ {&PartMappings[7], 1}, // 256
+ {&PartMappings[8], 1}, // 512
+ {&PartMappings[9], 1}, // 1024
+
+ // VGPRs
+ {&PartMappings[10], 1}, // 1
{nullptr, 0},
{nullptr, 0},
{nullptr, 0},
- {&PartMappings[10], 1},
- {&PartMappings[11], 1},
- {&PartMappings[12], 1},
- {&PartMappings[13], 1},
- {&PartMappings[14], 1},
- {&PartMappings[15], 1},
- {&PartMappings[16], 1},
- {&PartMappings[17], 1}
+ {&PartMappings[11], 1}, // 16
+ {&PartMappings[12], 1}, // 32
+ {&PartMappings[13], 1}, // 64
+ {&PartMappings[14], 1}, // 128
+ {&PartMappings[15], 1}, // 256
+ {&PartMappings[16], 1}, // 512
+ {&PartMappings[17], 1}, // 1024
+ {&PartMappings[18], 1},
+ {&PartMappings[19], 1}
};
const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
- /*32-bit op*/ {0, 32, SGPRRegBank},
- /*2x32-bit op*/ {0, 32, SGPRRegBank},
- {32, 32, SGPRRegBank},
-/*<2x32-bit> op*/ {0, 64, SGPRRegBank},
-
- /*32-bit op*/ {0, 32, VGPRRegBank},
- /*2x32-bit op*/ {0, 32, VGPRRegBank},
- {32, 32, VGPRRegBank},
+ {0, 32, SGPRRegBank}, // 32-bit op
+ {0, 32, SGPRRegBank}, // 2x32-bit op
+ {32, 32, SGPRRegBank},
+ {0, 64, SGPRRegBank}, // <2x32-bit> op
+
+ {0, 32, VGPRRegBank}, // 32-bit op
+ {0, 32, VGPRRegBank}, // 2x32-bit op
+ {32, 32, VGPRRegBank},
};
@@ -116,7 +122,7 @@ const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
enum ValueMappingIdx {
SCCStartIdx = 0,
SGPRStartIdx = 2,
- VGPRStartIdx = 12
+ VGPRStartIdx = 13
};
const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
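
The renumbering above is forced by a layout invariant the table appears to rely on: within each bank, the ValMappings rows are ordered by log2 of the size in bits, so inserting the 1024-bit SGPR row pushes the VGPR block from index 12 to 13. A sketch of that assumed invariant (not code from this file; the 96-bit rows sit outside the scheme, appended at the end):

// Assumed invariant: row = bank start + log2(size in bits); the 96-bit
// rows (PM_SGPR96/PM_VGPR96) live outside this scheme, appended last.
enum { SGPRStartIdx = 2, VGPRStartIdx = 13 };

constexpr unsigned log2u(unsigned V) { return V <= 1 ? 0 : 1 + log2u(V / 2); }

constexpr unsigned rowFor(bool IsVGPR, unsigned SizeInBits) {
  return (IsVGPR ? VGPRStartIdx : SGPRStartIdx) + log2u(SizeInBits);
}

static_assert(rowFor(false, 1024) == 12, "PM_SGPR1024");
static_assert(rowFor(true, 1) == 13, "PM_VGPR1");
static_assert(rowFor(true, 1024) == 23, "PM_VGPR1024");
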
diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index b31de0af5018..9f5bcd8ff5f0 100644
--- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -218,12 +218,13 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL);
- unsigned MaxKernArgAlign;
+ Align MaxKernArgAlign;
HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
MaxKernArgAlign);
HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
- HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
+ HSACodeProps.mKernargSegmentAlign =
+ std::max(MaxKernArgAlign, Align(4)).value();
HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
@@ -883,7 +884,7 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
auto Kern = HSAMetadataDoc->getMapNode();
- unsigned MaxKernArgAlign;
+ Align MaxKernArgAlign;
Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
STM.getKernArgSegmentSize(F, MaxKernArgAlign));
Kern[".group_segment_fixed_size"] =
@@ -891,7 +892,7 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
Kern[".private_segment_fixed_size"] =
Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
Kern[".kernarg_segment_align"] =
- Kern.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign));
+ Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
Kern[".wavefront_size"] =
Kern.getDocument()->getNode(STM.getWavefrontSize());
Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 2eecddbd7b01..80ac8ca67bcd 100644
--- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -52,7 +52,7 @@ public:
class MetadataStreamerV3 final : public MetadataStreamer {
private:
std::unique_ptr<msgpack::Document> HSAMetadataDoc =
- llvm::make_unique<msgpack::Document>();
+ std::make_unique<msgpack::Document>();
void dump(StringRef HSAMetadataString) const;
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index ea730539f834..f330bd7ebcdd 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -172,8 +172,9 @@ private:
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
- SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+ SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
+ SDNode *glueCopyToM0LDSInit(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -186,10 +187,11 @@ private:
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC) const;
+ SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
+ SDValue &SLC, SDValue &TFE, SDValue &DLC,
+ SDValue &SWZ) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
@@ -202,21 +204,20 @@ private:
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC) const;
+ SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &SLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
+ template <bool IsSigned>
+ bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
- template <bool IsSigned>
- bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, SDValue &SLC) const;
-
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
SDValue Expand32BitAddress(SDValue Addr) const;
@@ -262,6 +263,8 @@ private:
SDValue getHi16Elt(SDValue In) const;
+ SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
+
void SelectADD_SUB_I64(SDNode *N);
void SelectAddcSubb(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
@@ -282,6 +285,7 @@ private:
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
+ void SelectINTRINSIC_WO_CHAIN(SDNode *N);
void SelectINTRINSIC_VOID(SDNode *N);
protected:
@@ -543,7 +547,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
if (!N->isMachineOpcode()) {
if (N->getOpcode() == ISD::CopyToReg) {
unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
return MRI.getRegClass(Reg);
}
@@ -582,19 +586,10 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
}
}
-SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
-
- assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
-
- SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
- Val);
-
- SDValue Glue = M0.getValue(1);
-
+SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
+ SDValue Glue) const {
SmallVector <SDValue, 8> Ops;
- Ops.push_back(M0); // Replace the chain.
+ Ops.push_back(NewChain); // Replace the chain.
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
Ops.push_back(N->getOperand(i));
@@ -602,6 +597,16 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
+
+ SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
+ return glueCopyToOp(N, M0, M0.getValue(1));
+}
+
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
if (AS == AMDGPUAS::LOCAL_ADDRESS) {
@@ -635,13 +640,13 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
switch (NumVectorElts) {
case 1:
- return AMDGPU::SReg_32_XM0RegClassID;
+ return AMDGPU::SReg_32RegClassID;
case 2:
return AMDGPU::SReg_64RegClassID;
case 3:
return AMDGPU::SGPR_96RegClassID;
case 4:
- return AMDGPU::SReg_128RegClassID;
+ return AMDGPU::SGPR_128RegClassID;
case 5:
return AMDGPU::SGPR_160RegClassID;
case 8:
@@ -713,12 +718,17 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
return; // Already selected.
}
- if (isa<AtomicSDNode>(N) ||
+ // isa<MemSDNode> almost works but is slightly too permissive for some DS
+ // intrinsics.
+ if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
(Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
Opc == ISD::ATOMIC_LOAD_FADD ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
N = glueCopyToM0LDSInit(N);
+ SelectCode(N);
+ return;
+ }
switch (Opc) {
default:
@@ -781,7 +791,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SDValue RC, SubReg0, SubReg1;
SDLoc DL(N);
if (N->getValueType(0) == MVT::i128) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
+ RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
} else if (N->getValueType(0) == MVT::i64) {
@@ -815,14 +825,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
return;
}
- case ISD::LOAD:
- case ISD::STORE:
- case ISD::ATOMIC_LOAD:
- case ISD::ATOMIC_STORE: {
- N = glueCopyToM0LDSInit(N);
- break;
- }
-
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
// There is a scalar version available, but unlike the vector version which
@@ -908,6 +910,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectINTRINSIC_W_CHAIN(N);
return;
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ SelectINTRINSIC_WO_CHAIN(N);
+ return;
+ }
case ISD::INTRINSIC_VOID: {
SelectINTRINSIC_VOID(N);
return;
@@ -961,6 +967,14 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
return true;
}
+SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
+ const SDLoc &DL) const {
+ SDNode *Mov = CurDAG->getMachineNode(
+ AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(Val, DL, MVT::i32));
+ return SDValue(Mov, 0);
+}
+
// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDLoc DL(N);
@@ -1308,7 +1322,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64,
SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC) const {
+ SDValue &TFE, SDValue &DLC,
+ SDValue &SWZ) const {
// Subtarget prefers to use flat instruction
if (Subtarget->useFlatForGlobal())
return false;
@@ -1321,6 +1336,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1400,7 +1416,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &GLC,
SDValue &SLC, SDValue &TFE,
- SDValue &DLC) const {
+ SDValue &DLC, SDValue &SWZ) const {
SDValue Ptr, Offen, Idxen, Addr64;
// addr64 bit was removed for volcanic islands.
@@ -1408,7 +1424,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return false;
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE, DLC))
+ GLC, SLC, TFE, DLC, SWZ))
return false;
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1430,9 +1446,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &Offset,
SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
- SDValue GLC, TFE, DLC;
+ SDValue GLC, TFE, DLC, SWZ;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1557,13 +1573,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset,
SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC) const {
+ SDValue &TFE, SDValue &DLC,
+ SDValue &SWZ) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE, DLC))
+ GLC, SLC, TFE, DLC, SWZ))
return false;
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1585,16 +1602,30 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset
) const {
- SDValue GLC, SLC, TFE, DLC;
+ SDValue GLC, SLC, TFE, DLC, SWZ;
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
SDValue &SLC) const {
- SDValue GLC, TFE, DLC;
+ SDValue GLC, TFE, DLC, SWZ;
+
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
+}
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
+// Find a load or store from the corresponding pattern root.
+// Roots may be build_vector, bitconvert, or their combinations.
+static MemSDNode* findMemSDNode(SDNode *N) {
+ N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
+ if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
+ return MN;
+ assert(isa<BuildVectorSDNode>(N));
+ for (SDValue V : N->op_values())
+ if (MemSDNode *MN =
+ dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
+ return MN;
+ llvm_unreachable("cannot find MemSDNode in the pattern!");
}
template <bool IsSigned>
@@ -1603,8 +1634,95 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
- return static_cast<const SITargetLowering*>(getTargetLowering())->
- SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
+ int64_t OffsetVal = 0;
+
+ if (Subtarget->hasFlatInstOffsets() &&
+ (!Subtarget->hasFlatSegmentOffsetBug() ||
+ findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
+ CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ unsigned AS = findMemSDNode(N)->getAddressSpace();
+ if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ } else {
+ // If the offset doesn't fit, put the low bits into the offset field and
+ // add the rest.
+
+ SDLoc DL(N);
+ uint64_t ImmField;
+ const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
+ if (IsSigned) {
+ ImmField = SignExtend64(COffsetVal, NumBits);
+
+ // Don't use a negative offset field if the base offset is positive.
+ // Since the scheduler currently relies on the offset field, doing so
+ // could result in strange scheduling decisions.
+
+ // TODO: Should we not do this in the opposite direction as well?
+ if (static_cast<int64_t>(COffsetVal) > 0) {
+ if (static_cast<int64_t>(ImmField) < 0) {
+ const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits - 1);
+ ImmField = COffsetVal & OffsetMask;
+ }
+ }
+ } else {
+ // TODO: Should we do this for a negative offset?
+ const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
+ ImmField = COffsetVal & OffsetMask;
+ }
+
+ uint64_t RemainderOffset = COffsetVal - ImmField;
+
+ assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
+ assert(RemainderOffset + ImmField == COffsetVal);
+
+ OffsetVal = ImmField;
+
+ // TODO: Should this try to use a scalar add pseudo if the base address is
+ // uniform and saddr is usable?
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+
+ SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, N0, Sub0);
+ SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, N0, Sub1);
+
+ SDValue AddOffsetLo
+ = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
+ SDValue AddOffsetHi
+ = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
+
+ SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
+ SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+
+ SDNode *Add = CurDAG->getMachineNode(
+ AMDGPU::V_ADD_I32_e64, DL, VTs,
+ {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
+
+ SDNode *Addc = CurDAG->getMachineNode(
+ AMDGPU::V_ADDC_U32_e64, DL, VTs,
+ {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
+
+ SDValue RegSequenceArgs[] = {
+ CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
+ SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1
+ };
+
+ Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::i64, RegSequenceArgs), 0);
+ }
+ }
+
+ VAddr = Addr;
+ Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+ SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
+ return true;
}
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
@@ -1616,10 +1734,10 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
}
bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
- SDValue Addr,
- SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
+ SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}
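
The interesting case in the new SelectFlatOffset is the fallback when the constant offset does not fit the immediate field: keep the low bits in the field and fold the remainder into the base address with the V_ADD_I32_e64/V_ADDC_U32_e64 pair built above. A standalone sketch of that arithmetic (names assumed; it mirrors the SignExtend64/maskTrailingOnes logic in the patch):

#include <cassert>
#include <cstdint>

// Split Offset so that base + Offset == (base + Remainder) + ImmField,
// with ImmField representable in NumBits (NumBits < 64 assumed).
static void splitOffset(int64_t Offset, unsigned NumBits, bool IsSigned,
                        int64_t &ImmField, int64_t &Remainder) {
  const uint64_t Mask = (1ull << NumBits) - 1;
  ImmField = (int64_t)((uint64_t)Offset & Mask);
  if (IsSigned) {
    if (ImmField & ((int64_t)1 << (NumBits - 1)))
      ImmField -= (int64_t)(Mask + 1); // sign-extend the field
    // Mirror the patch: avoid a negative field for a positive offset,
    // since the scheduler keys off the immediate.
    if (Offset > 0 && ImmField < 0)
      ImmField = Offset & (int64_t)(Mask >> 1);
  }
  Remainder = Offset - ImmField;
  assert(Remainder + ImmField == Offset);
}

int main() {
  int64_t Imm, Rem;
  splitOffset(4095, 12, /*IsSigned=*/true, Imm, Rem);
  assert(Imm == 2047 && Rem == 2048); // negative field avoided
}
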
@@ -2158,10 +2276,12 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
// offset field) % 64. Some versions of the programming guide omit the m0
// part, or claim it's from offset 0.
if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
- // If we have a constant offset, try to use the default value for m0 as a
- // base to possibly avoid setting it up.
- glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
- ImmOffset = ConstOffset->getZExtValue() + 1;
+ // If we have a constant offset, try to use the 0 in m0 as the base.
+ // TODO: Look into changing the default m0 initialization value. If the
+ // default -1 only set the low 16 bits, we could leave it as-is and add 1 to
+ // the immediate offset.
+ glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
+ ImmOffset = ConstOffset->getZExtValue();
} else {
if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
ImmOffset = BaseOffset.getConstantOperandVal(1);
@@ -2182,22 +2302,7 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
glueCopyToM0(N, SDValue(M0Base, 0));
}
- SDValue V0;
SDValue Chain = N->getOperand(0);
- SDValue Glue;
- if (HasVSrc) {
- SDValue VSrc0 = N->getOperand(2);
-
- // The manual doesn't mention this, but it seems only v0 works.
- V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
-
- SDValue CopyToV0 = CurDAG->getCopyToReg(
- N->getOperand(0), SL, V0, VSrc0,
- N->getOperand(N->getNumOperands() - 1));
- Chain = CopyToV0;
- Glue = CopyToV0.getValue(1);
- }
-
SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
// TODO: Can this just be removed from the instruction?
@@ -2206,14 +2311,11 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
const unsigned Opc = gwsIntrinToOpcode(IntrID);
SmallVector<SDValue, 5> Ops;
if (HasVSrc)
- Ops.push_back(V0);
+ Ops.push_back(N->getOperand(2));
Ops.push_back(OffsetField);
Ops.push_back(GDS);
Ops.push_back(Chain);
- if (HasVSrc)
- Ops.push_back(Glue);
-
SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
@@ -2233,6 +2335,28 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
SelectCode(N);
}
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned Opcode;
+ switch (IntrID) {
+ case Intrinsic::amdgcn_wqm:
+ Opcode = AMDGPU::WQM;
+ break;
+ case Intrinsic::amdgcn_softwqm:
+ Opcode = AMDGPU::SOFT_WQM;
+ break;
+ case Intrinsic::amdgcn_wwm:
+ Opcode = AMDGPU::WWM;
+ break;
+ default:
+ SelectCode(N);
+ return;
+ }
+
+ SDValue Src = N->getOperand(1);
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
+}
+
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntrID) {
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 39016ed37193..1115d8c23620 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -12,10 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define AMDGPU_LOG2E_F 1.44269504088896340735992468100189214f
-#define AMDGPU_LN2_F 0.693147180559945309417232121458176568f
-#define AMDGPU_LN10_F 2.30258509299404568401799145468436421f
-
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
@@ -37,82 +33,9 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
-static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State,
- const TargetRegisterClass *RC,
- unsigned NumRegs) {
- ArrayRef<MCPhysReg> RegList = makeArrayRef(RC->begin(), NumRegs);
- unsigned RegResult = State.AllocateReg(RegList);
- if (RegResult == AMDGPU::NoRegister)
- return false;
-
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo));
- return true;
-}
-
-static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- switch (LocVT.SimpleTy) {
- case MVT::i64:
- case MVT::f64:
- case MVT::v2i32:
- case MVT::v2f32:
- case MVT::v4i16:
- case MVT::v4f16: {
- // Up to SGPR0-SGPR105
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::SGPR_64RegClass, 53);
- }
- default:
- return false;
- }
-}
-
-// Allocate up to VGPR31.
-//
-// TODO: Since there are no VGPR alignent requirements would it be better to
-// split into individual scalar registers?
-static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- switch (LocVT.SimpleTy) {
- case MVT::i64:
- case MVT::f64:
- case MVT::v2i32:
- case MVT::v2f32:
- case MVT::v4i16:
- case MVT::v4f16: {
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::VReg_64RegClass, 31);
- }
- case MVT::v4i32:
- case MVT::v4f32:
- case MVT::v2i64:
- case MVT::v2f64: {
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::VReg_128RegClass, 29);
- }
- case MVT::v8i32:
- case MVT::v8f32: {
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::VReg_256RegClass, 25);
-
- }
- case MVT::v16i32:
- case MVT::v16f32: {
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::VReg_512RegClass, 17);
-
- }
- default:
- return false;
- }
-}
-
#include "AMDGPUGenCallingConv.inc"
// Find a larger type to do a load / store of a vector with.
@@ -208,7 +131,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
}
- for (MVT VT : MVT::integer_vector_valuetypes()) {
+ for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand);
@@ -218,6 +141,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v3i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v3i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v3i16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
@@ -225,8 +151,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
@@ -286,8 +215,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
+ setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
+ setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
+ setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -571,6 +503,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FABS);
setTargetDAGCombine(ISD::AssertZext);
setTargetDAGCombine(ISD::AssertSext);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
}
//===----------------------------------------------------------------------===//
@@ -630,15 +563,26 @@ static bool hasSourceMods(const SDNode *N) {
case ISD::FREM:
case ISD::INLINEASM:
case ISD::INLINEASM_BR:
- case AMDGPUISD::INTERP_P1:
- case AMDGPUISD::INTERP_P2:
case AMDGPUISD::DIV_SCALE:
+ case ISD::INTRINSIC_W_CHAIN:
// TODO: Should really be looking at the users of the bitcast. These are
// problematic because bitcasts are used to legalize all stores to integer
// types.
case ISD::BITCAST:
return false;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_interp_mov:
+ case Intrinsic::amdgcn_interp_p1_f16:
+ case Intrinsic::amdgcn_interp_p2_f16:
+ return false;
+ default:
+ return true;
+ }
+ }
default:
return true;
}
@@ -745,8 +689,9 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
return false;
bool Fast = false;
- return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), CastTy,
- MMO, &Fast) && Fast;
+ return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ CastTy, MMO, &Fast) &&
+ Fast;
}
// SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
@@ -782,9 +727,8 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
break;
case ISD::LOAD:
{
- const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
- if (L->getMemOperand()->getAddrSpace()
- == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
+ if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() ==
+ AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
return false;
}
@@ -1199,9 +1143,9 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FROUND: return LowerFROUND(Op, DAG);
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::FLOG:
- return LowerFLOG(Op, DAG, 1 / AMDGPU_LOG2E_F);
+ return LowerFLOG(Op, DAG, 1.0F / numbers::log2ef);
case ISD::FLOG10:
- return LowerFLOG(Op, DAG, AMDGPU_LN2_F / AMDGPU_LN10_F);
+ return LowerFLOG(Op, DAG, numbers::ln2f / numbers::ln10f);
case ISD::FEXP:
return lowerFEXP(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
@@ -1236,7 +1180,7 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
}
}
-static bool hasDefinedInitializer(const GlobalValue *GV) {
+bool AMDGPUTargetLowering::hasDefinedInitializer(const GlobalValue *GV) {
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar || !GVar->hasInitializer())
return false;
@@ -2349,30 +2293,13 @@ SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
}
-// Return M_LOG2E of appropriate type
-static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT) {
- switch (VT.getScalarType().getSimpleVT().SimpleTy) {
- case MVT::f32:
- return DAG.getConstantFP(1.44269504088896340735992468100189214f, SL, VT);
- case MVT::f16:
- return DAG.getConstantFP(
- APFloat(APFloat::IEEEhalf(), "1.44269504088896340735992468100189214"),
- SL, VT);
- case MVT::f64:
- return DAG.getConstantFP(
- APFloat(APFloat::IEEEdouble(), "0x1.71547652b82fep+0"), SL, VT);
- default:
- llvm_unreachable("unsupported fp type");
- }
-}
-
// exp2(M_LOG2E_F * f);
SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
- const SDValue K = getLog2EVal(DAG, SL, VT);
+ const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags());
return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags());
}
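
The constants that replace getLog2EVal and the deleted AMDGPU_* macros are the standard change-of-base identities, now spelled via llvm::numbers. A quick standalone check of those identities (plain C++, not LLVM code):

#include <cassert>
#include <cmath>

int main() {
  const double x = 7.5, log2e = std::log2(std::exp(1.0));
  // FLOG:   ln(x)    == log2(x) * (1 / log2(e))    -> 1.0F / numbers::log2ef
  assert(std::fabs(std::log(x) - std::log2(x) / log2e) < 1e-12);
  // FLOG10: log10(x) == log2(x) * (ln(2) / ln(10)) -> numbers::ln2f / ln10f
  assert(std::fabs(std::log10(x) -
                   std::log2(x) * std::log(2.0) / std::log(10.0)) < 1e-12);
  // FEXP:   exp(x)   == exp2(x * log2(e))          -> numbers::log2e
  assert(std::fabs(std::exp(x) - std::exp2(x * log2e)) < 1e-9);
}
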
@@ -2836,8 +2763,16 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
static SDValue simplifyI24(SDNode *Node24,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
- SDValue LHS = Node24->getOperand(0);
- SDValue RHS = Node24->getOperand(1);
+ bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
+
+ SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0);
+ SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1);
+ unsigned NewOpcode = Node24->getOpcode();
+ if (IsIntrin) {
+ unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue();
+ NewOpcode = IID == Intrinsic::amdgcn_mul_i24 ?
+ AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
+ }
APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
@@ -2847,7 +2782,7 @@ static SDValue simplifyI24(SDNode *Node24,
SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded);
SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded);
if (DemandedLHS || DemandedRHS)
- return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(),
+ return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(),
DemandedLHS ? DemandedLHS : LHS,
DemandedRHS ? DemandedRHS : RHS);
@@ -2904,54 +2839,6 @@ bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
return true;
}
-// Find a load or store from corresponding pattern root.
-// Roots may be build_vector, bitconvert or their combinations.
-static MemSDNode* findMemSDNode(SDNode *N) {
- N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
- if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
- return MN;
- assert(isa<BuildVectorSDNode>(N));
- for (SDValue V : N->op_values())
- if (MemSDNode *MN =
- dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
- return MN;
- llvm_unreachable("cannot find MemSDNode in the pattern!");
-}
-
-bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned,
- SelectionDAG &DAG,
- SDNode *N,
- SDValue Addr,
- SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
- const GCNSubtarget &ST =
- DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
- int64_t OffsetVal = 0;
-
- if (ST.hasFlatInstOffsets() &&
- (!ST.hasFlatSegmentOffsetBug() ||
- findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
- DAG.isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
-
- const SIInstrInfo *TII = ST.getInstrInfo();
- if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(),
- IsSigned)) {
- Addr = N0;
- OffsetVal = COffsetVal;
- }
- }
-
- VAddr = Addr;
- Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
- SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1);
-
- return true;
-}
-
// Replace load of an illegal type with a store of a bitcast to a friendlier
// type.
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
@@ -3085,6 +2972,19 @@ SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
return SDValue();
}
+
+SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IID) {
+ case Intrinsic::amdgcn_mul_i24:
+ case Intrinsic::amdgcn_mul_u24:
+ return simplifyI24(N, DCI);
+ default:
+ return SDValue();
+ }
+}
+
/// Split the 64-bit value \p LHS into two 32-bit components, and perform the
/// binary operation \p Opc to it with the corresponding constant operands.
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
@@ -4173,6 +4073,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AssertZext:
case ISD::AssertSext:
return performAssertSZExtCombine(N, DCI);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return performIntrinsicWOChainCombine(N, DCI);
}
return SDValue();
}
@@ -4203,14 +4105,28 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT);
}
+// This may be called multiple times, and nothing prevents creating multiple
+// objects at the same offset. See if we already defined this object.
+static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size,
+ int64_t Offset) {
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ if (MFI.getObjectOffset(I) == Offset) {
+ assert(MFI.getObjectSize(I) == Size);
+ return I;
+ }
+ }
+
+ return MFI.CreateFixedObject(Size, Offset, true);
+}
+
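
loadStackInputValue can be reached more than once for the same offset, so the helper scans the existing fixed objects (negative frame indices) before creating a new one. A toy model of the same idea (names assumed, not LLVM's MachineFrameInfo):

#include <cassert>
#include <map>

// Fixed stack objects get negative indices; a repeated request at the
// same offset must return the existing index, not mint a duplicate.
struct FrameModel {
  std::map<long, int> ByOffset; // byte offset -> frame index
  int NextFixed = -1;
  int getOrCreateFixedObject(long Offset) {
    auto [It, Inserted] = ByOffset.try_emplace(Offset, NextFixed);
    if (Inserted)
      --NextFixed;
    return It->second;
  }
};

int main() {
  FrameModel MFI;
  int A = MFI.getOrCreateFixedObject(16);
  int B = MFI.getOrCreateFixedObject(16); // second query, same offset
  assert(A == B && A == -1);
  assert(MFI.getOrCreateFixedObject(32) == -2);
}
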
SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
EVT VT,
const SDLoc &SL,
int64_t Offset) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset);
- int FI = MFI.CreateFixedObject(VT.getStoreSize(), Offset, true);
auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset);
SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32);
@@ -4260,7 +4176,7 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const AMDGPUSubtarget &ST =
AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction());
unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction());
- unsigned Alignment = ST.getAlignmentForImplicitArgPtr();
+ const Align Alignment = ST.getAlignmentForImplicitArgPtr();
uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
ExplicitArgOffset;
switch (Param) {
@@ -4295,6 +4211,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(SETCC)
NODE_NAME_CASE(SETREG)
+ NODE_NAME_CASE(DENORM_MODE)
NODE_NAME_CASE(FMA_W_CHAIN)
NODE_NAME_CASE(FMUL_W_CHAIN)
NODE_NAME_CASE(CLAMP)
@@ -4377,13 +4294,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(KILL)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
- NODE_NAME_CASE(INIT_EXEC)
- NODE_NAME_CASE(INIT_EXEC_FROM_INPUT)
- NODE_NAME_CASE(SENDMSG)
- NODE_NAME_CASE(SENDMSGHALT)
- NODE_NAME_CASE(INTERP_MOV)
- NODE_NAME_CASE(INTERP_P1)
- NODE_NAME_CASE(INTERP_P2)
NODE_NAME_CASE(INTERP_P1LL_F16)
NODE_NAME_CASE(INTERP_P1LV_F16)
NODE_NAME_CASE(INTERP_P2_F16)
@@ -4428,6 +4338,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_ATOMIC_AND)
NODE_NAME_CASE(BUFFER_ATOMIC_OR)
NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
+ NODE_NAME_CASE(BUFFER_ATOMIC_INC)
+ NODE_NAME_CASE(BUFFER_ATOMIC_DEC)
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD)
@@ -4576,9 +4488,9 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
} else if (SelBits == 0x0c) {
- Known.Zero |= 0xff << I;
+ Known.Zero |= 0xFFull << I;
} else if (SelBits > 0x0c) {
- Known.One |= 0xff << I;
+ Known.One |= 0xFFull << I;
}
Sel >>= 8;
}
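
The 0xFFull suffix is a correctness fix rather than style: 0xff << 24 is computed in int, comes out negative on typical two's-complement targets, and then sign-extends when widened for the 64-bit KnownBits masks. A minimal reproduction (general C++ behavior, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  unsigned I = 24;
  uint64_t Bad = 0xff << I;     // int shift goes negative, sign-extends
  uint64_t Good = 0xFFull << I; // stays a 64-bit unsigned shift
  assert(Good == 0xff000000ull);
  assert(Bad == 0xffffffffff000000ull); // high bits polluted
}
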
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index fe7ad694943d..dea0d1d4343a 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -38,6 +38,7 @@ private:
public:
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
+ static bool hasDefinedInitializer(const GlobalValue *GV);
protected:
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -78,6 +79,7 @@ protected:
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
@@ -324,10 +326,6 @@ public:
}
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
-
- bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N,
- SDValue Addr, SDValue &VAddr, SDValue &Offset,
- SDValue &SLC) const;
};
namespace AMDGPUISD {
@@ -369,6 +367,9 @@ enum NodeType : unsigned {
// result bit per item in the wavefront.
SETCC,
SETREG,
+
+ DENORM_MODE,
+
// FP ops with input and output chain.
FMA_W_CHAIN,
FMUL_W_CHAIN,
@@ -475,13 +476,6 @@ enum NodeType : unsigned {
BUILD_VERTICAL_VECTOR,
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
- INIT_EXEC,
- INIT_EXEC_FROM_INPUT,
- SENDMSG,
- SENDMSGHALT,
- INTERP_MOV,
- INTERP_P1,
- INTERP_P2,
INTERP_P1LL_F16,
INTERP_P1LV_F16,
INTERP_P2_F16,
@@ -532,6 +526,8 @@ enum NodeType : unsigned {
BUFFER_ATOMIC_AND,
BUFFER_ATOMIC_OR,
BUFFER_ATOMIC_XOR,
+ BUFFER_ATOMIC_INC,
+ BUFFER_ATOMIC_DEC,
BUFFER_ATOMIC_CMPSWAP,
BUFFER_ATOMIC_FADD,
BUFFER_ATOMIC_PK_FADD,
diff --git a/lib/Target/AMDGPU/AMDGPUInline.cpp b/lib/Target/AMDGPU/AMDGPUInline.cpp
index f4df20b8f03e..a83ec23ec054 100644
--- a/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -51,7 +51,7 @@ ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
// Inliner constraint to achieve reasonable compilation time
static cl::opt<size_t>
-MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300),
+MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100),
cl::desc("Maximum BB number allowed in a function after inlining"
" (compile time constraint)"));
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 4a8446955496..cf0ce5659951 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -110,39 +110,38 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
// Force dependencies for vector trunc stores
def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
-def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
-def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
-
+def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
+def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
// out = a - floor(a)
-def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
// out = 1.0 / a
-def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
+def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a)
-def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
+def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a)
-def AMDGPUrcp_legacy : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
-def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
+def AMDGPUrsq_legacy_impl : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
+def AMDGPUrcp_legacy_impl : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
-def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
+def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
-def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
+def AMDGPUldexp_impl : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
-def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
-def AMDGPUpknorm_i16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
-def AMDGPUpknorm_u16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
-def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
-def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
+def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
+def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
+def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
+def AMDGPUpk_i16_i32_impl : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
+def AMDGPUpk_u16_u32_impl : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;
def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>;
-def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
+def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
// out = max(a, b) a and b are floats, where a nan comparison fails.
// This is not commutative because this gives the second operand:
@@ -285,7 +284,7 @@ def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
-def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;
+def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;
def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
@@ -320,7 +319,7 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
[]
>;
-def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
+def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
@@ -330,35 +329,6 @@ def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
-def AMDGPUinit_exec : SDNode<"AMDGPUISD::INIT_EXEC",
- SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPHasChain, SDNPInGlue]>;
-
-def AMDGPUinit_exec_from_input : SDNode<"AMDGPUISD::INIT_EXEC_FROM_INPUT",
- SDTypeProfile<0, 2,
- [SDTCisInt<0>, SDTCisInt<1>]>,
- [SDNPHasChain, SDNPInGlue]>;
-
-def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
- SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPHasChain, SDNPInGlue]>;
-
-def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
- SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPHasChain, SDNPInGlue]>;
-
-def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
- SDTypeProfile<1, 3, [SDTCisFP<0>]>,
- [SDNPInGlue]>;
-
-def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
- SDTypeProfile<1, 3, [SDTCisFP<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-
-def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
- SDTypeProfile<1, 4, [SDTCisFP<0>]>,
- [SDNPInGlue]>;
-
def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16",
SDTypeProfile<1, 7, [SDTCisFP<0>]>,
[SDNPInGlue, SDNPOutGlue]>;
@@ -425,3 +395,65 @@ def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
+
+
+//===----------------------------------------------------------------------===//
+// Intrinsic/Custom node compatibility PatFrags
+//===----------------------------------------------------------------------===//
+
+def AMDGPUrcp : PatFrags<(ops node:$src), [(int_amdgcn_rcp node:$src),
+ (AMDGPUrcp_impl node:$src)]>;
+def AMDGPUrcp_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rcp_legacy node:$src),
+ (AMDGPUrcp_legacy_impl node:$src)]>;
+
+def AMDGPUrsq_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rsq_legacy node:$src),
+ (AMDGPUrsq_legacy_impl node:$src)]>;
+
+def AMDGPUrsq : PatFrags<(ops node:$src), [(int_amdgcn_rsq node:$src),
+ (AMDGPUrsq_impl node:$src)]>;
+
+def AMDGPUrsq_clamp : PatFrags<(ops node:$src), [(int_amdgcn_rsq_clamp node:$src),
+ (AMDGPUrsq_clamp_impl node:$src)]>;
+
+def AMDGPUsin : PatFrags<(ops node:$src), [(int_amdgcn_sin node:$src),
+ (AMDGPUsin_impl node:$src)]>;
+def AMDGPUcos : PatFrags<(ops node:$src), [(int_amdgcn_cos node:$src),
+ (AMDGPUcos_impl node:$src)]>;
+def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src),
+ (AMDGPUfract_impl node:$src)]>;
+
+def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_ldexp node:$src0, node:$src1),
+ (AMDGPUldexp_impl node:$src0, node:$src1)]>;
+
+def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_class node:$src0, node:$src1),
+ (AMDGPUfp_class_impl node:$src0, node:$src1)]>;
+
+def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2),
+ (AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>;
+
+def AMDGPUffbh_i32 : PatFrags<(ops node:$src),
+ [(int_amdgcn_sffbh node:$src),
+ (AMDGPUffbh_i32_impl node:$src)]>;
+
+def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_cvt_pkrtz node:$src0, node:$src1),
+ (AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>;
+
+def AMDGPUpknorm_i16_f32 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_cvt_pknorm_i16 node:$src0, node:$src1),
+ (AMDGPUpknorm_i16_f32_impl node:$src0, node:$src1)]>;
+
+def AMDGPUpknorm_u16_f32 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_cvt_pknorm_u16 node:$src0, node:$src1),
+ (AMDGPUpknorm_u16_f32_impl node:$src0, node:$src1)]>;
+
+def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_cvt_pk_i16 node:$src0, node:$src1),
+ (AMDGPUpk_i16_i32_impl node:$src0, node:$src1)]>;
+
+def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_cvt_pk_u16 node:$src0, node:$src1),
+ (AMDGPUpk_u16_u32_impl node:$src0, node:$src1)]>;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 901a2eaa8829..3cfa9d57ec46 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -19,8 +19,10 @@
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -61,8 +63,14 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
+void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB,
+ CodeGenCoverage &CoverageInfo) {
+ MRI = &MF.getRegInfo();
+ InstructionSelector::setupMF(MF, KB, CoverageInfo);
+}
+
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return Reg == AMDGPU::SCC;
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
@@ -71,7 +79,9 @@ static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
if (RC) {
// FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
// context of the register bank has been lost.
- if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
+ // As a hack, getRegClassForSizeOnBank uses exactly SGPR_32RegClass, which
+ // won't ever be constrained any further.
+ if (RC != &AMDGPU::SGPR_32RegClass)
return false;
const LLT Ty = MRI.getType(Reg);
return Ty.isValid() && Ty.getSizeInBits() == 1;
@@ -83,7 +93,7 @@ static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
bool AMDGPUInstructionSelector::isVCC(Register Reg,
const MachineRegisterInfo &MRI) const {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return Reg == TRI.getVCC();
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
@@ -102,8 +112,6 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg,
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
I.setDesc(TII.get(TargetOpcode::COPY));
const MachineOperand &Src = I.getOperand(1);
@@ -111,33 +119,33 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
Register DstReg = Dst.getReg();
Register SrcReg = Src.getReg();
- if (isVCC(DstReg, MRI)) {
+ if (isVCC(DstReg, *MRI)) {
if (SrcReg == AMDGPU::SCC) {
const TargetRegisterClass *RC
- = TRI.getConstrainedRegClassForOperand(Dst, MRI);
+ = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
if (!RC)
return true;
- return RBI.constrainGenericRegister(DstReg, *RC, MRI);
+ return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
}
- if (!isVCC(SrcReg, MRI)) {
+ if (!isVCC(SrcReg, *MRI)) {
// TODO: Should probably leave the copy and let copyPhysReg expand it.
- if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
+ if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
return false;
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
.addImm(0)
.addReg(SrcReg);
- if (!MRI.getRegClassOrNull(SrcReg))
- MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
+ if (!MRI->getRegClassOrNull(SrcReg))
+ MRI->setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, *MRI));
I.eraseFromParent();
return true;
}
const TargetRegisterClass *RC =
- TRI.getConstrainedRegClassForOperand(Dst, MRI);
- if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
+ TRI.getConstrainedRegClassForOperand(Dst, *MRI);
+ if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
return false;
// Don't constrain the source register to a class so the def instruction
@@ -148,8 +156,8 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
// with size 1. An SReg_32 with size 1 is ambiguous with wave32.
if (Src.isUndef()) {
const TargetRegisterClass *SrcRC =
- TRI.getConstrainedRegClassForOperand(Src, MRI);
- if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
+ TRI.getConstrainedRegClassForOperand(Src, *MRI);
+ if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
return false;
}
@@ -157,30 +165,26 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
}
for (const MachineOperand &MO : I.operands()) {
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ if (Register::isPhysicalRegister(MO.getReg()))
continue;
const TargetRegisterClass *RC =
- TRI.getConstrainedRegClassForOperand(MO, MRI);
+ TRI.getConstrainedRegClassForOperand(MO, *MRI);
if (!RC)
continue;
- RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
+ RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
}
return true;
}
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
const Register DefReg = I.getOperand(0).getReg();
- const LLT DefTy = MRI.getType(DefReg);
+ const LLT DefTy = MRI->getType(DefReg);
// TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
const RegClassOrRegBank &RegClassOrBank =
- MRI.getRegClassOrRegBank(DefReg);
+ MRI->getRegClassOrRegBank(DefReg);
const TargetRegisterClass *DefRC
= RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
@@ -196,7 +200,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
return false;
}
- DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
+ DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, *MRI);
if (!DefRC) {
LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
return false;
@@ -204,7 +208,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
}
I.setDesc(TII.get(TargetOpcode::PHI));
- return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
+ return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
}
MachineOperand
@@ -214,13 +218,11 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
MachineInstr *MI = MO.getParent();
MachineBasicBlock *BB = MO.getParent()->getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- Register DstReg = MRI.createVirtualRegister(&SubRC);
+ Register DstReg = MRI->createVirtualRegister(&SubRC);
if (MO.isReg()) {
unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
.addReg(Reg, 0, ComposedSubIdx);
@@ -244,10 +246,6 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
}
}
-static int64_t getConstant(const MachineInstr *MI) {
- return MI->getOperand(1).getCImm()->getSExtValue();
-}
-
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
switch (Opc) {
case AMDGPU::G_AND:
@@ -262,16 +260,13 @@ static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
}
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
MachineOperand &Dst = I.getOperand(0);
MachineOperand &Src0 = I.getOperand(1);
MachineOperand &Src1 = I.getOperand(2);
Register DstReg = Dst.getReg();
- unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+ unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
- const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
if (DstRB->getID() == AMDGPU::VCCRegBankID) {
const TargetRegisterClass *RC = TRI.getBoolRC();
unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
@@ -282,12 +277,12 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
// The selector for G_ICMP relies on seeing the register bank for the result
// is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
// be ambiguous whether it's a scalar or vector bool.
- if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
- MRI.setRegClass(Src0.getReg(), RC);
- if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
- MRI.setRegClass(Src1.getReg(), RC);
+ if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg()))
+ MRI->setRegClass(Src0.getReg(), RC);
+ if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg()))
+ MRI->setRegClass(Src1.getReg(), RC);
- return RBI.constrainGenericRegister(DstReg, *RC, MRI);
+ return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
}
// TODO: Should this allow an SCC bank result, and produce a copy from SCC for
@@ -295,14 +290,7 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
I.setDesc(TII.get(InstOpc));
-
- const TargetRegisterClass *RC
- = TRI.getConstrainedRegClassForOperand(Dst, MRI);
- if (!RC)
- return false;
- return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
- RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
- RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
return false;
@@ -311,11 +299,10 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
Register DstReg = I.getOperand(0).getReg();
const DebugLoc &DL = I.getDebugLoc();
- unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
- const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+ unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
@@ -340,7 +327,7 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;
- Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
+ Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
MachineInstr *Add
= BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
.addDef(UnusedCarry, RegState::Dead)
@@ -363,8 +350,8 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
- Register DstLo = MRI.createVirtualRegister(&HalfRC);
- Register DstHi = MRI.createVirtualRegister(&HalfRC);
+ Register DstLo = MRI->createVirtualRegister(&HalfRC);
+ Register DstHi = MRI->createVirtualRegister(&HalfRC);
if (IsSALU) {
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
@@ -375,14 +362,14 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
.add(Hi2);
} else {
const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
- Register CarryReg = MRI.createVirtualRegister(CarryRC);
+ Register CarryReg = MRI->createVirtualRegister(CarryRC);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
.addDef(CarryReg)
.add(Lo1)
.add(Lo2)
.addImm(0);
MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
- .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
+ .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead)
.add(Hi1)
.add(Hi2)
.addReg(CarryReg, RegState::Kill)
@@ -399,19 +386,61 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
.addImm(AMDGPU::sub1);
- if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
+ if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
return false;
I.eraseFromParent();
return true;
}
-bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- assert(I.getOperand(2).getImm() % 32 == 0);
- unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
+ const DebugLoc &DL = I.getDebugLoc();
+ Register Dst0Reg = I.getOperand(0).getReg();
+ Register Dst1Reg = I.getOperand(1).getReg();
+ const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO;
+
+ if (!isSCC(Dst1Reg, MRI)) {
+ // The names of these opcodes are misleading: v_add_i32/v_sub_i32 have an
+ // unsigned carry out despite the _i32 suffix. They were renamed to _U32 in VI.
+ // FIXME: We should probably rename the opcodes here.
+ unsigned NewOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ I.setDesc(TII.get(NewOpc));
+ I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ I.addOperand(*MF, MachineOperand::CreateImm(0));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ Register Src0Reg = I.getOperand(2).getReg();
+ Register Src1Reg = I.getOperand(3).getReg();
+ unsigned NewOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
+ BuildMI(*BB, &I, DL, TII.get(NewOpc), Dst0Reg)
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
+ .addReg(AMDGPU::SCC);
+
+ if (!MRI.getRegClassOrNull(Dst1Reg))
+ MRI.setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
+
+ if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, MRI) ||
+ !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, MRI) ||
+ !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, MRI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ unsigned Offset = I.getOperand(2).getImm();
+ if (Offset % 32 != 0)
+ return false;
+
+ unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
const DebugLoc &DL = I.getDebugLoc();
MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
I.getOperand(0).getReg())
@@ -419,10 +448,10 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
for (const MachineOperand &MO : Copy->operands()) {
const TargetRegisterClass *RC =
- TRI.getConstrainedRegClassForOperand(MO, MRI);
+ TRI.getConstrainedRegClassForOperand(MO, *MRI);
if (!RC)
continue;
- RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
+ RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
}
I.eraseFromParent();
return true;
@@ -430,21 +459,19 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
MachineBasicBlock *BB = MI.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ LLT DstTy = MRI->getType(DstReg);
+ LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
const unsigned SrcSize = SrcTy.getSizeInBits();
if (SrcSize < 32)
return false;
const DebugLoc &DL = MI.getDebugLoc();
- const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
const unsigned DstSize = DstTy.getSizeInBits();
const TargetRegisterClass *DstRC =
- TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
+ TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
if (!DstRC)
return false;
@@ -457,12 +484,12 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
MIB.addImm(SubRegs[I]);
const TargetRegisterClass *SrcRC
- = TRI.getConstrainedRegClassForOperand(Src, MRI);
- if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
+ = TRI.getConstrainedRegClassForOperand(Src, *MRI);
+ if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
return false;
}
- if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
return false;
MI.eraseFromParent();
@@ -471,25 +498,23 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
MachineBasicBlock *BB = MI.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
const int NumDst = MI.getNumOperands() - 1;
MachineOperand &Src = MI.getOperand(NumDst);
Register SrcReg = Src.getReg();
Register DstReg0 = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg0);
- LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI->getType(DstReg0);
+ LLT SrcTy = MRI->getType(SrcReg);
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
const DebugLoc &DL = MI.getDebugLoc();
- const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
+ const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
const TargetRegisterClass *SrcRC =
- TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
- if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
+ TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
+ if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
return false;
const unsigned SrcFlags = getUndefRegState(Src.isUndef());
@@ -504,8 +529,8 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
.addReg(SrcReg, SrcFlags, SubRegs[I]);
const TargetRegisterClass *DstRC =
- TRI.getConstrainedRegClassForOperand(Dst, MRI);
- if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
+ TRI.getConstrainedRegClassForOperand(Dst, *MRI);
+ if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
return false;
}
@@ -518,16 +543,13 @@ bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
}
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
const MachineOperand &MO = I.getOperand(0);
// FIXME: Interface for getConstrainedRegClassForOperand needs work. The
// regbank check here is to know why getConstrainedRegClassForOperand failed.
- const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
- if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
- (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
+ const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
+ if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
+ (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
return true;
}
@@ -537,44 +559,62 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
- DebugLoc DL = I.getDebugLoc();
- MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
- .addDef(I.getOperand(0).getReg())
- .addReg(I.getOperand(1).getReg())
- .addReg(I.getOperand(2).getReg())
- .addImm(SubReg);
-
- for (const MachineOperand &MO : Ins->operands()) {
- if (!MO.isReg())
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- continue;
- const TargetRegisterClass *RC =
- TRI.getConstrainedRegClassForOperand(MO, MRI);
- if (!RC)
- continue;
- RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
- }
+ Register DstReg = I.getOperand(0).getReg();
+ Register Src0Reg = I.getOperand(1).getReg();
+ Register Src1Reg = I.getOperand(2).getReg();
+ LLT Src1Ty = MRI->getType(Src1Reg);
+
+ unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
+ unsigned InsSize = Src1Ty.getSizeInBits();
+
+ int64_t Offset = I.getOperand(3).getImm();
+ if (Offset % 32 != 0)
+ return false;
+
+ unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
+ if (SubReg == AMDGPU::NoSubRegister)
+ return false;
+
+ const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
+ const TargetRegisterClass *DstRC =
+ TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
+ if (!DstRC)
+ return false;
+
+ const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
+ const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
+ const TargetRegisterClass *Src0RC =
+ TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI);
+ const TargetRegisterClass *Src1RC =
+ TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI);
+
+ // Deal with weird cases where the class only partially supports the subreg
+ // index.
+ Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
+ if (!Src0RC)
+ return false;
+
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
+ !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
+ !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
+ return false;
+
+ const DebugLoc &DL = I.getDebugLoc();
+ BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
+ .addReg(Src0Reg)
+ .addReg(Src1Reg)
+ .addImm(SubReg);
+
I.eraseFromParent();
return true;
}
-bool AMDGPUInstructionSelector::selectG_INTRINSIC(
- MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
- unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
+bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
+ unsigned IntrinsicID = I.getIntrinsicID();
switch (IntrinsicID) {
- case Intrinsic::maxnum:
- case Intrinsic::minnum:
- case Intrinsic::amdgcn_cvt_pkrtz:
- return selectImpl(I, CoverageInfo);
case Intrinsic::amdgcn_if_break: {
MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
// FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
@@ -589,15 +629,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(
I.eraseFromParent();
- for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
- if (!MRI.getRegClassOrNull(Reg))
- MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
- }
+ for (Register Reg : { DstReg, Src0Reg, Src1Reg })
+ MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
return true;
}
default:
- return selectImpl(I, CoverageInfo);
+ return selectImpl(I, *CoverageInfo);
}
}
@@ -677,17 +715,15 @@ int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
const DebugLoc &DL = I.getDebugLoc();
- unsigned SrcReg = I.getOperand(2).getReg();
- unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
+ Register SrcReg = I.getOperand(2).getReg();
+ unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
- unsigned CCReg = I.getOperand(0).getReg();
- if (isSCC(CCReg, MRI)) {
+ Register CCReg = I.getOperand(0).getReg();
+ if (isSCC(CCReg, *MRI)) {
int Opcode = getS_CMPOpcode(Pred, Size);
if (Opcode == -1)
return false;
@@ -698,7 +734,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
.addReg(AMDGPU::SCC);
bool Ret =
constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
- RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
+ RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
I.eraseFromParent();
return Ret;
}
@@ -712,7 +748,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
.add(I.getOperand(2))
.add(I.getOperand(3));
RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
- *TRI.getBoolRC(), MRI);
+ *TRI.getBoolRC(), *MRI);
bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
I.eraseFromParent();
return Ret;
@@ -736,19 +772,273 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
.addImm(Enabled);
}
+static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
+ int64_t C;
+ if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
+ return true;
+
+ // FIXME: matcher should ignore copies
+ return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
+}
+
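+// The auxiliary data operand of the raw buffer intrinsics packs the buffer
+// access flags, as the extractors below show: bit 0 = glc, bit 1 = slc,
+// bit 2 = dlc, bit 3 = swz.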
+static unsigned extractGLC(unsigned AuxiliaryData) {
+ return AuxiliaryData & 1;
+}
+
+static unsigned extractSLC(unsigned AuxiliaryData) {
+ return (AuxiliaryData >> 1) & 1;
+}
+
+static unsigned extractDLC(unsigned AuxiliaryData) {
+ return (AuxiliaryData >> 2) & 1;
+}
+
+static unsigned extractSWZ(unsigned AuxiliaryData) {
+ return (AuxiliaryData >> 3) & 1;
+}
+
+// Returns the base register, constant offset, and offset def point.
+static std::tuple<Register, unsigned, MachineInstr *>
+getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
+ MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+ if (!Def)
+ return std::make_tuple(Reg, 0, nullptr);
+
+ if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
+ unsigned Offset;
+ const MachineOperand &Op = Def->getOperand(1);
+ if (Op.isImm())
+ Offset = Op.getImm();
+ else
+ Offset = Op.getCImm()->getZExtValue();
+
+ return std::make_tuple(Register(), Offset, Def);
+ }
+
+ int64_t Offset;
+ if (Def->getOpcode() == AMDGPU::G_ADD) {
+ // TODO: Handle G_OR used for add case
+ if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset)))
+ return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def);
+
+ // FIXME: matcher should ignore copies
+ if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset))))
+ return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def);
+ }
+
+ return std::make_tuple(Reg, 0, Def);
+}
+
+static unsigned getBufferStoreOpcode(LLT Ty,
+ const unsigned MemSize,
+ const bool Offen) {
+ const int Size = Ty.getSizeInBits();
+ switch (8 * MemSize) {
+ case 8:
+ return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
+ case 16:
+ return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
+ default:
+ unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
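+ // Widen the base dword opcode to the matching dwordx2/x3/x4 variant when
+ // the value is wider than 32 bits.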
+ if (Size > 32)
+ Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
+ return Opc;
+ }
+}
+
+static unsigned getBufferStoreFormatOpcode(LLT Ty,
+ const unsigned MemSize,
+ const bool Offen) {
+ bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
+ bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
+ int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
+
+ if (IsD16Packed) {
+ switch (NumElts) {
+ case 1:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
+ case 2:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
+ case 3:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
+ case 4:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
+ default:
+ return -1;
+ }
+ }
+
+ if (IsD16Unpacked) {
+ switch (NumElts) {
+ case 1:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
+ case 2:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
+ case 3:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
+ case 4:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
+ default:
+ return -1;
+ }
+ }
+
+ switch (NumElts) {
+ case 1:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
+ case 2:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
+ case 3:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
+ case 4:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
+ default:
+ return -1;
+ }
+
+ llvm_unreachable("unhandled buffer store");
+}
+
+// TODO: Move this to combiner
+// Returns base register, imm offset, total constant offset.
+std::tuple<Register, unsigned, unsigned>
+AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
+ Register OrigOffset) const {
+ const unsigned MaxImm = 4095;
+ Register BaseReg;
+ unsigned TotalConstOffset;
+ MachineInstr *OffsetDef;
+
+ std::tie(BaseReg, TotalConstOffset, OffsetDef)
+ = getBaseWithConstantOffset(*MRI, OrigOffset);
+
+ unsigned ImmOffset = TotalConstOffset;
+
+ // If the immediate value is too big for the immoffset field, keep only the
+ // low 12 bits in the immoffset field so that the value that is copied/added
+ // for the voffset field is a multiple of 4096, and it stands more chance
+ // of being CSEd with the copy/add for another similar load/store.
+ // However, do not do that rounding down to a multiple of 4096 if the
+ // multiple is negative, as it appears to be illegal to have a negative
+ // offset in the vgpr, even if adding the immediate offset makes it positive.
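+ // For example, a total constant offset of 5000 splits into Overflow = 4096
+ // (folded into the voffset) and ImmOffset = 904.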
+ unsigned Overflow = ImmOffset & ~MaxImm;
+ ImmOffset -= Overflow;
+ if ((int32_t)Overflow < 0) {
+ Overflow += ImmOffset;
+ ImmOffset = 0;
+ }
+
+ if (Overflow != 0) {
+ // In case this is in a waterfall loop, insert offset code at the def point
+ // of the offset, not inside the loop.
+ MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
+ MachineBasicBlock &OldMBB = B.getMBB();
+ B.setInstr(*OffsetDef);
+
+ if (!BaseReg) {
+ BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ B.buildInstr(AMDGPU::V_MOV_B32_e32)
+ .addDef(BaseReg)
+ .addImm(Overflow);
+ } else {
+ Register OverflowVal = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ B.buildInstr(AMDGPU::V_MOV_B32_e32)
+ .addDef(OverflowVal)
+ .addImm(Overflow);
+
+ Register NewBaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
+ .addReg(BaseReg)
+ .addReg(OverflowVal, RegState::Kill)
+ .addImm(0);
+ BaseReg = NewBaseReg;
+ }
+
+ B.setInsertPt(OldMBB, OldInsPt);
+ }
+
+ return std::make_tuple(BaseReg, ImmOffset, TotalConstOffset);
+}
+
+bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
+ bool IsFormat) const {
+ MachineIRBuilder B(MI);
+ MachineFunction &MF = B.getMF();
+ Register VData = MI.getOperand(1).getReg();
+ LLT Ty = MRI->getType(VData);
+
+ int Size = Ty.getSizeInBits();
+ if (Size % 32 != 0)
+ return false;
+
+ // FIXME: Verifier should enforce 1 MMO for these intrinsics.
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ const int MemSize = MMO->getSize();
+
+ Register RSrc = MI.getOperand(2).getReg();
+ Register VOffset = MI.getOperand(3).getReg();
+ Register SOffset = MI.getOperand(4).getReg();
+ unsigned AuxiliaryData = MI.getOperand(5).getImm();
+ unsigned ImmOffset;
+ unsigned TotalOffset;
+
+ std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
+ if (TotalOffset != 0)
+ MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);
+
+ const bool Offen = !isZero(VOffset, *MRI);
+
+ int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
+ getBufferStoreOpcode(Ty, MemSize, Offen);
+ if (Opc == -1)
+ return false;
+
+ MachineInstrBuilder MIB = B.buildInstr(Opc)
+ .addUse(VData);
+
+ if (Offen)
+ MIB.addUse(VOffset);
+
+ MIB.addUse(RSrc)
+ .addUse(SOffset)
+ .addImm(ImmOffset)
+ .addImm(extractGLC(AuxiliaryData))
+ .addImm(extractSLC(AuxiliaryData))
+ .addImm(0) // tfe: FIXME: Remove from inst
+ .addImm(extractDLC(AuxiliaryData))
+ .addImm(extractSWZ(AuxiliaryData))
+ .addMemOperand(MMO);
+
+ MI.eraseFromParent();
+
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
- MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
+ MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
+ unsigned IntrinsicID = I.getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp: {
- int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
- int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
- int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
- int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
+ int64_t Tgt = I.getOperand(1).getImm();
+ int64_t Enabled = I.getOperand(2).getImm();
+ int64_t Done = I.getOperand(7).getImm();
+ int64_t VM = I.getOperand(8).getImm();
MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
I.getOperand(4).getReg(),
@@ -761,13 +1051,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
}
case Intrinsic::amdgcn_exp_compr: {
const DebugLoc &DL = I.getDebugLoc();
- int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
- int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
- unsigned Reg0 = I.getOperand(3).getReg();
- unsigned Reg1 = I.getOperand(4).getReg();
- unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
- int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
+ int64_t Tgt = I.getOperand(1).getImm();
+ int64_t Enabled = I.getOperand(2).getImm();
+ Register Reg0 = I.getOperand(3).getReg();
+ Register Reg1 = I.getOperand(4).getReg();
+ Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ int64_t Done = I.getOperand(5).getImm();
+ int64_t VM = I.getOperand(6).getImm();
BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
@@ -786,27 +1076,29 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
Register Reg = I.getOperand(1).getReg();
I.eraseFromParent();
- if (!MRI.getRegClassOrNull(Reg))
- MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
+ if (!MRI->getRegClassOrNull(Reg))
+ MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
return true;
}
+ case Intrinsic::amdgcn_raw_buffer_store:
+ return selectStoreIntrinsic(I, false);
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ return selectStoreIntrinsic(I, true);
default:
- return selectImpl(I, CoverageInfo);
+ return selectImpl(I, *CoverageInfo);
}
}
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
const DebugLoc &DL = I.getDebugLoc();
- unsigned DstReg = I.getOperand(0).getReg();
- unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+ Register DstReg = I.getOperand(0).getReg();
+ unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
assert(Size <= 32 || Size == 64);
const MachineOperand &CCOp = I.getOperand(1);
- unsigned CCReg = CCOp.getReg();
- if (isSCC(CCReg, MRI)) {
+ Register CCReg = CCOp.getReg();
+ if (isSCC(CCReg, *MRI)) {
unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
AMDGPU::S_CSELECT_B32;
MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
@@ -815,8 +1107,8 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
// The generic constrainSelectedInstRegOperands doesn't work for the scc register
// bank, because it does not cover the register class we use to represent it.
// So we need to set the register class manually here.
- if (!MRI.getRegClassOrNull(CCReg))
- MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
+ if (!MRI->getRegClassOrNull(CCReg))
+ MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
.add(I.getOperand(2))
.add(I.getOperand(3));
@@ -845,52 +1137,8 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
}
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- DebugLoc DL = I.getDebugLoc();
- unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
- if (PtrSize != 64) {
- LLVM_DEBUG(dbgs() << "Unhandled address space\n");
- return false;
- }
-
- unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
- unsigned Opcode;
-
- // FIXME: Remove this when integers > s32 naturally selected.
- switch (StoreSize) {
- default:
- return false;
- case 32:
- Opcode = AMDGPU::FLAT_STORE_DWORD;
- break;
- case 64:
- Opcode = AMDGPU::FLAT_STORE_DWORDX2;
- break;
- case 96:
- Opcode = AMDGPU::FLAT_STORE_DWORDX3;
- break;
- case 128:
- Opcode = AMDGPU::FLAT_STORE_DWORDX4;
- break;
- }
-
- MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
- .add(I.getOperand(1))
- .add(I.getOperand(0))
- .addImm(0) // offset
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0); // dlc
-
-
- // Now that we selected an opcode, we need to constrain the register
- // operands to use appropriate classes.
- bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
-
- I.eraseFromParent();
- return Ret;
+ initM0(I);
+ return selectImpl(I, *CoverageInfo);
}
static int sizeToSubRegIndex(unsigned Size) {
@@ -915,19 +1163,15 @@ static int sizeToSubRegIndex(unsigned Size) {
}
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- unsigned DstReg = I.getOperand(0).getReg();
- unsigned SrcReg = I.getOperand(1).getReg();
- const LLT DstTy = MRI.getType(DstReg);
- const LLT SrcTy = MRI.getType(SrcReg);
+ Register DstReg = I.getOperand(0).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
+ const LLT DstTy = MRI->getType(DstReg);
+ const LLT SrcTy = MRI->getType(SrcReg);
if (!DstTy.isScalar())
return false;
- const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
- const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
if (SrcRB != DstRB)
return false;
@@ -935,9 +1179,9 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
unsigned SrcSize = SrcTy.getSizeInBits();
const TargetRegisterClass *SrcRC
- = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
+ = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, *MRI);
const TargetRegisterClass *DstRC
- = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);
+ = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, *MRI);
if (SrcSize > 32) {
int SubRegIdx = sizeToSubRegIndex(DstSize);
@@ -953,8 +1197,8 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
I.getOperand(1).setSubReg(SubRegIdx);
}
- if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
- !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
return false;
}
@@ -974,20 +1218,18 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock &MBB = *I.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
- const LLT DstTy = MRI.getType(DstReg);
- const LLT SrcTy = MRI.getType(SrcReg);
+ const LLT DstTy = MRI->getType(DstReg);
+ const LLT SrcTy = MRI->getType(SrcReg);
const LLT S1 = LLT::scalar(1);
const unsigned SrcSize = SrcTy.getSizeInBits();
const unsigned DstSize = DstTy.getSizeInBits();
if (!DstTy.isScalar())
return false;
- const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
+ const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
if (SrcTy != S1 || DstSize > 64) // Invalid
@@ -1000,7 +1242,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
// FIXME: Create an extra copy to avoid incorrectly constraining the result
// of the scc producer.
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
.addReg(SrcReg);
BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
@@ -1010,7 +1252,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
.addImm(0)
.addImm(Signed ? -1 : 1);
- return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
}
if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
@@ -1024,6 +1267,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
.addImm(0) // src1_modifiers
.addImm(Signed ? -1 : 1) // src1
.addUse(SrcReg);
+ I.eraseFromParent();
return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
}
@@ -1040,6 +1284,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
.addImm(Mask)
.addReg(SrcReg);
+ I.eraseFromParent();
return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
}
@@ -1049,11 +1294,12 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
.addReg(SrcReg)
.addImm(0) // Offset
.addImm(SrcSize); // Width
+ I.eraseFromParent();
return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
}
if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
- if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
+ if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
return false;
if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
@@ -1061,7 +1307,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
.addReg(SrcReg);
- return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
}
const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
@@ -1070,10 +1317,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
// Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
if (DstSize > 32 && SrcSize <= 32) {
// We need a 64-bit register source, but the high bits don't matter.
- unsigned ExtReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned UndefReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
.addReg(SrcReg)
@@ -1085,7 +1330,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
.addReg(ExtReg)
.addImm(SrcSize << 16);
- return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
}
unsigned Mask;
@@ -1099,16 +1345,58 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
.addImm(SrcSize << 16);
}
- return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
}
return false;
}
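+
+// IEEE-754 bit patterns for -1.0 (the signed 'true') and +1.0 (the unsigned
+// 'true') in half, single, and double precision.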
+static int64_t getFPTrueImmVal(unsigned Size, bool Signed) {
+ switch (Size) {
+ case 16:
+ return Signed ? 0xBC00 : 0x3C00;
+ case 32:
+ return Signed ? 0xbf800000 : 0x3f800000;
+ case 64:
+ return Signed ? 0xbff0000000000000 : 0x3ff0000000000000;
+ default:
+ llvm_unreachable("Invalid FP type size");
+ }
+}
+
+bool AMDGPUInstructionSelector::selectG_SITOFP_UITOFP(MachineInstr &I) const {
+ MachineBasicBlock *MBB = I.getParent();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ Register Src = I.getOperand(1).getReg();
+ if (!isSCC(Src, MRI))
+ return selectImpl(I, *CoverageInfo);
+
+ bool Signed = I.getOpcode() == AMDGPU::G_SITOFP;
+ Register DstReg = I.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DstReg);
+ const unsigned DstSize = DstTy.getSizeInBits();
+ const DebugLoc &DL = I.getDebugLoc();
+
+ BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+ .addReg(Src);
+
+ unsigned NewOpc =
+ DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
+ auto MIB = BuildMI(*MBB, I, DL, TII.get(NewOpc), DstReg)
+ .addImm(0)
+ .addImm(getFPTrueImmVal(DstSize, Signed));
+
+ if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
MachineOperand &ImmOp = I.getOperand(1);
// The AMDGPU backend only supports Imm operands and not CImm or FPImm.
@@ -1119,15 +1407,15 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
}
- unsigned DstReg = I.getOperand(0).getReg();
+ Register DstReg = I.getOperand(0).getReg();
unsigned Size;
bool IsSgpr;
- const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
+ const RegisterBank *RB = MRI->getRegBankOrNull(I.getOperand(0).getReg());
if (RB) {
IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
- Size = MRI.getType(DstReg).getSizeInBits();
+ Size = MRI->getType(DstReg).getSizeInBits();
} else {
- const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
+ const TargetRegisterClass *RC = TRI.getRegClassForReg(*MRI, DstReg);
IsSgpr = TRI.isSGPRClass(RC);
Size = TRI.getRegSizeInBits(*RC);
}
@@ -1142,34 +1430,41 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- DebugLoc DL = I.getDebugLoc();
- const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
- &AMDGPU::VGPR_32RegClass;
- unsigned LoReg = MRI.createVirtualRegister(RC);
- unsigned HiReg = MRI.createVirtualRegister(RC);
- const APInt &Imm = APInt(Size, I.getOperand(1).getImm());
-
- BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
- .addImm(Imm.trunc(32).getZExtValue());
+ const DebugLoc &DL = I.getDebugLoc();
- BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
- .addImm(Imm.ashr(32).getZExtValue());
+ APInt Imm(Size, I.getOperand(1).getImm());
- const MachineInstr *RS =
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
- .addReg(LoReg)
- .addImm(AMDGPU::sub0)
- .addReg(HiReg)
- .addImm(AMDGPU::sub1);
+ MachineInstr *ResInst;
+ if (IsSgpr && TII.isInlineConstant(Imm)) {
+ ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
+ .addImm(I.getOperand(1).getImm());
+ } else {
+ const TargetRegisterClass *RC = IsSgpr ?
+ &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
+ Register LoReg = MRI->createVirtualRegister(RC);
+ Register HiReg = MRI->createVirtualRegister(RC);
+
+ BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
+ .addImm(Imm.trunc(32).getZExtValue());
+
+ BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
+ .addImm(Imm.ashr(32).getZExtValue());
+
+ ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+ .addReg(LoReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(HiReg)
+ .addImm(AMDGPU::sub1);
+ }
// We can't call constrainSelectedInstRegOperands here, because it doesn't
// work for target independent opcodes
I.eraseFromParent();
const TargetRegisterClass *DstRC =
- TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
+ TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
if (!DstRC)
return true;
- return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
+ return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
}
static bool isConstant(const MachineInstr &MI) {
@@ -1188,13 +1483,13 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
GEPInfo GEPInfo(*PtrMI);
- for (unsigned i = 1, e = 3; i < e; ++i) {
+ for (unsigned i = 1; i != 3; ++i) {
const MachineOperand &GEPOp = PtrMI->getOperand(i);
const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
assert(OpDef);
- if (isConstant(*OpDef)) {
- // FIXME: Is it possible to have multiple Imm parts? Maybe if we
- // are lacking other optimizations.
+ if (i == 2 && isConstant(*OpDef)) {
+ // TODO: Could handle constant base + variable offset, but a combine
+ // probably should have commuted it.
assert(GEPInfo.Imm == 0);
GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
continue;
@@ -1240,16 +1535,26 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
return false;
}
-bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
- // TODO: Can/should we insert m0 initialization here for DS instructions and
- // call the normal selector?
- return false;
+void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+
+ const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
+ unsigned AS = PtrTy.getAddressSpace();
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
+ STI.ldsRequiresM0Init()) {
+ // If DS instructions require M0 initialization, insert it before selecting.
+ BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addImm(-1);
+ }
+}
+
+bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
+ initM0(I);
+ return selectImpl(I, *CoverageInfo);
}
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
MachineOperand &CondOp = I.getOperand(0);
Register CondReg = CondOp.getReg();
const DebugLoc &DL = I.getDebugLoc();
@@ -1263,11 +1568,12 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
// GlobalISel, we should push that decision into RegBankSelect. Assume for now
// RegBankSelect knows what it's doing if the branch condition is scc, even
// though it currently does not.
- if (isSCC(CondReg, MRI)) {
+ if (isSCC(CondReg, *MRI)) {
CondPhysReg = AMDGPU::SCC;
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
- ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
- } else if (isVCC(CondReg, MRI)) {
+ // FIXME: Hack for isSCC tests
+ ConstrainRC = &AMDGPU::SGPR_32RegClass;
+ } else if (isVCC(CondReg, *MRI)) {
// FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
// We sort of know, from the register bank, that a VCC producer ands
// inactive lanes with 0. What if there was a logical operation with vcc
@@ -1279,8 +1585,8 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
} else
return false;
- if (!MRI.getRegClassOrNull(CondReg))
- MRI.setRegClass(CondReg, ConstrainRC);
+ if (!MRI->getRegClassOrNull(CondReg))
+ MRI->setRegClass(CondReg, ConstrainRC);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
.addReg(CondReg);
@@ -1292,27 +1598,83 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
}
bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
Register DstReg = I.getOperand(0).getReg();
- const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
if (IsVGPR)
I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
return RBI.constrainGenericRegister(
- DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
+ DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
+}
+
+bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
+ uint64_t Align = I.getOperand(2).getImm();
+ const uint64_t Mask = ~((UINT64_C(1) << Align) - 1);
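+ // e.g. Align = 4 gives Mask = ~0xf, clearing the low four bits of the
+ // pointer.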
+
+ MachineBasicBlock *BB = I.getParent();
+
+ Register DstReg = I.getOperand(0).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
+
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
+ const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
+ unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
+ unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
+ const TargetRegisterClass &RegRC
+ = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
+
+ LLT Ty = MRI->getType(DstReg);
+
+ const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB,
+ *MRI);
+ const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB,
+ *MRI);
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
+ !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
+ return false;
+
+ const DebugLoc &DL = I.getDebugLoc();
+ Register ImmReg = MRI->createVirtualRegister(&RegRC);
+ BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg)
+ .addImm(Mask);
+
+ if (Ty.getSizeInBits() == 32) {
+ BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
+ .addReg(SrcReg)
+ .addReg(ImmReg);
+ I.eraseFromParent();
+ return true;
+ }
+
+ Register HiReg = MRI->createVirtualRegister(&RegRC);
+ Register LoReg = MRI->createVirtualRegister(&RegRC);
+ Register MaskLo = MRI->createVirtualRegister(&RegRC);
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
+ .addReg(SrcReg, 0, AMDGPU::sub0);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
+ .addReg(SrcReg, 0, AMDGPU::sub1);
+
+ BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo)
+ .addReg(LoReg)
+ .addReg(ImmReg);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+ .addReg(MaskLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(HiReg)
+ .addImm(AMDGPU::sub1);
+ I.eraseFromParent();
+ return true;
}
-bool AMDGPUInstructionSelector::select(MachineInstr &I,
- CodeGenCoverage &CoverageInfo) const {
+bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);
- if (!isPreISelGenericOpcode(I.getOpcode())) {
+ if (!I.isPreISelOpcode()) {
if (I.isCopy())
return selectCOPY(I);
return true;
@@ -1324,16 +1686,18 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_XOR:
if (selectG_AND_OR_XOR(I))
return true;
- return selectImpl(I, CoverageInfo);
+ return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
- if (selectG_ADD_SUB(I))
+ if (selectImpl(I, *CoverageInfo))
return true;
- LLVM_FALLTHROUGH;
- default:
- return selectImpl(I, CoverageInfo);
+ return selectG_ADD_SUB(I);
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBO:
+ return selectG_UADDO_USUBO(I);
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_BITCAST:
+ case TargetOpcode::G_PTRTOINT:
return selectCOPY(I);
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT:
@@ -1353,32 +1717,40 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_INSERT:
return selectG_INSERT(I);
case TargetOpcode::G_INTRINSIC:
- return selectG_INTRINSIC(I, CoverageInfo);
+ return selectG_INTRINSIC(I);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
- return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
+ return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
case TargetOpcode::G_ICMP:
if (selectG_ICMP(I))
return true;
- return selectImpl(I, CoverageInfo);
+ return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_LOAD:
- return selectImpl(I, CoverageInfo);
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ case TargetOpcode::G_ATOMICRMW_XCHG:
+ case TargetOpcode::G_ATOMICRMW_ADD:
+ case TargetOpcode::G_ATOMICRMW_SUB:
+ case TargetOpcode::G_ATOMICRMW_AND:
+ case TargetOpcode::G_ATOMICRMW_OR:
+ case TargetOpcode::G_ATOMICRMW_XOR:
+ case TargetOpcode::G_ATOMICRMW_MIN:
+ case TargetOpcode::G_ATOMICRMW_MAX:
+ case TargetOpcode::G_ATOMICRMW_UMIN:
+ case TargetOpcode::G_ATOMICRMW_UMAX:
+ case TargetOpcode::G_ATOMICRMW_FADD:
+ return selectG_LOAD_ATOMICRMW(I);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
case TargetOpcode::G_STORE:
- if (selectImpl(I, CoverageInfo))
- return true;
return selectG_STORE(I);
case TargetOpcode::G_TRUNC:
return selectG_TRUNC(I);
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
- if (selectG_SZA_EXT(I)) {
- I.eraseFromParent();
- return true;
- }
-
- return false;
+ return selectG_SZA_EXT(I);
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP:
+ return selectG_SITOFP_UITOFP(I);
case TargetOpcode::G_BRCOND:
return selectG_BRCOND(I);
case TargetOpcode::G_FRAME_INDEX:
@@ -1388,6 +1760,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
// is checking for G_CONSTANT
I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
return true;
+ case TargetOpcode::G_PTR_MASK:
+ return selectG_PTR_MASK(I);
+ default:
+ return selectImpl(I, *CoverageInfo);
}
return false;
}
@@ -1402,14 +1778,14 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
- Register Src, const MachineRegisterInfo &MRI) const {
+ Register Src) const {
unsigned Mods = 0;
- MachineInstr *MI = MRI.getVRegDef(Src);
+ MachineInstr *MI = MRI->getVRegDef(Src);
if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
Src = MI->getOperand(1).getReg();
Mods |= SISrcMods::NEG;
- MI = MRI.getVRegDef(Src);
+ MI = MRI->getVRegDef(Src);
}
if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
@@ -1432,12 +1808,23 @@ AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
- MachineRegisterInfo &MRI
- = Root.getParent()->getParent()->getParent()->getRegInfo();
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const {
Register Src;
unsigned Mods;
- std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
@@ -1446,6 +1833,7 @@ AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
}};
}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
return {{
@@ -1457,12 +1845,9 @@ AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
- MachineRegisterInfo &MRI
- = Root.getParent()->getParent()->getParent()->getRegInfo();
-
Register Src;
unsigned Mods;
- std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
@@ -1471,12 +1856,28 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
- MachineRegisterInfo &MRI =
- Root.getParent()->getParent()->getParent()->getRegInfo();
+AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
+ // FIXME: Handle clamp and op_sel
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // clamp
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
+ // FIXME: Handle op_sel
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
SmallVector<GEPInfo, 4> AddrInfo;
- getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+ getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
return None;
@@ -1496,11 +1897,8 @@ AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
- MachineRegisterInfo &MRI =
- Root.getParent()->getParent()->getParent()->getRegInfo();
-
SmallVector<GEPInfo, 4> AddrInfo;
- getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+ getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
return None;
@@ -1521,10 +1919,9 @@ InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
MachineInstr *MI = Root.getParent();
MachineBasicBlock *MBB = MI->getParent();
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
SmallVector<GEPInfo, 4> AddrInfo;
- getAddrModeInfo(*MI, MRI, AddrInfo);
+ getAddrModeInfo(*MI, *MRI, AddrInfo);
// FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
// then we can select all ptr + 32-bit offsets not just immediate offsets.
@@ -1540,7 +1937,7 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
// failed trying to select this load into one of the _IMM variants since
// the _IMM Patterns are considered before the _SGPR patterns.
unsigned PtrReg = GEPInfo.SgprParts[0];
- unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
.addImm(GEPInfo.Imm);
return {{
@@ -1553,8 +1950,6 @@ template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
MachineInstr *MI = Root.getParent();
- MachineBasicBlock *MBB = MI->getParent();
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
InstructionSelector::ComplexRendererFns Default = {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
@@ -1565,12 +1960,12 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
if (!STI.hasFlatInstOffsets())
return Default;
- const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
+ const MachineInstr *OpDef = MRI->getVRegDef(Root.getReg());
if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
return Default;
Optional<int64_t> Offset =
- getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
+ getConstantVRegVal(OpDef->getOperand(2).getReg(), *MRI);
if (!Offset.hasValue())
return Default;
@@ -1597,12 +1992,6 @@ AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
return selectFlatOffsetImpl<true>(Root);
}
-// FIXME: Implement
-static bool signBitIsZero(const MachineOperand &Op,
- const MachineRegisterInfo &MRI) {
- return false;
-}
-
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
return PSV && PSV->isStack();
@@ -1613,12 +2002,11 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
MachineInstr *MI = Root.getParent();
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
int64_t Offset = 0;
- if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
- Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) {
+ Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
// TODO: Should this be inside the render function? The iterator seems to
// move.
@@ -1652,18 +2040,18 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
// offsets.
Optional<int> FI;
Register VAddr = Root.getReg();
- if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
- if (isBaseWithConstantOffset(Root, MRI)) {
+ if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
+ if (isBaseWithConstantOffset(Root, *MRI)) {
const MachineOperand &LHS = RootDef->getOperand(1);
const MachineOperand &RHS = RootDef->getOperand(2);
- const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
- const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+ const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
+ const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
if (LHSDef && RHSDef) {
int64_t PossibleOffset =
RHSDef->getOperand(1).getCImm()->getSExtValue();
if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
(!STI.privateMemoryResourceIsRangeChecked() ||
- signBitIsZero(LHS, MRI))) {
+ KnownBits->signBitIsZero(LHS.getReg()))) {
if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
FI = LHSDef->getOperand(1).getIndex();
else
@@ -1700,15 +2088,30 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
}}};
}
+bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
+ const MachineOperand &Base,
+ int64_t Offset,
+ unsigned OffsetBits) const {
+ if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
+ (OffsetBits == 8 && !isUInt<8>(Offset)))
+ return false;
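+  // e.g. a 16-bit DS offset field can encode only 0..65535; larger offsets
+  // must stay in the base register.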
+
+ if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
+ return true;
+
+  // On Southern Islands, instructions with a negative base value and an
+  // offset don't seem to work.
+ return KnownBits->signBitIsZero(Base.getReg());
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
MachineOperand &Root) const {
MachineInstr *MI = Root.getParent();
MachineBasicBlock *MBB = MI->getParent();
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
int64_t Offset = 0;
- if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
+ if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
return {};
@@ -1728,3 +2131,54 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
}};
}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
+ const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
+ if (!RootDef) {
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
+ }};
+ }
+
+ int64_t ConstAddr = 0;
+ if (isBaseWithConstantOffset(Root, *MRI)) {
+ const MachineOperand &LHS = RootDef->getOperand(1);
+ const MachineOperand &RHS = RootDef->getOperand(2);
+ const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
+ const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
+ if (LHSDef && RHSDef) {
+ int64_t PossibleOffset =
+ RHSDef->getOperand(1).getCImm()->getSExtValue();
+ if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) {
+ // (add n0, c0)
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
+ }};
+ }
+ }
+ } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
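+    // TODO: The (sub base, const) form is not handled yet; it falls back to
+    // the default (base reg, 0 offset) rendering below.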
+ } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
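+    // TODO: Pure constant addresses are not handled yet either; they also
+    // fall back to the default rendering below.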
+ }
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
+ }};
+}
+
+void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
+ assert(CstVal && "Expected constant value");
+ MIB.addImm(CstVal.getValue());
+}
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 4f489ddfb23d..d3c83a6a872a 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -35,6 +35,7 @@ class AMDGPUInstrInfo;
class AMDGPURegisterBankInfo;
class GCNSubtarget;
class MachineInstr;
+class MachineIRBuilder;
class MachineOperand;
class MachineRegisterInfo;
class SIInstrInfo;
@@ -42,14 +43,20 @@ class SIMachineFunctionInfo;
class SIRegisterInfo;
class AMDGPUInstructionSelector : public InstructionSelector {
+private:
+ MachineRegisterInfo *MRI;
+
public:
AMDGPUInstructionSelector(const GCNSubtarget &STI,
const AMDGPURegisterBankInfo &RBI,
const AMDGPUTargetMachine &TM);
- bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
+ bool select(MachineInstr &I) override;
static const char *getName();
+ void setupMF(MachineFunction &MF, GISelKnownBits &KB,
+ CodeGenCoverage &CoverageInfo) override;
+
private:
struct GEPInfo {
const MachineInstr &GEP;
@@ -72,32 +79,42 @@ private:
bool selectPHI(MachineInstr &I) const;
bool selectG_TRUNC(MachineInstr &I) const;
bool selectG_SZA_EXT(MachineInstr &I) const;
+ bool selectG_SITOFP_UITOFP(MachineInstr &I) const;
bool selectG_CONSTANT(MachineInstr &I) const;
bool selectG_AND_OR_XOR(MachineInstr &I) const;
bool selectG_ADD_SUB(MachineInstr &I) const;
+ bool selectG_UADDO_USUBO(MachineInstr &I) const;
bool selectG_EXTRACT(MachineInstr &I) const;
bool selectG_MERGE_VALUES(MachineInstr &I) const;
bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
bool selectG_GEP(MachineInstr &I) const;
bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
bool selectG_INSERT(MachineInstr &I) const;
- bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
- bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I,
- CodeGenCoverage &CoverageInfo) const;
+ bool selectG_INTRINSIC(MachineInstr &I) const;
+
+ std::tuple<Register, unsigned, unsigned>
+ splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
+
+ bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
+
+ bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
bool selectG_ICMP(MachineInstr &I) const;
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
- bool selectG_LOAD(MachineInstr &I) const;
- bool selectG_SELECT(MachineInstr &I) const;
+
+ void initM0(MachineInstr &I) const;
+ bool selectG_LOAD_ATOMICRMW(MachineInstr &I) const;
bool selectG_STORE(MachineInstr &I) const;
+ bool selectG_SELECT(MachineInstr &I) const;
bool selectG_BRCOND(MachineInstr &I) const;
bool selectG_FRAME_INDEX(MachineInstr &I) const;
+ bool selectG_PTR_MASK(MachineInstr &I) const;
std::pair<Register, unsigned>
- selectVOP3ModsImpl(Register Src, const MachineRegisterInfo &MRI) const;
+ selectVOP3ModsImpl(Register Src) const;
InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;
@@ -108,11 +125,18 @@ private:
InstructionSelector::ComplexRendererFns
selectVOP3Mods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
selectVOP3OMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectVOP3OpSelMods0(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectVOP3OpSelMods(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
selectSmrdImm(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectSmrdImm32(MachineOperand &Root) const;
@@ -133,6 +157,16 @@ private:
InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffset(MachineOperand &Root) const;
+ bool isDSOffsetLegal(const MachineRegisterInfo &MRI,
+ const MachineOperand &Base,
+ int64_t Offset, unsigned OffsetBits) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectDS1Addr1Offset(MachineOperand &Root) const;
+
+ void renderTruncImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI) const;
+
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index 61bc415c839d..846e7f577a28 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -75,7 +75,7 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
let isCodeGenOnly = 1;
}
-def TruePredicate : Predicate<"true">;
+def TruePredicate : Predicate<"">;
class PredicateControl {
Predicate SubtargetPredicate = TruePredicate;
@@ -220,80 +220,48 @@ def hi_f16_elt : PatLeaf<
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//
-def COND_OEQ : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
->;
-
-def COND_ONE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
->;
-
-def COND_OGT : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
->;
-
-def COND_OGE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
->;
-
-def COND_OLT : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
->;
-
-def COND_OLE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
->;
-
-def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
-def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
+def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
+def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
+def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
+def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
+def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
+def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
+def COND_O : PatFrags<(ops), [(OtherVT SETO)]>;
+def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>;
//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//
-def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
-def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
-def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
-def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
-def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
-def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
+def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
+def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
+def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
+def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
+def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
+def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;
// XXX - For some reason the R600 version prefers to use unordered
// for setne?
-def COND_UNE_NE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
->;
+def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;
//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//
-def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
-def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
-def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
-def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;
+def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
+def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
+def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
+def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;
//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//
-def COND_EQ : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
->;
-
-def COND_NE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
->;
+def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
+def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;
+// FIXME: Should not need code predicate
+//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf <
(cond),
[{(void)N; return false;}]
@@ -335,17 +303,17 @@ def TEX_SHADOW_ARRAY : PatLeaf<
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
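+// Glued variant of the cmp-swap node, consumed by the *_m0 fragments below,
+// which are kept glued to the instruction that initializes M0.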
+def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+>;
+
class AddressSpaceList<list<int> AS> {
list<int> AddrSpaces = AS;
}
-class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
-}]>;
-
-class Aligned16Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAlignment() >= 16;
-}]>;
+class Aligned<int Bytes> {
+ int MinAlignment = Bytes;
+}
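+// Mixin that attaches a minimum alignment requirement (in bytes) to a
+// PatFrag, replacing the old Aligned8Bytes/Aligned16Bytes code predicates.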
class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;
@@ -502,6 +470,35 @@ defm atomic_store_#as : binary_atomic_op<atomic_store>;
} // End foreach AddrSpace
+multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
+ foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
+ let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
+ defm "_"#as : binary_atomic_op<atomic_op, IsInt>;
+
+ let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in {
+ defm "_"#as#"_noret" : binary_atomic_op<atomic_op, IsInt>;
+ }
+
+ let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in {
+ defm "_"#as#"_ret" : binary_atomic_op<atomic_op, IsInt>;
+ }
+ }
+ }
+}
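+// For example, ret_noret_binary_atomic_op<atomic_load_add> produces
+// atomic_load_add_global, atomic_load_add_global_ret (result used) and
+// atomic_load_add_global_noret (result dead), and likewise for the other
+// address spaces; the _ret/_noret split is purely the use_empty() predicate.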
+
+defm atomic_swap : ret_noret_binary_atomic_op<atomic_swap>;
+defm atomic_load_add : ret_noret_binary_atomic_op<atomic_load_add>;
+defm atomic_load_and : ret_noret_binary_atomic_op<atomic_load_and>;
+defm atomic_load_max : ret_noret_binary_atomic_op<atomic_load_max>;
+defm atomic_load_min : ret_noret_binary_atomic_op<atomic_load_min>;
+defm atomic_load_or : ret_noret_binary_atomic_op<atomic_load_or>;
+defm atomic_load_sub : ret_noret_binary_atomic_op<atomic_load_sub>;
+defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
+defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
+defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
+defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
+
def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
@@ -513,21 +510,31 @@ def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
def atomic_store_local : LocalStore <atomic_store>;
-def load_align8_local : Aligned8Bytes <
- (ops node:$ptr), (load_local node:$ptr)
->;
-def load_align16_local : Aligned16Bytes <
- (ops node:$ptr), (load_local node:$ptr)
->;
+def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+ let MinAlignment = 8;
+}
-def store_align8_local : Aligned8Bytes <
- (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
->;
+def load_align16_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+ let MinAlignment = 16;
+}
+
+def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
+ (store_local node:$val, node:$ptr)>, Aligned<8> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+}
+
+def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
+ (store_local node:$val, node:$ptr)>, Aligned<16> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+}
-def store_align16_local : Aligned16Bytes <
- (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
->;
def atomic_store_flat : FlatStore <atomic_store>;
def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress;
@@ -547,69 +554,26 @@ class region_binary_atomic_op<SDNode atomic_op> :
}]>;
-def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
-def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
-def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
-def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
-def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
-def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
-def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
-def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
-def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
-def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
-def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
-
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
-class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
- (ops node:$ptr, node:$cmp, node:$swap),
- (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
- AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-
-class AtomicCmpSwapRegion <SDNode cmp_swap_node> : PatFrag<
- (ops node:$ptr, node:$cmp, node:$swap),
- (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
- AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
-}]>;
+let AddressSpaces = StoreAddress_local.AddrSpaces in {
+defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
+defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
+}
-def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
+let AddressSpaces = StoreAddress_region.AddrSpaces in {
+defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>;
+defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
+}
class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
-multiclass global_binary_atomic_op<SDNode atomic_op> {
- def "" : global_binary_atomic_op_frag<atomic_op>;
-
- def _noret : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
-
- def _ret : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
-}
-
-defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
-defm atomic_add_global : global_binary_atomic_op<atomic_load_add>;
-defm atomic_and_global : global_binary_atomic_op<atomic_load_and>;
-defm atomic_max_global : global_binary_atomic_op<atomic_load_max>;
-defm atomic_min_global : global_binary_atomic_op<atomic_load_min>;
-defm atomic_or_global : global_binary_atomic_op<atomic_load_or>;
-defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
-defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
-defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
-defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
-
// Legacy.
def AMDGPUatomic_cmp_swap_global : PatFrag<
(ops node:$ptr, node:$value),
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 670f6225fbf7..5aba35a19ced 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -11,6 +11,13 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
+#if defined(_MSC_VER) || defined(__MINGW32__)
+// According to Microsoft, one must set _USE_MATH_DEFINES in order to get M_PI
+// from the Visual C++ cmath / math.h headers:
+// https://docs.microsoft.com/en-us/cpp/c-runtime-library/math-constants?view=vs-2019
+#define _USE_MATH_DEFINES
+#endif
+
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
@@ -20,6 +27,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
@@ -32,7 +40,7 @@ using namespace LegalityPredicates;
static LegalityPredicate isMultiple32(unsigned TypeIdx,
- unsigned MaxSize = 512) {
+ unsigned MaxSize = 1024) {
return [=](const LegalityQuery &Query) {
const LLT Ty = Query.Types[TypeIdx];
const LLT EltTy = Ty.getScalarType();
@@ -40,12 +48,27 @@ static LegalityPredicate isMultiple32(unsigned TypeIdx,
};
}
+static LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].getSizeInBits() == Size;
+ };
+}
+
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT Ty = Query.Types[TypeIdx];
return Ty.isVector() &&
Ty.getNumElements() % 2 != 0 &&
- Ty.getElementType().getSizeInBits() < 32;
+ Ty.getElementType().getSizeInBits() < 32 &&
+ Ty.getSizeInBits() % 32 != 0;
+ };
+}
+
+static LegalityPredicate isWideVec16(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ const LLT EltTy = Ty.getScalarType();
+ return EltTy.getSizeInBits() == 16 && Ty.getNumElements() > 2;
};
}
@@ -68,6 +91,31 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
};
}
+// Increase the number of vector elements to reach the next multiple of 32-bit
+// type.
+static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+
+ const LLT EltTy = Ty.getElementType();
+ const int Size = Ty.getSizeInBits();
+ const int EltSize = EltTy.getSizeInBits();
+ const int NextMul32 = (Size + 31) / 32;
+
+ assert(EltSize < 32);
+
+ const int NewNumElts = (32 * NextMul32 + EltSize - 1) / EltSize;
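+    // e.g. v3s8: Size = 24 -> NextMul32 = 1 and NewNumElts = 39 / 8 = 4,
+    // so v3s8 widens to v4s8 (a full 32 bits).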
+ return std::make_pair(TypeIdx, LLT::vector(NewNumElts, EltTy));
+ };
+}
+
+static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isVector() && QueryTy.getSizeInBits() < Size;
+ };
+}
+
static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
@@ -82,7 +130,7 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
};
}
-// Any combination of 32 or 64-bit elements up to 512 bits, and multiples of
+// Any combination of 32 or 64-bit elements up to 1024 bits, and multiples of
// v2s16.
static LegalityPredicate isRegisterType(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
@@ -94,7 +142,21 @@ static LegalityPredicate isRegisterType(unsigned TypeIdx) {
EltSize == 128 || EltSize == 256;
}
- return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
+ return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 1024;
+ };
+}
+
+static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].getElementType() == Type;
+ };
+}
+
+static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
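+    // e.g. a G_STORE of an s64 value through a 32-bit memory operand.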
+ const LLT Ty = Query.Types[TypeIdx];
+ return !Ty.isVector() && Ty.getSizeInBits() > 32 &&
+ Query.MMODescrs[0].SizeInBits < Ty.getSizeInBits();
};
}
@@ -112,9 +174,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
+ const LLT S96 = LLT::scalar(96);
const LLT S128 = LLT::scalar(128);
const LLT S256 = LLT::scalar(256);
- const LLT S512 = LLT::scalar(512);
+ const LLT S1024 = LLT::scalar(1024);
const LLT V2S16 = LLT::vector(2, 16);
const LLT V4S16 = LLT::vector(4, 16);
@@ -134,6 +197,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT V14S32 = LLT::vector(14, 32);
const LLT V15S32 = LLT::vector(15, 32);
const LLT V16S32 = LLT::vector(16, 32);
+ const LLT V32S32 = LLT::vector(32, 32);
const LLT V2S64 = LLT::vector(2, 64);
const LLT V3S64 = LLT::vector(3, 64);
@@ -142,16 +206,19 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT V6S64 = LLT::vector(6, 64);
const LLT V7S64 = LLT::vector(7, 64);
const LLT V8S64 = LLT::vector(8, 64);
+ const LLT V16S64 = LLT::vector(16, 64);
std::initializer_list<LLT> AllS32Vectors =
{V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
- V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
+ V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32, V32S32};
std::initializer_list<LLT> AllS64Vectors =
- {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};
+ {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64, V16S64};
const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
+ const LLT Constant32Ptr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS_32BIT);
const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
+ const LLT RegionPtr = GetAddrSpacePtr(AMDGPUAS::REGION_ADDRESS);
const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
@@ -162,7 +229,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
};
const std::initializer_list<LLT> AddrSpaces32 = {
- LocalPtr, PrivatePtr
+ LocalPtr, PrivatePtr, Constant32Ptr, RegionPtr
};
const std::initializer_list<LLT> FPTypesBase = {
@@ -216,37 +283,34 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
.clampScalar(0, S32, S64)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
+ .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
.widenScalarToNextPow2(0)
.scalarize(0);
- getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
+ getActionDefinitionsBuilder({G_UADDO, G_USUBO,
G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
.legalFor({{S32, S1}})
- .clampScalar(0, S32, S32);
+ .clampScalar(0, S32, S32)
+ .scalarize(0); // TODO: Implement.
+
+ getActionDefinitionsBuilder({G_SADDO, G_SSUBO})
+ .lower();
getActionDefinitionsBuilder(G_BITCAST)
- .legalForCartesianProduct({S32, V2S16})
- .legalForCartesianProduct({S64, V2S32, V4S16})
- .legalForCartesianProduct({V2S64, V4S32})
// Don't worry about the size constraint.
- .legalIf(all(isPointer(0), isPointer(1)));
+ .legalIf(all(isRegisterType(0), isRegisterType(1)))
+ // FIXME: Testing hack
+    .legalForCartesianProduct({S16, LLT::vector(2, 8)});
- if (ST.has16BitInsts()) {
- getActionDefinitionsBuilder(G_FCONSTANT)
- .legalFor({S32, S64, S16})
- .clampScalar(0, S16, S64);
- } else {
- getActionDefinitionsBuilder(G_FCONSTANT)
- .legalFor({S32, S64})
- .clampScalar(0, S32, S64);
- }
+ getActionDefinitionsBuilder(G_FCONSTANT)
+ .legalFor({S32, S64, S16})
+ .clampScalar(0, S16, S64);
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
+ .legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .clampScalarOrElt(0, S32, S512)
+ .clampScalarOrElt(0, S32, S1024)
.legalIf(isMultiple32(0))
.widenScalarToNextPow2(0, 32)
.clampMaxNumElements(0, S32, 16);
@@ -256,23 +320,33 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// values may not be legal. We need to figure out how to distinguish
// between these two scenarios.
getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({S1, S32, S64, GlobalPtr,
+ .legalFor({S1, S32, S64, S16, GlobalPtr,
LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
.clampScalar(0, S32, S64)
.widenScalarToNextPow2(0)
.legalIf(isPointer(0));
setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE)
+ .customFor({LocalPtr, GlobalPtr, ConstantPtr, Constant32Ptr});
+
auto &FPOpActions = getActionDefinitionsBuilder(
- { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
+ { G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE})
.legalFor({S32, S64});
+ auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
+ .customFor({S32, S64});
+ auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV)
+ .customFor({S32, S64});
if (ST.has16BitInsts()) {
if (ST.hasVOP3PInsts())
FPOpActions.legalFor({S16, V2S16});
else
FPOpActions.legalFor({S16});
+
+ TrigActions.customFor({S16});
+ FDIVActions.customFor({S16});
}
auto &MinNumMaxNum = getActionDefinitionsBuilder({
@@ -293,22 +367,37 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
}
- // TODO: Implement
- getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
-
if (ST.hasVOP3PInsts())
FPOpActions.clampMaxNumElements(0, S16, 2);
+
FPOpActions
.scalarize(0)
.clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
+ TrigActions
+ .scalarize(0)
+ .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
+
+ FDIVActions
+ .scalarize(0)
+ .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
+
+ getActionDefinitionsBuilder({G_FNEG, G_FABS})
+ .legalFor(FPTypesPK16)
+ .clampMaxNumElements(0, S16, 2)
+ .scalarize(0)
+ .clampScalar(0, S16, S64);
+
+ // TODO: Implement
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
+
if (ST.has16BitInsts()) {
- getActionDefinitionsBuilder(G_FSQRT)
+ getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
.legalFor({S32, S64, S16})
.scalarize(0)
.clampScalar(0, S16, S64);
} else {
- getActionDefinitionsBuilder(G_FSQRT)
+ getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
.legalFor({S32, S64})
.scalarize(0)
.clampScalar(0, S32, S64);
@@ -334,23 +423,43 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.clampScalar(0, S32, S64);
+ // Whether this is legal depends on the floating point mode for the function.
+ auto &FMad = getActionDefinitionsBuilder(G_FMAD);
+ if (ST.hasMadF16())
+ FMad.customFor({S32, S16});
+ else
+ FMad.customFor({S32});
+ FMad.scalarize(0)
+ .lower();
+
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
.legalFor({{S64, S32}, {S32, S16}, {S64, S16},
{S32, S1}, {S64, S1}, {S16, S1},
+ {S96, S32},
// FIXME: Hack
{S64, LLT::scalar(33)},
{S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
.scalarize(0);
- getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
- .legalFor({{S32, S32}, {S64, S32}})
+ // TODO: Split s1->s64 during regbankselect for VALU.
+ auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
+ .legalFor({{S32, S32}, {S64, S32}, {S16, S32}, {S32, S1}, {S16, S1}, {S64, S1}})
.lowerFor({{S32, S64}})
- .customFor({{S64, S64}})
- .scalarize(0);
+ .customFor({{S64, S64}});
+ if (ST.has16BitInsts())
+ IToFP.legalFor({{S16, S16}});
+ IToFP.clampScalar(1, S32, S64)
+ .scalarize(0);
+
+ auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
+ .legalFor({{S32, S32}, {S32, S64}, {S32, S16}});
+ if (ST.has16BitInsts())
+ FPToI.legalFor({{S16, S16}});
+ else
+ FPToI.minScalar(1, S32);
- getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
- .legalFor({{S32, S32}, {S32, S64}})
- .scalarize(0);
+ FPToI.minScalar(0, S32)
+ .scalarize(0);
getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
.legalFor({S32, S64})
@@ -374,6 +483,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalForCartesianProduct(AddrSpaces32, {S32})
.scalarize(0);
+ getActionDefinitionsBuilder(G_PTR_MASK)
+ .scalarize(0)
+ .alwaysLegal();
+
setAction({G_BLOCK_ADDR, CodePtr}, Legal);
auto &CmpBuilder =
@@ -415,7 +528,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.widenScalarToNextPow2(1, 32);
// TODO: Expand for > s32
- getActionDefinitionsBuilder(G_BSWAP)
+ getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE})
.legalFor({S32})
.clampScalar(0, S32, S32)
.scalarize(0);
@@ -491,87 +604,239 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
});
- if (ST.hasFlatAddressSpace()) {
- getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
- .scalarize(0)
- .custom();
- }
+ getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
+ .scalarize(0)
+ .custom();
// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
// handle some operations by just promoting the register during
// selection. There are also d16 loads on GFX9+ which preserve the high bits.
- getActionDefinitionsBuilder({G_LOAD, G_STORE})
- .narrowScalarIf([](const LegalityQuery &Query) {
- unsigned Size = Query.Types[0].getSizeInBits();
- unsigned MemSize = Query.MMODescrs[0].SizeInBits;
- return (Size > 32 && MemSize < Size);
- },
- [](const LegalityQuery &Query) {
- return std::make_pair(0, LLT::scalar(32));
- })
- .fewerElementsIf([=](const LegalityQuery &Query) {
- unsigned MemSize = Query.MMODescrs[0].SizeInBits;
- return (MemSize == 96) &&
- Query.Types[0].isVector() &&
- !ST.hasDwordx3LoadStores();
- },
- [=](const LegalityQuery &Query) {
- return std::make_pair(0, V2S32);
- })
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &Ty0 = Query.Types[0];
-
- unsigned Size = Ty0.getSizeInBits();
- unsigned MemSize = Query.MMODescrs[0].SizeInBits;
- if (Size < 32 || (Size > 32 && MemSize < Size))
- return false;
-
- if (Ty0.isVector() && Size != MemSize)
- return false;
-
- // TODO: Decompose private loads into 4-byte components.
- // TODO: Illegal flat loads on SI
- switch (MemSize) {
- case 8:
- case 16:
- return Size == 32;
- case 32:
- case 64:
- case 128:
- return true;
+ auto maxSizeForAddrSpace = [this](unsigned AS) -> unsigned {
+ switch (AS) {
+ // FIXME: Private element size.
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ return 32;
+ // FIXME: Check subtarget
+ case AMDGPUAS::LOCAL_ADDRESS:
+ return ST.useDS128() ? 128 : 64;
+
+  // Treat constant and global as identical. SMRD loads are sometimes usable
+  // for global loads (ideally the constant address space should be
+  // eliminated), depending on the context. Legality cannot be
+  // context-dependent, but RegBankSelect can split the load as necessary,
+  // depending on the pointer register bank/uniformity and on whether the
+  // memory is invariant or not written in a kernel.
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ return 512;
+ default:
+ return 128;
+ }
+ };
- case 96:
- return ST.hasDwordx3LoadStores();
-
- case 256:
- case 512:
- // TODO: Possibly support loads of i256 and i512 . This will require
- // adding i256 and i512 types to MVT in order for to be able to use
- // TableGen.
- // TODO: Add support for other vector types, this will require
- // defining more value mappings for the new types.
- return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 ||
- Ty0.getScalarType().getSizeInBits() == 64);
-
- default:
- return false;
- }
- })
- .clampScalar(0, S32, S64);
+ const auto needToSplitLoad = [=](const LegalityQuery &Query) -> bool {
+ const LLT DstTy = Query.Types[0];
+
+ // Split vector extloads.
+ unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+ if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
+ return true;
+
+ const LLT PtrTy = Query.Types[1];
+ unsigned AS = PtrTy.getAddressSpace();
+ if (MemSize > maxSizeForAddrSpace(AS))
+ return true;
+
+    // Catch weird-sized loads that don't evenly divide into the access sizes.
+ // TODO: May be able to widen depending on alignment etc.
+ unsigned NumRegs = MemSize / 32;
+ if (NumRegs == 3 && !ST.hasDwordx3LoadStores())
+ return true;
+
+ unsigned Align = Query.MMODescrs[0].AlignInBits;
+ if (Align < MemSize) {
+ const SITargetLowering *TLI = ST.getTargetLowering();
+ return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
+ }
+
+ return false;
+ };
+ unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
+ unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
+ unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
+
+ // TODO: Refine based on subtargets which support unaligned access or 128-bit
+ // LDS
+ // TODO: Unsupported flat for SI.
+
+ for (unsigned Op : {G_LOAD, G_STORE}) {
+ const bool IsStore = Op == G_STORE;
+
+ auto &Actions = getActionDefinitionsBuilder(Op);
+ // Whitelist the common cases.
+ // TODO: Pointer loads
+ // TODO: Wide constant loads
+ // TODO: Only CI+ has 3x loads
+ // TODO: Loads to s16 on gfx9
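+    // Each entry below is {result type, pointer type, memory size in bits,
+    // minimum alignment in bits}; an alignment entry of 0 imposes no
+    // requirement.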
+ Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32},
+ {V2S32, GlobalPtr, 64, GlobalAlign32},
+ {V3S32, GlobalPtr, 96, GlobalAlign32},
+ {S96, GlobalPtr, 96, GlobalAlign32},
+ {V4S32, GlobalPtr, 128, GlobalAlign32},
+ {S128, GlobalPtr, 128, GlobalAlign32},
+ {S64, GlobalPtr, 64, GlobalAlign32},
+ {V2S64, GlobalPtr, 128, GlobalAlign32},
+ {V2S16, GlobalPtr, 32, GlobalAlign32},
+ {S32, GlobalPtr, 8, GlobalAlign8},
+ {S32, GlobalPtr, 16, GlobalAlign16},
+
+ {S32, LocalPtr, 32, 32},
+ {S64, LocalPtr, 64, 32},
+ {V2S32, LocalPtr, 64, 32},
+ {S32, LocalPtr, 8, 8},
+ {S32, LocalPtr, 16, 16},
+ {V2S16, LocalPtr, 32, 32},
+
+ {S32, PrivatePtr, 32, 32},
+ {S32, PrivatePtr, 8, 8},
+ {S32, PrivatePtr, 16, 16},
+ {V2S16, PrivatePtr, 32, 32},
+
+ {S32, FlatPtr, 32, GlobalAlign32},
+ {S32, FlatPtr, 16, GlobalAlign16},
+ {S32, FlatPtr, 8, GlobalAlign8},
+ {V2S16, FlatPtr, 32, GlobalAlign32},
+
+ {S32, ConstantPtr, 32, GlobalAlign32},
+ {V2S32, ConstantPtr, 64, GlobalAlign32},
+ {V3S32, ConstantPtr, 96, GlobalAlign32},
+ {V4S32, ConstantPtr, 128, GlobalAlign32},
+ {S64, ConstantPtr, 64, GlobalAlign32},
+ {S128, ConstantPtr, 128, GlobalAlign32},
+ {V2S32, ConstantPtr, 32, GlobalAlign32}});
+ Actions
+ .customIf(typeIs(1, Constant32Ptr))
+ .narrowScalarIf(
+ [=](const LegalityQuery &Query) -> bool {
+ return !Query.Types[0].isVector() && needToSplitLoad(Query);
+ },
+ [=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
+ const LLT DstTy = Query.Types[0];
+ const LLT PtrTy = Query.Types[1];
+
+ const unsigned DstSize = DstTy.getSizeInBits();
+ unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+
+ // Split extloads.
+ if (DstSize > MemSize)
+ return std::make_pair(0, LLT::scalar(MemSize));
+
+ if (DstSize > 32 && (DstSize % 32 != 0)) {
+ // FIXME: Need a way to specify non-extload of larger size if
+ // suitably aligned.
+ return std::make_pair(0, LLT::scalar(32 * (DstSize / 32)));
+ }
+
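+              // e.g. an s96 load from private memory (32-bit max access
+              // size) is narrowed to s32 here.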
+ unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
+ if (MemSize > MaxSize)
+ return std::make_pair(0, LLT::scalar(MaxSize));
+
+ unsigned Align = Query.MMODescrs[0].AlignInBits;
+ return std::make_pair(0, LLT::scalar(Align));
+ })
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) -> bool {
+ return Query.Types[0].isVector() && needToSplitLoad(Query);
+ },
+ [=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
+ const LLT DstTy = Query.Types[0];
+ const LLT PtrTy = Query.Types[1];
+
+ LLT EltTy = DstTy.getElementType();
+ unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
+
+ // Split if it's too large for the address space.
+ if (Query.MMODescrs[0].SizeInBits > MaxSize) {
+ unsigned NumElts = DstTy.getNumElements();
+ unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize;
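+                // e.g. a v4s32 local load with a 64-bit max access size
+                // (no ds128) becomes two v2s32 pieces.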
+
+                // FIXME: Refine when odd breakdowns are handled.
+ // The scalars will need to be re-legalized.
+ if (NumPieces == 1 || NumPieces >= NumElts ||
+ NumElts % NumPieces != 0)
+ return std::make_pair(0, EltTy);
+
+ return std::make_pair(0,
+ LLT::vector(NumElts / NumPieces, EltTy));
+ }
+
+ // Need to split because of alignment.
+ unsigned Align = Query.MMODescrs[0].AlignInBits;
+ unsigned EltSize = EltTy.getSizeInBits();
+ if (EltSize > Align &&
+ (EltSize / Align < DstTy.getNumElements())) {
+ return std::make_pair(0, LLT::vector(EltSize / Align, EltTy));
+ }
+
+ // May need relegalization for the scalars.
+ return std::make_pair(0, EltTy);
+ })
+ .minScalar(0, S32);
+
+ if (IsStore)
+ Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32));
+
+ // TODO: Need a bitcast lower option?
+ Actions
+ .legalIf([=](const LegalityQuery &Query) {
+ const LLT Ty0 = Query.Types[0];
+ unsigned Size = Ty0.getSizeInBits();
+ unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+ unsigned Align = Query.MMODescrs[0].AlignInBits;
+
+ // No extending vector loads.
+ if (Size > MemSize && Ty0.isVector())
+ return false;
+
+          // FIXME: Widening the store based on alignment is not valid.
+ if (MemSize < Size)
+ MemSize = std::max(MemSize, Align);
+
+ switch (MemSize) {
+ case 8:
+ case 16:
+ return Size == 32;
+ case 32:
+ case 64:
+ case 128:
+ return true;
+ case 96:
+ return ST.hasDwordx3LoadStores();
+ case 256:
+ case 512:
+ return true;
+ default:
+ return false;
+ }
+ })
+ .widenScalarToNextPow2(0)
+ // TODO: v3s32->v4s32 with alignment
+ .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0));
+ }
- // FIXME: Handle alignment requirements.
auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
- .legalForTypesWithMemDesc({
- {S32, GlobalPtr, 8, 8},
- {S32, GlobalPtr, 16, 8},
- {S32, LocalPtr, 8, 8},
- {S32, LocalPtr, 16, 8},
- {S32, PrivatePtr, 8, 8},
- {S32, PrivatePtr, 16, 8}});
+ .legalForTypesWithMemDesc({{S32, GlobalPtr, 8, 8},
+ {S32, GlobalPtr, 16, 2 * 8},
+ {S32, LocalPtr, 8, 8},
+ {S32, LocalPtr, 16, 16},
+ {S32, PrivatePtr, 8, 8},
+ {S32, PrivatePtr, 16, 16},
+ {S32, ConstantPtr, 8, 8},
+ {S32, ConstantPtr, 16, 2 * 8}});
if (ST.hasFlatAddressSpace()) {
- ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
- {S32, FlatPtr, 16, 8}});
+ ExtLoads.legalForTypesWithMemDesc(
+ {{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}});
}
ExtLoads.clampScalar(0, S32, S32)
@@ -590,6 +855,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
}
+ getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
+ .legalFor({{S32, LocalPtr}});
+
+ getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
+ .lower();
+
// TODO: Pointer types, any 32-bit or 64-bit vector
getActionDefinitionsBuilder(G_SELECT)
.legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
@@ -643,7 +914,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return (EltTy.getSizeInBits() == 16 ||
EltTy.getSizeInBits() % 32 == 0) &&
VecTy.getSizeInBits() % 32 == 0 &&
- VecTy.getSizeInBits() <= 512 &&
+ VecTy.getSizeInBits() <= 1024 &&
IdxTy.getSizeInBits() == 32;
})
.clampScalar(EltTypeIdx, S32, S64)
@@ -663,6 +934,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// FIXME: Doesn't handle extract of illegal sizes.
getActionDefinitionsBuilder(Op)
+ .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32)))
+ // FIXME: Multiples of 16 should not be legal.
.legalIf([=](const LegalityQuery &Query) {
const LLT BigTy = Query.Types[BigTyIdx];
const LLT LitTy = Query.Types[LitTyIdx];
@@ -686,18 +959,36 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}
- getActionDefinitionsBuilder(G_BUILD_VECTOR)
- .legalForCartesianProduct(AllS32Vectors, {S32})
- .legalForCartesianProduct(AllS64Vectors, {S64})
- .clampNumElements(0, V16S32, V16S32)
- .clampNumElements(0, V2S64, V8S64)
- .minScalarSameAs(1, 0)
- .legalIf(isRegisterType(0))
- .minScalarOrElt(0, S32);
+ auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalForCartesianProduct(AllS32Vectors, {S32})
+ .legalForCartesianProduct(AllS64Vectors, {S64})
+ .clampNumElements(0, V16S32, V32S32)
+ .clampNumElements(0, V2S64, V16S64)
+ .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16));
+
+ if (ST.hasScalarPackInsts())
+ BuildVector.legalFor({V2S16, S32});
+
+ BuildVector
+ .minScalarSameAs(1, 0)
+ .legalIf(isRegisterType(0))
+ .minScalarOrElt(0, S32);
+
+ if (ST.hasScalarPackInsts()) {
+ getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
+ .legalFor({V2S16, S32})
+ .lower();
+ } else {
+ getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
+ .lower();
+ }
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalIf(isRegisterType(0));
+ // TODO: Don't fully scalarize v2s16 pieces
+ getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower();
+
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
@@ -715,14 +1006,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return false;
};
- getActionDefinitionsBuilder(Op)
+ auto &Builder = getActionDefinitionsBuilder(Op)
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
// Clamp the little scalar to s8-s256 and make it a power of 2. It's not
// worth considering the multiples of 64 since 2*192 and 2*384 are not
// valid.
.clampScalar(LitTyIdx, S16, S256)
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
-
+ .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
+ .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32),
+ elementTypeIs(1, S16)),
+ changeTo(1, V2S16))
// Break up vectors with weird elements into scalars
.fewerElementsIf(
[=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
@@ -730,25 +1024,37 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.fewerElementsIf(
[=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
scalarize(1))
- .clampScalar(BigTyIdx, S32, S512)
- .widenScalarIf(
+ .clampScalar(BigTyIdx, S32, S1024)
+ .lowerFor({{S16, V2S16}});
+
+ if (Op == G_MERGE_VALUES) {
+ Builder.widenScalarIf(
+ // TODO: Use 16-bit shifts if legal for 8-bit values?
[=](const LegalityQuery &Query) {
- const LLT &Ty = Query.Types[BigTyIdx];
- return !isPowerOf2_32(Ty.getSizeInBits()) &&
- Ty.getSizeInBits() % 16 != 0;
+ const LLT Ty = Query.Types[LitTyIdx];
+ return Ty.getSizeInBits() < 32;
},
- [=](const LegalityQuery &Query) {
- // Pick the next power of 2, or a multiple of 64 over 128.
- // Whichever is smaller.
- const LLT &Ty = Query.Types[BigTyIdx];
- unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
- if (NewSizeInBits >= 256) {
- unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
- if (RoundedTo < NewSizeInBits)
- NewSizeInBits = RoundedTo;
- }
- return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
- })
+ changeTo(LitTyIdx, S32));
+ }
+
+ Builder.widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[BigTyIdx];
+ return !isPowerOf2_32(Ty.getSizeInBits()) &&
+ Ty.getSizeInBits() % 16 != 0;
+ },
+ [=](const LegalityQuery &Query) {
+        // Pick the next power of 2, or a multiple of 64 over 128, whichever
+        // is smaller.
+ const LLT &Ty = Query.Types[BigTyIdx];
+ unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
+ if (NewSizeInBits >= 256) {
+ unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
+ if (RoundedTo < NewSizeInBits)
+ NewSizeInBits = RoundedTo;
+ }
+ return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
+ })
.legalIf([=](const LegalityQuery &Query) {
const LLT &BigTy = Query.Types[BigTyIdx];
const LLT &LitTy = Query.Types[LitTyIdx];
@@ -760,43 +1066,56 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return BigTy.getSizeInBits() % 16 == 0 &&
LitTy.getSizeInBits() % 16 == 0 &&
- BigTy.getSizeInBits() <= 512;
+ BigTy.getSizeInBits() <= 1024;
})
// Any vectors left are the wrong size. Scalarize them.
.scalarize(0)
.scalarize(1);
}
+ getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+
computeTables();
verify(*ST.getInstrInfo());
}
bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
+ MachineIRBuilder &B,
GISelChangeObserver &Observer) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_ADDRSPACE_CAST:
- return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
+ return legalizeAddrSpaceCast(MI, MRI, B);
case TargetOpcode::G_FRINT:
- return legalizeFrint(MI, MRI, MIRBuilder);
+ return legalizeFrint(MI, MRI, B);
case TargetOpcode::G_FCEIL:
- return legalizeFceil(MI, MRI, MIRBuilder);
+ return legalizeFceil(MI, MRI, B);
case TargetOpcode::G_INTRINSIC_TRUNC:
- return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
+ return legalizeIntrinsicTrunc(MI, MRI, B);
case TargetOpcode::G_SITOFP:
- return legalizeITOFP(MI, MRI, MIRBuilder, true);
+ return legalizeITOFP(MI, MRI, B, true);
case TargetOpcode::G_UITOFP:
- return legalizeITOFP(MI, MRI, MIRBuilder, false);
+ return legalizeITOFP(MI, MRI, B, false);
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE:
- return legalizeMinNumMaxNum(MI, MRI, MIRBuilder);
+ return legalizeMinNumMaxNum(MI, MRI, B);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
- return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
+ return legalizeExtractVectorElt(MI, MRI, B);
case TargetOpcode::G_INSERT_VECTOR_ELT:
- return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
+ return legalizeInsertVectorElt(MI, MRI, B);
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FCOS:
+ return legalizeSinCos(MI, MRI, B);
+ case TargetOpcode::G_GLOBAL_VALUE:
+ return legalizeGlobalValue(MI, MRI, B);
+ case TargetOpcode::G_LOAD:
+ return legalizeLoad(MI, MRI, B, Observer);
+ case TargetOpcode::G_FMAD:
+ return legalizeFMad(MI, MRI, B);
+ case TargetOpcode::G_FDIV:
+ return legalizeFDIV(MI, MRI, B);
default:
return false;
}
@@ -807,11 +1126,13 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
Register AMDGPULegalizerInfo::getSegmentAperture(
unsigned AS,
MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- MachineFunction &MF = MIRBuilder.getMF();
+ MachineIRBuilder &B) const {
+ MachineFunction &MF = B.getMF();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const LLT S32 = LLT::scalar(32);
+ assert(AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS);
+
if (ST.hasApertureRegs()) {
// FIXME: Use inline constants (src_{shared, private}_base) instead of
// getreg.
@@ -829,13 +1150,13 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
Register ApertureReg = MRI.createGenericVirtualRegister(S32);
Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
+ B.buildInstr(AMDGPU::S_GETREG_B32)
.addDef(GetReg)
.addImm(Encoding);
MRI.setType(GetReg, S32);
- auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1);
- MIRBuilder.buildInstr(TargetOpcode::G_SHL)
+ auto ShiftAmt = B.buildConstant(S32, WidthM1 + 1);
+ B.buildInstr(TargetOpcode::G_SHL)
.addDef(ApertureReg)
.addUse(GetReg)
.addUse(ShiftAmt.getReg(0));
@@ -846,8 +1167,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
Register QueuePtr = MRI.createGenericVirtualRegister(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
- // FIXME: Placeholder until we can track the input registers.
- MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ if (!loadInputValue(QueuePtr, B, &MFI->getArgInfo().QueuePtr))
+ return Register();
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
@@ -870,18 +1192,19 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
Register LoadResult = MRI.createGenericVirtualRegister(S32);
Register LoadAddr;
- MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
- MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
+ B.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
+ B.buildLoad(LoadResult, LoadAddr, *MMO);
return LoadResult;
}
bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- MachineFunction &MF = MIRBuilder.getMF();
+ MachineIRBuilder &B) const {
+ MachineFunction &MF = B.getMF();
- MIRBuilder.setInstr(MI);
+ B.setInstr(MI);
+ const LLT S32 = LLT::scalar(32);
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -899,7 +1222,28 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
- MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
+ MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST));
+ return true;
+ }
+
+ if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ // Truncate.
+ B.buildExtract(Dst, Src, 0);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ uint32_t AddrHiVal = Info->get32BitAddressHighBits();
+
+ // FIXME: This is a bit ugly due to creating a merge of 2 pointers into
+ // another pointer. Merge operands are required to be the same type, but
+ // creating an extra ptrtoint would be kind of pointless.
+ auto HighAddr = B.buildConstant(
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal);
+ B.buildMerge(Dst, {Src, HighAddr.getReg(0)});
+ MI.eraseFromParent();
return true;
}
@@ -908,47 +1252,52 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
DestAS == AMDGPUAS::PRIVATE_ADDRESS);
unsigned NullVal = TM.getNullPointerValue(DestAS);
- auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal);
- auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0);
+ auto SegmentNull = B.buildConstant(DstTy, NullVal);
+ auto FlatNull = B.buildConstant(SrcTy, 0);
Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
// Extract low 32-bits of the pointer.
- MIRBuilder.buildExtract(PtrLo32, Src, 0);
+ B.buildExtract(PtrLo32, Src, 0);
Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
- MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
+ B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
+ B.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
MI.eraseFromParent();
return true;
}
- assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
- SrcAS == AMDGPUAS::PRIVATE_ADDRESS);
+ if (SrcAS != AMDGPUAS::LOCAL_ADDRESS && SrcAS != AMDGPUAS::PRIVATE_ADDRESS)
+ return false;
+
+ if (!ST.hasFlatAddressSpace())
+ return false;
auto SegmentNull =
- MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
+ B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
auto FlatNull =
- MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
+ B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
- Register ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder);
+ Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
+ if (!ApertureReg.isValid())
+ return false;
Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));
+ B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));
Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);
// Coerce the type of the low half of the result so we can use merge_values.
- Register SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
- MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
+ Register SrcAsInt = MRI.createGenericVirtualRegister(S32);
+ B.buildInstr(TargetOpcode::G_PTRTOINT)
.addDef(SrcAsInt)
.addUse(Src);
// TODO: Should we allow mismatched types but matching sizes in merges to
// avoid the ptrtoint?
- MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
- MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));
+ B.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
+ B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));
MI.eraseFromParent();
return true;
@@ -956,8 +1305,8 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
bool AMDGPULegalizerInfo::legalizeFrint(
MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- MIRBuilder.setInstr(MI);
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
Register Src = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(Src);
@@ -966,18 +1315,18 @@ bool AMDGPULegalizerInfo::legalizeFrint(
APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
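// Note on the constants (the usual add-and-subtract rounding trick): C1 is
// 2^52, so adding it (with Src's sign copied onto it) and subtracting it
// again rounds Src to an integer, f64 having 52 fraction bits; C2 sits just
// below 2^52, and anything larger in magnitude is already integral, so the
// select below returns Src unchanged in that case.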
- auto C1 = MIRBuilder.buildFConstant(Ty, C1Val);
- auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src);
+ auto C1 = B.buildFConstant(Ty, C1Val);
+ auto CopySign = B.buildFCopysign(Ty, C1, Src);
// TODO: Should this propagate fast-math-flags?
- auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign);
- auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign);
+ auto Tmp1 = B.buildFAdd(Ty, Src, CopySign);
+ auto Tmp2 = B.buildFSub(Ty, Tmp1, CopySign);
- auto C2 = MIRBuilder.buildFConstant(Ty, C2Val);
- auto Fabs = MIRBuilder.buildFAbs(Ty, Src);
+ auto C2 = B.buildFConstant(Ty, C2Val);
+ auto Fabs = B.buildFAbs(Ty, Src);
- auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
- MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
+ auto Cond = B.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
+ B.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
return true;
}
@@ -1124,7 +1473,7 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
- HelperBuilder.setMBB(*MI.getParent());
+ HelperBuilder.setInstr(MI);
return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
}
@@ -1187,6 +1536,194 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
return true;
}
+bool AMDGPULegalizerInfo::legalizeSinCos(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ unsigned Flags = MI.getFlags();
+
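+ // The source is pre-scaled by 1/(2*pi), presumably because the hardware
+ // sin/cos intrinsics operate on revolutions rather than radians; on
+ // subtargets with a reduced trig range the product is additionally
+ // wrapped into [0, 1) via amdgcn.fract below.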
+ Register TrigVal;
+ auto OneOver2Pi = B.buildFConstant(Ty, 0.5 / M_PI);
+ if (ST.hasTrigReducedRange()) {
+ auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags);
+ TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
+ .addUse(MulVal.getReg(0))
+ .setMIFlags(Flags).getReg(0);
+ } else
+ TrigVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags).getReg(0);
+
+ Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ?
+ Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
+ B.buildIntrinsic(TrigIntrin, makeArrayRef<Register>(DstReg), false)
+ .addUse(TrigVal)
+ .setMIFlags(Flags);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(
+ Register DstReg, LLT PtrTy,
+ MachineIRBuilder &B, const GlobalValue *GV,
+ unsigned Offset, unsigned GAFlags) const {
+ // In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered
+ // to the following code sequence:
+ //
+ // For constant address space:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol
+ // s_addc_u32 s1, s1, 0
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // a fixup or relocation is emitted to replace $symbol with a literal
+ // constant, which is a pc-relative offset from the encoding of the $symbol
+ // operand to the global variable.
+ //
+ // For global address space:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo
+ // s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // fixups or relocations are emitted to replace $symbol@*@lo and
+ // $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
+ // which is a 64-bit pc-relative offset from the encoding of the $symbol
+ // operand to the global variable.
+ //
+ // What we want here is an offset from the value returned by s_getpc
+ // (which is the address of the s_add_u32 instruction) to the global
+ // variable, but since the encoding of $symbol starts 4 bytes after the start
+ // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
+ // small. This requires us to add 4 to the global variable offset in order to
+ // compute the correct address.
+
+ LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+
+ Register PCReg = PtrTy.getSizeInBits() != 32 ? DstReg :
+ B.getMRI()->createGenericVirtualRegister(ConstPtrTy);
+
+ MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET)
+ .addDef(PCReg);
+
+ MIB.addGlobalAddress(GV, Offset + 4, GAFlags);
+ if (GAFlags == SIInstrInfo::MO_NONE)
+ MIB.addImm(0);
+ else
+ MIB.addGlobalAddress(GV, Offset + 4, GAFlags + 1);
+
+ B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
+
+ if (PtrTy.getSizeInBits() == 32)
+ B.buildExtract(DstReg, PCReg, 0);
+ return true;
+ }
+
+bool AMDGPULegalizerInfo::legalizeGlobalValue(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ unsigned AS = Ty.getAddressSpace();
+
+ const GlobalValue *GV = MI.getOperand(1).getGlobal();
+ MachineFunction &MF = B.getMF();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ B.setInstr(MI);
+
+ if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
+ if (!MFI->isEntryFunction()) {
+ const Function &Fn = MF.getFunction();
+ DiagnosticInfoUnsupported BadLDSDecl(
+ Fn, "local memory global used by non-kernel function", MI.getDebugLoc());
+ Fn.getContext().diagnose(BadLDSDecl);
+ }
+
+ // TODO: We could emit code to handle the initialization somewhere.
+ if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) {
+ B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), *GV));
+ MI.eraseFromParent();
+ return true;
+ }
+
+ const Function &Fn = MF.getFunction();
+ DiagnosticInfoUnsupported BadInit(
+ Fn, "unsupported initializer for address space", MI.getDebugLoc());
+ Fn.getContext().diagnose(BadInit);
+ return true;
+ }
+
+ const SITargetLowering *TLI = ST.getTargetLowering();
+
+ if (TLI->shouldEmitFixup(GV)) {
+ buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (TLI->shouldEmitPCReloc(GV)) {
+ buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0, SIInstrInfo::MO_REL32);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+ Register GOTAddr = MRI.createGenericVirtualRegister(PtrTy);
+
+ MachineMemOperand *GOTMMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ 8 /*Size*/, 8 /*Align*/);
+
+ buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
+
+ if (Ty.getSizeInBits() == 32) {
+ // Truncate if this is a 32-bit constant address.
+ auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
+ B.buildExtract(DstReg, Load, 0);
+ } else
+ B.buildLoad(DstReg, GOTAddr, *GOTMMO);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeLoad(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, GISelChangeObserver &Observer) const {
+ B.setInstr(MI);
+ LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+ auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg());
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Cast.getReg(0));
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeFMad(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ assert(Ty.isScalar());
+
+ // TODO: Always legal with future ftz flag.
+ if (Ty == LLT::scalar(32) && !ST.hasFP32Denormals())
+ return true;
+ if (Ty == LLT::scalar(16) && !ST.hasFP16Denormals())
+ return true;
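+ // Note (inferred from the checks above): G_FMAD is kept as-is only when
+ // denormals are disabled for the type, presumably because the hardware
+ // mad/mac instructions flush denormals; otherwise it is expanded by the
+ // generic lowering below.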
+
+ MachineFunction &MF = B.getMF();
+
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
+ HelperBuilder.setMBB(*MI.getParent());
+ return Helper.lowerFMad(MI) == LegalizerHelper::Legalized;
+}
+
// Return the use branch instruction, or null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI) {
@@ -1212,10 +1749,9 @@ Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg) const {
- if (!Arg->isRegister())
+ if (!Arg->isRegister() || !Arg->getRegister().isValid())
return false; // TODO: Handle these
- assert(Arg->getRegister() != 0);
assert(Arg->getRegister().isPhysical());
MachineRegisterInfo &MRI = *B.getMRI();
@@ -1229,19 +1765,30 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
const unsigned Mask = Arg->getMask();
const unsigned Shift = countTrailingZeros<unsigned>(Mask);
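// Illustrative example: for a bitfield argument held in, say, bits [19:10]
// of the input register, Mask is 0xFFC00 and Shift is 10, so the value is
// extracted with a right-shift of 10 and an AND with 0x3FF.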
- auto ShiftAmt = B.buildConstant(S32, Shift);
- auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt);
- B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift));
+ Register AndMaskSrc = LiveIn;
+
+ if (Shift != 0) {
+ auto ShiftAmt = B.buildConstant(S32, Shift);
+ AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0);
+ }
+
+ B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift));
} else
B.buildCopy(DstReg, LiveIn);
// Insert the argument copy if it doesn't already exist.
// FIXME: It seems EmitLiveInCopies isn't called anywhere?
if (!MRI.getVRegDef(LiveIn)) {
+ // FIXME: Should have scoped insert pt
+ MachineBasicBlock &OrigInsBB = B.getMBB();
+ auto OrigInsPt = B.getInsertPt();
+
MachineBasicBlock &EntryMBB = B.getMF().front();
EntryMBB.addLiveIn(Arg->getRegister());
B.setInsertPt(EntryMBB, EntryMBB.begin());
B.buildCopy(LiveIn, Arg->getRegister());
+
+ B.setInsertPt(OrigInsBB, OrigInsPt);
}
return true;
@@ -1272,6 +1819,113 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
return false;
}
+bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
+
+ if (legalizeFastUnsafeFDIV(MI, MRI, B))
+ return true;
+
+ return false;
+}
+
+bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ uint16_t Flags = MI.getFlags();
+
+ LLT ResTy = MRI.getType(Res);
+ LLT S32 = LLT::scalar(32);
+ LLT S64 = LLT::scalar(64);
+
+ const MachineFunction &MF = B.getMF();
+ bool Unsafe =
+ MF.getTarget().Options.UnsafeFPMath || MI.getFlag(MachineInstr::FmArcp);
+
+ if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64)
+ return false;
+
+ if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals())
+ return false;
+
+ if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
+ // 1 / x -> RCP(x)
+ if (CLHS->isExactlyValue(1.0)) {
+ B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
+ .addUse(RHS)
+ .setMIFlags(Flags);
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // -1 / x -> RCP( FNEG(x) )
+ if (CLHS->isExactlyValue(-1.0)) {
+ auto FNeg = B.buildFNeg(ResTy, RHS, Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
+ .addUse(FNeg.getReg(0))
+ .setMIFlags(Flags);
+
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
+ // x / y -> x * (1.0 / y)
+ if (Unsafe) {
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
+ .addUse(RHS)
+ .setMIFlags(Flags);
+ B.buildFMul(Res, LHS, RCP, Flags);
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+ uint16_t Flags = MI.getFlags();
+
+ LLT S32 = LLT::scalar(32);
+ LLT S1 = LLT::scalar(1);
+
+ auto Abs = B.buildFAbs(S32, RHS, Flags);
+ const APFloat C0Val(1.0f);
+
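+ // A sketch of the scaling trick (the constants are bit patterns):
+ // 0x6f800000 is 2^96 and 0x2f800000 is 2^-32 as 32-bit floats. If |RHS|
+ // exceeds 2^96 the denominator is pre-scaled by 2^-32 so its reciprocal
+ // stays representable, and the final multiply by Sel undoes the scaling.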
+ auto C0 = B.buildConstant(S32, 0x6f800000);
+ auto C1 = B.buildConstant(S32, 0x2f800000);
+ auto C2 = B.buildConstant(S32, FloatToBits(1.0f));
+
+ auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags);
+ auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);
+
+ auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
+
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
+ .addUse(Mul0.getReg(0))
+ .setMIFlags(Flags);
+
+ auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
+
+ B.buildFMul(Res, Sel, Mul1, Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -1306,11 +1960,79 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ unsigned AddrSpace) const {
+ B.setInstr(MI);
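+ // A flat pointer lands in the queried segment exactly when its high 32
+ // bits equal that segment's aperture base, so compare the extracted high
+ // half against the aperture register.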
+ Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B);
+ auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32);
+ B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg);
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Handle register layout difference for f16 images for some subtargets.
+Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
+ MachineRegisterInfo &MRI,
+ Register Reg) const {
+ if (!ST.hasUnpackedD16VMem())
+ return Reg;
+
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ LLT StoreVT = MRI.getType(Reg);
+ assert(StoreVT.isVector() && StoreVT.getElementType() == S16);
+
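+ // Sketch of the layout change: on unpacked-D16 subtargets each 16-bit
+ // element occupies its own 32-bit register, so e.g. <4 x s16> is rebuilt
+ // below as <4 x s32> with every element any-extended.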
+ auto Unmerge = B.buildUnmerge(S16, Reg);
+
+ SmallVector<Register, 4> WideRegs;
+ for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+ WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
+
+ int NumElts = StoreVT.getNumElements();
+
+ return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0);
+}
+
+bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ bool IsFormat) const {
+ // TODO: Reject f16 format on targets where unsupported.
+ Register VData = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(VData);
+
+ B.setInstr(MI);
+
+ const LLT S32 = LLT::scalar(32);
+ const LLT S16 = LLT::scalar(16);
+
+ // Fixup illegal register types for i8 stores.
+ if (Ty == LLT::scalar(8) || Ty == S16) {
+ Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
+ MI.getOperand(1).setReg(AnyExt);
+ return true;
+ }
+
+ if (Ty.isVector()) {
+ if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) {
+ if (IsFormat)
+ MI.getOperand(1).setReg(handleD16VData(B, MRI, VData));
+ return true;
+ }
+
+ return Ty.getElementType() == S32 && Ty.getNumElements() <= 4;
+ }
+
+ return Ty == S32;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
// Replace the use G_BRCOND with the exec manipulate and branch pseudos.
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ switch (MI.getIntrinsicID()) {
case Intrinsic::amdgcn_if: {
if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
const SIRegisterInfo *TRI
@@ -1386,6 +2108,22 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
case Intrinsic::amdgcn_dispatch_id:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::DISPATCH_ID);
+ case Intrinsic::amdgcn_fdiv_fast:
+ return legalizeFDIVFastIntrin(MI, MRI, B);
+ case Intrinsic::amdgcn_is_shared:
+ return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS);
+ case Intrinsic::amdgcn_is_private:
+ return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::PRIVATE_ADDRESS);
+ case Intrinsic::amdgcn_wavefrontsize: {
+ B.setInstr(MI);
+ B.buildConstant(MI.getOperand(0), ST.getWavefrontSize());
+ MI.eraseFromParent();
+ return true;
+ }
+ case Intrinsic::amdgcn_raw_buffer_store:
+ return legalizeRawBufferStore(MI, MRI, B, false);
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ return legalizeRawBufferStore(MI, MRI, B, true);
default:
return true;
}
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 3f1cc1d265dd..d0fba23a8686 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
+#include "SIInstrInfo.h"
namespace llvm {
@@ -32,29 +33,44 @@ public:
const GCNTargetMachine &TM);
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
+ MachineIRBuilder &B,
GISelChangeObserver &Observer) const override;
Register getSegmentAperture(unsigned AddrSpace,
MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ MachineIRBuilder &B) const;
bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ MachineIRBuilder &B) const;
bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ MachineIRBuilder &B) const;
bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ MachineIRBuilder &B) const;
bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ MachineIRBuilder &B) const;
bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder, bool Signed) const;
+ MachineIRBuilder &B, bool Signed) const;
bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ MachineIRBuilder &B) const;
bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ MachineIRBuilder &B) const;
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ MachineIRBuilder &B) const;
+ bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
+ bool buildPCRelGlobalAddress(
+ Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
+ unsigned Offset, unsigned GAFlags = SIInstrInfo::MO_NONE) const;
+
+ bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ GISelChangeObserver &Observer) const;
+
+ bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
Register getLiveInRegister(MachineRegisterInfo &MRI,
Register Reg, LLT Ty) const;
@@ -65,10 +81,24 @@ public:
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+ bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, unsigned AddrSpace) const;
+
+ Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ Register Reg) const;
+ bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, bool IsFormat) const;
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ MachineIRBuilder &B) const override;
};
} // End llvm namespace.
diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index ce0a9db7c7f4..2c94e0046651 100644
--- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -48,18 +49,10 @@ static cl::list<std::string> UseNative("amdgpu-use-native",
cl::CommaSeparated, cl::ValueOptional,
cl::Hidden);
-#define MATH_PI 3.14159265358979323846264338327950288419716939937511
-#define MATH_E 2.71828182845904523536028747135266249775724709369996
-#define MATH_SQRT2 1.41421356237309504880168872420969807856967187537695
-
-#define MATH_LOG2E 1.4426950408889634073599246810018921374266459541529859
-#define MATH_LOG10E 0.4342944819032518276511289189166050822943970058036665
-// Value of log2(10)
-#define MATH_LOG2_10 3.3219280948873623478703194294893901758648313930245806
-// Value of 1 / log2(10)
-#define MATH_RLOG2_10 0.3010299956639811952137388947244930267681898814621085
-// Value of 1 / M_LOG2E_F = 1 / log2(e)
-#define MATH_RLOG2_E 0.6931471805599453094172321214581765680755001343602552
+#define MATH_PI numbers::pi
+#define MATH_E numbers::e
+#define MATH_SQRT2 numbers::sqrt2
+#define MATH_SQRT1_2 numbers::inv_sqrt2
namespace llvm {
@@ -254,8 +247,8 @@ struct TableEntry {
/* a list of {result, input} */
static const TableEntry tbl_acos[] = {
- {MATH_PI/2.0, 0.0},
- {MATH_PI/2.0, -0.0},
+ {MATH_PI / 2.0, 0.0},
+ {MATH_PI / 2.0, -0.0},
{0.0, 1.0},
{MATH_PI, -1.0}
};
@@ -271,8 +264,8 @@ static const TableEntry tbl_acospi[] = {
static const TableEntry tbl_asin[] = {
{0.0, 0.0},
{-0.0, -0.0},
- {MATH_PI/2.0, 1.0},
- {-MATH_PI/2.0, -1.0}
+ {MATH_PI / 2.0, 1.0},
+ {-MATH_PI / 2.0, -1.0}
};
static const TableEntry tbl_asinh[] = {
{0.0, 0.0},
@@ -287,8 +280,8 @@ static const TableEntry tbl_asinpi[] = {
static const TableEntry tbl_atan[] = {
{0.0, 0.0},
{-0.0, -0.0},
- {MATH_PI/4.0, 1.0},
- {-MATH_PI/4.0, -1.0}
+ {MATH_PI / 4.0, 1.0},
+ {-MATH_PI / 4.0, -1.0}
};
static const TableEntry tbl_atanh[] = {
{0.0, 0.0},
@@ -359,7 +352,7 @@ static const TableEntry tbl_log10[] = {
};
static const TableEntry tbl_rsqrt[] = {
{1.0, 1.0},
- {1.0/MATH_SQRT2, 2.0}
+ {MATH_SQRT1_2, 2.0}
};
static const TableEntry tbl_sin[] = {
{0.0, 0.0},
@@ -868,7 +861,7 @@ static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
return ::log2(V);
#else
- return log(V) / 0.693147180559945309417;
+ return log(V) / numbers::ln2;
#endif
}
}
@@ -1430,8 +1423,8 @@ AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
B.SetInsertPoint(&*ItNew);
AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
std::string(prefix) + UI->getName());
- Alloc->setAlignment(UCallee->getParent()->getDataLayout()
- .getTypeAllocSize(RetType));
+ Alloc->setAlignment(MaybeAlign(
+ UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
return Alloc;
}
diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index a5bac25701a0..e1ae496d9cbc 100644
--- a/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -55,7 +55,7 @@ enum EManglingParam {
};
struct ManglingRule {
- StringRef const Name;
+ const char *Name;
unsigned char Lead[2];
unsigned char Param[5];
@@ -69,7 +69,7 @@ struct ManglingRule {
// Information about library functions with unmangled names.
class UnmangledFuncInfo {
- StringRef const Name;
+ const char *Name;
unsigned NumArgs;
// Table for all lib functions with unmangled names.
@@ -82,7 +82,7 @@ class UnmangledFuncInfo {
public:
using ID = AMDGPULibFunc::EFuncId;
- UnmangledFuncInfo(StringRef _Name, unsigned _NumArgs)
+ constexpr UnmangledFuncInfo(const char *_Name, unsigned _NumArgs)
: Name(_Name), NumArgs(_NumArgs) {}
// Get index to Table by function name.
static bool lookup(StringRef Name, ID &Id);
@@ -133,8 +133,8 @@ unsigned ManglingRule::getNumArgs() const {
// E_ANY - use prev lead type, E_CONSTPTR_ANY - make const pointer out of
// prev lead type, etc. see ParamIterator::getNextParam() for details.
-static const ManglingRule manglingRules[] = {
-{ StringRef(), {0}, {0} },
+static constexpr ManglingRule manglingRules[] = {
+{ "", {0}, {0} },
{ "abs" , {1}, {E_ANY}},
{ "abs_diff" , {1}, {E_ANY,E_COPY}},
{ "acos" , {1}, {E_ANY}},
@@ -682,9 +682,9 @@ bool AMDGPULibFunc::parse(StringRef FuncName, AMDGPULibFunc &F) {
}
if (eatTerm(FuncName, "_Z"))
- F.Impl = make_unique<AMDGPUMangledLibFunc>();
+ F.Impl = std::make_unique<AMDGPUMangledLibFunc>();
else
- F.Impl = make_unique<AMDGPUUnmangledLibFunc>();
+ F.Impl = std::make_unique<AMDGPUUnmangledLibFunc>();
if (F.Impl->parseFuncName(FuncName))
return true;
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 5dd5b3691e0a..e64542a395f0 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -72,10 +72,10 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
BasicBlock &EntryBlock = *F.begin();
IRBuilder<> Builder(&*EntryBlock.begin());
- const unsigned KernArgBaseAlign = 16; // FIXME: Increase if necessary
+ const Align KernArgBaseAlign(16); // FIXME: Increase if necessary
const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);
- unsigned MaxAlign;
+ Align MaxAlign;
// FIXME: Alignment is broken with explicit arg offset.
const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
if (TotalKernArgSize == 0)
@@ -94,12 +94,12 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
for (Argument &Arg : F.args()) {
Type *ArgTy = Arg.getType();
- unsigned Align = DL.getABITypeAlignment(ArgTy);
+ unsigned ABITypeAlign = DL.getABITypeAlignment(ArgTy);
unsigned Size = DL.getTypeSizeInBits(ArgTy);
unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
- uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset;
- ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;
+ uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
+ ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
if (Arg.use_empty())
continue;
@@ -128,8 +128,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
int64_t AlignDownOffset = alignDown(EltOffset, 4);
int64_t OffsetDiff = EltOffset - AlignDownOffset;
- unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
- KernArgBaseAlign);
+ Align AdjustedAlign = commonAlignment(
+ KernArgBaseAlign, DoShiftOpt ? AlignDownOffset : EltOffset);
Value *ArgPtr;
Type *AdjustedArgTy;
@@ -160,7 +160,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
ArgPtr->getName() + ".cast");
LoadInst *Load =
- Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
+ Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign.value());
Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
MDBuilder MDB(Ctx);
@@ -220,8 +220,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
}
KernArgSegment->addAttribute(
- AttributeList::ReturnIndex,
- Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));
+ AttributeList::ReturnIndex,
+ Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));
return true;
}
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index ae4c32c258a7..3760aed87a43 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -211,6 +211,10 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
lowerOperand(MO, MCOp);
OutMI.addOperand(MCOp);
}
+
+ int FIIdx = AMDGPU::getNamedOperandIdx(MCOpcode, AMDGPU::OpName::fi);
+ if (FIIdx >= (int)OutMI.getNumOperands())
+ OutMI.addOperand(MCOperand::createImm(0));
}
bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 237490957058..ba72f71f4322 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -694,7 +694,7 @@ void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI,
PHILinearize &PHIInfo) {
- if (TRI->isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
<< "\n");
// If this is a source register to a PHI we are chaining, it
@@ -734,7 +734,7 @@ void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI,
PHILinearize &PHIInfo) {
- if (TRI->isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
<< "\n");
for (auto &UI : MRI->use_operands(Reg)) {
@@ -949,7 +949,7 @@ void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
(IncludeLoopPHI && IsLoopPHI);
if (ShouldReplace) {
- if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
+ if (Register::isPhysicalRegister(NewRegister)) {
LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
<< printReg(NewRegister, MRI->getTargetRegisterInfo())
<< "\n");
@@ -1016,13 +1016,15 @@ bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) {
// before are no longer register kills.
void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ (void)TRI; // It's used by LLVM_DEBUG.
+
for (auto MBBI : MBBs) {
MachineBasicBlock *MBB = MBBI;
for (auto &II : *MBB) {
for (auto &RI : II.uses()) {
if (RI.isReg()) {
- unsigned Reg = RI.getReg();
- if (TRI->isVirtualRegister(Reg)) {
+ Register Reg = RI.getReg();
+ if (Register::isVirtualRegister(Reg)) {
if (hasNoDef(Reg, MRI))
continue;
if (!MRI->hasOneDef(Reg)) {
@@ -1402,7 +1404,7 @@ void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest(
unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo(
MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) {
unsigned DestReg = getPHIDestReg(PHI);
- unsigned LinearizeDestReg =
+ Register LinearizeDestReg =
MRI->createVirtualRegister(MRI->getRegClass(DestReg));
PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc());
storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices);
@@ -1890,7 +1892,7 @@ void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
if (!Cond[0].isReg())
return;
- unsigned CondReg = Cond[0].getReg();
+ Register CondReg = Cond[0].getReg();
for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) {
(*UI).setIsKill(false);
}
@@ -1929,8 +1931,8 @@ void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *Co
BBSelectReg, TrueBB->getNumber());
} else {
const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg);
- unsigned TrueBBReg = MRI->createVirtualRegister(RegClass);
- unsigned FalseBBReg = MRI->createVirtualRegister(RegClass);
+ Register TrueBBReg = MRI->createVirtualRegister(RegClass);
+ Register FalseBBReg = MRI->createVirtualRegister(RegClass);
TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
TrueBBReg, TrueBB->getNumber());
TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
@@ -1996,7 +1998,7 @@ void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB,
InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI);
}
const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg);
- unsigned NextDestReg = MRI->createVirtualRegister(RegClass);
+ Register NextDestReg = MRI->createVirtualRegister(RegClass);
bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1;
LLVM_DEBUG(dbgs() << "Insert Chained PHI\n");
insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg,
@@ -2056,8 +2058,8 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
// register, unless it is the outgoing BB select register. We have
// already created phi nodes for these.
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
- unsigned PHIDestReg = MRI->createVirtualRegister(RegClass);
- unsigned IfSourceReg = MRI->createVirtualRegister(RegClass);
+ Register PHIDestReg = MRI->createVirtualRegister(RegClass);
+ Register IfSourceReg = MRI->createVirtualRegister(RegClass);
// Create initializer, this value is never used, but is needed
// to satisfy SSA.
LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n");
@@ -2172,7 +2174,7 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio
MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent();
const TargetRegisterClass *RegClass =
MRI->getRegClass(CurrentBackedgeReg);
- unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass);
+ Register NewBackedgeReg = MRI->createVirtualRegister(RegClass);
MachineInstrBuilder BackedgePHI =
BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL,
TII->get(TargetOpcode::PHI), NewBackedgeReg);
@@ -2230,7 +2232,7 @@ void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
I != E;) {
MachineOperand &O = *I;
++I;
- if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
+ if (Register::isPhysicalRegister(NewRegister)) {
LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
<< printReg(NewRegister, MRI->getTargetRegisterInfo())
<< "\n");
@@ -2309,7 +2311,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
} else {
// Handle internal block.
const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn);
- unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass);
+ Register CodeBBSelectReg = MRI->createVirtualRegister(RegClass);
rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg);
bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB;
MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB,
@@ -2446,7 +2448,7 @@ void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI,
}
const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest);
- unsigned NewDestReg = MRI->createVirtualRegister(RegClass);
+ Register NewDestReg = MRI->createVirtualRegister(RegClass);
LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI);
MachineInstrBuilder MIB =
BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
@@ -2734,9 +2736,9 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
}
const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI());
unsigned InReg = LRegion->getBBSelectRegIn();
- unsigned InnerSelectReg =
+ Register InnerSelectReg =
MRI->createVirtualRegister(MRI->getRegClass(InReg));
- unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg));
+ Register NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg));
TII->materializeImmediate(*(LRegion->getEntry()),
LRegion->getEntry()->getFirstTerminator(), DL,
NewInReg, Region->getEntry()->getNumber());
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 0d3a1f1a769f..89ca702f577d 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -17,7 +17,6 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
LocalMemoryObjects(),
ExplicitKernArgSize(0),
- MaxKernArgAlign(0),
LDSSize(0),
IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath),
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 52987e2fa411..9818ab1ef148 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -23,7 +23,7 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
protected:
uint64_t ExplicitKernArgSize; // Cache for this.
- unsigned MaxKernArgAlign; // Cache for this.
+ Align MaxKernArgAlign; // Cache for this.
/// Number of bytes in the LDS that are being used.
unsigned LDSSize;
@@ -47,9 +47,7 @@ public:
return ExplicitKernArgSize;
}
- unsigned getMaxKernArgAlign() const {
- return MaxKernArgAlign;
- }
+ unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); }
unsigned getLDSSize() const {
return LDSSize;
diff --git a/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
new file mode 100644
index 000000000000..5250bf455d71
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -0,0 +1,592 @@
+//=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// \file
+//
+// The pass binds printfs to a kernel arg pointer that will be bound to a buffer
+// later by the runtime.
+//
+// This pass traverses the functions in the module and converts
+// each call to printf to a sequence of operations that
+// store the following into the printf buffer:
+// - format string (passed as a module's metadata unique ID)
+// - bitwise copies of printf arguments
+// The backend passes will need to store metadata in the kernel
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "printfToRuntime"
+#define DWORD_ALIGN 4
+
+namespace {
+class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final
+ : public ModulePass {
+
+public:
+ static char ID;
+
+ explicit AMDGPUPrintfRuntimeBinding();
+
+private:
+ bool runOnModule(Module &M) override;
+ void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers,
+ StringRef fmt, size_t num_ops) const;
+
+ bool shouldPrintAsStr(char Specifier, Type *OpType) const;
+ bool
+ lowerPrintfForGpu(Module &M,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+
+ Value *simplify(Instruction *I, const TargetLibraryInfo *TLI) {
+ return SimplifyInstruction(I, {*TD, TLI, DT});
+ }
+
+ const DataLayout *TD;
+ const DominatorTree *DT;
+ SmallVector<CallInst *, 32> Printfs;
+};
+} // namespace
+
+char AMDGPUPrintfRuntimeBinding::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding,
+ "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding",
+ "AMDGPU Printf lowering", false, false)
+
+char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID;
+
+namespace llvm {
+ModulePass *createAMDGPUPrintfRuntimeBinding() {
+ return new AMDGPUPrintfRuntimeBinding();
+}
+} // namespace llvm
+
+AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding()
+ : ModulePass(ID), TD(nullptr), DT(nullptr) {
+ initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry());
+}
+
+void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers(
+ SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt,
+ size_t NumOps) const {
+ // Not all format characters are collected. At this time the format
+ // characters of interest are %p and %s, which are used to know whether
+ // we are storing a literal string or a pointer to the printf buffer.
+ static const char ConvSpecifiers[] = "cdieEfgGaosuxXp";
+ size_t CurFmtSpecifierIdx = 0;
+ size_t PrevFmtSpecifierIdx = 0;
+
+ while ((CurFmtSpecifierIdx = Fmt.find_first_of(
+ ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) {
+ bool ArgDump = false;
+ StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx,
+ CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
+ size_t pTag = CurFmt.find_last_of("%");
+ if (pTag != StringRef::npos) {
+ ArgDump = true;
+ while (pTag && CurFmt[--pTag] == '%') {
+ ArgDump = !ArgDump;
+ }
+ }
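+ // Illustrative example: in "%%d" the 'd' is preceded by two '%'
+ // characters (an escaped literal '%'), so the toggling above leaves
+ // ArgDump false and no conversion specifier is recorded for it.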
+
+ if (ArgDump)
+ OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]);
+
+ PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx;
+ }
+}
+
+bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier,
+ Type *OpType) const {
+ if (Specifier != 's')
+ return false;
+ const PointerType *PT = dyn_cast<PointerType>(OpType);
+ if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ return false;
+ Type *ElemType = PT->getContainedType(0);
+ if (ElemType->getTypeID() != Type::IntegerTyID)
+ return false;
+ IntegerType *ElemIType = cast<IntegerType>(ElemType);
+ return ElemIType->getBitWidth() == 8;
+}
+
+bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
+ Module &M, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+ LLVMContext &Ctx = M.getContext();
+ IRBuilder<> Builder(Ctx);
+ Type *I32Ty = Type::getInt32Ty(Ctx);
+ unsigned UniqID = 0;
+ // NB: it is important that this string's size be divisible by 4.
+ const char NonLiteralStr[4] = "???";
+
+ for (auto CI : Printfs) {
+ unsigned NumOps = CI->getNumArgOperands();
+
+ SmallString<16> OpConvSpecifiers;
+ Value *Op = CI->getArgOperand(0);
+
+ if (auto LI = dyn_cast<LoadInst>(Op)) {
+ Op = LI->getPointerOperand();
+ for (auto Use : Op->users()) {
+ if (auto SI = dyn_cast<StoreInst>(Use)) {
+ Op = SI->getValueOperand();
+ break;
+ }
+ }
+ }
+
+ if (auto I = dyn_cast<Instruction>(Op)) {
+ Value *Op_simplified = simplify(I, &GetTLI(*I->getFunction()));
+ if (Op_simplified)
+ Op = Op_simplified;
+ }
+
+ ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op);
+
+ if (ConstExpr) {
+ GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
+
+ StringRef Str("unknown");
+ if (GVar && GVar->hasInitializer()) {
+ auto Init = GVar->getInitializer();
+ if (auto CA = dyn_cast<ConstantDataArray>(Init)) {
+ if (CA->isString())
+ Str = CA->getAsCString();
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ Str = "";
+ }
+ //
+ // We need this call to ascertain whether we are printing a string or a
+ // pointer. It extracts the conversion specifiers and fills the first
+ // argument.
+ getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1);
+ }
+ // Add metadata for the string
+ std::string AStreamHolder;
+ raw_string_ostream Sizes(AStreamHolder);
+ int Sum = DWORD_ALIGN;
+ Sizes << CI->getNumArgOperands() - 1;
+ Sizes << ':';
+ for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
+ ArgCount <= OpConvSpecifiers.size();
+ ArgCount++) {
+ Value *Arg = CI->getArgOperand(ArgCount);
+ Type *ArgType = Arg->getType();
+ unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType);
+ ArgSize = ArgSize / 8;
+ //
+ // ArgSize by design should be a multiple of DWORD_ALIGN; expand the
+ // arguments that do not follow this rule.
+ //
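+ // Illustrative example: an i16 argument printed with %u is zero-extended
+ // to i32 here so that it fills a whole dword slot in the printf buffer.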
+ if (ArgSize % DWORD_ALIGN != 0) {
+ llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx);
+ VectorType *LLVMVecType = llvm::dyn_cast<llvm::VectorType>(ArgType);
+ int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1;
+ if (LLVMVecType && NumElem > 1)
+ ResType = llvm::VectorType::get(ResType, NumElem);
+ Builder.SetInsertPoint(CI);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+ if (OpConvSpecifiers[ArgCount - 1] == 'x' ||
+ OpConvSpecifiers[ArgCount - 1] == 'X' ||
+ OpConvSpecifiers[ArgCount - 1] == 'u' ||
+ OpConvSpecifiers[ArgCount - 1] == 'o')
+ Arg = Builder.CreateZExt(Arg, ResType);
+ else
+ Arg = Builder.CreateSExt(Arg, ResType);
+ ArgType = Arg->getType();
+ ArgSize = TD->getTypeAllocSizeInBits(ArgType);
+ ArgSize = ArgSize / 8;
+ CI->setOperand(ArgCount, Arg);
+ }
+ if (OpConvSpecifiers[ArgCount - 1] == 'f') {
+ ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg);
+ if (FpCons)
+ ArgSize = 4;
+ else {
+ FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg);
+ if (FpExt && FpExt->getType()->isDoubleTy() &&
+ FpExt->getOperand(0)->getType()->isFloatTy())
+ ArgSize = 4;
+ }
+ }
+ if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
+ if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
+ GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
+ if (GV && GV->hasInitializer()) {
+ Constant *Init = GV->getInitializer();
+ ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init);
+ if (Init->isZeroValue() || CA->isString()) {
+ size_t SizeStr = Init->isZeroValue()
+ ? 1
+ : (strlen(CA->getAsCString().data()) + 1);
+ size_t Rem = SizeStr % DWORD_ALIGN;
+ size_t NSizeStr = 0;
+ LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr
+ << '\n');
+ if (Rem) {
+ NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
+ } else {
+ NSizeStr = SizeStr;
+ }
+ ArgSize = NSizeStr;
+ }
+ } else {
+ ArgSize = sizeof(NonLiteralStr);
+ }
+ } else {
+ ArgSize = sizeof(NonLiteralStr);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize
+ << " for type: " << *ArgType << '\n');
+ Sizes << ArgSize << ':';
+ Sum += ArgSize;
+ }
+ LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str()
+ << '\n');
+ for (size_t I = 0; I < Str.size(); ++I) {
+ // Rest of the C escape sequences (e.g. \') are handled correctly
+ // by the MDParser
+ switch (Str[I]) {
+ case '\a':
+ Sizes << "\\a";
+ break;
+ case '\b':
+ Sizes << "\\b";
+ break;
+ case '\f':
+ Sizes << "\\f";
+ break;
+ case '\n':
+ Sizes << "\\n";
+ break;
+ case '\r':
+ Sizes << "\\r";
+ break;
+ case '\v':
+ Sizes << "\\v";
+ break;
+ case ':':
+ // ':' cannot be scanned by Flex, as it is defined as a delimiter.
+ // Replace it with its octal representation \72.
+ Sizes << "\\72";
+ break;
+ default:
+ Sizes << Str[I];
+ break;
+ }
+ }
+
+ // Insert the printf_alloc call
+ Builder.SetInsertPoint(CI);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+
+ Type *SizetTy = Type::getInt32Ty(Ctx);
+
+ Type *Tys_alloc[1] = {SizetTy};
+ Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1);
+ FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false);
+ FunctionCallee PrintfAllocFn =
+ M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
+
+ LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n');
+ std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str();
+ MDString *fmtStrArray = MDString::get(Ctx, fmtstr);
+
+ // Instead of creating global variables, the
+ // printf format strings are extracted
+ // and passed as metadata. This avoids
+ // polluting llvm's symbol tables in this module.
+ // Metadata is going to be extracted
+ // by the backend passes and inserted
+ // into the OpenCL binary as appropriate.
+ StringRef amd("llvm.printf.fmts");
+ NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd);
+ MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
+ metaD->addOperand(myMD);
+ Value *sumC = ConstantInt::get(SizetTy, Sum, false);
+ SmallVector<Value *, 1> alloc_args;
+ alloc_args.push_back(sumC);
+ CallInst *pcall =
+ CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI);
+
+ //
+ // Insert code to split the basic block with a piece of hammock code;
+ // the basic block splits after the buffer-overflow check.
+ //
+ ConstantPointerNull *zeroIntPtr =
+ ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1));
+ ICmpInst *cmp =
+ dyn_cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, ""));
+ if (!CI->use_empty()) {
+ Value *result =
+ Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res");
+ CI->replaceAllUsesWith(result);
+ }
+ SplitBlock(CI->getParent(), cmp);
+ Instruction *Brnch =
+ SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false);
+
+ Builder.SetInsertPoint(Brnch);
+
+ // store unique printf id in the buffer
+ //
+ SmallVector<Value *, 1> ZeroIdxList;
+ ConstantInt *zeroInt =
+ ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10));
+ ZeroIdxList.push_back(zeroInt);
+
+ GetElementPtrInst *BufferIdx =
+ dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create(
+ nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch));
+
+ Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS);
+ Value *id_gep_cast =
+ new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch);
+
+ StoreInst *stbuff =
+ new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast);
+ stbuff->insertBefore(Brnch); // to remove unused-variable warning
+
+ SmallVector<Value *, 2> FourthIdxList;
+ ConstantInt *fourInt =
+ ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10));
+
+ FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id
+ // the following GEP is the buffer pointer
+ BufferIdx = cast<GetElementPtrInst>(GetElementPtrInst::Create(
+ nullptr, pcall, FourthIdxList, "PrintBuffGep", Brnch));
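+ // Resulting buffer layout (as built above): bytes [0..3] hold the unique
+ // printf id, and the argument payload written below starts at byte 4.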
+
+ Type *Int32Ty = Type::getInt32Ty(Ctx);
+ Type *Int64Ty = Type::getInt64Ty(Ctx);
+ for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
+ ArgCount <= OpConvSpecifiers.size();
+ ArgCount++) {
+ Value *Arg = CI->getArgOperand(ArgCount);
+ Type *ArgType = Arg->getType();
+ SmallVector<Value *, 32> WhatToStore;
+ if (ArgType->isFPOrFPVectorTy() &&
+ (ArgType->getTypeID() != Type::VectorTyID)) {
+ Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty;
+ if (OpConvSpecifiers[ArgCount - 1] == 'f') {
+ ConstantFP *fpCons = dyn_cast<ConstantFP>(Arg);
+ if (fpCons) {
+ APFloat Val(fpCons->getValueAPF());
+ bool Lost = false;
+ Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &Lost);
+ Arg = ConstantFP::get(Ctx, Val);
+ IType = Int32Ty;
+ } else {
+ FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg);
+ if (FpExt && FpExt->getType()->isDoubleTy() &&
+ FpExt->getOperand(0)->getType()->isFloatTy()) {
+ Arg = FpExt->getOperand(0);
+ IType = Int32Ty;
+ }
+ }
+ }
+ Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch);
+ WhatToStore.push_back(Arg);
+ } else if (ArgType->getTypeID() == Type::PointerTyID) {
+ if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
+ const char *S = NonLiteralStr;
+ if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
+ GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
+ if (GV && GV->hasInitializer()) {
+ Constant *Init = GV->getInitializer();
+ ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init);
+          if (Init->isZeroValue() || (CA && CA->isString())) {
+ S = Init->isZeroValue() ? "" : CA->getAsCString().data();
+ }
+ }
+ }
+ size_t SizeStr = strlen(S) + 1;
+ size_t Rem = SizeStr % DWORD_ALIGN;
+ size_t NSizeStr = 0;
+ if (Rem) {
+ NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
+ } else {
+ NSizeStr = SizeStr;
+ }
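+        // NSizeStr is SizeStr rounded up to the next multiple of DWORD_ALIGN;
+        // the padded string is then emitted below as whole 32-bit words.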
+ if (S[0]) {
+ char *MyNewStr = new char[NSizeStr]();
+ strcpy(MyNewStr, S);
+ int NumInts = NSizeStr / 4;
+ int CharC = 0;
+ while (NumInts) {
+ int ANum = *(int *)(MyNewStr + CharC);
+ CharC += 4;
+ NumInts--;
+ Value *ANumV = ConstantInt::get(Int32Ty, ANum, false);
+ WhatToStore.push_back(ANumV);
+ }
+ delete[] MyNewStr;
+ } else {
+          // Empty string: give the runtime a hint that it is not NULL.
+ Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false);
+ WhatToStore.push_back(ANumV);
+ }
+ } else {
+ uint64_t Size = TD->getTypeAllocSizeInBits(ArgType);
+ assert((Size == 32 || Size == 64) && "unsupported size");
+ Type *DstType = (Size == 32) ? Int32Ty : Int64Ty;
+ Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch);
+ WhatToStore.push_back(Arg);
+ }
+ } else if (ArgType->getTypeID() == Type::VectorTyID) {
+      Type *IType = nullptr;
+ uint32_t EleCount = cast<VectorType>(ArgType)->getNumElements();
+ uint32_t EleSize = ArgType->getScalarSizeInBits();
+ uint32_t TotalSize = EleCount * EleSize;
+ if (EleCount == 3) {
+ IntegerType *Int32Ty = Type::getInt32Ty(ArgType->getContext());
+ Constant *Indices[4] = {
+ ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1),
+ ConstantInt::get(Int32Ty, 2), ConstantInt::get(Int32Ty, 2)};
+ Constant *Mask = ConstantVector::get(Indices);
+ ShuffleVectorInst *Shuffle = new ShuffleVectorInst(Arg, Arg, Mask);
+ Shuffle->insertBefore(Brnch);
+ Arg = Shuffle;
+ ArgType = Arg->getType();
+ TotalSize += EleSize;
+ }
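+      // (The {0,1,2,2} shuffle above widens a 3-element vector to 4 elements
+      // so the value can be stored below in power-of-two sized chunks.)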
+ switch (EleSize) {
+ default:
+ EleCount = TotalSize / 64;
+ IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext()));
+ break;
+ case 8:
+ if (EleCount >= 8) {
+ EleCount = TotalSize / 64;
+ IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext()));
+ } else if (EleCount >= 3) {
+ EleCount = 1;
+ IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext()));
+ } else {
+ EleCount = 1;
+ IType = dyn_cast<Type>(Type::getInt16Ty(ArgType->getContext()));
+ }
+ break;
+ case 16:
+ if (EleCount >= 3) {
+ EleCount = TotalSize / 64;
+ IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext()));
+ } else {
+ EleCount = 1;
+ IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext()));
+ }
+ break;
+ }
+ if (EleCount > 1) {
+ IType = dyn_cast<Type>(VectorType::get(IType, EleCount));
+ }
+ Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch);
+ WhatToStore.push_back(Arg);
+ } else {
+ WhatToStore.push_back(Arg);
+ }
+ for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) {
+ Value *TheBtCast = WhatToStore[I];
+ unsigned ArgSize =
+ TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8;
+ SmallVector<Value *, 1> BuffOffset;
+ BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize));
+
+ Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1);
+ Value *CastedGEP =
+ new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch);
+ StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch);
+ LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
+ << *StBuff << '\n');
+ (void)StBuff;
+ if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands())
+ break;
+ BufferIdx = dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create(
+ nullptr, BufferIdx, BuffOffset, "PrintBuffNextPtr", Brnch));
+ LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
+ << *BufferIdx << '\n');
+ }
+ }
+ }
+ }
+
+ // erase the printf calls
+ for (auto CI : Printfs)
+ CI->eraseFromParent();
+
+ Printfs.clear();
+ return true;
+}
+
+bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
+ Triple TT(M.getTargetTriple());
+ if (TT.getArch() == Triple::r600)
+ return false;
+
+ auto PrintfFunction = M.getFunction("printf");
+ if (!PrintfFunction)
+ return false;
+
+ for (auto &U : PrintfFunction->uses()) {
+ if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
+ if (CI->isCallee(&U))
+ Printfs.push_back(CI);
+ }
+ }
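+  // Only direct calls where printf is the callee are collected; uses of the
+  // function as a call argument or of its address are left alone.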
+
+ if (Printfs.empty())
+ return false;
+
+ TD = &M.getDataLayout();
+ auto DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+
+ return lowerPrintfForGpu(M, GetTLI);
+}
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e4c9d6685d4a..3e9dcca114a3 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -801,7 +801,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
GlobalVariable::NotThreadLocal,
AMDGPUAS::LOCAL_ADDRESS);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(I.getAlignment());
+ GV->setAlignment(MaybeAlign(I.getAlignment()));
Value *TCntY, *TCntZ;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 815cbc5e26ee..4d78188b3dc3 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -17,9 +17,9 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -33,6 +33,7 @@
#include "AMDGPUGenRegisterBankInfo.def"
using namespace llvm;
+using namespace MIPatternMatch;
namespace {
@@ -84,9 +85,11 @@ public:
};
}
-AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
+AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
: AMDGPUGenRegisterBankInfo(),
- TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
+ Subtarget(ST),
+ TRI(Subtarget.getRegisterInfo()),
+ TII(Subtarget.getInstrInfo()) {
// HACK: Until this is fully tablegen'd.
static bool AlreadyInit = false;
@@ -163,11 +166,10 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost(
const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
const TargetRegisterClass &RC) const {
+ if (&RC == &AMDGPU::SReg_1RegClass)
+ return AMDGPU::VCCRegBank;
- if (TRI->isSGPRClass(&RC))
- return getRegBank(AMDGPU::SGPRRegBankID);
-
- return getRegBank(AMDGPU::VGPRRegBankID);
+ return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank;
}
template <unsigned NumOps>
@@ -192,7 +194,8 @@ AMDGPURegisterBankInfo::addMappingFromTable(
Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
}
- unsigned MappingID = 0;
+ // getInstrMapping's default mapping uses ID 1, so start at 2.
+ unsigned MappingID = 2;
for (const auto &Entry : Table) {
for (unsigned I = 0; I < NumOps; ++I) {
int OpIdx = RegSrcOpIdx[I];
@@ -210,7 +213,7 @@ AMDGPURegisterBankInfo::addMappingFromTable(
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ switch (MI.getIntrinsicID()) {
case Intrinsic::amdgcn_readlane: {
static const OpRegBankEntry<3> Table[2] = {
// Perfectly legal.
@@ -251,7 +254,7 @@ RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ switch (MI.getIntrinsicID()) {
case Intrinsic::amdgcn_buffer_load: {
static const OpRegBankEntry<3> Table[4] = {
// Perfectly legal.
@@ -303,6 +306,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
}
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
+      // FIXME: Should have no register operand for the immediate.
static const OpRegBankEntry<1> Table[2] = {
// Perfectly legal.
{ { AMDGPU::SGPRRegBankID }, 1 },
@@ -319,12 +323,15 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
}
}
-static bool isInstrUniform(const MachineInstr &MI) {
+// FIXME: Returns uniform if there's no source value information. This is
+// probably wrong.
+static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) {
if (!MI.hasOneMemOperand())
return false;
const MachineMemOperand *MMO = *MI.memoperands_begin();
- return AMDGPUInstrInfo::isUniformMMO(MMO);
+ return MMO->getSize() >= 4 && MMO->getAlignment() >= 4 &&
+ AMDGPUInstrInfo::isUniformMMO(MMO);
}
RegisterBankInfo::InstructionMappings
@@ -337,6 +344,31 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
InstructionMappings AltMappings;
switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONSTANT: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ if (Size == 1) {
+ static const OpRegBankEntry<1> Table[4] = {
+ { { AMDGPU::VGPRRegBankID }, 1 },
+ { { AMDGPU::SGPRRegBankID }, 1 },
+ { { AMDGPU::VCCRegBankID }, 1 },
+ { { AMDGPU::SCCRegBankID }, 1 }
+ };
+
+ return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FRAME_INDEX:
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ static const OpRegBankEntry<1> Table[2] = {
+ { { AMDGPU::VGPRRegBankID }, 1 },
+ { { AMDGPU::SGPRRegBankID }, 1 }
+ };
+
+ return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
+ }
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR: {
@@ -408,23 +440,29 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&VSMapping);
break;
}
- case TargetOpcode::G_LOAD: {
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ case TargetOpcode::G_SEXTLOAD: {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned PtrSize = PtrTy.getSizeInBits();
+ unsigned AS = PtrTy.getAddressSpace();
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
- // FIXME: Should we be hard coding the size for these mappings?
- if (isInstrUniform(MI)) {
+ if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
+ AS != AMDGPUAS::PRIVATE_ADDRESS) &&
+ isInstrUniformNonExtLoadAlign4(MI)) {
const InstructionMapping &SSMapping = getInstructionMapping(
1, 1, getOperandsMapping(
{AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
2); // Num Operands
AltMappings.push_back(&SSMapping);
}
const InstructionMapping &VVMapping = getInstructionMapping(
2, 1, getOperandsMapping(
- {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
+ {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
2); // Num Operands
AltMappings.push_back(&VVMapping);
@@ -620,57 +658,53 @@ static LLT getHalfSizedType(LLT Ty) {
///
/// There is additional complexity to try for compare values to identify the
/// unique values used.
-void AMDGPURegisterBankInfo::executeInWaterfallLoop(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- ArrayRef<unsigned> OpIndices) const {
- MachineFunction *MF = MI.getParent()->getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
- MachineBasicBlock::iterator I(MI);
-
- MachineBasicBlock &MBB = *MI.getParent();
- const DebugLoc &DL = MI.getDebugLoc();
-
- // Use a set to avoid extra readfirstlanes in the case where multiple operands
- // are the same register.
- SmallSet<Register, 4> SGPROperandRegs;
- for (unsigned Op : OpIndices) {
- assert(MI.getOperand(Op).isUse());
- Register Reg = MI.getOperand(Op).getReg();
- const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
- if (OpBank->getID() == AMDGPU::VGPRRegBankID)
- SGPROperandRegs.insert(Reg);
- }
-
- // No operands need to be replaced, so no need to loop.
- if (SGPROperandRegs.empty())
- return;
-
- MachineIRBuilder B(MI);
+bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
+ MachineIRBuilder &B,
+ iterator_range<MachineBasicBlock::iterator> Range,
+ SmallSet<Register, 4> &SGPROperandRegs,
+ MachineRegisterInfo &MRI) const {
SmallVector<Register, 4> ResultRegs;
SmallVector<Register, 4> InitResultRegs;
SmallVector<Register, 4> PhiRegs;
- for (MachineOperand &Def : MI.defs()) {
- LLT ResTy = MRI.getType(Def.getReg());
- const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
- ResultRegs.push_back(Def.getReg());
- Register InitReg = B.buildUndef(ResTy).getReg(0);
- Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
- InitResultRegs.push_back(InitReg);
- PhiRegs.push_back(PhiReg);
- MRI.setRegBank(PhiReg, *DefBank);
- MRI.setRegBank(InitReg, *DefBank);
+
+ MachineBasicBlock &MBB = B.getMBB();
+ MachineFunction *MF = &B.getMF();
+
+ const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
+ const unsigned WaveAndOpc = Subtarget.isWave32() ?
+ AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ const unsigned MovTermOpc = Subtarget.isWave32() ?
+ AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
+ const unsigned XorTermOpc = Subtarget.isWave32() ?
+ AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
+ const unsigned AndSaveExecOpc = Subtarget.isWave32() ?
+ AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
+ const unsigned ExecReg = Subtarget.isWave32() ?
+ AMDGPU::EXEC_LO : AMDGPU::EXEC;
+
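+  // Conceptual shape of the emitted waterfall loop (sketch):
+  //   loop:
+  //     sval  = V_READFIRSTLANE_B32(vval)   ; pick one lane's value
+  //     cond  = V_CMP_EQ(sval, vval)        ; mask of lanes with that value
+  //     exec' = S_AND_SAVEEXEC(cond)        ; enable only those lanes
+  //     ... spliced instructions ...
+  //     exec ^= exec'                       ; retire the handled lanes
+  //     branch back to loop while any lanes remain
+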
+ for (MachineInstr &MI : Range) {
+ for (MachineOperand &Def : MI.defs()) {
+ LLT ResTy = MRI.getType(Def.getReg());
+ const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
+ ResultRegs.push_back(Def.getReg());
+ Register InitReg = B.buildUndef(ResTy).getReg(0);
+ Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
+ InitResultRegs.push_back(InitReg);
+ PhiRegs.push_back(PhiReg);
+ MRI.setRegBank(PhiReg, *DefBank);
+ MRI.setRegBank(InitReg, *DefBank);
+ }
}
- Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
+ Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
// Don't bother using generic instructions/registers for the exec mask.
B.buildInstr(TargetOpcode::IMPLICIT_DEF)
.addDef(InitSaveExecReg);
- Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register PhiExec = MRI.createVirtualRegister(WaveRC);
+ Register NewExec = MRI.createVirtualRegister(WaveRC);
// To insert the loop we need to split the block. Move everything before this
// point to a new block, and insert a new empty block before this instruction.
@@ -688,7 +722,7 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
// Move the rest of the block into a new block.
RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
- RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+ RemainderBB->splice(RemainderBB->begin(), &MBB, Range.end(), MBB.end());
MBB.addSuccessor(LoopBB);
RestoreExecBB->addSuccessor(RemainderBB);
@@ -711,164 +745,173 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
.addMBB(LoopBB);
}
- // Move the instruction into the loop.
- LoopBB->splice(LoopBB->end(), &MBB, I);
- I = std::prev(LoopBB->end());
+ const DebugLoc &DL = B.getDL();
+
+ // Figure out the iterator range after splicing the instructions.
+ auto NewBegin = std::prev(LoopBB->end());
- B.setInstr(*I);
+  // Move the instructions into the loop. Everything after Range.end() has
+  // already been moved into a new block, so Range.end() is no longer valid.
+ LoopBB->splice(LoopBB->end(), &MBB, Range.begin(), MBB.end());
+
+ auto NewEnd = LoopBB->end();
+
+ MachineBasicBlock::iterator I = Range.begin();
+ B.setInsertPt(*LoopBB, I);
Register CondReg;
- for (MachineOperand &Op : MI.uses()) {
- if (!Op.isReg())
- continue;
+ for (MachineInstr &MI : make_range(NewBegin, NewEnd)) {
+ for (MachineOperand &Op : MI.uses()) {
+ if (!Op.isReg() || Op.isDef())
+ continue;
- assert(!Op.isDef());
- if (SGPROperandRegs.count(Op.getReg())) {
- LLT OpTy = MRI.getType(Op.getReg());
- unsigned OpSize = OpTy.getSizeInBits();
-
- // Can only do a readlane of 32-bit pieces.
- if (OpSize == 32) {
- // Avoid extra copies in the simple case of one 32-bit register.
- Register CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- MRI.setType(CurrentLaneOpReg, OpTy);
-
- constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg)
- .addReg(Op.getReg());
-
- Register NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- bool First = CondReg == AMDGPU::NoRegister;
- if (First)
- CondReg = NewCondReg;
-
- // Compare the just read M0 value to all possible Idx values.
- B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
- .addDef(NewCondReg)
- .addReg(CurrentLaneOpReg)
- .addReg(Op.getReg());
- Op.setReg(CurrentLaneOpReg);
-
- if (!First) {
- Register AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
-
- // If there are multiple operands to consider, and the conditions.
- B.buildInstr(AMDGPU::S_AND_B64)
- .addDef(AndReg)
- .addReg(NewCondReg)
- .addReg(CondReg);
- CondReg = AndReg;
- }
- } else {
- LLT S32 = LLT::scalar(32);
- SmallVector<Register, 8> ReadlanePieces;
+ if (SGPROperandRegs.count(Op.getReg())) {
+ LLT OpTy = MRI.getType(Op.getReg());
+ unsigned OpSize = OpTy.getSizeInBits();
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
+ // Can only do a readlane of 32-bit pieces.
+ if (OpSize == 32) {
+ // Avoid extra copies in the simple case of one 32-bit register.
+ Register CurrentLaneOpReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ MRI.setType(CurrentLaneOpReg, OpTy);
- bool Is64 = OpSize % 64 == 0;
+ constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpReg)
+ .addReg(Op.getReg());
- LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
- unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
- : AMDGPU::V_CMP_EQ_U32_e64;
+ Register NewCondReg = MRI.createVirtualRegister(WaveRC);
+ bool First = CondReg == AMDGPU::NoRegister;
+ if (First)
+ CondReg = NewCondReg;
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
+ // Compare the just read M0 value to all possible Idx values.
+ B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
+ .addDef(NewCondReg)
+ .addReg(CurrentLaneOpReg)
+ .addReg(Op.getReg());
+ Op.setReg(CurrentLaneOpReg);
- // Insert the unmerge before the loop.
+ if (!First) {
+ Register AndReg = MRI.createVirtualRegister(WaveRC);
- B.setMBB(MBB);
- auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
- B.setInstr(*I);
+            // With multiple operands to consider, AND the conditions together.
+ B.buildInstr(WaveAndOpc)
+ .addDef(AndReg)
+ .addReg(NewCondReg)
+ .addReg(CondReg);
+ CondReg = AndReg;
+ }
+ } else {
+ LLT S32 = LLT::scalar(32);
+ SmallVector<Register, 8> ReadlanePieces;
- unsigned NumPieces = Unmerge->getNumOperands() - 1;
- for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
- unsigned UnmergePiece = Unmerge.getReg(PieceIdx);
+ // The compares can be done as 64-bit, but the extract needs to be done
+ // in 32-bit pieces.
- Register CurrentLaneOpReg;
- if (Is64) {
- Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
- Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
+ bool Is64 = OpSize % 64 == 0;
- MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
- MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
- MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
+ LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
+ unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
+ : AMDGPU::V_CMP_EQ_U32_e64;
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpRegLo)
- .addReg(UnmergePiece, 0, AMDGPU::sub0);
+ // The compares can be done as 64-bit, but the extract needs to be done
+ // in 32-bit pieces.
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpRegHi)
- .addReg(UnmergePiece, 0, AMDGPU::sub1);
+ // Insert the unmerge before the loop.
- CurrentLaneOpReg =
- B.buildMerge(LLT::scalar(64),
- {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
- .getReg(0);
+ B.setMBB(MBB);
+ auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
+ B.setInstr(*I);
- MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
+ unsigned NumPieces = Unmerge->getNumOperands() - 1;
+ for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
+ Register UnmergePiece = Unmerge.getReg(PieceIdx);
- if (OpTy.getScalarSizeInBits() == 64) {
- // If we need to produce a 64-bit element vector, so use the
- // merged pieces
- ReadlanePieces.push_back(CurrentLaneOpReg);
+ Register CurrentLaneOpReg;
+ if (Is64) {
+ Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
+ Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
+
+ MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
+ MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
+ MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpRegLo)
+ .addReg(UnmergePiece, 0, AMDGPU::sub0);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpRegHi)
+ .addReg(UnmergePiece, 0, AMDGPU::sub1);
+
+ CurrentLaneOpReg =
+ B.buildMerge(LLT::scalar(64),
+ {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
+ .getReg(0);
+
+ MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
+
+ if (OpTy.getScalarSizeInBits() == 64) {
+              // If we need to produce a 64-bit element vector, use the
+              // merged pieces.
+ ReadlanePieces.push_back(CurrentLaneOpReg);
+ } else {
+ // 32-bit element type.
+ ReadlanePieces.push_back(CurrentLaneOpRegLo);
+ ReadlanePieces.push_back(CurrentLaneOpRegHi);
+ }
} else {
- // 32-bit element type.
- ReadlanePieces.push_back(CurrentLaneOpRegLo);
- ReadlanePieces.push_back(CurrentLaneOpRegHi);
+ CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
+ MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpReg)
+ .addReg(UnmergePiece);
+ ReadlanePieces.push_back(CurrentLaneOpReg);
}
- } else {
- CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
- MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
- MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpReg)
- .addReg(UnmergePiece);
- ReadlanePieces.push_back(CurrentLaneOpReg);
- }
+ Register NewCondReg = MRI.createVirtualRegister(WaveRC);
+ bool First = CondReg == AMDGPU::NoRegister;
+ if (First)
+ CondReg = NewCondReg;
- Register NewCondReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- bool First = CondReg == AMDGPU::NoRegister;
- if (First)
- CondReg = NewCondReg;
+ B.buildInstr(CmpOp)
+ .addDef(NewCondReg)
+ .addReg(CurrentLaneOpReg)
+ .addReg(UnmergePiece);
- B.buildInstr(CmpOp)
- .addDef(NewCondReg)
- .addReg(CurrentLaneOpReg)
- .addReg(UnmergePiece);
+ if (!First) {
+ Register AndReg = MRI.createVirtualRegister(WaveRC);
- if (!First) {
- Register AndReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+            // With multiple operands to consider, AND the conditions together.
+ B.buildInstr(WaveAndOpc)
+ .addDef(AndReg)
+ .addReg(NewCondReg)
+ .addReg(CondReg);
+ CondReg = AndReg;
+ }
+ }
- // If there are multiple operands to consider, and the conditions.
- B.buildInstr(AMDGPU::S_AND_B64)
- .addDef(AndReg)
- .addReg(NewCondReg)
- .addReg(CondReg);
- CondReg = AndReg;
+ // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
+ // BUILD_VECTOR
+ if (OpTy.isVector()) {
+ auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
+ Op.setReg(Merge.getReg(0));
+ } else {
+ auto Merge = B.buildMerge(OpTy, ReadlanePieces);
+ Op.setReg(Merge.getReg(0));
}
- }
- // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
- // BUILD_VECTOR
- if (OpTy.isVector()) {
- auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
- Op.setReg(Merge.getReg(0));
- } else {
- auto Merge = B.buildMerge(OpTy, ReadlanePieces);
- Op.setReg(Merge.getReg(0));
+ MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
}
-
- MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
}
}
}
@@ -876,16 +919,16 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
B.setInsertPt(*LoopBB, LoopBB->end());
// Update EXEC, save the original EXEC value to VCC.
- B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
+ B.buildInstr(AndSaveExecOpc)
.addDef(NewExec)
.addReg(CondReg, RegState::Kill);
MRI.setSimpleHint(NewExec, CondReg);
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
- B.buildInstr(AMDGPU::S_XOR_B64_term)
- .addDef(AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ B.buildInstr(XorTermOpc)
+ .addDef(ExecReg)
+ .addReg(ExecReg)
.addReg(NewExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
@@ -896,14 +939,60 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
.addMBB(LoopBB);
// Save the EXEC mask before the loop.
- BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
- .addReg(AMDGPU::EXEC);
+ BuildMI(MBB, MBB.end(), DL, TII->get(MovTermOpc), SaveExecReg)
+ .addReg(ExecReg);
// Restore the EXEC mask after the loop.
B.setMBB(*RestoreExecBB);
- B.buildInstr(AMDGPU::S_MOV_B64_term)
- .addDef(AMDGPU::EXEC)
+ B.buildInstr(MovTermOpc)
+ .addDef(ExecReg)
.addReg(SaveExecReg);
+
+ // Restore the insert point before the original instruction.
+ B.setInsertPt(MBB, MBB.end());
+
+ return true;
+}
+
+// Collect any unique registers used by \p MI at \p OpIndices that need to be
+// handled in a waterfall loop. Returns these registers in \p
+// SGPROperandRegs. Returns true if there are any operands to handle and a
+// waterfall loop is necessary.
+bool AMDGPURegisterBankInfo::collectWaterfallOperands(
+ SmallSet<Register, 4> &SGPROperandRegs, MachineInstr &MI,
+ MachineRegisterInfo &MRI, ArrayRef<unsigned> OpIndices) const {
+ for (unsigned Op : OpIndices) {
+ assert(MI.getOperand(Op).isUse());
+ Register Reg = MI.getOperand(Op).getReg();
+ const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
+ if (OpBank->getID() == AMDGPU::VGPRRegBankID)
+ SGPROperandRegs.insert(Reg);
+ }
+
+ // No operands need to be replaced, so no need to loop.
+ return !SGPROperandRegs.empty();
+}
+
+bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
+ MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const {
+ // Use a set to avoid extra readfirstlanes in the case where multiple operands
+ // are the same register.
+ SmallSet<Register, 4> SGPROperandRegs;
+
+ if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, OpIndices))
+ return false;
+
+ MachineBasicBlock::iterator I = MI.getIterator();
+ return executeInWaterfallLoop(B, make_range(I, std::next(I)),
+ SGPROperandRegs, MRI);
+}
+
+bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const {
+ MachineIRBuilder B(MI);
+ return executeInWaterfallLoop(B, MI, MRI, OpIndices);
}
// Legalize an operand that must be an SGPR by inserting a readfirstlane.
@@ -960,8 +1049,13 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
SmallVector<unsigned, 1> SrcRegs(OpdMapper.getVRegs(1));
// If the pointer is an SGPR, we have nothing to do.
- if (SrcRegs.empty())
- return false;
+ if (SrcRegs.empty()) {
+ Register PtrReg = MI.getOperand(1).getReg();
+ const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
+ if (PtrBank == &AMDGPU::SGPRRegBank)
+ return false;
+ SrcRegs.push_back(PtrReg);
+ }
assert(LoadSize % MaxNonSmrdLoadSize == 0);
@@ -1013,6 +1107,33 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
return true;
}
+bool AMDGPURegisterBankInfo::applyMappingImage(
+ MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ MachineRegisterInfo &MRI, int RsrcIdx) const {
+ const int NumDefs = MI.getNumExplicitDefs();
+
+ // The reported argument index is relative to the IR intrinsic call arguments,
+ // so we need to shift by the number of defs and the intrinsic ID.
+ RsrcIdx += NumDefs + 1;
+
+ // Insert copies to VGPR arguments.
+ applyDefaultMapping(OpdMapper);
+
+ // Fixup any SGPR arguments.
+ SmallVector<unsigned, 4> SGPRIndexes;
+ for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
+ if (!MI.getOperand(I).isReg())
+ continue;
+
+ // If this intrinsic has a sampler, it immediately follows rsrc.
+ if (I == RsrcIdx || I == RsrcIdx + 1)
+ SGPRIndexes.push_back(I);
+ }
+
+ executeInWaterfallLoop(MI, MRI, SGPRIndexes);
+ return true;
+}
+
// For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand
static void substituteSimpleCopyRegs(
@@ -1024,6 +1145,184 @@ static void substituteSimpleCopyRegs(
}
}
+/// Handle the register layout difference for f16 images on subtargets with
+/// unpacked D16 VMEM, where each 16-bit element occupies its own 32-bit
+/// register.
+Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B,
+ MachineRegisterInfo &MRI,
+ Register Reg) const {
+ if (!Subtarget.hasUnpackedD16VMem())
+ return Reg;
+
+ const LLT S16 = LLT::scalar(16);
+ LLT StoreVT = MRI.getType(Reg);
+ if (!StoreVT.isVector() || StoreVT.getElementType() != S16)
+ return Reg;
+
+ auto Unmerge = B.buildUnmerge(S16, Reg);
+
+ SmallVector<Register, 4> WideRegs;
+ for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+ WideRegs.push_back(Unmerge.getReg(I));
+
+ const LLT S32 = LLT::scalar(32);
+ int NumElts = StoreVT.getNumElements();
+
+ return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0);
+}
+
+static std::pair<Register, unsigned>
+getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
+ int64_t Const;
+ if (mi_match(Reg, MRI, m_ICst(Const)))
+ return std::make_pair(Register(), Const);
+
+ Register Base;
+ if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const))))
+ return std::make_pair(Base, Const);
+
+  // TODO: Handle G_OR used as an add.
+ return std::make_pair(Reg, 0);
+}
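+// Example for getBaseWithConstantOffset above (illustrative): a register
+// defined by (G_ADD %base, 16) yields {%base, 16}; a plain constant C yields
+// {Register(), C}; anything else yields {Reg, 0}.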
+
+std::pair<Register, unsigned>
+AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
+ Register OrigOffset) const {
+ const unsigned MaxImm = 4095;
+ Register BaseReg;
+ unsigned ImmOffset;
+ const LLT S32 = LLT::scalar(32);
+
+ std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(),
+ OrigOffset);
+
+ unsigned C1 = 0;
+ if (ImmOffset != 0) {
+    // If the immediate value is too big for the immoffset field, keep only
+    // its low bits (value & 4095) in the immoffset field so that the value
+    // that is copied/added for the voffset field is a multiple of 4096, and
+    // it stands more chance of being CSEd with the copy/add for another
+    // similar load/store.
+    // However, do not round down to a multiple of 4096 if that yields a
+    // negative number, as it appears to be illegal to have a negative offset
+    // in the vgpr, even if adding the immediate offset makes it positive.
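+    // Worked example (illustrative): for ImmOffset = 4100, Overflow = 4096
+    // and the remaining immediate is 4; the 4096 is folded into BaseReg and
+    // 4 is returned as C1.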
+ unsigned Overflow = ImmOffset & ~MaxImm;
+ ImmOffset -= Overflow;
+ if ((int32_t)Overflow < 0) {
+ Overflow += ImmOffset;
+ ImmOffset = 0;
+ }
+
+ C1 = ImmOffset;
+ if (Overflow != 0) {
+ if (!BaseReg)
+ BaseReg = B.buildConstant(S32, Overflow).getReg(0);
+ else {
+ auto OverflowVal = B.buildConstant(S32, Overflow);
+ BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
+ }
+ }
+ }
+
+ if (!BaseReg)
+ BaseReg = B.buildConstant(S32, 0).getReg(0);
+
+ return {BaseReg, C1};
+}
+
+static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
+ int64_t C;
+ return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
+}
+
+static unsigned extractGLC(unsigned CachePolicy) {
+ return CachePolicy & 1;
+}
+
+static unsigned extractSLC(unsigned CachePolicy) {
+ return (CachePolicy >> 1) & 1;
+}
+
+static unsigned extractDLC(unsigned CachePolicy) {
+ return (CachePolicy >> 2) & 1;
+}
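+// (Cache-policy bit layout as decoded above: bit 0 = GLC, bit 1 = SLC,
+// bit 2 = DLC.)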
+
+MachineInstr *
+AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
+ MachineInstr &MI) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ executeInWaterfallLoop(B, MI, MRI, {2, 4});
+
+ // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.
+
+ Register VData = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(VData);
+
+ int EltSize = Ty.getScalarSizeInBits();
+ int Size = Ty.getSizeInBits();
+
+ // FIXME: Broken integer truncstore.
+ if (EltSize != 32)
+ report_fatal_error("unhandled intrinsic store");
+
+ // FIXME: Verifier should enforce 1 MMO for these intrinsics.
+ const int MemSize = (*MI.memoperands_begin())->getSize();
+
+ Register RSrc = MI.getOperand(2).getReg();
+ Register VOffset = MI.getOperand(3).getReg();
+ Register SOffset = MI.getOperand(4).getReg();
+ unsigned CachePolicy = MI.getOperand(5).getImm();
+
+ unsigned ImmOffset;
+ std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
+
+ const bool Offen = !isZero(VOffset, MRI);
+
+ unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact;
+ switch (8 * MemSize) {
+ case 8:
+ Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
+ break;
+ case 16:
+ Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
+ break;
+ default:
+ Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
+ if (Size > 32)
+ Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
+ break;
+ }
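+  // The opcode is picked from the memory size (byte/short/dword(s)), and the
+  // OFFEN vs. OFFSET form by whether a non-zero voffset remains after the
+  // offset split.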
+
+ // Set the insertion point back to the instruction in case it was moved into a
+ // loop.
+ B.setInstr(MI);
+
+ MachineInstrBuilder MIB = B.buildInstr(Opc)
+ .addUse(VData);
+
+ if (Offen)
+ MIB.addUse(VOffset);
+
+ MIB.addUse(RSrc)
+ .addUse(SOffset)
+ .addImm(ImmOffset)
+ .addImm(extractGLC(CachePolicy))
+ .addImm(extractSLC(CachePolicy))
+ .addImm(0) // tfe: FIXME: Remove from inst
+ .addImm(extractDLC(CachePolicy))
+ .cloneMemRefs(MI);
+
+ // FIXME: We need a way to report failure from applyMappingImpl.
+ // Insert constrain copies before inserting the loop.
+ if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
+ report_fatal_error("failed to constrain selected store intrinsic");
+
+ return MIB;
+}
+
void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -1289,12 +1588,202 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MI.eraseFromParent();
return;
}
- case AMDGPU::G_EXTRACT_VECTOR_ELT:
- applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, { 2 });
+ case AMDGPU::G_BUILD_VECTOR:
+ case AMDGPU::G_BUILD_VECTOR_TRUNC: {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy != LLT::vector(2, 16))
+ break;
+
+ assert(MI.getNumOperands() == 3 && OpdMapper.getVRegs(0).empty());
+ substituteSimpleCopyRegs(OpdMapper, 1);
+ substituteSimpleCopyRegs(OpdMapper, 2);
+
+ const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+ if (DstBank == &AMDGPU::SGPRRegBank)
+ break; // Can use S_PACK_* instructions.
+
+ MachineIRBuilder B(MI);
+
+ Register Lo = MI.getOperand(1).getReg();
+ Register Hi = MI.getOperand(2).getReg();
+ const LLT S32 = LLT::scalar(32);
+
+ const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI);
+ const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI);
+
+ Register ZextLo;
+ Register ShiftHi;
+
+ if (Opc == AMDGPU::G_BUILD_VECTOR) {
+ ZextLo = B.buildZExt(S32, Lo).getReg(0);
+ MRI.setRegBank(ZextLo, *BankLo);
+
+ Register ZextHi = B.buildZExt(S32, Hi).getReg(0);
+ MRI.setRegBank(ZextHi, *BankHi);
+
+ auto ShiftAmt = B.buildConstant(S32, 16);
+ MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
+
+ ShiftHi = B.buildShl(S32, ZextHi, ShiftAmt).getReg(0);
+ MRI.setRegBank(ShiftHi, *BankHi);
+ } else {
+ Register MaskLo = B.buildConstant(S32, 0xffff).getReg(0);
+ MRI.setRegBank(MaskLo, *BankLo);
+
+ auto ShiftAmt = B.buildConstant(S32, 16);
+ MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
+
+ ShiftHi = B.buildShl(S32, Hi, ShiftAmt).getReg(0);
+ MRI.setRegBank(ShiftHi, *BankHi);
+
+ ZextLo = B.buildAnd(S32, Lo, MaskLo).getReg(0);
+ MRI.setRegBank(ZextLo, *BankLo);
+ }
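+    // Net effect (sketch):
+    //   DstReg = bitcast<2 x s16>((Hi << 16) | (Lo & 0xffff))
+    // with zext used for G_BUILD_VECTOR and an explicit mask for the TRUNC
+    // variant.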
+
+ auto Or = B.buildOr(S32, ZextLo, ShiftHi);
+ MRI.setRegBank(Or.getReg(0), *DstBank);
+
+ B.buildBitcast(DstReg, Or);
+ MI.eraseFromParent();
+ return;
+ }
+ case AMDGPU::G_EXTRACT_VECTOR_ELT: {
+ SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
+
+ assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty());
+
+ if (DstRegs.empty()) {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, { 2 });
+ return;
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register IdxReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ (void)DstTy;
+
+ assert(DstTy.getSizeInBits() == 64);
+
+ LLT SrcTy = MRI.getType(SrcReg);
+ const LLT S32 = LLT::scalar(32);
+ LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32);
+
+ MachineIRBuilder B(MI);
+ auto CastSrc = B.buildBitcast(Vec32, SrcReg);
+ auto One = B.buildConstant(S32, 1);
+
+ // Split the vector index into 32-bit pieces. Prepare to move all of the
+ // new instructions into a waterfall loop if necessary.
+ //
+ // Don't put the bitcast or constant in the loop.
+ MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());
+
+ // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
+ auto IdxLo = B.buildShl(S32, IdxReg, One);
+ auto IdxHi = B.buildAdd(S32, IdxLo, One);
+ B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
+ B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
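+    // E.g. (illustrative): extracting s64 element 1 of <2 x s64> becomes
+    // extracting s32 elements 2 and 3 of the bitcast <4 x s32>.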
+
+ const ValueMapping &DstMapping
+ = OpdMapper.getInstrMapping().getOperandMapping(0);
+
+    // FIXME: Should we be getting this from the mapping or not?
+ const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+ MRI.setRegBank(DstReg, *DstMapping.BreakDown[0].RegBank);
+ MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
+ MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
+ MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
+ MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
+
+ SmallSet<Register, 4> OpsToWaterfall;
+ if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, { 2 })) {
+ MI.eraseFromParent();
+ return;
+ }
+
+ // Remove the original instruction to avoid potentially confusing the
+ // waterfall loop logic.
+ B.setInstr(*Span.begin());
+ MI.eraseFromParent();
+ executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
+ OpsToWaterfall, MRI);
+ return;
+ }
+ case AMDGPU::G_INSERT_VECTOR_ELT: {
+ SmallVector<Register, 2> InsRegs(OpdMapper.getVRegs(2));
+
+ assert(OpdMapper.getVRegs(0).empty());
+ assert(OpdMapper.getVRegs(1).empty());
+ assert(OpdMapper.getVRegs(3).empty());
+
+ if (InsRegs.empty()) {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, { 3 });
+ return;
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register InsReg = MI.getOperand(2).getReg();
+ Register IdxReg = MI.getOperand(3).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT InsTy = MRI.getType(InsReg);
+ (void)InsTy;
+
+ assert(InsTy.getSizeInBits() == 64);
+
+ const LLT S32 = LLT::scalar(32);
+ LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32);
+
+ MachineIRBuilder B(MI);
+ auto CastSrc = B.buildBitcast(Vec32, SrcReg);
+ auto One = B.buildConstant(S32, 1);
+
+ // Split the vector index into 32-bit pieces. Prepare to move all of the
+ // new instructions into a waterfall loop if necessary.
+ //
+ // Don't put the bitcast or constant in the loop.
+ MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());
+
+ // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
+ auto IdxLo = B.buildShl(S32, IdxReg, One);
+ auto IdxHi = B.buildAdd(S32, IdxLo, One);
+
+ auto InsLo = B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
+ auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
+ B.buildBitcast(DstReg, InsHi);
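+    // Mirrors the extract case: an s64 insert becomes two s32 inserts at
+    // indices 2*Idx and 2*Idx+1 into the bitcast <2N x s32> vector.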
+
+ const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+ const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+ const RegisterBank *InsSrcBank = getRegBank(InsReg, MRI, *TRI);
+
+ MRI.setRegBank(InsReg, *InsSrcBank);
+ MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
+ MRI.setRegBank(InsLo.getReg(0), *DstBank);
+ MRI.setRegBank(InsHi.getReg(0), *DstBank);
+ MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
+ MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
+ MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
+
+ SmallSet<Register, 4> OpsToWaterfall;
+ if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, { 3 })) {
+ MI.eraseFromParent();
+ return;
+ }
+
+ B.setInstr(*Span.begin());
+ MI.eraseFromParent();
+
+ executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
+ OpsToWaterfall, MRI);
return;
+ }
case AMDGPU::G_INTRINSIC: {
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ switch (MI.getIntrinsicID()) {
case Intrinsic::amdgcn_s_buffer_load: {
// FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
executeInWaterfallLoop(MI, MRI, { 2, 3 });
@@ -1303,8 +1792,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case Intrinsic::amdgcn_readlane: {
substituteSimpleCopyRegs(OpdMapper, 2);
- assert(empty(OpdMapper.getVRegs(0)));
- assert(empty(OpdMapper.getVRegs(3)));
+ assert(OpdMapper.getVRegs(0).empty());
+ assert(OpdMapper.getVRegs(3).empty());
// Make sure the index is an SGPR. It doesn't make sense to run this in a
// waterfall loop, so assume it's a uniform value.
@@ -1312,9 +1801,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case Intrinsic::amdgcn_writelane: {
- assert(empty(OpdMapper.getVRegs(0)));
- assert(empty(OpdMapper.getVRegs(2)));
- assert(empty(OpdMapper.getVRegs(3)));
+ assert(OpdMapper.getVRegs(0).empty());
+ assert(OpdMapper.getVRegs(2).empty());
+ assert(OpdMapper.getVRegs(3).empty());
substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
@@ -1327,7 +1816,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ auto IntrID = MI.getIntrinsicID();
+ switch (IntrID) {
case Intrinsic::amdgcn_buffer_load: {
executeInWaterfallLoop(MI, MRI, { 2 });
return;
@@ -1335,23 +1825,70 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
// This is only allowed to execute with 1 lane, so readfirstlane is safe.
- assert(empty(OpdMapper.getVRegs(0)));
+ assert(OpdMapper.getVRegs(0).empty());
substituteSimpleCopyRegs(OpdMapper, 3);
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
return;
}
+ case Intrinsic::amdgcn_ds_gws_init:
+ case Intrinsic::amdgcn_ds_gws_barrier:
+ case Intrinsic::amdgcn_ds_gws_sema_br: {
+      // Only the first lane executes, so readfirstlane is safe.
+ substituteSimpleCopyRegs(OpdMapper, 1);
+ constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ return;
+ }
+ case Intrinsic::amdgcn_ds_gws_sema_v:
+ case Intrinsic::amdgcn_ds_gws_sema_p:
+ case Intrinsic::amdgcn_ds_gws_sema_release_all: {
+      // Only the first lane executes, so readfirstlane is safe.
+ constrainOpWithReadfirstlane(MI, MRI, 1); // M0
+ return;
+ }
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// FIXME: Should this use a waterfall loop?
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
return;
}
- default:
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_raw_buffer_store:
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ case Intrinsic::amdgcn_raw_tbuffer_store: {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, {2, 4});
+ return;
+ }
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_store:
+ case Intrinsic::amdgcn_struct_tbuffer_load:
+ case Intrinsic::amdgcn_struct_tbuffer_store: {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, {2, 5});
+ return;
+ }
+ default: {
+ if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
+ AMDGPU::lookupRsrcIntrinsic(IntrID)) {
+ // Non-images can have complications from operands that allow both SGPR
+ // and VGPR. For now it's too complicated to figure out the final opcode
+ // to derive the register bank from the MCInstrDesc.
+ if (RSrcIntrin->IsImage) {
+ applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
+ return;
+ }
+ }
+
break;
}
+ }
break;
}
- case AMDGPU::G_LOAD: {
+ case AMDGPU::G_LOAD:
+ case AMDGPU::G_ZEXTLOAD:
+ case AMDGPU::G_SEXTLOAD: {
if (applyMappingWideLoad(MI, OpdMapper, MRI))
return;
break;
@@ -1452,25 +1989,71 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
}
const RegisterBankInfo::InstructionMapping &
+AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ int RsrcIdx) const {
+ // The reported argument index is relative to the IR intrinsic call arguments,
+ // so we need to shift by the number of defs and the intrinsic ID.
+ RsrcIdx += MI.getNumExplicitDefs() + 1;
+
+ const int NumOps = MI.getNumOperands();
+ SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps);
+
+ // TODO: Should packed/unpacked D16 difference be reported here as part of
+ // the value mapping?
+ for (int I = 0; I != NumOps; ++I) {
+ if (!MI.getOperand(I).isReg())
+ continue;
+
+ Register OpReg = MI.getOperand(I).getReg();
+ unsigned Size = getSizeInBits(OpReg, MRI, *TRI);
+
+ // FIXME: Probably need a new intrinsic register bank searchable table to
+ // handle arbitrary intrinsics easily.
+ //
+ // If this has a sampler, it immediately follows rsrc.
+ const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;
+
+ if (MustBeSGPR) {
+      // If this must be an SGPR, we must report whatever it is as legal.
+ unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);
+ } else {
+ // Some operands must be VGPR, and these are easy to copy to.
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ }
+ }
+
+ return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
+}
+
+const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
+ SmallVector<const ValueMapping*, 2> OpdsMapping(2);
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
- unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AS = PtrTy.getAddressSpace();
+ unsigned PtrSize = PtrTy.getSizeInBits();
const ValueMapping *ValMapping;
const ValueMapping *PtrMapping;
- if (isInstrUniform(MI)) {
+ const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
+
+ if (PtrBank == &AMDGPU::SGPRRegBank &&
+ (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
+ AS != AMDGPUAS::PRIVATE_ADDRESS) &&
+ isInstrUniformNonExtLoadAlign4(MI)) {
// We have a uniform instruction so we want to use an SMRD load
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
} else {
ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
- // FIXME: What would happen if we used SGPRRegBankID here?
PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
}
@@ -1494,6 +2077,31 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg,
return Bank ? Bank->getID() : Default;
}
+
+static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
+ return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ?
+ AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
+}
+
+const RegisterBankInfo::ValueMapping *
+AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+  // Lie and claim anything is legal, even though this needs to be an SGPR;
+  // applyMapping will have to deal with it as a waterfall loop.
+ unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID);
+ unsigned Size = getSizeInBits(Reg, MRI, TRI);
+ return AMDGPU::getValueMapping(Bank, Size);
+}
+
+const RegisterBankInfo::ValueMapping *
+AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Size = getSizeInBits(Reg, MRI, TRI);
+ return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+}
+
///
/// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
@@ -1536,7 +2144,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
int ResultBank = -1;
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
- unsigned Reg = MI.getOperand(I).getReg();
+ Register Reg = MI.getOperand(I).getReg();
const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
// FIXME: Assuming VGPR for any undetermined inputs.
@@ -1660,7 +2268,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
LLVM_FALLTHROUGH;
}
-
case AMDGPU::G_GEP:
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
@@ -1669,15 +2276,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_LSHR:
case AMDGPU::G_ASHR:
case AMDGPU::G_UADDO:
- case AMDGPU::G_SADDO:
case AMDGPU::G_USUBO:
- case AMDGPU::G_SSUBO:
case AMDGPU::G_UADDE:
case AMDGPU::G_SADDE:
case AMDGPU::G_USUBE:
case AMDGPU::G_SSUBE:
- case AMDGPU::G_UMULH:
- case AMDGPU::G_SMULH:
case AMDGPU::G_SMIN:
case AMDGPU::G_SMAX:
case AMDGPU::G_UMIN:
@@ -1692,17 +2295,32 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FPTOUI:
case AMDGPU::G_FMUL:
case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
case AMDGPU::G_FSQRT:
+ case AMDGPU::G_FFLOOR:
+ case AMDGPU::G_FCEIL:
+ case AMDGPU::G_FRINT:
case AMDGPU::G_SITOFP:
case AMDGPU::G_UITOFP:
case AMDGPU::G_FPTRUNC:
case AMDGPU::G_FPEXT:
case AMDGPU::G_FEXP2:
case AMDGPU::G_FLOG2:
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
case AMDGPU::G_FCANONICALIZE:
case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_AMDGPU_FFBH_U32:
+ return getDefaultMappingVOP(MI);
+ case AMDGPU::G_UMULH:
+ case AMDGPU::G_SMULH: {
+ if (Subtarget.hasScalarMulHiInsts() && isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);
+ }
case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
@@ -1710,12 +2328,19 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case AMDGPU::G_FCONSTANT:
case AMDGPU::G_CONSTANT:
- case AMDGPU::G_FRAME_INDEX:
+ case AMDGPU::G_GLOBAL_VALUE:
case AMDGPU::G_BLOCK_ADDR: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case AMDGPU::G_FRAME_INDEX: {
+ // TODO: This should be the same as other constants, but eliminateFrameIndex
+ // currently assumes VALU uses.
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ break;
+ }
case AMDGPU::G_INSERT: {
unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
AMDGPU::VGPRRegBankID;
@@ -1737,8 +2362,25 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = nullptr;
break;
}
- case AMDGPU::G_MERGE_VALUES:
case AMDGPU::G_BUILD_VECTOR:
+ case AMDGPU::G_BUILD_VECTOR_TRUNC: {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (DstTy == LLT::vector(2, 16)) {
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
+ break;
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+ case AMDGPU::G_MERGE_VALUES:
case AMDGPU::G_CONCAT_VECTORS: {
unsigned Bank = isSALUMapping(MI) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
@@ -1760,6 +2402,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_CTTZ_ZERO_UNDEF:
case AMDGPU::G_CTPOP:
case AMDGPU::G_BSWAP:
+ case AMDGPU::G_BITREVERSE:
case AMDGPU::G_FABS:
case AMDGPU::G_FNEG: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -1848,7 +2491,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
Op3Bank == AMDGPU::SGPRRegBankID &&
(Size == 32 || (Size == 64 &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
- MF.getSubtarget<GCNSubtarget>().hasScalarCompareEq64()));
+ Subtarget.hasScalarCompareEq64()));
unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
@@ -1859,14 +2502,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_EXTRACT_VECTOR_ELT: {
- unsigned OutputBankID = isSALUMapping(MI) ?
- AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
+ // VGPR index can be used for waterfall when indexing a SGPR vector.
+ unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank);
- OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
- OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
+ OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
// The index can be in either bank if the source vector is VGPR.
OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
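The "waterfall" mentioned above is the idiom used when an operand must be scalar (SGPR) but a divergent (VGPR) value arrives: the instruction is wrapped in a loop that services one index value per iteration. Illustrative pseudocode only; these names are not the helpers added by this patch:

//   SavedExec = EXEC;
//   Remaining = EXEC;
//   do {
//     Cur  = readfirstlane(VIdx);       // index from one still-pending lane
//     Mask = Remaining & (VIdx == Cur); // all lanes sharing that index
//     EXEC = Mask;                      // execute only those lanes
//     <instruction, with Cur in the required SGPR>
//     Remaining &= ~Mask;               // those lanes are now done
//   } while (Remaining != 0);
//   EXEC = SavedExec;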
@@ -1879,15 +2524,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
- unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
- unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+ unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(),
+ MRI, *TRI);
+ unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
- OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
- OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, VecSize);
+ OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(InsertEltBankID,
+ InsertSize);
// The index can be in either bank if the source vector is VGPR.
- OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
+ OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
break;
}
case AMDGPU::G_UNMERGE_VALUES: {
@@ -1903,11 +2551,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_INTRINSIC: {
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ switch (MI.getIntrinsicID()) {
default:
return getInvalidInstructionMapping();
- case Intrinsic::maxnum:
- case Intrinsic::minnum:
case Intrinsic::amdgcn_div_fmas:
case Intrinsic::amdgcn_trig_preop:
case Intrinsic::amdgcn_sin:
@@ -1938,6 +2584,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_mbcnt_hi:
case Intrinsic::amdgcn_ubfe:
case Intrinsic::amdgcn_sbfe:
+ case Intrinsic::amdgcn_mul_u24:
+ case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_lerp:
case Intrinsic::amdgcn_sad_u8:
case Intrinsic::amdgcn_msad_u8:
@@ -1956,10 +2604,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_udot4:
case Intrinsic::amdgcn_sdot8:
case Intrinsic::amdgcn_udot8:
- case Intrinsic::amdgcn_fdiv_fast:
case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_wqm:
return getDefaultMappingVOP(MI);
+ case Intrinsic::amdgcn_ds_swizzle:
case Intrinsic::amdgcn_ds_permute:
case Intrinsic::amdgcn_ds_bpermute:
case Intrinsic::amdgcn_update_dpp:
@@ -2040,7 +2688,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case Intrinsic::amdgcn_readlane: {
// This must be an SGPR, but accept a VGPR.
- unsigned IdxReg = MI.getOperand(3).getReg();
+ Register IdxReg = MI.getOperand(3).getReg();
unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
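The trailing argument of getRegBankID() here supplies the bank to assume when the register has not been constrained to one yet, so an unconstrained index is optimistically treated as the SGPR the hardware wants. A sketch of the assumed helper behavior (its definition is outside this hunk):

// unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI,
//                       const TargetRegisterInfo &TRI,
//                       unsigned Default = AMDGPU::VGPRRegBankID) {
//   const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
//   return Bank ? Bank->getID() : Default;
// }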
@@ -2055,10 +2703,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case Intrinsic::amdgcn_writelane: {
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- unsigned SrcReg = MI.getOperand(2).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
- unsigned IdxReg = MI.getOperand(3).getReg();
+ Register IdxReg = MI.getOperand(3).getReg();
unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
@@ -2081,9 +2729,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
- default:
- return getInvalidInstructionMapping();
+ auto IntrID = MI.getIntrinsicID();
+ switch (IntrID) {
case Intrinsic::amdgcn_s_getreg:
case Intrinsic::amdgcn_s_memtime:
case Intrinsic::amdgcn_s_memrealtime:
@@ -2123,18 +2770,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
case Intrinsic::amdgcn_exp:
- OpdsMapping[0] = nullptr; // IntrinsicID
- // FIXME: These are immediate values which can't be read from registers.
- OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
- OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
// FIXME: Could we support packed types here?
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
- // FIXME: These are immediate values which can't be read from registers.
- OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
- OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
case Intrinsic::amdgcn_buffer_load: {
Register RSrc = MI.getOperand(2).getReg(); // SGPR
@@ -2169,11 +2809,97 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
- case Intrinsic::amdgcn_end_cf: {
+ case Intrinsic::amdgcn_end_cf:
+ case Intrinsic::amdgcn_init_exec: {
+ unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
+ case Intrinsic::amdgcn_else: {
+ unsigned WaveSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
+ break;
+ }
+ case Intrinsic::amdgcn_kill: {
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
+ break;
+ }
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_tbuffer_load: {
+ // FIXME: Should make the intrinsic ID the last operand of the instruction;
+ // then this would be the same as the store case
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_raw_buffer_store:
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ case Intrinsic::amdgcn_raw_tbuffer_store: {
+ OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_tbuffer_load: {
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_struct_buffer_store:
+ case Intrinsic::amdgcn_struct_tbuffer_store: {
+ OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_init_exec_from_input: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
+ case Intrinsic::amdgcn_ds_gws_init:
+ case Intrinsic::amdgcn_ds_gws_barrier:
+ case Intrinsic::amdgcn_ds_gws_sema_br: {
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+
+ // This must be an SGPR, but accept a VGPR.
+ unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
+ case Intrinsic::amdgcn_ds_gws_sema_v:
+ case Intrinsic::amdgcn_ds_gws_sema_p:
+ case Intrinsic::amdgcn_ds_gws_sema_release_all: {
+ // This must be an SGPR, but accept a VGPR.
+ unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
+ break;
+ }
+ default:
+ if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
+ AMDGPU::lookupRsrcIntrinsic(IntrID)) {
+ // Non-images can have complications from operands that allow both SGPR
+ // and VGPR. For now it's too complicated to figure out the final opcode
+ // to derive the register bank from the MCInstrDesc.
+ if (RSrcIntrin->IsImage)
+ return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
+ }
+
+ return getInvalidInstructionMapping();
}
break;
}
@@ -2216,6 +2942,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case AMDGPU::G_LOAD:
+ case AMDGPU::G_ZEXTLOAD:
+ case AMDGPU::G_SEXTLOAD:
return getInstrMappingForLoad(MI);
case AMDGPU::G_ATOMICRMW_XCHG:
@@ -2228,6 +2956,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ATOMICRMW_MIN:
case AMDGPU::G_ATOMICRMW_UMAX:
case AMDGPU::G_ATOMICRMW_UMIN:
+ case AMDGPU::G_ATOMICRMW_FADD:
case AMDGPU::G_ATOMIC_CMPXCHG: {
return getDefaultMappingAllVGPR(MI);
}
@@ -2247,4 +2976,3 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getOperandsMapping(OpdsMapping),
MI.getNumOperands());
}
-
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index f3a96e2a6128..a14b74961118 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -13,6 +13,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
@@ -23,7 +25,9 @@
namespace llvm {
class LLT;
+class GCNSubtarget;
class MachineIRBuilder;
+class SIInstrInfo;
class SIRegisterInfo;
class TargetRegisterInfo;
@@ -36,9 +40,27 @@ protected:
#include "AMDGPUGenRegisterBank.inc"
};
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
+ const GCNSubtarget &Subtarget;
const SIRegisterInfo *TRI;
-
- void executeInWaterfallLoop(MachineInstr &MI,
+ const SIInstrInfo *TII;
+
+ bool collectWaterfallOperands(
+ SmallSet<Register, 4> &SGPROperandRegs,
+ MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const;
+
+ bool executeInWaterfallLoop(
+ MachineIRBuilder &B,
+ iterator_range<MachineBasicBlock::iterator> Range,
+ SmallSet<Register, 4> &SGPROperandRegs,
+ MachineRegisterInfo &MRI) const;
+
+ bool executeInWaterfallLoop(MachineIRBuilder &B,
+ MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const;
+ bool executeInWaterfallLoop(MachineInstr &MI,
MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const;
@@ -47,6 +69,19 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
bool applyMappingWideLoad(MachineInstr &MI,
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI) const;
+ bool
+ applyMappingImage(MachineInstr &MI,
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ MachineRegisterInfo &MRI, int RSrcIdx) const;
+
+ Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ Register Reg) const;
+
+ std::pair<Register, unsigned>
+ splitBufferOffsets(MachineIRBuilder &B, Register Offset) const;
+
+ MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B,
+ MachineInstr &MI) const;
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
@@ -58,6 +93,16 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const TargetRegisterInfo &TRI,
unsigned Default = AMDGPU::VGPRRegBankID) const;
+ // Return a value mapping for an operand that is required to be an SGPR.
+ const ValueMapping *getSGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+ // Return a value mapping for an operand that is required to be a VGPR.
+ const ValueMapping *getVGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
/// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
/// Regs. This appropriately sets the regbank of the new registers.
void split64BitValueForMapping(MachineIRBuilder &B,
@@ -90,8 +135,13 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingAllVGPR(
const MachineInstr &MI) const;
+
+ const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ int RsrcIdx) const;
+
public:
- AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
+ AMDGPURegisterBankInfo(const GCNSubtarget &STI);
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 9555694fb106..00f53b157577 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -7,14 +7,14 @@
//===----------------------------------------------------------------------===//
def SGPRRegBank : RegisterBank<"SGPR",
- [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512]
+ [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024]
>;
def VGPRRegBank : RegisterBank<"VGPR",
- [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
+ [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512, VReg_1024]
>;
def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>;
// It is helpful to distinguish conditions from ordinary SGPRs.
-def VCCRegBank : RegisterBank <"VCC", [SReg_64]>;
+def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index 7cffdf1a4dcf..9806e6b0714f 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -26,19 +26,59 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
// they are not supported at this time.
//===----------------------------------------------------------------------===//
-unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) {
- static const unsigned SubRegs[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
- AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
- AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
- AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
- AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24,
- AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29,
- AMDGPU::sub30, AMDGPU::sub31
- };
-
- assert(Channel < array_lengthof(SubRegs));
- return SubRegs[Channel];
+// Table of NumRegs-sized pieces at every 32-bit offset.
+static const uint16_t SubRegFromChannelTable[][32] = {
+ { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
+ AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
+ AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
+ AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31
+ },
+ {
+ AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4,
+ AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8,
+ AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
+ AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16,
+ AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20,
+ AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
+ AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28,
+ AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister
+ },
+ {
+ AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
+ AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
+ AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
+ AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
+ AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
+ AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
+ AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
+ AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
+ },
+ {
+ AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
+ AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
+ AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
+ AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
+ AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
+ AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
+ AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
+ AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
+ }
+};
+
+// FIXME: TableGen should generate something to make this manageable for all
+// register classes. At a minimum we could use the opposite of
+// composeSubRegIndices and go up from the base 32-bit subreg.
+unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel, unsigned NumRegs) {
+ const unsigned NumRegIndex = NumRegs - 1;
+
+ assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) &&
+ "Not implemented");
+ assert(Channel < array_lengthof(SubRegFromChannelTable[0]));
+ return SubRegFromChannelTable[NumRegIndex][Channel];
}
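A usage sketch for the two-argument form, with results read directly off the table (row index is NumRegs - 1, column index is Channel):

//   getSubRegFromChannel(3)     -> AMDGPU::sub3          (row 0, 32-bit piece)
//   getSubRegFromChannel(2, 2)  -> AMDGPU::sub2_sub3     (row 1, 64-bit piece)
//   getSubRegFromChannel(31, 2) -> AMDGPU::NoSubRegister (no pair can start
//                                                         at the last channel)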
void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 3453a8c1b0b3..9e713ca804a1 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -28,7 +28,7 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
- static unsigned getSubRegFromChannel(unsigned Channel);
+ static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
void reserveRegisterTuples(BitVector &, unsigned Reg) const;
};
diff --git a/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index f8703c36127a..26b8b7840270 100644
--- a/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -81,6 +81,8 @@ def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umax>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_and>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_inc>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_dec>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
@@ -92,6 +94,8 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umax>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_and>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_inc>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_dec>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_ps_live>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 1eb9b83456c5..3bb6dd4571c0 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -175,6 +175,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
HasFminFmaxLegacy(true),
EnablePromoteAlloca(false),
HasTrigReducedRange(false),
+ MaxWavesPerEU(10),
LocalMemorySize(0),
WavefrontSize(0)
{ }
@@ -261,6 +262,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
AddNoCarryInsts(false),
HasUnpackedD16VMem(false),
LDSMisalignedBug(false),
+ HasMFMAInlineLiteralBug(false),
ScalarizeGlobal(false),
@@ -278,9 +280,10 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
TLInfo(TM, *this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
+ MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
- RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
+ RegBankInfo.reset(new AMDGPURegisterBankInfo(*this));
InstSelector.reset(new AMDGPUInstructionSelector(
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}
@@ -489,28 +492,28 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
}
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
- unsigned &MaxAlign) const {
+ Align &MaxAlign) const {
assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL);
const DataLayout &DL = F.getParent()->getDataLayout();
uint64_t ExplicitArgBytes = 0;
- MaxAlign = 1;
+ MaxAlign = Align::None();
for (const Argument &Arg : F.args()) {
Type *ArgTy = Arg.getType();
- unsigned Align = DL.getABITypeAlignment(ArgTy);
+ const Align Alignment(DL.getABITypeAlignment(ArgTy));
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
- ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize;
- MaxAlign = std::max(MaxAlign, Align);
+ ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
+ MaxAlign = std::max(MaxAlign, Alignment);
}
return ExplicitArgBytes;
}
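A worked example of the accumulation above for a hypothetical kernel(i32, double) signature, with llvm::alignTo replaced by an inline equivalent so the sketch stands alone:

#include <cstdint>

static uint64_t alignUp(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }

static uint64_t explicitKernArgBytesSketch() {
  uint64_t Bytes = 0;
  Bytes = alignUp(Bytes, 4) + 4; // i32:    offset 0, Bytes = 4
  Bytes = alignUp(Bytes, 8) + 8; // double: offset 8, Bytes = 16
  return Bytes;                  // MaxAlign would end up as Align(8)
}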
unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
- unsigned &MaxAlign) const {
+ Align &MaxAlign) const {
uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
unsigned ExplicitOffset = getExplicitKernelArgOffset(F);
@@ -518,7 +521,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
unsigned ImplicitBytes = getImplicitArgNumBytes(F);
if (ImplicitBytes != 0) {
- unsigned Alignment = getAlignmentForImplicitArgPtr();
+ const Align Alignment = getAlignmentForImplicitArgPtr();
TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}
@@ -566,7 +569,7 @@ bool GCNSubtarget::hasMadF16() const {
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
- return 10;
+ return getMaxWavesPerEU();
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
@@ -591,25 +594,12 @@ unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
}
unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
- if (VGPRs <= 24)
- return 10;
- if (VGPRs <= 28)
- return 9;
- if (VGPRs <= 32)
- return 8;
- if (VGPRs <= 36)
- return 7;
- if (VGPRs <= 40)
- return 6;
- if (VGPRs <= 48)
- return 5;
- if (VGPRs <= 64)
- return 4;
- if (VGPRs <= 84)
- return 3;
- if (VGPRs <= 128)
- return 2;
- return 1;
+ unsigned MaxWaves = getMaxWavesPerEU();
+ unsigned Granule = getVGPRAllocGranule();
+ if (VGPRs < Granule)
+ return MaxWaves;
+ unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule;
+ return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves);
}
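A self-contained instance of the new formula under assumed subtarget numbers (allocation granule 4, 256 addressable VGPRs, at most 10 waves): a request for 35 VGPRs rounds up to a 36-register allocation, and 256 / 36 yields 7 waves.

#include <algorithm>

static unsigned occupancyForVGPRsSketch(unsigned VGPRs) {
  const unsigned Granule = 4, TotalVGPRs = 256, MaxWaves = 10;
  if (VGPRs < Granule)
    return MaxWaves; // tiny register counts never limit occupancy
  unsigned Rounded = (VGPRs + Granule - 1) / Granule * Granule;  // 35 -> 36
  return std::min(std::max(TotalVGPRs / Rounded, 1u), MaxWaves); // 256/36 = 7
}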
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
@@ -629,6 +619,20 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
return 2; // VCC.
}
+unsigned GCNSubtarget::computeOccupancy(const MachineFunction &MF,
+ unsigned LDSSize,
+ unsigned NumSGPRs,
+ unsigned NumVGPRs) const {
+ unsigned Occupancy =
+ std::min(getMaxWavesPerEU(),
+ getOccupancyWithLocalMemSize(LDSSize, MF.getFunction()));
+ if (NumSGPRs)
+ Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs));
+ if (NumVGPRs)
+ Occupancy = std::min(Occupancy, getOccupancyWithNumVGPRs(NumVGPRs));
+ return Occupancy;
+}
+
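In other words, computeOccupancy() returns the minimum of the per-resource limits that are actually known: if the LDS budget allows 8 waves, the SGPR count allows 9, and the VGPR count allows 7, the result is 7, and passing 0 for either register count simply skips that limit.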
unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
@@ -878,8 +882,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
void GCNSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
- Mutations.push_back(llvm::make_unique<FillMFMAShadowMutation>(&InstrInfo));
+ Mutations.push_back(std::make_unique<MemOpClusterMutation>(&InstrInfo));
+ Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}
const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 78c3b823946d..936feb00c62b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -75,6 +75,7 @@ protected:
bool HasFminFmaxLegacy;
bool EnablePromoteAlloca;
bool HasTrigReducedRange;
+ unsigned MaxWavesPerEU;
int LocalMemorySize;
unsigned WavefrontSize;
@@ -195,8 +196,8 @@ public:
return LocalMemorySize;
}
- unsigned getAlignmentForImplicitArgPtr() const {
- return isAmdHsaOS() ? 8 : 4;
+ Align getAlignmentForImplicitArgPtr() const {
+ return isAmdHsaOS() ? Align(8) : Align(4);
}
/// Returns the offset in bytes from the start of the input buffer
@@ -223,7 +224,9 @@ public:
/// subtarget.
virtual unsigned getMinWavesPerEU() const = 0;
- unsigned getMaxWavesPerEU() const { return 10; }
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget without any kind of limitation.
+ unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
/// Creates value range metadata on a workitemid.* intrinsic call or load.
bool makeLIDRangeMetadata(Instruction *I) const;
@@ -235,16 +238,17 @@ public:
return 16;
return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
}
- uint64_t getExplicitKernArgSize(const Function &F,
- unsigned &MaxAlign) const;
- unsigned getKernArgSegmentSize(const Function &F,
- unsigned &MaxAlign) const;
+ uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
+ unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
virtual ~AMDGPUSubtarget() {}
};
class GCNSubtarget : public AMDGPUGenSubtargetInfo,
public AMDGPUSubtarget {
+
+ using AMDGPUSubtarget::getMaxWavesPerEU;
+
public:
enum TrapHandlerAbi {
TrapHandlerAbiNone = 0,
@@ -362,6 +366,7 @@ protected:
bool CaymanISA;
bool CFALUBug;
bool LDSMisalignedBug;
+ bool HasMFMAInlineLiteralBug;
bool HasVertexCache;
short TexVTXClauseSize;
bool ScalarizeGlobal;
@@ -416,7 +421,7 @@ public:
return CallLoweringInfo.get();
}
- const InstructionSelector *getInstructionSelector() const override {
+ InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
@@ -544,6 +549,14 @@ public:
return GFX9Insts;
}
+ bool hasScalarPackInsts() const {
+ return GFX9Insts;
+ }
+
+ bool hasScalarMulHiInsts() const {
+ return GFX9Insts;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
@@ -611,6 +624,11 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
+ /// \returns If target supports S_DENORM_MODE.
+ bool hasDenormModeInst() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX10;
+ }
+
bool useFlatForGlobal() const {
return FlatForGlobal;
}
@@ -848,9 +866,7 @@ public:
// on the pointer value itself may rely on the alignment / known low bits of
// the pointer. Set this to something above the minimum to avoid needing
// dynamic realignment in common cases.
- unsigned getStackAlignment() const {
- return 16;
- }
+ Align getStackAlignment() const { return Align(16); }
bool enableMachineScheduler() const override {
return true;
@@ -881,12 +897,6 @@ public:
return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
}
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget without any kind of limitation.
- unsigned getMaxWavesPerEU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU();
- }
-
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
@@ -944,6 +954,14 @@ public:
return HasDPP;
}
+ bool hasDPPBroadcasts() const {
+ return HasDPP && getGeneration() < GFX10;
+ }
+
+ bool hasDPPWavefrontShifts() const {
+ return HasDPP && getGeneration() < GFX10;
+ }
+
bool hasDPP8() const {
return HasDPP8;
}
@@ -974,6 +992,10 @@ public:
return SGPRInitBug;
}
+ bool hasMFMAInlineLiteralBug() const {
+ return HasMFMAInlineLiteralBug;
+ }
+
bool has12DWordStoreHazard() const {
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
@@ -1036,6 +1058,13 @@ public:
/// VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
+ /// Return occupancy for the given function, using the given LDS size and
+ /// register counts if provided.
+ /// Note that occupancy can also be affected by the scratch allocation, but
+ /// we do not have enough information here to compute it.
+ unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0,
+ unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
+
/// \returns true if the flat_scratch register should be initialized with the
/// pointer to the wave's scratch memory rather than a size and offset.
bool flatScratchIsPointer() const {
@@ -1226,9 +1255,7 @@ public:
return Gen;
}
- unsigned getStackAlignment() const {
- return 4;
- }
+ Align getStackAlignment() const { return Align(4); }
R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0ea8db04c298..e8cf77161a14 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -238,16 +238,17 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUUseNativeCallsPass(*PR);
initializeAMDGPUSimplifyLibCallsPass(*PR);
initializeAMDGPUInlinerPass(*PR);
+ initializeAMDGPUPrintfRuntimeBindingPass(*PR);
initializeGCNRegBankReassignPass(*PR);
initializeGCNNSAReassignPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
- return llvm::make_unique<AMDGPUTargetObjectFile>();
+ return std::make_unique<AMDGPUTargetObjectFile>();
}
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
+ return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
}
static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
@@ -257,7 +258,7 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
- new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
+ new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
@@ -412,6 +413,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
PM.add(createAMDGPUExternalAAWrapperPass());
}
PM.add(createAMDGPUUnifyMetadataPass());
+ PM.add(createAMDGPUPrintfRuntimeBinding());
PM.add(createAMDGPUPropagateAttributesLatePass(this));
if (Internalize) {
PM.add(createInternalizePass(mustPreserveGV));
@@ -482,7 +484,7 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl(
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
+ I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
}
return I.get();
@@ -518,7 +520,7 @@ const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
+ I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
}
I->setScalarizeGlobalBehavior(ScalarizeGlobal);
@@ -659,6 +661,8 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
+ addPass(createAMDGPUPrintfRuntimeBinding());
+
// This must occur before inlining, as the inliner will not look through
// bitcast calls.
addPass(createAMDGPUFixFunctionBitcastsPass());
@@ -681,12 +685,6 @@ void AMDGPUPassConfig::addIRPasses() {
// without ever running any passes on the second.
addPass(createBarrierNoopPass());
- if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
- // TODO: May want to move later or split into an early and late one.
-
- addPass(createAMDGPUCodeGenPreparePass());
- }
-
// Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
if (TM.getTargetTriple().getArch() == Triple::r600)
addPass(createR600OpenCLImageTypeLoweringPass());
@@ -714,6 +712,11 @@ void AMDGPUPassConfig::addIRPasses() {
}
}
+ if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
+ // TODO: May want to move later or split into an early and late one.
+ addPass(createAMDGPUCodeGenPreparePass());
+ }
+
TargetPassConfig::addIRPasses();
// EarlyCSE is not always strong enough to clean up what LSR produces. For
@@ -1046,7 +1049,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
return true;
if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
- !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) {
+ !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
}
@@ -1095,7 +1098,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
if (YamlMFI.ArgInfo &&
(parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
- AMDGPU::SReg_128RegClass,
+ AMDGPU::SGPR_128RegClass,
MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index aaed280a1270..616196ad5ba3 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -57,7 +57,7 @@ using namespace llvm;
static cl::opt<unsigned> UnrollThresholdPrivate(
"amdgpu-unroll-threshold-private",
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
- cl::init(2500), cl::Hidden);
+ cl::init(2000), cl::Hidden);
static cl::opt<unsigned> UnrollThresholdLocal(
"amdgpu-unroll-threshold-local",
@@ -590,6 +590,61 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
return false;
}
+bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const {
+ switch (IID) {
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_fadd:
+ case Intrinsic::amdgcn_ds_fmin:
+ case Intrinsic::amdgcn_ds_fmax:
+ case Intrinsic::amdgcn_is_shared:
+ case Intrinsic::amdgcn_is_private:
+ OpIndexes.push_back(0);
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
+ IntrinsicInst *II, Value *OldV, Value *NewV) const {
+ auto IntrID = II->getIntrinsicID();
+ switch (IntrID) {
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_fadd:
+ case Intrinsic::amdgcn_ds_fmin:
+ case Intrinsic::amdgcn_ds_fmax: {
+ const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));
+ if (!IsVolatile->isZero())
+ return false;
+ Module *M = II->getParent()->getParent()->getParent();
+ Type *DestTy = II->getType();
+ Type *SrcTy = NewV->getType();
+ Function *NewDecl =
+ Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
+ II->setArgOperand(0, NewV);
+ II->setCalledFunction(NewDecl);
+ return true;
+ }
+ case Intrinsic::amdgcn_is_shared:
+ case Intrinsic::amdgcn_is_private: {
+ unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?
+ AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS;
+ unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ LLVMContext &Ctx = NewV->getType()->getContext();
+ ConstantInt *NewVal = (TrueAS == NewAS) ?
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
+ II->replaceAllUsesWith(NewVal);
+ II->eraseFromParent();
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
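The effect of the rewrite hook is easiest to see at the IR level. A hypothetical before/after, assuming the flat operand of llvm.amdgcn.is.shared is an addrspacecast from the local address space (addrspace(3), which is AMDGPUAS::LOCAL_ADDRESS):

// Before:
//   %f = addrspacecast i8 addrspace(3)* %p to i8*
//   %r = call i1 @llvm.amdgcn.is.shared(i8* %f)
// After rewriteIntrinsicWithAddressSpace(II, %f, %p):
//   all uses of %r are replaced by 'true' and the call is erased, because
//   TrueAS (LOCAL_ADDRESS) matches the new pointer's address space.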
unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
if (ST->hasVOP3PInsts()) {
@@ -638,6 +693,39 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
CommonTTI.getUnrollingPreferences(L, SE, UP);
}
+unsigned GCNTTIImpl::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands) {
+ // Estimate extractelement elimination
+ if (const ExtractElementInst *EE = dyn_cast<ExtractElementInst>(U)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(EE->getOperand(1));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+ return getVectorInstrCost(EE->getOpcode(), EE->getOperand(0)->getType(),
+ Idx);
+ }
+
+ // Estimate insertelement elimination
+ if (const InsertElementInst *IE = dyn_cast<InsertElementInst>(U)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+ return getVectorInstrCost(IE->getOpcode(), IE->getType(), Idx);
+ }
+
+ // Estimate different intrinsics, e.g. llvm.fabs
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ SmallVector<Value *, 4> Args(II->arg_operands());
+ FastMathFlags FMF;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+ FMF = FPMO->getFastMathFlags();
+ return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
+ FMF);
+ }
+ return BaseT::getUserCost(U, Operands);
+}
+
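With this override, a vector element access with a constant index is priced by getVectorInstrCost() instead of the generic fallback, so cost-driven passes such as the unroller can treat foldable extracts as cheap. A hypothetical query:

//   %x = extractelement <4 x i32> %v, i32 2
//   getUserCost(%x, Ops) == getVectorInstrCost(Instruction::ExtractElement,
//                                              <4 x i32>, /*Idx=*/2)
//   A non-constant index falls through with Idx == -1U ("unknown lane").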
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 6f1bf5a26f0d..67f7f9074f10 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -46,10 +46,18 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
Triple TargetTriple;
+ const TargetSubtargetInfo *ST;
+ const TargetLoweringBase *TLI;
+
+ const TargetSubtargetInfo *getST() const { return ST; }
+ const TargetLoweringBase *getTLI() const { return TLI; }
+
public:
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()),
- TargetTriple(TM->getTargetTriple()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ TargetTriple(TM->getTargetTriple()),
+ ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
+ TLI(ST->getTargetLowering()) {}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
@@ -183,6 +191,11 @@ public:
return AMDGPUAS::FLAT_ADDRESS;
}
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const;
+ bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const;
+
unsigned getVectorSplitCost() { return 0; }
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -191,7 +204,7 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- unsigned getInliningThresholdMultiplier() { return 7; }
+ unsigned getInliningThresholdMultiplier() { return 9; }
int getInlinerVectorBonusPercent() { return 0; }
@@ -201,6 +214,7 @@ public:
int getMinMaxReductionCost(Type *Ty, Type *CondTy,
bool IsPairwiseForm,
bool IsUnsigned);
+ unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
};
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 12f2e9519c9e..101ecfc0c87c 100644
--- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -1307,8 +1307,8 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
if (LandBlkHasOtherPred) {
report_fatal_error("Extra register needed to handle CFG");
- unsigned CmpResReg =
- HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
+ Register CmpResReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
report_fatal_error("Extra compare instruction needed to handle CFG");
insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
CmpResReg, DebugLoc());
@@ -1316,8 +1316,8 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
// XXX: We are running this after RA, so creating virtual registers will
// cause an assertion failure in the PostRA scheduling pass.
- unsigned InitReg =
- HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
+ Register InitReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
DebugLoc());
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 6d678966c98e..9dd511fab57c 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -143,6 +143,7 @@ public:
ImmTyDLC,
ImmTyGLC,
ImmTySLC,
+ ImmTySWZ,
ImmTyTFE,
ImmTyD16,
ImmTyClampSI,
@@ -216,14 +217,15 @@ public:
if (Kind == Token)
return true;
- if (Kind != Expression || !Expr)
- return false;
-
// When parsing operands, we can't always tell if something was meant to be
// a token, like 'gds', or an expression that references a global variable.
// In this case, we assume the string is an expression, and if we need to
// interpret it as a token, then we treat the symbol name as the token.
- return isa<MCSymbolRefExpr>(Expr);
+ return isSymbolRefExpr();
+ }
+
+ bool isSymbolRefExpr() const {
+ return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
}
bool isImm() const override {
@@ -274,8 +276,10 @@ public:
isRegClass(AMDGPU::VReg_64RegClassID) ||
isRegClass(AMDGPU::VReg_96RegClassID) ||
isRegClass(AMDGPU::VReg_128RegClassID) ||
+ isRegClass(AMDGPU::VReg_160RegClassID) ||
isRegClass(AMDGPU::VReg_256RegClassID) ||
- isRegClass(AMDGPU::VReg_512RegClassID);
+ isRegClass(AMDGPU::VReg_512RegClassID) ||
+ isRegClass(AMDGPU::VReg_1024RegClassID);
}
bool isVReg32() const {
@@ -286,6 +290,10 @@ public:
return isOff() || isVReg32();
}
+ bool isNull() const {
+ return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
+ }
+
bool isSDWAOperand(MVT type) const;
bool isSDWAFP16Operand() const;
bool isSDWAFP32Operand() const;
@@ -325,6 +333,7 @@ public:
bool isDLC() const { return isImmTy(ImmTyDLC); }
bool isGLC() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
+ bool isSWZ() const { return isImmTy(ImmTySWZ); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
@@ -817,6 +826,7 @@ public:
case ImmTyDLC: OS << "DLC"; break;
case ImmTyGLC: OS << "GLC"; break;
case ImmTySLC: OS << "SLC"; break;
+ case ImmTySWZ: OS << "SWZ"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
case ImmTyFORMAT: OS << "FORMAT"; break;
@@ -886,7 +896,7 @@ public:
int64_t Val, SMLoc Loc,
ImmTy Type = ImmTyNone,
bool IsFPImm = false) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
+ auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
Op->Imm.Val = Val;
Op->Imm.IsFPImm = IsFPImm;
Op->Imm.Type = Type;
@@ -899,7 +909,7 @@ public:
static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
StringRef Str, SMLoc Loc,
bool HasExplicitEncodingSize = true) {
- auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
+ auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
Res->StartLoc = Loc;
@@ -910,7 +920,7 @@ public:
static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
unsigned RegNo, SMLoc S,
SMLoc E) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
+ auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
Op->Reg.RegNo = RegNo;
Op->Reg.Mods = Modifiers();
Op->StartLoc = S;
@@ -920,7 +930,7 @@ public:
static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
const class MCExpr *Expr, SMLoc S) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
+ auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
Op->Expr = Expr;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -1051,11 +1061,23 @@ private:
std::string &CollectString);
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
- RegisterKind RegKind, unsigned Reg1,
- unsigned RegNum);
+ RegisterKind RegKind, unsigned Reg1);
bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
- unsigned& RegNum, unsigned& RegWidth,
- unsigned *DwordRegIndex);
+ unsigned& RegNum, unsigned& RegWidth);
+ unsigned ParseRegularReg(RegisterKind &RegKind,
+ unsigned &RegNum,
+ unsigned &RegWidth);
+ unsigned ParseSpecialReg(RegisterKind &RegKind,
+ unsigned &RegNum,
+ unsigned &RegWidth);
+ unsigned ParseRegList(RegisterKind &RegKind,
+ unsigned &RegNum,
+ unsigned &RegWidth);
+ bool ParseRegRange(unsigned& Num, unsigned& Width);
+ unsigned getRegularReg(RegisterKind RegKind,
+ unsigned RegNum,
+ unsigned RegWidth);
+
bool isRegister();
bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
@@ -1306,6 +1328,7 @@ private:
bool validateOpSel(const MCInst &Inst);
bool validateVccOperand(unsigned Reg) const;
bool validateVOP3Literal(const MCInst &Inst) const;
+ unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@@ -1321,6 +1344,7 @@ private:
void peekTokens(MutableArrayRef<AsmToken> Tokens);
AsmToken::TokenKind getTokenKind() const;
bool parseExpr(int64_t &Imm);
+ bool parseExpr(OperandVector &Operands);
StringRef getTokenStr() const;
AsmToken peekToken();
AsmToken getToken() const;
@@ -1399,9 +1423,12 @@ public:
void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
+ void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
- uint64_t BasicInstType, bool skipVcc = false);
+ uint64_t BasicInstType,
+ bool SkipDstVcc = false,
+ bool SkipSrcVcc = false);
AMDGPUOperand::Ptr defaultBLGP() const;
AMDGPUOperand::Ptr defaultCBSZ() const;
@@ -1636,8 +1663,8 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
}
bool AMDGPUOperand::isBoolReg() const {
- return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
- isSCSrcB64() : isSCSrcB32();
+ return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
+ (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
@@ -1849,6 +1876,8 @@ static bool isInlineValue(unsigned Reg) {
case AMDGPU::SRC_EXECZ:
case AMDGPU::SRC_SCC:
return true;
+ case AMDGPU::SGPR_NULL:
+ return true;
default:
return false;
}
@@ -1870,8 +1899,10 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 2: return AMDGPU::VReg_64RegClassID;
case 3: return AMDGPU::VReg_96RegClassID;
case 4: return AMDGPU::VReg_128RegClassID;
+ case 5: return AMDGPU::VReg_160RegClassID;
case 8: return AMDGPU::VReg_256RegClassID;
case 16: return AMDGPU::VReg_512RegClassID;
+ case 32: return AMDGPU::VReg_1024RegClassID;
}
} else if (Is == IS_TTMP) {
switch (RegWidth) {
@@ -1944,7 +1975,7 @@ static unsigned getSpecialRegForName(StringRef RegName) {
.Case("tba_lo", AMDGPU::TBA_LO)
.Case("tba_hi", AMDGPU::TBA_HI)
.Case("null", AMDGPU::SGPR_NULL)
- .Default(0);
+ .Default(AMDGPU::NoRegister);
}
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
@@ -1959,8 +1990,7 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
}
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
- RegisterKind RegKind, unsigned Reg1,
- unsigned RegNum) {
+ RegisterKind RegKind, unsigned Reg1) {
switch (RegKind) {
case IS_SPECIAL:
if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
@@ -2008,14 +2038,37 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
}
}
-static const StringRef Registers[] = {
- { "v" },
- { "s" },
- { "ttmp" },
- { "acc" },
- { "a" },
+struct RegInfo {
+ StringLiteral Name;
+ RegisterKind Kind;
+};
+
+static constexpr RegInfo RegularRegisters[] = {
+ {{"v"}, IS_VGPR},
+ {{"s"}, IS_SGPR},
+ {{"ttmp"}, IS_TTMP},
+ {{"acc"}, IS_AGPR},
+ {{"a"}, IS_AGPR},
};
+static bool isRegularReg(RegisterKind Kind) {
+ return Kind == IS_VGPR ||
+ Kind == IS_SGPR ||
+ Kind == IS_TTMP ||
+ Kind == IS_AGPR;
+}
+
+static const RegInfo* getRegularRegInfo(StringRef Str) {
+ for (const RegInfo &Reg : RegularRegisters)
+ if (Str.startswith(Reg.Name))
+ return &Reg;
+ return nullptr;
+}
+
+static bool getRegNum(StringRef Str, unsigned& Num) {
+ return !Str.getAsInteger(10, Num);
+}
+
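getRegularRegInfo() is a first-match prefix scan, so table order matters: "acc" must be tried before "a", otherwise "acc0" would match the "a" entry and getRegNum() would fail on the leftover "cc0". A few illustrative lookups:

//   "v255"     -> {"v", IS_VGPR},   suffix "255" -> RegNum 255
//   "acc[0:3]" -> {"acc", IS_AGPR}, empty suffix -> parsed as a range
//   "vcc"      -> prefix-matches {"v", ...}, but "cc" is not a number, so it
//                 is left to the special-register path instead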
bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
const AsmToken &NextToken) const {
@@ -2029,24 +2082,24 @@ AMDGPUAsmParser::isRegister(const AsmToken &Token,
// A single register like s0 or a range of registers like s[0:1]
- StringRef RegName = Token.getString();
-
- for (StringRef Reg : Registers) {
- if (RegName.startswith(Reg)) {
- if (Reg.size() < RegName.size()) {
- unsigned RegNum;
- // A single register with an index: rXX
- if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
- return true;
- } else {
- // A range of registers: r[XX:YY].
- if (NextToken.is(AsmToken::LBrac))
- return true;
- }
+ StringRef Str = Token.getString();
+ const RegInfo *Reg = getRegularRegInfo(Str);
+ if (Reg) {
+ StringRef RegName = Reg->Name;
+ StringRef RegSuffix = Str.substr(RegName.size());
+ if (!RegSuffix.empty()) {
+ unsigned Num;
+ // A single register with an index: rXX
+ if (getRegNum(RegSuffix, Num))
+ return true;
+ } else {
+ // A range of registers: r[XX:YY].
+ if (NextToken.is(AsmToken::LBrac))
+ return true;
}
}
- return getSpecialRegForName(RegName);
+ return getSpecialRegForName(Str) != AMDGPU::NoRegister;
}
bool
@@ -2055,137 +2108,161 @@ AMDGPUAsmParser::isRegister()
return isRegister(getToken(), peekToken());
}
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
- unsigned &RegNum, unsigned &RegWidth,
- unsigned *DwordRegIndex) {
- if (DwordRegIndex) { *DwordRegIndex = 0; }
+unsigned
+AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
+ unsigned RegNum,
+ unsigned RegWidth) {
+
+ assert(isRegularReg(RegKind));
+
+ unsigned AlignSize = 1;
+ if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
+ // SGPR and TTMP registers must be aligned.
+ // Max required alignment is 4 dwords.
+ AlignSize = std::min(RegWidth, 4u);
+ }
+
+ if (RegNum % AlignSize != 0)
+ return AMDGPU::NoRegister;
+
+ unsigned RegIdx = RegNum / AlignSize;
+ int RCID = getRegClass(RegKind, RegWidth);
+ if (RCID == -1)
+ return AMDGPU::NoRegister;
+
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
- if (getLexer().is(AsmToken::Identifier)) {
- StringRef RegName = Parser.getTok().getString();
- if ((Reg = getSpecialRegForName(RegName))) {
- Parser.Lex();
- RegKind = IS_SPECIAL;
- } else {
- unsigned RegNumIndex = 0;
- if (RegName[0] == 'v') {
- RegNumIndex = 1;
- RegKind = IS_VGPR;
- } else if (RegName[0] == 's') {
- RegNumIndex = 1;
- RegKind = IS_SGPR;
- } else if (RegName[0] == 'a') {
- RegNumIndex = RegName.startswith("acc") ? 3 : 1;
- RegKind = IS_AGPR;
- } else if (RegName.startswith("ttmp")) {
- RegNumIndex = strlen("ttmp");
- RegKind = IS_TTMP;
- } else {
- return false;
- }
- if (RegName.size() > RegNumIndex) {
- // Single 32-bit register: vXX.
- if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
- return false;
- Parser.Lex();
- RegWidth = 1;
- } else {
- // Range of registers: v[XX:YY]. ":YY" is optional.
- Parser.Lex();
- int64_t RegLo, RegHi;
- if (getLexer().isNot(AsmToken::LBrac))
- return false;
- Parser.Lex();
+ const MCRegisterClass RC = TRI->getRegClass(RCID);
+ if (RegIdx >= RC.getNumRegs())
+ return AMDGPU::NoRegister;
- if (getParser().parseAbsoluteExpression(RegLo))
- return false;
+ return RC.getRegister(RegIdx);
+}
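Worked examples of the alignment rule above (SGPR and TTMP ranges must start at a multiple of min(RegWidth, 4); VGPRs keep AlignSize 1):

//   s[2:3]  : RegNum 2, RegWidth 2, AlignSize 2 -> RegIdx 1 of the 64-bit class
//   s[1:2]  : 1 % 2 != 0                        -> AMDGPU::NoRegister
//   s[4:11] : RegWidth 8, AlignSize min(8,4)=4  -> RegIdx 1 of the 256-bit class
//   v[1:2]  : AlignSize 1, so any start is valid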
- const bool isRBrace = getLexer().is(AsmToken::RBrac);
- if (!isRBrace && getLexer().isNot(AsmToken::Colon))
- return false;
- Parser.Lex();
+bool
+AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
+ int64_t RegLo, RegHi;
+ if (!trySkipToken(AsmToken::LBrac))
+ return false;
- if (isRBrace) {
- RegHi = RegLo;
- } else {
- if (getParser().parseAbsoluteExpression(RegHi))
- return false;
+ if (!parseExpr(RegLo))
+ return false;
- if (getLexer().isNot(AsmToken::RBrac))
- return false;
- Parser.Lex();
- }
- RegNum = (unsigned) RegLo;
- RegWidth = (RegHi - RegLo) + 1;
- }
- }
- } else if (getLexer().is(AsmToken::LBrac)) {
- // List of consecutive registers: [s0,s1,s2,s3]
- Parser.Lex();
- if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
- return false;
- if (RegWidth != 1)
+ if (trySkipToken(AsmToken::Colon)) {
+ if (!parseExpr(RegHi))
return false;
- RegisterKind RegKind1;
- unsigned Reg1, RegNum1, RegWidth1;
- do {
- if (getLexer().is(AsmToken::Comma)) {
- Parser.Lex();
- } else if (getLexer().is(AsmToken::RBrac)) {
- Parser.Lex();
- break;
- } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
- if (RegWidth1 != 1) {
- return false;
- }
- if (RegKind1 != RegKind) {
- return false;
- }
- if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
- return false;
- }
- } else {
- return false;
- }
- } while (true);
} else {
- return false;
+ RegHi = RegLo;
}
- switch (RegKind) {
- case IS_SPECIAL:
+
+ if (!trySkipToken(AsmToken::RBrac))
+ return false;
+
+ if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
+ return false;
+
+ Num = static_cast<unsigned>(RegLo);
+ Width = (RegHi - RegLo) + 1;
+ return true;
+}
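// Editor's sketch (hypothetical helper, not part of the patch): the value
// semantics of ParseRegRange above, minus tokenization. "v[8:11]" yields
// Num = 8, Width = 4; "v[8]" (no colon) yields Width = 1.
#include <cstdint>

static bool decodeRegRange(int64_t RegLo, int64_t RegHi,
                           unsigned &Num, unsigned &Width) {
  // Both bounds must fit in 32 unsigned bits and be correctly ordered,
  // mirroring the isUInt<32> and RegLo > RegHi checks above.
  if (RegLo < 0 || RegHi < 0 || RegLo > UINT32_MAX || RegHi > UINT32_MAX ||
      RegLo > RegHi)
    return false;
  Num = static_cast<unsigned>(RegLo);
  Width = static_cast<unsigned>(RegHi - RegLo) + 1;
  return true;
}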
+
+unsigned
+AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
+ unsigned &RegNum,
+ unsigned &RegWidth) {
+ assert(isToken(AsmToken::Identifier));
+ unsigned Reg = getSpecialRegForName(getTokenStr());
+ if (Reg) {
RegNum = 0;
RegWidth = 1;
- break;
- case IS_VGPR:
- case IS_SGPR:
- case IS_AGPR:
- case IS_TTMP:
- {
- unsigned Size = 1;
- if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
- // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
- Size = std::min(RegWidth, 4u);
- }
- if (RegNum % Size != 0)
- return false;
- if (DwordRegIndex) { *DwordRegIndex = RegNum; }
- RegNum = RegNum / Size;
- int RCID = getRegClass(RegKind, RegWidth);
- if (RCID == -1)
- return false;
- const MCRegisterClass RC = TRI->getRegClass(RCID);
- if (RegNum >= RC.getNumRegs())
- return false;
- Reg = RC.getRegister(RegNum);
- break;
+ RegKind = IS_SPECIAL;
+ lex(); // skip register name
+ }
+ return Reg;
+}
+
+unsigned
+AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
+ unsigned &RegNum,
+ unsigned &RegWidth) {
+ assert(isToken(AsmToken::Identifier));
+ StringRef RegName = getTokenStr();
+
+ const RegInfo *RI = getRegularRegInfo(RegName);
+ if (!RI)
+ return AMDGPU::NoRegister;
+ lex(); // skip register name
+
+ RegKind = RI->Kind;
+ StringRef RegSuffix = RegName.substr(RI->Name.size());
+ if (!RegSuffix.empty()) {
+ // Single 32-bit register: vXX.
+ if (!getRegNum(RegSuffix, RegNum))
+ return AMDGPU::NoRegister;
+ RegWidth = 1;
+ } else {
+ // Range of registers: v[XX:YY]. ":YY" is optional.
+ if (!ParseRegRange(RegNum, RegWidth))
+ return AMDGPU::NoRegister;
}
- default:
- llvm_unreachable("unexpected register kind");
+ return getRegularReg(RegKind, RegNum, RegWidth);
+}
+
+unsigned
+AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
+ unsigned &RegNum,
+ unsigned &RegWidth) {
+ unsigned Reg = AMDGPU::NoRegister;
+
+ if (!trySkipToken(AsmToken::LBrac))
+ return AMDGPU::NoRegister;
+
+ // List of consecutive registers, e.g.: [s0,s1,s2,s3]
+
+ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
+ return AMDGPU::NoRegister;
+ if (RegWidth != 1)
+ return AMDGPU::NoRegister;
+
+ for (; trySkipToken(AsmToken::Comma); ) {
+ RegisterKind NextRegKind;
+ unsigned NextReg, NextRegNum, NextRegWidth;
+
+ if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
+ return AMDGPU::NoRegister;
+ if (NextRegWidth != 1)
+ return AMDGPU::NoRegister;
+ if (NextRegKind != RegKind)
+ return AMDGPU::NoRegister;
+ if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
+ return AMDGPU::NoRegister;
}
- if (!subtargetHasRegister(*TRI, Reg))
- return false;
- return true;
+ if (!trySkipToken(AsmToken::RBrac))
+ return AMDGPU::NoRegister;
+
+ if (isRegularReg(RegKind))
+ Reg = getRegularReg(RegKind, RegNum, RegWidth);
+
+ return Reg;
+}
+
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
+ unsigned &Reg,
+ unsigned &RegNum,
+ unsigned &RegWidth) {
+ Reg = AMDGPU::NoRegister;
+
+ if (isToken(AsmToken::Identifier)) {
+ Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
+ if (Reg == AMDGPU::NoRegister)
+ Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
+ } else {
+ Reg = ParseRegList(RegKind, RegNum, RegWidth);
+ }
+
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
}
Optional<StringRef>
@@ -2241,18 +2318,18 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
SMLoc StartLoc = Tok.getLoc();
SMLoc EndLoc = Tok.getEndLoc();
RegisterKind RegKind;
- unsigned Reg, RegNum, RegWidth, DwordRegIndex;
+ unsigned Reg, RegNum, RegWidth;
- if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
+ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
 // FIXME: improve error messages (bug 41303).
Error(StartLoc, "not a valid operand.");
return nullptr;
}
if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
- if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
+ if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
- KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
+ KernelScope.usesRegister(RegKind, RegNum, RegWidth);
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
@@ -2648,7 +2725,6 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
case AMDGPU::VCC_LO:
case AMDGPU::VCC_HI:
case AMDGPU::M0:
- case AMDGPU::SGPR_NULL:
return Reg;
default:
break;
@@ -2697,13 +2773,38 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
}
}
+unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
+ if (!isGFX10())
+ return 1;
+
+ switch (Opcode) {
+ // 64-bit shift instructions can use only one scalar value input
+ case AMDGPU::V_LSHLREV_B64:
+ case AMDGPU::V_LSHLREV_B64_gfx10:
+ case AMDGPU::V_LSHL_B64:
+ case AMDGPU::V_LSHRREV_B64:
+ case AMDGPU::V_LSHRREV_B64_gfx10:
+ case AMDGPU::V_LSHR_B64:
+ case AMDGPU::V_ASHRREV_I64:
+ case AMDGPU::V_ASHRREV_I64_gfx10:
+ case AMDGPU::V_ASHR_I64:
+ return 1;
+ default:
+ return 2;
+ }
+}
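// Editor's sketch (not part of the patch): the limit policy that
// getConstantBusLimit encodes above, as a standalone predicate. IsGFX10 and
// Is64BitShift are hypothetical stand-ins for the subtarget/opcode checks.
static unsigned constantBusLimit(bool IsGFX10, bool Is64BitShift) {
  if (!IsGFX10)
    return 1;                  // pre-GFX10: one scalar source per VALU op
  return Is64BitShift ? 1 : 2; // 64-bit shifts keep the old limit on GFX10
}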
+
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
const MCOperand &MO = Inst.getOperand(OpIdx);
if (MO.isImm()) {
return !isInlineConstant(Inst, OpIdx);
+ } else if (MO.isReg()) {
+ auto Reg = MO.getReg();
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
+ } else {
+ return true;
}
- return !MO.isReg() ||
- isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
@@ -2782,10 +2883,7 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
}
ConstantBusUseCount += NumLiterals;
- if (isGFX10())
- return ConstantBusUseCount <= 2;
-
- return ConstantBusUseCount <= 1;
+ return ConstantBusUseCount <= getConstantBusLimit(Opcode);
}
bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
@@ -3212,6 +3310,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
const int OpIndices[] = { Src0Idx, Src1Idx };
+ unsigned NumExprs = 0;
unsigned NumLiterals = 0;
uint32_t LiteralValue;
@@ -3219,19 +3318,21 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
if (OpIdx == -1) break;
const MCOperand &MO = Inst.getOperand(OpIdx);
- if (MO.isImm() &&
- // Exclude special imm operands (like that used by s_set_gpr_idx_on)
- AMDGPU::isSISrcOperand(Desc, OpIdx) &&
- !isInlineConstant(Inst, OpIdx)) {
- uint32_t Value = static_cast<uint32_t>(MO.getImm());
- if (NumLiterals == 0 || LiteralValue != Value) {
- LiteralValue = Value;
- ++NumLiterals;
+ // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
+ if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
+ if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
+ uint32_t Value = static_cast<uint32_t>(MO.getImm());
+ if (NumLiterals == 0 || LiteralValue != Value) {
+ LiteralValue = Value;
+ ++NumLiterals;
+ }
+ } else if (MO.isExpr()) {
+ ++NumExprs;
}
}
}
- return NumLiterals <= 1;
+ return NumLiterals + NumExprs <= 1;
}
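// Editor's note: a minimal restatement of the rule above, assuming the
// hypothetical names below. SOP instructions have a single 32-bit literal
// slot, so distinct literals and relocatable expressions share one budget.
static bool sopLiteralCountOk(unsigned NumDistinctLiterals, unsigned NumExprs) {
  // A relocatable expression occupies the literal slot just like a literal.
  return NumDistinctLiterals + NumExprs <= 1;
}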
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
@@ -3267,6 +3368,7 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+ unsigned NumExprs = 0;
unsigned NumLiterals = 0;
uint32_t LiteralValue;
@@ -3274,17 +3376,26 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
if (OpIdx == -1) break;
const MCOperand &MO = Inst.getOperand(OpIdx);
- if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
+ if (!MO.isImm() && !MO.isExpr())
+ continue;
+ if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
continue;
- if (!isInlineConstant(Inst, OpIdx)) {
+ if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
+ getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
+ return false;
+
+ if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
uint32_t Value = static_cast<uint32_t>(MO.getImm());
if (NumLiterals == 0 || LiteralValue != Value) {
LiteralValue = Value;
++NumLiterals;
}
+ } else if (MO.isExpr()) {
+ ++NumExprs;
}
}
+ NumLiterals += NumExprs;
return !NumLiterals ||
(NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
@@ -3607,37 +3718,44 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
Val, ValRange);
- UserSGPRCount += 4;
+ if (Val)
+ UserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
Val, ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
Val, ValRange);
- UserSGPRCount += 1;
+ if (Val)
+ UserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
if (IVersion.Major < 10)
return getParser().Error(IDRange.Start, "directive requires gfx10+",
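// Editor's note on the hunk above: each .amdhsa_user_sgpr_* directive now
// contributes to UserSGPRCount only when its value is nonzero, since a
// disabled property (Val == 0) allocates no user SGPRs. A minimal sketch
// with hypothetical parameters:
static unsigned countUserSgprs(bool PrivateSegmentBuffer, bool DispatchPtr,
                               bool QueuePtr, bool KernargSegmentPtr,
                               bool DispatchId, bool FlatScratchInit,
                               bool PrivateSegmentSize) {
  unsigned N = 0;
  if (PrivateSegmentBuffer) N += 4; // 128-bit buffer descriptor
  if (DispatchPtr)          N += 2; // 64-bit pointer
  if (QueuePtr)             N += 2;
  if (KernargSegmentPtr)    N += 2;
  if (DispatchId)           N += 2;
  if (FlatScratchInit)      N += 2;
  if (PrivateSegmentSize)   N += 1; // 32-bit size
  return N;
}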
@@ -5225,6 +5343,23 @@ AMDGPUAsmParser::parseExpr(int64_t &Imm) {
}
bool
+AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
+ SMLoc S = getLoc();
+
+ const MCExpr *Expr;
+ if (Parser.parseExpression(Expr))
+ return false;
+
+ int64_t IntVal;
+ if (Expr->evaluateAsAbsolute(IntVal)) {
+ Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
+ } else {
+ Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
+ }
+ return true;
+}
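// Editor's sketch (hypothetical names, not part of the patch): the folding
// rule in parseExpr above. Expressions that evaluate to a constant become
// immediate operands; anything symbolic is kept as an MCExpr and resolved
// at layout/relocation time, e.g. "4+4" -> Imm 8, "label+4" -> Expr.
enum class ParsedOperandKind { Imm, Expr };

static ParsedOperandKind classifyParsedExpr(bool EvaluatesToAbsolute) {
  return EvaluatesToAbsolute ? ParsedOperandKind::Imm
                             : ParsedOperandKind::Expr;
}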
+
+bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
if (isToken(AsmToken::String)) {
Val = getToken().getStringContents();
@@ -5605,25 +5740,29 @@ bool AMDGPUOperand::isGPRIdxMode() const {
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
- SMLoc S = Parser.getTok().getLoc();
- switch (getLexer().getKind()) {
- default: return MatchOperand_ParseFail;
- case AsmToken::Integer: {
- int64_t Imm;
- if (getParser().parseAbsoluteExpression(Imm))
- return MatchOperand_ParseFail;
- Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
- return MatchOperand_Success;
- }
+ // Make sure we are not parsing something
+ // that looks like a label or an expression but is not.
+ // This will improve error messages.
+ if (isRegister() || isModifier())
+ return MatchOperand_NoMatch;
- case AsmToken::Identifier:
- Operands.push_back(AMDGPUOperand::CreateExpr(this,
- MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
- Parser.getTok().getString()), getContext()), S));
- Parser.Lex();
- return MatchOperand_Success;
+ if (parseExpr(Operands)) {
+
+ AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
+ assert(Opr.isImm() || Opr.isExpr());
+ SMLoc Loc = Opr.getStartLoc();
+
+ // Currently we do not support arbitrary expressions as branch targets.
+ // Only labels and absolute expressions are accepted.
+ if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
+ Error(Loc, "expected an absolute expression or a label");
+ } else if (Opr.isImm() && !Opr.isS16Imm()) {
+ Error(Loc, "expected a 16-bit signed jump offset");
+ }
}
+
+ return MatchOperand_Success; // avoid excessive error messages
}
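// Editor's sketch (hypothetical predicate, not part of the patch): the
// acceptance rule of parseSOppBrTarget above. A branch target must be a
// plain label reference or an absolute value fitting a signed 16-bit offset.
static bool isAcceptableBranchTarget(bool IsSymbolRef, bool IsImm,
                                     bool FitsSigned16) {
  // s_branch foo    -> symbol reference, accepted
  // s_branch 0x100  -> absolute and fits s16, accepted
  // s_branch foo+4  -> composite expression, rejected with a diagnostic
  return IsSymbolRef || (IsImm && FitsSigned16);
}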
//===----------------------------------------------------------------------===//
@@ -5908,6 +6047,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
+ {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
@@ -5941,8 +6081,6 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
};
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
- unsigned size = Operands.size();
- assert(size > 0);
OperandMatchResultTy res = parseOptionalOpr(Operands);
@@ -5957,17 +6095,13 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operan
 // to make sure the autogenerated parser of custom operands never hits
 // hardcoded mandatory operands.
- if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
-
- // We have parsed the first optional operand.
- // Parse as many operands as necessary to skip all mandatory operands.
+ for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
+ if (res != MatchOperand_Success ||
+ isToken(AsmToken::EndOfStatement))
+ break;
- for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
- if (res != MatchOperand_Success ||
- getLexer().is(AsmToken::EndOfStatement)) break;
- if (getLexer().is(AsmToken::Comma)) Parser.Lex();
- res = parseOptionalOpr(Operands);
- }
+ trySkipToken(AsmToken::Comma);
+ res = parseOptionalOpr(Operands);
}
return res;
@@ -6682,7 +6816,11 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
}
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
- cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
+ cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
+}
+
+void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
+ cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
@@ -6690,11 +6828,14 @@ void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
}
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
- uint64_t BasicInstType, bool skipVcc) {
+ uint64_t BasicInstType,
+ bool SkipDstVcc,
+ bool SkipSrcVcc) {
using namespace llvm::AMDGPU::SDWA;
OptionalImmIndexMap OptionalIdx;
- bool skippedVcc = false;
+ bool SkipVcc = SkipDstVcc || SkipSrcVcc;
+ bool SkippedVcc = false;
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
@@ -6704,19 +6845,21 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
- if (skipVcc && !skippedVcc && Op.isReg() &&
+ if (SkipVcc && !SkippedVcc && Op.isReg() &&
(Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
 // VOP2b (v_add_u32, v_sub_u32, ...) SDWA forms use the "vcc" token as dst.
// Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
// or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
// Skip VCC only if we didn't skip it on previous iteration.
+ // Note that src0 and src1 occupy 2 slots each because of modifiers.
if (BasicInstType == SIInstrFlags::VOP2 &&
- (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
- skippedVcc = true;
+ ((SkipDstVcc && Inst.getNumOperands() == 1) ||
+ (SkipSrcVcc && Inst.getNumOperands() == 5))) {
+ SkippedVcc = true;
continue;
} else if (BasicInstType == SIInstrFlags::VOPC &&
Inst.getNumOperands() == 0) {
- skippedVcc = true;
+ SkippedVcc = true;
continue;
}
}
@@ -6728,7 +6871,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
} else {
llvm_unreachable("Invalid operand type");
}
- skippedVcc = false;
+ SkippedVcc = false;
}
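// Editor's note on the loop above: for VOP2b SDWA forms the "vcc" token can
// appear both as the carry-out dst and as the carry-in src, and each must be
// skipped when building the MCInst. Since src0/src1 each occupy two slots
// (operand + modifiers), dst-vcc is skipped when Inst has 1 operand and
// src-vcc when it has 5, matching the diff's own examples:
//   v_add_i32_sdwa  v1, vcc, v2, v3       // dst vcc skipped
//   v_addc_u32_sdwa v1, vcc, v2, v3, vcc  // dst and src vcc skipped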
if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
@@ -6849,6 +6992,14 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
case MCK_AttrChan:
return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
+ case MCK_SReg_64:
+ case MCK_SReg_64_XEXEC:
+ // Null is defined as a 32-bit register but
+ // should also be accepted for 64-bit operands.
+ // The following code enables it for SReg_64 operands
+ // used as source and destination. Remaining source
+ // operands are handled in isInlinableImm.
+ return Operand.isNull() ? Match_Success : Match_InvalidOperand;
default:
return Match_InvalidOperand;
}
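// Editor's note: with the MCK_SReg_64 cases above, "null" (SGPR_NULL) can
// match 64-bit scalar operand classes, so e.g. "s_mov_b64 s[0:1], null"
// should assemble on targets that have the register. This hook only covers
// SReg_64 sources and destinations; other source positions go through
// isInlinableImm, as the comment above says.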
diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td
index 62a19d848af2..1b12550aed88 100644
--- a/lib/Target/AMDGPU/BUFInstructions.td
+++ b/lib/Target/AMDGPU/BUFInstructions.td
@@ -7,13 +7,13 @@
//===----------------------------------------------------------------------===//
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 9, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
-def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">;
+def MUBUFOffset : ComplexPattern<i64, 8, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
@@ -54,6 +54,17 @@ class MTBUFAddr64Table <bit is_addr64, string Name> {
// MTBUF classes
//===----------------------------------------------------------------------===//
+class MTBUFGetBaseOpcode<string Op> {
+ string ret = !subst("FORMAT_XY", "FORMAT_X",
+ !subst("FORMAT_XYZ", "FORMAT_X",
+ !subst("FORMAT_XYZW", "FORMAT_X", Op)));
+}
+
+class getMTBUFElements<string Op> {
+ int ret = 1;
+}
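// Editor's sketch (C++, hypothetical names, not part of the patch): what
// MTBUFGetBaseOpcode's nested !subst chain computes. The innermost
// substitution runs first, so the longest suffix is rewritten before its
// prefixes can match.
#include <string>

static std::string mtbufBaseOpcode(std::string Op) {
  auto replaceAll = [&Op](const std::string &From, const std::string &To) {
    for (size_t P = Op.find(From); P != std::string::npos;
         P = Op.find(From, P + To.size()))
      Op.replace(P, From.size(), To);
  };
  replaceAll("FORMAT_XYZW", "FORMAT_X"); // longest suffix first
  replaceAll("FORMAT_XYZ", "FORMAT_X");
  replaceAll("FORMAT_XY", "FORMAT_X");
  // e.g. TBUFFER_LOAD_FORMAT_XYZW -> TBUFFER_LOAD_FORMAT_X
  return Op;
}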
+
+
class MTBUF_Pseudo <string opName, dag outs, dag ins,
string asmOps, list<dag> pattern=[]> :
InstSI<outs, ins, "", pattern>,
@@ -67,6 +78,9 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
string Mnemonic = opName;
string AsmOperands = asmOps;
+ Instruction Opcode = !cast<Instruction>(NAME);
+ Instruction BaseOpcode = !cast<Instruction>(MTBUFGetBaseOpcode<NAME>.ret);
+
let VM_CNT = 1;
let EXP_CNT = 1;
let MTBUF = 1;
@@ -90,6 +104,7 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_offset = 1;
bits<1> has_slc = 1;
bits<1> has_tfe = 1;
+ bits<4> elements = 0;
}
class MTBUF_Real <MTBUF_Pseudo ps> :
@@ -126,17 +141,17 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc),
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc)
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz)
);
dag InsData = !if(!empty(vaddrList),
(ins vdataClass:$vdata, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
- SLC:$slc, TFE:$tfe, DLC:$dlc),
+ SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz),
(ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
- SLC:$slc, TFE:$tfe, DLC:$dlc)
+ SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz)
);
dag ret = !if(!empty(vdataList), InsNoData, InsData);
}
@@ -181,51 +196,54 @@ class MTBUF_SetupAddr<int addrKind> {
class MTBUF_Load_Pseudo <string opName,
int addrKind,
RegisterClass vdataClass,
+ int elems,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind>
: MTBUF_Pseudo<opName,
(outs vdataClass:$vdata),
getMTBUFIns<addrKindCopy>.ret,
- " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let mayLoad = 1;
let mayStore = 0;
+ let elements = elems;
}
multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
- ValueType load_vt = i32,
+ int elems, ValueType load_vt = i32,
SDPatternOperator ld = null_frag> {
- def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems,
[(set load_vt:$vdata,
(ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
- i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
+ i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>,
MTBUFAddr64Table<0, NAME>;
- def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems,
[(set load_vt:$vdata,
(ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
- i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
+ i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>,
MTBUFAddr64Table<1, NAME>;
- def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
- def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
- def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
+ def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
+ def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
- def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
- def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
- def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>;
+ def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
+ def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
+ def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
}
}
class MTBUF_Store_Pseudo <string opName,
int addrKind,
RegisterClass vdataClass,
+ int elems,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind,
@@ -233,39 +251,40 @@ class MTBUF_Store_Pseudo <string opName,
: MTBUF_Pseudo<opName,
(outs),
getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
- " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let mayLoad = 0;
let mayStore = 1;
+ let elements = elems;
}
multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
- ValueType store_vt = i32,
+ int elems, ValueType store_vt = i32,
SDPatternOperator st = null_frag> {
- def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i8:$format, i1:$glc,
- i1:$slc, i1:$tfe, i1:$dlc))]>,
+ i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
MTBUFAddr64Table<0, NAME>;
- def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i8:$format, i1:$glc,
- i1:$slc, i1:$tfe, i1:$dlc))]>,
+ i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
MTBUFAddr64Table<1, NAME>;
- def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
- def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
- def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
+ def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
+ def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
- def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
- def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
- def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>;
+ def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
+ def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
+ def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
}
}
@@ -320,7 +339,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_offset = 1;
bits<1> has_slc = 1;
bits<1> has_tfe = 1;
- bits<4> dwords = 0;
+ bits<4> elements = 0;
}
class MUBUF_Real <MUBUF_Pseudo ps> :
@@ -393,18 +412,30 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
);
dag ret = !con(
!if(!empty(vdataList), InsNoData, InsData),
- !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc))
+ !if(isLds, (ins DLC:$dlc, SWZ:$swz), (ins TFE:$tfe, DLC:$dlc, SWZ:$swz))
);
}
-class getMUBUFDwords<RegisterClass regClass> {
- string regClassAsInt = !cast<string>(regClass);
+class getMUBUFElements<ValueType vt> {
+ // !eq cannot compare ValueType values directly, so compare their names as strings.
+ string vtAsStr = !cast<string>(vt);
+
int ret =
- !if(!eq(regClassAsInt, !cast<string>(VGPR_32)), 1,
- !if(!eq(regClassAsInt, !cast<string>(VReg_64)), 2,
- !if(!eq(regClassAsInt, !cast<string>(VReg_96)), 3,
- !if(!eq(regClassAsInt, !cast<string>(VReg_128)), 4,
- 0))));
+ !if(!eq(vtAsStr, "f16"), 1,
+ !if(!eq(vtAsStr, "v2f16"), 2,
+ !if(!eq(vtAsStr, "v3f16"), 3,
+ !if(!eq(vtAsStr, "v4f16"), 4,
+ !if(!eq(vt.Size, 32), 1,
+ !if(!eq(vt.Size, 64), 2,
+ !if(!eq(vt.Size, 96), 3,
+ !if(!eq(vt.Size, 128), 4, 0)
+ )
+ )
+ )
+ )
+ )
+ )
+ );
}
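// Editor's sketch (hypothetical, not part of the patch): the element count
// computed by getMUBUFElements above. Packed f16 vectors are special-cased
// (v2f16 still fits one dword but counts as two elements); everything else
// falls back to size-in-bits / 32.
#include <cstring>

static int mubufElements(const char *VT, unsigned SizeInBits) {
  auto eq = [VT](const char *S) { return std::strcmp(VT, S) == 0; };
  if (eq("f16"))   return 1;
  if (eq("v2f16")) return 2;
  if (eq("v3f16")) return 3;
  if (eq("v4f16")) return 4;
  switch (SizeInBits) {
  case 32:  return 1;
  case 64:  return 2;
  case 96:  return 3;
  case 128: return 4;
  default:  return 0;
  }
}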
class getMUBUFIns<int addrKind, list<RegisterClass> vdataList=[], bit isLds = 0> {
@@ -442,18 +473,18 @@ class MUBUF_SetupAddr<int addrKind> {
class MUBUF_Load_Pseudo <string opName,
int addrKind,
- RegisterClass vdataClass,
+ ValueType vdata_vt,
bit HasTiedDest = 0,
bit isLds = 0,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind>
: MUBUF_Pseudo<opName,
- (outs vdataClass:$vdata),
+ (outs getVregSrcForVT<vdata_vt>.ret:$vdata),
!con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
- !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))),
+ !if(HasTiedDest, (ins getVregSrcForVT<vdata_vt>.ret:$vdata_in), (ins))),
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
- !if(isLds, " lds", "$tfe") # "$dlc",
+ !if(isLds, " lds", "$tfe") # "$dlc" # "$swz",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # !if(isLds, "_lds", "") #
@@ -467,19 +498,19 @@ class MUBUF_Load_Pseudo <string opName,
let Uses = !if(isLds, [EXEC, M0], [EXEC]);
let has_tfe = !if(isLds, 0, 1);
let lds = isLds;
- let dwords = getMUBUFDwords<vdataClass>.ret;
+ let elements = getMUBUFElements<vdata_vt>.ret;
}
class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
- (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
- (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
+ (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
+ (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
>;
class MUBUF_Addr64_Load_Pat <Instruction inst,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> : Pat <
- (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
- (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
+ (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
+ (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
>;
multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
@@ -490,89 +521,87 @@ multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPa
 // FIXME: tfe can't be an operand because it requires a separate
 // opcode: it needs an N+1 register class dest register.
-multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
+multiclass MUBUF_Pseudo_Loads<string opName,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag,
bit TiedDest = 0,
bit isLds = 0> {
- def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, TiedDest, isLds>,
+ def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, load_vt, TiedDest, isLds>,
MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
- def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, TiedDest, isLds>,
+ def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, load_vt, TiedDest, isLds>,
MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
- def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, TiedDest, isLds>;
- def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, TiedDest, isLds>;
- def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, TiedDest, isLds>;
+ def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, load_vt, TiedDest, isLds>;
+ def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, load_vt, TiedDest, isLds>;
+ def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, load_vt, TiedDest, isLds>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, TiedDest, isLds>;
- def _OFFEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, TiedDest, isLds>;
- def _IDXEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, TiedDest, isLds>;
- def _BOTHEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, TiedDest, isLds>;
+ def _OFFSET_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, load_vt, TiedDest, isLds>;
+ def _OFFEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, load_vt, TiedDest, isLds>;
+ def _IDXEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, load_vt, TiedDest, isLds>;
+ def _BOTHEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, load_vt, TiedDest, isLds>;
}
}
-multiclass MUBUF_Pseudo_Loads_Lds<string opName, RegisterClass vdataClass,
- ValueType load_vt = i32,
+multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32,
SDPatternOperator ld_nolds = null_frag,
SDPatternOperator ld_lds = null_frag> {
- defm NAME : MUBUF_Pseudo_Loads<opName, vdataClass, load_vt, ld_nolds>;
- defm _LDS : MUBUF_Pseudo_Loads<opName, vdataClass, load_vt, ld_lds, 0, 1>;
+ defm NAME : MUBUF_Pseudo_Loads<opName, load_vt, ld_nolds>;
+ defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, ld_lds, 0, 1>;
}
class MUBUF_Store_Pseudo <string opName,
int addrKind,
- RegisterClass vdataClass,
+ ValueType store_vt,
list<dag> pattern=[],
// Workaround bug bz30254
- int addrKindCopy = addrKind,
- RegisterClass vdataClassCopy = vdataClass>
+ int addrKindCopy = addrKind>
: MUBUF_Pseudo<opName,
(outs),
- getMUBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
- " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
+ getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret,
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let mayLoad = 0;
let mayStore = 1;
let maybeAtomic = 1;
- let dwords = getMUBUFDwords<vdataClass>.ret;
+ let elements = getMUBUFElements<store_vt>.ret;
}
-multiclass MUBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
+multiclass MUBUF_Pseudo_Stores<string opName,
ValueType store_vt = i32,
SDPatternOperator st = null_frag> {
- def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, store_vt,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
MUBUFAddr64Table<0, NAME>;
- def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, store_vt,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
MUBUFAddr64Table<1, NAME>;
- def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
- def _IDXEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
- def _BOTHEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, store_vt>;
+ def _IDXEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, store_vt>;
+ def _BOTHEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, store_vt>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
- def _OFFEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
- def _IDXEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
- def _BOTHEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ def _OFFSET_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, store_vt>;
+ def _OFFEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, store_vt>;
+ def _IDXEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, store_vt>;
+ def _BOTHEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, store_vt>;
}
}
class MUBUF_Pseudo_Store_Lds<string opName>
: MUBUF_Pseudo<opName,
(outs),
- (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc),
- " $srsrc, $soffset$offset lds$glc$slc"> {
+ (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz),
+ " $srsrc, $soffset$offset lds$glc$slc$swz"> {
let mayLoad = 0;
let mayStore = 1;
let maybeAtomic = 1;
@@ -686,7 +715,7 @@ multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
SDPatternOperator atomic,
- bit isFP = getIsFP<vdataType>.ret> {
+ bit isFP = isFloatType<vdataType>.ret> {
let FPAtomic = isFP in
def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
MUBUFAddr64Table <0, NAME>;
@@ -710,7 +739,7 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
SDPatternOperator atomic,
- bit isFP = getIsFP<vdataType>.ret> {
+ bit isFP = isFloatType<vdataType>.ret> {
let FPAtomic = isFP in
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
@@ -748,107 +777,107 @@ multiclass MUBUF_Pseudo_Atomics <string opName,
//===----------------------------------------------------------------------===//
defm BUFFER_LOAD_FORMAT_X : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_format_x", VGPR_32
+ "buffer_load_format_x", f32
>;
defm BUFFER_LOAD_FORMAT_XY : MUBUF_Pseudo_Loads <
- "buffer_load_format_xy", VReg_64
+ "buffer_load_format_xy", v2f32
>;
defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Pseudo_Loads <
- "buffer_load_format_xyz", VReg_96
+ "buffer_load_format_xyz", v3f32
>;
defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Pseudo_Loads <
- "buffer_load_format_xyzw", VReg_128
+ "buffer_load_format_xyzw", v4f32
>;
defm BUFFER_STORE_FORMAT_X : MUBUF_Pseudo_Stores <
- "buffer_store_format_x", VGPR_32
+ "buffer_store_format_x", f32
>;
defm BUFFER_STORE_FORMAT_XY : MUBUF_Pseudo_Stores <
- "buffer_store_format_xy", VReg_64
+ "buffer_store_format_xy", v2f32
>;
defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Pseudo_Stores <
- "buffer_store_format_xyz", VReg_96
+ "buffer_store_format_xyz", v3f32
>;
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores <
- "buffer_store_format_xyzw", VReg_128
+ "buffer_store_format_xyzw", v4f32
>;
let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads <
- "buffer_load_format_d16_x", VGPR_32
+ "buffer_load_format_d16_x", i32
>;
defm BUFFER_LOAD_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Loads <
- "buffer_load_format_d16_xy", VReg_64
+ "buffer_load_format_d16_xy", v2i32
>;
defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Loads <
- "buffer_load_format_d16_xyz", VReg_96
+ "buffer_load_format_d16_xyz", v3i32
>;
defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Loads <
- "buffer_load_format_d16_xyzw", VReg_128
+ "buffer_load_format_d16_xyzw", v4i32
>;
defm BUFFER_STORE_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Stores <
- "buffer_store_format_d16_x", VGPR_32
+ "buffer_store_format_d16_x", i32
>;
defm BUFFER_STORE_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Stores <
- "buffer_store_format_d16_xy", VReg_64
+ "buffer_store_format_d16_xy", v2i32
>;
defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Stores <
- "buffer_store_format_d16_xyz", VReg_96
+ "buffer_store_format_d16_xyz", v3i32
>;
defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Stores <
- "buffer_store_format_d16_xyzw", VReg_128
+ "buffer_store_format_d16_xyzw", v4i32
>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads <
- "buffer_load_format_d16_x", VGPR_32
+ "buffer_load_format_d16_x", f16
>;
defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Pseudo_Loads <
- "buffer_load_format_d16_xy", VGPR_32
+ "buffer_load_format_d16_xy", v2f16
>;
defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Pseudo_Loads <
- "buffer_load_format_d16_xyz", VReg_64
+ "buffer_load_format_d16_xyz", v3f16
>;
defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Pseudo_Loads <
- "buffer_load_format_d16_xyzw", VReg_64
+ "buffer_load_format_d16_xyzw", v4f16
>;
defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Pseudo_Stores <
- "buffer_store_format_d16_x", VGPR_32
+ "buffer_store_format_d16_x", f16
>;
defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Pseudo_Stores <
- "buffer_store_format_d16_xy", VGPR_32
+ "buffer_store_format_d16_xy", v2f16
>;
defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Pseudo_Stores <
- "buffer_store_format_d16_xyz", VReg_64
+ "buffer_store_format_d16_xyz", v3f16
>;
defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Pseudo_Stores <
- "buffer_store_format_d16_xyzw", VReg_64
+ "buffer_store_format_d16_xyzw", v4f16
>;
} // End HasPackedD16VMem.
defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_ubyte", VGPR_32, i32
+ "buffer_load_ubyte", i32
>;
defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_sbyte", VGPR_32, i32
+ "buffer_load_sbyte", i32
>;
defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_ushort", VGPR_32, i32
+ "buffer_load_ushort", i32
>;
defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_sshort", VGPR_32, i32
+ "buffer_load_sshort", i32
>;
defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_dword", VGPR_32, i32
+ "buffer_load_dword", i32
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx2", VReg_64, v2i32
+ "buffer_load_dwordx2", v2i32
>;
defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx3", VReg_96, v3i32
+ "buffer_load_dwordx3", v3i32
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx4", VReg_128, v4i32
+ "buffer_load_dwordx4", v4i32
>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
@@ -867,111 +896,111 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
// in at least GFX8+ chips. See Bug 37653.
let SubtargetPredicate = isGFX8GFX9 in {
defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx2", VReg_64, v2i32, null_frag, 0, 1
+ "buffer_load_dwordx2", v2i32, null_frag, 0, 1
>;
defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx3", VReg_96, untyped, null_frag, 0, 1
+ "buffer_load_dwordx3", v3i32, null_frag, 0, 1
>;
defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx4", VReg_128, v4i32, null_frag, 0, 1
+ "buffer_load_dwordx4", v4i32, null_frag, 0, 1
>;
}
defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores <
- "buffer_store_byte", VGPR_32, i32, truncstorei8_global
+ "buffer_store_byte", i32, truncstorei8_global
>;
defm BUFFER_STORE_SHORT : MUBUF_Pseudo_Stores <
- "buffer_store_short", VGPR_32, i32, truncstorei16_global
+ "buffer_store_short", i32, truncstorei16_global
>;
defm BUFFER_STORE_DWORD : MUBUF_Pseudo_Stores <
- "buffer_store_dword", VGPR_32, i32, store_global
+ "buffer_store_dword", i32, store_global
>;
defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores <
- "buffer_store_dwordx2", VReg_64, v2i32, store_global
+ "buffer_store_dwordx2", v2i32, store_global
>;
defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores <
- "buffer_store_dwordx3", VReg_96, v3i32, store_global
+ "buffer_store_dwordx3", v3i32, store_global
>;
defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores <
- "buffer_store_dwordx4", VReg_128, v4i32, store_global
+ "buffer_store_dwordx4", v4i32, store_global
>;
defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics <
- "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
+ "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global_32
>;
defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics <
"buffer_atomic_cmpswap", VReg_64, v2i32, null_frag
>;
defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics <
- "buffer_atomic_add", VGPR_32, i32, atomic_add_global
+ "buffer_atomic_add", VGPR_32, i32, atomic_load_add_global_32
>;
defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics <
- "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
+ "buffer_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32
>;
defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics <
- "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
+ "buffer_atomic_smin", VGPR_32, i32, atomic_load_min_global_32
>;
defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics <
- "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
+ "buffer_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32
>;
defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics <
- "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
+ "buffer_atomic_smax", VGPR_32, i32, atomic_load_max_global_32
>;
defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics <
- "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
+ "buffer_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32
>;
defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics <
- "buffer_atomic_and", VGPR_32, i32, atomic_and_global
+ "buffer_atomic_and", VGPR_32, i32, atomic_load_and_global_32
>;
defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics <
- "buffer_atomic_or", VGPR_32, i32, atomic_or_global
+ "buffer_atomic_or", VGPR_32, i32, atomic_load_or_global_32
>;
defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics <
- "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
+ "buffer_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32
>;
defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics <
- "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global
+ "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global_32
>;
defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics <
- "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global
+ "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global_32
>;
defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global
+ "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64
>;
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag
>;
defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_add_x2", VReg_64, i64, atomic_add_global
+ "buffer_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64
>;
defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_sub_x2", VReg_64, i64, atomic_sub_global
+ "buffer_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64
>;
defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_smin_x2", VReg_64, i64, atomic_min_global
+ "buffer_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64
>;
defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_umin_x2", VReg_64, i64, atomic_umin_global
+ "buffer_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64
>;
defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_smax_x2", VReg_64, i64, atomic_max_global
+ "buffer_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64
>;
defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_umax_x2", VReg_64, i64, atomic_umax_global
+ "buffer_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64
>;
defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_and_x2", VReg_64, i64, atomic_and_global
+ "buffer_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64
>;
defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_or_x2", VReg_64, i64, atomic_or_global
+ "buffer_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64
>;
defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_xor_x2", VReg_64, i64, atomic_xor_global
+ "buffer_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64
>;
defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global
+ "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64
>;
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global
+ "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64
>;
let SubtargetPredicate = isGFX8GFX9 in {
@@ -981,58 +1010,75 @@ def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
let SubtargetPredicate = isGFX6 in { // isn't on CI & VI
/*
defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">;
-defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">;
-defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin">;
-defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax">;
defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">;
-defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap_x2">;
-defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin_x2">;
-defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax_x2">;
*/
def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc",
int_amdgcn_buffer_wbinvl1_sc>;
}
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
+
+defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag
+>;
+defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_fmin", VGPR_32, f32, null_frag
+>;
+defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_fmax", VGPR_32, f32, null_frag
+>;
+defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
+>;
+defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_fmin_x2", VReg_64, f64, null_frag
+>;
+defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_fmax_x2", VReg_64, f64, null_frag
+>;
+
+}
+
let SubtargetPredicate = HasD16LoadStore in {
defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads <
- "buffer_load_ubyte_d16", VGPR_32, i32, null_frag, 1
+ "buffer_load_ubyte_d16", i32, null_frag, 1
>;
defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Pseudo_Loads <
- "buffer_load_ubyte_d16_hi", VGPR_32, i32, null_frag, 1
+ "buffer_load_ubyte_d16_hi", i32, null_frag, 1
>;
defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Pseudo_Loads <
- "buffer_load_sbyte_d16", VGPR_32, i32, null_frag, 1
+ "buffer_load_sbyte_d16", i32, null_frag, 1
>;
defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Pseudo_Loads <
- "buffer_load_sbyte_d16_hi", VGPR_32, i32, null_frag, 1
+ "buffer_load_sbyte_d16_hi", i32, null_frag, 1
>;
defm BUFFER_LOAD_SHORT_D16 : MUBUF_Pseudo_Loads <
- "buffer_load_short_d16", VGPR_32, i32, null_frag, 1
+ "buffer_load_short_d16", i32, null_frag, 1
>;
defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Pseudo_Loads <
- "buffer_load_short_d16_hi", VGPR_32, i32, null_frag, 1
+ "buffer_load_short_d16_hi", i32, null_frag, 1
>;
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Pseudo_Stores <
- "buffer_store_byte_d16_hi", VGPR_32, i32
+ "buffer_store_byte_d16_hi", i32
>;
defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Pseudo_Stores <
- "buffer_store_short_d16_hi", VGPR_32, i32
+ "buffer_store_short_d16_hi", i32
>;
defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Pseudo_Loads <
- "buffer_load_format_d16_hi_x", VGPR_32
+ "buffer_load_format_d16_hi_x", i32
>;
defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores <
- "buffer_store_format_d16_hi_x", VGPR_32
+ "buffer_store_format_d16_hi_x", i32
>;
} // End HasD16LoadStore
@@ -1043,10 +1089,10 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
let SubtargetPredicate = HasAtomicFaddInsts in {
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_add_f32", VGPR_32, f32, atomic_add_global
+ "buffer_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
>;
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
+ "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
>;
} // End SubtargetPredicate = HasAtomicFaddInsts
@@ -1055,35 +1101,35 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
// MTBUF Instructions
//===----------------------------------------------------------------------===//
-defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>;
-defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>;
-defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96>;
-defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>;
-defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>;
-defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32, 1>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64, 2>;
+defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96, 3>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128, 4>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32, 1>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64, 2>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96, 3>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128, 4>;
let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
- defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
- defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>;
- defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>;
- defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128>;
- defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>;
- defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64>;
- defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96>;
- defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>;
+ defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>;
+ defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64, 2>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96, 3>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128, 4>;
+ defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>;
+ defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64, 2>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96, 3>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128, 4>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
- defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
- defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>;
- defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>;
- defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64>;
- defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>;
- defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32>;
- defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64>;
- defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>;
+ defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>;
+ defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32, 2>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64, 3>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64, 4>;
+ defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>;
+ defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32, 2>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64, 3>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64, 4>;
} // End HasPackedD16VMem.
let SubtargetPredicate = isGFX7Plus in {
@@ -1118,6 +1164,10 @@ def extract_dlc : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
}]>;
+def extract_swz : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
+}]>;
+
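// Editor's note: the buffer-intrinsic "auxiliary" (cachepolicy) word packs
// one flag per bit; extract_swz above pulls bit 3, alongside the existing
// glc/slc/dlc extractors. A minimal C++ sketch of the layout; the glc/slc
// bit positions are assumed from the extractors defined in this file.
static bool extractGlc(unsigned Aux) { return (Aux >> 0) & 1; }
static bool extractSlc(unsigned Aux) { return (Aux >> 1) & 1; }
static bool extractDlc(unsigned Aux) { return (Aux >> 2) & 1; }
static bool extractSwz(unsigned Aux) { return (Aux >> 3) & 1; }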
//===----------------------------------------------------------------------===//
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
@@ -1125,33 +1175,37 @@ def extract_dlc : SDNodeXForm<imm, [{
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0)),
+ (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0)),
+ (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm)),
+ (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm)),
+ (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
}
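// The four patterns above map the intrinsic's (vindex, voffset) shape to an
// addressing mode: (0, 0) selects _OFFSET, (0, reg) _OFFEN, (reg, 0) _IDXEN,
// and (reg, reg) _BOTHEN, which first packs both VGPRs into a VReg_64 with
// REG_SEQUENCE.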
@@ -1182,8 +1236,12 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, i32, "BUFFER_LOAD_DWORD">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i16, "BUFFER_LOAD_DWORD">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f16, "BUFFER_LOAD_DWORD">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i32, "BUFFER_LOAD_DWORDX2">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i16, "BUFFER_LOAD_DWORDX2">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f16, "BUFFER_LOAD_DWORDX2">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3f32, "BUFFER_LOAD_DWORDX3">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3i32, "BUFFER_LOAD_DWORDX3">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
@@ -1196,36 +1254,40 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (extract_glc $cachepolicy),
- (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (as_i16imm $offset), (extract_glc $auxiliary),
+ (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (extract_glc $cachepolicy),
- (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (as_i16imm $offset), (extract_glc $auxiliary),
+ (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy),
- (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ $rsrc, $soffset, (as_i16imm $offset), (extract_glc $auxiliary),
+ (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
}
@@ -1256,8 +1318,12 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, i32, "BUFFER_STORE_DWORD">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i16, "BUFFER_STORE_DWORD">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f16, "BUFFER_STORE_DWORD">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i32, "BUFFER_STORE_DWORDX2">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i16, "BUFFER_STORE_DWORDX2">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f16, "BUFFER_STORE_DWORDX2">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3f32, "BUFFER_STORE_DWORDX3">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3i32, "BUFFER_STORE_DWORDX3">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
@@ -1273,32 +1339,32 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, 0,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0)),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm)),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0)),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm)),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1316,6 +1382,8 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i32, "BUFFER_ATOMIC_UMAX">;
defm : BufferAtomicPatterns<SIbuffer_atomic_and, i32, "BUFFER_ATOMIC_AND">;
defm : BufferAtomicPatterns<SIbuffer_atomic_or, i32, "BUFFER_ATOMIC_OR">;
defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i32, "BUFFER_ATOMIC_XOR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i32, "BUFFER_ATOMIC_INC">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i32, "BUFFER_ATOMIC_DEC">;
defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_add, i64, "BUFFER_ATOMIC_ADD_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i64, "BUFFER_ATOMIC_SUB_X2">;
@@ -1326,37 +1394,39 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i64, "BUFFER_ATOMIC_UMAX_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_and, i64, "BUFFER_ATOMIC_AND_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_or, i64, "BUFFER_ATOMIC_OR_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i64, "BUFFER_ATOMIC_INC_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i64, "BUFFER_ATOMIC_DEC_X2">;
multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, 0,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1370,8 +1440,8 @@ defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMI
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1382,8 +1452,8 @@ def : GCNPat<
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1394,8 +1464,8 @@ def : GCNPat<
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1406,8 +1476,8 @@ def : GCNPat<
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1419,8 +1489,8 @@ def : GCNPat<
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : GCNPat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
>;
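// The MUBUF operand order is now glc, slc, tfe, dlc, swz, so the hardcoded
// operand lists in the patterns below each grow by one trailing 0, the
// default for the new swz flag.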
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
@@ -1428,12 +1498,12 @@ multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Ins
def : GCNPat <
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0)
>;
def : GCNPat <
(vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
- (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
+ (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0)
>;
}
@@ -1454,8 +1524,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
def : GCNPat <
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
- (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
+ (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
>;
}
@@ -1478,12 +1548,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
def : GCNPat <
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0)
>;
def : GCNPat <
(vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
- (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0)
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0)
>;
}
@@ -1493,12 +1563,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
ValueType vt, PatFrag ld_frag> {
def : GCNPat <
(ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
>;
def : GCNPat <
(ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
- (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
>;
}
@@ -1512,7 +1582,10 @@ defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET,
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, zextloadi16_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>;
+
+foreach vt = Reg32Types.types in {
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, vt, load_private>;
+}
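+// Reg32Types.types presumably enumerates every 32-bit type legal in a VGPR
+// (i32, f32, v2i16, v2f16 and 32-bit pointers), so the foreach stamps out one
+// copy of the pattern per type instead of a hand-written list.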
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX3_OFFEN, BUFFER_LOAD_DWORDX3_OFFSET, v3i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
@@ -1535,16 +1608,16 @@ defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D1
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_st> {
- // Store follows atomic op convention so address is forst
+ // Store follows atomic op convention so address is first
def : GCNPat <
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc), vt:$val),
- (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
+ (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0)
>;
def : GCNPat <
(atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
- (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
+ (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0)
>;
}
let SubtargetPredicate = isGFX6GFX7 in {
@@ -1558,8 +1631,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
def : GCNPat <
(st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)),
- (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)),
+ (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
>;
}
@@ -1573,13 +1646,13 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
def : GCNPat <
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset)),
- (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
+ (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0)
>;
def : GCNPat <
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
u16imm:$offset)),
- (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
+ (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0)
>;
}
@@ -1587,7 +1660,11 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET,
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>;
+
+foreach vt = Reg32Types.types in {
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, vt, store_private>;
+}
+
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>;
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>;
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
@@ -1613,37 +1690,41 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, 0)),
+ (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, imm)),
+ (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, 0)),
+ (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, imm)),
+ (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
}
@@ -1671,37 +1752,41 @@ let SubtargetPredicate = HasPackedD16VMem in {
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, 0),
+ (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, imm),
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, 0),
+ (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
- imm:$offset, imm:$format, imm:$cachepolicy, imm),
+ timm:$offset, timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (extract_swz $auxiliary))
>;
}
@@ -1957,10 +2042,9 @@ defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
-// FIXME-GFX6-GFX7-GFX10: Add following instructions:
-//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
-//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
-//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
+defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
+defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
+defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
@@ -1975,10 +2059,9 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
-// FIXME-GFX6-GFX7-GFX10: Add following instructions:
-//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
-//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
-//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
+defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
+defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
+defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;
@@ -2353,7 +2436,7 @@ let SubtargetPredicate = HasPackedD16VMem in {
def MUBUFInfoTable : GenericTable {
let FilterClass = "MUBUF_Pseudo";
let CppTypeName = "MUBUFInfo";
- let Fields = ["Opcode", "BaseOpcode", "dwords", "has_vaddr", "has_srsrc", "has_soffset"];
+ let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"];
let PrimaryKey = ["Opcode"];
let PrimaryKeyName = "getMUBUFOpcodeHelper";
@@ -2364,7 +2447,26 @@ def getMUBUFInfoFromOpcode : SearchIndex {
let Key = ["Opcode"];
}
-def getMUBUFInfoFromBaseOpcodeAndDwords : SearchIndex {
+def getMUBUFInfoFromBaseOpcodeAndElements : SearchIndex {
let Table = MUBUFInfoTable;
- let Key = ["BaseOpcode", "dwords"];
+ let Key = ["BaseOpcode", "elements"];
+}
+
+def MTBUFInfoTable : GenericTable {
+ let FilterClass = "MTBUF_Pseudo";
+ let CppTypeName = "MTBUFInfo";
+ let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getMTBUFOpcodeHelper";
+}
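+// As with the MUBUF table above, the SearchIndex defs below should make the
+// SearchableTables backend emit C++ lookups roughly of the form
+//   const MTBUFInfo *getMTBUFInfoFromOpcode(unsigned Opcode);
+//   const MTBUFInfo *getMTBUFInfoFromBaseOpcodeAndElements(unsigned BaseOpcode,
+//                                                          unsigned Elements);
+// (signatures inferred from the backend's conventions, not verified here).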
+
+def getMTBUFInfoFromOpcode : SearchIndex {
+ let Table = MTBUFInfoTable;
+ let Key = ["Opcode"];
+}
+
+def getMTBUFInfoFromBaseOpcodeAndElements : SearchIndex {
+ let Table = MTBUFInfoTable;
+ let Key = ["BaseOpcode", "elements"];
}
diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td
index c52eaaa3fdc5..816ec14a0e98 100644
--- a/lib/Target/AMDGPU/DSInstructions.td
+++ b/lib/Target/AMDGPU/DSInstructions.td
@@ -81,6 +81,17 @@ class DS_Real <DS_Pseudo ds> :
// DS Pseudo instructions
+class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
+: DS_Pseudo<opName,
+ (outs),
+ (ins rc:$data0, offset:$offset, gds:$gds),
+ "$data0$offset$gds"> {
+
+ let has_addr = 0;
+ let has_data1 = 0;
+ let has_vdst = 0;
+}
+
class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs),
@@ -317,13 +328,16 @@ class DS_GWS <string opName, dag ins, string asmOps>
class DS_GWS_0D <string opName>
: DS_GWS<opName,
- (ins offset:$offset, gds:$gds), "$offset gds">;
+ (ins offset:$offset, gds:$gds), "$offset gds"> {
+ let hasSideEffects = 1;
+}
class DS_GWS_1D <string opName>
: DS_GWS<opName,
(ins VGPR_32:$data0, offset:$offset, gds:$gds), "$data0$offset gds"> {
let has_gws_data0 = 1;
+ let hasSideEffects = 1;
}
class DS_VOID <string opName> : DS_Pseudo<opName,
@@ -391,11 +405,12 @@ def DS_WRITE_B8_D16_HI : DS_1A1D_NORET<"ds_write_b8_d16_hi">;
def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">;
}
+} // End has_m0_read = 0
+
let SubtargetPredicate = HasDSAddTid in {
-def DS_WRITE_ADDTID_B32 : DS_1A1D_NORET<"ds_write_addtid_b32">;
+def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;
}
-} // End has_m0_read = 0
} // End mayLoad = 0
defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">;
@@ -540,13 +555,14 @@ def DS_READ_I8_D16_HI : DS_1A_RET_Tied<"ds_read_i8_d16_hi">;
def DS_READ_U16_D16 : DS_1A_RET_Tied<"ds_read_u16_d16">;
def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">;
}
+} // End has_m0_read = 0
let SubtargetPredicate = HasDSAddTid in {
-def DS_READ_ADDTID_B32 : DS_1A_RET<"ds_read_addtid_b32">;
-}
-} // End has_m0_read = 0
+def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;
}
+} // End mayStore = 0
+
def DS_CONSUME : DS_0A_RET<"ds_consume">;
def DS_APPEND : DS_0A_RET<"ds_append">;
def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
@@ -600,13 +616,13 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
//===----------------------------------------------------------------------===//
def : GCNPat <
- (int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
+ (int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
>;
class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
- (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst $ptr, (as_i16imm $offset), (i1 gds))
+ (vt (frag (DS1Addr1Offset i32:$ptr, i16:$offset))),
+ (inst $ptr, offset:$offset, (i1 gds))
>;
multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
@@ -621,8 +637,8 @@ multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
}
class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt> : GCNPat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
- (inst $ptr, (as_i16imm $offset), (i1 0), $in)
+ (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$in),
+ (inst $ptr, offset:$offset, (i1 0), $in)
>;
defm : DSReadPat_mc <DS_READ_I8, i32, "sextloadi8_local">;
@@ -636,13 +652,20 @@ defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
-defm : DSReadPat_mc <DS_READ_B32, i32, "load_local">;
+
+foreach vt = Reg32Types.types in {
+defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
+}
+
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
let AddedComplexity = 100 in {
-defm : DSReadPat_mc <DS_READ_B64, v2i32, "load_align8_local">;
+foreach vt = VReg_64.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
+}
+
defm : DSReadPat_mc <DS_READ_B128, v4i32, "load_align16_local">;
} // End AddedComplexity = 100
@@ -664,8 +687,8 @@ def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
}
class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
- (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst $ptr, $value, (as_i16imm $offset), (i1 gds))
+ (frag vt:$value, (DS1Addr1Offset i32:$ptr, i16:$offset)),
+ (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))
>;
multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
@@ -681,8 +704,8 @@ multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
// Irritatingly, atomic_store reverses the order of operands from a
// normal store.
class DSAtomicWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+ (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
+ (inst $ptr, $value, offset:$offset, (i1 0))
>;
multiclass DSAtomicWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
@@ -699,9 +722,13 @@ defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
-defm : DSWritePat_mc <DS_WRITE_B32, i32, "store_local">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local">;
+
+foreach vt = VGPR_32.RegTypes in {
+defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
+}
+
+defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;
let OtherPredicates = [D16PreservesUnusedBits] in {
def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>;
@@ -736,46 +763,49 @@ def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, store_local>;
let AddedComplexity = 100 in {
-defm : DSWritePat_mc <DS_WRITE_B64, v2i32, "store_align8_local">;
+foreach vt = VReg_64.RegTypes in {
+defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
+}
+
defm : DSWritePat_mc <DS_WRITE_B128, v4i32, "store_align16_local">;
} // End AddedComplexity = 100
class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst $ptr, $value, (as_i16imm $offset), (i1 gds))
+ (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
+ (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))
>;
multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
- def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0")>;
+ def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag#"_local")>;
+ !cast<PatFrag>(frag#"_local_"#vt.Size)>;
}
- def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>;
+ def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), 1>;
}
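// The atomic PatFrag names now carry the access size as a suffix derived from
// vt.Size (e.g. "_local_m0_32"), so each instantiation selects the fragment
// matching its value type.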
class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 gds))
+ (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap),
+ (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds))
>;
multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
- def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_local_m0")>;
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag#"_local")>;
+ !cast<PatFrag>(frag#"_local_"#vt.Size)>;
}
- def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>;
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), 1>;
}
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 4ec4be9bc485..ec2e2c4e8b71 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1095,6 +1095,7 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
case 106: return createRegOperand(VCC);
case 108: return createRegOperand(TBA);
case 110: return createRegOperand(TMA);
+ case 125: return createRegOperand(SGPR_NULL);
case 126: return createRegOperand(EXEC);
case 235: return createRegOperand(SRC_SHARED_BASE);
case 236: return createRegOperand(SRC_SHARED_LIMIT);
@@ -1172,7 +1173,8 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
int TTmpIdx = getTTmpIdx(Val);
if (TTmpIdx >= 0) {
- return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
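+    // Pick the ttmp register class matching the wave size: a 64-bit ttmp
+    // pair for wave64, a single 32-bit ttmp for wave32.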
+ auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
+ return createSRegOperand(TTmpClsId, TTmpIdx);
} else if (Val > SGPR_MAX) {
return IsWave64 ? decodeSpecialReg64(Val)
: decodeSpecialReg32(Val);
diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td
index 0550092ce1d6..792e26d21f98 100644
--- a/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -322,46 +322,46 @@ def : EGOrCaymanPat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$
defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_RTN,
RAT_ATOMIC_XCHG_INT_NORET,
- atomic_swap_global_ret,
- atomic_swap_global_noret>;
+ atomic_swap_global_ret_32,
+ atomic_swap_global_noret_32>;
defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_RTN, RAT_ATOMIC_ADD_NORET,
- atomic_add_global_ret, atomic_add_global_noret>;
+ atomic_load_add_global_ret_32, atomic_load_add_global_noret_32>;
defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_RTN, RAT_ATOMIC_SUB_NORET,
- atomic_sub_global_ret, atomic_sub_global_noret>;
+ atomic_load_sub_global_ret_32, atomic_load_sub_global_noret_32>;
defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_RTN,
RAT_ATOMIC_MIN_INT_NORET,
- atomic_min_global_ret, atomic_min_global_noret>;
+ atomic_load_min_global_ret_32, atomic_load_min_global_noret_32>;
defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_RTN,
RAT_ATOMIC_MIN_UINT_NORET,
- atomic_umin_global_ret, atomic_umin_global_noret>;
+ atomic_load_umin_global_ret_32, atomic_load_umin_global_noret_32>;
defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_RTN,
RAT_ATOMIC_MAX_INT_NORET,
- atomic_max_global_ret, atomic_max_global_noret>;
+ atomic_load_max_global_ret_32, atomic_load_max_global_noret_32>;
defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_RTN,
RAT_ATOMIC_MAX_UINT_NORET,
- atomic_umax_global_ret, atomic_umax_global_noret>;
+ atomic_load_umax_global_ret_32, atomic_load_umax_global_noret_32>;
defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_RTN, RAT_ATOMIC_AND_NORET,
- atomic_and_global_ret, atomic_and_global_noret>;
+ atomic_load_and_global_ret_32, atomic_load_and_global_noret_32>;
defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_RTN, RAT_ATOMIC_OR_NORET,
- atomic_or_global_ret, atomic_or_global_noret>;
+ atomic_load_or_global_ret_32, atomic_load_or_global_noret_32>;
defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_RTN, RAT_ATOMIC_XOR_NORET,
- atomic_xor_global_ret, atomic_xor_global_noret>;
+ atomic_load_xor_global_ret_32, atomic_load_xor_global_noret_32>;
defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
RAT_ATOMIC_INC_UINT_NORET,
- atomic_add_global_ret,
- atomic_add_global_noret, 1>;
+ atomic_load_add_global_ret_32,
+ atomic_load_add_global_noret_32, 1>;
defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
RAT_ATOMIC_INC_UINT_NORET,
- atomic_sub_global_ret,
- atomic_sub_global_noret, -1>;
+ atomic_load_sub_global_ret_32,
+ atomic_load_sub_global_noret_32, -1>;
defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
RAT_ATOMIC_DEC_UINT_NORET,
- atomic_add_global_ret,
- atomic_add_global_noret, -1>;
+ atomic_load_add_global_ret_32,
+ atomic_load_add_global_noret_32, -1>;
defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
RAT_ATOMIC_DEC_UINT_NORET,
- atomic_sub_global_ret,
- atomic_sub_global_noret, 1>;
+ atomic_load_sub_global_ret_32,
+ atomic_load_sub_global_noret_32, 1>;
// Should be predicated on FeatureFP64
// def FMA_64 : R600_3OP <
@@ -628,37 +628,37 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
[(truncstorei16_local i32:$src1, i32:$src0)]
>;
def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
- [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_load_add_local_32 i32:$src0, i32:$src1))]
>;
def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
- [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_load_sub_local_32 i32:$src0, i32:$src1))]
>;
def LDS_AND_RET : R600_LDS_1A1D_RET <0x29, "LDS_AND",
- [(set i32:$dst, (atomic_load_and_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_load_and_local_32 i32:$src0, i32:$src1))]
>;
def LDS_OR_RET : R600_LDS_1A1D_RET <0x2a, "LDS_OR",
- [(set i32:$dst, (atomic_load_or_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_load_or_local_32 i32:$src0, i32:$src1))]
>;
def LDS_XOR_RET : R600_LDS_1A1D_RET <0x2b, "LDS_XOR",
- [(set i32:$dst, (atomic_load_xor_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_load_xor_local_32 i32:$src0, i32:$src1))]
>;
def LDS_MIN_INT_RET : R600_LDS_1A1D_RET <0x25, "LDS_MIN_INT",
- [(set i32:$dst, (atomic_load_min_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_load_min_local_32 i32:$src0, i32:$src1))]
>;
def LDS_MAX_INT_RET : R600_LDS_1A1D_RET <0x26, "LDS_MAX_INT",
- [(set i32:$dst, (atomic_load_max_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_load_max_local_32 i32:$src0, i32:$src1))]
>;
def LDS_MIN_UINT_RET : R600_LDS_1A1D_RET <0x27, "LDS_MIN_UINT",
- [(set i32:$dst, (atomic_load_umin_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_load_umin_local_32 i32:$src0, i32:$src1))]
>;
def LDS_MAX_UINT_RET : R600_LDS_1A1D_RET <0x28, "LDS_MAX_UINT",
- [(set i32:$dst, (atomic_load_umax_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_load_umax_local_32 i32:$src0, i32:$src1))]
>;
def LDS_WRXCHG_RET : R600_LDS_1A1D_RET <0x2d, "LDS_WRXCHG",
- [(set i32:$dst, (atomic_swap_local i32:$src0, i32:$src1))]
+ [(set i32:$dst, (atomic_swap_local_32 i32:$src0, i32:$src1))]
>;
def LDS_CMPST_RET : R600_LDS_1A2D_RET <0x30, "LDS_CMPST",
- [(set i32:$dst, (atomic_cmp_swap_local i32:$src0, i32:$src1, i32:$src2))]
+ [(set i32:$dst, (atomic_cmp_swap_local_32 i32:$src0, i32:$src1, i32:$src2))]
>;
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
[(set (i32 R600_Reg32:$dst), (load_local R600_Reg32:$src0))]
diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td
index 889f60dae920..80ee17eba141 100644
--- a/lib/Target/AMDGPU/FLATInstructions.td
+++ b/lib/Target/AMDGPU/FLATInstructions.td
@@ -270,7 +270,7 @@ multiclass FLAT_Atomic_Pseudo<
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc,
- bit isFP = getIsFP<data_vt>.ret> {
+ bit isFP = isFloatType<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
@@ -300,7 +300,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc,
- bit isFP = getIsFP<data_vt>.ret> {
+ bit isFP = isFloatType<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
@@ -333,7 +333,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc,
- bit isFP = getIsFP<data_vt>.ret> {
+ bit isFP = isFloatType<data_vt>.ret> {
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
@@ -564,76 +564,76 @@ defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswa
v2i64, VReg_128>;
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
- VGPR_32, i32, atomic_swap_global>;
+ VGPR_32, i32, atomic_swap_global_32>;
defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
- VReg_64, i64, atomic_swap_global>;
+ VReg_64, i64, atomic_swap_global_64>;
defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
- VGPR_32, i32, atomic_add_global>;
+ VGPR_32, i32, atomic_load_add_global_32>;
defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
- VGPR_32, i32, atomic_sub_global>;
+ VGPR_32, i32, atomic_load_sub_global_32>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
- VGPR_32, i32, atomic_min_global>;
+ VGPR_32, i32, atomic_load_min_global_32>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
- VGPR_32, i32, atomic_umin_global>;
+ VGPR_32, i32, atomic_load_umin_global_32>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
- VGPR_32, i32, atomic_max_global>;
+ VGPR_32, i32, atomic_load_max_global_32>;
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
- VGPR_32, i32, atomic_umax_global>;
+ VGPR_32, i32, atomic_load_umax_global_32>;
defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
- VGPR_32, i32, atomic_and_global>;
+ VGPR_32, i32, atomic_load_and_global_32>;
defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
- VGPR_32, i32, atomic_or_global>;
+ VGPR_32, i32, atomic_load_or_global_32>;
defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
- VGPR_32, i32, atomic_xor_global>;
+ VGPR_32, i32, atomic_load_xor_global_32>;
defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
- VGPR_32, i32, atomic_inc_global>;
+ VGPR_32, i32, atomic_inc_global_32>;
defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
- VGPR_32, i32, atomic_dec_global>;
+ VGPR_32, i32, atomic_dec_global_32>;
defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
- VReg_64, i64, atomic_add_global>;
+ VReg_64, i64, atomic_load_add_global_64>;
defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
- VReg_64, i64, atomic_sub_global>;
+ VReg_64, i64, atomic_load_sub_global_64>;
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
- VReg_64, i64, atomic_min_global>;
+ VReg_64, i64, atomic_load_min_global_64>;
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
- VReg_64, i64, atomic_umin_global>;
+ VReg_64, i64, atomic_load_umin_global_64>;
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
- VReg_64, i64, atomic_max_global>;
+ VReg_64, i64, atomic_load_max_global_64>;
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
- VReg_64, i64, atomic_umax_global>;
+ VReg_64, i64, atomic_load_umax_global_64>;
defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
- VReg_64, i64, atomic_and_global>;
+ VReg_64, i64, atomic_load_and_global_64>;
defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
- VReg_64, i64, atomic_or_global>;
+ VReg_64, i64, atomic_load_or_global_64>;
defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
- VReg_64, i64, atomic_xor_global>;
+ VReg_64, i64, atomic_load_xor_global_64>;
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
- VReg_64, i64, atomic_inc_global>;
+ VReg_64, i64, atomic_inc_global_64>;
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
- VReg_64, i64, atomic_dec_global>;
+ VReg_64, i64, atomic_dec_global_64>;
} // End is_flat_global = 1
} // End SubtargetPredicate = HasFlatGlobalInsts
@@ -686,10 +686,10 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
- "global_atomic_add_f32", VGPR_32, f32, atomic_add_global
+ "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
- "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
+ "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
>;
} // End SubtargetPredicate = HasAtomicFaddInsts
@@ -777,8 +777,6 @@ def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;
@@ -787,41 +785,50 @@ def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
-def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32, VReg_64>;
+
+foreach vt = Reg32Types.types in {
+def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
+def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
+}
+
+foreach vt = VReg_64.RegTypes in {
+def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
+}
+
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
-
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
+
+def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
@@ -847,9 +854,6 @@ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
} // End OtherPredicates = [HasFlatAddressSpace]
-def atomic_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_fadd>;
-def atomic_pk_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>;
-
let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
@@ -863,8 +867,16 @@ def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
+foreach vt = Reg32Types.types in {
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
+}
+
+foreach vt = VReg_64.RegTypes in {
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
+}
+
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
@@ -875,8 +887,6 @@ def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32, VReg_64>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>;
@@ -902,36 +912,36 @@ def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>;
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64, VReg_64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_and_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_or_global, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
-
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
+
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global, f32>;
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global, v2f16>;
+def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
+def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>;
} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
@@ -1174,7 +1184,7 @@ class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
let AssemblerPredicate = isGFX10Plus;
let DecoderNamespace = "GFX10";
- let Inst{11-0} = {offset{12}, offset{10-0}};
+ let Inst{11-0} = offset{11-0};
let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
let Inst{55} = 0;
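
In plain C++ terms, the encoding fix in this hunk swaps a splice of offset bit 12 over the low eleven bits for the contiguous low twelve bits; a sketch with hypothetical helper names, not part of the patch:

    // Hypothetical helpers mirroring the TableGen change above.
    unsigned encodeOld(unsigned Offset) {            // {offset{12}, offset{10-0}}
      return (((Offset >> 12) & 1) << 11) | (Offset & 0x7FF);
    }
    unsigned encodeNew(unsigned Offset) {            // offset{11-0}
      return Offset & 0xFFF;
    }
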
diff --git a/lib/Target/AMDGPU/GCNDPPCombine.cpp b/lib/Target/AMDGPU/GCNDPPCombine.cpp
index e1845e2e8e87..98678873e37c 100644
--- a/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -41,6 +41,7 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -155,8 +156,6 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
RegSubRegPair CombOldVGPR,
bool CombBCZ) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
- assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
- TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
auto OrigOp = OrigMI.getOpcode();
auto DPPOp = getDPPOp(OrigOp);
@@ -178,7 +177,9 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
if (OldIdx != -1) {
assert(OldIdx == NumOperands);
assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
- DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg);
+ auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
+ DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
+ CombOldVGPR.SubReg);
++NumOperands;
} else {
// TODO: this discards MAC/FMA instructions for now; let's add them later
@@ -195,6 +196,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
DPPInst.addImm(Mod0->getImm());
++NumOperands;
+ } else if (AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src0_modifiers) != -1) {
+ DPPInst.addImm(0);
+ ++NumOperands;
}
auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
assert(Src0);
@@ -214,6 +219,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
DPPInst.addImm(Mod1->getImm());
++NumOperands;
+ } else if (AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src1_modifiers) != -1) {
+ DPPInst.addImm(0);
+ ++NumOperands;
}
if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
@@ -344,6 +353,10 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
assert(DstOpnd && DstOpnd->isReg());
auto DPPMovReg = DstOpnd->getReg();
+ if (DPPMovReg.isPhysical()) {
+ LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n");
+ return false;
+ }
if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
" for all uses\n");
@@ -362,7 +375,13 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
bool BoundCtrlZero = BCZOpnd->getImm();
auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
+ auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
assert(OldOpnd && OldOpnd->isReg());
+ assert(SrcOpnd && SrcOpnd->isReg());
+ if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
+ LLVM_DEBUG(dbgs() << " failed: dpp move reads physreg\n");
+ return false;
+ }
auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
// OldOpndValue is either undef (IMPLICIT_DEF), an immediate, or something else
@@ -408,6 +427,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
+ DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
// try to reuse the previous old reg if it's undefined (IMPLICIT_DEF)
if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
@@ -420,13 +440,49 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
OrigMIs.push_back(&MovMI);
bool Rollback = true;
+ SmallVector<MachineOperand*, 16> Uses;
+
for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
+ Uses.push_back(&Use);
+ }
+
+ while (!Uses.empty()) {
+ MachineOperand *Use = Uses.pop_back_val();
Rollback = true;
- auto &OrigMI = *Use.getParent();
+ auto &OrigMI = *Use->getParent();
LLVM_DEBUG(dbgs() << " try: " << OrigMI);
auto OrigOp = OrigMI.getOpcode();
+ if (OrigOp == AMDGPU::REG_SEQUENCE) {
+ Register FwdReg = OrigMI.getOperand(0).getReg();
+ unsigned FwdSubReg = 0;
+
+ if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
+ LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
+ " for all uses\n");
+ break;
+ }
+
+ unsigned OpNo, E = OrigMI.getNumOperands();
+ for (OpNo = 1; OpNo < E; OpNo += 2) {
+ if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
+ FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
+ break;
+ }
+ }
+
+ if (!FwdSubReg)
+ break;
+
+ for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
+ if (Op.getSubReg() == FwdSubReg)
+ Uses.push_back(&Op);
+ }
+ RegSeqWithOpNos[&OrigMI].push_back(OpNo);
+ continue;
+ }
+
if (TII->isVOP3(OrigOp)) {
if (!TII->hasVALU32BitEncoding(OrigOp)) {
LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n");
@@ -447,14 +503,14 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
}
LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
- if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
+ if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
OldOpndValue, CombBCZ)) {
DPPMIs.push_back(DPPInst);
Rollback = false;
}
} else if (OrigMI.isCommutable() &&
- &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
auto *BB = OrigMI.getParent();
auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
BB->insert(OrigMI, NewMI);
@@ -475,9 +531,22 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
OrigMIs.push_back(&OrigMI);
}
+ Rollback |= !Uses.empty();
+
for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
MI->eraseFromParent();
+ if (!Rollback) {
+ for (auto &S : RegSeqWithOpNos) {
+ if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
+ S.first->eraseFromParent();
+ continue;
+ }
+ while (!S.second.empty())
+ S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
+ }
+ }
+
return !Rollback;
}
@@ -498,6 +567,13 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
+ } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
+ auto Split = TII->expandMovDPP64(MI);
+ for (auto M : { Split.first, Split.second }) {
+ if (combineDPPMov(*M))
+ ++NumDPPMovsCombined;
+ }
+ Changed = true;
}
}
}
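
The combineDPPMov changes above boil down to a worklist walk that looks through REG_SEQUENCE instead of giving up at the first indirect use. A self-contained sketch of that shape (Node, Forwarding, and Users are illustrative stand-ins, not part of the patch):

    #include <vector>

    struct Node { bool Forwarding = false; std::vector<Node *> Users; };

    void walk(std::vector<Node *> Work) {
      while (!Work.empty()) {
        Node *N = Work.back();
        Work.pop_back();
        if (N->Forwarding) { // REG_SEQUENCE analogue: don't fold, follow through
          Work.insert(Work.end(), N->Users.begin(), N->Users.end());
          continue;
        }
        // here the real pass tries to fold the DPP mov into this user
      }
    }
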
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 885239e2faed..9528aee4c50e 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -726,7 +726,7 @@ int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
if (!TRI->isVGPR(MRI, Def.getReg()))
return WaitStatesNeeded;
- unsigned Reg = Def.getReg();
+ Register Reg = Def.getReg();
auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
int DataIdx = createsVALUHazard(*MI);
return DataIdx >= 0 &&
@@ -792,7 +792,7 @@ int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
return 0;
- unsigned LaneSelectReg = LaneSelectOp->getReg();
+ Register LaneSelectReg = LaneSelectOp->getReg();
auto IsHazardFn = [TII] (MachineInstr *MI) {
return TII->isVALU(*MI);
};
@@ -891,7 +891,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
// Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
// which is always a VGPR and available.
auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
- unsigned Reg = Src0->getReg();
+ Register Reg = Src0->getReg();
bool IsUndef = Src0->isUndef();
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::V_MOV_B32_e32))
@@ -952,6 +952,7 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
unsigned SDSTName;
switch (MI->getOpcode()) {
case AMDGPU::V_READLANE_B32:
+ case AMDGPU::V_READLANE_B32_gfx10:
case AMDGPU::V_READFIRSTLANE_B32:
SDSTName = AMDGPU::OpName::vdst;
break;
@@ -976,7 +977,7 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
if (!SDST)
return false;
- const unsigned SDSTReg = SDST->getReg();
+ const Register SDSTReg = SDST->getReg();
auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
};
@@ -1251,14 +1252,14 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
const int MaxWaitStates = 18;
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
unsigned HazardDefLatency = 0;
auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
(MachineInstr *MI) {
if (!IsMFMAFn(MI))
return false;
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
if (DstReg == Reg)
return false;
HazardDefLatency = std::max(HazardDefLatency,
@@ -1304,7 +1305,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
return false;
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
return TRI.regsOverlap(Reg, DstReg);
};
@@ -1330,14 +1331,14 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
const int MaxWaitStates = 13;
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
unsigned HazardDefLatency = 0;
auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
(MachineInstr *MI) {
if (!IsMFMAFn(MI))
return false;
- unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
+ Register Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
HazardDefLatency = std::max(HazardDefLatency,
TSchedModel.computeInstrLatency(MI));
return TRI.regsOverlap(Reg, DstReg);
@@ -1376,7 +1377,7 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
continue;
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
const int AccVgprReadLdStWaitStates = 2;
const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
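
Many hunks in this file (and in the files below) mechanically swap `unsigned` for `llvm::Register`. A minimal sketch of the replacement API, using only the predicates that appear in the patch (isVirt is a hypothetical helper):

    #include "llvm/CodeGen/Register.h"
    using llvm::Register;

    // Register wraps the raw register number, converts implicitly to
    // unsigned, and carries the virtual/physical distinction.
    bool isVirt(Register Reg) {
      if (Reg.isPhysical())                     // member form, as in combineDPPMov
        return false;
      return Register::isVirtualRegister(Reg);  // static drop-in for the old
                                                // TargetRegisterInfo form
    }
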
diff --git a/lib/Target/AMDGPU/GCNILPSched.cpp b/lib/Target/AMDGPU/GCNILPSched.cpp
index 1eb617640c32..39072af7d871 100644
--- a/lib/Target/AMDGPU/GCNILPSched.cpp
+++ b/lib/Target/AMDGPU/GCNILPSched.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 3525174223bd..90ab6a14ce20 100644
--- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -237,7 +237,7 @@ public:
GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
StrategyKind S)
- : BaseClass(C, llvm::make_unique<SchedStrategyStub>())
+ : BaseClass(C, std::make_unique<SchedStrategyStub>())
, Context(C)
, Strategy(S)
, UPTracker(*LIS) {
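
This is part of the tree-wide llvm::make_unique to std::make_unique migration after LLVM's move to C++14; a trivial standalone example (SchedStub is an illustrative stand-in for SchedStrategyStub):

    #include <memory>

    struct SchedStub {}; // stand-in type, purely for illustration

    // Call sites change mechanically: llvm::make_unique -> std::make_unique.
    auto makeStrategy() { return std::make_unique<SchedStub>(); }
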
diff --git a/lib/Target/AMDGPU/GCNNSAReassign.cpp b/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 51c4c99cfb18..36a8f74150f5 100644
--- a/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -173,11 +173,11 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
bool NSA = false;
for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
- unsigned Reg = Op.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
+ Register Reg = Op.getReg();
+ if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
return NSA_Status::FIXED;
- unsigned PhysReg = VRM->getPhys(Reg);
+ Register PhysReg = VRM->getPhys(Reg);
if (!Fast) {
if (!PhysReg)
@@ -276,7 +276,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
SlotIndex MinInd, MaxInd;
for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
LiveInterval *LI = &LIS->getInterval(Reg);
if (llvm::find(Intervals, LI) != Intervals.end()) {
// Same register used, unable to make sequential
diff --git a/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/lib/Target/AMDGPU/GCNRegBankReassign.cpp
index f0d47eaa4ed1..2927d4eb745a 100644
--- a/lib/Target/AMDGPU/GCNRegBankReassign.cpp
+++ b/lib/Target/AMDGPU/GCNRegBankReassign.cpp
@@ -230,7 +230,7 @@ private:
public:
Printable printReg(unsigned Reg, unsigned SubReg = 0) const {
return Printable([Reg, SubReg, this](raw_ostream &OS) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
OS << llvm::printReg(Reg, TRI);
return;
}
@@ -275,7 +275,7 @@ char GCNRegBankReassign::ID = 0;
char &llvm::GCNRegBankReassignID = GCNRegBankReassign::ID;
unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const {
- assert (TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Register::isPhysicalRegister(Reg));
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
unsigned Size = TRI->getRegSizeInBits(*RC);
@@ -293,7 +293,7 @@ unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const {
unsigned GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
int Bank) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (!VRM->isAssignedReg(Reg))
return 0;
@@ -364,7 +364,7 @@ unsigned GCNRegBankReassign::analyzeInst(const MachineInstr& MI,
if (!Op.isReg() || Op.isUndef())
continue;
- unsigned R = Op.getReg();
+ Register R = Op.getReg();
if (TRI->hasAGPRs(TRI->getRegClassForReg(*MRI, R)))
continue;
@@ -420,12 +420,12 @@ unsigned GCNRegBankReassign::getOperandGatherWeight(const MachineInstr& MI,
}
bool GCNRegBankReassign::isReassignable(unsigned Reg) const {
- if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
+ if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
return false;
const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
- unsigned PhysReg = VRM->getPhys(Reg);
+ Register PhysReg = VRM->getPhys(Reg);
if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
return false;
@@ -654,7 +654,7 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) {
}
std::sort(BankStalls.begin(), BankStalls.end());
- unsigned OrigReg = VRM->getPhys(C.Reg);
+ Register OrigReg = VRM->getPhys(C.Reg);
LRM->unassign(LI);
while (!BankStalls.empty()) {
BankStall BS = BankStalls.pop_back_val();
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
index 39460fbd8a84..d593204cba05 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -40,7 +40,7 @@ void llvm::printLivesAt(SlotIndex SI,
<< *LIS.getInstructionFromIndex(SI);
unsigned Num = 0;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- const unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ const unsigned Reg = Register::index2VirtReg(I);
if (!LIS.hasInterval(Reg))
continue;
const auto &LI = LIS.getInterval(Reg);
@@ -84,7 +84,7 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
unsigned GCNRegPressure::getRegKind(unsigned Reg,
const MachineRegisterInfo &MRI) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
const auto RC = MRI.getRegClass(Reg);
auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
return STI->isSGPRClass(RC) ?
@@ -183,7 +183,8 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const {
- OS << "VGPRs: " << getVGPRNum();
+ OS << "VGPRs: " << Value[VGPR32] << ' ';
+ OS << "AGPRs: " << Value[AGPR32];
if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGPRNum()) << ')';
OS << ", SGPRs: " << getSGPRNum();
if (ST) OS << "(O" << ST->getOccupancyWithNumSGPRs(getSGPRNum()) << ')';
@@ -196,8 +197,7 @@ void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const {
static LaneBitmask getDefRegMask(const MachineOperand &MO,
const MachineRegisterInfo &MRI) {
- assert(MO.isDef() && MO.isReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()));
+ assert(MO.isDef() && MO.isReg() && Register::isVirtualRegister(MO.getReg()));
// We don't rely on the read-undef flag because, in the case of tentative
// schedule tracking, it isn't set correctly yet. This works correctly however since
@@ -210,8 +210,7 @@ static LaneBitmask getDefRegMask(const MachineOperand &MO,
static LaneBitmask getUsedRegMask(const MachineOperand &MO,
const MachineRegisterInfo &MRI,
const LiveIntervals &LIS) {
- assert(MO.isUse() && MO.isReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()));
+ assert(MO.isUse() && MO.isReg() && Register::isVirtualRegister(MO.getReg()));
if (auto SubReg = MO.getSubReg())
return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
@@ -232,7 +231,7 @@ collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI) {
SmallVector<RegisterMaskPair, 8> Res;
for (const auto &MO : MI.operands()) {
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
if (!MO.isUse() || !MO.readsReg())
continue;
@@ -278,7 +277,7 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
const MachineRegisterInfo &MRI) {
GCNRPTracker::LiveRegSet LiveRegs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- auto Reg = TargetRegisterInfo::index2VirtReg(I);
+ auto Reg = Register::index2VirtReg(I);
if (!LIS.hasInterval(Reg))
continue;
auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
@@ -329,8 +328,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
MaxPressure = max(AtMIPressure, MaxPressure);
for (const auto &MO : MI.defs()) {
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()) ||
- MO.isDead())
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()) || MO.isDead())
continue;
auto Reg = MO.getReg();
@@ -408,8 +406,8 @@ void GCNDownwardRPTracker::advanceToNext() {
for (const auto &MO : LastTrackedMI->defs()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
auto &LiveMask = LiveRegs[Reg];
auto PrevMask = LiveMask;
@@ -500,7 +498,7 @@ void GCNRPTracker::printLiveRegs(raw_ostream &OS, const LiveRegSet& LiveRegs,
const MachineRegisterInfo &MRI) {
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
auto It = LiveRegs.find(Reg);
if (It != LiveRegs.end() && It->second.any())
OS << ' ' << printVRegOrUnit(Reg, TRI) << ':'
diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h
index e4894418b943..5862cdb04166 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/lib/Target/AMDGPU/GCNRegPressure.h
@@ -214,7 +214,7 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> LiveRegMap;
SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- auto Reg = TargetRegisterInfo::index2VirtReg(I);
+ auto Reg = Register::index2VirtReg(I);
if (!LIS.hasInterval(Reg))
continue;
auto &LI = LIS.getInterval(Reg);
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 4ea990ae490e..973491a70d3c 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -71,8 +71,8 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// the tracker, so we need to pass those function a non-const copy.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
- std::vector<unsigned> Pressure;
- std::vector<unsigned> MaxPressure;
+ Pressure.clear();
+ MaxPressure.clear();
if (AtTop)
TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
@@ -103,10 +103,10 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// the analysis to look through dependencies to find the path with the least
// register pressure.
- // We only need to update the RPDelata for instructions that increase
- // register pressure. Instructions that decrease or keep reg pressure
- // the same will be marked as RegExcess in tryCandidate() when they
- // are compared with instructions that increase the register pressure.
+ // We only need to update the RPDelta for instructions that increase register
+ // pressure. Instructions that decrease or keep reg pressure the same will be
+ // marked as RegExcess in tryCandidate() when they are compared with
+ // instructions that increase the register pressure.
if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
@@ -160,6 +160,7 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(Zone.DAG, SchedModel);
Cand.setBest(TryCand);
+ LLVM_DEBUG(traceCandidate(Cand));
}
}
}
@@ -195,6 +196,15 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
} else {
LLVM_DEBUG(traceCandidate(BotCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
+ assert(TCand.SU == BotCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
}
// Check if the top Q has a better candidate.
@@ -206,6 +216,15 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
} else {
LLVM_DEBUG(traceCandidate(TopCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
+ assert(TCand.SU == TopCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
}
// Pick best from BotCand and TopCand.
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h
index eaf3dee9ba5d..dd687a930c79 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -40,6 +40,9 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
const SIRegisterInfo *SRI,
unsigned SGPRPressure, unsigned VGPRPressure);
+ std::vector<unsigned> Pressure;
+ std::vector<unsigned> MaxPressure;
+
unsigned SGPRExcessLimit;
unsigned VGPRExcessLimit;
unsigned SGPRCriticalLimit;
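
The .cpp and .h hunks above move the two pressure vectors from locals into the strategy class, presumably so repeated initCandidate calls reuse their allocations instead of reallocating per candidate. A minimal sketch of the pattern (Strategy and initCandidate are stand-ins):

    #include <vector>

    class Strategy {
      std::vector<unsigned> Pressure;     // was a local in initCandidate()
      std::vector<unsigned> MaxPressure;  // likewise
    public:
      void initCandidate() {
        Pressure.clear();     // clear() keeps capacity from previous calls
        MaxPressure.clear();
        // ... fill via the pressure tracker ...
      }
    };
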
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 57c0ba26cc3a..1f94ab799122 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -109,7 +109,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
MCContext *Ctx) {
int64_t SignedValue = static_cast<int64_t>(Value);
- switch (static_cast<unsigned>(Fixup.getKind())) {
+ switch (Fixup.getTargetKind()) {
case AMDGPU::fixup_si_sopp_br: {
int64_t BrImm = (SignedValue - 4) / 4;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 6549a8d7d592..d352219a7a98 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -87,7 +87,7 @@ std::unique_ptr<MCObjectTargetWriter>
llvm::createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
bool HasRelocationAddend,
uint8_t ABIVersion) {
- return llvm::make_unique<AMDGPUELFObjectWriter>(Is64Bit, OSABI,
+ return std::make_unique<AMDGPUELFObjectWriter>(Is64Bit, OSABI,
HasRelocationAddend,
ABIVersion);
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 01b53432cbb7..a9888e6ed924 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -196,6 +196,10 @@ void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "slc");
}
+void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+}
+
void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "tfe");
@@ -292,35 +296,7 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
}
#endif
- unsigned AltName = AMDGPU::Reg32;
-
- if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg64;
- else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg128;
- else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SReg_96RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg96;
- else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SReg_160RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg160;
- else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg256;
- else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg512;
- else if (MRI.getRegClass(AMDGPU::VReg_1024RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SReg_1024RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg1024;
-
- O << getRegisterName(RegNo, AltName);
+ O << getRegisterName(RegNo);
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
@@ -623,9 +599,11 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
+ case AMDGPU::V_CNDMASK_B32_dpp_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_CNDMASK_B32_dpp8_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
@@ -689,6 +667,7 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
switch (MI->getOpcode()) {
default: break;
+ case AMDGPU::V_CNDMASK_B32_sdwa_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index b544d1ef3605..66b70831ff9e 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -12,7 +12,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
-#include "AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -26,8 +25,7 @@ public:
// Autogenerated by tblgen
void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo,
- unsigned AltIdx = AMDGPU::NoRegAltName);
+ static const char *getRegisterName(unsigned RegNo);
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
@@ -74,6 +72,8 @@ private:
raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 8f11433476f4..c15da8075a34 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -250,7 +250,7 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
const uint32_t Encoded_s_code_end = 0xbf9f0000;
OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
- OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n';
+ OS << "\t.fill 48, 4, " << Encoded_s_code_end << '\n';
return true;
}
@@ -602,7 +602,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
MCStreamer &OS = getStreamer();
OS.PushSection();
OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
- for (unsigned I = 0; I < 32; ++I)
+ for (unsigned I = 0; I < 48; ++I)
OS.EmitIntValue(Encoded_s_code_end, 4);
OS.PopSection();
return true;
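
For the record, the padding emitted by both EmitCodeEnd paths grows as follows (4-byte s_code_end words after the 64-byte alignment); a compile-time check of the arithmetic:

    static_assert(32 * 4 == 128, "old padding, in bytes");
    static_assert(48 * 4 == 192, "new padding, in bytes");
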
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 4735e6cb2446..f33ad950d5d9 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -26,7 +26,7 @@ def MIMGEncoding : GenericEnum {
// Represent an ISA-level opcode, independent of the encoding and the
// vdata/vaddr size.
-class MIMGBaseOpcode {
+class MIMGBaseOpcode : PredicateControl {
MIMGBaseOpcode BaseOpcode = !cast<MIMGBaseOpcode>(NAME);
bit Store = 0;
bit Atomic = 0;
@@ -291,7 +291,7 @@ multiclass MIMG_NoSampler_Src_Helper <bits<8> op, string asm,
multiclass MIMG_NoSampler <bits<8> op, string asm, bit has_d16, bit mip = 0,
bit isResInfo = 0> {
- def "" : MIMGBaseOpcode, PredicateControl {
+ def "" : MIMGBaseOpcode {
let Coordinates = !if(isResInfo, 0, 1);
let LodOrClampOrMip = mip;
let HasD16 = has_d16;
diff --git a/lib/Target/AMDGPU/R600AsmPrinter.cpp b/lib/Target/AMDGPU/R600AsmPrinter.cpp
index 3fb18862fca8..b29cd75f75cf 100644
--- a/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -104,7 +104,7 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Functions need to be cacheline (256B) aligned.
- MF.ensureAlignment(8);
+ MF.ensureAlignment(Align(256));
SetupMachineFunction(MF);
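
The old ensureAlignment(8) took a log2 value, so 8 already meant 256 bytes; the new Align type carries the byte amount directly. A quick sanity check (check is a hypothetical helper):

    #include "llvm/Support/Alignment.h"
    #include <cassert>

    void check() {
      llvm::Align A(256);
      // Same alignment, two encodings: byte value vs. the old log2 form.
      assert(A.value() == 256 && llvm::Log2(A) == 8);
    }
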
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 8098b81d1ea2..e4160ac11c86 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -303,7 +303,7 @@ private:
if (!MO.isReg())
continue;
if (MO.isDef()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (R600::R600_Reg128RegClass.contains(Reg))
DstMI = Reg;
else
@@ -312,7 +312,7 @@ private:
&R600::R600_Reg128RegClass);
}
if (MO.isUse()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (R600::R600_Reg128RegClass.contains(Reg))
SrcMI = Reg;
else
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index c6e8a060d8a0..fd75c41040e1 100644
--- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -135,7 +135,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
for (unsigned Chan = 0; Chan < 4; ++Chan) {
@@ -155,12 +155,12 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
unsigned Opcode = BMI->getOpcode();
// While not strictly necessary from the hw point of view, we force
// all src operands of a dot4 inst to belong to the same slot.
- unsigned Src0 = BMI->getOperand(
- TII->getOperandIdx(Opcode, R600::OpName::src0))
- .getReg();
- unsigned Src1 = BMI->getOperand(
- TII->getOperandIdx(Opcode, R600::OpName::src1))
- .getReg();
+ Register Src0 =
+ BMI->getOperand(TII->getOperandIdx(Opcode, R600::OpName::src0))
+ .getReg();
+ Register Src1 =
+ BMI->getOperand(TII->getOperandIdx(Opcode, R600::OpName::src1))
+ .getReg();
(void) Src0;
(void) Src1;
if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
@@ -205,10 +205,10 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
// T0_Z = CUBE T1_X, T1_Z
// T0_W = CUBE T1_Y, T1_Z
for (unsigned Chan = 0; Chan < 4; Chan++) {
- unsigned DstReg = MI.getOperand(
- TII->getOperandIdx(MI, R600::OpName::dst)).getReg();
- unsigned Src0 = MI.getOperand(
- TII->getOperandIdx(MI, R600::OpName::src0)).getReg();
+ Register DstReg =
+ MI.getOperand(TII->getOperandIdx(MI, R600::OpName::dst)).getReg();
+ Register Src0 =
+ MI.getOperand(TII->getOperandIdx(MI, R600::OpName::src0)).getReg();
unsigned Src1 = 0;
// Determine the correct source registers
diff --git a/lib/Target/AMDGPU/R600FrameLowering.h b/lib/Target/AMDGPU/R600FrameLowering.h
index 950e238f4979..283e4d1935ea 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.h
+++ b/lib/Target/AMDGPU/R600FrameLowering.h
@@ -15,9 +15,9 @@ namespace llvm {
class R600FrameLowering : public AMDGPUFrameLowering {
public:
- R600FrameLowering(StackDirection D, unsigned StackAl, int LAO,
- unsigned TransAl = 1) :
- AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
+ R600FrameLowering(StackDirection D, Align StackAl, int LAO,
+ Align TransAl = Align::None())
+ : AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
~R600FrameLowering() override;
void emitPrologue(MachineFunction &MF,
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index f80a53ba1dc6..659458b0b752 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -41,6 +41,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -334,8 +335,8 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
case R600::MASK_WRITE: {
- unsigned maskedRegister = MI.getOperand(0).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+ Register maskedRegister = MI.getOperand(0).getReg();
+ assert(Register::isVirtualRegister(maskedRegister));
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
break;
@@ -782,7 +783,7 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
return TrigVal;
// On R600 hw, COS/SIN input must be between -Pi and Pi.
return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
- DAG.getConstantFP(3.14159265359, DL, MVT::f32));
+ DAG.getConstantFP(numbers::pif, DL, MVT::f32));
}
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
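
numbers::pif comes from the MathExtras.h include added earlier in this file's diff; it is pi as a correctly rounded float (pi being the double variant), replacing the hand-typed literal:

    #include "llvm/Support/MathExtras.h"

    // Typed math constant instead of 3.14159265359 spelled out inline.
    constexpr float PiF = llvm::numbers::pif;
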
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index d9e839fe2035..04a5e93f6213 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -97,8 +97,8 @@ bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {
for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
E = MBBI->operands_end(); I != E; ++I) {
- if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
- I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
+ if (I->isReg() && !Register::isVirtualRegister(I->getReg()) && I->isUse() &&
+ RI.isPhysRegLiveAcrossClauses(I->getReg()))
return false;
}
return true;
@@ -242,8 +242,7 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
E = MI.operands_end();
I != E; ++I) {
- if (!I->isReg() || !I->isUse() ||
- TargetRegisterInfo::isVirtualRegister(I->getReg()))
+ if (!I->isReg() || !I->isUse() || Register::isVirtualRegister(I->getReg()))
continue;
if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
@@ -294,7 +293,7 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {
for (unsigned j = 0; j < 8; j++) {
MachineOperand &MO =
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == R600::ALU_CONST) {
MachineOperand &Sel =
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
@@ -317,7 +316,7 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {
if (SrcIdx < 0)
break;
MachineOperand &MO = MI.getOperand(SrcIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == R600::ALU_CONST) {
MachineOperand &Sel =
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
@@ -348,7 +347,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
unsigned i = 0;
for (const auto &Src : getSrcs(MI)) {
++i;
- unsigned Reg = Src.first->getReg();
+ Register Reg = Src.first->getReg();
int Index = RI.getEncodingValue(Reg) & 0xff;
if (Reg == R600::OQAP) {
Result.push_back(std::make_pair(Index, 0U));
@@ -865,7 +864,7 @@ bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
if (idx < 0)
return false;
- unsigned Reg = MI.getOperand(idx).getReg();
+ Register Reg = MI.getOperand(idx).getReg();
switch (Reg) {
default: return false;
case R600::PRED_SEL_ONE:
@@ -1038,7 +1037,7 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
- unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
+ Register OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
if (OffsetReg == R600::INDIRECT_BASE_ADDR) {
buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
getIndirectAddrRegClass()->getRegister(Address));
@@ -1052,7 +1051,7 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
- unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
+ Register OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
if (OffsetReg == R600::INDIRECT_BASE_ADDR) {
buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
MI.getOperand(ValOpIdx).getReg());
@@ -1193,8 +1192,7 @@ int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
for (std::pair<unsigned, unsigned> LI : MRI.liveins()) {
unsigned Reg = LI.first;
- if (TargetRegisterInfo::isVirtualRegister(Reg) ||
- !IndirectRC->contains(Reg))
+ if (Register::isVirtualRegister(Reg) || !IndirectRC->contains(Reg))
continue;
unsigned RegIndex;
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp
index 34267a909b5e..7569a2629539 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -183,7 +183,7 @@ isPhysicalRegCopy(MachineInstr *MI) {
if (MI->getOpcode() != R600::COPY)
return false;
- return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
+ return !Register::isVirtualRegister(MI->getOperand(1).getReg());
}
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
@@ -209,7 +209,7 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
const TargetRegisterClass *RC) const {
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Register::isVirtualRegister(Reg)) {
return RC->contains(Reg);
} else {
return MRI->getRegClass(Reg) == RC;
@@ -270,7 +270,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
}
// Is the result already member of a X/Y/Z/W class ?
- unsigned DestReg = MI->getOperand(0).getReg();
+ Register DestReg = MI->getOperand(0).getReg();
if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
return AluT_X;
@@ -357,7 +357,7 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
if (DstIndex == -1) {
return;
}
- unsigned DestReg = MI->getOperand(DstIndex).getReg();
+ Register DestReg = MI->getOperand(DstIndex).getReg();
// PressureRegister crashes if an operand is defined and used in the same inst
// and we try to constrain its regclass
for (MachineInstr::mop_iterator It = MI->operands_begin(),
diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 9f1cb6582b5c..cec7f563f480 100644
--- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -58,7 +58,7 @@ using namespace llvm;
static bool isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
assert(MRI.isSSA());
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return false;
const MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
return MI && MI->isImplicitDef();
@@ -197,17 +197,17 @@ unsigned getReassignedChan(
MachineInstr *R600VectorRegMerger::RebuildVector(
RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
- unsigned Reg = RSI->Instr->getOperand(0).getReg();
+ Register Reg = RSI->Instr->getOperand(0).getReg();
MachineBasicBlock::iterator Pos = RSI->Instr;
MachineBasicBlock &MBB = *Pos->getParent();
DebugLoc DL = Pos->getDebugLoc();
- unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
+ Register SrcVec = BaseRSI->Instr->getOperand(0).getReg();
DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
E = RSI->RegToChan.end(); It != E; ++It) {
- unsigned DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
+ Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
unsigned SubReg = (*It).first;
unsigned Swizzle = (*It).second;
unsigned Chan = getReassignedChan(RemapChan, Swizzle);
@@ -350,7 +350,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
MachineInstr &MI = *MII;
if (MI.getOpcode() != R600::REG_SEQUENCE) {
if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
- unsigned Reg = MI.getOperand(1).getReg();
+ Register Reg = MI.getOperand(1).getReg();
for (MachineRegisterInfo::def_instr_iterator
It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
It != E; ++It) {
@@ -363,7 +363,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
RegSeqInfo RSI(*MRI, &MI);
// Are all uses of MI swizzeable?
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
if (!areAllUsesSwizzeable(Reg))
continue;
diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp
index df200baf11c1..176269f9b68c 100644
--- a/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -90,7 +90,7 @@ private:
if (DstIdx == -1) {
continue;
}
- unsigned Dst = BI->getOperand(DstIdx).getReg();
+ Register Dst = BI->getOperand(DstIdx).getReg();
if (isTrans || TII->isTransOnly(*BI)) {
Result[Dst] = R600::PS;
continue;
@@ -136,7 +136,7 @@ private:
int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
if (OperandIdx < 0)
continue;
- unsigned Src = MI.getOperand(OperandIdx).getReg();
+ Register Src = MI.getOperand(OperandIdx).getReg();
const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
if (It != PVs.end())
MI.getOperand(OperandIdx).setReg(It->second);
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp
index 685df74490fe..ef12c1d24594 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -93,7 +93,7 @@ const RegClassWeight &R600RegisterInfo::getRegClassWeight(
}
bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {
- assert(!TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(!Register::isVirtualRegister(Reg));
switch (Reg) {
case R600::OQAP:
diff --git a/lib/Target/AMDGPU/SIAddIMGInit.cpp b/lib/Target/AMDGPU/SIAddIMGInit.cpp
index f8094e35816c..ee011286b8ff 100644
--- a/lib/Target/AMDGPU/SIAddIMGInit.cpp
+++ b/lib/Target/AMDGPU/SIAddIMGInit.cpp
@@ -129,7 +129,7 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
continue;
// Create a register for the initialization value.
- unsigned PrevDst =
+ Register PrevDst =
MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
unsigned NewDst = 0; // Final initialized value will be in here
@@ -150,7 +150,7 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
NewDst =
MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
// Initialize dword
- unsigned SubReg =
+ Register SubReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
.addImm(0);
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index a0e1ec6ac235..23ef56afc39c 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -99,7 +99,10 @@ enum : uint64_t {
FPAtomic = UINT64_C(1) << 53,
// Is a MFMA instruction.
- IsMAI = UINT64_C(1) << 54
+ IsMAI = UINT64_C(1) << 54,
+
+ // Is a DOT instruction.
+ IsDOT = UINT64_C(1) << 55
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -444,6 +447,7 @@ namespace DPP {
enum DppCtrl : unsigned {
QUAD_PERM_FIRST = 0,
+ QUAD_PERM_ID = 0xE4, // identity permutation
QUAD_PERM_LAST = 0xFF,
DPP_UNUSED1 = 0x100,
ROW_SHL0 = 0x100,
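
Per the new enum comment, 0xE4 is the identity quad_perm: the control byte packs four 2-bit lane selects, select[i] in bits [2*i+1 : 2*i]. A sketch of the arithmetic (quadPerm is a hypothetical helper):

    // quadPerm(0, 1, 2, 3) == 0b11'10'01'00 == 0xE4 == QUAD_PERM_ID:
    // every lane keeps its own value.
    unsigned quadPerm(unsigned S0, unsigned S1, unsigned S2, unsigned S3) {
      return S0 | (S1 << 2) | (S2 << 4) | (S3 << 6);
    }
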
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 624953963cf4..65286751c12d 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -113,10 +113,16 @@ class SIFixSGPRCopies : public MachineFunctionPass {
public:
static char ID;
+ MachineRegisterInfo *MRI;
+ const SIRegisterInfo *TRI;
+ const SIInstrInfo *TII;
+
SIFixSGPRCopies() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
+ void processPHINode(MachineInstr &MI);
+
StringRef getPassName() const override { return "SI Fix SGPR copies"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -148,7 +154,7 @@ static bool hasVectorOperands(const MachineInstr &MI,
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isReg() ||
- !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
+ !Register::isVirtualRegister(MI.getOperand(i).getReg()))
continue;
if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg())))
@@ -161,21 +167,19 @@ static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
const SIRegisterInfo &TRI,
const MachineRegisterInfo &MRI) {
- unsigned DstReg = Copy.getOperand(0).getReg();
- unsigned SrcReg = Copy.getOperand(1).getReg();
+ Register DstReg = Copy.getOperand(0).getReg();
+ Register SrcReg = Copy.getOperand(1).getReg();
- const TargetRegisterClass *SrcRC =
- TargetRegisterInfo::isVirtualRegister(SrcReg) ?
- MRI.getRegClass(SrcReg) :
- TRI.getPhysRegClass(SrcReg);
+ const TargetRegisterClass *SrcRC = Register::isVirtualRegister(SrcReg)
+ ? MRI.getRegClass(SrcReg)
+ : TRI.getPhysRegClass(SrcReg);
// We don't really care about the subregister here.
// SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
- const TargetRegisterClass *DstRC =
- TargetRegisterInfo::isVirtualRegister(DstReg) ?
- MRI.getRegClass(DstReg) :
- TRI.getPhysRegClass(DstReg);
+ const TargetRegisterClass *DstRC = Register::isVirtualRegister(DstReg)
+ ? MRI.getRegClass(DstReg)
+ : TRI.getPhysRegClass(DstReg);
return std::make_pair(SrcRC, DstRC);
}
@@ -199,10 +203,10 @@ static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
const SIInstrInfo *TII) {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto &Src = MI.getOperand(1);
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = Src.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- !TargetRegisterInfo::isVirtualRegister(DstReg))
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = Src.getReg();
+ if (!Register::isVirtualRegister(SrcReg) ||
+ !Register::isVirtualRegister(DstReg))
return false;
for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
@@ -238,7 +242,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
MachineRegisterInfo &MRI) {
assert(MI.isRegSequence());
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
return false;
@@ -250,7 +254,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return false;
// It is illegal to have vreg inputs to a physreg defining reg_sequence.
- if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
+ if (Register::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
return false;
const TargetRegisterClass *SrcRC, *DstRC;
@@ -281,7 +285,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
bool IsAGPR = TRI->hasAGPRs(DstRC);
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
- unsigned SrcReg = MI.getOperand(I).getReg();
+ Register SrcReg = MI.getOperand(I).getReg();
unsigned SrcSubReg = MI.getOperand(I).getSubReg();
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
@@ -291,7 +295,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
- unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
+ Register TmpReg = MRI.createVirtualRegister(NewSrcRC);
BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
TmpReg)
@@ -299,7 +303,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
if (IsAGPR) {
const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC);
- unsigned TmpAReg = MRI.createVirtualRegister(NewSrcRC);
+ Register TmpAReg = MRI.createVirtualRegister(NewSrcRC);
unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY;
BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc),
@@ -315,52 +319,6 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return true;
}
-static bool phiHasVGPROperands(const MachineInstr &PHI,
- const MachineRegisterInfo &MRI,
- const SIRegisterInfo *TRI,
- const SIInstrInfo *TII) {
- for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
- unsigned Reg = PHI.getOperand(i).getReg();
- if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
- return true;
- }
- return false;
-}
-
-static bool phiHasBreakDef(const MachineInstr &PHI,
- const MachineRegisterInfo &MRI,
- SmallSet<unsigned, 8> &Visited) {
- for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
- unsigned Reg = PHI.getOperand(i).getReg();
- if (Visited.count(Reg))
- continue;
-
- Visited.insert(Reg);
-
- MachineInstr *DefInstr = MRI.getVRegDef(Reg);
- switch (DefInstr->getOpcode()) {
- default:
- break;
- case AMDGPU::SI_IF_BREAK:
- return true;
- case AMDGPU::PHI:
- if (phiHasBreakDef(*DefInstr, MRI, Visited))
- return true;
- }
- }
- return false;
-}
-
-static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
- const TargetRegisterInfo &TRI) {
- for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
- E = MBB.end(); I != E; ++I) {
- if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
- return true;
- }
- return false;
-}
-
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
const MachineInstr *MoveImm,
const SIInstrInfo *TII,
@@ -422,12 +380,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
return false;
}
-static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
- const TargetRegisterInfo *TRI) {
- return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
- return hasTerminatorThatModifiesExec(*MBB, *TRI); });
-}
-
// Checks if there is potential path From instruction To instruction.
// If CutOff is specified and it sits in between of that path we ignore
// a higher portion of the path and report it is not reachable.
@@ -468,6 +420,7 @@ getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
// execution.
static bool hoistAndMergeSGPRInits(unsigned Reg,
const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo *TRI,
MachineDominatorTree &MDT,
const TargetInstrInfo *TII) {
// List of inits by immediate value.
@@ -482,7 +435,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
for (auto &MI : MRI.def_instructions(Reg)) {
MachineOperand *Imm = nullptr;
- for (auto &MO: MI.operands()) {
+ for (auto &MO : MI.operands()) {
if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
(!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
Imm = nullptr;
@@ -587,8 +540,44 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
}
}
- for (auto MI : MergedInstrs)
- MI->removeFromParent();
+ // Remove initializations that were merged into another.
+ for (auto &Init : Inits) {
+ auto &Defs = Init.second;
+ auto I = Defs.begin();
+ while (I != Defs.end()) {
+ if (MergedInstrs.count(*I)) {
+ (*I)->eraseFromParent();
+ I = Defs.erase(I);
+ } else
+ ++I;
+ }
+ }
+
+ // Try to schedule SGPR initializations as early as possible in the MBB.
+ for (auto &Init : Inits) {
+ auto &Defs = Init.second;
+ for (auto MI : Defs) {
+ auto MBB = MI->getParent();
+ MachineInstr &BoundaryMI = *getFirstNonPrologue(MBB, TII);
+ MachineBasicBlock::reverse_iterator B(BoundaryMI);
+ // Check if B should actually be a boundary. If not, set the previous
+ // instruction as the boundary instead.
+ if (!TII->isBasicBlockPrologue(*B))
+ B++;
+
+ auto R = std::next(MI->getReverseIterator());
+ const unsigned Threshold = 50;
+ // Search until B or Threshold for a place to insert the initialization.
+ for (unsigned I = 0; R != B && I < Threshold; ++R, ++I)
+ if (R->readsRegister(Reg, TRI) || R->definesRegister(Reg, TRI) ||
+ TII->isSchedulingBoundary(*R, MBB, *MBB->getParent()))
+ break;
+
+ // Move to directly after R.
+ if (&*--R != MI)
+ MBB->splice(*R, MBB, MI);
+ }
+ }
if (Changed)
MRI.clearKillFlags(Reg);
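The scheduling step above walks backwards from each init for at most
Threshold instructions and stops at anything that reads or defines the
register or is a scheduling boundary. A toy sketch of that scan
(hypothetical Inst type, not the pass's data structures):

#include <cstddef>
#include <vector>

// Toy instruction: flags for "touches the register being initialized" and
// "is a scheduling boundary". Hypothetical stand-ins for the real queries.
struct Inst {
  bool TouchesReg = false;
  bool IsBoundary = false;
};

// Walk backwards from InitIdx for at most Threshold instructions and return
// the earliest index the init could be placed at.
std::size_t findHoistPoint(const std::vector<Inst> &Block, std::size_t InitIdx,
                           std::size_t Threshold = 50) {
  std::size_t Pos = InitIdx;
  for (std::size_t Steps = 0; Pos > 0 && Steps < Threshold; ++Steps) {
    const Inst &Prev = Block[Pos - 1];
    // Stop before anything that uses/defines the register or acts as a
    // scheduling boundary, as in the loop above.
    if (Prev.TouchesReg || Prev.IsBoundary)
      break;
    --Pos;
  }
  return Pos;
}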
@@ -598,9 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const SIInstrInfo *TII = ST.getInstrInfo();
+ MRI = &MF.getRegInfo();
+ TRI = ST.getRegisterInfo();
+ TII = ST.getInstrInfo();
MDT = &getAnalysis<MachineDominatorTree>();
SmallVector<MachineInstr *, 16> Worklist;
@@ -617,22 +606,39 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
continue;
case AMDGPU::COPY:
case AMDGPU::WQM:
+ case AMDGPU::SOFT_WQM:
case AMDGPU::WWM: {
- // If the destination register is a physical register there isn't really
- // much we can do to fix this.
- if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
- continue;
+ Register DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *SrcRC, *DstRC;
- std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
+ std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
+
+ if (!Register::isVirtualRegister(DstReg)) {
+ // If the destination register is a physical register, there isn't
+ // really much we can do to fix this.
+ // Some special instructions use M0 as an input. Some even only use
+ // the first lane. Insert a readfirstlane and hope for the best.
+ if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
+ Register TmpReg
+ = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ BuildMI(MBB, MI, MI.getDebugLoc(),
+ TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
+ .add(MI.getOperand(1));
+ MI.getOperand(1).setReg(TmpReg);
+ }
+
+ continue;
+ }
+
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg)) {
TII->moveToVALU(MI, MDT);
break;
}
- MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
unsigned SMovOp;
int64_t Imm;
// If we are just copying an immediate, we can replace the copy with
@@ -651,70 +657,13 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}
case AMDGPU::PHI: {
- unsigned Reg = MI.getOperand(0).getReg();
- if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
- break;
-
- // We don't need to fix the PHI if the common dominator of the
- // two incoming blocks terminates with a uniform branch.
- bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
- if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
- MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
- MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
-
- if (!predsHasDivergentTerminator(MBB0, TRI) &&
- !predsHasDivergentTerminator(MBB1, TRI)) {
- LLVM_DEBUG(dbgs()
- << "Not fixing PHI for uniform branch: " << MI << '\n');
- break;
- }
- }
-
- // If a PHI node defines an SGPR and any of its operands are VGPRs,
- // then we need to move it to the VALU.
- //
- // Also, if a PHI node defines an SGPR and has all SGPR operands
- // we must move it to the VALU, because the SGPR operands will
- // all end up being assigned the same register, which means
- // there is a potential for a conflict if different threads take
- // different control flow paths.
- //
- // For Example:
- //
- // sgpr0 = def;
- // ...
- // sgpr1 = def;
- // ...
- // sgpr2 = PHI sgpr0, sgpr1
- // use sgpr2;
- //
- // Will Become:
- //
- // sgpr2 = def;
- // ...
- // sgpr2 = def;
- // ...
- // use sgpr2
- //
- // The one exception to this rule is when one of the operands
- // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
- // instruction. In this case, there we know the program will
- // never enter the second block (the loop) without entering
- // the first block (where the condition is computed), so there
- // is no chance for values to be over-written.
-
- SmallSet<unsigned, 8> Visited;
- if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
- LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
- TII->moveToVALU(MI, MDT);
- }
-
+ processPHINode(MI);
break;
}
case AMDGPU::REG_SEQUENCE:
if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
!hasVectorOperands(MI, TRI)) {
- foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
+ foldVGPRCopyIntoRegSequence(MI, TRI, TII, *MRI);
continue;
}
@@ -724,9 +673,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
case AMDGPU::INSERT_SUBREG: {
const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
- DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
- Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
- Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
+ DstRC = MRI->getRegClass(MI.getOperand(0).getReg());
+ Src0RC = MRI->getRegClass(MI.getOperand(1).getReg());
+ Src1RC = MRI->getRegClass(MI.getOperand(2).getReg());
if (TRI->isSGPRClass(DstRC) &&
(TRI->hasVectorRegisters(Src0RC) ||
TRI->hasVectorRegisters(Src1RC))) {
@@ -735,12 +684,159 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
break;
}
+ case AMDGPU::V_WRITELANE_B32: {
+ // Some architectures allow more than one constant bus access without
+ // an SGPR restriction.
+ if (ST.getConstantBusLimit(MI.getOpcode()) != 1)
+ break;
+
+ // Writelane is special in that it can use an SGPR and M0 (which would
+ // normally count as using the constant bus twice - but in this case it
+ // is allowed since the lane selector doesn't count as a use of the
+ // constant bus). However, it is still required to abide by the 1 SGPR
+ // rule. Apply a fix here as we might have multiple SGPRs after
+ // legalizing VGPRs to SGPRs.
+ int Src0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ int Src1Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
+ MachineOperand &Src1 = MI.getOperand(Src1Idx);
+
+ // Check to see if the instruction violates the 1 SGPR rule
+ if ((Src0.isReg() && TRI->isSGPRReg(*MRI, Src0.getReg()) &&
+ Src0.getReg() != AMDGPU::M0) &&
+ (Src1.isReg() && TRI->isSGPRReg(*MRI, Src1.getReg()) &&
+ Src1.getReg() != AMDGPU::M0)) {
+
+ // Check for a trivially easy constant prop into one of the operands.
+ // If this is the case, then perform the operation now to resolve the
+ // SGPR issue. If we don't do it here, we will always insert a mov to
+ // m0 that can't be resolved in the later operand-folding pass.
+ bool Resolved = false;
+ for (MachineOperand *MO : {&Src0, &Src1}) {
+ if (Register::isVirtualRegister(MO->getReg())) {
+ MachineInstr *DefMI = MRI->getVRegDef(MO->getReg());
+ if (DefMI && TII->isFoldableCopy(*DefMI)) {
+ const MachineOperand &Def = DefMI->getOperand(0);
+ if (Def.isReg() &&
+ MO->getReg() == Def.getReg() &&
+ MO->getSubReg() == Def.getSubReg()) {
+ const MachineOperand &Copied = DefMI->getOperand(1);
+ if (Copied.isImm() &&
+ TII->isInlineConstant(APInt(64, Copied.getImm(), true))) {
+ MO->ChangeToImmediate(Copied.getImm());
+ Resolved = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if (!Resolved) {
+ // Haven't managed to resolve by replacing an SGPR with an immediate.
+ // Move src1 into M0 instead.
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+ TII->get(AMDGPU::COPY), AMDGPU::M0)
+ .add(Src1);
+ Src1.ChangeToRegister(AMDGPU::M0, false);
+ }
+ }
+ break;
+ }
}
}
}
if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
- hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);
+ hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII);
return true;
}
+
+void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
+ unsigned numVGPRUses = 0;
+ bool AllAGPRUses = true;
+ SetVector<const MachineInstr *> worklist;
+ SmallSet<const MachineInstr *, 4> Visited;
+ worklist.insert(&MI);
+ Visited.insert(&MI);
+ while (!worklist.empty()) {
+ const MachineInstr *Instr = worklist.pop_back_val();
+ unsigned Reg = Instr->getOperand(0).getReg();
+ for (const auto &Use : MRI->use_operands(Reg)) {
+ const MachineInstr *UseMI = Use.getParent();
+ AllAGPRUses &= (UseMI->isCopy() &&
+ TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg())) ||
+ TRI->isAGPR(*MRI, Use.getReg());
+ if (UseMI->isCopy() || UseMI->isRegSequence()) {
+ if (UseMI->isCopy() &&
+ UseMI->getOperand(0).getReg().isPhysical() &&
+ !TRI->isSGPRReg(*MRI, UseMI->getOperand(0).getReg())) {
+ numVGPRUses++;
+ }
+ if (Visited.insert(UseMI).second)
+ worklist.insert(UseMI);
+
+ continue;
+ }
+
+ if (UseMI->isPHI()) {
+ const TargetRegisterClass *UseRC = MRI->getRegClass(Use.getReg());
+ if (!TRI->isSGPRReg(*MRI, Use.getReg()) &&
+ UseRC != &AMDGPU::VReg_1RegClass)
+ numVGPRUses++;
+ continue;
+ }
+
+ const TargetRegisterClass *OpRC =
+ TII->getOpRegClass(*UseMI, UseMI->getOperandNo(&Use));
+ if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
+ OpRC != &AMDGPU::VS_64RegClass) {
+ numVGPRUses++;
+ }
+ }
+ }
+
+ Register PHIRes = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
+ if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
+ LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
+ MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
+ }
+
+ bool hasVGPRInput = false;
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ unsigned InputReg = MI.getOperand(i).getReg();
+ MachineInstr *Def = MRI->getVRegDef(InputReg);
+ if (TRI->isVectorRegister(*MRI, InputReg)) {
+ if (Def->isCopy()) {
+ unsigned SrcReg = Def->getOperand(1).getReg();
+ const TargetRegisterClass *RC =
+ TRI->getRegClassForReg(*MRI, SrcReg);
+ if (TRI->isSGPRClass(RC))
+ continue;
+ }
+ hasVGPRInput = true;
+ break;
+ }
+ else if (Def->isCopy() &&
+ TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) {
+ hasVGPRInput = true;
+ break;
+ }
+ }
+
+ if ((!TRI->isVectorRegister(*MRI, PHIRes) &&
+ RC0 != &AMDGPU::VReg_1RegClass) &&
+ (hasVGPRInput || numVGPRUses > 1)) {
+ LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
+ TII->moveToVALU(MI);
+ }
+ else {
+ LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
+ TII->legalizeOperands(MI, MDT);
+ }
+
+}
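The use-classification loop in processPHINode is a plain worklist walk of the
def-use graph that looks through copies and reg_sequences. A reduced sketch
over a toy graph (the boolean flags stand in for the register-class checks
and are not the real API):

#include <set>
#include <vector>

struct Node {
  bool IsCopyOrRegSeq = false;  // COPY/REG_SEQUENCE: look through it.
  bool CountsAsVGPRUse = false; // e.g. an operand wanting a VGPR class.
  std::vector<Node *> Uses;     // Instructions consuming this result.
};

// Counts VGPR-flavoured uses reachable through copies/reg_sequences,
// mirroring the worklist in processPHINode.
unsigned countVGPRUses(Node *Phi) {
  std::vector<Node *> Worklist{Phi};
  std::set<Node *> Visited{Phi};
  unsigned NumVGPRUses = 0;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    for (Node *U : N->Uses) {
      if (U->IsCopyOrRegSeq) { // Transparent: keep walking the chain.
        if (Visited.insert(U).second)
          Worklist.push_back(U);
        continue;
      }
      if (U->CountsAsVGPRUse)
        ++NumVGPRUses;
    }
  }
  return NumVGPRUses;
}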
diff --git a/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/lib/Target/AMDGPU/SIFixupVectorISel.cpp
index 5b834c8de13a..a0119297b112 100644
--- a/lib/Target/AMDGPU/SIFixupVectorISel.cpp
+++ b/lib/Target/AMDGPU/SIFixupVectorISel.cpp
@@ -91,8 +91,7 @@ static bool findSRegBaseAndIndex(MachineOperand *Op,
Worklist.push_back(Op);
while (!Worklist.empty()) {
MachineOperand *WOp = Worklist.pop_back_val();
- if (!WOp->isReg() ||
- !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
+ if (!WOp->isReg() || !Register::isVirtualRegister(WOp->getReg()))
continue;
MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
switch (DefInst->getOpcode()) {
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index 74d77d328019..4eac03168760 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -142,16 +142,20 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
switch (Opc) {
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_F16_e64:
- case AMDGPU::V_FMAC_F32_e64: {
+ case AMDGPU::V_FMAC_F32_e64:
+ case AMDGPU::V_FMAC_F16_e64: {
// Special case for mac. Since this is replaced with mad when folded into
// src2, we need to check the legality for the final instruction.
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (static_cast<int>(OpNo) == Src2Idx) {
- bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
- bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
+ bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e64;
+ bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64;
unsigned Opc = IsFMA ?
- AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
+ (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
+ (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
const MCInstrDesc &MadDesc = TII->get(Opc);
return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
}
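With V_FMAC_F16_e64 added, the folded opcode is now chosen along two
independent axes, FMA-vs-MAC and f32-vs-f16. A sketch of just that selection,
with strings standing in for the AMDGPU opcode enums:

#include <cstdio>

// The IsFMA / IsF32 pair computed above selects one of four opcodes.
const char *pickFoldedOpcode(bool IsFMA, bool IsF32) {
  if (IsFMA)
    return IsF32 ? "V_FMA_F32" : "V_FMA_F16_gfx9";
  return IsF32 ? "V_MAD_F32" : "V_MAD_F16";
}

int main() {
  // An f16 FMAC folded into src2 now becomes the gfx9 f16 FMA.
  std::printf("%s\n", pickFoldedOpcode(true, false));
}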
@@ -235,9 +239,11 @@ static bool updateOperand(FoldCandidate &Fold,
if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
MachineBasicBlock *MBB = MI->getParent();
- auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
- if (Liveness != MachineBasicBlock::LQR_Dead)
+ auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI, 16);
+ if (Liveness != MachineBasicBlock::LQR_Dead) {
+ LLVM_DEBUG(dbgs() << "Not shrinking " << MI << " due to vcc liveness\n");
return false;
+ }
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
int Op32 = Fold.getShrinkOpcode();
@@ -248,7 +254,7 @@ static bool updateOperand(FoldCandidate &Fold,
bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
- unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
+ Register NewReg0 = MRI.createVirtualRegister(Dst0RC);
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
@@ -314,12 +320,15 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
// Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
unsigned Opc = MI->getOpcode();
if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F32_e64) &&
+ Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
(int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
- bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
- bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
+ bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e64;
+ bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64;
unsigned NewOpc = IsFMA ?
- AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
+ (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
+ (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
// Check if changing this to a v_mad_{f16, f32} instruction will allow us
// to fold the operand.
@@ -435,7 +444,8 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST)
return false;
- if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy)) {
+ if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
+ TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
return true;
}
@@ -443,8 +453,8 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
if (!OpToFold.isReg())
return false;
- unsigned UseReg = OpToFold.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(UseReg))
+ Register UseReg = OpToFold.getReg();
+ if (!Register::isVirtualRegister(UseReg))
return false;
if (llvm::find_if(FoldList, [UseMI](const FoldCandidate &FC) {
@@ -481,6 +491,9 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
return false; // Can only fold splat constants
}
+ if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
+ return false;
+
FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
return true;
}
@@ -518,7 +531,7 @@ void SIFoldOperands::foldOperand(
// REG_SEQUENCE instructions, so we have to fold them into the
// uses of REG_SEQUENCE.
if (UseMI->isRegSequence()) {
- unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
+ Register RegSeqDstReg = UseMI->getOperand(0).getReg();
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
MachineRegisterInfo::use_iterator Next;
@@ -569,15 +582,18 @@ void SIFoldOperands::foldOperand(
OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
if (FoldingImmLike && UseMI->isCopy()) {
- unsigned DestReg = UseMI->getOperand(0).getReg();
- const TargetRegisterClass *DestRC
- = TargetRegisterInfo::isVirtualRegister(DestReg) ?
- MRI->getRegClass(DestReg) :
- TRI->getPhysRegClass(DestReg);
-
- unsigned SrcReg = UseMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
- TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ Register DestReg = UseMI->getOperand(0).getReg();
+
+ // Don't fold into a copy to a physical register. Doing so would interfere
+ // with the register coalescer's logic, which avoids redundant
+ // initializations.
+ if (DestReg.isPhysical())
+ return;
+
+ const TargetRegisterClass *DestRC = MRI->getRegClass(DestReg);
+
+ Register SrcReg = UseMI->getOperand(1).getReg();
+ if (SrcReg.isVirtual()) { // XXX - This can be an assert?
const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
MachineRegisterInfo::use_iterator NextUse;
@@ -613,10 +629,17 @@ void SIFoldOperands::foldOperand(
return;
UseMI->setDesc(TII->get(MovOp));
+ MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
+ MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
+ while (ImpOpI != ImpOpE) {
+ MachineInstr::mop_iterator Tmp = ImpOpI;
+ ImpOpI++;
+ UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
+ }
CopiesToReplace.push_back(UseMI);
} else {
if (UseMI->isCopy() && OpToFold.isReg() &&
- TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
+ Register::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) &&
!UseMI->getOperand(1).getSubReg()) {
@@ -677,6 +700,9 @@ void SIFoldOperands::foldOperand(
// =>
// %sgpr1 = COPY %sgpr0
UseMI->setDesc(TII->get(AMDGPU::COPY));
+ UseMI->getOperand(1).setReg(OpToFold.getReg());
+ UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
+ UseMI->getOperand(1).setIsKill(false);
UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
return;
}
@@ -708,7 +734,7 @@ void SIFoldOperands::foldOperand(
// Split 64-bit constants into 32-bits for folding.
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
- unsigned UseReg = UseOp.getReg();
+ Register UseReg = UseOp.getReg();
const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
@@ -810,7 +836,7 @@ static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
if (Op.isReg()) {
// If this has a subregister, it obviously is a register source.
if (Op.getSubReg() != AMDGPU::NoSubRegister ||
- !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+ !Register::isVirtualRegister(Op.getReg()))
return &Op;
MachineInstr *Def = MRI.getVRegDef(Op.getReg());
@@ -1073,6 +1099,13 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
+ if (Fold.isReg() && Register::isVirtualRegister(Fold.OpToFold->getReg())) {
+ Register Reg = Fold.OpToFold->getReg();
+ MachineInstr *DefMI = Fold.OpToFold->getParent();
+ if (DefMI->readsRegister(AMDGPU::EXEC, TRI) &&
+ execMayBeModifiedBeforeUse(*MRI, Reg, *DefMI, *Fold.UseMI))
+ continue;
+ }
if (updateOperand(Fold, *TII, *TRI, *ST)) {
// Clear kill flags.
if (Fold.isReg()) {
@@ -1316,6 +1349,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next;
+
+ MachineOperand *CurrentKnownM0Val = nullptr;
for (I = MBB->begin(); I != MBB->end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
@@ -1328,6 +1363,25 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
!tryFoldOMod(MI))
tryFoldClamp(MI);
+
+ // Saw an unknown clobber of m0, so we no longer know what it is.
+ if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
+ CurrentKnownM0Val = nullptr;
+ continue;
+ }
+
+ // Specially track simple redefs of m0 to the same value in a block, so we
+ // can erase the later ones.
+ if (MI.getOperand(0).getReg() == AMDGPU::M0) {
+ MachineOperand &NewM0Val = MI.getOperand(1);
+ if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
+ MI.eraseFromParent();
+ continue;
+ }
+
+ // We aren't tracking other physical registers
+ CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ?
+ nullptr : &NewM0Val;
continue;
}
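The m0 bookkeeping above is a one-entry value cache per block: remember the
last value copied into m0, erase a redefinition to the same value, and forget
on any other clobber. A toy model of the same invariant (ints stand in for
operand values; this is not the MachineOperand API):

#include <optional>
#include <vector>

// A write to m0 is either a known immediate or an unknown clobber.
using M0Write = std::optional<int>;

// Drop writes that restate the value m0 is already known to hold; any
// unknown clobber invalidates the cache.
std::vector<M0Write> pruneM0Writes(const std::vector<M0Write> &Writes) {
  std::vector<M0Write> Out;
  std::optional<int> Known; // Current known m0 value, if any.
  for (const M0Write &W : Writes) {
    if (!W) { // Unknown clobber: forget what m0 holds.
      Known.reset();
      Out.push_back(W);
      continue;
    }
    if (Known && *Known == *W)
      continue; // Redundant redefinition of the same value: erase it.
    Known = *W;
    Out.push_back(W);
  }
  return Out;
}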
@@ -1339,8 +1393,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (!FoldingImm && !OpToFold.isReg())
continue;
- if (OpToFold.isReg() &&
- !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
+ if (OpToFold.isReg() && !Register::isVirtualRegister(OpToFold.getReg()))
continue;
// Prevent folding operands backwards in the function. For example,
@@ -1350,8 +1403,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// ...
// %vgpr0 = V_MOV_B32_e32 1, implicit %exec
MachineOperand &Dst = MI.getOperand(0);
- if (Dst.isReg() &&
- !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+ if (Dst.isReg() && !Register::isVirtualRegister(Dst.getReg()))
continue;
foldInstOperand(MI, OpToFold);
diff --git a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index f3c9ad63a80a..26bae5734df7 100644
--- a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -120,7 +120,7 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
return false;
// If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it.
for (const MachineOperand &ResMO : MI.defs()) {
- unsigned ResReg = ResMO.getReg();
+ Register ResReg = ResMO.getReg();
for (const MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || MO.isDef())
continue;
@@ -144,7 +144,7 @@ static unsigned getMopState(const MachineOperand &MO) {
S |= RegState::Kill;
if (MO.isEarlyClobber())
S |= RegState::EarlyClobber;
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && MO.isRenamable())
+ if (Register::isPhysicalRegister(MO.getReg()) && MO.isRenamable())
S |= RegState::Renamable;
return S;
}
@@ -152,7 +152,7 @@ static unsigned getMopState(const MachineOperand &MO) {
template <typename Callable>
void SIFormMemoryClauses::forAllLanes(unsigned Reg, LaneBitmask LaneMask,
Callable Func) const {
- if (LaneMask.all() || TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ if (LaneMask.all() || Register::isPhysicalRegister(Reg) ||
LaneMask == MRI->getMaxLaneMaskForVReg(Reg)) {
Func(0);
return;
@@ -216,7 +216,7 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI,
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// If it is tied we will need to write the same register as we read.
if (MO.isTied())
@@ -227,7 +227,7 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI,
if (Conflict == Map.end())
continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return false;
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
@@ -265,13 +265,13 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- LaneBitmask Mask = TargetRegisterInfo::isVirtualRegister(Reg) ?
- TRI->getSubRegIndexLaneMask(MO.getSubReg()) :
- LaneBitmask::getAll();
+ LaneBitmask Mask = Register::isVirtualRegister(Reg)
+ ? TRI->getSubRegIndexLaneMask(MO.getSubReg())
+ : LaneBitmask::getAll();
RegUse &Map = MO.isDef() ? Defs : Uses;
auto Loc = Map.find(Reg);
@@ -389,7 +389,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
for (auto &&R : Defs) {
unsigned Reg = R.first;
Uses.erase(Reg);
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
LIS->removeInterval(Reg);
LIS->createAndComputeVirtRegInterval(Reg);
@@ -397,7 +397,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
for (auto &&R : Uses) {
unsigned Reg = R.first;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
LIS->removeInterval(Reg);
LIS->createAndComputeVirtRegInterval(Reg);
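Most of the churn in this hunk (and the surrounding files) is the migration
from raw unsigned register numbers plus TargetRegisterInfo::is*Register to
the Register class. A simplified model of the virtual/physical partition it
encodes is sketched below; the actual llvm::Register encoding is an
implementation detail and may differ:

#include <cassert>
#include <cstdint>

struct Reg {
  uint32_t Id; // 0 means "no register".
  static constexpr uint32_t VirtualBit = 1u << 31;

  // Virtual registers carry the top bit; physical indices stay below it.
  static Reg virt(uint32_t Index) { return Reg{Index | VirtualBit}; }
  static Reg phys(uint32_t Index) {
    assert(Index != 0 && Index < VirtualBit && "bad physical register");
    return Reg{Index};
  }
  bool isVirtual() const { return (Id & VirtualBit) != 0; }
  bool isPhysical() const { return Id != 0 && !isVirtual(); }
  explicit operator bool() const { return Id != 0; }
};

int main() {
  Reg V = Reg::virt(0);
  Reg P = Reg::phys(42);
  assert(V.isVirtual() && !V.isPhysical());
  assert(P.isPhysical() && !P.isVirtual());
}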
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index feab6bed2603..ed07ed100a19 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -112,6 +112,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
.addImm(0) // slc
.addImm(0) // tfe
.addImm(0) // dlc
+ .addImm(0) // swz
.addMemOperand(MMO);
return;
}
@@ -132,6 +133,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
.addImm(0) // slc
.addImm(0) // tfe
.addImm(0) // dlc
+ .addImm(0) // swz
.addMemOperand(MMO);
}
@@ -157,6 +159,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
.addImm(0) // slc
.addImm(0) // tfe
.addImm(0) // dlc
+ .addImm(0) // swz
.addMemOperand(MMO);
return;
}
@@ -177,6 +180,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
.addImm(0) // slc
.addImm(0) // tfe
.addImm(0) // dlc
+ .addImm(0) // swz
.addMemOperand(MMO);
}
@@ -202,15 +206,15 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
DebugLoc DL;
MachineBasicBlock::iterator I = MBB.begin();
- unsigned FlatScratchInitReg
- = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
+ Register FlatScratchInitReg =
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
MachineRegisterInfo &MRI = MF.getRegInfo();
MRI.addLiveIn(FlatScratchInitReg);
MBB.addLiveIn(FlatScratchInitReg);
- unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
- unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
+ Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+ Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
@@ -424,8 +428,8 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
// We need to insert initialization of the scratch resource descriptor.
- unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
- AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+ Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
+ AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
if (ST.isAmdHsaOrMesa(F)) {
@@ -539,9 +543,9 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
if (ST.isAmdPalOS()) {
// The pointer to the GIT is formed from the offset passed in and either
// the amdgpu-git-ptr-high function attribute or the top part of the PC
- unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
- unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+ Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
@@ -601,14 +605,14 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
assert(!ST.isAmdHsaOrMesa(Fn));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
- unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
- unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+ Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+ Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
// Use relocations to get the pointer, and setup the other bits manually.
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
if (MFI->hasImplicitBufferPtr()) {
- unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+ Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
@@ -640,8 +644,8 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
}
} else {
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
BuildMI(MBB, I, DL, SMovB32, Rsrc0)
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
@@ -669,6 +673,8 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
case TargetStackID::NoAlloc:
case TargetStackID::SGPRSpill:
return true;
+ case TargetStackID::SVEVector:
+ return false;
}
llvm_unreachable("Invalid TargetStackID::Value");
}
diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h
index c644f4726e2c..d9970fd6b4b8 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/lib/Target/AMDGPU/SIFrameLowering.h
@@ -20,9 +20,9 @@ class GCNSubtarget;
class SIFrameLowering final : public AMDGPUFrameLowering {
public:
- SIFrameLowering(StackDirection D, unsigned StackAl, int LAO,
- unsigned TransAl = 1) :
- AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
+ SIFrameLowering(StackDirection D, Align StackAl, int LAO,
+ Align TransAl = Align::None())
+ : AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
~SIFrameLowering() override = default;
void emitEntryFunctionPrologue(MachineFunction &MF,
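The SIFrameLowering constructor now takes its alignments as Align instead of
unsigned, so a non-power-of-two value can no longer slip through. A sketch of
such a type, loosely modeled on llvm::Align (simplified, not its exact
interface):

#include <cassert>
#include <cstdint>

class Align {
  uint8_t ShiftValue = 0; // log2 of the alignment; 0 encodes 1 byte.
public:
  Align() = default; // Plays the role of Align::None(): 1-byte alignment.
  explicit Align(uint64_t Value) {
    assert(Value != 0 && (Value & (Value - 1)) == 0 &&
           "alignment must be a power of two");
    while (Value > 1) {
      Value >>= 1;
      ++ShiftValue;
    }
  }
  uint64_t value() const { return uint64_t(1) << ShiftValue; }
};

int main() {
  assert(Align(16).value() == 16);
  assert(Align().value() == 1);
  // Align(24) would assert at construction: 24 is not a power of two.
}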
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index db0782e2bf3e..56ebf9c06741 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -20,11 +20,11 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -35,6 +35,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"
@@ -44,6 +45,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -115,7 +117,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
- addRegisterClass(MVT::i32, &AMDGPU::SReg_32_XM0RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
@@ -125,10 +127,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
- addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass);
- addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
+ addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
@@ -141,12 +143,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
if (Subtarget->has16BitInsts()) {
- addRegisterClass(MVT::i16, &AMDGPU::SReg_32_XM0RegClass);
- addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
+ addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
+ addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
// Unless there are also VOP3P operations, no operations are really legal.
- addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32_XM0RegClass);
- addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass);
+ addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass);
+ addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
}
@@ -178,6 +180,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v32i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
+ setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
@@ -215,31 +218,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v3i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
-
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
-
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
-
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
@@ -653,6 +635,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::v4f16, Custom);
setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
+ setOperationAction(ISD::FMA, MVT::v4f16, Custom);
setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
@@ -687,6 +670,33 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
}
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
+
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2i16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4i16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v4i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::f16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
+
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::ADDCARRY);
setTargetDAGCombine(ISD::SUB);
@@ -768,19 +778,22 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // TODO: Consider splitting all arguments into 32-bit pieces.
- if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ if (CC == CallingConv::AMDGPU_KERNEL)
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+ if (VT.isVector()) {
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
if (Size == 32)
return ScalarVT.getSimpleVT();
- if (Size == 64)
+ if (Size > 32)
return MVT::i32;
if (Size == 16 && Subtarget->has16BitInsts())
return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
- }
+ } else if (VT.getSizeInBits() > 32)
+ return MVT::i32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
@@ -788,7 +801,10 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ if (CC == CallingConv::AMDGPU_KERNEL)
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+
+ if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
@@ -796,12 +812,13 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
if (Size == 32)
return NumElts;
- if (Size == 64)
- return 2 * NumElts;
+ if (Size > 32)
+ return NumElts * ((Size + 31) / 32);
if (Size == 16 && Subtarget->has16BitInsts())
- return (VT.getVectorNumElements() + 1) / 2;
- }
+ return (NumElts + 1) / 2;
+ } else if (VT.getSizeInBits() > 32)
+ return (VT.getSizeInBits() + 31) / 32;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
@@ -821,10 +838,10 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
return NumIntermediates;
}
- if (Size == 64) {
+ if (Size > 32) {
RegisterVT = MVT::i32;
IntermediateVT = RegisterVT;
- NumIntermediates = 2 * NumElts;
+ NumIntermediates = NumElts * ((Size + 31) / 32);
return NumIntermediates;
}
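The old code special-cased 64-bit elements as two i32 pieces; the new
(Size + 31) / 32 expression generalizes that to any element width. A worked
check:

#include <cassert>

// Registers needed per element of bit width Size, matching the expression
// in the hunk above.
constexpr unsigned regsPerElt(unsigned Size) { return (Size + 31) / 32; }

int main() {
  assert(regsPerElt(64) == 2); // the old hard-coded 2 * NumElts case
  assert(regsPerElt(96) == 3); // wider elements now handled uniformly
  assert(regsPerElt(16) == 1); // sub-dword still takes one register
}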
@@ -901,7 +918,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = MFI->getImagePSV(
*MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
CI.getArgOperand(RsrcIntr->RsrcArg));
- Info.align = 0;
+ Info.align.reset();
} else {
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
@@ -947,7 +964,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
- Info.align = 0;
+ Info.align.reset();
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
@@ -964,7 +981,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
CI.getArgOperand(1));
- Info.align = 0;
+ Info.align.reset();
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
@@ -978,7 +995,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(CI.getOperand(0)->getType()
->getPointerElementType());
Info.ptrVal = CI.getOperand(0);
- Info.align = 0;
+ Info.align.reset();
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
return true;
@@ -988,7 +1005,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
- Info.align = 0;
+ Info.align.reset();
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
@@ -1012,7 +1029,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// This is an abstract access, but we need to specify a type and size.
Info.memVT = MVT::i32;
Info.size = 4;
- Info.align = 4;
+ Info.align = Align(4);
Info.flags = MachineMemOperand::MOStore;
if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
@@ -1215,21 +1232,12 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
return true;
}
-bool SITargetLowering::allowsMisalignedMemoryAccesses(
- EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
- bool *IsFast) const {
+bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
+ unsigned Size, unsigned AddrSpace, unsigned Align,
+ MachineMemOperand::Flags Flags, bool *IsFast) const {
if (IsFast)
*IsFast = false;
- // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
- // which isn't a simple VT.
- // Until MVT is extended to handle this, simply check for the size and
- // rely on the condition below: allow accesses if the size is a multiple of 4.
- if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 &&
- VT.getStoreSize() > 16)) {
- return false;
- }
-
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
@@ -1268,7 +1276,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
}
// Smaller than dword value must be aligned.
- if (VT.bitsLT(MVT::i32))
+ if (Size < 32)
return false;
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
@@ -1277,7 +1285,26 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
if (IsFast)
*IsFast = true;
- return VT.bitsGT(MVT::i32) && Align % 4 == 0;
+ return Size >= 32 && Align >= 4;
+}
+
+bool SITargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+ bool *IsFast) const {
+ if (IsFast)
+ *IsFast = false;
+
+ // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
+ // which isn't a simple VT.
+ // Until MVT is extended to handle this, simply check for the size and
+ // rely on the condition below: allow accesses if the size is a multiple of 4.
+ if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 &&
+ VT.getStoreSize() > 16)) {
+ return false;
+ }
+
+ return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
+ Align, Flags, IsFast);
}
EVT SITargetLowering::getOptimalMemOpType(
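After the refactor, the byte-address fast path reduces to a pure predicate on
size and alignment. A standalone check of the final expression, Size >= 32 &&
Align >= 4 (size in bits, alignment in bytes):

#include <cassert>

// Dword-or-larger accesses with at least 4-byte alignment take the fast path.
constexpr bool fastPathAllowed(unsigned SizeBits, unsigned AlignBytes) {
  return SizeBits >= 32 && AlignBytes >= 4;
}

int main() {
  assert(fastPathAllowed(64, 4));  // b64 access, dword aligned
  assert(!fastPathAllowed(64, 2)); // under-aligned
  assert(!fastPathAllowed(16, 4)); // smaller than a dword
}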
@@ -1336,9 +1363,9 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(MVT VT) const {
- if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
- return TypeSplitVector;
-
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 1 && VT.getScalarType().bitsLE(MVT::i16))
+ return VT.isPow2VectorType() ? TypeSplitVector : TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
@@ -1562,7 +1589,8 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
// entire split argument.
if (Arg->Flags.isSplit()) {
while (!Arg->Flags.isSplitEnd()) {
- assert(!Arg->VT.isVector() &&
+ assert((!Arg->VT.isVector() ||
+ Arg->VT.getScalarSizeInBits() == 16) &&
"unexpected vector split in ps argument type");
if (!SkipArg)
Splits.push_back(*Arg);
@@ -1589,29 +1617,32 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
}
// Allocate special inputs passed in VGPRs.
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+void SITargetLowering::allocateSpecialEntryInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const {
+ const LLT S32 = LLT::scalar(32);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
if (Info.hasWorkItemIDX()) {
- unsigned Reg = AMDGPU::VGPR0;
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ Register Reg = AMDGPU::VGPR0;
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
}
if (Info.hasWorkItemIDY()) {
- unsigned Reg = AMDGPU::VGPR1;
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ Register Reg = AMDGPU::VGPR1;
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
}
if (Info.hasWorkItemIDZ()) {
- unsigned Reg = AMDGPU::VGPR2;
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ Register Reg = AMDGPU::VGPR2;
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
@@ -1642,7 +1673,8 @@ static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask = ~0u,
assert(Reg != AMDGPU::NoRegister);
MachineFunction &MF = CCInfo.getMachineFunction();
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ Register LiveInVReg = MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ MF.getRegInfo().setType(LiveInVReg, LLT::scalar(32));
return ArgDescriptor::createRegister(Reg, Mask);
}
@@ -1671,10 +1703,10 @@ static ArgDescriptor allocateSGPR64Input(CCState &CCInfo) {
return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16);
}
-static void allocateSpecialInputVGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+void SITargetLowering::allocateSpecialInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const {
const unsigned Mask = 0x3ff;
ArgDescriptor Arg;
@@ -1692,10 +1724,11 @@ static void allocateSpecialInputVGPRs(CCState &CCInfo,
Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg));
}
-static void allocateSpecialInputSGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+void SITargetLowering::allocateSpecialInputSGPRs(
+ CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
// TODO: Unify handling with private memory pointers.
@@ -1728,10 +1761,10 @@ static void allocateSpecialInputSGPRs(CCState &CCInfo,
}
// Allocate special inputs passed in user SGPRs.
-static void allocateHSAUserSGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const {
if (Info.hasImplicitBufferPtr()) {
unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
@@ -1758,9 +1791,12 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
}
if (Info.hasKernargSegmentPtr()) {
- unsigned InputPtrReg = Info.addKernargSegmentPtr(TRI);
- MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
CCInfo.AllocateReg(InputPtrReg);
+
+ Register VReg = MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
+ MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
}
if (Info.hasDispatchID()) {
@@ -1780,32 +1816,32 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
}
// Allocate special input registers that are initialized per-wave.
-static void allocateSystemSGPRs(CCState &CCInfo,
- MachineFunction &MF,
- SIMachineFunctionInfo &Info,
- CallingConv::ID CallConv,
- bool IsShader) {
+void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ SIMachineFunctionInfo &Info,
+ CallingConv::ID CallConv,
+ bool IsShader) const {
if (Info.hasWorkGroupIDX()) {
unsigned Reg = Info.addWorkGroupIDX();
- MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDY()) {
unsigned Reg = Info.addWorkGroupIDY();
- MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDZ()) {
unsigned Reg = Info.addWorkGroupIDZ();
- MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupInfo()) {
unsigned Reg = Info.addWorkGroupInfo();
- MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
@@ -1860,7 +1896,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// resource. For the Code Object V2 ABI, this will be the first 4 user
// SGPR inputs. We can reserve those and use them directly.
- unsigned PrivateSegmentBufferReg =
+ Register PrivateSegmentBufferReg =
Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
Info.setScratchRSrcReg(PrivateSegmentBufferReg);
} else {
@@ -1921,7 +1957,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
//
// FIXME: Should not do this if inline asm is reading/writing these
// registers.
- unsigned PreloadedSP = Info.getPreloadedReg(
+ Register PreloadedSP = Info.getPreloadedReg(
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
Info.setStackPtrOffsetReg(PreloadedSP);
@@ -1971,7 +2007,7 @@ void SITargetLowering::insertCopiesSplitCSR(
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
@@ -2134,7 +2170,7 @@ SDValue SITargetLowering::LowerFormalArguments(
assert(VA.isRegLoc() && "Parameter must be in a register!");
- unsigned Reg = VA.getLocReg();
+ Register Reg = VA.getLocReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
EVT ValVT = VA.getValVT();
@@ -2652,6 +2688,15 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
bool IsThisReturn = false;
MachineFunction &MF = DAG.getMachineFunction();
+ if (Callee.isUndef() || isNullConstant(Callee)) {
+ if (!CLI.IsTailCall) {
+ for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
+ InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT));
+ }
+
+ return Chain;
+ }
+
if (IsVarArg) {
return lowerUnhandledCall(CLI, InVals,
"unsupported call to variadic function ");
@@ -2782,7 +2827,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
int32_t Offset = LocMemOffset;
SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
- unsigned Align = 0;
+ MaybeAlign Alignment;
if (IsTailCall) {
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
@@ -2790,8 +2835,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
Flags.getByValSize() : VA.getValVT().getStoreSize();
// FIXME: We can have better than the minimum byval required alignment.
- Align = Flags.isByVal() ? Flags.getByValAlign() :
- MinAlign(Subtarget->getStackAlignment(), Offset);
+ Alignment =
+ Flags.isByVal()
+ ? MaybeAlign(Flags.getByValAlign())
+ : commonAlignment(Subtarget->getStackAlignment(), Offset);
Offset = Offset + FPDiff;
int FI = MFI.CreateFixedObject(OpSize, Offset, true);
@@ -2810,7 +2857,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
} else {
DstAddr = PtrOff;
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
- Align = MinAlign(Subtarget->getStackAlignment(), LocMemOffset);
+ Alignment =
+ commonAlignment(Subtarget->getStackAlignment(), LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
@@ -2825,7 +2873,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
MemOpChains.push_back(Cpy);
} else {
- SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Align);
+ SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo,
+ Alignment ? Alignment->value() : 0);
MemOpChains.push_back(Store);
}
}
@@ -2937,9 +2986,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
IsThisReturn ? OutVals[0] : SDValue());
}
-unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
- unsigned Reg = StringSwitch<unsigned>(RegName)
+Register SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const {
+ Register Reg = StringSwitch<Register>(RegName)
.Case("m0", AMDGPU::M0)
.Case("exec", AMDGPU::EXEC)
.Case("exec_lo", AMDGPU::EXEC_LO)
@@ -2947,7 +2996,7 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
.Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
.Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
- .Default(AMDGPU::NoRegister);
+ .Default(Register());
if (Reg == AMDGPU::NoRegister) {
report_fatal_error(Twine("invalid register name \""
@@ -3055,6 +3104,20 @@ splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop) {
return std::make_pair(LoopBB, RemainderBB);
}
+/// Insert \p MI into a BUNDLE with an S_WAITCNT 0 immediately following it.
+void SITargetLowering::bundleInstWithWaitcnt(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+ auto I = MI.getIterator();
+ auto E = std::next(I);
+
+ BuildMI(*MBB, E, MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+
+ MIBundleBuilder Bundler(*MBB, I, E);
+ finalizeBundle(*MBB, Bundler.begin());
+}
+
MachineBasicBlock *
SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -3066,12 +3129,13 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
MachineBasicBlock *RemainderBB;
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
- MachineBasicBlock::iterator Prev = std::prev(MI.getIterator());
+ // Apparently kill flags are only valid if the def is in the same block?
+ if (MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0))
+ Src->setIsKill(false);
std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true);
MachineBasicBlock::iterator I = LoopBB->end();
- MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg(
AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1);
@@ -3081,23 +3145,9 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
.addImm(0)
.addImm(EncodedReg);
- // This is a pain, but we're not allowed to have physical register live-ins
- // yet. Insert a pair of copies if the VGPR0 hack is necessary.
- if (Src && TargetRegisterInfo::isPhysicalRegister(Src->getReg())) {
- unsigned Data0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(*BB, std::next(Prev), DL, TII->get(AMDGPU::COPY), Data0)
- .add(*Src);
+ bundleInstWithWaitcnt(MI);
- BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::COPY), Src->getReg())
- .addReg(Data0);
-
- MRI.setSimpleHint(Data0, Src->getReg());
- }
-
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_WAITCNT))
- .addImm(0);
-
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
// Load and check TRAP_STS.MEM_VIOL
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), Reg)
@@ -3138,10 +3188,10 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
MachineBasicBlock::iterator I = LoopBB.begin();
const TargetRegisterClass *BoolRC = TRI->getBoolRC();
- unsigned PhiExec = MRI.createVirtualRegister(BoolRC);
- unsigned NewExec = MRI.createVirtualRegister(BoolRC);
- unsigned CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned CondReg = MRI.createVirtualRegister(BoolRC);
+ Register PhiExec = MRI.createVirtualRegister(BoolRC);
+ Register NewExec = MRI.createVirtualRegister(BoolRC);
+ Register CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register CondReg = MRI.createVirtualRegister(BoolRC);
BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg)
.addReg(InitReg)
@@ -3240,9 +3290,9 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
MachineBasicBlock::iterator I(&MI);
const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
- unsigned TmpExec = MRI.createVirtualRegister(BoolXExecRC);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+ Register TmpExec = MRI.createVirtualRegister(BoolXExecRC);
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
@@ -3315,7 +3365,7 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
SetOn->getOperand(3).setIsUndef();
} else {
- unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
.add(*Idx)
.addImm(Offset);
@@ -3351,8 +3401,8 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
const TargetRegisterClass *VecRC = MRI.getRegClass(SrcReg);
@@ -3390,8 +3440,8 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
- unsigned PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg);
@@ -3442,7 +3492,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Dst = MI.getOperand(0).getReg();
+ Register Dst = MI.getOperand(0).getReg();
const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
@@ -3505,7 +3555,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
const DebugLoc &DL = MI.getDebugLoc();
- unsigned PhiReg = MRI.createVirtualRegister(VecRC);
+ Register PhiReg = MRI.createVirtualRegister(VecRC);
auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg,
Offset, UseGPRIdxMode, false);
@@ -3564,22 +3614,22 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineOperand &Src0 = MI.getOperand(1);
MachineOperand &Src1 = MI.getOperand(2);
- unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
Src0, BoolRC, AMDGPU::sub0,
- &AMDGPU::SReg_32_XM0RegClass);
+ &AMDGPU::SReg_32RegClass);
MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
Src0, BoolRC, AMDGPU::sub1,
- &AMDGPU::SReg_32_XM0RegClass);
+ &AMDGPU::SReg_32RegClass);
MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
Src1, BoolRC, AMDGPU::sub0,
- &AMDGPU::SReg_32_XM0RegClass);
+ &AMDGPU::SReg_32RegClass);
MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
Src1, BoolRC, AMDGPU::sub1,
- &AMDGPU::SReg_32_XM0RegClass);
+ &AMDGPU::SReg_32RegClass);
bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
@@ -3632,8 +3682,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
// S_CMOV_B64 exec, -1
MachineInstr *FirstMI = &*BB->begin();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned InputReg = MI.getOperand(0).getReg();
- unsigned CountReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register InputReg = MI.getOperand(0).getReg();
+ Register CountReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
bool Found = false;
// Move the COPY of the input reg to the beginning, so that we can use it.
@@ -3707,16 +3757,16 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src0 = MI.getOperand(1).getReg();
- unsigned Src1 = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
const DebugLoc &DL = MI.getDebugLoc();
- unsigned SrcCond = MI.getOperand(3).getReg();
+ Register SrcCond = MI.getOperand(3).getReg();
- unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned SrcCondCopy = MRI.createVirtualRegister(CondRC);
+ Register SrcCondCopy = MRI.createVirtualRegister(CondRC);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
.addReg(SrcCond);
@@ -3814,8 +3864,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::DS_GWS_SEMA_P:
case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
case AMDGPU::DS_GWS_BARRIER:
- if (getSubtarget()->hasGWSAutoReplay())
+ // An s_waitcnt 0 is required to be the instruction immediately following.
+ if (getSubtarget()->hasGWSAutoReplay()) {
+ bundleInstWithWaitcnt(MI);
return BB;
+ }
+
return emitGWSMemViolTestLoop(MI, BB);
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
@@ -3939,6 +3993,30 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
}
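+// Split a ternary vector operation (e.g. an FMA on v4f16) into two
+// half-width operations and concatenate the results. A sketch of the
+// transformation this performs:
+//   (fma v4f16:%a, %b, %c)
+//     -> (concat_vectors (fma v2f16:%a.lo, %b.lo, %c.lo),
+//                        (fma v2f16:%a.hi, %b.hi, %c.hi))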
+SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
+ assert(VT == MVT::v4i16 || VT == MVT::v4f16);
+
+ SDValue Lo0, Hi0;
+ std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
+ SDValue Lo1, Hi1;
+ std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
+ SDValue Lo2, Hi2;
+ std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2);
+
+ SDLoc SL(Op);
+
+ SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1, Lo2,
+ Op->getFlags());
+ SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1, Hi2,
+ Op->getFlags());
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
+}
+
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
@@ -3991,6 +4069,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FMINNUM:
case ISD::FMAXNUM:
return lowerFMINNUM_FMAXNUM(Op, DAG);
+ case ISD::FMA:
+ return splitTernaryVectorOp(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -4070,6 +4150,41 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL);
}
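+// Common lowering for the raw/struct buffer load intrinsics. Dispatch is by
+// result type; for example (assuming the helpers behave as defined below):
+//   v2f16 format load -> adjustLoadValueType(BUFFER_LOAD_FORMAT_D16, ...)
+//   i8 or i16 load    -> handleByteShortBufferLoads(...)
+//   legal type        -> getMemIntrinsicNode(...)
+//   otherwise         -> bitcast through an equivalent legal memory type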
+SDValue SITargetLowering::lowerIntrinsicLoad(MemSDNode *M, bool IsFormat,
+ SelectionDAG &DAG,
+ ArrayRef<SDValue> Ops) const {
+ SDLoc DL(M);
+ EVT LoadVT = M->getValueType(0);
+ EVT EltType = LoadVT.getScalarType();
+ EVT IntVT = LoadVT.changeTypeToInteger();
+
+ bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
+
+ unsigned Opc =
+ IsFormat ? AMDGPUISD::BUFFER_LOAD_FORMAT : AMDGPUISD::BUFFER_LOAD;
+
+ if (IsD16) {
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG, Ops);
+ }
+
+ // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
+ if (!IsD16 && !LoadVT.isVector() && EltType.getSizeInBits() < 32)
+ return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
+
+ if (isTypeLegal(LoadVT)) {
+ return getMemIntrinsicNode(Opc, DL, M->getVTList(), Ops, IntVT,
+ M->getMemOperand(), DAG);
+ }
+
+ EVT CastVT = getEquivalentMemType(*DAG.getContext(), LoadVT);
+ SDVTList VTList = DAG.getVTList(CastVT, MVT::Other);
+ SDValue MemNode = getMemIntrinsicNode(Opc, DL, VTList, Ops, CastVT,
+ M->getMemOperand(), DAG);
+ return DAG.getMergeValues(
+ {DAG.getNode(ISD::BITCAST, DL, LoadVT, MemNode), MemNode.getValue(1)},
+ DL);
+}
+
static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
@@ -4196,8 +4311,14 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::INTRINSIC_W_CHAIN: {
if (SDValue Res = LowerINTRINSIC_W_CHAIN(SDValue(N, 0), DAG)) {
- Results.push_back(Res);
- Results.push_back(Res.getValue(1));
+ if (Res.getOpcode() == ISD::MERGE_VALUES) {
+ // FIXME: Hacky
+ Results.push_back(Res.getOperand(0));
+ Results.push_back(Res.getOperand(1));
+ } else {
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ }
return;
}
@@ -4935,11 +5056,8 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
// compute the correct address.
- unsigned LoFlags = GAFlags;
- if (LoFlags == SIInstrInfo::MO_NONE)
- LoFlags = SIInstrInfo::MO_REL32;
SDValue PtrLo =
- DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, LoFlags);
+ DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags);
SDValue PtrHi;
if (GAFlags == SIInstrInfo::MO_NONE) {
PtrHi = DAG.getTargetConstant(0, DL, MVT::i32);
@@ -5563,14 +5681,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
SDValue Ops[] = {
- DAG.getEntryNode(), // Chain
- Rsrc, // rsrc
- DAG.getConstant(0, DL, MVT::i32), // vindex
- {}, // voffset
- {}, // soffset
- {}, // offset
- DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ DAG.getEntryNode(), // Chain
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ {}, // voffset
+ {}, // soffset
+ {}, // offset
+ DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
// Use the alignment to ensure that the required offsets will fit into the
@@ -5579,7 +5697,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
for (unsigned i = 0; i < NumLoads; ++i) {
- Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
+ Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
Ops, LoadVT, MMO));
}
@@ -5758,45 +5876,31 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
- case Intrinsic::amdgcn_interp_mov: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
- SDValue Glue = M0.getValue(1);
- return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3), Glue);
- }
- case Intrinsic::amdgcn_interp_p1: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
- SDValue Glue = M0.getValue(1);
- return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3), Glue);
- }
- case Intrinsic::amdgcn_interp_p2: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
- SDValue Glue = SDValue(M0.getNode(), 1);
- return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
- Glue);
- }
case Intrinsic::amdgcn_interp_p1_f16: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
- SDValue Glue = M0.getValue(1);
+ SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+ Op.getOperand(5), SDValue());
if (getSubtarget()->getLDSBankCount() == 16) {
// 16 bank LDS
- SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
- DAG.getConstant(2, DL, MVT::i32), // P0
- Op.getOperand(2), // Attrchan
- Op.getOperand(3), // Attr
- Glue);
+
+ // FIXME: This will implicitly insert a second CopyToReg to M0.
+ SDValue S = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32,
+ DAG.getTargetConstant(Intrinsic::amdgcn_interp_mov, DL, MVT::i32),
+ DAG.getConstant(2, DL, MVT::i32), // P0
+ Op.getOperand(2), // Attrchan
+ Op.getOperand(3), // Attr
+ Op.getOperand(5)); // m0
+
SDValue Ops[] = {
Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr
- DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
S, // Src2 - holds two f16 values selected by high
- DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
Op.getOperand(4), // high
- DAG.getConstant(0, DL, MVT::i1), // $clamp
- DAG.getConstant(0, DL, MVT::i32) // $omod
+ DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+ DAG.getTargetConstant(0, DL, MVT::i32) // $omod
};
return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
} else {
@@ -5805,28 +5909,28 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr
- DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
Op.getOperand(4), // high
- DAG.getConstant(0, DL, MVT::i1), // $clamp
- DAG.getConstant(0, DL, MVT::i32), // $omod
- Glue
+ DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+ DAG.getTargetConstant(0, DL, MVT::i32), // $omod
+ ToM0.getValue(1)
};
return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
}
}
case Intrinsic::amdgcn_interp_p2_f16: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6));
- SDValue Glue = SDValue(M0.getNode(), 1);
+ SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+ Op.getOperand(6), SDValue());
SDValue Ops[] = {
Op.getOperand(2), // Src0
Op.getOperand(3), // Attrchan
Op.getOperand(4), // Attr
- DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
Op.getOperand(1), // Src2
- DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
Op.getOperand(5), // high
- DAG.getConstant(0, DL, MVT::i1), // $clamp
- Glue
+ DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+ ToM0.getValue(1)
};
return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
}
@@ -5947,16 +6051,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), Op.getOperand(2));
return DAG.getNode(ISD::BITCAST, DL, VT, Node);
}
- case Intrinsic::amdgcn_wqm: {
- SDValue Src = Op.getOperand(1);
- return SDValue(DAG.getMachineNode(AMDGPU::WQM, DL, Src.getValueType(), Src),
- 0);
- }
- case Intrinsic::amdgcn_wwm: {
- SDValue Src = Op.getOperand(1);
- return SDValue(DAG.getMachineNode(AMDGPU::WWM, DL, Src.getValueType(), Src),
- 0);
- }
case Intrinsic::amdgcn_fmad_ftz:
return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
@@ -5977,6 +6071,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SIInstrInfo::MO_ABS32_LO);
return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
}
+ case Intrinsic::amdgcn_is_shared:
+ case Intrinsic::amdgcn_is_private: {
+ SDLoc SL(Op);
+ unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) ?
+ AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS;
+ SDValue Aperture = getSegmentAperture(AS, SL, DAG);
+ SDValue SrcVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32,
+ Op.getOperand(1));
+
+ SDValue SrcHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, SrcVec,
+ DAG.getConstant(1, SL, MVT::i32));
+ return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ);
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -5986,6 +6093,30 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
+// This function computes an appropriate offset to pass to
+// MachineMemOperand::setOffset() based on the offset inputs to
+// an intrinsic. If any of the offsets are non-constant or
+// if VIndex is non-zero then this function returns 0. Otherwise,
+// it returns the sum of VOffset, SOffset, and Offset.
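+// For example, with voffset = 4, soffset = 8, offset = 16 all constant and
+// VIndex absent (or a constant zero), the MMO offset is 4 + 8 + 16 = 28.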
+static unsigned getBufferOffsetForMMO(SDValue VOffset,
+ SDValue SOffset,
+ SDValue Offset,
+ SDValue VIndex = SDValue()) {
+ if (!isa<ConstantSDNode>(VOffset) || !isa<ConstantSDNode>(SOffset) ||
+ !isa<ConstantSDNode>(Offset))
+ return 0;
+
+ if (VIndex) {
+ if (!isa<ConstantSDNode>(VIndex) ||
+ !cast<ConstantSDNode>(VIndex)->isNullValue())
+ return 0;
+ }
+
+ return cast<ConstantSDNode>(VOffset)->getSExtValue() +
+ cast<ConstantSDNode>(SOffset)->getSExtValue() +
+ cast<ConstantSDNode>(Offset)->getSExtValue();
+}
+
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -6128,17 +6259,22 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
+
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
EVT LoadVT = Op.getValueType();
if (LoadVT.getScalarType() == MVT::f16)
@@ -6155,6 +6291,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format: {
+ const bool IsFormat = IntrID == Intrinsic::amdgcn_raw_buffer_load_format;
+
auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
@@ -6163,32 +6301,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Offsets.first, // voffset
Op.getOperand(4), // soffset
Offsets.second, // offset
- Op.getOperand(5), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ Op.getOperand(5), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
- unsigned Opc = (IntrID == Intrinsic::amdgcn_raw_buffer_load) ?
- AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
-
- EVT VT = Op.getValueType();
- EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
- EVT LoadVT = Op.getValueType();
-
- if (LoadVT.getScalarType() == MVT::f16)
- return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
- M, DAG, Ops);
-
- // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
- if (LoadVT.getScalarType() == MVT::i8 ||
- LoadVT.getScalarType() == MVT::i16)
- return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
-
- return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
- M->getMemOperand(), DAG);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5]));
+ return lowerIntrinsicLoad(M, IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format: {
+ const bool IsFormat = IntrID == Intrinsic::amdgcn_struct_buffer_load_format;
+
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
@@ -6197,29 +6321,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
- Op.getOperand(6), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ Op.getOperand(6), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
- unsigned Opc = (IntrID == Intrinsic::amdgcn_struct_buffer_load) ?
- AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
-
- EVT VT = Op.getValueType();
- EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
- EVT LoadVT = Op.getValueType();
-
- if (LoadVT.getScalarType() == MVT::f16)
- return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
- M, DAG, Ops);
-
- // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
- if (LoadVT.getScalarType() == MVT::i8 ||
- LoadVT.getScalarType() == MVT::i16)
- return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
-
- return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
- M->getMemOperand(), DAG);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5],
+ Ops[2]));
+ return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
@@ -6239,9 +6348,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // voffset
Op.getOperand(5), // soffset
Op.getOperand(6), // offset
- DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
- DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -6264,8 +6373,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // soffset
Offsets.second, // offset
Op.getOperand(5), // format
- Op.getOperand(6), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ Op.getOperand(6), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -6288,8 +6397,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // format
- Op.getOperand(7), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ Op.getOperand(7), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -6321,13 +6430,17 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
unsigned Opcode = 0;
switch (IntrID) {
@@ -6377,7 +6490,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
case Intrinsic::amdgcn_raw_buffer_atomic_and:
case Intrinsic::amdgcn_raw_buffer_atomic_or:
- case Intrinsic::amdgcn_raw_buffer_atomic_xor: {
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ case Intrinsic::amdgcn_raw_buffer_atomic_inc:
+ case Intrinsic::amdgcn_raw_buffer_atomic_dec: {
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
@@ -6388,11 +6503,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6]));
unsigned Opcode = 0;
switch (IntrID) {
@@ -6426,6 +6542,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_inc:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_INC;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_dec:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC;
+ break;
default:
llvm_unreachable("unhandled atomic opcode");
}
@@ -6442,7 +6564,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
case Intrinsic::amdgcn_struct_buffer_atomic_and:
case Intrinsic::amdgcn_struct_buffer_atomic_or:
- case Intrinsic::amdgcn_struct_buffer_atomic_xor: {
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ case Intrinsic::amdgcn_struct_buffer_atomic_inc:
+ case Intrinsic::amdgcn_struct_buffer_atomic_dec: {
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
@@ -6453,11 +6577,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6],
+ Ops[3]));
unsigned Opcode = 0;
switch (IntrID) {
@@ -6491,6 +6617,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_inc:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_INC;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_dec:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC;
+ break;
default:
llvm_unreachable("unhandled atomic opcode");
}
@@ -6512,12 +6644,16 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
@@ -6534,10 +6670,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7]));
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
@@ -6554,10 +6691,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(7), // soffset
Offsets.second, // offset
Op.getOperand(8), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7],
+ Ops[4]));
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
@@ -6686,23 +6825,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
}
- case Intrinsic::amdgcn_s_sendmsg:
- case Intrinsic::amdgcn_s_sendmsghalt: {
- unsigned NodeOp = (IntrinsicID == Intrinsic::amdgcn_s_sendmsg) ?
- AMDGPUISD::SENDMSG : AMDGPUISD::SENDMSGHALT;
- Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
- SDValue Glue = Chain.getValue(1);
- return DAG.getNode(NodeOp, DL, MVT::Other, Chain,
- Op.getOperand(2), Glue);
- }
- case Intrinsic::amdgcn_init_exec: {
- return DAG.getNode(AMDGPUISD::INIT_EXEC, DL, MVT::Other, Chain,
- Op.getOperand(2));
- }
- case Intrinsic::amdgcn_init_exec_from_input: {
- return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
- Op.getOperand(2), Op.getOperand(3));
- }
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -6733,9 +6855,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(5), // voffset
Op.getOperand(6), // soffset
Op.getOperand(7), // offset
- DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
- DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idexen
+ DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6759,8 +6881,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // format
- Op.getOperand(8), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idexen
+ Op.getOperand(8), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6784,8 +6906,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // format
- Op.getOperand(7), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idexen
+ Op.getOperand(7), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6813,14 +6935,18 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
@@ -6833,10 +6959,22 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format: {
+ const bool IsFormat =
+ IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format;
+
SDValue VData = Op.getOperand(2);
- bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ EVT VDataVT = VData.getValueType();
+ EVT EltType = VDataVT.getScalarType();
+ bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+
+ if (!isTypeLegal(VDataVT)) {
+ VData =
+ DAG.getNode(ISD::BITCAST, DL,
+ getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
+ }
+
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Chain,
@@ -6846,18 +6984,18 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
- Op.getOperand(6), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ Op.getOperand(6), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
- unsigned Opc = IntrinsicID == Intrinsic::amdgcn_raw_buffer_store ?
- AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ unsigned Opc =
+ IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6]));
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
- EVT VDataType = VData.getValueType().getScalarType();
- if (VDataType == MVT::i8 || VDataType == MVT::i16)
- return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
+ if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
+ return handleByteShortBufferStores(DAG, VDataVT, DL, Ops, M);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
@@ -6865,10 +7003,23 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store_format: {
+ const bool IsFormat =
+ IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format;
+
SDValue VData = Op.getOperand(2);
- bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ EVT VDataVT = VData.getValueType();
+ EVT EltType = VDataVT.getScalarType();
+ bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
+
if (IsD16)
VData = handleD16VData(VData, DAG);
+
+ if (!isTypeLegal(VDataVT)) {
+ VData =
+ DAG.getNode(ISD::BITCAST, DL,
+ getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
+ }
+
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Chain,
@@ -6878,17 +7029,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Offsets.first, // voffset
Op.getOperand(6), // soffset
Offsets.second, // offset
- Op.getOperand(7), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ Op.getOperand(7), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6],
+ Ops[3]));
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
- if (VDataType == MVT::i8 || VDataType == MVT::i16)
+ if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
@@ -6908,13 +7061,17 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
EVT VT = Op.getOperand(2).getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
unsigned Opcode = VT.isVector() ? AMDGPUISD::BUFFER_ATOMIC_PK_FADD
: AMDGPUISD::BUFFER_ATOMIC_FADD;
@@ -6987,7 +7144,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
Overflow += ImmOffset;
ImmOffset = 0;
}
- C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+ C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
if (Overflow) {
auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
if (!N0)
@@ -7001,14 +7158,14 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
if (!N0)
N0 = DAG.getConstant(0, DL, MVT::i32);
if (!C1)
- C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+ C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
return {N0, SDValue(C1, 0)};
}
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
// pointed to by Offsets.
-void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
+unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
SelectionDAG &DAG, SDValue *Offsets,
unsigned Align) const {
SDLoc DL(CombinedOffset);
@@ -7018,8 +7175,8 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
- Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
- return;
+ Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
+ return SOffset + ImmOffset;
}
}
if (DAG.isBaseWithConstantOffset(CombinedOffset)) {
@@ -7031,13 +7188,14 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
Subtarget, Align)) {
Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
- Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
- return;
+ Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
+ return 0;
}
}
Offsets[0] = CombinedOffset;
Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
- Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
+ Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
+ return 0;
}
// Handle 8 bit and 16 bit buffer loads
@@ -7053,9 +7211,10 @@ SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG,
SDValue BufferLoad = DAG.getMemIntrinsicNode(Opc, DL, ResList,
Ops, IntVT,
M->getMemOperand());
- SDValue BufferLoadTrunc = DAG.getNode(ISD::TRUNCATE, DL,
- LoadVT.getScalarType(), BufferLoad);
- return DAG.getMergeValues({BufferLoadTrunc, BufferLoad.getValue(1)}, DL);
+ SDValue LoadVal = DAG.getNode(ISD::TRUNCATE, DL, IntVT, BufferLoad);
+ LoadVal = DAG.getNode(ISD::BITCAST, DL, LoadVT, LoadVal);
+
+ return DAG.getMergeValues({LoadVal, BufferLoad.getValue(1)}, DL);
}
// Handle 8 bit and 16 bit buffer stores
@@ -7063,6 +7222,9 @@ SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG,
EVT VDataType, SDLoc DL,
SDValue Ops[],
MemSDNode *M) const {
+ if (VDataType == MVT::f16)
+ Ops[1] = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Ops[1]);
+
SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]);
Ops[1] = BufferStoreExt;
unsigned Opc = (VDataType == MVT::i8) ? AMDGPUISD::BUFFER_STORE_BYTE :
@@ -7215,8 +7377,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
"Custom lowering for non-i32 vectors hasn't been implemented.");
- if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- *Load->getMemOperand())) {
+ if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ MemVT, *Load->getMemOperand())) {
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
return DAG.getMergeValues(Ops, DL);
@@ -7505,6 +7667,19 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
}
+// Returns the immediate value for setting the F32 denorm mode when using the
+// S_DENORM_MODE instruction.
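+//
+// The 4-bit immediate packs the SP mode in bits [1:0] and the DP mode in
+// bits [3:2]. For example, assuming the FP_DENORM_* encoding in SIDefines.h
+// (FLUSH_NONE = 3), enabling SP denorms on a target with FP64 denorms on
+// yields 3 | (3 << 2) = 0xf.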
+static const SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
+ const SDLoc &SL, const GCNSubtarget *ST) {
+ assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
+ int DPDenormModeDefault = ST->hasFP64Denormals()
+ ? FP_DENORM_FLUSH_NONE
+ : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+ int Mode = SPDenormMode | (DPDenormModeDefault << 2);
+ return DAG.getTargetConstant(Mode, SL, MVT::i32);
+}
+
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
@@ -7531,16 +7706,26 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
-
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
if (!Subtarget->hasFP32Denormals()) {
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
- SL, MVT::i32);
- SDValue EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
- DAG.getEntryNode(),
- EnableDenormValue, BitField);
+
+ SDValue EnableDenorm;
+ if (Subtarget->hasDenormModeInst()) {
+ const SDValue EnableDenormValue =
+ getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
+
+ EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
+ DAG.getEntryNode(), EnableDenormValue);
+ } else {
+ const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
+ SL, MVT::i32);
+ EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
+ DAG.getEntryNode(), EnableDenormValue,
+ BitField);
+ }
+
SDValue Ops[3] = {
NegDivScale0,
EnableDenorm.getValue(0),
@@ -7562,19 +7747,29 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
NumeratorScaled, Mul);
- SDValue Fma3 = getFPTernOp(DAG, ISD::FMA,SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
+ SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
NumeratorScaled, Fma3);
if (!Subtarget->hasFP32Denormals()) {
- const SDValue DisableDenormValue =
- DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
- SDValue DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
- Fma4.getValue(1),
- DisableDenormValue,
- BitField,
- Fma4.getValue(2));
+ SDValue DisableDenorm;
+ if (Subtarget->hasDenormModeInst()) {
+ const SDValue DisableDenormValue =
+ getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
+
+ DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
+ Fma4.getValue(1), DisableDenormValue,
+ Fma4.getValue(2));
+ } else {
+ const SDValue DisableDenormValue =
+ DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
+
+ DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
+ Fma4.getValue(1), DisableDenormValue,
+ BitField, Fma4.getValue(2));
+ }
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
DisableDenorm, DAG.getRoot());
@@ -7684,8 +7879,8 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.isVector() &&
Store->getValue().getValueType().getScalarType() == MVT::i32);
- if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- *Store->getMemOperand())) {
+ if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ VT, *Store->getMemOperand())) {
return expandUnalignedStore(Store, DAG);
}
@@ -10065,7 +10260,7 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
// Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have
// to try understanding copies to physical registers.
if (SrcVal.getValueType() == MVT::i1 &&
- TargetRegisterInfo::isPhysicalRegister(DestReg->getReg())) {
+ Register::isPhysicalRegister(DestReg->getReg())) {
SDLoc SL(Node);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue VReg = DAG.getRegister(
@@ -10218,7 +10413,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
MachineOperand &Op = MI.getOperand(I);
if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
- !TargetRegisterInfo::isVirtualRegister(Op.getReg()) ||
+ !Register::isVirtualRegister(Op.getReg()) ||
!TRI->isAGPR(MRI, Op.getReg()))
continue;
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
@@ -10256,7 +10451,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
Node->use_begin()->isMachineOpcode() &&
Node->use_begin()->getMachineOpcode() == AMDGPU::EXTRACT_SUBREG &&
!Node->use_begin()->hasAnyUseOfValue(0))) {
- unsigned Def = MI.getOperand(0).getReg();
+ Register Def = MI.getOperand(0).getReg();
// Change this into a noret atomic.
MI.setDesc(TII->get(NoRetAtomicOp));
@@ -10300,7 +10495,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
// Combine the constants and the pointer.
const SDValue Ops1[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32),
Ptr,
DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
SubRegHi,
@@ -10330,7 +10525,7 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, const SDLoc &DL,
SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
const SDValue Ops[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32),
PtrLo,
DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
PtrHi,
@@ -10364,7 +10559,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, nullptr);
case 32:
case 16:
- RC = &AMDGPU::SReg_32_XM0RegClass;
+ RC = &AMDGPU::SReg_32RegClass;
break;
case 64:
RC = &AMDGPU::SGPR_64RegClass;
@@ -10373,7 +10568,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
RC = &AMDGPU::SReg_96RegClass;
break;
case 128:
- RC = &AMDGPU::SReg_128RegClass;
+ RC = &AMDGPU::SGPR_128RegClass;
break;
case 160:
RC = &AMDGPU::SReg_160RegClass;
@@ -10415,6 +10610,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
break;
case 'a':
+ if (!Subtarget->hasMAIInsts())
+ break;
switch (VT.getSizeInBits()) {
default:
return std::make_pair(0U, nullptr);
@@ -10548,9 +10745,9 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
}
-unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
- const unsigned PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
- const unsigned CacheLineAlign = 6; // log2(64)
+Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+ const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
+ const Align CacheLineAlign = Align(64);
// Pre-GFX10 target did not benefit from loop alignment
if (!ML || DisableLoopAlignment ||
@@ -10578,7 +10775,7 @@ unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
// If inner loop block is aligned assume in average half of the alignment
// size to be added as nops.
if (MBB != Header)
- LoopSize += (1 << MBB->getAlignment()) / 2;
+ LoopSize += MBB->getAlignment().value() / 2;
for (const MachineInstr &MI : *MBB) {
LoopSize += TII->getInstSizeInBytes(MI);
@@ -10644,7 +10841,7 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
const MachineRegisterInfo &MRI = MF->getRegInfo();
const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
unsigned Reg = R->getReg();
- if (TRI.isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return !TRI.isSGPRReg(MRI, Reg);
if (MRI.isLiveIn(Reg)) {
@@ -10683,12 +10880,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
case ISD::INTRINSIC_W_CHAIN:
return AMDGPU::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
- // In some cases intrinsics that are a source of divergence have been
- // lowered to AMDGPUISD so we also need to check those too.
- case AMDGPUISD::INTERP_MOV:
- case AMDGPUISD::INTERP_P1:
- case AMDGPUISD::INTERP_P2:
- return true;
}
return false;
}
@@ -10748,3 +10939,110 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
}
+
+const TargetRegisterClass *
+SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+ const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
+ return Subtarget->getWavefrontSize() == 64 ? &AMDGPU::SReg_64RegClass
+ : &AMDGPU::SReg_32RegClass;
+ if (!TRI->isSGPRClass(RC) && !isDivergent)
+ return TRI->getEquivalentSGPRClass(RC);
+ else if (TRI->isSGPRClass(RC) && isDivergent)
+ return TRI->getEquivalentVGPRClass(RC);
+
+ return RC;
+}
+
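+// Return true if \p V (directly or transitively through its users) feeds a
+// control-flow operand of one of the amdgcn control-flow intrinsics. For
+// example, a value consumed as the mask operand of llvm.amdgcn.end.cf:
+//   call void @llvm.amdgcn.end.cf(i64 %mask) ; hasCFUser(%mask) == true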
+static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited) {
+ if (!Visited.insert(V).second)
+ return false;
+ bool Result = false;
+ for (auto U : V->users()) {
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
+ if (V == U->getOperand(1)) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ Result = false;
+ break;
+ case Intrinsic::amdgcn_if_break:
+ case Intrinsic::amdgcn_if:
+ case Intrinsic::amdgcn_else:
+ Result = true;
+ break;
+ }
+ }
+ if (V == U->getOperand(0)) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ Result = false;
+ break;
+ case Intrinsic::amdgcn_end_cf:
+ case Intrinsic::amdgcn_loop:
+ Result = true;
+ break;
+ }
+ }
+ } else {
+ Result = hasCFUser(U, Visited);
+ }
+ if (Result)
+ break;
+ }
+ return Result;
+}
+
+bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
+ const Value *V) const {
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::amdgcn_if_break:
+ return true;
+ }
+ }
+ if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
+ if (const IntrinsicInst *Intrinsic =
+ dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::amdgcn_if:
+ case Intrinsic::amdgcn_else: {
+ ArrayRef<unsigned> Indices = ExtValue->getIndices();
+ if (Indices.size() == 1 && Indices[0] == 1) {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (isa<InlineAsm>(CI->getCalledValue())) {
+ const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
+ ImmutableCallSite CS(CI);
+ TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
+ MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
+ for (auto &TC : TargetConstraints) {
+ if (TC.Type == InlineAsm::isOutput) {
+ ComputeConstraintToUse(TC, SDValue());
+ unsigned AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
+ SIRI, TC.ConstraintCode, TC.ConstraintVT);
+ if (RC) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
+ return true;
+ else if (SIRI->isSGPRClass(RC))
+ return true;
+ }
+ }
+ }
+ }
+ }
+ SmallPtrSet<const Value *, 16> Visited;
+ return hasCFUser(V, Visited);
+}
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 21a215e16ce7..f0102feb65c4 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -94,6 +94,9 @@ private:
SelectionDAG &DAG, ArrayRef<SDValue> Ops,
bool IsIntrinsic = false) const;
+ SDValue lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, SelectionDAG &DAG,
+ ArrayRef<SDValue> Ops) const;
+
// Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to
// dwordx4 if on SI.
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
@@ -183,6 +186,7 @@ private:
unsigned isCFIntrinsic(const SDNode *Intr) const;
+public:
/// \returns True if fixup needs to be emitted for given global value \p GV,
/// false otherwise.
bool shouldEmitFixup(const GlobalValue *GV) const;
@@ -195,11 +199,14 @@ private:
/// global value \p GV, false otherwise.
bool shouldEmitPCReloc(const GlobalValue *GV) const;
+private:
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
// pointed to by Offsets.
- void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
- SDValue *Offsets, unsigned Align = 4) const;
+ /// \returns 0 if there is a non-constant offset or if the offset is 0.
+ /// Otherwise returns the constant offset.
+ unsigned setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
+ SDValue *Offsets, unsigned Align = 4) const;
// Handle 8 bit and 16 bit buffer loads
SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
@@ -235,6 +242,11 @@ public:
bool canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const override;
+ bool allowsMisalignedMemoryAccessesImpl(
+ unsigned Size, unsigned AS, unsigned Align,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *IsFast = nullptr) const;
+
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, unsigned Align,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
@@ -309,12 +321,13 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
MachineBasicBlock *splitKillBlock(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ void bundleInstWithWaitcnt(MachineInstr &MI) const;
MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI,
MachineBasicBlock *BB) const;
@@ -330,6 +343,7 @@ public:
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
@@ -374,7 +388,37 @@ public:
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
- unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
+ const TargetRegisterClass *
+ getRegClassFor(MVT VT, bool isDivergent) const override;
+ bool requiresUniformRegister(MachineFunction &MF,
+ const Value *V) const override;
+ Align getPrefLoopAlignment(MachineLoop *ML) const override;
+
+ void allocateHSAUserSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
+
+ void allocateSystemSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ SIMachineFunctionInfo &Info,
+ CallingConv::ID CallConv,
+ bool IsShader) const;
+
+ void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
+ void allocateSpecialInputSGPRs(
+ CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
+
+ void allocateSpecialInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
};
} // End namespace llvm
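The getRegisterByName change above is one instance of a tree-wide migration in
this import from raw unsigned register numbers to the typed llvm::Register
wrapper; the SIInstrInfo.cpp hunks below convert many more. A minimal sketch of
the idiom, assuming only the Register API present at this revision:

    #include "llvm/CodeGen/Register.h"

    // Register keeps the old unsigned encoding but moves the
    // virtual/physical tests from TargetRegisterInfo statics onto the type.
    static bool isVirt(llvm::Register R) {
      return R.isVirtual(); // was TargetRegisterInfo::isVirtualRegister(R)
    }

Register converts implicitly to and from unsigned, which is what allows these
hunks to retype declarations incrementally without touching every caller.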
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c89d5b71ec5c..dcb04e426584 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1483,12 +1483,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
if (BI.Incoming) {
if (!Brackets)
- Brackets = llvm::make_unique<WaitcntBrackets>(*BI.Incoming);
+ Brackets = std::make_unique<WaitcntBrackets>(*BI.Incoming);
else
*Brackets = *BI.Incoming;
} else {
if (!Brackets)
- Brackets = llvm::make_unique<WaitcntBrackets>(ST);
+ Brackets = std::make_unique<WaitcntBrackets>(ST);
else
Brackets->clear();
}
@@ -1508,7 +1508,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
if (!MoveBracketsToSucc) {
MoveBracketsToSucc = &SuccBI;
} else {
- SuccBI.Incoming = llvm::make_unique<WaitcntBrackets>(*Brackets);
+ SuccBI.Incoming = std::make_unique<WaitcntBrackets>(*Brackets);
}
} else if (SuccBI.Incoming->merge(*Brackets)) {
SuccBI.Dirty = true;
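These hunks swap the LLVM-local llvm::make_unique helper for the standard
std::make_unique, reflecting the move to a C++14 toolchain baseline. The
pattern in isolation, with WaitcntBrackets reduced to a stand-in struct:

    #include <memory>

    struct WaitcntBrackets { /* per-block waitcnt state, elided */ };

    // C++14 standard replacement for the retired llvm::make_unique.
    auto Brackets = std::make_unique<WaitcntBrackets>();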
diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td
index 561a16c3e351..4dcbe92861f2 100644
--- a/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@@ -124,6 +124,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that this is one of the MFMA instructions.
field bit IsMAI = 0;
+ // This bit indicates that this is one of the DOT instructions.
+ field bit IsDOT = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -189,6 +192,8 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{54} = IsMAI;
+ let TSFlags{55} = IsDOT;
+
let SchedRW = [Write32Bit];
field bits<1> DisableSIDecoder = 0;
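The new IsDOT field claims bit 55 of TSFlags, directly after IsMAI; the
matching accessors appear in the SIInstrInfo.h diff further down. A minimal
sketch of how such a flag is tested, assuming an enumerator that mirrors the
TableGen bit position (the real one lives in SIInstrFlags):

    #include <cstdint>
    #include "llvm/CodeGen/MachineInstr.h"

    enum : uint64_t { IsDOT = UINT64_C(1) << 55 }; // assumed mirror of TSFlags{55}

    static bool isDOT(const llvm::MachineInstr &MI) {
      return MI.getDesc().TSFlags & IsDOT;
    }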
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index ba8ed6993a56..d97e6a62971b 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -318,8 +318,25 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
- if (SOffset && SOffset->isReg())
- return false;
+ if (SOffset && SOffset->isReg()) {
+ // We can only handle this if it's a stack access, as any other resource
+ // would require reporting multiple base registers.
+ const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ if (AddrReg && !AddrReg->isFI())
+ return false;
+
+ const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
+ const SIMachineFunctionInfo *MFI
+ = LdSt.getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
+ if (RSrc->getReg() != MFI->getScratchRSrcReg())
+ return false;
+
+ const MachineOperand *OffsetImm =
+ getNamedOperand(LdSt, AMDGPU::OpName::offset);
+ BaseOp = SOffset;
+ Offset = OffsetImm->getImm();
+ return true;
+ }
const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (!AddrReg)
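The expanded soffset handling above lets scratch accesses whose soffset is the
stack pointer report a (base, offset) pair instead of bailing out, so the
scheduler can cluster them. A hypothetical caller shape, matching the hook's
signature at this revision; MI, TII, and TRI are assumed in scope:

    const llvm::MachineOperand *BaseOp;
    int64_t Offset;
    if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) {
      // Accesses whose BaseOp compare equal are candidates for clustering.
    }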
@@ -458,9 +475,9 @@ bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
const MachineRegisterInfo &MRI =
FirstLdSt.getParent()->getParent()->getRegInfo();
- const unsigned Reg = FirstDst->getReg();
+ const Register Reg = FirstDst->getReg();
- const TargetRegisterClass *DstRC = TargetRegisterInfo::isVirtualRegister(Reg)
+ const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg)
? MRI.getRegClass(Reg)
: RI.getPhysRegClass(Reg);
@@ -807,7 +824,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
"Not a VGPR32 reg");
if (Cond.size() == 1) {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(Cond[0]);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
@@ -820,7 +837,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
assert(Cond[0].isImm() && "Cond[0] is not an immediate");
switch (Cond[0].getImm()) {
case SIInstrInfo::SCC_TRUE: {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(-1)
@@ -834,7 +851,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
break;
}
case SIInstrInfo::SCC_FALSE: {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
@@ -850,7 +867,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
case SIInstrInfo::VCCNZ: {
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
@@ -864,7 +881,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
case SIInstrInfo::VCCZ: {
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
@@ -876,8 +893,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
break;
}
case SIInstrInfo::EXECNZ: {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
- unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
: AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
.addImm(0);
@@ -894,8 +911,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
break;
}
case SIInstrInfo::EXECZ: {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
- unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
: AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
.addImm(0);
@@ -925,7 +942,7 @@ unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
const DebugLoc &DL,
unsigned SrcReg, int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
.addImm(Value)
.addReg(SrcReg);
@@ -938,7 +955,7 @@ unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
const DebugLoc &DL,
unsigned SrcReg, int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
.addImm(Value)
.addReg(SrcReg);
@@ -1052,12 +1069,12 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// The SGPR spill/restore instructions only work on numbered SGPRs, so we need
// to make sure we are using the correct register class.
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
+ if (Register::isVirtualRegister(SrcReg) && SpillSize == 4) {
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
}
- MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
+ BuildMI(MBB, MI, DL, OpDesc)
.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
@@ -1068,11 +1085,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// correctly handled.
if (RI.spillSGPRToVGPR())
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
- if (ST.hasScalarStores()) {
- // m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
- }
-
return;
}
@@ -1083,7 +1095,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
auto MIB = BuildMI(MBB, MI, DL, get(Opcode));
if (RI.hasAGPRs(RC)) {
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MIB.addReg(Tmp, RegState::Define);
}
MIB.addReg(SrcReg, getKillRegState(isKill)) // data
@@ -1182,24 +1194,18 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
- if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
+ if (Register::isVirtualRegister(DestReg) && SpillSize == 4) {
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
}
if (RI.spillSGPRToVGPR())
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
- MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
+ BuildMI(MBB, MI, DL, OpDesc, DestReg)
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
.addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
-
- if (ST.hasScalarStores()) {
- // m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
- }
-
return;
}
@@ -1208,7 +1214,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg);
if (RI.hasAGPRs(RC)) {
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MIB.addReg(Tmp, RegState::Define);
}
MIB.addFrameIndex(FrameIndex) // vaddr
@@ -1242,13 +1248,13 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) &&
WorkGroupSize > WavefrontSize) {
- unsigned TIDIGXReg
- = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
- unsigned TIDIGYReg
- = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
- unsigned TIDIGZReg
- = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
- unsigned InputPtrReg =
+ Register TIDIGXReg =
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+ Register TIDIGYReg =
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+ Register TIDIGZReg =
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ Register InputPtrReg =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
@@ -1410,9 +1416,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
break;
case AMDGPU::V_MOV_B64_PSEUDO: {
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
- unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ Register Dst = MI.getOperand(0).getReg();
+ Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
const MachineOperand &SrcOp = MI.getOperand(1);
// FIXME: Will this work for 64-bit floating point immediates?
@@ -1437,6 +1443,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
+ case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
+ expandMovDPP64(MI);
+ break;
+ }
case AMDGPU::V_SET_INACTIVE_B32: {
unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
@@ -1469,7 +1479,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::V_MOVRELD_B32_V8:
case AMDGPU::V_MOVRELD_B32_V16: {
const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
- unsigned VecReg = MI.getOperand(0).getReg();
+ Register VecReg = MI.getOperand(0).getReg();
bool IsUndef = MI.getOperand(1).isUndef();
unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
assert(VecReg == MI.getOperand(1).getReg());
@@ -1492,9 +1502,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case AMDGPU::SI_PC_ADD_REL_OFFSET: {
MachineFunction &MF = *MBB.getParent();
- unsigned Reg = MI.getOperand(0).getReg();
- unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
- unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
+ Register Reg = MI.getOperand(0).getReg();
+ Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
+ Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
// Create a bundle so these instructions won't be re-ordered by the
// post-RA scheduler.
@@ -1531,7 +1541,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
break;
}
case TargetOpcode::BUNDLE: {
- if (!MI.mayLoad())
+ if (!MI.mayLoad() || MI.hasUnmodeledSideEffects())
return false;
// If it is a load it must be a memory clause
@@ -1550,6 +1560,64 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+std::pair<MachineInstr*, MachineInstr*>
+SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
+ assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned Part = 0;
+ MachineInstr *Split[2];
+
+ for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
+ auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp));
+ if (Dst.isPhysical()) {
+ MovDPP.addDef(RI.getSubReg(Dst, Sub));
+ } else {
+ assert(MRI.isSSA());
+ auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MovDPP.addDef(Tmp);
+ }
+
+ for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
+ const MachineOperand &SrcOp = MI.getOperand(I);
+ assert(!SrcOp.isFPImm());
+ if (SrcOp.isImm()) {
+ APInt Imm(64, SrcOp.getImm());
+ Imm.ashrInPlace(Part * 32);
+ MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
+ } else {
+ assert(SrcOp.isReg());
+ Register Src = SrcOp.getReg();
+ if (Src.isPhysical())
+ MovDPP.addReg(RI.getSubReg(Src, Sub));
+ else
+ MovDPP.addReg(Src, SrcOp.isUndef() ? RegState::Undef : 0, Sub);
+ }
+ }
+
+ for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I)
+ MovDPP.addImm(MI.getOperand(I).getImm());
+
+ Split[Part] = MovDPP;
+ ++Part;
+ }
+
+ if (Dst.isVirtual())
+ BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst)
+ .addReg(Split[0]->getOperand(0).getReg())
+ .addImm(AMDGPU::sub0)
+ .addReg(Split[1]->getOperand(0).getReg())
+ .addImm(AMDGPU::sub1);
+
+ MI.eraseFromParent();
+ return std::make_pair(Split[0], Split[1]);
+}
+
bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
MachineOperand &Src0,
unsigned Src0OpName,
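expandMovDPP64 above splits one 64-bit DPP move into two 32-bit halves,
peeling each half out of an immediate operand with APInt. A worked sketch of
that split using the same calls:

    #include "llvm/ADT/APInt.h"

    llvm::APInt Imm(64, 0x123456789ABCDEF0ULL);
    uint64_t Lo = Imm.getLoBits(32).getZExtValue(); // 0x9ABCDEF0, for sub0
    Imm.ashrInPlace(32);                            // bring the high half down
    uint64_t Hi = Imm.getLoBits(32).getZExtValue(); // 0x12345678, for sub1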
@@ -1574,7 +1642,7 @@ bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
MachineOperand &RegOp,
MachineOperand &NonRegOp) {
- unsigned Reg = RegOp.getReg();
+ Register Reg = RegOp.getReg();
unsigned SubReg = RegOp.getSubReg();
bool IsKill = RegOp.isKill();
bool IsDead = RegOp.isDead();
@@ -1646,7 +1714,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
-bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
+bool SIInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx0,
unsigned &SrcOpIdx1) const {
return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
}
@@ -1710,7 +1779,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// FIXME: Virtual register workaround for RegScavenger not working with empty
// blocks.
- unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
auto I = MBB.end();
@@ -2163,7 +2232,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
SmallVector<unsigned, 8> Regs;
for (int Idx = 0; Idx != NElts; ++Idx) {
- unsigned DstElt = MRI.createVirtualRegister(EltRC);
+ Register DstElt = MRI.createVirtualRegister(EltRC);
Regs.push_back(DstElt);
unsigned SubIdx = SubIndices[Idx];
@@ -2327,7 +2396,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.RemoveOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
- unsigned Src1Reg = Src1->getReg();
+ Register Src1Reg = Src1->getReg();
unsigned Src1SubReg = Src1->getSubReg();
Src0->setReg(Src1Reg);
Src0->setSubReg(Src1SubReg);
@@ -2367,12 +2436,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MRI->hasOneUse(Src0->getReg())) {
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
Src0Inlined = true;
- } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
- (ST.getConstantBusLimit(Opc) <= 1 &&
- RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
- (RI.isVirtualRegister(Src0->getReg()) &&
- (ST.getConstantBusLimit(Opc) <= 1 &&
- RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
+ } else if ((Register::isPhysicalRegister(Src0->getReg()) &&
+ (ST.getConstantBusLimit(Opc) <= 1 &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
+ (Register::isVirtualRegister(Src0->getReg()) &&
+ (ST.getConstantBusLimit(Opc) <= 1 &&
+ RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
return false;
// VGPR is okay as Src0 - fallthrough
}
@@ -2385,10 +2454,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MRI->hasOneUse(Src1->getReg()) &&
commuteInstruction(UseMI)) {
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
- } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
- RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
- (RI.isVirtualRegister(Src1->getReg()) &&
- RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ } else if ((Register::isPhysicalRegister(Src1->getReg()) &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
+ (Register::isVirtualRegister(Src1->getReg()) &&
+ RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
return false;
// VGPR is okay as Src1 - fallthrough
}
@@ -2472,8 +2541,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
}
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA) const {
+ const MachineInstr &MIb) const {
assert((MIa.mayLoad() || MIa.mayStore()) &&
"MIa must load from or modify a memory location");
assert((MIb.mayLoad() || MIb.mayStore()) &&
@@ -2664,6 +2732,7 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
+ MI.getOpcode() == AMDGPU::S_DENORM_MODE ||
changesVGPRIndexingMode(MI);
}
@@ -2865,8 +2934,16 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
if (OpInfo.RegClass < 0)
return false;
- if (MO.isImm() && isInlineConstant(MO, OpInfo))
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+
+ if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
+ if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() &&
+ OpNo == (unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::src2))
+ return false;
return RI.opCanUseInlineConstant(OpInfo.OperandType);
+ }
if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
return false;
@@ -2874,8 +2951,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
return true;
- const MachineFunction *MF = MI.getParent()->getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
return ST.hasVOP3Literal();
}
@@ -3036,7 +3111,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
if (!MO.isUse())
return false;
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (Register::isVirtualRegister(MO.getReg()))
return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
// Null is free
@@ -3093,7 +3168,8 @@ static bool shouldReadExec(const MachineInstr &MI) {
return true;
}
- if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
+ if (MI.isPreISelOpcode() ||
+ SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
SIInstrInfo::isSALU(MI) ||
SIInstrInfo::isSMRD(MI))
return false;
@@ -3104,7 +3180,7 @@ static bool shouldReadExec(const MachineInstr &MI) {
static bool isSubRegOf(const SIRegisterInfo &TRI,
const MachineOperand &SuperVec,
const MachineOperand &SubReg) {
- if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
+ if (Register::isPhysicalRegister(SubReg.getReg()))
return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
@@ -3144,8 +3220,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (!Op.isReg())
continue;
- unsigned Reg = Op.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
+ Register Reg = Op.getReg();
+ if (!Register::isVirtualRegister(Reg) && !RC->contains(Reg)) {
ErrInfo = "inlineasm operand has incorrect register class.";
return false;
}
@@ -3209,9 +3285,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
continue;
if (RegClass != -1) {
- unsigned Reg = MI.getOperand(i).getReg();
- if (Reg == AMDGPU::NoRegister ||
- TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MI.getOperand(i).getReg();
+ if (Reg == AMDGPU::NoRegister || Register::isVirtualRegister(Reg))
continue;
const TargetRegisterClass *RC = RI.getRegClass(RegClass);
@@ -3304,7 +3379,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo =
"Dst register should be tied to implicit use of preserved register";
return false;
- } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) &&
+ } else if (Register::isPhysicalRegister(TiedMO.getReg()) &&
Dst.getReg() != TiedMO.getReg()) {
ErrInfo = "Dst register should use same physical register as preserved";
return false;
@@ -3409,6 +3484,32 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ // Special case for writelane - this can break the multiple constant bus rule,
+ // but still can't use more than one SGPR register
+ if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
+ unsigned SGPRCount = 0;
+ Register SGPRUsed = AMDGPU::NoRegister;
+
+ for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) {
+ if (OpIdx == -1)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+
+ if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
+ if (MO.isReg() && MO.getReg() != AMDGPU::M0) {
+ if (MO.getReg() != SGPRUsed)
+ ++SGPRCount;
+ SGPRUsed = MO.getReg();
+ }
+ }
+ if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
+ ErrInfo = "WRITELANE instruction violates constant bus restriction";
+ return false;
+ }
+ }
+ }
+
// Verify misc. restrictions on specific instructions.
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
@@ -3609,7 +3710,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
ErrInfo = "Invalid dpp_ctrl value: "
- "broadcats are not supported on GFX10+";
+ "broadcasts are not supported on GFX10+";
return false;
}
if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
@@ -3631,6 +3732,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::PHI: return AMDGPU::PHI;
case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
case AMDGPU::WQM: return AMDGPU::WQM;
+ case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
case AMDGPU::WWM: return AMDGPU::WWM;
case AMDGPU::S_MOV_B32: {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
@@ -3708,9 +3810,9 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
const MCInstrDesc &Desc = get(MI.getOpcode());
if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
Desc.OpInfo[OpNo].RegClass == -1) {
- unsigned Reg = MI.getOperand(OpNo).getReg();
+ Register Reg = MI.getOperand(OpNo).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return MRI.getRegClass(Reg);
return RI.getPhysRegClass(Reg);
}
@@ -3741,7 +3843,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
else
VRC = &AMDGPU::VGPR_32RegClass;
- unsigned Reg = MRI.createVirtualRegister(VRC);
+ Register Reg = MRI.createVirtualRegister(VRC);
DebugLoc DL = MBB->findDebugLoc(I);
BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
MO.ChangeToRegister(Reg, false);
@@ -3756,7 +3858,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
const {
MachineBasicBlock *MBB = MI->getParent();
DebugLoc DL = MI->getDebugLoc();
- unsigned SubReg = MRI.createVirtualRegister(SubRC);
+ Register SubReg = MRI.createVirtualRegister(SubRC);
if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
@@ -3768,7 +3870,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
// value so we don't need to worry about merging its subreg index with the
// SubIdx passed to this function. The register coalescer should be able to
// eliminate this extra copy.
- unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+ Register NewSuperReg = MRI.createVirtualRegister(SuperRC);
BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
.addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
@@ -3814,11 +3916,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
if (!MO.isReg())
return false;
- unsigned Reg = MO.getReg();
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isVirtualRegister(Reg) ?
- MRI.getRegClass(Reg) :
- RI.getPhysRegClass(Reg);
+ Register Reg = MO.getReg();
+ const TargetRegisterClass *RC = Register::isVirtualRegister(Reg)
+ ? MRI.getRegClass(Reg)
+ : RI.getPhysRegClass(Reg);
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
@@ -3935,13 +4036,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
if (Opc == AMDGPU::V_WRITELANE_B32) {
const DebugLoc &DL = MI.getDebugLoc();
if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src0);
Src0.ChangeToRegister(Reg, false);
}
if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
const DebugLoc &DL = MI.getDebugLoc();
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src1);
@@ -3967,7 +4068,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
// select is uniform.
if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
RI.isVGPR(MRI, Src1.getReg())) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
const DebugLoc &DL = MI.getDebugLoc();
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src1);
@@ -4003,7 +4104,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
MI.setDesc(get(CommutedOpc));
- unsigned Src0Reg = Src0.getReg();
+ Register Src0Reg = Src0.getReg();
unsigned Src0SubReg = Src0.getSubReg();
bool Src0Kill = Src0.isKill();
@@ -4039,13 +4140,13 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]);
const DebugLoc &DL = MI.getDebugLoc();
if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src1);
Src1.ChangeToRegister(Reg, false);
}
if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src2);
Src2.ChangeToRegister(Reg, false);
@@ -4113,12 +4214,12 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
MachineRegisterInfo &MRI) const {
const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
- unsigned DstReg = MRI.createVirtualRegister(SRC);
+ Register DstReg = MRI.createVirtualRegister(SRC);
unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
if (RI.hasAGPRs(VRC)) {
VRC = RI.getEquivalentVGPRClass(VRC);
- unsigned NewSrcReg = MRI.createVirtualRegister(VRC);
+ Register NewSrcReg = MRI.createVirtualRegister(VRC);
BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
get(TargetOpcode::COPY), NewSrcReg)
.addReg(SrcReg);
@@ -4134,7 +4235,7 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
SmallVector<unsigned, 8> SRegs;
for (unsigned i = 0; i < SubRegs; ++i) {
- unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
.addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
@@ -4176,7 +4277,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
MachineOperand &Op,
MachineRegisterInfo &MRI,
const DebugLoc &DL) const {
- unsigned OpReg = Op.getReg();
+ Register OpReg = Op.getReg();
unsigned OpSubReg = Op.getSubReg();
const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
@@ -4186,7 +4287,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
if (DstRC == OpRC)
return;
- unsigned DstReg = MRI.createVirtualRegister(DstRC);
+ Register DstReg = MRI.createVirtualRegister(DstRC);
MachineInstr *Copy =
BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
@@ -4198,8 +4299,19 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
return;
// Try to eliminate the copy if it is copying an immediate value.
- if (Def->isMoveImmediate())
+ if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
FoldImmediate(*Copy, *Def, OpReg, &MRI);
+
+ bool ImpDef = Def->isImplicitDef();
+ while (!ImpDef && Def && Def->isCopy()) {
+ if (Def->getOperand(1).getReg().isPhysical())
+ break;
+ Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
+ ImpDef = Def && Def->isImplicitDef();
+ }
+ if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
+ !ImpDef)
+ Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
}
// Emit the actual waterfall loop, executing the wrapped instruction for each
@@ -4223,18 +4335,18 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
MachineBasicBlock::iterator I = LoopBB.begin();
- unsigned VRsrc = Rsrc.getReg();
+ Register VRsrc = Rsrc.getReg();
unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
- unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
- unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
- unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
- unsigned AndCond = MRI.createVirtualRegister(BoolXExecRC);
- unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+ Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+ Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+ Register AndCond = MRI.createVirtualRegister(BoolXExecRC);
+ Register SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
// Beginning of the loop, read the next Rsrc variant.
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
@@ -4302,7 +4414,7 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+ Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
// Save the EXEC mask
BuildMI(MBB, I, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec);
@@ -4370,10 +4482,10 @@ extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
// Create an empty resource descriptor
- unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
// Zero64 = 0
@@ -4430,7 +4542,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
if (!MI.getOperand(i).isReg() ||
- !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
+ !Register::isVirtualRegister(MI.getOperand(i).getReg()))
continue;
const TargetRegisterClass *OpRC =
MRI.getRegClass(MI.getOperand(i).getReg());
@@ -4447,8 +4559,16 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
if (!VRC) {
assert(SRC);
- VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) ? RI.getEquivalentAGPRClass(SRC)
- : RI.getEquivalentVGPRClass(SRC);
+ if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
+ VRC = &AMDGPU::VReg_1RegClass;
+ } else
+ VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+ ? RI.getEquivalentAGPRClass(SRC)
+ : RI.getEquivalentVGPRClass(SRC);
+ } else {
+ VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+ ? RI.getEquivalentAGPRClass(VRC)
+ : RI.getEquivalentVGPRClass(VRC);
}
RC = VRC;
} else {
@@ -4458,7 +4578,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// Update all the operands so they have the same type.
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+ if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg()))
continue;
// MI is a PHI instruction.
@@ -4483,7 +4603,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// subregister index types e.g. sub0_sub1 + sub2 + sub3
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+ if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg()))
continue;
const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
@@ -4502,8 +4622,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// Legalize INSERT_SUBREG
// src0 must have the same register class as dst
if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src0 = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
if (DstRC != Src0RC) {
@@ -4577,13 +4697,13 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
// This is already an ADDR64 instruction so we need to add the pointer
// extracted from the resource descriptor to the current value of VAddr.
- unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
- unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+ Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+ Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
unsigned RsrcPtr, NewSRsrc;
std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
@@ -4623,7 +4743,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
unsigned RsrcPtr, NewSRsrc;
std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
@@ -4661,6 +4781,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
MIB.addImm(TFE->getImm());
}
+ MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz));
+
MIB.cloneMemRefs(MI);
Addr64 = MIB;
} else {
@@ -4933,8 +5055,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
unsigned NewDstReg = AMDGPU::NoRegister;
if (HasDst) {
- unsigned DstReg = Inst.getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ Register DstReg = Inst.getOperand(0).getReg();
+ if (Register::isPhysicalRegister(DstReg))
continue;
// Update the destination register class.
@@ -4943,7 +5065,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
continue;
if (Inst.isCopy() &&
- TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
+ Register::isVirtualRegister(Inst.getOperand(1).getReg()) &&
NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
// Instead of creating a copy where src and dst are the same register
// class, we just replace all uses of dst with src. These kinds of
@@ -4988,8 +5110,8 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- unsigned OldDstReg = Inst.getOperand(0).getReg();
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register OldDstReg = Inst.getOperand(0).getReg();
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned Opc = Inst.getOpcode();
assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
@@ -5022,8 +5144,8 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
MachineOperand &Dest = Inst.getOperand(0);
MachineOperand &Src = Inst.getOperand(1);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned SubOp = ST.hasAddNoCarry() ?
AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;
@@ -5052,7 +5174,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
MachineOperand &Src1 = Inst.getOperand(2);
if (ST.hasDLInsts()) {
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
@@ -5072,8 +5194,8 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
bool Src1IsSGPR = Src1.isReg() &&
RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
MachineInstr *Xor;
- unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
// Build a pair of scalar instructions and add them to the work list.
// The next iteration over the work list will lower these to the vector
@@ -5117,8 +5239,8 @@ void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
MachineOperand &Src0 = Inst.getOperand(1);
MachineOperand &Src1 = Inst.getOperand(2);
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
.add(Src0)
@@ -5146,8 +5268,8 @@ void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
MachineOperand &Src0 = Inst.getOperand(1);
MachineOperand &Src1 = Inst.getOperand(2);
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
.add(Src1);
@@ -5189,16 +5311,16 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+ Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
- unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+ Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
+ Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
@@ -5226,12 +5348,12 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned CarryReg = MRI.createVirtualRegister(CarryRC);
- unsigned DeadCarryReg = MRI.createVirtualRegister(CarryRC);
+ Register CarryReg = MRI.createVirtualRegister(CarryRC);
+ Register DeadCarryReg = MRI.createVirtualRegister(CarryRC);
MachineOperand &Dest = Inst.getOperand(0);
MachineOperand &Src0 = Inst.getOperand(1);
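splitScalar64BitAddSub decomposes a scalar 64-bit add into a low-half add that
defines a carry plus a high-half add-with-carry, then reassembles the result
with REG_SEQUENCE. A self-contained check of the arithmetic being modeled:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t A = 0x00000001FFFFFFFFull, B = 1;
      uint32_t Lo = uint32_t(A) + uint32_t(B);  // V_ADD_I32: defines carry-out
      uint32_t Carry = Lo < uint32_t(A);        // overflow of the low half
      uint32_t Hi = uint32_t(A >> 32) + uint32_t(B >> 32) + Carry; // V_ADDC_U32
      assert(((uint64_t(Hi) << 32) | Lo) == A + B);
      return 0;
    }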
@@ -5327,17 +5449,17 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+ Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
.add(SrcReg0Sub0)
.add(SrcReg1Sub0);
- unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+ Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.add(SrcReg0Sub1)
.add(SrcReg1Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
+ Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
@@ -5368,7 +5490,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
MachineOperand* Op0;
MachineOperand* Op1;
@@ -5384,7 +5506,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
.add(*Op0);
- unsigned NewDest = MRI.createVirtualRegister(DestRC);
+ Register NewDest = MRI.createVirtualRegister(DestRC);
MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
.addReg(Interm)
@@ -5411,8 +5533,8 @@ void SIInstrInfo::splitScalar64BitBCNT(
MRI.getRegClass(Src.getReg()) :
&AMDGPU::SGPR_32RegClass;
- unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
@@ -5451,9 +5573,9 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
Offset == 0 && "Not implemented");
if (BitWidth < 32) {
- unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
.addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
@@ -5476,8 +5598,8 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
}
MachineOperand &Src = Inst.getOperand(1);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
.addImm(31)
@@ -5506,6 +5628,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
switch (UseMI.getOpcode()) {
case AMDGPU::COPY:
case AMDGPU::WQM:
+ case AMDGPU::SOFT_WQM:
case AMDGPU::WWM:
case AMDGPU::REG_SEQUENCE:
case AMDGPU::PHI:
@@ -5531,7 +5654,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
MachineRegisterInfo &MRI,
MachineInstr &Inst) const {
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MachineBasicBlock *MBB = Inst.getParent();
MachineOperand &Src0 = Inst.getOperand(1);
MachineOperand &Src1 = Inst.getOperand(2);
@@ -5539,8 +5662,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
switch (Inst.getOpcode()) {
case AMDGPU::S_PACK_LL_B32_B16: {
- unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
// FIXME: Can do a lot better if we know the high bits of src0 or src1 are
// 0.
@@ -5558,7 +5681,7 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
break;
}
case AMDGPU::S_PACK_LH_B32_B16: {
- unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
.addImm(0xffff);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
@@ -5568,8 +5691,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
break;
}
case AMDGPU::S_PACK_HH_B32_B16: {
- unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
.addImm(16)
.add(Src0);
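The S_PACK_* lowerings above rebuild the scalar pack semantics from VALU
mask/shift/BFI instructions. For S_PACK_LL_B32_B16 the intent is the low 16
bits of src0 in the low half and the low 16 bits of src1 in the high half; a
self-contained check of that semantic:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Src0 = 0x56781234, Src1 = 0xDEADABCD;
      uint32_t Packed = (Src0 & 0xffff) | (Src1 << 16);
      assert(Packed == 0xABCD1234); // lo16(src0) | (lo16(src1) << 16)
      return 0;
    }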
@@ -5623,17 +5746,27 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::REG_SEQUENCE:
case AMDGPU::INSERT_SUBREG:
case AMDGPU::WQM:
+ case AMDGPU::SOFT_WQM:
case AMDGPU::WWM: {
const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
if (RI.hasAGPRs(SrcRC)) {
if (RI.hasAGPRs(NewDstRC))
return nullptr;
- NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
+ switch (Inst.getOpcode()) {
+ case AMDGPU::PHI:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::INSERT_SUBREG:
+ NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
+ break;
+ default:
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ }
+
if (!NewDstRC)
return nullptr;
} else {
- if (RI.hasVGPRs(NewDstRC))
+ if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
return nullptr;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
@@ -5686,7 +5819,7 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
return MO.getReg();
// If this could be a VGPR or an SGPR, Check the dynamic register class.
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
if (RI.isSGPRClass(RegRC))
UsedSGPRs[i] = Reg;
@@ -5941,7 +6074,7 @@ void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
- unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register DstReg = MRI.createVirtualRegister(RI.getBoolRC());
MachineInstr *SIIF =
BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
.add(Branch->getOperand(0))
@@ -5968,8 +6101,8 @@ void SIInstrInfo::convertNonUniformLoopRegion(
if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
- unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
- unsigned BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register DstReg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
MachineInstrBuilder HeaderPHIBuilder =
BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
@@ -5979,7 +6112,7 @@ void SIInstrInfo::convertNonUniformLoopRegion(
HeaderPHIBuilder.addReg(BackEdgeReg);
} else {
MachineBasicBlock *PMBB = *PI;
- unsigned ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
ZeroReg, 0);
HeaderPHIBuilder.addReg(ZeroReg);
@@ -6063,13 +6196,30 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- unsigned UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
+ Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead);
}
+MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ Register DestReg,
+ RegScavenger &RS) const {
+ if (ST.hasAddNoCarry())
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg);
+
+ Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+ // TODO: Users need to deal with this.
+ if (!UnusedCarry.isValid())
+ return MachineInstrBuilder();
+
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
+ .addReg(UnusedCarry, RegState::Define | RegState::Dead);
+}
+
bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
switch (Opcode) {
case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
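The new getAddNoCarry overload above serves post-RA callers: on subtargets
without add-no-carry it must scavenge a register for the unused carry
definition and may fail, which it signals with an empty builder. A
hypothetical call-site shape (names assumed, not from this patch):

    // RS is a RegScavenger already advanced to the insertion point I.
    llvm::MachineInstrBuilder Add = TII->getAddNoCarry(MBB, I, DL, DestReg, RS);
    if (Add.getInstr())                // null means scavenging failed
      Add.addReg(BaseReg).addImm(Offset);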
@@ -6115,7 +6265,21 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
return false;
const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
- return RCID == AMDGPU::SReg_128RegClassID;
+ return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
+}
+
+unsigned SIInstrInfo::getNumFlatOffsetBits(unsigned AddrSpace,
+ bool Signed) const {
+ if (!ST.hasFlatInstOffsets())
+ return 0;
+
+ if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
+ return 0;
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX10)
+ return Signed ? 12 : 11;
+
+ return Signed ? 13 : 12;
}
bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
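getNumFlatOffsetBits above encodes how many immediate-offset bits FLAT
instructions have per generation; isLegalFLATOffset below consumes it. The
implied ranges, worked out (signed N bits cover [-2^(N-1), 2^(N-1)-1]):

    #include "llvm/Support/MathExtras.h"

    // pre-GFX10: 13 signed bits -> [-4096, 4095]; 12 unsigned -> [0, 4095]
    // GFX10:     12 signed bits -> [-2048, 2047]; 11 unsigned -> [0, 2047]
    bool Fits = llvm::isIntN(13, -4096); // true: -4096 is representable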
@@ -6254,7 +6418,7 @@ static bool followSubRegDef(MachineInstr &MI,
MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
MachineRegisterInfo &MRI) {
assert(MRI.isSSA());
- if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
+ if (!Register::isVirtualRegister(P.Reg))
return nullptr;
auto RSR = P;
@@ -6265,8 +6429,7 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
case AMDGPU::COPY:
case AMDGPU::V_MOV_B32_e32: {
auto &Op1 = MI->getOperand(1);
- if (Op1.isReg() &&
- TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
+ if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) {
if (Op1.isUndef())
return nullptr;
RSR = getRegSubRegPair(Op1);
@@ -6360,3 +6523,40 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
return true;
}
}
+
+MachineInstr *SIInstrInfo::createPHIDestinationCopy(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt,
+ const DebugLoc &DL, Register Src, Register Dst) const {
+ auto Cur = MBB.begin();
+ if (Cur != MBB.end())
+ do {
+ if (!Cur->isPHI() && Cur->readsRegister(Dst))
+ return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src);
+ ++Cur;
+ } while (Cur != MBB.end() && Cur != LastPHIIt);
+
+ return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src,
+ Dst);
+}
+
+MachineInstr *SIInstrInfo::createPHISourceCopy(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
+ const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const {
+ if (InsPt != MBB.end() &&
+ (InsPt->getOpcode() == AMDGPU::SI_IF ||
+ InsPt->getOpcode() == AMDGPU::SI_ELSE ||
+ InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
+ InsPt->definesRegister(Src)) {
+ InsPt++;
+ return BuildMI(MBB, InsPt, InsPt->getDebugLoc(),
+ get(ST.isWave32() ? AMDGPU::S_MOV_B32_term
+ : AMDGPU::S_MOV_B64_term),
+ Dst)
+ .addReg(Src, 0, SrcSubReg)
+ .addReg(AMDGPU::EXEC, RegState::Implicit);
+ }
+ return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg,
+ Dst);
+}
+
+bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 3ff35da0b963..be463442c888 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -173,7 +173,7 @@ public:
}
bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const override;
+ AAResults *AA) const override;
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
@@ -229,6 +229,14 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
+ // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
+ // instructions. Returns a pair of generated instructions.
+ // Can split either post-RA with physical registers or pre-RA with
+  // virtual registers. In the latter case the IR needs to be in SSA form
+  // and a REG_SEQUENCE is produced to define the original register.
+ std::pair<MachineInstr*, MachineInstr*>
+ expandMovDPP64(MachineInstr &MI) const;
+
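A minimal usage sketch for the declaration above, assuming MI is a V_MOV_B64_DPP_PSEUDO and TII points to the SIInstrInfo instance:

    // Sketch only; both results are plain v_mov_b32_dpp after the split.
    std::pair<MachineInstr *, MachineInstr *> Halves = TII->expandMovDPP64(MI);
    MachineInstr *Lo = Halves.first;  // Moves the sub0 (low) 32 bits.
    MachineInstr *Hi = Halves.second; // Moves the sub1 (high) 32 bits.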
// Returns an opcode that can be used to move a value to a \p DstRC
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
@@ -242,7 +250,7 @@ public:
return commuteOpcode(MI.getOpcode());
}
- bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0,
@@ -303,8 +311,7 @@ public:
bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA = nullptr) const override;
+ const MachineInstr &MIb) const override;
bool isFoldableCopy(const MachineInstr &MI) const;
@@ -578,6 +585,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
}
+ static bool isDOT(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
+ }
+
+ bool isDOT(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
+ }
+
static bool isScalarUnit(const MachineInstr &MI) {
return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
}
@@ -954,6 +969,19 @@ public:
bool isBasicBlockPrologue(const MachineInstr &MI) const override;
+ MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsPt,
+ const DebugLoc &DL, Register Src,
+ Register Dst) const override;
+
+ MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsPt,
+ const DebugLoc &DL, Register Src,
+ Register SrcSubReg,
+ Register Dst) const override;
+
+ bool isWave32() const;
+
/// Return a partially built integer add instruction without carry.
/// Caller must add source operands.
/// For pre-GFX9 it will generate unused carry destination operand.
@@ -963,6 +991,12 @@ public:
const DebugLoc &DL,
unsigned DestReg) const;
+ MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ Register DestReg,
+ RegScavenger &RS) const;
+
static bool isKillTerminator(unsigned Opcode);
const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
@@ -970,6 +1004,8 @@ public:
return isUInt<12>(Imm);
}
+ unsigned getNumFlatOffsetBits(unsigned AddrSpace, bool Signed) const;
+
/// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
/// encoded instruction. If \p Signed, this is for an instruction that
/// interprets the offset as signed.
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index c382c816e0b4..1eecbf555613 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -84,7 +84,7 @@ def SDTtbuffer_load : SDTypeProfile<1, 8,
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
SDTCisVT<6, i32>, // format(imm)
- SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
SDTCisVT<8, i1> // idxen(imm)
]>;
@@ -102,7 +102,7 @@ def SDTtbuffer_store : SDTypeProfile<0, 9,
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
SDTCisVT<6, i32>, // format(imm)
- SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
SDTCisVT<8, i1> // idxen(imm)
]>;
@@ -119,7 +119,7 @@ def SDTBufferLoad : SDTypeProfile<1, 7,
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
@@ -145,7 +145,7 @@ def SDTBufferStore : SDTypeProfile<0, 8,
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
@@ -198,6 +198,8 @@ def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
+def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
+def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
@@ -264,6 +266,11 @@ def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;
+def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
+ SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]
+>;
+
//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//
@@ -277,7 +284,9 @@ class isFloatType<ValueType SrcVT> {
!if(!eq(SrcVT.Value, f64.Value), 1,
!if(!eq(SrcVT.Value, v2f16.Value), 1,
!if(!eq(SrcVT.Value, v4f16.Value), 1,
- 0)))));
+ !if(!eq(SrcVT.Value, v2f32.Value), 1,
+ !if(!eq(SrcVT.Value, v2f64.Value), 1,
+ 0)))))));
}
class isIntType<ValueType SrcVT> {
@@ -300,14 +309,36 @@ class isPackedType<ValueType SrcVT> {
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
-defm atomic_inc_global : global_binary_atomic_op<SIatomic_inc>;
-defm atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
+foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
+let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
+
-def atomic_inc_local : local_binary_atomic_op<SIatomic_inc>;
-def atomic_dec_local : local_binary_atomic_op<SIatomic_dec>;
-def atomic_load_fadd_local : local_binary_atomic_op<atomic_load_fadd>;
-def atomic_load_fmin_local : local_binary_atomic_op<SIatomic_fmin>;
-def atomic_load_fmax_local : local_binary_atomic_op<SIatomic_fmax>;
+defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>;
+defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>;
+defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>;
+defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
+
+
+} // End let AddressSpaces = ...
+} // End foreach AddrSpace
+
+def atomic_fadd_global_noret : PatFrag<
+ (ops node:$ptr, node:$value),
+ (SIglobal_atomic_fadd node:$ptr, node:$value)> {
+ // FIXME: Move this
+ let MemoryVT = f32;
+ let IsAtomic = 1;
+ let AddressSpaces = StoreAddress_global.AddrSpaces;
+}
+
+def atomic_pk_fadd_global_noret : PatFrag<
+ (ops node:$ptr, node:$value),
+ (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> {
+ // FIXME: Move this
+ let MemoryVT = v2f16;
+ let IsAtomic = 1;
+ let AddressSpaces = StoreAddress_global.AddrSpaces;
+}
//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
@@ -328,10 +359,12 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
>;
def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
+ let IsLoad = 1;
let IsUnindexed = 1;
}
def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
+ let IsLoad = 1;
let IsNonExtLoad = 1;
}
@@ -347,14 +380,15 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr),
let MemoryVT = i64;
}
-def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
+def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
let IsLoad = 1;
let IsAnyExtLoad = 1;
}
-def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
-}]>;
+def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
+ let IsLoad = 1;
+ let IsSignExtLoad = 1;
+}
def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
let IsLoad = 1;
@@ -391,25 +425,50 @@ def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
let MemoryVT = i16;
}
-def load_glue_align8 : Aligned8Bytes <
- (ops node:$ptr), (load_glue node:$ptr)
->;
-def load_glue_align16 : Aligned16Bytes <
- (ops node:$ptr), (load_glue node:$ptr)
->;
+let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
+def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
+ let IsNonExtLoad = 1;
+}
-def load_local_m0 : LoadFrag<load_glue>, LocalAddress;
-def sextloadi8_local_m0 : LoadFrag<sextloadi8_glue>, LocalAddress;
-def sextloadi16_local_m0 : LoadFrag<sextloadi16_glue>, LocalAddress;
-def extloadi8_local_m0 : LoadFrag<extloadi8_glue>, LocalAddress;
-def zextloadi8_local_m0 : LoadFrag<zextloadi8_glue>, LocalAddress;
-def extloadi16_local_m0 : LoadFrag<extloadi16_glue>, LocalAddress;
-def zextloadi16_local_m0 : LoadFrag<zextloadi16_glue>, LocalAddress;
-def load_align8_local_m0 : LoadFrag <load_glue_align8>, LocalAddress;
-def load_align16_local_m0 : LoadFrag <load_glue_align16>, LocalAddress;
-def atomic_load_32_local_m0 : LoadFrag<atomic_load_32_glue>, LocalAddress;
-def atomic_load_64_local_m0 : LoadFrag<atomic_load_64_glue>, LocalAddress;
+let MemoryVT = i8 in {
+def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
+def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
+def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
+}
+
+let MemoryVT = i16 in {
+def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
+def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
+def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
+}
+
+def load_align8_local_m0 : PatFrag<(ops node:$ptr),
+ (load_local_m0 node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+ let MinAlignment = 8;
+}
+def load_align16_local_m0 : PatFrag<(ops node:$ptr),
+ (load_local_m0 node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+ let MinAlignment = 16;
+}
+
+} // End IsLoad = 1
+
+let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
+def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_32_glue node:$ptr)> {
+ let MemoryVT = i32;
+}
+def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_64_glue node:$ptr)> {
+ let MemoryVT = i64;
+}
+
+} // End let AddressSpaces = LoadAddress_local.AddrSpaces
def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
@@ -420,50 +479,88 @@ def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;
-def atomic_store_glue : PatFrag<(ops node:$ptr, node:$val),
- (AMDGPUatomic_st_glue node:$ptr, node:$val)> {
-}
-
def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
- (AMDGPUst_glue node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
-}]>;
+ (AMDGPUst_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsUnindexed = 1;
+}
def store_glue : PatFrag<(ops node:$val, node:$ptr),
- (unindexedstore_glue node:$val, node:$ptr), [{
- return !cast<StoreSDNode>(N)->isTruncatingStore();
-}]>;
+ (unindexedstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+}
def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
- (unindexedstore_glue node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->isTruncatingStore();
-}]>;
+ (unindexedstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 1;
+}
def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
- (truncstore_glue node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+ (truncstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i8;
+}
def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
- (truncstore_glue node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+ (truncstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i16;
+}
-def store_glue_align8 : Aligned8Bytes <
- (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr)
->;
+let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
+def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (store_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+}
-def store_glue_align16 : Aligned16Bytes <
- (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr)
->;
+def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (unindexedstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i8;
+}
+
+def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (unindexedstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i16;
+}
+}
+
+def store_align16_local_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+ let MinAlignment = 16;
+}
-def store_local_m0 : StoreFrag<store_glue>, LocalAddress;
-def truncstorei8_local_m0 : StoreFrag<truncstorei8_glue>, LocalAddress;
-def truncstorei16_local_m0 : StoreFrag<truncstorei16_glue>, LocalAddress;
-def atomic_store_local_m0 : StoreFrag<AMDGPUatomic_st_glue>, LocalAddress;
+def store_align8_local_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+ let MinAlignment = 8;
+}
+
+let AddressSpaces = StoreAddress_local.AddrSpaces in {
+
+def atomic_store_local_32_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i32;
+}
+def atomic_store_local_64_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i64;
+}
+} // End let AddressSpaces = StoreAddress_local.AddrSpaces
-def store_align8_local_m0 : StoreFrag<store_glue_align8>, LocalAddress;
-def store_align16_local_m0 : StoreFrag<store_glue_align16>, LocalAddress;
def si_setcc_uniform : PatFrag <
(ops node:$lhs, node:$rhs, node:$cond),
@@ -539,16 +636,27 @@ def lshl_rev : PatFrag <
(shl $src0, $src1)
>;
+def add_ctpop : PatFrag <
+ (ops node:$src0, node:$src1),
+ (add (ctpop $src0), $src1)
+>;
+
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
- SDTypeProfile tc = SDTAtomic2> {
+ SDTypeProfile tc = SDTAtomic2,
+ bit IsInt = 1> {
def _glue : SDNode <
!if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;
- def _local_m0 : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
- def _region_m0 : region_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
+ let AddressSpaces = StoreAddress_local.AddrSpaces in {
+ defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
+ }
+
+ let AddressSpaces = StoreAddress_region.AddrSpaces in {
+ defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
+ }
}
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
@@ -563,17 +671,9 @@ defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
-defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32>;
-defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32>;
-defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32>;
-
-def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
->;
-
-def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal<atomic_cmp_swap_glue>;
-def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>;
-
+defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
+defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
+defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;
def as_i1imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
@@ -591,6 +691,10 @@ def as_i32imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
+def as_i32timm: SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
def as_i64imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;
@@ -627,9 +731,13 @@ def SIMM16bit : ImmLeaf <i32,
>;
def UIMM16bit : ImmLeaf <i32,
- [{return isUInt<16>(Imm); }]
+ [{return isUInt<16>(Imm);}]
>;
+def i64imm_32bit : ImmLeaf<i64, [{
+ return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
+}]>;
+
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
return isInlineImmediate(N);
}]>;
@@ -763,6 +871,18 @@ def ExpTgtMatchClass : AsmOperandClass {
let RenderMethod = "printExpTgt";
}
+def SWaitMatchClass : AsmOperandClass {
+ let Name = "SWaitCnt";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseSWaitCntOps";
+}
+
+def VReg32OrOffClass : AsmOperandClass {
+ let Name = "VReg32OrOff";
+ let ParserMethod = "parseVReg32OrOff";
+}
+
+let OperandType = "OPERAND_IMMEDIATE" in {
def SendMsgImm : Operand<i32> {
let PrintMethod = "printSendMsg";
let ParserMatchClass = SendMsgMatchClass;
@@ -778,22 +898,11 @@ def EndpgmImm : Operand<i16> {
let ParserMatchClass = EndpgmMatchClass;
}
-def SWaitMatchClass : AsmOperandClass {
- let Name = "SWaitCnt";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "parseSWaitCntOps";
-}
-
-def VReg32OrOffClass : AsmOperandClass {
- let Name = "VReg32OrOff";
- let ParserMethod = "parseVReg32OrOff";
-}
-
def WAIT_FLAG : Operand <i32> {
let ParserMatchClass = SWaitMatchClass;
let PrintMethod = "printWaitFlag";
- let OperandType = "OPERAND_IMMEDIATE";
}
+} // End OperandType = "OPERAND_IMMEDIATE"
include "SIInstrFormats.td"
include "VIInstrFormats.td"
@@ -929,6 +1038,7 @@ def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
+def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
@@ -1317,18 +1427,6 @@ class getVALUDstForVT<ValueType VT> {
VOPDstS64orS32)))); // else VT == i1
}
-// Returns true if VT is floating point.
-class getIsFP<ValueType VT> {
- bit ret = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, v2f16.Value), 1,
- !if(!eq(VT.Value, v4f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, v2f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- !if(!eq(VT.Value, v2f64.Value), 1,
- 0)))))));
-}
-
// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
@@ -1340,7 +1438,7 @@ class getSDWADstForVT<ValueType VT> {
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
RegisterOperand ret =
!if(isFP,
@@ -1373,11 +1471,14 @@ class getVOPSrc0ForVT<ValueType VT> {
// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
- !if(!eq(VT.Size, 64), VReg_64, VGPR_32));
+ !if(!eq(VT.Size, 96), VReg_96,
+ !if(!eq(VT.Size, 64), VReg_64,
+ !if(!eq(VT.Size, 48), VReg_64,
+ VGPR_32))));
}
class getSDWASrcForVT <ValueType VT> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
RegisterOperand ret = !if(isFP, retFlt, retInt);
@@ -1386,7 +1487,7 @@ class getSDWASrcForVT <ValueType VT> {
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
RegisterOperand ret =
!if(!eq(VT.Size, 128),
VSrc_128,
@@ -1433,7 +1534,7 @@ class isModifierType<ValueType SrcVT> {
// Return type of input modifiers operand for the specified input operand
class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
@@ -1452,7 +1553,7 @@ class getOpSelMod <ValueType VT> {
// Return type of input modifiers operand for the specified input operand for DPP
class getSrcModExt <ValueType VT> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
@@ -2038,6 +2139,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field int NeedPatGen = PatGenMode.NoPattern;
field bit IsMAI = 0;
+ field bit IsDOT = 0;
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 70f20bb69370..21984c6ad910 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
- (i32 imm:$attr)))]
+ [(set f32:$vdst, (int_amdgcn_interp_p1 f32:$vsrc,
+ (i32 timm:$attrchan), (i32 timm:$attr), M0))]
>;
let OtherPredicates = [has32BankLDS] in {
@@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
- (i32 imm:$attr)))]>;
+ [(set f32:$vdst, (int_amdgcn_interp_p2 f32:$src0, f32:$vsrc,
+ (i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
@@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
- (i32 imm:$attr)))]>;
+ [(set f32:$vdst, (int_amdgcn_interp_mov (i32 imm:$vsrc),
+ (i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
} // End Uses = [M0, EXEC]
@@ -92,6 +92,11 @@ def ATOMIC_FENCE : SPseudoInstSI<
let maybeAtomic = 1;
}
+def VOP_I64_I64_DPP : VOPProfile <[i64, i64, untyped, untyped]> {
+ let HasExt = 1;
+ let HasExtDPP = 1;
+}
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
// For use in patterns
@@ -107,10 +112,19 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
+// 64-bit vector move with dpp. Expanded post-RA.
+def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64_DPP> {
+ let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete.
+}
+
// Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the
// WQM pass processes it.
def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
+// Pseudoinstruction for @llvm.amdgcn.softwqm. Like @llvm.amdgcn.wqm it is
+// turned into a copy by the WQM pass, but does not seed WQM requirements.
+def SOFT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
+
// Pseudoinstruction for @llvm.amdgcn.wwm. It is turned into a copy post-RA, so
// that the @earlyclobber is respected. The @earlyclobber is to make sure that
// the instruction that defines $src0 (which is run in WWM) doesn't
@@ -345,13 +359,15 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
}
def SI_INIT_EXEC : SPseudoInstSI <
- (outs), (ins i64imm:$src), []> {
+ (outs), (ins i64imm:$src),
+ [(int_amdgcn_init_exec (i64 timm:$src))]> {
let Defs = [EXEC];
let usesCustomInserter = 1;
let isAsCheapAsAMove = 1;
let WaveSizePredicate = isWave64;
}
+// FIXME: Intrinsic should be mangled for wave size.
def SI_INIT_EXEC_LO : SPseudoInstSI <
(outs), (ins i32imm:$src), []> {
let Defs = [EXEC_LO];
@@ -360,12 +376,20 @@ def SI_INIT_EXEC_LO : SPseudoInstSI <
let WaveSizePredicate = isWave32;
}
+// FIXME: Wave32 version
def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
- (outs), (ins SSrc_b32:$input, i32imm:$shift), []> {
+ (outs), (ins SSrc_b32:$input, i32imm:$shift),
+ [(int_amdgcn_init_exec_from_input i32:$input, (i32 timm:$shift))]> {
let Defs = [EXEC];
let usesCustomInserter = 1;
}
+def : GCNPat <
+ (int_amdgcn_init_exec timm:$src),
+ (SI_INIT_EXEC_LO (as_i32imm imm:$src))> {
+ let WaveSizePredicate = isWave32;
+}
+
// Return for returning shaders to a shader variant epilog.
def SI_RETURN_TO_EPILOG : SPseudoInstSI <
(outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
@@ -604,25 +628,6 @@ def : GCNPat <
(SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0))
>;
-def : GCNPat <
- (AMDGPUinit_exec i64:$src),
- (SI_INIT_EXEC (as_i64imm $src))
-> {
- let WaveSizePredicate = isWave64;
-}
-
-def : GCNPat <
- (AMDGPUinit_exec i64:$src),
- (SI_INIT_EXEC_LO (as_i32imm $src))
-> {
- let WaveSizePredicate = isWave32;
-}
-
-def : GCNPat <
- (AMDGPUinit_exec_from_input i32:$input, i32:$shift),
- (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift))
->;
-
def : GCNPat<
(AMDGPUtrap timm:$trapid),
(S_TRAP $trapid)
@@ -740,22 +745,22 @@ def : GCNPat <
def : GCNPat <
(i32 (fp_to_sint f16:$src)),
- (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 $src))
+ (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src))
>;
def : GCNPat <
(i32 (fp_to_uint f16:$src)),
- (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 $src))
+ (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src))
>;
def : GCNPat <
(f16 (sint_to_fp i32:$src)),
- (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 $src))
+ (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 VSrc_b32:$src))
>;
def : GCNPat <
(f16 (uint_to_fp i32:$src)),
- (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 $src))
+ (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 VSrc_b32:$src))
>;
//===----------------------------------------------------------------------===//
@@ -808,8 +813,14 @@ def : GCNPat <
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
}
+
def : GCNPat <
- (i16 (add (i16 (trunc (getDivergentFrag<ctpop>.ret i32:$popcnt))), i16:$val)),
+ (i32 (ctpop i32:$popcnt)),
+ (V_BCNT_U32_B32_e64 VSrc_b32:$popcnt, (i32 0))
+>;
+
+def : GCNPat <
+ (i16 (add (i16 (trunc (i32 (getDivergentFrag<ctpop>.ret i32:$popcnt)))), i16:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
@@ -1076,53 +1087,158 @@ def : GCNPat <
/********** ================================ **********/
// Prevent expanding both fneg and fabs.
+// TODO: Add IgnoredBySelectionDAG bit?
+let AddedComplexity = 1 in { // Prefer SALU to VALU patterns for DAG
def : GCNPat <
- (fneg (fabs f32:$src)),
- (S_OR_B32 $src, (S_MOV_B32(i32 0x80000000))) // Set sign bit
+ (fneg (fabs (f32 SReg_32:$src))),
+ (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit
>;
-// FIXME: Should use S_OR_B32
def : GCNPat <
- (fneg (fabs f64:$src)),
- (REG_SEQUENCE VReg_64,
- (i32 (EXTRACT_SUBREG f64:$src, sub0)),
- sub0,
- (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
- (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
- sub1)
+ (fabs (f32 SReg_32:$src)),
+ (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fffffff)))
+>;
+
+def : GCNPat <
+ (fneg (f32 SReg_32:$src)),
+ (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000)))
+>;
+
+def : GCNPat <
+ (fneg (f16 SReg_32:$src)),
+ (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000)))
+>;
+
+def : GCNPat <
+ (fneg (f16 VGPR_32:$src)),
+ (V_XOR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fabs (f16 SReg_32:$src)),
+ (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff)))
+>;
+
+def : GCNPat <
+ (fneg (fabs (f16 SReg_32:$src))),
+ (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
+>;
+
+def : GCNPat <
+ (fneg (fabs (f16 VGPR_32:$src))),
+ (V_OR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
+>;
+
+def : GCNPat <
+ (fneg (v2f16 SReg_32:$src)),
+ (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000)))
+>;
+
+def : GCNPat <
+ (fabs (v2f16 SReg_32:$src)),
+ (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fff7fff)))
+>;
+
+// This is really (fneg (fabs v2f16:$src))
+//
+// fabs is not reported as free because there is a modifier for it in
+// VOP3P instructions, so it is turned into the bit op.
+def : GCNPat <
+ (fneg (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))),
+ (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
>;
def : GCNPat <
- (fabs f32:$src),
- (S_AND_B32 $src, (S_MOV_B32 (i32 0x7fffffff)))
+ (fneg (v2f16 (fabs SReg_32:$src))),
+ (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
+>;
+
+// FIXME: The implicit-def of scc from S_[X]OR_B32 is mishandled
+ // def : GCNPat <
+// (fneg (f64 SReg_64:$src)),
+// (REG_SEQUENCE SReg_64,
+// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+// sub0,
+// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+// (i32 (S_MOV_B32 (i32 0x80000000)))),
+// sub1)
+// >;
+
+// def : GCNPat <
+// (fneg (fabs (f64 SReg_64:$src))),
+// (REG_SEQUENCE SReg_64,
+// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+// sub0,
+// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+// (S_MOV_B32 (i32 0x80000000))), // Set sign bit.
+// sub1)
+// >;
+
+} // End let AddedComplexity = 1
+
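The constants in the patterns above and below are plain IEEE-754 sign/magnitude masks; a standalone C++ model of the f32 forms (v2f16 works the same way, with 0x80008000 / 0x7fff7fff applied to both halves):

    #include <cstdint>
    #include <cstring>

    // Scalar model of the selected bit ops: XOR flips the sign bit (fneg),
    // AND clears it (fabs), OR forces it on (fneg(fabs(x))).
    static float fnegF32(float X) {
      uint32_t B;
      std::memcpy(&B, &X, 4);
      B ^= 0x80000000u;
      std::memcpy(&X, &B, 4);
      return X;
    }
    static float fnegFabsF32(float X) {
      uint32_t B;
      std::memcpy(&B, &X, 4);
      B |= 0x80000000u; // Set sign bit, matching the S_OR_B32 patterns.
      std::memcpy(&X, &B, 4);
      return X;
    }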
+def : GCNPat <
+ (fabs (f32 VGPR_32:$src)),
+ (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fneg (f32 VGPR_32:$src)),
+ (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fabs (f16 VGPR_32:$src)),
+ (V_AND_B32_e32 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
>;
def : GCNPat <
- (fneg f32:$src),
- (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000)))
+ (fneg (v2f16 VGPR_32:$src)),
+ (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
>;
def : GCNPat <
- (fabs f64:$src),
+ (fabs (v2f16 VGPR_32:$src)),
+ (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fneg (v2f16 (fabs VGPR_32:$src))),
+ (V_OR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit
+>;
+
+def : GCNPat <
+ (fabs (f64 VReg_64:$src)),
(REG_SEQUENCE VReg_64,
- (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_AND_B32_e64 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+ (V_AND_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
(V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit.
sub1)
>;
+// TODO: Use SGPR for constant
def : GCNPat <
- (fneg f64:$src),
+ (fneg (f64 VReg_64:$src)),
(REG_SEQUENCE VReg_64,
- (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+ (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
(i32 (V_MOV_B32_e32 (i32 0x80000000)))),
sub1)
>;
+// TODO: Use SGPR for constant
+def : GCNPat <
+ (fneg (fabs (f64 VReg_64:$src))),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
+ sub0,
+ (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
+ (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
+ sub1)
+>;
+
def : GCNPat <
(fcopysign f16:$src0, f16:$src1),
(V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
@@ -1154,45 +1270,6 @@ def : GCNPat <
(V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
>;
-def : GCNPat <
- (fneg f16:$src),
- (S_XOR_B32 $src, (S_MOV_B32 (i32 0x00008000)))
->;
-
-def : GCNPat <
- (fabs f16:$src),
- (S_AND_B32 $src, (S_MOV_B32 (i32 0x00007fff)))
->;
-
-def : GCNPat <
- (fneg (fabs f16:$src)),
- (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
->;
-
-def : GCNPat <
- (fneg v2f16:$src),
- (S_XOR_B32 $src, (S_MOV_B32 (i32 0x80008000)))
->;
-
-def : GCNPat <
- (fabs v2f16:$src),
- (S_AND_B32 $src, (S_MOV_B32 (i32 0x7fff7fff)))
->;
-
-// This is really (fneg (fabs v2f16:$src))
-//
-// fabs is not reported as free because there is modifier for it in
-// VOP3P instructions, so it is turned into the bit op.
-def : GCNPat <
- (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))),
- (S_OR_B32 $src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
->;
-
-def : GCNPat <
- (fneg (v2f16 (fabs v2f16:$src))),
- (S_OR_B32 $src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
->;
-
/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/
@@ -1544,7 +1621,7 @@ def : GCNPat <
(V_CVT_F16_F32_e32 (
V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
- $src))
+ SSrc_i1:$src))
>;
def : GCNPat <
@@ -1552,35 +1629,35 @@ def : GCNPat <
(V_CVT_F16_F32_e32 (
V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
- $src))
+ SSrc_i1:$src))
>;
def : GCNPat <
(f32 (sint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
- $src)
+ SSrc_i1:$src)
>;
def : GCNPat <
(f32 (uint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
- $src)
+ SSrc_i1:$src)
>;
def : GCNPat <
(f64 (sint_to_fp i1:$src)),
(V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 -1),
- $src))
+ SSrc_i1:$src))
>;
def : GCNPat <
(f64 (uint_to_fp i1:$src)),
(V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 1),
- $src))
+ SSrc_i1:$src))
>;
//===----------------------------------------------------------------------===//
@@ -1788,6 +1865,22 @@ def : GCNPat <
(INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
>;
+def : GCNPat <
+ (i64 (int_amdgcn_mov_dpp i64:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+ timm:$bound_ctrl)),
+ (V_MOV_B64_DPP_PSEUDO $src, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl))
+>;
+
+def : GCNPat <
+ (i64 (int_amdgcn_update_dpp i64:$old, i64:$src, timm:$dpp_ctrl, timm:$row_mask,
+ timm:$bank_mask, timm:$bound_ctrl)),
+ (V_MOV_B64_DPP_PSEUDO $old, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl))
+>;
+
//===----------------------------------------------------------------------===//
// Fract Patterns
//===----------------------------------------------------------------------===//
@@ -1915,3 +2008,13 @@ def : FP16Med3Pat<f16, V_MED3_F16>;
defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
} // End Predicates = [isGFX9Plus]
+
+class AMDGPUGenericInstruction : GenericInstruction {
+ let Namespace = "AMDGPU";
+}
+
+def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index ae8b967893a2..20db1c37f354 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -42,10 +42,7 @@
//
// Future improvements:
//
-// - This currently relies on the scheduler to place loads and stores next to
-// each other, and then only merges adjacent pairs of instructions. It would
-// be good to be more flexible with interleaved instructions, and possibly run
-// before scheduling. It currently missing stores of constants because loading
+// - This is currently missing stores of constants because loading
// the constant into the data register is placed between the stores, although
// this is arguably a scheduling problem.
//
@@ -98,14 +95,9 @@ enum InstClassEnum {
DS_READ,
DS_WRITE,
S_BUFFER_LOAD_IMM,
- BUFFER_LOAD_OFFEN = AMDGPU::BUFFER_LOAD_DWORD_OFFEN,
- BUFFER_LOAD_OFFSET = AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
- BUFFER_STORE_OFFEN = AMDGPU::BUFFER_STORE_DWORD_OFFEN,
- BUFFER_STORE_OFFSET = AMDGPU::BUFFER_STORE_DWORD_OFFSET,
- BUFFER_LOAD_OFFEN_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact,
- BUFFER_LOAD_OFFSET_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact,
- BUFFER_STORE_OFFEN_exact = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact,
- BUFFER_STORE_OFFSET_exact = AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact,
+ BUFFER_LOAD,
+ BUFFER_STORE,
+ MIMG,
};
enum RegisterEnum {
@@ -114,6 +106,7 @@ enum RegisterEnum {
SOFFSET = 0x4,
VADDR = 0x8,
ADDR = 0x10,
+ SSAMP = 0x20,
};
class SILoadStoreOptimizer : public MachineFunctionPass {
@@ -126,6 +119,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
unsigned Width0;
unsigned Width1;
unsigned BaseOff;
+ unsigned DMask0;
+ unsigned DMask1;
InstClassEnum InstClass;
bool GLC0;
bool GLC1;
@@ -135,6 +130,60 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
bool DLC1;
bool UseST64;
SmallVector<MachineInstr *, 8> InstsToMove;
+ int AddrIdx[5];
+ const MachineOperand *AddrReg[5];
+ unsigned NumAddresses;
+
+ bool hasSameBaseAddress(const MachineInstr &MI) {
+ for (unsigned i = 0; i < NumAddresses; i++) {
+ const MachineOperand &AddrRegNext = MI.getOperand(AddrIdx[i]);
+
+ if (AddrReg[i]->isImm() || AddrRegNext.isImm()) {
+ if (AddrReg[i]->isImm() != AddrRegNext.isImm() ||
+ AddrReg[i]->getImm() != AddrRegNext.getImm()) {
+ return false;
+ }
+ continue;
+ }
+
+ // Check same base pointer. Be careful of subregisters, which can occur
+ // with vectors of pointers.
+ if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
+ AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ bool hasMergeableAddress(const MachineRegisterInfo &MRI) {
+ for (unsigned i = 0; i < NumAddresses; ++i) {
+ const MachineOperand *AddrOp = AddrReg[i];
+ // Immediates are always OK.
+ if (AddrOp->isImm())
+ continue;
+
+ // Don't try to merge addresses that aren't either immediates or registers.
+ // TODO: Should be possible to merge FrameIndexes and maybe some other
+ // non-register operands.
+ if (!AddrOp->isReg())
+ return false;
+
+ // TODO: We should be able to merge physical reg addresses.
+ if (Register::isPhysicalRegister(AddrOp->getReg()))
+ return false;
+
+ // If an address has only one use then there will be no other
+ // instructions with the same address, so we can't merge this one.
+ if (MRI.hasOneNonDBGUse(AddrOp->getReg()))
+ return false;
+ }
+ return true;
+ }
+
+ void setMI(MachineBasicBlock::iterator MI, const SIInstrInfo &TII,
+ const GCNSubtarget &STM);
+ void setPaired(MachineBasicBlock::iterator MI, const SIInstrInfo &TII);
};
struct BaseRegisters {
@@ -160,14 +209,12 @@ private:
AliasAnalysis *AA = nullptr;
bool OptimizeAgain;
+ static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII);
static bool offsetsCanBeCombined(CombineInfo &CI);
static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI);
static unsigned getNewOpcode(const CombineInfo &CI);
static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI);
const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI);
- unsigned getOpcodeWidth(const MachineInstr &MI);
- InstClassEnum getInstClass(unsigned Opc);
- unsigned getRegs(unsigned Opc);
bool findMatchingInst(CombineInfo &CI);
@@ -178,22 +225,27 @@ private:
unsigned write2Opcode(unsigned EltSize) const;
unsigned write2ST64Opcode(unsigned EltSize) const;
MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
+ MachineBasicBlock::iterator mergeImagePair(CombineInfo &CI);
MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI);
MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI);
MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI);
void updateBaseAndOffset(MachineInstr &I, unsigned NewBase,
- int32_t NewOffset);
- unsigned computeBase(MachineInstr &MI, const MemAddress &Addr);
- MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI);
- Optional<int32_t> extractConstOffset(const MachineOperand &Op);
- void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr);
+ int32_t NewOffset) const;
+ unsigned computeBase(MachineInstr &MI, const MemAddress &Addr) const;
+ MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI) const;
+ Optional<int32_t> extractConstOffset(const MachineOperand &Op) const;
+ void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr) const;
/// Promotes constant offset to the immediate by adjusting the base. It
/// tries to use a base from the nearby instructions that allows it to have
/// a 13-bit constant offset which gets promoted to the immediate.
bool promoteConstantOffsetToImm(MachineInstr &CI,
MemInfoMap &Visited,
- SmallPtrSet<MachineInstr *, 4> &Promoted);
+ SmallPtrSet<MachineInstr *, 4> &Promoted) const;
+ void addInstToMergeableList(const CombineInfo &CI,
+ std::list<std::list<CombineInfo> > &MergeableInsts) const;
+ bool collectMergeableInsts(MachineBasicBlock &MBB,
+ std::list<std::list<CombineInfo> > &MergeableInsts) const;
public:
static char ID;
@@ -202,7 +254,11 @@ public:
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
}
- bool optimizeBlock(MachineBasicBlock &MBB);
+ void removeCombinedInst(std::list<CombineInfo> &MergeList,
+ const MachineInstr &MI);
+ bool optimizeInstsWithSameBaseAddr(std::list<CombineInfo> &MergeList,
+ bool &OptimizeListAgain);
+ bool optimizeBlock(std::list<std::list<CombineInfo> > &MergeableInsts);
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -216,6 +272,264 @@ public:
}
};
+static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
+ const unsigned Opc = MI.getOpcode();
+
+ if (TII.isMUBUF(Opc)) {
+ // FIXME: Handle d16 correctly
+ return AMDGPU::getMUBUFElements(Opc);
+ }
+ if (TII.isMIMG(MI)) {
+ uint64_t DMaskImm =
+ TII.getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm();
+ return countPopulation(DMaskImm);
+ }
+
+ switch (Opc) {
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ return 1;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ return 2;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return 4;
+ default:
+ return 0;
+ }
+}
+
+/// Maps instruction opcode to enum InstClassEnum.
+static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
+ switch (Opc) {
+ default:
+ if (TII.isMUBUF(Opc)) {
+ switch (AMDGPU::getMUBUFBaseOpcode(Opc)) {
+ default:
+ return UNKNOWN;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact:
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact:
+ return BUFFER_LOAD;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
+ case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
+ case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
+ case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
+ return BUFFER_STORE;
+ }
+ }
+ if (TII.isMIMG(Opc)) {
+ // Ignore instructions encoded without vaddr.
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1)
+ return UNKNOWN;
+ // TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD.
+ if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || TII.isGather4(Opc))
+ return UNKNOWN;
+ return MIMG;
+ }
+ return UNKNOWN;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return S_BUFFER_LOAD_IMM;
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_B32_gfx9:
+ case AMDGPU::DS_READ_B64:
+ case AMDGPU::DS_READ_B64_gfx9:
+ return DS_READ;
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B32_gfx9:
+ case AMDGPU::DS_WRITE_B64:
+ case AMDGPU::DS_WRITE_B64_gfx9:
+ return DS_WRITE;
+ }
+}
+
+/// Determines instruction subclass from opcode. Only instructions
+/// of the same subclass can be merged together.
+static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
+ switch (Opc) {
+ default:
+ if (TII.isMUBUF(Opc))
+ return AMDGPU::getMUBUFBaseOpcode(Opc);
+ if (TII.isMIMG(Opc)) {
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+ assert(Info);
+ return Info->BaseOpcode;
+ }
+ return -1;
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_B32_gfx9:
+ case AMDGPU::DS_READ_B64:
+ case AMDGPU::DS_READ_B64_gfx9:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B32_gfx9:
+ case AMDGPU::DS_WRITE_B64:
+ case AMDGPU::DS_WRITE_B64_gfx9:
+ return Opc;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return AMDGPU::S_BUFFER_LOAD_DWORD_IMM;
+ }
+}
+
+static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) {
+ if (TII.isMUBUF(Opc)) {
+ unsigned result = 0;
+
+ if (AMDGPU::getMUBUFHasVAddr(Opc)) {
+ result |= VADDR;
+ }
+
+ if (AMDGPU::getMUBUFHasSrsrc(Opc)) {
+ result |= SRSRC;
+ }
+
+ if (AMDGPU::getMUBUFHasSoffset(Opc)) {
+ result |= SOFFSET;
+ }
+
+ return result;
+ }
+
+ if (TII.isMIMG(Opc)) {
+ unsigned result = VADDR | SRSRC;
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+ if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler)
+ result |= SSAMP;
+ return result;
+ }
+
+ switch (Opc) {
+ default:
+ return 0;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return SBASE;
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_B64:
+ case AMDGPU::DS_READ_B32_gfx9:
+ case AMDGPU::DS_READ_B64_gfx9:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B64:
+ case AMDGPU::DS_WRITE_B32_gfx9:
+ case AMDGPU::DS_WRITE_B64_gfx9:
+ return ADDR;
+ }
+}
+
+
+void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
+ const SIInstrInfo &TII,
+ const GCNSubtarget &STM) {
+ I = MI;
+ unsigned Opc = MI->getOpcode();
+ InstClass = getInstClass(Opc, TII);
+
+ if (InstClass == UNKNOWN)
+ return;
+
+ switch (InstClass) {
+ case DS_READ:
+ EltSize =
+ (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8
+ : 4;
+ break;
+ case DS_WRITE:
+ EltSize =
+ (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8
+ : 4;
+ break;
+ case S_BUFFER_LOAD_IMM:
+ EltSize = AMDGPU::getSMRDEncodedOffset(STM, 4);
+ break;
+ default:
+ EltSize = 4;
+ break;
+ }
+
+ if (InstClass == MIMG) {
+ DMask0 = TII.getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm();
+ } else {
+ int OffsetIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset);
+ Offset0 = I->getOperand(OffsetIdx).getImm();
+ }
+
+ Width0 = getOpcodeWidth(*I, TII);
+
+ if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
+ Offset0 &= 0xffff;
+ } else if (InstClass != MIMG) {
+ GLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm();
+ if (InstClass != S_BUFFER_LOAD_IMM) {
+ SLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm();
+ }
+ DLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm();
+ }
+
+ unsigned AddrOpName[5] = {0};
+ NumAddresses = 0;
+ const unsigned Regs = getRegs(I->getOpcode(), TII);
+
+ if (Regs & ADDR) {
+ AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
+ }
+
+ if (Regs & SBASE) {
+ AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
+ }
+
+ if (Regs & SRSRC) {
+ AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
+ }
+
+ if (Regs & SOFFSET) {
+ AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
+ }
+
+ if (Regs & VADDR) {
+ AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
+ }
+
+ if (Regs & SSAMP) {
+ AddrOpName[NumAddresses++] = AMDGPU::OpName::ssamp;
+ }
+
+ for (unsigned i = 0; i < NumAddresses; i++) {
+ AddrIdx[i] = AMDGPU::getNamedOperandIdx(I->getOpcode(), AddrOpName[i]);
+ AddrReg[i] = &I->getOperand(AddrIdx[i]);
+ }
+
+ InstsToMove.clear();
+}
+
+void SILoadStoreOptimizer::CombineInfo::setPaired(MachineBasicBlock::iterator MI,
+ const SIInstrInfo &TII) {
+ Paired = MI;
+ assert(InstClass == getInstClass(Paired->getOpcode(), TII));
+
+ if (InstClass == MIMG) {
+ DMask1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dmask)->getImm();
+ } else {
+ int OffsetIdx =
+ AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::offset);
+ Offset1 = Paired->getOperand(OffsetIdx).getImm();
+ }
+
+ Width1 = getOpcodeWidth(*Paired, TII);
+ if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
+ Offset1 &= 0xffff;
+ } else if (InstClass != MIMG) {
+ GLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::glc)->getImm();
+ if (InstClass != S_BUFFER_LOAD_IMM) {
+ SLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::slc)->getImm();
+ }
+ DLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dlc)->getImm();
+ }
+}
+
+
} // end anonymous namespace.
INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
@@ -249,8 +563,7 @@ static void addDefsUsesToList(const MachineInstr &MI,
if (Op.isReg()) {
if (Op.isDef())
RegDefs.insert(Op.getReg());
- else if (Op.readsReg() &&
- TargetRegisterInfo::isPhysicalRegister(Op.getReg()))
+ else if (Op.readsReg() && Register::isPhysicalRegister(Op.getReg()))
PhysRegUses.insert(Op.getReg());
}
}
@@ -282,7 +595,7 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
if (Use.isReg() &&
((Use.readsReg() && RegDefs.count(Use.getReg())) ||
(Use.isDef() && RegDefs.count(Use.getReg())) ||
- (Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) &&
+ (Use.isDef() && Register::isPhysicalRegister(Use.getReg()) &&
PhysRegUses.count(Use.getReg())))) {
Insts.push_back(&MI);
addDefsUsesToList(MI, RegDefs, PhysRegUses);
@@ -307,7 +620,59 @@ static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
return true;
}
+// This function assumes that \p A and \p B are identical except for
+// size and offset, and that they reference adjacent memory.
+static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
+ const MachineMemOperand *A,
+ const MachineMemOperand *B) {
+ unsigned MinOffset = std::min(A->getOffset(), B->getOffset());
+ unsigned Size = A->getSize() + B->getSize();
+ // getMachineMemOperand() adds the offset parameter to the existing offset
+ // of A, so we pass 0 here as the offset and then manually set the merged
+ // operand's offset to the correct value after the call.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size);
+ MMO->setOffset(MinOffset);
+ return MMO;
+}
+
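A worked example of the arithmetic above, with hypothetical values: merging A = {Offset 8, Size 4} and B = {Offset 4, Size 4} gives MinOffset 4 and Size 8, i.e. one operand covering bytes [4, 12) of the base object. A standalone model:

    #include <algorithm>
    #include <cstdint>

    // Standalone model of combineKnownAdjacentMMOs' offset/size math;
    // assumes the two ranges really are adjacent and non-overlapping.
    struct MemRange { int64_t Offset; uint64_t Size; };
    static MemRange combineAdjacent(MemRange A, MemRange B) {
      return {std::min(A.Offset, B.Offset), A.Size + B.Size};
    }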
+bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII) {
+ assert(CI.InstClass == MIMG);
+
+ // Ignore instructions with tfe/lwe set.
+ const auto *TFEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::tfe);
+ const auto *LWEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::lwe);
+
+ if ((TFEOp && TFEOp->getImm()) || (LWEOp && LWEOp->getImm()))
+ return false;
+
+ // Check other optional immediate operands for equality.
+ unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc,
+ AMDGPU::OpName::d16, AMDGPU::OpName::unorm,
+ AMDGPU::OpName::da, AMDGPU::OpName::r128};
+
+ for (auto op : OperandsToMatch) {
+ int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
+ if (AMDGPU::getNamedOperandIdx(CI.Paired->getOpcode(), op) != Idx)
+ return false;
+ if (Idx != -1 &&
+ CI.I->getOperand(Idx).getImm() != CI.Paired->getOperand(Idx).getImm())
+ return false;
+ }
+
+ // Check DMask for overlaps.
+ unsigned MaxMask = std::max(CI.DMask0, CI.DMask1);
+ unsigned MinMask = std::min(CI.DMask0, CI.DMask1);
+
+ unsigned AllowedBitsForMin = llvm::countTrailingZeros(MaxMask);
+ if ((1u << AllowedBitsForMin) <= MinMask)
+ return false;
+
+ return true;
+}
+
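The trailing-zero test above accepts a pair only when every set bit of the smaller dmask lies strictly below the lowest set bit of the larger one. Worked example (hypothetical dmasks): 0b0011 and 0b1100 give MaxMask = 0b1100, countTrailingZeros = 2, and 1u << 2 = 4 > MinMask = 3, so they can combine; 0b0110 and 0b1100 overlap and are rejected because 4 <= 6. A standalone model:

    #include "llvm/Support/MathExtras.h"
    #include <algorithm>

    // Standalone model of the dmask-overlap test; assumes MaxMask != 0
    // (a selected MIMG load always has a non-empty dmask).
    static bool dmasksCombinable(unsigned DMask0, unsigned DMask1) {
      unsigned MaxMask = std::max(DMask0, DMask1);
      unsigned MinMask = std::min(DMask0, DMask1);
      return (1u << llvm::countTrailingZeros(MaxMask)) > MinMask;
    }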
bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
+ assert(CI.InstClass != MIMG);
+
// XXX - Would the same offset be OK? Is there any reason this would happen or
// be useful?
if (CI.Offset0 == CI.Offset1)
@@ -384,164 +749,24 @@ bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
}
}
-unsigned SILoadStoreOptimizer::getOpcodeWidth(const MachineInstr &MI) {
- const unsigned Opc = MI.getOpcode();
-
- if (TII->isMUBUF(MI)) {
- return AMDGPU::getMUBUFDwords(Opc);
- }
-
- switch (Opc) {
- default:
- return 0;
- case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
- return 1;
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
- return 2;
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
- return 4;
- }
-}
-
-InstClassEnum SILoadStoreOptimizer::getInstClass(unsigned Opc) {
- if (TII->isMUBUF(Opc)) {
- const int baseOpcode = AMDGPU::getMUBUFBaseOpcode(Opc);
-
- // If we couldn't identify the opcode, bail out.
- if (baseOpcode == -1) {
- return UNKNOWN;
- }
-
- switch (baseOpcode) {
- default:
- return UNKNOWN;
- case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
- return BUFFER_LOAD_OFFEN;
- case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
- return BUFFER_LOAD_OFFSET;
- case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
- return BUFFER_STORE_OFFEN;
- case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
- return BUFFER_STORE_OFFSET;
- case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact:
- return BUFFER_LOAD_OFFEN_exact;
- case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact:
- return BUFFER_LOAD_OFFSET_exact;
- case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
- return BUFFER_STORE_OFFEN_exact;
- case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
- return BUFFER_STORE_OFFSET_exact;
- }
- }
-
- switch (Opc) {
- default:
- return UNKNOWN;
- case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
- return S_BUFFER_LOAD_IMM;
- case AMDGPU::DS_READ_B32:
- case AMDGPU::DS_READ_B64:
- case AMDGPU::DS_READ_B32_gfx9:
- case AMDGPU::DS_READ_B64_gfx9:
- return DS_READ;
- case AMDGPU::DS_WRITE_B32:
- case AMDGPU::DS_WRITE_B64:
- case AMDGPU::DS_WRITE_B32_gfx9:
- case AMDGPU::DS_WRITE_B64_gfx9:
- return DS_WRITE;
- }
-}
-
-unsigned SILoadStoreOptimizer::getRegs(unsigned Opc) {
- if (TII->isMUBUF(Opc)) {
- unsigned result = 0;
-
- if (AMDGPU::getMUBUFHasVAddr(Opc)) {
- result |= VADDR;
- }
-
- if (AMDGPU::getMUBUFHasSrsrc(Opc)) {
- result |= SRSRC;
- }
-
- if (AMDGPU::getMUBUFHasSoffset(Opc)) {
- result |= SOFFSET;
- }
-
- return result;
- }
-
- switch (Opc) {
- default:
- return 0;
- case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
- return SBASE;
- case AMDGPU::DS_READ_B32:
- case AMDGPU::DS_READ_B64:
- case AMDGPU::DS_READ_B32_gfx9:
- case AMDGPU::DS_READ_B64_gfx9:
- case AMDGPU::DS_WRITE_B32:
- case AMDGPU::DS_WRITE_B64:
- case AMDGPU::DS_WRITE_B32_gfx9:
- case AMDGPU::DS_WRITE_B64_gfx9:
- return ADDR;
- }
-}
-
bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
MachineBasicBlock::iterator E = MBB->end();
MachineBasicBlock::iterator MBBI = CI.I;
const unsigned Opc = CI.I->getOpcode();
- const InstClassEnum InstClass = getInstClass(Opc);
+ const InstClassEnum InstClass = getInstClass(Opc, *TII);
if (InstClass == UNKNOWN) {
return false;
}
+ const unsigned InstSubclass = getInstSubclass(Opc, *TII);
- const unsigned Regs = getRegs(Opc);
-
- unsigned AddrOpName[5] = {0};
- int AddrIdx[5];
- const MachineOperand *AddrReg[5];
- unsigned NumAddresses = 0;
-
- if (Regs & ADDR) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
- }
-
- if (Regs & SBASE) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
- }
-
- if (Regs & SRSRC) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
- }
-
- if (Regs & SOFFSET) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
- }
-
- if (Regs & VADDR) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
- }
-
- for (unsigned i = 0; i < NumAddresses; i++) {
- AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]);
- AddrReg[i] = &CI.I->getOperand(AddrIdx[i]);
-
- // We only ever merge operations with the same base address register, so
- // don't bother scanning forward if there are no other uses.
- if (AddrReg[i]->isReg() &&
- (TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) ||
- MRI->hasOneNonDBGUse(AddrReg[i]->getReg())))
- return false;
- }
+  // Do not merge VMEM buffer instructions with the "swizzled" bit set.
+ int Swizzled =
+ AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::swz);
+ if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm())
+ return false;
++MBBI;
@@ -550,11 +775,10 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);
for (; MBBI != E; ++MBBI) {
- const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE);
- if ((getInstClass(MBBI->getOpcode()) != InstClass) ||
- (IsDS && (MBBI->getOpcode() != Opc))) {
- // This is not a matching DS instruction, but we can keep looking as
+ if ((getInstClass(MBBI->getOpcode(), *TII) != InstClass) ||
+ (getInstSubclass(MBBI->getOpcode(), *TII) != InstSubclass)) {
+ // This is not a matching instruction, but we can keep looking as
  // long as one of these conditions is met:
// 1. It is safe to move I down past MBBI.
// 2. It is safe to move MBBI down past the instruction that I will
@@ -599,58 +823,23 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
CI.InstsToMove))
continue;
- bool Match = true;
- for (unsigned i = 0; i < NumAddresses; i++) {
- const MachineOperand &AddrRegNext = MBBI->getOperand(AddrIdx[i]);
-
- if (AddrReg[i]->isImm() || AddrRegNext.isImm()) {
- if (AddrReg[i]->isImm() != AddrRegNext.isImm() ||
- AddrReg[i]->getImm() != AddrRegNext.getImm()) {
- Match = false;
- break;
- }
- continue;
- }
-
- // Check same base pointer. Be careful of subregisters, which can occur
- // with vectors of pointers.
- if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
- AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
- Match = false;
- break;
- }
- }
+ bool Match = CI.hasSameBaseAddress(*MBBI);
if (Match) {
- int OffsetIdx =
- AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset);
- CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm();
- CI.Width0 = getOpcodeWidth(*CI.I);
- CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm();
- CI.Width1 = getOpcodeWidth(*MBBI);
- CI.Paired = MBBI;
-
- if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) {
- CI.Offset0 &= 0xffff;
- CI.Offset1 &= 0xffff;
- } else {
- CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm();
- CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm();
- if (CI.InstClass != S_BUFFER_LOAD_IMM) {
- CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
- CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
- }
- CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm();
- CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm();
- }
+ CI.setPaired(MBBI, *TII);
+
+      // Check that both offsets (or dmasks, for MIMG) can be combined and
+      // fit in the reduced range.
+ bool canBeCombined =
+ CI.InstClass == MIMG
+ ? dmasksCanBeCombined(CI, *TII)
+ : widthsFit(*STM, CI) && offsetsCanBeCombined(CI);
- // Check both offsets fit in the reduced range.
// We also need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
// instruction.
- if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
- if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
- return true;
+ if (canBeCombined && canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
+ return true;
}
// We've found a load/store that we couldn't merge for some reason.
@@ -711,15 +900,15 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
const TargetRegisterClass *SuperRC =
(CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
- unsigned DestReg = MRI->createVirtualRegister(SuperRC);
+ Register DestReg = MRI->createVirtualRegister(SuperRC);
DebugLoc DL = CI.I->getDebugLoc();
- unsigned BaseReg = AddrReg->getReg();
+ Register BaseReg = AddrReg->getReg();
unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
- unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
.addImm(CI.BaseOff);
@@ -755,12 +944,11 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
moveInstsAfter(Copy1, CI.InstsToMove);
- MachineBasicBlock::iterator Next = std::next(CI.I);
CI.I->eraseFromParent();
CI.Paired->eraseFromParent();
LLVM_DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
- return Next;
+ return Read2;
}
unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const {
@@ -809,11 +997,11 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
const MCInstrDesc &Write2Desc = TII->get(Opc);
DebugLoc DL = CI.I->getDebugLoc();
- unsigned BaseReg = AddrReg->getReg();
+ Register BaseReg = AddrReg->getReg();
unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
- unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
.addImm(CI.BaseOff);
@@ -839,12 +1027,65 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
moveInstsAfter(Write2, CI.InstsToMove);
- MachineBasicBlock::iterator Next = std::next(CI.I);
CI.I->eraseFromParent();
CI.Paired->eraseFromParent();
LLVM_DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
- return Next;
+ return Write2;
+}
+
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI) {
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+ const unsigned Opcode = getNewOpcode(CI);
+
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+
+ Register DestReg = MRI->createVirtualRegister(SuperRC);
+ unsigned MergedDMask = CI.DMask0 | CI.DMask1;
+ unsigned DMaskIdx =
+ AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask);
+
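+  // Rebuild the operand list of the first image op on the merged opcode,
+  // substituting the combined dmask for the original one.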
+ auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg);
+ for (unsigned I = 1, E = (*CI.I).getNumOperands(); I != E; ++I) {
+ if (I == DMaskIdx)
+ MIB.addImm(MergedDMask);
+ else
+ MIB.add((*CI.I).getOperand(I));
+ }
+
+ // It shouldn't be possible to get this far if the two instructions
+ // don't have a single memoperand, because MachineInstr::mayAlias()
+ // will return true if this is the case.
+ assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+
+  MachineInstr *New = MIB.addMemOperand(
+      combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
+
+ // Copy to the old destination registers.
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+ const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
+ const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);
+
+ BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ .add(*Dest0) // Copy to same destination including flags and sub reg.
+ .addReg(DestReg, 0, SubRegIdx0);
+ MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
+
+ moveInstsAfter(Copy1, CI.InstsToMove);
+
+ CI.I->eraseFromParent();
+ CI.Paired->eraseFromParent();
+ return New;
}
MachineBasicBlock::iterator
@@ -855,15 +1096,24 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
- unsigned DestReg = MRI->createVirtualRegister(SuperRC);
+ Register DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
- BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg)
- .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
- .addImm(MergedOffset) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.DLC0) // dlc
- .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
+ // It shouldn't be possible to get this far if the two instructions
+ // don't have a single memoperand, because MachineInstr::mayAlias()
+ // will return true if this is the case.
+ assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+
+ MachineInstr *New =
+ BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg)
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
+ .addImm(MergedOffset) // offset
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.DLC0) // dlc
+ .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -883,10 +1133,9 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
moveInstsAfter(Copy1, CI.InstsToMove);
- MachineBasicBlock::iterator Next = std::next(CI.I);
CI.I->eraseFromParent();
CI.Paired->eraseFromParent();
- return Next;
+ return New;
}
MachineBasicBlock::iterator
@@ -899,24 +1148,34 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
// Copy to the new source register.
- unsigned DestReg = MRI->createVirtualRegister(SuperRC);
+ Register DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg);
- const unsigned Regs = getRegs(Opcode);
+ const unsigned Regs = getRegs(Opcode, *TII);
if (Regs & VADDR)
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
- .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
- .addImm(MergedOffset) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
- .addImm(0) // tfe
- .addImm(CI.DLC0) // dlc
- .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
+ // It shouldn't be possible to get this far if the two instructions
+ // don't have a single memoperand, because MachineInstr::mayAlias()
+ // will return true if this is the case.
+ assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+
+ MachineInstr *New =
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+ .addImm(MergedOffset) // offset
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
+ .addImm(0) // swz
+ .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -936,10 +1195,9 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
moveInstsAfter(Copy1, CI.InstsToMove);
- MachineBasicBlock::iterator Next = std::next(CI.I);
CI.I->eraseFromParent();
CI.Paired->eraseFromParent();
- return Next;
+ return New;
}
unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
@@ -947,7 +1205,10 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
switch (CI.InstClass) {
default:
- return AMDGPU::getMUBUFOpcode(CI.InstClass, Width);
+ assert(CI.InstClass == BUFFER_LOAD || CI.InstClass == BUFFER_STORE);
+ // FIXME: Handle d16 correctly
+ return AMDGPU::getMUBUFOpcode(AMDGPU::getMUBUFBaseOpcode(CI.I->getOpcode()),
+ Width);
case UNKNOWN:
llvm_unreachable("Unknown instruction class");
case S_BUFFER_LOAD_IMM:
@@ -959,76 +1220,47 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
case 4:
return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
}
+ case MIMG:
+ assert("No overlaps" && (countPopulation(CI.DMask0 | CI.DMask1) == Width));
+ return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width);
}
}
std::pair<unsigned, unsigned>
SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) {
- if (CI.Offset0 > CI.Offset1) {
- switch (CI.Width0) {
- default:
- return std::make_pair(0, 0);
- case 1:
- switch (CI.Width1) {
- default:
- return std::make_pair(0, 0);
- case 1:
- return std::make_pair(AMDGPU::sub1, AMDGPU::sub0);
- case 2:
- return std::make_pair(AMDGPU::sub2, AMDGPU::sub0_sub1);
- case 3:
- return std::make_pair(AMDGPU::sub3, AMDGPU::sub0_sub1_sub2);
- }
- case 2:
- switch (CI.Width1) {
- default:
- return std::make_pair(0, 0);
- case 1:
- return std::make_pair(AMDGPU::sub1_sub2, AMDGPU::sub0);
- case 2:
- return std::make_pair(AMDGPU::sub2_sub3, AMDGPU::sub0_sub1);
- }
- case 3:
- switch (CI.Width1) {
- default:
- return std::make_pair(0, 0);
- case 1:
- return std::make_pair(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0);
- }
- }
+
+  if (CI.Width0 == 0 || CI.Width1 == 0 || CI.Width0 + CI.Width1 > 4)
+ return std::make_pair(0, 0);
+
+ bool ReverseOrder;
+ if (CI.InstClass == MIMG) {
+ assert((countPopulation(CI.DMask0 | CI.DMask1) == CI.Width0 + CI.Width1) &&
+ "No overlaps");
+ ReverseOrder = CI.DMask0 > CI.DMask1;
+ } else
+ ReverseOrder = CI.Offset0 > CI.Offset1;
+
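+  // Idxs[Row][Width - 1] is the subregister covering dwords
+  // [Row, Row + Width) of the merged result, e.g. Idxs[1][1] is sub1_sub2;
+  // a zero entry marks a combination that cannot be expressed. When the pair
+  // is in reverse order, the later (paired) instruction supplies the low
+  // dwords, so its index is taken from row zero.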
+ static const unsigned Idxs[4][4] = {
+ {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
+ {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0},
+ {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0},
+ {AMDGPU::sub3, 0, 0, 0},
+ };
+ unsigned Idx0;
+ unsigned Idx1;
+
+ assert(CI.Width0 >= 1 && CI.Width0 <= 3);
+ assert(CI.Width1 >= 1 && CI.Width1 <= 3);
+
+ if (ReverseOrder) {
+ Idx1 = Idxs[0][CI.Width1 - 1];
+ Idx0 = Idxs[CI.Width1][CI.Width0 - 1];
} else {
- switch (CI.Width0) {
- default:
- return std::make_pair(0, 0);
- case 1:
- switch (CI.Width1) {
- default:
- return std::make_pair(0, 0);
- case 1:
- return std::make_pair(AMDGPU::sub0, AMDGPU::sub1);
- case 2:
- return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2);
- case 3:
- return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2_sub3);
- }
- case 2:
- switch (CI.Width1) {
- default:
- return std::make_pair(0, 0);
- case 1:
- return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2);
- case 2:
- return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2_sub3);
- }
- case 3:
- switch (CI.Width1) {
- default:
- return std::make_pair(0, 0);
- case 1:
- return std::make_pair(AMDGPU::sub0_sub1_sub2, AMDGPU::sub3);
- }
- }
+ Idx0 = Idxs[0][CI.Width0 - 1];
+ Idx1 = Idxs[CI.Width0][CI.Width1 - 1];
}
+
+ return std::make_pair(Idx0, Idx1);
}
const TargetRegisterClass *
@@ -1040,7 +1272,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) {
case 2:
return &AMDGPU::SReg_64_XEXECRegClass;
case 4:
- return &AMDGPU::SReg_128RegClass;
+ return &AMDGPU::SGPR_128RegClass;
case 8:
return &AMDGPU::SReg_256RegClass;
case 16:
@@ -1073,7 +1305,7 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
// Copy to the new source register.
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
- unsigned SrcReg = MRI->createVirtualRegister(SuperRC);
+ Register SrcReg = MRI->createVirtualRegister(SuperRC);
const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
const auto *Src1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);
@@ -1087,35 +1319,45 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
- const unsigned Regs = getRegs(Opcode);
+ const unsigned Regs = getRegs(Opcode, *TII);
if (Regs & VADDR)
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
- .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
- .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
- .addImm(0) // tfe
- .addImm(CI.DLC0) // dlc
- .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
+
+ // It shouldn't be possible to get this far if the two instructions
+ // don't have a single memoperand, because MachineInstr::mayAlias()
+ // will return true if this is the case.
+ assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+
+ MachineInstr *New =
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+ .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
+ .addImm(0) // swz
+ .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
moveInstsAfter(MIB, CI.InstsToMove);
- MachineBasicBlock::iterator Next = std::next(CI.I);
CI.I->eraseFromParent();
CI.Paired->eraseFromParent();
- return Next;
+ return New;
}
MachineOperand
-SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) {
+SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) const {
APInt V(32, Val, true);
if (TII->isInlineConstant(V))
return MachineOperand::CreateImm(Val);
- unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
MachineInstr *Mov =
BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), Reg)
@@ -1127,7 +1369,7 @@ SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) {
// Compute base address using Addr and return the final register.
unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
- const MemAddress &Addr) {
+ const MemAddress &Addr) const {
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator MBBI = MI.getIterator();
DebugLoc DL = MI.getDebugLoc();
@@ -1146,11 +1388,11 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned CarryReg = MRI->createVirtualRegister(CarryRC);
- unsigned DeadCarryReg = MRI->createVirtualRegister(CarryRC);
+ Register CarryReg = MRI->createVirtualRegister(CarryRC);
+ Register DeadCarryReg = MRI->createVirtualRegister(CarryRC);
- unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MachineInstr *LoHalf =
BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
.addReg(CarryReg, RegState::Define)
@@ -1170,7 +1412,7 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
(void)HiHalf;
LLVM_DEBUG(dbgs() << " "; HiHalf->dump(););
- unsigned FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass);
MachineInstr *FullBase =
BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
@@ -1186,13 +1428,13 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
// Update base and offset with the NewBase and NewOffset in MI.
void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI,
unsigned NewBase,
- int32_t NewOffset) {
+ int32_t NewOffset) const {
TII->getNamedOperand(MI, AMDGPU::OpName::vaddr)->setReg(NewBase);
TII->getNamedOperand(MI, AMDGPU::OpName::offset)->setImm(NewOffset);
}
Optional<int32_t>
-SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) {
+SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) const {
if (Op.isImm())
return Op.getImm();
@@ -1218,7 +1460,7 @@ SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) {
// %Base:vreg_64 =
// REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1
void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base,
- MemAddress &Addr) {
+ MemAddress &Addr) const {
if (!Base.isReg())
return;
@@ -1273,15 +1515,16 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base
bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
MachineInstr &MI,
MemInfoMap &Visited,
- SmallPtrSet<MachineInstr *, 4> &AnchorList) {
+ SmallPtrSet<MachineInstr *, 4> &AnchorList) const {
+
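+  // Only handle pure loads or pure stores; the XOR also rejects
+  // read-modify-write atomics, which both load and store.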
+ if (!(MI.mayLoad() ^ MI.mayStore()))
+ return false;
// TODO: Support flat and scratch.
- if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0 ||
- TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL)
+ if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
return false;
- // TODO: Support Store.
- if (!MI.mayLoad())
+ if (MI.mayLoad() && TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL)
return false;
if (AnchorList.count(&MI))
@@ -1418,100 +1661,166 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
return false;
}
-// Scan through looking for adjacent LDS operations with constant offsets from
-// the same base register. We rely on the scheduler to do the hard work of
-// clustering nearby loads, and assume these are all adjacent.
-bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
- bool Modified = false;
+void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI,
+ std::list<std::list<CombineInfo> > &MergeableInsts) const {
+ for (std::list<CombineInfo> &AddrList : MergeableInsts) {
+ if (AddrList.front().hasSameBaseAddress(*CI.I) &&
+ AddrList.front().InstClass == CI.InstClass) {
+ AddrList.emplace_back(CI);
+ return;
+ }
+ }
+
+ // Base address not found, so add a new list.
+ MergeableInsts.emplace_back(1, CI);
+}
+
+bool SILoadStoreOptimizer::collectMergeableInsts(MachineBasicBlock &MBB,
+ std::list<std::list<CombineInfo> > &MergeableInsts) const {
+ bool Modified = false;
  // Caches the decomposed base address of instructions we have already
  // visited.
MemInfoMap Visited;
// Contains the list of instructions for which constant offsets are being
// promoted to the IMM.
SmallPtrSet<MachineInstr *, 4> AnchorList;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
- MachineInstr &MI = *I;
-
+  // Sort potentially mergeable instructions into lists. One list per base address.
+ for (MachineInstr &MI : MBB.instrs()) {
+ // We run this before checking if an address is mergeable, because it can produce
+ // better code even if the instructions aren't mergeable.
if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
Modified = true;
+ const InstClassEnum InstClass = getInstClass(MI.getOpcode(), *TII);
+ if (InstClass == UNKNOWN)
+ continue;
+
// Don't combine if volatile.
- if (MI.hasOrderedMemoryRef()) {
- ++I;
+ if (MI.hasOrderedMemoryRef())
+ continue;
+
+ CombineInfo CI;
+ CI.setMI(MI, *TII, *STM);
+
+ if (!CI.hasMergeableAddress(*MRI))
+ continue;
+
+ addInstToMergeableList(CI, MergeableInsts);
+ }
+ return Modified;
+}
+
+// Scan through looking for adjacent LDS operations with constant offsets from
+// the same base register. We rely on the scheduler to do the hard work of
+// clustering nearby loads, and assume these are all adjacent.
+bool SILoadStoreOptimizer::optimizeBlock(
+ std::list<std::list<CombineInfo> > &MergeableInsts) {
+ bool Modified = false;
+
+ for (std::list<CombineInfo> &MergeList : MergeableInsts) {
+ if (MergeList.size() < 2)
+ continue;
+
+ bool OptimizeListAgain = false;
+ if (!optimizeInstsWithSameBaseAddr(MergeList, OptimizeListAgain)) {
+ // We weren't able to make any changes, so clear the list so we don't
+ // process the same instructions the next time we try to optimize this
+ // block.
+ MergeList.clear();
continue;
}
- const unsigned Opc = MI.getOpcode();
+ // We made changes, but also determined that there were no more optimization
+    // opportunities, so we don't need to reprocess the list.
+ if (!OptimizeListAgain)
+ MergeList.clear();
- CombineInfo CI;
- CI.I = I;
- CI.InstClass = getInstClass(Opc);
+ OptimizeAgain |= OptimizeListAgain;
+ Modified = true;
+ }
+ return Modified;
+}
+
+void
+SILoadStoreOptimizer::removeCombinedInst(std::list<CombineInfo> &MergeList,
+ const MachineInstr &MI) {
+
+ for (auto CI = MergeList.begin(), E = MergeList.end(); CI != E; ++CI) {
+ if (&*CI->I == &MI) {
+ MergeList.erase(CI);
+ return;
+ }
+ }
+}
+
+bool
+SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
+ std::list<CombineInfo> &MergeList,
+ bool &OptimizeListAgain) {
+ bool Modified = false;
+ for (auto I = MergeList.begin(); I != MergeList.end(); ++I) {
+ CombineInfo &CI = *I;
switch (CI.InstClass) {
default:
break;
case DS_READ:
- CI.EltSize =
- (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8
- : 4;
if (findMatchingInst(CI)) {
Modified = true;
- I = mergeRead2Pair(CI);
- } else {
- ++I;
+ removeCombinedInst(MergeList, *CI.Paired);
+ MachineBasicBlock::iterator NewMI = mergeRead2Pair(CI);
+ CI.setMI(NewMI, *TII, *STM);
}
- continue;
+ break;
case DS_WRITE:
- CI.EltSize =
- (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8
- : 4;
if (findMatchingInst(CI)) {
Modified = true;
- I = mergeWrite2Pair(CI);
- } else {
- ++I;
+ removeCombinedInst(MergeList, *CI.Paired);
+ MachineBasicBlock::iterator NewMI = mergeWrite2Pair(CI);
+ CI.setMI(NewMI, *TII, *STM);
}
- continue;
+ break;
case S_BUFFER_LOAD_IMM:
- CI.EltSize = AMDGPU::getSMRDEncodedOffset(*STM, 4);
if (findMatchingInst(CI)) {
Modified = true;
- I = mergeSBufferLoadImmPair(CI);
- OptimizeAgain |= (CI.Width0 + CI.Width1) < 16;
- } else {
- ++I;
+ removeCombinedInst(MergeList, *CI.Paired);
+ MachineBasicBlock::iterator NewMI = mergeSBufferLoadImmPair(CI);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 16;
}
- continue;
- case BUFFER_LOAD_OFFEN:
- case BUFFER_LOAD_OFFSET:
- case BUFFER_LOAD_OFFEN_exact:
- case BUFFER_LOAD_OFFSET_exact:
- CI.EltSize = 4;
+ break;
+ case BUFFER_LOAD:
if (findMatchingInst(CI)) {
Modified = true;
- I = mergeBufferLoadPair(CI);
- OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
- } else {
- ++I;
+ removeCombinedInst(MergeList, *CI.Paired);
+ MachineBasicBlock::iterator NewMI = mergeBufferLoadPair(CI);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
}
- continue;
- case BUFFER_STORE_OFFEN:
- case BUFFER_STORE_OFFSET:
- case BUFFER_STORE_OFFEN_exact:
- case BUFFER_STORE_OFFSET_exact:
- CI.EltSize = 4;
+ break;
+ case BUFFER_STORE:
if (findMatchingInst(CI)) {
Modified = true;
- I = mergeBufferStorePair(CI);
- OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
- } else {
- ++I;
+ removeCombinedInst(MergeList, *CI.Paired);
+ MachineBasicBlock::iterator NewMI = mergeBufferStorePair(CI);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
}
- continue;
+ break;
+ case MIMG:
+ if (findMatchingInst(CI)) {
+ Modified = true;
+ removeCombinedInst(MergeList, *CI.Paired);
+ MachineBasicBlock::iterator NewMI = mergeImagePair(CI);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
+ }
+ break;
}
-
- ++I;
+ // Clear the InstsToMove after we have finished searching so we don't have
+ // stale values left over if we search for this CI again in another pass
+ // over the block.
+ CI.InstsToMove.clear();
}
return Modified;
@@ -1537,10 +1846,14 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Modified = false;
+
for (MachineBasicBlock &MBB : MF) {
+ std::list<std::list<CombineInfo> > MergeableInsts;
+ // First pass: Collect list of all instructions we know how to merge.
+ Modified |= collectMergeableInsts(MBB, MergeableInsts);
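+    // Second pass: merge repeatedly, since one successful merge can enable a
+    // wider one on the next iteration.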
do {
OptimizeAgain = false;
- Modified |= optimizeBlock(MBB);
+ Modified |= optimizeBlock(MergeableInsts);
} while (OptimizeAgain);
}
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 78f409cd9555..6f9abd3a8d9b 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -98,6 +98,8 @@ private:
void emitLoop(MachineInstr &MI);
void emitEndCf(MachineInstr &MI);
+ Register getSaveExec(MachineInstr* MI);
+
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const;
@@ -144,7 +146,7 @@ char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
const SIInstrInfo *TII) {
- unsigned SaveExecReg = MI.getOperand(0).getReg();
+ Register SaveExecReg = MI.getOperand(0).getReg();
auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
if (U == MRI->use_instr_nodbg_end() ||
@@ -175,17 +177,31 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
return true;
}
+Register SILowerControlFlow::getSaveExec(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineOperand &SaveExec = MI->getOperand(0);
+ assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister);
+
+ Register SaveExecReg = SaveExec.getReg();
+ unsigned FalseTermOpc =
+ TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
+  MachineBasicBlock::iterator I = MI;
+ MachineBasicBlock::iterator J = std::next(I);
+ if (J != MBB->end() && J->getOpcode() == FalseTermOpc &&
+ J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) {
+ SaveExecReg = J->getOperand(0).getReg();
+ J->eraseFromParent();
+ }
+ return SaveExecReg;
+}
+
void SILowerControlFlow::emitIf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
-
- MachineOperand &SaveExec = MI.getOperand(0);
- MachineOperand &Cond = MI.getOperand(1);
- assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
- Cond.getSubReg() == AMDGPU::NoSubRegister);
-
- Register SaveExecReg = SaveExec.getReg();
+ Register SaveExecReg = getSaveExec(&MI);
+ MachineOperand& Cond = MI.getOperand(1);
+ assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
MachineOperand &ImpDefSCC = MI.getOperand(4);
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
@@ -204,7 +220,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
.addReg(Exec)
.addReg(Exec, RegState::ImplicitDefine);
- unsigned Tmp = MRI->createVirtualRegister(BoolRC);
+ Register Tmp = MRI->createVirtualRegister(BoolRC);
MachineInstr *And =
BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp)
@@ -266,8 +282,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
- Register DstReg = MI.getOperand(0).getReg();
- assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);
+ Register DstReg = getSaveExec(&MI);
bool ExecModified = MI.getOperand(3).getImm() != 0;
MachineBasicBlock::iterator Start = MBB.begin();
@@ -339,7 +354,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
- auto Dst = MI.getOperand(0).getReg();
+ auto Dst = getSaveExec(&MI);
// Skip ANDing with exec if the break condition is already masked by exec
// because it is a V_CMP in the same basic block. (We know the break
@@ -400,13 +415,17 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ unsigned CFMask = MI.getOperand(0).getReg();
+ MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock::iterator InsPt = MBB.begin();
- MachineInstr *NewMI =
- BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
- .addReg(Exec)
- .add(MI.getOperand(0));
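+  // Restore exec right after the control-flow mask definition when it lives
+  // in this block; otherwise fall back to the top of the block.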
+ MachineBasicBlock::iterator InsPt =
+ Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
+ : MBB.begin();
+ MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
+ .addReg(Exec)
+ .add(MI.getOperand(0));
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
@@ -422,7 +441,7 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const {
MachineOperand &Op = MI.getOperand(OpNo);
- if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
+ if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg())) {
Src.push_back(Op);
return;
}
@@ -442,8 +461,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
for (const auto &SrcOp : Def->explicit_operands())
if (SrcOp.isReg() && SrcOp.isUse() &&
- (TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()) ||
- SrcOp.getReg() == Exec))
+ (Register::isVirtualRegister(SrcOp.getReg()) || SrcOp.getReg() == Exec))
Src.push_back(SrcOp);
}
@@ -466,7 +484,7 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
else return;
- unsigned Reg = MI.getOperand(OpToReplace).getReg();
+ Register Reg = MI.getOperand(OpToReplace).getReg();
MI.RemoveOperand(OpToReplace);
MI.addOperand(Ops[UniqueOpndIdx]);
if (MRI->use_empty(Reg))
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 1c0f836f07e6..b45412536356 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -96,7 +96,7 @@ private:
getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
bool isVreg1(unsigned Reg) const {
- return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ return Register::isVirtualRegister(Reg) &&
MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
}
@@ -489,6 +489,15 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
return true;
}
+#ifndef NDEBUG
+static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ Register Reg) {
+ unsigned Size = TRI.getRegSizeInBits(Reg, MRI);
+ return Size == 1 || Size == 32;
+}
+#endif
+
void SILowerI1Copies::lowerCopiesFromI1() {
SmallVector<MachineInstr *, 4> DeadCopies;
@@ -497,8 +506,8 @@ void SILowerI1Copies::lowerCopiesFromI1() {
if (MI.getOpcode() != AMDGPU::COPY)
continue;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
if (!isVreg1(SrcReg))
continue;
@@ -509,7 +518,7 @@ void SILowerI1Copies::lowerCopiesFromI1() {
LLVM_DEBUG(dbgs() << "Lower copy from i1: " << MI);
DebugLoc DL = MI.getDebugLoc();
- assert(TII->getRegisterInfo().getRegSizeInBits(DstReg, *MRI) == 32);
+ assert(isVRegCompatibleReg(TII->getRegisterInfo(), *MRI, DstReg));
assert(!MI.getOperand(0).getSubReg());
ConstrainRegs.insert(SrcReg);
@@ -544,7 +553,7 @@ void SILowerI1Copies::lowerPhis() {
LF.initialize(MBB);
for (MachineInstr &MI : MBB.phis()) {
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if (!isVreg1(DstReg))
continue;
@@ -556,7 +565,7 @@ void SILowerI1Copies::lowerPhis() {
// Collect incoming values.
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
assert(i + 1 < MI.getNumOperands());
- unsigned IncomingReg = MI.getOperand(i).getReg();
+ Register IncomingReg = MI.getOperand(i).getReg();
MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
@@ -580,12 +589,12 @@ void SILowerI1Copies::lowerPhis() {
// Phis in a loop that are observed outside the loop receive a simple but
// conservatively correct treatment.
- MachineBasicBlock *PostDomBound = &MBB;
- for (MachineInstr &Use : MRI->use_instructions(DstReg)) {
- PostDomBound =
- PDT->findNearestCommonDominator(PostDomBound, Use.getParent());
- }
+ std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
+ for (MachineInstr &Use : MRI->use_instructions(DstReg))
+ DomBlocks.push_back(Use.getParent());
+ MachineBasicBlock *PostDomBound =
+ PDT->findNearestCommonDominator(DomBlocks);
unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
SSAUpdater.Initialize(DstReg);
@@ -669,7 +678,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
MI.getOpcode() != AMDGPU::COPY)
continue;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if (!isVreg1(DstReg))
continue;
@@ -686,10 +695,10 @@ void SILowerI1Copies::lowerCopiesToI1() {
continue;
DebugLoc DL = MI.getDebugLoc();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
assert(!MI.getOperand(1).getSubReg());
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ if (!Register::isVirtualRegister(SrcReg) ||
(!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
unsigned TmpReg = createLaneMaskReg(*MF);
@@ -702,12 +711,12 @@ void SILowerI1Copies::lowerCopiesToI1() {
// Defs in a loop that are observed outside the loop must be transformed
// into appropriate bit manipulation.
- MachineBasicBlock *PostDomBound = &MBB;
- for (MachineInstr &Use : MRI->use_instructions(DstReg)) {
- PostDomBound =
- PDT->findNearestCommonDominator(PostDomBound, Use.getParent());
- }
+ std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
+ for (MachineInstr &Use : MRI->use_instructions(DstReg))
+ DomBlocks.push_back(Use.getParent());
+ MachineBasicBlock *PostDomBound =
+ PDT->findNearestCommonDominator(DomBlocks);
unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
if (FoundLoopLevel) {
SSAUpdater.Initialize(DstReg);
@@ -734,7 +743,7 @@ bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const {
break;
Reg = MI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return false;
if (!isLaneMaskReg(Reg))
return false;
diff --git a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index a82047473370..714d403a3e8f 100644
--- a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -278,8 +278,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vaddr);
int FI = MI.getOperand(FIOp).getIndex();
- unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata)
- ->getReg();
+ Register VReg =
+ TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
TRI->isAGPR(MRI, VReg))) {
TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 46da974a2f45..7dd0f11c95de 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -53,8 +53,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
- Occupancy = getMaxWavesPerEU();
- limitOccupancy(MF);
+ Occupancy = ST.computeOccupancy(MF, getLDSSize());
CallingConv::ID CC = F.getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
@@ -190,7 +189,7 @@ unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
const SIRegisterInfo &TRI) {
ArgInfo.PrivateSegmentBuffer =
ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
NumUserSGPRs += 4;
return ArgInfo.PrivateSegmentBuffer.getRegister();
}
@@ -487,6 +486,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
MemoryBound(MFI.isMemoryBound()),
WaveLimiter(MFI.needsWaveLimiter()),
+ HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
@@ -501,8 +501,9 @@ void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
bool SIMachineFunctionInfo::initializeBaseYamlFields(
const yaml::SIMachineFunctionInfo &YamlMFI) {
ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
- MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
+ MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
LDSSize = YamlMFI.LDSSize;
+ HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
IsEntryFunction = YamlMFI.IsEntryFunction;
NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
MemoryBound = YamlMFI.MemoryBound;
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index f19b20ceb5da..7d70c786b594 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -265,6 +265,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
bool NoSignedZerosFPMath = false;
bool MemoryBound = false;
bool WaveLimiter = false;
+ uint32_t HighBitsOf32BitAddress = 0;
StringValue ScratchRSrcReg = "$private_rsrc_reg";
StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
@@ -302,6 +303,8 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
StringValue("$sp_reg"));
YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
YamlIO.mapOptional("mode", MFI.Mode, SIMode());
+ YamlIO.mapOptional("highBitsOf32BitAddress",
+ MFI.HighBitsOf32BitAddress, 0u);
}
};
@@ -670,7 +673,7 @@ public:
return GITPtrHigh;
}
- unsigned get32BitAddressHighBits() const {
+ uint32_t get32BitAddressHighBits() const {
return HighBitsOf32BitAddress;
}
@@ -873,7 +876,7 @@ public:
assert(BufferRsrc);
auto PSV = BufferPSVs.try_emplace(
BufferRsrc,
- llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
+ std::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
return PSV.first->second.get();
}
@@ -882,14 +885,14 @@ public:
assert(ImgRsrc);
auto PSV = ImagePSVs.try_emplace(
ImgRsrc,
- llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
+ std::make_unique<AMDGPUImagePseudoSourceValue>(TII));
return PSV.first->second.get();
}
const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
if (!GWSResourcePSV) {
GWSResourcePSV =
- llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
+ std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
}
return GWSResourcePSV.get();
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index ebbdf80f9567..c072ba6b2d1c 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -348,7 +348,7 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
// Do not Track Physical Registers, because it messes up.
for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
- if (TargetRegisterInfo::isVirtualRegister(RegMaskPair.RegUnit))
+ if (Register::isVirtualRegister(RegMaskPair.RegUnit))
LiveInRegs.insert(RegMaskPair.RegUnit);
}
LiveOutRegs.clear();
@@ -376,7 +376,7 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
// The use of findDefBetween removes the case 4.
for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
unsigned Reg = RegMaskPair.RegUnit;
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ if (Register::isVirtualRegister(Reg) &&
isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
LIS)) {
@@ -1228,7 +1228,7 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria
unsigned Color = CurrentColoring[SU->NodeNum];
if (RealID.find(Color) == RealID.end()) {
int ID = CurrentBlocks.size();
- BlockPtrs.push_back(llvm::make_unique<SIScheduleBlock>(DAG, this, ID));
+ BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
RealID[Color] = ID;
}
@@ -1690,7 +1690,7 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
for (unsigned Reg : Regs) {
// For now only track virtual registers.
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
continue;
// If not already in the live set, then add it.
(void) LiveRegs.insert(Reg);
@@ -1750,7 +1750,7 @@ SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
for (unsigned Reg : InRegs) {
// For now only track virtual registers.
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
continue;
if (LiveRegsConsumers[Reg] > 1)
continue;
@@ -1762,7 +1762,7 @@ SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
for (unsigned Reg : OutRegs) {
// For now only track virtual registers.
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
continue;
PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
for (; PSetI.isValid(); ++PSetI) {
@@ -1801,7 +1801,7 @@ SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
// SIScheduleDAGMI //
SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
- ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C)) {
+ ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
SITII = static_cast<const SIInstrInfo*>(TII);
SITRI = static_cast<const SIRegisterInfo*>(TRI);
@@ -1913,7 +1913,7 @@ SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
for (_Iterator RegI = First; RegI != End; ++RegI) {
unsigned Reg = *RegI;
// For now only track virtual registers
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
continue;
PSetIterator PSetI = MRI.getPressureSets(Reg);
for (; PSetI.isValid(); ++PSetI) {
diff --git a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 4320e6c957a0..e914573306ae 100644
--- a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -656,10 +656,10 @@ SICacheControl::SICacheControl(const GCNSubtarget &ST) {
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return make_unique<SIGfx6CacheControl>(ST);
+ return std::make_unique<SIGfx6CacheControl>(ST);
if (Generation < AMDGPUSubtarget::GFX10)
- return make_unique<SIGfx7CacheControl>(ST);
- return make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
+ return std::make_unique<SIGfx7CacheControl>(ST);
+ return std::make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
}
bool SIGfx6CacheControl::enableLoadCacheBypass(
diff --git a/lib/Target/AMDGPU/SIModeRegister.cpp b/lib/Target/AMDGPU/SIModeRegister.cpp
index a5edd7b3554a..52989a280e80 100644
--- a/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -226,7 +226,7 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
// - on exit we have set the Require, Change, and initial Exit modes.
void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
const SIInstrInfo *TII) {
- auto NewInfo = llvm::make_unique<BlockData>();
+ auto NewInfo = std::make_unique<BlockData>();
MachineInstr *InsertionPoint = nullptr;
// RequirePending is used to indicate whether we are collecting the initial
// requirements for the block, and need to defer the first InsertionPoint to
diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 3227bff20513..cc9b46a75582 100644
--- a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -322,7 +322,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
+ Register CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
MachineInstr *SaveExecInst = nullptr;
SmallVector<MachineInstr *, 4> OtherUseInsts;
diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 7e10316eab92..fdd30db6a7cb 100644
--- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -211,7 +211,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
return AMDGPU::NoRegister;
MachineOperand *AndCC = &And->getOperand(1);
- unsigned CmpReg = AndCC->getReg();
+ Register CmpReg = AndCC->getReg();
unsigned CmpSubReg = AndCC->getSubReg();
if (CmpReg == ExecReg) {
AndCC = &And->getOperand(2);
@@ -234,7 +234,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
return AMDGPU::NoRegister;
- unsigned SelReg = Op1->getReg();
+ Register SelReg = Op1->getReg();
auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, MRI, LIS);
if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
return AMDGPU::NoRegister;
@@ -250,15 +250,16 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
Op1->getImm() != 0 || Op2->getImm() != 1)
return AMDGPU::NoRegister;
- LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t'
- << *Cmp << '\t' << *And);
+ LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
+ << *And);
- unsigned CCReg = CC->getReg();
+ Register CCReg = CC->getReg();
LIS->RemoveMachineInstrFromMaps(*And);
- MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
- TII->get(Andn2Opc), And->getOperand(0).getReg())
- .addReg(ExecReg)
- .addReg(CCReg, 0, CC->getSubReg());
+ MachineInstr *Andn2 =
+ BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
+ And->getOperand(0).getReg())
+ .addReg(ExecReg)
+ .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg());
And->eraseFromParent();
LIS->InsertMachineInstrInMaps(*Andn2);
@@ -266,20 +267,19 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
// Try to remove compare. Cmp value should not used in between of cmp
// and s_and_b64 if VCC or just unused if any other register.
- if ((TargetRegisterInfo::isVirtualRegister(CmpReg) &&
- MRI.use_nodbg_empty(CmpReg)) ||
+ if ((Register::isVirtualRegister(CmpReg) && MRI.use_nodbg_empty(CmpReg)) ||
(CmpReg == CondReg &&
std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
[&](const MachineInstr &MI) {
- return MI.readsRegister(CondReg, TRI); }))) {
+ return MI.readsRegister(CondReg, TRI);
+ }))) {
LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
LIS->RemoveMachineInstrFromMaps(*Cmp);
Cmp->eraseFromParent();
// Try to remove v_cndmask_b32.
- if (TargetRegisterInfo::isVirtualRegister(SelReg) &&
- MRI.use_nodbg_empty(SelReg)) {
+ if (Register::isVirtualRegister(SelReg) && MRI.use_nodbg_empty(SelReg)) {
LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
LIS->RemoveMachineInstrFromMaps(*Sel);
@@ -413,7 +413,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
if (!SaveExec || !SaveExec->isFullCopy())
continue;
- unsigned SavedExec = SaveExec->getOperand(0).getReg();
+ Register SavedExec = SaveExec->getOperand(0).getReg();
bool SafeToReplace = true;
for (auto& U : MRI.use_nodbg_instructions(SavedExec)) {
if (U.getParent() != SaveExec->getParent()) {
@@ -434,7 +434,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
if (Changed) {
for (auto Reg : RecalcRegs) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
LIS->removeInterval(Reg);
if (!MRI.reg_empty(Reg))
LIS->createAndComputeVirtRegInterval(Reg);
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 2d71abc0612a..9b3b2436475c 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -574,16 +574,16 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (TRI->isPhysicalRegister(Src1->getReg()) ||
- TRI->isPhysicalRegister(Dst->getReg()))
+ if (Register::isPhysicalRegister(Src1->getReg()) ||
+ Register::isPhysicalRegister(Dst->getReg()))
break;
if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
Opcode == AMDGPU::V_LSHLREV_B32_e64) {
- return make_unique<SDWADstOperand>(
+ return std::make_unique<SDWADstOperand>(
Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
} else {
- return make_unique<SDWASrcOperand>(
+ return std::make_unique<SDWASrcOperand>(
Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
Opcode != AMDGPU::V_LSHRREV_B32_e64);
@@ -613,15 +613,15 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (TRI->isPhysicalRegister(Src1->getReg()) ||
- TRI->isPhysicalRegister(Dst->getReg()))
+ if (Register::isPhysicalRegister(Src1->getReg()) ||
+ Register::isPhysicalRegister(Dst->getReg()))
break;
if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
Opcode == AMDGPU::V_LSHLREV_B16_e64) {
- return make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
+ return std::make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
} else {
- return make_unique<SDWASrcOperand>(
+ return std::make_unique<SDWASrcOperand>(
Src1, Dst, BYTE_1, false, false,
Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
Opcode != AMDGPU::V_LSHRREV_B16_e64);
@@ -677,11 +677,11 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (TRI->isPhysicalRegister(Src0->getReg()) ||
- TRI->isPhysicalRegister(Dst->getReg()))
+ if (Register::isPhysicalRegister(Src0->getReg()) ||
+ Register::isPhysicalRegister(Dst->getReg()))
break;
- return make_unique<SDWASrcOperand>(
+ return std::make_unique<SDWASrcOperand>(
Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32);
}
@@ -706,11 +706,11 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (TRI->isPhysicalRegister(ValSrc->getReg()) ||
- TRI->isPhysicalRegister(Dst->getReg()))
+ if (Register::isPhysicalRegister(ValSrc->getReg()) ||
+ Register::isPhysicalRegister(Dst->getReg()))
break;
- return make_unique<SDWASrcOperand>(
+ return std::make_unique<SDWASrcOperand>(
ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
}
@@ -840,7 +840,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
assert(OrDst && OrDst->isReg());
- return make_unique<SDWADstPreserveOperand>(
+ return std::make_unique<SDWADstPreserveOperand>(
OrDst, OrSDWADef, OrOtherDef, DstSel);
}
@@ -1189,7 +1189,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
continue;
}
- unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
if (Op.isImm())
diff --git a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index f9bfe96f65cb..6cdd12d0e7bd 100644
--- a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -90,12 +90,12 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
if (!MO.isReg())
return false;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!TRI->isVGPR(*MRI, Reg))
return false;
- if (TRI->isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return false;
if (VRM->hasPhys(Reg))
@@ -124,14 +124,14 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
if (!MO.isReg())
continue;
- const unsigned VirtReg = MO.getReg();
- if (TRI->isPhysicalRegister(VirtReg))
+ const Register VirtReg = MO.getReg();
+ if (Register::isPhysicalRegister(VirtReg))
continue;
if (!VRM->hasPhys(VirtReg))
continue;
- unsigned PhysReg = VRM->getPhys(VirtReg);
+ Register PhysReg = VRM->getPhys(VirtReg);
const unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
PhysReg = TRI->getSubReg(PhysReg, SubReg);
@@ -149,7 +149,7 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
for (unsigned Reg : RegsToRewrite) {
LIS->removeInterval(Reg);
- const unsigned PhysReg = VRM->getPhys(Reg);
+ const Register PhysReg = VRM->getPhys(Reg);
assert(PhysReg != 0);
MFI->ReserveWWMRegister(PhysReg);
}
diff --git a/lib/Target/AMDGPU/SIProgramInfo.h b/lib/Target/AMDGPU/SIProgramInfo.h
index 168f05f8fdd6..7c039a54b57f 100644
--- a/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/lib/Target/AMDGPU/SIProgramInfo.h
@@ -41,6 +41,8 @@ struct SIProgramInfo {
uint64_t ComputePGMRSrc2 = 0;
uint32_t NumVGPR = 0;
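+  // Presumably the split of the combined NumVGPR count into architected
+  // and accumulation VGPRs on subtargets with AGPRs (e.g. gfx908).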
+ uint32_t NumArchVGPR = 0;
+ uint32_t NumAccVGPR = 0;
uint32_t NumSGPR = 0;
uint32_t LDSSize = 0;
bool FlatUsed = false;
@@ -51,6 +53,9 @@ struct SIProgramInfo {
  // Number of VGPRs that meets the requested number of waves per execution unit.
uint32_t NumVGPRsForWavesPerEU = 0;
+  // Final computed occupancy (waves per execution unit).
+ uint32_t Occupancy = 0;
+
// Whether there is recursion, dynamic allocas, indirect calls or some other
// reason there may be statically unknown stack usage.
bool DynamicCallStack = false;
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index f152deb28004..f58bc3060c42 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -48,11 +48,6 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
}
}
-static cl::opt<bool> EnableSpillSGPRToSMEM(
- "amdgpu-spill-sgpr-to-smem",
- cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
- cl::init(false));
-
static cl::opt<bool> EnableSpillSGPRToVGPR(
"amdgpu-spill-sgpr-to-vgpr",
cl::desc("Enable spilling VGPRs to SGPRs"),
@@ -61,17 +56,12 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
AMDGPURegisterInfo(),
+ ST(ST),
SGPRPressureSets(getNumRegPressureSets()),
VGPRPressureSets(getNumRegPressureSets()),
AGPRPressureSets(getNumRegPressureSets()),
- SpillSGPRToVGPR(false),
- SpillSGPRToSMEM(false),
+ SpillSGPRToVGPR(EnableSpillSGPRToVGPR),
isWave32(ST.isWave32()) {
- if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
- SpillSGPRToSMEM = true;
- else if (EnableSpillSGPRToVGPR)
- SpillSGPRToVGPR = true;
-
unsigned NumRegPressureSets = getNumRegPressureSets();
SGPRSetID = NumRegPressureSets;
@@ -118,11 +108,9 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
- return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
+ return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
}
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
@@ -144,7 +132,6 @@ static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}
@@ -202,8 +189,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::VCC_HI);
}
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
@@ -220,6 +205,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
+  // Reserve all remaining AGPRs if there are no instructions to use them.
+ if (!ST.hasMAIInsts()) {
+ for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
+ unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
+ }
+ }
+
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
@@ -293,32 +286,17 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const
bool SIRegisterInfo::requiresFrameIndexScavenging(
const MachineFunction &MF) const {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.hasStackObjects())
- return true;
-
- // May need to deal with callee saved registers.
- const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- return !Info->isEntryFunction();
+ // Do not use frame virtual registers. They used to be used for SGPRs, but
+ // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
+ // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
+ // spill.
+ return false;
}
bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (!MFI.hasStackObjects())
- return false;
-
- // The scavenger is used for large frames which may require finding a free
- // register for large offsets.
- if (!isUInt<12>(MFI.getStackSize()))
- return true;
-
- // If using scalar stores, for spills, m0 is needed for the scalar store
- // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
- // register for it during frame index elimination, so the scavenger is
- // directly needed.
- return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
- MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
+ return MFI.hasStackObjects();
}
bool SIRegisterInfo::requiresVirtualBaseRegisters(
@@ -372,8 +350,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
DL = Ins->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = Subtarget.getInstrInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
if (Offset == 0) {
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
@@ -382,9 +359,9 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
}
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
.addImm(Offset);
@@ -399,11 +376,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
-
- MachineBasicBlock *MBB = MI.getParent();
- MachineFunction *MF = MBB->getParent();
- const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = Subtarget.getInstrInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
#ifndef NDEBUG
// FIXME: Is it possible to be storing a frame index to itself?
@@ -419,12 +392,15 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
#endif
MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+#ifndef NDEBUG
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction *MF = MBB->getParent();
+#endif
assert(FIOp && FIOp->isFI() && "frame index must be address operand");
assert(TII->isMUBUF(MI));
assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
- MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
- "should only be seeing frame offset relative FrameIndex");
-
+ MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() &&
+ "should only be seeing stack pointer offset relative FrameIndex");
MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
int64_t NewOffset = OffsetOp->getImm() + Offset;
@@ -564,7 +540,8 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
}
}
-static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
+static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
+ MachineBasicBlock::iterator MI,
int Index,
unsigned Lane,
unsigned ValueReg,
@@ -572,7 +549,6 @@ static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MI->getParent()->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
@@ -595,11 +571,12 @@ static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
-static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
+static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
MachineFrameInfo &MFI,
MachineBasicBlock::iterator MI,
int Index,
int64_t Offset) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
MachineBasicBlock *MBB = MI->getParent();
const DebugLoc &DL = MI->getDebugLoc();
bool IsStore = MI->mayStore();
@@ -611,7 +588,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
return false;
const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
- if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
+ if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr())
return true;
MachineInstrBuilder NewMI =
@@ -624,6 +601,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
.addImm(0) // slc
.addImm(0) // tfe
.addImm(0) // dlc
+ .addImm(0) // swz
.cloneMemRefs(*MI);
const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
@@ -645,7 +623,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
RegScavenger *RS) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MI->getParent()->getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -707,8 +684,9 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
}
for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
- unsigned SubReg = NumSubRegs == 1 ?
- ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
+ Register SubReg = NumSubRegs == 1
+ ? Register(ValueReg)
+ : getSubReg(ValueReg, getSubRegFromChannel(i));
unsigned SOffsetRegState = 0;
unsigned SrcDstRegState = getDefRegState(!IsStore);
@@ -718,7 +696,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
SrcDstRegState |= getKillRegState(IsKill);
}
- auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
+ auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill);
if (!MIB.getInstr()) {
unsigned FinalReg = SubReg;
@@ -743,6 +721,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
.addImm(0) // slc
.addImm(0) // tfe
.addImm(0) // dlc
+ .addImm(0) // swz
.addMemOperand(NewMMO);
if (!IsStore && TmpReg != AMDGPU::NoRegister)
@@ -763,22 +742,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
}
}
-static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
- bool Store) {
- if (SuperRegSize % 16 == 0) {
- return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
- AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
- }
-
- if (SuperRegSize % 8 == 0) {
- return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
- AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
- }
-
- return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
- AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
-}
-
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
int Index,
RegScavenger *RS,
@@ -794,98 +757,37 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
if (OnlyToVGPR && !SpillToVGPR)
return false;
- MachineRegisterInfo &MRI = MF->getRegInfo();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- unsigned SuperReg = MI->getOperand(0).getReg();
+ Register SuperReg = MI->getOperand(0).getReg();
bool IsKill = MI->getOperand(0).isKill();
const DebugLoc &DL = MI->getDebugLoc();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- bool SpillToSMEM = spillSGPRToSMEM();
- if (SpillToSMEM && OnlyToVGPR)
- return false;
-
- Register FrameReg = getFrameRegister(*MF);
-
assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
SuperReg != MFI->getFrameOffsetReg() &&
SuperReg != MFI->getScratchWaveOffsetReg()));
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
- unsigned OffsetReg = AMDGPU::M0;
unsigned M0CopyReg = AMDGPU::NoRegister;
- if (SpillToSMEM) {
- if (RS->isRegUsed(AMDGPU::M0)) {
- M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
- .addReg(AMDGPU::M0);
- }
- }
-
- unsigned ScalarStoreOp;
unsigned EltSize = 4;
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
- if (SpillToSMEM && isSGPRClass(RC)) {
- // XXX - if private_element_size is larger than 4 it might be useful to be
- // able to spill wider vmem spills.
- std::tie(EltSize, ScalarStoreOp) =
- getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
- }
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
+ // Scavenged temporary VGPR to use. It must be scavenged once for any number
+ // of spilled subregs.
+ Register TmpVGPR;
+
// SubReg carries the "Kill" flag when SubReg == SuperReg.
unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- unsigned SubReg = NumSubRegs == 1 ?
- SuperReg : getSubReg(SuperReg, SplitParts[i]);
-
- if (SpillToSMEM) {
- int64_t FrOffset = FrameInfo.getObjectOffset(Index);
-
- // The allocated memory size is really the wavefront size * the frame
- // index size. The widest register class is 64 bytes, so a 4-byte scratch
- // allocation is enough to spill this in a single stack object.
- //
- // FIXME: Frame size/offsets are computed earlier than this, so the extra
- // space is still unnecessarily allocated.
-
- unsigned Align = FrameInfo.getObjectAlignment(Index);
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
- MachineMemOperand *MMO
- = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- EltSize, MinAlign(Align, EltSize * i));
-
- // SMEM instructions only support a single offset, so increment the wave
- // offset.
-
- int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
- if (Offset != 0) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
- .addReg(FrameReg)
- .addImm(Offset);
- } else {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addReg(FrameReg);
- }
-
- BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
- .addReg(SubReg, getKillRegState(IsKill)) // sdata
- .addReg(MFI->getScratchRSrcReg()) // sbase
- .addReg(OffsetReg, RegState::Kill) // soff
- .addImm(0) // glc
- .addImm(0) // dlc
- .addMemOperand(MMO);
-
- continue;
- }
+ Register SubReg =
+ NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
if (SpillToVGPR) {
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
@@ -915,15 +817,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
return false;
// Spill SGPR to a frame index.
- // TODO: Should VI try to spill to VGPR and then spill to SMEM?
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- // TODO: Should VI try to spill to VGPR and then spill to SMEM?
+ if (!TmpVGPR.isValid())
+ TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
MachineInstrBuilder Mov
- = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(SubReg, SubKillState);
-
// There could be undef components of a spilled super register.
// TODO: Can we detect this and skip the spill?
if (NumSubRegs > 1) {
@@ -941,7 +841,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
EltSize, MinAlign(Align, EltSize * i));
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
- .addReg(TmpReg, RegState::Kill) // src
+ .addReg(TmpVGPR, RegState::Kill) // src
.addFrameIndex(Index) // vaddr
        .addReg(MFI->getScratchRSrcReg()) // srsrc
.addReg(MFI->getStackPtrOffsetReg()) // soffset
@@ -965,7 +865,6 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
RegScavenger *RS,
bool OnlyToVGPR) const {
MachineFunction *MF = MI->getParent()->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
@@ -976,84 +875,27 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
return false;
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const DebugLoc &DL = MI->getDebugLoc();
- unsigned SuperReg = MI->getOperand(0).getReg();
- bool SpillToSMEM = spillSGPRToSMEM();
- if (SpillToSMEM && OnlyToVGPR)
- return false;
+ Register SuperReg = MI->getOperand(0).getReg();
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
- unsigned OffsetReg = AMDGPU::M0;
unsigned M0CopyReg = AMDGPU::NoRegister;
- if (SpillToSMEM) {
- if (RS->isRegUsed(AMDGPU::M0)) {
- M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
- .addReg(AMDGPU::M0);
- }
- }
-
unsigned EltSize = 4;
- unsigned ScalarLoadOp;
-
- Register FrameReg = getFrameRegister(*MF);
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
- if (SpillToSMEM && isSGPRClass(RC)) {
- // XXX - if private_element_size is larger than 4 it might be useful to be
- // able to spill wider vmem spills.
- std::tie(EltSize, ScalarLoadOp) =
- getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
- }
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
- // SubReg carries the "Kill" flag when SubReg == SuperReg.
- int64_t FrOffset = FrameInfo.getObjectOffset(Index);
+ Register TmpVGPR;
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- unsigned SubReg = NumSubRegs == 1 ?
- SuperReg : getSubReg(SuperReg, SplitParts[i]);
-
- if (SpillToSMEM) {
- // FIXME: Size may be > 4 but extra bytes wasted.
- unsigned Align = FrameInfo.getObjectAlignment(Index);
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
- MachineMemOperand *MMO
- = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- EltSize, MinAlign(Align, EltSize * i));
-
- // Add i * 4 offset
- int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
- if (Offset != 0) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
- .addReg(FrameReg)
- .addImm(Offset);
- } else {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addReg(FrameReg);
- }
-
- auto MIB =
- BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
- .addReg(MFI->getScratchRSrcReg()) // sbase
- .addReg(OffsetReg, RegState::Kill) // soff
- .addImm(0) // glc
- .addImm(0) // dlc
- .addMemOperand(MMO);
-
- if (NumSubRegs > 1 && i == 0)
- MIB.addReg(SuperReg, RegState::ImplicitDefine);
-
- continue;
- }
+ Register SubReg =
+ NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
if (SpillToVGPR) {
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
@@ -1071,7 +913,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
// Restore SGPR from a stack slot.
// FIXME: We should use S_LOAD_DWORD here for VI.
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ if (!TmpVGPR.isValid())
+ TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
unsigned Align = FrameInfo.getObjectAlignment(Index);
MachinePointerInfo PtrInfo
@@ -1081,7 +924,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
MachineMemOperand::MOLoad, EltSize,
MinAlign(Align, EltSize * i));
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR)
.addFrameIndex(Index) // vaddr
.addReg(MFI->getScratchRSrcReg()) // srsrc
.addReg(MFI->getStackPtrOffsetReg()) // soffset
@@ -1090,7 +933,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
auto MIB =
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
- .addReg(TmpReg, RegState::Kill);
+ .addReg(TmpVGPR, RegState::Kill);
if (NumSubRegs > 1)
MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
@@ -1141,11 +984,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
MachineFunction *MF = MI->getParent()->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -1255,13 +1096,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// In an entry function/kernel the offset is already the absolute
// address relative to the frame register.
- unsigned DiffReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register TmpDiffReg =
+ RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
+
+      // If there's no free SGPR, modify the FP in place.
+ Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg;
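+      // If the FP was modified in place here, it is restored below once the
+      // offset computation is done.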
bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
Register ResultReg = IsCopy ?
MI->getOperand(0).getReg() :
- MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
.addReg(FrameReg)
@@ -1271,35 +1115,80 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (Offset == 0) {
// XXX - This never happens because of emergency scavenging slot at 0?
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
- .addImm(Log2_32(ST.getWavefrontSize()))
+ .addImm(ST.getWavefrontSizeLog2())
.addReg(DiffReg);
} else {
- unsigned ScaledReg
- = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
- .addImm(Log2_32(ST.getWavefrontSize()))
- .addReg(DiffReg, RegState::Kill);
-
- // TODO: Fold if use instruction is another add of a constant.
- if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
- TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
- .addImm(Offset)
- .addReg(ScaledReg, RegState::Kill)
- .addImm(0); // clamp bit
+ if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
+ Register ScaledReg =
+ RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MIB, 0);
+
+ BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+ ScaledReg)
+ .addImm(ST.getWavefrontSizeLog2())
+ .addReg(DiffReg, RegState::Kill);
+
+ const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
+
+ // TODO: Fold if use instruction is another add of a constant.
+ if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+ // FIXME: This can fail
+ MIB.addImm(Offset);
+ MIB.addReg(ScaledReg, RegState::Kill);
+ if (!IsVOP2)
+ MIB.addImm(0); // clamp bit
+ } else {
+ Register ConstOffsetReg =
+ RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false);
+
+ // This should always be able to use the unused carry out.
+ assert(ConstOffsetReg && "this scavenge should not be able to fail");
+
+ BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
+ .addImm(Offset);
+ MIB.addReg(ConstOffsetReg, RegState::Kill);
+ MIB.addReg(ScaledReg, RegState::Kill);
+ MIB.addImm(0); // clamp bit
+ }
} else {
- unsigned ConstOffsetReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
- .addImm(Offset);
- TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
- .addReg(ConstOffsetReg, RegState::Kill)
+          // We have to produce a carry out, and there isn't a free SGPR
+ // pair for it. We can keep the whole computation on the SALU to
+ // avoid clobbering an additional register at the cost of an extra
+ // mov.
+
+ // We may have 1 free scratch SGPR even though a carry out is
+ // unavailable. Only one additional mov is needed.
+ Register TmpScaledReg =
+ RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
+ Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : DiffReg;
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
+ .addReg(DiffReg, RegState::Kill)
+ .addImm(ST.getWavefrontSizeLog2());
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
- .addImm(0); // clamp bit
+ .addImm(Offset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
+ .addReg(ScaledReg, RegState::Kill);
+
+ // If there were truly no free SGPRs, we need to undo everything.
+ if (!TmpScaledReg.isValid()) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
+ .addReg(ScaledReg, RegState::Kill)
+ .addImm(Offset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
+ .addReg(DiffReg, RegState::Kill)
+ .addImm(ST.getWavefrontSizeLog2());
+ }
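+
+          // A sketch of the worst-case sequence emitted above when no scratch
+          // SGPR is free and the FP itself serves as the scratch register
+          // (assuming wave64, i.e. a wavefront-size shift of 6):
+          //   s_lshr_b32 fp, fp, 6         ; scale the byte offset
+          //   s_add_u32  fp, fp, <Offset>  ; add the frame-index offset
+          //   v_mov_b32  vN, fp            ; copy the result into ResultReg
+          //   s_sub_u32  fp, fp, <Offset>  ; undo the add
+          //   s_lshl_b32 fp, fp, 6         ; undo the shift, restoring the FP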
}
}
+ if (!TmpDiffReg.isValid()) {
+ // Restore the FP.
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg)
+ .addReg(FrameReg)
+ .addReg(MFI->getScratchWaveOffsetReg());
+ }
+
// Don't introduce an extra copy if we're just materializing in a mov.
if (IsCopy)
MI->eraseFromParent();
@@ -1325,7 +1214,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int64_t NewOffset = OldImm + Offset;
if (isUInt<12>(NewOffset) &&
- buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
+ buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
MI->eraseFromParent();
return;
}
@@ -1337,7 +1226,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int64_t Offset = FrameInfo.getObjectOffset(Index);
FIOp.ChangeToImmediate(Offset);
if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
.addImm(Offset);
FIOp.ChangeToRegister(TmpReg, false, false, true);
@@ -1347,27 +1236,13 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
- const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
- unsigned Size = getRegSizeInBits(*RC);
- unsigned AltName = AMDGPU::NoRegAltName;
-
- switch (Size) {
- case 32: AltName = AMDGPU::Reg32; break;
- case 64: AltName = AMDGPU::Reg64; break;
- case 96: AltName = AMDGPU::Reg96; break;
- case 128: AltName = AMDGPU::Reg128; break;
- case 160: AltName = AMDGPU::Reg160; break;
- case 256: AltName = AMDGPU::Reg256; break;
- case 512: AltName = AMDGPU::Reg512; break;
- case 1024: AltName = AMDGPU::Reg1024; break;
- }
- return AMDGPUInstPrinter::getRegisterName(Reg, AltName);
+ return AMDGPUInstPrinter::getRegisterName(Reg);
}
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
- assert(!TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(!Register::isVirtualRegister(Reg));
static const TargetRegisterClass *const BaseClasses[] = {
&AMDGPU::VGPR_32RegClass,
@@ -1408,8 +1283,6 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
unsigned Size = getRegSizeInBits(*RC);
- if (Size < 32)
- return false;
switch (Size) {
case 32:
return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
@@ -1427,8 +1300,11 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
case 1024:
return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
+ case 1:
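+    // Size 1 presumably corresponds to VReg_1, the pseudo register class
+    // used for divergent i1 values carried in VGPRs.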
+ return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr;
default:
- llvm_unreachable("Invalid register class size");
+ assert(Size < 32 && "Invalid register class size");
+ return false;
}
}
@@ -1476,6 +1352,8 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
return &AMDGPU::VReg_512RegClass;
case 1024:
return &AMDGPU::VReg_1024RegClass;
+ case 1:
+ return &AMDGPU::VReg_1RegClass;
default:
llvm_unreachable("Invalid register class size");
}
@@ -1509,7 +1387,7 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
case 96:
return &AMDGPU::SReg_96RegClass;
case 128:
- return &AMDGPU::SReg_128RegClass;
+ return &AMDGPU::SGPR_128RegClass;
case 160:
return &AMDGPU::SReg_160RegClass;
case 256:
@@ -1539,7 +1417,7 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
case 3:
return &AMDGPU::SReg_96RegClass;
case 4:
- return &AMDGPU::SReg_128RegClass;
+ return &AMDGPU::SGPR_128RegClass;
case 5:
return &AMDGPU::SReg_160RegClass;
case 8:
@@ -1587,6 +1465,15 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
}
}
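+// Inline constants are allowed for AC (accumulator) operand types only when
+// the subtarget does not have the MFMA inline-literal bug; other source
+// operand types keep the usual OPERAND_SRC range check.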
+bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
+ if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
+ return !ST.hasMFMAInlineLiteralBug();
+
+ return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
+ OpType <= AMDGPU::OPERAND_SRC_LAST;
+}
+
bool SIRegisterInfo::shouldRewriteCopySrc(
const TargetRegisterClass *DefRC,
unsigned DefSubReg,
@@ -1802,7 +1689,7 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
const TargetRegisterClass*
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
unsigned Reg) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return MRI.getRegClass(Reg);
return getPhysRegClass(Reg);
@@ -1845,8 +1732,6 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
@@ -1900,18 +1785,22 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
return isWave32 ?
&AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
case AMDGPU::SGPRRegBankID:
- return &AMDGPU::SReg_32_XM0RegClass;
+ return &AMDGPU::SReg_32RegClass;
case AMDGPU::SCCRegBankID:
// This needs to return an allocatable class, so don't bother returning
// the dummy SCC class.
- return &AMDGPU::SReg_32_XM0RegClass;
+ //
+ // FIXME: This is a grotesque hack. We use SGPR_32 as an indication this
+    // was not a VCC bank value since we use the larger class SReg_32 for
+ // other values. These should all use SReg_32.
+ return &AMDGPU::SGPR_32RegClass;
default:
llvm_unreachable("unknown register bank");
}
}
case 32:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
- &AMDGPU::SReg_32_XM0RegClass;
+ &AMDGPU::SReg_32RegClass;
case 64:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
&AMDGPU::SReg_64_XEXECRegClass;
@@ -1920,7 +1809,7 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
&AMDGPU::SReg_96RegClass;
case 128:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
- &AMDGPU::SReg_128RegClass;
+ &AMDGPU::SGPR_128RegClass;
case 160:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
&AMDGPU::SReg_160RegClass;
@@ -1930,10 +1819,13 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
case 512:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
&AMDGPU::SReg_512RegClass;
+ case 1024:
+ return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass :
+ &AMDGPU::SReg_1024RegClass;
default:
if (Size < 32)
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
- &AMDGPU::SReg_32_XM0RegClass;
+ &AMDGPU::SReg_32RegClass;
return nullptr;
}
}
@@ -1941,9 +1833,12 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
const TargetRegisterClass *
SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
const MachineRegisterInfo &MRI) const {
- if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()))
+ const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
+ if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
- return nullptr;
+
+ const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>();
+ return getAllocatableClass(RC);
}
unsigned SIRegisterInfo::getVCC() const {
@@ -1974,7 +1869,7 @@ MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
SlotIndex UseIdx = LIS->getInstructionIndex(Use);
SlotIndex DefIdx;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (!LIS->hasInterval(Reg))
return nullptr;
LiveInterval &LI = LIS->getInterval(Reg);
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index 34487c96e72e..ac3dea1a1a28 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -27,6 +27,7 @@ class SIMachineFunctionInfo;
class SIRegisterInfo final : public AMDGPURegisterInfo {
private:
+ const GCNSubtarget &ST;
unsigned SGPRSetID;
unsigned VGPRSetID;
unsigned AGPRSetID;
@@ -34,7 +35,6 @@ private:
BitVector VGPRPressureSets;
BitVector AGPRPressureSets;
bool SpillSGPRToVGPR;
- bool SpillSGPRToSMEM;
bool isWave32;
void classifyPressureSet(unsigned PSetID, unsigned Reg,
@@ -46,10 +46,6 @@ public:
return SpillSGPRToVGPR;
}
- bool spillSGPRToSMEM() const {
- return SpillSGPRToSMEM;
- }
-
/// Return the end register initially reserved for the scratch buffer in case
/// spilling is needed.
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
@@ -141,7 +137,7 @@ public:
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const {
const TargetRegisterClass *RC;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
RC = MRI.getRegClass(Reg);
else
RC = getPhysRegClass(Reg);
@@ -193,10 +189,7 @@ public:
/// \returns True if operands defined with this operand type can accept
  /// an inline constant, e.g. an integer value in the range [-16, 64] or
/// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
- bool opCanUseInlineConstant(unsigned OpType) const {
- return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
- OpType <= AMDGPU::OPERAND_SRC_LAST;
- }
+ bool opCanUseInlineConstant(unsigned OpType) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC,
@@ -270,7 +263,7 @@ public:
const MachineRegisterInfo &MRI) const override;
const TargetRegisterClass *getBoolRC() const {
- return isWave32 ? &AMDGPU::SReg_32_XM0RegClass
+ return isWave32 ? &AMDGPU::SReg_32RegClass
: &AMDGPU::SReg_64RegClass;
}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index d5948a7862cc..82219cbdf3b2 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -37,50 +37,52 @@ class getSubRegs<int size> {
!if(!eq(size, 16), ret16, ret32))))));
}
-let Namespace = "AMDGPU" in {
-defset list<RegAltNameIndex> AllRegAltNameIndices = {
- def Reg32 : RegAltNameIndex;
- def Reg64 : RegAltNameIndex;
- def Reg96 : RegAltNameIndex;
- def Reg128 : RegAltNameIndex;
- def Reg160 : RegAltNameIndex;
- def Reg256 : RegAltNameIndex;
- def Reg512 : RegAltNameIndex;
- def Reg1024 : RegAltNameIndex;
-}
-}
+// Generates a list of sequential register tuple names.
+// E.g. RegSeqNames<3,2,2,"s">.ret -> [ "s[0:1]", "s[2:3]" ]
+class RegSeqNames<int last_reg, int stride, int size, string prefix,
+ int start = 0> {
+ int next = !add(start, stride);
+ int end_reg = !add(!add(start, size), -1);
+ list<string> ret =
+ !if(!le(end_reg, last_reg),
+ !listconcat([prefix # "[" # start # ":" # end_reg # "]"],
+ RegSeqNames<last_reg, stride, size, prefix, next>.ret),
+ []);
+}
+
+// Generates a list of dags for register tuples.
+class RegSeqDags<RegisterClass RC, int last_reg, int stride, int size,
+ int start = 0> {
+ dag trunc_rc = (trunc RC,
+ !if(!and(!eq(stride, 1), !eq(start, 0)),
+ !add(!add(last_reg, 2), !mul(size, -1)),
+ !add(last_reg, 1)));
+ list<dag> ret =
+ !if(!lt(start, size),
+ !listconcat([(add (decimate (shl trunc_rc, start), stride))],
+ RegSeqDags<RC, last_reg, stride, size, !add(start, 1)>.ret),
+ []);
+}
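+// E.g. (a sketch) RegSeqDags<VGPR_32, 255, 1, 2>.ret unrolls to two dags,
+// one per tuple lane:
+//   [ (add (decimate (shl (trunc VGPR_32, 255), 0), 1)),
+//     (add (decimate (shl (trunc VGPR_32, 256), 1), 1)) ]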
+
+class SIRegisterTuples<list<SubRegIndex> Indices, RegisterClass RC,
+ int last_reg, int stride, int size, string prefix> :
+ RegisterTuples<Indices,
+ RegSeqDags<RC, last_reg, stride, size>.ret,
+ RegSeqNames<last_reg, stride, size, prefix>.ret>;
//===----------------------------------------------------------------------===//
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
-class SIReg <string n, bits<16> regIdx = 0, string prefix = "",
- int regNo = !cast<int>(regIdx)> :
- Register<n, !if(!eq(prefix, ""),
- [ n, n, n, n, n, n, n, n ],
- [ prefix # regNo,
- prefix # "[" # regNo # ":" # !and(!add(regNo, 1), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 2), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 3), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 4), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 7), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 15), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 31), 255) # "]",
- ])>,
+class SIReg <string n, bits<16> regIdx = 0> :
+ Register<n>,
DwarfRegNum<[!cast<int>(HWEncoding)]> {
let Namespace = "AMDGPU";
- let RegAltNameIndices = AllRegAltNameIndices;
  // This is not yet the complete register encoding. An additional
// bit is set for VGPRs.
let HWEncoding = regIdx;
}
-class SIRegisterWithSubRegs<string n, list<Register> subregs> :
- RegisterWithSubRegs<n, subregs> {
- let RegAltNameIndices = AllRegAltNameIndices;
- let AltNames = [ n, n, n, n, n, n, n, n ];
-}
-
// Special Registers
def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
@@ -93,7 +95,7 @@ def SP_REG : SIReg<"sp", 0>;
def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>;
// VCC for 64-bit instructions
-def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
DwarfRegAlias<VCC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -103,7 +105,7 @@ def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
def EXEC_LO : SIReg<"exec_lo", 126>;
def EXEC_HI : SIReg<"exec_hi", 127>;
-def EXEC : SIRegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
+def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
DwarfRegAlias<EXEC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -134,7 +136,7 @@ def LDS_DIRECT : SIReg <"src_lds_direct", 254>;
def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
-def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
+def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
DwarfRegAlias<XNACK_MASK_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -145,7 +147,7 @@ def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;
-def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
+def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
DwarfRegAlias<TBA_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -155,7 +157,7 @@ def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
def TMA_LO : SIReg<"tma_lo", 110>;
def TMA_HI : SIReg<"tma_hi", 111>;
-def TMA : SIRegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
+def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
DwarfRegAlias<TMA_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -175,7 +177,7 @@ multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
}
class FlatReg <Register lo, Register hi, bits<16> encoding> :
- SIRegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+ RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
DwarfRegAlias<lo> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -191,19 +193,19 @@ def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
// SGPR registers
foreach Index = 0-105 in {
- def SGPR#Index : SIReg <"SGPR"#Index, Index, "s">;
+ def SGPR#Index : SIReg <"s"#Index, Index>;
}
// VGPR registers
foreach Index = 0-255 in {
- def VGPR#Index : SIReg <"VGPR"#Index, Index, "v"> {
+ def VGPR#Index : SIReg <"v"#Index, Index> {
let HWEncoding{8} = 1;
}
}
// AccVGPR registers
foreach Index = 0-255 in {
- def AGPR#Index : SIReg <"AGPR"#Index, Index, "a"> {
+ def AGPR#Index : SIReg <"a"#Index, Index> {
let HWEncoding{8} = 1;
}
}
@@ -226,102 +228,32 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add (sequence "SGPR%u", 0, 105)), Reg32> {
+ (add (sequence "SGPR%u", 0, 105))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
let AllocationPriority = 9;
}
// SGPR 64-bit registers
-def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret,
- [(add (decimate SGPR_32, 2)),
- (add (decimate (shl SGPR_32, 1), 2))]>;
+def SGPR_64Regs : SIRegisterTuples<getSubRegs<2>.ret, SGPR_32, 105, 2, 2, "s">;
// SGPR 96-bit registers. No operations use these; they exist for symmetry with 96-bit VGPRs.
-def SGPR_96Regs : RegisterTuples<getSubRegs<3>.ret,
- [(add (decimate SGPR_32, 3)),
- (add (decimate (shl SGPR_32, 1), 3)),
- (add (decimate (shl SGPR_32, 2), 3))]>;
+def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 3, 3, "s">;
// SGPR 128-bit registers
-def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
- [(add (decimate SGPR_32, 4)),
- (add (decimate (shl SGPR_32, 1), 4)),
- (add (decimate (shl SGPR_32, 2), 4)),
- (add (decimate (shl SGPR_32, 3), 4))]>;
+def SGPR_128Regs : SIRegisterTuples<getSubRegs<4>.ret, SGPR_32, 105, 4, 4, "s">;
// SGPR 160-bit registers. No operations use these; they exist for symmetry with 160-bit VGPRs.
-def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret,
- [(add (decimate SGPR_32, 4)),
- (add (decimate (shl SGPR_32, 1), 4)),
- (add (decimate (shl SGPR_32, 2), 4)),
- (add (decimate (shl SGPR_32, 3), 4)),
- (add (decimate (shl SGPR_32, 4), 4))]>;
+def SGPR_160Regs : SIRegisterTuples<getSubRegs<5>.ret, SGPR_32, 105, 4, 5, "s">;
// SGPR 256-bit registers
-def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
- [(add (decimate SGPR_32, 4)),
- (add (decimate (shl SGPR_32, 1), 4)),
- (add (decimate (shl SGPR_32, 2), 4)),
- (add (decimate (shl SGPR_32, 3), 4)),
- (add (decimate (shl SGPR_32, 4), 4)),
- (add (decimate (shl SGPR_32, 5), 4)),
- (add (decimate (shl SGPR_32, 6), 4)),
- (add (decimate (shl SGPR_32, 7), 4))]>;
+def SGPR_256Regs : SIRegisterTuples<getSubRegs<8>.ret, SGPR_32, 105, 4, 8, "s">;
// SGPR 512-bit registers
-def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
- [(add (decimate SGPR_32, 4)),
- (add (decimate (shl SGPR_32, 1), 4)),
- (add (decimate (shl SGPR_32, 2), 4)),
- (add (decimate (shl SGPR_32, 3), 4)),
- (add (decimate (shl SGPR_32, 4), 4)),
- (add (decimate (shl SGPR_32, 5), 4)),
- (add (decimate (shl SGPR_32, 6), 4)),
- (add (decimate (shl SGPR_32, 7), 4)),
- (add (decimate (shl SGPR_32, 8), 4)),
- (add (decimate (shl SGPR_32, 9), 4)),
- (add (decimate (shl SGPR_32, 10), 4)),
- (add (decimate (shl SGPR_32, 11), 4)),
- (add (decimate (shl SGPR_32, 12), 4)),
- (add (decimate (shl SGPR_32, 13), 4)),
- (add (decimate (shl SGPR_32, 14), 4)),
- (add (decimate (shl SGPR_32, 15), 4))]>;
+def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s">;
// SGPR 1024-bit registers
-def SGPR_1024Regs : RegisterTuples<getSubRegs<32>.ret,
- [(add (decimate SGPR_32, 4)),
- (add (decimate (shl SGPR_32, 1), 4)),
- (add (decimate (shl SGPR_32, 2), 4)),
- (add (decimate (shl SGPR_32, 3), 4)),
- (add (decimate (shl SGPR_32, 4), 4)),
- (add (decimate (shl SGPR_32, 5), 4)),
- (add (decimate (shl SGPR_32, 6), 4)),
- (add (decimate (shl SGPR_32, 7), 4)),
- (add (decimate (shl SGPR_32, 8), 4)),
- (add (decimate (shl SGPR_32, 9), 4)),
- (add (decimate (shl SGPR_32, 10), 4)),
- (add (decimate (shl SGPR_32, 11), 4)),
- (add (decimate (shl SGPR_32, 12), 4)),
- (add (decimate (shl SGPR_32, 13), 4)),
- (add (decimate (shl SGPR_32, 14), 4)),
- (add (decimate (shl SGPR_32, 15), 4)),
- (add (decimate (shl SGPR_32, 16), 4)),
- (add (decimate (shl SGPR_32, 17), 4)),
- (add (decimate (shl SGPR_32, 18), 4)),
- (add (decimate (shl SGPR_32, 19), 4)),
- (add (decimate (shl SGPR_32, 20), 4)),
- (add (decimate (shl SGPR_32, 21), 4)),
- (add (decimate (shl SGPR_32, 22), 4)),
- (add (decimate (shl SGPR_32, 23), 4)),
- (add (decimate (shl SGPR_32, 24), 4)),
- (add (decimate (shl SGPR_32, 25), 4)),
- (add (decimate (shl SGPR_32, 26), 4)),
- (add (decimate (shl SGPR_32, 27), 4)),
- (add (decimate (shl SGPR_32, 28), 4)),
- (add (decimate (shl SGPR_32, 29), 4)),
- (add (decimate (shl SGPR_32, 30), 4)),
- (add (decimate (shl SGPR_32, 31), 4))]>;
+def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">;
// Trap handler TMP 32-bit registers
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
@@ -330,51 +262,21 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
}
// Trap handler TMP 64-bit registers
-def TTMP_64Regs : RegisterTuples<getSubRegs<2>.ret,
- [(add (decimate TTMP_32, 2)),
- (add (decimate (shl TTMP_32, 1), 2))]>;
+def TTMP_64Regs : SIRegisterTuples<getSubRegs<2>.ret, TTMP_32, 15, 2, 2, "ttmp">;
// Trap handler TMP 128-bit registers
-def TTMP_128Regs : RegisterTuples<getSubRegs<4>.ret,
- [(add (decimate TTMP_32, 4)),
- (add (decimate (shl TTMP_32, 1), 4)),
- (add (decimate (shl TTMP_32, 2), 4)),
- (add (decimate (shl TTMP_32, 3), 4))]>;
-
-def TTMP_256Regs : RegisterTuples<getSubRegs<8>.ret,
- [(add (decimate TTMP_32, 4)),
- (add (decimate (shl TTMP_32, 1), 4)),
- (add (decimate (shl TTMP_32, 2), 4)),
- (add (decimate (shl TTMP_32, 3), 4)),
- (add (decimate (shl TTMP_32, 4), 4)),
- (add (decimate (shl TTMP_32, 5), 4)),
- (add (decimate (shl TTMP_32, 6), 4)),
- (add (decimate (shl TTMP_32, 7), 4))]>;
-
-def TTMP_512Regs : RegisterTuples<getSubRegs<16>.ret,
- [(add (decimate TTMP_32, 4)),
- (add (decimate (shl TTMP_32, 1), 4)),
- (add (decimate (shl TTMP_32, 2), 4)),
- (add (decimate (shl TTMP_32, 3), 4)),
- (add (decimate (shl TTMP_32, 4), 4)),
- (add (decimate (shl TTMP_32, 5), 4)),
- (add (decimate (shl TTMP_32, 6), 4)),
- (add (decimate (shl TTMP_32, 7), 4)),
- (add (decimate (shl TTMP_32, 8), 4)),
- (add (decimate (shl TTMP_32, 9), 4)),
- (add (decimate (shl TTMP_32, 10), 4)),
- (add (decimate (shl TTMP_32, 11), 4)),
- (add (decimate (shl TTMP_32, 12), 4)),
- (add (decimate (shl TTMP_32, 13), 4)),
- (add (decimate (shl TTMP_32, 14), 4)),
- (add (decimate (shl TTMP_32, 15), 4))]>;
+def TTMP_128Regs : SIRegisterTuples<getSubRegs<4>.ret, TTMP_32, 15, 4, 4, "ttmp">;
+
+def TTMP_256Regs : SIRegisterTuples<getSubRegs<8>.ret, TTMP_32, 15, 4, 8, "ttmp">;
+
+def TTMP_512Regs : SIRegisterTuples<getSubRegs<16>.ret, TTMP_32, 15, 4, 16, "ttmp">;
class TmpRegTuplesBase<int index, int size,
list<Register> subRegs,
list<SubRegIndex> indices = getSubRegs<size>.ret,
int index1 = !add(index, !add(size, -1)),
string name = "ttmp["#index#":"#index1#"]"> :
- SIRegisterWithSubRegs<name, subRegs> {
+ RegisterWithSubRegs<name, subRegs> {
let HWEncoding = subRegs[0].HWEncoding;
let SubRegIndices = indices;
}
@@ -448,196 +350,80 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT
TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10,
TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>;
+class RegisterTypes<list<ValueType> reg_types> {
+ list<ValueType> types = reg_types;
+}
+
+def Reg16Types : RegisterTypes<[i16, f16]>;
+def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
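+// Note: p2/p3/p5/p6 appear to be the 32-bit AMDGPU pointer types (the
+// region, local, private and 32-bit constant address spaces, respectively).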
+
+
// VGPR 32-bit registers
// i16/f16 only on VI+
-def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add (sequence "VGPR%u", 0, 255)), Reg32> {
+def VGPR_32 : RegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
+ (add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
}
// VGPR 64-bit registers
-def VGPR_64 : RegisterTuples<getSubRegs<2>.ret,
- [(add (trunc VGPR_32, 255)),
- (add (shl VGPR_32, 1))]>;
+def VGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, VGPR_32, 255, 1, 2, "v">;
// VGPR 96-bit registers
-def VGPR_96 : RegisterTuples<getSubRegs<3>.ret,
- [(add (trunc VGPR_32, 254)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2))]>;
+def VGPR_96 : SIRegisterTuples<getSubRegs<3>.ret, VGPR_32, 255, 1, 3, "v">;
// VGPR 128-bit registers
-def VGPR_128 : RegisterTuples<getSubRegs<4>.ret,
- [(add (trunc VGPR_32, 253)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2)),
- (add (shl VGPR_32, 3))]>;
+def VGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, VGPR_32, 255, 1, 4, "v">;
// VGPR 160-bit registers
-def VGPR_160 : RegisterTuples<getSubRegs<5>.ret,
- [(add (trunc VGPR_32, 252)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2)),
- (add (shl VGPR_32, 3)),
- (add (shl VGPR_32, 4))]>;
+def VGPR_160 : SIRegisterTuples<getSubRegs<5>.ret, VGPR_32, 255, 1, 5, "v">;
// VGPR 256-bit registers
-def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
- [(add (trunc VGPR_32, 249)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2)),
- (add (shl VGPR_32, 3)),
- (add (shl VGPR_32, 4)),
- (add (shl VGPR_32, 5)),
- (add (shl VGPR_32, 6)),
- (add (shl VGPR_32, 7))]>;
+def VGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, VGPR_32, 255, 1, 8, "v">;
// VGPR 512-bit registers
-def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
- [(add (trunc VGPR_32, 241)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2)),
- (add (shl VGPR_32, 3)),
- (add (shl VGPR_32, 4)),
- (add (shl VGPR_32, 5)),
- (add (shl VGPR_32, 6)),
- (add (shl VGPR_32, 7)),
- (add (shl VGPR_32, 8)),
- (add (shl VGPR_32, 9)),
- (add (shl VGPR_32, 10)),
- (add (shl VGPR_32, 11)),
- (add (shl VGPR_32, 12)),
- (add (shl VGPR_32, 13)),
- (add (shl VGPR_32, 14)),
- (add (shl VGPR_32, 15))]>;
+def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;
// VGPR 1024-bit registers
-def VGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
- [(add (trunc VGPR_32, 225)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2)),
- (add (shl VGPR_32, 3)),
- (add (shl VGPR_32, 4)),
- (add (shl VGPR_32, 5)),
- (add (shl VGPR_32, 6)),
- (add (shl VGPR_32, 7)),
- (add (shl VGPR_32, 8)),
- (add (shl VGPR_32, 9)),
- (add (shl VGPR_32, 10)),
- (add (shl VGPR_32, 11)),
- (add (shl VGPR_32, 12)),
- (add (shl VGPR_32, 13)),
- (add (shl VGPR_32, 14)),
- (add (shl VGPR_32, 15)),
- (add (shl VGPR_32, 16)),
- (add (shl VGPR_32, 17)),
- (add (shl VGPR_32, 18)),
- (add (shl VGPR_32, 19)),
- (add (shl VGPR_32, 20)),
- (add (shl VGPR_32, 21)),
- (add (shl VGPR_32, 22)),
- (add (shl VGPR_32, 23)),
- (add (shl VGPR_32, 24)),
- (add (shl VGPR_32, 25)),
- (add (shl VGPR_32, 26)),
- (add (shl VGPR_32, 27)),
- (add (shl VGPR_32, 28)),
- (add (shl VGPR_32, 29)),
- (add (shl VGPR_32, 30)),
- (add (shl VGPR_32, 31))]>;
+def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;
// AccVGPR 32-bit registers
def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add (sequence "AGPR%u", 0, 255)), Reg32> {
+ (add (sequence "AGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
}
// AGPR 64-bit registers
-def AGPR_64 : RegisterTuples<getSubRegs<2>.ret,
- [(add (trunc AGPR_32, 255)),
- (add (shl AGPR_32, 1))]>;
+def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">;
// AGPR 128-bit registers
-def AGPR_128 : RegisterTuples<getSubRegs<4>.ret,
- [(add (trunc AGPR_32, 253)),
- (add (shl AGPR_32, 1)),
- (add (shl AGPR_32, 2)),
- (add (shl AGPR_32, 3))]>;
+def AGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, AGPR_32, 255, 1, 4, "a">;
// AGPR 512-bit registers
-def AGPR_512 : RegisterTuples<getSubRegs<16>.ret,
- [(add (trunc AGPR_32, 241)),
- (add (shl AGPR_32, 1)),
- (add (shl AGPR_32, 2)),
- (add (shl AGPR_32, 3)),
- (add (shl AGPR_32, 4)),
- (add (shl AGPR_32, 5)),
- (add (shl AGPR_32, 6)),
- (add (shl AGPR_32, 7)),
- (add (shl AGPR_32, 8)),
- (add (shl AGPR_32, 9)),
- (add (shl AGPR_32, 10)),
- (add (shl AGPR_32, 11)),
- (add (shl AGPR_32, 12)),
- (add (shl AGPR_32, 13)),
- (add (shl AGPR_32, 14)),
- (add (shl AGPR_32, 15))]>;
+def AGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, AGPR_32, 255, 1, 16, "a">;
// AGPR 1024-bit registers
-def AGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
- [(add (trunc AGPR_32, 225)),
- (add (shl AGPR_32, 1)),
- (add (shl AGPR_32, 2)),
- (add (shl AGPR_32, 3)),
- (add (shl AGPR_32, 4)),
- (add (shl AGPR_32, 5)),
- (add (shl AGPR_32, 6)),
- (add (shl AGPR_32, 7)),
- (add (shl AGPR_32, 8)),
- (add (shl AGPR_32, 9)),
- (add (shl AGPR_32, 10)),
- (add (shl AGPR_32, 11)),
- (add (shl AGPR_32, 12)),
- (add (shl AGPR_32, 13)),
- (add (shl AGPR_32, 14)),
- (add (shl AGPR_32, 15)),
- (add (shl AGPR_32, 16)),
- (add (shl AGPR_32, 17)),
- (add (shl AGPR_32, 18)),
- (add (shl AGPR_32, 19)),
- (add (shl AGPR_32, 20)),
- (add (shl AGPR_32, 21)),
- (add (shl AGPR_32, 22)),
- (add (shl AGPR_32, 23)),
- (add (shl AGPR_32, 24)),
- (add (shl AGPR_32, 25)),
- (add (shl AGPR_32, 26)),
- (add (shl AGPR_32, 27)),
- (add (shl AGPR_32, 28)),
- (add (shl AGPR_32, 29)),
- (add (shl AGPR_32, 30)),
- (add (shl AGPR_32, 31))]>;
+def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;
//===----------------------------------------------------------------------===//
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG), Reg32> {
+ (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
}
def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
- (add PRIVATE_RSRC_REG), Reg128> {
+ (add PRIVATE_RSRC_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
}
def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add LDS_DIRECT), Reg32> {
+ (add LDS_DIRECT)> {
let isAllocatable = 0;
let CopyCost = -1;
}
@@ -648,41 +434,40 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
- SRC_VCCZ, SRC_EXECZ, SRC_SCC), Reg32> {
+ SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
let AllocationPriority = 10;
}
def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
- (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS), Reg32> {
+ (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
let AllocationPriority = 10;
}
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
- (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI), Reg32> {
+ (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 10;
}
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
- (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI), Reg32> {
+ (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
let AllocationPriority = 10;
}
def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
- (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS),
- Reg32> {
+ (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
}
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
- (add SGPR_64Regs), Reg64> {
+ (add SGPR_64Regs)> {
let CopyCost = 1;
let AllocationPriority = 11;
}
// CCR (call clobbered registers) SGPR 64-bit registers
def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
- (add (trunc SGPR_64, 16)), Reg64> {
+ (add (trunc SGPR_64, 16))> {
let CopyCost = SGPR_64.CopyCost;
let AllocationPriority = SGPR_64.AllocationPriority;
}
@@ -693,13 +478,13 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
}
def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA), Reg64> {
+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 13;
}
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
- (add SReg_64_XEXEC, EXEC), Reg64> {
+ (add SReg_64_XEXEC, EXEC)> {
let CopyCost = 1;
let AllocationPriority = 13;
}
@@ -722,17 +507,17 @@ let CopyCost = 2 in {
// There are no 3-component scalar instructions, but this is needed
// for symmetry with VGPRs.
def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
- (add SGPR_96Regs), Reg96> {
+ (add SGPR_96Regs)> {
let AllocationPriority = 14;
}
def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
- (add SGPR_96), Reg96> {
+ (add SGPR_96)> {
let AllocationPriority = 14;
}
def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
- (add SGPR_128Regs), Reg128> {
+ (add SGPR_128Regs)> {
let AllocationPriority = 15;
}
@@ -742,8 +527,9 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
}
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
- (add SGPR_128, TTMP_128), Reg128> {
+ (add SGPR_128, TTMP_128)> {
let AllocationPriority = 15;
+ let isAllocatable = 0;
}
} // End CopyCost = 2
@@ -751,17 +537,16 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
// There are no 5-component scalar instructions, but this is needed
// for symmetry with VGPRs.
def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
- (add SGPR_160Regs), Reg160> {
+ (add SGPR_160Regs)> {
let AllocationPriority = 16;
}
def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
- (add SGPR_160), Reg160> {
+ (add SGPR_160)> {
let AllocationPriority = 16;
}
-def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs),
- Reg256> {
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
let AllocationPriority = 17;
}
@@ -770,14 +555,14 @@ def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
}
def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
- (add SGPR_256, TTMP_256), Reg256> {
+ (add SGPR_256, TTMP_256)> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;
let AllocationPriority = 17;
}
def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add SGPR_512Regs), Reg512> {
+ (add SGPR_512Regs)> {
let AllocationPriority = 18;
}
@@ -787,31 +572,31 @@ def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
}
def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add SGPR_512, TTMP_512), Reg512> {
+ (add SGPR_512, TTMP_512)> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
let AllocationPriority = 18;
}
def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add VGPR_32, LDS_DIRECT_CLASS), Reg32> {
+ (add VGPR_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
}
def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add SGPR_1024Regs), Reg1024> {
+ (add SGPR_1024Regs)> {
let AllocationPriority = 19;
}
def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add SGPR_1024), Reg1024> {
+ (add SGPR_1024)> {
let CopyCost = 16;
let AllocationPriority = 19;
}
// Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
- (add VGPR_64), Reg64> {
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4], 32,
+ (add VGPR_64)> {
let Size = 64;
// Requires 2 v_mov_b32 to copy
@@ -819,7 +604,7 @@ def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32
let AllocationPriority = 2;
}
-def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> {
+def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> {
let Size = 96;
// Requires 3 v_mov_b32 to copy
@@ -828,7 +613,7 @@ def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96>
}
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
- (add VGPR_128), Reg128> {
+ (add VGPR_128)> {
let Size = 128;
// Requires 4 v_mov_b32 to copy
@@ -837,7 +622,7 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
}
def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
- (add VGPR_160), Reg160> {
+ (add VGPR_160)> {
let Size = 160;
// Requires 5 v_mov_b32 to copy
@@ -846,28 +631,28 @@ def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
}
def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
- (add VGPR_256), Reg256> {
+ (add VGPR_256)> {
let Size = 256;
let CopyCost = 8;
let AllocationPriority = 6;
}
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add VGPR_512), Reg512> {
+ (add VGPR_512)> {
let Size = 512;
let CopyCost = 16;
let AllocationPriority = 7;
}
def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add VGPR_1024), Reg1024> {
+ (add VGPR_1024)> {
let Size = 1024;
let CopyCost = 32;
let AllocationPriority = 8;
}
def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
- (add AGPR_64), Reg64> {
+ (add AGPR_64)> {
let Size = 64;
let CopyCost = 5;
@@ -875,7 +660,7 @@ def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32
}
def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
- (add AGPR_128), Reg128> {
+ (add AGPR_128)> {
let Size = 128;
// Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr
@@ -884,40 +669,39 @@ def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
}
def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add AGPR_512), Reg512> {
+ (add AGPR_512)> {
let Size = 512;
let CopyCost = 33;
let AllocationPriority = 7;
}
def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add AGPR_1024), Reg1024> {
+ (add AGPR_1024)> {
let Size = 1024;
let CopyCost = 65;
let AllocationPriority = 8;
}
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32), Reg32> {
- let Size = 32;
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
+ let Size = 1;
}
def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add VGPR_32, SReg_32, LDS_DIRECT_CLASS), Reg32> {
+ (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
}
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64),
- Reg64> {
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
let isAllocatable = 0;
}
def AV_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add AGPR_32, VGPR_32), Reg32> {
+ (add AGPR_32, VGPR_32)> {
let isAllocatable = 0;
}
def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32,
- (add AReg_64, VReg_64), Reg64> {
+ (add AReg_64, VReg_64)> {
let isAllocatable = 0;
}
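
The SIRegisterTuples change above replaces each hand-unrolled RegisterTuples body with a single parameterized def. A minimal sketch in plain C++ (a stand-in, not TableGen or the LLVM API) of the enumeration those removed bodies spelled out: one width-W tuple starting at every AGPR index, truncated so the last tuple still fits in the 256-register file, which is where the removed trunc counts 253, 241 and 225 come from.

    #include <cstdio>

    int main() {
      const int NumAGPRs = 256; // a0 .. a255
      for (int Width : {2, 4, 16, 32}) {
        // Overlapping tuples per width: 256 - W + 1, matching the removed
        // (trunc AGPR_32, N) operands 255, 253, 241 and 225.
        int NumTuples = NumAGPRs - Width + 1;
        std::printf("width %2d -> %3d tuples, last is a%d..a%d\n", Width,
                    NumTuples, NumAGPRs - Width, NumAGPRs - 1);
      }
      return 0;
    }
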
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 7ee178149c7a..8afca2cdc325 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -77,8 +77,8 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
// Try to fold Src0
MachineOperand &Src0 = MI.getOperand(Src0Idx);
if (Src0.isReg()) {
- unsigned Reg = Src0.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
+ Register Reg = Src0.getReg();
+ if (Register::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
if (Def && Def->isMoveImmediate()) {
MachineOperand &MovSrc = Def->getOperand(1);
@@ -360,8 +360,7 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
}
if (NewImm != 0) {
- if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
- SrcReg->isReg()) {
+ if (Register::isVirtualRegister(Dest->getReg()) && SrcReg->isReg()) {
MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
return true;
@@ -394,12 +393,11 @@ static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
if (!MO.isReg())
continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
- TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (Register::isPhysicalRegister(Reg) &&
+ Register::isPhysicalRegister(MO.getReg())) {
if (TRI.regsOverlap(Reg, MO.getReg()))
return true;
- } else if (MO.getReg() == Reg &&
- TargetRegisterInfo::isVirtualRegister(Reg)) {
+ } else if (MO.getReg() == Reg && Register::isVirtualRegister(Reg)) {
LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
TRI.getSubRegIndexLaneMask(MO.getSubReg());
if (Overlap.any())
@@ -425,7 +423,7 @@ static TargetInstrInfo::RegSubRegPair
getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
} else {
LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
@@ -459,13 +457,13 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
MovT.getOpcode() == AMDGPU::COPY);
- unsigned T = MovT.getOperand(0).getReg();
+ Register T = MovT.getOperand(0).getReg();
unsigned Tsub = MovT.getOperand(0).getSubReg();
MachineOperand &Xop = MovT.getOperand(1);
if (!Xop.isReg())
return nullptr;
- unsigned X = Xop.getReg();
+ Register X = Xop.getReg();
unsigned Xsub = Xop.getSubReg();
unsigned Size = TII->getOpSize(MovT, 0) / 4;
@@ -484,7 +482,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
MovY.getOperand(1).getSubReg() != Tsub)
continue;
- unsigned Y = MovY.getOperand(0).getReg();
+ Register Y = MovY.getOperand(0).getReg();
unsigned Ysub = MovY.getOperand(0).getSubReg();
if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
@@ -579,7 +577,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// XXX - not exactly a check for post-regalloc run.
MachineOperand &Src = MI.getOperand(1);
if (Src.isImm() &&
- TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
+ Register::isPhysicalRegister(MI.getOperand(0).getReg())) {
int32_t ReverseImm;
if (isReverseInlineImm(TII, Src, ReverseImm)) {
MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
@@ -643,8 +641,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// FIXME: This could work better if hints worked with subregisters. If
// we have a vector add of a constant, we usually don't get the correct
// allocation due to the subregister usage.
- if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
- Src0->isReg()) {
+ if (Register::isVirtualRegister(Dest->getReg()) && Src0->isReg()) {
MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
continue;
@@ -672,8 +669,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &Dst = MI.getOperand(0);
MachineOperand &Src = MI.getOperand(1);
- if (Src.isImm() &&
- TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
+ if (Src.isImm() && Register::isPhysicalRegister(Dst.getReg())) {
int32_t ReverseImm;
if (isKImmOperand(TII, Src))
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
@@ -721,8 +717,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
if (TII->isVOPC(Op32)) {
- unsigned DstReg = MI.getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ if (Register::isVirtualRegister(DstReg)) {
// VOPC instructions can only write to the VCC register. We can't
// force them to use VCC here, because this is only one register and
// cannot deal with sequences which would require multiple copies of
@@ -745,8 +741,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
TII->getNamedOperand(MI, AMDGPU::OpName::src2);
if (!Src2->isReg())
continue;
- unsigned SReg = Src2->getReg();
- if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ Register SReg = Src2->getReg();
+ if (Register::isVirtualRegister(SReg)) {
MRI.setRegAllocationHint(SReg, 0, VCCReg);
continue;
}
@@ -766,7 +762,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
bool Next = false;
if (SDst->getReg() != VCCReg) {
- if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
+ if (Register::isVirtualRegister(SDst->getReg()))
MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg);
Next = true;
}
@@ -774,7 +770,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// All of the instructions with carry outs also have an SGPR input in
// src2.
if (Src2 && Src2->getReg() != VCCReg) {
- if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
+ if (Register::isVirtualRegister(Src2->getReg()))
MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg);
Next = true;
}
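
Most hunks in SIShrinkInstructions.cpp are one mechanical substitution: the static register predicates move from TargetRegisterInfo to the Register value type, and plain unsigned locals become Register. A self-contained sketch of that shape (a mock, not the real llvm/CodeGen/Register.h; the real class encodes virtual registers with the sign bit set, which the mock copies):

    #include <cassert>

    struct Register {
      unsigned Reg;
      Register(unsigned R) : Reg(R) {}
      operator unsigned() const { return Reg; } // keeps old unsigned call sites working
      static bool isVirtualRegister(unsigned R) { return int(R) < 0; }
      static bool isPhysicalRegister(unsigned R) { return int(R) > 0; }
    };

    int main() {
      Register Virt(0x80000000u), Phys(42u);
      assert(Register::isVirtualRegister(Virt));
      assert(Register::isPhysicalRegister(Phys));
      return 0;
    }
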
diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 4e07efff55d8..cb4cf68d709a 100644
--- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -273,12 +273,12 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
if (!Use.isReg() || !Use.isUse())
continue;
- unsigned Reg = Use.getReg();
+ Register Reg = Use.getReg();
// Handle physical registers that we need to track; this is mostly relevant
// for VCC, which can appear as the (implicit) input of a uniform branch,
// e.g. when a loop counter is stored in a VGPR.
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Register::isVirtualRegister(Reg)) {
if (Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO)
continue;
@@ -312,6 +312,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
char GlobalFlags = 0;
bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs");
SmallVector<MachineInstr *, 4> SetInactiveInstrs;
+ SmallVector<MachineInstr *, 4> SoftWQMInstrs;
// We need to visit the basic blocks in reverse post-order so that we visit
// defs before uses, in particular so that we don't accidentally mark an
@@ -340,6 +341,10 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
// correct, so we need it to be in WQM.
Flags = StateWQM;
LowerToCopyInstrs.push_back(&MI);
+ } else if (Opcode == AMDGPU::SOFT_WQM) {
+ LowerToCopyInstrs.push_back(&MI);
+ SoftWQMInstrs.push_back(&MI);
+ continue;
} else if (Opcode == AMDGPU::WWM) {
// The WWM intrinsic doesn't make the same guarantee, and plus it needs
// to be executed in WQM or Exact so that its copy doesn't clobber
@@ -356,8 +361,8 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
if (Inactive.isUndef()) {
LowerToCopyInstrs.push_back(&MI);
} else {
- unsigned Reg = Inactive.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = Inactive.getReg();
+ if (Register::isVirtualRegister(Reg)) {
for (MachineInstr &DefMI : MRI->def_instructions(Reg))
markInstruction(DefMI, StateWWM, Worklist);
}
@@ -385,9 +390,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
- if (!TRI->isVirtualRegister(Reg) &&
+ if (!Register::isVirtualRegister(Reg) &&
TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) {
Flags = StateWQM;
break;
@@ -407,9 +412,12 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
// Make sure that any SET_INACTIVE instructions are computed in WQM if WQM is
// ever used anywhere in the function. This implements the corresponding
// semantics of @llvm.amdgcn.set.inactive.
+ // Similarly for SOFT_WQM instructions, implementing @llvm.amdgcn.softwqm.
if (GlobalFlags & StateWQM) {
for (MachineInstr *MI : SetInactiveInstrs)
markInstruction(*MI, StateWQM, Worklist);
+ for (MachineInstr *MI : SoftWQMInstrs)
+ markInstruction(*MI, StateWQM, Worklist);
}
return GlobalFlags;
@@ -548,7 +556,7 @@ bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
MachineBasicBlock::iterator
SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before) {
- unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
MachineInstr *Save =
BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg)
@@ -832,7 +840,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
for (MachineInstr *MI : LiveMaskQueries) {
const DebugLoc &DL = MI->getDebugLoc();
- unsigned Dest = MI->getOperand(0).getReg();
+ Register Dest = MI->getOperand(0).getReg();
MachineInstr *Copy =
BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
.addReg(LiveMaskReg);
@@ -847,13 +855,12 @@ void SIWholeQuadMode::lowerCopyInstrs() {
for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
MI->RemoveOperand(i);
- const unsigned Reg = MI->getOperand(0).getReg();
+ const Register Reg = MI->getOperand(0).getReg();
if (TRI->isVGPR(*MRI, Reg)) {
- const TargetRegisterClass *regClass =
- TargetRegisterInfo::isVirtualRegister(Reg)
- ? MRI->getRegClass(Reg)
- : TRI->getPhysRegClass(Reg);
+ const TargetRegisterClass *regClass = Register::isVirtualRegister(Reg)
+ ? MRI->getRegClass(Reg)
+ : TRI->getPhysRegClass(Reg);
const unsigned MovOp = TII->getMovOpcode(regClass);
MI->setDesc(TII->get(MovOp));
@@ -885,7 +892,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
if (!(GlobalFlags & StateWQM)) {
lowerLiveMaskQueries(Exec);
- if (!(GlobalFlags & StateWWM))
+ if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty())
return !LiveMaskQueries.empty();
} else {
// Store a copy of the original live mask when required
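
The SIWholeQuadMode.cpp changes extend an existing two-phase scheme to SOFT_WQM: candidates are collected while scanning, then promoted to WQM only if the scan proved WQM is needed somewhere in the function. A simplified sketch with stand-in types (MachineInstr and the state flags are mocks; markInstruction is reduced to a flag OR):

    #include <vector>

    enum : char { StateWQM = 1, StateWWM = 2 };
    struct MachineInstr { char Flags = 0; };

    char finishScan(char GlobalFlags, std::vector<MachineInstr *> &SoftWQMInstrs) {
      if (GlobalFlags & StateWQM)            // WQM required anywhere?
        for (MachineInstr *MI : SoftWQMInstrs)
          MI->Flags |= StateWQM;             // stands in for markInstruction(...)
      return GlobalFlags;
    }

    int main() {
      MachineInstr MI;
      std::vector<MachineInstr *> Soft{&MI};
      finishScan(StateWQM, Soft);
      return MI.Flags == StateWQM ? 0 : 1;
    }
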
diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td
index 1b410b6b5912..1a74ebbf8165 100644
--- a/lib/Target/AMDGPU/SMInstructions.td
+++ b/lib/Target/AMDGPU/SMInstructions.td
@@ -793,9 +793,18 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// selector to prefer those.
let AddedComplexity = 100 in {
-defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
+foreach vt = Reg32Types.types in {
+defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
+}
+
+foreach vt = SReg_64.RegTypes in {
+defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
+}
+
+foreach vt = SReg_128.RegTypes in {
+defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
+}
+
defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index dfafdccc05a3..d31a49f428ee 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -181,7 +181,9 @@ def S_BCNT0_I32_B64 : SOP1_32_64 <"s_bcnt0_i32_b64">;
def S_BCNT1_I32_B32 : SOP1_32 <"s_bcnt1_i32_b32",
[(set i32:$sdst, (ctpop i32:$src0))]
>;
-def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64">;
+def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64",
+ [(set i32:$sdst, (ctpop i64:$src0))]
+>;
} // End Defs = [SCC]
def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">;
@@ -417,16 +419,16 @@ def S_SUBB_U32 : SOP2_32 <"s_subb_u32",
let isCommutable = 1 in {
def S_MIN_I32 : SOP2_32 <"s_min_i32",
- [(set i32:$sdst, (UniformBinFrag<smin> i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (smin i32:$src0, i32:$src1))]
>;
def S_MIN_U32 : SOP2_32 <"s_min_u32",
- [(set i32:$sdst, (UniformBinFrag<umin> i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (umin i32:$src0, i32:$src1))]
>;
def S_MAX_I32 : SOP2_32 <"s_max_i32",
- [(set i32:$sdst, (UniformBinFrag<smax> i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (smax i32:$src0, i32:$src1))]
>;
def S_MAX_U32 : SOP2_32 <"s_max_u32",
- [(set i32:$sdst, (UniformBinFrag<umax> i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (umax i32:$src0, i32:$src1))]
>;
} // End isCommutable = 1
} // End Defs = [SCC]
@@ -853,13 +855,13 @@ class SOPC_Base <bits<7> op, RegisterOperand rc0, RegisterOperand rc1,
let Defs = [SCC];
}
class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
- string opName, PatLeaf cond> : SOPC_Base <
+ string opName, SDPatternOperator cond> : SOPC_Base <
op, rc, rc, opName,
[(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > {
}
class SOPC_CMP_32<bits<7> op, string opName,
- PatLeaf cond = COND_NULL, string revOp = opName>
+ SDPatternOperator cond = COND_NULL, string revOp = opName>
: SOPC_Helper<op, SSrc_b32, i32, opName, cond>,
Commutable_REV<revOp, !eq(revOp, opName)>,
SOPKInstTable<0, opName> {
@@ -868,7 +870,7 @@ class SOPC_CMP_32<bits<7> op, string opName,
}
class SOPC_CMP_64<bits<7> op, string opName,
- PatLeaf cond = COND_NULL, string revOp = opName>
+ SDPatternOperator cond = COND_NULL, string revOp = opName>
: SOPC_Helper<op, SSrc_b64, i64, opName, cond>,
Commutable_REV<revOp, !eq(revOp, opName)> {
let isCompare = 1;
@@ -1076,8 +1078,6 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
[(int_amdgcn_s_barrier)]> {
let SchedRW = [WriteBarrier];
let simm16 = 0;
- let mayLoad = 1;
- let mayStore = 1;
let isConvergent = 1;
}
@@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
- [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
+ [(int_amdgcn_s_waitcnt timm:$simm16)]>;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
@@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
// maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
// maximum really 15 on VI?
def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
- "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
+ "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
@@ -1110,12 +1110,11 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
let Uses = [EXEC, M0] in {
// FIXME: Should this be mayLoad+mayStore?
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
- [(AMDGPUsendmsg (i32 imm:$simm16))]
->;
+ [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>;
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
- [(AMDGPUsendmsghalt (i32 imm:$simm16))]
->;
+ [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>;
+
} // End Uses = [EXEC, M0]
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> {
@@ -1126,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0;
}
def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
- [(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> {
+ [(int_amdgcn_s_incperflevel timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
}
def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
- [(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> {
+ [(int_amdgcn_s_decperflevel timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
@@ -1169,7 +1168,10 @@ let SubtargetPredicate = isGFX10Plus in {
def S_ROUND_MODE :
SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
def S_DENORM_MODE :
- SOPP<0x025, (ins s16imm:$simm16), "s_denorm_mode $simm16">;
+ SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
+ [(SIdenorm_mode (i32 timm:$simm16))]> {
+ let hasSideEffects = 1;
+ }
def S_TTRACEDATA_IMM :
SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
} // End SubtargetPredicate = isGFX10Plus
@@ -1178,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in {
// S_GETREG_B32 Intrinsic Pattern.
//===----------------------------------------------------------------------===//
def : GCNPat <
- (int_amdgcn_s_getreg imm:$simm16),
+ (int_amdgcn_s_getreg timm:$simm16),
(S_GETREG_B32 (as_i16imm $simm16))
>;
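
Among the SOPInstructions.td changes, S_BCNT1_I32_B64 gains a selection pattern, (set i32:$sdst, (ctpop i64:$src0)): a 64-bit population count with a 32-bit scalar result. A scalar model of that semantics (plain C++20, not compiler code):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    uint32_t s_bcnt1_i32_b64(uint64_t Src0) {
      return static_cast<uint32_t>(std::popcount(Src0)); // ctpop i64 -> i32
    }

    int main() {
      assert(s_bcnt1_i32_b64(0xFF00FF00FF00FF00ull) == 32);
      return 0;
    }
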
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index e90f40e6abea..afb2fd987afd 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -131,29 +131,70 @@ int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
struct MUBUFInfo {
uint16_t Opcode;
uint16_t BaseOpcode;
- uint8_t dwords;
+ uint8_t elements;
bool has_vaddr;
bool has_srsrc;
bool has_soffset;
};
+struct MTBUFInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t elements;
+ bool has_vaddr;
+ bool has_srsrc;
+ bool has_soffset;
+};
+
+#define GET_MTBUFInfoTable_DECL
+#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
+int getMTBUFBaseOpcode(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
+ return Info ? Info->BaseOpcode : -1;
+}
+
+int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
+ const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
+ return Info ? Info->Opcode : -1;
+}
+
+int getMTBUFElements(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
+ return Info ? Info->elements : 0;
+}
+
+bool getMTBUFHasVAddr(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
+ return Info ? Info->has_vaddr : false;
+}
+
+bool getMTBUFHasSrsrc(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
+ return Info ? Info->has_srsrc : false;
+}
+
+bool getMTBUFHasSoffset(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
+ return Info ? Info->has_soffset : false;
+}
+
int getMUBUFBaseOpcode(unsigned Opc) {
const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
return Info ? Info->BaseOpcode : -1;
}
-int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) {
- const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords);
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
+ const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
return Info ? Info->Opcode : -1;
}
-int getMUBUFDwords(unsigned Opc) {
+int getMUBUFElements(unsigned Opc) {
const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
- return Info ? Info->dwords : 0;
+ return Info ? Info->elements : 0;
}
bool getMUBUFHasVAddr(unsigned Opc) {
@@ -241,7 +282,7 @@ unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
}
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
- return getMaxWavesPerEU() * getEUsPerCU(STI);
+ return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
}
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
@@ -253,9 +294,11 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
return 1;
}
-unsigned getMaxWavesPerEU() {
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
// FIXME: Need to take scratch memory into account.
- return 10;
+ if (!isGFX10(*STI))
+ return 10;
+ return 20;
}
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
@@ -317,7 +360,7 @@ unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
if (Version.Major >= 10)
return 0;
- if (WavesPerEU >= getMaxWavesPerEU())
+ if (WavesPerEU >= getMaxWavesPerEU(STI))
return 0;
unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
@@ -394,17 +437,19 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
}
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
- return 256;
+ if (!isGFX10(*STI))
+ return 256;
+ return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
}
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
- return getTotalNumVGPRs(STI);
+ return 256;
}
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- if (WavesPerEU >= getMaxWavesPerEU())
+ if (WavesPerEU >= getMaxWavesPerEU(STI))
return 0;
unsigned MinNumVGPRs =
alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
@@ -510,7 +555,7 @@ bool isReadOnlySegment(const GlobalValue *GV) {
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
- return TT.getOS() != Triple::AMDHSA;
+ return TT.getOS() == Triple::AMDPAL;
}
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
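
The AMDGPUBaseInfo.cpp hunks make getMaxWavesPerEU subtarget-dependent (GFX10 doubles the per-EU wave limit to 20) and derive the per-CU limit from it. A sketch with a stand-in subtarget type (the real query takes an MCSubtargetInfo*):

    #include <cassert>

    struct SubtargetInfo { bool IsGFX10; };

    unsigned getMaxWavesPerEU(const SubtargetInfo &STI) {
      return STI.IsGFX10 ? 20 : 10; // FIXME in the patch: scratch not considered
    }

    unsigned getMaxWavesPerCU(const SubtargetInfo &STI, unsigned EUsPerCU) {
      return getMaxWavesPerEU(STI) * EUsPerCU; // mirrors the fixed call site
    }

    int main() {
      assert(getMaxWavesPerCU({false}, 4) == 40);
      assert(getMaxWavesPerCU({true}, 4) == 80);
      return 0;
    }
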
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 209ef7eef749..f78dadd447ff 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -94,7 +94,7 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
-unsigned getMaxWavesPerEU();
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
@@ -264,13 +264,31 @@ LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);
LLVM_READONLY
+int getMTBUFBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
+int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
+
+LLVM_READONLY
+int getMTBUFElements(unsigned Opc);
+
+LLVM_READONLY
+bool getMTBUFHasVAddr(unsigned Opc);
+
+LLVM_READONLY
+bool getMTBUFHasSrsrc(unsigned Opc);
+
+LLVM_READONLY
+bool getMTBUFHasSoffset(unsigned Opc);
+
+LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);
LLVM_READONLY
-int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords);
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
LLVM_READONLY
-int getMUBUFDwords(unsigned Opc);
+int getMUBUFElements(unsigned Opc);
LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index db20d5ccf5f9..207e4232e829 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -21,6 +21,8 @@
#include "SIDefines.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/EndianStream.h"
diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td
index 6bc416ed7d4b..f1cdc3097dc0 100644
--- a/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/lib/Target/AMDGPU/VOP1Instructions.td
@@ -104,9 +104,21 @@ multiclass VOP1Inst <string opName, VOPProfile P,
SDPatternOperator node = null_frag> {
def _e32 : VOP1_Pseudo <opName, P>;
def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
- def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
+
+ foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
+
foreach _ = BoolToList<P.HasExtDPP>.ret in
def _dpp : VOP1_DPP_Pseudo <opName, P>;
+
+ def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
+ def : MnemonicAlias<opName#"_e64", opName>, LetDummies;
+
+ foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;
+
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def : MnemonicAlias<opName#"_dpp", opName>, LetDummies;
}
// Special profile for instructions which have clamp
@@ -227,10 +239,10 @@ defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]
defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
-defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>;
-defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>;
+defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
+defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
-defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;
+defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;
let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
@@ -434,7 +446,7 @@ let SubtargetPredicate = isGFX10Plus in {
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
-class VOP1_DPP<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
+class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
VOP_DPP<ps.OpName, p, isDPP16> {
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
@@ -448,8 +460,9 @@ class VOP1_DPP<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 =
let Inst{31-25} = 0x3f;
}
-class VOP1_DPP16<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
- VOP1_DPP<op, ps, p, 1> {
+class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> :
+ VOP1_DPP<op, ps, p, 1>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10> {
let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
let SubtargetPredicate = HasDPP16;
}
@@ -492,6 +505,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
@@ -499,11 +513,13 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
- def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> {
let DecoderNamespace = "SDWA10";
}
}
multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
let DecoderNamespace = "DPP8";
}
@@ -704,10 +720,12 @@ multiclass VOP1_Real_e32e64_vi <bits<10> op> {
multiclass VOP1_Real_vi <bits<10> op> {
defm NAME : VOP1_Real_e32e64_vi <op>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
@@ -831,25 +849,25 @@ def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
-let OtherPredicates = [isGFX8GFX9] in {
+let OtherPredicates = [isGFX8Plus] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
- imm:$bound_ctrl)),
+ (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+ timm:$bound_ctrl)),
(V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl))
>;
def : GCNPat <
- (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
- imm:$bank_mask, imm:$bound_ctrl)),
+ (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+ timm:$bank_mask, timm:$bound_ctrl)),
(V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl))
>;
-} // End OtherPredicates = [isGFX8GFX9]
+} // End OtherPredicates = [isGFX8Plus]
let OtherPredicates = [isGFX8Plus] in {
def : GCNPat<
@@ -885,6 +903,7 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
defm NAME : VOP1_Real_e32e64_vi <op>;
}
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
@@ -904,23 +923,7 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
let OtherPredicates = [isGFX10Plus] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
(V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
>;
-
-def : GCNPat <
- (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
- imm:$bound_ctrl)),
- (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
- (as_i32imm $row_mask), (as_i32imm $bank_mask),
- (as_i1imm $bound_ctrl), (i32 0))
->;
-
-def : GCNPat <
- (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
- imm:$bank_mask, imm:$bound_ctrl)),
- (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
- (as_i32imm $row_mask), (as_i32imm $bank_mask),
- (as_i1imm $bound_ctrl), (i32 0))
->;
} // End OtherPredicates = [isGFX10Plus]
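
A recurring idiom in the VOP1 hunks is gating a def behind foreach _ = BoolToList<flag>.ret, which instantiates the body zero or one times. A loose C++ analogy (illustrative only, not how TableGen is implemented):

    #include <cstdio>

    template <bool HasExtSDWA> void defineVariants() {
      std::puts("def _e32");
      std::puts("def _e64");
      if constexpr (HasExtSDWA)      // like: foreach _ = BoolToList<...>.ret in
        std::puts("def _sdwa");      // body emitted only when the flag is set
    }

    int main() {
      defineVariants<true>();  // e32, e64, sdwa
      defineVariants<false>(); // e32, e64 only
      return 0;
    }
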
diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td
index 1b30cd2ed516..1ab0fc1ab58d 100644
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -147,7 +147,8 @@ multiclass VOP2Inst_sdwa<string opName,
string revOp = opName,
bit GFX9Renamed = 0> {
let renamedInGFX9 = GFX9Renamed in {
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+ foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
} // End renamedInGFX9 = GFX9Renamed
}
@@ -179,9 +180,10 @@ multiclass VOP2bInst <string opName,
let usesCustomInserter = !eq(P.NumSrcArgs, 2);
}
- def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
- let AsmMatchConverter = "cvtSdwaVOP2b";
- }
+ foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
+ let AsmMatchConverter = "cvtSdwaVOP2b";
+ }
foreach _ = BoolToList<P.HasExtDPP>.ret in
def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
@@ -220,9 +222,10 @@ multiclass VOP2eInst <string opName,
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
- let AsmMatchConverter = "cvtSdwaVOP2b";
- }
+ foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
+ let AsmMatchConverter = "cvtSdwaVOP2e";
+ }
foreach _ = BoolToList<P.HasExtDPP>.ret in
def _dpp : VOP2_DPP_Pseudo <opName, P>;
@@ -251,7 +254,9 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
- field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm);
+ field dag Ins32 = !if(!eq(vt.Size, 32),
+ (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
+ (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
field bit HasExt = 0;
// Hack to stop printing _e64
@@ -519,7 +524,7 @@ def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
} // End isConvergent = 1
defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
-defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
+defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, add_ctpop>;
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
@@ -539,9 +544,9 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfma
let SubtargetPredicate = isGFX6GFX7GFX10 in {
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32, srl>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32, sra>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32, shl>;
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX6GFX7GFX10
@@ -606,9 +611,9 @@ def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
} // End FPDPRounding = 1
-defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
-defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
-defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>;
+defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, lshl_rev>;
+defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, lshr_rev>;
+defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>;
let isCommutable = 1 in {
let FPDPRounding = 1 in {
@@ -618,16 +623,16 @@ defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
} // End FPDPRounding = 1
-defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
-defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
+defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16, add>;
+defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16, sub>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
-defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
+defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
-defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
-defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
-defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
-defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>;
+defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>;
+defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>;
+defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>;
+defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>;
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
isConvertibleToThreeAddress = 1 in {
@@ -653,16 +658,17 @@ defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
let Constraints = "$vdst = $src2",
DisableEncoding="$src2",
isConvertibleToThreeAddress = 1,
- isCommutable = 1 in {
+ isCommutable = 1,
+ IsDOT = 1 in {
let SubtargetPredicate = HasDot5Insts in
- defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
+ defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
let SubtargetPredicate = HasDot6Insts in
- defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
+ defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
let SubtargetPredicate = HasDot4Insts in
- defm V_DOT2C_I32_I16 : VOP2Inst_e32<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
+ defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
let SubtargetPredicate = HasDot3Insts in
- defm V_DOT8C_I32_I4 : VOP2Inst_e32<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
+ defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
}
let AddedComplexity = 30 in {
@@ -719,50 +725,17 @@ defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
-def ClearHI16 : OutPatFrag<(ops node:$op),
- (V_AND_B32_e64 $op, (V_MOV_B32_e32 (i32 0xffff)))>;
-
-multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst,
- bit PreservesHI16 = 0> {
-
-def : GCNPat<
- (op i16:$src0, i16:$src1),
- !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1))
->;
-
-def : GCNPat<
- (i32 (zext (op i16:$src0, i16:$src1))),
- !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1))
->;
-
-def : GCNPat<
- (i64 (zext (op i16:$src0, i16:$src1))),
- (REG_SEQUENCE VReg_64,
- !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1)),
- sub0,
- (V_MOV_B32_e32 (i32 0)), sub1)
->;
-}
-
-multiclass Bits_OpsRev_i16_Pats <SDPatternOperator op, Instruction inst,
- bit PreservesHI16 = 0> {
-
-def : GCNPat<
- (op i16:$src0, i16:$src1),
- !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0))
->;
+multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {
def : GCNPat<
(i32 (zext (op i16:$src0, i16:$src1))),
- !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0))
+ (inst $src0, $src1)
>;
-
def : GCNPat<
(i64 (zext (op i16:$src0, i16:$src1))),
(REG_SEQUENCE VReg_64,
- !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)),
- sub0,
+ (inst $src0, $src1), sub0,
(V_MOV_B32_e32 (i32 0)), sub1)
>;
}
@@ -774,53 +747,36 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
$src)
>;
-let Predicates = [Has16BitInsts] in {
-
-let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
-defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64>;
-defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64>;
-defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64>;
-defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64>;
-defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64>;
-defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64>;
-defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64>;
-}
-
-let Predicates = [Has16BitInsts, isGFX10Plus] in {
-defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64, 1>;
-defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64, 1>;
-defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64, 1>;
-defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64, 1>;
-defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64, 1>;
-defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64, 1>;
-defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64, 1>;
-}
-
+foreach vt = [i16, v2i16] in {
def : GCNPat <
- (and i16:$src0, i16:$src1),
- (V_AND_B32_e64 $src0, $src1)
+ (and vt:$src0, vt:$src1),
+ (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
def : GCNPat <
- (or i16:$src0, i16:$src1),
- (V_OR_B32_e64 $src0, $src1)
+ (or vt:$src0, vt:$src1),
+ (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
def : GCNPat <
- (xor i16:$src0, i16:$src1),
- (V_XOR_B32_e64 $src0, $src1)
+ (xor vt:$src0, vt:$src1),
+ (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
-
-let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
-defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64>;
-defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64>;
-defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64>;
}
-let Predicates = [Has16BitInsts, isGFX10Plus] in {
-defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64, 1>;
-defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64, 1>;
-defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64, 1>;
+let Predicates = [Has16BitInsts] in {
+
+let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
+defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<lshl_rev, V_LSHLREV_B16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<lshr_rev, V_LSHRREV_B16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<ashr_rev, V_ASHRREV_I16_e64>;
}
def : ZExt_i16_i1_Pat<zext>;
@@ -847,7 +803,7 @@ def : GCNPat<
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
-class VOP2_DPP<bits<6> op, VOP2_Pseudo ps,
+class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
string opName = ps.OpName, VOPProfile p = ps.Pfl,
bit IsDPP16 = 0> :
VOP_DPP<opName, p, IsDPP16> {
@@ -865,13 +821,18 @@ class VOP2_DPP<bits<6> op, VOP2_Pseudo ps,
let Inst{31} = 0x0;
}
-class VOP2_DPP16<bits<6> op, VOP2_Pseudo ps,
+class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
string opName = ps.OpName, VOPProfile p = ps.Pfl> :
VOP2_DPP<op, ps, opName, p, 1> {
let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
let SubtargetPredicate = HasDPP16;
}
+class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
+ string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+ Base_VOP2_DPP16<op, ps, opName, p>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10>;
+
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
string opName = ps.OpName, VOPProfile p = ps.Pfl> :
VOP_DPP8<ps.OpName, p> {
@@ -924,6 +885,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
@@ -931,11 +893,13 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
- def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
let DecoderNamespace = "SDWA10";
}
}
multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
let DecoderNamespace = "DPP8";
}
@@ -964,6 +928,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
let DecoderNamespace = "SDWA10" in {
multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
string asmName> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
@@ -973,13 +938,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
string asmName> {
- def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp")> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
let AsmString = asmName # ps.Pfl.AsmDPP16;
}
}
multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
string asmName> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
let AsmString = asmName # ps.Pfl.AsmDPP8;
@@ -989,13 +956,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
} // End DecoderNamespace = "SDWA10"
//===------------------------------ VOP2be ------------------------------===//
- multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> {
+ multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
def _e32_gfx10 :
VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
}
+ }
+ multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
def _e64_gfx10 :
VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
@@ -1003,6 +972,9 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
let AsmString = asmName # Ps.AsmOperands;
}
+ }
+ multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
@@ -1010,64 +982,76 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
let DecoderNamespace = "SDWA10";
}
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
+ def _sdwa_w32_gfx10 :
+ Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+ let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
+ let isAsmParserOnly = 1;
+ let DecoderNamespace = "SDWA10";
+ let WaveSizePredicate = isWave32;
+ }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
+ def _sdwa_w64_gfx10 :
+ Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+ let AsmString = asmName # Ps.AsmOperands;
+ let isAsmParserOnly = 1;
+ let DecoderNamespace = "SDWA10";
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp_gfx10 :
- VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
let AsmString = asmName # !subst(", vcc", "", AsmDPP);
let DecoderNamespace = "SDWA10";
}
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_w32_gfx10 :
+ Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_w64_gfx10 :
+ Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_gfx10 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
let DecoderNamespace = "DPP8";
}
-
- let WaveSizePredicate = isWave32 in {
- def _sdwa_w32_gfx10 :
- Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
- VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
- VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
- let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
- let isAsmParserOnly = 1;
- let DecoderNamespace = "SDWA10";
- }
- def _dpp_w32_gfx10 :
- VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
- string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
- let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
- let isAsmParserOnly = 1;
- }
- def _dpp8_w32_gfx10 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
- string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
- let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
- let isAsmParserOnly = 1;
- }
- } // End WaveSizePredicate = isWave32
-
- let WaveSizePredicate = isWave64 in {
- def _sdwa_w64_gfx10 :
- Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
- VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
- VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
- let AsmString = asmName # Ps.AsmOperands;
- let isAsmParserOnly = 1;
- let DecoderNamespace = "SDWA10";
- }
- def _dpp_w64_gfx10 :
- VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
- string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
- let AsmString = asmName # AsmDPP;
- let isAsmParserOnly = 1;
- }
- def _dpp8_w64_gfx10 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
- string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
- let AsmString = asmName # AsmDPP8;
- let isAsmParserOnly = 1;
- }
- } // End WaveSizePredicate = isWave64
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp8_w32_gfx10 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp8_w64_gfx10 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
}
//===----------------------------- VOP3Only -----------------------------===//
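[Note on the foreach guards used throughout this hunk: BoolToList<flag>.ret yields a one-element list when the flag is true and an empty list otherwise, so each guarded def is instantiated at most once, and never for profiles that lack the SDWA or DPP extension. A rough C++ analogue of that control flow, illustrative only and not an LLVM API:

    // Illustrative analogue: a flag becomes a 0- or 1-element sequence, so
    // iterating over it executes the body at most once.
    #include <vector>

    static std::vector<int> boolToList(bool Flag) {
      return Flag ? std::vector<int>{0} : std::vector<int>{};
    }

    static void defineRealIfSupported(bool HasExtSDWA9) {
      for (int Unused : boolToList(HasExtSDWA9)) {
        (void)Unused;
        // The guarded record body would be instantiated here.
      }
    }
]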
@@ -1088,8 +1072,19 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
-multiclass Base_VOP2_Real_gfx10<bits<6> op> :
- VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>;
+multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
+ VOP2be_Real_e32_gfx10<op, opName, asmName>,
+ VOP2be_Real_e64_gfx10<op, opName, asmName>,
+ VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
+ VOP2be_Real_dpp_gfx10<op, opName, asmName>,
+ VOP2be_Real_dpp8_gfx10<op, opName, asmName>;
+
+multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
+ VOP2_Real_e32_gfx10<op>,
+ VOP2_Real_e64_gfx10<op>,
+ VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
+ VOP2be_Real_dpp_gfx10<op, opName, asmName>,
+ VOP2be_Real_dpp8_gfx10<op, opName, asmName>;
multiclass VOP2_Real_gfx10<bits<6> op> :
VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
@@ -1103,7 +1098,6 @@ multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
-defm V_CNDMASK_B32 : Base_VOP2_Real_gfx10<0x001>;
defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>;
defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>;
@@ -1136,6 +1130,9 @@ defm V_SUB_CO_CI_U32 :
defm V_SUBREV_CO_CI_U32 :
VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;
+defm V_CNDMASK_B32 :
+ VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
+
// VOP3 only.
defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>;
@@ -1322,12 +1319,14 @@ multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
} // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8"
multiclass VOP2_SDWA_Real <bits<6> op> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
multiclass VOP2_SDWA9_Real <bits<6> op> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
@@ -1350,12 +1349,13 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
let AsmString = AsmName # ps.AsmOperands;
let DecoderNamespace = "GFX8";
}
- def _sdwa_vi :
- VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
- VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
- VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
- let AsmString = AsmName # ps.AsmOperands;
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA>.ret in
+ def _sdwa_vi :
+ VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
+ VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
+ let AsmString = AsmName # ps.AsmOperands;
+ }
foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp_vi :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
@@ -1383,12 +1383,13 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
let AsmString = AsmName # ps.AsmOperands;
let DecoderNamespace = "GFX9";
}
- def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
- VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
- VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
- let AsmString = AsmName # ps.AsmOperands;
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9>.ret in
+ def _sdwa_gfx9 :
+ VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
+ VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
+ let AsmString = AsmName # ps.AsmOperands;
+ }
foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
@@ -1410,10 +1411,11 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
let DecoderNamespace = "GFX9";
}
- def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
- VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ def _sdwa_gfx9 :
+ VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
+ }
foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
@@ -1554,7 +1556,7 @@ defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
} // End SubtargetPredicate = HasDLInsts
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
- def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+ def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index 21dbef9240e1..605425972b1c 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -112,7 +112,7 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
- imm:$cbsz, imm:$abid, imm:$blgp))];
+ timm:$cbsz, timm:$abid, timm:$blgp))];
}
class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
@@ -385,12 +385,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3
}
let SchedRW = [Write64Bit] in {
-let SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10] in {
-def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>;
-def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>;
-def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>;
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
+def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, shl>;
+def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, srl>;
+def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, sra>;
def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-} // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10]
+} // End SubtargetPredicate = isGFX6GFX7GFX10
let SubtargetPredicate = isGFX8Plus in {
def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
@@ -399,21 +399,6 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, as
} // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write64Bit]
-let Predicates = [isGFX8Plus] in {
-def : GCNPat <
- (getDivergentFrag<shl>.ret i64:$x, i32:$y),
- (V_LSHLREV_B64 $y, $x)
->;
-def : AMDGPUPat <
- (getDivergentFrag<srl>.ret i64:$x, i32:$y),
- (V_LSHRREV_B64 $y, $x)
->;
-def : AMDGPUPat <
- (getDivergentFrag<sra>.ret i64:$x, i32:$y),
- (V_ASHRREV_I64 $y, $x)
->;
-}
-
let SchedRW = [Write32Bit] in {
let SubtargetPredicate = isGFX8Plus in {
@@ -468,13 +453,13 @@ let FPDPRounding = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
let Uses = [M0, EXEC] in {
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
- [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
- (i32 imm:$attr),
- (i32 imm:$src0_modifiers),
+ [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan),
+ (i32 timm:$attr),
+ (i32 timm:$src0_modifiers),
(f32 VRegSrc_32:$src2),
- (i32 imm:$src2_modifiers),
- (i1 imm:$high),
- (i1 imm:$clamp)))]>;
+ (i32 timm:$src2_modifiers),
+ (i1 timm:$high),
+ (i1 timm:$clamp)))]>;
} // End Uses = [M0, EXEC]
} // End FPDPRounding = 1
} // End renamedInGFX9 = 1
@@ -493,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1
let Uses = [M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
- [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
- (i32 imm:$attr),
- (i32 imm:$src0_modifiers),
- (i1 imm:$high),
- (i1 imm:$clamp),
- (i32 imm:$omod)))]>;
+ [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan),
+ (i32 timm:$attr),
+ (i32 timm:$src0_modifiers),
+ (i1 timm:$high),
+ (i1 timm:$clamp),
+ (i32 timm:$omod)))]>;
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
- [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
- (i32 imm:$attr),
- (i32 imm:$src0_modifiers),
+ [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
+ (i32 timm:$attr),
+ (i32 timm:$src0_modifiers),
(f32 VRegSrc_32:$src2),
- (i32 imm:$src2_modifiers),
- (i1 imm:$high),
- (i1 imm:$clamp),
- (i32 imm:$omod)))]>;
+ (i32 timm:$src2_modifiers),
+ (i1 timm:$high),
+ (i1 timm:$clamp),
+ (i32 timm:$omod)))]>;
} // End Uses = [M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@@ -657,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in {
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
def : GCNPat<
- (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+ (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
(V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
>;
def : GCNPat<
- (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+ (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
(V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
>;
} // End SubtargetPredicate = isGFX10Plus
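[The imm -> timm switches above track a SelectionDAG distinction: an operand that must remain an immediate in the selected instruction is built as an ISD::TargetConstant node, which only timm patterns match, while imm matches ordinary ISD::Constant nodes. A minimal sketch of the two node kinds, assuming an LLVM tree of this vintage:

    // Sketch: getConstant produces a node matched by 'imm'; getTargetConstant
    // produces one matched by 'timm'. Intrinsic operands such as $attrchan
    // arrive as TargetConstant, hence the pattern updates above.
    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    SDValue makeTimmOperand(SelectionDAG &DAG, const SDLoc &DL) {
      SDValue C = DAG.getConstant(7, DL, MVT::i32);   // matched by 'imm'
      (void)C;
      return DAG.getTargetConstant(7, DL, MVT::i32);  // matched by 'timm'
    }
]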
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
index 55ee5f6577cf..0c13f39fec02 100644
--- a/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -261,6 +261,7 @@ class SDot2Pat<Instruction Inst> : GCNPat <
let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
}
+let IsDOT = 1 in {
let SubtargetPredicate = HasDot2Insts in {
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
@@ -277,6 +278,7 @@ def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32
def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
} // End SubtargetPredicate = HasDot1Insts
+} // End let IsDOT = 1
multiclass DotPats<SDPatternOperator dot_op,
VOP3PInst dot_inst> {
diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td
index b3513e383d10..8ef0ec7b71f4 100644
--- a/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/lib/Target/AMDGPU/VOPCInstructions.td
@@ -183,7 +183,7 @@ multiclass VOPCXInstAliases <string OpName, string Arch> {
}
-class getVOPCPat64 <PatLeaf cond, VOPProfile P> : LetDummies {
+class getVOPCPat64 <SDPatternOperator cond, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set i1:$sdst,
(setcc (P.Src0VT
@@ -202,7 +202,7 @@ class VCMPXNoSDstTable <bit has_sdst, string Name> {
multiclass VOPC_Pseudos <string opName,
VOPC_Profile P,
- PatLeaf cond = COND_NULL,
+ SDPatternOperator cond = COND_NULL,
string revOp = opName,
bit DefExec = 0> {
@@ -225,6 +225,7 @@ multiclass VOPC_Pseudos <string opName,
let isCommutable = 1;
}
+ foreach _ = BoolToList<P.HasExtSDWA>.ret in
def _sdwa : VOPC_SDWA_Pseudo <opName, P> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let SchedRW = P.Schedule;
@@ -236,7 +237,7 @@ multiclass VOPC_Pseudos <string opName,
let SubtargetPredicate = HasSdstCMPX in {
multiclass VOPCX_Pseudos <string opName,
VOPC_Profile P, VOPC_Profile P_NoSDst,
- PatLeaf cond = COND_NULL,
+ SDPatternOperator cond = COND_NULL,
string revOp = opName> :
VOPC_Pseudos <opName, P, cond, revOp, 1> {
@@ -261,6 +262,7 @@ multiclass VOPCX_Pseudos <string opName,
let SubtargetPredicate = HasNoSdstCMPX;
}
+ foreach _ = BoolToList<P_NoSDst.HasExtSDWA>.ret in
def _nosdst_sdwa : VOPC_SDWA_Pseudo <opName#"_nosdst", P_NoSDst> {
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
@@ -285,22 +287,23 @@ def VOPC_I16_I16 : VOPC_NoSdst_Profile<[Write32Bit], i16>;
def VOPC_I32_I32 : VOPC_NoSdst_Profile<[Write32Bit], i32>;
def VOPC_I64_I64 : VOPC_NoSdst_Profile<[Write64Bit], i64>;
-multiclass VOPC_F16 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+multiclass VOPC_F16 <string opName, SDPatternOperator cond = COND_NULL,
+ string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_F16_F16, cond, revOp, 0>;
-multiclass VOPC_F32 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+multiclass VOPC_F32 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_F32_F32, cond, revOp, 0>;
-multiclass VOPC_F64 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+multiclass VOPC_F64 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_F64_F64, cond, revOp, 0>;
-multiclass VOPC_I16 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+multiclass VOPC_I16 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_I16_I16, cond, revOp, 0>;
-multiclass VOPC_I32 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+multiclass VOPC_I32 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_I32_I32, cond, revOp, 0>;
-multiclass VOPC_I64 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+multiclass VOPC_I64 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
multiclass VOPCX_F16 <string opName, string revOp = opName> :
@@ -669,6 +672,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
let SchedRW = p.Schedule;
}
+ foreach _ = BoolToList<p.HasExtSDWA>.ret in
def _sdwa : VOPC_SDWA_Pseudo <opName, p> {
let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]),
!if(DefVcc, [VCC], []));
@@ -698,6 +702,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
let SubtargetPredicate = HasNoSdstCMPX;
}
+ foreach _ = BoolToList<P_NoSDst.HasExtSDWA>.ret in
def _nosdst_sdwa : VOPC_SDWA_Pseudo <opName#"_nosdst", P_NoSDst> {
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
@@ -737,8 +742,11 @@ defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">;
defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">;
+
+let SubtargetPredicate = Has16BitInsts in {
defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">;
defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
+}
//===----------------------------------------------------------------------===//
// V_ICMPIntrinsic Pattern.
@@ -878,6 +886,7 @@ let AssemblerPredicate = isGFX10Plus in {
}
} // End DecoderNamespace = "GFX10"
+ foreach _ = BoolToList<!cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
VOPC_SDWA9e<op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
@@ -903,6 +912,7 @@ let AssemblerPredicate = isGFX10Plus in {
}
} // End DecoderNamespace = "GFX10"
+ foreach _ = BoolToList<!cast<VOPC_Pseudo>(NAME#"_nosdst_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa")>,
VOPC_SDWA9e<op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa").Pfl> {
@@ -1223,10 +1233,12 @@ multiclass VOPC_Real_vi <bits<10> op> {
}
}
+ foreach _ = BoolToList<!cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+ foreach _ = BoolToList<!cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td
index 677095a354be..f208a1134a5a 100644
--- a/lib/Target/AMDGPU/VOPInstructions.td
+++ b/lib/Target/AMDGPU/VOPInstructions.td
@@ -14,6 +14,7 @@ class LetDummies {
bit isReMaterializable;
bit isAsCheapAsAMove;
bit VOPAsmPrefer32Bit;
+ bit FPDPRounding;
Predicate SubtargetPredicate;
string Constraints;
string DisableEncoding;
@@ -41,9 +42,7 @@ class VOP_Pseudo <string opName, string suffix, VOPProfile P, dag outs, dag ins,
string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern>,
VOP <opName>,
- SIMCInstr <opName#suffix, SIEncodingFamily.NONE>,
- MnemonicAlias<opName#suffix, opName> {
-
+ SIMCInstr <opName#suffix, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let UseNamedOperandTable = 1;
@@ -148,6 +147,7 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily> :
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
+ let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
let AsmVariantName = ps.AsmVariantName;
let Constraints = ps.Constraints;
@@ -473,8 +473,7 @@ class VOP_SDWA9Be<VOPProfile P> : VOP_SDWA9e<P> {
class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
InstSI <P.OutsSDWA, P.InsSDWA, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_sdwa", SIEncodingFamily.NONE>,
- MnemonicAlias <opName#"_sdwa", opName> {
+ SIMCInstr <opName#"_sdwa", SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -595,8 +594,7 @@ class VOP_DPPe<VOPProfile P, bit IsDPP16=0> : Enc64 {
class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, pattern>,
VOP <OpName>,
- SIMCInstr <OpName#"_dpp", SIEncodingFamily.NONE>,
- MnemonicAlias <OpName#"_dpp", OpName> {
+ SIMCInstr <OpName#"_dpp", SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
diff --git a/lib/Target/ARC/ARCFrameLowering.h b/lib/Target/ARC/ARCFrameLowering.h
index 41b559d16761..9242400fb28d 100644
--- a/lib/Target/ARC/ARCFrameLowering.h
+++ b/lib/Target/ARC/ARCFrameLowering.h
@@ -27,8 +27,8 @@ class ARCInstrInfo;
class ARCFrameLowering : public TargetFrameLowering {
public:
ARCFrameLowering(const ARCSubtarget &st)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), ST(st) {
- }
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(4), 0),
+ ST(st) {}
/// Insert Prologue into the function.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
diff --git a/lib/Target/ARC/ARCISelLowering.cpp b/lib/Target/ARC/ARCISelLowering.cpp
index 847d23f0abdb..751fd567bae8 100644
--- a/lib/Target/ARC/ARCISelLowering.cpp
+++ b/lib/Target/ARC/ARCISelLowering.cpp
@@ -716,7 +716,7 @@ SDValue ARCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
assert(cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0 &&
"Only support lowering frame addr of current frame.");
- unsigned FrameReg = ARI.getFrameRegister(MF);
+ Register FrameReg = ARI.getFrameRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
}
diff --git a/lib/Target/ARC/ARCMachineFunctionInfo.h b/lib/Target/ARC/ARCMachineFunctionInfo.h
index 31aa5b93246c..d4dcf9bf285c 100644
--- a/lib/Target/ARC/ARCMachineFunctionInfo.h
+++ b/lib/Target/ARC/ARCMachineFunctionInfo.h
@@ -34,8 +34,8 @@ public:
explicit ARCFunctionInfo(MachineFunction &MF)
: ReturnStackOffsetSet(false), VarArgsFrameIndex(0),
ReturnStackOffset(-1U), MaxCallStackReq(0) {
- // Functions are 4-byte (2**2) aligned.
- MF.setAlignment(2);
+ // Functions are 4-byte aligned.
+ MF.setAlignment(Align(4));
}
~ARCFunctionInfo() {}
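[The setAlignment change here and the EmitAlignment changes later in the ARM asm printer follow the same API migration: alignment parameters moved from a log2 exponent to the llvm::Align type, which stores the byte count, so the old setAlignment(2) becomes setAlignment(Align(4)). A small sketch of the two encodings, assuming the Support/Alignment.h shipped with this import:

    // Sketch: Align carries a power-of-two byte count; Log2_64 recovers the
    // exponent that the old interfaces used to take.
    #include "llvm/Support/Alignment.h"
    #include "llvm/Support/MathExtras.h"
    #include <cassert>
    using namespace llvm;

    static void alignViews() {
      Align A(4);                       // 4-byte alignment
      assert(A.value() == 4);           // byte view (new convention)
      assert(Log2_64(A.value()) == 2);  // log2 view (old convention)
    }
]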
diff --git a/lib/Target/ARC/ARCOptAddrMode.cpp b/lib/Target/ARC/ARCOptAddrMode.cpp
index c922b99c57b0..22a3b9111c8e 100644
--- a/lib/Target/ARC/ARCOptAddrMode.cpp
+++ b/lib/Target/ARC/ARCOptAddrMode.cpp
@@ -139,8 +139,7 @@ static bool dominatesAllUsesOf(const MachineInstr *MI, unsigned VReg,
MachineDominatorTree *MDT,
MachineRegisterInfo *MRI) {
- assert(TargetRegisterInfo::isVirtualRegister(VReg) &&
- "Expected virtual register!");
+ assert(Register::isVirtualRegister(VReg) && "Expected virtual register!");
for (auto it = MRI->use_nodbg_begin(VReg), end = MRI->use_nodbg_end();
it != end; ++it) {
@@ -181,7 +180,7 @@ static bool isLoadStoreThatCanHandleDisplacement(const TargetInstrInfo *TII,
bool ARCOptAddrMode::noUseOfAddBeforeLoadOrStore(const MachineInstr *Add,
const MachineInstr *Ldst) {
- unsigned R = Add->getOperand(0).getReg();
+ Register R = Add->getOperand(0).getReg();
return dominatesAllUsesOf(Ldst, R, MDT, MRI);
}
@@ -205,9 +204,8 @@ MachineInstr *ARCOptAddrMode::tryToCombine(MachineInstr &Ldst) {
return nullptr;
}
- unsigned B = Base.getReg();
- if (TargetRegisterInfo::isStackSlot(B) ||
- !TargetRegisterInfo::isVirtualRegister(B)) {
+ Register B = Base.getReg();
+ if (Register::isStackSlot(B) || !Register::isVirtualRegister(B)) {
LLVM_DEBUG(dbgs() << "[ABAW] Base is not VReg\n");
return nullptr;
}
@@ -285,7 +283,7 @@ ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add,
return nullptr;
}
- unsigned BaseReg = Ldst->getOperand(BasePos).getReg();
+ Register BaseReg = Ldst->getOperand(BasePos).getReg();
// prohibit this:
// v1 = add v0, c
@@ -294,7 +292,7 @@ ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add,
// st v0, [v0, 0]
// v1 = add v0, c
if (Ldst->mayStore() && Ldst->getOperand(0).isReg()) {
- unsigned StReg = Ldst->getOperand(0).getReg();
+ Register StReg = Ldst->getOperand(0).getReg();
if (Add->getOperand(0).getReg() == StReg || BaseReg == StReg) {
LLVM_DEBUG(dbgs() << "[canJoinInstructions] Store uses result of Add\n");
return nullptr;
@@ -447,7 +445,7 @@ void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode,
MachineOperand Src = MachineOperand::CreateImm(0xDEADBEEF);
AII->getBaseAndOffsetPosition(Ldst, BasePos, OffPos);
- unsigned BaseReg = Ldst.getOperand(BasePos).getReg();
+ Register BaseReg = Ldst.getOperand(BasePos).getReg();
Ldst.RemoveOperand(OffPos);
Ldst.RemoveOperand(BasePos);
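[The unsigned -> Register rewrites running through the ARC and ARM files use the llvm::Register wrapper, which converts implicitly to and from the raw unsigned and hosts the classification predicates that previously lived on TargetRegisterInfo. A minimal sketch of the converted check from tryToCombine above, assuming the CodeGen/Register.h from this import:

    // Sketch: the same stack-slot/virtual-register test as above, expressed
    // against the Register helpers rather than TargetRegisterInfo statics.
    #include "llvm/CodeGen/Register.h"
    using namespace llvm;

    static bool isRewritableBase(Register B) {
      return !Register::isStackSlot(B) && Register::isVirtualRegister(B);
    }
]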
diff --git a/lib/Target/ARC/ARCRegisterInfo.cpp b/lib/Target/ARC/ARCRegisterInfo.cpp
index 9c8340ac8f81..a7f89b385ffe 100644
--- a/lib/Target/ARC/ARCRegisterInfo.cpp
+++ b/lib/Target/ARC/ARCRegisterInfo.cpp
@@ -206,7 +206,7 @@ void ARCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
LLVM_DEBUG(dbgs() << "Offset : " << Offset << "\n"
<< "<--------->\n");
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
assert(ARC::GPR32RegClass.contains(Reg) && "Unexpected register operand");
if (!TFI->hasFP(MF)) {
diff --git a/lib/Target/ARC/ARCTargetMachine.cpp b/lib/Target/ARC/ARCTargetMachine.cpp
index 9fb45d686c26..34700dc22c54 100644
--- a/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/lib/Target/ARC/ARCTargetMachine.cpp
@@ -38,7 +38,7 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT,
"f32:32:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- TLOF(make_unique<TargetLoweringObjectFileELF>()),
+ TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
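[The make_unique -> std::make_unique change reflects LLVM's move to a C++14 baseline, which retired the llvm::make_unique polyfill in favor of the standard library version. A sketch with a stand-in type (TLOFLike is hypothetical; the diff uses TargetLoweringObjectFileELF):

    // Sketch: the call shape is unchanged, only the namespace moves.
    #include <memory>

    struct TLOFLike { int X = 0; };

    static std::unique_ptr<TLOFLike> makeTLOF() {
      return std::make_unique<TLOFLike>();
    }
]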
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index fb238bfc9cbc..30b9c8071ba2 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -133,9 +133,9 @@ bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
const TargetRegisterClass *TRC) {
if (!MO.isReg())
return false;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
else
return TRC->contains(Reg);
@@ -151,7 +151,7 @@ unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
// Get the subreg type that is most likely to be coalesced
// for an SPR register that will be used in VDUP32d pseudo.
unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
- if (!TRI->isVirtualRegister(SReg))
+ if (!Register::isVirtualRegister(SReg))
return getDPRLaneFromSPR(SReg);
MachineInstr *MI = MRI->getVRegDef(SReg);
@@ -166,7 +166,7 @@ unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
SReg = MI->getOperand(1).getReg();
}
- if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ if (Register::isVirtualRegister(SReg)) {
if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
return ARM::ssub_0;
}
@@ -191,8 +191,8 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
for (MachineOperand &MO : MI->operands()) {
if ((!MO.isReg()) || (!MO.isUse()))
continue;
- unsigned Reg = MO.getReg();
- if (!TRI->isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
MachineOperand *Op = MI->findRegisterDefOperand(Reg);
@@ -213,8 +213,8 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
for (MachineOperand &MODef : Def->operands()) {
if ((!MODef.isReg()) || (!MODef.isDef()))
continue;
- unsigned DefReg = MODef.getReg();
- if (!TRI->isVirtualRegister(DefReg)) {
+ Register DefReg = MODef.getReg();
+ if (!Register::isVirtualRegister(DefReg)) {
IsDead = false;
break;
}
@@ -245,10 +245,10 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
}
if (MI->isInsertSubreg()) {
- unsigned DPRReg = MI->getOperand(1).getReg();
- unsigned SPRReg = MI->getOperand(2).getReg();
+ Register DPRReg = MI->getOperand(1).getReg();
+ Register SPRReg = MI->getOperand(2).getReg();
- if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
+ if (Register::isVirtualRegister(DPRReg) && Register::isVirtualRegister(SPRReg)) {
MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
@@ -267,7 +267,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
// Find the thing we're subreg copying out of - is it of the same
// regclass as DPRMI? (i.e. a DPR or QPR).
- unsigned FullReg = SPRMI->getOperand(1).getReg();
+ Register FullReg = SPRMI->getOperand(1).getReg();
const TargetRegisterClass *TRC =
MRI->getRegClass(MI->getOperand(1).getReg());
if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
@@ -296,9 +296,9 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
if (!MI->getOperand(I).isReg())
continue;
++NumTotal;
- unsigned OpReg = MI->getOperand(I).getReg();
+ Register OpReg = MI->getOperand(I).getReg();
- if (!TRI->isVirtualRegister(OpReg))
+ if (!Register::isVirtualRegister(OpReg))
break;
MachineInstr *Def = MRI->getVRegDef(OpReg);
@@ -342,7 +342,7 @@ bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
if (!MI->isFullCopy())
return MI;
- if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
return nullptr;
MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
if (!Def)
@@ -369,8 +369,8 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
Reached.insert(MI);
if (MI->isPHI()) {
for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
- unsigned Reg = MI->getOperand(I).getReg();
- if (!TRI->isVirtualRegister(Reg)) {
+ Register Reg = MI->getOperand(I).getReg();
+ if (!Register::isVirtualRegister(Reg)) {
continue;
}
MachineInstr *NewMI = MRI->getVRegDef(Reg);
@@ -379,7 +379,7 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
Front.push_back(NewMI);
}
} else if (MI->isFullCopy()) {
- if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
continue;
MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
if (!NewMI)
@@ -418,8 +418,8 @@ unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const DebugLoc &DL, unsigned Reg,
unsigned Lane, bool QPR) {
- unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
- &ARM::DPRRegClass);
+ Register Out =
+ MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass);
BuildMI(MBB, InsertBefore, DL,
TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
.addReg(Reg)
@@ -434,7 +434,7 @@ unsigned A15SDOptimizer::createExtractSubreg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
const DebugLoc &DL, unsigned DReg, unsigned Lane,
const TargetRegisterClass *TRC) {
- unsigned Out = MRI->createVirtualRegister(TRC);
+ Register Out = MRI->createVirtualRegister(TRC);
BuildMI(MBB,
InsertBefore,
DL,
@@ -448,7 +448,7 @@ unsigned A15SDOptimizer::createExtractSubreg(
unsigned A15SDOptimizer::createRegSequence(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
const DebugLoc &DL, unsigned Reg1, unsigned Reg2) {
- unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
+ Register Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
BuildMI(MBB,
InsertBefore,
DL,
@@ -466,7 +466,7 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const DebugLoc &DL, unsigned Ssub0,
unsigned Ssub1) {
- unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
.addReg(Ssub0)
.addReg(Ssub1)
@@ -478,7 +478,7 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
unsigned A15SDOptimizer::createInsertSubreg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) {
- unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
+ Register Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
BuildMI(MBB,
InsertBefore,
DL,
@@ -494,7 +494,7 @@ unsigned
A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const DebugLoc &DL) {
- unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
BuildMI(MBB,
InsertBefore,
DL,
@@ -602,7 +602,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
// we can end up with multiple defs of this DPR.
SmallVector<MachineInstr *, 8> DefSrcs;
- if (!TRI->isVirtualRegister(*I))
+ if (!Register::isVirtualRegister(*I))
continue;
MachineInstr *Def = MRI->getVRegDef(*I);
if (!Def)
@@ -622,7 +622,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
// Collect all the uses of this MI's DPR def for updating later.
SmallVector<MachineOperand*, 8> Uses;
- unsigned DPRDefReg = MI->getOperand(0).getReg();
+ Register DPRDefReg = MI->getOperand(0).getReg();
for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
E = MRI->use_end(); I != E; ++I)
Uses.push_back(&*I);
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index bf8ed6562fe7..2e6f756d522c 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -35,6 +35,7 @@ class MachineInstr;
class MCInst;
class PassRegistry;
+Pass *createMVETailPredicationPass();
FunctionPass *createARMLowOverheadLoopsPass();
Pass *createARMParallelDSPPass();
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
@@ -67,6 +68,7 @@ void initializeThumb2SizeReducePass(PassRegistry &);
void initializeThumb2ITBlockPass(PassRegistry &);
void initializeMVEVPTBlockPass(PassRegistry &);
void initializeARMLowOverheadLoopsPass(PassRegistry &);
+void initializeMVETailPredicationPass(PassRegistry &);
} // end namespace llvm
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index b687db12eaf5..fed4cb2b9316 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -57,12 +57,15 @@ def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true",
"Extend FP to 32 double registers">;
multiclass VFPver<string name, string query, string description,
- list<SubtargetFeature> prev = [],
- list<SubtargetFeature> otherimplies = []> {
+ list<SubtargetFeature> prev,
+ list<SubtargetFeature> otherimplies,
+ list<SubtargetFeature> vfp2prev = []> {
def _D16_SP: SubtargetFeature<
name#"d16sp", query#"D16SP", "true",
description#" with only 16 d-registers and no double precision",
- !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) # otherimplies>;
+ !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) #
+ !foreach(v, vfp2prev, !cast<SubtargetFeature>(v # "_SP")) #
+ otherimplies>;
def _SP: SubtargetFeature<
name#"sp", query#"SP", "true",
description#" with no double precision",
@@ -72,6 +75,7 @@ multiclass VFPver<string name, string query, string description,
name#"d16", query#"D16", "true",
description#" with only 16 d-registers",
!foreach(v, prev, !cast<SubtargetFeature>(v # "_D16")) #
+ vfp2prev #
otherimplies # [FeatureFP64, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
def "": SubtargetFeature<
name, query, "true", description,
@@ -80,11 +84,17 @@ multiclass VFPver<string name, string query, string description,
!cast<SubtargetFeature>(NAME # "_SP")]>;
}
-defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions",
- [], [FeatureFPRegs]>;
+def FeatureVFP2_SP : SubtargetFeature<"vfp2sp", "HasVFPv2SP", "true",
+ "Enable VFP2 instructions with "
+ "no double precision",
+ [FeatureFPRegs]>;
+
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
+ "Enable VFP2 instructions",
+ [FeatureFP64, FeatureVFP2_SP]>;
defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
- [FeatureVFP2]>;
+ [], [], [FeatureVFP2]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
@@ -98,7 +108,7 @@ defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions",
[FeatureVFP3], [FeatureFP16]>;
defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
- [FeatureVFP4]>;
+ [FeatureVFP4], []>;
def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"Enable full half-precision "
@@ -302,9 +312,18 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
-def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopAlignment","2",
+def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2",
"Prefer 32-bit alignment for loops">;
+def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "1",
+ "Model MVE instructions as a 1 beat per tick architecture">;
+
+def FeatureMVEVectorCostFactor2 : SubtargetFeature<"mve2beat", "MVEVectorCostFactor", "2",
+ "Model MVE instructions as a 2 beats per tick architecture">;
+
+def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFactor", "4",
+ "Model MVE instructions as a 4 beats per tick architecture">;
+
/// Some instructions update CPSR partially, which can add false dependency for
/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
/// mapped to a separate physical register. Avoid partial CPSR update for these
@@ -1156,6 +1175,13 @@ def : ProcNoItin<"cortex-a76ae", [ARMv82a, ProcA76,
FeatureFullFP16,
FeatureDotProd]>;
+def : ProcNoItin<"neoverse-n1", [ARMv82a,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureDotProd]>;
+
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
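[The rename of the loop-align feature's backing field to PrefLoopLogAlignment makes the encoding explicit: the stored "2" is a log2 exponent, i.e. 32-bit (4-byte) loop alignment. A one-line sketch of the decoding; the helper name is an assumption:

    // Sketch: the SubtargetFeature value is an exponent, not a byte count.
    static unsigned prefLoopAlignmentBytes(unsigned PrefLoopLogAlignment) {
      return 1u << PrefLoopLogAlignment; // "2" => 4-byte-aligned loops
    }
]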
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index e29077266fcd..c8c91e53c44e 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -168,7 +168,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// relatively easy to exceed the thumb branch range within a TU.
if (! ThumbIndirectPads.empty()) {
OutStreamer->EmitAssemblerFlag(MCAF_Code16);
- EmitAlignment(1);
+ EmitAlignment(Align(2));
for (std::pair<unsigned, MCSymbol *> &TIP : ThumbIndirectPads) {
OutStreamer->EmitLabel(TIP.second);
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX)
@@ -203,8 +203,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
switch (MO.getType()) {
default: llvm_unreachable("<unknown operand type>");
case MachineOperand::MO_Register: {
- unsigned Reg = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ Register Reg = MO.getReg();
+ assert(Register::isPhysicalRegister(Reg));
assert(!MO.getSubReg() && "Subregs should be eliminated!");
if(ARM::GPRPairRegClass.contains(Reg)) {
const MachineFunction &MF = *MI->getParent()->getParent();
@@ -275,7 +275,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
case 'y': // Print a VFP single precision register as indexed double.
if (MI->getOperand(OpNum).isReg()) {
- unsigned Reg = MI->getOperand(OpNum).getReg();
+ Register Reg = MI->getOperand(OpNum).getReg();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
// Find the 'd' register that has this 's' register as a sub-register,
// and determine the lane number.
@@ -302,14 +302,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!MI->getOperand(OpNum).isReg())
return true;
const MachineOperand &MO = MI->getOperand(OpNum);
- unsigned RegBegin = MO.getReg();
+ Register RegBegin = MO.getReg();
// This takes advantage of the 2 operand-ness of ldm/stm and that we've
// already got the operands in registers that are operands to the
// inline asm statement.
O << "{";
if (ARM::GPRPairRegClass.contains(RegBegin)) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0);
+ Register Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0);
O << ARMInstPrinter::getRegisterName(Reg0) << ", ";
RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1);
}
@@ -378,8 +378,8 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!MO.isReg())
return true;
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- unsigned Reg = TRI->getSubReg(MO.getReg(), FirstHalf ?
- ARM::gsub_0 : ARM::gsub_1);
+ Register Reg =
+ TRI->getSubReg(MO.getReg(), FirstHalf ? ARM::gsub_0 : ARM::gsub_1);
O << ARMInstPrinter::getRegisterName(Reg);
return false;
}
@@ -391,7 +391,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(RegOp);
if (!MO.isReg())
return true;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
O << ARMInstPrinter::getRegisterName(Reg);
return false;
}
@@ -400,12 +400,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
case 'f': { // The high doubleword register of a NEON quad register.
if (!MI->getOperand(OpNum).isReg())
return true;
- unsigned Reg = MI->getOperand(OpNum).getReg();
+ Register Reg = MI->getOperand(OpNum).getReg();
if (!ARM::QPRRegClass.contains(Reg))
return true;
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
- ARM::dsub_0 : ARM::dsub_1);
+ Register SubReg =
+ TRI->getSubReg(Reg, ExtraCode[0] == 'e' ? ARM::dsub_0 : ARM::dsub_1);
O << ARMInstPrinter::getRegisterName(SubReg);
return false;
}
@@ -419,7 +419,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return true;
const MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if(!ARM::GPRPairRegClass.contains(Reg))
return false;
Reg = TRI->getSubReg(Reg, ARM::gsub_1);
@@ -526,7 +526,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
- EmitAlignment(2);
+ EmitAlignment(Align(4));
for (auto &Stub : Stubs)
emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
@@ -539,7 +539,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection());
- EmitAlignment(2);
+ EmitAlignment(Align(4));
for (auto &Stub : Stubs)
emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
@@ -940,7 +940,7 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
// Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
// ARM mode tables.
- EmitAlignment(2);
+ EmitAlignment(Align(4));
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
@@ -986,7 +986,7 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
// Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
// ARM mode tables.
- EmitAlignment(2);
+ EmitAlignment(Align(4));
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
@@ -1015,7 +1015,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
unsigned JTI = MO1.getIndex();
if (Subtarget->isThumb1Only())
- EmitAlignment(2);
+ EmitAlignment(Align(4));
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
@@ -1058,7 +1058,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
// Make sure the next instruction is 2-byte aligned.
- EmitAlignment(1);
+ EmitAlignment(Align(2));
}
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
@@ -1072,7 +1072,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MachineRegInfo = MF.getRegInfo();
- unsigned FramePtr = TargetRegInfo->getFrameRegister(MF);
+ Register FramePtr = TargetRegInfo->getFrameRegister(MF);
unsigned Opc = MI->getOpcode();
unsigned SrcReg, DstReg;
@@ -1136,7 +1136,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
// Check for registers that are remapped (for a Thumb1 prologue that
// saves high registers).
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(Reg))
Reg = RemappedReg;
RegList.push_back(Reg);
@@ -1326,7 +1326,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// So here we generate a bl to a small jump pad that does bx rN.
// The jump pads are emitted after the function body.
- unsigned TReg = MI->getOperand(0).getReg();
+ Register TReg = MI->getOperand(0).getReg();
MCSymbol *TRegSym = nullptr;
for (std::pair<unsigned, MCSymbol *> &TIP : ThumbIndirectPads) {
if (TIP.first == TReg) {
@@ -1663,8 +1663,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::tTBH_JT: {
bool Is8Bit = MI->getOpcode() == ARM::tTBB_JT;
- unsigned Base = MI->getOperand(0).getReg();
- unsigned Idx = MI->getOperand(1).getReg();
+ Register Base = MI->getOperand(0).getReg();
+ Register Idx = MI->getOperand(1).getReg();
assert(MI->getOperand(1).isKill() && "We need the index register as scratch!");
// Multiply up idx if necessary.
@@ -1844,8 +1844,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// b LSJLJEH
// movs r0, #1
// LSJLJEH:
- unsigned SrcReg = MI->getOperand(0).getReg();
- unsigned ValReg = MI->getOperand(1).getReg();
+ Register SrcReg = MI->getOperand(0).getReg();
+ Register ValReg = MI->getOperand(1).getReg();
MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true);
OutStreamer->AddComment("eh_setjmp begin");
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
@@ -1910,8 +1910,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// mov r0, #0
// add pc, pc, #0
// mov r0, #1
- unsigned SrcReg = MI->getOperand(0).getReg();
- unsigned ValReg = MI->getOperand(1).getReg();
+ Register SrcReg = MI->getOperand(0).getReg();
+ Register ValReg = MI->getOperand(1).getReg();
OutStreamer->AddComment("eh_setjmp begin");
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDri)
@@ -1967,8 +1967,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// ldr $scratch, [$src, #4]
// ldr r7, [$src]
// bx $scratch
- unsigned SrcReg = MI->getOperand(0).getReg();
- unsigned ScratchReg = MI->getOperand(1).getReg();
+ Register SrcReg = MI->getOperand(0).getReg();
+ Register ScratchReg = MI->getOperand(1).getReg();
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
.addReg(ARM::SP)
.addReg(SrcReg)
@@ -2027,8 +2027,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// ldr $scratch, [$src, #4]
// ldr r7, [$src]
// bx $scratch
- unsigned SrcReg = MI->getOperand(0).getReg();
- unsigned ScratchReg = MI->getOperand(1).getReg();
+ Register SrcReg = MI->getOperand(0).getReg();
+ Register ScratchReg = MI->getOperand(1).getReg();
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
.addReg(ScratchReg)
@@ -2095,7 +2095,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// ldr.w sp, [$src, #8]
// ldr.w pc, [$src, #4]
- unsigned SrcReg = MI->getOperand(0).getReg();
+ Register SrcReg = MI->getOperand(0).getReg();
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi12)
.addReg(ARM::R11)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 222aa85856a2..684cd1def977 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -172,9 +172,9 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
const MachineOperand &Base = MI.getOperand(2);
const MachineOperand &Offset = MI.getOperand(NumOps - 3);
- unsigned WBReg = WB.getReg();
- unsigned BaseReg = Base.getReg();
- unsigned OffReg = Offset.getReg();
+ Register WBReg = WB.getReg();
+ Register BaseReg = Base.getReg();
+ Register OffReg = Offset.getReg();
unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
switch (AddrMode) {
@@ -276,8 +276,8 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
if (LV) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- unsigned Reg = MO.getReg();
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
+ Register Reg = MO.getReg();
LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
if (MO.isDef()) {
@@ -966,8 +966,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SmallSet<unsigned, 4> DstRegs;
#endif
for (unsigned i = 0; i != SubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
- unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
+ Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
+ Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
assert(!DstRegs.count(Src) && "destructive vector copy");
@@ -1019,7 +1019,7 @@ ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
if (!SubIdx)
return MIB.addReg(Reg, State);
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
return MIB.addReg(Reg, State, SubIdx);
}
@@ -1133,7 +1133,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ Subtarget.hasNEON()) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
.addFrameIndex(FI)
.addImm(16)
@@ -1155,7 +1156,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ Subtarget.hasNEON()) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
@@ -1337,7 +1339,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
}
- if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ if (Register::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
@@ -1368,7 +1370,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
.addFrameIndex(FI)
.addImm(16)
@@ -1382,7 +1385,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
- if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ if (Register::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
@@ -1390,7 +1393,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI)
.addImm(16)
@@ -1405,7 +1409,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
- if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ if (Register::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
@@ -1425,7 +1429,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
- if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ if (Register::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
@@ -1583,8 +1587,8 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// Look for a copy between even S-registers. That is where we keep floats
// when using NEON v2f32 instructions for f32 arithmetic.
- unsigned DstRegS = MI.getOperand(0).getReg();
- unsigned SrcRegS = MI.getOperand(1).getReg();
+ Register DstRegS = MI.getOperand(0).getReg();
+ Register SrcRegS = MI.getOperand(1).getReg();
if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
return false;
@@ -1794,12 +1798,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
if (MI0.getNumOperands() != MI1.getNumOperands())
return false;
- unsigned Addr0 = MI0.getOperand(1).getReg();
- unsigned Addr1 = MI1.getOperand(1).getReg();
+ Register Addr0 = MI0.getOperand(1).getReg();
+ Register Addr1 = MI1.getOperand(1).getReg();
if (Addr0 != Addr1) {
- if (!MRI ||
- !TargetRegisterInfo::isVirtualRegister(Addr0) ||
- !TargetRegisterInfo::isVirtualRegister(Addr1))
+ if (!MRI || !Register::isVirtualRegister(Addr0) ||
+ !Register::isVirtualRegister(Addr1))
return false;
// This assumes SSA form.
@@ -2076,6 +2079,38 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
return PredCost <= UnpredCost;
}
+unsigned
+ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
+ unsigned NumInsts) const {
+ // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
+ // ARM has a condition code field in every predicable instruction, using it
+ // doesn't change code size.
+ return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0;
+}
+
+unsigned
+ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
+ // If this branch is likely to be folded into the comparison to form a
+ // CB(N)Z, then removing it won't reduce code size at all, because that will
+ // just replace the CB(N)Z with a CMP.
+ if (MI.getOpcode() == ARM::t2Bcc &&
+ findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
+ return 0;
+
+ unsigned Size = getInstSizeInBytes(MI);
+
+ // For Thumb2, all branches are 32-bit instructions during the if conversion
+ // pass, but may be replaced with 16-bit instructions during size reduction.
+ // Since the branches considered by if conversion tend to be forward branches
+ // over small basic blocks, they are very likely to be in range for the
+ // narrow instructions, so we assume the final code size will be half what it
+ // currently is.
+ if (Subtarget.isThumb2())
+ Size /= 2;
+
+ return Size;
+}
+
bool
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const {
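[The new extraSizeToPredicateInstructions hook above encodes the Thumb2 cost model: one 2-byte IT instruction predicates up to four instructions, while ARM-mode predication is free, hence divideCeil(NumInsts, 4) * 2. A standalone restatement of the formula, using the same MathExtras helper:

    // Sketch of the cost model: five predicated instructions need two IT
    // blocks on Thumb2 (4 extra bytes) and nothing in ARM mode.
    #include "llvm/Support/MathExtras.h"

    static unsigned itOverheadBytes(bool IsThumb2, unsigned NumInsts) {
      return IsThumb2 ? (unsigned)(llvm::divideCeil(NumInsts, 4) * 2) : 0u;
    }
]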
@@ -2141,7 +2176,7 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
MachineInstr *
ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) const {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
return nullptr;
@@ -2163,7 +2198,7 @@ ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
// MI can't have any tied operands, that would conflict with predication.
if (MO.isTied())
return nullptr;
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ if (Register::isPhysicalRegister(MO.getReg()))
return nullptr;
if (MO.isDef() && !MO.isDead())
return nullptr;
@@ -2211,7 +2246,7 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
// Find new register class to use.
MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
return nullptr;
@@ -2298,6 +2333,7 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::tSUBSrr, ARM::tSUBrr},
{ARM::tSBCS, ARM::tSBC},
{ARM::tRSBS, ARM::tRSB},
+ {ARM::tLSLSri, ARM::tLSLri},
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
@@ -2420,7 +2456,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
MachineOperand &MO = MI->getOperand(i);
RegList.push_back(MO);
- if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
+ if (MO.isReg() && !MO.isImplicit() &&
+ TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
FirstRegEnc = TRI->getEncodingValue(MO.getReg());
}
@@ -2430,7 +2467,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
--CurRegEnc) {
unsigned CurReg = RegClass->getRegister(CurRegEnc);
- if (IsT1PushPop && CurReg > ARM::R7)
+ if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
continue;
if (!IsPop) {
// Pushing any register is completely harmless, mark the register involved
@@ -3039,18 +3076,22 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
break;
case ARM::VSELEQD:
case ARM::VSELEQS:
+ case ARM::VSELEQH:
CC = ARMCC::EQ;
break;
case ARM::VSELGTD:
case ARM::VSELGTS:
+ case ARM::VSELGTH:
CC = ARMCC::GT;
break;
case ARM::VSELGED:
case ARM::VSELGES:
+ case ARM::VSELGEH:
CC = ARMCC::GE;
break;
- case ARM::VSELVSS:
case ARM::VSELVSD:
+ case ARM::VSELVSS:
+ case ARM::VSELVSH:
CC = ARMCC::VS;
break;
}
@@ -3271,9 +3312,9 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
unsigned OpIdx = Commute ? 2 : 1;
- unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
+ Register Reg1 = UseMI.getOperand(OpIdx).getReg();
bool isKill = UseMI.getOperand(OpIdx).isKill();
- unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ Register NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
NewReg)
.addReg(Reg1, getKillRegState(isKill))
@@ -3335,15 +3376,15 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::LDRSB_POST:
case ARM::LDRSH_POST: {
- unsigned Rt = MI.getOperand(0).getReg();
- unsigned Rm = MI.getOperand(3).getReg();
+ Register Rt = MI.getOperand(0).getReg();
+ Register Rm = MI.getOperand(3).getReg();
return (Rt == Rm) ? 4 : 3;
}
case ARM::LDR_PRE_REG:
case ARM::LDRB_PRE_REG: {
- unsigned Rt = MI.getOperand(0).getReg();
- unsigned Rm = MI.getOperand(3).getReg();
+ Register Rt = MI.getOperand(0).getReg();
+ Register Rm = MI.getOperand(3).getReg();
if (Rt == Rm)
return 3;
unsigned ShOpVal = MI.getOperand(4).getImm();
@@ -3372,8 +3413,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::LDRH_PRE:
case ARM::STRH_PRE: {
- unsigned Rt = MI.getOperand(0).getReg();
- unsigned Rm = MI.getOperand(3).getReg();
+ Register Rt = MI.getOperand(0).getReg();
+ Register Rm = MI.getOperand(3).getReg();
if (!Rm)
return 2;
if (Rt == Rm)
@@ -3384,8 +3425,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::LDR_POST_REG:
case ARM::LDRB_POST_REG:
case ARM::LDRH_POST: {
- unsigned Rt = MI.getOperand(0).getReg();
- unsigned Rm = MI.getOperand(3).getReg();
+ Register Rt = MI.getOperand(0).getReg();
+ Register Rm = MI.getOperand(3).getReg();
return (Rt == Rm) ? 3 : 2;
}
@@ -3404,10 +3445,10 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::LDRSB_PRE:
case ARM::LDRSH_PRE: {
- unsigned Rm = MI.getOperand(3).getReg();
+ Register Rm = MI.getOperand(3).getReg();
if (Rm == 0)
return 3;
- unsigned Rt = MI.getOperand(0).getReg();
+ Register Rt = MI.getOperand(0).getReg();
if (Rt == Rm)
return 4;
unsigned ShOpVal = MI.getOperand(4).getImm();
@@ -3422,9 +3463,9 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
}
case ARM::LDRD: {
- unsigned Rt = MI.getOperand(0).getReg();
- unsigned Rn = MI.getOperand(2).getReg();
- unsigned Rm = MI.getOperand(3).getReg();
+ Register Rt = MI.getOperand(0).getReg();
+ Register Rn = MI.getOperand(2).getReg();
+ Register Rm = MI.getOperand(3).getReg();
if (Rm)
return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
: 3;
@@ -3432,7 +3473,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
}
case ARM::STRD: {
- unsigned Rm = MI.getOperand(3).getReg();
+ Register Rm = MI.getOperand(3).getReg();
if (Rm)
return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
: 3;
@@ -3448,9 +3489,9 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
return 4;
case ARM::LDRD_PRE: {
- unsigned Rt = MI.getOperand(0).getReg();
- unsigned Rn = MI.getOperand(3).getReg();
- unsigned Rm = MI.getOperand(4).getReg();
+ Register Rt = MI.getOperand(0).getReg();
+ Register Rn = MI.getOperand(3).getReg();
+ Register Rm = MI.getOperand(4).getReg();
if (Rm)
return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
: 4;
@@ -3458,13 +3499,13 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
}
case ARM::t2LDRD_PRE: {
- unsigned Rt = MI.getOperand(0).getReg();
- unsigned Rn = MI.getOperand(3).getReg();
+ Register Rt = MI.getOperand(0).getReg();
+ Register Rn = MI.getOperand(3).getReg();
return (Rt == Rn) ? 4 : 3;
}
case ARM::STRD_PRE: {
- unsigned Rm = MI.getOperand(4).getReg();
+ Register Rm = MI.getOperand(4).getReg();
if (Rm)
return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
: 4;
@@ -3495,8 +3536,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
return 2;
case ARM::t2LDRDi8: {
- unsigned Rt = MI.getOperand(0).getReg();
- unsigned Rn = MI.getOperand(2).getReg();
+ Register Rt = MI.getOperand(0).getReg();
+ Register Rn = MI.getOperand(2).getReg();
return (Rt == Rn) ? 3 : 2;
}
@@ -3745,7 +3786,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
}
bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
- unsigned BaseReg = MI.getOperand(0).getReg();
+ Register BaseReg = MI.getOperand(0).getReg();
for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
const auto &Op = MI.getOperand(i);
if (Op.isReg() && Op.getReg() == BaseReg)
@@ -4219,7 +4260,7 @@ int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return -1;
const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
- unsigned Reg = DefMO.getReg();
+ Register Reg = DefMO.getReg();
const MachineInstr *ResolvedDefMI = &DefMI;
unsigned DefAdj = 0;
@@ -4328,10 +4369,10 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
- const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ auto *DefMN = cast<MachineSDNode>(DefNode);
unsigned DefAlign = !DefMN->memoperands_empty()
? (*DefMN->memoperands_begin())->getAlignment() : 0;
- const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ auto *UseMN = cast<MachineSDNode>(UseNode);
unsigned UseAlign = !UseMN->memoperands_empty()
? (*UseMN->memoperands_begin())->getAlignment() : 0;
int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
@@ -4708,7 +4749,7 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
if (MI.getOperand(i).isImplicit() ||
!MI.getOperand(i).isReg())
continue;
- unsigned Reg = MI.getOperand(i).getReg();
+ Register Reg = MI.getOperand(i).getReg();
if (Reg < ARM::R0 || Reg > ARM::R7) {
if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
!(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
@@ -4731,7 +4772,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
- unsigned Reg = MI->getOperand(0).getReg();
+ Register Reg = MI->getOperand(0).getReg();
const GlobalValue *GV =
cast<GlobalValue>((*MI->memoperands_begin())->getValue());
MachineInstrBuilder MIB;
@@ -5104,7 +5145,7 @@ unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
const MachineOperand &MO = MI.getOperand(OpNum);
if (MO.readsReg())
return 0;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
int UseOp = -1;
switch (MI.getOpcode()) {
@@ -5134,7 +5175,7 @@ unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
return 0;
// We must be able to clobber the whole D-reg.
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
// Virtual register must be a def undef foo:ssub_0 operand.
if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
return 0;
@@ -5159,8 +5200,8 @@ void ARMBaseInstrInfo::breakPartialRegDependency(
assert(TRI && "Need TRI instance");
const MachineOperand &MO = MI.getOperand(OpNum);
- unsigned Reg = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ Register Reg = MO.getReg();
+ assert(Register::isPhysicalRegister(Reg) &&
"Can't break virtual register dependencies.");
unsigned DReg = Reg;
@@ -5337,7 +5378,7 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
// is not redefined between the cmp and the br.
if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
return nullptr;
- unsigned Reg = CmpMI->getOperand(0).getReg();
+ Register Reg = CmpMI->getOperand(0).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
@@ -5349,3 +5390,50 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
return &*CmpMI;
}
+
+unsigned llvm::ConstantMaterializationCost(unsigned Val,
+ const ARMSubtarget *Subtarget,
+ bool ForCodesize) {
+ if (Subtarget->isThumb()) {
+ if (Val <= 255) // MOV
+ return ForCodesize ? 2 : 1;
+ if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
+ ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
+ ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
+ return ForCodesize ? 4 : 1;
+ if (Val <= 510) // MOV + ADDi8
+ return ForCodesize ? 4 : 2;
+ if (~Val <= 255) // MOV + MVN
+ return ForCodesize ? 4 : 2;
+ if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
+ return ForCodesize ? 4 : 2;
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1) // MOV
+ return ForCodesize ? 4 : 1;
+ if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
+ return ForCodesize ? 4 : 1;
+ if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
+ return ForCodesize ? 4 : 1;
+ if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
+ return ForCodesize ? 8 : 2;
+ }
+ if (Subtarget->useMovt()) // MOVW + MOVT
+ return ForCodesize ? 8 : 2;
+ return ForCodesize ? 8 : 3; // Literal pool load
+}
+
+bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
+ const ARMSubtarget *Subtarget,
+ bool ForCodesize) {
+ // Check with ForCodesize
+ unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
+ unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
+ if (Cost1 < Cost2)
+ return true;
+ if (Cost1 > Cost2)
+ return false;
+
+ // If they are equal, try with !ForCodesize
+ return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
+ ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
+}
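A minimal sketch of how the two helpers combine, assuming STI points at a Thumb2 subtarget with MOVW/MOVT (names and values are illustrative):

// 0x55 fits a 16-bit Thumb MOV: 2 bytes of code, one instruction.
unsigned CheapBytes =
    ConstantMaterializationCost(0x55, STI, /*ForCodesize=*/true); // 2
// 0x12345678 needs MOVW + MOVT: 8 bytes, two instructions.
unsigned WideBytes =
    ConstantMaterializationCost(0x12345678, STI, /*ForCodesize=*/true); // 8
// The byte costs already differ, so the !ForCodesize tie-break never runs.
bool Cheaper = HasLowerConstantMaterializationCost(0x55, 0x12345678, STI,
                                                   /*ForCodesize=*/true); // true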
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index c28983fcc15c..c232b6f0b45d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -276,6 +276,10 @@ public:
return NumCycles == 1;
}
+ unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
+ unsigned NumInsts) const override;
+ unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override;
+
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const override;
@@ -601,7 +605,8 @@ bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
- const ARMBaseInstrInfo &TII);
+ const ARMBaseInstrInfo &TII,
+ const TargetRegisterInfo *TRI);
/// Return true if Reg is defined between From and To
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From,
@@ -620,6 +625,20 @@ void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond);
void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond,
unsigned Inactive);
+/// Returns the number of instructions required to materialize the given
+/// constant in a register, or 3 if a literal pool load is needed.
+/// If ForCodesize is specified, an approximate cost in bytes is returned.
+unsigned ConstantMaterializationCost(unsigned Val,
+ const ARMSubtarget *Subtarget,
+ bool ForCodesize = false);
+
+/// Returns true if Val1 has a lower Constant Materialization Cost than Val2.
+/// Uses the cost from ConstantMaterializationCost, first with ForCodesize as
+/// specified. If the scores are equal, return the comparison for !ForCodesize.
+bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
+ const ARMSubtarget *Subtarget,
+ bool ForCodesize = false);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index dc99b37742da..1eaf871867e0 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -174,6 +174,12 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
: CSR_AAPCS_ThisReturn_RegMask;
}
+ArrayRef<MCPhysReg> ARMBaseRegisterInfo::getIntraCallClobberedRegs(
+ const MachineFunction *MF) const {
+ static const MCPhysReg IntraCallClobberedRegs[] = {ARM::R12};
+ return ArrayRef<MCPhysReg>(IntraCallClobberedRegs);
+}
+
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
@@ -185,7 +191,7 @@ getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, ARM::PC);
markSuperRegs(Reserved, ARM::FPSCR);
markSuperRegs(Reserved, ARM::APSR_NZCV);
- if (TFI->hasFP(MF))
+ if (TFI->hasFP(MF) || STI.isTargetDarwin())
markSuperRegs(Reserved, getFramePointerReg(STI));
if (hasBasePointer(MF))
markSuperRegs(Reserved, BasePtr);
@@ -217,7 +223,7 @@ isAsmClobberable(const MachineFunction &MF, unsigned PhysReg) const {
const TargetRegisterClass *
ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
- const MachineFunction &) const {
+ const MachineFunction &MF) const {
const TargetRegisterClass *Super = RC;
TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
@@ -225,11 +231,13 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case ARM::GPRRegClassID:
case ARM::SPRRegClassID:
case ARM::DPRRegClassID:
+ case ARM::GPRPairRegClassID:
+ return Super;
case ARM::QPRRegClassID:
case ARM::QQPRRegClassID:
case ARM::QQQQPRRegClassID:
- case ARM::GPRPairRegClassID:
- return Super;
+ if (MF.getSubtarget<ARMSubtarget>().hasNEON())
+ return Super;
}
Super = *I++;
} while (Super);
@@ -317,7 +325,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
return false;
unsigned PairedPhys = 0;
- if (TargetRegisterInfo::isPhysicalRegister(Paired)) {
+ if (Register::isPhysicalRegister(Paired)) {
PairedPhys = Paired;
} else if (VRM && VRM->hasPhys(Paired)) {
PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this);
@@ -347,7 +355,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
Hint.first == (unsigned)ARMRI::RegPairEven) &&
- TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ Register::isVirtualRegister(Hint.second)) {
// If 'Reg' is one of the even / odd register pair and it's now changed
// (e.g. coalesced) into a different register. The other register of the
// pair allocation hint must be updated to reflect the relationship
@@ -357,7 +365,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
// Make sure the pair has not already divorced.
if (Hint.second == Reg) {
MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
- if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ if (Register::isVirtualRegister(NewReg))
MRI->setRegAllocationHint(NewReg,
Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven
: ARMRI::RegPairOdd, OtherReg);
@@ -663,7 +671,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
else {
assert(AFI->isThumb2Function());
- Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII, this);
}
assert(Done && "Unable to resolve frame index!");
(void)Done;
@@ -775,7 +783,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
else {
assert(AFI->isThumb2Function());
- Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
+ Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII, this);
}
if (Done)
return;
@@ -783,21 +791,32 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above, handle the rest, providing a register that is
// SP+LargeImm.
- assert((Offset ||
- (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
- (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
- "This code isn't needed if offset already handled!");
+ assert(
+ (Offset ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrModeT2_i7 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrModeT2_i7s2 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) ==
+ ARMII::AddrModeT2_i7s4) &&
+ "This code isn't needed if offset already handled!");
unsigned ScratchReg = 0;
int PIdx = MI.findFirstPredOperandIdx();
ARMCC::CondCodes Pred = (PIdx == -1)
? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
Register PredReg = (PIdx == -1) ? Register() : MI.getOperand(PIdx+1).getReg();
- if (Offset == 0)
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ const TargetRegisterClass *RegClass =
+ TII.getRegClass(MCID, FIOperandNum, this, *MI.getParent()->getParent());
+
+ if (Offset == 0 &&
+ (Register::isVirtualRegister(FrameReg) || RegClass->contains(FrameReg)))
// Must be addrmode4/6.
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false);
else {
- ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass);
+ ScratchReg = MF.getRegInfo().createVirtualRegister(RegClass);
if (!AFI->isThumbFunction())
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
Offset, Pred, PredReg, TII);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 7e2c72b4d712..477f3ad0a9a7 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -129,6 +129,9 @@ public:
const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID) const;
+ ArrayRef<MCPhysReg>
+ getIntraCallClobberedRegs(const MachineFunction *MF) const override;
+
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isAsmClobberable(const MachineFunction &MF,
unsigned PhysReg) const override;
@@ -176,8 +179,6 @@ public:
Register getFrameRegister(const MachineFunction &MF) const override;
unsigned getBaseRegister() const { return BasePtr; }
- bool isLowRegister(unsigned Reg) const;
-
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
diff --git a/lib/Target/ARM/ARMBasicBlockInfo.cpp b/lib/Target/ARM/ARMBasicBlockInfo.cpp
index 2de90e816b33..00a2231f59e3 100644
--- a/lib/Target/ARM/ARMBasicBlockInfo.cpp
+++ b/lib/Target/ARM/ARMBasicBlockInfo.cpp
@@ -6,14 +6,16 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBasicBlockInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMBasicBlockInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/Support/Debug.h"
#include <vector>
#define DEBUG_TYPE "arm-bb-utils"
@@ -47,7 +49,7 @@ void ARMBasicBlockUtils::computeBlockSize(MachineBasicBlock *MBB) {
BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
BBI.Size = 0;
BBI.Unalign = 0;
- BBI.PostAlign = 0;
+ BBI.PostAlign = Align::None();
for (MachineInstr &I : *MBB) {
BBI.Size += TII->getInstSizeInBytes(I);
@@ -62,8 +64,8 @@ void ARMBasicBlockUtils::computeBlockSize(MachineBasicBlock *MBB) {
// tBR_JTr contains a .align 2 directive.
if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
- BBI.PostAlign = 2;
- MBB->getParent()->ensureAlignment(2);
+ BBI.PostAlign = Align(4);
+ MBB->getParent()->ensureAlignment(Align(4));
}
}
@@ -126,9 +128,9 @@ void ARMBasicBlockUtils::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
for(unsigned i = BBNum + 1, e = MF.getNumBlockIDs(); i < e; ++i) {
// Get the offset and known bits at the end of the layout predecessor.
// Include the alignment of the current block.
- unsigned LogAlign = MF.getBlockNumbered(i)->getAlignment();
- unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
- unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+ const Align Align = MF.getBlockNumbered(i)->getAlignment();
+ const unsigned Offset = BBInfo[i - 1].postOffset(Align);
+ const unsigned KnownBits = BBInfo[i - 1].postKnownBits(Align);
// This is where block i begins. Stop if the offset is already correct,
// and we have updated 2 blocks. This is the maximum number of blocks
diff --git a/lib/Target/ARM/ARMBasicBlockInfo.h b/lib/Target/ARM/ARMBasicBlockInfo.h
index 400bba351cec..13df399ed995 100644
--- a/lib/Target/ARM/ARMBasicBlockInfo.h
+++ b/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -21,17 +21,18 @@
namespace llvm {
+struct BasicBlockInfo;
using BBInfoVector = SmallVectorImpl<BasicBlockInfo>;
/// UnknownPadding - Return the worst case padding that could result from
/// unknown offset bits. This does not include alignment padding caused by
/// known offset bits.
///
-/// @param LogAlign log2(alignment)
+/// @param Alignment alignment
/// @param KnownBits Number of known low offset bits.
-inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
- if (KnownBits < LogAlign)
- return (1u << LogAlign) - (1u << KnownBits);
+inline unsigned UnknownPadding(Align Alignment, unsigned KnownBits) {
+ if (KnownBits < Log2(Alignment))
+ return Alignment.value() - (1ull << KnownBits);
return 0;
}
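Worked example: with Alignment = Align(8) and KnownBits = 1 the offset is known to be even but otherwise unknown modulo 8, so up to six bytes of padding may be needed; once KnownBits reaches Log2(Alignment), none can be:

unsigned WorstCase = UnknownPadding(Align(8), 1); // 8 - (1 << 1) = 6
unsigned NoPadding = UnknownPadding(Align(8), 3); // KnownBits == Log2(8) -> 0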
@@ -65,10 +66,9 @@ struct BasicBlockInfo {
/// multiple of 1 << Unalign.
uint8_t Unalign = 0;
- /// PostAlign - When non-zero, the block terminator contains a .align
- /// directive, so the end of the block is aligned to 1 << PostAlign
- /// bytes.
- uint8_t PostAlign = 0;
+ /// PostAlign - When > 1, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to PostAlign bytes.
+ Align PostAlign;
BasicBlockInfo() = default;
@@ -84,16 +84,16 @@ struct BasicBlockInfo {
return Bits;
}
- /// Compute the offset immediately following this block. If LogAlign is
+ /// Compute the offset immediately following this block. If Align is
/// specified, return the offset the successor block will get if it has
/// this alignment.
- unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned postOffset(Align Alignment = Align::None()) const {
unsigned PO = Offset + Size;
- unsigned LA = std::max(unsigned(PostAlign), LogAlign);
- if (!LA)
+ const Align PA = std::max(PostAlign, Alignment);
+ if (PA == Align::None())
return PO;
// Add alignment padding from the terminator.
- return PO + UnknownPadding(LA, internalKnownBits());
+ return PO + UnknownPadding(PA, internalKnownBits());
}
/// Compute the number of known low bits of postOffset. If this block
@@ -101,9 +101,8 @@ struct BasicBlockInfo {
/// instruction alignment. An aligned terminator may increase the number
/// of known bits.
/// If LogAlign is given, also consider the alignment of the next block.
- unsigned postKnownBits(unsigned LogAlign = 0) const {
- return std::max(std::max(unsigned(PostAlign), LogAlign),
- internalKnownBits());
+ unsigned postKnownBits(Align Align = Align::None()) const {
+ return std::max(Log2(std::max(PostAlign, Align)), internalKnownBits());
}
};
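A small worked example of the byte-based interface (field values are illustrative): a block at Offset 0x100 with Size 6 whose terminator emits a 4-byte .align, and no known low offset bits:

BasicBlockInfo BBI;
BBI.Offset = 0x100;
BBI.Size = 6;
BBI.PostAlign = Align(4);
// postOffset() = 0x106 + UnknownPadding(Align(4), 0) = 0x106 + 3 = 0x109.
unsigned PO = BBI.postOffset();
// postKnownBits() = max(Log2(Align(4)), internalKnownBits()) = 2.
unsigned KB = BBI.postKnownBits();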
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 0cbe6e1871e4..d3b595ce8323 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -90,6 +90,8 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+ bool isIncomingArgumentHandler() const override { return false; }
+
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
@@ -169,8 +171,9 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
- const CallLowering::ArgInfo &Info, CCState &State) override {
- if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State))
+ const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
+ CCState &State) override {
+ if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State))
return true;
StackSize =
@@ -199,9 +202,8 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
if (SplitVTs.size() == 1) {
// Even if there is no splitting to do, we still want to replace the
// original type (e.g. pointer type -> integer).
- auto Flags = OrigArg.Flags;
- unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty);
- Flags.setOrigAlign(OriginalAlignment);
+ auto Flags = OrigArg.Flags[0];
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(OrigArg.Ty)));
SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
Flags, OrigArg.IsFixed);
return;
@@ -211,10 +213,9 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
EVT SplitVT = SplitVTs[i];
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
- auto Flags = OrigArg.Flags;
+ auto Flags = OrigArg.Flags[0];
- unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy);
- Flags.setOrigAlign(OriginalAlignment);
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(SplitTy)));
bool NeedsConsecutiveRegisters =
TLI.functionArgumentNeedsConsecutiveRegisters(
@@ -286,7 +287,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
CCAssignFn AssignFn)
: ValueHandler(MIRBuilder, MRI, AssignFn) {}
- bool isArgumentHandler() const override { return true; }
+ bool isIncomingArgumentHandler() const override { return true; }
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -298,7 +299,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
int FI = MFI.CreateFixedObject(Size, Offset, true);
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
- unsigned AddrReg =
+ Register AddrReg =
MRI.createGenericVirtualRegister(LLT::pointer(MPO.getAddrSpace(), 32));
MIRBuilder.buildFrameIndex(AddrReg, FI);
@@ -405,6 +406,7 @@ struct FormalArgHandler : public IncomingValueHandler {
: IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
+ MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
};
@@ -498,11 +500,7 @@ unsigned getCallOpcode(const ARMSubtarget &STI, bool isDirect) {
}
} // end anonymous namespace
-bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
- CallingConv::ID CallConv,
- const MachineOperand &Callee,
- const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const {
+bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const {
MachineFunction &MF = MIRBuilder.getMF();
const auto &TLI = *getTLI<ARMTargetLowering>();
const auto &DL = MF.getDataLayout();
@@ -520,7 +518,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create the call instruction so we can add the implicit uses of arg
// registers, but don't insert it yet.
- bool IsDirect = !Callee.isReg();
+ bool IsDirect = !Info.Callee.isReg();
auto CallOpcode = getCallOpcode(STI, IsDirect);
auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode);
@@ -528,35 +526,35 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (IsThumb)
MIB.add(predOps(ARMCC::AL));
- MIB.add(Callee);
+ MIB.add(Info.Callee);
if (!IsDirect) {
- auto CalleeReg = Callee.getReg();
- if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) {
+ auto CalleeReg = Info.Callee.getReg();
+ if (CalleeReg && !Register::isPhysicalRegister(CalleeReg)) {
unsigned CalleeIdx = IsThumb ? 2 : 0;
MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass(
MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(),
- *MIB.getInstr(), MIB->getDesc(), Callee, CalleeIdx));
+ *MIB.getInstr(), MIB->getDesc(), Info.Callee, CalleeIdx));
}
}
- MIB.addRegMask(TRI->getCallPreservedMask(MF, CallConv));
+ MIB.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));
bool IsVarArg = false;
SmallVector<ArgInfo, 8> ArgInfos;
- for (auto Arg : OrigArgs) {
+ for (auto Arg : Info.OrigArgs) {
if (!isSupportedType(DL, TLI, Arg.Ty))
return false;
if (!Arg.IsFixed)
IsVarArg = true;
- if (Arg.Flags.isByVal())
+ if (Arg.Flags[0].isByVal())
return false;
splitToValueTypes(Arg, ArgInfos, MF);
}
- auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, IsVarArg);
+ auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, IsVarArg);
OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
return false;
@@ -564,13 +562,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Now we can add the actual call instruction to the correct basic block.
MIRBuilder.insertInstr(MIB);
- if (!OrigRet.Ty->isVoidTy()) {
- if (!isSupportedType(DL, TLI, OrigRet.Ty))
+ if (!Info.OrigRet.Ty->isVoidTy()) {
+ if (!isSupportedType(DL, TLI, Info.OrigRet.Ty))
return false;
ArgInfos.clear();
- splitToValueTypes(OrigRet, ArgInfos, MF);
- auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, IsVarArg);
+ splitToValueTypes(Info.OrigRet, ArgInfos, MF);
+ auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, IsVarArg);
CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
return false;
diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h
index 794127b5ebc7..ddbc9feb90e2 100644
--- a/lib/Target/ARM/ARMCallLowering.h
+++ b/lib/Target/ARM/ARMCallLowering.h
@@ -38,9 +38,8 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const override;
- bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
- const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
diff --git a/lib/Target/ARM/ARMCallingConv.cpp b/lib/Target/ARM/ARMCallingConv.cpp
index 5ede7c67f7c2..92ebc542b423 100644
--- a/lib/Target/ARM/ARMCallingConv.cpp
+++ b/lib/Target/ARM/ARMCallingConv.cpp
@@ -193,7 +193,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
auto &DL = State.getMachineFunction().getDataLayout();
- unsigned StackAlign = DL.getStackAlignment();
+ unsigned StackAlign = DL.getStackAlignment().value();
unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
ArrayRef<MCPhysReg> RegList;
diff --git a/lib/Target/ARM/ARMCodeGenPrepare.cpp b/lib/Target/ARM/ARMCodeGenPrepare.cpp
index 2fc5f4aaab50..1c2c8aef55bb 100644
--- a/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -179,16 +179,12 @@ public:
}
static bool GenerateSignBits(Value *V) {
- if (auto *Arg = dyn_cast<Argument>(V))
- return Arg->hasSExtAttr();
-
if (!isa<Instruction>(V))
return false;
unsigned Opc = cast<Instruction>(V)->getOpcode();
return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
- Opc == Instruction::SRem || Opc == Instruction::SExt ||
- Opc == Instruction::SIToFP;
+ Opc == Instruction::SRem || Opc == Instruction::SExt;
}
static bool EqualTypeSize(Value *V) {
@@ -806,54 +802,48 @@ void IRPromoter::Mutate(Type *OrigTy,
/// return value is zeroext. We don't allow opcodes that can introduce sign
/// bits.
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
- if (auto *I = dyn_cast<ICmpInst>(V)) {
- // Now that we allow small types than TypeSize, only allow icmp of
- // TypeSize because they will require a trunc to be legalised.
- // TODO: Allow icmp of smaller types, and calculate at the end
- // whether the transform would be beneficial.
- if (isa<PointerType>(I->getOperand(0)->getType()))
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ switch (I->getOpcode()) {
+ default:
+ return isa<BinaryOperator>(I) && isSupportedType(I) &&
+ !GenerateSignBits(I);
+ case Instruction::GetElementPtr:
+ case Instruction::Store:
+ case Instruction::Br:
+ case Instruction::Switch:
return true;
- return EqualTypeSize(I->getOperand(0));
- }
-
- if (GenerateSignBits(V)) {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
- return false;
- }
-
- // Memory instructions
- if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
- return true;
-
- // Branches and targets.
- if( isa<BranchInst>(V) || isa<SwitchInst>(V) || isa<BasicBlock>(V))
- return true;
-
- // Non-instruction values that we can handle.
- if ((isa<Constant>(V) && !isa<ConstantExpr>(V)) || isa<Argument>(V))
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ return isSupportedType(I);
+ case Instruction::ZExt:
+ return isSupportedType(I->getOperand(0));
+ case Instruction::ICmp:
+ // Now that we allow smaller types than TypeSize, only allow icmp of
+ // TypeSize because they will require a trunc to be legalised.
+ // TODO: Allow icmp of smaller types, and calculate at the end
+ // whether the transform would be beneficial.
+ if (isa<PointerType>(I->getOperand(0)->getType()))
+ return true;
+ return EqualTypeSize(I->getOperand(0));
+ case Instruction::Call: {
+ // Special cases for calls as we need to check for zeroext
+ // TODO We should accept calls even if they don't have zeroext, as they
+ // can still be sinks.
+ auto *Call = cast<CallInst>(I);
+ return isSupportedType(Call) &&
+ Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ }
+ }
+ } else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) {
return isSupportedType(V);
-
- if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
- isa<LoadInst>(V))
+ } else if (isa<Argument>(V))
return isSupportedType(V);
- if (auto *Cast = dyn_cast<CastInst>(V))
- return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
-
- // Special cases for calls as we need to check for zeroext
- // TODO We should accept calls even if they don't have zeroext, as they can
- // still be sinks.
- if (auto *Call = dyn_cast<CallInst>(V))
- return isSupportedType(Call) &&
- Call->hasRetAttr(Attribute::AttrKind::ZExt);
-
- if (!isa<BinaryOperator>(V))
- return false;
-
- if (!isSupportedType(V))
- return false;
-
- return true;
+ return isa<BasicBlock>(V);
}
/// Check that the type of V would be promoted and that the original type is
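Roughly, the rewritten predicate sorts values like this (an illustrative sketch in IR terms, assuming isSupportedType accepts i8 here; not exhaustive):

// %a = add i8 %x, %y        ; supported: BinaryOperator, no sign bits
// %s = ashr i8 %x, 1        ; rejected: GenerateSignBits
// %c = icmp eq i8* %p, %q   ; supported: icmp of pointer operands
// %r = call zeroext i8 @f() ; supported only because of the zeroext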
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 60e5d7bf6098..24ca25f73e96 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -26,8 +26,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -69,6 +71,7 @@ STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+STATISTIC(NumLEInserted, "Number of LE backwards branches inserted");
static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
@@ -212,6 +215,7 @@ namespace {
const ARMBaseInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
+ MachineDominatorTree *DT = nullptr;
bool isThumb;
bool isThumb1;
bool isThumb2;
@@ -224,6 +228,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -238,7 +247,7 @@ namespace {
void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
bool BBHasFallthrough(MachineBasicBlock *MBB);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ Align getCPEAlign(const MachineInstr *CPEMI);
void scanFunctionJumpTables();
void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
@@ -327,8 +336,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
const BasicBlockInfo &BBI = BBInfo[J];
dbgs() << format("%08x %bb.%u\t", BBI.Offset, J)
<< " kb=" << unsigned(BBI.KnownBits)
- << " ua=" << unsigned(BBI.Unalign)
- << " pa=" << unsigned(BBI.PostAlign)
+ << " ua=" << unsigned(BBI.Unalign) << " pa=" << Log2(BBI.PostAlign)
<< format(" size=%#x\n", BBInfo[J].Size);
}
});
@@ -349,6 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
isPositionIndependentOrROPI =
STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
AFI = MF->getInfo<ARMFunctionInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
@@ -357,9 +366,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
HasFarJump = false;
bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
- // This pass invalidates liveness information when it splits basic blocks.
- MF->getRegInfo().invalidateLiveness();
-
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
MF->RenumberBlocks();
@@ -398,7 +404,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// Functions with jump tables need an alignment of 4 because they use the ADR
// instruction, which aligns the PC to 4 bytes before adding an offset.
if (!T2JumpTables.empty())
- MF->ensureAlignment(2);
+ MF->ensureAlignment(Align(4));
/// Remove dead constant pool entries.
MadeChange |= removeUnusedCPEntries();
@@ -487,8 +493,9 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
MF->push_back(BB);
- // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
- unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+ // MachineConstantPool measures alignment in bytes.
+ const Align MaxAlign(MCP->getConstantPoolAlignment());
+ const unsigned MaxLogAlign = Log2(MaxAlign);
// Mark the basic block as required by the const-pool.
BB->setAlignment(MaxAlign);
@@ -501,7 +508,8 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
// alignment of all entries as long as BB is sufficiently aligned. Keep
// track of the insertion point for each alignment. We are going to bucket
// sort the entries as they are created.
- SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxLogAlign + 1,
+ BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
@@ -526,7 +534,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
// Ensure that future entries with higher alignment get inserted before
// CPEMI. This is bucket sort with iterators.
- for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ for (unsigned a = LogAlign + 1; a <= MaxLogAlign; ++a)
if (InsPoint[a] == InsAt)
InsPoint[a] = CPEMI;
@@ -640,29 +648,27 @@ ARMConstantIslands::findConstPoolEntry(unsigned CPI,
return nullptr;
}
-/// getCPELogAlign - Returns the required alignment of the constant pool entry
-/// represented by CPEMI. Alignment is measured in log2(bytes) units.
-unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+/// getCPEAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI.
+Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) {
switch (CPEMI->getOpcode()) {
case ARM::CONSTPOOL_ENTRY:
break;
case ARM::JUMPTABLE_TBB:
- return isThumb1 ? 2 : 0;
+ return isThumb1 ? Align(4) : Align(1);
case ARM::JUMPTABLE_TBH:
- return isThumb1 ? 2 : 1;
+ return isThumb1 ? Align(4) : Align(2);
case ARM::JUMPTABLE_INSTS:
- return 1;
+ return Align(2);
case ARM::JUMPTABLE_ADDRS:
- return 2;
+ return Align(4);
default:
llvm_unreachable("unknown constpool entry kind");
}
unsigned CPI = getCombinedIndex(CPEMI);
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
- unsigned Align = MCP->getConstants()[CPI].getAlignment();
- assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
- return Log2_32(Align);
+ return Align(MCP->getConstants()[CPI].getAlignment());
}
/// scanFunctionJumpTables - Do a scan of the function, building up
@@ -687,7 +693,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
BBInfoVector &BBInfo = BBUtils->getBBInfo();
// The known bits of the entry block offset are determined by the function
// alignment.
- BBInfo.front().KnownBits = MF->getAlignment();
+ BBInfo.front().KnownBits = Log2(MF->getAlignment());
// Compute block offsets and known bits.
BBUtils->adjustBBOffsetsAfter(&MF->front());
@@ -824,11 +830,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
Scale = 2; // +-(offset_8*2)
NegOk = true;
break;
-
- case ARM::tLDRHi:
- Bits = 5;
- Scale = 2; // +(offset_5*2)
- break;
}
// Remember that this is a user of a CP entry.
@@ -885,6 +886,13 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
+ // Collect liveness information at MI.
+ LivePhysRegs LRs(*MF->getSubtarget().getRegisterInfo());
+ LRs.addLiveOuts(*OrigBB);
+ auto LivenessEnd = ++MachineBasicBlock::iterator(MI).getReverse();
+ for (MachineInstr &LiveMI : make_range(OrigBB->rbegin(), LivenessEnd))
+ LRs.stepBackward(LiveMI);
+
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
@@ -913,6 +921,12 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
+ // Update live-in information in the new block.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (MCPhysReg L : LRs)
+ if (!MRI.isReserved(L))
+ NewBB->addLiveIn(L);
+
// Update internal data structures to account for the newly inserted MBB.
// This is almost the same as updateForInsertedWaterBlock, except that
// the Water goes after OrigBB, not NewBB.
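The added liveness bookkeeping is the standard LivePhysRegs recipe: seed with the block's live-outs, then step backwards over the instructions. Condensed to its core (a sketch, not the pass code itself):

LivePhysRegs LRs(*MF->getSubtarget().getRegisterInfo());
LRs.addLiveOuts(*OrigBB);                // union of successors' live-ins
for (MachineInstr &MI : llvm::reverse(*OrigBB))
  LRs.stepBackward(MI);                  // remove defs, then add uses
// Stopping the walk at the split instruction instead, as the pass does,
// yields exactly the set copied into NewBB's live-ins above.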
@@ -1007,13 +1021,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
MachineBasicBlock* Water, CPUser &U,
unsigned &Growth) {
BBInfoVector &BBInfo = BBUtils->getBBInfo();
- unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
- unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
- unsigned NextBlockOffset, NextBlockAlignment;
+ const Align CPEAlign = getCPEAlign(U.CPEMI);
+ const unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPEAlign);
+ unsigned NextBlockOffset;
+ Align NextBlockAlignment;
MachineFunction::const_iterator NextBlock = Water->getIterator();
if (++NextBlock == MF->end()) {
NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
- NextBlockAlignment = 0;
} else {
NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
NextBlockAlignment = NextBlock->getAlignment();
@@ -1028,13 +1042,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
Growth = CPEEnd - NextBlockOffset;
// Compute the padding that would go at the end of the CPE to align the next
// block.
- Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment);
+ Growth += offsetToAlignment(CPEEnd, NextBlockAlignment);
// If the CPE is to be inserted before the instruction, that will raise
// the offset of the instruction. Also account for unknown alignment padding
// in blocks between CPE and the user.
if (CPEOffset < UserOffset)
- UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), Log2(CPEAlign));
} else
// CPE fits in existing padding.
Growth = 0;
@@ -1200,8 +1214,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
// inserting islands between BB0 and BB1 makes other accesses out of range.
MachineBasicBlock *UserBB = U.MI->getParent();
BBInfoVector &BBInfo = BBUtils->getBBInfo();
- unsigned MinNoSplitDisp =
- BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI));
+ const Align CPEAlign = getCPEAlign(U.CPEMI);
+ unsigned MinNoSplitDisp = BBInfo[UserBB->getNumber()].postOffset(CPEAlign);
if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)
return false;
for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
@@ -1254,7 +1268,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
- unsigned CPELogAlign = getCPELogAlign(CPEMI);
+ const Align CPEAlign = getCPEAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
BBInfoVector &BBInfo = BBUtils->getBBInfo();
const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
@@ -1267,7 +1281,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Size of branch to insert.
unsigned Delta = isThumb1 ? 2 : 4;
// Compute the offset where the CPE will begin.
- unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
+ unsigned CPEOffset = UserBBI.postOffset(CPEAlign) + Delta;
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB)
@@ -1308,11 +1322,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Try to split the block so it's fully aligned. Compute the latest split
// point where we can add a 4-byte branch instruction, and then align to
- // LogAlign which is the largest possible alignment in the function.
- unsigned LogAlign = MF->getAlignment();
- assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ // Align which is the largest possible alignment in the function.
+ const Align Align = MF->getAlignment();
+ assert(Align >= CPEAlign && "Over-aligned constant pool entry");
unsigned KnownBits = UserBBI.internalKnownBits();
- unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned UPad = UnknownPadding(Align, KnownBits);
unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x",
BaseInsertOffset));
@@ -1323,7 +1337,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
BaseInsertOffset -= 4;
LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
- << " la=" << LogAlign << " kb=" << KnownBits
+ << " la=" << Log2(Align) << " kb=" << KnownBits
<< " up=" << UPad << '\n');
// This could point off the end of the block if we've already got constant
@@ -1337,6 +1351,28 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
BaseInsertOffset =
std::max(UserBBI.postOffset() - UPad - 8,
UserOffset + TII->getInstSizeInBytes(*UserMI) + 1);
+ // If the CP reference (i.e., UserOffset) falls within the first four
+ // instructions after an IT, this recalculated BaseInsertOffset could land
+ // in the middle of the IT block. If it does, move BaseInsertOffset to just
+ // after the IT block. The CP entry still ends up in range, for the
+ // following reasons:
+ // 1. The initial BaseInsertOffset calculated is (UserOffset +
+ // U.getMaxDisp() - UPad).
+ // 2. An IT block is at most 4 instructions plus the "it" itself (18
+ // bytes).
+ // 3. All the relevant instructions support a much larger maximum
+ // displacement.
+ MachineBasicBlock::iterator I = UserMI;
+ ++I;
+ for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI),
+ PredReg = 0;
+ I->getOpcode() != ARM::t2IT &&
+ getITInstrPredicate(*I, PredReg) != ARMCC::AL;
+ Offset += TII->getInstSizeInBytes(*I), I = std::next(I)) {
+ BaseInsertOffset =
+ std::max(BaseInsertOffset, Offset + TII->getInstSizeInBytes(*I) + 1);
+ assert(I != UserMBB->end() && "Fell off end of block");
+ }
LLVM_DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
}
unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
@@ -1354,8 +1390,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
// Shift the insertion point by one unit of alignment so it is within reach.
- BaseInsertOffset -= 1u << LogAlign;
- EndInsertOffset -= 1u << LogAlign;
+ BaseInsertOffset -= Align.value();
+ EndInsertOffset -= Align.value();
}
// This is overly conservative, as we don't account for CPEMIs being
// reused within the block, but it doesn't matter much. Also assume CPEs
@@ -1397,9 +1433,10 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
}
// We really must not split an IT block.
- LLVM_DEBUG(unsigned PredReg; assert(
- !isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL));
-
+#ifndef NDEBUG
+ unsigned PredReg;
+ assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL);
+#endif
NewMBB = splitBlockBeforeInstr(&*MI);
}
@@ -1464,9 +1501,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
// Always align the new block because CP entries can be smaller than 4
// bytes. Be careful not to decrease the existing alignment, e.g. NewMBB may
// be an already aligned constant pool block.
- const unsigned Align = isThumb ? 1 : 2;
- if (NewMBB->getAlignment() < Align)
- NewMBB->setAlignment(Align);
+ const Align Alignment = isThumb ? Align(2) : Align(4);
+ if (NewMBB->getAlignment() < Alignment)
+ NewMBB->setAlignment(Alignment);
// Remove the original WaterList entry; we want subsequent insertions in
// this vicinity to go after the one we're about to insert. This
@@ -1495,7 +1532,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
decrementCPEReferenceCount(CPI, CPEMI);
// Mark the basic block as aligned as required by the const-pool entry.
- NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+ NewIsland->setAlignment(getCPEAlign(U.CPEMI));
// Increase the size of the island block to account for the new entry.
BBUtils->adjustBBSize(NewIsland, Size);
@@ -1529,10 +1566,11 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
BBInfo[CPEBB->getNumber()].Size = 0;
// This block no longer needs to be aligned.
- CPEBB->setAlignment(0);
- } else
+ CPEBB->setAlignment(Align::None());
+ } else {
// Entries are sorted by descending alignment, so realign from the front.
- CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin()));
+ CPEBB->setAlignment(getCPEAlign(&*CPEBB->begin()));
+ }
BBUtils->adjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
@@ -1620,7 +1658,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
// L2:
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
CC = ARMCC::getOppositeCondition(CC);
- unsigned CCReg = MI->getOperand(2).getReg();
+ Register CCReg = MI->getOperand(2).getReg();
// If the branch is at the end of its MBB and that has a fall-through block,
// direct the updated conditional branch to the fall-through block. Otherwise,
@@ -1778,16 +1816,10 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
return MadeChange;
}
+
bool ARMConstantIslands::optimizeThumb2Branches() {
- bool MadeChange = false;
- // The order in which branches appear in ImmBranches is approximately their
- // order within the function body. By visiting later branches first, we reduce
- // the distance between earlier forward branches and their targets, making it
- // more likely that the cbn?z optimization, which can only apply to forward
- // branches, will succeed.
- for (unsigned i = ImmBranches.size(); i != 0; --i) {
- ImmBranch &Br = ImmBranches[i-1];
+ auto TryShrinkBranch = [this](ImmBranch &Br) {
unsigned Opcode = Br.MI->getOpcode();
unsigned NewOpc = 0;
unsigned Scale = 1;
@@ -1815,47 +1847,115 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
BBUtils->adjustBBSize(MBB, -2);
BBUtils->adjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
- MadeChange = true;
+ return true;
}
}
+ return false;
+ };
- Opcode = Br.MI->getOpcode();
- if (Opcode != ARM::tBcc)
- continue;
+ struct ImmCompare {
+ MachineInstr* MI = nullptr;
+ unsigned NewOpc = 0;
+ };
+
+ auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp,
+ MachineBasicBlock *DestBB) {
+ ImmCmp.MI = nullptr;
+ ImmCmp.NewOpc = 0;
// If the conditional branch doesn't kill CPSR, then CPSR can be live out,
// so this transformation is not safe.
if (!Br.MI->killsRegister(ARM::CPSR))
- continue;
+ return false;
- NewOpc = 0;
unsigned PredReg = 0;
+ unsigned NewOpc = 0;
ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
NewOpc = ARM::tCBNZ;
- if (!NewOpc)
- continue;
- MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ else
+ return false;
+
// Check if the distance is within 126. Subtract 2 from the starting offset
// because the cmp will be eliminated.
unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;
BBInfoVector &BBInfo = BBUtils->getBBInfo();
unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126)
- continue;
+ return false;
// Search backwards to find a tCMPi8
auto *TRI = STI->getRegisterInfo();
MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);
if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8)
+ return false;
+
+ ImmCmp.MI = CmpMI;
+ ImmCmp.NewOpc = NewOpc;
+ return true;
+ };
+
+ auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) {
+ if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() ||
+ STI->hasMinSize())
+ return false;
+
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) ||
+ !BBUtils->isBBInRange(Br.MI, DestBB, 4094))
+ return false;
+
+ if (!DT->dominates(DestBB, MBB))
+ return false;
+
+ // We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite
+ // target of Br. So now we need to reverse the condition.
+ Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? ARM::tCBNZ : ARM::tCBZ;
+
+ MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(),
+ TII->get(ARM::t2LE));
+ MIB.add(Br.MI->getOperand(0));
+ Br.MI->eraseFromParent();
+ Br.MI = MIB;
+ ++NumLEInserted;
+ return true;
+ };
+
+ bool MadeChange = false;
+
+ // The order in which branches appear in ImmBranches is approximately their
+ // order within the function body. By visiting later branches first, we reduce
+ // the distance between earlier forward branches and their targets, making it
+ // more likely that the cbn?z optimization, which can only apply to forward
+ // branches, will succeed.
+ for (ImmBranch &Br : reverse(ImmBranches)) {
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ?
+ MBB->getFallThrough() :
+ MBB->back().getOperand(0).getMBB();
+
+ ImmCompare Cmp;
+ if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) {
+ DestBB = ExitBB;
+ MadeChange = true;
+ } else {
+ FindCmpForCBZ(Br, Cmp, DestBB);
+ MadeChange |= TryShrinkBranch(Br);
+ }
+
+ unsigned Opcode = Br.MI->getOpcode();
+ if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc)
continue;
- unsigned Reg = CmpMI->getOperand(0).getReg();
+ Register Reg = Cmp.MI->getOperand(0).getReg();
// Check for Kill flags on Reg. If they are present, remove them and set kill
// on the new CBZ.
+ auto *TRI = STI->getRegisterInfo();
MachineBasicBlock::iterator KillMI = Br.MI;
bool RegKilled = false;
do {
@@ -1865,19 +1965,32 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
RegKilled = true;
break;
}
- } while (KillMI != CmpMI);
+ } while (KillMI != Cmp.MI);
// Create the new CBZ/CBNZ
- MachineBasicBlock *MBB = Br.MI->getParent();
- LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
+ LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI);
MachineInstr *NewBR =
- BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc))
.addReg(Reg, getKillRegState(RegKilled))
.addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
- CmpMI->eraseFromParent();
- Br.MI->eraseFromParent();
- Br.MI = NewBR;
+
+ Cmp.MI->eraseFromParent();
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
BBInfo[MBB->getNumber()].Size -= 2;
+
+ if (Br.MI->getOpcode() == ARM::tBcc) {
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ } else if (&MBB->back() != Br.MI) {
+ // We've generated an LE and already erased the original conditional
+ // branch. The CBN?Z is now used to branch to the other successor, so an
+ // unconditional branch terminator is now redundant.
+ MachineInstr *LastMI = &MBB->back();
+ if (LastMI != Br.MI) {
+ BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize();
+ LastMI->eraseFromParent();
+ }
+ }
BBUtils->adjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;
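
The cbz/cbnz fold above turns a tCMPi8 + tBcc pair into a single compare-and-branch, and (new in this hunk) can instead retarget the compare when the conditional branch becomes a t2LE low-overhead loop end. Both paths hinge on the same encoding rule: cbz/cbnz only branch forward, at most 126 bytes. A minimal sketch of that range test, assuming the byte offsets computed by BBUtils above (helper name hypothetical):

static bool inCbzRange(unsigned BrOffset, unsigned DestOffset) {
  // cbz/cbnz encode a forward-only offset of 0..126 bytes; the pass
  // pre-adjusts BrOffset (+4 for the branch, -2 for the removed cmp).
  return BrOffset < DestOffset && (DestOffset - BrOffset) <= 126;
}
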
@@ -1931,8 +2044,8 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
// of BaseReg, but only if the t2ADDrs can be removed.
// + Some instruction other than t2ADDrs computing the entry. Not seen in
// the wild, but we should be careful.
- unsigned EntryReg = JumpMI->getOperand(0).getReg();
- unsigned BaseReg = LEAMI->getOperand(0).getReg();
+ Register EntryReg = JumpMI->getOperand(0).getReg();
+ Register BaseReg = LEAMI->getOperand(0).getReg();
CanDeleteLEA = true;
BaseRegKill = false;
@@ -2009,7 +2122,7 @@ static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
// but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg
// and is not clobbered / used.
MachineInstr *RemovableAdd = nullptr;
- unsigned EntryReg = JumpMI->getOperand(0).getReg();
+ Register EntryReg = JumpMI->getOperand(0).getReg();
// Find the last ADD to set EntryReg
MachineBasicBlock::iterator I(LEAMI);
@@ -2106,7 +2219,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// %idx = tLSLri %idx, 2
// %base = tLEApcrelJT
// %t = tLDRr %base, %idx
- unsigned BaseReg = User.MI->getOperand(0).getReg();
+ Register BaseReg = User.MI->getOperand(0).getReg();
if (User.MI->getIterator() == User.MI->getParent()->begin())
continue;
@@ -2116,7 +2229,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
!Shift->getOperand(2).isKill())
continue;
IdxReg = Shift->getOperand(2).getReg();
- unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();
+ Register ShiftedIdxReg = Shift->getOperand(0).getReg();
// It's important that IdxReg is live until the actual TBB/TBH. Most of
// the range is checked later, but the LEA might still clobber it and not
@@ -2313,6 +2426,10 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
MachineFunction::iterator MBBI = ++JTBB->getIterator();
MF->insert(MBBI, NewBB);
+ // Copy live-in information to new block.
+ for (const MachineBasicBlock::RegisterMaskPair &RegMaskPair : BB->liveins())
+ NewBB->addLiveIn(RegMaskPair);
+
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond directly to anything in the source.
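
The live-in copy added above is needed because constant-island placement runs after register allocation: any physical register live into BB is also live into the freshly inserted NewBB, and the machine verifier will complain if the new block does not declare it. The same idiom in isolation (helper name hypothetical):

static void copyLiveIns(const MachineBasicBlock &From, MachineBasicBlock &To) {
  // Mirror From's live-in set (register/lane-mask pairs) onto To.
  for (const MachineBasicBlock::RegisterMaskPair &LI : From.liveins())
    To.addLiveIn(LI);
}
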
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 3bdb0e1ef62d..72c95f441265 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index b32ba3eeea18..563fdda56104 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -481,7 +481,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
unsigned OpIdx = 0;
bool DstIsDead = MI.getOperand(OpIdx).isDead();
- unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ Register DstReg = MI.getOperand(OpIdx++).getReg();
if(TableEntry->RealOpc == ARM::VLD2DUPd8x2 ||
TableEntry->RealOpc == ARM::VLD2DUPd16x2 ||
TableEntry->RealOpc == ARM::VLD2DUPd32x2) {
@@ -492,7 +492,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
assert(RegSpc == OddDblSpc && "Unexpected spacing!");
SubRegIndex = ARM::dsub_1;
}
- unsigned SubReg = TRI->getSubReg(DstReg, SubRegIndex);
+ Register SubReg = TRI->getSubReg(DstReg, SubRegIndex);
unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0,
&ARM::DPairSpcRegClass);
MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead));
@@ -624,7 +624,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
- unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ Register SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0, getUndefRegState(SrcIsUndef));
@@ -760,7 +760,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
}
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
- unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ Register SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0);
@@ -789,6 +789,7 @@ static bool IsAnAddressOperand(const MachineOperand &MO) {
case MachineOperand::MO_Immediate:
case MachineOperand::MO_CImmediate:
case MachineOperand::MO_FPImmediate:
+ case MachineOperand::MO_ShuffleMask:
return false;
case MachineOperand::MO_MachineBasicBlock:
return true;
@@ -828,7 +829,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned Opcode = MI.getOpcode();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
@@ -932,13 +933,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
const MachineOperand &Dest = MI.getOperand(0);
- unsigned TempReg = MI.getOperand(1).getReg();
+ Register TempReg = MI.getOperand(1).getReg();
// Duplicating undef operands into 2 instructions does not guarantee the same
// value on both; however, undef should be replaced by xzr anyway.
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
- unsigned AddrReg = MI.getOperand(2).getReg();
- unsigned DesiredReg = MI.getOperand(3).getReg();
- unsigned NewReg = MI.getOperand(4).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register DesiredReg = MI.getOperand(3).getReg();
+ Register NewReg = MI.getOperand(4).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -1035,8 +1036,8 @@ static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
unsigned Flags, bool IsThumb,
const TargetRegisterInfo *TRI) {
if (IsThumb) {
- unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
- unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
+ Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
+ Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
MIB.addReg(RegLo, Flags);
MIB.addReg(RegHi, Flags);
} else
@@ -1051,19 +1052,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
- unsigned TempReg = MI.getOperand(1).getReg();
+ Register TempReg = MI.getOperand(1).getReg();
// Duplicating undef operands into 2 instructions does not guarantee the same
// value on both; however, undef should be replaced by xzr anyway.
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
- unsigned AddrReg = MI.getOperand(2).getReg();
- unsigned DesiredReg = MI.getOperand(3).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register DesiredReg = MI.getOperand(3).getReg();
MachineOperand New = MI.getOperand(4);
New.setIsKill(false);
- unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
- unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
- unsigned DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
- unsigned DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);
+ Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
+ Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
+ Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
+ Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -1204,8 +1205,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
NewMI->addOperand(MBBI->getOperand(i));
- // Delete the pseudo instruction TCRETURN.
+
+ // Update call site info and delete the pseudo instruction TCRETURN.
+ MBB.getParent()->moveCallSiteInfo(&MI, &*NewMI);
MBB.erase(MBBI);
+
MBBI = NewMI;
return true;
}
@@ -1336,7 +1340,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// for us. Otherwise, expand to nothing.
if (RI.hasBasePointer(MF)) {
int32_t NumBytes = AFI->getFramePtrSpillOffset();
- unsigned FramePtr = RI.getFrameRegister(MF);
+ Register FramePtr = RI.getFrameRegister(MF);
assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
"base pointer without frame pointer?");
@@ -1412,7 +1416,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(MF->getFunction().getContext(),
"__aeabi_read_tp", PCLabelID, 0);
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
@@ -1435,6 +1439,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.cloneMemRefs(MI);
TransferImpOps(MI, MIB, MIB);
+ MI.getMF()->moveCallSiteInfo(&MI, &*MIB);
MI.eraseFromParent();
return true;
}
@@ -1442,7 +1447,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::t2LDRpci_pic: {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
? ARM::tLDRpci : ARM::t2LDRpci;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
@@ -1464,7 +1469,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::LDRLIT_ga_pcrel_ldr:
case ARM::tLDRLIT_ga_abs:
case ARM::tLDRLIT_ga_pcrel: {
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
const MachineOperand &MO1 = MI.getOperand(1);
auto Flags = MO1.getTargetFlags();
@@ -1522,7 +1527,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::t2MOV_ga_pcrel: {
// Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
unsigned LabelId = AFI->createPICLabelUId();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
const MachineOperand &MO1 = MI.getOperand(1);
const GlobalValue *GV = MO1.getGlobal();
@@ -1586,7 +1591,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Grab the Q register destination.
bool DstIsDead = MI.getOperand(OpIdx).isDead();
- unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ Register DstReg = MI.getOperand(OpIdx++).getReg();
// Copy the source register.
MIB.add(MI.getOperand(OpIdx++));
@@ -1596,8 +1601,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.add(MI.getOperand(OpIdx++));
// Add the destination operands (D subregs).
- unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
- unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
@@ -1617,7 +1622,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Grab the Q register source.
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
- unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ Register SrcReg = MI.getOperand(OpIdx++).getReg();
// Copy the destination register.
MachineOperand Dst(MI.getOperand(OpIdx++));
@@ -1628,8 +1633,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.add(MI.getOperand(OpIdx++));
// Add the source operands (D subregs).
- unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
- unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
.addReg(D1, SrcIsKill ? RegState::Kill : 0);
@@ -1915,6 +1920,37 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::CMP_SWAP_64:
return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
+
+ case ARM::tBL_PUSHLR:
+ case ARM::BL_PUSHLR: {
+ const bool Thumb = Opcode == ARM::tBL_PUSHLR;
+ Register Reg = MI.getOperand(0).getReg();
+ assert(Reg == ARM::LR && "expect LR register!");
+ MachineInstrBuilder MIB;
+ if (Thumb) {
+ // push {lr}
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(Reg);
+
+ // bl __gnu_mcount_nc
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL));
+ } else {
+ // stmdb sp!, {lr}
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(Reg);
+
+ // bl __gnu_mcount_nc
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
+ }
+ MIB.cloneMemRefs(MI);
+ for (unsigned i = 1; i < MI.getNumOperands(); ++i)
+   MIB.add(MI.getOperand(i));
+ MI.eraseFromParent();
+ return true;
+ }
}
}
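
Two hunks above also start calling moveCallSiteInfo when a call pseudo (the TCRETURN and expanded-call cases) is replaced. The call-site info map is keyed by MachineInstr*, so erasing the old call without re-keying would silently drop the debug entry-value data attached to it. A minimal sketch of the pattern, assuming a pass that swaps one call instruction for another (helper name hypothetical):

static MachineInstr *replaceCall(MachineInstr &Old, MachineInstrBuilder &New) {
  // Re-key call-site info onto the replacement before erasing the
  // original call instruction.
  Old.getMF()->moveCallSiteInfo(&Old, &*New);
  Old.eraseFromParent();
  return &*New;
}
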
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 6e274d269bf2..1fc5ff6921c6 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -191,8 +191,8 @@ class ARMFastISel final : public FastISel {
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt, bool isEquality);
- bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ bool isZExt);
+ bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
@@ -219,15 +219,15 @@ class ARMFastISel final : public FastISel {
bool Return,
bool isVarArg);
bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
- SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<Register> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
- SmallVectorImpl<unsigned> &RegArgs,
+ SmallVectorImpl<Register> &RegArgs,
CallingConv::ID CC,
unsigned &NumBytes,
bool isVarArg);
unsigned getLibcallReg(const Twine &Name);
- bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool isVarArg);
bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
@@ -301,7 +301,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
// Make sure the input operand is sufficiently constrained to be legal
@@ -913,7 +913,7 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
AddOptionalDefs(MIB);
}
-bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
unsigned Alignment, bool isZExt, bool allocReg) {
unsigned Opc;
bool useAM3 = false;
@@ -1045,7 +1045,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
Address Addr;
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
- unsigned ResultReg;
+ Register ResultReg;
if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
return false;
updateValueMap(I, ResultReg);
@@ -1259,8 +1259,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
- CI->isEquality()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
@@ -1349,7 +1348,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt, bool isEquality) {
+ bool isZExt) {
Type *Ty = Src1Value->getType();
EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple()) return false;
@@ -1397,19 +1396,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
// TODO: Verify compares.
case MVT::f32:
isICmp = false;
- // Equality comparisons shouldn't raise Invalid on unordered inputs.
- if (isEquality)
- CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
- else
- CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
+ CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
break;
case MVT::f64:
isICmp = false;
- // Equality comparisons shouldn't raise Invalid on unordered inputs.
- if (isEquality)
- CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
- else
- CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
+ CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
break;
case MVT::i1:
case MVT::i8:
@@ -1485,8 +1476,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
- CI->isEquality()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
// Now set a register based on the comparison. Explicitly set the predicates
@@ -1893,10 +1883,10 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
}
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
- SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<Register> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
- SmallVectorImpl<unsigned> &RegArgs,
+ SmallVectorImpl<Register> &RegArgs,
CallingConv::ID CC,
unsigned &NumBytes,
bool isVarArg) {
@@ -1960,7 +1950,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
const Value *ArgVal = Args[VA.getValNo()];
- unsigned Arg = ArgRegs[VA.getValNo()];
+ Register Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
@@ -2039,7 +2029,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
return true;
}
-bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool isVarArg) {
// Issue CALLSEQ_END
@@ -2060,7 +2050,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// double fp reg we want.
MVT DestVT = RVLocs[0].getValVT();
const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
- unsigned ResultReg = createResultReg(DstRC);
+ Register ResultReg = createResultReg(DstRC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VMOVDRR), ResultReg)
.addReg(RVLocs[0].getLocReg())
@@ -2081,7 +2071,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
- unsigned ResultReg = createResultReg(DstRC);
+ Register ResultReg = createResultReg(DstRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(RVLocs[0].getLocReg());
@@ -2162,7 +2152,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
}
// Make the copy.
- unsigned DstReg = VA.getLocReg();
+ Register DstReg = VA.getLocReg();
const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
if (!SrcRC->contains(DstReg))
@@ -2231,7 +2221,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
- SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<Register, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(I->getNumOperands());
@@ -2247,8 +2237,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
if (!isTypeLegal(ArgTy, ArgVT)) return false;
ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- Flags.setOrigAlign(OriginalAlignment);
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy)));
Args.push_back(Op);
ArgRegs.push_back(Arg);
@@ -2257,13 +2246,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
}
// Handle the arguments now that we've gotten them.
- SmallVector<unsigned, 4> RegArgs;
+ SmallVector<Register, 4> RegArgs;
unsigned NumBytes;
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
RegArgs, CC, NumBytes, false))
return false;
- unsigned CalleeReg = 0;
+ Register CalleeReg;
if (Subtarget->genLongCalls()) {
CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
if (CalleeReg == 0) return false;
@@ -2282,7 +2271,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
MIB.addExternalSymbol(TLI.getLibcallName(Call));
// Add implicit physical register uses to the call.
- for (unsigned R : RegArgs)
+ for (Register R : RegArgs)
MIB.addReg(R, RegState::Implicit);
// Add a register mask with the call-preserved registers.
@@ -2290,7 +2279,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
- SmallVector<unsigned, 4> UsedRegs;
+ SmallVector<Register, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
// Set all unused physreg defs as dead.
@@ -2340,7 +2329,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
- SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<Register, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
unsigned arg_size = CS.arg_size();
@@ -2377,12 +2366,11 @@ bool ARMFastISel::SelectCall(const Instruction *I,
ArgVT != MVT::i1)
return false;
- unsigned Arg = getRegForValue(*i);
- if (Arg == 0)
+ Register Arg = getRegForValue(*i);
+ if (!Arg.isValid())
return false;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- Flags.setOrigAlign(OriginalAlignment);
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy)));
Args.push_back(*i);
ArgRegs.push_back(Arg);
@@ -2391,7 +2379,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
}
// Handle the arguments now that we've gotten them.
- SmallVector<unsigned, 4> RegArgs;
+ SmallVector<Register, 4> RegArgs;
unsigned NumBytes;
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
RegArgs, CC, NumBytes, isVarArg))
@@ -2401,7 +2389,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
if (!GV || Subtarget->genLongCalls()) UseReg = true;
- unsigned CalleeReg = 0;
+ Register CalleeReg;
if (UseReg) {
if (IntrMemName)
CalleeReg = getLibcallReg(IntrMemName);
@@ -2427,7 +2415,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MIB.addExternalSymbol(IntrMemName, 0);
// Add implicit physical register uses to the call.
- for (unsigned R : RegArgs)
+ for (Register R : RegArgs)
MIB.addReg(R, RegState::Implicit);
// Add a register mask with the call-preserved registers.
@@ -2435,7 +2423,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
- SmallVector<unsigned, 4> UsedRegs;
+ SmallVector<Register, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
return false;
@@ -2476,7 +2464,7 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
}
bool RV;
- unsigned ResultReg;
+ Register ResultReg;
RV = ARMEmitLoad(VT, ResultReg, Src);
assert(RV && "Should be able to handle this load.");
RV = ARMEmitStore(VT, ResultReg, Dest);
@@ -2506,7 +2494,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
const ARMBaseRegisterInfo *RegInfo =
static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
- unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = FramePtr;
// Recursively load frame address
@@ -2947,7 +2935,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
Address Addr;
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
- unsigned ResultReg = MI->getOperand(0).getReg();
+ Register ResultReg = MI->getOperand(0).getReg();
if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
MachineBasicBlock::iterator I(MI);
@@ -2974,7 +2962,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
MachineMemOperand::MOLoad, 4, 4);
- unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
+ Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
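
Most of the churn in ARMFastISel.cpp is the mechanical unsigned -> Register migration. llvm::Register converts implicitly in both directions, so behaviour is unchanged; what it adds is stated intent plus a few query helpers. A small sketch of the difference, assuming an ordinary FastISel-style context (helper name hypothetical):

static Register makeTemp(MachineRegisterInfo &MRI,
                         const TargetRegisterClass *RC) {
  Register R = MRI.createVirtualRegister(RC);
  // 'if (!Arg.isValid())' in the patch replaces the old 'if (Arg == 0)';
  // both compile, but the former states what is actually being checked.
  assert(R.isValid() && Register::isVirtualRegister(R));
  return R;
}
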
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index bedb779bcba0..01ae93086dcb 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -76,7 +76,7 @@ skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
unsigned NumAlignedDPRCS2Regs);
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
STI(sti) {}
bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
@@ -376,7 +376,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc dl;
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ Register FramePtr = RegInfo->getFrameRegister(MF);
// Determine the sizes of each callee-save spill areas and record which frame
// belongs to which callee-save spill areas.
@@ -780,7 +780,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI.getStackSize();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ Register FramePtr = RegInfo->getFrameRegister(MF);
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
@@ -1503,11 +1503,17 @@ static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
/// instructions will require a scratch register during their expansion later.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
- const TargetFrameLowering *TFI) {
+ const TargetFrameLowering *TFI,
+ bool &HasNonSPFrameIndex) {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned Limit = (1 << 12) - 1;
for (auto &MBB : MF) {
for (auto &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isFI())
continue;
@@ -1518,13 +1524,29 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
Limit = std::min(Limit, (1U << 8) - 1);
break;
}
+ // t2ADDri will not require an extra register; it can reuse the
+ // destination.
+ if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
+ break;
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
+ if (RegClass && !RegClass->contains(ARM::SP))
+ HasNonSPFrameIndex = true;
// Otherwise check the addressing mode.
switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode_i12:
+ case ARMII::AddrMode2:
+ // Default 12 bit limit.
+ break;
case ARMII::AddrMode3:
case ARMII::AddrModeT2_i8:
Limit = std::min(Limit, (1U << 8) - 1);
break;
+ case ARMII::AddrMode5FP16:
+ Limit = std::min(Limit, ((1U << 8) - 1) * 2);
+ break;
case ARMII::AddrMode5:
case ARMII::AddrModeT2_i8s4:
case ARMII::AddrModeT2_ldrex:
@@ -1541,8 +1563,17 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
// Addressing modes 4 & 6 (load/store) can't encode an immediate offset
// for stack references.
return 0;
- default:
+ case ARMII::AddrModeT2_i7:
+ Limit = std::min(Limit, ((1U << 7) - 1) * 1);
+ break;
+ case ARMII::AddrModeT2_i7s2:
+ Limit = std::min(Limit, ((1U << 7) - 1) * 2);
break;
+ case ARMII::AddrModeT2_i7s4:
+ Limit = std::min(Limit, ((1U << 7) - 1) * 4);
+ break;
+ default:
+ llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
}
break; // At most one FI per instruction
}
@@ -1623,7 +1654,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
(void)TRI; // Silence unused warning in non-assert builds.
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ Register FramePtr = RegInfo->getFrameRegister(MF);
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
// scratch register. Also spill R4 if Thumb2 function has varsized objects,
@@ -1784,6 +1815,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
EstimatedStackSize += 16; // For possible paddings.
unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
+ bool HasNonSPFrameIndex = false;
if (AFI->isThumb1OnlyFunction()) {
// For Thumb1, don't bother to iterate over the function. The only
// instruction that requires an emergency spill slot is a store to a
@@ -1804,7 +1836,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
EstimatedRSStackSizeLimit = (1U << 8) * 4;
EstimatedRSFixedSizeLimit = (1U << 5) * 4;
} else {
- EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+ EstimatedRSStackSizeLimit =
+ estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
}
// Final estimate of whether sp or bp-relative accesses might require
@@ -1830,12 +1863,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
- HasLargeArgumentList;
+ HasLargeArgumentList || HasNonSPFrameIndex;
LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
- << "; EstimatedStack" << EstimatedStackSize
- << "; EstimatedFPStack" << MaxFixedOffset - MaxFPOffset
- << "; BigFrameOffsets: " << BigFrameOffsets
- << "\n");
+ << "; EstimatedStack: " << EstimatedStackSize
+ << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
+ << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
if (BigFrameOffsets ||
!CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
@@ -2080,9 +2112,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
ExtraCSSpill = true;
}
}
- if (!ExtraCSSpill) {
+ if (!ExtraCSSpill && RS) {
// Reserve a slot closest to SP or frame pointer.
- assert(RS && "Register scavenging not provided");
LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
const TargetRegisterClass &RC = ARM::GPRRegClass;
unsigned Size = TRI->getSpillSize(RC);
@@ -2097,6 +2128,12 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
AFI->setLRIsSpilledForFarJump(true);
}
AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
+
+ // If we have the "returned" parameter attribute which guarantees that we
+ // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
+ // record that fact for IPRA.
+ if (AFI->getPreservesR0())
+ SavedRegs.set(ARM::R0);
}
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
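
The SavedRegs.set(ARM::R0) added at the end of determineCalleeSaves feeds interprocedural register allocation: when the first argument carries the 'returned' attribute (typical for C++ 'structors), r0 leaves the function holding the same value it arrived with, so callers may treat it as preserved. The recording step in isolation, assuming the ARMFunctionInfo flag introduced by this patch (helper name hypothetical):

static void recordPreservedR0(const ARMFunctionInfo &AFI,
                              BitVector &SavedRegs) {
  if (AFI.getPreservesR0())
    SavedRegs.set(ARM::R0); // IPRA now models r0 as surviving the call
}
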
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 7544ca3c38d6..6d8aee597945 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -63,6 +63,11 @@ public:
bool enableShrinkWrapping(const MachineFunction &MF) const override {
return true;
}
+ bool isProfitableForNoCSROpt(const Function &F) const override {
+ // The no-CSR optimisation is bad for code size on ARM, because we can save
+ // many registers with a single PUSH/POP pair.
+ return false;
+ }
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b349627b67b1..8f6515c423eb 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -139,6 +139,8 @@ public:
bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
+ template <unsigned Shift>
+ bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
// Thumb 2 Addressing Modes:
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
@@ -146,9 +148,12 @@ public:
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm);
- template<unsigned Shift>
- bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
- SDValue &OffImm);
+ template <unsigned Shift>
+ bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
+ bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
+ unsigned Shift);
+ template <unsigned Shift>
+ bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
@@ -179,6 +184,7 @@ private:
bool tryARMIndexedLoad(SDNode *N);
bool tryT1IndexedLoad(SDNode *N);
bool tryT2IndexedLoad(SDNode *N);
+ bool tryMVEIndexedLoad(SDNode *N);
/// SelectVLD - Select NEON load intrinsics. NumVecs should be
/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
@@ -246,10 +252,6 @@ private:
SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
bool is64BitVector);
- /// Returns the number of instructions required to materialize the given
- /// constant in a register, or 3 if a literal pool load is needed.
- unsigned ConstantMaterializationCost(unsigned Val) const;
-
/// Checks if N is a multiplication by a constant where we can extract out a
/// power of two from the constant so that it can be used in a shift, but only
/// if it simplifies the materialization of the constant. Returns true if it
@@ -450,27 +452,6 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
(ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
-unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
- if (Subtarget->isThumb()) {
- if (Val <= 255) return 1; // MOV
- if (Subtarget->hasV6T2Ops() &&
- (Val <= 0xffff || // MOV
- ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
- ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
- return 1;
- if (Val <= 510) return 2; // MOV + ADDi8
- if (~Val <= 255) return 2; // MOV + MVN
- if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
- } else {
- if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
- if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
- if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
- if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
- }
- if (Subtarget->useMovt()) return 2; // MOVW + MOVT
- return 3; // Literal pool load
-}
-
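
The deleted ConstantMaterializationCost has not disappeared; it moved to a shared helper that takes the subtarget explicitly, as the updated call sites below show. A few hypothetical spot-checks of the values it returns on a Thumb2 subtarget (assuming the relocated two-argument signature):

static void costExamples(const ARMSubtarget *ST) {
  assert(ConstantMaterializationCost(0xFFu, ST) == 1);       // MOV imm8
  assert(ConstantMaterializationCost(0xFFFFu, ST) == 1);     // MOVW
  assert(ConstantMaterializationCost(~0xFFu, ST) <= 2);      // MVN, or MOV+MVN
  assert(ConstantMaterializationCost(0x12345678u, ST) <= 3); // MOVW+MOVT or litpool
}
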
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
unsigned MaxShift,
unsigned &PowerOfTwo,
@@ -500,8 +481,8 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
// Only optimise if the new cost is better
unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
- unsigned OldCost = ConstantMaterializationCost(MulConstVal);
- unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
+ unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
+ unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
return NewCost < OldCost;
}
@@ -1172,6 +1153,28 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
return false;
}
+template <unsigned Shift>
+bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
+ RHSC)) {
+ Base = N.getOperand(0);
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+ OffImm =
+ CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
@@ -1278,35 +1281,59 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
return false;
}
-template<unsigned Shift>
-bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
- SDValue &Base, SDValue &OffImm) {
- if (N.getOpcode() == ISD::SUB ||
- CurDAG->isBaseWithConstantOffset(N)) {
- if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if (N.getOpcode() == ISD::SUB)
- RHSC = -RHSC;
-
- if (isShiftedInt<7, Shift>(RHSC)) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(
+template <unsigned Shift>
+bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
+ RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
- }
- OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
- return true;
}
+
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+ OffImm =
+ CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
+ return true;
}
}
// Base only.
Base = N;
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
+template <unsigned Shift>
+bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm) {
+ return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm,
+ unsigned Shift) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ int RHSC;
+ if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
+ OffImm =
+ ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
+ ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
+ : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
+ MVT::i32);
+ return true;
+ }
+ return false;
+}
+
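
All the imm7 selectors above share one range rule: the byte offset must be a multiple of the access size (1 << Shift) and, once scaled, fit the signed 7-bit field. Restated as a standalone predicate (hypothetical helper mirroring isScaledConstantInRange's bounds in the base-register form):

static bool fitsT2Imm7(int Offset, unsigned Shift) {
  int Scale = 1 << Shift;
  // e.g. Shift == 2 admits multiples of 4 in [-508, 508].
  return Offset % Scale == 0 &&
         Offset / Scale >= -127 && Offset / Scale <= 127;
}
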
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
@@ -1565,6 +1592,68 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
return false;
}
+bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return false;
+ EVT LoadedVT = LD->getMemoryVT();
+ if (!LoadedVT.isVector())
+ return false;
+ bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ SDValue Offset;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ unsigned Align = LD->getAlignment();
+ bool IsLE = Subtarget->isLittle();
+
+ if (Align >= 2 && LoadedVT == MVT::v4i16 &&
+ SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
+ if (isSExtLd)
+ Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
+ else
+ Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
+ } else if (LoadedVT == MVT::v8i8 &&
+ SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
+ if (isSExtLd)
+ Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
+ else
+ Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
+ } else if (LoadedVT == MVT::v4i8 &&
+ SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
+ if (isSExtLd)
+ Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
+ else
+ Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
+ } else if (Align >= 4 &&
+ (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
+ SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
+ Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
+ else if (Align >= 2 &&
+ (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
+ SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
+ Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
+ else if ((IsLE || LoadedVT == MVT::v16i8) &&
+ SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
+ Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
+ else
+ return false;
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = {Base, Offset,
+ CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
+ CurDAG->getRegister(0, MVT::i32), Chain};
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
+ MVT::i32, MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceUses(SDValue(N, 0), SDValue(New, 1));
+ ReplaceUses(SDValue(N, 1), SDValue(New, 0));
+ ReplaceUses(SDValue(N, 2), SDValue(New, 2));
+ CurDAG->RemoveDeadNode(N);
+ return true;
+}
+
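
tryMVEIndexedLoad pairs each legal pre/post-indexed load with a width- and signedness-specific MVE opcode; alignment and the imm7 offset gate each case. The first case restated as a sketch (hypothetical helper; the real selector also checks Align >= 2 and the offset encoding):

static unsigned pickVLDRHx32(bool IsSExt, bool IsPre) {
  // v4i16 widened to 32-bit lanes: sign-extending loads use VLDRHS32,
  // zero-extending loads use VLDRHU32.
  if (IsSExt)
    return IsPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
  return IsPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
}
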
/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
@@ -2701,7 +2790,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
// If we can't materialize the constant we need to use a literal pool
- if (ConstantMaterializationCost(Val) > 2) {
+ if (ConstantMaterializationCost(Val, Subtarget) > 2) {
SDValue CPIdx = CurDAG->getTargetConstantPool(
ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI->getPointerTy(CurDAG->getDataLayout()));
@@ -2842,8 +2931,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
bool PreferImmediateEncoding =
Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
if (!PreferImmediateEncoding &&
- ConstantMaterializationCost(Imm) >
- ConstantMaterializationCost(~Imm)) {
+ ConstantMaterializationCost(Imm, Subtarget) >
+ ConstantMaterializationCost(~Imm, Subtarget)) {
// The current immediate costs more to materialize than a negated
// immediate, so negate the immediate and use a BIC.
SDValue NewImm =
@@ -2987,6 +3076,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
case ISD::LOAD: {
+ if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
+ return;
if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
if (tryT2IndexedLoad(N))
return;
@@ -2998,13 +3089,26 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
- case ARMISD::WLS: {
- SDValue Ops[] = { N->getOperand(1), // Loop count
- N->getOperand(2), // Exit target
+ case ARMISD::WLS:
+ case ARMISD::LE: {
+ SDValue Ops[] = { N->getOperand(1),
+ N->getOperand(2),
+ N->getOperand(0) };
+ unsigned Opc = N->getOpcode() == ARMISD::WLS ?
+ ARM::t2WhileLoopStart : ARM::t2LoopEnd;
+ SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
+ ReplaceUses(N, New);
+ CurDAG->RemoveDeadNode(N);
+ return;
+ }
+ case ARMISD::LOOP_DEC: {
+ SDValue Ops[] = { N->getOperand(1),
+ N->getOperand(2),
N->getOperand(0) };
- SDNode *LoopStart =
- CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops);
- ReplaceUses(N, LoopStart);
+ SDNode *Dec =
+ CurDAG->getMachineNode(ARM::t2LoopDec, dl,
+ CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
+ ReplaceUses(N, Dec);
CurDAG->RemoveDeadNode(N);
return;
}
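
The WLS/LE/LOOP_DEC cases select the low-overhead-loop pseudos that the later LowOverheadLoops pass either finalises into real while/loop-end instructions or reverts to ordinary compare-and-branch code. The mapping as a sketch (operand order is loop count / target / chain, as above):

static unsigned selectLoopPseudo(unsigned ISDOpc) {
  switch (ISDOpc) {
  case ARMISD::WLS:      return ARM::t2WhileLoopStart; // skip loop if count == 0
  case ARMISD::LE:       return ARM::t2LoopEnd;        // branch back while non-zero
  case ARMISD::LOOP_DEC: return ARM::t2LoopDec;        // decrement trip counter
  default:               return 0;
  }
}
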
@@ -4365,7 +4469,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
// the original GPRs.
- unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
SDValue Chain = SDValue(N,0);
@@ -4401,7 +4505,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
// Copy REG_SEQ into a GPRPair-typed VR and replace the original two
// i32 VRs of inline asm with it.
- unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 18bb9bf3eccc..db26feb57010 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -245,7 +245,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
for (auto VT : IntTypes) {
- addRegisterClass(VT, &ARM::QPRRegClass);
+ addRegisterClass(VT, &ARM::MQPRRegClass);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -258,12 +258,31 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::MLOAD, VT, Custom);
+ setOperationAction(ISD::MSTORE, VT, Legal);
+ setOperationAction(ISD::CTLZ, VT, Legal);
+ setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Legal);
+ setOperationAction(ISD::BSWAP, VT, Legal);
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
// No native support for these.
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
+ // Vector reductions
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
if (!HasMVEFP) {
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
@@ -271,11 +290,18 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
}
+
+ // Pre- and post-increment are supported on loads and stores
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, VT, Legal);
+ setIndexedStoreAction(im, VT, Legal);
+ }
}
const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
for (auto VT : FloatTypes) {
- addRegisterClass(VT, &ARM::QPRRegClass);
+ addRegisterClass(VT, &ARM::MQPRRegClass);
if (!HasMVEFP)
setAllExpand(VT);
@@ -287,6 +313,16 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::MLOAD, VT, Custom);
+ setOperationAction(ISD::MSTORE, VT, Legal);
+
+ // Pre- and post-increment are supported on loads and stores
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, VT, Legal);
+ setIndexedStoreAction(im, VT, Legal);
+ }
if (HasMVEFP) {
setOperationAction(ISD::FMINNUM, VT, Legal);
@@ -314,7 +350,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
// vector types is inhibited at integer-only level.
const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
for (auto VT : LongTypes) {
- addRegisterClass(VT, &ARM::QPRRegClass);
+ addRegisterClass(VT, &ARM::MQPRRegClass);
setAllExpand(VT);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -334,6 +370,33 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+
+ // Pre- and post-increment on these are legal, given the correct extends
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::v8i8, Legal);
+ setIndexedStoreAction(im, MVT::v8i8, Legal);
+ setIndexedLoadAction(im, MVT::v4i8, Legal);
+ setIndexedStoreAction(im, MVT::v4i8, Legal);
+ setIndexedLoadAction(im, MVT::v4i16, Legal);
+ setIndexedStoreAction(im, MVT::v4i16, Legal);
+ }
+
+ // Predicate types
+ const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
+ for (auto VT : pTypes) {
+ addRegisterClass(VT, &ARM::VCCRRegClass);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ }
}
ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
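
Two themes run through the addMVEVectorTypes hunk: vector types move from QPR to MQPR (the MVE-legal view of the Q registers), and the i1 predicate vectors get the VCCR class, which models lanes of VPR.P0. Most predicate operations go through Custom lowering because there are no native i1-vector loads, stores or shuffles. A compressed sketch of the predicate setup for one type (same hooks as above, inside a hypothetical TargetLowering constructor):

addRegisterClass(MVT::v4i1, &ARM::VCCRRegClass);   // lanes of VPR.P0
setOperationAction(ISD::SETCC, MVT::v4i1, Custom); // VCMP produces it
setOperationAction(ISD::LOAD,  MVT::v4i1, Custom); // no native i1 loads
setOperationAction(ISD::STORE, MVT::v4i1, Custom); // ...or stores
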
@@ -645,8 +708,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
}
- for (MVT VT : MVT::vector_valuetypes()) {
- for (MVT InnerVT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
addAllExtLoads(VT, InnerVT, Expand);
}
@@ -669,8 +732,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
addMVEVectorTypes(Subtarget->hasMVEFloatOps());
// Combine low-overhead loop intrinsics so that we can lower i1 types.
- if (Subtarget->hasLOB())
+ if (Subtarget->hasLOB()) {
setTargetDAGCombine(ISD::BRCOND);
+ setTargetDAGCombine(ISD::BR_CC);
+ }
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
@@ -837,10 +902,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
@@ -849,7 +910,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// It is legal to extload from v4i8 to v4i16 or v4i32.
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
MVT::v2i32}) {
- for (MVT VT : MVT::integer_vector_valuetypes()) {
+ for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
@@ -861,6 +922,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
}
if (!Subtarget->hasFP64()) {
@@ -901,9 +966,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
}
- if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()){
+ if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ if (Subtarget->hasFullFP16())
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
}
if (!Subtarget->hasFP16())
@@ -955,6 +1021,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
+ if (Subtarget->hasDSP()) {
+ setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
+ setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
+ }
+ if (Subtarget->hasBaseDSP()) {
+ setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
+ }
// i64 operation support.
setOperationAction(ISD::MUL, MVT::i64, Expand);
@@ -972,6 +1048,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
// MVE lowers 64 bit shifts to lsll and lsrl
@@ -991,7 +1068,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// ARM does not have ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
@@ -1365,14 +1442,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
- setMinStackArgumentAlignment(4);
+ setMinStackArgumentAlignment(Align(4));
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
- setPrefLoopAlignment(Subtarget->getPrefLoopAlignment());
+ setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
- setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
+ setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
if (Subtarget->isThumb() || Subtarget->isThumb2())
setTargetDAGCombine(ISD::ABS);
@@ -1472,6 +1549,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::ADDE: return "ARMISD::ADDE";
case ARMISD::SUBC: return "ARMISD::SUBC";
case ARMISD::SUBE: return "ARMISD::SUBE";
+ case ARMISD::LSLS: return "ARMISD::LSLS";
case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
@@ -1496,16 +1574,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
- case ARMISD::VCEQ: return "ARMISD::VCEQ";
- case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
- case ARMISD::VCGE: return "ARMISD::VCGE";
- case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
- case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
- case ARMISD::VCGEU: return "ARMISD::VCGEU";
- case ARMISD::VCGT: return "ARMISD::VCGT";
- case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
- case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
- case ARMISD::VCGTU: return "ARMISD::VCGTU";
+ case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
+ case ARMISD::VCMP: return "ARMISD::VCMP";
+ case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
case ARMISD::VTST: return "ARMISD::VTST";
case ARMISD::VSHLs: return "ARMISD::VSHLs";
@@ -1543,6 +1614,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VTRN: return "ARMISD::VTRN";
case ARMISD::VTBL1: return "ARMISD::VTBL1";
case ARMISD::VTBL2: return "ARMISD::VTBL2";
+ case ARMISD::VMOVN: return "ARMISD::VMOVN";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::UMAAL: return "ARMISD::UMAAL";
@@ -1560,6 +1632,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
+ case ARMISD::QADD16b: return "ARMISD::QADD16b";
+ case ARMISD::QSUB16b: return "ARMISD::QSUB16b";
+ case ARMISD::QADD8b: return "ARMISD::QADD8b";
+ case ARMISD::QSUB8b: return "ARMISD::QSUB8b";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
@@ -1589,6 +1665,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
case ARMISD::WLS: return "ARMISD::WLS";
+ case ARMISD::LE: return "ARMISD::LE";
+ case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
+ case ARMISD::CSINV: return "ARMISD::CSINV";
+ case ARMISD::CSNEG: return "ARMISD::CSNEG";
+ case ARMISD::CSINC: return "ARMISD::CSINC";
}
return nullptr;
}
@@ -1597,6 +1678,11 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
+
+ // MVE has a predicate register.
+ if (Subtarget->hasMVEIntegerOps() &&
+ (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
+ return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
@@ -1726,34 +1812,22 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
- ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
+ ARMCC::CondCodes &CondCode2) {
CondCode2 = ARMCC::AL;
- InvalidOnQNaN = true;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
- case ISD::SETOEQ:
- CondCode = ARMCC::EQ;
- InvalidOnQNaN = false;
- break;
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
- case ISD::SETONE:
- CondCode = ARMCC::MI;
- CondCode2 = ARMCC::GT;
- InvalidOnQNaN = false;
- break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
- case ISD::SETUEQ:
- CondCode = ARMCC::EQ;
- CondCode2 = ARMCC::VS;
- InvalidOnQNaN = false;
- break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
@@ -1761,10 +1835,7 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
- case ISD::SETUNE:
- CondCode = ARMCC::NE;
- InvalidOnQNaN = false;
- break;
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
}
}
@@ -1988,6 +2059,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
+ MachineFunction::CallSiteInfo CSInfo;
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
@@ -2112,6 +2184,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
"unexpected use of 'returned'");
isThisReturn = true;
}
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.EnableDebugEntryValues)
+ CSInfo.emplace_back(VA.getLocReg(), i);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
assert(VA.isMemLoc());
@@ -2347,12 +2422,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall) {
MF.getFrameInfo().setHasTailCall();
- return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
+ SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
+ DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
+ return Ret;
}
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
+ DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
@@ -2431,7 +2509,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
int FI = std::numeric_limits<int>::max();
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(VR))
+ if (!Register::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
@@ -3047,12 +3125,12 @@ ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
// Load the current TEB (thread environment block)
SDValue Ops[] = {Chain,
- DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
- DAG.getConstant(15, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(13, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(2, DL, MVT::i32)};
+ DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+ DAG.getTargetConstant(15, DL, MVT::i32),
+ DAG.getTargetConstant(0, DL, MVT::i32),
+ DAG.getTargetConstant(13, DL, MVT::i32),
+ DAG.getTargetConstant(0, DL, MVT::i32),
+ DAG.getTargetConstant(2, DL, MVT::i32)};
SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
@@ -3498,6 +3576,48 @@ SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
Op.getOperand(0));
}
+SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
+ SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
+ unsigned IntNo =
+ cast<ConstantSDNode>(
+ Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
+ ->getZExtValue();
+ switch (IntNo) {
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_gnu_eabi_mcount: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc dl(Op);
+ SDValue Chain = Op.getOperand(0);
+ // call "\01__gnu_mcount_nc"
+ const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask =
+ ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
+ assert(Mask && "Missing call preserved mask for calling convention");
+    // Mark LR as an implicit live-in.
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+ SDValue ReturnAddress =
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
+ std::vector<EVT> ResultTys = {MVT::Other, MVT::Glue};
+ SDValue Callee =
+ DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
+ SDValue RegisterMask = DAG.getRegisterMask(Mask);
+ if (Subtarget->isThumb())
+ return SDValue(
+ DAG.getMachineNode(
+ ARM::tBL_PUSHLR, dl, ResultTys,
+ {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
+ DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
+ 0);
+ return SDValue(
+ DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
+ {ReturnAddress, Callee, RegisterMask, Chain}),
+ 0);
+ }
+ }
+}
+
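// A hedged aside on the operand indexing in LowerINTRINSIC_VOID above: the
// boolean "Op.getOperand(0).getValueType() == MVT::Other" converts to 0 or 1,
// so a chained node reads its intrinsic ID from operand 1 (past the chain)
// and a chainless one from operand 0. An equivalent, more explicit spelling
// would be:
//
//   unsigned IdIdx = Op.getOperand(0).getValueType() == MVT::Other ? 1 : 0;
//   unsigned IntNo =
//       cast<ConstantSDNode>(Op.getOperand(IdIdx))->getZExtValue();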
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
@@ -3898,6 +4018,12 @@ SDValue ARMTargetLowering::LowerFormalArguments(
// Transform the arguments in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this value is passed in r0 and has the returned attribute (e.g.
+ // C++ 'structors), record this fact for later use.
+ if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
+ AFI->setPreservesR0();
+ }
}
// If this is an 8 or 16-bit value, it is really passed promoted
@@ -4049,6 +4175,67 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
std::swap(LHS, RHS);
}
+ // Thumb1 has very limited immediate modes, so turning an "and" into a
+ // shift can save multiple instructions.
+ //
+ // If we have (x & C1), and C1 is an appropriate mask, we can transform it
+ // into "((x << n) >> n)". But that isn't necessarily profitable on its
+ // own. If it's the operand to an unsigned comparison with an immediate,
+ // we can eliminate one of the shifts: we transform
+ // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
+ //
+ // We avoid transforming cases which aren't profitable due to encoding
+ // details:
+ //
+ // 1. C2 fits into the immediate field of a cmp, and the transformed version
+ // would not; in that case, we're essentially trading one immediate load for
+ // another.
+ // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
+ // 3. C2 is zero; we have other code for this special case.
+ //
+ // FIXME: Figure out profitability for Thumb2; we usually can't save an
+ // instruction, since the AND is always one instruction anyway, but we could
+ // use narrow instructions in some cases.
+ if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
+ LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
+ !isSignedIntSetCC(CC)) {
+ unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
+ auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
+ uint64_t RHSV = RHSC->getZExtValue();
+ if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
+ unsigned ShiftBits = countLeadingZeros(Mask);
+ if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
+ SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
+ LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
+ RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
+ }
+ }
+ }
+
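// A minimal standalone sketch of the and->shift rewrite described above,
// assuming a 32-bit x and the low-bit mask 0x3FF (names are illustrative):
//
//   #include <cstdint>
//   // 0x3FF has 22 leading zeros, so (x & 0x3FF) == 300 holds exactly when
//   // (x << 22) == (300u << 22): the shift discards the same bits the mask
//   // cleared. 300 does not fit a Thumb1 cmp immediate, which is what makes
//   // the rewrite profitable here.
//   static bool cmpMasked(uint32_t x)  { return (x & 0x3FFu) == 300u; }
//   static bool cmpShifted(uint32_t x) { return (x << 22) == (300u << 22); }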
+ // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
+ // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
+ // way a cmp would.
+ // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
+ // some tweaks to the heuristics for the previous and->shift transform.
+ // FIXME: Optimize cases where the LHS isn't a shift.
+ if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
+ CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
+ unsigned ShiftAmt =
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
+ SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ LHS.getOperand(0),
+ DAG.getConstant(ShiftAmt, dl, MVT::i32));
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Shift.getValue(1), SDValue());
+ ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
+ return Chain.getValue(1);
+ }
+
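// A hedged scalar model of why the LSLS rewrite above is sound: after a left
// shift by c+1, the carry flag holds bit 31 of (x << c) and the zero flag
// tests the remaining bits, so ARMCC::HI (C && !Z) matches the original
// unsigned comparison (illustrative code, not part of the patch):
//
//   #include <cstdint>
//   static bool viaCmp(uint32_t x, unsigned c) {  // assumes c < 31
//     return (x << c) > 0x80000000u;
//   }
//   static bool viaLsls(uint32_t x, unsigned c) {
//     uint32_t v = x << c;
//     bool carry = (v >> 31) & 1;           // bit shifted out by lsls x, c+1
//     bool zero = (uint32_t)(v << 1) == 0;  // the lsls result itself
//     return carry && !zero;                // ARMCC::HI
//   }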
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
// If the RHS is a constant zero then the V (overflow) flag will never be
@@ -4083,15 +4270,13 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const SDLoc &dl,
- bool InvalidOnQNaN) const {
+ SelectionDAG &DAG, const SDLoc &dl) const {
assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
- SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
- Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
else
- Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
@@ -4108,12 +4293,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
Cmp = Cmp.getOperand(0);
Opc = Cmp.getOpcode();
if (Opc == ARMISD::CMPFP)
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
- Cmp.getOperand(1), Cmp.getOperand(2));
+    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
+                      Cmp.getOperand(1));
else {
assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
- Cmp.getOperand(1));
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
}
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
@@ -4276,6 +4459,35 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
+static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ EVT VT = Op.getValueType();
+ if (!Subtarget->hasDSP())
+ return SDValue();
+ if (!VT.isSimple())
+ return SDValue();
+
+ unsigned NewOpcode;
+ bool IsAdd = Op->getOpcode() == ISD::SADDSAT;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::i8:
+ NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b;
+ break;
+ case MVT::i16:
+ NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b;
+ break;
+ }
+
+ SDLoc dl(Op);
+ SDValue Add =
+ DAG.getNode(NewOpcode, dl, MVT::i32,
+ DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
+ DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
+}
+
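// A minimal scalar model of the saturating semantics QADD16b/QSUB16b are
// used for above, assuming qadd16-style behaviour on a single i16 lane
// (illustrative, not the architectural instruction definition):
//
//   #include <cstdint>
//   static int16_t saddSatI16(int16_t a, int16_t b) {
//     int32_t s = int32_t(a) + int32_t(b);  // widen, as the sext-to-i32 does
//     if (s > INT16_MAX) s = INT16_MAX;     // saturate high
//     if (s < INT16_MIN) s = INT16_MIN;     // saturate low
//     return int16_t(s);                    // narrow, as the TRUNCATE does
//   }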
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
@@ -4656,10 +4868,62 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
+ ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
+ ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
+
+ if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
+ LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
+ unsigned TVal = CTVal->getZExtValue();
+ unsigned FVal = CFVal->getZExtValue();
+ unsigned Opcode = 0;
+
+ if (TVal == ~FVal) {
+ Opcode = ARMISD::CSINV;
+ } else if (TVal == ~FVal + 1) {
+ Opcode = ARMISD::CSNEG;
+ } else if (TVal + 1 == FVal) {
+ Opcode = ARMISD::CSINC;
+ } else if (TVal == FVal + 1) {
+ Opcode = ARMISD::CSINC;
+ std::swap(TrueVal, FalseVal);
+ std::swap(TVal, FVal);
+ CC = ISD::getSetCCInverse(CC, true);
+ }
+
+ if (Opcode) {
+      // If one of the constants is cheaper to materialise than the other,
+      // materialise the cheaper one and let the csel generate the other.
+ if (Opcode != ARMISD::CSINC &&
+ HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
+ std::swap(TrueVal, FalseVal);
+ std::swap(TVal, FVal);
+ CC = ISD::getSetCCInverse(CC, true);
+ }
+
+      // Attempt to use ZR by checking whether TVal is 0, possibly inverting
+      // the condition to get there. CSINC is not invertible like the other
+      // two (~(~a) == a and -(-a) == a, but (a+1)+1 != a).
+ if (FVal == 0 && Opcode != ARMISD::CSINC) {
+ std::swap(TrueVal, FalseVal);
+ std::swap(TVal, FVal);
+ CC = ISD::getSetCCInverse(CC, true);
+ }
+ if (TVal == 0)
+ TrueVal = DAG.getRegister(ARM::ZR, MVT::i32);
+
+ // Drops F's value because we can get it by inverting/negating TVal.
+ FalseVal = TrueVal;
+
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ EVT VT = TrueVal.getValueType();
+ return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
+ }
+ }
if (isUnsupportedFloatingType(LHS.getValueType())) {
DAG.getTargetLoweringInfo().softenSetCCOperands(
- DAG, LHS.getValueType(), LHS, RHS, CC, dl);
+ DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
@@ -4701,8 +4965,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- bool InvalidOnQNaN;
- FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
+ FPCCToARMCC(CC, CondCode, CondCode2);
// Normalize the fp compare. If RHS is zero we prefer to keep it there so we
// match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
@@ -4727,13 +4990,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
@@ -4903,7 +5166,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
if (isUnsupportedFloatingType(LHS.getValueType())) {
DAG.getTargetLoweringInfo().softenSetCCOperands(
- DAG, LHS.getValueType(), LHS, RHS, CC, dl);
+ DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
@@ -4960,11 +5223,10 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- bool InvalidOnQNaN;
- FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
+ FPCCToARMCC(CC, CondCode, CondCode2);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
@@ -5056,8 +5318,9 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
Op.getValueType());
+ MakeLibCallOptions CallOptions;
return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
- /*isSigned*/ false, SDLoc(Op)).first;
+ CallOptions, SDLoc(Op)).first;
}
return Op;
@@ -5120,8 +5383,9 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
+ MakeLibCallOptions CallOptions;
return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
- /*isSigned*/ false, SDLoc(Op)).first;
+ CallOptions, SDLoc(Op)).first;
}
return Op;
@@ -5140,7 +5404,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
if (UseNEON) {
// Use VBSL to copy the sign bit.
- unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
@@ -5163,7 +5427,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
- SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
dl, MVT::i32);
AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
@@ -5243,7 +5507,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = ARI.getFrameRegister(MF);
+ Register FrameReg = ARI.getFrameRegister(MF);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
@@ -5253,9 +5517,9 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
- unsigned Reg = StringSwitch<unsigned>(RegName)
+Register ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const {
+ Register Reg = StringSwitch<unsigned>(RegName)
.Case("sp", ARM::SP)
.Default(0);
if (Reg)
@@ -5576,8 +5840,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
- if (VT.isVector()) {
- assert(ST->hasNEON());
+ if (VT.isVector() && ST->hasNEON()) {
// Compute the least significant set bit: LSB = X & -X
SDValue X = N->getOperand(0);
@@ -5777,14 +6040,15 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
unsigned ShPartsOpc = ARMISD::LSLL;
ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
- // If the shift amount is greater than 32 then do the default optimisation
- if (Con && Con->getZExtValue() > 32)
+  // If the shift amount is zero, is 32 or more, or its type is wider than 64
+  // bits, then fall back to the default expansion.
+ if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
+ (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
return SDValue();
- // Extract the lower 32 bits of the shift amount if it's an i64
- if (ShAmt->getValueType(0) == MVT::i64)
- ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt,
- DAG.getConstant(0, dl, MVT::i32));
+ // Extract the lower 32 bits of the shift amount if it's not an i32
+ if (ShAmt->getValueType(0) != MVT::i32)
+ ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
if (ShOpc == ISD::SRL) {
if (!Con)
@@ -5839,20 +6103,37 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}
-static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
- SDValue TmpOp0, TmpOp1;
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
bool Invert = false;
bool Swap = false;
- unsigned Opc = 0;
+ unsigned Opc = ARMCC::AL;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
+ EVT CmpVT;
+ if (ST->hasNEON())
+ CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
+ else {
+ assert(ST->hasMVEIntegerOps() &&
+ "No hardware support for integer vector comparison!");
+
+ if (Op.getValueType().getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ // Make sure we expand floating point setcc to scalar if we do not have
+ // mve.fp, so that we can handle them from there.
+ if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
+ return SDValue();
+
+ CmpVT = VT;
+ }
+
if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
(SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
// Special-case integer 64-bit equality comparisons. They aren't legal,
@@ -5880,60 +6161,74 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
- case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
+ case ISD::SETNE:
+ if (ST->hasMVEFloatOps()) {
+ Opc = ARMCC::NE; break;
+ } else {
+ Invert = true; LLVM_FALLTHROUGH;
+ }
case ISD::SETOEQ:
- case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETEQ: Opc = ARMCC::EQ; break;
case ISD::SETOLT:
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGT:
- case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETGT: Opc = ARMCC::GT; break;
case ISD::SETOLE:
case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGE:
- case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETGE: Opc = ARMCC::GE; break;
case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
+ case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
+ case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
- case ISD::SETONE:
+ case ISD::SETONE: {
// Expand this to (OLT | OGT).
- TmpOp0 = Op0;
- TmpOp1 = Op1;
- Opc = ISD::OR;
- Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
- Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
- break;
- case ISD::SETUO:
- Invert = true;
- LLVM_FALLTHROUGH;
- case ISD::SETO:
+ SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
+ DAG.getConstant(ARMCC::GT, dl, MVT::i32));
+ SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
+ DAG.getConstant(ARMCC::GT, dl, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+ return Result;
+ }
+ case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH;
+ case ISD::SETO: {
// Expand this to (OLT | OGE).
- TmpOp0 = Op0;
- TmpOp1 = Op1;
- Opc = ISD::OR;
- Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
- Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
- break;
+ SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
+ DAG.getConstant(ARMCC::GT, dl, MVT::i32));
+ SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
+ DAG.getConstant(ARMCC::GE, dl, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+ return Result;
+ }
}
} else {
// Integer comparisons.
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal integer comparison");
- case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
- case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETNE:
+ if (ST->hasMVEIntegerOps()) {
+ Opc = ARMCC::NE; break;
+ } else {
+ Invert = true; LLVM_FALLTHROUGH;
+ }
+ case ISD::SETEQ: Opc = ARMCC::EQ; break;
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETGT: Opc = ARMCC::GT; break;
case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETGE: Opc = ARMCC::GE; break;
case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
+ case ISD::SETUGT: Opc = ARMCC::HI; break;
case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
+ case ISD::SETUGE: Opc = ARMCC::HS; break;
}
// Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
- if (Opc == ARMISD::VCEQ) {
+ if (ST->hasNEON() && Opc == ARMCC::EQ) {
SDValue AndOp;
if (ISD::isBuildVectorAllZeros(Op1.getNode()))
AndOp = Op0;
@@ -5945,10 +6240,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
AndOp = AndOp.getOperand(0);
if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
- Opc = ARMISD::VTST;
Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
- Invert = !Invert;
+ SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
+ if (!Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+ return Result;
}
}
}
@@ -5962,31 +6259,20 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (ISD::isBuildVectorAllZeros(Op1.getNode()))
SingleOp = Op0;
else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
- if (Opc == ARMISD::VCGE)
- Opc = ARMISD::VCLEZ;
- else if (Opc == ARMISD::VCGT)
- Opc = ARMISD::VCLTZ;
+ if (Opc == ARMCC::GE)
+ Opc = ARMCC::LE;
+ else if (Opc == ARMCC::GT)
+ Opc = ARMCC::LT;
SingleOp = Op1;
}
SDValue Result;
if (SingleOp.getNode()) {
- switch (Opc) {
- case ARMISD::VCEQ:
- Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
- case ARMISD::VCGE:
- Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
- case ARMISD::VCLEZ:
- Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
- case ARMISD::VCGT:
- Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
- case ARMISD::VCLTZ:
- Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
- default:
- Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
- }
+ Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp,
+ DAG.getConstant(Opc, dl, MVT::i32));
} else {
- Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
+ Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
+ DAG.getConstant(Opc, dl, MVT::i32));
}
Result = DAG.getSExtOrTrunc(Result, dl, VT);
@@ -6027,13 +6313,13 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
CCR, Chain.getValue(1));
}
-/// isNEONModifiedImm - Check if the specified splat value corresponds to a
-/// valid vector constant for a NEON or MVE instruction with a "modified immediate"
-/// operand (e.g., VMOV). If so, return the encoded value.
-static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON or MVE instruction with a "modified
+/// immediate" operand (e.g., VMOV). If so, return the encoded value.
+static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
const SDLoc &dl, EVT &VT, bool is128Bits,
- NEONModImmType type) {
+ VMOVModImmType type) {
unsigned OpCmode, Imm;
// SplatBitSize is set to the smallest size that splats the vector, so a
@@ -6163,10 +6449,10 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
}
default:
- llvm_unreachable("unexpected size for isNEONModifiedImm");
+ llvm_unreachable("unexpected size for isVMOVModifiedImm");
}
- unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
}
@@ -6246,7 +6532,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
return SDValue();
// Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
- SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
+ SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
VMovVT, false, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
@@ -6263,7 +6549,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
}
// Finally, try a VMVN.i32
- NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
+ NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
false, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
@@ -6649,6 +6935,29 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) {
return true;
}
+static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top) {
+ unsigned NumElts = VT.getVectorNumElements();
+ // Make sure the mask has the right size.
+ if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
+ return false;
+
+  // If Top:
+  //   look for <0, N, 2, N+2, 4, N+4, ..>;
+  //   this inserts Input2 into Input1.
+  // Otherwise:
+  //   look for <0, N+1, 2, N+3, 4, N+5, ..>;
+  //   this inserts Input1 into Input2.
+ unsigned Offset = Top ? 0 : 1;
+ for (unsigned i = 0; i < NumElts; i+=2) {
+ if (M[i] >= 0 && M[i] != (int)i)
+ return false;
+ if (M[i+1] >= 0 && M[i+1] != (int)(NumElts + i + Offset))
+ return false;
+ }
+
+ return true;
+}
+
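// Concrete instances of the masks isVMOVNMask accepts for v8i16
// (NumElts == 8), as a hedged illustration of the patterns described above;
// negative (undef) entries are also allowed anywhere:
//
//   static const int TopMask[8]    = {0, 8, 2, 10, 4, 12, 6, 14};  // Top
//   static const int BottomMask[8] = {0, 9, 2, 11, 4, 13, 6, 15};  // !Top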
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
@@ -6669,6 +6978,66 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BoolMask;
+ unsigned BitsPerBool;
+ if (NumElts == 4) {
+ BitsPerBool = 4;
+ BoolMask = 0xf;
+ } else if (NumElts == 8) {
+ BitsPerBool = 2;
+ BoolMask = 0x3;
+ } else if (NumElts == 16) {
+ BitsPerBool = 1;
+ BoolMask = 0x1;
+ } else
+ return SDValue();
+
+ // If this is a single value copied into all lanes (a splat), we can just sign
+ // extend that single value
+ SDValue FirstOp = Op.getOperand(0);
+ if (!isa<ConstantSDNode>(FirstOp) &&
+ std::all_of(std::next(Op->op_begin()), Op->op_end(),
+ [&FirstOp](SDUse &U) {
+ return U.get().isUndef() || U.get() == FirstOp;
+ })) {
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
+ DAG.getValueType(MVT::i1));
+ return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
+ }
+
+ // First create base with bits set where known
+ unsigned Bits32 = 0;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (!isa<ConstantSDNode>(V) && !V.isUndef())
+ continue;
+ bool BitSet = V.isUndef() ? false : cast<ConstantSDNode>(V)->getZExtValue();
+ if (BitSet)
+ Bits32 |= BoolMask << (i * BitsPerBool);
+ }
+
+ // Add in unknown nodes
+ SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
+ DAG.getConstant(Bits32, dl, MVT::i32));
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (isa<ConstantSDNode>(V) || V.isUndef())
+ continue;
+ Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
+ DAG.getConstant(i, dl, MVT::i32));
+ }
+
+ return Base;
+}
+
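// A worked example of the constant packing above, assuming a v4i1 build
// vector (BitsPerBool == 4, BoolMask == 0xf); illustrative code only:
//
//   #include <cstdint>
//   static uint32_t packV4I1(const bool (&lanes)[4]) {
//     uint32_t bits = 0;
//     for (unsigned i = 0; i < 4; ++i)
//       if (lanes[i])
//         bits |= 0xFu << (i * 4);  // BoolMask << (i * BitsPerBool)
//     return bits;                  // e.g. {1,0,1,1} -> 0xFF0F
//   }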
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
@@ -6677,6 +7046,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
EVT VT = Op.getValueType();
+ if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
+ return LowerBUILD_VECTOR_i1(Op, DAG, ST);
+
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
@@ -6688,7 +7060,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
(ST->hasMVEIntegerOps() && SplatBitSize <= 32)) {
// Check if an immediate VMOV works.
EVT VmovVT;
- SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VmovVT, VT.is128BitVector(),
VMOVModImm);
@@ -6700,7 +7072,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Try an immediate VMVN.
uint64_t NegatedImm = (~SplatBits).getZExtValue();
- Val = isNEONModifiedImm(
+ Val = isVMOVModifiedImm(
NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VmovVT, VT.is128BitVector(),
ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
@@ -7088,9 +7460,6 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
LaneMask[j] = ExtractBase + j;
}
- // Final check before we try to produce nonsense...
- if (!isShuffleMaskLegal(Mask, ShuffleVT))
- return SDValue();
// We can't handle more than two sources. This should have already
// been checked before this point.
@@ -7100,8 +7469,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
- SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
- ShuffleOps[1], Mask);
+ SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
+ ShuffleOps[1], Mask, DAG);
+ if (!Shuffle)
+ return SDValue();
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
@@ -7168,6 +7539,7 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
unsigned EltSize = VT.getScalarSizeInBits();
if (EltSize >= 32 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ ShuffleVectorInst::isIdentityMask(M) ||
isVREVMask(M, VT, 64) ||
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16))
@@ -7180,6 +7552,9 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) &&
isReverseMask(M, VT))
return true;
+ else if (Subtarget->hasMVEIntegerOps() &&
+ (isVMOVNMask(M, VT, 0) || isVMOVNMask(M, VT, 1)))
+ return true;
else
return false;
}
@@ -7282,6 +7657,94 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
DAG.getConstant(ExtractNum, DL, MVT::i32));
}
+static EVT getVectorTyFromPredicateVector(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i1:
+ return MVT::v4i32;
+ case MVT::v8i1:
+ return MVT::v8i16;
+ case MVT::v16i1:
+ return MVT::v16i8;
+ default:
+ llvm_unreachable("Unexpected vector predicate type");
+ }
+}
+
+static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
+ SelectionDAG &DAG) {
+ // Converting from boolean predicates to integers involves creating a vector
+ // of all ones or all zeroes and selecting the lanes based upon the real
+ // predicate.
+ SDValue AllOnes =
+ DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
+
+ SDValue AllZeroes =
+ DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
+ AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
+
+ // Get full vector type from predicate type
+ EVT NewVT = getVectorTyFromPredicateVector(VT);
+
+ SDValue RecastV1;
+  // If the real predicate is a v8i1 or v4i1 (not v16i1) then we need to recast
+  // it to a v16i1. This cannot be done with an ordinary bitcast because the
+  // sizes are not the same. We have to use an MVE-specific PREDICATE_CAST node,
+  // since we know that in hardware the sizes really are the same.
+ if (VT != MVT::v16i1)
+ RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
+ else
+ RecastV1 = Pred;
+
+ // Select either all ones or zeroes depending upon the real predicate bits.
+ SDValue PredAsVector =
+ DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
+
+ // Recast our new predicate-as-integer v16i8 vector into something
+ // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
+ return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
+}
+
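// A hedged lane-wise model of PromoteMVEPredVector for the v4i1 case: each
// predicate bit selects an all-ones or all-zeroes i32 lane (illustrative
// code, not the DAG form built above):
//
//   #include <cstdint>
//   static void promoteV4I1(const bool (&pred)[4], uint32_t (&out)[4]) {
//     for (unsigned i = 0; i < 4; ++i)
//       out[i] = pred[i] ? 0xFFFFFFFFu : 0u;  // VSELECT of AllOnes/AllZeroes
//   }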
+static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+
+ assert(ST->hasMVEIntegerOps() &&
+ "No support for vector shuffle of boolean predicates");
+
+ SDValue V1 = Op.getOperand(0);
+ SDLoc dl(Op);
+ if (isReverseMask(ShuffleMask, VT)) {
+ SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
+ SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
+ SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
+ DAG.getConstant(16, dl, MVT::i32));
+ return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
+ }
+
+ // Until we can come up with optimised cases for every single vector
+ // shuffle in existence we have chosen the least painful strategy. This is
+  // to essentially promote the boolean predicate to a vector of 8-bit
+  // integers, one byte per predicate lane. Then we fall back on a normal
+  // integer vector shuffle and convert the result back into a predicate
+  // vector. In many cases the generated code might be even better than scalar
+  // code operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
+ // fields in a register into 8 other arbitrary 2-bit fields!
+ SDValue PredAsVector = PromoteMVEPredVector(dl, V1, VT, DAG);
+ EVT NewVT = PredAsVector.getValueType();
+
+ // Do the shuffle!
+ SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector,
+ DAG.getUNDEF(NewVT), ShuffleMask);
+
+ // Now return the result of comparing the shuffled vector with zero,
+ // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
+ return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+}
+
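// A scalar check of the BITREVERSE path above, assuming a v16i1 predicate
// with one bit per lane: reversing the lane order is exactly a bit reverse
// of the 16 live bits, which the i32 BITREVERSE plus SRL #16 achieves
// (illustrative only):
//
//   #include <cstdint>
//   static uint16_t reverseLanes(uint16_t p) {
//     uint32_t r = 0;
//     for (int i = 0; i < 32; ++i)   // naive 32-bit bit reverse
//       r |= ((uint32_t(p) >> i) & 1u) << (31 - i);
//     return uint16_t(r >> 16);      // the SRL #16 in the lowering
//   }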
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) {
SDValue V1 = Op.getOperand(0);
@@ -7289,6 +7752,10 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+ unsigned EltSize = VT.getScalarSizeInBits();
+
+ if (ST->hasMVEIntegerOps() && EltSize == 1)
+ return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
@@ -7298,7 +7765,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// of the same time so that they get CSEd properly.
ArrayRef<int> ShuffleMask = SVN->getMask();
- unsigned EltSize = VT.getScalarSizeInBits();
if (EltSize <= 32) {
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
@@ -7364,6 +7830,14 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
.getValue(WhichResult);
}
}
+ if (ST->hasMVEIntegerOps()) {
+ if (isVMOVNMask(ShuffleMask, VT, 0))
+ return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
+ DAG.getConstant(0, dl, MVT::i32));
+ if (isVMOVNMask(ShuffleMask, VT, 1))
+ return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
+ DAG.getConstant(1, dl, MVT::i32));
+ }
// Also check for these shuffles through CONCAT_VECTORS: we canonicalize
// shuffles that produce a result larger than their operands with:
@@ -7468,8 +7942,29 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
-SDValue ARMTargetLowering::
-LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VecVT = Op.getOperand(0).getValueType();
+ SDLoc dl(Op);
+
+ assert(ST->hasMVEIntegerOps() &&
+ "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
+
+ SDValue Conv =
+ DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
+ unsigned Lane = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned LaneWidth =
+ getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
+ unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
+ Op.getOperand(1), DAG.getValueType(MVT::i1));
+ SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
+ DAG.getConstant(~Mask, dl, MVT::i32));
+ return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
+}
+
+SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
// INSERT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(2);
if (!isa<ConstantSDNode>(Lane))
@@ -7477,6 +7972,11 @@ LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
SDValue Elt = Op.getOperand(1);
EVT EltVT = Elt.getValueType();
+
+ if (Subtarget->hasMVEIntegerOps() &&
+ Op.getValueType().getScalarSizeInBits() == 1)
+ return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
+
if (getTypeAction(*DAG.getContext(), EltVT) ==
TargetLowering::TypePromoteFloat) {
// INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
@@ -7505,13 +8005,37 @@ LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
-static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VecVT = Op.getOperand(0).getValueType();
+ SDLoc dl(Op);
+
+ assert(ST->hasMVEIntegerOps() &&
+ "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
+
+ SDValue Conv =
+ DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
+ unsigned Lane = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned LaneWidth =
+ getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
+ DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
+ return Shift;
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
// EXTRACT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(1);
if (!isa<ConstantSDNode>(Lane))
return SDValue();
SDValue Vec = Op.getOperand(0);
+ EVT VT = Vec.getValueType();
+
+ if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
+ return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
+
if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
SDLoc dl(Op);
return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
@@ -7520,7 +8044,64 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ EVT Op1VT = V1.getValueType();
+ EVT Op2VT = V2.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert(Op1VT == Op2VT && "Operand types don't match!");
+ assert(VT.getScalarSizeInBits() == 1 &&
+ "Unexpected custom CONCAT_VECTORS lowering");
+ assert(ST->hasMVEIntegerOps() &&
+ "CONCAT_VECTORS lowering only supported for MVE");
+
+ SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
+ SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
+
+ // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
+ // promoted to v8i16, etc.
+
+ MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
+
+ // Extract the vector elements from Op1 and Op2 one by one and truncate them
+ // to be the right size for the destination. For example, if Op1 is v4i1 then
+  // the promoted vector is v4i32. The result of concatenation gives a v8i1,
+ // which when promoted is v8i16. That means each i32 element from Op1 needs
+ // truncating to i16 and inserting in the result.
+ EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
+ SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
+  auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
+ EVT NewVT = NewV.getValueType();
+ EVT ConcatVT = ConVec.getValueType();
+ for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
+ DAG.getIntPtrConstant(i, dl));
+ ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
+ DAG.getConstant(j, dl, MVT::i32));
+ }
+ return ConVec;
+ };
+ unsigned j = 0;
+  ConVec = ExtractInto(NewV1, ConVec, j);
+  ConVec = ExtractInto(NewV2, ConVec, j);
+
+ // Now return the result of comparing the subvector with zero,
+ // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
+ return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+}
+
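// A hedged lane-wise model of the concat-through-integers scheme above, for
// two v4i1 inputs producing a v8i1 (illustrative; it ignores the physical
// predicate register layout):
//
//   #include <cstdint>
//   static void concatV4I1(const bool (&a)[4], const bool (&b)[4],
//                          bool (&out)[8]) {
//     uint16_t lanes[8];
//     for (unsigned i = 0; i < 4; ++i) {
//       lanes[i] = a[i] ? 0xFFFF : 0;      // promoted lane, truncated to i16
//       lanes[4 + i] = b[i] ? 0xFFFF : 0;
//     }
//     for (unsigned i = 0; i < 8; ++i)
//       out[i] = lanes[i] != 0;            // the final VCMPZ with ARMCC::NE
//   }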
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = Op->getValueType(0);
+ if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
+ return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
+
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
@@ -7540,6 +8121,43 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
}
+static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ EVT Op1VT = V1.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned Index = cast<ConstantSDNode>(V2)->getZExtValue();
+
+ assert(VT.getScalarSizeInBits() == 1 &&
+ "Unexpected custom EXTRACT_SUBVECTOR lowering");
+ assert(ST->hasMVEIntegerOps() &&
+ "EXTRACT_SUBVECTOR lowering only supported for MVE");
+
+ SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
+
+ // We now have Op1 promoted to a vector of integers, where v8i1 gets
+ // promoted to v8i16, etc.
+
+ MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
+
+ EVT SubVT = MVT::getVectorVT(ElType, NumElts);
+ SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
+ for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
+ DAG.getIntPtrConstant(i, dl));
+ SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
+ DAG.getConstant(j, dl, MVT::i32));
+ }
+
+ // Now return the result of comparing the subvector with zero,
+ // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
+ return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+}
+
/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
/// element has been zero/sign-extended, depending on the isSigned parameter,
/// from an integer type half its size.
@@ -7897,7 +8515,8 @@ static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
return N0;
}
-static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::SDIV");
@@ -7924,7 +8543,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
- N0 = LowerCONCAT_VECTORS(N0, DAG);
+ N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
return N0;
@@ -7932,7 +8551,8 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
return LowerSDIV_v4i16(N0, N1, dl, DAG);
}
-static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
// TODO: Should this propagate fast-math-flags?
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
@@ -7960,7 +8580,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
- N0 = LowerCONCAT_VECTORS(N0, DAG);
+ N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
@@ -8255,6 +8875,96 @@ void ARMTargetLowering::ExpandDIV_Windows(
Results.push_back(Upper);
}
+static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ EVT MemVT = LD->getMemoryVT();
+ assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
+ "Expected a predicate type!");
+ assert(MemVT == Op.getValueType());
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Expected a non-extending load");
+  assert(LD->isUnindexed() && "Expected an unindexed load");
+
+  // The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16-bit
+  // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
+  // need to make sure that the 8/4 bits are actually loaded into the correct
+  // place, which means loading the value and then shuffling the values into
+  // the bottom bits of the predicate.
+  // Equally, VLDR for a v16i1 will actually load 32 bits (so will be incorrect
+  // for BE).
+
+ SDLoc dl(Op);
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
+ LD->getMemOperand());
+ SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Load);
+ if (MemVT != MVT::v16i1)
+ Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
+ DAG.getConstant(0, dl, MVT::i32));
+ return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
+}
+
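// A hedged sketch of the two predicate layouts this load lowering has to
// reconcile for v4i1: one bit per lane in memory versus one 4-bit field per
// lane in the 16-bit predicate register (illustrative only):
//
//   #include <cstdint>
//   static uint16_t memBitsToRegFields(uint8_t memBits /* 4 live bits */) {
//     uint16_t reg = 0;
//     for (unsigned lane = 0; lane < 4; ++lane)
//       if ((memBits >> lane) & 1)
//         reg |= 0xFu << (lane * 4);  // spread each lane over 4 bits
//     return reg;
//   }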
+static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT MemVT = ST->getMemoryVT();
+ assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
+ "Expected a predicate type!");
+ assert(MemVT == ST->getValue().getValueType());
+ assert(!ST->isTruncatingStore() && "Expected a non-extending store");
+  assert(ST->isUnindexed() && "Expected an unindexed store");
+
+ // Only store the v4i1 or v8i1 worth of bits, via a buildvector with top bits
+ // unset and a scalar store.
+ SDLoc dl(Op);
+ SDValue Build = ST->getValue();
+ if (MemVT != MVT::v16i1) {
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
+ DAG.getConstant(I, dl, MVT::i32)));
+ for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
+ Ops.push_back(DAG.getUNDEF(MVT::i32));
+ Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
+ }
+ SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
+ return DAG.getTruncStore(
+ ST->getChain(), dl, GRP, ST->getBasePtr(),
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
+ ST->getMemOperand());
+}
+
+static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
+ MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
+ MVT VT = Op.getSimpleValueType();
+ SDValue Mask = N->getMask();
+ SDValue PassThru = N->getPassThru();
+ SDLoc dl(Op);
+
+ auto IsZero = [](SDValue PassThru) {
+ return (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
+ (PassThru->getOpcode() == ARMISD::VMOVIMM &&
+ isNullConstant(PassThru->getOperand(0))));
+ };
+
+ if (IsZero(PassThru))
+ return Op;
+
+ // MVE Masked loads use zero as the passthru value. Here we convert undef to
+ // zero too, and other values are lowered to a select.
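+ // Sketch of the rewrite:
+ //   masked_load(ptr, mask, passthru)
+ //     -> vselect(mask, masked_load(ptr, mask, zeroes), passthru)
+ // where the vselect is omitted when passthru is undef or (a bitcast of)
+ // zero, since the zeroing load already produces the right lanes.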
+ SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ SDValue NewLoad = DAG.getMaskedLoad(
+ VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(),
+ N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad());
+ SDValue Combo = NewLoad;
+ if (!PassThru.isUndef() &&
+ (PassThru.getOpcode() != ISD::BITCAST ||
+ !IsZero(PassThru->getOperand(0))))
+ Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
+ return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
+}
+
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
// Acquire/Release load/store is not legal for targets without a dmb or
@@ -8273,12 +8983,12 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
// Under Power Management extensions, the cycle-count is:
// mrc p15, #0, <Rt>, c9, c13, #0
SDValue Ops[] = { N->getOperand(0), // Chain
- DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
- DAG.getConstant(15, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(9, DL, MVT::i32),
- DAG.getConstant(13, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32)
+ DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+ DAG.getTargetConstant(15, DL, MVT::i32),
+ DAG.getTargetConstant(0, DL, MVT::i32),
+ DAG.getTargetConstant(9, DL, MVT::i32),
+ DAG.getTargetConstant(13, DL, MVT::i32),
+ DAG.getTargetConstant(0, DL, MVT::i32)
};
SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
@@ -8412,6 +9122,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
+ case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
@@ -8426,24 +9137,25 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
- case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
- case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV:
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ true);
- return LowerSDIV(Op, DAG);
+ return LowerSDIV(Op, DAG, Subtarget);
case ISD::UDIV:
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ false);
- return LowerUDIV(Op, DAG);
+ return LowerUDIV(Op, DAG, Subtarget);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::SADDO:
@@ -8452,6 +9164,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDO:
case ISD::USUBO:
return LowerUnsignedALUO(Op, DAG);
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ return LowerSADDSUBSAT(Op, DAG, Subtarget);
+ case ISD::LOAD:
+ return LowerPredicateLoad(Op, DAG);
+ case ISD::STORE:
+ return LowerPredicateStore(Op, DAG);
+ case ISD::MLOAD:
+ return LowerMLOAD(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
@@ -8530,6 +9251,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res.getValue(0));
Results.push_back(Res.getValue(1));
return;
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
+ break;
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
@@ -8600,19 +9325,19 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// orr r5, r5, #1
// add r5, pc
// str r5, [$jbuf, #+4] ; &jbuf[1]
- unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ Register NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
.addConstantPoolIndex(CPI)
.addMemOperand(CPMMO)
.add(predOps(ARMCC::AL));
// Set the low bit because of thumb mode.
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ Register NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(0x01)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ Register NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
.addReg(NewVReg2, RegState::Kill)
.addImm(PCLabelId);
@@ -8630,28 +9355,28 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// orrs r1, r2
// add r2, $jbuf, #+4 ; &jbuf[1]
// str r1, [r2]
- unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ Register NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
.addConstantPoolIndex(CPI)
.addMemOperand(CPMMO)
.add(predOps(ARMCC::AL));
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ Register NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(PCLabelId);
// Set the low bit because of thumb mode.
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ Register NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
.addReg(ARM::CPSR, RegState::Define)
.addImm(1)
.add(predOps(ARMCC::AL));
- unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ Register NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3, RegState::Kill)
.add(predOps(ARMCC::AL));
- unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ Register NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
.addFrameIndex(FI)
.addImm(36); // &jbuf[1] :: pc
@@ -8666,13 +9391,13 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// ldr r1, LCPI1_1
// add r1, pc, r1
// str r1, [$jbuf, #+4] ; &jbuf[1]
- unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ Register NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
.addConstantPoolIndex(CPI)
.addImm(0)
.addMemOperand(CPMMO)
.add(predOps(ARMCC::AL));
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ Register NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(PCLabelId)
@@ -8794,7 +9519,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
bool IsPositionIndependent = isPositionIndependent();
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
- unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ Register NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
.addFrameIndex(FI)
.addImm(4)
@@ -8807,7 +9532,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addImm(LPadList.size())
.add(predOps(ARMCC::AL));
} else {
- unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ Register VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
.addImm(NumLPads & 0xFFFF)
.add(predOps(ARMCC::AL));
@@ -8832,12 +9557,12 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ Register NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
- unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ Register NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
.addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg1)
@@ -8850,7 +9575,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(NewVReg1)
.addJumpTableIndex(MJTI);
} else if (Subtarget->isThumb()) {
- unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ Register NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
.addFrameIndex(FI)
.addImm(1)
@@ -8873,7 +9598,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
- unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ Register VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
.addReg(VReg1, RegState::Define)
.addConstantPoolIndex(Idx)
@@ -8889,19 +9614,19 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ Register NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg1)
.addImm(2)
.add(predOps(ARMCC::AL));
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ Register NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
- unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ Register NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg2, RegState::Kill)
@@ -8911,7 +9636,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
- unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ Register NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
.addReg(NewVReg4, RegState::Kill)
.addImm(0)
@@ -8932,7 +9657,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(NewVReg6, RegState::Kill)
.addJumpTableIndex(MJTI);
} else {
- unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ Register NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
.addFrameIndex(FI)
.addImm(4)
@@ -8945,7 +9670,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addImm(NumLPads)
.add(predOps(ARMCC::AL));
} else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
- unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ Register VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
.addImm(NumLPads & 0xFFFF)
.add(predOps(ARMCC::AL));
@@ -8974,7 +9699,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
- unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ Register VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
.addReg(VReg1, RegState::Define)
.addConstantPoolIndex(Idx)
@@ -8991,20 +9716,20 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ Register NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
.add(predOps(ARMCC::AL))
.add(condCodeOp());
- unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ Register NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
- unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ Register NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
.addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg4)
@@ -9239,8 +9964,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
- unsigned dest = MI.getOperand(0).getReg();
- unsigned src = MI.getOperand(1).getReg();
+ Register dest = MI.getOperand(0).getReg();
+ Register src = MI.getOperand(1).getReg();
unsigned SizeVal = MI.getOperand(2).getImm();
unsigned Align = MI.getOperand(3).getImm();
DebugLoc dl = MI.getDebugLoc();
@@ -9291,9 +10016,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- unsigned srcOut = MRI.createVirtualRegister(TRC);
- unsigned destOut = MRI.createVirtualRegister(TRC);
- unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ Register srcOut = MRI.createVirtualRegister(TRC);
+ Register destOut = MRI.createVirtualRegister(TRC);
+ Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
IsThumb1, IsThumb2);
emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
@@ -9306,9 +10031,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned srcOut = MRI.createVirtualRegister(TRC);
- unsigned destOut = MRI.createVirtualRegister(TRC);
- unsigned scratch = MRI.createVirtualRegister(TRC);
+ Register srcOut = MRI.createVirtualRegister(TRC);
+ Register destOut = MRI.createVirtualRegister(TRC);
+ Register scratch = MRI.createVirtualRegister(TRC);
emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
IsThumb1, IsThumb2);
emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
@@ -9351,7 +10076,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Load an immediate to varEnd.
- unsigned varEnd = MRI.createVirtualRegister(TRC);
+ Register varEnd = MRI.createVirtualRegister(TRC);
if (Subtarget->useMovt()) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
@@ -9401,12 +10126,12 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// destPhi = PHI(destLoop, dst)
MachineBasicBlock *entryBB = BB;
BB = loopMBB;
- unsigned varLoop = MRI.createVirtualRegister(TRC);
- unsigned varPhi = MRI.createVirtualRegister(TRC);
- unsigned srcLoop = MRI.createVirtualRegister(TRC);
- unsigned srcPhi = MRI.createVirtualRegister(TRC);
- unsigned destLoop = MRI.createVirtualRegister(TRC);
- unsigned destPhi = MRI.createVirtualRegister(TRC);
+ Register varLoop = MRI.createVirtualRegister(TRC);
+ Register varPhi = MRI.createVirtualRegister(TRC);
+ Register srcLoop = MRI.createVirtualRegister(TRC);
+ Register srcPhi = MRI.createVirtualRegister(TRC);
+ Register destLoop = MRI.createVirtualRegister(TRC);
+ Register destPhi = MRI.createVirtualRegister(TRC);
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
.addReg(varLoop).addMBB(loopMBB)
@@ -9420,7 +10145,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
- unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
IsThumb1, IsThumb2);
emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
@@ -9461,9 +10186,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned srcOut = MRI.createVirtualRegister(TRC);
- unsigned destOut = MRI.createVirtualRegister(TRC);
- unsigned scratch = MRI.createVirtualRegister(TRC);
+ Register srcOut = MRI.createVirtualRegister(TRC);
+ Register destOut = MRI.createVirtualRegister(TRC);
+ Register scratch = MRI.createVirtualRegister(TRC);
emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
IsThumb1, IsThumb2);
emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
@@ -9523,7 +10248,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
break;
case CodeModel::Large: {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
+ Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
.addExternalSymbol("__chkstk");
@@ -9771,8 +10496,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// equality.
bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
- unsigned LHS1 = MI.getOperand(1).getReg();
- unsigned LHS2 = MI.getOperand(2).getReg();
+ Register LHS1 = MI.getOperand(1).getReg();
+ Register LHS2 = MI.getOperand(2).getReg();
if (RHSisZero) {
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(LHS1)
@@ -9782,8 +10507,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(LHS2).addImm(0)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
} else {
- unsigned RHS1 = MI.getOperand(3).getReg();
- unsigned RHS2 = MI.getOperand(4).getReg();
+ Register RHS1 = MI.getOperand(3).getReg();
+ Register RHS2 = MI.getOperand(4).getReg();
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS1)
.addReg(RHS1)
@@ -9844,15 +10569,15 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
Fn->insert(BBI, RSBBB);
Fn->insert(BBI, SinkBB);
- unsigned int ABSSrcReg = MI.getOperand(1).getReg();
- unsigned int ABSDstReg = MI.getOperand(0).getReg();
+ Register ABSSrcReg = MI.getOperand(1).getReg();
+ Register ABSDstReg = MI.getOperand(0).getReg();
bool ABSSrcKIll = MI.getOperand(1).isKill();
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode, S must not be specified if the source register is the SP
// or PC, or if the destination register is the SP, so restrict the
// register class.
- unsigned NewRsbDstReg =
- MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
+ Register NewRsbDstReg = MRI.createVirtualRegister(
+ isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
@@ -9931,7 +10656,7 @@ static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
// The MEMCPY both defines and kills the scratch registers.
for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
- unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
+ Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
: &ARM::GPRRegClass);
MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
}
@@ -10369,10 +11094,7 @@ static SDValue findMUL_LOHI(SDValue V) {
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- if (Subtarget->isThumb()) {
- if (!Subtarget->hasDSP())
- return SDValue();
- } else if (!Subtarget->hasV5TEOps())
+ if (!Subtarget->hasBaseDSP())
return SDValue();
// SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
@@ -11253,7 +11975,7 @@ static SDValue PerformANDCombine(SDNode *N,
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VbicVT;
- SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+ SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VbicVT, VT.is128BitVector(),
OtherModImm);
@@ -11469,6 +12191,77 @@ static SDValue PerformORCombineToBFI(SDNode *N,
return SDValue();
}
+static bool isValidMVECond(unsigned CC, bool IsFloat) {
+ switch (CC) {
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ case ARMCC::LE:
+ case ARMCC::GT:
+ case ARMCC::GE:
+ case ARMCC::LT:
+ return true;
+ case ARMCC::HS:
+ case ARMCC::HI:
+ return !IsFloat;
+ default:
+ return false;
+ }
+}
+
+static SDValue PerformORCombine_i1(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
+ // together with predicates
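+ // Sketch, applying De Morgan's law to the compares:
+ //   or (VCMP a, b, cc0), (VCMPZ c, cc1)
+ //     -> xor (and (VCMP a, b, !cc0), (VCMPZ c, !cc1)), all-ones
+ // where !cc is getOppositeCondition(cc), and both inverted conditions must
+ // be valid MVE conditions (see isValidMVECond above).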
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ ARMCC::CondCodes CondCode0 = ARMCC::AL;
+ ARMCC::CondCodes CondCode1 = ARMCC::AL;
+ if (N0->getOpcode() == ARMISD::VCMP)
+ CondCode0 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N0->getOperand(2))
+ ->getZExtValue();
+ else if (N0->getOpcode() == ARMISD::VCMPZ)
+ CondCode0 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N0->getOperand(1))
+ ->getZExtValue();
+ if (N1->getOpcode() == ARMISD::VCMP)
+ CondCode1 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N1->getOperand(2))
+ ->getZExtValue();
+ else if (N1->getOpcode() == ARMISD::VCMPZ)
+ CondCode1 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N1->getOperand(1))
+ ->getZExtValue();
+
+ if (CondCode0 == ARMCC::AL || CondCode1 == ARMCC::AL)
+ return SDValue();
+
+ unsigned Opposite0 = ARMCC::getOppositeCondition(CondCode0);
+ unsigned Opposite1 = ARMCC::getOppositeCondition(CondCode1);
+
+ if (!isValidMVECond(Opposite0,
+ N0->getOperand(0)->getValueType(0).isFloatingPoint()) ||
+ !isValidMVECond(Opposite1,
+ N1->getOperand(0)->getValueType(0).isFloatingPoint()))
+ return SDValue();
+
+ SmallVector<SDValue, 4> Ops0;
+ Ops0.push_back(N0->getOperand(0));
+ if (N0->getOpcode() == ARMISD::VCMP)
+ Ops0.push_back(N0->getOperand(1));
+ Ops0.push_back(DCI.DAG.getConstant(Opposite0, SDLoc(N0), MVT::i32));
+ SmallVector<SDValue, 4> Ops1;
+ Ops1.push_back(N1->getOperand(0));
+ if (N1->getOpcode() == ARMISD::VCMP)
+ Ops1.push_back(N1->getOperand(1));
+ Ops1.push_back(DCI.DAG.getConstant(Opposite1, SDLoc(N1), MVT::i32));
+
+ SDValue NewN0 = DCI.DAG.getNode(N0->getOpcode(), SDLoc(N0), VT, Ops0);
+ SDValue NewN1 = DCI.DAG.getNode(N1->getOpcode(), SDLoc(N1), VT, Ops1);
+ SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1);
+ return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And,
+ DCI.DAG.getAllOnesConstant(SDLoc(N), VT));
+}
+
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
@@ -11489,7 +12282,7 @@ static SDValue PerformORCombine(SDNode *N,
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VorrVT;
- SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VorrVT, VT.is128BitVector(),
OtherModImm);
@@ -11553,6 +12346,10 @@ static SDValue PerformORCombine(SDNode *N,
}
}
+ if (Subtarget->hasMVEIntegerOps() &&
+ (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
+ return PerformORCombine_i1(N, DCI, Subtarget);
+
// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
@@ -11921,6 +12718,24 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return Vec;
}
+static SDValue
+PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ SDLoc dl(N);
+
+ // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
+ if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
+ // If the value types are the same, we can remove the cast entirely.
+ if (Op->getOperand(0).getValueType() == VT)
+ return Op->getOperand(0);
+ return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl,
+ Op->getOperand(0).getValueType(), Op->getOperand(0));
+ }
+
+ return SDValue();
+}
+
/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
@@ -12332,7 +13147,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
// The canonical VMOV for a zero vector uses a 32-bit element size.
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned EltBits;
- if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+ if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
EltSize = 8;
EVT VT = N->getValueType(0);
if (EltSize > VT.getScalarSizeInBits())
@@ -12382,95 +13197,163 @@ static SDValue PerformLOADCombine(SDNode *N,
return SDValue();
}
-/// PerformSTORECombine - Target-specific dag combine xforms for
-/// ISD::STORE.
-static SDValue PerformSTORECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- StoreSDNode *St = cast<StoreSDNode>(N);
- if (St->isVolatile())
- return SDValue();
-
- // Optimize trunc store (of multiple scalars) to shuffle and store. First,
- // pack all of the elements in one place. Next, store to memory in fewer
- // chunks.
+// Optimize trunc store (of multiple scalars) to shuffle and store. First,
+// pack all of the elements in one place. Next, store to memory in fewer
+// chunks.
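+// For instance (a sketch): a truncating store of v4i32 to v4i16 is bitcast
+// to v8i16, shuffled so the four narrow elements land in the low lanes (or
+// their big-endian counterparts), and then stored as a single i64 chunk.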
+static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
+ SelectionDAG &DAG) {
SDValue StVal = St->getValue();
EVT VT = StVal.getValueType();
- if (St->isTruncatingStore() && VT.isVector()) {
- SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT StVT = St->getMemoryVT();
- unsigned NumElems = VT.getVectorNumElements();
- assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromEltSz = VT.getScalarSizeInBits();
- unsigned ToEltSz = StVT.getScalarSizeInBits();
+ if (!St->isTruncatingStore() || !VT.isVector())
+ return SDValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getScalarSizeInBits();
+ unsigned ToEltSz = StVT.getScalarSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
+ return SDValue();
- // From, To sizes and ElemCount must be pow of two
- if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz)
+ return SDValue();
- // We are going to use the original vector elt for storing.
- // Accumulated smaller vector elements must be a multiple of the store size.
- if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
- unsigned SizeRatio = FromEltSz / ToEltSz;
- assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems * SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- // Create a type on which we perform the shuffle.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
- NumElems*SizeRatio);
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDLoc DL(St);
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i)
+ ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
+ : i * SizeRatio;
- SDLoc DL(St);
- SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; ++i)
- ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
- ? (i + 1) * SizeRatio - 1
- : i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
- DAG.getUNDEF(WideVec.getValueType()),
- ShuffleVec);
- // At this point all of the data is stored at the bottom of the
- // register. We now need to save it to mem.
-
- // Find the largest store unit
- MVT StoreType = MVT::i8;
- for (MVT Tp : MVT::integer_valuetypes()) {
- if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
- StoreType = Tp;
- }
- // Didn't find a legal store type.
- if (!TLI.isTypeLegal(StoreType))
- return SDValue();
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT))
+ return SDValue();
- // Bitcast the original vector into a vector of store-size units
- EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
- StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
- assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
- SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
- TLI.getPointerTy(DAG.getDataLayout()));
- SDValue BasePtr = St->getBasePtr();
+ SDValue Shuff = DAG.getVectorShuffle(
+ WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
- // Perform one or more big stores into memory.
- unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
- for (unsigned I = 0; I < E; I++) {
- SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- StoreType, ShuffWide,
- DAG.getIntPtrConstant(I, DL));
- SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
- St->getPointerInfo(), St->getAlignment(),
- St->getMemOperand()->getFlags());
- BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- Increment);
- Chains.push_back(Ch);
- }
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (MVT Tp : MVT::integer_valuetypes()) {
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
}
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT =
+ EVT::getVectorVT(*DAG.getContext(), StoreType,
+ VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
+ ShuffWide, DAG.getIntPtrConstant(I, DL));
+ SDValue Ch =
+ DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
+ St->getAlignment(), St->getMemOperand()->getFlags());
+ BasePtr =
+ DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+}
+
+// Try taking a single vector store from a truncate (which would otherwise turn
+// into an expensive buildvector) and splitting it into a series of narrowing
+// stores.
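+// For example (a sketch): store (trunc v8i32 %x to v8i16) becomes two
+// truncating stores, each of a v4i32 extract_subvector of %x to a v4i16
+// memory type, at consecutive offsets.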
+static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
+ SelectionDAG &DAG) {
+ if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
+ return SDValue();
+ SDValue Trunc = St->getValue();
+ if (Trunc->getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+ EVT FromVT = Trunc->getOperand(0).getValueType();
+ EVT ToVT = Trunc.getValueType();
+ if (!ToVT.isVector())
+ return SDValue();
+ assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
+ EVT ToEltVT = ToVT.getVectorElementType();
+ EVT FromEltVT = FromVT.getVectorElementType();
+
+ unsigned NumElements = 0;
+ if (FromEltVT == MVT::i32 && (ToEltVT == MVT::i16 || ToEltVT == MVT::i8))
+ NumElements = 4;
+ if (FromEltVT == MVT::i16 && ToEltVT == MVT::i8)
+ NumElements = 8;
+ if (NumElements == 0 || FromVT.getVectorNumElements() == NumElements ||
+ FromVT.getVectorNumElements() % NumElements != 0)
+ return SDValue();
+
+ SDLoc DL(St);
+ // Details about the old store
+ SDValue Ch = St->getChain();
+ SDValue BasePtr = St->getBasePtr();
+ unsigned Alignment = St->getOriginalAlignment();
+ MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = St->getAAInfo();
+
+ EVT NewFromVT = EVT::getVectorVT(*DAG.getContext(), FromEltVT, NumElements);
+ EVT NewToVT = EVT::getVectorVT(*DAG.getContext(), ToEltVT, NumElements);
+
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
+ unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
+ SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset);
+
+ SDValue Extract =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
+ DAG.getConstant(i * NumElements, DL, MVT::i32));
+ SDValue Store = DAG.getTruncStore(
+ Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
+ NewToVT, Alignment, MMOFlags, AAInfo);
+ Stores.push_back(Store);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+ SDValue StVal = St->getValue();
+ EVT VT = StVal.getValueType();
+
+ if (Subtarget->hasNEON())
+ if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
+ return Store;
+
+ if (Subtarget->hasMVEIntegerOps())
+ if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
+ return NewToken;
if (!ISD::isNormalStore(St))
return SDValue();
@@ -12522,7 +13405,7 @@ static SDValue PerformSTORECombine(SDNode *N,
}
// If this is a legal vector store, try to combine it into a VST1_UPD.
- if (ISD::isNormalStore(N) && VT.isVector() &&
+ if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
return CombineBaseUpdate(N, DCI);
@@ -12890,6 +13773,71 @@ static SDValue PerformShiftCombine(SDNode *N,
return SDValue();
}
+// Look for a sign/zero extend of a larger-than-legal load. This can be split
+// into two extending loads, which are simpler to deal with than an arbitrary
+// sign extend.
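+// For example (a sketch): sext (load v16i8) to v16i16 becomes
+//   concat_vectors (sextload v8i8 -> v8i16 from ptr),
+//                  (sextload v8i8 -> v8i16 from ptr+8)
+// with the two load chains joined by a TokenFactor that replaces the uses
+// of the original load's chain.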
+static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::LOAD)
+ return SDValue();
+ LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
+ if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
+ LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+ EVT FromVT = LD->getValueType(0);
+ EVT ToVT = N->getValueType(0);
+ if (!ToVT.isVector())
+ return SDValue();
+ assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
+ EVT ToEltVT = ToVT.getVectorElementType();
+ EVT FromEltVT = FromVT.getVectorElementType();
+
+ unsigned NumElements = 0;
+ if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
+ NumElements = 4;
+ if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
+ NumElements = 8;
+ if (NumElements == 0 ||
+ FromVT.getVectorNumElements() == NumElements ||
+ FromVT.getVectorNumElements() % NumElements != 0 ||
+ !isPowerOf2_32(NumElements))
+ return SDValue();
+
+ SDLoc DL(LD);
+ // Details about the old load
+ SDValue Ch = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Alignment = LD->getOriginalAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ ISD::LoadExtType NewExtType =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
+ EVT NewFromVT = FromVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ EVT NewToVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned NewOffset = NewFromVT.getSizeInBits() / 8;
+ SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset);
+
+ // Split the load in half, each side of which is extended separately. This
+ // is good enough, as legalisation will take it from there. They are either
+ // already legal or they will be split further into something that is
+ // legal.
+ SDValue NewLoad1 =
+ DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, BasePtr, Offset,
+ LD->getPointerInfo(), NewFromVT, Alignment, MMOFlags, AAInfo);
+ SDValue NewLoad2 =
+ DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
+ LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
+ Alignment, MMOFlags, AAInfo);
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ SDValue(NewLoad1.getNode(), 1),
+ SDValue(NewLoad2.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, NewLoad1, NewLoad2);
+}
+
/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
@@ -12927,6 +13875,10 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ if (ST->hasMVEIntegerOps())
+ if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
+ return NewLoad;
+
return SDValue();
}
@@ -13028,43 +13980,169 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
return V;
}
+// Given N, the value controlling the conditional branch, search for the loop
+// intrinsic, returning it, along with how the value is used. We need to handle
+// patterns such as the following:
+// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
+// (brcond (setcc (loop.decrement), 0, eq), exit)
+// (brcond (setcc (loop.decrement), 0, ne), header)
+static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
+ bool &Negate) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::XOR: {
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
+ return SDValue();
+ if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
+ return SDValue();
+ Negate = !Negate;
+ return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
+ }
+ case ISD::SETCC: {
+ auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!Const)
+ return SDValue();
+ if (Const->isNullValue())
+ Imm = 0;
+ else if (Const->isOne())
+ Imm = 1;
+ else
+ return SDValue();
+ CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
+ return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue();
+ if (IntOp != Intrinsic::test_set_loop_iterations &&
+ IntOp != Intrinsic::loop_decrement_reg)
+ return SDValue();
+ return N;
+ }
+ }
+ return SDValue();
+}
+
static SDValue PerformHWLoopCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
- // Look for (brcond (xor test.set.loop.iterations, -1)
- SDValue CC = N->getOperand(1);
- unsigned Opc = CC->getOpcode();
- SDValue Int;
- if ((Opc == ISD::XOR || Opc == ISD::SETCC) &&
- (CC->getOperand(0)->getOpcode() == ISD::INTRINSIC_W_CHAIN)) {
+ // The hwloop intrinsics that we're interested in are used for control flow,
+ // either for entering or exiting the loop:
+ // - test.set.loop.iterations will test whether its operand is zero. If it
+ //   is zero, the following branch should not enter the loop.
+ // - loop.decrement.reg also tests whether its operand is zero. If it is
+ //   zero, the following branch should not branch back to the beginning of
+ //   the loop.
+ // So here we need to check how the brcond is using the result of each of
+ // the intrinsics to ensure that we're branching to the right place at the
+ // right time.
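+ // For example (a sketch of the rewrite, not verbatim DAG):
+ //   brcond (setcc (test.set.loop.iterations %n), 0, eq), %exit
+ //     -> ARMISD::WLS chain, %n, %exit
+ //   brcond (setcc (loop.decrement.reg %cnt, %size), 0, ne), %header
+ //     -> ARMISD::LOOP_DEC chain, %cnt, %size; ARMISD::LE to %header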
+
+ ISD::CondCode CC;
+ SDValue Cond;
+ int Imm = 1;
+ bool Negate = false;
+ SDValue Chain = N->getOperand(0);
+ SDValue Dest;
- assert((isa<ConstantSDNode>(CC->getOperand(1)) &&
- cast<ConstantSDNode>(CC->getOperand(1))->isOne()) &&
- "Expected to compare against 1");
+ if (N->getOpcode() == ISD::BRCOND) {
+ CC = ISD::SETEQ;
+ Cond = N->getOperand(1);
+ Dest = N->getOperand(2);
+ } else {
+ assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
+ CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ Cond = N->getOperand(2);
+ Dest = N->getOperand(4);
+ if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
+ if (!Const->isOne() && !Const->isNullValue())
+ return SDValue();
+ Imm = Const->getZExtValue();
+ } else
+ return SDValue();
+ }
- Int = CC->getOperand(0);
- } else if (CC->getOpcode() == ISD::INTRINSIC_W_CHAIN)
- Int = CC;
- else
+ SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
+ if (!Int)
return SDValue();
- unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue();
- if (IntOp != Intrinsic::test_set_loop_iterations)
- return SDValue();
+ if (Negate)
+ CC = ISD::getSetCCInverse(CC, true);
+
+ auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
+ return (CC == ISD::SETEQ && Imm == 0) ||
+ (CC == ISD::SETNE && Imm == 1) ||
+ (CC == ISD::SETLT && Imm == 1) ||
+ (CC == ISD::SETULT && Imm == 1);
+ };
+
+ auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
+ return (CC == ISD::SETEQ && Imm == 1) ||
+ (CC == ISD::SETNE && Imm == 0) ||
+ (CC == ISD::SETGT && Imm == 0) ||
+ (CC == ISD::SETUGT && Imm == 0) ||
+ (CC == ISD::SETGE && Imm == 1) ||
+ (CC == ISD::SETUGE && Imm == 1);
+ };
+
+ assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
+ "unsupported condition");
SDLoc dl(Int);
- SDValue Chain = N->getOperand(0);
+ SelectionDAG &DAG = DCI.DAG;
SDValue Elements = Int.getOperand(2);
- SDValue ExitBlock = N->getOperand(2);
+ unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+ assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
+ && "expected single br user");
+ SDNode *Br = *N->use_begin();
+ SDValue OtherTarget = Br->getOperand(1);
+
+ // Update the unconditional branch to branch to the given Dest.
+ auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
+ SDValue NewBrOps[] = { Br->getOperand(0), Dest };
+ SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
+ };
- // TODO: Once we start supporting tail predication, we can add another
- // operand to WLS for the number of elements processed in a vector loop.
+ if (IntOp == Intrinsic::test_set_loop_iterations) {
+ SDValue Res;
+ // We expect this 'instruction' to branch when the counter is zero.
+ if (IsTrueIfZero(CC, Imm)) {
+ SDValue Ops[] = { Chain, Elements, Dest };
+ Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
+ } else {
+ // The logic is the reverse of what we need for WLS, so find the other
+ // basic block target: the target of the following br.
+ UpdateUncondBr(Br, Dest, DAG);
- SDValue Ops[] = { Chain, Elements, ExitBlock };
- SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
- DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0));
- return Res;
+ SDValue Ops[] = { Chain, Elements, OtherTarget };
+ Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
+ }
+ DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0));
+ return Res;
+ } else {
+ SDValue Size = DAG.getTargetConstant(
+ cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32);
+ SDValue Args[] = { Int.getOperand(0), Elements, Size, };
+ SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Args);
+ DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
+
+ // We expect this instruction to branch when the count is not zero.
+ SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
+
+ // Update the unconditional branch to target the loop preheader if we've
+ // found the condition has been reversed.
+ if (Target == OtherTarget)
+ UpdateUncondBr(Br, Dest, DAG);
+
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ SDValue(LoopDec.getNode(), 1), Chain);
+
+ SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
+ return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
+ }
+ return SDValue();
}
/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
@@ -13298,14 +14376,15 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
- case ISD::BRCOND: return PerformHWLoopCombine(N, DCI, Subtarget);
+ case ISD::BRCOND:
+ case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
case ARMISD::ADDC:
case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
- case ISD::STORE: return PerformSTORECombine(N, DCI);
+ case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
@@ -13334,6 +14413,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
+ case ARMISD::PREDICATE_CAST:
+ return PerformPREDICATE_CASTCombine(N, DCI);
case ARMISD::SMULWB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
@@ -13348,7 +14429,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
break;
}
- case ARMISD::SMLALBB: {
+ case ARMISD::SMLALBB:
+ case ARMISD::QADD16b:
+ case ARMISD::QSUB16b: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
@@ -13384,6 +14467,15 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
break;
}
+ case ARMISD::QADD8b:
+ case ARMISD::QSUB8b: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
+ if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
+ return SDValue();
+ break;
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -13457,47 +14549,38 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
if (!Subtarget->hasMVEIntegerOps())
return false;
- if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
- Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
- Ty != MVT::v2f64 &&
- // These are for truncated stores
- Ty != MVT::v4i8 && Ty != MVT::v8i8 && Ty != MVT::v4i16)
- return false;
- if (Subtarget->isLittle()) {
- // In little-endian MVE, the store instructions VSTRB.U8,
- // VSTRH.U16 and VSTRW.U32 all store the vector register in
- // exactly the same format, and differ only in the range of
- // their immediate offset field and the required alignment.
- //
- // In particular, VSTRB.U8 can store a vector at byte alignment.
- // So at this stage we can simply say that loads/stores of all
- // 128-bit wide vector types are permitted at any alignment,
- // because we know at least _one_ instruction can manage that.
- //
- // Later on we might find that some of those loads are better
- // generated as VLDRW.U32 if alignment permits, to take
- // advantage of the larger immediate range. But for the moment,
- // all that matters is that if we don't lower the load then
- // _some_ instruction can handle it.
+ // These are for predicates
+ if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+
+ // These are for truncated stores/narrowing loads. They are fine so long as
+ // the alignment is at least the size of the item being loaded
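+ // (e.g. v4i8/v8i8 accesses are fine at byte alignment, while v4i16 needs
+ // at least 2-byte alignment).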
+ if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
+ Alignment >= VT.getScalarSizeInBits() / 8) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+
+ // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
+ // VSTRW.U32 all store the vector register in exactly the same format, and
+ // differ only in the range of their immediate offset field and the required
+ // alignment. So there is always a store that can be used, regardless of
+ // actual type.
+ //
+ // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
+ // VREV64.8) pair and get the same effect. This will likely be better than
+ // aligning the vector through the stack.
+ if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
+ Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
+ Ty == MVT::v2f64) {
if (Fast)
*Fast = true;
return true;
- } else {
- // In big-endian MVE, those instructions aren't so similar
- // after all, because they reorder the bytes of the vector
- // differently. So this time we can only store a particular
- // kind of vector if its alignment is at least the element
- // type. And we can't store vectors of i64 or f64 at all
- // without having to do some postprocessing, because there's
- // no VSTRD.U64.
- if (Ty == MVT::v16i8 ||
- ((Ty == MVT::v8i16 || Ty == MVT::v8f16) && Alignment >= 2) ||
- ((Ty == MVT::v4i32 || Ty == MVT::v4f32) && Alignment >= 4)) {
- if (Fast)
- *Fast = true;
- return true;
- }
}
return false;
@@ -13617,22 +14700,60 @@ static bool areExtractExts(Value *Ext1, Value *Ext2) {
/// sext/zext can be folded into vsubl.
bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const {
- if (!Subtarget->hasNEON() || !I->getType()->isVectorTy())
+ if (!I->getType()->isVectorTy())
return false;
- switch (I->getOpcode()) {
- case Instruction::Sub:
- case Instruction::Add: {
- if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
+ if (Subtarget->hasNEON()) {
+ switch (I->getOpcode()) {
+ case Instruction::Sub:
+ case Instruction::Add: {
+ if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
+ return false;
+ Ops.push_back(&I->getOperandUse(0));
+ Ops.push_back(&I->getOperandUse(1));
+ return true;
+ }
+ default:
return false;
- Ops.push_back(&I->getOperandUse(0));
- Ops.push_back(&I->getOperandUse(1));
- return true;
+ }
}
- default:
+
+ if (!Subtarget->hasMVEIntegerOps())
+ return false;
+
+ auto IsSinker = [](Instruction *I, int Operand) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ return true;
+ case Instruction::Sub:
+ return Operand == 1;
+ default:
+ return false;
+ }
+ };
+
+ int Op = 0;
+ if (!isa<ShuffleVectorInst>(I->getOperand(Op)))
+ Op = 1;
+ if (!IsSinker(I, Op))
+ return false;
+ if (!match(I->getOperand(Op),
+ m_ShuffleVector(m_InsertElement(m_Undef(), m_Value(), m_ZeroInt()),
+ m_Undef(), m_Zero()))) {
return false;
}
- return false;
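+  // i.e. only sink splats of the form (a sketch, in IR):
+  //   %i = insertelement <4 x i32> undef, i32 %x, i32 0
+  //   %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer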
+ Instruction *Shuffle = cast<Instruction>(I->getOperand(Op));
+ // All uses of the shuffle should be sunk to avoid duplicating it across gpr
+ // and vector registers
+ for (Use &U : Shuffle->uses()) {
+ Instruction *Insn = cast<Instruction>(U.getUser());
+ if (!IsSinker(Insn, U.getOperandNo()))
+ return false;
+ }
+ Ops.push_back(&Shuffle->getOperandUse(0));
+ Ops.push_back(&I->getOperandUse(Op));
+ return true;
}
bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
@@ -13641,6 +14762,11 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
if (!isTypeLegal(VT))
return false;
+ if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
+ if (Ld->isExpandingLoad())
+ return false;
+ }
+
// Don't create a loadext if we can fold the extension into a wide/long
// instruction.
// If there's more than one user instruction, the loadext is desirable no
@@ -14028,6 +15154,52 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
return false;
}
+static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
+ bool isSEXTLoad, bool isLE, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+ if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
+ return false;
+
+ ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
+ int RHSC = (int)RHS->getZExtValue();
+
+ auto IsInRange = [&](int RHSC, int Limit, int Scale) {
+ if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
+ return true;
+ } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
+ isInc = Ptr->getOpcode() == ISD::ADD;
+ Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
+ return true;
+ }
+ return false;
+ };
+
+ // Try to find a matching instruction based on s/zext, Alignment, Offset and
+ // (in BE) type.
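+ // For instance, IsInRange(RHSC, 0x80, 2) accepts offsets that are multiples
+ // of 2 strictly inside (-256, 256), i.e. the 7-bit scaled immediate range
+ // of the halfword loads/stores.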
+ Base = Ptr->getOperand(0);
+ if (VT == MVT::v4i16) {
+ if (Align >= 2 && IsInRange(RHSC, 0x80, 2))
+ return true;
+ } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
+ if (IsInRange(RHSC, 0x80, 1))
+ return true;
+ } else if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ IsInRange(RHSC, 0x80, 4))
+ return true;
+ else if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) &&
+ IsInRange(RHSC, 0x80, 2))
+ return true;
+ else if ((isLE || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
+ return true;
+ return false;
+}
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -14041,25 +15213,35 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
EVT VT;
SDValue Ptr;
+ unsigned Align;
bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
- VT = LD->getMemoryVT();
+ VT = LD->getMemoryVT();
+ Align = LD->getAlignment();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
- VT = ST->getMemoryVT();
+ VT = ST->getMemoryVT();
+ Align = ST->getAlignment();
} else
return false;
bool isInc;
bool isLegal = false;
- if (Subtarget->isThumb2())
- isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
- Offset, isInc, DAG);
- else
- isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
- Offset, isInc, DAG);
+ if (VT.isVector())
+ isLegal = Subtarget->hasMVEIntegerOps() &&
+ getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad,
+ Subtarget->isLittle(), Base, Offset,
+ isInc, DAG);
+ else {
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ }
if (!isLegal)
return false;
@@ -14077,15 +15259,18 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
+ unsigned Align;
bool isSEXTLoad = false, isNonExt;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- VT = LD->getMemoryVT();
+ VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
+ Align = LD->getAlignment();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- VT = ST->getMemoryVT();
+ VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
+ Align = ST->getAlignment();
isNonExt = !ST->isTruncatingStore();
} else
return false;
@@ -14108,12 +15293,19 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isInc;
bool isLegal = false;
- if (Subtarget->isThumb2())
- isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
- isInc, DAG);
- else
- isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ if (VT.isVector())
+ isLegal = Subtarget->hasMVEIntegerOps() &&
+ getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad,
+ Subtarget->isLittle(), Base, Offset,
isInc, DAG);
+ else {
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ }
if (!isLegal)
return false;
@@ -14369,7 +15561,8 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(StringRef Constraint) const {
- if (Constraint.size() == 1) {
+ unsigned S = Constraint.size();
+ if (S == 1) {
switch (Constraint[0]) {
default: break;
case 'l': return C_RegisterClass;
@@ -14377,12 +15570,12 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const {
case 'h': return C_RegisterClass;
case 'x': return C_RegisterClass;
case 't': return C_RegisterClass;
- case 'j': return C_Other; // Constant for movw.
- // An address with a single base register. Due to the way we
- // currently handle addresses it is the same as an 'r' memory constraint.
+ case 'j': return C_Immediate; // Constant for movw.
+ // An address with a single base register. Due to the way we
+ // currently handle addresses it is the same as an 'r' memory constraint.
case 'Q': return C_Memory;
}
- } else if (Constraint.size() == 2) {
+ } else if (S == 2) {
switch (Constraint[0]) {
default: break;
case 'T': return C_RegisterClass;
@@ -14535,7 +15728,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'j':
// Constant suitable for movw, must be between 0 and
// 65535.
- if (Subtarget->hasV6T2Ops())
+ if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
if (CVal >= 0 && CVal <= 65535)
break;
return;
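With 'j' reclassified as C_Immediate and accepted on v8-M Baseline as well, inline assembly can request a MOVW-encodable constant directly. A hedged usage sketch (GNU inline asm for an Arm target; values illustrative):

    // Hypothetical use of the 'j' constraint: ask the compiler for an
    // immediate suitable for MOVW (0..65535).
    int load_const() {
      int x;
      asm("movw %0, %1" : "=r"(x) : "j"(0xABCD));
      return x;
    }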
@@ -14643,7 +15836,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'N':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a constant between 0 and 31, for shift amounts.
if (CVal >= 0 && CVal <= 31)
break;
@@ -14651,7 +15844,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'O':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a multiple of 4 between -508 and 508, for
// ADD/SUB sp = sp + immediate.
if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
@@ -14874,6 +16067,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
// without FP16. So we must do a function call.
SDLoc Loc(Op);
RTLIB::Libcall LC;
+ MakeLibCallOptions CallOptions;
if (SrcSz == 16) {
// Instruction from 16 -> 32
if (Subtarget->hasFP16())
@@ -14884,7 +16078,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected type for custom-lowering FP_EXTEND");
SrcVal =
- makeLibCall(DAG, LC, MVT::f32, SrcVal, /*isSigned*/ false, Loc).first;
+ makeLibCall(DAG, LC, MVT::f32, SrcVal, CallOptions, Loc).first;
}
}
@@ -14897,7 +16091,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
LC = RTLIB::getFPEXT(MVT::f32, MVT::f64);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected type for custom-lowering FP_EXTEND");
- return makeLibCall(DAG, LC, MVT::f64, SrcVal, /*isSigned*/ false, Loc).first;
+ return makeLibCall(DAG, LC, MVT::f64, SrcVal, CallOptions, Loc).first;
}
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -14923,7 +16117,8 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected type for custom-lowering FP_ROUND");
- return makeLibCall(DAG, LC, DstVT, SrcVal, /*isSigned*/ false, Loc).first;
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions, Loc).first;
}
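These hunks track an upstream API change: makeLibCall takes a MakeLibCallOptions struct instead of a bare isSigned flag. A fragment restating the new call shape, not a standalone program (LLVM-internal API, shown as used in the hunks above):

    // The former /*isSigned*/ parameter now lives in MakeLibCallOptions;
    // this patch leaves the options default-initialized.
    TargetLowering::MakeLibCallOptions CallOptions;
    SDValue Result =
        makeLibCall(DAG, LC, MVT::f32, SrcVal, CallOptions, Loc).first;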
void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
@@ -15015,7 +16210,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
- Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.align = MaybeAlign(cast<ConstantInt>(AlignArg)->getZExtValue());
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
@@ -15030,7 +16225,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
- Info.align = 0;
+ Info.align.reset();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
@@ -15056,7 +16251,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
- Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.align = MaybeAlign(cast<ConstantInt>(AlignArg)->getZExtValue());
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
@@ -15077,7 +16272,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = 0;
+ Info.align.reset();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
@@ -15090,7 +16285,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
+ Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
@@ -15102,7 +16297,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
+ Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
@@ -15112,7 +16307,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
- Info.align = 8;
+ Info.align = Align(8);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
@@ -15122,7 +16317,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = 8;
+ Info.align = Align(8);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
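The align field of IntrinsicInfo is now a MaybeAlign rather than a raw unsigned, so "unknown" becomes an explicit empty state instead of the magic value 0. A small self-contained sketch of the same idea (simplified stand-ins, not the LLVM types):

    #include <cassert>
    #include <optional>

    // Simplified stand-ins for llvm::Align / llvm::MaybeAlign: Align is a
    // known alignment; MaybeAlign adds an explicit "unknown" state.
    struct Align { unsigned Value; };
    using MaybeAlign = std::optional<Align>;

    int main() {
      MaybeAlign A;              // unknown, replaces the old 'Info.align = 0'
      A = Align{8};              // known 8-byte alignment, as for ldrexd/strexd
      assert(A && A->Value == 8);
      A.reset();                 // back to unknown, as in 'Info.align.reset()'
    }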
@@ -15473,6 +16668,12 @@ bool ARMTargetLowering::isLegalInterleavedAccessType(
return VecSize == 64 || VecSize % 128 == 0;
}
+unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
+ if (Subtarget->hasNEON())
+ return 4;
+ return TargetLoweringBase::getMaxSupportedInterleaveFactor();
+}
+
/// Lower an interleaved load into a vldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -15792,15 +16993,15 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
}
/// Return the correct alignment for the current calling convention.
-unsigned
-ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
- DataLayout DL) const {
+Align ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
+ DataLayout DL) const {
+ const Align ABITypeAlign(DL.getABITypeAlignment(ArgTy));
if (!ArgTy->isVectorTy())
- return DL.getABITypeAlignment(ArgTy);
+ return ABITypeAlign;
// Avoid over-aligning vector parameters. It would require realigning the
// stack and waste space for no real benefit.
- return std::min(DL.getABITypeAlignment(ArgTy), DL.getStackAlignment());
+ return std::min(ABITypeAlign, DL.getStackAlignment());
}
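The calling-convention alignment hook now returns Align, but the logic is unchanged: vector arguments are clamped to the stack alignment so they never force stack realignment. A minimal arithmetic sketch with illustrative values:

    #include <algorithm>
    #include <cstdio>

    // Vector parameters take min(natural ABI alignment, stack alignment);
    // everything else keeps its natural ABI alignment.
    unsigned abiAlignForArg(bool IsVector, unsigned ABITypeAlign,
                            unsigned StackAlign) {
      return IsVector ? std::min(ABITypeAlign, StackAlign) : ABITypeAlign;
    }

    int main() {
      // e.g. a 16-byte vector on an 8-byte-aligned stack -> 8.
      std::printf("%u\n", abiAlignForArg(true, 16, 8));
    }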
/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
@@ -15861,7 +17062,7 @@ void ARMTargetLowering::insertCopiesSplitCSR(
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 1675ec59a354..53813fad5afd 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -103,6 +103,7 @@ class VectorType;
ADDE, // Add using carry
SUBC, // Sub with carry
SUBE, // Sub using carry
+ LSLS, // Shift left producing carry
VMOVRRD, // double to two gprs.
VMOVDRR, // Two gprs to double.
@@ -126,17 +127,13 @@ class VectorType;
WIN__DBZCHK, // Windows' divide by zero check
WLS, // Low-overhead loops, While Loop Start
+ LOOP_DEC, // Really a part of LE, performs the sub
+ LE, // Low-overhead loops, Loop End
- VCEQ, // Vector compare equal.
- VCEQZ, // Vector compare equal to zero.
- VCGE, // Vector compare greater than or equal.
- VCGEZ, // Vector compare greater than or equal to zero.
- VCLEZ, // Vector compare less than or equal to zero.
- VCGEU, // Vector compare unsigned greater than or equal.
- VCGT, // Vector compare greater than.
- VCGTZ, // Vector compare greater than zero.
- VCLTZ, // Vector compare less than zero.
- VCGTU, // Vector compare unsigned greater than.
+ PREDICATE_CAST, // Predicate cast for MVE i1 types
+
+ VCMP, // Vector compare.
+ VCMPZ, // Vector compare to zero.
VTST, // Vector test bits.
// Vector shift by vector
@@ -200,6 +197,7 @@ class VectorType;
VTRN, // transpose
VTBL1, // 1-register shuffle with mask
VTBL2, // 2-register shuffle with mask
+ VMOVN, // MVE vmovn
// Vector multiply long:
VMULLs, // ...signed
@@ -221,6 +219,12 @@ class VectorType;
SMMLAR, // Signed multiply long, round and add
SMMLSR, // Signed multiply long, subtract and round
+ // Single-lane QADD8 and QADD16: only the bottom lane is used; the "b" stands for "bottom".
+ QADD8b,
+ QSUB8b,
+ QADD16b,
+ QSUB16b,
+
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
@@ -243,6 +247,11 @@ class VectorType;
// instructions.
MEMCPY,
+ // V8.1-M Mainline condition select
+ CSINV, // Conditional select invert.
+ CSNEG, // Conditional select negate.
+ CSINC, // Conditional select increment.
+
// Vector load N-element structure to all lanes:
VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
VLD2DUP,
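The new CSINV/CSNEG/CSINC nodes model the Armv8.1-M Mainline conditional-select instructions. Assuming the usual CSEL-family semantics (as on AArch64; a hedged reference, not the lowering itself):

    #include <cstdint>

    // Illustrative semantics: the result is Rn when the condition holds,
    // else a transformed Rm (inverted, negated, or incremented).
    uint32_t csinv(bool c, uint32_t n, uint32_t m) { return c ? n : ~m; }
    uint32_t csneg(bool c, uint32_t n, uint32_t m) { return c ? n : 0u - m; }
    uint32_t csinc(bool c, uint32_t n, uint32_t m) { return c ? n : m + 1; }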
@@ -539,7 +548,7 @@ class VectorType;
Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
- unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+ unsigned getMaxSupportedInterleaveFactor() const override;
bool lowerInterleavedLoad(LoadInst *LI,
ArrayRef<ShuffleVectorInst *> Shuffles,
@@ -608,8 +617,8 @@ class VectorType;
void finalizeLowering(MachineFunction &MF) const override;
/// Return the correct alignment for the current calling convention.
- unsigned getABIAlignmentForCallingConv(Type *ArgTy,
- DataLayout DL) const override;
+ Align getABIAlignmentForCallingConv(Type *ArgTy,
+ DataLayout DL) const override;
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
@@ -670,6 +679,8 @@ class VectorType;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -721,8 +732,8 @@ class VectorType;
void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
@@ -814,7 +825,7 @@ class VectorType;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- const SDLoc &dl, bool InvalidOnQNaN) const;
+ const SDLoc &dl) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
@@ -838,7 +849,7 @@ class VectorType;
void setAllExpand(MVT VT);
};
- enum NEONModImmType {
+ enum VMOVModImmType {
VMOVModImm,
VMVNModImm,
MVEVMVNModImm,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index bc93a058720c..1da32ad2af6c 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -188,6 +188,13 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
let DecoderMethod = "DecodeCCOutOperand";
}
+// Transform to generate the inverse of a condition code during ISel
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ ARMCC::CondCodes CC = static_cast<ARMCC::CondCodes>(N->getZExtValue());
+ return CurDAG->getTargetConstant(ARMCC::getOppositeCondition(CC), SDLoc(N),
+ MVT::i32);
+}]>;
+
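inv_cond_XFORM rewrites an immediate condition code into its logical opposite at selection time. ARMCC encodes conditions so that each code and its opposite differ only in bit 0 (EQ/NE, HS/LO, and so on), which a sketch can exploit (illustrative; the real ARMCC::getOppositeCondition is an explicit mapping):

    #include <cassert>

    // ARM condition-code encoding: EQ=0, NE=1, HS=2, LO=3, ... GT=12, LE=13.
    // Opposite conditions pair up as (2k, 2k+1), so flipping bit 0 inverts
    // any condition other than AL (14).
    unsigned oppositeCondition(unsigned CC) {
      assert(CC < 14 && "AL has no opposite");
      return CC ^ 1;
    }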
// VPT predicate
def VPTPredNOperand : AsmOperandClass {
@@ -401,6 +408,8 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
// mnemonic (when not in an IT block) or preclude it (when in an IT block).
bit thumbArithFlagSetting = 0;
+ bit validForTailPredication = 0;
+
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
@@ -412,6 +421,7 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
let TSFlags{14} = canXformTo16Bit;
let TSFlags{18-15} = D.Value;
let TSFlags{19} = thumbArithFlagSetting;
+ let TSFlags{20} = validForTailPredication;
let Constraints = cstr;
let Itinerary = itin;
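validForTailPredication is threaded into TSFlags bit 20, so later passes can query it per instruction without TableGen knowledge. A hedged sketch of such a query (helper name is illustrative):

    #include <cstdint>

    // TSFlags layout per the diff: bit 20 holds validForTailPredication.
    constexpr uint64_t ValidForTailPredicationBit = 20;

    bool isValidForTailPredication(uint64_t TSFlags) {
      return (TSFlags >> ValidForTailPredicationBit) & 1;
    }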
@@ -455,6 +465,7 @@ class AsmPseudoInst<string asm, dag iops, dag oops = (outs)>
let isCodeGenOnly = 0; // So we get asm matcher for it.
let AsmString = asm;
let isPseudo = 1;
+ let hasNoSchedulingInfo = 1;
}
class ARMAsmPseudo<string asm, dag iops, dag oops = (outs)>
@@ -2282,7 +2293,7 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
let Inst{24} = SIMM{7};
let Inst{18-16} = SIMM{6-4};
let Inst{3-0} = SIMM{3-0};
- let DecoderMethod = "DecodeNEONModImmInstruction";
+ let DecoderMethod = "DecodeVMOVModImmInstruction";
}
// NEON 2 vector register format.
@@ -2724,6 +2735,16 @@ def complexrotateopodd : Operand<i32> {
let PrintMethod = "printComplexRotationOp<180, 90>";
}
+def MveSaturateOperand : AsmOperandClass {
+ let PredicateMethod = "isMveSaturateOp";
+ let DiagnosticString = "saturate operand must be 48 or 64";
+ let Name = "MveSaturate";
+}
+def saturateop : Operand<i32> {
+ let ParserMatchClass = MveSaturateOperand;
+ let PrintMethod = "printMveSaturateOp";
+}
+
// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
def : TokenAlias<".s8", ".i8">;
def : TokenAlias<".u8", ".i8">;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 388c889349b7..a802d5a06f07 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -117,7 +117,7 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
- unsigned Reg = MI->getOperand(0).getReg();
+ Register Reg = MI->getOperand(0).getReg();
MachineInstrBuilder MIB;
MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg)
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index e35145463852..fe696222ec70 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -51,8 +51,6 @@ def SDT_ARMAnd : SDTypeProfile<1, 2,
SDTCisVT<2, i32>]>;
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
-def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>,
- SDTCisVT<2, i32>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
@@ -108,14 +106,24 @@ def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>,
// TODO Add another operand for 'Size' so that we can re-use this node when we
// start supporting *TP versions.
-def SDT_ARMWhileLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>,
- SDTCisVT<1, OtherVT>]>;
+def SDT_ARMLoLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, OtherVT>]>;
def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>;
+def SDT_ARMCSel : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<3>,
+ SDTCisVT<3, i32>]>;
+
+def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel, [SDNPOptInGlue]>;
+def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel, [SDNPOptInGlue]>;
+def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel, [SDNPOptInGlue]>;
+
def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
@@ -194,6 +202,7 @@ def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>;
+def ARMlsls : SDNode<"ARMISD::LSLS", SDTBinaryArithWithFlags>;
def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>;
def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
@@ -229,6 +238,11 @@ def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
+def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>;
+def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
+def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
+def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
+
// Vector operations shared between NEON and MVE
def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@@ -265,8 +279,16 @@ def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
-def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop,
- [SDNPHasChain]>;
+def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+ SDTCisInt<3>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>;
+
+def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>;
+def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>;
+
+def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>;
+def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>;
+def ARMLoopDec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -1948,7 +1970,7 @@ multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
/// the function. The first operand is the ID# for this instruction, the second
/// is the index into the MachineConstantPool that this is, the third is the
/// size in bytes of this constant pool entry.
-let hasSideEffects = 0, isNotDuplicable = 1 in
+let hasSideEffects = 0, isNotDuplicable = 1, hasNoSchedulingInfo = 1 in
def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
@@ -2361,6 +2383,12 @@ let isCall = 1,
def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func),
8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
Requires<[IsARM]>, Sched<[WriteBr]>;
+
+ // push lr before the call
+ def BL_PUSHLR : ARMPseudoInst<(outs), (ins GPRlr:$ra, arm_bl_target:$func),
+ 4, IIC_Br,
+ []>,
+ Requires<[IsARM]>, Sched<[WriteBr]>;
}
let isBranch = 1, isTerminator = 1 in {
@@ -3727,6 +3755,23 @@ let DecoderMethod = "DecodeQADDInstruction" in
[(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>;
}
+def : ARMV5TEPat<(saddsat GPR:$a, GPR:$b),
+ (QADD GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(ssubsat GPR:$a, GPR:$b),
+ (QSUB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn),
+ (QDADD rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV5TEPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
+ (QDSUB rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
+ (QADD8 rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
+ (QSUB8 rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn),
+ (QADD16 rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn),
+ (QSUB16 rGPR:$Rm, rGPR:$Rn)>;
+
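These patterns select the generic saddsat/ssubsat nodes (and the doubled forms) onto the ARMv5TE Q-arithmetic instructions. A reference model of 32-bit signed saturating add, matching QADD's defined behaviour (sketch):

    #include <cstdint>
    #include <limits>

    // QADD-style saturating add: compute in 64 bits, clamp to the int32_t
    // range. Per the patterns above, QDADD m, n is qadd(qadd(m, m), n).
    int32_t qadd(int32_t A, int32_t B) {
      int64_t S = int64_t(A) + int64_t(B);
      if (S > std::numeric_limits<int32_t>::max())
        return std::numeric_limits<int32_t>::max();
      if (S < std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::min();
      return int32_t(S);
    }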
def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>;
def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>;
def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>;
@@ -4870,14 +4915,13 @@ def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>,
let hasSideEffects = 1;
}
-let usesCustomInserter = 1, Defs = [CPSR] in {
-
-// Pseudo instruction that combines movs + predicated rsbmi
-// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
+ // Pseudo instruction that combines movs + predicated rsbmi
+ // to implement integer ABS
def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
}
-let usesCustomInserter = 1, Defs = [CPSR] in {
+let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
NoItinerary,
@@ -5085,8 +5129,8 @@ def SWPB: AIswp<1, (outs GPRnopc:$Rt),
def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
- [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]>,
+ [(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
+ timm:$CRm, timm:$opc2)]>,
Requires<[IsARM,PreV8]> {
bits<4> opc1;
bits<4> CRn;
@@ -5109,8 +5153,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
- [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]>,
+ [(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
+ timm:$CRm, timm:$opc2)]>,
Requires<[IsARM,PreV8]> {
let Inst{31-28} = 0b1111;
bits<4> opc1;
@@ -5289,15 +5333,15 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
}
}
-defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
-defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
-defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm STC : LdStCop <0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
} // DecoderNamespace = "CoProc"
@@ -5333,8 +5377,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
(outs),
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
- [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>,
+ [(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
+ timm:$CRm, timm:$opc2)]>,
ComplexDeprecationPredicate<"MCR">;
def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@@ -5347,8 +5391,8 @@ def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
-def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
- (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+def : ARMPat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
+ (MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
class MovRCopro2<string opc, bit direction, dag oops, dag iops,
list<dag> pattern>
@@ -5379,8 +5423,8 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
(outs),
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
- [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>,
+ [(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
+ timm:$CRm, timm:$opc2)]>,
Requires<[IsARM,PreV8]>;
def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@@ -5394,9 +5438,9 @@ def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
-def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
- imm:$CRm, imm:$opc2),
- (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+def : ARMV5TPat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn,
+ timm:$CRm, timm:$opc2),
+ (MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
pattern = []>
@@ -5422,8 +5466,8 @@ class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
(outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, c_imm:$CRm),
- [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
- GPRnopc:$Rt2, imm:$CRm)]>;
+ [(int_arm_mcrr timm:$cop, timm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, timm:$CRm)]>;
def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */,
(outs GPRnopc:$Rt, GPRnopc:$Rt2),
(ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
@@ -5455,8 +5499,8 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops,
def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
(outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, c_imm:$CRm),
- [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
- GPRnopc:$Rt2, imm:$CRm)]>;
+ [(int_arm_mcrr2 timm:$cop, timm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, timm:$CRm)]>;
def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */,
(outs GPRnopc:$Rt, GPRnopc:$Rt2),
@@ -5579,12 +5623,12 @@ def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn),
def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
-let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
+let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP], hasNoSchedulingInfo = 1 in
def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
-let usesCustomInserter = 1, Defs = [CPSR] in
+let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in
def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary,
[(win__dbzchk tGPR:$divisor)]>;
@@ -6131,10 +6175,10 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
ComplexDeprecationPredicate<"IT">;
-let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1, hasNoSchedulingInfo = 1 in
def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
NoItinerary,
- [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
+ [(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>;
//===----------------------------------
// Atomic cmpxchg for -O0
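The imm -> timm rewrites throughout these hunks follow an upstream tightening: intrinsic operands that must stay compile-time constants are matched as TargetConstant (timm) rather than Constant (imm), so ISel never tries to materialize them in registers. A toy model of the distinction (not the LLVM node classes):

    #include <cassert>

    // Toy model: a Constant node may still be legalized into a register; a
    // TargetConstant must remain an immediate operand. Patterns written
    // with 'timm' accept only the latter.
    enum NodeKind { Constant, TargetConstant };

    bool matchesTimm(NodeKind K) { return K == TargetConstant; }
    bool matchesImm(NodeKind K) { return K == Constant; }

    int main() { assert(matchesTimm(TargetConstant) && !matchesTimm(Constant)); }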
@@ -6174,4 +6218,5 @@ def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary,
let hasSideEffects = 1;
let Size = 0;
let AsmString = "@ COMPILER BARRIER";
+ let hasNoSchedulingInfo = 1;
}
diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td
index 3e7ae55c7fc8..4f67cd6e47cc 100644
--- a/lib/Target/ARM/ARMInstrMVE.td
+++ b/lib/Target/ARM/ARMInstrMVE.td
@@ -160,7 +160,8 @@ class TMemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
let RenderMethod = "addMemImmOffsetOperands";
}
-class taddrmode_imm7<int shift> : MemOperand {
+class taddrmode_imm7<int shift> : MemOperand,
+ ComplexPattern<i32, 2, "SelectTAddrModeImm7<"#shift#">", []> {
let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>;
// They are printed the same way as the T2 imm8 version
let PrintMethod = "printT2AddrModeImm8Operand<false>";
@@ -221,7 +222,9 @@ def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>;
def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>;
def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>;
-class t2am_imm7_offset<int shift> : MemOperand {
+class t2am_imm7_offset<int shift> : MemOperand,
+ ComplexPattern<i32, 1, "SelectT2AddrModeImm7Offset<"#shift#">",
+ [], [SDNPWantRoot]> {
// They are printed the same way as the imm8 version
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
let ParserMatchClass =
@@ -371,6 +374,8 @@ class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4, list<dag> pattern=[]>
let Inst{7-6} = 0b00;
let Inst{5-4} = op5_4{1-0};
let Inst{3-0} = 0b1101;
+
+ let Unpredictable{8-6} = 0b111;
}
def MVE_SQRSHR : MVE_ScalarShiftSRegReg<"sqrshr", 0b10>;
@@ -403,18 +408,17 @@ class MVE_ScalarShiftDRegImm<string iname, bits<2> op5_4, bit op16,
let Inst{3-0} = 0b1111;
}
-class MVE_ScalarShiftDRegReg<string iname, bit op5, bit op16,
- list<dag> pattern=[]>
+class MVE_ScalarShiftDRegRegBase<string iname, dag iops, string asm,
+ bit op5, bit op16, list<dag> pattern=[]>
: MVE_ScalarShiftDoubleReg<
- iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm),
- "$RdaLo, $RdaHi, $Rm", "@earlyclobber $RdaHi,@earlyclobber $RdaLo,"
- "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
+ iname, iops, asm, "@earlyclobber $RdaHi,@earlyclobber $RdaLo,"
+ "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
pattern> {
bits<4> Rm;
let Inst{16} = op16;
let Inst{15-12} = Rm{3-0};
- let Inst{7-6} = 0b00;
+ let Inst{6} = 0b0;
let Inst{5} = op5;
let Inst{4} = 0b0;
let Inst{3-0} = 0b1101;
@@ -427,27 +431,44 @@ class MVE_ScalarShiftDRegReg<string iname, bit op5, bit op16,
let DecoderMethod = "DecodeMVEOverlappingLongShift";
}
-def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+class MVE_ScalarShiftDRegReg<string iname, bit op5, list<dag> pattern=[]>
+ : MVE_ScalarShiftDRegRegBase<
+ iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm),
+ "$RdaLo, $RdaHi, $Rm", op5, 0b0, pattern> {
+
+ let Inst{7} = 0b0;
+}
+
+class MVE_ScalarShiftDRegRegWithSat<string iname, bit op5, list<dag> pattern=[]>
+ : MVE_ScalarShiftDRegRegBase<
+ iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm, saturateop:$sat),
+ "$RdaLo, $RdaHi, $sat, $Rm", op5, 0b1, pattern> {
+ bit sat;
+
+ let Inst{7} = sat;
+}
+
+def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
(ARMasrl tGPREven:$RdaLo_src,
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
(ARMasrl tGPREven:$RdaLo_src,
- tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
-def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
+def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
(ARMlsll tGPREven:$RdaLo_src,
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
(ARMlsll tGPREven:$RdaLo_src,
- tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+ tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
(ARMlsrl tGPREven:$RdaLo_src,
- tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+ tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
-def MVE_SQRSHRL : MVE_ScalarShiftDRegReg<"sqrshrl", 0b1, 0b1>;
+def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>;
def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
def MVE_SRSHRL : MVE_ScalarShiftDRegImm<"srshrl", 0b10, 0b1>;
-def MVE_UQRSHLL : MVE_ScalarShiftDRegReg<"uqrshll", 0b0, 0b1>;
+def MVE_UQRSHLL : MVE_ScalarShiftDRegRegWithSat<"uqrshll", 0b0>;
def MVE_UQSHLL : MVE_ScalarShiftDRegImm<"uqshll", 0b00, 0b1>;
def MVE_URSHRL : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>;
@@ -531,6 +552,19 @@ defm MVE_VADDVu8 : MVE_VADDV_A<"u8", 0b1, 0b00>;
defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>;
defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 (vecreduce_add (v4i32 MQPR:$src))), (i32 (MVE_VADDVu32no_acc $src))>;
+ def : Pat<(i32 (vecreduce_add (v8i16 MQPR:$src))), (i32 (MVE_VADDVu16no_acc $src))>;
+ def : Pat<(i32 (vecreduce_add (v16i8 MQPR:$src))), (i32 (MVE_VADDVu8no_acc $src))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (v4i32 MQPR:$src1))), (i32 tGPR:$src2))),
+ (i32 (MVE_VADDVu32acc $src2, $src1))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (v8i16 MQPR:$src1))), (i32 tGPR:$src2))),
+ (i32 (MVE_VADDVu16acc $src2, $src1))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (v16i8 MQPR:$src1))), (i32 tGPR:$src2))),
+ (i32 (MVE_VADDVu8acc $src2, $src1))>;
+
+}
+
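These patterns map the generic vecreduce_add node, and its add-into-scalar form, onto MVE VADDV/VADDVA. Scalar reference semantics for what the instructions compute (sketch):

    #include <cstdint>

    // VADDVu32 reference: sum all lanes into one scalar; VADDVA also folds
    // in an existing accumulator, matching (add (vecreduce_add v), s) above.
    uint32_t vaddv(const uint32_t Lanes[4]) {
      uint32_t Sum = 0;
      for (int I = 0; I < 4; ++I)
        Sum += Lanes[I];
      return Sum;
    }

    uint32_t vaddva(uint32_t Acc, const uint32_t Lanes[4]) {
      return Acc + vaddv(Lanes);
    }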
class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, list<dag> pattern=[]>
: MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname,
@@ -636,6 +670,35 @@ multiclass MVE_VMINMAXV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>;
defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
+ (i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;
+ def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),
+ (i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;
+ def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))),
+ (i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>;
+ def : Pat<(i32 (vecreduce_umax (v16i8 MQPR:$src))),
+ (i32 (MVE_VMAXVu8 (t2MOVi (i32 0)), $src))>;
+ def : Pat<(i32 (vecreduce_umax (v8i16 MQPR:$src))),
+ (i32 (MVE_VMAXVu16 (t2MOVi (i32 0)), $src))>;
+ def : Pat<(i32 (vecreduce_umax (v4i32 MQPR:$src))),
+ (i32 (MVE_VMAXVu32 (t2MOVi (i32 0)), $src))>;
+
+ def : Pat<(i32 (vecreduce_smin (v16i8 MQPR:$src))),
+ (i32 (MVE_VMINVs8 (t2MOVi (i32 127)), $src))>;
+ def : Pat<(i32 (vecreduce_smin (v8i16 MQPR:$src))),
+ (i32 (MVE_VMINVs16 (t2MOVi16 (i32 32767)), $src))>;
+ def : Pat<(i32 (vecreduce_smin (v4i32 MQPR:$src))),
+ (i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>;
+ def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),
+ (i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;
+ def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),
+ (i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;
+ def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
+ (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;
+
+}
+
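VMINV/VMAXV reduce across lanes into a scalar that must be seeded, and the patterns seed it with the identity element of the element type; for instance (t2MVNi (i32 127)) materializes ~127 = -128, the smallest s8, for vecreduce_smax. A sketch of the seeding rule for one case:

    #include <algorithm>
    #include <cstdint>

    // VMAXVs8 reference: start the accumulator at the type minimum so any
    // lane can replace it; VMINV uses the type maximum symmetrically.
    int8_t vmaxv_s8(const int8_t Lanes[16]) {
      int8_t Acc = INT8_MIN;   // matches (t2MVNi (i32 127)) == -128 above
      for (int I = 0; I < 16; ++I)
        Acc = std::max(Acc, Lanes[I]);
      return Acc;
    }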
multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b0, bit_7>;
def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>;
@@ -667,57 +730,57 @@ class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
let Inst{0} = bit_0;
}
-multiclass MVE_VMLAMLSDAV_X<string iname, string suffix, dag iops, string cstr,
- bit sz, bit bit_28, bit A, bit bit_8, bit bit_0,
- list<dag> pattern=[]> {
- def _noexch : MVE_VMLAMLSDAV<iname, suffix, iops, cstr, sz,
- bit_28, A, 0b0, bit_8, bit_0, pattern>;
- def _exch : MVE_VMLAMLSDAV<iname # "x", suffix, iops, cstr, sz,
- bit_28, A, 0b1, bit_8, bit_0, pattern>;
+multiclass MVE_VMLAMLSDAV_A<string iname, string x, string suffix,
+ bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
+ list<dag> pattern=[]> {
+ def ""#x#suffix : MVE_VMLAMLSDAV<iname # x, suffix,
+ (ins MQPR:$Qn, MQPR:$Qm), "",
+ sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
+ def "a"#x#suffix : MVE_VMLAMLSDAV<iname # "a" # x, suffix,
+ (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
+ "$RdaDest = $RdaSrc",
+ sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
+}
+
+multiclass MVE_VMLAMLSDAV_AX<string iname, string suffix, bit sz, bit bit_28,
+ bit bit_8, bit bit_0, list<dag> pattern=[]> {
+ defm "" : MVE_VMLAMLSDAV_A<iname, "", suffix, sz, bit_28,
+ 0b0, bit_8, bit_0, pattern>;
+ defm "" : MVE_VMLAMLSDAV_A<iname, "x", suffix, sz, bit_28,
+ 0b1, bit_8, bit_0, pattern>;
}
-multiclass MVE_VMLAMLSDAV_XA<string iname, string suffix, bit sz, bit bit_28,
- bit bit_8, bit bit_0, list<dag> pattern=[]> {
- defm _noacc : MVE_VMLAMLSDAV_X<iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "",
- sz, bit_28, 0b0, bit_8, bit_0, pattern>;
- defm _acc : MVE_VMLAMLSDAV_X<iname # "a", suffix,
- (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
- "$RdaDest = $RdaSrc",
- sz, bit_28, 0b1, bit_8, bit_0, pattern>;
+multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit bit_8,
+ list<dag> pattern=[]> {
+ defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix,
+ sz, 0b0, bit_8, 0b0, pattern>;
+ defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix,
+ sz, 0b1, 0b0, bit_8, 0b0, pattern>;
}
-multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit U, bit bit_8,
- list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_XA<"vmladav", suffix, sz, U, bit_8, 0b0, pattern>;
+multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28,
+ list<dag> pattern=[]> {
+ defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix,
+ sz, bit_28, 0b0, 0b1, pattern>;
}
-defm MVE_VMLADAVs16 : MVE_VMLADAV_multi<"s16", 0b0, 0b0, 0b0>;
-defm MVE_VMLADAVs32 : MVE_VMLADAV_multi<"s32", 0b1, 0b0, 0b0>;
-defm MVE_VMLADAVu16 : MVE_VMLADAV_multi<"u16", 0b0, 0b1, 0b0>;
-defm MVE_VMLADAVu32 : MVE_VMLADAV_multi<"u32", 0b1, 0b1, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 0b1>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>;
-defm MVE_VMLADAVs8 : MVE_VMLADAV_multi<"s8", 0b0, 0b0, 0b1>;
-defm MVE_VMLADAVu8 : MVE_VMLADAV_multi<"u8", 0b0, 0b1, 0b1>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 0b0, 0b1>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>;
// vmlav aliases vmladav
-foreach acc = ["_acc", "_noacc"] in {
+foreach acc = ["", "a"] in {
foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in {
- def : MVEInstAlias<!strconcat("vmlav", !if(!eq(acc, "_acc"), "a", ""),
- "${vp}.", suffix, "\t$RdaDest, $Qn, $Qm"),
- (!cast<Instruction>("MVE_VMLADAV"#suffix#acc#"_noexch")
+ def : MVEInstAlias<"vmlav"#acc#"${vp}."#suffix#"\t$RdaDest, $Qn, $Qm",
+ (!cast<Instruction>("MVE_VMLADAV"#acc#suffix)
tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
}
}
-multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28,
- list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_XA<"vmlsdav", suffix, sz, bit_28, 0b0, 0b1, pattern>;
-}
-
-defm MVE_VMLSDAVs8 : MVE_VMLSDAV_multi<"s8", 0, 0b1>;
-defm MVE_VMLSDAVs16 : MVE_VMLSDAV_multi<"s16", 0, 0b0>;
-defm MVE_VMLSDAVs32 : MVE_VMLSDAV_multi<"s32", 1, 0b0>;
-
// Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH
class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
@@ -742,82 +805,83 @@ class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
let Inst{0} = bit_0;
}
-multiclass MVE_VMLALDAVBase_X<string iname, string suffix, dag iops,
- string cstr, bit sz, bit bit_28, bit A,
- bit bit_8, bit bit_0, list<dag> pattern=[]> {
- def _noexch : MVE_VMLALDAVBase<iname, suffix, iops, cstr, sz,
- bit_28, A, 0b0, bit_8, bit_0, pattern>;
- def _exch : MVE_VMLALDAVBase<iname # "x", suffix, iops, cstr, sz,
- bit_28, A, 0b1, bit_8, bit_0, pattern>;
+multiclass MVE_VMLALDAVBase_A<string iname, string x, string suffix,
+ bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
+ list<dag> pattern=[]> {
+ def ""#x#suffix : MVE_VMLALDAVBase<
+ iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "",
+ sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
+ def "a"#x#suffix : MVE_VMLALDAVBase<
+ iname # "a" # x, suffix,
+ (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm),
+ "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc",
+ sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
}
-multiclass MVE_VMLALDAVBase_XA<string iname, string suffix, bit sz, bit bit_28,
- bit bit_8, bit bit_0, list<dag> pattern=[]> {
- defm _noacc : MVE_VMLALDAVBase_X<
- iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "",
- sz, bit_28, 0b0, bit_8, bit_0, pattern>;
- defm _acc : MVE_VMLALDAVBase_X<
- iname # "a", suffix, (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc,
- MQPR:$Qn, MQPR:$Qm),
- "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc",
- sz, bit_28, 0b1, bit_8, bit_0, pattern>;
+
+multiclass MVE_VMLALDAVBase_AX<string iname, string suffix, bit sz, bit bit_28,
+ bit bit_8, bit bit_0, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_A<iname, "", suffix, sz,
+ bit_28, 0b0, bit_8, bit_0, pattern>;
+ defm "" : MVE_VMLALDAVBase_A<iname, "x", suffix, sz,
+ bit_28, 0b1, bit_8, bit_0, pattern>;
}
-multiclass MVE_VRMLALDAVH_multi<string suffix, bit U, list<dag> pattern=[]> {
- defm "" : MVE_VMLALDAVBase_XA<
- "vrmlaldavh", suffix, 0b0, U, 0b1, 0b0, pattern>;
+multiclass MVE_VRMLALDAVH_multi<string suffix, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix,
+ 0b0, 0b0, 0b1, 0b0, pattern>;
+ defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix,
+ 0b0, 0b1, 0b0, 0b1, 0b0, pattern>;
}
-defm MVE_VRMLALDAVHs32 : MVE_VRMLALDAVH_multi<"s32", 0>;
-defm MVE_VRMLALDAVHu32 : MVE_VRMLALDAVH_multi<"u32", 1>;
+defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">;
// vrmlalvh aliases for vrmlaldavh
def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
- (MVE_VRMLALDAVHs32_noacc_noexch
+ (MVE_VRMLALDAVHs32
tGPREven:$RdaLo, tGPROdd:$RdaHi,
MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
def : MVEInstAlias<"vrmlalvha${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
- (MVE_VRMLALDAVHs32_acc_noexch
+ (MVE_VRMLALDAVHas32
tGPREven:$RdaLo, tGPROdd:$RdaHi,
MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
def : MVEInstAlias<"vrmlalvh${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
- (MVE_VRMLALDAVHu32_noacc_noexch
+ (MVE_VRMLALDAVHu32
tGPREven:$RdaLo, tGPROdd:$RdaHi,
MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
- (MVE_VRMLALDAVHu32_acc_noexch
+ (MVE_VRMLALDAVHau32
tGPREven:$RdaLo, tGPROdd:$RdaHi,
MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
-multiclass MVE_VMLALDAV_multi<string suffix, bit sz, bit U,
- list<dag> pattern=[]> {
- defm "" : MVE_VMLALDAVBase_XA<"vmlaldav", suffix, sz, U, 0b0, 0b0, pattern>;
+multiclass MVE_VMLALDAV_multi<string suffix, bit sz, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>;
+ defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix,
+ sz, 0b1, 0b0, 0b0, 0b0, pattern>;
}
-defm MVE_VMLALDAVs16 : MVE_VMLALDAV_multi<"s16", 0b0, 0b0>;
-defm MVE_VMLALDAVs32 : MVE_VMLALDAV_multi<"s32", 0b1, 0b0>;
-defm MVE_VMLALDAVu16 : MVE_VMLALDAV_multi<"u16", 0b0, 0b1>;
-defm MVE_VMLALDAVu32 : MVE_VMLALDAV_multi<"u32", 0b1, 0b1>;
+defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>;
+defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>;
// vmlalv aliases vmlaldav
-foreach acc = ["_acc", "_noacc"] in {
+foreach acc = ["", "a"] in {
foreach suffix = ["s16", "s32", "u16", "u32"] in {
- def : MVEInstAlias<!strconcat("vmlalv", !if(!eq(acc, "_acc"), "a", ""),
- "${vp}.", suffix, "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm"),
- (!cast<Instruction>("MVE_VMLALDAV"#suffix#acc#"_noexch")
+ def : MVEInstAlias<"vmlalv" # acc # "${vp}." # suffix #
+ "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm",
+ (!cast<Instruction>("MVE_VMLALDAV"#acc#suffix)
tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest,
MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
}
}
multiclass MVE_VMLSLDAV_multi<string iname, string suffix, bit sz,
- bit bit_28, list<dag> pattern=[]> {
- defm "" : MVE_VMLALDAVBase_XA<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>;
+ bit bit_28, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_AX<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>;
}
-defm MVE_VMLSLDAVs16 : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>;
-defm MVE_VMLSLDAVs32 : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>;
-defm MVE_VRMLSLDAVHs32 : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>;
+defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>;
+defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>;
+defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>;
// end of mve_rDest instructions
@@ -967,11 +1031,12 @@ def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
let Inst{6} = 0b1;
let Inst{4} = 0b1;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
-class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7>
+class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7, string cstr="">
: MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
- suffix, "$Qd, $Qm", ""> {
+ suffix, "$Qd, $Qm", cstr> {
let Inst{28} = 0b1;
let Inst{25-23} = 0b111;
@@ -985,9 +1050,9 @@ class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7>
let Inst{0} = 0b0;
}
-def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00>;
-def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00>;
-def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00>;
+def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">;
+def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">;
+def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">;
def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>;
def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
@@ -995,6 +1060,13 @@ def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>;
let Predicates = [HasMVEInt] in {
+ def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))),
+ (v8i16 (MVE_VREV16_8 (v8i16 MQPR:$src)))>;
+ def : Pat<(v4i32 (bswap (v4i32 MQPR:$src))),
+ (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>;
+}
+
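Byte-swapping each lane of a vector is exactly a byte-level reversal within each element, hence bswap v8i16 -> VREV16.8 and bswap v4i32 -> VREV32.8. Per-lane reference semantics (sketch):

    #include <cstdint>

    // VREV16.8 on one lane: swap the two bytes of each 16-bit element,
    // i.e. a per-lane bswap.
    uint16_t bswap16(uint16_t X) { return uint16_t((X << 8) | (X >> 8)); }

    // VREV32.8 likewise reverses the four bytes of each 32-bit element.
    uint32_t bswap32(uint32_t X) {
      return (X << 24) | ((X & 0xFF00) << 8) | ((X >> 8) & 0xFF00) | (X >> 24);
    }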
+let Predicates = [HasMVEInt] in {
def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
(v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
@@ -1026,6 +1098,7 @@ def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
let Inst{12-6} = 0b0010111;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
let Predicates = [HasMVEInt] in {
@@ -1054,6 +1127,7 @@ class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
let Inst{6} = 0b1;
let Inst{4} = 0b1;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>;
@@ -1145,6 +1219,7 @@ class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type>
: MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
let Inst{5} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VORRIZ0v4i32 : MVE_VORR<"i32", 0b0001, expzero00>;
@@ -1173,6 +1248,7 @@ def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type>
: MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
let Inst{5} = 0b1;
+ let validForTailPredication = 1;
}
def MVE_VBICIZ0v4i32 : MVE_VBIC<"i32", 0b0001, expzero00>;
@@ -1315,8 +1391,12 @@ let Predicates = [HasMVEInt] in {
def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane),
(MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>;
- def : Pat<(extractelt (v8f16 MQPR:$src), imm:$lane),
- (COPY_TO_REGCLASS (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane), HPR)>;
+ def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane),
+ (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>;
+ def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane),
+ (COPY_TO_REGCLASS
+ (VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))),
+ HPR)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
@@ -1408,6 +1488,7 @@ class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
let Inst{12-8} = 0b01000;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
class MVE_VADD<string suffix, bits<2> size, list<dag> pattern=[]>
@@ -1442,8 +1523,8 @@ let Predicates = [HasMVEInt] in {
}
class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
- bits<2> size, list<dag> pattern=[]>
- : MVE_int<iname, suffix, size, pattern> {
+ bits<2> size, ValueType vt>
+ : MVE_int<iname, suffix, size, []> {
let Inst{28} = U;
let Inst{25-23} = 0b110;
@@ -1453,26 +1534,49 @@ class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
let Inst{8} = 0b0;
let Inst{4} = 0b1;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
+
+ ValueType VT = vt;
}
-class MVE_VQADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
- : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, pattern>;
-class MVE_VQSUB<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
- : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, pattern>;
+class MVE_VQADD<string suffix, bit U, bits<2> size, ValueType VT>
+ : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, VT>;
+class MVE_VQSUB<string suffix, bit U, bits<2> size, ValueType VT>
+ : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, VT>;
-def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00>;
-def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01>;
-def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10>;
-def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00>;
-def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01>;
-def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10>;
+def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00, v16i8>;
+def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01, v8i16>;
+def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10, v4i32>;
+def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00, v16i8>;
+def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01, v8i16>;
+def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10, v4i32>;
+
+def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00, v16i8>;
+def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01, v8i16>;
+def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10, v4i32>;
+def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00, v16i8>;
+def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01, v8i16>;
+def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10, v4i32>;
+
+let Predicates = [HasMVEInt] in {
+ foreach instr = [MVE_VQADDu8, MVE_VQADDu16, MVE_VQADDu32] in
+ foreach VT = [instr.VT] in
+ def : Pat<(VT (uaddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
+ (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
+ foreach instr = [MVE_VQADDs8, MVE_VQADDs16, MVE_VQADDs32] in
+ foreach VT = [instr.VT] in
+ def : Pat<(VT (saddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
+ (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
+ foreach instr = [MVE_VQSUBu8, MVE_VQSUBu16, MVE_VQSUBu32] in
+ foreach VT = [instr.VT] in
+ def : Pat<(VT (usubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
+ (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
+ foreach instr = [MVE_VQSUBs8, MVE_VQSUBs16, MVE_VQSUBs32] in
+ foreach VT = [instr.VT] in
+ def : Pat<(VT (ssubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
+ (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
+}
-def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00>;
-def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01>;
-def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10>;
-def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00>;
-def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01>;
-def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10>;
class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
: MVE_int<"vabd", suffix, size, pattern> {
@@ -1483,6 +1587,7 @@ class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let Inst{12-8} = 0b00111;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>;
@@ -1501,6 +1606,7 @@ class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let Inst{12-8} = 0b00001;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VRHADDs8 : MVE_VRHADD<"s8", 0b0, 0b00>;
@@ -1522,6 +1628,7 @@ class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
let Inst{8} = 0b0;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
class MVE_VHADD<string suffix, bit U, bits<2> size,
@@ -1545,6 +1652,60 @@ def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>;
def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (ARMvshrsImm
+ (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
+ (v16i8 (MVE_VHADDs8
+ (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
+ def : Pat<(v8i16 (ARMvshrsImm
+ (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
+ (v8i16 (MVE_VHADDs16
+ (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
+ def : Pat<(v4i32 (ARMvshrsImm
+ (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
+ (v4i32 (MVE_VHADDs32
+ (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
+
+ def : Pat<(v16i8 (ARMvshruImm
+ (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
+ (v16i8 (MVE_VHADDu8
+ (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
+ def : Pat<(v8i16 (ARMvshruImm
+ (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
+ (v8i16 (MVE_VHADDu16
+ (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
+ def : Pat<(v4i32 (ARMvshruImm
+ (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
+ (v4i32 (MVE_VHADDu32
+ (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
+
+ def : Pat<(v16i8 (ARMvshrsImm
+ (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
+ (v16i8 (MVE_VHSUBs8
+ (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
+ def : Pat<(v8i16 (ARMvshrsImm
+ (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
+ (v8i16 (MVE_VHSUBs16
+ (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
+ def : Pat<(v4i32 (ARMvshrsImm
+ (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
+ (v4i32 (MVE_VHSUBs32
+ (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
+
+ def : Pat<(v16i8 (ARMvshruImm
+ (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
+ (v16i8 (MVE_VHSUBu8
+ (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
+ def : Pat<(v8i16 (ARMvshruImm
+ (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
+ (v8i16 (MVE_VHSUBu16
+ (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
+ def : Pat<(v4i32 (ARMvshruImm
+ (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
+ (v4i32 (MVE_VHSUBu32
+ (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
+}
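+
+// The patterns above select the "(x + y) >> 1" and "(x - y) >> 1" DAG
+// shapes (ARMvshrsImm for the signed types, ARMvshruImm for the unsigned
+// ones) onto the halving add/subtract instructions.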
+
class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
"vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
@@ -1563,6 +1724,7 @@ class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
let Inst{6} = 0b0;
let Inst{5} = E;
let Inst{4-0} = 0b10000;
+ let validForTailPredication = 1;
}
def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
@@ -1625,6 +1787,7 @@ class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
let Inst{6} = 0b1;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>;
@@ -1635,6 +1798,15 @@ def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>;
def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (ctlz (v16i8 MQPR:$val1))),
+            (v16i8 (MVE_VCLZs8 (v16i8 MQPR:$val1)))>;
+  def : Pat<(v4i32 (ctlz (v4i32 MQPR:$val1))),
+            (v4i32 (MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
+  def : Pat<(v8i16 (ctlz (v8i16 MQPR:$val1))),
+            (v8i16 (MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
+}
+
class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
list<dag> pattern=[]>
: MVEIntSingleSrc<iname, suffix, size, pattern> {
@@ -1648,6 +1820,7 @@ class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
let Inst{6} = 0b1;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>;
@@ -1689,6 +1862,7 @@ class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
let Inst{6} = 0b1;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>;
@@ -1720,6 +1894,7 @@ class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
let Inst{3-0} = imm{3-0};
let DecoderMethod = "DecodeMVEModImmInstruction";
+ let validForTailPredication = 1;
}
let isReMaterializable = 1 in {
@@ -2115,6 +2290,7 @@ class MVE_shift_by_vec<string iname, string suffix, bit U,
let Inst{4} = bit_4;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
@@ -2163,6 +2339,7 @@ class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
let Inst{4} = 0b1;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
@@ -2175,6 +2352,7 @@ class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
let Inst{21-16} = imm;
let Inst{10-9} = 0b10;
let Inst{8} = bit_8;
+ let validForTailPredication = 1;
}
def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
@@ -2427,6 +2605,7 @@ class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
let Inst{11-10} = 0b01;
let Inst{9-7} = op{2-0};
let Inst{4} = 0b0;
+ let validForTailPredication = 1;
}
@@ -2489,6 +2668,7 @@ class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]>
let Inst{12-8} = 0b01101;
let Inst{7} = Qn{3};
let Inst{4} = 0b1;
+ let validForTailPredication = 1;
}
def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>;
@@ -2556,8 +2736,38 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
(ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
-def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
+let Predicates = [HasMVEFloat, UseFusedMAC] in {
+ def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1),
+ (fmul (v8f16 MQPR:$src2),
+ (v8f16 MQPR:$src3)))),
+ (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>;
+ def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1),
+ (fmul (v4f32 MQPR:$src2),
+ (v4f32 MQPR:$src3)))),
+ (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>;
+
+ def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1),
+ (fmul (v8f16 MQPR:$src2),
+ (v8f16 MQPR:$src3)))),
+ (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>;
+ def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1),
+ (fmul (v4f32 MQPR:$src2),
+ (v4f32 MQPR:$src3)))),
+ (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>;
+}
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
+ (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
+ def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
+ (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
+}
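+
+// Operand order note: ISD::fma is (multiplicand, multiplier, addend), while
+// VFMA takes the accumulator first (it is tied to $Qd), so the addend $src3
+// becomes the instruction's first operand in the patterns above.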
+
+let validForTailPredication = 1 in {
+ def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
+ def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
+}
let Predicates = [HasMVEFloat] in {
def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
@@ -2566,8 +2776,11 @@ let Predicates = [HasMVEFloat] in {
(v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
}
-def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
-def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
+
+let validForTailPredication = 1 in {
+ def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
+ def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
+}
let Predicates = [HasMVEFloat] in {
def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
@@ -2576,10 +2789,10 @@ let Predicates = [HasMVEFloat] in {
(v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
}
-class MVE_VCADD<string suffix, bit size, list<dag> pattern=[]>
+class MVE_VCADD<string suffix, bit size, string cstr="", list<dag> pattern=[]>
: MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
bits<4> Qd;
bits<4> Qn;
bit rot;
@@ -2598,7 +2811,7 @@ class MVE_VCADD<string suffix, bit size, list<dag> pattern=[]>
}
def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>;
-def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1>;
+def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1, "@earlyclobber $Qd">;
class MVE_VABD_fp<string suffix, bit size>
: MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
@@ -2617,6 +2830,7 @@ class MVE_VABD_fp<string suffix, bit size>
let Inst{11-8} = 0b1101;
let Inst{7} = Qn{3};
let Inst{4} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>;
@@ -2643,6 +2857,7 @@ class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
let Inst{4} = 0b1;
let DecoderMethod = "DecodeMVEVCVTt1fp";
+ let validForTailPredication = 1;
}
class MVE_VCVT_imm_asmop<int Bits> : AsmOperandClass {
@@ -2693,6 +2908,7 @@ class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
let Inst{9-8} = rm;
let Inst{7} = op;
let Inst{4} = 0b0;
+ let validForTailPredication = 1;
}
multiclass MVE_VCVT_fp_int_anpm_multi<string suffix, bits<2> size, bit op,
@@ -2727,6 +2943,7 @@ class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
let Inst{12-9} = 0b0011;
let Inst{8-7} = op;
let Inst{4} = 0b0;
+ let validForTailPredication = 1;
}
// The unsuffixed VCVT for float->int implicitly rounds toward zero,
@@ -2776,6 +2993,7 @@ class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
let Inst{11-8} = 0b0111;
let Inst{7} = negate;
let Inst{4} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
@@ -2863,6 +3081,7 @@ class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
// decoder to emit an operand that isn't affected by any instruction
// bit.
let DecoderMethod = "DecodeMVEVCMP<false," # predtype.DecoderMethod # ">";
+ let validForTailPredication = 1;
}
class MVE_VCMPqqf<string suffix, bit size>
@@ -2927,6 +3146,7 @@ class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
let Constraints = "";
// Custom decoder method, for the same reason as MVE_VCMPqq
let DecoderMethod = "DecodeMVEVCMP<true," # predtype.DecoderMethod # ">";
+ let validForTailPredication = 1;
}
class MVE_VCMPqrf<string suffix, bit size>
@@ -2966,6 +3186,168 @@ def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>;
def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
+multiclass unpred_vcmp_z<string suffix, int fc> {
+ def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
+ def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
+ def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
+
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+}
+
+multiclass unpred_vcmp_r<string suffix, int fc> {
+ def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
+ def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
+ def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
+
+ def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>;
+ def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>;
+ def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>;
+
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>;
+
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
+}
+
+multiclass unpred_vcmpf_z<int fc> {
+ def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
+ def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
+
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))),
+            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+}
+
+multiclass unpred_vcmpf_r<int fc> {
+ def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))),
+ (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
+ def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))),
+ (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
+
+ def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>;
+ def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
+
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))),
+ (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))),
+ (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>;
+
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
+}
+
+let Predicates = [HasMVEInt] in {
+ defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>;
+ defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>;
+ defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>;
+ defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>;
+ defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>;
+ defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>;
+ defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>;
+ defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>;
+
+ defm MVE_VCEQ : unpred_vcmp_r<"i", 0>;
+ defm MVE_VCNE : unpred_vcmp_r<"i", 1>;
+ defm MVE_VCGE : unpred_vcmp_r<"s", 10>;
+ defm MVE_VCLT : unpred_vcmp_r<"s", 11>;
+ defm MVE_VCGT : unpred_vcmp_r<"s", 12>;
+ defm MVE_VCLE : unpred_vcmp_r<"s", 13>;
+ defm MVE_VCGTU : unpred_vcmp_r<"u", 8>;
+ defm MVE_VCGEU : unpred_vcmp_r<"u", 2>;
+}
+
+let Predicates = [HasMVEFloat] in {
+ defm MVE_VFCEQZ : unpred_vcmpf_z<0>;
+ defm MVE_VFCNEZ : unpred_vcmpf_z<1>;
+ defm MVE_VFCGEZ : unpred_vcmpf_z<10>;
+ defm MVE_VFCLTZ : unpred_vcmpf_z<11>;
+ defm MVE_VFCGTZ : unpred_vcmpf_z<12>;
+ defm MVE_VFCLEZ : unpred_vcmpf_z<13>;
+
+ defm MVE_VFCEQ : unpred_vcmpf_r<0>;
+ defm MVE_VFCNE : unpred_vcmpf_r<1>;
+ defm MVE_VFCGE : unpred_vcmpf_r<10>;
+ defm MVE_VFCLT : unpred_vcmpf_r<11>;
+ defm MVE_VFCGT : unpred_vcmpf_r<12>;
+ defm MVE_VFCLE : unpred_vcmpf_r<13>;
+}
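+
+// In the defm lines above, the integer template argument is the ARMCC
+// condition code that ends up in the compare's fc field: 0=EQ, 1=NE, 10=GE,
+// 11=LT, 12=GT, 13=LE, plus the unsigned forms 8=HI and 2=HS.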
+
+// Extra "worst case" and/or/xor patterns, going into and out of GPR
+multiclass two_predops<SDPatternOperator opnode, Instruction insn> {
+ def v16i1 : Pat<(v16i1 (opnode (v16i1 VCCR:$p1), (v16i1 VCCR:$p2))),
+ (v16i1 (COPY_TO_REGCLASS
+ (insn (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p1), rGPR)),
+ (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p2), rGPR))),
+ VCCR))>;
+ def v8i1 : Pat<(v8i1 (opnode (v8i1 VCCR:$p1), (v8i1 VCCR:$p2))),
+ (v8i1 (COPY_TO_REGCLASS
+ (insn (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p1), rGPR)),
+ (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p2), rGPR))),
+ VCCR))>;
+ def v4i1 : Pat<(v4i1 (opnode (v4i1 VCCR:$p1), (v4i1 VCCR:$p2))),
+ (v4i1 (COPY_TO_REGCLASS
+ (insn (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p1), rGPR)),
+ (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p2), rGPR))),
+ VCCR))>;
+}
+
+let Predicates = [HasMVEInt] in {
+ defm POR : two_predops<or, t2ORRrr>;
+ defm PAND : two_predops<and, t2ANDrr>;
+ defm PEOR : two_predops<xor, t2EORrr>;
+}
+
+// Occasionally we need to cast between an i32 and a boolean vector, for
+// example when moving between rGPR and VPR.P0 as part of predicate vector
+// shuffles. We also sometimes need to cast between different predicate
+// vector types (v4i1<>v8i1, etc.) as part of lowering vector shuffles.
+
+def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;
+
+let Predicates = [HasMVEInt] in {
+ foreach VT = [ v4i1, v8i1, v16i1 ] in {
+ def : Pat<(i32 (predicate_cast (VT VCCR:$src))),
+ (i32 (COPY_TO_REGCLASS (VT VCCR:$src), VCCR))>;
+ def : Pat<(VT (predicate_cast (i32 VCCR:$src))),
+ (VT (COPY_TO_REGCLASS (i32 VCCR:$src), VCCR))>;
+
+ foreach VT2 = [ v4i1, v8i1, v16i1 ] in
+ def : Pat<(VT (predicate_cast (VT2 VCCR:$src))),
+ (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
+ }
+}
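+
+// All of these casts lower to a bare COPY_TO_REGCLASS, i.e. no instruction
+// is emitted: every predicate vector type is just a different view of the
+// same 16 predicate bits held in VPR.P0.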
+
// end of MVE compares
// start of MVE_qDest_qSrc
@@ -2989,10 +3371,10 @@ class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
}
class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
- string suffix, bits<2> size, list<dag> pattern=[]>
+ string suffix, bits<2> size, string cstr="", list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_n, "$Qd = $Qd_src", pattern> {
+ vpred_n, "$Qd = $Qd_src"#cstr, pattern> {
bits<4> Qn;
let Inst{28} = subtract;
@@ -3009,7 +3391,7 @@ multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
bit round, bit subtract> {
def s8 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s8", 0b00>;
def s16 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s16", 0b01>;
- def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10>;
+ def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10, ",@earlyclobber $Qd">;
}
defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>;
@@ -3021,10 +3403,10 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
-class MVE_VCMUL<string iname, string suffix, bit size, list<dag> pattern=[]>
+class MVE_VCMUL<string iname, string suffix, bit size, string cstr="", list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
bits<4> Qn;
bits<2> rot;
@@ -3041,13 +3423,13 @@ class MVE_VCMUL<string iname, string suffix, bit size, list<dag> pattern=[]>
}
def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>;
-def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1>;
+def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1, "@earlyclobber $Qd">;
class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
- bit T, list<dag> pattern=[]>
+ bit T, string cstr, list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_r, "", pattern> {
+ vpred_r, cstr, pattern> {
bits<4> Qd;
bits<4> Qn;
bits<4> Qm;
@@ -3063,9 +3445,9 @@ class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
}
multiclass MVE_VMULL_multi<string iname, string suffix,
- bit bit_28, bits<2> bits_21_20> {
- def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0>;
- def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1>;
+ bit bit_28, bits<2> bits_21_20, string cstr=""> {
+ def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0, cstr>;
+ def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1, cstr>;
}
// For integer multiplies, bits 21:20 encode size, and bit 28 signedness.
@@ -3074,10 +3456,10 @@ multiclass MVE_VMULL_multi<string iname, string suffix,
defm MVE_VMULLs8 : MVE_VMULL_multi<"vmull", "s8", 0b0, 0b00>;
defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>;
-defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10>;
+defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10, "@earlyclobber $Qd">;
defm MVE_VMULLu8 : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>;
defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>;
-defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10>;
+defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Qd">;
defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>;
defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>;
@@ -3144,6 +3526,18 @@ defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>;
defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
+def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
+ (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+ def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
+ (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))),
+ (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))),
+ (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
+}
+
class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
@@ -3166,11 +3560,10 @@ defm MVE_VCVTf16f32 : MVE_VCVT_ff_halves<"f16.f32", 0b0>;
defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>;
class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
- list<dag> pattern=[]>
+ string cstr="", list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, "",
- pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
bits<4> Qn;
bit rot;
@@ -3186,11 +3579,11 @@ class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
def MVE_VCADDi8 : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>;
def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>;
-def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1>;
+def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1, "@earlyclobber $Qd">;
def MVE_VHCADDs8 : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>;
def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>;
-def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0>;
+def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0, "@earlyclobber $Qd">;
class MVE_VADCSBC<string iname, bit I, bit subtract,
dag carryin, list<dag> pattern=[]>
@@ -3220,10 +3613,10 @@ def MVE_VSBC : MVE_VADCSBC<"vsbc", 0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>;
def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>;
class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
- list<dag> pattern=[]>
+ string cstr="", list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_r, "", pattern> {
+ vpred_r, cstr, pattern> {
bits<4> Qn;
let Inst{28} = size;
@@ -3236,13 +3629,13 @@ class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
let Inst{0} = 0b1;
}
-multiclass MVE_VQDMULL_halves<string suffix, bit size> {
- def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0>;
- def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1>;
+multiclass MVE_VQDMULL_halves<string suffix, bit size, string cstr=""> {
+ def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0, cstr>;
+ def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1, cstr>;
}
defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>;
-defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1>;
+defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1, "@earlyclobber $Qd">;
// end of mve_qDest_qSrc
@@ -3267,9 +3660,9 @@ class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
let Inst{3-0} = Rm{3-0};
}
-class MVE_qDest_rSrc<string iname, string suffix, list<dag> pattern=[]>
+class MVE_qDest_rSrc<string iname, string suffix, string cstr="", list<dag> pattern=[]>
: MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm),
- NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, "",
+ NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr,
pattern>;
class MVE_qDestSrc_rSrc<string iname, string suffix, list<dag> pattern=[]>
@@ -3291,7 +3684,7 @@ class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
bit bit_5, bit bit_12, bit bit_16,
bit bit_28, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, pattern> {
+ : MVE_qDest_rSrc<iname, suffix, "", pattern> {
let Inst{28} = bit_28;
let Inst{21-20} = size;
@@ -3299,6 +3692,7 @@ class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
let Inst{12} = bit_12;
let Inst{8} = 0b1;
let Inst{5} = bit_5;
+ let validForTailPredication = 1;
}
multiclass MVE_VADDSUB_qr_sizes<string iname, string suffix,
@@ -3320,9 +3714,27 @@ defm MVE_VSUB_qr_i : MVE_VADDSUB_qr_sizes<"vsub", "i", 0b0, 0b1, 0b1, 0b0>;
defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>;
defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
+ (v16i8 (MVE_VADD_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
+ def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
+ (v8i16 (MVE_VADD_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
+ def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
+ (v4i32 (MVE_VADD_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
+}
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
+ (v16i8 (MVE_VSUB_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
+ def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
+ (v8i16 (MVE_VSUB_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
+ def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
+ (v4i32 (MVE_VSUB_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
+}
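+
+// These patterns fold a splatted GPR operand (ARMvdup) straight into the
+// vector/scalar "qr" forms, avoiding the separate VDUP that would otherwise
+// be needed to materialise the splat in a Q register.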
+
class MVE_VQDMULL_qr<string iname, string suffix, bit size,
- bit T, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, pattern> {
+ bit T, string cstr="", list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, cstr, pattern> {
let Inst{28} = size;
let Inst{21-20} = 0b11;
@@ -3332,18 +3744,18 @@ class MVE_VQDMULL_qr<string iname, string suffix, bit size,
let Inst{5} = 0b1;
}
-multiclass MVE_VQDMULL_qr_halves<string suffix, bit size> {
- def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0>;
- def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1>;
+multiclass MVE_VQDMULL_qr_halves<string suffix, bit size, string cstr=""> {
+ def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0, cstr>;
+ def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1, cstr>;
}
defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>;
-defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1>;
+defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1, "@earlyclobber $Qd">;
class MVE_VxADDSUB_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, bit subtract,
list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, pattern> {
+ : MVE_qDest_rSrc<iname, suffix, "", pattern> {
let Inst{28} = bit_28;
let Inst{21-20} = bits_21_20;
@@ -3351,6 +3763,7 @@ class MVE_VxADDSUB_qr<string iname, string suffix,
let Inst{12} = subtract;
let Inst{8} = 0b1;
let Inst{5} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VHADD_qr_s8 : MVE_VxADDSUB_qr<"vhadd", "s8", 0b0, 0b00, 0b0>;
@@ -3388,6 +3801,7 @@ class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
let Inst{12-8} = 0b11110;
let Inst{7} = bit_7;
let Inst{6-4} = 0b110;
+ let validForTailPredication = 1;
}
multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
@@ -3421,7 +3835,7 @@ let Predicates = [HasMVEInt] in {
}
class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, pattern> {
+ : MVE_qDest_rSrc<iname, suffix, "", pattern> {
let Inst{28} = 0b1;
let Inst{21-20} = size;
@@ -3429,15 +3843,27 @@ class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
let Inst{12} = 0b1;
let Inst{8} = 0b0;
let Inst{5} = 0b1;
+ let validForTailPredication = 1;
}
def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>;
def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>;
def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>;
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (bitreverse (v16i8 MQPR:$val1))),
+            (v16i8 (MVE_VBRSR8 (v16i8 MQPR:$val1), (t2MOVi (i32 8))))>;
+
+  def : Pat<(v4i32 (bitreverse (v4i32 MQPR:$val1))),
+            (v4i32 (MVE_VBRSR32 (v4i32 MQPR:$val1), (t2MOVi (i32 32))))>;
+
+  def : Pat<(v8i16 (bitreverse (v8i16 MQPR:$val1))),
+            (v8i16 (MVE_VBRSR16 (v8i16 MQPR:$val1), (t2MOVi (i32 16))))>;
+}
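+
+// VBRSR reverses the bottom Rm bits of each element and zeroes the rest, so
+// a full per-element bitreverse is VBRSR with Rm equal to the element width,
+// hence the t2MOVi of 8/16/32 above.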
+
class MVE_VMUL_qr_int<string iname, string suffix,
bits<2> size, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, pattern> {
+ : MVE_qDest_rSrc<iname, suffix, "", pattern> {
let Inst{28} = 0b0;
let Inst{21-20} = size;
@@ -3445,15 +3871,25 @@ class MVE_VMUL_qr_int<string iname, string suffix,
let Inst{12} = 0b1;
let Inst{8} = 0b0;
let Inst{5} = 0b1;
+ let validForTailPredication = 1;
}
def MVE_VMUL_qr_i8 : MVE_VMUL_qr_int<"vmul", "i8", 0b00>;
def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>;
def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
+ (v16i8 (MVE_VMUL_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
+ def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
+ (v8i16 (MVE_VMUL_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
+ def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
+ (v4i32 (MVE_VMUL_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
+}
+
class MVE_VxxMUL_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, pattern> {
+ : MVE_qDest_rSrc<iname, suffix, "", pattern> {
let Inst{28} = bit_28;
let Inst{21-20} = bits_21_20;
@@ -3471,14 +3907,14 @@ def MVE_VQRDMULH_qr_s8 : MVE_VxxMUL_qr<"vqrdmulh", "s8", 0b1, 0b00>;
def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>;
def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>;
-let Predicates = [HasMVEFloat] in {
+let Predicates = [HasMVEFloat], validForTailPredication = 1 in {
def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>;
def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>;
}
class MVE_VFMAMLA_qr<string iname, string suffix,
- bit bit_28, bits<2> bits_21_20, bit S,
- list<dag> pattern=[]>
+ bit bit_28, bits<2> bits_21_20, bit S,
+ list<dag> pattern=[]>
: MVE_qDestSrc_rSrc<iname, suffix, pattern> {
let Inst{28} = bit_28;
@@ -3487,6 +3923,7 @@ class MVE_VFMAMLA_qr<string iname, string suffix,
let Inst{12} = S;
let Inst{8} = 0b0;
let Inst{5} = 0b0;
+ let validForTailPredication = 1;
}
def MVE_VMLA_qr_s8 : MVE_VFMAMLA_qr<"vmla", "s8", 0b0, 0b00, 0b0>;
@@ -3503,6 +3940,21 @@ def MVE_VMLAS_qr_u8 : MVE_VFMAMLA_qr<"vmlas", "u8", 0b1, 0b00, 0b1>;
def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>;
def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v4i32 (add (v4i32 MQPR:$src1),
+ (v4i32 (mul (v4i32 MQPR:$src2),
+ (v4i32 (ARMvdup (i32 rGPR:$x))))))),
+ (v4i32 (MVE_VMLA_qr_u32 $src1, $src2, $x))>;
+ def : Pat<(v8i16 (add (v8i16 MQPR:$src1),
+ (v8i16 (mul (v8i16 MQPR:$src2),
+ (v8i16 (ARMvdup (i32 rGPR:$x))))))),
+ (v8i16 (MVE_VMLA_qr_u16 $src1, $src2, $x))>;
+ def : Pat<(v16i8 (add (v16i8 MQPR:$src1),
+ (v16i8 (mul (v16i8 MQPR:$src2),
+ (v16i8 (ARMvdup (i32 rGPR:$x))))))),
+ (v16i8 (MVE_VMLA_qr_u8 $src1, $src2, $x))>;
+}
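+
+// The add-of-multiply-by-splat patterns use the unsigned VMLA encodings;
+// for a wrapping (non-saturating) multiply-accumulate the signed and
+// unsigned variants produce the same lane bits, so either encoding would do.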
+
let Predicates = [HasMVEFloat] in {
def MVE_VFMA_qr_f16 : MVE_VFMAMLA_qr<"vfma", "f16", 0b1, 0b11, 0b0>;
def MVE_VFMA_qr_f32 : MVE_VFMAMLA_qr<"vfma", "f32", 0b0, 0b11, 0b0>;
@@ -3555,6 +4007,7 @@ class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
let Inst{7} = imm{1};
let Inst{6-1} = 0b110111;
let Inst{0} = imm{0};
+ let validForTailPredication = 1;
}
def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>;
@@ -3589,6 +4042,7 @@ class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
let Inst{6-4} = 0b110;
let Inst{3-1} = Rm{3-1};
let Inst{0} = imm{0};
+ let validForTailPredication = 1;
}
def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>;
@@ -3599,6 +4053,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>;
def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
+let hasSideEffects = 1 in
class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
"$Rn", vpred_n, "", pattern> {
@@ -3614,6 +4069,7 @@ class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]>
let Constraints = "";
let DecoderMethod = "DecodeMveVCTP";
+ let validForTailPredication = 1;
}
def MVE_VCTP8 : MVE_VCTP<"8", 0b00>;
@@ -3621,6 +4077,15 @@ def MVE_VCTP16 : MVE_VCTP<"16", 0b01>;
def MVE_VCTP32 : MVE_VCTP<"32", 0b10>;
def MVE_VCTP64 : MVE_VCTP<"64", 0b11>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(int_arm_vctp8 rGPR:$Rn),
+ (v16i1 (MVE_VCTP8 rGPR:$Rn))>;
+ def : Pat<(int_arm_vctp16 rGPR:$Rn),
+ (v8i1 (MVE_VCTP16 rGPR:$Rn))>;
+ def : Pat<(int_arm_vctp32 rGPR:$Rn),
+ (v4i1 (MVE_VCTP32 rGPR:$Rn))>;
+}
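+
+// VCTP (Vector Create Tail Predicate) sets the predicate bits for the first
+// Rn lanes and clears the rest; the int_arm_vctp* intrinsics expose it so
+// tail-predicated loops can construct their lane masks directly.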
+
// end of mve_qDest_rSrc
// start of coproc mov
@@ -3863,6 +4328,7 @@ class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
let mayLoad = dir.load;
let mayStore = !eq(dir.load,0);
+ let validForTailPredication = 1;
}
// Contiguous load and store instructions. These come in two main
@@ -4165,7 +4631,8 @@ class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> patte
let Inst{7} = fc{0};
let Inst{4} = 0b0;
- let Defs = [VPR, P0];
+ let Defs = [VPR];
+ let validForTailPredication = 1;
}
class MVE_VPTt1<string suffix, bits<2> size, dag iops>
@@ -4177,11 +4644,12 @@ class MVE_VPTt1<string suffix, bits<2> size, dag iops>
let Inst{5} = Qm{3};
let Inst{3-1} = Qm{2-0};
let Inst{0} = fc{1};
+ let validForTailPredication = 1;
}
class MVE_VPTt1i<string suffix, bits<2> size>
: MVE_VPTt1<suffix, size,
- (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, MQPR:$Qm)> {
+ (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_i:$fc)> {
let Inst{12} = 0b0;
let Inst{0} = 0b0;
}
@@ -4192,7 +4660,7 @@ def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>;
class MVE_VPTt1u<string suffix, bits<2> size>
: MVE_VPTt1<suffix, size,
- (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, MQPR:$Qm)> {
+ (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_u:$fc)> {
let Inst{12} = 0b0;
let Inst{0} = 0b1;
}
@@ -4203,7 +4671,7 @@ def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>;
class MVE_VPTt1s<string suffix, bits<2> size>
: MVE_VPTt1<suffix, size,
- (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, MQPR:$Qm)> {
+ (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_s:$fc)> {
let Inst{12} = 0b1;
}
@@ -4225,7 +4693,7 @@ class MVE_VPTt2<string suffix, bits<2> size, dag iops>
class MVE_VPTt2i<string suffix, bits<2> size>
: MVE_VPTt2<suffix, size,
- (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, GPRwithZR:$Rm)> {
+ (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_i:$fc)> {
let Inst{12} = 0b0;
let Inst{5} = 0b0;
}
@@ -4236,7 +4704,7 @@ def MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>;
class MVE_VPTt2u<string suffix, bits<2> size>
: MVE_VPTt2<suffix, size,
- (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, GPRwithZR:$Rm)> {
+ (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_u:$fc)> {
let Inst{12} = 0b0;
let Inst{5} = 0b1;
}
@@ -4247,7 +4715,7 @@ def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>;
class MVE_VPTt2s<string suffix, bits<2> size>
: MVE_VPTt2<suffix, size,
- (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, GPRwithZR:$Rm)> {
+ (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_s:$fc)> {
let Inst{12} = 0b1;
}
@@ -4276,12 +4744,13 @@ class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=
let Inst{7} = fc{0};
let Inst{4} = 0b0;
- let Defs = [P0];
+ let Defs = [VPR];
let Predicates = [HasMVEFloat];
+ let validForTailPredication = 1;
}
class MVE_VPTft1<string suffix, bit size>
- : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, MQPR:$Qm),
+ : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_fp:$fc),
"$fc, $Qn, $Qm"> {
bits<3> fc;
bits<4> Qm;
@@ -4296,7 +4765,7 @@ def MVE_VPTv4f32 : MVE_VPTft1<"f32", 0b0>;
def MVE_VPTv8f16 : MVE_VPTft1<"f16", 0b1>;
class MVE_VPTft2<string suffix, bit size>
- : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, GPRwithZR:$Rm),
+ : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_fp:$fc),
"$fc, $Qn, $Rm"> {
bits<3> fc;
bits<4> Rm;
@@ -4322,7 +4791,8 @@ def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
let Unpredictable{7} = 0b1;
let Unpredictable{5} = 0b1;
- let Defs = [P0];
+ let Uses = [VPR];
+ let validForTailPredication = 1;
}
def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
@@ -4346,6 +4816,7 @@ def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
let Inst{4} = 0b0;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b1;
+ let validForTailPredication = 1;
}
foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
@@ -4353,19 +4824,113 @@ foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
(MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
-def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
+ (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
+ (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
+ (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+
+ def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
+ (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
+ (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+
+ def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
+ (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
+ (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>;
+ def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
+ (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
+ def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
+ (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
+
+ def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
+ (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
+ def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
+ (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
+
+ // Pred <-> Int
+ def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+
+ def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))),
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+
+ def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+
+  def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))),
+            (v16i1 (MVE_VCMPi8r (v16i8 MQPR:$v1), ZR, 1))>;
+  def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))),
+            (v8i1 (MVE_VCMPi16r (v8i16 MQPR:$v1), ZR, 1))>;
+  def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))),
+            (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>;
+}
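+
+// In the sext patterns, splatting 255 via the i8 immediate form fills every
+// byte with 0xFF, which reads as -1 at any lane width, so the same immediate
+// works for v16i8, v8i16 and v4i32.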
+
+let Predicates = [HasMVEFloat] in {
+ // Pred <-> Float
+ // 112 is 1.0 in float
+ def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+  // 2620 is 1.0 in half
+ def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+ // 240 is -1.0 in float
+ def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+ // 2748 is -1.0 in half
+ def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+
+ def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+ def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+}
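+
+// A sketch of how the magic immediates above decode, assuming the f32 VMOV
+// form takes the raw 8-bit VFP immediate and the i16 form packs
+// (cmode << 8) | imm8:
+//   112 = 0b01110000 -> VFPExpandImm -> +1.0f; 240 sets the sign bit -> -1.0f
+//   2620 = 0xA3C -> cmode 0xA (imm8 << 8 per i16 lane), imm8 0x3C -> 0x3C00 = +1.0h
+//   2748 = 0xABC -> same cmode, imm8 0xBC -> 0xBC00 = -1.0h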
+
+def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
"vpnot", "", "", vpred_n, "", []> {
let Inst{31-0} = 0b11111110001100010000111101001101;
let Unpredictable{19-17} = 0b111;
let Unpredictable{12} = 0b1;
let Unpredictable{7} = 0b1;
let Unpredictable{5} = 0b1;
- let Defs = [P0];
- let Uses = [P0];
let Constraints = "";
+ let DecoderMethod = "DecodeMVEVPNOT";
}
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v4i1 (xor (v4i1 VCCR:$pred), (v4i1 (predicate_cast (i32 65535))))),
+ (v4i1 (MVE_VPNOT (v4i1 VCCR:$pred)))>;
+ def : Pat<(v8i1 (xor (v8i1 VCCR:$pred), (v8i1 (predicate_cast (i32 65535))))),
+ (v8i1 (MVE_VPNOT (v8i1 VCCR:$pred)))>;
+ def : Pat<(v16i1 (xor (v16i1 VCCR:$pred), (v16i1 (predicate_cast (i32 65535))))),
+ (v16i1 (MVE_VPNOT (v16i1 VCCR:$pred)))>;
+}
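+
+// 65535 here is all sixteen predicate bits set, so "xor pred, all-ones" is
+// exactly a predicate inversion and selects down to VPNOT.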
+
class MVE_loltp_start<dag iops, string asm, string ops, bits<2> size>
: t2LOL<(outs GPRlr:$LR), iops, asm, ops> {
bits<4> Rn;
@@ -4433,159 +4998,440 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
// Patterns
//===----------------------------------------------------------------------===//
-class MVE_unpred_vector_store_typed<ValueType Ty, Instruction RegImmInst,
+class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
+ (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
+class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
+ (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred)>;
+
+multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
+ int shift> {
+ def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
+}
+
+class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag LoadKind, int shift>
+ : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
+ (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
+class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag LoadKind, int shift>
+ : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
+ (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred))>;
+
+multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
+ int shift> {
+ def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
+}
+
+class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
- (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
+ : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
+ (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
-multiclass MVE_unpred_vector_store<Instruction RegImmInst, PatFrag StoreKind,
+multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
int shift> {
- def : MVE_unpred_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
- def : MVE_unpred_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
- def : MVE_unpred_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
- def : MVE_unpred_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
- def : MVE_unpred_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
- def : MVE_unpred_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
- def : MVE_unpred_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
-}
-
-class MVE_unpred_vector_load_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag LoadKind, int shift>
- : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
- (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
-
-multiclass MVE_unpred_vector_load<Instruction RegImmInst, PatFrag LoadKind,
- int shift> {
- def : MVE_unpred_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
- def : MVE_unpred_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
- def : MVE_unpred_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
- def : MVE_unpred_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
- def : MVE_unpred_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
- def : MVE_unpred_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
- def : MVE_unpred_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
-}
+ def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>;
+}
+
+def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
+ (pre_store node:$val, node:$ptr, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
+ (post_store node:$val, node:$ptr, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
+ (pre_store node:$val, node:$ptr, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 2;
+}]>;
+def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
+ (post_store node:$val, node:$ptr, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 2;
+}]>;
+
+
+def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ return Ld->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+}]>;
+def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+ return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
+}]>;
+def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2;
+}]>;
+def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+}]>;
+def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+ return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
+}]>;
+def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4;
+}]>;
+
+def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (maskedstore8 node:$val, node:$ptr, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, node:$pred), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+
+def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (maskedstore16 node:$val, node:$ptr, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, node:$pred), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
+}]>;
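+
+// Naming scheme for the PatFrags above: the numeric suffix is the memory
+// element size in bits; the sext/zext/ext and truncating variants refine a
+// base frag by extension kind, and the 16/32-bit forms additionally require
+// natural alignment.
+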
let Predicates = [HasMVEInt, IsLE] in {
- defm : MVE_unpred_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
- defm : MVE_unpred_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>;
- defm : MVE_unpred_vector_store<MVE_VSTRWU32, alignedstore32, 2>;
+ // Stores
+ defm : MVE_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
+ defm : MVE_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>;
+ defm : MVE_vector_store<MVE_VSTRWU32, alignedstore32, 2>;
- defm : MVE_unpred_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
- defm : MVE_unpred_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
- defm : MVE_unpred_vector_load<MVE_VLDRWU32, alignedload32, 2>;
+ // Loads
+ defm : MVE_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
+ defm : MVE_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
+ defm : MVE_vector_load<MVE_VLDRWU32, alignedload32, 2>;
- def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
- (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
- def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
- (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
- def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
- (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+ // Pre/post inc stores
+ defm : MVE_vector_offset_store<MVE_VSTRBU8_pre, pre_store, 0>;
+ defm : MVE_vector_offset_store<MVE_VSTRBU8_post, post_store, 0>;
+ defm : MVE_vector_offset_store<MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
+ defm : MVE_vector_offset_store<MVE_VSTRHU16_post, aligned16_post_store, 1>;
+ defm : MVE_vector_offset_store<MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
+ defm : MVE_vector_offset_store<MVE_VSTRWU32_post, aligned32_post_store, 2>;
}
let Predicates = [HasMVEInt, IsBE] in {
- def : MVE_unpred_vector_store_typed<v16i8, MVE_VSTRBU8, store, 0>;
- def : MVE_unpred_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>;
- def : MVE_unpred_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>;
- def : MVE_unpred_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>;
- def : MVE_unpred_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>;
-
- def : MVE_unpred_vector_load_typed<v16i8, MVE_VLDRBU8, load, 0>;
- def : MVE_unpred_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>;
- def : MVE_unpred_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
- def : MVE_unpred_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
- def : MVE_unpred_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
+ // Aligned Stores
+ def : MVE_vector_store_typed<v16i8, MVE_VSTRBU8, store, 0>;
+ def : MVE_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>;
+ def : MVE_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>;
+ def : MVE_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>;
+ def : MVE_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>;
+
+ // Aligned Loads
+ def : MVE_vector_load_typed<v16i8, MVE_VLDRBU8, load, 0>;
+ def : MVE_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>;
+ def : MVE_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
+ def : MVE_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
+ def : MVE_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
+
+  // Other unaligned loads/stores need to go through a VREV
+ def : Pat<(v2f64 (load t2addrmode_imm7<0>:$addr)),
+ (v2f64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v2i64 (load t2addrmode_imm7<0>:$addr)),
+ (v2i64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v4i32 (load t2addrmode_imm7<0>:$addr)),
+ (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v4f32 (load t2addrmode_imm7<0>:$addr)),
+ (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v8i16 (load t2addrmode_imm7<0>:$addr)),
+ (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v8f16 (load t2addrmode_imm7<0>:$addr)),
+ (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(store (v2f64 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v2i64 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
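+  // (Note, a reading of the patterns above rather than upstream wording:
+  // the byte-wise VLDRBU8/VSTRBU8 access is followed by a VREV that
+  // reverses the bytes within each 16-, 32- or 64-bit element, recovering
+  // the big-endian lane layout for the wider element types.)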
+
+ // Pre/Post inc stores
+ def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_pre, pre_store, 0>;
+ def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_post, post_store, 0>;
+ def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
+ def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
+ def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
+ def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
+ def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
+ def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
+ def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
+ def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
}
+let Predicates = [HasMVEInt] in {
+  // Aligned masked stores, shared between LE and BE
+ def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, maskedstore8, 0>;
+ def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, maskedstore16, 1>;
+ def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, maskedstore16, 1>;
+ def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, maskedstore32, 2>;
+ def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, maskedstore32, 2>;
+ // Truncating stores
+ def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+ (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+ def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+ (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+ def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred),
+ (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>;
+ // Aligned masked loads
+ def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload8, 0>;
+ def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>;
+ def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>;
+ def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>;
+ def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>;
+ // Extending masked loads.
+ def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v8i16 NEONimmAllZerosV))),
+ (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v8i16 NEONimmAllZerosV))),
+ (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v8i16 NEONimmAllZerosV))),
+ (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
+}
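+// (Sketch of the intent, not upstream wording: the truncating masked stores
+// above keep the low 8 or 16 bits of each active lane via VSTRB16/VSTRB32/
+// VSTRH32, and the extending masked loads pick the S- or U-suffixed
+// VLDRB/VLDRH forms; an any-extending masked load is treated as zero-extending.)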
// Widening/Narrowing Loads/Stores
+let MinAlignment = 2 in {
+ def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorevi16 node:$val, node:$ptr)>;
+ def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (post_truncstvi16 node:$val, node:$base, node:$offset)>;
+ def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncstvi16 node:$val, node:$base, node:$offset)>;
+}
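+// (Assumed rationale: MinAlignment = 2 limits these fragments, and the
+// analogous extload fragments below, to halfword-aligned accesses, so the
+// VSTRH/VLDRH-based patterns only fire when that alignment is guaranteed.)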
+
let Predicates = [HasMVEInt] in {
- def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<1>:$addr),
- (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
- def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
- (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
- def : Pat<(truncstorevi16 (v4i32 MQPR:$val), t2addrmode_imm7<2>:$addr),
- (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<2>:$addr)>;
+ def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr),
+ (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>;
+ def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr),
+ (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>;
+ def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr),
+ (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>;
+
+ def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
+ (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
+ def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
+ (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
+ def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr),
+ (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>;
+
+ def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
+ (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
+ def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
+ (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
+ def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr),
+ (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>;
+}
+
+
+let MinAlignment = 2 in {
+ def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>;
+ def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>;
+ def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>;
}
multiclass MVEExtLoad<string DestLanes, string DestElemBits,
string SrcElemBits, string SrcElemType,
- Operand am> {
+ string Align, Operand am> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("extloadvi" # SrcElemBits) am:$addr)),
+ (!cast<PatFrag>("extloadvi" # SrcElemBits # Align) am:$addr)),
(!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
am:$addr)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("zextloadvi" # SrcElemBits) am:$addr)),
+ (!cast<PatFrag>("zextloadvi" # SrcElemBits # Align) am:$addr)),
(!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
am:$addr)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("sextloadvi" # SrcElemBits) am:$addr)),
+ (!cast<PatFrag>("sextloadvi" # SrcElemBits # Align) am:$addr)),
(!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits)
am:$addr)>;
}
let Predicates = [HasMVEInt] in {
- defm : MVEExtLoad<"4", "32", "8", "B", t2addrmode_imm7<1>>;
- defm : MVEExtLoad<"8", "16", "8", "B", t2addrmode_imm7<1>>;
- defm : MVEExtLoad<"4", "32", "16", "H", t2addrmode_imm7<2>>;
+ defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>;
+ defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>;
+ defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>;
}
// Bit convert patterns
let Predicates = [HasMVEInt] in {
- def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
- def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 MQPR:$src))), (v2f64 MQPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 MQPR:$src))), (v2i64 MQPR:$src)>;
- def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 MQPR:$src))), (v4i32 MQPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 MQPR:$src))), (v4f32 MQPR:$src)>;
- def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
- def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v8f16 MQPR:$src))), (v8i16 MQPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v8i16 MQPR:$src))), (v8f16 MQPR:$src)>;
}
let Predicates = [IsLE,HasMVEInt] in {
- def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
-
- def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
- def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
- def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
- def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
- def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
-
- def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
-
- def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
- def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
- def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
- def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
- def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
-
- def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
- def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
- def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
- def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
- def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
-
- def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
- def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
- def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
- def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
- def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
-
- def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
- def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
- def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
- def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
- def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
- def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 MQPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 MQPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 MQPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 MQPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 MQPR:$src)>;
+
+ def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 MQPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 MQPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 MQPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 MQPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 MQPR:$src)>;
+
+ def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 MQPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 MQPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 MQPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 MQPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 MQPR:$src)>;
+
+ def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 MQPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 MQPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 MQPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 MQPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 MQPR:$src)>;
+
+ def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 MQPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 MQPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 MQPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 MQPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 MQPR:$src)>;
+
+ def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 MQPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 MQPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 MQPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 MQPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 MQPR:$src)>;
+
+ def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 MQPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 MQPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 MQPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 MQPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 MQPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 MQPR:$src)>;
+}
+
+let Predicates = [IsBE,HasMVEInt] in {
+ def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
+ def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
+ def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
+ def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
+ def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 (MVE_VREV64_8 MQPR:$src))>;
+
+ def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
+ def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
+ def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
+ def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
+ def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 (MVE_VREV64_8 MQPR:$src))>;
+
+ def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
+ def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
+ def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
+ def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
+ def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 (MVE_VREV32_8 MQPR:$src))>;
+
+ def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
+ def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
+ def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
+ def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
+ def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 (MVE_VREV32_8 MQPR:$src))>;
+
+ def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
+ def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
+ def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
+ def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
+ def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 (MVE_VREV16_8 MQPR:$src))>;
+
+ def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
+ def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
+ def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
+ def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
+ def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 (MVE_VREV16_8 MQPR:$src))>;
+
+ def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
+ def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
+ def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
+ def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
+ def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;
+ def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 806681df102c..60ca92e58041 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -15,22 +15,22 @@
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
def nModImm : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
+ let PrintMethod = "printVMOVModImmOperand";
}
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
+ let PrintMethod = "printVMOVModImmOperand";
let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
+ let PrintMethod = "printVMOVModImmOperand";
let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
+ let PrintMethod = "printVMOVModImmOperand";
let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
@@ -43,7 +43,7 @@ def nImmSplatNotI32 : Operand<i32> {
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
+ let PrintMethod = "printVMOVModImmOperand";
let ParserMatchClass = nImmVMOVI32AsmOperand;
}
@@ -62,18 +62,18 @@ class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
}
class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
+ let PrintMethod = "printVMOVModImmOperand";
let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}
class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
+ let PrintMethod = "printVMOVModImmOperand";
let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
+ let PrintMethod = "printVMOVModImmOperand";
let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
def nImmVMOVF32 : Operand<i32> {
@@ -82,7 +82,7 @@ def nImmVMOVF32 : Operand<i32> {
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
+ let PrintMethod = "printVMOVModImmOperand";
let ParserMatchClass = nImmSplatI64AsmOperand;
}
@@ -478,20 +478,8 @@ def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
-def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
-
-def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
-def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
-def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
-def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
-def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
-def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
-def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
-def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
-def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
-def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
-def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
+def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>;
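+// (Note, inferred from this patch: VTST keeps a dedicated SDNode, while the
+// vceq/vcge/vcgt family is now expressed through generic ARMvcmp/ARMvcmpz
+// nodes that carry the condition code as an explicit i32 operand.)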
// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
@@ -559,14 +547,14 @@ def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
- uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 32 && EltVal == 0);
}]>;
def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
- uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 8 && EltVal == 0xff);
}]>;
@@ -3326,30 +3314,30 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4, string opc, string Dt,
- string asm, SDNode OpNode> {
+ string asm, int fc> {
// 64-bit vector types.
def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "8"), asm, "",
- [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
+ [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), (i32 fc))))]>;
def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "16"), asm, "",
- [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
+ [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), (i32 fc))))]>;
def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "32"), asm, "",
- [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
+ [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), (i32 fc))))]>;
def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, "f32", asm, "",
- [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
+ [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), (i32 fc))))]> {
let Inst{10} = 1; // overwrite F = 1
}
def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, "f16", asm, "",
- [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
+ [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), (i32 fc))))]>,
Requires<[HasNEON,HasFullFP16]> {
let Inst{10} = 1; // overwrite F = 1
}
@@ -3358,30 +3346,83 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "8"), asm, "",
- [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
+ [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), (i32 fc))))]>;
def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "16"), asm, "",
- [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
+ [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), (i32 fc))))]>;
def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "32"), asm, "",
- [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
+ [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), (i32 fc))))]>;
def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, "f32", asm, "",
- [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
+ [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), (i32 fc))))]> {
let Inst{10} = 1; // overwrite F = 1
}
def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, "f16", asm, "",
- [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
+ [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), (i32 fc))))]>,
Requires<[HasNEON,HasFullFP16]> {
let Inst{10} = 1; // overwrite F = 1
}
}
+// Neon 3-register comparisons.
+class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, int fc, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), (i32 fc))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+
+class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, int fc, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), (i32 fc))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+
+multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ int fc, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v8i8 : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, fc, Commutable>;
+ def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, fc, Commutable>;
+ def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, fc, Commutable>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, fc, Commutable>;
+ def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, fc, Commutable>;
+ def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, fc, Commutable>;
+}
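+// (Assumption for reference: the integer fc operands used with these
+// comparison classes follow the ARMCC::CondCodes numbering, e.g. 0 = EQ,
+// 2 = HS, 8 = HI, 10 = GE, 11 = LT, 12 = GT, 13 = LE.)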
+
// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
@@ -5026,67 +5067,67 @@ def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
// Vector Comparisons.
// VCEQ : Vector Compare Equal
-defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
-def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
- NEONvceq, 1>;
-def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
- NEONvceq, 1>;
-def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
- NEONvceq, 1>,
+defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vceq", "i", 0, 1>;
+def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+ 0, 1>;
+def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+ 0, 1>;
+def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
+ 0, 1>,
Requires<[HasNEON, HasFullFP16]>;
-def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
- NEONvceq, 1>,
+def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
+ 0, 1>,
Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
- "$Vd, $Vm, #0", NEONvceqz>;
+ "$Vd, $Vm, #0", 0>;
// VCGE : Vector Compare Greater Than or Equal
-defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
-defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
-def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
- NEONvcge, 0>;
-def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
- NEONvcge, 0>;
-def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
- NEONvcge, 0>,
+defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "s", 10, 0>;
+defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "u", 2, 0>;
+def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+ 10, 0>;
+def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+ 10, 0>;
+def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
+ 10, 0>,
Requires<[HasNEON, HasFullFP16]>;
-def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
- NEONvcge, 0>,
+def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
+ 10, 0>,
Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
- "$Vd, $Vm, #0", NEONvcgez>;
+ "$Vd, $Vm, #0", 10>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
- "$Vd, $Vm, #0", NEONvclez>;
+ "$Vd, $Vm, #0", 13>;
}
// VCGT : Vector Compare Greater Than
-defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
-defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
-def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
- NEONvcgt, 0>;
-def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
- NEONvcgt, 0>;
-def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
- NEONvcgt, 0>,
+defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "s", 12, 0>;
+defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "u", 8, 0>;
+def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+ 12, 0>;
+def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+ 12, 0>;
+def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
+ 12, 0>,
Requires<[HasNEON, HasFullFP16]>;
-def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
- NEONvcgt, 0>,
+def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
+ 12, 0>,
Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
- "$Vd, $Vm, #0", NEONvcgtz>;
+ "$Vd, $Vm, #0", 12>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
- "$Vd, $Vm, #0", NEONvcltz>;
+ "$Vd, $Vm, #0", 11>;
}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index cfeb13c6acb6..18bcbda44580 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -565,6 +565,13 @@ let isCall = 1,
4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>;
+
+  // Also used for Thumb2.
+  // Pushes LR before the call.
+ def tBL_PUSHLR : tPseudoInst<(outs), (ins GPRlr:$ra, pred:$p, thumb_bl_target:$func),
+ 4, IIC_Br,
+ []>,
+ Requires<[IsThumb]>, Sched<[WriteBr]>;
}
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
@@ -592,6 +599,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
[(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
Sched<[WriteBrTbl]> {
let Size = 2;
+ let isNotDuplicable = 1;
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
}
@@ -1362,6 +1370,12 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
[(set tGPR:$Rd, CPSR, (ARMsubc 0, tGPR:$Rn))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
+
+ def tLSLSri : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, imm0_31:$imm5),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMlsls tGPR:$Rn, imm0_31:$imm5))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
}
@@ -1465,7 +1479,7 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
// and make use of the same compressed jump table format as Thumb-2.
let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
- isIndirectBranch = 1 in {
+ isIndirectBranch = 1, isNotDuplicable = 1 in {
def tTBB_JT : tPseudoInst<(outs),
(ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
IIC_Br, []>, Sched<[WriteBr]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 7cbfaba7a8eb..25a45b39fa0c 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -45,7 +45,8 @@ def mve_shift_imm : AsmOperandClass {
let RenderMethod = "addImmOperands";
let DiagnosticString = "operand must be an immediate in the range [1,32]";
}
-def long_shift : Operand<i32> {
+def long_shift : Operand<i32>,
+ ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> {
let ParserMatchClass = mve_shift_imm;
let DecoderMethod = "DecodeLongShiftOperand";
}
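+// (Note: deriving long_shift from ImmLeaf as well lets instruction selection
+// patterns use it directly as an immediate predicate for the [1,32] range,
+// rather than only as an assembler operand class.)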
@@ -2394,6 +2395,23 @@ def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn),
def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)),
(t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(saddsat rGPR:$Rm, rGPR:$Rn),
+ (t2QADD rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ssubsat rGPR:$Rm, rGPR:$Rn),
+ (t2QSUB rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn),
+ (t2QDADD rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
+ (t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
+ (t2QADD8 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
+ (t2QSUB8 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn),
+ (t2QADD16 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn),
+ (t2QSUB16 rGPR:$Rm, rGPR:$Rn)>;
+
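+// (Sketch of the reasoning: saddsat/ssubsat are the target-independent
+// saturating add/subtract nodes; the doubled-operand forms map to
+// t2QDADD/t2QDSUB because a saturating x+x is exactly the saturating
+// doubling those instructions perform before the final add or subtract.)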
// Signed/Unsigned add/subtract
def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>;
@@ -4085,7 +4103,7 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in {
+let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
NoItinerary, []>, Requires<[IsThumb2]>;
}
@@ -4175,15 +4193,15 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag>
}
let DecoderNamespace = "Thumb2CoProc" in {
-defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
-defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
-defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
-defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
}
@@ -4368,8 +4386,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
(outs),
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
- [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>,
+ [(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
+ timm:$CRm, timm:$opc2)]>,
ComplexDeprecationPredicate<"MCR">;
def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@@ -4377,8 +4395,8 @@ def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
- [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
+ [(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
+ timm:$CRm, timm:$opc2)]> {
let Predicates = [IsThumb2, PreV8];
}
def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm",
@@ -4402,24 +4420,24 @@ def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
-def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
- (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+def : T2v6Pat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
+ (t2MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
-def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
- (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+def : T2v6Pat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
+ (t2MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
/* from ARM core register to coprocessor */
def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs),
(ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
c_imm:$CRm),
- [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
- imm:$CRm)]>;
+ [(int_arm_mcrr timm:$cop, timm:$opc1, GPR:$Rt, GPR:$Rt2,
+ timm:$CRm)]>;
def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs),
(ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
c_imm:$CRm),
- [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
- GPR:$Rt2, imm:$CRm)]> {
+ [(int_arm_mcrr2 timm:$cop, timm:$opc1, GPR:$Rt,
+ GPR:$Rt2, timm:$CRm)]> {
let Predicates = [IsThumb2, PreV8];
}
@@ -4439,8 +4457,8 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2),
def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
"cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
- [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
+ [(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
+ timm:$CRm, timm:$opc2)]> {
let Inst{27-24} = 0b1110;
bits<4> opc1;
@@ -4465,8 +4483,8 @@ def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
"cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
- [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
+ [(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
+ timm:$CRm, timm:$opc2)]> {
let Inst{27-24} = 0b1110;
bits<4> opc1;
@@ -5087,6 +5105,7 @@ def t2BF_LabelPseudo
: t2PseudoInst<(outs ), (ins pclabel:$cp), 0, NoItinerary, []> {
let isTerminator = 1;
let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+ let hasNoSchedulingInfo = 1;
}
def t2BFi : t2BF<(ins bflabel_u4:$b_label, bflabel_s16:$label, pred:$p),
@@ -5217,11 +5236,13 @@ def t2LoopDec :
t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
4, IIC_Br, []>, Sched<[WriteBr]>;
-let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in {
+let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
+// Set WhileLoopStart and LoopEnd to occupy 8 bytes because they may
+// get converted into t2CMP and t2Bcc.
def t2WhileLoopStart :
t2PseudoInst<(outs),
(ins rGPR:$elts, brtarget:$target),
- 4, IIC_Br, []>,
+ 8, IIC_Br, []>,
Sched<[WriteBr]>;
def t2LoopEnd :
@@ -5233,7 +5254,7 @@ def t2LoopEnd :
} // end isNotDuplicable
class CS<string iname, bits<4> opcode, list<dag> pattern=[]>
- : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZR:$Rn, GPRwithZRnosp:$Rm, pred_noal:$fcond),
+ : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rm, pred_noal:$fcond),
AddrModeNone, NoItinerary, iname, "$Rd, $Rn, $Rm, $fcond", "", pattern> {
bits<4> Rd;
bits<4> Rm;
@@ -5255,6 +5276,25 @@ def t2CSINC : CS<"csinc", 0b1001>;
def t2CSINV : CS<"csinv", 0b1010>;
def t2CSNEG : CS<"csneg", 0b1011>;
+let Predicates = [HasV8_1MMainline] in {
+ def : T2Pat<(ARMcsinc GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm),
+ (t2CSINC GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
+ def : T2Pat<(ARMcsinv GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm),
+ (t2CSINV GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
+ def : T2Pat<(ARMcsneg GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm),
+ (t2CSNEG GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
+
+ multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
+ def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, cmovpred:$imm),
+ (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
+ def : T2Pat<(ARMcmov GPRwithZR:$tval, modvalue, cmovpred:$imm),
+ (Insn GPRwithZR:$tval, GPRwithZR:$fval,
+ (i32 (inv_cond_XFORM imm:$imm)))>;
+ }
+ defm : ModifiedV8_1CSEL<t2CSINC, (add rGPR:$fval, 1)>;
+ defm : ModifiedV8_1CSEL<t2CSINV, (xor rGPR:$fval, -1)>;
+ defm : ModifiedV8_1CSEL<t2CSNEG, (sub 0, rGPR:$fval)>;
+}
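+// (Note on ModifiedV8_1CSEL, as the patterns read: a conditional select
+// whose other input is a simple modification of $fval, i.e. plus one,
+// bitwise NOT, or negation, folds into CSINC/CSINV/CSNEG, with
+// inv_cond_XFORM flipping the condition for the mirrored operand order.)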
// CS aliases.
let Predicates = [HasV8_1MMainline] in {
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index a0dd25de07ee..fdd961bfbb2f 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
@@ -19,7 +19,7 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
-def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
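+// (Note, inferred from the VCMP changes below: the i32 "signalling" flag
+// operand is dropped from the compare nodes, so codegen now emits only the
+// quiet VCMP forms; the exception-raising VCMPE encodings are retained for
+// (dis)assembly only.)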
@@ -324,7 +324,7 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
// However, there is no UAL syntax for them, so we keep them around for
// (dis)assembly only.
multiclass vfp_ldstx_mult<string asm, bit L_bit> {
- let Predicates = [HasFPRegs] in {
+ let Predicates = [HasFPRegs], hasNoSchedulingInfo = 1 in {
// Unknown precision
def XIA :
AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -548,12 +548,12 @@ let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
- [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>;
+ [/* For disassembly only; pattern left blank */]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
- [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> {
+ [/* For disassembly only; pattern left blank */]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -562,17 +562,17 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
- [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 1))]>;
+ [/* For disassembly only; pattern left blank */]>;
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
- [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
- [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -581,7 +581,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm",
- [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 0))]>;
+ [(arm_cmpfp HPR:$Sd, HPR:$Sm)]>;
} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
@@ -611,7 +611,7 @@ let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
- [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> {
+ [/* For disassembly only; pattern left blank */]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -619,7 +619,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
- [(arm_cmpfp0 SPR:$Sd, (i32 1))]> {
+ [/* For disassembly only; pattern left blank */]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -631,7 +631,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
- [(arm_cmpfp0 HPR:$Sd, (i32 1))]> {
+ [/* For disassembly only; pattern left blank */]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -639,7 +639,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
- [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -647,7 +647,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
- [(arm_cmpfp0 SPR:$Sd, (i32 0))]> {
+ [(arm_cmpfp0 SPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -659,7 +659,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
- [(arm_cmpfp0 HPR:$Sd, (i32 0))]> {
+ [(arm_cmpfp0 HPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -1732,7 +1732,8 @@ def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1740,7 +1741,8 @@ def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1748,7 +1750,8 @@ def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -2297,6 +2300,8 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
let Inst{6-5} = 0b00;
let Inst{4} = 1;
let Inst{3-0} = 0b0000;
+ let Unpredictable{7-5} = 0b111;
+ let Unpredictable{3-0} = 0b1111;
}
let DecoderMethod = "DecodeForVMRSandVMSR" in {
@@ -2370,63 +2375,65 @@ class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
// Instruction operand.
- bits<4> src;
-
- // Encode instruction operand.
- let Inst{15-12} = src;
+ bits<4> Rt;
let Inst{27-20} = 0b11101110;
let Inst{19-16} = opc19_16;
+ let Inst{15-12} = Rt;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
let Inst{4} = 1;
+ let Inst{3-0} = 0b0000;
let Predicates = [HasVFP2];
+ let Unpredictable{7-5} = 0b111;
+ let Unpredictable{3-0} = 0b1111;
}
let DecoderMethod = "DecodeForVMRSandVMSR" in {
let Defs = [FPSCR] in {
let Predicates = [HasFPRegs] in
// Application level GPR -> FPSCR
- def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src),
- "vmsr", "\tfpscr, $src",
- [(int_arm_set_fpscr GPRnopc:$src)]>;
+ def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$Rt),
+ "vmsr", "\tfpscr, $Rt",
+ [(int_arm_set_fpscr GPRnopc:$Rt)]>;
// System level GPR -> FPEXC
- def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPRnopc:$src),
- "vmsr", "\tfpexc, $src", []>;
+ def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPRnopc:$Rt),
+ "vmsr", "\tfpexc, $Rt", []>;
// System level GPR -> FPSID
- def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPRnopc:$src),
- "vmsr", "\tfpsid, $src", []>;
- def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPRnopc:$src),
- "vmsr", "\tfpinst, $src", []>;
- def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$src),
- "vmsr", "\tfpinst2, $src", []>;
+ def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPRnopc:$Rt),
+ "vmsr", "\tfpsid, $Rt", []>;
+ def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPRnopc:$Rt),
+ "vmsr", "\tfpinst, $Rt", []>;
+ def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$Rt),
+ "vmsr", "\tfpinst2, $Rt", []>;
}
let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
// System level GPR -> FPSCR with context saving for security extensions
- def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$src),
- "vmsr", "\tfpcxtns, $src", []>;
+ def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$Rt),
+ "vmsr", "\tfpcxtns, $Rt", []>;
}
let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
// System level GPR -> FPSCR with context saving for security extensions
- def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$src),
- "vmsr", "\tfpcxts, $src", []>;
+ def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$Rt),
+ "vmsr", "\tfpcxts, $Rt", []>;
}
let Predicates = [HasV8_1MMainline, HasFPRegs] in {
// System level GPR -> FPSCR_NZCVQC
def VMSR_FPSCR_NZCVQC
: MovToVFP<0b0010 /* fpscr_nzcvqc */,
- (outs cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$src),
- "vmsr", "\tfpscr_nzcvqc, $src", []>;
+ (outs cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$Rt),
+ "vmsr", "\tfpscr_nzcvqc, $Rt", []>;
}
let Predicates = [HasV8_1MMainline, HasMVEInt] in {
// System level GPR -> VPR/P0
let Defs = [VPR] in
- def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$src),
- "vmsr", "\tvpr, $src", []>;
+ def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$Rt),
+ "vmsr", "\tvpr, $Rt", []>;
- def VMSR_P0 : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$src),
- "vmsr", "\tp0, $src", []>;
+ def VMSR_P0 : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$Rt),
+ "vmsr", "\tp0, $Rt", []>;
}
}
@@ -2614,7 +2621,8 @@ def VSCCLRMD : VFPXI<(outs), (ins pred:$p, fp_dreglist_with_vpr:$regs, variable_
let Inst{21-16} = 0b011111;
let Inst{15-12} = regs{11-8};
let Inst{11-8} = 0b1011;
- let Inst{7-0} = regs{7-0};
+ let Inst{7-1} = regs{7-1};
+ let Inst{0} = 0;
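+  // (Presumably the count field is in units of S registers, which is always
+  // even for a D-register list, so bit 0 is hardwired to zero.)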
let DecoderMethod = "DecodeVSCCLRM";
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 4485a474a6df..8e5e474c0f59 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -34,7 +34,7 @@ public:
ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI);
- bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
+ bool select(MachineInstr &I) override;
static const char *getName() { return DEBUG_TYPE; }
private:
@@ -210,8 +210,8 @@ static const TargetRegisterClass *guessRegClass(unsigned Reg,
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
- unsigned DstReg = I.getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ Register DstReg = I.getOperand(0).getReg();
+ if (Register::isPhysicalRegister(DstReg))
return true;
const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI);
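+  // (Note: Register converts implicitly to unsigned but makes the
+  // virtual-vs-physical distinction explicit; Register::isPhysicalRegister
+  // mirrors the old TargetRegisterInfo static helper.)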
@@ -236,17 +236,17 @@ static bool selectMergeValues(MachineInstrBuilder &MIB,
// We only support G_MERGE_VALUES as a way to stick together two scalar GPRs
// into one DPR.
- unsigned VReg0 = MIB->getOperand(0).getReg();
+ Register VReg0 = MIB->getOperand(0).getReg();
(void)VReg0;
assert(MRI.getType(VReg0).getSizeInBits() == 64 &&
RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID &&
"Unsupported operand for G_MERGE_VALUES");
- unsigned VReg1 = MIB->getOperand(1).getReg();
+ Register VReg1 = MIB->getOperand(1).getReg();
(void)VReg1;
assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
"Unsupported operand for G_MERGE_VALUES");
- unsigned VReg2 = MIB->getOperand(2).getReg();
+ Register VReg2 = MIB->getOperand(2).getReg();
(void)VReg2;
assert(MRI.getType(VReg2).getSizeInBits() == 32 &&
RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID &&
@@ -268,17 +268,17 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB,
// We only support G_UNMERGE_VALUES as a way to break up one DPR into two
// GPRs.
- unsigned VReg0 = MIB->getOperand(0).getReg();
+ Register VReg0 = MIB->getOperand(0).getReg();
(void)VReg0;
assert(MRI.getType(VReg0).getSizeInBits() == 32 &&
RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID &&
"Unsupported operand for G_UNMERGE_VALUES");
- unsigned VReg1 = MIB->getOperand(1).getReg();
+ Register VReg1 = MIB->getOperand(1).getReg();
(void)VReg1;
assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
"Unsupported operand for G_UNMERGE_VALUES");
- unsigned VReg2 = MIB->getOperand(2).getReg();
+ Register VReg2 = MIB->getOperand(2).getReg();
(void)VReg2;
assert(MRI.getType(VReg2).getSizeInBits() == 64 &&
RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID &&
@@ -833,8 +833,7 @@ void ARMInstructionSelector::renderVFPF64Imm(
NewInstBuilder.addImm(FPImmEncoding);
}
-bool ARMInstructionSelector::select(MachineInstr &I,
- CodeGenCoverage &CoverageInfo) const {
+bool ARMInstructionSelector::select(MachineInstr &I) {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -851,7 +850,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
using namespace TargetOpcode;
- if (selectImpl(I, CoverageInfo))
+ if (selectImpl(I, *CoverageInfo))
return true;
MachineInstrBuilder MIB{MF, I};
@@ -874,10 +873,10 @@ bool ARMInstructionSelector::select(MachineInstr &I,
MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());
if (isSExt) {
- unsigned SExtResult = I.getOperand(0).getReg();
+ Register SExtResult = I.getOperand(0).getReg();
// Use a new virtual register for the result of the AND
- unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ Register AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
I.getOperand(0).setReg(AndResult);
auto InsertBefore = std::next(I.getIterator());
@@ -928,7 +927,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
assert(MRI.getType(SrcReg).getSizeInBits() == 64 && "Unsupported size");
assert(MRI.getType(DstReg).getSizeInBits() <= 32 && "Unsupported size");
- unsigned IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ Register IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass);
auto InsertBefore = std::next(I.getIterator());
auto MovI =
BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::VMOVRRD))
@@ -1039,7 +1038,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
case G_FCMP: {
assert(STI.hasVFP2Base() && "Can't select fcmp without VFP");
- unsigned OpReg = I.getOperand(2).getReg();
+ Register OpReg = I.getOperand(2).getReg();
unsigned Size = MRI.getType(OpReg).getSizeInBits();
if (Size == 64 && !STI.hasFP64()) {
@@ -1077,12 +1076,12 @@ bool ARMInstructionSelector::select(MachineInstr &I,
case G_STORE:
case G_LOAD: {
const auto &MemOp = **I.memoperands_begin();
- if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+ if (MemOp.isAtomic()) {
LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
return false;
}
- unsigned Reg = I.getOperand(0).getReg();
+ Register Reg = I.getOperand(0).getReg();
unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID();
LLT ValTy = MRI.getType(Reg);
@@ -1097,9 +1096,9 @@ bool ARMInstructionSelector::select(MachineInstr &I,
if (ValSize == 1 && NewOpc == Opcodes.STORE8) {
// Before storing a 1-bit value, make sure to clear out any unneeded bits.
- unsigned OriginalValue = I.getOperand(0).getReg();
+ Register OriginalValue = I.getOperand(0).getReg();
- unsigned ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ Register ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass);
I.getOperand(0).setReg(ValueToStore);
auto InsertBefore = I.getIterator();
@@ -1159,7 +1158,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
case G_PHI: {
I.setDesc(TII.get(PHI));
- unsigned DstReg = I.getOperand(0).getReg();
+ Register DstReg = I.getOperand(0).getReg();
const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI);
if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
break;
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 73a57b297ad6..81414e6d76fe 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -84,6 +84,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
.legalForCartesianProduct({s8, s16, s32}, {s1, s8, s16});
+ getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+
getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32})
.minScalar(0, s32);
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 90a1ce238c3f..4a193fed04a3 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -509,7 +509,7 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
Offset = MO.getImm() - WordOffset * getImmScale(Opc);
// If storing the base register, it needs to be reset first.
- unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
+ Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
MO.setImm(Offset);
@@ -859,7 +859,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
// Determine list of registers and list of implicit super-register defs.
for (const MachineInstr *MI : Cand.Instrs) {
const MachineOperand &MO = getLoadStoreRegOp(*MI);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
bool IsKill = MO.isKill();
if (IsKill)
KilledRegs.insert(Reg);
@@ -874,7 +874,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
if (!MO.isReg() || !MO.isDef() || MO.isDead())
continue;
assert(MO.isImplicit());
- unsigned DefReg = MO.getReg();
+ Register DefReg = MO.getReg();
if (is_contained(ImpDefs, DefReg))
continue;
@@ -893,7 +893,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
iterator InsertBefore = std::next(iterator(LatestMI));
MachineBasicBlock &MBB = *LatestMI->getParent();
unsigned Offset = getMemoryOpOffset(*First);
- unsigned Base = getLoadStoreBaseOp(*First).getReg();
+ Register Base = getLoadStoreBaseOp(*First).getReg();
bool BaseKill = LatestMI->killsRegister(Base);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
@@ -1005,7 +1005,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
const MachineInstr *MI = MemOps[SIndex].MI;
int Offset = MemOps[SIndex].Offset;
const MachineOperand &PMO = getLoadStoreRegOp(*MI);
- unsigned PReg = PMO.getReg();
+ Register PReg = PMO.getReg();
unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
: TRI->getEncodingValue(PReg);
unsigned Latest = SIndex;
@@ -1052,7 +1052,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
if (NewOffset != Offset + (int)Size)
break;
const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == ARM::SP || Reg == ARM::PC)
break;
if (Count == Limit)
@@ -1261,7 +1261,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
if (isThumb1) return false;
const MachineOperand &BaseOP = MI->getOperand(0);
- unsigned Base = BaseOP.getReg();
+ Register Base = BaseOP.getReg();
bool BaseKill = BaseOP.isKill();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
@@ -1387,7 +1387,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// FIXME: Use LDM/STM with single register instead.
if (isThumb1) return false;
- unsigned Base = getLoadStoreBaseOp(*MI).getReg();
+ Register Base = getLoadStoreBaseOp(*MI).getReg();
bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
unsigned Opcode = MI->getOpcode();
DebugLoc DL = MI->getDebugLoc();
@@ -1512,7 +1512,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
// Behaviour for writeback is undefined if base register is the same as one
// of the others.
const MachineOperand &BaseOp = MI.getOperand(2);
- unsigned Base = BaseOp.getReg();
+ Register Base = BaseOp.getReg();
const MachineOperand &Reg0Op = MI.getOperand(0);
const MachineOperand &Reg1Op = MI.getOperand(1);
if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
@@ -1655,9 +1655,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
return false;
const MachineOperand &BaseOp = MI->getOperand(2);
- unsigned BaseReg = BaseOp.getReg();
- unsigned EvenReg = MI->getOperand(0).getReg();
- unsigned OddReg = MI->getOperand(1).getReg();
+ Register BaseReg = BaseOp.getReg();
+ Register EvenReg = MI->getOperand(0).getReg();
+ Register OddReg = MI->getOperand(1).getReg();
unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
@@ -1783,8 +1783,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (isMemoryOp(*MBBI)) {
unsigned Opcode = MBBI->getOpcode();
const MachineOperand &MO = MBBI->getOperand(0);
- unsigned Reg = MO.getReg();
- unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
+ Register Reg = MO.getReg();
+ Register Base = getLoadStoreBaseOp(*MBBI).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
int Offset = getMemoryOpOffset(*MBBI);
@@ -2121,7 +2121,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
MachineOperand &MO = I->getOperand(j);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (MO.isDef() && TRI->regsOverlap(Reg, Base))
return false;
if (Reg != Base && !MemRegs.count(Reg))
@@ -2415,7 +2415,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
int Opc = MI.getOpcode();
bool isLd = isLoadSingle(Opc);
- unsigned Base = MI.getOperand(1).getReg();
+ Register Base = MI.getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
bool StopHere = false;
auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
diff --git a/lib/Target/ARM/ARMLowOverheadLoops.cpp b/lib/Target/ARM/ARMLowOverheadLoops.cpp
index cedf3bd3c74e..e1c5a9c3e223 100644
--- a/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -11,8 +11,7 @@
/// The expectation is that the loop contains three pseudo instructions:
/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
/// form should be in the preheader, whereas the while form should be in the
-/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
-/// pre-preheader?
+/// preheaders only predecessor.
/// - t2LoopDec - placed within in the loop body.
/// - t2LoopEnd - the loop latch terminator.
///
@@ -35,6 +34,7 @@ using namespace llvm;
namespace {
class ARMLowOverheadLoops : public MachineFunctionPass {
+ MachineFunction *MF = nullptr;
const ARMBaseInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
@@ -52,17 +52,6 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
- bool ProcessLoop(MachineLoop *ML);
-
- void RevertWhile(MachineInstr *MI) const;
-
- void RevertLoopDec(MachineInstr *MI) const;
-
- void RevertLoopEnd(MachineInstr *MI) const;
-
- void Expand(MachineLoop *ML, MachineInstr *Start,
- MachineInstr *Dec, MachineInstr *End, bool Revert);
-
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -71,36 +60,156 @@ namespace {
StringRef getPassName() const override {
return ARM_LOW_OVERHEAD_LOOPS_NAME;
}
+
+ private:
+ bool ProcessLoop(MachineLoop *ML);
+
+ MachineInstr * IsSafeToDefineLR(MachineInstr *MI);
+
+ bool RevertNonLoops();
+
+ void RevertWhile(MachineInstr *MI) const;
+
+ bool RevertLoopDec(MachineInstr *MI, bool AllowFlags = false) const;
+
+ void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;
+
+ void Expand(MachineLoop *ML, MachineInstr *Start,
+ MachineInstr *InsertPt, MachineInstr *Dec,
+ MachineInstr *End, bool Revert);
+
};
}
-
+
char ARMLowOverheadLoops::ID = 0;
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
false, false)
-bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
- if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
+bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
+ const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
+ if (!ST.hasLOB())
return false;
- LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
+ MF = &mf;
+ LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
auto &MLI = getAnalysis<MachineLoopInfo>();
- MRI = &MF.getRegInfo();
- TII = static_cast<const ARMBaseInstrInfo*>(
- MF.getSubtarget().getInstrInfo());
- BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
+ MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
+ MRI = &MF->getRegInfo();
+ TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
+ BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF));
BBUtils->computeAllBlockSizes();
- BBUtils->adjustBBOffsetsAfter(&MF.front());
+ BBUtils->adjustBBOffsetsAfter(&MF->front());
bool Changed = false;
for (auto ML : MLI) {
if (!ML->getParentLoop())
Changed |= ProcessLoop(ML);
}
+ Changed |= RevertNonLoops();
return Changed;
}
+static bool IsLoopStart(MachineInstr &MI) {
+ return MI.getOpcode() == ARM::t2DoLoopStart ||
+ MI.getOpcode() == ARM::t2WhileLoopStart;
+}
+
+template<typename T>
+static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned Reg) {
+ for(auto &MI : make_range(T(Begin), End)) {
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
+ continue;
+ return &MI;
+ }
+ }
+ return nullptr;
+}
+
+static MachineInstr* SearchForUse(MachineInstr *Begin,
+ MachineBasicBlock::iterator End,
+ unsigned Reg) {
+ for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) {
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ return &MI;
+ }
+ }
+ return nullptr;
+}
+
+// Is it safe to define LR with DLS/WLS?
+// LR can defined if it is the operand to start, because it's the same value,
+// or if it's going to be equivalent to the operand to Start.
+MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) {
+
+ auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) {
+ return MI->getOpcode() == ARM::tMOVr &&
+ MI->getOperand(0).getReg() == ARM::LR &&
+ MI->getOperand(1).getReg() == Reg &&
+ MI->getOperand(2).getImm() == ARMCC::AL;
+ };
+
+ MachineBasicBlock *MBB = Start->getParent();
+ unsigned CountReg = Start->getOperand(0).getReg();
+ // Walk forward and backward in the block to find the closest instructions
+ // that define LR. Then also filter them out if they're not a mov lr.
+ MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR);
+ if (PredLRDef && !IsMoveLR(PredLRDef, CountReg))
+ PredLRDef = nullptr;
+
+ MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR);
+ if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg))
+ SuccLRDef = nullptr;
+
+ // We've either found one, two or none mov lr instructions... Now figure out
+ // if they are performing the equilvant mov that the Start instruction will.
+ // Do this by scanning forward and backward to see if there's a def of the
+ // register holding the count value. If we find a suitable def, return it as
+ // the insert point. Later, if InsertPt != Start, then we can remove the
+ // redundant instruction.
+ if (SuccLRDef) {
+ MachineBasicBlock::iterator End(SuccLRDef);
+ if (!SearchForDef(Start, End, CountReg)) {
+ return SuccLRDef;
+ } else
+ SuccLRDef = nullptr;
+ }
+ if (PredLRDef) {
+ MachineBasicBlock::reverse_iterator End(PredLRDef);
+ if (!SearchForDef(Start, End, CountReg)) {
+ return PredLRDef;
+ } else
+ PredLRDef = nullptr;
+ }
+
+ // We can define LR because LR already contains the same value.
+ if (Start->getOperand(0).getReg() == ARM::LR)
+ return Start;
+
+ // We've found no suitable LR def and Start doesn't use LR directly. Can we
+ // just define LR anyway?
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+
+ // Not if we've haven't found a suitable mov and LR is live out.
+ if (LiveRegs.contains(ARM::LR))
+ return nullptr;
+
+ // If LR is not live out, we can insert the instruction if nothing else
+ // uses LR after it.
+ if (!SearchForUse(Start, MBB->end(), ARM::LR))
+ return Start;
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for"
+ << " LR\n");
+ return nullptr;
+}
+
bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
bool Changed = false;
@@ -111,15 +220,10 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
- auto IsLoopStart = [](MachineInstr &MI) {
- return MI.getOpcode() == ARM::t2DoLoopStart ||
- MI.getOpcode() == ARM::t2WhileLoopStart;
- };
-
// Search the given block for a loop start instruction. If one isn't found,
// and there's only one predecessor block, search that one too.
std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
- [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
+ [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
for (auto &MI : *MBB) {
if (IsLoopStart(MI))
return &MI;
@@ -165,41 +269,62 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
Dec = &MI;
else if (MI.getOpcode() == ARM::t2LoopEnd)
End = &MI;
- else if (MI.getDesc().isCall())
+ else if (IsLoopStart(MI))
+ Start = &MI;
+ else if (MI.getDesc().isCall()) {
// TODO: Though the call will require LE to execute again, does this
// mean we should revert? Always executing LE hopefully should be
// faster than performing a sub,cmp,br or even subs,br.
Revert = true;
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
+ }
- if (!Dec)
+ if (!Dec || End)
continue;
- // If we find that we load/store LR between LoopDec and LoopEnd, expect
- // that the decremented value has been spilled to the stack. Because
- // this value isn't actually going to be produced until the latch, by LE,
- // we would need to generate a real sub. The value is also likely to be
- // reloaded for use of LoopEnd - in which in case we'd need to perform
- // an add because it gets negated again by LE! The other option is to
- // then generate the other form of LE which doesn't perform the sub.
- if (MI.mayLoad() || MI.mayStore())
- Revert =
- MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
+ // If we find that LR has been written or read between LoopDec and
+ // LoopEnd, expect that the decremented value is being used else where.
+ // Because this value isn't actually going to be produced until the
+ // latch, by LE, we would need to generate a real sub. The value is also
+ // likely to be copied/reloaded for use of LoopEnd - in which in case
+ // we'd need to perform an add because it gets subtracted again by LE!
+ // The other option is to then generate the other form of LE which doesn't
+ // perform the sub.
+ for (auto &MO : MI.operands()) {
+ if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() &&
+ MO.getReg() == ARM::LR) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI);
+ Revert = true;
+ break;
+ }
+ }
}
if (Dec && End && Revert)
break;
}
+ LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
+ if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
+ if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;);
+
if (!Start && !Dec && !End) {
LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
return Changed;
- } if (!(Start && Dec && End)) {
- report_fatal_error("Failed to find all loop components");
+ } else if (!(Start && Dec && End)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
+ return false;
}
- if (!End->getOperand(1).isMBB() ||
- End->getOperand(1).getMBB() != ML->getHeader())
- report_fatal_error("Expected LoopEnd to target Loop Header");
+ if (!End->getOperand(1).isMBB())
+ report_fatal_error("Expected LoopEnd to target basic block");
+
+ // TODO Maybe there's cases where the target doesn't have to be the header,
+ // but for now be safe and revert.
+ if (End->getOperand(1).getMBB() != ML->getHeader()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
+ Revert = true;
+ }
// The WLS and LE instructions have 12-bits for the label offset. WLS
// requires a positive offset, while LE uses negative.
@@ -216,41 +341,57 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
Revert = true;
}
- LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
- << " - Found Loop Dec: " << *Dec
- << " - Found Loop End: " << *End);
+ MachineInstr *InsertPt = Revert ? nullptr : IsSafeToDefineLR(Start);
+ if (!InsertPt) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
+ Revert = true;
+ } else
+ LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
- Expand(ML, Start, Dec, End, Revert);
+ Expand(ML, Start, InsertPt, Dec, End, Revert);
return true;
}
// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
// beq that branches to the exit branch.
-// FIXME: Need to check that we're not trashing the CPSR when generating the
-// cmp. We could also try to generate a cbz if the value in LR is also in
+// TODO: We could also try to generate a cbz if the value in LR is also in
// another low register.
void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(ARM::t2CMPri));
- MIB.addReg(ARM::LR);
+ MIB.add(MI->getOperand(0));
MIB.addImm(0);
MIB.addImm(ARMCC::AL);
- MIB.addReg(ARM::CPSR);
+ MIB.addReg(ARM::NoRegister);
+
+ MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
+ unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
+ ARM::tBcc : ARM::t2Bcc;
- // TODO: Try to use tBcc instead
- MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
MIB.add(MI->getOperand(1)); // branch target
MIB.addImm(ARMCC::EQ); // condition code
MIB.addReg(ARM::CPSR);
MI->eraseFromParent();
}
-// TODO: Check flags so that we can possibly generate a tSubs or tSub.
-void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
+bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI,
+ bool AllowFlags) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
+
+ // If nothing uses or defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
+ bool SetFlags = false;
+ if (AllowFlags) {
+ if (auto *Def = SearchForDef(MI, MBB->end(), ARM::CPSR)) {
+ if (!SearchForUse(MI, MBB->end(), ARM::CPSR) &&
+ Def->getOpcode() == ARM::t2LoopEnd)
+ SetFlags = true;
+ }
+ }
+
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(ARM::t2SUBri));
MIB.addDef(ARM::LR);
@@ -258,28 +399,39 @@ void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
MIB.add(MI->getOperand(2));
MIB.addImm(ARMCC::AL);
MIB.addReg(0);
- MIB.addReg(0);
+
+ if (SetFlags) {
+ MIB.addReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ } else
+ MIB.addReg(0);
+
MI->eraseFromParent();
+ return SetFlags;
}
// Generate a subs, or sub and cmp, and a branch instead of an LE.
-// FIXME: Need to check that we're not trashing the CPSR when generating
-// the cmp.
-void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
+void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
- // Create cmp
MachineBasicBlock *MBB = MI->getParent();
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(ARM::t2CMPri));
- MIB.addReg(ARM::LR);
- MIB.addImm(0);
- MIB.addImm(ARMCC::AL);
- MIB.addReg(ARM::CPSR);
+ // Create cmp
+ if (!SkipCmp) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(ARM::t2CMPri));
+ MIB.addReg(ARM::LR);
+ MIB.addImm(0);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::NoRegister);
+ }
+
+ MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
+ unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
+ ARM::tBcc : ARM::t2Bcc;
- // TODO Try to use tBcc instead.
// Create bne
- MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
MIB.add(MI->getOperand(1)); // branch target
MIB.addImm(ARMCC::NE); // condition code
MIB.addReg(ARM::CPSR);
@@ -287,33 +439,13 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
}
void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
+ MachineInstr *InsertPt,
MachineInstr *Dec, MachineInstr *End,
bool Revert) {
- auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
- // The trip count should already been held in LR since the instructions
- // within the loop can only read and write to LR. So, there should be a
- // mov to setup the count. WLS/DLS perform this move, so find the original
- // and delete it - inserting WLS/DLS in its place.
- MachineBasicBlock *MBB = Start->getParent();
- MachineInstr *InsertPt = Start;
- for (auto &I : MRI->def_instructions(ARM::LR)) {
- if (I.getParent() != MBB)
- continue;
-
- // Always execute.
- if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
- continue;
-
- // Only handle move reg, if the trip count it will need moving into a reg
- // before the setup instruction anyway.
- if (!I.getDesc().isMoveReg() ||
- !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
- continue;
- InsertPt = &I;
- break;
- }
-
+ auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start,
+ MachineInstr *InsertPt) {
+ MachineBasicBlock *MBB = InsertPt->getParent();
unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
ARM::t2DLS : ARM::t2WLS;
MachineInstrBuilder MIB =
@@ -369,16 +501,54 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
RevertWhile(Start);
else
Start->eraseFromParent();
- RevertLoopDec(Dec);
- RevertLoopEnd(End);
+ bool FlagsAlreadySet = RevertLoopDec(Dec, true);
+ RevertLoopEnd(End, FlagsAlreadySet);
} else {
- Start = ExpandLoopStart(ML, Start);
+ Start = ExpandLoopStart(ML, Start, InsertPt);
RemoveDeadBranch(Start);
End = ExpandLoopEnd(ML, Dec, End);
RemoveDeadBranch(End);
}
}
+bool ARMLowOverheadLoops::RevertNonLoops() {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
+ bool Changed = false;
+
+ for (auto &MBB : *MF) {
+ SmallVector<MachineInstr*, 4> Starts;
+ SmallVector<MachineInstr*, 4> Decs;
+ SmallVector<MachineInstr*, 4> Ends;
+
+ for (auto &I : MBB) {
+ if (IsLoopStart(I))
+ Starts.push_back(&I);
+ else if (I.getOpcode() == ARM::t2LoopDec)
+ Decs.push_back(&I);
+ else if (I.getOpcode() == ARM::t2LoopEnd)
+ Ends.push_back(&I);
+ }
+
+ if (Starts.empty() && Decs.empty() && Ends.empty())
+ continue;
+
+ Changed = true;
+
+ for (auto *Start : Starts) {
+ if (Start->getOpcode() == ARM::t2WhileLoopStart)
+ RevertWhile(Start);
+ else
+ Start->eraseFromParent();
+ }
+ for (auto *Dec : Decs)
+ RevertLoopDec(Dec);
+
+ for (auto *End : Ends)
+ RevertLoopEnd(End);
+ }
+ return Changed;
+}
+
FunctionPass *llvm::createARMLowOverheadLoopsPass() {
return new ARMLowOverheadLoops();
}
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 90c5ad025e56..c92689f4942e 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -74,8 +74,8 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
switch (MO.getType()) {
default: llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
- // Ignore all non-CPSR implicit register operands.
- if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
return false;
assert(!MO.getSubReg() && "Subregs should be eliminated!");
MCOp = MCOperand::createReg(MO.getReg());
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 90d794cd27b1..bb136e92329b 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>
@@ -130,6 +131,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// The amount the literal pool has been increasedby due to promoted globals.
int PromotedGlobalsIncrease = 0;
+ /// True if r0 will be preserved by a call to this function (e.g. C++
+ /// con/destructors).
+ bool PreservesR0 = false;
+
public:
ARMFunctionInfo() = default;
@@ -247,6 +252,9 @@ public:
}
DenseMap<unsigned, unsigned> EHPrologueRemappedRegs;
+
+ void setPreservesR0() { PreservesR0 = true; }
+ bool getPreservesR0() const { return PreservesR0; }
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMParallelDSP.cpp b/lib/Target/ARM/ARMParallelDSP.cpp
index 5389d09bf7d7..ae5657a0a2c1 100644
--- a/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/lib/Target/ARM/ARMParallelDSP.cpp
@@ -1,4 +1,4 @@
-//===- ParallelDSP.cpp - Parallel DSP Pass --------------------------------===//
+//===- ARMParallelDSP.cpp - Parallel DSP Pass -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -18,13 +18,11 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
@@ -45,54 +43,39 @@ static cl::opt<bool>
DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false),
cl::desc("Disable the ARM Parallel DSP pass"));
+static cl::opt<unsigned>
+NumLoadLimit("arm-parallel-dsp-load-limit", cl::Hidden, cl::init(16),
+ cl::desc("Limit the number of loads analysed"));
+
namespace {
- struct OpChain;
- struct BinOpChain;
+ struct MulCandidate;
class Reduction;
- using OpChainList = SmallVector<std::unique_ptr<OpChain>, 8>;
- using ReductionList = SmallVector<Reduction, 8>;
- using ValueList = SmallVector<Value*, 8>;
- using MemInstList = SmallVector<LoadInst*, 8>;
- using PMACPair = std::pair<BinOpChain*,BinOpChain*>;
- using PMACPairList = SmallVector<PMACPair, 8>;
- using Instructions = SmallVector<Instruction*,16>;
- using MemLocList = SmallVector<MemoryLocation, 4>;
+ using MulCandList = SmallVector<std::unique_ptr<MulCandidate>, 8>;
+ using MemInstList = SmallVectorImpl<LoadInst*>;
+ using MulPairList = SmallVector<std::pair<MulCandidate*, MulCandidate*>, 8>;
- struct OpChain {
+ // 'MulCandidate' holds the multiplication instructions that are candidates
+ // for parallel execution.
+ struct MulCandidate {
Instruction *Root;
- ValueList AllValues;
- MemInstList VecLd; // List of all load instructions.
- MemInstList Loads;
+ Value* LHS;
+ Value* RHS;
+ bool Exchange = false;
bool ReadOnly = true;
+ bool Paired = false;
+ SmallVector<LoadInst*, 2> VecLd; // Container for loads to widen.
- OpChain(Instruction *I, ValueList &vl) : Root(I), AllValues(vl) { }
- virtual ~OpChain() = default;
+ MulCandidate(Instruction *I, Value *lhs, Value *rhs) :
+ Root(I), LHS(lhs), RHS(rhs) { }
- void PopulateLoads() {
- for (auto *V : AllValues) {
- if (auto *Ld = dyn_cast<LoadInst>(V))
- Loads.push_back(Ld);
- }
+ bool HasTwoLoadInputs() const {
+ return isa<LoadInst>(LHS) && isa<LoadInst>(RHS);
}
- unsigned size() const { return AllValues.size(); }
- };
-
- // 'BinOpChain' holds the multiplication instructions that are candidates
- // for parallel execution.
- struct BinOpChain : public OpChain {
- ValueList LHS; // List of all (narrow) left hand operands.
- ValueList RHS; // List of all (narrow) right hand operands.
- bool Exchange = false;
-
- BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) :
- OpChain(I, lhs), LHS(lhs), RHS(rhs) {
- for (auto *V : RHS)
- AllValues.push_back(V);
- }
-
- bool AreSymmetrical(BinOpChain *Other);
+ LoadInst *getBaseLoad() const {
+ return VecLd.front();
+ }
};
/// Represent a sequence of multiply-accumulate operations with the aim to
@@ -100,9 +83,9 @@ namespace {
class Reduction {
Instruction *Root = nullptr;
Value *Acc = nullptr;
- OpChainList Muls;
- PMACPairList MulPairs;
- SmallPtrSet<Instruction*, 4> Adds;
+ MulCandList Muls;
+ MulPairList MulPairs;
+ SetVector<Instruction*> Adds;
public:
Reduction() = delete;
@@ -112,10 +95,35 @@ namespace {
/// Record an Add instruction that is a part of the this reduction.
void InsertAdd(Instruction *I) { Adds.insert(I); }
- /// Record a BinOpChain, rooted at a Mul instruction, that is a part of
- /// this reduction.
- void InsertMul(Instruction *I, ValueList &LHS, ValueList &RHS) {
- Muls.push_back(make_unique<BinOpChain>(I, LHS, RHS));
+ /// Create MulCandidates, each rooted at a Mul instruction, that is a part
+ /// of this reduction.
+ void InsertMuls() {
+ auto GetMulOperand = [](Value *V) -> Instruction* {
+ if (auto *SExt = dyn_cast<SExtInst>(V)) {
+ if (auto *I = dyn_cast<Instruction>(SExt->getOperand(0)))
+ if (I->getOpcode() == Instruction::Mul)
+ return I;
+ } else if (auto *I = dyn_cast<Instruction>(V)) {
+ if (I->getOpcode() == Instruction::Mul)
+ return I;
+ }
+ return nullptr;
+ };
+
+ auto InsertMul = [this](Instruction *I) {
+ Value *LHS = cast<Instruction>(I->getOperand(0))->getOperand(0);
+ Value *RHS = cast<Instruction>(I->getOperand(1))->getOperand(0);
+ Muls.push_back(std::make_unique<MulCandidate>(I, LHS, RHS));
+ };
+
+ for (auto *Add : Adds) {
+ if (Add == Acc)
+ continue;
+ if (auto *Mul = GetMulOperand(Add->getOperand(0)))
+ InsertMul(Mul);
+ if (auto *Mul = GetMulOperand(Add->getOperand(1)))
+ InsertMul(Mul);
+ }
}
/// Add the incoming accumulator value, returns true if a value had not
@@ -128,9 +136,17 @@ namespace {
return true;
}
- /// Set two BinOpChains, rooted at muls, that can be executed as a single
+ /// Set two MulCandidates, rooted at muls, that can be executed as a single
/// parallel operation.
- void AddMulPair(BinOpChain *Mul0, BinOpChain *Mul1) {
+ void AddMulPair(MulCandidate *Mul0, MulCandidate *Mul1,
+ bool Exchange = false) {
+ LLVM_DEBUG(dbgs() << "Pairing:\n"
+ << *Mul0->Root << "\n"
+ << *Mul1->Root << "\n");
+ Mul0->Paired = true;
+ Mul1->Paired = true;
+ if (Exchange)
+ Mul1->Exchange = true;
MulPairs.push_back(std::make_pair(Mul0, Mul1));
}
@@ -141,24 +157,40 @@ namespace {
/// Return the add instruction which is the root of the reduction.
Instruction *getRoot() { return Root; }
+ bool is64Bit() const { return Root->getType()->isIntegerTy(64); }
+
+ Type *getType() const { return Root->getType(); }
+
/// Return the incoming value to be accumulated. This maybe null.
Value *getAccumulator() { return Acc; }
/// Return the set of adds that comprise the reduction.
- SmallPtrSetImpl<Instruction*> &getAdds() { return Adds; }
+ SetVector<Instruction*> &getAdds() { return Adds; }
- /// Return the BinOpChain, rooted at mul instruction, that comprise the
+ /// Return the MulCandidate, rooted at mul instruction, that comprise the
/// the reduction.
- OpChainList &getMuls() { return Muls; }
+ MulCandList &getMuls() { return Muls; }
- /// Return the BinOpChain, rooted at mul instructions, that have been
+ /// Return the MulCandidate, rooted at mul instructions, that have been
/// paired for parallel execution.
- PMACPairList &getMulPairs() { return MulPairs; }
+ MulPairList &getMulPairs() { return MulPairs; }
/// To finalise, replace the uses of the root with the intrinsic call.
void UpdateRoot(Instruction *SMLAD) {
Root->replaceAllUsesWith(SMLAD);
}
+
+ void dump() {
+ LLVM_DEBUG(dbgs() << "Reduction:\n";
+ for (auto *Add : Adds)
+ LLVM_DEBUG(dbgs() << *Add << "\n");
+ for (auto &Mul : Muls)
+ LLVM_DEBUG(dbgs() << *Mul->Root << "\n"
+ << " " << *Mul->LHS << "\n"
+ << " " << *Mul->RHS << "\n");
+ LLVM_DEBUG(if (Acc) dbgs() << "Acc in: " << *Acc << "\n")
+ );
+ }
};
class WidenedLoad {
@@ -176,13 +208,11 @@ namespace {
}
};
- class ARMParallelDSP : public LoopPass {
+ class ARMParallelDSP : public FunctionPass {
ScalarEvolution *SE;
AliasAnalysis *AA;
TargetLibraryInfo *TLI;
DominatorTree *DT;
- LoopInfo *LI;
- Loop *L;
const DataLayout *DL;
Module *M;
std::map<LoadInst*, LoadInst*> LoadPairs;
@@ -190,13 +220,12 @@ namespace {
std::map<LoadInst*, std::unique_ptr<WidenedLoad>> WideLoads;
template<unsigned>
- bool IsNarrowSequence(Value *V, ValueList &VL);
-
+ bool IsNarrowSequence(Value *V);
+ bool Search(Value *V, BasicBlock *BB, Reduction &R);
bool RecordMemoryOps(BasicBlock *BB);
void InsertParallelMACs(Reduction &Reduction);
bool AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem);
- LoadInst* CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
- IntegerType *LoadTy);
+ LoadInst* CreateWideLoad(MemInstList &Loads, IntegerType *LoadTy);
bool CreateParallelPairs(Reduction &R);
/// Try to match and generate: SMLAD, SMLADX - Signed Multiply Accumulate
@@ -204,60 +233,38 @@ namespace {
/// products to a 32-bit accumulate operand. Optionally, the instruction can
/// exchange the halfwords of the second operand before performing the
/// arithmetic.
- bool MatchSMLAD(Loop *L);
+ bool MatchSMLAD(Function &F);
public:
static char ID;
- ARMParallelDSP() : LoopPass(ID) { }
-
- bool doInitialization(Loop *L, LPPassManager &LPM) override {
- LoadPairs.clear();
- WideLoads.clear();
- return true;
- }
+ ARMParallelDSP() : FunctionPass(ID) { }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- LoopPass::getAnalysisUsage(AU);
+ FunctionPass::getAnalysisUsage(AU);
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetPassConfig>();
- AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
- bool runOnLoop(Loop *TheLoop, LPPassManager &) override {
+ bool runOnFunction(Function &F) override {
if (DisableParallelDSP)
return false;
- L = TheLoop;
+ if (skipFunction(F))
+ return false;
+
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &TPC = getAnalysis<TargetPassConfig>();
- BasicBlock *Header = TheLoop->getHeader();
- if (!Header)
- return false;
-
- // TODO: We assume the loop header and latch to be the same block.
- // This is not a fundamental restriction, but lifting this would just
- // require more work to do the transformation and then patch up the CFG.
- if (Header != TheLoop->getLoopLatch()) {
- LLVM_DEBUG(dbgs() << "The loop header is not the loop latch: not "
- "running pass ARMParallelDSP\n");
- return false;
- }
-
- if (!TheLoop->getLoopPreheader())
- InsertPreheaderForLoop(L, DT, LI, nullptr, true);
-
- Function &F = *Header->getParent();
M = F.getParent();
DL = &M->getDataLayout();
@@ -282,17 +289,10 @@ namespace {
return false;
}
- LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI);
-
LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n");
LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n");
- if (!RecordMemoryOps(Header)) {
- LLVM_DEBUG(dbgs() << " - No sequential loads found.\n");
- return false;
- }
-
- bool Changes = MatchSMLAD(L);
+ bool Changes = MatchSMLAD(F);
return Changes;
}
};
@@ -331,40 +331,14 @@ bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1,
// TODO: we currently only collect i16, and will support i8 later, so that's
// why we check that types are equal to MaxBitWidth, and not <= MaxBitWidth.
template<unsigned MaxBitWidth>
-bool ARMParallelDSP::IsNarrowSequence(Value *V, ValueList &VL) {
- ConstantInt *CInt;
-
- if (match(V, m_ConstantInt(CInt))) {
- // TODO: if a constant is used, it needs to fit within the bit width.
- return false;
- }
-
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
- return false;
-
- Value *Val, *LHS, *RHS;
- if (match(V, m_Trunc(m_Value(Val)))) {
- if (cast<TruncInst>(I)->getDestTy()->getIntegerBitWidth() == MaxBitWidth)
- return IsNarrowSequence<MaxBitWidth>(Val, VL);
- } else if (match(V, m_Add(m_Value(LHS), m_Value(RHS)))) {
- // TODO: we need to implement sadd16/sadd8 for this, which enables to
- // also do the rewrite for smlad8.ll, but it is unsupported for now.
- return false;
- } else if (match(V, m_ZExtOrSExt(m_Value(Val)))) {
- if (cast<CastInst>(I)->getSrcTy()->getIntegerBitWidth() != MaxBitWidth)
+bool ARMParallelDSP::IsNarrowSequence(Value *V) {
+ if (auto *SExt = dyn_cast<SExtInst>(V)) {
+ if (SExt->getSrcTy()->getIntegerBitWidth() != MaxBitWidth)
return false;
- if (match(Val, m_Load(m_Value()))) {
- auto *Ld = cast<LoadInst>(Val);
-
- // Check that these load could be paired.
- if (!LoadPairs.count(Ld) && !OffsetLoads.count(Ld))
- return false;
-
- VL.push_back(Val);
- VL.push_back(I);
- return true;
+ if (auto *Ld = dyn_cast<LoadInst>(SExt->getOperand(0))) {
+ // Check that this load could be paired.
+ return LoadPairs.count(Ld) || OffsetLoads.count(Ld);
}
}
return false;
@@ -375,6 +349,9 @@ bool ARMParallelDSP::IsNarrowSequence(Value *V, ValueList &VL) {
bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
SmallVector<LoadInst*, 8> Loads;
SmallVector<Instruction*, 8> Writes;
+ LoadPairs.clear();
+ WideLoads.clear();
+ OrderedBasicBlock OrderedBB(BB);
// Collect loads and instruction that may write to memory. For now we only
// record loads which are simple, sign-extended and have a single user.
@@ -389,21 +366,24 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
Loads.push_back(Ld);
}
+ if (Loads.empty() || Loads.size() > NumLoadLimit)
+ return false;
+
using InstSet = std::set<Instruction*>;
using DepMap = std::map<Instruction*, InstSet>;
DepMap RAWDeps;
// Record any writes that may alias a load.
const auto Size = LocationSize::unknown();
- for (auto Read : Loads) {
- for (auto Write : Writes) {
+ for (auto Write : Writes) {
+ for (auto Read : Loads) {
MemoryLocation ReadLoc =
MemoryLocation(Read->getPointerOperand(), Size);
if (!isModOrRefSet(intersectModRef(AA->getModRefInfo(Write, ReadLoc),
ModRefInfo::ModRef)))
continue;
- if (DT->dominates(Write, Read))
+ if (OrderedBB.dominates(Write, Read))
RAWDeps[Read].insert(Write);
}
}
@@ -411,17 +391,16 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
// Check whether there's not a write between the two loads which would
// prevent them from being safely merged.
auto SafeToPair = [&](LoadInst *Base, LoadInst *Offset) {
- LoadInst *Dominator = DT->dominates(Base, Offset) ? Base : Offset;
- LoadInst *Dominated = DT->dominates(Base, Offset) ? Offset : Base;
+ LoadInst *Dominator = OrderedBB.dominates(Base, Offset) ? Base : Offset;
+ LoadInst *Dominated = OrderedBB.dominates(Base, Offset) ? Offset : Base;
if (RAWDeps.count(Dominated)) {
InstSet &WritesBefore = RAWDeps[Dominated];
for (auto Before : WritesBefore) {
-
// We can't move the second load backward, past a write, to merge
// with the first load.
- if (DT->dominates(Dominator, Before))
+ if (OrderedBB.dominates(Dominator, Before))
return false;
}
}
@@ -431,7 +410,7 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
// Record base, offset load pairs.
for (auto *Base : Loads) {
for (auto *Offset : Loads) {
- if (Base == Offset)
+ if (Base == Offset || OffsetLoads.count(Offset))
continue;
if (AreSequentialAccesses<LoadInst>(Base, Offset, *DL, *SE) &&
@@ -453,7 +432,54 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
return LoadPairs.size() > 1;
}
-// Loop Pass that needs to identify integer add/sub reductions of 16-bit vector
+// Search recursively back through the operands to find a tree of values that
+// form a multiply-accumulate chain. The search records the Add and Mul
+// instructions that form the reduction and allows us to find a single value
+// to be used as the initial input to the accumlator.
+bool ARMParallelDSP::Search(Value *V, BasicBlock *BB, Reduction &R) {
+ // If we find a non-instruction, try to use it as the initial accumulator
+ // value. This may have already been found during the search in which case
+ // this function will return false, signaling a search fail.
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return R.InsertAcc(V);
+
+ if (I->getParent() != BB)
+ return false;
+
+ switch (I->getOpcode()) {
+ default:
+ break;
+ case Instruction::PHI:
+ // Could be the accumulator value.
+ return R.InsertAcc(V);
+ case Instruction::Add: {
+ // Adds should be adding together two muls, or another add and a mul to
+ // be within the mac chain. One of the operands may also be the
+ // accumulator value at which point we should stop searching.
+ R.InsertAdd(I);
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+ bool ValidLHS = Search(LHS, BB, R);
+ bool ValidRHS = Search(RHS, BB, R);
+
+ if (ValidLHS && ValidRHS)
+ return true;
+
+ return R.InsertAcc(I);
+ }
+ case Instruction::Mul: {
+ Value *MulOp0 = I->getOperand(0);
+ Value *MulOp1 = I->getOperand(1);
+ return IsNarrowSequence<16>(MulOp0) && IsNarrowSequence<16>(MulOp1);
+ }
+ case Instruction::SExt:
+ return Search(I->getOperand(0), BB, R);
+ }
+ return false;
+}
+
+// The pass needs to identify integer add/sub reductions of 16-bit vector
// multiplications.
// To use SMLAD:
// 1) we first need to find integer add then look for this pattern:
@@ -484,88 +510,39 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
// If loop invariants are used instead of loads, these need to be packed
// before the loop begins.
//
-bool ARMParallelDSP::MatchSMLAD(Loop *L) {
- // Search recursively back through the operands to find a tree of values that
- // form a multiply-accumulate chain. The search records the Add and Mul
- // instructions that form the reduction and allows us to find a single value
- // to be used as the initial input to the accumlator.
- std::function<bool(Value*, Reduction&)> Search = [&]
- (Value *V, Reduction &R) -> bool {
-
- // If we find a non-instruction, try to use it as the initial accumulator
- // value. This may have already been found during the search in which case
- // this function will return false, signaling a search fail.
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
- return R.InsertAcc(V);
-
- switch (I->getOpcode()) {
- default:
- break;
- case Instruction::PHI:
- // Could be the accumulator value.
- return R.InsertAcc(V);
- case Instruction::Add: {
- // Adds should be adding together two muls, or another add and a mul to
- // be within the mac chain. One of the operands may also be the
- // accumulator value at which point we should stop searching.
- bool ValidLHS = Search(I->getOperand(0), R);
- bool ValidRHS = Search(I->getOperand(1), R);
- if (!ValidLHS && !ValidLHS)
- return false;
- else if (ValidLHS && ValidRHS) {
- R.InsertAdd(I);
- return true;
- } else {
- R.InsertAdd(I);
- return R.InsertAcc(I);
- }
- }
- case Instruction::Mul: {
- Value *MulOp0 = I->getOperand(0);
- Value *MulOp1 = I->getOperand(1);
- if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1)) {
- ValueList LHS;
- ValueList RHS;
- if (IsNarrowSequence<16>(MulOp0, LHS) &&
- IsNarrowSequence<16>(MulOp1, RHS)) {
- R.InsertMul(I, LHS, RHS);
- return true;
- }
- }
- return false;
- }
- case Instruction::SExt:
- return Search(I->getOperand(0), R);
- }
- return false;
- };
-
+bool ARMParallelDSP::MatchSMLAD(Function &F) {
bool Changed = false;
- SmallPtrSet<Instruction*, 4> AllAdds;
- BasicBlock *Latch = L->getLoopLatch();
- for (Instruction &I : reverse(*Latch)) {
- if (I.getOpcode() != Instruction::Add)
+ for (auto &BB : F) {
+ SmallPtrSet<Instruction*, 4> AllAdds;
+ if (!RecordMemoryOps(&BB))
continue;
- if (AllAdds.count(&I))
- continue;
+ for (Instruction &I : reverse(BB)) {
+ if (I.getOpcode() != Instruction::Add)
+ continue;
- const auto *Ty = I.getType();
- if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
- continue;
+ if (AllAdds.count(&I))
+ continue;
- Reduction R(&I);
- if (!Search(&I, R))
- continue;
+ const auto *Ty = I.getType();
+ if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
+ continue;
- if (!CreateParallelPairs(R))
- continue;
+ Reduction R(&I);
+ if (!Search(&I, &BB, R))
+ continue;
- InsertParallelMACs(R);
- Changed = true;
- AllAdds.insert(R.getAdds().begin(), R.getAdds().end());
+ R.InsertMuls();
+ LLVM_DEBUG(dbgs() << "After search, Reduction:\n"; R.dump());
+
+ if (!CreateParallelPairs(R))
+ continue;
+
+ InsertParallelMACs(R);
+ Changed = true;
+ AllAdds.insert(R.getAdds().begin(), R.getAdds().end());
+ }
}
return Changed;
@@ -578,87 +555,57 @@ bool ARMParallelDSP::CreateParallelPairs(Reduction &R) {
return false;
// Check that the muls operate directly upon sign extended loads.
- for (auto &MulChain : R.getMuls()) {
- // A mul has 2 operands, and a narrow op consist of sext and a load; thus
- // we expect at least 4 items in this operand value list.
- if (MulChain->size() < 4) {
- LLVM_DEBUG(dbgs() << "Operand list too short.\n");
+ for (auto &MulCand : R.getMuls()) {
+ if (!MulCand->HasTwoLoadInputs())
return false;
- }
- MulChain->PopulateLoads();
- ValueList &LHS = static_cast<BinOpChain*>(MulChain.get())->LHS;
- ValueList &RHS = static_cast<BinOpChain*>(MulChain.get())->RHS;
-
- // Use +=2 to skip over the expected extend instructions.
- for (unsigned i = 0, e = LHS.size(); i < e; i += 2) {
- if (!isa<LoadInst>(LHS[i]) || !isa<LoadInst>(RHS[i]))
- return false;
- }
}
- auto CanPair = [&](Reduction &R, BinOpChain *PMul0, BinOpChain *PMul1) {
- if (!PMul0->AreSymmetrical(PMul1))
- return false;
-
+ auto CanPair = [&](Reduction &R, MulCandidate *PMul0, MulCandidate *PMul1) {
// The first elements of each vector should be loads with sexts. If we
// find that its two pairs of consecutive loads, then these can be
// transformed into two wider loads and the users can be replaced with
// DSP intrinsics.
- for (unsigned x = 0; x < PMul0->LHS.size(); x += 2) {
- auto *Ld0 = dyn_cast<LoadInst>(PMul0->LHS[x]);
- auto *Ld1 = dyn_cast<LoadInst>(PMul1->LHS[x]);
- auto *Ld2 = dyn_cast<LoadInst>(PMul0->RHS[x]);
- auto *Ld3 = dyn_cast<LoadInst>(PMul1->RHS[x]);
-
- if (!Ld0 || !Ld1 || !Ld2 || !Ld3)
- return false;
+ auto Ld0 = static_cast<LoadInst*>(PMul0->LHS);
+ auto Ld1 = static_cast<LoadInst*>(PMul1->LHS);
+ auto Ld2 = static_cast<LoadInst*>(PMul0->RHS);
+ auto Ld3 = static_cast<LoadInst*>(PMul1->RHS);
- LLVM_DEBUG(dbgs() << "Loads:\n"
- << " - " << *Ld0 << "\n"
- << " - " << *Ld1 << "\n"
- << " - " << *Ld2 << "\n"
- << " - " << *Ld3 << "\n");
-
- if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
- if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
- LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- R.AddMulPair(PMul0, PMul1);
- return true;
- } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
- LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n");
- PMul1->Exchange = true;
- R.AddMulPair(PMul0, PMul1);
- return true;
- }
- } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
- AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
+ if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n");
- LLVM_DEBUG(dbgs() << " and swapping muls\n");
- PMul0->Exchange = true;
- // Only the second operand can be exchanged, so swap the muls.
- R.AddMulPair(PMul1, PMul0);
+ R.AddMulPair(PMul0, PMul1);
+ return true;
+ } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n");
+ R.AddMulPair(PMul0, PMul1, true);
return true;
}
+ } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
+ AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n");
+ LLVM_DEBUG(dbgs() << " and swapping muls\n");
+ // Only the second operand can be exchanged, so swap the muls.
+ R.AddMulPair(PMul1, PMul0, true);
+ return true;
}
return false;
};
- OpChainList &Muls = R.getMuls();
+ MulCandList &Muls = R.getMuls();
const unsigned Elems = Muls.size();
- SmallPtrSet<const Instruction*, 4> Paired;
for (unsigned i = 0; i < Elems; ++i) {
- BinOpChain *PMul0 = static_cast<BinOpChain*>(Muls[i].get());
- if (Paired.count(PMul0->Root))
+ MulCandidate *PMul0 = static_cast<MulCandidate*>(Muls[i].get());
+ if (PMul0->Paired)
continue;
for (unsigned j = 0; j < Elems; ++j) {
if (i == j)
continue;
- BinOpChain *PMul1 = static_cast<BinOpChain*>(Muls[j].get());
- if (Paired.count(PMul1->Root))
+ MulCandidate *PMul1 = static_cast<MulCandidate*>(Muls[j].get());
+ if (PMul1->Paired)
continue;
const Instruction *Mul0 = PMul0->Root;
@@ -668,29 +615,19 @@ bool ARMParallelDSP::CreateParallelPairs(Reduction &R) {
assert(PMul0 != PMul1 && "expected different chains");
- if (CanPair(R, PMul0, PMul1)) {
- Paired.insert(Mul0);
- Paired.insert(Mul1);
+ if (CanPair(R, PMul0, PMul1))
break;
- }
}
}
return !R.getMulPairs().empty();
}
-
void ARMParallelDSP::InsertParallelMACs(Reduction &R) {
- auto CreateSMLADCall = [&](SmallVectorImpl<LoadInst*> &VecLd0,
- SmallVectorImpl<LoadInst*> &VecLd1,
- Value *Acc, bool Exchange,
- Instruction *InsertAfter) {
+ auto CreateSMLAD = [&](LoadInst* WideLd0, LoadInst *WideLd1,
+ Value *Acc, bool Exchange,
+ Instruction *InsertAfter) {
// Replace the reduction chain with an intrinsic call
- IntegerType *Ty = IntegerType::get(M->getContext(), 32);
- LoadInst *WideLd0 = WideLoads.count(VecLd0[0]) ?
- WideLoads[VecLd0[0]]->getLoad() : CreateWideLoad(VecLd0, Ty);
- LoadInst *WideLd1 = WideLoads.count(VecLd1[0]) ?
- WideLoads[VecLd1[0]]->getLoad() : CreateWideLoad(VecLd1, Ty);
Value* Args[] = { WideLd0, WideLd1, Acc };
Function *SMLAD = nullptr;
@@ -704,34 +641,95 @@ void ARMParallelDSP::InsertParallelMACs(Reduction &R) {
Intrinsic::getDeclaration(M, Intrinsic::arm_smlald);
IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
- ++BasicBlock::iterator(InsertAfter));
+ BasicBlock::iterator(InsertAfter));
Instruction *Call = Builder.CreateCall(SMLAD, Args);
NumSMLAD++;
return Call;
};
- Instruction *InsertAfter = R.getRoot();
+ // Return the instruction after the dominated instruction.
+ auto GetInsertPoint = [this](Value *A, Value *B) {
+ assert((isa<Instruction>(A) || isa<Instruction>(B)) &&
+ "expected at least one instruction");
+
+ Value *V = nullptr;
+ if (!isa<Instruction>(A))
+ V = B;
+ else if (!isa<Instruction>(B))
+ V = A;
+ else
+ V = DT->dominates(cast<Instruction>(A), cast<Instruction>(B)) ? B : A;
+
+ return &*++BasicBlock::iterator(cast<Instruction>(V));
+ };
+
Value *Acc = R.getAccumulator();
- if (!Acc)
- Acc = ConstantInt::get(IntegerType::get(M->getContext(), 32), 0);
- LLVM_DEBUG(dbgs() << "Root: " << *InsertAfter << "\n"
- << "Acc: " << *Acc << "\n");
+ // For any muls that were discovered but not paired, accumulate their values
+ // as before.
+ IRBuilder<NoFolder> Builder(R.getRoot()->getParent());
+ MulCandList &MulCands = R.getMuls();
+ for (auto &MulCand : MulCands) {
+ if (MulCand->Paired)
+ continue;
+
+ Instruction *Mul = cast<Instruction>(MulCand->Root);
+ LLVM_DEBUG(dbgs() << "Accumulating unpaired mul: " << *Mul << "\n");
+
+ if (R.getType() != Mul->getType()) {
+ assert(R.is64Bit() && "expected 64-bit result");
+ Builder.SetInsertPoint(&*++BasicBlock::iterator(Mul));
+ Mul = cast<Instruction>(Builder.CreateSExt(Mul, R.getRoot()->getType()));
+ }
+
+ if (!Acc) {
+ Acc = Mul;
+ continue;
+ }
+
+ // If Acc is the original incoming value to the reduction, it could be a
+ // phi. But the phi will dominate Mul, meaning that Mul will be the
+ // insertion point.
+ Builder.SetInsertPoint(GetInsertPoint(Mul, Acc));
+ Acc = Builder.CreateAdd(Mul, Acc);
+ }
+
+ if (!Acc) {
+ Acc = R.is64Bit() ?
+ ConstantInt::get(IntegerType::get(M->getContext(), 64), 0) :
+ ConstantInt::get(IntegerType::get(M->getContext(), 32), 0);
+ } else if (Acc->getType() != R.getType()) {
+ Builder.SetInsertPoint(R.getRoot());
+ Acc = Builder.CreateSExt(Acc, R.getType());
+ }
+
+ // Roughly sort the mul pairs in their program order.
+ OrderedBasicBlock OrderedBB(R.getRoot()->getParent());
+ llvm::sort(R.getMulPairs(), [&OrderedBB](auto &PairA, auto &PairB) {
+ const Instruction *A = PairA.first->Root;
+ const Instruction *B = PairB.first->Root;
+ return OrderedBB.dominates(A, B);
+ });
+
+ IntegerType *Ty = IntegerType::get(M->getContext(), 32);
for (auto &Pair : R.getMulPairs()) {
- BinOpChain *PMul0 = Pair.first;
- BinOpChain *PMul1 = Pair.second;
- LLVM_DEBUG(dbgs() << "Muls:\n"
- << "- " << *PMul0->Root << "\n"
- << "- " << *PMul1->Root << "\n");
-
- Acc = CreateSMLADCall(PMul0->VecLd, PMul1->VecLd, Acc, PMul1->Exchange,
- InsertAfter);
- InsertAfter = cast<Instruction>(Acc);
+ MulCandidate *LHSMul = Pair.first;
+ MulCandidate *RHSMul = Pair.second;
+ LoadInst *BaseLHS = LHSMul->getBaseLoad();
+ LoadInst *BaseRHS = RHSMul->getBaseLoad();
+ LoadInst *WideLHS = WideLoads.count(BaseLHS) ?
+ WideLoads[BaseLHS]->getLoad() : CreateWideLoad(LHSMul->VecLd, Ty);
+ LoadInst *WideRHS = WideLoads.count(BaseRHS) ?
+ WideLoads[BaseRHS]->getLoad() : CreateWideLoad(RHSMul->VecLd, Ty);
+
+ Instruction *InsertAfter = GetInsertPoint(WideLHS, WideRHS);
+ InsertAfter = GetInsertPoint(InsertAfter, Acc);
+ Acc = CreateSMLAD(WideLHS, WideRHS, Acc, RHSMul->Exchange, InsertAfter);
}
R.UpdateRoot(cast<Instruction>(Acc));
}
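
To make the pairing above concrete, here is a hedged sketch (not part of the patch) of the kind of source pattern InsertParallelMACs targets, written against the ACLE-style __smlad intrinsic; the function name and loop shape are illustrative assumptions:

    // Two adjacent 16x16->32 multiply-accumulates per iteration become one
    // __smlad on the widened 32-bit loads (little-endian assumed).
    int dot(const short *a, const short *b, int n) {
      int acc = 0;
      for (int i = 0; i < n; i += 2)
        acc += a[i] * b[i] + a[i + 1] * b[i + 1];
      return acc; // pass output: acc = __smlad(wide_a, wide_b, acc) per pair
    }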
-LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
+LoadInst* ARMParallelDSP::CreateWideLoad(MemInstList &Loads,
IntegerType *LoadTy) {
assert(Loads.size() == 2 && "currently only support widening two loads");
@@ -758,8 +756,8 @@ LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
return;
Source->moveBefore(Sink);
- for (auto &U : Source->uses())
- MoveBefore(Source, U.getUser());
+ for (auto &Op : Source->operands())
+ MoveBefore(Op, Source);
};
// Insert the load at the point of the original dominating load.
@@ -784,57 +782,30 @@ LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
// Loads[0] needs trunc while Loads[1] needs a lshr and trunc.
// TODO: Support big-endian as well.
Value *Bottom = IRB.CreateTrunc(WideLoad, Base->getType());
- BaseSExt->setOperand(0, Bottom);
+ Value *NewBaseSExt = IRB.CreateSExt(Bottom, BaseSExt->getType());
+ BaseSExt->replaceAllUsesWith(NewBaseSExt);
IntegerType *OffsetTy = cast<IntegerType>(Offset->getType());
Value *ShiftVal = ConstantInt::get(LoadTy, OffsetTy->getBitWidth());
Value *Top = IRB.CreateLShr(WideLoad, ShiftVal);
Value *Trunc = IRB.CreateTrunc(Top, OffsetTy);
- OffsetSExt->setOperand(0, Trunc);
-
+ Value *NewOffsetSExt = IRB.CreateSExt(Trunc, OffsetSExt->getType());
+ OffsetSExt->replaceAllUsesWith(NewOffsetSExt);
+
+ LLVM_DEBUG(dbgs() << "From Base and Offset:\n"
+ << *Base << "\n" << *Offset << "\n"
+ << "Created Wide Load:\n"
+ << *WideLoad << "\n"
+ << *Bottom << "\n"
+ << *NewBaseSExt << "\n"
+ << *Top << "\n"
+ << *Trunc << "\n"
+ << *NewOffsetSExt << "\n");
WideLoads.emplace(std::make_pair(Base,
- make_unique<WidenedLoad>(Loads, WideLoad)));
+ std::make_unique<WidenedLoad>(Loads, WideLoad)));
return WideLoad;
}
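
As a rough scalar analogue of the sequence above (a sketch assuming little-endian layout; the helper name is mine), the widened load and the Bottom/Top extraction behave like:

    #include <cstdint>
    #include <cstring>

    // One 32-bit load replaces two adjacent 16-bit loads; the halves are
    // recovered exactly as the trunc and lshr+trunc above, then sign-extended.
    void widen(const int16_t *p, int32_t &bottom, int32_t &top) {
      uint32_t wide;
      std::memcpy(&wide, p, sizeof(wide)); // the wide load
      bottom = (int16_t)wide;              // trunc -> sext (replaces BaseSExt)
      top = (int16_t)(wide >> 16);         // lshr, trunc -> sext (replaces OffsetSExt)
    }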
-// Compare the value lists in Other to this chain.
-bool BinOpChain::AreSymmetrical(BinOpChain *Other) {
- // Element-by-element comparison of Value lists returning true if they are
- // instructions with the same opcode or constants with the same value.
- auto CompareValueList = [](const ValueList &VL0,
- const ValueList &VL1) {
- if (VL0.size() != VL1.size()) {
- LLVM_DEBUG(dbgs() << "Muls are mismatching operand list lengths: "
- << VL0.size() << " != " << VL1.size() << "\n");
- return false;
- }
-
- const unsigned Pairs = VL0.size();
-
- for (unsigned i = 0; i < Pairs; ++i) {
- const Value *V0 = VL0[i];
- const Value *V1 = VL1[i];
- const auto *Inst0 = dyn_cast<Instruction>(V0);
- const auto *Inst1 = dyn_cast<Instruction>(V1);
-
- if (!Inst0 || !Inst1)
- return false;
-
- if (Inst0->isSameOperationAs(Inst1))
- continue;
-
- const APInt *C0, *C1;
- if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1))
- return false;
- }
-
- return true;
- };
-
- return CompareValueList(LHS, Other->LHS) &&
- CompareValueList(RHS, Other->RHS);
-}
-
Pass *llvm::createARMParallelDSPPass() {
return new ARMParallelDSP();
}
@@ -842,6 +813,6 @@ Pass *llvm::createARMParallelDSPPass() {
char ARMParallelDSP::ID = 0;
INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp",
- "Transform loops to use DSP intrinsics", false, false)
+ "Transform functions to use DSP intrinsics", false, false)
INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp",
- "Transform loops to use DSP intrinsics", false, false)
+ "Transform functions to use DSP intrinsics", false, false)
diff --git a/lib/Target/ARM/ARMPredicates.td b/lib/Target/ARM/ARMPredicates.td
index 0b6b40de80dd..b008d3e2e296 100644
--- a/lib/Target/ARM/ARMPredicates.td
+++ b/lib/Target/ARM/ARMPredicates.td
@@ -71,7 +71,7 @@ def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">,
- AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">;
+ AssemblerPredicate<"FeatureVFP2_SP", "VFP2">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3Base()">,
AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">;
def HasVFP4 : Predicate<"Subtarget->hasVFP4Base()">,
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 92ae26b3729d..56055a15483a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -180,7 +180,7 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
// models the APSR when it's accessed by some special instructions. In such cases
// it has the same encoding as PC.
def CPSR : ARMReg<0, "cpsr">;
-def APSR : ARMReg<1, "apsr">;
+def APSR : ARMReg<15, "apsr">;
def APSR_NZCV : ARMReg<15, "apsr_nzcv">;
def SPSR : ARMReg<2, "spsr">;
def FPSCR : ARMReg<3, "fpscr">;
@@ -486,12 +486,20 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
// Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP.
// These are needed by instructions (e.g. ldrexd/strexd) requiring even-odd GPRs.
-def Tuples2R : RegisterTuples<[gsub_0, gsub_1],
- [(add R0, R2, R4, R6, R8, R10, R12),
- (add R1, R3, R5, R7, R9, R11, SP)]>;
+def Tuples2Rnosp : RegisterTuples<[gsub_0, gsub_1],
+ [(add R0, R2, R4, R6, R8, R10),
+ (add R1, R3, R5, R7, R9, R11)]>;
+
+def Tuples2Rsp : RegisterTuples<[gsub_0, gsub_1],
+ [(add R12), (add SP)]>;
// Register class representing a pair of even-odd GPRs.
-def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> {
+def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2Rnosp, Tuples2Rsp)> {
+ let Size = 64; // 2 x 32 bits, we have no predefined type of that size.
+}
+
+// Register class representing a pair of even-odd GPRs, except (R12, SP).
+def GPRPairnosp : RegisterClass<"ARM", [untyped], 64, (add Tuples2Rnosp)> {
let Size = 64; // 2 x 32 bits, we have no predefined type of that size.
}
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 21d32bde4710..3f0b71afd977 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -2239,9 +2239,9 @@ def A9WriteLMfpPostRA : SchedWriteVariant<[
// Distinguish between our multiple MI-level forms of the same
// VLDM/VSTM instructions.
def A9PreRA : SchedPredicate<
- "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
+ "Register::isVirtualRegister(MI->getOperand(0).getReg())">;
def A9PostRA : SchedPredicate<
- "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;
+ "Register::isPhysicalRegister(MI->getOperand(0).getReg())">;
// VLDM represents all destination registers as a single register
// tuple, unlike LDM. So the number of write operands is not variadic.
diff --git a/lib/Target/ARM/ARMScheduleM4.td b/lib/Target/ARM/ARMScheduleM4.td
index 38c8ea2b4f35..bfa5fc0d7131 100644
--- a/lib/Target/ARM/ARMScheduleM4.td
+++ b/lib/Target/ARM/ARMScheduleM4.td
@@ -18,6 +18,9 @@ def CortexM4Model : SchedMachineModel {
let PostRAScheduler = 1;
let CompleteModel = 0;
+ let UnsupportedFeatures = [IsARM, HasNEON, HasDotProd, HasZCZ, HasMVEInt,
+ IsNotMClass, HasDPVFP, HasFPARMv8, HasFullFP16, Has8MSecExt, HasV8,
+ HasV8_3a, HasTrustZone, HasDFB, IsWindows];
}
@@ -50,6 +53,7 @@ def : M4UnitL2<WriteMAC16>;
def : M4UnitL2<WriteDIV>;
def : M4UnitL2I<(instregex "(t|t2)LDM")>;
+def : M4UnitL2I<(instregex "(t|t2)LDR")>;
// Stores we use a latency of 1 as they have no outputs
@@ -78,9 +82,20 @@ def : M4UnitL1<WriteNoop>;
def : M4UnitL1<WritePreLd>;
def : M4UnitL1I<(instregex "(t|t2)MOV")>;
def : M4UnitL1I<(instrs COPY)>;
-def : M4UnitL1I<(instregex "t2IT")>;
-def : M4UnitL1I<(instregex "t2SEL", "t2USAD8",
- "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>;
+def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>;
+def : M4UnitL1I<(instregex "t2CLREX")>;
+def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", "t2SML[AS]",
+ "t2(S|Q|SH|U|UQ|UH|QD)(ADD|ASX|SAX|SUB)", "t2USADA8", "(t|t2)REV")>;
+
+// These instructions are of little interest to scheduling as they will either
+// not be generated or are not very useful to schedule. They are here to make
+// the model more complete.
+def : M4UnitL1I<(instregex "t2CDP", "t2LDC", "t2MCR", "t2MRC", "t2MRRC", "t2STC")>;
+def : M4UnitL1I<(instregex "tCPS", "t2ISB", "t2DSB", "t2DMB", "t2?HINT$")>;
+def : M4UnitL1I<(instregex "t2?UDF$", "tBKPT", "t2DBG")>;
+def : M4UnitL1I<(instregex "t?2?Int_eh_sjlj_", "tADDframe", "t?ADJCALL")>;
+def : M4UnitL1I<(instregex "CMP_SWAP", "JUMPTABLE", "MEMCPY")>;
+def : M4UnitL1I<(instregex "VSETLNi32", "VGETLNi32")>;
def : ReadAdvance<ReadALU, 0>;
def : ReadAdvance<ReadALUsr, 0>;
@@ -112,6 +127,9 @@ def : M4UnitL1<WriteVST1>;
def : M4UnitL1<WriteVST2>;
def : M4UnitL1<WriteVST3>;
def : M4UnitL1<WriteVST4>;
+def : M4UnitL1I<(instregex "VMOVS", "FCONSTS", "VCMP", "VNEG", "VABS")>;
+def : M4UnitL2I<(instregex "VMOVD")>;
+def : M4UnitL1I<(instregex "VMRS", "VMSR", "FMSTAT")>;
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 0>;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 978faed776b0..09603057b2c8 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -125,7 +125,7 @@ const CallLowering *ARMSubtarget::getCallLowering() const {
return CallLoweringInfo.get();
}
-const InstructionSelector *ARMSubtarget::getInstructionSelector() const {
+InstructionSelector *ARMSubtarget::getInstructionSelector() const {
return InstSelector.get();
}
@@ -205,9 +205,9 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
NoARM = true;
if (isAAPCS_ABI())
- stackAlignment = 8;
+ stackAlignment = Align(8);
if (isTargetNaCl() || isAAPCS16_ABI())
- stackAlignment = 16;
+ stackAlignment = Align(16);
// FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
// emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
@@ -253,6 +253,10 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isRWPI())
ReserveR9 = true;
+ // If MVEVectorCostFactor is still 0 (i.e. not set by a subtarget feature),
+ // default it to 2.
+ if (MVEVectorCostFactor == 0)
+ MVEVectorCostFactor = 2;
+
// FIXME: Teach TableGen to deal with these instead of doing it manually here.
switch (ARMProcFamily) {
case Others:
@@ -296,13 +300,15 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
LdStMultipleTiming = SingleIssuePlusExtras;
MaxInterleaveFactor = 4;
if (!isThumb())
- PrefLoopAlignment = 3;
+ PrefLoopLogAlignment = 3;
break;
case Kryo:
break;
case Krait:
PreISelOperandLatencyAdjustment = 1;
break;
+ case NeoverseN1:
+ break;
case Swift:
MaxInterleaveFactor = 2;
LdStMultipleTiming = SingleIssuePlusExtras;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index c2b0f052b843..ef460342a69e 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -71,6 +71,7 @@ protected:
Exynos,
Krait,
Kryo,
+ NeoverseN1,
Swift
};
enum ARMProcClassEnum {
@@ -179,11 +180,9 @@ protected:
bool HasVFPv3SP = false;
bool HasVFPv4SP = false;
bool HasFPARMv8SP = false;
- bool HasVFPv2D16 = false;
bool HasVFPv3D16 = false;
bool HasVFPv4D16 = false;
bool HasFPARMv8D16 = false;
- bool HasVFPv2D16SP = false;
bool HasVFPv3D16SP = false;
bool HasVFPv4D16SP = false;
bool HasFPARMv8D16SP = false;
@@ -450,7 +449,7 @@ protected:
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
- unsigned stackAlignment = 4;
+ Align stackAlignment = Align(4);
/// CPUString - String name of used CPU.
std::string CPUString;
@@ -469,7 +468,12 @@ protected:
int PreISelOperandLatencyAdjustment = 2;
/// What alignment is preferred for loop bodies, in log2(bytes).
- unsigned PrefLoopAlignment = 0;
+ unsigned PrefLoopLogAlignment = 0;
+
+ /// The cost factor for MVE instructions, representing the number of beats an
+ /// instruction can take. The default is 2 (set in initSubtargetFeatures so
+ /// that subtarget features can set it to a value less than 2).
+ unsigned MVEVectorCostFactor = 0;
/// OptMinSize - True if we're optimising for minimum code size, equal to
/// the function attribute.
@@ -535,7 +539,7 @@ public:
}
const CallLowering *getCallLowering() const override;
- const InstructionSelector *getInstructionSelector() const override;
+ InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
@@ -600,7 +604,7 @@ public:
bool hasARMOps() const { return !NoARM; }
- bool hasVFP2Base() const { return HasVFPv2D16SP; }
+ bool hasVFP2Base() const { return HasVFPv2SP; }
bool hasVFP3Base() const { return HasVFPv3D16SP; }
bool hasVFP4Base() const { return HasVFPv4D16SP; }
bool hasFPARMv8Base() const { return HasFPARMv8D16SP; }
@@ -668,6 +672,12 @@ public:
bool hasSB() const { return HasSB; }
bool genLongCalls() const { return GenLongCalls; }
bool genExecuteOnly() const { return GenExecuteOnly; }
+ bool hasBaseDSP() const {
+ if (isThumb())
+ return hasDSP();
+ else
+ return hasV5TEOps();
+ }
bool hasFP16() const { return HasFP16; }
bool hasD32() const { return HasD32; }
@@ -812,7 +822,7 @@ public:
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
- unsigned getStackAlignment() const { return stackAlignment; }
+ Align getStackAlignment() const { return stackAlignment; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
@@ -853,9 +863,9 @@ public:
return isROPI() || !isTargetELF();
}
- unsigned getPrefLoopAlignment() const {
- return PrefLoopAlignment;
- }
+ unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }
+
+ unsigned getMVEVectorCostFactor() const { return MVEVectorCostFactor; }
bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
unsigned PhysReg) const override;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 7f0aae1739b3..5c8007f101d9 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -96,15 +96,16 @@ extern "C" void LLVMInitializeARMTarget() {
initializeARMExpandPseudoPass(Registry);
initializeThumb2SizeReducePass(Registry);
initializeMVEVPTBlockPass(Registry);
+ initializeMVETailPredicationPass(Registry);
initializeARMLowOverheadLoopsPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
- return llvm::make_unique<TargetLoweringObjectFileMachO>();
+ return std::make_unique<TargetLoweringObjectFileMachO>();
if (TT.isOSWindows())
- return llvm::make_unique<TargetLoweringObjectFileCOFF>();
- return llvm::make_unique<ARMElfTargetObjectFile>();
+ return std::make_unique<TargetLoweringObjectFileCOFF>();
+ return std::make_unique<ARMElfTargetObjectFile>();
}
static ARMBaseTargetMachine::ARMABI
@@ -282,7 +283,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
+ I = std::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
F.hasMinSize());
if (!I->isThumb() && !I->hasARMOps())
@@ -447,8 +448,10 @@ bool ARMPassConfig::addPreISel() {
MergeExternalByDefault));
}
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOpt::None) {
addPass(createHardwareLoopsPass());
+ addPass(createMVETailPredicationPass());
+ }
return false;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2a8ec734a05f..86c8684d14dc 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -36,8 +36,12 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
+static cl::opt<bool> EnableMaskedLoadStores(
+ "enable-arm-maskedldst", cl::Hidden, cl::init(false),
+ cl::desc("Enable the generation of masked loads and stores"));
+
static cl::opt<bool> DisableLowOverheadLoops(
- "disable-arm-loloops", cl::Hidden, cl::init(true),
+ "disable-arm-loloops", cl::Hidden, cl::init(false),
cl::desc("Disable the generation of low-overhead loops"));
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
@@ -167,6 +171,42 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (!SrcTy.isSimple() || !DstTy.isSimple())
return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ // The extend of a load is free
+ if (I && isa<LoadInst>(I->getOperand(0))) {
+ static const TypeConversionCostTblEntry LoadConversionTbl[] = {
+ {ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0},
+ {ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0},
+ {ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0},
+ {ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0},
+ {ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0},
+ {ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0},
+ {ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1},
+ {ISD::ZERO_EXTEND, MVT::i64, MVT::i32, 1},
+ {ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 1},
+ {ISD::ZERO_EXTEND, MVT::i64, MVT::i16, 1},
+ {ISD::SIGN_EXTEND, MVT::i64, MVT::i8, 1},
+ {ISD::ZERO_EXTEND, MVT::i64, MVT::i8, 1},
+ };
+ if (const auto *Entry = ConvertCostTableLookup(
+ LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
+ return Entry->Cost;
+
+ static const TypeConversionCostTblEntry MVELoadConversionTbl[] = {
+ {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0},
+ {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0},
+ {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 0},
+ {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0},
+ {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0},
+ {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0},
+ };
+ if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
+ if (const auto *Entry =
+ ConvertCostTableLookup(MVELoadConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+ }
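+
+ // A worked reading of the tables above (my arithmetic, derived from the
+ // entries themselves): the extension is free exactly when it folds into
+ // the load, e.g.
+ //   %x = sext i16 %loaded to i32       ; cost 0 - folds into ldrsh
+ //   %y = zext i16 %loaded to i64       ; cost 1 - one extra op for i64
+ //   %z = sext <8 x i8> %v to <8 x i16> ; cost 0 on MVE - a widening load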
+
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
@@ -313,6 +353,31 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return Entry->Cost;
}
+ // MVE extend costs, taken from codegen tests. i8->i16 or i16->i32 is one
+ // instruction, i8->i32 is two. i64 zexts are a VAND with a constant; sexts
+ // are linearised so take more.
+ static const TypeConversionCostTblEntry MVEVectorConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 10 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 10 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 8 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 2 },
+ };
+
+ if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
+ if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl,
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost * ST->getMVEVectorCostFactor();
+ }
+
// Scalar integer conversion costs.
static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
@@ -332,7 +397,10 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return Entry->Cost;
}
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
+ ? ST->getMVEVectorCostFactor()
+ : 1;
+ return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src);
}
int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
@@ -343,8 +411,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
return 3;
- if ((Opcode == Instruction::InsertElement ||
- Opcode == Instruction::ExtractElement)) {
+ if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
+ Opcode == Instruction::ExtractElement)) {
// Cross-class copies are expensive on many microarchitectures,
// so assume they are expensive by default.
if (ValTy->getVectorElementType()->isIntegerTy())
@@ -357,6 +425,17 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
}
+ if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
+ Opcode == Instruction::ExtractElement)) {
+ // We say MVE moves cost at least the MVEVectorCostFactor, even though they
+ // are scalar instructions. This discourages mixing scalar and vector code,
+ // so that we do not vectorise where we would end up just scalarising the
+ // result anyway.
+ return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index),
+ ST->getMVEVectorCostFactor()) *
+ ValTy->getVectorNumElements() / 2;
+ }
+
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
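
To make the MVE insert/extract formula above concrete (assuming the default MVEVectorCostFactor of 2 and a base cost no larger than the factor):

    // extractelement <4 x i32>: max(base, 2) * 4 / 2 = 4
    // insertelement  <8 x i16>: max(base, 2) * 8 / 2 = 8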
@@ -385,7 +464,10 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
return LT.first;
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy()
+ ? ST->getMVEVectorCostFactor()
+ : 1;
+ return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -397,13 +479,37 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && SE &&
- !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
- return NumVectorInstToHideOverhead;
+ if (ST->hasNEON()) {
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
+ return NumVectorInstToHideOverhead;
- // In many cases the address computation is not merged into the instruction
- // addressing mode.
- return 1;
+ // In many cases the address computation is not merged into the instruction
+ // addressing mode.
+ return 1;
+ }
+ return BaseT::getAddressComputationCost(Ty, SE, Ptr);
+}
+
+bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
+ if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
+ return false;
+
+ if (auto *VecTy = dyn_cast<VectorType>(DataTy)) {
+ // Don't support v2i1 yet.
+ if (VecTy->getNumElements() == 2)
+ return false;
+
+ // We don't support extending fp types.
+ unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
+ if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
+ return false;
+ }
+
+ unsigned EltWidth = DataTy->getScalarSizeInBits();
+ return (EltWidth == 32 && (!Alignment || Alignment >= 4)) ||
+ (EltWidth == 16 && (!Alignment || Alignment >= 2)) ||
+ (EltWidth == 8);
}
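
The net effect of the rule above is that masked accesses are legal only when MVE can use a naturally aligned element access (or the alignment is unspecified). A hedged illustration of the kind of loop this enables, with illustrative names:

    #include <stdint.h>

    // With -enable-arm-maskedldst on an MVE target, the vectorizer may emit a
    // masked v4i32 load/store for the loop tail instead of a scalar epilogue,
    // provided the accesses are 4-byte aligned (or carry no alignment).
    void add_inplace(int32_t *dst, const int32_t *src, int n) {
      for (int i = 0; i < n; ++i)
        dst[i] += src[i];
    }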
int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
@@ -442,78 +548,96 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- if (Kind == TTI::SK_Broadcast) {
- static const CostTblEntry NEONDupTbl[] = {
- // VDUP handles these cases.
- {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
-
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
-
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
- if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
- LT.second))
- return LT.first * Entry->Cost;
-
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
- }
- if (Kind == TTI::SK_Reverse) {
- static const CostTblEntry NEONShuffleTbl[] = {
- // Reverse shuffle cost one instruction if we are shuffling within a
- // double word (vrev) or two if we shuffle a quad word (vrev, vext).
- {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
-
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
-
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
- if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
- LT.second))
- return LT.first * Entry->Cost;
-
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
- }
- if (Kind == TTI::SK_Select) {
- static const CostTblEntry NEONSelShuffleTbl[] = {
- // Select shuffle cost table for ARM. Cost is the number of instructions
- // required to create the shuffled vector.
+ if (ST->hasNEON()) {
+ if (Kind == TTI::SK_Broadcast) {
+ static const CostTblEntry NEONDupTbl[] = {
+ // VDUP handles these cases.
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ if (const auto *Entry =
+ CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+ }
+ if (Kind == TTI::SK_Reverse) {
+ static const CostTblEntry NEONShuffleTbl[] = {
+ // Reverse shuffle costs one instruction if we are shuffling within a
+ // double word (vrev) or two if we shuffle a quad word (vrev, vext).
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ if (const auto *Entry =
+ CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+ }
+ if (Kind == TTI::SK_Select) {
+ static const CostTblEntry NEONSelShuffleTbl[] = {
+ // Select shuffle cost table for ARM. Cost is the number of instructions
+ // required to create the shuffled vector.
- {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
- {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return LT.first * Entry->Cost;
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+ }
+ }
+ if (ST->hasMVEIntegerOps()) {
+ if (Kind == TTI::SK_Broadcast) {
+ static const CostTblEntry MVEDupTbl[] = {
+ // VDUP handles these cases.
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost * ST->getMVEVectorCostFactor();
+ }
}
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
+ ? ST->getMVEVectorCostFactor()
+ : 1;
+ return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
int ARMTTIImpl::getArithmeticInstrCost(
@@ -567,38 +691,64 @@ int ARMTTIImpl::getArithmeticInstrCost(
// Multiplication.
};
- if (ST->hasNEON())
+ if (ST->hasNEON()) {
if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
return LT.first * Entry->Cost;
- int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
- Opd1PropInfo, Opd2PropInfo);
-
- // This is somewhat of a hack. The problem that we are facing is that SROA
- // creates a sequence of shift, and, or instructions to construct values.
- // These sequences are recognized by the ISel and have zero-cost. Not so for
- // the vectorized code. Because we have support for v2i64 but not i64 those
- // sequences look particularly beneficial to vectorize.
- // To work around this we increase the cost of v2i64 operations to make them
- // seem less beneficial.
- if (LT.second == MVT::v2i64 &&
- Op2Info == TargetTransformInfo::OK_UniformConstantValue)
- Cost += 4;
-
- return Cost;
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
+
+ // This is somewhat of a hack. The problem that we are facing is that SROA
+ // creates a sequence of shift, and, or instructions to construct values.
+ // These sequences are recognized by the ISel and have zero-cost. Not so for
+ // the vectorized code. Because we have support for v2i64 but not i64 those
+ // sequences look particularly beneficial to vectorize.
+ // To work around this we increase the cost of v2i64 operations to make them
+ // seem less beneficial.
+ if (LT.second == MVT::v2i64 &&
+ Op2Info == TargetTransformInfo::OK_UniformConstantValue)
+ Cost += 4;
+
+ return Cost;
+ }
+
+ int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy()
+ ? ST->getMVEVectorCostFactor()
+ : 1;
+
+ // The rest of this mostly follows what is done in
+ // BaseT::getArithmeticInstrCost, without treating floats as more expensive
+ // than scalars or increasing the costs for custom operations. The result is
+ // also multiplied by the MVEVectorCostFactor where appropriate.
+ if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
+ return LT.first * BaseCost;
+
+ // Else this is expand, assume that we need to scalarize this op.
+ if (Ty->isVectorTy()) {
+ unsigned Num = Ty->getVectorNumElements();
+ unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ // Return the cost of multiple scalar invocation plus the cost of
+ // inserting and extracting the values.
+ return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost;
+ }
+
+ return BaseCost;
}
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
- if (Src->isVectorTy() && Alignment != 16 &&
+ if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isDoubleTy()) {
// Unaligned loads/stores are extremely inefficient.
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
return LT.first * 4;
}
- return LT.first;
+ int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
+ ? ST->getMVEVectorCostFactor()
+ : 1;
+ return BaseCost * LT.first;
}
int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
@@ -893,6 +1043,11 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
return;
}
+ // Don't unroll vectorised loops. MVE does not benefit from unrolling as
+ // much as scalar code does.
+ if (I.getType()->isVectorTy())
+ return;
+
SmallVector<const Value*, 4> Operands(I.value_op_begin(),
I.value_op_end());
Cost += getUserCost(&I, Operands);
@@ -914,3 +1069,28 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (Cost < 12)
UP.Force = true;
}
+
+bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const {
+ assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
+ unsigned ScalarBits = Ty->getScalarSizeInBits();
+ if (!ST->hasMVEIntegerOps())
+ return false;
+
+ switch (Opcode) {
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Mul:
+ case Instruction::FCmp:
+ return false;
+ case Instruction::ICmp:
+ case Instruction::Add:
+ return ScalarBits < 64 && ScalarBits * Ty->getVectorNumElements() == 128;
+ default:
+ llvm_unreachable("Unhandled reduction opcode");
+ }
+ return false;
+}
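
Under this hook, only full-width (128-bit) integer add and icmp reductions use the intrinsic form; spelling out the condition for a few types (my arithmetic):

    // v16i8 add: 8 * 16 = 128, 8 < 64  -> true (maps onto an MVE VADDV)
    // v8i16 add: 16 * 8 = 128          -> true
    // v4i32 add: 32 * 4 = 128          -> true
    // v2i64 add: ScalarBits == 64      -> false
    // v4i16 add: 16 * 4 = 64 != 128    -> false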
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 52f6ea4a6e2f..a878fdcfe3c7 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -101,9 +101,9 @@ public:
/// Floating-point computation using ARMv8 AArch32 Advanced
/// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
- /// is IEEE-754 compliant, but it's not covered in this target.
+ /// and Arm MVE are IEEE-754 compliant.
bool isFPVectorizationPotentiallyUnsafe() {
- return !ST->isTargetDarwin();
+ return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
}
/// \name Scalar TTI Implementations
@@ -122,10 +122,13 @@ public:
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector) {
+ unsigned getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (Vector) {
if (ST->hasNEON())
return 16;
+ if (ST->hasMVEIntegerOps())
+ return 8;
return 0;
}
@@ -138,6 +141,8 @@ public:
if (Vector) {
if (ST->hasNEON())
return 128;
+ if (ST->hasMVEIntegerOps())
+ return 128;
return 0;
}
@@ -148,10 +153,23 @@ public:
return ST->getMaxInterleaveFactor();
}
+ bool isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment);
+
+ bool isLegalMaskedStore(Type *DataTy, MaybeAlign Alignment) {
+ return isLegalMaskedLoad(DataTy, Alignment);
+ }
+
int getMemcpyCost(const Instruction *I);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const;
+
+ bool shouldExpandReduction(const IntrinsicInst *II) const {
+ return false;
+ }
+
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr);
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 1da9452f1d22..d2c355c1da75 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -2275,6 +2275,14 @@ public:
return Value >= 1 && Value <= 32;
}
+ bool isMveSaturateOp() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ uint64_t Value = CE->getValue();
+ return Value == 48 || Value == 64;
+ }
+
bool isITCondCodeNoAL() const {
if (!isITCondCode()) return false;
ARMCC::CondCodes CC = getCondCode();
@@ -2479,28 +2487,28 @@ public:
void addModImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
uint32_t Enc = ARM_AM::getSOImmVal(~CE->getValue());
Inst.addOperand(MCOperand::createImm(Enc));
}
void addModImmNegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
uint32_t Enc = ARM_AM::getSOImmVal(-CE->getValue());
Inst.addOperand(MCOperand::createImm(Enc));
}
void addThumbModImmNeg8_255Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
uint32_t Val = -CE->getValue();
Inst.addOperand(MCOperand::createImm(Val));
}
void addThumbModImmNeg1_7Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
uint32_t Val = -CE->getValue();
Inst.addOperand(MCOperand::createImm(Val));
}
@@ -2523,19 +2531,19 @@ public:
void addFBits16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(16 - CE->getValue()));
}
void addFBits32Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(32 - CE->getValue()));
}
void addFPImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
Inst.addOperand(MCOperand::createImm(Val));
}
@@ -2544,7 +2552,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// FIXME: We really want to scale the value here, but the LDRD/STRD
// instruction don't encode operands that way yet.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue()));
}
@@ -2552,35 +2560,31 @@ public:
assert(N == 1 && "Invalid number of operands!");
// FIXME: We really want to scale the value here, but the VSTR/VLDR_VSYSR
// instruction don't encode operands that way yet.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue()));
}
void addImm7Shift0Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- assert(CE != nullptr && "Invalid operand type!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue()));
}
void addImm7Shift1Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- assert(CE != nullptr && "Invalid operand type!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue()));
}
void addImm7Shift2Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- assert(CE != nullptr && "Invalid operand type!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue()));
}
void addImm7Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- assert(CE != nullptr && "Invalid operand type!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue()));
}
@@ -2588,7 +2592,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The immediate is scaled by four in the encoding and is stored
// in the MCInst as such. Lop off the low two bits here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue() / 4));
}
@@ -2596,7 +2600,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The immediate is scaled by four in the encoding and is stored
// in the MCInst as such. Lop off the low two bits here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(-(CE->getValue() / 4)));
}
@@ -2604,7 +2608,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The immediate is scaled by four in the encoding and is stored
// in the MCInst as such. Lop off the low two bits here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue() / 4));
}
@@ -2612,7 +2616,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The constant encodes as the immediate-1, and we store in the instruction
// the bits as encoded, so subtract off one here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue() - 1));
}
@@ -2620,7 +2624,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The constant encodes as the immediate-1, and we store in the instruction
// the bits as encoded, so subtract off one here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue() - 1));
}
@@ -2628,7 +2632,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The constant encodes as the immediate, except for 32, which encodes as
// zero.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
unsigned Imm = CE->getValue();
Inst.addOperand(MCOperand::createImm((Imm == 32 ? 0 : Imm)));
}
@@ -2637,7 +2641,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// An ASR value of 32 encodes as 0, so that's how we want to add it to
// the instruction as well.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
int Val = CE->getValue();
Inst.addOperand(MCOperand::createImm(Val == 32 ? 0 : Val));
}
@@ -2646,7 +2650,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The operand is actually a t2_so_imm, but we have its bitwise
// negation in the assembly source, so twiddle it here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(~(uint32_t)CE->getValue()));
}
@@ -2654,7 +2658,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The operand is actually a t2_so_imm, but we have its
// negation in the assembly source, so twiddle it here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue()));
}
@@ -2662,7 +2666,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The operand is actually an imm0_4095, but we have its
// negation in the assembly source, so twiddle it here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue()));
}
@@ -2671,9 +2675,7 @@ public:
Inst.addOperand(MCOperand::createImm(CE->getValue() >> 2));
return;
}
-
- const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
- assert(SR && "Unknown value type!");
+ const MCSymbolRefExpr *SR = cast<MCSymbolRefExpr>(Imm.Val);
Inst.addOperand(MCOperand::createExpr(SR));
}
@@ -2685,10 +2687,7 @@ public:
Inst.addOperand(MCOperand::createImm(CE->getValue()));
return;
}
-
- const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
-
- assert(SR && "Unknown value type!");
+ const MCSymbolRefExpr *SR = cast<MCSymbolRefExpr>(Imm.Val);
Inst.addOperand(MCOperand::createExpr(SR));
return;
}
@@ -2750,7 +2749,7 @@ public:
return;
}
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
int Val = CE->getValue();
Inst.addOperand(MCOperand::createImm(Val));
}
@@ -3130,7 +3129,7 @@ public:
void addPowerTwoOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue()));
}
@@ -3225,14 +3224,14 @@ public:
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
// Mask in that this is an i8 splat.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue() | 0xe00));
}
void addNEONi16splatOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
Value = ARM_AM::encodeNEONi16splat(Value);
Inst.addOperand(MCOperand::createImm(Value));
@@ -3241,7 +3240,7 @@ public:
void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff);
Inst.addOperand(MCOperand::createImm(Value));
@@ -3250,7 +3249,7 @@ public:
void addNEONi32splatOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
Value = ARM_AM::encodeNEONi32splat(Value);
Inst.addOperand(MCOperand::createImm(Value));
@@ -3259,7 +3258,7 @@ public:
void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
Value = ARM_AM::encodeNEONi32splat(~Value);
Inst.addOperand(MCOperand::createImm(Value));
@@ -3267,7 +3266,7 @@ public:
void addNEONi8ReplicateOperands(MCInst &Inst, bool Inv) const {
// The immediate encodes the type of constant as well as the value.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
assert((Inst.getOpcode() == ARM::VMOVv8i8 ||
Inst.getOpcode() == ARM::VMOVv16i8) &&
"All instructions that wants to replicate non-zero byte "
@@ -3298,7 +3297,7 @@ public:
void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
unsigned Value = encodeNeonVMOVImmediate(CE->getValue());
Inst.addOperand(MCOperand::createImm(Value));
}
@@ -3310,7 +3309,7 @@ public:
void addNEONvmovi16ReplicateOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
assert((Inst.getOpcode() == ARM::VMOVv4i16 ||
Inst.getOpcode() == ARM::VMOVv8i16 ||
Inst.getOpcode() == ARM::VMVNv4i16 ||
@@ -3327,14 +3326,14 @@ public:
void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
unsigned Value = encodeNeonVMOVImmediate(~CE->getValue());
Inst.addOperand(MCOperand::createImm(Value));
}
void addNEONvmovi32ReplicateOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
assert((Inst.getOpcode() == ARM::VMOVv2i32 ||
Inst.getOpcode() == ARM::VMOVv4i32 ||
Inst.getOpcode() == ARM::VMVNv2i32 ||
@@ -3349,7 +3348,7 @@ public:
void addNEONi64splatOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
uint64_t Value = CE->getValue();
unsigned Imm = 0;
for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
@@ -3360,20 +3359,28 @@ public:
void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm(CE->getValue() / 90));
}
void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::createImm((CE->getValue() - 90) / 180));
}
+ void addMveSaturateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ unsigned Imm = CE->getValue();
+ assert((Imm == 48 || Imm == 64) && "Invalid saturate operand");
+ Inst.addOperand(MCOperand::createImm(Imm == 48 ? 1 : 0));
+ }
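
The saturate operand therefore admits only the immediates 48 and 64, with 48 encoded as 1 and 64 as 0. A standalone restatement of the mapping (a sketch mirroring the method above; the free function is mine):

    #include <cassert>
    #include <cstdint>

    // Encoding bit for an MVE saturate operand (#48 or #64).
    unsigned encodeMveSaturate(uint64_t Imm) {
      assert((Imm == 48 || Imm == 64) && "Invalid saturate operand");
      return Imm == 48 ? 1u : 0u;
    }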
+
void print(raw_ostream &OS) const override;
static std::unique_ptr<ARMOperand> CreateITMask(unsigned Mask, SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_ITCondMask);
+ auto Op = std::make_unique<ARMOperand>(k_ITCondMask);
Op->ITMask.Mask = Mask;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3382,7 +3389,7 @@ public:
static std::unique_ptr<ARMOperand> CreateCondCode(ARMCC::CondCodes CC,
SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_CondCode);
+ auto Op = std::make_unique<ARMOperand>(k_CondCode);
Op->CC.Val = CC;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3391,7 +3398,7 @@ public:
static std::unique_ptr<ARMOperand> CreateVPTPred(ARMVCC::VPTCodes CC,
SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_VPTPred);
+ auto Op = std::make_unique<ARMOperand>(k_VPTPred);
Op->VCC.Val = CC;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3399,7 +3406,7 @@ public:
}
static std::unique_ptr<ARMOperand> CreateCoprocNum(unsigned CopVal, SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_CoprocNum);
+ auto Op = std::make_unique<ARMOperand>(k_CoprocNum);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3407,7 +3414,7 @@ public:
}
static std::unique_ptr<ARMOperand> CreateCoprocReg(unsigned CopVal, SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_CoprocReg);
+ auto Op = std::make_unique<ARMOperand>(k_CoprocReg);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3416,7 +3423,7 @@ public:
static std::unique_ptr<ARMOperand> CreateCoprocOption(unsigned Val, SMLoc S,
SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_CoprocOption);
+ auto Op = std::make_unique<ARMOperand>(k_CoprocOption);
Op->Cop.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -3424,7 +3431,7 @@ public:
}
static std::unique_ptr<ARMOperand> CreateCCOut(unsigned RegNum, SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_CCOut);
+ auto Op = std::make_unique<ARMOperand>(k_CCOut);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3432,7 +3439,7 @@ public:
}
static std::unique_ptr<ARMOperand> CreateToken(StringRef Str, SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_Token);
+ auto Op = std::make_unique<ARMOperand>(k_Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -3442,7 +3449,7 @@ public:
static std::unique_ptr<ARMOperand> CreateReg(unsigned RegNum, SMLoc S,
SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_Register);
+ auto Op = std::make_unique<ARMOperand>(k_Register);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -3453,7 +3460,7 @@ public:
CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
unsigned ShiftReg, unsigned ShiftImm, SMLoc S,
SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_ShiftedRegister);
+ auto Op = std::make_unique<ARMOperand>(k_ShiftedRegister);
Op->RegShiftedReg.ShiftTy = ShTy;
Op->RegShiftedReg.SrcReg = SrcReg;
Op->RegShiftedReg.ShiftReg = ShiftReg;
@@ -3466,7 +3473,7 @@ public:
static std::unique_ptr<ARMOperand>
CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
unsigned ShiftImm, SMLoc S, SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_ShiftedImmediate);
+ auto Op = std::make_unique<ARMOperand>(k_ShiftedImmediate);
Op->RegShiftedImm.ShiftTy = ShTy;
Op->RegShiftedImm.SrcReg = SrcReg;
Op->RegShiftedImm.ShiftImm = ShiftImm;
@@ -3477,7 +3484,7 @@ public:
static std::unique_ptr<ARMOperand> CreateShifterImm(bool isASR, unsigned Imm,
SMLoc S, SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_ShifterImmediate);
+ auto Op = std::make_unique<ARMOperand>(k_ShifterImmediate);
Op->ShifterImm.isASR = isASR;
Op->ShifterImm.Imm = Imm;
Op->StartLoc = S;
@@ -3487,7 +3494,7 @@ public:
static std::unique_ptr<ARMOperand> CreateRotImm(unsigned Imm, SMLoc S,
SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_RotateImmediate);
+ auto Op = std::make_unique<ARMOperand>(k_RotateImmediate);
Op->RotImm.Imm = Imm;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -3496,7 +3503,7 @@ public:
static std::unique_ptr<ARMOperand> CreateModImm(unsigned Bits, unsigned Rot,
SMLoc S, SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_ModifiedImmediate);
+ auto Op = std::make_unique<ARMOperand>(k_ModifiedImmediate);
Op->ModImm.Bits = Bits;
Op->ModImm.Rot = Rot;
Op->StartLoc = S;
@@ -3506,7 +3513,7 @@ public:
static std::unique_ptr<ARMOperand>
CreateConstantPoolImm(const MCExpr *Val, SMLoc S, SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_ConstantPoolImmediate);
+ auto Op = std::make_unique<ARMOperand>(k_ConstantPoolImmediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -3515,7 +3522,7 @@ public:
static std::unique_ptr<ARMOperand>
CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor);
+ auto Op = std::make_unique<ARMOperand>(k_BitfieldDescriptor);
Op->Bitfield.LSB = LSB;
Op->Bitfield.Width = Width;
Op->StartLoc = S;
@@ -3543,16 +3550,15 @@ public:
Kind = k_SPRRegisterList;
}
- // Sort based on the register encoding values.
- array_pod_sort(Regs.begin(), Regs.end());
-
if (Kind == k_RegisterList && Regs.back().second == ARM::APSR)
Kind = k_RegisterListWithAPSR;
- auto Op = make_unique<ARMOperand>(Kind);
- for (SmallVectorImpl<std::pair<unsigned, unsigned>>::const_iterator
- I = Regs.begin(), E = Regs.end(); I != E; ++I)
- Op->Registers.push_back(I->second);
+ assert(std::is_sorted(Regs.begin(), Regs.end()) &&
+ "Register list must be sorted by encoding");
+
+ auto Op = std::make_unique<ARMOperand>(Kind);
+ for (const auto &P : Regs)
+ Op->Registers.push_back(P.second);
Op->StartLoc = StartLoc;
Op->EndLoc = EndLoc;
@@ -3563,7 +3569,7 @@ public:
unsigned Count,
bool isDoubleSpaced,
SMLoc S, SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_VectorList);
+ auto Op = std::make_unique<ARMOperand>(k_VectorList);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.isDoubleSpaced = isDoubleSpaced;
@@ -3575,7 +3581,7 @@ public:
static std::unique_ptr<ARMOperand>
CreateVectorListAllLanes(unsigned RegNum, unsigned Count, bool isDoubleSpaced,
SMLoc S, SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_VectorListAllLanes);
+ auto Op = std::make_unique<ARMOperand>(k_VectorListAllLanes);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.isDoubleSpaced = isDoubleSpaced;
@@ -3587,7 +3593,7 @@ public:
static std::unique_ptr<ARMOperand>
CreateVectorListIndexed(unsigned RegNum, unsigned Count, unsigned Index,
bool isDoubleSpaced, SMLoc S, SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_VectorListIndexed);
+ auto Op = std::make_unique<ARMOperand>(k_VectorListIndexed);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.LaneIndex = Index;
@@ -3599,7 +3605,7 @@ public:
static std::unique_ptr<ARMOperand>
CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
- auto Op = make_unique<ARMOperand>(k_VectorIndex);
+ auto Op = std::make_unique<ARMOperand>(k_VectorIndex);
Op->VectorIndex.Val = Idx;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -3608,7 +3614,7 @@ public:
static std::unique_ptr<ARMOperand> CreateImm(const MCExpr *Val, SMLoc S,
SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_Immediate);
+ auto Op = std::make_unique<ARMOperand>(k_Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -3620,7 +3626,7 @@ public:
unsigned OffsetRegNum, ARM_AM::ShiftOpc ShiftType,
unsigned ShiftImm, unsigned Alignment, bool isNegative, SMLoc S,
SMLoc E, SMLoc AlignmentLoc = SMLoc()) {
- auto Op = make_unique<ARMOperand>(k_Memory);
+ auto Op = std::make_unique<ARMOperand>(k_Memory);
Op->Memory.BaseRegNum = BaseRegNum;
Op->Memory.OffsetImm = OffsetImm;
Op->Memory.OffsetRegNum = OffsetRegNum;
@@ -3637,7 +3643,7 @@ public:
static std::unique_ptr<ARMOperand>
CreatePostIdxReg(unsigned RegNum, bool isAdd, ARM_AM::ShiftOpc ShiftTy,
unsigned ShiftImm, SMLoc S, SMLoc E) {
- auto Op = make_unique<ARMOperand>(k_PostIndexRegister);
+ auto Op = std::make_unique<ARMOperand>(k_PostIndexRegister);
Op->PostIdxReg.RegNum = RegNum;
Op->PostIdxReg.isAdd = isAdd;
Op->PostIdxReg.ShiftTy = ShiftTy;
@@ -3649,7 +3655,7 @@ public:
static std::unique_ptr<ARMOperand> CreateMemBarrierOpt(ARM_MB::MemBOpt Opt,
SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_MemBarrierOpt);
+ auto Op = std::make_unique<ARMOperand>(k_MemBarrierOpt);
Op->MBOpt.Val = Opt;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3658,7 +3664,7 @@ public:
static std::unique_ptr<ARMOperand>
CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_InstSyncBarrierOpt);
+ auto Op = std::make_unique<ARMOperand>(k_InstSyncBarrierOpt);
Op->ISBOpt.Val = Opt;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3667,7 +3673,7 @@ public:
static std::unique_ptr<ARMOperand>
CreateTraceSyncBarrierOpt(ARM_TSB::TraceSyncBOpt Opt, SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_TraceSyncBarrierOpt);
+ auto Op = std::make_unique<ARMOperand>(k_TraceSyncBarrierOpt);
Op->TSBOpt.Val = Opt;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3676,7 +3682,7 @@ public:
static std::unique_ptr<ARMOperand> CreateProcIFlags(ARM_PROC::IFlags IFlags,
SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_ProcIFlags);
+ auto Op = std::make_unique<ARMOperand>(k_ProcIFlags);
Op->IFlags.Val = IFlags;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3684,7 +3690,7 @@ public:
}
static std::unique_ptr<ARMOperand> CreateMSRMask(unsigned MMask, SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_MSRMask);
+ auto Op = std::make_unique<ARMOperand>(k_MSRMask);
Op->MMask.Val = MMask;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -3692,7 +3698,7 @@ public:
}
static std::unique_ptr<ARMOperand> CreateBankedReg(unsigned Reg, SMLoc S) {
- auto Op = make_unique<ARMOperand>(k_BankedReg);
+ auto Op = std::make_unique<ARMOperand>(k_BankedReg);
Op->BankedReg.Val = Reg;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -4253,6 +4259,24 @@ static unsigned getNextRegister(unsigned Reg) {
}
}
+// Insert an <Encoding, Register> pair into an ordered vector. Return true on
+// success, or false if a duplicate encoding is found.
+static bool
+insertNoDuplicates(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
+ unsigned Enc, unsigned Reg) {
+ Regs.emplace_back(Enc, Reg);
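+ // Bubble the new entry backwards towards its sorted position, giving up
+ // (and removing it again) as soon as an entry with the same encoding is
+ // seen.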
+ for (auto I = Regs.rbegin(), J = I + 1, E = Regs.rend(); J != E; ++I, ++J) {
+ if (J->first == Enc) {
+ Regs.erase(J.base());
+ return false;
+ }
+ if (J->first < Enc)
+ break;
+ std::swap(*I, *J);
+ }
+ return true;
+}
+
/// Parse a register list.
bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
bool EnforceOrder) {
@@ -4278,7 +4302,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
EReg = MRI->getEncodingValue(Reg);
- Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ Registers.emplace_back(EReg, Reg);
++Reg;
}
const MCRegisterClass *RC;
@@ -4295,7 +4319,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
// Store the register.
EReg = MRI->getEncodingValue(Reg);
- Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ Registers.emplace_back(EReg, Reg);
// This starts immediately after the first register token in the list,
// so we can see either a comma or a minus (range separator) as a legal
@@ -4326,7 +4350,11 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
while (Reg != EndReg) {
Reg = getNextRegister(Reg);
EReg = MRI->getEncodingValue(Reg);
- Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ if (!insertNoDuplicates(Registers, EReg, Reg)) {
+ Warning(AfterMinusLoc, StringRef("duplicated register (") +
+ ARMInstPrinter::getRegisterName(Reg) +
+ ") in register list");
+ }
}
continue;
}
@@ -4350,11 +4378,16 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
// subset of GPRRegClassId except it contains APSR as well.
RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID];
}
- if (Reg == ARM::VPR && (RC == &ARMMCRegisterClasses[ARM::SPRRegClassID] ||
- RC == &ARMMCRegisterClasses[ARM::DPRRegClassID])) {
+ if (Reg == ARM::VPR &&
+ (RC == &ARMMCRegisterClasses[ARM::SPRRegClassID] ||
+ RC == &ARMMCRegisterClasses[ARM::DPRRegClassID] ||
+ RC == &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID])) {
RC = &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID];
EReg = MRI->getEncodingValue(Reg);
- Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ if (!insertNoDuplicates(Registers, EReg, Reg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ }
continue;
}
// The register must be in the same register class as the first.
@@ -4371,21 +4404,19 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
else if (!ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg))
return Error(RegLoc, "register list not in ascending order");
}
- if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
- Warning(RegLoc, "duplicated register (" + RegTok.getString() +
- ") in register list");
- continue;
- }
// VFP register lists must also be contiguous.
if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
RC != &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID] &&
Reg != OldReg + 1)
return Error(RegLoc, "non-contiguous register range");
EReg = MRI->getEncodingValue(Reg);
- Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ if (!insertNoDuplicates(Registers, EReg, Reg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ }
if (isQReg) {
EReg = MRI->getEncodingValue(++Reg);
- Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ Registers.emplace_back(EReg, Reg);
}
}
@@ -5702,14 +5733,16 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) {
return false;
}
- // If we have a '#', it's an immediate offset, else assume it's a register
- // offset. Be friendly and also accept a plain integer (without a leading
- // hash) for gas compatibility.
+ // If we have a '#' or '$', it's an immediate offset, else assume it's a
+ // register offset. Be friendly and also accept a plain integer or expression
+ // (without a leading hash) for gas compatibility.
if (Parser.getTok().is(AsmToken::Hash) ||
Parser.getTok().is(AsmToken::Dollar) ||
+ Parser.getTok().is(AsmToken::LParen) ||
Parser.getTok().is(AsmToken::Integer)) {
- if (Parser.getTok().isNot(AsmToken::Integer))
- Parser.Lex(); // Eat '#' or '$'.
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar))
+ Parser.Lex(); // Eat '#' or '$'
E = Parser.getTok().getLoc();
bool isNegative = getParser().getTok().is(AsmToken::Minus);
@@ -11308,7 +11341,7 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
SmallVector<uint8_t, 16> Opcodes;
auto parseOne = [&]() -> bool {
- const MCExpr *OE;
+ const MCExpr *OE = nullptr;
SMLoc OpcodeLoc = getLexer().getLoc();
if (check(getLexer().is(AsmToken::EndOfStatement) ||
Parser.parseExpression(OE),
@@ -11694,14 +11727,14 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
{ ARM::AEK_CRYPTO, {Feature_HasV8Bit},
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_FP, {Feature_HasV8Bit},
- {ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
+ {ARM::FeatureVFP2_SP, ARM::FeatureFPARMv8} },
{ (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM),
{Feature_HasV7Bit, Feature_IsNotMClassBit},
{ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
{ ARM::AEK_MP, {Feature_HasV7Bit, Feature_IsNotMClassBit},
{ARM::FeatureMP} },
{ ARM::AEK_SIMD, {Feature_HasV8Bit},
- {ARM::FeatureNEON, ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
+ {ARM::FeatureNEON, ARM::FeatureVFP2_SP, ARM::FeatureFPARMv8} },
{ ARM::AEK_SEC, {Feature_HasV6KBit}, {ARM::FeatureTrustZone} },
// FIXME: Only available in A-class, isel not predicated
{ ARM::AEK_VIRT, {Feature_HasV7Bit}, {ARM::FeatureVirtualization} },
@@ -11775,19 +11808,19 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
// immediate in the syntax.
switch (Kind) {
default: break;
- case MCK__35_0:
+ case MCK__HASH_0:
if (Op.isImm())
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
if (CE->getValue() == 0)
return Match_Success;
break;
- case MCK__35_8:
+ case MCK__HASH_8:
if (Op.isImm())
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
if (CE->getValue() == 8)
return Match_Success;
break;
- case MCK__35_16:
+ case MCK__HASH_16:
if (Op.isImm())
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
if (CE->getValue() == 16)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 673691ebd93e..eabc26d05f47 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -314,7 +314,7 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
@@ -561,6 +561,8 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
@@ -3445,7 +3447,7 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
+DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -5679,7 +5681,7 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
}
}
}
- return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ return DecodeVMOVModImmInstruction(Inst, Insn, Address, Decoder);
}
if (!(imm & 0x20)) return MCDisassembler::Fail;
@@ -5738,7 +5740,7 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
}
}
}
- return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ return DecodeVMOVModImmInstruction(Inst, Insn, Address, Decoder);
}
if (!(imm & 0x20)) return MCDisassembler::Fail;
@@ -6481,6 +6483,12 @@ static DecodeStatus DecodeMVEOverlappingLongShift(
if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
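+ // Decode, but mark as UNPREDICTABLE (SoftFail), encodings where the fixed
+ // field in bits [8:6] is not 0b100 or where Rda and Rm are the same
+ // register.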
+ if (fieldFromInstruction (Insn, 6, 3) != 4)
+ return MCDisassembler::SoftFail;
+
+ if (Rda == Rm)
+ return MCDisassembler::SoftFail;
+
return S;
}
@@ -6503,6 +6511,13 @@ static DecodeStatus DecodeMVEOverlappingLongShift(
if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
+ if (Inst.getOpcode() == ARM::MVE_SQRSHRL ||
+ Inst.getOpcode() == ARM::MVE_UQRSHLL) {
+ unsigned Saturate = fieldFromInstruction(Insn, 7, 1);
+ // Saturate selects the saturation position: 1 means bit 48, 0 means bit 64
+ Inst.addOperand(MCOperand::createImm(Saturate));
+ }
+
return S;
}
@@ -6572,3 +6587,11 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
return S;
}
+
+static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
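+ // VPNOT's operands are not encoded in the instruction: it implicitly
+ // defines and uses VPR, so add both fixed register operands here.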
+ Inst.addOperand(MCOperand::createReg(ARM::VPR));
+ Inst.addOperand(MCOperand::createReg(ARM::VPR));
+ return S;
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 7732a6485a85..24a9fabf0979 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -518,10 +518,10 @@ namespace ARM_AM {
// Valid alignments depend on the specific instruction.
//===--------------------------------------------------------------------===//
- // NEON Modified Immediates
+ // NEON/MVE Modified Immediates
//===--------------------------------------------------------------------===//
//
- // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // Several NEON and MVE instructions (e.g., VMOV) take a "modified immediate"
// vector operand, where a small immediate encoded in the instruction
// specifies a full NEON vector value. These modified immediates are
// represented here as encoded integers. The low 8 bits hold the immediate
@@ -529,20 +529,20 @@ namespace ARM_AM {
// the "Cmode" field of the instruction. The interfaces below treat the
// Op and Cmode values as a single 5-bit value.
- inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ inline unsigned createVMOVModImm(unsigned OpCmode, unsigned Val) {
return (OpCmode << 8) | Val;
}
- inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ inline unsigned getVMOVModImmOpCmode(unsigned ModImm) {
return (ModImm >> 8) & 0x1f;
}
- inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; }
+ inline unsigned getVMOVModImmVal(unsigned ModImm) { return ModImm & 0xff; }
- /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the
/// element value and the element size in bits. (If the element size is
/// smaller than the vector, it is splatted into all the elements.)
- inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
- unsigned OpCmode = getNEONModImmOpCmode(ModImm);
- unsigned Imm8 = getNEONModImmVal(ModImm);
+ inline uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getVMOVModImmOpCmode(ModImm);
+ unsigned Imm8 = getVMOVModImmVal(ModImm);
uint64_t Val = 0;
if (OpCmode == 0xe) {
@@ -572,7 +572,7 @@ namespace ARM_AM {
}
EltBits = 64;
} else {
- llvm_unreachable("Unsupported NEON immediate");
+ llvm_unreachable("Unsupported VMOV immediate");
}
return Val;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index aeab5be78ab4..6196881a9b8f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -233,7 +233,7 @@ static const char *checkPCRelOffset(uint64_t Value, int64_t Min, int64_t Max) {
const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
uint64_t Value) const {
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
case ARM::fixup_arm_thumb_br: {
// Relaxing tB to t2B. tB has a signed 12-bit displacement with the
// low bit being an implied zero. There's an implied +4 offset for the
@@ -870,7 +870,7 @@ bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCValue &Target) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
- const unsigned FixupKind = Fixup.getKind() ;
+ const unsigned FixupKind = Fixup.getKind();
if (FixupKind == FK_NONE)
return true;
if (FixupKind == ARM::fixup_arm_thumb_bl) {
@@ -1105,28 +1105,28 @@ uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
if (Instrs.empty())
return 0;
// Start off assuming CFA is at SP+0.
- int CFARegister = ARM::SP;
+ unsigned CFARegister = ARM::SP;
int CFARegisterOffset = 0;
// Mark savable registers as initially unsaved
DenseMap<unsigned, int> RegOffsets;
int FloatRegCount = 0;
// Process each .cfi directive and build up compact unwind info.
for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
- int Reg;
+ unsigned Reg;
const MCCFIInstruction &Inst = Instrs[i];
switch (Inst.getOperation()) {
case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa
CFARegisterOffset = -Inst.getOffset();
- CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ CFARegister = *MRI.getLLVMRegNum(Inst.getRegister(), true);
break;
case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset
CFARegisterOffset = -Inst.getOffset();
break;
case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register
- CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ CFARegister = *MRI.getLLVMRegNum(Inst.getRegister(), true);
break;
case MCCFIInstruction::OpOffset: // DW_CFA_offset
- Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
RegOffsets[Reg] = Inst.getOffset();
else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index c4daafe8ee97..6293a2462306 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -393,6 +393,9 @@ namespace ARMII {
// in an IT block).
ThumbArithFlagSetting = 1 << 19,
+ // Whether an instruction can be included in an MVE tail-predicated loop.
+ ValidForTailPredication = 1 << 20,
+
//===------------------------------------------------------------------===//
// Code domain.
DomainShift = 15,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index fda19eea1de6..1fee38821a49 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -82,7 +82,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default:
Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
@@ -145,7 +145,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
return ELF::R_ARM_THM_BF18;
}
}
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default:
Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
@@ -263,5 +263,5 @@ void ARMELFObjectWriter::addTargetSectionFlags(MCContext &Ctx,
std::unique_ptr<MCObjectTargetWriter>
llvm::createARMELFObjectWriter(uint8_t OSABI) {
- return llvm::make_unique<ARMELFObjectWriter>(OSABI);
+ return std::make_unique<ARMELFObjectWriter>(OSABI);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
index 45be1ee96342..a1def61b58d9 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -1334,12 +1334,12 @@ void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
<< markup(">");
}
-void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+void ARMInstPrinter::printVMOVModImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
- uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ uint64_t Val = ARM_AM::decodeVMOVModImm(EncodedImm, EltBits);
O << markup("<imm:") << "#0x";
O.write_hex(Val);
O << markup(">");
@@ -1676,3 +1676,11 @@ void ARMInstPrinter::printExpandedImmOperand(const MCInst *MI, unsigned OpNum,
O.write_hex(Val);
O << markup(">");
}
+
+void ARMInstPrinter::printMveSaturateOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
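+ // The operand is a single bit selecting the saturation position: 1 prints
+ // "#48", 0 prints "#64".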
+ uint32_t Val = MI->getOperand(OpNum).getImm();
+ assert(Val <= 1 && "Invalid MVE saturate operand");
+ O << "#" << (Val == 1 ? 48 : 64);
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
index 69026956b60e..eeb811e216fc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
@@ -191,7 +191,7 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ void printVMOVModImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -262,7 +262,8 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printExpandedImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
-
+ void printMveSaturateOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
private:
unsigned DefaultAltIdx = ARM::NoRegAltName;
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index dca6fe37d49a..268fe7efd9ce 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1720,7 +1720,6 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
unsigned Reg = MI.getOperand(Op).getReg();
bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
- bool CLRMRegs = MI.getOpcode() == ARM::t2CLRM;
unsigned Binary = 0;
@@ -1739,21 +1738,13 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
Binary |= NumRegs * 2;
} else {
const MCRegisterInfo &MRI = *CTX.getRegisterInfo();
- if (!CLRMRegs) {
- assert(std::is_sorted(MI.begin() + Op, MI.end(),
- [&](const MCOperand &LHS, const MCOperand &RHS) {
- return MRI.getEncodingValue(LHS.getReg()) <
- MRI.getEncodingValue(RHS.getReg());
- }));
- }
-
+ assert(std::is_sorted(MI.begin() + Op, MI.end(),
+ [&](const MCOperand &LHS, const MCOperand &RHS) {
+ return MRI.getEncodingValue(LHS.getReg()) <
+ MRI.getEncodingValue(RHS.getReg());
+ }));
for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
- unsigned RegNo;
- if (CLRMRegs && MI.getOperand(I).getReg() == ARM::APSR) {
- RegNo = 15;
- } else {
- RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
- }
+ unsigned RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
Binary |= 1 << RegNo;
}
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index c49885023cb2..ed4000c7e5be 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -204,7 +204,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// relocation entry in the low 16 bits of r_address field.
unsigned ThumbBit = 0;
unsigned MovtBit = 0;
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default: break;
case ARM::fixup_arm_movt_hi16:
MovtBit = 1;
@@ -480,7 +480,7 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
// PAIR. I.e. it's correct that we insert the high bits of the addend in the
// MOVW case here. relocation entries.
uint32_t Value = 0;
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default: break;
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_t2_movw_lo16:
@@ -506,5 +506,5 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
std::unique_ptr<MCObjectTargetWriter>
llvm::createARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
- return llvm::make_unique<ARMMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
+ return std::make_unique<ARMMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index b863517c0cca..7b30a61e8ccb 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -249,12 +249,12 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
: ARM::FK_VFPV3_D16)
: (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
: ARM::FK_VFPV3XD)));
- else if (STI.hasFeature(ARM::FeatureVFP2_D16_SP))
+ else if (STI.hasFeature(ARM::FeatureVFP2_SP))
emitFPU(ARM::FK_VFPV2);
}
// ABI_HardFP_use attribute to indicate single precision FP.
- if (STI.hasFeature(ARM::FeatureVFP2_D16_SP) && !STI.hasFeature(ARM::FeatureFP64))
+ if (STI.hasFeature(ARM::FeatureVFP2_SP) && !STI.hasFeature(ARM::FeatureFP64))
emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
ARMBuildAttrs::HardFPSinglePrecision);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 054a95dd1e12..900c5fe30364 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -92,7 +92,7 @@ namespace llvm {
std::unique_ptr<MCObjectTargetWriter>
createARMWinCOFFObjectWriter(bool Is64Bit) {
- return llvm::make_unique<ARMWinCOFFObjectWriter>(Is64Bit);
+ return std::make_unique<ARMWinCOFFObjectWriter>(Is64Bit);
}
} // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index 2e816bea5e91..b3c8146a9bde 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -22,20 +22,10 @@ public:
std::unique_ptr<MCObjectWriter> OW)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
- void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitThumbFunc(MCSymbol *Symbol) override;
void FinishImpl() override;
};
-void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
- switch (Flag) {
- default: llvm_unreachable("not implemented");
- case MCAF_SyntaxUnified:
- case MCAF_Code16:
- break;
- }
-}
-
void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
getAssembler().setIsThumbFunc(Symbol);
}
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 4b25986b90a7..cc31929899b4 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -86,8 +86,8 @@ void MLxExpansion::pushStack(MachineInstr *MI) {
MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
// Look past COPY and INSERT_SUBREG instructions to find the
// real definition MI. This is important for _sfp instructions.
- unsigned Reg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = MI->getOperand(1).getReg();
+ if (Register::isPhysicalRegister(Reg))
return nullptr;
MachineBasicBlock *MBB = MI->getParent();
@@ -97,13 +97,13 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
break;
if (DefMI->isCopyLike()) {
Reg = DefMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
DefMI = MRI->getVRegDef(Reg);
continue;
}
} else if (DefMI->isInsertSubreg()) {
Reg = DefMI->getOperand(2).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
DefMI = MRI->getVRegDef(Reg);
continue;
}
@@ -114,9 +114,8 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
}
unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
- unsigned Reg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !MRI->hasOneNonDBGUse(Reg))
+ Register Reg = MI->getOperand(0).getReg();
+ if (Register::isPhysicalRegister(Reg) || !MRI->hasOneNonDBGUse(Reg))
return Reg;
MachineBasicBlock *MBB = MI->getParent();
@@ -126,8 +125,7 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
Reg = UseMI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !MRI->hasOneNonDBGUse(Reg))
+ if (Register::isPhysicalRegister(Reg) || !MRI->hasOneNonDBGUse(Reg))
return Reg;
UseMI = &*MRI->use_instr_nodbg_begin(Reg);
if (UseMI->getParent() != MBB)
@@ -140,8 +138,8 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
/// a single-MBB loop.
bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
- unsigned Reg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = MI->getOperand(1).getReg();
+ if (Register::isPhysicalRegister(Reg))
return false;
MachineBasicBlock *MBB = MI->getParent();
@@ -154,8 +152,8 @@ outer_continue:
if (DefMI->isPHI()) {
for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
if (DefMI->getOperand(i + 1).getMBB() == MBB) {
- unsigned SrcReg = DefMI->getOperand(i).getReg();
- if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ Register SrcReg = DefMI->getOperand(i).getReg();
+ if (Register::isVirtualRegister(SrcReg)) {
DefMI = MRI->getVRegDef(SrcReg);
goto outer_continue;
}
@@ -163,13 +161,13 @@ outer_continue:
}
} else if (DefMI->isCopyLike()) {
Reg = DefMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
DefMI = MRI->getVRegDef(Reg);
continue;
}
} else if (DefMI->isInsertSubreg()) {
Reg = DefMI->getOperand(2).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
DefMI = MRI->getVRegDef(Reg);
continue;
}
@@ -271,23 +269,23 @@ void
MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
unsigned MulOpc, unsigned AddSubOpc,
bool NegAcc, bool HasLane) {
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
bool DstDead = MI->getOperand(0).isDead();
- unsigned AccReg = MI->getOperand(1).getReg();
- unsigned Src1Reg = MI->getOperand(2).getReg();
- unsigned Src2Reg = MI->getOperand(3).getReg();
+ Register AccReg = MI->getOperand(1).getReg();
+ Register Src1Reg = MI->getOperand(2).getReg();
+ Register Src2Reg = MI->getOperand(3).getReg();
bool Src1Kill = MI->getOperand(2).isKill();
bool Src2Kill = MI->getOperand(3).isKill();
unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
unsigned NextOp = HasLane ? 5 : 4;
ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
- unsigned PredReg = MI->getOperand(++NextOp).getReg();
+ Register PredReg = MI->getOperand(++NextOp).getReg();
const MCInstrDesc &MCID1 = TII->get(MulOpc);
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
const MachineFunction &MF = *MI->getParent()->getParent();
- unsigned TmpReg = MRI->createVirtualRegister(
- TII->getRegClass(MCID1, 0, TRI, MF));
+ Register TmpReg =
+ MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI, MF));
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
diff --git a/lib/Target/ARM/MVETailPredication.cpp b/lib/Target/ARM/MVETailPredication.cpp
new file mode 100644
index 000000000000..4db8ab17c49b
--- /dev/null
+++ b/lib/Target/ARM/MVETailPredication.cpp
@@ -0,0 +1,519 @@
+//===- MVETailPredication.cpp - MVE Tail Predication ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Armv8.1m introduced MVE, M-Profile Vector Extension, and low-overhead
+/// branches to help accelerate DSP applications. These two extensions can be
+/// combined to provide implicit vector predication within a low-overhead loop.
+/// The HardwareLoops pass inserts intrinsics identifying loops that the
+/// backend will attempt to convert into a low-overhead loop. The vectorizer is
+/// responsible for generating a vectorized loop in which the lanes are
+/// predicated upon the iteration counter. This pass looks at these predicated
+/// vector loops, which are targets for low-overhead loops, and prepares them
+/// for code generation. Once the vectorizer has produced a masked loop, there
+/// are a couple of final forms:
+/// - A tail-predicated loop, with implicit predication.
+/// - A loop containing multiple VCTP instructions, predicating multiple VPT
+/// blocks of instructions operating on different vector types.
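+///
+/// As a rough sketch (not verbatim pass output), the vectorizer's lane
+/// predicate
+///   %pred = icmp ule <4 x i32> %induction, %broadcast.splat11
+/// becomes
+///   %pred = call <4 x i1> @llvm.arm.vctp32(i32 %remaining)
+/// where %remaining is a new phi holding the number of elements still to be
+/// processed, decremented by the vector width on each iteration.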
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "ARM.h"
+#include "ARMSubtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mve-tail-predication"
+#define DESC "Transform predicated vector loops to use MVE tail predication"
+
+static cl::opt<bool>
+DisableTailPredication("disable-mve-tail-predication", cl::Hidden,
+ cl::init(true),
+ cl::desc("Disable MVE Tail Predication"));
+namespace {
+
+class MVETailPredication : public LoopPass {
+ SmallVector<IntrinsicInst*, 4> MaskedInsts;
+ Loop *L = nullptr;
+ ScalarEvolution *SE = nullptr;
+ TargetTransformInfo *TTI = nullptr;
+
+public:
+ static char ID;
+
+ MVETailPredication() : LoopPass(ID) { }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager&) override;
+
+private:
+
+ /// Perform the relevant checks on the loop and convert if possible.
+ bool TryConvert(Value *TripCount);
+
+ /// Return whether this is a vectorized loop, that contains masked
+ /// load/stores.
+ bool IsPredicatedVectorLoop();
+
+ /// Compute a value for the total number of elements that the predicated
+ /// loop will process.
+ Value *ComputeElements(Value *TripCount, VectorType *VecTy);
+
+ /// Return whether Predicate is the icmp that generates an i1 vector, based
+ /// upon a loop counter and a limit that is defined outside the loop.
+ bool isTailPredicate(Instruction *Predicate, Value *NumElements);
+};
+
+} // end anonymous namespace
+
+static bool IsDecrement(Instruction &I) {
+ auto *Call = dyn_cast<IntrinsicInst>(&I);
+ if (!Call)
+ return false;
+
+ Intrinsic::ID ID = Call->getIntrinsicID();
+ return ID == Intrinsic::loop_decrement_reg;
+}
+
+static bool IsMasked(Instruction *I) {
+ auto *Call = dyn_cast<IntrinsicInst>(I);
+ if (!Call)
+ return false;
+
+ Intrinsic::ID ID = Call->getIntrinsicID();
+ // TODO: Support gather/scatter expand/compress operations.
+ return ID == Intrinsic::masked_store || ID == Intrinsic::masked_load;
+}
+
+bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
+ if (skipLoop(L) || DisableTailPredication)
+ return false;
+
+ Function &F = *L->getHeader()->getParent();
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ auto &TM = TPC.getTM<TargetMachine>();
+ auto *ST = &TM.getSubtarget<ARMSubtarget>(F);
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ this->L = L;
+
+ // The MVE and LOB extensions are combined to enable tail-predication, but
+ // there's nothing preventing us from generating VCTP instructions for v8.1m.
+ if (!ST->hasMVEIntegerOps() || !ST->hasV8_1MMainlineOps()) {
+ LLVM_DEBUG(dbgs() << "TP: Not a v8.1m.main+mve target.\n");
+ return false;
+ }
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ auto FindLoopIterations = [](BasicBlock *BB) -> IntrinsicInst* {
+ for (auto &I : *BB) {
+ auto *Call = dyn_cast<IntrinsicInst>(&I);
+ if (!Call)
+ continue;
+
+ Intrinsic::ID ID = Call->getIntrinsicID();
+ if (ID == Intrinsic::set_loop_iterations ||
+ ID == Intrinsic::test_set_loop_iterations)
+ return cast<IntrinsicInst>(&I);
+ }
+ return nullptr;
+ };
+
+ // Look for the hardware loop intrinsic that sets the iteration count.
+ IntrinsicInst *Setup = FindLoopIterations(Preheader);
+
+ // The test.set intrinsic could live in the pre-preheader.
+ if (!Setup) {
+ if (!Preheader->getSinglePredecessor())
+ return false;
+ Setup = FindLoopIterations(Preheader->getSinglePredecessor());
+ if (!Setup)
+ return false;
+ }
+
+ // Search for the hardware loop intrinsic that decrements the loop counter.
+ IntrinsicInst *Decrement = nullptr;
+ for (auto *BB : L->getBlocks()) {
+ for (auto &I : *BB) {
+ if (IsDecrement(I)) {
+ Decrement = cast<IntrinsicInst>(&I);
+ break;
+ }
+ }
+ }
+
+ if (!Decrement)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "TP: Running on Loop: " << *L
+ << *Setup << "\n"
+ << *Decrement << "\n");
+ bool Changed = TryConvert(Setup->getArgOperand(0));
+ return Changed;
+}
+
+bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
+ // Look for the following:
+
+ // %trip.count.minus.1 = add i32 %N, -1
+ // %broadcast.splatinsert10 = insertelement <4 x i32> undef,
+ // i32 %trip.count.minus.1, i32 0
+ // %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10,
+ // <4 x i32> undef,
+ // <4 x i32> zeroinitializer
+ // ...
+ // ...
+ // %index = phi i32
+ // %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
+ // %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert,
+ // <4 x i32> undef,
+ // <4 x i32> zeroinitializer
+ // %induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
+ // %pred = icmp ule <4 x i32> %induction, %broadcast.splat11
+
+ // And return whether I == %pred.
+
+ using namespace PatternMatch;
+
+ CmpInst::Predicate Pred;
+ Instruction *Shuffle = nullptr;
+ Instruction *Induction = nullptr;
+
+ // The vector icmp
+ if (!match(I, m_ICmp(Pred, m_Instruction(Induction),
+ m_Instruction(Shuffle))) ||
+ Pred != ICmpInst::ICMP_ULE || !L->isLoopInvariant(Shuffle))
+ return false;
+
+ // First find the instructions outside the loop which set up the limit
+ // vector...
+ // The invariant shuffle that broadcasts the limit into a vector.
+ Instruction *Insert = nullptr;
+ if (!match(Shuffle, m_ShuffleVector(m_Instruction(Insert), m_Undef(),
+ m_Zero())))
+ return false;
+
+ // Insert the limit into a vector.
+ Instruction *BECount = nullptr;
+ if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(BECount),
+ m_Zero())))
+ return false;
+
+ // The limit calculation, backedge count.
+ Value *TripCount = nullptr;
+ if (!match(BECount, m_Add(m_Value(TripCount), m_AllOnes())))
+ return false;
+
+ if (TripCount != NumElements)
+ return false;
+
+ // Now back to searching inside the loop body...
+ // Find the add that takes the index iv and adds a constant vector to it.
+ Instruction *BroadcastSplat = nullptr;
+ Constant *Const = nullptr;
+ if (!match(Induction, m_Add(m_Instruction(BroadcastSplat),
+ m_Constant(Const))))
+ return false;
+
+ // Check that we're adding <0, 1, 2, 3...
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(Const)) {
+ for (unsigned i = 0; i < CDS->getNumElements(); ++i) {
+ if (CDS->getElementAsInteger(i) != i)
+ return false;
+ }
+ } else
+ return false;
+
+ // The shuffle which broadcasts the index iv into a vector.
+ if (!match(BroadcastSplat, m_ShuffleVector(m_Instruction(Insert), m_Undef(),
+ m_Zero())))
+ return false;
+
+ // The insert element which initialises a vector with the index iv.
+ Instruction *IV = nullptr;
+ if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(IV), m_Zero())))
+ return false;
+
+ // The index iv.
+ auto *Phi = dyn_cast<PHINode>(IV);
+ if (!Phi)
+ return false;
+
+ // TODO: Don't think we need to check the entry value.
+ Value *OnEntry = Phi->getIncomingValueForBlock(L->getLoopPreheader());
+ if (!match(OnEntry, m_Zero()))
+ return false;
+
+ Value *InLoop = Phi->getIncomingValueForBlock(L->getLoopLatch());
+ unsigned Lanes = cast<VectorType>(Insert->getType())->getNumElements();
+
+ Instruction *LHS = nullptr;
+ if (!match(InLoop, m_Add(m_Instruction(LHS), m_SpecificInt(Lanes))))
+ return false;
+
+ return LHS == Phi;
+}
+
+static VectorType* getVectorType(IntrinsicInst *I) {
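+ // The pointer argument is operand 0 of a masked load and operand 1 of a
+ // masked store; the accessed vector type is its pointee type.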
+ unsigned TypeOp = I->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1;
+ auto *PtrTy = cast<PointerType>(I->getOperand(TypeOp)->getType());
+ return cast<VectorType>(PtrTy->getElementType());
+}
+
+bool MVETailPredication::IsPredicatedVectorLoop() {
+ // Check that the loop contains at least one masked load/store intrinsic.
+ // Besides those, we only support 'normal' (non-intrinsic) vector
+ // instructions: any other intrinsic taking a vector operand makes us bail.
+ for (auto *BB : L->getBlocks()) {
+ for (auto &I : *BB) {
+ if (IsMasked(&I)) {
+ VectorType *VecTy = getVectorType(cast<IntrinsicInst>(&I));
+ unsigned Lanes = VecTy->getNumElements();
+ unsigned ElementWidth = VecTy->getScalarSizeInBits();
+ // MVE vectors are 128-bit, but don't support 128 x i1.
+ // TODO: Can we support vectors larger than 128-bits?
+ unsigned MaxWidth = TTI->getRegisterBitWidth(true);
+ if (Lanes * ElementWidth != MaxWidth || Lanes == MaxWidth)
+ return false;
+ MaskedInsts.push_back(cast<IntrinsicInst>(&I));
+ } else if (auto *Int = dyn_cast<IntrinsicInst>(&I)) {
+ for (auto &U : Int->args()) {
+ if (isa<VectorType>(U->getType()))
+ return false;
+ }
+ }
+ }
+ }
+
+ return !MaskedInsts.empty();
+}
+
+Value* MVETailPredication::ComputeElements(Value *TripCount,
+ VectorType *VecTy) {
+ const SCEV *TripCountSE = SE->getSCEV(TripCount);
+ ConstantInt *VF = ConstantInt::get(cast<IntegerType>(TripCount->getType()),
+ VecTy->getNumElements());
+
+ if (VF->equalsInt(1))
+ return nullptr;
+
+ // TODO: Support constant trip counts.
+ auto VisitAdd = [&](const SCEVAddExpr *S) -> const SCEVMulExpr* {
+ if (auto *Const = dyn_cast<SCEVConstant>(S->getOperand(0))) {
+ if (Const->getAPInt() != -VF->getValue())
+ return nullptr;
+ } else
+ return nullptr;
+ return dyn_cast<SCEVMulExpr>(S->getOperand(1));
+ };
+
+ auto VisitMul = [&](const SCEVMulExpr *S) -> const SCEVUDivExpr* {
+ if (auto *Const = dyn_cast<SCEVConstant>(S->getOperand(0))) {
+ if (Const->getValue() != VF)
+ return nullptr;
+ } else
+ return nullptr;
+ return dyn_cast<SCEVUDivExpr>(S->getOperand(1));
+ };
+
+ auto VisitDiv = [&](const SCEVUDivExpr *S) -> const SCEV* {
+ if (auto *Const = dyn_cast<SCEVConstant>(S->getRHS())) {
+ if (Const->getValue() != VF)
+ return nullptr;
+ } else
+ return nullptr;
+
+ if (auto *RoundUp = dyn_cast<SCEVAddExpr>(S->getLHS())) {
+ if (auto *Const = dyn_cast<SCEVConstant>(RoundUp->getOperand(0))) {
+ if (Const->getAPInt() != (VF->getValue() - 1))
+ return nullptr;
+ } else
+ return nullptr;
+
+ return RoundUp->getOperand(1);
+ }
+ return nullptr;
+ };
+
+ // TODO: Can we use SCEV helpers, such as findArrayDimensions, and friends to
+ // determine the numbers of elements instead? Looks like this is what is used
+ // for delinearization, but I'm not sure if it can be applied to the
+ // vectorized form - at least not without a bit more work than I feel
+ // comfortable with.
+
+ // Search for Elems in the following SCEV:
+ // (1 + ((-VF + (VF * (((VF - 1) + %Elems) /u VF))<nuw>) /u VF))<nuw><nsw>
+ const SCEV *Elems = nullptr;
+ if (auto *TC = dyn_cast<SCEVAddExpr>(TripCountSE))
+ if (auto *Div = dyn_cast<SCEVUDivExpr>(TC->getOperand(1)))
+ if (auto *Add = dyn_cast<SCEVAddExpr>(Div->getLHS()))
+ if (auto *Mul = VisitAdd(Add))
+ if (auto *Div = VisitMul(Mul))
+ if (auto *Res = VisitDiv(Div))
+ Elems = Res;
+
+ if (!Elems)
+ return nullptr;
+
+ Instruction *InsertPt = L->getLoopPreheader()->getTerminator();
+ if (!isSafeToExpandAt(Elems, InsertPt, *SE))
+ return nullptr;
+
+ auto DL = L->getHeader()->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "elements");
+ return Expander.expandCodeFor(Elems, Elems->getType(), InsertPt);
+}
+
+// Look through the exit block to see whether there's a duplicate predicate
+// instruction. This can happen when we need to perform a select on values
+// from the last and previous iteration. Instead of doing a straight
+// replacement of that predicate with the vctp, clone the vctp and place it
+// in the block. This means that the VPR doesn't have to be live into the
+// exit block which should make it easier to convert this loop into a proper
+// tail predicated loop.
+static void Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates,
+ SetVector<Instruction*> &MaybeDead, Loop *L) {
+ if (BasicBlock *Exit = L->getUniqueExitBlock()) {
+ for (auto &Pair : NewPredicates) {
+ Instruction *OldPred = Pair.first;
+ Instruction *NewPred = Pair.second;
+
+ for (auto &I : *Exit) {
+ if (I.isSameOperationAs(OldPred)) {
+ Instruction *PredClone = NewPred->clone();
+ PredClone->insertBefore(&I);
+ I.replaceAllUsesWith(PredClone);
+ MaybeDead.insert(&I);
+ break;
+ }
+ }
+ }
+ }
+
+ // Drop references and add operands to check for dead.
+ SmallPtrSet<Instruction*, 4> Dead;
+ while (!MaybeDead.empty()) {
+ auto *I = MaybeDead.front();
+ MaybeDead.remove(I);
+ if (I->hasNUsesOrMore(1))
+ continue;
+
+ for (auto &U : I->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(U))
+ MaybeDead.insert(OpI);
+ }
+ I->dropAllReferences();
+ Dead.insert(I);
+ }
+
+ for (auto *I : Dead)
+ I->eraseFromParent();
+
+ for (auto I : L->blocks())
+ DeleteDeadPHIs(I);
+}
+
+bool MVETailPredication::TryConvert(Value *TripCount) {
+ if (!IsPredicatedVectorLoop())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "TP: Found predicated vector loop.\n");
+
+ // Walk through the masked intrinsics and try to find whether the predicate
+ // operand is generated from an induction variable.
+ Module *M = L->getHeader()->getModule();
+ Type *Ty = IntegerType::get(M->getContext(), 32);
+ SetVector<Instruction*> Predicates;
+ DenseMap<Instruction*, Instruction*> NewPredicates;
+
+ for (auto *I : MaskedInsts) {
+ Intrinsic::ID ID = I->getIntrinsicID();
+ unsigned PredOp = ID == Intrinsic::masked_load ? 2 : 3;
+ auto *Predicate = dyn_cast<Instruction>(I->getArgOperand(PredOp));
+ if (!Predicate || Predicates.count(Predicate))
+ continue;
+
+ VectorType *VecTy = getVectorType(I);
+ Value *NumElements = ComputeElements(TripCount, VecTy);
+ if (!NumElements)
+ continue;
+
+ if (!isTailPredicate(Predicate, NumElements)) {
+ LLVM_DEBUG(dbgs() << "TP: Not tail predicate: " << *Predicate << "\n");
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "TP: Found tail predicate: " << *Predicate << "\n");
+ Predicates.insert(Predicate);
+
+ // Insert a phi to count the number of elements processed by the loop.
+ IRBuilder<> Builder(L->getHeader()->getFirstNonPHI());
+ PHINode *Processed = Builder.CreatePHI(Ty, 2);
+ Processed->addIncoming(NumElements, L->getLoopPreheader());
+
+ // Insert the intrinsic to represent the effect of tail predication.
+ Builder.SetInsertPoint(cast<Instruction>(Predicate));
+ ConstantInt *Factor =
+ ConstantInt::get(cast<IntegerType>(Ty), VecTy->getNumElements());
+ Intrinsic::ID VCTPID;
+ switch (VecTy->getNumElements()) {
+ default:
+ llvm_unreachable("unexpected number of lanes");
+ case 2: VCTPID = Intrinsic::arm_vctp64; break;
+ case 4: VCTPID = Intrinsic::arm_vctp32; break;
+ case 8: VCTPID = Intrinsic::arm_vctp16; break;
+ case 16: VCTPID = Intrinsic::arm_vctp8; break;
+ }
+ Function *VCTP = Intrinsic::getDeclaration(M, VCTPID);
+ Value *TailPredicate = Builder.CreateCall(VCTP, Processed);
+ Predicate->replaceAllUsesWith(TailPredicate);
+ NewPredicates[Predicate] = cast<Instruction>(TailPredicate);
+
+ // Add the incoming value to the new phi.
+ // TODO: This add likely already exists in the loop.
+ Value *Remaining = Builder.CreateSub(Processed, Factor);
+ Processed->addIncoming(Remaining, L->getLoopLatch());
+ LLVM_DEBUG(dbgs() << "TP: Insert processed elements phi: "
+ << *Processed << "\n"
+ << "TP: Inserted VCTP: " << *TailPredicate << "\n");
+ }
+
+ // Now clean up.
+ Cleanup(NewPredicates, Predicates, L);
+ return true;
+}
+
+Pass *llvm::createMVETailPredicationPass() {
+ return new MVETailPredication();
+}
+
+char MVETailPredication::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MVETailPredication, DEBUG_TYPE, DESC, false, false)
+INITIALIZE_PASS_END(MVETailPredication, DEBUG_TYPE, DESC, false, false)
diff --git a/lib/Target/ARM/MVEVPTBlockPass.cpp b/lib/Target/ARM/MVEVPTBlockPass.cpp
new file mode 100644
index 000000000000..bc0a80b177ed
--- /dev/null
+++ b/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -0,0 +1,278 @@
+//===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+#include <new>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-mve-vpt"
+
+namespace {
+ class MVEVPTBlock : public MachineFunctionPass {
+ public:
+ static char ID;
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ MVEVPTBlock() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ StringRef getPassName() const override {
+ return "MVE VPT block insertion pass";
+ }
+
+ private:
+ bool InsertVPTBlocks(MachineBasicBlock &MBB);
+ };
+
+ char MVEVPTBlock::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
+
+enum VPTMaskValue {
+ T = 8, // 0b1000
+ TT = 4, // 0b0100
+ TE = 12, // 0b1100
+ TTT = 2, // 0b0010
+ TTE = 6, // 0b0110
+ TEE = 10, // 0b1010
+ TET = 14, // 0b1110
+ TTTT = 1, // 0b0001
+ TTTE = 3, // 0b0011
+ TTEE = 5, // 0b0101
+ TTET = 7, // 0b0111
+ TEEE = 9, // 0b1001
+ TEET = 11, // 0b1011
+ TETT = 13, // 0b1101
+ TETE = 15 // 0b1111
+};
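+// In this encoding the position of the least significant set bit gives the
+// block size, and each higher bit selects whether the next instruction in the
+// block keeps (0) or inverts (1) the predicate of the instruction before it.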
+
+static unsigned VCMPOpcodeToVPT(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::MVE_VCMPf32:
+ return ARM::MVE_VPTv4f32;
+ case ARM::MVE_VCMPf16:
+ return ARM::MVE_VPTv8f16;
+ case ARM::MVE_VCMPi8:
+ return ARM::MVE_VPTv16i8;
+ case ARM::MVE_VCMPi16:
+ return ARM::MVE_VPTv8i16;
+ case ARM::MVE_VCMPi32:
+ return ARM::MVE_VPTv4i32;
+ case ARM::MVE_VCMPu8:
+ return ARM::MVE_VPTv16u8;
+ case ARM::MVE_VCMPu16:
+ return ARM::MVE_VPTv8u16;
+ case ARM::MVE_VCMPu32:
+ return ARM::MVE_VPTv4u32;
+ case ARM::MVE_VCMPs8:
+ return ARM::MVE_VPTv16s8;
+ case ARM::MVE_VCMPs16:
+ return ARM::MVE_VPTv8s16;
+ case ARM::MVE_VCMPs32:
+ return ARM::MVE_VPTv4s32;
+
+ case ARM::MVE_VCMPf32r:
+ return ARM::MVE_VPTv4f32r;
+ case ARM::MVE_VCMPf16r:
+ return ARM::MVE_VPTv8f16r;
+ case ARM::MVE_VCMPi8r:
+ return ARM::MVE_VPTv16i8r;
+ case ARM::MVE_VCMPi16r:
+ return ARM::MVE_VPTv8i16r;
+ case ARM::MVE_VCMPi32r:
+ return ARM::MVE_VPTv4i32r;
+ case ARM::MVE_VCMPu8r:
+ return ARM::MVE_VPTv16u8r;
+ case ARM::MVE_VCMPu16r:
+ return ARM::MVE_VPTv8u16r;
+ case ARM::MVE_VCMPu32r:
+ return ARM::MVE_VPTv4u32r;
+ case ARM::MVE_VCMPs8r:
+ return ARM::MVE_VPTv16s8r;
+ case ARM::MVE_VCMPs16r:
+ return ARM::MVE_VPTv8s16r;
+ case ARM::MVE_VCMPs32r:
+ return ARM::MVE_VPTv4s32r;
+
+ default:
+ return 0;
+ }
+}
+
+static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI,
+ const TargetRegisterInfo *TRI,
+ unsigned &NewOpcode) {
+ // Search backwards to the instruction that defines VPR. This may or may
+ // not be a VCMP; we check that after this loop. If we find another
+ // instruction that reads VPR first, we return nullptr.
+ MachineBasicBlock::iterator CmpMI = MI;
+ while (CmpMI != MI->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->modifiesRegister(ARM::VPR, TRI))
+ break;
+ if (CmpMI->readsRegister(ARM::VPR, TRI))
+ break;
+ }
+
+ if (CmpMI == MI)
+ return nullptr;
+ NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode());
+ if (NewOpcode == 0)
+ return nullptr;
+
+ // Search forward from CmpMI to MI, checking if either register was def'd
+ if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI),
+ MI, TRI))
+ return nullptr;
+ if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI),
+ MI, TRI))
+ return nullptr;
+ return &*CmpMI;
+}
+
+bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
+ bool Modified = false;
+ MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
+ MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
+
+ while (MBIter != EndIter) {
+ MachineInstr *MI = &*MBIter;
+ unsigned PredReg = 0;
+ DebugLoc dl = MI->getDebugLoc();
+
+ ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
+
+ // The idea of the predicate is that None, Then and Else are for use when
+ // handling assembly language: they correspond to the three possible
+ // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
+ // from assembly source or disassembled from object code, you expect to see
+ // a mixture whenever there's a long VPT block. But in code generation, we
+ // hope we'll never generate an Else as input to this pass.
+ assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
+
+ if (Pred == ARMVCC::None) {
+ ++MBIter;
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump());
+ int VPTInstCnt = 1;
+ ARMVCC::VPTCodes NextPred;
+
+ // Look at subsequent instructions, checking if they can be in the same VPT
+ // block.
+ ++MBIter;
+ while (MBIter != EndIter && VPTInstCnt < 4) {
+ NextPred = getVPTInstrPredicate(*MBIter, PredReg);
+ assert(NextPred != ARMVCC::Else &&
+ "VPT block pass does not expect Else preds");
+ if (NextPred != Pred)
+ break;
+ LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump());
+ ++VPTInstCnt;
+ ++MBIter;
+ }
+
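+ // All instructions in the block share the same Then predicate, so only
+ // the all-T mask values can occur here; the count selects the terminator.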
+ unsigned BlockMask = 0;
+ switch (VPTInstCnt) {
+ case 1:
+ BlockMask = VPTMaskValue::T;
+ break;
+ case 2:
+ BlockMask = VPTMaskValue::TT;
+ break;
+ case 3:
+ BlockMask = VPTMaskValue::TTT;
+ break;
+ case 4:
+ BlockMask = VPTMaskValue::TTTT;
+ break;
+ default:
+ llvm_unreachable("Unexpected number of instructions in a VPT block");
+ }
+
+ // Search back for a VCMP that can be folded to create a VPT, or else create
+ // a VPST directly
+ MachineInstrBuilder MIBuilder;
+ unsigned NewOpcode;
+ MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode);
+ if (VCMP) {
+ LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump());
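+ // The VPT takes the block mask followed by the VCMP's source operands
+ // and condition code (operands 1..3), replacing both instructions.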
+ MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode));
+ MIBuilder.addImm(BlockMask);
+ MIBuilder.add(VCMP->getOperand(1));
+ MIBuilder.add(VCMP->getOperand(2));
+ MIBuilder.add(VCMP->getOperand(3));
+ VCMP->eraseFromParent();
+ } else {
+ MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST));
+ MIBuilder.addImm(BlockMask);
+ }
+
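+ // Bundle the new VPT/VPST together with its predicated instructions so
+ // that later passes treat the whole block as a single unit.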
+ finalizeBundle(
+ Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter);
+
+ Modified = true;
+ }
+ return Modified;
+}
+
+bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+
+ if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+ return false;
+
+ TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+ TRI = STI.getRegisterInfo();
+
+ LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
+ << "********** Function: " << Fn.getName() << '\n');
+
+ bool Modified = false;
+ for (MachineBasicBlock &MBB : Fn)
+ Modified |= InsertVPTBlocks(MBB);
+
+ LLVM_DEBUG(dbgs() << "**************************************\n");
+ return Modified;
+}
+
+/// createMVEVPTBlockPass - Returns an instance of the MVE VPT block
+/// insertion pass.
+FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 426e9a0ed9b8..956d474f1d79 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -164,7 +164,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc dl;
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ Register FramePtr = RegInfo->getFrameRegister(MF);
unsigned BasePtr = RegInfo->getBaseRegister();
int CFAOffset = 0;
@@ -459,8 +459,8 @@ static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
else if (MI.getOpcode() == ARM::tPOP) {
return true;
} else if (MI.getOpcode() == ARM::tMOVr) {
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) &&
ARM::hGPRRegClass.contains(Dst));
}
@@ -483,7 +483,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ Register FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index f57d93a2e83d..fccaa4c9cc8a 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -80,12 +80,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
assert((RC == &ARM::tGPRRegClass ||
- (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- isARMLowRegister(SrcReg))) && "Unknown regclass!");
+ (Register::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) &&
+ "Unknown regclass!");
if (RC == &ARM::tGPRRegClass ||
- (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- isARMLowRegister(SrcReg))) {
+ (Register::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -108,13 +107,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert((RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
- (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
- isARMLowRegister(DestReg))) && "Unknown regclass!");
+ assert(
+ (RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
+ (Register::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) &&
+ "Unknown regclass!");
if (RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
- (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
- isARMLowRegister(DestReg))) {
+ (Register::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 3143eb9840ed..786fc78d0233 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -87,7 +87,7 @@ static void TrackDefUses(MachineInstr *MI, RegisterSet &Defs, RegisterSet &Uses,
for (auto &MO : MI->operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
continue;
if (MO.isUse())
@@ -145,8 +145,8 @@ Thumb2ITBlock::MoveCopyOutOfITBlock(MachineInstr *MI,
MI->getOperand(1).getSubReg() == 0 &&
"Sub-register indices still around?");
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
+ Register SrcReg = MI->getOperand(1).getReg();
// First check if it's safe to move it.
if (Uses.count(DstReg) || Defs.count(SrcReg))
@@ -308,131 +308,3 @@ bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) {
/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
/// insertion pass.
FunctionPass *llvm::createThumb2ITBlockPass() { return new Thumb2ITBlock(); }
-
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "arm-mve-vpt"
-
-namespace {
- class MVEVPTBlock : public MachineFunctionPass {
- public:
- static char ID;
- const Thumb2InstrInfo *TII;
- const TargetRegisterInfo *TRI;
-
- MVEVPTBlock() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
-
- StringRef getPassName() const override {
- return "MVE VPT block insertion pass";
- }
-
- private:
- bool InsertVPTBlocks(MachineBasicBlock &MBB);
- };
-
- char MVEVPTBlock::ID = 0;
-
-} // end anonymous namespace
-
-INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
-
-enum VPTMaskValue {
- T = 8, // 0b1000
- TT = 4, // 0b0100
- TE = 12, // 0b1100
- TTT = 2, // 0b0010
- TTE = 6, // 0b0110
- TEE = 10, // 0b1010
- TET = 14, // 0b1110
- TTTT = 1, // 0b0001
- TTTE = 3, // 0b0011
- TTEE = 5, // 0b0101
- TTET = 7, // 0b0111
- TEEE = 9, // 0b1001
- TEET = 11, // 0b1011
- TETT = 13, // 0b1101
- TETE = 15 // 0b1111
-};
-
-bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
- bool Modified = false;
- MachineBasicBlock::iterator MBIter = Block.begin();
- MachineBasicBlock::iterator EndIter = Block.end();
-
- while (MBIter != EndIter) {
- MachineInstr *MI = &*MBIter;
- unsigned PredReg = 0;
- DebugLoc dl = MI->getDebugLoc();
-
- ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
-
- // The idea of the predicate is that None, Then and Else are for use when
- // handling assembly language: they correspond to the three possible
- // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
- // from assembly source or disassembled from object code, you expect to see
- // a mixture whenever there's a long VPT block. But in code generation, we
- // hope we'll never generate an Else as input to this pass.
-
- assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
-
- if (Pred == ARMVCC::None) {
- ++MBIter;
- continue;
- }
-
- MachineInstrBuilder MIBuilder =
- BuildMI(Block, MBIter, dl, TII->get(ARM::MVE_VPST));
- // The mask value for the VPST instruction is T = 0b1000 = 8
- MIBuilder.addImm(VPTMaskValue::T);
-
- MachineBasicBlock::iterator VPSTInsertPos = MIBuilder.getInstr();
- int VPTInstCnt = 1;
- ARMVCC::VPTCodes NextPred;
-
- do {
- ++MBIter;
- NextPred = getVPTInstrPredicate(*MBIter, PredReg);
- } while (NextPred != ARMVCC::None && NextPred == Pred && ++VPTInstCnt < 4);
-
- MachineInstr *LastMI = &*MBIter;
- finalizeBundle(Block, VPSTInsertPos.getInstrIterator(),
- ++LastMI->getIterator());
-
- Modified = true;
- LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump(););
-
- ++MBIter;
- }
- return Modified;
-}
-
-bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(Fn.getSubtarget());
-
- if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
- return false;
-
- TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
- TRI = STI.getRegisterInfo();
-
- LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
- << "********** Function: " << Fn.getName() << '\n');
-
- bool Modified = false;
- for (MachineBasicBlock &MBB : Fn)
- Modified |= InsertVPTBlocks(MBB);
-
- LLVM_DEBUG(dbgs() << "**************************************\n");
- return Modified;
-}
-
-/// createMVEVPTBlock - Returns an instance of the MVE VPT block
-/// insertion pass.
-FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 5a965f7a6b9b..af1f0aeb27ba 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -159,9 +159,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
// otherwise).
- if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (Register::isVirtualRegister(SrcReg)) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
- MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass);
+ MRI->constrainRegClass(SrcReg, &ARM::GPRPairnospRegClass);
}
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
@@ -200,10 +200,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
// otherwise).
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ if (Register::isVirtualRegister(DestReg)) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
- MRI->constrainRegClass(DestReg,
- &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass);
+ MRI->constrainRegClass(DestReg, &ARM::GPRPairnospRegClass);
}
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
@@ -211,7 +210,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
- if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ if (Register::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
return;
}
@@ -470,12 +469,17 @@ immediateOffsetOpcode(unsigned opcode)
bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
- const ARMBaseInstrInfo &TII) {
+ const ARMBaseInstrInfo &TII,
+ const TargetRegisterInfo *TRI) {
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
bool isSub = false;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetRegisterClass *RegClass =
+ TII.getRegClass(Desc, FrameRegIdx, TRI, MF);
+
// Memory operands in inline assembly always use AddrModeT2_i12.
if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
@@ -554,7 +558,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// register then we change to an immediate version.
unsigned NewOpc = Opcode;
if (AddrMode == ARMII::AddrModeT2_so) {
- unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
+ Register OffsetReg = MI.getOperand(FrameRegIdx + 1).getReg();
if (OffsetReg != 0) {
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
return Offset == 0;
@@ -645,10 +649,21 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1);
// Attempt to fold address computation
- // Common case: small offset, fits into instruction.
+ // Common case: small offset, fits into instruction. We need to make sure
+ // the register class is correct too, for instructions like the MVE
+ // VLDRH.32, which only accepts low tGPR registers.
int ImmedOffset = Offset / Scale;
unsigned Mask = (1 << NumBits) - 1;
- if ((unsigned)Offset <= Mask * Scale) {
+ if ((unsigned)Offset <= Mask * Scale &&
+ (Register::isVirtualRegister(FrameReg) ||
+ RegClass->contains(FrameReg))) {
+ if (Register::isVirtualRegister(FrameReg)) {
+ // Make sure the register class for the virtual register is correct
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ if (!MRI->constrainRegClass(FrameReg, RegClass))
+ llvm_unreachable("Unable to constrain virtual register class.");
+ }
+
// Replace the FrameIndex with fp/sp
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
if (isSub) {
@@ -681,7 +696,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
}
Offset = (isSub) ? -Offset : Offset;
- return Offset == 0;
+ return Offset == 0 && (Register::isVirtualRegister(FrameReg) ||
+ RegClass->contains(FrameReg));
}
ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 37a85fa38417..c5a62aa33990 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -300,7 +300,7 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
for (const MachineOperand &MO : CPSRDef->operands()) {
if (!MO.isReg() || MO.isUndef() || MO.isUse())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0 || Reg == ARM::CPSR)
continue;
Defs.insert(Reg);
@@ -309,7 +309,7 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
for (const MachineOperand &MO : Use->operands()) {
if (!MO.isReg() || MO.isUndef() || MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Defs.count(Reg))
return false;
}
@@ -380,7 +380,7 @@ static bool VerifyLowRegs(MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isImplicit())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0 || Reg == ARM::CPSR)
continue;
if (isPCOk && Reg == ARM::PC)
@@ -464,11 +464,11 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// For this reason we can't reuse the logic at the end of this function; we
// have to implement the MI building here.
bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
- unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
- unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
+ Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
+ Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
unsigned Offset = MI->getOperand(3).getImm();
unsigned PredImm = MI->getOperand(4).getImm();
- unsigned PredReg = MI->getOperand(5).getReg();
+ Register PredReg = MI->getOperand(5).getReg();
assert(isARMLowRegister(Rt));
assert(isARMLowRegister(Rn));
@@ -496,7 +496,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
return true;
}
case ARM::t2LDMIA: {
- unsigned BaseReg = MI->getOperand(0).getReg();
+ Register BaseReg = MI->getOperand(0).getReg();
assert(isARMLowRegister(BaseReg));
// For the non-writeback version (this one), the base register must be
@@ -524,7 +524,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
break;
case ARM::t2LDMIA_RET: {
- unsigned BaseReg = MI->getOperand(1).getReg();
+ Register BaseReg = MI->getOperand(1).getReg();
if (BaseReg != ARM::SP)
return false;
Opc = Entry.NarrowOpc2; // tPOP_RET
@@ -537,7 +537,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2STMDB_UPD: {
OpNum = 0;
- unsigned BaseReg = MI->getOperand(1).getReg();
+ Register BaseReg = MI->getOperand(1).getReg();
if (BaseReg == ARM::SP &&
(Entry.WideOpc == ARM::t2LDMIA_UPD ||
Entry.WideOpc == ARM::t2STMDB_UPD)) {
@@ -743,11 +743,11 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// are optimizing for size.
return false;
- unsigned Reg0 = MI->getOperand(0).getReg();
- unsigned Reg1 = MI->getOperand(1).getReg();
+ Register Reg0 = MI->getOperand(0).getReg();
+ Register Reg1 = MI->getOperand(1).getReg();
// t2MUL is "special". The tied source operand is second, not first.
if (MI->getOpcode() == ARM::t2MUL) {
- unsigned Reg2 = MI->getOperand(2).getReg();
+ Register Reg2 = MI->getOperand(2).getReg();
// Early exit if the regs aren't all low regs.
if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
|| !isARMLowRegister(Reg2))
@@ -782,7 +782,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (Imm > Limit)
return false;
} else {
- unsigned Reg2 = MI->getOperand(2).getReg();
+ Register Reg2 = MI->getOperand(2).getReg();
if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
return false;
}
@@ -868,7 +868,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
continue;
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg || Reg == ARM::CPSR)
continue;
if (Entry.LowRegs1 && !isARMLowRegister(Reg))
diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index a96417ffce4d..b0ba58d8dc4a 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -107,8 +107,9 @@ void ThumbRegisterInfo::emitLoadConstPool(
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (STI.isThumb1Only()) {
- assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) &&
- "Thumb1 does not have ldr to high register");
+ assert(
+ (isARMLowRegister(DestReg) || Register::isVirtualRegister(DestReg)) &&
+ "Thumb1 does not have ldr to high register");
return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
PredReg, MIFlags);
}
@@ -141,7 +142,7 @@ static void emitThumbRegPlusImmInReg(
unsigned LdReg = DestReg;
if (DestReg == ARM::SP)
assert(BaseReg == ARM::SP && "Unexpected!");
- if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
+ if (!isARMLowRegister(DestReg) && !Register::isVirtualRegister(DestReg))
LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
@@ -371,7 +372,7 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
if (Opcode == ARM::tADDframe) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
*this);
@@ -509,7 +510,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.mayLoad()) {
// Use the destination register to materialize sp + offset.
- unsigned TmpReg = MI.getOperand(0).getReg();
+ Register TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
if (Opcode == ARM::tLDRspi) {
if (FrameReg == ARM::SP || STI.genExecuteOnly())
diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp
index 7586bd7b78fc..1db6b2236b4f 100644
--- a/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -97,7 +97,7 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
assert(RegOp.isReg() && "Operand must be a register when you're"
"using 'A'..'Z' operand extracodes.");
- unsigned Reg = RegOp.getReg();
+ Register Reg = RegOp.getReg();
unsigned ByteNumber = ExtraCode[0] - 'A';
diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index c45b2d0e39c1..83d0f6845332 100644
--- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -140,8 +140,8 @@ bool AVRExpandPseudo::
expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(2).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool SrcIsKill = MI.getOperand(2).isKill();
@@ -173,8 +173,8 @@ bool AVRExpandPseudo::
expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(2).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool SrcIsKill = MI.getOperand(2).isKill();
@@ -220,7 +220,7 @@ bool AVRExpandPseudo::
expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(3).isDead();
@@ -874,7 +874,7 @@ unsigned AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
// Exclude all the registers being used by the instruction.
for (MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ !Register::isVirtualRegister(MO.getReg()))
Candidates.reset(MO.getReg());
}
diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp
index 5e91bb8632c1..e6c48de5a782 100644
--- a/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/lib/Target/AVR/AVRFrameLowering.cpp
@@ -30,7 +30,8 @@
namespace llvm {
AVRFrameLowering::AVRFrameLowering()
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 1, -2) {}
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align::None(),
+ -2) {}
bool AVRFrameLowering::canSimplifyCallFramePseudos(
const MachineFunction &MF) const {
@@ -323,7 +324,7 @@ static void fixStackStores(MachineBasicBlock &MBB,
"Invalid register, should be SP!");
if (insertPushes) {
// Replace this instruction with a push.
- unsigned SrcReg = MI.getOperand(2).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
bool SrcIsKill = MI.getOperand(2).isKill();
// We can't use PUSHWRr here because when expanded the order of the new
diff --git a/lib/Target/AVR/AVRISelDAGToDAG.cpp b/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 5cb4441c4380..4c4f4faa0508 100644
--- a/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -251,7 +251,7 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
RegisterSDNode *RegNode =
cast<RegisterSDNode>(CopyFromRegOp->getOperand(1));
Reg = RegNode->getReg();
- CanHandleRegImmOpt &= (TargetRegisterInfo::isVirtualRegister(Reg) ||
+ CanHandleRegImmOpt &= (Register::isVirtualRegister(Reg) ||
AVR::PTRDISPREGSRegClass.contains(Reg));
} else {
CanHandleRegImmOpt = false;
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index b6ba5f22fafb..f12c59b7d8c3 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -236,7 +236,7 @@ AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
setLibcallName(RTLIB::SIN_F32, "sin");
setLibcallName(RTLIB::COS_F32, "cos");
- setMinFunctionAlignment(1);
+ setMinFunctionAlignment(Align(2));
setMinimumJumpTableEntries(UINT_MAX);
}
@@ -1517,11 +1517,11 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
unsigned ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass);
unsigned ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass);
- unsigned ShiftReg = RI.createVirtualRegister(RC);
- unsigned ShiftReg2 = RI.createVirtualRegister(RC);
- unsigned ShiftAmtSrcReg = MI.getOperand(2).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register ShiftReg = RI.createVirtualRegister(RC);
+ Register ShiftReg2 = RI.createVirtualRegister(RC);
+ Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
// BB:
// cpi N, 0
@@ -1568,7 +1568,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
if (I->getOpcode() == AVR::COPY) {
- unsigned SrcReg = I->getOperand(1).getReg();
+ Register SrcReg = I->getOperand(1).getReg();
return (SrcReg == AVR::R0 || SrcReg == AVR::R1);
}
@@ -1689,6 +1689,8 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
// See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
switch (Constraint[0]) {
+ default:
+ break;
case 'a': // Simple upper registers
case 'b': // Base pointer registers pairs
case 'd': // Upper register
@@ -1715,9 +1717,7 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
case 'O': // Integer constant (Range: 8, 16, 24)
case 'P': // Integer constant (Range: 1)
case 'R': // Integer constant (Range: -6 to 5)x
- return C_Other;
- default:
- break;
+ return C_Immediate;
}
}
@@ -2006,10 +2006,9 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-unsigned AVRTargetLowering::getRegisterByName(const char *RegName,
- EVT VT,
- SelectionDAG &DAG) const {
- unsigned Reg;
+Register AVRTargetLowering::getRegisterByName(const char *RegName, EVT VT,
+ const MachineFunction &MF) const {
+ Register Reg;
if (VT == MVT::i8) {
Reg = StringSwitch<unsigned>(RegName)
diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index ed2d0835903c..6c722fa5414b 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -125,8 +125,8 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL)
const override {
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index a6b36f80485d..8fce05c933bc 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -158,7 +158,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// We need to materialize the offset via an add instruction.
unsigned Opcode;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer");
II++; // Skip over the FRMIDX (and now MOVW) instruction.
diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp
index a36c8b0f9649..25304280d002 100644
--- a/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/lib/Target/AVR/AVRTargetMachine.cpp
@@ -50,7 +50,7 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
SubTarget(TT, getCPU(CPU), FS, *this) {
- this->TLOF = make_unique<AVRTargetObjectFile>();
+ this->TLOF = std::make_unique<AVRTargetObjectFile>();
initAsmInfo();
}
diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index aac5644711e2..af60bc4fdc90 100644
--- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -199,22 +199,22 @@ public:
}
static std::unique_ptr<AVROperand> CreateToken(StringRef Str, SMLoc S) {
- return make_unique<AVROperand>(Str, S);
+ return std::make_unique<AVROperand>(Str, S);
}
static std::unique_ptr<AVROperand> CreateReg(unsigned RegNum, SMLoc S,
SMLoc E) {
- return make_unique<AVROperand>(RegNum, S, E);
+ return std::make_unique<AVROperand>(RegNum, S, E);
}
static std::unique_ptr<AVROperand> CreateImm(const MCExpr *Val, SMLoc S,
SMLoc E) {
- return make_unique<AVROperand>(Val, S, E);
+ return std::make_unique<AVROperand>(Val, S, E);
}
static std::unique_ptr<AVROperand>
CreateMemri(unsigned RegNum, const MCExpr *Val, SMLoc S, SMLoc E) {
- return make_unique<AVROperand>(RegNum, Val, S, E);
+ return std::make_unique<AVROperand>(RegNum, Val, S, E);
}
void makeToken(StringRef Token) {
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
index 6025e4b2437c..1c69fea5962d 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
@@ -152,7 +152,7 @@ unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx,
}
std::unique_ptr<MCObjectTargetWriter> createAVRELFObjectWriter(uint8_t OSABI) {
- return make_unique<AVRELFObjectWriter>(OSABI);
+ return std::make_unique<AVRELFObjectWriter>(OSABI);
}
} // end of namespace llvm
diff --git a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 75885fd058a7..ce1d2ecd9d26 100644
--- a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -194,7 +194,7 @@ public:
}
static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
- auto Op = make_unique<BPFOperand>(Token);
+ auto Op = std::make_unique<BPFOperand>(Token);
Op->Tok = Str;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -203,7 +203,7 @@ public:
static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
SMLoc E) {
- auto Op = make_unique<BPFOperand>(Register);
+ auto Op = std::make_unique<BPFOperand>(Register);
Op->Reg.RegNum = RegNo;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -212,7 +212,7 @@ public:
static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
SMLoc E) {
- auto Op = make_unique<BPFOperand>(Immediate);
+ auto Op = std::make_unique<BPFOperand>(Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
diff --git a/lib/Target/BPF/BPF.h b/lib/Target/BPF/BPF.h
index d311fc154094..6e4f35f4c5d7 100644
--- a/lib/Target/BPF/BPF.h
+++ b/lib/Target/BPF/BPF.h
@@ -15,17 +15,19 @@
namespace llvm {
class BPFTargetMachine;
-ModulePass *createBPFAbstractMemberAccess();
+ModulePass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
+FunctionPass *createBPFMIPeepholeTruncElimPass();
FunctionPass *createBPFMIPreEmitPeepholePass();
FunctionPass *createBPFMIPreEmitCheckingPass();
void initializeBPFAbstractMemberAccessPass(PassRegistry&);
void initializeBPFMISimplifyPatchablePass(PassRegistry&);
void initializeBPFMIPeepholePass(PassRegistry&);
+void initializeBPFMIPeepholeTruncElimPass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry&);
void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
}
diff --git a/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 51d4cbc8a429..400701c4e5c2 100644
--- a/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -50,6 +50,28 @@
// addr = preserve_struct_access_index(base, gep_index, di_index)
// !llvm.preserve.access.index <struct_ditype>
//
+// Bitfield member access needs special attention. The user cannot take the
+// address of a bitfield access. To help the kernel verifier optimize
+// bitfield code, a new clang intrinsic is introduced:
+// uint32_t __builtin_preserve_field_info(member_access, info_kind)
+// In IR, a chain with two (or more) intrinsic calls will be generated:
+// ...
+// addr = preserve_struct_access_index(base, 1, 1) !struct s
+// uint32_t result = bpf_preserve_field_info(addr, info_kind)
+//
+// Suppose the info_kind is FIELD_SIGNEDNESS.
+// The above two IR intrinsics will be replaced with
+// a relocatable insn:
+// signedness = /* signedness of member_access */
+// and the signedness can be changed by the bpf loader based on the
+// types on the host.
+//
+// The user can also test whether a field exists or not with
+// uint32_t result = bpf_preserve_field_info(member_access, FIELD_EXISTENCE)
+// The field will always be available (result = 1) during the initial
+// compilation, but the bpf loader can patch in the correct value on the
+// target host, where the member_access may or may not be available.
+//
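+// A hypothetical C sketch of the above (names are illustrative only):
+//   struct s { unsigned a:5, b:3; } *p = ...;
+//   uint32_t sz = __builtin_preserve_field_info(p->b, FIELD_BYTE_SIZE);
+// Here sz compiles to a relocatable immediate that the bpf loader may
+// rewrite for the target host.
+//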
//===----------------------------------------------------------------------===//
#include "BPF.h"
@@ -65,13 +87,12 @@
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <stack>
#define DEBUG_TYPE "bpf-abstract-member-access"
namespace llvm {
const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama";
-const std::string BPFCoreSharedInfo::PatchableExtSecName =
- ".BPF.patchable_externs";
} // namespace llvm
using namespace llvm;
@@ -87,40 +108,62 @@ class BPFAbstractMemberAccess final : public ModulePass {
public:
static char ID;
- BPFAbstractMemberAccess() : ModulePass(ID) {}
+ TargetMachine *TM;
+ // Add an optional BPFTargetMachine parameter so that the BPF backend can
+ // construct the pass with a target machine and query the endianness. The
+ // default constructor (without parameters) is used by the pass manager
+ // for management purposes.
+ BPFAbstractMemberAccess(BPFTargetMachine *TM = nullptr) : ModulePass(ID), TM(TM) {}
+
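+ // Bookkeeping for one preserve_* intrinsic call: the intrinsic kind, its
+ // constant access index (or info kind), the attached debuginfo metadata
+ // and the base pointer of the access.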
+ struct CallInfo {
+ uint32_t Kind;
+ uint32_t AccessIndex;
+ MDNode *Metadata;
+ Value *Base;
+ };
+ typedef std::stack<std::pair<CallInst *, CallInfo>> CallInfoStack;
private:
enum : uint32_t {
BPFPreserveArrayAI = 1,
BPFPreserveUnionAI = 2,
BPFPreserveStructAI = 3,
+ BPFPreserveFieldInfoAI = 4,
};
std::map<std::string, GlobalVariable *> GEPGlobals;
// A map to link preserve_*_access_index instrinsic calls.
- std::map<CallInst *, std::pair<CallInst *, uint32_t>> AIChain;
+ std::map<CallInst *, std::pair<CallInst *, CallInfo>> AIChain;
// A map to hold all the base preserve_*_access_index instrinsic calls.
- // The base call is not an input of any other preserve_*_access_index
+ // The base call is not an input of any other preserve_*
// intrinsics.
- std::map<CallInst *, uint32_t> BaseAICalls;
+ std::map<CallInst *, CallInfo> BaseAICalls;
bool doTransformation(Module &M);
- void traceAICall(CallInst *Call, uint32_t Kind);
- void traceBitCast(BitCastInst *BitCast, CallInst *Parent, uint32_t Kind);
- void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, uint32_t Kind);
+ void traceAICall(CallInst *Call, CallInfo &ParentInfo);
+ void traceBitCast(BitCastInst *BitCast, CallInst *Parent,
+ CallInfo &ParentInfo);
+ void traceGEP(GetElementPtrInst *GEP, CallInst *Parent,
+ CallInfo &ParentInfo);
void collectAICallChains(Module &M, Function &F);
- bool IsPreserveDIAccessIndexCall(const CallInst *Call, uint32_t &Kind);
+ bool IsPreserveDIAccessIndexCall(const CallInst *Call, CallInfo &Cinfo);
+ bool IsValidAIChain(const MDNode *ParentMeta, uint32_t ParentAI,
+ const MDNode *ChildMeta);
bool removePreserveAccessIndexIntrinsic(Module &M);
void replaceWithGEP(std::vector<CallInst *> &CallList,
uint32_t NumOfZerosIndex, uint32_t DIIndex);
-
- Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr,
- std::string &AccessKey, uint32_t Kind,
- MDNode *&TypeMeta);
- bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex);
- bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind);
+ bool HasPreserveFieldInfoCall(CallInfoStack &CallStack);
+ void GetStorageBitRange(DICompositeType *CTy, DIDerivedType *MemberTy,
+ uint32_t AccessIndex, uint32_t &StartBitOffset,
+ uint32_t &EndBitOffset);
+ uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy,
+ uint32_t AccessIndex, uint32_t PatchImm);
+
+ Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo,
+ std::string &AccessKey, MDNode *&BaseMeta);
+ uint64_t getConstant(const Value *IndexValue);
+ bool transformGEPChain(Module &M, CallInst *Call, CallInfo &CInfo);
};
} // End anonymous namespace
@@ -128,23 +171,65 @@ char BPFAbstractMemberAccess::ID = 0;
INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE,
"abstracting struct/union member accessees", false, false)
-ModulePass *llvm::createBPFAbstractMemberAccess() {
- return new BPFAbstractMemberAccess();
+ModulePass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) {
+ return new BPFAbstractMemberAccess(TM);
}
bool BPFAbstractMemberAccess::runOnModule(Module &M) {
LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n");
// Bail out if no debug info.
- if (empty(M.debug_compile_units()))
+ if (M.debug_compile_units().empty())
return false;
return doTransformation(M);
}
+static bool SkipDIDerivedTag(unsigned Tag) {
+ if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type &&
+ Tag != dwarf::DW_TAG_restrict_type &&
+ Tag != dwarf::DW_TAG_member)
+ return false;
+ return true;
+}
+
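+// Peel off typedefs, CV/restrict qualifiers and member wrappers until a
+// non-wrapper type is reached.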
+static DIType * stripQualifiers(DIType *Ty) {
+ while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ if (!SkipDIDerivedTag(DTy->getTag()))
+ break;
+ Ty = DTy->getBaseType();
+ }
+ return Ty;
+}
+
+static const DIType * stripQualifiers(const DIType *Ty) {
+ while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ if (!SkipDIDerivedTag(DTy->getTag()))
+ break;
+ Ty = DTy->getBaseType();
+ }
+ return Ty;
+}
+
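+// Compute the number of elements covered by the array dimensions of \p CTy
+// starting at \p StartDim: the product of the remaining subrange counts.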
+static uint32_t calcArraySize(const DICompositeType *CTy, uint32_t StartDim) {
+ DINodeArray Elements = CTy->getElements();
+ uint32_t DimSize = 1;
+ for (uint32_t I = StartDim; I < Elements.size(); ++I) {
+ if (auto *Element = dyn_cast_or_null<DINode>(Elements[I]))
+ if (Element->getTag() == dwarf::DW_TAG_subrange_type) {
+ const DISubrange *SR = cast<DISubrange>(Element);
+ auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
+ DimSize *= CI->getSExtValue();
+ }
+ }
+
+ return DimSize;
+}
+
/// Check whether a call is a preserve_*_access_index intrinsic call or not.
bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
- uint32_t &Kind) {
+ CallInfo &CInfo) {
if (!Call)
return false;
@@ -152,15 +237,40 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
if (!GV)
return false;
if (GV->getName().startswith("llvm.preserve.array.access.index")) {
- Kind = BPFPreserveArrayAI;
+ CInfo.Kind = BPFPreserveArrayAI;
+ CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
+ if (!CInfo.Metadata)
+ report_fatal_error("Missing metadata for llvm.preserve.array.access.index intrinsic");
+ CInfo.AccessIndex = getConstant(Call->getArgOperand(2));
+ CInfo.Base = Call->getArgOperand(0);
return true;
}
if (GV->getName().startswith("llvm.preserve.union.access.index")) {
- Kind = BPFPreserveUnionAI;
+ CInfo.Kind = BPFPreserveUnionAI;
+ CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
+ if (!CInfo.Metadata)
+ report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic");
+ CInfo.AccessIndex = getConstant(Call->getArgOperand(1));
+ CInfo.Base = Call->getArgOperand(0);
return true;
}
if (GV->getName().startswith("llvm.preserve.struct.access.index")) {
- Kind = BPFPreserveStructAI;
+ CInfo.Kind = BPFPreserveStructAI;
+ CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
+ if (!CInfo.Metadata)
+ report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic");
+ CInfo.AccessIndex = getConstant(Call->getArgOperand(2));
+ CInfo.Base = Call->getArgOperand(0);
+ return true;
+ }
+ if (GV->getName().startswith("llvm.bpf.preserve.field.info")) {
+ CInfo.Kind = BPFPreserveFieldInfoAI;
+ CInfo.Metadata = nullptr;
+ // Check validity of info_kind as clang did not check this.
+ uint64_t InfoKind = getConstant(Call->getArgOperand(1));
+ if (InfoKind >= BPFCoreSharedInfo::MAX_FIELD_RELOC_KIND)
+ report_fatal_error("Incorrect info_kind for llvm.bpf.preserve.field.info intrinsic");
+ CInfo.AccessIndex = InfoKind;
return true;
}
@@ -173,8 +283,7 @@ void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList,
for (auto Call : CallList) {
uint32_t Dimension = 1;
if (DimensionIndex > 0)
- Dimension = cast<ConstantInt>(Call->getArgOperand(DimensionIndex))
- ->getZExtValue();
+ Dimension = getConstant(Call->getArgOperand(DimensionIndex));
Constant *Zero =
ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0);
@@ -200,14 +309,14 @@ bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) {
for (auto &BB : F)
for (auto &I : BB) {
auto *Call = dyn_cast<CallInst>(&I);
- uint32_t Kind;
- if (!IsPreserveDIAccessIndexCall(Call, Kind))
+ CallInfo CInfo;
+ if (!IsPreserveDIAccessIndexCall(Call, CInfo))
continue;
Found = true;
- if (Kind == BPFPreserveArrayAI)
+ if (CInfo.Kind == BPFPreserveArrayAI)
PreserveArrayIndexCalls.push_back(Call);
- else if (Kind == BPFPreserveUnionAI)
+ else if (CInfo.Kind == BPFPreserveUnionAI)
PreserveUnionIndexCalls.push_back(Call);
else
PreserveStructIndexCalls.push_back(Call);
@@ -233,79 +342,146 @@ bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) {
return Found;
}
-void BPFAbstractMemberAccess::traceAICall(CallInst *Call, uint32_t Kind) {
+/// Check whether the access index chain is valid. We check here because
+/// there may be type casts between two access indexes; we want to ensure
+/// the memory access is still valid.
+bool BPFAbstractMemberAccess::IsValidAIChain(const MDNode *ParentType,
+ uint32_t ParentAI,
+ const MDNode *ChildType) {
+ if (!ChildType)
+ return true; // preserve_field_info, no type comparison needed.
+
+ const DIType *PType = stripQualifiers(cast<DIType>(ParentType));
+ const DIType *CType = stripQualifiers(cast<DIType>(ChildType));
+
+ // Child is a derived/pointer type, which is due to type casting.
+ // Pointer type cannot be in the middle of chain.
+ if (isa<DIDerivedType>(CType))
+ return false;
+
+ // Parent is a pointer type.
+ if (const auto *PtrTy = dyn_cast<DIDerivedType>(PType)) {
+ if (PtrTy->getTag() != dwarf::DW_TAG_pointer_type)
+ return false;
+ return stripQualifiers(PtrTy->getBaseType()) == CType;
+ }
+
+ // Otherwise, struct/union/array types
+ const auto *PTy = dyn_cast<DICompositeType>(PType);
+ const auto *CTy = dyn_cast<DICompositeType>(CType);
+ assert(PTy && CTy && "ParentType or ChildType is null or not composite");
+
+ uint32_t PTyTag = PTy->getTag();
+ assert(PTyTag == dwarf::DW_TAG_array_type ||
+ PTyTag == dwarf::DW_TAG_structure_type ||
+ PTyTag == dwarf::DW_TAG_union_type);
+
+ uint32_t CTyTag = CTy->getTag();
+ assert(CTyTag == dwarf::DW_TAG_array_type ||
+ CTyTag == dwarf::DW_TAG_structure_type ||
+ CTyTag == dwarf::DW_TAG_union_type);
+
+ // Multi-dimensional arrays: the base element types should be the same.
+ if (PTyTag == dwarf::DW_TAG_array_type && PTyTag == CTyTag)
+ return PTy->getBaseType() == CTy->getBaseType();
+
+ DIType *Ty;
+ if (PTyTag == dwarf::DW_TAG_array_type)
+ Ty = PTy->getBaseType();
+ else
+ Ty = dyn_cast<DIType>(PTy->getElements()[ParentAI]);
+
+ return dyn_cast<DICompositeType>(stripQualifiers(Ty)) == CTy;
+}
+
+void BPFAbstractMemberAccess::traceAICall(CallInst *Call,
+ CallInfo &ParentInfo) {
for (User *U : Call->users()) {
Instruction *Inst = dyn_cast<Instruction>(U);
if (!Inst)
continue;
if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
- traceBitCast(BI, Call, Kind);
+ traceBitCast(BI, Call, ParentInfo);
} else if (auto *CI = dyn_cast<CallInst>(Inst)) {
- uint32_t CIKind;
- if (IsPreserveDIAccessIndexCall(CI, CIKind)) {
- AIChain[CI] = std::make_pair(Call, Kind);
- traceAICall(CI, CIKind);
+ CallInfo ChildInfo;
+
+ if (IsPreserveDIAccessIndexCall(CI, ChildInfo) &&
+ IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex,
+ ChildInfo.Metadata)) {
+ AIChain[CI] = std::make_pair(Call, ParentInfo);
+ traceAICall(CI, ChildInfo);
} else {
- BaseAICalls[Call] = Kind;
+ BaseAICalls[Call] = ParentInfo;
}
} else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
if (GI->hasAllZeroIndices())
- traceGEP(GI, Call, Kind);
+ traceGEP(GI, Call, ParentInfo);
else
- BaseAICalls[Call] = Kind;
+ BaseAICalls[Call] = ParentInfo;
+ } else {
+ BaseAICalls[Call] = ParentInfo;
}
}
}
void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast,
- CallInst *Parent, uint32_t Kind) {
+ CallInst *Parent,
+ CallInfo &ParentInfo) {
for (User *U : BitCast->users()) {
Instruction *Inst = dyn_cast<Instruction>(U);
if (!Inst)
continue;
if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
- traceBitCast(BI, Parent, Kind);
+ traceBitCast(BI, Parent, ParentInfo);
} else if (auto *CI = dyn_cast<CallInst>(Inst)) {
- uint32_t CIKind;
- if (IsPreserveDIAccessIndexCall(CI, CIKind)) {
- AIChain[CI] = std::make_pair(Parent, Kind);
- traceAICall(CI, CIKind);
+ CallInfo ChildInfo;
+ if (IsPreserveDIAccessIndexCall(CI, ChildInfo) &&
+ IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex,
+ ChildInfo.Metadata)) {
+ AIChain[CI] = std::make_pair(Parent, ParentInfo);
+ traceAICall(CI, ChildInfo);
} else {
- BaseAICalls[Parent] = Kind;
+ BaseAICalls[Parent] = ParentInfo;
}
} else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
if (GI->hasAllZeroIndices())
- traceGEP(GI, Parent, Kind);
+ traceGEP(GI, Parent, ParentInfo);
else
- BaseAICalls[Parent] = Kind;
+ BaseAICalls[Parent] = ParentInfo;
+ } else {
+ BaseAICalls[Parent] = ParentInfo;
}
}
}
void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent,
- uint32_t Kind) {
+ CallInfo &ParentInfo) {
for (User *U : GEP->users()) {
Instruction *Inst = dyn_cast<Instruction>(U);
if (!Inst)
continue;
if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
- traceBitCast(BI, Parent, Kind);
+ traceBitCast(BI, Parent, ParentInfo);
} else if (auto *CI = dyn_cast<CallInst>(Inst)) {
- uint32_t CIKind;
- if (IsPreserveDIAccessIndexCall(CI, CIKind)) {
- AIChain[CI] = std::make_pair(Parent, Kind);
- traceAICall(CI, CIKind);
+ CallInfo ChildInfo;
+ if (IsPreserveDIAccessIndexCall(CI, ChildInfo) &&
+ IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex,
+ ChildInfo.Metadata)) {
+ AIChain[CI] = std::make_pair(Parent, ParentInfo);
+ traceAICall(CI, ChildInfo);
} else {
- BaseAICalls[Parent] = Kind;
+ BaseAICalls[Parent] = ParentInfo;
}
} else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
if (GI->hasAllZeroIndices())
- traceGEP(GI, Parent, Kind);
+ traceGEP(GI, Parent, ParentInfo);
else
- BaseAICalls[Parent] = Kind;
+ BaseAICalls[Parent] = ParentInfo;
+ } else {
+ BaseAICalls[Parent] = ParentInfo;
}
}
}
@@ -316,92 +492,345 @@ void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) {
for (auto &BB : F)
for (auto &I : BB) {
- uint32_t Kind;
+ CallInfo CInfo;
auto *Call = dyn_cast<CallInst>(&I);
- if (!IsPreserveDIAccessIndexCall(Call, Kind) ||
+ if (!IsPreserveDIAccessIndexCall(Call, CInfo) ||
AIChain.find(Call) != AIChain.end())
continue;
- traceAICall(Call, Kind);
+ traceAICall(Call, CInfo);
}
}
-/// Get access index from the preserve_*_access_index intrinsic calls.
-bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue,
- uint64_t &AccessIndex) {
+uint64_t BPFAbstractMemberAccess::getConstant(const Value *IndexValue) {
const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue);
- if (!CV)
- return false;
+ assert(CV);
+ return CV->getValue().getZExtValue();
+}
- AccessIndex = CV->getValue().getZExtValue();
- return true;
+/// Get the start and the end of the storage offset for \p MemberTy.
+/// The storage bits correspond to the LLVM internal types, and the
+/// storage bits for the member determine what load width to use in
+/// order to extract the bitfield value.
+void BPFAbstractMemberAccess::GetStorageBitRange(DICompositeType *CTy,
+ DIDerivedType *MemberTy,
+ uint32_t AccessIndex,
+ uint32_t &StartBitOffset,
+ uint32_t &EndBitOffset) {
+ auto SOff = dyn_cast<ConstantInt>(MemberTy->getStorageOffsetInBits());
+ assert(SOff);
+ StartBitOffset = SOff->getZExtValue();
+
+ EndBitOffset = CTy->getSizeInBits();
+ uint32_t Index = AccessIndex + 1;
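+ // Scan the following members; the first one that starts a different
+ // storage unit (or is not a bitfield) bounds this member's storage.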
+ for (; Index < CTy->getElements().size(); ++Index) {
+ auto Member = cast<DIDerivedType>(CTy->getElements()[Index]);
+ if (!Member->getStorageOffsetInBits()) {
+ EndBitOffset = Member->getOffsetInBits();
+ break;
+ }
+ SOff = dyn_cast<ConstantInt>(Member->getStorageOffsetInBits());
+ assert(SOff);
+ unsigned BitOffset = SOff->getZExtValue();
+ if (BitOffset != StartBitOffset) {
+ EndBitOffset = BitOffset;
+ break;
+ }
+ }
+}
+
+uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
+ DICompositeType *CTy,
+ uint32_t AccessIndex,
+ uint32_t PatchImm) {
+ if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE)
+ return 1;
+
+ uint32_t Tag = CTy->getTag();
+ if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_OFFSET) {
+ if (Tag == dwarf::DW_TAG_array_type) {
+ auto *EltTy = stripQualifiers(CTy->getBaseType());
+ PatchImm += AccessIndex * calcArraySize(CTy, 1) *
+ (EltTy->getSizeInBits() >> 3);
+ } else if (Tag == dwarf::DW_TAG_structure_type) {
+ auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+ if (!MemberTy->isBitField()) {
+ PatchImm += MemberTy->getOffsetInBits() >> 3;
+ } else {
+ auto SOffset = dyn_cast<ConstantInt>(MemberTy->getStorageOffsetInBits());
+ assert(SOffset);
+ PatchImm += SOffset->getZExtValue() >> 3;
+ }
+ }
+ return PatchImm;
+ }
+
+ if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_SIZE) {
+ if (Tag == dwarf::DW_TAG_array_type) {
+ auto *EltTy = stripQualifiers(CTy->getBaseType());
+ return calcArraySize(CTy, 1) * (EltTy->getSizeInBits() >> 3);
+ } else {
+ auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+ uint32_t SizeInBits = MemberTy->getSizeInBits();
+ if (!MemberTy->isBitField())
+ return SizeInBits >> 3;
+
+ unsigned SBitOffset, NextSBitOffset;
+ GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset);
+ SizeInBits = NextSBitOffset - SBitOffset;
+ if (SizeInBits & (SizeInBits - 1))
+ report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info");
+ return SizeInBits >> 3;
+ }
+ }
+
+ if (InfoKind == BPFCoreSharedInfo::FIELD_SIGNEDNESS) {
+ const DIType *BaseTy;
+ if (Tag == dwarf::DW_TAG_array_type) {
+ // Signedness is only checked when the final array elements are accessed.
+ if (CTy->getElements().size() != 1)
+ report_fatal_error("Invalid array expression for llvm.bpf.preserve.field.info");
+ BaseTy = stripQualifiers(CTy->getBaseType());
+ } else {
+ auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+ BaseTy = stripQualifiers(MemberTy->getBaseType());
+ }
+
+ // Only basic types and enum types have signedness.
+ const auto *BTy = dyn_cast<DIBasicType>(BaseTy);
+ while (!BTy) {
+ const auto *CompTy = dyn_cast<DICompositeType>(BaseTy);
+ // Report an error if the field expression does not have signedness.
+ if (!CompTy || CompTy->getTag() != dwarf::DW_TAG_enumeration_type)
+ report_fatal_error("Invalid field expression for llvm.bpf.preserve.field.info");
+ BaseTy = stripQualifiers(CompTy->getBaseType());
+ BTy = dyn_cast<DIBasicType>(BaseTy);
+ }
+ uint32_t Encoding = BTy->getEncoding();
+ return (Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char);
+ }
+
+ if (InfoKind == BPFCoreSharedInfo::FIELD_LSHIFT_U64) {
+ // The value is loaded into a value with FIELD_BYTE_SIZE size,
+ // and then zero or sign extended to U64.
+ // FIELD_LSHIFT_U64 and FIELD_RSHIFT_U64 are operations
+ // to extract the original value.
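+ // Worked example: a 3-bit little-endian bitfield at bit offset 5 whose
+ // storage unit starts at bit 0 yields FIELD_LSHIFT_U64 = 0 + 64 - 5 - 3
+ // = 56 and FIELD_RSHIFT_U64 = 64 - 3 = 61.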
+ const Triple &Triple = TM->getTargetTriple();
+ DIDerivedType *MemberTy = nullptr;
+ bool IsBitField = false;
+ uint32_t SizeInBits;
+
+ if (Tag == dwarf::DW_TAG_array_type) {
+ auto *EltTy = stripQualifiers(CTy->getBaseType());
+ SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits();
+ } else {
+ MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+ SizeInBits = MemberTy->getSizeInBits();
+ IsBitField = MemberTy->isBitField();
+ }
+
+ if (!IsBitField) {
+ if (SizeInBits > 64)
+ report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
+ return 64 - SizeInBits;
+ }
+
+ unsigned SBitOffset, NextSBitOffset;
+ GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset);
+ if (NextSBitOffset - SBitOffset > 64)
+ report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
+
+ unsigned OffsetInBits = MemberTy->getOffsetInBits();
+ if (Triple.getArch() == Triple::bpfel)
+ return SBitOffset + 64 - OffsetInBits - SizeInBits;
+ else
+ return OffsetInBits + 64 - NextSBitOffset;
+ }
+
+ if (InfoKind == BPFCoreSharedInfo::FIELD_RSHIFT_U64) {
+ DIDerivedType *MemberTy = nullptr;
+ bool IsBitField = false;
+ uint32_t SizeInBits;
+ if (Tag == dwarf::DW_TAG_array_type) {
+ auto *EltTy = stripQualifiers(CTy->getBaseType());
+ SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits();
+ } else {
+ MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
+ SizeInBits = MemberTy->getSizeInBits();
+ IsBitField = MemberTy->isBitField();
+ }
+
+ if (!IsBitField) {
+ if (SizeInBits > 64)
+ report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
+ return 64 - SizeInBits;
+ }
+
+ unsigned SBitOffset, NextSBitOffset;
+ GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset);
+ if (NextSBitOffset - SBitOffset > 64)
+ report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
+
+ return 64 - SizeInBits;
+ }
+
+ llvm_unreachable("Unknown llvm.bpf.preserve.field.info info kind");
}
-/// Compute the base of the whole preserve_*_access_index chains, i.e., the base
+bool BPFAbstractMemberAccess::HasPreserveFieldInfoCall(CallInfoStack &CallStack) {
+ // This is called on the error return path; no need to maintain CallStack.
+ while (CallStack.size()) {
+ auto StackElem = CallStack.top();
+ if (StackElem.second.Kind == BPFPreserveFieldInfoAI)
+ return true;
+ CallStack.pop();
+ }
+ return false;
+}
+
+/// Compute the base of the whole preserve_* intrinsics chains, i.e., the base
/// pointer of the first preserve_*_access_index call, and construct the access
/// string, which will be the name of a global variable.
-Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
- std::string &AccessStr,
+Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
+ CallInfo &CInfo,
std::string &AccessKey,
- uint32_t Kind,
MDNode *&TypeMeta) {
Value *Base = nullptr;
- std::vector<uint64_t> AccessIndices;
- uint64_t TypeNameIndex = 0;
- std::string LastTypeName;
+ std::string TypeName;
+ CallInfoStack CallStack;
+ // Put the access chain into a stack with the top as the head of the chain.
while (Call) {
- // Base of original corresponding GEP
- Base = Call->getArgOperand(0);
-
- // Type Name
- std::string TypeName;
- MDNode *MDN;
- if (Kind == BPFPreserveUnionAI || Kind == BPFPreserveStructAI) {
- MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index);
- if (!MDN)
- return nullptr;
-
- DIType *Ty = dyn_cast<DIType>(MDN);
- if (!Ty)
- return nullptr;
+ CallStack.push(std::make_pair(Call, CInfo));
+ CInfo = AIChain[Call].second;
+ Call = AIChain[Call].first;
+ }
+ // The access offset from the base of the head of chain is also
+ // calculated here as all debuginfo types are available.
+
+ // Get type name and calculate the first index.
+ // We only want to get type name from structure or union.
+ // If user wants a relocation like
+ // int *p; ... __builtin_preserve_access_index(&p[4]) ...
+ // or
+ // int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ...
+ // we will skip them.
+ uint32_t FirstIndex = 0;
+ uint32_t PatchImm = 0; // AccessOffset or the requested field info
+ uint32_t InfoKind = BPFCoreSharedInfo::FIELD_BYTE_OFFSET;
+ while (CallStack.size()) {
+ auto StackElem = CallStack.top();
+ Call = StackElem.first;
+ CInfo = StackElem.second;
+
+ if (!Base)
+ Base = CInfo.Base;
+
+ DIType *Ty = stripQualifiers(cast<DIType>(CInfo.Metadata));
+ if (CInfo.Kind == BPFPreserveUnionAI ||
+ CInfo.Kind == BPFPreserveStructAI) {
+ // struct or union type
TypeName = Ty->getName();
+ TypeMeta = Ty;
+ PatchImm += FirstIndex * (Ty->getSizeInBits() >> 3);
+ break;
}
- // Access Index
- uint64_t AccessIndex;
- uint32_t ArgIndex = (Kind == BPFPreserveUnionAI) ? 1 : 2;
- if (!getAccessIndex(Call->getArgOperand(ArgIndex), AccessIndex))
- return nullptr;
-
- AccessIndices.push_back(AccessIndex);
- if (TypeName.size()) {
- TypeNameIndex = AccessIndices.size() - 1;
- LastTypeName = TypeName;
- TypeMeta = MDN;
+ assert(CInfo.Kind == BPFPreserveArrayAI);
+
+ // Array entries will always be consumed for accumulative initial index.
+ CallStack.pop();
+
+ // BPFPreserveArrayAI
+ uint64_t AccessIndex = CInfo.AccessIndex;
+
+ DIType *BaseTy = nullptr;
+ bool CheckElemType = false;
+ if (const auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ // array type
+ assert(CTy->getTag() == dwarf::DW_TAG_array_type);
+
+
+ FirstIndex += AccessIndex * calcArraySize(CTy, 1);
+ BaseTy = stripQualifiers(CTy->getBaseType());
+ CheckElemType = CTy->getElements().size() == 1;
+ } else {
+ // pointer type
+ auto *DTy = cast<DIDerivedType>(Ty);
+ assert(DTy->getTag() == dwarf::DW_TAG_pointer_type);
+
+ BaseTy = stripQualifiers(DTy->getBaseType());
+ CTy = dyn_cast<DICompositeType>(BaseTy);
+ if (!CTy) {
+ CheckElemType = true;
+ } else if (CTy->getTag() != dwarf::DW_TAG_array_type) {
+ FirstIndex += AccessIndex;
+ CheckElemType = true;
+ } else {
+ FirstIndex += AccessIndex * calcArraySize(CTy, 0);
+ }
}
- Kind = AIChain[Call].second;
- Call = AIChain[Call].first;
- }
+ if (CheckElemType) {
+ auto *CTy = dyn_cast<DICompositeType>(BaseTy);
+ if (!CTy) {
+ if (HasPreserveFieldInfoCall(CallStack))
+ report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic");
+ return nullptr;
+ }
- // The intial type name is required.
- // FIXME: if the initial type access is an array index, e.g.,
- // &a[3].b.c, only one dimentional array is supported.
- if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2)
- return nullptr;
+ unsigned CTag = CTy->getTag();
+ if (CTag == dwarf::DW_TAG_structure_type || CTag == dwarf::DW_TAG_union_type) {
+ TypeName = CTy->getName();
+ } else {
+ if (HasPreserveFieldInfoCall(CallStack))
+ report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic");
+ return nullptr;
+ }
+ TypeMeta = CTy;
+ PatchImm += FirstIndex * (CTy->getSizeInBits() >> 3);
+ break;
+ }
+ }
+ assert(TypeName.size());
+ AccessKey += std::to_string(FirstIndex);
+
+ // Traverse the rest of access chain to complete offset calculation
+ // and access key construction.
+ while (CallStack.size()) {
+ auto StackElem = CallStack.top();
+ CInfo = StackElem.second;
+ CallStack.pop();
+
+ if (CInfo.Kind == BPFPreserveFieldInfoAI)
+ break;
+
+ // If the next Call (the top of the stack) is a BPFPreserveFieldInfoAI,
+ // the action will be extracting field info.
+ if (CallStack.size()) {
+ auto StackElem2 = CallStack.top();
+ CallInfo CInfo2 = StackElem2.second;
+ if (CInfo2.Kind == BPFPreserveFieldInfoAI) {
+ InfoKind = CInfo2.AccessIndex;
+ assert(CallStack.size() == 1);
+ }
+ }
- // Construct the type string AccessStr.
- for (unsigned I = 0; I < AccessIndices.size(); ++I)
- AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr;
+ // Access Index
+ uint64_t AccessIndex = CInfo.AccessIndex;
+ AccessKey += ":" + std::to_string(AccessIndex);
- if (TypeNameIndex == AccessIndices.size() - 1)
- AccessStr = "0:" + AccessStr;
+ MDNode *MDN = CInfo.Metadata;
+ // At this stage, it cannot be pointer type.
+ auto *CTy = cast<DICompositeType>(stripQualifiers(cast<DIType>(MDN)));
+ PatchImm = GetFieldInfo(InfoKind, CTy, AccessIndex, PatchImm);
+ }
- // Access key is the type name + access string, uniquely identifying
- // one kernel memory access.
- AccessKey = LastTypeName + ":" + AccessStr;
+ // Access key is the type name + reloc type + patched imm + access string,
+ // uniquely identifying one relocation.
+ AccessKey = TypeName + ":" + std::to_string(InfoKind) + ":" +
+ std::to_string(PatchImm) + "$" + AccessKey;
return Base;
}
@@ -409,39 +838,52 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
/// Call/Kind is the base preserve_*_access_index() call. Attempts to do
/// transformation to a chain of relocable GEPs.
bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
- uint32_t Kind) {
- std::string AccessStr, AccessKey;
- MDNode *TypeMeta = nullptr;
+ CallInfo &CInfo) {
+ std::string AccessKey;
+ MDNode *TypeMeta;
Value *Base =
- computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta);
+ computeBaseAndAccessKey(Call, CInfo, AccessKey, TypeMeta);
if (!Base)
return false;
- // Do the transformation
- // For any original GEP Call and Base %2 like
- // %4 = bitcast %struct.net_device** %dev1 to i64*
- // it is transformed to:
- // %6 = load __BTF_0:sk_buff:0:0:2:0:
- // %7 = bitcast %struct.sk_buff* %2 to i8*
- // %8 = getelementptr i8, i8* %7, %6
- // %9 = bitcast i8* %8 to i64*
- // using %9 instead of %4
- // The original Call inst is removed.
BasicBlock *BB = Call->getParent();
GlobalVariable *GV;
if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
- GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false,
- GlobalVariable::ExternalLinkage, NULL, AccessStr);
+ IntegerType *VarType;
+ if (CInfo.Kind == BPFPreserveFieldInfoAI)
+ VarType = Type::getInt32Ty(BB->getContext()); // 32bit return value
+ else
+ VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr arith
+
+ GV = new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage,
+ NULL, AccessKey);
GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
- // Set the metadata (debuginfo types) for the global.
- if (TypeMeta)
- GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta);
+ GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta);
GEPGlobals[AccessKey] = GV;
} else {
GV = GEPGlobals[AccessKey];
}
+ if (CInfo.Kind == BPFPreserveFieldInfoAI) {
+ // Load the global variable which represents the returned field info.
+ auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV);
+ BB->getInstList().insert(Call->getIterator(), LDInst);
+ Call->replaceAllUsesWith(LDInst);
+ Call->eraseFromParent();
+ return true;
+ }
+
+ // For any original GEP Call and Base %2 like
+ // %4 = bitcast %struct.net_device** %dev1 to i64*
+ // it is transformed to:
+ // %6 = load sk_buff:50:$0:0:0:2:0
+ // %7 = bitcast %struct.sk_buff* %2 to i8*
+ // %8 = getelementptr i8, i8* %7, %6
+ // %9 = bitcast i8* %8 to i64*
+ // using %9 instead of %4
+ // The original Call inst is removed.
+
// Load the global variable.
auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV);
BB->getInstList().insert(Call->getIterator(), LDInst);
diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp
index e61e73468057..218b0302927c 100644
--- a/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -59,7 +59,7 @@ bool BPFAsmPrinter::doInitialization(Module &M) {
AsmPrinter::doInitialization(M);
// Only emit BTF when debuginfo available.
- if (MAI->doesSupportDebugInformation() && !empty(M.debug_compile_units())) {
+ if (MAI->doesSupportDebugInformation() && !M.debug_compile_units().empty()) {
BTF = new BTFDebug(this);
Handlers.push_back(HandlerInfo(std::unique_ptr<BTFDebug>(BTF), "emit",
"Debug Info Emission", "BTF",
diff --git a/lib/Target/BPF/BPFCORE.h b/lib/Target/BPF/BPFCORE.h
index e0950d95f8d7..ed4778353e52 100644
--- a/lib/Target/BPF/BPFCORE.h
+++ b/lib/Target/BPF/BPFCORE.h
@@ -13,10 +13,18 @@ namespace llvm {
class BPFCoreSharedInfo {
public:
- /// The attribute attached to globals representing a member offset
+ enum OffsetRelocKind : uint32_t {
+ FIELD_BYTE_OFFSET = 0,
+ FIELD_BYTE_SIZE,
+ FIELD_EXISTENCE,
+ FIELD_SIGNEDNESS,
+ FIELD_LSHIFT_U64,
+ FIELD_RSHIFT_U64,
+
+ MAX_FIELD_RELOC_KIND,
+ };
+ /// The attribute attached to globals representing a field access
static const std::string AmaAttr;
- /// The section name to identify a patchable external global
- static const std::string PatchableExtSecName;
};
} // namespace llvm
diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h
index 2dc6277d2244..a546351ec6cb 100644
--- a/lib/Target/BPF/BPFFrameLowering.h
+++ b/lib/Target/BPF/BPFFrameLowering.h
@@ -21,7 +21,7 @@ class BPFSubtarget;
class BPFFrameLowering : public TargetFrameLowering {
public:
explicit BPFFrameLowering(const BPFSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {}
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0) {}
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 1bd705c55188..f2be0ff070d2 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -45,9 +45,7 @@ class BPFDAGToDAGISel : public SelectionDAGISel {
public:
explicit BPFDAGToDAGISel(BPFTargetMachine &TM)
- : SelectionDAGISel(TM), Subtarget(nullptr) {
- curr_func_ = nullptr;
- }
+ : SelectionDAGISel(TM), Subtarget(nullptr) {}
StringRef getPassName() const override {
return "BPF DAG->DAG Pattern Instruction Selection";
@@ -92,14 +90,8 @@ private:
val_vec_type &Vals, int Offset);
bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset,
uint64_t Size, unsigned char *ByteSeq);
- bool checkLoadDef(unsigned DefReg, unsigned match_load_op);
-
// Mapping from ConstantStruct global value to corresponding byte-list values
std::map<const void *, val_vec_type> cs_vals_;
- // Mapping from vreg to load memory opcode
- std::map<unsigned, unsigned> load_to_vreg_;
- // Current function
- const Function *curr_func_;
};
} // namespace
@@ -325,32 +317,13 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
}
void BPFDAGToDAGISel::PreprocessISelDAG() {
- // Iterate through all nodes, interested in the following cases:
+ // Iterate through all nodes, interested in the following case:
//
// . loads from ConstantStruct or ConstantArray of constructs
// which can be turns into constant itself, with this we can
// avoid reading from read-only section at runtime.
//
- // . reg truncating is often the result of 8/16/32bit->64bit or
- // 8/16bit->32bit conversion. If the reg value is loaded with
- // masked byte width, the AND operation can be removed since
- // BPF LOAD already has zero extension.
- //
- // This also solved a correctness issue.
- // In BPF socket-related program, e.g., __sk_buff->{data, data_end}
- // are 32-bit registers, but later on, kernel verifier will rewrite
- // it with 64-bit value. Therefore, truncating the value after the
- // load will result in incorrect code.
-
- // clear the load_to_vreg_ map so that we have a clean start
- // for this function.
- if (!curr_func_) {
- curr_func_ = FuncInfo->Fn;
- } else if (curr_func_ != FuncInfo->Fn) {
- load_to_vreg_.clear();
- curr_func_ = FuncInfo->Fn;
- }
-
+ // . Removing redundant AND for intrinsic narrow loads.
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end();
I != E;) {
@@ -358,8 +331,6 @@ void BPFDAGToDAGISel::PreprocessISelDAG() {
unsigned Opcode = Node->getOpcode();
if (Opcode == ISD::LOAD)
PreprocessLoad(Node, I);
- else if (Opcode == ISD::CopyToReg)
- PreprocessCopyToReg(Node);
else if (Opcode == ISD::AND)
PreprocessTrunc(Node, I);
}
@@ -491,37 +462,6 @@ bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL,
return true;
}
-void BPFDAGToDAGISel::PreprocessCopyToReg(SDNode *Node) {
- const RegisterSDNode *RegN = dyn_cast<RegisterSDNode>(Node->getOperand(1));
- if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg()))
- return;
-
- const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node->getOperand(2));
- if (!LD)
- return;
-
- // Assign a load value to a virtual register. record its load width
- unsigned mem_load_op = 0;
- switch (LD->getMemOperand()->getSize()) {
- default:
- return;
- case 4:
- mem_load_op = BPF::LDW;
- break;
- case 2:
- mem_load_op = BPF::LDH;
- break;
- case 1:
- mem_load_op = BPF::LDB;
- break;
- }
-
- LLVM_DEBUG(dbgs() << "Find Load Value to VReg "
- << TargetRegisterInfo::virtReg2Index(RegN->getReg())
- << '\n');
- load_to_vreg_[RegN->getReg()] = mem_load_op;
-}
-
void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
SelectionDAG::allnodes_iterator &I) {
ConstantSDNode *MaskN = dyn_cast<ConstantSDNode>(Node->getOperand(1));
@@ -535,112 +475,26 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
// which the generic optimizer doesn't understand their results are
// zero extended.
SDValue BaseV = Node->getOperand(0);
- if (BaseV.getOpcode() == ISD::INTRINSIC_W_CHAIN) {
- unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue();
- uint64_t MaskV = MaskN->getZExtValue();
-
- if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) ||
- (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) ||
- (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF)))
- return;
-
- LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: ";
- Node->dump(); dbgs() << '\n');
-
- I--;
- CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV);
- I++;
- CurDAG->DeleteNode(Node);
-
- return;
- }
-
- // Multiple basic blocks case.
- if (BaseV.getOpcode() != ISD::CopyFromReg)
+ if (BaseV.getOpcode() != ISD::INTRINSIC_W_CHAIN)
return;
- unsigned match_load_op = 0;
- switch (MaskN->getZExtValue()) {
- default:
- return;
- case 0xFFFFFFFF:
- match_load_op = BPF::LDW;
- break;
- case 0xFFFF:
- match_load_op = BPF::LDH;
- break;
- case 0xFF:
- match_load_op = BPF::LDB;
- break;
- }
+ unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue();
+ uint64_t MaskV = MaskN->getZExtValue();
- const RegisterSDNode *RegN =
- dyn_cast<RegisterSDNode>(BaseV.getNode()->getOperand(1));
- if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg()))
+ if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) ||
+ (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) ||
+ (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF)))
return;
- unsigned AndOpReg = RegN->getReg();
- LLVM_DEBUG(dbgs() << "Examine " << printReg(AndOpReg) << '\n');
-
- // Examine the PHI insns in the MachineBasicBlock to found out the
- // definitions of this virtual register. At this stage (DAG2DAG
- // transformation), only PHI machine insns are available in the machine basic
- // block.
- MachineBasicBlock *MBB = FuncInfo->MBB;
- MachineInstr *MII = nullptr;
- for (auto &MI : *MBB) {
- for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
- const MachineOperand &MOP = MI.getOperand(i);
- if (!MOP.isReg() || !MOP.isDef())
- continue;
- unsigned Reg = MOP.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg) && Reg == AndOpReg) {
- MII = &MI;
- break;
- }
- }
- }
-
- if (MII == nullptr) {
- // No phi definition in this block.
- if (!checkLoadDef(AndOpReg, match_load_op))
- return;
- } else {
- // The PHI node looks like:
- // %2 = PHI %0, <%bb.1>, %1, <%bb.3>
- // Trace each incoming definition, e.g., (%0, %bb.1) and (%1, %bb.3)
- // The AND operation can be removed if both %0 in %bb.1 and %1 in
- // %bb.3 are defined with a load matching the MaskN.
- LLVM_DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n');
- unsigned PrevReg = -1;
- for (unsigned i = 0; i < MII->getNumOperands(); ++i) {
- const MachineOperand &MOP = MII->getOperand(i);
- if (MOP.isReg()) {
- if (MOP.isDef())
- continue;
- PrevReg = MOP.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(PrevReg))
- return;
- if (!checkLoadDef(PrevReg, match_load_op))
- return;
- }
- }
- }
- LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump();
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: ";
+ Node->dump(); dbgs() << '\n');
I--;
CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV);
I++;
CurDAG->DeleteNode(Node);
-}
-
-bool BPFDAGToDAGISel::checkLoadDef(unsigned DefReg, unsigned match_load_op) {
- auto it = load_to_vreg_.find(DefReg);
- if (it == load_to_vreg_.end())
- return false; // The definition of register is not exported yet.
- return it->second == match_load_op;
+ return;
}
FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) {
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index ff69941d26fb..56e0288f26c9 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -132,9 +132,9 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setBooleanContents(ZeroOrOneBooleanContent);
- // Function alignments (log2)
- setMinFunctionAlignment(3);
- setPrefFunctionAlignment(3);
+ // Function alignments
+ setMinFunctionAlignment(Align(8));
+ setPrefFunctionAlignment(Align(8));
if (BPFExpandMemcpyInOrder) {
// LLVM generic code will try to expand memcpy into load/store pairs at this
@@ -236,9 +236,8 @@ SDValue BPFTargetLowering::LowerFormalArguments(
}
case MVT::i32:
case MVT::i64:
- unsigned VReg = RegInfo.createVirtualRegister(SimpleTy == MVT::i64 ?
- &BPF::GPRRegClass :
- &BPF::GPR32RegClass);
+ Register VReg = RegInfo.createVirtualRegister(
+ SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);
@@ -571,9 +570,9 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
DebugLoc DL = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned PromotedReg0 = RegInfo.createVirtualRegister(RC);
- unsigned PromotedReg1 = RegInfo.createVirtualRegister(RC);
- unsigned PromotedReg2 = RegInfo.createVirtualRegister(RC);
+ Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
+ Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
+ Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
.addReg(PromotedReg0).addImm(32);
@@ -699,7 +698,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
report_fatal_error("unimplemented select CondCode " + Twine(CC));
}
- unsigned LHS = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(1).getReg();
bool isSignedCmp = (CC == ISD::SETGT ||
CC == ISD::SETGE ||
CC == ISD::SETLT ||
@@ -716,7 +715,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
if (isSelectRROp) {
- unsigned RHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(2).getReg();
if (is32BitCmp && !HasJmp32)
RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);
diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp
index 932f718d5490..6de3a4084d3d 100644
--- a/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/lib/Target/BPF/BPFInstrInfo.cpp
@@ -43,11 +43,11 @@ void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
void BPFInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
+ Register SrcReg = MI->getOperand(1).getReg();
uint64_t CopyLen = MI->getOperand(2).getImm();
uint64_t Alignment = MI->getOperand(3).getImm();
- unsigned ScratchReg = MI->getOperand(4).getReg();
+ Register ScratchReg = MI->getOperand(4).getReg();
MachineBasicBlock *BB = MI->getParent();
DebugLoc dl = MI->getDebugLoc();
unsigned LdOpc, StOpc;
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index c44702a78ec8..ae5a82a99303 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -473,7 +473,7 @@ class CALL<string OpcodeStr>
class CALLX<string OpcodeStr>
: TYPE_ALU_JMP<BPF_CALL.Value, BPF_X.Value,
(outs),
- (ins calltarget:$BrDst),
+ (ins GPR:$BrDst),
!strconcat(OpcodeStr, " $BrDst"),
[]> {
bits<32> BrDst;
diff --git a/lib/Target/BPF/BPFMIChecking.cpp b/lib/Target/BPF/BPFMIChecking.cpp
index 4c46289656b4..f82f166eda4d 100644
--- a/lib/Target/BPF/BPFMIChecking.cpp
+++ b/lib/Target/BPF/BPFMIChecking.cpp
@@ -19,6 +19,7 @@
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/lib/Target/BPF/BPFMIPeephole.cpp b/lib/Target/BPF/BPFMIPeephole.cpp
index 156ba793e359..e9eecc55c3c3 100644
--- a/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/lib/Target/BPF/BPFMIPeephole.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -71,7 +72,7 @@ void BPFMIPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
- LLVM_DEBUG(dbgs() << "*** BPF MachineSSA peephole pass ***\n\n");
+ LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
}
bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
@@ -104,10 +105,10 @@ bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
if (!opnd.isReg())
return false;
- unsigned Reg = opnd.getReg();
- if ((TargetRegisterInfo::isVirtualRegister(Reg) &&
+ Register Reg = opnd.getReg();
+ if ((Register::isVirtualRegister(Reg) &&
MRI->getRegClass(Reg) == &BPF::GPRRegClass))
- return false;
+ return false;
}
LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
@@ -134,8 +135,8 @@ bool BPFMIPeephole::eliminateZExtSeq(void) {
// SRL_ri rB, rB, 32
if (MI.getOpcode() == BPF::SRL_ri &&
MI.getOperand(2).getImm() == 32) {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned ShfReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register ShfReg = MI.getOperand(1).getReg();
MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
LLVM_DEBUG(dbgs() << "Starting SRL found:");
@@ -159,7 +160,7 @@ bool BPFMIPeephole::eliminateZExtSeq(void) {
LLVM_DEBUG(dbgs() << " Type cast Mov found:");
LLVM_DEBUG(MovMI->dump());
- unsigned SubReg = MovMI->getOperand(1).getReg();
+ Register SubReg = MovMI->getOperand(1).getReg();
if (!isMovFrom32Def(MovMI)) {
LLVM_DEBUG(dbgs()
<< " One ZExt elim sequence failed qualifying elim.\n");
@@ -186,7 +187,8 @@ bool BPFMIPeephole::eliminateZExtSeq(void) {
} // end default namespace
INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
- "BPF MachineSSA Peephole Optimization", false, false)
+ "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
+ false, false)
char BPFMIPeephole::ID = 0;
FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
@@ -253,12 +255,16 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
// enabled. The special type cast insn MOV_32_64 involves different
// register class on src (i32) and dst (i64), RA could generate useless
// instruction due to this.
- if (MI.getOpcode() == BPF::MOV_32_64) {
- unsigned dst = MI.getOperand(0).getReg();
- unsigned dst_sub = TRI->getSubReg(dst, BPF::sub_32);
- unsigned src = MI.getOperand(1).getReg();
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == BPF::MOV_32_64 ||
+ Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) {
+ Register dst = MI.getOperand(0).getReg();
+ Register src = MI.getOperand(1).getReg();
+
+ if (Opcode == BPF::MOV_32_64)
+ dst = TRI->getSubReg(dst, BPF::sub_32);
- if (dst_sub != src)
+ if (dst != src)
continue;
ToErase = &MI;
@@ -281,3 +287,177 @@ FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
{
return new BPFMIPreEmitPeephole();
}
+
+STATISTIC(TruncElemNum, "Number of truncation eliminated");
+
+namespace {
+
+struct BPFMIPeepholeTruncElim : public MachineFunctionPass {
+
+ static char ID;
+ const BPFInstrInfo *TII;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+
+ BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) {
+ initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ bool eliminateTruncSeq(void);
+
+public:
+
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ initialize(MF);
+
+ return eliminateTruncSeq();
+ }
+};
+
+static bool TruncSizeCompatible(int TruncSize, unsigned opcode)
+{
+ if (TruncSize == 1)
+ return opcode == BPF::LDB || opcode == BPF::LDB32;
+
+ if (TruncSize == 2)
+ return opcode == BPF::LDH || opcode == BPF::LDH32;
+
+ if (TruncSize == 4)
+ return opcode == BPF::LDW || opcode == BPF::LDW32;
+
+ return false;
+}
+
+// Initialize class variables.
+void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ MRI = &MF->getRegInfo();
+ TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
+ LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n");
+}
+
+// Reg truncating is often the result of 8/16/32bit->64bit or
+// 8/16bit->32bit conversion. If the reg value is loaded with
+// masked byte width, the AND operation can be removed since
+// BPF LOAD already has zero extension.
+//
+// This also solved a correctness issue.
+// In BPF socket-related program, e.g., __sk_buff->{data, data_end}
+// are 32-bit registers, but later on, kernel verifier will rewrite
+// it with 64-bit value. Therefore, truncating the value after the
+// load will result in incorrect code.
+bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
+ MachineInstr* ToErase = nullptr;
+ bool Eliminated = false;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ // The second insn to remove if the eliminate candidate is a pair.
+ MachineInstr *MI2 = nullptr;
+ Register DstReg, SrcReg;
+ MachineInstr *DefMI;
+ int TruncSize = -1;
+
+ // If the previous instruction was marked for elimination, remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate
+ // for BPF ANDI is i32, and this case only happens on ALU64.
+ if (MI.getOpcode() == BPF::SRL_ri &&
+ MI.getOperand(2).getImm() == 32) {
+ SrcReg = MI.getOperand(1).getReg();
+ MI2 = MRI->getVRegDef(SrcReg);
+ DstReg = MI.getOperand(0).getReg();
+
+ if (!MI2 ||
+ MI2->getOpcode() != BPF::SLL_ri ||
+ MI2->getOperand(2).getImm() != 32)
+ continue;
+
+ // Update SrcReg.
+ SrcReg = MI2->getOperand(1).getReg();
+ DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI)
+ TruncSize = 4;
+ } else if (MI.getOpcode() == BPF::AND_ri ||
+ MI.getOpcode() == BPF::AND_ri_32) {
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ DefMI = MRI->getVRegDef(SrcReg);
+
+ if (!DefMI)
+ continue;
+
+ int64_t imm = MI.getOperand(2).getImm();
+ if (imm == 0xff)
+ TruncSize = 1;
+ else if (imm == 0xffff)
+ TruncSize = 2;
+ }
+
+ if (TruncSize == -1)
+ continue;
+
+ // The definition is PHI node, check all inputs.
+ if (DefMI->isPHI()) {
+ bool CheckFail = false;
+
+ for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
+ MachineOperand &opnd = DefMI->getOperand(i);
+ if (!opnd.isReg()) {
+ CheckFail = true;
+ break;
+ }
+
+ MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
+ if (!PhiDef || PhiDef->isPHI() ||
+ !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) {
+ CheckFail = true;
+ break;
+ }
+ }
+
+ if (CheckFail)
+ continue;
+ } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) {
+ continue;
+ }
+
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg)
+ .addReg(SrcReg);
+
+ if (MI2)
+ MI2->eraseFromParent();
+
+ // Mark it to ToErase, and erase in the next iteration.
+ ToErase = &MI;
+ TruncElemNum++;
+ Eliminated = true;
+ }
+ }
+
+ return Eliminated;
+}
+
+} // end default namespace
+
+INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim",
+ "BPF MachineSSA Peephole Optimization For TRUNC Eliminate",
+ false, false)
+
+char BPFMIPeepholeTruncElim::ID = 0;
+FunctionPass* llvm::createBPFMIPeepholeTruncElimPass()
+{
+ return new BPFMIPeepholeTruncElim();
+}
diff --git a/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index e9114d7187e3..9c689aed6417 100644
--- a/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -11,19 +11,15 @@
// ldd r2, r1, 0
// add r3, struct_base_reg, r2
//
-// Here @global should either present a AMA (abstruct member access) or
-// a patchable extern variable. And these two kinds of accesses
-// are subject to bpf load time patching. After this pass, the
+// Here @global should represent an AMA (abstruct member access).
+// Such an access is subject to bpf load time patching. After this pass, the
// code becomes
// ld_imm64 r1, @global
// add r3, struct_base_reg, r1
//
// Eventually, at BTF output stage, a relocation record will be generated
// for ld_imm64 which should be replaced later by bpf loader:
-// r1 = <calculated offset> or <to_be_patched_extern_val>
-// add r3, struct_base_reg, r1
-// or
-// ld_imm64 r1, <to_be_patched_extern_val>
+// r1 = <calculated field_info>
// add r3, struct_base_reg, r1
//
//===----------------------------------------------------------------------===//
@@ -34,6 +30,7 @@
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -100,9 +97,8 @@ bool BPFMISimplifyPatchable::removeLD() {
if (!MI.getOperand(2).isImm() || MI.getOperand(2).getImm())
continue;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
- int64_t ImmVal = MI.getOperand(2).getImm();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
MachineInstr *DefInst = MRI->getUniqueVRegDef(SrcReg);
if (!DefInst)
@@ -118,17 +114,8 @@ bool BPFMISimplifyPatchable::removeLD() {
// Global variables representing structure offset or
// patchable extern globals.
if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
- assert(ImmVal == 0);
+ assert(MI.getOperand(2).getImm() == 0);
IsCandidate = true;
- } else if (!GVar->hasInitializer() && GVar->hasExternalLinkage() &&
- GVar->getSection() ==
- BPFCoreSharedInfo::PatchableExtSecName) {
- if (ImmVal == 0)
- IsCandidate = true;
- else
- errs() << "WARNING: unhandled patchable extern "
- << GVar->getName() << " with load offset " << ImmVal
- << "\n";
}
}
}
diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp
index 714af06e11d9..8de81a469b84 100644
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -77,7 +77,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
- unsigned FrameReg = getFrameRegister(MF);
+ Register FrameReg = getFrameRegister(MF);
int FrameIndex = MI.getOperand(i).getIndex();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -86,7 +86,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
WarnSize(Offset, MF, DL);
MI.getOperand(i).ChangeToRegister(FrameReg, false);
- unsigned reg = MI.getOperand(i - 1).getReg();
+ Register reg = MI.getOperand(i - 1).getReg();
BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg)
.addReg(reg)
.addImm(Offset);
@@ -105,7 +105,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// architecture does not really support FI_ri, replace it with
// MOV_rr <target_reg>, frame_reg
// ADD_ri <target_reg>, imm
- unsigned reg = MI.getOperand(i - 1).getReg();
+ Register reg = MI.getOperand(i - 1).getReg();
BuildMI(MBB, ++II, DL, TII.get(BPF::MOV_rr), reg)
.addReg(FrameReg);
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 24c0ff0f7f15..0c4f2c74e7a4 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -36,6 +36,7 @@ extern "C" void LLVMInitializeBPFTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeBPFAbstractMemberAccessPass(PR);
initializeBPFMIPeepholePass(PR);
+ initializeBPFMIPeepholeTruncElimPass(PR);
}
// DataLayout: little or big endian
@@ -61,7 +62,7 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- TLOF(make_unique<TargetLoweringObjectFileELF>()),
+ TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -94,7 +95,7 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
void BPFPassConfig::addIRPasses() {
- addPass(createBPFAbstractMemberAccess());
+ addPass(createBPFAbstractMemberAccess(&getBPFTargetMachine()));
TargetPassConfig::addIRPasses();
}
@@ -115,15 +116,16 @@ void BPFPassConfig::addMachineSSAOptimization() {
TargetPassConfig::addMachineSSAOptimization();
const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl();
- if (Subtarget->getHasAlu32() && !DisableMIPeephole)
- addPass(createBPFMIPeepholePass());
+ if (!DisableMIPeephole) {
+ if (Subtarget->getHasAlu32())
+ addPass(createBPFMIPeepholePass());
+ addPass(createBPFMIPeepholeTruncElimPass());
+ }
}
void BPFPassConfig::addPreEmitPass() {
- const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl();
-
addPass(createBPFMIPreEmitCheckingPass());
if (getOptLevel() != CodeGenOpt::None)
- if (Subtarget->getHasAlu32() && !DisableMIPeephole)
+ if (!DisableMIPeephole)
addPass(createBPFMIPreEmitPeepholePass());
}
diff --git a/lib/Target/BPF/BTF.h b/lib/Target/BPF/BTF.h
index ad56716710a6..a13c862bf840 100644
--- a/lib/Target/BPF/BTF.h
+++ b/lib/Target/BPF/BTF.h
@@ -17,7 +17,7 @@
///
/// The binary layout for .BTF.ext section:
/// struct ExtHeader
-/// FuncInfo, LineInfo, OffsetReloc and ExternReloc subsections
+/// FuncInfo, LineInfo, FieldReloc and ExternReloc subsections
/// The FuncInfo subsection is defined as below:
/// BTFFuncInfo Size
/// struct SecFuncInfo for ELF section #1
@@ -32,19 +32,12 @@
/// struct SecLineInfo for ELF section #2
/// A number of struct BPFLineInfo for ELF section #2
/// ...
-/// The OffsetReloc subsection is defined as below:
-/// BPFOffsetReloc Size
-/// struct SecOffsetReloc for ELF section #1
-/// A number of struct BPFOffsetReloc for ELF section #1
-/// struct SecOffsetReloc for ELF section #2
-/// A number of struct BPFOffsetReloc for ELF section #2
-/// ...
-/// The ExternReloc subsection is defined as below:
-/// BPFExternReloc Size
-/// struct SecExternReloc for ELF section #1
-/// A number of struct BPFExternReloc for ELF section #1
-/// struct SecExternReloc for ELF section #2
-/// A number of struct BPFExternReloc for ELF section #2
+/// The FieldReloc subsection is defined as below:
+/// BPFFieldReloc Size
+/// struct SecFieldReloc for ELF section #1
+/// A number of struct BPFFieldReloc for ELF section #1
+/// struct SecFieldReloc for ELF section #2
+/// A number of struct BPFFieldReloc for ELF section #2
/// ...
///
/// The section formats are also defined at
@@ -63,7 +56,7 @@ enum : uint32_t { MAGIC = 0xeB9F, VERSION = 1 };
/// Sizes in bytes of various things in the BTF format.
enum {
HeaderSize = 24,
- ExtHeaderSize = 40,
+ ExtHeaderSize = 32,
CommonTypeSize = 12,
BTFArraySize = 12,
BTFEnumSize = 8,
@@ -72,12 +65,10 @@ enum {
BTFDataSecVarSize = 12,
SecFuncInfoSize = 8,
SecLineInfoSize = 8,
- SecOffsetRelocSize = 8,
- SecExternRelocSize = 8,
+ SecFieldRelocSize = 8,
BPFFuncInfoSize = 8,
BPFLineInfoSize = 16,
- BPFOffsetRelocSize = 12,
- BPFExternRelocSize = 8,
+ BPFFieldRelocSize = 16,
};
/// The .BTF section header definition.
@@ -213,10 +204,8 @@ struct ExtHeader {
uint32_t FuncInfoLen; ///< Length of func info section
uint32_t LineInfoOff; ///< Offset of line info section
uint32_t LineInfoLen; ///< Length of line info section
- uint32_t OffsetRelocOff; ///< Offset of offset reloc section
- uint32_t OffsetRelocLen; ///< Length of offset reloc section
- uint32_t ExternRelocOff; ///< Offset of extern reloc section
- uint32_t ExternRelocLen; ///< Length of extern reloc section
+ uint32_t FieldRelocOff; ///< Offset of offset reloc section
+ uint32_t FieldRelocLen; ///< Length of offset reloc section
};
/// Specifying one function info.
@@ -247,28 +236,17 @@ struct SecLineInfo {
};
/// Specifying one offset relocation.
-struct BPFOffsetReloc {
+struct BPFFieldReloc {
uint32_t InsnOffset; ///< Byte offset in this section
uint32_t TypeID; ///< TypeID for the relocation
uint32_t OffsetNameOff; ///< The string to traverse types
+ uint32_t RelocKind; ///< What to patch the instruction
};
/// Specifying offset relocation's in one section.
-struct SecOffsetReloc {
- uint32_t SecNameOff; ///< Section name index in the .BTF string table
- uint32_t NumOffsetReloc; ///< Number of offset reloc's in this section
-};
-
-/// Specifying one offset relocation.
-struct BPFExternReloc {
- uint32_t InsnOffset; ///< Byte offset in this section
- uint32_t ExternNameOff; ///< The string for external variable
-};
-
-/// Specifying extern relocation's in one section.
-struct SecExternReloc {
+struct SecFieldReloc {
uint32_t SecNameOff; ///< Section name index in the .BTF string table
- uint32_t NumExternReloc; ///< Number of extern reloc's in this section
+ uint32_t NumFieldReloc; ///< Number of offset reloc's in this section
};
} // End namespace BTF.
diff --git a/lib/Target/BPF/BTFDebug.cpp b/lib/Target/BPF/BTFDebug.cpp
index fa35c6619e21..db551e739bd7 100644
--- a/lib/Target/BPF/BTFDebug.cpp
+++ b/lib/Target/BPF/BTFDebug.cpp
@@ -184,9 +184,7 @@ void BTFTypeEnum::emitType(MCStreamer &OS) {
}
}
-BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize,
- uint32_t NumElems)
- : ElemSize(ElemSize) {
+BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems) {
Kind = BTF::BTF_KIND_ARRAY;
BTFType.NameOff = 0;
BTFType.Info = Kind << 24;
@@ -216,12 +214,6 @@ void BTFTypeArray::emitType(MCStreamer &OS) {
OS.EmitIntValue(ArrayInfo.Nelems, 4);
}
-void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset,
- uint32_t &ElementTypeId) {
- ElementTypeId = ArrayInfo.ElemType;
- LocOffset = Loc * ElemSize;
-}
-
/// Represent either a struct or a union.
BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct,
bool HasBitField, uint32_t Vlen)
@@ -251,7 +243,8 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
} else {
BTFMember.Offset = DDTy->getOffsetInBits();
}
- BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType());
+ const auto *BaseTy = DDTy->getBaseType();
+ BTFMember.Type = BDebug.getTypeId(BaseTy);
Members.push_back(BTFMember);
}
}
@@ -268,15 +261,6 @@ void BTFTypeStruct::emitType(MCStreamer &OS) {
std::string BTFTypeStruct::getName() { return STy->getName(); }
-void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset,
- uint32_t &MemberType) {
- MemberType = Members[Loc].Type;
- MemberOffset =
- HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset;
-}
-
-uint32_t BTFTypeStruct::getStructSize() { return STy->getSizeInBits() >> 3; }
-
/// The Func kind represents both subprogram and pointee of function
/// pointers. If the FuncName is empty, it represents a pointee of function
/// pointer. Otherwise, it represents a subprogram. The func arg names
@@ -428,7 +412,7 @@ void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) {
// Create a BTF type instance for this DIBasicType and put it into
// DIToIdMap for cross-type reference check.
- auto TypeEntry = llvm::make_unique<BTFTypeInt>(
+ auto TypeEntry = std::make_unique<BTFTypeInt>(
Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
TypeId = addType(std::move(TypeEntry), BTy);
}
@@ -447,7 +431,7 @@ void BTFDebug::visitSubroutineType(
// a function pointer has an empty name. The subprogram type will
// not be added to DIToIdMap as it should not be referenced by
// any other types.
- auto TypeEntry = llvm::make_unique<BTFTypeFuncProto>(STy, VLen, FuncArgNames);
+ auto TypeEntry = std::make_unique<BTFTypeFuncProto>(STy, VLen, FuncArgNames);
if (ForSubprog)
TypeId = addType(std::move(TypeEntry)); // For subprogram
else
@@ -478,7 +462,7 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
}
auto TypeEntry =
- llvm::make_unique<BTFTypeStruct>(CTy, IsStruct, HasBitField, VLen);
+ std::make_unique<BTFTypeStruct>(CTy, IsStruct, HasBitField, VLen);
StructTypes.push_back(TypeEntry.get());
TypeId = addType(std::move(TypeEntry), CTy);
@@ -489,35 +473,29 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
// Visit array element type.
- uint32_t ElemTypeId, ElemSize;
+ uint32_t ElemTypeId;
const DIType *ElemType = CTy->getBaseType();
visitTypeEntry(ElemType, ElemTypeId, false, false);
- ElemSize = ElemType->getSizeInBits() >> 3;
- if (!CTy->getSizeInBits()) {
- auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemTypeId, 0, 0);
- ArrayTypes.push_back(TypeEntry.get());
- ElemTypeId = addType(std::move(TypeEntry), CTy);
- } else {
- // Visit array dimensions.
- DINodeArray Elements = CTy->getElements();
- for (int I = Elements.size() - 1; I >= 0; --I) {
- if (auto *Element = dyn_cast_or_null<DINode>(Elements[I]))
- if (Element->getTag() == dwarf::DW_TAG_subrange_type) {
- const DISubrange *SR = cast<DISubrange>(Element);
- auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
- int64_t Count = CI->getSExtValue();
-
- auto TypeEntry =
- llvm::make_unique<BTFTypeArray>(ElemTypeId, ElemSize, Count);
- ArrayTypes.push_back(TypeEntry.get());
- if (I == 0)
- ElemTypeId = addType(std::move(TypeEntry), CTy);
- else
- ElemTypeId = addType(std::move(TypeEntry));
- ElemSize = ElemSize * Count;
- }
- }
+ // Visit array dimensions.
+ DINodeArray Elements = CTy->getElements();
+ for (int I = Elements.size() - 1; I >= 0; --I) {
+ if (auto *Element = dyn_cast_or_null<DINode>(Elements[I]))
+ if (Element->getTag() == dwarf::DW_TAG_subrange_type) {
+ const DISubrange *SR = cast<DISubrange>(Element);
+ auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
+ int64_t Count = CI->getSExtValue();
+
+ // For struct s { int b; char c[]; }, the c[] will be represented
+ // as an array with Count = -1.
+ auto TypeEntry =
+ std::make_unique<BTFTypeArray>(ElemTypeId,
+ Count >= 0 ? Count : 0);
+ if (I == 0)
+ ElemTypeId = addType(std::move(TypeEntry), CTy);
+ else
+ ElemTypeId = addType(std::move(TypeEntry));
+ }
}
// The array TypeId is the type id of the outermost dimension.
@@ -526,7 +504,7 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
// The IR does not have a type for array index while BTF wants one.
// So create an array index type if there is none.
if (!ArrayIndexTypeId) {
- auto TypeEntry = llvm::make_unique<BTFTypeInt>(dwarf::DW_ATE_unsigned, 32,
+ auto TypeEntry = std::make_unique<BTFTypeInt>(dwarf::DW_ATE_unsigned, 32,
0, "__ARRAY_SIZE_TYPE__");
ArrayIndexTypeId = addType(std::move(TypeEntry));
}
@@ -538,7 +516,7 @@ void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) {
if (VLen > BTF::MAX_VLEN)
return;
- auto TypeEntry = llvm::make_unique<BTFTypeEnum>(CTy, VLen);
+ auto TypeEntry = std::make_unique<BTFTypeEnum>(CTy, VLen);
TypeId = addType(std::move(TypeEntry), CTy);
// No need to visit base type as BTF does not encode it.
}
@@ -546,7 +524,7 @@ void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) {
/// Handle structure/union forward declarations.
void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion,
uint32_t &TypeId) {
- auto TypeEntry = llvm::make_unique<BTFTypeFwd>(CTy->getName(), IsUnion);
+ auto TypeEntry = std::make_unique<BTFTypeFwd>(CTy->getName(), IsUnion);
TypeId = addType(std::move(TypeEntry), CTy);
}
@@ -588,7 +566,7 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
/// Find a candidate, generate a fixup. Later on the struct/union
/// pointee type will be replaced with either a real type or
/// a forward declaration.
- auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag, true);
+ auto TypeEntry = std::make_unique<BTFTypeDerived>(DTy, Tag, true);
auto &Fixup = FixupDerivedTypes[CTy->getName()];
Fixup.first = CTag == dwarf::DW_TAG_union_type;
Fixup.second.push_back(TypeEntry.get());
@@ -602,7 +580,7 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef ||
Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
- auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag, false);
+ auto TypeEntry = std::make_unique<BTFTypeDerived>(DTy, Tag, false);
TypeId = addType(std::move(TypeEntry), DTy);
} else if (Tag != dwarf::DW_TAG_member) {
return;
@@ -669,7 +647,7 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) {
}
auto TypeEntry =
- llvm::make_unique<BTFTypeStruct>(CTy, true, HasBitField, Elements.size());
+ std::make_unique<BTFTypeStruct>(CTy, true, HasBitField, Elements.size());
StructTypes.push_back(TypeEntry.get());
TypeId = addType(std::move(TypeEntry), CTy);
@@ -774,9 +752,10 @@ void BTFDebug::emitBTFSection() {
}
void BTFDebug::emitBTFExtSection() {
- // Do not emit section if empty FuncInfoTable and LineInfoTable.
+ // Do not emit section if empty FuncInfoTable and LineInfoTable
+ // and FieldRelocTable.
if (!FuncInfoTable.size() && !LineInfoTable.size() &&
- !OffsetRelocTable.size() && !ExternRelocTable.size())
+ !FieldRelocTable.size())
return;
MCContext &Ctx = OS.getContext();
@@ -788,8 +767,8 @@ void BTFDebug::emitBTFExtSection() {
// Account for FuncInfo/LineInfo record size as well.
uint32_t FuncLen = 4, LineLen = 4;
- // Do not account for optional OffsetReloc/ExternReloc.
- uint32_t OffsetRelocLen = 0, ExternRelocLen = 0;
+ // Do not account for optional FieldReloc.
+ uint32_t FieldRelocLen = 0;
for (const auto &FuncSec : FuncInfoTable) {
FuncLen += BTF::SecFuncInfoSize;
FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize;
@@ -798,28 +777,20 @@ void BTFDebug::emitBTFExtSection() {
LineLen += BTF::SecLineInfoSize;
LineLen += LineSec.second.size() * BTF::BPFLineInfoSize;
}
- for (const auto &OffsetRelocSec : OffsetRelocTable) {
- OffsetRelocLen += BTF::SecOffsetRelocSize;
- OffsetRelocLen += OffsetRelocSec.second.size() * BTF::BPFOffsetRelocSize;
- }
- for (const auto &ExternRelocSec : ExternRelocTable) {
- ExternRelocLen += BTF::SecExternRelocSize;
- ExternRelocLen += ExternRelocSec.second.size() * BTF::BPFExternRelocSize;
+ for (const auto &FieldRelocSec : FieldRelocTable) {
+ FieldRelocLen += BTF::SecFieldRelocSize;
+ FieldRelocLen += FieldRelocSec.second.size() * BTF::BPFFieldRelocSize;
}
- if (OffsetRelocLen)
- OffsetRelocLen += 4;
- if (ExternRelocLen)
- ExternRelocLen += 4;
+ if (FieldRelocLen)
+ FieldRelocLen += 4;
OS.EmitIntValue(0, 4);
OS.EmitIntValue(FuncLen, 4);
OS.EmitIntValue(FuncLen, 4);
OS.EmitIntValue(LineLen, 4);
OS.EmitIntValue(FuncLen + LineLen, 4);
- OS.EmitIntValue(OffsetRelocLen, 4);
- OS.EmitIntValue(FuncLen + LineLen + OffsetRelocLen, 4);
- OS.EmitIntValue(ExternRelocLen, 4);
+ OS.EmitIntValue(FieldRelocLen, 4);
// Emit func_info table.
OS.AddComment("FuncInfo");
@@ -853,35 +824,20 @@ void BTFDebug::emitBTFExtSection() {
}
}
- // Emit offset reloc table.
- if (OffsetRelocLen) {
- OS.AddComment("OffsetReloc");
- OS.EmitIntValue(BTF::BPFOffsetRelocSize, 4);
- for (const auto &OffsetRelocSec : OffsetRelocTable) {
- OS.AddComment("Offset reloc section string offset=" +
- std::to_string(OffsetRelocSec.first));
- OS.EmitIntValue(OffsetRelocSec.first, 4);
- OS.EmitIntValue(OffsetRelocSec.second.size(), 4);
- for (const auto &OffsetRelocInfo : OffsetRelocSec.second) {
- Asm->EmitLabelReference(OffsetRelocInfo.Label, 4);
- OS.EmitIntValue(OffsetRelocInfo.TypeID, 4);
- OS.EmitIntValue(OffsetRelocInfo.OffsetNameOff, 4);
- }
- }
- }
-
- // Emit extern reloc table.
- if (ExternRelocLen) {
- OS.AddComment("ExternReloc");
- OS.EmitIntValue(BTF::BPFExternRelocSize, 4);
- for (const auto &ExternRelocSec : ExternRelocTable) {
- OS.AddComment("Extern reloc section string offset=" +
- std::to_string(ExternRelocSec.first));
- OS.EmitIntValue(ExternRelocSec.first, 4);
- OS.EmitIntValue(ExternRelocSec.second.size(), 4);
- for (const auto &ExternRelocInfo : ExternRelocSec.second) {
- Asm->EmitLabelReference(ExternRelocInfo.Label, 4);
- OS.EmitIntValue(ExternRelocInfo.ExternNameOff, 4);
+ // Emit field reloc table.
+ if (FieldRelocLen) {
+ OS.AddComment("FieldReloc");
+ OS.EmitIntValue(BTF::BPFFieldRelocSize, 4);
+ for (const auto &FieldRelocSec : FieldRelocTable) {
+ OS.AddComment("Field reloc section string offset=" +
+ std::to_string(FieldRelocSec.first));
+ OS.EmitIntValue(FieldRelocSec.first, 4);
+ OS.EmitIntValue(FieldRelocSec.second.size(), 4);
+ for (const auto &FieldRelocInfo : FieldRelocSec.second) {
+ Asm->EmitLabelReference(FieldRelocInfo.Label, 4);
+ OS.EmitIntValue(FieldRelocInfo.TypeID, 4);
+ OS.EmitIntValue(FieldRelocInfo.OffsetNameOff, 4);
+ OS.EmitIntValue(FieldRelocInfo.RelocKind, 4);
}
}
}
@@ -942,7 +898,7 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
// Construct subprogram func type
auto FuncTypeEntry =
- llvm::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId);
+ std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId);
uint32_t FuncTypeId = addType(std::move(FuncTypeEntry));
for (const auto &TypeEntry : TypeEntries)
@@ -980,71 +936,27 @@ unsigned BTFDebug::populateStructType(const DIType *Ty) {
return Id;
}
-// Find struct/array debuginfo types given a type id.
-void BTFDebug::setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType,
- BTFTypeArray **PrevArrayType) {
- for (const auto &StructType : StructTypes) {
- if (StructType->getId() == TypeId) {
- *PrevStructType = StructType;
- return;
- }
- }
- for (const auto &ArrayType : ArrayTypes) {
- if (ArrayType->getId() == TypeId) {
- *PrevArrayType = ArrayType;
- return;
- }
- }
-}
-
-/// Generate a struct member offset relocation.
-void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
+/// Generate a struct member field relocation.
+void BTFDebug::generateFieldReloc(const MachineInstr *MI,
const MCSymbol *ORSym, DIType *RootTy,
StringRef AccessPattern) {
- BTFTypeStruct *PrevStructType = nullptr;
- BTFTypeArray *PrevArrayType = nullptr;
unsigned RootId = populateStructType(RootTy);
- setTypeFromId(RootId, &PrevStructType, &PrevArrayType);
- unsigned RootTySize = PrevStructType->getStructSize();
-
- BTFOffsetReloc OffsetReloc;
- OffsetReloc.Label = ORSym;
- OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back());
- OffsetReloc.TypeID = RootId;
-
- uint32_t Start = 0, End = 0, Offset = 0;
- bool FirstAccess = true;
- for (auto C : AccessPattern) {
- if (C != ':') {
- End++;
- } else {
- std::string SubStr = AccessPattern.substr(Start, End - Start);
- int Loc = std::stoi(SubStr);
-
- if (FirstAccess) {
- Offset = Loc * RootTySize;
- FirstAccess = false;
- } else if (PrevStructType) {
- uint32_t MemberOffset, MemberTypeId;
- PrevStructType->getMemberInfo(Loc, MemberOffset, MemberTypeId);
-
- Offset += MemberOffset >> 3;
- PrevStructType = nullptr;
- setTypeFromId(MemberTypeId, &PrevStructType, &PrevArrayType);
- } else if (PrevArrayType) {
- uint32_t LocOffset, ElementTypeId;
- PrevArrayType->getLocInfo(Loc, LocOffset, ElementTypeId);
-
- Offset += LocOffset;
- PrevArrayType = nullptr;
- setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType);
- }
- Start = End + 1;
- End = Start;
- }
- }
- AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset;
- OffsetRelocTable[SecNameOff].push_back(OffsetReloc);
+ size_t FirstDollar = AccessPattern.find_first_of('$');
+ size_t FirstColon = AccessPattern.find_first_of(':');
+ size_t SecondColon = AccessPattern.find_first_of(':', FirstColon + 1);
+ StringRef IndexPattern = AccessPattern.substr(FirstDollar + 1);
+ StringRef RelocKindStr = AccessPattern.substr(FirstColon + 1,
+ SecondColon - FirstColon);
+ StringRef PatchImmStr = AccessPattern.substr(SecondColon + 1,
+ FirstDollar - SecondColon);
+
+ BTFFieldReloc FieldReloc;
+ FieldReloc.Label = ORSym;
+ FieldReloc.OffsetNameOff = addString(IndexPattern);
+ FieldReloc.TypeID = RootId;
+ FieldReloc.RelocKind = std::stoull(RelocKindStr);
+ PatchImms[AccessPattern.str()] = std::stoul(PatchImmStr);
+ FieldRelocTable[SecNameOff].push_back(FieldReloc);
}
void BTFDebug::processLDimm64(const MachineInstr *MI) {
@@ -1052,7 +964,7 @@ void BTFDebug::processLDimm64(const MachineInstr *MI) {
// will generate an .BTF.ext record.
//
// If the insn is "r2 = LD_imm64 @__BTF_...",
- // add this insn into the .BTF.ext OffsetReloc subsection.
+ // add this insn into the .BTF.ext FieldReloc subsection.
// Relocation looks like:
// . SecName:
// . InstOffset
@@ -1083,16 +995,7 @@ void BTFDebug::processLDimm64(const MachineInstr *MI) {
MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
DIType *Ty = dyn_cast<DIType>(MDN);
- generateOffsetReloc(MI, ORSym, Ty, GVar->getName());
- } else if (GVar && !GVar->hasInitializer() && GVar->hasExternalLinkage() &&
- GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) {
- MCSymbol *ORSym = OS.getContext().createTempSymbol();
- OS.EmitLabel(ORSym);
-
- BTFExternReloc ExternReloc;
- ExternReloc.Label = ORSym;
- ExternReloc.ExternNameOff = addString(GVar->getName());
- ExternRelocTable[SecNameOff].push_back(ExternReloc);
+ generateFieldReloc(MI, ORSym, Ty, GVar->getName());
}
}
}
@@ -1200,12 +1103,12 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
? BTF::VAR_GLOBAL_ALLOCATED
: BTF::VAR_STATIC;
auto VarEntry =
- llvm::make_unique<BTFKindVar>(Global.getName(), GVTypeId, GVarInfo);
+ std::make_unique<BTFKindVar>(Global.getName(), GVTypeId, GVarInfo);
uint32_t VarId = addType(std::move(VarEntry));
// Find or create a DataSec
if (DataSecEntries.find(SecName) == DataSecEntries.end()) {
- DataSecEntries[SecName] = llvm::make_unique<BTFKindDataSec>(Asm, SecName);
+ DataSecEntries[SecName] = std::make_unique<BTFKindDataSec>(Asm, SecName);
}
// Calculate symbol size
@@ -1224,30 +1127,12 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
const GlobalValue *GVal = MO.getGlobal();
auto *GVar = dyn_cast<GlobalVariable>(GVal);
if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
- MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
- DIType *Ty = dyn_cast<DIType>(MDN);
- std::string TypeName = Ty->getName();
- int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()];
-
- // Emit "mov ri, <imm>" for abstract member accesses.
+ // Emit "mov ri, <imm>" for patched immediate.
+ uint32_t Imm = PatchImms[GVar->getName().str()];
OutMI.setOpcode(BPF::MOV_ri);
OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
OutMI.addOperand(MCOperand::createImm(Imm));
return true;
- } else if (GVar && !GVar->hasInitializer() &&
- GVar->hasExternalLinkage() &&
- GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) {
- const IntegerType *IntTy = dyn_cast<IntegerType>(GVar->getValueType());
- assert(IntTy);
- // For patchable externals, emit "LD_imm64, ri, 0" if the external
- // variable is 64bit width, emit "mov ri, 0" otherwise.
- if (IntTy->getBitWidth() == 64)
- OutMI.setOpcode(BPF::LD_imm64);
- else
- OutMI.setOpcode(BPF::MOV_ri);
- OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
- OutMI.addOperand(MCOperand::createImm(0));
- return true;
}
}
}
@@ -1281,7 +1166,7 @@ void BTFDebug::endModule() {
}
if (StructTypeId == 0) {
- auto FwdTypeEntry = llvm::make_unique<BTFTypeFwd>(TypeName, IsUnion);
+ auto FwdTypeEntry = std::make_unique<BTFTypeFwd>(TypeName, IsUnion);
StructTypeId = addType(std::move(FwdTypeEntry));
}
diff --git a/lib/Target/BPF/BTFDebug.h b/lib/Target/BPF/BTFDebug.h
index 6c0cdde17d9b..c01e0d1d1612 100644
--- a/lib/Target/BPF/BTFDebug.h
+++ b/lib/Target/BPF/BTFDebug.h
@@ -104,15 +104,13 @@ public:
/// Handle array type.
class BTFTypeArray : public BTFTypeBase {
- uint32_t ElemSize;
struct BTF::BTFArray ArrayInfo;
public:
- BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems);
+ BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems);
uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
void completeType(BTFDebug &BDebug);
void emitType(MCStreamer &OS);
- void getLocInfo(uint32_t Loc, uint32_t &LocOffset, uint32_t &ElementTypeId);
};
/// Handle struct/union type.
@@ -130,8 +128,6 @@ public:
void completeType(BTFDebug &BDebug);
void emitType(MCStreamer &OS);
std::string getName();
- void getMemberInfo(uint32_t Loc, uint32_t &Offset, uint32_t &MemberType);
- uint32_t getStructSize();
};
/// Handle function pointer.
@@ -199,7 +195,7 @@ class BTFStringTable {
/// A mapping from string table offset to the index
/// of the Table. It is used to avoid putting
/// duplicated strings in the table.
- std::unordered_map<uint32_t, uint32_t> OffsetToIdMap;
+ std::map<uint32_t, uint32_t> OffsetToIdMap;
/// A vector of strings to represent the string table.
std::vector<std::string> Table;
@@ -228,16 +224,11 @@ struct BTFLineInfo {
};
/// Represent one field relocation.
-struct BTFOffsetReloc {
+struct BTFFieldReloc {
const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc
uint32_t TypeID; ///< Type ID
uint32_t OffsetNameOff; ///< The string to traverse types
-};
-
-/// Represent one extern relocation.
-struct BTFExternReloc {
- const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc
- uint32_t ExternNameOff; ///< The extern variable name
+ uint32_t RelocKind; ///< How to patch the instruction
};
/// Collect and emit BTF information.
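A field relocation is thus fully described by where the instruction is (Label), which type the access starts from (TypeID), which access string to walk (OffsetNameOff), and how to patch (RelocKind). A compact stand-in for the per-section table these records land in (the section offset and field values below are invented for illustration; the MCSymbol label is reduced to a plain instruction offset):

#include <cassert>
#include <cstdint>
#include <map>
#include <vector>

struct FieldRelocModel {
  uint32_t InsnOff;       // stands in for the resolved MCSymbol label
  uint32_t TypeID;        // root type of the access
  uint32_t OffsetNameOff; // string-table offset of the access pattern
  uint32_t RelocKind;     // how to patch the instruction
};

int main() {
  // Keyed by the string-table offset of the section name, as above.
  std::map<uint32_t, std::vector<FieldRelocModel>> FieldRelocTable;
  uint32_t SecNameOff = 1; // hypothetical offset of ".text"
  FieldRelocTable[SecNameOff].push_back(
      {/*InsnOff=*/16, /*TypeID=*/3, /*OffsetNameOff=*/7, /*RelocKind=*/0});
  assert(FieldRelocTable[SecNameOff].size() == 1);
  return 0;
}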
@@ -253,13 +244,11 @@ class BTFDebug : public DebugHandlerBase {
std::unordered_map<const DIType *, uint32_t> DIToIdMap;
std::map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
std::map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
- std::map<uint32_t, std::vector<BTFOffsetReloc>> OffsetRelocTable;
- std::map<uint32_t, std::vector<BTFExternReloc>> ExternRelocTable;
+ std::map<uint32_t, std::vector<BTFFieldReloc>> FieldRelocTable;
StringMap<std::vector<std::string>> FileContent;
std::map<std::string, std::unique_ptr<BTFKindDataSec>> DataSecEntries;
std::vector<BTFTypeStruct *> StructTypes;
- std::vector<BTFTypeArray *> ArrayTypes;
- std::map<std::string, int64_t> AccessOffsets;
+ std::map<std::string, uint32_t> PatchImms;
std::map<StringRef, std::pair<bool, std::vector<BTFTypeDerived *>>>
FixupDerivedTypes;
@@ -305,13 +294,9 @@ class BTFDebug : public DebugHandlerBase {
void processGlobals(bool ProcessingMapDef);
/// Generate one field relocation record.
- void generateOffsetReloc(const MachineInstr *MI, const MCSymbol *ORSym,
+ void generateFieldReloc(const MachineInstr *MI, const MCSymbol *ORSym,
DIType *RootTy, StringRef AccessPattern);
- /// Set the to-be-traversed Struct/Array Type based on TypeId.
- void setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType,
- BTFTypeArray **PrevArrayType);
-
/// Populating unprocessed struct type.
unsigned populateStructType(const DIType *Ty);
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 057bbf5c3b06..ef4e324c3bdd 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -39,7 +39,7 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
// determine the type of the relocation
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getKind()) {
default:
llvm_unreachable("invalid fixup kind!");
case FK_SecRel_8:
@@ -85,5 +85,5 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
std::unique_ptr<MCObjectTargetWriter>
llvm::createBPFELFObjectWriter(uint8_t OSABI) {
- return llvm::make_unique<BPFELFObjectWriter>(OSABI);
+ return std::make_unique<BPFELFObjectWriter>(OSABI);
}
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 0881bf841f90..590c4a2eb69d 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -702,7 +702,7 @@ bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {
// Make sure we have a number (parseExpression returns false when the expression is a number)
if (!getParser().parseExpression(Value)) {
// Make sure this is a number that is in range
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+ auto *MCE = cast<MCConstantExpr>(Value);
uint64_t IntValue = MCE->getValue();
if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue))
return Error(ExprLoc, "literal value out of range (256) for falign");
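The dyn_cast-to-cast change above tightens an invariant: parseExpression has already succeeded, so the expression is known to be a constant and a null check would be dead code. A toy illustration of the two idioms, using plain dynamic_cast as a stand-in for llvm/Support/Casting.h (this is not the LLVM implementation):

#include <cassert>

struct MCExprToy { virtual ~MCExprToy() = default; };
struct MCConstantExprToy : MCExprToy { long Value = 0; };

// dyn_cast-style: null on type mismatch, caller must test the result.
template <typename To, typename From> To *dynCastLike(From *V) {
  return dynamic_cast<To *>(V);
}

// cast-style: the type is asserted to hold; there is no null path.
template <typename To, typename From> To *castLike(From *V) {
  To *R = dynamic_cast<To *>(V);
  assert(R && "castLike<> on a value of the wrong type");
  return R;
}

int main() {
  MCConstantExprToy C;
  C.Value = 8;
  MCExprToy *E = &C;
  if (auto *CE = dynCastLike<MCConstantExprToy>(E)) // checked form
    assert(CE->Value == 8);
  assert(castLike<MCConstantExprToy>(E)->Value == 8); // asserted form
  return 0;
}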
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index b7e95caf24fb..efd5ed915127 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -84,7 +84,7 @@ namespace {
raw_ostream &operator<< (raw_ostream &OS, const printv &PV) {
if (PV.R)
- OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R);
+ OS << 'v' << Register::virtReg2Index(PV.R);
else
OS << 's';
return OS;
@@ -201,7 +201,7 @@ BitTracker::~BitTracker() {
bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
// An example when "meet" can be invoked with SelfR == 0 is a phi node
// with a physical register as an operand.
- assert(SelfR == 0 || TargetRegisterInfo::isVirtualRegister(SelfR));
+ assert(SelfR == 0 || Register::isVirtualRegister(SelfR));
bool Changed = false;
for (uint16_t i = 0, n = Bits.size(); i < n; ++i) {
const BitValue &RCV = RC[i];
@@ -335,12 +335,13 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
// 1. find a physical register PhysR from the same class as RR.Reg,
// 2. find a physical register PhysS that corresponds to PhysR:RR.Sub,
// 3. find a register class that contains PhysS.
- if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) {
+ if (Register::isVirtualRegister(RR.Reg)) {
const auto &VC = composeWithSubRegIndex(*MRI.getRegClass(RR.Reg), RR.Sub);
return TRI.getRegSizeInBits(VC);
}
- assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg));
- unsigned PhysR = (RR.Sub == 0) ? RR.Reg : TRI.getSubReg(RR.Reg, RR.Sub);
+ assert(Register::isPhysicalRegister(RR.Reg));
+ Register PhysR =
+ (RR.Sub == 0) ? Register(RR.Reg) : TRI.getSubReg(RR.Reg, RR.Sub);
return getPhysRegBitWidth(PhysR);
}
@@ -350,10 +351,10 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
// Physical registers are assumed to be present in the map with an unknown
// value. Don't actually insert anything in the map, just return the cell.
- if (TargetRegisterInfo::isPhysicalRegister(RR.Reg))
+ if (Register::isPhysicalRegister(RR.Reg))
return RegisterCell::self(0, BW);
- assert(TargetRegisterInfo::isVirtualRegister(RR.Reg));
+ assert(Register::isVirtualRegister(RR.Reg));
// For virtual registers that belong to a class that is not tracked,
// generate an "unknown" value as well.
const TargetRegisterClass *C = MRI.getRegClass(RR.Reg);
@@ -376,7 +377,7 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
// While updating the cell map can be done in a meaningful way for
// a part of a register, it makes little sense to implement it as the
// SSA representation would never contain such "partial definitions".
- if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ if (!Register::isVirtualRegister(RR.Reg))
return;
assert(RR.Sub == 0 && "Unexpected sub-register in definition");
// Eliminate all ref-to-reg-0 bit values: replace them with "self".
@@ -711,7 +712,7 @@ BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const {
}
uint16_t BT::MachineEvaluator::getPhysRegBitWidth(unsigned Reg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Register::isPhysicalRegister(Reg));
const TargetRegisterClass &PC = *TRI.getMinimalPhysRegClass(Reg);
return TRI.getRegSizeInBits(PC);
}
@@ -874,7 +875,7 @@ void BT::visitNonBranch(const MachineInstr &MI) {
continue;
RegisterRef RD(MO);
assert(RD.Sub == 0 && "Unexpected sub-register in definition");
- if (!TargetRegisterInfo::isVirtualRegister(RD.Reg))
+ if (!Register::isVirtualRegister(RD.Reg))
continue;
bool Changed = false;
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index b07d15609ede..3d771d388e28 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -130,7 +130,7 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!MO.isReg())
return true;
- unsigned RegNumber = MO.getReg();
+ Register RegNumber = MO.getReg();
// This should be an assert in the frontend.
if (Hexagon::DoubleRegsRegClass.contains(RegNumber))
RegNumber = TRI->getSubReg(RegNumber, ExtraCode[0] == 'L' ?
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 7b75d251ccd3..3068fb6f9629 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -147,11 +147,11 @@ namespace {
}
static inline unsigned v2x(unsigned v) {
- return TargetRegisterInfo::virtReg2Index(v);
+ return Register::virtReg2Index(v);
}
static inline unsigned x2v(unsigned x) {
- return TargetRegisterInfo::index2VirtReg(x);
+ return Register::index2VirtReg(x);
}
};
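The v2x/x2v helpers only re-point at the new Register API; the number encoding itself is unchanged. A hedged sketch of that encoding, mirroring the historical scheme in which virtual register numbers carry bit 31 (an illustration, not the llvm/CodeGen/Register.h source):

#include <cassert>

static const unsigned VirtualRegFlag = 1u << 31;

static bool isVirtualRegisterLike(unsigned Reg) { return int(Reg) < 0; }
static unsigned virtReg2IndexLike(unsigned Reg) { return Reg & ~VirtualRegFlag; }
static unsigned index2VirtRegLike(unsigned Index) { return Index | VirtualRegFlag; }

int main() {
  unsigned V = index2VirtRegLike(5); // the sixth virtual register
  assert(isVirtualRegisterLike(V));
  assert(virtReg2IndexLike(V) == 5);
  assert(!isVirtualRegisterLike(42)); // small numbers denote physical registers
  return 0;
}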
@@ -290,8 +290,8 @@ void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
for (auto &Op : MI.operands()) {
if (!Op.isReg() || !Op.isDef())
continue;
- unsigned R = Op.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = Op.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
Defs.insert(R);
}
@@ -302,8 +302,8 @@ void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
for (auto &Op : MI.operands()) {
if (!Op.isReg() || !Op.isUse())
continue;
- unsigned R = Op.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = Op.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
Uses.insert(R);
}
@@ -353,8 +353,7 @@ bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
MachineRegisterInfo &MRI) {
- if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
- !TargetRegisterInfo::isVirtualRegister(NewR))
+ if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR))
return false;
auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
decltype(End) NextI;
@@ -367,8 +366,7 @@ bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
unsigned NewSR, MachineRegisterInfo &MRI) {
- if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
- !TargetRegisterInfo::isVirtualRegister(NewR))
+ if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR))
return false;
if (hasTiedUse(OldR, MRI, NewSR))
return false;
@@ -384,8 +382,7 @@ bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
- if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
- !TargetRegisterInfo::isVirtualRegister(NewR))
+ if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR))
return false;
if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
return false;
@@ -896,7 +893,7 @@ bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
// register class.
const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
- if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ if (!Register::isVirtualRegister(RR.Reg))
return nullptr;
auto *RC = MRI.getRegClass(RR.Reg);
if (RR.Sub == 0)
@@ -927,8 +924,8 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
// with a 32-bit register.
bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
- if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) ||
- !TargetRegisterInfo::isVirtualRegister(RS.Reg))
+ if (!Register::isVirtualRegister(RD.Reg) ||
+ !Register::isVirtualRegister(RS.Reg))
return false;
// Return false if one (or both) classes are nullptr.
auto *DRC = getFinalVRegClass(RD, MRI);
@@ -979,7 +976,7 @@ bool DeadCodeElimination::isDead(unsigned R) const {
continue;
if (UseI->isPHI()) {
assert(!UseI->getOperand(0).getSubReg());
- unsigned DR = UseI->getOperand(0).getReg();
+ Register DR = UseI->getOperand(0).getReg();
if (DR == R)
continue;
}
@@ -1018,8 +1015,8 @@ bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
for (auto &Op : MI->operands()) {
if (!Op.isReg() || !Op.isDef())
continue;
- unsigned R = Op.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
+ Register R = Op.getReg();
+ if (!Register::isVirtualRegister(R) || !isDead(R)) {
AllDead = false;
break;
}
@@ -1220,8 +1217,8 @@ bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
return false;
MachineInstr &UseI = *I->getParent();
if (UseI.isPHI() || UseI.isCopy()) {
- unsigned DefR = UseI.getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DefR))
+ Register DefR = UseI.getOperand(0).getReg();
+ if (!Register::isVirtualRegister(DefR))
return false;
Pending.push_back(DefR);
} else {
@@ -1345,7 +1342,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
// If found, replace the instruction with a COPY.
const DebugLoc &DL = MI->getDebugLoc();
const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
- unsigned NewR = MRI.createVirtualRegister(FRC);
+ Register NewR = MRI.createVirtualRegister(FRC);
MachineInstr *CopyI =
BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
.addReg(RS.Reg, 0, RS.Sub);
@@ -1412,7 +1409,7 @@ bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
// register class and the actual value being transferred.
unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) {
- unsigned Reg = MRI.createVirtualRegister(RC);
+ Register Reg = MRI.createVirtualRegister(RC);
if (RC == &Hexagon::IntRegsRegClass) {
BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
.addImm(int32_t(C));
@@ -1470,7 +1467,7 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
if (Defs.count() != 1)
continue;
unsigned DR = Defs.find_first();
- if (!TargetRegisterInfo::isVirtualRegister(DR))
+ if (!Register::isVirtualRegister(DR))
continue;
uint64_t U;
const BitTracker::RegisterCell &DRC = BT.lookup(DR);
@@ -1609,7 +1606,7 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B,
auto *FRC = HBS::getFinalVRegClass(R, MRI);
if (findMatch(R, MR, AVB)) {
- unsigned NewR = MRI.createVirtualRegister(FRC);
+ Register NewR = MRI.createVirtualRegister(FRC);
BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
.addReg(MR.Reg, 0, MR.Sub);
BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
@@ -1628,7 +1625,7 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B,
BitTracker::RegisterRef ML, MH;
if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
auto *FRC = HBS::getFinalVRegClass(R, MRI);
- unsigned NewR = MRI.createVirtualRegister(FRC);
+ Register NewR = MRI.createVirtualRegister(FRC);
BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
.addReg(ML.Reg, 0, ML.Sub)
.addImm(SubLo)
@@ -1819,7 +1816,7 @@ bool BitSimplification::matchHalf(unsigned SelfR,
if (Reg == 0 || Reg == SelfR) // Don't match "self".
return false;
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return false;
if (!BT.has(Reg))
return false;
@@ -2025,7 +2022,7 @@ bool BitSimplification::genPackhl(MachineInstr *MI,
return false;
MachineBasicBlock &B = *MI->getParent();
- unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
DebugLoc DL = MI->getDebugLoc();
auto At = MI->isPHI() ? B.getFirstNonPHI()
: MachineBasicBlock::iterator(MI);
@@ -2097,7 +2094,7 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI,
MachineBasicBlock &B = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
- unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
auto At = MI->isPHI() ? B.getFirstNonPHI()
: MachineBasicBlock::iterator(MI);
BuildMI(B, At, DL, HII.get(COpc), NewR)
@@ -2154,7 +2151,7 @@ bool BitSimplification::genExtractLow(MachineInstr *MI,
if (!validateReg(RS, NewOpc, 1))
continue;
- unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
auto At = MI->isPHI() ? B.getFirstNonPHI()
: MachineBasicBlock::iterator(MI);
auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
@@ -2368,7 +2365,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
return true;
}
} else if (V.is(0) || V.is(1)) {
- unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
BuildMI(B, At, DL, HII.get(NewOpc), NewR);
HBS::replaceReg(RD.Reg, NewR, MRI);
@@ -2541,7 +2538,7 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock &B = *MI->getParent();
- unsigned NewR = MRI.createVirtualRegister(FRC);
+ Register NewR = MRI.createVirtualRegister(FRC);
auto At = MI->isPHI() ? B.getFirstNonPHI()
: MachineBasicBlock::iterator(MI);
auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
@@ -2612,8 +2609,8 @@ bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
KnownNZ = true;
}
- auto ReplaceWithConst = [&] (int C) {
- unsigned NewR = MRI.createVirtualRegister(FRC);
+ auto ReplaceWithConst = [&](int C) {
+ Register NewR = MRI.createVirtualRegister(FRC);
BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
.addImm(C);
HBS::replaceReg(RD.Reg, NewR, MRI);
@@ -2678,7 +2675,7 @@ bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
// replace the comparison with a C2_muxii, using the same predicate
// register, but with operands substituted with 0/1 accordingly.
if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
- unsigned NewR = MRI.createVirtualRegister(FRC);
+ Register NewR = MRI.createVirtualRegister(FRC);
BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
.addReg(InpDef->getOperand(1).getReg())
.addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
@@ -3071,7 +3068,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
DenseMap<unsigned,unsigned> RegMap;
const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
- unsigned PhiR = MRI->createVirtualRegister(PhiRC);
+ Register PhiR = MRI->createVirtualRegister(PhiRC);
BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
.addReg(NewPredR)
.addMBB(&PB)
@@ -3083,7 +3080,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
const MachineInstr *SI = G.Ins[i-1];
unsigned DR = getDefReg(SI);
const TargetRegisterClass *RC = MRI->getRegClass(DR);
- unsigned NewDR = MRI->createVirtualRegister(RC);
+ Register NewDR = MRI->createVirtualRegister(RC);
DebugLoc DL = SI->getDebugLoc();
auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
@@ -3162,7 +3159,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
if (Defs.count() != 1)
continue;
unsigned DefR = Defs.find_first();
- if (!TargetRegisterInfo::isVirtualRegister(DefR))
+ if (!Register::isVirtualRegister(DefR))
continue;
if (!isBitShuffle(&*I, DefR))
continue;
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index ba50faac2cf9..ebd060ce503e 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -111,7 +111,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
}
uint16_t HexagonEvaluator::getPhysRegBitWidth(unsigned Reg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Register::isPhysicalRegister(Reg));
using namespace Hexagon;
const auto &HST = MF.getSubtarget<HexagonSubtarget>();
@@ -1042,8 +1042,8 @@ unsigned HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const {
for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || !Op.isDef())
continue;
- unsigned R = Op.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = Op.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
if (DefReg != 0)
return 0;
@@ -1220,7 +1220,7 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI,
RegisterRef RD = MI.getOperand(0);
RegisterRef RS = MI.getOperand(1);
assert(RD.Sub == 0);
- if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg))
+ if (!Register::isPhysicalRegister(RS.Reg))
return false;
RegExtMap::const_iterator F = VRX.find(RD.Reg);
if (F == VRX.end())
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp
index 999150fc8c6e..d1d1b8ee7d41 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -268,14 +268,14 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs(
return SRs;
}
- if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) {
+ if (Register::isPhysicalRegister(R.Reg)) {
MCSubRegIterator I(R.Reg, &TRI);
if (!I.isValid())
SRs.insert({R.Reg, 0});
for (; I.isValid(); ++I)
SRs.insert({*I, 0});
} else {
- assert(TargetRegisterInfo::isVirtualRegister(R.Reg));
+ assert(Register::isVirtualRegister(R.Reg));
auto &RC = *MRI.getRegClass(R.Reg);
unsigned PReg = *RC.begin();
MCSubRegIndexIterator I(PReg, &TRI);
@@ -321,7 +321,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
if (!Op.isReg() || !Op.isUse() || Op.isUndef())
continue;
RegisterRef R = { Op.getReg(), Op.getSubReg() };
- if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
+ if (Register::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
continue;
bool IsKill = Op.isKill();
for (auto S : expandToSubRegs(R, MRI, TRI)) {
@@ -338,7 +338,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
continue;
RegisterRef R = { Op.getReg(), Op.getSubReg() };
for (auto S : expandToSubRegs(R, MRI, TRI)) {
- if (TargetRegisterInfo::isPhysicalRegister(S.Reg) && Reserved[S.Reg])
+ if (Register::isPhysicalRegister(S.Reg) && Reserved[S.Reg])
continue;
if (Op.isDead())
Clobbers.insert(S);
@@ -374,7 +374,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
// Update maps for defs.
for (RegisterRef S : Defs) {
// Defs should already be expanded into subregs.
- assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) ||
+ assert(!Register::isPhysicalRegister(S.Reg) ||
!MCSubRegIterator(S.Reg, &TRI, false).isValid());
if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
closeRange(S);
@@ -383,7 +383,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
// Update maps for clobbers.
for (RegisterRef S : Clobbers) {
// Clobbers should already be expanded into subregs.
- assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) ||
+ assert(!Register::isPhysicalRegister(S.Reg) ||
!MCSubRegIterator(S.Reg, &TRI, false).isValid());
if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
closeRange(S);
@@ -482,7 +482,7 @@ HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap(
}
}
for (auto &P : LiveMap)
- if (TargetRegisterInfo::isVirtualRegister(P.first.Reg))
+ if (Register::isVirtualRegister(P.first.Reg))
addDeadRanges(P.first);
LLVM_DEBUG(dbgs() << __func__ << ": dead map\n"
diff --git a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
index ee93739b2c7b..08f740806879 100644
--- a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
+++ b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
@@ -105,12 +105,11 @@ void HexagonBranchRelaxation::computeOffset(MachineFunction &MF,
// offset of the current instruction from the start.
unsigned InstOffset = 0;
for (auto &B : MF) {
- if (B.getAlignment()) {
+ if (B.getAlignment() != Align::None()) {
// Although we don't know the exact layout of the final code, we need
// to account for alignment padding somehow. This heuristic pads each
// aligned basic block according to the alignment value.
- int ByteAlign = (1u << B.getAlignment()) - 1;
- InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign);
+ InstOffset = alignTo(InstOffset, B.getAlignment());
}
OffsetMap[&B] = InstOffset;
for (auto &MI : B.instrs()) {
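The rewrite above swaps hand-rolled mask arithmetic for alignTo over the block's Align. For a power-of-two alignment the two computations agree; a quick standalone check (alignToPow2 is a local stand-in for llvm::alignTo, not the LLVM function):

#include <cassert>
#include <cstdint>

// Round Value up to the next multiple of a power-of-two Align.
static uint64_t alignToPow2(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  const unsigned Log2 = 4; // a block aligned to 16 bytes
  const uint64_t ByteAlign = (1u << Log2) - 1;
  for (uint64_t Off = 0; Off < 128; ++Off) {
    uint64_t Old = (Off + ByteAlign) & ~ByteAlign; // removed form
    assert(Old == alignToPow2(Off, 1u << Log2));   // new form
  }
  return 0;
}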
diff --git a/lib/Target/Hexagon/HexagonConstExtenders.cpp b/lib/Target/Hexagon/HexagonConstExtenders.cpp
index cfed0ecef272..ddc9b847ef1c 100644
--- a/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -14,9 +14,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Pass.h"
#include <map>
#include <set>
#include <utility>
@@ -235,24 +236,24 @@ namespace {
Reg = Op.getReg();
Sub = Op.getSubReg();
} else if (Op.isFI()) {
- Reg = TargetRegisterInfo::index2StackSlot(Op.getIndex());
+ Reg = llvm::Register::index2StackSlot(Op.getIndex());
}
return *this;
}
bool isVReg() const {
- return Reg != 0 && !TargetRegisterInfo::isStackSlot(Reg) &&
- TargetRegisterInfo::isVirtualRegister(Reg);
+ return Reg != 0 && !llvm::Register::isStackSlot(Reg) &&
+ llvm::Register::isVirtualRegister(Reg);
}
bool isSlot() const {
- return Reg != 0 && TargetRegisterInfo::isStackSlot(Reg);
+ return Reg != 0 && llvm::Register::isStackSlot(Reg);
}
operator MachineOperand() const {
if (isVReg())
return MachineOperand::CreateReg(Reg, /*Def*/false, /*Imp*/false,
/*Kill*/false, /*Dead*/false, /*Undef*/false,
/*EarlyClobber*/false, Sub);
- if (TargetRegisterInfo::isStackSlot(Reg)) {
- int FI = TargetRegisterInfo::stackSlot2Index(Reg);
+ if (llvm::Register::isStackSlot(Reg)) {
+ int FI = llvm::Register::stackSlot2Index(Reg);
return MachineOperand::CreateFI(FI);
}
llvm_unreachable("Cannot create MachineOperand");
@@ -1524,7 +1525,7 @@ void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
}
HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
- unsigned DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ llvm::Register DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
MachineBasicBlock &MBB = *DefL.Block;
MachineBasicBlock::iterator At = DefL.At;
DebugLoc dl = DefL.Block->findDebugLoc(DefL.At);
diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp
index d1fde5da5fe8..a82501cabb9b 100644
--- a/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -208,14 +208,14 @@ namespace {
bool has(unsigned R) const {
// All non-virtual registers are considered "bottom".
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ if (!Register::isVirtualRegister(R))
return true;
MapType::const_iterator F = Map.find(R);
return F != Map.end();
}
const LatticeCell &get(unsigned R) const {
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ if (!Register::isVirtualRegister(R))
return Bottom;
MapType::const_iterator F = Map.find(R);
if (F != Map.end())
@@ -623,7 +623,7 @@ void MachineConstPropagator::visitPHI(const MachineInstr &PN) {
const MachineOperand &MD = PN.getOperand(0);
RegisterSubReg DefR(MD);
- assert(TargetRegisterInfo::isVirtualRegister(DefR.Reg));
+ assert(Register::isVirtualRegister(DefR.Reg));
bool Changed = false;
@@ -652,7 +652,7 @@ Bottomize:
RegisterSubReg UseR(SO);
// If the input is not a virtual register, we don't really know what
// value it holds.
- if (!TargetRegisterInfo::isVirtualRegister(UseR.Reg))
+ if (!Register::isVirtualRegister(UseR.Reg))
goto Bottomize;
// If there is no cell for an input register, it means top.
if (!Cells.has(UseR.Reg))
@@ -694,7 +694,7 @@ void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) {
continue;
RegisterSubReg DefR(MO);
// Only track virtual registers.
- if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg))
+ if (!Register::isVirtualRegister(DefR.Reg))
continue;
bool Changed = false;
// If the evaluation failed, set cells for all output registers to bottom.
@@ -1070,7 +1070,7 @@ bool MachineConstPropagator::run(MachineFunction &MF) {
bool MachineConstEvaluator::getCell(const RegisterSubReg &R, const CellMap &Inputs,
LatticeCell &RC) {
- if (!TargetRegisterInfo::isVirtualRegister(R.Reg))
+ if (!Register::isVirtualRegister(R.Reg))
return false;
const LatticeCell &L = Inputs.get(R.Reg);
if (!R.SubReg) {
@@ -1926,7 +1926,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
unsigned Opc = MI.getOpcode();
RegisterSubReg DefR(MD);
assert(!DefR.SubReg);
- if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg))
+ if (!Register::isVirtualRegister(DefR.Reg))
return false;
if (MI.isCopy()) {
@@ -2793,7 +2793,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
if (!MO.isReg() || !MO.isUse() || MO.isImplicit())
continue;
RegisterSubReg R(MO);
- if (!TargetRegisterInfo::isVirtualRegister(R.Reg))
+ if (!Register::isVirtualRegister(R.Reg))
continue;
HasUse = true;
// PHIs can legitimately have "top" cells after propagation.
@@ -2813,7 +2813,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse() || MO.isImplicit())
continue;
- unsigned R = MO.getReg();
+ Register R = MO.getReg();
dbgs() << printReg(R, &TRI) << ": " << Inputs.get(R) << "\n";
}
}
@@ -2831,8 +2831,8 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned R = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = MO.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
assert(!MO.getSubReg());
assert(Inputs.has(R));
@@ -2871,7 +2871,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
const MCInstrDesc *NewD = (Ps & P::Zero) ?
&HII.get(Hexagon::PS_false) :
&HII.get(Hexagon::PS_true);
- unsigned NewR = MRI->createVirtualRegister(PredRC);
+ Register NewR = MRI->createVirtualRegister(PredRC);
const MachineInstrBuilder &MIB = BuildMI(B, At, DL, *NewD, NewR);
(void)MIB;
#ifndef NDEBUG
@@ -2893,7 +2893,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
NewRC = &Hexagon::IntRegsRegClass;
else
NewRC = &Hexagon::DoubleRegsRegClass;
- unsigned NewR = MRI->createVirtualRegister(NewRC);
+ Register NewR = MRI->createVirtualRegister(NewRC);
const MachineInstr *NewMI;
if (W == 32) {
@@ -3009,7 +3009,7 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
if (V < 0)
V = -V;
const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg);
- unsigned NewR = MRI->createVirtualRegister(RC);
+ Register NewR = MRI->createVirtualRegister(RC);
const MachineOperand &Src1 = MI.getOperand(1);
NewMI = BuildMI(B, At, DL, D, NewR)
.addReg(Src1.getReg(), getRegState(Src1), Src1.getSubReg())
@@ -3111,8 +3111,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
void HexagonConstEvaluator::replaceAllRegUsesWith(unsigned FromReg,
unsigned ToReg) {
- assert(TargetRegisterInfo::isVirtualRegister(FromReg));
- assert(TargetRegisterInfo::isVirtualRegister(ToReg));
+ assert(Register::isVirtualRegister(FromReg));
+ assert(Register::isVirtualRegister(ToReg));
for (auto I = MRI->use_begin(FromReg), E = MRI->use_end(); I != E;) {
MachineOperand &O = *I;
++I;
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index a09ccab483cf..394a329ac447 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -133,8 +133,8 @@ static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII,
const MachineOperand &Op1 = MI.getOperand(1);
assert(Op0.isReg() && Op1.isReg());
- unsigned DestReg = Op0.getReg();
- unsigned SrcReg = Op1.getReg();
+ Register DestReg = Op0.getReg();
+ Register SrcReg = Op1.getReg();
return Hexagon::IntRegsRegClass.contains(DestReg) &&
Hexagon::IntRegsRegClass.contains(SrcReg);
}
@@ -146,7 +146,7 @@ static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII,
const MachineOperand &Op1 = MI.getOperand(1);
assert(Op0.isReg());
- unsigned DestReg = Op0.getReg();
+ Register DestReg = Op0.getReg();
// Ensure that TargetFlags are MO_NO_FLAG for a global. This is a
// workaround for an ABI bug that prevents GOT relocations on combine
// instructions
@@ -226,7 +226,7 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI,
}
static bool isEvenReg(unsigned Reg) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Register::isPhysicalRegister(Reg));
if (Hexagon::IntRegsRegClass.contains(Reg))
return (Reg - Hexagon::R0) % 2 == 0;
if (Hexagon::HvxVRRegClass.contains(Reg))
@@ -265,7 +265,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1,
unsigned I1DestReg,
unsigned I2DestReg,
bool &DoInsertAtI1) {
- unsigned I2UseReg = UseReg(I2.getOperand(1));
+ Register I2UseReg = UseReg(I2.getOperand(1));
// It is not safe to move I1 and I2 into one combine if I2 has a true
// dependence on I1.
@@ -332,7 +332,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1,
// At O3 we got better results (dhrystone) by being more conservative here.
if (!ShouldCombineAggressively)
End = std::next(MachineBasicBlock::iterator(I2));
- unsigned I1UseReg = UseReg(I1.getOperand(1));
+ Register I1UseReg = UseReg(I1.getOperand(1));
// Track killed operands. If we move across an instruction that kills our
// operand, we need to update the kill information on the moved I1. It kills
// the operand now.
@@ -410,7 +410,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
continue;
// Look for the defining instruction.
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
MachineInstr *DefInst = LastDef[Reg];
if (!DefInst)
continue;
@@ -442,7 +442,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
if (Op.isReg()) {
if (!Op.isDef() || !Op.getReg())
continue;
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
LastDef[*SubRegs] = &MI;
@@ -528,7 +528,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
while (I2 != I1.getParent()->end() && I2->isDebugInstr())
++I2;
- unsigned I1DestReg = I1.getOperand(0).getReg();
+ Register I1DestReg = I1.getOperand(0).getReg();
for (MachineBasicBlock::iterator End = I1.getParent()->end(); I2 != End;
++I2) {
@@ -544,7 +544,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(&*I2))
continue;
- unsigned I2DestReg = I2->getOperand(0).getReg();
+ Register I2DestReg = I2->getOperand(0).getReg();
// Check that registers are adjacent and that the first destination register
// is even.
@@ -579,8 +579,8 @@ void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2,
++MI;
// Figure out whether I1 or I2 goes into the lowreg part.
- unsigned I1DestReg = I1.getOperand(0).getReg();
- unsigned I2DestReg = I2.getOperand(0).getReg();
+ Register I1DestReg = I1.getOperand(0).getReg();
+ Register I2DestReg = I2.getOperand(0).getReg();
bool IsI1Loreg = (I2DestReg - I1DestReg) == 1;
unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg;
unsigned SubLo;
@@ -758,7 +758,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt,
unsigned DoubleDestReg,
MachineOperand &HiOperand,
MachineOperand &LoOperand) {
- unsigned LoReg = LoOperand.getReg();
+ Register LoReg = LoOperand.getReg();
unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill());
DebugLoc DL = InsertPt->getDebugLoc();
@@ -807,7 +807,7 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt,
MachineOperand &HiOperand,
MachineOperand &LoOperand) {
unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill());
- unsigned HiReg = HiOperand.getReg();
+ Register HiReg = HiOperand.getReg();
DebugLoc DL = InsertPt->getDebugLoc();
MachineBasicBlock *BB = InsertPt->getParent();
@@ -857,8 +857,8 @@ void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt,
MachineOperand &LoOperand) {
unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill());
unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill());
- unsigned LoReg = LoOperand.getReg();
- unsigned HiReg = HiOperand.getReg();
+ Register LoReg = LoOperand.getReg();
+ Register HiReg = HiOperand.getReg();
DebugLoc DL = InsertPt->getDebugLoc();
MachineBasicBlock *BB = InsertPt->getParent();
diff --git a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 2ce1419e4790..e4a2ba0ec29c 100644
--- a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -37,12 +37,12 @@ def: Pat<(int_hexagon_F2_sfmax IntRegs:$src1, IntRegs:$src2),
(F2_sfmax IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vabswsat DoubleRegs:$src1),
(A2_vabswsat DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
- (S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
- (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred:$src2),
- (A4_combineri IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+ (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -75,8 +75,8 @@ def: Pat<(int_hexagon_A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_maxup DoubleRegs:$src1, DoubleRegs:$src2),
(A2_maxup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2),
- (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+ (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_interleave DoubleRegs:$src1),
(S2_interleave DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2),
@@ -89,10 +89,10 @@ def: Pat<(int_hexagon_C2_cmpgtu IntRegs:$src1, IntRegs:$src2),
(C2_cmpgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2),
(C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2),
- (A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2),
- (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+ (A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2),
(M2_mpyi IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_df2uw_chop DoubleRegs:$src1),
@@ -103,12 +103,12 @@ def: Pat<(int_hexagon_M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
- (S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3),
+ (S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S4_ntstbit_r IntRegs:$src1, IntRegs:$src2),
(S4_ntstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_w2sf IntRegs:$src1),
@@ -125,10 +125,10 @@ def: Pat<(int_hexagon_A4_cmpbgt IntRegs:$src1, IntRegs:$src2),
(A4_cmpbgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
- (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_orp DoubleRegs:$src1, DoubleRegs:$src2),
@@ -137,28 +137,28 @@ def: Pat<(int_hexagon_M2_mpyu_up IntRegs:$src1, IntRegs:$src2),
(M2_mpyu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
- (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
- (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
+ (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_cmpbgtu IntRegs:$src1, IntRegs:$src2),
(A4_cmpbgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2),
(A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2),
- (A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2),
+ (A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_addsp IntRegs:$src1, DoubleRegs:$src2),
(A2_addsp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2),
(S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2),
- (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+ (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2),
(S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_pmpyw IntRegs:$src1, IntRegs:$src2),
@@ -177,10 +177,10 @@ def: Pat<(int_hexagon_A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
- (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2),
- (S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+ (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -199,10 +199,10 @@ def: Pat<(int_hexagon_M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
(M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C2_bitsset IntRegs:$src1, IntRegs:$src2),
(C2_bitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2),
- (M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2),
- (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2),
+ (M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2),
+ (M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_boundscheck IntRegs:$src1, DoubleRegs:$src2),
(A4_boundscheck IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2),
@@ -225,10 +225,10 @@ def: Pat<(int_hexagon_F2_conv_ud2df DoubleRegs:$src1),
(F2_conv_ud2df DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vzxthw IntRegs:$src1),
(S2_vzxthw IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sfadd IntRegs:$src1, IntRegs:$src2),
@@ -241,12 +241,12 @@ def: Pat<(int_hexagon_M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$sr
(M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2),
(M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4),
- (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4),
+ (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_packhl IntRegs:$src1, IntRegs:$src2),
(S2_packhl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2),
- (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+ (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -259,8 +259,8 @@ def: Pat<(int_hexagon_M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_d2df DoubleRegs:$src1),
(F2_conv_d2df DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2),
- (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+ (C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vconj DoubleRegs:$src1),
(A2_vconj DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2),
@@ -279,8 +279,8 @@ def: Pat<(int_hexagon_C2_any8 PredRegs:$src1),
(C2_any8 PredRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_togglebit_r IntRegs:$src1, IntRegs:$src2),
(S2_togglebit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2),
- (S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_uw2sf IntRegs:$src1),
(F2_conv_uw2sf IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vsathb_nopack DoubleRegs:$src1),
@@ -303,10 +303,10 @@ def: Pat<(int_hexagon_C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)
(C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
- (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
+ (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
(M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
@@ -323,34 +323,34 @@ def: Pat<(int_hexagon_M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleR
(M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2),
(M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred:$src1),
- (A2_tfrsi s32_0ImmPred:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred_timm:$src1),
+ (A2_tfrsi s32_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_svnavgh IntRegs:$src1, IntRegs:$src2),
(A2_svnavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
- (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
- (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
+ (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_svavgh IntRegs:$src1, IntRegs:$src2),
(A2_svavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
(M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
(M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
- (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+ (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_combine_hl IntRegs:$src1, IntRegs:$src2),
(A2_combine_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_up IntRegs:$src1, IntRegs:$src2),
@@ -381,10 +381,10 @@ def: Pat<(int_hexagon_M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3
(M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
- (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2),
@@ -453,8 +453,8 @@ def: Pat<(int_hexagon_M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
- (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_sat DoubleRegs:$src1),
@@ -469,8 +469,8 @@ def: Pat<(int_hexagon_A2_svavghs IntRegs:$src1, IntRegs:$src2),
(A2_svavghs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
(A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2),
- (C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2),
+ (C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
(A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
@@ -535,10 +535,10 @@ def: Pat<(int_hexagon_C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3
(C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_parityp DoubleRegs:$src1, DoubleRegs:$src2),
(S2_parityp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -557,30 +557,30 @@ def: Pat<(int_hexagon_M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src
(M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3),
- (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_tstbit_r IntRegs:$src1, IntRegs:$src2),
(S2_tstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3),
- (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
(M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
(M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2),
- (S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2),
(S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
- (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
@@ -605,14 +605,14 @@ def: Pat<(int_hexagon_F2_conv_w2df IntRegs:$src1),
(F2_conv_w2df IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
(A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2),
- (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vcnegh DoubleRegs:$src1, IntRegs:$src2),
(S2_vcnegh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2),
- (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+ (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2),
@@ -633,8 +633,8 @@ def: Pat<(int_hexagon_S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3
(S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_cl0p DoubleRegs:$src1),
(S2_cl0p DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3),
- (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3),
+ (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sffixupd IntRegs:$src1, IntRegs:$src2),
(F2_sffixupd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
@@ -653,8 +653,8 @@ def: Pat<(int_hexagon_M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs
(M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2),
- (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sffixupr IntRegs:$src1),
(F2_sffixupr IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
@@ -669,32 +669,32 @@ def: Pat<(int_hexagon_C2_andn PredRegs:$src1, PredRegs:$src2),
(C2_andn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2),
(M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
- (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2),
- (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2),
- (A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_max IntRegs:$src1, IntRegs:$src2),
(A2_max IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_round_rr IntRegs:$src1, IntRegs:$src2),
(A4_round_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2),
- (A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_combineir s32_0ImmPred:$src1, IntRegs:$src2),
- (A4_combineir s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2),
+ (A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2),
+ (A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
(C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -703,8 +703,8 @@ def: Pat<(int_hexagon_A4_rcmpeq IntRegs:$src1, IntRegs:$src2),
(A4_rcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2),
(M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vzxtbh IntRegs:$src1),
(S2_vzxtbh IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
@@ -721,8 +721,8 @@ def: Pat<(int_hexagon_M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2),
(M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C4_nbitsset IntRegs:$src1, IntRegs:$src2),
(C4_nbitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -745,10 +745,10 @@ def: Pat<(int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs
(M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred:$src1),
- (F2_sfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred:$src1),
- (F2_sfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred_timm:$src1),
+ (F2_sfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred_timm:$src1),
+ (F2_sfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2),
(M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
@@ -759,14 +759,14 @@ def: Pat<(int_hexagon_F2_conv_d2sf DoubleRegs:$src1),
(F2_conv_d2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2),
- (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2),
+ (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sfcmpuo IntRegs:$src1, IntRegs:$src2),
(F2_sfcmpuo IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vsatwh_nopack DoubleRegs:$src1),
(S2_vsatwh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2),
@@ -783,8 +783,8 @@ def: Pat<(int_hexagon_M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_minp DoubleRegs:$src1, DoubleRegs:$src2),
(A2_minp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
- (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
@@ -817,16 +817,16 @@ def: Pat<(int_hexagon_S4_extract_rp IntRegs:$src1, DoubleRegs:$src2),
(S4_extract_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2),
- (C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+ (C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_tfrcpp CtrRegs64:$src1),
(A4_tfrcpp CtrRegs64:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2),
- (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2),
- (A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
@@ -837,8 +837,8 @@ def: Pat<(int_hexagon_S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRe
(S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2),
(S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2),
@@ -851,14 +851,14 @@ def: Pat<(int_hexagon_S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs
(S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_satb IntRegs:$src1),
(A2_satb IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4),
- (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4),
+ (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2),
(S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2),
@@ -925,8 +925,8 @@ def: Pat<(int_hexagon_M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2),
(M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2),
(M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3),
- (C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3),
+ (C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
(M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
@@ -937,8 +937,8 @@ def: Pat<(int_hexagon_M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -947,8 +947,8 @@ def: Pat<(int_hexagon_S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs
(S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -957,16 +957,16 @@ def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs
(M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2),
(C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3),
- (M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
- (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3),
+ (M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3),
+ (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_tfrcrr CtrRegs:$src1),
(A2_tfrcrr CtrRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
- (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3),
+ (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C2_orn PredRegs:$src1, PredRegs:$src2),
(C2_orn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -1005,8 +1005,8 @@ def: Pat<(int_hexagon_M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntR
(M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2),
(F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
- (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2),
(A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2),
@@ -1017,10 +1017,10 @@ def: Pat<(int_hexagon_A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vsxthw IntRegs:$src1),
(S2_vsxthw IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C2_cmpgt IntRegs:$src1, IntRegs:$src2),
@@ -1035,22 +1035,22 @@ def: Pat<(int_hexagon_F2_conv_sf2w IntRegs:$src1),
(F2_conv_sf2w IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2),
- (F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3),
- (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3),
+ (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
(M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred:$src2),
- (A2_addi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_addp DoubleRegs:$src1, DoubleRegs:$src2),
(A2_addp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2),
@@ -1063,8 +1063,8 @@ def: Pat<(int_hexagon_M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2),
(S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
@@ -1131,12 +1131,12 @@ def: Pat<(int_hexagon_C2_and PredRegs:$src1, PredRegs:$src2),
(C2_and PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S5_popcountp DoubleRegs:$src1),
(S5_popcountp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
- (S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3),
+ (S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_cl0 IntRegs:$src1),
(S2_cl0 IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2),
- (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+ (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
(M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
@@ -1167,8 +1167,8 @@ def: Pat<(int_hexagon_M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2),
- (A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2),
@@ -1185,26 +1185,26 @@ def: Pat<(int_hexagon_F2_conv_sf2d IntRegs:$src1),
(F2_conv_sf2d IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred:$src1),
- (F2_dfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred_timm:$src1),
+ (F2_dfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_cmphgt IntRegs:$src1, IntRegs:$src2),
(A4_cmphgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred:$src1),
- (F2_dfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred_timm:$src1),
+ (F2_dfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
(M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3),
- (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3),
+ (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
(M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
(M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3),
- (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3),
+ (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -1215,20 +1215,20 @@ def: Pat<(int_hexagon_A2_maxu IntRegs:$src1, IntRegs:$src2),
(A2_maxu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_maxp DoubleRegs:$src1, DoubleRegs:$src2),
(A2_maxp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred:$src2),
- (A2_andir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sfrecipa IntRegs:$src1, IntRegs:$src2),
(F2_sfrecipa IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2),
- (A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2),
+ (A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_orn IntRegs:$src1, IntRegs:$src2),
(A4_orn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2),
- (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+ (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2),
- (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2),
+ (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_r IntRegs:$src1, IntRegs:$src2),
(S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2),
@@ -1251,16 +1251,16 @@ def: Pat<(int_hexagon_A2_satub IntRegs:$src1),
(A2_satub IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2),
(M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
- (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2),
(C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2),
- (A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred:$src2),
- (A2_tfril IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
- (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2),
+ (A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2),
+ (A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3),
+ (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vtrunehb DoubleRegs:$src1),
(S2_vtrunehb DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vabsw DoubleRegs:$src1),
@@ -1269,14 +1269,14 @@ def: Pat<(int_hexagon_A2_vabsh DoubleRegs:$src1),
(A2_vabsh DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sfsub IntRegs:$src1, IntRegs:$src2),
(F2_sfsub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3),
- (C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
- (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3),
+ (C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_swiz IntRegs:$src1),
(A2_swiz IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2),
(M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2),
@@ -1295,44 +1295,44 @@ def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs
(M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
- (S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3),
+ (S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_ud2sf DoubleRegs:$src1),
(F2_conv_ud2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_tfr IntRegs:$src1),
(A2_tfr IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subri s32_0ImmPred:$src1, IntRegs:$src2),
- (A2_subri s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2),
+ (A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M5_vmpybuu IntRegs:$src1, IntRegs:$src2),
(M5_vmpybuu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
- (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_brev IntRegs:$src1),
(S2_brev IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2),
- (S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
- (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
+ (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
@@ -1343,8 +1343,8 @@ def: Pat<(int_hexagon_M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
- (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vrndpackwhs DoubleRegs:$src1),
(S2_vrndpackwhs DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2),
@@ -1357,24 +1357,24 @@ def: Pat<(int_hexagon_M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4),
- (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_uw2df IntRegs:$src1),
(F2_conv_uw2df IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred:$src2),
- (A2_orir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_andp DoubleRegs:$src1, DoubleRegs:$src2),
(A2_andp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2),
(S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_min IntRegs:$src1, IntRegs:$src2),
(A2_min IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2),
- (M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2),
+ (M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
(M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -1397,10 +1397,10 @@ def: Pat<(int_hexagon_M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_df2w DoubleRegs:$src1),
(F2_conv_df2w DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2),
- (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
+ (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_df2d DoubleRegs:$src1),
(F2_conv_df2d DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
@@ -1423,8 +1423,8 @@ def: Pat<(int_hexagon_A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4),
(F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2),
- (F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_df2ud DoubleRegs:$src1),
(F2_conv_df2ud DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_df2uw DoubleRegs:$src1),
@@ -1433,16 +1433,16 @@ def: Pat<(int_hexagon_M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2),
(M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2),
(M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2),
- (C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C4_cmplteu IntRegs:$src1, IntRegs:$src2),
(C4_cmplteu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vsubb_map DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2),
(A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2),
- (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
@@ -1471,14 +1471,14 @@ def: Pat<(int_hexagon_M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2),
(M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2),
- (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+ (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2),
(A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_svaddhs IntRegs:$src1, IntRegs:$src2),
(A2_svaddhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
@@ -1499,8 +1499,8 @@ def: Pat<(int_hexagon_M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_lsli s6_0ImmPred:$src1, IntRegs:$src2),
- (S4_lsli s6_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2),
+ (S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2),
(S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2),
@@ -1529,8 +1529,8 @@ def: Pat<(int_hexagon_A4_cmpbeq IntRegs:$src1, IntRegs:$src2),
(A4_cmpbeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_negp DoubleRegs:$src1),
(A2_negp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2),
- (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
(A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vsatwuh DoubleRegs:$src1),
@@ -1541,10 +1541,10 @@ def: Pat<(int_hexagon_S2_svsathb IntRegs:$src1),
(S2_svsathb IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2),
(C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2),
- (A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2),
- (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2),
+ (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_cround_rr IntRegs:$src1, IntRegs:$src2),
(A4_cround_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -1563,12 +1563,12 @@ def: Pat<(int_hexagon_A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vminub DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vminub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
- (S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3),
+ (S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_svsubh IntRegs:$src1, IntRegs:$src2),
(A2_svsubh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2),
- (S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2),
+ (S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vsxtbh IntRegs:$src1),
@@ -1589,16 +1589,16 @@ def: Pat<(int_hexagon_M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$sr
(M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2),
- (A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -1623,8 +1623,8 @@ def: Pat<(int_hexagon_M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntReg
(M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2),
- (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -1637,10 +1637,10 @@ def: Pat<(int_hexagon_M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRe
(M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2),
- (S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_andn IntRegs:$src1, IntRegs:$src2),
(A4_andn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
@@ -1655,8 +1655,8 @@ def: Pat<(int_hexagon_C2_bitsclr IntRegs:$src1, IntRegs:$src2),
(C2_bitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
- (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
+ (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_ornp DoubleRegs:$src1, DoubleRegs:$src2),
(A4_ornp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
@@ -1673,14 +1673,14 @@ def: Pat<(int_hexagon_M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2),
(M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2),
(M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2),
- (C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
- (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2),
+ (C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
+ (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
// V55 Scalar Instructions.
@@ -1689,30 +1689,30 @@ def: Pat<(int_hexagon_A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src
// V60 Scalar Instructions.
-def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
- (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2),
- (S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
- (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
- (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+ (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
// V62 Scalar Instructions.
@@ -1744,8 +1744,8 @@ def: Pat<(int_hexagon_F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2),
(F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
def: Pat<(int_hexagon_M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV66]>;
-def: Pat<(int_hexagon_S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2),
- (S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2),
+ (S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV66]>;
// V60 HVX Instructions.
@@ -1773,10 +1773,10 @@ def: Pat<(int_hexagon_V6_vaddh_dv HvxWR:$src1, HvxWR:$src2),
(V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vaddh_dv_128B HvxWR:$src1, HvxWR:$src2),
(V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
- (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
- (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vshufoh HvxVR:$src1, HvxVR:$src2),
(V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vshufoh_128B HvxVR:$src1, HvxVR:$src2),
@@ -1789,10 +1789,10 @@ def: Pat<(int_hexagon_V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2),
(V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vdmpyhsuisat_128B HvxWR:$src1, IntRegs:$src2),
(V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
- (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
- (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vnavgw HvxVR:$src1, HvxVR:$src2),
(V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vnavgw_128B HvxVR:$src1, HvxVR:$src2),
@@ -2369,10 +2369,10 @@ def: Pat<(int_hexagon_V6_vsubhsat HvxVR:$src1, HvxVR:$src2),
(V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubhsat_128B HvxVR:$src1, HvxVR:$src2),
(V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
- (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
- (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vabsw HvxVR:$src1),
(V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vabsw_128B HvxVR:$src1),
@@ -2489,10 +2489,10 @@ def: Pat<(int_hexagon_V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpybv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
- (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
- (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
(V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
@@ -2677,10 +2677,10 @@ def: Pat<(int_hexagon_V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vaddbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
- (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
- (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsatwh HvxVR:$src1, HvxVR:$src2),
(V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsatwh_128B HvxVR:$src1, HvxVR:$src2),
@@ -2721,10 +2721,10 @@ def: Pat<(int_hexagon_V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_veqh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
- (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
- (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vaddwsat HvxVR:$src1, HvxVR:$src2),
(V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vaddwsat_128B HvxVR:$src1, HvxVR:$src2),
@@ -2885,10 +2885,10 @@ def: Pat<(int_hexagon_V6_vsubh HvxVR:$src1, HvxVR:$src2),
(V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubh_128B HvxVR:$src1, HvxVR:$src2),
(V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
- (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
- (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vminw HvxVR:$src1, HvxVR:$src2),
(V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vminw_128B HvxVR:$src1, HvxVR:$src2),
@@ -2929,10 +2929,10 @@ def: Pat<(int_hexagon_V6_vsubuhw HvxVR:$src1, HvxVR:$src2),
(V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubuhw_128B HvxVR:$src1, HvxVR:$src2),
(V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
- (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
- (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vasrw HvxVR:$src1, IntRegs:$src2),
(V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vasrw_128B HvxVR:$src1, IntRegs:$src2),
@@ -3016,10 +3016,10 @@ def: Pat<(int_hexagon_V6_vlsrb HvxVR:$src1, IntRegs:$src2),
(V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vlsrb_128B HvxVR:$src1, IntRegs:$src2),
(V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
- (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
- (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2),
(V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vaddububb_sat_128B HvxVR:$src1, HvxVR:$src2),
@@ -3032,10 +3032,10 @@ def: Pat<(int_hexagon_V6_ldtp0 PredRegs:$src1, IntRegs:$src2),
(V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_ldtp0_128B PredRegs:$src1, IntRegs:$src2),
(V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
- (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
- (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2),
(V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubuwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
@@ -3124,10 +3124,10 @@ def: Pat<(int_hexagon_V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$sr
(V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vasrwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
(V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
- (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
- (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsubuwsat HvxVR:$src1, HvxVR:$src2),
(V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubuwsat_128B HvxVR:$src1, HvxVR:$src2),
@@ -3188,10 +3188,10 @@ def: Pat<(int_hexagon_V6_ldcnp0 PredRegs:$src1, IntRegs:$src2),
(V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_ldcnp0_128B PredRegs:$src1, IntRegs:$src2),
(V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
- (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
- (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
+ (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
+ (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsubbsat HvxVR:$src1, HvxVR:$src2),
(V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubbsat_128B HvxVR:$src1, HvxVR:$src2),
diff --git a/lib/Target/Hexagon/HexagonDepOperands.td b/lib/Target/Hexagon/HexagonDepOperands.td
index fdba7b971258..8a94d96522cc 100644
--- a/lib/Target/Hexagon/HexagonDepOperands.td
+++ b/lib/Target/Hexagon/HexagonDepOperands.td
@@ -8,120 +8,125 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
+multiclass ImmOpPred<code pred, ValueType vt = i32> {
+ def "" : PatLeaf<(vt imm), pred>;
+ def _timm : PatLeaf<(vt timm), pred>;
+}
+
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
-def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
+defm s4_0ImmPred : ImmOpPred<[{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
-def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
+defm s29_3ImmPred : ImmOpPred<[{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
+defm u6_0ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-def a30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+defm a30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
+defm u29_3ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; }
-def s8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
+defm s8_0ImmPred : ImmOpPred<[{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; }
def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
+defm u32_0ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
+defm u4_2ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
+defm u3_0ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b15_2Imm : Operand<OtherVT> { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-def b15_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
+defm b15_2ImmPred : ImmOpPred<[{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; }
def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u11_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
+defm u11_3ImmPred : ImmOpPred<[{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; }
-def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
+defm s4_3ImmPred : ImmOpPred<[{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; }
def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def m32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
+defm m32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; }
def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u3_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
+defm u3_1ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; }
def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
+defm u1_0ImmPred : ImmOpPred<[{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; }
def s31_1Imm : Operand<i32> { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; }
-def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
+defm s31_1ImmPred : ImmOpPred<[{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; }
-def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
+defm s3_0ImmPred : ImmOpPred<[{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def s30_2Imm : Operand<i32> { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; }
-def s30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+defm s30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; }
def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
+defm u4_0ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; }
-def s6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
+defm s6_0ImmPred : ImmOpPred<[{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; }
def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u5_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
+defm u5_3ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; }
-def s32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
+defm s32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; }
-def s6_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
+defm s6_3ImmPred : ImmOpPred<[{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; }
def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
+defm u10_0ImmPred : ImmOpPred<[{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; }
def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
+defm u31_1ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; }
-def s4_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
+defm s4_1ImmPred : ImmOpPred<[{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; }
def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u16_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
+defm u16_0ImmPred : ImmOpPred<[{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; }
def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u6_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
+defm u6_1ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; }
def u5_2Imm : Operand<i32> { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u5_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
+defm u5_2ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; }
def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
+defm u26_6ImmPred : ImmOpPred<[{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; }
def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
+defm u6_2ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; }
def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u7_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
+defm u7_0ImmPred : ImmOpPred<[{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b13_2Imm : Operand<OtherVT> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-def b13_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
+defm b13_2ImmPred : ImmOpPred<[{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; }
def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u5_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
+defm u5_0ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; }
def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u2_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
+defm u2_0ImmPred : ImmOpPred<[{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; }
-def s4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
+defm s4_2ImmPred : ImmOpPred<[{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b30_2Imm : Operand<OtherVT> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-def b30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+defm b30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; }
def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
+defm u8_0ImmPred : ImmOpPred<[{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
+defm u30_2ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
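The ImmOpPred multiclass introduced at the top of this file is what supplies those _timm predicates without duplicating any predicate bodies: each former def of a PatLeaf over (i32 imm) becomes a defm that expands to two PatLeafs sharing one code block — the original name over (vt imm) and a _timm twin over (vt timm). Hand-expanding the first defm for illustration (this is the effective result, not literal TableGen output):

    def s4_0ImmPred      : PatLeaf<(i32 imm),
          [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
    def s4_0ImmPred_timm : PatLeaf<(i32 timm),
          [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;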
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index c1f32e54e98d..0844fb8a8629 100644
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -250,7 +250,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
unsigned Opc = T1I->getOpcode();
if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf)
return false;
- unsigned PredR = T1I->getOperand(0).getReg();
+ Register PredR = T1I->getOperand(0).getReg();
// Get the layout successor, or 0 if B does not have one.
MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B));
@@ -384,8 +384,8 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned R = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = MO.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
if (!isPredicate(R))
continue;
@@ -401,8 +401,8 @@ bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const {
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned R = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = MO.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
const MachineInstr *DefI = MRI->getVRegDef(R);
// "Undefined" virtual registers are actually defined via IMPLICIT_DEF.
@@ -437,7 +437,7 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
break;
if (usesUndefVReg(&MI))
return false;
- unsigned DefR = MI.getOperand(0).getReg();
+ Register DefR = MI.getOperand(0).getReg();
if (isPredicate(DefR))
return false;
}
@@ -491,8 +491,8 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs(
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned R = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = MO.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
if (isPredicate(R))
PredDefs++;
@@ -798,7 +798,7 @@ unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock *B,
const MCInstrDesc &D = HII->get(Opc);
DebugLoc DL = B->findBranchDebugLoc();
- unsigned MuxR = MRI->createVirtualRegister(DRC);
+ Register MuxR = MRI->createVirtualRegister(DRC);
BuildMI(*B, At, DL, D, MuxR)
.addReg(PredR)
.addReg(TR, 0, TSR)
@@ -837,7 +837,7 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
unsigned MuxR = 0, MuxSR = 0;
if (TR && FR) {
- unsigned DR = PN->getOperand(0).getReg();
+ Register DR = PN->getOperand(0).getReg();
const TargetRegisterClass *RC = MRI->getRegClass(DR);
MuxR = buildMux(FP.SplitB, FP.SplitB->getFirstTerminator(), RC,
FP.PredR, TR, TSR, FR, FSR);
@@ -988,8 +988,8 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
MachineInstr *PN = &*I;
assert(PN->getNumOperands() == 3 && "Invalid phi node");
MachineOperand &UO = PN->getOperand(1);
- unsigned UseR = UO.getReg(), UseSR = UO.getSubReg();
- unsigned DefR = PN->getOperand(0).getReg();
+ Register UseR = UO.getReg(), UseSR = UO.getSubReg();
+ Register DefR = PN->getOperand(0).getReg();
unsigned NewR = UseR;
if (UseSR) {
// MRI.replaceVregUsesWith does not allow to update the subregister,
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index c343e426ac7d..8984ee82960d 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -285,7 +285,7 @@ bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) {
}
LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub)
: MRI->getMaxLaneMaskForVReg(Reg);
}
@@ -364,7 +364,7 @@ void HexagonExpandCondsets::updateKillFlags(unsigned Reg) {
void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
LiveRange &Range) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
if (Range.empty())
return;
@@ -372,8 +372,8 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> {
if (!Op.isReg() || !Op.isDef())
return { false, false };
- unsigned DR = Op.getReg(), DSR = Op.getSubReg();
- if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg)
+ Register DR = Op.getReg(), DSR = Op.getSubReg();
+ if (!Register::isVirtualRegister(DR) || DR != Reg)
return { false, false };
LaneBitmask SLM = getLaneMask(DR, DSR);
LaneBitmask A = SLM & LM;
@@ -551,8 +551,8 @@ void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet,
bool Recalc, bool UpdateKills, bool UpdateDeads) {
UpdateKills |= UpdateDeads;
for (unsigned R : RegSet) {
- if (!TargetRegisterInfo::isVirtualRegister(R)) {
- assert(TargetRegisterInfo::isPhysicalRegister(R));
+ if (!Register::isVirtualRegister(R)) {
+ assert(Register::isPhysicalRegister(R));
// There shouldn't be any physical registers as operands, except
// possibly reserved registers.
assert(MRI->isReserved(R));
@@ -579,17 +579,17 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
using namespace Hexagon;
if (SO.isReg()) {
- unsigned PhysR;
+ Register PhysR;
RegisterRef RS = SO;
- if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
+ if (Register::isVirtualRegister(RS.Reg)) {
const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
assert(VC->begin() != VC->end() && "Empty register class");
PhysR = *VC->begin();
} else {
- assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
+ assert(Register::isPhysicalRegister(RS.Reg));
PhysR = RS.Reg;
}
- unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
+ Register PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
switch (TRI->getRegSizeInBits(*RC)) {
case 32:
@@ -671,7 +671,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
MachineOperand &MD = MI.getOperand(0); // Definition
MachineOperand &MP = MI.getOperand(1); // Predicate register
assert(MD.isDef());
- unsigned DR = MD.getReg(), DSR = MD.getSubReg();
+ Register DR = MD.getReg(), DSR = MD.getSubReg();
bool ReadUndef = MD.isUndef();
MachineBasicBlock::iterator At = MI;
@@ -802,7 +802,7 @@ bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs,
// For physical register we would need to check register aliases, etc.
// and we don't want to bother with that. It would be of little value
// before the actual register rewriting (from virtual to physical).
- if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ if (!Register::isVirtualRegister(RR.Reg))
return false;
// No redefs for any operand.
if (isRefInMap(RR, Defs, Exec_Then))
@@ -954,7 +954,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
return false;
RegisterRef RT(MS);
- unsigned PredR = MP.getReg();
+ Register PredR = MP.getReg();
MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
if (!DefI || !isPredicable(DefI))
return false;
@@ -999,7 +999,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
// subregisters are other physical registers, and we are not checking
// that.
RegisterRef RR = Op;
- if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ if (!Register::isVirtualRegister(RR.Reg))
return false;
ReferenceMap &Map = Op.isDef() ? Defs : Uses;
@@ -1091,7 +1091,7 @@ bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
}
bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
- if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ if (!Register::isVirtualRegister(RR.Reg))
return false;
const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
if (RC == &Hexagon::IntRegsRegClass) {
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index f7edc168de4a..d21de8ccb5ab 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -114,12 +114,11 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
// First pass - compute the offset of each basic block.
for (const MachineBasicBlock &MBB : MF) {
- if (MBB.getAlignment()) {
+ if (MBB.getAlignment() != Align::None()) {
// Although we don't know the exact layout of the final code, we need
// to account for alignment padding somehow. This heuristic pads each
// aligned basic block according to the alignment value.
- int ByteAlign = (1u << MBB.getAlignment()) - 1;
- InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign);
+ InstOffset = alignTo(InstOffset, MBB.getAlignment());
}
BlockToInstOffset[&MBB] = InstOffset;
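// Context for the change above: MBB.getAlignment() used to return the log2 of
// the alignment and now returns an Align holding the byte value, so the
// hand-rolled add-and-mask becomes a call to alignTo(). A standalone check
// that the two formulations agree for power-of-two alignments (alignTo here
// is a local stand-in for the one in llvm/Support/Alignment.h):
#include <cassert>
#include <cstdint>

uint64_t alignTo(uint64_t Value, uint64_t Align) { // Align: a power of two
  return (Value + Align - 1) & ~(Align - 1);       // the old ByteAlign trick
}

int main() {
  for (uint64_t A : {1u, 2u, 4u, 16u})
    for (uint64_t V = 0; V < 64; ++V) {
      uint64_t R = alignTo(V, A);
      assert(R % A == 0 && R >= V && R < V + A);
    }
}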
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 3368ee4fb3b9..bfa3372d7faf 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -303,10 +303,10 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
if (MO.isFI())
return true;
if (MO.isReg()) {
- unsigned R = MO.getReg();
+ Register R = MO.getReg();
// Virtual registers will need scavenging, which then may require
// a stack slot.
- if (TargetRegisterInfo::isVirtualRegister(R))
+ if (Register::isVirtualRegister(R))
return true;
for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
if (CSR[*S])
@@ -973,8 +973,8 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
// understand paired registers for cfi_offset.
// Eg .cfi_offset r1:0, -64
- unsigned HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
- unsigned LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
+ Register HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
+ Register LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
@@ -1377,10 +1377,10 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
}
MFI.setLocalFrameSize(LFS);
- unsigned A = MFI.getLocalFrameMaxAlign();
+ Align A = MFI.getLocalFrameMaxAlign();
assert(A <= 8 && "Unexpected local frame alignment");
- if (A == 0)
- MFI.setLocalFrameMaxAlign(8);
+ if (A == 1)
+ MFI.setLocalFrameMaxAlign(Align(8));
MFI.setUseLocalStackAllocationBlock(true);
// Set the physical aligned-stack base address register.
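// Why "A == 0" became "A == 1": the Align type cannot represent zero; its
// smallest value, Align(1) (spelled Align::None() at this revision), is the
// new "no extra alignment" sentinel. A minimal model of the check above:
#include <cassert>

struct AlignModel {
  unsigned Value;                      // always >= 1 and a power of two
  explicit AlignModel(unsigned V = 1) : Value(V) { assert(V && !(V & (V - 1))); }
};

int main() {
  AlignModel A;                        // default-constructed ~ Align::None()
  if (A.Value == 1)                    // the test that used to be "A == 0"
    A = AlignModel(8);                 // raise to the 8-byte default
  assert(A.Value == 8);
}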
@@ -1570,13 +1570,13 @@ bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
MachineInstr *MI = &*It;
DebugLoc DL = MI->getDebugLoc();
- unsigned DstR = MI->getOperand(0).getReg();
- unsigned SrcR = MI->getOperand(1).getReg();
+ Register DstR = MI->getOperand(0).getReg();
+ Register SrcR = MI->getOperand(1).getReg();
if (!Hexagon::ModRegsRegClass.contains(DstR) ||
!Hexagon::ModRegsRegClass.contains(SrcR))
return false;
- unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1));
BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
.addReg(TmpR, RegState::Kill);
@@ -1595,13 +1595,13 @@ bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
DebugLoc DL = MI->getDebugLoc();
unsigned Opc = MI->getOpcode();
- unsigned SrcR = MI->getOperand(2).getReg();
+ Register SrcR = MI->getOperand(2).getReg();
bool IsKill = MI->getOperand(2).isKill();
int FI = MI->getOperand(0).getIndex();
// TmpR = C2_tfrpr SrcR if SrcR is a predicate register
// TmpR = A2_tfrcrr SrcR if SrcR is a modifier register
- unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr
: Hexagon::A2_tfrcrr;
BuildMI(B, It, DL, HII.get(TfrOpc), TmpR)
@@ -1628,11 +1628,11 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
DebugLoc DL = MI->getDebugLoc();
unsigned Opc = MI->getOpcode();
- unsigned DstR = MI->getOperand(0).getReg();
+ Register DstR = MI->getOperand(0).getReg();
int FI = MI->getOperand(1).getIndex();
// TmpR = L2_loadri_io FI, 0
- unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
.addFrameIndex(FI)
.addImm(0)
@@ -1658,7 +1658,7 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
return false;
DebugLoc DL = MI->getDebugLoc();
- unsigned SrcR = MI->getOperand(2).getReg();
+ Register SrcR = MI->getOperand(2).getReg();
bool IsKill = MI->getOperand(2).isKill();
int FI = MI->getOperand(0).getIndex();
auto *RC = &Hexagon::HvxVRRegClass;
@@ -1667,8 +1667,8 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
// TmpR0 = A2_tfrsi 0x01010101
// TmpR1 = V6_vandqrt Qx, TmpR0
// store FI, 0, TmpR1
- unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- unsigned TmpR1 = MRI.createVirtualRegister(RC);
+ Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register TmpR1 = MRI.createVirtualRegister(RC);
BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
.addImm(0x01010101);
@@ -1695,15 +1695,15 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
return false;
DebugLoc DL = MI->getDebugLoc();
- unsigned DstR = MI->getOperand(0).getReg();
+ Register DstR = MI->getOperand(0).getReg();
int FI = MI->getOperand(1).getIndex();
auto *RC = &Hexagon::HvxVRRegClass;
// TmpR0 = A2_tfrsi 0x01010101
// TmpR1 = load FI, 0
// DstR = V6_vandvrt TmpR1, TmpR0
- unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- unsigned TmpR1 = MRI.createVirtualRegister(RC);
+ Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register TmpR1 = MRI.createVirtualRegister(RC);
BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
.addImm(0x01010101);
@@ -1745,9 +1745,9 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
}
DebugLoc DL = MI->getDebugLoc();
- unsigned SrcR = MI->getOperand(2).getReg();
- unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);
- unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
+ Register SrcR = MI->getOperand(2).getReg();
+ Register SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);
+ Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
bool IsKill = MI->getOperand(2).isKill();
int FI = MI->getOperand(0).getIndex();
@@ -1793,9 +1793,9 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
return false;
DebugLoc DL = MI->getDebugLoc();
- unsigned DstR = MI->getOperand(0).getReg();
- unsigned DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
- unsigned DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
+ Register DstR = MI->getOperand(0).getReg();
+ Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
+ Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
int FI = MI->getOperand(1).getIndex();
unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
@@ -1834,7 +1834,7 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
DebugLoc DL = MI->getDebugLoc();
- unsigned SrcR = MI->getOperand(2).getReg();
+ Register SrcR = MI->getOperand(2).getReg();
bool IsKill = MI->getOperand(2).isKill();
int FI = MI->getOperand(0).getIndex();
@@ -1863,7 +1863,7 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
DebugLoc DL = MI->getDebugLoc();
- unsigned DstR = MI->getOperand(0).getReg();
+ Register DstR = MI->getOperand(0).getReg();
int FI = MI->getOperand(1).getIndex();
unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
@@ -2299,7 +2299,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
int TFI;
if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)
continue;
- unsigned DstR = MI.getOperand(0).getReg();
+ Register DstR = MI.getOperand(0).getReg();
assert(MI.getOperand(0).getSubReg() == 0);
MachineInstr *CopyOut = nullptr;
if (DstR != FoundR) {
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 65e8c7686640..27265dd53794 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -30,7 +30,7 @@ class TargetRegisterClass;
class HexagonFrameLowering : public TargetFrameLowering {
public:
explicit HexagonFrameLowering()
- : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {}
+ : TargetFrameLowering(StackGrowsDown, Align(8), 0, Align::None(), true) {}
// All of the prolog/epilog functionality, including saving and restoring
// callee-saved registers is handled in emitPrologue. This is to have the
diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp
index 3417c74e359b..caa0e4d80397 100644
--- a/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -184,7 +184,7 @@ bool HexagonGenExtract::convert(Instruction *In) {
// The width of the extracted field is the minimum of the original bits
// that remain after the shifts and the number of contiguous 1s in the mask.
uint32_t W = std::min(U, T);
- if (W == 0)
+ if (W == 0 || W == 1)
return false;
// Check if the extracted bits are contained within the mask that it is
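// Context for the width check above: convert() rewrites (x >> off) & mask
// into a single extract when the mask is a run of W contiguous ones. A
// standalone model of the unsigned field extract; rejecting W == 1 presumably
// leaves single-bit probes to cheaper bit-test patterns (our reading, the
// diff does not say).
#include <cassert>
#include <cstdint>

uint32_t extractu(uint32_t X, uint32_t W, uint32_t Off) {
  assert(W >= 1 && W <= 32 && Off + W <= 32);
  uint32_t Mask = (W == 32) ? 0xFFFFFFFFu : ((1u << W) - 1);
  return (X >> Off) & Mask;
}

int main() {
  assert(extractu(0xABCD1234, 8, 4) == 0x23); // 8-bit field at bit 4
  assert(extractu(~0u, 32, 0) == ~0u);
}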
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
index 81025c1c5325..48881e02f4d3 100644
--- a/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -163,11 +163,11 @@ namespace {
}
static inline unsigned v2x(unsigned v) {
- return TargetRegisterInfo::virtReg2Index(v);
+ return Register::virtReg2Index(v);
}
static inline unsigned x2v(unsigned x) {
- return TargetRegisterInfo::index2VirtReg(x);
+ return Register::index2VirtReg(x);
}
};
@@ -267,7 +267,7 @@ namespace {
CellMapShadow(const BitTracker &T) : BT(T) {}
const BitTracker::RegisterCell &lookup(unsigned VR) {
- unsigned RInd = TargetRegisterInfo::virtReg2Index(VR);
+ unsigned RInd = Register::virtReg2Index(VR);
// Grow the vector to at least 32 elements.
if (RInd >= CVect.size())
CVect.resize(std::max(RInd+16, 32U), nullptr);
@@ -606,9 +606,9 @@ void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
- unsigned R = MO.getReg();
+ Register R = MO.getReg();
assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
- if (TargetRegisterInfo::isVirtualRegister(R))
+ if (Register::isVirtualRegister(R))
RO.insert(std::make_pair(R, Index++));
}
}
@@ -724,8 +724,8 @@ void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned R = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = MO.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
Defs.insert(R);
}
@@ -737,8 +737,8 @@ void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned R = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = MO.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
Uses.insert(R);
}
@@ -1399,7 +1399,7 @@ bool HexagonGenInsert::generateInserts() {
for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
unsigned VR = I->first;
const TargetRegisterClass *RC = MRI->getRegClass(VR);
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
RegMap[VR] = NewVR;
}
@@ -1477,9 +1477,8 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned R = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R) ||
- !MRI->use_nodbg_empty(R)) {
+ Register R = MO.getReg();
+ if (!Register::isVirtualRegister(R) || !MRI->use_nodbg_empty(R)) {
AllDead = false;
break;
}
@@ -1598,7 +1597,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
IterListType Out;
for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
- unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first);
+ unsigned Idx = Register::virtReg2Index(I->first);
if (Idx >= Cutoff)
Out.push_back(I);
}
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index cdafbc20ab86..b559e7bbbb60 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -171,7 +171,7 @@ void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs,
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isImplicit())
continue;
- unsigned R = MO.getReg();
+ Register R = MO.getReg();
BitVector &Set = MO.isDef() ? Defs : Uses;
expandReg(R, Set);
}
@@ -239,14 +239,14 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
unsigned Opc = MI->getOpcode();
if (!isCondTransfer(Opc))
continue;
- unsigned DR = MI->getOperand(0).getReg();
+ Register DR = MI->getOperand(0).getReg();
if (isRegPair(DR))
continue;
MachineOperand &PredOp = MI->getOperand(1);
if (PredOp.isUndef())
continue;
- unsigned PR = PredOp.getReg();
+ Register PR = PredOp.getReg();
unsigned Idx = I2X.lookup(MI);
CondsetMap::iterator F = CM.find(DR);
bool IfTrue = HII->isPredicatedTrue(Opc);
diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp
index e991fa8b61c8..24d33c91a29b 100644
--- a/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -133,7 +133,7 @@ INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred",
"Hexagon generate predicate operations", false, false)
bool HexagonGenPredicate::isPredReg(unsigned R) {
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ if (!Register::isVirtualRegister(R))
return false;
const TargetRegisterClass *RC = MRI->getRegClass(R);
return RC == &Hexagon::PredRegsRegClass;
@@ -213,7 +213,7 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
case TargetOpcode::COPY:
if (isPredReg(MI->getOperand(1).getReg())) {
RegisterSubReg RD = MI->getOperand(0);
- if (TargetRegisterInfo::isVirtualRegister(RD.R))
+ if (Register::isVirtualRegister(RD.R))
PredGPRs.insert(RD);
}
break;
@@ -245,7 +245,7 @@ RegisterSubReg HexagonGenPredicate::getPredRegFor(const RegisterSubReg &Reg) {
// Create a predicate register for a given Reg. The newly created register
// will have its value copied from Reg, so that it can be later used as
// an operand in other instructions.
- assert(TargetRegisterInfo::isVirtualRegister(Reg.R));
+ assert(Register::isVirtualRegister(Reg.R));
RegToRegMap::iterator F = G2P.find(Reg);
if (F != G2P.end())
return F->second;
@@ -265,7 +265,7 @@ RegisterSubReg HexagonGenPredicate::getPredRegFor(const RegisterSubReg &Reg) {
MachineBasicBlock &B = *DefI->getParent();
DebugLoc DL = DefI->getDebugLoc();
const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
- unsigned NewPR = MRI->createVirtualRegister(PredRC);
+ Register NewPR = MRI->createVirtualRegister(PredRC);
// For convertible instructions, do not modify them, so that they can
// be converted later. Generate a copy from Reg to NewPR.
@@ -432,7 +432,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
// Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR
// with NewGPR.
const TargetRegisterClass *RC = MRI->getRegClass(OutR.R);
- unsigned NewOutR = MRI->createVirtualRegister(RC);
+ Register NewOutR = MRI->createVirtualRegister(RC);
BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), NewOutR)
.addReg(NewPR.R, 0, NewPR.S);
MRI->replaceRegWith(OutR.R, NewOutR);
@@ -471,9 +471,9 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
continue;
RegisterSubReg DR = MI.getOperand(0);
RegisterSubReg SR = MI.getOperand(1);
- if (!TargetRegisterInfo::isVirtualRegister(DR.R))
+ if (!Register::isVirtualRegister(DR.R))
continue;
- if (!TargetRegisterInfo::isVirtualRegister(SR.R))
+ if (!Register::isVirtualRegister(SR.R))
continue;
if (MRI->getRegClass(DR.R) != PredRC)
continue;
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index cecbaedb6d70..62291790f0fe 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -435,17 +435,17 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
if (Phi->getOperand(i+1).getMBB() != Latch)
continue;
- unsigned PhiOpReg = Phi->getOperand(i).getReg();
+ Register PhiOpReg = Phi->getOperand(i).getReg();
MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
if (DI->getDesc().isAdd()) {
// If the register operand to the add is the PHI we're looking at, this
// meets the induction pattern.
- unsigned IndReg = DI->getOperand(1).getReg();
+ Register IndReg = DI->getOperand(1).getReg();
MachineOperand &Opnd2 = DI->getOperand(2);
int64_t V;
if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
- unsigned UpdReg = DI->getOperand(0).getReg();
+ Register UpdReg = DI->getOperand(0).getReg();
IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
}
}
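// The shape findInductionRegister() is matching, in source form: a PHI that
// merges the entry value with UpdReg, where UpdReg = add(PhiReg, constant)
// in the loop latch. A scalar model of one such induction chain:
#include <cassert>

int main() {
  int Ind = 0;           // PhiOpReg: entry value merged with the back-edge
  const int Bump = 4;    // the immediate picked up by checkForImmediate()
  for (int I = 0; I < 5; ++I)
    Ind = Ind + Bump;    // UpdReg = add(IndReg, V), feeding the PHI again
  assert(Ind == 20);
}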
@@ -694,7 +694,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
Cmp = Comparison::getSwappedComparison(Cmp);
if (InitialValue->isReg()) {
- unsigned R = InitialValue->getReg();
+ Register R = InitialValue->getReg();
MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
if (!MDT->properlyDominates(DefBB, Header)) {
int64_t V;
@@ -704,7 +704,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
OldInsts.push_back(MRI->getVRegDef(R));
}
if (EndValue->isReg()) {
- unsigned R = EndValue->getReg();
+ Register R = EndValue->getReg();
MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
if (!MDT->properlyDominates(DefBB, Header)) {
int64_t V;
@@ -910,7 +910,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
(RegToImm ? TII->get(Hexagon::A2_subri) :
TII->get(Hexagon::A2_addi));
if (RegToReg || RegToImm) {
- unsigned SubR = MRI->createVirtualRegister(IntRC);
+ Register SubR = MRI->createVirtualRegister(IntRC);
MachineInstrBuilder SubIB =
BuildMI(*PH, InsertPos, DL, SubD, SubR);
@@ -931,7 +931,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
EndValInstr->getOperand(2).getImm() == StartV) {
DistR = EndValInstr->getOperand(1).getReg();
} else {
- unsigned SubR = MRI->createVirtualRegister(IntRC);
+ Register SubR = MRI->createVirtualRegister(IntRC);
MachineInstrBuilder SubIB =
BuildMI(*PH, InsertPos, DL, SubD, SubR);
SubIB.addReg(End->getReg(), 0, End->getSubReg())
@@ -950,7 +950,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
AdjSR = DistSR;
} else {
// Generate CountR = ADD DistR, AdjVal
- unsigned AddR = MRI->createVirtualRegister(IntRC);
+ Register AddR = MRI->createVirtualRegister(IntRC);
MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
BuildMI(*PH, InsertPos, DL, AddD, AddR)
.addReg(DistR, 0, DistSR)
@@ -971,7 +971,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
unsigned Shift = Log2_32(IVBump);
// Generate NormR = LSR DistR, Shift.
- unsigned LsrR = MRI->createVirtualRegister(IntRC);
+ Register LsrR = MRI->createVirtualRegister(IntRC);
const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
.addReg(AdjR, 0, AdjSR)
@@ -1038,7 +1038,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (MRI->use_nodbg_empty(Reg))
continue;
@@ -1058,7 +1058,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
if (!OPO.isReg() || !OPO.isDef())
continue;
- unsigned OPReg = OPO.getReg();
+ Register OPReg = OPO.getReg();
use_nodbg_iterator nextJ;
for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
J != End; J = nextJ) {
@@ -1092,7 +1092,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
MachineRegisterInfo::use_iterator nextI;
for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
E = MRI->use_end(); I != E; I = nextI) {
@@ -1244,7 +1244,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
if (TripCount->isReg()) {
// Create a copy of the loop count register.
- unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
.addReg(TripCount->getReg(), 0, TripCount->getSubReg());
// Add the Loop instruction to the beginning of the loop.
@@ -1257,7 +1257,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
// create a new virtual register.
int64_t CountImm = TripCount->getImm();
if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
- unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
.addImm(CountImm);
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
@@ -1333,7 +1333,7 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
return true;
// Out of order.
- unsigned PredR = CmpI->getOperand(0).getReg();
+ Register PredR = CmpI->getOperand(0).getReg();
bool FoundBump = false;
instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
@@ -1428,10 +1428,10 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
if (checkForImmediate(*InitVal, Imm))
return (EndVal->getImm() == Imm);
- unsigned Reg = InitVal->getReg();
+ Register Reg = InitVal->getReg();
// We don't know the value of a physical register.
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return true;
MachineInstr *Def = MRI->getVRegDef(Reg);
@@ -1508,8 +1508,8 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
// processed to handle potential subregisters in MO.
int64_t TV;
- unsigned R = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = MO.getReg();
+ if (!Register::isVirtualRegister(R))
return false;
MachineInstr *DI = MRI->getVRegDef(R);
unsigned DOpc = DI->getOpcode();
@@ -1582,11 +1582,11 @@ void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
}
assert(MO.isReg());
- unsigned R = MO.getReg();
+ Register R = MO.getReg();
MachineInstr *DI = MRI->getVRegDef(R);
const TargetRegisterClass *RC = MRI->getRegClass(R);
- unsigned NewR = MRI->createVirtualRegister(RC);
+ Register NewR = MRI->createVirtualRegister(RC);
MachineBasicBlock &B = *DI->getParent();
DebugLoc DL = DI->getDebugLoc();
BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
@@ -1634,17 +1634,17 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
if (Phi->getOperand(i+1).getMBB() != Latch)
continue;
- unsigned PhiReg = Phi->getOperand(i).getReg();
+ Register PhiReg = Phi->getOperand(i).getReg();
MachineInstr *DI = MRI->getVRegDef(PhiReg);
if (DI->getDesc().isAdd()) {
// If the register operand to the add/sub is the PHI we are looking
// at, this meets the induction pattern.
- unsigned IndReg = DI->getOperand(1).getReg();
+ Register IndReg = DI->getOperand(1).getReg();
MachineOperand &Opnd2 = DI->getOperand(2);
int64_t V;
if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
- unsigned UpdReg = DI->getOperand(0).getReg();
+ Register UpdReg = DI->getOperand(0).getReg();
IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
}
}
@@ -1702,7 +1702,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
if (!Cond[CSz-1].isReg())
return false;
- unsigned P = Cond[CSz-1].getReg();
+ Register P = Cond[CSz - 1].getReg();
MachineInstr *PredDef = MRI->getVRegDef(P);
if (!PredDef->isCompare())
@@ -1903,15 +1903,15 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
NewPH->insert(NewPH->end(), NewPN);
- unsigned PR = PN->getOperand(0).getReg();
+ Register PR = PN->getOperand(0).getReg();
const TargetRegisterClass *RC = MRI->getRegClass(PR);
- unsigned NewPR = MRI->createVirtualRegister(RC);
+ Register NewPR = MRI->createVirtualRegister(RC);
NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
// Copy all non-latch operands of a header's PHI node to the newly
// created PHI node in the preheader.
for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
- unsigned PredR = PN->getOperand(i).getReg();
+ Register PredR = PN->getOperand(i).getReg();
unsigned PredRSub = PN->getOperand(i).getSubReg();
MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
if (PredB == Latch)
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 605fcfc25559..4684d8e4781a 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -697,7 +697,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
//
void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
SDLoc dl(N);
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ auto *CN = cast<ConstantFPSDNode>(N);
APInt A = CN->getValueAPF().bitcastToAPInt();
if (N->getValueType(0) == MVT::f32) {
SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
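// The dyn_cast -> cast change above tightens an invariant: SelectConstantFP
// is only reached for ConstantFPSDNode, and the old dyn_cast result was
// dereferenced with no null check. A standalone model of the two cast styles
// (local stand-ins, not LLVM's actual RTTI machinery):
#include <cassert>

struct Node { int Kind; };
constexpr int FPKind = 1; // an "FP node" is just a Node tagged FPKind here

Node *dynCastFP(Node *N) {        // like dyn_cast<>: null on a kind mismatch
  return N->Kind == FPKind ? N : nullptr;
}
Node *castFP(Node *N) {           // like cast<>: asserts the kind instead
  assert(N->Kind == FPKind && "castFP on the wrong node kind");
  return N;
}

int main() {
  Node FP{FPKind}, Other{0};
  assert(dynCastFP(&FP) && !dynCastFP(&Other));
  assert(castFP(&FP));            // castFP(&Other) would trip the assert
}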
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index fef5a98cdb00..8a8986e232a0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -240,12 +240,12 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return true;
}
-unsigned HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
+Register HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &) const {
// Just support r19, the linux kernel uses it.
- unsigned Reg = StringSwitch<unsigned>(RegName)
+ Register Reg = StringSwitch<Register>(RegName)
.Case("r19", Hexagon::R19)
- .Default(0);
+ .Default(Register());
if (Reg)
return Reg;
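// Returning Register from getRegisterByName works because Register default-
// constructs to "no register" and converts to bool accordingly; the lookup
// itself is a one-entry table. A standalone imitation (19 is a stand-in
// value, not Hexagon::R19's real encoding):
#include <cassert>
#include <string>

unsigned getRegisterByName(const std::string &Name) {
  return Name == "r19" ? 19u : 0u;  // only r19; the Linux kernel uses it
}

int main() {
  assert(getRegisterByName("r19") != 0);
  assert(getRegisterByName("r20") == 0);
}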
@@ -286,7 +286,7 @@ SDValue HexagonTargetLowering::LowerCallResult(
SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
MVT::i32, Glue);
// FR0 = (Value, Chain, Glue)
- unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
FR0.getValue(0), FR0.getValue(2));
// TPR = (Chain, Glue)
@@ -736,7 +736,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
RegVT = VA.getValVT();
const TargetRegisterClass *RC = getRegClassFor(RegVT);
- unsigned VReg = MRI.createVirtualRegister(RC);
+ Register VReg = MRI.createVirtualRegister(RC);
SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
// Treat values of type MVT::i1 specially: they are passed in
@@ -870,15 +870,20 @@ SDValue
HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue PredOp = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
- EVT OpVT = Op1.getValueType();
- SDLoc DL(Op);
+ MVT OpTy = ty(Op1);
+ const SDLoc &dl(Op);
- if (OpVT == MVT::v2i16) {
- SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
- SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
- SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
- SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
- return TR;
+ if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
+ MVT ElemTy = OpTy.getVectorElementType();
+ assert(ElemTy.isScalarInteger());
+ MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
+ OpTy.getVectorNumElements());
+ // Generate (trunc (select (_, sext, sext))).
+ return DAG.getSExtOrTrunc(
+ DAG.getSelect(dl, WideTy, PredOp,
+ DAG.getSExtOrTrunc(Op1, dl, WideTy),
+ DAG.getSExtOrTrunc(Op2, dl, WideTy)),
+ dl, OpTy);
}
return SDValue();
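// The widened VSELECT above computes trunc(select(p, sext(a), sext(b))) one
// lane at a time; sign-extension is value-preserving, so truncating the
// selected wide lane recovers the original element. A per-lane model for
// the v2i16 case:
#include <cassert>
#include <cstdint>

int16_t vselLane(bool P, int16_t A, int16_t B) {
  int32_t WideA = A, WideB = B;    // sext each lane to the doubled width
  int32_t Sel = P ? WideA : WideB; // the select in the wide type
  return (int16_t)Sel;             // trunc back; lossless for sext inputs
}

int main() {
  assert(vselLane(true, -5, 7) == -5);
  assert(vselLane(false, -5, 7) == 7);
}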
@@ -1230,9 +1235,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
Subtarget(ST) {
auto &HRI = *Subtarget.getRegisterInfo();
- setPrefLoopAlignment(4);
- setPrefFunctionAlignment(4);
- setMinFunctionAlignment(2);
+ setPrefLoopAlignment(Align(16));
+ setMinFunctionAlignment(Align(4));
+ setPrefFunctionAlignment(Align(16));
setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
@@ -1434,12 +1439,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE
};
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
for (unsigned VectExpOp : VectExpOps)
setOperationAction(VectExpOp, VT, Expand);
// Expand all extending loads and truncating stores:
- for (MVT TargetVT : MVT::vector_valuetypes()) {
+ for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
if (TargetVT == VT)
continue;
setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
@@ -1496,16 +1501,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, VT, Custom);
}
- for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) {
- setCondCodeAction(ISD::SETLT, VT, Expand);
+ for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
+ MVT::v2i32}) {
+ setCondCodeAction(ISD::SETNE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
- setCondCodeAction(ISD::SETULT, VT, Expand);
+ setCondCodeAction(ISD::SETGE, VT, Expand);
+ setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
+ setCondCodeAction(ISD::SETUGE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
}
// Custom-lower bitcasts from i8 to v8i1.
setOperationAction(ISD::BITCAST, MVT::i8, Custom);
setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4i8, Custom);
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
@@ -1554,6 +1564,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::f64, Legal);
}
+ setTargetDAGCombine(ISD::VSELECT);
+
if (Subtarget.useHVXOps())
initializeHVXLowering();
@@ -1643,6 +1655,8 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
case HexagonISD::VROR: return "HexagonISD::VROR";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
+ case HexagonISD::PTRUE: return "HexagonISD::PTRUE";
+ case HexagonISD::PFALSE: return "HexagonISD::PFALSE";
case HexagonISD::VZERO: return "HexagonISD::VZERO";
case HexagonISD::VSPLATW: return "HexagonISD::VSPLATW";
case HexagonISD::D2P: return "HexagonISD::D2P";
@@ -1783,7 +1797,8 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// The offset value comes through Modifier register. For now, assume the
// offset is 0.
Info.offset = 0;
- Info.align = DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont));
+ Info.align =
+ MaybeAlign(DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont)));
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -1805,7 +1820,8 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(VecTy);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8;
+ Info.align =
+ MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
@@ -1817,6 +1833,10 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}
+bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+ return X.getValueType().isScalarInteger(); // 'tstbit'
+}
+
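// hasBitTest() advertises a native single-bit test (Hexagon's tstbit), which
// lets the DAG combiner keep shift-and-mask bit probes in that form. The
// scalar pattern being preserved, modeled standalone:
#include <cassert>
#include <cstdint>

bool tstbit(uint32_t X, unsigned B) { return (X >> B) & 1u; }

int main() {
  assert(tstbit(0b1010, 1) && !tstbit(0b1010, 0));
}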
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}
@@ -1844,26 +1864,33 @@ bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
TargetLoweringBase::LegalizeTypeAction
HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
- if (VT.getVectorNumElements() == 1)
- return TargetLoweringBase::TypeScalarizeVector;
-
- // Always widen vectors of i1.
+ unsigned VecLen = VT.getVectorNumElements();
MVT ElemTy = VT.getVectorElementType();
- if (ElemTy == MVT::i1)
- return TargetLoweringBase::TypeWidenVector;
+
+ if (VecLen == 1 || VT.isScalableVector())
+ return TargetLoweringBase::TypeScalarizeVector;
if (Subtarget.useHVXOps()) {
+ unsigned HwLen = Subtarget.getVectorLength();
// If the size of VT is at least half of the vector length,
// widen the vector. Note: the threshold was not selected in
// any scientific way.
ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
if (llvm::find(Tys, ElemTy) != Tys.end()) {
- unsigned HwWidth = 8*Subtarget.getVectorLength();
+ unsigned HwWidth = 8*HwLen;
unsigned VecWidth = VT.getSizeInBits();
if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
return TargetLoweringBase::TypeWidenVector;
}
+ // Split vectors of i1 that correspond to (byte) vector pairs.
+ if (ElemTy == MVT::i1 && VecLen == 2*HwLen)
+ return TargetLoweringBase::TypeSplitVector;
}
+
+ // Always widen (remaining) vectors of i1.
+ if (ElemTy == MVT::i1)
+ return TargetLoweringBase::TypeWidenVector;
+
return TargetLoweringBase::TypeSplitVector;
}
@@ -2452,6 +2479,23 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return buildVector64(Ops, dl, VecTy, DAG);
if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
+ // Check if this is the special case of an all-0 or all-1 vector.
+ bool All0 = true, All1 = true;
+ for (SDValue P : Ops) {
+ auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
+ if (CN == nullptr) {
+ All0 = All1 = false;
+ break;
+ }
+ uint32_t C = CN->getZExtValue();
+ All0 &= (C == 0);
+ All1 &= (C == 1);
+ }
+ if (All0)
+ return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
+ if (All1)
+ return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);
+
// For each i1 element in the resulting predicate register, put 1
// shifted by the index of the element into a general-purpose register,
// then or them together and transfer it back into a predicate register.
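// The all-0/all-1 scan above, standalone: one pass keeps two running flags
// and bails on the first non-constant lane; a uniform vector can then be
// materialized as a single PFALSE/PTRUE instead of bit-by-bit inserts.
#include <cassert>
#include <optional>
#include <vector>

enum class PredKind { AllZero, AllOne, Mixed };

PredKind classify(const std::vector<std::optional<int>> &Lanes) {
  bool All0 = true, All1 = true;
  for (const auto &L : Lanes) {
    if (!L)
      return PredKind::Mixed;       // a non-constant lane ends the search
    All0 &= (*L == 0);
    All1 &= (*L == 1);
  }
  return All0 ? PredKind::AllZero : All1 ? PredKind::AllOne : PredKind::Mixed;
}

int main() {
  assert(classify({0, 0, 0, 0}) == PredKind::AllZero);
  assert(classify({1, 1, 1, 1}) == PredKind::AllOne);
  assert(classify({1, 0, 1, std::nullopt}) == PredKind::Mixed);
}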
@@ -2629,7 +2673,8 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
DoDefault = true;
if (!AlignLoads) {
- if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), *LN->getMemOperand()))
+ if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
+ *LN->getMemOperand()))
return Op;
DoDefault = true;
}
@@ -2637,7 +2682,8 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
// The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
: MVT::getVectorVT(MVT::i8, HaveAlign);
- DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, *LN->getMemOperand());
+ DoDefault =
+ allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
}
if (DoDefault) {
std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
@@ -2865,12 +2911,54 @@ HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
if (N->getValueType(0) == MVT::i8) {
SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
N->getOperand(0), DAG);
- Results.push_back(P);
+ SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
+ Results.push_back(T);
}
break;
}
}
+SDValue
+HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
+ const {
+ SDValue Op(N, 0);
+ if (isHvxOperation(Op)) {
+ if (SDValue V = PerformHvxDAGCombine(N, DCI))
+ return V;
+ return SDValue();
+ }
+
+ const SDLoc &dl(Op);
+ unsigned Opc = Op.getOpcode();
+
+ if (Opc == HexagonISD::P2D) {
+ SDValue P = Op.getOperand(0);
+ switch (P.getOpcode()) {
+ case HexagonISD::PTRUE:
+ return DCI.DAG.getConstant(-1, dl, ty(Op));
+ case HexagonISD::PFALSE:
+ return getZero(dl, ty(Op), DCI.DAG);
+ default:
+ break;
+ }
+ } else if (Opc == ISD::VSELECT) {
+ // This is pretty much duplicated in HexagonISelLoweringHVX...
+ //
+ // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
+ SDValue Cond = Op.getOperand(0);
+ if (Cond->getOpcode() == ISD::XOR) {
+ SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
+ if (C1->getOpcode() == HexagonISD::PTRUE) {
+ SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
+ Op.getOperand(2), Op.getOperand(1));
+ return VSel;
+ }
+ }
+ }
+
+ return SDValue();
+}
+
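// The combine above rewrites vselect(not(x), v0, v1) as vselect(x, v1, v0);
// xor with the all-ones predicate (PTRUE) is a lanewise NOT, so negating the
// condition and swapping the operands are equivalent. Per-lane check:
#include <cassert>

int sel(bool C, int A, int B) { return C ? A : B; }

int main() {
  for (bool X : {false, true})
    assert(sel(!X, 10, 20) == sel(X, 20, 10));
}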
/// Returns relocation base for the given PIC jumptable.
SDValue
HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 4e467cb22727..75f553bfec7f 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -68,6 +68,8 @@ namespace HexagonISD {
EH_RETURN,
DCFETCH,
READCYCLE,
+ PTRUE,
+ PFALSE,
D2P, // Convert 8-byte value to 8-bit predicate register. [*]
P2D, // Convert 8-bit predicate register to 8-byte value. [*]
V2Q, // Convert HVX vector to a vector predicate reg. [*]
@@ -127,6 +129,8 @@ namespace HexagonISD {
bool isCheapToSpeculateCtlz() const override { return true; }
bool isCtlzFast() const override { return true; }
+ bool hasBitTest(SDValue X, SDValue Y) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
/// Return true if an FMA operation is faster than a pair of mul and add
@@ -221,10 +225,12 @@ namespace HexagonISD {
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
@@ -299,7 +305,8 @@ namespace HexagonISD {
const AttributeList &FuncAttributes) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
- unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override;
+ unsigned Align, MachineMemOperand::Flags Flags, bool *Fast)
+ const override;
/// Returns relocation base for the given PIC jumptable.
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
@@ -456,6 +463,8 @@ namespace HexagonISD {
bool isHvxOperation(SDValue Op) const;
SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 345c657787a0..bc8a9959c917 100644
--- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -193,6 +193,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::OR, BoolV, Legal);
setOperationAction(ISD::XOR, BoolV, Legal);
}
+
+ setTargetDAGCombine(ISD::VSELECT);
}
SDValue
@@ -1580,6 +1582,28 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Unhandled HVX operation");
}
+SDValue
+HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
+ const {
+ const SDLoc &dl(N);
+ SDValue Op(N, 0);
+
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::VSELECT) {
+ // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
+ SDValue Cond = Op.getOperand(0);
+ if (Cond->getOpcode() == ISD::XOR) {
+ SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
+ if (C1->getOpcode() == HexagonISD::QTRUE) {
+ SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
+ Op.getOperand(2), Op.getOperand(1));
+ return VSel;
+ }
+ }
+ }
+ return SDValue();
+}
+
bool
HexagonTargetLowering::isHvxOperation(SDValue Op) const {
// If the type of the result, or any operand type are HVX vector types,
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index a156de5ba128..767538f92ed6 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -193,7 +193,7 @@ static inline void parseOperands(const MachineInstr &MI,
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
@@ -674,86 +674,96 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
return 2;
}
-/// Analyze the loop code to find the loop induction variable and the compare
-/// instruction used to compute the number of iterations. Currently, we only
-/// analyze loops that are controlled by hardware loops. In this case, the
-/// induction variable instruction is null. For all other cases, this function
-/// returns true, which means we're unable to analyze it.
-bool HexagonInstrInfo::analyzeLoop(MachineLoop &L,
- MachineInstr *&IndVarInst,
- MachineInstr *&CmpInst) const {
+namespace {
+class HexagonPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+ MachineInstr *Loop, *EndLoop;
+ MachineFunction *MF;
+ const HexagonInstrInfo *TII;
+ int64_t TripCount;
+ Register LoopCount;
+ DebugLoc DL;
- MachineBasicBlock *LoopEnd = L.getBottomBlock();
- MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
- // We really "analyze" only hardware loops right now.
- if (I != LoopEnd->end() && isEndLoopN(I->getOpcode())) {
- IndVarInst = nullptr;
- CmpInst = &*I;
- return false;
+public:
+ HexagonPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop)
+ : Loop(Loop), EndLoop(EndLoop), MF(Loop->getParent()->getParent()),
+ TII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()),
+ DL(Loop->getDebugLoc()) {
+ // Inspect the Loop instruction up-front, as it may be deleted when we call
+ // createTripCountGreaterCondition.
+ TripCount = Loop->getOpcode() == Hexagon::J2_loop0r
+ ? -1
+ : Loop->getOperand(1).getImm();
+ if (TripCount == -1)
+ LoopCount = Loop->getOperand(1).getReg();
}
- return true;
-}
-/// Generate code to reduce the loop iteration count by one and check if the
-/// loop is finished. Return the value/register of the new loop count. This
-/// function assumes that the nth iteration is peeled first.
-unsigned HexagonInstrInfo::reduceLoopCount(
- MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
- MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
- SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
- unsigned MaxIter) const {
- // We expect a hardware loop currently. This means that IndVar is set
- // to null, and the compare is the ENDLOOP instruction.
- assert((!IndVar) && isEndLoopN(Cmp.getOpcode())
- && "Expecting a hardware loop");
- MachineFunction *MF = MBB.getParent();
- DebugLoc DL = Cmp.getDebugLoc();
- SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(),
- Cmp.getOperand(0).getMBB(), VisitedBBs);
- if (!Loop)
- return 0;
- // If the loop trip count is a compile-time value, then just change the
- // value.
- if (Loop->getOpcode() == Hexagon::J2_loop0i ||
- Loop->getOpcode() == Hexagon::J2_loop1i) {
- int64_t Offset = Loop->getOperand(1).getImm();
- if (Offset <= 1)
- Loop->eraseFromParent();
- else
- Loop->getOperand(1).setImm(Offset - 1);
- return Offset - 1;
+ bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+ // Only ignore the terminator.
+ return MI == EndLoop;
}
- // The loop trip count is a run-time value. We generate code to subtract
- // one from the trip count, and update the loop instruction.
- assert(Loop->getOpcode() == Hexagon::J2_loop0r && "Unexpected instruction");
- unsigned LoopCount = Loop->getOperand(1).getReg();
- // Check if we're done with the loop.
- unsigned LoopEnd = createVR(MF, MVT::i1);
- MachineInstr *NewCmp = BuildMI(&MBB, DL, get(Hexagon::C2_cmpgtui), LoopEnd).
- addReg(LoopCount).addImm(1);
- unsigned NewLoopCount = createVR(MF, MVT::i32);
- MachineInstr *NewAdd = BuildMI(&MBB, DL, get(Hexagon::A2_addi), NewLoopCount).
- addReg(LoopCount).addImm(-1);
- const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
- // Update the previously generated instructions with the new loop counter.
- for (SmallVectorImpl<MachineInstr *>::iterator I = PrevInsts.begin(),
- E = PrevInsts.end(); I != E; ++I)
- (*I)->substituteRegister(LoopCount, NewLoopCount, 0, HRI);
- PrevInsts.clear();
- PrevInsts.push_back(NewCmp);
- PrevInsts.push_back(NewAdd);
- // Insert the new loop instruction if this is the last time the loop is
- // decremented.
- if (Iter == MaxIter)
- BuildMI(&MBB, DL, get(Hexagon::J2_loop0r)).
- addMBB(Loop->getOperand(0).getMBB()).addReg(NewLoopCount);
- // Delete the old loop instruction.
- if (Iter == 0)
- Loop->eraseFromParent();
- Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf));
- Cond.push_back(NewCmp->getOperand(0));
- return NewLoopCount;
+
+ Optional<bool>
+ createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond) override {
+ if (TripCount == -1) {
+ // Check if we're done with the loop.
+ unsigned Done = TII->createVR(MF, MVT::i1);
+ MachineInstr *NewCmp = BuildMI(&MBB, DL,
+ TII->get(Hexagon::C2_cmpgtui), Done)
+ .addReg(LoopCount)
+ .addImm(TC);
+ Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf));
+ Cond.push_back(NewCmp->getOperand(0));
+ return {};
+ }
+
+ return TripCount > TC;
+ }
+
+ void setPreheader(MachineBasicBlock *NewPreheader) override {
+ NewPreheader->splice(NewPreheader->getFirstTerminator(), Loop->getParent(),
+ Loop);
+ }
+
+ void adjustTripCount(int TripCountAdjust) override {
+ // If the loop trip count is a compile-time value, then just change the
+ // value.
+ if (Loop->getOpcode() == Hexagon::J2_loop0i ||
+ Loop->getOpcode() == Hexagon::J2_loop1i) {
+ int64_t TripCount = Loop->getOperand(1).getImm() + TripCountAdjust;
+ assert(TripCount > 0 && "Can't create an empty or negative loop!");
+ Loop->getOperand(1).setImm(TripCount);
+ return;
+ }
+
+ // The loop trip count is a run-time value. We generate code to adjust
+ // the trip count by TripCountAdjust and update the loop instruction.
+ Register LoopCount = Loop->getOperand(1).getReg();
+ Register NewLoopCount = TII->createVR(MF, MVT::i32);
+ BuildMI(*Loop->getParent(), Loop, Loop->getDebugLoc(),
+ TII->get(Hexagon::A2_addi), NewLoopCount)
+ .addReg(LoopCount)
+ .addImm(TripCountAdjust);
+ Loop->getOperand(1).setReg(NewLoopCount);
+ }
+
+ void disposed() override { Loop->eraseFromParent(); }
+};
+} // namespace
+
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+HexagonInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+ // We really "analyze" only hardware loops right now.
+ MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
+
+ if (I != LoopBB->end() && isEndLoopN(I->getOpcode())) {
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *LoopInst = findLoopInstr(
+ LoopBB, I->getOpcode(), I->getOperand(0).getMBB(), VisitedBBs);
+ if (LoopInst)
+ return std::make_unique<HexagonPipelinerLoopInfo>(LoopInst, &*I);
+ }
+ return nullptr;
}
bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
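// A sketch of the contract the pipeliner hook above implements (our reading
// of the interface, not the actual MachinePipeliner code): an engaged
// Optional answers "TripCount > TC" at compile time; an empty one signals
// that a runtime compare was appended to Cond and must be branched on.
#include <cassert>
#include <optional>

std::optional<bool> tripCountGreater(long TripCount /* -1 = runtime */, int TC) {
  if (TripCount == -1)
    return std::nullopt;  // caller: branch on the condition placed in Cond
  return TripCount > TC;
}

int main() {
  assert(tripCountGreater(8, 3) == std::optional<bool>(true));
  assert(!tripCountGreater(-1, 3).has_value());
}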
@@ -839,8 +849,8 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
if (Hexagon::HvxWRRegClass.contains(SrcReg, DestReg)) {
- unsigned LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
- unsigned HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
+ Register LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
+ Register HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg)
.addReg(HiSrc, KillFlag)
.addReg(LoSrc, KillFlag);
@@ -1017,7 +1027,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
auto RealCirc = [&](unsigned Opc, bool HasImm, unsigned MxOp) {
- unsigned Mx = MI.getOperand(MxOp).getReg();
+ Register Mx = MI.getOperand(MxOp).getReg();
unsigned CSx = (Mx == Hexagon::M0 ? Hexagon::CS0 : Hexagon::CS1);
BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrrcr), CSx)
.add(MI.getOperand((HasImm ? 5 : 4)));
@@ -1049,8 +1059,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB.erase(MI);
return true;
case Hexagon::V6_vassignp: {
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
unsigned Kill = getKillRegState(MI.getOperand(1).isKill());
BuildMI(MBB, MI, DL, get(Hexagon::V6_vcombine), DstReg)
.addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_hi), Kill)
@@ -1059,18 +1069,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case Hexagon::V6_lo: {
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI.getOperand(1).isKill());
MBB.erase(MI);
MRI.clearKillFlags(SrcSubLo);
return true;
}
case Hexagon::V6_hi: {
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI.getOperand(1).isKill());
MBB.erase(MI);
MRI.clearKillFlags(SrcSubHi);
@@ -1079,9 +1089,9 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case Hexagon::PS_vstorerw_ai:
case Hexagon::PS_vstorerwu_ai: {
bool Aligned = Opc == Hexagon::PS_vstorerw_ai;
- unsigned SrcReg = MI.getOperand(2).getReg();
- unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
- unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
+ Register SrcReg = MI.getOperand(2).getReg();
+ Register SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
+ Register SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
unsigned NewOpc = Aligned ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32Ub_ai;
unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass);
@@ -1103,7 +1113,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case Hexagon::PS_vloadrw_ai:
case Hexagon::PS_vloadrwu_ai: {
bool Aligned = Opc == Hexagon::PS_vloadrw_ai;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
unsigned NewOpc = Aligned ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32Ub_ai;
unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass);
@@ -1122,7 +1132,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case Hexagon::PS_true: {
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
.addReg(Reg, RegState::Undef)
.addReg(Reg, RegState::Undef);
@@ -1130,7 +1140,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case Hexagon::PS_false: {
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
.addReg(Reg, RegState::Undef)
.addReg(Reg, RegState::Undef);
@@ -1152,7 +1162,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case Hexagon::PS_vdd0: {
- unsigned Vd = MI.getOperand(0).getReg();
+ Register Vd = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(Hexagon::V6_vsubw_dv), Vd)
.addReg(Vd, RegState::Undef)
.addReg(Vd, RegState::Undef);
@@ -1161,13 +1171,13 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case Hexagon::PS_vmulw: {
// Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Src1Reg = MI.getOperand(1).getReg();
- unsigned Src2Reg = MI.getOperand(2).getReg();
- unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi);
- unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo);
- unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi);
- unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src1Reg = MI.getOperand(1).getReg();
+ Register Src2Reg = MI.getOperand(2).getReg();
+ Register Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi);
+ Register Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo);
+ Register Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi);
+ Register Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo);
BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_mpyi),
HRI.getSubReg(DstReg, Hexagon::isub_hi))
.addReg(Src1SubHi)
@@ -1185,16 +1195,16 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case Hexagon::PS_vmulw_acc: {
// Expand 64-bit vector multiply with addition into 2 scalar multiplies.
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Src1Reg = MI.getOperand(1).getReg();
- unsigned Src2Reg = MI.getOperand(2).getReg();
- unsigned Src3Reg = MI.getOperand(3).getReg();
- unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi);
- unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo);
- unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi);
- unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo);
- unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::isub_hi);
- unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::isub_lo);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src1Reg = MI.getOperand(1).getReg();
+ Register Src2Reg = MI.getOperand(2).getReg();
+ Register Src3Reg = MI.getOperand(3).getReg();
+ Register Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi);
+ Register Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo);
+ Register Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi);
+ Register Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo);
+ Register Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::isub_hi);
+ Register Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::isub_lo);
BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_maci),
HRI.getSubReg(DstReg, Hexagon::isub_hi))
.addReg(Src1SubHi)
@@ -1219,10 +1229,10 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
const MachineOperand &Op1 = MI.getOperand(1);
const MachineOperand &Op2 = MI.getOperand(2);
const MachineOperand &Op3 = MI.getOperand(3);
- unsigned Rd = Op0.getReg();
- unsigned Pu = Op1.getReg();
- unsigned Rs = Op2.getReg();
- unsigned Rt = Op3.getReg();
+ Register Rd = Op0.getReg();
+ Register Pu = Op1.getReg();
+ Register Rs = Op2.getReg();
+ Register Rt = Op3.getReg();
DebugLoc DL = MI.getDebugLoc();
unsigned K1 = getKillRegState(Op1.isKill());
unsigned K2 = getKillRegState(Op2.isKill());
@@ -1246,7 +1256,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
LivePhysRegs LiveAtMI(HRI);
getLiveRegsAt(LiveAtMI, MI);
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
- unsigned PReg = Op1.getReg();
+ Register PReg = Op1.getReg();
assert(Op1.getSubReg() == 0);
unsigned PState = getRegState(Op1);
@@ -1280,15 +1290,15 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
LivePhysRegs LiveAtMI(HRI);
getLiveRegsAt(LiveAtMI, MI);
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
- unsigned PReg = Op1.getReg();
+ Register PReg = Op1.getReg();
assert(Op1.getSubReg() == 0);
unsigned PState = getRegState(Op1);
if (Op0.getReg() != Op2.getReg()) {
unsigned S = Op0.getReg() != Op3.getReg() ? PState & ~RegState::Kill
: PState;
- unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo);
- unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi);
+ Register SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo);
+ Register SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
.add(Op0)
.addReg(PReg, S)
@@ -1299,8 +1309,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
IsDestLive = true;
}
if (Op0.getReg() != Op3.getReg()) {
- unsigned SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo);
- unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi);
+ Register SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo);
+ Register SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine))
.add(Op0)
.addReg(PReg, PState)
@@ -1856,8 +1866,7 @@ DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
// S2_storeri_io %r29, 132, killed %r1; flags: mem:ST4[FixedStack1]
// Currently AA considers the addresses in these instructions to be aliasing.
bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
- const MachineInstr &MIa, const MachineInstr &MIb,
- AliasAnalysis *AA) const {
+ const MachineInstr &MIa, const MachineInstr &MIb) const {
if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
@@ -1872,7 +1881,7 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
if (!getBaseAndOffsetPosition(MIa, BasePosA, OffsetPosA))
return false;
const MachineOperand &BaseA = MIa.getOperand(BasePosA);
- unsigned BaseRegA = BaseA.getReg();
+ Register BaseRegA = BaseA.getReg();
unsigned BaseSubA = BaseA.getSubReg();
// Get the base register in MIb.
@@ -1880,7 +1889,7 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
if (!getBaseAndOffsetPosition(MIb, BasePosB, OffsetPosB))
return false;
const MachineOperand &BaseB = MIb.getOperand(BasePosB);
- unsigned BaseRegB = BaseB.getReg();
+ Register BaseRegB = BaseB.getReg();
unsigned BaseSubB = BaseB.getSubReg();
if (BaseRegA != BaseRegB || BaseSubA != BaseSubB)
@@ -1984,7 +1993,7 @@ unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const {
llvm_unreachable("Cannot handle this register class");
}
- unsigned NewReg = MRI.createVirtualRegister(TRC);
+ Register NewReg = MRI.createVirtualRegister(TRC);
return NewReg;
}
@@ -2094,12 +2103,12 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI,
if (RegA == RegB)
return true;
- if (TargetRegisterInfo::isPhysicalRegister(RegA))
+ if (Register::isPhysicalRegister(RegA))
for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs)
if (RegB == *SubRegs)
return true;
- if (TargetRegisterInfo::isPhysicalRegister(RegB))
+ if (Register::isPhysicalRegister(RegB))
for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs)
if (RegA == *SubRegs)
return true;
@@ -2605,7 +2614,7 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1,
const MachineInstr &MI2) const {
if (mayBeCurLoad(MI1)) {
// if (result of SU is used in Next) return true;
- unsigned DstReg = MI1.getOperand(0).getReg();
+ Register DstReg = MI1.getOperand(0).getReg();
int N = MI2.getNumOperands();
for (int I = 0; I < N; I++)
if (MI2.getOperand(I).isReg() && DstReg == MI2.getOperand(I).getReg())
@@ -3374,7 +3383,7 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA,
if ((GA.getOpcode() != Hexagon::C2_cmpeqi) ||
(GB.getOpcode() != Hexagon::J2_jumptnew))
return -1u;
- unsigned DestReg = GA.getOperand(0).getReg();
+ Register DestReg = GA.getOperand(0).getReg();
if (!GB.readsRegister(DestReg))
return -1u;
if (DestReg != Hexagon::P0 && DestReg != Hexagon::P1)
@@ -4091,7 +4100,7 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// Get DefIdx and UseIdx for super registers.
const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
- if (DefMO.isReg() && HRI.isPhysicalRegister(DefMO.getReg())) {
+ if (DefMO.isReg() && Register::isPhysicalRegister(DefMO.getReg())) {
if (DefMO.isImplicit()) {
for (MCSuperRegIterator SR(DefMO.getReg(), &HRI); SR.isValid(); ++SR) {
int Idx = DefMI.findRegisterDefOperandIdx(*SR, false, false, &HRI);
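The hunks above belong to a tree-wide LLVM migration from plain `unsigned` to the `Register` wrapper for values coming out of `MachineOperand::getReg()` and `MachineRegisterInfo::createVirtualRegister()`. A minimal sketch of what the wrapper provides follows, assuming a simplified encoding — the class below is illustrative, not the real llvm/CodeGen/Register.h (which also reserves encodings for stack slots):

    // Illustrative stand-in for llvm::Register: an unsigned with the
    // virtual/physical distinction attached, so callers can query the value
    // itself instead of going through TargetRegisterInfo's static helpers.
    #include <cassert>

    class Register {
      unsigned Reg = 0;
      // LLVM marks virtual registers with the top bit; mirrored here.
      static constexpr unsigned VirtualBit = 1u << 31;

    public:
      constexpr Register(unsigned R = 0) : Reg(R) {}
      constexpr operator unsigned() const { return Reg; } // old callers still compile
      static bool isVirtualRegister(unsigned R) { return (R & VirtualBit) != 0; }
      static bool isPhysicalRegister(unsigned R) { return R != 0 && (R & VirtualBit) == 0; }
      bool isVirtual() const { return isVirtualRegister(Reg); }
      bool isPhysical() const { return isPhysicalRegister(Reg); }
    };

    int main() {
      Register Phys(7);                // small encodings are physical registers
      Register Virt((1u << 31) | 3);   // top bit set: virtual register #3
      assert(Phys.isPhysical() && !Phys.isVirtual());
      assert(Virt.isVirtual());
      return 0;
    }

Because of the implicit conversion back to `unsigned`, the replacement is mechanical: sub-register queries, map keys, and comparisons in the surrounding code keep working unchanged.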
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index e0a999d0f4c4..60298cd666bb 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -129,21 +129,10 @@ public:
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
- /// Analyze the loop code, return true if it cannot be understood. Upon
- /// success, this function returns false and returns information about the
- /// induction variable and compare instruction used at the end.
- bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
- MachineInstr *&CmpInst) const override;
-
- /// Generate code to reduce the loop iteration by one and check if the loop
- /// is finished. Return the value/register of the new loop count. We need
- /// this function when peeling off one or more iterations of a loop. This
- /// function assumes the nth iteration is peeled first.
- unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
- MachineInstr *IndVar, MachineInstr &Cmp,
- SmallVectorImpl<MachineOperand> &Cond,
- SmallVectorImpl<MachineInstr *> &PrevInsts,
- unsigned Iter, unsigned MaxIter) const override;
+  /// Analyze loop L, which must be a single-basic-block loop, and, if its
+  /// structure can be understood well enough, produce a PipelinerLoopInfo object.
+ std::unique_ptr<PipelinerLoopInfo>
+ analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
/// Return true if it's profitable to predicate
/// instructions with accumulated instruction latency of "NumCycles"
@@ -299,8 +288,7 @@ public:
// memory addresses and false otherwise.
bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA = nullptr) const override;
+ const MachineInstr &MIb) const override;
/// For instructions with a base and offset, return the position of the
/// base register and offset operands.
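The header change above replaces the two-hook pipelining interface (`analyzeLoop` filling out-parameters plus a separate `reduceLoopCount`) with a single factory that returns a polymorphic `PipelinerLoopInfo` object owning all per-loop decisions. A hedged sketch of that refactoring pattern — `PipelinerLoopInfoSketch` and `analyzeLoopSketch` are illustrative names, not the exact LLVM interface:

    #include <memory>

    // The analysis result owns the loop-specific logic the pipeliner needs.
    class PipelinerLoopInfoSketch {
    public:
      virtual ~PipelinerLoopInfoSketch() = default;
      virtual void adjustTripCount(int Delta) = 0;
    };

    class HexagonLoopInfoSketch final : public PipelinerLoopInfoSketch {
      int TripCount;
    public:
      explicit HexagonLoopInfoSketch(int TC) : TripCount(TC) {}
      void adjustTripCount(int Delta) override { TripCount += Delta; }
    };

    // Factory contract mirrored from analyzeLoopForPipelining: nullptr means
    // "loop not understood", and the caller simply skips pipelining.
    std::unique_ptr<PipelinerLoopInfoSketch> analyzeLoopSketch(int TripCount) {
      if (TripCount <= 0)
        return nullptr;
      return std::make_unique<HexagonLoopInfoSketch>(TripCount);
    }

    int main() {
      auto Info = analyzeLoopSketch(8);
      if (Info)
        Info->adjustTripCount(-1); // e.g. after peeling one iteration
      return Info ? 0 : 1;
    }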
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index cabfd783effa..c5e3cfd080d6 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -22,14 +22,14 @@ class T_RP_pat <InstHexagon MI, Intrinsic IntID>
def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt),
(A2_add IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16),
+def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, timm:$s16),
(A2_addi IntRegs:$Rs, imm:$s16)>;
def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt),
(A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt),
(A2_sub IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs),
+def: Pat<(int_hexagon_A2_subri timm:$s10, IntRegs:$Rs),
(A2_subri imm:$s10, IntRegs:$Rs)>;
def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt),
(A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
@@ -45,26 +45,26 @@ def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt),
def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt),
(M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5),
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, timm:$u5),
(S2_asl_i_r IntRegs:$Rs, imm:$u5)>;
-def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5),
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, timm:$u5),
(S2_lsr_i_r IntRegs:$Rs, imm:$u5)>;
-def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5),
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, timm:$u5),
(S2_asr_i_r IntRegs:$Rs, imm:$u5)>;
-def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6),
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, timm:$u6),
(S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>;
-def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6),
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, timm:$u6),
(S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>;
-def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6),
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, timm:$u6),
(S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>;
def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt),
(A2_and IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10),
+def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, timm:$s10),
(A2_andir IntRegs:$Rs, imm:$s10)>;
def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt),
(A2_or IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10),
+def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, timm:$s10),
(A2_orir IntRegs:$Rs, imm:$s10)>;
def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt),
(A2_xor IntRegs:$Rs, IntRegs:$Rt)>;
@@ -99,13 +99,13 @@ def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, (i32 0)),
(S2_vsathub I64:$Rs)>;
}
-def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred:$imm),
+def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred_timm:$imm),
(S2_asr_i_r_rnd I32:$Rs, (UDEC1 u5_0ImmPred:$imm))>;
-def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred:$imm),
+def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred_timm:$imm),
(S2_asr_i_p_rnd I64:$Rs, (UDEC1 u6_0ImmPred:$imm))>;
-def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
+def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm),
(S5_vasrhrnd I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
-def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
+def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm),
(S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
def ImmExt64: SDNodeXForm<imm, [{
@@ -121,13 +121,13 @@ def ImmExt64: SDNodeXForm<imm, [{
// To connect the builtin with the instruction, the builtin's operand
// needs to be extended to the right type.
-def : Pat<(int_hexagon_A2_tfrpi imm:$Is),
+def : Pat<(int_hexagon_A2_tfrpi timm:$Is),
(A2_tfrpi (ImmExt64 $Is))>;
-def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2),
+def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred_timm:$src2),
(C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
-def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred:$src2),
+def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred_timm:$src2),
(C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>;
def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0),
@@ -142,7 +142,7 @@ def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2),
//===----------------------------------------------------------------------===//
class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
SDNodeXForm XformImm>
- : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred:$src3, u5_0ImmPred:$src4),
+ : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4),
(OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3,
(XformImm u5_0ImmPred:$src4))>;
@@ -197,11 +197,11 @@ class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
: Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
(MI I32:$Rs, Imm:$s, I32:$Ru, Val:$Rt)>;
-def: T_stc_pat<S2_storerb_pci, int_hexagon_circ_stb, s4_0ImmPred, I32>;
-def: T_stc_pat<S2_storerh_pci, int_hexagon_circ_sth, s4_1ImmPred, I32>;
-def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred, I32>;
-def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred, I64>;
-def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
+def: T_stc_pat<S2_storerb_pci, int_hexagon_circ_stb, s4_0ImmPred_timm, I32>;
+def: T_stc_pat<S2_storerh_pci, int_hexagon_circ_sth, s4_1ImmPred_timm, I32>;
+def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred_timm, I32>;
+def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred_timm, I64>;
+def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred_timm, I32>;
multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
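The `imm` → `timm` edits above track an LLVM-wide change in how immediate intrinsic operands reach instruction selection: they are now lowered as target constants (values that must remain immediates and be encoded into the instruction), and `timm` is the pattern leaf that matches those nodes, while `imm` keeps matching ordinary constants that may still be materialized into registers. A self-contained sketch of the distinction, using stand-in types rather than the SDNode hierarchy:

    #include <cassert>
    #include <cstdint>

    // Two flavors of constant node: a plain one the legalizer may move into a
    // register, and a target constant that must stay an immediate operand.
    enum class NodeKind { Constant, TargetConstant };

    struct ImmNode {
      NodeKind Kind;
      int64_t Value;
    };

    bool matchesImm(const ImmNode &N)  { return N.Kind == NodeKind::Constant; }
    bool matchesTimm(const ImmNode &N) { return N.Kind == NodeKind::TargetConstant; }

    int main() {
      ImmNode Lowered{NodeKind::TargetConstant, 10}; // how intrinsic immediates now arrive
      assert(matchesTimm(Lowered) && !matchesImm(Lowered));
      return 0;
    }

Note that only the match side of each pattern changes; the output side still writes `imm:$s16` and friends unchanged.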
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index ac48e1dc30b0..bda3eccac0cd 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -93,9 +93,9 @@ static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
cl::Hidden, cl::init(true),
cl::desc("Only enable generating memmove in non-nested loops"));
-cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
- cl::Hidden, cl::init(false),
- cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
+static cl::opt<bool> HexagonVolatileMemcpy(
+ "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false),
+ cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
@@ -632,9 +632,9 @@ Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
continue;
Value *IterV = PN->getIncomingValueForBlock(BB);
- if (!isa<BinaryOperator>(IterV))
- continue;
auto *BO = dyn_cast<BinaryOperator>(IterV);
+ if (!BO)
+ continue;
if (BO->getOpcode() != Instruction::Add)
continue;
Value *IncV = nullptr;
@@ -2020,7 +2020,7 @@ bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
// random load we can't handle.
- LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ auto *LI = cast<LoadInst>(SI->getValueOperand());
auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
// The trip count of the loop and the base pointer of the addrec SCEV is
@@ -2426,7 +2426,8 @@ bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
DL = &L->getHeader()->getModule()->getDataLayout();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent());
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
HasMemcpy = TLI->has(LibFunc_memcpy);
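Two of the hunks above tighten LLVM's casting idioms: a `dyn_cast` result is now tested directly instead of `isa<>`-style check followed by a second cast, and a `cast<>` replaces a `dyn_cast` whose result was already guaranteed non-null by the surrounding checks. A small sketch of both idioms, using plain C++ RTTI as a stand-in for llvm/Support/Casting.h:

    #include <cassert>

    struct Value { virtual ~Value() = default; };
    struct BinaryOperator : Value { int Opcode = 0; };
    struct LoadInst : Value {};

    void useDynCast(Value *IterV) {
      // Preferred: one cast, one null test, instead of a type check plus a cast.
      auto *BO = dynamic_cast<BinaryOperator *>(IterV);
      if (!BO)
        return;
      // ... use BO ...
    }

    void useCast(Value *StoredV) {
      // When earlier logic already guarantees the type, an unconditional cast
      // documents the invariant; llvm::cast<> asserts it in debug builds.
      auto *LI = dynamic_cast<LoadInst *>(StoredV);
      assert(LI && "value operand must be a load here");
      (void)LI;
    }

    int main() {
      LoadInst L;
      useCast(&L);
      useDynCast(&L); // not a BinaryOperator: returns early, as intended
      return 0;
    }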
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index db44901ca706..680d01e12af0 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -177,7 +177,7 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
(II->getOperand(i).isUse() || II->getOperand(i).isDef())) {
MachineBasicBlock::iterator localII = II;
++localII;
- unsigned Reg = II->getOperand(i).getReg();
+ Register Reg = II->getOperand(i).getReg();
for (MachineBasicBlock::iterator localBegin = localII; localBegin != end;
++localBegin) {
if (localBegin == skip)
@@ -290,7 +290,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
// at machine code level, we don't need this, but if we decide
      // to move new value jump prior to RA, we would need this.
MachineRegisterInfo &MRI = MF.getRegInfo();
- if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
+ if (secondReg && !Register::isPhysicalRegister(cmpOp2)) {
MachineInstr *def = MRI.getVRegDef(cmpOp2);
if (def->getOpcode() == TargetOpcode::COPY)
return false;
@@ -516,7 +516,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
jmpPos = MII;
jmpInstr = &MI;
predReg = MI.getOperand(0).getReg();
- afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);
+ afterRA = Register::isPhysicalRegister(predReg);
      // If the if-converter had not messed up the kill flags of the
// operands, the following check on the kill flag would suffice.
@@ -603,7 +603,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
(isSecondOpReg &&
MI.getOperand(0).getReg() == (unsigned)cmpOp2))) {
- unsigned feederReg = MI.getOperand(0).getReg();
+ Register feederReg = MI.getOperand(0).getReg();
// First try to see if we can get the feeder from the first operand
// of the compare. If we can not, and if secondOpReg is true
@@ -651,7 +651,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned UseR = MO.getReg();
+ Register UseR = MO.getReg();
for (auto I = std::next(MI.getIterator()); I != jmpPos; ++I) {
if (I == cmpPos)
continue;
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 547da9fd598f..9121115020a2 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -162,7 +162,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
if (!OffsetOp.isImm() || OffsetOp.getImm() > 3)
return false;
- unsigned OffsetReg = MI.getOperand(2).getReg();
+ Register OffsetReg = MI.getOperand(2).getReg();
RegisterRef OffsetRR;
NodeId OffsetRegRD = 0;
for (NodeAddr<UseNode *> UA : AddAslSN.Addr->members_if(DFG->IsUse, *DFG)) {
@@ -348,7 +348,7 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN,
MachineInstr *AddMI,
const NodeList &UNodeList) {
- unsigned AddDefR = AddMI->getOperand(0).getReg();
+ Register AddDefR = AddMI->getOperand(0).getReg();
for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
NodeAddr<UseNode *> UN = *I;
NodeAddr<StmtNode *> SN = UN.Addr->getOwner(*DFG);
@@ -381,7 +381,7 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN,
// Ex: Rx= add(Rt,#10)
// memw(Rx+#0) = Rs
// will be replaced with => memw(Rt+#10) = Rs
- unsigned BaseReg = AddMI->getOperand(1).getReg();
+ Register BaseReg = AddMI->getOperand(1).getReg();
if (!isSafeToExtLR(AddSN, AddMI, BaseReg, UNodeList))
return false;
}
@@ -411,7 +411,7 @@ bool HexagonOptAddrMode::updateAddUses(MachineInstr *AddMI,
MachineInstr *UseMI) {
const MachineOperand ImmOp = AddMI->getOperand(2);
const MachineOperand AddRegOp = AddMI->getOperand(1);
- unsigned newReg = AddRegOp.getReg();
+ Register newReg = AddRegOp.getReg();
const MCInstrDesc &MID = UseMI->getDesc();
MachineOperand &BaseOp = MID.mayLoad() ? UseMI->getOperand(1)
@@ -543,7 +543,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
unsigned ImmOpNum) {
bool Changed = false;
- unsigned OpStart;
+ unsigned OpStart = 0;
unsigned OpEnd = OldMI->getNumOperands();
MachineBasicBlock *BB = OldMI->getParent();
auto UsePos = MachineBasicBlock::iterator(OldMI);
@@ -724,7 +724,7 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
}
short SizeInc = 0;
- unsigned DefR = MI->getOperand(0).getReg();
+ Register DefR = MI->getOperand(0).getReg();
InstrEvalMap InstrEvalResult;
// Analyze all uses and calculate increase in size. Perform the optimization
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index fb731f56bfbf..485e658e1c84 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -99,13 +99,21 @@ def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
+def SDTVecLeaf:
+ SDTypeProfile<1, 0, [SDTCisVec<0>]>;
def SDTVecVecIntOp:
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>,
SDTCisVT<3,i32>]>;
+def HexagonPTRUE: SDNode<"HexagonISD::PTRUE", SDTVecLeaf>;
+def HexagonPFALSE: SDNode<"HexagonISD::PFALSE", SDTVecLeaf>;
def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>;
def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;
+def ptrue: PatFrag<(ops), (HexagonPTRUE)>;
+def pfalse: PatFrag<(ops), (HexagonPFALSE)>;
+def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>;
+
def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
(HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
@@ -154,6 +162,11 @@ def IsNPow2_64H: PatLeaf<(i64 imm), [{
return isPowerOf2_64(NV) && Log2_64(NV) >= 32;
}]>;
+class IsULE<int Width, int Arg>: PatLeaf<(i32 imm),
+ "uint64_t V = N->getZExtValue();" #
+ "return isUInt<" # Width # ">(V) && V <= " # Arg # ";"
+>;
+
class IsUGT<int Width, int Arg>: PatLeaf<(i32 imm),
"uint64_t V = N->getZExtValue();" #
"return isUInt<" # Width # ">(V) && V > " # Arg # ";"
@@ -320,6 +333,24 @@ multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val,
(InstB Val:$A, Val:$B)>;
}
+multiclass MinMax_pats<InstHexagon PickT, InstHexagon PickS,
+ PatFrag Sel, PatFrag CmpOp,
+ ValueType CmpType, PatFrag CmpPred> {
+ def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)),
+ CmpPred:$Vt, CmpPred:$Vs),
+ (PickT CmpPred:$Vs, CmpPred:$Vt)>;
+ def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)),
+ CmpPred:$Vs, CmpPred:$Vt),
+ (PickS CmpPred:$Vs, CmpPred:$Vt)>;
+}
+
+// Bitcasts between same-size vector types are no-ops, except for the
+// actual type change.
+multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> {
+ def: Pat<(Ty1 (bitconvert (Ty2 RC:$Val))), (Ty1 RC:$Val)>;
+ def: Pat<(Ty2 (bitconvert (Ty1 RC:$Val))), (Ty2 RC:$Val)>;
+}
+
// Frags for commonly used SDNodes.
def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
@@ -403,17 +434,18 @@ def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
-multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
- def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
- def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>;
-}
-
-// Bit convert vector types to integers.
-defm: Cast_pat<v4i8, i32, IntRegs>;
-defm: Cast_pat<v2i16, i32, IntRegs>;
-defm: Cast_pat<v8i8, i64, DoubleRegs>;
-defm: Cast_pat<v4i16, i64, DoubleRegs>;
-defm: Cast_pat<v2i32, i64, DoubleRegs>;
+// Bit convert 32- and 64-bit types.
+// All of these are bitcastable to one another: i32, v2i16, v4i8.
+defm: NopCast_pat<i32, v2i16, IntRegs>;
+defm: NopCast_pat<i32, v4i8, IntRegs>;
+defm: NopCast_pat<v2i16, v4i8, IntRegs>;
+// All of these are bitcastable to one another: i64, v2i32, v4i16, v8i8.
+defm: NopCast_pat<i64, v2i32, DoubleRegs>;
+defm: NopCast_pat<i64, v4i16, DoubleRegs>;
+defm: NopCast_pat<i64, v8i8, DoubleRegs>;
+defm: NopCast_pat<v2i32, v4i16, DoubleRegs>;
+defm: NopCast_pat<v2i32, v8i8, DoubleRegs>;
+defm: NopCast_pat<v4i16, v8i8, DoubleRegs>;
// --(3) Extend/truncate -------------------------------------------------
@@ -497,7 +529,9 @@ def: Pat<(v2i16 (trunc V2I32:$Rs)),
//
def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>;
-def: Pat<(not V8I1:$Ps), (C2_not V8I1:$Ps)>;
+def: Pat<(pnot V2I1:$Ps), (C2_not V2I1:$Ps)>;
+def: Pat<(pnot V4I1:$Ps), (C2_not V4I1:$Ps)>;
+def: Pat<(pnot V8I1:$Ps), (C2_not V8I1:$Ps)>;
def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>;
multiclass BoolOpR_RR_pat<InstHexagon MI, PatFrag Op> {
@@ -816,14 +850,6 @@ def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
(C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
-def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
-def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
-def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt),
- (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
- (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
-
def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt),
(C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt),
@@ -831,6 +857,14 @@ def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt),
def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt),
(C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+def: Pat<(vselect (pnot V8I1:$Pu), V8I8:$Rs, V8I8:$Rt),
+ (C2_vmux V8I1:$Pu, V8I8:$Rt, V8I8:$Rs)>;
+def: Pat<(vselect (pnot V4I1:$Pu), V4I16:$Rs, V4I16:$Rt),
+ (C2_vmux V4I1:$Pu, V4I16:$Rt, V4I16:$Rs)>;
+def: Pat<(vselect (pnot V2I1:$Pu), V2I32:$Rs, V2I32:$Rt),
+ (C2_vmux V2I1:$Pu, V2I32:$Rt, V2I32:$Rs)>;
+
+
// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw).
def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw),
(C2_or (C2_and I1:$Pu, I1:$Pv),
@@ -863,32 +897,44 @@ let AddedComplexity = 200 in {
}
let AddedComplexity = 200 in {
- defm: SelMinMax_pats<setge, I32, A2_max, A2_min>;
- defm: SelMinMax_pats<setgt, I32, A2_max, A2_min>;
- defm: SelMinMax_pats<setle, I32, A2_min, A2_max>;
- defm: SelMinMax_pats<setlt, I32, A2_min, A2_max>;
- defm: SelMinMax_pats<setuge, I32, A2_maxu, A2_minu>;
- defm: SelMinMax_pats<setugt, I32, A2_maxu, A2_minu>;
- defm: SelMinMax_pats<setule, I32, A2_minu, A2_maxu>;
- defm: SelMinMax_pats<setult, I32, A2_minu, A2_maxu>;
-
- defm: SelMinMax_pats<setge, I64, A2_maxp, A2_minp>;
- defm: SelMinMax_pats<setgt, I64, A2_maxp, A2_minp>;
- defm: SelMinMax_pats<setle, I64, A2_minp, A2_maxp>;
- defm: SelMinMax_pats<setlt, I64, A2_minp, A2_maxp>;
- defm: SelMinMax_pats<setuge, I64, A2_maxup, A2_minup>;
- defm: SelMinMax_pats<setugt, I64, A2_maxup, A2_minup>;
- defm: SelMinMax_pats<setule, I64, A2_minup, A2_maxup>;
- defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
+ defm: MinMax_pats<A2_min, A2_max, select, setgt, i1, I32>;
+ defm: MinMax_pats<A2_min, A2_max, select, setge, i1, I32>;
+ defm: MinMax_pats<A2_max, A2_min, select, setlt, i1, I32>;
+ defm: MinMax_pats<A2_max, A2_min, select, setle, i1, I32>;
+ defm: MinMax_pats<A2_minu, A2_maxu, select, setugt, i1, I32>;
+ defm: MinMax_pats<A2_minu, A2_maxu, select, setuge, i1, I32>;
+ defm: MinMax_pats<A2_maxu, A2_minu, select, setult, i1, I32>;
+ defm: MinMax_pats<A2_maxu, A2_minu, select, setule, i1, I32>;
+
+ defm: MinMax_pats<A2_minp, A2_maxp, select, setgt, i1, I64>;
+ defm: MinMax_pats<A2_minp, A2_maxp, select, setge, i1, I64>;
+ defm: MinMax_pats<A2_maxp, A2_minp, select, setlt, i1, I64>;
+ defm: MinMax_pats<A2_maxp, A2_minp, select, setle, i1, I64>;
+ defm: MinMax_pats<A2_minup, A2_maxup, select, setugt, i1, I64>;
+ defm: MinMax_pats<A2_minup, A2_maxup, select, setuge, i1, I64>;
+ defm: MinMax_pats<A2_maxup, A2_minup, select, setult, i1, I64>;
+ defm: MinMax_pats<A2_maxup, A2_minup, select, setule, i1, I64>;
}
let AddedComplexity = 100 in {
- defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
- defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
- defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
- defm: SelMinMax_pats<setoge, F32, F2_sfmax, F2_sfmin>;
-}
-
+ defm: MinMax_pats<F2_sfmin, F2_sfmax, select, setogt, i1, F32>;
+ defm: MinMax_pats<F2_sfmin, F2_sfmax, select, setoge, i1, F32>;
+ defm: MinMax_pats<F2_sfmax, F2_sfmin, select, setolt, i1, F32>;
+ defm: MinMax_pats<F2_sfmax, F2_sfmin, select, setole, i1, F32>;
+}
+
+defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setgt, v8i1, V8I8>;
+defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setge, v8i1, V8I8>;
+defm: MinMax_pats<A2_vminh, A2_vmaxh, vselect, setgt, v4i1, V4I16>;
+defm: MinMax_pats<A2_vminh, A2_vmaxh, vselect, setge, v4i1, V4I16>;
+defm: MinMax_pats<A2_vminw, A2_vmaxw, vselect, setgt, v2i1, V2I32>;
+defm: MinMax_pats<A2_vminw, A2_vmaxw, vselect, setge, v2i1, V2I32>;
+defm: MinMax_pats<A2_vminub, A2_vmaxub, vselect, setugt, v8i1, V8I8>;
+defm: MinMax_pats<A2_vminub, A2_vmaxub, vselect, setuge, v8i1, V8I8>;
+defm: MinMax_pats<A2_vminuh, A2_vmaxuh, vselect, setugt, v4i1, V4I16>;
+defm: MinMax_pats<A2_vminuh, A2_vmaxuh, vselect, setuge, v4i1, V4I16>;
+defm: MinMax_pats<A2_vminuw, A2_vmaxuw, vselect, setugt, v2i1, V2I32>;
+defm: MinMax_pats<A2_vminuw, A2_vmaxuw, vselect, setuge, v2i1, V2I32>;
// --(7) Insert/extract --------------------------------------------------
//
@@ -1639,19 +1685,19 @@ def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
//
// Count leading zeros.
-def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
// Count trailing zeros.
-def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>;
+def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
// Count leading ones.
-def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
// Count trailing ones.
-def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>;
+def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
// Define leading/trailing patterns that require zero-extensions to 64 bits.
@@ -1706,6 +1752,7 @@ let AddedComplexity = 20 in { // Complexity greater than and/or/xor
(i32 (LoReg $Rss)))>;
}
+
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
(S2_tstbit_i IntRegs:$Rs, imm:$u5)>;
@@ -1717,6 +1764,20 @@ let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
(S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
}
+def: Pat<(and (srl I32:$Rs, u5_0ImmPred:$u5), 1),
+ (I1toI32 (S2_tstbit_i I32:$Rs, imm:$u5))>;
+def: Pat<(and (srl I64:$Rss, IsULE<32,31>:$u6), 1),
+ (ToZext64 (I1toI32 (S2_tstbit_i (LoReg $Rss), imm:$u6)))>;
+def: Pat<(and (srl I64:$Rss, IsUGT<32,31>:$u6), 1),
+ (ToZext64 (I1toI32 (S2_tstbit_i (HiReg $Rss), (UDEC32 $u6))))>;
+
+def: Pat<(and (not (srl I32:$Rs, u5_0ImmPred:$u5)), 1),
+ (I1toI32 (S4_ntstbit_i I32:$Rs, imm:$u5))>;
+def: Pat<(and (not (srl I64:$Rss, IsULE<32,31>:$u6)), 1),
+ (ToZext64 (I1toI32 (S4_ntstbit_i (LoReg $Rss), imm:$u6)))>;
+def: Pat<(and (not (srl I64:$Rss, IsUGT<32,31>:$u6)), 1),
+ (ToZext64 (I1toI32 (S4_ntstbit_i (HiReg $Rss), (UDEC32 $u6))))>;
+
let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)),
(C2_bitsclri IntRegs:$Rs, imm:$u6)>;
@@ -1737,23 +1798,28 @@ def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5),
def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt),
(S2_tstbit_r I32:$Rs, I32:$Rt)>;
+// Add extra complexity to prefer these instructions over bitsset/bitsclr.
+// The reason is that tstbit/ntstbit can be folded into a compound instruction:
+// if ([!]tstbit(...)) jump ...
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
- def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
- (S4_ntstbit_i I32:$Rs, imm:$u5)>;
+ def: Pat<(i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)),
+ (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
+ def: Pat<(i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)),
+ (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)),
(S4_ntstbit_r I32:$Rs, I32:$Rt)>;
+ def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)),
+ (S2_tstbit_r I32:$Rs, I32:$Rt)>;
}
-// Add extra complexity to prefer these instructions over bitsset/bitsclr.
-// The reason is that tstbit/ntstbit can be folded into a compound instruction:
-// if ([!]tstbit(...)) jump ...
-let AddedComplexity = 100 in
-def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))),
- (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
-
-let AddedComplexity = 100 in
-def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))),
- (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
+def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64L:$u6), 0)),
+ (S4_ntstbit_i (LoReg $Rs), (Log2_64 $u6))>;
+def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64H:$u6), 0)),
+ (S4_ntstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>;
+def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64L:$u6), 0)),
+ (S2_tstbit_i (LoReg $Rs), (Log2_32 imm:$u6))>;
+def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64H:$u6), 0)),
+ (S2_tstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_32 imm:$u6))))>;
// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
// represented as a compare against "value & 0xFF", which is an exact match
@@ -1773,10 +1839,18 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
let AddedComplexity = 100 in {
// Avoid A4_rcmp[n]eqi in these cases:
+ def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
+ (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
(I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
+ def: Pat<(i32 (zext (i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)))),
+ (I1toI32 (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5)))>;
+ def: Pat<(i32 (zext (i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)))),
+ (I1toI32 (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5)))>;
def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
- (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
+ (I1toI32 (S4_ntstbit_r I32:$Rs, I32:$Rt))>;
+ def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
+ (I1toI32 (S2_tstbit_r I32:$Rs, I32:$Rt))>;
}
// --(11) PIC ------------------------------------------------------------
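The new tstbit patterns above lean on two arithmetic identities: `(x >> n) & 1` tests bit n just like `(x & (1 << n)) != 0`, and for a 64-bit value bit n lives in the low 32-bit word when n < 32 and at position n - 32 of the high word otherwise (which is what the `UDEC32` transform computes). A quick host-side check of both identities:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = 0x8000000480000001ULL;
      for (unsigned N = 0; N < 64; ++N) {
        unsigned ViaShift = unsigned((X >> N) & 1);
        uint32_t Lo = uint32_t(X), Hi = uint32_t(X >> 32);
        // Split form used by the LoReg/HiReg patterns above.
        unsigned ViaHalves = N < 32 ? (Lo >> N) & 1u : (Hi >> (N - 32)) & 1u;
        assert(ViaShift == ViaHalves);
        if (N < 32) {
          unsigned ViaMask = (Lo & (1u << N)) != 0 ? 1u : 0u; // power-of-two form
          assert(ViaShift == ViaMask);
        }
      }
      return 0;
    }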
diff --git a/lib/Target/Hexagon/HexagonPatternsHVX.td b/lib/Target/Hexagon/HexagonPatternsHVX.td
index a4cfca9ac7d7..078a7135c55b 100644
--- a/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -1,5 +1,3 @@
-def SDTVecLeaf:
- SDTypeProfile<1, 0, [SDTCisVec<0>]>;
def SDTVecBinOp:
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;
@@ -162,23 +160,14 @@ let Predicates = [UseHVX] in {
// Bitcasts between same-size vector types are no-ops, except for the
// actual type change.
-class Bitcast<ValueType ResTy, ValueType InpTy, RegisterClass RC>
- : Pat<(ResTy (bitconvert (InpTy RC:$Val))), (ResTy RC:$Val)>;
-
let Predicates = [UseHVX] in {
- def: Bitcast<VecI8, VecI16, HvxVR>;
- def: Bitcast<VecI8, VecI32, HvxVR>;
- def: Bitcast<VecI16, VecI8, HvxVR>;
- def: Bitcast<VecI16, VecI32, HvxVR>;
- def: Bitcast<VecI32, VecI8, HvxVR>;
- def: Bitcast<VecI32, VecI16, HvxVR>;
-
- def: Bitcast<VecPI8, VecPI16, HvxWR>;
- def: Bitcast<VecPI8, VecPI32, HvxWR>;
- def: Bitcast<VecPI16, VecPI8, HvxWR>;
- def: Bitcast<VecPI16, VecPI32, HvxWR>;
- def: Bitcast<VecPI32, VecPI8, HvxWR>;
- def: Bitcast<VecPI32, VecPI16, HvxWR>;
+ defm: NopCast_pat<VecI8, VecI16, HvxVR>;
+ defm: NopCast_pat<VecI8, VecI32, HvxVR>;
+ defm: NopCast_pat<VecI16, VecI32, HvxVR>;
+
+ defm: NopCast_pat<VecPI8, VecPI16, HvxWR>;
+ defm: NopCast_pat<VecPI8, VecPI32, HvxWR>;
+ defm: NopCast_pat<VecPI16, VecPI32, HvxWR>;
}
let Predicates = [UseHVX] in {
@@ -260,6 +249,21 @@ class Vnot<ValueType VecTy>
: PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
let Predicates = [UseHVX] in {
+ let AddedComplexity = 220 in {
+ defm: MinMax_pats<V6_vminb, V6_vmaxb, vselect, setgt, VecQ8, HVI8>;
+ defm: MinMax_pats<V6_vminb, V6_vmaxb, vselect, setge, VecQ8, HVI8>;
+ defm: MinMax_pats<V6_vminub, V6_vmaxub, vselect, setugt, VecQ8, HVI8>;
+ defm: MinMax_pats<V6_vminub, V6_vmaxub, vselect, setuge, VecQ8, HVI8>;
+ defm: MinMax_pats<V6_vminh, V6_vmaxh, vselect, setgt, VecQ16, HVI16>;
+ defm: MinMax_pats<V6_vminh, V6_vmaxh, vselect, setge, VecQ16, HVI16>;
+ defm: MinMax_pats<V6_vminuh, V6_vmaxuh, vselect, setugt, VecQ16, HVI16>;
+ defm: MinMax_pats<V6_vminuh, V6_vmaxuh, vselect, setuge, VecQ16, HVI16>;
+ defm: MinMax_pats<V6_vminw, V6_vmaxw, vselect, setgt, VecQ32, HVI32>;
+ defm: MinMax_pats<V6_vminw, V6_vmaxw, vselect, setge, VecQ32, HVI32>;
+ }
+}
+
+let Predicates = [UseHVX] in {
let AddedComplexity = 200 in {
def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>;
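`NopCast_pat` (defined in HexagonPatterns.td above and reused here for HVX) encodes that a bitcast between same-size vector types costs no instructions: the register's bits are simply reinterpreted. A host-side illustration with C++20's `std::bit_cast`, assuming a little-endian host to match Hexagon's byte order:

    #include <array>
    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t R = 0x0807060504030201ULL; // one 64-bit "register"
      // The same bits viewed as v8i8 and as v4i16; no data movement implied.
      auto AsV8I8  = std::bit_cast<std::array<uint8_t, 8>>(R);
      auto AsV4I16 = std::bit_cast<std::array<uint16_t, 4>>(R);
      assert(AsV8I8[0] == 0x01);      // little-endian: low byte first
      assert(AsV4I16[0] == 0x0201);
      assert(std::bit_cast<uint64_t>(AsV8I8) == R); // round trip is lossless
      return 0;
    }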
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 8f761d2d4805..0ccfe64ad1e5 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -136,11 +136,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
assert(MI.getNumOperands() == 2);
MachineOperand &Dst = MI.getOperand(0);
MachineOperand &Src = MI.getOperand(1);
- unsigned DstReg = Dst.getReg();
- unsigned SrcReg = Src.getReg();
+ Register DstReg = Dst.getReg();
+ Register SrcReg = Src.getReg();
// Just handle virtual registers.
- if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
- TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (Register::isVirtualRegister(DstReg) &&
+ Register::isVirtualRegister(SrcReg)) {
// Map the following:
// %170 = SXTW %166
// PeepholeMap[170] = %166
@@ -157,8 +157,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
MachineOperand &Src2 = MI.getOperand(2);
if (Src1.getImm() != 0)
continue;
- unsigned DstReg = Dst.getReg();
- unsigned SrcReg = Src2.getReg();
+ Register DstReg = Dst.getReg();
+ Register SrcReg = Src2.getReg();
PeepholeMap[DstReg] = SrcReg;
}
@@ -174,8 +174,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
MachineOperand &Src2 = MI.getOperand(2);
if (Src2.getImm() != 32)
continue;
- unsigned DstReg = Dst.getReg();
- unsigned SrcReg = Src1.getReg();
+ Register DstReg = Dst.getReg();
+ Register SrcReg = Src1.getReg();
PeepholeDoubleRegsMap[DstReg] =
std::make_pair(*&SrcReg, Hexagon::isub_hi);
}
@@ -185,11 +185,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
assert(MI.getNumOperands() == 2);
MachineOperand &Dst = MI.getOperand(0);
MachineOperand &Src = MI.getOperand(1);
- unsigned DstReg = Dst.getReg();
- unsigned SrcReg = Src.getReg();
+ Register DstReg = Dst.getReg();
+ Register SrcReg = Src.getReg();
// Just handle virtual registers.
- if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
- TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (Register::isVirtualRegister(DstReg) &&
+ Register::isVirtualRegister(SrcReg)) {
// Map the following:
// %170 = NOT_xx %166
// PeepholeMap[170] = %166
@@ -208,10 +208,10 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
if (Src.getSubReg() != Hexagon::isub_lo)
continue;
- unsigned DstReg = Dst.getReg();
- unsigned SrcReg = Src.getReg();
- if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
- TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ Register DstReg = Dst.getReg();
+ Register SrcReg = Src.getReg();
+ if (Register::isVirtualRegister(DstReg) &&
+ Register::isVirtualRegister(SrcReg)) {
// Try to find in the map.
if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
// Change the 1st operand.
@@ -237,12 +237,12 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
bool Done = false;
if (QII->isPredicated(MI)) {
MachineOperand &Op0 = MI.getOperand(0);
- unsigned Reg0 = Op0.getReg();
+ Register Reg0 = Op0.getReg();
const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0);
if (RC0->getID() == Hexagon::PredRegsRegClassID) {
        // Handle instructions that have a predicate register in op0
// (most cases of predicable instructions).
- if (TargetRegisterInfo::isVirtualRegister(Reg0)) {
+ if (Register::isVirtualRegister(Reg0)) {
// Try to find in the map.
if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
          // Change the 1st operand and flip the opcode.
@@ -275,7 +275,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
break;
}
if (NewOp) {
- unsigned PSrc = MI.getOperand(PR).getReg();
+ Register PSrc = MI.getOperand(PR).getReg();
if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(),
QII->get(NewOp), MI.getOperand(0).getReg())
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 4f5f750e5842..b7171fb14272 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -217,7 +217,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If the offset is not valid, calculate the address in a temporary
// register and use it with offset 0.
auto &MRI = MF.getRegInfo();
- unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
const DebugLoc &DL = MI.getDebugLoc();
BuildMI(MB, II, DL, HII.get(Hexagon::A2_addi), TmpR)
.addReg(BP)
@@ -249,8 +249,8 @@ bool HexagonRegisterInfo::shouldCoalesce(MachineInstr *MI,
if (!SmallSrc && !SmallDst)
return true;
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
+ Register SrcReg = MI->getOperand(1).getReg();
const SlotIndexes &Indexes = *LIS.getSlotIndexes();
auto HasCall = [&Indexes] (const LiveInterval::Segment &S) {
for (SlotIndex I = S.start.getBaseIndex(), E = S.end.getBaseIndex();
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index bd4254aea276..f9fb14c190ff 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -76,18 +76,18 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
unsigned Opc = MI.getOpcode();
if (Opc == Hexagon::CONST32) {
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
uint64_t ImmValue = MI.getOperand(1).getImm();
const DebugLoc &DL = MI.getDebugLoc();
BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), DestReg)
.addImm(ImmValue);
B.erase(&MI);
} else if (Opc == Hexagon::CONST64) {
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
int64_t ImmValue = MI.getOperand(1).getImm();
const DebugLoc &DL = MI.getDebugLoc();
- unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::isub_lo);
- unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::isub_hi);
+ Register DestLo = TRI->getSubReg(DestReg, Hexagon::isub_lo);
+ Register DestHi = TRI->getSubReg(DestReg, Hexagon::isub_hi);
int32_t LowWord = (ImmValue & 0xFFFFFFFF);
int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
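The CONST64 expansion above splits a 64-bit immediate into two 32-bit words so each half can be materialized with a 32-bit transfer. A quick check that the split computed in those two lines reassembles losslessly:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t ImmValue = (int64_t)0xFEDCBA9876543210ULL;
      uint32_t LowWord  = uint32_t(ImmValue & 0xFFFFFFFF);
      uint32_t HighWord = uint32_t((ImmValue >> 32) & 0xFFFFFFFF);
      assert(LowWord == 0x76543210u && HighWord == 0xFEDCBA98u);
      // isub_hi takes the upper half, isub_lo the lower half.
      uint64_t Reassembled = (uint64_t(HighWord) << 32) | LowWord;
      assert(int64_t(Reassembled) == ImmValue);
      return 0;
    }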
diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 013eede2d414..55f31c628854 100644
--- a/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -210,8 +210,8 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
for (auto &Op : MI->operands()) {
if (!Op.isReg())
continue;
- unsigned R = Op.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = Op.getReg();
+ if (!Register::isVirtualRegister(R))
return true;
}
return false;
@@ -224,14 +224,14 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
unsigned NumRegs = MRI->getNumVirtRegs();
BitVector DoubleRegs(NumRegs);
for (unsigned i = 0; i < NumRegs; ++i) {
- unsigned R = TargetRegisterInfo::index2VirtReg(i);
+ unsigned R = Register::index2VirtReg(i);
if (MRI->getRegClass(R) == DoubleRC)
DoubleRegs.set(i);
}
BitVector FixedRegs(NumRegs);
for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
- unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ unsigned R = Register::index2VirtReg(x);
MachineInstr *DefI = MRI->getVRegDef(R);
// In some cases a register may exist, but never be defined or used.
// It should never appear anywhere, but mark it as "fixed", just to be
@@ -244,7 +244,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
if (FixedRegs[x])
continue;
- unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ unsigned R = Register::index2VirtReg(x);
LLVM_DEBUG(dbgs() << printReg(R, TRI) << " ~~");
USet &Asc = AssocMap[R];
for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end();
@@ -258,14 +258,14 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
// Skip non-registers or registers with subregisters.
if (&MO == &Op || !MO.isReg() || MO.getSubReg())
continue;
- unsigned T = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(T)) {
+ Register T = MO.getReg();
+ if (!Register::isVirtualRegister(T)) {
FixedRegs.set(x);
continue;
}
if (MRI->getRegClass(T) != DoubleRC)
continue;
- unsigned u = TargetRegisterInfo::virtReg2Index(T);
+ unsigned u = Register::virtReg2Index(T);
if (FixedRegs[u])
continue;
LLVM_DEBUG(dbgs() << ' ' << printReg(T, TRI));
@@ -281,7 +281,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
unsigned NextP = 1;
USet Visited;
for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
- unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ unsigned R = Register::index2VirtReg(x);
if (Visited.count(R))
continue;
// Create a new partition for R.
@@ -372,8 +372,8 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
case Hexagon::A2_andp:
case Hexagon::A2_orp:
case Hexagon::A2_xorp: {
- unsigned Rs = MI->getOperand(1).getReg();
- unsigned Rt = MI->getOperand(2).getReg();
+ Register Rs = MI->getOperand(1).getReg();
+ Register Rt = MI->getOperand(2).getReg();
return profit(Rs) + profit(Rt);
}
@@ -400,7 +400,7 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
}
int32_t HexagonSplitDoubleRegs::profit(unsigned Reg) const {
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
const MachineInstr *DefI = MRI->getVRegDef(Reg);
switch (DefI->getOpcode()) {
@@ -499,7 +499,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
return;
assert(Cond[1].isReg() && "Unexpected Cond vector from analyzeBranch");
// Expect a predicate register.
- unsigned PR = Cond[1].getReg();
+ Register PR = Cond[1].getReg();
assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass);
// Get the registers on which the loop controlling compare instruction
@@ -535,7 +535,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
if (!MI.isPHI())
break;
const MachineOperand &MD = MI.getOperand(0);
- unsigned R = MD.getReg();
+ Register R = MD.getReg();
if (MRI->getRegClass(R) == DoubleRC)
DP.push_back(R);
}
@@ -551,7 +551,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
// Get the output from the add. If it is one of the inputs to the
// loop-controlling compare instruction, then R is likely an induc-
// tion register.
- unsigned T = UseI->getOperand(0).getReg();
+ Register T = UseI->getOperand(0).getReg();
if (T == CmpR1 || T == CmpR2)
return false;
}
@@ -603,9 +603,9 @@ void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
continue;
}
// For register operands, set the subregister.
- unsigned R = Op.getReg();
+ Register R = Op.getReg();
unsigned SR = Op.getSubReg();
- bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R);
+ bool isVirtReg = Register::isVirtualRegister(R);
bool isKill = Op.isKill();
if (isVirtReg && MRI->getRegClass(R) == DoubleRC) {
isKill = false;
@@ -674,7 +674,7 @@ void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
: MI->getOperand(2).getImm();
MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0);
const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg());
- unsigned NewR = MRI->createVirtualRegister(RC);
+ Register NewR = MRI->createVirtualRegister(RC);
assert(!UpdOp.getSubReg() && "Def operand with subreg");
BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR)
.addReg(AdrOp.getReg(), RSA)
@@ -789,8 +789,8 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
assert(F != PairMap.end());
const UUPair &P = F->second;
- unsigned LoR = P.first;
- unsigned HiR = P.second;
+ Register LoR = P.first;
+ Register HiR = P.second;
unsigned Opc = MI->getOpcode();
bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p);
@@ -813,7 +813,7 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
.addReg(Op1.getReg(), RS, HiSR);
} else if (S < 32) {
const TargetRegisterClass *IntRC = &IntRegsRegClass;
- unsigned TmpR = MRI->createVirtualRegister(IntRC);
+ Register TmpR = MRI->createVirtualRegister(IntRC);
// Expansion:
// Shift left: DR = shl R, #s
// LoR = shl R.lo, #s
@@ -953,12 +953,12 @@ void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
.addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
.addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
.addImm(S);
- unsigned TmpR1 = MRI->createVirtualRegister(IntRC);
+ Register TmpR1 = MRI->createVirtualRegister(IntRC);
BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1)
.addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
.addImm(S)
.addImm(32-S);
- unsigned TmpR2 = MRI->createVirtualRegister(IntRC);
+ Register TmpR2 = MRI->createVirtualRegister(IntRC);
BuildMI(B, MI, DL, TII->get(A2_or), TmpR2)
.addReg(Op1.getReg(), RS1, HiSR)
.addReg(TmpR1);
@@ -1002,7 +1002,7 @@ bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
switch (Opc) {
case TargetOpcode::PHI:
case TargetOpcode::COPY: {
- unsigned DstR = MI->getOperand(0).getReg();
+ Register DstR = MI->getOperand(0).getReg();
if (MRI->getRegClass(DstR) == DoubleRC) {
createHalfInstr(Opc, MI, PairMap, isub_lo);
createHalfInstr(Opc, MI, PairMap, isub_hi);
@@ -1079,7 +1079,7 @@ void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
for (auto &Op : MI->operands()) {
if (!Op.isReg() || !Op.isUse() || !Op.getSubReg())
continue;
- unsigned R = Op.getReg();
+ Register R = Op.getReg();
UUPairMap::const_iterator F = PairMap.find(R);
if (F == PairMap.end())
continue;
@@ -1104,8 +1104,8 @@ void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
for (auto &Op : MI->operands()) {
if (!Op.isReg() || !Op.isUse())
continue;
- unsigned R = Op.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(R))
+ Register R = Op.getReg();
+ if (!Register::isVirtualRegister(R))
continue;
if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg())
continue;
@@ -1113,7 +1113,7 @@ void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
if (F == PairMap.end())
continue;
const UUPair &Pr = F->second;
- unsigned NewDR = MRI->createVirtualRegister(DoubleRC);
+ Register NewDR = MRI->createVirtualRegister(DoubleRC);
BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR)
.addReg(Pr.first)
.addImm(Hexagon::isub_lo)
@@ -1145,8 +1145,8 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
U != W; ++U)
SplitIns.insert(U->getParent());
- unsigned LoR = MRI->createVirtualRegister(IntRC);
- unsigned HiR = MRI->createVirtualRegister(IntRC);
+ Register LoR = MRI->createVirtualRegister(IntRC);
+ Register HiR = MRI->createVirtualRegister(IntRC);
LLVM_DEBUG(dbgs() << "Created mapping: " << printReg(DR, TRI) << " -> "
<< printReg(HiR, TRI) << ':' << printReg(LoR, TRI)
<< '\n');
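The `splitShift` hunk above spells out its expansion in a comment: for a shift amount 0 < s < 32, a 64-bit left shift becomes a shift of the low word plus a merge into the high word of the bits that spill across the 32-bit boundary (the role of the `S2_extractu` temporary). A host-side check of that decomposition:

    #include <cassert>
    #include <cstdint>

    // Valid for 0 < S < 32: the high half gets (Hi << S) merged with the top
    // S bits of the low half; the low half is shifted in place.
    uint64_t shl64ViaHalves(uint64_t R, unsigned S) {
      uint32_t Lo = uint32_t(R), Hi = uint32_t(R >> 32);
      uint32_t NewLo = Lo << S;
      uint32_t NewHi = (Hi << S) | (Lo >> (32 - S));
      return (uint64_t(NewHi) << 32) | NewLo;
    }

    int main() {
      uint64_t R = 0x123456789ABCDEF0ULL;
      for (unsigned S = 1; S < 32; ++S)
        assert(shl64ViaHalves(R, S) == (R << S));
      return 0;
    }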
diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp
index b8b61517ff95..27fefa5f5e2b 100644
--- a/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -441,7 +441,7 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
// Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
- unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
+ Register VReg = MF->getRegInfo().createVirtualRegister(RC);
MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg)
.addImm(int(Acc));
NG.push_back(TfrI);
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 7ec63a642b0c..6c706fea096b 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -119,7 +119,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
FeatureBitset Features = getFeatureBits();
if (HexagonDisableDuplex)
- setFeatureBits(Features.set(Hexagon::FeatureDuplex, false));
+ setFeatureBits(Features.reset(Hexagon::FeatureDuplex));
setFeatureBits(Hexagon_MC::completeHVXFeatures(Features));
return *this;
@@ -230,7 +230,7 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
else if (SchedRetvalOptimization) {
const MachineInstr *MI = DAG->SUnits[su].getInstr();
if (MI->isCopy() &&
- TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
+ Register::isPhysicalRegister(MI->getOperand(1).getReg())) {
// %vregX = COPY %r0
VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
LastVRegUse.erase(MI->getOperand(1).getReg());
@@ -243,8 +243,7 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
VRegHoldingReg.count(MO.getReg())) {
// <use of %vregX>
LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
- } else if (MO.isDef() &&
- TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ } else if (MO.isDef() && Register::isPhysicalRegister(MO.getReg())) {
for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
++AI) {
if (LastVRegUse.count(*AI) &&
@@ -345,7 +344,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
// If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
// the correct latency.
if ((DstInst->isRegSequence() || DstInst->isCopy()) && Dst->NumSuccs == 1) {
- unsigned DReg = DstInst->getOperand(0).getReg();
+ Register DReg = DstInst->getOperand(0).getReg();
MachineInstr *DDst = Dst->Succs[0].getSUnit()->getInstr();
unsigned UseIdx = -1;
for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
@@ -375,15 +374,15 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
void HexagonSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(llvm::make_unique<UsrOverflowMutation>());
- Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>());
- Mutations.push_back(llvm::make_unique<BankConflictMutation>());
+ Mutations.push_back(std::make_unique<UsrOverflowMutation>());
+ Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
+ Mutations.push_back(std::make_unique<BankConflictMutation>());
}
void HexagonSubtarget::getSMSMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(llvm::make_unique<UsrOverflowMutation>());
- Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>());
+ Mutations.push_back(std::make_unique<UsrOverflowMutation>());
+ Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
}
// Pin the vtable to this file.
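The `llvm::make_unique` → `std::make_unique` replacements here (and in HexagonTargetMachine.cpp below) retire LLVM's pre-C++14 polyfill now that the standard-library version is available; the call sites are otherwise unchanged. The stubs below stand in for the ScheduleDAGMutation subclasses registered above:

    #include <memory>
    #include <vector>

    struct MutationStub { virtual ~MutationStub() = default; };
    struct UsrOverflowStub : MutationStub {};

    int main() {
      std::vector<std::unique_ptr<MutationStub>> Mutations;
      Mutations.push_back(std::make_unique<UsrOverflowStub>()); // C++14 standard form
      return Mutations.empty() ? 1 : 0;
    }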
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 007423ef1902..31157a0065d9 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -228,7 +228,7 @@ public:
}
bool isHVXVectorType(MVT VecTy, bool IncludeBool = false) const {
- if (!VecTy.isVector() || !useHVXOps())
+ if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
return false;
MVT ElemTy = VecTy.getVectorElementType();
if (!IncludeBool && ElemTy == MVT::i1)
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 80b8480448fe..d709a82be660 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -111,10 +111,10 @@ int HexagonTargetMachineModule = 0;
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
- new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>());
- DAG->addMutation(make_unique<HexagonSubtarget::UsrOverflowMutation>());
- DAG->addMutation(make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
- DAG->addMutation(make_unique<HexagonSubtarget::CallMutation>());
+ new VLIWMachineScheduler(C, std::make_unique<ConvergingVLIWScheduler>());
+ DAG->addMutation(std::make_unique<HexagonSubtarget::UsrOverflowMutation>());
+ DAG->addMutation(std::make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
+ DAG->addMutation(std::make_unique<HexagonSubtarget::CallMutation>());
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
@@ -218,7 +218,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small),
(HexagonNoOpt ? CodeGenOpt::None : OL)),
- TLOF(make_unique<HexagonTargetObjectFile>()) {
+ TLOF(std::make_unique<HexagonTargetObjectFile>()) {
initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
initAsmInfo();
}
@@ -244,7 +244,7 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<HexagonSubtarget>(TargetTriple, CPU, FS, *this);
+ I = std::make_unique<HexagonSubtarget>(TargetTriple, CPU, FS, *this);
}
return I.get();
}
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 38062e8e922c..ddbc5543348d 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -45,6 +45,8 @@ bool HexagonTTIImpl::useHVX() const {
bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
assert(VecTy->isVectorTy());
+ if (cast<VectorType>(VecTy)->isScalable())
+ return false;
// Avoid types like <2 x i32*>.
if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
return false;
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 27e8fc019007..12ede503af83 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -68,8 +68,8 @@ public:
bool shouldFavorPostInc() const;
// L1 cache prefetch.
- unsigned getPrefetchDistance() const;
- unsigned getCacheLineSize() const;
+ unsigned getPrefetchDistance() const override;
+ unsigned getCacheLineSize() const override;
/// @}
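Aside: adding `override` makes the compiler verify that these declarations still match virtual functions in the TTI base class; if the base signature drifts later, the mismatch becomes a compile error rather than silent hiding. A standalone sketch:

    struct TTIBase {
      virtual unsigned getCacheLineSize() const { return 0; }
      virtual ~TTIBase() = default;
    };

    struct HexagonLikeTTI : TTIBase {
      // `override` fails to compile unless this exactly matches a base
      // virtual, catching signature drift at build time.
      unsigned getCacheLineSize() const override { return 32; }
    };

    int main() { return HexagonLikeTTI().getCacheLineSize() == 32 ? 0 : 1; }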
diff --git a/lib/Target/Hexagon/HexagonVExtract.cpp b/lib/Target/Hexagon/HexagonVExtract.cpp
index a9692f42e468..0c0266a6839a 100644
--- a/lib/Target/Hexagon/HexagonVExtract.cpp
+++ b/lib/Target/Hexagon/HexagonVExtract.cpp
@@ -67,9 +67,9 @@ unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR,
MachineRegisterInfo &MRI) {
MachineBasicBlock &ExtB = *ExtI->getParent();
DebugLoc DL = ExtI->getDebugLoc();
- unsigned ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- unsigned ExtIdxR = ExtI->getOperand(2).getReg();
+ Register ExtIdxR = ExtI->getOperand(2).getReg();
unsigned ExtIdxS = ExtI->getOperand(2).getSubReg();
// Simplified check for a compile-time constant value of ExtIdxR.
@@ -86,7 +86,7 @@ unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR,
}
}
- unsigned IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::A2_andir), IdxR)
.add(ExtI->getOperand(2))
.addImm(-4);
@@ -111,7 +111,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
unsigned Opc = MI.getOpcode();
if (Opc != Hexagon::V6_extractw)
continue;
- unsigned VecR = MI.getOperand(1).getReg();
+ Register VecR = MI.getOperand(1).getReg();
VExtractMap[VecR].push_back(&MI);
}
}
@@ -144,13 +144,13 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock &ExtB = *ExtI->getParent();
DebugLoc DL = ExtI->getDebugLoc();
- unsigned BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ Register BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::PS_fi), BaseR)
.addFrameIndex(FI)
.addImm(SR == 0 ? 0 : VecSize/2);
unsigned ElemR = genElemLoad(ExtI, BaseR, MRI);
- unsigned ExtR = ExtI->getOperand(0).getReg();
+ Register ExtR = ExtI->getOperand(0).getReg();
MRI.replaceRegWith(ExtR, ElemR);
ExtB.erase(ExtI);
Changed = true;
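Aside: the recurring unsigned -> Register changes move register numbers into a dedicated type. llvm::Register converts implicitly to and from unsigned, so call sites compile unchanged while the virtual/physical distinction gains named helpers. A rough standalone analogue (the tag bit here is illustrative, not LLVM's exact encoding):

    #include <cassert>

    class Reg {
      unsigned Id;
      static constexpr unsigned VirtBit = 1u << 31; // illustrative tag bit
    public:
      Reg(unsigned R = 0) : Id(R) {}       // implicit, like llvm::Register
      operator unsigned() const { return Id; }
      bool isVirtual() const { return (Id & VirtBit) != 0; }
      bool isPhysical() const { return Id != 0 && !isVirtual(); }
    };

    int main() {
      Reg Phys = 5;
      Reg Virt = (1u << 31) | 7;
      assert(Phys.isPhysical() && !Phys.isVirtual());
      assert(Virt.isVirtual());
    }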
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 3619e4c239d7..fab5edefb553 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -57,9 +58,9 @@ static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon packetizer pass"));
-cl::opt<bool> Slot1Store("slot1-store-slot0-load", cl::Hidden,
- cl::ZeroOrMore, cl::init(true),
- cl::desc("Allow slot1 store and slot0 load"));
+static cl::opt<bool> Slot1Store("slot1-store-slot0-load", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true),
+ cl::desc("Allow slot1 store and slot0 load"));
static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
cl::ZeroOrMore, cl::Hidden, cl::init(true),
@@ -129,16 +130,16 @@ INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
"Hexagon Packetizer", false, false)
HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
- MachineLoopInfo &MLI, AliasAnalysis *AA,
+ MachineLoopInfo &MLI, AAResults *AA,
const MachineBranchProbabilityInfo *MBPI, bool Minimal)
: VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
Minimal(Minimal) {
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
- addMutation(llvm::make_unique<HexagonSubtarget::UsrOverflowMutation>());
- addMutation(llvm::make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
- addMutation(llvm::make_unique<HexagonSubtarget::BankConflictMutation>());
+ addMutation(std::make_unique<HexagonSubtarget::UsrOverflowMutation>());
+ addMutation(std::make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
+ addMutation(std::make_unique<HexagonSubtarget::BankConflictMutation>());
}
// Check if FirstI modifies a register that SecondI reads.
@@ -148,7 +149,7 @@ static bool hasWriteToReadDep(const MachineInstr &FirstI,
for (auto &MO : FirstI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned R = MO.getReg();
+ Register R = MO.getReg();
if (SecondI.readsRegister(R, TRI))
return true;
}
@@ -422,7 +423,7 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI,
dbgs() << "Checking CUR against ";
MJ.dump();
});
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
bool FoundMatch = false;
for (auto &MO : MJ.operands())
if (MO.isReg() && MO.getReg() == DestReg)
@@ -515,7 +516,7 @@ bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) {
unsigned BPJ, OPJ;
if (!HII->getBaseAndOffsetPosition(MJ, BPJ, OPJ))
return false;
- unsigned Reg = MI.getOperand(BPI).getReg();
+ Register Reg = MI.getOperand(BPI).getReg();
if (Reg != MJ.getOperand(BPJ).getReg())
return false;
// Make sure that the dependences do not restrict adding MI to the packet.
@@ -788,7 +789,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
return false;
if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
continue;
- unsigned R = MO.getReg();
+ Register R = MO.getReg();
if (R == DepReg || HRI->isSuperRegister(DepReg, R))
return false;
}
@@ -1208,7 +1209,7 @@ bool HexagonPacketizerList::hasDeadDependence(const MachineInstr &I,
for (auto &MO : J.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.isDead())
continue;
- unsigned R = MO.getReg();
+ Register R = MO.getReg();
if (R != Hexagon::USR_OVF && DeadDefs[R])
return true;
}
@@ -1585,7 +1586,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// subset of the volatile register set.
for (const MachineOperand &Op : I.operands()) {
if (Op.isReg() && Op.isDef()) {
- unsigned R = Op.getReg();
+ Register R = Op.getReg();
if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI))
continue;
} else if (!Op.isRegMask()) {
@@ -1763,6 +1764,16 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineBasicBlock::iterator EndMI) {
// Replace VLIWPacketizerList::endPacket(MBB, EndMI).
+ LLVM_DEBUG({
+ if (!CurrentPacketMIs.empty()) {
+ dbgs() << "Finalizing packet:\n";
+ unsigned Idx = 0;
+ for (MachineInstr *MI : CurrentPacketMIs) {
+ unsigned R = ResourceTracker->getUsedResources(Idx++);
+ dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
+ }
+ }
+ });
bool memShufDisabled = getmemShufDisabled();
if (memShufDisabled && !foundLSInPacket()) {
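Aside: the new LLVM_DEBUG block dumps each instruction of the packet being finalized together with its resource mask; utohexstr comes from the StringExtras.h include added at the top of this file, and the whole block compiles away in release builds. A sketch of the compile-out idiom, with DEBUG_BLOCK as an illustrative stand-in for LLVM_DEBUG:

    #include <cstdio>

    #ifndef NDEBUG
    #define DEBUG_BLOCK(X) do { X } while (false)
    #else
    #define DEBUG_BLOCK(X) do { } while (false)
    #endif

    int main() {
      unsigned Res = 0x2a; // stand-in for a resource-usage bitmask
      // The braced statement vanishes entirely when NDEBUG is defined.
      DEBUG_BLOCK({ std::printf("  * [res:0x%x]\n", Res); });
    }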
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index daa86b6f5393..943b9ac7ecc4 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -69,8 +69,7 @@ private:
public:
HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- AliasAnalysis *AA,
- const MachineBranchProbabilityInfo *MBPI,
+ AAResults *AA, const MachineBranchProbabilityInfo *MBPI,
bool Minimal);
// initPacketizerState - initialize some internal flags.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 7c0770926abe..75cb398d4097 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -201,9 +201,7 @@ public:
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override {
- MCFixupKind Kind = Fixup.getKind();
-
- switch((unsigned)Kind) {
+ switch(Fixup.getTargetKind()) {
default:
llvm_unreachable("Unknown Fixup Kind!");
@@ -583,7 +581,7 @@ public:
return false;
// If we cannot resolve the fixup value, it requires relaxation.
if (!Resolved) {
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
case fixup_Hexagon_B22_PCREL:
// GetFixupCount assumes B22 won't relax
LLVM_FALLTHROUGH;
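Aside: Fixup.getTargetKind() replaces the repeated (unsigned)Fixup.getKind() casts in these switches. Target fixup kinds are values past FirstTargetFixupKind that belong to a per-target enum, not to MCFixupKind itself, so switching on them previously required a cast at every site. A standalone sketch of the underlying issue:

    // Target-specific fixup values live past a sentinel in a shared enum,
    // so case labels from a different enum need the value as plain unsigned.
    enum FixupKind { FK_Data_4, FirstTargetFixupKind = 128 };
    enum { fixup_B22_PCREL = FirstTargetFixupKind }; // target-local kinds

    unsigned getTargetKind(FixupKind K) { return static_cast<unsigned>(K); }

    int main() {
      FixupKind K = static_cast<FixupKind>(fixup_B22_PCREL);
      switch (getTargetKind(K)) {        // no cast at each call site
      case fixup_B22_PCREL: return 0;
      default: return 1;
      }
    }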
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index f678bf49322e..cdbeae38b3a1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -44,7 +44,7 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx,
MCFixup const &Fixup,
bool IsPCRel) const {
MCSymbolRefExpr::VariantKind Variant = Target.getAccessVariant();
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default:
report_fatal_error("Unrecognized relocation type");
break;
@@ -299,5 +299,5 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx,
std::unique_ptr<MCObjectTargetWriter>
llvm::createHexagonELFObjectWriter(uint8_t OSABI, StringRef CPU) {
- return llvm::make_unique<HexagonELFObjectWriter>(OSABI, CPU);
+ return std::make_unique<HexagonELFObjectWriter>(OSABI, CPU);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index fcd3758600c1..8b262bd0248e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -726,9 +726,6 @@ void HexagonMCChecker::reportNote(SMLoc Loc, llvm::Twine const &Msg) {
}
void HexagonMCChecker::reportWarning(Twine const &Msg) {
- if (ReportErrors) {
- auto SM = Context.getSourceManager();
- if (SM)
- SM->PrintMessage(MCB.getLoc(), SourceMgr::DK_Warning, Msg);
- }
+ if (ReportErrors)
+ Context.reportWarning(MCB.getLoc(), Msg);
}
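Aside: routing the diagnostic through MCContext::reportWarning removes the per-caller SourceMgr null-check that the deleted lines performed; the context owns that plumbing. A standalone sketch of the centralization (the message text is illustrative):

    #include <cstdio>
    #include <string>

    // One diagnostic entry point instead of each caller probing for a
    // source manager before printing.
    struct Context {
      void reportWarning(const std::string &Msg) {
        std::fprintf(stderr, "warning: %s\n", Msg.c_str());
      }
    };

    int main() {
      Context Ctx;
      Ctx.reportWarning("packet may overflow USR");
    }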
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index f2432883af6f..a799f7f7c0b9 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -116,8 +116,8 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
}
// Update the maximum alignment of the section if necessary.
- if (ByteAlignment > Section.getAlignment())
- Section.setAlignment(ByteAlignment);
+ if (Align(ByteAlignment) > Section.getAlignment())
+ Section.setAlignment(Align(ByteAlignment));
SwitchSection(P.first, P.second);
} else {
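Aside: wrapping byte counts in Align(...) is another theme of this import: alignment becomes a distinct type that can only hold powers of two, so byte counts stop being confusable with log2 exponents (the Lanai hunks below convert setMinFunctionAlignment(2), a log2 value, into Align(4), the same alignment in bytes). A minimal analogue of the type:

    #include <cassert>
    #include <cstdint>

    // Minimal analogue of llvm::Align: a power-of-two byte alignment.
    class Alignment {
      uint64_t Value;
    public:
      explicit Alignment(uint64_t V) : Value(V) {
        assert(V != 0 && (V & (V - 1)) == 0 && "must be a power of two");
      }
      uint64_t value() const { return Value; }
      friend bool operator>(Alignment A, Alignment B) {
        return A.value() > B.value();
      }
    };

    int main() {
      Alignment Section(4);
      if (Alignment(16) > Section) // type-safe: bytes compared with bytes
        Section = Alignment(16);
      assert(Section.value() == 16);
    }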
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 9c50b25156c3..870ab9e94a63 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -72,7 +72,6 @@ cl::opt<bool> MV65("mv65", cl::Hidden, cl::desc("Build for Hexagon V65"),
cl::init(false));
cl::opt<bool> MV66("mv66", cl::Hidden, cl::desc("Build for Hexagon V66"),
cl::init(false));
-} // namespace
cl::opt<Hexagon::ArchEnum>
EnableHVX("mhvx",
@@ -86,6 +85,7 @@ cl::opt<Hexagon::ArchEnum>
clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
// Sentinel for flag not present.
cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional);
+} // namespace
static cl::opt<bool>
DisableHVX("mno-hvx", cl::Hidden,
@@ -264,14 +264,12 @@ createHexagonObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
}
static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) {
- uint64_t FB = STI->getFeatureBits().to_ullong();
- if (FB & (1ULL << F))
+ if (STI->getFeatureBits()[F])
STI->ToggleFeature(F);
}
static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) {
- uint64_t FB = STI->getFeatureBits().to_ullong();
- return (FB & (1ULL << F)) != 0;
+ return STI->getFeatureBits()[F];
}
namespace {
@@ -398,7 +396,7 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS);
if (HexagonDisableDuplex) {
llvm::FeatureBitset Features = X->getFeatureBits();
- X->setFeatureBits(Features.set(Hexagon::FeatureDuplex, false));
+ X->setFeatureBits(Features.reset(Hexagon::FeatureDuplex));
}
X->setFeatureBits(completeHVXFeatures(X->getFeatureBits()));
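Aside: indexing the FeatureBitset directly (getFeatureBits()[F], .reset(F)) is more than a style fix: the old to_ullong()/1ULL << F pattern only works while every feature index fits in 64 bits, and (1ULL << F) is undefined behavior for F >= 64. A standalone sketch with std::bitset:

    #include <bitset>
    #include <cassert>

    int main() {
      std::bitset<192> Features; // FeatureBitset is similarly wider than 64
      const unsigned F = 130;    // indices past 64 occur in real targets

      Features.set(F);
      // to_ullong() would throw here (a high bit is set), and (1ULL << F)
      // would be undefined behavior; direct indexing is always safe.
      assert(Features.test(F));
      Features.reset(F);
      assert(!Features.test(F));
    }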
diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp
index 7702024f87bd..a9d39fd4b2dc 100644
--- a/lib/Target/Hexagon/RDFCopy.cpp
+++ b/lib/Target/Hexagon/RDFCopy.cpp
@@ -45,8 +45,8 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
const MachineOperand &Src = MI->getOperand(1);
RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg());
RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg());
- assert(TargetRegisterInfo::isPhysicalRegister(DstR.Reg));
- assert(TargetRegisterInfo::isPhysicalRegister(SrcR.Reg));
+ assert(Register::isPhysicalRegister(DstR.Reg));
+ assert(Register::isPhysicalRegister(SrcR.Reg));
const TargetRegisterInfo &TRI = DFG.getTRI();
if (TRI.getMinimalPhysRegClass(DstR.Reg) !=
TRI.getMinimalPhysRegClass(SrcR.Reg))
diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp
index 52178931aa6d..af86c7b1956b 100644
--- a/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
#include <queue>
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index 9d8f706b8a0f..0cb35dc98819 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -633,7 +633,7 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
// uses or defs, and those lists do not allow sub-registers.
if (Op.getSubReg() != 0)
return false;
- RegisterId Reg = Op.getReg();
+ Register Reg = Op.getReg();
const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs()
: D.getImplicitUses();
if (!ImpR)
@@ -963,7 +963,7 @@ void DataFlowGraph::build(unsigned Options) {
RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const {
assert(PhysicalRegisterInfo::isRegMaskId(Reg) ||
- TargetRegisterInfo::isPhysicalRegister(Reg));
+ Register::isPhysicalRegister(Reg));
assert(Reg != 0);
if (Sub != 0)
Reg = TRI.getSubReg(Reg, Sub);
@@ -1291,8 +1291,8 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
continue;
- unsigned R = Op.getReg();
- if (!R || !TargetRegisterInfo::isPhysicalRegister(R))
+ Register R = Op.getReg();
+ if (!R || !Register::isPhysicalRegister(R))
continue;
uint16_t Flags = NodeAttrs::None;
if (TOI.isPreserving(In, OpN)) {
@@ -1336,8 +1336,8 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
continue;
- unsigned R = Op.getReg();
- if (!R || !TargetRegisterInfo::isPhysicalRegister(R) || DoneDefs.test(R))
+ Register R = Op.getReg();
+ if (!R || !Register::isPhysicalRegister(R) || DoneDefs.test(R))
continue;
RegisterRef RR = makeRegRef(Op);
uint16_t Flags = NodeAttrs::None;
@@ -1365,8 +1365,8 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isUse())
continue;
- unsigned R = Op.getReg();
- if (!R || !TargetRegisterInfo::isPhysicalRegister(R))
+ Register R = Op.getReg();
+ if (!R || !Register::isPhysicalRegister(R))
continue;
uint16_t Flags = NodeAttrs::None;
if (Op.isUndef())
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
index 9cd304aa10bc..7d7b89462ff9 100644
--- a/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -889,8 +889,8 @@ void Liveness::resetKills(MachineBasicBlock *B) {
// implicit defs.
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
continue;
- unsigned R = Op.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(R))
+ Register R = Op.getReg();
+ if (!Register::isPhysicalRegister(R))
continue;
for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
Live.reset(*SR);
@@ -898,8 +898,8 @@ void Liveness::resetKills(MachineBasicBlock *B) {
for (auto &Op : MI->operands()) {
if (!Op.isReg() || !Op.isUse() || Op.isUndef())
continue;
- unsigned R = Op.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(R))
+ Register R = Op.getReg();
+ if (!Register::isPhysicalRegister(R))
continue;
bool IsLive = false;
for (MCRegAliasIterator AR(R, &TRI, true); AR.isValid(); ++AR) {
diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp
index 6e0f33695f0e..b5675784e34b 100644
--- a/lib/Target/Hexagon/RDFRegisters.cpp
+++ b/lib/Target/Hexagon/RDFRegisters.cpp
@@ -101,7 +101,7 @@ RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const {
std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
// Do not include RR in the alias set.
std::set<RegisterId> AS;
- assert(isRegMaskId(Reg) || TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(isRegMaskId(Reg) || Register::isPhysicalRegister(Reg));
if (isRegMaskId(Reg)) {
// XXX SLOW
const uint32_t *MB = getRegMaskBits(Reg);
@@ -129,8 +129,8 @@ std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
}
bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const {
- assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg));
- assert(TargetRegisterInfo::isPhysicalRegister(RB.Reg));
+ assert(Register::isPhysicalRegister(RA.Reg));
+ assert(Register::isPhysicalRegister(RB.Reg));
MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
@@ -160,7 +160,7 @@ bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const {
}
bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const {
- assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg));
+ assert(Register::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg));
const uint32_t *MB = getRegMaskBits(RM.Reg);
bool Preserved = MB[RR.Reg/32] & (1u << (RR.Reg%32));
// If the lane mask information is "full", e.g. when the given lane mask
diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h
index 646233bacda5..4afaf80e4659 100644
--- a/lib/Target/Hexagon/RDFRegisters.h
+++ b/lib/Target/Hexagon/RDFRegisters.h
@@ -99,15 +99,15 @@ namespace rdf {
const MachineFunction &mf);
static bool isRegMaskId(RegisterId R) {
- return TargetRegisterInfo::isStackSlot(R);
+ return Register::isStackSlot(R);
}
RegisterId getRegMaskId(const uint32_t *RM) const {
- return TargetRegisterInfo::index2StackSlot(RegMasks.find(RM));
+ return Register::index2StackSlot(RegMasks.find(RM));
}
const uint32_t *getRegMaskBits(RegisterId R) const {
- return RegMasks.get(TargetRegisterInfo::stackSlot2Index(R));
+ return RegMasks.get(Register::stackSlot2Index(R));
}
RegisterRef normalize(RegisterRef RR) const;
@@ -125,7 +125,7 @@ namespace rdf {
}
const BitVector &getMaskUnits(RegisterId MaskId) const {
- return MaskInfos[TargetRegisterInfo::stackSlot2Index(MaskId)].Units;
+ return MaskInfos[Register::stackSlot2Index(MaskId)].Units;
}
RegisterRef mapTo(RegisterRef RR, unsigned R) const;
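Aside: RDF encodes reg-mask IDs in the stack-slot range of the register-ID space; this hunk only re-homes the helpers from TargetRegisterInfo to Register. A sketch of the tagging scheme those helpers wrap (the sentinel value is illustrative, not LLVM's):

    #include <cassert>

    constexpr unsigned FirstStackSlot = 1u << 30; // illustrative boundary

    bool isStackSlot(unsigned R) { return R >= FirstStackSlot; }
    unsigned index2StackSlot(unsigned Idx) { return Idx + FirstStackSlot; }
    unsigned stackSlot2Index(unsigned R) {
      assert(isStackSlot(R) && "not in the stack-slot range");
      return R - FirstStackSlot;
    }

    int main() {
      unsigned MaskId = index2StackSlot(3); // a reg-mask ID in this scheme
      assert(isStackSlot(MaskId) && stackSlot2Index(MaskId) == 3);
    }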
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 9af8a0b35b2f..ec82e3a41f2a 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -469,13 +469,14 @@ public:
else if (isa<LanaiMCExpr>(getImm())) {
#ifndef NDEBUG
const LanaiMCExpr *SymbolRefExpr = dyn_cast<LanaiMCExpr>(getImm());
- assert(SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_LO);
+ assert(SymbolRefExpr &&
+ SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_LO);
#endif
Inst.addOperand(MCOperand::createExpr(getImm()));
} else if (isa<MCBinaryExpr>(getImm())) {
#ifndef NDEBUG
const MCBinaryExpr *BinaryExpr = dyn_cast<MCBinaryExpr>(getImm());
- assert(isa<LanaiMCExpr>(BinaryExpr->getLHS()) &&
+ assert(BinaryExpr && isa<LanaiMCExpr>(BinaryExpr->getLHS()) &&
cast<LanaiMCExpr>(BinaryExpr->getLHS())->getKind() ==
LanaiMCExpr::VK_Lanai_ABS_LO);
#endif
@@ -499,13 +500,14 @@ public:
else if (isa<LanaiMCExpr>(getImm())) {
#ifndef NDEBUG
const LanaiMCExpr *SymbolRefExpr = dyn_cast<LanaiMCExpr>(getImm());
- assert(SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_HI);
+ assert(SymbolRefExpr &&
+ SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_HI);
#endif
Inst.addOperand(MCOperand::createExpr(getImm()));
} else if (isa<MCBinaryExpr>(getImm())) {
#ifndef NDEBUG
const MCBinaryExpr *BinaryExpr = dyn_cast<MCBinaryExpr>(getImm());
- assert(isa<LanaiMCExpr>(BinaryExpr->getLHS()) &&
+ assert(BinaryExpr && isa<LanaiMCExpr>(BinaryExpr->getLHS()) &&
cast<LanaiMCExpr>(BinaryExpr->getLHS())->getKind() ==
LanaiMCExpr::VK_Lanai_ABS_HI);
#endif
@@ -544,10 +546,9 @@ public:
} else if (isa<MCBinaryExpr>(getImm())) {
#ifndef NDEBUG
const MCBinaryExpr *BinaryExpr = dyn_cast<MCBinaryExpr>(getImm());
- const LanaiMCExpr *SymbolRefExpr =
- dyn_cast<LanaiMCExpr>(BinaryExpr->getLHS());
- assert(SymbolRefExpr &&
- SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_None);
+ assert(BinaryExpr && isa<LanaiMCExpr>(BinaryExpr->getLHS()) &&
+ cast<LanaiMCExpr>(BinaryExpr->getLHS())->getKind() ==
+ LanaiMCExpr::VK_Lanai_None);
#endif
Inst.addOperand(MCOperand::createExpr(getImm()));
} else
@@ -580,7 +581,7 @@ public:
}
static std::unique_ptr<LanaiOperand> CreateToken(StringRef Str, SMLoc Start) {
- auto Op = make_unique<LanaiOperand>(TOKEN);
+ auto Op = std::make_unique<LanaiOperand>(TOKEN);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = Start;
@@ -590,7 +591,7 @@ public:
static std::unique_ptr<LanaiOperand> createReg(unsigned RegNum, SMLoc Start,
SMLoc End) {
- auto Op = make_unique<LanaiOperand>(REGISTER);
+ auto Op = std::make_unique<LanaiOperand>(REGISTER);
Op->Reg.RegNum = RegNum;
Op->StartLoc = Start;
Op->EndLoc = End;
@@ -599,7 +600,7 @@ public:
static std::unique_ptr<LanaiOperand> createImm(const MCExpr *Value,
SMLoc Start, SMLoc End) {
- auto Op = make_unique<LanaiOperand>(IMMEDIATE);
+ auto Op = std::make_unique<LanaiOperand>(IMMEDIATE);
Op->Imm.Value = Value;
Op->StartLoc = Start;
Op->EndLoc = End;
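Aside: the assertion changes in this file share one fix: dyn_cast returns null when the dynamic type does not match, so asserting on the pointer before dereferencing turns a type mismatch into a readable assertion failure instead of a null dereference. A standalone sketch, using dynamic_cast in place of LLVM's custom RTTI:

    #include <cassert>

    struct Expr { virtual ~Expr() = default; };
    struct SymExpr : Expr { int Kind = 0; };

    template <typename T> T *dyn_cast(Expr *E) { return dynamic_cast<T *>(E); }

    void checkOperand(Expr *E) {
      const SymExpr *S = dyn_cast<SymExpr>(E);
      // Check the pointer first, as the fixed asserts above now do.
      assert(S && S->Kind == 0);
      (void)S;
    }

    int main() {
      SymExpr S;
      checkOperand(&S);
    }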
diff --git a/lib/Target/Lanai/LanaiAsmPrinter.cpp b/lib/Target/Lanai/LanaiAsmPrinter.cpp
index 64d963475e1a..12a3202446a8 100644
--- a/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -133,7 +133,7 @@ bool LanaiAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO = MI->getOperand(RegOp);
if (!MO.isReg())
return true;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
O << LanaiInstPrinter::getRegisterName(Reg);
return false;
}
diff --git a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
index 09c63dca23e2..b9e577d201f9 100644
--- a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
+++ b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Simple pass to fills delay slots with useful instructions.
+// Simple pass to fill delay slots with useful instructions.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/LanaiFrameLowering.cpp b/lib/Target/Lanai/LanaiFrameLowering.cpp
index 142c09c504cc..eddc2b8e61f7 100644
--- a/lib/Target/Lanai/LanaiFrameLowering.cpp
+++ b/lib/Target/Lanai/LanaiFrameLowering.cpp
@@ -72,8 +72,8 @@ void LanaiFrameLowering::replaceAdjDynAllocPseudo(MachineFunction &MF) const {
MachineInstr &MI = *MBBI++;
if (MI.getOpcode() == Lanai::ADJDYNALLOC) {
DebugLoc DL = MI.getDebugLoc();
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
BuildMI(*MBB, MI, DL, LII.get(Lanai::ADD_I_LO), Dst)
.addReg(Src)
diff --git a/lib/Target/Lanai/LanaiFrameLowering.h b/lib/Target/Lanai/LanaiFrameLowering.h
index 5fe4535543ec..380d63df7301 100644
--- a/lib/Target/Lanai/LanaiFrameLowering.h
+++ b/lib/Target/Lanai/LanaiFrameLowering.h
@@ -31,7 +31,7 @@ protected:
public:
explicit LanaiFrameLowering(const LanaiSubtarget &Subtarget)
: TargetFrameLowering(StackGrowsDown,
- /*StackAlignment=*/8,
+ /*StackAlignment=*/Align(8),
/*LocalAreaOffset=*/0),
STI(Subtarget) {}
diff --git a/lib/Target/Lanai/LanaiISelLowering.cpp b/lib/Target/Lanai/LanaiISelLowering.cpp
index 1ed078bb433f..43933d062a7e 100644
--- a/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -144,9 +144,9 @@ LanaiTargetLowering::LanaiTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
- // Function alignments (log2)
- setMinFunctionAlignment(2);
- setPrefFunctionAlignment(2);
+ // Function alignments
+ setMinFunctionAlignment(Align(4));
+ setPrefFunctionAlignment(Align(4));
setJumpIsExpensive(true);
@@ -212,10 +212,11 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op,
// Lanai Inline Assembly Support
//===----------------------------------------------------------------------===//
-unsigned LanaiTargetLowering::getRegisterByName(const char *RegName, EVT /*VT*/,
- SelectionDAG & /*DAG*/) const {
+Register LanaiTargetLowering::getRegisterByName(
+ const char *RegName, EVT /*VT*/,
+ const MachineFunction & /*MF*/) const {
// Only unallocatable registers should be matched here.
- unsigned Reg = StringSwitch<unsigned>(RegName)
+ Register Reg = StringSwitch<unsigned>(RegName)
.Case("pc", Lanai::PC)
.Case("sp", Lanai::SP)
.Case("fp", Lanai::FP)
@@ -459,7 +460,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
EVT RegVT = VA.getLocVT();
switch (RegVT.getSimpleVT().SimpleTy) {
case MVT::i32: {
- unsigned VReg = RegInfo.createVirtualRegister(&Lanai::GPRRegClass);
+ Register VReg = RegInfo.createVirtualRegister(&Lanai::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);
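Aside: getRegisterByName now returns Register and receives the MachineFunction rather than a SelectionDAG (the header hunk below shows the matching declaration); the StringSwitch body is untouched. A standalone analogue of the StringSwitch chain, for readers unfamiliar with the idiom (register values are illustrative):

    #include <string>

    // Each .Case latches the first match; .Default supplies the fallback.
    unsigned getRegisterByName(const std::string &Name) {
      struct Sw {
        const std::string &S;
        unsigned Val;
        bool Done;
        Sw &Case(const char *K, unsigned V) {
          if (!Done && S == K) { Val = V; Done = true; }
          return *this;
        }
        unsigned Default(unsigned V) const { return Done ? Val : V; }
      } Switch{Name, 0, false};
      return Switch.Case("pc", 1).Case("sp", 2).Case("fp", 3).Default(0);
    }

    int main() { return getRegisterByName("sp") == 2 ? 0 : 1; }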
diff --git a/lib/Target/Lanai/LanaiISelLowering.h b/lib/Target/Lanai/LanaiISelLowering.h
index e7b5755e9041..4c35a2c6fb8e 100644
--- a/lib/Target/Lanai/LanaiISelLowering.h
+++ b/lib/Target/Lanai/LanaiISelLowering.h
@@ -90,8 +90,8 @@ public:
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- unsigned getRegisterByName(const char *RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char *RegName, EVT VT,
+ const MachineFunction &MF) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
diff --git a/lib/Target/Lanai/LanaiInstrInfo.cpp b/lib/Target/Lanai/LanaiInstrInfo.cpp
index 700a86069102..b950fd0424ef 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -86,8 +86,7 @@ void LanaiInstrInfo::loadRegFromStackSlot(
}
bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint(
- const MachineInstr &MIa, const MachineInstr &MIb,
- AliasAnalysis * /*AA*/) const {
+ const MachineInstr &MIa, const MachineInstr &MIb) const {
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
@@ -457,7 +456,7 @@ bool LanaiInstrInfo::analyzeSelect(const MachineInstr &MI,
// return the defining instruction.
static MachineInstr *canFoldIntoSelect(unsigned Reg,
const MachineRegisterInfo &MRI) {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
return nullptr;
@@ -479,7 +478,7 @@ static MachineInstr *canFoldIntoSelect(unsigned Reg,
// MI can't have any tied operands, that would conflict with predication.
if (MO.isTied())
return nullptr;
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ if (Register::isPhysicalRegister(MO.getReg()))
return nullptr;
if (MO.isDef() && !MO.isDead())
return nullptr;
@@ -505,7 +504,7 @@ LanaiInstrInfo::optimizeSelect(MachineInstr &MI,
// Find new register class to use.
MachineOperand FalseReg = MI.getOperand(Invert ? 1 : 2);
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
return nullptr;
diff --git a/lib/Target/Lanai/LanaiInstrInfo.h b/lib/Target/Lanai/LanaiInstrInfo.h
index d71424aeb0b1..59a04d2cc388 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/lib/Target/Lanai/LanaiInstrInfo.h
@@ -36,8 +36,7 @@ public:
}
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA) const override;
+ const MachineInstr &MIb) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.cpp b/lib/Target/Lanai/LanaiRegisterInfo.cpp
index d3056a1eba8e..7c28debb94dd 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -155,7 +155,7 @@ void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!HasFP || (needsStackRealignment(MF) && FrameIndex >= 0))
Offset += MF.getFrameInfo().getStackSize();
- unsigned FrameReg = getFrameRegister(MF);
+ Register FrameReg = getFrameRegister(MF);
if (FrameIndex >= 0) {
if (hasBasePointer(MF))
FrameReg = getBaseRegister();
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index 4313fa5a82b5..919d43ad9b9b 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -88,5 +88,5 @@ bool LanaiELFObjectWriter::needsRelocateWithSymbol(const MCSymbol & /*SD*/,
std::unique_ptr<MCObjectTargetWriter>
llvm::createLanaiELFObjectWriter(uint8_t OSABI) {
- return llvm::make_unique<LanaiELFObjectWriter>(OSABI);
+ return std::make_unique<LanaiELFObjectWriter>(OSABI);
}
diff --git a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index a0ec14ae2381..85dcc0f152f9 100644
--- a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -191,33 +191,33 @@ public:
}
static std::unique_ptr<MSP430Operand> CreateToken(StringRef Str, SMLoc S) {
- return make_unique<MSP430Operand>(Str, S);
+ return std::make_unique<MSP430Operand>(Str, S);
}
static std::unique_ptr<MSP430Operand> CreateReg(unsigned RegNum, SMLoc S,
SMLoc E) {
- return make_unique<MSP430Operand>(k_Reg, RegNum, S, E);
+ return std::make_unique<MSP430Operand>(k_Reg, RegNum, S, E);
}
static std::unique_ptr<MSP430Operand> CreateImm(const MCExpr *Val, SMLoc S,
SMLoc E) {
- return make_unique<MSP430Operand>(Val, S, E);
+ return std::make_unique<MSP430Operand>(Val, S, E);
}
static std::unique_ptr<MSP430Operand> CreateMem(unsigned RegNum,
const MCExpr *Val,
SMLoc S, SMLoc E) {
- return make_unique<MSP430Operand>(RegNum, Val, S, E);
+ return std::make_unique<MSP430Operand>(RegNum, Val, S, E);
}
static std::unique_ptr<MSP430Operand> CreateIndReg(unsigned RegNum, SMLoc S,
SMLoc E) {
- return make_unique<MSP430Operand>(k_IndReg, RegNum, S, E);
+ return std::make_unique<MSP430Operand>(k_IndReg, RegNum, S, E);
}
static std::unique_ptr<MSP430Operand> CreatePostIndReg(unsigned RegNum, SMLoc S,
SMLoc E) {
- return make_unique<MSP430Operand>(k_PostIndReg, RegNum, S, E);
+ return std::make_unique<MSP430Operand>(k_PostIndReg, RegNum, S, E);
}
SMLoc getStartLoc() const { return Start; }
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
index 38b7da32c246..0cdd1f4f701f 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
@@ -31,7 +31,7 @@ protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override {
// Translate fixup kind to ELF relocation type.
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
case FK_Data_1: return ELF::R_MSP430_8;
case FK_Data_2: return ELF::R_MSP430_16_BYTE;
case FK_Data_4: return ELF::R_MSP430_32;
@@ -54,5 +54,5 @@ protected:
std::unique_ptr<MCObjectTargetWriter>
llvm::createMSP430ELFObjectWriter(uint8_t OSABI) {
- return llvm::make_unique<MSP430ELFObjectWriter>(OSABI);
+ return std::make_unique<MSP430ELFObjectWriter>(OSABI);
}
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 3a71a084d1af..a3b91acdc6d0 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -159,8 +159,9 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) {
MCSection *Cur = OutStreamer->getCurrentSectionOnly();
const auto *F = &ISR.getFunction();
- assert(F->hasFnAttribute("interrupt") &&
- "Functions with MSP430_INTR CC should have 'interrupt' attribute");
+ if (F->getCallingConv() != CallingConv::MSP430_INTR) {
+ report_fatal_error("Functions with 'interrupt' attribute must have msp430_intrcc CC");
+ }
StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString();
MCSection *IV = OutStreamer->getContext().getELFSection(
"__interrupt_vector_" + IVIdx,
@@ -174,8 +175,9 @@ void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) {
bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit separate section for an interrupt vector if ISR
- if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR)
+ if (MF.getFunction().hasFnAttribute("interrupt")) {
EmitInterruptVectorSection(MF);
+ }
SetupMachineFunction(MF);
EmitFunctionBody();
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 45e7c26e4d30..ce5affdc25b0 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index 33ce3c70a2a3..70e284053021 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -22,7 +22,8 @@ protected:
public:
explicit MSP430FrameLowering()
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2) {}
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(2), -2,
+ Align(2)) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index fedfb857bd0f..64169d1f5eb1 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -327,8 +327,8 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::MSP430_BUILTIN);
// TODO: __mspabi_srall, __mspabi_srlll, __mspabi_sllll
- setMinFunctionAlignment(1);
- setPrefFunctionAlignment(1);
+ setMinFunctionAlignment(Align(2));
+ setPrefFunctionAlignment(Align(2));
}
SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
@@ -353,6 +353,9 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
}
}
+unsigned MSP430TargetLowering::getShiftAmountThreshold(EVT VT) const {
+ return 2;
+}
//===----------------------------------------------------------------------===//
// MSP430 Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -632,7 +635,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
llvm_unreachable(nullptr);
}
case MVT::i16:
- unsigned VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass);
+ Register VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
@@ -1446,8 +1449,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
case MSP430::Rrcl16: {
BuildMI(*BB, MI, dl, TII.get(MSP430::BIC16rc), MSP430::SR)
.addReg(MSP430::SR).addImm(1);
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
unsigned RrcOpc = MI.getOpcode() == MSP430::Rrcl16
? MSP430::RRC16r : MSP430::RRC8r;
BuildMI(*BB, MI, dl, TII.get(RrcOpc), DstReg)
@@ -1479,13 +1482,13 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
LoopBB->addSuccessor(RemBB);
LoopBB->addSuccessor(LoopBB);
- unsigned ShiftAmtReg = RI.createVirtualRegister(&MSP430::GR8RegClass);
- unsigned ShiftAmtReg2 = RI.createVirtualRegister(&MSP430::GR8RegClass);
- unsigned ShiftReg = RI.createVirtualRegister(RC);
- unsigned ShiftReg2 = RI.createVirtualRegister(RC);
- unsigned ShiftAmtSrcReg = MI.getOperand(2).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register ShiftAmtReg = RI.createVirtualRegister(&MSP430::GR8RegClass);
+ Register ShiftAmtReg2 = RI.createVirtualRegister(&MSP430::GR8RegClass);
+ Register ShiftReg = RI.createVirtualRegister(RC);
+ Register ShiftReg2 = RI.createVirtualRegister(RC);
+ Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
// BB:
// cmp 0, N
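Aside: getShiftAmountThreshold is a new TargetLowering hook consulted by generic combines when weighing shift-based rewrites; MSP430, which shifts one bit per instruction, returns 2 instead of inheriting the generic default. The exact semantics are defined in TargetLowering.h; the sketch below shows only the hook-override pattern, with illustrative names:

    struct LoweringBase {
      virtual unsigned shiftThreshold() const { return 1; } // generic default
      virtual ~LoweringBase() = default;
    };

    struct MSP430LikeLowering : LoweringBase {
      // The target picks its own break-even point for shift sequences.
      unsigned shiftThreshold() const override { return 2; }
    };

    int main() {
      MSP430LikeLowering TLI;
      return TLI.shiftThreshold() == 2 ? 0 : 1;
    }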
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index ee6b6316d7a9..9224e5e3d005 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -124,6 +124,8 @@ namespace llvm {
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ unsigned getShiftAmountThreshold(EVT VT) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index afbb2f213b45..bec357a1548d 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -139,7 +139,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
// We need to materialize the offset via add instruction.
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if (Offset < 0)
BuildMI(MBB, std::next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
.addReg(DstReg).addImm(-Offset);
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 8c4ca982c966..e9aeba76de85 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -46,7 +46,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
Options, getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- TLOF(make_unique<TargetLoweringObjectFileELF>()),
+ TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 1f7d095bf49b..21d0df74d458 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -233,9 +234,14 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
MCStreamer &Out, const MCSubtargetInfo *STI);
- bool expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, bool Is64FPU,
- SMLoc IDLoc, MCStreamer &Out,
- const MCSubtargetInfo *STI);
+ bool expandLoadSingleImmToGPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+ bool expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+ bool expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+ bool expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU, SMLoc IDLoc,
+ MCStreamer &Out, const MCSubtargetInfo *STI);
bool expandLoadAddress(unsigned DstReg, unsigned BaseReg,
const MCOperand &Offset, bool Is32BitAddress,
@@ -512,11 +518,11 @@ public:
// Remember the initial assembler options. The user can not modify these.
AssemblerOptions.push_back(
- llvm::make_unique<MipsAssemblerOptions>(getSTI().getFeatureBits()));
+ std::make_unique<MipsAssemblerOptions>(getSTI().getFeatureBits()));
// Create an assembler options environment for the user to modify.
AssemblerOptions.push_back(
- llvm::make_unique<MipsAssemblerOptions>(getSTI().getFeatureBits()));
+ std::make_unique<MipsAssemblerOptions>(getSTI().getFeatureBits()));
getTargetStreamer().updateABIInfo(*this);
@@ -844,7 +850,7 @@ private:
const MCRegisterInfo *RegInfo,
SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
- auto Op = llvm::make_unique<MipsOperand>(k_RegisterIndex, Parser);
+ auto Op = std::make_unique<MipsOperand>(k_RegisterIndex, Parser);
Op->RegIdx.Index = Index;
Op->RegIdx.RegInfo = RegInfo;
Op->RegIdx.Kind = RegKind;
@@ -1446,7 +1452,7 @@ public:
static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
MipsAsmParser &Parser) {
- auto Op = llvm::make_unique<MipsOperand>(k_Token, Parser);
+ auto Op = std::make_unique<MipsOperand>(k_Token, Parser);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -1521,7 +1527,7 @@ public:
static std::unique_ptr<MipsOperand>
CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MipsAsmParser &Parser) {
- auto Op = llvm::make_unique<MipsOperand>(k_Immediate, Parser);
+ auto Op = std::make_unique<MipsOperand>(k_Immediate, Parser);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -1531,7 +1537,7 @@ public:
static std::unique_ptr<MipsOperand>
CreateMem(std::unique_ptr<MipsOperand> Base, const MCExpr *Off, SMLoc S,
SMLoc E, MipsAsmParser &Parser) {
- auto Op = llvm::make_unique<MipsOperand>(k_Memory, Parser);
+ auto Op = std::make_unique<MipsOperand>(k_Memory, Parser);
Op->Mem.Base = Base.release();
Op->Mem.Off = Off;
Op->StartLoc = S;
@@ -1544,7 +1550,7 @@ public:
MipsAsmParser &Parser) {
assert(Regs.size() > 0 && "Empty list not allowed");
- auto Op = llvm::make_unique<MipsOperand>(k_RegList, Parser);
+ auto Op = std::make_unique<MipsOperand>(k_RegList, Parser);
Op->RegList.List = new SmallVector<unsigned, 10>(Regs.begin(), Regs.end());
Op->StartLoc = StartLoc;
Op->EndLoc = EndLoc;
@@ -1804,8 +1810,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break; // We'll deal with this situation later on when applying fixups.
if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(),
- 1LL << (inMicroMipsMode() ? 1 : 2)))
+ if (offsetToAlignment(Offset.getImm(),
+ (inMicroMipsMode() ? Align(2) : Align(4))))
return Error(IDLoc, "branch to misaligned address");
break;
case Mips::BGEZ:
@@ -1834,8 +1840,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break; // We'll deal with this situation later on when applying fixups.
if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(),
- 1LL << (inMicroMipsMode() ? 1 : 2)))
+ if (offsetToAlignment(Offset.getImm(),
+ (inMicroMipsMode() ? Align(2) : Align(4))))
return Error(IDLoc, "branch to misaligned address");
break;
case Mips::BGEC: case Mips::BGEC_MMR6:
@@ -1850,7 +1856,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break; // We'll deal with this situation later on when applying fixups.
if (!isIntN(18, Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(), 1LL << 2))
+ if (offsetToAlignment(Offset.getImm(), Align(4)))
return Error(IDLoc, "branch to misaligned address");
break;
case Mips::BLEZC: case Mips::BLEZC_MMR6:
@@ -1863,7 +1869,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break; // We'll deal with this situation later on when applying fixups.
if (!isIntN(18, Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(), 1LL << 2))
+ if (offsetToAlignment(Offset.getImm(), Align(4)))
return Error(IDLoc, "branch to misaligned address");
break;
case Mips::BEQZC: case Mips::BEQZC_MMR6:
@@ -1874,7 +1880,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break; // We'll deal with this situation later on when applying fixups.
if (!isIntN(23, Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(), 1LL << 2))
+ if (offsetToAlignment(Offset.getImm(), Align(4)))
return Error(IDLoc, "branch to misaligned address");
break;
case Mips::BEQZ16_MM:
@@ -1887,7 +1893,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break; // We'll deal with this situation later on when applying fixups.
if (!isInt<8>(Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(), 2LL))
+ if (offsetToAlignment(Offset.getImm(), Align(2)))
return Error(IDLoc, "branch to misaligned address");
break;
}
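Aside: the branch-target checks move from OffsetToAlignment(Imm, 1LL << N) to offsetToAlignment(Imm, Align(1 << N)), so the alignment is written directly in bytes (Align(4)) rather than as a shift of a log2 exponent. The function returns the distance to the next aligned address, so a nonzero result still means "misaligned". A minimal standalone analogue:

    #include <cassert>
    #include <cstdint>

    // Bytes needed to round Value up to the next multiple of AlignBytes.
    uint64_t offsetToAlignment(uint64_t Value, uint64_t AlignBytes) {
      return (AlignBytes - (Value % AlignBytes)) % AlignBytes;
    }

    int main() {
      assert(offsetToAlignment(8, 4) == 0); // aligned branch target
      assert(offsetToAlignment(6, 4) == 2); // nonzero => misaligned
    }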
@@ -2454,25 +2460,21 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
: MER_Success;
case Mips::LoadImmSingleGPR:
- return expandLoadImmReal(Inst, true, true, false, IDLoc, Out, STI)
- ? MER_Fail
- : MER_Success;
+ return expandLoadSingleImmToGPR(Inst, IDLoc, Out, STI) ? MER_Fail
+ : MER_Success;
case Mips::LoadImmSingleFGR:
- return expandLoadImmReal(Inst, true, false, false, IDLoc, Out, STI)
- ? MER_Fail
- : MER_Success;
+ return expandLoadSingleImmToFPR(Inst, IDLoc, Out, STI) ? MER_Fail
+ : MER_Success;
case Mips::LoadImmDoubleGPR:
- return expandLoadImmReal(Inst, false, true, false, IDLoc, Out, STI)
- ? MER_Fail
- : MER_Success;
+ return expandLoadDoubleImmToGPR(Inst, IDLoc, Out, STI) ? MER_Fail
+ : MER_Success;
case Mips::LoadImmDoubleFGR:
- return expandLoadImmReal(Inst, false, false, true, IDLoc, Out, STI)
- ? MER_Fail
- : MER_Success;
+ return expandLoadDoubleImmToFPR(Inst, true, IDLoc, Out, STI) ? MER_Fail
+ : MER_Success;
case Mips::LoadImmDoubleFGR_32:
- return expandLoadImmReal(Inst, false, false, false, IDLoc, Out, STI)
- ? MER_Fail
- : MER_Success;
+ return expandLoadDoubleImmToFPR(Inst, false, IDLoc, Out, STI) ? MER_Fail
+ : MER_Success;
+
case Mips::Ulh:
return expandUlh(Inst, true, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::Ulhu:
@@ -2868,12 +2870,12 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
bool Is32BitSym, SMLoc IDLoc,
MCStreamer &Out,
const MCSubtargetInfo *STI) {
- // FIXME: These expansions do not respect -mxgot.
MipsTargetStreamer &TOut = getTargetStreamer();
- bool UseSrcReg = SrcReg != Mips::NoRegister;
+ bool UseSrcReg = SrcReg != Mips::NoRegister && SrcReg != Mips::ZERO &&
+ SrcReg != Mips::ZERO_64;
warnIfNoMacro(IDLoc);
- if (inPicMode() && ABI.IsO32()) {
+ if (inPicMode()) {
MCValue Res;
if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) {
Error(IDLoc, "expected relocatable expression");
@@ -2884,46 +2886,41 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
return true;
}
+ bool IsPtr64 = ABI.ArePtrs64bit();
+ bool IsLocalSym =
+ Res.getSymA()->getSymbol().isInSection() ||
+ Res.getSymA()->getSymbol().isTemporary() ||
+ (Res.getSymA()->getSymbol().isELF() &&
+ cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() ==
+ ELF::STB_LOCAL);
+ bool UseXGOT = STI->getFeatureBits()[Mips::FeatureXGOT] && !IsLocalSym;
+
// The case where the result register is $25 is somewhat special. If the
// symbol in the final relocation is external and not modified with a
- // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16.
+ // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16
+ // or R_MIPS_CALL16 instead of R_MIPS_GOT_DISP in 64-bit case.
+ // (The 64-bit case likewise uses R_MIPS_CALL16 rather than R_MIPS_GOT_DISP.)
if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg &&
- Res.getConstant() == 0 &&
- !(Res.getSymA()->getSymbol().isInSection() ||
- Res.getSymA()->getSymbol().isTemporary() ||
- (Res.getSymA()->getSymbol().isELF() &&
- cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() ==
- ELF::STB_LOCAL))) {
- const MCExpr *CallExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
- TOut.emitRRX(Mips::LW, DstReg, GPReg, MCOperand::createExpr(CallExpr),
- IDLoc, STI);
+ Res.getConstant() == 0 && !IsLocalSym) {
+ if (UseXGOT) {
+ const MCExpr *CallHiExpr = MipsMCExpr::create(MipsMCExpr::MEK_CALL_HI16,
+ SymExpr, getContext());
+ const MCExpr *CallLoExpr = MipsMCExpr::create(MipsMCExpr::MEK_CALL_LO16,
+ SymExpr, getContext());
+ TOut.emitRX(Mips::LUi, DstReg, MCOperand::createExpr(CallHiExpr), IDLoc,
+ STI);
+ TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, DstReg, DstReg, GPReg,
+ IDLoc, STI);
+ TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, DstReg, DstReg,
+ MCOperand::createExpr(CallLoExpr), IDLoc, STI);
+ } else {
+ const MCExpr *CallExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
+ TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, DstReg, GPReg,
+ MCOperand::createExpr(CallExpr), IDLoc, STI);
+ }
return false;
}
- // The remaining cases are:
- // External GOT: lw $tmp, %got(symbol+offset)($gp)
- // >addiu $tmp, $tmp, %lo(offset)
- // >addiu $rd, $tmp, $rs
- // Local GOT: lw $tmp, %got(symbol+offset)($gp)
- // addiu $tmp, $tmp, %lo(symbol+offset)($gp)
- // >addiu $rd, $tmp, $rs
- // The addiu's marked with a '>' may be omitted if they are redundant. If
- // this happens then the last instruction must use $rd as the result
- // register.
- const MipsMCExpr *GotExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_GOT, SymExpr, getContext());
- const MCExpr *LoExpr = nullptr;
- if (Res.getSymA()->getSymbol().isInSection() ||
- Res.getSymA()->getSymbol().isTemporary())
- LoExpr = MipsMCExpr::create(MipsMCExpr::MEK_LO, SymExpr, getContext());
- else if (Res.getConstant() != 0) {
- // External symbols fully resolve the symbol with just the %got(symbol)
- // but we must still account for any offset to the symbol for expressions
- // like symbol+8.
- LoExpr = MCConstantExpr::create(Res.getConstant(), getContext());
- }
-
unsigned TmpReg = DstReg;
if (UseSrcReg &&
getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg,
@@ -2936,94 +2933,102 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
TmpReg = ATReg;
}
- TOut.emitRRX(Mips::LW, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc,
- STI);
-
- if (LoExpr)
- TOut.emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
+ if (UseXGOT) {
+ // Loading address from XGOT
+ // External GOT: lui $tmp, %got_hi(symbol)($gp)
+ // addu $tmp, $tmp, $gp
+ // lw $tmp, %got_lo(symbol)($tmp)
+ // >addiu $tmp, $tmp, offset
+ // >addiu $rd, $tmp, $rs
+ // The addiu's marked with a '>' may be omitted if they are redundant. If
+ // this happens then the last instruction must use $rd as the result
+ // register.
+ const MCExpr *CallHiExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_GOT_HI16, SymExpr, getContext());
+ const MCExpr *CallLoExpr = MipsMCExpr::create(
+ MipsMCExpr::MEK_GOT_LO16, Res.getSymA(), getContext());
+
+ TOut.emitRX(Mips::LUi, TmpReg, MCOperand::createExpr(CallHiExpr), IDLoc,
+ STI);
+ TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, TmpReg, TmpReg, GPReg,
IDLoc, STI);
+ TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, TmpReg, TmpReg,
+ MCOperand::createExpr(CallLoExpr), IDLoc, STI);
- if (UseSrcReg)
- TOut.emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, STI);
-
- return false;
- }
-
- if (inPicMode() && ABI.ArePtrs64bit()) {
- MCValue Res;
- if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) {
- Error(IDLoc, "expected relocatable expression");
- return true;
- }
- if (Res.getSymB() != nullptr) {
- Error(IDLoc, "expected relocatable expression with only one symbol");
- return true;
- }
+ if (Res.getConstant() != 0)
+ TOut.emitRRX(IsPtr64 ? Mips::DADDiu : Mips::ADDiu, TmpReg, TmpReg,
+ MCOperand::createExpr(MCConstantExpr::create(
+ Res.getConstant(), getContext())),
+ IDLoc, STI);
- // The case where the result register is $25 is somewhat special. If the
- // symbol in the final relocation is external and not modified with a
- // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT_DISP.
- if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg &&
- Res.getConstant() == 0 &&
- !(Res.getSymA()->getSymbol().isInSection() ||
- Res.getSymA()->getSymbol().isTemporary() ||
- (Res.getSymA()->getSymbol().isELF() &&
- cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() ==
- ELF::STB_LOCAL))) {
- const MCExpr *CallExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
- TOut.emitRRX(Mips::LD, DstReg, GPReg, MCOperand::createExpr(CallExpr),
- IDLoc, STI);
+ if (UseSrcReg)
+ TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, DstReg, TmpReg, SrcReg,
+ IDLoc, STI);
return false;
}
- // The remaining cases are:
- // Small offset: ld $tmp, %got_disp(symbol)($gp)
- // >daddiu $tmp, $tmp, offset
- // >daddu $rd, $tmp, $rs
- // The daddiu's marked with a '>' may be omitted if they are redundant. If
- // this happens then the last instruction must use $rd as the result
- // register.
- const MipsMCExpr *GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP,
- Res.getSymA(),
- getContext());
+ const MipsMCExpr *GotExpr = nullptr;
const MCExpr *LoExpr = nullptr;
- if (Res.getConstant() != 0) {
- // Symbols fully resolve with just the %got_disp(symbol) but we
- // must still account for any offset to the symbol for
- // expressions like symbol+8.
- LoExpr = MCConstantExpr::create(Res.getConstant(), getContext());
-
- // FIXME: Offsets greater than 16 bits are not yet implemented.
- // FIXME: The correct range is a 32-bit sign-extended number.
- if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) {
- Error(IDLoc, "macro instruction uses large offset, which is not "
- "currently supported");
- return true;
+ if (IsPtr64) {
+ // The remaining cases are:
+ // Small offset: ld $tmp, %got_disp(symbol)($gp)
+ // >daddiu $tmp, $tmp, offset
+ // >daddu $rd, $tmp, $rs
+ // The daddiu's marked with a '>' may be omitted if they are redundant. If
+ // this happens then the last instruction must use $rd as the result
+ // register.
+ GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, Res.getSymA(),
+ getContext());
+ if (Res.getConstant() != 0) {
+ // Symbols fully resolve with just the %got_disp(symbol) but we
+ // must still account for any offset to the symbol for
+ // expressions like symbol+8.
+ LoExpr = MCConstantExpr::create(Res.getConstant(), getContext());
+
+ // FIXME: Offsets greater than 16 bits are not yet implemented.
+ // FIXME: The correct range is a 32-bit sign-extended number.
+ if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) {
+ Error(IDLoc, "macro instruction uses large offset, which is not "
+ "currently supported");
+ return true;
+ }
+ }
+ } else {
+ // The remaining cases are:
+ // External GOT: lw $tmp, %got(symbol)($gp)
+ // >addiu $tmp, $tmp, offset
+ // >addiu $rd, $tmp, $rs
+ // Local GOT: lw $tmp, %got(symbol+offset)($gp)
+ // addiu $tmp, $tmp, %lo(symbol+offset)($gp)
+ // >addiu $rd, $tmp, $rs
+ // The addiu's marked with a '>' may be omitted if they are redundant. If
+ // this happens then the last instruction must use $rd as the result
+ // register.
+ if (IsLocalSym) {
+ GotExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_GOT, SymExpr, getContext());
+ LoExpr = MipsMCExpr::create(MipsMCExpr::MEK_LO, SymExpr, getContext());
+ } else {
+ // External symbols fully resolve the symbol with just the %got(symbol)
+ // but we must still account for any offset to the symbol for
+ // expressions like symbol+8.
+ GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT, Res.getSymA(),
+ getContext());
+ if (Res.getConstant() != 0)
+ LoExpr = MCConstantExpr::create(Res.getConstant(), getContext());
}
}
- unsigned TmpReg = DstReg;
- if (UseSrcReg &&
- getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg,
- SrcReg)) {
- // If $rs is the same as $rd, we need to use AT.
- // If it is not available we exit.
- unsigned ATReg = getATReg(IDLoc);
- if (!ATReg)
- return true;
- TmpReg = ATReg;
- }
-
- TOut.emitRRX(Mips::LD, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc,
- STI);
+ TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, TmpReg, GPReg,
+ MCOperand::createExpr(GotExpr), IDLoc, STI);
if (LoExpr)
- TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
- IDLoc, STI);
+ TOut.emitRRX(IsPtr64 ? Mips::DADDiu : Mips::ADDiu, TmpReg, TmpReg,
+ MCOperand::createExpr(LoExpr), IDLoc, STI);
if (UseSrcReg)
- TOut.emitRRR(Mips::DADDu, DstReg, TmpReg, SrcReg, IDLoc, STI);
+ TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, DstReg, TmpReg, SrcReg,
+ IDLoc, STI);
return false;
}
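
[Editor's note: the expansion above rejects addend constants outside the signed 16-bit range (the FIXME records that the full 32-bit sign-extended range is unimplemented). A hedged standalone sketch of that check, not the LLVM helper itself:

#include <cstdint>
#include <cstdio>

// Hypothetical mirror of the simm16 range test used by the GOT expansion.
static bool fitsSimm16(int64_t C) { return C >= -0x8000 && C <= 0x7fff; }

int main() {
  std::printf("%d %d\n", fitsSimm16(0x7fff), fitsSimm16(0x8000)); // 1 0
}
]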
@@ -3289,10 +3294,43 @@ bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc,
return false;
}
-bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR,
- bool Is64FPU, SMLoc IDLoc,
- MCStreamer &Out,
- const MCSubtargetInfo *STI) {
+static uint64_t convertIntToDoubleImm(uint64_t ImmOp64) {
+  // If ImmOp64 came from an AsmToken::Integer (all bits in the exponent
+  // field are zero), convert its value to a double (e.g. 1 to 1.0).
+ if ((Hi_32(ImmOp64) & 0x7ff00000) == 0) {
+ APFloat RealVal(APFloat::IEEEdouble(), ImmOp64);
+ ImmOp64 = RealVal.bitcastToAPInt().getZExtValue();
+ }
+ return ImmOp64;
+}
+
+static uint32_t convertDoubleImmToSingleImm(uint64_t ImmOp64) {
+ // Conversion of a double in an uint64_t to a float in a uint32_t,
+ // retaining the bit pattern of a float.
+ double DoubleImm = BitsToDouble(ImmOp64);
+ float TmpFloat = static_cast<float>(DoubleImm);
+ return FloatToBits(TmpFloat);
+}
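
[Editor's note: for readers outside the LLVM tree, this is a minimal standalone sketch of what the two helpers above compute, with memcpy standing in for BitsToDouble/FloatToBits; the names toDoubleBits/toSingleBits are illustrative, not LLVM API:

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint64_t toDoubleBits(uint64_t Imm) {
  // All-zero exponent field: the token was an integer literal, so convert
  // its value to a double and keep that double's bit pattern.
  if (((Imm >> 32) & 0x7ff00000) == 0) {
    double D = static_cast<double>(Imm);
    std::memcpy(&Imm, &D, sizeof(Imm));
  }
  return Imm;
}

static uint32_t toSingleBits(uint64_t DoubleBits) {
  double D;
  std::memcpy(&D, &DoubleBits, sizeof(D));
  float F = static_cast<float>(D); // narrow the value, then take its bits
  uint32_t Out;
  std::memcpy(&Out, &F, sizeof(Out));
  return Out;
}

int main() {
  // "li.s $f0, 1" parses as the integer 1; the result is 1.0f's bit pattern.
  std::printf("0x%08x\n", toSingleBits(toDoubleBits(1))); // 0x3f800000
}
]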
+
+bool MipsAsmParser::expandLoadSingleImmToGPR(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ assert(Inst.getNumOperands() == 2 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() &&
+ "Invalid instruction operand.");
+
+ unsigned FirstReg = Inst.getOperand(0).getReg();
+ uint64_t ImmOp64 = Inst.getOperand(1).getImm();
+
+  uint32_t ImmOp32 =
+      convertDoubleImmToSingleImm(convertIntToDoubleImm(ImmOp64));
+
+ return loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, false, IDLoc,
+ Out, STI);
+}
+
+bool MipsAsmParser::expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
MipsTargetStreamer &TOut = getTargetStreamer();
assert(Inst.getNumOperands() == 2 && "Invalid operand count");
assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() &&
@@ -3301,166 +3339,184 @@ bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR,
unsigned FirstReg = Inst.getOperand(0).getReg();
uint64_t ImmOp64 = Inst.getOperand(1).getImm();
- uint32_t HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32;
- // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the
- // exponent field), convert it to double (e.g. 1 to 1.0)
- if ((HiImmOp64 & 0x7ff00000) == 0) {
- APFloat RealVal(APFloat::IEEEdouble(), ImmOp64);
- ImmOp64 = RealVal.bitcastToAPInt().getZExtValue();
+ ImmOp64 = convertIntToDoubleImm(ImmOp64);
+
+  uint32_t ImmOp32 = convertDoubleImmToSingleImm(ImmOp64);
+
+ unsigned TmpReg = Mips::ZERO;
+ if (ImmOp32 != 0) {
+ TmpReg = getATReg(IDLoc);
+ if (!TmpReg)
+ return true;
}
- uint32_t LoImmOp64 = ImmOp64 & 0xffffffff;
- HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32;
+ if (Lo_32(ImmOp64) == 0) {
+ if (TmpReg != Mips::ZERO && loadImmediate(ImmOp32, TmpReg, Mips::NoRegister,
+ true, false, IDLoc, Out, STI))
+ return true;
+ TOut.emitRR(Mips::MTC1, FirstReg, TmpReg, IDLoc, STI);
+ return false;
+ }
- if (IsSingle) {
- // Conversion of a double in an uint64_t to a float in a uint32_t,
- // retaining the bit pattern of a float.
- uint32_t ImmOp32;
- double doubleImm = BitsToDouble(ImmOp64);
- float tmp_float = static_cast<float>(doubleImm);
- ImmOp32 = FloatToBits(tmp_float);
+ MCSection *CS = getStreamer().getCurrentSectionOnly();
+ // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections
+ // where appropriate.
+ MCSection *ReadOnlySection =
+ getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
- if (IsGPR) {
- if (loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, true, IDLoc,
- Out, STI))
- return true;
- return false;
- } else {
- unsigned ATReg = getATReg(IDLoc);
- if (!ATReg)
- return true;
- if (LoImmOp64 == 0) {
- if (loadImmediate(ImmOp32, ATReg, Mips::NoRegister, true, true, IDLoc,
- Out, STI))
- return true;
- TOut.emitRR(Mips::MTC1, FirstReg, ATReg, IDLoc, STI);
- return false;
- }
+ MCSymbol *Sym = getContext().createTempSymbol();
+ const MCExpr *LoSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *LoExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
- MCSection *CS = getStreamer().getCurrentSectionOnly();
- // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections
- // where appropriate.
- MCSection *ReadOnlySection = getContext().getELFSection(
- ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ getStreamer().SwitchSection(ReadOnlySection);
+ getStreamer().EmitLabel(Sym, IDLoc);
+ getStreamer().EmitIntValue(ImmOp32, 4);
+ getStreamer().SwitchSection(CS);
- MCSymbol *Sym = getContext().createTempSymbol();
- const MCExpr *LoSym =
- MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- const MipsMCExpr *LoExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+ if (emitPartialAddress(TOut, IDLoc, Sym))
+ return true;
+ TOut.emitRRX(Mips::LWC1, FirstReg, TmpReg, MCOperand::createExpr(LoExpr),
+ IDLoc, STI);
+ return false;
+}
+
+bool MipsAsmParser::expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ assert(Inst.getNumOperands() == 2 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() &&
+ "Invalid instruction operand.");
+
+ unsigned FirstReg = Inst.getOperand(0).getReg();
+ uint64_t ImmOp64 = Inst.getOperand(1).getImm();
+
+ ImmOp64 = convertIntToDoubleImm(ImmOp64);
- getStreamer().SwitchSection(ReadOnlySection);
- getStreamer().EmitLabel(Sym, IDLoc);
- getStreamer().EmitIntValue(ImmOp32, 4);
- getStreamer().SwitchSection(CS);
+ if (Lo_32(ImmOp64) == 0) {
+ if (isGP64bit()) {
+ if (loadImmediate(ImmOp64, FirstReg, Mips::NoRegister, false, false,
+ IDLoc, Out, STI))
+ return true;
+ } else {
+ if (loadImmediate(Hi_32(ImmOp64), FirstReg, Mips::NoRegister, true, false,
+ IDLoc, Out, STI))
+ return true;
- if(emitPartialAddress(TOut, IDLoc, Sym))
+ if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, false,
+ IDLoc, Out, STI))
return true;
- TOut.emitRRX(Mips::LWC1, FirstReg, ATReg,
- MCOperand::createExpr(LoExpr), IDLoc, STI);
}
return false;
}
- // if(!IsSingle)
- unsigned ATReg = getATReg(IDLoc);
- if (!ATReg)
+ MCSection *CS = getStreamer().getCurrentSectionOnly();
+ MCSection *ReadOnlySection =
+ getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+ MCSymbol *Sym = getContext().createTempSymbol();
+ const MCExpr *LoSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *LoExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+
+ getStreamer().SwitchSection(ReadOnlySection);
+ getStreamer().EmitLabel(Sym, IDLoc);
+ getStreamer().EmitValueToAlignment(8);
+ getStreamer().EmitIntValue(ImmOp64, 8);
+ getStreamer().SwitchSection(CS);
+
+ unsigned TmpReg = getATReg(IDLoc);
+ if (!TmpReg)
return true;
- if (IsGPR) {
- if (LoImmOp64 == 0) {
- if(isABI_N32() || isABI_N64()) {
- if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, false, true,
- IDLoc, Out, STI))
- return true;
- return false;
- } else {
- if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, true, true,
- IDLoc, Out, STI))
- return true;
+ if (emitPartialAddress(TOut, IDLoc, Sym))
+ return true;
- if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, true,
- IDLoc, Out, STI))
- return true;
- return false;
- }
- }
+ TOut.emitRRX(isABI_N64() ? Mips::DADDiu : Mips::ADDiu, TmpReg, TmpReg,
+ MCOperand::createExpr(LoExpr), IDLoc, STI);
- MCSection *CS = getStreamer().getCurrentSectionOnly();
- MCSection *ReadOnlySection = getContext().getELFSection(
- ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ if (isGP64bit())
+ TOut.emitRRI(Mips::LD, FirstReg, TmpReg, 0, IDLoc, STI);
+ else {
+ TOut.emitRRI(Mips::LW, FirstReg, TmpReg, 0, IDLoc, STI);
+ TOut.emitRRI(Mips::LW, nextReg(FirstReg), TmpReg, 4, IDLoc, STI);
+ }
+ return false;
+}
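
[Editor's note: a standalone sketch (names illustrative) of llvm's Hi_32/Lo_32, which the expansion above uses to split a 64-bit double image across two 32-bit GPRs:

#include <cstdint>
#include <cstdio>

static uint32_t hi32(uint64_t V) { return static_cast<uint32_t>(V >> 32); }
static uint32_t lo32(uint64_t V) { return static_cast<uint32_t>(V); }

int main() {
  uint64_t OneDouble = 0x3ff0000000000000ull; // bit pattern of 1.0
  std::printf("hi=0x%08x lo=0x%08x\n", hi32(OneDouble), lo32(OneDouble));
  // hi=0x3ff00000 lo=0x00000000: the low word is zero, so on 32-bit
  // targets only the high half needs a real load.
}
]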
- MCSymbol *Sym = getContext().createTempSymbol();
- const MCExpr *LoSym =
- MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- const MipsMCExpr *LoExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+bool MipsAsmParser::expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU,
+ SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ assert(Inst.getNumOperands() == 2 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() &&
+ "Invalid instruction operand.");
- getStreamer().SwitchSection(ReadOnlySection);
- getStreamer().EmitLabel(Sym, IDLoc);
- getStreamer().EmitIntValue(HiImmOp64, 4);
- getStreamer().EmitIntValue(LoImmOp64, 4);
- getStreamer().SwitchSection(CS);
+ unsigned FirstReg = Inst.getOperand(0).getReg();
+ uint64_t ImmOp64 = Inst.getOperand(1).getImm();
+
+ ImmOp64 = convertIntToDoubleImm(ImmOp64);
- if(emitPartialAddress(TOut, IDLoc, Sym))
+ unsigned TmpReg = Mips::ZERO;
+ if (ImmOp64 != 0) {
+ TmpReg = getATReg(IDLoc);
+ if (!TmpReg)
return true;
- if(isABI_N64())
- TOut.emitRRX(Mips::DADDiu, ATReg, ATReg,
- MCOperand::createExpr(LoExpr), IDLoc, STI);
- else
- TOut.emitRRX(Mips::ADDiu, ATReg, ATReg,
- MCOperand::createExpr(LoExpr), IDLoc, STI);
+ }
- if(isABI_N32() || isABI_N64())
- TOut.emitRRI(Mips::LD, FirstReg, ATReg, 0, IDLoc, STI);
- else {
- TOut.emitRRI(Mips::LW, FirstReg, ATReg, 0, IDLoc, STI);
- TOut.emitRRI(Mips::LW, nextReg(FirstReg), ATReg, 4, IDLoc, STI);
- }
- return false;
- } else { // if(!IsGPR && !IsSingle)
- if ((LoImmOp64 == 0) &&
- !((HiImmOp64 & 0xffff0000) && (HiImmOp64 & 0x0000ffff))) {
- // FIXME: In the case where the constant is zero, we can load the
- // register directly from the zero register.
- if (loadImmediate(HiImmOp64, ATReg, Mips::NoRegister, true, true, IDLoc,
+ if ((Lo_32(ImmOp64) == 0) &&
+ !((Hi_32(ImmOp64) & 0xffff0000) && (Hi_32(ImmOp64) & 0x0000ffff))) {
+ if (isGP64bit()) {
+ if (TmpReg != Mips::ZERO &&
+ loadImmediate(ImmOp64, TmpReg, Mips::NoRegister, false, false, IDLoc,
Out, STI))
return true;
- if (isABI_N32() || isABI_N64())
- TOut.emitRR(Mips::DMTC1, FirstReg, ATReg, IDLoc, STI);
- else if (hasMips32r2()) {
- TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI);
- TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, ATReg, IDLoc, STI);
- } else {
- TOut.emitRR(Mips::MTC1, nextReg(FirstReg), ATReg, IDLoc, STI);
- TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI);
- }
+ TOut.emitRR(Mips::DMTC1, FirstReg, TmpReg, IDLoc, STI);
return false;
}
- MCSection *CS = getStreamer().getCurrentSectionOnly();
- // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections
- // where appropriate.
- MCSection *ReadOnlySection = getContext().getELFSection(
- ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ if (TmpReg != Mips::ZERO &&
+ loadImmediate(Hi_32(ImmOp64), TmpReg, Mips::NoRegister, true, false,
+ IDLoc, Out, STI))
+ return true;
- MCSymbol *Sym = getContext().createTempSymbol();
- const MCExpr *LoSym =
- MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- const MipsMCExpr *LoExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+ if (hasMips32r2()) {
+ TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI);
+ TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, TmpReg, IDLoc, STI);
+ } else {
+ TOut.emitRR(Mips::MTC1, nextReg(FirstReg), TmpReg, IDLoc, STI);
+ TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI);
+ }
+ return false;
+ }
- getStreamer().SwitchSection(ReadOnlySection);
- getStreamer().EmitLabel(Sym, IDLoc);
- getStreamer().EmitIntValue(HiImmOp64, 4);
- getStreamer().EmitIntValue(LoImmOp64, 4);
- getStreamer().SwitchSection(CS);
+ MCSection *CS = getStreamer().getCurrentSectionOnly();
+ // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections
+ // where appropriate.
+ MCSection *ReadOnlySection =
+ getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+ MCSymbol *Sym = getContext().createTempSymbol();
+ const MCExpr *LoSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *LoExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+
+ getStreamer().SwitchSection(ReadOnlySection);
+ getStreamer().EmitLabel(Sym, IDLoc);
+ getStreamer().EmitValueToAlignment(8);
+ getStreamer().EmitIntValue(ImmOp64, 8);
+ getStreamer().SwitchSection(CS);
+
+ if (emitPartialAddress(TOut, IDLoc, Sym))
+ return true;
+
+ TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, TmpReg,
+ MCOperand::createExpr(LoExpr), IDLoc, STI);
- if(emitPartialAddress(TOut, IDLoc, Sym))
- return true;
- TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, ATReg,
- MCOperand::createExpr(LoExpr), IDLoc, STI);
- }
return false;
}
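
[Editor's note: a hedged sketch of the fast-path test above: with a zero low word, the high word qualifies for a single-instruction load only if it does not have bits set in both 16-bit halves:

#include <cstdint>
#include <cstdio>

static bool singleInsnHi(uint32_t Hi) {
  return !((Hi & 0xffff0000) && (Hi & 0x0000ffff));
}

int main() {
  std::printf("%d %d\n",
              singleInsnHi(0x3ff00000),  // 1: bits only in the upper half
              singleInsnHi(0x3ff00001)); // 0: both halves populated
}
]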
@@ -3489,7 +3545,7 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc,
} else {
if (!isInt<17>(Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(), 1LL << 1))
+ if (offsetToAlignment(Offset.getImm(), Align(2)))
return Error(IDLoc, "branch to misaligned address");
Inst.clear();
Inst.setOpcode(Mips::BEQ_MM);
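
[Editor's note: offsetToAlignment(Offset, Align(2)) in the hunk above returns the distance up to the next 2-byte boundary; nonzero flags a misaligned microMIPS branch target. A standalone sketch of the same arithmetic:

#include <cstdint>
#include <cstdio>

static uint64_t offsetToAlign2(uint64_t Offset) {
  return -Offset & 1; // alignTo(Offset, 2) - Offset
}

int main() {
  std::printf("%llu %llu\n",
              (unsigned long long)offsetToAlign2(6),  // 0: aligned
              (unsigned long long)offsetToAlign2(7)); // 1: misaligned
}
]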
@@ -3581,7 +3637,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
assert(DstRegOp.isReg() && "expected register operand kind");
const MCOperand &BaseRegOp = Inst.getOperand(1);
assert(BaseRegOp.isReg() && "expected register operand kind");
- const MCOperand &OffsetOp = Inst.getOperand(2);
MipsTargetStreamer &TOut = getTargetStreamer();
unsigned DstReg = DstRegOp.getReg();
@@ -3603,6 +3658,26 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return;
}
+ if (Inst.getNumOperands() > 3) {
+ const MCOperand &BaseRegOp = Inst.getOperand(2);
+ assert(BaseRegOp.isReg() && "expected register operand kind");
+ const MCOperand &ExprOp = Inst.getOperand(3);
+    assert(ExprOp.isExpr() && "expected expression operand kind");
+
+ unsigned BaseReg = BaseRegOp.getReg();
+ const MCExpr *ExprOffset = ExprOp.getExpr();
+
+ MCOperand LoOperand = MCOperand::createExpr(
+ MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext()));
+ MCOperand HiOperand = MCOperand::createExpr(
+ MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext()));
+ TOut.emitSCWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand,
+ LoOperand, TmpReg, IDLoc, STI);
+ return;
+ }
+
+ const MCOperand &OffsetOp = Inst.getOperand(2);
+
if (OffsetOp.isImm()) {
int64_t LoOffset = OffsetOp.getImm() & 0xffff;
int64_t HiOffset = OffsetOp.getImm() & ~0xffff;
@@ -3625,21 +3700,54 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu, TmpReg, TmpReg,
BaseReg, IDLoc, STI);
TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, LoOffset, IDLoc, STI);
- } else {
- assert(OffsetOp.isExpr() && "expected expression operand kind");
- const MCExpr *ExprOffset = OffsetOp.getExpr();
- MCOperand LoOperand = MCOperand::createExpr(
- MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext()));
- MCOperand HiOperand = MCOperand::createExpr(
- MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext()));
+ return;
+ }
+
+ if (OffsetOp.isExpr()) {
+ if (inPicMode()) {
+ // FIXME:
+ // c) Check that immediates of R_MIPS_GOT16/R_MIPS_LO16 relocations
+ // do not exceed 16-bit.
+ // d) Use R_MIPS_GOT_PAGE/R_MIPS_GOT_OFST relocations instead
+ // of R_MIPS_GOT_DISP in appropriate cases to reduce number
+ // of GOT entries.
+ MCValue Res;
+ if (!OffsetOp.getExpr()->evaluateAsRelocatable(Res, nullptr, nullptr)) {
+ Error(IDLoc, "expected relocatable expression");
+ return;
+ }
+ if (Res.getSymB() != nullptr) {
+ Error(IDLoc, "expected relocatable expression with only one symbol");
+ return;
+ }
- if (IsLoad)
- TOut.emitLoadWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand,
- LoOperand, TmpReg, IDLoc, STI);
- else
- TOut.emitStoreWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand,
- LoOperand, TmpReg, IDLoc, STI);
+ loadAndAddSymbolAddress(Res.getSymA(), TmpReg, BaseReg,
+ !ABI.ArePtrs64bit(), IDLoc, Out, STI);
+ TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, Res.getConstant(), IDLoc,
+ STI);
+ } else {
+ // FIXME: Implement 64-bit case.
+ // 1) lw $8, sym => lui $8, %hi(sym)
+ // lw $8, %lo(sym)($8)
+ // 2) sw $8, sym => lui $at, %hi(sym)
+ // sw $8, %lo(sym)($at)
+ const MCExpr *ExprOffset = OffsetOp.getExpr();
+ MCOperand LoOperand = MCOperand::createExpr(
+ MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext()));
+ MCOperand HiOperand = MCOperand::createExpr(
+ MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext()));
+
+ // Generate the base address in TmpReg.
+ TOut.emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI);
+ if (BaseReg != Mips::ZERO)
+ TOut.emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI);
+ // Emit the load or store with the adjusted base and offset.
+ TOut.emitRRX(Inst.getOpcode(), DstReg, TmpReg, LoOperand, IDLoc, STI);
+ }
+ return;
}
+
+ llvm_unreachable("unexpected operand type");
}
bool MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
@@ -6976,7 +7084,7 @@ bool MipsAsmParser::parseSetPushDirective() {
// Create a copy of the current assembler options environment and push it.
AssemblerOptions.push_back(
- llvm::make_unique<MipsAssemblerOptions>(AssemblerOptions.back().get()));
+ std::make_unique<MipsAssemblerOptions>(AssemblerOptions.back().get()));
getTargetStreamer().emitDirectiveSetPush();
return false;
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index ef13507fe63a..c3e98fe410c1 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -267,6 +267,13 @@ static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+// DecodeJumpTargetXMM - Decode microMIPS jump and link exchange target,
+// which is shifted left by 2 bits.
+static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMem(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -2291,6 +2298,15 @@ static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
+ Inst.addOperand(MCOperand::createImm(JumpOffset));
+ return MCDisassembler::Success;
+}
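
[Editor's note: a standalone sketch of the decode above; fieldFromInstruction(Insn, 0, 26) extracts the low 26 bits, which are then scaled to a byte offset:

#include <cstdint>
#include <cstdio>

static uint32_t decodeJalxTarget(uint32_t Insn) {
  return (Insn & ((1u << 26) - 1)) << 2;
}

int main() {
  std::printf("0x%08x\n", decodeJalxTarget(0xf4000000u | 0x123456u));
  // 0x0048d158: only the 26-bit field survives, shifted left by 2 bits
}
]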
+
static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst,
unsigned Value,
uint64_t Address,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 859f9cbbca07..70f2a7bdf10f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -304,7 +304,6 @@ Optional<MCFixupKind> MipsAsmBackend::getFixupKind(StringRef Name) const {
return StringSwitch<Optional<MCFixupKind>>(Name)
.Case("R_MIPS_NONE", FK_NONE)
.Case("R_MIPS_32", FK_Data_4)
- .Case("R_MIPS_GOT_PAGE", (MCFixupKind)Mips::fixup_Mips_GOT_PAGE)
.Case("R_MIPS_CALL_HI16", (MCFixupKind)Mips::fixup_Mips_CALL_HI16)
.Case("R_MIPS_CALL_LO16", (MCFixupKind)Mips::fixup_Mips_CALL_LO16)
.Case("R_MIPS_CALL16", (MCFixupKind)Mips::fixup_Mips_CALL16)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 4d7e36995ae4..cca75dfc45c2 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -66,9 +66,9 @@ public:
/// fixupNeedsRelaxation - Target specific predicate for whether a given
/// fixup requires the associated instruction to be relaxed.
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
// FIXME.
llvm_unreachable("RelaxInstruction() unimplemented");
return false;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index cf7bae98a27f..cc3168790b98 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -219,7 +219,7 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
const MCFixup &Fixup,
bool IsPCRel) const {
// Determine the type of the relocation.
- unsigned Kind = (unsigned)Fixup.getKind();
+ unsigned Kind = Fixup.getTargetKind();
switch (Kind) {
case FK_NONE:
@@ -690,6 +690,6 @@ llvm::createMipsELFObjectWriter(const Triple &TT, bool IsN32) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
bool IsN64 = TT.isArch64Bit() && !IsN32;
bool HasRelocationAddend = TT.isArch64Bit();
- return llvm::make_unique<MipsELFObjectWriter>(OSABI, HasRelocationAddend,
+ return std::make_unique<MipsELFObjectWriter>(OSABI, HasRelocationAddend,
IsN64);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 759a7fdb32b8..142e9cebb79e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -485,8 +485,11 @@ getJumpOffset16OpValue(const MCInst &MI, unsigned OpNo,
assert(MO.isExpr() &&
"getJumpOffset16OpValue expects only expressions or an immediate");
- // TODO: Push fixup.
- return 0;
+ const MCExpr *Expr = MO.getExpr();
+ Mips::Fixups FixupKind =
+ isMicroMips(STI) ? Mips::fixup_MICROMIPS_LO16 : Mips::fixup_Mips_LO16;
+ Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind)));
+ return 0;
}
/// getJumpTargetOpValue - Return binary encoding of the jump
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index ad5aff6552f6..a84ca8ccfb2d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -10,11 +10,12 @@
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCNACL_H
#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/Support/Alignment.h"
namespace llvm {
-// Log2 of the NaCl MIPS sandbox's instruction bundle size.
-static const unsigned MIPS_NACL_BUNDLE_ALIGN = 4u;
+// NaCl MIPS sandbox's instruction bundle size.
+static const Align MIPS_NACL_BUNDLE_ALIGN = Align(16);
bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx,
bool *IsStore = nullptr);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index ddeec03ba784..79c47d1b6508 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -143,12 +143,15 @@ public:
return false;
switch (Info->get(Inst.getOpcode()).OpInfo[NumOps - 1].OperandType) {
case MCOI::OPERAND_UNKNOWN:
- case MCOI::OPERAND_IMMEDIATE:
- // jal, bal ...
- Target = Inst.getOperand(NumOps - 1).getImm();
+ case MCOI::OPERAND_IMMEDIATE: {
+ // j, jal, jalx, jals
+ // Absolute branch within the current 256 MB-aligned region
+ uint64_t Region = Addr & ~uint64_t(0xfffffff);
+ Target = Region + Inst.getOperand(NumOps - 1).getImm();
return true;
+ }
case MCOI::OPERAND_PCREL:
- // b, j, beq ...
+ // b, beq ...
Target = Addr + Inst.getOperand(NumOps - 1).getImm();
return true;
default:
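
[Editor's note: the region arithmetic introduced in this hunk can be checked in isolation; a sketch, assuming a concrete jump address and an already byte-scaled immediate:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Addr = 0x12345678; // address of the j/jal instruction
  uint64_t Imm = 0x00400000;  // absolute immediate operand
  uint64_t Region = Addr & ~uint64_t(0xfffffff); // current 256 MB region
  std::printf("0x%llx\n", (unsigned long long)(Region + Imm)); // 0x10400000
}
]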
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index c050db8a17fd..2d53750ad0ee 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -270,7 +270,7 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context,
S->getAssembler().setRelaxAll(true);
// Set bundle-alignment as required by the NaCl ABI for the target.
- S->EmitBundleAlignMode(MIPS_NACL_BUNDLE_ALIGN);
+ S->EmitBundleAlignMode(Log2(MIPS_NACL_BUNDLE_ALIGN));
return S;
}
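
[Editor's note: EmitBundleAlignMode still takes a log2 value, so wrapping the constant in Align(16) and passing Log2(...) keeps the old argument of 4. A quick standalone check of that equivalence:

#include <cstdio>

int main() {
  unsigned AlignBytes = 16, Log2Align = 0;
  while ((1u << Log2Align) < AlignBytes)
    ++Log2Align;
  std::printf("%u\n", Log2Align); // 4, the old MIPS_NACL_BUNDLE_ALIGN value
}
]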
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index b4ebb9d18b72..3ff9c722484b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -37,7 +37,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
Context.getELFSection(".MIPS.options", ELF::SHT_MIPS_OPTIONS,
ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, 1, "");
MCA.registerSection(*Sec);
- Sec->setAlignment(8);
+ Sec->setAlignment(Align(8));
Streamer->SwitchSection(Sec);
Streamer->EmitIntValue(ELF::ODK_REGINFO, 1); // kind
@@ -55,7 +55,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
MCSectionELF *Sec = Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO,
ELF::SHF_ALLOC, 24, "");
MCA.registerSection(*Sec);
- Sec->setAlignment(MTS->getABI().IsN32() ? 8 : 4);
+ Sec->setAlignment(MTS->getABI().IsN32() ? Align(8) : Align(4));
Streamer->SwitchSection(Sec);
Streamer->EmitIntValue(ri_gprmask, 4);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index e3bdb3b140a8..b6dae9f6dea8 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -34,6 +34,15 @@ static cl::opt<bool> RoundSectionSizes(
cl::desc("Round section sizes up to the section alignment"), cl::Hidden);
} // end anonymous namespace
+static bool isMipsR6(const MCSubtargetInfo *STI) {
+ return STI->getFeatureBits()[Mips::FeatureMips32r6] ||
+ STI->getFeatureBits()[Mips::FeatureMips64r6];
+}
+
+static bool isMicroMips(const MCSubtargetInfo *STI) {
+ return STI->getFeatureBits()[Mips::FeatureMicroMips];
+}
+
MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S), GPReg(Mips::GP), ModuleDirectiveAllowed(true) {
GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
@@ -216,6 +225,19 @@ void MipsTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1,
emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, STI);
}
+void MipsTargetStreamer::emitRRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1,
+ unsigned Reg2, MCOperand Op3, SMLoc IDLoc,
+ const MCSubtargetInfo *STI) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ TmpInst.addOperand(MCOperand::createReg(Reg0));
+ TmpInst.addOperand(MCOperand::createReg(Reg1));
+ TmpInst.addOperand(MCOperand::createReg(Reg2));
+ TmpInst.addOperand(Op3);
+ TmpInst.setLoc(IDLoc);
+ getStreamer().EmitInstruction(TmpInst, *STI);
+}
+
void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1,
int16_t Imm, SMLoc IDLoc,
const MCSubtargetInfo *STI) {
@@ -264,8 +286,7 @@ void MipsTargetStreamer::emitEmptyDelaySlot(bool hasShortDelaySlot, SMLoc IDLoc,
}
void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {
- const FeatureBitset &Features = STI->getFeatureBits();
- if (Features[Mips::FeatureMicroMips])
+ if (isMicroMips(STI))
emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI);
else
emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
@@ -311,21 +332,34 @@ void MipsTargetStreamer::emitStoreWithImmOffset(
emitRRI(Opcode, SrcReg, ATReg, LoOffset, IDLoc, STI);
}
-/// Emit a store instruction with an symbol offset. Symbols are assumed to be
-/// out of range for a simm16 will be expanded to appropriate instructions.
-void MipsTargetStreamer::emitStoreWithSymOffset(
- unsigned Opcode, unsigned SrcReg, unsigned BaseReg, MCOperand &HiOperand,
- MCOperand &LoOperand, unsigned ATReg, SMLoc IDLoc,
- const MCSubtargetInfo *STI) {
- // sw $8, sym => lui $at, %hi(sym)
- // sw $8, %lo(sym)($at)
+/// Emit a store-conditional ('sc') instruction with a symbol offset.
+void MipsTargetStreamer::emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg,
+ unsigned BaseReg,
+ MCOperand &HiOperand,
+ MCOperand &LoOperand,
+ unsigned ATReg, SMLoc IDLoc,
+ const MCSubtargetInfo *STI) {
+ // sc $8, sym => lui $at, %hi(sym)
+ // sc $8, %lo(sym)($at)
// Generate the base address in ATReg.
emitRX(Mips::LUi, ATReg, HiOperand, IDLoc, STI);
- if (BaseReg != Mips::ZERO)
- emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI);
- // Emit the store with the adjusted base and offset.
- emitRRX(Opcode, SrcReg, ATReg, LoOperand, IDLoc, STI);
+ if (!isMicroMips(STI) && isMipsR6(STI)) {
+    // For non-microMIPS R6 the 'sc' offset field cannot hold the lower
+    // 16 bits of the symbol, so we fold them into 'at'.
+ // sc $8, sym => lui $at, %hi(sym)
+ // addiu $at, $at, %lo(sym)
+ // sc $8, 0($at)
+ emitRRX(Mips::ADDiu, ATReg, ATReg, LoOperand, IDLoc, STI);
+ MCOperand Offset = MCOperand::createImm(0);
+ // Emit the store with the adjusted base and offset.
+ emitRRRX(Opcode, SrcReg, SrcReg, ATReg, Offset, IDLoc, STI);
+ } else {
+ if (BaseReg != Mips::ZERO)
+ emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI);
+ // Emit the store with the adjusted base and offset.
+ emitRRRX(Opcode, SrcReg, SrcReg, ATReg, LoOperand, IDLoc, STI);
+ }
}
/// Emit a load instruction with an immediate offset. DstReg and TmpReg are
@@ -364,30 +398,6 @@ void MipsTargetStreamer::emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg,
emitRRI(Opcode, DstReg, TmpReg, LoOffset, IDLoc, STI);
}
-/// Emit a load instruction with an symbol offset. Symbols are assumed to be
-/// out of range for a simm16 will be expanded to appropriate instructions.
-/// DstReg and TmpReg are permitted to be the same register iff DstReg is a
-/// GPR. It is the callers responsibility to identify such cases and pass the
-/// appropriate register in TmpReg.
-void MipsTargetStreamer::emitLoadWithSymOffset(unsigned Opcode, unsigned DstReg,
- unsigned BaseReg,
- MCOperand &HiOperand,
- MCOperand &LoOperand,
- unsigned TmpReg, SMLoc IDLoc,
- const MCSubtargetInfo *STI) {
- // 1) lw $8, sym => lui $8, %hi(sym)
- // lw $8, %lo(sym)($8)
- // 2) ldc1 $f0, sym => lui $at, %hi(sym)
- // ldc1 $f0, %lo(sym)($at)
-
- // Generate the base address in TmpReg.
- emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI);
- if (BaseReg != Mips::ZERO)
- emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI);
- // Emit the load with the adjusted base and offset.
- emitRRX(Opcode, DstReg, TmpReg, LoOperand, IDLoc, STI);
-}
-
MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
: MipsTargetStreamer(S), OS(OS) {}
@@ -891,9 +901,9 @@ void MipsTargetELFStreamer::finish() {
MCSection &BSSSection = *OFI.getBSSSection();
MCA.registerSection(BSSSection);
- TextSection.setAlignment(std::max(16u, TextSection.getAlignment()));
- DataSection.setAlignment(std::max(16u, DataSection.getAlignment()));
- BSSSection.setAlignment(std::max(16u, BSSSection.getAlignment()));
+ TextSection.setAlignment(Align(std::max(16u, TextSection.getAlignment())));
+ DataSection.setAlignment(Align(std::max(16u, DataSection.getAlignment())));
+ BSSSection.setAlignment(Align(std::max(16u, BSSSection.getAlignment())));
if (RoundSectionSizes) {
// Make sections sizes a multiple of the alignment. This is useful for
@@ -1016,7 +1026,7 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Context);
MCA.registerSection(*Sec);
- Sec->setAlignment(4);
+ Sec->setAlignment(Align(4));
OS.PushSection();
@@ -1306,7 +1316,7 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() {
MCSectionELF *Sec = Context.getELFSection(
".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS, ELF::SHF_ALLOC, 24, "");
MCA.registerSection(*Sec);
- Sec->setAlignment(8);
+ Sec->setAlignment(Align(8));
OS.SwitchSection(Sec);
OS << ABIFlagsSection;
diff --git a/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/lib/Target/Mips/MicroMipsDSPInstrInfo.td
index 5a12568893af..9a1e47e5ecca 100644
--- a/lib/Target/Mips/MicroMipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsDSPInstrInfo.td
@@ -360,7 +360,7 @@ class RDDSP_MM_DESC {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins uimm7:$mask);
string AsmString = !strconcat("rddsp", "\t$rt, $mask");
- list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt7:$mask))];
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp timmZExt7:$mask))];
InstrItinClass Itinerary = NoItinerary;
}
@@ -383,7 +383,7 @@ class WRDSP_MM_DESC {
dag OutOperandList = (outs);
dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask);
string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
- list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)];
+ list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, timmZExt7:$mask)];
InstrItinClass Itinerary = NoItinerary;
bit isMoveReg = 1;
}
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 9b7f7b25fa94..8cc0029fc896 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -955,17 +955,18 @@ let DecoderNamespace = "MicroMips" in {
EXT_FM_MM<0x0c>, ISA_MICROMIPS32_NOT_MIPS32R6;
/// Jump Instructions
- let DecoderMethod = "DecodeJumpTargetMM" in
+ let DecoderMethod = "DecodeJumpTargetMM" in {
def J_MM : MMRel, JumpFJ<jmptarget_mm, "j", br, bb, "j">,
J_FM_MM<0x35>, AdditionalRequires<[RelocNotPIC]>,
IsBranch, ISA_MICROMIPS32_NOT_MIPS32R6;
-
- let DecoderMethod = "DecodeJumpTargetMM" in {
def JAL_MM : MMRel, JumpLink<"jal", calltarget_mm>, J_FM_MM<0x3d>,
ISA_MICROMIPS32_NOT_MIPS32R6;
+ }
+
+ let DecoderMethod = "DecodeJumpTargetXMM" in
def JALX_MM : MMRel, JumpLink<"jalx", calltarget>, J_FM_MM<0x3c>,
ISA_MICROMIPS32_NOT_MIPS32R6;
- }
+
def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>,
ISA_MICROMIPS32_NOT_MIPS32R6;
def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>,
diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp
index 70af95592aa5..db93b3d80ede 100644
--- a/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -361,7 +361,7 @@ static bool CheckXWPInstr(MachineInstr *MI, bool ReduceToLwp,
MI->getOpcode() == Mips::SW16_MM))
return false;
- unsigned reg = MI->getOperand(0).getReg();
+ Register reg = MI->getOperand(0).getReg();
if (reg == Mips::RA)
return false;
@@ -403,8 +403,8 @@ static bool ConsecutiveInstr(MachineInstr *MI1, MachineInstr *MI2) {
if (!GetImm(MI2, 2, Offset2))
return false;
- unsigned Reg1 = MI1->getOperand(0).getReg();
- unsigned Reg2 = MI2->getOperand(0).getReg();
+ Register Reg1 = MI1->getOperand(0).getReg();
+ Register Reg2 = MI2->getOperand(0).getReg();
return ((Offset1 == (Offset2 - 4)) && (ConsecutiveRegisters(Reg1, Reg2)));
}
@@ -475,8 +475,8 @@ bool MicroMipsSizeReduce::ReduceXWtoXWP(ReduceEntryFunArgs *Arguments) {
if (!CheckXWPInstr(MI2, ReduceToLwp, Entry))
return false;
- unsigned Reg1 = MI1->getOperand(1).getReg();
- unsigned Reg2 = MI2->getOperand(1).getReg();
+ Register Reg1 = MI1->getOperand(1).getReg();
+ Register Reg2 = MI2->getOperand(1).getReg();
if (Reg1 != Reg2)
return false;
@@ -621,8 +621,8 @@ bool MicroMipsSizeReduce::ReduceMoveToMovep(ReduceEntryFunArgs *Arguments) {
MachineInstr *MI1 = Arguments->MI;
MachineInstr *MI2 = &*NextMII;
- unsigned RegDstMI1 = MI1->getOperand(0).getReg();
- unsigned RegSrcMI1 = MI1->getOperand(1).getReg();
+ Register RegDstMI1 = MI1->getOperand(0).getReg();
+ Register RegSrcMI1 = MI1->getOperand(1).getReg();
if (!IsMovepSrcRegister(RegSrcMI1))
return false;
@@ -633,8 +633,8 @@ bool MicroMipsSizeReduce::ReduceMoveToMovep(ReduceEntryFunArgs *Arguments) {
if (MI2->getOpcode() != Entry.WideOpc())
return false;
- unsigned RegDstMI2 = MI2->getOperand(0).getReg();
- unsigned RegSrcMI2 = MI2->getOperand(1).getReg();
+ Register RegDstMI2 = MI2->getOperand(0).getReg();
+ Register RegSrcMI2 = MI2->getOperand(1).getReg();
if (!IsMovepSrcRegister(RegSrcMI2))
return false;
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 7b83ea8535ae..a5908362e81f 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -25,6 +25,8 @@ class PredicateControl {
list<Predicate> GPRPredicates = [];
// Predicates for the PTR size such as IsPTR64bit
list<Predicate> PTRPredicates = [];
+ // Predicates for a symbol's size such as hasSym32.
+ list<Predicate> SYMPredicates = [];
// Predicates for the FGR size and layout such as IsFP64bit
list<Predicate> FGRPredicates = [];
// Predicates for the instruction group membership such as ISA's.
@@ -38,6 +40,7 @@ class PredicateControl {
list<Predicate> Predicates = !listconcat(EncodingPredicates,
GPRPredicates,
PTRPredicates,
+ SYMPredicates,
FGRPredicates,
InsnPredicates,
HardFloatPredicate,
@@ -206,6 +209,9 @@ def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">;
def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true",
"Disable use of the jal instruction">;
+def FeatureXGOT
+ : SubtargetFeature<"xgot", "UseXGOT", "true", "Assume 32-bit GOT">;
+
def FeatureUseIndirectJumpsHazard : SubtargetFeature<"use-indirect-jump-hazard",
"UseIndirectJumpsHazard",
"true", "Use indirect jump"
@@ -257,3 +263,9 @@ def Mips : Target {
let AssemblyParserVariants = [MipsAsmParserVariant];
let AllowRegisterRenaming = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "MipsPfmCounters.td"
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 3ab4f1e064da..768d54fc9c24 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -72,7 +72,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL;
- unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ Register V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass;
V0 = RegInfo.createVirtualRegister(RC);
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 6d8e5aef2a3f..5a5b78c9d5f9 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -708,8 +708,8 @@ Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
if (DontExpandCondPseudos16)
return BB;
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- unsigned regX = MI.getOperand(0).getReg();
- unsigned regY = MI.getOperand(1).getReg();
+ Register regX = MI.getOperand(0).getReg();
+ Register regY = MI.getOperand(1).getReg();
MachineBasicBlock *target = MI.getOperand(2).getMBB();
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(CmpOpc))
.addReg(regX)
@@ -725,7 +725,7 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
if (DontExpandCondPseudos16)
return BB;
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- unsigned regX = MI.getOperand(0).getReg();
+ Register regX = MI.getOperand(0).getReg();
int64_t imm = MI.getOperand(1).getImm();
MachineBasicBlock *target = MI.getOperand(2).getMBB();
unsigned CmpOpc;
@@ -758,9 +758,9 @@ Mips16TargetLowering::emitFEXT_CCRX16_ins(unsigned SltOpc, MachineInstr &MI,
if (DontExpandCondPseudos16)
return BB;
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- unsigned CC = MI.getOperand(0).getReg();
- unsigned regX = MI.getOperand(1).getReg();
- unsigned regY = MI.getOperand(2).getReg();
+ Register CC = MI.getOperand(0).getReg();
+ Register regX = MI.getOperand(1).getReg();
+ Register regY = MI.getOperand(2).getReg();
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SltOpc))
.addReg(regX)
.addReg(regY);
@@ -777,8 +777,8 @@ Mips16TargetLowering::emitFEXT_CCRXI16_ins(unsigned SltiOpc, unsigned SltiXOpc,
if (DontExpandCondPseudos16)
return BB;
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- unsigned CC = MI.getOperand(0).getReg();
- unsigned regX = MI.getOperand(1).getReg();
+ Register CC = MI.getOperand(0).getReg();
+ Register regX = MI.getOperand(1).getReg();
int64_t Imm = MI.getOperand(2).getImm();
unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SltOpc)).addReg(regX).addImm(Imm);
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index c234c309d760..0d735c20ec2f 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -358,7 +358,7 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
MachineOperand &MO = II->getOperand(i);
if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ !Register::isVirtualRegister(MO.getReg()))
Candidates.reset(MO.getReg());
}
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 7f35280f7936..cc15949b0d57 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -16,6 +16,7 @@
// shamt must fit in 6 bits.
def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
+def timmZExt6 : TImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
// Node immediate fits as 10-bit sign extended on target immediate.
// e.g. seqi, snei
@@ -651,6 +652,7 @@ def : MipsPat<(MipsTlsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>,
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsPat<(MipsJmpLink (i64 texternalsym:$dst)),
(JAL texternalsym:$dst)>, ISA_MIPS3, GPR_64, SYM_64;
+
def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)),
(LUi64 tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64;
def : MipsPat<(MipsHighest (i64 tblockaddress:$in)),
@@ -682,6 +684,20 @@ let AdditionalPredicates = [NotInMicroMips] in {
(DADDiu GPR64:$hi, tjumptable:$lo)>, ISA_MIPS3, GPR_64, SYM_64;
def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tconstpool:$lo))),
(DADDiu GPR64:$hi, tconstpool:$lo)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 texternalsym:$lo))),
+ (DADDiu GPR64:$hi, texternalsym:$lo)>,
+ ISA_MIPS3, GPR_64, SYM_64;
+
+ def : MipsPat<(MipsHi (i64 tglobaladdr:$in)),
+ (DADDiu ZERO_64, tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(MipsHi (i64 tblockaddress:$in)),
+ (DADDiu ZERO_64, tblockaddress:$in)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(MipsHi (i64 tjumptable:$in)),
+ (DADDiu ZERO_64, tjumptable:$in)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(MipsHi (i64 tconstpool:$in)),
+ (DADDiu ZERO_64, tconstpool:$in)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(MipsHi (i64 texternalsym:$in)),
+ (DADDiu ZERO_64, texternalsym:$in)>, ISA_MIPS3, GPR_64, SYM_64;
def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))),
(DADDiu GPR64:$hi, tglobaladdr:$lo)>, ISA_MIPS3, GPR_64, SYM_64;
@@ -692,6 +708,23 @@ let AdditionalPredicates = [NotInMicroMips] in {
(DADDiu GPR64:$hi, tjumptable:$lo)>, ISA_MIPS3, GPR_64, SYM_64;
def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tconstpool:$lo))),
(DADDiu GPR64:$hi, tconstpool:$lo)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 texternalsym:$lo))),
+ (DADDiu GPR64:$hi, texternalsym:$lo)>,
+ ISA_MIPS3, GPR_64, SYM_64;
+
+ def : MipsPat<(MipsLo (i64 tglobaladdr:$in)),
+ (DADDiu ZERO_64, tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(MipsLo (i64 tblockaddress:$in)),
+ (DADDiu ZERO_64, tblockaddress:$in)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(MipsLo (i64 tjumptable:$in)),
+ (DADDiu ZERO_64, tjumptable:$in)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(MipsLo (i64 tconstpool:$in)),
+ (DADDiu ZERO_64, tconstpool:$in)>, ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(MipsLo (i64 tglobaltlsaddr:$in)),
+ (DADDiu ZERO_64, tglobaltlsaddr:$in)>,
+ ISA_MIPS3, GPR_64, SYM_64;
+ def : MipsPat<(MipsLo (i64 texternalsym:$in)),
+ (DADDiu ZERO_64, texternalsym:$in)>, ISA_MIPS3, GPR_64, SYM_64;
def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))),
(DADDiu GPR64:$hi, tglobaladdr:$lo)>, ISA_MIPS3, GPR_64, SYM_64;
@@ -705,6 +738,9 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaltlsaddr:$lo))),
(DADDiu GPR64:$hi, tglobaltlsaddr:$lo)>, ISA_MIPS3, GPR_64,
SYM_64;
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 texternalsym:$lo))),
+ (DADDiu GPR64:$hi, texternalsym:$lo)>,
+ ISA_MIPS3, GPR_64, SYM_64;
}
// gp_rel relocs
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index db83fe49cec0..2201545adc94 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -56,6 +56,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
@@ -376,7 +377,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
void MipsAsmPrinter::emitFrameDirective() {
const TargetRegisterInfo &RI = *MF->getSubtarget().getRegisterInfo();
- unsigned stackReg = RI.getFrameRegister(*MF);
+ Register stackReg = RI.getFrameRegister(*MF);
unsigned returnReg = RI.getRARegister();
unsigned stackSize = MF->getFrameInfo().getStackSize();
@@ -571,7 +572,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// for 2 for 32 bit mode and 1 for 64 bit mode.
if (NumVals != 2) {
if (Subtarget->isGP64bit() && NumVals == 1 && MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
O << '$' << MipsInstPrinter::getRegisterName(Reg);
return false;
}
@@ -597,7 +598,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(RegOp);
if (!MO.isReg())
return true;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
O << '$' << MipsInstPrinter::getRegisterName(Reg);
return false;
}
@@ -780,7 +781,7 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
StringRef CPU = MIPS_MC::selectMipsCPU(TT, TM.getTargetCPU());
StringRef FS = TM.getTargetFeatureString();
const MipsTargetMachine &MTM = static_cast<const MipsTargetMachine &>(TM);
- const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, 0);
+ const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, None);
bool IsABICalls = STI.isABICalls();
const MipsABIInfo &ABI = MTM.getABI();
@@ -821,6 +822,9 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// option has changed the default (i.e. FPXX) and omit it otherwise.
if (ABI.IsO32() && (!STI.useOddSPReg() || STI.isABI_FPXX()))
TS.emitDirectiveModuleOddSPReg();
+
+ // Switch to the .text section.
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
}
void MipsAsmPrinter::emitInlineAsmStart() const {
diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp
index da65689ecff5..cad82953af50 100644
--- a/lib/Target/Mips/MipsCallLowering.cpp
+++ b/lib/Target/Mips/MipsCallLowering.cpp
@@ -106,6 +106,7 @@ private:
Register ArgsReg, const EVT &VT) override;
virtual void markPhysRegUsed(unsigned PhysReg) {
+ MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
@@ -357,7 +358,7 @@ bool OutgoingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
return true;
}
-static bool isSupportedType(Type *T) {
+static bool isSupportedArgumentType(Type *T) {
if (T->isIntegerTy())
return true;
if (T->isPointerTy())
@@ -367,6 +368,18 @@ static bool isSupportedType(Type *T) {
return false;
}
+static bool isSupportedReturnType(Type *T) {
+ if (T->isIntegerTy())
+ return true;
+ if (T->isPointerTy())
+ return true;
+ if (T->isFloatingPointTy())
+ return true;
+ if (T->isAggregateType())
+ return true;
+ return false;
+}
+
static CCValAssign::LocInfo determineLocInfo(const MVT RegisterVT, const EVT VT,
const ISD::ArgFlagsTy &Flags) {
// > does not mean loss of information as type RegisterVT can't hold type VT,
@@ -403,7 +416,7 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA);
- if (Val != nullptr && !isSupportedType(Val->getType()))
+ if (Val != nullptr && !isSupportedReturnType(Val->getType()))
return false;
if (!VRegs.empty()) {
@@ -411,21 +424,13 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Function &F = MF.getFunction();
const DataLayout &DL = MF.getDataLayout();
const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
- LLVMContext &Ctx = Val->getType()->getContext();
-
- SmallVector<EVT, 4> SplitEVTs;
- ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
- assert(VRegs.size() == SplitEVTs.size() &&
- "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> RetInfos;
SmallVector<unsigned, 8> OrigArgIndices;
- for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
- ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
- setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
- splitToValueTypes(CurArgInfo, 0, RetInfos, OrigArgIndices);
- }
+ ArgInfo ArgRetInfo(VRegs, Val->getType());
+ setArgFlags(ArgRetInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(DL, ArgRetInfo, 0, RetInfos, OrigArgIndices);
SmallVector<ISD::OutputArg, 8> Outs;
subTargetRegTypeForCallingConv(F, RetInfos, OrigArgIndices, Outs);
@@ -453,12 +458,8 @@ bool MipsCallLowering::lowerFormalArguments(
if (F.arg_empty())
return true;
- if (F.isVarArg()) {
- return false;
- }
-
for (auto &Arg : F.args()) {
- if (!isSupportedType(Arg.getType()))
+ if (!isSupportedArgumentType(Arg.getType()))
return false;
}
@@ -472,7 +473,8 @@ bool MipsCallLowering::lowerFormalArguments(
for (auto &Arg : F.args()) {
ArgInfo AInfo(VRegs[i], Arg.getType());
setArgFlags(AInfo, i + AttributeList::FirstArgIndex, DL, F);
- splitToValueTypes(AInfo, i, ArgInfos, OrigArgIndices);
+ ArgInfos.push_back(AInfo);
+ OrigArgIndices.push_back(i);
++i;
}
@@ -495,30 +497,64 @@ bool MipsCallLowering::lowerFormalArguments(
if (!Handler.handle(ArgLocs, ArgInfos))
return false;
+ if (F.isVarArg()) {
+ ArrayRef<MCPhysReg> ArgRegs = ABI.GetVarArgRegs();
+ unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+
+ int VaArgOffset;
+ unsigned RegSize = 4;
+ if (ArgRegs.size() == Idx)
+ VaArgOffset = alignTo(CCInfo.getNextStackOffset(), RegSize);
+ else {
+ VaArgOffset =
+ (int)ABI.GetCalleeAllocdArgSizeInBytes(CCInfo.getCallingConv()) -
+ (int)(RegSize * (ArgRegs.size() - Idx));
+ }
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true);
+ MF.getInfo<MipsFunctionInfo>()->setVarArgsFrameIndex(FI);
+
+ for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) {
+ MIRBuilder.getMBB().addLiveIn(ArgRegs[I]);
+
+ MachineInstrBuilder Copy =
+ MIRBuilder.buildCopy(LLT::scalar(RegSize * 8), Register(ArgRegs[I]));
+ FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true);
+ MachinePointerInfo MPO = MachinePointerInfo::getFixedStack(MF, FI);
+ MachineInstrBuilder FrameIndex =
+ MIRBuilder.buildFrameIndex(LLT::pointer(MPO.getAddrSpace(), 32), FI);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, RegSize,
+ /* Alignment */ RegSize);
+ MIRBuilder.buildStore(Copy, FrameIndex, *MMO);
+ }
+ }
+
return true;
}
bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
- CallingConv::ID CallConv,
- const MachineOperand &Callee,
- const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const {
+ CallLoweringInfo &Info) const {
- if (CallConv != CallingConv::C)
+ if (Info.CallConv != CallingConv::C)
return false;
- for (auto &Arg : OrigArgs) {
- if (!isSupportedType(Arg.Ty))
+ for (auto &Arg : Info.OrigArgs) {
+ if (!isSupportedArgumentType(Arg.Ty))
+ return false;
+ if (Arg.Flags[0].isByVal())
return false;
- if (Arg.Flags.isByVal() || Arg.Flags.isSRet())
+ if (Arg.Flags[0].isSRet() && !Arg.Ty->isPointerTy())
return false;
}
- if (OrigRet.Regs[0] && !isSupportedType(OrigRet.Ty))
+ if (!Info.OrigRet.Ty->isVoidTy() && !isSupportedReturnType(Info.OrigRet.Ty))
return false;
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
+ const DataLayout &DL = MF.getDataLayout();
const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
const MipsTargetMachine &TM =
static_cast<const MipsTargetMachine &>(MF.getTarget());
@@ -528,37 +564,38 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIRBuilder.buildInstr(Mips::ADJCALLSTACKDOWN);
const bool IsCalleeGlobalPIC =
- Callee.isGlobal() && TM.isPositionIndependent();
+ Info.Callee.isGlobal() && TM.isPositionIndependent();
MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(
- Callee.isReg() || IsCalleeGlobalPIC ? Mips::JALRPseudo : Mips::JAL);
+ Info.Callee.isReg() || IsCalleeGlobalPIC ? Mips::JALRPseudo : Mips::JAL);
MIB.addDef(Mips::SP, RegState::Implicit);
if (IsCalleeGlobalPIC) {
Register CalleeReg =
MF.getRegInfo().createGenericVirtualRegister(LLT::pointer(0, 32));
MachineInstr *CalleeGlobalValue =
- MIRBuilder.buildGlobalValue(CalleeReg, Callee.getGlobal());
- if (!Callee.getGlobal()->hasLocalLinkage())
+ MIRBuilder.buildGlobalValue(CalleeReg, Info.Callee.getGlobal());
+ if (!Info.Callee.getGlobal()->hasLocalLinkage())
CalleeGlobalValue->getOperand(1).setTargetFlags(MipsII::MO_GOT_CALL);
MIB.addUse(CalleeReg);
} else
- MIB.add(Callee);
+ MIB.add(Info.Callee);
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
TargetLowering::ArgListTy FuncOrigArgs;
- FuncOrigArgs.reserve(OrigArgs.size());
+ FuncOrigArgs.reserve(Info.OrigArgs.size());
SmallVector<ArgInfo, 8> ArgInfos;
SmallVector<unsigned, 8> OrigArgIndices;
unsigned i = 0;
- for (auto &Arg : OrigArgs) {
+ for (auto &Arg : Info.OrigArgs) {
TargetLowering::ArgListEntry Entry;
Entry.Ty = Arg.Ty;
FuncOrigArgs.push_back(Entry);
- splitToValueTypes(Arg, i, ArgInfos, OrigArgIndices);
+ ArgInfos.push_back(Arg);
+ OrigArgIndices.push_back(i);
++i;
}
@@ -566,11 +603,17 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
subTargetRegTypeForCallingConv(F, ArgInfos, OrigArgIndices, Outs);
SmallVector<CCValAssign, 8> ArgLocs;
- MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
+ bool IsCalleeVarArg = false;
+ if (Info.Callee.isGlobal()) {
+ const Function *CF = static_cast<const Function *>(Info.Callee.getGlobal());
+ IsCalleeVarArg = CF->isVarArg();
+ }
+ MipsCCState CCInfo(F.getCallingConv(), IsCalleeVarArg, MF, ArgLocs,
F.getContext());
- CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
- const char *Call = Callee.isSymbol() ? Callee.getSymbolName() : nullptr;
+ CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(Info.CallConv), 1);
+ const char *Call =
+ Info.Callee.isSymbol() ? Info.Callee.getSymbolName() : nullptr;
CCInfo.AnalyzeCallOperands(Outs, TLI.CCAssignFnForCall(), FuncOrigArgs, Call);
setLocInfo(ArgLocs, Outs);
@@ -599,11 +642,11 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
*STI.getRegBankInfo());
}
- if (OrigRet.Regs[0]) {
+ if (!Info.OrigRet.Ty->isVoidTy()) {
ArgInfos.clear();
SmallVector<unsigned, 8> OrigRetIndices;
- splitToValueTypes(OrigRet, 0, ArgInfos, OrigRetIndices);
+ splitToValueTypes(DL, Info.OrigRet, 0, ArgInfos, OrigRetIndices);
SmallVector<ISD::InputArg, 8> Ins;
subTargetRegTypeForCallingConv(F, ArgInfos, OrigRetIndices, Ins);
@@ -612,7 +655,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
F.getContext());
- CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), OrigRet.Ty, Call);
+ CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), Info.OrigRet.Ty, Call);
setLocInfo(ArgLocs, Ins);
CallReturnHandler Handler(MIRBuilder, MF.getRegInfo(), MIB);
@@ -642,12 +685,12 @@ void MipsCallLowering::subTargetRegTypeForCallingConv(
F.getContext(), F.getCallingConv(), VT);
for (unsigned i = 0; i < NumRegs; ++i) {
- ISD::ArgFlagsTy Flags = Arg.Flags;
+ ISD::ArgFlagsTy Flags = Arg.Flags[0];
if (i == 0)
Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
else
- Flags.setOrigAlign(1);
+ Flags.setOrigAlign(Align::None());
ISDArgs.emplace_back(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo],
0);
@@ -657,12 +700,21 @@ void MipsCallLowering::subTargetRegTypeForCallingConv(
}
void MipsCallLowering::splitToValueTypes(
- const ArgInfo &OrigArg, unsigned OriginalIndex,
+ const DataLayout &DL, const ArgInfo &OrigArg, unsigned OriginalIndex,
SmallVectorImpl<ArgInfo> &SplitArgs,
SmallVectorImpl<unsigned> &SplitArgsOrigIndices) const {
- // TODO : perform structure and array split. For now we only deal with
- // types that pass isSupportedType check.
- SplitArgs.push_back(OrigArg);
- SplitArgsOrigIndices.push_back(OriginalIndex);
+ SmallVector<EVT, 4> SplitEVTs;
+ SmallVector<Register, 4> SplitVRegs;
+ const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitEVTs);
+
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo Info = ArgInfo{OrigArg.Regs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
+ Info.Flags = OrigArg.Flags;
+ SplitArgs.push_back(Info);
+ SplitArgsOrigIndices.push_back(OriginalIndex);
+ }
}
diff --git a/lib/Target/Mips/MipsCallLowering.h b/lib/Target/Mips/MipsCallLowering.h
index 11c2d53ad35d..a284cf5e26cf 100644
--- a/lib/Target/Mips/MipsCallLowering.h
+++ b/lib/Target/Mips/MipsCallLowering.h
@@ -68,9 +68,8 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const override;
- bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
- const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
private:
/// Based on registers available on target machine split or extend
@@ -83,7 +82,8 @@ private:
/// Split structures and arrays, saving the original argument indices since
/// the Mips calling convention needs info about the original argument types.
- void splitToValueTypes(const ArgInfo &OrigArg, unsigned OriginalIndex,
+ void splitToValueTypes(const DataLayout &DL, const ArgInfo &OrigArg,
+ unsigned OriginalIndex,
SmallVectorImpl<ArgInfo> &SplitArgs,
SmallVectorImpl<unsigned> &SplitArgsOrigIndices) const;
};
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index eea28df7eda1..f50640521738 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -222,12 +222,7 @@ namespace {
BasicBlockInfo() = default;
- // FIXME: ignore LogAlign for this patch
- //
- unsigned postOffset(unsigned LogAlign = 0) const {
- unsigned PO = Offset + Size;
- return PO;
- }
+ unsigned postOffset() const { return Offset + Size; }
};
std::vector<BasicBlockInfo> BBInfo;
@@ -376,7 +371,7 @@ namespace {
void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- unsigned getCPELogAlign(const MachineInstr &CPEMI);
+ Align getCPEAlign(const MachineInstr &CPEMI);
void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
unsigned getOffsetOf(MachineInstr *MI) const;
unsigned getUserOffset(CPUser&) const;
@@ -534,11 +529,11 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
MF->push_back(BB);
// MachineConstantPool measures alignment in bytes, matching the Align used here.
- unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+ const Align MaxAlign(MCP->getConstantPoolAlignment());
// Mark the basic block as required by the const-pool.
// If AlignConstantIslands isn't set, use 4-byte alignment for everything.
- BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : Align(4));
// The function needs to be as aligned as the basic blocks. The linker may
// move functions around based on their alignment.
@@ -548,7 +543,8 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// alignment of all entries as long as BB is sufficiently aligned. Keep
// track of the insertion point for each alignment. We are going to bucket
// sort the entries as they are created.
- SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(Log2(MaxAlign) + 1,
+ BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
@@ -576,7 +572,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Ensure that future entries with higher alignment get inserted before
// CPEMI. This is bucket sort with iterators.
- for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ for (unsigned a = LogAlign + 1; a <= Log2(MaxAlign); ++a)
if (InsPoint[a] == InsAt)
InsPoint[a] = CPEMI;
// Add a new CPEntry, but no corresponding CPUser yet.
@@ -621,20 +617,18 @@ MipsConstantIslands::CPEntry
return nullptr;
}
-/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// getCPEAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI. Alignment is returned as an Align in bytes.
-unsigned MipsConstantIslands::getCPELogAlign(const MachineInstr &CPEMI) {
+Align MipsConstantIslands::getCPEAlign(const MachineInstr &CPEMI) {
assert(CPEMI.getOpcode() == Mips::CONSTPOOL_ENTRY);
// Everything is 4-byte aligned unless AlignConstantIslands is set.
if (!AlignConstantIslands)
- return 2;
+ return Align(4);
unsigned CPI = CPEMI.getOperand(1).getIndex();
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
- unsigned Align = MCP->getConstants()[CPI].getAlignment();
- assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
- return Log2_32(Align);
+ return Align(MCP->getConstants()[CPI].getAlignment());
}
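Note: throughout this file the patch replaces log2-encoded alignments with the llvm::Align type, which stores the byte value directly; the old return value 2 and the new Align(4) describe the same 4-byte alignment. A quick illustration:

    Align A(4);           // replaces the old log2 value 2
    assert(Log2(A) == 2); // Log2() recovers the old encoding
    assert(A.value() == 4); // value() is the alignment in bytes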
/// initializeFunctionInfo - Do the initial scan of the function, building up
@@ -940,13 +934,13 @@ bool MipsConstantIslands::isOffsetInRange(unsigned UserOffset,
bool MipsConstantIslands::isWaterInRange(unsigned UserOffset,
MachineBasicBlock* Water, CPUser &U,
unsigned &Growth) {
- unsigned CPELogAlign = getCPELogAlign(*U.CPEMI);
- unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
- unsigned NextBlockOffset, NextBlockAlignment;
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset();
+ unsigned NextBlockOffset;
+ Align NextBlockAlignment;
MachineFunction::const_iterator NextBlock = ++Water->getIterator();
if (NextBlock == MF->end()) {
NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
- NextBlockAlignment = 0;
+ NextBlockAlignment = Align::None();
} else {
NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
NextBlockAlignment = NextBlock->getAlignment();
@@ -961,7 +955,7 @@ bool MipsConstantIslands::isWaterInRange(unsigned UserOffset,
Growth = CPEEnd - NextBlockOffset;
// Compute the padding that would go at the end of the CPE to align the next
// block.
- Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment);
+ Growth += offsetToAlignment(CPEEnd, NextBlockAlignment);
// If the CPE is to be inserted before the instruction, that will raise
// the offset of the instruction. Also account for unknown alignment padding
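Note: offsetToAlignment(V, A) returns how many padding bytes are needed to move V up to the next A-aligned boundary, so the Growth computed above includes the tail padding after the constant pool entry. For example:

    assert(offsetToAlignment(10, Align(8)) == 6); // 10 -> 16
    assert(offsetToAlignment(16, Align(8)) == 0); // already aligned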
@@ -1221,7 +1215,6 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
- unsigned CPELogAlign = getCPELogAlign(*CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
@@ -1231,7 +1224,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
// Size of branch to insert.
unsigned Delta = 2;
// Compute the offset where the CPE will begin.
- unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
+ unsigned CPEOffset = UserBBI.postOffset() + Delta;
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB)
@@ -1257,9 +1250,8 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
// Try to split the block so it's fully aligned. Compute the latest split
// point where we can add a 4-byte branch instruction, and then align to
- // LogAlign which is the largest possible alignment in the function.
- unsigned LogAlign = MF->getAlignment();
- assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ // Align which is the largest possible alignment in the function.
+ const Align Align = MF->getAlignment();
unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x",
BaseInsertOffset));
@@ -1270,7 +1262,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
BaseInsertOffset -= 4;
LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
- << " la=" << LogAlign << '\n');
+ << " la=" << Log2(Align) << '\n');
// This could point off the end of the block if we've already got constant
// pool entries following this block; only the last one is in the water list.
@@ -1295,8 +1287,8 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
// Shift insertion point by one unit of alignment so it is within reach.
- BaseInsertOffset -= 1u << LogAlign;
- EndInsertOffset -= 1u << LogAlign;
+ BaseInsertOffset -= Align.value();
+ EndInsertOffset -= Align.value();
}
// This is overly conservative, as we don't account for CPEMIs being
// reused within the block, but it doesn't matter much. Also assume CPEs
@@ -1399,7 +1391,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
++NumCPEs;
// Mark the basic block as aligned as required by the const-pool entry.
- NewIsland->setAlignment(getCPELogAlign(*U.CPEMI));
+ NewIsland->setAlignment(getCPEAlign(*U.CPEMI));
// Increase the size of the island block to account for the new entry.
BBInfo[NewIsland->getNumber()].Size += Size;
@@ -1431,10 +1423,11 @@ void MipsConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
BBInfo[CPEBB->getNumber()].Size = 0;
// This block no longer needs to be aligned.
- CPEBB->setAlignment(0);
- } else
+ CPEBB->setAlignment(Align(1));
+ } else {
// Entries are sorted by descending alignment, so realign from the front.
- CPEBB->setAlignment(getCPELogAlign(*CPEBB->begin()));
+ CPEBB->setAlignment(getCPEAlign(*CPEBB->begin()));
+ }
adjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
@@ -1529,7 +1522,7 @@ MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
// We should have a way to back out this alignment restriction if we "can"
// later, but it is not harmful.
//
- DestBB->setAlignment(2);
+ DestBB->setAlignment(Align(4));
Br.MaxDisp = ((1<<24)-1) * 2;
MI->setDesc(TII->get(Mips::JalB16));
}
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index daca8b907081..d3e68c014fb7 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -12,12 +12,19 @@
// ImmLeaf
def immZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}]>;
+def timmZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>;
+def timmZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>;
+def timmZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
+def timmZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
+def timmZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
+def timmZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}], NOOP_SDNodeXForm, timm>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
+def timmSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}], NOOP_SDNodeXForm, timm>;
def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
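Note: the ImmLeaf predicates above are thin wrappers over the MathExtras range checks; the new timm variants appear to test the same ranges while matching target constants (timm) rather than plain imm nodes. As a reminder of the ranges involved:

    static_assert(isUInt<5>(31) && !isUInt<5>(32), "uimm5 is 0..31");
    static_assert(isInt<6>(-32) && !isInt<6>(32), "simm6 is -32..31");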
// Mips-specific dsp nodes
@@ -306,7 +313,7 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag OutOperandList = (outs ROT:$rt);
dag InOperandList = (ins ROS:$rs, uimm5:$sa, ROS:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
- list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))];
+ list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, timmZExt5:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
string BaseOpcode = instr_asm;
@@ -443,7 +450,7 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag OutOperandList = (outs GPR32Opnd:$rd);
dag InOperandList = (ins uimm10:$mask);
string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
- list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))];
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode timmZExt10:$mask))];
InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm;
bit isMoveReg = 1;
@@ -454,7 +461,7 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag OutOperandList = (outs);
dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask);
string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
- list<dag> Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)];
+ list<dag> Pattern = [(OpNode GPR32Opnd:$rs, timmZExt10:$mask)];
InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm;
bit isMoveReg = 1;
@@ -1096,14 +1103,14 @@ class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
NoItinerary, DSPROpnd>;
// Misc
-class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5,
+class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, timmZExt5,
NoItinerary>;
-class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2,
+class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, timmZExt2,
NoItinerary>;
class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5,
- immZExt5, NoItinerary>;
+ timmZExt5, NoItinerary>;
// Pseudos.
def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
index 65d84a6c44a0..00cd284e7094 100644
--- a/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -99,15 +99,15 @@ bool MipsExpandPseudo::expandAtomicCmpSwapSubword(
: (ArePtrs64bit ? Mips::SC64 : Mips::SC);
}
- unsigned Dest = I->getOperand(0).getReg();
- unsigned Ptr = I->getOperand(1).getReg();
- unsigned Mask = I->getOperand(2).getReg();
- unsigned ShiftCmpVal = I->getOperand(3).getReg();
- unsigned Mask2 = I->getOperand(4).getReg();
- unsigned ShiftNewVal = I->getOperand(5).getReg();
- unsigned ShiftAmnt = I->getOperand(6).getReg();
- unsigned Scratch = I->getOperand(7).getReg();
- unsigned Scratch2 = I->getOperand(8).getReg();
+ Register Dest = I->getOperand(0).getReg();
+ Register Ptr = I->getOperand(1).getReg();
+ Register Mask = I->getOperand(2).getReg();
+ Register ShiftCmpVal = I->getOperand(3).getReg();
+ Register Mask2 = I->getOperand(4).getReg();
+ Register ShiftNewVal = I->getOperand(5).getReg();
+ Register ShiftAmnt = I->getOperand(6).getReg();
+ Register Scratch = I->getOperand(7).getReg();
+ Register Scratch2 = I->getOperand(8).getReg();
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB.getBasicBlock();
@@ -240,11 +240,11 @@ bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB,
MOVE = Mips::OR64;
}
- unsigned Dest = I->getOperand(0).getReg();
- unsigned Ptr = I->getOperand(1).getReg();
- unsigned OldVal = I->getOperand(2).getReg();
- unsigned NewVal = I->getOperand(3).getReg();
- unsigned Scratch = I->getOperand(4).getReg();
+ Register Dest = I->getOperand(0).getReg();
+ Register Ptr = I->getOperand(1).getReg();
+ Register OldVal = I->getOperand(2).getReg();
+ Register NewVal = I->getOperand(3).getReg();
+ Register Scratch = I->getOperand(4).getReg();
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB.getBasicBlock();
@@ -374,15 +374,15 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
llvm_unreachable("Unknown subword atomic pseudo for expansion!");
}
- unsigned Dest = I->getOperand(0).getReg();
- unsigned Ptr = I->getOperand(1).getReg();
- unsigned Incr = I->getOperand(2).getReg();
- unsigned Mask = I->getOperand(3).getReg();
- unsigned Mask2 = I->getOperand(4).getReg();
- unsigned ShiftAmnt = I->getOperand(5).getReg();
- unsigned OldVal = I->getOperand(6).getReg();
- unsigned BinOpRes = I->getOperand(7).getReg();
- unsigned StoreVal = I->getOperand(8).getReg();
+ Register Dest = I->getOperand(0).getReg();
+ Register Ptr = I->getOperand(1).getReg();
+ Register Incr = I->getOperand(2).getReg();
+ Register Mask = I->getOperand(3).getReg();
+ Register Mask2 = I->getOperand(4).getReg();
+ Register ShiftAmnt = I->getOperand(5).getReg();
+ Register OldVal = I->getOperand(6).getReg();
+ Register BinOpRes = I->getOperand(7).getReg();
+ Register StoreVal = I->getOperand(8).getReg();
const BasicBlock *LLVM_BB = BB.getBasicBlock();
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -513,10 +513,10 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
BEQ = Mips::BEQ64;
}
- unsigned OldVal = I->getOperand(0).getReg();
- unsigned Ptr = I->getOperand(1).getReg();
- unsigned Incr = I->getOperand(2).getReg();
- unsigned Scratch = I->getOperand(3).getReg();
+ Register OldVal = I->getOperand(0).getReg();
+ Register Ptr = I->getOperand(1).getReg();
+ Register Incr = I->getOperand(2).getReg();
+ Register Scratch = I->getOperand(3).getReg();
unsigned Opcode = 0;
unsigned OR = 0;
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 123d3cc242f0..80f288ac500c 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -1162,14 +1162,20 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
if (ArgVT == MVT::f32) {
VA.convertToReg(Mips::F12);
} else if (ArgVT == MVT::f64) {
- VA.convertToReg(Mips::D6);
+ if (Subtarget->isFP64bit())
+ VA.convertToReg(Mips::D6_64);
+ else
+ VA.convertToReg(Mips::D6);
}
} else if (i == 1) {
if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) {
if (ArgVT == MVT::f32) {
VA.convertToReg(Mips::F14);
} else if (ArgVT == MVT::f64) {
- VA.convertToReg(Mips::D7);
+ if (Subtarget->isFP64bit())
+ VA.convertToReg(Mips::D7_64);
+ else
+ VA.convertToReg(Mips::D7);
}
}
}
@@ -1722,7 +1728,7 @@ bool MipsFastISel::selectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- unsigned DestReg = VA.getLocReg();
+ Register DestReg = VA.getLocReg();
// Avoid a cross-class copy. This is very unlikely.
if (!MRI.getRegClass(SrcReg)->contains(DestReg))
return false;
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 0537cfd1cb30..612b2b712fa8 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -24,8 +24,9 @@ protected:
const MipsSubtarget &STI;
public:
- explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment)
- : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {}
+ explicit MipsFrameLowering(const MipsSubtarget &sti, Align Alignment)
+ : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {
+ }
static const MipsFrameLowering *create(const MipsSubtarget &ST);
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 9ba54d6bb73c..e5997af3bcc5 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -65,7 +65,7 @@ bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
/// getGlobalBaseReg - Output the instructions required to put the
/// GOT address into a register.
SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
- unsigned GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg();
+ Register GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg();
return CurDAG->getRegister(GlobalBaseReg, getTargetLowering()->getPointerTy(
CurDAG->getDataLayout()))
.getNode();
@@ -217,6 +217,51 @@ bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
return false;
}
+/// Convert vector addition to vector subtraction if that allows encoding
+/// the constant as an immediate and thus avoids an extra 'ldi' instruction.
+/// add X, <-1, -1...> --> sub X, <1, 1...>
+bool MipsDAGToDAGISel::selectVecAddAsVecSubIfProfitable(SDNode *Node) {
+ assert(Node->getOpcode() == ISD::ADD && "Should only get 'add' here.");
+
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Should only be called for vectors.");
+
+ SDValue X = Node->getOperand(0);
+ SDValue C = Node->getOperand(1);
+
+ auto *BVN = dyn_cast<BuildVectorSDNode>(C);
+ if (!BVN)
+ return false;
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ 8, !Subtarget->isLittle()))
+ return false;
+
+ auto IsInlineConstant = [](const APInt &Imm) { return Imm.isIntN(5); };
+
+ if (IsInlineConstant(SplatValue))
+ return false; // Can already be encoded as an immediate.
+
+ APInt NegSplatValue = 0 - SplatValue;
+ if (!IsInlineConstant(NegSplatValue))
+ return false; // Even if we negate it, it won't help.
+
+ SDLoc DL(Node);
+
+ SDValue NegC = CurDAG->FoldConstantArithmetic(
+ ISD::SUB, DL, VT, CurDAG->getConstant(0, DL, VT).getNode(), C.getNode());
+ assert(NegC && "Constant-folding failed!");
+ SDValue NewNode = CurDAG->getNode(ISD::SUB, DL, VT, X, NegC);
+
+ ReplaceNode(Node, NewNode.getNode());
+ SelectCode(NewNode.getNode());
+ return true;
+}
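Note: as a concrete instance of the transform, MSA's addvi/subvi encode a 5-bit unsigned immediate, so a splat of -3 cannot be encoded for the add but its negation can for the sub. A sketch of the check performed above (immediate width per the IsInlineConstant lambda):

    APInt Splat(32, -3, /*isSigned=*/true);
    assert(!Splat.isIntN(5)); // add X, <-3,...> would need an extra ldi
    APInt Neg = 0 - Splat;    // 3
    assert(Neg.isIntN(5));    // sub X, <3,...> encodes directly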
+
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
void MipsDAGToDAGISel::Select(SDNode *Node) {
@@ -236,6 +281,12 @@ void MipsDAGToDAGISel::Select(SDNode *Node) {
switch(Opcode) {
default: break;
+ case ISD::ADD:
+ if (Node->getSimpleValueType(0).isVector() &&
+ selectVecAddAsVecSubIfProfitable(Node))
+ return;
+ break;
+
// Get target GOT address.
case ISD::GLOBAL_OFFSET_TABLE:
ReplaceNode(Node, getGlobalBaseReg());
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index bae3bbf71f3b..a768589b374b 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -125,6 +125,11 @@ private:
/// starting at bit zero.
virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;
+ /// Convert vector addition with vector subtraction if that allows to encode
+ /// constant as an immediate and thus avoid extra 'ldi' instruction.
+ /// add X, <-1, -1...> --> sub X, <1, 1...>
+ bool selectVecAddAsVecSubIfProfitable(SDNode *Node);
+
void Select(SDNode *N) override;
virtual bool trySelect(SDNode *Node) = 0;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 0ff09007da4b..bf1b4756b24f 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -83,10 +83,6 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
-LargeGOT("mxgot", cl::Hidden,
- cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
-
-static cl::opt<bool>
NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
cl::desc("MIPS: Don't trap on integer division by zero."),
cl::init(false));
@@ -330,7 +326,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
}
// Set LoadExtAction for f16 vectors to Expand
- for (MVT VT : MVT::fp_vector_valuetypes()) {
+ for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements());
if (F16VT.isValid())
setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand);
@@ -518,11 +514,12 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setLibcallName(RTLIB::SRA_I128, nullptr);
}
- setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2);
+ setMinFunctionAlignment(Subtarget.isGP64bit() ? Align(8) : Align(4));
// The arguments on the stack are defined in terms of 4-byte slots on O32
// and 8-byte slots on N32/N64.
- setMinStackArgumentAlignment((ABI.IsN32() || ABI.IsN64()) ? 8 : 4);
+ setMinStackArgumentAlignment((ABI.IsN32() || ABI.IsN64()) ? Align(8)
+ : Align(4));
setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP);
@@ -552,8 +549,9 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
!Subtarget.inMicroMipsMode();
// Disable if any of the following is true:
- // We do not generate PIC, the ABI is not O32, LargeGOT is being used.
- if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || LargeGOT)
+ // We do not generate PIC, the ABI is not O32, or XGOT is being used.
+ if (!TM.isPositionIndependent() || !TM.getABI().IsO32() ||
+ Subtarget.useXGOT())
UseFastISel = false;
return UseFastISel ? Mips::createFastISel(funcInfo, libInfo) : nullptr;
@@ -1257,7 +1255,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
static unsigned
addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
{
- unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ Register VReg = MF.getRegInfo().createVirtualRegister(RC);
MF.getRegInfo().addLiveIn(PReg, VReg);
return VReg;
}
@@ -1477,10 +1475,10 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
llvm_unreachable("Unknown pseudo atomic for replacement!");
}
- unsigned OldVal = MI.getOperand(0).getReg();
- unsigned Ptr = MI.getOperand(1).getReg();
- unsigned Incr = MI.getOperand(2).getReg();
- unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal));
+ Register OldVal = MI.getOperand(0).getReg();
+ Register Ptr = MI.getOperand(1).getReg();
+ Register Incr = MI.getOperand(2).getReg();
+ Register Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal));
MachineBasicBlock::iterator II(MI);
@@ -1519,8 +1517,8 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
// containing the word.
//
- unsigned PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr));
- unsigned IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr));
+ Register PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr));
+ Register IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr));
BuildMI(*BB, II, DL, TII->get(Mips::COPY), IncrCopy).addReg(Incr);
BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr);
@@ -1556,7 +1554,7 @@ MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg(
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
- unsigned ScrReg = RegInfo.createVirtualRegister(RC);
+ Register ScrReg = RegInfo.createVirtualRegister(RC);
assert(Size < 32);
int64_t ShiftImm = 32 - (Size * 8);
@@ -1581,21 +1579,21 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Ptr = MI.getOperand(1).getReg();
- unsigned Incr = MI.getOperand(2).getReg();
-
- unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp);
- unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
- unsigned Mask = RegInfo.createVirtualRegister(RC);
- unsigned Mask2 = RegInfo.createVirtualRegister(RC);
- unsigned Incr2 = RegInfo.createVirtualRegister(RC);
- unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp);
- unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
- unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
- unsigned Scratch = RegInfo.createVirtualRegister(RC);
- unsigned Scratch2 = RegInfo.createVirtualRegister(RC);
- unsigned Scratch3 = RegInfo.createVirtualRegister(RC);
+ Register Dest = MI.getOperand(0).getReg();
+ Register Ptr = MI.getOperand(1).getReg();
+ Register Incr = MI.getOperand(2).getReg();
+
+ Register AlignedAddr = RegInfo.createVirtualRegister(RCp);
+ Register ShiftAmt = RegInfo.createVirtualRegister(RC);
+ Register Mask = RegInfo.createVirtualRegister(RC);
+ Register Mask2 = RegInfo.createVirtualRegister(RC);
+ Register Incr2 = RegInfo.createVirtualRegister(RC);
+ Register MaskLSB2 = RegInfo.createVirtualRegister(RCp);
+ Register PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ Register MaskUpper = RegInfo.createVirtualRegister(RC);
+ Register Scratch = RegInfo.createVirtualRegister(RC);
+ Register Scratch2 = RegInfo.createVirtualRegister(RC);
+ Register Scratch3 = RegInfo.createVirtualRegister(RC);
unsigned AtomicOp = 0;
switch (MI.getOpcode()) {
@@ -1678,7 +1676,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
if (Subtarget.isLittle()) {
BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
} else {
- unsigned Off = RegInfo.createVirtualRegister(RC);
+ Register Off = RegInfo.createVirtualRegister(RC);
BuildMI(BB, DL, TII->get(Mips::XORi), Off)
.addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2);
BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3);
@@ -1738,12 +1736,12 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32
? Mips::ATOMIC_CMP_SWAP_I32_POSTRA
: Mips::ATOMIC_CMP_SWAP_I64_POSTRA;
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Ptr = MI.getOperand(1).getReg();
- unsigned OldVal = MI.getOperand(2).getReg();
- unsigned NewVal = MI.getOperand(3).getReg();
+ Register Dest = MI.getOperand(0).getReg();
+ Register Ptr = MI.getOperand(1).getReg();
+ Register OldVal = MI.getOperand(2).getReg();
+ Register NewVal = MI.getOperand(3).getReg();
- unsigned Scratch = MRI.createVirtualRegister(RC);
+ Register Scratch = MRI.createVirtualRegister(RC);
MachineBasicBlock::iterator II(MI);
// We need to create copies of the various registers and kill them at the
@@ -1751,9 +1749,9 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
// after fast register allocation, the spills will end up outside of the
// blocks that their values are defined in, causing livein errors.
- unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr));
- unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal));
- unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal));
+ Register PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr));
+ Register OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal));
+ Register NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal));
BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr);
BuildMI(*BB, II, DL, TII->get(Mips::COPY), OldValCopy).addReg(OldVal);
@@ -1790,22 +1788,22 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Ptr = MI.getOperand(1).getReg();
- unsigned CmpVal = MI.getOperand(2).getReg();
- unsigned NewVal = MI.getOperand(3).getReg();
-
- unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp);
- unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
- unsigned Mask = RegInfo.createVirtualRegister(RC);
- unsigned Mask2 = RegInfo.createVirtualRegister(RC);
- unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
- unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
- unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp);
- unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
- unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
- unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
- unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
+ Register Dest = MI.getOperand(0).getReg();
+ Register Ptr = MI.getOperand(1).getReg();
+ Register CmpVal = MI.getOperand(2).getReg();
+ Register NewVal = MI.getOperand(3).getReg();
+
+ Register AlignedAddr = RegInfo.createVirtualRegister(RCp);
+ Register ShiftAmt = RegInfo.createVirtualRegister(RC);
+ Register Mask = RegInfo.createVirtualRegister(RC);
+ Register Mask2 = RegInfo.createVirtualRegister(RC);
+ Register ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
+ Register ShiftedNewVal = RegInfo.createVirtualRegister(RC);
+ Register MaskLSB2 = RegInfo.createVirtualRegister(RCp);
+ Register PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ Register MaskUpper = RegInfo.createVirtualRegister(RC);
+ Register MaskedCmpVal = RegInfo.createVirtualRegister(RC);
+ Register MaskedNewVal = RegInfo.createVirtualRegister(RC);
unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8
? Mips::ATOMIC_CMP_SWAP_I8_POSTRA
: Mips::ATOMIC_CMP_SWAP_I16_POSTRA;
@@ -1820,8 +1818,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
// value isn't a problem.
// The Dead flag is needed as the value in scratch isn't used by any other
// instruction. Kill isn't used as Dead is more precise.
- unsigned Scratch = RegInfo.createVirtualRegister(RC);
- unsigned Scratch2 = RegInfo.createVirtualRegister(RC);
+ Register Scratch = RegInfo.createVirtualRegister(RC);
+ Register Scratch2 = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -1859,7 +1857,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
if (Subtarget.isLittle()) {
BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
} else {
- unsigned Off = RegInfo.createVirtualRegister(RC);
+ Register Off = RegInfo.createVirtualRegister(RC);
BuildMI(BB, DL, TII->get(Mips::XORi), Off)
.addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2);
BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3);
@@ -1967,10 +1965,10 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG, ABI.IsN64());
- // %hi/%lo relocation
+ // %hi/%lo relocation
return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
- // %highest/%higher/%hi/%lo relocation
- : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
+ // %highest/%higher/%hi/%lo relocation
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
}
// Every other architecture would use shouldAssumeDSOLocal in here, but
@@ -1987,7 +1985,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
if (GV->hasLocalLinkage())
return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
- if (LargeGOT)
+ if (Subtarget.useXGOT())
return getAddrGlobalLargeGOT(
N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16,
DAG.getEntryNode(),
@@ -2149,7 +2147,8 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
SDValue Chain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
- unsigned Align = Node->getConstantOperandVal(3);
+ const Align Align =
+ llvm::MaybeAlign(Node->getConstantOperandVal(3)).valueOrOne();
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
SDLoc DL(Node);
unsigned ArgSlotSizeInBytes = (ABI.IsN32() || ABI.IsN64()) ? 8 : 4;
@@ -2166,14 +2165,13 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// when the pointer is still aligned from the last va_arg (or pair of
// va_args for the i64 on O32 case).
if (Align > getMinStackArgumentAlignment()) {
- assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+ VAList = DAG.getNode(
+ ISD::ADD, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(Align.value() - 1, DL, VAList.getValueType()));
- VAList = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
- DAG.getConstant(Align - 1, DL, VAList.getValueType()));
-
- VAList = DAG.getNode(ISD::AND, DL, VAList.getValueType(), VAList,
- DAG.getConstant(-(int64_t)Align, DL,
- VAList.getValueType()));
+ VAList = DAG.getNode(
+ ISD::AND, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(-(int64_t)Align.value(), DL, VAList.getValueType()));
}
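Note: the ADD/AND pair built above is the standard round-up-to-alignment idiom; in plain C++, with Alignment a power of two, it is equivalent to:

    uint64_t alignUp(uint64_t Ptr, uint64_t Alignment) {
      // e.g. alignUp(13, 8) == 16
      return (Ptr + Alignment - 1) & ~(Alignment - 1);
    }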
// Increment the pointer, VAList, to the next vaarg.
@@ -2870,7 +2868,7 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
#include "MipsGenCallingConv.inc"
CCAssignFn *MipsTargetLowering::CCAssignFnForCall() const{
- return CC_Mips;
+ return CC_Mips_FixedArg;
}
CCAssignFn *MipsTargetLowering::CCAssignFnForReturn() const{
@@ -3167,7 +3165,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg, DAG.getConstant(1, DL, MVT::i32));
if (!Subtarget.isLittle())
std::swap(Lo, Hi);
- unsigned LocRegLo = VA.getLocReg();
+ Register LocRegLo = VA.getLocReg();
unsigned LocRegHigh = getNextIntArgReg(LocRegLo);
RegsToPass.push_back(std::make_pair(LocRegLo, Lo));
RegsToPass.push_back(std::make_pair(LocRegHigh, Hi));
@@ -3270,7 +3268,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (InternalLinkage)
Callee = getAddrLocal(G, DL, Ty, DAG, ABI.IsN32() || ABI.IsN64());
- else if (LargeGOT) {
+ else if (Subtarget.useXGOT()) {
Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Val));
@@ -3292,7 +3290,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!IsPIC) // static
Callee = DAG.getTargetExternalSymbol(
Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG);
- else if (LargeGOT) {
+ else if (Subtarget.useXGOT()) {
Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Sym));
@@ -3523,7 +3521,7 @@ SDValue MipsTargetLowering::LowerFormalArguments(
// Arguments stored on registers
if (IsRegLoc) {
MVT RegVT = VA.getLocVT();
- unsigned ArgReg = VA.getLocReg();
+ Register ArgReg = VA.getLocReg();
const TargetRegisterClass *RC = getRegClassFor(RegVT);
// Transform the arguments stored on
@@ -4568,20 +4566,20 @@ MachineBasicBlock *MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
+Register MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const {
// Named registers is expected to be fairly rare. For now, just support $28
// since the linux kernel uses it.
if (Subtarget.isGP64bit()) {
- unsigned Reg = StringSwitch<unsigned>(RegName)
+ Register Reg = StringSwitch<Register>(RegName)
.Case("$28", Mips::GP_64)
- .Default(0);
+ .Default(Register());
if (Reg)
return Reg;
} else {
- unsigned Reg = StringSwitch<unsigned>(RegName)
+ Register Reg = StringSwitch<Register>(RegName)
.Case("$28", Mips::GP)
- .Default(0);
+ .Default(Register());
if (Reg)
return Reg;
}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 2db60e9801f1..0a5cddd45afb 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -304,11 +304,12 @@ class TargetRegisterClass;
unsigned &NumIntermediates, MVT &RegisterVT) const override;
/// Return the correct alignment for the current calling convention.
- unsigned getABIAlignmentForCallingConv(Type *ArgTy,
- DataLayout DL) const override {
+ Align getABIAlignmentForCallingConv(Type *ArgTy,
+ DataLayout DL) const override {
+ const Align ABIAlign(DL.getABITypeAlignment(ArgTy));
if (ArgTy->isVectorTy())
- return std::min(DL.getABITypeAlignment(ArgTy), 8U);
- return DL.getABITypeAlignment(ArgTy);
+ return std::min(ABIAlign, Align(8));
+ return ABIAlign;
}
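Note: with this override a vector argument whose natural ABI alignment is 16 bytes is passed with at most 8-byte alignment, while scalars keep their natural alignment. An illustration with assumed values (v4i32 taken as 16-byte aligned):

    Align VecAlign = std::min(Align(16), Align(8)); // clamped to Align(8)
    Align IntAlign = Align(4);                      // i32 is left untouched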
ISD::NodeType getExtendForAtomicOps() const override {
@@ -347,8 +348,8 @@ class TargetRegisterClass;
void HandleByVal(CCState *, unsigned &, unsigned) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index fbd56206b249..6bb25ee5754d 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -677,7 +677,8 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
return MIB;
}
-bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+bool MipsInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
assert(!MI.isBundle() &&
"TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index a626c0c3fdb8..092a960b4ba7 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -148,7 +148,7 @@ public:
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
MachineBasicBlock::iterator I) const;
- bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
/// Perform target specific instruction verification.
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index a4e85a38ab28..58167e0f344d 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -211,9 +211,9 @@ def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
def NotCnMips : Predicate<"!Subtarget->hasCnMips()">,
AssemblerPredicate<"!FeatureCnMips">;
-def IsSym32 : Predicate<"Subtarget->HasSym32()">,
+def IsSym32 : Predicate<"Subtarget->hasSym32()">,
AssemblerPredicate<"FeatureSym32">;
-def IsSym64 : Predicate<"!Subtarget->HasSym32()">,
+def IsSym64 : Predicate<"!Subtarget->hasSym32()">,
AssemblerPredicate<"!FeatureSym32">;
def IsN64 : Predicate<"Subtarget->isABI_N64()">;
def IsNotN64 : Predicate<"!Subtarget->isABI_N64()">;
@@ -1263,6 +1263,7 @@ def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
// Node immediate fits as 7-bit zero extended on target immediate.
def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>;
+def timmZExt7 : PatLeaf<(timm), [{ return isUInt<7>(N->getZExtValue()); }]>;
// Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node
@@ -1295,6 +1296,7 @@ def immZExt32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
// shamt field must fit in 5 bits.
def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
+def timmZExt5 : TImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
def immZExt5Plus1 : PatLeaf<(imm), [{
return isUInt<5>(N->getZExtValue() - 1);
@@ -3142,25 +3144,31 @@ multiclass MipsHiLoRelocs<Instruction Lui, Instruction Addiu,
def : MipsPat<(MipsHi tconstpool:$in), (Lui tconstpool:$in)>;
def : MipsPat<(MipsHi texternalsym:$in), (Lui texternalsym:$in)>;
- def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>;
+ def : MipsPat<(MipsLo tglobaladdr:$in),
+ (Addiu ZeroReg, tglobaladdr:$in)>;
def : MipsPat<(MipsLo tblockaddress:$in),
(Addiu ZeroReg, tblockaddress:$in)>;
- def : MipsPat<(MipsLo tjumptable:$in), (Addiu ZeroReg, tjumptable:$in)>;
- def : MipsPat<(MipsLo tconstpool:$in), (Addiu ZeroReg, tconstpool:$in)>;
+ def : MipsPat<(MipsLo tjumptable:$in),
+ (Addiu ZeroReg, tjumptable:$in)>;
+ def : MipsPat<(MipsLo tconstpool:$in),
+ (Addiu ZeroReg, tconstpool:$in)>;
def : MipsPat<(MipsLo tglobaltlsaddr:$in),
(Addiu ZeroReg, tglobaltlsaddr:$in)>;
- def : MipsPat<(MipsLo texternalsym:$in), (Addiu ZeroReg, texternalsym:$in)>;
+ def : MipsPat<(MipsLo texternalsym:$in),
+ (Addiu ZeroReg, texternalsym:$in)>;
def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)),
- (Addiu GPROpnd:$hi, tglobaladdr:$lo)>;
+ (Addiu GPROpnd:$hi, tglobaladdr:$lo)>;
def : MipsPat<(add GPROpnd:$hi, (MipsLo tblockaddress:$lo)),
- (Addiu GPROpnd:$hi, tblockaddress:$lo)>;
+ (Addiu GPROpnd:$hi, tblockaddress:$lo)>;
def : MipsPat<(add GPROpnd:$hi, (MipsLo tjumptable:$lo)),
- (Addiu GPROpnd:$hi, tjumptable:$lo)>;
+ (Addiu GPROpnd:$hi, tjumptable:$lo)>;
def : MipsPat<(add GPROpnd:$hi, (MipsLo tconstpool:$lo)),
- (Addiu GPROpnd:$hi, tconstpool:$lo)>;
+ (Addiu GPROpnd:$hi, tconstpool:$lo)>;
def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (Addiu GPROpnd:$hi, tglobaltlsaddr:$lo)>;
+ (Addiu GPROpnd:$hi, tglobaltlsaddr:$lo)>;
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo texternalsym:$lo)),
+ (Addiu GPROpnd:$hi, texternalsym:$lo)>;
}
// wrapper_pic
diff --git a/lib/Target/Mips/MipsInstructionSelector.cpp b/lib/Target/Mips/MipsInstructionSelector.cpp
index 45a47ad3c087..f8fc7cb0898b 100644
--- a/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -17,6 +17,7 @@
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#define DEBUG_TYPE "mips-isel"
@@ -33,7 +34,7 @@ public:
MipsInstructionSelector(const MipsTargetMachine &TM, const MipsSubtarget &STI,
const MipsRegisterBankInfo &RBI);
- bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
+ bool select(MachineInstr &I) override;
static const char *getName() { return DEBUG_TYPE; }
private:
@@ -44,6 +45,8 @@ private:
const TargetRegisterClass *
getRegClassForTypeOnBank(unsigned OpSize, const RegisterBank &RB,
const RegisterBankInfo &RBI) const;
+ unsigned selectLoadStoreOpCode(MachineInstr &I,
+ MachineRegisterInfo &MRI) const;
const MipsTargetMachine &TM;
const MipsSubtarget &STI;
@@ -84,7 +87,7 @@ MipsInstructionSelector::MipsInstructionSelector(
bool MipsInstructionSelector::selectCopy(MachineInstr &I,
MachineRegisterInfo &MRI) const {
Register DstReg = I.getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ if (Register::isPhysicalRegister(DstReg))
return true;
const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI);
@@ -158,9 +161,15 @@ bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm,
}
/// Returning Opc indicates that we failed to select a MIPS instruction opcode.
-static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes,
- unsigned RegBank, bool isFP64) {
- bool isStore = Opc == TargetOpcode::G_STORE;
+unsigned
+MipsInstructionSelector::selectLoadStoreOpCode(MachineInstr &I,
+ MachineRegisterInfo &MRI) const {
+ const Register DestReg = I.getOperand(0).getReg();
+ const unsigned RegBank = RBI.getRegBank(DestReg, MRI, TRI)->getID();
+ const unsigned MemSizeInBytes = (*I.memoperands_begin())->getSize();
+ unsigned Opc = I.getOpcode();
+ const bool isStore = Opc == TargetOpcode::G_STORE;
if (RegBank == Mips::GPRBRegBankID) {
if (isStore)
switch (MemSizeInBytes) {
@@ -192,10 +201,24 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes,
case 4:
return isStore ? Mips::SWC1 : Mips::LWC1;
case 8:
- if (isFP64)
+ if (STI.isFP64bit())
return isStore ? Mips::SDC164 : Mips::LDC164;
else
return isStore ? Mips::SDC1 : Mips::LDC1;
+ case 16: {
+ assert(STI.hasMSA() && "Vector instructions require target with MSA.");
+ const unsigned VectorElementSizeInBytes =
+ MRI.getType(DestReg).getElementType().getSizeInBytes();
+ if (VectorElementSizeInBytes == 1)
+ return isStore ? Mips::ST_B : Mips::LD_B;
+ if (VectorElementSizeInBytes == 2)
+ return isStore ? Mips::ST_H : Mips::LD_H;
+ if (VectorElementSizeInBytes == 4)
+ return isStore ? Mips::ST_W : Mips::LD_W;
+ if (VectorElementSizeInBytes == 8)
+ return isStore ? Mips::ST_D : Mips::LD_D;
+ return Opc;
+ }
default:
return Opc;
}
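Note: the new 16-byte case keys the MSA opcode on the vector element width rather than the access size, which is 128 bits in every case. A hedged mirror of the mapping for loads (illustrative helper, not part of the patch):

    unsigned pickMsaLoad(unsigned ElemBytes) {
      switch (ElemBytes) {
      case 1: return Mips::LD_B; // v16s8
      case 2: return Mips::LD_H; // v8s16
      case 4: return Mips::LD_W; // v4s32
      case 8: return Mips::LD_D; // v2s64
      default: return 0; // caller falls back to the generic opcode
      }
    }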
@@ -203,8 +226,7 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes,
return Opc;
}
-bool MipsInstructionSelector::select(MachineInstr &I,
- CodeGenCoverage &CoverageInfo) const {
+bool MipsInstructionSelector::select(MachineInstr &I) {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -231,7 +253,7 @@ bool MipsInstructionSelector::select(MachineInstr &I,
return true;
}
- if (selectImpl(I, CoverageInfo))
+ if (selectImpl(I, *CoverageInfo))
return true;
MachineInstr *MI = nullptr;
@@ -265,6 +287,11 @@ bool MipsInstructionSelector::select(MachineInstr &I,
.add(I.getOperand(2));
break;
}
+ case G_INTTOPTR:
+ case G_PTRTOINT: {
+ I.setDesc(TII.get(COPY));
+ return selectCopy(I, MRI);
+ }
case G_FRAME_INDEX: {
MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
.add(I.getOperand(0))
@@ -279,12 +306,71 @@ bool MipsInstructionSelector::select(MachineInstr &I,
.add(I.getOperand(1));
break;
}
+ case G_BRJT: {
+ unsigned EntrySize =
+ MF.getJumpTableInfo()->getEntrySize(MF.getDataLayout());
+ assert(isPowerOf2_32(EntrySize) &&
+ "Non-power-of-two jump-table entry size not supported.");
+
+ Register JTIndex = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ MachineInstr *SLL = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::SLL))
+ .addDef(JTIndex)
+ .addUse(I.getOperand(2).getReg())
+ .addImm(Log2_32(EntrySize));
+ if (!constrainSelectedInstRegOperands(*SLL, TII, TRI, RBI))
+ return false;
+
+ Register DestAddress = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ MachineInstr *ADDu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDu))
+ .addDef(DestAddress)
+ .addUse(I.getOperand(0).getReg())
+ .addUse(JTIndex);
+ if (!constrainSelectedInstRegOperands(*ADDu, TII, TRI, RBI))
+ return false;
+
+ Register Dest = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ MachineInstr *LW =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW))
+ .addDef(Dest)
+ .addUse(DestAddress)
+ .addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_ABS_LO)
+ .addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOLoad, 4, 4));
+ if (!constrainSelectedInstRegOperands(*LW, TII, TRI, RBI))
+ return false;
+
+ if (MF.getTarget().isPositionIndependent()) {
+ Register DestTmp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ LW->getOperand(0).setReg(DestTmp);
+ MachineInstr *ADDu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDu))
+ .addDef(Dest)
+ .addUse(DestTmp)
+ .addUse(MF.getInfo<MipsFunctionInfo>()
+ ->getGlobalBaseRegForGlobalISel());
+ if (!constrainSelectedInstRegOperands(*ADDu, TII, TRI, RBI))
+ return false;
+ }
+
+ MachineInstr *Branch =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoIndirectBranch))
+ .addUse(Dest);
+ if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
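Note: the SLL/ADDu/LW sequence above computes the entry address as Base + (Index << log2(EntrySize)) and loads the branch target through it; in PIC mode an extra ADDu rebases the loaded entry against the global base register. A scalar model of the address arithmetic:

    uint32_t jtEntryAddr(uint32_t Base, uint32_t Index, unsigned EntrySize) {
      assert(isPowerOf2_32(EntrySize)); // mirrors the selector's assert
      return Base + (Index << Log2_32(EntrySize)); // SLL + ADDu
    }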
+ case G_BRINDIRECT: {
+ MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoIndirectBranch))
+ .add(I.getOperand(0));
+ break;
+ }
case G_PHI: {
const Register DestReg = I.getOperand(0).getReg();
const unsigned OpSize = MRI.getType(DestReg).getSizeInBits();
const TargetRegisterClass *DefRC = nullptr;
- if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ if (Register::isPhysicalRegister(DestReg))
DefRC = TRI.getRegClass(DestReg);
else
DefRC = getRegClassForTypeOnBank(OpSize,
@@ -297,26 +383,35 @@ bool MipsInstructionSelector::select(MachineInstr &I,
case G_LOAD:
case G_ZEXTLOAD:
case G_SEXTLOAD: {
- const Register DestReg = I.getOperand(0).getReg();
- const unsigned DestRegBank = RBI.getRegBank(DestReg, MRI, TRI)->getID();
- const unsigned OpSize = MRI.getType(DestReg).getSizeInBits();
- const unsigned OpMemSizeInBytes = (*I.memoperands_begin())->getSize();
-
- if (DestRegBank == Mips::GPRBRegBankID && OpSize != 32)
- return false;
-
- if (DestRegBank == Mips::FPRBRegBankID && OpSize != 32 && OpSize != 64)
- return false;
-
- const unsigned NewOpc = selectLoadStoreOpCode(
- I.getOpcode(), OpMemSizeInBytes, DestRegBank, STI.isFP64bit());
+ const unsigned NewOpc = selectLoadStoreOpCode(I, MRI);
if (NewOpc == I.getOpcode())
return false;
+ MachineOperand BaseAddr = I.getOperand(1);
+ int64_t SignedOffset = 0;
+ // Try to fold load/store + G_GEP + G_CONSTANT
+ // %SignedOffset:(s32) = G_CONSTANT i32 16_bit_signed_immediate
+ // %Addr:(p0) = G_GEP %BaseAddr, %SignedOffset
+ // %LoadResult/%StoreSrc = load/store %Addr(p0)
+ // into:
+ // %LoadResult/%StoreSrc = NewOpc %BaseAddr(p0), 16_bit_signed_immediate
+
+ MachineInstr *Addr = MRI.getVRegDef(I.getOperand(1).getReg());
+ if (Addr->getOpcode() == G_GEP) {
+ MachineInstr *Offset = MRI.getVRegDef(Addr->getOperand(2).getReg());
+ if (Offset->getOpcode() == G_CONSTANT) {
+ APInt OffsetValue = Offset->getOperand(1).getCImm()->getValue();
+ if (OffsetValue.isSignedIntN(16)) {
+ BaseAddr = Addr->getOperand(1);
+ SignedOffset = OffsetValue.getSExtValue();
+ }
+ }
+ }
+
MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
.add(I.getOperand(0))
- .add(I.getOperand(1))
- .addImm(0)
+ .add(BaseAddr)
+ .addImm(SignedOffset)
.addMemOperand(*I.memoperands_begin());
break;
}
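Note: the fold fires only when the G_CONSTANT offset fits the 16-bit signed displacement field of MIPS load/store encodings; out-of-range offsets keep the explicit G_GEP. For example:

    assert(APInt(32, 32767).isSignedIntN(16));  // folded into the load
    assert(!APInt(32, 32768).isSignedIntN(16)); // G_GEP is kept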
@@ -356,6 +451,18 @@ bool MipsInstructionSelector::select(MachineInstr &I,
.add(I.getOperand(3));
break;
}
+ case G_IMPLICIT_DEF: {
+ MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::IMPLICIT_DEF))
+ .add(I.getOperand(0));
+
+ // Set the class based on the register bank; there can be FPR and GPR implicit defs.
+ MRI.setRegClass(MI->getOperand(0).getReg(),
+ getRegClassForTypeOnBank(
+ MRI.getType(I.getOperand(0).getReg()).getSizeInBits(),
+ *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI),
+ RBI));
+ break;
+ }
case G_CONSTANT: {
MachineIRBuilder B(I);
if (!materialize32BitImm(I.getOperand(0).getReg(),
@@ -423,7 +530,7 @@ bool MipsInstructionSelector::select(MachineInstr &I,
Opcode = Mips::TRUNC_W_S;
else
Opcode = STI.isFP64bit() ? Mips::TRUNC_W_D64 : Mips::TRUNC_W_D32;
- unsigned ResultInFPR = MRI.createVirtualRegister(&Mips::FGR32RegClass);
+ Register ResultInFPR = MRI.createVirtualRegister(&Mips::FGR32RegClass);
MachineInstr *Trunc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode))
.addDef(ResultInFPR)
.addUse(I.getOperand(1).getReg());
@@ -496,6 +603,24 @@ bool MipsInstructionSelector::select(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case G_JUMP_TABLE: {
+ if (MF.getTarget().isPositionIndependent()) {
+ MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW))
+ .addDef(I.getOperand(0).getReg())
+ .addReg(MF.getInfo<MipsFunctionInfo>()
+ ->getGlobalBaseRegForGlobalISel())
+ .addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_GOT)
+ .addMemOperand(
+ MF.getMachineMemOperand(MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad, 4, 4));
+ } else {
+ MI =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
+ .addDef(I.getOperand(0).getReg())
+ .addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_ABS_HI);
+ }
+ break;
+ }
case G_ICMP: {
struct Instr {
unsigned Opcode;
@@ -626,7 +751,7 @@ bool MipsInstructionSelector::select(MachineInstr &I,
// MipsFCMPCondCode, result is inverted i.e. MOVT_I is used.
unsigned MoveOpcode = isLogicallyNegated ? Mips::MOVT_I : Mips::MOVF_I;
- unsigned TrueInReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register TrueInReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
.addDef(TrueInReg)
.addUse(Mips::ZERO)
@@ -654,6 +779,33 @@ bool MipsInstructionSelector::select(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case G_FENCE: {
+ MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::SYNC)).addImm(0);
+ break;
+ }
+ case G_VASTART: {
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
+ int FI = FuncInfo->getVarArgsFrameIndex();
+
+ Register LeaReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ MachineInstr *LEA_ADDiu =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LEA_ADDiu))
+ .addDef(LeaReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ if (!constrainSelectedInstRegOperands(*LEA_ADDiu, TII, TRI, RBI))
+ return false;
+
+ MachineInstr *Store = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::SW))
+ .addUse(LeaReg)
+ .addUse(I.getOperand(0).getReg())
+ .addImm(0);
+ if (!constrainSelectedInstRegOperands(*Store, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
}
diff --git a/lib/Target/Mips/MipsLegalizerInfo.cpp b/lib/Target/Mips/MipsLegalizerInfo.cpp
index e442a81837ed..bb4a1d902d75 100644
--- a/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -16,18 +16,65 @@
using namespace llvm;
+struct TypesAndMemOps {
+ LLT ValTy;
+ LLT PtrTy;
+ unsigned MemSize;
+ bool MustBeNaturallyAligned;
+};
+
+static bool
+CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query,
+ std::initializer_list<TypesAndMemOps> SupportedValues) {
+ for (auto &Val : SupportedValues) {
+ if (Val.ValTy != Query.Types[0])
+ continue;
+ if (Val.PtrTy != Query.Types[1])
+ continue;
+ if (Val.MemSize != Query.MMODescrs[0].SizeInBits)
+ continue;
+ if (Val.MustBeNaturallyAligned &&
+ Query.MMODescrs[0].SizeInBits % Query.MMODescrs[0].AlignInBits != 0)
+ continue;
+ return true;
+ }
+ return false;
+}
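
The r6 alignment rule in CheckTy0Ty1MemSizeAlign reduces to a divisibility test on the memory operand. A standalone sketch of just that test (illustrative only, outside of LLVM; note that, as written, any alignment that divides the access size passes):

    #include <cstdio>

    // Mirrors the MMODescrs check above: the access is accepted when its
    // size in bits is a multiple of its alignment in bits.
    static bool naturallyAligned(unsigned SizeInBits, unsigned AlignInBits) {
      return SizeInBits % AlignInBits == 0;
    }

    int main() {
      std::printf("%d\n", naturallyAligned(32, 32)); // 1: aligned word access
      std::printf("%d\n", naturallyAligned(32, 8));  // 1: 8-bit alignment divides 32
      std::printf("%d\n", naturallyAligned(16, 32)); // 0: rejected
    }
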
+
+static bool CheckTyN(unsigned N, const LegalityQuery &Query,
+ std::initializer_list<LLT> SupportedValues) {
+ for (auto &Val : SupportedValues)
+ if (Val == Query.Types[N])
+ return true;
+ return false;
+}
+
MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
using namespace TargetOpcode;
const LLT s1 = LLT::scalar(1);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
+ const LLT v16s8 = LLT::vector(16, 8);
+ const LLT v8s16 = LLT::vector(8, 16);
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
const LLT p0 = LLT::pointer(0, 32);
- getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
+ getActionDefinitionsBuilder({G_SUB, G_MUL})
.legalFor({s32})
.clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_ADD)
+ .legalIf([=, &ST](const LegalityQuery &Query) {
+ if (CheckTyN(0, Query, {s32}))
+ return true;
+ if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
+ return true;
+ return false;
+ })
+ .clampScalar(0, s32, s32);
+
getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO})
.lowerFor({{s32, s1}});
@@ -36,13 +83,26 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
.maxScalar(0, s32);
getActionDefinitionsBuilder({G_LOAD, G_STORE})
- .legalForTypesWithMemDesc({{s32, p0, 8, 8},
- {s32, p0, 16, 8},
- {s32, p0, 32, 8},
- {s64, p0, 64, 8},
- {p0, p0, 32, 8}})
+ .legalIf([=, &ST](const LegalityQuery &Query) {
+ if (CheckTy0Ty1MemSizeAlign(Query, {{s32, p0, 8, ST.hasMips32r6()},
+ {s32, p0, 16, ST.hasMips32r6()},
+ {s32, p0, 32, ST.hasMips32r6()},
+ {p0, p0, 32, ST.hasMips32r6()},
+ {s64, p0, 64, ST.hasMips32r6()}}))
+ return true;
+ if (ST.hasMSA() &&
+ CheckTy0Ty1MemSizeAlign(Query, {{v16s8, p0, 128, false},
+ {v8s16, p0, 128, false},
+ {v4s32, p0, 128, false},
+ {v2s64, p0, 128, false}}))
+ return true;
+ return false;
+ })
.minScalar(0, s32);
+ getActionDefinitionsBuilder(G_IMPLICIT_DEF)
+ .legalFor({s32, s64});
+
getActionDefinitionsBuilder(G_UNMERGE_VALUES)
.legalFor({{s32, s64}});
@@ -50,9 +110,17 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
.legalFor({{s64, s32}});
getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
- .legalForTypesWithMemDesc({{s32, p0, 8, 8},
- {s32, p0, 16, 8}})
- .minScalar(0, s32);
+ .legalForTypesWithMemDesc({{s32, p0, 8, 8},
+ {s32, p0, 16, 8}})
+ .clampScalar(0, s32, s32);
+
+ getActionDefinitionsBuilder({G_ZEXT, G_SEXT})
+ .legalIf([](const LegalityQuery &Query) { return false; })
+ .maxScalar(0, s32);
+
+ getActionDefinitionsBuilder(G_TRUNC)
+ .legalIf([](const LegalityQuery &Query) { return false; })
+ .maxScalar(1, s32);
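
Marking G_ZEXT/G_SEXT/G_TRUNC as never legal while capping their scalar type at s32 forces wider extends to be rewritten in s32 pieces by the legalizer; e.g. (a hedged gMIR sketch of the usual narrowing, illustrative only):

    %x:_(s64) = G_SEXT %y:_(s32)
  is expected to become
    %hi:_(s32) = G_ASHR %y, 31
    %x:_(s64) = G_MERGE_VALUES %y:_(s32), %hi:_(s32)
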
getActionDefinitionsBuilder(G_SELECT)
.legalForCartesianProduct({p0, s32, s64}, {s32})
@@ -63,6 +131,12 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
.legalFor({s32})
.minScalar(0, s32);
+ getActionDefinitionsBuilder(G_BRJT)
+ .legalFor({{p0, s32}});
+
+ getActionDefinitionsBuilder(G_BRINDIRECT)
+ .legalFor({p0});
+
getActionDefinitionsBuilder(G_PHI)
.legalFor({p0, s32, s64})
.minScalar(0, s32);
@@ -77,8 +151,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
.libcallFor({s64});
getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
- .legalFor({s32, s32})
- .minScalar(1, s32);
+ .legalFor({{s32, s32}})
+ .clampScalar(1, s32, s32)
+ .clampScalar(0, s32, s32);
getActionDefinitionsBuilder(G_ICMP)
.legalForCartesianProduct({s32}, {s32, p0})
@@ -89,15 +164,24 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
.legalFor({s32})
.clampScalar(0, s32, s32);
- getActionDefinitionsBuilder(G_GEP)
+ getActionDefinitionsBuilder({G_GEP, G_INTTOPTR})
.legalFor({{p0, s32}});
+ getActionDefinitionsBuilder(G_PTRTOINT)
+ .legalFor({{s32, p0}});
+
getActionDefinitionsBuilder(G_FRAME_INDEX)
.legalFor({p0});
- getActionDefinitionsBuilder(G_GLOBAL_VALUE)
+ getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE})
.legalFor({p0});
+ getActionDefinitionsBuilder(G_DYN_STACKALLOC)
+ .lowerFor({{p0, s32}});
+
+ getActionDefinitionsBuilder(G_VASTART)
+ .legalFor({p0});
+
// FP instructions
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64});
@@ -126,6 +210,7 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
getActionDefinitionsBuilder(G_FPTOUI)
.libcallForCartesianProduct({s64}, {s64, s32})
+ .lowerForCartesianProduct({s32}, {s64, s32})
.minScalar(0, s32);
// Int to FP conversion instructions
@@ -136,8 +221,11 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
getActionDefinitionsBuilder(G_UITOFP)
.libcallForCartesianProduct({s64, s32}, {s64})
+ .customForCartesianProduct({s64, s32}, {s32})
.minScalar(1, s32);
+ getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+
computeTables();
verify(*ST.getInstrInfo());
}
@@ -150,6 +238,134 @@ bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI,
using namespace TargetOpcode;
MIRBuilder.setInstr(MI);
+ const MipsSubtarget &STI =
+ static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
- return false;
+ switch (MI.getOpcode()) {
+ case G_UITOFP: {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (SrcTy != s32)
+ return false;
+ if (DstTy != s32 && DstTy != s64)
+ return false;
+
+  // Let 0xABCDEFGH be the unsigned value in MI.getOperand(1). First convert
+  // it to double. The mantissa has 52 bits, so we use the following trick:
+  // build the floating point bit pattern 0x43300000ABCDEFGH, which
+  // represents 2^52 * 0x1.00000ABCDEFGH, i.e. 0x100000ABCDEFGH.0.
+  // Next, subtract 2^52 * 0x1.0000000000000, i.e. 0x10000000000000.0, from it.
+  // Done. Truncate the double to float if needed.
+
+ MachineInstrBuilder Bitcast = MIRBuilder.buildInstr(
+ STI.isFP64bit() ? Mips::BuildPairF64_64 : Mips::BuildPairF64, {s64},
+ {Src, MIRBuilder.buildConstant(s32, UINT32_C(0x43300000))});
+ Bitcast.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
+
+ MachineInstrBuilder TwoP52FP = MIRBuilder.buildFConstant(
+ s64, BitsToDouble(UINT64_C(0x4330000000000000)));
+
+ if (DstTy == s64)
+ MIRBuilder.buildFSub(Dst, Bitcast, TwoP52FP);
+ else {
+ MachineInstrBuilder ResF64 = MIRBuilder.buildFSub(s64, Bitcast, TwoP52FP);
+ MIRBuilder.buildFPTrunc(Dst, ResF64);
+ }
+
+ MI.eraseFromParent();
+ break;
+ }
+ default:
+ return false;
+ }
+
+ return true;
+}
+
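
The trick above can be checked on the host. A minimal self-contained sketch, assuming IEEE-754 doubles (illustrative only, not part of the patch):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Build the bit pattern 0x43300000'XXXXXXXX; read back as a double it is
    // exactly 2^52 + U (U < 2^32 fits in the 52-bit mantissa), so subtracting
    // 2^52 recovers U exactly.
    static double uint32ToDouble(uint32_t U) {
      uint64_t Bits = (UINT64_C(0x43300000) << 32) | U;
      double D;
      std::memcpy(&D, &Bits, sizeof(D));
      return D - 0x1p52; // 0x1p52 == BitsToDouble(0x4330000000000000)
    }

    int main() {
      std::printf("%.1f\n", uint32ToDouble(0xFFFFFFFFu)); // 4294967295.0
      std::printf("%.1f\n", uint32ToDouble(0u));          // 0.0
    }
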
+static bool SelectMSA3OpIntrinsic(MachineInstr &MI, unsigned Opcode,
+ MachineIRBuilder &MIRBuilder,
+ const MipsSubtarget &ST) {
+ assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.");
+ if (!MIRBuilder.buildInstr(Opcode)
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(),
+ *ST.getRegBankInfo()))
+ return false;
+ MI.eraseFromParent();
+ return true;
+}
+
+static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
+ MachineIRBuilder &MIRBuilder,
+ const MipsSubtarget &ST) {
+ assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.");
+ MIRBuilder.buildInstr(Opcode)
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ MI.eraseFromParent();
+ return true;
+}
+
+bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ const MipsSubtarget &ST =
+ static_cast<const MipsSubtarget &>(MI.getMF()->getSubtarget());
+ const MipsInstrInfo &TII = *ST.getInstrInfo();
+ const MipsRegisterInfo &TRI = *ST.getRegisterInfo();
+ const RegisterBankInfo &RBI = *ST.getRegBankInfo();
+ MIRBuilder.setInstr(MI);
+
+ switch (MI.getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ if (createMemLibcall(MIRBuilder, MRI, MI) ==
+ LegalizerHelper::UnableToLegalize)
+ return false;
+ MI.eraseFromParent();
+ return true;
+ case Intrinsic::trap: {
+ MachineInstr *Trap = MIRBuilder.buildInstr(Mips::TRAP);
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Trap, TII, TRI, RBI);
+ }
+ case Intrinsic::vacopy: {
+ Register Tmp = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
+ MachinePointerInfo MPO;
+ MIRBuilder.buildLoad(Tmp, MI.getOperand(2),
+ *MI.getMF()->getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad, 4, 4));
+ MIRBuilder.buildStore(Tmp, MI.getOperand(1),
+ *MI.getMF()->getMachineMemOperand(
+ MPO, MachineMemOperand::MOStore, 4, 4));
+ MI.eraseFromParent();
+ return true;
+ }
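
Since the va_list on this target is a single 32-bit pointer, the 4-byte load/store pair above is just a pointer copy; a C-level analogy (illustrative only):

    // What the vacopy lowering amounts to at the C level.
    static void vacopyAnalogy(void **Dst, void **Src) { *Dst = *Src; }
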
+ case Intrinsic::mips_addv_b:
+ case Intrinsic::mips_addv_h:
+ case Intrinsic::mips_addv_w:
+ case Intrinsic::mips_addv_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_ADD, MIRBuilder, ST);
+ case Intrinsic::mips_addvi_b:
+ return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_B, MIRBuilder, ST);
+ case Intrinsic::mips_addvi_h:
+ return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_H, MIRBuilder, ST);
+ case Intrinsic::mips_addvi_w:
+ return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_W, MIRBuilder, ST);
+ case Intrinsic::mips_addvi_d:
+ return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_D, MIRBuilder, ST);
+ default:
+ break;
+ }
+ return true;
}
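
For the addv family, MSA3OpIntrinsicToGeneric rewrites the target intrinsic into generic gMIR, which the vector G_ADD legality rule added above then accepts on MSA; roughly (a sketch, register names illustrative):

    %v:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.addv.w), %a:_(<4 x s32>), %b:_(<4 x s32>)
  becomes
    %v:_(<4 x s32>) = G_ADD %a:_(<4 x s32>), %b:_(<4 x s32>)
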
diff --git a/lib/Target/Mips/MipsLegalizerInfo.h b/lib/Target/Mips/MipsLegalizerInfo.h
index e5021e081890..9696c262b2db 100644
--- a/lib/Target/Mips/MipsLegalizerInfo.h
+++ b/lib/Target/Mips/MipsLegalizerInfo.h
@@ -28,6 +28,9 @@ public:
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const override;
+
+ bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const override;
};
} // end namespace llvm
#endif
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 907ed9ef746f..f585d9c1a148 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -60,6 +60,11 @@ def immZExt2Ptr : ImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
def immZExt3Ptr : ImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
+def timmZExt1Ptr : TImmLeaf<iPTR, [{return isUInt<1>(Imm);}]>;
+def timmZExt2Ptr : TImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
+def timmZExt3Ptr : TImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
+def timmZExt4Ptr : TImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
+
// Operands
def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>;
@@ -1270,7 +1275,7 @@ class MSA_I8_SHF_DESC_BASE<string instr_asm, RegisterOperand ROWD,
dag OutOperandList = (outs ROWD:$wd);
dag InOperandList = (ins ROWS:$ws, uimm8:$u8);
string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
- list<dag> Pattern = [(set ROWD:$wd, (MipsSHF immZExt8:$u8, ROWS:$ws))];
+ list<dag> Pattern = [(set ROWD:$wd, (MipsSHF timmZExt8:$u8, ROWS:$ws))];
InstrItinClass Itinerary = itin;
}
@@ -2299,13 +2304,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC :
class INSERT_FD_VIDX64_PSEUDO_DESC :
MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd, GPR64Opnd>;
-class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4,
+class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, timmZExt4,
MSA128BOpnd>;
-class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3,
+class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, timmZExt3,
MSA128HOpnd>;
-class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2,
+class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, timmZExt2,
MSA128WOpnd>;
-class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1,
+class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, timmZExt1,
MSA128DOpnd>;
class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -2518,22 +2523,22 @@ class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>;
class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>;
class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3,
- immZExt3, MSA128BOpnd>;
+ timmZExt3, MSA128BOpnd>;
class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4,
- immZExt4, MSA128HOpnd>;
+ timmZExt4, MSA128HOpnd>;
class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5,
- immZExt5, MSA128WOpnd>;
+ timmZExt5, MSA128WOpnd>;
class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6,
- immZExt6, MSA128DOpnd>;
+ timmZExt6, MSA128DOpnd>;
class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3,
- immZExt3, MSA128BOpnd>;
+ timmZExt3, MSA128BOpnd>;
class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4,
- immZExt4, MSA128HOpnd>;
+ timmZExt4, MSA128HOpnd>;
class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5,
- immZExt5, MSA128WOpnd>;
+ timmZExt5, MSA128WOpnd>;
class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6,
- immZExt6, MSA128DOpnd>;
+ timmZExt6, MSA128DOpnd>;
class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>;
class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
@@ -2546,16 +2551,16 @@ class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b,
MSA128BOpnd, MSA128BOpnd, uimm4,
- immZExt4>;
+ timmZExt4>;
class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h,
MSA128HOpnd, MSA128HOpnd, uimm3,
- immZExt3>;
+ timmZExt3>;
class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w,
MSA128WOpnd, MSA128WOpnd, uimm2,
- immZExt2>;
+ timmZExt2>;
class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d,
MSA128DOpnd, MSA128DOpnd, uimm1,
- immZExt1>;
+ timmZExt1>;
class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>;
class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>;
@@ -2609,13 +2614,13 @@ class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>;
class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>;
class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3,
- immZExt3, MSA128BOpnd>;
+ timmZExt3, MSA128BOpnd>;
class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4,
- immZExt4, MSA128HOpnd>;
+ timmZExt4, MSA128HOpnd>;
class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5,
- immZExt5, MSA128WOpnd>;
+ timmZExt5, MSA128WOpnd>;
class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6,
- immZExt6, MSA128DOpnd>;
+ timmZExt6, MSA128DOpnd>;
class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>;
class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>;
@@ -2637,13 +2642,13 @@ class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>;
class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>;
class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3,
- immZExt3, MSA128BOpnd>;
+ timmZExt3, MSA128BOpnd>;
class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4,
- immZExt4, MSA128HOpnd>;
+ timmZExt4, MSA128HOpnd>;
class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5,
- immZExt5, MSA128WOpnd>;
+ timmZExt5, MSA128WOpnd>;
class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6,
- immZExt6, MSA128DOpnd>;
+ timmZExt6, MSA128DOpnd>;
class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
ValueType TyNode, RegisterOperand ROWD,
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index 5ef07a2d283e..8bd64ff6cb27 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -127,8 +127,7 @@ static MachineOperand *getCallTargetRegOpnd(MachineInstr &MI) {
MachineOperand &MO = MI.getOperand(0);
- if (!MO.isReg() || !MO.isUse() ||
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.isUse() || !Register::isVirtualRegister(MO.getReg()))
return nullptr;
return &MO;
@@ -152,7 +151,7 @@ static void setCallTargetReg(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I) {
MachineFunction &MF = *MBB->getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- unsigned SrcReg = I->getOperand(0).getReg();
+ Register SrcReg = I->getOperand(0).getReg();
unsigned DstReg = getRegTy(SrcReg, MF) == MVT::i32 ? Mips::T9 : Mips::T9_64;
BuildMI(*MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), DstReg)
.addReg(SrcReg);
diff --git a/lib/Target/Mips/MipsPfmCounters.td b/lib/Target/Mips/MipsPfmCounters.td
new file mode 100644
index 000000000000..c7779b474b91
--- /dev/null
+++ b/lib/Target/Mips/MipsPfmCounters.td
@@ -0,0 +1,18 @@
+//===-- MipsPfmCounters.td - Mips Hardware Counters --------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the available hardware counters for Mips.
+//
+//===----------------------------------------------------------------------===//
+
+def CpuCyclesPfmCounter : PfmCounter<"CYCLES">;
+
+def DefaultPfmCounters : ProcPfmCounters {
+ let CycleCounter = CpuCyclesPfmCounter;
+}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
diff --git a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
index 85076590d407..ace0735652bd 100644
--- a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -27,7 +27,8 @@ class MipsPreLegalizerCombinerInfo : public CombinerInfo {
public:
MipsPreLegalizerCombinerInfo()
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
- /*LegalizerInfo*/ nullptr) {}
+ /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
+ /*EnableOptSize*/ false, /*EnableMinSize*/ false) {}
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
MachineIRBuilder &B) const override;
};
diff --git a/lib/Target/Mips/MipsRegisterBankInfo.cpp b/lib/Target/Mips/MipsRegisterBankInfo.cpp
index d8bcf16afd50..d334366e727c 100644
--- a/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -12,6 +12,7 @@
#include "MipsRegisterBankInfo.h"
#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -27,20 +28,23 @@ enum PartialMappingIdx {
PMI_GPR,
PMI_SPR,
PMI_DPR,
+ PMI_MSA,
PMI_Min = PMI_GPR,
};
RegisterBankInfo::PartialMapping PartMappings[]{
{0, 32, GPRBRegBank},
{0, 32, FPRBRegBank},
- {0, 64, FPRBRegBank}
+ {0, 64, FPRBRegBank},
+ {0, 128, FPRBRegBank}
};
enum ValueMappingIdx {
InvalidIdx = 0,
GPRIdx = 1,
SPRIdx = 4,
- DPRIdx = 7
+ DPRIdx = 7,
+ MSAIdx = 10
};
RegisterBankInfo::ValueMapping ValueMappings[] = {
@@ -50,14 +54,18 @@ RegisterBankInfo::ValueMapping ValueMappings[] = {
{&PartMappings[PMI_GPR - PMI_Min], 1},
{&PartMappings[PMI_GPR - PMI_Min], 1},
{&PartMappings[PMI_GPR - PMI_Min], 1},
- // up to 3 ops operands FPRs - single precission
+  // up to 3 operands in FPRs - single precision
{&PartMappings[PMI_SPR - PMI_Min], 1},
{&PartMappings[PMI_SPR - PMI_Min], 1},
{&PartMappings[PMI_SPR - PMI_Min], 1},
- // up to 3 ops operands FPRs - double precission
+  // up to 3 operands in FPRs - double precision
{&PartMappings[PMI_DPR - PMI_Min], 1},
{&PartMappings[PMI_DPR - PMI_Min], 1},
- {&PartMappings[PMI_DPR - PMI_Min], 1}
+ {&PartMappings[PMI_DPR - PMI_Min], 1},
+ // up to 3 operands in FPRs - MSA
+ {&PartMappings[PMI_MSA - PMI_Min], 1},
+ {&PartMappings[PMI_MSA - PMI_Min], 1},
+ {&PartMappings[PMI_MSA - PMI_Min], 1}
};
} // end namespace Mips
@@ -86,6 +94,10 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass(
case Mips::FGR32RegClassID:
case Mips::FGR64RegClassID:
case Mips::AFGR64RegClassID:
+ case Mips::MSA128BRegClassID:
+ case Mips::MSA128HRegClassID:
+ case Mips::MSA128WRegClassID:
+ case Mips::MSA128DRegClassID:
return getRegBank(Mips::FPRBRegBankID);
default:
llvm_unreachable("Register class not supported");
@@ -149,6 +161,7 @@ static bool isAmbiguous(unsigned Opc) {
case TargetOpcode::G_STORE:
case TargetOpcode::G_PHI:
case TargetOpcode::G_SELECT:
+ case TargetOpcode::G_IMPLICIT_DEF:
return true;
default:
return false;
@@ -163,8 +176,7 @@ void MipsRegisterBankInfo::AmbiguousRegDefUseContainer::addDefUses(
MachineInstr *NonCopyInstr = skipCopiesOutgoing(&UseMI);
// Copy with many uses.
if (NonCopyInstr->getOpcode() == TargetOpcode::COPY &&
- !TargetRegisterInfo::isPhysicalRegister(
- NonCopyInstr->getOperand(0).getReg()))
+ !Register::isPhysicalRegister(NonCopyInstr->getOperand(0).getReg()))
addDefUses(NonCopyInstr->getOperand(0).getReg(), MRI);
else
DefUses.push_back(skipCopiesOutgoing(&UseMI));
@@ -186,7 +198,7 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesOutgoing(
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineInstr *Ret = MI;
while (Ret->getOpcode() == TargetOpcode::COPY &&
- !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(0).getReg()) &&
+ !Register::isPhysicalRegister(Ret->getOperand(0).getReg()) &&
MRI.hasOneUse(Ret->getOperand(0).getReg())) {
Ret = &(*MRI.use_instr_begin(Ret->getOperand(0).getReg()));
}
@@ -200,7 +212,7 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesIncoming(
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineInstr *Ret = MI;
while (Ret->getOpcode() == TargetOpcode::COPY &&
- !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(1).getReg()))
+ !Register::isPhysicalRegister(Ret->getOperand(1).getReg()))
Ret = MRI.getVRegDef(Ret->getOperand(1).getReg());
return Ret;
}
@@ -231,6 +243,9 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::AmbiguousRegDefUseContainer(
addUseDef(MI->getOperand(2).getReg(), MRI);
addUseDef(MI->getOperand(3).getReg(), MRI);
}
+
+ if (MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+ addDefUses(MI->getOperand(0).getReg(), MRI);
}
bool MipsRegisterBankInfo::TypeInfoForMF::visit(
@@ -318,8 +333,7 @@ void MipsRegisterBankInfo::TypeInfoForMF::setTypes(const MachineInstr *MI,
void MipsRegisterBankInfo::TypeInfoForMF::setTypesAccordingToPhysicalRegister(
const MachineInstr *MI, const MachineInstr *CopyInst, unsigned Op) {
- assert((TargetRegisterInfo::isPhysicalRegister(
- CopyInst->getOperand(Op).getReg())) &&
+ assert((Register::isPhysicalRegister(CopyInst->getOperand(Op).getReg())) &&
"Copies of non physical registers should not be considered here.\n");
const MachineFunction &MF = *CopyInst->getMF();
@@ -353,6 +367,31 @@ void MipsRegisterBankInfo::TypeInfoForMF::cleanupIfNewFunction(
}
}
+static const MipsRegisterBankInfo::ValueMapping *
+getMSAMapping(const MachineFunction &MF) {
+ assert(static_cast<const MipsSubtarget &>(MF.getSubtarget()).hasMSA() &&
+ "MSA mapping not available on target without MSA.");
+ return &Mips::ValueMappings[Mips::MSAIdx];
+}
+
+static const MipsRegisterBankInfo::ValueMapping *getFprbMapping(unsigned Size) {
+ return Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx];
+}
+
+static const unsigned CustomMappingID = 1;
+
+// Only the 64 bit mapping is available in fprb and will be marked as custom,
+// i.e. the value will be split into two 32 bit registers in gprb.
+static const MipsRegisterBankInfo::ValueMapping *
+getGprbOrCustomMapping(unsigned Size, unsigned &MappingID) {
+ if (Size == 32)
+ return &Mips::ValueMappings[Mips::GPRIdx];
+
+ MappingID = CustomMappingID;
+ return &Mips::ValueMappings[Mips::DPRIdx];
+}
+
const RegisterBankInfo::InstructionMapping &
MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
@@ -377,17 +416,35 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned NumOperands = MI.getNumOperands();
const ValueMapping *OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
unsigned MappingID = DefaultMappingID;
- const unsigned CustomMappingID = 1;
+
+ // Check if LLT sizes match sizes of available register banks.
+ for (const MachineOperand &Op : MI.operands()) {
+ if (Op.isReg()) {
+ LLT RegTy = MRI.getType(Op.getReg());
+
+ if (RegTy.isScalar() &&
+ (RegTy.getSizeInBits() != 32 && RegTy.getSizeInBits() != 64))
+ return getInvalidInstructionMapping();
+
+ if (RegTy.isVector() && RegTy.getSizeInBits() != 128)
+ return getInvalidInstructionMapping();
+ }
+ }
+
+ const LLT Op0Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Op0Size = Op0Ty.getSizeInBits();
+ InstType InstTy = InstType::Integer;
switch (Opc) {
case G_TRUNC:
- case G_ADD:
case G_SUB:
case G_MUL:
case G_UMULH:
case G_ZEXTLOAD:
case G_SEXTLOAD:
case G_GEP:
+ case G_INTTOPTR:
+ case G_PTRTOINT:
case G_AND:
case G_OR:
case G_XOR:
@@ -398,66 +455,42 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_UDIV:
case G_SREM:
case G_UREM:
+ case G_BRINDIRECT:
+ case G_VASTART:
OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
break;
- case G_LOAD: {
- unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- InstType InstTy = InstType::Integer;
- if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
- InstTy = TI.determineInstType(&MI);
- }
-
- if (InstTy == InstType::FloatingPoint ||
- (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb
- OperandsMapping =
- getOperandsMapping({Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx],
- &Mips::ValueMappings[Mips::GPRIdx]});
+ case G_ADD:
+ OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
+ if (Op0Size == 128)
+ OperandsMapping = getMSAMapping(MF);
+ break;
+ case G_STORE:
+ case G_LOAD:
+ if (Op0Size == 128) {
+ OperandsMapping = getOperandsMapping(
+ {getMSAMapping(MF), &Mips::ValueMappings[Mips::GPRIdx]});
break;
- } else { // gprb
- OperandsMapping =
- getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx],
- &Mips::ValueMappings[Mips::GPRIdx]});
- if (Size == 64)
- MappingID = CustomMappingID;
}
- break;
- }
- case G_STORE: {
- unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- InstType InstTy = InstType::Integer;
- if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+ if (!Op0Ty.isPointer())
InstTy = TI.determineInstType(&MI);
- }
if (InstTy == InstType::FloatingPoint ||
- (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb
- OperandsMapping =
- getOperandsMapping({Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx],
- &Mips::ValueMappings[Mips::GPRIdx]});
- break;
- } else { // gprb
+ (Op0Size == 64 && InstTy == InstType::Ambiguous))
+ OperandsMapping = getOperandsMapping(
+ {getFprbMapping(Op0Size), &Mips::ValueMappings[Mips::GPRIdx]});
+ else
OperandsMapping =
- getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx],
+ getOperandsMapping({getGprbOrCustomMapping(Op0Size, MappingID),
&Mips::ValueMappings[Mips::GPRIdx]});
- if (Size == 64)
- MappingID = CustomMappingID;
- }
+
break;
- }
- case G_PHI: {
- unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- InstType InstTy = InstType::Integer;
- if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+ case G_PHI:
+ if (!Op0Ty.isPointer())
InstTy = TI.determineInstType(&MI);
- }
// PHI is copylike and should have one regbank in mapping for def register.
- if (InstTy == InstType::Integer && Size == 64) { // fprb
+ if (InstTy == InstType::Integer && Op0Size == 64) {
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx]});
return getInstructionMapping(CustomMappingID, /*Cost=*/1, OperandsMapping,
@@ -465,80 +498,63 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
// Use default handling for PHI, i.e. set reg bank of def operand to match
// register banks of use operands.
- const RegisterBankInfo::InstructionMapping &Mapping =
- getInstrMappingImpl(MI);
- return Mapping;
- }
+ return getInstrMappingImpl(MI);
case G_SELECT: {
- unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- InstType InstTy = InstType::Integer;
- if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+ if (!Op0Ty.isPointer())
InstTy = TI.determineInstType(&MI);
- }
if (InstTy == InstType::FloatingPoint ||
- (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb
- const RegisterBankInfo::ValueMapping *Bank =
- Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx];
+ (Op0Size == 64 && InstTy == InstType::Ambiguous)) {
+ const RegisterBankInfo::ValueMapping *Bank = getFprbMapping(Op0Size);
OperandsMapping = getOperandsMapping(
{Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank});
break;
- } else { // gprb
+ } else {
const RegisterBankInfo::ValueMapping *Bank =
- Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx];
+ getGprbOrCustomMapping(Op0Size, MappingID);
OperandsMapping = getOperandsMapping(
{Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank});
- if (Size == 64)
- MappingID = CustomMappingID;
}
break;
}
- case G_UNMERGE_VALUES: {
+ case G_IMPLICIT_DEF:
+ if (!Op0Ty.isPointer())
+ InstTy = TI.determineInstType(&MI);
+
+ if (InstTy == InstType::FloatingPoint)
+ OperandsMapping = getFprbMapping(Op0Size);
+ else
+ OperandsMapping = getGprbOrCustomMapping(Op0Size, MappingID);
+
+ break;
+ case G_UNMERGE_VALUES:
OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
&Mips::ValueMappings[Mips::GPRIdx],
&Mips::ValueMappings[Mips::DPRIdx]});
MappingID = CustomMappingID;
break;
- }
- case G_MERGE_VALUES: {
+ case G_MERGE_VALUES:
OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx],
&Mips::ValueMappings[Mips::GPRIdx],
&Mips::ValueMappings[Mips::GPRIdx]});
MappingID = CustomMappingID;
break;
- }
case G_FADD:
case G_FSUB:
case G_FMUL:
case G_FDIV:
case G_FABS:
- case G_FSQRT:{
- unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- assert((Size == 32 || Size == 64) && "Unsupported floating point size");
- OperandsMapping = Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx];
+ case G_FSQRT:
+ OperandsMapping = getFprbMapping(Op0Size);
break;
- }
- case G_FCONSTANT: {
- unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- assert((Size == 32 || Size == 64) && "Unsupported floating point size");
- const RegisterBankInfo::ValueMapping *FPRValueMapping =
- Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx];
- OperandsMapping = getOperandsMapping({FPRValueMapping, nullptr});
+ case G_FCONSTANT:
+ OperandsMapping = getOperandsMapping({getFprbMapping(Op0Size), nullptr});
break;
- }
case G_FCMP: {
- unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- assert((Size == 32 || Size == 64) && "Unsupported floating point size");
- const RegisterBankInfo::ValueMapping *FPRValueMapping =
- Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx];
+ unsigned Op2Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr,
- FPRValueMapping, FPRValueMapping});
+ getFprbMapping(Op2Size), getFprbMapping(Op2Size)});
break;
}
case G_FPEXT:
@@ -550,36 +566,31 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
&Mips::ValueMappings[Mips::DPRIdx]});
break;
case G_FPTOSI: {
+ assert((Op0Size == 32) && "Unsupported integer size");
unsigned SizeFP = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- assert((MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 32) &&
- "Unsupported integer size");
- assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size");
- OperandsMapping = getOperandsMapping({
- &Mips::ValueMappings[Mips::GPRIdx],
- SizeFP == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx],
- });
+ OperandsMapping = getOperandsMapping(
+ {&Mips::ValueMappings[Mips::GPRIdx], getFprbMapping(SizeFP)});
break;
}
- case G_SITOFP: {
- unsigned SizeInt = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned SizeFP = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- (void)SizeInt;
- assert((SizeInt == 32) && "Unsupported integer size");
- assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size");
- OperandsMapping =
- getOperandsMapping({SizeFP == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
- : &Mips::ValueMappings[Mips::DPRIdx],
- &Mips::ValueMappings[Mips::GPRIdx]});
+ case G_SITOFP:
+ assert((MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() == 32) &&
+ "Unsupported integer size");
+ OperandsMapping = getOperandsMapping(
+ {getFprbMapping(Op0Size), &Mips::ValueMappings[Mips::GPRIdx]});
break;
- }
case G_CONSTANT:
case G_FRAME_INDEX:
case G_GLOBAL_VALUE:
+ case G_JUMP_TABLE:
case G_BRCOND:
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr});
break;
+ case G_BRJT:
+ OperandsMapping =
+ getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr,
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
case G_ICMP:
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr,
@@ -609,11 +620,41 @@ public:
};
} // end anonymous namespace
-/// Here we have to narrowScalar s64 operands to s32, combine away
-/// G_MERGE/G_UNMERGE and erase instructions that became dead in the process.
-/// We manually assign 32 bit gprb to register operands of all new instructions
-/// that got created in the process since they will not end up in RegBankSelect
-/// loop. Careful not to delete instruction after MI i.e. MI.getIterator()++.
+void MipsRegisterBankInfo::setRegBank(MachineInstr &MI,
+ MachineRegisterInfo &MRI) const {
+ Register Dest = MI.getOperand(0).getReg();
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_STORE:
+    // No def operands; skip this instruction.
+ break;
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SELECT:
+ case TargetOpcode::G_PHI:
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ assert(MRI.getType(Dest) == LLT::scalar(32) && "Unexpected operand type.");
+ MRI.setRegBank(Dest, getRegBank(Mips::GPRBRegBankID));
+ break;
+ }
+ case TargetOpcode::G_GEP: {
+ assert(MRI.getType(Dest).isPointer() && "Unexpected operand type.");
+ MRI.setRegBank(Dest, getRegBank(Mips::GPRBRegBankID));
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected opcode.");
+ }
+}
+
+static void
+combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner,
+ MachineInstr &MI) {
+ SmallVector<MachineInstr *, 2> DeadInstrs;
+ ArtCombiner.tryCombineMerges(MI, DeadInstrs);
+ for (MachineInstr *DeadMI : DeadInstrs)
+ DeadMI->eraseFromParent();
+}
+
void MipsRegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -621,18 +662,19 @@ void MipsRegisterBankInfo::applyMappingImpl(
MachineIRBuilder B(MI);
MachineFunction *MF = MI.getMF();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
+ const LegalizerInfo &LegInfo = *MF->getSubtarget().getLegalizerInfo();
InstManager NewInstrObserver(NewInstrs);
GISelObserverWrapper WrapperObserver(&NewInstrObserver);
LegalizerHelper Helper(*MF, WrapperObserver, B);
- LegalizationArtifactCombiner ArtCombiner(
- B, MF->getRegInfo(), *MF->getSubtarget().getLegalizerInfo());
+ LegalizationArtifactCombiner ArtCombiner(B, MF->getRegInfo(), LegInfo);
switch (MI.getOpcode()) {
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
case TargetOpcode::G_PHI:
- case TargetOpcode::G_SELECT: {
+ case TargetOpcode::G_SELECT:
+ case TargetOpcode::G_IMPLICIT_DEF: {
Helper.narrowScalar(MI, 0, LLT::scalar(32));
// Handle new instructions.
while (!NewInstrs.empty()) {
@@ -640,35 +682,21 @@ void MipsRegisterBankInfo::applyMappingImpl(
// This is new G_UNMERGE that was created during narrowScalar and will
// not be considered for regbank selection. RegBankSelect for mips
// visits/makes corresponding G_MERGE first. Combine them here.
- if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
- SmallVector<MachineInstr *, 2> DeadInstrs;
- ArtCombiner.tryCombineMerges(*NewMI, DeadInstrs);
- for (MachineInstr *DeadMI : DeadInstrs)
- DeadMI->eraseFromParent();
- }
+ if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES)
+ combineAwayG_UNMERGE_VALUES(ArtCombiner, *NewMI);
// This G_MERGE will be combined away when its corresponding G_UNMERGE
// gets regBankSelected.
else if (NewMI->getOpcode() == TargetOpcode::G_MERGE_VALUES)
continue;
else
- // Manually set register banks for all register operands to 32 bit gprb.
- for (auto Op : NewMI->operands()) {
- if (Op.isReg()) {
- assert(MRI.getType(Op.getReg()).getSizeInBits() == 32 &&
- "Only 32 bit gprb is handled here.\n");
- MRI.setRegBank(Op.getReg(), getRegBank(Mips::GPRBRegBankID));
- }
- }
+ // Manually set register banks for def operands to 32 bit gprb.
+ setRegBank(*NewMI, MRI);
}
return;
}
- case TargetOpcode::G_UNMERGE_VALUES: {
- SmallVector<MachineInstr *, 2> DeadInstrs;
- ArtCombiner.tryCombineMerges(MI, DeadInstrs);
- for (MachineInstr *DeadMI : DeadInstrs)
- DeadMI->eraseFromParent();
+ case TargetOpcode::G_UNMERGE_VALUES:
+ combineAwayG_UNMERGE_VALUES(ArtCombiner, MI);
return;
- }
default:
break;
}
diff --git a/lib/Target/Mips/MipsRegisterBankInfo.h b/lib/Target/Mips/MipsRegisterBankInfo.h
index 176813c031ed..fa0f1c7bc941 100644
--- a/lib/Target/Mips/MipsRegisterBankInfo.h
+++ b/lib/Target/Mips/MipsRegisterBankInfo.h
@@ -38,8 +38,17 @@ public:
const InstructionMapping &
getInstrMapping(const MachineInstr &MI) const override;
+  /// Here we have to narrowScalar s64 operands to s32, combine away G_MERGE or
+  /// G_UNMERGE, and erase instructions that became dead in the process. We
+  /// manually assign a bank to the def operand of every new instruction that
+  /// was created in the process, since those will not end up in the
+  /// RegBankSelect loop.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+  /// RegBankSelect determined that the s64 operand is better split into two
+  /// s32 operands in gprb. Here we manually set the register banks of the def
+  /// operands of newly created instructions, since they will not get
+  /// regbankselected.
+ void setRegBank(MachineInstr &MI, MachineRegisterInfo &MRI) const;
+
private:
/// Some instructions are used with both floating point and integer operands.
/// We assign InstType to such instructions as it helps us to avoid cross bank
diff --git a/lib/Target/Mips/MipsRegisterBanks.td b/lib/Target/Mips/MipsRegisterBanks.td
index 14a0181f8f11..7d11475884ce 100644
--- a/lib/Target/Mips/MipsRegisterBanks.td
+++ b/lib/Target/Mips/MipsRegisterBanks.td
@@ -11,4 +11,4 @@
def GPRBRegBank : RegisterBank<"GPRB", [GPR32]>;
-def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64]>;
+def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64, MSA128D]>;
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 4c6cc1ef771c..166ddea0431f 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -171,8 +171,8 @@ void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) {
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const TargetRegisterClass *RC = RegInfo.intRegClass(4);
- unsigned VR = MRI.createVirtualRegister(RC);
- unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ Register VR = MRI.createVirtualRegister(RC);
+ Register Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
TII.loadRegFromStack(MBB, I, VR, FI, RC, &RegInfo, 0);
BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), Dst)
@@ -186,8 +186,8 @@ void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) {
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const TargetRegisterClass *RC = RegInfo.intRegClass(4);
- unsigned VR = MRI.createVirtualRegister(RC);
- unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ Register VR = MRI.createVirtualRegister(RC);
+ Register Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), VR)
.addReg(Src, getKillRegState(I->getOperand(0).isKill()));
@@ -204,11 +204,11 @@ void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I,
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
- unsigned VR0 = MRI.createVirtualRegister(RC);
- unsigned VR1 = MRI.createVirtualRegister(RC);
- unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
- unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo);
- unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ Register VR0 = MRI.createVirtualRegister(RC);
+ Register VR1 = MRI.createVirtualRegister(RC);
+ Register Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ Register Lo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ Register Hi = RegInfo.getSubReg(Dst, Mips::sub_hi);
DebugLoc DL = I->getDebugLoc();
const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
@@ -229,9 +229,9 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
- unsigned VR0 = MRI.createVirtualRegister(RC);
- unsigned VR1 = MRI.createVirtualRegister(RC);
- unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ Register VR0 = MRI.createVirtualRegister(RC);
+ Register VR1 = MRI.createVirtualRegister(RC);
+ Register Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
unsigned SrcKill = getKillRegState(I->getOperand(0).isKill());
DebugLoc DL = I->getDebugLoc();
@@ -242,7 +242,7 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
}
bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) {
- unsigned Src = I->getOperand(1).getReg();
+ Register Src = I->getOperand(1).getReg();
std::pair<unsigned, unsigned> Opcodes = getMFHiLoOpc(Src);
if (!Opcodes.first)
@@ -262,11 +262,11 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
const TargetRegisterClass *DstRC = RegInfo.getMinimalPhysRegClass(Dst);
unsigned VRegSize = RegInfo.getRegSizeInBits(*DstRC) / 16;
const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize);
- unsigned VR0 = MRI.createVirtualRegister(RC);
- unsigned VR1 = MRI.createVirtualRegister(RC);
+ Register VR0 = MRI.createVirtualRegister(RC);
+ Register VR1 = MRI.createVirtualRegister(RC);
unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
- unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
- unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ Register DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ Register DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
DebugLoc DL = I->getDebugLoc();
BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src);
@@ -304,9 +304,9 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
// stack is used.
if (I->getNumOperands() == 4 && I->getOperand(3).isReg()
&& I->getOperand(3).getReg() == Mips::SP) {
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned LoReg = I->getOperand(1).getReg();
- unsigned HiReg = I->getOperand(2).getReg();
+ Register DstReg = I->getOperand(0).getReg();
+ Register LoReg = I->getOperand(1).getReg();
+ Register HiReg = I->getOperand(2).getReg();
// It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are
// the cases where mthc1 is not available). 64-bit architectures and
@@ -346,7 +346,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
const MachineOperand &Op2 = I->getOperand(2);
if ((Op1.isReg() && Op1.isUndef()) || (Op2.isReg() && Op2.isUndef())) {
- unsigned DstReg = I->getOperand(0).getReg();
+ Register DstReg = I->getOperand(0).getReg();
BuildMI(MBB, I, I->getDebugLoc(), TII.get(Mips::IMPLICIT_DEF), DstReg);
return true;
}
@@ -369,8 +369,8 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
// stack is used.
if (I->getNumOperands() == 4 && I->getOperand(3).isReg()
&& I->getOperand(3).getReg() == Mips::SP) {
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned SrcReg = Op1.getReg();
+ Register DstReg = I->getOperand(0).getReg();
+ Register SrcReg = Op1.getReg();
unsigned N = Op2.getImm();
int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N));
@@ -538,7 +538,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
if (RegInfo.needsStackRealignment(MF)) {
// addiu $Reg, $zero, -MaxAlignment
// andi $sp, $sp, $Reg
- unsigned VR = MF.getRegInfo().createVirtualRegister(RC);
+ Register VR = MF.getRegInfo().createVirtualRegister(RC);
assert(isInt<16>(MFI.getMaxAlignment()) &&
"Function's alignment size requirement is not supported.");
int MaxAlign = -(int)MFI.getMaxAlignment();
@@ -865,12 +865,15 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
MipsABIInfo ABI = STI.getABI();
+ unsigned RA = ABI.IsN64() ? Mips::RA_64 : Mips::RA;
unsigned FP = ABI.GetFramePtr();
unsigned BP = ABI.IsN64() ? Mips::S7_64 : Mips::S7;
- // Mark $fp as used if function has dedicated frame pointer.
- if (hasFP(MF))
+ // Mark $ra and $fp as used if function has dedicated frame pointer.
+ if (hasFP(MF)) {
+ setAliasRegs(MF, SavedRegs, RA);
setAliasRegs(MF, SavedRegs, FP);
+ }
// Mark $s7 as used if function has dedicated base pointer.
if (hasBP(MF))
setAliasRegs(MF, SavedRegs, BP);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 703f99f37dd1..c8313240a678 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -124,6 +124,33 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
return true;
}
+void MipsSEDAGToDAGISel::emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB,
+ MachineFunction &MF) {
+ MachineInstrBuilder MIB(MF, &MI);
+ if (!Subtarget->isABI_O32()) { // N32, N64
+ // Save current return address.
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR64))
+ .addDef(Mips::AT_64)
+ .addUse(Mips::RA_64, RegState::Undef)
+ .addUse(Mips::ZERO_64);
+    // Stops the instruction above from being removed later on.
+ MIB.addUse(Mips::AT_64, RegState::Implicit);
+ } else { // O32
+ // Save current return address.
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR))
+ .addDef(Mips::AT)
+ .addUse(Mips::RA, RegState::Undef)
+ .addUse(Mips::ZERO);
+    // _mcount pops 2 words from the stack.
+    BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::ADDiu))
+        .addDef(Mips::SP)
+        .addUse(Mips::SP)
+        .addImm(-8);
+    // Stops the first instruction above from being removed later on.
+ MIB.addUse(Mips::AT, RegState::Implicit);
+ }
+}
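
On O32 the sequence inserted ahead of the profiling call therefore comes out roughly as (a sketch; $1 is $at):

    or    $1, $ra, $zero    # save the current return address in $at
    addiu $sp, $sp, -8      # make room for the 2 words _mcount pops
    jal   _mcount
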
+
void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
MF.getInfo<MipsFunctionInfo>()->initGlobalBaseReg();
@@ -150,6 +177,24 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1())
MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true));
break;
+ case Mips::JAL:
+ case Mips::JAL_MM:
+ if (MI.getOperand(0).isGlobal() &&
+ MI.getOperand(0).getGlobal()->getGlobalIdentifier() == "_mcount")
+ emitMCountABI(MI, MBB, MF);
+ break;
+ case Mips::JALRPseudo:
+ case Mips::JALR64Pseudo:
+ case Mips::JALR16_MM:
+ if (MI.getOperand(2).isMCSymbol() &&
+ MI.getOperand(2).getMCSymbol()->getName() == "_mcount")
+ emitMCountABI(MI, MBB, MF);
+ break;
+ case Mips::JALR:
+ if (MI.getOperand(3).isMCSymbol() &&
+ MI.getOperand(3).getMCSymbol()->getName() == "_mcount")
+ emitMCountABI(MI, MBB, MF);
+ break;
default:
replaceUsesWithZeroReg(MRI, MI);
}
@@ -247,7 +292,8 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset(
Base = Addr.getOperand(0);
// If base is a FI, additional offset calculation is done in
// eliminateFrameIndex, otherwise we need to check the alignment
- if (OffsetToAlignment(CN->getZExtValue(), 1ull << ShiftAmount) != 0)
+ const Align Alignment(1ULL << ShiftAmount);
+ if (!isAligned(Alignment, CN->getZExtValue()))
return false;
}
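
The rewritten check is equivalent to the old OffsetToAlignment(...) != 0 test; a sketch of the power-of-two arithmetic it relies on (illustrative only, outside of LLVM):

    #include <cassert>
    #include <cstdint>

    // isAligned(Align(1 << N), V) holds exactly when the low N bits of V
    // are zero, i.e. when V % (1 << N) == 0.
    static bool isAlignedTo(uint64_t AlignVal, uint64_t V) {
      return (V & (AlignVal - 1)) == 0; // AlignVal must be a power of two
    }

    int main() {
      assert(isAlignedTo(4, 8));  // 8 is 4-byte aligned
      assert(!isAlignedTo(4, 6)); // 6 is not
      return 0;
    }
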
@@ -719,7 +765,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
}
case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ auto *CN = cast<ConstantFPSDNode>(Node);
if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
if (Subtarget->isGP64bit()) {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
@@ -743,7 +789,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
}
case ISD::Constant: {
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ auto *CN = cast<ConstantSDNode>(Node);
int64_t Imm = CN->getSExtValue();
unsigned Size = CN->getValueSizeInBits(0);
@@ -969,7 +1015,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
break;
}
- SDNode *Res;
+ SDNode *Res = nullptr;
// If we have a signed 10 bit integer, we can splat it directly.
//
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index ce594e1fb4fa..39f665be571e 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -120,7 +120,7 @@ private:
/// power of 2.
bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const override;
/// Select constant vector splats whose value is a run of set bits
- /// ending at the most significant bit
+ /// ending at the most significant bit.
bool selectVSplatMaskL(SDValue N, SDValue &Imm) const override;
/// Select constant vector splats whose value is a run of set bits
/// starting at bit zero.
@@ -128,6 +128,10 @@ private:
bool trySelect(SDNode *Node) override;
+  // Emits the proper ABI setup for _mcount profiling calls.
+ void emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB,
+ MachineFunction &MF);
+
void processFunctionAfterISel(MachineFunction &MF) override;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index edf57a3840d1..5bd234f955ba 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -71,8 +71,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
// Expand all truncating stores and extending loads.
- for (MVT VT0 : MVT::vector_valuetypes()) {
- for (MVT VT1 : MVT::vector_valuetypes()) {
+ for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
+ for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT0, VT1, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
@@ -327,6 +327,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
+ setOperationAction(ISD::UNDEF, Ty, Legal);
setOperationAction(ISD::ADD, Ty, Legal);
setOperationAction(ISD::AND, Ty, Legal);
@@ -2595,7 +2596,8 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
SDLoc DL(Op);
return DAG.getNode(MipsISD::SHF, DL, ResTy,
- DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
+ DAG.getTargetConstant(Imm, DL, MVT::i32),
+ Op->getOperand(0));
}
/// Determine whether a range fits a regular pattern of values.
@@ -3062,13 +3064,13 @@ MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
// Fill $FBB.
- unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ Register VR2 = RegInfo.createVirtualRegister(RC);
BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
.addReg(Mips::ZERO).addImm(0);
BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
// Fill $TBB.
- unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ Register VR1 = RegInfo.createVirtualRegister(RC);
BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
.addReg(Mips::ZERO).addImm(1);
@@ -3131,13 +3133,13 @@ MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
.addMBB(TBB);
// Fill $FBB.
- unsigned RD1 = RegInfo.createVirtualRegister(RC);
+ Register RD1 = RegInfo.createVirtualRegister(RC);
BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
.addReg(Mips::ZERO).addImm(0);
BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
// Fill $TBB.
- unsigned RD2 = RegInfo.createVirtualRegister(RC);
+ Register RD2 = RegInfo.createVirtualRegister(RC);
BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
.addReg(Mips::ZERO).addImm(1);
@@ -3169,8 +3171,8 @@ MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Fd = MI.getOperand(0).getReg();
- unsigned Ws = MI.getOperand(1).getReg();
+ Register Fd = MI.getOperand(0).getReg();
+ Register Ws = MI.getOperand(1).getReg();
unsigned Lane = MI.getOperand(2).getImm();
if (Lane == 0) {
@@ -3185,9 +3187,9 @@ MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
} else {
- unsigned Wt = RegInfo.createVirtualRegister(
- Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
- &Mips::MSA128WEvensRegClass);
+ Register Wt = RegInfo.createVirtualRegister(
+ Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
+ : &Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
@@ -3214,15 +3216,15 @@ MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- unsigned Fd = MI.getOperand(0).getReg();
- unsigned Ws = MI.getOperand(1).getReg();
+ Register Fd = MI.getOperand(0).getReg();
+ Register Ws = MI.getOperand(1).getReg();
unsigned Lane = MI.getOperand(2).getImm() * 2;
DebugLoc DL = MI.getDebugLoc();
if (Lane == 0)
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
else {
- unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+ Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
@@ -3244,13 +3246,13 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Wd = MI.getOperand(0).getReg();
- unsigned Wd_in = MI.getOperand(1).getReg();
+ Register Wd = MI.getOperand(0).getReg();
+ Register Wd_in = MI.getOperand(1).getReg();
unsigned Lane = MI.getOperand(2).getImm();
- unsigned Fs = MI.getOperand(3).getReg();
- unsigned Wt = RegInfo.createVirtualRegister(
- Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
- &Mips::MSA128WEvensRegClass);
+ Register Fs = MI.getOperand(3).getReg();
+ Register Wt = RegInfo.createVirtualRegister(
+ Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
+ : &Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
.addImm(0)
@@ -3280,11 +3282,11 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Wd = MI.getOperand(0).getReg();
- unsigned Wd_in = MI.getOperand(1).getReg();
+ Register Wd = MI.getOperand(0).getReg();
+ Register Wd_in = MI.getOperand(1).getReg();
unsigned Lane = MI.getOperand(2).getImm();
- unsigned Fs = MI.getOperand(3).getReg();
- unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+ Register Fs = MI.getOperand(3).getReg();
+ Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
.addImm(0)
@@ -3326,10 +3328,10 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Wd = MI.getOperand(0).getReg();
- unsigned SrcVecReg = MI.getOperand(1).getReg();
- unsigned LaneReg = MI.getOperand(2).getReg();
- unsigned SrcValReg = MI.getOperand(3).getReg();
+ Register Wd = MI.getOperand(0).getReg();
+ Register SrcVecReg = MI.getOperand(1).getReg();
+ Register LaneReg = MI.getOperand(2).getReg();
+ Register SrcValReg = MI.getOperand(3).getReg();
const TargetRegisterClass *VecRC = nullptr;
// FIXME: This should be true for N32 too.
@@ -3370,7 +3372,7 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
}
if (IsFP) {
- unsigned Wt = RegInfo.createVirtualRegister(VecRC);
+ Register Wt = RegInfo.createVirtualRegister(VecRC);
BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
.addImm(0)
.addReg(SrcValReg)
@@ -3380,7 +3382,7 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
// Convert the lane index into a byte index
if (EltSizeInBytes != 1) {
- unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
+ Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
.addReg(LaneReg)
.addImm(EltLog2Size);
@@ -3388,13 +3390,13 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
}
// Rotate bytes around so that the desired lane is element zero
- unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC);
+ Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
.addReg(SrcVecReg)
.addReg(SrcVecReg)
.addReg(LaneReg, 0, SubRegIdx);
- unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC);
+ Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
if (IsFP) {
// Use insve.df to insert to element zero
BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
@@ -3413,7 +3415,7 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
// Rotate elements the rest of the way for a full rotation.
// sld.df interprets $rt modulo the number of columns so we only need to negate
// the lane index to do this.
- unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
+ Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
LaneTmp2)
.addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
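The negation trick works because sld.df reduces its $rt operand modulo the number of columns n: rotating by (n - Lane) is the same as rotating by -Lane (mod n), so a single subtract from zero finishes the rotation. A worked instance, assuming a 16-column byte vector:

  // n = 16 columns, Lane = 5: -5 mod 16 == 11 == 16 - 5,
  // so sld.b with $rt = (ZERO - Lane) rotates the remaining 11 positions.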
@@ -3440,12 +3442,12 @@ MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Wd = MI.getOperand(0).getReg();
- unsigned Fs = MI.getOperand(1).getReg();
- unsigned Wt1 = RegInfo.createVirtualRegister(
+ Register Wd = MI.getOperand(0).getReg();
+ Register Fs = MI.getOperand(1).getReg();
+ Register Wt1 = RegInfo.createVirtualRegister(
Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
: &Mips::MSA128WEvensRegClass);
- unsigned Wt2 = RegInfo.createVirtualRegister(
+ Register Wt2 = RegInfo.createVirtualRegister(
Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
: &Mips::MSA128WEvensRegClass);
@@ -3475,10 +3477,10 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Wd = MI.getOperand(0).getReg();
- unsigned Fs = MI.getOperand(1).getReg();
- unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
- unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+ Register Wd = MI.getOperand(0).getReg();
+ Register Fs = MI.getOperand(1).getReg();
+ Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+ Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
@@ -3509,8 +3511,8 @@ MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Ws = MI.getOperand(0).getReg();
- unsigned Rt = MI.getOperand(1).getReg();
+ Register Ws = MI.getOperand(0).getReg();
+ Register Rt = MI.getOperand(1).getReg();
const MachineMemOperand &MMO = **MI.memoperands_begin();
unsigned Imm = MMO.getOffset();
@@ -3522,11 +3524,11 @@ MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
: (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
: &Mips::GPR64RegClass);
const bool UsingMips32 = RC == &Mips::GPR32RegClass;
- unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
+ Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
if(!UsingMips32) {
- unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
+ Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
.addImm(0)
.addReg(Rs)
@@ -3564,7 +3566,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Wd = MI.getOperand(0).getReg();
+ Register Wd = MI.getOperand(0).getReg();
// Caution: A load via the GOT can expand to a GPR32 operand, a load via
// spill and reload can expand as a GPR64 operand. Examine the
@@ -3575,7 +3577,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
: &Mips::GPR64RegClass);
const bool UsingMips32 = RC == &Mips::GPR32RegClass;
- unsigned Rt = RegInfo.createVirtualRegister(RC);
+ Register Rt = RegInfo.createVirtualRegister(RC);
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
@@ -3583,7 +3585,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
MIB.add(MI.getOperand(i));
if(!UsingMips32) {
- unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
+ Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
Rt = Tmp;
}
@@ -3658,11 +3660,11 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Wd = MI.getOperand(0).getReg();
- unsigned Fs = MI.getOperand(1).getReg();
+ Register Wd = MI.getOperand(0).getReg();
+ Register Fs = MI.getOperand(1).getReg();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
const TargetRegisterClass *GPRRC =
IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
unsigned MFC1Opc = IsFGR64onMips64
@@ -3671,16 +3673,16 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
// Perform the register class copy as mentioned above.
- unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
+ Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
unsigned WPHI = Wtemp;
if (IsFGR64onMips32) {
- unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
+ Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
- unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
- unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
.addReg(Wtemp)
.addReg(Rtemp2)
@@ -3693,7 +3695,7 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
}
if (IsFGR64) {
- unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
.addReg(WPHI)
.addReg(WPHI);
@@ -3817,8 +3819,8 @@ MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
- unsigned Ws1 = RegInfo.createVirtualRegister(RC);
- unsigned Ws2 = RegInfo.createVirtualRegister(RC);
+ Register Ws1 = RegInfo.createVirtualRegister(RC);
+ Register Ws2 = RegInfo.createVirtualRegister(RC);
DebugLoc DL = MI.getDebugLoc();
// Splat 1.0 into a vector
@@ -3846,8 +3848,8 @@ MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
- unsigned Ws1 = RegInfo.createVirtualRegister(RC);
- unsigned Ws2 = RegInfo.createVirtualRegister(RC);
+ Register Ws1 = RegInfo.createVirtualRegister(RC);
+ Register Ws2 = RegInfo.createVirtualRegister(RC);
DebugLoc DL = MI.getDebugLoc();
// Splat 1.0 into a vector
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 4e49f5e7d9d1..2126a1bda493 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -628,7 +628,7 @@ unsigned MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
// The first instruction can be a LUi, which is different from other
// instructions (ADDiu, ORI and SLL) in that it does not have a register
// operand.
- unsigned Reg = RegInfo.createVirtualRegister(RC);
+ Register Reg = RegInfo.createVirtualRegister(RC);
if (Inst->Opc == LUi)
BuildMI(MBB, II, DL, get(LUi), Reg).addImm(SignExtend64<16>(Inst->ImmOpnd));
@@ -734,9 +734,9 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB,
// Add lo/hi registers if the mtlo/hi instructions created have explicit
// def registers.
if (HasExplicitDef) {
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo);
- unsigned DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi);
+ Register DstReg = I->getOperand(0).getReg();
+ Register DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo);
+ Register DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi);
LoInst.addReg(DstLo, RegState::Define);
HiInst.addReg(DstHi, RegState::Define);
}
@@ -773,14 +773,14 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
bool isMicroMips,
bool FP64) const {
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned SrcReg = I->getOperand(1).getReg();
+ Register DstReg = I->getOperand(0).getReg();
+ Register SrcReg = I->getOperand(1).getReg();
unsigned N = I->getOperand(2).getImm();
DebugLoc dl = I->getDebugLoc();
assert(N < 2 && "Invalid immediate");
unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo;
- unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
+ Register SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
// FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload
// in MipsSEFrameLowering.cpp.
@@ -815,7 +815,7 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB,
void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
bool isMicroMips, bool FP64) const {
- unsigned DstReg = I->getOperand(0).getReg();
+ Register DstReg = I->getOperand(0).getReg();
unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
DebugLoc dl = I->getDebugLoc();
@@ -883,8 +883,8 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
unsigned RA = Subtarget.isGP64bit() ? Mips::RA_64 : Mips::RA;
unsigned T9 = Subtarget.isGP64bit() ? Mips::T9_64 : Mips::T9;
unsigned ZERO = Subtarget.isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned OffsetReg = I->getOperand(0).getReg();
- unsigned TargetReg = I->getOperand(1).getReg();
+ Register OffsetReg = I->getOperand(0).getReg();
+ Register TargetReg = I->getOperand(1).getReg();
// addu $ra, $v0, $zero
// addu $sp, $sp, $v1
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index f4b164d5c0ab..a48088c28919 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -212,11 +212,9 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// element size), otherwise it is a 16-bit signed immediate.
unsigned OffsetBitSize =
getLoadStoreOffsetSizeInBits(MI.getOpcode(), MI.getOperand(OpNo - 1));
- unsigned OffsetAlign = getLoadStoreOffsetAlign(MI.getOpcode());
-
+ const Align OffsetAlign(getLoadStoreOffsetAlign(MI.getOpcode()));
if (OffsetBitSize < 16 && isInt<16>(Offset) &&
- (!isIntN(OffsetBitSize, Offset) ||
- OffsetToAlignment(Offset, OffsetAlign) != 0)) {
+ (!isIntN(OffsetBitSize, Offset) || !isAligned(OffsetAlign, Offset))) {
// If we have an offset that needs to fit into a signed n-bit immediate
// (where n < 16) and doesn't, but does fit into 16 bits, then use an ADDiu
MachineBasicBlock &MBB = *MI.getParent();
@@ -224,7 +222,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
const TargetRegisterClass *PtrRC =
ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
- unsigned Reg = RegInfo.createVirtualRegister(PtrRC);
+ Register Reg = RegInfo.createVirtualRegister(PtrRC);
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo *>(
MBB.getParent()->getSubtarget().getInstrInfo());
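The old OffsetToAlignment(Offset, OffsetAlign) != 0 test and the new !isAligned(OffsetAlign, Offset) are equivalent: both ask whether Offset is a multiple of the alignment. A sketch of the predicate, assuming the llvm/Support/Alignment.h semantics:

  const Align OffsetAlign(4);
  isAligned(OffsetAlign, 8);  // true:  8 % 4 == 0
  isAligned(OffsetAlign, 6);  // false: 6 % 4 != 0, so eliminateFI falls back to the ADDiu path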
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index d021b3d021b1..b9245c9fc0eb 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -69,7 +69,7 @@ void MipsSubtarget::anchor() {}
MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
bool little, const MipsTargetMachine &TM,
- unsigned StackAlignOverride)
+ MaybeAlign StackAlignOverride)
: MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false),
NoABICalls(false), Abs2008(false), IsFP64bit(false), UseOddSPReg(true),
@@ -81,10 +81,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false),
HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false),
- StackAlignOverride(StackAlignOverride),
- TM(TM), TargetTriple(TT), TSInfo(),
- InstrInfo(
- MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
+ StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT),
+ TSInfo(), InstrInfo(MipsInstrInfo::create(
+ initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
TLInfo(MipsTargetLowering::create(TM, *this)) {
@@ -248,12 +247,12 @@ MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
InMips16HardFloat = true;
if (StackAlignOverride)
- stackAlignment = StackAlignOverride;
+ stackAlignment = *StackAlignOverride;
else if (isABI_N32() || isABI_N64())
- stackAlignment = 16;
+ stackAlignment = Align(16);
else {
assert(isABI_O32() && "Unknown ABI for stack alignment!");
- stackAlignment = 8;
+ stackAlignment = Align(8);
}
return *this;
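StackAlignOverride is now a MaybeAlign, which is empty when no stack-alignment override was given, so the branch above is the usual unwrap-or-default pattern. A condensed equivalent, as a sketch (the real code keeps the O32 assert):

  // MaybeAlign behaves like an Optional<Align>; operator bool tests for a value.
  stackAlignment = StackAlignOverride
                       ? *StackAlignOverride
                       : Align(isABI_N32() || isABI_N64() ? 16 : 8);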
@@ -286,6 +285,6 @@ const RegisterBankInfo *MipsSubtarget::getRegBankInfo() const {
return RegBankInfo.get();
}
-const InstructionSelector *MipsSubtarget::getInstructionSelector() const {
+InstructionSelector *MipsSubtarget::getInstructionSelector() const {
return InstSelector.get();
}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index aa1200579fc8..0a8c2ef8ae5c 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -189,12 +189,15 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// Disable use of the `jal` instruction.
bool UseLongCalls = false;
+ // Assume 32-bit GOT.
+ bool UseXGOT = false;
+
/// The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
- unsigned stackAlignment;
+ Align stackAlignment;
/// The overridden stack alignment.
- unsigned StackAlignOverride;
+ MaybeAlign StackAlignOverride;
InstrItineraryData InstrItins;
@@ -227,7 +230,7 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, bool little,
- const MipsTargetMachine &TM, unsigned StackAlignOverride);
+ const MipsTargetMachine &TM, MaybeAlign StackAlignOverride);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -323,6 +326,8 @@ public:
bool useLongCalls() const { return UseLongCalls; }
+ bool useXGOT() const { return UseXGOT; }
+
bool enableLongBranchPass() const {
return hasStandardEncoding() || inMicroMipsMode() || allowMixed16_32();
}
@@ -344,7 +349,7 @@ public:
// really use them if in addition we are in mips16 mode
static bool useConstantIslands();
- unsigned getStackAlignment() const { return stackAlignment; }
+ Align getStackAlignment() const { return stackAlignment; }
// Grab relocation model
Reloc::Model getRelocationModel() const;
@@ -391,7 +396,7 @@ public:
const CallLowering *getCallLowering() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
- const InstructionSelector *getInstructionSelector() const override;
+ InstructionSelector *getInstructionSelector() const override;
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index c878abb042e4..e58f316791ba 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -117,14 +117,17 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(JIT, RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- isLittle(isLittle), TLOF(llvm::make_unique<MipsTargetObjectFile>()),
+ isLittle(isLittle), TLOF(std::make_unique<MipsTargetObjectFile>()),
ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)),
- Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this,
- Options.StackAlignmentOverride),
+ Subtarget(nullptr),
+ DefaultSubtarget(TT, CPU, FS, isLittle, *this,
+ MaybeAlign(Options.StackAlignmentOverride)),
NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16",
- isLittle, *this, Options.StackAlignmentOverride),
+ isLittle, *this,
+ MaybeAlign(Options.StackAlignmentOverride)),
Mips16Subtarget(TT, CPU, FS.empty() ? "+mips16" : FS.str() + ",+mips16",
- isLittle, *this, Options.StackAlignmentOverride) {
+ isLittle, *this,
+ MaybeAlign(Options.StackAlignmentOverride)) {
Subtarget = &DefaultSubtarget;
initAsmInfo();
}
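llvm::make_unique was LLVM's pre-C++14 stand-in; after the tree moved to C++14 it switched to std::make_unique and the helper was removed, so these replacements are mechanical. Sketch:

  #include <memory>
  auto TLOF = std::make_unique<MipsTargetObjectFile>(); // same semantics as the old llvm::make_unique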
@@ -196,8 +199,9 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<MipsSubtarget>(TargetTriple, CPU, FS, isLittle, *this,
- Options.StackAlignmentOverride);
+ I = std::make_unique<MipsSubtarget>(
+ TargetTriple, CPU, FS, isLittle, *this,
+ MaybeAlign(Options.StackAlignmentOverride));
}
return I.get();
}
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 1fa8ebadd643..298d056ce2c3 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -130,6 +130,8 @@ public:
SMLoc IDLoc, const MCSubtargetInfo *STI);
void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2,
SMLoc IDLoc, const MCSubtargetInfo *STI);
+ void emitRRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2,
+ MCOperand Op3, SMLoc IDLoc, const MCSubtargetInfo *STI);
void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm,
SMLoc IDLoc, const MCSubtargetInfo *STI);
void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0,
@@ -154,17 +156,13 @@ public:
unsigned BaseReg, int64_t Offset,
function_ref<unsigned()> GetATReg, SMLoc IDLoc,
const MCSubtargetInfo *STI);
- void emitStoreWithSymOffset(unsigned Opcode, unsigned SrcReg,
- unsigned BaseReg, MCOperand &HiOperand,
- MCOperand &LoOperand, unsigned ATReg, SMLoc IDLoc,
- const MCSubtargetInfo *STI);
+ void emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, unsigned BaseReg,
+ MCOperand &HiOperand, MCOperand &LoOperand,
+ unsigned ATReg, SMLoc IDLoc,
+ const MCSubtargetInfo *STI);
void emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg, unsigned BaseReg,
int64_t Offset, unsigned TmpReg, SMLoc IDLoc,
const MCSubtargetInfo *STI);
- void emitLoadWithSymOffset(unsigned Opcode, unsigned DstReg, unsigned BaseReg,
- MCOperand &HiOperand, MCOperand &LoOperand,
- unsigned ATReg, SMLoc IDLoc,
- const MCSubtargetInfo *STI);
void emitGPRestore(int Offset, SMLoc IDLoc, const MCSubtargetInfo *STI);
void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 6530c40ea100..0acbace5f848 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -44,7 +44,7 @@ MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
-BasicBlockPass *createNVPTXLowerAllocaPass();
+FunctionPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 5f38b4a3c4c5..307f4d58c3ab 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -282,7 +282,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
}
unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
@@ -434,7 +434,7 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
return false;
}
-void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
AsmPrinter::EmitBasicBlockStart(MBB);
if (isLoopHeaderOfNoUnroll(MBB))
OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
@@ -507,8 +507,8 @@ const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const {
}
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
- unsigned RegNo = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
+ Register RegNo = MI->getOperand(0).getReg();
+ if (Register::isVirtualRegister(RegNo)) {
OutStreamer->AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
@@ -1397,7 +1397,7 @@ static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
auto *FTy = dyn_cast<FunctionType>(Ty);
if (FTy)
- return DL.getPointerPrefAlignment();
+ return DL.getPointerPrefAlignment().value();
return DL.getPrefTypeAlignment(Ty);
}
@@ -1473,12 +1473,11 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
// size = typeallocsize of element type
- unsigned align = PAL.getParamAlignment(paramIndex);
- if (align == 0)
- align = DL.getABITypeAlignment(Ty);
+ const Align align = DL.getValueOrABITypeAlignment(
+ PAL.getParamAlignment(paramIndex), Ty);
unsigned sz = DL.getTypeAllocSize(Ty);
- O << "\t.param .align " << align << " .b8 ";
+ O << "\t.param .align " << align.value() << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
@@ -1559,9 +1558,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
// size = typeallocsize of element type
- unsigned align = PAL.getParamAlignment(paramIndex);
- if (align == 0)
- align = DL.getABITypeAlignment(ETy);
+ Align align =
+ DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy);
// Work around a bug in ptxas. When PTX code takes address of
// byval parameter with alignment < 4, ptxas generates code to
// spill argument into memory. Alas on sm_50+ ptxas generates
@@ -1573,10 +1571,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// TODO: this will need to be undone when we get to support multi-TU
// device-side compilation as it breaks ABI compatibility with nvcc.
// Hopefully ptxas bug is fixed by then.
- if (!isKernelFunc && align < 4)
- align = 4;
+ if (!isKernelFunc && align < Align(4))
+ align = Align(4);
unsigned sz = DL.getTypeAllocSize(ETy);
- O << "\t.param .align " << align << " .b8 ";
+ O << "\t.param .align " << align.value() << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
continue;
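Align values order by their underlying byte count, so the ptxas workaround above still reads naturally after the type change. A sketch of the comparison semantics:

  Align a(2);
  if (a < Align(4)) // compares byte values: 2 < 4
    a = Align(4);   // byval args of non-kernel functions get at least 4-byte alignment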
@@ -1653,7 +1651,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
// We use the per class virtual register number in the ptx output.
unsigned int numVRs = MRI->getNumVirtRegs();
for (unsigned i = 0; i < numVRs; i++) {
- unsigned int vr = TRI->index2VirtReg(i);
+ unsigned int vr = Register::index2VirtReg(i);
const TargetRegisterClass *RC = MRI->getRegClass(vr);
DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
int n = regmap.size();
@@ -1861,7 +1859,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::HalfTyID:
case Type::FloatTyID:
case Type::DoubleTyID: {
- const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
+ const auto *CFP = cast<ConstantFP>(CPV);
Type *Ty = CFP->getType();
if (Ty == Type::getHalfTy(CPV->getContext())) {
APInt API = CFP->getValueAPF().bitcastToAPInt();
@@ -2212,7 +2210,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (Register::isPhysicalRegister(MO.getReg())) {
if (MO.getReg() == NVPTX::VRDepot)
O << DEPOTNAME << getFunctionNumber();
else
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 43ae57ac1262..7a66854d32f4 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -200,7 +200,7 @@ private:
const Function *F;
std::string CurrentFnName;
- void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
+ void EmitBasicBlockStart(const MachineBasicBlock &MBB) override;
void EmitFunctionEntryLabel() override;
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 46f08b23d31a..d26912f47e50 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
NVPTXFrameLowering::NVPTXFrameLowering()
- : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0) {}
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, Align(8), 0) {}
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index ae1aa98da0e8..9acd0bea66fd 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -480,7 +480,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// Register custom handling for vector loads/stores
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
if (IsPTXVectorType(VT)) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
@@ -1291,8 +1291,8 @@ std::string NVPTXTargetLowering::getPrototype(
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << PtrVT.getSizeInBits() << " _";
- } else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) {
- auto &DL = CS.getCalledFunction()->getParent()->getDataLayout();
+ } else if (retTy->isAggregateType() || retTy->isVectorTy() ||
+ retTy->isIntegerTy(128)) {
O << ".param .align " << retAlignment << " .b8 _["
<< DL.getTypeAllocSize(retTy) << "]";
} else {
@@ -2230,8 +2230,8 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::v2f16) {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT MemVT = Load->getMemoryVT();
- if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- *Load->getMemOperand())) {
+ if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ MemVT, *Load->getMemOperand())) {
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
return DAG.getMergeValues(Ops, SDLoc(Op));
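allowsMemoryAccessForAlignment appears to be the alignment-only subset of the old allowsMemoryAccess query: it asks whether the target tolerates an access of this type at the memory operand's alignment, which is exactly what the unaligned-v2f16 expansion needs to know. A sketch of the call shape, assuming the TargetLowering API of this revision:

  if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                      MemVT, *Load->getMemOperand()))
    std::tie(Lo, Hi) = expandUnalignedLoad(Load, DAG); // fall back to a split load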
@@ -2273,8 +2273,8 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// v2f16 is legal, so we can't rely on legalizer to handle unaligned
// stores and have to handle it here.
if (VT == MVT::v2f16 &&
- !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- *Store->getMemOperand()))
+ !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ VT, *Store->getMemOperand()))
return expandUnalignedStore(Store, DAG);
if (VT.isVector())
@@ -3497,7 +3497,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
}
case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
@@ -3521,7 +3521,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 8;
+ Info.align = Align(8);
return true;
}
@@ -3547,7 +3547,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
}
@@ -3585,7 +3585,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 4;
+ Info.align = Align(4);
return true;
}
@@ -3606,7 +3606,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
}
@@ -3627,7 +3627,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
}
@@ -3648,7 +3648,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
}
@@ -3665,7 +3665,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 8;
+ Info.align = Align(8);
return true;
}
@@ -3686,7 +3686,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOStore;
- Info.align = 16;
+ Info.align = Align(16);
return true;
}
@@ -3707,7 +3707,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOStore;
- Info.align = 16;
+ Info.align = Align(16);
return true;
}
@@ -3728,7 +3728,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOStore;
- Info.align = 16;
+ Info.align = Align(16);
return true;
}
@@ -3745,7 +3745,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOStore;
- Info.align = 8;
+ Info.align = Align(8);
return true;
}
@@ -3780,7 +3780,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
- Info.align = 0;
+ Info.align.reset();
return true;
}
@@ -3798,7 +3798,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Info.align =
+ MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
return true;
}
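Wrapping the intrinsic's alignment argument in MaybeAlign preserves the old encoding where 0 meant "no alignment known": MaybeAlign(0) is the empty state, and any other power of two holds an Align. Sketch:

  MaybeAlign MA0(0); // empty: no known alignment (the old Info.align = 0)
  MaybeAlign MA8(8); // holds Align(8)
  assert(!MA0 && MA8 && MA8->value() == 8);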
@@ -3817,7 +3818,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Info.align =
+ MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
return true;
}
@@ -3883,7 +3885,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = nullptr;
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
case Intrinsic::nvvm_tex_1d_v4s32_s32:
@@ -4003,7 +4005,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = nullptr;
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
case Intrinsic::nvvm_suld_1d_i8_clamp:
@@ -4056,7 +4058,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = nullptr;
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
case Intrinsic::nvvm_suld_1d_i16_clamp:
@@ -4109,7 +4111,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = nullptr;
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
case Intrinsic::nvvm_suld_1d_i32_clamp:
@@ -4162,7 +4164,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = nullptr;
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
case Intrinsic::nvvm_suld_1d_i64_clamp:
@@ -4200,7 +4202,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = nullptr;
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align = 16;
+ Info.align = Align(16);
return true;
}
return false;
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 62da3c79f465..fe7a84f9a361 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -143,12 +143,17 @@ def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">;
def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">;
+def hasPTX64 : Predicate<"Subtarget->getPTXVersion() >= 64">;
def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">;
def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">;
def hasSM75 : Predicate<"Subtarget->getSmVersion() >= 75">;
+// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
+def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
+ "&& Subtarget->getPTXVersion() >= 64)">;
+
def useShortPtr : Predicate<"useShortPointers()">;
def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
@@ -2908,7 +2913,7 @@ def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
// ctz instruction always returns a 32-bit value. For ctlz.i64, convert the
// ptx value to 64 bits to match the ISD node's semantics, unless we know we're
// truncating back down to 32 bits.
-def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(i64 (ctlz Int64Regs:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>;
// For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the
@@ -2925,10 +2930,10 @@ def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>;
// and then ctlz that value. This way we don't have to subtract 16 from the
// result. Unfortunately today we don't have a way to generate
// "mov b32reg, {b16imm, b16reg}", so we don't do this optimization.
-def : Pat<(ctlz Int16Regs:$a),
+def : Pat<(i16 (ctlz Int16Regs:$a)),
(SUBi16ri (CVT_u16_u32
(CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>;
-def : Pat<(i32 (zext (ctlz Int16Regs:$a))),
+def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))),
(SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;
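The subtract-16 in these patterns falls out of the zero extension: widening an i16 value to i32 prepends 16 zero bits, so ctlz_32(zext(x)) = ctlz_16(x) + 16. Worked instance:

  // x = 0x0F00 (i16): ctlz_16(x) = 4
  // zext to i32 gives 0x00000F00: ctlz_32 = 20 = 4 + 16, hence the SUB...ri of 16.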
// Population count
@@ -2953,7 +2958,7 @@ def : Pat<(i32 (trunc (ctpop Int64Regs:$a))), (POPCr64 Int64Regs:$a)>;
// If we know that we're storing into an i32, we can avoid the final trunc.
def : Pat<(ctpop Int16Regs:$a),
(CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
-def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
+def : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))),
(POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;
// fpround f32 -> f16
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 1752d3e0575e..c52195fb0449 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -56,6 +56,10 @@ class RegSeq<int n, string prefix> {
[]);
}
+class THREADMASK_INFO<bit sync> {
+ list<bit> ret = !if(sync, [0,1], [0]);
+}
+
//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
@@ -129,121 +133,64 @@ def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
[(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
Requires<[hasPTX60, hasSM30]>;
-
-// shfl.{up,down,bfly,idx}.b32
-multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
- // The last two parameters to shfl can be regs or imms. ptxas is smart
- // enough to inline constant registers, so strictly speaking we don't need to
- // handle immediates here. But it's easy enough, and it makes our ptx more
- // readable.
- def reg : NVPTXInst<
- (outs regclass:$dst),
- (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
- !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
- [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;
-
- def imm1 : NVPTXInst<
- (outs regclass:$dst),
- (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
- !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
- [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;
-
- def imm2 : NVPTXInst<
- (outs regclass:$dst),
- (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
- !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
- [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;
-
- def imm3 : NVPTXInst<
- (outs regclass:$dst),
- (ins regclass:$src, i32imm:$offset, i32imm:$mask),
- !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
- [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
+class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
+ bit offset_imm, bit mask_imm, bit threadmask_imm>
+ : NVPTXInst<(outs), (ins), "?", []> {
+ NVPTXRegClass rc = !cond(
+ !eq(reg, "i32"): Int32Regs,
+ !eq(reg, "f32"): Float32Regs);
+ string IntrName = "int_nvvm_shfl_"
+ # !if(sync, "sync_", "")
+ # mode
+ # "_" # reg
+ # !if(return_pred, "p", "");
+ Intrinsic Intr = !cast<Intrinsic>(IntrName);
+ let InOperandList = !con(
+ !if(sync,
+ !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
+ (ins)),
+ (ins rc:$src),
+ !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
+ !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
+ );
+ let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
+ let AsmString = "shfl."
+ # !if(sync, "sync.", "")
+ # mode # ".b32\t"
+ # "$dst"
+ # !if(return_pred, "|$pred", "") # ", "
+ # "$src, $offset, $mask"
+ # !if(sync, ", $threadmask", "")
+ # ";"
+ ;
+ let Pattern = [!con(
+ !foreach(tmp, OutOperandList,
+ !subst(outs, set,
+ !subst(i32imm, imm, tmp))),
+ (set !foreach(tmp, InOperandList,
+ !subst(ins, Intr,
+ !subst(i32imm, imm, tmp))))
+ )];
}
-defm INT_SHFL_DOWN_I32 : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
-defm INT_SHFL_DOWN_F32 : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
-defm INT_SHFL_UP_I32 : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
-defm INT_SHFL_UP_F32 : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
-defm INT_SHFL_BFLY_I32 : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
-defm INT_SHFL_BFLY_F32 : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
-defm INT_SHFL_IDX_I32 : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
-defm INT_SHFL_IDX_F32 : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
-
-multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
- // Threadmask and the last two parameters to shfl.sync can be regs or imms.
- // ptxas is smart enough to inline constant registers, so strictly speaking we
- // don't need to handle immediates here. But it's easy enough, and it makes
- // our ptx more readable.
- def rrr : NVPTXInst<
- (outs regclass:$dst),
- (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
- !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
- [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
- Int32Regs:$offset, Int32Regs:$mask))]>;
-
- def rri : NVPTXInst<
- (outs regclass:$dst),
- (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
- !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
- [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
- Int32Regs:$offset, imm:$mask))]>;
-
- def rir : NVPTXInst<
- (outs regclass:$dst),
- (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
- !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
- [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
- imm:$offset, Int32Regs:$mask))]>;
-
- def rii : NVPTXInst<
- (outs regclass:$dst),
- (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
- !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
- [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
- imm:$offset, imm:$mask))]>;
-
- def irr : NVPTXInst<
- (outs regclass:$dst),
- (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
- !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
- [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
- Int32Regs:$offset, Int32Regs:$mask))]>;
-
- def iri : NVPTXInst<
- (outs regclass:$dst),
- (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
- !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
- [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
- Int32Regs:$offset, imm:$mask))]>;
-
- def iir : NVPTXInst<
- (outs regclass:$dst),
- (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
- !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
- [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
- imm:$offset, Int32Regs:$mask))]>;
-
- def iii : NVPTXInst<
- (outs regclass:$dst),
- (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
- !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
- [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
- imm:$offset, imm:$mask))]>;
+foreach sync = [0, 1] in {
+ foreach mode = ["up", "down", "bfly", "idx"] in {
+ foreach regclass = ["i32", "f32"] in {
+ foreach return_pred = [0, 1] in {
+ foreach offset_imm = [0, 1] in {
+ foreach mask_imm = [0, 1] in {
+ foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
+ def : SHFL_INSTR<sync, mode, regclass, return_pred,
+ offset_imm, mask_imm, threadmask_imm>,
+ Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>;
+ }
+ }
+ }
+ }
+ }
+ }
}
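The foreach nest above replaces the eight hand-written SHFL/SHFL_SYNC multiclasses with one parameterized SHFL_INSTR record: every combination of sync, mode, register class, predicate-returning form, and reg/imm operand shape is stamped out, with THREADMASK_INFO restricting the threadmask axis to the sync variants. Following the AsmString concatenation in SHFL_INSTR, the sync/down/f32 instance with a predicate result should print as:

  shfl.sync.down.b32 $dst|$pred, $src, $offset, $mask, $threadmask;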
-// On sm_70 these don't have to be convergent, so we may eventually want to
-// implement non-convergent variant of this intrinsic.
-defm INT_SHFL_SYNC_DOWN_I32 : SHFL_SYNC<Int32Regs, "down", int_nvvm_shfl_sync_down_i32>;
-defm INT_SHFL_SYNC_DOWN_F32 : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;
-defm INT_SHFL_SYNC_UP_I32 : SHFL_SYNC<Int32Regs, "up", int_nvvm_shfl_sync_up_i32>;
-defm INT_SHFL_SYNC_UP_F32 : SHFL_SYNC<Float32Regs, "up", int_nvvm_shfl_sync_up_f32>;
-defm INT_SHFL_SYNC_BFLY_I32 : SHFL_SYNC<Int32Regs, "bfly", int_nvvm_shfl_sync_bfly_i32>;
-defm INT_SHFL_SYNC_BFLY_F32 : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;
-defm INT_SHFL_SYNC_IDX_I32 : SHFL_SYNC<Int32Regs, "idx", int_nvvm_shfl_sync_idx_i32>;
-defm INT_SHFL_SYNC_IDX_F32 : SHFL_SYNC<Float32Regs, "idx", int_nvvm_shfl_sync_idx_f32>;
-
-
// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 0743a2986718..83039241a7c7 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -103,7 +103,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
// Do the transformation of an aggr load/copy/set to a loop
//
for (LoadInst *LI : AggrLoads) {
- StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin());
+ auto *SI = cast<StoreInst>(*LI->user_begin());
Value *SrcAddr = LI->getOperand(0);
Value *DstAddr = SI->getOperand(1);
unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
diff --git a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 76fb9f3fa692..945b7286b03c 100644
--- a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -41,12 +41,12 @@ void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
namespace {
-class NVPTXLowerAlloca : public BasicBlockPass {
- bool runOnBasicBlock(BasicBlock &BB) override;
+class NVPTXLowerAlloca : public FunctionPass {
+ bool runOnFunction(Function &F) override;
public:
static char ID; // Pass identification, replacement for typeid
- NVPTXLowerAlloca() : BasicBlockPass(ID) {}
+ NVPTXLowerAlloca() : FunctionPass(ID) {}
StringRef getPassName() const override {
return "convert address space of alloca'ed memory to local";
}
@@ -61,58 +61,61 @@ INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca",
// =============================================================================
// Main function for this pass.
// =============================================================================
-bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) {
- if (skipBasicBlock(BB))
+bool NVPTXLowerAlloca::runOnFunction(Function &F) {
+ if (skipFunction(F))
return false;
bool Changed = false;
- for (auto &I : BB) {
- if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
- Changed = true;
- auto PTy = dyn_cast<PointerType>(allocaInst->getType());
- auto ETy = PTy->getElementType();
- auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
- auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
- auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
- auto NewASCToGeneric = new AddrSpaceCastInst(NewASCToLocal,
- GenericAddrTy, "");
- NewASCToLocal->insertAfter(allocaInst);
- NewASCToGeneric->insertAfter(NewASCToLocal);
- for (Value::use_iterator UI = allocaInst->use_begin(),
- UE = allocaInst->use_end();
- UI != UE; ) {
- // Check Load, Store, GEP, and BitCast Uses on alloca and make them
- // use the converted generic address, in order to expose non-generic
- // addrspacecast to NVPTXInferAddressSpaces. For other types
- // of instructions this is unnecessary and may introduce redundant
- // address cast.
- const auto &AllocaUse = *UI++;
- auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
- if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) {
- LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
- continue;
- }
- auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
- if (SI && SI->getPointerOperand() == allocaInst && !SI->isVolatile()) {
- SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
- continue;
- }
- auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
- if (GI && GI->getPointerOperand() == allocaInst) {
- GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
- continue;
- }
- auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
- if (BI && BI->getOperand(0) == allocaInst) {
- BI->setOperand(0, NewASCToGeneric);
- continue;
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
+ Changed = true;
+ auto PTy = dyn_cast<PointerType>(allocaInst->getType());
+ auto ETy = PTy->getElementType();
+ auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
+ auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
+ auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
+ auto NewASCToGeneric =
+ new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, "");
+ NewASCToLocal->insertAfter(allocaInst);
+ NewASCToGeneric->insertAfter(NewASCToLocal);
+ for (Value::use_iterator UI = allocaInst->use_begin(),
+ UE = allocaInst->use_end();
+ UI != UE;) {
+ // Check Load, Store, GEP, and BitCast Uses on alloca and make them
+ // use the converted generic address, in order to expose non-generic
+ // addrspacecast to NVPTXInferAddressSpaces. For other types
+ // of instructions this is unnecessary and may introduce redundant
+ // address cast.
+ const auto &AllocaUse = *UI++;
+ auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
+ if (LI && LI->getPointerOperand() == allocaInst &&
+ !LI->isVolatile()) {
+ LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
+ continue;
+ }
+ auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
+ if (SI && SI->getPointerOperand() == allocaInst &&
+ !SI->isVolatile()) {
+ SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
+ continue;
+ }
+ auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
+ if (GI && GI->getPointerOperand() == allocaInst) {
+ GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
+ continue;
+ }
+ auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
+ if (BI && BI->getOperand(0) == allocaInst) {
+ BI->setOperand(0, NewASCToGeneric);
+ continue;
+ }
}
}
}
- }
return Changed;
}
-BasicBlockPass *llvm::createNVPTXLowerAllocaPass() {
+FunctionPass *llvm::createNVPTXLowerAllocaPass() {
return new NVPTXLowerAlloca();
}
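BasicBlockPass was on its way out of the pass infrastructure, so the pass now derives from FunctionPass and iterates over blocks itself; behavior is unchanged apart from skipFunction replacing the per-block skipBasicBlock check. The converted shape, as a minimal sketch (the per-alloca rewrite is the loop body shown in the diff above):

  struct LowerAllocaSketch : FunctionPass {
    static char ID;
    LowerAllocaSketch() : FunctionPass(ID) {}
    bool runOnFunction(Function &F) override {
      if (skipFunction(F))   // honors optnone/opt-bisect, as skipBasicBlock did per block
        return false;
      bool Changed = false;
      for (BasicBlock &BB : F)
        for (Instruction &I : BB)
          Changed |= isa<AllocaInst>(&I); // placeholder for the addrspacecast rewrite
      return Changed;
    }
  };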
diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index c5e02e34e25e..c3c5f6fbcba7 100644
--- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -164,7 +164,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
// Set the alignment to alignment of the byval parameter. This is because,
// later load/stores assume that alignment, and we are going to replace
// the use of the byval parameter with this alloca instruction.
- AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo()));
+ AllocA->setAlignment(MaybeAlign(Func->getParamAlignment(Arg->getArgNo())));
Arg->replaceAllUsesWith(AllocA);
Value *ArgInParam = new AddrSpaceCastInst(
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
index 629757db8707..5e6411c61eab 100644
--- a/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -81,7 +81,7 @@ static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
auto &Op = Root.getOperand(1);
const auto &MRI = MF.getRegInfo();
MachineInstr *GenericAddrDef = nullptr;
- if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
+ if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
}
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 4c5a9adf1f65..a7127b0e9a99 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -178,7 +178,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// frame index registers. Functions which don't want/need this optimization
// will continue to use the existing code path.
if (MFI.getUseLocalStackAllocationBlock()) {
- unsigned Align = MFI.getLocalFrameMaxAlign();
+ unsigned Align = MFI.getLocalFrameMaxAlign().value();
// Adjust to alignment boundary.
Offset = (Offset + Align - 1) / Align * Align;
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 11b3fe2fa3d3..f58fb5717773 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -116,7 +116,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
CPU, FS, Options, Reloc::PIC_,
getEffectiveCodeModel(CM, CodeModel::Small), OL),
is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
- TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
+ TLOF(std::make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
if (TT.getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 665eb1383253..43c2e9920403 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -19,10 +19,11 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/Mutex.h"
#include <algorithm>
#include <cstring>
#include <map>
+#include <mutex>
#include <string>
#include <vector>
@@ -38,12 +39,12 @@ static ManagedStatic<per_module_annot_t> annotationCache;
static sys::Mutex Lock;
void clearAnnotationCache(const Module *Mod) {
- MutexGuard Guard(Lock);
+ std::lock_guard<sys::Mutex> Guard(Lock);
annotationCache->erase(Mod);
}
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
- MutexGuard Guard(Lock);
+ std::lock_guard<sys::Mutex> Guard(Lock);
assert(md && "Invalid mdnode for annotation");
assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
// start index = 1, to skip the global variable key
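MutexGuard was a thin RAII wrapper that the standard library already provides; llvm::sys::Mutex satisfies the BasicLockable requirements (lock/unlock), so std::lock_guard is a drop-in replacement. Sketch:

  #include <mutex>
  static llvm::sys::Mutex Lock;
  {
    std::lock_guard<llvm::sys::Mutex> Guard(Lock); // locks here, unlocks at scope exit
    // ... touch annotationCache ...
  }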
@@ -69,7 +70,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
}
static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
- MutexGuard Guard(Lock);
+ std::lock_guard<sys::Mutex> Guard(Lock);
NamedMDNode *NMD = m->getNamedMetadata("nvvm.annotations");
if (!NMD)
return;
@@ -103,7 +104,7 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
unsigned &retval) {
- MutexGuard Guard(Lock);
+ std::lock_guard<sys::Mutex> Guard(Lock);
const Module *m = gv->getParent();
if ((*annotationCache).find(m) == (*annotationCache).end())
cacheAnnotationFromMD(m, gv);
@@ -117,7 +118,7 @@ bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
std::vector<unsigned> &retval) {
- MutexGuard Guard(Lock);
+ std::lock_guard<sys::Mutex> Guard(Lock);
const Module *m = gv->getParent();
if ((*annotationCache).find(m) == (*annotationCache).end())
cacheAnnotationFromMD(m, gv);
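[Editor's note] The retired llvm::MutexGuard was a thin RAII wrapper around sys::Mutex; std::lock_guard is the standard equivalent and works with any BasicLockable type. A sketch with a plain std::mutex standing in for the file's sys::Mutex:

    #include <map>
    #include <mutex>

    static std::mutex CacheLock;     // stands in for sys::Mutex Lock
    static std::map<int, int> Cache; // stands in for annotationCache

    void clearEntry(int Key) {
      std::lock_guard<std::mutex> Guard(CacheLock); // unlocks on scope exit
      Cache.erase(Key);
    }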
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index c9524da93acd..aedf5b713c3f 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -579,7 +579,7 @@ public:
static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(Token);
+ auto Op = std::make_unique<PPCOperand>(Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -608,7 +608,7 @@ public:
static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(Immediate);
+ auto Op = std::make_unique<PPCOperand>(Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -618,7 +618,7 @@ public:
static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
SMLoc E, bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(Expression);
+ auto Op = std::make_unique<PPCOperand>(Expression);
Op->Expr.Val = Val;
Op->Expr.CRVal = EvaluateCRExpr(Val);
Op->StartLoc = S;
@@ -629,7 +629,7 @@ public:
static std::unique_ptr<PPCOperand>
CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(TLSRegister);
+ auto Op = std::make_unique<PPCOperand>(TLSRegister);
Op->TLSReg.Sym = Sym;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -639,7 +639,7 @@ public:
static std::unique_ptr<PPCOperand>
CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(ContextImmediate);
+ auto Op = std::make_unique<PPCOperand>(ContextImmediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 7a8af57961cb..3597fd15eeb1 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -167,12 +167,6 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, QFRegs);
}
-static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, RRegs);
-}
-
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 042ddf48d5df..20f752c3041a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -78,7 +78,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
// determine the type of the relocation
unsigned Type;
if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default:
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
@@ -131,7 +131,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
break;
}
} else {
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default: llvm_unreachable("invalid fixup kind!");
case FK_NONE:
Type = ELF::R_PPC_NONE;
@@ -443,5 +443,5 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
std::unique_ptr<MCObjectTargetWriter>
llvm::createPPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) {
- return llvm::make_unique<PPCELFObjectWriter>(Is64Bit, OSABI);
+ return std::make_unique<PPCELFObjectWriter>(Is64Bit, OSABI);
}
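[Editor's note] MCFixup::getTargetKind() returns the fixup kind as a plain unsigned, replacing the ad-hoc cast at every switch. A generic illustration of the pattern with simplified stand-in types (these are not the MC classes):

    enum FixupKind : unsigned { FK_NONE = 0, FirstTargetFixupKind = 128 };
    enum PPCFixupKind : unsigned { fixup_ppc_br24 = FirstTargetFixupKind };

    struct FixupStub {
      FixupKind Kind;
      // One centralized widening to unsigned instead of a cast per use site.
      unsigned getTargetKind() const { return Kind; }
    };

    unsigned classify(const FixupStub &F) {
      switch (F.getTargetKind()) {
      case fixup_ppc_br24: return 24; // would select, e.g., R_PPC_REL24
      default:             return 0;
      }
    }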
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 0e64ae55ab1c..7fc231618fa9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -66,6 +66,31 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
+ // Customize printing of the addis instruction on AIX. When an operand is a
+ // symbol reference, the instruction syntax is changed to look like a load
+ // operation, i.e.:
+ // Transform: addis $rD, $rA, $src --> addis $rD, $src($rA).
+ if (TT.isOSAIX() &&
+ (MI->getOpcode() == PPC::ADDIS8 || MI->getOpcode() == PPC::ADDIS) &&
+ MI->getOperand(2).isExpr()) {
+ assert((MI->getOperand(0).isReg() && MI->getOperand(1).isReg()) &&
+ "The first and the second operand of an addis instruction"
+ " should be registers.");
+
+ assert(isa<MCSymbolRefExpr>(MI->getOperand(2).getExpr()) &&
+ "The third operand of an addis instruction should be a symbol "
+ "reference expression if it is an expression at all.");
+
+ O << "\taddis ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 2, O);
+ O << "(";
+ printOperand(MI, 1, O);
+ O << ")";
+ return;
+ }
+
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
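[Editor's note] A sketch of the textual effect of the AIX addis customization above; this only mimics the formatting, with illustrative register and symbol spellings:

    #include <string>

    // ELF-style operand order: addis 3, 2, sym
    // AIX load-style rewrite:  addis 3, sym(2)
    std::string printAddisAIX(const std::string &RD, const std::string &RA,
                              const std::string &Sym) {
      return "\taddis " + RD + ", " + Sym + "(" + RA + ")";
    }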
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 5f0005ea1d7b..1216cd727289 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -86,4 +86,5 @@ void PPCXCOFFMCAsmInfo::anchor() {}
PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
+ ZeroDirective = "\t.space\t";
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index d467f5c4a439..fb9dd5d7aa75 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -19,8 +19,8 @@ using namespace llvm;
const PPCMCExpr*
PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx) {
- return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin);
+ bool IsDarwin, MCContext &Ctx) {
+ return new (Ctx) PPCMCExpr(Kind, Expr, IsDarwin);
}
void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 449e2c34f74d..ad1454566162 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -45,21 +45,21 @@ public:
/// @{
static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx);
+ bool IsDarwin, MCContext &Ctx);
static const PPCMCExpr *createLo(const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx) {
- return create(VK_PPC_LO, Expr, isDarwin, Ctx);
+ bool IsDarwin, MCContext &Ctx) {
+ return create(VK_PPC_LO, Expr, IsDarwin, Ctx);
}
static const PPCMCExpr *createHi(const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx) {
- return create(VK_PPC_HI, Expr, isDarwin, Ctx);
+ bool IsDarwin, MCContext &Ctx) {
+ return create(VK_PPC_HI, Expr, IsDarwin, Ctx);
}
static const PPCMCExpr *createHa(const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx) {
- return create(VK_PPC_HA, Expr, isDarwin, Ctx);
+ bool IsDarwin, MCContext &Ctx) {
+ return create(VK_PPC_HA, Expr, IsDarwin, Ctx);
}
/// @}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 4cf7fd15fa75..672f910ab086 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -178,7 +178,7 @@ static uint32_t getFixupOffset(const MCAsmLayout &Layout,
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
// On Mach-O, ppc_fixup_half16 relocations must refer to the
// start of the instruction, not the second halfword, as ELF does
- if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16)
+ if (Fixup.getTargetKind() == PPC::fixup_ppc_half16)
FixupOffset &= ~uint32_t(3);
return FixupOffset;
}
@@ -376,5 +376,5 @@ void PPCMachObjectWriter::RecordPPCRelocation(
std::unique_ptr<MCObjectTargetWriter>
llvm::createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
- return llvm::make_unique<PPCMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
+ return std::make_unique<PPCMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 9c661286d455..7fdbb8990b55 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -25,5 +25,5 @@ PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit)
std::unique_ptr<MCObjectTargetWriter>
llvm::createPPCXCOFFObjectWriter(bool Is64Bit) {
- return llvm::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
+ return std::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
}
diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td
index 2a10322d3f49..f6cd8ed00c82 100644
--- a/lib/Target/PowerPC/P9InstrResources.td
+++ b/lib/Target/PowerPC/P9InstrResources.td
@@ -64,6 +64,7 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
XXLAND,
XXLANDC,
XXLEQV,
+ XXLEQVOnes,
XXLNAND,
XXLNOR,
XXLOR,
@@ -124,8 +125,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instregex "SRAD(I)?$"),
(instregex "EXTSWSLI_32_64$"),
(instregex "MFV(S)?RD$"),
- (instregex "MTVSRD$"),
- (instregex "MTVSRW(A|Z)$"),
+ (instregex "MTV(S)?RD$"),
+ (instregex "MTV(S)?RW(A|Z)$"),
(instregex "CMP(WI|LWI|W|LW)(8)?$"),
(instregex "CMP(L)?D(I)?$"),
(instregex "SUBF(I)?C(8)?$"),
@@ -148,7 +149,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
(instregex "ADD(4|8)(TLS)?(_)?$"),
(instregex "NEG(8)?$"),
- (instregex "ADDI(S)?toc(HA|L)$"),
+ (instregex "ADDI(S)?toc(HA|L)(8)?$"),
COPY,
MCRF,
MCRXRX,
@@ -158,6 +159,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
XSNEGDP,
XSCPSGNDP,
MFVSRWZ,
+ MFVRWZ,
EXTSWSLI,
SRADI_32,
RLDIC,
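[Editor's note] The instregex operands are anchored regular expressions over instruction names, so the widened patterns above now cover the VMX forms as well. A quick approximation of the intent with std::regex (TableGen's matcher differs in anchoring details):

    #include <cassert>
    #include <regex>

    int main() {
      std::regex Pat("MTV(S)?RD$");             // (S)? makes the 'S' optional
      assert(std::regex_search("MTVSRD", Pat)); // VSX move, matched before
      assert(std::regex_search("MTVRD", Pat));  // VMX move, newly covered
    }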
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index c6951ab67b08..0534773c4c9e 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -50,10 +50,10 @@ namespace llvm {
FunctionPass *createPPCExpandISELPass();
FunctionPass *createPPCPreEmitPeepholePass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP, bool isDarwin);
+ AsmPrinter &AP, bool IsDarwin);
bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &OutMO, AsmPrinter &AP,
- bool isDarwin);
+ bool IsDarwin);
void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
@@ -86,8 +86,8 @@ namespace llvm {
MO_NO_FLAG,
/// On a symbol operand "FOO", this indicates that the reference is actually
- /// to "FOO@plt". This is used for calls and jumps to external functions on
- /// for PIC calls on Linux and ELF systems.
+ /// to "FOO@plt". This is used for calls and jumps to external functions
+ /// and for PIC calls on 32-bit ELF systems.
MO_PLT = 1,
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bd87ce06b4fb..66236b72a1a3 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -51,9 +51,11 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -76,7 +78,7 @@ namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
- MapVector<MCSymbol *, MCSymbol *> TOC;
+ MapVector<const MCSymbol *, MCSymbol *> TOC;
const PPCSubtarget *Subtarget;
StackMaps SM;
@@ -87,7 +89,7 @@ public:
StringRef getPassName() const override { return "PowerPC Assembly Printer"; }
- MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
+ MCSymbol *lookUpOrCreateTOCEntry(const MCSymbol *Sym);
bool doInitialization(Module &M) override {
if (!TOC.empty())
@@ -164,6 +166,14 @@ public:
: PPCAsmPrinter(TM, std::move(Streamer)) {}
StringRef getPassName() const override { return "AIX PPC Assembly Printer"; }
+
+ void SetupMachineFunction(MachineFunction &MF) override;
+
+ void EmitGlobalVariable(const GlobalVariable *GV) override;
+
+ void EmitFunctionDescriptor() override;
+
+ void EmitEndOfAsmFile(Module &) override;
};
} // end anonymous namespace
@@ -265,7 +275,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return true;
// This operand uses VSX numbering.
// If the operand is a VMX register, convert it to a VSX register.
- unsigned Reg = MI->getOperand(OpNo).getReg();
+ Register Reg = MI->getOperand(OpNo).getReg();
if (PPCInstrInfo::isVRRegister(Reg))
Reg = PPC::VSX32 + (Reg - PPC::V0);
else if (PPCInstrInfo::isVFRegister(Reg))
@@ -328,7 +338,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
-MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym) {
MCSymbol *&TOCEntry = TOC[Sym];
if (!TOCEntry)
TOCEntry = createTempSymbol("C");
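[Editor's note] The memoization above keeps exactly one TOC entry label per referenced symbol, created on first use. A container-level sketch with std::map standing in for MapVector and strings for symbols (label spelling illustrative):

    #include <map>
    #include <string>

    static std::map<std::string, std::string> TOC; // symbol -> entry label
    static unsigned TmpCounter = 0;

    std::string lookUpOrCreateTOCEntrySketch(const std::string &Sym) {
      std::string &Entry = TOC[Sym]; // default-constructed on first lookup
      if (Entry.empty())
        Entry = ".LC" + std::to_string(TmpCounter++); // temp label, e.g. .LC0
      return Entry;
    }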
@@ -378,7 +388,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
- unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+ Register ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 0;
// Materialize the jump address:
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8)
@@ -502,13 +512,32 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
.addExpr(SymVar));
}
+/// Map a machine operand for a TOC pseudo-machine instruction to its
+/// corresponding MCSymbol.
+static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
+ AsmPrinter &AP) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_GlobalAddress:
+ return AP.getSymbol(MO.getGlobal());
+ case MachineOperand::MO_ConstantPoolIndex:
+ return AP.GetCPISymbol(MO.getIndex());
+ case MachineOperand::MO_JumpTableIndex:
+ return AP.GetJTISymbol(MO.getIndex());
+ case MachineOperand::MO_BlockAddress:
+ return AP.GetBlockAddressSymbol(MO.getBlockAddress());
+ default:
+ llvm_unreachable("Unexpected operand type to get symbol.");
+ }
+}
+
/// EmitInstruction -- Print out a single PowerPC MI to the current
/// output stream.
///
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
- bool isPPC64 = Subtarget->isPPC64();
- bool isDarwin = TM.getTargetTriple().isOSDarwin();
+ const bool IsDarwin = TM.getTargetTriple().isOSDarwin();
+ const bool IsPPC64 = Subtarget->isPPC64();
+ const bool IsAIX = Subtarget->isAIXABI();
const Module *M = MF->getFunction().getParent();
PICLevel::Level PL = M->getPICLevel();
@@ -517,7 +546,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (!MI->isInlineAsm()) {
for (const MachineOperand &MO: MI->operands()) {
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Subtarget->hasSPE()) {
if (PPC::F4RCRegClass.contains(Reg) ||
PPC::F8RCRegClass.contains(Reg) ||
@@ -595,7 +624,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha
// addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l
// Get the offset from the GOT Base Register to the GOT
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
if (Subtarget->isSecurePlt() && isPositionIndependent() ) {
unsigned PICR = TmpInst.getOperand(0).getReg();
MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol(
@@ -646,43 +675,57 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
case PPC::LWZtoc: {
- // Transform %r3 = LWZtoc @min1, %r2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ assert(!IsDarwin && "TOC is an ELF/XCOFF construct.");
+
+ // Transform %rN = LWZtoc @op1, %r2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to LWZ, and the global address operand to be a
- // reference to the GOT entry we will synthesize later.
+ // Change the opcode to LWZ.
TmpInst.setOpcode(PPC::LWZ);
+
const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand for LWZtoc.");
- // Map symbol -> label of TOC entry
- assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
- MCSymbol *MOSymbol = nullptr;
- if (MO.isGlobal())
- MOSymbol = getSymbol(MO.getGlobal());
- else if (MO.isCPI())
- MOSymbol = GetCPISymbol(MO.getIndex());
- else if (MO.isJTI())
- MOSymbol = GetJTISymbol(MO.getIndex());
- else if (MO.isBlockAddress())
- MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
-
- if (PL == PICLevel::SmallPIC) {
+ // Map the operand to its corresponding MCSymbol.
+ const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+ // Create a reference to the GOT entry for the symbol. The GOT entry will be
+ // synthesized later.
+ if (PL == PICLevel::SmallPIC && !IsAIX) {
const MCExpr *Exp =
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT,
OutContext);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
- } else {
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
- const MCExpr *Exp =
- MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None,
- OutContext);
- const MCExpr *PB =
- MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
- OutContext);
- Exp = MCBinaryExpr::createSub(Exp, PB, OutContext);
+ // Otherwise, use the TOC. 'TOCEntry' is a label used to reference the
+ // storage allocated in the TOC which contains the address of
+ // 'MOSymbol'. Said TOC entry will be synthesized later.
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, OutContext);
+
+ // AIX uses the label directly as the lwz displacement operand for
+ // references into the toc section. The displacement value will be generated
+ // relative to the toc-base.
+ if (IsAIX) {
+ assert(
+ TM.getCodeModel() == CodeModel::Small &&
+ "This pseudo should only be selected for 32-bit small code model.");
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
}
+
+ // Create an explicit subtract expression between the local symbol and
+ // '.LTOC' to manifest the toc-relative offset.
+ const MCExpr *PB = MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol(Twine(".LTOC")), OutContext);
+ Exp = MCBinaryExpr::createSub(Exp, PB, OutContext);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
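[Editor's note] The restructured LWZtoc lowering above selects one of three displacement shapes before the early returns. A sketch of that decision, with strings standing in for the MCExpr forms (spellings illustrative):

    #include <string>

    std::string lwzDisplacement(bool SmallPIC, bool IsAIX,
                                const std::string &Sym,
                                const std::string &TOCEntry) {
      if (SmallPIC && !IsAIX)
        return Sym + "@got";      // reference the GOT entry directly
      if (IsAIX)
        return TOCEntry;          // the label is already toc-base relative
      return TOCEntry + "-.LTOC"; // explicit subtract against .LTOC
    }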
@@ -690,72 +733,121 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDtocCPT:
case PPC::LDtocBA:
case PPC::LDtoc: {
+ assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
+
// Transform %x3 = LDtoc @min1, %x2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to LD, and the global address operand to be a
- // reference to the TOC entry we will synthesize later.
+ // Change the opcode to LD.
TmpInst.setOpcode(PPC::LD);
+
const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand!");
+
+ // Map the machine operand to its corresponding MCSymbol, then map the
+ // global address operand to be a reference to the TOC entry we will
+ // synthesize later.
+ MCSymbol *TOCEntry =
+ lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this));
+
+ const MCSymbolRefExpr::VariantKind VK =
+ IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(TOCEntry, VK, OutContext);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::ADDIStocHA: {
+ assert((IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large) &&
+ "This pseudo should only be selected for 32-bit large code model on"
+ " AIX.");
+
+ // Transform %rd = ADDIStocHA %rA, @sym(%r2)
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Map symbol -> label of TOC entry
- assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
- MCSymbol *MOSymbol = nullptr;
- if (MO.isGlobal())
- MOSymbol = getSymbol(MO.getGlobal());
- else if (MO.isCPI())
- MOSymbol = GetCPISymbol(MO.getIndex());
- else if (MO.isJTI())
- MOSymbol = GetJTISymbol(MO.getIndex());
- else if (MO.isBlockAddress())
- MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+ // Change the opcode to ADDIS.
+ TmpInst.setOpcode(PPC::ADDIS);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand for ADDIStocHA.");
+
+ // Map the machine operand to its corresponding MCSymbol.
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+ // Always use TOC on AIX. Map the global address operand to be a reference
+ // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
+ // reference the storage allocated in the TOC which contains the address of
+ // 'MOSymbol'.
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+ const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
+ MCSymbolRefExpr::VK_PPC_U,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LWZtocL: {
+ assert(IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large &&
+ "This pseudo should only be selected for 32-bit large code model on"
+ " AIX.");
- const MCExpr *Exp =
- MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
- OutContext);
+ // Transform %rd = LWZtocL @sym, %rs.
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+
+ // Change the opcode to lwz.
+ TmpInst.setOpcode(PPC::LWZ);
+
+ const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand for LWZtocL.");
+
+ // Map the machine operand to its corresponding MCSymbol.
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+ // Always use TOC on AIX. Map the global address operand to be a reference
+ // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
+ // reference the storage allocated in the TOC which contains the address of
+ // 'MOSymbol'.
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+ const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
+ MCSymbolRefExpr::VK_PPC_L,
+ OutContext);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
+ case PPC::ADDIStocHA8: {
+ assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
- case PPC::ADDIStocHA: {
- // Transform %xd = ADDIStocHA %x2, @sym
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ // Transform %xd = ADDIStocHA8 %x2, @sym
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to ADDIS8. If the global address is external, has
- // common linkage, is a non-local function address, or is a jump table
- // address, then generate a TOC entry and reference that. Otherwise
- // reference the symbol directly.
+ // Change the opcode to ADDIS8. If the global address is the address of
+ // an external symbol, is a jump table address, is a block address, or is a
+ // constant pool index with large code model enabled, then generate a TOC
+ // entry and reference that. Otherwise, reference the symbol directly.
TmpInst.setOpcode(PPC::ADDIS8);
+
const MachineOperand &MO = MI->getOperand(2);
- assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
- MO.isBlockAddress()) &&
- "Invalid operand for ADDIStocHA!");
- MCSymbol *MOSymbol = nullptr;
- bool GlobalToc = false;
-
- if (MO.isGlobal()) {
- const GlobalValue *GV = MO.getGlobal();
- MOSymbol = getSymbol(GV);
- unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
- GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG);
- } else if (MO.isCPI()) {
- MOSymbol = GetCPISymbol(MO.getIndex());
- } else if (MO.isJTI()) {
- MOSymbol = GetJTISymbol(MO.getIndex());
- } else if (MO.isBlockAddress()) {
- MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
- }
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand for ADDIStocHA8!");
+
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ const bool GlobalToc =
+ MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
- TM.getCodeModel() == CodeModel::Large)
+ (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ const MCSymbolRefExpr::VariantKind VK =
+ IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
+
const MCExpr *Exp =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
- OutContext);
+ MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
if (!MO.isJTI() && MO.getOffset())
Exp = MCBinaryExpr::createAdd(Exp,
@@ -768,73 +860,59 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case PPC::LDtocL: {
+ assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
+
// Transform %xd = LDtocL @sym, %xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to LD. If the global address is external, has
- // common linkage, or is a jump table address, then reference the
- // associated TOC entry. Otherwise reference the symbol directly.
+ // Change the opcode to LD. If the global address is the address of
+ // an external symbol, is a jump table address, is a block address, or is
+ // a constant pool index with large code model enabled, then generate a
+ // TOC entry and reference that. Otherwise, reference the symbol directly.
TmpInst.setOpcode(PPC::LD);
+
const MachineOperand &MO = MI->getOperand(1);
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
MO.isBlockAddress()) &&
"Invalid operand for LDtocL!");
- MCSymbol *MOSymbol = nullptr;
- if (MO.isJTI())
- MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
- else if (MO.isBlockAddress()) {
- MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
- }
- else if (MO.isCPI()) {
- MOSymbol = GetCPISymbol(MO.getIndex());
- if (TM.getCodeModel() == CodeModel::Large)
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
- }
- else if (MO.isGlobal()) {
- const GlobalValue *GV = MO.getGlobal();
- MOSymbol = getSymbol(GV);
- LLVM_DEBUG(
- unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
- assert((GVFlags & PPCII::MO_NLP_FLAG) &&
- "LDtocL used on symbol that could be accessed directly is "
- "invalid. Must match ADDIStocHA."));
+ LLVM_DEBUG(assert(
+ (!MO.isGlobal() || Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
+ "LDtocL used on symbol that could be accessed directly is "
+ "invalid. Must match ADDIStocHA8."));
+
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+ if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
- }
+ const MCSymbolRefExpr::VariantKind VK =
+ IsAIX ? MCSymbolRefExpr::VK_PPC_L : MCSymbolRefExpr::VK_PPC_TOC_LO;
const MCExpr *Exp =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
- OutContext);
+ MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::ADDItocL: {
// Transform %xd = ADDItocL %xs, @sym
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to ADDI8. If the global address is external, then
- // generate a TOC entry and reference that. Otherwise reference the
+ // Change the opcode to ADDI8. If the global address is external, then
+ // generate a TOC entry and reference that. Otherwise, reference the
// symbol directly.
TmpInst.setOpcode(PPC::ADDI8);
+
const MachineOperand &MO = MI->getOperand(2);
- assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
- MCSymbol *MOSymbol = nullptr;
-
- if (MO.isGlobal()) {
- const GlobalValue *GV = MO.getGlobal();
- LLVM_DEBUG(unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
- assert(!(GVFlags & PPCII::MO_NLP_FLAG) &&
- "Interposable definitions must use indirect access."));
- MOSymbol = getSymbol(GV);
- } else if (MO.isCPI()) {
- MOSymbol = GetCPISymbol(MO.getIndex());
- }
+ assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL.");
+
+ LLVM_DEBUG(assert(
+ !(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
+ "Interposable definitions must use indirect access."));
const MCExpr *Exp =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
- OutContext);
+ MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this),
+ MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
@@ -842,13 +920,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::ADDISgotTprelHA: {
// Transform: %xd = ADDISgotTprelHA %x2, @sym
// Into: %xd = ADDIS8 %x2, sym@got@tprel@ha
- assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(IsPPC64 && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTprel =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
- OutContext);
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
+ OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
@@ -858,16 +936,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDgotTprelL:
case PPC::LDgotTprelL32: {
// Transform %xd = LDgotTprelL @sym, %xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
// Change the opcode to LD (or LWZ for the 32-bit form).
- TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
+ TmpInst.setOpcode(IsPPC64 ? PPC::LD : PPC::LWZ);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *Exp =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
- OutContext);
+ const MCExpr *Exp = MCSymbolRefExpr::create(
+ MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TPREL,
+ OutContext);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
@@ -920,7 +999,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::ADDIStlsgdHA: {
// Transform: %xd = ADDIStlsgdHA %x2, @sym
// Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha
- assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(IsPPC64 && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
@@ -943,11 +1022,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create(
- MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
- : MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
+ MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
OutContext);
EmitToStreamer(*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsGD));
@@ -965,7 +1044,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::ADDIStlsldHA: {
// Transform: %xd = ADDIStlsldHA %x2, @sym
// Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha
- assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(IsPPC64 && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
@@ -988,11 +1067,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create(
- MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
- : MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
+ MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
OutContext);
EmitToStreamer(*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsLD));
@@ -1021,7 +1100,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext);
EmitToStreamer(
*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+ MCInstBuilder(IsPPC64 ? PPC::ADDIS8 : PPC::ADDIS)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymDtprel));
@@ -1040,7 +1119,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
EmitToStreamer(*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymDtprel));
@@ -1087,7 +1166,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// suite shows a handful of test cases that fail this check for
// Darwin. Those need to be investigated before this sanity test
// can be enabled for those subtargets.
- if (!Subtarget->isDarwin()) {
+ if (!IsDarwin) {
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
@@ -1098,7 +1177,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
EmitToStreamer(*OutStreamer, TmpInst);
}
@@ -1368,15 +1447,16 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
OutStreamer->SwitchSection(Section);
- for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
- E = TOC.end(); I != E; ++I) {
- OutStreamer->EmitLabel(I->second);
- MCSymbol *S = I->first;
+ for (const auto &TOCMapPair : TOC) {
+ const MCSymbol *const TOCEntryTarget = TOCMapPair.first;
+ MCSymbol *const TOCEntryLabel = TOCMapPair.second;
+
+ OutStreamer->EmitLabel(TOCEntryLabel);
if (isPPC64) {
- TS.emitTCEntry(*S);
+ TS.emitTCEntry(*TOCEntryTarget);
} else {
OutStreamer->EmitValueToAlignment(4);
- OutStreamer->EmitSymbolValue(S, 4);
+ OutStreamer->EmitSymbolValue(TOCEntryTarget, 4);
}
}
}
@@ -1602,7 +1682,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
- EmitAlignment(isPPC64 ? 3 : 2);
+ EmitAlignment(isPPC64 ? Align(8) : Align(4));
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
@@ -1643,6 +1723,106 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
+void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ // Get the function descriptor symbol.
+ CurrentFnDescSym = getSymbol(&MF.getFunction());
+ // Set the containing csect.
+ MCSectionXCOFF *FnDescSec = OutStreamer->getContext().getXCOFFSection(
+ CurrentFnDescSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
+ XCOFF::C_HIDEXT, SectionKind::getData());
+ cast<MCSymbolXCOFF>(CurrentFnDescSym)->setContainingCsect(FnDescSec);
+
+ return AsmPrinter::SetupMachineFunction(MF);
+}
+
+void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ // Early error checking limiting what is supported.
+ if (GV->isThreadLocal())
+ report_fatal_error("Thread local not yet supported on AIX.");
+
+ if (GV->hasSection())
+ report_fatal_error("Custom section for Data not yet supported.");
+
+ if (GV->hasComdat())
+ report_fatal_error("COMDAT not yet supported by AIX.");
+
+ SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM);
+ if (!GVKind.isCommon() && !GVKind.isBSSLocal() && !GVKind.isData())
+ report_fatal_error("Encountered a global variable kind that is "
+ "not supported yet.");
+
+ // Create the containing csect and switch to it.
+ MCSectionXCOFF *CSect = cast<MCSectionXCOFF>(
+ getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
+ OutStreamer->SwitchSection(CSect);
+
+ // Create the symbol, set its storage class, and emit it.
+ MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
+ GVSym->setStorageClass(
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
+ GVSym->setContainingCsect(CSect);
+
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+
+ // Handle common symbols.
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ unsigned Align =
+ GV->getAlignment() ? GV->getAlignment() : DL.getPreferredAlignment(GV);
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+
+ if (GVKind.isBSSLocal())
+ OutStreamer->EmitXCOFFLocalCommonSymbol(GVSym, Size, Align);
+ else
+ OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ MCSymbol *EmittedInitSym = GVSym;
+ EmitLinkage(GV, EmittedInitSym);
+ EmitAlignment(getGVAlignment(GV, DL), GV);
+ OutStreamer->EmitLabel(EmittedInitSym);
+ EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+}
+
+void PPCAIXAsmPrinter::EmitFunctionDescriptor() {
+ const DataLayout &DL = getDataLayout();
+ const unsigned PointerSize = DL.getPointerSizeInBits() == 64 ? 8 : 4;
+
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+ // Emit function descriptor.
+ OutStreamer->SwitchSection(
+ cast<MCSymbolXCOFF>(CurrentFnDescSym)->getContainingCsect());
+ OutStreamer->EmitLabel(CurrentFnDescSym);
+ // Emit function entry point address.
+ OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
+ PointerSize);
+ // Emit TOC base address.
+ MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
+ OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
+ PointerSize);
+ // Emit a null environment pointer.
+ OutStreamer->EmitIntValue(0, PointerSize);
+
+ OutStreamer->SwitchSection(Current.first, Current.second);
+}
+
+void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ // If there are no functions in this module, we will never need to reference
+ // the TOC base.
+ if (M.empty())
+ return;
+
+ // Emit TOC base.
+ MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
+ MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection(
+ StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
+ SectionKind::getData());
+ cast<MCSymbolXCOFF>(TOCBaseSym)->setContainingCsect(TOCBaseSection);
+ // Switch to section to emit TOC base.
+ OutStreamer->SwitchSection(TOCBaseSection);
+}
+
+
/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
/// for a MachineFunction to the given output stream, in a format that the
/// Darwin assembler can deal with.
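[Editor's note] EmitFunctionDescriptor above writes three pointer-sized words: entry point, TOC base, environment. A C-level sketch of the 32-bit layout it produces (the 64-bit variant just widens the fields):

    #include <cstdint>

    struct FunctionDescriptor32 {
      uint32_t EntryPoint;   // address of the function's code
      uint32_t TOCBase;      // address of TOC[TC0], loaded into r2 on call
      uint32_t Environment;  // emitted as 0 by the code above
    };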
diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 5e9a661f8f0b..d325b078979f 100644
--- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -340,9 +340,10 @@ bool PPCBranchCoalescing::identicalOperands(
if (Op1.isIdenticalTo(Op2)) {
// filter out instructions with physical-register uses
- if (Op1.isReg() && TargetRegisterInfo::isPhysicalRegister(Op1.getReg())
- // If the physical register is constant then we can assume the value
- // has not changed between uses.
+ if (Op1.isReg() &&
+ Register::isPhysicalRegister(Op1.getReg())
+ // If the physical register is constant then we can assume the value
+ // has not changed between uses.
&& !(Op1.isUse() && MRI->isConstantPhysReg(Op1.getReg()))) {
LLVM_DEBUG(dbgs() << "The operands are not provably identical.\n");
return false;
@@ -355,8 +356,8 @@ bool PPCBranchCoalescing::identicalOperands(
// definition of the register produces the same value. If they produce the
// same value, consider them to be identical.
if (Op1.isReg() && Op2.isReg() &&
- TargetRegisterInfo::isVirtualRegister(Op1.getReg()) &&
- TargetRegisterInfo::isVirtualRegister(Op2.getReg())) {
+ Register::isVirtualRegister(Op1.getReg()) &&
+ Register::isVirtualRegister(Op2.getReg())) {
MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg());
MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg());
if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) {
@@ -456,7 +457,7 @@ bool PPCBranchCoalescing::canMoveToEnd(const MachineInstr &MI,
<< TargetMBB.getNumber() << "\n");
for (auto &Use : MI.uses()) {
- if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) {
+ if (Use.isReg() && Register::isVirtualRegister(Use.getReg())) {
MachineInstr *DefInst = MRI->getVRegDef(Use.getReg());
if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) {
LLVM_DEBUG(dbgs() << " *** Cannot move this instruction ***\n");
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 793d690baec3..cdff4d383d23 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -81,21 +81,20 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
/// original Offset.
unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
unsigned Offset) {
- unsigned Align = MBB.getAlignment();
- if (!Align)
+ const Align Alignment = MBB.getAlignment();
+ if (Alignment == Align::None())
return 0;
- unsigned AlignAmt = 1 << Align;
- unsigned ParentAlign = MBB.getParent()->getAlignment();
+ const Align ParentAlign = MBB.getParent()->getAlignment();
- if (Align <= ParentAlign)
- return OffsetToAlignment(Offset, AlignAmt);
+ if (Alignment <= ParentAlign)
+ return offsetToAlignment(Offset, Alignment);
// The alignment of this MBB is larger than the function's alignment, so we
// can't tell whether or not it will insert nops. Assume that it will.
if (FirstImpreciseBlock < 0)
FirstImpreciseBlock = MBB.getNumber();
- return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+ return Alignment.value() + offsetToAlignment(Offset, Alignment);
}
/// We need to be careful about the offset of the first block in the function
@@ -179,7 +178,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
const MachineBasicBlock *Dest,
unsigned BrOffset) {
int BranchSize;
- unsigned MaxAlign = 2;
+ Align MaxAlign = Align(4);
bool NeedExtraAdjustment = false;
if (Dest->getNumber() <= Src->getNumber()) {
// If this is a backwards branch, the delta is the offset from the
@@ -192,8 +191,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
BranchSize += BlockSizes[DestBlock].first;
for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) {
BranchSize += BlockSizes[i].first;
- MaxAlign = std::max(MaxAlign,
- Fn.getBlockNumbered(i)->getAlignment());
+ MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());
}
NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
@@ -207,8 +205,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
MaxAlign = std::max(MaxAlign, Dest->getAlignment());
for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {
BranchSize += BlockSizes[i].first;
- MaxAlign = std::max(MaxAlign,
- Fn.getBlockNumbered(i)->getAlignment());
+ MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());
}
NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
@@ -258,7 +255,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
// The computed offset is at most (alignment - 4) bytes smaller than the
// actual offset. So we add this number to the offset for safety.
if (NeedExtraAdjustment)
- BranchSize += (1 << MaxAlign) - 4;
+ BranchSize += MaxAlign.value() - 4;
return BranchSize;
}
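[Editor's note] offsetToAlignment(Offset, Alignment), used above, returns the padding needed to reach the next boundary rather than the rounded offset itself. A minimal equivalent in plain C++:

    #include <cassert>
    #include <cstdint>

    static uint64_t offsetToAlignmentSketch(uint64_t Offset, uint64_t Align) {
      uint64_t Rem = Offset % Align;
      return Rem ? Align - Rem : 0; // bytes of padding; 0 if already aligned
    }

    int main() {
      assert(offsetToAlignmentSketch(13, 8) == 3);
      assert(offsetToAlignmentSketch(16, 8) == 0);
    }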
@@ -339,16 +336,16 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// 1. CR register
// 2. Target MBB
PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
- unsigned CRReg = I->getOperand(1).getReg();
+ Register CRReg = I->getOperand(1).getReg();
// Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
BuildMI(MBB, I, dl, TII->get(PPC::BCC))
.addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
} else if (I->getOpcode() == PPC::BC) {
- unsigned CRBit = I->getOperand(0).getReg();
+ Register CRBit = I->getOperand(0).getReg();
BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
} else if (I->getOpcode() == PPC::BCn) {
- unsigned CRBit = I->getOperand(0).getReg();
+ Register CRBit = I->getOperand(0).getReg();
BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
} else if (I->getOpcode() == PPC::BDNZ) {
BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 264d6b590f95..d8425d89da92 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -162,7 +162,7 @@ class PPCFastISel final : public FastISel {
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg,
const PPC::Predicate Pred);
- bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
const TargetRegisterClass *RC, bool IsZExt = true,
unsigned FP64LoadOpc = PPC::LFD);
bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
@@ -451,7 +451,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
-bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
const TargetRegisterClass *RC,
bool IsZExt, unsigned FP64LoadOpc) {
unsigned Opc;
@@ -469,7 +469,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
(ResultReg ? MRI.getRegClass(ResultReg) :
(RC ? RC :
(VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
- (VT == MVT::f32 ? (HasSPE ? &PPC::SPE4RCRegClass : &PPC::F4RCRegClass) :
+ (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
(VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
&PPC::GPRC_and_GPRC_NOR0RegClass)))));
@@ -612,7 +612,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
- unsigned ResultReg = 0;
+ Register ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
@@ -989,7 +989,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
unsigned DestReg;
auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
- DestReg = createResultReg(&PPC::SPE4RCRegClass);
+ DestReg = createResultReg(&PPC::GPRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::EFSCFD), DestReg)
.addReg(SrcReg);
@@ -1051,7 +1051,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
}
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
- unsigned ResultReg = 0;
+ Register ResultReg = 0;
if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
return 0;
@@ -1176,7 +1176,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
- unsigned ResultReg = 0;
+ Register ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
return 0;
@@ -1229,9 +1229,9 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
if (IsSigned)
- Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
+ Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
else
- Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
+ Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
} else if (isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSFRCRegClass);
if (DstVT == MVT::i32)
@@ -1717,7 +1717,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
CCValAssign &VA = ValLocs[0];
- unsigned RetReg = VA.getLocReg();
+ Register RetReg = VA.getLocReg();
// We still need to worry about properly extending the sign. For example,
// we could have only a single bit or a constant that needs zero
// extension rather than sign extension. Make sure we pass the return
@@ -2002,7 +2002,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
const bool HasSPE = PPCSubTarget->hasSPE();
const TargetRegisterClass *RC;
if (HasSPE)
- RC = ((VT == MVT::f32) ? &PPC::SPE4RCRegClass : &PPC::SPERCRegClass);
+ RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
else
RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
@@ -2031,8 +2031,8 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg).addMemOperand(MMO);
} else {
- // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+ // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
// But for large code model, we must generate a LDtocL followed
// by the LF[SD].
@@ -2085,16 +2085,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// or externally available linkage, a non-local function address, or a
// jump table address (not yet needed), or if we are generating code
// for large code model, we generate:
- // LDtocL(GV, ADDIStocHA(%x2, GV))
+ // LDtocL(GV, ADDIStocHA8(%x2, GV))
// Otherwise we generate:
- // ADDItocL(ADDIStocHA(%x2, GV), GV)
- // Either way, start with the ADDIStocHA:
+ // ADDItocL(ADDIStocHA8(%x2, GV), GV)
+ // Either way, start with the ADDIStocHA8:
unsigned HighPartReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
- if (GVFlags & PPCII::MO_NLP_FLAG) {
+ if (PPCSubTarget->isGVIndirectSymbol(GV)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
} else {
@@ -2353,7 +2352,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
if (!PPCComputeAddress(LI->getOperand(0), Addr))
return false;
- unsigned ResultReg = MI->getOperand(0).getReg();
+ Register ResultReg = MI->getOperand(0).getReg();
if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
@@ -2464,7 +2463,7 @@ namespace llvm {
const TargetLibraryInfo *LibInfo) {
// Only available on 64-bit ELF for now.
const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
- if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
+ if (Subtarget.is64BitELFABI())
return new PPCFastISel(FuncInfo, LibInfo);
return nullptr;
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index ebfb1ef7f49b..06a4d183e781 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -47,13 +47,15 @@ static const MCPhysReg VRRegNo[] = {
};
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
- if (STI.isDarwinABI())
+ if (STI.isDarwinABI() || STI.isAIXABI())
return STI.isPPC64() ? 16 : 8;
// SVR4 ABI:
return STI.isPPC64() ? 16 : 4;
}
static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
+ if (STI.isAIXABI())
+ return STI.isPPC64() ? 40 : 20;
return STI.isELFv2ABI() ? 24 : 40;
}
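[Editor's note] With the AIX case added, the TOC save slot chosen above varies by ABI. A mirror of the logic as patched, offsets in bytes from the stack pointer (sketch only):

    //            32-bit   64-bit
    //   AIX        20       40
    //   ELFv1       -       40
    //   ELFv2       -       24
    unsigned tocSaveOffsetSketch(bool IsAIX, bool IsPPC64, bool IsELFv2) {
      if (IsAIX)
        return IsPPC64 ? 40 : 20;
      return IsELFv2 ? 24 : 40;
    }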
@@ -88,6 +90,11 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
: STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
}
+static unsigned computeCRSaveOffset() {
+ // The condition register save offset needs to be updated for AIX PPC32.
+ return 8;
+}
+
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
STI.getPlatformStackAlignment(), 0),
@@ -95,7 +102,8 @@ PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
LinkageSize(computeLinkageSize(Subtarget)),
- BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
+ BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
+ CRSaveOffset(computeCRSaveOffset()) {}
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
@@ -370,8 +378,8 @@ static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
return;
}
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
if (DstReg != SrcReg)
@@ -781,15 +789,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isAIXABI = Subtarget.isAIXABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
- "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
+ assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) &&
+ "Unsupported PPC ABI.");
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
if (!isSVR4ABI)
for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ if (isAIXABI)
+ report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
HandleVRSaveUpdate(*MBBI, TII);
break;
}
@@ -819,7 +830,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool HasRedZone = isPPC64 || !isSVR4ABI;
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = RegInfo->getBaseRegister(MF);
+ Register BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
@@ -908,6 +919,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");
+ if (MustSaveCR && isAIXABI)
+ report_fatal_error("Prologue CR saving is unimplemented on AIX.");
+
// Check if we can move the stack update instruction (stdu) down the prologue
// past the callee saves. Hopefully this will avoid the situation where the
// saves are waiting for the update on the store with update to complete.
@@ -966,7 +980,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MIB.addReg(MustSaveCRs[i], CrState);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
.addReg(TempReg, getKillRegState(true))
- .addImm(8)
+ .addImm(getCRSaveOffset())
.addReg(SPReg);
}
@@ -1020,7 +1034,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert(HasRedZone && "A red zone is always available on PPC64");
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
.addReg(TempReg, getKillRegState(true))
- .addImm(8)
+ .addImm(getCRSaveOffset())
.addReg(SPReg);
}
@@ -1324,7 +1338,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// actually saved gets its own CFI record.
unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
+ nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
continue;
@@ -1387,7 +1401,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = RegInfo->getBaseRegister(MF);
+ Register BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned ScratchReg = 0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
@@ -1590,7 +1604,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// is live here.
assert(HasRedZone && "Expecting red zone");
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(8)
+ .addImm(getCRSaveOffset())
.addReg(SPReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
@@ -1614,7 +1628,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(isPPC64 && "Expecting 64-bit mode");
assert(RBReg == SPReg && "Should be using SP as a base register");
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(8)
+ .addImm(getCRSaveOffset())
.addReg(RBReg);
}
@@ -1762,8 +1776,8 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
- bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
+ const bool isPPC64 = Subtarget.isPPC64();
+ const bool IsDarwinABI = Subtarget.isDarwinABI();
MachineFrameInfo &MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
@@ -1812,7 +1826,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
// function uses CR 2, 3, or 4.
- if (!isPPC64 && !isDarwinABI &&
+ if (!isPPC64 && !IsDarwinABI &&
(SavedRegs.test(PPC::CR2) ||
SavedRegs.test(PPC::CR3) ||
SavedRegs.test(PPC::CR4))) {
@@ -1872,8 +1886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
(Reg != PPC::X2 && Reg != PPC::R2)) &&
"Not expecting to try to spill R2 in a function that must save TOC");
- if (PPC::GPRCRegClass.contains(Reg) ||
- PPC::SPE4RCRegClass.contains(Reg)) {
+ if (PPC::GPRCRegClass.contains(Reg)) {
HasGPSaveArea = true;
GPRegs.push_back(CSI[i]);
@@ -1967,7 +1980,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
assert(FI && "No Base Pointer Save Slot!");
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
- unsigned BP = RegInfo->getBaseRegister(MF);
+ Register BP = RegInfo->getBaseRegister(MF);
if (PPC::G8RCRegClass.contains(BP)) {
MinG8R = std::min<unsigned>(MinG8R, BP);
HasG8SaveArea = true;
@@ -2428,6 +2441,26 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+unsigned PPCFrameLowering::getTOCSaveOffset() const {
+ if (Subtarget.isAIXABI())
+ // TOC save/restore is normally handled by the linker.
+ // Only indirect calls should hit this limitation.
+ report_fatal_error("TOC save is not implemented on AIX yet.");
+ return TOCSaveOffset;
+}
+
+unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("FramePointer is not implemented on AIX yet.");
+ return FramePointerSaveOffset;
+}
+
+unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("BasePointer is not implemented on AIX yet.");
+ return BasePointerSaveOffset;
+}
+
bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
return false;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index d116e9fd22e1..a5fbc9acbb28 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -26,6 +26,7 @@ class PPCFrameLowering: public TargetFrameLowering {
const unsigned FramePointerSaveOffset;
const unsigned LinkageSize;
const unsigned BasePointerSaveOffset;
+ const unsigned CRSaveOffset;
/**
* Find register[s] that can be used in function prologue and epilogue
@@ -142,15 +143,19 @@ public:
/// getTOCSaveOffset - Return the previous frame offset to save the
/// TOC register -- 64-bit SVR4 ABI only.
- unsigned getTOCSaveOffset() const { return TOCSaveOffset; }
+ unsigned getTOCSaveOffset() const;
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
- unsigned getFramePointerSaveOffset() const { return FramePointerSaveOffset; }
+ unsigned getFramePointerSaveOffset() const;
/// getBasePointerSaveOffset - Return the previous frame offset to save the
/// base pointer.
- unsigned getBasePointerSaveOffset() const { return BasePointerSaveOffset; }
+ unsigned getBasePointerSaveOffset() const;
+
+ /// getCRSaveOffset - Return the previous frame offset to save the
+ /// CR register.
+ unsigned getCRSaveOffset() const { return CRSaveOffset; }
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 543cac075f55..4ad6c88233fe 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -371,7 +371,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// by the scheduler. Detect them now.
bool HasVectorVReg = false;
for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
HasVectorVReg = true;
break;
@@ -391,8 +391,8 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Create two vregs - one to hold the VRSAVE register that is live-in to the
// function and one for the value after having bits or'd into it.
- unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
@@ -447,7 +447,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
} else {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
- unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl,
TII.get(PPC::UpdateGBR), GlobalBaseReg)
.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
@@ -5065,52 +5065,95 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case PPCISD::TOC_ENTRY: {
- assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
- "Only supported for 64-bit ABI and 32-bit SVR4");
- if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
- SDValue GA = N->getOperand(0);
- SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
- N->getOperand(1));
- transferMemOperands(N, MN);
- ReplaceNode(N, MN);
- return;
- }
+ const bool isPPC64 = PPCSubTarget->isPPC64();
+ const bool isELFABI = PPCSubTarget->isSVR4ABI();
+ const bool isAIXABI = PPCSubTarget->isAIXABI();
+
+ assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct");
+
+ // PowerPC only supports the small, medium and large code models.
+ const CodeModel::Model CModel = TM.getCodeModel();
+ assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
+ "PowerPC doesn't support tiny or kernel code models.");
- // For medium and large code model, we generate two instructions as
- // described below. Otherwise we allow SelectCodeCommon to handle this,
+ if (isAIXABI && CModel == CodeModel::Medium)
+ report_fatal_error("Medium code model is not supported on AIX.");
+
+ // For 64-bit small code model, we allow SelectCodeCommon to handle this,
// selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
- CodeModel::Model CModel = TM.getCodeModel();
- if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
+ if (isPPC64 && CModel == CodeModel::Small)
break;
- // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
- // If it must be toc-referenced according to PPCSubTarget, we generate:
- // LDtocL(@sym, ADDIStocHA(%x2, @sym))
+ // Handle 32-bit small code model.
+ if (!isPPC64) {
+ // Transforms the ISD::TOC_ENTRY node into a PPC::LWZtoc machine node.
+ auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) {
+ SDValue GA = TocEntry->getOperand(0);
+ SDValue TocBase = TocEntry->getOperand(1);
+ SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
+ TocBase);
+ transferMemOperands(TocEntry, MN);
+ ReplaceNode(TocEntry, MN);
+ };
+
+ if (isELFABI) {
+ assert(TM.isPositionIndependent() &&
+ "32-bit ELF can only have TOC entries in position independent"
+ " code.");
+ // 32-bit ELF always uses a small code model toc access.
+ replaceWithLWZtoc(N);
+ return;
+ }
+
+ if (isAIXABI && CModel == CodeModel::Small) {
+ replaceWithLWZtoc(N);
+ return;
+ }
+ }
+
+ assert(CModel != CodeModel::Small && "All small code models handled.");
+
+ assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
+ " ELF/AIX or 32-bit AIX in the following.");
+
+ // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model, or
+ // 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
+ // generate two instructions as described below. The first source operand
+ // is a symbol reference. If it must be toc-referenced according to
+ // PPCSubTarget, we generate:
+ // [32-bit AIX]
+ // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
+ // [64-bit ELF/AIX]
+ // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
// Otherwise we generate:
- // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
+ // ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
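+ // Illustrative (assumed) assembly for the two sequences on 64-bit ELF:
+ //   got-indirect:  addis 3, 2, sym@toc@ha
+ //                  ld    3, sym@toc@l(3)
+ //   direct:        addis 3, 2, sym@toc@ha
+ //                  addi  3, 3, sym@toc@l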
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
- SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
- TOCbase, GA);
+
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDNode *Tmp = CurDAG->getMachineNode(
+ isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
+
if (PPCLowering->isAccessedAsGotIndirect(GA)) {
- // If it is access as got-indirect, we need an extra LD to load
+ // If it is accessed as got-indirect, we need an extra LWZ/LD to load
// the address.
- SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ SDNode *MN = CurDAG->getMachineNode(
+ isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
+
transferMemOperands(N, MN);
ReplaceNode(N, MN);
return;
}
- // Build the address relative to the TOC-pointer..
+ // Build the address relative to the TOC-pointer.
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA));
return;
}
case PPCISD::PPC32_PICGOT:
// Generate a PIC-safe GOT reference.
- assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
- "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
+ assert(PPCSubTarget->is32BitELFABI() &&
+ "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
PPCLowering->getPointerTy(CurDAG->getDataLayout()),
MVT::i32);
@@ -6456,7 +6499,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
continue;
if (!HBase.isMachineOpcode() ||
- HBase.getMachineOpcode() != PPC::ADDIStocHA)
+ HBase.getMachineOpcode() != PPC::ADDIStocHA8)
continue;
if (!Base.hasOneUse() || !HBase.hasOneUse())
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 24d50074860d..8cf6a660b08b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -139,13 +139,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64();
- setMinStackArgumentAlignment(isPPC64 ? 8:4);
+ setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
if (!useSoftFloat()) {
if (hasSPE()) {
- addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
+ addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
} else {
addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
@@ -431,28 +431,26 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (Subtarget.isSVR4ABI()) {
- if (isPPC64) {
- // VAARG always uses double-word chunks, so promote anything smaller.
- setOperationAction(ISD::VAARG, MVT::i1, Promote);
- AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
- setOperationAction(ISD::VAARG, MVT::i8, Promote);
- AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
- setOperationAction(ISD::VAARG, MVT::i16, Promote);
- AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
- setOperationAction(ISD::VAARG, MVT::i32, Promote);
- AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
- setOperationAction(ISD::VAARG, MVT::Other, Expand);
- } else {
- // VAARG is custom lowered with the 32-bit SVR4 ABI.
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::i64, Custom);
- }
+ if (Subtarget.is64BitELFABI()) {
+ // VAARG always uses double-word chunks, so promote anything smaller.
+ setOperationAction(ISD::VAARG, MVT::i1, Promote);
+ AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i8, Promote);
+ AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i16, Promote);
+ AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i32, Promote);
+ AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ } else if (Subtarget.is32BitELFABI()) {
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
- if (Subtarget.isSVR4ABI() && !isPPC64)
- // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ if (Subtarget.is32BitELFABI())
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
@@ -553,17 +551,25 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
// For v2i64, these are only valid with P8Vector. This is corrected after
// the loop.
- setOperationAction(ISD::SMAX, VT, Legal);
- setOperationAction(ISD::SMIN, VT, Legal);
- setOperationAction(ISD::UMAX, VT, Legal);
- setOperationAction(ISD::UMIN, VT, Legal);
+ if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+ } else {
+ setOperationAction(ISD::SMAX, VT, Expand);
+ setOperationAction(ISD::SMIN, VT, Expand);
+ setOperationAction(ISD::UMAX, VT, Expand);
+ setOperationAction(ISD::UMIN, VT, Expand);
+ }
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FMAXNUM, VT, Legal);
@@ -646,7 +652,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
- for (MVT InnerVT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
@@ -944,7 +950,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
- setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
@@ -1118,6 +1123,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+
if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
@@ -1172,9 +1179,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setJumpIsExpensive();
}
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(Align(4));
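+ // Editorial note: the legacy setters took log2 values, so the old arguments
+ // (2 here, 4 for the preferred alignments below) already meant 2^2 == 4 and
+ // 2^4 == 16 bytes; the Align equivalents are behavior-preserving.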
if (Subtarget.isDarwin())
- setPrefFunctionAlignment(4);
+ setPrefFunctionAlignment(Align(16));
switch (Subtarget.getDarwinDirective()) {
default: break;
@@ -1191,8 +1198,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
- setPrefFunctionAlignment(4);
- setPrefLoopAlignment(4);
+ setPrefLoopAlignment(Align(16));
+ setPrefFunctionAlignment(Align(16));
break;
}
@@ -1352,6 +1359,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
+ case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
+ case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
case PPCISD::ST_VSR_SCAL_INT:
return "PPCISD::ST_VSR_SCAL_INT";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
@@ -1396,7 +1405,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
- case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";
+ case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
+ case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
}
return nullptr;
}
@@ -1517,7 +1527,7 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
const PPCSubtarget& Subtarget =
- static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ static_cast<const PPCSubtarget&>(DAG.getSubtarget());
if (!Subtarget.hasP8Vector())
return false;
@@ -1769,10 +1779,10 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
-/// VSPLTB/VSPLTH/VSPLTW.
+/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- (EltSize == 1 || EltSize == 2 || EltSize == 4));
+ assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
+ EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.
@@ -2065,10 +2075,11 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
}
-/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
-/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
-unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
- SelectionDAG &DAG) {
+/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
+/// appropriate for PPC mnemonics (which have a big endian bias - namely
+/// elements are counted from the left of the vector register).
+unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
if (DAG.getDataLayout().isLittleEndian())
@@ -2667,12 +2678,14 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) {
setUsesTOCBasePtr(DAG.getMachineFunction());
}
-static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
- SDValue GA) {
+SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue GA) const {
+ const bool Is64Bit = Subtarget.isPPC64();
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
- SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
- DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
-
+ SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
+ : Subtarget.isAIXABI()
+ ? DAG.getRegister(PPC::R2, VT)
+ : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
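+ // On AIX the TOC pointer lives in R2 (32-bit) or X2 (64-bit); 32-bit ELF
+ // has no dedicated TOC register and materializes a GOT base instead.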
SDValue Ops[] = { GA, Reg };
return DAG.getMemIntrinsicNode(
PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
@@ -2688,10 +2701,10 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ if (Subtarget.is64BitELFABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
- return getTOCEntry(DAG, SDLoc(CP), true, GA);
+ return getTOCEntry(DAG, SDLoc(CP), GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -2701,7 +2714,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
if (IsPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
PPCII::MO_PIC_FLAG);
- return getTOCEntry(DAG, SDLoc(CP), false, GA);
+ return getTOCEntry(DAG, SDLoc(CP), GA);
}
SDValue CPIHi =
@@ -2764,10 +2777,10 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ if (Subtarget.is64BitELFABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- return getTOCEntry(DAG, SDLoc(JT), true, GA);
+ return getTOCEntry(DAG, SDLoc(JT), GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -2777,7 +2790,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
if (IsPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
PPCII::MO_PIC_FLAG);
- return getTOCEntry(DAG, SDLoc(GA), false, GA);
+ return getTOCEntry(DAG, SDLoc(GA), GA);
}
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
@@ -2793,14 +2806,18 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.isSVR4ABI() &&
- (Subtarget.isPPC64() || isPositionIndependent())) {
- if (Subtarget.isPPC64())
- setUsesTOCBasePtr(DAG);
+ if (Subtarget.is64BitELFABI()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
- return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
+ return getTOCEntry(DAG, SDLoc(BASDN), GA);
}
+ // 32-bit position-independent ELF stores the BlockAddress in the .got.
+ if (Subtarget.is32BitELFABI() && isPositionIndependent())
+ return getTOCEntry(
+ DAG, SDLoc(BASDN),
+ DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
+
unsigned MOHiFlag, MOLoFlag;
bool IsPIC = isPositionIndependent();
getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
@@ -2913,12 +2930,12 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDLoc DL(GSDN);
const GlobalValue *GV = GSDN->getGlobal();
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
- return getTOCEntry(DAG, DL, true, GA);
+ return getTOCEntry(DAG, DL, GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -2929,7 +2946,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
GSDN->getOffset(),
PPCII::MO_PIC_FLAG);
- return getTOCEntry(DAG, DL, false, GA);
+ return getTOCEntry(DAG, DL, GA);
}
SDValue GAHi =
@@ -3235,8 +3252,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV, nextOffset));
}
-/// FPR - The set of FP registers that should be allocated for arguments,
-/// on Darwin.
+/// FPR - The set of FP registers that should be allocated for arguments
+/// on Darwin and AIX.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
PPC::F11, PPC::F12, PPC::F13};
@@ -3377,17 +3394,17 @@ SDValue PPCTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- if (Subtarget.isSVR4ABI()) {
- if (Subtarget.isPPC64())
- return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
- else
- return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
- } else {
- return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
- }
+ if (Subtarget.is64BitELFABI())
+ return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
+ else if (Subtarget.is32BitELFABI())
+ return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
+
+ // FIXME: We are using this for both AIX and Darwin. We should add proper
+ // AIX testing and rename this function appropriately.
+ return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
}
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
@@ -3467,7 +3484,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
if (Subtarget.hasP8Vector())
RC = &PPC::VSSRCRegClass;
else if (Subtarget.hasSPE())
- RC = &PPC::SPE4RCRegClass;
+ RC = &PPC::GPRCRegClass;
else
RC = &PPC::F4RCRegClass;
break;
@@ -4516,7 +4533,7 @@ callsShareTOCBase(const Function *Caller, SDValue Callee,
static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
const SmallVectorImpl<ISD::OutputArg> &Outs) {
- assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
+ assert(Subtarget.is64BitELFABI());
const unsigned PtrByteSize = 8;
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
@@ -4926,7 +4943,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
bool isAIXABI = Subtarget.isAIXABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
@@ -4997,7 +5014,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
- if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
+ if (is64BitELFv1ABI) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
@@ -5085,7 +5102,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
CallOpc = PPCISD::BCTRL;
Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
- if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
+ if (is64BitELFv1ABI && !hasNest)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@@ -6730,8 +6747,12 @@ SDValue PPCTargetLowering::LowerCall_AIX(
const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
: array_lengthof(GPR_32);
+ const unsigned NumFPRs = array_lengthof(FPR);
+ assert(NumFPRs == 13 && "Only FPR 1-13 can be used for parameter passing "
+ "on AIX");
+
const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
- unsigned GPR_idx = 0;
+ unsigned GPR_idx = 0, FPR_idx = 0;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
@@ -6768,6 +6789,20 @@ SDValue PPCTargetLowering::LowerCall_AIX(
break;
case MVT::f32:
case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume 1 or 2 (f64 in 32-bit mode) available
+ // GPRs.
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
+ ++GPR_idx;
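+ // Worked example (assumed): on 32-bit AIX the first f64 argument lands in
+ // f1 and shadows the two GPR argument slots r3/r4, so the next integer
+ // argument would be placed in r5.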
+ } else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ break;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
@@ -8152,6 +8187,18 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
Op0.getOperand(1));
}
+static const SDValue *getNormalLoadInput(const SDValue &Op) {
+ const SDValue *InputLoad = &Op;
+ if (InputLoad->getOpcode() == ISD::BITCAST)
+ InputLoad = &InputLoad->getOperand(0);
+ if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ InputLoad = &InputLoad->getOperand(0);
+ if (InputLoad->getOpcode() != ISD::LOAD)
+ return nullptr;
+ LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
+ return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
+}
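+ // Editorial note: the helper above looks through a BITCAST and/or
+ // SCALAR_TO_VECTOR wrapper and returns the underlying unindexed,
+ // non-extending load, or nullptr if there is none.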
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -8274,6 +8321,34 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
SplatBitSize > 32) {
+
+ const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
+ // Handle load-and-splat patterns as we have instructions that will do this
+ // in one go.
+ if (InputLoad && DAG.isSplatValue(Op, true)) {
+ LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
+
+ // We have handling for 4 and 8 byte elements.
+ unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
+
+ // Check that the load is only used by this BUILD_VECTOR: we expect exactly
+ // vector width (128 bits) / ElementSize uses (since each operand of the
+ // BUILD_VECTOR is a separate use of the value).
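+ // e.g. a v2f64 BUILD_VECTOR with ElementSize == 64 must see exactly
+ // 128 / 64 == 2 uses of the loaded value.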
+ if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
+ ((Subtarget.hasVSX() && ElementSize == 64) ||
+ (Subtarget.hasP9Vector() && ElementSize == 32))) {
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(Op.getValueType()) // VT
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
+ DAG.getVTList(Op.getValueType(), MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+ }
+ }
+
// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
// lowered to VSX instructions under certain conditions.
// Without VSX, there is no pattern more efficient than expanding the node.
@@ -8759,6 +8834,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned ShiftElts, InsertAtByte;
bool Swap = false;
+
+ // If this is a load-and-splat, we can do that with a single instruction
+ // in some cases. However, if the load has multiple uses, we don't want to
+ // combine it because that will just produce multiple loads.
+ const SDValue *InputLoad = getNormalLoadInput(V1);
+ if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
+ (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
+ InputLoad->hasOneUse()) {
+ bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
+ int SplatIdx =
+ PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
+
+ LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
+ // For 4-byte load-and-splat, we need Power9.
+ if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
+ uint64_t Offset = 0;
+ if (IsFourByte)
+ Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
+ else
+ Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
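+ // Worked example (assumed): splatting element 1 of a v4i32 on little
+ // endian loads from byte offset (3 - 1) * 4 == 8, since the splat index
+ // uses big-endian element numbering.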
+ SDValue BasePtr = LD->getBasePtr();
+ if (Offset != 0)
+ BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ BasePtr, DAG.getIntPtrConstant(Offset, dl));
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ BasePtr, // BasePtr
+ DAG.getValueType(Op.getValueType()) // VT
+ };
+ SDVTList VTL =
+ DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
+ SDValue LdSplt =
+ DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+ if (LdSplt.getValueType() != SVOp->getValueType(0))
+ LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
+ return LdSplt;
+ }
+ }
if (Subtarget.hasP9Vector() &&
PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
isLittleEndian)) {
@@ -8835,7 +8949,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
- int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
+ int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
@@ -9880,6 +9994,30 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
switch (Op0.getOpcode()) {
default:
return SDValue();
+ case ISD::EXTRACT_SUBVECTOR: {
+ assert(Op0.getNumOperands() == 2 &&
+ isa<ConstantSDNode>(Op0->getOperand(1)) &&
+ "Node should have 2 operands with second one being a constant!");
+
+ if (Op0.getOperand(0).getValueType() != MVT::v4f32)
+ return SDValue();
+
+ // Custom lower is only done for high or low doubleword.
+ int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
+ if (Idx % 2 != 0)
+ return SDValue();
+
+ // Since input is v4f32, at this point Idx is either 0 or 2.
+ // Shift to get the doubleword position we want.
+ int DWord = Idx >> 1;
+
+ // High and low word positions are different on little endian.
+ if (Subtarget.isLittleEndian())
+ DWord ^= 0x1;
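+ // e.g. Idx == 2 selects doubleword 1 in big-endian numbering, which the
+ // XOR above remaps to doubleword 0 on little endian.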
+
+ return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
+ Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
+ }
case ISD::FADD:
case ISD::FMUL:
case ISD::FSUB: {
@@ -9891,26 +10029,25 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
// Generate new load node.
LoadSDNode *LD = cast<LoadSDNode>(LdOp);
- SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
- NewLoad[i] =
- DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
- DAG.getVTList(MVT::v4f32, MVT::Other),
- LoadOps, LD->getMemoryVT(),
- LD->getMemOperand());
- }
- SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
- NewLoad[0], NewLoad[1],
- Op0.getNode()->getFlags());
- return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
+ SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
+ NewLoad[i] = DAG.getMemIntrinsicNode(
+ PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
+ LD->getMemoryVT(), LD->getMemOperand());
+ }
+ SDValue NewOp =
+ DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
+ NewLoad[1], Op0.getNode()->getFlags());
+ return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
+ DAG.getConstant(0, dl, MVT::i32));
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op0);
- SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
- SDValue NewLd =
- DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
- DAG.getVTList(MVT::v4f32, MVT::Other),
- LoadOps, LD->getMemoryVT(), LD->getMemOperand());
- return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
+ SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
+ SDValue NewLd = DAG.getMemIntrinsicNode(
+ PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
+ LD->getMemoryVT(), LD->getMemOperand());
+ return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
+ DAG.getConstant(0, dl, MVT::i32));
}
}
llvm_unreachable("ERROR: Should return for all cases within switch.");
@@ -10048,9 +10185,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::TRUNCATE: {
EVT TrgVT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
if (TrgVT.isVector() &&
isOperationCustom(N->getOpcode(), TrgVT) &&
- N->getOperand(0).getValueType().getSizeInBits() <= 128)
+ OpVT.getSizeInBits() <= 128 &&
+ isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
return;
}
@@ -10192,7 +10331,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
if (CmpOpcode) {
// Signed comparisons of byte or halfword values must be sign-extended.
if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
- unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
ExtReg).addReg(dest);
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
@@ -10243,10 +10382,10 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = ++BB->getIterator();
- unsigned dest = MI.getOperand(0).getReg();
- unsigned ptrA = MI.getOperand(1).getReg();
- unsigned ptrB = MI.getOperand(2).getReg();
- unsigned incr = MI.getOperand(3).getReg();
+ Register dest = MI.getOperand(0).getReg();
+ Register ptrA = MI.getOperand(1).getReg();
+ Register ptrB = MI.getOperand(2).getReg();
+ Register incr = MI.getOperand(3).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -10364,7 +10503,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
if (CmpOpcode) {
// For unsigned comparisons, we can directly compare the shifted values.
// For signed comparisons we shift and sign extend.
- unsigned SReg = RegInfo.createVirtualRegister(GPRC);
+ Register SReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(PPC::AND), SReg)
.addReg(TmpDestReg)
.addReg(MaskReg);
@@ -10375,7 +10514,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
.addReg(SReg)
.addReg(ShiftReg);
- unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
+ Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
.addReg(ValueReg);
ValueReg = ValueSReg;
@@ -10426,11 +10565,11 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
- unsigned mainDstReg = MRI.createVirtualRegister(RC);
- unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+ Register mainDstReg = MRI.createVirtualRegister(RC);
+ Register restoreDstReg = MRI.createVirtualRegister(RC);
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
@@ -10482,10 +10621,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
// Prepare IP either in reg.
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
- unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
- unsigned BufReg = MI.getOperand(1).getReg();
+ Register LabelReg = MRI.createVirtualRegister(PtrRC);
+ Register BufReg = MI.getOperand(1).getReg();
- if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
+ if (Subtarget.is64BitELFABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
@@ -10570,7 +10709,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
const TargetRegisterClass *RC =
(PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
- unsigned Tmp = MRI.createVirtualRegister(RC);
+ Register Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
@@ -10587,7 +10726,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
const int64_t TOCOffset = 3 * PVT.getStoreSize();
const int64_t BPOffset = 4 * PVT.getStoreSize();
- unsigned BufReg = MI.getOperand(0).getReg();
+ Register BufReg = MI.getOperand(0).getReg();
// Reload FP (the jumped-to function may not have had a
// frame pointer, and if so, then its r31 will be restored
@@ -10662,7 +10801,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
- if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
+ if (Subtarget.is64BitELFABI() &&
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
// Call lowering should have added an r2 operand to indicate a dependence
// on the TOC base pointer value. It can't however, because there is no
@@ -10828,15 +10967,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB = readMBB;
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
- unsigned LoReg = MI.getOperand(0).getReg();
- unsigned HiReg = MI.getOperand(1).getReg();
+ Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ Register LoReg = MI.getOperand(0).getReg();
+ Register HiReg = MI.getOperand(1).getReg();
BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
- unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
.addReg(HiReg)
@@ -10978,11 +11117,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
StoreMnemonic = PPC::STDCX;
break;
}
- unsigned dest = MI.getOperand(0).getReg();
- unsigned ptrA = MI.getOperand(1).getReg();
- unsigned ptrB = MI.getOperand(2).getReg();
- unsigned oldval = MI.getOperand(3).getReg();
- unsigned newval = MI.getOperand(4).getReg();
+ Register dest = MI.getOperand(0).getReg();
+ Register ptrA = MI.getOperand(1).getReg();
+ Register ptrB = MI.getOperand(2).getReg();
+ Register oldval = MI.getOperand(3).getReg();
+ Register newval = MI.getOperand(4).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -11057,11 +11196,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
bool isLittleEndian = Subtarget.isLittleEndian();
bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
- unsigned dest = MI.getOperand(0).getReg();
- unsigned ptrA = MI.getOperand(1).getReg();
- unsigned ptrB = MI.getOperand(2).getReg();
- unsigned oldval = MI.getOperand(3).getReg();
- unsigned newval = MI.getOperand(4).getReg();
+ Register dest = MI.getOperand(0).getReg();
+ Register ptrA = MI.getOperand(1).getReg();
+ Register ptrB = MI.getOperand(2).getReg();
+ Register oldval = MI.getOperand(3).getReg();
+ Register newval = MI.getOperand(4).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -11238,13 +11377,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// This pseudo performs an FADD with rounding mode temporarily forced
// to round-to-zero. We emit this via custom inserter since the FPSCR
// is not modeled at the SelectionDAG level.
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Src1 = MI.getOperand(1).getReg();
- unsigned Src2 = MI.getOperand(2).getReg();
+ Register Dest = MI.getOperand(0).getReg();
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+ Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
// Save FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
@@ -11270,7 +11409,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned Dest = RegInfo.createVirtualRegister(
+ Register Dest = RegInfo.createVirtualRegister(
Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
DebugLoc dl = MI.getDebugLoc();
@@ -11283,7 +11422,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
} else if (MI.getOpcode() == PPC::TCHECK_RET) {
DebugLoc Dl = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
MI.getOperand(0).getReg())
@@ -11297,7 +11436,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(PPC::CR0EQ);
} else if (MI.getOpcode() == PPC::SETRNDi) {
DebugLoc dl = MI.getDebugLoc();
- unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+ Register OldFPSCRReg = MI.getOperand(0).getReg();
// Save FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
@@ -11378,7 +11517,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
};
- unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+ Register OldFPSCRReg = MI.getOperand(0).getReg();
// Save FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
@@ -11393,12 +11532,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// mtfsf 255, NewFPSCRReg
MachineOperand SrcOp = MI.getOperand(1);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
- unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
- unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
// The first operand of INSERT_SUBREG should be a register which has
// subregisters, we only care about its RegClass, so we should use an
@@ -11409,14 +11548,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.add(SrcOp)
.addImm(1);
- unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
.addReg(OldFPSCRTmpReg)
.addReg(ExtSrcReg)
.addImm(0)
.addImm(62);
- unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+ Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
// The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
@@ -13113,6 +13252,61 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
return Val;
}
+SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
+ LSBaseSDNode *LSBase,
+ DAGCombinerInfo &DCI) const {
+ assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
+ "Not a reverse memop pattern!");
+
+ auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
+ auto Mask = SVN->getMask();
+ int i = 0;
+ auto I = Mask.rbegin();
+ auto E = Mask.rend();
+
+ for (; I != E; ++I) {
+ if (*I != i)
+ return false;
+ i++;
+ }
+ return true;
+ };
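+ // e.g. for v4i32 the mask <3, 2, 1, 0> reads 0, 1, 2, 3 from right to
+ // left, so IsElementReverse returns true.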
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = SVN->getValueType(0);
+
+ if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
+ return SDValue();
+
+ // Before P9, the PPCVSXSwapRemoval pass rewrites the element order (see the
+ // comment in PPCVSXSwapRemoval.cpp). This combine conflicts with that
+ // optimization, so we don't do it before P9.
+ if (!Subtarget.hasP9Vector())
+ return SDValue();
+
+ if (!IsElementReverse(SVN))
+ return SDValue();
+
+ if (LSBase->getOpcode() == ISD::LOAD) {
+ SDLoc dl(SVN);
+ SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
+ return DAG.getMemIntrinsicNode(
+ PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
+ LSBase->getMemoryVT(), LSBase->getMemOperand());
+ }
+
+ if (LSBase->getOpcode() == ISD::STORE) {
+ SDLoc dl(LSBase);
+ SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
+ LSBase->getBasePtr()};
+ return DAG.getMemIntrinsicNode(
+ PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
+ LSBase->getMemoryVT(), LSBase->getMemOperand());
+ }
+
+ llvm_unreachable("Expected a load or store node here");
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -13159,6 +13353,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return combineFPToIntToFP(N, DCI);
+ case ISD::VECTOR_SHUFFLE:
+ if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
+ LSBaseSDNode *LSBase = cast<LSBaseSDNode>(N->getOperand(0));
+ return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
+ }
+ break;
case ISD::STORE: {
EVT Op1VT = N->getOperand(1).getValueType();
@@ -13170,6 +13370,13 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return Val;
}
+ if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
+ SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
+ if (Val)
+ return Val;
+ }
+
// Turn STORE (BSWAP) -> sthbrx/stwbrx.
if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
@@ -13903,7 +14110,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
}
-unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
switch (Subtarget.getDarwinDirective()) {
default: break;
case PPC::DIR_970:
@@ -13924,7 +14131,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
// Actual alignment of the loop will depend on the hotness check and other
// logic in alignBlocks.
if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
- return 5;
+ return Align(32);
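+ // Editorial note: the old return value 5 was a log2 alignment, so
+ // Align(32) preserves the previous 2^5-byte alignment.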
}
const PPCInstrInfo *TII = Subtarget.getInstrInfo();
@@ -13940,7 +14147,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
}
if (LoopSize > 16 && LoopSize <= 32)
- return 5;
+ return Align(32);
break;
}
@@ -14063,7 +14270,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'f':
if (Subtarget.hasSPE()) {
if (VT == MVT::f32 || VT == MVT::i32)
- return std::make_pair(0U, &PPC::SPE4RCRegClass);
+ return std::make_pair(0U, &PPC::GPRCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::SPERCRegClass);
} else {
@@ -14306,22 +14513,22 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
+Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const {
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
+ bool IsDarwinABI = Subtarget.isDarwinABI();
if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
(!isPPC64 && VT != MVT::i32))
report_fatal_error("Invalid register global variable type");
bool is64Bit = isPPC64 && VT == MVT::i64;
- unsigned Reg = StringSwitch<unsigned>(RegName)
+ Register Reg = StringSwitch<Register>(RegName)
.Case("r1", is64Bit ? PPC::X1 : PPC::R1)
- .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
- .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
+ .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2)
+ .Case("r13", (!isPPC64 && IsDarwinABI) ? Register() :
(is64Bit ? PPC::X13 : PPC::R13))
- .Default(0);
+ .Default(Register());
if (Reg)
return Reg;
@@ -14330,14 +14537,17 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
// 32-bit SVR4 ABI accesses everything as got-indirect.
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ if (Subtarget.is32BitELFABI())
+ return true;
+
+ // AIX accesses everything indirectly through the TOC, which is similar to
+ // the GOT.
+ if (Subtarget.isAIXABI())
return true;
CodeModel::Model CModel = getTargetMachine().getCodeModel();
// If it is small or large code model, module locals are accessed
- // indirectly by loading their address from .toc/.got. The difference
- // is that for large code model we have ADDISTocHa + LDtocL and for
- // small code model we simply have LDtoc.
+ // indirectly by loading their address from .toc/.got.
if (CModel == CodeModel::Small || CModel == CodeModel::Large)
return true;
@@ -14345,14 +14555,8 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
return true;
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GV = G->getGlobal();
- unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
- // The NLP flag indicates that a global access has to use an
- // extra indirection.
- if (GVFlags & PPCII::MO_NLP_FLAG)
- return true;
- }
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
+ return Subtarget.isGVIndirectSymbol(G->getGlobal());
return false;
}
@@ -14417,7 +14621,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
- Info.align = 1;
+ Info.align = Align::None();
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -14451,7 +14655,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.size = VT.getStoreSize();
- Info.align = 1;
+ Info.align = Align::None();
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -14503,7 +14707,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
- Info.align = 1;
+ Info.align = Align::None();
Info.flags = MachineMemOperand::MOStore;
return true;
}
@@ -14536,7 +14740,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.size = VT.getStoreSize();
- Info.align = 1;
+ Info.align = Align::None();
Info.flags = MachineMemOperand::MOStore;
return true;
}
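Editor's note on the Align changes in the hunks above: to the best of my knowledge, at this revision Align::None() is simply Align(1), so replacing the bare `1` preserves semantics while moving to the type-safe alignment API. A hedged sketch:

    #include <cstdint>
    #include "llvm/Support/Alignment.h"
    // Assumption: Align::None() == Align(1) at r375505.
    void alignExample() {
      llvm::Align A = llvm::Align::None();
      uint64_t Bytes = A.value(); // Bytes == 1, i.e. byte alignment as before
      (void)Bytes;
    }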
@@ -14786,7 +14990,7 @@ void PPCTargetLowering::insertCopiesSplitCSR(
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
@@ -15146,7 +15350,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
- if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
+ if (!Subtarget.is64BitELFABI())
return false;
// If not a tail call then no need to proceed.
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 97422c6eda36..62922ea2d4c4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -412,8 +412,9 @@ namespace llvm {
/// representation.
QBFLT,
- /// Custom extend v4f32 to v2f64.
- FP_EXTEND_LH,
+ /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend the upper (IDX=0) or
+ /// lower (IDX=1) half of v4f32 to v2f64.
+ FP_EXTEND_HALF,
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
@@ -456,15 +457,29 @@ namespace llvm {
/// an xxswapd.
LXVD2X,
+ /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x, depending on
+ /// the vector type, to load the vector in big-endian element order.
+ LOAD_VEC_BE,
+
/// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
/// v2f32 value into the lower half of a VSR register.
LD_VSX_LH,
+ /// VSRC, CHAIN = LD_SPLAT CHAIN, Ptr - a splatting load memory
+ /// instruction, such as LXVDSX or LXVWSX.
+ LD_SPLAT,
+
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
STXVD2X,
+ /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x, depending on
+ /// the vector type, to store the vector in big-endian element order.
+ STORE_VEC_BE,
+
/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,
@@ -563,9 +578,11 @@ namespace llvm {
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE);
- /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
- /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
- unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
+ /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
+ /// appropriate for PPC mnemonics (which have a big-endian bias, i.e.
+ /// elements are counted from the left of the vector register).
+ unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG);
/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element
@@ -716,8 +733,8 @@ namespace llvm {
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
@@ -725,7 +742,7 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
+ Align getPrefLoopAlignment(MachineLoop *ML) const override;
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
return true;
@@ -834,6 +851,18 @@ namespace llvm {
return true;
}
+ bool isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const override {
+ // Only handle float load/store pairs, because float (FPR) load/store
+ // instructions take more cycles than integer (GPR) load/store on PPC.
+ if (Opc != ISD::LOAD && Opc != ISD::STORE)
+ return false;
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+
+ return true;
+ }
+
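The new hook reduces to a single predicate; an equivalent one-expression form (editor's sketch, behavior-preserving as far as I can tell):

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/CodeGen/ValueTypes.h"
    // Only f32/f64 loads and stores are desirable to retype as integer ops.
    bool isDesirableExample(unsigned Opc, llvm::EVT VT) {
      return (Opc == llvm::ISD::LOAD || Opc == llvm::ISD::STORE) &&
             (VT == llvm::MVT::f32 || VT == llvm::MVT::f64);
    }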
// Returns true if the address of the global is stored in a TOC entry.
bool isAccessedAsGotIndirect(SDValue N) const;
@@ -998,6 +1027,8 @@ namespace llvm {
SDValue &FPOpOut,
const SDLoc &dl) const;
+ SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const;
+
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
@@ -1155,6 +1186,8 @@ namespace llvm {
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
+ DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index d598567f8e4e..f16187149d36 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1099,8 +1099,8 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
// Support for medium and large code model.
let hasSideEffects = 0 in {
let isReMaterializable = 1 in {
-def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
- "#ADDIStocHA", []>, isPPC64;
+def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+ "#ADDIStocHA8", []>, isPPC64;
def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItocL", []>, isPPC64;
}
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 8176c5120a83..fd3fc2af2327 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -215,21 +215,21 @@ def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG), SDLoc(N));
+ return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 1, *CurDAG), SDLoc(N));
}]>;
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG), SDLoc(N));
+ return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 2, *CurDAG), SDLoc(N));
}]>;
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG), SDLoc(N));
+ return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 4, *CurDAG), SDLoc(N));
}]>;
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -331,7 +331,7 @@ class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
!strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
- [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
+ [(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>;
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -401,10 +401,10 @@ let isCodeGenOnly = 1 in {
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
"mfvscr $vD", IIC_LdStStore,
- [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
- [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index a48eb1690695..96b9c9a119c0 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1209,20 +1209,13 @@ class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = XT{5};
}
-class XX3Form_Zero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+class XX3Form_SameOp<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let XA = XT;
let XB = XT;
}
-class XX3Form_SetZero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let XB = XT;
- let XA = XT;
-}
-
class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index a787bdd56b9d..6b10672965c9 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -90,7 +90,6 @@ enum SpillOpcodeKey {
SOK_QuadBitSpill,
SOK_SpillToVSR,
SOK_SPESpill,
- SOK_SPE4Spill,
SOK_LastOpcodeSpill // This must be last on the enum.
};
@@ -184,10 +183,10 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
- unsigned Reg = DefMO.getReg();
+ Register Reg = DefMO.getReg();
bool IsRegCR;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
const MachineRegisterInfo *MRI =
&DefMI.getParent()->getParent()->getRegInfo();
IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
@@ -330,11 +329,13 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::LIS8:
case PPC::QVGPCI:
case PPC::ADDIStocHA:
+ case PPC::ADDIStocHA8:
case PPC::ADDItocL:
case PPC::LOAD_STACK_GUARD:
case PPC::XXLXORz:
case PPC::XXLXORspz:
case PPC::XXLXORdpz:
+ case PPC::XXLEQVOnes:
case PPC::V_SET0B:
case PPC::V_SET0H:
case PPC::V_SET0:
@@ -448,7 +449,8 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return &MI;
}
-bool PPCInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
// For VSX A-Type FMA instructions, it is the first two operands that can be
// commuted, however, because the non-encoded tied input operand is listed
@@ -966,11 +968,11 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
getKillRegState(KillSrc);
return;
} else if (PPC::SPERCRegClass.contains(SrcReg) &&
- PPC::SPE4RCRegClass.contains(DestReg)) {
+ PPC::GPRCRegClass.contains(DestReg)) {
BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
return;
- } else if (PPC::SPE4RCRegClass.contains(SrcReg) &&
+ } else if (PPC::GPRCRegClass.contains(SrcReg) &&
PPC::SPERCRegClass.contains(DestReg)) {
BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
@@ -1009,8 +1011,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
- else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::OR;
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
Opc = PPC::EVOR;
else
@@ -1043,8 +1043,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_SPESpill;
- } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
@@ -1083,8 +1081,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.contains(Reg)) {
OpcodeIndex = SOK_SPESpill;
- } else if (PPC::SPE4RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1133,8 +1129,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_SPESpill;
- } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
@@ -1173,8 +1167,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.contains(Reg)) {
OpcodeIndex = SOK_SPESpill;
- } else if (PPC::SPE4RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1648,7 +1640,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
return false;
int OpC = CmpInstr.getOpcode();
- unsigned CRReg = CmpInstr.getOperand(0).getReg();
+ Register CRReg = CmpInstr.getOperand(0).getReg();
// FP record forms set CR1 based on the exception status bits, not a
// comparison with zero.
@@ -1671,7 +1663,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// Look through copies unless that gets us to a physical register.
unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
- if (TargetRegisterInfo::isVirtualRegister(ActualSrc))
+ if (Register::isVirtualRegister(ActualSrc))
SrcReg = ActualSrc;
// Get the unique definition of SrcReg.
@@ -1937,7 +1929,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// Rotates are expensive instructions. If we're emitting a record-form
// rotate that can just be an andi/andis, we should just emit that.
if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
- unsigned GPRRes = MI->getOperand(0).getReg();
+ Register GPRRes = MI->getOperand(0).getReg();
int64_t SH = MI->getOperand(2).getImm();
int64_t MB = MI->getOperand(3).getImm();
int64_t ME = MI->getOperand(4).getImm();
@@ -2122,7 +2114,7 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
llvm_unreachable("Unknown Operation!");
}
- unsigned TargetReg = MI.getOperand(0).getReg();
+ Register TargetReg = MI.getOperand(0).getReg();
unsigned Opcode;
if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
(TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
@@ -2184,7 +2176,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return expandVSXMemPseudo(MI);
}
case PPC::SPILLTOVSR_LD: {
- unsigned TargetReg = MI.getOperand(0).getReg();
+ Register TargetReg = MI.getOperand(0).getReg();
if (PPC::VSFRCRegClass.contains(TargetReg)) {
MI.setDesc(get(PPC::DFLOADf64));
return expandPostRAPseudo(MI);
@@ -2194,7 +2186,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case PPC::SPILLTOVSR_ST: {
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(0).getReg();
if (PPC::VSFRCRegClass.contains(SrcReg)) {
NumStoreSPILLVSRRCAsVec++;
MI.setDesc(get(PPC::DFSTOREf64));
@@ -2206,7 +2198,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case PPC::SPILLTOVSR_LDX: {
- unsigned TargetReg = MI.getOperand(0).getReg();
+ Register TargetReg = MI.getOperand(0).getReg();
if (PPC::VSFRCRegClass.contains(TargetReg))
MI.setDesc(get(PPC::LXSDX));
else
@@ -2214,7 +2206,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case PPC::SPILLTOVSR_STX: {
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(0).getReg();
if (PPC::VSFRCRegClass.contains(SrcReg)) {
NumStoreSPILLVSRRCAsVec++;
MI.setDesc(get(PPC::STXSDX));
@@ -2279,10 +2271,10 @@ void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
int64_t Imm) const {
assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
// Replace the REG with the Immediate.
- unsigned InUseReg = MI.getOperand(OpNo).getReg();
+ Register InUseReg = MI.getOperand(OpNo).getReg();
MI.getOperand(OpNo).ChangeToImmediate(Imm);
- if (empty(MI.implicit_operands()))
+ if (MI.implicit_operands().empty())
return;
// We need to make sure that the MI didn't have any implicit use
@@ -2328,6 +2320,23 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
.addImm(LII.Imm);
}
+MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI,
+ bool &SeenIntermediateUse) const {
+ assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
+ "Should be called after register allocation.");
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
+ It++;
+ SeenIntermediateUse = false;
+ for (; It != E; ++It) {
+ if (It->modifiesRegister(Reg, TRI))
+ return &*It;
+ if (It->readsRegister(Reg, TRI))
+ SeenIntermediateUse = true;
+ }
+ return nullptr;
+}
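A hedged usage sketch for the new helper: starting from MI, it walks the block backwards to the nearest def of Reg and reports whether any read of Reg was seen on the way (maybeErase below is a hypothetical caller-side helper):

    // Illustrative caller, e.g. inside a post-RA PPC peephole.
    bool SeenIntermediateUse = false;
    if (MachineInstr *DefMI =
            TII->getDefMIPostRA(Reg, MI, SeenIntermediateUse)) {
      // Deleting DefMI is only safe when nothing read Reg in between.
      if (!SeenIntermediateUse)
        maybeErase(*DefMI); // hypothetical helper
    }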
+
MachineInstr *PPCInstrInfo::getForwardingDefMI(
MachineInstr &MI,
unsigned &OpNoForForwarding,
@@ -2342,11 +2351,11 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
if (!MI.getOperand(i).isReg())
continue;
- unsigned Reg = MI.getOperand(i).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MI.getOperand(i).getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI);
- if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
+ if (Register::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
OpNoForForwarding = i;
@@ -2370,7 +2379,10 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
- if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm)
+ bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
+ ? isVFRegister(MI.getOperand(0).getReg())
+ : false;
+ if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
return nullptr;
// Don't convert or %X, %Y, %Y since that's just a register move.
@@ -2381,29 +2393,24 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
MachineOperand &MO = MI.getOperand(i);
SeenIntermediateUse = false;
if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
- MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
- It++;
- unsigned Reg = MI.getOperand(i).getReg();
-
- // Is this register defined by some form of add-immediate (including
- // load-immediate) within this basic block?
- for ( ; It != E; ++It) {
- if (It->modifiesRegister(Reg, &getRegisterInfo())) {
- switch (It->getOpcode()) {
- default: break;
- case PPC::LI:
- case PPC::LI8:
- case PPC::ADDItocL:
- case PPC::ADDI:
- case PPC::ADDI8:
- OpNoForForwarding = i;
- return &*It;
- }
+ Register Reg = MI.getOperand(i).getReg();
+ // If we see another use of this reg between the def and the MI,
+ // we want to flag it so the def isn't deleted.
+ MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
+ if (DefMI) {
+ // Is this register defined by some form of add-immediate (including
+ // load-immediate) within this basic block?
+ switch (DefMI->getOpcode()) {
+ default:
break;
- } else if (It->readsRegister(Reg, &getRegisterInfo()))
- // If we see another use of this reg between the def and the MI,
- // we want to flat it so the def isn't deleted.
- SeenIntermediateUse = true;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::ADDItocL:
+ case PPC::ADDI:
+ case PPC::ADDI8:
+ OpNoForForwarding = i;
+ return DefMI;
+ }
}
}
}
@@ -2417,7 +2424,7 @@ const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
{PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR,
PPC::SPILL_CRBIT, PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX,
PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb,
- PPC::SPILLTOVSR_ST, PPC::EVSTDD, PPC::SPESTW},
+ PPC::SPILLTOVSR_ST, PPC::EVSTDD},
// Power 9
{PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR,
PPC::SPILL_CRBIT, PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32,
@@ -2433,7 +2440,7 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
{PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX,
PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb,
- PPC::SPILLTOVSR_LD, PPC::EVLDD, PPC::SPELWZ},
+ PPC::SPILLTOVSR_LD, PPC::EVLDD},
// Power 9
{PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, PPC::DFLOADf32,
@@ -2538,12 +2545,15 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
"The forwarding operand needs to be valid at this point");
bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
- unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
+ Register ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
if (KilledDef && KillFwdDefMI)
*KilledDef = DefMI;
ImmInstrInfo III;
- bool HasImmForm = instrHasImmForm(MI, III, PostRA);
+ bool IsVFReg = MI.getOperand(0).isReg()
+ ? isVFRegister(MI.getOperand(0).getReg())
+ : false;
+ bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
// If this is a reg+reg instruction that has a reg+imm form,
// and one of the operands is produced by an add-immediate,
// try to convert it.
@@ -2591,7 +2601,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// If a compare-immediate is fed by an immediate and is itself an input of
// an ISEL (the most common case) into a COPY of the correct register.
bool Changed = false;
- unsigned DefReg = MI.getOperand(0).getReg();
+ Register DefReg = MI.getOperand(0).getReg();
int64_t Comparand = MI.getOperand(2).getImm();
int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ?
(Comparand | 0xFFFFFFFFFFFF0000) : Comparand;
@@ -2601,8 +2611,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
continue;
unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
- unsigned TrueReg = CompareUseMI.getOperand(1).getReg();
- unsigned FalseReg = CompareUseMI.getOperand(2).getReg();
+ Register TrueReg = CompareUseMI.getOperand(1).getReg();
+ Register FalseReg = CompareUseMI.getOperand(2).getReg();
unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg,
FalseReg, CRSubReg);
if (RegToCopy == PPC::NoRegister)
@@ -2777,9 +2787,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
-bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
+bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
ImmInstrInfo &III, bool PostRA) const {
- unsigned Opc = MI.getOpcode();
// The vast majority of the instructions would need their operand 2 replaced
// with an immediate when switching to the reg+imm form. Marked exceptions
// are the update-form loads/stores, for which a constant operand 2 would need
@@ -3111,7 +3120,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::LXSSPX:
if (PostRA) {
- if (isVFRegister(MI.getOperand(0).getReg()))
+ if (IsVFReg)
III.ImmOpcode = PPC::LXSSP;
else {
III.ImmOpcode = PPC::LFS;
@@ -3125,7 +3134,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::LXSDX:
if (PostRA) {
- if (isVFRegister(MI.getOperand(0).getReg()))
+ if (IsVFReg)
III.ImmOpcode = PPC::LXSD;
else {
III.ImmOpcode = PPC::LFD;
@@ -3143,7 +3152,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::STXSSPX:
if (PostRA) {
- if (isVFRegister(MI.getOperand(0).getReg()))
+ if (IsVFReg)
III.ImmOpcode = PPC::STXSSP;
else {
III.ImmOpcode = PPC::STFS;
@@ -3157,7 +3166,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::STXSDX:
if (PostRA) {
- if (isVFRegister(MI.getOperand(0).getReg()))
+ if (IsVFReg)
III.ImmOpcode = PPC::STXSD;
else {
III.ImmOpcode = PPC::STFD;
@@ -3287,7 +3296,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding(
if (MRI.isSSA())
return false;
- unsigned Reg = RegMO.getReg();
+ Register Reg = RegMO.getReg();
// Walk the instructions in reverse (MI --> DefMI) to get the last DEF of the Reg.
MachineBasicBlock::const_reverse_iterator It = MI;
@@ -3511,8 +3520,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
III.ZeroIsSpecialNew + 1;
- unsigned OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
- unsigned NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
+ Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
+ Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
// If R0 is in the operand where zero is special for the new instruction,
// it is unsafe to transform if the constant operand isn't that operand.
if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
@@ -3563,16 +3572,20 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
} else {
// The 32 bit and 64 bit instructions are quite different.
if (SpecialShift32) {
- // Left shifts use (N, 0, 31-N), right shifts use (32-N, N, 31).
- uint64_t SH = RightShift ? 32 - ShAmt : ShAmt;
+ // Left shifts use (N, 0, 31-N).
+ // Right shifts use (32-N, N, 31) if 0 < N < 32,
+ // and (0, 0, 31) if N == 0.
+ uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
uint64_t MB = RightShift ? ShAmt : 0;
uint64_t ME = RightShift ? 31 : 31 - ShAmt;
replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
.addImm(ME);
} else {
- // Left shifts use (N, 63-N), right shifts use (64-N, N).
- uint64_t SH = RightShift ? 64 - ShAmt : ShAmt;
+ // Left shifts use (N, 63-N).
+ // Right shifts use (64-N, N) if 0 < N < 64,
+ // and (0, 0) if N == 0.
+ uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
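A worked check of the tuples documented above (editor's arithmetic, mirroring the code): a 32-bit right shift by 8 encodes as (SH, MB, ME) = (24, 8, 31); a 64-bit right shift by 8 encodes as (SH, ME) = (56, 8); and N == 0 degenerates to (0, 0, 31) and (0, 0) respectively. Sketch:

    #include <cstdint>
    struct Mask32 { uint64_t SH, MB, ME; };
    // Mirrors the 32-bit SH/MB/ME computation above (illustrative).
    Mask32 encode32(uint64_t ShAmt, bool RightShift) {
      uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
      uint64_t MB = RightShift ? ShAmt : 0;
      uint64_t ME = RightShift ? 31 : 31 - ShAmt;
      return {SH, MB, ME};
    }
    // encode32(8, /*RightShift=*/true) -> {24, 8, 31}
    // encode32(0, /*RightShift=*/true) -> {0, 0, 31}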
@@ -3601,8 +3614,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
if (III.ZeroIsSpecialNew) {
// If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
// need to fix up register class.
- unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
- if (TargetRegisterInfo::isVirtualRegister(RegToModify)) {
+ Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
+ if (Register::isVirtualRegister(RegToModify)) {
const TargetRegisterClass *NewRC =
MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
&PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
@@ -3747,7 +3760,7 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
return false;
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
unsigned StackOffset = MI.getOperand(1).getImm();
- unsigned StackReg = MI.getOperand(2).getReg();
+ Register StackReg = MI.getOperand(2).getReg();
if (StackReg == PPC::X1 && StackOffset == TOCSaveOffset)
return true;
@@ -3772,7 +3785,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
switch (MI.getOpcode()) {
case PPC::COPY: {
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
// In both ELFv1 and v2 ABI, method parameters and the return value
// are sign- or zero-extended.
@@ -3781,7 +3794,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
// We check the ZExt/SExt flags for a method parameter.
if (MI.getParent()->getBasicBlock() ==
&MF->getFunction().getEntryBlock()) {
- unsigned VReg = MI.getOperand(0).getReg();
+ Register VReg = MI.getOperand(0).getReg();
if (MF->getRegInfo().isLiveIn(VReg))
return SignExt ? FuncInfo->isLiveInSExt(VReg) :
FuncInfo->isLiveInZExt(VReg);
@@ -3818,7 +3831,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
}
// If this is a copy from another register, we recursively check the source.
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ if (!Register::isVirtualRegister(SrcReg))
return false;
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
if (SrcMI != NULL)
@@ -3841,8 +3854,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
case PPC::XORIS8: {
// A logical operation with a 16-bit immediate does not change the upper bits.
// So, we track the operand register as we do for register copy.
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
if (SrcMI != NULL)
@@ -3870,8 +3883,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
for (unsigned I = 1; I != E; I += D) {
if (MI.getOperand(I).isReg()) {
- unsigned SrcReg = MI.getOperand(I).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(I).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1))
@@ -3893,12 +3906,12 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
assert(MI.getOperand(1).isReg() && MI.getOperand(2).isReg());
- unsigned SrcReg1 = MI.getOperand(1).getReg();
- unsigned SrcReg2 = MI.getOperand(2).getReg();
+ Register SrcReg1 = MI.getOperand(1).getReg();
+ Register SrcReg2 = MI.getOperand(2).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg1) ||
- !TargetRegisterInfo::isVirtualRegister(SrcReg2))
- return false;
+ if (!Register::isVirtualRegister(SrcReg1) ||
+ !Register::isVirtualRegister(SrcReg2))
+ return false;
const MachineInstr *MISrc1 = MRI->getVRegDef(SrcReg1);
const MachineInstr *MISrc2 = MRI->getVRegDef(SrcReg2);
@@ -3923,21 +3936,99 @@ bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
}
-bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
- MachineInstr *&CmpInst) const {
- MachineBasicBlock *LoopEnd = L.getBottomBlock();
- MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
- // We really "analyze" only CTR loops right now.
- if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) {
- IndVarInst = nullptr;
- CmpInst = &*I;
- return false;
+namespace {
+class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+ MachineInstr *Loop, *EndLoop, *LoopCount;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ int64_t TripCount;
+
+public:
+ PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
+ MachineInstr *LoopCount)
+ : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
+ MF(Loop->getParent()->getParent()),
+ TII(MF->getSubtarget().getInstrInfo()) {
+ // Inspect the Loop instruction up-front, as it may be deleted when we call
+ // createTripCountGreaterCondition.
+ if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
+ TripCount = LoopCount->getOperand(1).getImm();
+ else
+ TripCount = -1;
}
- return true;
+
+ bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+ // Only ignore the terminator.
+ return MI == EndLoop;
+ }
+
+ Optional<bool>
+ createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond) override {
+ if (TripCount == -1) {
+ // The BDZ/BDZ8 we will insert also decreases the CTR by 1,
+ // so we don't need to generate anything here.
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(
+ MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
+ true));
+ return {};
+ }
+
+ return TripCount > TC;
+ }
+
+ void setPreheader(MachineBasicBlock *NewPreheader) override {
+ // Do nothing. We want the LOOP setup instruction to stay in the *old*
+ // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
+ }
+
+ void adjustTripCount(int TripCountAdjust) override {
+ // If the loop trip count is a compile-time value, then just change the
+ // value.
+ if (LoopCount->getOpcode() == PPC::LI8 ||
+ LoopCount->getOpcode() == PPC::LI) {
+ int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
+ LoopCount->getOperand(1).setImm(TripCount);
+ return;
+ }
+
+ // The BDZ/BDZ8 we will insert also decreases the CTR by 1,
+ // so we don't need to generate anything here.
+ }
+
+ void disposed() override {
+ Loop->eraseFromParent();
+ // Ensure the loop setup instruction is deleted too.
+ LoopCount->eraseFromParent();
+ }
+};
+} // namespace
+
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+ // We really "analyze" only hardware loops right now.
+ MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
+ MachineBasicBlock *Preheader = *LoopBB->pred_begin();
+ if (Preheader == LoopBB)
+ Preheader = *std::next(LoopBB->pred_begin());
+ MachineFunction *MF = Preheader->getParent();
+
+ if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
+ SmallPtrSet<MachineBasicBlock *, 8> Visited;
+ if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
+ Register LoopCountReg = LoopInst->getOperand(0).getReg();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
+ return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
+ }
+ }
+ return nullptr;
}
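A hedged sketch of how a machine-pipeliner client is expected to drive the returned object; the hook names come from the PipelinerLoopInfo interface as used above, and the driver itself is simplified (the real logic lives in the MachinePipeliner):

    // Illustrative only; assumes the usual CodeGen includes.
    using namespace llvm;
    void driveExample(const PPCInstrInfo *TII, MachineBasicBlock *LoopBB,
                      MachineBasicBlock &Prolog, int TC) {
      auto LI = TII->analyzeLoopForPipelining(LoopBB);
      if (!LI)
        return; // not a BDNZ-terminated hardware loop
      SmallVector<MachineOperand, 4> Cond;
      // Compile-time trip count: returns a definite true/false.
      // Run-time trip count: returns None and fills Cond with a CTR test.
      Optional<bool> Known =
          LI->createTripCountGreaterCondition(TC, Prolog, Cond);
      (void)Known;
    }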
-MachineInstr *
-PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
+MachineInstr *PPCInstrInfo::findLoopInstr(
+ MachineBasicBlock &PreHeader,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
@@ -3948,50 +4039,6 @@ PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
return nullptr;
}
-unsigned PPCInstrInfo::reduceLoopCount(
- MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
- MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
- SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
- unsigned MaxIter) const {
- // We expect a hardware loop currently. This means that IndVar is set
- // to null, and the compare is the ENDLOOP instruction.
- assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
- MachineFunction *MF = MBB.getParent();
- DebugLoc DL = Cmp.getDebugLoc();
- MachineInstr *Loop = findLoopInstr(PreHeader);
- if (!Loop)
- return 0;
- unsigned LoopCountReg = Loop->getOperand(0).getReg();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
-
- if (!LoopCount)
- return 0;
- // If the loop trip count is a compile-time value, then just change the
- // value.
- if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
- int64_t Offset = LoopCount->getOperand(1).getImm();
- if (Offset <= 1) {
- LoopCount->eraseFromParent();
- Loop->eraseFromParent();
- return 0;
- }
- LoopCount->getOperand(1).setImm(Offset - 1);
- return Offset - 1;
- }
-
- // The loop trip count is a run-time value.
- // We need to subtract one from the trip count,
- // and insert branch later to check if we're done with the loop.
-
- // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
- // so we don't need to generate any thing here.
- Cond.push_back(MachineOperand::CreateImm(0));
- Cond.push_back(MachineOperand::CreateReg(
- Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true));
- return LoopCountReg;
-}
-
// Return true if we can get the base operand and byte offset of an instruction
// and the memory width. Width is the size of memory that is being loaded/stored.
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
@@ -4018,8 +4065,7 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth(
}
bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
- const MachineInstr &MIa, const MachineInstr &MIb,
- AliasAnalysis * /*AA*/) const {
+ const MachineInstr &MIa, const MachineInstr &MIb) const {
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 70fb757e8f1e..19ab30cb0908 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -248,11 +248,11 @@ public:
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const override;
+ AAResults *AA) const override;
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
void insertNoop(MachineBasicBlock &MBB,
@@ -370,8 +370,7 @@ public:
/// otherwise
bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA = nullptr) const override;
+ const MachineInstr &MIb) const override;
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
@@ -439,9 +438,14 @@ public:
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
int64_t Imm) const;
- bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III,
+ bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III,
bool PostRA) const;
+ // In the post-RA phase, try to find the instruction that defines \p Reg
+ // before \p MI. \p SeenIntermediateUse is set to true if any use of \p Reg
+ // exists between that def and \p MI.
+ MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI,
+ bool &SeenIntermediateUse) const;
+
/// getRegNumForOperand - some operands use different numbering schemes
/// for the same registers. For example, a VSX instruction may have any of
/// vs0-vs63 allocated whereas an Altivec instruction could only have
@@ -481,26 +485,14 @@ public:
/// On PPC, we have two instructions used to set-up the hardware loop
/// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8)
/// instructions to indicate the end of a loop.
- MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const;
-
- /// Analyze the loop code to find the loop induction variable and compare used
- /// to compute the number of iterations. Currently, we analyze loop that are
- /// controlled using hardware loops. In this case, the induction variable
- /// instruction is null. For all other cases, this function returns true,
- /// which means we're unable to analyze it. \p IndVarInst and \p CmpInst will
- /// return new values when we can analyze the readonly loop \p L, otherwise,
- /// nothing got changed
- bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
- MachineInstr *&CmpInst) const override;
- /// Generate code to reduce the loop iteration by one and check if the loop
- /// is finished. Return the value/register of the new loop count. We need
- /// this function when peeling off one or more iterations of a loop. This
- /// function assumes the last iteration is peeled first.
- unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
- MachineInstr *IndVar, MachineInstr &Cmp,
- SmallVectorImpl<MachineOperand> &Cond,
- SmallVectorImpl<MachineInstr *> &PrevInsts,
- unsigned Iter, unsigned MaxIter) const override;
+ MachineInstr *
+ findLoopInstr(MachineBasicBlock &PreHeader,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) const;
+
+ /// Analyze loop L, which must be a single-basic-block loop, and if its
+ /// conditions can be analyzed well enough, produce a PipelinerLoopInfo
+ /// object.
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+ analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index c313337047f0..24183277519b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -386,7 +386,9 @@ def immZExt16 : PatLeaf<(imm), [{
// field. Used by instructions like 'ori'.
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
}], LO16>;
-def immAnyExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm) || isUInt<8>(Imm); }]>;
+def immNonAllOneAnyExt8 : ImmLeaf<i32, [{
+ return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF));
+}]>;
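Editor's restatement of the tightened leaf predicate in plain C++ (isInt/isUInt are from llvm/Support/MathExtras.h): it now rejects the all-ones value in both the signed and unsigned readings, plausibly because the new XXLEQVOnes pseudo seen earlier covers the all-ones case:

    #include <cstdint>
    #include "llvm/Support/MathExtras.h"
    // Mirrors immNonAllOneAnyExt8 (illustrative).
    bool nonAllOneAnyExt8(int64_t Imm) {
      return (llvm::isInt<8>(Imm) && Imm != -1) ||
             (llvm::isUInt<8>(Imm) && Imm != 0xFF);
    }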
def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
@@ -577,7 +579,7 @@ def sperc : RegisterOperand<SPERC> {
def PPCRegSPE4RCAsmOperand : AsmOperandClass {
let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber";
}
-def spe4rc : RegisterOperand<SPE4RC> {
+def spe4rc : RegisterOperand<GPRC> {
let ParserMatchClass = PPCRegSPE4RCAsmOperand;
}
@@ -3161,7 +3163,16 @@ def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s1
def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
"#LWZtoc",
[(set i32:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+def LWZtocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc_nor0:$reg),
+ "#LWZtocL",
+ [(set i32:$rD,
(PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp),
+ "#ADDIStocHA",
+ [(set i32:$rD,
+ (PPCtoc_entry i32:$reg, tglobaladdr:$disp))]>;
+
// Get Global (GOT) Base Register offset, from the word immediately preceding
// the function label.
def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
@@ -3177,21 +3188,21 @@ def : Pat<(srl i32:$rS, i32:$rB),
def : Pat<(shl i32:$rS, i32:$rB),
(SLW $rS, $rB)>;
-def : Pat<(zextloadi1 iaddr:$src),
+def : Pat<(i32 (zextloadi1 iaddr:$src)),
(LBZ iaddr:$src)>;
-def : Pat<(zextloadi1 xaddr:$src),
+def : Pat<(i32 (zextloadi1 xaddr:$src)),
(LBZX xaddr:$src)>;
-def : Pat<(extloadi1 iaddr:$src),
+def : Pat<(i32 (extloadi1 iaddr:$src)),
(LBZ iaddr:$src)>;
-def : Pat<(extloadi1 xaddr:$src),
+def : Pat<(i32 (extloadi1 xaddr:$src)),
(LBZX xaddr:$src)>;
-def : Pat<(extloadi8 iaddr:$src),
+def : Pat<(i32 (extloadi8 iaddr:$src)),
(LBZ iaddr:$src)>;
-def : Pat<(extloadi8 xaddr:$src),
+def : Pat<(i32 (extloadi8 xaddr:$src)),
(LBZX xaddr:$src)>;
-def : Pat<(extloadi16 iaddr:$src),
+def : Pat<(i32 (extloadi16 iaddr:$src)),
(LHZ iaddr:$src)>;
-def : Pat<(extloadi16 xaddr:$src),
+def : Pat<(i32 (extloadi16 xaddr:$src)),
(LHZX xaddr:$src)>;
let Predicates = [HasFPU] in {
def : Pat<(f64 (extloadf32 iaddr:$src)),
@@ -3564,23 +3575,6 @@ def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)),
(EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
(LO16 imm:$imm)), sub_eq)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
- (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)),
- (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)),
- (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)),
- (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)),
- (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)),
- (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
-
-defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
- (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
- (LO16 imm:$imm)), sub_eq)>;
-
def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)),
(EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)),
@@ -3592,17 +3586,6 @@ def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)),
def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)),
(EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)),
- (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)),
- (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)),
- (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)),
- (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)),
- (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
-
// SETCC for i64.
def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)),
(EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
@@ -3632,6 +3615,47 @@ def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)),
(EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
(LO16 imm:$imm)), sub_eq)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+// Instantiations of CRNotPat for i32.
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+// Instantiations of CRNotPat for i64.
defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)),
(EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)),
@@ -3649,17 +3673,6 @@ defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
(EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
(LO16 imm:$imm)), sub_eq)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)),
- (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)),
- (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)),
- (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
- (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
- (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
-
defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)),
(EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)),
@@ -3671,6 +3684,56 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)),
defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
(EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+let Predicates = [HasFPU] in {
+// Instantiations of CRNotPat for f32.
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+// Instantiations of CRNotPat for f64.
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+// Instantiations of CRNotPat for f128.
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
+}
+
// SETCC for f32.
let Predicates = [HasFPU] in {
def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)),
@@ -3688,21 +3751,6 @@ def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)),
def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)),
(EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
-
// SETCC for f64.
def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)),
(EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
@@ -3719,21 +3767,6 @@ def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)),
def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)),
(EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
-
// SETCC for f128.
def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)),
(EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
@@ -3750,21 +3783,6 @@ def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)),
def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)),
(EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
-
}
// This must be in this file because it relies on patterns defined in this file
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 07f38a61d098..2aad5860d87f 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -58,8 +58,12 @@ def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
]>;
-def SDT_PPCfpextlh : SDTypeProfile<1, 1, [
- SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>
+def SDT_PPCfpexth : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>, SDTCisPtrTy<2>
+]>;
+
+def SDT_PPCldsplat : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
]>;
// Little-endian-specific nodes.
@@ -78,12 +82,21 @@ def SDTVecConv : SDTypeProfile<1, 2, [
def SDTVabsd : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
]>;
-
+def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
+def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be,
+ [SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
@@ -93,9 +106,11 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
-def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;
+def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>;
def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -855,14 +870,14 @@ let Uses = [RM] in {
let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isReMaterializable = 1 in {
- def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
+ def XXLXORz : XX3Form_SameOp<60, 154, (outs vsrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
[(set v4i32:$XT, (v4i32 immAllZerosV))]>;
- def XXLXORdpz : XX3Form_SetZero<60, 154,
+ def XXLXORdpz : XX3Form_SameOp<60, 154,
(outs vsfrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
[(set f64:$XT, (fpimm0))]>;
- def XXLXORspz : XX3Form_SetZero<60, 154,
+ def XXLXORspz : XX3Form_SameOp<60, 154,
(outs vssrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
[(set f32:$XT, (fpimm0))]>;
@@ -996,21 +1011,21 @@ def : Pat<(f64 (extractelt v2f64:$S, 1)),
(f64 (EXTRACT_SUBREG $S, sub_64))>;
}
-// Additional fnmsub patterns: -a*c + b == -(a*c - b)
-def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
- (XSNMSUBADP $B, $C, $A)>;
-def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
- (XSNMSUBADP $B, $C, $A)>;
+// Additional fnmsub patterns: -a*b + c == -(a*b - c)
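+// (xsnmsubadp XT, XA, XB computes -(XA * XB - XT), so passing C in as the
+// tied accumulator and A, B as the multiplicands yields -(A * B) + C.)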
+def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C),
+ (XSNMSUBADP $C, $A, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C),
+ (XSNMSUBADP $C, $A, $B)>;
-def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
- (XVNMSUBADP $B, $C, $A)>;
-def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
- (XVNMSUBADP $B, $C, $A)>;
+def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C),
+ (XVNMSUBADP $C, $A, $B)>;
+def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C),
+ (XVNMSUBADP $C, $A, $B)>;
-def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
- (XVNMSUBASP $B, $C, $A)>;
-def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
- (XVNMSUBASP $B, $C, $A)>;
+def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C),
+ (XVNMSUBASP $C, $A, $B)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C),
+ (XVNMSUBASP $C, $A, $B)>;
def : Pat<(v2f64 (bitconvert v4f32:$A)),
(COPY_TO_REGCLASS $A, VSRC)>;
@@ -1077,7 +1092,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
(v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
-def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>;
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;
// Loads.
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
@@ -1088,6 +1104,19 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
(STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
}
+
+// Load/store vector elements in big-endian order
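+// (On little-endian subtargets LXVD2X/LXVW4X access elements in big-endian
+// order and are normally paired with a swap; using them here without the
+// swap gives exactly the element order LOAD_VEC_BE/STORE_VEC_BE request.)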
+let Predicates = [IsLittleEndian, HasVSX] in {
+ def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+ def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+}
+
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -1288,6 +1317,13 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
(XXLEQV $A, $B)>;
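+ // xxleqv computes ~(XA ^ XB); applied to a register with itself it yields
+ // all ones regardless of the operand value, so the result can be cheaply
+ // rematerialized without any input.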
+ let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isReMaterializable = 1 in {
+ def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins),
+ "xxleqv $XT, $XT, $XT", IIC_VecGeneral,
+ [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>;
+ }
+
def XXLORC : XX3Form<60, 170,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlorc $XT, $XA, $XB", IIC_VecGeneral,
@@ -1476,6 +1512,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
AltVSXFMARel;
}
+ // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c)
+ def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
+ (XSNMSUBASP $C, $A, $B)>;
+ def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
+ (XSNMSUBASP $C, $A, $B)>;
+
// Single Precision Conversions (FP <-> INT)
def XSCVSXDSP : XX2Form<60, 312,
(outs vssrc:$XT), (ins vsfrc:$XB),
@@ -1564,16 +1606,33 @@ let Predicates = [HasDirectMove] in {
def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
"mfvsrwz $rA, $XT", IIC_VecGeneral,
[(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+ let isCodeGenOnly = 1 in
+ def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ []>;
def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
"mtvsrd $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsra i64:$rA))]>,
Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in
+ def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ []>,
+ Requires<[In64BitMode]>;
def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
"mtvsrwa $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+ let isCodeGenOnly = 1 in
+ def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ []>;
def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+ let isCodeGenOnly = 1 in
+ def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ []>;
} // HasDirectMove
let Predicates = [IsISA3_0, HasDirectMove] in {
@@ -1597,6 +1656,22 @@ def : InstAlias<"mfvrd $rA, $XT",
(MFVRD g8rc:$rA, vrrc:$XT), 0>;
def : InstAlias<"mffprd $rA, $src",
(MFVSRD g8rc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrd $XT, $rA",
+ (MTVRD vrrc:$XT, g8rc:$rA), 0>;
+def : InstAlias<"mtfprd $dst, $rA",
+ (MTVSRD f8rc:$dst, g8rc:$rA)>;
+def : InstAlias<"mfvrwz $rA, $XT",
+ (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprwz $rA, $src",
+ (MFVSRWZ gprc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrwa $XT, $rA",
+ (MTVRWA vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwa $dst, $rA",
+ (MTVSRWA f8rc:$dst, gprc:$rA)>;
+def : InstAlias<"mtvrwz $XT, $rA",
+ (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwz $dst, $rA",
+ (MTVSRWZ f8rc:$dst, gprc:$rA)>;
/* Direct moves of various widths from GPR's into VSR's. Each move lines
the value up into element 0 (both BE and LE). Namely, entities smaller than
@@ -2581,9 +2656,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(fneg (int_ppc_fmaf128_round_to_odd
f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
- // Additional fnmsub patterns: -a*c + b == -(a*c - b)
- def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>;
- def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>;
+ // Additional fnmsub patterns: -a*b + c == -(a*b - c)
+ def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>;
+ def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>;
//===--------------------------------------------------------------------===//
// Quad/Double-Precision Compare Instructions:
@@ -2799,12 +2874,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
[(set v4i32: $XT,
- (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>;
+ (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
[(set v2i64: $XT,
- (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
+ (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
//===--------------------------------------------------------------------===//
@@ -3024,6 +3099,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+
+ def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
+ def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
+ (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+ def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
+ def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
+ (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
} // IsLittleEndian, HasP9Vector
let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3059,7 +3144,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
- } // IsLittleEndian, HasP9Vector
+ } // IsBigEndian, HasP9Vector
// D-Form Load/Store
def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
@@ -3858,6 +3943,10 @@ let AddedComplexity = 400 in {
(XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+ def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
+ (v2f64 (LXVDSX xoaddr:$A))>;
+ def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
+ (v2i64 (LXVDSX xoaddr:$A))>;
// Build vectors of floating point converted to i64.
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
@@ -4063,27 +4152,32 @@ let AddedComplexity = 400 in {
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
+ let Predicates = [HasP8Vector] in {
+ def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
+ (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+ def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
+ (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+ def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
+ (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+ def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
+ (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+ }
+
let Predicates = [HasP9Vector] in {
// Endianness-neutral patterns for const splats with ISA 3.0 instructions.
def : Pat<(v4i32 (scalar_to_vector i32:$A)),
(v4i32 (MTVSRWS $A))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(v4i32 (MTVSRWS $A))>;
- def : Pat<(v16i8 (build_vector immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A)),
+ def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
(v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
- def : Pat<(v16i8 immAllOnesV),
- (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
- def : Pat<(v8i16 immAllOnesV),
- (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
- def : Pat<(v4i32 immAllOnesV),
- (v4i32 (XXSPLTIB 255))>;
- def : Pat<(v2i64 immAllOnesV),
- (v2i64 (XXSPLTIB 255))>;
def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
(v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
@@ -4102,6 +4196,10 @@ let AddedComplexity = 400 in {
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;
+ def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
+ (v4f32 (LXVWSX xoaddr:$A))>;
+ def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
+ (v4i32 (LXVWSX xoaddr:$A))>;
}
let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 4d45d96d4479..d252cfbd26b1 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -63,8 +63,24 @@ static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
cl::desc("Potential PHI threshold for PPC preinc loop prep"));
STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");
+STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");
namespace {
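+ // A BucketElement records one memory instruction together with its
+ // constant SCEV offset from the owning Bucket's BaseSCEV; a null Offset
+ // marks the bucket's base element itself.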
+ struct BucketElement {
+ BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
+ BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
+
+ const SCEVConstant *Offset;
+ Instruction *Instr;
+ };
+
+ struct Bucket {
+ Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
+ Elements(1, BucketElement(I)) {}
+
+ const SCEV *BaseSCEV;
+ SmallVector<BucketElement, 16> Elements;
+ };
class PPCLoopPreIncPrep : public FunctionPass {
public:
@@ -85,21 +101,47 @@ namespace {
AU.addRequired<ScalarEvolutionWrapperPass>();
}
- bool alreadyPrepared(Loop *L, Instruction* MemI,
- const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV);
bool runOnFunction(Function &F) override;
- bool runOnLoop(Loop *L);
- void simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L);
-
private:
PPCTargetMachine *TM = nullptr;
+ const PPCSubtarget *ST;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
bool PreserveLCSSA;
+
+ bool runOnLoop(Loop *L);
+
+ /// Check if the required PHI node already exists in Loop \p L.
+ bool alreadyPrepared(Loop *L, Instruction* MemI,
+ const SCEV *BasePtrStartSCEV,
+ const SCEVConstant *BasePtrIncSCEV);
+
+ /// Collect candidates in Loop \p L for which \p isValidCandidate()
+ /// returns true.
+ SmallVector<Bucket, 16>
+ collectCandidates(Loop *L,
+ std::function<bool(const Instruction *, const Value *)>
+ isValidCandidate,
+ unsigned MaxCandidateNum);
+
+ /// Add a candidate to the candidate buckets \p Buckets.
+ void addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
+ SmallVector<Bucket, 16> &Buckets,
+ unsigned MaxCandidateNum);
+
+ /// Prepare all candidates in \p Buckets for update form.
+ bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets);
+
+ /// Prepare one chain \p BucketChain: find the best base element and
+ /// update all other elements in \p BucketChain accordingly.
+ bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
+
+ /// Rewrite the load/store instructions in \p BucketChain according to the
+ /// preparation above.
+ bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
+ SmallSet<BasicBlock *, 16> &BBChanged);
};
} // end anonymous namespace
@@ -111,30 +153,15 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+static const std::string PHINodeNameSuffix = ".phi";
+static const std::string CastNodeNameSuffix = ".cast";
+static const std::string GEPNodeIncNameSuffix = ".inc";
+static const std::string GEPNodeOffNameSuffix = ".off";
+
FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
return new PPCLoopPreIncPrep(TM);
}
-namespace {
-
- struct BucketElement {
- BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
- BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
-
- const SCEVConstant *Offset;
- Instruction *Instr;
- };
-
- struct Bucket {
- Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
- Elements(1, BucketElement(I)) {}
-
- const SCEV *BaseSCEV;
- SmallVector<BucketElement, 16> Elements;
- };
-
-} // end anonymous namespace
-
static bool IsPtrInBounds(Value *BasePtr) {
Value *StrippedBasePtr = BasePtr;
while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
@@ -145,6 +172,14 @@ static bool IsPtrInBounds(Value *BasePtr) {
return false;
}
+static std::string getInstrName(const Value *I, const std::string Suffix) {
+ assert(I && "Invalid paramater!");
+ if (I->hasName())
+ return (I->getName() + Suffix).str();
+ else
+ return "";
+}
+
static Value *GetPointerOperand(Value *MemI) {
if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
return LMemI->getPointerOperand();
@@ -167,6 +202,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+ ST = TM ? TM->getSubtargetImpl(F) : nullptr;
bool MadeChange = false;
@@ -177,10 +213,280 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
return MadeChange;
}
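+// A candidate joins the first bucket whose BaseSCEV differs from its own
+// address SCEV by a compile-time constant (so it can later be rewritten as
+// a fixed offset from one PHI); otherwise it starts a new bucket, up to
+// MaxCandidateNum buckets.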
+void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
+ SmallVector<Bucket, 16> &Buckets,
+ unsigned MaxCandidateNum) {
+ assert((MemI && GetPointerOperand(MemI)) &&
+ "Candidate should be a memory instruction.");
+ assert(LSCEV && "Invalid SCEV for Ptr value.");
+ bool FoundBucket = false;
+ for (auto &B : Buckets) {
+ const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
+ if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
+ B.Elements.push_back(BucketElement(CDiff, MemI));
+ FoundBucket = true;
+ break;
+ }
+ }
+
+ if (!FoundBucket) {
+ if (Buckets.size() == MaxCandidateNum)
+ return;
+ Buckets.push_back(Bucket(LSCEV, MemI));
+ }
+}
+
+SmallVector<Bucket, 16> PPCLoopPreIncPrep::collectCandidates(
+ Loop *L,
+ std::function<bool(const Instruction *, const Value *)> isValidCandidate,
+ unsigned MaxCandidateNum) {
+ SmallVector<Bucket, 16> Buckets;
+ for (const auto &BB : L->blocks())
+ for (auto &J : *BB) {
+ Value *PtrValue;
+ Instruction *MemI;
+
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(&J)) {
+ MemI = LMemI;
+ PtrValue = LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&J)) {
+ MemI = SMemI;
+ PtrValue = SMemI->getPointerOperand();
+ } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
+ MemI = IMemI;
+ PtrValue = IMemI->getArgOperand(0);
+ } else continue;
+ } else continue;
+
+ unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+ if (PtrAddrSpace)
+ continue;
+
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
+ const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LARSCEV || LARSCEV->getLoop() != L)
+ continue;
+
+ if (isValidCandidate(&J, PtrValue))
+ addOneCandidate(MemI, LSCEV, Buckets, MaxCandidateNum);
+ }
+ return Buckets;
+}
+
+// TODO: implement a more clever base choosing policy.
+// Currently we always choose an existing load/store offset. This may lead
+// to suboptimal code sequences. For example, for one DS chain with offsets
+// {-32769, 2003, 2007, 2011}, we choose -32769 as the base, leaving the
+// load/store displacements {0, 34772, 34776, 34780}. Though each offset is
+// now a multiple of 4, it cannot be represented as a signed 16-bit integer.
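+// For instance, choosing 2003 as the base here would instead leave
+// displacements {-34772, 0, 4, 8}: the distant element still could not be
+// encoded, but the other three would become small DS-form-friendly offsets.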
+bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
+ // We have a choice now of which instruction's memory operand we use as the
+ // base for the generated PHI. Always picking the first instruction in each
+ // bucket does not work well, specifically because that instruction might
+ // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
+ // the choice is somewhat arbitrary, because the backend will happily
+ // generate direct offsets from both the pre-incremented and
+ // post-incremented pointer values. Thus, we'll pick the first non-prefetch
+ // instruction in each bucket, and adjust the recurrence and other offsets
+ // accordingly.
+ for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
+ if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr))
+ if (II->getIntrinsicID() == Intrinsic::prefetch)
+ continue;
+
+ // If we'd otherwise pick the first element anyway, there's nothing to do.
+ if (j == 0)
+ break;
+
+ // If our chosen element has no offset from the base pointer, there's
+ // nothing to do.
+ if (!BucketChain.Elements[j].Offset ||
+ BucketChain.Elements[j].Offset->isZero())
+ break;
+
+ const SCEV *Offset = BucketChain.Elements[j].Offset;
+ BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
+ for (auto &E : BucketChain.Elements) {
+ if (E.Offset)
+ E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+ else
+ E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+ }
+
+ std::swap(BucketChain.Elements[j], BucketChain.Elements[0]);
+ break;
+ }
+ return true;
+}
+
+bool PPCLoopPreIncPrep::rewriteLoadStores(
+ Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged) {
+ bool MadeChange = false;
+ const SCEVAddRecExpr *BasePtrSCEV =
+ cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
+ if (!BasePtrSCEV->isAffine())
+ return MadeChange;
+
+ LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+
+ assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
+
+ // The instruction corresponding to the Bucket's BaseSCEV must be the first
+ // in the vector of elements.
+ Instruction *MemI = BucketChain.Elements.begin()->Instr;
+ Value *BasePtr = GetPointerOperand(MemI);
+ assert(BasePtr && "No pointer operand");
+
+ Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
+ Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
+ BasePtr->getType()->getPointerAddressSpace());
+
+ const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
+ if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
+ return MadeChange;
+
+ const SCEVConstant *BasePtrIncSCEV =
+ dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
+ if (!BasePtrIncSCEV)
+ return MadeChange;
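+ // Rewind the start address by one increment: the GEP inserted in the loop
+ // header adds the increment back, so the first iteration sees the original
+ // start and each subsequent iteration advances by the step.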
+ BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
+ if (!isSafeToExpand(BasePtrStartSCEV, *SE))
+ return MadeChange;
+
+ if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV))
+ return MadeChange;
+
+ LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
+ BasicBlock *Header = L->getHeader();
+ unsigned HeaderLoopPredCount = pred_size(Header);
+ BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+
+ PHINode *NewPHI =
+ PHINode::Create(I8PtrTy, HeaderLoopPredCount,
+ getInstrName(MemI, PHINodeNameSuffix),
+ Header->getFirstNonPHI());
+
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
+ Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
+ LoopPredecessor->getTerminator());
+
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+ // times, and we need to add it the right number of times.
+ for (const auto &PI : predecessors(Header)) {
+ if (PI != LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
+ }
+
+ Instruction *InsPoint = &*Header->getFirstInsertionPt();
+ GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
+ PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
+ for (const auto &PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+
+ Instruction *NewBasePtr;
+ if (PtrInc->getType() != BasePtr->getType())
+ NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
+ getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
+ else
+ NewBasePtr = PtrInc;
+
+ if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
+ BBChanged.insert(IDel->getParent());
+ BasePtr->replaceAllUsesWith(NewBasePtr);
+ RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
+
+ // Keep track of the replacement pointer values we've inserted so that we
+ // don't generate more pointer values than necessary.
+ SmallPtrSet<Value *, 16> NewPtrs;
+ NewPtrs.insert(NewBasePtr);
+
+ for (auto I = std::next(BucketChain.Elements.begin()),
+ IE = BucketChain.Elements.end(); I != IE; ++I) {
+ Value *Ptr = GetPointerOperand(I->Instr);
+ assert(Ptr && "No pointer operand");
+ if (NewPtrs.count(Ptr))
+ continue;
+
+ Instruction *RealNewPtr;
+ if (!I->Offset || I->Offset->getValue()->isZero()) {
+ RealNewPtr = NewBasePtr;
+ } else {
+ Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
+ if (PtrIP && isa<Instruction>(NewBasePtr) &&
+ cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
+ PtrIP = nullptr;
+ else if (PtrIP && isa<PHINode>(PtrIP))
+ PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
+ else if (!PtrIP)
+ PtrIP = I->Instr;
+
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, I->Offset->getValue(),
+ getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP);
+ if (!PtrIP)
+ NewPtr->insertAfter(cast<Instruction>(PtrInc));
+ NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
+ RealNewPtr = NewPtr;
+ }
+
+ if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
+ BBChanged.insert(IDel->getParent());
+
+ Instruction *ReplNewPtr;
+ if (Ptr->getType() != RealNewPtr->getType()) {
+ ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
+ getInstrName(Ptr, CastNodeNameSuffix));
+ ReplNewPtr->insertAfter(RealNewPtr);
+ } else
+ ReplNewPtr = RealNewPtr;
+
+ Ptr->replaceAllUsesWith(ReplNewPtr);
+ RecursivelyDeleteTriviallyDeadInstructions(Ptr);
+
+ NewPtrs.insert(RealNewPtr);
+ }
+
+ MadeChange = true;
+ UpdFormChainRewritten++;
+
+ return MadeChange;
+}
+
+bool PPCLoopPreIncPrep::updateFormPrep(Loop *L,
+ SmallVector<Bucket, 16> &Buckets) {
+ bool MadeChange = false;
+ if (Buckets.empty())
+ return MadeChange;
+ SmallSet<BasicBlock *, 16> BBChanged;
+ for (auto &Bucket : Buckets)
+ // The base address of each bucket is transformed into a phi and the others
+ // are rewritten based on the new base.
+ if (prepareBaseForUpdateFormChain(Bucket))
+ MadeChange |= rewriteLoadStores(L, Bucket, BBChanged);
+ if (MadeChange)
+ for (auto &BB : L->blocks())
+ if (BBChanged.count(BB))
+ DeleteDeadPHIs(BB);
+ return MadeChange;
+}
+
// In order to prepare for the pre-increment a PHI is added.
// This function will check to see if that PHI already exists and will return
-// true if it found an existing PHI with the same start and increment as the
-// one we wanted to create.
+// true if it found an existing PHI with the same start and increment as the
+// one we wanted to create.
bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
const SCEV *BasePtrStartSCEV,
const SCEVConstant *BasePtrIncSCEV) {
@@ -216,10 +522,10 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
continue;
if (CurrentPHINode->getNumIncomingValues() == 2) {
- if ( (CurrentPHINode->getIncomingBlock(0) == LatchBB &&
- CurrentPHINode->getIncomingBlock(1) == PredBB) ||
- (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
- CurrentPHINode->getIncomingBlock(0) == PredBB) ) {
+ if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
+ CurrentPHINode->getIncomingBlock(1) == PredBB) ||
+ (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
+ CurrentPHINode->getIncomingBlock(0) == PredBB)) {
if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV &&
PHIBasePtrIncSCEV == BasePtrIncSCEV) {
// The existing PHI (CurrentPHINode) has the same start and increment
@@ -242,89 +548,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
- BasicBlock *Header = L->getHeader();
-
- const PPCSubtarget *ST =
- TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;
-
- unsigned HeaderLoopPredCount = pred_size(Header);
-
- // Collect buckets of comparable addresses used by loads and stores.
- SmallVector<Bucket, 16> Buckets;
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I) {
- for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
- J != JE; ++J) {
- Value *PtrValue;
- Instruction *MemI;
-
- if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
- MemI = LMemI;
- PtrValue = LMemI->getPointerOperand();
- } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
- MemI = SMemI;
- PtrValue = SMemI->getPointerOperand();
- } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
- if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
- MemI = IMemI;
- PtrValue = IMemI->getArgOperand(0);
- } else continue;
- } else continue;
-
- unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
- if (PtrAddrSpace)
- continue;
-
- // There are no update forms for Altivec vector load/stores.
- if (ST && ST->hasAltivec() &&
- PtrValue->getType()->getPointerElementType()->isVectorTy())
- continue;
-
- if (L->isLoopInvariant(PtrValue))
- continue;
-
- const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
- if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
- if (LARSCEV->getLoop() != L)
- continue;
- // See getPreIndexedAddressParts, the displacement for LDU/STDU has to
- // be 4's multiple (DS-form). For i64 loads/stores when the displacement
- // fits in a 16-bit signed field but isn't a multiple of 4, it will be
- // useless and possible to break some original well-form addressing mode
- // to make this pre-inc prep for it.
- if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) {
- if (const SCEVConstant *StepConst =
- dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
- const APInt &ConstInt = StepConst->getValue()->getValue();
- if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
- continue;
- }
- }
- } else {
- continue;
- }
-
- bool FoundBucket = false;
- for (auto &B : Buckets) {
- const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
- if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
- B.Elements.push_back(BucketElement(CDiff, MemI));
- FoundBucket = true;
- break;
- }
- }
-
- if (!FoundBucket) {
- if (Buckets.size() == MaxVars)
- return MadeChange;
- Buckets.push_back(Bucket(LSCEV, MemI));
- }
- }
- }
-
- if (Buckets.empty())
- return MadeChange;
-
BasicBlock *LoopPredecessor = L->getLoopPredecessor();
// If there is no loop predecessor, or the loop predecessor's terminator
// returns a value (which might contribute to determining the loop's
@@ -335,191 +558,48 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (LoopPredecessor)
MadeChange = true;
}
- if (!LoopPredecessor)
+ if (!LoopPredecessor) {
+ LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n");
return MadeChange;
+ }
- LLVM_DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");
-
- SmallSet<BasicBlock *, 16> BBChanged;
- for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
- // The base address of each bucket is transformed into a phi and the others
- // are rewritten as offsets of that variable.
-
- // We have a choice now of which instruction's memory operand we use as the
- // base for the generated PHI. Always picking the first instruction in each
- // bucket does not work well, specifically because that instruction might
- // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
- // the choice is somewhat arbitrary, because the backend will happily
- // generate direct offsets from both the pre-incremented and
- // post-incremented pointer values. Thus, we'll pick the first non-prefetch
- // instruction in each bucket, and adjust the recurrence and other offsets
- // accordingly.
- for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
- if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
- if (II->getIntrinsicID() == Intrinsic::prefetch)
- continue;
-
- // If we'd otherwise pick the first element anyway, there's nothing to do.
- if (j == 0)
- break;
-
- // If our chosen element has no offset from the base pointer, there's
- // nothing to do.
- if (!Buckets[i].Elements[j].Offset ||
- Buckets[i].Elements[j].Offset->isZero())
- break;
-
- const SCEV *Offset = Buckets[i].Elements[j].Offset;
- Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset);
- for (auto &E : Buckets[i].Elements) {
- if (E.Offset)
- E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
- else
- E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
- }
-
- std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]);
- break;
- }
-
- const SCEVAddRecExpr *BasePtrSCEV =
- cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV);
- if (!BasePtrSCEV->isAffine())
- continue;
-
- LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
- assert(BasePtrSCEV->getLoop() == L &&
- "AddRec for the wrong loop?");
-
- // The instruction corresponding to the Bucket's BaseSCEV must be the first
- // in the vector of elements.
- Instruction *MemI = Buckets[i].Elements.begin()->Instr;
- Value *BasePtr = GetPointerOperand(MemI);
- assert(BasePtr && "No pointer operand");
-
- Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
- Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
- BasePtr->getType()->getPointerAddressSpace());
-
- const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
- if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
- continue;
-
- const SCEVConstant *BasePtrIncSCEV =
- dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
- if (!BasePtrIncSCEV)
- continue;
- BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
- if (!isSafeToExpand(BasePtrStartSCEV, *SE))
- continue;
-
- LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
-
- if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV))
- continue;
-
- PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
- MemI->hasName() ? MemI->getName() + ".phi" : "",
- Header->getFirstNonPHI());
-
- SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
- Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
- LoopPredecessor->getTerminator());
-
- // Note that LoopPredecessor might occur in the predecessor list multiple
- // times, and we need to add it the right number of times.
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- if (*PI != LoopPredecessor)
- continue;
-
- NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
- }
-
- Instruction *InsPoint = &*Header->getFirstInsertionPt();
- GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
- MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
- PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- if (*PI == LoopPredecessor)
- continue;
-
- NewPHI->addIncoming(PtrInc, *PI);
- }
-
- Instruction *NewBasePtr;
- if (PtrInc->getType() != BasePtr->getType())
- NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
- PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint);
- else
- NewBasePtr = PtrInc;
-
- if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
- BBChanged.insert(IDel->getParent());
- BasePtr->replaceAllUsesWith(NewBasePtr);
- RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
-
- // Keep track of the replacement pointer values we've inserted so that we
- // don't generate more pointer values than necessary.
- SmallPtrSet<Value *, 16> NewPtrs;
- NewPtrs.insert( NewBasePtr);
-
- for (auto I = std::next(Buckets[i].Elements.begin()),
- IE = Buckets[i].Elements.end(); I != IE; ++I) {
- Value *Ptr = GetPointerOperand(I->Instr);
- assert(Ptr && "No pointer operand");
- if (NewPtrs.count(Ptr))
- continue;
-
- Instruction *RealNewPtr;
- if (!I->Offset || I->Offset->getValue()->isZero()) {
- RealNewPtr = NewBasePtr;
- } else {
- Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
- if (PtrIP && isa<Instruction>(NewBasePtr) &&
- cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
- PtrIP = nullptr;
- else if (isa<PHINode>(PtrIP))
- PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
- else if (!PtrIP)
- PtrIP = I->Instr;
-
- GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
- I8Ty, PtrInc, I->Offset->getValue(),
- I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP);
- if (!PtrIP)
- NewPtr->insertAfter(cast<Instruction>(PtrInc));
- NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
- RealNewPtr = NewPtr;
+ // Check whether a load/store is a candidate for update-form preparation.
+ // This lambda is passed to collectCandidates, which gathers the candidates.
+ auto isUpdateFormCandidate = [&] (const Instruction *I,
+ const Value *PtrValue) {
+ assert((PtrValue && I) && "Invalid parameter!");
+ // There are no update forms for Altivec vector load/stores.
+ if (ST && ST->hasAltivec() &&
+ PtrValue->getType()->getPointerElementType()->isVectorTy())
+ return false;
+ // See getPreIndexedAddressParts: the displacement for LDU/STDU has to be
+ // a multiple of 4 (DS-form). For i64 loads/stores, a displacement that
+ // fits in a 16-bit signed field but isn't a multiple of 4 makes this
+ // pre-increment preparation useless, and it may even break an originally
+ // well-formed addressing mode.
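+ // For instance, an i64 access whose step is 1022 fits in a signed 16-bit
+ // field, but 1022 % 4 != 0, so it would be rejected here.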
+ if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) {
+ const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L);
+ const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LARSCEV || LARSCEV->getLoop() != L)
+ return false;
+ if (const SCEVConstant *StepConst =
+ dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
+ const APInt &ConstInt = StepConst->getValue()->getValue();
+ if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
+ return false;
}
-
- if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
- BBChanged.insert(IDel->getParent());
-
- Instruction *ReplNewPtr;
- if (Ptr->getType() != RealNewPtr->getType()) {
- ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
- Ptr->hasName() ? Ptr->getName() + ".cast" : "");
- ReplNewPtr->insertAfter(RealNewPtr);
- } else
- ReplNewPtr = RealNewPtr;
-
- Ptr->replaceAllUsesWith(ReplNewPtr);
- RecursivelyDeleteTriviallyDeadInstructions(Ptr);
-
- NewPtrs.insert(RealNewPtr);
}
+ return true;
+ };
- MadeChange = true;
- }
+ // Collect buckets of comparable addresses used by loads, stores and the
+ // prefetch intrinsic, as candidates for update-form preparation.
+ SmallVector<Bucket, 16> UpdateFormBuckets =
+ collectCandidates(L, isUpdateFormCandidate, MaxVars);
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I) {
- if (BBChanged.count(*I))
- DeleteDeadPHIs(*I);
- }
+ // Prepare for update form.
+ if (!UpdateFormBuckets.empty())
+ MadeChange |= updateFormPrep(L, UpdateFormBuckets);
return MadeChange;
}
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 027e6bd1ba06..b6496f189a3a 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -79,7 +79,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
}
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
- AsmPrinter &Printer, bool isDarwin) {
+ AsmPrinter &Printer, bool IsDarwin) {
MCContext &Ctx = Printer.OutContext;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
@@ -137,10 +137,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
// Add ha16() / lo16() markers if required.
switch (access) {
case PPCII::MO_LO:
- Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx);
+ Expr = PPCMCExpr::createLo(Expr, IsDarwin, Ctx);
break;
case PPCII::MO_HA:
- Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx);
+ Expr = PPCMCExpr::createHa(Expr, IsDarwin, Ctx);
break;
}
@@ -148,20 +148,20 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
}
void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP, bool isDarwin) {
+ AsmPrinter &AP, bool IsDarwin) {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MCOperand MCOp;
if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP,
- isDarwin))
+ IsDarwin))
OutMI.addOperand(MCOp);
}
}
bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &OutMO, AsmPrinter &AP,
- bool isDarwin) {
+ bool IsDarwin) {
switch (MO.getType()) {
default:
llvm_unreachable("unknown operand type");
@@ -181,17 +181,20 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
return true;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
- OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin);
+ OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, IsDarwin);
return true;
case MachineOperand::MO_JumpTableIndex:
- OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin);
+ OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, IsDarwin);
return true;
case MachineOperand::MO_ConstantPoolIndex:
- OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin);
+ OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, IsDarwin);
return true;
case MachineOperand::MO_BlockAddress:
OutMO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP,
- isDarwin);
+ IsDarwin);
+ return true;
+ case MachineOperand::MO_MCSymbol:
+ OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP, IsDarwin);
return true;
case MachineOperand::MO_RegisterMask:
return false;
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 446246358e96..ac8ac060f460 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -148,8 +148,8 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
if (!Op->isReg())
return nullptr;
- unsigned Reg = Op->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = Op->getReg();
+ if (!Register::isVirtualRegister(Reg))
return nullptr;
return MRI->getVRegDef(Reg);
@@ -344,8 +344,7 @@ bool PPCMIPeephole::simplifyCode(void) {
unsigned TrueReg2 =
TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
- if (TrueReg1 == TrueReg2
- && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
+ if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) {
MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
@@ -358,7 +357,7 @@ bool PPCMIPeephole::simplifyCode(void) {
return false;
unsigned DefReg =
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
+ if (Register::isVirtualRegister(DefReg)) {
MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
return true;
@@ -444,7 +443,7 @@ bool PPCMIPeephole::simplifyCode(void) {
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
unsigned TrueReg =
TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
- if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
+ if (!Register::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
if (!DefMI)
@@ -453,8 +452,8 @@ bool PPCMIPeephole::simplifyCode(void) {
auto isConvertOfSplat = [=]() -> bool {
if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
return false;
- unsigned ConvReg = DefMI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
+ Register ConvReg = DefMI->getOperand(1).getReg();
+ if (!Register::isVirtualRegister(ConvReg))
return false;
MachineInstr *Splt = MRI->getVRegDef(ConvReg);
return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
@@ -481,9 +480,9 @@ bool PPCMIPeephole::simplifyCode(void) {
// Splat fed by a shift. Usually when we align value to splat into
// vector element zero.
if (DefOpcode == PPC::XXSLDWI) {
- unsigned ShiftRes = DefMI->getOperand(0).getReg();
- unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
- unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
+ Register ShiftRes = DefMI->getOperand(0).getReg();
+ Register ShiftOp1 = DefMI->getOperand(1).getReg();
+ Register ShiftOp2 = DefMI->getOperand(2).getReg();
unsigned ShiftImm = DefMI->getOperand(3).getImm();
unsigned SplatImm = MI.getOperand(2).getImm();
if (ShiftOp1 == ShiftOp2) {
@@ -507,7 +506,7 @@ bool PPCMIPeephole::simplifyCode(void) {
// If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
unsigned TrueReg =
TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
- if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
+ if (!Register::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@@ -518,8 +517,8 @@ bool PPCMIPeephole::simplifyCode(void) {
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned DefsReg2 =
TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
- if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
- !TargetRegisterInfo::isVirtualRegister(DefsReg2))
+ if (!Register::isVirtualRegister(DefsReg1) ||
+ !Register::isVirtualRegister(DefsReg2))
break;
MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
@@ -533,8 +532,8 @@ bool PPCMIPeephole::simplifyCode(void) {
if (RoundInstr->getOpcode() == PPC::FRSP &&
MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
Simplified = true;
- unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
- unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
+ Register ConvReg1 = RoundInstr->getOperand(1).getReg();
+ Register FRSPDefines = RoundInstr->getOperand(0).getReg();
MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
if (Use.getOperand(i).isReg() &&
@@ -566,8 +565,8 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::EXTSH8:
case PPC::EXTSH8_32_64: {
if (!EnableSExtElimination) break;
- unsigned NarrowReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
+ Register NarrowReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(NarrowReg))
break;
MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
@@ -610,8 +609,8 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::EXTSW_32:
case PPC::EXTSW_32_64: {
if (!EnableSExtElimination) break;
- unsigned NarrowReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
+ Register NarrowReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(NarrowReg))
break;
MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
@@ -652,8 +651,8 @@ bool PPCMIPeephole::simplifyCode(void) {
// We can eliminate EXTSW if the input is known to be already
// sign-extended.
LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
- unsigned TmpReg =
- MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
+ Register TmpReg =
+ MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
TmpReg);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
@@ -679,8 +678,8 @@ bool PPCMIPeephole::simplifyCode(void) {
if (MI.getOperand(2).getImm() != 0)
break;
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
break;
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
@@ -695,8 +694,8 @@ bool PPCMIPeephole::simplifyCode(void) {
SrcMI = SubRegMI;
if (SubRegMI->getOpcode() == PPC::COPY) {
- unsigned CopyReg = SubRegMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(CopyReg))
+ Register CopyReg = SubRegMI->getOperand(1).getReg();
+ if (Register::isVirtualRegister(CopyReg))
SrcMI = MRI->getVRegDef(CopyReg);
}
@@ -757,7 +756,7 @@ bool PPCMIPeephole::simplifyCode(void) {
break; // We don't have an ADD fed by LI's that can be transformed
// Now we know that Op1 is the PHI node and Op2 is the dominator
- unsigned DominatorReg = Op2.getReg();
+ Register DominatorReg = Op2.getReg();
const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
? &PPC::G8RC_and_G8RC_NOX0RegClass
@@ -927,7 +926,7 @@ static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
}
else if (Inst->isFullCopy())
NextReg = Inst->getOperand(1).getReg();
- if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
+ if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
break;
SrcReg = NextReg;
}
@@ -949,9 +948,8 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
(*BII).getOpcode() == PPC::BCC &&
(*BII).getOperand(1).isReg()) {
// We optimize only if the condition code is used only by one BCC.
- unsigned CndReg = (*BII).getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
- !MRI->hasOneNonDBGUse(CndReg))
+ Register CndReg = (*BII).getOperand(1).getReg();
+ if (!Register::isVirtualRegister(CndReg) || !MRI->hasOneNonDBGUse(CndReg))
return false;
MachineInstr *CMPI = MRI->getVRegDef(CndReg);
@@ -961,7 +959,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
// We skip this BB if a physical register is used in comparison.
for (MachineOperand &MO : CMPI->operands())
- if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && !Register::isVirtualRegister(MO.getReg()))
return false;
return true;
@@ -1271,8 +1269,8 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
// We touch up the compare instruction in MBB2 and move it to
// a previous BB to handle partially redundant case.
if (SwapOperands) {
- unsigned Op1 = CMPI2->getOperand(1).getReg();
- unsigned Op2 = CMPI2->getOperand(2).getReg();
+ Register Op1 = CMPI2->getOperand(1).getReg();
+ Register Op2 = CMPI2->getOperand(2).getReg();
CMPI2->getOperand(1).setReg(Op2);
CMPI2->getOperand(2).setReg(Op1);
}
@@ -1295,7 +1293,7 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
DebugLoc DL = CMPI2->getDebugLoc();
- unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
+ Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(MBB2, MBB2.begin(), DL,
TII->get(PPC::PHI), NewVReg)
.addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
@@ -1334,8 +1332,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
if (MI.getOpcode() != PPC::RLDICR)
return false;
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
@@ -1414,8 +1412,8 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
if (SHMI + MEMI != 63)
return false;
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
@@ -1428,6 +1426,12 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
if (!MRI->hasOneNonDBGUse(SrcReg))
return false;
+ assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
+ assert(SrcMI->getOperand(1).isReg() &&
+ "EXTSW's second operand should be a register");
+ if (!Register::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ return false;
+
LLVM_DEBUG(dbgs() << "Combining pair: ");
LLVM_DEBUG(SrcMI->dump());
LLVM_DEBUG(MI.dump());
diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index d83c92276800..b1c0433641dd 100644
--- a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -57,6 +57,109 @@ namespace {
MachineFunctionProperties::Property::NoVRegs);
}
+ // This function removes any redundant load immediates. It has two levels
+ // of loops: the outer loop finds a load immediate BBI that could be used
+ // to replace subsequent redundant ones; the inner loop scans the
+ // instructions after BBI to find that redundancy and updates kill/dead
+ // flags accordingly. If AfterBBI loads the same immediate into the same
+ // register as BBI, it is redundant; otherwise, any instruction that
+ // modifies BBI's def register ends the scan. DeadOrKillToUnset points to
+ // the previous operand that had the kill/dead flag set; it tracks the def
+ // register of BBI, the use registers, and the def registers of AfterBBIs.
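+ // A minimal hypothetical sketch of the rewrite:
+ //   $r3 = LI 4              (BBI, kept)
+ //   STW killed $r3, 0, $r1  (kill flag of $r3 is cleared below)
+ //   $r3 = LI 4              (AfterBBI: same register, same immediate,
+ //                            erased)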
+ bool removeRedundantLIs(MachineBasicBlock &MBB,
+ const TargetRegisterInfo *TRI) {
+ LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
+ MBB.dump(); dbgs() << "\n");
+
+ DenseSet<MachineInstr *> InstrsToErase;
+ for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+ // Skip a load immediate that is marked to be erased later because it
+ // cannot be used to replace any other instructions.
+ if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
+ continue;
+ // Skip instructions that are not load immediates.
+ unsigned Opc = BBI->getOpcode();
+ if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
+ Opc != PPC::LIS8)
+ continue;
+ // Skip a load immediate whose operand is a relocation (e.g., $r3 =
+ // LI target-flags(ppc-lo) %const.0).
+ if (!BBI->getOperand(1).isImm())
+ continue;
+ assert(BBI->getOperand(0).isReg() &&
+ "Expected a register for the first operand");
+
+ LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
+
+ Register Reg = BBI->getOperand(0).getReg();
+ int64_t Imm = BBI->getOperand(1).getImm();
+ MachineOperand *DeadOrKillToUnset = nullptr;
+ if (BBI->getOperand(0).isDead()) {
+ DeadOrKillToUnset = &BBI->getOperand(0);
+ LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
+ << " from load immediate " << *BBI
+ << " is a unsetting candidate\n");
+ }
+ // This loop scans instructions after BBI to see if there is any
+ // redundant load immediate.
+ for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
+ ++AfterBBI) {
+ // Track the operand that kills Reg. We will unset the kill flag of
+ // the operand if there is a following redundant load immediate.
+ int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
+ if (KillIdx != -1) {
+ assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
+ DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
+ LLVM_DEBUG(dbgs()
+ << " Kill flag of " << *DeadOrKillToUnset << " from "
+ << *AfterBBI << " is a unsetting candidate\n");
+ }
+
+ if (!AfterBBI->modifiesRegister(Reg, TRI))
+ continue;
+ // Finish scanning because Reg is overwritten by a non-load
+ // instruction.
+ if (AfterBBI->getOpcode() != Opc)
+ break;
+ assert(AfterBBI->getOperand(0).isReg() &&
+ "Expected a register for the first operand");
+ // Finish scanning because Reg is overwritten by a relocation or a
+ // different value.
+ if (!AfterBBI->getOperand(1).isImm() ||
+ AfterBBI->getOperand(1).getImm() != Imm)
+ break;
+
+ // It loads the same immediate value into the same Reg, which is redundant.
+ // We will first unset the kill flag on the previous use of Reg to extend
+ // its live range, then remove the redundancy.
+ if (DeadOrKillToUnset) {
+ LLVM_DEBUG(dbgs()
+ << " Unset dead/kill flag of " << *DeadOrKillToUnset
+ << " from " << *DeadOrKillToUnset->getParent());
+ if (DeadOrKillToUnset->isDef())
+ DeadOrKillToUnset->setIsDead(false);
+ else
+ DeadOrKillToUnset->setIsKill(false);
+ }
+ DeadOrKillToUnset =
+ AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
+ if (DeadOrKillToUnset)
+ LLVM_DEBUG(dbgs()
+ << " Dead flag of " << *DeadOrKillToUnset << " from "
+ << *AfterBBI << " is a unsetting candidate\n");
+ InstrsToErase.insert(&*AfterBBI);
+ LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
+ AfterBBI->dump());
+ }
+ }
+
+ for (MachineInstr *MI : InstrsToErase) {
+ MI->eraseFromParent();
+ }
+ NumRemovedInPreEmit += InstrsToErase.size();
+ return !InstrsToErase.empty();
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole)
return false;
@@ -65,6 +168,7 @@ namespace {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
+ Changed |= removeRedundantLIs(MBB, TRI);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
// Detect self copies - these can result from running AADB.
@@ -111,7 +215,7 @@ namespace {
if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
continue;
MachineInstr *CRSetMI = nullptr;
- unsigned CRBit = Br->getOperand(0).getReg();
+ Register CRBit = Br->getOperand(0).getReg();
unsigned CRReg = getCRFromCRBit(CRBit);
bool SeenUse = false;
MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
diff --git a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
index 3a83cc27439c..6e9042643820 100644
--- a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@@ -79,8 +79,8 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
for (auto SI = Splats.begin(); SI != Splats.end();) {
MachineInstr *SMI = *SI;
- unsigned SplatReg = SMI->getOperand(0).getReg();
- unsigned SrcReg = SMI->getOperand(1).getReg();
+ Register SplatReg = SMI->getOperand(0).getReg();
+ Register SrcReg = SMI->getOperand(1).getReg();
if (MI->modifiesRegister(SrcReg, TRI)) {
switch (MI->getOpcode()) {
@@ -102,7 +102,7 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
// the QPX splat source register.
unsigned SubRegIndex =
TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
- unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
+ Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
// Substitute both the explicit defined register, and also the
// implicit def of the containing QPX register.
diff --git a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 8eaa6dfe2bf7..3b71ed219c17 100644
--- a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -381,10 +381,10 @@ private:
const MachineBranchProbabilityInfo *MBPI;
// A vector to contain all the CR logical operations
- std::vector<CRLogicalOpInfo> AllCRLogicalOps;
+ SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
void initialize(MachineFunction &MFParm);
void collectCRLogicals();
- bool handleCROp(CRLogicalOpInfo &CRI);
+ bool handleCROp(unsigned Idx);
bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI);
static bool isCRLogical(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
@@ -398,7 +398,7 @@ private:
// Not using a range-based for loop here as the vector may grow while being
// operated on.
for (unsigned i = 0; i < AllCRLogicalOps.size(); i++)
- Changed |= handleCROp(AllCRLogicalOps[i]);
+ Changed |= handleCROp(i);
return Changed;
}
@@ -535,15 +535,15 @@ MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
unsigned &Subreg,
MachineInstr *&CpDef) {
Subreg = -1;
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return nullptr;
MachineInstr *Copy = MRI->getVRegDef(Reg);
CpDef = Copy;
if (!Copy->isCopy())
return Copy;
- unsigned CopySrc = Copy->getOperand(1).getReg();
+ Register CopySrc = Copy->getOperand(1).getReg();
Subreg = Copy->getOperand(1).getSubReg();
- if (!TargetRegisterInfo::isVirtualRegister(CopySrc)) {
+ if (!Register::isVirtualRegister(CopySrc)) {
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
// Set the Subreg
if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ)
@@ -578,10 +578,11 @@ void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
/// a unary CR logical might be used to change the condition code on a
/// comparison feeding it. A nullary CR logical might simply be removable
/// if the user of the bit it [un]sets can be transformed.
-bool PPCReduceCRLogicals::handleCROp(CRLogicalOpInfo &CRI) {
+bool PPCReduceCRLogicals::handleCROp(unsigned Idx) {
// We can definitely split a block on the inputs to a binary CR operation
// whose defs and (single) use are within the same block.
bool Changed = false;
+ CRLogicalOpInfo CRI = AllCRLogicalOps[Idx];
if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR &&
CRI.DefsSingleUse) {
Changed = splitBlockOnBinaryCROp(CRI);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 12554ea8d079..9ec26a19bdaa 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -325,13 +325,13 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool IsPositionIndependent = TM.isPositionIndependent();
if (hasBasePointer(MF)) {
- if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
+ if (Subtarget.is32BitELFABI() && IsPositionIndependent)
markSuperRegs(Reserved, PPC::R29);
else
markSuperRegs(Reserved, PPC::R30);
}
- if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
+ if (Subtarget.is32BitELFABI() && IsPositionIndependent)
markSuperRegs(Reserved, PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -391,7 +391,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ assert(Register::isPhysicalRegister(PhysReg));
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!TM.isPPC64())
@@ -425,7 +425,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::G8RC_NOX0RegClassID:
case PPC::GPRC_NOR0RegClassID:
case PPC::SPERCRegClassID:
- case PPC::SPE4RCRegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -527,7 +526,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
if (LP64)
@@ -549,7 +548,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
}
bool KillNegSizeReg = MI.getOperand(1).isKill();
- unsigned NegSizeReg = MI.getOperand(1).getReg();
+ Register NegSizeReg = MI.getOperand(1).getReg();
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
@@ -655,8 +654,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
// an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg.
@@ -700,8 +699,8 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CR does not define its destination");
@@ -744,8 +743,8 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register SrcReg = MI.getOperand(0).getReg();
// Search up the BB to find the definition of the CR bit.
MachineBasicBlock::reverse_iterator Ins;
@@ -823,8 +822,8 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CRBIT does not define its destination");
@@ -833,7 +832,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg);
- unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO)
.addReg(getCRFromCRBit(DestReg));
@@ -870,8 +869,8 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ Register SrcReg = MI.getOperand(0).getReg();
BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
@@ -896,8 +895,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ Register DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_VRSAVE does not define its destination");
@@ -1128,7 +1127,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OperandBase = OffsetOperandNo;
}
- unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
+ Register StackReg = MI.getOperand(FIOperandNum).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index af0dff6347a6..4719e947b172 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -253,15 +253,14 @@ def RM: PPCReg<"**ROUNDING MODE**">;
/// Register classes
// Allocate volatiles first
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
-def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
- (sequence "R%u", 30, 13),
- R31, R0, R1, FP, BP)> {
+def GPRC : RegisterClass<"PPC", [i32,f32], 32, (add (sequence "R%u", 2, 12),
+ (sequence "R%u", 30, 13),
+ R31, R0, R1, FP, BP)> {
// On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
// put it at the end of the list.
let AltOrders = [(add (sub GPRC, R2), R2)];
let AltOrderSelect = [{
- const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
- return S.isPPC64() && S.isSVR4ABI();
+ return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
}];
}
@@ -272,21 +271,19 @@ def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
// put it at the end of the list.
let AltOrders = [(add (sub G8RC, X2), X2)];
let AltOrderSelect = [{
- const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
- return S.isPPC64() && S.isSVR4ABI();
+ return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
}];
}
// For some instructions r0 is special (representing the value 0 instead of
// the value in the r0 register), and we use these register subclasses to
// prevent r0 from being allocated for use by those instructions.
-def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)> {
+def GPRC_NOR0 : RegisterClass<"PPC", [i32,f32], 32, (add (sub GPRC, R0), ZERO)> {
// On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
// put it at the end of the list.
let AltOrders = [(add (sub GPRC_NOR0, R2), R2)];
let AltOrderSelect = [{
- const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
- return S.isPPC64() && S.isSVR4ABI();
+ return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
}];
}
@@ -295,8 +292,7 @@ def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> {
// put it at the end of the list.
let AltOrders = [(add (sub G8RC_NOX0, X2), X2)];
let AltOrderSelect = [{
- const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
- return S.isPPC64() && S.isSVR4ABI();
+ return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
}];
}
@@ -304,8 +300,6 @@ def SPERC : RegisterClass<"PPC", [f64], 64, (add (sequence "S%u", 2, 12),
(sequence "S%u", 30, 13),
S31, S0, S1)>;
-def SPE4RC : RegisterClass<"PPC", [f32], 32, (add GPRC)>;
-
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
// allocated non-volatile register with the lowest register number, as FP
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 6aa7528634d3..10568ed4b655 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -60,7 +60,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
InstrInfo(*this), TLInfo(TM, *this) {}
void PPCSubtarget::initializeEnvironment() {
- StackAlignment = 16;
+ StackAlignment = Align(16);
DarwinDirective = PPC::DIR_NONE;
HasMFOCRF = false;
Has64BitSupport = false;
@@ -145,7 +145,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isDarwin())
HasLazyResolverStubs = true;
- if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
+ if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) ||
+ TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
TargetTriple.isMusl())
SecurePlt = true;
@@ -228,18 +229,13 @@ bool PPCSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
-unsigned char
-PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const {
- // Note that currently we don't generate non-pic references.
- // If a caller wants that, this will have to be updated.
-
+bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
// Large code model always uses the TOC even for local symbols.
if (TM.getCodeModel() == CodeModel::Large)
- return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
-
+ return true;
if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
- return PPCII::MO_PIC_FLAG;
- return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
+ return false;
+ return true;
}
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 55fec1cb6d99..d96c2893aee9 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -78,7 +78,7 @@ protected:
 /// stackAlignment - The minimum alignment known to hold for the stack frame
 /// on entry to the function, which must be maintained by every function.
- unsigned StackAlignment;
+ Align StackAlignment;
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
@@ -166,7 +166,7 @@ public:
 /// getStackAlignment - Returns the minimum alignment known to hold for the
 /// stack frame on entry to the function, which must be maintained by every
 /// function for this subtarget.
- unsigned getStackAlignment() const { return StackAlignment; }
+ Align getStackAlignment() const { return StackAlignment; }
/// getDarwinDirective - Returns the -m directive specified for the cpu.
///
@@ -210,7 +210,11 @@ public:
/// instructions, regardless of whether we are in 32-bit or 64-bit mode.
bool has64BitSupport() const { return Has64BitSupport; }
 // useSoftFloat - Return true if the soft-float option is turned on.
- bool useSoftFloat() const { return !HasHardFloat; }
+ bool useSoftFloat() const {
+ if (isAIXABI() && !HasHardFloat)
+ report_fatal_error("soft-float is not yet supported on AIX.");
+ return !HasHardFloat;
+ }
/// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
 /// registers in 32-bit mode when possible. This can only be true if
@@ -277,11 +281,11 @@ public:
bool hasDirectMove() const { return HasDirectMove; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
- unsigned getPlatformStackAlignment() const {
+ Align getPlatformStackAlignment() const {
if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
- return 32;
+ return Align(32);
- return 16;
+ return Align(16);
}
// DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no
@@ -316,6 +320,9 @@ public:
bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
bool isELFv2ABI() const;
+ bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); }
+ bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); }
+
 /// Originally, this function returned hasISEL(). Now we always enable it,
/// but may expand the ISEL instruction later.
bool enableEarlyIfConversion() const override { return true; }
@@ -337,9 +344,8 @@ public:
bool enableSubRegLiveness() const override;
- /// classifyGlobalReference - Classify a global variable reference for the
- /// current subtarget accourding to how we should reference it.
- unsigned char classifyGlobalReference(const GlobalValue *GV) const;
+ /// True if the GV will be accessed via an indirect symbol.
+ bool isGVIndirectSymbol(const GlobalValue *GV) const;
bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
};
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index fb826c4a32f1..8f313d9d01c4 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -74,8 +74,8 @@ protected:
LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI);
- unsigned OutReg = MI.getOperand(0).getReg();
- unsigned InReg = MI.getOperand(1).getReg();
+ Register OutReg = MI.getOperand(0).getReg();
+ Register InReg = MI.getOperand(1).getReg();
DebugLoc DL = MI.getDebugLoc();
unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
unsigned Opc1, Opc2;
diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 3eb0569fb955..895ae6744421 100644
--- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -95,7 +95,8 @@ namespace {
protected:
bool hasTOCLoReloc(const MachineInstr &MI) {
if (MI.getOpcode() == PPC::LDtocL ||
- MI.getOpcode() == PPC::ADDItocL)
+ MI.getOpcode() == PPC::ADDItocL ||
+ MI.getOpcode() == PPC::LWZtocL)
return true;
for (const MachineOperand &MO : MI.operands()) {
@@ -109,11 +110,15 @@ protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
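+    // Use the 64-bit TOC pointer (X2) or the 32-bit one (R2) depending on
+    // the subtarget, rather than hard-coding X2.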
+ const bool isPPC64 =
+ MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
+ const unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
+
for (auto &MI : MBB) {
if (!hasTOCLoReloc(MI))
continue;
- MI.addOperand(MachineOperand::CreateReg(PPC::X2,
+ MI.addOperand(MachineOperand::CreateReg(TOCReg,
false /*IsDef*/,
true /*IsImp*/));
Changed = true;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index ce00f848dd72..abefee8b339d 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -93,7 +93,7 @@ EnableMachineCombinerPass("ppc-machine-combiner",
static cl::opt<bool>
ReduceCRLogical("ppc-reduce-cr-logicals",
cl::desc("Expand eligible cr-logical binary ops to branches"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
@@ -185,12 +185,13 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
- // If it isn't a Mach-O file then it's going to be a linux ELF
- // object file.
if (TT.isOSDarwin())
- return llvm::make_unique<TargetLoweringObjectFileMachO>();
+ return std::make_unique<TargetLoweringObjectFileMachO>();
+
+ if (TT.isOSAIX())
+ return std::make_unique<TargetLoweringObjectFileXCOFF>();
- return llvm::make_unique<PPC64LinuxTargetObjectFile>();
+ return std::make_unique<PPC64LinuxTargetObjectFile>();
}
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
@@ -248,10 +249,19 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
report_fatal_error("Target does not support the kernel CodeModel", false);
return *CM;
}
- if (!TT.isOSDarwin() && !JIT &&
- (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
- return CodeModel::Medium;
- return CodeModel::Small;
+
+ if (JIT)
+ return CodeModel::Small;
+ if (TT.isOSAIX())
+ return CodeModel::Small;
+
+ assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based.");
+
+ if (TT.isArch32Bit())
+ return CodeModel::Small;
+
+ assert(TT.isArch64Bit() && "Unsupported PPC architecture.");
+ return CodeModel::Medium;
}
@@ -259,8 +269,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
ScheduleDAGMILive *DAG =
new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ?
- llvm::make_unique<PPCPreRASchedStrategy>(C) :
- llvm::make_unique<GenericScheduler>(C));
+ std::make_unique<PPCPreRASchedStrategy>(C) :
+ std::make_unique<GenericScheduler>(C));
// add DAG Mutations here.
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
return DAG;
@@ -271,8 +281,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
ScheduleDAGMI *DAG =
new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ?
- llvm::make_unique<PPCPostRASchedStrategy>(C) :
- llvm::make_unique<PostGenericScheduler>(C), true);
+ std::make_unique<PPCPostRASchedStrategy>(C) :
+ std::make_unique<PostGenericScheduler>(C), true);
// add DAG Mutations here.
return DAG;
}
@@ -328,7 +338,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<PPCSubtarget>(
+ I = std::make_unique<PPCSubtarget>(
TargetTriple, CPU,
// FIXME: It would be good to have the subtarget additions here
// not necessary. Anything that turns them on/off (overrides) ends
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index ff3dfbfaca05..f51300c656aa 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -594,10 +594,37 @@ bool PPCTTIImpl::enableInterleavedAccessVectorization() {
return true;
}
-unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
- if (Vector && !ST->hasAltivec() && !ST->hasQPX())
- return 0;
- return ST->hasVSX() ? 64 : 32;
+unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ assert(ClassID == GPRRC || ClassID == FPRRC ||
+ ClassID == VRRC || ClassID == VSXRC);
+ if (ST->hasVSX()) {
+ assert(ClassID == GPRRC || ClassID == VSXRC);
+ return ClassID == GPRRC ? 32 : 64;
+ }
+ assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
+ return 32;
+}
+
+unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
+ if (Vector)
+ return ST->hasVSX() ? VSXRC : VRRC;
+ else if (Ty && Ty->getScalarType()->isFloatTy())
+ return ST->hasVSX() ? VSXRC : FPRRC;
+ else
+ return GPRRC;
+}
+
+const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
+  switch (ClassID) {
+ default:
+ llvm_unreachable("unknown register class");
+ return "PPC::unknown register class";
+ case GPRRC: return "PPC::GPRRC";
+ case FPRRC: return "PPC::FPRRC";
+ case VRRC: return "PPC::VRRC";
+ case VSXRC: return "PPC::VSXRC";
+ }
}
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
@@ -613,7 +640,7 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
}
-unsigned PPCTTIImpl::getCacheLineSize() {
+unsigned PPCTTIImpl::getCacheLineSize() const {
// Check first if the user specified a custom line size.
if (CacheLineSize.getNumOccurrences() > 0)
return CacheLineSize;
@@ -628,7 +655,7 @@ unsigned PPCTTIImpl::getCacheLineSize() {
return 64;
}
-unsigned PPCTTIImpl::getPrefetchDistance() {
+unsigned PPCTTIImpl::getPrefetchDistance() const {
// This seems like a reasonable default for the BG/Q (this pass is enabled, by
// default, only on the BG/Q).
return 300;
@@ -752,6 +779,35 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return 0;
return Cost;
+
+ } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
+ if (ST->hasP9Altivec()) {
+ if (ISD == ISD::INSERT_VECTOR_ELT)
+ // A move-to VSR and a permute/insert. Assume vector operation cost
+ // for both (cost will be 2x on P9).
+ return vectorCostAdjustment(2, Opcode, Val, nullptr);
+
+ // It's an extract. Maybe we can do a cheap move-from VSR.
+ unsigned EltSize = Val->getScalarSizeInBits();
+ if (EltSize == 64) {
+ unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
+ if (Index == MfvsrdIndex)
+ return 1;
+ } else if (EltSize == 32) {
+ unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
+ if (Index == MfvsrwzIndex)
+ return 1;
+ }
+
+ // We need a vector extract (or mfvsrld). Assume vector operation cost.
+ // The cost of the load constant for a vector extract is disregarded
+ // (invariant, easily schedulable).
+ return vectorCostAdjustment(1, Opcode, Val, nullptr);
+
+ } else if (ST->hasDirectMove())
+ // Assume permute has standard cost.
+ // Assume move-to/move-from VSR have 2x standard cost.
+ return 3;
}
// Estimated cost of a load-hit-store delay. This was obtained
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 5d76ee418b69..83a70364bf68 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -72,10 +72,16 @@ public:
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
- unsigned getNumberOfRegisters(bool Vector);
+
+ enum PPCRegisterClass {
+ GPRRC, FPRRC, VRRC, VSXRC
+ };
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+ const char* getRegisterClassName(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;
- unsigned getCacheLineSize();
- unsigned getPrefetchDistance();
+ unsigned getCacheLineSize() const override;
+ unsigned getPrefetchDistance() const override;
unsigned getMaxInterleaveFactor(unsigned VF);
int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
int getArithmeticInstrCost(
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 719ed7b63878..3463bbbdc5f0 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -50,7 +50,7 @@ namespace {
bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
MachineRegisterInfo &MRI) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
return RC->hasSubClassEq(MRI.getRegClass(Reg));
} else if (RC->contains(Reg)) {
return true;
@@ -102,7 +102,7 @@ protected:
IsVSFReg(SrcMO.getReg(), MRI)) &&
"Unknown source for a VSX copy");
- unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ Register NewVReg = MRI.createVirtualRegister(SrcRC);
BuildMI(MBB, MI, MI.getDebugLoc(),
TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
.addImm(1) // add 1, not 0, because there is no implicit clearing
@@ -124,7 +124,7 @@ protected:
"Unknown destination for a VSX copy");
// Copy the VSX value into a new VSX register of the correct subclass.
- unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ Register NewVReg = MRI.createVirtualRegister(DstRC);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg)
.add(SrcMO);
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index ce78239df0a8..5e150be544ed 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -126,8 +126,8 @@ protected:
if (!AddendMI->isFullCopy())
continue;
- unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+ Register AddendSrcReg = AddendMI->getOperand(1).getReg();
+ if (Register::isVirtualRegister(AddendSrcReg)) {
if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
MRI.getRegClass(AddendSrcReg))
continue;
@@ -182,12 +182,12 @@ protected:
// %5 = A-form-op %5, %5, %11;
// where %5 and %11 are both kills. This case would be skipped
// otherwise.
- unsigned OldFMAReg = MI.getOperand(0).getReg();
+ Register OldFMAReg = MI.getOperand(0).getReg();
// Find one of the product operands that is killed by this instruction.
unsigned KilledProdOp = 0, OtherProdOp = 0;
- unsigned Reg2 = MI.getOperand(2).getReg();
- unsigned Reg3 = MI.getOperand(3).getReg();
+ Register Reg2 = MI.getOperand(2).getReg();
+ Register Reg3 = MI.getOperand(3).getReg();
if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
&& Reg2 != OldFMAReg) {
KilledProdOp = 2;
@@ -208,14 +208,14 @@ protected:
// legality checks above, the live range for the addend source register
// could be extended), but it seems likely that such a trivial copy can
// be coalesced away later, and thus is not worth the effort.
- if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
+ if (Register::isVirtualRegister(AddendSrcReg) &&
!LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
continue;
// Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
- unsigned KilledProdReg = MI.getOperand(KilledProdOp).getReg();
- unsigned OtherProdReg = MI.getOperand(OtherProdOp).getReg();
+ Register KilledProdReg = MI.getOperand(KilledProdOp).getReg();
+ Register OtherProdReg = MI.getOperand(OtherProdOp).getReg();
unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
@@ -314,7 +314,7 @@ protected:
// Extend the live interval of the addend source (it might end at the
// copy to be removed, or somewhere in between there and here). This
// is necessary only if it is a physical register.
- if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg))
+ if (!Register::isVirtualRegister(AddendSrcReg))
for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
++Units) {
unsigned Unit = *Units;
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 44175af7f9b6..c3729da0b07b 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -158,7 +158,7 @@ private:
// Return true iff the given register is in the given class.
bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return RC->hasSubClassEq(MRI->getRegClass(Reg));
return RC->contains(Reg);
}
@@ -253,7 +253,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (isAnyVecReg(Reg, Partial)) {
RelevantInstr = true;
break;
@@ -566,7 +566,7 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
CopySrcReg = MI->getOperand(2).getReg();
}
- if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
+ if (!Register::isVirtualRegister(CopySrcReg)) {
if (!isScalarVecReg(CopySrcReg))
SwapVector[VecIdx].MentionsPhysVR = 1;
return CopySrcReg;
@@ -601,11 +601,11 @@ void PPCVSXSwapRemoval::formWebs() {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!isVecReg(Reg) && !isScalarVecReg(Reg))
continue;
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Register::isVirtualRegister(Reg)) {
if (!(MI->isCopy() && isScalarVecReg(Reg)))
SwapVector[EntryIdx].MentionsPhysVR = 1;
continue;
@@ -667,7 +667,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
// than a swap instruction.
else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
- unsigned DefReg = MI->getOperand(0).getReg();
+ Register DefReg = MI->getOperand(0).getReg();
// We skip debug instructions in the analysis. (Note that debug
// location information is still maintained by this optimization
@@ -695,9 +695,9 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
// other than a swap instruction.
} else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
- unsigned UseReg = MI->getOperand(0).getReg();
+ Register UseReg = MI->getOperand(0).getReg();
MachineInstr *DefMI = MRI->getVRegDef(UseReg);
- unsigned DefReg = DefMI->getOperand(0).getReg();
+ Register DefReg = DefMI->getOperand(0).getReg();
int DefIdx = SwapMap[DefMI];
if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
@@ -756,7 +756,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
if (!SwapVector[Repr].WebRejected) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
- unsigned DefReg = MI->getOperand(0).getReg();
+ Register DefReg = MI->getOperand(0).getReg();
for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
int UseIdx = SwapMap[&UseMI];
@@ -772,7 +772,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
if (!SwapVector[Repr].WebRejected) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
- unsigned UseReg = MI->getOperand(0).getReg();
+ Register UseReg = MI->getOperand(0).getReg();
MachineInstr *DefMI = MRI->getVRegDef(UseReg);
int DefIdx = SwapMap[DefMI];
SwapVector[DefIdx].WillRemove = 1;
@@ -869,8 +869,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
Selector = 3 - Selector;
MI->getOperand(3).setImm(Selector);
- unsigned Reg1 = MI->getOperand(1).getReg();
- unsigned Reg2 = MI->getOperand(2).getReg();
+ Register Reg1 = MI->getOperand(1).getReg();
+ Register Reg2 = MI->getOperand(2).getReg();
MI->getOperand(1).setReg(Reg2);
MI->getOperand(2).setReg(Reg1);
@@ -894,9 +894,9 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
LLVM_DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
LLVM_DEBUG(MI->dump());
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
- unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ Register NewVReg = MRI->createVirtualRegister(DstRC);
MI->getOperand(0).setReg(NewVReg);
LLVM_DEBUG(dbgs() << " Into: ");
@@ -910,8 +910,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
// prior to the swap, and from VSRC to VRRC following the swap.
// Coalescing will usually remove all this mess.
if (DstRC == &PPC::VRRCRegClass) {
- unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
- unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+ Register VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+ Register VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
TII->get(PPC::COPY), VSRCTmp1)
diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 0172c6298772..300ba8dc675c 100644
--- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -79,7 +80,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
// Helper to emit a combination of LUI, ADDI(W), and SLLI instructions that
 // synthesize the desired immediate value into the destination register.
- void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out);
+ void emitLoadImm(Register DestReg, int64_t Value, MCStreamer &Out);
// Helper to emit a combination of AUIPC and SecondOpcode. Used to implement
// helpers such as emitLoadLocalAddress and emitLoadAddress.
@@ -127,6 +128,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseRegister(OperandVector &Operands,
bool AllowParens = false);
OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
+ OperandMatchResultTy parseAtomicMemOp(OperandVector &Operands);
OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
OperandMatchResultTy parseCallSymbol(OperandVector &Operands);
@@ -193,7 +195,7 @@ public:
/// instruction
struct RISCVOperand : public MCParsedAsmOperand {
- enum KindTy {
+ enum class KindTy {
Token,
Register,
Immediate,
@@ -203,7 +205,7 @@ struct RISCVOperand : public MCParsedAsmOperand {
bool IsRV64;
struct RegOp {
- unsigned RegNum;
+ Register RegNum;
};
struct ImmOp {
@@ -235,26 +237,26 @@ public:
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
- case Register:
+ case KindTy::Register:
Reg = o.Reg;
break;
- case Immediate:
+ case KindTy::Immediate:
Imm = o.Imm;
break;
- case Token:
+ case KindTy::Token:
Tok = o.Tok;
break;
- case SystemRegister:
+ case KindTy::SystemRegister:
SysReg = o.SysReg;
break;
}
}
- bool isToken() const override { return Kind == Token; }
- bool isReg() const override { return Kind == Register; }
- bool isImm() const override { return Kind == Immediate; }
+ bool isToken() const override { return Kind == KindTy::Token; }
+ bool isReg() const override { return Kind == KindTy::Register; }
+ bool isImm() const override { return Kind == KindTy::Immediate; }
bool isMem() const override { return false; }
- bool isSystemRegister() const { return Kind == SystemRegister; }
+ bool isSystemRegister() const { return Kind == KindTy::SystemRegister; }
static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
RISCVMCExpr::VariantKind &VK) {
@@ -276,7 +278,7 @@ public:
// modifiers and isShiftedInt<N-1, 1>(Op).
template <int N> bool isBareSimmNLsb0() const {
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
@@ -292,7 +294,7 @@ public:
bool isBareSymbol() const {
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
@@ -302,7 +304,7 @@ public:
bool isCallSymbol() const {
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
@@ -313,7 +315,7 @@ public:
bool isTPRelAddSymbol() const {
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
@@ -364,7 +366,7 @@ public:
bool isImmXLenLI() const {
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
@@ -372,13 +374,13 @@ public:
return true;
// Given only Imm, ensuring that the actually specified constant is either
// a signed or unsigned 64-bit number is unfortunately impossible.
- bool IsInRange = isRV64() ? true : isInt<32>(Imm) || isUInt<32>(Imm);
- return IsConstantImm && IsInRange && VK == RISCVMCExpr::VK_RISCV_None;
+ return IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None &&
+ (isRV64() || (isInt<32>(Imm) || isUInt<32>(Imm)));
}
bool isUImmLog2XLen() const {
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
if (!evaluateConstantImm(getImm(), Imm, VK) ||
@@ -389,7 +391,7 @@ public:
bool isUImmLog2XLenNonZero() const {
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
if (!evaluateConstantImm(getImm(), Imm, VK) ||
@@ -402,7 +404,7 @@ public:
bool isUImm5() const {
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
@@ -411,7 +413,7 @@ public:
bool isUImm5NonZero() const {
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
@@ -422,7 +424,7 @@ public:
bool isSImm6() const {
if (!isImm())
return false;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isInt<6>(Imm) &&
@@ -432,7 +434,7 @@ public:
bool isSImm6NonZero() const {
if (!isImm())
return false;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isInt<6>(Imm) && (Imm != 0) &&
@@ -443,7 +445,7 @@ public:
if (!isImm())
return false;
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) &&
(isUInt<5>(Imm) || (Imm >= 0xfffe0 && Imm <= 0xfffff)) &&
@@ -454,7 +456,7 @@ public:
if (!isImm())
return false;
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
@@ -464,7 +466,7 @@ public:
if (!isImm())
return false;
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
@@ -474,7 +476,7 @@ public:
if (!isImm())
return false;
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
@@ -486,7 +488,7 @@ public:
if (!isImm())
return false;
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
@@ -496,14 +498,14 @@ public:
if (!isImm())
return false;
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<8, 2>(Imm) && (Imm != 0) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm12() const {
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsValid;
if (!isImm())
@@ -527,14 +529,14 @@ public:
if (!isImm())
return false;
int64_t Imm;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) && isShiftedInt<6, 4>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm20LUI() const {
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsValid;
if (!isImm())
@@ -552,7 +554,7 @@ public:
}
bool isUImm20AUIPC() const {
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsValid;
if (!isImm())
@@ -575,6 +577,15 @@ public:
bool isSImm21Lsb0JAL() const { return isBareSimmNLsb0<21>(); }
+ bool isImmZero() const {
+ if (!isImm())
+ return false;
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && (Imm == 0) && VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
/// getStartLoc - Gets location of the first token of this operand
SMLoc getStartLoc() const override { return StartLoc; }
/// getEndLoc - Gets location of the last token of this operand
@@ -583,38 +594,38 @@ public:
bool isRV64() const { return IsRV64; }
unsigned getReg() const override {
- assert(Kind == Register && "Invalid type access!");
- return Reg.RegNum;
+ assert(Kind == KindTy::Register && "Invalid type access!");
+ return Reg.RegNum.id();
}
StringRef getSysReg() const {
- assert(Kind == SystemRegister && "Invalid access!");
+ assert(Kind == KindTy::SystemRegister && "Invalid access!");
return StringRef(SysReg.Data, SysReg.Length);
}
const MCExpr *getImm() const {
- assert(Kind == Immediate && "Invalid type access!");
+ assert(Kind == KindTy::Immediate && "Invalid type access!");
return Imm.Val;
}
StringRef getToken() const {
- assert(Kind == Token && "Invalid type access!");
+ assert(Kind == KindTy::Token && "Invalid type access!");
return Tok;
}
void print(raw_ostream &OS) const override {
switch (Kind) {
- case Immediate:
+ case KindTy::Immediate:
OS << *getImm();
break;
- case Register:
+ case KindTy::Register:
OS << "<register x";
OS << getReg() << ">";
break;
- case Token:
+ case KindTy::Token:
OS << "'" << getToken() << "'";
break;
- case SystemRegister:
+ case KindTy::SystemRegister:
OS << "<sysreg: " << getSysReg() << '>';
break;
}
@@ -622,7 +633,7 @@ public:
static std::unique_ptr<RISCVOperand> createToken(StringRef Str, SMLoc S,
bool IsRV64) {
- auto Op = make_unique<RISCVOperand>(Token);
+ auto Op = std::make_unique<RISCVOperand>(KindTy::Token);
Op->Tok = Str;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -632,7 +643,7 @@ public:
static std::unique_ptr<RISCVOperand> createReg(unsigned RegNo, SMLoc S,
SMLoc E, bool IsRV64) {
- auto Op = make_unique<RISCVOperand>(Register);
+ auto Op = std::make_unique<RISCVOperand>(KindTy::Register);
Op->Reg.RegNum = RegNo;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -642,7 +653,7 @@ public:
static std::unique_ptr<RISCVOperand> createImm(const MCExpr *Val, SMLoc S,
SMLoc E, bool IsRV64) {
- auto Op = make_unique<RISCVOperand>(Immediate);
+ auto Op = std::make_unique<RISCVOperand>(KindTy::Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -652,7 +663,7 @@ public:
static std::unique_ptr<RISCVOperand>
createSysReg(StringRef Str, SMLoc S, unsigned Encoding, bool IsRV64) {
- auto Op = make_unique<RISCVOperand>(SystemRegister);
+ auto Op = std::make_unique<RISCVOperand>(KindTy::SystemRegister);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
Op->SysReg.Encoding = Encoding;
@@ -664,7 +675,7 @@ public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
assert(Expr && "Expr shouldn't be null!");
int64_t Imm = 0;
- RISCVMCExpr::VariantKind VK;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstant = evaluateConstantImm(Expr, Imm, VK);
if (IsConstant)
@@ -730,46 +741,9 @@ public:
#define GET_MATCHER_IMPLEMENTATION
#include "RISCVGenAsmMatcher.inc"
-// Return the matching FPR64 register for the given FPR32.
-// FIXME: Ideally this function could be removed in favour of using
-// information from TableGen.
-unsigned convertFPR32ToFPR64(unsigned Reg) {
- switch (Reg) {
- default:
- llvm_unreachable("Not a recognised FPR32 register");
- case RISCV::F0_32: return RISCV::F0_64;
- case RISCV::F1_32: return RISCV::F1_64;
- case RISCV::F2_32: return RISCV::F2_64;
- case RISCV::F3_32: return RISCV::F3_64;
- case RISCV::F4_32: return RISCV::F4_64;
- case RISCV::F5_32: return RISCV::F5_64;
- case RISCV::F6_32: return RISCV::F6_64;
- case RISCV::F7_32: return RISCV::F7_64;
- case RISCV::F8_32: return RISCV::F8_64;
- case RISCV::F9_32: return RISCV::F9_64;
- case RISCV::F10_32: return RISCV::F10_64;
- case RISCV::F11_32: return RISCV::F11_64;
- case RISCV::F12_32: return RISCV::F12_64;
- case RISCV::F13_32: return RISCV::F13_64;
- case RISCV::F14_32: return RISCV::F14_64;
- case RISCV::F15_32: return RISCV::F15_64;
- case RISCV::F16_32: return RISCV::F16_64;
- case RISCV::F17_32: return RISCV::F17_64;
- case RISCV::F18_32: return RISCV::F18_64;
- case RISCV::F19_32: return RISCV::F19_64;
- case RISCV::F20_32: return RISCV::F20_64;
- case RISCV::F21_32: return RISCV::F21_64;
- case RISCV::F22_32: return RISCV::F22_64;
- case RISCV::F23_32: return RISCV::F23_64;
- case RISCV::F24_32: return RISCV::F24_64;
- case RISCV::F25_32: return RISCV::F25_64;
- case RISCV::F26_32: return RISCV::F26_64;
- case RISCV::F27_32: return RISCV::F27_64;
- case RISCV::F28_32: return RISCV::F28_64;
- case RISCV::F29_32: return RISCV::F29_64;
- case RISCV::F30_32: return RISCV::F30_64;
- case RISCV::F31_32: return RISCV::F31_64;
- }
+static Register convertFPR64ToFPR32(Register Reg) {
+ assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register");
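+  // The F*_D and F*_F registers are laid out in parallel in the generated
+  // register enum, so the conversion is a fixed offset.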
+ return Reg - RISCV::F0_D + RISCV::F0_F;
}
unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
@@ -778,17 +752,17 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
if (!Op.isReg())
return Match_InvalidOperand;
- unsigned Reg = Op.getReg();
- bool IsRegFPR32 =
- RISCVMCRegisterClasses[RISCV::FPR32RegClassID].contains(Reg);
- bool IsRegFPR32C =
- RISCVMCRegisterClasses[RISCV::FPR32CRegClassID].contains(Reg);
+ Register Reg = Op.getReg();
+ bool IsRegFPR64 =
+ RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg);
+ bool IsRegFPR64C =
+ RISCVMCRegisterClasses[RISCV::FPR64CRegClassID].contains(Reg);
// As the parser couldn't differentiate an FPR32 from an FPR64, coerce the
- // register from FPR32 to FPR64 or FPR32C to FPR64C if necessary.
- if ((IsRegFPR32 && Kind == MCK_FPR64) ||
- (IsRegFPR32C && Kind == MCK_FPR64C)) {
- Op.Reg.RegNum = convertFPR32ToFPR64(Reg);
+ // register from FPR64 to FPR32 or FPR64C to FPR32C if necessary.
+ if ((IsRegFPR64 && Kind == MCK_FPR32) ||
+ (IsRegFPR64C && Kind == MCK_FPR32C)) {
+ Op.Reg.RegNum = convertFPR64ToFPR32(Reg);
return Match_Success;
}
return Match_InvalidOperand;
@@ -853,6 +827,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(Operands, ErrorInfo,
std::numeric_limits<int32_t>::min(),
std::numeric_limits<uint32_t>::max());
+ case Match_InvalidImmZero: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "immediate must be zero");
+ }
case Match_InvalidUImmLog2XLen:
if (isRV64())
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1);
@@ -968,14 +946,19 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// alternative ABI names), setting RegNo to the matching register. Upon
// failure, returns true and sets RegNo to 0. If IsRV32E then registers
// x16-x31 will be rejected.
-static bool matchRegisterNameHelper(bool IsRV32E, unsigned &RegNo,
+static bool matchRegisterNameHelper(bool IsRV32E, Register &RegNo,
StringRef Name) {
RegNo = MatchRegisterName(Name);
- if (RegNo == 0)
+ // The 32- and 64-bit FPRs have the same asm name. Check that the initial
+ // match always matches the 64-bit variant, and not the 32-bit one.
+ assert(!(RegNo >= RISCV::F0_F && RegNo <= RISCV::F31_F));
+ // The default FPR register class is based on the tablegen enum ordering.
+ static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated");
+ if (RegNo == RISCV::NoRegister)
RegNo = MatchRegisterAltName(Name);
if (IsRV32E && RegNo >= RISCV::X16 && RegNo <= RISCV::X31)
- RegNo = 0;
- return RegNo == 0;
+ RegNo = RISCV::NoRegister;
+ return RegNo == RISCV::NoRegister;
}
bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
@@ -986,7 +969,7 @@ bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = 0;
StringRef Name = getLexer().getTok().getIdentifier();
- if (matchRegisterNameHelper(isRV32E(), RegNo, Name))
+ if (matchRegisterNameHelper(isRV32E(), (Register&)RegNo, Name))
return Error(StartLoc, "invalid register name");
getParser().Lex(); // Eat identifier token.
@@ -1018,10 +1001,10 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
return MatchOperand_NoMatch;
case AsmToken::Identifier:
StringRef Name = getLexer().getTok().getIdentifier();
- unsigned RegNo;
+ Register RegNo;
matchRegisterNameHelper(isRV32E(), RegNo, Name);
- if (RegNo == 0) {
+ if (RegNo == RISCV::NoRegister) {
if (HadParens)
getLexer().UnLex(LParen);
return MatchOperand_NoMatch;
@@ -1208,6 +1191,24 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
Res = V;
} else
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+
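+  // Accept a trailing `+` or `-` expression (e.g. `sym+4`) and fold it into
+  // the symbol reference.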
+ MCBinaryExpr::Opcode Opcode;
+ switch (getLexer().getKind()) {
+ default:
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return MatchOperand_Success;
+ case AsmToken::Plus:
+ Opcode = MCBinaryExpr::Add;
+ break;
+ case AsmToken::Minus:
+ Opcode = MCBinaryExpr::Sub;
+ break;
+ }
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr))
+ return MatchOperand_ParseFail;
+ Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}
@@ -1282,6 +1283,73 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy RISCVAsmParser::parseAtomicMemOp(OperandVector &Operands) {
+ // Atomic operations such as lr.w, sc.w, and amo*.w accept a "memory operand"
+ // as one of their register operands, such as `(a0)`. This just denotes that
+ // the register (in this case `a0`) contains a memory address.
+ //
+ // Normally, we would be able to parse these by putting the parens into the
+ // instruction string. However, GNU as also accepts a zero-offset memory
+ // operand (such as `0(a0)`), and ignores the 0. Normally this would be parsed
+ // with parseImmediate followed by parseMemOpBaseReg, but these instructions
+ // do not accept an immediate operand, and we do not want to add a "dummy"
+ // operand that is silently dropped.
+ //
+ // Instead, we use this custom parser. This will: allow (and discard) an
+ // offset if it is zero; require (and discard) parentheses; and add only the
+ // parsed register operand to `Operands`.
+ //
+ // These operands are printed with RISCVInstPrinter::printAtomicMemOp, which
+ // will only print the register surrounded by parentheses (which GNU as also
+ // uses as its canonical representation for these operands).
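+  // For example, `lr.w a1, (a0)` and `lr.w a1, 0(a0)` are both accepted and
+  // yield the same single register operand; any non-zero offset is rejected.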
+ std::unique_ptr<RISCVOperand> OptionalImmOp;
+
+ if (getLexer().isNot(AsmToken::LParen)) {
+    // Parse an integer token. We do not accept arbitrary constant expressions
+ // in the offset field (because they may include parens, which complicates
+ // parsing a lot).
+ int64_t ImmVal;
+ SMLoc ImmStart = getLoc();
+ if (getParser().parseIntToken(ImmVal,
+ "expected '(' or optional integer offset"))
+ return MatchOperand_ParseFail;
+
+ // Create a RISCVOperand for checking later (so the error messages are
+ // nicer), but we don't add it to Operands.
+ SMLoc ImmEnd = getLoc();
+ OptionalImmOp =
+ RISCVOperand::createImm(MCConstantExpr::create(ImmVal, getContext()),
+ ImmStart, ImmEnd, isRV64());
+ }
+
+ if (getLexer().isNot(AsmToken::LParen)) {
+ Error(getLoc(), OptionalImmOp ? "expected '(' after optional integer offset"
+ : "expected '(' or optional integer offset");
+ return MatchOperand_ParseFail;
+ }
+ getParser().Lex(); // Eat '('
+
+ if (parseRegister(Operands) != MatchOperand_Success) {
+ Error(getLoc(), "expected register");
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ Error(getLoc(), "expected ')'");
+ return MatchOperand_ParseFail;
+ }
+ getParser().Lex(); // Eat ')'
+
+  // Deferred handling of non-zero offsets. This makes the error messages nicer.
+ if (OptionalImmOp && !OptionalImmOp->isImmZero()) {
+ Error(OptionalImmOp->getStartLoc(), "optional integer offset must be 0",
+ SMRange(OptionalImmOp->getStartLoc(), OptionalImmOp->getEndLoc()));
+ return MatchOperand_ParseFail;
+ }
+
+ return MatchOperand_Success;
+}
+
/// Looks at a token type and creates the relevant operand from this
 /// information, adding to Operands. If an operand was parsed, returns false,
 /// else true.
@@ -1523,12 +1591,12 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
S.EmitInstruction((Res ? CInst : Inst), getSTI());
}
-void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value,
+void RISCVAsmParser::emitLoadImm(Register DestReg, int64_t Value,
MCStreamer &Out) {
RISCVMatInt::InstSeq Seq;
RISCVMatInt::generateInstSeq(Value, isRV64(), Seq);
- unsigned SrcReg = RISCV::X0;
+ Register SrcReg = RISCV::X0;
for (RISCVMatInt::Inst &Inst : Seq) {
if (Inst.Opc == RISCV::LUI) {
emitToStreamer(
@@ -1682,7 +1750,7 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
default:
break;
case RISCV::PseudoLI: {
- unsigned Reg = Inst.getOperand(0).getReg();
+ Register Reg = Inst.getOperand(0).getReg();
const MCOperand &Op1 = Inst.getOperand(1);
if (Op1.isExpr()) {
// We must have li reg, %lo(sym) or li reg, %pcrel_lo(sym) or similar.
diff --git a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 36200c03f703..15943ba42156 100644
--- a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -13,6 +13,7 @@
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "Utils/RISCVBaseInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -56,17 +57,6 @@ extern "C" void LLVMInitializeRISCVDisassembler() {
createRISCVDisassembler);
}
-static const unsigned GPRDecoderTable[] = {
- RISCV::X0, RISCV::X1, RISCV::X2, RISCV::X3,
- RISCV::X4, RISCV::X5, RISCV::X6, RISCV::X7,
- RISCV::X8, RISCV::X9, RISCV::X10, RISCV::X11,
- RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,
- RISCV::X16, RISCV::X17, RISCV::X18, RISCV::X19,
- RISCV::X20, RISCV::X21, RISCV::X22, RISCV::X23,
- RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27,
- RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31
-};
-
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
@@ -76,38 +66,21 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
.getFeatureBits();
bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
- if (RegNo > array_lengthof(GPRDecoderTable) || (IsRV32E && RegNo > 15))
+ if (RegNo >= 32 || (IsRV32E && RegNo >= 16))
return MCDisassembler::Fail;
- // We must define our own mapping from RegNo to register identifier.
- // Accessing index RegNo in the register class will work in the case that
- // registers were added in ascending order, but not in general.
- unsigned Reg = GPRDecoderTable[RegNo];
+ Register Reg = RISCV::X0 + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
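
The removed decoder tables guarded against non-consecutive register
enumerators; the arithmetic form above instead assumes the generated
enumerators are consecutive. A minimal mirror of that assumed invariant,
using a standalone enum rather than the generated one:

  #include <cassert>

  // Hypothetical stand-in for the tablegen-generated GPR enumerators, which
  // the new decoders assume are consecutive so `X0 + RegNo` names x<RegNo>.
  enum GPR { X0 = 1, X1, X2, X3 /* ... through X31 in the generated file */ };

  int main() {
    unsigned RegNo = 2;
    assert(X0 + RegNo == static_cast<unsigned>(X2));
  }
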
-static const unsigned FPR32DecoderTable[] = {
- RISCV::F0_32, RISCV::F1_32, RISCV::F2_32, RISCV::F3_32,
- RISCV::F4_32, RISCV::F5_32, RISCV::F6_32, RISCV::F7_32,
- RISCV::F8_32, RISCV::F9_32, RISCV::F10_32, RISCV::F11_32,
- RISCV::F12_32, RISCV::F13_32, RISCV::F14_32, RISCV::F15_32,
- RISCV::F16_32, RISCV::F17_32, RISCV::F18_32, RISCV::F19_32,
- RISCV::F20_32, RISCV::F21_32, RISCV::F22_32, RISCV::F23_32,
- RISCV::F24_32, RISCV::F25_32, RISCV::F26_32, RISCV::F27_32,
- RISCV::F28_32, RISCV::F29_32, RISCV::F30_32, RISCV::F31_32
-};
-
static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- if (RegNo > array_lengthof(FPR32DecoderTable))
+ if (RegNo >= 32)
return MCDisassembler::Fail;
- // We must define our own mapping from RegNo to register identifier.
- // Accessing index RegNo in the register class will work in the case that
- // registers were added in ascending order, but not in general.
- unsigned Reg = FPR32DecoderTable[RegNo];
+ Register Reg = RISCV::F0_F + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -115,35 +88,21 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- if (RegNo > 8) {
+ if (RegNo >= 8) {
return MCDisassembler::Fail;
}
- unsigned Reg = FPR32DecoderTable[RegNo + 8];
+ Register Reg = RISCV::F8_F + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
-static const unsigned FPR64DecoderTable[] = {
- RISCV::F0_64, RISCV::F1_64, RISCV::F2_64, RISCV::F3_64,
- RISCV::F4_64, RISCV::F5_64, RISCV::F6_64, RISCV::F7_64,
- RISCV::F8_64, RISCV::F9_64, RISCV::F10_64, RISCV::F11_64,
- RISCV::F12_64, RISCV::F13_64, RISCV::F14_64, RISCV::F15_64,
- RISCV::F16_64, RISCV::F17_64, RISCV::F18_64, RISCV::F19_64,
- RISCV::F20_64, RISCV::F21_64, RISCV::F22_64, RISCV::F23_64,
- RISCV::F24_64, RISCV::F25_64, RISCV::F26_64, RISCV::F27_64,
- RISCV::F28_64, RISCV::F29_64, RISCV::F30_64, RISCV::F31_64
-};
-
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- if (RegNo > array_lengthof(FPR64DecoderTable))
+ if (RegNo >= 32)
return MCDisassembler::Fail;
- // We must define our own mapping from RegNo to register identifier.
- // Accessing index RegNo in the register class will work in the case that
- // registers were added in ascending order, but not in general.
- unsigned Reg = FPR64DecoderTable[RegNo];
+ Register Reg = RISCV::F0_D + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -151,10 +110,10 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- if (RegNo > 8) {
+ if (RegNo >= 8) {
return MCDisassembler::Fail;
}
- unsigned Reg = FPR64DecoderTable[RegNo + 8];
+ Register Reg = RISCV::F8_D + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -182,10 +141,10 @@ static DecodeStatus DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- if (RegNo > 8)
+ if (RegNo >= 8)
return MCDisassembler::Fail;
- unsigned Reg = GPRDecoderTable[RegNo + 8];
+ Register Reg = RISCV::X8 + RegNo;
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -279,8 +238,80 @@ static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
#include "RISCVGenDisassemblerTables.inc"
+static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ uint64_t SImm6 =
+ fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
+ DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder);
+ (void)Result;
+ assert(Result == MCDisassembler::Success && "Invalid immediate");
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeGPRRegisterClass(Inst, 0, Address, Decoder);
+ uint64_t SImm6 =
+ fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
+ DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder);
+ (void)Result;
+ assert(Result == MCDisassembler::Success && "Invalid immediate");
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeGPRRegisterClass(Inst, 0, Address, Decoder);
+ Inst.addOperand(Inst.getOperand(0));
+ uint64_t UImm6 =
+ fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
+ DecodeStatus Result = decodeUImmOperand<6>(Inst, UImm6, Address, Decoder);
+ (void)Result;
+ assert(Result == MCDisassembler::Success && "Invalid immediate");
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(Insn, 7, 5);
+ unsigned Rs2 = fieldFromInstruction(Insn, 2, 5);
+ DecodeGPRRegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(Insn, 7, 5);
+ unsigned Rs2 = fieldFromInstruction(Insn, 2, 5);
+ DecodeGPRRegisterClass(Inst, Rd, Address, Decoder);
+ Inst.addOperand(Inst.getOperand(0));
+ DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder);
+ return MCDisassembler::Success;
+}
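
As a worked sketch of the immediate reassembly these helpers perform, here is
a standalone analogue of fieldFromInstruction (illustrative only, not the
LLVM helper itself), fed a hypothetical `c.li a0, -3` encoding:

  #include <cassert>
  #include <cstdint>

  // Extract `Width` bits of `Insn` starting at `StartBit`.
  static uint64_t extractField(uint32_t Insn, unsigned StartBit,
                               unsigned Width) {
    return (Insn >> StartBit) & ((1u << Width) - 1);
  }

  int main() {
    // `c.li a0, -3` encodes as 0x5575: imm[5] sits in bit 12, imm[4:0] in
    // bits 6:2 -- the two fields the decoders above glue together.
    uint32_t Insn = 0x5575;
    uint64_t SImm6 = extractField(Insn, 12, 1) << 5 | extractField(Insn, 2, 5);
    assert(SImm6 == 0x3D); // 0b111101, i.e. -3 after 6-bit sign extension
  }
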
+
DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index ee5f760ebcb0..f6b727ae37c7 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -30,9 +30,16 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCValue &Target) {
bool ShouldForce = false;
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default:
break;
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ case FK_Data_8:
+ if (Target.isAbsolute())
+ return false;
+ break;
case RISCV::fixup_riscv_got_hi20:
case RISCV::fixup_riscv_tls_got_hi20:
case RISCV::fixup_riscv_tls_gd_hi20:
@@ -48,7 +55,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
return false;
}
- switch ((unsigned)T->getKind()) {
+ switch (T->getTargetKind()) {
default:
llvm_unreachable("Unexpected fixup kind for pcrel_lo12");
break;
@@ -83,7 +90,7 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
return true;
int64_t Offset = int64_t(Value);
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default:
return false;
case RISCV::fixup_riscv_rvc_branch:
@@ -174,8 +181,7 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
MCContext &Ctx) {
- unsigned Kind = Fixup.getKind();
- switch (Kind) {
+ switch (Fixup.getTargetKind()) {
default:
llvm_unreachable("Unknown fixup kind!");
case RISCV::fixup_riscv_got_hi20:
@@ -186,6 +192,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
+ case FK_Data_6b:
return Value;
case RISCV::fixup_riscv_lo12_i:
case RISCV::fixup_riscv_pcrel_lo12_i:
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 3ccbc86d2619..cab2bbcb81bc 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVFixupKinds.h"
+#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
@@ -47,8 +48,9 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
+ const MCExpr *Expr = Fixup.getValue();
// Determine the type of the relocation
- unsigned Kind = Fixup.getKind();
+ unsigned Kind = Fixup.getTargetKind();
if (IsPCRel) {
switch (Kind) {
default:
@@ -87,6 +89,9 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
default:
llvm_unreachable("invalid fixup kind!");
case FK_Data_4:
+ if (Expr->getKind() == MCExpr::Target &&
+ cast<RISCVMCExpr>(Expr)->getKind() == RISCVMCExpr::VK_RISCV_32_PCREL)
+ return ELF::R_RISCV_32_PCREL;
return ELF::R_RISCV_32;
case FK_Data_8:
return ELF::R_RISCV_64;
@@ -98,6 +103,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_RISCV_ADD32;
case FK_Data_Add_8:
return ELF::R_RISCV_ADD64;
+ case FK_Data_Add_6b:
+ return ELF::R_RISCV_SET6;
case FK_Data_Sub_1:
return ELF::R_RISCV_SUB8;
case FK_Data_Sub_2:
@@ -106,6 +113,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_RISCV_SUB32;
case FK_Data_Sub_8:
return ELF::R_RISCV_SUB64;
+ case FK_Data_Sub_6b:
+ return ELF::R_RISCV_SUB6;
case RISCV::fixup_riscv_hi20:
return ELF::R_RISCV_HI20;
case RISCV::fixup_riscv_lo12_i:
@@ -129,5 +138,5 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
std::unique_ptr<MCObjectTargetWriter>
llvm::createRISCVELFObjectWriter(uint8_t OSABI, bool Is64Bit) {
- return llvm::make_unique<RISCVELFObjectWriter>(OSABI, Is64Bit);
+ return std::make_unique<RISCVELFObjectWriter>(OSABI, Is64Bit);
}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index fe37b70811d8..8b5fe6dd8252 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -39,6 +39,30 @@ static cl::opt<bool>
cl::desc("Disable the emission of assembler pseudo instructions"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ ArchRegNames("riscv-arch-reg-names",
+ cl::desc("Print architectural register names rather than the "
+ "ABI names (such as x2 instead of sp)"),
+ cl::init(false), cl::Hidden);
+
+// The command-line flags above are used by llvm-mc and llc. They can be used by
+// `llvm-objdump`, but we override their values here to handle options passed to
+// `llvm-objdump` with `-M` (which matches GNU objdump). There did not seem to
+// be an easier way to support these options across all these tools.
+bool RISCVInstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
+ if (Opt == "no-aliases") {
+ NoAliases = true;
+ return true;
+ }
+ if (Opt == "numeric") {
+ ArchRegNames = true;
+ return true;
+ }
+
+ return false;
+}
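
A usage sketch (assuming llvm-objdump forwards comma-separated `-M` values to
this hook the same way GNU objdump treats them):

  llvm-objdump -d -M no-aliases foo.o   # canonical mnemonics, no pseudo aliases
  llvm-objdump -d -M numeric foo.o      # architectural names, e.g. x2 not sp
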
+
void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
bool Res = false;
@@ -112,3 +136,20 @@ void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm());
O << RISCVFPRndMode::roundingModeToString(FRMArg);
}
+
+void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ assert(MO.isReg() && "printAtomicMemOp can only print register operands");
+ O << "(";
+ printRegName(O, MO.getReg());
+ O << ")";
+}
+
+const char *RISCVInstPrinter::getRegisterName(unsigned RegNo) {
+ return getRegisterName(RegNo, ArchRegNames ? RISCV::NoRegAltName
+ : RISCV::ABIRegAltName);
+}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index 5ca1d3fa20fe..189d72626f3e 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -25,6 +25,8 @@ public:
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
+ bool applyTargetSpecificCLOption(StringRef Opt) override;
+
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
void printRegName(raw_ostream &O, unsigned RegNo) const override;
@@ -37,6 +39,8 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printAtomicMemOp(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
@@ -46,8 +50,8 @@ public:
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx,
const MCSubtargetInfo &STI, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo,
- unsigned AltIdx = RISCV::ABIRegAltName);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getRegisterName(unsigned RegNo, unsigned AltIdx);
};
} // namespace llvm
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 983629692883..089a2def4c21 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -11,7 +11,10 @@
//===----------------------------------------------------------------------===//
#include "RISCVMCAsmInfo.h"
+#include "MCTargetDesc/RISCVMCExpr.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
void RISCVMCAsmInfo::anchor() {}
@@ -25,3 +28,20 @@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
}
+
+const MCExpr *RISCVMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ if (!(Encoding & dwarf::DW_EH_PE_pcrel))
+ return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer);
+
+ // The default symbol subtraction results in an ADD/SUB relocation pair.
+ // Processing this relocation pair is problematic when linker relaxation is
+ // enabled, so we follow binutils in using the R_RISCV_32_PCREL relocation
+ // for the FDE initial location.
+ MCContext &Ctx = Streamer.getContext();
+ const MCExpr *ME =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
+ assert(Encoding & dwarf::DW_EH_PE_sdata4 && "Unexpected encoding");
+ return RISCVMCExpr::create(ME, RISCVMCExpr::VK_RISCV_32_PCREL, Ctx);
+}
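
A small check of the encoding bits tested above; the constant values are
restated locally here and the real definitions live in
llvm/BinaryFormat/Dwarf.h:

  #include <cassert>

  int main() {
    const unsigned DW_EH_PE_pcrel = 0x10, DW_EH_PE_sdata4 = 0x0B;
    unsigned Encoding = DW_EH_PE_pcrel | DW_EH_PE_sdata4; // typical FDE encoding
    assert(Encoding & DW_EH_PE_pcrel);  // takes the R_RISCV_32_PCREL path above
    assert((Encoding & 0x0F) == DW_EH_PE_sdata4); // satisfies the assert above
  }
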
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
index 043fdb7c08c0..6824baf699aa 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
@@ -23,6 +23,9 @@ class RISCVMCAsmInfo : public MCAsmInfoELF {
public:
explicit RISCVMCAsmInfo(const Triple &TargetTriple);
+
+ const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const override;
};
} // namespace llvm
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 0fc775f63ed4..de99960848a5 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "Utils/RISCVBaseInfo.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -100,7 +101,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
const MCSubtargetInfo &STI) const {
MCInst TmpInst;
MCOperand Func;
- unsigned Ra;
+ Register Ra;
if (MI.getOpcode() == RISCV::PseudoTAIL) {
Func = MI.getOperand(0);
Ra = RISCV::X6;
@@ -266,6 +267,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
switch (RVExpr->getKind()) {
case RISCVMCExpr::VK_RISCV_None:
case RISCVMCExpr::VK_RISCV_Invalid:
+ case RISCVMCExpr::VK_RISCV_32_PCREL:
llvm_unreachable("Unhandled fixup kind!");
case RISCVMCExpr::VK_RISCV_TPREL_ADD:
// tprel_add is only used to indicate that a relocation should be emitted
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
index b5a292dc1b1a..921df376f3df 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
@@ -36,6 +36,7 @@ public:
VK_RISCV_TLS_GD_HI,
VK_RISCV_CALL,
VK_RISCV_CALL_PLT,
+ VK_RISCV_32_PCREL,
VK_RISCV_Invalid
};
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index bc45262ab2de..5a4c86e48f1e 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -16,7 +16,9 @@
#include "RISCVMCAsmInfo.h"
#include "RISCVTargetStreamer.h"
#include "TargetInfo/RISCVTargetInfo.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -52,7 +54,7 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
MCAsmInfo *MAI = new RISCVMCAsmInfo(TT);
- unsigned SP = MRI.getDwarfRegNum(RISCV::X2, true);
+ Register SP = MRI.getDwarfRegNum(RISCV::X2, true);
MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
MAI->addInitialFrameState(Inst);
diff --git a/lib/Target/RISCV/RISCV.h b/lib/Target/RISCV/RISCV.h
index 834a1d171143..f23f742a4782 100644
--- a/lib/Target/RISCV/RISCV.h
+++ b/lib/Target/RISCV/RISCV.h
@@ -18,9 +18,12 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+class RISCVRegisterBankInfo;
+class RISCVSubtarget;
class RISCVTargetMachine;
class AsmPrinter;
class FunctionPass;
+class InstructionSelector;
class MCInst;
class MCOperand;
class MachineInstr;
@@ -39,6 +42,10 @@ void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
FunctionPass *createRISCVExpandPseudoPass();
void initializeRISCVExpandPseudoPass(PassRegistry &);
+
+InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
+ RISCVSubtarget &,
+ RISCVRegisterBankInfo &);
}
#endif
diff --git a/lib/Target/RISCV/RISCV.td b/lib/Target/RISCV/RISCV.td
index e19b70b8e709..46530a8f74a8 100644
--- a/lib/Target/RISCV/RISCV.td
+++ b/lib/Target/RISCV/RISCV.td
@@ -43,6 +43,11 @@ def FeatureStdExtC
def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">,
AssemblerPredicate<"FeatureStdExtC">;
+def FeatureRVCHints
+ : SubtargetFeature<"rvc-hints", "EnableRVCHintInstrs", "true",
+ "Enable RVC Hint Instructions.">;
+def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">,
+ AssemblerPredicate<"FeatureRVCHints">;
def Feature64Bit
: SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">;
@@ -77,14 +82,16 @@ include "RISCVSystemOperands.td"
include "RISCVRegisterInfo.td"
include "RISCVCallingConv.td"
include "RISCVInstrInfo.td"
+include "RISCVRegisterBanks.td"
//===----------------------------------------------------------------------===//
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"generic-rv32", NoSchedModel, []>;
+def : ProcessorModel<"generic-rv32", NoSchedModel, [FeatureRVCHints]>;
-def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>;
+def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit,
+ FeatureRVCHints]>;
//===----------------------------------------------------------------------===//
// Define the RISC-V target.
diff --git a/lib/Target/RISCV/RISCVCallLowering.cpp b/lib/Target/RISCV/RISCVCallLowering.cpp
new file mode 100644
index 000000000000..c63a84739c4a
--- /dev/null
+++ b/lib/Target/RISCV/RISCVCallLowering.cpp
@@ -0,0 +1,50 @@
+//===-- RISCVCallLowering.cpp - Call lowering -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements the lowering of LLVM calls to machine code calls for
+/// GlobalISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVCallLowering.h"
+#include "RISCVISelLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+
+using namespace llvm;
+
+RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI)
+ : CallLowering(&TLI) {}
+
+bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val,
+ ArrayRef<Register> VRegs) const {
+
+ MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(RISCV::PseudoRET);
+
+ if (Val != nullptr) {
+ return false;
+ }
+ MIRBuilder.insertInstr(Ret);
+ return true;
+}
+
+bool RISCVCallLowering::lowerFormalArguments(
+ MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs) const {
+
+ if (F.arg_empty())
+ return true;
+
+ return false;
+}
+
+bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const {
+ return false;
+}
diff --git a/lib/Target/RISCV/RISCVCallLowering.h b/lib/Target/RISCV/RISCVCallLowering.h
new file mode 100644
index 000000000000..7ce074a61f0a
--- /dev/null
+++ b/lib/Target/RISCV/RISCVCallLowering.h
@@ -0,0 +1,42 @@
+//===-- RISCVCallLowering.h - Call lowering ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file describes how to lower LLVM calls to machine code calls.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVCALLLOWERING_H
+#define LLVM_LIB_TARGET_RISCV_RISCVCALLLOWERING_H
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+namespace llvm {
+
+class RISCVTargetLowering;
+
+class RISCVCallLowering : public CallLowering {
+
+public:
+ RISCVCallLowering(const RISCVTargetLowering &TLI);
+
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<Register> VRegs) const override;
+
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs) const override;
+
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_RISCV_RISCVCALLLOWERING_H
diff --git a/lib/Target/RISCV/RISCVCallingConv.td b/lib/Target/RISCV/RISCVCallingConv.td
index db13e6e8beca..025454f8fcca 100644
--- a/lib/Target/RISCV/RISCVCallingConv.td
+++ b/lib/Target/RISCV/RISCVCallingConv.td
@@ -18,11 +18,11 @@ def CSR_ILP32_LP64
def CSR_ILP32F_LP64F
: CalleeSavedRegs<(add CSR_ILP32_LP64,
- F8_32, F9_32, (sequence "F%u_32", 18, 27))>;
+ F8_F, F9_F, (sequence "F%u_F", 18, 27))>;
def CSR_ILP32D_LP64D
: CalleeSavedRegs<(add CSR_ILP32_LP64,
- F8_64, F9_64, (sequence "F%u_64", 18, 27))>;
+ F8_D, F9_D, (sequence "F%u_D", 18, 27))>;
// Needed for implementation of RISCVRegisterInfo::getNoPreservedMask()
def CSR_NoRegs : CalleeSavedRegs<(add)>;
@@ -43,12 +43,12 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add X1,
(sequence "X%u", 12, 17),
(sequence "X%u", 18, 27),
(sequence "X%u", 28, 31),
- (sequence "F%u_32", 0, 7),
- (sequence "F%u_32", 10, 11),
- (sequence "F%u_32", 12, 17),
- (sequence "F%u_32", 28, 31),
- (sequence "F%u_32", 8, 9),
- (sequence "F%u_32", 18, 27))>;
+ (sequence "F%u_F", 0, 7),
+ (sequence "F%u_F", 10, 11),
+ (sequence "F%u_F", 12, 17),
+ (sequence "F%u_F", 28, 31),
+ (sequence "F%u_F", 8, 9),
+ (sequence "F%u_F", 18, 27))>;
// Same as CSR_Interrupt, but including all 64-bit FP registers.
def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add X1,
@@ -57,9 +57,9 @@ def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add X1,
(sequence "X%u", 12, 17),
(sequence "X%u", 18, 27),
(sequence "X%u", 28, 31),
- (sequence "F%u_64", 0, 7),
- (sequence "F%u_64", 10, 11),
- (sequence "F%u_64", 12, 17),
- (sequence "F%u_64", 28, 31),
- (sequence "F%u_64", 8, 9),
- (sequence "F%u_64", 18, 27))>;
+ (sequence "F%u_D", 0, 7),
+ (sequence "F%u_D", 10, 11),
+ (sequence "F%u_D", 12, 17),
+ (sequence "F%u_D", 28, 31),
+ (sequence "F%u_D", 8, 9),
+ (sequence "F%u_D", 18, 27))>;
diff --git a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 1c5171a7b7a4..da5cd16e750c 100644
--- a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -235,10 +235,10 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB,
AtomicRMWInst::BinOp BinOp, int Width) {
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned ScratchReg = MI.getOperand(1).getReg();
- unsigned AddrReg = MI.getOperand(2).getReg();
- unsigned IncrReg = MI.getOperand(3).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register IncrReg = MI.getOperand(3).getReg();
AtomicOrdering Ordering =
static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
@@ -271,9 +271,9 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
}
static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
- MachineBasicBlock *MBB, unsigned DestReg,
- unsigned OldValReg, unsigned NewValReg,
- unsigned MaskReg, unsigned ScratchReg) {
+ MachineBasicBlock *MBB, Register DestReg,
+ Register OldValReg, Register NewValReg,
+ Register MaskReg, Register ScratchReg) {
assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
@@ -297,11 +297,11 @@ static void doMaskedAtomicBinOpExpansion(
MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
assert(Width == 32 && "Should never need to expand masked 64-bit operations");
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned ScratchReg = MI.getOperand(1).getReg();
- unsigned AddrReg = MI.getOperand(2).getReg();
- unsigned IncrReg = MI.getOperand(3).getReg();
- unsigned MaskReg = MI.getOperand(4).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register IncrReg = MI.getOperand(3).getReg();
+ Register MaskReg = MI.getOperand(4).getReg();
AtomicOrdering Ordering =
static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
@@ -394,8 +394,8 @@ bool RISCVExpandPseudo::expandAtomicBinOp(
}
static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
- MachineBasicBlock *MBB, unsigned ValReg,
- unsigned ShamtReg) {
+ MachineBasicBlock *MBB, Register ValReg,
+ Register ShamtReg) {
BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
.addReg(ValReg)
.addReg(ShamtReg);
@@ -436,12 +436,12 @@ bool RISCVExpandPseudo::expandAtomicMinMaxOp(
DoneMBB->transferSuccessors(&MBB);
MBB.addSuccessor(LoopHeadMBB);
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned Scratch1Reg = MI.getOperand(1).getReg();
- unsigned Scratch2Reg = MI.getOperand(2).getReg();
- unsigned AddrReg = MI.getOperand(3).getReg();
- unsigned IncrReg = MI.getOperand(4).getReg();
- unsigned MaskReg = MI.getOperand(5).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
+ Register Scratch1Reg = MI.getOperand(1).getReg();
+ Register Scratch2Reg = MI.getOperand(2).getReg();
+ Register AddrReg = MI.getOperand(3).getReg();
+ Register IncrReg = MI.getOperand(4).getReg();
+ Register MaskReg = MI.getOperand(5).getReg();
bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
AtomicOrdering Ordering =
static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
@@ -549,11 +549,11 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
DoneMBB->transferSuccessors(&MBB);
MBB.addSuccessor(LoopHeadMBB);
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned ScratchReg = MI.getOperand(1).getReg();
- unsigned AddrReg = MI.getOperand(2).getReg();
- unsigned CmpValReg = MI.getOperand(3).getReg();
- unsigned NewValReg = MI.getOperand(4).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register CmpValReg = MI.getOperand(3).getReg();
+ Register NewValReg = MI.getOperand(4).getReg();
AtomicOrdering Ordering =
static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
@@ -582,7 +582,7 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
// lr.w dest, (addr)
// and scratch, dest, mask
// bne scratch, cmpval, done
- unsigned MaskReg = MI.getOperand(5).getReg();
+ Register MaskReg = MI.getOperand(5).getReg();
BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
@@ -629,7 +629,7 @@ bool RISCVExpandPseudo::expandAuipcInstPair(
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
const MachineOperand &Symbol = MI.getOperand(1);
MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp
index 32c3b9684d2c..6b6f62e18ce9 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -40,8 +40,16 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
uint64_t FrameSize = MFI.getStackSize();
// Get the alignment.
- uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment()
- : getStackAlignment();
+ unsigned StackAlign = getStackAlignment();
+ if (RI->needsStackRealignment(MF)) {
+ unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment());
+ FrameSize += (MaxStackAlign - StackAlign);
+ StackAlign = MaxStackAlign;
+ }
+
+ // Set the max call frame size.
+ uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign);
+ MFI.setMaxCallFrameSize(MaxCallSize);
// Make sure the frame is aligned.
FrameSize = alignTo(FrameSize, StackAlign);
@@ -52,8 +60,8 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, int64_t Val,
+ const DebugLoc &DL, Register DestReg,
+ Register SrcReg, int64_t Val,
MachineInstr::MIFlag Flag) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const RISCVInstrInfo *TII = STI.getInstrInfo();
@@ -66,7 +74,7 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB,
.addReg(SrcReg)
.addImm(Val)
.setMIFlag(Flag);
- } else if (isInt<32>(Val)) {
+ } else {
unsigned Opc = RISCV::ADD;
bool isSub = Val < 0;
if (isSub) {
@@ -74,22 +82,20 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB,
Opc = RISCV::SUB;
}
- unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- TII->movImm32(MBB, MBBI, DL, ScratchReg, Val, Flag);
+ Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag);
BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
.addReg(SrcReg)
.addReg(ScratchReg, RegState::Kill)
.setMIFlag(Flag);
- } else {
- report_fatal_error("adjustReg cannot yet handle adjustments >32 bits");
}
}
// Returns the register used to hold the frame pointer.
-static unsigned getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; }
+static Register getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; }
// Returns the register used to hold the stack pointer.
-static unsigned getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; }
+static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; }
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
@@ -101,8 +107,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
const RISCVInstrInfo *TII = STI.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
- unsigned FPReg = getFPReg(STI);
- unsigned SPReg = getSPReg(STI);
+ if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) {
+ report_fatal_error(
+ "RISC-V backend can't currently handle functions that need stack "
+ "realignment and have variable sized objects");
+ }
+
+ Register FPReg = getFPReg(STI);
+ Register SPReg = getSPReg(STI);
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -119,6 +131,11 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
if (StackSize == 0 && !MFI.adjustsStack())
return;
+ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
+ // Split the SP adjustment to reduce the offsets of the callee-saved spills.
+ if (FirstSPAdjustAmount)
+ StackSize = FirstSPAdjustAmount;
+
// Allocate space on the stack if necessary.
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
@@ -141,7 +158,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// directives.
for (const auto &Entry : CSI) {
int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx());
- unsigned Reg = Entry.getReg();
+ Register Reg = Entry.getReg();
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, RI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
@@ -159,6 +176,45 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
+
+ // Emit the second SP adjustment after saving callee saved registers.
+ if (FirstSPAdjustAmount) {
+ uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount;
+ assert(SecondSPAdjustAmount > 0 &&
+ "SecondSPAdjustAmount should be greater than zero");
+ adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
+ MachineInstr::FrameSetup);
+ // Emit ".cfi_def_cfa_offset StackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
+ if (hasFP(MF)) {
+ // Realign Stack
+ const RISCVRegisterInfo *RI = STI.getRegisterInfo();
+ if (RI->needsStackRealignment(MF)) {
+ unsigned MaxAlignment = MFI.getMaxAlignment();
+
+ const RISCVInstrInfo *TII = STI.getInstrInfo();
+ if (isInt<12>(-(int)MaxAlignment)) {
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg)
+ .addReg(SPReg)
+ .addImm(-(int)MaxAlignment);
+ } else {
+ unsigned ShiftAmount = countTrailingZeros(MaxAlignment);
+ Register VR =
+ MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR)
+ .addReg(SPReg)
+ .addImm(ShiftAmount);
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg)
+ .addReg(VR)
+ .addImm(ShiftAmount);
+ }
+ }
+ }
}
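
A minimal model of the realignment arithmetic above: when -MaxAlignment fits a
signed 12-bit immediate a single `andi` clears the low bits, and otherwise the
srli/slli pair clears the same countTrailingZeros(MaxAlignment) bits. Both
strategies compute the rounding sketched here (standalone code, illustrative
only):

  #include <cassert>
  #include <cstdint>

  // Round SP down to a multiple of MaxAlignment (a power of two).
  static uint64_t alignDown(uint64_t SP, uint64_t MaxAlignment) {
    unsigned Shift = __builtin_ctzll(MaxAlignment); // countTrailingZeros
    return (SP >> Shift) << Shift;                  // same as SP & -MaxAlignment
  }

  int main() {
    assert(alignDown(0x10FCC, 64) == 0x10FC0);   // andi sp, sp, -64 case
    assert(alignDown(0x10FCC, 4096) == 0x10000); // srli/slli case
  }
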
void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -169,8 +225,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
DebugLoc DL = MBBI->getDebugLoc();
const RISCVInstrInfo *TII = STI.getInstrInfo();
- unsigned FPReg = getFPReg(STI);
- unsigned SPReg = getSPReg(STI);
+ Register FPReg = getFPReg(STI);
+ Register SPReg = getSPReg(STI);
// Skip to before the restores of callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
@@ -189,11 +245,29 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);
}
+ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
+ if (FirstSPAdjustAmount) {
+ uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount;
+ assert(SecondSPAdjustAmount > 0 &&
+ "SecondSPAdjustAmount should be greater than zero");
+
+ adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, SecondSPAdjustAmount,
+ MachineInstr::FrameDestroy);
+
+ // Emit ".cfi_def_cfa_offset FirstSPAdjustAmount"
+ unsigned CFIIndex =
+ MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr,
+ -FirstSPAdjustAmount));
+ BuildMI(MBB, LastFrameDestroy, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
if (hasFP(MF)) {
// To find the instruction restoring FP from stack.
for (auto &I = LastFrameDestroy; I != MBBI; ++I) {
if (I->mayLoad() && I->getOperand(0).isReg()) {
- unsigned DestReg = I->getOperand(0).getReg();
+ Register DestReg = I->getOperand(0).getReg();
if (DestReg == FPReg) {
// If there is frame pointer, after restoring $fp registers, we
// need adjust CFA to ($sp - FPOffset).
@@ -214,13 +288,16 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
// Iterate over list of callee-saved registers and emit .cfi_restore
// directives.
for (const auto &Entry : CSI) {
- unsigned Reg = Entry.getReg();
+ Register Reg = Entry.getReg();
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
nullptr, RI->getDwarfRegNum(Reg, true)));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
+ if (FirstSPAdjustAmount)
+ StackSize = FirstSPAdjustAmount;
+
// Deallocate stack
adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy);
@@ -249,6 +326,8 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea() +
MFI.getOffsetAdjustment();
+ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
+
if (CSI.size()) {
MinCSFI = CSI[0].getFrameIdx();
MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
@@ -256,6 +335,17 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
if (FI >= MinCSFI && FI <= MaxCSFI) {
FrameReg = RISCV::X2;
+
+ if (FirstSPAdjustAmount)
+ Offset += FirstSPAdjustAmount;
+ else
+ Offset += MF.getFrameInfo().getStackSize();
+ } else if (RI->needsStackRealignment(MF)) {
+ assert(!MFI.hasVarSizedObjects() &&
+ "Unexpected combination of stack realignment and varsized objects");
+ // If the stack was realigned, the frame pointer is set in order to allow
+ // SP to be restored, but we still access stack objects using SP.
+ FrameReg = RISCV::X2;
Offset += MF.getFrameInfo().getStackSize();
} else {
FrameReg = RI->getFrameRegister(MF);
@@ -338,7 +428,7 @@ bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
- unsigned SPReg = RISCV::X2;
+ Register SPReg = RISCV::X2;
DebugLoc DL = MI->getDebugLoc();
if (!hasReservedCallFrame(MF)) {
@@ -362,3 +452,39 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(MI);
}
+
+// We would like to split the SP adjustment in the prologue/epilogue into the
+// instruction sequence below, so that the offset of each callee-saved
+// register fits in a single store.
+// add sp,sp,-2032
+// sw ra,2028(sp)
+// sw s0,2024(sp)
+// sw s1,2020(sp)
+// sw s3,2012(sp)
+// sw s4,2008(sp)
+// add sp,sp,-64
+uint64_t
+RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ uint64_t StackSize = MFI.getStackSize();
+ uint64_t StackAlign = getStackAlignment();
+
+ // FIXME: Disable SplitSPAdjust once the save-restore libcall patch lands
+ // and the libcall is enabled. The callee-saved registers will be pushed by
+ // the save-restore libcalls, so we won't have to split the SP adjustment
+ // in that case.
+ //
+ // Return the FirstSPAdjustAmount if the StackSize cannot fit in a signed
+ // 12-bit immediate and there is at least one callee-saved register to push.
+ if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
+ // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 would
+ // cause `sp = sp + 2048` in the epilogue to be split into multiple
+ // instructions. Offsets smaller than 2048 fit in a single load/store
+ // instruction, and we have to preserve the stack alignment. 2048 is
+ // 16-byte aligned; the stack alignment is 16 for RV32 and RV64 and 4 for
+ // RV32E, so (2048 - StackAlign) satisfies the stack alignment.
+ return 2048 - StackAlign;
+ }
+ return 0;
+}
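
A worked instance of the split, with a hypothetical frame chosen purely for
illustration: StackSize = 2200 and a 16-byte stack alignment give a first
adjustment of 2032, which still fits the signed 12-bit `addi` immediate, and
the remainder is applied after the callee-saved stores:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t StackSize = 2200, StackAlign = 16; // hypothetical frame
    uint64_t First = 2048 - StackAlign;  // 2032, still 16-byte aligned
    uint64_t Second = StackSize - First; // 168, applied after the CSR stores
    assert(First == 2032 && Second == 168);
    assert(First % StackAlign == 0 && First <= 2047);
  }
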
diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h
index 0e045c3ff853..f4a5949773d9 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/lib/Target/RISCV/RISCVFrameLowering.h
@@ -22,7 +22,7 @@ class RISCVFrameLowering : public TargetFrameLowering {
public:
explicit RISCVFrameLowering(const RISCVSubtarget &STI)
: TargetFrameLowering(StackGrowsDown,
- /*StackAlignment=*/16,
+ /*StackAlignment=*/Align(16),
/*LocalAreaOffset=*/0),
STI(STI) {}
@@ -45,13 +45,18 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+ // Get the first stack adjustment amount for SplitSPAdjust.
+ // Return 0 if we don't want to split the SP adjustment in the prologue and
+ // epilogue.
+ uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const;
+
protected:
const RISCVSubtarget &STI;
private:
void determineFrameLayout(MachineFunction &MF) const;
void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, Register DestReg, Register SrcReg,
int64_t Val, MachineInstr::MIFlag Flag) const;
};
}
diff --git a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index d0a3af375a6d..1a12d9177d2a 100644
--- a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -68,7 +68,7 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
RISCVMatInt::InstSeq Seq;
RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64, Seq);
- SDNode *Result;
+ SDNode *Result = nullptr;
SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
for (RISCVMatInt::Inst &Inst : Seq) {
SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
@@ -179,6 +179,9 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
// operand and need no special handling.
OutOps.push_back(Op);
return false;
+ case InlineAsm::Constraint_A:
+ OutOps.push_back(Op);
+ return false;
default:
break;
}
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index ce7b85911ab6..dc829fce9013 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -100,6 +100,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::ADD, MVT::i32, Custom);
+ setOperationAction(ISD::SUB, MVT::i32, Custom);
setOperationAction(ISD::SHL, MVT::i32, Custom);
setOperationAction(ISD::SRA, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i32, Custom);
@@ -116,6 +118,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
+ setOperationAction(ISD::MUL, MVT::i32, Custom);
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Custom);
@@ -194,8 +197,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setBooleanContents(ZeroOrOneBooleanContent);
- // Function alignments (log2).
- unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
+ // Function alignments.
+ const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
setMinFunctionAlignment(FunctionAlignment);
setPrefFunctionAlignment(FunctionAlignment);
@@ -231,7 +234,7 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = 4;
+ Info.align = Align(4);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
@@ -660,7 +663,7 @@ SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
- unsigned FrameReg = RI.getFrameRegister(MF);
+ Register FrameReg = RI.getFrameRegister(MF);
int XLenInBytes = Subtarget.getXLen() / 8;
EVT VT = Op.getValueType();
@@ -703,7 +706,7 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
// Return the value of the return address register, marking it an implicit
// live-in.
- unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
+ Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
@@ -834,6 +837,18 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
+// Converts the given 32-bit operation to an i64 operation with sign-extension
+// semantics, to reduce the number of sign-extension instructions.
+static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
+ SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
+ DAG.getValueType(MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
+}
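
A standalone model of the W-instruction semantics this DAG sequence expresses
(two's-complement narrowing assumed, as on every supported host): operate on
the low 32 bits, then sign-extend the 32-bit result to 64 bits, which is what
ADDW does in hardware:

  #include <cassert>
  #include <cstdint>

  static int64_t addw(int64_t A, int64_t B) {
    return static_cast<int32_t>(static_cast<uint32_t>(A) +
                                static_cast<uint32_t>(B));
  }

  int main() {
    assert(addw(0x7FFFFFFF, 1) == -2147483648LL); // wraps, then sign-extends
  }
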
+
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -854,6 +869,15 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(RCW.getValue(2));
break;
}
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ if (N->getOperand(1).getOpcode() == ISD::Constant)
+ return;
+ Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
+ break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -1007,12 +1031,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
// We can materialise `c1 << c2` into an add immediate, so it's "free",
// and the combine should happen, to potentially allow further combines
// later.
- if (isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
+ if (ShiftedC1Int.getMinSignedBits() <= 64 &&
+ isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
return true;
// We can materialise `c1` in an add immediate, so it's "free", and the
// combine should be prevented.
- if (isLegalAddImmediate(C1Int.getSExtValue()))
+ if (C1Int.getMinSignedBits() <= 64 &&
+ isLegalAddImmediate(C1Int.getSExtValue()))
return false;
// Neither constant will fit into an immediate, so find materialisation
@@ -1052,8 +1078,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
-MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
- MachineBasicBlock *BB) {
+static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
+ MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
// To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
@@ -1085,9 +1111,9 @@ MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
BB->addSuccessor(LoopMBB);
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
- unsigned LoReg = MI.getOperand(0).getReg();
- unsigned HiReg = MI.getOperand(1).getReg();
+ Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+ Register LoReg = MI.getOperand(0).getReg();
+ Register HiReg = MI.getOperand(1).getReg();
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
@@ -1122,9 +1148,9 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
- unsigned LoReg = MI.getOperand(0).getReg();
- unsigned HiReg = MI.getOperand(1).getReg();
- unsigned SrcReg = MI.getOperand(2).getReg();
+ Register LoReg = MI.getOperand(0).getReg();
+ Register HiReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
@@ -1154,9 +1180,9 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned LoReg = MI.getOperand(1).getReg();
- unsigned HiReg = MI.getOperand(2).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register LoReg = MI.getOperand(1).getReg();
+ Register HiReg = MI.getOperand(2).getReg();
const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
@@ -1215,12 +1241,12 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
// previous selects in the sequence.
// These conditions could be further relaxed. See the X86 target for a
// related approach and more information.
- unsigned LHS = MI.getOperand(1).getReg();
- unsigned RHS = MI.getOperand(2).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
SmallVector<MachineInstr *, 4> SelectDebugValues;
- SmallSet<unsigned, 4> SelectDests;
+ SmallSet<Register, 4> SelectDests;
SelectDests.insert(MI.getOperand(0).getReg());
MachineInstr *LastSelectPseudo = &MI;
@@ -1363,12 +1389,12 @@ static const MCPhysReg ArgGPRs[] = {
RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
static const MCPhysReg ArgFPR32s[] = {
- RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32,
- RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32
+ RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
+ RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
static const MCPhysReg ArgFPR64s[] = {
- RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64,
- RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64
+ RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
+ RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// Pass a 2*XLEN argument that has been split into two XLEN values through
@@ -1378,7 +1404,7 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
MVT ValVT2, MVT LocVT2,
ISD::ArgFlagsTy ArgFlags2) {
unsigned XLenInBytes = XLen / 8;
- if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
+ if (Register Reg = State.AllocateReg(ArgGPRs)) {
// At least one half can be passed via register.
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
VA1.getLocVT(), CCValAssign::Full));
@@ -1395,7 +1421,7 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
return false;
}
- if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
+ if (Register Reg = State.AllocateReg(ArgGPRs)) {
// The second half can also be passed via register.
State.addLoc(
CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
@@ -1495,7 +1521,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
// GPRs, split between a GPR and the stack, or passed completely on the
// stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
// cases.
- unsigned Reg = State.AllocateReg(ArgGPRs);
+ Register Reg = State.AllocateReg(ArgGPRs);
LocVT = MVT::i32;
if (!Reg) {
unsigned StackOffset = State.AllocateStack(8, 8);
@@ -1537,7 +1563,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
}
// Allocate to a register if possible, or else a stack slot.
- unsigned Reg;
+ Register Reg;
if (ValVT == MVT::f32 && !UseGPRForF32)
Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
else if (ValVT == MVT::f64 && !UseGPRForF64)
@@ -1673,7 +1699,7 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
break;
}
- unsigned VReg = RegInfo.createVirtualRegister(RC);
+ Register VReg = RegInfo.createVirtualRegister(RC);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
@@ -1751,7 +1777,7 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
assert(VA.isRegLoc() && "Expected register VA assignment");
- unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+ Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
SDValue Hi;
@@ -1763,13 +1789,70 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
MachinePointerInfo::getFixedStack(MF, FI));
} else {
// Second half of f64 is passed in another GPR.
- unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+ Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
}
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
+// FastCC yields less than 1% performance improvement on some particular
+// benchmarks, but it may theoretically benefit other cases.
+static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ if (LocVT == MVT::i32 || LocVT == MVT::i64) {
+ // X5 and X6 might be used for save-restore libcall.
+ static const MCPhysReg GPRList[] = {
+ RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
+ RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
+ RISCV::X29, RISCV::X30, RISCV::X31};
+ if (unsigned Reg = State.AllocateReg(GPRList)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::f32) {
+ static const MCPhysReg FPR32List[] = {
+ RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
+ RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
+ RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
+ RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
+ if (unsigned Reg = State.AllocateReg(FPR32List)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::f64) {
+ static const MCPhysReg FPR64List[] = {
+ RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
+ RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
+ RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
+ RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
+ if (unsigned Reg = State.AllocateReg(FPR64List)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ unsigned Offset4 = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
+ return false;
+ }
+
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ unsigned Offset5 = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
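For readers new to calling-convention lowering: functions of this shape implement LLVM's CCAssignFn contract, where returning false means a location was recorded for the value and true means the convention did not match and the next rule should be tried. A minimal sketch of that contract follows; the assigner and its register pool are hypothetical, not part of this patch:

static bool CC_Sketch(unsigned ValNo, MVT ValVT, MVT LocVT,
                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                      CCState &State) {
  // Try a (hypothetical) two-register pool first.
  static const MCPhysReg Regs[] = {RISCV::X10, RISCV::X11};
  if (Register Reg = State.AllocateReg(Regs)) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false; // Location recorded: a register.
  }
  // Otherwise fall back to an 8-byte, 8-aligned stack slot.
  unsigned Offset = State.AllocateStack(8, 8);
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return false; // Location recorded: a stack slot.
}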
+
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
@@ -1809,7 +1892,11 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
- analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
+
+ if (CallConv == CallingConv::Fast)
+ CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
+ else
+ analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -1877,8 +1964,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
// ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
// offsets to even-numbered registers remain 2*XLEN-aligned.
if (Idx % 2) {
- FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
- true);
+ MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
VarArgsSaveSize += XLenInBytes;
}
@@ -1886,7 +1972,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
// to the vararg save area.
for (unsigned I = Idx; I < ArgRegs.size();
++I, VaArgOffset += XLenInBytes) {
- const unsigned Reg = RegInfo.createVirtualRegister(RC);
+ const Register Reg = RegInfo.createVirtualRegister(RC);
RegInfo.addLiveIn(ArgRegs[I], Reg);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
@@ -1920,7 +2006,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
auto &Callee = CLI.Callee;
auto CalleeCC = CLI.CallConv;
- auto IsVarArg = CLI.IsVarArg;
auto &Outs = CLI.Outs;
auto &Caller = MF.getFunction();
auto CallerCC = Caller.getCallingConv();
@@ -1937,10 +2022,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
if (Caller.hasFnAttribute("interrupt"))
return false;
- // Do not tail call opt functions with varargs.
- if (IsVarArg)
- return false;
-
// Do not tail call opt if the stack is used to pass parameters.
if (CCInfo.getNextStackOffset() != 0)
return false;
@@ -2015,7 +2096,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
- analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
+
+ if (CallConv == CallingConv::Fast)
+ ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
+ else
+ analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
// Check if it's really possible to do a tail call.
if (IsTailCall)
@@ -2057,7 +2142,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
// Copy argument values to their designated locations.
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -2074,7 +2159,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
- unsigned RegLo = VA.getLocReg();
+ Register RegLo = VA.getLocReg();
RegsToPass.push_back(std::make_pair(RegLo, Lo));
if (RegLo == RISCV::X17) {
@@ -2087,7 +2172,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
} else {
// Second half of f64 is passed in another GPR.
- unsigned RegHigh = RegLo + 1;
+ assert(RegLo < RISCV::X31 && "Invalid register pair");
+ Register RegHigh = RegLo + 1;
RegsToPass.push_back(std::make_pair(RegHigh, Hi));
}
continue;
@@ -2302,8 +2388,9 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
DAG.getVTList(MVT::i32, MVT::i32), Val);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
- unsigned RegLo = VA.getLocReg();
- unsigned RegHi = RegLo + 1;
+ Register RegLo = VA.getLocReg();
+ assert(RegLo < RISCV::X31 && "Invalid register pair");
+ Register RegHi = RegLo + 1;
Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
@@ -2397,6 +2484,27 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+RISCVTargetLowering::ConstraintType
+RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'f':
+ return C_RegisterClass;
+ case 'I':
+ case 'J':
+ case 'K':
+ return C_Immediate;
+ case 'A':
+ return C_Memory;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
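As a usage illustration (not taken from this patch), the 'f' and 'I' classifications above surface in GCC-style inline assembly roughly as follows; the exact ranges behind 'I', 'J', and 'K' are target-defined immediate constraints:

// Sketch: 'f' selects an FP register, 'I' demands a 12-bit signed immediate.
float fadd_demo(float a, float b) {
  float r;
  asm("fadd.s %0, %1, %2" : "=f"(r) : "f"(a), "f"(b));
  return r;
}
int addi_demo(int x) {
  int r;
  asm("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(42)); // 42 fits in simm12
  return r;
}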
+
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
@@ -2407,14 +2515,125 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
switch (Constraint[0]) {
case 'r':
return std::make_pair(0U, &RISCV::GPRRegClass);
+ case 'f':
+ if (Subtarget.hasStdExtF() && VT == MVT::f32)
+ return std::make_pair(0U, &RISCV::FPR32RegClass);
+ if (Subtarget.hasStdExtD() && VT == MVT::f64)
+ return std::make_pair(0U, &RISCV::FPR64RegClass);
+ break;
default:
break;
}
}
+ // Clang will correctly decode the usage of register name aliases into their
+ // official names. However, other frontends like `rustc` do not. This allows
+ // users of these frontends to use the ABI names for registers in LLVM-style
+ // register constraints.
+ Register XRegFromAlias = StringSwitch<Register>(Constraint.lower())
+ .Case("{zero}", RISCV::X0)
+ .Case("{ra}", RISCV::X1)
+ .Case("{sp}", RISCV::X2)
+ .Case("{gp}", RISCV::X3)
+ .Case("{tp}", RISCV::X4)
+ .Case("{t0}", RISCV::X5)
+ .Case("{t1}", RISCV::X6)
+ .Case("{t2}", RISCV::X7)
+ .Cases("{s0}", "{fp}", RISCV::X8)
+ .Case("{s1}", RISCV::X9)
+ .Case("{a0}", RISCV::X10)
+ .Case("{a1}", RISCV::X11)
+ .Case("{a2}", RISCV::X12)
+ .Case("{a3}", RISCV::X13)
+ .Case("{a4}", RISCV::X14)
+ .Case("{a5}", RISCV::X15)
+ .Case("{a6}", RISCV::X16)
+ .Case("{a7}", RISCV::X17)
+ .Case("{s2}", RISCV::X18)
+ .Case("{s3}", RISCV::X19)
+ .Case("{s4}", RISCV::X20)
+ .Case("{s5}", RISCV::X21)
+ .Case("{s6}", RISCV::X22)
+ .Case("{s7}", RISCV::X23)
+ .Case("{s8}", RISCV::X24)
+ .Case("{s9}", RISCV::X25)
+ .Case("{s10}", RISCV::X26)
+ .Case("{s11}", RISCV::X27)
+ .Case("{t3}", RISCV::X28)
+ .Case("{t4}", RISCV::X29)
+ .Case("{t5}", RISCV::X30)
+ .Case("{t6}", RISCV::X31)
+ .Default(RISCV::NoRegister);
+ if (XRegFromAlias != RISCV::NoRegister)
+ return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
+
+ // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
+ // TableGen record rather than the AsmName to choose registers for InlineAsm
+ // constraints, and because we want to match those names to the widest
+ // floating point register type available, manually select registers here.
+ //
+ // The second spelling accepted by each case below is the ABI name of the
+ // register, so that frontends can also use ABI names in constraint lists.
+ if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) {
+ std::pair<Register, Register> FReg =
+ StringSwitch<std::pair<Register, Register>>(Constraint.lower())
+ .Cases("{f0}", "{ft0}", {RISCV::F0_F, RISCV::F0_D})
+ .Cases("{f1}", "{ft1}", {RISCV::F1_F, RISCV::F1_D})
+ .Cases("{f2}", "{ft2}", {RISCV::F2_F, RISCV::F2_D})
+ .Cases("{f3}", "{ft3}", {RISCV::F3_F, RISCV::F3_D})
+ .Cases("{f4}", "{ft4}", {RISCV::F4_F, RISCV::F4_D})
+ .Cases("{f5}", "{ft5}", {RISCV::F5_F, RISCV::F5_D})
+ .Cases("{f6}", "{ft6}", {RISCV::F6_F, RISCV::F6_D})
+ .Cases("{f7}", "{ft7}", {RISCV::F7_F, RISCV::F7_D})
+ .Cases("{f8}", "{fs0}", {RISCV::F8_F, RISCV::F8_D})
+ .Cases("{f9}", "{fs1}", {RISCV::F9_F, RISCV::F9_D})
+ .Cases("{f10}", "{fa0}", {RISCV::F10_F, RISCV::F10_D})
+ .Cases("{f11}", "{fa1}", {RISCV::F11_F, RISCV::F11_D})
+ .Cases("{f12}", "{fa2}", {RISCV::F12_F, RISCV::F12_D})
+ .Cases("{f13}", "{fa3}", {RISCV::F13_F, RISCV::F13_D})
+ .Cases("{f14}", "{fa4}", {RISCV::F14_F, RISCV::F14_D})
+ .Cases("{f15}", "{fa5}", {RISCV::F15_F, RISCV::F15_D})
+ .Cases("{f16}", "{fa6}", {RISCV::F16_F, RISCV::F16_D})
+ .Cases("{f17}", "{fa7}", {RISCV::F17_F, RISCV::F17_D})
+ .Cases("{f18}", "{fs2}", {RISCV::F18_F, RISCV::F18_D})
+ .Cases("{f19}", "{fs3}", {RISCV::F19_F, RISCV::F19_D})
+ .Cases("{f20}", "{fs4}", {RISCV::F20_F, RISCV::F20_D})
+ .Cases("{f21}", "{fs5}", {RISCV::F21_F, RISCV::F21_D})
+ .Cases("{f22}", "{fs6}", {RISCV::F22_F, RISCV::F22_D})
+ .Cases("{f23}", "{fs7}", {RISCV::F23_F, RISCV::F23_D})
+ .Cases("{f24}", "{fs8}", {RISCV::F24_F, RISCV::F24_D})
+ .Cases("{f25}", "{fs9}", {RISCV::F25_F, RISCV::F25_D})
+ .Cases("{f26}", "{fs10}", {RISCV::F26_F, RISCV::F26_D})
+ .Cases("{f27}", "{fs11}", {RISCV::F27_F, RISCV::F27_D})
+ .Cases("{f28}", "{ft8}", {RISCV::F28_F, RISCV::F28_D})
+ .Cases("{f29}", "{ft9}", {RISCV::F29_F, RISCV::F29_D})
+ .Cases("{f30}", "{ft10}", {RISCV::F30_F, RISCV::F30_D})
+ .Cases("{f31}", "{ft11}", {RISCV::F31_F, RISCV::F31_D})
+ .Default({RISCV::NoRegister, RISCV::NoRegister});
+ if (FReg.first != RISCV::NoRegister)
+ return Subtarget.hasStdExtD()
+ ? std::make_pair(FReg.second, &RISCV::FPR64RegClass)
+ : std::make_pair(FReg.first, &RISCV::FPR32RegClass);
+ }
+
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
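A brief illustration of the LLVM-style explicit register constraints this hook now decodes; the constraint strings below are hypothetical examples, not taken from the patch:

// Both spellings select the same physical register (X10):
//   call i32 asm "mv $0, zero", "={x10}"()  ; matched by the record name
//   call i32 asm "mv $0, zero", "={a0}"()   ; matched by the switch above
// Frontends such as rustc emit the ABI-name form directly.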
+unsigned
+RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
+ // Only single-character constraint codes are currently supported.
+ if (ConstraintCode.size() == 1) {
+ switch (ConstraintCode[0]) {
+ case 'A':
+ return InlineAsm::Constraint_A;
+ default:
+ break;
+ }
+ }
+
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+}
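Concretely, the 'A' constraint denotes an address held in a general-purpose register, as consumed by the AMO instructions. A C-level sketch of its use (the asm body is illustrative):

// Sketch: atomic exchange via inline asm using the 'A' memory constraint.
int amoswap_demo(int *p, int n) {
  int old;
  asm volatile("amoswap.w %0, %2, %1"
               : "=r"(old), "+A"(*p)
               : "r"(n)
               : "memory");
  return old;
}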
+
void RISCVTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
@@ -2619,3 +2838,13 @@ unsigned RISCVTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
return RISCV::X11;
}
+
+bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
+ // Return false to suppress unnecessary extensions when a libcall argument
+ // or return value is of f32 type under the LP64 ABI.
+ RISCVABI::ABI ABI = Subtarget.getTargetABI();
+ if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
+ return false;
+
+ return true;
+}
diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h
index 17db03bbb69e..18fc7350bbbf 100644
--- a/lib/Target/RISCV/RISCVISelLowering.h
+++ b/lib/Target/RISCV/RISCVISelLowering.h
@@ -92,6 +92,10 @@ public:
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
+
+ unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
+
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
@@ -141,6 +145,8 @@ public:
unsigned
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+ bool shouldExtendTypeInLibCall(EVT Type) const override;
+
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/RISCV/RISCVInstrInfo.cpp b/lib/Target/RISCV/RISCVInstrInfo.cpp
index 99c8d2ef73de..084839299530 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -14,6 +14,7 @@
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
+#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -28,8 +29,9 @@
using namespace llvm;
-RISCVInstrInfo::RISCVInstrInfo()
- : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP) {}
+RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
+ : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
+ STI(STI) {}
unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
@@ -156,24 +158,43 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(Opcode), DstReg).addFrameIndex(FI).addImm(0);
}
-void RISCVInstrInfo::movImm32(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DstReg, uint64_t Val,
- MachineInstr::MIFlag Flag) const {
- assert(isInt<32>(Val) && "Can only materialize 32-bit constants");
-
- // TODO: If the value can be materialized using only one instruction, only
- // insert a single instruction.
-
- uint64_t Hi20 = ((Val + 0x800) >> 12) & 0xfffff;
- uint64_t Lo12 = SignExtend64<12>(Val);
- BuildMI(MBB, MBBI, DL, get(RISCV::LUI), DstReg)
- .addImm(Hi20)
- .setMIFlag(Flag);
- BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
- .addReg(DstReg, RegState::Kill)
- .addImm(Lo12)
- .setMIFlag(Flag);
+void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register DstReg, uint64_t Val,
+ MachineInstr::MIFlag Flag) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ bool IsRV64 = MF->getSubtarget<RISCVSubtarget>().is64Bit();
+ Register SrcReg = RISCV::X0;
+ Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ unsigned Num = 0;
+
+ if (!IsRV64 && !isInt<32>(Val))
+ report_fatal_error("Should only materialize 32-bit constants for RV32");
+
+ RISCVMatInt::InstSeq Seq;
+ RISCVMatInt::generateInstSeq(Val, IsRV64, Seq);
+ assert(Seq.size() > 0);
+
+ for (RISCVMatInt::Inst &Inst : Seq) {
+ // Write the final result to DstReg if it's the last instruction in the Seq.
+ // Otherwise, write the result to the temp register.
+ if (++Num == Seq.size())
+ Result = DstReg;
+
+ if (Inst.Opc == RISCV::LUI) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
+ .addImm(Inst.Imm)
+ .setMIFlag(Flag);
+ } else {
+ BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(Inst.Imm)
+ .setMIFlag(Flag);
+ }
+ // Only the first instruction has X0 as its source.
+ SrcReg = Result;
+ }
}
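As an illustration of what the loop above emits (a representative example, not output captured from this patch): for Val = 0x12345678 on RV32, RISCVMatInt::generateInstSeq yields a two-entry sequence, expanded as:

// LUI  %tmp, 74565     ; 74565 == ((0x12345678 + 0x800) >> 12) & 0xfffff
// ADDI %dst, %tmp, 1656
// On RV64, wider constants chain additional ADDIW/SLLI steps through fresh
// temporaries; only the final instruction writes DstReg.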
// The contents of values added to Cond are not examined outside of
@@ -372,7 +393,7 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// FIXME: A virtual register must be used initially, as the register
// scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
// uses the same workaround).
- unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
auto II = MBB.end();
MachineInstr &LuiMI = *BuildMI(MBB, II, DL, get(RISCV::LUI), ScratchReg)
@@ -466,3 +487,58 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
}
return MI.isAsCheapAsAMove();
}
+
+bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
+ StringRef &ErrInfo) const {
+ const MCInstrInfo *MCII = STI.getInstrInfo();
+ const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+
+ for (auto &OI : enumerate(Desc.operands())) {
+ unsigned OpType = OI.value().OperandType;
+ if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
+ OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
+ const MachineOperand &MO = MI.getOperand(OI.index());
+ if (MO.isImm()) {
+ int64_t Imm = MO.getImm();
+ bool Ok;
+ switch (OpType) {
+ default:
+ llvm_unreachable("Unexpected operand type");
+ case RISCVOp::OPERAND_UIMM4:
+ Ok = isUInt<4>(Imm);
+ break;
+ case RISCVOp::OPERAND_UIMM5:
+ Ok = isUInt<5>(Imm);
+ break;
+ case RISCVOp::OPERAND_UIMM12:
+ Ok = isUInt<12>(Imm);
+ break;
+ case RISCVOp::OPERAND_SIMM12:
+ Ok = isInt<12>(Imm);
+ break;
+ case RISCVOp::OPERAND_SIMM13_LSB0:
+ Ok = isShiftedInt<12, 1>(Imm);
+ break;
+ case RISCVOp::OPERAND_UIMM20:
+ Ok = isUInt<20>(Imm);
+ break;
+ case RISCVOp::OPERAND_SIMM21_LSB0:
+ Ok = isShiftedInt<20, 1>(Imm);
+ break;
+ case RISCVOp::OPERAND_UIMMLOG2XLEN:
+ if (STI.getTargetTriple().isArch64Bit())
+ Ok = isUInt<6>(Imm);
+ else
+ Ok = isUInt<5>(Imm);
+ break;
+ }
+ if (!Ok) {
+ ErrInfo = "Invalid immediate";
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
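For reference, the range predicates used above have the following semantics (restating the llvm/Support/MathExtras.h definitions):

// isUInt<5>(Imm)           : 0 <= Imm <= 31
// isInt<12>(Imm)           : -2048 <= Imm <= 2047
// isShiftedInt<12, 1>(Imm) : Imm is even and fits in 13 signed bits, i.e.
//                            -4096 <= Imm <= 4094 with bit 0 clear.
// Example: a branch offset of 4094 verifies; 4095 (odd) and 4096 (too wide)
// are rejected with "Invalid immediate".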
diff --git a/lib/Target/RISCV/RISCVInstrInfo.h b/lib/Target/RISCV/RISCVInstrInfo.h
index ff098e660d19..d3ae04aefe04 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/lib/Target/RISCV/RISCVInstrInfo.h
@@ -21,10 +21,12 @@
namespace llvm {
+class RISCVSubtarget;
+
class RISCVInstrInfo : public RISCVGenInstrInfo {
public:
- RISCVInstrInfo();
+ explicit RISCVInstrInfo(RISCVSubtarget &STI);
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
@@ -46,10 +48,10 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- // Materializes the given int32 Val into DstReg.
- void movImm32(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DstReg, uint64_t Val,
- MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
+ // Materializes the given integer Val into DstReg.
+ void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register DstReg, uint64_t Val,
+ MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
@@ -80,6 +82,12 @@ public:
int64_t BrOffset) const override;
bool isAsCheapAsAMove(const MachineInstr &MI) const override;
+
+ bool verifyInstruction(const MachineInstr &MI,
+ StringRef &ErrInfo) const override;
+
+protected:
+ const RISCVSubtarget &STI;
};
}
#endif
diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td
index 69bde15f1218..db2ecc49d14e 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/lib/Target/RISCV/RISCVInstrInfo.td
@@ -69,6 +69,12 @@ class ImmAsmOperand<string prefix, int width, string suffix> : AsmOperandClass {
let DiagnosticType = !strconcat("Invalid", Name);
}
+def ImmZeroAsmOperand : AsmOperandClass {
+ let Name = "ImmZero";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = !strconcat("Invalid", Name);
+}
+
class SImmAsmOperand<int width, string suffix = "">
: ImmAsmOperand<"S", width, suffix> {
}
@@ -87,6 +93,8 @@ def fencearg : Operand<XLenVT> {
let ParserMatchClass = FenceArg;
let PrintMethod = "printFenceArg";
let DecoderMethod = "decodeUImmOperand<4>";
+ let OperandType = "OPERAND_UIMM4";
+ let OperandNamespace = "RISCVOp";
}
def UImmLog2XLenAsmOperand : AsmOperandClass {
@@ -111,11 +119,15 @@ def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{
return isUInt<6>(Imm);
return isUInt<5>(Imm);
}];
+ let OperandType = "OPERAND_UIMMLOG2XLEN";
+ let OperandNamespace = "RISCVOp";
}
def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5>;
let DecoderMethod = "decodeUImmOperand<5>";
+ let OperandType = "OPERAND_UIMM5";
+ let OperandNamespace = "RISCVOp";
}
def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
@@ -128,6 +140,8 @@ def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
return isInt<12>(Imm);
return MCOp.isBareSymbolRef();
}];
+ let OperandType = "OPERAND_SIMM12";
+ let OperandNamespace = "RISCVOp";
}
// A 13-bit signed immediate where the least significant bit is zero.
@@ -141,6 +155,8 @@ def simm13_lsb0 : Operand<OtherVT> {
return isShiftedInt<12, 1>(Imm);
return MCOp.isBareSymbolRef();
}];
+ let OperandType = "OPERAND_SIMM13_LSB0";
+ let OperandNamespace = "RISCVOp";
}
class UImm20Operand : Operand<XLenVT> {
@@ -152,6 +168,8 @@ class UImm20Operand : Operand<XLenVT> {
return isUInt<20>(Imm);
return MCOp.isBareSymbolRef();
}];
+ let OperandType = "OPERAND_UIMM20";
+ let OperandNamespace = "RISCVOp";
}
def uimm20_lui : UImm20Operand {
@@ -176,6 +194,8 @@ def simm21_lsb0_jal : Operand<OtherVT> {
return isShiftedInt<20, 1>(Imm);
return MCOp.isBareSymbolRef();
}];
+ let OperandType = "OPERAND_SIMM21_LSB0";
+ let OperandNamespace = "RISCVOp";
}
def BareSymbol : AsmOperandClass {
@@ -224,6 +244,8 @@ def csr_sysreg : Operand<XLenVT> {
let ParserMatchClass = CSRSystemRegister;
let PrintMethod = "printCSRSystemRegister";
let DecoderMethod = "decodeUImmOperand<12>";
+ let OperandType = "OPERAND_UIMM12";
+ let OperandNamespace = "RISCVOp";
}
// A parameterized register class alternative to i32imm/i64imm from Target.td.
diff --git a/lib/Target/RISCV/RISCVInstrInfoA.td b/lib/Target/RISCV/RISCVInstrInfoA.td
index b768c9347b38..38ba3f9fb24e 100644
--- a/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -12,14 +12,32 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+// A parse method for (${gpr}) or 0(${gpr}), where the 0 is silently ignored.
+// Used for GNU as compatibility.
+def AtomicMemOpOperand : AsmOperandClass {
+ let Name = "AtomicMemOpOperand";
+ let RenderMethod = "addRegOperands";
+ let PredicateMethod = "isReg";
+ let ParserMethod = "parseAtomicMemOp";
+}
+
+def GPRMemAtomic : RegisterOperand<GPR> {
+ let ParserMatchClass = AtomicMemOpOperand;
+ let PrintMethod = "printAtomicMemOp";
+}
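Concretely, with this operand class both assembler spellings below are accepted and encode identically, since the written 0 offset is discarded (illustrative examples):

//   lr.w     a0, (a1)
//   lr.w     a0, 0(a1)      // GNU as compatibility form
//   amoadd.w a0, a2, (a1)
//   amoadd.w a0, a2, 0(a1)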
+
+//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
class LR_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<0b00010, aq, rl, funct3, OPC_AMO,
- (outs GPR:$rd), (ins GPR:$rs1),
- opcodestr, "$rd, (${rs1})"> {
+ (outs GPR:$rd), (ins GPRMemAtomic:$rs1),
+ opcodestr, "$rd, $rs1"> {
let rs2 = 0;
}
@@ -33,8 +51,8 @@ multiclass LR_r_aq_rl<bits<3> funct3, string opcodestr> {
let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in
class AMO_rr<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO,
- (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
- opcodestr, "$rd, $rs2, (${rs1})">;
+ (outs GPR:$rd), (ins GPRMemAtomic:$rs1, GPR:$rs2),
+ opcodestr, "$rd, $rs2, $rs1">;
multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
def "" : AMO_rr<funct5, 0, 0, funct3, opcodestr>;
@@ -196,12 +214,12 @@ class PseudoMaskedAMOUMinUMax
}
class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
- : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
- imm:$ordering),
+ timm:$ordering),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
imm:$ordering)>;
@@ -270,7 +288,7 @@ def PseudoMaskedCmpXchg32
}
def : Pat<(int_riscv_masked_cmpxchg_i32
- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
@@ -347,7 +365,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>;
def : Pat<(int_riscv_masked_cmpxchg_i64
- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
} // Predicates = [HasStdExtA, IsRV64]
diff --git a/lib/Target/RISCV/RISCVInstrInfoC.td b/lib/Target/RISCV/RISCVInstrInfoC.td
index 94477341eea7..fa0050f107b2 100644
--- a/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -61,6 +61,11 @@ def simm6nonzero : Operand<XLenVT>,
}];
}
+def immzero : Operand<XLenVT>,
+ ImmLeaf<XLenVT, [{return (Imm == 0);}]> {
+ let ParserMatchClass = ImmZeroAsmOperand;
+}
+
def CLUIImmAsmOperand : AsmOperandClass {
let Name = "CLUIImm";
let RenderMethod = "addImmOperands";
@@ -132,7 +137,8 @@ def uimm8_lsb000 : Operand<XLenVT>,
}
// A 9-bit signed immediate where the least significant bit is zero.
-def simm9_lsb0 : Operand<OtherVT> {
+def simm9_lsb0 : Operand<OtherVT>,
+ ImmLeaf<XLenVT, [{return isShiftedInt<8, 1>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<9, "Lsb0">;
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<9>";
@@ -191,7 +197,8 @@ def simm10_lsb0000nonzero : Operand<XLenVT>,
}
// A 12-bit signed immediate where the least significant bit is zero.
-def simm12_lsb0 : Operand<XLenVT> {
+def simm12_lsb0 : Operand<XLenVT>,
+ ImmLeaf<XLenVT, [{return isShiftedInt<11, 1>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<12, "Lsb0">;
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<12>";
@@ -344,7 +351,10 @@ def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> {
}
let rd = 0, imm = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">;
+def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", ""> {
+ let Inst{6-2} = 0;
+}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
@@ -354,6 +364,15 @@ def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
let Inst{6-2} = imm{4-0};
}
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def C_ADDI_NOP : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb),
+ (ins GPRX0:$rd, immzero:$imm),
+ "c.addi", "$rd, $imm"> {
+ let Constraints = "$rd = $rd_wb";
+ let Inst{6-2} = 0;
+ let isAsmParserOnly = 1;
+}
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1,
DecoderNamespace = "RISCV32Only_", Defs = [X1],
Predicates = [HasStdExtC, IsRV32] in
@@ -523,6 +542,105 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> {
} // Predicates = [HasStdExtC]
//===----------------------------------------------------------------------===//
+// HINT Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0,
+ mayStore = 0 in {
+
+let rd = 0 in
+def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm),
+ "c.nop", "$imm"> {
+ let Inst{6-2} = imm{4-0};
+ let DecoderMethod = "decodeRVCInstrSImm";
+}
+
+// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6.
+def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb),
+ (ins GPRX0:$rd, simm6nonzero:$imm),
+ "c.addi", "$rd, $imm"> {
+ let Constraints = "$rd = $rd_wb";
+ let Inst{6-2} = imm{4-0};
+ let isAsmParserOnly = 1;
+}
+
+def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
+ (ins GPRNoX0:$rd, immzero:$imm),
+ "c.addi", "$rd, $imm"> {
+ let Constraints = "$rd = $rd_wb";
+ let Inst{6-2} = 0;
+ let isAsmParserOnly = 1;
+}
+
+def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm),
+ "c.li", "$rd, $imm"> {
+ let Inst{6-2} = imm{4-0};
+ let Inst{11-7} = 0;
+ let DecoderMethod = "decodeRVCInstrRdSImm";
+}
+
+def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd),
+ (ins c_lui_imm:$imm),
+ "c.lui", "$rd, $imm"> {
+ let Inst{6-2} = imm{4-0};
+ let Inst{11-7} = 0;
+ let DecoderMethod = "decodeRVCInstrRdSImm";
+}
+
+def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2),
+ "c.mv", "$rs1, $rs2">
+{
+ let Inst{11-7} = 0;
+ let DecoderMethod = "decodeRVCInstrRdRs2";
+}
+
+def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb),
+ (ins GPRX0:$rs1, GPRNoX0:$rs2),
+ "c.add", "$rs1, $rs2"> {
+ let Constraints = "$rs1 = $rs1_wb";
+ let Inst{11-7} = 0;
+ let DecoderMethod = "decodeRVCInstrRdRs1Rs2";
+}
+
+def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb),
+ (ins GPRX0:$rd, uimmlog2xlennonzero:$imm),
+ "c.slli" ,"$rd, $imm"> {
+ let Constraints = "$rd = $rd_wb";
+ let Inst{6-2} = imm{4-0};
+ let Inst{11-7} = 0;
+ let DecoderMethod = "decodeRVCInstrRdRs1UImm";
+}
+
+def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd),
+ "c.slli64" ,"$rd"> {
+ let Constraints = "$rd = $rd_wb";
+ let Inst{6-2} = 0;
+ let Inst{12} = 0;
+}
+
+def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
+ (ins GPRC:$rd),
+ "c.srli64", "$rd"> {
+ let Constraints = "$rd = $rd_wb";
+ let Inst{6-2} = 0;
+ let Inst{11-10} = 0;
+ let Inst{12} = 0;
+}
+
+def C_SRAI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
+ (ins GPRC:$rd),
+ "c.srai64", "$rd"> {
+ let Constraints = "$rd = $rd_wb";
+ let Inst{6-2} = 0;
+ let Inst{11-10} = 1;
+ let Inst{12} = 0;
+}
+
+} // Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0,
+ // mayStore = 0
+
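For orientation, these definitions let the assembler and disassembler round-trip reserved HINT encodings; a few illustrative spellings (assuming HasRVCHints is enabled):

//   c.nop 5         // encoded as c.addi x0, 5 - a hint, not an add
//   c.li  x0, 7     // writes to x0 are discarded; encoding space for hints
//   c.add x0, a1    // likewise a hint encoding
//   c.slli64 a2     // the shamt=0 slot, reserved as a shift-by-64 hint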
+//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/RISCV/RISCVInstrInfoF.td b/lib/Target/RISCV/RISCVInstrInfoF.td
index 032642942f2b..3b73c865ea17 100644
--- a/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -227,6 +227,12 @@ def : InstAlias<"frcsr $rd", (CSRRS GPR:$rd, FCSR.Encoding, X0), 2>;
def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, FCSR.Encoding, GPR:$rs)>;
def : InstAlias<"fscsr $rs", (CSRRW X0, FCSR.Encoding, GPR:$rs), 2>;
+// frsr, fssr are obsolete aliases replaced by frcsr, fscsr, so give them
+// zero weight.
+def : InstAlias<"frsr $rd", (CSRRS GPR:$rd, FCSR.Encoding, X0), 0>;
+def : InstAlias<"fssr $rd, $rs", (CSRRW GPR:$rd, FCSR.Encoding, GPR:$rs), 0>;
+def : InstAlias<"fssr $rs", (CSRRW X0, FCSR.Encoding, GPR:$rs), 0>;
+
def : InstAlias<"frrm $rd", (CSRRS GPR:$rd, FRM.Encoding, X0), 2>;
def : InstAlias<"fsrm $rd, $rs", (CSRRW GPR:$rd, FRM.Encoding, GPR:$rs)>;
def : InstAlias<"fsrm $rs", (CSRRW X0, FRM.Encoding, GPR:$rs), 2>;
diff --git a/lib/Target/RISCV/RISCVInstructionSelector.cpp b/lib/Target/RISCV/RISCVInstructionSelector.cpp
new file mode 100644
index 000000000000..5bd09a546114
--- /dev/null
+++ b/lib/Target/RISCV/RISCVInstructionSelector.cpp
@@ -0,0 +1,103 @@
+//===-- RISCVInstructionSelector.cpp -----------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for
+/// RISCV.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "RISCVRegisterBankInfo.h"
+#include "RISCVSubtarget.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "riscv-isel"
+
+using namespace llvm;
+
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "RISCVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
+namespace {
+
+class RISCVInstructionSelector : public InstructionSelector {
+public:
+ RISCVInstructionSelector(const RISCVTargetMachine &TM,
+ const RISCVSubtarget &STI,
+ const RISCVRegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) override;
+ static const char *getName() { return DEBUG_TYPE; }
+
+private:
+ bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+
+ const RISCVSubtarget &STI;
+ const RISCVInstrInfo &TII;
+ const RISCVRegisterInfo &TRI;
+ const RISCVRegisterBankInfo &RBI;
+
+ // FIXME: This is necessary because DAGISel uses "Subtarget->" and GlobalISel
+ // uses "STI." in the code generated by TableGen. We need to unify the name of
+ // Subtarget variable.
+ const RISCVSubtarget *Subtarget = &STI;
+
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "RISCVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "RISCVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+
+} // end anonymous namespace
+
+#define GET_GLOBALISEL_IMPL
+#include "RISCVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+RISCVInstructionSelector::RISCVInstructionSelector(
+ const RISCVTargetMachine &TM, const RISCVSubtarget &STI,
+ const RISCVRegisterBankInfo &RBI)
+ : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI),
+
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "RISCVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "RISCVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
+
+bool RISCVInstructionSelector::select(MachineInstr &I) {
+
+ if (!isPreISelGenericOpcode(I.getOpcode())) {
+ // Certain non-generic instructions also need some special handling.
+ return true;
+ }
+
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+
+ return false;
+}
+
+namespace llvm {
+InstructionSelector *
+createRISCVInstructionSelector(const RISCVTargetMachine &TM,
+ RISCVSubtarget &Subtarget,
+ RISCVRegisterBankInfo &RBI) {
+ return new RISCVInstructionSelector(TM, Subtarget, RBI);
+}
+} // end namespace llvm
diff --git a/lib/Target/RISCV/RISCVLegalizerInfo.cpp b/lib/Target/RISCV/RISCVLegalizerInfo.cpp
new file mode 100644
index 000000000000..c92f4a3ee17b
--- /dev/null
+++ b/lib/Target/RISCV/RISCVLegalizerInfo.cpp
@@ -0,0 +1,23 @@
+//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for RISCV.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "RISCVLegalizerInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
+
+using namespace llvm;
+
+RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) {
+ computeTables();
+}
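The constructor is intentionally minimal; a fleshed-out legalizer would declare per-opcode rules before calling computeTables(). A hypothetical sketch of what such rules could look like with the LegalizerInfo builder API (illustrative only, not part of this patch):

RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) {
  using namespace TargetOpcode;
  const LLT XLenLLT = LLT::scalar(ST.getXLen()); // s32 on RV32, s64 on RV64
  // Declare XLEN-wide integer arithmetic/logic as directly legal.
  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
      .legalFor({XLenLLT})
      .clampScalar(0, XLenLLT, XLenLLT);
  computeTables();
}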
diff --git a/lib/Target/RISCV/RISCVLegalizerInfo.h b/lib/Target/RISCV/RISCVLegalizerInfo.h
new file mode 100644
index 000000000000..f2c2b9a3fd46
--- /dev/null
+++ b/lib/Target/RISCV/RISCVLegalizerInfo.h
@@ -0,0 +1,28 @@
+//===-- RISCVLegalizerInfo.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for RISCV.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_RISCV_RISCVMACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class RISCVSubtarget;
+
+/// This class provides the legalization strategies for the RISCV target.
+class RISCVLegalizerInfo : public LegalizerInfo {
+public:
+ RISCVLegalizerInfo(const RISCVSubtarget &ST);
+};
+} // end namespace llvm
+#endif
diff --git a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 82b1209cb8e7..4c9013aa1e23 100644
--- a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -45,7 +45,7 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI);
void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail,
int64_t Offset);
- bool matchLargeOffset(MachineInstr &TailAdd, unsigned GSReg, int64_t &Offset);
+ bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset);
RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
MachineFunctionProperties getRequiredProperties() const override {
@@ -85,7 +85,7 @@ bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI,
HiLUI.getOperand(1).getOffset() != 0 ||
!MRI->hasOneUse(HiLUI.getOperand(0).getReg()))
return false;
- unsigned HiLuiDestReg = HiLUI.getOperand(0).getReg();
+ Register HiLuiDestReg = HiLUI.getOperand(0).getReg();
LoADDI = MRI->use_begin(HiLuiDestReg)->getParent();
if (LoADDI->getOpcode() != RISCV::ADDI ||
LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO ||
@@ -132,12 +132,12 @@ void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI,
// \ /
// TailAdd: add vreg4, vreg2, voff
bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
- unsigned GAReg,
+ Register GAReg,
int64_t &Offset) {
assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
- unsigned Rs = TailAdd.getOperand(1).getReg();
- unsigned Rt = TailAdd.getOperand(2).getReg();
- unsigned Reg = Rs == GAReg ? Rt : Rs;
+ Register Rs = TailAdd.getOperand(1).getReg();
+ Register Rt = TailAdd.getOperand(2).getReg();
+ Register Reg = Rs == GAReg ? Rt : Rs;
// Can't fold if the register has more than one use.
if (!MRI->hasOneUse(Reg))
@@ -178,7 +178,7 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
MachineInstr &LoADDI) {
- unsigned DestReg = LoADDI.getOperand(0).getReg();
+ Register DestReg = LoADDI.getOperand(0).getReg();
assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI");
// LoADDI has only one use.
MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent();
@@ -232,7 +232,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
return false;
// Register defined by LoADDI should be used in the base part of the
// load\store instruction. Otherwise, no folding possible.
- unsigned BaseAddrReg = Tail.getOperand(1).getReg();
+ Register BaseAddrReg = Tail.getOperand(1).getReg();
if (DestReg != BaseAddrReg)
return false;
MachineOperand &TailImmOp = Tail.getOperand(2);
diff --git a/lib/Target/RISCV/RISCVRegisterBankInfo.cpp b/lib/Target/RISCV/RISCVRegisterBankInfo.cpp
new file mode 100644
index 000000000000..bd3b95a98b9f
--- /dev/null
+++ b/lib/Target/RISCV/RISCVRegisterBankInfo.cpp
@@ -0,0 +1,26 @@
+//===-- RISCVRegisterBankInfo.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for RISCV.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "RISCVRegisterBankInfo.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "RISCVGenRegisterBank.inc"
+
+using namespace llvm;
+
+RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI)
+ : RISCVGenRegisterBankInfo() {}
diff --git a/lib/Target/RISCV/RISCVRegisterBankInfo.h b/lib/Target/RISCV/RISCVRegisterBankInfo.h
new file mode 100644
index 000000000000..05fac992734d
--- /dev/null
+++ b/lib/Target/RISCV/RISCVRegisterBankInfo.h
@@ -0,0 +1,37 @@
+//===-- RISCVRegisterBankInfo.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for RISCV.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+
+#define GET_REGBANK_DECLARATIONS
+#include "RISCVGenRegisterBank.inc"
+
+namespace llvm {
+
+class TargetRegisterInfo;
+
+class RISCVGenRegisterBankInfo : public RegisterBankInfo {
+protected:
+#define GET_TARGET_REGBANK_CLASS
+#include "RISCVGenRegisterBank.inc"
+};
+
+/// This class provides the information for the target register banks.
+class RISCVRegisterBankInfo final : public RISCVGenRegisterBankInfo {
+public:
+ RISCVRegisterBankInfo(const TargetRegisterInfo &TRI);
+};
+} // end namespace llvm
+#endif
diff --git a/lib/Target/RISCV/RISCVRegisterBanks.td b/lib/Target/RISCV/RISCVRegisterBanks.td
new file mode 100644
index 000000000000..400b65a1bf9a
--- /dev/null
+++ b/lib/Target/RISCV/RISCVRegisterBanks.td
@@ -0,0 +1,13 @@
+//=-- RISCVRegisterBanks.td - Describe the RISCV Banks -------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the register banks of the RISCV target.
+//
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers: X.
+def GPRRegBank : RegisterBank<"GPRB", [GPR]>;
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.cpp b/lib/Target/RISCV/RISCVRegisterInfo.cpp
index e6a126e3e513..66557687c0b6 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -26,6 +26,15 @@
using namespace llvm;
+static_assert(RISCV::X1 == RISCV::X0 + 1, "Register list not consecutive");
+static_assert(RISCV::X31 == RISCV::X0 + 31, "Register list not consecutive");
+static_assert(RISCV::F1_F == RISCV::F0_F + 1, "Register list not consecutive");
+static_assert(RISCV::F31_F == RISCV::F0_F + 31,
+ "Register list not consecutive");
+static_assert(RISCV::F1_D == RISCV::F0_D + 1, "Register list not consecutive");
+static_assert(RISCV::F31_D == RISCV::F0_D + 31,
+ "Register list not consecutive");
+
RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode)
: RISCVGenRegisterInfo(RISCV::X1, /*DwarfFlavour*/0, /*EHFlavor*/0,
/*PC*/0, HwMode) {}
@@ -109,8 +118,8 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(isInt<32>(Offset) && "Int32 expected");
// The offset won't fit in an immediate, so use a scratch register instead
// Modify Offset and FrameReg appropriately
- unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- TII->movImm32(MBB, II, DL, ScratchReg, Offset);
+ Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ TII->movImm(MBB, II, DL, ScratchReg, Offset);
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), ScratchReg)
.addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.h b/lib/Target/RISCV/RISCVRegisterInfo.h
index 4f339475508f..56a50fe6ddc0 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -52,6 +52,12 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
bool trackLivenessAfterRegAlloc(const MachineFunction &) const override {
return true;
}
+
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override {
+ return &RISCV::GPRRegClass;
+ }
};
}
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.td b/lib/Target/RISCV/RISCVRegisterInfo.td
index 79f8ab12f6c0..82b37afd0805 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -101,6 +101,12 @@ def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add
[RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>;
}
+def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
+ let RegInfos = RegInfoByHwMode<
+ [RV32, RV64, DefaultMode],
+ [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>;
+}
+
// The order of registers represents the preferred allocation sequence.
// Registers are listed in the order caller-save, callee-save, specials.
def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (add
@@ -159,41 +165,41 @@ def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> {
// Floating point registers
let RegAltNameIndices = [ABIRegAltName] in {
- def F0_32 : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
- def F1_32 : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>;
- def F2_32 : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>;
- def F3_32 : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>;
- def F4_32 : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>;
- def F5_32 : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>;
- def F6_32 : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>;
- def F7_32 : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>;
- def F8_32 : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>;
- def F9_32 : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>;
- def F10_32 : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>;
- def F11_32 : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>;
- def F12_32 : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>;
- def F13_32 : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>;
- def F14_32 : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>;
- def F15_32 : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>;
- def F16_32 : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>;
- def F17_32 : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>;
- def F18_32 : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>;
- def F19_32 : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>;
- def F20_32 : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>;
- def F21_32 : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>;
- def F22_32 : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>;
- def F23_32 : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>;
- def F24_32 : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>;
- def F25_32 : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>;
- def F26_32 : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>;
- def F27_32 : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>;
- def F28_32 : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>;
- def F29_32 : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>;
- def F30_32 : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>;
- def F31_32 : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>;
+ def F0_F : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
+ def F1_F : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>;
+ def F2_F : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>;
+ def F3_F : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>;
+ def F4_F : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>;
+ def F5_F : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>;
+ def F6_F : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>;
+ def F7_F : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>;
+ def F8_F : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>;
+ def F9_F : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>;
+ def F10_F : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>;
+ def F11_F : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>;
+ def F12_F : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>;
+ def F13_F : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>;
+ def F14_F : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>;
+ def F15_F : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>;
+ def F16_F : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>;
+ def F17_F : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>;
+ def F18_F : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>;
+ def F19_F : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>;
+ def F20_F : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>;
+ def F21_F : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>;
+ def F22_F : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>;
+ def F23_F : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>;
+ def F24_F : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>;
+ def F25_F : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>;
+ def F26_F : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>;
+ def F27_F : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>;
+ def F28_F : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>;
+ def F29_F : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>;
+ def F30_F : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>;
+ def F31_F : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>;
foreach Index = 0-31 in {
- def F#Index#_64 : RISCVReg64<!cast<RISCVReg32>("F"#Index#"_32")>,
+ def F#Index#_D : RISCVReg64<!cast<RISCVReg32>("F"#Index#"_F")>,
DwarfRegNum<[!add(Index, 32)]>;
}
}
@@ -201,29 +207,29 @@ let RegAltNameIndices = [ABIRegAltName] in {
// The order of registers represents the preferred allocation sequence,
// meaning caller-save regs are listed before callee-save.
def FPR32 : RegisterClass<"RISCV", [f32], 32, (add
- (sequence "F%u_32", 0, 7),
- (sequence "F%u_32", 10, 17),
- (sequence "F%u_32", 28, 31),
- (sequence "F%u_32", 8, 9),
- (sequence "F%u_32", 18, 27)
+ (sequence "F%u_F", 0, 7),
+ (sequence "F%u_F", 10, 17),
+ (sequence "F%u_F", 28, 31),
+ (sequence "F%u_F", 8, 9),
+ (sequence "F%u_F", 18, 27)
)>;
def FPR32C : RegisterClass<"RISCV", [f32], 32, (add
- (sequence "F%u_32", 10, 15),
- (sequence "F%u_32", 8, 9)
+ (sequence "F%u_F", 10, 15),
+ (sequence "F%u_F", 8, 9)
)>;
// The order of registers represents the preferred allocation sequence,
// meaning caller-save regs are listed before callee-save.
def FPR64 : RegisterClass<"RISCV", [f64], 64, (add
- (sequence "F%u_64", 0, 7),
- (sequence "F%u_64", 10, 17),
- (sequence "F%u_64", 28, 31),
- (sequence "F%u_64", 8, 9),
- (sequence "F%u_64", 18, 27)
+ (sequence "F%u_D", 0, 7),
+ (sequence "F%u_D", 10, 17),
+ (sequence "F%u_D", 28, 31),
+ (sequence "F%u_D", 8, 9),
+ (sequence "F%u_D", 18, 27)
)>;
def FPR64C : RegisterClass<"RISCV", [f64], 64, (add
- (sequence "F%u_64", 10, 15),
- (sequence "F%u_64", 8, 9)
+ (sequence "F%u_D", 10, 15),
+ (sequence "F%u_D", 8, 9)
)>;
diff --git a/lib/Target/RISCV/RISCVSubtarget.cpp b/lib/Target/RISCV/RISCVSubtarget.cpp
index 6902ed75d852..f114c6ac1925 100644
--- a/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -12,7 +12,11 @@
#include "RISCVSubtarget.h"
#include "RISCV.h"
+#include "RISCVCallLowering.h"
#include "RISCVFrameLowering.h"
+#include "RISCVLegalizerInfo.h"
+#include "RISCVRegisterBankInfo.h"
+#include "RISCVTargetMachine.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -47,4 +51,28 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
StringRef ABIName, const TargetMachine &TM)
: RISCVGenSubtargetInfo(TT, CPU, FS),
FrameLowering(initializeSubtargetDependencies(TT, CPU, FS, ABIName)),
- InstrInfo(), RegInfo(getHwMode()), TLInfo(TM, *this) {}
+ InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {
+ CallLoweringInfo.reset(new RISCVCallLowering(*getTargetLowering()));
+ Legalizer.reset(new RISCVLegalizerInfo(*this));
+
+ auto *RBI = new RISCVRegisterBankInfo(*getRegisterInfo());
+ RegBankInfo.reset(RBI);
+ InstSelector.reset(createRISCVInstructionSelector(
+ *static_cast<const RISCVTargetMachine *>(&TM), *this, *RBI));
+}
+
+const CallLowering *RISCVSubtarget::getCallLowering() const {
+ return CallLoweringInfo.get();
+}
+
+InstructionSelector *RISCVSubtarget::getInstructionSelector() const {
+ return InstSelector.get();
+}
+
+const LegalizerInfo *RISCVSubtarget::getLegalizerInfo() const {
+ return Legalizer.get();
+}
+
+const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const {
+ return RegBankInfo.get();
+}
diff --git a/lib/Target/RISCV/RISCVSubtarget.h b/lib/Target/RISCV/RISCVSubtarget.h
index 106ff49f021a..7d0373a5253a 100644
--- a/lib/Target/RISCV/RISCVSubtarget.h
+++ b/lib/Target/RISCV/RISCVSubtarget.h
@@ -17,6 +17,10 @@
#include "RISCVISelLowering.h"
#include "RISCVInstrInfo.h"
#include "Utils/RISCVBaseInfo.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -38,6 +42,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasRV64 = false;
bool IsRV32E = false;
bool EnableLinkerRelax = false;
+ bool EnableRVCHintInstrs = false;
unsigned XLen = 32;
MVT XLenVT = MVT::i32;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
@@ -75,6 +80,7 @@ public:
const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
+ bool enableMachineScheduler() const override { return true; }
bool hasStdExtM() const { return HasStdExtM; }
bool hasStdExtA() const { return HasStdExtA; }
bool hasStdExtF() const { return HasStdExtF; }
@@ -83,9 +89,23 @@ public:
bool is64Bit() const { return HasRV64; }
bool isRV32E() const { return IsRV32E; }
bool enableLinkerRelax() const { return EnableLinkerRelax; }
+ bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; }
MVT getXLenVT() const { return XLenVT; }
unsigned getXLen() const { return XLen; }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
+
+protected:
+ // GlobalISel related APIs.
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
+public:
+ const CallLowering *getCallLowering() const override;
+ InstructionSelector *getInstructionSelector() const override;
+ const LegalizerInfo *getLegalizerInfo() const override;
+ const RegisterBankInfo *getRegBankInfo() const override;
};
} // End llvm namespace
diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp
index f4e6ed9f6284..5ffc6eda6bd7 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -17,6 +17,10 @@
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -30,6 +34,7 @@ extern "C" void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto PR = PassRegistry::getPassRegistry();
+ initializeGlobalISel(*PR);
initializeRISCVExpandPseudoPass(*PR);
}
@@ -58,7 +63,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- TLOF(make_unique<RISCVELFTargetObjectFile>()),
+ TLOF(std::make_unique<RISCVELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, Options.MCOptions.getABIName(), *this) {
initAsmInfo();
}
@@ -80,6 +85,10 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
+ bool addIRTranslator() override;
+ bool addLegalizeMachineIR() override;
+ bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
void addPreEmitPass() override;
void addPreEmitPass2() override;
void addPreRegAlloc() override;
@@ -101,6 +110,26 @@ bool RISCVPassConfig::addInstSelector() {
return false;
}
+bool RISCVPassConfig::addIRTranslator() {
+ addPass(new IRTranslator());
+ return false;
+}
+
+bool RISCVPassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
+ return false;
+}
+
+bool RISCVPassConfig::addRegBankSelect() {
+ addPass(new RegBankSelect());
+ return false;
+}
+
+bool RISCVPassConfig::addGlobalInstructionSelect() {
+ addPass(new InstructionSelect());
+ return false;
+}
+
void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
void RISCVPassConfig::addPreEmitPass2() {
diff --git a/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index c33c72f24319..30e475e80a01 100644
--- a/lib/Target/RISCV/Utils/RISCVBaseInfo.h
+++ b/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -16,6 +16,7 @@
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/SubtargetFeature.h"
namespace llvm {
@@ -63,6 +64,21 @@ enum {
};
} // namespace RISCVII
+namespace RISCVOp {
+enum OperandType : unsigned {
+ OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_UIMM4 = OPERAND_FIRST_RISCV_IMM,
+ OPERAND_UIMM5,
+ OPERAND_UIMM12,
+ OPERAND_SIMM12,
+ OPERAND_SIMM13_LSB0,
+ OPERAND_UIMM20,
+ OPERAND_SIMM21_LSB0,
+ OPERAND_UIMMLOG2XLEN,
+ OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN
+};
+} // namespace RISCVOp
+
// Describes the predecessor/successor bits used in the FENCE instruction.
namespace RISCVFenceField {
enum FenceField {
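
The new RISCVOp::OperandType values give each RISC-V immediate class a machine-verifiable identity; checking an operand against, say, OPERAND_UIMM5 or OPERAND_SIMM12 reduces to a bit-width predicate (the _LSB0 classes additionally require the low bit to be clear). A self-contained sketch of those predicates, mirroring llvm::isUInt/isInt from MathExtras.h but reimplemented so the snippet stands alone:

    #include <cassert>
    #include <cstdint>

    // Standalone equivalents of llvm::isUInt<N> / llvm::isInt<N>,
    // valid for the small N these operand classes use.
    template <unsigned N> bool isUIntN(int64_t x) {
      return x >= 0 && x < (int64_t(1) << N);
    }
    template <unsigned N> bool isIntN(int64_t x) {
      return x >= -(int64_t(1) << (N - 1)) && x < (int64_t(1) << (N - 1));
    }

    int main() {
      assert(isUIntN<5>(31) && !isUIntN<5>(32));      // OPERAND_UIMM5
      assert(isIntN<12>(-2048) && !isIntN<12>(2048)); // OPERAND_SIMM12
      return 0;
    }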
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 15453ae59a4f..f6be9dd01249 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -376,7 +376,7 @@ public:
}
static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) {
- auto Op = make_unique<SparcOperand>(k_Token);
+ auto Op = std::make_unique<SparcOperand>(k_Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -386,7 +386,7 @@ public:
static std::unique_ptr<SparcOperand> CreateReg(unsigned RegNum, unsigned Kind,
SMLoc S, SMLoc E) {
- auto Op = make_unique<SparcOperand>(k_Register);
+ auto Op = std::make_unique<SparcOperand>(k_Register);
Op->Reg.RegNum = RegNum;
Op->Reg.Kind = (SparcOperand::RegisterKind)Kind;
Op->StartLoc = S;
@@ -396,7 +396,7 @@ public:
static std::unique_ptr<SparcOperand> CreateImm(const MCExpr *Val, SMLoc S,
SMLoc E) {
- auto Op = make_unique<SparcOperand>(k_Immediate);
+ auto Op = std::make_unique<SparcOperand>(k_Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -481,7 +481,7 @@ public:
static std::unique_ptr<SparcOperand>
CreateMEMr(unsigned Base, SMLoc S, SMLoc E) {
- auto Op = make_unique<SparcOperand>(k_MemoryReg);
+ auto Op = std::make_unique<SparcOperand>(k_MemoryReg);
Op->Mem.Base = Base;
Op->Mem.OffsetReg = Sparc::G0; // always 0
Op->Mem.Off = nullptr;
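
The make_unique to std::make_unique churn in this file (and repeatedly below) comes from LLVM's move to C++14, which let the tree retire the llvm::make_unique polyfill in favor of the standard facility. For reference:

    #include <memory>
    #include <string>

    struct Operand {
      std::string Tok;
      explicit Operand(std::string T) : Tok(std::move(T)) {}
    };

    int main() {
      // No explicit `new`, exception-safe, one allocation site.
      auto Op = std::make_unique<Operand>("imm");
      return Op->Tok == "imm" ? 0 : 1;
    }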
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index f1ca8e18c228..db8e7850300f 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -253,7 +253,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
if (!MO.isReg())
continue; // skip
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (MO.isDef()) {
// check whether Reg is defined or used before delay slot.
@@ -324,7 +324,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0)
continue;
if (MO.isDef())
@@ -380,7 +380,7 @@ static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI,
//
// After : restore <op0>, <op1>, %o[0-7]
- unsigned reg = AddMI->getOperand(0).getReg();
+ Register reg = AddMI->getOperand(0).getReg();
if (reg < SP::I0 || reg > SP::I7)
return false;
@@ -408,7 +408,7 @@ static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI,
//
// After : restore <op0>, <op1>, %o[0-7]
- unsigned reg = OrMI->getOperand(0).getReg();
+ Register reg = OrMI->getOperand(0).getReg();
if (reg < SP::I0 || reg > SP::I7)
return false;
@@ -446,7 +446,7 @@ static bool combineRestoreSETHIi(MachineBasicBlock::iterator RestoreMI,
//
// After : restore %g0, (imm3<<10), %o[0-7]
- unsigned reg = SetHiMI->getOperand(0).getReg();
+ Register reg = SetHiMI->getOperand(0).getReg();
if (reg < SP::I0 || reg > SP::I7)
return false;
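
These unsigned to Register conversions (which recur through the rest of the import) adopt llvm::Register, a strong typedef over the raw register number: it still converts implicitly to and from unsigned, but carries isVirtual()/isPhysical() queries in place of magic-range comparisons. A rough standalone approximation; the real class lives in llvm/CodeGen/Register.h, and the encoding below is illustrative:

    #include <cassert>

    class Register {
      unsigned Reg = 0;
      // Illustrative convention: virtual registers have the top bit set.
      static constexpr unsigned VirtualBit = 1u << 31;

    public:
      constexpr Register(unsigned R = 0) : Reg(R) {}
      constexpr operator unsigned() const { return Reg; }
      constexpr bool isVirtual() const { return (Reg & VirtualBit) != 0; }
      constexpr bool isPhysical() const { return Reg != 0 && !isVirtual(); }
    };

    int main() {
      Register Phys(42), Virt((1u << 31) | 7);
      assert(Phys.isPhysical() && Virt.isVirtual());
      assert(Phys + 1 == 43); // still usable where unsigned is expected
      return 0;
    }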
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 88547075c5ae..c97a30e634cc 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -49,7 +49,7 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx,
}
if (IsPCRel) {
- switch((unsigned)Fixup.getKind()) {
+ switch(Fixup.getTargetKind()) {
default:
llvm_unreachable("Unimplemented fixup -> relocation");
case FK_Data_1: return ELF::R_SPARC_DISP8;
@@ -65,7 +65,7 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx,
}
}
- switch((unsigned)Fixup.getKind()) {
+ switch(Fixup.getTargetKind()) {
default:
llvm_unreachable("Unimplemented fixup -> relocation");
case FK_Data_1: return ELF::R_SPARC_8;
@@ -135,5 +135,5 @@ bool SparcELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
std::unique_ptr<MCObjectTargetWriter>
llvm::createSparcELFObjectWriter(bool Is64Bit, uint8_t OSABI) {
- return llvm::make_unique<SparcELFObjectWriter>(Is64Bit, OSABI);
+ return std::make_unique<SparcELFObjectWriter>(Is64Bit, OSABI);
}
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 1834a6fd861d..0f74f2bb344c 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -34,7 +34,8 @@ DisableLeafProc("disable-sparc-leaf-proc",
SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8) {}
+ ST.is64Bit() ? Align(16) : Align(8), 0,
+ ST.is64Bit() ? Align(16) : Align(8)) {}
void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock &MBB,
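
The frame-lowering constructor now takes llvm::Align instead of raw byte counts, so a non-power-of-two alignment is rejected when the value is built rather than silently corrupting offset math later. The heart of the type is a checked wrapper, roughly:

    #include <cassert>
    #include <cstdint>

    // Minimal sketch of llvm::Align's invariant: a power of two, never 0.
    class Align {
      uint64_t Value;

    public:
      explicit Align(uint64_t V) : Value(V) {
        assert(V != 0 && (V & (V - 1)) == 0 && "not a power of two");
      }
      uint64_t value() const { return Value; }
    };

    int main() {
      Align A(16); // Sparc 64-bit stack alignment, as in the hunk above
      // Align B(12); // would assert at construction
      return A.value() == 16 ? 0 : 1;
    }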
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 8cff50d19ed4..4e61c341b703 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -231,7 +231,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
// the original GPRs.
- unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
+ Register GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
SDValue Chain = SDValue(N,0);
@@ -278,7 +278,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
// Copy REG_SEQ into a GPRPair-typed VR and replace the original two
// i32 VRs of inline asm with it.
- unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
+ Register GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index a6d440fa8aa2..4a2ba00ac6c2 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -417,7 +417,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
- unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ Register VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
@@ -445,7 +445,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
InVals.push_back(WholeValue);
continue;
}
- unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
if (VA.getLocVT() == MVT::f32)
@@ -552,7 +552,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
std::vector<SDValue> OutChains;
for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
- unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
@@ -1016,9 +1016,9 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
- unsigned Reg = StringSwitch<unsigned>(RegName)
+Register SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const {
+ Register Reg = StringSwitch<unsigned>(RegName)
.Case("i0", SP::I0).Case("i1", SP::I1).Case("i2", SP::I2).Case("i3", SP::I3)
.Case("i4", SP::I4).Case("i5", SP::I5).Case("i6", SP::I6).Case("i7", SP::I7)
.Case("o0", SP::O0).Case("o1", SP::O1).Case("o2", SP::O2).Case("o3", SP::O3)
@@ -1438,7 +1438,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(Op, MVT::v2i32, Expand);
}
// Truncating/extending stores/loads are also not supported.
- for (MVT VT : MVT::integer_vector_valuetypes()) {
+ for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand);
@@ -1805,7 +1805,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(Align(4));
computeRegisterProperties(Subtarget->getRegisterInfo());
}
@@ -2244,7 +2244,7 @@ SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UL : {
- SDValue Mask = DAG.getTargetConstant(1, DL, Result.getValueType());
+ SDValue Mask = DAG.getConstant(1, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
@@ -2277,14 +2277,14 @@ SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_LG : {
- SDValue Mask = DAG.getTargetConstant(3, DL, Result.getValueType());
+ SDValue Mask = DAG.getConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UE : {
- SDValue Mask = DAG.getTargetConstant(3, DL, Result.getValueType());
+ SDValue Mask = DAG.getConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_E;
@@ -2951,9 +2951,11 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
SDValue HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt);
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
SDValue MulResult = TLI.makeLibCall(DAG,
RTLIB::MUL_I128, WideVT,
- Args, isSigned, dl).first;
+ Args, CallOptions, dl).first;
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
MulResult, DAG.getIntPtrConstant(0, dl));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
@@ -3183,7 +3185,7 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const {
case 'e':
return C_RegisterClass;
case 'I': // SIMM13
- return C_Other;
+ return C_Immediate;
}
}
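
Two API migrations show up in this file. Inline-asm constraints that denote immediates ('I', SIMM13) move from C_Other to the new C_Immediate kind, and makeLibCall grows a MakeLibCallOptions argument in place of a bare isSigned flag. The latter is the usual options-struct refactor for parameter lists that keep growing; a standalone sketch of the pattern with illustrative names:

    #include <iostream>

    // Before: makeLibCall(..., bool isSigned, ...) - opaque at call sites.
    // After: a small options bag with chainable setters.
    struct MakeCallOptions {
      bool IsSigned = false;
      MakeCallOptions &setSExt(bool V) {
        IsSigned = V;
        return *this;
      }
    };

    void makeCall(const char *Name, const MakeCallOptions &Opts) {
      std::cout << Name << " signed=" << Opts.IsSigned << '\n';
    }

    int main() {
      MakeCallOptions CallOptions;
      CallOptions.setSExt(true); // mirrors the LowerUMULO_SMULO hunk above
      makeCall("__multi3", CallOptions);
      return 0;
    }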
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 8d557a4225e5..3d798cec0c16 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -98,8 +98,8 @@ namespace llvm {
return MVT::i32;
}
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
index 2d4f687f72d2..d18ab3b1370b 100644
--- a/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -177,7 +177,7 @@ def LEAX_ADDri : F3_2<2, 0b000000,
def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>;
def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
-def : Pat<(ctpop i64:$src), (POPCrr $src)>;
+def : Pat<(i64 (ctpop i64:$src)), (POPCrr $src)>;
} // Predicates = [Is64Bit]
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index ad343fe6f80a..3d3d314a26bb 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -375,8 +375,8 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineInstr *MovMI = nullptr;
for (unsigned i = 0; i != numSubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]);
- unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
+ Register Dst = TRI->getSubReg(DestReg, subRegIdx[i]);
+ Register Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
assert(Dst && Src && "Bad sub-register");
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst);
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 8474c7abffb3..73dbdc4f443e 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -516,9 +516,9 @@ let DecoderMethod = "DecodeLoadQFP" in
defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>,
Requires<[HasV9, HasHardQuad]>;
-let DecoderMethod = "DecodeLoadCP" in
- defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>;
-let DecoderMethod = "DecodeLoadCPPair" in
+let DecoderMethod = "DecodeLoadCP" in
+ defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>;
+let DecoderMethod = "DecodeLoadCPPair" in
defm LDDC : Load<"ldd", 0b110011, load, CoprocPair, v2i32, IIC_ldd>;
let DecoderMethod = "DecodeLoadCP", Defs = [CPSR] in {
@@ -1508,7 +1508,7 @@ let rs1 = 0 in
def POPCrr : F3_1<2, 0b101110,
(outs IntRegs:$rd), (ins IntRegs:$rs2),
"popc $rs2, $rd", []>, Requires<[HasV9]>;
-def : Pat<(ctpop i32:$src),
+def : Pat<(i32 (ctpop i32:$src)),
(POPCrr (SRLri $src, 0))>;
let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index ce11a423d10e..19a90e98db7e 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -182,9 +182,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
if (MI.getOpcode() == SP::STQFri) {
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- unsigned SrcReg = MI.getOperand(2).getReg();
- unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
- unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
+ Register SrcReg = MI.getOperand(2).getReg();
+ Register SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
+ Register SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
MachineInstr *StMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
.addReg(FrameReg).addImm(0).addReg(SrcEvenReg);
@@ -194,9 +194,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset += 8;
} else if (MI.getOpcode() == SP::LDQFri) {
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
- unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
+ Register DestReg = MI.getOperand(0).getReg();
+ Register DestEvenReg = getSubReg(DestReg, SP::sub_even64);
+ Register DestOddReg = getSubReg(DestReg, SP::sub_odd64);
MachineInstr *LdMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
.addReg(FrameReg).addImm(0);
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 195cff79de03..c1e3f8c36982 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -98,7 +98,7 @@ SparcTargetMachine::SparcTargetMachine(
getEffectiveSparcCodeModel(
CM, getEffectiveRelocModel(RM), is64bit, JIT),
OL),
- TLOF(make_unique<SparcELFTargetObjectFile>()),
+ TLOF(std::make_unique<SparcELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit), is64Bit(is64bit) {
initAsmInfo();
}
@@ -133,7 +133,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<SparcSubtarget>(TargetTriple, CPU, FS, *this,
+ I = std::make_unique<SparcSubtarget>(TargetTriple, CPU, FS, *this,
this->is64Bit);
}
return I.get();
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index a259ba3433d6..93c4ce4b5ccc 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -155,11 +155,11 @@ public:
// Create particular kinds of operand.
static std::unique_ptr<SystemZOperand> createInvalid(SMLoc StartLoc,
SMLoc EndLoc) {
- return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc);
+ return std::make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc);
}
static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) {
- auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc);
+ auto Op = std::make_unique<SystemZOperand>(KindToken, Loc, Loc);
Op->Token.Data = Str.data();
Op->Token.Length = Str.size();
return Op;
@@ -167,7 +167,7 @@ public:
static std::unique_ptr<SystemZOperand>
createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
- auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc);
+ auto Op = std::make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc);
Op->Reg.Kind = Kind;
Op->Reg.Num = Num;
return Op;
@@ -175,7 +175,7 @@ public:
static std::unique_ptr<SystemZOperand>
createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) {
- auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc);
+ auto Op = std::make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc);
Op->Imm = Expr;
return Op;
}
@@ -184,7 +184,7 @@ public:
createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base,
const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm,
unsigned LengthReg, SMLoc StartLoc, SMLoc EndLoc) {
- auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc);
+ auto Op = std::make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc);
Op->Mem.MemKind = MemKind;
Op->Mem.RegKind = RegKind;
Op->Mem.Base = Base;
@@ -200,7 +200,7 @@ public:
static std::unique_ptr<SystemZOperand>
createImmTLS(const MCExpr *Imm, const MCExpr *Sym,
SMLoc StartLoc, SMLoc EndLoc) {
- auto Op = make_unique<SystemZOperand>(KindImmTLS, StartLoc, EndLoc);
+ auto Op = std::make_unique<SystemZOperand>(KindImmTLS, StartLoc, EndLoc);
Op->ImmTLS.Imm = Imm;
Op->ImmTLS.Sym = Sym;
return Op;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 8d8ba5644e10..49b6fc490336 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -162,5 +162,5 @@ unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
std::unique_ptr<MCObjectTargetWriter>
llvm::createSystemZObjectWriter(uint8_t OSABI) {
- return llvm::make_unique<SystemZObjectWriter>(OSABI);
+ return std::make_unique<SystemZObjectWriter>(OSABI);
}
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index 2b0f90182d7f..88cf589a3f10 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -190,7 +190,6 @@ static inline bool isImmHF(uint64_t Val) {
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
-FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index ef378e4ade7a..10023e9e169c 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -501,6 +501,10 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
break;
+ case TargetOpcode::FENTRY_CALL:
+ LowerFENTRY_CALL(*MI, Lower);
+ return;
+
case TargetOpcode::STACKMAP:
LowerSTACKMAP(*MI);
return;
@@ -546,6 +550,22 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
}
}
+void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
+ SystemZMCInstLower &Lower) {
+ MCContext &Ctx = MF->getContext();
+ if (MF->getFunction().getFnAttribute("mnop-mcount")
+ .getValueAsString() == "true") {
+ EmitNop(Ctx, *OutStreamer, 6, getSubtargetInfo());
+ return;
+ }
+
+ MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
+ const MCSymbolRefExpr *Op =
+ MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_PLT, Ctx);
+ OutStreamer->EmitInstruction(MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R0D).addExpr(Op), getSubtargetInfo());
+}
+
void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(MF->getSubtarget().getInstrInfo());
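
LowerFENTRY_CALL implements __fentry__ instrumentation for SystemZ: the FENTRY_CALL pseudo at function entry is lowered to `brasl %r0, __fentry__@PLT`, or, when the function carries the "mnop-mcount"="true" attribute, to a same-sized 6-byte NOP that a tracer can patch into a call at run time. Assuming the matching driver plumbing exists (-pg/-mfentry/-mnop-mcount on the s390x gcc or clang driver setting these attributes; an assumption, since the flags are not part of this diff):

    // Hypothetical build, flags assumed rather than shown in this diff:
    //   clang --target=s390x-linux-gnu -pg -mfentry -mnop-mcount -c trace.cc
    // Each function entry then carries a patchable 6-byte NOP where the
    // `brasl %r0, __fentry__@PLT` would otherwise sit.
    extern "C" void traced() {
      // ordinary body; the instrumentation point is at function entry
    }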
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h
index aa5d3ca78e61..d01a17c2ebe2 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -46,6 +46,7 @@ public:
}
private:
+ void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL);
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower);
};
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index 9cbf6b320504..946eb2ba7c79 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -152,7 +152,7 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
if (MO.isReg()) {
- if (unsigned MOReg = MO.getReg()) {
+ if (Register MOReg = MO.getReg()) {
if (TRI->regsOverlap(MOReg, Reg)) {
if (MO.isUse())
Ref.Use = true;
@@ -378,11 +378,8 @@ bool SystemZElimCompare::adjustCCMasksForInstr(
}
// CC is now live after MI.
- if (!ConvOpc) {
- int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI);
- assert(CCDef >= 0 && "Couldn't find CC set");
- MI.getOperand(CCDef).setIsDead(false);
- }
+ if (!ConvOpc)
+ MI.clearRegisterDeads(SystemZ::CC);
// Check if MI lies before Compare.
bool BeforeCmp = false;
diff --git a/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/lib/Target/SystemZ/SystemZExpandPseudo.cpp
deleted file mode 100644
index 09708fb4241c..000000000000
--- a/lib/Target/SystemZ/SystemZExpandPseudo.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass that expands pseudo instructions into target
-// instructions to allow proper scheduling and other late optimizations. This
-// pass should be run after register allocation but before the post-regalloc
-// scheduling pass.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZ.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZSubtarget.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-using namespace llvm;
-
-#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass"
-
-namespace llvm {
- void initializeSystemZExpandPseudoPass(PassRegistry&);
-}
-
-namespace {
-class SystemZExpandPseudo : public MachineFunctionPass {
-public:
- static char ID;
- SystemZExpandPseudo() : MachineFunctionPass(ID) {
- initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry());
- }
-
- const SystemZInstrInfo *TII;
-
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
- StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; }
-
-private:
- bool expandMBB(MachineBasicBlock &MBB);
- bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
- bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
-};
-char SystemZExpandPseudo::ID = 0;
-}
-
-INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo",
- SYSTEMZ_EXPAND_PSEUDO_NAME, false, false)
-
-/// Returns an instance of the pseudo instruction expansion pass.
-FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) {
- return new SystemZExpandPseudo();
-}
-
-// MI is a load-register-on-condition pseudo instruction that could not be
-// handled as a single hardware instruction. Replace it by a branch sequence.
-bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
- MachineFunction &MF = *MBB.getParent();
- const BasicBlock *BB = MBB.getBasicBlock();
- MachineInstr &MI = *MBBI;
- DebugLoc DL = MI.getDebugLoc();
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(2).getReg();
- unsigned CCValid = MI.getOperand(3).getImm();
- unsigned CCMask = MI.getOperand(4).getImm();
-
- LivePhysRegs LiveRegs(TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
-
- // Splice MBB at MI, moving the rest of the block into RestMBB.
- MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB);
- MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
- RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
- RestMBB->transferSuccessors(&MBB);
- for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
- RestMBB->addLiveIn(*I);
-
- // Create a new block MoveMBB to hold the move instruction.
- MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
- MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
- MoveMBB->addLiveIn(SrcReg);
- for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
- MoveMBB->addLiveIn(*I);
-
- // At the end of MBB, create a conditional branch to RestMBB if the
- // condition is false, otherwise fall through to MoveMBB.
- BuildMI(&MBB, DL, TII->get(SystemZ::BRC))
- .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB);
- MBB.addSuccessor(RestMBB);
- MBB.addSuccessor(MoveMBB);
-
- // In MoveMBB, emit an instruction to move SrcReg into DestReg,
- // then fall through to RestMBB.
- TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg,
- MI.getOperand(2).isKill());
- MoveMBB->addSuccessor(RestMBB);
-
- NextMBBI = MBB.end();
- MI.eraseFromParent();
- return true;
-}
-
-/// If MBBI references a pseudo instruction that should be expanded here,
-/// do the expansion and return true. Otherwise return false.
-bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
- MachineInstr &MI = *MBBI;
- switch (MI.getOpcode()) {
- case SystemZ::LOCRMux:
- return expandLOCRMux(MBB, MBBI, NextMBBI);
- default:
- break;
- }
- return false;
-}
-
-/// Iterate over the instructions in basic block MBB and expand any
-/// pseudo instructions. Return true if anything was modified.
-bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
- bool Modified = false;
-
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- while (MBBI != E) {
- MachineBasicBlock::iterator NMBBI = std::next(MBBI);
- Modified |= expandMI(MBB, MBBI, NMBBI);
- MBBI = NMBBI;
- }
-
- return Modified;
-}
-
-bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- bool Modified = false;
- for (auto &MBB : MF)
- Modified |= expandMBB(MBB);
- return Modified;
-}
-
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index da28faebb326..0b8b6880accc 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -46,8 +46,8 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
} // end anonymous namespace
SystemZFrameLowering::SystemZFrameLowering()
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
- -SystemZMC::CallFrameSize, 8,
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8),
+ -SystemZMC::CallFrameSize, Align(8),
false /* StackRealignable */) {
// Create a mapping from register number to save slot offset.
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
@@ -118,7 +118,7 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
unsigned GPR64, bool IsImplicit) {
const TargetRegisterInfo *RI =
MBB.getParent()->getSubtarget().getRegisterInfo();
- unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
+ Register GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
if (!IsLive || !IsImplicit) {
MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 9dc4512255cc..751034c2d41a 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -346,6 +346,11 @@ public:
: SelectionDAGISel(TM, OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override {
+ const Function &F = MF.getFunction();
+ if (F.getFnAttribute("mnop-mcount").getValueAsString() == "true" &&
+ F.getFnAttribute("fentry-call").getValueAsString() != "true")
+ report_fatal_error("mnop-mcount only supported with fentry-call");
+
Subtarget = &MF.getSubtarget<SystemZSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -1146,7 +1151,7 @@ void SystemZDAGToDAGISel::loadVectorConstant(
SDLoc DL(Node);
SmallVector<SDValue, 2> Ops;
for (unsigned OpVal : VCI.OpVals)
- Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32));
SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);
if (VCI.VecVT == VT.getSimpleVT())
@@ -1550,8 +1555,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
uint64_t ConstCCMask =
cast<ConstantSDNode>(CCMask.getNode())->getZExtValue();
// Invert the condition.
- CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node),
- CCMask.getValueType());
+ CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask,
+ SDLoc(Node), CCMask.getValueType());
SDValue Op4 = Node->getOperand(4);
SDNode *UpdatedNode =
CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 78820f511ab4..e0ca9da93561 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -120,9 +120,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Instructions are strings of 2-byte aligned 2-byte values.
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(Align(2));
// For performance reasons we prefer 16-byte alignment.
- setPrefFunctionAlignment(4);
+ setPrefFunctionAlignment(Align(16));
// Handle operations that are handled in a similar way for all types.
for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
@@ -206,6 +206,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// the default expansion.
if (!Subtarget.hasFPExtension())
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+
+ // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
+ // default to Expand, so they need to be marked Legal where appropriate.
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
+ if (Subtarget.hasFPExtension())
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
}
}
@@ -252,7 +258,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ, MVT::i64, Legal);
- // On arch13 we have native support for a 64-bit CTPOP.
+ // On z15 we have native support for a 64-bit CTPOP.
if (Subtarget.hasMiscellaneousExtensions3()) {
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
@@ -294,14 +300,14 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Handle prefetches with PFD or PFDRL.
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
// Assume by default that all vector operations need to be expanded.
for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
if (getOperationAction(Opcode, VT) == Legal)
setOperationAction(Opcode, VT, Expand);
// Likewise all truncating stores and extending loads.
- for (MVT InnerVT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
@@ -327,7 +333,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
}
// Handle integer vector types.
- for (MVT VT : MVT::integer_vector_valuetypes()) {
+ for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
if (isTypeLegal(VT)) {
// These operations have direct equivalents.
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
@@ -381,6 +387,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
}
if (Subtarget.hasVectorEnhancements2()) {
@@ -392,6 +403,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
}
// Handle floating-point types.
@@ -831,7 +847,7 @@ supportedAddressingMode(Instruction *I, bool HasVector) {
}
if (isa<LoadInst>(I) && I->hasOneUse()) {
- auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
+ auto *SingleUser = cast<Instruction>(*I->user_begin());
if (SingleUser->getParent() == I->getParent()) {
if (isa<ICmpInst>(SingleUser)) {
if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
@@ -956,7 +972,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
case 'K': // Signed 16-bit constant
case 'L': // Signed 20-bit displacement (on all targets we support)
case 'M': // 0x7fffffff
- return C_Other;
+ return C_Immediate;
default:
break;
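
The same C_Other to C_Immediate reclassification applied to Sparc above lands here for SystemZ's constant constraints ('K', 'L', 'M' and friends). Classifying them as immediates lets the backend reject an operand that fails to fold to a constant instead of retrying it as a generic operand. An illustrative use of the 'K' (signed 16-bit constant) constraint, compilable when targeting s390x:

    // GNU inline asm: "d" selects a GPR, "K" a signed 16-bit constant,
    // matching the constraint letters classified above.
    int add100(int x) {
      asm("ahi %0,%1" : "+d"(x) : "K"(100));
      return x;
    }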
@@ -1335,7 +1351,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
break;
}
- unsigned VReg = MRI.createVirtualRegister(RC);
+ Register VReg = MRI.createVirtualRegister(RC);
MRI.addLiveIn(VA.getLocReg(), VReg);
ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
} else {
@@ -1430,7 +1446,7 @@ static bool canUseSiblingCall(const CCState &ArgCCInfo,
return false;
if (!VA.isRegLoc())
return false;
- unsigned Reg = VA.getLocReg();
+ Register Reg = VA.getLocReg();
if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
return false;
if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
@@ -1674,7 +1690,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
// Chain and glue the copies together.
- unsigned Reg = VA.getLocReg();
+ Register Reg = VA.getLocReg();
Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
@@ -2533,12 +2549,12 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
}
if (C.Opcode == SystemZISD::ICMP)
return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
- DAG.getConstant(C.ICmpType, DL, MVT::i32));
+ DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
if (C.Opcode == SystemZISD::TM) {
bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
- DAG.getConstant(RegisterOnly, DL, MVT::i32));
+ DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
}
return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}
@@ -2576,10 +2592,10 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
unsigned CCValid, unsigned CCMask) {
- SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(CCValid, DL, MVT::i32),
- DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
+ SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getTargetConstant(CCValid, DL, MVT::i32),
+ DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}
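
Several hunks in this file switch operand construction from getConstant to getTargetConstant. The distinction: a plain constant is an ordinary value node that instruction selection may materialize into a register, while a target constant is pinned as an immediate operand of the resulting MachineInstr. CCValid/CCMask above are encoded as immediate fields of SELECT_CCMASK, hence the target form. A fragment against the real SelectionDAG API (compiles with LLVM headers, not standalone):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Build a condition-mask operand that must stay an immediate.
    SDValue buildCCMaskOperand(SelectionDAG &DAG, const SDLoc &DL,
                               unsigned Mask) {
      // getConstant(...) would let ISel put Mask in a register;
      // getTargetConstant(...) pins it as an immediate operand.
      return DAG.getTargetConstant(Mask, DL, MVT::i32);
    }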
@@ -2741,9 +2757,10 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
SDValue CCReg = emitCmp(DAG, DL, C);
- return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
- Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
- DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
+ return DAG.getNode(
+ SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
+ DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
+ DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
}
// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
@@ -2794,8 +2811,9 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
}
SDValue CCReg = emitCmp(DAG, DL, C);
- SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
- DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};
+ SDValue Ops[] = {TrueOp, FalseOp,
+ DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
+ DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
}
@@ -3882,11 +3900,8 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
- SDValue Ops[] = {
- Op.getOperand(0),
- DAG.getConstant(Code, DL, MVT::i32),
- Op.getOperand(1)
- };
+ SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
+ Op.getOperand(1)};
return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
Node->getVTList(), Ops,
Node->getMemoryVT(), Node->getMemOperand());
@@ -4228,7 +4243,7 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
SDValue Op;
if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
- SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
+ SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
} else if (P.Opcode == SystemZISD::PACK) {
MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
@@ -4253,7 +4268,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
unsigned StartIndex, OpNo0, OpNo1;
if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
- Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
+ Ops[OpNo1],
+ DAG.getTargetConstant(StartIndex, DL, MVT::i32));
// Fall back on VPERM. Construct an SDNode for the permute vector.
SDValue IndexNodes[SystemZ::VectorBytes];
@@ -4751,7 +4767,7 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
// Otherwise keep it as a vector-to-vector operation.
return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
- DAG.getConstant(Index, DL, MVT::i32));
+ DAG.getTargetConstant(Index, DL, MVT::i32));
}
GeneralShuffle GS(VT);
@@ -6041,8 +6057,8 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
Chain,
- DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
- DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
+ DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
+ DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
N->getOperand(3), CCReg);
return SDValue();
}
@@ -6063,10 +6079,9 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
- N->getOperand(0),
- N->getOperand(1),
- DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
- DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
+ N->getOperand(0), N->getOperand(1),
+ DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
+ DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
CCReg);
return SDValue();
}
@@ -6548,19 +6563,17 @@ static bool isSelectPseudo(MachineInstr &MI) {
// Helper function, which inserts PHI functions into SinkMBB:
// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
-// where %FalseValue(i) and %TrueValue(i) are taken from the consequent Selects
-// in [MIItBegin, MIItEnd) range.
-static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
- MachineBasicBlock::iterator MIItEnd,
+// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
+static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
MachineBasicBlock *TrueMBB,
MachineBasicBlock *FalseMBB,
MachineBasicBlock *SinkMBB) {
MachineFunction *MF = TrueMBB->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- unsigned CCValid = MIItBegin->getOperand(3).getImm();
- unsigned CCMask = MIItBegin->getOperand(4).getImm();
- DebugLoc DL = MIItBegin->getDebugLoc();
+ MachineInstr *FirstMI = Selects.front();
+ unsigned CCValid = FirstMI->getOperand(3).getImm();
+ unsigned CCMask = FirstMI->getOperand(4).getImm();
MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
@@ -6572,16 +6585,15 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
// destination registers, and the registers that went into the PHI.
DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
- for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;
- MIIt = skipDebugInstructionsForward(++MIIt, MIItEnd)) {
- unsigned DestReg = MIIt->getOperand(0).getReg();
- unsigned TrueReg = MIIt->getOperand(1).getReg();
- unsigned FalseReg = MIIt->getOperand(2).getReg();
+ for (auto MI : Selects) {
+ Register DestReg = MI->getOperand(0).getReg();
+ Register TrueReg = MI->getOperand(1).getReg();
+ Register FalseReg = MI->getOperand(2).getReg();
// If this Select we are generating is the opposite condition from
// the jump we generated, then we have to swap the operands for the
// PHI that is going to be generated.
- if (MIIt->getOperand(4).getImm() == (CCValid ^ CCMask))
+ if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
std::swap(TrueReg, FalseReg);
if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
@@ -6590,6 +6602,7 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
FalseReg = RegRewriteTable[FalseReg].second;
+ DebugLoc DL = MI->getDebugLoc();
BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
.addReg(TrueReg).addMBB(TrueMBB)
.addReg(FalseReg).addMBB(FalseMBB);
@@ -6605,36 +6618,61 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
MachineBasicBlock *MBB) const {
+ assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
unsigned CCValid = MI.getOperand(3).getImm();
unsigned CCMask = MI.getOperand(4).getImm();
- DebugLoc DL = MI.getDebugLoc();
// If we have a sequence of Select* pseudo instructions using the
// same condition code value, we want to expand all of them into
// a single pair of basic blocks using the same condition.
- MachineInstr *LastMI = &MI;
- MachineBasicBlock::iterator NextMIIt = skipDebugInstructionsForward(
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
-
- if (isSelectPseudo(MI))
- while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) &&
- NextMIIt->getOperand(3).getImm() == CCValid &&
- (NextMIIt->getOperand(4).getImm() == CCMask ||
- NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) {
- LastMI = &*NextMIIt;
- NextMIIt = skipDebugInstructionsForward(++NextMIIt, MBB->end());
+ SmallVector<MachineInstr*, 8> Selects;
+ SmallVector<MachineInstr*, 8> DbgValues;
+ Selects.push_back(&MI);
+ unsigned Count = 0;
+ for (MachineBasicBlock::iterator NextMIIt =
+ std::next(MachineBasicBlock::iterator(MI));
+ NextMIIt != MBB->end(); ++NextMIIt) {
+ if (NextMIIt->definesRegister(SystemZ::CC))
+ break;
+ if (isSelectPseudo(*NextMIIt)) {
+ assert(NextMIIt->getOperand(3).getImm() == CCValid &&
+ "Bad CCValid operands since CC was not redefined.");
+ if (NextMIIt->getOperand(4).getImm() == CCMask ||
+ NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) {
+ Selects.push_back(&*NextMIIt);
+ continue;
+ }
+ break;
}
+ bool User = false;
+ for (auto SelMI : Selects)
+ if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) {
+ User = true;
+ break;
+ }
+ if (NextMIIt->isDebugInstr()) {
+ if (User) {
+ assert(NextMIIt->isDebugValue() && "Unhandled debug opcode.");
+ DbgValues.push_back(&*NextMIIt);
+ }
+ }
+ else if (User || ++Count > 20)
+ break;
+ }
+ MachineInstr *LastMI = Selects.back();
+ bool CCKilled =
+ (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
// Unless CC was killed in the last Select instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
- if (!LastMI->killsRegister(SystemZ::CC) && !checkCCKill(*LastMI, JoinMBB)) {
+ if (!CCKilled) {
FalseMBB->addLiveIn(SystemZ::CC);
JoinMBB->addLiveIn(SystemZ::CC);
}
@@ -6643,7 +6681,7 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
// BRC CCMask, JoinMBB
// # fallthrough to FalseMBB
MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
.addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
MBB->addSuccessor(JoinMBB);
MBB->addSuccessor(FalseMBB);
@@ -6657,12 +6695,14 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
// %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
// ...
MBB = JoinMBB;
- MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
- MachineBasicBlock::iterator MIItEnd = skipDebugInstructionsForward(
- std::next(MachineBasicBlock::iterator(LastMI)), MBB->end());
- createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB);
+ createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
+ for (auto SelMI : Selects)
+ SelMI->eraseFromParent();
+
+ MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
+ for (auto DbgMI : DbgValues)
+ MBB->splice(InsertPos, StartMBB, DbgMI);
- StartMBB->erase(MIItBegin, MIItEnd);
return JoinMBB;
}
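
The emitSelect rework replaces the old iterator-range walk over adjacent Select* pseudos with explicit vectors: compatible selects (and any DBG_VALUEs reading their results) are collected until CC is clobbered, a non-select uses a pending result, or a 20-instruction budget runs out, so one branch diamond can serve several selects even with unrelated instructions between them. The scanning skeleton, reduced to a standalone form with an illustrative instruction model (the real code also checks CCValid/CCMask compatibility):

    #include <cassert>
    #include <vector>

    struct Inst {
      bool IsSelect = false;
      bool DefinesCC = false;
      bool UsesPendingResult = false; // reads a collected select's result
    };

    // Collect the selects starting at Start that can share one diamond.
    std::vector<const Inst *> collectSelects(const std::vector<Inst> &Block,
                                             size_t Start) {
      std::vector<const Inst *> Selects{&Block[Start]};
      unsigned Budget = 0;
      for (size_t I = Start + 1; I < Block.size(); ++I) {
        const Inst &MI = Block[I];
        if (MI.DefinesCC)
          break; // CC redefined: later selects see a different value
        if (MI.IsSelect) {
          Selects.push_back(&MI); // same CC value: fold into this diamond
          continue;
        }
        if (MI.UsesPendingResult || ++Budget > 20)
          break; // result needed now, or we scanned far enough
      }
      return Selects;
    }

    int main() {
      std::vector<Inst> B(4);
      B[0].IsSelect = B[2].IsSelect = true; // B[1] is skipped over
      assert(collectSelects(B, 0).size() == 2);
      return 0;
    }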
@@ -6678,10 +6718,10 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(0).getReg();
MachineOperand Base = MI.getOperand(1);
int64_t Disp = MI.getOperand(2).getImm();
- unsigned IndexReg = MI.getOperand(3).getReg();
+ Register IndexReg = MI.getOperand(3).getReg();
unsigned CCValid = MI.getOperand(4).getImm();
unsigned CCMask = MI.getOperand(5).getImm();
DebugLoc DL = MI.getDebugLoc();
@@ -6773,7 +6813,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
// Extract the operands. Base can be a register or a frame index.
// Src2 can be a register or immediate.
- unsigned Dest = MI.getOperand(0).getReg();
+ Register Dest = MI.getOperand(0).getReg();
MachineOperand Base = earlyUseOperand(MI.getOperand(1));
int64_t Disp = MI.getOperand(2).getImm();
MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
@@ -6833,7 +6873,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
.addReg(OldVal).addReg(BitShift).addImm(0);
if (Invert) {
// Perform the operation normally and then invert every bit of the field.
- unsigned Tmp = MRI.createVirtualRegister(RC);
+ Register Tmp = MRI.createVirtualRegister(RC);
BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
if (BitSize <= 32)
// XILF with the upper BitSize bits set.
@@ -6842,7 +6882,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
else {
// Use LCGR and add -1 to the result, which is more compact than
// an XILF, XILH pair.
- unsigned Tmp2 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
.addReg(Tmp2).addImm(-1);
@@ -6891,7 +6931,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
bool IsSubWord = (BitSize < 32);
// Extract the operands. Base can be a register or a frame index.
- unsigned Dest = MI.getOperand(0).getReg();
+ Register Dest = MI.getOperand(0).getReg();
MachineOperand Base = earlyUseOperand(MI.getOperand(1));
int64_t Disp = MI.getOperand(2).getImm();
Register Src2 = MI.getOperand(3).getReg();
@@ -7005,13 +7045,13 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
MachineRegisterInfo &MRI = MF.getRegInfo();
// Extract the operands. Base can be a register or a frame index.
- unsigned Dest = MI.getOperand(0).getReg();
+ Register Dest = MI.getOperand(0).getReg();
MachineOperand Base = earlyUseOperand(MI.getOperand(1));
int64_t Disp = MI.getOperand(2).getImm();
- unsigned OrigCmpVal = MI.getOperand(3).getReg();
- unsigned OrigSwapVal = MI.getOperand(4).getReg();
- unsigned BitShift = MI.getOperand(5).getReg();
- unsigned NegBitShift = MI.getOperand(6).getReg();
+ Register OrigCmpVal = MI.getOperand(3).getReg();
+ Register OrigSwapVal = MI.getOperand(4).getReg();
+ Register BitShift = MI.getOperand(5).getReg();
+ Register NegBitShift = MI.getOperand(6).getReg();
int64_t BitSize = MI.getOperand(7).getImm();
DebugLoc DL = MI.getDebugLoc();
@@ -7023,14 +7063,14 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
assert(LOpcode && CSOpcode && "Displacement out of range");
// Create virtual registers for temporary results.
- unsigned OrigOldVal = MRI.createVirtualRegister(RC);
- unsigned OldVal = MRI.createVirtualRegister(RC);
- unsigned CmpVal = MRI.createVirtualRegister(RC);
- unsigned SwapVal = MRI.createVirtualRegister(RC);
- unsigned StoreVal = MRI.createVirtualRegister(RC);
- unsigned RetryOldVal = MRI.createVirtualRegister(RC);
- unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
- unsigned RetrySwapVal = MRI.createVirtualRegister(RC);
+ Register OrigOldVal = MRI.createVirtualRegister(RC);
+ Register OldVal = MRI.createVirtualRegister(RC);
+ Register CmpVal = MRI.createVirtualRegister(RC);
+ Register SwapVal = MRI.createVirtualRegister(RC);
+ Register StoreVal = MRI.createVirtualRegister(RC);
+ Register RetryOldVal = MRI.createVirtualRegister(RC);
+ Register RetryCmpVal = MRI.createVirtualRegister(RC);
+ Register RetrySwapVal = MRI.createVirtualRegister(RC);
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
@@ -7129,11 +7169,11 @@ SystemZTargetLowering::emitPair128(MachineInstr &MI,
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Hi = MI.getOperand(1).getReg();
- unsigned Lo = MI.getOperand(2).getReg();
- unsigned Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
- unsigned Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+ Register Dest = MI.getOperand(0).getReg();
+ Register Hi = MI.getOperand(1).getReg();
+ Register Lo = MI.getOperand(2).getReg();
+ Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+ Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
@@ -7157,14 +7197,14 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
- unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+ Register Dest = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
if (ClearEven) {
- unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
- unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
+ Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+ Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
.addImm(0);
@@ -7308,7 +7348,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// The previous iteration might have created out-of-range displacements.
// Apply them using LAY if so.
if (!isUInt<12>(DestDisp)) {
- unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
.add(DestBase)
.addImm(DestDisp)
@@ -7317,7 +7357,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
DestDisp = 0;
}
if (!isUInt<12>(SrcDisp)) {
- unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
.add(SrcBase)
.addImm(SrcDisp)
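
[Editor's note on the two hunks above: the SS-format memory instructions emitted by this loop encode an unsigned 12-bit displacement, while LAY accepts a signed 20-bit one, so an oversized running displacement can be folded into a fresh ADDR64 base register and reset to zero. A minimal sketch of that step, assuming the same surroundings as emitMemMemWrapper (MBB, MI, MRI and TII in scope as above):

static void foldOversizedDisp(MachineBasicBlock *MBB, MachineInstr &MI,
                              MachineRegisterInfo &MRI,
                              const SystemZInstrInfo *TII,
                              MachineOperand &Base, int64_t &Disp) {
  if (isUInt<12>(Disp))
    return;                        // still fits the 12-bit field
  Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
      .add(Base)
      .addImm(Disp)                // LAY: signed 20-bit displacement
      .addReg(0);                  // no index register
  Base = MachineOperand::CreateReg(Reg, /*isDef=*/false);
  Disp = 0;                        // displacement now lives in Reg
}
]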
@@ -7474,11 +7514,11 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
DebugLoc DL = MI.getDebugLoc();
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(0).getReg();
// Create new virtual register of the same class as source.
const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
- unsigned DstReg = MRI->createVirtualRegister(RC);
+ Register DstReg = MRI->createVirtualRegister(RC);
// Replace pseudo with a normal load-and-test that models the def as
// well.
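
[Editor's note: the mechanical unsigned-to-Register conversion dominates this file. Register (llvm/CodeGen/Register.h) still converts implicitly to and from unsigned, so the BuildMI call sites are untouched; what it adds is a typed wrapper with named queries in place of magic-number range checks. A small illustrative sketch, not taken from the patch:

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
using namespace llvm;

static Register makeTemp(MachineRegisterInfo &MRI,
                         const TargetRegisterClass *RC) {
  Register Tmp = MRI.createVirtualRegister(RC);
  // The static query used later in this patch; vregs are tagged values,
  // not small integers.
  assert(Register::isVirtualRegister(Tmp));
  return Tmp;
}
]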
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 19c7ec58ed3d..9c95e8aec940 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -25,10 +25,10 @@ let Predicates = [FeatureNoVectorEnhancements1] in
let Predicates = [FeatureVectorEnhancements1] in
def SelectVR128 : SelectWrapper<f128, VR128>;
-defm CondStoreF32 : CondStores<FP32, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
-defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
+defm CondStoreF32 : CondStores<FP32, simple_store,
+ simple_load, bdxaddr20only>;
+defm CondStoreF64 : CondStores<FP64, simple_store,
+ simple_load, bdxaddr20only>;
//===----------------------------------------------------------------------===//
// Move instructions
@@ -276,13 +276,13 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
}
// fp_to_sint always rounds towards zero, which is modifier value 5.
-def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>;
-def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>;
-def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>;
+def : Pat<(i32 (any_fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>;
+def : Pat<(i32 (any_fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>;
+def : Pat<(i32 (any_fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>;
-def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>;
-def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>;
-def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
+def : Pat<(i64 (any_fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>;
+def : Pat<(i64 (any_fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>;
+def : Pat<(i64 (any_fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
// The FP extension feature provides versions of the above that also allow
// specifying the inexact-exception suppression flag.
@@ -309,13 +309,13 @@ let Predicates = [FeatureFPExtension] in {
def CLGXBR : TernaryRRFe<"clgxbr", 0xB3AE, GR64, FP128>;
}
- def : Pat<(i32 (fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>;
- def : Pat<(i32 (fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>;
- def : Pat<(i32 (fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>;
+ def : Pat<(i32 (any_fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>;
+ def : Pat<(i32 (any_fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>;
+ def : Pat<(i32 (any_fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>;
- def : Pat<(i64 (fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>;
- def : Pat<(i64 (fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>;
- def : Pat<(i64 (fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>;
+ def : Pat<(i64 (any_fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>;
+ def : Pat<(i64 (any_fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>;
+ def : Pat<(i64 (any_fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>;
}
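
[Editor's note: the fp_to_sint/fp_to_uint patterns switch to the any_* selectors so a single pattern covers both the ordinary conversion and its constrained (strict) counterpart. A hedged sketch of the two DAG forms involved; the strict node threads a chain operand and result so the possibly trapping conversion cannot be reordered or dropped:

SDValue lowerToSint(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                    SDValue Chain, SDValue Src, bool IsStrict) {
  if (!IsStrict)
    return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Src);
  // Strict variant: extra MVT::Other chain in the result list.
  return DAG.getNode(ISD::STRICT_FP_TO_SINT, DL,
                     DAG.getVTList(VT, MVT::Other), {Chain, Src});
}
]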
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 2a1d14de3ddf..c9dbe3da686d 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2141,17 +2141,17 @@ class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode,
}
class CmpBranchRIEa<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, cond4:$M3),
mnemonic#"$M3\t$R1, $I2", []>;
class AsmCmpBranchRIEa<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, imm32zx4:$M3),
mnemonic#"\t$R1, $I2, $M3", []>;
class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#V.suffix#"\t$R1, $I2", []> {
let isAsmParserOnly = V.alternate;
@@ -2159,7 +2159,7 @@ class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
}
multiclass CmpBranchRIEaPair<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm> {
+ RegisterOperand cls, ImmOpWithPattern imm> {
let isCodeGenOnly = 1 in
def "" : CmpBranchRIEa<mnemonic, opcode, cls, imm>;
def Asm : AsmCmpBranchRIEa<mnemonic, opcode, cls, imm>;
@@ -2193,19 +2193,19 @@ multiclass CmpBranchRIEbPair<string mnemonic, bits<16> opcode,
}
class CmpBranchRIEc<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEc<opcode, (outs),
(ins cls:$R1, imm:$I2, cond4:$M3, brtarget16:$RI4),
mnemonic#"$M3\t$R1, $I2, $RI4", []>;
class AsmCmpBranchRIEc<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEc<opcode, (outs),
(ins cls:$R1, imm:$I2, imm32zx4:$M3, brtarget16:$RI4),
mnemonic#"\t$R1, $I2, $M3, $RI4", []>;
class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEc<opcode, (outs), (ins cls:$R1, imm:$I2, brtarget16:$RI4),
mnemonic#V.suffix#"\t$R1, $I2, $RI4", []> {
let isAsmParserOnly = V.alternate;
@@ -2213,7 +2213,7 @@ class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
}
multiclass CmpBranchRIEcPair<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm> {
+ RegisterOperand cls, ImmOpWithPattern imm> {
let isCodeGenOnly = 1 in
def "" : CmpBranchRIEc<mnemonic, opcode, cls, imm>;
def Asm : AsmCmpBranchRIEc<mnemonic, opcode, cls, imm>;
@@ -2272,19 +2272,19 @@ multiclass CmpBranchRRSPair<string mnemonic, bits<16> opcode,
}
class CmpBranchRIS<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIS<opcode, (outs),
(ins cls:$R1, imm:$I2, cond4:$M3, bdaddr12only:$BD4),
mnemonic#"$M3\t$R1, $I2, $BD4", []>;
class AsmCmpBranchRIS<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIS<opcode, (outs),
(ins cls:$R1, imm:$I2, imm32zx4:$M3, bdaddr12only:$BD4),
mnemonic#"\t$R1, $I2, $M3, $BD4", []>;
class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIS<opcode, (outs), (ins cls:$R1, imm:$I2, bdaddr12only:$BD4),
mnemonic#V.suffix#"\t$R1, $I2, $BD4", []> {
let isAsmParserOnly = V.alternate;
@@ -2292,7 +2292,7 @@ class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
}
multiclass CmpBranchRISPair<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm> {
+ RegisterOperand cls, ImmOpWithPattern imm> {
let isCodeGenOnly = 1 in
def "" : CmpBranchRIS<mnemonic, opcode, cls, imm>;
def Asm : AsmCmpBranchRIS<mnemonic, opcode, cls, imm>;
@@ -2585,7 +2585,7 @@ multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
// We therefore match the address in the same way as a normal store and
// only use the StoreSI* instruction if the matched address is suitable.
class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- Immediate imm>
+ ImmOpWithPattern imm>
: InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr12pair:$BD1)]> {
@@ -2593,7 +2593,7 @@ class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
}
class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- Immediate imm>
+ ImmOpWithPattern imm>
: InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr20pair:$BD1)]> {
@@ -2601,7 +2601,7 @@ class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- Immediate imm>
+ ImmOpWithPattern imm>
: InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr12pair:$BD1)]> {
@@ -2609,7 +2609,7 @@ class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
- SDPatternOperator operator, Immediate imm> {
+ SDPatternOperator operator, ImmOpWithPattern imm> {
let DispKey = mnemonic in {
let DispSize = "12" in
def "" : StoreSI<mnemonic, siOpcode, operator, imm>;
@@ -2665,7 +2665,7 @@ multiclass CondStoreRSYPair<string mnemonic, bits<16> opcode,
def Asm : AsmCondStoreRSY<mnemonic, opcode, cls, bytes, mode>;
}
-class SideEffectUnaryI<string mnemonic, bits<8> opcode, Immediate imm>
+class SideEffectUnaryI<string mnemonic, bits<8> opcode, ImmOpWithPattern imm>
: InstI<opcode, (outs), (ins imm:$I1),
mnemonic#"\t$I1", []>;
@@ -2761,13 +2761,13 @@ class UnaryMemRRFc<string mnemonic, bits<16> opcode,
}
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIa<opcode, (outs cls:$R1), (ins imm:$I2),
mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator imm:$I2))]>;
class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRILa<opcode, (outs cls:$R1), (ins imm:$I2),
mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator imm:$I2))]>;
@@ -2885,14 +2885,14 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
}
class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- TypedReg tr, Immediate imm, bits<4> type = 0>
+ TypedReg tr, ImmOpWithPattern imm, bits<4> type = 0>
: InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2),
mnemonic#"\t$V1, $I2",
- [(set (tr.vt tr.op:$V1), (operator imm:$I2))]> {
+ [(set (tr.vt tr.op:$V1), (operator (i32 timm:$I2)))]> {
let M3 = type;
}
-class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, Immediate imm>
+class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, ImmOpWithPattern imm>
: InstVRIa<opcode, (outs VR128:$V1), (ins imm:$I2, imm32zx4:$M3),
mnemonic#"\t$V1, $I2, $M3", []>;
@@ -3021,7 +3021,7 @@ class SideEffectBinaryRRFc<string mnemonic, bits<16> opcode,
}
class SideEffectBinaryIE<string mnemonic, bits<16> opcode,
- Immediate imm1, Immediate imm2>
+ ImmOpWithPattern imm1, ImmOpWithPattern imm2>
: InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2),
mnemonic#"\t$I1, $I2", []>;
@@ -3030,7 +3030,7 @@ class SideEffectBinarySI<string mnemonic, bits<8> opcode, Operand imm>
mnemonic#"\t$BD1, $I2", []>;
class SideEffectBinarySIL<string mnemonic, bits<16> opcode,
- SDPatternOperator operator, Immediate imm>
+ SDPatternOperator operator, ImmOpWithPattern imm>
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", [(operator bdaddr12only:$BD1, imm:$I2)]>;
@@ -3165,7 +3165,7 @@ class BinaryRRFc<string mnemonic, bits<16> opcode,
mnemonic#"\t$R1, $R2, $M3", []>;
class BinaryMemRRFc<string mnemonic, bits<16> opcode,
- RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
+ RegisterOperand cls1, RegisterOperand cls2, ImmOpWithPattern imm>
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []> {
let Constraints = "$R1 = $R1src";
@@ -3267,7 +3267,7 @@ multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode,
}
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
@@ -3276,14 +3276,14 @@ class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
}
class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2),
mnemonic#"\t$R1, $R3, $I2",
[(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls,
- Immediate imm> {
+ ImmOpWithPattern imm> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
@@ -3294,7 +3294,7 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
}
class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
- Immediate imm>
+ ImmOpWithPattern imm>
: InstRIEg<opcode, (outs cls:$R1),
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
mnemonic#"$M3\t$R1, $I2",
@@ -3308,7 +3308,7 @@ class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
// Like CondBinaryRIE, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic.
class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
- Immediate imm>
+ ImmOpWithPattern imm>
: InstRIEg<opcode, (outs cls:$R1),
(ins cls:$R1src, imm:$I2, imm32zx4:$M3),
mnemonic#"\t$R1, $I2, $M3", []> {
@@ -3318,7 +3318,7 @@ class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
// Like CondBinaryRIE, but with a fixed CC mask.
class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#V.suffix#"\t$R1, $I2", []> {
let Constraints = "$R1 = $R1src";
@@ -3328,14 +3328,14 @@ class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
}
multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode,
- RegisterOperand cls, Immediate imm> {
+ RegisterOperand cls, ImmOpWithPattern imm> {
let isCodeGenOnly = 1 in
def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>;
def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>;
}
class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
@@ -3484,7 +3484,7 @@ class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<4> type>
: InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3),
mnemonic#"\t$V1, $I2, $I3",
- [(set (tr.vt tr.op:$V1), (operator imm32zx8:$I2, imm32zx8:$I3))]> {
+ [(set (tr.vt tr.op:$V1), (operator imm32zx8_timm:$I2, imm32zx8_timm:$I3))]> {
let M4 = type;
}
@@ -3498,7 +3498,7 @@ class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2),
mnemonic#"\t$V1, $V3, $I2",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V3),
- imm32zx16:$I2))]> {
+ imm32zx16_timm:$I2))]> {
let M4 = type;
}
@@ -3512,7 +3512,7 @@ class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3),
mnemonic#"\t$V1, $V2, $I3",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
- imm32zx12:$I3))]> {
+ imm32zx12_timm:$I3))]> {
let M4 = type;
let M5 = m5;
}
@@ -3715,7 +3715,7 @@ class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3",
[(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2,
- imm32zx4:$M3))]> {
+ imm32zx4_timm:$M3))]> {
let mayLoad = 1;
let AccessBytes = bytes;
}
@@ -3765,7 +3765,7 @@ class BinaryVSI<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
- Immediate index>
+ ImmOpWithPattern index>
: InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
mnemonic#"\t$V1, $VBD2, $M3", []> {
let mayStore = 1;
@@ -3774,7 +3774,7 @@ class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
class StoreBinaryVRX<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr, bits<5> bytes,
- Immediate index>
+ ImmOpWithPattern index>
: InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3),
mnemonic#"\t$V1, $XBD2, $M3",
[(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> {
@@ -3809,7 +3809,7 @@ class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRIa<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#"\t$R1, $I2",
[(set CC, (operator cls:$R1, imm:$I2))]> {
@@ -3817,7 +3817,7 @@ class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
}
class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
- RegisterOperand cls, Immediate imm>
+ RegisterOperand cls, ImmOpWithPattern imm>
: InstRILa<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#"\t$R1, $I2",
[(set CC, (operator cls:$R1, imm:$I2))]> {
@@ -3924,7 +3924,7 @@ class CompareSSb<string mnemonic, bits<8> opcode>
}
class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- SDPatternOperator load, Immediate imm,
+ SDPatternOperator load, ImmOpWithPattern imm,
AddressingMode mode = bdaddr12only>
: InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
@@ -3934,7 +3934,7 @@ class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
}
class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- SDPatternOperator load, Immediate imm>
+ SDPatternOperator load, ImmOpWithPattern imm>
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(set CC, (operator (load bdaddr12only:$BD1), imm:$I2))]> {
@@ -3943,7 +3943,7 @@ class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- SDPatternOperator load, Immediate imm,
+ SDPatternOperator load, ImmOpWithPattern imm,
AddressingMode mode = bdaddr20only>
: InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
@@ -3954,7 +3954,7 @@ class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
SDPatternOperator operator, SDPatternOperator load,
- Immediate imm> {
+ ImmOpWithPattern imm> {
let DispKey = mnemonic in {
let DispSize = "12" in
def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>;
@@ -4012,7 +4012,7 @@ class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class TestBinarySIL<string mnemonic, bits<16> opcode,
- SDPatternOperator operator, Immediate imm>
+ SDPatternOperator operator, ImmOpWithPattern imm>
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
@@ -4073,7 +4073,7 @@ class SideEffectTernaryMemMemMemRRFb<string mnemonic, bits<16> opcode,
class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
- Immediate imm>
+ ImmOpWithPattern imm>
: InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []>;
@@ -4086,7 +4086,7 @@ multiclass SideEffectTernaryRRFcOpt<string mnemonic, bits<16> opcode,
class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
- Immediate imm>
+ ImmOpWithPattern imm>
: InstRRFc<opcode, (outs cls1:$R1, cls2:$R2),
(ins cls1:$R1src, cls2:$R2src, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []> {
@@ -4221,7 +4221,7 @@ class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index>
+ TypedReg tr1, TypedReg tr2, ImmOpWithPattern imm, ImmOpWithPattern index>
: InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3),
mnemonic#"\t$V1, $I2, $M3",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
@@ -4237,7 +4237,7 @@ class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$V1, $V2, $V3, $I4",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3),
- imm32zx8:$I4))]> {
+ imm32zx8_timm:$I4))]> {
let M5 = type;
}
@@ -4252,8 +4252,8 @@ class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
(ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $M4, $M5",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
- imm32zx4:$M4,
- imm32zx4:$M5))],
+ imm32zx4_timm:$M4,
+ imm32zx4_timm:$M5))],
m4or> {
let M3 = type;
}
@@ -4285,13 +4285,13 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
TypedReg tr1, TypedReg tr2, bits<4> type,
bits<4> modifier = 0> {
def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
- imm32zx4even, !and (modifier, 14)>;
+ imm32zx4even_timm, !and (modifier, 14)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
let Defs = [CC] in
def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
- imm32zx4even, !add(!and (modifier, 14), 1)>;
+ imm32zx4even_timm, !add(!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
@@ -4314,7 +4314,7 @@ class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$V1, $V2, $V3, $M4",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3),
- imm32zx4:$M4))]> {
+ imm32zx4_timm:$M4))]> {
let M5 = 0;
let M6 = 0;
}
@@ -4327,7 +4327,7 @@ class TernaryVRRcFloat<string mnemonic, bits<16> opcode,
mnemonic#"\t$V1, $V2, $V3, $M6",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3),
- imm32zx4:$M6))]> {
+ imm32zx4_timm:$M6))]> {
let M4 = type;
let M5 = m5;
}
@@ -4429,7 +4429,7 @@ class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
}
class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
- Immediate index>
+ ImmOpWithPattern index>
: InstVRV<opcode, (outs VR128:$V1),
(ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3),
mnemonic#"\t$V1, $VBD2, $M3", []> {
@@ -4440,7 +4440,7 @@ class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
}
class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index>
+ TypedReg tr1, TypedReg tr2, bits<5> bytes, ImmOpWithPattern index>
: InstVRX<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3),
mnemonic#"\t$V1, $XBD2, $M3",
@@ -4461,7 +4461,7 @@ class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operato
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
(tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3),
- imm32zx8:$I4))]> {
+ imm32zx8_timm:$I4))]> {
let Constraints = "$V1 = $V1src";
let DisableEncoding = "$V1src";
let M5 = type;
@@ -4480,7 +4480,7 @@ class QuaternaryVRIf<string mnemonic, bits<16> opcode>
: InstVRIf<opcode, (outs VR128:$V1),
(ins VR128:$V2, VR128:$V3,
imm32zx8:$I4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>;
+ mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>;
class QuaternaryVRIg<string mnemonic, bits<16> opcode>
: InstVRIg<opcode, (outs VR128:$V1),
@@ -4491,7 +4491,7 @@ class QuaternaryVRIg<string mnemonic, bits<16> opcode>
class QuaternaryVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
TypedReg tr3, TypedReg tr4, bits<4> type,
- SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0>
+ SDPatternOperator m6mask = imm32zx4_timm, bits<4> m6or = 0>
: InstVRRd<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr3.op:$V3, tr4.op:$V4, m6mask:$M6),
mnemonic#"\t$V1, $V2, $V3, $V4, $M6",
@@ -4518,14 +4518,14 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
bits<4> modifier = 0> {
def "" : QuaternaryVRRd<mnemonic, opcode, operator,
tr1, tr2, tr2, tr2, type,
- imm32zx4even, !and (modifier, 14)>;
+ imm32zx4even_timm, !and (modifier, 14)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>;
let Defs = [CC] in
def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc,
tr1, tr2, tr2, tr2, type,
- imm32zx4even, !add (!and (modifier, 14), 1)>;
+ imm32zx4even_timm, !add (!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>;
@@ -4536,7 +4536,7 @@ multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
- VR128:$V4, imm32zx4:$M5, 0)>;
+ VR128:$V4, imm32zx4_timm:$M5, 0)>;
}
class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode,
@@ -4638,13 +4638,13 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
: InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
mnemonic##"\t$M1, $XBD2",
- [(operator imm32zx4:$M1, bdxaddr20only:$XBD2)]>;
+ [(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>;
class PrefetchRILPC<string mnemonic, bits<12> opcode,
SDPatternOperator operator>
- : InstRILc<opcode, (outs), (ins imm32zx4:$M1, pcrel32:$RI2),
+ : InstRILc<opcode, (outs), (ins imm32zx4_timm:$M1, pcrel32:$RI2),
mnemonic##"\t$M1, $RI2",
- [(operator imm32zx4:$M1, pcrel32:$RI2)]> {
+ [(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
// complex.
@@ -4691,7 +4691,7 @@ class Pseudo<dag outs, dag ins, list<dag> pattern>
// Like UnaryRI, but expanded after RA depending on the choice of register.
class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
- Immediate imm>
+ ImmOpWithPattern imm>
: Pseudo<(outs cls:$R1), (ins imm:$I2),
[(set cls:$R1, (operator imm:$I2))]>;
@@ -4720,7 +4720,7 @@ class UnaryRRPseudo<string key, SDPatternOperator operator,
// Like BinaryRI, but expanded after RA depending on the choice of register.
class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
- Immediate imm>
+ ImmOpWithPattern imm>
: Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2),
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src";
@@ -4728,13 +4728,13 @@ class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
// Like BinaryRIE, but expanded after RA depending on the choice of register.
class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls,
- Immediate imm>
+ ImmOpWithPattern imm>
: Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2),
[(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
// Like BinaryRIAndK, but expanded after RA depending on the choice of register.
multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
- RegisterOperand cls, Immediate imm> {
+ RegisterOperand cls, ImmOpWithPattern imm> {
let NumOpsKey = key in {
let NumOpsValue = "3" in
def K : BinaryRIEPseudo<operator, cls, imm>,
@@ -4764,7 +4764,7 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
// Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
- Immediate imm>
+ ImmOpWithPattern imm>
: Pseudo<(outs), (ins cls:$R1, imm:$I2),
[(set CC, (operator cls:$R1, imm:$I2))]> {
let isCompare = 1;
@@ -4783,7 +4783,7 @@ class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
}
// Like TestBinarySIL, but expanded later.
-class TestBinarySILPseudo<SDPatternOperator operator, Immediate imm>
+class TestBinarySILPseudo<SDPatternOperator operator, ImmOpWithPattern imm>
: Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2),
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
@@ -4812,7 +4812,7 @@ class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
// Like CondBinaryRIE, but expanded after RA depending on the choice of
// register.
-class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
+class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm>
: Pseudo<(outs cls:$R1),
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
[(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
@@ -4876,7 +4876,7 @@ class SelectWrapper<ValueType vt, RegisterOperand cls>
: Pseudo<(outs cls:$dst),
(ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc),
[(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2,
- imm32zx4:$valid, imm32zx4:$cc))]> {
+ imm32zx4_timm:$valid, imm32zx4_timm:$cc))]> {
let usesCustomInserter = 1;
let hasNoSchedulingInfo = 1;
let Uses = [CC];
@@ -4890,12 +4890,12 @@ multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
def "" : Pseudo<(outs),
(ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask cls:$new, (load mode:$addr),
- imm32zx4:$valid, imm32zx4:$cc),
+ imm32zx4_timm:$valid, imm32zx4_timm:$cc),
mode:$addr)]>;
def Inv : Pseudo<(outs),
(ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask (load mode:$addr), cls:$new,
- imm32zx4:$valid, imm32zx4:$cc),
+ imm32zx4_timm:$valid, imm32zx4_timm:$cc),
mode:$addr)]>;
}
}
@@ -4917,11 +4917,11 @@ class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
// Specializations of AtomicLoadWBinary.
class AtomicLoadBinaryReg32<SDPatternOperator operator>
: AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
-class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm>
+class AtomicLoadBinaryImm32<SDPatternOperator operator, ImmOpWithPattern imm>
: AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
class AtomicLoadBinaryReg64<SDPatternOperator operator>
: AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
-class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm>
+class AtomicLoadBinaryImm64<SDPatternOperator operator, ImmOpWithPattern imm>
: AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND
@@ -4944,7 +4944,7 @@ class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
// Specializations of AtomicLoadWBinary.
class AtomicLoadWBinaryReg<SDPatternOperator operator>
: AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
-class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
+class AtomicLoadWBinaryImm<SDPatternOperator operator, ImmOpWithPattern imm>
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
// A pseudo instruction that is a direct alias of a real instruction.
@@ -4979,7 +4979,7 @@ class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
// An alias of a BinaryRI, but with different register sizes.
class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
- Immediate imm>
+ ImmOpWithPattern imm>
: Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src";
@@ -4987,7 +4987,7 @@ class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
// An alias of a BinaryRIL, but with different register sizes.
class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
- Immediate imm>
+ ImmOpWithPattern imm>
: Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src";
@@ -4999,7 +4999,7 @@ class BinaryAliasVRRf<RegisterOperand cls>
// An alias of a CompareRI, but with different register sizes.
class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
- Immediate imm>
+ ImmOpWithPattern imm>
: Alias<4, (outs), (ins cls:$R1, imm:$I2),
[(set CC, (operator cls:$R1, imm:$I2))]> {
let isCompare = 1;
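
[Editor's note: the Immediate-to-ImmOpWithPattern renames and the *_timm operand swaps throughout this file track one SelectionDAG distinction: immediates encoded directly into the MachineInstr should be built and matched as TargetConstant (timm) nodes, which the selector never legalizes or materializes into a register, unlike ordinary Constant (imm) nodes. A short sketch of the producer side, with hypothetical helper names:

// Matched by *_timm patterns; encoded into the instruction verbatim.
SDValue makeMaskOperand(SelectionDAG &DAG, const SDLoc &DL, unsigned CCMask) {
  return DAG.getTargetConstant(CCMask, DL, MVT::i32);
}

// Matched by plain imm patterns; free to be legalized like any value.
SDValue makeValueOperand(SelectionDAG &DAG, const SDLoc &DL, int64_t Val) {
  return DAG.getConstant(Val, DL, MVT::i64);
}
]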
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 57c1cf4ec70a..bc783608d45b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -46,22 +46,12 @@ using namespace llvm;
#include "SystemZGenInstrInfo.inc"
#define DEBUG_TYPE "systemz-II"
-STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)");
// Return a mask with Count low bits set.
static uint64_t allOnes(unsigned int Count) {
return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
}
-// Reg should be a 32-bit GPR. Return true if it is a high register rather
-// than a low register.
-static bool isHighReg(unsigned int Reg) {
- if (SystemZ::GRH32BitRegClass.contains(Reg))
- return true;
- assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32");
- return false;
-}
-
// Pin the vtable to this file.
void SystemZInstrInfo::anchor() {}
@@ -85,7 +75,7 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
// Set up the two 64-bit registers and remember super reg and its flags.
MachineOperand &HighRegOp = EarlierMI->getOperand(0);
MachineOperand &LowRegOp = MI->getOperand(0);
- unsigned Reg128 = LowRegOp.getReg();
+ Register Reg128 = LowRegOp.getReg();
unsigned Reg128Killed = getKillRegState(LowRegOp.isKill());
unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef());
HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64));
@@ -147,8 +137,8 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode,
bool ConvertHigh) const {
- unsigned Reg = MI.getOperand(0).getReg();
- bool IsHigh = isHighReg(Reg);
+ Register Reg = MI.getOperand(0).getReg();
+ bool IsHigh = SystemZ::isHighReg(Reg);
MI.setDesc(get(IsHigh ? HighOpcode : LowOpcode));
if (IsHigh && ConvertHigh)
MI.getOperand(1).setImm(uint32_t(MI.getOperand(1).getImm()));
@@ -161,10 +151,10 @@ void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode,
void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned LowOpcodeK,
unsigned HighOpcode) const {
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
- bool DestIsHigh = isHighReg(DestReg);
- bool SrcIsHigh = isHighReg(SrcReg);
+ Register DestReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ bool DestIsHigh = SystemZ::isHighReg(DestReg);
+ bool SrcIsHigh = SystemZ::isHighReg(SrcReg);
if (!DestIsHigh && !SrcIsHigh)
MI.setDesc(get(LowOpcodeK));
else {
@@ -184,9 +174,10 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
// is a high GR32.
void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const {
- unsigned Reg = MI.getOperand(0).getReg();
- unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode,
- MI.getOperand(2).getImm());
+ Register Reg = MI.getOperand(0).getReg();
+ unsigned Opcode = getOpcodeForOffset(
+ SystemZ::isHighReg(Reg) ? HighOpcode : LowOpcode,
+ MI.getOperand(2).getImm());
MI.setDesc(get(Opcode));
}
@@ -195,93 +186,11 @@ void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
// register is a low GR32 and HighOpcode if the register is a high GR32.
void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const {
- unsigned Reg = MI.getOperand(0).getReg();
- unsigned Opcode = isHighReg(Reg) ? HighOpcode : LowOpcode;
+ Register Reg = MI.getOperand(0).getReg();
+ unsigned Opcode = SystemZ::isHighReg(Reg) ? HighOpcode : LowOpcode;
MI.setDesc(get(Opcode));
}
-// MI is a load-register-on-condition pseudo instruction. Replace it with
-// LowOpcode if source and destination are both low GR32s and HighOpcode if
-// source and destination are both high GR32s.
-void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
- unsigned HighOpcode) const {
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(2).getReg();
- bool DestIsHigh = isHighReg(DestReg);
- bool SrcIsHigh = isHighReg(SrcReg);
-
- if (!DestIsHigh && !SrcIsHigh)
- MI.setDesc(get(LowOpcode));
- else if (DestIsHigh && SrcIsHigh)
- MI.setDesc(get(HighOpcode));
- else
- LOCRMuxJumps++;
-
- // If we were unable to implement the pseudo with a single instruction, we
- // need to convert it back into a branch sequence. This cannot be done here
- // since the caller of expandPostRAPseudo does not handle changes to the CFG
- // correctly. This change is deferred to the SystemZExpandPseudo pass.
-}
-
-// MI is a select pseudo instruction. Replace it with LowOpcode if source
-// and destination are all low GR32s and HighOpcode if source and destination
-// are all high GR32s. Otherwise, use the two-operand MixedOpcode.
-void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
- unsigned HighOpcode,
- unsigned MixedOpcode) const {
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned Src1Reg = MI.getOperand(1).getReg();
- unsigned Src2Reg = MI.getOperand(2).getReg();
- bool DestIsHigh = isHighReg(DestReg);
- bool Src1IsHigh = isHighReg(Src1Reg);
- bool Src2IsHigh = isHighReg(Src2Reg);
-
- // If sources and destination aren't all high or all low, we may be able to
- // simplify the operation by moving one of the sources to the destination
- // first. But only if this doesn't clobber the other source.
- if (DestReg != Src1Reg && DestReg != Src2Reg) {
- if (DestIsHigh != Src1IsHigh) {
- emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg,
- SystemZ::LR, 32, MI.getOperand(1).isKill(),
- MI.getOperand(1).isUndef());
- MI.getOperand(1).setReg(DestReg);
- Src1Reg = DestReg;
- Src1IsHigh = DestIsHigh;
- } else if (DestIsHigh != Src2IsHigh) {
- emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg,
- SystemZ::LR, 32, MI.getOperand(2).isKill(),
- MI.getOperand(2).isUndef());
- MI.getOperand(2).setReg(DestReg);
- Src2Reg = DestReg;
- Src2IsHigh = DestIsHigh;
- }
- }
-
- // If the destination (now) matches one source, prefer this to be first.
- if (DestReg != Src1Reg && DestReg == Src2Reg) {
- commuteInstruction(MI, false, 1, 2);
- std::swap(Src1Reg, Src2Reg);
- std::swap(Src1IsHigh, Src2IsHigh);
- }
-
- if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh)
- MI.setDesc(get(LowOpcode));
- else if (DestIsHigh && Src1IsHigh && Src2IsHigh)
- MI.setDesc(get(HighOpcode));
- else {
- // Given the simplification above, we must already have a two-operand case.
- assert (DestReg == Src1Reg);
- MI.setDesc(get(MixedOpcode));
- MI.tieOperands(0, 1);
- LOCRMuxJumps++;
- }
-
- // If we were unable to implement the pseudo with a single instruction, we
- // need to convert it back into a branch sequence. This cannot be done here
- // since the caller of expandPostRAPseudo does not handle changes to the CFG
- // correctly. This change is deferred to the SystemZExpandPseudo pass.
-}
-
// MI is an RR-style pseudo instruction that zero-extends the low Size bits
// of one GRX32 into another. Replace it with LowOpcode if both operands
// are low registers, otherwise use RISB[LH]G.
@@ -302,8 +211,8 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction &MF = *MBB->getParent();
- const unsigned Reg64 = MI->getOperand(0).getReg();
- const unsigned Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32);
+ const Register Reg64 = MI->getOperand(0).getReg();
+ const Register Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32);
// EAR can only load the low subregister so use a shift for %a0 to produce
// the GR containing %a0 and %a1.
@@ -341,8 +250,8 @@ SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
unsigned Size, bool KillSrc,
bool UndefSrc) const {
unsigned Opcode;
- bool DestIsHigh = isHighReg(DestReg);
- bool SrcIsHigh = isHighReg(SrcReg);
+ bool DestIsHigh = SystemZ::isHighReg(DestReg);
+ bool SrcIsHigh = SystemZ::isHighReg(SrcReg);
if (DestIsHigh && SrcIsHigh)
Opcode = SystemZ::RISBHH;
else if (DestIsHigh && !SrcIsHigh)
@@ -468,7 +377,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Can't handle indirect branches.
SystemZII::Branch Branch(getBranchInfo(*I));
- if (!Branch.Target->isMBB())
+ if (!Branch.hasMBBTarget())
return true;
// Punt on compound branches.
@@ -478,7 +387,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
if (Branch.CCMask == SystemZ::CCMASK_ANY) {
// Handle unconditional branches.
if (!AllowModify) {
- TBB = Branch.Target->getMBB();
+ TBB = Branch.getMBBTarget();
continue;
}
@@ -490,7 +399,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
FBB = nullptr;
// Delete the JMP if it's equivalent to a fall-through.
- if (MBB.isLayoutSuccessor(Branch.Target->getMBB())) {
+ if (MBB.isLayoutSuccessor(Branch.getMBBTarget())) {
TBB = nullptr;
I->eraseFromParent();
I = MBB.end();
@@ -498,7 +407,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// TBB is used to indicate the unconditional destination.
- TBB = Branch.Target->getMBB();
+ TBB = Branch.getMBBTarget();
continue;
}
@@ -506,7 +415,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
if (Cond.empty()) {
// FIXME: add X86-style branch swap
FBB = TBB;
- TBB = Branch.Target->getMBB();
+ TBB = Branch.getMBBTarget();
Cond.push_back(MachineOperand::CreateImm(Branch.CCValid));
Cond.push_back(MachineOperand::CreateImm(Branch.CCMask));
continue;
@@ -517,7 +426,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Only handle the case where all conditional branches branch to the same
// destination.
- if (TBB != Branch.Target->getMBB())
+ if (TBB != Branch.getMBBTarget())
return true;
// If the conditions are the same, we can leave them alone.
@@ -547,7 +456,7 @@ unsigned SystemZInstrInfo::removeBranch(MachineBasicBlock &MBB,
continue;
if (!I->isBranch())
break;
- if (!getBranchInfo(*I).Target->isMBB())
+ if (!getBranchInfo(*I).hasMBBTarget())
break;
// Remove the branch.
I->eraseFromParent();
@@ -676,8 +585,8 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
else {
Opc = SystemZ::LOCR;
MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass);
- unsigned TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
- unsigned FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
BuildMI(MBB, I, DL, get(TargetOpcode::COPY), TReg).addReg(TrueReg);
BuildMI(MBB, I, DL, get(TargetOpcode::COPY), FReg).addReg(FalseReg);
TrueReg = TReg;
@@ -1258,13 +1167,14 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
assert(NumOps == 3 && "Expected two source registers.");
Register DstReg = MI.getOperand(0).getReg();
Register DstPhys =
- (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
+ (Register::isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()
: ((OpNum == 1 && MI.isCommutable())
? MI.getOperand(2).getReg()
: Register()));
if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg &&
- TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg))
+ Register::isVirtualRegister(SrcReg) &&
+ DstPhys == VRM->getPhys(SrcReg))
NeedsCommute = (OpNum == 1);
else
MemOpcode = -1;
@@ -1358,15 +1268,6 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI);
return true;
- case SystemZ::LOCRMux:
- expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
- return true;
-
- case SystemZ::SELRMux:
- expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR,
- SystemZ::LOCRMux);
- return true;
-
case SystemZ::STCMux:
expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
return true;
@@ -1468,8 +1369,8 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
case SystemZ::RISBMux: {
- bool DestIsHigh = isHighReg(MI.getOperand(0).getReg());
- bool SrcIsHigh = isHighReg(MI.getOperand(2).getReg());
+ bool DestIsHigh = SystemZ::isHighReg(MI.getOperand(0).getReg());
+ bool SrcIsHigh = SystemZ::isHighReg(MI.getOperand(2).getReg());
if (SrcIsHigh == DestIsHigh)
MI.setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL));
else {
@@ -1545,6 +1446,10 @@ SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const {
return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP,
MI.getOperand(2).getImm(), &MI.getOperand(3));
+ case SystemZ::INLINEASM_BR:
+ // Don't try to analyze asm goto, so pass nullptr as branch target argument.
+ return SystemZII::Branch(SystemZII::AsmGoto, 0, 0, nullptr);
+
default:
llvm_unreachable("Unrecognized branch opcode");
}
@@ -1845,8 +1750,7 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
bool SystemZInstrInfo::
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA) const {
+ const MachineInstr &MIb) const {
if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand())
return false;
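
[Editor's note: the static isHighReg helper deleted above survives under the SystemZ namespace; the hunk adding its new home is not part of this excerpt, but from the call sites it presumably keeps the removed body verbatim, along these lines:

namespace llvm {
namespace SystemZ {
// Reg should be a 32-bit GPR; returns true for the high-half bank.
inline bool isHighReg(unsigned int Reg) {
  if (SystemZ::GRH32BitRegClass.contains(Reg))
    return true;
  assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32");
  return false;
}
} // end namespace SystemZ
} // end namespace llvm
]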
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 2edde175542e..6dc6e72aa52a 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -100,11 +100,18 @@ enum BranchType {
// An instruction that decrements a 64-bit register and branches if
// the result is nonzero.
- BranchCTG
+ BranchCTG,
+
+ // An instruction representing an asm goto statement.
+ AsmGoto
};
// Information about a branch instruction.
-struct Branch {
+class Branch {
+ // The target of the branch. In case of INLINEASM_BR, this is nullptr.
+ const MachineOperand *Target;
+
+public:
// The type of the branch.
BranchType Type;
@@ -114,12 +121,15 @@ struct Branch {
// CCMASK_<N> is set if the branch should be taken when CC == N.
unsigned CCMask;
- // The target of the branch.
- const MachineOperand *Target;
-
Branch(BranchType type, unsigned ccValid, unsigned ccMask,
const MachineOperand *target)
- : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {}
+ : Target(target), Type(type), CCValid(ccValid), CCMask(ccMask) {}
+
+ bool isIndirect() { return Target != nullptr && Target->isReg(); }
+ bool hasMBBTarget() { return Target != nullptr && Target->isMBB(); }
+ MachineBasicBlock *getMBBTarget() {
+ return hasMBBTarget() ? Target->getMBB() : nullptr;
+ }
};
// Kinds of fused compares in compare-and-* instructions. Together with type
@@ -160,10 +170,6 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
unsigned HighOpcode) const;
void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
- void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
- unsigned HighOpcode) const;
- void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
- unsigned HighOpcode, unsigned MixedOpcode) const;
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
@@ -322,8 +328,7 @@ public:
// memory addresses and false otherwise.
bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA = nullptr) const override;
+ const MachineInstr &MIb) const override;
};
} // end namespace llvm
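
[Editor's note: making Target a private member forces every consumer through hasMBBTarget()/getMBBTarget(), which is what lets getBranchInfo hand back a null target for INLINEASM_BR without breaking callers. Typical use after this change, as a sketch:

static bool tryGetBranchTarget(const SystemZInstrInfo *TII, MachineInstr &MI,
                               MachineBasicBlock *&Dest) {
  SystemZII::Branch B(TII->getBranchInfo(MI));
  if (!B.hasMBBTarget())
    return false;              // indirect branch or asm goto
  Dest = B.getMBBTarget();
  return true;
}
]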
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 91856893e3bd..8b334756611a 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -337,15 +337,15 @@ defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8,
defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>,
Requires<[FeatureHighWord]>;
-defm CondStore32Mux : CondStores<GRX32, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>,
+defm CondStore32Mux : CondStores<GRX32, simple_store,
+ simple_load, bdxaddr20only>,
Requires<[FeatureLoadStoreOnCond2]>;
defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>;
-defm CondStore32 : CondStores<GR32, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
+defm CondStore32 : CondStores<GR32, simple_store,
+ simple_load, bdxaddr20only>;
defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
@@ -353,8 +353,8 @@ defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>;
defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32,
nonvolatile_anyextloadi32, bdxaddr20only>;
-defm CondStore64 : CondStores<GR64, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
+defm CondStore64 : CondStores<GR64, simple_store,
+ simple_load, bdxaddr20only>;
//===----------------------------------------------------------------------===//
// Move instructions
@@ -531,8 +531,8 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
// Load on condition. Matched via DAG pattern.
// Expands to LOC or LOCFH, depending on the choice of register.
- def LOCMux : CondUnaryRSYPseudo<nonvolatile_load, GRX32, 4>;
- defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>;
+ def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>;
+ defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>;
// Store on condition. Expanded from CondStore* pseudos.
// Expands to STOC or STOCFH, depending on the choice of register.
@@ -563,8 +563,8 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
}
// Load on condition. Matched via DAG pattern.
- defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
- defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, nonvolatile_load, GR64, 8>;
+ defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>;
+ defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>;
// Store on condition. Expanded from CondStore* pseudos.
defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;
@@ -2082,7 +2082,7 @@ let Predicates = [FeatureProcessorAssist] in {
// cleared. We only use the first result here.
let Defs = [CC] in
def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>;
-def : Pat<(ctlz GR64:$src),
+def : Pat<(i64 (ctlz GR64:$src)),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
// Population count. Counts bits set per byte or doubleword.
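
[Editor's note: the nonvolatile_load/nonvolatile_store fragments give way to simple_load/simple_store here and in SystemZInstrFP.td above. A sketch of the underlying predicate, assuming the fragments are defined over MemSDNode as usual: a "simple" access is neither volatile nor atomic, a slightly stronger condition than the old non-volatile check alone.

static bool isSimpleAccess(const MemSDNode *N) {
  return !N->isVolatile() && !N->isAtomic();
}
]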
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index 261727f89058..02364bbda5c1 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in {
// Generate byte mask.
def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
- def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+ def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16_timm>;
// Generate mask.
def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
@@ -71,10 +71,10 @@ let Predicates = [FeatureVector] in {
// Replicate immediate.
def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
- def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
- def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
- def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
- def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
+ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16_timm, 0>;
+ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16_timm, 1>;
+ def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16_timm, 2>;
+ def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16_timm, 3>;
}
// Load element immediate.
@@ -116,7 +116,7 @@ let Predicates = [FeatureVector] in {
(ins bdxaddr12only:$XBD2, imm32zx4:$M3),
"lcbb\t$R1, $XBD2, $M3",
[(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
- imm32zx4:$M3))]>;
+ imm32zx4_timm:$M3))]>;
// Load with length. The number of loaded bytes is only known at run time.
def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
@@ -362,9 +362,9 @@ let Predicates = [FeatureVector] in {
def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
- def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)),
+ def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16_timm:$index)),
(VREPF VR128:$vec, imm32zx16:$index)>;
- def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
+ def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16_timm:$index)),
(VREPG VR128:$vec, imm32zx16:$index)>;
// Select.
@@ -778,7 +778,7 @@ let Predicates = [FeatureVector] in {
// Shift left double by byte.
def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
- def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
+ def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8_timm:$z),
(VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
// Shift left double by bit.
@@ -1069,7 +1069,7 @@ let Predicates = [FeatureVector] in {
def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
}
// Rounding mode should agree with SystemZInstrFP.td.
- def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
+ def : FPConversion<VCGDB, any_fp_to_sint, v128g, v128db, 0, 5>;
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
@@ -1078,7 +1078,7 @@ let Predicates = [FeatureVector] in {
def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>;
}
// Rounding mode should agree with SystemZInstrFP.td.
- def : FPConversion<VCFEB, fp_to_sint, v128f, v128sb, 0, 5>;
+ def : FPConversion<VCFEB, any_fp_to_sint, v128f, v128sb, 0, 5>;
}
// Convert to logical.
@@ -1088,7 +1088,7 @@ let Predicates = [FeatureVector] in {
def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
}
// Rounding mode should agree with SystemZInstrFP.td.
- def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
+ def : FPConversion<VCLGDB, any_fp_to_uint, v128g, v128db, 0, 5>;
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
@@ -1097,7 +1097,7 @@ let Predicates = [FeatureVector] in {
def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>;
}
// Rounding mode should agree with SystemZInstrFP.td.
- def : FPConversion<VCLFEB, fp_to_uint, v128f, v128sb, 0, 5>;
+ def : FPConversion<VCLFEB, any_fp_to_uint, v128f, v128sb, 0, 5>;
}
// Divide.
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 95d7e22dec32..724111229569 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -85,9 +85,9 @@ struct MBBInfo {
// This value never changes.
uint64_t Size = 0;
- // The minimum alignment of the block, as a log2 value.
+ // The minimum alignment of the block.
// This value never changes.
- unsigned Alignment = 0;
+ Align Alignment;
// The number of terminators in this block. This value never changes.
unsigned NumTerminators = 0;
@@ -127,7 +127,8 @@ struct BlockPosition {
// as the runtime address.
unsigned KnownBits;
- BlockPosition(unsigned InitialAlignment) : KnownBits(InitialAlignment) {}
+ BlockPosition(unsigned InitialLogAlignment)
+ : KnownBits(InitialLogAlignment) {}
};
class SystemZLongBranch : public MachineFunctionPass {
@@ -178,17 +179,16 @@ const uint64_t MaxForwardRange = 0xfffe;
// instructions.
void SystemZLongBranch::skipNonTerminators(BlockPosition &Position,
MBBInfo &Block) {
- if (Block.Alignment > Position.KnownBits) {
+ if (Log2(Block.Alignment) > Position.KnownBits) {
// When calculating the address of Block, we need to conservatively
// assume that Block had the worst possible misalignment.
- Position.Address += ((uint64_t(1) << Block.Alignment) -
- (uint64_t(1) << Position.KnownBits));
- Position.KnownBits = Block.Alignment;
+ Position.Address +=
+ (Block.Alignment.value() - (uint64_t(1) << Position.KnownBits));
+ Position.KnownBits = Log2(Block.Alignment);
}
// Align the addresses.
- uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1;
- Position.Address = (Position.Address + AlignMask) & ~AlignMask;
+ Position.Address = alignTo(Position.Address, Block.Alignment);
// Record the block's position.
Block.Address = Position.Address;
@@ -257,7 +257,7 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) {
}
Terminator.Branch = &MI;
Terminator.TargetBlock =
- TII->getBranchInfo(MI).Target->getMBB()->getNumber();
+ TII->getBranchInfo(MI).getMBBTarget()->getNumber();
}
return Terminator;
}
@@ -275,7 +275,7 @@ uint64_t SystemZLongBranch::initMBBInfo() {
Terminators.clear();
Terminators.reserve(NumBlocks);
- BlockPosition Position(MF->getAlignment());
+ BlockPosition Position(Log2(MF->getAlignment()));
for (unsigned I = 0; I < NumBlocks; ++I) {
MachineBasicBlock *MBB = MF->getBlockNumbered(I);
MBBInfo &Block = MBBs[I];
@@ -339,7 +339,7 @@ bool SystemZLongBranch::mustRelaxABranch() {
// must be long.
void SystemZLongBranch::setWorstCaseAddresses() {
SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
- BlockPosition Position(MF->getAlignment());
+ BlockPosition Position(Log2(MF->getAlignment()));
for (auto &Block : MBBs) {
skipNonTerminators(Position, Block);
for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) {
@@ -440,7 +440,7 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
// Run a shortening pass and relax any branches that need to be relaxed.
void SystemZLongBranch::relaxBranches() {
SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
- BlockPosition Position(MF->getAlignment());
+ BlockPosition Position(Log2(MF->getAlignment()));
for (auto &Block : MBBs) {
skipNonTerminators(Position, Block);
for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) {
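
The SystemZLongBranch changes above migrate from a raw log2 value to the llvm::Align type: Log2() recovers the exponent, value() the byte amount, and alignTo() replaces the hand-rolled mask arithmetic. A standalone sketch of the same address update, using plain integers in place of llvm::Align (an assumption of this sketch):

#include <cstdint>

// Plain-integer version of skipNonTerminators' address update: assume the
// worst possible misalignment for the block, then round the address up.
uint64_t advanceToBlock(uint64_t Address, unsigned &KnownBits,
                        unsigned LogAlign) {
  uint64_t AlignVal = UINT64_C(1) << LogAlign; // Block.Alignment.value()
  if (LogAlign > KnownBits) {
    Address += AlignVal - (UINT64_C(1) << KnownBits);
    KnownBits = LogAlign;
  }
  return (Address + AlignVal - 1) & ~(AlignVal - 1); // alignTo()
}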
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index 0becfaa1d49c..3fc25034dded 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZMachineScheduler.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;
@@ -108,8 +109,8 @@ void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) {
I != SinglePredMBB->end(); I++) {
LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; I->dump(););
bool TakenBranch = (I->isBranch() &&
- (TII->getBranchInfo(*I).Target->isReg() || // Relative branch
- TII->getBranchInfo(*I).Target->getMBB() == MBB));
+ (TII->getBranchInfo(*I).isIndirect() ||
+ TII->getBranchInfo(*I).getMBBTarget() == MBB));
HazardRec->emitInstruction(&*I, TakenBranch);
if (TakenBranch)
break;
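
The scheduler now asks the branch info intent-revealing questions instead of decoding the raw Target operand itself. A hedged sketch of that accessor shape (the real struct is target-internal; the member names here simply mirror the calls in the diff):

#include "llvm/CodeGen/MachineOperand.h"

// Wraps the branch target operand so callers can ask whether the branch
// is indirect, or fetch its MBB target, without knowing the operand kind.
struct BranchInfoSketch {
  const llvm::MachineOperand *Target;
  bool isIndirect() const { return Target->isReg(); }
  llvm::MachineBasicBlock *getMBBTarget() const { return Target->getMBB(); }
};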
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 56632e1529a2..b2bab68a6274 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -21,15 +21,32 @@ class ImmediateTLSAsmOperand<string name>
let RenderMethod = "addImmTLSOperands";
}
+class ImmediateOp<ValueType vt, string asmop> : Operand<vt> {
+ let PrintMethod = "print"##asmop##"Operand";
+ let DecoderMethod = "decode"##asmop##"Operand";
+ let ParserMatchClass = !cast<AsmOperandClass>(asmop);
+}
+
+class ImmOpWithPattern<ValueType vt, string asmop, code pred, SDNodeXForm xform,
+ SDNode ImmNode = imm> :
+ ImmediateOp<vt, asmop>, PatLeaf<(vt ImmNode), pred, xform>;
+
+// class ImmediatePatLeaf<ValueType vt, code pred,
+// SDNodeXForm xform, SDNode ImmNode>
+// : PatLeaf<(vt ImmNode), pred, xform>;
+
+
// Constructs both a DAG pattern and instruction operand for an immediate
// of type VT. PRED returns true if a node is acceptable and XFORM returns
// the operand value associated with the node. ASMOP is the name of the
// associated asm operand, and also forms the basis of the asm print method.
-class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop>
- : PatLeaf<(vt imm), pred, xform>, Operand<vt> {
- let PrintMethod = "print"##asmop##"Operand";
- let DecoderMethod = "decode"##asmop##"Operand";
- let ParserMatchClass = !cast<AsmOperandClass>(asmop);
+multiclass Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> {
+ // def "" : ImmediateOp<vt, asmop>,
+ // PatLeaf<(vt imm), pred, xform>;
+ def "" : ImmOpWithPattern<vt, asmop, pred, xform>;
+
+// def _timm : PatLeaf<(vt timm), pred, xform>;
+ def _timm : ImmOpWithPattern<vt, asmop, pred, xform, timm>;
}
// Constructs an asm operand for a PC-relative address. SIZE says how
@@ -295,87 +312,87 @@ def U48Imm : ImmediateAsmOperand<"U48Imm">;
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being zero.
-def imm32ll16 : Immediate<i32, [{
+defm imm32ll16 : Immediate<i32, [{
return SystemZ::isImmLL(N->getZExtValue());
}], LL16, "U16Imm">;
-def imm32lh16 : Immediate<i32, [{
+defm imm32lh16 : Immediate<i32, [{
return SystemZ::isImmLH(N->getZExtValue());
}], LH16, "U16Imm">;
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being one.
-def imm32ll16c : Immediate<i32, [{
+defm imm32ll16c : Immediate<i32, [{
return SystemZ::isImmLL(uint32_t(~N->getZExtValue()));
}], LL16, "U16Imm">;
-def imm32lh16c : Immediate<i32, [{
+defm imm32lh16c : Immediate<i32, [{
return SystemZ::isImmLH(uint32_t(~N->getZExtValue()));
}], LH16, "U16Imm">;
// Short immediates
-def imm32zx1 : Immediate<i32, [{
+defm imm32zx1 : Immediate<i32, [{
return isUInt<1>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U1Imm">;
-def imm32zx2 : Immediate<i32, [{
+defm imm32zx2 : Immediate<i32, [{
return isUInt<2>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U2Imm">;
-def imm32zx3 : Immediate<i32, [{
+defm imm32zx3 : Immediate<i32, [{
return isUInt<3>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U3Imm">;
-def imm32zx4 : Immediate<i32, [{
+defm imm32zx4 : Immediate<i32, [{
return isUInt<4>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U4Imm">;
// Note: this enforces an even value during code generation only.
// When used from the assembler, any 4-bit value is allowed.
-def imm32zx4even : Immediate<i32, [{
+defm imm32zx4even : Immediate<i32, [{
return isUInt<4>(N->getZExtValue());
}], UIMM8EVEN, "U4Imm">;
-def imm32zx6 : Immediate<i32, [{
+defm imm32zx6 : Immediate<i32, [{
return isUInt<6>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U6Imm">;
-def imm32sx8 : Immediate<i32, [{
+defm imm32sx8 : Immediate<i32, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
-def imm32zx8 : Immediate<i32, [{
+defm imm32zx8 : Immediate<i32, [{
return isUInt<8>(N->getZExtValue());
}], UIMM8, "U8Imm">;
-def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
+defm imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
-def imm32zx12 : Immediate<i32, [{
+defm imm32zx12 : Immediate<i32, [{
return isUInt<12>(N->getZExtValue());
}], UIMM12, "U12Imm">;
-def imm32sx16 : Immediate<i32, [{
+defm imm32sx16 : Immediate<i32, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
-def imm32sx16n : Immediate<i32, [{
+defm imm32sx16n : Immediate<i32, [{
return isInt<16>(-N->getSExtValue());
}], NEGSIMM16, "S16Imm">;
-def imm32zx16 : Immediate<i32, [{
+defm imm32zx16 : Immediate<i32, [{
return isUInt<16>(N->getZExtValue());
}], UIMM16, "U16Imm">;
-def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
-def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
+defm imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+defm imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
// Full 32-bit immediates. We need both signed and unsigned versions
// because the assembler is picky. E.g. AFI requires signed operands
// while NILF requires unsigned ones.
-def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
-def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
+defm simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
+defm uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
-def simm32n : Immediate<i32, [{
+defm simm32n : Immediate<i32, [{
return isInt<32>(-N->getSExtValue());
}], NEGSIMM32, "S32Imm">;
@@ -387,107 +404,107 @@ def imm32 : ImmLeaf<i32, [{}]>;
// Immediates for 16-bit chunks of an i64, with the other bits of the
// i64 being zero.
-def imm64ll16 : Immediate<i64, [{
+defm imm64ll16 : Immediate<i64, [{
return SystemZ::isImmLL(N->getZExtValue());
}], LL16, "U16Imm">;
-def imm64lh16 : Immediate<i64, [{
+defm imm64lh16 : Immediate<i64, [{
return SystemZ::isImmLH(N->getZExtValue());
}], LH16, "U16Imm">;
-def imm64hl16 : Immediate<i64, [{
+defm imm64hl16 : Immediate<i64, [{
return SystemZ::isImmHL(N->getZExtValue());
}], HL16, "U16Imm">;
-def imm64hh16 : Immediate<i64, [{
+defm imm64hh16 : Immediate<i64, [{
return SystemZ::isImmHH(N->getZExtValue());
}], HH16, "U16Imm">;
// Immediates for 16-bit chunks of an i64, with the other bits of the
// i64 being one.
-def imm64ll16c : Immediate<i64, [{
+defm imm64ll16c : Immediate<i64, [{
return SystemZ::isImmLL(uint64_t(~N->getZExtValue()));
}], LL16, "U16Imm">;
-def imm64lh16c : Immediate<i64, [{
+defm imm64lh16c : Immediate<i64, [{
return SystemZ::isImmLH(uint64_t(~N->getZExtValue()));
}], LH16, "U16Imm">;
-def imm64hl16c : Immediate<i64, [{
+defm imm64hl16c : Immediate<i64, [{
return SystemZ::isImmHL(uint64_t(~N->getZExtValue()));
}], HL16, "U16Imm">;
-def imm64hh16c : Immediate<i64, [{
+defm imm64hh16c : Immediate<i64, [{
return SystemZ::isImmHH(uint64_t(~N->getZExtValue()));
}], HH16, "U16Imm">;
// Immediates for the lower and upper 32 bits of an i64, with the other
// bits of the i64 being zero.
-def imm64lf32 : Immediate<i64, [{
+defm imm64lf32 : Immediate<i64, [{
return SystemZ::isImmLF(N->getZExtValue());
}], LF32, "U32Imm">;
-def imm64hf32 : Immediate<i64, [{
+defm imm64hf32 : Immediate<i64, [{
return SystemZ::isImmHF(N->getZExtValue());
}], HF32, "U32Imm">;
// Immediates for the lower and upper 32 bits of an i64, with the other
// bits of the i64 being one.
-def imm64lf32c : Immediate<i64, [{
+defm imm64lf32c : Immediate<i64, [{
return SystemZ::isImmLF(uint64_t(~N->getZExtValue()));
}], LF32, "U32Imm">;
-def imm64hf32c : Immediate<i64, [{
+defm imm64hf32c : Immediate<i64, [{
return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
}], HF32, "U32Imm">;
// Negated immediates that fit LF32 or LH16.
-def imm64lh16n : Immediate<i64, [{
+defm imm64lh16n : Immediate<i64, [{
return SystemZ::isImmLH(uint64_t(-N->getZExtValue()));
}], NEGLH16, "U16Imm">;
-def imm64lf32n : Immediate<i64, [{
+defm imm64lf32n : Immediate<i64, [{
return SystemZ::isImmLF(uint64_t(-N->getZExtValue()));
}], NEGLF32, "U32Imm">;
// Short immediates.
-def imm64sx8 : Immediate<i64, [{
+defm imm64sx8 : Immediate<i64, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
-def imm64zx8 : Immediate<i64, [{
+defm imm64zx8 : Immediate<i64, [{
return isUInt<8>(N->getSExtValue());
}], UIMM8, "U8Imm">;
-def imm64sx16 : Immediate<i64, [{
+defm imm64sx16 : Immediate<i64, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
-def imm64sx16n : Immediate<i64, [{
+defm imm64sx16n : Immediate<i64, [{
return isInt<16>(-N->getSExtValue());
}], NEGSIMM16, "S16Imm">;
-def imm64zx16 : Immediate<i64, [{
+defm imm64zx16 : Immediate<i64, [{
return isUInt<16>(N->getZExtValue());
}], UIMM16, "U16Imm">;
-def imm64sx32 : Immediate<i64, [{
+defm imm64sx32 : Immediate<i64, [{
return isInt<32>(N->getSExtValue());
}], SIMM32, "S32Imm">;
-def imm64sx32n : Immediate<i64, [{
+defm imm64sx32n : Immediate<i64, [{
return isInt<32>(-N->getSExtValue());
}], NEGSIMM32, "S32Imm">;
-def imm64zx32 : Immediate<i64, [{
+defm imm64zx32 : Immediate<i64, [{
return isUInt<32>(N->getZExtValue());
}], UIMM32, "U32Imm">;
-def imm64zx32n : Immediate<i64, [{
+defm imm64zx32n : Immediate<i64, [{
return isUInt<32>(-N->getSExtValue());
}], NEGUIMM32, "U32Imm">;
-def imm64zx48 : Immediate<i64, [{
+defm imm64zx48 : Immediate<i64, [{
return isUInt<64>(N->getZExtValue());
}], UIMM48, "U48Imm">;
@@ -637,7 +654,7 @@ def bdvaddr12only : BDVMode< "64", "12">;
//===----------------------------------------------------------------------===//
// A 4-bit condition-code mask.
-def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>,
+def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>,
Operand<i32> {
let PrintMethod = "printCond4Operand";
}
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 15bd12bc98a4..6fe383e64b74 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -472,17 +472,17 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs),
(z_subcarry_1 node:$lhs, node:$rhs, CC)>;
// Signed and unsigned comparisons.
-def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
return Type != SystemZICMP::UnsignedOnly;
}]>;
-def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
return Type != SystemZICMP::SignedOnly;
}]>;
// Register- and memory-based TEST UNDER MASK.
-def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
+def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, timm)>;
def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
// Register sign-extend operations. Sub-32-bit values are represented as i32s.
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index beaf4de285a3..65300fb47627 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -100,12 +100,12 @@ multiclass CondStores64<Instruction insn, Instruction insninv,
SDPatternOperator store, SDPatternOperator load,
AddressingMode mode> {
def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
- imm32zx4:$valid, imm32zx4:$cc),
+ imm32zx4_timm:$valid, imm32zx4_timm:$cc),
mode:$addr),
(insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
imm32zx4:$valid, imm32zx4:$cc)>;
def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
- imm32zx4:$valid, imm32zx4:$cc),
+ imm32zx4_timm:$valid, imm32zx4_timm:$cc),
mode:$addr),
(insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
imm32zx4:$valid, imm32zx4:$cc)>;
diff --git a/lib/Target/SystemZ/SystemZPostRewrite.cpp b/lib/Target/SystemZ/SystemZPostRewrite.cpp
index 8e4060eac74c..aaa7f8fc88f5 100644
--- a/lib/Target/SystemZ/SystemZPostRewrite.cpp
+++ b/lib/Target/SystemZ/SystemZPostRewrite.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-postrewrite"
STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops.");
+STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)");
namespace llvm {
void initializeSystemZPostRewritePass(PassRegistry&);
@@ -45,12 +46,20 @@ public:
StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
private:
+ void selectLOCRMux(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI,
+ unsigned LowOpcode,
+ unsigned HighOpcode);
+ void selectSELRMux(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI,
+ unsigned LowOpcode,
+ unsigned HighOpcode);
+ bool expandCondMove(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool selectMBB(MachineBasicBlock &MBB);
@@ -68,11 +77,141 @@ FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) {
return new SystemZPostRewrite();
}
+// MI is a load-register-on-condition pseudo instruction. Replace it with
+// LowOpcode if source and destination are both low GR32s and HighOpcode if
+// source and destination are both high GR32s. Otherwise, a branch sequence
+// is created.
+void SystemZPostRewrite::selectLOCRMux(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI,
+ unsigned LowOpcode,
+ unsigned HighOpcode) {
+ Register DestReg = MBBI->getOperand(0).getReg();
+ Register SrcReg = MBBI->getOperand(2).getReg();
+ bool DestIsHigh = SystemZ::isHighReg(DestReg);
+ bool SrcIsHigh = SystemZ::isHighReg(SrcReg);
+
+ if (!DestIsHigh && !SrcIsHigh)
+ MBBI->setDesc(TII->get(LowOpcode));
+ else if (DestIsHigh && SrcIsHigh)
+ MBBI->setDesc(TII->get(HighOpcode));
+ else
+ expandCondMove(MBB, MBBI, NextMBBI);
+}
+
+// MI is a select pseudo instruction. Replace it with LowOpcode if source
+// and destination are all low GR32s and HighOpcode if source and destination
+// are all high GR32s. Otherwise, a branch sequence is created.
+void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI,
+ unsigned LowOpcode,
+ unsigned HighOpcode) {
+ Register DestReg = MBBI->getOperand(0).getReg();
+ Register Src1Reg = MBBI->getOperand(1).getReg();
+ Register Src2Reg = MBBI->getOperand(2).getReg();
+ bool DestIsHigh = SystemZ::isHighReg(DestReg);
+ bool Src1IsHigh = SystemZ::isHighReg(Src1Reg);
+ bool Src2IsHigh = SystemZ::isHighReg(Src2Reg);
+
+ // If sources and destination aren't all high or all low, we may be able to
+ // simplify the operation by moving one of the sources to the destination
+ // first. But only if this doesn't clobber the other source.
+ if (DestReg != Src1Reg && DestReg != Src2Reg) {
+ if (DestIsHigh != Src1IsHigh) {
+ BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(),
+ TII->get(SystemZ::COPY), DestReg)
+ .addReg(MBBI->getOperand(1).getReg(), getRegState(MBBI->getOperand(1)));
+ MBBI->getOperand(1).setReg(DestReg);
+ Src1Reg = DestReg;
+ Src1IsHigh = DestIsHigh;
+ } else if (DestIsHigh != Src2IsHigh) {
+ BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(),
+ TII->get(SystemZ::COPY), DestReg)
+ .addReg(MBBI->getOperand(2).getReg(), getRegState(MBBI->getOperand(2)));
+ MBBI->getOperand(2).setReg(DestReg);
+ Src2Reg = DestReg;
+ Src2IsHigh = DestIsHigh;
+ }
+ }
+
+ // If the destination (now) matches one source, prefer this to be first.
+ if (DestReg != Src1Reg && DestReg == Src2Reg) {
+ TII->commuteInstruction(*MBBI, false, 1, 2);
+ std::swap(Src1Reg, Src2Reg);
+ std::swap(Src1IsHigh, Src2IsHigh);
+ }
+
+ if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh)
+ MBBI->setDesc(TII->get(LowOpcode));
+ else if (DestIsHigh && Src1IsHigh && Src2IsHigh)
+ MBBI->setDesc(TII->get(HighOpcode));
+ else
+ // Given the simplification above, we must already have a two-operand case.
+ expandCondMove(MBB, MBBI, NextMBBI);
+}
+
+// Replace MBBI by a branch sequence that performs a conditional move of
+// operand 2 to the destination register. Operand 1 is expected to be the
+// same register as the destination.
+bool SystemZPostRewrite::expandCondMove(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineFunction &MF = *MBB.getParent();
+ const BasicBlock *BB = MBB.getBasicBlock();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ Register DestReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
+ unsigned CCValid = MI.getOperand(3).getImm();
+ unsigned CCMask = MI.getOperand(4).getImm();
+ assert(DestReg == MI.getOperand(1).getReg() &&
+ "Expected destination and first source operand to be the same.");
+
+ LivePhysRegs LiveRegs(TII->getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+ LiveRegs.stepBackward(*I);
+
+ // Splice MBB at MI, moving the rest of the block into RestMBB.
+ MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
+ RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
+ RestMBB->transferSuccessors(&MBB);
+ for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+ RestMBB->addLiveIn(*I);
+
+ // Create a new block MoveMBB to hold the move instruction.
+ MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
+ MoveMBB->addLiveIn(SrcReg);
+ for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+ MoveMBB->addLiveIn(*I);
+
+ // At the end of MBB, create a conditional branch to RestMBB if the
+ // condition is false, otherwise fall through to MoveMBB.
+ BuildMI(&MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB);
+ MBB.addSuccessor(RestMBB);
+ MBB.addSuccessor(MoveMBB);
+
+ // In MoveMBB, emit an instruction to move SrcReg into DestReg,
+ // then fall through to RestMBB.
+ BuildMI(*MoveMBB, MoveMBB->end(), DL, TII->get(SystemZ::COPY), DestReg)
+ .addReg(MI.getOperand(2).getReg(), getRegState(MI.getOperand(2)));
+ MoveMBB->addSuccessor(RestMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+ LOCRMuxJumps++;
+ return true;
+}
+
/// If MBBI references a pseudo instruction that should be selected here,
/// do it and return true. Otherwise return false.
bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
@@ -83,7 +222,7 @@ bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB,
if (TargetMemOpcode != -1) {
MI.setDesc(TII->get(TargetMemOpcode));
MI.tieOperands(0, 1);
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
MachineOperand &SrcMO = MI.getOperand(1);
if (DstReg != SrcMO.getReg()) {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg)
@@ -94,6 +233,15 @@ bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB,
return true;
}
+ switch (Opcode) {
+ case SystemZ::LOCRMux:
+ selectLOCRMux(MBB, MBBI, NextMBBI, SystemZ::LOCR, SystemZ::LOCFHR);
+ return true;
+ case SystemZ::SELRMux:
+ selectSELRMux(MBB, MBBI, NextMBBI, SystemZ::SELR, SystemZ::SELFHR);
+ return true;
+ }
+
return false;
}
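
expandCondMove above splits the block and branches around the copy when the condition fails; the only bit manipulation involved is inverting the condition mask within the valid CC bits (the BRC is built with CCMask ^ CCValid). A standalone sketch of that inversion:

#include <cassert>

// "Branch if false": within the valid condition-code bits, the complement
// of the move-if-true mask selects exactly the remaining outcomes.
unsigned invertCCMask(unsigned CCValid, unsigned CCMask) {
  assert((CCMask & ~CCValid) == 0 && "mask must lie within the valid bits");
  return CCMask ^ CCValid;
}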
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index b27c25beb58c..af33a0300552 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -35,5 +35,6 @@ def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>;
def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>;
def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;
-def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>;
+def : ProcessorModel<"arch13", Z15Model, Arch13SupportedFeatures.List>;
+def : ProcessorModel<"z15", Z15Model, Arch13SupportedFeatures.List>;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index e7cd6871dbb4..39ace5594b7f 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -41,7 +41,7 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO,
return &SystemZ::GRH32BitRegClass;
if (VRM && VRM->hasPhys(MO.getReg())) {
- unsigned PhysReg = VRM->getPhys(MO.getReg());
+ Register PhysReg = VRM->getPhys(MO.getReg());
if (SystemZ::GR32BitRegClass.contains(PhysReg))
return &SystemZ::GR32BitRegClass;
assert (SystemZ::GRH32BitRegClass.contains(PhysReg) &&
@@ -120,8 +120,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
// Add the other operand of the LOCRMux to the worklist.
- unsigned OtherReg =
- (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg());
+ Register OtherReg =
+ (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg());
if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass)
Worklist.push_back(OtherReg);
} // end LOCRMux
@@ -169,7 +169,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
auto tryAddHint = [&](const MachineOperand *MO) -> void {
Register Reg = MO->getReg();
- Register PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
+ Register PhysReg =
+ Register::isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
if (PhysReg) {
if (MO->getSubReg())
PhysReg = getSubReg(PhysReg, MO->getSubReg());
@@ -297,8 +298,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
assert(Mask && "One offset must be OK");
} while (!OpcodeForOffset);
- unsigned ScratchReg =
- MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ Register ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass);
int64_t HighOffset = OldOffset - Offset;
if (MI->getDesc().TSFlags & SystemZII::HasIndex
@@ -351,8 +352,8 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
// regalloc may run out of registers.
unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0);
- unsigned GR128Reg = MI->getOperand(WideOpNo).getReg();
- unsigned GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg();
+ Register GR128Reg = MI->getOperand(WideOpNo).getReg();
+ Register GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg();
LiveInterval &IntGR128 = LIS.getInterval(GR128Reg);
LiveInterval &IntGRNar = LIS.getInterval(GRNarReg);
@@ -385,7 +386,7 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
MEE++;
for (; MII != MEE; ++MII) {
for (const MachineOperand &MO : MII->operands())
- if (MO.isReg() && isPhysicalRegister(MO.getReg())) {
+ if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/);
SI.isValid(); ++SI)
if (NewRC->contains(*SI)) {
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 4f721ec23e53..7044efef1ac6 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -28,6 +28,15 @@ inline unsigned even128(bool Is32bit) {
inline unsigned odd128(bool Is32bit) {
return Is32bit ? subreg_l32 : subreg_l64;
}
+
+// Reg should be a 32-bit GPR. Return true if it is a high register rather
+// than a low register.
+inline bool isHighReg(unsigned int Reg) {
+ if (SystemZ::GRH32BitRegClass.contains(Reg))
+ return true;
+ assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32");
+ return false;
+}
} // end namespace SystemZ
struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
diff --git a/lib/Target/SystemZ/SystemZSchedule.td b/lib/Target/SystemZ/SystemZSchedule.td
index 98eca2802242..119e3ee7c22c 100644
--- a/lib/Target/SystemZ/SystemZSchedule.td
+++ b/lib/Target/SystemZ/SystemZSchedule.td
@@ -59,7 +59,7 @@ def VBU : SchedWrite; // Virtual branching unit
def MCD : SchedWrite; // Millicode
-include "SystemZScheduleArch13.td"
+include "SystemZScheduleZ15.td"
include "SystemZScheduleZ14.td"
include "SystemZScheduleZ13.td"
include "SystemZScheduleZEC12.td"
diff --git a/lib/Target/SystemZ/SystemZScheduleArch13.td b/lib/Target/SystemZ/SystemZScheduleZ15.td
index 9f82f24d0e8f..56ceb88f35d4 100644
--- a/lib/Target/SystemZ/SystemZScheduleArch13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -1,4 +1,4 @@
-//-- SystemZScheduleArch13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//-- SystemZScheduleZ15.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,14 +6,14 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the machine model for Arch13 to support instruction
+// This file defines the machine model for Z15 to support instruction
// scheduling and other instruction cost heuristics.
//
// Pseudos expanded right after isel do not need to be modelled here.
//
//===----------------------------------------------------------------------===//
-def Arch13Model : SchedMachineModel {
+def Z15Model : SchedMachineModel {
let UnsupportedFeatures = Arch13UnsupportedFeatures.List;
@@ -27,7 +27,7 @@ def Arch13Model : SchedMachineModel {
let MispredictPenalty = 20;
}
-let SchedModel = Arch13Model in {
+let SchedModel = Z15Model in {
// These definitions need the SchedModel value. They could be put in a
// subtarget common include file, but it seems the include system in Tablegen
// currently (2016) rejects multiple includes of the same file.
@@ -73,43 +73,43 @@ let NumMicroOps = 0 in {
}
// Execution units.
-def Arch13_FXaUnit : ProcResource<2>;
-def Arch13_FXbUnit : ProcResource<2>;
-def Arch13_LSUnit : ProcResource<2>;
-def Arch13_VecUnit : ProcResource<2>;
-def Arch13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
-def Arch13_VBUnit : ProcResource<2>;
-def Arch13_MCD : ProcResource<1>;
+def Z15_FXaUnit : ProcResource<2>;
+def Z15_FXbUnit : ProcResource<2>;
+def Z15_LSUnit : ProcResource<2>;
+def Z15_VecUnit : ProcResource<2>;
+def Z15_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Z15_VBUnit : ProcResource<2>;
+def Z15_MCD : ProcResource<1>;
// Subtarget specific definitions of scheduling resources.
let NumMicroOps = 0 in {
- def : WriteRes<FXa, [Arch13_FXaUnit]>;
- def : WriteRes<FXb, [Arch13_FXbUnit]>;
- def : WriteRes<LSU, [Arch13_LSUnit]>;
- def : WriteRes<VecBF, [Arch13_VecUnit]>;
- def : WriteRes<VecDF, [Arch13_VecUnit]>;
- def : WriteRes<VecDFX, [Arch13_VecUnit]>;
- def : WriteRes<VecMul, [Arch13_VecUnit]>;
- def : WriteRes<VecStr, [Arch13_VecUnit]>;
- def : WriteRes<VecXsPm, [Arch13_VecUnit]>;
+ def : WriteRes<FXa, [Z15_FXaUnit]>;
+ def : WriteRes<FXb, [Z15_FXbUnit]>;
+ def : WriteRes<LSU, [Z15_LSUnit]>;
+ def : WriteRes<VecBF, [Z15_VecUnit]>;
+ def : WriteRes<VecDF, [Z15_VecUnit]>;
+ def : WriteRes<VecDFX, [Z15_VecUnit]>;
+ def : WriteRes<VecMul, [Z15_VecUnit]>;
+ def : WriteRes<VecStr, [Z15_VecUnit]>;
+ def : WriteRes<VecXsPm, [Z15_VecUnit]>;
foreach Num = 2-5 in { let ResourceCycles = [Num] in {
- def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Arch13_FXaUnit]>;
- def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Arch13_FXbUnit]>;
- def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Arch13_LSUnit]>;
- def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Arch13_VecUnit]>;
- def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Arch13_VecUnit]>;
- def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Arch13_VecUnit]>;
- def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Arch13_VecUnit]>;
- def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Arch13_VecUnit]>;
- def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z15_FXaUnit]>;
+ def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z15_FXbUnit]>;
+ def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z15_LSUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Z15_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Z15_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Z15_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Z15_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Z15_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z15_VecUnit]>;
}}
- def : WriteRes<VecFPd, [Arch13_VecFPdUnit]> { let ResourceCycles = [30]; }
+ def : WriteRes<VecFPd, [Z15_VecFPdUnit]> { let ResourceCycles = [30]; }
- def : WriteRes<VBU, [Arch13_VBUnit]>; // Virtual Branching Unit
+ def : WriteRes<VBU, [Z15_VBUnit]>; // Virtual Branching Unit
}
-def : WriteRes<MCD, [Arch13_MCD]> { let NumMicroOps = 3;
+def : WriteRes<MCD, [Z15_MCD]> { let NumMicroOps = 3;
let BeginGroup = 1;
let EndGroup = 1; }
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index a50e6aa59711..47c925dcf730 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -209,10 +209,10 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
// Now select between End and null, depending on whether the character
// was found.
- SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT),
- DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
- DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32),
- CCReg};
+ SDValue Ops[] = {
+ End, DAG.getConstant(0, DL, PtrVT),
+ DAG.getTargetConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
+ DAG.getTargetConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), CCReg};
End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops);
return std::make_pair(End, Chain);
}
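
This is the same imm-to-timm theme as the SystemZOperands.td changes: operands that must survive into the final machine instruction as immediates (the condition-mask operands of SELECT_CCMASK here) are built with getTargetConstant, which instruction selection treats as an opaque immediate rather than a foldable constant node. A hedged sketch of the distinction against the SelectionDAG API; the literal mask value is a placeholder:

#include "llvm/CodeGen/SelectionDAG.h"

// getConstant produces an ordinary DAG constant that legalization and
// folding may rewrite; getTargetConstant is pinned as an instruction
// immediate, which is what condition-mask operands require.
void buildMaskOps(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                  llvm::EVT PtrVT, llvm::SDValue End, llvm::SDValue CCReg,
                  llvm::SmallVectorImpl<llvm::SDValue> &Ops) {
  Ops.push_back(End);
  Ops.push_back(DAG.getConstant(0, DL, PtrVT));                // foldable zero
  Ops.push_back(DAG.getTargetConstant(2, DL, llvm::MVT::i32)); // placeholder mask
  Ops.push_back(CCReg);
}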
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index e79dfc5b4b9e..2aca22c9082a 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -75,7 +75,7 @@ static void tieOpsIfNeeded(MachineInstr &MI) {
// instead of IIxF.
bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL,
unsigned LLIxH) {
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
// The new opcode will clear the other half of the GR64 reg, so
// cancel if that is live.
unsigned thisSubRegIdx =
@@ -86,7 +86,7 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL,
: SystemZ::subreg_l32);
unsigned GR64BitReg =
TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass);
- unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx);
+ Register OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx);
if (LiveRegs.contains(OtherReg))
return false;
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 5c49e6eff0bf..20865037fe38 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -154,7 +154,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
getEffectiveRelocModel(RM),
getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
OL),
- TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
+ TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
@@ -176,7 +176,7 @@ public:
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
return new ScheduleDAGMI(C,
- llvm::make_unique<SystemZPostRASchedStrategy>(C),
+ std::make_unique<SystemZPostRASchedStrategy>(C),
/*RemoveKillFlags=*/true);
}
@@ -184,6 +184,7 @@ public:
bool addInstSelector() override;
bool addILPOpts() override;
void addPostRewrite() override;
+ void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
@@ -217,14 +218,14 @@ void SystemZPassConfig::addPostRewrite() {
addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
}
-void SystemZPassConfig::addPreSched2() {
+void SystemZPassConfig::addPostRegAlloc() {
// PostRewrite needs to be run at -O0 also (in which case addPostRewrite()
// is not called).
if (getOptLevel() == CodeGenOpt::None)
addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
+}
- addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
-
+void SystemZPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
}
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 145cf87ef9f5..11c99aa11174 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -304,7 +304,8 @@ bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
C2.ScaleCost, C2.SetupCost);
}
-unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
+unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't
// be used in an address.
@@ -707,7 +708,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// TODO: Fix base implementation which could simplify things a bit here
// (seems to miss on differentiating on scalar/vector types).
- // Only 64 bit vector conversions are natively supported before arch13.
+ // Only 64 bit vector conversions are natively supported before z15.
if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) {
if (SrcScalarBits == DstScalarBits)
return NumDstVectors;
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 16ce2ef1d7a0..3ba80b31439f 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -56,12 +56,12 @@ public:
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;
- unsigned getCacheLineSize() { return 256; }
- unsigned getPrefetchDistance() { return 2000; }
- unsigned getMinPrefetchStride() { return 2048; }
+ unsigned getCacheLineSize() const override { return 256; }
+ unsigned getPrefetchDistance() const override { return 2000; }
+ unsigned getMinPrefetchStride() const override { return 2048; }
bool hasDivRemOp(Type *DataType, bool IsSigned);
bool prefersVectorizedAddressing() { return false; }
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 17274e1c2c6e..dcd3934de0fa 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -253,6 +253,7 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal(
auto Attrs = GVar->getAttributes();
if ((Attrs.hasAttribute("bss-section") && Kind.isBSS()) ||
(Attrs.hasAttribute("data-section") && Kind.isData()) ||
+ (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) ||
(Attrs.hasAttribute("rodata-section") && Kind.isReadOnly())) {
return getExplicitSectionGlobal(GO, Kind, TM);
}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 634866d93570..4c98e140f446 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -63,18 +63,6 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math");
- RESET_OPTION(NoTrappingFPMath, "no-trapping-math");
-
- StringRef Denormal =
- F.getFnAttribute("denormal-fp-math").getValueAsString();
- if (Denormal == "ieee")
- Options.FPDenormalMode = FPDenormal::IEEE;
- else if (Denormal == "preserve-sign")
- Options.FPDenormalMode = FPDenormal::PreserveSign;
- else if (Denormal == "positive-zero")
- Options.FPDenormalMode = FPDenormal::PositiveZero;
- else
- Options.FPDenormalMode = DefaultOptions.FPDenormalMode;
}
/// Returns the code generation relocation model. The choices are static, PIC,
@@ -140,8 +128,8 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
// don't assume the variables to be DSO local unless we actually know
// that for sure. This only has to be done for variables; for functions
// the linker can insert thunks for calling functions from another DLL.
- if (TT.isWindowsGNUEnvironment() && GV && GV->isDeclarationForLinker() &&
- isa<GlobalVariable>(GV))
+ if (TT.isWindowsGNUEnvironment() && TT.isOSBinFormatCOFF() && GV &&
+ GV->isDeclarationForLinker() && isa<GlobalVariable>(GV))
return false;
// On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols
@@ -154,7 +142,9 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
// Make an exception for windows OS in the triple: Some firmware builds use
// *-win32-macho triples. This (accidentally?) produced windows relocations
// without GOT tables in older clang versions; keep this behaviour.
- if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO()))
+ // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables
+ // either.
+ if (TT.isOSBinFormatCOFF() || TT.isOSWindows())
return true;
// Most PIC code sequences that assume that a symbol is local cannot
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 5d9029682fdd..3ac9c38dfc0b 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -219,7 +219,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
std::error_code EC;
- raw_fd_ostream dest(Filename, EC, sys::fs::F_None);
+ raw_fd_ostream dest(Filename, EC, sys::fs::OF_None);
if (EC) {
*ErrorMessage = strdup(EC.message().c_str());
return true;
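
F_None becomes OF_None following the rename of the sys::fs open-flag enumerators from F_* to OF_*. A hedged sketch of the updated idiom:

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>

// Open an output stream with the renamed flags and surface the error_code
// on failure; callers report EC.message() as before.
bool writeOutput(const char *Filename) {
  std::error_code EC;
  llvm::raw_fd_ostream Dest(Filename, EC, llvm::sys::fs::OF_None);
  if (EC)
    return false;
  Dest << "ok\n";
  return true;
}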
diff --git a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 09628e872dd5..53a96fd6a97d 100644
--- a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -313,16 +313,17 @@ public:
return Optional<wasm::ValType>();
}
- WebAssembly::ExprType parseBlockType(StringRef ID) {
- return StringSwitch<WebAssembly::ExprType>(ID)
- .Case("i32", WebAssembly::ExprType::I32)
- .Case("i64", WebAssembly::ExprType::I64)
- .Case("f32", WebAssembly::ExprType::F32)
- .Case("f64", WebAssembly::ExprType::F64)
- .Case("v128", WebAssembly::ExprType::V128)
- .Case("exnref", WebAssembly::ExprType::Exnref)
- .Case("void", WebAssembly::ExprType::Void)
- .Default(WebAssembly::ExprType::Invalid);
+ WebAssembly::BlockType parseBlockType(StringRef ID) {
+ // Multivalue block types are handled separately in parseSignature
+ return StringSwitch<WebAssembly::BlockType>(ID)
+ .Case("i32", WebAssembly::BlockType::I32)
+ .Case("i64", WebAssembly::BlockType::I64)
+ .Case("f32", WebAssembly::BlockType::F32)
+ .Case("f64", WebAssembly::BlockType::F64)
+ .Case("v128", WebAssembly::BlockType::V128)
+ .Case("exnref", WebAssembly::BlockType::Exnref)
+ .Case("void", WebAssembly::BlockType::Void)
+ .Default(WebAssembly::BlockType::Invalid);
}
bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
@@ -343,7 +344,7 @@ public:
int64_t Val = Int.getIntVal();
if (IsNegative)
Val = -Val;
- Operands.push_back(make_unique<WebAssemblyOperand>(
+ Operands.push_back(std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
WebAssemblyOperand::IntOp{Val}));
Parser.Lex();
@@ -356,7 +357,7 @@ public:
return error("Cannot parse real: ", Flt);
if (IsNegative)
Val = -Val;
- Operands.push_back(make_unique<WebAssemblyOperand>(
+ Operands.push_back(std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
WebAssemblyOperand::FltOp{Val}));
Parser.Lex();
@@ -378,7 +379,7 @@ public:
}
if (IsNegative)
Val = -Val;
- Operands.push_back(make_unique<WebAssemblyOperand>(
+ Operands.push_back(std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
WebAssemblyOperand::FltOp{Val}));
Parser.Lex();
@@ -407,7 +408,7 @@ public:
// an opcode until after the assembly matcher, so set a default to fix
// up later.
auto Tok = Lexer.getTok();
- Operands.push_back(make_unique<WebAssemblyOperand>(
+ Operands.push_back(std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(),
WebAssemblyOperand::IntOp{-1}));
}
@@ -416,8 +417,8 @@ public:
}
void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
- WebAssembly::ExprType BT) {
- Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssembly::BlockType BT) {
+ Operands.push_back(std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Integer, NameLoc, NameLoc,
WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
}
@@ -449,13 +450,14 @@ public:
}
// Now construct the name as first operand.
- Operands.push_back(make_unique<WebAssemblyOperand>(
+ Operands.push_back(std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
WebAssemblyOperand::TokOp{Name}));
// If this instruction is part of a control flow structure, ensure
// proper nesting.
bool ExpectBlockType = false;
+ bool ExpectFuncType = false;
if (Name == "block") {
push(Block);
ExpectBlockType = true;
@@ -489,9 +491,37 @@ public:
if (pop(Name, Block))
return true;
} else if (Name == "end_function") {
+ ensureLocals(getStreamer());
CurrentState = EndFunction;
if (pop(Name, Function) || ensureEmptyNestingStack())
return true;
+ } else if (Name == "call_indirect" || Name == "return_call_indirect") {
+ ExpectFuncType = true;
+ }
+
+ if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) {
+ // This has a special TYPEINDEX operand which in text we
+ // represent as a signature, such that we can re-build this signature,
+ // attach it to an anonymous symbol, which is what WasmObjectWriter
+ // expects to be able to recreate the actual unique-ified type indices.
+ auto Loc = Parser.getTok();
+ auto Signature = std::make_unique<wasm::WasmSignature>();
+ if (parseSignature(Signature.get()))
+ return true;
+ // We got the signature as the block type; no further block type is needed
+ ExpectBlockType = false;
+ auto &Ctx = getStreamer().getContext();
+ // The "true" here will cause this to be a nameless symbol.
+ MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true);
+ auto *WasmSym = cast<MCSymbolWasm>(Sym);
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ const MCExpr *Expr = MCSymbolRefExpr::create(
+ WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
+ Operands.push_back(std::make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Symbol, Loc.getLoc(), Loc.getEndLoc(),
+ WebAssemblyOperand::SymOp{Expr}));
}
while (Lexer.isNot(AsmToken::EndOfStatement)) {
@@ -504,7 +534,7 @@ public:
if (ExpectBlockType) {
// Assume this identifier is a block_type.
auto BT = parseBlockType(Id.getString());
- if (BT == WebAssembly::ExprType::Invalid)
+ if (BT == WebAssembly::BlockType::Invalid)
return error("Unknown block type: ", Id);
addBlockTypeOperand(Operands, NameLoc, BT);
Parser.Lex();
@@ -514,7 +544,7 @@ public:
SMLoc End;
if (Parser.parseExpression(Val, End))
return error("Cannot parse symbol: ", Lexer.getTok());
- Operands.push_back(make_unique<WebAssemblyOperand>(
+ Operands.push_back(std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
WebAssemblyOperand::SymOp{Val}));
if (checkForP2AlignIfLoadStore(Operands, Name))
@@ -549,7 +579,7 @@ public:
}
case AsmToken::LCurly: {
Parser.Lex();
- auto Op = make_unique<WebAssemblyOperand>(
+ auto Op = std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
if (!Lexer.is(AsmToken::RCurly))
for (;;) {
@@ -572,7 +602,7 @@ public:
}
if (ExpectBlockType && Operands.size() == 1) {
// Support blocks with no operands as default to void.
- addBlockTypeOperand(Operands, NameLoc, WebAssembly::ExprType::Void);
+ addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void);
}
Parser.Lex();
return false;
@@ -671,7 +701,7 @@ public:
LastFunctionLabel = LastLabel;
push(Function);
}
- auto Signature = make_unique<wasm::WasmSignature>();
+ auto Signature = std::make_unique<wasm::WasmSignature>();
if (parseSignature(Signature.get()))
return true;
WasmSym->setSignature(Signature.get());
@@ -687,7 +717,7 @@ public:
if (SymName.empty())
return true;
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
- auto Signature = make_unique<wasm::WasmSignature>();
+ auto Signature = std::make_unique<wasm::WasmSignature>();
if (parseRegTypeList(Signature->Params))
return true;
WasmSym->setSignature(Signature.get());
@@ -737,24 +767,30 @@ public:
return true; // We didn't process this directive.
}
+ // Called either when the first instruction is parsed or the function ends.
+ void ensureLocals(MCStreamer &Out) {
+ if (CurrentState == FunctionStart) {
+ // We haven't seen a .local directive yet. The streamer requires locals to
+ // be encoded as a prelude to the instructions, so emit an empty list of
+ // locals here.
+ auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
+ *Out.getTargetStreamer());
+ TOut.emitLocal(SmallVector<wasm::ValType, 0>());
+ CurrentState = FunctionLocals;
+ }
+ }
+
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override {
MCInst Inst;
+ Inst.setLoc(IDLoc);
unsigned MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success: {
- if (CurrentState == FunctionStart) {
- // This is the first instruction in a function, but we haven't seen
- // a .local directive yet. The streamer requires locals to be encoded
- // as a prelude to the instructions, so emit an empty list of locals
- // here.
- auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
- *Out.getTargetStreamer());
- TOut.emitLocal(SmallVector<wasm::ValType, 0>());
- }
+ ensureLocals(Out);
// Fix unknown p2align operands.
auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode());
if (Align != -1U) {
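
Two parser changes meet in this file: locals are now emitted lazily through ensureLocals (reached from either the first instruction or end_function), and block types are recognized up front, with multivalue signatures taking the separate parseSignature path. A standalone sketch of the block-type lookup, using std::unordered_map in place of StringSwitch (an assumption of the sketch):

#include <string>
#include <unordered_map>

// Single value types and "void" are looked up by name; anything else is
// invalid here because full signatures are parsed elsewhere.
enum class BlockType { Invalid, Void, I32, I64, F32, F64, V128, Exnref };

BlockType lookupBlockType(const std::string &Id) {
  static const std::unordered_map<std::string, BlockType> Types = {
      {"i32", BlockType::I32},   {"i64", BlockType::I64},
      {"f32", BlockType::F32},   {"f64", BlockType::F64},
      {"v128", BlockType::V128}, {"exnref", BlockType::Exnref},
      {"void", BlockType::Void}};
  auto It = Types.find(Id);
  return It == Types.end() ? BlockType::Invalid : It->second;
}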
diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index f9bf3f85d30f..9a9c31cff2d5 100644
--- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/TargetRegistry.h"
@@ -213,10 +214,29 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
return MCDisassembler::Fail;
break;
}
- // block_type operands (uint8_t).
+ // block_type operands:
case WebAssembly::OPERAND_SIGNATURE: {
- if (!parseImmediate<uint8_t>(MI, Size, Bytes))
+ int64_t Val;
+ uint64_t PrevSize = Size;
+ if (!nextLEB(Val, Bytes, Size, true))
return MCDisassembler::Fail;
+ if (Val < 0) {
+ // Negative values are single septet value types or empty types
+ if (Size != PrevSize + 1) {
+ MI.addOperand(
+ MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
+ } else {
+ MI.addOperand(MCOperand::createImm(Val & 0x7f));
+ }
+ } else {
+ // We don't have access to the signature, so create a symbol without one
+ MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
+ auto *WasmSym = cast<MCSymbolWasm>(Sym);
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ const MCExpr *Expr = MCSymbolRefExpr::create(
+ WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
+ MI.addOperand(MCOperand::createExpr(Expr));
+ }
break;
}
// FP operands.
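
The disassembler now reads the block type as a signed LEB128 value: a negative value decoded from a single byte is one of the value-type septets (i32, for instance, is 0x7f, i.e. -1), while a non-negative value is an index into the type section, for which the code fabricates a signatureless symbol. A standalone SLEB128 decoder sketch, assuming only standard headers:

#include <cstdint>
#include <vector>

// Decode one signed LEB128 value starting at Pos; returns false on
// truncated or over-long input. Accumulate unsigned, then sign-extend.
bool decodeSLEB128(const std::vector<uint8_t> &Bytes, size_t &Pos,
                   int64_t &Val) {
  uint64_t Acc = 0;
  unsigned Shift = 0;
  uint8_t Byte;
  do {
    if (Pos >= Bytes.size() || Shift >= 64)
      return false;
    Byte = Bytes[Pos++];
    Acc |= uint64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  if (Shift < 64 && (Byte & 0x40))
    Acc |= ~uint64_t(0) << Shift; // sign-extend
  Val = int64_t(Acc);
  return true;
}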
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 70b409cf4a90..8314de41021f 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -31,10 +31,12 @@ namespace {
class WebAssemblyAsmBackend final : public MCAsmBackend {
bool Is64Bit;
+ bool IsEmscripten;
public:
- explicit WebAssemblyAsmBackend(bool Is64Bit)
- : MCAsmBackend(support::little), Is64Bit(Is64Bit) {}
+ explicit WebAssemblyAsmBackend(bool Is64Bit, bool IsEmscripten)
+ : MCAsmBackend(support::little), Is64Bit(Is64Bit),
+ IsEmscripten(IsEmscripten) {}
unsigned getNumFixupKinds() const override {
return WebAssembly::NumTargetFixupKinds;
@@ -123,11 +125,11 @@ void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm,
std::unique_ptr<MCObjectTargetWriter>
WebAssemblyAsmBackend::createObjectTargetWriter() const {
- return createWebAssemblyWasmObjectWriter(Is64Bit);
+ return createWebAssemblyWasmObjectWriter(Is64Bit, IsEmscripten);
}
} // end anonymous namespace
MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) {
- return new WebAssemblyAsmBackend(TT.isArch64Bit());
+ return new WebAssemblyAsmBackend(TT.isArch64Bit(), TT.isOSEmscripten());
}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index b5d4d369b726..221ac17b8336 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -51,7 +52,9 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// Print any additional variadic operands.
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- if (Desc.isVariadic())
+ if (Desc.isVariadic()) {
+ if (Desc.getNumOperands() == 0 && MI->getNumOperands() > 0)
+ OS << "\t";
for (auto I = Desc.getNumOperands(), E = MI->getNumOperands(); I < E; ++I) {
// FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
// we have an extra flags operand which is not currently printed, for
@@ -62,6 +65,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
OS << ", ";
printOperand(MI, I, OS);
}
+ }
// Print any added annotation.
printAnnotation(OS, Annot);
@@ -232,7 +236,16 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- Op.getExpr()->print(O, &MAI);
+ // call_indirect instructions have a TYPEINDEX operand that we print
+ // as a signature here, such that the assembler can recover this
+ // information.
+ auto SRE = static_cast<const MCSymbolRefExpr *>(Op.getExpr());
+ if (SRE->getKind() == MCSymbolRefExpr::VK_WASM_TYPEINDEX) {
+ auto &Sym = static_cast<const MCSymbolWasm &>(SRE->getSymbol());
+ O << WebAssembly::signatureToString(Sym.getSignature());
+ } else {
+ Op.getExpr()->print(O, &MAI);
+ }
}
}
@@ -259,14 +272,26 @@ void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
unsigned OpNo,
raw_ostream &O) {
- auto Imm = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
- if (Imm != wasm::WASM_TYPE_NORESULT)
- O << WebAssembly::anyTypeToString(Imm);
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm()) {
+ auto Imm = static_cast<unsigned>(Op.getImm());
+ if (Imm != wasm::WASM_TYPE_NORESULT)
+ O << WebAssembly::anyTypeToString(Imm);
+ } else {
+ auto Expr = cast<MCSymbolRefExpr>(Op.getExpr());
+ auto *Sym = cast<MCSymbolWasm>(&Expr->getSymbol());
+ if (Sym->getSignature()) {
+ O << WebAssembly::signatureToString(Sym->getSignature());
+ } else {
+ // Disassembler does not currently produce a signature
+ O << "unknown_type";
+ }
+ }
}
// We have various enums representing a subset of these types; use this
// function to convert any of them to text.
-const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) {
+const char *WebAssembly::anyTypeToString(unsigned Ty) {
switch (Ty) {
case wasm::WASM_TYPE_I32:
return "i32";
@@ -291,6 +316,24 @@ const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) {
}
}
-const char *llvm::WebAssembly::typeToString(wasm::ValType Ty) {
+const char *WebAssembly::typeToString(wasm::ValType Ty) {
return anyTypeToString(static_cast<unsigned>(Ty));
}
+
+std::string WebAssembly::typeListToString(ArrayRef<wasm::ValType> List) {
+ std::string S;
+ for (auto &Ty : List) {
+ if (&Ty != &List[0]) S += ", ";
+ S += WebAssembly::typeToString(Ty);
+ }
+ return S;
+}
+
+std::string WebAssembly::signatureToString(const wasm::WasmSignature *Sig) {
+ std::string S("(");
+ S += typeListToString(Sig->Params);
+ S += ") -> (";
+ S += typeListToString(Sig->Returns);
+ S += ")";
+ return S;
+}
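
The two helpers added above render a signature in the textual form the assembler understands, "(params) -> (returns)". A hedged standalone sketch of the same formatting, with plain strings standing in for wasm::ValType (the function names mirror the helpers above, but this is not the LLVM implementation):

    #include <cstdio>
    #include <string>
    #include <vector>

    // Join a list of type names with ", ", as typeListToString does above.
    static std::string typeListToString(const std::vector<std::string> &List) {
      std::string S;
      for (size_t I = 0; I < List.size(); ++I) {
        if (I != 0)
          S += ", ";
        S += List[I];
      }
      return S;
    }

    static std::string signatureToString(const std::vector<std::string> &Params,
                                         const std::vector<std::string> &Returns) {
      return "(" + typeListToString(Params) + ") -> (" +
             typeListToString(Returns) + ")";
    }

    int main() {
      // Prints "(i32, f64) -> (i64)", the same shape .functype directives use.
      std::printf("%s\n", signatureToString({"i32", "f64"}, {"i64"}).c_str());
      return 0;
    }
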
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
index b979de5028bf..cf37778099a0 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -58,6 +58,9 @@ namespace WebAssembly {
const char *typeToString(wasm::ValType Ty);
const char *anyTypeToString(unsigned Ty);
+std::string typeListToString(ArrayRef<wasm::ValType> List);
+std::string signatureToString(const wasm::WasmSignature *Sig);
+
} // end namespace WebAssembly
} // end namespace llvm
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 44b6d6a968a9..1a4c57e66d2f 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -152,6 +152,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
break;
case WebAssembly::OPERAND_FUNCTION32:
case WebAssembly::OPERAND_OFFSET32:
+ case WebAssembly::OPERAND_SIGNATURE:
case WebAssembly::OPERAND_TYPEINDEX:
case WebAssembly::OPERAND_GLOBAL:
case WebAssembly::OPERAND_EVENT:
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 7a9f59b1a4f2..b339860a381d 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -38,7 +38,7 @@ MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT);
std::unique_ptr<MCObjectTargetWriter>
-createWebAssemblyWasmObjectWriter(bool Is64Bit);
+createWebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten);
namespace WebAssembly {
enum OperandType {
@@ -122,16 +122,22 @@ enum TOF {
namespace llvm {
namespace WebAssembly {
-/// This is used to indicate block signatures.
-enum class ExprType : unsigned {
+/// Used as immediate MachineOperands for block signatures
+enum class BlockType : unsigned {
+ Invalid = 0x00,
Void = 0x40,
- I32 = 0x7F,
- I64 = 0x7E,
- F32 = 0x7D,
- F64 = 0x7C,
- V128 = 0x7B,
- Exnref = 0x68,
- Invalid = 0x00
+ I32 = unsigned(wasm::ValType::I32),
+ I64 = unsigned(wasm::ValType::I64),
+ F32 = unsigned(wasm::ValType::F32),
+ F64 = unsigned(wasm::ValType::F64),
+ V128 = unsigned(wasm::ValType::V128),
+ Exnref = unsigned(wasm::ValType::EXNREF),
+ // Multivalue blocks (and other non-void blocks) are only emitted when the
+ // blocks will never be exited and are at the ends of functions (see
+ // WebAssemblyCFGStackify::fixEndsAtEndOfFunction). They also are never made
+ // to pop values off the stack, so the exact multivalue signature can always
+ // be inferred from the return type of the parent function in MCInstLower.
+ Multivalue = 0xffff,
};
/// Instruction opcodes emitted via means other than CodeGen.
@@ -191,6 +197,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S:
case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S:
+ case WebAssembly::LOAD_SPLAT_v8x16:
+ case WebAssembly::LOAD_SPLAT_v8x16_S:
return 0;
case WebAssembly::LOAD16_S_I32:
case WebAssembly::LOAD16_S_I32_S:
@@ -240,6 +248,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S:
case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S:
+ case WebAssembly::LOAD_SPLAT_v16x8:
+ case WebAssembly::LOAD_SPLAT_v16x8_S:
return 1;
case WebAssembly::LOAD_I32:
case WebAssembly::LOAD_I32_S:
@@ -295,6 +305,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
case WebAssembly::ATOMIC_NOTIFY_S:
case WebAssembly::ATOMIC_WAIT_I32:
case WebAssembly::ATOMIC_WAIT_I32_S:
+ case WebAssembly::LOAD_SPLAT_v32x4:
+ case WebAssembly::LOAD_SPLAT_v32x4_S:
return 2;
case WebAssembly::LOAD_I64:
case WebAssembly::LOAD_I64_S:
@@ -324,31 +336,25 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S:
case WebAssembly::ATOMIC_WAIT_I64:
case WebAssembly::ATOMIC_WAIT_I64_S:
+ case WebAssembly::LOAD_SPLAT_v64x2:
+ case WebAssembly::LOAD_SPLAT_v64x2_S:
+ case WebAssembly::LOAD_EXTEND_S_v8i16:
+ case WebAssembly::LOAD_EXTEND_S_v8i16_S:
+ case WebAssembly::LOAD_EXTEND_U_v8i16:
+ case WebAssembly::LOAD_EXTEND_U_v8i16_S:
+ case WebAssembly::LOAD_EXTEND_S_v4i32:
+ case WebAssembly::LOAD_EXTEND_S_v4i32_S:
+ case WebAssembly::LOAD_EXTEND_U_v4i32:
+ case WebAssembly::LOAD_EXTEND_U_v4i32_S:
+ case WebAssembly::LOAD_EXTEND_S_v2i64:
+ case WebAssembly::LOAD_EXTEND_S_v2i64_S:
+ case WebAssembly::LOAD_EXTEND_U_v2i64:
+ case WebAssembly::LOAD_EXTEND_U_v2i64_S:
return 3;
- case WebAssembly::LOAD_v16i8:
- case WebAssembly::LOAD_v16i8_S:
- case WebAssembly::LOAD_v8i16:
- case WebAssembly::LOAD_v8i16_S:
- case WebAssembly::LOAD_v4i32:
- case WebAssembly::LOAD_v4i32_S:
- case WebAssembly::LOAD_v2i64:
- case WebAssembly::LOAD_v2i64_S:
- case WebAssembly::LOAD_v4f32:
- case WebAssembly::LOAD_v4f32_S:
- case WebAssembly::LOAD_v2f64:
- case WebAssembly::LOAD_v2f64_S:
- case WebAssembly::STORE_v16i8:
- case WebAssembly::STORE_v16i8_S:
- case WebAssembly::STORE_v8i16:
- case WebAssembly::STORE_v8i16_S:
- case WebAssembly::STORE_v4i32:
- case WebAssembly::STORE_v4i32_S:
- case WebAssembly::STORE_v2i64:
- case WebAssembly::STORE_v2i64_S:
- case WebAssembly::STORE_v4f32:
- case WebAssembly::STORE_v4f32_S:
- case WebAssembly::STORE_v2f64:
- case WebAssembly::STORE_v2f64_S:
+ case WebAssembly::LOAD_V128:
+ case WebAssembly::LOAD_V128_S:
+ case WebAssembly::STORE_V128:
+ case WebAssembly::STORE_V128_S:
return 4;
default:
return -1;
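
GetDefaultP2AlignAny returns the base-2 logarithm of an instruction's natural alignment, so the new entries encode the width each access actually touches: a load_splat of one byte gets p2align 0, two bytes 1, four bytes 2, eight bytes 3, and the consolidated whole-vector LOAD_V128/STORE_V128 stay at 4 (16 bytes). A small sketch of that relationship, with made-up opcode names standing in for the WebAssembly:: enumerators:

    #include <cstdio>

    enum class Op { LoadSplat8x16, LoadSplat16x8, LoadSplat32x4,
                    LoadSplat64x2, LoadV128 };

    // Hypothetical mirror of GetDefaultP2AlignAny for the cases added above.
    static unsigned defaultP2Align(Op O) {
      switch (O) {
      case Op::LoadSplat8x16: return 0; // splats a single byte
      case Op::LoadSplat16x8: return 1; // splats two bytes
      case Op::LoadSplat32x4: return 2; // splats four bytes
      case Op::LoadSplat64x2: return 3; // splats eight bytes
      case Op::LoadV128:      return 4; // full 16-byte vector access
      }
      return 0;
    }

    int main() {
      // Natural alignment in bytes is 1 << p2align: 1, 2, 4, 8, 16.
      for (Op O : {Op::LoadSplat8x16, Op::LoadSplat16x8, Op::LoadSplat32x4,
                   Op::LoadSplat64x2, Op::LoadV128})
        std::printf("p2align %u -> %u bytes\n", defaultP2Align(O),
                    1u << defaultP2Align(O));
      return 0;
    }
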
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index e05efef7201b..40926201931a 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -60,39 +60,10 @@ void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
-void WebAssemblyTargetAsmStreamer::emitSignature(
- const wasm::WasmSignature *Sig) {
- OS << "(";
- emitParamList(Sig);
- OS << ") -> (";
- emitReturnList(Sig);
- OS << ")";
-}
-
-void WebAssemblyTargetAsmStreamer::emitParamList(
- const wasm::WasmSignature *Sig) {
- auto &Params = Sig->Params;
- for (auto &Ty : Params) {
- if (&Ty != &Params[0])
- OS << ", ";
- OS << WebAssembly::typeToString(Ty);
- }
-}
-
-void WebAssemblyTargetAsmStreamer::emitReturnList(
- const wasm::WasmSignature *Sig) {
- auto &Returns = Sig->Returns;
- for (auto &Ty : Returns) {
- if (&Ty != &Returns[0])
- OS << ", ";
- OS << WebAssembly::typeToString(Ty);
- }
-}
-
void WebAssemblyTargetAsmStreamer::emitFunctionType(const MCSymbolWasm *Sym) {
assert(Sym->isFunction());
OS << "\t.functype\t" << Sym->getName() << " ";
- emitSignature(Sym->getSignature());
+ OS << WebAssembly::signatureToString(Sym->getSignature());
OS << "\n";
}
@@ -107,7 +78,7 @@ void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) {
void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) {
assert(Sym->isEvent());
OS << "\t.eventtype\t" << Sym->getName() << " ";
- emitParamList(Sym->getSignature());
+ OS << WebAssembly::typeListToString(Sym->getSignature()->Params);
OS << "\n";
}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 5ea62b179d22..0164f8e572ef 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -56,9 +56,6 @@ protected:
/// This part is for ascii assembly output
class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
formatted_raw_ostream &OS;
- void emitSignature(const wasm::WasmSignature *Sig);
- void emitParamList(const wasm::WasmSignature *Sig);
- void emitReturnList(const wasm::WasmSignature *Sig);
public:
WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index a1cc3e268e8f..e7a599e3e175 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
namespace {
class WebAssemblyWasmObjectWriter final : public MCWasmObjectTargetWriter {
public:
- explicit WebAssemblyWasmObjectWriter(bool Is64Bit);
+ explicit WebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten);
private:
unsigned getRelocType(const MCValue &Target,
@@ -39,8 +39,9 @@ private:
};
} // end anonymous namespace
-WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit)
- : MCWasmObjectTargetWriter(Is64Bit) {}
+WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit,
+ bool IsEmscripten)
+ : MCWasmObjectTargetWriter(Is64Bit, IsEmscripten) {}
static const MCSection *getFixupSection(const MCExpr *Expr) {
if (auto SyExp = dyn_cast<MCSymbolRefExpr>(Expr)) {
@@ -116,6 +117,6 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
}
std::unique_ptr<MCObjectTargetWriter>
-llvm::createWebAssemblyWasmObjectWriter(bool Is64Bit) {
- return llvm::make_unique<WebAssemblyWasmObjectWriter>(Is64Bit);
+llvm::createWebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten) {
+ return std::make_unique<WebAssemblyWasmObjectWriter>(Is64Bit, IsEmscripten);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 7f9d41da3978..5d8b873ce23b 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -67,8 +67,8 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
}
std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
- unsigned RegNo = MO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(RegNo) &&
+ Register RegNo = MO.getReg();
+ assert(Register::isVirtualRegister(RegNo) &&
"Unlowered physical register encountered during assembly printing");
assert(!MFI->isVRegStackified(RegNo));
unsigned WAReg = MFI->getWAReg(RegNo);
@@ -332,43 +332,15 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// These represent values which are live into the function entry, so there's
// no instruction to emit.
break;
- case WebAssembly::FALLTHROUGH_RETURN_I32:
- case WebAssembly::FALLTHROUGH_RETURN_I32_S:
- case WebAssembly::FALLTHROUGH_RETURN_I64:
- case WebAssembly::FALLTHROUGH_RETURN_I64_S:
- case WebAssembly::FALLTHROUGH_RETURN_F32:
- case WebAssembly::FALLTHROUGH_RETURN_F32_S:
- case WebAssembly::FALLTHROUGH_RETURN_F64:
- case WebAssembly::FALLTHROUGH_RETURN_F64_S:
- case WebAssembly::FALLTHROUGH_RETURN_v16i8:
- case WebAssembly::FALLTHROUGH_RETURN_v16i8_S:
- case WebAssembly::FALLTHROUGH_RETURN_v8i16:
- case WebAssembly::FALLTHROUGH_RETURN_v8i16_S:
- case WebAssembly::FALLTHROUGH_RETURN_v4i32:
- case WebAssembly::FALLTHROUGH_RETURN_v4i32_S:
- case WebAssembly::FALLTHROUGH_RETURN_v2i64:
- case WebAssembly::FALLTHROUGH_RETURN_v2i64_S:
- case WebAssembly::FALLTHROUGH_RETURN_v4f32:
- case WebAssembly::FALLTHROUGH_RETURN_v4f32_S:
- case WebAssembly::FALLTHROUGH_RETURN_v2f64:
- case WebAssembly::FALLTHROUGH_RETURN_v2f64_S: {
+ case WebAssembly::FALLTHROUGH_RETURN: {
// These instructions represent the implicit return at the end of a
- // function body. Always pops one value off the stack.
+ // function body.
if (isVerbose()) {
- OutStreamer->AddComment("fallthrough-return-value");
+ OutStreamer->AddComment("fallthrough-return");
OutStreamer->AddBlankLine();
}
break;
}
- case WebAssembly::FALLTHROUGH_RETURN_VOID:
- case WebAssembly::FALLTHROUGH_RETURN_VOID_S:
- // This instruction represents the implicit return at the end of a
- // function body with no return value.
- if (isVerbose()) {
- OutStreamer->AddComment("fallthrough-return-void");
- OutStreamer->AddBlankLine();
- }
- break;
case WebAssembly::COMPILER_FENCE:
// This is a compiler barrier that prevents instruction reordering during
// backend compilation, and should not be emitted.
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index 4c5d0192fc28..c069af9eed62 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -97,14 +97,14 @@ public:
// If the smallest region containing MBB is a loop
if (LoopMap.count(ML))
return LoopMap[ML].get();
- LoopMap[ML] = llvm::make_unique<ConcreteRegion<MachineLoop>>(ML);
+ LoopMap[ML] = std::make_unique<ConcreteRegion<MachineLoop>>(ML);
return LoopMap[ML].get();
} else {
// If the smallest region containing MBB is an exception
if (ExceptionMap.count(WE))
return ExceptionMap[WE].get();
ExceptionMap[WE] =
- llvm::make_unique<ConcreteRegion<WebAssemblyException>>(WE);
+ std::make_unique<ConcreteRegion<WebAssemblyException>>(WE);
return ExceptionMap[WE].get();
}
}
@@ -317,6 +317,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
// If Next was originally ordered before MBB, and it isn't because it was
// loop-rotated above the header, it's not preferred.
if (Next->getNumber() < MBB->getNumber() &&
+ (WasmDisableEHPadSort || !Next->isEHPad()) &&
(!R || !R->contains(Next) ||
R->getHeader()->getNumber() < Next->getNumber())) {
Ready.push(Next);
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index e6bfc5226e2e..7e867edaaa27 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/MC/MCAsmInfo.h"
using namespace llvm;
@@ -315,12 +316,12 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
// br_on_exn 0, $__cpp_exception
// rethrow
// end_block
- WebAssembly::ExprType ReturnType = WebAssembly::ExprType::Void;
+ WebAssembly::BlockType ReturnType = WebAssembly::BlockType::Void;
if (IsBrOnExn) {
const char *TagName = BrOnExn->getOperand(1).getSymbolName();
if (std::strcmp(TagName, "__cpp_exception") != 0)
llvm_unreachable("Only C++ exception is supported");
- ReturnType = WebAssembly::ExprType::I32;
+ ReturnType = WebAssembly::BlockType::I32;
}
auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet);
@@ -406,7 +407,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
auto InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet);
MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos),
TII.get(WebAssembly::LOOP))
- .addImm(int64_t(WebAssembly::ExprType::Void));
+ .addImm(int64_t(WebAssembly::BlockType::Void));
// Decide where in Header to put the END_LOOP.
BeforeSet.clear();
@@ -526,46 +527,56 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
AfterSet.insert(&MI);
}
- // Local expression tree should go after the TRY.
- for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E;
- --I) {
- if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
- continue;
- if (WebAssembly::isChild(*std::prev(I), MFI))
- AfterSet.insert(&*std::prev(I));
- else
- break;
- }
-
// If Header unwinds to MBB (= Header contains 'invoke'), the try block should
// contain the call within it. So the call should go after the TRY. The
// exception is when the header's terminator is a rethrow instruction, in
// which case that instruction, not a call instruction before it, is going
// to throw.
+ MachineInstr *ThrowingCall = nullptr;
if (MBB.isPredecessor(Header)) {
auto TermPos = Header->getFirstTerminator();
if (TermPos == Header->end() ||
TermPos->getOpcode() != WebAssembly::RETHROW) {
- for (const auto &MI : reverse(*Header)) {
+ for (auto &MI : reverse(*Header)) {
if (MI.isCall()) {
AfterSet.insert(&MI);
+ ThrowingCall = &MI;
// Possibly throwing calls are usually wrapped by EH_LABEL
// instructions. We don't want to split them and the call.
if (MI.getIterator() != Header->begin() &&
- std::prev(MI.getIterator())->isEHLabel())
+ std::prev(MI.getIterator())->isEHLabel()) {
AfterSet.insert(&*std::prev(MI.getIterator()));
+ ThrowingCall = &*std::prev(MI.getIterator());
+ }
break;
}
}
}
}
+ // Local expression tree should go after the TRY.
+ // For BLOCK placement we start the search from the instruction before the
+ // BB's terminator, but in TRY's case we should start from the instruction
+ // before a call that can throw, or an EH_LABEL that precedes the call,
+ // because the return values of instructions before the call can be
+ // stackified and consumed by the throwing call.
+ auto SearchStartPt = ThrowingCall ? MachineBasicBlock::iterator(ThrowingCall)
+ : Header->getFirstTerminator();
+ for (auto I = SearchStartPt, E = Header->begin(); I != E; --I) {
+ if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+ continue;
+ if (WebAssembly::isChild(*std::prev(I), MFI))
+ AfterSet.insert(&*std::prev(I));
+ else
+ break;
+ }
+
// Add the TRY.
auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet);
MachineInstr *Begin =
BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
TII.get(WebAssembly::TRY))
- .addImm(int64_t(WebAssembly::ExprType::Void));
+ .addImm(int64_t(WebAssembly::BlockType::Void));
// Decide where in Header to put the END_TRY.
BeforeSet.clear();
@@ -694,8 +705,26 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
}
}
+// When MBB is split into MBB and Split, we should unstackify defs in MBB that
+// have their uses in Split.
+static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
+ MachineBasicBlock &Split,
+ WebAssemblyFunctionInfo &MFI,
+ MachineRegisterInfo &MRI) {
+ for (auto &MI : Split) {
+ for (auto &MO : MI.explicit_uses()) {
+ if (!MO.isReg() || Register::isPhysicalRegister(MO.getReg()))
+ continue;
+ if (MachineInstr *Def = MRI.getUniqueVRegDef(MO.getReg()))
+ if (Def->getParent() == &MBB)
+ MFI.unstackifyVReg(MO.getReg());
+ }
+ }
+}
+
bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Linearizing the control flow by placing TRY / END_TRY markers can create
@@ -830,7 +859,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
SmallVector<const MachineBasicBlock *, 8> EHPadStack;
// Range of instructions to be wrapped in a new nested try/catch
using TryRange = std::pair<MachineInstr *, MachineInstr *>;
- // In original CFG, <unwind destionation BB, a vector of try ranges>
+ // In original CFG, <unwind destination BB, a vector of try ranges>
DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> UnwindDestToTryRanges;
// In new CFG, <destination to branch to, a vector of try ranges>
DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> BrDestToTryRanges;
@@ -936,7 +965,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
// of the function with a local.get and a rethrow instruction.
if (NeedAppendixBlock) {
auto *AppendixBB = getAppendixBlock(MF);
- unsigned ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
+ Register ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW))
.addReg(ExnReg);
// These instruction ranges should branch to this appendix BB.
@@ -967,7 +996,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
// ...
// cont:
for (auto &P : UnwindDestToTryRanges) {
- NumUnwindMismatches++;
+ NumUnwindMismatches += P.second.size();
// This means the destination is the appendix BB, which was separately
// handled above.
@@ -1007,6 +1036,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
BrDest->insert(BrDest->end(), EndTry->removeFromParent());
// Take out the handler body from EH pad to the new branch destination BB.
BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end());
+ unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI);
// Fix predecessor-successor relationship.
BrDest->transferSuccessors(EHPad);
EHPad->addSuccessor(BrDest);
@@ -1100,7 +1130,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
MachineInstr *NestedTry =
BuildMI(*MBB, *RangeBegin, RangeBegin->getDebugLoc(),
TII.get(WebAssembly::TRY))
- .addImm(int64_t(WebAssembly::ExprType::Void));
+ .addImm(int64_t(WebAssembly::BlockType::Void));
// Create the nested EH pad and fill instructions in.
MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock();
@@ -1122,6 +1152,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
// new nested continuation BB.
NestedCont->splice(NestedCont->end(), MBB,
std::next(RangeEnd->getIterator()), MBB->end());
+ unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI);
registerTryScope(NestedTry, NestedEndTry, NestedEHPad);
// Fix predecessor-successor relationship.
@@ -1197,54 +1228,32 @@ getDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
/// checks for such cases and fixes up the signatures.
void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
- assert(MFI.getResults().size() <= 1);
if (MFI.getResults().empty())
return;
- WebAssembly::ExprType RetType;
- switch (MFI.getResults().front().SimpleTy) {
- case MVT::i32:
- RetType = WebAssembly::ExprType::I32;
- break;
- case MVT::i64:
- RetType = WebAssembly::ExprType::I64;
- break;
- case MVT::f32:
- RetType = WebAssembly::ExprType::F32;
- break;
- case MVT::f64:
- RetType = WebAssembly::ExprType::F64;
- break;
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- case MVT::v2i64:
- case MVT::v4f32:
- case MVT::v2f64:
- RetType = WebAssembly::ExprType::V128;
- break;
- case MVT::exnref:
- RetType = WebAssembly::ExprType::Exnref;
- break;
- default:
- llvm_unreachable("unexpected return type");
- }
+ // MCInstLower will add the proper types to multivalue signatures based on the
+ // function return type
+ WebAssembly::BlockType RetType =
+ MFI.getResults().size() > 1
+ ? WebAssembly::BlockType::Multivalue
+ : WebAssembly::BlockType(
+ WebAssembly::toValType(MFI.getResults().front()));
for (MachineBasicBlock &MBB : reverse(MF)) {
for (MachineInstr &MI : reverse(MBB)) {
if (MI.isPosition() || MI.isDebugInstr())
continue;
- if (MI.getOpcode() == WebAssembly::END_BLOCK) {
- EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType));
- continue;
- }
- if (MI.getOpcode() == WebAssembly::END_LOOP) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::END_BLOCK:
+ case WebAssembly::END_LOOP:
+ case WebAssembly::END_TRY:
EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType));
continue;
+ default:
+ // Something other than an `end`. We're done.
+ return;
}
- // Something other than an `end`. We're done.
- return;
}
}
}
@@ -1280,7 +1289,9 @@ void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) {
}
}
// Fix mismatches in unwind destinations induced by linearizing the code.
- fixUnwindMismatches(MF);
+ if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+ MF.getFunction().hasPersonalityFn())
+ fixUnwindMismatches(MF);
}
void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
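
fixEndsAtEndOfFunction above now rewrites every trailing END_BLOCK, END_LOOP, or END_TRY with a single block type derived from the function's results: Multivalue when there is more than one result, otherwise the lone result's value type. A hedged sketch of that selection, with a simplified BlockType enum standing in for the real MVT-to-BlockType conversion:

    #include <cstdio>
    #include <vector>

    enum class BlockType : unsigned {
      Void = 0x40, I32 = 0x7f, I64 = 0x7e, F32 = 0x7d, F64 = 0x7c,
      Multivalue = 0xffff
    };

    // Simplified stand-in: pick the block type for a function's result list.
    static BlockType pickBlockType(const std::vector<BlockType> &Results) {
      if (Results.empty())
        return BlockType::Void;
      // The exact multivalue signature is filled in later (in MCInstLower)
      // from the parent function's return type.
      if (Results.size() > 1)
        return BlockType::Multivalue;
      return Results.front();
    }

    int main() {
      std::printf("%#x\n", unsigned(pickBlockType({BlockType::I32})));  // 0x7f
      std::printf("%#x\n", unsigned(pickBlockType({BlockType::I32,
                                                   BlockType::I64}))); // 0xffff
      return 0;
    }
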
diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index dbd62179f055..ef75bb215317 100644
--- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -168,7 +168,7 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
static MachineInstr *findStartOfTree(MachineOperand &MO,
MachineRegisterInfo &MRI,
WebAssemblyFunctionInfo &MFI) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
assert(MFI.isVRegStackified(Reg));
MachineInstr *Def = MRI.getVRegDef(Reg);
@@ -207,7 +207,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I++;
if (!WebAssembly::isArgument(MI.getOpcode()))
break;
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
assert(!MFI.isVRegStackified(Reg));
Reg2Local[Reg] = static_cast<unsigned>(MI.getOperand(1).getImm());
MI.eraseFromParent();
@@ -221,7 +221,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// drops to their defs.
BitVector UseEmpty(MRI.getNumVirtRegs());
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I)
- UseEmpty[I] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(I));
+ UseEmpty[I] = MRI.use_empty(Register::index2VirtReg(I));
// Visit each instruction in the function.
for (MachineBasicBlock &MBB : MF) {
@@ -238,13 +238,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (WebAssembly::isTee(MI.getOpcode())) {
assert(MFI.isVRegStackified(MI.getOperand(0).getReg()));
assert(!MFI.isVRegStackified(MI.getOperand(1).getReg()));
- unsigned OldReg = MI.getOperand(2).getReg();
+ Register OldReg = MI.getOperand(2).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
// Stackify the input if it isn't stackified yet.
if (!MFI.isVRegStackified(OldReg)) {
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
- unsigned NewReg = MRI.createVirtualRegister(RC);
+ Register NewReg = MRI.createVirtualRegister(RC);
unsigned Opc = getLocalGetOpcode(RC);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg)
.addImm(LocalId);
@@ -270,17 +270,17 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// we handle at most one def.
assert(MI.getDesc().getNumDefs() <= 1);
if (MI.getDesc().getNumDefs() == 1) {
- unsigned OldReg = MI.getOperand(0).getReg();
+ Register OldReg = MI.getOperand(0).getReg();
if (!MFI.isVRegStackified(OldReg)) {
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
- unsigned NewReg = MRI.createVirtualRegister(RC);
+ Register NewReg = MRI.createVirtualRegister(RC);
auto InsertPt = std::next(MI.getIterator());
if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) {
MI.eraseFromParent();
Changed = true;
continue;
}
- if (UseEmpty[TargetRegisterInfo::virtReg2Index(OldReg)]) {
+ if (UseEmpty[Register::virtReg2Index(OldReg)]) {
unsigned Opc = getDropOpcode(RC);
MachineInstr *Drop =
BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
@@ -310,7 +310,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (!MO.isReg())
continue;
- unsigned OldReg = MO.getReg();
+ Register OldReg = MO.getReg();
// Inline asm may have a def in the middle of the operands. Our contract
// with inline asm register operands is to provide local indices as
@@ -345,7 +345,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Insert a local.get.
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
- unsigned NewReg = MRI.createVirtualRegister(RC);
+ Register NewReg = MRI.createVirtualRegister(RC);
unsigned Opc = getLocalGetOpcode(RC);
InsertPt =
BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg)
@@ -369,7 +369,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// TODO: Sort the locals for better compression.
MFI.setNumLocals(CurLocal - MFI.getParams().size());
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
auto RL = Reg2Local.find(Reg);
if (RL == Reg2Local.end() || RL->second < MFI.getParams().size())
continue;
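
The pass now calls Register::index2VirtReg and Register::virtReg2Index rather than the TargetRegisterInfo statics. A sketch of the underlying mapping, assuming LLVM's convention that virtual register numbers are dense indices with the top bit set (these stand-ins mirror, not reuse, the real helpers):

    #include <cassert>
    #include <cstdio>

    // Stand-ins for Register::index2VirtReg / Register::virtReg2Index.
    static unsigned index2VirtReg(unsigned Index) { return Index | 0x80000000u; }
    static unsigned virtReg2Index(unsigned Reg) {
      assert((Reg & 0x80000000u) && "not a virtual register");
      return Reg & ~0x80000000u;
    }

    int main() {
      unsigned Reg = index2VirtReg(5);
      std::printf("%#x -> %u\n", Reg, virtReg2Index(Reg)); // 0x80000005 -> 5
      return 0;
    }
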
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 2552e9150833..c932f985489a 100644
--- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -1141,14 +1141,14 @@ bool WebAssemblyFastISel::selectBitCast(const Instruction *I) {
return true;
}
- unsigned Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(),
+ Register Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(),
In, I->getOperand(0)->hasOneUse());
if (!Reg)
return false;
MachineBasicBlock::iterator Iter = FuncInfo.InsertPt;
--Iter;
assert(Iter->isBitcast());
- Iter->setPhysRegsDeadExcept(ArrayRef<unsigned>(), TRI);
+ Iter->setPhysRegsDeadExcept(ArrayRef<Register>(), TRI);
updateValueMap(I, Reg);
return true;
}
@@ -1302,51 +1302,33 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
if (Ret->getNumOperands() == 0) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(WebAssembly::RETURN_VOID));
+ TII.get(WebAssembly::RETURN));
return true;
}
+ // TODO: support multiple-value returns in FastISel
+ if (Ret->getNumOperands() > 1)
+ return false;
+
Value *RV = Ret->getOperand(0);
if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy())
return false;
- unsigned Opc;
switch (getSimpleType(RV->getType())) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
- Opc = WebAssembly::RETURN_I32;
- break;
case MVT::i64:
- Opc = WebAssembly::RETURN_I64;
- break;
case MVT::f32:
- Opc = WebAssembly::RETURN_F32;
- break;
case MVT::f64:
- Opc = WebAssembly::RETURN_F64;
- break;
case MVT::v16i8:
- Opc = WebAssembly::RETURN_v16i8;
- break;
case MVT::v8i16:
- Opc = WebAssembly::RETURN_v8i16;
- break;
case MVT::v4i32:
- Opc = WebAssembly::RETURN_v4i32;
- break;
case MVT::v2i64:
- Opc = WebAssembly::RETURN_v2i64;
- break;
case MVT::v4f32:
- Opc = WebAssembly::RETURN_v4f32;
- break;
case MVT::v2f64:
- Opc = WebAssembly::RETURN_v2f64;
- break;
case MVT::exnref:
- Opc = WebAssembly::RETURN_EXNREF;
break;
default:
return false;
@@ -1363,7 +1345,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
if (Reg == 0)
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(WebAssembly::RETURN))
+ .addReg(Reg);
return true;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index b7fc65401fc4..6b1bbd7a2b07 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -70,6 +70,8 @@ static void findUses(Value *V, Function &F,
for (Use &U : V->uses()) {
if (auto *BC = dyn_cast<BitCastOperator>(U.getUser()))
findUses(BC, F, Uses, ConstantBCs);
+ else if (auto *A = dyn_cast<GlobalAlias>(U.getUser()))
+ findUses(A, F, Uses, ConstantBCs);
else if (U.get()->getType() != F.getType()) {
CallSite CS(U.getUser());
if (!CS)
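
findUses now looks through GlobalAlias users the same way it already looked through bitcast operators, so a call made via an alias of a mis-typed function is also collected and wrapped. A generic sketch of that transitive walk, with a toy node type in place of llvm::Value (names here are illustrative only):

    #include <cstdio>
    #include <vector>

    struct Node {
      const char *Name;
      bool Transparent; // models bitcasts and aliases we should look through
      std::vector<Node *> Users;
    };

    // Collect non-transparent transitive users, recursing through transparent
    // ones, as the pass now does for aliases as well as bitcasts.
    static void findUses(Node *N, std::vector<Node *> &Uses) {
      for (Node *U : N->Users) {
        if (U->Transparent)
          findUses(U, Uses);
        else
          Uses.push_back(U);
      }
    }

    int main() {
      Node Call{"call", false, {}};
      Node Alias{"alias", true, {&Call}};
      Node Fn{"fn", false, {&Alias}};
      std::vector<Node *> Uses;
      findUses(&Fn, Uses);
      std::printf("%s\n", Uses[0]->Name); // "call", reached through the alias
      return 0;
    }
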
diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index 7d8e86d9b2c0..157ea9d525c9 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -56,6 +56,7 @@
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
@@ -358,7 +359,7 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
// Add the register which will be used to tell the jump table which block to
// jump to.
MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
MIB.addReg(Reg);
// Compute the indices in the superheader, one for each bad block, and
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 5299068efdd4..71eeebfada4b 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -183,14 +183,14 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
bool HasBP = hasBP(MF);
if (HasBP) {
auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
- unsigned BasePtr = MRI.createVirtualRegister(PtrRC);
+ Register BasePtr = MRI.createVirtualRegister(PtrRC);
FI->setBasePointerVreg(BasePtr);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr)
.addReg(SPReg);
}
if (StackSize) {
// Subtract the frame size
- unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
+ Register OffsetReg = MRI.createVirtualRegister(PtrRC);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
.addImm(StackSize);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
@@ -199,7 +199,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(OffsetReg);
}
if (HasBP) {
- unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC);
+ Register BitmaskReg = MRI.createVirtualRegister(PtrRC);
unsigned Alignment = MFI.getMaxAlignment();
assert((1u << countTrailingZeros(Alignment)) == Alignment &&
"Alignment must be a power of 2");
@@ -244,7 +244,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
} else if (StackSize) {
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
- unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
+ Register OffsetReg = MRI.createVirtualRegister(PtrRC);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
.addImm(StackSize);
// In the epilog we don't need to write the result back to the SP32 physreg
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index daddd4ca16ff..fdc0f561dcd9 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -29,9 +29,9 @@ public:
static const size_t RedZoneSize = 128;
WebAssemblyFrameLowering()
- : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/16,
+ : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/Align(16),
/*LocalAreaOffset=*/0,
- /*TransientStackAlignment=*/16,
+ /*TransientStackAlignment=*/Align(16),
/*StackRealignable=*/true) {}
MachineBasicBlock::iterator
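
The constructor above now passes alignments as llvm::Align, a wrapper that guarantees a power-of-two value at construction time instead of carrying a raw unsigned. A standalone sketch of the idea (a minimal stand-in, not the actual llvm::Align implementation):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Minimal stand-in for llvm::Align: only valid power-of-two values.
    class Align {
      uint64_t Value;
    public:
      explicit Align(uint64_t V) : Value(V) {
        assert(V != 0 && (V & (V - 1)) == 0 && "alignment must be a power of 2");
      }
      uint64_t value() const { return Value; }
    };

    int main() {
      Align StackAlign(16); // as in the TargetFrameLowering ctor above
      std::printf("%llu\n", (unsigned long long)StackAlign.value()); // 16
      // Align Bad(12); // would assert: 12 is not a power of two
      return 0;
    }
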
diff --git a/lib/Target/WebAssembly/WebAssemblyISD.def b/lib/Target/WebAssembly/WebAssemblyISD.def
index 77217f16a727..13f0476eb4a5 100644
--- a/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -26,9 +26,11 @@ HANDLE_NODETYPE(WrapperPIC)
HANDLE_NODETYPE(BR_IF)
HANDLE_NODETYPE(BR_TABLE)
HANDLE_NODETYPE(SHUFFLE)
+HANDLE_NODETYPE(SWIZZLE)
HANDLE_NODETYPE(VEC_SHL)
HANDLE_NODETYPE(VEC_SHR_S)
HANDLE_NODETYPE(VEC_SHR_U)
+HANDLE_NODETYPE(LOAD_SPLAT)
HANDLE_NODETYPE(THROW)
HANDLE_NODETYPE(MEMORY_COPY)
HANDLE_NODETYPE(MEMORY_FILL)
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 26339eaef37d..f83a8a984ae0 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -54,6 +54,12 @@ public:
ForCodeSize = MF.getFunction().hasOptSize();
Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
+
+ // Wasm64 is not fully supported right now (and is not specified)
+ if (Subtarget->hasAddr64())
+ report_fatal_error(
+ "64-bit WebAssembly (wasm64) is not currently supported");
+
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -88,88 +94,36 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
uint64_t SyncScopeID =
cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+ MachineSDNode *Fence = nullptr;
switch (SyncScopeID) {
- case SyncScope::SingleThread: {
+ case SyncScope::SingleThread:
// We lower a single-thread fence to a pseudo compiler barrier instruction
// preventing instruction reordering. This will not be emitted in final
// binary.
- MachineSDNode *Fence =
- CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
- DL, // debug loc
- MVT::Other, // outchain type
- Node->getOperand(0) // inchain
- );
- ReplaceNode(Node, Fence);
- CurDAG->RemoveDeadNode(Node);
- return;
- }
-
- case SyncScope::System: {
- // For non-emscripten systems, we have not decided on what we should
- // traslate fences to yet.
- if (!Subtarget->getTargetTriple().isOSEmscripten())
- report_fatal_error(
- "ATOMIC_FENCE is not yet supported in non-emscripten OSes");
-
- // Wasm does not have a fence instruction, but because all atomic
- // instructions in wasm are sequentially consistent, we translate a
- // fence to an idempotent atomic RMW instruction to a linear memory
- // address. All atomic instructions in wasm are sequentially consistent,
- // but this is to ensure a fence also prevents reordering of non-atomic
- // instructions in the VM. Even though LLVM IR's fence instruction does
- // not say anything about its relationship with non-atomic instructions,
- // we think this is more user-friendly.
- //
- // While any address can work, here we use a value stored in
- // __stack_pointer wasm global because there's high chance that area is
- // in cache.
- //
- // So the selected instructions will be in the form of:
- // %addr = get_global $__stack_pointer
- // %0 = i32.const 0
- // i32.atomic.rmw.or %addr, %0
- SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
- "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
- MachineSDNode *GetGlobal =
- CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
- DL, // debug loc
- MVT::i32, // result type
- StackPtrSym // __stack_pointer symbol
- );
-
- SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
- auto *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getUnknownStack(MF),
- // FIXME Volatile isn't really correct, but currently all LLVM
- // atomic instructions are treated as volatiles in the backend, so
- // we should be consistent.
- MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore,
- 4, 4, AAMDNodes(), nullptr, SyncScope::System,
- AtomicOrdering::SequentiallyConsistent);
- MachineSDNode *Const0 =
- CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
- MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
- WebAssembly::ATOMIC_RMW_OR_I32, // opcode
- DL, // debug loc
- MVT::i32, // result type
- MVT::Other, // outchain type
- {
- Zero, // alignment
- Zero, // offset
- SDValue(GetGlobal, 0), // __stack_pointer
- SDValue(Const0, 0), // OR with 0 to make it idempotent
- Node->getOperand(0) // inchain
- });
-
- CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
- ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
- CurDAG->RemoveDeadNode(Node);
- return;
- }
+ Fence = CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+ DL, // debug loc
+ MVT::Other, // outchain type
+ Node->getOperand(0) // inchain
+ );
+ break;
+ case SyncScope::System:
+ // Currently wasm only supports sequentially consistent atomics, so we
+ // always set the order to 0 (sequentially consistent).
+ Fence = CurDAG->getMachineNode(
+ WebAssembly::ATOMIC_FENCE,
+ DL, // debug loc
+ MVT::Other, // outchain type
+ CurDAG->getTargetConstant(0, DL, MVT::i32), // order
+ Node->getOperand(0) // inchain
+ );
+ break;
default:
llvm_unreachable("Unknown scope!");
}
+
+ ReplaceNode(Node, Fence);
+ CurDAG->RemoveDeadNode(Node);
+ return;
}
case ISD::GlobalTLSAddress: {
@@ -224,6 +178,33 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, TLSSize);
return;
}
+ case Intrinsic::wasm_tls_align: {
+ MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+ assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+ MachineSDNode *TLSAlign = CurDAG->getMachineNode(
+ WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
+ CurDAG->getTargetExternalSymbol("__tls_align", MVT::i32));
+ ReplaceNode(Node, TLSAlign);
+ return;
+ }
+ }
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::wasm_tls_base: {
+ MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+ assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+ MachineSDNode *TLSBase = CurDAG->getMachineNode(
+ WebAssembly::GLOBAL_GET_I32, DL, MVT::i32, MVT::Other,
+ CurDAG->getTargetExternalSymbol("__tls_base", PtrVT),
+ Node->getOperand(0));
+ ReplaceNode(Node, TLSBase);
+ return;
+ }
}
break;
}
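
The fence selection above now funnels both scopes through one exit path: a single-thread fence becomes the COMPILER_FENCE pseudo (a reordering barrier that emits no bytes), while a system fence becomes a real ATOMIC_FENCE whose order immediate is always 0, since wasm currently defines only sequentially consistent atomics. A simplified mapping with stand-in enums for the SyncScope IDs and target opcodes (not the SelectionDAG API):

    #include <cstdio>

    enum class Scope { SingleThread, System };
    enum class WasmFence { CompilerFence, AtomicFence };

    // Hypothetical mirror of the SyncScopeID switch above.
    static WasmFence lowerFence(Scope S) {
      switch (S) {
      case Scope::SingleThread:
        // Pseudo compiler barrier; prevents reordering, never emitted.
        return WasmFence::CompilerFence;
      case Scope::System:
        // Real atomic.fence; the order immediate is 0 (seq_cst).
        return WasmFence::AtomicFence;
      }
      return WasmFence::AtomicFence;
    }

    int main() {
      std::printf("%d %d\n", int(lowerFence(Scope::SingleThread)),
                  int(lowerFence(Scope::System))); // 0 1
      return 0;
    }
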
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4064a983099c..f06afdbcea9e 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -205,7 +205,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
for (auto T : {MVT::i8, MVT::i16, MVT::i32})
setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
}
- for (auto T : MVT::integer_vector_valuetypes())
+ for (auto T : MVT::integer_fixedlen_vector_valuetypes())
setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
// Dynamic stack allocation: use the default expansion.
@@ -228,7 +228,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// - Floating-point extending loads.
// - Floating-point truncating stores.
// - i1 extending loads.
- // - extending/truncating SIMD loads/stores
+ // - truncating SIMD stores and most extending loads
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
for (auto T : MVT::integer_valuetypes())
@@ -237,7 +237,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
if (Subtarget->hasSIMD128()) {
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
MVT::v2f64}) {
- for (auto MemT : MVT::vector_valuetypes()) {
+ for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
if (MVT(T) != MemT) {
setTruncStoreAction(T, MemT, Expand);
for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
@@ -245,6 +245,14 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
}
}
}
+ // But some vector extending loads are legal
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
+ setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
+ setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
+ setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
+ }
+ }
}
// Don't do anything clever with build_pairs
@@ -259,16 +267,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setMaxAtomicSizeInBitsSupported(64);
- if (Subtarget->hasBulkMemory()) {
- // Use memory.copy and friends over multiple loads and stores
- MaxStoresPerMemcpy = 1;
- MaxStoresPerMemcpyOptSize = 1;
- MaxStoresPerMemmove = 1;
- MaxStoresPerMemmoveOptSize = 1;
- MaxStoresPerMemset = 1;
- MaxStoresPerMemsetOptSize = 1;
- }
-
// Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
// consistent with the f64 and f128 names.
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
@@ -337,8 +335,8 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
bool Float64, unsigned LoweredOpcode) {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned OutReg = MI.getOperand(0).getReg();
- unsigned InReg = MI.getOperand(1).getReg();
+ Register OutReg = MI.getOperand(0).getReg();
+ Register InReg = MI.getOperand(1).getReg();
unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
@@ -396,9 +394,9 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
// For unsigned numbers, we have to do a separate comparison with zero.
if (IsUnsigned) {
Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
- unsigned SecondCmpReg =
+ Register SecondCmpReg =
MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(BB, DL, TII.get(FConst), Tmp1)
.addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
@@ -550,6 +548,16 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
return true;
}
+bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+ if (!Subtarget->hasUnimplementedSIMD128())
+ return false;
+ MVT ExtT = ExtVal.getSimpleValueType();
+ MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
+ return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
+ (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
+ (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
+}
+
EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &C,
EVT VT) const {
@@ -569,7 +577,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = 4;
+ Info.align = Align(4);
// atomic.notify instruction does not really load the memory specified with
// this argument, but MachineMemOperand should either be load or store, so
// we set this to a load.
@@ -583,7 +591,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = 4;
+ Info.align = Align(4);
Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
return true;
case Intrinsic::wasm_atomic_wait_i64:
@@ -591,7 +599,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = 8;
+ Info.align = Align(8);
Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
return true;
default:
@@ -623,7 +631,8 @@ static bool callingConvSupported(CallingConv::ID CallConv) {
CallConv == CallingConv::Cold ||
CallConv == CallingConv::PreserveMost ||
CallConv == CallingConv::PreserveAll ||
- CallConv == CallingConv::CXX_FAST_TLS;
+ CallConv == CallingConv::CXX_FAST_TLS ||
+ CallConv == CallingConv::WASM_EmscriptenInvoke;
}
SDValue
@@ -644,13 +653,36 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (CLI.IsPatchPoint)
fail(DL, DAG, "WebAssembly doesn't support patch point yet");
- // Fail if tail calls are required but not enabled
- if (!Subtarget->hasTailCall()) {
- if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
- MF.getTarget().Options.GuaranteedTailCallOpt) ||
- (CLI.CS && CLI.CS.isMustTailCall()))
- fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
- CLI.IsTailCall = false;
+ if (CLI.IsTailCall) {
+ bool MustTail = CLI.CS && CLI.CS.isMustTailCall();
+ if (Subtarget->hasTailCall() && !CLI.IsVarArg) {
+ // Do not tail call unless caller and callee return types match
+ const Function &F = MF.getFunction();
+ const TargetMachine &TM = getTargetMachine();
+ Type *RetTy = F.getReturnType();
+ SmallVector<MVT, 4> CallerRetTys;
+ SmallVector<MVT, 4> CalleeRetTys;
+ computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
+ computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
+ bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
+ std::equal(CallerRetTys.begin(), CallerRetTys.end(),
+ CalleeRetTys.begin());
+ if (!TypesMatch) {
+ // musttail in this case would be an LLVM IR validation failure
+ assert(!MustTail);
+ CLI.IsTailCall = false;
+ }
+ } else {
+ CLI.IsTailCall = false;
+ if (MustTail) {
+ if (CLI.IsVarArg) {
+ // The return would pop the argument buffer
+ fail(DL, DAG, "WebAssembly does not support varargs tail calls");
+ } else {
+ fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
+ }
+ }
+ }
}
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
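
Tail calls above are now kept only when the tail-call feature is enabled, the call is not varargs, and the caller's and callee's legalized return types match exactly; a musttail call that cannot be honored is a hard error. A hedged sketch of that eligibility test over simplified type lists (strings stand in for legalized MVTs):

    #include <string>
    #include <vector>

    // Simplified eligibility test mirroring the logic above.
    static bool canTailCall(bool HasTailCallFeature, bool IsVarArg,
                            const std::vector<std::string> &CallerRetTys,
                            const std::vector<std::string> &CalleeRetTys) {
      if (!HasTailCallFeature || IsVarArg)
        return false;
      // Do not tail call unless caller and callee return types match.
      return CallerRetTys == CalleeRetTys;
    }

    int main() {
      bool A = canTailCall(true, false, {"i32"}, {"i32"});  // true
      bool B = canTailCall(true, false, {"i32"}, {"i64"});  // false: mismatch
      bool C = canTailCall(true, true, {"i32"}, {"i32"});   // false: varargs
      return (A && !B && !C) ? 0 : 1;
    }
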
@@ -659,6 +691,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+
+ // The generic code may have added an sret argument. If we're lowering an
+ // invoke function, the ABI requires that the function pointer be the first
+ // argument, so we may have to swap the arguments.
+ if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
+ Outs[0].Flags.isSRet()) {
+ std::swap(Outs[0], Outs[1]);
+ std::swap(OutVals[0], OutVals[1]);
+ }
+
unsigned NumFixedArgs = 0;
for (unsigned I = 0; I < Outs.size(); ++I) {
const ISD::OutputArg &Out = Outs[I];
@@ -810,8 +852,8 @@ bool WebAssemblyTargetLowering::CanLowerReturn(
CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext & /*Context*/) const {
- // WebAssembly can't currently handle returning tuples.
- return Outs.size() <= 1;
+ // WebAssembly can only handle returning tuples with multivalue enabled
+ return Subtarget->hasMultivalue() || Outs.size() <= 1;
}
SDValue WebAssemblyTargetLowering::LowerReturn(
@@ -819,7 +861,8 @@ SDValue WebAssemblyTargetLowering::LowerReturn(
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const {
- assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
+ assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
+ "MVP WebAssembly can only return up to one value");
if (!callingConvSupported(CallConv))
fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
@@ -881,7 +924,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
// the buffer is passed as an argument.
if (IsVarArg) {
MVT PtrVT = getPointerTy(MF.getDataLayout());
- unsigned VarargVreg =
+ Register VarargVreg =
MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
MFI->setVarargBufferVreg(VarargVreg);
Chain = DAG.getCopyToReg(
@@ -1022,8 +1065,9 @@ SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
return SDValue();
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ MakeLibCallOptions CallOptions;
return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
- {DAG.getConstant(Depth, DL, MVT::i32)}, false, DL)
+ {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
.first;
}
@@ -1037,7 +1081,7 @@ SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
- unsigned FP =
+ Register FP =
Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}
@@ -1249,68 +1293,116 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
const EVT VecT = Op.getValueType();
const EVT LaneT = Op.getOperand(0).getValueType();
const size_t Lanes = Op.getNumOperands();
+ bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;
+
+ // BUILD_VECTORs are lowered to the instruction that initializes the highest
+ // possible number of lanes at once followed by a sequence of replace_lane
+ // instructions to individually initialize any remaining lanes.
+
+ // TODO: Tune this. For example, lanewise swizzling is very expensive, so
+ // swizzled lanes should be given greater weight.
+
+ // TODO: Investigate building vectors by shuffling together vectors built by
+ // separately specialized means.
+
auto IsConstant = [](const SDValue &V) {
return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
};
- // Find the most common operand, which is approximately the best to splat
- using Entry = std::pair<SDValue, size_t>;
- SmallVector<Entry, 16> ValueCounts;
- size_t NumConst = 0, NumDynamic = 0;
- for (const SDValue &Lane : Op->op_values()) {
- if (Lane.isUndef()) {
- continue;
- } else if (IsConstant(Lane)) {
- NumConst++;
- } else {
- NumDynamic++;
- }
- auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(),
- [&Lane](Entry A) { return A.first == Lane; });
- if (CountIt == ValueCounts.end()) {
- ValueCounts.emplace_back(Lane, 1);
+ // Returns the source vector and index vector pair if they exist. Checks for:
+ // (extract_vector_elt
+ // $src,
+ // (sign_extend_inreg (extract_vector_elt $indices, $i))
+ // )
+ auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
+ auto Bail = std::make_pair(SDValue(), SDValue());
+ if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return Bail;
+ const SDValue &SwizzleSrc = Lane->getOperand(0);
+ const SDValue &IndexExt = Lane->getOperand(1);
+ if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
+ return Bail;
+ const SDValue &Index = IndexExt->getOperand(0);
+ if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return Bail;
+ const SDValue &SwizzleIndices = Index->getOperand(0);
+ if (SwizzleSrc.getValueType() != MVT::v16i8 ||
+ SwizzleIndices.getValueType() != MVT::v16i8 ||
+ Index->getOperand(1)->getOpcode() != ISD::Constant ||
+ Index->getConstantOperandVal(1) != I)
+ return Bail;
+ return std::make_pair(SwizzleSrc, SwizzleIndices);
+ };
+
+ using ValueEntry = std::pair<SDValue, size_t>;
+ SmallVector<ValueEntry, 16> SplatValueCounts;
+
+ using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
+ SmallVector<SwizzleEntry, 16> SwizzleCounts;
+
+ auto AddCount = [](auto &Counts, const auto &Val) {
+ auto CountIt = std::find_if(Counts.begin(), Counts.end(),
+ [&Val](auto E) { return E.first == Val; });
+ if (CountIt == Counts.end()) {
+ Counts.emplace_back(Val, 1);
} else {
CountIt->second++;
}
+ };
+
+ auto GetMostCommon = [](auto &Counts) {
+ auto CommonIt =
+ std::max_element(Counts.begin(), Counts.end(),
+ [](auto A, auto B) { return A.second < B.second; });
+ assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
+ return *CommonIt;
+ };
+
+ size_t NumConstantLanes = 0;
+
+ // Count eligible lanes for each type of vector creation op
+ for (size_t I = 0; I < Lanes; ++I) {
+ const SDValue &Lane = Op->getOperand(I);
+ if (Lane.isUndef())
+ continue;
+
+ AddCount(SplatValueCounts, Lane);
+
+ if (IsConstant(Lane)) {
+ NumConstantLanes++;
+ } else if (CanSwizzle) {
+ auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
+ if (SwizzleSrcs.first)
+ AddCount(SwizzleCounts, SwizzleSrcs);
+ }
}
- auto CommonIt =
- std::max_element(ValueCounts.begin(), ValueCounts.end(),
- [](Entry A, Entry B) { return A.second < B.second; });
- assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector");
- SDValue SplatValue = CommonIt->first;
- size_t NumCommon = CommonIt->second;
-
- // If v128.const is available, consider using it instead of a splat
+
+ SDValue SplatValue;
+ size_t NumSplatLanes;
+ std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
+
+ SDValue SwizzleSrc;
+ SDValue SwizzleIndices;
+ size_t NumSwizzleLanes = 0;
+ if (SwizzleCounts.size())
+ std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
+ NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
+
+ // Predicate returning true if the lane is properly initialized by the
+ // original instruction
+ std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
+ SDValue Result;
if (Subtarget->hasUnimplementedSIMD128()) {
- // {i32,i64,f32,f64}.const opcode, and value
- const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes);
- // SIMD prefix and opcode
- const size_t SplatBytes = 2;
- const size_t SplatConstBytes = SplatBytes + ConstBytes;
- // SIMD prefix, opcode, and lane index
- const size_t ReplaceBytes = 3;
- const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
- // SIMD prefix, v128.const opcode, and 128-bit value
- const size_t VecConstBytes = 18;
- // Initial v128.const and a replace_lane for each non-const operand
- const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
- // Initial splat and all necessary replace_lanes
- const size_t SplatInitBytes =
- IsConstant(SplatValue)
- // Initial constant splat
- ? (SplatConstBytes +
- // Constant replace_lanes
- (NumConst - NumCommon) * ReplaceConstBytes +
- // Dynamic replace_lanes
- (NumDynamic * ReplaceBytes))
- // Initial dynamic splat
- : (SplatBytes +
- // Constant replace_lanes
- (NumConst * ReplaceConstBytes) +
- // Dynamic replace_lanes
- (NumDynamic - NumCommon) * ReplaceBytes);
- if (ConstInitBytes < SplatInitBytes) {
- // Create build_vector that will lower to initial v128.const
+ // Prefer swizzles over vector consts over splats
+ if (NumSwizzleLanes >= NumSplatLanes &&
+ NumSwizzleLanes >= NumConstantLanes) {
+ Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
+ SwizzleIndices);
+ auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
+ IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
+ return Swizzled == GetSwizzleSrcs(I, Lane);
+ };
+ } else if (NumConstantLanes >= NumSplatLanes) {
SmallVector<SDValue, 16> ConstLanes;
for (const SDValue &Lane : Op->op_values()) {
if (IsConstant(Lane)) {
@@ -1321,26 +1413,35 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
}
}
- SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes);
- // Add replace_lane instructions for non-const lanes
- for (size_t I = 0; I < Lanes; ++I) {
- const SDValue &Lane = Op->getOperand(I);
- if (!Lane.isUndef() && !IsConstant(Lane))
- Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
- DAG.getConstant(I, DL, MVT::i32));
- }
- return Result;
+ Result = DAG.getBuildVector(VecT, DL, ConstLanes);
+ IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+ return IsConstant(Lane);
+ };
+ }
+ }
+ if (!Result) {
+ // Use a splat, but possibly a load_splat
+ LoadSDNode *SplattedLoad;
+ if (Subtarget->hasUnimplementedSIMD128() &&
+ (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
+ SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
+ Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue);
+ } else {
+ Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
}
+ IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+ return Lane == SplatValue;
+ };
}
- // Use a splat for the initial vector
- SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
- // Add replace_lane instructions for other values
+
+ // Add replace_lane instructions for any unhandled values
for (size_t I = 0; I < Lanes; ++I) {
const SDValue &Lane = Op->getOperand(I);
- if (Lane != SplatValue)
+ if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
DAG.getConstant(I, DL, MVT::i32));
}
+
return Result;
}
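
Taken together, the rewritten lowering counts three candidate initializers per lane (swizzle sources, constants, and splat values) and prefers swizzle over v128.const over splat. The counting scheme (AddCount/GetMostCommon) reduces to the plain C++ below; this is a minimal sketch with simplified types, and mostCommon is a hypothetical helper rather than an LLVM API:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Count occurrences of each lane value and return the most common one,
    // mirroring AddCount/GetMostCommon in LowerBUILD_VECTOR above.
    template <typename T>
    std::pair<T, std::size_t> mostCommon(const std::vector<T> &Lanes) {
      std::vector<std::pair<T, std::size_t>> Counts;
      for (const T &V : Lanes) {
        auto It = std::find_if(Counts.begin(), Counts.end(),
                               [&](const auto &E) { return E.first == V; });
        if (It == Counts.end())
          Counts.emplace_back(V, 1);
        else
          ++It->second;
      }
      assert(!Counts.empty() && "unexpected all-undef build_vector");
      return *std::max_element(Counts.begin(), Counts.end(),
                               [](const auto &A, const auto &B) {
                                 return A.second < B.second;
                               });
    }
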
@@ -1415,11 +1516,6 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
// Only manually lower vector shifts
assert(Op.getSimpleValueType().isVector());
- // Expand all vector shifts until V8 fixes its implementation
- // TODO: remove this once V8 is fixed
- if (!Subtarget->hasUnimplementedSIMD128())
- return unrollVectorShift(Op, DAG);
-
// Unroll non-splat vector shifts
BuildVectorSDNode *ShiftVec;
SDValue SplatVal;
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index b3c7f3defd5f..a53e24a05542 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -63,7 +63,7 @@ private:
MachineMemOperand::Flags Flags,
bool *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
-
+ bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index e85aa57efc42..a9a99d38f9f1 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -71,12 +71,6 @@ class NotifyPatImmOff<PatFrag operand> :
def : NotifyPatImmOff<regPlusImm>;
def : NotifyPatImmOff<or_is_add>;
-def NotifyPatGlobalAddr :
- Pat<(i32 (int_wasm_atomic_notify (regPlusGA I32:$addr,
- (WebAssemblywrapper tglobaladdr:$off)),
- I32:$count)),
- (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>;
-
// Select notifies with just a constant offset.
def NotifyPatOffsetOnly :
Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)),
@@ -105,13 +99,6 @@ def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
-class WaitPatGlobalAddr<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
- ty:$exp, I64:$timeout)),
- (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatGlobalAddr<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatGlobalAddr<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
// Select wait_i32, ATOMIC_WAIT_I32s with just a constant offset.
class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
@@ -127,6 +114,19 @@ def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
} // Predicates = [HasAtomics]
//===----------------------------------------------------------------------===//
+// Atomic fences
+//===----------------------------------------------------------------------===//
+
+// A compiler fence instruction that prevents reordering of instructions.
+let Defs = [ARGUMENTS] in {
+let isPseudo = 1, hasSideEffects = 1 in
+defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
+let hasSideEffects = 1 in
+defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence",
+ 0x03>;
+} // Defs = [ARGUMENTS]
+
+//===----------------------------------------------------------------------===//
// Atomic loads
//===----------------------------------------------------------------------===//
@@ -151,9 +151,6 @@ def : LoadPatImmOff<i64, atomic_load_64, regPlusImm, ATOMIC_LOAD_I64>;
def : LoadPatImmOff<i32, atomic_load_32, or_is_add, ATOMIC_LOAD_I32>;
def : LoadPatImmOff<i64, atomic_load_64, or_is_add, ATOMIC_LOAD_I64>;
-def : LoadPatGlobalAddr<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatGlobalAddr<i64, atomic_load_64, ATOMIC_LOAD_I64>;
-
// Select loads with just a constant offset.
def : LoadPatOffsetOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
@@ -244,16 +241,6 @@ def : LoadPatImmOff<i64, sext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>;
def : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>;
// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i32
-def : LoadPatGlobalAddr<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatGlobalAddr<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatGlobalAddr<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatGlobalAddr<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatGlobalAddr<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
-def : LoadPatGlobalAddr<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatGlobalAddr<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatGlobalAddr<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatGlobalAddr<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-
// Extending loads with just a constant offset
def : LoadPatOffsetOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
def : LoadPatOffsetOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
@@ -313,13 +300,6 @@ def : AStorePatImmOff<i64, atomic_store_64, regPlusImm, ATOMIC_STORE_I64>;
def : AStorePatImmOff<i32, atomic_store_32, or_is_add, ATOMIC_STORE_I32>;
def : AStorePatImmOff<i64, atomic_store_64, or_is_add, ATOMIC_STORE_I64>;
-class AStorePatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
- ty:$val),
- (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>;
-def : AStorePatGlobalAddr<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatGlobalAddr<i64, atomic_store_64, ATOMIC_STORE_I64>;
-
// Select stores with just a constant offset.
class AStorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(kind imm:$off, ty:$val), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
@@ -374,12 +354,6 @@ def : AStorePatImmOff<i64, trunc_astore_8_64, or_is_add, ATOMIC_STORE8_I64>;
def : AStorePatImmOff<i64, trunc_astore_16_64, or_is_add, ATOMIC_STORE16_I64>;
def : AStorePatImmOff<i64, trunc_astore_32_64, or_is_add, ATOMIC_STORE32_I64>;
-def : AStorePatGlobalAddr<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatGlobalAddr<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatGlobalAddr<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatGlobalAddr<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatGlobalAddr<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
-
// Truncating stores with just a constant offset
def : AStorePatOffsetOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>;
def : AStorePatOffsetOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>;
@@ -500,11 +474,6 @@ class BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)),
(inst 0, imm:$off, I32:$addr, ty:$val)>;
-class BinRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
- ty:$val)),
- (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>;
-
// Select binary RMWs with just a constant offset.
class BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind imm:$off, ty:$val)),
@@ -525,9 +494,6 @@ multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
def : BinRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
def : BinRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
- def : BinRMWPatGlobalAddr<i32, rmw_32, inst_32>;
- def : BinRMWPatGlobalAddr<i64, rmw_64, inst_64>;
-
def : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>;
def : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>;
@@ -622,17 +588,6 @@ multiclass BinRMWTruncExtPattern<
def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
- def : BinRMWPatGlobalAddr<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatGlobalAddr<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatGlobalAddr<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatGlobalAddr<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
- def : BinRMWPatGlobalAddr<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
-
- def : BinRMWPatGlobalAddr<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatGlobalAddr<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
-
// Truncating-extending binary RMWs with just a constant offset
def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
@@ -732,11 +687,6 @@ class TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)),
(inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>;
-class TerRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
- ty:$exp, ty:$new)),
- (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, ty:$new)>;
-
// Select ternary RMWs with just a constant offset.
class TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
@@ -757,9 +707,6 @@ multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
def : TerRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
def : TerRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
- def : TerRMWPatGlobalAddr<i32, rmw_32, inst_32>;
- def : TerRMWPatGlobalAddr<i64, rmw_64, inst_64>;
-
def : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
def : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;
@@ -846,17 +793,6 @@ multiclass TerRMWTruncExtPattern<
def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
- def : TerRMWPatGlobalAddr<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatGlobalAddr<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
- def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
-
- def : TerRMWPatGlobalAddr<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatGlobalAddr<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
-
// Truncating-extending ternary RMWs with just a constant offset
def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
@@ -887,13 +823,3 @@ defm : TerRMWTruncExtPattern<
ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
ATOMIC_RMW32_U_CMPXCHG_I64>;
-
-//===----------------------------------------------------------------------===//
-// Atomic fences
-//===----------------------------------------------------------------------===//
-
-// A compiler fence instruction that prevents reordering of instructions.
-let Defs = [ARGUMENTS] in {
-let isPseudo = 1, hasSideEffects = 1 in
-defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
-} // Defs = [ARGUMENTS]
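
For reference, a minimal C++ fragment that would exercise the fence instructions defined above; the mapping noted in the comments is an assumption based on these definitions, not a statement of the backend's full lowering rules:

    #include <atomic>

    void publish(std::atomic<int> &Flag, int &Data) {
      Data = 42;
      // A thread fence here would correspond to an atomic.fence when
      // threads are enabled; in single-threaded builds a compiler-only
      // barrier (COMPILER_FENCE above) suffices.
      std::atomic_thread_fence(std::memory_order_release);
      Flag.store(1, std::memory_order_relaxed);
    }
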
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
index f4352e3d12ec..05735cf6d31f 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
@@ -39,7 +39,7 @@ defm MEMORY_INIT :
(ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest,
I32:$offset, I32:$size),
(outs), (ins i32imm_op:$seg, i32imm_op:$idx),
- [(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest,
+ [(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest,
I32:$offset, I32:$size
)],
"memory.init\t$seg, $idx, $dest, $offset, $size",
@@ -48,7 +48,7 @@ defm MEMORY_INIT :
let hasSideEffects = 1 in
defm DATA_DROP :
BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg),
- [(int_wasm_data_drop (i32 imm:$seg))],
+ [(int_wasm_data_drop (i32 timm:$seg))],
"data.drop\t$seg", "data.drop\t$seg", 0x09>;
let mayLoad = 1, mayStore = 1 in
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 1870c5bc34b0..1afc9a8790dc 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -84,49 +84,19 @@ let isTerminator = 1, isBarrier = 1 in
defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
-multiclass RETURN<WebAssemblyRegClass vt> {
- defm RETURN_#vt : I<(outs), (ins vt:$val), (outs), (ins),
- [(WebAssemblyreturn vt:$val)],
- "return \t$val", "return", 0x0f>;
- // Equivalent to RETURN_#vt, for use at the end of a function when wasm
- // semantics return by falling off the end of the block.
- let isCodeGenOnly = 1 in
- defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins vt:$val), (outs), (ins), []>;
-}
-
-multiclass SIMD_RETURN<ValueType vt> {
- defm RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
- [(WebAssemblyreturn (vt V128:$val))],
- "return \t$val", "return", 0x0f>,
- Requires<[HasSIMD128]>;
- // Equivalent to RETURN_#vt, for use at the end of a function when wasm
- // semantics return by falling off the end of the block.
- let isCodeGenOnly = 1 in
- defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
- []>,
- Requires<[HasSIMD128]>;
-}
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
let isReturn = 1 in {
- defm "": RETURN<I32>;
- defm "": RETURN<I64>;
- defm "": RETURN<F32>;
- defm "": RETURN<F64>;
- defm "": RETURN<EXNREF>;
- defm "": SIMD_RETURN<v16i8>;
- defm "": SIMD_RETURN<v8i16>;
- defm "": SIMD_RETURN<v4i32>;
- defm "": SIMD_RETURN<v2i64>;
- defm "": SIMD_RETURN<v4f32>;
- defm "": SIMD_RETURN<v2f64>;
-
- defm RETURN_VOID : NRI<(outs), (ins), [(WebAssemblyreturn)], "return", 0x0f>;
-
- // This is to RETURN_VOID what FALLTHROUGH_RETURN_#vt is to RETURN_#vt.
- let isCodeGenOnly = 1 in
- defm FALLTHROUGH_RETURN_VOID : NRI<(outs), (ins), []>;
+
+defm RETURN : I<(outs), (ins variable_ops), (outs), (ins),
+ [(WebAssemblyreturn)],
+ "return", "return", 0x0f>;
+// Equivalent to RETURN, for use at the end of a function when wasm
+// semantics return by falling off the end of the block.
+let isCodeGenOnly = 1 in
+defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>;
+
} // isReturn = 1
defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index 661fee2715ba..f3d9c5d5032c 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -171,6 +171,23 @@ defm I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
0xb1>;
} // hasSideEffects = 1
+def : Pat<(int_wasm_trunc_signed F32:$src),
+ (I32_TRUNC_S_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_unsigned F32:$src),
+ (I32_TRUNC_U_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_signed F64:$src),
+ (I32_TRUNC_S_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_unsigned F64:$src),
+ (I32_TRUNC_U_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_signed F32:$src),
+ (I64_TRUNC_S_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_unsigned F32:$src),
+ (I64_TRUNC_U_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_signed F64:$src),
+ (I64_TRUNC_S_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_unsigned F64:$src),
+ (I64_TRUNC_U_F64 F64:$src)>;
+
defm F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (sint_to_fp I32:$src))],
"f32.convert_i32_s\t$dst, $src", "f32.convert_i32_s",
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index a86c9af28f0d..8e8126c90e72 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -38,7 +38,7 @@ WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
RI(STI.getTargetTriple()) {}
bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable(
- const MachineInstr &MI, AliasAnalysis *AA) const {
+ const MachineInstr &MI, AAResults *AA) const {
switch (MI.getOpcode()) {
case WebAssembly::CONST_I32:
case WebAssembly::CONST_I64:
@@ -60,7 +60,7 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// exist. However we need to handle both here.
auto &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
- TargetRegisterInfo::isVirtualRegister(DestReg)
+ Register::isVirtualRegister(DestReg)
? MRI.getRegClass(DestReg)
: MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg);
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index df1051b4f42c..fe6211663c31 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -43,7 +43,7 @@ public:
const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; }
bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const override;
+ AAResults *AA) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 73ddbe85d551..044901481381 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -106,7 +106,8 @@ def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE",
def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT",
SDT_WebAssemblyArgument>;
def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
- SDT_WebAssemblyReturn, [SDNPHasChain]>;
+ SDT_WebAssemblyReturn,
+ [SDNPHasChain, SDNPVariadic]>;
def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
SDT_WebAssemblyWrapper>;
def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC",
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 6916b165f970..eba9b80d3286 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -37,16 +37,6 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
return (~Known0.Zero & ~Known1.Zero) == 0;
}]>;
-// GlobalAddresses are conceptually unsigned values, so we can also fold them
-// into immediate values as long as the add is 'nuw'.
-// TODO: We'd like to also match GA offsets but there are cases where the
-// register can have a negative value. Find out what more we can do.
-def regPlusGA : PatFrag<(ops node:$addr, node:$off),
- (add node:$addr, node:$off),
- [{
- return N->getFlags().hasNoUnsignedWrap();
-}]>;
-
// We don't need a regPlusES because external symbols never have constant
// offsets folded into them, so we can just use add.
@@ -93,15 +83,6 @@ def : LoadPatImmOff<i64, load, or_is_add, LOAD_I64>;
def : LoadPatImmOff<f32, load, or_is_add, LOAD_F32>;
def : LoadPatImmOff<f64, load, or_is_add, LOAD_F64>;
-class LoadPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))),
- (inst 0, tglobaladdr:$off, I32:$addr)>, Requires<[IsNotPIC]>;
-
-def : LoadPatGlobalAddr<i32, load, LOAD_I32>;
-def : LoadPatGlobalAddr<i64, load, LOAD_I64>;
-def : LoadPatGlobalAddr<f32, load, LOAD_F32>;
-def : LoadPatGlobalAddr<f64, load, LOAD_F64>;
-
// Select loads with just a constant offset.
class LoadPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>;
@@ -167,18 +148,6 @@ def : LoadPatImmOff<i64, zextloadi16, or_is_add, LOAD16_U_I64>;
def : LoadPatImmOff<i64, sextloadi32, or_is_add, LOAD32_S_I64>;
def : LoadPatImmOff<i64, zextloadi32, or_is_add, LOAD32_U_I64>;
-def : LoadPatGlobalAddr<i32, sextloadi8, LOAD8_S_I32>;
-def : LoadPatGlobalAddr<i32, zextloadi8, LOAD8_U_I32>;
-def : LoadPatGlobalAddr<i32, sextloadi16, LOAD16_S_I32>;
-def : LoadPatGlobalAddr<i32, zextloadi8, LOAD16_U_I32>;
-
-def : LoadPatGlobalAddr<i64, sextloadi8, LOAD8_S_I64>;
-def : LoadPatGlobalAddr<i64, zextloadi8, LOAD8_U_I64>;
-def : LoadPatGlobalAddr<i64, sextloadi16, LOAD16_S_I64>;
-def : LoadPatGlobalAddr<i64, zextloadi16, LOAD16_U_I64>;
-def : LoadPatGlobalAddr<i64, sextloadi32, LOAD32_S_I64>;
-def : LoadPatGlobalAddr<i64, zextloadi32, LOAD32_U_I64>;
-
// Select extending loads with just a constant offset.
def : LoadPatOffsetOnly<i32, sextloadi8, LOAD8_S_I32>;
def : LoadPatOffsetOnly<i32, zextloadi8, LOAD8_U_I32>;
@@ -224,11 +193,6 @@ def : LoadPatImmOff<i32, extloadi16, or_is_add, LOAD16_U_I32>;
def : LoadPatImmOff<i64, extloadi8, or_is_add, LOAD8_U_I64>;
def : LoadPatImmOff<i64, extloadi16, or_is_add, LOAD16_U_I64>;
def : LoadPatImmOff<i64, extloadi32, or_is_add, LOAD32_U_I64>;
-def : LoadPatGlobalAddr<i32, extloadi8, LOAD8_U_I32>;
-def : LoadPatGlobalAddr<i32, extloadi16, LOAD16_U_I32>;
-def : LoadPatGlobalAddr<i64, extloadi8, LOAD8_U_I64>;
-def : LoadPatGlobalAddr<i64, extloadi16, LOAD16_U_I64>;
-def : LoadPatGlobalAddr<i64, extloadi32, LOAD32_U_I64>;
// Select "don't care" extending loads with just a constant offset.
def : LoadPatOffsetOnly<i32, extloadi8, LOAD8_U_I32>;
@@ -282,15 +246,6 @@ def : StorePatImmOff<i64, store, or_is_add, STORE_I64>;
def : StorePatImmOff<f32, store, or_is_add, STORE_F32>;
def : StorePatImmOff<f64, store, or_is_add, STORE_F64>;
-class StorePatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind ty:$val,
- (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))),
- (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>, Requires<[IsNotPIC]>;
-def : StorePatGlobalAddr<i32, store, STORE_I32>;
-def : StorePatGlobalAddr<i64, store, STORE_I64>;
-def : StorePatGlobalAddr<f32, store, STORE_F32>;
-def : StorePatGlobalAddr<f64, store, STORE_F64>;
-
// Select stores with just a constant offset.
class StorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(kind ty:$val, imm:$off), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
@@ -333,12 +288,6 @@ def : StorePatImmOff<i64, truncstorei8, or_is_add, STORE8_I64>;
def : StorePatImmOff<i64, truncstorei16, or_is_add, STORE16_I64>;
def : StorePatImmOff<i64, truncstorei32, or_is_add, STORE32_I64>;
-def : StorePatGlobalAddr<i32, truncstorei8, STORE8_I32>;
-def : StorePatGlobalAddr<i32, truncstorei16, STORE16_I32>;
-def : StorePatGlobalAddr<i64, truncstorei8, STORE8_I64>;
-def : StorePatGlobalAddr<i64, truncstorei16, STORE16_I64>;
-def : StorePatGlobalAddr<i64, truncstorei32, STORE32_I64>;
-
// Select truncating stores with just a constant offset.
def : StorePatOffsetOnly<i32, truncstorei8, STORE8_I32>;
def : StorePatOffsetOnly<i32, truncstorei16, STORE16_I32>;
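
The or_is_add fragment retained above folds an or into the addressing mode when the known-zero bits prove no carries can occur. A minimal sketch of the underlying identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      std::uint32_t Base = 0x1000; // low 12 bits known zero (aligned base)
      std::uint32_t Off = 0x0abc;  // fits entirely within those 12 bits
      assert((Base & Off) == 0);          // operand bits are disjoint...
      assert((Base | Off) == Base + Off); // ...so OR and ADD coincide
      return 0;
    }
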
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index dd8930f079b0..fc5d73dac52e 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -40,47 +40,124 @@ def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
//===----------------------------------------------------------------------===//
// Load: v128.load
-multiclass SIMDLoad<ValueType vec_t> {
- let mayLoad = 1, UseNamedOperandTable = 1 in
- defm LOAD_#vec_t :
+let mayLoad = 1, UseNamedOperandTable = 1 in
+defm LOAD_V128 :
+ SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "v128.load\t$dst, ${off}(${addr})$p2align",
+ "v128.load\t$off$p2align", 0>;
+
+// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+def : LoadPatNoOffset<vec_t, load, LOAD_V128>;
+def : LoadPatImmOff<vec_t, load, regPlusImm, LOAD_V128>;
+def : LoadPatImmOff<vec_t, load, or_is_add, LOAD_V128>;
+def : LoadPatOffsetOnly<vec_t, load, LOAD_V128>;
+def : LoadPatGlobalAddrOffOnly<vec_t, load, LOAD_V128>;
+}
+
+// vNxM.load_splat
+multiclass SIMDLoadSplat<string vec, bits<32> simdop> {
+ let mayLoad = 1, UseNamedOperandTable = 1,
+ Predicates = [HasUnimplementedSIMD128] in
+ defm LOAD_SPLAT_#vec :
SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
- "v128.load\t$dst, ${off}(${addr})$p2align",
- "v128.load\t$off$p2align", 0>;
+ vec#".load_splat\t$dst, ${off}(${addr})$p2align",
+ vec#".load_splat\t$off$p2align", simdop>;
}
-foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
-defm "" : SIMDLoad<vec_t>;
-
-// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
-def : LoadPatNoOffset<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
-def : LoadPatImmOff<vec_t, load, regPlusImm, !cast<NI>("LOAD_"#vec_t)>;
-def : LoadPatImmOff<vec_t, load, or_is_add, !cast<NI>("LOAD_"#vec_t)>;
-def : LoadPatGlobalAddr<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
-def : LoadPatOffsetOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
-def : LoadPatGlobalAddrOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+defm "" : SIMDLoadSplat<"v8x16", 194>;
+defm "" : SIMDLoadSplat<"v16x8", 195>;
+defm "" : SIMDLoadSplat<"v32x4", 196>;
+defm "" : SIMDLoadSplat<"v64x2", 197>;
+
+def wasm_load_splat_t : SDTypeProfile<1, 1, []>;
+def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t>;
+
+foreach args = [["v16i8", "i32", "extloadi8"], ["v8i16", "i32", "extloadi16"],
+ ["v4i32", "i32", "load"], ["v2i64", "i64", "load"],
+ ["v4f32", "f32", "load"], ["v2f64", "f64", "load"]] in
+def load_splat_#args[0] :
+ PatFrag<(ops node:$addr), (wasm_load_splat
+ (!cast<ValueType>(args[1]) (!cast<PatFrag>(args[2]) node:$addr)))>;
+
+let Predicates = [HasUnimplementedSIMD128] in
+foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"],
+ ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in {
+def : LoadPatNoOffset<!cast<ValueType>(args[0]),
+ !cast<PatFrag>("load_splat_"#args[0]),
+ !cast<NI>("LOAD_SPLAT_"#args[1])>;
+def : LoadPatImmOff<!cast<ValueType>(args[0]),
+ !cast<PatFrag>("load_splat_"#args[0]),
+ regPlusImm,
+ !cast<NI>("LOAD_SPLAT_"#args[1])>;
+def : LoadPatImmOff<!cast<ValueType>(args[0]),
+ !cast<PatFrag>("load_splat_"#args[0]),
+ or_is_add,
+ !cast<NI>("LOAD_SPLAT_"#args[1])>;
+def : LoadPatOffsetOnly<!cast<ValueType>(args[0]),
+ !cast<PatFrag>("load_splat_"#args[0]),
+ !cast<NI>("LOAD_SPLAT_"#args[1])>;
+def : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]),
+ !cast<PatFrag>("load_splat_"#args[0]),
+ !cast<NI>("LOAD_SPLAT_"#args[1])>;
}
-// Store: v128.store
-multiclass SIMDStore<ValueType vec_t> {
- let mayStore = 1, UseNamedOperandTable = 1 in
- defm STORE_#vec_t :
- SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
+// Load and extend
+multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> {
+ let mayLoad = 1, UseNamedOperandTable = 1,
+ Predicates = [HasUnimplementedSIMD128] in {
+ defm LOAD_EXTEND_S_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ name#"_s\t$dst, ${off}(${addr})$p2align",
+ name#"_s\t$off$p2align", simdop>;
+ defm LOAD_EXTEND_U_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
- "v128.store\t${off}(${addr})$p2align, $vec",
- "v128.store\t$off$p2align", 1>;
+ name#"_u\t$dst, ${off}(${addr})$p2align",
+ name#"_u\t$off$p2align", !add(simdop, 1)>;
+ }
}
-foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
-defm "" : SIMDStore<vec_t>;
+defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 210>;
+defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 212>;
+defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 214>;
+
+let Predicates = [HasUnimplementedSIMD128] in
+foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in
+foreach exts = [["sextloadv", "_S"],
+ ["zextloadv", "_U"],
+ ["extloadv", "_U"]] in {
+def : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]),
+ !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
+def : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm,
+ !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
+def : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add,
+ !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
+def : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
+ !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
+def : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
+ !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
+}
+
+
+// Store: v128.store
+let mayStore = 1, UseNamedOperandTable = 1 in
+defm STORE_V128 :
+ SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "v128.store\t${off}(${addr})$p2align, $vec",
+ "v128.store\t$off$p2align", 1>;
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
-def : StorePatNoOffset<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
-def : StorePatImmOff<vec_t, store, regPlusImm, !cast<NI>("STORE_"#vec_t)>;
-def : StorePatImmOff<vec_t, store, or_is_add, !cast<NI>("STORE_"#vec_t)>;
-def : StorePatGlobalAddr<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
-def : StorePatOffsetOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
-def : StorePatGlobalAddrOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatNoOffset<vec_t, store, STORE_V128>;
+def : StorePatImmOff<vec_t, store, regPlusImm, STORE_V128>;
+def : StorePatImmOff<vec_t, store, or_is_add, STORE_V128>;
+def : StorePatOffsetOnly<vec_t, store, STORE_V128>;
+def : StorePatGlobalAddrOffOnly<vec_t, store, STORE_V128>;
}
//===----------------------------------------------------------------------===//
@@ -90,7 +167,7 @@ def : StorePatGlobalAddrOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
// Constant: v128.const
multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
let isMoveImm = 1, isReMaterializable = 1,
- Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+ Predicates = [HasUnimplementedSIMD128] in
defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
[(set V128:$dst, (vec_t pat))],
"v128.const\t$dst, "#args,
@@ -198,6 +275,19 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
(i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
}
+// Swizzle lanes: v8x16.swizzle
+def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
+def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
+let Predicates = [HasUnimplementedSIMD128] in
+defm SWIZZLE :
+ SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
+ [(set (v16i8 V128:$dst),
+ (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
+ "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 192>;
+
+def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
+ (SWIZZLE V128:$src, V128:$mask)>;
+
// Create vector with identical lanes: splat
def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
def splat4 : PatFrag<(ops node:$x), (build_vector
@@ -286,7 +376,7 @@ multiclass ExtractLaneExtended<string sign, bits<32> baseInst> {
}
defm "" : ExtractLaneExtended<"_s", 5>;
-let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+let Predicates = [HasUnimplementedSIMD128] in
defm "" : ExtractLaneExtended<"_u", 6>;
defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>;
defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>;
@@ -472,6 +562,11 @@ defm OR : SIMDBitwise<or, "or", 78>;
defm XOR : SIMDBitwise<xor, "xor", 79>;
} // isCommutable = 1
+// Bitwise logic: v128.andnot
+def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>;
+let Predicates = [HasUnimplementedSIMD128] in
+defm ANDNOT : SIMDBitwise<andnot, "andnot", 216>;
+
// Bitwise select: v128.bitselect
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
defm BITSELECT_#vec_t :
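
The andnot fragment defined above matches and(left, not(right)), i.e. v128.andnot clears in $left every bit that is set in $right. A one-lane sketch:

    #include <cassert>
    #include <cstdint>

    std::uint8_t andnot(std::uint8_t Left, std::uint8_t Right) {
      return Left & static_cast<std::uint8_t>(~Right);
    }

    int main() {
      assert(andnot(0b1100, 0b1010) == 0b0100); // bits of Right are cleared
      return 0;
    }
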
@@ -655,7 +750,7 @@ defm ABS : SIMDUnaryFP<fabs, "abs", 149>;
defm NEG : SIMDUnaryFP<fneg, "neg", 150>;
// Square root: sqrt
-let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+let Predicates = [HasUnimplementedSIMD128] in
defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 151>;
//===----------------------------------------------------------------------===//
@@ -679,7 +774,7 @@ let isCommutable = 1 in
defm MUL : SIMDBinaryFP<fmul, "mul", 156>;
// Division: div
-let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+let Predicates = [HasUnimplementedSIMD128] in
defm DIV : SIMDBinaryFP<fdiv, "div", 157>;
// NaN-propagating minimum: min
@@ -712,6 +807,42 @@ defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 172>;
defm "" : SIMDConvert<v2i64, v2f64, fp_to_sint, "i64x2.trunc_sat_f64x2_s", 173>;
defm "" : SIMDConvert<v2i64, v2f64, fp_to_uint, "i64x2.trunc_sat_f64x2_u", 174>;
+// Widening operations
+multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
+ bits<32> baseInst> {
+ defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_signed,
+ vec#".widen_low_"#arg#"_s", baseInst>;
+ defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_signed,
+ vec#".widen_high_"#arg#"_s", !add(baseInst, 1)>;
+ defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_unsigned,
+ vec#".widen_low_"#arg#"_u", !add(baseInst, 2)>;
+ defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_unsigned,
+ vec#".widen_high_"#arg#"_u", !add(baseInst, 3)>;
+}
+
+defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 202>;
+defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 206>;
+
+// Narrowing operations
+multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg,
+ bits<32> baseInst> {
+ defm NARROW_S_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
+ [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_signed
+ (arg_t V128:$low), (arg_t V128:$high))))],
+ vec#".narrow_"#arg#"_s\t$dst, $low, $high", vec#".narrow_"#arg#"_s",
+ baseInst>;
+ defm NARROW_U_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
+ [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_unsigned
+ (arg_t V128:$low), (arg_t V128:$high))))],
+ vec#".narrow_"#arg#"_u\t$dst, $low, $high", vec#".narrow_"#arg#"_u",
+ !add(baseInst, 1)>;
+}
+
+defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 198>;
+defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 200>;
+
// Lower llvm.wasm.trunc.saturate.* to saturating instructions
def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
(fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
@@ -732,3 +863,25 @@ foreach t2 = !foldl(
)
) in
def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;
+
+//===----------------------------------------------------------------------===//
+// Quasi-Fused Multiply-Add and Subtract (QFMA/QFMS)
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> {
+ defm QFMA_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
+ (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
+ vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>;
+ defm QFMS_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
+ (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
+ vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>;
+}
+
+defm "" : SIMDQFM<v4f32, "f32x4", 0x98>;
+defm "" : SIMDQFM<v2f64, "f64x2", 0xa3>;
diff --git a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index e92b34430272..75d04252cbe9 100644
--- a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-late-eh-prepare"
@@ -131,7 +132,7 @@ bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) {
auto InsertPos = MBB.begin();
if (InsertPos->isEHLabel()) // EH pad starts with an EH label
++InsertPos;
- unsigned DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
+ Register DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(),
TII.get(WebAssembly::CATCH), DstReg);
}
@@ -168,7 +169,7 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
if (CatchPos->isEHLabel()) // EH pad starts with an EH label
++CatchPos;
MachineInstr *Catch = &*CatchPos;
- unsigned ExnReg = Catch->getOperand(0).getReg();
+ Register ExnReg = Catch->getOperand(0).getReg();
BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
.addReg(ExnReg);
TI->eraseFromParent();
@@ -233,6 +234,7 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
// it. The pseudo instruction will be deleted later.
bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) {
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
auto *EHInfo = MF.getWasmEHFuncInfo();
SmallVector<MachineInstr *, 16> ExtractInstrs;
SmallVector<MachineInstr *, 8> ToDelete;
@@ -292,7 +294,7 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) {
// thenbb:
// %exn:i32 = extract_exception
// ... use exn ...
- unsigned ExnReg = Catch->getOperand(0).getReg();
+ Register ExnReg = Catch->getOperand(0).getReg();
auto *ThenMBB = MF.CreateMachineBasicBlock();
auto *ElseMBB = MF.CreateMachineBasicBlock();
MF.insert(std::next(MachineFunction::iterator(EHPad)), ElseMBB);
@@ -339,9 +341,11 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) {
WebAssembly::ClangCallTerminateFn);
assert(ClangCallTerminateFn &&
"There is no __clang_call_terminate() function");
+ Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ BuildMI(ElseMBB, DL, TII.get(WebAssembly::CONST_I32), Reg).addImm(0);
BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL_VOID))
.addGlobalAddress(ClangCallTerminateFn)
- .addImm(0);
+ .addReg(Reg);
BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE));
} else {
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 34a8195ac4b4..4314aa611549 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -68,7 +68,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
if (MI->getOpcode() != WebAssembly::BR_UNLESS)
continue;
- unsigned Cond = MI->getOperand(1).getReg();
+ Register Cond = MI->getOperand(1).getReg();
bool Inverted = false;
// Attempt to invert the condition in place.
@@ -188,7 +188,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
// If we weren't able to invert the condition in place. Insert an
// instruction to invert it.
if (!Inverted) {
- unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ Register Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp)
.addReg(Cond);
MFI.stackifyVReg(Tmp);
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 960d5134f6e9..1cf397dd060b 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -227,15 +227,6 @@ static cl::list<std::string>
namespace {
class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
- static const char *ResumeFName;
- static const char *EHTypeIDFName;
- static const char *EmLongjmpFName;
- static const char *EmLongjmpJmpbufFName;
- static const char *SaveSetjmpFName;
- static const char *TestSetjmpFName;
- static const char *FindMatchingCatchPrefix;
- static const char *InvokePrefix;
-
bool EnableEH; // Enable exception handling
bool EnableSjLj; // Enable setjmp/longjmp handling
@@ -274,6 +265,7 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); }
bool canLongjmp(Module &M, const Value *Callee) const;
+ bool isEmAsmCall(Module &M, const Value *Callee) const;
void rebuildSSA(Function &F);
@@ -292,19 +284,6 @@ public:
};
} // End anonymous namespace
-const char *WebAssemblyLowerEmscriptenEHSjLj::ResumeFName = "__resumeException";
-const char *WebAssemblyLowerEmscriptenEHSjLj::EHTypeIDFName =
- "llvm_eh_typeid_for";
-const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpFName =
- "emscripten_longjmp";
-const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpJmpbufFName =
- "emscripten_longjmp_jmpbuf";
-const char *WebAssemblyLowerEmscriptenEHSjLj::SaveSetjmpFName = "saveSetjmp";
-const char *WebAssemblyLowerEmscriptenEHSjLj::TestSetjmpFName = "testSetjmp";
-const char *WebAssemblyLowerEmscriptenEHSjLj::FindMatchingCatchPrefix =
- "__cxa_find_matching_catch_";
-const char *WebAssemblyLowerEmscriptenEHSjLj::InvokePrefix = "__invoke_";
-
char WebAssemblyLowerEmscriptenEHSjLj::ID = 0;
INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE,
"WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp",
@@ -335,7 +314,8 @@ static bool canThrow(const Value *V) {
static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB,
const char *Name) {
- auto* GV = dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, IRB.getInt32Ty()));
+ auto *GV =
+ dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, IRB.getInt32Ty()));
if (!GV)
report_fatal_error(Twine("unable to create global: ") + Name);
@@ -376,9 +356,9 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M,
PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy);
FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false);
- Function *F =
- Function::Create(FTy, GlobalValue::ExternalLinkage,
- FindMatchingCatchPrefix + Twine(NumClauses + 2), &M);
+ Function *F = Function::Create(
+ FTy, GlobalValue::ExternalLinkage,
+ "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M);
FindMatchingCatches[NumClauses] = F;
return F;
}
@@ -418,7 +398,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
Args.append(CI->arg_begin(), CI->arg_end());
CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args);
NewCall->takeName(CI);
- NewCall->setCallingConv(CI->getCallingConv());
+ NewCall->setCallingConv(CallingConv::WASM_EmscriptenInvoke);
NewCall->setDebugLoc(CI->getDebugLoc());
// Because we added the pointer to the callee as first argument, all
@@ -432,9 +412,22 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
for (unsigned I = 0, E = CI->getNumArgOperands(); I < E; ++I)
ArgAttributes.push_back(InvokeAL.getParamAttributes(I));
+ AttrBuilder FnAttrs(InvokeAL.getFnAttributes());
+ if (FnAttrs.contains(Attribute::AllocSize)) {
+ // The allocsize attribute (if any) refers to parameters by index and needs
+ // to be adjusted.
+ unsigned SizeArg;
+ Optional<unsigned> NEltArg;
+ std::tie(SizeArg, NEltArg) = FnAttrs.getAllocSizeArgs();
+ SizeArg += 1;
+ if (NEltArg.hasValue())
+ NEltArg = NEltArg.getValue() + 1;
+ FnAttrs.addAllocSizeAttr(SizeArg, NEltArg);
+ }
+
// Reconstruct the AttributesList based on the vector we constructed.
AttributeList NewCallAL =
- AttributeList::get(C, InvokeAL.getFnAttributes(),
+ AttributeList::get(C, AttributeSet::get(C, FnAttrs),
InvokeAL.getRetAttributes(), ArgAttributes);
NewCall->setAttributes(NewCallAL);
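
The allocsize fix-up above is needed because wrapInvoke prepends the original callee as a new first argument, shifting every parameter index by one. A self-contained sketch of the adjustment, where shiftAllocSize is a hypothetical stand-in for the AttrBuilder getAllocSizeArgs/addAllocSizeAttr round trip:

    #include <cstdio>
    #include <optional>
    #include <utility>

    std::pair<unsigned, std::optional<unsigned>>
    shiftAllocSize(unsigned SizeArg, std::optional<unsigned> NEltArg) {
      SizeArg += 1; // the prepended callee now occupies index 0
      if (NEltArg)
        NEltArg = *NEltArg + 1;
      return {SizeArg, NEltArg};
    }

    int main() {
      // allocsize(0, 1) on the original call becomes allocsize(1, 2)
      // on the __invoke_* wrapper call.
      auto [Size, NElt] = shiftAllocSize(0, 1);
      std::printf("%u %u\n", Size, *NElt); // prints: 1 2
    }
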
@@ -473,8 +466,8 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) {
FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys,
CalleeFTy->isVarArg());
- Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage,
- InvokePrefix + Sig, M);
+ Function *F =
+ Function::Create(FTy, GlobalValue::ExternalLinkage, "__invoke_" + Sig, M);
InvokeWrappers[Sig] = F;
return F;
}
@@ -491,39 +484,44 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
// and can't be passed by pointer. The result is a crash with illegal IR.
if (isa<InlineAsm>(Callee))
return false;
+ StringRef CalleeName = Callee->getName();
// The reason we include malloc/free here is to exclude the malloc/free
// calls generated in setjmp prep / cleanup routines.
- Function *SetjmpF = M.getFunction("setjmp");
- Function *MallocF = M.getFunction("malloc");
- Function *FreeF = M.getFunction("free");
- if (Callee == SetjmpF || Callee == MallocF || Callee == FreeF)
+ if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free")
return false;
// There are functions in JS glue code
- if (Callee == ResumeF || Callee == EHTypeIDF || Callee == SaveSetjmpF ||
- Callee == TestSetjmpF)
+ if (CalleeName == "__resumeException" || CalleeName == "llvm_eh_typeid_for" ||
+ CalleeName == "saveSetjmp" || CalleeName == "testSetjmp" ||
+ CalleeName == "getTempRet0" || CalleeName == "setTempRet0")
return false;
// __cxa_find_matching_catch_N functions cannot longjmp
- if (Callee->getName().startswith(FindMatchingCatchPrefix))
+ if (Callee->getName().startswith("__cxa_find_matching_catch_"))
return false;
// Exception-catching related functions
- Function *BeginCatchF = M.getFunction("__cxa_begin_catch");
- Function *EndCatchF = M.getFunction("__cxa_end_catch");
- Function *AllocExceptionF = M.getFunction("__cxa_allocate_exception");
- Function *ThrowF = M.getFunction("__cxa_throw");
- Function *TerminateF = M.getFunction("__clang_call_terminate");
- if (Callee == BeginCatchF || Callee == EndCatchF ||
- Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF ||
- Callee == GetTempRet0Func || Callee == SetTempRet0Func)
+ if (CalleeName == "__cxa_begin_catch" || CalleeName == "__cxa_end_catch" ||
+ CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" ||
+ CalleeName == "__clang_call_terminate")
return false;
// Otherwise we don't know
return true;
}
+bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M,
+ const Value *Callee) const {
+ StringRef CalleeName = Callee->getName();
+ // This is an exhaustive list from Emscripten's <emscripten/em_asm.h>.
+ return CalleeName == "emscripten_asm_const_int" ||
+ CalleeName == "emscripten_asm_const_double" ||
+ CalleeName == "emscripten_asm_const_int_sync_on_main_thread" ||
+ CalleeName == "emscripten_asm_const_double_sync_on_main_thread" ||
+ CalleeName == "emscripten_asm_const_async_on_main_thread";
+}
+
// Generate testSetjmp function call sequence with preamble and postamble.
// The code this generates is equivalent to the following JavaScript code:
// if (%__THREW__.val != 0 & threwValue != 0) {
@@ -605,15 +603,12 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
SSAUpdater SSA;
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
+ SSA.Initialize(I.getType(), I.getName());
+ SSA.AddAvailableValue(&BB, &I);
for (auto UI = I.use_begin(), UE = I.use_end(); UI != UE;) {
Use &U = *UI;
++UI;
- SSA.Initialize(I.getType(), I.getName());
- SSA.AddAvailableValue(&BB, &I);
auto *User = cast<Instruction>(U.getUser());
- if (User->getParent() == &BB)
- continue;
-
if (auto *UserPN = dyn_cast<PHINode>(User))
if (UserPN->getIncomingBlock(U) == &BB)
continue;
@@ -660,13 +655,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
FunctionType *ResumeFTy =
FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false);
ResumeF = Function::Create(ResumeFTy, GlobalValue::ExternalLinkage,
- ResumeFName, &M);
+ "__resumeException", &M);
// Register llvm_eh_typeid_for function
FunctionType *EHTypeIDTy =
FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false);
EHTypeIDF = Function::Create(EHTypeIDTy, GlobalValue::ExternalLinkage,
- EHTypeIDFName, &M);
+ "llvm_eh_typeid_for", &M);
for (Function &F : M) {
if (F.isDeclaration())
@@ -684,7 +679,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
// defined in JS code
EmLongjmpJmpbufF = Function::Create(LongjmpF->getFunctionType(),
GlobalValue::ExternalLinkage,
- EmLongjmpJmpbufFName, &M);
+ "emscripten_longjmp_jmpbuf", &M);
LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF);
}
@@ -697,19 +692,19 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
IRB.getInt32Ty()};
FunctionType *FTy =
FunctionType::get(Type::getInt32PtrTy(C), Params, false);
- SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- SaveSetjmpFName, &M);
+ SaveSetjmpF =
+ Function::Create(FTy, GlobalValue::ExternalLinkage, "saveSetjmp", &M);
// Register testSetjmp function
Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()};
FTy = FunctionType::get(IRB.getInt32Ty(), Params, false);
- TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- TestSetjmpFName, &M);
+ TestSetjmpF =
+ Function::Create(FTy, GlobalValue::ExternalLinkage, "testSetjmp", &M);
FTy = FunctionType::get(IRB.getVoidTy(),
{IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- EmLongjmpFName, &M);
+ "emscripten_longjmp", &M);
// Only traverse functions that use setjmp in order not to insert
// unnecessary prep / cleanup code in every function
@@ -970,10 +965,16 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
const Value *Callee = CI->getCalledValue();
if (!canLongjmp(M, Callee))
continue;
+ if (isEmAsmCall(M, Callee))
+ report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " +
+ F.getName() +
+ ". Please consider using EM_JS, or move the "
+ "EM_ASM into another function.",
+ false);
Value *Threw = nullptr;
BasicBlock *Tail;
- if (Callee->getName().startswith(InvokePrefix)) {
+ if (Callee->getName().startswith("__invoke_")) {
// If invoke wrapper has already been generated for this call in
// previous EH phase, search for the load instruction
// %__THREW__.val = __THREW__;
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index 494d3fadbc8c..750b2233e67a 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -94,7 +94,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
break; // Found a null terminator, skip the rest.
Constant *Associated = CS->getOperand(2);
- Associated = cast<Constant>(Associated->stripPointerCastsNoFollowAliases());
+ Associated = cast<Constant>(Associated->stripPointerCasts());
DtorFuncs[PriorityValue][Associated].push_back(DtorFunc);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 288b991ae2c5..59c10243c545 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -79,7 +79,7 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
// Clang-provided symbols.
if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 ||
strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 ||
- strcmp(Name, "__tls_size") == 0) {
+ strcmp(Name, "__tls_size") == 0 || strcmp(Name, "__tls_align") == 0) {
bool Mutable =
strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0;
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
@@ -115,7 +115,7 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
getLibcallSignature(Subtarget, Name, Returns, Params);
}
auto Signature =
- make_unique<wasm::WasmSignature>(std::move(Returns), std::move(Params));
+ std::make_unique<wasm::WasmSignature>(std::move(Returns), std::move(Params));
WasmSym->setSignature(Signature.get());
Printer.addSignature(std::move(Signature));
@@ -163,6 +163,21 @@ MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
return MCOperand::createExpr(Expr);
}
+MCOperand WebAssemblyMCInstLower::lowerTypeIndexOperand(
+ SmallVector<wasm::ValType, 1> &&Returns,
+ SmallVector<wasm::ValType, 4> &&Params) const {
+ auto Signature = std::make_unique<wasm::WasmSignature>(std::move(Returns),
+ std::move(Params));
+ MCSymbol *Sym = Printer.createTempSymbol("typeindex");
+ auto *WasmSym = cast<MCSymbolWasm>(Sym);
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
+ return MCOperand::createExpr(Expr);
+}
+
// Return the WebAssembly type associated with the given register class.
static wasm::ValType getType(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
@@ -178,6 +193,16 @@ static wasm::ValType getType(const TargetRegisterClass *RC) {
llvm_unreachable("Unexpected register class");
}
+static void getFunctionReturns(const MachineInstr *MI,
+ SmallVectorImpl<wasm::ValType> &Returns) {
+ const Function &F = MI->getMF()->getFunction();
+ const TargetMachine &TM = MI->getMF()->getTarget();
+ Type *RetTy = F.getReturnType();
+ SmallVector<MVT, 4> CallerRetTys;
+ computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
+ valTypesFromMVTs(CallerRetTys, Returns);
+}
+
void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
@@ -208,8 +233,6 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
if (I < Desc.NumOperands) {
const MCOperandInfo &Info = Desc.OpInfo[I];
if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
- MCSymbol *Sym = Printer.createTempSymbol("typeindex");
-
SmallVector<wasm::ValType, 4> Returns;
SmallVector<wasm::ValType, 4> Params;
@@ -226,17 +249,23 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
if (WebAssembly::isCallIndirect(MI->getOpcode()))
Params.pop_back();
- auto *WasmSym = cast<MCSymbolWasm>(Sym);
- auto Signature = make_unique<wasm::WasmSignature>(std::move(Returns),
- std::move(Params));
- WasmSym->setSignature(Signature.get());
- Printer.addSignature(std::move(Signature));
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ // return_call_indirect instructions have the return type of the
+ // caller.
+ if (MI->getOpcode() == WebAssembly::RET_CALL_INDIRECT)
+ getFunctionReturns(MI, Returns);
- const MCExpr *Expr = MCSymbolRefExpr::create(
- WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
- MCOp = MCOperand::createExpr(Expr);
+ MCOp = lowerTypeIndexOperand(std::move(Returns), std::move(Params));
break;
+ } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) {
+ auto BT = static_cast<WebAssembly::BlockType>(MO.getImm());
+ assert(BT != WebAssembly::BlockType::Invalid);
+ if (BT == WebAssembly::BlockType::Multivalue) {
+ SmallVector<wasm::ValType, 1> Returns;
+ getFunctionReturns(MI, Returns);
+ MCOp = lowerTypeIndexOperand(std::move(Returns),
+ SmallVector<wasm::ValType, 4>());
+ break;
+ }
}
}
MCOp = MCOperand::createImm(MO.getImm());
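A hedged note on the two new uses of getFunctionReturns above (the wat in the comments is illustrative, not emitted by this file):

// Why the *caller's* results are used:
//   (func $caller (result i64)
//     i32.const 0                        ;; table slot
//     return_call_indirect (type $sig))  ;; tail call through a table
// A tail call returns directly to $caller's caller, so $sig's results must
// equal $caller's (result i64). Likewise, the Multivalue block type lowered
// here wraps the function body, so its type index carries the enclosing
// function's results and no params.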
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index 2c375a01a7f5..d79c54097eb7 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Compiler.h"
@@ -33,6 +34,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+ MCOperand lowerTypeIndexOperand(SmallVector<wasm::ValType, 1> &&,
+ SmallVector<wasm::ValType, 4> &&) const;
public:
WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index d31c1226bfdb..e4cc2389147b 100644
--- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -49,10 +49,12 @@ void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F,
computeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
- if (Results.size() > 1) {
- // WebAssembly currently can't lower returns of multiple values without
- // demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So
- // replace multiple return values with a pointer parameter.
+ if (Results.size() > 1 &&
+ !TM.getSubtarget<WebAssemblySubtarget>(F).hasMultivalue()) {
+ // WebAssembly can't lower returns of multiple values without demoting to
+ // sret unless multivalue is enabled (see
+ // WebAssemblyTargetLowering::CanLowerReturn). So replace multiple return
+ // values with a pointer parameter.
Results.clear();
Params.push_back(PtrVT);
}
@@ -72,7 +74,7 @@ void llvm::valTypesFromMVTs(const ArrayRef<MVT> &In,
std::unique_ptr<wasm::WasmSignature>
llvm::signatureFromMVTs(const SmallVectorImpl<MVT> &Results,
const SmallVectorImpl<MVT> &Params) {
- auto Sig = make_unique<wasm::WasmSignature>();
+ auto Sig = std::make_unique<wasm::WasmSignature>();
valTypesFromMVTs(Results, Sig->Returns);
valTypesFromMVTs(Params, Sig->Params);
return Sig;
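To make the new multivalue gate concrete, a hedged sketch (the struct and function are illustrative):

// struct Pair { int A; long long B; };
// Pair f();
// Without +multivalue: Results is cleared and a pointer param is appended,
// i.e. sret demotion; the wasm signature becomes (func $f (param i32)).
// With +multivalue: both results survive: (func $f (result i32 i64)).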
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 4b9ba491dee6..16e2f4392984 100644
--- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -96,13 +96,18 @@ public:
void stackifyVReg(unsigned VReg) {
assert(MF.getRegInfo().getUniqueVRegDef(VReg));
- auto I = TargetRegisterInfo::virtReg2Index(VReg);
+ auto I = Register::virtReg2Index(VReg);
if (I >= VRegStackified.size())
VRegStackified.resize(I + 1);
VRegStackified.set(I);
}
+ void unstackifyVReg(unsigned VReg) {
+ auto I = Register::virtReg2Index(VReg);
+ if (I < VRegStackified.size())
+ VRegStackified.reset(I);
+ }
bool isVRegStackified(unsigned VReg) const {
- auto I = TargetRegisterInfo::virtReg2Index(VReg);
+ auto I = Register::virtReg2Index(VReg);
if (I >= VRegStackified.size())
return false;
return VRegStackified.test(I);
@@ -111,12 +116,12 @@ public:
void initWARegs();
void setWAReg(unsigned VReg, unsigned WAReg) {
assert(WAReg != UnusedReg);
- auto I = TargetRegisterInfo::virtReg2Index(VReg);
+ auto I = Register::virtReg2Index(VReg);
assert(I < WARegs.size());
WARegs[I] = WAReg;
}
unsigned getWAReg(unsigned VReg) const {
- auto I = TargetRegisterInfo::virtReg2Index(VReg);
+ auto I = Register::virtReg2Index(VReg);
assert(I < WARegs.size());
return WARegs[I];
}
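A note on the Register::virtReg2Index calls introduced above: virtual register numbers are tagged (high bit set), and the helper strips the tag to produce the dense 0-based index that VRegStackified and WARegs are keyed on. A minimal self-contained sketch, assuming the Register API of this revision:

#include "llvm/CodeGen/Register.h"
#include <cassert>

void example() {
  // The 6th virtual register, round-tripped through its dense index.
  unsigned VReg = llvm::Register::index2VirtReg(5);
  assert(llvm::Register::isVirtualRegister(VReg));
  unsigned Idx = llvm::Register::virtReg2Index(VReg); // == 5
  (void)Idx; // a valid index into VRegStackified / WARegs above
}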
diff --git a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index 7ac0511c28b0..ac428fcc826a 100644
--- a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -166,8 +166,8 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
if (!LibInfo.getLibFunc(Name, Func))
return false;
- unsigned FromReg = MI.getOperand(2).getReg();
- unsigned ToReg = MI.getOperand(0).getReg();
+ Register FromReg = MI.getOperand(2).getReg();
+ Register ToReg = MI.getOperand(0).getReg();
if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
report_fatal_error("Memory Intrinsic results: call to builtin function "
"with wrong signature, from/to mismatch");
@@ -184,7 +184,8 @@ bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
auto &MDT = getAnalysis<MachineDominatorTree>();
const WebAssemblyTargetLowering &TLI =
*MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
- const auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const auto &LibInfo =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(MF.getFunction());
auto &LIS = getAnalysis<LiveIntervals>();
bool Changed = false;
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 8c7c3305c201..0bd30791e57c 100644
--- a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -81,7 +81,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
// Split multiple-VN LiveIntervals into multiple LiveIntervals.
SmallVector<LiveInterval *, 4> SplitLIs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index d20352259e07..9b60596e42b4 100644
--- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -64,11 +64,8 @@ void OptimizeReturned::visitCallSite(CallSite CS) {
if (isa<Constant>(Arg))
continue;
// Like replaceDominatedUsesWith but using Instruction/Use dominance.
- for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) {
- Use &U = *UI++;
- if (DT->dominates(Inst, U))
- U.set(Inst);
- }
+ Arg->replaceUsesWithIf(Inst,
+ [&](Use &U) { return DT->dominates(Inst, U); });
}
}
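Value::replaceUsesWithIf, used above, is the predicate-gated form of replaceAllUsesWith; a hedged usage sketch (Old, New, NewInst, and DT are hypothetical names):

// Replace only the uses of Old that satisfy the predicate; every other
// use keeps referring to Old.
Old->replaceUsesWithIf(New, [&](llvm::Use &U) {
  return DT->dominates(NewInst, U); // e.g. only dominance-safe uses
});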
diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index e11cdeaa0e79..ea6cd09a604c 100644
--- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -63,7 +63,7 @@ static bool maybeRewriteToDrop(unsigned OldReg, unsigned NewReg,
bool Changed = false;
if (OldReg == NewReg) {
Changed = true;
- unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
MO.setReg(NewReg);
MO.setIsDead();
MFI.stackifyVReg(NewReg);
@@ -75,9 +75,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
const MachineFunction &MF,
WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI,
- const WebAssemblyInstrInfo &TII,
- unsigned FallthroughOpc,
- unsigned CopyLocalOpc) {
+ const WebAssemblyInstrInfo &TII) {
if (DisableWebAssemblyFallthroughReturnOpt)
return false;
if (&MBB != &MF.back())
@@ -90,13 +88,36 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
if (&MI != &*End)
return false;
- if (FallthroughOpc != WebAssembly::FALLTHROUGH_RETURN_VOID) {
- // If the operand isn't stackified, insert a COPY to read the operand and
- // stackify it.
- MachineOperand &MO = MI.getOperand(0);
- unsigned Reg = MO.getReg();
+ for (auto &MO : MI.explicit_operands()) {
+ // If the operand isn't stackified, insert a COPY to read the operands and
+ // stackify them.
+ Register Reg = MO.getReg();
if (!MFI.isVRegStackified(Reg)) {
- unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+ unsigned CopyLocalOpc;
+ const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
+ switch (RegClass->getID()) {
+ case WebAssembly::I32RegClassID:
+ CopyLocalOpc = WebAssembly::COPY_I32;
+ break;
+ case WebAssembly::I64RegClassID:
+ CopyLocalOpc = WebAssembly::COPY_I64;
+ break;
+ case WebAssembly::F32RegClassID:
+ CopyLocalOpc = WebAssembly::COPY_F32;
+ break;
+ case WebAssembly::F64RegClassID:
+ CopyLocalOpc = WebAssembly::COPY_F64;
+ break;
+ case WebAssembly::V128RegClassID:
+ CopyLocalOpc = WebAssembly::COPY_V128;
+ break;
+ case WebAssembly::EXNREFRegClassID:
+ CopyLocalOpc = WebAssembly::COPY_EXNREF;
+ break;
+ default:
+ llvm_unreachable("Unexpected register class for return operand");
+ }
+ Register NewReg = MRI.createVirtualRegister(RegClass);
BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg)
.addReg(Reg);
MO.setReg(NewReg);
@@ -104,8 +125,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
}
}
- // Rewrite the return.
- MI.setDesc(TII.get(FallthroughOpc));
+ MI.setDesc(TII.get(WebAssembly::FALLTHROUGH_RETURN));
return true;
}
@@ -120,7 +140,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
const WebAssemblyTargetLowering &TLI =
*MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
- auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &LibInfo =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(MF.getFunction());
bool Changed = false;
for (auto &MBB : MF)
@@ -143,8 +164,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
report_fatal_error("Peephole: call to builtin function with "
"wrong signature, not consuming reg");
MachineOperand &MO = MI.getOperand(0);
- unsigned OldReg = MO.getReg();
- unsigned NewReg = Op2.getReg();
+ Register OldReg = MO.getReg();
+ Register NewReg = Op2.getReg();
if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg))
report_fatal_error("Peephole: call to builtin function with "
@@ -156,60 +177,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
break;
}
// Optimize away an explicit void return at the end of the function.
- case WebAssembly::RETURN_I32:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I32,
- WebAssembly::COPY_I32);
- break;
- case WebAssembly::RETURN_I64:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I64,
- WebAssembly::COPY_I64);
- break;
- case WebAssembly::RETURN_F32:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F32,
- WebAssembly::COPY_F32);
- break;
- case WebAssembly::RETURN_F64:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64,
- WebAssembly::COPY_F64);
- break;
- case WebAssembly::RETURN_v16i8:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v16i8,
- WebAssembly::COPY_V128);
- break;
- case WebAssembly::RETURN_v8i16:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v8i16,
- WebAssembly::COPY_V128);
- break;
- case WebAssembly::RETURN_v4i32:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32,
- WebAssembly::COPY_V128);
- break;
- case WebAssembly::RETURN_v2i64:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2i64,
- WebAssembly::COPY_V128);
- break;
- case WebAssembly::RETURN_v4f32:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32,
- WebAssembly::COPY_V128);
- break;
- case WebAssembly::RETURN_v2f64:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2f64,
- WebAssembly::COPY_V128);
- break;
- case WebAssembly::RETURN_VOID:
- Changed |= maybeRewriteToFallthrough(
- MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID,
- WebAssembly::INSTRUCTION_LIST_END);
+ case WebAssembly::RETURN:
+ Changed |= maybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII);
break;
}
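A sketch of the consolidated rewrite above (illustrative MIR-style notation; the virtual register numbers are hypothetical):

// Before, at the end of the last basic block:
//   %1:i32 = CONST_I32 42
//   RETURN %1
// After, when %1 is not stackified: a COPY whose opcode is picked from
// %1's register class (COPY_I32 here) makes the operand stackifiable,
// then the return becomes a fallthrough:
//   %2:i32 = COPY_I32 %1
//   FALLTHROUGH_RETURN %2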
diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index 3bfbf607344d..799b9388097c 100644
--- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -95,7 +95,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
// TODO: This is fairly heavy-handed; find a better approach.
//
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
// Skip unused registers.
if (MRI.use_nodbg_empty(Reg))
diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index 6f09c45b6642..043b6f1b7d18 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -98,7 +98,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Interesting register intervals:\n");
for (unsigned I = 0; I < NumVRegs; ++I) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned VReg = Register::index2VirtReg(I);
if (MFI.isVRegStackified(VReg))
continue;
// Skip unused registers, which can use $drop.
@@ -157,9 +157,8 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
Changed |= Old != New;
UsedColors.set(Color);
Assignments[Color].push_back(LI);
- LLVM_DEBUG(
- dbgs() << "Assigning vreg" << TargetRegisterInfo::virtReg2Index(LI->reg)
- << " to vreg" << TargetRegisterInfo::virtReg2Index(New) << "\n");
+ LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg)
+ << " to vreg" << Register::virtReg2Index(New) << "\n");
}
if (!Changed)
return false;
diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index cdca23f55b29..72e7a7cf5042 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -89,7 +89,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
// Start the numbering for locals after the arg regs
unsigned CurReg = MFI.getParams().size();
for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(VRegIdx);
+ unsigned VReg = Register::index2VirtReg(VRegIdx);
// Skip unused registers.
if (MRI.use_empty(VReg))
continue;
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index a120a6471014..421d353a89e8 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -120,7 +120,7 @@ static void convertImplicitDefToConstZero(MachineInstr *MI,
Type::getDoubleTy(MF.getFunction().getContext())));
MI->addOperand(MachineOperand::CreateFPImm(Val));
} else if (RegClass == &WebAssembly::V128RegClass) {
- unsigned TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32));
MI->addOperand(MachineOperand::CreateReg(TempReg, false));
MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
@@ -334,14 +334,14 @@ static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
for (const MachineOperand &MO : Def->operands()) {
if (!MO.isReg() || MO.isUndef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// If the register is dead here and at Insert, ignore it.
if (MO.isDead() && Insert->definesRegister(Reg) &&
!Insert->readsRegister(Reg))
continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
// Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
// from moving down, and we've already checked for that.
if (Reg == WebAssembly::ARGUMENTS)
@@ -436,8 +436,8 @@ static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
const MachineOperand &MO = UseInst->getOperand(0);
if (!MO.isReg())
return false;
- unsigned DefReg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
+ Register DefReg = MO.getReg();
+ if (!Register::isVirtualRegister(DefReg) ||
!MFI.isVRegStackified(DefReg))
return false;
assert(MRI.hasOneNonDBGUse(DefReg));
@@ -499,7 +499,7 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
} else {
// The register may have unrelated uses or defs; create a new register for
// just our one def and use so that we can stackify it.
- unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+ Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
Def->getOperand(0).setReg(NewReg);
Op.setReg(NewReg);
@@ -535,7 +535,7 @@ static MachineInstr *rematerializeCheapDef(
WebAssemblyDebugValueManager DefDIs(&Def);
- unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+ Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
Op.setReg(NewReg);
MachineInstr *Clone = &*std::prev(Insert);
@@ -607,8 +607,8 @@ static MachineInstr *moveAndTeeForMultiUse(
// Create the Tee and attach the registers.
const auto *RegClass = MRI.getRegClass(Reg);
- unsigned TeeReg = MRI.createVirtualRegister(RegClass);
- unsigned DefReg = MRI.createVirtualRegister(RegClass);
+ Register TeeReg = MRI.createVirtualRegister(RegClass);
+ Register DefReg = MRI.createVirtualRegister(RegClass);
MachineOperand &DefMO = Def->getOperand(0);
MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
TII->get(getTeeOpcode(RegClass)), TeeReg)
@@ -807,11 +807,11 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
if (!Op.isReg())
continue;
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
assert(Op.isUse() && "explicit_uses() should only iterate over uses");
assert(!Op.isImplicit() &&
"explicit_uses() should only iterate over explicit operands");
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
// Identify the definition for this register at this point.
@@ -915,7 +915,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
for (MachineOperand &MO : reverse(MI.explicit_operands())) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (MFI.isVRegStackified(Reg)) {
if (MO.isDef())
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index ea9cfc00adfd..789a025794ea 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -91,8 +91,8 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
if (MI.getOpcode() == WebAssembly::ADD_I32) {
MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum);
if (OtherMO.isReg()) {
- unsigned OtherMOReg = OtherMO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(OtherMOReg)) {
+ Register OtherMOReg = OtherMO.getReg();
+ if (Register::isVirtualRegister(OtherMOReg)) {
MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg);
// TODO: For now we just opportunistically do this in the case where
// the CONST_I32 happens to have exactly one def and one use. We
@@ -117,7 +117,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
// Create i32.add SP, offset and make it the operand.
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
- unsigned OffsetOp = MRI.createVirtualRegister(PtrRC);
+ Register OffsetOp = MRI.createVirtualRegister(PtrRC);
BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
OffsetOp)
.addImm(FrameOffset);
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 7e65368e671a..bdf5fe2620a4 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -140,7 +140,7 @@ WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU,
std::string FS) const {
auto &I = SubtargetMap[CPU + FS];
if (!I) {
- I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+ I = std::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
}
return I.get();
}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 46ef765ce0f4..1c53e90daea7 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -25,10 +25,11 @@ WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
return TargetTransformInfo::PSK_FastHardware;
}
-unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) {
- unsigned Result = BaseT::getNumberOfRegisters(Vector);
+unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ unsigned Result = BaseT::getNumberOfRegisters(ClassID);
// For SIMD, use at least 16 registers, as a rough guess.
+ bool Vector = (ClassID == 1);
if (Vector)
Result = std::max(Result, 16u);
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 1b11b4b631eb..f0ecc73e91de 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -53,7 +53,7 @@ public:
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index e9d88d4818a5..a237da8154ab 100644
--- a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -32,9 +32,8 @@ bool WebAssembly::isChild(const MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(0);
if (!MO.isReg() || MO.isImplicit() || !MO.isDef())
return false;
- unsigned Reg = MO.getReg();
- return TargetRegisterInfo::isVirtualRegister(Reg) &&
- MFI.isVRegStackified(Reg);
+ Register Reg = MO.getReg();
+ return Register::isVirtualRegister(Reg) && MFI.isVRegStackified(Reg);
}
bool WebAssembly::mayThrow(const MachineInstr &MI) {
@@ -51,7 +50,21 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) {
return false;
const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode()));
- assert(MO.isGlobal());
+ assert(MO.isGlobal() || MO.isSymbol());
+
+ if (MO.isSymbol()) {
+ // Some intrinsics are lowered to calls to external symbols, which are then
+ // lowered to calls to library functions. Most libcalls don't throw, but
+ // we only list some of them here for now.
+ // TODO: Consider adding 'nounwind' info in TargetLowering::CallLoweringInfo
+ // instead for more accurate info.
+ const char *Name = MO.getSymbolName();
+ if (strcmp(Name, "memcpy") == 0 || strcmp(Name, "memmove") == 0 ||
+ strcmp(Name, "memset") == 0)
+ return false;
+ return true;
+ }
+
const auto *F = dyn_cast<Function>(MO.getGlobal());
if (!F)
return true;
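An illustration of the path the new symbol case handles (the intrinsic call below is a sketch):

// call void @llvm.memcpy.p0i8.p0i8.i32(...) is lowered to a call to the
// external symbol "memcpy", so the callee operand is a symbol rather than
// a Function. libc memcpy/memmove/memset cannot throw, so mayThrow() can
// answer false for those three; any other symbol stays a conservative true.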
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 95cbf46d37ed..25be79ec2b1e 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -870,6 +870,14 @@ private:
bool parseDirectiveFPOEndProc(SMLoc L);
bool parseDirectiveFPOData(SMLoc L);
+ /// SEH directives.
+ bool parseSEHRegisterNumber(unsigned RegClassID, unsigned &RegNo);
+ bool parseDirectiveSEHPushReg(SMLoc);
+ bool parseDirectiveSEHSetFrame(SMLoc);
+ bool parseDirectiveSEHSaveReg(SMLoc);
+ bool parseDirectiveSEHSaveXMM(SMLoc);
+ bool parseDirectiveSEHPushFrame(SMLoc);
+
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
@@ -955,6 +963,8 @@ private:
public:
enum X86MatchResultTy {
Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "X86GenAsmMatcher.inc"
};
X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
@@ -3173,6 +3183,13 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
EmitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
+ case Match_InvalidImmUnsignedi4: {
+ SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
+ EmptyRange, MatchingInlineAsm);
+ }
case Match_MissingFeature:
return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
case Match_InvalidOperand:
@@ -3520,6 +3537,15 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchingInlineAsm);
}
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_InvalidImmUnsignedi4) == 1) {
+ SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
+ EmptyRange, MatchingInlineAsm);
+ }
+
// If all of these were an outright failure, report it in a useless way.
return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
MatchingInlineAsm);
@@ -3572,6 +3598,16 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endproc")
return parseDirectiveFPOEndProc(DirectiveID.getLoc());
+ else if (IDVal == ".seh_pushreg")
+ return parseDirectiveSEHPushReg(DirectiveID.getLoc());
+ else if (IDVal == ".seh_setframe")
+ return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
+ else if (IDVal == ".seh_savereg")
+ return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
+ else if (IDVal == ".seh_savexmm")
+ return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
+ else if (IDVal == ".seh_pushframe")
+ return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
return true;
}
@@ -3708,6 +3744,140 @@ bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
return getTargetStreamer().emitFPOEndProc(L);
}
+bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
+ unsigned &RegNo) {
+ SMLoc startLoc = getLexer().getLoc();
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+
+ // Try parsing the argument as a register first.
+ if (getLexer().getTok().isNot(AsmToken::Integer)) {
+ SMLoc endLoc;
+ if (ParseRegister(RegNo, startLoc, endLoc))
+ return true;
+
+ if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
+ return Error(startLoc,
+ "register is not supported for use with this directive");
+ }
+ } else {
+ // Otherwise, an integer number matching the encoding of the desired
+ // register may appear.
+ int64_t EncodedReg;
+ if (getParser().parseAbsoluteExpression(EncodedReg))
+ return true;
+
+ // The SEH register number is the same as the encoding register number. Map
+ // from the encoding back to the LLVM register number.
+ RegNo = 0;
+ for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
+ if (MRI->getEncodingValue(Reg) == EncodedReg) {
+ RegNo = Reg;
+ break;
+ }
+ }
+ if (RegNo == 0) {
+ return Error(startLoc,
+ "incorrect register number for use with this directive");
+ }
+ }
+
+ return false;
+}
+
+bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
+ unsigned Reg = 0;
+ if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getParser().Lex();
+ getStreamer().EmitWinCFIPushReg(Reg, Loc);
+ return false;
+}
+
+bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
+ unsigned Reg = 0;
+ int64_t Off;
+ if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify a stack pointer offset");
+
+ getParser().Lex();
+ if (getParser().parseAbsoluteExpression(Off))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getParser().Lex();
+ getStreamer().EmitWinCFISetFrame(Reg, Off, Loc);
+ return false;
+}
+
+bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
+ unsigned Reg = 0;
+ int64_t Off;
+ if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify an offset on the stack");
+
+ getParser().Lex();
+ if (getParser().parseAbsoluteExpression(Off))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getParser().Lex();
+ getStreamer().EmitWinCFISaveReg(Reg, Off, Loc);
+ return false;
+}
+
+bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
+ unsigned Reg = 0;
+ int64_t Off;
+ if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify an offset on the stack");
+
+ getParser().Lex();
+ if (getParser().parseAbsoluteExpression(Off))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getParser().Lex();
+ getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc);
+ return false;
+}
+
+bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
+ bool Code = false;
+ StringRef CodeID;
+ if (getLexer().is(AsmToken::At)) {
+ SMLoc startLoc = getLexer().getLoc();
+ getParser().Lex();
+ if (!getParser().parseIdentifier(CodeID)) {
+ if (CodeID != "code")
+ return Error(startLoc, "expected @code");
+ Code = true;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getParser().Lex();
+ getStreamer().EmitWinCFIPushFrame(Code, Loc);
+ return false;
+}
+
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
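For reference, a hedged sketch of the syntax the new SEH parsers accept (offsets are illustrative; per parseSEHRegisterNumber, a register may be written by name or by its raw SEH encoding number):

// .seh_pushreg %rbp        // or: .seh_pushreg 5 (RBP's SEH encoding)
// .seh_setframe %rbp, 16   // GR64 frame register, stack pointer offset
// .seh_savereg %rsi, 24    // GR64 register, stack offset
// .seh_savexmm %xmm6, 32   // VR128X register, stack offset
// .seh_pushframe @code     // optional @code marker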
diff --git a/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
index 5bc979d1f18c..e9be28ca77b0 100644
--- a/lib/Target/X86/AsmParser/X86AsmParserCommon.h
+++ b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
@@ -35,6 +35,10 @@ inline bool isImmUnsignedi8Value(uint64_t Value) {
return isUInt<8>(Value) || isInt<8>(Value);
}
+inline bool isImmUnsignedi4Value(uint64_t Value) {
+ return isUInt<4>(Value);
+}
+
} // End of namespace llvm
#endif
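A quick self-contained check of the new predicate's range, mirroring the [0, 15] diagnostic the parser emits:

#include "llvm/Support/MathExtras.h"

static_assert(llvm::isUInt<4>(15), "15 fits in 4 unsigned bits");
static_assert(!llvm::isUInt<4>(16), "16 does not; valid range is [0, 15]");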
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index a771ba366318..3a76d023e640 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -260,6 +260,15 @@ struct X86Operand final : public MCParsedAsmOperand {
return isImmSExti64i32Value(CE->getValue());
}
+ bool isImmUnsignedi4() const {
+ if (!isImm()) return false;
+ // If this isn't a constant expr, reject it. The immediate byte is shared
+ // with a register encoding. We can't have it affected by a relocation.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ return isImmUnsignedi4Value(CE->getValue());
+ }
+
bool isImmUnsignedi8() const {
if (!isImm()) return false;
// If this isn't a constant expr, just assume it fits and let relaxation
@@ -491,7 +500,7 @@ struct X86Operand final : public MCParsedAsmOperand {
void addGR32orGR64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- unsigned RegNo = getReg();
+ MCRegister RegNo = getReg();
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
RegNo = getX86SubSuperRegister(RegNo, 32);
Inst.addOperand(MCOperand::createReg(RegNo));
@@ -572,7 +581,7 @@ struct X86Operand final : public MCParsedAsmOperand {
static std::unique_ptr<X86Operand> CreateToken(StringRef Str, SMLoc Loc) {
SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
- auto Res = llvm::make_unique<X86Operand>(Token, Loc, EndLoc);
+ auto Res = std::make_unique<X86Operand>(Token, Loc, EndLoc);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
return Res;
@@ -582,7 +591,7 @@ struct X86Operand final : public MCParsedAsmOperand {
CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc(),
StringRef SymName = StringRef(), void *OpDecl = nullptr) {
- auto Res = llvm::make_unique<X86Operand>(Register, StartLoc, EndLoc);
+ auto Res = std::make_unique<X86Operand>(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
Res->AddressOf = AddressOf;
Res->OffsetOfLoc = OffsetOfLoc;
@@ -593,19 +602,19 @@ struct X86Operand final : public MCParsedAsmOperand {
static std::unique_ptr<X86Operand>
CreateDXReg(SMLoc StartLoc, SMLoc EndLoc) {
- return llvm::make_unique<X86Operand>(DXRegister, StartLoc, EndLoc);
+ return std::make_unique<X86Operand>(DXRegister, StartLoc, EndLoc);
}
static std::unique_ptr<X86Operand>
CreatePrefix(unsigned Prefixes, SMLoc StartLoc, SMLoc EndLoc) {
- auto Res = llvm::make_unique<X86Operand>(Prefix, StartLoc, EndLoc);
+ auto Res = std::make_unique<X86Operand>(Prefix, StartLoc, EndLoc);
Res->Pref.Prefixes = Prefixes;
return Res;
}
static std::unique_ptr<X86Operand> CreateImm(const MCExpr *Val,
SMLoc StartLoc, SMLoc EndLoc) {
- auto Res = llvm::make_unique<X86Operand>(Immediate, StartLoc, EndLoc);
+ auto Res = std::make_unique<X86Operand>(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
return Res;
}
@@ -615,7 +624,7 @@ struct X86Operand final : public MCParsedAsmOperand {
CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
unsigned Size = 0, StringRef SymName = StringRef(),
void *OpDecl = nullptr, unsigned FrontendSize = 0) {
- auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
+ auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = 0;
@@ -643,7 +652,7 @@ struct X86Operand final : public MCParsedAsmOperand {
// The scale should always be one of {1,2,4,8}.
assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
"Invalid scale!");
- auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
+ auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = SegReg;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = BaseReg;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index a241362a271d..e287f6625115 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -12,13 +12,14 @@
//
//===----------------------------------------------------------------------===//
+#include "X86DisassemblerDecoder.h"
+#include "llvm/ADT/StringRef.h"
+
#include <cstdarg> /* for va_*() */
#include <cstdio> /* for vsnprintf() */
#include <cstdlib> /* for exit() */
#include <cstring> /* for memset() */
-#include "X86DisassemblerDecoder.h"
-
using namespace llvm::X86Disassembler;
/// Specifies whether a ModR/M byte is needed and (if so) which
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 54413fa1a02f..f08fcb575bf0 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -287,7 +287,7 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// Relax if the value is too big for a (signed) i8.
- return int64_t(Value) != int64_t(int8_t(Value));
+ return !isInt<8>(Value);
}
// FIXME: Can tblgen help at all here to verify there aren't other instructions
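The isInt<8> rewrite above is behavior-preserving; a worked check with an illustrative value:

#include "llvm/Support/MathExtras.h"

// Old: int64_t(Value) != int64_t(int8_t(Value))
//   Value = 130 -> int8_t(130) == -126 -> 130 != -126 -> relax.
// New: !isInt<8>(Value)
//   isInt<8>(130) == false -> relax (same answer, clearer intent).
static_assert(llvm::isInt<8>(-128) && llvm::isInt<8>(127), "signed i8 range");
static_assert(!llvm::isInt<8>(128), "out of range, needs relaxation");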
@@ -557,7 +557,7 @@ protected:
// If the frame pointer is other than esp/rsp, we do not have a way to
// generate a compact unwinding representation, so bail out.
- if (MRI.getLLVMRegNum(Inst.getRegister(), true) !=
+ if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
(Is64Bit ? X86::RBP : X86::EBP))
return 0;
@@ -605,7 +605,7 @@ protected:
// unwind encoding.
return CU::UNWIND_MODE_DWARF;
- unsigned Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
SavedRegs[SavedRegIdx++] = Reg;
StackAdjust += OffsetSize;
InstrOffset += PushInstrSize(Reg);
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 232a06593238..bd009da60851 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -46,10 +46,10 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
enum X86_64RelType { RT64_NONE, RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
-static X86_64RelType getType64(unsigned Kind,
+static X86_64RelType getType64(MCFixupKind Kind,
MCSymbolRefExpr::VariantKind &Modifier,
bool &IsPCRel) {
- switch (Kind) {
+ switch (unsigned(Kind)) {
default:
llvm_unreachable("Unimplemented");
case FK_NONE:
@@ -97,7 +97,7 @@ static void checkIs32(MCContext &Ctx, SMLoc Loc, X86_64RelType Type) {
static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
MCSymbolRefExpr::VariantKind Modifier,
X86_64RelType Type, bool IsPCRel,
- unsigned Kind) {
+ MCFixupKind Kind) {
switch (Modifier) {
default:
llvm_unreachable("Unimplemented");
@@ -202,7 +202,7 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
// and we want to keep back-compatibility.
if (!Ctx.getAsmInfo()->canRelaxRelocations())
return ELF::R_X86_64_GOTPCREL;
- switch (Kind) {
+ switch (unsigned(Kind)) {
default:
return ELF::R_X86_64_GOTPCREL;
case X86::reloc_riprel_4byte_relax:
@@ -237,7 +237,7 @@ static X86_32RelType getType32(X86_64RelType T) {
static unsigned getRelocType32(MCContext &Ctx,
MCSymbolRefExpr::VariantKind Modifier,
X86_32RelType Type, bool IsPCRel,
- unsigned Kind) {
+ MCFixupKind Kind) {
switch (Modifier) {
default:
llvm_unreachable("Unimplemented");
@@ -265,8 +265,9 @@ static unsigned getRelocType32(MCContext &Ctx,
if (!Ctx.getAsmInfo()->canRelaxRelocations())
return ELF::R_386_GOT32;
- return Kind == X86::reloc_signed_4byte_relax ? ELF::R_386_GOT32X
- : ELF::R_386_GOT32;
+ return Kind == MCFixupKind(X86::reloc_signed_4byte_relax)
+ ? ELF::R_386_GOT32X
+ : ELF::R_386_GOT32;
case MCSymbolRefExpr::VK_GOTOFF:
assert(Type == RT32_32);
assert(!IsPCRel);
@@ -317,7 +318,7 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
- unsigned Kind = Fixup.getKind();
+ MCFixupKind Kind = Fixup.getKind();
X86_64RelType Type = getType64(Kind, Modifier, IsPCRel);
if (getEMachine() == ELF::EM_X86_64)
return getRelocType64(Ctx, Fixup.getLoc(), Modifier, Type, IsPCRel, Kind);
@@ -329,5 +330,5 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
std::unique_ptr<MCObjectTargetWriter>
llvm::createX86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine) {
- return llvm::make_unique<X86ELFObjectWriter>(IsELF64, OSABI, EMachine);
+ return std::make_unique<X86ELFObjectWriter>(IsELF64, OSABI, EMachine);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index e1125c176b25..d986c829d98e 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -163,5 +163,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
TextAlignFillValue = 0x90;
+ AllowAtInName = true;
+
UseIntegratedAssembler = true;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 31d26d08a63f..ac36bf3a12fa 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -862,6 +862,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_B = ~(BaseRegEnc >> 3) & 1;
unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg);
VEX_X = ~(IndexRegEnc >> 3) & 1;
+ if (!HasVEX_4V) // Only needed with VSIB, which doesn't use VVVV.
+ EVEX_V2 = ~(IndexRegEnc >> 4) & 1;
+
break;
}
case X86II::MRMSrcReg: {
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index ce05ad974507..ced9eacc8b97 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -70,6 +70,10 @@ unsigned X86_MC::getDwarfRegFlavour(const Triple &TT, bool isEH) {
return DWARFFlavour::X86_32_Generic;
}
+bool X86_MC::hasLockPrefix(const MCInst &MI) {
+ return MI.getFlags() & X86::IP_HAS_LOCK;
+}
+
void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
// FIXME: TableGen these.
for (unsigned Reg = X86::NoRegister + 1; Reg < X86::NUM_TARGET_REGS; ++Reg) {
@@ -399,6 +403,9 @@ public:
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
uint64_t GotSectionVA,
const Triple &TargetTriple) const override;
+ Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
+ uint64_t Addr,
+ uint64_t Size) const override;
};
#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
@@ -511,7 +518,31 @@ std::vector<std::pair<uint64_t, uint64_t>> X86MCInstrAnalysis::findPltEntries(
return findX86_64PltEntries(PltSectionVA, PltContents);
default:
return {};
- }
+ }
+}
+
+Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress(
+ const MCInst &Inst, uint64_t Addr, uint64_t Size) const {
+ const MCInstrDesc &MCID = Info->get(Inst.getOpcode());
+ int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags);
+ if (MemOpStart == -1)
+ return None;
+ MemOpStart += X86II::getOperandBias(MCID);
+
+ const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg);
+ const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg);
+ const MCOperand &IndexReg = Inst.getOperand(MemOpStart + X86::AddrIndexReg);
+ const MCOperand &ScaleAmt = Inst.getOperand(MemOpStart + X86::AddrScaleAmt);
+ const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp);
+ if (SegReg.getReg() != 0 || IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 ||
+ !Disp.isImm())
+ return None;
+
+ // RIP-relative addressing.
+ if (BaseReg.getReg() == X86::RIP)
+ return Addr + Size + Disp.getImm();
+
+ return None;
}
} // end of namespace X86_MC
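A worked example for the RIP-relative branch above (the numbers are illustrative):

// An instruction decoded at Addr = 0x1000 with Size = 7 bytes and a
// memory operand [rip + 0x20] evaluates to
//   Addr + Size + Disp = 0x1000 + 7 + 0x20 = 0x1027
// x86-64 RIP-relative displacements are relative to the address of the
// *next* instruction, which is why Size is added.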
@@ -567,13 +598,13 @@ extern "C" void LLVMInitializeX86TargetMC() {
createX86_64AsmBackend);
}
-unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size,
- bool High) {
+MCRegister llvm::getX86SubSuperRegisterOrZero(MCRegister Reg, unsigned Size,
+ bool High) {
switch (Size) {
- default: return 0;
+ default: return X86::NoRegister;
case 8:
if (High) {
- switch (Reg) {
+ switch (Reg.id()) {
default: return getX86SubSuperRegisterOrZero(Reg, 64);
case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::SI;
@@ -593,8 +624,8 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size,
return X86::BH;
}
} else {
- switch (Reg) {
- default: return 0;
+ switch (Reg.id()) {
+ default: return X86::NoRegister;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AL;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -630,8 +661,8 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size,
}
}
case 16:
- switch (Reg) {
- default: return 0;
+ switch (Reg.id()) {
+ default: return X86::NoRegister;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -666,8 +697,8 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size,
return X86::R15W;
}
case 32:
- switch (Reg) {
- default: return 0;
+ switch (Reg.id()) {
+ default: return X86::NoRegister;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::EAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -702,7 +733,7 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size,
return X86::R15D;
}
case 64:
- switch (Reg) {
+ switch (Reg.id()) {
default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::RAX;
@@ -740,9 +771,9 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size,
}
}
-unsigned llvm::getX86SubSuperRegister(unsigned Reg, unsigned Size, bool High) {
- unsigned Res = getX86SubSuperRegisterOrZero(Reg, Size, High);
- assert(Res != 0 && "Unexpected register or VT");
+MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High) {
+ MCRegister Res = getX86SubSuperRegisterOrZero(Reg, Size, High);
+ assert(Res != X86::NoRegister && "Unexpected register or VT");
return Res;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 00dd5908cbf5..0c789061f0e1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/DataTypes.h"
#include <string>
@@ -57,6 +58,10 @@ unsigned getDwarfRegFlavour(const Triple &TT, bool isEH);
void initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI);
+
+/// Returns true if this instruction has a LOCK prefix.
+bool hasLockPrefix(const MCInst &MI);
+
/// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
/// do not need to go through TargetRegistry.
MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU,
@@ -111,12 +116,12 @@ createX86WinCOFFObjectWriter(bool Is64Bit);
/// Returns the sub or super register of a specific X86 register.
/// e.g. getX86SubSuperRegister(X86::EAX, 16) returns X86::AX.
/// Aborts on error.
-unsigned getX86SubSuperRegister(unsigned, unsigned, bool High=false);
+MCRegister getX86SubSuperRegister(MCRegister, unsigned, bool High=false);
/// Returns the sub or super register of a specific X86 register.
/// Like getX86SubSuperRegister() but returns 0 on error.
-unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned,
- bool High = false);
+MCRegister getX86SubSuperRegisterOrZero(MCRegister, unsigned,
+ bool High = false);
} // End llvm namespace
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index fc7e99f61e5e..b67a7508fe72 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -276,7 +276,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
// x86_64 distinguishes movq foo@GOTPCREL so that the linker can
// rewrite the movq to an leaq at link time if the symbol ends up in
// the same linkage unit.
- if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
+ if (Fixup.getTargetKind() == X86::reloc_riprel_4byte_movq_load)
Type = MachO::X86_64_RELOC_GOT_LOAD;
else
Type = MachO::X86_64_RELOC_GOT;
@@ -339,8 +339,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
return;
} else {
Type = MachO::X86_64_RELOC_UNSIGNED;
- unsigned Kind = Fixup.getKind();
- if (Kind == X86::reloc_signed_4byte) {
+ if (Fixup.getTargetKind() == X86::reloc_signed_4byte) {
Asm.getContext().reportError(
Fixup.getLoc(),
"32-bit absolute addressing is not supported in 64-bit mode");
@@ -600,5 +599,5 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
std::unique_ptr<MCObjectTargetWriter>
llvm::createX86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
- return llvm::make_unique<X86MachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
+ return std::make_unique<X86MachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 3baab9da1c41..760239f76505 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -109,5 +109,5 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
std::unique_ptr<MCObjectTargetWriter>
llvm::createX86WinCOFFObjectWriter(bool Is64Bit) {
- return llvm::make_unique<X86WinCOFFObjectWriter>(Is64Bit);
+ return std::make_unique<X86WinCOFFObjectWriter>(Is64Bit);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 796a27a17255..db624378d517 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -35,8 +35,9 @@ void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
MCStreamer::EmitWinEHHandlerData(Loc);
// We have to emit the unwind info now, because this directive
- // actually switches to the .xdata section!
- EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo());
+ // actually switches to the .xdata section.
+ if (WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo())
+ EHStreamer.EmitUnwindInfo(*this, CurFrame);
}
void X86WinCOFFStreamer::EmitWindowsUnwindTables() {
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index e9987d1f62bd..d5494ef12370 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -170,7 +170,7 @@ bool X86WinCOFFTargetStreamer::emitFPOProc(const MCSymbol *ProcSym,
L, "opening new .cv_fpo_proc before closing previous frame");
return true;
}
- CurFPOData = llvm::make_unique<FPOData>();
+ CurFPOData = std::make_unique<FPOData>();
CurFPOData->Function = ProcSym;
CurFPOData->Begin = emitFPOLabel();
CurFPOData->ParamsSize = ParamsSize;
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index a95f68434d12..6840fc12751d 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -81,6 +81,12 @@ FunctionPass *createX86FlagsCopyLoweringPass();
/// Return a pass that expands WinAlloca pseudo-instructions.
FunctionPass *createX86WinAllocaExpander();
+/// Return a pass that inserts int3 at the end of the function if it ends with a
+/// CALL instruction. The pass does the same for each funclet as well. This
+/// ensures that the open interval of function start and end PCs contains all
+/// return addresses for the benefit of the Windows x64 unwinder.
+FunctionPass *createX86AvoidTrailingCallPass();
+
/// Return a pass that optimizes the code-size of x86 call sequences. This is
/// done by replacing esp-relative movs with pushes.
FunctionPass *createX86CallFrameOptimization();
@@ -137,13 +143,13 @@ void initializeWinEHStatePassPass(PassRegistry &);
void initializeX86AvoidSFBPassPass(PassRegistry &);
void initializeX86CallFrameOptimizationPass(PassRegistry &);
void initializeX86CmovConverterPassPass(PassRegistry &);
-void initializeX86ExpandPseudoPass(PassRegistry&);
void initializeX86CondBrFoldingPassPass(PassRegistry &);
void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
+void initializeX86ExpandPseudoPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
-
} // End llvm namespace
#endif
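A sketch of the problem the new pass addresses (illustrative assembly in the comments; the callee name is hypothetical):

// f:
//   ...
//   callq __assert_fail   # noreturn call as the last instruction of f
// The pushed return address equals f's end PC, which the Windows x64
// unwinder can attribute to the next function. Appending an int3 after
// the call keeps every return address inside f's PC range:
//   int3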
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 3112f00c91f2..d8631aca2734 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -95,7 +95,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
- "64-bit with cmpxchg16b">;
+ "64-bit with cmpxchg16b",
+ [FeatureCMPXCHG8B]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
@@ -240,8 +241,11 @@ def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
"Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
"Support ptwrite instruction">;
-def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
- "Support MPX instructions">;
+// FIXME: This feature is deprecated in 10.0 and should not be used for
+// anything, but removing it would break IR files that may contain it in a
+// target-feature attribute.
+def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false",
+ "Deprecated. Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
@@ -374,6 +378,10 @@ def FeatureHasFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast">;
+def FeaturePrefer128Bit
+ : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
+ "Prefer 128-bit AVX instructions">;
+
def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
@@ -449,6 +457,10 @@ def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
"Merge branches to a three-way "
"conditional branch">;
+// Enable use of alias analysis during code generation.
+def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
+ "Use alias analysis during codegen">;
+
// Bonnell
def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
// Silvermont
@@ -579,7 +591,6 @@ def ProcessorFeatures {
// Skylake
list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
- FeatureMPX,
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT,
@@ -594,6 +605,7 @@ def ProcessorFeatures {
// Skylake-AVX512
list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAVX512,
+ FeaturePrefer256Bit,
FeatureCDI,
FeatureDQI,
FeatureBWI,
@@ -627,6 +639,7 @@ def ProcessorFeatures {
// Cannonlake
list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
+ FeaturePrefer256Bit,
FeatureCDI,
FeatureDQI,
FeatureBWI,
@@ -665,6 +678,17 @@ def ProcessorFeatures {
list<SubtargetFeature> ICXFeatures =
!listconcat(ICLInheritableFeatures, ICXSpecificFeatures);
+ //Tigerlake
+ list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
+ FeatureMOVDIRI,
+ FeatureMOVDIR64B,
+ FeatureSHSTK];
+ list<SubtargetFeature> TGLSpecificFeatures = [FeatureHasFastGather];
+ list<SubtargetFeature> TGLInheritableFeatures =
+ !listconcat(TGLAdditionalFeatures ,TGLSpecificFeatures);
+ list<SubtargetFeature> TGLFeatures =
+ !listconcat(ICLFeatures, TGLInheritableFeatures );
+
// Atom
list<SubtargetFeature> AtomInheritableFeatures = [FeatureX87,
FeatureCMPXCHG8B,
@@ -707,7 +731,6 @@ def ProcessorFeatures {
// Goldmont
list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
- FeatureMPX,
FeatureSHA,
FeatureRDSEED,
FeatureXSAVE,
@@ -786,6 +809,22 @@ def ProcessorFeatures {
list<SubtargetFeature> KNMFeatures =
!listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
+ // Barcelona
+ list<SubtargetFeature> BarcelonaInheritableFeatures = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureSSE4A,
+ Feature3DNowA,
+ FeatureFXSR,
+ FeatureNOPL,
+ FeatureCMPXCHG16B,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF,
+ FeatureCMOV,
+ Feature64Bit,
+ FeatureFastScalarShiftMasks];
+ list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures;
// Bobcat
list<SubtargetFeature> BtVer1InheritableFeatures = [FeatureX87,
@@ -1093,6 +1132,8 @@ def : ProcessorModel<"icelake-client", SkylakeServerModel,
ProcessorFeatures.ICLFeatures>;
def : ProcessorModel<"icelake-server", SkylakeServerModel,
ProcessorFeatures.ICXFeatures>;
+def : ProcessorModel<"tigerlake", SkylakeServerModel,
+ ProcessorFeatures.TGLFeatures>;
// AMD CPUs.
@@ -1129,10 +1170,7 @@ foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
}
foreach P = ["amdfam10", "barcelona"] in {
- def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV,
- Feature64Bit, FeatureFastScalarShiftMasks]>;
+ def : Proc<P, ProcessorFeatures.BarcelonaFeatures>;
}
// Bobcat
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 80120722e0e6..8d27be30a277 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -242,7 +242,7 @@ void X86AsmPrinter::PrintModifiedOperand(const MachineInstr *MI, unsigned OpNo,
return PrintOperand(MI, OpNo, O);
if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
O << '%';
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 :
(strcmp(Modifier+6,"32") == 0) ? 32 :
@@ -388,7 +388,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
char Mode, raw_ostream &O) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
bool EmitPercent = true;
if (!X86::GR8RegClass.contains(Reg) &&
@@ -575,7 +575,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
// Emitting note header.
int WordSize = TT.isArch64Bit() ? 8 : 4;
- EmitAlignment(WordSize == 4 ? 2 : 3);
+ EmitAlignment(WordSize == 4 ? Align(4) : Align(8));
OutStreamer->EmitIntValue(4, 4 /*size*/); // data size for "GNU\0"
OutStreamer->EmitIntValue(8 + WordSize, 4 /*size*/); // Elf_Prop size
OutStreamer->EmitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4 /*size*/);
@@ -585,7 +585,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer->EmitIntValue(ELF::GNU_PROPERTY_X86_FEATURE_1_AND, 4);
OutStreamer->EmitIntValue(4, 4); // data size
OutStreamer->EmitIntValue(FeatureFlagsAnd, 4); // data
- EmitAlignment(WordSize == 4 ? 2 : 3); // padding
+ EmitAlignment(WordSize == 4 ? Align(4) : Align(8)); // padding
OutStreamer->endSection(Nt);
OutStreamer->SwitchSection(Cur);
diff --git a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index 3dcc1015dc7c..69c6b3356cbb 100644
--- a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -35,6 +35,7 @@
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -390,7 +391,7 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
MachineMemOperand *LMMO = *LoadInst->memoperands_begin();
MachineMemOperand *SMMO = *StoreInst->memoperands_begin();
- unsigned Reg1 = MRI->createVirtualRegister(
+ Register Reg1 = MRI->createVirtualRegister(
TII->getRegClass(TII->get(NLoadOpcode), 0, TRI, *(MBB->getParent())));
MachineInstr *NewLoad =
BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode),
diff --git a/lib/Target/X86/X86AvoidTrailingCall.cpp b/lib/Target/X86/X86AvoidTrailingCall.cpp
new file mode 100644
index 000000000000..fb4f9e2901dc
--- /dev/null
+++ b/lib/Target/X86/X86AvoidTrailingCall.cpp
@@ -0,0 +1,108 @@
+//===----- X86AvoidTrailingCall.cpp - Insert int3 after trailing calls ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The Windows x64 unwinder has trouble unwinding the stack when a return
+// address points to the end of the function. This pass maintains the invariant
+// that every return address is inside the bounds of its parent function or
+// funclet by inserting int3 if the last instruction would otherwise be a call.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+#define DEBUG_TYPE "x86-avoid-trailing-call"
+
+using namespace llvm;
+
+namespace {
+
+class X86AvoidTrailingCallPass : public MachineFunctionPass {
+public:
+ X86AvoidTrailingCallPass() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ StringRef getPassName() const override {
+ return "X86 avoid trailing call pass";
+ }
+ static char ID;
+};
+
+char X86AvoidTrailingCallPass::ID = 0;
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createX86AvoidTrailingCallPass() {
+ return new X86AvoidTrailingCallPass();
+}
+
+// A real instruction is a non-meta, non-pseudo instruction. Some pseudos
+// expand to nothing, and some expand to code. This logic conservatively assumes
+// they might expand to nothing.
+static bool isRealInstruction(MachineInstr &MI) {
+ return !MI.isPseudo() && !MI.isMetaInstruction();
+}
+
+// Return true if this is a call instruction, but not a tail call.
+static bool isCallInstruction(const MachineInstr &MI) {
+ return MI.isCall() && !MI.isReturn();
+}
+
+bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) {
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86InstrInfo &TII = *STI.getInstrInfo();
+ assert(STI.isTargetWin64() && "pass only runs on Win64");
+
+ // FIXME: Perhaps this pass should also replace SEH_Epilogue by inserting nops
+ // before epilogues.
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ // Look for basic blocks that precede funclet entries or are at the end of
+ // the function.
+ MachineBasicBlock *NextMBB = MBB.getNextNode();
+ if (NextMBB && !NextMBB->isEHFuncletEntry())
+ continue;
+
+ // Find the last real instruction in this block, or previous blocks if this
+ // block is empty.
+ MachineBasicBlock::reverse_iterator LastRealInstr;
+ for (MachineBasicBlock &RMBB :
+ make_range(MBB.getReverseIterator(), MF.rend())) {
+ LastRealInstr = llvm::find_if(reverse(RMBB), isRealInstruction);
+ if (LastRealInstr != RMBB.rend())
+ break;
+ }
+
+ // Do nothing if this function or funclet has no instructions.
+ if (LastRealInstr == MF.begin()->rend())
+ continue;
+
+ // If this is a call instruction, insert int3 right after it with the same
+ // DebugLoc. Convert back to a forward iterator and advance the insertion
+ // position once.
+ if (isCallInstruction(*LastRealInstr)) {
+ LLVM_DEBUG({
+ dbgs() << "inserting int3 after trailing call instruction:\n";
+ LastRealInstr->dump();
+ dbgs() << '\n';
+ });
+
+ MachineBasicBlock::iterator MBBI = std::next(LastRealInstr.getReverse());
+ BuildMI(*LastRealInstr->getParent(), MBBI, LastRealInstr->getDebugLoc(),
+ TII.get(X86::INT3));
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 4df849a2e14c..ad7e32b4efc8 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -155,12 +155,22 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// This is bad, and breaks SP adjustment.
// So, check that all of the frames in the function are closed inside
// the same block, and, for good measure, that there are no nested frames.
+ //
+ // If any call allocates more argument stack memory than the stack
+ // probe size, don't do this optimization. Otherwise, this pass
+ // would need to synthesize additional stack probe calls to allocate
+ // memory for arguments.
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
+ bool UseStackProbe =
+ !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
+ unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
for (MachineBasicBlock &BB : MF) {
bool InsideFrameSequence = false;
for (MachineInstr &MI : BB) {
if (MI.getOpcode() == FrameSetupOpcode) {
+ if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
+ return false;
if (InsideFrameSequence)
return false;
InsideFrameSequence = true;
@@ -325,8 +335,8 @@ X86CallFrameOptimization::classifyInstruction(
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
- unsigned int Reg = MO.getReg();
- if (!RegInfo.isPhysicalRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
return Exit;
@@ -370,7 +380,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
++I;
- unsigned StackPtr = RegInfo.getStackRegister();
+ Register StackPtr = RegInfo.getStackRegister();
auto StackPtrCopyInst = MBB.end();
// SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
// register. If it's there, use that virtual register as stack pointer
@@ -443,8 +453,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
for (const MachineOperand &MO : I->uses()) {
if (!MO.isReg())
continue;
- unsigned int Reg = MO.getReg();
- if (RegInfo.isPhysicalRegister(Reg))
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg))
UsedRegs.insert(Reg);
}
}
@@ -524,12 +534,12 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
break;
case X86::MOV32mr:
case X86::MOV64mr: {
- unsigned int Reg = PushOp.getReg();
+ Register Reg = PushOp.getReg();
// If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
// in preparation for the PUSH64. The upper 32 bits can be undef.
if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
- unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
@@ -598,7 +608,7 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
// movl %eax, (%esp)
// call
// Get rid of those with prejudice.
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return nullptr;
// Make sure this is the only use of Reg.
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp
index b16b3839c85a..7ee637cfd523 100644
--- a/lib/Target/X86/X86CallLowering.cpp
+++ b/lib/Target/X86/X86CallLowering.cpp
@@ -102,6 +102,8 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
DL(MIRBuilder.getMF().getDataLayout()),
STI(MIRBuilder.getMF().getSubtarget<X86Subtarget>()) {}
+ bool isIncomingArgumentHandler() const override { return false; }
+
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0));
@@ -155,8 +157,9 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
- const CallLowering::ArgInfo &Info, CCState &State) override {
- bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
+ CCState &State) override {
+ bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
StackSize = State.getNextStackOffset();
static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2,
@@ -229,7 +232,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
: ValueHandler(MIRBuilder, MRI, AssignFn),
DL(MIRBuilder.getMF().getDataLayout()) {}
- bool isArgumentHandler() const override { return true; }
+ bool isIncomingArgumentHandler() const override { return true; }
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -237,7 +240,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
int FI = MFI.CreateFixedObject(Size, Offset, true);
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
- unsigned AddrReg = MRI.createGenericVirtualRegister(
+ Register AddrReg = MRI.createGenericVirtualRegister(
LLT::pointer(0, DL.getPointerSizeInBits(0)));
MIRBuilder.buildFrameIndex(AddrReg, FI);
return AddrReg;
@@ -301,6 +304,7 @@ struct FormalArgHandler : public IncomingValueHandler {
: IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
+ MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
};
@@ -372,10 +376,7 @@ bool X86CallLowering::lowerFormalArguments(
}
bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
- CallingConv::ID CallConv,
- const MachineOperand &Callee,
- const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const {
+ CallLoweringInfo &Info) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -385,8 +386,8 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
auto TRI = STI.getRegisterInfo();
// Handle only Linux C, X86_64_SysV calling conventions for now.
- if (!STI.isTargetLinux() ||
- !(CallConv == CallingConv::C || CallConv == CallingConv::X86_64_SysV))
+ if (!STI.isTargetLinux() || !(Info.CallConv == CallingConv::C ||
+ Info.CallConv == CallingConv::X86_64_SysV))
return false;
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
@@ -395,18 +396,19 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
bool Is64Bit = STI.is64Bit();
- unsigned CallOpc = Callee.isReg()
+ unsigned CallOpc = Info.Callee.isReg()
? (Is64Bit ? X86::CALL64r : X86::CALL32r)
: (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
- auto MIB = MIRBuilder.buildInstrNoInsert(CallOpc).add(Callee).addRegMask(
- TRI->getCallPreservedMask(MF, CallConv));
+ auto MIB = MIRBuilder.buildInstrNoInsert(CallOpc)
+ .add(Info.Callee)
+ .addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));
SmallVector<ArgInfo, 8> SplitArgs;
- for (const auto &OrigArg : OrigArgs) {
+ for (const auto &OrigArg : Info.OrigArgs) {
// TODO: handle not simple cases.
- if (OrigArg.Flags.isByVal())
+ if (OrigArg.Flags[0].isByVal())
return false;
if (OrigArg.Regs.size() > 1)
@@ -423,8 +425,8 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
- bool IsFixed = OrigArgs.empty() ? true : OrigArgs.back().IsFixed;
- if (STI.is64Bit() && !IsFixed && !STI.isCallingConvWin64(CallConv)) {
+ bool IsFixed = Info.OrigArgs.empty() ? true : Info.OrigArgs.back().IsFixed;
+ if (STI.is64Bit() && !IsFixed && !STI.isCallingConvWin64(Info.CallConv)) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -445,23 +447,24 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// If Callee is a reg, since it is used by a target specific
// instruction, it must have a register class matching the
// constraint of that instruction.
- if (Callee.isReg())
+ if (Info.Callee.isReg())
MIB->getOperand(0).setReg(constrainOperandRegClass(
MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
- *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0));
+ *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
+ 0));
// Finally we can copy the returned value back into its virtual-register. In
// symmetry with the arguments, the physical register must be an
// implicit-define of the call instruction.
- if (!OrigRet.Ty->isVoidTy()) {
- if (OrigRet.Regs.size() > 1)
+ if (!Info.OrigRet.Ty->isVoidTy()) {
+ if (Info.OrigRet.Regs.size() > 1)
return false;
SplitArgs.clear();
SmallVector<Register, 8> NewRegs;
- if (!splitToValueTypes(OrigRet, SplitArgs, DL, MRI,
+ if (!splitToValueTypes(Info.OrigRet, SplitArgs, DL, MRI,
[&](ArrayRef<Register> Regs) {
NewRegs.assign(Regs.begin(), Regs.end());
}))
@@ -472,7 +475,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
if (!NewRegs.empty())
- MIRBuilder.buildMerge(OrigRet.Regs[0], NewRegs);
+ MIRBuilder.buildMerge(Info.OrigRet.Regs[0], NewRegs);
}
CallSeqStart.addImm(Handler.getStackSize())
diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h
index 0445331bc3ff..444a0c7d0122 100644
--- a/lib/Target/X86/X86CallLowering.h
+++ b/lib/Target/X86/X86CallLowering.h
@@ -34,9 +34,8 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const override;
- bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
- const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
private:
/// A function of this type is used to perform value split action.
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 1c3034a5116a..4c49d68bec99 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -433,6 +433,7 @@ defm X86_SysV64_RegCall :
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ CCIfCC<"CallingConv::Tail", CCDelegateTo<RetCC_X86_32_Fast>>,
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
@@ -1000,6 +1001,7 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
+ CCIfCC<"CallingConv::Tail", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp
index a61fa3246f09..5123853f5455 100644
--- a/lib/Target/X86/X86CmovConversion.cpp
+++ b/lib/Target/X86/X86CmovConversion.cpp
@@ -436,8 +436,8 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates(
// Checks for "isUse()" as "uses()" returns also implicit definitions.
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
- auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)];
+ Register Reg = MO.getReg();
+ auto &RDM = RegDefMaps[Register::isVirtualRegister(Reg)];
if (MachineInstr *DefMI = RDM.lookup(Reg)) {
OperandToDefMap[&MO] = DefMI;
DepthInfo Info = DepthMap.lookup(DefMI);
@@ -456,8 +456,8 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates(
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI;
+ Register Reg = MO.getReg();
+ RegDefMaps[Register::isVirtualRegister(Reg)][Reg] = &MI;
}
unsigned Latency = TSchedModel.computeInstrLatency(&MI);
@@ -710,7 +710,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
// Skip any CMOVs in this group which don't load from memory.
if (!MI.mayLoad()) {
// Remember the false-side register input.
- unsigned FalseReg =
+ Register FalseReg =
MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg();
// Walk back through any intermediate cmovs referenced.
while (true) {
@@ -753,7 +753,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
// Get a fresh register to use as the destination of the MOV.
const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
- unsigned TmpReg = MRI->createVirtualRegister(RC);
+ Register TmpReg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 4> NewMIs;
bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
@@ -810,9 +810,9 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
- unsigned DestReg = MIIt->getOperand(0).getReg();
- unsigned Op1Reg = MIIt->getOperand(1).getReg();
- unsigned Op2Reg = MIIt->getOperand(2).getReg();
+ Register DestReg = MIIt->getOperand(0).getReg();
+ Register Op1Reg = MIIt->getOperand(1).getReg();
+ Register Op2Reg = MIIt->getOperand(2).getReg();
// If this CMOV we are processing is the opposite condition from the jump we
// generated, then we have to swap the operands for the PHI that is going to
diff --git a/lib/Target/X86/X86CondBrFolding.cpp b/lib/Target/X86/X86CondBrFolding.cpp
index 9dea94f1368d..1bf2d5ba7b8f 100644
--- a/lib/Target/X86/X86CondBrFolding.cpp
+++ b/lib/Target/X86/X86CondBrFolding.cpp
@@ -564,7 +564,7 @@ X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
Modified = false;
break;
}
- return llvm::make_unique<TargetMBBInfo>(TargetMBBInfo{
+ return std::make_unique<TargetMBBInfo>(TargetMBBInfo{
TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly});
}
diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp
index 18bbfa32e11b..b4cf5cafbc6e 100644
--- a/lib/Target/X86/X86DomainReassignment.cpp
+++ b/lib/Target/X86/X86DomainReassignment.cpp
@@ -182,7 +182,7 @@ public:
MachineBasicBlock *MBB = MI->getParent();
auto &DL = MI->getDebugLoc();
- unsigned Reg = MRI->createVirtualRegister(
+ Register Reg = MRI->createVirtualRegister(
TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(),
*MBB->getParent()));
MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg);
@@ -219,13 +219,13 @@ public:
// Don't allow copies to/flow GR8/GR16 physical registers.
// FIXME: Is there some better way to support this?
- unsigned DstReg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ Register DstReg = MI->getOperand(0).getReg();
+ if (Register::isPhysicalRegister(DstReg) &&
(X86::GR8RegClass.contains(DstReg) ||
X86::GR16RegClass.contains(DstReg)))
return false;
- unsigned SrcReg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ Register SrcReg = MI->getOperand(1).getReg();
+ if (Register::isPhysicalRegister(SrcReg) &&
(X86::GR8RegClass.contains(SrcReg) ||
X86::GR16RegClass.contains(SrcReg)))
return false;
@@ -241,7 +241,7 @@ public:
// Physical registers will not be converted. Assume that converting the
// COPY to the destination domain will eventually result in a actual
// instruction.
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ if (Register::isPhysicalRegister(MO.getReg()))
return 1;
RegDomain OpDomain = getDomain(MRI->getRegClass(MO.getReg()),
@@ -436,7 +436,7 @@ void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg,
if (EnclosedEdges.count(Reg))
return;
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return;
if (!MRI->hasOneDef(Reg))
@@ -593,8 +593,8 @@ void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) {
if (!DefOp.isReg())
continue;
- unsigned DefReg = DefOp.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DefReg)) {
+ Register DefReg = DefOp.getReg();
+ if (!Register::isVirtualRegister(DefReg)) {
C.setAllIllegal();
continue;
}
@@ -751,7 +751,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
// Go over all virtual registers and calculate a closure.
unsigned ClosureID = 0;
for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(Idx);
+ unsigned Reg = Register::index2VirtReg(Idx);
// GPR only current source domain supported.
if (!isGPR(MRI->getRegClass(Reg)))
diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp
index 58680f1815bb..24c8e6d6f6eb 100755
--- a/lib/Target/X86/X86EvexToVex.cpp
+++ b/lib/Target/X86/X86EvexToVex.cpp
@@ -131,7 +131,7 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
assert(!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) &&
"ZMM instructions should not be in the EVEX->VEX tables");
diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp
index b8624b40f2f7..9126a1fbea52 100644
--- a/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/lib/Target/X86/X86ExpandPseudo.cpp
@@ -194,7 +194,8 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case X86::TCRETURNmi64: {
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
MachineOperand &JumpTarget = MBBI->getOperand(0);
- MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+ MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands
+ : 1);
assert(StackAdjust.isImm() && "Expecting immediate value.");
// Adjust stack pointer.
@@ -259,7 +260,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
? X86::TAILJMPm
: (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
- for (unsigned i = 0; i != 5; ++i)
+ for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
MIB.add(MBBI->getOperand(i));
} else if (Opcode == X86::TCRETURNri64) {
JumpTarget.setIsKill();
@@ -274,7 +275,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstr &NewMI = *std::prev(MBBI);
NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
- MBB.getParent()->updateCallSiteInfo(&*MBBI, &NewMI);
+ MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
@@ -287,7 +288,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
assert(DestAddr.isReg() && "Offset should be in register!");
const bool Uses64BitFramePtr =
STI->isTarget64BitLP64() || STI->isTargetNaCl64();
- unsigned StackPtr = TRI->getStackRegister();
+ Register StackPtr = TRI->getStackRegister();
BuildMI(MBB, MBBI, DL,
TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
.addReg(DestAddr.getReg());
@@ -347,7 +348,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// actualcmpxchg Addr
// [E|R]BX = SaveRbx
const MachineOperand &InArg = MBBI->getOperand(6);
- unsigned SaveRbx = MBBI->getOperand(7).getReg();
+ Register SaveRbx = MBBI->getOperand(7).getReg();
unsigned ActualInArg =
Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 7b9ce0271205..e5e089d07d55 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1160,6 +1160,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
+ CC != CallingConv::Tail &&
CC != CallingConv::X86_FastCall &&
CC != CallingConv::X86_StdCall &&
CC != CallingConv::X86_ThisCall &&
@@ -1173,7 +1174,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
+ if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
+ CC == CallingConv::Tail)
return false;
// Let SDISel handle vararg functions.
@@ -1241,7 +1243,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
}
// Make the copy.
- unsigned DstReg = VA.getLocReg();
+ Register DstReg = VA.getLocReg();
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
if (!SrcRC->contains(DstReg))
@@ -3157,7 +3159,7 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
if (Subtarget->getTargetTriple().isOSMSVCRT())
return 0;
if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
- CC == CallingConv::HiPE)
+ CC == CallingConv::HiPE || CC == CallingConv::Tail)
return 0;
if (CS)
@@ -3208,6 +3210,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
default: return false;
case CallingConv::C:
case CallingConv::Fast:
+ case CallingConv::Tail:
case CallingConv::WebKit_JS:
case CallingConv::Swift:
case CallingConv::X86_FastCall:
@@ -3224,7 +3227,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
+ if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
+ CC == CallingConv::Tail)
return false;
// Don't know how to handle Win64 varargs yet. Nothing special needed for
@@ -3387,6 +3391,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
case CCValAssign::SExtUpper:
case CCValAssign::ZExtUpper:
case CCValAssign::FPExt:
+ case CCValAssign::Trunc:
llvm_unreachable("Unexpected loc info!");
case CCValAssign::Indirect:
// FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
@@ -3547,7 +3552,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
unsigned CopyReg = ResultReg + i;
- unsigned SrcReg = VA.getLocReg();
+ Register SrcReg = VA.getLocReg();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp
index bf541d933790..9f7c4afde760 100644
--- a/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/lib/Target/X86/X86FixupBWInsts.cpp
@@ -80,7 +80,7 @@ class FixupBWInstPass : public MachineFunctionPass {
/// destination register of the MachineInstr passed in. It returns true if
/// that super register is dead just prior to \p OrigMI, and false if not.
bool getSuperRegDestIfDead(MachineInstr *OrigMI,
- unsigned &SuperDestReg) const;
+ Register &SuperDestReg) const;
/// Change the MachineInstr \p MI into the equivalent extending load to 32 bit
/// register if it is safe to do so. Return the replacement instruction if
@@ -92,6 +92,12 @@ class FixupBWInstPass : public MachineFunctionPass {
/// nullptr.
MachineInstr *tryReplaceCopy(MachineInstr *MI) const;
+ /// Change the MachineInstr \p MI into the equivalent extend to 32 bit
+ /// register if it is safe to do so. Return the replacement instruction if
+ /// OK, otherwise return nullptr.
+ MachineInstr *tryReplaceExtend(unsigned New32BitOpcode,
+ MachineInstr *MI) const;
+
// Change the MachineInstr \p MI into an eqivalent 32 bit instruction if
// possible. Return the replacement instruction if OK, return nullptr
// otherwise.
@@ -169,10 +175,10 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
///
/// If so, return that super register in \p SuperDestReg.
bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
- unsigned &SuperDestReg) const {
+ Register &SuperDestReg) const {
auto *TRI = &TII->getRegisterInfo();
- unsigned OrigDestReg = OrigMI->getOperand(0).getReg();
+ Register OrigDestReg = OrigMI->getOperand(0).getReg();
SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32);
const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg);
@@ -232,12 +238,12 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
// %ax = KILL %ax, implicit killed %eax
// RET 0, %ax
unsigned Opc = OrigMI->getOpcode(); (void)Opc;
- // These are the opcodes currently handled by the pass, if something
- // else will be added we need to ensure that new opcode has the same
- // properties.
- assert((Opc == X86::MOV8rm || Opc == X86::MOV16rm || Opc == X86::MOV8rr ||
- Opc == X86::MOV16rr) &&
- "Unexpected opcode.");
+ // These are the opcodes currently known to work with the code below, if
+ // something // else will be added we need to ensure that new opcode has the
+ // same properties.
+ if (Opc != X86::MOV8rm && Opc != X86::MOV16rm && Opc != X86::MOV8rr &&
+ Opc != X86::MOV16rr)
+ return false;
bool IsDefined = false;
for (auto &MO: OrigMI->implicit_operands()) {
@@ -247,7 +253,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
assert((MO.isDef() || MO.isUse()) && "Expected Def or Use only!");
if (MO.isDef() && TRI->isSuperRegisterEq(OrigDestReg, MO.getReg()))
- IsDefined = true;
+ IsDefined = true;
// If MO is a use of any part of the destination register but is not equal
// to OrigDestReg or one of its subregisters, we cannot use SuperDestReg.
@@ -268,7 +274,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
MachineInstr *MI) const {
- unsigned NewDestReg;
+ Register NewDestReg;
// We are going to try to rewrite this load to a larger zero-extending
// load. This is safe if all portions of the 32 bit super-register
@@ -295,11 +301,11 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
auto &OldDest = MI->getOperand(0);
auto &OldSrc = MI->getOperand(1);
- unsigned NewDestReg;
+ Register NewDestReg;
if (!getSuperRegDestIfDead(MI, NewDestReg))
return nullptr;
- unsigned NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32);
+ Register NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32);
// This is only correct if we access the same subregister index: otherwise,
// we could try to replace "movb %ah, %al" with "movl %eax, %eax".
@@ -326,6 +332,33 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
return MIB;
}
+MachineInstr *FixupBWInstPass::tryReplaceExtend(unsigned New32BitOpcode,
+ MachineInstr *MI) const {
+ Register NewDestReg;
+ if (!getSuperRegDestIfDead(MI, NewDestReg))
+ return nullptr;
+
+ // Don't interfere with formation of CBW instructions which should be a
+ // shorter encoding than even the MOVSX32rr8. It's also immunte to partial
+ // merge issues on Intel CPUs.
+ if (MI->getOpcode() == X86::MOVSX16rr8 &&
+ MI->getOperand(0).getReg() == X86::AX &&
+ MI->getOperand(1).getReg() == X86::AL)
+ return nullptr;
+
+ // Safe to change the instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, MI->getDebugLoc(), TII->get(New32BitOpcode), NewDestReg);
+
+ unsigned NumArgs = MI->getNumOperands();
+ for (unsigned i = 1; i < NumArgs; ++i)
+ MIB.add(MI->getOperand(i));
+
+ MIB.setMemRefs(MI->memoperands());
+
+ return MIB;
+}
+
MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI,
MachineBasicBlock &MBB) const {
// See if this is an instruction of the type we are currently looking for.
@@ -355,6 +388,15 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI,
// of the register.
return tryReplaceCopy(MI);
+ case X86::MOVSX16rr8:
+ return tryReplaceExtend(X86::MOVSX32rr8, MI);
+ case X86::MOVSX16rm8:
+ return tryReplaceExtend(X86::MOVSX32rm8, MI);
+ case X86::MOVZX16rr8:
+ return tryReplaceExtend(X86::MOVZX32rr8, MI);
+ case X86::MOVZX16rm8:
+ return tryReplaceExtend(X86::MOVZX32rm8, MI);
+
default:
// nothing to do here.
break;
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 041529a0be68..543dc8b00fa0 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -67,8 +67,8 @@ class FixupLEAPass : public MachineFunctionPass {
/// - LEA that uses RIP relative addressing mode
/// - LEA that uses 16-bit addressing mode "
/// This function currently handles the first 2 cases only.
- MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
- MachineBasicBlock &MBB);
+ void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
+ MachineBasicBlock &MBB, bool OptIncDec);
/// Look for LEAs that are really two address LEAs that we might be able to
/// turn into regular ADD instructions.
@@ -216,14 +216,10 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
continue;
- if (IsSlowLEA) {
+ if (IsSlowLEA)
processInstructionForSlowLEA(I, MBB);
- } else if (IsSlow3OpsLEA) {
- if (auto *NewMI = processInstrForSlow3OpLEA(*I, MBB)) {
- MBB.erase(I);
- I = NewMI;
- }
- }
+ else if (IsSlow3OpsLEA)
+ processInstrForSlow3OpLEA(I, MBB, OptIncDec);
}
// Second pass for creating LEAs. This may reverse some of the
@@ -301,18 +297,14 @@ static inline bool isInefficientLEAReg(unsigned Reg) {
Reg == X86::R13D || Reg == X86::R13;
}
-static inline bool isRegOperand(const MachineOperand &Op) {
- return Op.isReg() && Op.getReg() != X86::NoRegister;
-}
-
/// Returns true if this LEA uses base an index registers, and the base register
/// is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
const MachineOperand &Index) {
- return Base.isReg() && isInefficientLEAReg(Base.getReg()) &&
- isRegOperand(Index);
+ return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
+ Index.getReg() != X86::NoRegister;
}
static inline bool hasLEAOffset(const MachineOperand &Offset) {
@@ -372,9 +364,9 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
!TII->isSafeToClobberEFLAGS(MBB, I))
return false;
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned BaseReg = Base.getReg();
- unsigned IndexReg = Index.getReg();
+ Register DestReg = MI.getOperand(0).getReg();
+ Register BaseReg = Base.getReg();
+ Register IndexReg = Index.getReg();
// Don't change stack adjustment LEAs.
if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
@@ -500,9 +492,9 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
if (Segment.getReg() != 0 || !Offset.isImm() ||
!TII->isSafeToClobberEFLAGS(MBB, I))
return;
- const unsigned DstR = Dst.getReg();
- const unsigned SrcR1 = Base.getReg();
- const unsigned SrcR2 = Index.getReg();
+ const Register DstR = Dst.getReg();
+ const Register SrcR1 = Base.getReg();
+ const Register SrcR2 = Index.getReg();
if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
return;
if (Scale.getImm() > 1)
@@ -534,111 +526,150 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
}
}
-MachineInstr *
-FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
- MachineBasicBlock &MBB) {
+void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
+ MachineBasicBlock &MBB,
+ bool OptIncDec) {
+ MachineInstr &MI = *I;
const unsigned LEAOpcode = MI.getOpcode();
- const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Dest = MI.getOperand(0);
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
- if (!(TII->isThreeOperandsLEA(MI) ||
- hasInefficientLEABaseReg(Base, Index)) ||
+ if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
!TII->isSafeToClobberEFLAGS(MBB, MI) ||
Segment.getReg() != X86::NoRegister)
- return nullptr;
+ return;
+
+ Register DestReg = Dest.getReg();
+ Register BaseReg = Base.getReg();
+ Register IndexReg = Index.getReg();
+
+ if (MI.getOpcode() == X86::LEA64_32r) {
+ if (BaseReg != 0)
+ BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
+ if (IndexReg != 0)
+ IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
+ }
- unsigned DstR = Dst.getReg();
- unsigned BaseR = Base.getReg();
- unsigned IndexR = Index.getReg();
- unsigned SSDstR =
- (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR;
bool IsScale1 = Scale.getImm() == 1;
- bool IsInefficientBase = isInefficientLEAReg(BaseR);
- bool IsInefficientIndex = isInefficientLEAReg(IndexR);
+ bool IsInefficientBase = isInefficientLEAReg(BaseReg);
+ bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
// Skip these cases since it takes more than 2 instructions
// to replace the LEA instruction.
- if (IsInefficientBase && SSDstR == BaseR && !IsScale1)
- return nullptr;
- if (LEAOpcode == X86::LEA64_32r && IsInefficientBase &&
- (IsInefficientIndex || !IsScale1))
- return nullptr;
-
- const DebugLoc DL = MI.getDebugLoc();
- const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode));
- const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset));
+ if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
+ return;
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
+ MachineInstr *NewMI = nullptr;
+
// First try to replace LEA with one or two (for the 3-op LEA case)
// add instructions:
// 1.lea (%base,%index,1), %base => add %index,%base
// 2.lea (%base,%index,1), %index => add %base,%index
- if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
- const MachineOperand &Src = DstR == BaseR ? Index : Base;
- MachineInstr *NewMI =
- BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
- LLVM_DEBUG(NewMI->dump(););
- // Create ADD instruction for the Offset in case of 3-Ops LEA.
- if (hasLEAOffset(Offset)) {
- NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
- LLVM_DEBUG(NewMI->dump(););
+ if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
+ unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
+ if (DestReg != BaseReg)
+ std::swap(BaseReg, IndexReg);
+
+ if (MI.getOpcode() == X86::LEA64_32r) {
+ // TODO: Do we need the super register implicit use?
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+ .addReg(BaseReg)
+ .addReg(IndexReg)
+ .addReg(Base.getReg(), RegState::Implicit)
+ .addReg(Index.getReg(), RegState::Implicit);
+ } else {
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+ .addReg(BaseReg)
+ .addReg(IndexReg);
}
- return NewMI;
- }
- // If the base is inefficient try switching the index and base operands,
- // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
- // lea offset(%base,%index,scale),%dst =>
- // lea (%base,%index,scale); add offset,%dst
- if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
- MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
- .add(Dst)
- .add(IsInefficientBase ? Index : Base)
- .add(Scale)
- .add(IsInefficientBase ? Base : Index)
- .addImm(0)
- .add(Segment);
+ } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
+ // If the base is inefficient try switching the index and base operands,
+ // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
+ // lea offset(%base,%index,scale),%dst =>
+ // lea (%base,%index,scale); add offset,%dst
+ NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
+ .add(Dest)
+ .add(IsInefficientBase ? Index : Base)
+ .add(Scale)
+ .add(IsInefficientBase ? Base : Index)
+ .addImm(0)
+ .add(Segment);
LLVM_DEBUG(NewMI->dump(););
+ }
+
+ // If either replacement succeeded above, add the offset if needed, then
+ // replace the instruction.
+ if (NewMI) {
// Create ADD instruction for the Offset in case of 3-Ops LEA.
if (hasLEAOffset(Offset)) {
- NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
- LLVM_DEBUG(NewMI->dump(););
+ if (OptIncDec && Offset.isImm() &&
+ (Offset.getImm() == 1 || Offset.getImm() == -1)) {
+ unsigned NewOpc =
+ getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+ .addReg(DestReg);
+ LLVM_DEBUG(NewMI->dump(););
+ } else {
+ unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+ .addReg(DestReg)
+ .add(Offset);
+ LLVM_DEBUG(NewMI->dump(););
+ }
}
- return NewMI;
+
+ MBB.erase(I);
+ I = NewMI;
+ return;
}
+
// Handle the rest of the cases with inefficient base register:
- assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!");
+ assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
assert(IsInefficientBase && "efficient base should be handled already!");
+ // FIXME: Handle LEA64_32r.
+ if (LEAOpcode == X86::LEA64_32r)
+ return;
+
// lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
if (IsScale1 && !hasLEAOffset(Offset)) {
- bool BIK = Base.isKill() && BaseR != IndexR;
- TII->copyPhysReg(MBB, MI, DL, DstR, BaseR, BIK);
+ bool BIK = Base.isKill() && BaseReg != IndexReg;
+ TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
LLVM_DEBUG(MI.getPrevNode()->dump(););
- MachineInstr *NewMI =
- BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
+ unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
+ NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+ .addReg(DestReg)
+ .add(Index);
LLVM_DEBUG(NewMI->dump(););
- return NewMI;
+ return;
}
+
// lea offset(%base,%index,scale), %dst =>
// lea offset( ,%index,scale), %dst; add %base,%dst
- MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
- .add(Dst)
- .addReg(0)
- .add(Scale)
- .add(Index)
- .add(Offset)
- .add(Segment);
+ NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
+ .add(Dest)
+ .addReg(0)
+ .add(Scale)
+ .add(Index)
+ .add(Offset)
+ .add(Segment);
LLVM_DEBUG(NewMI->dump(););
- NewMI = BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
+ unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
+ NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+ .addReg(DestReg)
+ .add(Base);
LLVM_DEBUG(NewMI->dump(););
- return NewMI;
+
+ MBB.erase(I);
+ I = NewMI;
}
diff --git a/lib/Target/X86/X86FixupSetCC.cpp b/lib/Target/X86/X86FixupSetCC.cpp
index e2d4d1ede6f3..cbde280aa280 100644
--- a/lib/Target/X86/X86FixupSetCC.cpp
+++ b/lib/Target/X86/X86FixupSetCC.cpp
@@ -136,8 +136,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit()
? &X86::GR32RegClass
: &X86::GR32_ABCDRegClass;
- unsigned ZeroReg = MRI->createVirtualRegister(RC);
- unsigned InsertReg = MRI->createVirtualRegister(RC);
+ Register ZeroReg = MRI->createVirtualRegister(RC);
+ Register InsertReg = MRI->createVirtualRegister(RC);
// Initialize a register with 0. This must go before the eflags def
BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp
index 5ce3255ea96a..cfba06fb6533 100644
--- a/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -721,8 +721,9 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
for (MachineInstr &MI :
llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
X86::CondCode Cond = X86::getCondFromSETCC(MI);
- if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() &&
- TRI->isVirtualRegister(MI.getOperand(0).getReg())) {
+ if (Cond != X86::COND_INVALID && !MI.mayStore() &&
+ MI.getOperand(0).isReg() &&
+ Register::isVirtualRegister(MI.getOperand(0).getReg())) {
assert(MI.getOperand(0).isDef() &&
"A non-storing SETcc should always define a register!");
CondRegs[Cond] = MI.getOperand(0).getReg();
@@ -739,7 +740,7 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
DebugLoc TestLoc, X86::CondCode Cond) {
- unsigned Reg = MRI->createVirtualRegister(PromoteRC);
+ Register Reg = MRI->createVirtualRegister(PromoteRC);
auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
TII->get(X86::SETCCr), Reg).addImm(Cond);
(void)SetI;
@@ -813,7 +814,7 @@ void X86FlagsCopyLoweringPass::rewriteArithmetic(
MachineBasicBlock &MBB = *MI.getParent();
// Insert an instruction that will set the flag back to the desired value.
- unsigned TmpReg = MRI->createVirtualRegister(PromoteRC);
+ Register TmpReg = MRI->createVirtualRegister(PromoteRC);
auto AddI =
BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
.addDef(TmpReg, RegState::Dead)
@@ -974,7 +975,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
// Now we need to turn this into a bitmask. We do this by subtracting it from
// zero.
- unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+ Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
ZeroReg = AdjustReg(ZeroReg);
@@ -999,7 +1000,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
default:
llvm_unreachable("Invalid SETB_C* opcode!");
}
- unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
+ Register ResultReg = MRI->createVirtualRegister(&SetBRC);
BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
.addReg(ZeroReg)
.addReg(ExtCondReg);
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 074cf21d03f5..fcfb5bc91314 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -288,8 +288,8 @@ namespace {
// Check if a COPY instruction is using FP registers.
static bool isFPCopy(MachineInstr &MI) {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
return X86::RFP80RegClass.contains(DstReg) ||
X86::RFP80RegClass.contains(SrcReg);
@@ -313,7 +313,7 @@ FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
/// For example, this returns 3 for X86::FP3.
static unsigned getFPReg(const MachineOperand &MO) {
assert(MO.isReg() && "Expected an FP register!");
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
return Reg - X86::FP0;
}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index e310fe069117..1b469a814adc 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -35,8 +35,8 @@
using namespace llvm;
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
- unsigned StackAlignOverride)
- : TargetFrameLowering(StackGrowsDown, StackAlignOverride,
+ MaybeAlign StackAlignOverride)
+ : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
STI.is64Bit() ? -8 : -4),
STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
// Cache a bunch of frame-related predicates for this subtarget.
@@ -176,7 +176,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
MachineOperand &MO = MBBI->getOperand(i);
if (!MO.isReg() || MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
@@ -216,7 +216,7 @@ flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg != X86::EFLAGS)
continue;
@@ -995,11 +995,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
bool NeedsDwarfCFI =
!IsWin64Prologue && (MMI.hasDebugInfo() || Fn.needsUnwindTableEntry());
- unsigned FramePtr = TRI->getFrameRegister(MF);
- const unsigned MachineFramePtr =
+ Register FramePtr = TRI->getFrameRegister(MF);
+ const Register MachineFramePtr =
STI.isTarget64BitILP32()
- ? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
- unsigned BasePtr = TRI->getBaseRegister();
+ ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
+ Register BasePtr = TRI->getBaseRegister();
bool HasWinCFI = false;
// Debug location must be unknown since the first debug location is used
@@ -1016,14 +1016,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
-
- // The default stack probe size is 4096 if the function has no stackprobesize
- // attribute.
- unsigned StackProbeSize = 4096;
- if (Fn.hasFnAttribute("stack-probe-size"))
- Fn.getFnAttribute("stack-probe-size")
- .getValueAsString()
- .getAsInteger(0, StackProbeSize);
+ unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
// used and an error code was pushed, since the x86-64 ABI requires a 16-byte
@@ -1081,7 +1074,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
int stackGrowth = -SlotSize;
// Find the funclet establisher parameter
- unsigned Establisher = X86::NoRegister;
+ Register Establisher = X86::NoRegister;
if (IsClrFunclet)
Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
else if (IsFunclet)
@@ -1192,7 +1185,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
(MBBI->getOpcode() == X86::PUSH32r ||
MBBI->getOpcode() == X86::PUSH64r)) {
PushedRegs = true;
- unsigned Reg = MBBI->getOperand(0).getReg();
+ Register Reg = MBBI->getOperand(0).getReg();
++MBBI;
if (!HasFP && NeedsDwarfCFI) {
@@ -1396,9 +1389,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
int FI;
if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
if (X86::FR64RegClass.contains(Reg)) {
+ int Offset;
unsigned IgnoredFrameReg;
- int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
- Offset += SEHFrameOffset;
+ if (IsWin64Prologue && IsFunclet)
+ Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
+ else
+ Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) +
+ SEHFrameOffset;
HasWinCFI = true;
assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
@@ -1554,9 +1551,13 @@ X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
unsigned
X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
// This is the size of the pushed CSRs.
- unsigned CSSize =
- MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ // This is the size of callee saved XMMs.
+ const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
+ unsigned XMMSize = WinEHXMMSlotInfo.size() *
+ TRI->getSpillSize(X86::VR128RegClass);
// This is the amount of stack a funclet needs to allocate.
unsigned UsedSize;
EHPersonality Personality =
@@ -1576,7 +1577,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment());
// Subtract out the size of the callee saved registers. This is how much stack
// each funclet will allocate.
- return FrameSizeMinusRBP - CSSize;
+ return FrameSizeMinusRBP + XMMSize - CSSize;
}
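Worked through with assumed numbers (not from the source): two 16-byte VR128 spills, CSSize = 8, UsedSize = 40, and 16-byte stack alignment.

static_assert(((8 + 40 + 15) / 16) * 16 == 48, "alignTo(CSSize + UsedSize, 16)");
static_assert(48 + 2 * 16 - 8 == 72, "bytes each funclet now allocates");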
static bool isTailCallOpcode(unsigned Opc) {
@@ -1597,9 +1598,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
DL = MBBI->getDebugLoc();
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
const bool Is64BitILP32 = STI.isTarget64BitILP32();
- unsigned FramePtr = TRI->getFrameRegister(MF);
+ Register FramePtr = TRI->getFrameRegister(MF);
unsigned MachineFramePtr =
- Is64BitILP32 ? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
+ Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWin64CFI =
@@ -1850,6 +1851,20 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return Offset + FPDelta;
}
+int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
+ const auto it = WinEHXMMSlotInfo.find(FI);
+
+ if (it == WinEHXMMSlotInfo.end())
+ return getFrameIndexReference(MF, FI, FrameReg);
+
+ FrameReg = TRI->getStackRegister();
+ return alignTo(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second;
+}
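With assumed values (MaxCallFrameSize = 40, 16-byte stack alignment, and a slot whose recorded XMM-area offset it->second is 16), the SP-relative offset works out as:

static_assert(((40 + 15) / 16) * 16 + 16 == 64,
              "alignTo(MaxCallFrameSize, 16) + XMM area offset, in bytes");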
+
int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
int FI, unsigned &FrameReg,
int Adjustment) const {
@@ -1948,6 +1963,8 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CalleeSavedFrameSize = 0;
+ unsigned XMMCalleeSavedFrameSize = 0;
+ auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -1984,7 +2001,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// Since emitPrologue and emitEpilogue will handle spilling and restoring of
// the frame register, we can delete it from CSI list and not have to worry
// about avoiding it later.
- unsigned FPReg = TRI->getFrameRegister(MF);
+ Register FPReg = TRI->getFrameRegister(MF);
for (unsigned i = 0; i < CSI.size(); ++i) {
if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
CSI.erase(CSI.begin() + i);
@@ -2025,12 +2042,20 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
unsigned Size = TRI->getSpillSize(*RC);
unsigned Align = TRI->getSpillAlignment(*RC);
// ensure alignment
- SpillSlotOffset -= std::abs(SpillSlotOffset) % Align;
+ assert(SpillSlotOffset < 0 && "SpillSlotOffset should always be < 0 on X86");
+ SpillSlotOffset = -alignTo(-SpillSlotOffset, Align);
+
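A case where the two formulas diverge, with assumed SpillSlotOffset = -12 and Align = 16; the old modulo trick only realigned correctly when twice the remainder was itself a multiple of the alignment:

static_assert(-12 - (12 % 16) == -24 && 24 % 16 != 0, "old: still misaligned");
static_assert(-(((12 + 15) / 16) * 16) == -16, "new: correctly 16-byte aligned");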
// spill into slot
SpillSlotOffset -= Size;
int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
CSI[i - 1].setFrameIdx(SlotIndex);
MFI.ensureMaxAlignment(Align);
+
+ // Save the start offset and size of XMM in stack frame for funclets.
+ if (X86::VR128RegClass.contains(Reg)) {
+ WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
+ XMMCalleeSavedFrameSize += Size;
+ }
}
return true;
@@ -2200,7 +2225,7 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Spill the BasePtr if it's used.
if (TRI->hasBasePointer(MF)){
- unsigned BasePtr = TRI->getBaseRegister();
+ Register BasePtr = TRI->getBaseRegister();
if (STI.isTarget64BitILP32())
BasePtr = getX86SubSuperRegister(BasePtr, 64);
SavedRegs.set(BasePtr);
@@ -2212,7 +2237,7 @@ HasNestArgument(const MachineFunction *MF) {
const Function &F = MF->getFunction();
for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
I != E; I++) {
- if (I->hasNestAttr())
+ if (I->hasNestAttr() && !I->use_empty())
return true;
}
return false;
@@ -2244,7 +2269,8 @@ GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Pr
bool IsNested = HasNestArgument(&MF);
if (CallingConvention == CallingConv::X86_FastCall ||
- CallingConvention == CallingConv::Fast) {
+ CallingConvention == CallingConv::Fast ||
+ CallingConvention == CallingConv::Tail) {
if (IsNested)
report_fatal_error("Segmented stacks does not support fastcall with "
"nested function.");
@@ -2525,6 +2551,18 @@ static unsigned getHiPELiteral(
+ " required but not provided");
}
+// Return true if there are no non-ehpad successors to MBB and there are no
+// non-meta instructions between MBBI and MBB.end().
+static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator MBBI) {
+ return std::all_of(
+ MBB.succ_begin(), MBB.succ_end(),
+ [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
+ std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
+ return MI.isMetaInstruction();
+ });
+}
+
/// Erlang programs may need a special prologue to handle the stack size they
/// might need at runtime. That is because Erlang/OTP does not implement a C
/// stack but uses a custom implementation of hybrid stack/heap architecture.
@@ -2758,7 +2796,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reserveCallFrame ? TII.getFrameSize(*I) : 0;
+ uint64_t Amount = TII.getFrameSize(*I);
uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
I = MBB.erase(I);
auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
@@ -2847,7 +2885,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
return I;
}
- if (isDestroy && InternalAmt) {
+ if (isDestroy && InternalAmt && !blockEndIsUnreachable(MBB, I)) {
// If we are performing frame pointer elimination and if the callee pops
// something off the stack pointer, add it back. We do this until we have
// more advanced stack pointer tracking ability.
@@ -2912,8 +2950,8 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
"restoring EBP/ESI on non-32-bit target");
MachineFunction &MF = *MBB.getParent();
- unsigned FramePtr = TRI->getFrameRegister(MF);
- unsigned BasePtr = TRI->getBaseRegister();
+ Register FramePtr = TRI->getFrameRegister(MF);
+ Register BasePtr = TRI->getBaseRegister();
WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index d32746e3a36e..2103d6471ead 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -25,7 +25,7 @@ class X86RegisterInfo;
class X86FrameLowering : public TargetFrameLowering {
public:
- X86FrameLowering(const X86Subtarget &STI, unsigned StackAlignOverride);
+ X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride);
// Cached subtarget predicates.
@@ -99,6 +99,8 @@ public:
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
+ int getWin64EHFrameIndexRef(const MachineFunction &MF,
+ int FI, unsigned &SPReg) const;
int getFrameIndexReferenceSP(const MachineFunction &MF,
int FI, unsigned &SPReg, int Adjustment) const;
int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 95d31e62cafc..5b546d42d98a 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -253,6 +253,11 @@ namespace {
return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
}
+ bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+
/// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
@@ -362,6 +367,11 @@ namespace {
if (User->getNumOperands() != 2)
continue;
+ // If this can match to INC/DEC, don't count it as a use.
+ if (User->getOpcode() == ISD::ADD &&
+ (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0))))
+ continue;
+
// Immediates that are used for offsets as part of stack
// manipulation should be left alone. These are typically
// used to indicate SP offsets for argument passing and
@@ -502,8 +512,10 @@ namespace {
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
bool tryShiftAmountMod(SDNode *N);
+ bool combineIncDecVector(SDNode *Node);
bool tryShrinkShlLogicImm(SDNode *N);
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
+ bool tryMatchBitSelect(SDNode *N);
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);
@@ -746,7 +758,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
return false;
LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
if (!LD ||
- LD->isVolatile() ||
+ !LD->isSimple() ||
LD->getAddressingMode() != ISD::UNINDEXED ||
LD->getExtensionType() != ISD::NON_EXTLOAD)
return false;
@@ -873,10 +885,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
case ISD::FRINT: Imm = 0x4; break;
}
SDLoc dl(N);
- SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
- N->getValueType(0),
- N->getOperand(0),
- CurDAG->getConstant(Imm, dl, MVT::i8));
+ SDValue Res = CurDAG->getNode(
+ X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
+ CurDAG->getTargetConstant(Imm, dl, MVT::i8));
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
@@ -2305,10 +2316,10 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
return false;
// We can allow a full vector load here since narrowing a load is ok unless
- // it's volatile.
+ // it's volatile or atomic.
if (ISD::isNON_EXTLoad(N.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (!LD->isVolatile() &&
+ if (LD->isSimple() &&
IsProfitableToFold(N, LD, Root) &&
IsLegalToFold(N, Parent, Root, OptLevel)) {
PatternNodeWithChain = N;
@@ -2464,6 +2475,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
Complexity += 2;
}
+ // Heuristic: try harder to form an LEA from ADD if the operands set flags.
+ // Unlike ADD, LEA does not affect flags, so we will be less likely to require
+ // duplicating flag-producing instructions later in the pipeline.
+ if (N.getOpcode() == ISD::ADD) {
+ auto isMathWithFlags = [](SDValue V) {
+ switch (V.getOpcode()) {
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::ADC:
+ case X86ISD::SBB:
+ /* TODO: These opcodes can be added safely, but we may want to justify
+ their inclusion for different reasons (better for reg-alloc).
+ case X86ISD::SMUL:
+ case X86ISD::UMUL:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ */
+ // Value 1 is the flag output of the node - verify it's not dead.
+ return !SDValue(V.getNode(), 1).use_empty();
+ default:
+ return false;
+ }
+ };
+ // TODO: This could be an 'or' rather than 'and' to make the transform more
+ // likely to happen. We might want to factor in whether there's a
+ // load folding opportunity for the math op that disappears with LEA.
+ if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
+ Complexity++;
+ }
+
if (AM.Disp)
Complexity++;
@@ -2544,6 +2586,7 @@ bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
+ assert(Root && P && "Unknown root/parent nodes");
if (!ISD::isNON_EXTLoad(N.getNode()) ||
!IsProfitableToFold(N, P, Root) ||
!IsLegalToFold(N, P, Root, OptLevel))
@@ -2553,6 +2596,20 @@ bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
+bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ assert(Root && P && "Unknown root/parent nodes");
+ if (N->getOpcode() != X86ISD::VBROADCAST_LOAD ||
+ !IsProfitableToFold(N, P, Root) ||
+ !IsLegalToFold(N, P, Root, OptLevel))
+ return false;
+
+ return selectAddr(N.getNode(),
+ N.getOperand(1), Base, Scale, Index, Disp, Segment);
+}
+
/// Return an SDNode that returns the value of the global base register.
/// Output instructions required to initialize the global base register,
/// if necessary.
@@ -3302,8 +3359,12 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
SDValue ImplDef = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
- NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
- NBits);
+
+ SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
+ NBits = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
+ NBits, SRIdxVal), 0);
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
if (Subtarget->hasBMI2()) {
@@ -3400,8 +3461,9 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
// TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
// hoisting the move immediate would make it worthwhile with a less optimal
// BEXTR?
- if (!Subtarget->hasTBM() &&
- !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
+ bool PreferBEXTR =
+ Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
+ if (!PreferBEXTR && !Subtarget->hasBMI2())
return nullptr;
// Must have a shift right.
@@ -3440,23 +3502,50 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
if (Shift + MaskSize > NVT.getSizeInBits())
return nullptr;
- SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
- unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
- unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
+ // BZHI, if available, is always fast, unlike BEXTR. But even if we decide
+ // that we can't use BEXTR, it is only worthwhile using BZHI if the mask
+ // does not fit into 32 bits. Load folding is not a sufficient reason.
+ if (!PreferBEXTR && MaskSize <= 32)
+ return nullptr;
- // BMI requires the immediate to placed in a register.
- if (!Subtarget->hasTBM()) {
- ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
- MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+ SDValue Control;
+ unsigned ROpc, MOpc;
+
+ if (!PreferBEXTR) {
+ assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
+ // If we can't make use of BEXTR then we can't fuse shift+mask stages.
+ // Let's perform the mask first, and apply the shift later. Note that we need
+ // to widen the mask to account for the fact that we'll apply the shift
+ // afterwards!
+ Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
+ ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
+ MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
- New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
+ Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
+ } else {
+ // The 'control' of BEXTR has the pattern of:
+ // [15...8 bit][ 7...0 bit] location
+ // [ bit count][ shift] name
+ // I.e. 0b00000010'00000001 means (x >> 0b1) & 0b11
+ Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
+ if (Subtarget->hasTBM()) {
+ ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
+ MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
+ } else {
+ assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
+ // BMI requires the immediate to be placed in a register.
+ ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
+ MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+ unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
+ Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
+ }
}
MachineSDNode *NewNode;
SDValue Input = N0->getOperand(0);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
+ SDValue Ops[] = {
+ Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Update the chain.
@@ -3464,7 +3553,15 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
// Record the mem-refs
CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
} else {
- NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New);
+ NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
+ }
+
+ if (!PreferBEXTR) {
+ // We still need to apply the shift.
+ SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
+ unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
+ NewNode =
+ CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
}
return NewNode;
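A small encoder for the control layout described above (illustrative, not upstream API): shift in bits 7:0, extracted bit count in bits 15:8.

static unsigned bextrControl(unsigned Shift, unsigned MaskSize) {
  return Shift | (MaskSize << 8);
}
// bextrControl(1, 2) == 0x0201, i.e. (x >> 1) & 0b11 when selected as BEXTR.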
@@ -3735,6 +3832,52 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
return true;
}
+/// Convert vector increment or decrement to sub/add with an all-ones constant:
+/// add X, <1, 1...> --> sub X, <-1, -1...>
+/// sub X, <1, 1...> --> add X, <-1, -1...>
+/// The all-ones vector constant can be materialized using a pcmpeq instruction
+/// that is commonly recognized as an idiom (has no register dependency), so
+/// that's better/smaller than loading a splat 1 constant.
+bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) {
+ assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) &&
+ "Unexpected opcode for increment/decrement transform");
+
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Should only be called for vectors.");
+
+ SDValue X = Node->getOperand(0);
+ SDValue OneVec = Node->getOperand(1);
+
+ APInt SplatVal;
+ if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue())
+ return false;
+
+ SDLoc DL(Node);
+ SDValue OneConstant, AllOnesVec;
+
+ APInt Ones = APInt::getAllOnesValue(32);
+ assert(VT.getSizeInBits() % 32 == 0 &&
+ "Expected bit count to be a multiple of 32");
+ OneConstant = CurDAG->getConstant(Ones, DL, MVT::i32);
+ insertDAGNode(*CurDAG, X, OneConstant);
+
+ unsigned NumElts = VT.getSizeInBits() / 32;
+ assert(NumElts > 0 && "Expected to get non-empty vector.");
+ AllOnesVec = CurDAG->getSplatBuildVector(MVT::getVectorVT(MVT::i32, NumElts),
+ DL, OneConstant);
+ insertDAGNode(*CurDAG, X, AllOnesVec);
+
+ AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec);
+ insertDAGNode(*CurDAG, X, AllOnesVec);
+
+ unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
+ SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec);
+
+ ReplaceNode(Node, NewNode.getNode());
+ SelectCode(NewNode.getNode());
+ return true;
+}
+
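The rewrite rests on the two's-complement identity x + 1 == x - (-1), applied lane-wise; checked here for one 32-bit lane:

static_assert(7u + 1u == 7u - 0xFFFFFFFFu,
              "add-one equals subtract-all-ones modulo 2^32");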
/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
@@ -3975,12 +4118,18 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
if (CC != ISD::SETEQ && CC != ISD::SETNE)
return false;
- // See if we're comparing against zero. This should have been canonicalized
- // to RHS during lowering.
- if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode()))
+ SDValue SetccOp0 = Setcc.getOperand(0);
+ SDValue SetccOp1 = Setcc.getOperand(1);
+
+ // Canonicalize the all zero vector to the RHS.
+ if (ISD::isBuildVectorAllZeros(SetccOp0.getNode()))
+ std::swap(SetccOp0, SetccOp1);
+
+ // See if we're comparing against zero.
+ if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode()))
return false;
- SDValue N0 = Setcc.getOperand(0);
+ SDValue N0 = SetccOp0;
MVT CmpVT = N0.getSimpleValueType();
MVT CmpSVT = CmpVT.getVectorElementType();
@@ -4027,13 +4176,14 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
// Look through single use bitcasts.
- if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse())
- Src = Src.getOperand(0);
-
- if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) {
+ if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) {
Parent = Src.getNode();
Src = Src.getOperand(0);
- if (Src.getSimpleValueType() == CmpSVT)
+ }
+
+ if (Src.getOpcode() == X86ISD::VBROADCAST_LOAD && Src.hasOneUse()) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(Src);
+ if (MemIntr->getMemoryVT().getSizeInBits() == CmpSVT.getSizeInBits())
return Src;
}
@@ -4045,17 +4195,18 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
bool FoldedBCast = false;
if (!FoldedLoad && CanFoldLoads &&
(CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
- SDNode *ParentNode = nullptr;
+ SDNode *ParentNode = N0.getNode();
if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
- FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
- Tmp1, Tmp2, Tmp3, Tmp4);
+ FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
+ Tmp1, Tmp2, Tmp3, Tmp4);
}
// Try the other operand.
if (!FoldedBCast) {
+ SDNode *ParentNode = N0.getNode();
if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
- FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
- Tmp1, Tmp2, Tmp3, Tmp4);
+ FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
+ Tmp1, Tmp2, Tmp3, Tmp4);
if (FoldedBCast)
std::swap(Src0, Src1);
}
@@ -4125,7 +4276,7 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
// Update the chain.
ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
// Record the mem-refs
- CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
+ CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Load)->getMemOperand()});
} else {
if (IsMasked)
CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
@@ -4146,6 +4297,55 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
return true;
}
+// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
+// into vpternlog.
+bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
+
+ MVT NVT = N->getSimpleValueType(0);
+
+ // Make sure we support VPTERNLOG.
+ if (!NVT.isVector() || !Subtarget->hasAVX512())
+ return false;
+
+ // We need VLX for 128/256-bit.
+ if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Canonicalize AND to LHS.
+ if (N1.getOpcode() == ISD::AND)
+ std::swap(N0, N1);
+
+ if (N0.getOpcode() != ISD::AND ||
+ N1.getOpcode() != X86ISD::ANDNP ||
+ !N0.hasOneUse() || !N1.hasOneUse())
+ return false;
+
+ // ANDN is not commutable, so use it to pin down A and C.
+ SDValue A = N1.getOperand(0);
+ SDValue C = N1.getOperand(1);
+
+ // AND is commutable, if one operand matches A, the other operand is B.
+ // Otherwise this isn't a match.
+ SDValue B;
+ if (N0.getOperand(0) == A)
+ B = N0.getOperand(1);
+ else if (N0.getOperand(1) == A)
+ B = N0.getOperand(0);
+ else
+ return false;
+
+ SDLoc dl(N);
+ SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
+ SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
+ ReplaceNode(N, Ternlog.getNode());
+ SelectCode(Ternlog.getNode());
+ return true;
+}
+
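The 0xCA immediate is the truth table of the bit-select a ? b : c. Assuming VPTERNLOG's usual convention that bit (a<<2 | b<<1 | c) of the immediate holds the result for inputs a, b, c, it can be rederived as:

static unsigned char bitSelectImm() {
  unsigned char Imm = 0;
  for (int A = 0; A < 2; ++A)
    for (int B = 0; B < 2; ++B)
      for (int C = 0; C < 2; ++C)
        if (A ? B : C) // (A & B) | (~A & C), evaluated per truth-table row
          Imm |= 1 << ((A << 2) | (B << 1) | C);
  return Imm; // == 0xCA
}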
void X86DAGToDAGISel::Select(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
unsigned Opcode = Node->getOpcode();
@@ -4170,6 +4370,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
unsigned Opc = 0;
switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_sse3_monitor:
if (!Subtarget->hasSSE3())
break;
@@ -4303,9 +4504,16 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (tryShrinkShlLogicImm(Node))
return;
+ if (Opcode == ISD::OR && tryMatchBitSelect(Node))
+ return;
+
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB: {
+ if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() &&
+ combineIncDecVector(Node))
+ return;
+
// Try to avoid folding immediates with multiple uses for optsize.
// This code tries to select to register form directly to avoid going
// through the isel table which might fold the immediate. We can't change
@@ -4333,6 +4541,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (!isInt<8>(Val) && !isInt<32>(Val))
break;
+ // If this can match to INC/DEC, let it go.
+ if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
+ break;
+
// Check if we should avoid folding this immediate.
if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
break;
@@ -4610,7 +4822,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
LoReg = X86::AL; ClrReg = HiReg = X86::AH;
- SExtOpcode = X86::CBW;
+ SExtOpcode = 0; // Not used.
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
@@ -4632,24 +4844,27 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
SDValue InFlag;
- if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+ if (NVT == MVT::i8) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
MachineSDNode *Move;
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
- Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
- MVT::Other, Ops);
+ unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
+ : X86::MOVZX16rm8;
+ Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
Chain = SDValue(Move, 1);
ReplaceUses(N0.getValue(1), Chain);
// Record the mem-refs
CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
} else {
- Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
+ unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
+ : X86::MOVZX16rr8;
+ Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
Chain = CurDAG->getEntryNode();
}
- Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
SDValue());
InFlag = Chain.getValue(1);
} else {
@@ -4996,10 +5211,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case ISD::FRINT: Imm = 0x4; break;
}
SDLoc dl(Node);
- SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
- Node->getValueType(0),
+ SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0),
Node->getOperand(0),
- CurDAG->getConstant(Imm, dl, MVT::i8));
+ CurDAG->getTargetConstant(Imm, dl, MVT::i8));
ReplaceNode(Node, Res.getNode());
SelectCode(Res.getNode());
return;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0b4bf687e6cf..ed975e9248a8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -65,17 +65,19 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
-static cl::opt<bool> ExperimentalVectorWideningLegalization(
- "x86-experimental-vector-widening-legalization", cl::init(false),
- cl::desc("Enable an experimental vector type legalization through widening "
- "rather than promotion."),
- cl::Hidden);
-
static cl::opt<int> ExperimentalPrefLoopAlignment(
"x86-experimental-pref-loop-alignment", cl::init(4),
- cl::desc("Sets the preferable loop alignment for experiments "
- "(the last x86-experimental-pref-loop-alignment bits"
- " of the loop header PC will be 0)."),
+ cl::desc(
+ "Sets the preferable loop alignment for experiments (as log2 bytes)"
+ "(the last x86-experimental-pref-loop-alignment bits"
+ " of the loop header PC will be 0)."),
+ cl::Hidden);
+
+// Added in 10.0.
+static cl::opt<bool> EnableOldKNLABI(
+ "x86-enable-old-knl-abi", cl::init(false),
+ cl::desc("Enables passing v32i16 and v64i8 in 2 YMM registers instead of "
+ "one ZMM register on AVX512F, but not AVX512BW targets."),
cl::Hidden);
static cl::opt<bool> MulConstantOptimization(
@@ -84,6 +86,13 @@ static cl::opt<bool> MulConstantOptimization(
"SHIFT, LEA, etc."),
cl::Hidden);
+static cl::opt<bool> ExperimentalUnorderedISEL(
+ "x86-experimental-unordered-atomic-isel", cl::init(false),
+ cl::desc("Use LoadSDNode and StoreSDNode instead of "
+ "AtomicSDNode for unordered atomic loads and "
+ "stores respectively."),
+ cl::Hidden);
+
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
@@ -196,7 +205,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Integer absolute.
if (Subtarget.hasCMov()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
- setOperationAction(ISD::ABS , MVT::i32 , Custom);
+ setOperationAction(ISD::ABS , MVT::i32 , Custom);
}
setOperationAction(ISD::ABS , MVT::i64 , Custom);
@@ -214,14 +223,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
- if (Subtarget.is64Bit()) {
- if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
- // f32/f64 are legal, f80 is custom.
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
- else
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
- setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
- } else if (!Subtarget.useSoftFloat()) {
+ if (!Subtarget.useSoftFloat()) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
@@ -277,29 +279,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
- if (Subtarget.is64Bit()) {
- if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
- // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
- setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
- } else {
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
- setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
- }
- } else if (!Subtarget.useSoftFloat()) {
- // Since AVX is a superset of SSE3, only check for SSE here.
- if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
- // Expand FP_TO_UINT into a select.
- // FIXME: We would like to use a Custom expander here eventually to do
- // the optimal thing for SSE vs. the default expansion in the legalizer.
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
- else
- // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
- // With SSE3 we can use fisttpll to convert to a signed i64; without
- // SSE, we're stuck with a fistpll.
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
-
- setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
+ if (!Subtarget.useSoftFloat()) {
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
@@ -345,11 +327,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
- setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
+ setOperationAction(ISD::FREM , MVT::f128 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
// Promote the i8 variants and force them on up to i32 which has a shorter
@@ -396,15 +378,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// There's never any support for operations beyond MVT::f32.
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
if (Subtarget.hasPOPCNT()) {
setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
@@ -638,17 +624,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
- // Long double always uses X87, except f128 in MMX.
+ // f80 always uses X87.
if (UseX87) {
- if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
- addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
- ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
- setOperationAction(ISD::FABS , MVT::f128, Custom);
- setOperationAction(ISD::FNEG , MVT::f128, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
- }
-
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -684,10 +661,60 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f80, Expand);
}
+ // f128 uses xmm registers, but most operations require libcalls.
+ if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
+ addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
+
+ addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
+
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+
+ setOperationAction(ISD::FABS, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ // We need to custom handle any FP_ROUND with an f128 input, but
+ // LegalizeDAG uses the result type to know when to run a custom handler.
+ // So we have to list all legal floating point result types here.
+ if (isTypeLegal(MVT::f32)) {
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
+ }
+ if (isTypeLegal(MVT::f64)) {
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
+ }
+ if (isTypeLegal(MVT::f80)) {
+ setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
+ }
+
+ setOperationAction(ISD::SETCC, MVT::f128, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f80, Expand);
+ }
+
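From the source-language side, this keeps IEEE-quad values in XMM registers while arithmetic still calls out to soft-float routines; a hedged example, assuming a GCC/Clang-style __float128 and the usual compiler-rt libcall naming:

__float128 AddQuad(__float128 A, __float128 B) {
  return A + B; // lowers to a libcall such as __addtf3; moves stay in XMM
}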
// Always use a library call for pow.
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f128 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
@@ -716,7 +743,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
@@ -754,7 +781,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- for (MVT InnerVT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(InnerVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
@@ -797,6 +824,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -823,10 +852,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
setOperationAction(ISD::MUL, MVT::v2i8, Custom);
- setOperationAction(ISD::MUL, MVT::v2i16, Custom);
- setOperationAction(ISD::MUL, MVT::v2i32, Custom);
setOperationAction(ISD::MUL, MVT::v4i8, Custom);
- setOperationAction(ISD::MUL, MVT::v4i16, Custom);
setOperationAction(ISD::MUL, MVT::v8i8, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -863,28 +889,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
- if (!ExperimentalVectorWideningLegalization) {
- // Use widening instead of promotion.
- for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
- MVT::v4i16, MVT::v2i16 }) {
- setOperationAction(ISD::UADDSAT, VT, Custom);
- setOperationAction(ISD::SADDSAT, VT, Custom);
- setOperationAction(ISD::USUBSAT, VT, Custom);
- setOperationAction(ISD::SSUBSAT, VT, Custom);
- }
- }
-
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- // Provide custom widening for v2f32 setcc. This is really for VLX when
- // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
- // type legalization changing the result type to v4i1 during widening.
- // It works fine for SSE2 and is probably faster so no need to qualify with
- // VLX support.
- setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
-
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
@@ -904,19 +912,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
- // We support custom legalizing of sext and anyext loads for specific
- // memory vector types which we can load as a scalar (or sequence of
- // scalars) and extend in-register to a legal 128-bit vector type. For sext
- // loads these must work with a single scalar load.
- for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
- }
-
for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
@@ -938,7 +933,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
// Custom legalize these to avoid over promotion or custom promotion.
setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
@@ -991,18 +985,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
- if (ExperimentalVectorWideningLegalization) {
- setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
- } else {
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
- }
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
@@ -1069,22 +1059,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
- if (!ExperimentalVectorWideningLegalization) {
- // Avoid narrow result types when widening. The legal types are listed
- // in the next loop.
- for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
- }
- }
-
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
- if (!ExperimentalVectorWideningLegalization)
- setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
@@ -1145,6 +1123,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Custom);
+
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
@@ -1292,10 +1272,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STORE, VT, Custom);
}
- if (HasInt256)
- setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
-
if (HasInt256) {
+ setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
+
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
@@ -1407,6 +1386,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Custom);
+
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
@@ -1433,12 +1414,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- if (ExperimentalVectorWideningLegalization) {
- // Need to custom widen this if we don't have AVX512BW.
- setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
- }
+ // Need to custom widen this if we don't have AVX512BW.
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
@@ -1529,10 +1508,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
- // Need to custom split v32i16/v64i8 bitcasts.
if (!Subtarget.hasBWI()) {
+ // Need to custom split v32i16/v64i8 bitcasts.
setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
+
+ // Better to split these into two 256-bit ops.
+ setOperationAction(ISD::BITREVERSE, MVT::v8i64, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::v16i32, Custom);
}
if (Subtarget.hasVBMI2()) {
@@ -1777,6 +1760,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSHR, VT, Custom);
}
}
+
+ setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
}
// We want to custom lower some of our intrinsics.
@@ -1905,13 +1892,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxLoadsPerMemcmpOptSize = 2;
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
- setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
+ setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
// An out-of-order CPU can speculatively execute past a predictable branch,
// but a conditional move could be stalled by an expensive earlier operation.
PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
EnableExtLdPromotion = true;
- setPrefFunctionAlignment(4); // 2^4 bytes.
+ setPrefFunctionAlignment(Align(16));
verifyIntrinsicTables();
}
@@ -1939,8 +1926,7 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return TypeSplitVector;
- if (ExperimentalVectorWideningLegalization &&
- VT.getVectorNumElements() != 1 &&
+ if (VT.getVectorNumElements() != 1 &&
VT.getVectorElementType() != MVT::i1)
return TypeWidenVector;
@@ -1950,19 +1936,62 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
+ // v32i1 vectors should be promoted to v32i8 to match avx2.
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return MVT::v32i8;
+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+ Subtarget.hasAVX512() &&
+ (!isPowerOf2_32(VT.getVectorNumElements()) ||
+ (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
+ (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
+ return MVT::i8;
+ // FIXME: Should we just make these types legal and custom split operations?
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) &&
+ Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI)
+ return MVT::v16i32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
+ // v32i1 vectors should be promoted to v32i8 to match avx2.
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return 1;
+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+ Subtarget.hasAVX512() &&
+ (!isPowerOf2_32(VT.getVectorNumElements()) ||
+ (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
+ (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
+ return VT.getVectorNumElements();
+ // FIXME: Should we just make these types legal and custom split operations?
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) &&
+ Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI)
+ return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
+unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+ Subtarget.hasAVX512() &&
+ (!isPowerOf2_32(VT.getVectorNumElements()) ||
+ (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
+ (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
+ RegisterVT = MVT::i8;
+ IntermediateVT = MVT::i1;
+ NumIntermediates = VT.getVectorNumElements();
+ return NumIntermediates;
+ }
+
+ return TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+}
+
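The same vXi1 guard appears in all three calling-convention hooks above; a compact mirror of its element-count part (illustrative helper, not upstream API), assuming an AVX512 target and i1 elements:

#include "llvm/Support/MathExtras.h"

static bool breaksVXi1ToScalars(unsigned NumElts, bool HasBWI) {
  return !llvm::isPowerOf2_32(NumElts) || // e.g. v7i1
         (NumElts > 16 && !HasBWI) ||     // e.g. v32i1 without AVX512BW
         (NumElts > 64 && HasBWI);        // e.g. v128i1 even with AVX512BW
}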
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext& Context,
EVT VT) const {
@@ -2060,6 +2089,11 @@ EVT X86TargetLowering::getOptimalMemOpType(
if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
+ // FIXME: Check if unaligned 64-byte accesses are slow.
+ if (Size >= 64 && Subtarget.hasAVX512() &&
+ (Subtarget.getPreferVectorWidth() >= 512)) {
+ return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
+ }
// FIXME: Check if unaligned 32-byte accesses are slow.
if (Size >= 32 && Subtarget.hasAVX() &&
(Subtarget.getPreferVectorWidth() >= 256)) {
@@ -2403,8 +2437,8 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
/// Breaks v64i1 value into two registers and adds the new node to the DAG
static void Passv64i1ArgInRegs(
- const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
+ const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
+ SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, CCValAssign &VA,
CCValAssign &NextVA, const X86Subtarget &Subtarget) {
assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
@@ -2537,7 +2571,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
- Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
+ Passv64i1ArgInRegs(dl, DAG, ValToCopy, RegsToPass, VA, RVLocs[++I],
Subtarget);
assert(2 == RegsToPass.size() &&
@@ -2816,6 +2850,10 @@ SDValue X86TargetLowering::LowerCallResult(
((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
+ } else if (CopyVT == MVT::f64 &&
+ (Is64Bit && !Subtarget.hasSSE2())) {
+ errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
+ VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
// If we prefer to use the value in xmm registers, copy it out as f80 and
@@ -2925,7 +2963,7 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
- CC == CallingConv::HHVM);
+ CC == CallingConv::HHVM || CC == CallingConv::Tail);
}
/// Return true if we might ever do TCO for calls with this calling convention.
@@ -2951,7 +2989,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
/// Return true if the function is being made into a tailcall target by
/// changing its ABI.
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
- return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
+ return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
}
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
@@ -3405,7 +3443,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
// Find the largest legal vector type.
MVT VecVT = MVT::Other;
// FIXME: Only some x86_32 calling conventions support AVX512.
- if (Subtarget.hasAVX512() &&
+ if (Subtarget.useAVX512Regs() &&
(Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
CallConv == CallingConv::Intel_OCL_BI)))
VecVT = MVT::v16f32;
@@ -3577,6 +3615,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
bool IsSibcall = false;
+ bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
+ CallConv == CallingConv::Tail;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
@@ -3597,8 +3637,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Attr.getValueAsString() == "true")
isTailCall = false;
- if (Subtarget.isPICStyleGOT() &&
- !MF.getTarget().Options.GuaranteedTailCallOpt) {
+ if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
// If we are using a GOT, disable tail calls to external symbols with
// default visibility. Tail calling such a symbol requires using a GOT
// relocation, which forces early binding of the symbol. This breaks code
@@ -3625,7 +3664,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
- if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
+ if (!IsGuaranteeTCO && isTailCall)
IsSibcall = true;
if (isTailCall)
@@ -3657,8 +3696,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- canGuaranteeTCO(CallConv))
+ else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -3782,8 +3820,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
// Split v64i1 value into two registers
- Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
- Subtarget);
+ Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
const TargetOptions &Options = DAG.getTarget().Options;
@@ -4069,6 +4106,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
+ // Save heapallocsite metadata.
+ if (CLI.CS)
+ if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite"))
+ DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
+
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
@@ -4190,7 +4232,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(VR))
+ if (!Register::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
@@ -4279,6 +4321,8 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
bool CCMatch = CallerCC == CalleeCC;
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
+ bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
+ CalleeCC == CallingConv::Tail;
// Win64 functions have extra shadow space for argument homing. Don't do the
// sibcall if the caller and callee have mismatched expectations for this
@@ -4286,7 +4330,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (IsCalleeWin64 != IsCallerWin64)
return false;
- if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
+ if (IsGuaranteeTCO) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
return false;
@@ -4413,7 +4457,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
continue;
- unsigned Reg = VA.getLocReg();
+ Register Reg = VA.getLocReg();
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
@@ -4652,7 +4696,11 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
}
- if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
+ if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
+ // X >= 0 -> X == 0, jump on !sign.
+ return X86::COND_NS;
+ }
+ if (SetCCOpcode == ISD::SETLT && RHSC->getAPIntValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_LE;
@@ -4760,7 +4808,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
ScalarVT = MVT::i32;
Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
- Info.align = 1;
+ Info.align = Align::None();
Info.flags |= MachineMemOperand::MOStore;
break;
}
@@ -4773,7 +4821,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
unsigned NumElts = std::min(DataVT.getVectorNumElements(),
IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
- Info.align = 1;
+ Info.align = Align::None();
Info.flags |= MachineMemOperand::MOLoad;
break;
}
@@ -4785,7 +4833,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
unsigned NumElts = std::min(DataVT.getVectorNumElements(),
IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
- Info.align = 1;
+ Info.align = Align::None();
Info.flags |= MachineMemOperand::MOStore;
break;
}
@@ -4811,6 +4859,8 @@ bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
+ assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
+
// "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
// relocation target a movq or addq instruction: don't let the load shrink.
SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
@@ -4852,11 +4902,12 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
-bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
// If we are using XMM registers in the ABI and the condition of the select is
// a floating-point compare and we have blendv or conditional move, then it is
// cheaper to select instead of doing a cross-register move and creating a
// load that depends on the compare result.
+ bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
}
@@ -4869,15 +4920,25 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
return true;
}
-bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const {
// TODO: We handle scalars using custom code, but generic combining could make
// that unnecessary.
APInt MulC;
if (!ISD::isConstantSplatVector(C.getNode(), MulC))
return false;
+ // Find the type this will be legalized too. Otherwise we might prematurely
+ // convert this to shl+add/sub and then still have to type legalize those ops.
+ // Another choice would be to defer the decision for illegal types until
+ // after type legalization. But constant splat vectors of i64 can't make it
+ // through type legalization on 32-bit targets so we would need to special
+ // case vXi64.
+ while (getTypeAction(Context, VT) != TypeLegal)
+ VT = getTypeToTransformTo(Context, VT);
+
// If vector multiply is legal, assume that's faster than shl + add/sub.
- // TODO: Multiply is a complex op with higher latency and lower througput in
+ // TODO: Multiply is a complex op with higher latency and lower throughput in
// most implementations, so this check could be loosened based on type
// and/or a CPU attribute.
if (isOperationLegal(ISD::MUL, VT))
@@ -5022,6 +5083,33 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const {
return Subtarget.hasSSE2();
}
+bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+ return X.getValueType().isScalarInteger(); // 'bt'
+}
+
+bool X86TargetLowering::
+ shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const {
+ // Does baseline recommend not to perform the fold by default?
+ if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
+ return false;
+ // For scalars this transform is always beneficial.
+ if (X.getValueType().isScalarInteger())
+ return true;
+ // If all the shift amounts are identical, then transform is beneficial even
+ // with rudimentary SSE2 shifts.
+ if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
+ return true;
+ // If we have AVX2 with it's powerful shift operations, then it's also good.
+ if (Subtarget.hasAVX2())
+ return true;
+ // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
+ return NewShiftOpcode == ISD::SHL;
+}
+
bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
assert(((N->getOpcode() == ISD::SHL &&
@@ -5054,6 +5142,14 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
return true;
}
+bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
+ SDNode *N) const {
+ if (DAG.getMachineFunction().getFunction().hasMinSize() &&
+ !Subtarget.isOSWindows())
+ return false;
+ return true;
+}
+
bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
// Any legal vector type can be splatted more efficiently than
// loading/spilling from memory.
@@ -5093,10 +5189,8 @@ static bool isUndefOrZero(int Val) {
/// Return true if every element in Mask, beginning from position Pos and ending
/// in Pos+Size is the undef sentinel value.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
- for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
- if (Mask[i] != SM_SentinelUndef)
- return false;
- return true;
+ return llvm::all_of(Mask.slice(Pos, Size),
+ [](int M) { return M == SM_SentinelUndef; });
}
/// Return true if the mask creates a vector whose lower half is undefined.
@@ -5119,10 +5213,7 @@ static bool isInRange(int Val, int Low, int Hi) {
/// Return true if the value of any element in Mask falls within the specified
/// range (L, H].
static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
- for (int M : Mask)
- if (isInRange(M, Low, Hi))
- return true;
- return false;
+ return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
}
/// Return true if Val is undef or if its value falls within the
@@ -5133,12 +5224,9 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) {
/// Return true if every element in Mask is undef or if its value
/// falls within the specified range (L, H].
-static bool isUndefOrInRange(ArrayRef<int> Mask,
- int Low, int Hi) {
- for (int M : Mask)
- if (!isUndefOrInRange(M, Low, Hi))
- return false;
- return true;
+static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
+ return llvm::all_of(
+ Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
}
/// Return true if Val is undef, zero or if its value falls within the
@@ -5150,10 +5238,8 @@ static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
/// Return true if every element in Mask is undef, zero or if its value
/// falls within the specified range (L, H].
static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
- for (int M : Mask)
- if (!isUndefOrZeroOrInRange(M, Low, Hi))
- return false;
- return true;
+ return llvm::all_of(
+ Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
}
/// Return true if every element in Mask, beginning
@@ -5171,8 +5257,9 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range (Low, Low+Size], or is undef or is zero.
static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
- unsigned Size, int Low) {
- for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
+ unsigned Size, int Low,
+ int Step = 1) {
+ for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
return false;
return true;
@@ -5182,10 +5269,8 @@ static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
/// from position Pos and ending in Pos+Size is undef or is zero.
static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
unsigned Size) {
- for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
- if (!isUndefOrZero(Mask[i]))
- return false;
- return true;
+ return llvm::all_of(Mask.slice(Pos, Size),
+ [](int M) { return isUndefOrZero(M); });
}
/// Helper function to test whether a shuffle mask could be
@@ -5357,6 +5442,8 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
SDValue Vec;
if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
+ } else if (VT.isFloatingPoint()) {
+ Vec = DAG.getConstantFP(+0.0, dl, VT);
} else if (VT.getVectorElementType() == MVT::i1) {
assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type");
@@ -5500,6 +5587,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
Idx == (VT.getVectorNumElements() / 2) &&
Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Src.getOperand(1).getValueType() == SubVT &&
isNullConstant(Src.getOperand(2))) {
Ops.push_back(Src.getOperand(1));
Ops.push_back(Sub);
@@ -5593,7 +5681,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
// May need to promote to a legal type.
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- getZeroVector(WideOpVT, Subtarget, DAG, dl),
+ DAG.getConstant(0, dl, WideOpVT),
SubVec, Idx);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
@@ -5609,14 +5697,14 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (IdxVal == 0) {
// Zero lower bits of the Vec
- SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+ SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
ZeroIdx);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
// Merge them together, SubVec should be zero extended.
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- getZeroVector(WideOpVT, Subtarget, DAG, dl),
+ DAG.getConstant(0, dl, WideOpVT),
SubVec, ZeroIdx);
Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
@@ -5628,7 +5716,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (Vec.isUndef()) {
assert(IdxVal != 0 && "Unexpected index");
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
+ DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
@@ -5638,30 +5726,30 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
- DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
if (ShiftRight != 0)
SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
- DAG.getConstant(ShiftRight, dl, MVT::i8));
+ DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
// Simple case when we put subvector in the upper part
if (IdxVal + SubVecNumElems == NumElems) {
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
+ DAG.getTargetConstant(IdxVal, dl, MVT::i8));
if (SubVecNumElems * 2 == NumElems) {
// Special case, use legal zero extending insert_subvector. This allows
// isel to opimitize when bits are known zero.
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- getZeroVector(WideOpVT, Subtarget, DAG, dl),
+ DAG.getConstant(0, dl, WideOpVT),
Vec, ZeroIdx);
} else {
// Otherwise use explicit shifts to zero the bits.
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
Undef, Vec, ZeroIdx);
NumElems = WideOpVT.getVectorNumElements();
- SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
+ SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
}
@@ -5675,30 +5763,47 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// Widen the vector if needed.
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- // Move the current value of the bit to be replace to the lsbs.
- Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
- // Xor with the new bit.
- Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
- // Shift to MSB, filling bottom bits with 0.
+
+ // Clear the upper bits of the subvector and move it to its insert position.
unsigned ShiftLeft = NumElems - SubVecNumElems;
- Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
- DAG.getConstant(ShiftLeft, dl, MVT::i8));
- // Shift to the final position, filling upper bits with 0.
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
- Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
- DAG.getConstant(ShiftRight, dl, MVT::i8));
- // Xor with original vector leaving the new value.
- Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
+ SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
+
+ // Isolate the bits below the insertion point.
+ unsigned LowShift = NumElems - IdxVal;
+ SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec,
+ DAG.getTargetConstant(LowShift, dl, MVT::i8));
+ Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low,
+ DAG.getTargetConstant(LowShift, dl, MVT::i8));
+
+ // Isolate the bits after the last inserted bit.
+ unsigned HighShift = IdxVal + SubVecNumElems;
+ SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
+ DAG.getTargetConstant(HighShift, dl, MVT::i8));
+ High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High,
+ DAG.getTargetConstant(HighShift, dl, MVT::i8));
+
+ // Now OR all 3 pieces together.
+ Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High);
+ SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec);
+
// Reduce to original width if needed.
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
-static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- const SDLoc &dl, unsigned VectorWidth) {
- SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth);
- return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth);
+static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG,
+ const SDLoc &dl) {
+ assert(V1.getValueType() == V2.getValueType() && "subvector type mismatch");
+ EVT SubVT = V1.getValueType();
+ EVT SubSVT = SubVT.getScalarType();
+ unsigned SubNumElts = SubVT.getVectorNumElements();
+ unsigned SubVectorWidth = SubVT.getSizeInBits();
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts);
+ SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth);
+ return insertSubVector(V, V2, SubNumElts, DAG, dl, SubVectorWidth);
}
/// Returns a vector of specified type with all bits set.
@@ -5755,6 +5860,34 @@ static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT,
return DAG.getNode(Opcode, DL, VT, In);
}
+// Match (xor X, -1) -> X.
+// Match extract_subvector(xor X, -1) -> extract_subvector(X).
+// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y).
+static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
+ V = peekThroughBitcasts(V);
+ if (V.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()))
+ return V.getOperand(0);
+ if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
+ if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
+ Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(),
+ Not, V.getOperand(1));
+ }
+ }
+ SmallVector<SDValue, 2> CatOps;
+ if (collectConcatOps(V.getNode(), CatOps)) {
+ for (SDValue &CatOp : CatOps) {
+ SDValue NotCat = IsNOT(CatOp, DAG);
+ if (!NotCat) return SDValue();
+ CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps);
+ }
+ return SDValue();
+}
+
/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
@@ -6003,6 +6136,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
}
+ if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD &&
+ EltSizeInBits <= VT.getScalarSizeInBits()) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
+ if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits())
+ return false;
+
+ SDValue Ptr = MemIntr->getBasePtr();
+ if (Ptr->getOpcode() == X86ISD::Wrapper ||
+ Ptr->getOpcode() == X86ISD::WrapperRIP)
+ Ptr = Ptr->getOperand(0);
+
+ auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!CNode || CNode->isMachineConstantPoolEntry() ||
+ CNode->getOffset() != 0)
+ return false;
+
+ if (const Constant *C = CNode->getConstVal()) {
+ unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+ APInt UndefSrcElts(NumSrcElts, 0);
+ SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
+ if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) {
+ if (UndefSrcElts[0])
+ UndefSrcElts.setBits(0, NumSrcElts);
+ SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
+ return CastBitData(UndefSrcElts, SrcEltBits);
+ }
+ }
+ }
+
// Extract constant bits from a subvector broadcast.
if (Op.getOpcode() == X86ISD::SUBV_BROADCAST) {
SmallVector<APInt, 16> SubEltBits;
@@ -6123,7 +6287,9 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return false;
}
-static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
+namespace llvm {
+namespace X86 {
+bool isConstantSplat(SDValue Op, APInt &SplatVal) {
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
@@ -6146,6 +6312,8 @@ static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
return false;
}
+} // namespace X86
+} // namespace llvm
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
@@ -6551,13 +6719,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
return true;
}
-/// Check a target shuffle mask's inputs to see if we can set any values to
-/// SM_SentinelZero - this is for elements that are known to be zero
-/// (not just zeroable) from their inputs.
+/// Decode a target shuffle mask and inputs and see if any values are
+/// known to be undef or zero from their inputs.
/// Returns true if the target shuffle mask was decoded.
-static bool setTargetShuffleZeroElements(SDValue N,
- SmallVectorImpl<int> &Mask,
- SmallVectorImpl<SDValue> &Ops) {
+static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<SDValue> &Ops,
+ APInt &KnownUndef, APInt &KnownZero) {
bool IsUnary;
if (!isTargetShuffle(N.getOpcode()))
return false;
@@ -6566,15 +6733,17 @@ static bool setTargetShuffleZeroElements(SDValue N,
if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
return false;
+ int Size = Mask.size();
SDValue V1 = Ops[0];
SDValue V2 = IsUnary ? V1 : Ops[1];
+ KnownUndef = KnownZero = APInt::getNullValue(Size);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
assert((VT.getSizeInBits() % Mask.size()) == 0 &&
"Illegal split of shuffle value type");
- unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
+ unsigned EltSizeInBits = VT.getSizeInBits() / Size;
// Extract known constant input data.
APInt UndefSrcElts[2];
@@ -6585,12 +6754,18 @@ static bool setTargetShuffleZeroElements(SDValue N,
getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
SrcEltBits[1], true, false)};
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ for (int i = 0; i < Size; ++i) {
int M = Mask[i];
// Already decoded as SM_SentinelZero / SM_SentinelUndef.
- if (M < 0)
+ if (M < 0) {
+ assert(isUndefOrZero(M) && "Unknown shuffle sentinel value!");
+ if (SM_SentinelUndef == M)
+ KnownUndef.setBit(i);
+ if (SM_SentinelZero == M)
+ KnownZero.setBit(i);
continue;
+ }
// Determine shuffle input and normalize the mask.
unsigned SrcIdx = M / Size;
@@ -6599,7 +6774,7 @@ static bool setTargetShuffleZeroElements(SDValue N,
// We are referencing an UNDEF input.
if (V.isUndef()) {
- Mask[i] = SM_SentinelUndef;
+ KnownUndef.setBit(i);
continue;
}
@@ -6612,31 +6787,64 @@ static bool setTargetShuffleZeroElements(SDValue N,
int Scale = Size / V.getValueType().getVectorNumElements();
int Idx = M / Scale;
if (Idx != 0 && !VT.isFloatingPoint())
- Mask[i] = SM_SentinelUndef;
+ KnownUndef.setBit(i);
else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
- Mask[i] = SM_SentinelZero;
+ KnownZero.setBit(i);
continue;
}
// Attempt to extract from the source's constant bits.
if (IsSrcConstant[SrcIdx]) {
if (UndefSrcElts[SrcIdx][M])
- Mask[i] = SM_SentinelUndef;
+ KnownUndef.setBit(i);
else if (SrcEltBits[SrcIdx][M] == 0)
- Mask[i] = SM_SentinelZero;
+ KnownZero.setBit(i);
}
}
- assert(VT.getVectorNumElements() == Mask.size() &&
+ assert(VT.getVectorNumElements() == (unsigned)Size &&
"Different mask size from vector size!");
return true;
}
+// Replace target shuffle mask elements with known undef/zero sentinels.
+static void resolveTargetShuffleFromZeroables(SmallVectorImpl<int> &Mask,
+ const APInt &KnownUndef,
+ const APInt &KnownZero) {
+ unsigned NumElts = Mask.size();
+ assert(KnownUndef.getBitWidth() == NumElts &&
+ KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (KnownUndef[i])
+ Mask[i] = SM_SentinelUndef;
+ else if (KnownZero[i])
+ Mask[i] = SM_SentinelZero;
+ }
+}
+
+// Extract target shuffle mask sentinel elements to known undef/zero bitmasks.
+static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask,
+ APInt &KnownUndef,
+ APInt &KnownZero) {
+ unsigned NumElts = Mask.size();
+ KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+ if (SM_SentinelUndef == M)
+ KnownUndef.setBit(i);
+ if (SM_SentinelZero == M)
+ KnownZero.setBit(i);
+ }
+}
+
// Forward declaration (for getFauxShuffleMask recursive check).
-static bool resolveTargetShuffleInputs(SDValue Op,
- SmallVectorImpl<SDValue> &Inputs,
- SmallVectorImpl<int> &Mask,
- SelectionDAG &DAG);
+// TODO: Use DemandedElts variant.
+static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
+ SmallVectorImpl<int> &Mask,
+ SelectionDAG &DAG, unsigned Depth,
+ bool ResolveKnownElts);
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements to the
@@ -6644,7 +6852,8 @@ static bool resolveTargetShuffleInputs(SDValue Op,
static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG, unsigned Depth,
+ bool ResolveKnownElts) {
Mask.clear();
Ops.clear();
@@ -6685,7 +6894,7 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
Mask.push_back(SM_SentinelUndef);
continue;
}
- uint64_t ByteBits = EltBits[i].getZExtValue();
+ const APInt &ByteBits = EltBits[i];
if (ByteBits != 0 && ByteBits != 255)
return false;
Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
@@ -6696,8 +6905,10 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
case ISD::OR: {
// Inspect each operand at the byte level. We can merge these into a
// blend shuffle mask if for each byte at least one is masked out (zero).
- KnownBits Known0 = DAG.computeKnownBits(N.getOperand(0), DemandedElts);
- KnownBits Known1 = DAG.computeKnownBits(N.getOperand(1), DemandedElts);
+ KnownBits Known0 =
+ DAG.computeKnownBits(N.getOperand(0), DemandedElts, Depth + 1);
+ KnownBits Known1 =
+ DAG.computeKnownBits(N.getOperand(1), DemandedElts, Depth + 1);
if (Known0.One.isNullValue() && Known1.One.isNullValue()) {
bool IsByteMask = true;
unsigned NumSizeInBytes = NumSizeInBits / 8;
@@ -6736,14 +6947,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
return false;
SmallVector<int, 64> SrcMask0, SrcMask1;
SmallVector<SDValue, 2> SrcInputs0, SrcInputs1;
- if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) ||
- !resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG))
+ if (!getTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG, Depth + 1,
+ true) ||
+ !getTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG, Depth + 1,
+ true))
return false;
- int MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
+ size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
SmallVector<int, 64> Mask0, Mask1;
scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
scaleShuffleMask<int>(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
- for (int i = 0; i != MaskSize; ++i) {
+ for (size_t i = 0; i != MaskSize; ++i) {
if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
Mask.push_back(SM_SentinelUndef);
else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
@@ -6751,14 +6964,12 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
else if (Mask1[i] == SM_SentinelZero)
Mask.push_back(Mask0[i]);
else if (Mask0[i] == SM_SentinelZero)
- Mask.push_back(Mask1[i] + (MaskSize * SrcInputs0.size()));
+ Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size()));
else
return false;
}
- for (SDValue &Op : SrcInputs0)
- Ops.push_back(Op);
- for (SDValue &Op : SrcInputs1)
- Ops.push_back(Op);
+ Ops.append(SrcInputs0.begin(), SrcInputs0.end());
+ Ops.append(SrcInputs1.begin(), SrcInputs1.end());
return true;
}
case ISD::INSERT_SUBVECTOR: {
@@ -6786,8 +6997,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
// Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
SmallVector<int, 64> SubMask;
SmallVector<SDValue, 2> SubInputs;
- if (!resolveTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs,
- SubMask, DAG))
+ if (!getTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs,
+ SubMask, DAG, Depth + 1, ResolveKnownElts))
return false;
if (SubMask.size() != NumSubElts) {
assert(((SubMask.size() % NumSubElts) == 0 ||
@@ -6911,14 +7122,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
// as a truncation shuffle.
if (Opcode == X86ISD::PACKSS) {
if ((!N0.isUndef() &&
- DAG.ComputeNumSignBits(N0, EltsLHS) <= NumBitsPerElt) ||
+ DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) ||
(!N1.isUndef() &&
- DAG.ComputeNumSignBits(N1, EltsRHS) <= NumBitsPerElt))
+ DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt))
return false;
} else {
APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
- if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS)) ||
- (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS)))
+ if ((!N0.isUndef() &&
+ !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) ||
+ (!N1.isUndef() &&
+ !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1)))
return false;
}
@@ -7061,23 +7274,45 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
Inputs = UsedInputs;
}
-/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
-/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
-/// remaining input indices in case we now have a unary shuffle and adjust the
-/// inputs accordingly.
+/// Calls getTargetShuffleAndZeroables to resolve a target shuffle mask's inputs
+/// and then sets the SM_SentinelUndef and SM_SentinelZero values.
/// Returns true if the target shuffle mask was decoded.
-static bool resolveTargetShuffleInputs(SDValue Op,
- SmallVectorImpl<SDValue> &Inputs,
- SmallVectorImpl<int> &Mask,
- SelectionDAG &DAG) {
+static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
+ SmallVectorImpl<SDValue> &Inputs,
+ SmallVectorImpl<int> &Mask,
+ APInt &KnownUndef, APInt &KnownZero,
+ SelectionDAG &DAG, unsigned Depth,
+ bool ResolveKnownElts) {
+ EVT VT = Op.getValueType();
+ if (!VT.isSimple() || !VT.isVector())
+ return false;
+
+ if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) {
+ if (ResolveKnownElts)
+ resolveTargetShuffleFromZeroables(Mask, KnownUndef, KnownZero);
+ return true;
+ }
+ if (getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth,
+ ResolveKnownElts)) {
+ resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero);
+ return true;
+ }
+ return false;
+}
+
+static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
+ SmallVectorImpl<int> &Mask,
+ SelectionDAG &DAG, unsigned Depth = 0,
+ bool ResolveKnownElts = true) {
+ EVT VT = Op.getValueType();
+ if (!VT.isSimple() || !VT.isVector())
+ return false;
+
+ APInt KnownUndef, KnownZero;
unsigned NumElts = Op.getValueType().getVectorNumElements();
APInt DemandedElts = APInt::getAllOnesValue(NumElts);
- if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
- if (!getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG))
- return false;
-
- resolveTargetShuffleInputsAndMask(Inputs, Mask);
- return true;
+ return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, KnownUndef,
+ KnownZero, DAG, Depth, ResolveKnownElts);
}
/// Returns the scalar element that will make up the ith
@@ -7414,7 +7649,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
SDLoc DL(Op);
SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
- DAG.getIntPtrConstant(InsertPSMask, DL));
+ DAG.getIntPtrConstant(InsertPSMask, DL, true));
return DAG.getBitcast(VT, Result);
}
@@ -7427,7 +7662,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getBitcast(ShVT, SrcOp);
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
- SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8);
+ SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8);
return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
}
@@ -7439,7 +7674,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
// the shuffle mask.
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
SDValue Ptr = LD->getBasePtr();
- if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ if (!ISD::isNormalLoad(LD) || !LD->isSimple())
return SDValue();
EVT PVT = LD->getValueType(0);
if (PVT != MVT::i32 && PVT != MVT::f32)
@@ -7504,6 +7739,49 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
return SDValue();
}
+// Recurse to find a LoadSDNode source and the accumulated ByteOffest.
+static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
+ if (ISD::isNON_EXTLoad(Elt.getNode())) {
+ auto *BaseLd = cast<LoadSDNode>(Elt);
+ if (!BaseLd->isSimple())
+ return false;
+ Ld = BaseLd;
+ ByteOffset = 0;
+ return true;
+ }
+
+ switch (Elt.getOpcode()) {
+ case ISD::BITCAST:
+ case ISD::TRUNCATE:
+ case ISD::SCALAR_TO_VECTOR:
+ return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset);
+ case ISD::SRL:
+ if (isa<ConstantSDNode>(Elt.getOperand(1))) {
+ uint64_t Idx = Elt.getConstantOperandVal(1);
+ if ((Idx % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) {
+ ByteOffset += Idx / 8;
+ return true;
+ }
+ }
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (isa<ConstantSDNode>(Elt.getOperand(1))) {
+ SDValue Src = Elt.getOperand(0);
+ unsigned SrcSizeInBits = Src.getScalarValueSizeInBits();
+ unsigned DstSizeInBits = Elt.getScalarValueSizeInBits();
+ if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 &&
+ findEltLoadSrc(Src, Ld, ByteOffset)) {
+ uint64_t Idx = Elt.getConstantOperandVal(1);
+ ByteOffset += Idx * (SrcSizeInBits / 8);
+ return true;
+ }
+ }
+ break;
+ }
+
+ return false;
+}
+
/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
/// elements can be replaced by a single large load which has the same value as
/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
@@ -7513,6 +7791,9 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
bool isAfterLegalize) {
+ if ((VT.getScalarSizeInBits() % 8) != 0)
+ return SDValue();
+
unsigned NumElems = Elts.size();
int LastLoadedElt = -1;
@@ -7521,6 +7802,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
APInt UndefMask = APInt::getNullValue(NumElems);
SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
+ SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
// For each element in the initializer, see if we've found a load, zero or an
// undef.
@@ -7539,13 +7821,16 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// Each loaded element must be the correct fractional portion of the
// requested vector load.
- if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
+ unsigned EltSizeInBits = Elt.getValueSizeInBits();
+ if ((NumElems * EltSizeInBits) != VT.getSizeInBits())
return SDValue();
- if (!ISD::isNON_EXTLoad(Elt.getNode()))
+ if (!findEltLoadSrc(Elt, Loads[i], ByteOffsets[i]) || ByteOffsets[i] < 0)
+ return SDValue();
+ unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0);
+ if (((ByteOffsets[i] * 8) + EltSizeInBits) > LoadSizeInBits)
return SDValue();
- Loads[i] = cast<LoadSDNode>(Elt);
LoadMask.setBit(i);
LastLoadedElt = i;
}
@@ -7575,6 +7860,24 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
int LoadSizeInBits = (1 + LastLoadedElt - FirstLoadedElt) * BaseSizeInBits;
assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");
+ // TODO: Support offsetting the base load.
+ if (ByteOffsets[FirstLoadedElt] != 0)
+ return SDValue();
+
+ // Check to see if the element's load is consecutive to the base load
+ // or offset from a previous (already checked) load.
+ auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) {
+ LoadSDNode *Ld = Loads[EltIdx];
+ int64_t ByteOffset = ByteOffsets[EltIdx];
+ if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
+ int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
+ return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] &&
+ Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0);
+ }
+ return DAG.areNonVolatileConsecutiveLoads(Ld, Base, BaseSizeInBytes,
+ EltIdx - FirstLoadedElt);
+ };
+
// Consecutive loads can contain UNDEFS but not ZERO elements.
// Consecutive loads with UNDEFs and ZEROs elements require a
// an additional shuffle stage to clear the ZERO elements.
@@ -7582,8 +7885,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
bool IsConsecutiveLoadWithZeros = true;
for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
if (LoadMask[i]) {
- if (!DAG.areNonVolatileConsecutiveLoads(Loads[i], LDBase, BaseSizeInBytes,
- i - FirstLoadedElt)) {
+ if (!CheckConsecutiveLoad(LDBase, i)) {
IsConsecutiveLoad = false;
IsConsecutiveLoadWithZeros = false;
break;
@@ -7595,8 +7897,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
auto MMOFlags = LDBase->getMemOperand()->getFlags();
- assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&
- "Cannot merge volatile loads.");
+ assert(LDBase->isSimple() &&
+ "Cannot merge volatile or atomic loads.");
SDValue NewLd =
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
@@ -7636,17 +7938,22 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
// vector and a zero vector to clear out the zero elements.
if (!isAfterLegalize && VT.isVector()) {
- SmallVector<int, 4> ClearMask(NumElems, -1);
- for (unsigned i = 0; i < NumElems; ++i) {
- if (ZeroMask[i])
- ClearMask[i] = i + NumElems;
- else if (LoadMask[i])
- ClearMask[i] = i;
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ if ((NumMaskElts % NumElems) == 0) {
+ unsigned Scale = NumMaskElts / NumElems;
+ SmallVector<int, 4> ClearMask(NumMaskElts, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (UndefMask[i])
+ continue;
+ int Offset = ZeroMask[i] ? NumMaskElts : 0;
+ for (unsigned j = 0; j != Scale; ++j)
+ ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset;
+ }
+ SDValue V = CreateLoad(VT, LDBase);
+ SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
+ : DAG.getConstantFP(0.0, DL, VT);
+ return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
}
- SDValue V = CreateLoad(VT, LDBase);
- SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
- : DAG.getConstantFP(0.0, DL, VT);
- return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
}
}
@@ -8194,34 +8501,10 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
"Unexpected type in LowerBUILD_VECTORvXi1!");
SDLoc dl(Op);
- if (ISD::isBuildVectorAllZeros(Op.getNode()))
- return Op;
-
- if (ISD::isBuildVectorAllOnes(Op.getNode()))
+ if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+ ISD::isBuildVectorAllOnes(Op.getNode()))
return Op;
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
- // Split the pieces.
- SDValue Lower =
- DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32));
- SDValue Upper =
- DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32));
- // We have to manually lower both halves so getNode doesn't try to
- // reassemble the build_vector.
- Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget);
- Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper);
- }
- SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
- if (Imm.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getBitcast(VT, Imm);
- SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
- DAG.getIntPtrConstant(0, dl));
- }
-
- // Vector has one or more non-const elements
uint64_t Immediate = 0;
SmallVector<unsigned, 16> NonConstIdx;
bool IsSplat = true;
@@ -8244,29 +8527,40 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
}
// for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
- if (IsSplat)
- return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
+ if (IsSplat) {
+ // The build_vector allows the scalar element to be larger than the vector
+ // element type. We need to mask it to use as a condition unless we know
+ // the upper bits are zero.
+ // FIXME: Use computeKnownBits instead of checking specific opcode?
+ SDValue Cond = Op.getOperand(SplatIdx);
+ assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!");
+ if (Cond.getOpcode() != ISD::SETCC)
+ Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond,
+ DAG.getConstant(1, dl, MVT::i8));
+ return DAG.getSelect(dl, VT, Cond,
DAG.getConstant(1, dl, VT),
DAG.getConstant(0, dl, VT));
+ }
// insert elements one by one
SDValue DstVec;
- SDValue Imm;
- if (Immediate) {
- MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
- Imm = DAG.getConstant(Immediate, dl, ImmVT);
- }
- else if (HasConstElts)
- Imm = DAG.getConstant(0, dl, VT);
- else
- Imm = DAG.getUNDEF(VT);
- if (Imm.getValueSizeInBits() == VT.getSizeInBits())
- DstVec = DAG.getBitcast(VT, Imm);
- else {
- SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
- DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
- DAG.getIntPtrConstant(0, dl));
- }
+ if (HasConstElts) {
+ if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+ SDValue ImmL = DAG.getConstant(Lo_32(Immediate), dl, MVT::i32);
+ SDValue ImmH = DAG.getConstant(Hi_32(Immediate), dl, MVT::i32);
+ ImmL = DAG.getBitcast(MVT::v32i1, ImmL);
+ ImmH = DAG.getBitcast(MVT::v32i1, ImmH);
+ DstVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, ImmL, ImmH);
+ } else {
+ MVT ImmVT = MVT::getIntegerVT(std::max(VT.getSizeInBits(), 8U));
+ SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT);
+ MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
+ DstVec = DAG.getBitcast(VecVT, Imm);
+ DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec,
+ DAG.getIntPtrConstant(0, dl));
+ }
+ } else
+ DstVec = DAG.getUNDEF(VT);
for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
unsigned InsertIdx = NonConstIdx[i];
@@ -8757,7 +9051,7 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
// If we don't need the upper xmm, then perform as a xmm hop.
unsigned HalfNumElts = NumElts / 2;
if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) {
- MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), HalfNumElts);
+ MVT HalfVT = VT.getHalfNumVectorElementsVT();
V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128);
V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128);
SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1);
@@ -8965,21 +9259,14 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
MVT VT = Op.getSimpleValueType();
// Vectors containing all zeros can be matched by pxor and xorps.
- if (ISD::isBuildVectorAllZeros(Op.getNode())) {
- // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
- // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
- if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
- return Op;
-
- return getZeroVector(VT, Subtarget, DAG, DL);
- }
+ if (ISD::isBuildVectorAllZeros(Op.getNode()))
+ return Op;
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (VT == MVT::v4i32 || VT == MVT::v16i32 ||
- (VT == MVT::v8i32 && Subtarget.hasInt256()))
+ if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
return Op;
return getOnesVector(VT, DAG, DL);
@@ -9150,9 +9437,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
{4, 5, 6, 7, 4, 5, 6, 7});
if (Subtarget.hasXOP())
- return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32,
- LoLo, HiHi, IndicesVec,
- DAG.getConstant(0, DL, MVT::i8)));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi,
+ IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
// Permute Lo and Hi and then select based on index range.
// This works as VPERMILPS only uses index bits[0:1] to permute elements.
SDValue Res = DAG.getSelectCC(
@@ -9186,9 +9473,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
// VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec.
IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
if (Subtarget.hasXOP())
- return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64,
- LoLo, HiHi, IndicesVec,
- DAG.getConstant(0, DL, MVT::i8)));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi,
+ IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
// Permute Lo and Hi and then select based on index range.
// This works as VPERMILPD only uses index bit[1] to permute elements.
SDValue Res = DAG.getSelectCC(
@@ -9283,7 +9570,7 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
return SDValue();
auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.getOperand(1));
- if (!PermIdx || PermIdx->getZExtValue() != Idx)
+ if (!PermIdx || PermIdx->getAPIntValue() != Idx)
return SDValue();
}
@@ -9434,23 +9721,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// it to i32 first.
if (EltVT == MVT::i16 || EltVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- if (VT.getSizeInBits() >= 256) {
- MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
- if (Subtarget.hasAVX()) {
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item);
- Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
- } else {
- // Without AVX, we need to extend to a 128-bit vector and then
- // insert into the 256-bit vector.
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
- SDValue ZeroVec = getZeroVector(ShufVT, Subtarget, DAG, dl);
- Item = insert128BitVector(ZeroVec, Item, 0, DAG, dl);
- }
- } else {
- assert(VT.is128BitVector() && "Expected an SSE value type!");
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
- Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
- }
+ MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
return DAG.getBitcast(VT, Item);
}
}
@@ -9549,8 +9822,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2));
// Recreate the wider vector with the lower and upper part.
- return concatSubVectors(Lower, Upper, VT, NumElems, DAG, dl,
- VT.getSizeInBits() / 2);
+ return concatSubVectors(Lower, Upper, DAG, dl);
}
// Let legalizer expand 2-wide build_vectors.
@@ -9703,8 +9975,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
// If we have more than 2 non-zeros, build each half separately.
if (NumNonZero > 2) {
- MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
- ResVT.getVectorNumElements()/2);
+ MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
ArrayRef<SDUse> Ops = Op->ops();
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
Ops.slice(0, NumOperands/2));
@@ -9745,30 +10016,47 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
- unsigned NumZero = 0;
- unsigned NumNonZero = 0;
+ uint64_t Zeros = 0;
uint64_t NonZeros = 0;
for (unsigned i = 0; i != NumOperands; ++i) {
SDValue SubVec = Op.getOperand(i);
if (SubVec.isUndef())
continue;
+ assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
- ++NumZero;
- else {
- assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
+ Zeros |= (uint64_t)1 << i;
+ else
NonZeros |= (uint64_t)1 << i;
- ++NumNonZero;
- }
}
+ unsigned NumElems = ResVT.getVectorNumElements();
+
+ // If we are inserting non-zero vector and there are zeros in LSBs and undef
+ // in the MSBs we need to emit a KSHIFTL. The generic lowering to
+ // insert_subvector will give us two kshifts.
+ if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros &&
+ Log2_64(NonZeros) != NumOperands - 1) {
+ MVT ShiftVT = ResVT;
+ if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
+ ShiftVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ unsigned Idx = Log2_64(NonZeros);
+ SDValue SubVec = Op.getOperand(Idx);
+ unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ShiftVT,
+ DAG.getUNDEF(ShiftVT), SubVec,
+ DAG.getIntPtrConstant(0, dl));
+ Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec,
+ DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op,
+ DAG.getIntPtrConstant(0, dl));
+ }
// If there are zero or one non-zeros we can handle this very simply.
- if (NumNonZero <= 1) {
- SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl)
- : DAG.getUNDEF(ResVT);
- if (!NumNonZero)
+ if (NonZeros == 0 || isPowerOf2_64(NonZeros)) {
+ SDValue Vec = Zeros ? DAG.getConstant(0, dl, ResVT) : DAG.getUNDEF(ResVT);
+ if (!NonZeros)
return Vec;
- unsigned Idx = countTrailingZeros(NonZeros);
+ unsigned Idx = Log2_64(NonZeros);
SDValue SubVec = Op.getOperand(Idx);
unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec,
@@ -9776,8 +10064,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
}
if (NumOperands > 2) {
- MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
- ResVT.getVectorNumElements()/2);
+ MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
ArrayRef<SDUse> Ops = Op->ops();
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
Ops.slice(0, NumOperands/2));
@@ -9786,7 +10073,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
- assert(NumNonZero == 2 && "Simple cases not handled?");
+ assert(countPopulation(NonZeros) == 2 && "Simple cases not handled?");
if (ResVT.getVectorNumElements() >= 16)
return Op; // The operation is legal with KUNPCK
@@ -9794,7 +10081,6 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT,
DAG.getUNDEF(ResVT), Op.getOperand(0),
DAG.getIntPtrConstant(0, dl));
- unsigned NumElems = ResVT.getVectorNumElements();
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1),
DAG.getIntPtrConstant(NumElems/2, dl));
}
@@ -9997,42 +10283,44 @@ static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
/// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding
/// value in ExpectedMask is always accepted. Otherwise the indices must match.
///
-/// SM_SentinelZero is accepted as a valid negative index but must match in both.
+/// SM_SentinelZero is accepted as a valid negative index but must match in
+/// both.
static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
- ArrayRef<int> ExpectedMask) {
+ ArrayRef<int> ExpectedMask,
+ SDValue V1 = SDValue(),
+ SDValue V2 = SDValue()) {
int Size = Mask.size();
if (Size != (int)ExpectedMask.size())
return false;
assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) &&
"Illegal target shuffle mask");
- for (int i = 0; i < Size; ++i)
- if (Mask[i] == SM_SentinelUndef)
- continue;
- else if (Mask[i] < 0 && Mask[i] != SM_SentinelZero)
- return false;
- else if (Mask[i] != ExpectedMask[i])
- return false;
-
- return true;
-}
+ // Check for out-of-range target shuffle mask indices.
+ if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size))
+ return false;
-// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle
-// mask.
-static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
- const APInt &Zeroable) {
- int NumElts = Mask.size();
- assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes");
+ // If the values are build vectors, we can look through them to find
+ // equivalent inputs that make the shuffles equivalent.
+ auto *BV1 = dyn_cast_or_null<BuildVectorSDNode>(V1);
+ auto *BV2 = dyn_cast_or_null<BuildVectorSDNode>(V2);
+ BV1 = ((BV1 && Size != (int)BV1->getNumOperands()) ? nullptr : BV1);
+ BV2 = ((BV2 && Size != (int)BV2->getNumOperands()) ? nullptr : BV2);
- SmallVector<int, 64> TargetMask(NumElts, SM_SentinelUndef);
- for (int i = 0; i != NumElts; ++i) {
- int M = Mask[i];
- if (M == SM_SentinelUndef)
+ for (int i = 0; i < Size; ++i) {
+ if (Mask[i] == SM_SentinelUndef || Mask[i] == ExpectedMask[i])
continue;
- assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
- TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
+ if (0 <= Mask[i] && 0 <= ExpectedMask[i]) {
+ auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
+ auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
+ if (MaskBV && ExpectedBV &&
+ MaskBV->getOperand(Mask[i] % Size) ==
+ ExpectedBV->getOperand(ExpectedMask[i] % Size))
+ continue;
+ }
+ // TODO - handle SM_Sentinel equivalences.
+ return false;
}
- return TargetMask;
+ return true;
}
// Attempt to create a shuffle mask from a VSELECT condition mask.
@@ -10133,7 +10421,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
SelectionDAG &DAG) {
- return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
+ return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
/// Compute whether each element of a shuffle is zeroable.
@@ -10573,14 +10861,14 @@ static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1,
// Try binary shuffle.
SmallVector<int, 32> BinaryMask;
createPackShuffleMask(VT, BinaryMask, false);
- if (isTargetShuffleEquivalent(TargetMask, BinaryMask))
+ if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2))
if (MatchPACK(V1, V2))
return true;
// Try unary shuffle.
SmallVector<int, 32> UnaryMask;
createPackShuffleMask(VT, UnaryMask, true);
- if (isTargetShuffleEquivalent(TargetMask, UnaryMask))
+ if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1))
if (MatchPACK(V1, V1))
return true;
@@ -10685,9 +10973,9 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SelectionDAG &DAG);
static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
- MutableArrayRef<int> TargetMask,
- bool &ForceV1Zero, bool &ForceV2Zero,
- uint64_t &BlendMask) {
+ MutableArrayRef<int> Mask,
+ const APInt &Zeroable, bool &ForceV1Zero,
+ bool &ForceV2Zero, uint64_t &BlendMask) {
bool V1IsZeroOrUndef =
V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZeroOrUndef =
@@ -10695,13 +10983,12 @@ static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
BlendMask = 0;
ForceV1Zero = false, ForceV2Zero = false;
- assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");
+ assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
// Attempt to generate the binary blend mask. If an input is zero then
// we can use any lane.
- // TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
- for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
- int M = TargetMask[i];
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
if (M == i)
@@ -10710,16 +10997,16 @@ static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
BlendMask |= 1ull << i;
continue;
}
- if (M == SM_SentinelZero) {
+ if (Zeroable[i]) {
if (V1IsZeroOrUndef) {
ForceV1Zero = true;
- TargetMask[i] = i;
+ Mask[i] = i;
continue;
}
if (V2IsZeroOrUndef) {
ForceV2Zero = true;
BlendMask |= 1ull << i;
- TargetMask[i] = i + Size;
+ Mask[i] = i + Size;
continue;
}
}
@@ -10748,11 +11035,10 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
-
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
- if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero,
+ SmallVector<int, 64> Mask(Original.begin(), Original.end());
+ if (!matchVectorShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,
BlendMask))
return SDValue();
@@ -10778,7 +11064,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v8i16:
assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8));
+ DAG.getTargetConstant(BlendMask, DL, MVT::i8));
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
@@ -10790,7 +11076,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (RepeatedMask[i] >= 8)
BlendMask |= 1ull << i;
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8));
+ DAG.getTargetConstant(BlendMask, DL, MVT::i8));
}
// Use PBLENDW for lower/upper lanes and then blend lanes.
// TODO - we should allow 2 PBLENDW here and leave shuffle combine to
@@ -10799,9 +11085,9 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
uint64_t HiMask = (BlendMask >> 8) & 0xFF;
if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
- DAG.getConstant(LoMask, DL, MVT::i8));
+ DAG.getTargetConstant(LoMask, DL, MVT::i8));
SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
- DAG.getConstant(HiMask, DL, MVT::i8));
+ DAG.getTargetConstant(HiMask, DL, MVT::i8));
return DAG.getVectorShuffle(
MVT::v16i16, DL, Lo, Hi,
{0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
@@ -11061,7 +11347,7 @@ static SDValue lowerShuffleAsByteRotateAndPermute(
SDValue Rotate = DAG.getBitcast(
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
DAG.getBitcast(ByteVT, Lo),
- DAG.getConstant(Scale * RotAmt, DL, MVT::i8)));
+ DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8)));
SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
@@ -11268,7 +11554,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
"512-bit PALIGNR requires BWI instructions");
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
- DAG.getConstant(ByteRotation, DL, MVT::i8)));
+ DAG.getTargetConstant(ByteRotation, DL, MVT::i8)));
}
assert(VT.is128BitVector() &&
@@ -11282,10 +11568,12 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
int LoByteShift = 16 - ByteRotation;
int HiByteShift = ByteRotation;
- SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
- DAG.getConstant(LoByteShift, DL, MVT::i8));
- SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
- DAG.getConstant(HiByteShift, DL, MVT::i8));
+ SDValue LoShift =
+ DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
+ DAG.getTargetConstant(LoByteShift, DL, MVT::i8));
+ SDValue HiShift =
+ DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
+ DAG.getTargetConstant(HiByteShift, DL, MVT::i8));
return DAG.getBitcast(VT,
DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
@@ -11317,7 +11605,7 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1,
return SDValue();
return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
- DAG.getConstant(Rotation, DL, MVT::i8));
+ DAG.getTargetConstant(Rotation, DL, MVT::i8));
}
/// Try to lower a vector shuffle as a byte shift sequence.
@@ -11356,27 +11644,27 @@ static SDValue lowerVectorShuffleAsByteShiftMask(
if (ZeroLo == 0) {
unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
- DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
- DAG.getConstant(Scale * ZeroHi, DL, MVT::i8));
+ DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8));
} else if (ZeroHi == 0) {
unsigned Shift = Mask[ZeroLo] % NumElts;
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
- DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
- DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
+ DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
} else if (!Subtarget.hasSSSE3()) {
// If we don't have PSHUFB then its worth avoiding an AND constant mask
// by performing 3 byte shifts. Shuffle combining can kick in above that.
// TODO: There may be some cases where VSH{LR}DQ+PAND is still better.
unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
- DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Shift += Mask[ZeroLo] % NumElts;
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
- DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
- DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
+ DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
} else
return SDValue();
@@ -11498,7 +11786,7 @@ static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
"Illegal integer vector type");
V = DAG.getBitcast(ShiftVT, V);
V = DAG.getNode(Opcode, DL, ShiftVT, V,
- DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
return DAG.getBitcast(VT, V);
}
@@ -11632,14 +11920,14 @@ static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
uint64_t BitLen, BitIdx;
if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
- DAG.getConstant(BitLen, DL, MVT::i8),
- DAG.getConstant(BitIdx, DL, MVT::i8));
+ DAG.getTargetConstant(BitLen, DL, MVT::i8),
+ DAG.getTargetConstant(BitIdx, DL, MVT::i8));
if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
V2 ? V2 : DAG.getUNDEF(VT),
- DAG.getConstant(BitLen, DL, MVT::i8),
- DAG.getConstant(BitIdx, DL, MVT::i8));
+ DAG.getTargetConstant(BitLen, DL, MVT::i8),
+ DAG.getTargetConstant(BitIdx, DL, MVT::i8));
return SDValue();
}
@@ -11686,9 +11974,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
};
- // Found a valid zext mask! Try various lowering strategies based on the
+ // Found a valid a/zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
- // TODO: Add AnyExt support.
if (Subtarget.hasSSE41()) {
// Not worth offsetting 128-bit vectors if scale == 2, a pattern using
// PUNPCK will catch this in a later shuffle match.
@@ -11697,7 +11984,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
- InputV = getExtendInVec(ISD::ZERO_EXTEND, DL, ExtVT, InputV, DAG);
+ InputV = getExtendInVec(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND, DL,
+ ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
@@ -11736,8 +12024,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
int LoIdx = Offset * EltBits;
SDValue Lo = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
- DAG.getConstant(EltBits, DL, MVT::i8),
- DAG.getConstant(LoIdx, DL, MVT::i8)));
+ DAG.getTargetConstant(EltBits, DL, MVT::i8),
+ DAG.getTargetConstant(LoIdx, DL, MVT::i8)));
if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1))
return DAG.getBitcast(VT, Lo);
@@ -11745,8 +12033,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
int HiIdx = (Offset + 1) * EltBits;
SDValue Hi = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
- DAG.getConstant(EltBits, DL, MVT::i8),
- DAG.getConstant(HiIdx, DL, MVT::i8)));
+ DAG.getTargetConstant(EltBits, DL, MVT::i8),
+ DAG.getTargetConstant(HiIdx, DL, MVT::i8)));
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
}
@@ -11759,8 +12047,12 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
SDValue PSHUFBMask[16];
for (int i = 0; i < 16; ++i) {
int Idx = Offset + (i / Scale);
- PSHUFBMask[i] = DAG.getConstant(
- (i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8);
+ if ((i % Scale == 0 && SafeOffset(Idx))) {
+ PSHUFBMask[i] = DAG.getConstant(Idx, DL, MVT::i8);
+ continue;
+ }
+ PSHUFBMask[i] =
+ AnyExt ? DAG.getUNDEF(MVT::i8) : DAG.getConstant(0x80, DL, MVT::i8);
}
InputV = DAG.getBitcast(MVT::v16i8, InputV);
return DAG.getBitcast(
@@ -12052,9 +12344,9 @@ static SDValue lowerShuffleAsElementInsertion(
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
V2 = DAG.getBitcast(MVT::v16i8, V2);
- V2 = DAG.getNode(
- X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
- DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
+ V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
+ DAG.getTargetConstant(
+ V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
V2 = DAG.getBitcast(VT, V2);
}
}
@@ -12294,7 +12586,7 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
// If we can't broadcast from a register, check that the input is a load.
if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
return SDValue();
- } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+ } else if (MayFoldLoad(V) && cast<LoadSDNode>(V)->isSimple()) {
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) {
BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
@@ -12486,7 +12778,7 @@ static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2,
// Insert the V2 element into the desired position.
return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
- DAG.getConstant(InsertPSMask, DL, MVT::i8));
+ DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
/// Try to lower a shuffle as a permute of the inputs followed by an
@@ -12635,14 +12927,14 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
- DAG.getConstant(SHUFPDMask, DL, MVT::i8));
+ DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
return DAG.getNode(
X86ISD::SHUFP, DL, MVT::v2f64,
Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
- DAG.getConstant(SHUFPDMask, DL, MVT::i8));
+ DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!");
@@ -12688,7 +12980,7 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
- DAG.getConstant(SHUFPDMask, DL, MVT::i8));
+ DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
/// Handle lowering of 2-lane 64-bit integer shuffles.
@@ -12996,10 +13288,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 0) {
- // Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
- Mask, Subtarget, DAG))
- return Broadcast;
+ // Try to use broadcast unless the mask only has one non-undef element.
+ if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) {
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+ }
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
@@ -13680,16 +13974,16 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
if (NumV2Inputs == 0) {
- // Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
- Mask, Subtarget, DAG))
- return Broadcast;
-
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
Zeroable, Subtarget, DAG))
return Shift;
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
@@ -13984,8 +14278,16 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
// Unpack the bytes to form the i16s that will be shuffled into place.
+ bool EvenInUse = false, OddInUse = false;
+ for (int i = 0; i < 16; i += 2) {
+ EvenInUse |= (Mask[i + 0] >= 0);
+ OddInUse |= (Mask[i + 1] >= 0);
+ if (EvenInUse && OddInUse)
+ break;
+ }
V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
- MVT::v16i8, V1, V1);
+ MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8),
+ OddInUse ? V1 : DAG.getUNDEF(MVT::v16i8));
int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
for (int i = 0; i < 16; ++i)
@@ -14100,11 +14402,10 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// First we need to zero all the dropped bytes.
assert(NumEvenDrops <= 3 &&
"No support for dropping even elements more than 3 times.");
- // We use the mask type to pick which bytes are preserved based on how many
- // elements are dropped.
- MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
- SDValue ByteClearMask = DAG.getBitcast(
- MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1]));
+ SmallVector<SDValue, 16> ByteClearOps(16, DAG.getConstant(0, DL, MVT::i8));
+ for (unsigned i = 0; i != 16; i += 1 << NumEvenDrops)
+ ByteClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i8);
+ SDValue ByteClearMask = DAG.getBuildVector(MVT::v16i8, DL, ByteClearOps);
V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
if (!IsSingleInput)
V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
@@ -14448,16 +14749,14 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask);
}
-/// Lower a vector shuffle crossing multiple 128-bit lanes as
-/// a permutation and blend of those lanes.
+/// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one
+/// source with a lane permutation.
///
-/// This essentially blends the out-of-lane inputs to each lane into the lane
-/// from a permuted copy of the vector. This lowering strategy results in four
-/// instructions in the worst case for a single-input cross lane shuffle which
-/// is lower than any other fully general cross-lane shuffle strategy I'm aware
-/// of. Special cases for each particular shuffle pattern should be handled
-/// prior to trying this lowering.
-static SDValue lowerShuffleAsLanePermuteAndBlend(
+/// This lowering strategy results in four instructions in the worst case for a
+/// single-input cross lane shuffle which is lower than any other fully general
+/// cross-lane shuffle strategy I'm aware of. Special cases for each particular
+/// shuffle pattern should be handled prior to trying this lowering.
+static SDValue lowerShuffleAsLanePermuteAndShuffle(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG, const X86Subtarget &Subtarget) {
// FIXME: This should probably be generalized for 512-bit vectors as well.
@@ -14484,24 +14783,28 @@ static SDValue lowerShuffleAsLanePermuteAndBlend(
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
}
+ // TODO - we could support shuffling V2 in the Flipped input.
assert(V2.isUndef() &&
"This last part of this routine only works on single input shuffles");
- SmallVector<int, 32> FlippedBlendMask(Size);
- for (int i = 0; i < Size; ++i)
- FlippedBlendMask[i] =
- Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize)
- ? Mask[i]
- : Mask[i] % LaneSize +
- (i / LaneSize) * LaneSize + Size);
+ SmallVector<int, 32> InLaneMask(Mask.begin(), Mask.end());
+ for (int i = 0; i < Size; ++i) {
+ int &M = InLaneMask[i];
+ if (M < 0)
+ continue;
+ if (((M % Size) / LaneSize) != (i / LaneSize))
+ M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
+ }
+ assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) &&
+ "In-lane shuffle mask expected");
- // Flip the vector, and blend the results which should now be in-lane.
+ // Flip the lanes, and shuffle the results which should now be in-lane.
MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
SDValue Flipped = DAG.getBitcast(PVT, V1);
- Flipped = DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT),
- { 2, 3, 0, 1 });
+ Flipped =
+ DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1});
Flipped = DAG.getBitcast(VT, Flipped);
- return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask);
+ return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
}
/// Handle lowering 2-lane 128-bit shuffles.
@@ -14565,8 +14868,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
((WidenedMask[1] % 2) << 1);
- return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
- DAG.getConstant(PermMask, DL, MVT::i8));
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
}
}
@@ -14598,7 +14901,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
V2 = DAG.getUNDEF(VT);
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
- DAG.getConstant(PermMask, DL, MVT::i8));
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
/// Lower a vector shuffle by first fixing the 128-bit lanes and then
@@ -14616,26 +14919,26 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
if (is128BitLaneRepeatedShuffleMask(VT, Mask))
return SDValue();
- int Size = Mask.size();
+ int NumElts = Mask.size();
int NumLanes = VT.getSizeInBits() / 128;
- int LaneSize = 128 / VT.getScalarSizeInBits();
- SmallVector<int, 16> RepeatMask(LaneSize, -1);
+ int NumLaneElts = 128 / VT.getScalarSizeInBits();
+ SmallVector<int, 16> RepeatMask(NumLaneElts, -1);
SmallVector<std::array<int, 2>, 2> LaneSrcs(NumLanes, {{-1, -1}});
// First pass will try to fill in the RepeatMask from lanes that need two
// sources.
for (int Lane = 0; Lane != NumLanes; ++Lane) {
- int Srcs[2] = { -1, -1 };
- SmallVector<int, 16> InLaneMask(LaneSize, -1);
- for (int i = 0; i != LaneSize; ++i) {
- int M = Mask[(Lane * LaneSize) + i];
+ int Srcs[2] = {-1, -1};
+ SmallVector<int, 16> InLaneMask(NumLaneElts, -1);
+ for (int i = 0; i != NumLaneElts; ++i) {
+ int M = Mask[(Lane * NumLaneElts) + i];
if (M < 0)
continue;
// Determine which of the possible input lanes (NumLanes from each source)
// this element comes from. Assign that as one of the sources for this
// lane. We can assign up to 2 sources for this lane. If we run out
// sources we can't do anything.
- int LaneSrc = M / LaneSize;
+ int LaneSrc = M / NumLaneElts;
int Src;
if (Srcs[0] < 0 || Srcs[0] == LaneSrc)
Src = 0;
@@ -14645,7 +14948,7 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
return SDValue();
Srcs[Src] = LaneSrc;
- InLaneMask[i] = (M % LaneSize) + Src * Size;
+ InLaneMask[i] = (M % NumLaneElts) + Src * NumElts;
}
// If this lane has two sources, see if it fits with the repeat mask so far.
@@ -14701,23 +15004,23 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
if (LaneSrcs[Lane][0] >= 0)
continue;
- for (int i = 0; i != LaneSize; ++i) {
- int M = Mask[(Lane * LaneSize) + i];
+ for (int i = 0; i != NumLaneElts; ++i) {
+ int M = Mask[(Lane * NumLaneElts) + i];
if (M < 0)
continue;
// If RepeatMask isn't defined yet we can define it ourself.
if (RepeatMask[i] < 0)
- RepeatMask[i] = M % LaneSize;
+ RepeatMask[i] = M % NumLaneElts;
- if (RepeatMask[i] < Size) {
- if (RepeatMask[i] != M % LaneSize)
+ if (RepeatMask[i] < NumElts) {
+ if (RepeatMask[i] != M % NumLaneElts)
return SDValue();
- LaneSrcs[Lane][0] = M / LaneSize;
+ LaneSrcs[Lane][0] = M / NumLaneElts;
} else {
- if (RepeatMask[i] != ((M % LaneSize) + Size))
+ if (RepeatMask[i] != ((M % NumLaneElts) + NumElts))
return SDValue();
- LaneSrcs[Lane][1] = M / LaneSize;
+ LaneSrcs[Lane][1] = M / NumLaneElts;
}
}
@@ -14725,14 +15028,14 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
return SDValue();
}
- SmallVector<int, 16> NewMask(Size, -1);
+ SmallVector<int, 16> NewMask(NumElts, -1);
for (int Lane = 0; Lane != NumLanes; ++Lane) {
int Src = LaneSrcs[Lane][0];
- for (int i = 0; i != LaneSize; ++i) {
+ for (int i = 0; i != NumLaneElts; ++i) {
int M = -1;
if (Src >= 0)
- M = Src * LaneSize + i;
- NewMask[Lane * LaneSize + i] = M;
+ M = Src * NumLaneElts + i;
+ NewMask[Lane * NumLaneElts + i] = M;
}
}
SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
@@ -14745,11 +15048,11 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
for (int Lane = 0; Lane != NumLanes; ++Lane) {
int Src = LaneSrcs[Lane][1];
- for (int i = 0; i != LaneSize; ++i) {
+ for (int i = 0; i != NumLaneElts; ++i) {
int M = -1;
if (Src >= 0)
- M = Src * LaneSize + i;
- NewMask[Lane * LaneSize + i] = M;
+ M = Src * NumLaneElts + i;
+ NewMask[Lane * NumLaneElts + i] = M;
}
}
SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
@@ -14760,12 +15063,12 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
return SDValue();
- for (int i = 0; i != Size; ++i) {
- NewMask[i] = RepeatMask[i % LaneSize];
+ for (int i = 0; i != NumElts; ++i) {
+ NewMask[i] = RepeatMask[i % NumLaneElts];
if (NewMask[i] < 0)
continue;
- NewMask[i] += (i / LaneSize) * LaneSize;
+ NewMask[i] += (i / NumLaneElts) * NumLaneElts;
}
return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);
}
@@ -14831,14 +15134,13 @@ getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
ArrayRef<int> HalfMask, int HalfIdx1,
int HalfIdx2, bool UndefLower,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG, bool UseConcat = false) {
assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
assert(V1.getValueType().isSimple() && "Expecting only simple types");
MVT VT = V1.getSimpleValueType();
- unsigned NumElts = VT.getVectorNumElements();
- unsigned HalfNumElts = NumElts / 2;
- MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
+ MVT HalfVT = VT.getHalfNumVectorElementsVT();
+ unsigned HalfNumElts = HalfVT.getVectorNumElements();
auto getHalfVector = [&](int HalfIdx) {
if (HalfIdx < 0)
@@ -14853,6 +15155,14 @@ static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
SDValue Half1 = getHalfVector(HalfIdx1);
SDValue Half2 = getHalfVector(HalfIdx2);
SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+ if (UseConcat) {
+ SDValue Op0 = V;
+ SDValue Op1 = DAG.getUNDEF(HalfVT);
+ if (UndefLower)
+ std::swap(Op0, Op1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1);
+ }
+
unsigned Offset = UndefLower ? HalfNumElts : 0;
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
DAG.getIntPtrConstant(Offset, DL));
@@ -14877,9 +15187,8 @@ static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
// Upper half is undef and lower half is whole upper subvector.
// e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
- unsigned NumElts = VT.getVectorNumElements();
- unsigned HalfNumElts = NumElts / 2;
- MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
+ MVT HalfVT = VT.getHalfNumVectorElementsVT();
+ unsigned HalfNumElts = HalfVT.getVectorNumElements();
if (!UndefLower &&
isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
@@ -15155,11 +15464,19 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
}
static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
- unsigned &ShuffleImm, ArrayRef<int> Mask) {
+ bool &ForceV1Zero, bool &ForceV2Zero,
+ unsigned &ShuffleImm, ArrayRef<int> Mask,
+ const APInt &Zeroable) {
int NumElts = VT.getVectorNumElements();
assert(VT.getScalarSizeInBits() == 64 &&
(NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected data type for VSHUFPD");
+ assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) &&
+ "Illegal shuffle mask");
+
+ bool ZeroLane[2] = { true, true };
+ for (int i = 0; i < NumElts; ++i)
+ ZeroLane[i & 1] &= Zeroable[i];
// Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
// Mask for V4F64; 0/1, 4/5, 2/3, 6/7..
@@ -15167,7 +15484,7 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
bool ShufpdMask = true;
bool CommutableMask = true;
for (int i = 0; i < NumElts; ++i) {
- if (Mask[i] == SM_SentinelUndef)
+ if (Mask[i] == SM_SentinelUndef || ZeroLane[i & 1])
continue;
if (Mask[i] < 0)
return false;
@@ -15180,30 +15497,77 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
ShuffleImm |= (Mask[i] % 2) << i;
}
- if (ShufpdMask)
- return true;
- if (CommutableMask) {
+ if (!ShufpdMask && !CommutableMask)
+ return false;
+
+ if (!ShufpdMask && CommutableMask)
std::swap(V1, V2);
- return true;
- }
- return false;
+ ForceV1Zero = ZeroLane[0];
+ ForceV2Zero = ZeroLane[1];
+ return true;
}
-static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
- assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&&
+static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&
"Unexpected data type for VSHUFPD");
unsigned Immediate = 0;
- if (!matchShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
+ bool ForceV1Zero = false, ForceV2Zero = false;
+ if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate,
+ Mask, Zeroable))
return SDValue();
+ // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
+ if (ForceV1Zero)
+ V1 = getZeroVector(VT, Subtarget, DAG, DL);
+ if (ForceV2Zero)
+ V2 = getZeroVector(VT, Subtarget, DAG, DL);
+
return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
- DAG.getConstant(Immediate, DL, MVT::i8));
+ DAG.getTargetConstant(Immediate, DL, MVT::i8));
}
+// Look for {0, 8, 16, 24, 32, 40, 48, 56 } in the first 8 elements. Followed
+// by zeroable elements in the remaining 24 elements. Turn this into two
+// vmovqb instructions shuffled together.
+static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ SelectionDAG &DAG) {
+ assert(VT == MVT::v32i8 && "Unexpected type!");
+
+ // The first 8 indices should be every 8th element.
+ if (!isSequentialOrUndefInRange(Mask, 0, 8, 0, 8))
+ return SDValue();
+
+ // Remaining elements need to be zeroable.
+ if (Zeroable.countLeadingOnes() < (Mask.size() - 8))
+ return SDValue();
+
+ V1 = DAG.getBitcast(MVT::v4i64, V1);
+ V2 = DAG.getBitcast(MVT::v4i64, V2);
+
+ V1 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V1);
+ V2 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V2);
+
+ // The VTRUNCs will put 0s in the upper 12 bytes. Use them to put zeroes in
+ // the upper bits of the result using an unpckldq.
+ SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2,
+ { 0, 1, 2, 3, 16, 17, 18, 19,
+ 4, 5, 6, 7, 20, 21, 22, 23 });
+ // Insert the unpckldq into a zero vector to widen to v32i8.
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v32i8,
+ DAG.getConstant(0, DL, MVT::v32i8), Unpack,
+ DAG.getIntPtrConstant(0, DL));
+}
+
+
/// Handle lowering of 4-lane 64-bit floating point shuffles.
///
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
@@ -15236,7 +15600,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
- DAG.getConstant(VPERMILPMask, DL, MVT::i8));
+ DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
}
// With AVX2 we have direct support for this permutation.
@@ -15256,8 +15620,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Otherwise, fall back.
- return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG,
- Subtarget);
+ return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v4f64, V1, V2, Mask,
+ DAG, Subtarget);
}
// Use dedicated unpack instructions for masks that match their pattern.
@@ -15269,7 +15633,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Blend;
// Check if the blend happens to exactly fit that of SHUFPD.
- if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
+ if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Op;
// If we have one input in place, then we can permute the other input and
@@ -15473,8 +15838,8 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
// Otherwise, fall back.
- return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
- DAG, Subtarget);
+ return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask,
+ DAG, Subtarget);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
@@ -15681,8 +16046,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
return V;
- return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask,
- DAG, Subtarget);
+ return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v16i16, V1, V2, Mask,
+ DAG, Subtarget);
}
SmallVector<int, 8> RepeatedMask;
@@ -15780,8 +16145,8 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
return V;
- return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG,
- Subtarget);
+ return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask,
+ DAG, Subtarget);
}
if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2,
@@ -15803,6 +16168,14 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
return V;
+ // Look for {0, 8, 16, 24, 32, 40, 48, 56 } in the first 8 elements. Followed
+ // by zeroable elements in the remaining 24 elements. Turn this into two
+ // vmovqb instructions shuffled together.
+ if (Subtarget.hasVLX())
+ if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2,
+ Mask, Zeroable, DAG))
+ return V;
+
// Otherwise fall back on generic lowering.
return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
Subtarget, DAG);
@@ -15974,7 +16347,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
}
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
- DAG.getConstant(PermMask, DL, MVT::i8));
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
/// Handle lowering of 8-lane 64-bit floating point shuffles.
@@ -15999,7 +16372,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
- DAG.getConstant(VPERMILPMask, DL, MVT::i8));
+ DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
}
SmallVector<int, 4> RepeatedMask;
@@ -16016,7 +16389,8 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Unpck;
// Check if the blend happens to exactly fit that of SHUFPD.
- if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
+ if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Op;
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2,
@@ -16389,6 +16763,49 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
}
+static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask,
+ MVT VT, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ // Shuffle should be unary.
+ if (!V2.isUndef())
+ return SDValue();
+
+ int ShiftAmt = -1;
+ int NumElts = Mask.size();
+ for (int i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+ assert((M == SM_SentinelUndef || (0 <= M && M < NumElts)) &&
+ "Unexpected mask index.");
+ if (M < 0)
+ continue;
+
+ // The first non-undef element determines our shift amount.
+ if (ShiftAmt < 0) {
+ ShiftAmt = M - i;
+ // Need to be shifting right.
+ if (ShiftAmt <= 0)
+ return SDValue();
+ }
+ // All non-undef elements must shift by the same amount.
+ if (ShiftAmt != M - i)
+ return SDValue();
+ }
+ assert(ShiftAmt >= 0 && "All undef?");
+
+ // Great we found a shift right.
+ MVT WideVT = VT;
+ if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
+ WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
+ DAG.getUNDEF(WideVT), V1,
+ DAG.getIntPtrConstant(0, DL));
+ Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res,
+ DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
+}
+
// Determine if this shuffle can be implemented with a KSHIFT instruction.
// Returns the shift amount if possible or -1 if not. This is a simplified
// version of matchShuffleAsShift.
@@ -16434,13 +16851,20 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/o basic ISA!");
- unsigned NumElts = Mask.size();
+ int NumElts = Mask.size();
// Try to recognize shuffles that are just padding a subvector with zeros.
- unsigned SubvecElts = 0;
- for (int i = 0; i != (int)NumElts; ++i) {
- if (Mask[i] >= 0 && Mask[i] != i)
- break;
+ int SubvecElts = 0;
+ int Src = -1;
+ for (int i = 0; i != NumElts; ++i) {
+ if (Mask[i] >= 0) {
+ // Grab the source from the first valid mask. All subsequent elements need
+ // to use this same source.
+ if (Src < 0)
+ Src = Mask[i] / NumElts;
+ if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i)
+ break;
+ }
++SubvecElts;
}
@@ -16451,30 +16875,54 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Make sure the number of zeroable bits in the top at least covers the bits
// not covered by the subvector.
- if (Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) {
+ if ((int)Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) {
+ assert(Src >= 0 && "Expected a source!");
MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts);
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT,
- V1, DAG.getIntPtrConstant(0, DL));
+ Src == 0 ? V1 : V2,
+ DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- getZeroVector(VT, Subtarget, DAG, DL),
+ DAG.getConstant(0, DL, VT),
Extract, DAG.getIntPtrConstant(0, DL));
}
+ // Try a simple shift right with undef elements. Later we'll try with zeros.
+ if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget,
+ DAG))
+ return Shift;
+
// Try to match KSHIFTs.
- // TODO: Support narrower than legal shifts by widening and extracting.
- if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) {
- unsigned Offset = 0;
- for (SDValue V : { V1, V2 }) {
- unsigned Opcode;
- int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
- if (ShiftAmt >= 0)
- return DAG.getNode(Opcode, DL, VT, V,
- DAG.getConstant(ShiftAmt, DL, MVT::i8));
- Offset += NumElts; // Increment for next iteration.
+ unsigned Offset = 0;
+ for (SDValue V : { V1, V2 }) {
+ unsigned Opcode;
+ int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
+ if (ShiftAmt >= 0) {
+ MVT WideVT = VT;
+ if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
+ WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
+ DAG.getUNDEF(WideVT), V,
+ DAG.getIntPtrConstant(0, DL));
+ // Widened right shifts need two shifts to ensure we shift in zeroes.
+ if (Opcode == X86ISD::KSHIFTR && WideVT != VT) {
+ int WideElts = WideVT.getVectorNumElements();
+ // Shift left to put the original vector in the MSBs of the new size.
+ Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res,
+ DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8));
+ // Increase the shift amount to account for the left shift.
+ ShiftAmt += WideElts - NumElts;
+ }
+
+ Res = DAG.getNode(Opcode, DL, WideVT, Res,
+ DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
}
+ Offset += NumElts; // Increment for next iteration.
}
+
MVT ExtVT;
switch (VT.SimpleTy) {
default:
@@ -16594,7 +17042,7 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- ArrayRef<int> Mask = SVOp->getMask();
+ ArrayRef<int> OrigMask = SVOp->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
@@ -16620,8 +17068,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
// undef as well. This makes it easier to match the shuffle based solely on
// the mask.
if (V2IsUndef &&
- any_of(Mask, [NumElements](int M) { return M >= NumElements; })) {
- SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
+ any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
+ SmallVector<int, 8> NewMask(OrigMask.begin(), OrigMask.end());
for (int &M : NewMask)
if (M >= NumElements)
M = -1;
@@ -16629,15 +17077,16 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
}
// Check for illegal shuffle mask element index values.
- int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit;
- assert(llvm::all_of(Mask,
+ int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
+ (void)MaskUpperLimit;
+ assert(llvm::all_of(OrigMask,
[&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
"Out of bounds shuffle index");
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
- APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2);
if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL);
@@ -16645,11 +17094,11 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
// Create an alternative mask with info about zeroable elements.
// Here we do not set undef elements as zeroable.
- SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
+ SmallVector<int, 64> ZeroableMask(OrigMask.begin(), OrigMask.end());
if (V2IsZero) {
assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
for (int i = 0; i != NumElements; ++i)
- if (Mask[i] != SM_SentinelUndef && Zeroable[i])
+ if (OrigMask[i] != SM_SentinelUndef && Zeroable[i])
ZeroableMask[i] = SM_SentinelZero;
}
@@ -16664,7 +17113,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
// by obfuscating the operands with bitcasts.
// TODO: Avoid lowering directly from this top-level function: make this
// a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask,
Subtarget, DAG))
return Broadcast;
@@ -16700,8 +17149,11 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
}
// Commute the shuffle if it will improve canonicalization.
- if (canonicalizeShuffleMaskWithCommute(Mask))
- return DAG.getCommutedVectorShuffle(*SVOp);
+ SmallVector<int, 64> Mask(OrigMask.begin(), OrigMask.end());
+ if (canonicalizeShuffleMaskWithCommute(Mask)) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(V1, V2);
+ }
if (SDValue V = lowerShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget))
return V;
@@ -16910,7 +17362,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
// Use kshiftr instruction to move to the lower element.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
+ DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
@@ -17137,8 +17589,8 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
(Subtarget.hasAVX2() && EltVT == MVT::i32)) {
SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
- N2 = DAG.getIntPtrConstant(1, dl);
- return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2);
+ return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
+ DAG.getTargetConstant(1, dl, MVT::i8));
}
}
@@ -17207,14 +17659,14 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// But if optimizing for size and there's a load folding opportunity,
// generate insertps because blendps does not have a 32-bit memory
// operand form.
- N2 = DAG.getIntPtrConstant(1, dl);
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
- return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2);
+ return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1,
+ DAG.getTargetConstant(1, dl, MVT::i8));
}
- N2 = DAG.getIntPtrConstant(IdxVal << 4, dl);
// Create this as a scalar to vector..
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1,
+ DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8));
}
// PINSR* works with constant index.
@@ -17300,7 +17752,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
// Shift to the LSB.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
+ DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
@@ -17841,10 +18293,10 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
std::swap(Op0, Op1);
APInt APIntShiftAmt;
- if (isConstantSplat(Amt, APIntShiftAmt)) {
+ if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
- return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
- Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0,
+ Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
}
return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
@@ -17970,6 +18422,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
+ if (VT == MVT::f128)
+ return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
+
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
@@ -18072,6 +18527,16 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
return Result;
}
+/// Horizontal vector math instructions may be slower than normal math with
+/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
+/// implementation, and likely shuffle complexity of the alternate sequence.
+static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool HasFastHOps = Subtarget.hasFastHorizontalOps();
+ return !IsSingleSource || IsOptimizingSize || HasFastHOps;
+}
+
/// 64-bit unsigned integer to double expansion.
static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -18126,8 +18591,7 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
- if (Subtarget.hasSSE3()) {
- // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'.
+ if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
@@ -18273,7 +18737,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
- VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
+ VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
@@ -18281,7 +18745,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
- VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
+ VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
} else {
SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
@@ -18329,16 +18793,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
+ MVT SrcVT = N0.getSimpleValueType();
+ MVT DstVT = Op.getSimpleValueType();
- if (Op.getSimpleValueType().isVector())
+ if (DstVT == MVT::f128)
+ return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
+
+ if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
- MVT SrcVT = N0.getSimpleValueType();
- MVT DstVT = Op.getSimpleValueType();
-
if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
(SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
// Conversions from unsigned i32 to f32/f64 are legal,
@@ -18346,6 +18812,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
return Op;
}
+ // Promote i32 to i64 and use a signed conversion on 64-bit targets.
+ if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N0);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, N0);
+ }
+
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
@@ -18579,7 +19051,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
// Custom legalize v8i8->v8i64 on CPUs without avx512bw.
if (InVT == MVT::v8i8) {
- if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ if (VT != MVT::v8i64)
return SDValue();
In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
@@ -18602,10 +19074,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
-
- MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
-
+ MVT HalfVT = VT.getHalfNumVectorElementsVT();
SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In);
// Short-circuit if we can determine that each 128-bit half is the same value.
@@ -18903,9 +19372,29 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
- // If called by the legalizer just return.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
+ // If we're called by the type legalizer, handle a few cases.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(InVT)) {
+ if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
+ VT.is128BitVector()) {
+ assert(Subtarget.hasVLX() && "Unexpected subtarget!");
+ // The default behavior is to truncate one step, concatenate, and then
+ // truncate the remainder. We'd rather produce two 64-bit results and
+ // concatenate those.
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, LoVT, Lo);
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, HiVT, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+ }
+
+ // Otherwise let default legalization handle it.
return SDValue();
+ }
if (VT.getVectorElementType() == MVT::i1)
return LowerTruncateVecI1(Op, DAG, Subtarget);
@@ -18940,6 +19429,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget))
return V;
+ // Handle truncation of V256 to V128 using shuffles.
+ assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
+
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget.hasInt256()) {
@@ -19016,22 +19508,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi);
}
- // Handle truncation of V256 to V128 using shuffles.
- assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
-
- assert(Subtarget.hasAVX() && "256-bit vector without AVX!");
-
- unsigned NumElems = VT.getVectorNumElements();
- MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2);
-
- SmallVector<int, 16> MaskVec(NumElems * 2, -1);
- // Prepare truncation shuffle mask
- for (unsigned i = 0; i != NumElems; ++i)
- MaskVec[i] = i * 2;
- In = DAG.getBitcast(NVT, In);
- SDValue V = DAG.getVectorShuffle(NVT, DL, In, In, MaskVec);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
- DAG.getIntPtrConstant(0, DL));
+ llvm_unreachable("All 256->128 cases should have been handled above!");
}
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
@@ -19041,6 +19518,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
MVT SrcVT = Src.getSimpleValueType();
SDLoc dl(Op);
+ if (SrcVT == MVT::f128) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(SrcVT, VT);
+ else
+ LC = RTLIB::getFPTOUINT(SrcVT, VT);
+
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, LC, VT, Src, CallOptions, SDLoc(Op)).first;
+ }
+
if (VT.isVector()) {
if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
@@ -19075,14 +19563,27 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT);
- if (!IsSigned && Subtarget.hasAVX512()) {
- // Conversions from f32/f64 should be legal.
- if (UseSSEReg)
+ if (!IsSigned && UseSSEReg) {
+ // Conversions from f32/f64 with AVX512 should be legal.
+ if (Subtarget.hasAVX512())
return Op;
- // Use default expansion.
+ // Use default expansion for i64.
if (VT == MVT::i64)
return SDValue();
+
+ assert(VT == MVT::i32 && "Unexpected VT!");
+
+ // Promote i32 to i64 and use a signed operation on 64-bit targets.
+ if (Subtarget.is64Bit()) {
+ SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ }
+
+ // Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can
+ // use fisttp which will be handled later.
+ if (!Subtarget.hasSSE3())
+ return SDValue();
}
// Promote i16 to i32 if we can use a SSE operation.
@@ -19103,12 +19604,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
-static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
+ if (VT == MVT::f128) {
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
+ return LowerF128Call(Op, DAG, LC);
+ }
+
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
@@ -19116,14 +19622,31 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
In, DAG.getUNDEF(SVT)));
}
-/// Horizontal vector math instructions may be slower than normal math with
-/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
-/// implementation, and likely shuffle complexity of the alternate sequence.
-static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize();
- bool HasFastHOps = Subtarget.hasFastHorizontalOps();
- return !IsSingleSource || IsOptimizingSize || HasFastHOps;
+SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+ SDValue In = Op.getOperand(0);
+ MVT SVT = In.getSimpleValueType();
+
+ // It's legal except when f128 is involved
+ if (SVT != MVT::f128)
+ return Op;
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, VT);
+
+ // FP_ROUND node has a second operand indicating whether it is known to be
+ // precise. That doesn't take part in the LibCall so we can't directly use
+ // LowerF128Call.
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first;
+}
+
+// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking
+// the default expansion of STRICT_FP_ROUND.
+static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) {
+ // FIXME: Need to form a libcall with an input chain for f128.
+ assert(Op.getOperand(0).getValueType() != MVT::f128 &&
+ "Don't know how to handle f128 yet!");
+ return Op;
}
/// Depending on uarch and/or optimizing for size, we might prefer to use a
@@ -19200,8 +19723,13 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
-static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() == MVT::f128) {
+ RTLIB::Libcall LC = Op.getOpcode() == ISD::FADD ? RTLIB::ADD_F128
+ : RTLIB::SUB_F128;
+ return LowerF128Call(Op, DAG, LC);
+ }
+
assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
"Only expecting float/double");
return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
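// A sketch of the f128 fast path above, assuming the usual soft-float
// library names: 'a + b' on __float128 becomes a call to __addtf3 and
// 'a - b' a call to __subtf3; only f32/f64 reach the horizontal-op lowering.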
@@ -19358,13 +19886,13 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
SelectionDAG &DAG) {
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(Cond, dl, MVT::i8), EFLAGS);
+ DAG.getTargetConstant(Cond, dl, MVT::i8), EFLAGS);
}
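// The getConstant -> getTargetConstant changes throughout this patch mark
// condition codes and immediates as opaque machine operands: a
// TargetConstant is never folded or relegalized by the DAG combiner, and it
// is what isel patterns written against 'timm' expect. A sketch of the
// distinction (dl/MVT as in the surrounding code):
//   SDValue C  = DAG.getConstant(7, dl, MVT::i8);       // ordinary node
//   SDValue TC = DAG.getTargetConstant(7, dl, MVT::i8); // fixed immediate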
/// Helper for matching OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1),...))
/// style scalarized (associative) reduction patterns.
-static bool matchBitOpReduction(SDValue Op, ISD::NodeType BinOp,
- SmallVectorImpl<SDValue> &SrcOps) {
+static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
+ SmallVectorImpl<SDValue> &SrcOps) {
SmallVector<SDValue, 8> Opnds;
DenseMap<SDValue, APInt> SrcOpMap;
EVT VT = MVT::Other;
@@ -19437,7 +19965,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
return SDValue();
SmallVector<SDValue, 8> VecIns;
- if (!matchBitOpReduction(Op, ISD::OR, VecIns))
+ if (!matchScalarReduction(Op, ISD::OR, VecIns))
return SDValue();
// Quit if not 128/256-bit vector.
@@ -19461,8 +19989,8 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
- X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, DL,
- MVT::i8);
+ X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE,
+ DL, MVT::i8);
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back());
}
@@ -19576,6 +20104,13 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
case X86ISD::XOR:
case X86ISD::AND:
return SDValue(Op.getNode(), 1);
+ case ISD::SSUBO:
+ case ISD::USUBO: {
+ // USUBO/SSUBO will become an X86ISD::SUB and we can use its Z flag.
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0),
+ Op->getOperand(1)).getValue(1);
+ }
default:
default_case:
break;
@@ -19766,6 +20301,63 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
+SDValue
+X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SDIV as SDIV
+
+ assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
+ "Unexpected divisor!");
+
+ // Only perform this transform if CMOV is supported; otherwise the select
+ // below will become a branch.
+ if (!Subtarget.hasCMov())
+ return SDValue();
+
+ // fold (sdiv X, pow2)
+ EVT VT = N->getValueType(0);
+ // FIXME: Support i8.
+ if (VT != MVT::i16 && VT != MVT::i32 &&
+ !(Subtarget.is64Bit() && VT == MVT::i64))
+ return SDValue();
+
+ unsigned Lg2 = Divisor.countTrailingZeros();
+
+ // If the divisor is 2 or -2, the default expansion is better.
+ if (Lg2 == 1)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+ SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+
+ // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+ SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+ SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(CMov.getNode());
+
+ // Divide by pow2.
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i64));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (Divisor.isNonNegative())
+ return SRA;
+
+ Created.push_back(SRA.getNode());
+ return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
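// Worked example (a sketch) of the sequence BuildSDIVPow2 builds for
// 'x sdiv 8' on i32 when CMOV is available:
//   t0: i8  = setcc x, 0, setlt    ; is x negative?
//   t1: i32 = add x, 7             ; x + (2^3 - 1)
//   t2: i32 = select t0, t1, x     ; becomes a CMOV
//   t3: i32 = sra t2, 3            ; arithmetic shift completes the divide
// For divisor -8 the same chain is followed by 'sub 0, t3'.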
/// Result of 'and' is compared against zero. Change to a BT node if possible.
/// Returns the BT node and the condition code needed to use it.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
@@ -19842,8 +20434,8 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
if (Src.getValueType() != BitNo.getValueType())
BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
- X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
- dl, MVT::i8);
+ X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
+ dl, MVT::i8);
return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
}
@@ -19935,13 +20527,6 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- // If this is a seteq make sure any build vectors of all zeros are on the RHS.
- // This helps with vptestm matching.
- // TODO: Should we just canonicalize the setcc during DAG combine?
- if ((SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE) &&
- ISD::isBuildVectorAllZeros(Op0.getNode()))
- std::swap(Op0, Op1);
-
// Prefer SETGT over SETLT.
if (SetCCOpcode == ISD::SETLT) {
SetCCOpcode = ISD::getSetCCSwappedOperands(SetCCOpcode);
@@ -20007,7 +20592,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
- SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, false);
+ SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
@@ -20018,7 +20603,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// This is beneficial because materializing a constant 0 for the PCMPEQ is
// probably cheaper than XOR+PCMPGT using 2 different vector constants:
// cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
- SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, true);
+ SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true);
if (!UGEOp1)
return SDValue();
Op1 = Op0;
@@ -20086,14 +20671,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getConstant(CC0, dl, MVT::i8));
+ DAG.getTargetConstant(CC0, dl, MVT::i8));
SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getConstant(CC1, dl, MVT::i8));
+ DAG.getTargetConstant(CC1, dl, MVT::i8));
Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
} else {
// Handle all other FP comparisons here.
Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getConstant(SSECC, dl, MVT::i8));
+ DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
// If this is SSE/AVX CMPP, bitcast the result back to integer to match the
@@ -20106,16 +20691,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
MVT VTOp0 = Op0.getSimpleValueType();
+ (void)VTOp0;
assert(VTOp0 == Op1.getSimpleValueType() &&
"Expected operands with same type!");
assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
"Invalid number of packed elements for source and destination!");
- // This is being called by type legalization because v2i32 is marked custom
- // for result type legalization for v2f32.
- if (VTOp0 == MVT::v2i32)
- return SDValue();
-
// The non-AVX512 code below works under the assumption that source and
// destination types are the same.
assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
@@ -20153,7 +20734,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getConstant(CmpMode, dl, MVT::i8));
+ DAG.getTargetConstant(CmpMode, dl, MVT::i8));
}
// (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2.
@@ -20222,21 +20803,19 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
TLI.isOperationLegal(ISD::UMIN, VT)) {
// If we have a constant operand, increment/decrement it and change the
// condition to avoid an invert.
- if (Cond == ISD::SETUGT &&
- ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) {
- return !C->getAPIntValue().isMaxValue();
- })) {
+ if (Cond == ISD::SETUGT) {
// X > C --> X >= (C+1) --> X == umax(X, C+1)
- Op1 = DAG.getNode(ISD::ADD, dl, VT, Op1, DAG.getConstant(1, dl, VT));
- Cond = ISD::SETUGE;
+ if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true)) {
+ Op1 = UGTOp1;
+ Cond = ISD::SETUGE;
+ }
}
- if (Cond == ISD::SETULT &&
- ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) {
- return !C->getAPIntValue().isNullValue();
- })) {
+ if (Cond == ISD::SETULT) {
// X < C --> X <= (C-1) --> X == umin(X, C-1)
- Op1 = DAG.getNode(ISD::SUB, dl, VT, Op1, DAG.getConstant(1, dl, VT));
- Cond = ISD::SETULE;
+ if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false)) {
+ Op1 = ULTOp1;
+ Cond = ISD::SETULE;
+ }
}
bool Invert = false;
unsigned Opc;
@@ -20360,11 +20939,11 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
return Result;
}
-// Try to select this as a KORTEST+SETCC if possible.
-static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- SDValue &X86CC) {
+// Try to select this as a KORTEST+SETCC or KTEST+SETCC if possible.
+static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ SDValue &X86CC) {
// Only support equality comparisons.
if (CC != ISD::SETEQ && CC != ISD::SETNE)
return SDValue();
@@ -20389,6 +20968,21 @@ static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
} else
return SDValue();
+ // If the input is an AND, we can combine its operands into the KTEST.
+ bool KTestable = false;
+ if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1))
+ KTestable = true;
+ if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))
+ KTestable = true;
+ if (!isNullConstant(Op1))
+ KTestable = false;
+ if (KTestable && Op0.getOpcode() == ISD::AND && Op0.hasOneUse()) {
+ SDValue LHS = Op0.getOperand(0);
+ SDValue RHS = Op0.getOperand(1);
+ X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
+ return DAG.getNode(X86ISD::KTEST, dl, MVT::i32, LHS, RHS);
+ }
+
// If the input is an OR, we can combine its operands into the KORTEST.
SDValue LHS = Op0;
SDValue RHS = Op0;
@@ -20397,7 +20991,7 @@ static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
RHS = Op0.getOperand(1);
}
- X86CC = DAG.getConstant(X86Cond, dl, MVT::i8);
+ X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
}
@@ -20425,9 +21019,9 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
return PTEST;
}
- // Try to lower using KORTEST.
- if (SDValue KORTEST = EmitKORTEST(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
- return KORTEST;
+ // Try to lower using KORTEST or KTEST.
+ if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
+ return Test;
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
@@ -20442,7 +21036,7 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
if (Invert) {
X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
- X86CC = DAG.getConstant(CCode, dl, MVT::i8);
+ X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
}
return Op0.getOperand(1);
@@ -20456,7 +21050,7 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- X86CC = DAG.getConstant(CondCode, dl, MVT::i8);
+ X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
return EFLAGS;
}
@@ -20472,6 +21066,19 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // Handle f128 first, since one possible outcome is a normal integer
+ // comparison, which is handled by emitFlagsForSetcc.
+ if (Op0.getValueType() == MVT::f128) {
+ softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1);
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (!Op1.getNode()) {
+ assert(Op0.getValueType() == Op.getValueType() &&
+ "Unexpected setcc expansion!");
+ return Op0;
+ }
+ }
+
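// E.g. (a sketch, assuming the usual soft-float ABI): 'setcc f128 a, b, oeq'
// softens to 'i32 = call __eqtf2(a, b)' with (Op0, Op1) rewritten to
// (result, 0), which then flows through emitFlagsForSetcc as an ordinary
// integer comparison.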
SDValue X86CC;
SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
if (!EFLAGS)
@@ -20612,15 +21219,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
if (Subtarget.hasAVX512()) {
- SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
- CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
+ SDValue Cmp =
+ DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1,
+ DAG.getTargetConstant(SSECC, DL, MVT::i8));
assert(!VT.isVector() && "Not a scalar type?");
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
if (SSECC < 8 || Subtarget.hasAVX()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
- DAG.getConstant(SSECC, DL, MVT::i8));
+ DAG.getTargetConstant(SSECC, DL, MVT::i8));
// If we have AVX, we can use a variable vector select (VBLENDV) instead
// of 3 logic instructions for size savings and potentially speed.
@@ -20718,8 +21326,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
- unsigned CondCode =
- cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+ unsigned CondCode = Cond.getConstantOperandVal(0);
if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
@@ -20807,8 +21414,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
- MVT VT = Op.getSimpleValueType();
-
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) // FPStack?
@@ -20826,7 +21431,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
X86::CondCode X86Cond;
std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
- CC = DAG.getConstant(X86Cond, DL, MVT::i8);
+ CC = DAG.getTargetConstant(X86Cond, DL, MVT::i8);
AddTest = false;
}
@@ -20848,7 +21453,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
if (AddTest) {
- CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8);
+ CC = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8);
Cond = EmitCmp(Cond, DAG.getConstant(0, DL, Cond.getValueType()),
X86::COND_NE, DL, DAG);
}
@@ -20864,9 +21469,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
(isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(isNullConstant(Op1) || isNullConstant(Op2))) {
- SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- Cond);
+ SDValue Res =
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cond);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B))
return DAG.getNOT(DL, Res, Res.getValueType());
return Res;
@@ -21037,8 +21642,8 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
// pre-AVX2 256-bit extensions need to be split into 128-bit instructions.
if (Subtarget.hasAVX()) {
assert(VT.is256BitVector() && "256-bit vector expected");
- int HalfNumElts = NumElts / 2;
- MVT HalfVT = MVT::getVectorVT(SVT, HalfNumElts);
+ MVT HalfVT = VT.getHalfNumVectorElementsVT();
+ int HalfNumElts = HalfVT.getVectorNumElements();
unsigned NumSrcElts = InVT.getVectorNumElements();
SmallVector<int, 16> HiMask(NumSrcElts, SM_SentinelUndef);
@@ -21081,7 +21686,7 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
- DAG.getConstant(SignExtShift, dl, MVT::i8));
+ DAG.getTargetConstant(SignExtShift, dl, MVT::i8));
}
if (VT == MVT::v2i64) {
@@ -21119,7 +21724,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
// Custom legalize v8i8->v8i64 on CPUs without avx512bw.
if (InVT == MVT::v8i8) {
- if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ if (VT != MVT::v8i64)
return SDValue();
In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
@@ -21138,10 +21743,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
// for v4i32 the high shuffle mask will be {2, 3, -1, -1}
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
// concat the vectors to original VT
-
- MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
-
+ MVT HalfVT = VT.getHalfNumVectorElementsVT();
SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);
unsigned NumElems = InVT.getVectorNumElements();
@@ -21165,7 +21767,7 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
// Splitting volatile memory ops is not allowed unless the operation was not
// legal to begin with. We are assuming the input op is legal (this transform
// is only used for targets with AVX).
- if (Store->isVolatile())
+ if (!Store->isSimple())
return SDValue();
MVT StoreVT = StoredVal.getSimpleValueType();
@@ -21201,7 +21803,7 @@ static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
// Splitting volatile memory ops is not allowed unless the operation was not
// legal to begin with. We are assuming the input op is legal (this transform
// is only used for targets with AVX).
- if (Store->isVolatile())
+ if (!Store->isSimple())
return SDValue();
MVT StoreSVT = StoreVT.getScalarType();
@@ -21266,14 +21868,13 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
"Unexpected VT");
- if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
- TargetLowering::TypeWidenVector)
- return SDValue();
+ assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
+ TargetLowering::TypeWidenVector && "Unexpected type action!");
- MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
- StoreVT.getVectorNumElements() * 2);
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT);
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
DAG.getUNDEF(StoreVT));
@@ -21313,11 +21914,10 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
- EVT MemVT = Ld->getMemoryVT();
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
if (RegVT.getVectorElementType() == MVT::i1) {
- assert(EVT(RegVT) == MemVT && "Expected non-extending load");
+ assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load");
assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
"Expected AVX512F without AVX512DQI");
@@ -21336,176 +21936,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
}
- // Nothing useful we can do without SSE2 shuffles.
- assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2.");
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned RegSz = RegVT.getSizeInBits();
-
- ISD::LoadExtType Ext = Ld->getExtensionType();
-
- assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)
- && "Only anyext and sext are currently implemented.");
- assert(MemVT != RegVT && "Cannot extend to the same type");
- assert(MemVT.isVector() && "Must load a vector from memory");
-
- unsigned NumElems = RegVT.getVectorNumElements();
- unsigned MemSz = MemVT.getSizeInBits();
- assert(RegSz > MemSz && "Register size must be greater than the mem size");
-
- if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) {
- // The only way in which we have a legal 256-bit vector result but not the
- // integer 256-bit operations needed to directly lower a sextload is if we
- // have AVX1 but not AVX2. In that case, we can always emit a sextload to
- // a 128-bit vector and a normal sign_extend to 256-bits that should get
- // correctly legalized. We do this late to allow the canonical form of
- // sextload to persist throughout the rest of the DAG combiner -- it wants
- // to fold together any extensions it can, and so will fuse a sign_extend
- // of an sextload into a sextload targeting a wider value.
- SDValue Load;
- if (MemSz == 128) {
- // Just switch this to a normal load.
- assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, "
- "it must be a legal 128-bit vector "
- "type!");
- Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), Ld->getAlignment(),
- Ld->getMemOperand()->getFlags());
- } else {
- assert(MemSz < 128 &&
- "Can't extend a type wider than 128 bits to a 256 bit vector!");
- // Do an sext load to a 128-bit vector type. We want to use the same
- // number of elements, but elements half as wide. This will end up being
- // recursively lowered by this routine, but will succeed as we definitely
- // have all the necessary features if we're using AVX1.
- EVT HalfEltVT =
- EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2);
- EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems);
- Load =
- DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
- Ld->getMemOperand()->getFlags());
- }
-
- // Replace chain users with the new chain.
- assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
-
- // Finally, do a normal sign-extend to the desired register.
- SDValue SExt = DAG.getSExtOrTrunc(Load, dl, RegVT);
- return DAG.getMergeValues({SExt, Load.getValue(1)}, dl);
- }
-
- // All sizes must be a power of two.
- assert(isPowerOf2_32(RegSz * MemSz * NumElems) &&
- "Non-power-of-two elements are not custom lowered!");
-
- // Attempt to load the original value using scalar loads.
- // Find the largest scalar type that divides the total loaded size.
- MVT SclrLoadTy = MVT::i8;
- for (MVT Tp : MVT::integer_valuetypes()) {
- if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
- SclrLoadTy = Tp;
- }
- }
-
- // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
- if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
- (64 <= MemSz))
- SclrLoadTy = MVT::f64;
-
- // Calculate the number of scalar loads that we need to perform
- // in order to load our vector from memory.
- unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
-
- assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
- "Can only lower sext loads with a single scalar load!");
-
- unsigned loadRegSize = RegSz;
- if (Ext == ISD::SEXTLOAD && RegSz >= 256)
- loadRegSize = 128;
-
- // If we don't have BWI we won't be able to create the shuffle needed for
- // v8i8->v8i64.
- if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
- MemVT == MVT::v8i8)
- loadRegSize = 128;
-
- // Represent our vector as a sequence of elements which are the
- // largest scalar that we can load.
- EVT LoadUnitVecVT = EVT::getVectorVT(
- *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits());
-
- // Represent the data using the same element type that is stored in
- // memory. In practice, we ''widen'' MemVT.
- EVT WideVecVT =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegSize / MemVT.getScalarSizeInBits());
-
- assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
- "Invalid vector type");
-
- // We can't shuffle using an illegal type.
- assert(TLI.isTypeLegal(WideVecVT) &&
- "We only lower types that form legal widened vector types");
-
- SmallVector<SDValue, 8> Chains;
- SDValue Ptr = Ld->getBasePtr();
- unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8;
- SDValue Increment = DAG.getConstant(OffsetInc, dl,
- TLI.getPointerTy(DAG.getDataLayout()));
- SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
-
- unsigned Offset = 0;
- for (unsigned i = 0; i < NumLoads; ++i) {
- unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset);
-
- // Perform a single load.
- SDValue ScalarLoad =
- DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr,
- Ld->getPointerInfo().getWithOffset(Offset),
- NewAlign, Ld->getMemOperand()->getFlags());
- Chains.push_back(ScalarLoad.getValue(1));
- // Create the first element type using SCALAR_TO_VECTOR in order to avoid
- // another round of DAGCombining.
- if (i == 0)
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
- else
- Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
- ScalarLoad, DAG.getIntPtrConstant(i, dl));
-
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- Offset += OffsetInc;
- }
-
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
-
- // Bitcast the loaded value to a vector of the original element type, in
- // the size of the target vector type.
- SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res);
- unsigned SizeRatio = RegSz / MemSz;
-
- if (Ext == ISD::SEXTLOAD) {
- SDValue Sext = getExtendInVec(ISD::SIGN_EXTEND, dl, RegVT, SlicedVec, DAG);
- return DAG.getMergeValues({Sext, TF}, dl);
- }
-
- if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
- MemVT == MVT::v8i8) {
- SDValue Sext = getExtendInVec(ISD::ZERO_EXTEND, dl, RegVT, SlicedVec, DAG);
- return DAG.getMergeValues({Sext, TF}, dl);
- }
-
- // Redistribute the loaded elements into the different locations.
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i * SizeRatio] = i;
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
- DAG.getUNDEF(WideVecVT), ShuffleVec);
-
- // Bitcast to the requested type.
- Shuff = DAG.getBitcast(RegVT, Shuff);
- return DAG.getMergeValues({Shuff, TF}, dl);
+ return SDValue();
}
/// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes
@@ -21610,7 +22041,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (Inverted)
X86Cond = X86::GetOppositeBranchCondition(X86Cond);
- CC = DAG.getConstant(X86Cond, dl, MVT::i8);
+ CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
addTest = false;
} else {
unsigned CondOpc;
@@ -21638,10 +22069,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (Cmp == Cond.getOperand(1).getOperand(1) &&
isX86LogicalCmp(Cmp) &&
Op.getNode()->hasOneUse()) {
- X86::CondCode CCode =
- (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
- CCode = X86::GetOppositeBranchCondition(CCode);
- CC = DAG.getConstant(CCode, dl, MVT::i8);
+ X86::CondCode CCode0 =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode0 = X86::GetOppositeBranchCondition(CCode0);
+ CC = DAG.getTargetConstant(CCode0, dl, MVT::i8);
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
@@ -21654,12 +22085,12 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
(void)NewBR;
Dest = FalseBB;
- Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, CC, Cmp);
- X86::CondCode CCode =
- (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
- CCode = X86::GetOppositeBranchCondition(CCode);
- CC = DAG.getConstant(CCode, dl, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain,
+ Dest, CC, Cmp);
+ X86::CondCode CCode1 =
+ (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
+ CCode1 = X86::GetOppositeBranchCondition(CCode1);
+ CC = DAG.getTargetConstant(CCode1, dl, MVT::i8);
Cond = Cmp;
addTest = false;
}
@@ -21672,7 +22103,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
- CC = DAG.getConstant(CCode, dl, MVT::i8);
+ CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
Cond = Cond.getOperand(0).getOperand(1);
addTest = false;
} else if (Cond.getOpcode() == ISD::SETCC &&
@@ -21698,10 +22129,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
+ CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
- CC = DAG.getConstant(X86::COND_P, dl, MVT::i8);
+ CC = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
Cond = Cmp;
addTest = false;
}
@@ -21714,10 +22145,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
+ CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
- CC = DAG.getConstant(X86::COND_P, dl, MVT::i8);
+ CC = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
Cond = Cmp;
addTest = false;
}
@@ -21742,7 +22173,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
- CC = DAG.getConstant(X86Cond, dl, MVT::i8);
+ CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()),
X86Cond, dl, DAG);
}
@@ -21770,7 +22201,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Align = Op.getConstantOperandVal(2);
EVT VT = Node->getValueType(0);
// Chain the dynamic stack allocation so that it doesn't modify the stack
@@ -21811,7 +22242,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
}
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
- unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
+ Register Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
@@ -21821,7 +22252,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- unsigned SPReg = RegInfo->getStackRegister();
+ Register SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
@@ -22076,7 +22507,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
}
return DAG.getNode(Opc, dl, VT, SrcOp,
- DAG.getConstant(ShiftAmt, dl, MVT::i8));
+ DAG.getTargetConstant(ShiftAmt, dl, MVT::i8));
}
/// Handle vector element shifts where the shift amount may or may not be a
@@ -22121,7 +22552,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
MVT::v2i64, ShAmt);
else {
- SDValue ByteShift = DAG.getConstant(
+ SDValue ByteShift = DAG.getTargetConstant(
(128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
@@ -22308,13 +22739,21 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Helper to detect if the operand is CUR_DIRECTION rounding mode.
auto isRoundModeCurDirection = [](SDValue Rnd) {
if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
- return C->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION;
+ return C->getAPIntValue() == X86::STATIC_ROUNDING::CUR_DIRECTION;
return false;
};
auto isRoundModeSAE = [](SDValue Rnd) {
- if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
- return C->getZExtValue() == X86::STATIC_ROUNDING::NO_EXC;
+ if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
+ unsigned RC = C->getZExtValue();
+ if (RC & X86::STATIC_ROUNDING::NO_EXC) {
+ // Clear the NO_EXC bit and check remaining bits.
+ RC ^= X86::STATIC_ROUNDING::NO_EXC;
+ // As a convenience, we accept either no other bits set or an explicit
+ // current-direction rounding mode.
+ return RC == 0 || RC == X86::STATIC_ROUNDING::CUR_DIRECTION;
+ }
+ }
return false;
};
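// Worked examples for the check above, using the X86::STATIC_ROUNDING
// values CUR_DIRECTION = 4 and NO_EXC = 8:
//   Rnd = 8  (NO_EXC)                 -> SAE
//   Rnd = 12 (NO_EXC | CUR_DIRECTION) -> SAE
//   Rnd = 9  (NO_EXC | TO_NEG_INF)    -> rejected: an explicit rounding
//                                        mode is not plain SAE.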
@@ -22335,7 +22774,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
};
SDLoc dl(Op);
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
MVT VT = Op.getSimpleValueType();
const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
if (IntrData) {
@@ -22411,9 +22850,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
- if (IntrData->Type == INTR_TYPE_3OP_IMM8)
- Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
-
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -22666,7 +23102,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case CMP_MASK_CC: {
MVT MaskVT = Op.getSimpleValueType();
SDValue CC = Op.getOperand(3);
- CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -22685,7 +23120,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case CMP_MASK_SCALAR_CC: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
- SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
+ SDValue CC = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
SDValue Cmp;
@@ -22750,16 +23185,16 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case COMI_RM: { // Comparison intrinsics with Sae
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- unsigned CondVal = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned CondVal = Op.getConstantOperandVal(3);
SDValue Sae = Op.getOperand(4);
SDValue FCmp;
if (isRoundModeCurDirection(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
- DAG.getConstant(CondVal, dl, MVT::i8));
+ DAG.getTargetConstant(CondVal, dl, MVT::i8));
else if (isRoundModeSAE(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS,
- DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+ DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae);
else
return SDValue();
// Need to fill with zeros to ensure the bitcast will produce zeroes
@@ -22819,9 +23254,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");
// Clear the upper bits of the rounding immediate so that the legacy
// intrinsic can't trigger the scaling behavior of VRNDSCALE.
- SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32,
- Op.getOperand(2),
- DAG.getConstant(0xf, dl, MVT::i32));
+ auto Round = cast<ConstantSDNode>(Op.getOperand(2));
+ SDValue RoundingMode =
+ DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32);
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), RoundingMode);
}
@@ -22829,12 +23264,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode");
// Clear the upper bits of the rounding immediate so that the legacy
// intrinsic can't trigger the scaling behavior of VRNDSCALE.
- SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32,
- Op.getOperand(3),
- DAG.getConstant(0xf, dl, MVT::i32));
+ auto Round = cast<ConstantSDNode>(Op.getOperand(3));
+ SDValue RoundingMode =
+ DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32);
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), RoundingMode);
}
+ case BEXTRI: {
+ assert(IntrData->Opc0 == X86ISD::BEXTR && "Unexpected opcode");
+
+ // The control is a TargetConstant, but we need to convert it to a
+ // ConstantSDNode.
+ uint64_t Imm = Op.getConstantOperandVal(2);
+ SDValue Control = DAG.getConstant(Imm, dl, Op.getValueType());
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
+ Op.getOperand(1), Control);
+ }
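// BEXTR reads its control operand as { length:8, start:8 }; e.g. (a sketch)
// control 0x0804 extracts 8 bits beginning at bit 4. The intrinsic carries
// the control as a TargetConstant, so it is rebuilt as a plain constant
// above to let the generic X86ISD::BEXTR patterns match.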
// ADC/ADCX/SBB
case ADX: {
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
@@ -23165,6 +23610,61 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
MaskVT, Operation);
return DAG.getMergeValues({Result0, Result1}, DL);
}
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDLoc DL(Op);
+ SDValue ShAmt = Op.getOperand(2);
+ // If the argument is a constant, convert it to a target constant.
+ if (auto *C = dyn_cast<ConstantSDNode>(ShAmt)) {
+ ShAmt = DAG.getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1), ShAmt);
+ }
+
+ unsigned NewIntrinsic;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ }
+
+ // The vector shift intrinsics that take a scalar amount use a 32-bit
+ // shift amount, but the SSE2/MMX shift instructions read 64 bits. Copy
+ // the 32 bits to an MMX register.
+ ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+ DAG.getConstant(NewIntrinsic, DL, MVT::i32),
+ Op.getOperand(1), ShAmt);
+
+ }
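// E.g. (a sketch): 'llvm.x86.mmx.pslli.w(v, n)' with a non-constant n is
// rewritten to 'llvm.x86.mmx.psll.w(v, MMX_MOVW2D(n))', while a constant n
// simply becomes a TargetConstant on the original immediate-form intrinsic.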
}
}
@@ -23177,7 +23677,9 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
// Scale must be constant.
if (!C)
return SDValue();
- SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
// If source is undef or we know it won't be used, use a zero vector
@@ -23204,7 +23706,9 @@ static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
// Scale must be constant.
if (!C)
return SDValue();
- SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
VT.getVectorNumElements());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
@@ -23238,7 +23742,9 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
// Scale must be constant.
if (!C)
return SDValue();
- SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
Src.getSimpleValueType().getVectorNumElements());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
@@ -23266,7 +23772,9 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
// Scale must be constant.
if (!C)
return SDValue();
- SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
MVT MaskVT =
@@ -23435,8 +23943,7 @@ EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-
+ unsigned IntNo = Op.getConstantOperandVal(1);
const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData) {
switch (IntNo) {
@@ -23538,10 +24045,10 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
// If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
// Otherwise return the value from Rand, which is always 0, casted to i32.
- SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
- DAG.getConstant(1, dl, Op->getValueType(1)),
- DAG.getConstant(X86::COND_B, dl, MVT::i8),
- SDValue(Result.getNode(), 1) };
+ SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
+ DAG.getConstant(1, dl, Op->getValueType(1)),
+ DAG.getTargetConstant(X86::COND_B, dl, MVT::i8),
+ SDValue(Result.getNode(), 1)};
SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops);
// Return { result, isValid, chain }.
@@ -23581,8 +24088,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
Scale, Chain, Subtarget);
}
case PREFETCH: {
- SDValue Hint = Op.getOperand(6);
- unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue();
+ const APInt &HintVal = Op.getConstantOperandAPInt(6);
assert((HintVal == 2 || HintVal == 3) &&
"Wrong prefetch hint in intrinsic: should be 2 or 3");
unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
@@ -23678,7 +24184,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -23730,7 +24236,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
unsigned FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
SDLoc dl(Op); // FIXME probably not meaningful
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
@@ -23743,12 +24249,11 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
+Register X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const {
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
- const MachineFunction &MF = DAG.getMachineFunction();
- unsigned Reg = StringSwitch<unsigned>(RegName)
+ Register Reg = StringSwitch<unsigned>(RegName)
.Case("esp", X86::ESP)
.Case("rsp", X86::RSP)
.Case("ebp", X86::EBP)
@@ -23762,8 +24267,7 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
#ifndef NDEBUG
else {
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- unsigned FrameReg =
- RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
+ Register FrameReg = RegInfo->getPtrSizedFrameRegister(MF);
assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
"Invalid Frame Register!");
}
@@ -23809,7 +24313,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
+ Register FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
"Invalid Frame Register!");
@@ -23967,6 +24471,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::Fast:
+ case CallingConv::Tail:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::EAX;
@@ -24279,12 +24784,9 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
if (Opc == ISD::CTLZ) {
// If src is zero (i.e. bsr sets ZF), returns NumBits.
- SDValue Ops[] = {
- Op,
- DAG.getConstant(NumBits + NumBits - 1, dl, OpVT),
- DAG.getConstant(X86::COND_E, dl, MVT::i8),
- Op.getValue(1)
- };
+ SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT),
+ DAG.getTargetConstant(X86::COND_E, dl, MVT::i8),
+ Op.getValue(1)};
Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops);
}
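// E.g. (a sketch) for 'ctlz i32 x': BSR yields the index of the highest set
// bit and sets ZF for x == 0, so the CMOV substitutes 63 in that case; the
// XOR with 31 that follows then produces the required ctlz(0) == 32.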
@@ -24312,12 +24814,9 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0);
// If src is zero (i.e. bsf sets ZF), returns NumBits.
- SDValue Ops[] = {
- Op,
- DAG.getConstant(NumBits, dl, VT),
- DAG.getConstant(X86::COND_E, dl, MVT::i8),
- Op.getValue(1)
- };
+ SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT),
+ DAG.getTargetConstant(X86::COND_E, dl, MVT::i8),
+ Op.getValue(1)};
return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
}
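// Likewise (a sketch) for 'cttz i32 x': BSF sets ZF when x == 0, so the
// CMOV above yields the well-defined cttz(0) == 32 without needing TZCNT.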
@@ -24453,7 +24952,7 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
SDValue N0 = Op.getOperand(0);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
DAG.getConstant(0, DL, VT), N0);
- SDValue Ops[] = {N0, Neg, DAG.getConstant(X86::COND_GE, DL, MVT::i8),
+ SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_GE, DL, MVT::i8),
SDValue(Neg.getNode(), 1)};
return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);
}
@@ -25033,7 +25532,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
// Optimize shl/srl/sra with constant shift amount.
APInt APIntShiftAmt;
- if (!isConstantSplat(Amt, APIntShiftAmt))
+ if (!X86::isConstantSplat(Amt, APIntShiftAmt))
return SDValue();
// If the shift amount is out of range, return undef.
@@ -25220,7 +25719,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
}
ConstantSDNode *ND = cast<ConstantSDNode>(Op);
- APInt C(SVTBits, ND->getAPIntValue().getZExtValue());
+ APInt C(SVTBits, ND->getZExtValue());
uint64_t ShAmt = C.getZExtValue();
if (ShAmt >= SVTBits) {
Elts.push_back(DAG.getUNDEF(SVT));
@@ -25502,7 +26001,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
(VT == MVT::v32i8 && Subtarget.hasInt256())) &&
!Subtarget.hasXOP()) {
int NumElts = VT.getVectorNumElements();
- SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8);
+ SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8);
// Extend constant shift amount to vXi16 (it doesn't matter if the type
// isn't legal).
@@ -25774,7 +26273,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
return DAG.getNode(Op, DL, VT, R,
- DAG.getConstant(RotateAmt, DL, MVT::i8));
+ DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
}
// Else, fall-back on VPROLV/VPRORV.
@@ -25795,7 +26294,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (0 <= CstSplatIndex) {
uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
- DAG.getConstant(RotateAmt, DL, MVT::i8));
+ DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
}
// Use general rotate by variable (per-element).
@@ -26032,7 +26531,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
// If this is a canonical idempotent atomicrmw w/no uses, we have a better
// lowering available in lowerAtomicArith.
- // TODO: push more cases through this path.
+ // TODO: push more cases through this path.
if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
AI->use_empty())
@@ -26087,10 +26586,22 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
return Loaded;
}
+bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
+ if (!SI.isUnordered())
+ return false;
+ return ExperimentalUnorderedISEL;
+}
+bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
+ if (!LI.isUnordered())
+ return false;
+ return ExperimentalUnorderedISEL;
+}
+
+
/// Emit a locked operation on a stack location which does not change any
/// memory location, but does involve a lock prefix. Location is chosen to be
/// a) very likely accessed only by a single thread to minimize cache traffic,
-/// and b) definitely dereferenceable. Returns the new Chain result.
+/// and b) definitely dereferenceable. Returns the new Chain result.
static SDValue emitLockedStackOp(SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SDValue Chain, SDLoc DL) {
@@ -26099,22 +26610,22 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
// operations issued by the current processor. As such, the location
// referenced is not relevant for the ordering properties of the instruction.
// See: Intel® 64 and IA-32 Architectures Software Developer’s Manual,
- // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions
+ // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions
// 2) Using an immediate operand appears to be the best encoding choice
// here since it doesn't require an extra register.
// 3) OR appears to be very slightly faster than ADD. (Though, the difference
// is small enough it might just be measurement noise.)
// 4) When choosing offsets, there are several contributing factors:
// a) If there's no redzone, we default to TOS. (We could allocate a cache
- // line aligned stack object to improve this case.)
+ // line aligned stack object to improve this case.)
// b) To minimize our chances of introducing a false dependence, we prefer
- // to offset the stack usage from TOS slightly.
+ // to offset the stack usage from TOS slightly.
// c) To minimize concerns about cross thread stack usage - in particular,
// the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
// captures state in the TOS frame and accesses it from many threads -
// we want to use an offset such that the offset is in a distinct cache
// line from the TOS frame.
- //
+ //
// For a general discussion of the tradeoffs and benchmark results, see:
// https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
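// On x86-64 with a 128-byte red zone this materializes as, e.g. (a sketch),
//   lock orl $0, -64(%rsp)
// an idempotent read-modify-write on a thread-private cache line that acts
// as a full fence more cheaply than MFENCE.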
@@ -26155,10 +26666,10 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
- AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
- SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
- cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+ AtomicOrdering FenceOrdering =
+ static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
+ SyncScope::ID FenceSSID =
+ static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
@@ -26167,7 +26678,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasMFence())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
- SDValue Chain = Op.getOperand(0);
+ SDValue Chain = Op.getOperand(0);
return emitLockedStackOp(DAG, Subtarget, Chain, dl);
}
@@ -26218,6 +26729,17 @@ static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT InVT = V.getSimpleValueType();
+ if (InVT == MVT::v64i8) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
+ Lo = getPMOVMSKB(DL, Lo, DAG, Subtarget);
+ Hi = getPMOVMSKB(DL, Hi, DAG, Subtarget);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+ DAG.getConstant(32, DL, MVT::i8));
+ return DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+ }
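// A sketch for v64i8 without a single wide mask move: each 32-lane half
// produces a 32-bit mask via MOVMSK, recombined as
//   mask64 = zext(lo) | (anyext(hi) << 32)
// exactly as the node sequence above constructs it.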
if (InVT == MVT::v32i8 && !Subtarget.hasInt256()) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
@@ -26258,8 +26780,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl);
- EVT CastVT = MVT::getVectorVT(DstVT.getVectorElementType(),
- DstVT.getVectorNumElements() / 2);
+ MVT CastVT = DstVT.getHalfNumVectorElementsVT();
Lo = DAG.getBitcast(CastVT, Lo);
Hi = DAG.getBitcast(CastVT, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi);
@@ -26275,53 +26796,37 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getZExtOrTrunc(V, DL, DstVT);
}
- if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
- SrcVT == MVT::i64) {
- assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- if (DstVT != MVT::f64 && DstVT != MVT::i64 &&
- !(DstVT == MVT::x86mmx && SrcVT.isVector()))
- // This conversion needs to be expanded.
- return SDValue();
+ assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
+ SrcVT == MVT::i64) && "Unexpected VT!");
- SDLoc dl(Op);
- if (SrcVT.isVector()) {
- // Widen the vector in input in the case of MVT::v2i32.
- // Example: from MVT::v2i32 to MVT::v4i32.
- MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
- SrcVT.getVectorNumElements() * 2);
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
- DAG.getUNDEF(SrcVT));
- } else {
- assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
- "Unexpected source type in LowerBITCAST");
- Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
- }
+ assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
+ if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) &&
+ !(DstVT == MVT::x86mmx && SrcVT.isVector()))
+ // This conversion needs to be expanded.
+ return SDValue();
- MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
- Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src);
+ SDLoc dl(Op);
+ if (SrcVT.isVector()) {
+ // Widen the vector in input in the case of MVT::v2i32.
+ // Example: from MVT::v2i32 to MVT::v4i32.
+ MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
+ SrcVT.getVectorNumElements() * 2);
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
+ DAG.getUNDEF(SrcVT));
+ } else {
+ assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
+ "Unexpected source type in LowerBITCAST");
+ Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
+ }
- if (DstVT == MVT::x86mmx)
- return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src);
+ MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
+ Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src,
- DAG.getIntPtrConstant(0, dl));
- }
+ if (DstVT == MVT::x86mmx)
+ return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src);
- assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() &&
- Subtarget.hasMMX() && "Unexpected custom BITCAST");
- assert((DstVT == MVT::i64 ||
- (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
- "Unexpected custom BITCAST");
- // i64 <=> MMX conversions are Legal.
- if (SrcVT==MVT::i64 && DstVT.isVector())
- return Op;
- if (DstVT==MVT::i64 && SrcVT.isVector())
- return Op;
- // MMX <=> MMX conversions are Legal.
- if (SrcVT.isVector() && DstVT.isVector())
- return Op;
- // All other conversions need to be expanded.
- return SDValue();
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src,
+ DAG.getIntPtrConstant(0, dl));
}
/// Compute the horizontal sum of bytes in V for the elements of VT.
@@ -26549,6 +27054,13 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
+ // Split v8i64/v16i32 without BWI so that we can still use the PSHUFB
+ // lowering.
+ if (VT == MVT::v8i64 || VT == MVT::v16i32) {
+ assert(!Subtarget.hasBWI() && "BWI should Expand BITREVERSE");
+ return Lower512IntUnary(Op, DAG);
+ }
+
unsigned NumElts = VT.getVectorNumElements();
assert(VT.getScalarType() == MVT::i8 &&
"Only byte vector BITREVERSE supported");
@@ -26656,12 +27168,12 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
// seq_cst which isn't SingleThread, everything just needs to be preserved
// during codegen and then dropped. Note that we expect (but don't assume)
// that orderings other than seq_cst and acq_rel have been canonicalized to
- // a store or load.
+ // a store or load.
if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
AN->getSyncScopeID() == SyncScope::System) {
// Prefer a locked operation against a stack location to minimize cache
// traffic. This assumes that stack locations are very likely to be
- // accessed only by the owning thread.
+ // accessed only by the owning thread.
SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0.
@@ -26886,12 +27398,13 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
SDValue Chain = N->getChain();
SDValue BasePtr = N->getBasePtr();
- if (VT == MVT::v2f32) {
+ if (VT == MVT::v2f32 || VT == MVT::v2i32) {
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
// If the index is v2i64 and we have VLX we can use xmm for data and index.
if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
- DAG.getUNDEF(MVT::v2f32));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT));
SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
@@ -26901,30 +27414,6 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
}
- if (VT == MVT::v2i32) {
- assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
- DAG.getUNDEF(MVT::v2i32));
- // If the index is v2i64 and we have VLX we can use xmm for data and index.
- if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
- SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other);
- SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
- SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
- VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
- return SDValue(NewScatter.getNode(), 1);
- }
- // Custom widen all the operands to avoid promotion.
- EVT NewIndexVT = EVT::getVectorVT(
- *DAG.getContext(), Index.getValueType().getVectorElementType(), 4);
- Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
- DAG.getUNDEF(Index.getValueType()));
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
- DAG.getConstant(0, dl, MVT::v2i1));
- SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(), dl,
- Ops, N->getMemOperand());
- }
-
MVT IndexVT = Index.getSimpleValueType();
MVT MaskVT = Mask.getSimpleValueType();
@@ -27160,6 +27649,13 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
return NOOP;
}
+SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const {
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+}
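The makeLibCall above routes f128 arithmetic to soft-float runtime routines; assuming compiler-rt's conventional names for MUL_F128/DIV_F128 (__multf3/__divtf3 -- an assumption, not spelled out in the patch), the effect is roughly:

// Assumed prototypes for the compiler-rt f128 helpers.
extern "C" __float128 __multf3(__float128, __float128); // assumption
extern "C" __float128 __divtf3(__float128, __float128); // assumption

__float128 mulF128(__float128 A, __float128 B) { return __multf3(A, B); }
__float128 divF128(__float128 A, __float128 B) { return __divtf3(A, B); }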
+
/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -27206,10 +27702,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::STRICT_FP_ROUND: return LowerSTRICT_FP_ROUND(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
case ISD::FADD:
- case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget);
+ case ISD::FSUB: return lowerFaddFsub(Op, DAG);
+ case ISD::FMUL: return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FDIV: return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -27347,37 +27847,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::MUL: {
EVT VT = N->getValueType(0);
- assert(VT.isVector() && "Unexpected VT");
- if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger &&
- VT.getVectorNumElements() == 2) {
- // Promote to a pattern that will be turned into PMULUDQ.
- SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
- N->getOperand(0));
- SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
- N->getOperand(1));
- SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1);
- Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul));
- } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
- VT.getVectorElementType() == MVT::i8) {
- // Pre-promote these to vXi16 to avoid op legalization thinking all 16
- // elements are needed.
- MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
- SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
- SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
- SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
- Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
- unsigned NumConcats = 16 / VT.getVectorNumElements();
- SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
- ConcatOps[0] = Res;
- Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
- Results.push_back(Res);
- }
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ VT.getVectorElementType() == MVT::i8 && "Unexpected VT!");
+ // Pre-promote these to vXi16 to avoid op legalization thinking all 16
+ // elements are needed.
+ MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
+ SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ unsigned NumConcats = 16 / VT.getVectorNumElements();
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
+ Results.push_back(Res);
return;
}
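The i8 widening above is safe because a 16-bit product followed by truncation gives the same wrap-around result as an i8 multiply; a one-function sketch (name illustrative):

#include <cstdint>

// Promote to i16, multiply, truncate: equivalent to an i8 MUL modulo 256,
// which is what the ANY_EXTEND/MUL/TRUNCATE sequence above relies on.
uint8_t mulViaI16(uint8_t A, uint8_t B) {
  uint16_t Wide = static_cast<uint16_t>(A) * static_cast<uint16_t>(B);
  return static_cast<uint8_t>(Wide);
}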
- case ISD::UADDSAT:
- case ISD::SADDSAT:
- case ISD::USUBSAT:
- case ISD::SSUBSAT:
case X86ISD::VPMADDWD:
case X86ISD::AVG: {
// Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
@@ -27388,6 +27873,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT InVT = N->getOperand(0).getValueType();
assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&
"Expected a VT that divides into 128 bits.");
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
unsigned NumConcat = 128 / InVT.getSizeInBits();
EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
@@ -27404,9 +27891,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
- DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
}
@@ -27435,26 +27919,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Hi);
return;
}
- case ISD::SETCC: {
- // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when
- // setCC result type is v2i1 because type legalzation will end up with
- // a v4i1 setcc plus an extend.
- assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type");
- if (N->getOperand(0).getValueType() != MVT::v2f32 ||
- getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector)
- return;
- SDValue UNDEF = DAG.getUNDEF(MVT::v2f32);
- SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
- N->getOperand(0), UNDEF);
- SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
- N->getOperand(1), UNDEF);
- SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS,
- N->getOperand(2));
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- return;
- }
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
@@ -27475,7 +27939,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SREM:
case ISD::UREM: {
EVT VT = N->getValueType(0);
- if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector) {
+ if (VT.isVector()) {
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
// If this RHS is a constant splat vector we can widen this and let
// division/remainder by constant optimize it.
// TODO: Can we do something for non-splat?
@@ -27493,17 +27959,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- if (VT == MVT::v2i32) {
- // Legalize v2i32 div/rem by unrolling. Otherwise we promote to the
- // v2i64 and unroll later. But then we create i64 scalar ops which
- // might be slow in 64-bit mode or require a libcall in 32-bit mode.
- Results.push_back(DAG.UnrollVectorOp(N));
- return;
- }
-
- if (VT.isVector())
- return;
-
LLVM_FALLTHROUGH;
}
case ISD::SDIVREM:
@@ -27561,58 +28016,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
}
- return;
- }
- case ISD::SIGN_EXTEND_VECTOR_INREG: {
- if (ExperimentalVectorWideningLegalization)
- return;
-
- EVT VT = N->getValueType(0);
- SDValue In = N->getOperand(0);
- EVT InVT = In.getValueType();
- if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
- (InVT == MVT::v16i16 || InVT == MVT::v32i8)) {
- // Custom split this so we can extend i8/i16->i32 invec. This is better
- // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
- // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting
- // we allow the sra from the extend to i32 to be shared by the split.
- EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(),
- InVT.getVectorNumElements() / 2);
- MVT ExtendVT = MVT::getVectorVT(MVT::i32,
- VT.getVectorNumElements());
- In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT,
- In, DAG.getIntPtrConstant(0, dl));
- In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In);
-
- // Fill a vector with sign bits for each element.
- SDValue Zero = DAG.getConstant(0, dl, ExtendVT);
- SDValue SignBits = DAG.getSetCC(dl, ExtendVT, Zero, In, ISD::SETGT);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // Create an unpackl and unpackh to interleave the sign bits then bitcast
- // to vXi64.
- SDValue Lo = getUnpackl(DAG, dl, ExtendVT, In, SignBits);
- Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
- SDValue Hi = getUnpackh(DAG, dl, ExtendVT, In, SignBits);
- Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 &&
+ getTypeAction(*DAG.getContext(), InVT) == TypeSplitVector &&
+ isTypeLegal(MVT::v4i64)) {
+ // Input needs to be split and output needs to widened. Let's use two
+ // VTRUNCs, and shuffle their results together into the wider type.
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(In, dl);
- SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo);
+ Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi);
+ SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi,
+ { 0, 1, 2, 3, 16, 17, 18, 19,
+ -1, -1, -1, -1, -1, -1, -1, -1 });
Results.push_back(Res);
return;
}
+
return;
}
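The two-VTRUNC scheme above can be modelled in scalar form; a sketch assuming the shuffle indices {0..3, 16..19} shown (array-based, illustrative):

#include <cstdint>

// Truncate each v4i64 half to four bytes, then place the Lo bytes at 0..3
// and the Hi bytes (shuffle indices 16..19) at 4..7 of the widened result.
void truncV8i64ToV8i8(const uint64_t In[8], uint8_t Out[8]) {
  for (int I = 0; I != 4; ++I)
    Out[I] = static_cast<uint8_t>(In[I]);         // Lo VTRUNC, lanes 0..3
  for (int I = 0; I != 4; ++I)
    Out[4 + I] = static_cast<uint8_t>(In[4 + I]); // Hi VTRUNC, lanes 16..19
}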
+ case ISD::ANY_EXTEND:
+ // Right now, only MVT::v8i8 has a Custom action for an illegal type;
+ // the Custom handling is intended for the input type.
+ assert(N->getValueType(0) == MVT::v8i8 &&
+ "Do not know how to legalize this Node");
+ return;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND: {
EVT VT = N->getValueType(0);
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
- (InVT == MVT::v4i16 || InVT == MVT::v4i8) &&
- getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector) {
+ (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
+ assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector &&
+ "Unexpected type action!");
assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode");
// Custom split this so we can extend i8/i16->i32 invec. This is better
// since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
@@ -27683,27 +28120,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
- // Promote these manually to avoid over promotion to v2i64. Type
- // legalization will revisit the v2i32 operation for more cleanup.
- if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
- getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
- // AVX512DQ provides instructions that produce a v2i64 result.
- if (Subtarget.hasDQI())
- return;
-
- SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
- Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
- : ISD::AssertSext,
- dl, MVT::v2i32, Res,
- DAG.getValueType(VT.getVectorElementType()));
- Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
- Results.push_back(Res);
- return;
- }
-
if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
- return;
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
// Try to create a 128 bit vector, but don't exceed a 32 bit element.
unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
@@ -27738,35 +28157,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- bool Widenv2i32 =
- getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector;
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
if (Src.getValueType() == MVT::v2f64) {
- unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
if (!IsSigned && !Subtarget.hasVLX()) {
- // If v2i32 is widened, we can defer to the generic legalizer.
- if (Widenv2i32)
- return;
- // Custom widen by doubling to a legal vector with. Isel will
- // further widen to v8f64.
- Opc = ISD::FP_TO_UINT;
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64,
- Src, DAG.getUNDEF(MVT::v2f64));
+ // If we have VLX we can emit a target-specific FP_TO_UINT node;
+ // otherwise we defer to the generic legalizer, which widens the
+ // input as well. This is widened further during op legalization
+ // to v8i32<-v8f64.
+ return;
}
+ unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
- if (!Widenv2i32)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- return;
- }
- if (SrcVT == MVT::v2f32 &&
- getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
- SDValue Idx = DAG.getIntPtrConstant(0, dl);
- SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
- DAG.getUNDEF(MVT::v2f32));
- Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
- : ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
}
@@ -27776,6 +28178,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
+ assert(!VT.isVector() && "Vectors should have been handled above!");
+
if (Subtarget.hasDQI() && VT == MVT::i64 &&
(SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
assert(!Subtarget.is64Bit() && "i64 should be legal");
@@ -27847,7 +28251,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
default : llvm_unreachable("Do not know how to custom type "
"legalize this intrinsic operation!");
@@ -27905,7 +28309,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
SDValue Result;
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- unsigned BasePtr = TRI->getBaseRegister();
+ Register BasePtr = TRI->getBaseRegister();
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
(BasePtr == X86::RBX || BasePtr == X86::EBX)) {
@@ -28060,34 +28464,33 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- if (SrcVT != MVT::f64 ||
- (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) ||
- getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector)
+ if (DstVT.isVector() && SrcVT == MVT::x86mmx) {
+ assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector &&
+ "Unexpected type action!");
+ EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT);
+ SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, WideVT, N->getOperand(0));
+ Results.push_back(Res);
return;
+ }
- unsigned NumElts = DstVT.getVectorNumElements();
- EVT SVT = DstVT.getVectorElementType();
- EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
- SDValue Res;
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0));
- Res = DAG.getBitcast(WiderVT, Res);
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
return;
}
case ISD::MGATHER: {
EVT VT = N->getValueType(0);
- if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
+ if ((VT == MVT::v2f32 || VT == MVT::v2i32) &&
+ (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
auto *Gather = cast<MaskedGatherSDNode>(N);
SDValue Index = Gather->getIndex();
if (Index.getValueType() != MVT::v2i64)
return;
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
+ EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+ SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT,
Gather->getPassThru(),
- DAG.getUNDEF(MVT::v2f32));
+ DAG.getUNDEF(VT));
if (!Subtarget.hasVLX()) {
// We need to widen the mask, but the instruction will only use 2
// of its elements. So we can use undef.
@@ -28098,66 +28501,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
Gather->getBasePtr(), Index, Gather->getScale() };
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
- DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl,
+ DAG.getVTList(WideVT, Mask.getValueType(), MVT::Other), Ops, dl,
Gather->getMemoryVT(), Gather->getMemOperand());
Results.push_back(Res);
Results.push_back(Res.getValue(2));
return;
}
- if (VT == MVT::v2i32) {
- auto *Gather = cast<MaskedGatherSDNode>(N);
- SDValue Index = Gather->getIndex();
- SDValue Mask = Gather->getMask();
- assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
- Gather->getPassThru(),
- DAG.getUNDEF(MVT::v2i32));
- // If the index is v2i64 we can use it directly.
- if (Index.getValueType() == MVT::v2i64 &&
- (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
- if (!Subtarget.hasVLX()) {
- // We need to widen the mask, but the instruction will only use 2
- // of its elements. So we can use undef.
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
- DAG.getUNDEF(MVT::v2i1));
- Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
- }
- SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
- Gather->getBasePtr(), Index, Gather->getScale() };
- SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
- DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl,
- Gather->getMemoryVT(), Gather->getMemOperand());
- SDValue Chain = Res.getValue(2);
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- Results.push_back(Chain);
- return;
- }
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
- EVT IndexVT = Index.getValueType();
- EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
- IndexVT.getScalarType(), 4);
- // Otherwise we need to custom widen everything to avoid promotion.
- Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
- DAG.getUNDEF(IndexVT));
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
- DAG.getConstant(0, dl, MVT::v2i1));
- SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
- Gather->getBasePtr(), Index, Gather->getScale() };
- SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
- Gather->getMemoryVT(), dl, Ops,
- Gather->getMemOperand());
- SDValue Chain = Res.getValue(1);
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- Results.push_back(Chain);
- return;
- }
- }
return;
}
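Semantically, the widened gather still honours only the two real mask lanes; a minimal scalar model of a 2-element masked gather (names illustrative):

#include <cstdint>

// Masked gather semantics for the v2 case above: a disabled lane performs
// no load and keeps its pass-through value, so widening with undef lanes
// is safe as long as the extra mask lanes stay off (or are ignored).
void gather2(const int32_t *Base, const int64_t Idx[2], const bool Mask[2],
             const int32_t PassThru[2], int32_t Out[2]) {
  for (int I = 0; I != 2; ++I)
    Out[I] = Mask[I] ? Base[Idx[I]] : PassThru[I];
}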
case ISD::LOAD: {
@@ -28166,8 +28515,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// cast since type legalization will try to use an i64 load.
MVT VT = N->getSimpleValueType(0);
assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");
- if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
- return;
+ assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ "Unexpected type action!");
if (!ISD::isNON_EXTLoad(N))
return;
auto *Ld = cast<LoadSDNode>(N);
@@ -28177,11 +28526,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
SDValue Chain = Res.getValue(1);
- MVT WideVT = MVT::getVectorVT(LdVT, 2);
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
- MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() * 2);
- Res = DAG.getBitcast(CastVT, Res);
+ MVT VecVT = MVT::getVectorVT(LdVT, 2);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Res);
+ EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ Res = DAG.getBitcast(WideVT, Res);
Results.push_back(Res);
Results.push_back(Chain);
return;
@@ -28236,6 +28584,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
+ case X86ISD::MOVQ2DQ: return "X86ISD::MOVQ2DQ";
case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q";
case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W";
case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D";
@@ -28373,6 +28722,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
+ case X86ISD::VBROADCAST_LOAD: return "X86ISD::VBROADCAST_LOAD";
case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST";
case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV";
@@ -28737,6 +29087,9 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
}
bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+ if (isa<MaskedLoadSDNode>(ExtVal.getOperand(0)))
+ return false;
+
EVT SrcVT = ExtVal.getOperand(0).getValueType();
// There is no extending load for vXi1.
@@ -28856,10 +29209,10 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- unsigned mainDstReg = MRI.createVirtualRegister(RC);
- unsigned fallDstReg = MRI.createVirtualRegister(RC);
+ Register mainDstReg = MRI.createVirtualRegister(RC);
+ Register fallDstReg = MRI.createVirtualRegister(RC);
// thisMBB:
// xbegin fallMBB
@@ -28913,7 +29266,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
static_assert(X86::AddrNumOperands == 5,
"VAARG_64 assumes 5 address operands");
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
MachineOperand &Base = MI.getOperand(1);
MachineOperand &Scale = MI.getOperand(2);
MachineOperand &Index = MI.getOperand(3);
@@ -29049,7 +29402,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
assert(OffsetReg != 0);
// Read the reg_save_area address.
- unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+ Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
.add(Base)
.add(Scale)
@@ -29059,8 +29412,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.setMemRefs(LoadOnlyMMO);
// Zero-extend the offset
- unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
- BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
.addImm(0)
.addReg(OffsetReg)
.addImm(X86::sub_32bit);
@@ -29071,7 +29424,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.addReg(RegSaveReg);
// Compute the offset for the next argument
- unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
.addReg(OffsetReg)
.addImm(UseFPOffset ? 16 : 8);
@@ -29096,7 +29449,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
//
// Load the overflow_area address into a register.
- unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
.add(Base)
.add(Scale)
@@ -29110,7 +29463,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
if (NeedsAlign) {
// Align the overflow address
assert(isPowerOf2_32(Align) && "Alignment must be a power of 2");
- unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+ Register TmpReg = MRI.createVirtualRegister(AddrRegClass);
// aligned_addr = (addr + (align-1)) & ~(align-1)
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
@@ -29127,7 +29480,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Compute the next overflow address after this argument.
// (the overflow address should be kept 8-byte aligned)
- unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
.addReg(OverflowDestReg)
.addImm(ArgSizeA8);
@@ -29191,7 +29544,7 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned CountReg = MI.getOperand(0).getReg();
+ Register CountReg = MI.getOperand(0).getReg();
int64_t RegSaveFrameIndex = MI.getOperand(1).getImm();
int64_t VarArgsFPOffset = MI.getOperand(2).getImm();
@@ -29273,7 +29626,9 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
static bool isCMOVPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
case X86::CMOV_FR32:
+ case X86::CMOV_FR32X:
case X86::CMOV_FR64:
+ case X86::CMOV_FR64X:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
@@ -29326,9 +29681,9 @@ static MachineInstrBuilder createPHIsForCMOVsInSinkBB(
MachineInstrBuilder MIB;
for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
- unsigned DestReg = MIIt->getOperand(0).getReg();
- unsigned Op1Reg = MIIt->getOperand(1).getReg();
- unsigned Op2Reg = MIIt->getOperand(2).getReg();
+ Register DestReg = MIIt->getOperand(0).getReg();
+ Register Op1Reg = MIIt->getOperand(1).getReg();
+ Register Op2Reg = MIIt->getOperand(2).getReg();
// If this CMOV we are generating is the opposite condition from
// the jump we generated, then we have to swap the operands for the
@@ -29486,9 +29841,9 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
// SinkMBB:
// %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ]
- unsigned DestReg = FirstCMOV.getOperand(0).getReg();
- unsigned Op1Reg = FirstCMOV.getOperand(1).getReg();
- unsigned Op2Reg = FirstCMOV.getOperand(2).getReg();
+ Register DestReg = FirstCMOV.getOperand(0).getReg();
+ Register Op1Reg = FirstCMOV.getOperand(1).getReg();
+ Register Op2Reg = FirstCMOV.getOperand(2).getReg();
MachineInstrBuilder MIB =
BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg)
.addReg(Op1Reg)
@@ -30006,7 +30361,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
// call the retpoline thunk.
DebugLoc DL = MI.getDebugLoc();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
- unsigned CalleeVReg = MI.getOperand(0).getReg();
+ Register CalleeVReg = MI.getOperand(0).getReg();
unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
// Find an available scratch register to hold the callee. On 64-bit, we can
@@ -30079,7 +30434,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
// Initialize a register with zero.
MVT PVT = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
- unsigned ZReg = MRI.createVirtualRegister(PtrRC);
+ Register ZReg = MRI.createVirtualRegister(PtrRC);
unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
BuildMI(*MBB, MI, DL, TII->get(XorRROpc))
.addDef(ZReg)
@@ -30087,7 +30442,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
.addReg(ZReg, RegState::Undef);
// Read the current SSP Register value to the zeroed register.
- unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC);
+ Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
@@ -30131,8 +30486,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
(void)TRI;
- unsigned mainDstReg = MRI.createVirtualRegister(RC);
- unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+ Register mainDstReg = MRI.createVirtualRegister(RC);
+ Register restoreDstReg = MRI.createVirtualRegister(RC);
MemOpndSlot = CurOp;
@@ -30246,8 +30601,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
X86FI->setRestoreBasePointer(MF);
- unsigned FramePtr = RegInfo->getFrameRegister(*MF);
- unsigned BasePtr = RegInfo->getBaseRegister();
+ Register FramePtr = RegInfo->getFrameRegister(*MF);
+ Register BasePtr = RegInfo->getBaseRegister();
unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
@@ -30329,7 +30684,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MBB->addSuccessor(checkSspMBB);
// Initialize a register with zero.
- unsigned ZReg = MRI.createVirtualRegister(PtrRC);
+ Register ZReg = MRI.createVirtualRegister(PtrRC);
unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
BuildMI(checkSspMBB, DL, TII->get(XorRROpc))
.addDef(ZReg)
@@ -30337,7 +30692,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
.addReg(ZReg, RegState::Undef);
// Read the current SSP Register value to the zeroed register.
- unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC);
+ Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
@@ -30352,7 +30707,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
checkSspMBB->addSuccessor(fallMBB);
// Reload the previously saved SSP register value.
- unsigned PrevSSPReg = MRI.createVirtualRegister(PtrRC);
+ Register PrevSSPReg = MRI.createVirtualRegister(PtrRC);
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
const int64_t SPPOffset = 3 * PVT.getStoreSize();
MachineInstrBuilder MIB =
@@ -30370,7 +30725,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MIB.setMemRefs(MMOs);
// Subtract the current SSP from the previous SSP.
- unsigned SspSubReg = MRI.createVirtualRegister(PtrRC);
+ Register SspSubReg = MRI.createVirtualRegister(PtrRC);
unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg)
.addReg(PrevSSPReg)
@@ -30384,7 +30739,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8.
unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
- unsigned SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
+ Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg)
.addReg(SspSubReg)
.addImm(Offset);
@@ -30394,7 +30749,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg);
// Reset the lower 8 bits.
- unsigned SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
+ Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg)
.addReg(SspFirstShrReg)
.addImm(8);
@@ -30406,12 +30761,12 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// Do a single shift left.
unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1;
- unsigned SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
+ Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg)
.addReg(SspSecondShrReg);
// Save the value 128 to a register (will be used next with incssp).
- unsigned Value128InReg = MRI.createVirtualRegister(PtrRC);
+ Register Value128InReg = MRI.createVirtualRegister(PtrRC);
unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg)
.addImm(128);
@@ -30419,8 +30774,8 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// Since incssp only looks at the lower 8 bits, we might need to do several
// iterations of incssp until we finish fixing the shadow stack.
- unsigned DecReg = MRI.createVirtualRegister(PtrRC);
- unsigned CounterReg = MRI.createVirtualRegister(PtrRC);
+ Register DecReg = MRI.createVirtualRegister(PtrRC);
+ Register CounterReg = MRI.createVirtualRegister(PtrRC);
BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg)
.addReg(SspAfterShlReg)
.addMBB(fixShadowLoopPrepareMBB)
@@ -30460,11 +30815,11 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
const TargetRegisterClass *RC =
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
- unsigned Tmp = MRI.createVirtualRegister(RC);
+ Register Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
- unsigned SP = RegInfo->getStackRegister();
+ Register SP = RegInfo->getStackRegister();
MachineInstrBuilder MIB;
@@ -30662,8 +31017,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setRestoreBasePointer(MF);
- unsigned FP = RI.getFrameRegister(*MF);
- unsigned BP = RI.getBaseRegister();
+ Register FP = RI.getFrameRegister(*MF);
+ Register BP = RI.getBaseRegister();
unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm;
addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true,
MFI->getRestoreBasePointerOffset())
@@ -30674,7 +31029,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
}
// IReg is used as an index in a memory operand and therefore can't be SP
- unsigned IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass);
+ Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass);
addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI,
Subtarget.is64Bit() ? 8 : 4);
BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri))
@@ -30683,8 +31038,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE);
if (Subtarget.is64Bit()) {
- unsigned BReg = MRI->createVirtualRegister(&X86::GR64RegClass);
- unsigned IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
+ Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
// leaq .LJTI0_0(%rip), BReg
BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg)
@@ -30710,9 +31065,9 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(0);
break;
case MachineJumpTableInfo::EK_LabelDifference32: {
- unsigned OReg = MRI->createVirtualRegister(&X86::GR32RegClass);
- unsigned OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass);
- unsigned TReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ Register OReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+ Register OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass);
+ Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass);
// movl (BReg,IReg64,4), OReg
BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg)
@@ -30783,8 +31138,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
DefRegs[MOp.getReg()] = true;
MachineInstrBuilder MIB(*MF, &II);
- for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
- unsigned Reg = SavedRegs[RI];
+ for (unsigned RegIdx = 0; SavedRegs[RegIdx]; ++RegIdx) {
+ unsigned Reg = SavedRegs[RegIdx];
if (!DefRegs[Reg])
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
}
@@ -30906,20 +31261,18 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
TII->get(X86::FNSTCW16m)), OrigCWFrameIdx);
// Load the old value of the control word...
- unsigned OldCW =
- MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
+ Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
OrigCWFrameIdx);
// OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero.
- unsigned NewCW =
- MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
+ Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
.addReg(OldCW, RegState::Kill).addImm(0xC00);
// Extract to 16 bits.
- unsigned NewCW16 =
- MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
+ Register NewCW16 =
+ MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
.addReg(NewCW, RegState::Kill, X86::sub_16bit);
@@ -31023,7 +31376,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
MVT SPTy = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
- unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
+ Register computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
X86AddressMode AM = getAddressFromInstr(&MI, 0);
// Regalloc does not need any help when the memory operand of CMPXCHG8B
@@ -31034,10 +31387,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its
// four operand definitions that are E[ABCD] registers. We skip them and
// then insert the LEA.
- MachineBasicBlock::iterator MBBI(MI);
- while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) ||
- MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX))
- --MBBI;
+ MachineBasicBlock::reverse_iterator RMBBI(MI.getReverseIterator());
+ while (RMBBI != BB->rend() && (RMBBI->definesRegister(X86::EAX) ||
+ RMBBI->definesRegister(X86::EBX) ||
+ RMBBI->definesRegister(X86::ECX) ||
+ RMBBI->definesRegister(X86::EDX))) {
+ ++RMBBI;
+ }
+ MachineBasicBlock::iterator MBBI(RMBBI);
addFullAddress(
BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
@@ -31232,12 +31589,21 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.One |= Known2.One;
break;
}
+ case X86ISD::PSADBW: {
+ assert(VT.getScalarType() == MVT::i64 &&
+ Op.getOperand(0).getValueType().getScalarType() == MVT::i8 &&
+ "Unexpected PSADBW types");
+
+ // PSADBW - fills low 16 bits and zeros upper 48 bits of each i64 result.
+ Known.Zero.setBitsFrom(16);
+ break;
+ }
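The setBitsFrom(16) above follows from a simple bound; a scalar sketch of one PSADBW lane (illustrative helper):

#include <cstdint>
#include <cstdlib>

// One PSADBW i64 lane: eight byte absolute differences sum to at most
// 8 * 255 = 2040 < 2^16, so bits 16..63 of the result are always zero.
uint64_t psadbwLane(const uint8_t A[8], const uint8_t B[8]) {
  uint64_t Sum = 0;
  for (int I = 0; I != 8; ++I)
    Sum += static_cast<uint64_t>(std::abs(int(A[I]) - int(B[I])));
  return Sum;
}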
case X86ISD::CMOV: {
- Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
+ KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
@@ -31650,8 +32016,8 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
- ArrayRef<int> LoMask(Mask.data() + 0, 4);
- ArrayRef<int> HiMask(Mask.data() + 4, 4);
+ ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
+ ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
// PSHUFLW: permute lower 4 elements only.
if (isUndefOrInRange(LoMask, 0, 4) &&
@@ -31789,8 +32155,8 @@ static bool matchBinaryPermuteShuffle(
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end());
- if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero,
- BlendMask)) {
+ if (matchVectorShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero,
+ ForceV2Zero, BlendMask)) {
if (MaskVT == MVT::v16i16) {
// We can only use v16i16 PBLENDW if the lanes are repeated.
SmallVector<int, 8> RepeatedMask;
@@ -31819,15 +32185,15 @@ static bool matchBinaryPermuteShuffle(
}
}
- // Attempt to combine to INSERTPS.
+ // Attempt to combine to INSERTPS, but only if the mask has elements
+ // that need to be set to zero.
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
- MaskVT.is128BitVector()) {
- if (Zeroable.getBoolValue() &&
- matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
- Shuffle = X86ISD::INSERTPS;
- ShuffleVT = MVT::v4f32;
- return true;
- }
+ MaskVT.is128BitVector() &&
+ llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }) &&
+ matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
+ Shuffle = X86ISD::INSERTPS;
+ ShuffleVT = MVT::v4f32;
+ return true;
}
// Attempt to combine to SHUFPD.
@@ -31835,7 +32201,11 @@ static bool matchBinaryPermuteShuffle(
((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
- if (matchShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
+ bool ForceV1Zero = false, ForceV2Zero = false;
+ if (matchShuffleWithSHUFPD(MaskVT, V1, V2, ForceV1Zero, ForceV2Zero,
+ PermuteImm, Mask, Zeroable)) {
+ V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
+ V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
Shuffle = X86ISD::SHUFP;
ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
return true;
@@ -31889,6 +32259,15 @@ static bool matchBinaryPermuteShuffle(
}
}
+ // Attempt to combine to INSERTPS more generally if X86ISD::SHUFP failed.
+ if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
+ MaskVT.is128BitVector() &&
+ matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
+ Shuffle = X86ISD::INSERTPS;
+ ShuffleVT = MVT::v4f32;
+ return true;
+ }
+
return false;
}
@@ -31942,7 +32321,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
unsigned NumRootElts = RootVT.getVectorNumElements();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
- (RootVT.isFloatingPoint() && Depth >= 2) ||
+ (RootVT.isFloatingPoint() && Depth >= 1) ||
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
// Don't combine if we are an AVX512/EVEX target and the mask element size
@@ -31981,7 +32360,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
!(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) &&
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
return SDValue(); // Nothing to do!
MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
unsigned PermMask = 0;
@@ -31991,7 +32370,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
Res = DAG.getBitcast(ShuffleVT, V1);
Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
DAG.getUNDEF(ShuffleVT),
- DAG.getConstant(PermMask, DL, MVT::i8));
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
@@ -32026,8 +32405,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Which shuffle domains are permitted?
// Permit domain crossing at higher combine depths.
// TODO: Should we indicate which domain is preferred if both are allowed?
- bool AllowFloatDomain = FloatDomain || (Depth > 3);
- bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() &&
+ bool AllowFloatDomain = FloatDomain || (Depth >= 3);
+ bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
// Determine zeroable mask elements.
@@ -32062,14 +32441,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (V1.getValueType() == MaskVT &&
V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
MayFoldLoad(V1.getOperand(0))) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = V1.getOperand(0);
Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
return DAG.getBitcast(RootVT, Res);
}
if (Subtarget.hasAVX2()) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(MaskVT, V1);
Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
@@ -32083,7 +32462,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
- if (Depth == 1 && Root.getOpcode() == Shuffle)
+ if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
@@ -32094,11 +32473,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
- if (Depth == 1 && Root.getOpcode() == Shuffle)
+ if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
- DAG.getConstant(PermuteImm, DL, MVT::i8));
+ DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
}
@@ -32109,7 +32488,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT, UnaryShuffle) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
- if (Depth == 1 && Root.getOpcode() == Shuffle)
+ if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
@@ -32123,12 +32502,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
- if (Depth == 1 && Root.getOpcode() == Shuffle)
+ if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
- DAG.getConstant(PermuteImm, DL, MVT::i8));
+ DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
@@ -32141,34 +32520,34 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
uint64_t BitLen, BitIdx;
if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
- DAG.getConstant(BitLen, DL, MVT::i8),
- DAG.getConstant(BitIdx, DL, MVT::i8));
+ DAG.getTargetConstant(BitLen, DL, MVT::i8),
+ DAG.getTargetConstant(BitIdx, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
V2 = DAG.getBitcast(IntMaskVT, V2);
Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
- DAG.getConstant(BitLen, DL, MVT::i8),
- DAG.getConstant(BitIdx, DL, MVT::i8));
+ DAG.getTargetConstant(BitLen, DL, MVT::i8),
+ DAG.getTargetConstant(BitIdx, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
}
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
- if (Depth < 2)
+ if (Depth < 1)
return SDValue();
// Depth threshold above which we can efficiently use variable mask shuffles.
- int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
+ int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2;
AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
bool MaskContainsZeros =
@@ -32321,7 +32700,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
V2 = DAG.getBitcast(MaskVT, V2);
SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
- DAG.getConstant(M2ZImm, DL, MVT::i8));
+ DAG.getTargetConstant(M2ZImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
@@ -32650,7 +33029,7 @@ static SDValue combineX86ShufflesRecursively(
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
const unsigned MaxRecursionDepth = 8;
- if (Depth > MaxRecursionDepth)
+ if (Depth >= MaxRecursionDepth)
return SDValue();
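The depth convention in this function moves from 1-based to 0-based; with the root call now passing Depth 0, the '>=' guard preserves the same frame budget. A tiny sketch of the guard (illustrative, not patch code):

// 0-based depth: the root call passes Depth = 0, so the recursion admits
// exactly MaxDepth frames before the '>=' guard stops it.
static int framesRemaining(unsigned Depth, unsigned MaxDepth) {
  if (Depth >= MaxDepth)
    return 0;
  return 1 + framesRemaining(Depth + 1, MaxDepth);
}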
// Directly rip through bitcasts to find the underlying operand.
@@ -32667,11 +33046,18 @@ static SDValue combineX86ShufflesRecursively(
"Can only combine shuffles of the same vector register size.");
// Extract target shuffle mask and resolve sentinels and inputs.
+ // TODO - determine Op's demanded elts from RootMask.
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
- if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
+ APInt OpUndef, OpZero;
+ APInt OpDemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode());
+ if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
+ OpZero, DAG, Depth, false))
return SDValue();
+ resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero);
+
// Add the inputs to the Ops list, avoiding duplicates.
SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
@@ -32772,6 +33158,9 @@ static SDValue combineX86ShufflesRecursively(
Mask[i] = OpMaskedIdx;
}
+ // Remove unused/repeated shuffle source ops.
+ resolveTargetShuffleInputsAndMask(Ops, Mask);
+
// Handle the all undef/zero cases early.
if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; }))
return DAG.getUNDEF(Root.getValueType());
@@ -32783,11 +33172,8 @@ static SDValue combineX86ShufflesRecursively(
return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG,
SDLoc(Root));
- // Remove unused/repeated shuffle source ops.
- resolveTargetShuffleInputsAndMask(Ops, Mask);
assert(!Ops.empty() && "Shuffle with no inputs detected");
-
- HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode());
+ HasVariableMask |= IsOpVariableMask;
// Update the list of shuffle nodes that have been combined so far.
SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(),
@@ -32853,7 +33239,7 @@ static SDValue combineX86ShufflesRecursively(
/// Helper entry wrapper to combineX86ShufflesRecursively.
static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0,
/*HasVarMask*/ false,
/*AllowVarMask*/ true, DAG, Subtarget);
}
@@ -33088,7 +33474,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
for (unsigned i = 0; i != Scale; ++i)
DemandedMask[i] = i;
if (SDValue Res = combineX86ShufflesRecursively(
- {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
+ {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
/*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
@@ -33120,6 +33506,30 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
VT.getSizeInBits());
}
+ // vbroadcast(scalarload X) -> vbroadcast_load X
+ // For float loads, extract other uses of the scalar from the broadcast.
+ if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) &&
+ ISD::isNormalLoad(Src.getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(Src);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
+ LN->getMemoryVT(), LN->getMemOperand());
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceExtract = Src.hasOneUse();
+ DCI.CombineTo(N.getNode(), BcastLd);
+ if (NoReplaceExtract) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ } else {
+ SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd,
+ DAG.getIntPtrConstant(0, DL));
+ DCI.CombineTo(LN, Scl, BcastLd.getValue(1));
+ }
+ return N; // Return N so it doesn't get rechecked!
+ }
+
return SDValue();
}
case X86ISD::BLENDI: {
@@ -33133,14 +33543,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
MVT SrcVT = N0.getOperand(0).getSimpleValueType();
if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
SrcVT.getScalarSizeInBits() >= 32) {
- unsigned Mask = N.getConstantOperandVal(2);
+ unsigned BlendMask = N.getConstantOperandVal(2);
unsigned Size = VT.getVectorNumElements();
unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
- unsigned ScaleMask = scaleVectorShuffleBlendMask(Mask, Size, Scale);
+ BlendMask = scaleVectorShuffleBlendMask(BlendMask, Size, Scale);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
N1.getOperand(0),
- DAG.getConstant(ScaleMask, DL, MVT::i8)));
+ DAG.getTargetConstant(BlendMask, DL, MVT::i8)));
}
}
return SDValue();
@@ -33208,76 +33618,97 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
// If we zero out all elements from Op0 then we don't need to reference it.
if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
- DAG.getConstant(InsertPSMask, DL, MVT::i8));
+ DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
// If we zero out the element from Op1 then we don't need to reference it.
if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
- DAG.getConstant(InsertPSMask, DL, MVT::i8));
+ DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
// Attempt to merge insertps Op1 with an inner target shuffle node.
SmallVector<int, 8> TargetMask1;
SmallVector<SDValue, 2> Ops1;
- if (setTargetShuffleZeroElements(Op1, TargetMask1, Ops1)) {
- int M = TargetMask1[SrcIdx];
- if (isUndefOrZero(M)) {
+ APInt KnownUndef1, KnownZero1;
+ if (getTargetShuffleAndZeroables(Op1, TargetMask1, Ops1, KnownUndef1,
+ KnownZero1)) {
+ if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) {
// Zero/UNDEF insertion - zero out element and remove dependency.
InsertPSMask |= (1u << DstIdx);
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
- DAG.getConstant(InsertPSMask, DL, MVT::i8));
+ DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
// Update insertps mask srcidx and reference the source input directly.
+ int M = TargetMask1[SrcIdx];
assert(0 <= M && M < 8 && "Shuffle index out of range");
InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
Op1 = Ops1[M < 4 ? 0 : 1];
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
- DAG.getConstant(InsertPSMask, DL, MVT::i8));
+ DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
// Attempt to merge insertps Op0 with an inner target shuffle node.
SmallVector<int, 8> TargetMask0;
SmallVector<SDValue, 2> Ops0;
- if (!setTargetShuffleZeroElements(Op0, TargetMask0, Ops0))
- return SDValue();
+ APInt KnownUndef0, KnownZero0;
+ if (getTargetShuffleAndZeroables(Op0, TargetMask0, Ops0, KnownUndef0,
+ KnownZero0)) {
+ bool Updated = false;
+ bool UseInput00 = false;
+ bool UseInput01 = false;
+ for (int i = 0; i != 4; ++i) {
+ if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
+ // No change if element is already zero or the inserted element.
+ continue;
+ } else if (KnownUndef0[i] || KnownZero0[i]) {
+ // If the target mask is undef/zero then we must zero the element.
+ InsertPSMask |= (1u << i);
+ Updated = true;
+ continue;
+ }
- bool Updated = false;
- bool UseInput00 = false;
- bool UseInput01 = false;
- for (int i = 0; i != 4; ++i) {
- int M = TargetMask0[i];
- if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
- // No change if element is already zero or the inserted element.
- continue;
- } else if (isUndefOrZero(M)) {
- // If the target mask is undef/zero then we must zero the element.
- InsertPSMask |= (1u << i);
- Updated = true;
- continue;
+ // The input vector element must be inline.
+ int M = TargetMask0[i];
+ if (M != i && M != (i + 4))
+ return SDValue();
+
+ // Determine which inputs of the target shuffle we're using.
+ UseInput00 |= (0 <= M && M < 4);
+ UseInput01 |= (4 <= M);
}
- // The input vector element must be inline.
- if (M != i && M != (i + 4))
- return SDValue();
+ // If we're not using both inputs of the target shuffle then use the
+ // referenced input directly.
+ if (UseInput00 && !UseInput01) {
+ Updated = true;
+ Op0 = Ops0[0];
+ } else if (!UseInput00 && UseInput01) {
+ Updated = true;
+ Op0 = Ops0[1];
+ }
- // Determine which inputs of the target shuffle we're using.
- UseInput00 |= (0 <= M && M < 4);
- UseInput01 |= (4 <= M);
+ if (Updated)
+ return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
+ DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
- // If we're not using both inputs of the target shuffle then use the
- // referenced input directly.
- if (UseInput00 && !UseInput01) {
- Updated = true;
- Op0 = Ops0[0];
- } else if (!UseInput00 && UseInput01) {
- Updated = true;
- Op0 = Ops0[1];
+ // If we're inserting an element from a vbroadcast load, fold the
+ // load into the X86insertps instruction. We need to convert the scalar
+ // load to a vector and clear the source lane of the INSERTPS control.
+ if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(Op1);
+ if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) {
+ SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),
+ MemIntr->getBasePtr(),
+ MemIntr->getMemOperand());
+ SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
+ Load),
+ DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
+ return Insert;
+ }
}
- if (Updated)
- return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
- DAG.getConstant(InsertPSMask, DL, MVT::i8));
-
return SDValue();
}
default:
@@ -33580,7 +34011,7 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
}
/// Eliminate a redundant shuffle of a horizontal math op.
-static SDValue foldShuffleOfHorizOp(SDNode *N) {
+static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
@@ -33611,17 +34042,36 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
HOp.getOperand(0) != HOp.getOperand(1))
return SDValue();
+ // The shuffle that we are eliminating may have allowed the horizontal op to
+ // have an undemanded (undefined) operand. Duplicate the other (defined)
+ // operand to ensure that the results are defined across all lanes without the
+ // shuffle.
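+  // e.g. (hadd X, undef) is rebuilt as (hadd X, X) so all lanes are defined.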
+ auto updateHOp = [](SDValue HorizOp, SelectionDAG &DAG) {
+ SDValue X;
+ if (HorizOp.getOperand(0).isUndef()) {
+ assert(!HorizOp.getOperand(1).isUndef() && "Not expecting foldable h-op");
+ X = HorizOp.getOperand(1);
+ } else if (HorizOp.getOperand(1).isUndef()) {
+ assert(!HorizOp.getOperand(0).isUndef() && "Not expecting foldable h-op");
+ X = HorizOp.getOperand(0);
+ } else {
+ return HorizOp;
+ }
+ return DAG.getNode(HorizOp.getOpcode(), SDLoc(HorizOp),
+ HorizOp.getValueType(), X, X);
+ };
+
// When the operands of a horizontal math op are identical, the low half of
// the result is the same as the high half. If a target shuffle is also
- // replicating low and high halves, we don't need the shuffle.
+ // replicating low and high halves (and without changing the type/length of
+ // the vector), we don't need the shuffle.
if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
- if (HOp.getScalarValueSizeInBits() == 64) {
+ if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) {
// movddup (hadd X, X) --> hadd X, X
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
assert((HOp.getValueType() == MVT::v2f64 ||
- HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
- "Unexpected type for h-op");
- return HOp;
+ HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
+ return updateHOp(HOp, DAG);
}
return SDValue();
}
@@ -33635,14 +34085,14 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
(isTargetShuffleEquivalent(Mask, {0, 0}) ||
isTargetShuffleEquivalent(Mask, {0, 1, 0, 1}) ||
isTargetShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3})))
- return HOp;
+ return updateHOp(HOp, DAG);
if (HOp.getValueSizeInBits() == 256 &&
(isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}) ||
isTargetShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) ||
isTargetShuffleEquivalent(
Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11})))
- return HOp;
+ return updateHOp(HOp, DAG);
return SDValue();
}
@@ -33677,7 +34127,7 @@ static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
// the wide shuffle that we started with.
return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
Shuf->getOperand(1), HalfMask, HalfIdx1,
- HalfIdx2, false, DAG);
+ HalfIdx2, false, DAG, /*UseConcat*/true);
}
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
@@ -33696,70 +34146,10 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
return AddSub;
- if (SDValue HAddSub = foldShuffleOfHorizOp(N))
+ if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG))
return HAddSub;
}
- // During Type Legalization, when promoting illegal vector types,
- // the backend might introduce new shuffle dag nodes and bitcasts.
- //
- // This code performs the following transformation:
- // fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
- // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
- //
- // We do this only if both the bitcast and the BINOP dag nodes have
- // one use. Also, perform this transformation only if the new binary
- // operation is legal. This is to avoid introducing dag nodes that
- // potentially need to be further expanded (or custom lowered) into a
- // less optimal sequence of dag nodes.
- if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
- N->getOpcode() == ISD::VECTOR_SHUFFLE &&
- N->getOperand(0).getOpcode() == ISD::BITCAST &&
- N->getOperand(1).isUndef() && N->getOperand(0).hasOneUse()) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- SDValue BC0 = N0.getOperand(0);
- EVT SVT = BC0.getValueType();
- unsigned Opcode = BC0.getOpcode();
- unsigned NumElts = VT.getVectorNumElements();
-
- if (BC0.hasOneUse() && SVT.isVector() &&
- SVT.getVectorNumElements() * 2 == NumElts &&
- TLI.isOperationLegal(Opcode, VT)) {
- bool CanFold = false;
- switch (Opcode) {
- default : break;
- case ISD::ADD:
- case ISD::SUB:
- case ISD::MUL:
- // isOperationLegal lies for integer ops on floating point types.
- CanFold = VT.isInteger();
- break;
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- // isOperationLegal lies for floating point ops on integer types.
- CanFold = VT.isFloatingPoint();
- break;
- }
-
- unsigned SVTNumElts = SVT.getVectorNumElements();
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
- CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
- for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
- CanFold = SVOp->getMaskElt(i) < 0;
-
- if (CanFold) {
- SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0));
- SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1));
- SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
- return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, SVOp->getMask());
- }
- }
- }
-
// Attempt to combine into a vector load/broadcast.
if (SDValue LD = combineToConsecutiveLoads(VT, N, dl, DAG, Subtarget, true))
return LD;
@@ -33841,7 +34231,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
ISD::isNormalLoad(N->getOperand(0).getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
- if (!LN->isVolatile()) {
+ if (LN->isSimple()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
SDValue VZLoad =
@@ -33855,53 +34245,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
}
}
-
- // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
- // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
- // FIXME: This can probably go away once we default to widening legalization.
- if (Subtarget.hasSSE41() && VT == MVT::v4i32 &&
- N->getOpcode() == ISD::VECTOR_SHUFFLE &&
- N->getOperand(0).getOpcode() == ISD::BITCAST &&
- N->getOperand(0).getOperand(0).getOpcode() == X86ISD::PMULUDQ) {
- SDValue BC = N->getOperand(0);
- SDValue MULUDQ = BC.getOperand(0);
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- ArrayRef<int> Mask = SVOp->getMask();
- if (BC.hasOneUse() && MULUDQ.hasOneUse() &&
- Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) {
- SDValue Op0 = MULUDQ.getOperand(0);
- SDValue Op1 = MULUDQ.getOperand(1);
- if (Op0.getOpcode() == ISD::BITCAST &&
- Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
- Op0.getOperand(0).getValueType() == MVT::v4i32) {
- ShuffleVectorSDNode *SVOp0 =
- cast<ShuffleVectorSDNode>(Op0.getOperand(0));
- ArrayRef<int> Mask2 = SVOp0->getMask();
- if (Mask2[0] == 0 && Mask2[1] == -1 &&
- Mask2[2] == 1 && Mask2[3] == -1) {
- Op0 = SVOp0->getOperand(0);
- Op1 = DAG.getBitcast(MVT::v4i32, Op1);
- Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask);
- return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
- }
- }
- if (Op1.getOpcode() == ISD::BITCAST &&
- Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
- Op1.getOperand(0).getValueType() == MVT::v4i32) {
- ShuffleVectorSDNode *SVOp1 =
- cast<ShuffleVectorSDNode>(Op1.getOperand(0));
- ArrayRef<int> Mask2 = SVOp1->getMask();
- if (Mask2[0] == 0 && Mask2[1] == -1 &&
- Mask2[2] == 1 && Mask2[3] == -1) {
- Op0 = DAG.getBitcast(MVT::v4i32, Op0);
- Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask);
- Op1 = SVOp1->getOperand(0);
- return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
- }
- }
- }
- }
-
return SDValue();
}
@@ -33966,6 +34309,84 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// TODO convert SrcUndef to KnownUndef.
break;
}
+ case X86ISD::KSHIFTL: {
+ SDValue Src = Op.getOperand(0);
+ auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
+ assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
+ unsigned ShiftAmt = Amt->getZExtValue();
+
+ if (ShiftAmt == 0)
+ return TLO.CombineTo(Op, Src);
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
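+  // For example, (kshiftl (kshiftr X, 4), 2) --> (kshiftr X, 2) when the low
+  // 2 lanes are not demanded.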
+ if (Src.getOpcode() == X86ISD::KSHIFTR) {
+ if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) {
+ unsigned C1 = Src.getConstantOperandVal(1);
+ unsigned NewOpc = X86ISD::KSHIFTL;
+ int Diff = ShiftAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ NewOpc = X86ISD::KSHIFTR;
+ }
+
+ SDLoc dl(Op);
+ SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
+ }
+ }
+
+ APInt DemandedSrc = DemandedElts.lshr(ShiftAmt);
+ if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+
+ KnownUndef <<= ShiftAmt;
+ KnownZero <<= ShiftAmt;
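+  // Elements shifted in at the bottom by KSHIFTL are known zero.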
+ KnownZero.setLowBits(ShiftAmt);
+ break;
+ }
+ case X86ISD::KSHIFTR: {
+ SDValue Src = Op.getOperand(0);
+ auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
+ assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
+ unsigned ShiftAmt = Amt->getZExtValue();
+
+ if (ShiftAmt == 0)
+ return TLO.CombineTo(Op, Src);
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted
+ // out) are never demanded.
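+  // For example, (kshiftr (kshiftl X, 1), 3) --> (kshiftr X, 2) when the top
+  // 3 lanes are not demanded.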
+ if (Src.getOpcode() == X86ISD::KSHIFTL) {
+ if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) {
+ unsigned C1 = Src.getConstantOperandVal(1);
+ unsigned NewOpc = X86ISD::KSHIFTR;
+ int Diff = ShiftAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ NewOpc = X86ISD::KSHIFTL;
+ }
+
+ SDLoc dl(Op);
+ SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
+ }
+ }
+
+ APInt DemandedSrc = DemandedElts.shl(ShiftAmt);
+ if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+
+ KnownUndef.lshrInPlace(ShiftAmt);
+ KnownZero.lshrInPlace(ShiftAmt);
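+  // Elements shifted in at the top by KSHIFTR are known zero.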
+ KnownZero.setHighBits(ShiftAmt);
+ break;
+ }
case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P: {
SDValue Src = Op.getOperand(0);
@@ -33979,16 +34400,36 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
case X86ISD::PACKSS:
case X86ISD::PACKUS: {
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+
APInt DemandedLHS, DemandedRHS;
getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(N0, DemandedLHS, SrcUndef, SrcZero, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(N1, DemandedRHS, SrcUndef, SrcZero, TLO,
+ Depth + 1))
return true;
+
+ // Aggressively peek through ops to get at the demanded elts.
+    // TODO - we should do this for all target/faux shuffle ops.
+ if (!DemandedElts.isAllOnesValue()) {
+ APInt DemandedSrcBits =
+ APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
+ SDValue NewN0 = SimplifyMultipleUseDemandedBits(
+ N0, DemandedSrcBits, DemandedLHS, TLO.DAG, Depth + 1);
+ SDValue NewN1 = SimplifyMultipleUseDemandedBits(
+ N1, DemandedSrcBits, DemandedRHS, TLO.DAG, Depth + 1);
+ if (NewN0 || NewN1) {
+ NewN0 = NewN0 ? NewN0 : N0;
+ NewN1 = NewN1 ? NewN1 : N1;
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));
+ }
+ }
break;
}
case X86ISD::HADD:
@@ -34062,25 +34503,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
break;
}
- case X86ISD::SUBV_BROADCAST: {
- // Reduce size of broadcast if we don't need the upper half.
- unsigned HalfElts = NumElts / 2;
- if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
- SDValue Src = Op.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
-
- SDValue Half = Src;
- if (SrcVT.getVectorNumElements() != HalfElts) {
- MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
- Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
- }
-
- return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
- TLO.DAG, SDLoc(Op),
- Half.getValueSizeInBits()));
- }
- break;
- }
case X86ISD::VPERMV: {
SDValue Mask = Op.getOperand(0);
APInt MaskUndef, MaskZero;
@@ -34135,6 +34557,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
+ // Subvector broadcast.
+ case X86ISD::SUBV_BROADCAST: {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueSizeInBits() > ExtSizeInBits)
+ Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
+ else if (Src.getValueSizeInBits() < ExtSizeInBits) {
+ MVT SrcSVT = Src.getSimpleValueType().getScalarType();
+ MVT SrcVT =
+ MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits());
+ Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src);
+ }
+ return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
+ TLO.DAG, DL, ExtSizeInBits));
+ }
// Byte shifts by immediate.
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
@@ -34201,36 +34638,30 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
}
- // Simplify target shuffles.
- if (!isTargetShuffle(Opc) || !VT.isSimple())
- return false;
-
- // Get target shuffle mask.
- bool IsUnary;
+ // Get target/faux shuffle mask.
+ APInt OpUndef, OpZero;
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
- if (!getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, OpInputs,
- OpMask, IsUnary))
+ if (!getTargetShuffleInputs(Op, DemandedElts, OpInputs, OpMask, OpUndef,
+ OpZero, TLO.DAG, Depth, false))
return false;
- // Shuffle inputs must be the same type as the result.
- if (llvm::any_of(OpInputs,
- [VT](SDValue V) { return VT != V.getValueType(); }))
+ // Shuffle inputs must be the same size as the result.
+ if (OpMask.size() != (unsigned)NumElts ||
+ llvm::any_of(OpInputs, [VT](SDValue V) {
+ return VT.getSizeInBits() != V.getValueSizeInBits() ||
+ !V.getValueType().isVector();
+ }))
return false;
- // Clear known elts that might have been set above.
- KnownZero.clearAllBits();
- KnownUndef.clearAllBits();
+ KnownZero = OpZero;
+ KnownUndef = OpUndef;
// Check if shuffle mask can be simplified to undef/zero/identity.
int NumSrcs = OpInputs.size();
- for (int i = 0; i != NumElts; ++i) {
- int &M = OpMask[i];
+ for (int i = 0; i != NumElts; ++i)
if (!DemandedElts[i])
- M = SM_SentinelUndef;
- else if (0 <= M && OpInputs[M / NumElts].isUndef())
- M = SM_SentinelUndef;
- }
+ OpMask[i] = SM_SentinelUndef;
if (isUndefInRange(OpMask, 0, NumElts)) {
KnownUndef.setAllBits();
@@ -34243,10 +34674,14 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
for (int Src = 0; Src != NumSrcs; ++Src)
if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts))
- return TLO.CombineTo(Op, OpInputs[Src]);
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, OpInputs[Src]));
// Attempt to simplify inputs.
for (int Src = 0; Src != NumSrcs; ++Src) {
+ // TODO: Support inputs of different types.
+ if (OpInputs[Src].getValueType() != VT)
+ continue;
+
int Lo = Src * NumElts;
APInt SrcElts = APInt::getNullValue(NumElts);
for (int i = 0; i != NumElts; ++i)
@@ -34256,21 +34691,13 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SrcElts.setBit(M);
}
+ // TODO - Propagate input undef/zero elts.
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
}
- // Extract known zero/undef elements.
- // TODO - Propagate input undef/zero elts.
- for (int i = 0; i != NumElts; ++i) {
- if (OpMask[i] == SM_SentinelUndef)
- KnownUndef.setBit(i);
- if (OpMask[i] == SM_SentinelZero)
- KnownZero.setBit(i);
- }
-
return false;
}
@@ -34296,6 +34723,18 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp,
TLO, Depth + 1))
return true;
+
+ // Aggressively peek through ops to get at the demanded low bits.
+ SDValue DemandedLHS = SimplifyMultipleUseDemandedBits(
+ LHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedRHS = SimplifyMultipleUseDemandedBits(
+ RHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedLHS || DemandedRHS) {
+ DemandedLHS = DemandedLHS ? DemandedLHS : LHS;
+ DemandedRHS = DemandedRHS ? DemandedRHS : RHS;
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, DemandedLHS, DemandedRHS));
+ }
break;
}
case X86ISD::VSHLI: {
@@ -34323,7 +34762,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
SDValue NewShift = TLO.DAG.getNode(
NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
- TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
+ TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
return TLO.CombineTo(Op, NewShift);
}
}
@@ -34441,6 +34880,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
KnownVec, TLO, Depth + 1))
return true;
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Vec, DemandedVecBits, DemandedVecElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1)));
+
Known = KnownVec.zext(BitWidth, true);
return false;
}
@@ -34542,12 +34986,80 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
+SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ int NumElts = DemandedElts.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
+
+ switch (Opc) {
+ case X86ISD::PINSRB:
+ case X86ISD::PINSRW: {
+ // If we don't demand the inserted element, return the base vector.
+ SDValue Vec = Op.getOperand(0);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ MVT VecVT = Vec.getSimpleValueType();
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
+ !DemandedElts[CIdx->getZExtValue()])
+ return Vec;
+ break;
+ }
+ }
+
+ APInt ShuffleUndef, ShuffleZero;
+ SmallVector<int, 16> ShuffleMask;
+ SmallVector<SDValue, 2> ShuffleOps;
+ if (getTargetShuffleInputs(Op, DemandedElts, ShuffleOps, ShuffleMask,
+ ShuffleUndef, ShuffleZero, DAG, Depth, false)) {
+ // If all the demanded elts are from one operand and are inline,
+ // then we can use the operand directly.
+ int NumOps = ShuffleOps.size();
+ if (ShuffleMask.size() == (unsigned)NumElts &&
+ llvm::all_of(ShuffleOps, [VT](SDValue V) {
+ return VT.getSizeInBits() == V.getValueSizeInBits();
+ })) {
+
+ if (DemandedElts.isSubsetOf(ShuffleUndef))
+ return DAG.getUNDEF(VT);
+ if (DemandedElts.isSubsetOf(ShuffleUndef | ShuffleZero))
+ return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op));
+
+ // Bitmask that indicates which ops have only been accessed 'inline'.
+ APInt IdentityOp = APInt::getAllOnesValue(NumOps);
+ for (int i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (!DemandedElts[i] || ShuffleUndef[i])
+ continue;
+ int Op = M / NumElts;
+ int Index = M % NumElts;
+ if (M < 0 || Index != i) {
+ IdentityOp.clearAllBits();
+ break;
+ }
+ IdentityOp &= APInt::getOneBitSet(NumOps, Op);
+ if (IdentityOp == 0)
+ break;
+ }
+ assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) &&
+ "Multiple identity shuffles detected");
+
+ if (IdentityOp != 0)
+ return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countTrailingZeros()]);
+ }
+ }
+
+ return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ Op, DemandedBits, DemandedElts, DAG, Depth);
+}
+
/// Check if a vector extract from a target-specific shuffle of a load can be
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
/// shuffles have been custom lowered so we need to handle those here.
-static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+static SDValue
+XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -34559,13 +35071,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT OriginalVT = InVec.getValueType();
+ unsigned NumOriginalElts = OriginalVT.getVectorNumElements();
// Peek through bitcasts, don't duplicate a load with other uses.
InVec = peekThroughOneUseBitcasts(InVec);
EVT CurrentVT = InVec.getValueType();
- if (!CurrentVT.isVector() ||
- CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
+ if (!CurrentVT.isVector())
+ return SDValue();
+
+ unsigned NumCurrentElts = CurrentVT.getVectorNumElements();
+ if ((NumOriginalElts % NumCurrentElts) != 0)
return SDValue();
if (!isTargetShuffle(InVec.getOpcode()))
@@ -34582,10 +35098,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
ShuffleOps, ShuffleMask, UnaryShuffle))
return SDValue();
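+  // The peeked-through shuffle may use wider elements than the original type;
+  // scale the mask up to the original granularity so the extract index below
+  // addresses the right element.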
+ unsigned Scale = NumOriginalElts / NumCurrentElts;
+ if (Scale > 1) {
+ SmallVector<int, 16> ScaledMask;
+ scaleShuffleMask<int>(Scale, ShuffleMask, ScaledMask);
+ ShuffleMask = std::move(ScaledMask);
+ }
+ assert(ShuffleMask.size() == NumOriginalElts && "Shuffle mask size mismatch");
+
// Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
+ int Idx = (Elt > (int)NumOriginalElts) ? SM_SentinelUndef : ShuffleMask[Elt];
if (Idx == SM_SentinelZero)
return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
@@ -34598,8 +35121,9 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; }))
return SDValue();
- assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
- SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1];
+ assert(0 <= Idx && Idx < (int)(2 * NumOriginalElts) &&
+ "Shuffle index out of range");
+ SDValue LdNode = (Idx < (int)NumOriginalElts) ? ShuffleOps[0] : ShuffleOps[1];
// If inputs to shuffle are the same for both ops, then allow 2 uses
unsigned AllowedUses =
@@ -34619,7 +35143,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
- if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
+ if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || !LN0->isSimple())
return SDValue();
// If there's a bitcast before the shuffle, check if the load type and
@@ -34637,10 +35161,11 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
  // Create shuffle node taking into account the case that it's a unary shuffle
- SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1];
- Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle,
- ShuffleMask);
- Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
+ SDValue Shuffle = UnaryShuffle ? DAG.getUNDEF(OriginalVT)
+ : DAG.getBitcast(OriginalVT, ShuffleOps[1]);
+ Shuffle = DAG.getVectorShuffle(OriginalVT, dl,
+ DAG.getBitcast(OriginalVT, ShuffleOps[0]),
+ Shuffle, ShuffleMask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}
@@ -34660,6 +35185,23 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
return false;
}
+// Helper to push sign extension of vXi1 SETCC result through bitops.
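+// For example, (xor (setcc A), (setcc B)) becomes
+// (xor (sext (setcc A)), (sext (setcc B))).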
+static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
+ SDValue Src, const SDLoc &DL) {
+ switch (Src.getOpcode()) {
+ case ISD::SETCC:
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR:
+ return DAG.getNode(
+ Src.getOpcode(), DL, SExtVT,
+ signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
+ signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
+ }
+ llvm_unreachable("Unexpected node type for vXi1 sign extension");
+}
+
// Try to match patterns such as
// (i16 bitcast (v16i1 x))
// ->
@@ -34698,6 +35240,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
MVT SExtVT;
+ bool PropagateSExt = false;
switch (SrcVT.getSimpleVT().SimpleTy) {
default:
return SDValue();
@@ -34708,8 +35251,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
SExtVT = MVT::v4i32;
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation.
- if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256))
+ if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) {
SExtVT = MVT::v4i64;
+ PropagateSExt = true;
+ }
break;
case MVT::v8i1:
SExtVT = MVT::v8i16;
@@ -34718,11 +35263,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
// 256-bit because the shuffle is cheaper than sign extending the result of
// the compare.
- // TODO : use checkBitcastSrcVectorSize
- if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
- (Src.getOperand(0).getValueType().is256BitVector() ||
- Src.getOperand(0).getValueType().is512BitVector())) {
+ if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256) ||
+ checkBitcastSrcVectorSize(Src, 512))) {
SExtVT = MVT::v8i32;
+ PropagateSExt = true;
}
break;
case MVT::v16i1:
@@ -34745,19 +35289,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
return SDValue();
};
- SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
+ SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
+ : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
- if (SExtVT == MVT::v64i8) {
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
- Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
- Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
- Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
- Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
- Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
- DAG.getConstant(32, DL, MVT::i8));
- V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
- } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
+ if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
V = getPMOVMSKB(DL, V, DAG, Subtarget);
} else {
if (SExtVT == MVT::v8i16)
@@ -34891,8 +35426,8 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
- DAG.getConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), Splat,
- DAG.getConstant(ShufMask, DL, MVT::i8));
+ DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32),
+ Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8));
}
Ops.append(NumElts, Splat);
} else {
@@ -34935,6 +35470,24 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget))
return V;
+  // Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
+ // legalization destroys the v4i32 type.
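+  // i.e. (bitcast (v4i1 (setcc X, 0, setlt))) --> (movmsk (bitcast v4f32 X)),
+  // zero-extended or truncated to the scalar result type.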
+ if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && SrcVT == MVT::v4i1 &&
+ VT.isScalarInteger() && N0.getOpcode() == ISD::SETCC &&
+ N0.getOperand(0).getValueType() == MVT::v4i32 &&
+ ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()) &&
+ cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETLT) {
+ SDValue N00 = N0.getOperand(0);
+ // Only do this if we can avoid scalarizing the input.
+ if (ISD::isNormalLoad(N00.getNode()) ||
+ (N00.getOpcode() == ISD::BITCAST &&
+ N00.getOperand(0).getValueType() == MVT::v4f32)) {
+ SDValue V = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32,
+ DAG.getBitcast(MVT::v4f32, N00));
+ return DAG.getZExtOrTrunc(V, dl, VT);
+ }
+ }
+
// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
@@ -34949,6 +35502,26 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// type, widen both sides to avoid a trip through memory.
if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
Subtarget.hasAVX512()) {
+ // Use zeros for the widening if we already have some zeroes. This can
+ // allow SimplifyDemandedBits to remove scalar ANDs that may be down
+ // stream of this.
+ // FIXME: It might make sense to detect a concat_vectors with a mix of
+ // zeroes and undef and turn it into insert_subvector for i1 vectors as
+ // a separate combine. What we can't do is canonicalize the operands of
+ // such a concat or we'll get into a loop with SimplifyDemandedBits.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
+ SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1);
+ if (ISD::isBuildVectorAllZeros(LastOp.getNode())) {
+ SrcVT = LastOp.getValueType();
+ unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
+ SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end());
+ Ops.resize(NumConcats, DAG.getConstant(0, dl, SrcVT));
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
+ N0 = DAG.getBitcast(MVT::i8, N0);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ }
+ }
+
unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
Ops[0] = N0;
@@ -34958,6 +35531,33 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
}
}
+ // Look for (i8 (bitcast (v8i1 (extract_subvector (v16i1 X), 0)))) and
+ // replace with (i8 (trunc (i16 (bitcast (v16i1 X))))). This can occur
+ // due to insert_subvector legalization on KNL. By promoting the copy to i16
+ // we can help with known bits propagation from the vXi1 domain to the
+ // scalar domain.
+ if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.hasAVX512() &&
+ !Subtarget.hasDQI() && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N0.getOperand(0).getValueType() == MVT::v16i1 &&
+ isNullConstant(N0.getOperand(1)))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
+ DAG.getBitcast(MVT::i16, N0.getOperand(0)));
+
+ // Combine (bitcast (vbroadcast_load)) -> (vbroadcast_load). The memory VT
+  // determines the number of bits loaded. Remaining bits are zero.
+ if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() &&
+ VT.getScalarSizeInBits() == SrcVT.getScalarSizeInBits()) {
+ auto *BCast = cast<MemIntrinsicSDNode>(N0);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
+ VT.getVectorElementType(),
+ BCast->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1));
+ return ResNode;
+ }
+
// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
@@ -35152,7 +35752,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
// Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
ISD::NodeType BinOp;
SDValue Src = DAG.matchBinOpReduction(
- Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});
+ Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true);
if (!Src)
return SDValue();
@@ -35246,29 +35846,31 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
SDLoc DL(Extract);
EVT MatchVT = Match.getValueType();
unsigned NumElts = MatchVT.getVectorNumElements();
+ unsigned MaxElts = Subtarget.hasInt256() ? 32 : 16;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (ExtractVT == MVT::i1) {
// Special case for (pre-legalization) vXi1 reductions.
- if (NumElts > 32)
+ if (NumElts > 64 || !isPowerOf2_32(NumElts))
return SDValue();
- if (DAG.getTargetLoweringInfo().isTypeLegal(MatchVT)) {
+ if (TLI.isTypeLegal(MatchVT)) {
// If this is a legal AVX512 predicate type then we can just bitcast.
EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
Movmsk = DAG.getBitcast(MovmskVT, Match);
} else {
// Use combineBitcastvxi1 to create the MOVMSK.
- if (NumElts == 32 && !Subtarget.hasInt256()) {
+ while (NumElts > MaxElts) {
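+        // Halving and folding with BinOp preserves the reduction result since
+        // the bitwise reduction ops are associative and commutative.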
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
- NumElts = 16;
+ NumElts /= 2;
}
EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget);
}
if (!Movmsk)
return SDValue();
- Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, MVT::i32);
+ Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, NumElts > 32 ? MVT::i64 : MVT::i32);
} else {
// Bail with AVX512VL (which uses predicate registers).
if (Subtarget.hasVLX())
@@ -35309,13 +35911,15 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget);
NumElts = MaskSrcVT.getVectorNumElements();
}
- assert(NumElts <= 32 && "Not expecting more than 32 elements");
+ assert((NumElts <= 32 || NumElts == 64) &&
+ "Not expecting more than 64 elements");
+ MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32;
if (BinOp == ISD::XOR) {
// parity -> (AND (CTPOP(MOVMSK X)), 1)
- SDValue Mask = DAG.getConstant(1, DL, MVT::i32);
- SDValue Result = DAG.getNode(ISD::CTPOP, DL, MVT::i32, Movmsk);
- Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, Mask);
+ SDValue Mask = DAG.getConstant(1, DL, CmpVT);
+ SDValue Result = DAG.getNode(ISD::CTPOP, DL, CmpVT, Movmsk);
+ Result = DAG.getNode(ISD::AND, DL, CmpVT, Result, Mask);
return DAG.getZExtOrTrunc(Result, DL, ExtractVT);
}
@@ -35323,19 +35927,19 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
ISD::CondCode CondCode;
if (BinOp == ISD::OR) {
// any_of -> MOVMSK != 0
- CmpC = DAG.getConstant(0, DL, MVT::i32);
+ CmpC = DAG.getConstant(0, DL, CmpVT);
CondCode = ISD::CondCode::SETNE;
} else {
// all_of -> MOVMSK == ((1 << NumElts) - 1)
- CmpC = DAG.getConstant((1ULL << NumElts) - 1, DL, MVT::i32);
+ CmpC = DAG.getConstant(APInt::getLowBitsSet(CmpVT.getSizeInBits(), NumElts),
+ DL, CmpVT);
CondCode = ISD::CondCode::SETEQ;
}
// The setcc produces an i8 of 0/1, so extend that to the result width and
// negate to get the final 0/-1 mask value.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT SetccVT =
- TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
SDValue Zero = DAG.getConstant(0, DL, ExtractVT);
@@ -35431,6 +36035,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ SDLoc dl(N);
SDValue Src = N->getOperand(0);
SDValue Idx = N->getOperand(1);
@@ -35452,10 +36057,37 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(VT, SrcOp);
}
+ // If we're extracting a single element from a broadcast load and there are
+ // no other users, just create a single load.
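+  // e.g. (extract_elt (v4f32 (vbroadcast_load p)), idx) --> (f32 (load p)),
+  // since every broadcast lane holds the same value.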
+ if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
+ unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits();
+ if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
+ VT.getSizeInBits() == SrcBCWidth) {
+ SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),
+ MemIntr->getBasePtr(),
+ MemIntr->getPointerInfo(),
+ MemIntr->getAlignment(),
+ MemIntr->getMemOperand()->getFlags());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
+ return Load;
+ }
+ }
+
+  // Handle extract(truncate(x)) for the 0th index.
+ // TODO: Treat this as a faux shuffle?
+ // TODO: When can we use this for general indices?
+ if (ISD::TRUNCATE == Src.getOpcode() && SrcVT.is128BitVector() &&
+ isNullConstant(Idx)) {
+ Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl);
+ Src = DAG.getBitcast(SrcVT, Src);
+ return DAG.getNode(N->getOpcode(), dl, VT, Src, Idx);
+ }
+
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
- if (!resolveTargetShuffleInputs(SrcBC, Ops, Mask, DAG))
+ if (!getTargetShuffleInputs(SrcBC, Ops, Mask, DAG))
return SDValue();
// Attempt to narrow/widen the shuffle mask to the correct size.
@@ -35489,7 +36121,6 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
return SDValue();
int SrcIdx = Mask[N->getConstantOperandVal(1)];
- SDLoc dl(N);
// If the shuffle source element is undef/zero then we can just accept it.
if (SrcIdx == SM_SentinelUndef)
@@ -35584,7 +36215,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
}
// TODO: This switch could include FNEG and the x86-specific FP logic ops
- // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
+ // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
// missed load folding and fma+fneg combining.
switch (Vec.getOpcode()) {
case ISD::FMA: // Begin 3 operands
@@ -35631,27 +36262,84 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");
- bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
- if (!Subtarget.hasFastHorizontalOps() && !OptForSize)
- return SDValue();
- SDValue Index = ExtElt->getOperand(1);
- if (!isNullConstant(Index))
- return SDValue();
- // TODO: Allow FADD with reduction and/or reassociation and no-signed-zeros.
ISD::NodeType Opc;
- SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD});
+ SDValue Rdx =
+ DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD, ISD::FADD}, true);
if (!Rdx)
return SDValue();
+ SDValue Index = ExtElt->getOperand(1);
+ assert(isNullConstant(Index) &&
+ "Reduction doesn't end in an extract from index 0");
+
EVT VT = ExtElt->getValueType(0);
- EVT VecVT = ExtElt->getOperand(0).getValueType();
+ EVT VecVT = Rdx.getValueType();
if (VecVT.getScalarType() != VT)
return SDValue();
- unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;
SDLoc DL(ExtElt);
+  // vXi8 reduction - sub-128-bit vector.
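+  // PSADBW against zero sums the bytes of each 64-bit half, so it performs
+  // the byte add-reduction directly once the input is padded to 128 bits.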
+ if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
+ if (VecVT == MVT::v4i8) {
+ // Pad with zero.
+ if (Subtarget.hasSSE41()) {
+ Rdx = DAG.getBitcast(MVT::i32, Rdx);
+ Rdx = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ DAG.getConstant(0, DL, MVT::v4i32), Rdx,
+ DAG.getIntPtrConstant(0, DL));
+ Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
+ } else {
+ Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
+ DAG.getConstant(0, DL, VecVT));
+ }
+ }
+ if (Rdx.getValueType() == MVT::v8i8) {
+ // Pad with undef.
+ Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
+ DAG.getUNDEF(MVT::v8i8));
+ }
+ Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
+ DAG.getConstant(0, DL, MVT::v16i8));
+ Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
+ }
+
+ // Must be a >=128-bit vector with pow2 elements.
+ if ((VecVT.getSizeInBits() % 128) != 0 ||
+ !isPowerOf2_32(VecVT.getVectorNumElements()))
+ return SDValue();
+
+ // vXi8 reduction - sum lo/hi halves then use PSADBW.
+ if (VT == MVT::i8) {
+ while (Rdx.getValueSizeInBits() > 128) {
+ unsigned HalfSize = VecVT.getSizeInBits() / 2;
+ unsigned HalfElts = VecVT.getVectorNumElements() / 2;
+ SDValue Lo = extractSubVector(Rdx, 0, DAG, DL, HalfSize);
+ SDValue Hi = extractSubVector(Rdx, HalfElts, DAG, DL, HalfSize);
+ Rdx = DAG.getNode(ISD::ADD, DL, Lo.getValueType(), Lo, Hi);
+ VecVT = Rdx.getValueType();
+ }
+ assert(VecVT == MVT::v16i8 && "v16i8 reduction expected");
+
+ SDValue Hi = DAG.getVectorShuffle(
+ MVT::v16i8, DL, Rdx, Rdx,
+ {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
+ Rdx = DAG.getNode(ISD::ADD, DL, MVT::v16i8, Rdx, Hi);
+ Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
+ getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
+ Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
+ }
+
+  // Only use (F)HADD opcodes if they aren't microcoded or we're minimizing code size.
+ bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ if (!Subtarget.hasFastHorizontalOps() && !OptForSize)
+ return SDValue();
+
+ unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;
+
// 256-bit horizontal instructions operate on 128-bit chunks rather than
// across the whole vector, so we need an extract + hop preliminary stage.
// This is the only step where the operands of the hop are not the same value.
@@ -35661,15 +36349,14 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
unsigned NumElts = VecVT.getVectorNumElements();
SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL);
SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL);
- VecVT = EVT::getVectorVT(*DAG.getContext(), VT, NumElts / 2);
- Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Hi, Lo);
+ Rdx = DAG.getNode(HorizOpcode, DL, Lo.getValueType(), Hi, Lo);
+ VecVT = Rdx.getValueType();
}
if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) &&
!((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3()))
return SDValue();
// extract (add (shuf X), X), 0 --> extract (hadd X, X), 0
- assert(Rdx.getValueType() == VecVT && "Unexpected reduction match");
unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements());
for (unsigned i = 0; i != ReductionSteps; ++i)
Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx);
@@ -35714,15 +36401,26 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}
}
- // TODO - Remove this once we can handle the implicit zero-extension of
- // X86ISD::PEXTRW/X86ISD::PEXTRB in:
- // XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and
- // combineBasicSADPattern.
if (IsPextr) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(
SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI))
return SDValue(N, 0);
+
+ // PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling).
+ if ((InputVector.getOpcode() == X86ISD::PINSRB ||
+ InputVector.getOpcode() == X86ISD::PINSRW) &&
+ InputVector.getOperand(2) == EltIdx) {
+ assert(SrcVT == InputVector.getOperand(0).getValueType() &&
+ "Vector type mismatch");
+ SDValue Scl = InputVector.getOperand(1);
+ Scl = DAG.getNode(ISD::TRUNCATE, dl, SrcVT.getScalarType(), Scl);
+ return DAG.getZExtOrTrunc(Scl, dl, VT);
+ }
+
+ // TODO - Remove this once we can handle the implicit zero-extension of
+ // X86ISD::PEXTRW/X86ISD::PEXTRB in XFormVExtractWithShuffleIntoLoad,
+ // combineHorizontalPredicateResult and combineBasicSADPattern.
return SDValue();
}
@@ -35832,6 +36530,15 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
// get simplified at node creation time)?
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ // If both inputs are 0/undef, create a complete zero vector.
+ // FIXME: As noted above this should be handled by DAGCombiner/getNode.
+ if (TValIsAllZeros && FValIsAllZeros) {
+ if (VT.isFloatingPoint())
+ return DAG.getConstantFP(0.0, DL, VT);
+ return DAG.getConstant(0, DL, VT);
+ }
+
if (TValIsAllZeros && !FValIsAllZeros && Subtarget.hasAVX512() &&
Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
@@ -36295,8 +37002,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Since SKX these selects have a proper lowering.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1 &&
- (ExperimentalVectorWideningLegalization ||
- VT.getVectorNumElements() > 4) &&
(VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16)) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
@@ -36358,6 +37063,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// subl %esi, $edi
// cmovsl %eax, %edi
if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
+ Cond.hasOneUse() &&
DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -36508,6 +37214,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
return V;
+ // select(~Cond, X, Y) -> select(Cond, Y, X)
+ if (CondVT.getScalarType() != MVT::i1)
+ if (SDValue CondNot = IsNOT(Cond, DAG))
+ return DAG.getNode(N->getOpcode(), DL, VT,
+ DAG.getBitcast(CondVT, CondNot), RHS, LHS);
+
// Custom action for SELECT MMX
if (VT == MVT::x86mmx) {
LHS = DAG.getBitcast(MVT::i64, LHS);
@@ -36873,8 +37585,8 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// We can't always do this as FCMOV only supports a subset of X86 cond.
if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
- SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
- Flags};
+ SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8),
+ Flags};
return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
}
}
@@ -36923,12 +37635,13 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
- uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
- if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+ APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
+ assert(Diff.getBitWidth() == N->getValueType(0).getSizeInBits() &&
+ "Implicit constant truncation");
bool isFastMultiplier = false;
- if (Diff < 10) {
- switch ((unsigned char)Diff) {
+ if (Diff.ult(10)) {
+ switch (Diff.getZExtValue()) {
default: break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
@@ -36943,7 +37656,6 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
if (isFastMultiplier) {
- APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
Cond = getSETCC(CC, Cond, DL ,DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
@@ -36994,8 +37706,8 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
if (CC == X86::COND_E &&
CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
- SDValue Ops[] = { FalseOp, Cond.getOperand(0),
- DAG.getConstant(CC, DL, MVT::i8), Cond };
+ SDValue Ops[] = {FalseOp, Cond.getOperand(0),
+ DAG.getTargetConstant(CC, DL, MVT::i8), Cond};
return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
}
}
@@ -37029,10 +37741,11 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
CC1 = X86::GetOppositeBranchCondition(CC1);
}
- SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8),
- Flags};
+ SDValue LOps[] = {FalseOp, TrueOp,
+ DAG.getTargetConstant(CC0, DL, MVT::i8), Flags};
SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps);
- SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags};
+ SDValue Ops[] = {LCMOV, TrueOp, DAG.getTargetConstant(CC1, DL, MVT::i8),
+ Flags};
SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
return CMOV;
}
@@ -37064,9 +37777,9 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
// This should constant fold.
SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
- SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
- DAG.getConstant(X86::COND_NE, DL, MVT::i8),
- Cond);
+ SDValue CMov =
+ DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
+ DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8), Cond);
return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));
}
}
@@ -37166,98 +37879,45 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
if ((NumElts % 2) != 0)
return SDValue();
- unsigned RegSize = 128;
- MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
// Shrink the operands of mul.
SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
- if (ExperimentalVectorWideningLegalization ||
- NumElts >= OpsVT.getVectorNumElements()) {
- // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
- // lower part is needed.
- SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
- if (Mode == MULU8 || Mode == MULS8)
- return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
- DL, VT, MulLo);
-
- MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
- // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
- // the higher part is also needed.
- SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- ReducedVT, NewN0, NewN1);
-
- // Repack the lower part and higher part result of mul into a wider
- // result.
- // Generate shuffle functioning as punpcklwd.
- SmallVector<int, 16> ShuffleMask(NumElts);
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i;
- ShuffleMask[2 * i + 1] = i + NumElts;
- }
- SDValue ResLo =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResLo = DAG.getBitcast(ResVT, ResLo);
- // Generate shuffle functioning as punpckhwd.
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i + NumElts / 2;
- ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
- }
- SDValue ResHi =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResHi = DAG.getBitcast(ResVT, ResHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
- }
-
- // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
- // to legalize the mul explicitly because implicit legalization for type
- // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
- // instructions which will not exist when we explicitly legalize it by
- // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
- // <4 x i16> undef).
- //
- // Legalize the operands of mul.
- // FIXME: We may be able to handle non-concatenated vectors by insertion.
- unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
- if ((RegSize % ReducedSizeInBits) != 0)
- return SDValue();
-
- SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
- DAG.getUNDEF(ReducedVT));
- Ops[0] = NewN0;
- NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
- Ops[0] = NewN1;
- NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
-
- if (Mode == MULU8 || Mode == MULS8) {
- // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
- // part is needed.
- SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
-
- // convert the type of mul result to VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG,
- DL, ResVT, Mul);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
- }
+ // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
+ // lower part is needed.
+ SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
+ if (Mode == MULU8 || Mode == MULS8)
+ return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
+ DL, VT, MulLo);
- // Generate the lower and higher part of mul: pmulhw/pmulhuw. For
- // MULU16/MULS16, both parts are needed.
- SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
+ MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
+ // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
+ // the higher part is also needed.
SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- OpsVT, NewN0, NewN1);
+ ReducedVT, NewN0, NewN1);
// Repack the lower part and higher part result of mul into a wider
- // result. Make sure the type of mul result is VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi);
- Res = DAG.getBitcast(ResVT, Res);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
+ // result.
+ // Generate shuffle functioning as punpcklwd.
+ SmallVector<int, 16> ShuffleMask(NumElts);
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i;
+ ShuffleMask[2 * i + 1] = i + NumElts;
+ }
+ SDValue ResLo =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResLo = DAG.getBitcast(ResVT, ResLo);
+ // Generate shuffle functioning as punpckhwd.
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i + NumElts / 2;
+ ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
+ }
+ SDValue ResHi =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResHi = DAG.getBitcast(ResVT, ResHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
}
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
@@ -37365,8 +38025,7 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
// Make sure the vXi16 type is legal. This covers the AVX512 without BWI case.
// Also allow v2i32 if it will be widened.
MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
- if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) ||
- DAG.getTargetLoweringInfo().isTypeLegal(WVT)))
+ if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(WVT))
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -37919,7 +38578,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
if (NewShiftVal >= NumBitsPerElt)
NewShiftVal = NumBitsPerElt - 1;
return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8));
+ DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8));
}
// We can decode 'whole byte' logical bit shifts as shuffles.
@@ -38039,7 +38698,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasAVX512()) {
SDValue FSetCC =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
- DAG.getConstant(x86cc, DL, MVT::i8));
+ DAG.getTargetConstant(x86cc, DL, MVT::i8));
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
@@ -38048,10 +38707,9 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL,
N->getSimpleValueType(0));
}
- SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
- CMP00.getValueType(), CMP00, CMP01,
- DAG.getConstant(x86cc, DL,
- MVT::i8));
+ SDValue OnesOrZeroesF =
+ DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00,
+ CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8));
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
@@ -38083,34 +38741,6 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// Match (xor X, -1) -> X.
-// Match extract_subvector(xor X, -1) -> extract_subvector(X).
-// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y).
-static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
- V = peekThroughBitcasts(V);
- if (V.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()))
- return V.getOperand(0);
- if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
- if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
- Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(),
- Not, V.getOperand(1));
- }
- }
- SmallVector<SDValue, 2> CatOps;
- if (collectConcatOps(V.getNode(), CatOps)) {
- for (SDValue &CatOp : CatOps) {
- SDValue NotCat = IsNOT(CatOp, DAG);
- if (!NotCat) return SDValue();
- CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps);
- }
- return SDValue();
-}
-
/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::AND);
@@ -38273,7 +38903,7 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
unsigned ShiftVal = SplatVal.countTrailingOnes();
- SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
+ SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
@@ -38499,7 +39129,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
// TODO: Support multiple SrcOps.
if (VT == MVT::i1) {
SmallVector<SDValue, 2> SrcOps;
- if (matchBitOpReduction(SDValue(N, 0), ISD::AND, SrcOps) &&
+ if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps) &&
SrcOps.size() == 1) {
SDLoc dl(N);
unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
@@ -38570,7 +39200,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
if (SDValue Shuffle = combineX86ShufflesRecursively(
- {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
+ {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
/*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
N->getOperand(0).getOperand(1));
@@ -38585,7 +39215,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
- EVT VT = N->getValueType(0);
+ MVT VT = N->getSimpleValueType(0);
if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
return SDValue();
@@ -38594,10 +39224,12 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
return SDValue();
- // On XOP we'll lower to PCMOV so accept one use, otherwise only
- // do this if either mask has multiple uses already.
- if (!(Subtarget.hasXOP() || !N0.getOperand(1).hasOneUse() ||
- !N1.getOperand(1).hasOneUse()))
+ // On XOP we'll lower to PCMOV so accept one use. With AVX512, we can use
+ // VPTERNLOG. Otherwise only do this if either mask has multiple uses already.
+ bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) ||
+ Subtarget.hasVLX();
+ if (!(Subtarget.hasXOP() || UseVPTERNLOG ||
+ !N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse()))
return SDValue();
// Attempt to extract constant byte masks.
@@ -38895,6 +39527,24 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
DAG.getBitcast(MVT::v4f32, N1)));
}
+ // Match any-of bool scalar reductions into a bitcast/movmsk + cmp.
+ // TODO: Support multiple SrcOps.
+ if (VT == MVT::i1) {
+ SmallVector<SDValue, 2> SrcOps;
+ if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) &&
+ SrcOps.size() == 1) {
+ SDLoc dl(N);
+ unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
+ EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+ SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
+ if (Mask) {
+ APInt AllBits = APInt::getNullValue(NumElts);
+ return DAG.getSetCC(dl, MVT::i1, Mask,
+ DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE);
+ }
+ }
+ }
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -39136,26 +39786,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
-/// Check if truncation with saturation form type \p SrcVT to \p DstVT
-/// is valid for the given \p Subtarget.
-static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
- const X86Subtarget &Subtarget) {
- if (!Subtarget.hasAVX512())
- return false;
-
- // FIXME: Scalar type may be supported if we move it to vector register.
- if (!SrcVT.isVector())
- return false;
-
- EVT SrcElVT = SrcVT.getScalarType();
- EVT DstElVT = DstVT.getScalarType();
- if (DstElVT != MVT::i8 && DstElVT != MVT::i16 && DstElVT != MVT::i32)
- return false;
- if (SrcVT.is512BitVector() || Subtarget.hasVLX())
- return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
- return false;
-}
-
/// Detect patterns of truncation with unsigned saturation:
///
/// 1. (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
@@ -39253,64 +39883,61 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {
return SDValue();
}
-/// Detect a pattern of truncation with signed saturation.
-/// The types should allow to use VPMOVSS* instruction on AVX512.
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectAVX512SSatPattern(SDValue In, EVT VT,
- const X86Subtarget &Subtarget,
- const TargetLowering &TLI) {
- if (!TLI.isTypeLegal(In.getValueType()))
- return SDValue();
- if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
- return SDValue();
- return detectSSatPattern(In, VT);
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// The types should allow to use VPMOVUS* instruction on AVX512.
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT, SelectionDAG &DAG,
- const SDLoc &DL,
- const X86Subtarget &Subtarget,
- const TargetLowering &TLI) {
- if (!TLI.isTypeLegal(In.getValueType()))
- return SDValue();
- if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
- return SDValue();
- return detectUSatPattern(In, VT, DAG, DL);
-}
-
static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- EVT SVT = VT.getScalarType();
+ if (!Subtarget.hasSSE2() || !VT.isVector())
+ return SDValue();
+
+ EVT SVT = VT.getVectorElementType();
EVT InVT = In.getValueType();
- EVT InSVT = InVT.getScalarType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.isTypeLegal(InVT) && TLI.isTypeLegal(VT) &&
- isSATValidOnAVX512Subtarget(InVT, VT, Subtarget)) {
- if (auto SSatVal = detectSSatPattern(In, VT))
- return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
- if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))
- return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
- }
- if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
- !Subtarget.hasAVX512() &&
+ EVT InSVT = InVT.getVectorElementType();
+
+ // If we're clamping a signed 32-bit vector to 0-255 and the 32-bit vector is
+ // split across two registers. We can use a packusdw+perm to clamp to 0-65535
+ // and concatenate at the same time. Then we can use a final vpmovuswb to
+ // clip to 0-255.
+ if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+ InVT == MVT::v16i32 && VT == MVT::v16i8) {
+ if (auto USatVal = detectSSatPattern(In, VT, true)) {
+ // Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB.
+ SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal,
+ DL, DAG, Subtarget);
+ assert(Mid && "Failed to pack!");
+ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid);
+ }
+ }
+
+ // vXi32 truncate instructions are available with AVX512F.
+ // vXi16 truncate instructions are only available with AVX512BW.
+ // For 256-bit or smaller vectors, we require VLX.
+ // FIXME: We could widen truncates to 512 to remove the VLX restriction.
+ // If the result type is 256-bits or larger and we have disable 512-bit
+ // registers, we should go ahead and use the pack instructions if possible.
+ bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) ||
+ (Subtarget.hasBWI() && InSVT == MVT::i16)) &&
+ (InVT.getSizeInBits() > 128) &&
+ (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
+ !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
+
+ if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 &&
+ VT.getSizeInBits() >= 64 &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (auto USatVal = detectSSatPattern(In, VT, true)) {
// vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).
+ // Only do this when the result is at least 64 bits or we'll leaving
+ // dangling PACKSSDW nodes.
if (SVT == MVT::i8 && InSVT == MVT::i32) {
EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
VT.getVectorNumElements());
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
DAG, Subtarget);
- if (Mid)
- return truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
- Subtarget);
+ assert(Mid && "Failed to pack!");
+ SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
+ Subtarget);
+ assert(V && "Failed to pack!");
+ return V;
} else if (SVT == MVT::i8 || Subtarget.hasSSE41())
return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG,
Subtarget);
@@ -39319,6 +39946,42 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG,
Subtarget);
}
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 &&
+ Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI())) {
+ unsigned TruncOpc;
+ SDValue SatVal;
+ if (auto SSatVal = detectSSatPattern(In, VT)) {
+ SatVal = SSatVal;
+ TruncOpc = X86ISD::VTRUNCS;
+ } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) {
+ SatVal = USatVal;
+ TruncOpc = X86ISD::VTRUNCUS;
+ }
+ if (SatVal) {
+ unsigned ResElts = VT.getVectorNumElements();
+ // If the input type is less than 512 bits and we don't have VLX, we need
+ // to widen to 512 bits.
+ if (!Subtarget.hasVLX() && !InVT.is512BitVector()) {
+ unsigned NumConcats = 512 / InVT.getSizeInBits();
+ ResElts *= NumConcats;
+ SmallVector<SDValue, 4> ConcatOps(NumConcats, DAG.getUNDEF(InVT));
+ ConcatOps[0] = SatVal;
+ InVT = EVT::getVectorVT(*DAG.getContext(), InSVT,
+ NumConcats * InVT.getVectorNumElements());
+ SatVal = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, ConcatOps);
+ }
+ // Widen the result if its narrower than 128 bits.
+ if (ResElts * SVT.getSizeInBits() < 128)
+ ResElts = 128 / SVT.getSizeInBits();
+ EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), SVT, ResElts);
+ SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
+ }
+ }
+
return SDValue();
}
@@ -39377,7 +40040,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
return true;
};
- // Check if each element of the vector is left-shifted by one.
+ // Check if each element of the vector is right-shifted by one.
auto LHS = In.getOperand(0);
auto RHS = In.getOperand(1);
if (!IsConstVectorInRange(RHS, 1, 1))
@@ -39679,90 +40342,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
return Blend;
}
- if (Mld->getExtensionType() != ISD::EXTLOAD)
- return SDValue();
-
- // Resolve extending loads.
- EVT VT = Mld->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- EVT LdVT = Mld->getMemoryVT();
- SDLoc dl(Mld);
-
- assert(LdVT != VT && "Cannot extend to the same type");
- unsigned ToSz = VT.getScalarSizeInBits();
- unsigned FromSz = LdVT.getScalarSizeInBits();
- // From/To sizes and ElemCount must be pow of two.
- assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
- "Unexpected size for extending masked load");
-
- unsigned SizeRatio = ToSz / FromSz;
- assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
- LdVT.getScalarType(), NumElems*SizeRatio);
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- // Convert PassThru value.
- SDValue WidePassThru = DAG.getBitcast(WideVecVT, Mld->getPassThru());
- if (!Mld->getPassThru().isUndef()) {
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
- "WideVecVT should be legal");
- WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru,
- DAG.getUNDEF(WideVecVT), ShuffleVec);
- }
-
- // Prepare the new mask.
- SDValue NewMask;
- SDValue Mask = Mld->getMask();
- if (Mask.getValueType() == VT) {
- // Mask and original value have the same type.
- NewMask = DAG.getBitcast(WideVecVT, Mask);
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
- for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i)
- ShuffleVec[i] = NumElems * SizeRatio;
- NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
- DAG.getConstant(0, dl, WideVecVT),
- ShuffleVec);
- } else {
- assert(Mask.getValueType().getVectorElementType() == MVT::i1);
- unsigned WidenNumElts = NumElems*SizeRatio;
- unsigned MaskNumElts = VT.getVectorNumElements();
- EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- WidenNumElts);
-
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
- SmallVector<SDValue, 16> Ops(NumConcat, ZeroVal);
- Ops[0] = Mask;
- NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
- }
-
- SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
- Mld->getBasePtr(), NewMask, WidePassThru,
- Mld->getMemoryVT(), Mld->getMemOperand(),
- ISD::NON_EXTLOAD);
-
- SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd);
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i * SizeRatio] = i;
-
- // Can't shuffle using an illegal type.
- assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
- "WideVecVT should be legal");
- SlicedVec = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
- DAG.getUNDEF(WideVecVT), ShuffleVec);
- SlicedVec = DAG.getBitcast(VT, SlicedVec);
-
- return DCI.CombineTo(N, SlicedVec, WideLd.getValue(1), true);
+ return SDValue();
}
/// If exactly one element of the mask is set for a non-truncating masked store,
@@ -39800,123 +40380,45 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT VT = Mst->getValue().getValueType();
- EVT StVT = Mst->getMemoryVT();
SDLoc dl(Mst);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!Mst->isTruncatingStore()) {
- if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
- return ScalarStore;
-
- // If the mask value has been legalized to a non-boolean vector, try to
- // simplify ops leading up to it. We only demand the MSB of each lane.
- SDValue Mask = Mst->getMask();
- if (Mask.getScalarValueSizeInBits() != 1) {
- APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
- if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
- return SDValue(N, 0);
- }
-
- // TODO: AVX512 targets should also be able to simplify something like the
- // pattern above, but that pattern will be different. It will either need to
- // match setcc more generally or match PCMPGTM later (in tablegen?).
-
- SDValue Value = Mst->getValue();
- if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
- TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
- Mst->getMemoryVT())) {
- return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
- Mst->getBasePtr(), Mask,
- Mst->getMemoryVT(), Mst->getMemOperand(), true);
- }
-
+ if (Mst->isTruncatingStore())
return SDValue();
- }
-
- // Resolve truncating stores.
- unsigned NumElems = VT.getVectorNumElements();
- assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromSz = VT.getScalarSizeInBits();
- unsigned ToSz = StVT.getScalarSizeInBits();
-
- // The truncating store is legal in some cases. For example
- // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
- // are designated for truncate store.
- // In this case we don't need any further transformations.
- if (TLI.isTruncStoreLegal(VT, StVT))
- return SDValue();
+ if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
+ return ScalarStore;
- // From/To sizes and ElemCount must be pow of two.
- assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
- "Unexpected size for truncating masked store");
- // We are going to use the original vector elt for storing.
- // Accumulated smaller vector elements must be a multiple of the store size.
- assert (((NumElems * FromSz) % ToSz) == 0 &&
- "Unexpected ratio for truncating masked store");
-
- unsigned SizeRatio = FromSz / ToSz;
- assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
- StVT.getScalarType(), NumElems*SizeRatio);
-
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue());
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
- "WideVecVT should be legal");
-
- SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
- DAG.getUNDEF(WideVecVT),
- ShuffleVec);
-
- SDValue NewMask;
+ // If the mask value has been legalized to a non-boolean vector, try to
+ // simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mst->getMask();
- if (Mask.getValueType() == VT) {
- // Mask and original value have the same type.
- NewMask = DAG.getBitcast(WideVecVT, Mask);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
- for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
- ShuffleVec[i] = NumElems*SizeRatio;
- NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
- DAG.getConstant(0, dl, WideVecVT),
- ShuffleVec);
- } else {
- assert(Mask.getValueType().getVectorElementType() == MVT::i1);
- unsigned WidenNumElts = NumElems*SizeRatio;
- unsigned MaskNumElts = VT.getVectorNumElements();
- EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- WidenNumElts);
+ if (Mask.getScalarValueSizeInBits() != 1) {
+ APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+ return SDValue(N, 0);
+ }
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
- SmallVector<SDValue, 16> Ops(NumConcat, ZeroVal);
- Ops[0] = Mask;
- NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
+ SDValue Value = Mst->getValue();
+ if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ Mst->getMemoryVT())) {
+ return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
+ Mst->getBasePtr(), Mask,
+ Mst->getMemoryVT(), Mst->getMemOperand(), true);
}
- return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal,
- Mst->getBasePtr(), NewMask, StVT,
- Mst->getMemOperand(), false);
+ return SDValue();
}
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);
- EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
unsigned Alignment = St->getAlignment();
- SDValue StoredVal = St->getOperand(1);
+ SDValue StoredVal = St->getValue();
+ EVT VT = StoredVal.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Convert a store of vXi1 into a store of iX and a bitcast.
@@ -39986,8 +40488,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getMemOperand()->getFlags());
}
- // If we are saving a concatenation of two XMM registers and 32-byte stores
- // are slow, such as on Sandy Bridge, perform two 16-byte stores.
+ // If we are saving a 32-byte vector and 32-byte stores are slow, such as on
+ // Sandy Bridge, perform two 16-byte stores.
bool Fast;
if (VT.is256BitVector() && StVT == VT &&
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
@@ -40026,13 +40528,24 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() &&
St->getValue().getOpcode() == ISD::TRUNCATE &&
St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
- TLI.isTruncStoreLegalOrCustom(MVT::v16i32, MVT::v16i8) &&
- !DCI.isBeforeLegalizeOps()) {
+ TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
+ St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
MVT::v16i8, St->getMemOperand());
}
+ // Try to fold a VTRUNCUS or VTRUNCS into a truncating store.
+ if (!St->isTruncatingStore() && StoredVal.hasOneUse() &&
+ (StoredVal.getOpcode() == X86ISD::VTRUNCUS ||
+ StoredVal.getOpcode() == X86ISD::VTRUNCS) &&
+ TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
+ bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
+ return EmitTruncSStore(IsSigned, St->getChain(),
+ dl, StoredVal.getOperand(0), St->getBasePtr(),
+ VT, St->getMemOperand(), DAG);
+ }
+
// Optimize trunc store (of multiple scalars) to shuffle and store.
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
@@ -40040,100 +40553,26 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// Check if we can detect an AVG pattern from the truncation. If yes,
// replace the trunc store by a normal store with the result of X86ISD::AVG
// instruction.
- if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,
- Subtarget, dl))
- return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
- St->getMemOperand()->getFlags());
-
- if (SDValue Val =
- detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget,
- TLI))
- return EmitTruncSStore(true /* Signed saturation */, St->getChain(),
- dl, Val, St->getBasePtr(),
- St->getMemoryVT(), St->getMemOperand(), DAG);
- if (SDValue Val = detectAVX512USatPattern(St->getValue(), St->getMemoryVT(),
- DAG, dl, Subtarget, TLI))
- return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
- dl, Val, St->getBasePtr(),
- St->getMemoryVT(), St->getMemOperand(), DAG);
-
- unsigned NumElems = VT.getVectorNumElements();
- assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromSz = VT.getScalarSizeInBits();
- unsigned ToSz = StVT.getScalarSizeInBits();
-
- // The truncating store is legal in some cases. For example
- // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
- // are designated for truncate store.
- // In this case we don't need any further transformations.
- if (TLI.isTruncStoreLegalOrCustom(VT, StVT))
- return SDValue();
-
- // From, To sizes and ElemCount must be pow of two
- if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
- // We are going to use the original vector elt for storing.
- // Accumulated smaller vector elements must be a multiple of the store size.
- if (0 != (NumElems * FromSz) % ToSz) return SDValue();
-
- unsigned SizeRatio = FromSz / ToSz;
-
- assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
- StVT.getScalarType(), NumElems*SizeRatio);
-
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue());
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i] = i * SizeRatio;
+ if (DCI.isBeforeLegalize() || TLI.isTypeLegal(St->getMemoryVT()))
+ if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,
+ Subtarget, dl))
+ return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
- // Can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT))
- return SDValue();
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
- DAG.getUNDEF(WideVecVT),
- ShuffleVec);
- // At this point all of the data is stored at the bottom of the
- // register. We now need to save it to mem.
-
- // Find the largest store unit
- MVT StoreType = MVT::i8;
- for (MVT Tp : MVT::integer_valuetypes()) {
- if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
- StoreType = Tp;
- }
-
- // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
- if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
- (64 <= NumElems * ToSz))
- StoreType = MVT::f64;
-
- // Bitcast the original vector into a vector of store-size units
- EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
- StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
- assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff);
- SmallVector<SDValue, 8> Chains;
- SDValue Ptr = St->getBasePtr();
-
- // Perform one or more big stores into memory.
- for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
- SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- StoreType, ShuffWide,
- DAG.getIntPtrConstant(i, dl));
- SDValue Ch =
- DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(),
- St->getAlignment(), St->getMemOperand()->getFlags());
- Ptr = DAG.getMemBasePlusOffset(Ptr, StoreType.getStoreSize(), dl);
- Chains.push_back(Ch);
+ if (TLI.isTruncStoreLegal(VT, StVT)) {
+ if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT()))
+ return EmitTruncSStore(true /* Signed saturation */, St->getChain(),
+ dl, Val, St->getBasePtr(),
+ St->getMemoryVT(), St->getMemOperand(), DAG);
+ if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(),
+ DAG, dl))
+ return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
+ dl, Val, St->getBasePtr(),
+ St->getMemoryVT(), St->getMemOperand(), DAG);
}
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ return SDValue();
}
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
@@ -40149,11 +40588,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal =
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
- if (((VT.isVector() && !VT.isFloatingPoint()) ||
- (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
+ if ((VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit()) &&
isa<LoadSDNode>(St->getValue()) &&
- !cast<LoadSDNode>(St->getValue())->isVolatile() &&
- St->getChain().hasOneUse() && !St->isVolatile()) {
+ cast<LoadSDNode>(St->getValue())->isSimple() &&
+ St->getChain().hasOneUse() && St->isSimple()) {
LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());
SmallVector<SDValue, 8> Ops;
@@ -40595,8 +41033,8 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- // Requires SSE2 but AVX512 has fast truncate.
- if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
+ // Requires SSE2.
+ if (!Subtarget.hasSSE2())
return SDValue();
if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple())
@@ -40620,6 +41058,13 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
return SDValue();
+ // AVX512 has fast truncate, but if the input is already going to be split,
+ // there's no harm in trying pack.
+ if (Subtarget.hasAVX512() &&
+ !(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
+ InVT.is512BitVector()))
+ return SDValue();
+
unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
@@ -40658,9 +41103,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
// Only handle vXi16 types that are at least 128-bits unless they will be
// widened.
- if (!VT.isVector() || VT.getVectorElementType() != MVT::i16 ||
- (!ExperimentalVectorWideningLegalization &&
- VT.getVectorNumElements() < 8))
+ if (!VT.isVector() || VT.getVectorElementType() != MVT::i16)
return SDValue();
// Input type should be vXi32.
@@ -40874,6 +41317,19 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
return combineVectorTruncation(N, DAG, Subtarget);
}
+static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+ SDLoc DL(N);
+
+ if (auto SSatVal = detectSSatPattern(In, VT))
+ return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
+ if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))
+ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
+
+ return SDValue();
+}
+
/// Returns the negated value if the node \p N flips sign of FP value.
///
/// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000)
@@ -40883,10 +41339,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
/// In this case we go though all bitcasts.
/// This also recognizes splat of a negated value and returns the splat of that
/// value.
-static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
+static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {
if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
+ // Don't recurse exponentially.
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
@@ -40900,7 +41360,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
// of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
if (!SVOp->getOperand(1).isUndef())
return SDValue();
- if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode()))
+ if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode(), Depth + 1))
if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
SVOp->getMask());
@@ -40914,7 +41374,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
SDValue InsVal = Op.getOperand(1);
if (!InsVector.isUndef())
return SDValue();
- if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode()))
+ if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1))
if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
NegInsVal, Op.getOperand(2));
@@ -40951,6 +41411,57 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
return SDValue();
}
+static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
+ bool NegRes) {
+ if (NegMul) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::FMA: Opcode = X86ISD::FNMADD; break;
+ case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break;
+ case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break;
+ case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break;
+ case X86ISD::FNMADD: Opcode = ISD::FMA; break;
+ case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break;
+ case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
+ case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
+ }
+ }
+
+ if (NegAcc) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::FMA: Opcode = X86ISD::FMSUB; break;
+ case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
+ case X86ISD::FMSUB: Opcode = ISD::FMA; break;
+ case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
+ case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break;
+ case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
+ case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break;
+ case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
+ case X86ISD::FMADDSUB: Opcode = X86ISD::FMSUBADD; break;
+ case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break;
+ case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; break;
+ case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break;
+ }
+ }
+
+ if (NegRes) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::FMA: Opcode = X86ISD::FNMSUB; break;
+ case X86ISD::FMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
+ case X86ISD::FMSUB: Opcode = X86ISD::FNMADD; break;
+ case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
+ case X86ISD::FNMADD: Opcode = X86ISD::FMSUB; break;
+ case X86ISD::FNMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
+ case X86ISD::FNMSUB: Opcode = ISD::FMA; break;
+ case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
+ }
+ }
+
+ return Opcode;
+}
+
/// Do target-specific dag combines on floating point negations.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -40980,29 +41491,123 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
// If we're negating an FMA node, then we can adjust the
// instruction to include the extra negation.
- unsigned NewOpcode = 0;
if (Arg.hasOneUse() && Subtarget.hasAnyFMA()) {
switch (Arg.getOpcode()) {
- case ISD::FMA: NewOpcode = X86ISD::FNMSUB; break;
- case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break;
- case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break;
- case X86ISD::FNMSUB: NewOpcode = ISD::FMA; break;
- case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break;
- case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break;
- case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break;
- case X86ISD::FNMSUB_RND: NewOpcode = X86ISD::FMADD_RND; break;
- // We can't handle scalar intrinsic node here because it would only
- // invert one element and not the whole vector. But we could try to handle
- // a negation of the lower element only.
- }
- }
- if (NewOpcode)
- return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT,
- Arg.getNode()->ops()));
+ case ISD::FMA:
+ case X86ISD::FMSUB:
+ case X86ISD::FNMADD:
+ case X86ISD::FNMSUB:
+ case X86ISD::FMADD_RND:
+ case X86ISD::FMSUB_RND:
+ case X86ISD::FNMADD_RND:
+ case X86ISD::FNMSUB_RND: {
+ // We can't handle scalar intrinsic node here because it would only
+ // invert one element and not the whole vector. But we could try to handle
+ // a negation of the lower element only.
+ unsigned NewOpcode = negateFMAOpcode(Arg.getOpcode(), false, false, true);
+ return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, Arg->ops()));
+ }
+ }
+ }
return SDValue();
}
+char X86TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations,
+ bool ForCodeSize,
+ unsigned Depth) const {
+ // fneg patterns are removable even if they have multiple uses.
+ if (isFNEG(DAG, Op.getNode(), Depth))
+ return 2;
+
+ // Don't recurse exponentially.
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return 0;
+
+ EVT VT = Op.getValueType();
+ EVT SVT = VT.getScalarType();
+ switch (Op.getOpcode()) {
+ case ISD::FMA:
+ case X86ISD::FMSUB:
+ case X86ISD::FNMADD:
+ case X86ISD::FNMSUB:
+ case X86ISD::FMADD_RND:
+ case X86ISD::FMSUB_RND:
+ case X86ISD::FNMADD_RND:
+ case X86ISD::FNMSUB_RND: {
+ if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||
+ !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations)
+ break;
+
+ // This is always negatible for free but we might be able to remove some
+ // extra operand negations as well.
+ for (int i = 0; i != 3; ++i) {
+ char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (V == 2)
+ return V;
+ }
+ return 1;
+ }
+ }
+
+ return TargetLowering::isNegatibleForFree(Op, DAG, LegalOperations,
+ ForCodeSize, Depth);
+}
+
+SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations,
+ bool ForCodeSize,
+ unsigned Depth) const {
+ // fneg patterns are removable even if they have multiple uses.
+ if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth))
+ return DAG.getBitcast(Op.getValueType(), Arg);
+
+ EVT VT = Op.getValueType();
+ EVT SVT = VT.getScalarType();
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ case ISD::FMA:
+ case X86ISD::FMSUB:
+ case X86ISD::FNMADD:
+ case X86ISD::FNMSUB:
+ case X86ISD::FMADD_RND:
+ case X86ISD::FMSUB_RND:
+ case X86ISD::FNMADD_RND:
+ case X86ISD::FNMSUB_RND: {
+ if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||
+ !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations)
+ break;
+
+ // This is always negatible for free but we might be able to remove some
+ // extra operand negations as well.
+ SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
+ for (int i = 0; i != 3; ++i) {
+ char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (V == 2)
+ NewOps[i] = getNegatedExpression(Op.getOperand(i), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ }
+
+ bool NegA = !!NewOps[0];
+ bool NegB = !!NewOps[1];
+ bool NegC = !!NewOps[2];
+ unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true);
+
+ // Fill in the non-negated ops with the original values.
+ for (int i = 0, e = Op.getNumOperands(); i != e; ++i)
+ if (!NewOps[i])
+ NewOps[i] = Op.getOperand(i);
+ return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps);
+ }
+ }
+
+ return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
+ ForCodeSize, Depth);
+}
+
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
@@ -41312,8 +41917,8 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
assert(InVT.is128BitVector() && "Expected 128-bit input vector");
LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
- // Unless the load is volatile.
- if (!LN->isVolatile()) {
+ // Unless the load is volatile or atomic.
+ if (LN->isSimple()) {
SDLoc dl(N);
unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
MVT MemVT = MVT::getIntegerVT(NumBits);
@@ -41347,8 +41952,8 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
assert(InVT.is128BitVector() && "Expected 128-bit input vector");
LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
- // Unless the load is volatile.
- if (!LN->isVolatile()) {
+ // Unless the load is volatile or atomic.
+ if (LN->isSimple()) {
SDLoc dl(N);
unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
MVT MemVT = MVT::getFloatingPointVT(NumBits);
@@ -41724,127 +42329,6 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(EltSizeInBits - 1, DL, VT));
}
-/// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or
-/// ZERO_EXTEND_VECTOR_INREG, this requires the splitting (or concatenating
-/// with UNDEFs) of the input to vectors of the same size as the target type
-/// which then extends the lowest elements.
-static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- if (ExperimentalVectorWideningLegalization)
- return SDValue();
-
- unsigned Opcode = N->getOpcode();
- // TODO - add ANY_EXTEND support.
- if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
- return SDValue();
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
- if (!Subtarget.hasSSE2())
- return SDValue();
-
- SDValue N0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
- EVT SVT = VT.getScalarType();
- EVT InVT = N0.getValueType();
- EVT InSVT = InVT.getScalarType();
-
- // FIXME: Generic DAGCombiner previously had a bug that would cause a
- // sign_extend of setcc to sometimes return the original node and tricked it
- // into thinking CombineTo was used which prevented the target combines from
- // running.
- // Earlying out here to avoid regressions like this
- // (v4i32 (sext (v4i1 (setcc (v4i16)))))
- // Becomes
- // (v4i32 (sext_invec (v8i16 (concat (v4i16 (setcc (v4i16))), undef))))
- // Type legalized to
- // (v4i32 (sext_invec (v8i16 (trunc_invec (v4i32 (setcc (v4i32)))))))
- // Leading to a packssdw+pmovsxwd
- // We could write a DAG combine to fix this, but really we shouldn't be
- // creating sext_invec that's forcing v8i16 into the DAG.
- if (N0.getOpcode() == ISD::SETCC)
- return SDValue();
-
- // Input type must be a vector and we must be extending legal integer types.
- if (!VT.isVector() || VT.getVectorNumElements() < 2)
- return SDValue();
- if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
- return SDValue();
- if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
- return SDValue();
-
- // If the input/output types are both legal then we have at least AVX1 and
- // we will be able to use SIGN_EXTEND/ZERO_EXTEND directly.
- if (DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
- DAG.getTargetLoweringInfo().isTypeLegal(InVT))
- return SDValue();
-
- SDLoc DL(N);
-
- auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) {
- EVT SrcVT = N.getValueType();
- EVT DstVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
- Size / SrcVT.getScalarSizeInBits());
- SmallVector<SDValue, 8> Opnds(Size / SrcVT.getSizeInBits(),
- DAG.getUNDEF(SrcVT));
- Opnds[0] = N;
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Opnds);
- };
-
- // If target-size is less than 128-bits, extend to a type that would extend
- // to 128 bits, extend that and extract the original target vector.
- if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits())) {
- unsigned Scale = 128 / VT.getSizeInBits();
- EVT ExVT =
- EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
- SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
- SDValue SExt = DAG.getNode(Opcode, DL, ExVT, Ex);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
- DAG.getIntPtrConstant(0, DL));
- }
-
- // If target-size is 128-bits (or 256-bits on AVX target), then convert to
- // ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT.
- // Also use this if we don't have SSE41 to allow the legalizer do its job.
- if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget.hasAVX()) ||
- (VT.is512BitVector() && Subtarget.useAVX512Regs())) {
- SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
- Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode);
- return DAG.getNode(Opcode, DL, VT, ExOp);
- }
-
- auto SplitAndExtendInReg = [&](unsigned SplitSize) {
- unsigned NumVecs = VT.getSizeInBits() / SplitSize;
- unsigned NumSubElts = SplitSize / SVT.getSizeInBits();
- EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
- EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
-
- unsigned IROpc = getOpcode_EXTEND_VECTOR_INREG(Opcode);
- SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
- SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
- DAG.getIntPtrConstant(Offset, DL));
- SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
- SrcVec = DAG.getNode(IROpc, DL, SubVT, SrcVec);
- Opnds.push_back(SrcVec);
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
- };
-
- // On pre-AVX targets, split into 128-bit nodes of
- // ISD::*_EXTEND_VECTOR_INREG.
- if (!Subtarget.hasAVX() && !(VT.getSizeInBits() % 128))
- return SplitAndExtendInReg(128);
-
- // On pre-AVX512 targets, split into 256-bit nodes of
- // ISD::*_EXTEND_VECTOR_INREG.
- if (!Subtarget.useAVX512Regs() && !(VT.getSizeInBits() % 256))
- return SplitAndExtendInReg(256);
-
- return SDValue();
-}
-
// Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm
// result type.
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
@@ -41915,9 +42399,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT));
}
- if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
- return V;
-
if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
return V;
@@ -41931,45 +42412,15 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
- if (NegMul) {
- switch (Opcode) {
- default: llvm_unreachable("Unexpected opcode");
- case ISD::FMA: Opcode = X86ISD::FNMADD; break;
- case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break;
- case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break;
- case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break;
- case X86ISD::FNMADD: Opcode = ISD::FMA; break;
- case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break;
- case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
- case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
- }
- }
-
- if (NegAcc) {
- switch (Opcode) {
- default: llvm_unreachable("Unexpected opcode");
- case ISD::FMA: Opcode = X86ISD::FMSUB; break;
- case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
- case X86ISD::FMSUB: Opcode = ISD::FMA; break;
- case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
- case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break;
- case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
- case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break;
- case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
- }
- }
-
- return Opcode;
-}
-
static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
// Let legalize expand this if it isn't a legal type yet.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(VT))
return SDValue();
EVT ScalarVT = VT.getScalarType();
@@ -41980,17 +42431,21 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
SDValue B = N->getOperand(1);
SDValue C = N->getOperand(2);
- auto invertIfNegative = [&DAG](SDValue &V) {
- if (SDValue NegVal = isFNEG(DAG, V.getNode())) {
- V = DAG.getBitcast(V.getValueType(), NegVal);
+ auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
+ bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool LegalOperations = !DCI.isBeforeLegalizeOps();
+ if (TLI.isNegatibleForFree(V, DAG, LegalOperations, CodeSize) == 2) {
+ V = TLI.getNegatedExpression(V, DAG, LegalOperations, CodeSize);
return true;
}
// Look through extract_vector_elts. If it comes from an FNEG, create a
// new extract from the FNEG input.
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(V.getOperand(1))) {
- if (SDValue NegVal = isFNEG(DAG, V.getOperand(0).getNode())) {
- NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal);
+ SDValue Vec = V.getOperand(0);
+ if (TLI.isNegatibleForFree(Vec, DAG, LegalOperations, CodeSize) == 2) {
+ SDValue NegVal =
+ TLI.getNegatedExpression(Vec, DAG, LegalOperations, CodeSize);
V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
NegVal, V.getOperand(1));
return true;
@@ -42009,7 +42464,8 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
if (!NegA && !NegB && !NegC)
return SDValue();
- unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
+ unsigned NewOpcode =
+ negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false);
if (N->getNumOperands() == 4)
return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
@@ -42017,33 +42473,27 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
}
// Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C)
+// Combine FMSUBADD(A, B, FNEG(C)) -> FMADDSUB(A, B, C)
static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+ TargetLowering::DAGCombinerInfo &DCI) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool LegalOperations = !DCI.isBeforeLegalizeOps();
- SDValue NegVal = isFNEG(DAG, N->getOperand(2).getNode());
- if (!NegVal)
+ SDValue N2 = N->getOperand(2);
+ if (TLI.isNegatibleForFree(N2, DAG, LegalOperations, CodeSize) != 2)
return SDValue();
- // FIXME: Should we bitcast instead?
- if (NegVal.getValueType() != VT)
- return SDValue();
-
- unsigned NewOpcode;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected opcode!");
- case X86ISD::FMADDSUB: NewOpcode = X86ISD::FMSUBADD; break;
- case X86ISD::FMADDSUB_RND: NewOpcode = X86ISD::FMSUBADD_RND; break;
- case X86ISD::FMSUBADD: NewOpcode = X86ISD::FMADDSUB; break;
- case X86ISD::FMSUBADD_RND: NewOpcode = X86ISD::FMADDSUB_RND; break;
- }
+ SDValue NegN2 = TLI.getNegatedExpression(N2, DAG, LegalOperations, CodeSize);
+ unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false);
if (N->getNumOperands() == 4)
return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),
- NegVal, N->getOperand(3));
+ NegN2, N->getOperand(3));
return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),
- NegVal);
+ NegN2);
}
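
A scalar model may help here: FMADDSUB adds C in odd lanes and subtracts it in even lanes, so negating C gives exactly FMSUBADD, which is the whole fold. The sketch below is illustrative only; the helper names are made up and this is not the LLVM API.

```cpp
#include <cassert>
#include <cstddef>

// Even lanes: a*b - c, odd lanes: a*b + c (the FMADDSUB lane pattern).
static void fmaddsub(const float *A, const float *B, const float *C,
                     float *R, size_t N) {
  for (size_t I = 0; I != N; ++I)
    R[I] = (I & 1) ? A[I] * B[I] + C[I] : A[I] * B[I] - C[I];
}

// Even lanes: a*b + c, odd lanes: a*b - c (the FMSUBADD lane pattern).
static void fmsubadd(const float *A, const float *B, const float *C,
                     float *R, size_t N) {
  for (size_t I = 0; I != N; ++I)
    R[I] = (I & 1) ? A[I] * B[I] - C[I] : A[I] * B[I] + C[I];
}

int main() {
  float A[4] = {1, 2, 3, 4}, B[4] = {5, 6, 7, 8}, C[4] = {1, 1, 1, 1};
  float NegC[4] = {-1, -1, -1, -1}, R1[4], R2[4];
  fmaddsub(A, B, NegC, R1, 4);
  fmsubadd(A, B, C, R2, 4);
  for (int I = 0; I != 4; ++I)
    assert(R1[I] == R2[I]); // FMADDSUB(A, B, -C) == FMSUBADD(A, B, C)
}
```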
static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
@@ -42090,9 +42540,6 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
return V;
- if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
- return V;
-
if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
return V;
@@ -42111,12 +42558,11 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
- unsigned NumSrcElts = N00.getValueType().getVectorNumElements();
unsigned NumSrcEltBits = N00.getScalarValueSizeInBits();
APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2);
if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) &&
(N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) {
- return concatSubVectors(N00, N01, VT, NumSrcElts * 2, DAG, dl, 128);
+ return concatSubVectors(N00, N01, DAG, dl);
}
}
@@ -42159,16 +42605,30 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
!IsOrXorXorCCZero)
return SDValue();
- // TODO: Use PXOR + PTEST for SSE4.1 or later?
EVT VT = SetCC->getValueType(0);
SDLoc DL(SetCC);
+ bool HasAVX = Subtarget.hasAVX();
+
+ // Use XOR (plus OR) and PTEST after SSE4.1 and before AVX512.
+ // Otherwise use PCMPEQ (plus AND) and mask testing.
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX2()) ||
+ (OpSize == 256 && HasAVX) ||
(OpSize == 512 && Subtarget.useAVX512Regs())) {
- EVT VecVT = OpSize == 512 ? MVT::v16i32 :
- OpSize == 256 ? MVT::v32i8 :
- MVT::v16i8;
- EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT;
+ bool HasPT = Subtarget.hasSSE41();
+ EVT VecVT = MVT::v16i8;
+ EVT CmpVT = MVT::v16i8;
+ if (OpSize == 256)
+ VecVT = CmpVT = MVT::v32i8;
+ if (OpSize == 512) {
+ if (Subtarget.hasBWI()) {
+ VecVT = MVT::v64i8;
+ CmpVT = MVT::v64i1;
+ } else {
+ VecVT = MVT::v16i32;
+ CmpVT = MVT::v16i1;
+ }
+ }
+
SDValue Cmp;
if (IsOrXorXorCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
@@ -42179,18 +42639,38 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1));
SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0));
SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1));
- SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
- SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ);
- Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2);
+ if (VecVT == CmpVT && HasPT) {
+ SDValue Cmp1 = DAG.getNode(ISD::XOR, DL, VecVT, A, B);
+ SDValue Cmp2 = DAG.getNode(ISD::XOR, DL, VecVT, C, D);
+ Cmp = DAG.getNode(ISD::OR, DL, VecVT, Cmp1, Cmp2);
+ } else {
+ SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+ SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ);
+ Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2);
+ }
} else {
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
- Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
+ if (VecVT == CmpVT && HasPT) {
+ Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY);
+ } else {
+ Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
+ }
}
// For 512-bits we want to emit a setcc that will lower to kortest.
- if (OpSize == 512)
- return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp),
- DAG.getConstant(0xFFFF, DL, MVT::i16), CC);
+ if (VecVT != CmpVT) {
+ EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64 : MVT::i16;
+ SDValue Mask = DAG.getAllOnesConstant(DL, KRegVT);
+ return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp), Mask, CC);
+ }
+ if (HasPT) {
+ SDValue BCCmp = DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64,
+ Cmp);
+ SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp);
+ X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+ SDValue SetCC = getSETCC(X86CC, PT, DL, DAG);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, SetCC.getValue(0));
+ }
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
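
The XOR+OR+PTEST path added in this hunk rests on a simple identity: two vectors are equal exactly when the OR of their lane-wise XORs is zero, and PTEST of a value against itself sets ZF precisely when that value is all zero. A minimal scalar model (hypothetical helper, not LLVM code):

```cpp
#include <cstdint>

// 256-bit equality via OR-of-XORs, mirroring what PTEST checks.
static bool allEqual256(const uint64_t A[4], const uint64_t B[4]) {
  uint64_t Acc = 0;
  for (int I = 0; I != 4; ++I)
    Acc |= A[I] ^ B[I]; // any mismatched bit survives in Acc
  return Acc == 0;      // PTEST sets ZF when the vector is all zero
}
```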
@@ -42270,8 +42750,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
// go through type promotion to a 128-bit vector.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
VT.getVectorElementType() == MVT::i1 &&
- (ExperimentalVectorWideningLegalization ||
- VT.getVectorNumElements() > 4) &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS,
@@ -42289,7 +42767,8 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDValue Src = N->getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = N->getSimpleValueType(0);
@@ -42310,7 +42789,7 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
// Look through int->fp bitcasts that don't change the element width.
unsigned EltWidth = SrcVT.getScalarSizeInBits();
- if (Src.getOpcode() == ISD::BITCAST &&
+ if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::BITCAST &&
Src.getOperand(0).getScalarValueSizeInBits() == EltWidth)
return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0));
@@ -42334,71 +42813,123 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // With vector masks we only demand the upper bit of the mask.
+ SDValue Mask = cast<X86MaskedGatherScatterSDNode>(N)->getMask();
+ if (Mask.getScalarValueSizeInBits() != 1) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+ return SDValue(N, 0);
+ }
+
+ return SDValue();
+}
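
The rationale for demanding only the sign bit: AVX2-style gathers and scatters with vector masks treat a lane as active iff the most significant bit of its mask element is set, so anything feeding the low bits is dead. A hedged sketch of the observed semantics (made-up helper, for illustration):

```cpp
#include <cstdint>

// Only bit 31 of a 32-bit mask element is consulted by the hardware.
static bool laneActive(uint32_t MaskElt) {
  return (MaskElt & 0x80000000u) != 0;
}
```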
+
static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+ TargetLowering::DAGCombinerInfo &DCI) {
SDLoc DL(N);
+ auto *GorS = cast<MaskedGatherScatterSDNode>(N);
+ SDValue Chain = GorS->getChain();
+ SDValue Index = GorS->getIndex();
+ SDValue Mask = GorS->getMask();
+ SDValue Base = GorS->getBasePtr();
+ SDValue Scale = GorS->getScale();
- if (DCI.isBeforeLegalizeOps()) {
- SDValue Index = N->getOperand(4);
- // Remove any sign extends from 32 or smaller to larger than 32.
- // Only do this before LegalizeOps in case we need the sign extend for
- // legalization.
- if (Index.getOpcode() == ISD::SIGN_EXTEND) {
- if (Index.getScalarValueSizeInBits() > 32 &&
- Index.getOperand(0).getScalarValueSizeInBits() <= 32) {
- SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
- NewOps[4] = Index.getOperand(0);
- SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
- if (Res == N) {
- // The original sign extend has less users, add back to worklist in
- // case it needs to be removed
- DCI.AddToWorklist(Index.getNode());
- DCI.AddToWorklist(N);
+ if (DCI.isBeforeLegalize()) {
+ unsigned IndexWidth = Index.getScalarValueSizeInBits();
+
+ // Shrink constant indices if they are larger than 32 bits.
+ // Only do this before legalize types since v2i64 could become v2i32.
+ // FIXME: We could check that the type is legal if we're after legalize
+ // types, but then we would need to construct test cases where that happens.
+ // FIXME: We could support more than just constant vectors, but we need to be
+ // careful with costing. A truncate that can be optimized out would be fine.
+ // Otherwise we might only want to create a truncate if it avoids a split.
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(Index)) {
+ if (BV->isConstant() && IndexWidth > 32 &&
+ DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) {
+ unsigned NumElts = Index.getValueType().getVectorNumElements();
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index);
+ if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
+ SDValue Ops[] = { Chain, Gather->getPassThru(),
+ Mask, Base, Index, Scale };
+ return DAG.getMaskedGather(Gather->getVTList(),
+ Gather->getMemoryVT(), DL, Ops,
+ Gather->getMemOperand(),
+ Gather->getIndexType());
}
- return SDValue(Res, 0);
- }
+ auto *Scatter = cast<MaskedScatterSDNode>(GorS);
+ SDValue Ops[] = { Chain, Scatter->getValue(),
+ Mask, Base, Index, Scale };
+ return DAG.getMaskedScatter(Scatter->getVTList(),
+ Scatter->getMemoryVT(), DL,
+ Ops, Scatter->getMemOperand(),
+ Scatter->getIndexType());
+ }
+ }
+
+ // Shrink any sign/zero extends from 32 or smaller to larger than 32 if
+ // there are sufficient sign bits. Only do this before legalize types to
+ // avoid creating illegal types in truncate.
+ if ((Index.getOpcode() == ISD::SIGN_EXTEND ||
+ Index.getOpcode() == ISD::ZERO_EXTEND) &&
+ IndexWidth > 32 &&
+ Index.getOperand(0).getScalarValueSizeInBits() <= 32 &&
+ DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) {
+ unsigned NumElts = Index.getValueType().getVectorNumElements();
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index);
+ if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
+ SDValue Ops[] = { Chain, Gather->getPassThru(),
+ Mask, Base, Index, Scale };
+ return DAG.getMaskedGather(Gather->getVTList(),
+ Gather->getMemoryVT(), DL, Ops,
+ Gather->getMemOperand(),
+ Gather->getIndexType());
+ }
+ auto *Scatter = cast<MaskedScatterSDNode>(GorS);
+ SDValue Ops[] = { Chain, Scatter->getValue(),
+ Mask, Base, Index, Scale };
+ return DAG.getMaskedScatter(Scatter->getVTList(),
+ Scatter->getMemoryVT(), DL,
+ Ops, Scatter->getMemOperand(),
+ Scatter->getIndexType());
}
+ }
+
+ if (DCI.isBeforeLegalizeOps()) {
+ unsigned IndexWidth = Index.getScalarValueSizeInBits();
// Make sure the index is either i32 or i64
- unsigned ScalarSize = Index.getScalarValueSizeInBits();
- if (ScalarSize != 32 && ScalarSize != 64) {
- MVT EltVT = ScalarSize > 32 ? MVT::i64 : MVT::i32;
+ if (IndexWidth != 32 && IndexWidth != 64) {
+ MVT EltVT = IndexWidth > 32 ? MVT::i64 : MVT::i32;
EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
Index.getValueType().getVectorNumElements());
Index = DAG.getSExtOrTrunc(Index, DL, IndexVT);
- SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
- NewOps[4] = Index;
- SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
- if (Res == N)
- DCI.AddToWorklist(N);
- return SDValue(Res, 0);
- }
-
- // Try to remove zero extends from 32->64 if we know the sign bit of
- // the input is zero.
- if (Index.getOpcode() == ISD::ZERO_EXTEND &&
- Index.getScalarValueSizeInBits() == 64 &&
- Index.getOperand(0).getScalarValueSizeInBits() == 32) {
- if (DAG.SignBitIsZero(Index.getOperand(0))) {
- SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
- NewOps[4] = Index.getOperand(0);
- SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
- if (Res == N) {
- // The original sign extend has less users, add back to worklist in
- // case it needs to be removed
- DCI.AddToWorklist(Index.getNode());
- DCI.AddToWorklist(N);
- }
- return SDValue(Res, 0);
- }
- }
- }
-
- // With AVX2 we only demand the upper bit of the mask.
- if (!Subtarget.hasAVX512()) {
+ if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
+ SDValue Ops[] = { Chain, Gather->getPassThru(),
+ Mask, Base, Index, Scale };
+ return DAG.getMaskedGather(Gather->getVTList(),
+ Gather->getMemoryVT(), DL, Ops,
+ Gather->getMemOperand(),
+ Gather->getIndexType());
+ }
+ auto *Scatter = cast<MaskedScatterSDNode>(GorS);
+ SDValue Ops[] = { Chain, Scatter->getValue(),
+ Mask, Base, Index, Scale };
+ return DAG.getMaskedScatter(Scatter->getVTList(),
+ Scatter->getMemoryVT(), DL,
+ Ops, Scatter->getMemOperand(),
+ Scatter->getIndexType());
+ }
+ }
+
+ // With vector masks we only demand the upper bit of the mask.
+ if (Mask.getScalarValueSizeInBits() != 1) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue Mask = N->getOperand(2);
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
return SDValue(N, 0);
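
The sign-bit test used throughout this function is worth spelling out: a 64-bit index can be truncated to 32 bits without changing its value exactly when more than 32 of its leading bits are copies of the sign bit, i.e. it already fits in i32. A standalone sketch of that test (hand-rolled, not ComputeNumSignBits itself):

```cpp
#include <cassert>
#include <cstdint>

// Number of leading bits equal to the sign bit, including the sign bit.
static int numSignBits(int64_t V) {
  uint64_t U = (uint64_t)V;
  uint64_t Sign = U >> 63;
  int N = 0;
  for (int Bit = 63; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N;
}

int main() {
  int64_t Idx = -5;                     // already fits in i32
  assert(numSignBits(Idx) > 64 - 32);   // the same test as the combine
  assert((int64_t)(int32_t)Idx == Idx); // so truncation preserves the value
}
```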
@@ -42432,7 +42963,7 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
// Make sure to not keep references to operands, as combineSetCCEFLAGS can
// RAUW them under us.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) {
- SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
+ SDValue Cond = DAG.getTargetConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
N->getOperand(1), Cond, Flags);
}
@@ -42549,6 +43080,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
@@ -42578,13 +43110,22 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
unsigned BitWidth = InVT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
if (NumSignBits >= (BitWidth - 31)) {
- EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32);
+ EVT TruncVT = MVT::i32;
if (InVT.isVector())
TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT,
InVT.getVectorNumElements());
SDLoc dl(N);
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
- return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
+ if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
+ }
+ // If we're after legalize and the type is v2i32 we need to shuffle and
+ // use CVTSI2P.
+ assert(InVT == MVT::v2i64 && "Unexpected VT!");
+ SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
+ { 0, 2, -1, -1 });
+ return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf);
}
}
@@ -42604,7 +43145,7 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasDQI() && VT != MVT::f80)
return SDValue();
- if (!Ld->isVolatile() && !VT.isVector() &&
+ if (Ld->isSimple() && !VT.isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!Subtarget.is64Bit() && LdVT == MVT::i64) {
SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD(
@@ -42841,12 +43382,12 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
- SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
- DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getConstant(X86::COND_B, DL,
- MVT::i8),
- N->getOperand(2)),
- DAG.getConstant(1, DL, VT));
+ SDValue Res1 =
+ DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ N->getOperand(2)),
+ DAG.getConstant(1, DL, VT));
return DCI.CombineTo(N, Res1, CarryOut);
}
@@ -42906,7 +43447,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
// 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
Y.getOperand(1));
}
@@ -42924,7 +43465,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
NewEFLAGS);
}
}
@@ -42984,7 +43525,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
SDValue(Neg.getNode(), 1));
}
@@ -42997,7 +43538,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
SDValue One = DAG.getConstant(1, DL, ZVT);
SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1);
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cmp1);
}
}
@@ -43025,9 +43566,6 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasSSE2())
return SDValue();
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
-
EVT VT = N->getValueType(0);
// If the vector size is less than 128, or greater than the supported RegSize,
@@ -43035,14 +43573,27 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
if (!VT.isVector() || VT.getVectorNumElements() < 8)
return SDValue();
- if (Op0.getOpcode() != ISD::MUL)
- std::swap(Op0, Op1);
- if (Op0.getOpcode() != ISD::MUL)
- return SDValue();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
- ShrinkMode Mode;
- if (!canReduceVMulWidth(Op0.getNode(), DAG, Mode) || Mode == MULU16)
- return SDValue();
+ auto UsePMADDWD = [&](SDValue Op) {
+ ShrinkMode Mode;
+ return Op.getOpcode() == ISD::MUL &&
+ canReduceVMulWidth(Op.getNode(), DAG, Mode) && Mode != MULU16 &&
+ (!Subtarget.hasSSE41() ||
+ (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
+ Op->isOnlyUserOf(Op.getOperand(1).getNode())));
+ };
+
+ SDValue MulOp, OtherOp;
+ if (UsePMADDWD(Op0)) {
+ MulOp = Op0;
+ OtherOp = Op1;
+ } else if (UsePMADDWD(Op1)) {
+ MulOp = Op1;
+ OtherOp = Op0;
+ } else
+ return SDValue();
SDLoc DL(N);
EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
@@ -43050,34 +43601,27 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
VT.getVectorNumElements() / 2);
+ // Shrink the operands of mul.
+ SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
+
// Madd vector size is half of the original vector size
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops);
};
-
- auto BuildPMADDWD = [&](SDValue Mul) {
- // Shrink the operands of mul.
- SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(0));
- SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(1));
-
- SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 },
- PMADDWDBuilder);
- // Fill the rest of the output with 0
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd,
- DAG.getConstant(0, DL, MAddVT));
- };
-
- Op0 = BuildPMADDWD(Op0);
-
- // It's possible that Op1 is also a mul we can reduce.
- if (Op1.getOpcode() == ISD::MUL &&
- canReduceVMulWidth(Op1.getNode(), DAG, Mode) && Mode != MULU16) {
- Op1 = BuildPMADDWD(Op1);
- }
-
- return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
+ SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 },
+ PMADDWDBuilder);
+ // Fill the rest of the output with 0
+ SDValue Zero = DAG.getConstant(0, DL, Madd.getSimpleValueType());
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero);
+
+ // Preserve the reduction flag on the ADD. We may need to revisit for the
+ // other operand.
+ SDNodeFlags Flags;
+ Flags.setVectorReduction(true);
+ return DAG.getNode(ISD::ADD, DL, VT, Concat, OtherOp, Flags);
}
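
For context, VPMADDWD computes each i32 result lane as the sum of two adjacent sign-extended i16 products, which is why the combine can shrink both multiply operands to i16 once enough sign bits are known. An illustrative scalar model (not the LLVM API):

```cpp
#include <cstdint>

// Each i32 lane is the sum of two adjacent sign-extended i16 products.
static void pmaddwd(const int16_t A[8], const int16_t B[8], int32_t R[4]) {
  for (int I = 0; I != 4; ++I)
    R[I] = (int32_t)A[2 * I] * B[2 * I] +
           (int32_t)A[2 * I + 1] * B[2 * I + 1];
}
```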
static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
@@ -43087,8 +43631,6 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
EVT VT = N->getValueType(0);
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
// TODO: There's nothing special about i32, any integer type above i16 should
// work just as well.
@@ -43108,80 +43650,53 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
if (VT.getSizeInBits() / 4 > RegSize)
return SDValue();
- // We know N is a reduction add, which means one of its operands is a phi.
- // To match SAD, we need the other operand to be a ABS.
- if (Op0.getOpcode() != ISD::ABS)
- std::swap(Op0, Op1);
- if (Op0.getOpcode() != ISD::ABS)
- return SDValue();
-
- auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) {
- // SAD pattern detected. Now build a SAD instruction and an addition for
- // reduction. Note that the number of elements of the result of SAD is less
- // than the number of elements of its input. Therefore, we could only update
- // part of elements in the reduction vector.
- SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget);
-
- // The output of PSADBW is a vector of i64.
- // We need to turn the vector of i64 into a vector of i32.
- // If the reduction vector is at least as wide as the psadbw result, just
- // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
- // anyway.
- MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
- if (VT.getSizeInBits() >= ResVT.getSizeInBits())
- Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
- else
- Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
-
- if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
- // Fill the upper elements with zero to match the add width.
- SDValue Zero = DAG.getConstant(0, DL, VT);
- Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
- DAG.getIntPtrConstant(0, DL));
- }
-
- return Sad;
- };
+ // We know N is a reduction add. To match SAD, we need one of the operands to
+ // be an ABS.
+ SDValue AbsOp = N->getOperand(0);
+ SDValue OtherOp = N->getOperand(1);
+ if (AbsOp.getOpcode() != ISD::ABS)
+ std::swap(AbsOp, OtherOp);
+ if (AbsOp.getOpcode() != ISD::ABS)
+ return SDValue();
// Check whether we have an abs-diff pattern feeding into the select.
SDValue SadOp0, SadOp1;
- if (!detectZextAbsDiff(Op0, SadOp0, SadOp1))
- return SDValue();
-
- Op0 = BuildPSADBW(SadOp0, SadOp1);
-
- // It's possible we have a sad on the other side too.
- if (Op1.getOpcode() == ISD::ABS &&
- detectZextAbsDiff(Op1, SadOp0, SadOp1)) {
- Op1 = BuildPSADBW(SadOp0, SadOp1);
- }
-
- return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
-}
-
-/// Convert vector increment or decrement to sub/add with an all-ones constant:
-/// add X, <1, 1...> --> sub X, <-1, -1...>
-/// sub X, <1, 1...> --> add X, <-1, -1...>
-/// The all-ones vector constant can be materialized using a pcmpeq instruction
-/// that is commonly recognized as an idiom (has no register dependency), so
-/// that's better/smaller than loading a splat 1 constant.
-static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
- assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
- "Unexpected opcode for increment/decrement transform");
-
- // Pseudo-legality check: getOnesVector() expects one of these types, so bail
- // out and wait for legalization if we have an unsupported vector length.
- EVT VT = N->getValueType(0);
- if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
- return SDValue();
-
- APInt SplatVal;
- if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue())
- return SDValue();
-
- SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
- unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
- return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec);
+ if (!detectZextAbsDiff(AbsOp, SadOp0, SadOp1))
+ return SDValue();
+
+ // SAD pattern detected. Now build a SAD instruction and an addition for
+ // reduction. Note that the number of elements of the result of SAD is less
+ // than the number of elements of its input. Therefore, we can only update
+ // part of the elements in the reduction vector.
+ SDValue Sad = createPSADBW(DAG, SadOp0, SadOp1, DL, Subtarget);
+
+ // The output of PSADBW is a vector of i64.
+ // We need to turn the vector of i64 into a vector of i32.
+ // If the reduction vector is at least as wide as the psadbw result, just
+ // bitcast. If it's narrower, which can only occur for v2i32, bits 127:16 of
+ // the PSADBW will be zero. If we promote/narrow vectors, truncate the v2i64
+ // result to v2i32, which will be removed by type legalization. If we widen
+ // narrow vectors then we bitcast to v4i32 and extract v2i32.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
+ Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
+
+ if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
+ // Fill the upper elements with zero to match the add width.
+ assert(VT.getSizeInBits() % ResVT.getSizeInBits() == 0 && "Unexpected VTs");
+ unsigned NumConcats = VT.getSizeInBits() / ResVT.getSizeInBits();
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, DL, ResVT));
+ Ops[0] = Sad;
+ Sad = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
+ } else if (VT.getSizeInBits() < ResVT.getSizeInBits()) {
+ Sad = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Sad,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
+ // Preserve the reduction flag on the ADD. We may need to revisit for the
+ // other operand.
+ SDNodeFlags Flags;
+ Flags.setVectorReduction(true);
+ return DAG.getNode(ISD::ADD, DL, VT, Sad, OtherOp, Flags);
}
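
The bitcast reasoning above follows from PSADBW's semantics: each 64-bit lane receives the sum of absolute differences of eight unsigned bytes, which is at most 8 * 255 = 2040, so the upper bits of every lane are zero. A scalar model of one lane (illustration only):

```cpp
#include <cstdint>

// Sum of absolute differences of eight bytes; fits in 16 bits, so the
// upper bits of the 64-bit lane are always zero.
static uint64_t psadbwLane(const uint8_t *A, const uint8_t *B) {
  uint64_t Sum = 0;
  for (int I = 0; I != 8; ++I) {
    int D = (int)A[I] - (int)B[I];
    Sum += (uint64_t)(D < 0 ? -D : D);
  }
  return Sum;
}
```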
static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
@@ -43294,8 +43809,8 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
}
// Attempt to turn this pattern into PMADDWD.
-// (mul (add (zext (build_vector)), (zext (build_vector))),
-// (add (zext (build_vector)), (zext (build_vector)))
+// (mul (add (sext (build_vector)), (sext (build_vector))),
+// (add (sext (build_vector)), (sext (build_vector)))
static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
const SDLoc &DL, EVT VT,
const X86Subtarget &Subtarget) {
@@ -43415,6 +43930,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
}
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
const SDNodeFlags Flags = N->getFlags();
if (Flags.hasVectorReduction()) {
@@ -43445,8 +43961,29 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
HADDBuilder);
}
- if (SDValue V = combineIncDecVector(N, DAG))
- return V;
+ // If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into
+ // (sub Y, (sext (vXi1 X))).
+ // FIXME: We have the (sub Y, (zext (vXi1 X))) -> (add (sext (vXi1 X)), Y) in
+ // generic DAG combine without a legal type check, but adding this there
+ // caused regressions.
+ if (VT.isVector()) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (Op0.getOpcode() == ISD::ZERO_EXTEND &&
+ Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
+ TLI.isTypeLegal(Op0.getOperand(0).getValueType())) {
+ SDLoc DL(N);
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt);
+ }
+
+ if (Op1.getOpcode() == ISD::ZERO_EXTEND &&
+ Op1.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
+ TLI.isTypeLegal(Op1.getOperand(0).getValueType())) {
+ SDLoc DL(N);
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt);
+ }
+ }
return combineAddOrSubToADCOrSBB(N, DAG);
}
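
The new zext-to-sext fold relies on a boolean identity: for an i1 value b, zext(b) is 0 or 1 while sext(b) is 0 or -1, so zext(b) == -sext(b) and X + zext(b) == X - sext(b) lane-wise. A one-screen check of the identity (illustrative):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int B = 0; B != 2; ++B) {
    int32_t ZExt = B;  // zero-extend i1 -> i32: 0 or 1
    int32_t SExt = -B; // sign-extend i1 -> i32: 0 or -1
    assert(10 + ZExt == 10 - SExt);
  }
}
```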
@@ -43457,13 +43994,15 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SDValue Op1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ if (!VT.isVector())
+ return SDValue();
+
// PSUBUS is supported, starting from SSE2, but truncation for v8i32
// is only worth it with SSSE3 (PSHUFB).
- if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) &&
+ EVT EltVT = VT.getVectorElementType();
+ if (!(Subtarget.hasSSE2() && (EltVT == MVT::i8 || EltVT == MVT::i16)) &&
!(Subtarget.hasSSSE3() && (VT == MVT::v8i32 || VT == MVT::v8i64)) &&
- !(Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)) &&
- !(Subtarget.useBWIRegs() && (VT == MVT::v64i8 || VT == MVT::v32i16 ||
- VT == MVT::v16i32 || VT == MVT::v8i64)))
+ !(Subtarget.useBWIRegs() && (VT == MVT::v16i32)))
return SDValue();
SDValue SubusLHS, SubusRHS;
@@ -43493,16 +44032,13 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
} else
return SDValue();
- auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops);
- };
-
// PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
// special preprocessing in some cases.
- if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
- return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
- { SubusLHS, SubusRHS }, USUBSATBuilder);
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ return DAG.getNode(ISD::USUBSAT, SDLoc(N), VT, SubusLHS, SubusRHS);
+
+ assert((VT == MVT::v8i32 || VT == MVT::v16i32 || VT == MVT::v8i64) &&
+ "Unexpected VT!");
// Special preprocessing case can be only applied
// if the value was zero extended from 16 bit,
@@ -43531,15 +44067,16 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SDValue NewSubusLHS =
DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType);
SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
- SDValue Psubus =
- SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType,
- { NewSubusLHS, NewSubusRHS }, USUBSATBuilder);
+ SDValue Psubus = DAG.getNode(ISD::USUBSAT, SDLoc(N), ShrinkedType,
+ NewSubusLHS, NewSubusRHS);
+
// Zero extend the result, it may be used somewhere as 32 bit,
// if not zext and following trunc will shrink.
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType);
}
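
For reference, ISD::USUBSAT (PSUBUS on x86) is unsigned subtraction that saturates at zero instead of wrapping; that is the operation the max/min patterns in this function are matched into. Scalar model (illustration only):

```cpp
#include <cstdint>

// Unsigned subtract clamped at zero; never wraps around.
static uint16_t usubsat16(uint16_t A, uint16_t B) {
  return A > B ? (uint16_t)(A - B) : (uint16_t)0;
}
```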
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -43576,9 +44113,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
HSUBBuilder);
}
- if (SDValue V = combineIncDecVector(N, DAG))
- return V;
-
// Try to create PSUBUS if SUB's argument is max/min
if (SDValue V = combineSubToSubus(N, DAG, Subtarget))
return V;
@@ -43712,14 +44246,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
}
- // If we're inserting all zeros into the upper half, change this to
- // an insert into an all zeros vector. We will match this to a move
- // with implicit upper bit zeroing during isel.
- if (Ops.size() == 2 && ISD::isBuildVectorAllZeros(Ops[1].getNode()))
- return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- getZeroVector(VT, Subtarget, DAG, DL), Ops[0],
- DAG.getIntPtrConstant(0, DL));
-
return SDValue();
}
@@ -43786,10 +44312,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
// least as large as the original insertion. Just insert the original
// subvector into a zero vector.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 &&
- SubVec.getConstantOperandAPInt(1) == 0 &&
+ isNullConstant(SubVec.getOperand(1)) &&
SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue Ins = SubVec.getOperand(0);
- if (Ins.getConstantOperandAPInt(2) == 0 &&
+ if (isNullConstant(Ins.getOperand(2)) &&
ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits())
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
@@ -43825,31 +44351,42 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
// Match concat_vector style patterns.
SmallVector<SDValue, 2> SubVectorOps;
- if (collectConcatOps(N, SubVectorOps))
+ if (collectConcatOps(N, SubVectorOps)) {
if (SDValue Fold =
combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget))
return Fold;
- // If we are inserting into both halves of the vector, the starting vector
- // should be undef. If it isn't, make it so. Only do this if the early insert
- // has no other uses.
- // TODO: Should this be a generic DAG combine?
- // TODO: Why doesn't SimplifyDemandedVectorElts catch this?
- if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
- Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 &&
- isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() &&
- Vec.hasOneUse()) {
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
- Vec.getOperand(1), Vec.getOperand(2));
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec,
- N->getOperand(2));
+ // If we're inserting all zeros into the upper half, change this to a concat
+ // with zero. We will match this to a move with implicit upper bit zeroing
+ // during isel.
+ // We do this here because we don't want combineConcatVectorOps to
+ // create INSERT_SUBVECTOR from CONCAT_VECTORS.
+ if (SubVectorOps.size() == 2 &&
+ ISD::isBuildVectorAllZeros(SubVectorOps[1].getNode()))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
+ getZeroVector(OpVT, Subtarget, DAG, dl),
+ SubVectorOps[0], DAG.getIntPtrConstant(0, dl));
}
// If this is a broadcast insert into an upper undef, use a larger broadcast.
if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));
+ // If this is a broadcast load inserted into an upper undef, use a larger
+ // broadcast load.
+ if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() &&
+ SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec);
+ SDVTList Tys = DAG.getVTList(OpVT, MVT::Other);
+ SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
+ MemIntr->getMemoryVT(),
+ MemIntr->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
+ return BcastLd;
+ }
+
return SDValue();
}
@@ -43928,12 +44465,15 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
return SDValue();
MVT VT = N->getSimpleValueType(0);
- EVT WideVecVT = N->getOperand(0).getValueType();
- SDValue WideVec = peekThroughBitcasts(N->getOperand(0));
+ SDValue InVec = N->getOperand(0);
+ SDValue InVecBC = peekThroughBitcasts(InVec);
+ EVT InVecVT = InVec.getValueType();
+ EVT InVecBCVT = InVecBC.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
- TLI.isTypeLegal(WideVecVT) &&
- WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) {
+ TLI.isTypeLegal(InVecVT) &&
+ InVecVT.getSizeInBits() == 256 && InVecBC.getOpcode() == ISD::AND) {
auto isConcatenatedNot = [] (SDValue V) {
V = peekThroughBitcasts(V);
if (!isBitwiseNot(V))
@@ -43941,12 +44481,12 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
SDValue NotOp = V->getOperand(0);
return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS;
};
- if (isConcatenatedNot(WideVec.getOperand(0)) ||
- isConcatenatedNot(WideVec.getOperand(1))) {
+ if (isConcatenatedNot(InVecBC.getOperand(0)) ||
+ isConcatenatedNot(InVecBC.getOperand(1))) {
// extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1
- SDValue Concat = split256IntArith(WideVec, DAG);
+ SDValue Concat = split256IntArith(InVecBC, DAG);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
- DAG.getBitcast(WideVecVT, Concat), N->getOperand(1));
+ DAG.getBitcast(InVecVT, Concat), N->getOperand(1));
}
}
@@ -43956,7 +44496,6 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
if (SDValue V = narrowExtractedVectorSelect(N, DAG))
return V;
- SDValue InVec = N->getOperand(0);
unsigned IdxVal = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
if (ISD::isBuildVectorAllZeros(InVec.getNode()))
@@ -43976,31 +44515,42 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
// TODO: Move this to DAGCombiner::visitEXTRACT_SUBVECTOR
- if (InVec.getOpcode() == ISD::BITCAST &&
- InVec.getOperand(0).getValueType().isVector()) {
- SDValue SrcOp = InVec.getOperand(0);
- EVT SrcVT = SrcOp.getValueType();
- unsigned SrcNumElts = SrcVT.getVectorNumElements();
- unsigned DestNumElts = InVec.getValueType().getVectorNumElements();
+ if (InVec != InVecBC && InVecBCVT.isVector()) {
+ unsigned SrcNumElts = InVecBCVT.getVectorNumElements();
+ unsigned DestNumElts = InVecVT.getVectorNumElements();
if ((DestNumElts % SrcNumElts) == 0) {
unsigned DestSrcRatio = DestNumElts / SrcNumElts;
if ((VT.getVectorNumElements() % DestSrcRatio) == 0) {
unsigned NewExtNumElts = VT.getVectorNumElements() / DestSrcRatio;
EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
- SrcVT.getScalarType(), NewExtNumElts);
+ InVecBCVT.getScalarType(), NewExtNumElts);
if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
SDLoc DL(N);
SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
- SrcOp, NewIndex);
+ InVecBC, NewIndex);
return DAG.getBitcast(VT, NewExtract);
}
}
}
}
+ // If we are extracting from an insert into a zero vector, replace with a
+ // smaller insert into zero, as long as the extraction covers at least as
+ // many bits as the originally inserted subvector. Don't do this for i1
+ // vectors.
+ if (VT.getVectorElementType() != MVT::i1 &&
+ InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 &&
+ InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) &&
+ ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) &&
+ InVec.getOperand(1).getValueSizeInBits() <= VT.getSizeInBits()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ getZeroVector(VT, Subtarget, DAG, DL),
+ InVec.getOperand(1), InVec.getOperand(2));
+ }
+
// If we're extracting from a broadcast then we're better off just
// broadcasting to the smaller type directly, assuming this is the only use.
// As its a broadcast we don't care about the extraction index.
@@ -44008,11 +44558,25 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
InVec.getOperand(0).getValueSizeInBits() <= VT.getSizeInBits())
return DAG.getNode(X86ISD::VBROADCAST, SDLoc(N), VT, InVec.getOperand(0));
+ if (InVec.getOpcode() == X86ISD::VBROADCAST_LOAD && InVec.hasOneUse()) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(InVec);
+ if (MemIntr->getMemoryVT().getSizeInBits() <= VT.getSizeInBits()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
+ MemIntr->getMemoryVT(),
+ MemIntr->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
+ return BcastLd;
+ }
+ }
+
// If we're extracting the lowest subvector and we're the only user,
// we may be able to perform this with a smaller vector width.
if (IdxVal == 0 && InVec.hasOneUse()) {
unsigned InOpcode = InVec.getOpcode();
- if (VT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) {
+ if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) {
// v2f64 CVTDQ2PD(v4i32).
if (InOpcode == ISD::SINT_TO_FP &&
InVec.getOperand(0).getValueType() == MVT::v4i32) {
@@ -44093,7 +44657,8 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
// Simplify PMULDQ and PMULUDQ operations.
static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -44103,23 +44668,43 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);
// Multiply by zero.
+ // Don't return RHS as it may contain UNDEFs.
if (ISD::isBuildVectorAllZeros(RHS.getNode()))
- return RHS;
-
- // Aggressively peek through ops to get at the demanded low bits.
- APInt DemandedMask = APInt::getLowBitsSet(64, 32);
- SDValue DemandedLHS = DAG.GetDemandedBits(LHS, DemandedMask);
- SDValue DemandedRHS = DAG.GetDemandedBits(RHS, DemandedMask);
- if (DemandedLHS || DemandedRHS)
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- DemandedLHS ? DemandedLHS : LHS,
- DemandedRHS ? DemandedRHS : RHS);
+ return DAG.getConstant(0, SDLoc(N), N->getValueType(0));
// PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
return SDValue(N, 0);
+ // If the input is an extend_invec and the SimplifyDemandedBits call didn't
+ // convert it to any_extend_invec, due to the LegalOperations check, do the
+ // conversion directly to a vector shuffle manually. This exposes combine
+ // opportunities missed by combineExtInVec not calling
+ // combineX86ShufflesRecursively on SSE4.1 targets.
+ // FIXME: This is basically a hack around several other issues related to
+ // ANY_EXTEND_VECTOR_INREG.
+ if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ LHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
+ LHS.getOperand(0).getValueType() == MVT::v4i32) {
+ SDLoc dl(N);
+ LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0),
+ LHS.getOperand(0), { 0, -1, 1, -1 });
+ LHS = DAG.getBitcast(MVT::v2i64, LHS);
+ return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
+ }
+ if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() &&
+ (RHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ RHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
+ RHS.getOperand(0).getValueType() == MVT::v4i32) {
+ SDLoc dl(N);
+ RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0),
+ RHS.getOperand(0), { 0, -1, 1, -1 });
+ RHS = DAG.getBitcast(MVT::v2i64, RHS);
+ return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
+ }
+
return SDValue();
}
@@ -44134,7 +44719,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
In.hasOneUse()) {
auto *Ld = cast<LoadSDNode>(In);
- if (!Ld->isVolatile()) {
+ if (Ld->isSimple()) {
MVT SVT = In.getSimpleValueType().getVectorElementType();
ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
EVT MemVT = EVT::getVectorVT(*DAG.getContext(), SVT,
@@ -44150,17 +44735,6 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
}
}
- // Disabling for widening legalization for now. We can enable if we find a
- // case that needs it. Otherwise it can be deleted when we switch to
- // widening legalization.
- if (ExperimentalVectorWideningLegalization)
- return SDValue();
-
- // Combine (ext_invec (ext_invec X)) -> (ext_invec X)
- if (In.getOpcode() == N->getOpcode() &&
- TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType()))
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0));
-
// Attempt to combine as a shuffle.
// TODO: SSE41 support
if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) {
@@ -44173,6 +44747,20 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -44196,8 +44784,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
case X86ISD::CMP: return combineCMP(N, DAG);
- case ISD::ADD: return combineAdd(N, DAG, Subtarget);
- case ISD::SUB: return combineSub(N, DAG, Subtarget);
+ case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
+ case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
case X86ISD::ADD:
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
case X86ISD::SBB: return combineSBB(N, DAG);
@@ -44214,12 +44802,13 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
- case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
+ case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, DCI, Subtarget);
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
+ case X86ISD::VTRUNC: return combineVTRUNC(N, DAG);
case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
@@ -44299,20 +44888,22 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FNMADD_RND:
case X86ISD::FNMSUB:
case X86ISD::FNMSUB_RND:
- case ISD::FMA: return combineFMA(N, DAG, Subtarget);
+ case ISD::FMA: return combineFMA(N, DAG, DCI, Subtarget);
case X86ISD::FMADDSUB_RND:
case X86ISD::FMSUBADD_RND:
case X86ISD::FMADDSUB:
- case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget);
- case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI);
+ case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI);
+ case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget);
case X86ISD::MGATHER:
- case X86ISD::MSCATTER:
+ case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI);
case ISD::MGATHER:
- case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget);
+ case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI);
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
case X86ISD::PMULDQ:
- case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI);
+ case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
+ case X86ISD::KSHIFTL:
+ case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
}
return SDValue();
@@ -44660,10 +45251,11 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
case 'I':
case 'J':
case 'K':
- case 'L':
- case 'M':
case 'N':
case 'G':
+ case 'L':
+ case 'M':
+ return C_Immediate;
case 'C':
case 'e':
case 'Z':
@@ -45175,8 +45767,9 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::FR64XRegClass);
return std::make_pair(0U, &X86::FR64RegClass);
- // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
- // Vector types.
+ // TODO: Handle i128 in FR128RegClass after it is tested well.
+ // Vector types and fp128.
+ case MVT::f128:
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
@@ -45469,7 +46062,7 @@ void X86TargetLowering::insertCopiesSplitCSR(
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
@@ -45514,3 +46107,16 @@ X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk";
return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
}
+
+unsigned
+X86TargetLowering::getStackProbeSize(MachineFunction &MF) const {
+ // The default stack probe size is 4096 if the function has no
+ // "stack-probe-size" attribute.
+ unsigned StackProbeSize = 4096;
+ const Function &Fn = MF.getFunction();
+ if (Fn.hasFnAttribute("stack-probe-size"))
+ Fn.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ return StackProbeSize;
+}
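
A standalone sketch of the same default-plus-override policy, using made-up types since the real code reads LLVM function attributes; unlike getAsInteger in the patch, this version does no error handling:

```cpp
#include <map>
#include <string>

// Default 4096, overridden by a "stack-probe-size" attribute when present.
static unsigned stackProbeSize(const std::map<std::string, std::string> &Attrs) {
  unsigned Size = 4096;
  auto It = Attrs.find("stack-probe-size");
  if (It != Attrs.end())
    Size = (unsigned)std::stoul(It->second); // throws on malformed input
  return Size;
}
```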
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index e0be03bc3f9d..6f7e90008de4 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
namespace llvm {
class X86Subtarget;
@@ -144,6 +143,10 @@ namespace llvm {
/// relative displacements.
WrapperRIP,
+ /// Copies a 64-bit value from an MMX vector to the low word
+ /// of an XMM vector, with the high word zero filled.
+ MOVQ2DQ,
+
/// Copies a 64-bit value from the low word of an XMM vector
/// to an MMX vector.
MOVDQ2Q,
@@ -422,7 +425,8 @@ namespace llvm {
// Tests Types Of a FP Values for scalar types.
VFPCLASSS,
- // Broadcast scalar to vector.
+ // Broadcast (splat) scalar or element 0 of a vector. If the operand is
+ // a vector, this node may change the vector length as part of the splat.
VBROADCAST,
// Broadcast mask to vector.
VBROADCASTM,
@@ -611,6 +615,9 @@ namespace llvm {
// extract_vector_elt, store.
VEXTRACT_STORE,
+ // Scalar broadcast from memory.
+ VBROADCAST_LOAD,
+
// Store FP control word into i16 memory.
FNSTCW16m,
@@ -680,6 +687,9 @@ namespace llvm {
bool isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
+ /// If Op is a constant whose elements are all the same constant or
+ /// undefined, return true and return the constant value in \p SplatVal.
+ bool isConstantSplat(SDValue Op, APInt &SplatVal);
} // end namespace X86
//===--------------------------------------------------------------------===//
@@ -792,6 +802,17 @@ namespace llvm {
/// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
+ /// Return 1 if we can compute the negated form of the specified expression
+ /// for the same cost as the expression itself, or 2 if we can compute the
+ /// negated form more cheaply than the expression itself. Else return 0.
+ char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations,
+ bool ForCodeSize, unsigned Depth) const override;
+
+ /// If isNegatibleForFree returns true, return the newly negated expression.
+ SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
@@ -840,6 +861,13 @@ namespace llvm {
bool hasAndNot(SDValue Y) const override;
+ bool hasBitTest(SDValue X, SDValue Y) const override;
+
+ bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const override;
+
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
@@ -863,11 +891,7 @@ namespace llvm {
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
- bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
- if (DAG.getMachineFunction().getFunction().hasMinSize())
- return false;
- return true;
- }
+ bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
bool shouldSplatInsEltVarIndex(EVT VT) const override;
@@ -913,6 +937,10 @@ namespace llvm {
TargetLoweringOpt &TLO,
unsigned Depth) const override;
+ SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const override;
+
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
SDValue unwrapAddress(SDValue N) const override;
@@ -1090,11 +1118,12 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
- bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
+ bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
bool convertSelectOfConstantsToMath(EVT VT) const override;
- bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+ bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const override;
bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
bool IsSigned) const override;
@@ -1136,8 +1165,8 @@ namespace llvm {
return nullptr; // nothing to do, move along.
}
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
@@ -1189,12 +1218,18 @@ namespace llvm {
CallingConv::ID CC,
EVT VT) const override;
+ unsigned getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool supportSwiftError() const override;
StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
+ unsigned getStackProbeSize(MachineFunction &MF) const;
+
bool hasVectorBlend() const override { return true; }
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
@@ -1326,6 +1361,12 @@ namespace llvm {
SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const;
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -1372,6 +1413,9 @@ namespace llvm {
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
+ bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
+ bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
+
bool needsCmpXchgNb(Type *MemType) const;
void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
@@ -1462,6 +1506,9 @@ namespace llvm {
/// Reassociate floating point divisions into multiply by reciprocal.
unsigned combineRepeatedFPDivisors() const override;
+
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
};
namespace X86 {
@@ -1625,24 +1672,24 @@ namespace llvm {
/// mask. This is the reverse process to canWidenShuffleElements, but can
/// always succeed.
template <typename T>
- void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
+ void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
SmallVectorImpl<T> &ScaledMask) {
assert(0 < Scale && "Unexpected scaling factor");
size_t NumElts = Mask.size();
ScaledMask.assign(NumElts * Scale, -1);
- for (int i = 0; i != (int)NumElts; ++i) {
+ for (size_t i = 0; i != NumElts; ++i) {
int M = Mask[i];
// Repeat sentinel values in every mask element.
if (M < 0) {
- for (int s = 0; s != Scale; ++s)
+ for (size_t s = 0; s != Scale; ++s)
ScaledMask[(Scale * i) + s] = M;
continue;
}
// Scale mask element and increment across each mask element.
- for (int s = 0; s != Scale; ++s)
+ for (size_t s = 0; s != Scale; ++s)
ScaledMask[(Scale * i) + s] = (Scale * M) + s;
}
}
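
A possible usage of the widened scaleShuffleMask, assuming the declaration above is in scope (e.g. from within the X86 backend): scaling the v4 mask <0, -1, 3, 1> by 2 repeats sentinels and expands real indices in place.

```cpp
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>

void scaleExample() {
  int Mask[] = {0, -1, 3, 1}; // v4 shuffle mask with one sentinel
  llvm::SmallVector<int, 8> Scaled;
  llvm::scaleShuffleMask<int>(2, llvm::makeArrayRef(Mask), Scaled);
  int Expected[] = {0, 1, -1, -1, 6, 7, 2, 3};
  for (int I = 0; I != 8; ++I)
    assert(Scaled[I] == Expected[I]);
  (void)Expected;
}
```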
diff --git a/lib/Target/X86/X86IndirectBranchTracking.cpp b/lib/Target/X86/X86IndirectBranchTracking.cpp
index 04e8b2231fec..cc0f59ab329d 100644
--- a/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -84,7 +84,7 @@ bool X86IndirectBranchTrackingPass::addENDBR(
return false;
}
-bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
+static bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
if (!MOp.isGlobal())
return false;
auto *CalleeFn = dyn_cast<Function>(MOp.getGlobal());
diff --git a/lib/Target/X86/X86InsertPrefetch.cpp b/lib/Target/X86/X86InsertPrefetch.cpp
index 02ae73706a34..2b1e3f23efd7 100644
--- a/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/lib/Target/X86/X86InsertPrefetch.cpp
@@ -79,8 +79,8 @@ ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
// The prefetch instruction can't take memory operands involving vector
// registers.
bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) {
- unsigned BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
- unsigned IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
+ Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
+ Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
return (BaseReg == 0 ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) &&
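The unsigned-to-Register migration in these hunks is mechanical: Register wraps the raw register number in a distinct type while still converting back to unsigned, so comparisons like BaseReg == 0 keep working. A simplified standalone model of such a wrapper (not LLVM's actual class):

// Minimal model of a register-id wrapper: same raw encoding, but a distinct
// type with named queries. Reg is illustrative, not LLVM's class.
class Reg {
  unsigned Id = 0;
public:
  constexpr Reg() = default;
  constexpr Reg(unsigned Id) : Id(Id) {}
  constexpr operator unsigned() const { return Id; } // old call sites still work
  constexpr bool isValid() const { return Id != 0; }
};

// Code such as 'Reg BaseReg = ...; if (BaseReg == 0) ...' keeps compiling.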
@@ -108,7 +108,7 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
Prefetches &Prefetches) const {
assert(Prefetches.empty() &&
"Expected caller passed empty PrefetchInfo vector.");
- static const std::pair<const StringRef, unsigned> HintTypes[] = {
+ static constexpr std::pair<StringLiteral, unsigned> HintTypes[] = {
{"_nta_", X86::PREFETCHNTA},
{"_t0_", X86::PREFETCHT0},
{"_t1_", X86::PREFETCHT1},
@@ -173,7 +173,7 @@ bool X86InsertPrefetch::doInitialization(Module &M) {
void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
}
bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 54eddeacaa17..9b5de59430a5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -74,6 +74,7 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
+ PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
!cast<ComplexPattern>("sse_load_f32"),
@@ -412,6 +413,14 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
+let Predicates = [HasAVX512] in {
+def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
+}
+
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. It is done this way to force
// the same register to be used as the input for all three sources.
@@ -436,6 +445,19 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
[(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
+let Predicates = [HasAVX512] in {
+def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
+}
+
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
@@ -443,7 +465,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
[(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
- [(set FR64X:$dst, fpimm0)]>;
+ [(set FR64X:$dst, fp64imm0)]>;
+ def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
+ [(set VR128X:$dst, fp128imm0)]>;
}
//===----------------------------------------------------------------------===//
@@ -730,14 +754,14 @@ let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
+ [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
- imm:$src3))]>,
+ timm:$src3))]>,
EVEX_4V, EVEX_CD8<32, CD8VT1>,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
@@ -1100,75 +1124,104 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
X86VectorVTInfo MaskInfo,
X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo,
- SDPatternOperator UnmaskedOp = X86VBroadcast> {
- let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
- defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
- (outs MaskInfo.RC:$dst),
- (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
- T8PD, EVEX, Sched<[SchedRR]>;
- let mayLoad = 1 in
- defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
- (outs MaskInfo.RC:$dst),
- (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT (UnmaskedOp
- (SrcInfo.ScalarLdFrag addr:$src))))),
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT (X86VBroadcast
- (SrcInfo.ScalarLdFrag addr:$src)))))>,
- T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
- Sched<[SchedRM]>;
- }
-
- def : Pat<(MaskInfo.VT
- (bitconvert
- (DestInfo.VT (UnmaskedOp
- (SrcInfo.VT (scalar_to_vector
- (SrcInfo.ScalarLdFrag addr:$src))))))),
- (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
- def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
+ bit IsConvertibleToThreeAddress,
+ SDPatternOperator UnmaskedOp = X86VBroadcast,
+ SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
+ let hasSideEffects = 0 in
+ def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
+ DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
+ def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
(bitconvert
(DestInfo.VT
- (X86VBroadcast
- (SrcInfo.VT (scalar_to_vector
- (SrcInfo.ScalarLdFrag addr:$src)))))),
- MaskInfo.RC:$src0)),
- (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
- MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
- def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
+ (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
+ MaskInfo.ImmAllZerosV))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
+ let Constraints = "$src0 = $dst" in
+ def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
+ SrcInfo.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
+ MaskInfo.RC:$src0))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
+
+ let hasSideEffects = 0, mayLoad = 1 in
+ def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
+ (ins SrcInfo.ScalarMemOp:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (UnmaskedBcastOp addr:$src)))))],
+ DestInfo.ExeDomain>, T8PD, EVEX,
+ EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
+
+ def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
(bitconvert
(DestInfo.VT
- (X86VBroadcast
- (SrcInfo.VT (scalar_to_vector
- (SrcInfo.ScalarLdFrag addr:$src)))))),
- MaskInfo.ImmAllZerosV)),
- (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
- MaskInfo.KRCWM:$mask, addr:$src)>;
+ (SrcInfo.BroadcastLdFrag addr:$src)))),
+ MaskInfo.ImmAllZerosV))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
+ EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
+
+ let Constraints = "$src0 = $dst",
+ isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+ def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
+ SrcInfo.ScalarMemOp:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (SrcInfo.BroadcastLdFrag addr:$src)))),
+ MaskInfo.RC:$src0))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
+ EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
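All six forms defined above (r/rk/rkz and m/mk/mkz) share one per-lane semantic; a behavioral model of masked broadcast, where Zeroing selects the {z} (rkz/mkz) variants and Passthru is the tied $src0 of the merge-masking (rk/mk) variants:

#include <cstddef>

// Per-lane model of a masked broadcast.
template <typename T, size_t N>
void maskedBroadcast(T (&Dst)[N], T Scalar, const bool (&Mask)[N],
                     const T (&Passthru)[N], bool Zeroing) {
  for (size_t I = 0; I != N; ++I)
    Dst[I] = Mask[I] ? Scalar : (Zeroing ? T(0) : Passthru[I]);
}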
// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
SchedWrite SchedRR, SchedWrite SchedRM,
X86VectorVTInfo DestInfo,
- X86VectorVTInfo SrcInfo> :
+ X86VectorVTInfo SrcInfo,
+ bit IsConvertibleToThreeAddress> :
avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
- DestInfo, DestInfo, SrcInfo>;
+ DestInfo, DestInfo, SrcInfo,
+ IsConvertibleToThreeAddress>;
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
- WriteFShuffle256Ld, _.info512, _.info128>,
+ WriteFShuffle256Ld, _.info512, _.info128, 1>,
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
_.info128>,
EVEX_V512;
@@ -1176,7 +1229,7 @@ multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
- WriteFShuffle256Ld, _.info256, _.info128>,
+ WriteFShuffle256Ld, _.info256, _.info128, 1>,
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
_.info128>,
EVEX_V256;
@@ -1187,7 +1240,7 @@ multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
- WriteFShuffle256Ld, _.info512, _.info128>,
+ WriteFShuffle256Ld, _.info512, _.info128, 1>,
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
_.info128>,
EVEX_V512;
@@ -1195,12 +1248,12 @@ multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
- WriteFShuffle256Ld, _.info256, _.info128>,
+ WriteFShuffle256Ld, _.info256, _.info128, 1>,
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
_.info128>,
EVEX_V256;
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
- WriteFShuffle256Ld, _.info128, _.info128>,
+ WriteFShuffle256Ld, _.info128, _.info128, 1>,
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
_.info128>,
EVEX_V128;
@@ -1284,46 +1337,35 @@ defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
X86VBroadcast, GR64, HasAVX512>, VEX_W;
-// Provide aliases for broadcast from the same register class that
-// automatically does the extract.
-multiclass avx512_int_broadcast_rm_lowering<string Name,
- X86VectorVTInfo DestInfo,
- X86VectorVTInfo SrcInfo,
- X86VectorVTInfo ExtInfo> {
- def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
- (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
- (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
-}
-
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _, Predicate prd> {
+ AVX512VLVectorVTInfo _, Predicate prd,
+ bit IsConvertibleToThreeAddress> {
let Predicates = [prd] in {
defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
- WriteShuffle256Ld, _.info512, _.info128>,
- avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
+ WriteShuffle256Ld, _.info512, _.info128,
+ IsConvertibleToThreeAddress>,
EVEX_V512;
- // Defined separately to avoid redefinition.
- defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
}
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
- WriteShuffle256Ld, _.info256, _.info128>,
- avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
+ WriteShuffle256Ld, _.info256, _.info128,
+ IsConvertibleToThreeAddress>,
EVEX_V256;
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
- WriteShuffleXLd, _.info128, _.info128>,
+ WriteShuffleXLd, _.info128, _.info128,
+ IsConvertibleToThreeAddress>,
EVEX_V128;
}
}
defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
- avx512vl_i8_info, HasBWI>;
+ avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
- avx512vl_i16_info, HasBWI>;
+ avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
- avx512vl_i32_info, HasAVX512>;
+ avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
- avx512vl_i64_info, HasAVX512>, VEX_W1X;
+ avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
@@ -1354,6 +1396,10 @@ let Predicates = [HasAVX512] in {
// 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQZm addr:$src)>;
+
+ // FIXME this is to handle aligned extloads from i8.
+ def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDZm addr:$src)>;
}
let Predicates = [HasVLX] in {
@@ -1362,6 +1408,12 @@ let Predicates = [HasVLX] in {
(VPBROADCASTQZ128m addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQZ256m addr:$src)>;
+
+ // FIXME this is to handle aligned extloads from i8.
+ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDZ128m addr:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
// loadi16 is tricky to fold because isTypeDesirableForOp, justifiably, rejects i16.
@@ -1382,6 +1434,12 @@ let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZ256m addr:$src)>;
+
+ // FIXME this is to handle aligned extloads from i8.
+ def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWZ128m addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWZ256m addr:$src)>;
}
let Predicates = [HasBWI] in {
// loadi16 is tricky to fold because isTypeDesirableForOp, justifiably, rejects i16.
@@ -1394,6 +1452,10 @@ let Predicates = [HasBWI] in {
def : Pat<(v32i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZm addr:$src)>;
+
+ // FIXME this is to handle aligned extloads from i8.
+ def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWZm addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -1629,12 +1691,12 @@ multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
let Predicates = [HasDQI] in
defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
WriteShuffle256Ld, _Dst.info512,
- _Src.info512, _Src.info128, null_frag>,
+ _Src.info512, _Src.info128, 0, null_frag, null_frag>,
EVEX_V512;
let Predicates = [HasDQI, HasVLX] in
defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
WriteShuffle256Ld, _Dst.info256,
- _Src.info256, _Src.info128, null_frag>,
+ _Src.info256, _Src.info128, 0, null_frag, null_frag>,
EVEX_V256;
}
@@ -1645,7 +1707,7 @@ multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
let Predicates = [HasDQI, HasVLX] in
defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
WriteShuffleXLd, _Dst.info128,
- _Src.info128, _Src.info128, null_frag>,
+ _Src.info128, _Src.info128, 0, null_frag, null_frag>,
EVEX_V128;
}
@@ -1654,23 +1716,6 @@ defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
avx512vl_f32_info, avx512vl_f64_info>;
-let Predicates = [HasVLX] in {
-def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
- (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
-def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
- (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
-}
-
-def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
- (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
-def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
- (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
-
-def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
- (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
-def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
- (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
-
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
@@ -1730,7 +1775,7 @@ multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src2,
- IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
+ IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -1807,7 +1852,7 @@ multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86VPermt2 _.RC:$src2,
(IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
+ (_.BroadcastLdFrag addr:$src3)),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
@@ -1846,7 +1891,7 @@ multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src1,
- IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
+ IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -1947,7 +1992,7 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
}
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let mayLoad = 1, hasSideEffects = 0 in {
+ let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
@@ -2031,9 +2076,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
+ timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -2041,9 +2086,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
- imm:$cc),
+ timm:$cc),
(OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
- imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
+ timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
@@ -2052,9 +2097,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
(OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- imm:$cc),
+ timm:$cc),
(OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- imm:$cc)>,
+ timm:$cc)>,
EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
let isCodeGenOnly = 1 in {
@@ -2065,7 +2110,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
- imm:$cc))]>,
+ timm:$cc))]>,
EVEX_4V, VEX_LIG, Sched<[sched]>;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
@@ -2074,7 +2119,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2),
- imm:$cc))]>,
+ timm:$cc))]>,
EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -2100,94 +2145,82 @@ let Predicates = [HasAVX512] in {
SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
-multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- PatFrag OpNode_su, X86FoldableSchedWrite sched,
+multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
+ X86FoldableSchedWrite sched,
X86VectorVTInfo _, bit IsCommutable> {
- let isCommutable = IsCommutable in
+ let isCommutable = IsCommutable, hasSideEffects = 0 in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
- EVEX_4V, Sched<[sched]>;
+ []>, EVEX_4V, Sched<[sched]>;
+ let mayLoad = 1, hasSideEffects = 0 in
def rm : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (_.VT (_.LdFrag addr:$src2))))]>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
- let isCommutable = IsCommutable in
+ []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ let isCommutable = IsCommutable, hasSideEffects = 0 in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
- [(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
- EVEX_4V, EVEX_K, Sched<[sched]>;
+ []>, EVEX_4V, EVEX_K, Sched<[sched]>;
+ let mayLoad = 1, hasSideEffects = 0 in
def rmk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
- [(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode_su (_.VT _.RC:$src1),
- (_.VT (_.LdFrag addr:$src2)))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- PatFrag OpNode_su,
+multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
bit IsCommutable> :
- avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
+ avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
+ let mayLoad = 1, hasSideEffects = 0 in {
def rmb : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
- [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
- [(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode_su (_.VT _.RC:$src1),
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))]>,
- EVEX_4V, EVEX_K, EVEX_B,
+ []>, EVEX_4V, EVEX_K, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
+ }
}
-multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- PatFrag OpNode_su, X86SchedWriteWidths sched,
+multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
+ defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
+ defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
+ defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
- PatFrag OpNode, PatFrag OpNode_su,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
+ defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
+ defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
+ defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
@@ -2195,53 +2228,42 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
-def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
- (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(setcc node:$src1, node:$src2, SETGT)>;
-def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
- (X86pcmpeqm_c node:$src1, node:$src2), [{
- return N->hasOneUse();
-}]>;
-def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
- (X86pcmpgtm node:$src1, node:$src2), [{
- return N->hasOneUse();
-}]>;
-
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
-defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
+defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
-defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
+defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
-defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
+defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
-defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
+defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
+defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
-defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
+defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
-defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
+defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
EVEX_CD8<32, CD8VF>;
-defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
+defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
@@ -2322,8 +2344,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[(set _.KRC:$dst, (_.KVT (Frag:$cc
(_.VT _.RC:$src1),
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)),
+ (_.BroadcastLdFrag addr:$src2),
cond)))]>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
@@ -2335,23 +2356,21 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(_.KVT (Frag_su:$cc
(_.VT _.RC:$src1),
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)),
+ (_.BroadcastLdFrag addr:$src2),
cond))))]>,
EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
- def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmib")
_.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
def : Pat<(and _.KRCWM:$mask,
- (_.KVT (CommFrag_su:$cc (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)),
+ (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmibk")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
- (CommFrag.OperandTransform $cc))>;
+ (CommFrag_su.OperandTransform $cc))>;
}
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
@@ -2496,14 +2515,19 @@ def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
return N->hasOneUse();
}]>;
+def X86cmpm_imm_commute : SDNodeXForm<timm, [{
+ uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
+ return getI8Imm(Imm, SDLoc(N));
+}]>;
+
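X86cmpm_imm_commute rewrites the 5-bit VCMP predicate so the compare still holds after the operands are exchanged: symmetric predicates (EQ, NEQ, ORD, UNORD, TRUE, FALSE) are fixed points, while the ordering predicates pair up (LT<->GT, LE<->GE, NLT<->NGT, NLE<->NGE). A partial sketch; the encodings below follow the usual AVX cmp-predicate table and should be treated as assumptions, and the real getSwappedVCMPImm covers all 32 values:

#include <cstdint>

// Partial model of swapping a VCMP predicate immediate.
static uint8_t swappedVCMPImm(uint8_t Imm) {
  switch (Imm & 0x1f) {
  case 0x01: return 0x0e; // LT_OS  <-> GT_OS
  case 0x0e: return 0x01;
  case 0x02: return 0x0d; // LE_OS  <-> GE_OS
  case 0x0d: return 0x02;
  case 0x05: return 0x0a; // NLT_US <-> NGT_US
  case 0x0a: return 0x05;
  case 0x06: return 0x09; // NLE_US <-> NGE_US
  case 0x09: return 0x06;
  default:
    // EQ/NEQ/ORD/UNORD/TRUE/FALSE are fixed points; the remaining
    // ordered/signaling duals (0x11..0x1e) pair up the same way.
    return Imm & 0x1f;
  }
}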
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
- (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
- (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1>, Sched<[sched]>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
@@ -2511,9 +2535,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
- imm:$cc),
+ timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
- imm:$cc)>,
+ timm:$cc)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
@@ -2523,38 +2547,37 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"$cc, ${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr#", $cc",
(X86cmpm (_.VT _.RC:$src1),
- (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- imm:$cc),
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1),
- (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- imm:$cc)>,
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ timm:$cc)>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Patterns for selecting with loads in the other operand.
def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
- CommutableCMPCC:$cc),
+ timm:$cc),
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
- imm:$cc)>;
+ (X86cmpm_imm_commute timm:$cc))>;
def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
(_.VT _.RC:$src1),
- CommutableCMPCC:$cc)),
+ timm:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
- imm:$cc)>;
+ (X86cmpm_imm_commute timm:$cc))>;
- def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
- (_.VT _.RC:$src1), CommutableCMPCC:$cc),
+ def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2),
+ (_.VT _.RC:$src1), timm:$cc),
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
- imm:$cc)>;
+ (X86cmpm_imm_commute timm:$cc))>;
- def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)),
+ def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1),
- CommutableCMPCC:$cc)),
+ timm:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
- imm:$cc)>;
+ (X86cmpm_imm_commute timm:$cc))>;
}
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
@@ -2564,9 +2587,9 @@ multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc",
- (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- imm:$cc)>,
+ timm:$cc)>,
EVEX_B, Sched<[sched]>;
}
@@ -2590,12 +2613,12 @@ defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
// Patterns to select fp compares with a load as the first operand.
let Predicates = [HasAVX512] in {
def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
- CommutableCMPCC:$cc)),
- (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
+ timm:$cc)),
+ (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
- CommutableCMPCC:$cc)),
- (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
+ timm:$cc)),
+ (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}
// ----------------------------------------------------------------
@@ -2621,7 +2644,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
- (i32 imm:$src2)))]>,
+ (i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
@@ -2629,7 +2652,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su (_.VT _.RC:$src1),
- (i32 imm:$src2))))]>,
+ (i32 timm:$src2))))]>,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
@@ -2637,7 +2660,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
(X86Vfpclasss _.ScalarIntMemCPat:$src1,
- (i32 imm:$src2)))]>,
+ (i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
@@ -2645,7 +2668,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
- (i32 imm:$src2))))]>,
+ (i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2661,7 +2684,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
- (i32 imm:$src2)))]>,
+ (i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
@@ -2669,7 +2692,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclass_su (_.VT _.RC:$src1),
- (i32 imm:$src2))))]>,
+ (i32 timm:$src2))))]>,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
@@ -2677,7 +2700,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.LdFrag addr:$src1)),
- (i32 imm:$src2)))]>,
+ (i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
@@ -2685,7 +2708,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.LdFrag addr:$src1)),
- (i32 imm:$src2))))]>,
+ (i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
@@ -2693,9 +2716,8 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
_.BroadcastStr##", $dst|$dst, ${src1}"
##_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(X86Vfpclass
- (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src1))),
- (i32 imm:$src2)))]>,
+ (_.VT (_.BroadcastLdFrag addr:$src1)),
+ (i32 timm:$src2)))]>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
@@ -2703,9 +2725,8 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
_.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
- (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src1))),
- (i32 imm:$src2))))]>,
+ (_.VT (_.BroadcastLdFrag addr:$src1)),
+ (i32 timm:$src2))))]>,
EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -2836,13 +2857,21 @@ def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(KMOVWrk VK16:$src)>;
+def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
+ (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
(COPY_TO_REGCLASS VK16:$src, GR32)>;
+def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
+ (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
+def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
+ (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
(COPY_TO_REGCLASS VK8:$src, GR32)>;
+def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
+ (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
(COPY_TO_REGCLASS GR32:$src, VK32)>;
@@ -3075,7 +3104,7 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
- [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
+ [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
Sched<[sched]>;
}
@@ -3098,30 +3127,6 @@ defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShu
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
-multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
- string InstStr,
- X86VectorVTInfo Narrow,
- X86VectorVTInfo Wide> {
- def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2))),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(InstStr#"Zrr")
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
- Narrow.KRC)>;
-
- def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
- (Frag_su (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2)))),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(InstStr#"Zrrk")
- (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
- Narrow.KRC)>;
-}
-
-// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
string InstStr,
X86VectorVTInfo Narrow,
@@ -3129,7 +3134,7 @@ multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), cond)),
(COPY_TO_REGCLASS
- (!cast<Instruction>(InstStr##Zrri)
+ (!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
(Frag.OperandTransform $cc)), Narrow.KRC)>;
@@ -3138,53 +3143,111 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2),
cond)))),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
- (Frag.OperandTransform $cc)), Narrow.KRC)>;
+ (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
+}
+
+multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
+ PatFrag CommFrag, PatFrag CommFrag_su,
+ string InstStr,
+ X86VectorVTInfo Narrow,
+ X86VectorVTInfo Wide> {
+// Broadcast load.
+def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
+ (Narrow.BroadcastLdFrag addr:$src2), cond)),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstStr#"Zrmib")
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
+
+def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+ (Narrow.KVT
+ (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
+ (Narrow.BroadcastLdFrag addr:$src2),
+ cond)))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
+ (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
+
+// Commuted with broadcast load.
+def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
+ (Narrow.VT Narrow.RC:$src1),
+ cond)),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstStr#"Zrmib")
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
+
+def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+ (Narrow.KVT
+ (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
+ (Narrow.VT Narrow.RC:$src1),
+ cond)))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
+ (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
}
// Same as above, but for fp types which don't use PatFrags.
-multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
- string InstStr,
+multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
-def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2), imm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2), timm:$cc)),
(COPY_TO_REGCLASS
- (!cast<Instruction>(InstStr##Zrri)
+ (!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
- imm:$cc), Narrow.KRC)>;
+ timm:$cc), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
- (OpNode_su (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2), imm:$cc))),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
+ (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2), timm:$cc))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
- imm:$cc), Narrow.KRC)>;
-}
+ timm:$cc), Narrow.KRC)>;
-let Predicates = [HasAVX512, NoVLX] in {
- // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
- // increase the pattern complexity the way an immediate would.
- let AddedComplexity = 2 in {
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
+// Broadcast load.
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstStr#"Zrmbi")
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ addr:$src2, timm:$cc), Narrow.KRC)>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
+def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+ (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
+ (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ addr:$src2, timm:$cc), Narrow.KRC)>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
+// Commuted with broadcast load.
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
+ (Narrow.VT Narrow.RC:$src1), timm:$cc)),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstStr#"Zrmbi")
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
- }
+def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+ (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
+ (Narrow.VT Narrow.RC:$src1), timm:$cc))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
+ (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
+ addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
+}
+let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
@@ -3197,29 +3260,25 @@ let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
- defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
- defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
- defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
- defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
-}
+ defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
-let Predicates = [HasBWI, NoVLX] in {
- // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
- // increase the pattern complexity the way an immediate would.
- let AddedComplexity = 2 in {
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
- }
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
+}
+let Predicates = [HasBWI, NoVLX] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
@@ -4186,16 +4245,32 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
(COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
(v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
+def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
+ (COPY_TO_REGCLASS
+ (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
+ VK1WM:$mask, addr:$src)),
+ FR32X)>;
+def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
+ (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
+
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
(v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
(v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
-def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
+def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
(COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
(v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
+def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
+ (COPY_TO_REGCLASS
+ (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
+ VK1WM:$mask, addr:$src)),
+ FR64X)>;
+def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
+ (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
+
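The four patterns just added fold a masked scalar select of a load into the masked-load forms of VMOVSS/VMOVSD (rmk for merge masking, rmkz for zero masking). The semantics being matched, as a behavioral model; note the load only happens when the mask bit is set:

// dst = mask ? *addr : src0  -> VMOVSxZrmk  (merge masking)
// dst = mask ? *addr : 0.0   -> VMOVSxZrmkz (zero masking)
static double maskedSelectLoad(bool Mask, const double *Addr, double Src0) {
  return Mask ? *Addr : Src0; // no dereference when the mask bit is clear
}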
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
@@ -4537,8 +4612,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))>,
+ (_.BroadcastLdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -4664,8 +4738,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
"${src2}"##_Brdct.BroadcastStr##", $src1",
"$src1, ${src2}"##_Brdct.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
- (_Brdct.VT (X86VBroadcast
- (_Brdct.ScalarLdFrag addr:$src2))))))>,
+ (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -4737,8 +4810,7 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
"${src2}"##_Src.BroadcastStr##", $src1",
"$src1, ${src2}"##_Src.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
- (_Src.VT (X86VBroadcast
- (_Src.ScalarLdFrag addr:$src2))))))>,
+ (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -4874,22 +4946,11 @@ let Predicates = [HasDQI, NoVLX] in {
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
-
- def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
- (EXTRACT_SUBREG
- (VPMULLQZrr
- (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
- (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
- sub_xmm)>;
-}
-
-// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
-let Predicates = [HasDQI, NoVLX] in {
- def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
+ def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
(EXTRACT_SUBREG
- (VPMULLQZrr
+ (VPMULLQZrmb
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
- (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
+ addr:$src2),
sub_ymm)>;
def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
@@ -4898,29 +4959,47 @@ let Predicates = [HasDQI, NoVLX] in {
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
+ def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
+ (EXTRACT_SUBREG
+ (VPMULLQZrmb
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
+ addr:$src2),
+ sub_xmm)>;
}
-multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
+multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
(EXTRACT_SUBREG
- (Instr
+ (!cast<Instruction>(Instr#"rr")
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
+ def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(Instr#"rmb")
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
+ addr:$src2),
+ sub_ymm)>;
def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
(EXTRACT_SUBREG
- (Instr
+ (!cast<Instruction>(Instr#"rr")
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
+ def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(Instr#"rmb")
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
+ addr:$src2),
+ sub_xmm)>;
}
let Predicates = [HasAVX512, NoVLX] in {
- defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
- defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
- defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
- defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
+ defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
+ defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
+ defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
+ defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}
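Both the PMULLQ patterns and the rewritten min/max multiclass above use the same NoVLX widening idiom: insert the 128/256-bit operands into a 512-bit register, run the 512-bit instruction, and extract the original subregister. A rough intrinsics analogue, assuming a machine with AVX512DQ but without AVX512VL; the helper name is hypothetical:

    #include <immintrin.h>

    // 128-bit VPMULLQ without VLX: widen to zmm, multiply, narrow back.
    // The upper lanes are undefined after the cast, which is harmless
    // because the multiply is lane-wise and only the low 128 bits are
    // extracted again.
    __m128i mullo_epi64_novlx(__m128i a, __m128i b) {
      __m512i wa = _mm512_castsi128_si512(a);   // INSERT_SUBREG ..., sub_xmm
      __m512i wb = _mm512_castsi128_si512(b);
      __m512i wr = _mm512_mullo_epi64(wa, wb);  // VPMULLQZrr
      return _mm512_castsi512_si128(wr);        // EXTRACT_SUBREG ..., sub_xmm
    }
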
//===----------------------------------------------------------------------===//
@@ -4977,32 +5056,6 @@ let Predicates = [HasVLX] in {
def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
(VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
- def : Pat<(and VR128X:$src1,
- (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
- def : Pat<(or VR128X:$src1,
- (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
- def : Pat<(xor VR128X:$src1,
- (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR128X:$src1,
- (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;
-
- def : Pat<(and VR128X:$src1,
- (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
- def : Pat<(or VR128X:$src1,
- (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
- def : Pat<(xor VR128X:$src1,
- (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR128X:$src1,
- (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;
-
def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
(VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
@@ -5042,32 +5095,6 @@ let Predicates = [HasVLX] in {
(VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
(VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
-
- def : Pat<(and VR256X:$src1,
- (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
- def : Pat<(or VR256X:$src1,
- (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
- def : Pat<(xor VR256X:$src1,
- (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR256X:$src1,
- (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;
-
- def : Pat<(and VR256X:$src1,
- (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
- def : Pat<(or VR256X:$src1,
- (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
- def : Pat<(xor VR256X:$src1,
- (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR256X:$src1,
- (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
}
let Predicates = [HasAVX512] in {
@@ -5110,32 +5137,6 @@ let Predicates = [HasAVX512] in {
(VPANDNQZrm VR512:$src1, addr:$src2)>;
def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
(VPANDNQZrm VR512:$src1, addr:$src2)>;
-
- def : Pat<(and VR512:$src1,
- (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPANDDZrmb VR512:$src1, addr:$src2)>;
- def : Pat<(or VR512:$src1,
- (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPORDZrmb VR512:$src1, addr:$src2)>;
- def : Pat<(xor VR512:$src1,
- (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPXORDZrmb VR512:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR512:$src1,
- (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
- (VPANDNDZrmb VR512:$src1, addr:$src2)>;
-
- def : Pat<(and VR512:$src1,
- (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPANDQZrmb VR512:$src1, addr:$src2)>;
- def : Pat<(or VR512:$src1,
- (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPORQZrmb VR512:$src1, addr:$src2)>;
- def : Pat<(xor VR512:$src1,
- (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPXORQZrmb VR512:$src1, addr:$src2)>;
- def : Pat<(X86andnp VR512:$src1,
- (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
- (VPANDNQZrmb VR512:$src1, addr:$src2)>;
}
// Patterns to catch vselect with different type than logic op.
@@ -5174,25 +5175,17 @@ multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
X86VectorVTInfo _,
X86VectorVTInfo IntInfo> {
// Register-broadcast logical operations.
- def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
- (bitconvert (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))),
- (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert
(IntInfo.VT (OpNode _.RC:$src1,
- (bitconvert (_.VT
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))),
+ (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert
(IntInfo.VT (OpNode _.RC:$src1,
- (bitconvert (_.VT
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))),
+ (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
@@ -5329,7 +5322,8 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode, SDNode SaeNode,
- X86FoldableSchedWrite sched, bit IsCommutable> {
+ X86FoldableSchedWrite sched, bit IsCommutable,
+ string EVEX2VexOvrd> {
let ExeDomain = _.ExeDomain in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
@@ -5349,7 +5343,8 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
- Sched<[sched]> {
+ Sched<[sched]>,
+ EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
@@ -5357,7 +5352,8 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
+ EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
}
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5387,10 +5383,12 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode VecNode, SDNode SaeNode,
X86SchedWriteSizes sched, bit IsCommutable> {
defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
- VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
+ VecNode, SaeNode, sched.PS.Scl, IsCommutable,
+ NAME#"SS">,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
- VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
+ VecNode, SaeNode, sched.PD.Scl, IsCommutable,
+ NAME#"SD">,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
@@ -5410,13 +5408,14 @@ defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, SDNode OpNode,
- X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched,
+ string EVEX2VEXOvrd> {
let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
- Sched<[sched]> {
+ Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
let isCommutable = 1;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
@@ -5424,24 +5423,27 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
+ EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
- SchedWriteFCmp.Scl>, XS, EVEX_4V,
- VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ SchedWriteFCmp.Scl, "VMINCSS">, XS,
+ EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
- SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
- VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ SchedWriteFCmp.Scl, "VMINCSD">, XD,
+ VEX_W, EVEX_4V, VEX_LIG,
+ EVEX_CD8<64, CD8VT1>;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
- SchedWriteFCmp.Scl>, XS, EVEX_4V,
- VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ SchedWriteFCmp.Scl, "VMAXCSS">, XS,
+ EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
- SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
- VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ SchedWriteFCmp.Scl, "VMAXCSD">, XD,
+ VEX_W, EVEX_4V, VEX_LIG,
+ EVEX_CD8<64, CD8VT1>;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
@@ -5464,8 +5466,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
- (OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))>,
+ (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -5595,8 +5596,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
- (OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))>,
+ (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5751,13 +5751,13 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
+ (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
Sched<[sched]>;
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
- (i8 imm:$src2)))>,
+ (i8 timm:$src2)))>,
Sched<[sched.Folded]>;
}
}
@@ -5769,7 +5769,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
- (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
+ (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
EVEX_B, Sched<[sched.Folded]>;
}
@@ -5911,17 +5911,17 @@ let Predicates = [HasAVX512, NoVLX] in {
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
VR128X:$src2)), sub_xmm)>;
- def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
+ def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- imm:$src2)), sub_ymm)>;
+ timm:$src2)), sub_ymm)>;
- def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
+ def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
- imm:$src2)), sub_xmm)>;
+ timm:$src2)), sub_xmm)>;
}
//===-------------------------------------------------------------------===//
@@ -5953,8 +5953,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
- (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))>,
+ (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -6062,27 +6061,27 @@ let Predicates = [HasAVX512, NoVLX] in {
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>;
- def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
+ def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
- imm:$src2)), sub_xmm)>;
- def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
+ timm:$src2)), sub_xmm)>;
+ def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- imm:$src2)), sub_ymm)>;
+ timm:$src2)), sub_ymm)>;
- def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
+ def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
- imm:$src2)), sub_xmm)>;
- def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
+ timm:$src2)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- imm:$src2)), sub_ymm)>;
+ timm:$src2)), sub_ymm)>;
}
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
@@ -6113,27 +6112,27 @@ let Predicates = [HasAVX512, NoVLX] in {
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>;
- def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
+ def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
- imm:$src2)), sub_xmm)>;
- def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
+ timm:$src2)), sub_xmm)>;
+ def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- imm:$src2)), sub_ymm)>;
+ timm:$src2)), sub_ymm)>;
- def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
+ def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
- imm:$src2)), sub_xmm)>;
- def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
+ timm:$src2)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- imm:$src2)), sub_ymm)>;
+ timm:$src2)), sub_ymm)>;
}
//===-------------------------------------------------------------------===//
@@ -6228,8 +6227,7 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode
_.RC:$src1,
- (Ctrl.VT (X86VBroadcast
- (Ctrl.ScalarLdFrag addr:$src2)))))>,
+ (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -6419,7 +6417,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
- _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
+ _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6493,7 +6491,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src2,
- (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
+ (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -6571,7 +6569,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr,
- (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
+ (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2)), 1, 0>,
AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -6964,7 +6962,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
- (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
+ (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)>,
AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -7504,14 +7502,13 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
OpcodeStr,
"${src}"##Broadcast, "${src}"##Broadcast,
(_.VT (OpNode (_Src.VT
- (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
+ (_Src.BroadcastLdFrag addr:$src))
)),
(vselect MaskRC:$mask,
(_.VT
(OpNode
(_Src.VT
- (X86VBroadcast
- (_Src.ScalarLdFrag addr:$src))))),
+ (_Src.BroadcastLdFrag addr:$src)))),
_.RC:$src0),
vselect, "$src0 = $dst">,
EVEX, EVEX_B, Sched<[sched.Folded]>;
@@ -7646,14 +7643,14 @@ let Predicates = [HasAVX512] in {
v8f32x_info.ImmAllZerosV),
(VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
- def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
+ def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2PSZrmb addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
+ (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
(v8f32 VR256X:$src0)),
(VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
+ (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
v8f32x_info.ImmAllZerosV),
(VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
}
@@ -7677,14 +7674,14 @@ let Predicates = [HasVLX] in {
v4f32x_info.ImmAllZerosV),
(VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
- def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+ def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2PSZ256rmb addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
VR128X:$src0),
(VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
v4f32x_info.ImmAllZerosV),
(VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
@@ -7708,12 +7705,12 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
+ def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
(VCVTPD2PSZ128rmb addr:$src)>;
- def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
(v4f32 VR128X:$src0), VK2WM:$mask),
(VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
v4f32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
@@ -8194,12 +8191,12 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2DQZ128rmb addr:$src)>;
- def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
(VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
v4i32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
@@ -8223,12 +8220,12 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTTPD2DQZ128rmb addr:$src)>;
- def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
(VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
v4i32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
@@ -8252,12 +8249,12 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2UDQZ128rmb addr:$src)>;
- def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
(VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
v4i32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
@@ -8281,12 +8278,12 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTTPD2UDQZ128rmb addr:$src)>;
- def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
(VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
v4i32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}
@@ -8419,12 +8416,12 @@ let Predicates = [HasDQI, HasVLX] in {
VK2WM:$mask),
(VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
+ def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
(VCVTQQ2PSZ128rmb addr:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
(v4f32 VR128X:$src0), VK2WM:$mask),
(VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
v4f32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
@@ -8448,12 +8445,12 @@ let Predicates = [HasDQI, HasVLX] in {
VK2WM:$mask),
(VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
+ def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
(VCVTUQQ2PSZ128rmb addr:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
(v4f32 VR128X:$src0), VK2WM:$mask),
(VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
v4f32x_info.ImmAllZerosV, VK2WM:$mask),
(VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
@@ -8576,21 +8573,21 @@ let ExeDomain = GenericDomain in {
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _dest.RC:$dst,
- (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
+ (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
Sched<[RR]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _dest.RC:$dst,
- (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
_dest.RC:$src0, _src.KRCWM:$mask))]>,
Sched<[RR]>, EVEX_K;
def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
[(set _dest.RC:$dst,
- (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
_dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
Sched<[RR]>, EVEX_KZ;
let hasSideEffects = 0, mayStore = 1 in {
@@ -8631,17 +8628,17 @@ let Predicates = [HasAVX512] in {
}
def : Pat<(store (f64 (extractelt
- (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
+ (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
- (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
+ (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
def : Pat<(store (i64 (extractelt
- (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
+ (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
- (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
- def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
- (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
- def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
- (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
+ (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
+ def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
+ (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
+ def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
+ (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}
// Patterns for matching conversions from float to half-float and vice versa.
@@ -8765,7 +8762,7 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.VT
- (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ (_.BroadcastLdFrag addr:$src)))>,
EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8859,7 +8856,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.VT
- (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ (_.BroadcastLdFrag addr:$src)))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8940,7 +8937,7 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(fsqrt (_.VT
- (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ (_.BroadcastLdFrag addr:$src)))>,
EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9049,14 +9046,14 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 imm:$src3)))>,
+ (i32 timm:$src3)))>,
Sched<[sched]>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
(_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 imm:$src3)))>, EVEX_B,
+ (i32 timm:$src3)))>, EVEX_B,
Sched<[sched]>;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -9064,7 +9061,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
- _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
+ _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
@@ -9082,15 +9079,15 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
}
let Predicates = [HasAVX512] in {
- def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
+ def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
- _.FRC:$src1, imm:$src2))>;
+ _.FRC:$src1, timm:$src2))>;
}
let Predicates = [HasAVX512, OptForSize] in {
- def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
+ def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
- addr:$src1, imm:$src2))>;
+ addr:$src1, timm:$src2))>;
}
}
@@ -10109,19 +10106,19 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
- (i32 imm:$src2))>, Sched<[sched]>;
+ (i32 timm:$src2))>, Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
- (i32 imm:$src2))>,
+ (i32 timm:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr##", $src2",
- (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
- (i32 imm:$src2))>, EVEX_B,
+ (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
+ (i32 timm:$src2))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10136,7 +10133,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
- (i32 imm:$src2))>,
+ (i32 timm:$src2))>,
EVEX_B, Sched<[sched]>;
}
@@ -10169,22 +10166,22 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i32 imm:$src3))>,
+ (i32 timm:$src3))>,
Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
- (i32 imm:$src3))>,
+ (i32 timm:$src3))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
- (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- (i32 imm:$src3))>, EVEX_B,
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ (i32 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10200,7 +10197,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT SrcInfo.RC:$src2),
- (i8 imm:$src3)))>,
+ (i8 timm:$src3)))>,
Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
@@ -10208,7 +10205,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
- (i8 imm:$src3)))>,
+ (i8 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10226,8 +10223,8 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
- (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- (i8 imm:$src3))>, EVEX_B,
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ (i8 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -10241,15 +10238,14 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i32 imm:$src3))>,
+ (i32 timm:$src3))>,
Sched<[sched]>;
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
- (_.VT (scalar_to_vector
- (_.ScalarLdFrag addr:$src2))),
- (i32 imm:$src3))>,
+ (_.VT _.ScalarIntMemCPat:$src2),
+ (i32 timm:$src3))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10265,7 +10261,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i32 imm:$src3))>,
+ (i32 timm:$src3))>,
EVEX_B, Sched<[sched]>;
}
@@ -10279,7 +10275,7 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i32 imm:$src3))>,
+ (i32 timm:$src3))>,
EVEX_B, Sched<[sched]>;
}
@@ -10401,7 +10397,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
- (i8 imm:$src3)))))>,
+ (i8 timm:$src3)))))>,
Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
@@ -10410,7 +10406,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1,
(CastInfo.LdFrag addr:$src2),
- (i8 imm:$src3)))))>,
+ (i8 timm:$src3)))))>,
Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -10421,8 +10417,8 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(bitconvert
(CastInfo.VT
(X86Shuf128 _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
- (i8 imm:$src3)))))>, EVEX_B,
+ (_.BroadcastLdFrag addr:$src2),
+ (i8 timm:$src3)))))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10491,14 +10487,14 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
+ (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VAlign _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)),
- (i8 imm:$src3)))>,
+ (i8 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<"VPALIGNRrmi">;
@@ -10507,8 +10503,8 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(X86VAlign _.RC:$src1,
- (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- (i8 imm:$src3))>, EVEX_B,
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ (i8 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10541,13 +10537,13 @@ defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
-def ValignqImm32XForm : SDNodeXForm<imm, [{
+def ValignqImm32XForm : SDNodeXForm<timm, [{
return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
-def ValignqImm8XForm : SDNodeXForm<imm, [{
+def ValignqImm8XForm : SDNodeXForm<timm, [{
return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
-def ValigndImm8XForm : SDNodeXForm<imm, [{
+def ValigndImm8XForm : SDNodeXForm<timm, [{
return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
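The three SDNodeXForms above only rescale the element-shift count when a VALIGNQ is re-expressed on narrower elements (x2 for dwords, x8 for bytes, x4 for VALIGND-to-bytes). A trivial check of the arithmetic, using an assumed shift count of three qwords:

    #include <cstdio>

    int main() {
      unsigned QImm = 3;                       // valignq: shift by 3 qwords
      printf("valignd  imm: %u\n", QImm * 2);  // 6  (ValignqImm32XForm)
      printf("vpalignr imm: %u\n", QImm * 8);  // 24 (ValignqImm8XForm)
      return 0;
    }
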
@@ -10557,40 +10553,40 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1, From.RC:$src2,
- imm:$src3))),
+ timm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
- (ImmXForm imm:$src3))>;
+ (ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1, From.RC:$src2,
- imm:$src3))),
+ timm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
- (ImmXForm imm:$src3))>;
+ (ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(From.LdFrag addr:$src2),
- imm:$src3))),
+ timm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
- (ImmXForm imm:$src3))>;
+ (ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(From.LdFrag addr:$src2),
- imm:$src3))),
+ timm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
- (ImmXForm imm:$src3))>;
+ (ImmXForm timm:$src3))>;
}
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
@@ -10599,35 +10595,32 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
SDNodeXForm ImmXForm> :
avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
def : Pat<(From.VT (OpNode From.RC:$src1,
- (bitconvert (To.VT (X86VBroadcast
- (To.ScalarLdFrag addr:$src2)))),
- imm:$src3)),
+ (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
+ timm:$src3)),
(!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
- (ImmXForm imm:$src3))>;
+ (ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(bitconvert
- (To.VT (X86VBroadcast
- (To.ScalarLdFrag addr:$src2)))),
- imm:$src3))),
+ (To.VT (To.BroadcastLdFrag addr:$src2))),
+ timm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
- (ImmXForm imm:$src3))>;
+ (ImmXForm timm:$src3))>;
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(bitconvert
- (To.VT (X86VBroadcast
- (To.ScalarLdFrag addr:$src2)))),
- imm:$src3))),
+ (To.VT (To.BroadcastLdFrag addr:$src2))),
+ timm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
- (ImmXForm imm:$src3))>;
+ (ImmXForm timm:$src3))>;
}
let Predicates = [HasAVX512] in {
@@ -10666,13 +10659,13 @@ multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr,
"$src1", "$src1",
- (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
+ (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr,
"$src1", "$src1",
- (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
+ (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded]>;
}
@@ -10685,8 +10678,7 @@ multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.ScalarMemOp:$src1), OpcodeStr,
"${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr,
- (_.VT (OpNode (X86VBroadcast
- (_.ScalarLdFrag addr:$src1))))>,
+ (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded]>;
}
@@ -10770,7 +10762,7 @@ let Predicates = [HasAVX512, NoVLX] in {
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd, NoVLX] in {
- def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
+ def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
(EXTRACT_SUBREG
(!cast<Instruction>(InstrStr # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
@@ -10778,7 +10770,7 @@ multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
_.info256.SubRegIdx)),
_.info256.SubRegIdx)>;
- def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
+ def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
(EXTRACT_SUBREG
(!cast<Instruction>(InstrStr # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
@@ -10829,17 +10821,16 @@ defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
-multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
+ (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
- (_.VT (OpNode (_.VT (scalar_to_vector
- (_.ScalarLdFrag addr:$src)))))>,
+ (_.VT (_.BroadcastLdFrag addr:$src))>,
EVEX, EVEX_CD8<_.EltSize, CD8VH>,
Sched<[sched.Folded]>;
}
@@ -10853,7 +10844,7 @@ multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
VTInfo.info256>, EVEX_V256;
- defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
+ defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
VTInfo.info128>, EVEX_V128;
}
}
@@ -10867,11 +10858,9 @@ multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
let Predicates = [HasVLX] in {
-def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
- (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
(VMOVDDUPZ128rm addr:$src)>;
@@ -10884,17 +10873,17 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
immAllZerosV),
(VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
immAllZerosV),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
immAllZerosV),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
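The VMOVDDUP patterns above, including the new X86VBroadcastld64 forms, all encode the same fact: duplicating a loaded double and broadcasting a 64-bit element both yield <x, x> in an xmm register, so either DAG shape may select VMOVDDUPZ128rm. A small illustration with hypothetical helper names (the intrinsics themselves are standard SSE3/SSE2):

    #include <immintrin.h>

    // Both helpers return <*p, *p>.
    __m128d dup_via_movddup(const double *p)   { return _mm_loaddup_pd(p); }
    __m128d dup_via_broadcast(const double *p) { return _mm_set1_pd(*p); }
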
@@ -11070,14 +11059,14 @@ multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
def rr : AVX512<opc, MRMr,
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
+ [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
Sched<[sched]>;
def rm : AVX512<opc, MRMm,
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
- (i8 imm:$src2))))]>,
+ (i8 timm:$src2))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -11104,6 +11093,7 @@ defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo _dst, X86VectorVTInfo _src> {
+ let isCommutable = 1 in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -11140,7 +11130,7 @@ defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
-def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
+def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
uint8_t Imm = N->getZExtValue();
// Swap bits 1/4 and 3/6.
@@ -11151,7 +11141,7 @@ def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
if (Imm & 0x40) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
-def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
+def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
uint8_t Imm = N->getZExtValue();
// Swap bits 2/4 and 3/5.
@@ -11162,7 +11152,7 @@ def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
if (Imm & 0x20) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
-def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
+def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
uint8_t Imm = N->getZExtValue();
// Swap bits 1/2 and 5/6.
@@ -11173,7 +11163,7 @@ def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
if (Imm & 0x40) NewImm |= 0x20;
return getI8Imm(NewImm, SDLoc(N));
}]>;
-def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
+def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by moving operand 1 to the end.
uint8_t Imm = N->getZExtValue();
// Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
@@ -11186,7 +11176,7 @@ def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
if (Imm & 0x40) NewImm |= 0x20;
return getI8Imm(NewImm, SDLoc(N));
}]>;
-def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
+def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
uint8_t Imm = N->getZExtValue();
// Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
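The SDNodeXForms above permute the bits of a ternary-logic immediate when the operands are reordered: imm bit i is the truth-table entry for (src0, src1, src2) = (bit2(i), bit1(i), bit0(i)), so swapping operands 0 and 2 swaps index bits 0 and 2, i.e. imm bits 1<->4 and 3<->6, while indices 0, 2, 5, 7 are fixed points (mask 0xa5). A standalone C++ restatement of the VPTERNLOG321_imm8 body with a worked value; the driver is illustrative, not from the patch:

    #include <cstdint>
    #include <cstdio>

    static uint8_t swizzle321(uint8_t Imm) {
      uint8_t NewImm = Imm & 0xa5;        // imm bits 0,2,5,7 are fixed points
      if (Imm & 0x02) NewImm |= 0x10;     // truth-table index 001 -> 100
      if (Imm & 0x10) NewImm |= 0x02;     // 100 -> 001
      if (Imm & 0x08) NewImm |= 0x40;     // 011 -> 110
      if (Imm & 0x40) NewImm |= 0x08;     // 110 -> 011
      return NewImm;
    }

    int main() {
      // 0xca encodes "src0 ? src1 : src2"; with operands 0 and 2 swapped
      // the same function reads "src2 ? src1 : src0", whose table is 0xd8.
      printf("%#x\n", swizzle321(0xca));  // prints 0xd8
      return 0;
    }
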
@@ -11210,7 +11200,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT _.RC:$src3),
- (i8 imm:$src4)), 1, 1>,
+ (i8 timm:$src4)), 1, 1>,
AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
@@ -11218,7 +11208,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (bitconvert (_.LdFrag addr:$src3))),
- (i8 imm:$src4)), 1, 0>,
+ (i8 timm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -11227,146 +11217,145 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, ${src3}"##_.BroadcastStr##", $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
- (i8 imm:$src4)), 1, 0>, EVEX_B,
+ (_.VT (_.BroadcastLdFrag addr:$src3)),
+ (i8 timm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
+ (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+ _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
// Additional patterns for matching loads in other positions.
def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
+ _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, (i8 imm:$src4))),
+ _.RC:$src2, (i8 timm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
// Additional patterns for matching zero masking with loads in other
// positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src2, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
// Additional patterns for matching masked loads with different
// operand orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
+ (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
// Additional patterns for matching broadcasts in other positions.
- def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
- _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
+ def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
+ _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
- _.RC:$src2, (i8 imm:$src4))),
+ (_.BroadcastLdFrag addr:$src3),
+ _.RC:$src2, (i8 timm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
// Additional patterns for matching zero masking with broadcasts in other
// positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
- _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ (OpNode (_.BroadcastLdFrag addr:$src3),
+ _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
- (VPTERNLOG321_imm8 imm:$src4))>;
+ (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
- _.RC:$src2, (i8 imm:$src4)),
+ (_.BroadcastLdFrag addr:$src3),
+ _.RC:$src2, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
- (VPTERNLOG132_imm8 imm:$src4))>;
+ (VPTERNLOG132_imm8 timm:$src4))>;
// Additional patterns for matching masked broadcasts with different
// operand orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
- _.RC:$src2, (i8 imm:$src4)),
+ (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
+ _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
- _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ (OpNode (_.BroadcastLdFrag addr:$src3),
+ _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
- (i8 imm:$src4)), _.RC:$src1)),
+ (_.BroadcastLdFrag addr:$src3),
+ (i8 timm:$src4)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2,
- (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
- _.RC:$src1, (i8 imm:$src4)),
+ (_.BroadcastLdFrag addr:$src3),
+ _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
- _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
+ (OpNode (_.BroadcastLdFrag addr:$src3),
+ _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
- _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
+ _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
@@ -11387,6 +11376,113 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
avx512vl_i64_info>, VEX_W;
+// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
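+// This works because vpternlog is a purely bitwise operation: bit i of the
+// result depends only on bit i of the three sources, so the element width
+// encoded in the instruction is irrelevant and the qword form can be reused.
+// For example, the bitwise select (A & B) | (~A & C) uses immediate 0xCA for
+// every element type.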
+let Predicates = [HasVLX] in {
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (loadv16i8 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
+ VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (loadv8i16 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
+ VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (loadv32i8 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
+ VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (loadv16i16 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
+ VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+}
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+ (loadv64i8 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
+ VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+ (loadv32i16 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
+ VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+}
+
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
@@ -11498,14 +11594,14 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3),
- (i32 imm:$src4))>, Sched<[sched]>;
+ (i32 timm:$src4))>, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
- (i32 imm:$src4))>,
+ (i32 timm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
@@ -11513,8 +11609,8 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
"$src2, ${src3}"##_.BroadcastStr##", $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
- (i32 imm:$src4))>,
+ (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
+ (i32 timm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
}
@@ -11531,7 +11627,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
(X86VFixupimmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3),
- (i32 imm:$src4))>,
+ (i32 timm:$src4))>,
EVEX_B, Sched<[sched]>;
}
}
@@ -11547,7 +11643,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
- (i32 imm:$src4))>, Sched<[sched]>;
+ (i32 timm:$src4))>, Sched<[sched]>;
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
@@ -11555,7 +11651,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(X86VFixupimmSAEs (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
- (i32 imm:$src4))>,
+ (i32 timm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
@@ -11564,13 +11660,13 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(_.VT _.RC:$src2),
(_src3VT.VT (scalar_to_vector
(_src3VT.ScalarLdFrag addr:$src3))),
- (i32 imm:$src4))>,
+ (i32 timm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo _Vec,
+ AVX512VLVectorVTInfo _Vec,
AVX512VLVectorVTInfo _Tbl> {
let Predicates = [HasAVX512] in
defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
@@ -11804,7 +11900,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
"${src3}"##VTI.BroadcastStr##", $src2",
"$src2, ${src3}"##VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
+ (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
AVX512FMA3Base, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -11880,12 +11976,14 @@ defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
+ X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
+ bit IsCommutable> {
defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1,
- VTI.RC:$src2, VTI.RC:$src3))>,
+ VTI.RC:$src2, VTI.RC:$src3)),
+ IsCommutable, IsCommutable>,
EVEX_4V, T8PD, Sched<[sched]>;
defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
@@ -11899,27 +11997,58 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
"$src2, ${src3}"##VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (X86VBroadcast
- (VTI.ScalarLdFrag addr:$src3))))>,
+ (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
- X86SchedWriteWidths sched> {
+ X86SchedWriteWidths sched, bit IsCommutable> {
let Predicates = [HasVNNI] in
- defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
+ defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
+ IsCommutable>, EVEX_V512;
let Predicates = [HasVNNI, HasVLX] in {
- defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
- defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
+ defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
+ IsCommutable>, EVEX_V128;
}
}
// FIXME: Is there a better scheduler class for VPDP?
-defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
-defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
-defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
-defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
+defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
+defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
+defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
+defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
+
+def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86vpmaddwd node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
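+// The "_su" (single-use) fragment only matches when the vpmaddwd result has
+// no other users; otherwise the vpmaddwd would still be emitted for those
+// users and folding it into vpdpwssd below would save nothing.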
+
+// Patterns to match VPDPWSSD from existing instructions/intrinsics.
+let Predicates = [HasVNNI] in {
+ def : Pat<(v16i32 (add VR512:$src1,
+ (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
+ (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
+ def : Pat<(v16i32 (add VR512:$src1,
+ (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
+ (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
+}
+let Predicates = [HasVNNI,HasVLX] in {
+ def : Pat<(v8i32 (add VR256X:$src1,
+ (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
+ (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
+ def : Pat<(v8i32 (add VR256X:$src1,
+ (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
+ (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
+ def : Pat<(v4i32 (add VR128X:$src1,
+ (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
+ (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
+ def : Pat<(v4i32 (add VR128X:$src1,
+ (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
+ (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
+}
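+// Roughly, these patterns let intrinsic code such as
+//   acc = _mm512_add_epi32(acc, _mm512_madd_epi16(a, b));
+// compile to a single vpdpwssd instead of a vpmaddwd/vpaddd pair (a sketch
+// assuming VNNI is available; the intrinsic spelling is illustrative).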
//===----------------------------------------------------------------------===//
// Bit Algorithms
@@ -12004,8 +12133,8 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
"$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
(OpNode (VTI.VT VTI.RC:$src1),
- (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
- (i8 imm:$src3))>, EVEX_B,
+ (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
+ (i8 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -12116,7 +12245,7 @@ multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
!strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
[(set _.KRPC:$dst, (X86vp2intersect
- _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
+ _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
@@ -12217,12 +12346,12 @@ let Predicates = [HasBF16, HasVLX] in {
(VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
- (X86VBroadcast (loadf32 addr:$src))))),
+ (X86VBroadcastld32 addr:$src)))),
(VCVTNEPS2BF16Z128rmb addr:$src)>;
- def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
(v8i16 VR128X:$src0), VK4WM:$mask),
(VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
- def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
v8i16x_info.ImmAllZerosV, VK4WM:$mask),
(VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}
@@ -12249,7 +12378,7 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr),
(_.VT (OpNode _.RC:$src1, _.RC:$src2,
- (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
+ (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
EVEX_B, EVEX_4V;
}
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index e52635f8d48b..1e399a894490 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1271,22 +1271,22 @@ let isCompare = 1 in {
// ANDN Instruction
//
multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
- PatFrag ld_frag> {
+ PatFrag ld_frag, X86FoldableSchedWrite sched> {
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
- Sched<[WriteALU]>;
+ Sched<[sched]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Complexity is reduced to give AND-with-immediate a chance to match first.
let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in {
- defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8PS, VEX_4V;
- defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8PS, VEX_4V, VEX_W;
+ defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS, VEX_4V;
+ defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, VEX_W;
}
let Predicates = [HasBMI], AddedComplexity = -6 in {
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 50aed98112c3..aa45e9b191c1 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -131,11 +131,11 @@ addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
/// reference.
static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand,
unsigned Reg) {
- // Direct memory address is in a form of: Reg, 1 (Scale), NoReg, 0, NoReg.
- MI->getOperand(Operand).setReg(Reg);
+  // A direct memory address has the form: Reg/FI, 1 (Scale), NoReg, 0, NoReg.
+ MI->getOperand(Operand).ChangeToRegister(Reg, /*isDef=*/false);
MI->getOperand(Operand + 1).setImm(1);
MI->getOperand(Operand + 2).setReg(0);
- MI->getOperand(Operand + 3).setImm(0);
+ MI->getOperand(Operand + 3).ChangeToImmediate(0);
MI->getOperand(Operand + 4).setReg(0);
}
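+// Note: ChangeToRegister/ChangeToImmediate are used rather than setReg/setImm
+// because the operand may currently be of a different kind (e.g. a frame
+// index); they rewrite the operand's kind as well as its value.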
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index 099f6aa8d8bb..330b8c7a8a43 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -20,19 +20,19 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
: I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond),
"cmov${cond}{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst,
- (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>,
+ (X86cmov GR16:$src1, GR16:$src2, timm:$cond, EFLAGS))]>,
TB, OpSize16;
def CMOV32rr
: I<0x40, MRMSrcRegCC, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, ccode:$cond),
"cmov${cond}{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst,
- (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>,
+ (X86cmov GR32:$src1, GR32:$src2, timm:$cond, EFLAGS))]>,
TB, OpSize32;
def CMOV64rr
:RI<0x40, MRMSrcRegCC, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, ccode:$cond),
"cmov${cond}{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst,
- (X86cmov GR64:$src1, GR64:$src2, imm:$cond, EFLAGS))]>, TB;
+ (X86cmov GR64:$src1, GR64:$src2, timm:$cond, EFLAGS))]>, TB;
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
@@ -41,29 +41,46 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
: I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond),
"cmov${cond}{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- imm:$cond, EFLAGS))]>, TB, OpSize16;
+ timm:$cond, EFLAGS))]>, TB, OpSize16;
def CMOV32rm
: I<0x40, MRMSrcMemCC, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2, ccode:$cond),
"cmov${cond}{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- imm:$cond, EFLAGS))]>, TB, OpSize32;
+ timm:$cond, EFLAGS))]>, TB, OpSize32;
def CMOV64rm
:RI<0x40, MRMSrcMemCC, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2, ccode:$cond),
"cmov${cond}{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- imm:$cond, EFLAGS))]>, TB;
+ timm:$cond, EFLAGS))]>, TB;
} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
} // isCodeGenOnly = 1, ForceDisassemble = 1
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ X86::CondCode CC = static_cast<X86::CondCode>(N->getZExtValue());
+ return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC),
+ SDLoc(N), MVT::i8);
+}]>;
+
+// Conditional moves with folded loads, where the operands are swapped and the
+// condition is inverted.
+let Predicates = [HasCMov] in {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, timm:$cond, EFLAGS),
+ (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, timm:$cond, EFLAGS),
+ (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, timm:$cond, EFLAGS),
+ (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>;
+}
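+// For example, selecting "cond ? load : reg" needs the load in the second
+// operand slot to use CMOVxxrm, so the operands are swapped and the condition
+// code is replaced by its opposite (E <-> NE, L <-> GE, ...), which preserves
+// the semantics.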
+
// SetCC instructions.
let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
def SETCCr : I<0x90, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond),
"set${cond}\t$dst",
- [(set GR8:$dst, (X86setcc imm:$cond, EFLAGS))]>,
+ [(set GR8:$dst, (X86setcc timm:$cond, EFLAGS))]>,
TB, Sched<[WriteSETCC]>;
def SETCCm : I<0x90, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond),
"set${cond}\t$dst",
- [(store (X86setcc imm:$cond, EFLAGS), addr:$dst)]>,
+ [(store (X86setcc timm:$cond, EFLAGS), addr:$dst)]>,
TB, Sched<[WriteSETCCStore]>;
} // Uses = [EFLAGS]
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index efaccdc9ee96..78d8dd3c0d03 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -542,7 +542,7 @@ multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {
def CMOV#NAME : I<0, Pseudo,
(outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond),
"#CMOV_"#NAME#" PSEUDO!",
- [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond,
+ [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, timm:$cond,
EFLAGS)))]>;
}
@@ -593,66 +593,66 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
defm _VK64 : CMOVrr_PSEUDO<VK64, v64i1>;
} // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS]
-def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;
let Predicates = [NoVLX] in {
- def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
- def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
- def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
- def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
- def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
-
- def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
- def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
- def : Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
- def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
- def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;
+ def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;
+ def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;
+ def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;
+ def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;
+
+ def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>;
+ def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>;
+ def : Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>;
+ def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>;
+ def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>;
}
let Predicates = [HasVLX] in {
- def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
- def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
- def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
- def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
- def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
-
- def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
- def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
- def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
- def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
- def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
- (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
+ def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
+ def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
+ def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
+ def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
+
+ def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
+ def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
+ def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
+ def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
+ def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
}
-def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
- (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
-def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
- (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
-def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
- (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
-def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
- (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
-def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
- (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
+def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
+def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
+def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
+def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
@@ -1126,12 +1126,12 @@ def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
// binary size compared to a regular MOV, but it introduces an unnecessary
// load, so is not suitable for regular or optsize functions.
let Predicates = [OptForMinSize] in {
-def : Pat<(nonvolatile_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
-def : Pat<(nonvolatile_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
-def : Pat<(nonvolatile_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
-def : Pat<(nonvolatile_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
-def : Pat<(nonvolatile_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
-def : Pat<(nonvolatile_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
+def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
+def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
+def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
+def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
+def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
+def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
}
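+// The size win is concrete: "andl $0, mem" encodes as 83 /4 ib (opcode,
+// ModRM and an 8-bit immediate), while "movl $0, mem" needs C7 /0 id with a
+// 32-bit immediate, so each store saves three bytes at the cost of the extra
+// load.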
// In kernel code model, we can get the address of a label
@@ -1276,23 +1276,6 @@ def : Pat<(X86cmp GR32:$src1, 0),
def : Pat<(X86cmp GR64:$src1, 0),
(TEST64rr GR64:$src1, GR64:$src1)>;
-def inv_cond_XFORM : SDNodeXForm<imm, [{
- X86::CondCode CC = static_cast<X86::CondCode>(N->getZExtValue());
- return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC),
- SDLoc(N), MVT::i8);
-}]>;
-
-// Conditional moves with folded loads with operands swapped and conditions
-// inverted.
-let Predicates = [HasCMov] in {
- def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, imm:$cond, EFLAGS),
- (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>;
- def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, imm:$cond, EFLAGS),
- (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>;
- def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, imm:$cond, EFLAGS),
- (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>;
-}
-
// zextload bool -> zextload byte
// i1 stored in one byte in zero-extended form.
// Upper bits cleanup should be executed before Store.
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index f82e80965b7c..e1e6eea59884 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -75,7 +75,7 @@ let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump],
def JCC_1 : Ii8PCRel <0x70, AddCCFrm, (outs),
(ins brtarget8:$dst, ccode:$cond),
"j${cond}\t$dst",
- [(X86brcond bb:$dst, imm:$cond, EFLAGS)]>;
+ [(X86brcond bb:$dst, timm:$cond, EFLAGS)]>;
let hasSideEffects = 0 in {
def JCC_2 : Ii16PCRel<0x80, AddCCFrm, (outs),
(ins brtarget16:$dst, ccode:$cond),
@@ -145,6 +145,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
[(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>,
Sched<[WriteJumpLd]>;
+ // Win64 wants indirect jumps leaving the function to have a REX_W prefix.
+ // These are switched from TAILJMPr/m64_REX in MCInstLower.
+ let isCodeGenOnly = 1, hasREX_WPrefix = 1 in {
+ def JMP64r_REX : I<0xFF, MRM4r, (outs), (ins GR64:$dst),
+ "rex64 jmp{q}\t{*}$dst", []>, Sched<[WriteJump]>;
+ let mayLoad = 1 in
+ def JMP64m_REX : I<0xFF, MRM4m, (outs), (ins i64mem:$dst),
+ "rex64 jmp{q}\t{*}$dst", []>, Sched<[WriteJumpLd]>;
+ }
+
// Non-tracking jumps for IBT, use with caution.
let isCodeGenOnly = 1 in {
def JMP16r_NT : I<0xFF, MRM4r, (outs), (ins GR16 : $dst), "jmp{w}\t{*}$dst",
@@ -273,39 +284,35 @@ let isCall = 1 in
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
- let Uses = [ESP, SSP] in {
- def TCRETURNdi : PseudoI<(outs),
- (ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable;
- def TCRETURNri : PseudoI<(outs),
- (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
+ isCodeGenOnly = 1, Uses = [ESP, SSP] in {
+ def TCRETURNdi : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset),
+ []>, Sched<[WriteJump]>, NotMemoryFoldable;
+ def TCRETURNri : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset),
+ []>, Sched<[WriteJump]>, NotMemoryFoldable;
let mayLoad = 1 in
- def TCRETURNmi : PseudoI<(outs),
- (ins i32mem_TC:$dst, i32imm:$offset), []>;
+ def TCRETURNmi : PseudoI<(outs), (ins i32mem_TC:$dst, i32imm:$offset),
+ []>, Sched<[WriteJumpLd]>;
- // FIXME: The should be pseudo instructions that are lowered when going to
- // mcinst.
- def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i32imm_pcrel:$dst), "jmp\t$dst", []>;
+ def TAILJMPd : PseudoI<(outs), (ins i32imm_pcrel:$dst),
+ []>, Sched<[WriteJump]>;
- def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
- "", []>; // FIXME: Remove encoding when JIT is dead.
+ def TAILJMPr : PseudoI<(outs), (ins ptr_rc_tailcall:$dst),
+ []>, Sched<[WriteJump]>;
let mayLoad = 1 in
- def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
- "jmp{l}\t{*}$dst", []>;
+ def TAILJMPm : PseudoI<(outs), (ins i32mem_TC:$dst),
+ []>, Sched<[WriteJumpLd]>;
}
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
- isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ isCodeGenOnly = 1, SchedRW = [WriteJump] in
let Uses = [ESP, EFLAGS, SSP] in {
def TCRETURNdicc : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
  // This gets replaced by a conditional jump instruction in MC lowering.
- def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
- (ins i32imm_pcrel:$dst, i32imm:$cond), "", []>;
+ def TAILJMPd_CC : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$cond), []>;
}
@@ -348,34 +355,36 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
+ isCodeGenOnly = 1, Uses = [RSP, SSP] in {
def TCRETURNdi64 : PseudoI<(outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset),
- []>;
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset),
+ []>, Sched<[WriteJump]>;
def TCRETURNri64 : PseudoI<(outs),
- (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset),
+ []>, Sched<[WriteJump]>, NotMemoryFoldable;
let mayLoad = 1 in
def TCRETURNmi64 : PseudoI<(outs),
- (ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable;
+ (ins i64mem_TC:$dst, i32imm:$offset),
+ []>, Sched<[WriteJumpLd]>, NotMemoryFoldable;
- def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
- "jmp\t$dst", []>;
+ def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_pcrel:$dst),
+ []>, Sched<[WriteJump]>;
- def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
- "jmp{q}\t{*}$dst", []>;
+ def TAILJMPr64 : PseudoI<(outs), (ins ptr_rc_tailcall:$dst),
+ []>, Sched<[WriteJump]>;
let mayLoad = 1 in
- def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
- "jmp{q}\t{*}$dst", []>;
+ def TAILJMPm64 : PseudoI<(outs), (ins i64mem_TC:$dst),
+ []>, Sched<[WriteJumpLd]>;
// Win64 wants indirect jumps leaving the function to have a REX_W prefix.
let hasREX_WPrefix = 1 in {
- def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
- "rex64 jmp{q}\t{*}$dst", []>;
+ def TAILJMPr64_REX : PseudoI<(outs), (ins ptr_rc_tailcall:$dst),
+ []>, Sched<[WriteJump]>;
let mayLoad = 1 in
- def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
- "rex64 jmp{q}\t{*}$dst", []>;
+ def TAILJMPm64_REX : PseudoI<(outs), (ins i64mem_TC:$dst),
+ []>, Sched<[WriteJumpLd]>;
}
}
@@ -403,13 +412,13 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
- isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ isCodeGenOnly = 1, SchedRW = [WriteJump] in
let Uses = [RSP, EFLAGS, SSP] in {
def TCRETURNdi64cc : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset,
i32imm:$cond), []>;
  // This gets replaced by a conditional jump instruction in MC lowering.
- def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$cond), "", []>;
+ def TAILJMPd64_CC : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$cond), []>;
}
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 06e605fe5db2..7a4eb138ec34 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -17,19 +17,18 @@ let hasSideEffects = 0 in {
let Defs = [EAX], Uses = [AX] in // EAX = signext(AX)
def CWDE : I<0x98, RawFrm, (outs), (ins),
"{cwtl|cwde}", []>, OpSize32, Sched<[WriteALU]>;
+ let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX)
+ def CDQE : RI<0x98, RawFrm, (outs), (ins),
+ "{cltq|cdqe}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>;
+ // FIXME: CWD/CDQ/CQO shouldn't Def the A register, but the fast register
+ // allocator crashes if you remove it.
let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX)
def CWD : I<0x99, RawFrm, (outs), (ins),
"{cwtd|cwd}", []>, OpSize16, Sched<[WriteALU]>;
let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX)
def CDQ : I<0x99, RawFrm, (outs), (ins),
"{cltd|cdq}", []>, OpSize32, Sched<[WriteALU]>;
-
-
- let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX)
- def CDQE : RI<0x98, RawFrm, (outs), (ins),
- "{cltq|cdqe}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>;
-
let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX)
def CQO : RI<0x99, RawFrm, (outs), (ins),
"{cqto|cqo}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>;
diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp
index d42fec3770c7..f3b286e0375c 100644
--- a/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/lib/Target/X86/X86InstrFoldTables.cpp
@@ -292,6 +292,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
{ X86::JMP32r_NT, X86::JMP32m_NT, TB_FOLDED_LOAD },
{ X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
{ X86::JMP64r_NT, X86::JMP64m_NT, TB_FOLDED_LOAD },
+ { X86::MMX_MOVD64from64rr, X86::MMX_MOVD64from64rm, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::MMX_MOVD64grr, X86::MMX_MOVD64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
{ X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
{ X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
@@ -5245,6 +5247,270 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VXORPSZrrk, X86::VXORPSZrmk, 0 },
};
+static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
+ { X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD },
+ { X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD },
+ { X86::VADDPDZrr, X86::VADDPDZrmb, TB_BCAST_SD },
+ { X86::VADDPSZ128rr, X86::VADDPSZ128rmb, TB_BCAST_SS },
+ { X86::VADDPSZ256rr, X86::VADDPSZ256rmb, TB_BCAST_SS },
+ { X86::VADDPSZrr, X86::VADDPSZrmb, TB_BCAST_SS },
+ { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD },
+ { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD },
+ { X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD },
+ { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmbi, TB_BCAST_SS },
+ { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmbi, TB_BCAST_SS },
+ { X86::VCMPPSZrri, X86::VCMPPSZrmbi, TB_BCAST_SS },
+ { X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD },
+ { X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, TB_BCAST_SD },
+ { X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD },
+ { X86::VDIVPSZ128rr, X86::VDIVPSZ128rmb, TB_BCAST_SS },
+ { X86::VDIVPSZ256rr, X86::VDIVPSZ256rmb, TB_BCAST_SS },
+ { X86::VDIVPSZrr, X86::VDIVPSZrmb, TB_BCAST_SS },
+ { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rmb, TB_BCAST_SD },
+ { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rmb, TB_BCAST_SD },
+ { X86::VMAXCPDZrr, X86::VMAXCPDZrmb, TB_BCAST_SD },
+ { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS },
+ { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS },
+ { X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS },
+ { X86::VMAXPDZ128rr, X86::VMAXPDZ128rmb, TB_BCAST_SD },
+ { X86::VMAXPDZ256rr, X86::VMAXPDZ256rmb, TB_BCAST_SD },
+ { X86::VMAXPDZrr, X86::VMAXPDZrmb, TB_BCAST_SD },
+ { X86::VMAXPSZ128rr, X86::VMAXPSZ128rmb, TB_BCAST_SS },
+ { X86::VMAXPSZ256rr, X86::VMAXPSZ256rmb, TB_BCAST_SS },
+ { X86::VMAXPSZrr, X86::VMAXPSZrmb, TB_BCAST_SS },
+ { X86::VMINCPDZ128rr, X86::VMINCPDZ128rmb, TB_BCAST_SD },
+ { X86::VMINCPDZ256rr, X86::VMINCPDZ256rmb, TB_BCAST_SD },
+ { X86::VMINCPDZrr, X86::VMINCPDZrmb, TB_BCAST_SD },
+ { X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS },
+ { X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS },
+ { X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS },
+ { X86::VMINPDZ128rr, X86::VMINPDZ128rmb, TB_BCAST_SD },
+ { X86::VMINPDZ256rr, X86::VMINPDZ256rmb, TB_BCAST_SD },
+ { X86::VMINPDZrr, X86::VMINPDZrmb, TB_BCAST_SD },
+ { X86::VMINPSZ128rr, X86::VMINPSZ128rmb, TB_BCAST_SS },
+ { X86::VMINPSZ256rr, X86::VMINPSZ256rmb, TB_BCAST_SS },
+ { X86::VMINPSZrr, X86::VMINPSZrmb, TB_BCAST_SS },
+ { X86::VMULPDZ128rr, X86::VMULPDZ128rmb, TB_BCAST_SD },
+ { X86::VMULPDZ256rr, X86::VMULPDZ256rmb, TB_BCAST_SD },
+ { X86::VMULPDZrr, X86::VMULPDZrmb, TB_BCAST_SD },
+ { X86::VMULPSZ128rr, X86::VMULPSZ128rmb, TB_BCAST_SS },
+ { X86::VMULPSZ256rr, X86::VMULPSZ256rmb, TB_BCAST_SS },
+ { X86::VMULPSZrr, X86::VMULPSZrmb, TB_BCAST_SS },
+ { X86::VPADDDZ128rr, X86::VPADDDZ128rmb, TB_BCAST_D },
+ { X86::VPADDDZ256rr, X86::VPADDDZ256rmb, TB_BCAST_D },
+ { X86::VPADDDZrr, X86::VPADDDZrmb, TB_BCAST_D },
+ { X86::VPADDQZ128rr, X86::VPADDQZ128rmb, TB_BCAST_Q },
+ { X86::VPADDQZ256rr, X86::VPADDQZ256rmb, TB_BCAST_Q },
+ { X86::VPADDQZrr, X86::VPADDQZrmb, TB_BCAST_Q },
+ { X86::VPANDDZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D },
+ { X86::VPANDDZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D },
+ { X86::VPANDDZrr, X86::VPANDDZrmb, TB_BCAST_D },
+ { X86::VPANDNDZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D },
+ { X86::VPANDNDZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D },
+ { X86::VPANDNDZrr, X86::VPANDNDZrmb, TB_BCAST_D },
+ { X86::VPANDNQZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q },
+ { X86::VPANDNQZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q },
+ { X86::VPANDNQZrr, X86::VPANDNQZrmb, TB_BCAST_Q },
+ { X86::VPANDQZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q },
+ { X86::VPANDQZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q },
+ { X86::VPANDQZrr, X86::VPANDQZrmb, TB_BCAST_Q },
+ { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmib, TB_BCAST_D },
+ { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmib, TB_BCAST_D },
+ { X86::VPCMPDZrri, X86::VPCMPDZrmib, TB_BCAST_D },
+ { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rmb, TB_BCAST_D },
+ { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rmb, TB_BCAST_D },
+ { X86::VPCMPEQDZrr, X86::VPCMPEQDZrmb, TB_BCAST_D },
+ { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rmb, TB_BCAST_Q },
+ { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rmb, TB_BCAST_Q },
+ { X86::VPCMPEQQZrr, X86::VPCMPEQQZrmb, TB_BCAST_Q },
+ { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rmb, TB_BCAST_D },
+ { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rmb, TB_BCAST_D },
+ { X86::VPCMPGTDZrr, X86::VPCMPGTDZrmb, TB_BCAST_D },
+ { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rmb, TB_BCAST_Q },
+ { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rmb, TB_BCAST_Q },
+ { X86::VPCMPGTQZrr, X86::VPCMPGTQZrmb, TB_BCAST_Q },
+ { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmib, TB_BCAST_Q },
+ { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmib, TB_BCAST_Q },
+ { X86::VPCMPQZrri, X86::VPCMPQZrmib, TB_BCAST_Q },
+ { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmib, TB_BCAST_D },
+ { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmib, TB_BCAST_D },
+ { X86::VPCMPUDZrri, X86::VPCMPUDZrmib, TB_BCAST_D },
+ { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmib, TB_BCAST_Q },
+ { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmib, TB_BCAST_Q },
+ { X86::VPCMPUQZrri, X86::VPCMPUQZrmib, TB_BCAST_Q },
+ { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rmb, TB_BCAST_D },
+ { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rmb, TB_BCAST_D },
+ { X86::VPMAXSDZrr, X86::VPMAXSDZrmb, TB_BCAST_D },
+ { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rmb, TB_BCAST_Q },
+ { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rmb, TB_BCAST_Q },
+ { X86::VPMAXSQZrr, X86::VPMAXSQZrmb, TB_BCAST_Q },
+ { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rmb, TB_BCAST_D },
+ { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rmb, TB_BCAST_D },
+ { X86::VPMAXUDZrr, X86::VPMAXUDZrmb, TB_BCAST_D },
+ { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rmb, TB_BCAST_Q },
+ { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rmb, TB_BCAST_Q },
+ { X86::VPMAXUQZrr, X86::VPMAXUQZrmb, TB_BCAST_Q },
+ { X86::VPMINSDZ128rr, X86::VPMINSDZ128rmb, TB_BCAST_D },
+ { X86::VPMINSDZ256rr, X86::VPMINSDZ256rmb, TB_BCAST_D },
+ { X86::VPMINSDZrr, X86::VPMINSDZrmb, TB_BCAST_D },
+ { X86::VPMINSQZ128rr, X86::VPMINSQZ128rmb, TB_BCAST_Q },
+ { X86::VPMINSQZ256rr, X86::VPMINSQZ256rmb, TB_BCAST_Q },
+ { X86::VPMINSQZrr, X86::VPMINSQZrmb, TB_BCAST_Q },
+ { X86::VPMINUDZ128rr, X86::VPMINUDZ128rmb, TB_BCAST_D },
+ { X86::VPMINUDZ256rr, X86::VPMINUDZ256rmb, TB_BCAST_D },
+ { X86::VPMINUDZrr, X86::VPMINUDZrmb, TB_BCAST_D },
+ { X86::VPMINUQZ128rr, X86::VPMINUQZ128rmb, TB_BCAST_Q },
+ { X86::VPMINUQZ256rr, X86::VPMINUQZ256rmb, TB_BCAST_Q },
+ { X86::VPMINUQZrr, X86::VPMINUQZrmb, TB_BCAST_Q },
+ { X86::VPMULLDZ128rr, X86::VPMULLDZ128rmb, TB_BCAST_D },
+ { X86::VPMULLDZ256rr, X86::VPMULLDZ256rmb, TB_BCAST_D },
+ { X86::VPMULLDZrr, X86::VPMULLDZrmb, TB_BCAST_D },
+ { X86::VPMULLQZ128rr, X86::VPMULLQZ128rmb, TB_BCAST_Q },
+ { X86::VPMULLQZ256rr, X86::VPMULLQZ256rmb, TB_BCAST_Q },
+ { X86::VPMULLQZrr, X86::VPMULLQZrmb, TB_BCAST_Q },
+ { X86::VPORDZ128rr, X86::VPORDZ128rmb, TB_BCAST_D },
+ { X86::VPORDZ256rr, X86::VPORDZ256rmb, TB_BCAST_D },
+ { X86::VPORDZrr, X86::VPORDZrmb, TB_BCAST_D },
+ { X86::VPORQZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q },
+ { X86::VPORQZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q },
+ { X86::VPORQZrr, X86::VPORQZrmb, TB_BCAST_Q },
+ { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D },
+ { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D },
+ { X86::VPTESTMDZrr, X86::VPTESTMDZrmb, TB_BCAST_D },
+ { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q },
+ { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q },
+ { X86::VPTESTMQZrr, X86::VPTESTMQZrmb, TB_BCAST_Q },
+    { X86::VPTESTNMDZ128rr,    X86::VPTESTNMDZ128rmb,    TB_BCAST_D },
+    { X86::VPTESTNMDZ256rr,    X86::VPTESTNMDZ256rmb,    TB_BCAST_D },
+    { X86::VPTESTNMDZrr,       X86::VPTESTNMDZrmb,       TB_BCAST_D },
+    { X86::VPTESTNMQZ128rr,    X86::VPTESTNMQZ128rmb,    TB_BCAST_Q },
+    { X86::VPTESTNMQZ256rr,    X86::VPTESTNMQZ256rmb,    TB_BCAST_Q },
+    { X86::VPTESTNMQZrr,       X86::VPTESTNMQZrmb,       TB_BCAST_Q },
+ { X86::VPXORDZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D },
+ { X86::VPXORDZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D },
+ { X86::VPXORDZrr, X86::VPXORDZrmb, TB_BCAST_D },
+ { X86::VPXORQZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q },
+ { X86::VPXORQZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q },
+ { X86::VPXORQZrr, X86::VPXORQZrmb, TB_BCAST_Q },
+ { X86::VSUBPDZ128rr, X86::VSUBPDZ128rmb, TB_BCAST_SD },
+ { X86::VSUBPDZ256rr, X86::VSUBPDZ256rmb, TB_BCAST_SD },
+ { X86::VSUBPDZrr, X86::VSUBPDZrmb, TB_BCAST_SD },
+ { X86::VSUBPSZ128rr, X86::VSUBPSZ128rmb, TB_BCAST_SS },
+ { X86::VSUBPSZ256rr, X86::VSUBPSZ256rmb, TB_BCAST_SS },
+ { X86::VSUBPSZrr, X86::VSUBPSZrmb, TB_BCAST_SS },
+};
+
+static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = {
+ { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD },
+ { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS },
+ { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD },
+ { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS },
+ { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD },
+ { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADD231PSZr, X86::VFMADD231PSZmb, TB_BCAST_SS },
+ { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZmb, TB_BCAST_SD },
+ { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZmb, TB_BCAST_SS },
+ { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZmb, TB_BCAST_SD },
+ { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZmb, TB_BCAST_SS },
+ { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZmb, TB_BCAST_SD },
+ { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS },
+ { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD },
+ { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS },
+ { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD },
+ { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS },
+ { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD },
+ { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUB231PSZr, X86::VFMSUB231PSZmb, TB_BCAST_SS },
+ { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZmb, TB_BCAST_SD },
+ { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZmb, TB_BCAST_SS },
+ { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZmb, TB_BCAST_SD },
+ { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZmb, TB_BCAST_SS },
+ { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZmb, TB_BCAST_SD },
+ { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZmb, TB_BCAST_SS },
+ { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD },
+ { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS },
+ { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD },
+ { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS },
+ { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD },
+ { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS },
+ { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD },
+ { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS },
+ { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD },
+ { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS },
+ { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD },
+ { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS },
+};
+
static const X86MemoryFoldTableEntry *
lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
#ifndef NDEBUG
@@ -5287,6 +5553,18 @@ lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
std::end(MemoryFoldTable4)) ==
std::end(MemoryFoldTable4) &&
"MemoryFoldTable4 is not sorted and unique!");
+ assert(std::is_sorted(std::begin(BroadcastFoldTable2),
+ std::end(BroadcastFoldTable2)) &&
+ std::adjacent_find(std::begin(BroadcastFoldTable2),
+ std::end(BroadcastFoldTable2)) ==
+ std::end(BroadcastFoldTable2) &&
+ "BroadcastFoldTable2 is not sorted and unique!");
+ assert(std::is_sorted(std::begin(BroadcastFoldTable3),
+ std::end(BroadcastFoldTable3)) &&
+ std::adjacent_find(std::begin(BroadcastFoldTable3),
+ std::end(BroadcastFoldTable3)) ==
+ std::end(BroadcastFoldTable3) &&
+ "BroadcastFoldTable3 is not sorted and unique!");
FoldTablesChecked.store(true, std::memory_order_relaxed);
}
#endif
@@ -5355,6 +5633,15 @@ struct X86MemUnfoldTable {
// Index 4, folded load
addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);
+ // Broadcast tables.
+ for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable2)
+ // Index 2, folded broadcast
+ addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
+
+ for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable3)
+ // Index 2, folded broadcast
+ addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
+
// Sort the memory->reg unfold table.
array_pod_sort(Table.begin(), Table.end());
diff --git a/lib/Target/X86/X86InstrFoldTables.h b/lib/Target/X86/X86InstrFoldTables.h
index 419baf98f61d..7dc236a0d7e4 100644
--- a/lib/Target/X86/X86InstrFoldTables.h
+++ b/lib/Target/X86/X86InstrFoldTables.h
@@ -19,35 +19,48 @@ namespace llvm {
enum {
// Select which memory operand is being unfolded.
- // (stored in bits 0 - 3)
+ // (stored in bits 0 - 2)
TB_INDEX_0 = 0,
TB_INDEX_1 = 1,
TB_INDEX_2 = 2,
TB_INDEX_3 = 3,
TB_INDEX_4 = 4,
- TB_INDEX_MASK = 0xf,
+ TB_INDEX_MASK = 0x7,
// Do not insert the reverse map (MemOp -> RegOp) into the table.
// This may be needed because there is a many -> one mapping.
- TB_NO_REVERSE = 1 << 4,
+ TB_NO_REVERSE = 1 << 3,
// Do not insert the forward map (RegOp -> MemOp) into the table.
// This is needed for Native Client, which prohibits branch
// instructions from using a memory operand.
- TB_NO_FORWARD = 1 << 5,
+ TB_NO_FORWARD = 1 << 4,
- TB_FOLDED_LOAD = 1 << 6,
- TB_FOLDED_STORE = 1 << 7,
+ TB_FOLDED_LOAD = 1 << 5,
+ TB_FOLDED_STORE = 1 << 6,
+ TB_FOLDED_BCAST = 1 << 7,
// Minimum alignment required for load/store.
- // Used for RegOp->MemOp conversion.
- // (stored in bits 8 - 15)
+ // Used for RegOp->MemOp conversion. Encoded as Log2(Align) + 1 to allow 0
+ // to mean align of 0.
+ // (stored in bits 8 - 11)
TB_ALIGN_SHIFT = 8,
- TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
- TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
- TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
- TB_ALIGN_64 = 64 << TB_ALIGN_SHIFT,
- TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
+ TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 5 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 6 << TB_ALIGN_SHIFT,
+ TB_ALIGN_64 = 7 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0xf << TB_ALIGN_SHIFT,
+
+ // Broadcast type.
+ // (stored in bits 12 - 13)
+ TB_BCAST_TYPE_SHIFT = 12,
+ TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT,
+
+ // Unused bits 14-15
};
// This struct is used for both the folding and unfold tables. They KeyOp
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 096cc27861ca..de6f8a81dff6 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -103,6 +103,8 @@ def X86vzld : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86vextractst : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86VBroadcastld : SDNode<"X86ISD::VBROADCAST_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
@@ -954,6 +956,26 @@ def X86vextractstore64 : PatFrag<(ops node:$val, node:$ptr),
return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
}]>;
+def X86VBroadcastld8 : PatFrag<(ops node:$src),
+ (X86VBroadcastld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 1;
+}]>;
+
+def X86VBroadcastld16 : PatFrag<(ops node:$src),
+ (X86VBroadcastld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 2;
+}]>;
+
+def X86VBroadcastld32 : PatFrag<(ops node:$src),
+ (X86VBroadcastld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 4;
+}]>;
+
+def X86VBroadcastld64 : PatFrag<(ops node:$src),
+ (X86VBroadcastld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
+}]>;
+
def fp32imm0 : PatLeaf<(f32 fpimm), [{
return N->isExactlyValue(+0.0);
@@ -963,6 +985,10 @@ def fp64imm0 : PatLeaf<(f64 fpimm), [{
return N->isExactlyValue(+0.0);
}]>;
+def fp128imm0 : PatLeaf<(f128 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
// EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
// to VEXTRACTF128/VEXTRACTI128 imm.
def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index dbe45356c42b..c29029daeec9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -30,7 +30,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -465,7 +465,7 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
/// Return true if register is PIC base; i.e.g defined by X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
// Don't waste compile time scanning use-def chains of physregs.
- if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
+ if (!Register::isVirtualRegister(BaseReg))
return false;
bool isPICBase = false;
for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
@@ -480,9 +480,50 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
}
bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const {
+ AAResults *AA) const {
switch (MI.getOpcode()) {
- default: break;
+ default:
+ // This function should only be called for opcodes with the ReMaterializable
+ // flag set.
+ llvm_unreachable("Unknown rematerializable operation!");
+ break;
+
+ case X86::LOAD_STACK_GUARD:
+ case X86::AVX1_SETALLONES:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX512_128_SET0:
+ case X86::AVX512_256_SET0:
+ case X86::AVX512_512_SET0:
+ case X86::AVX512_512_SETALLONES:
+ case X86::AVX512_FsFLD0SD:
+ case X86::AVX512_FsFLD0SS:
+ case X86::AVX512_FsFLD0F128:
+ case X86::AVX_SET0:
+ case X86::FsFLD0SD:
+ case X86::FsFLD0SS:
+ case X86::FsFLD0F128:
+ case X86::KSET0D:
+ case X86::KSET0Q:
+ case X86::KSET0W:
+ case X86::KSET1D:
+ case X86::KSET1Q:
+ case X86::KSET1W:
+ case X86::MMX_SET0:
+ case X86::MOV32ImmSExti8:
+ case X86::MOV32r0:
+ case X86::MOV32r1:
+ case X86::MOV32r_1:
+ case X86::MOV32ri64:
+ case X86::MOV64ImmSExti8:
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ case X86::MOV16ri:
+ case X86::MOV32ri:
+ case X86::MOV64ri:
+ case X86::MOV64ri32:
+ case X86::MOV8ri:
+ return true;
+
case X86::MOV8rm:
case X86::MOV8rm_NOREX:
case X86::MOV16rm:
@@ -561,7 +602,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
MI.isDereferenceableInvariantLoad(AA)) {
- unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
+ Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
// Allow re-materialization of PIC load.
@@ -583,7 +624,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
// lea fi#, lea GV, etc. are all rematerializable.
if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
return true;
- unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
+ Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
if (BaseReg == 0)
return true;
// Allow re-materialization of lea PICBase + x.
@@ -594,10 +635,6 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
return false;
}
}
-
- // All other instructions marked M_REMATERIALIZABLE are always trivially
- // rematerializable.
- return true;
}
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
@@ -664,7 +701,7 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
}
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
- unsigned Opc, bool AllowSP, unsigned &NewSrc,
+ unsigned Opc, bool AllowSP, Register &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const {
MachineFunction &MF = *MI.getParent()->getParent();
@@ -675,7 +712,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
RC = Opc != X86::LEA32r ?
&X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
}
- unsigned SrcReg = Src.getReg();
+ Register SrcReg = Src.getReg();
// For both LEA64 and LEA32 the register already has essentially the right
// type (32-bit or 64-bit) we may just need to forbid SP.
@@ -684,7 +721,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
isKill = Src.isKill();
assert(!Src.isUndef() && "Undef op doesn't need optimization");
- if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
+ if (Register::isVirtualRegister(NewSrc) &&
!MF.getRegInfo().constrainRegClass(NewSrc, RC))
return false;
@@ -693,7 +730,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// This is for an LEA64_32r and incoming registers are 32-bit. One way or
// another we need to add 64-bit registers to the final MI.
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ if (Register::isPhysicalRegister(SrcReg)) {
ImplicitOp = Src;
ImplicitOp.setImplicit();
@@ -740,8 +777,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
return nullptr;
unsigned Opcode = X86::LEA64_32r;
- unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
- unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 8/16-bits.
@@ -751,8 +788,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
// But testing has shown this *does* help performance in 64-bit mode (at
// least on modern x86 machines).
MachineBasicBlock::iterator MBBI = MI.getIterator();
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
+ Register Dest = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
bool IsDead = MI.getOperand(0).isDead();
bool IsKill = MI.getOperand(1).isKill();
unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
@@ -794,7 +831,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
case X86::ADD8rr_DB:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
- unsigned Src2 = MI.getOperand(2).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
bool IsKill2 = MI.getOperand(2).isKill();
assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
unsigned InRegLEA2 = 0;
@@ -888,7 +925,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
// LEA can't handle RSP.
- if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ if (Register::isVirtualRegister(Src.getReg()) &&
!MF.getRegInfo().constrainRegClass(Src.getReg(),
&X86::GR64_NOSPRegClass))
return nullptr;
@@ -911,7 +948,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// LEA can't handle ESP.
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, ImplicitOp, LV))
@@ -947,7 +984,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
(Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
ImplicitOp, LV))
@@ -970,7 +1007,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
: (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
ImplicitOp, LV))
@@ -1005,7 +1042,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, ImplicitOp, LV))
@@ -1013,7 +1050,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
const MachineOperand &Src2 = MI.getOperand(2);
bool isKill2;
- unsigned SrcReg2;
+ Register SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
SrcReg2, isKill2, ImplicitOp2, LV))
@@ -1054,7 +1091,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, ImplicitOp, LV))
@@ -1085,6 +1122,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
case X86::SUB32ri8:
case X86::SUB32ri: {
+ if (!MI.getOperand(2).isImm())
+ return nullptr;
int64_t Imm = MI.getOperand(2).getImm();
if (!isInt<32>(-Imm))
return nullptr;
@@ -1093,7 +1132,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, ImplicitOp, LV))
@@ -1111,6 +1150,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SUB64ri8:
case X86::SUB64ri32: {
+ if (!MI.getOperand(2).isImm())
+ return nullptr;
int64_t Imm = MI.getOperand(2).getImm();
if (!isInt<32>(-Imm))
return nullptr;
@@ -1140,40 +1181,62 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
- case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: {
+ case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk:
+ case X86::VBROADCASTSDZ256mk:
+ case X86::VBROADCASTSDZmk:
+ case X86::VBROADCASTSSZ128mk:
+ case X86::VBROADCASTSSZ256mk:
+ case X86::VBROADCASTSSZmk:
+ case X86::VPBROADCASTDZ128mk:
+ case X86::VPBROADCASTDZ256mk:
+ case X86::VPBROADCASTDZmk:
+ case X86::VPBROADCASTQZ128mk:
+ case X86::VPBROADCASTQZ256mk:
+ case X86::VPBROADCASTQZmk: {
unsigned Opc;
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
- case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
- case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
- case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
- case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
- case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
- case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
- case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
- case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
+ case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
+ case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
+ case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
+ case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
+ case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
+ case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break;
+ case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break;
+ case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break;
+ case X86::VBROADCASTSSZ256mk: Opc = X86::VBLENDMPSZ256rmbk; break;
+ case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break;
+ case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break;
+ case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break;
+ case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break;
+ case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break;
+ case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break;
+ case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break;
}
NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1187,6 +1250,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.add(MI.getOperand(7));
break;
}
+
case X86::VMOVDQU8Z128rrk:
case X86::VMOVDQU8Z256rrk:
case X86::VMOVDQU8Zrrk:
@@ -1683,6 +1747,27 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::VCMPSDZrr:
+ case X86::VCMPSSZrr:
+ case X86::VCMPPDZrri:
+ case X86::VCMPPSZrri:
+ case X86::VCMPPDZ128rri:
+ case X86::VCMPPSZ128rri:
+ case X86::VCMPPDZ256rri:
+ case X86::VCMPPSZ256rri:
+ case X86::VCMPPDZrrik:
+ case X86::VCMPPSZrrik:
+ case X86::VCMPPDZ128rrik:
+ case X86::VCMPPSZ128rrik:
+ case X86::VCMPPDZ256rrik:
+ case X86::VCMPPSZ256rrik: {
+ unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f;
+ Imm = X86::getSwappedVCMPImm(Imm);
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
case X86::VPERM2F128rr:
case X86::VPERM2I128rr: {
// Flip permute source immediate.
@@ -1859,7 +1944,7 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
// CommutableOpIdx2 is well defined now. Let's choose another commutable
// operand and assign its index to CommutableOpIdx1.
- unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
+ Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
unsigned CommutableOpIdx1;
for (CommutableOpIdx1 = LastCommutableVecOp;
@@ -1889,7 +1974,8 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
return true;
}
-bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!Desc.isCommutable())
@@ -1926,17 +2012,23 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
// Ordered/Unordered/Equal/NotEqual tests
unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
switch (Imm) {
+ default:
+ // EVEX versions can be commuted.
+ if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX)
+ break;
+ return false;
case 0x00: // EQUAL
case 0x03: // UNORDERED
case 0x04: // NOT EQUAL
case 0x07: // ORDERED
- // The indices of the commutable operands are 1 and 2 (or 2 and 3
- // when masked).
- // Assign them to the returned operand indices here.
- return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
- 2 + OpOffset);
+ break;
}
- return false;
+
+ // The indices of the commutable operands are 1 and 2 (or 2 and 3
+ // when masked).
+ // Assign them to the returned operand indices here.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
+ 2 + OpOffset);
}
case X86::MOVSSrr:
// X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
@@ -1990,6 +2082,24 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
case X86::VPTERNLOGQZ256rmbikz:
case X86::VPTERNLOGQZrmbikz:
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ case X86::VPDPWSSDZ128r:
+ case X86::VPDPWSSDZ128rk:
+ case X86::VPDPWSSDZ128rkz:
+ case X86::VPDPWSSDZ256r:
+ case X86::VPDPWSSDZ256rk:
+ case X86::VPDPWSSDZ256rkz:
+ case X86::VPDPWSSDZr:
+ case X86::VPDPWSSDZrk:
+ case X86::VPDPWSSDZrkz:
+ case X86::VPDPWSSDSZ128r:
+ case X86::VPDPWSSDSZ128rk:
+ case X86::VPDPWSSDSZ128rkz:
+ case X86::VPDPWSSDSZ256r:
+ case X86::VPDPWSSDSZ256rk:
+ case X86::VPDPWSSDSZ256rkz:
+ case X86::VPDPWSSDSZr:
+ case X86::VPDPWSSDSZrk:
+ case X86::VPDPWSSDSZrkz:
case X86::VPMADD52HUQZ128r:
case X86::VPMADD52HUQZ128rk:
case X86::VPMADD52HUQZ128rkz:
@@ -2215,7 +2325,7 @@ unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) {
}
}
-/// Get the VPCMP immediate if the opcodes are swapped.
+/// Get the VPCMP immediate if the operands are swapped.
unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
switch (Imm) {
default: llvm_unreachable("Unreachable!");
@@ -2233,7 +2343,7 @@ unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
return Imm;
}
-/// Get the VPCOM immediate if the opcodes are swapped.
+/// Get the VPCOM immediate if the operands are swapped.
unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
switch (Imm) {
default: llvm_unreachable("Unreachable!");
@@ -2251,6 +2361,23 @@ unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
return Imm;
}
+/// Get the VCMP immediate if the operands are swapped.
+unsigned X86::getSwappedVCMPImm(unsigned Imm) {
+ // Only need the lower 2 bits to distinquish.
+ switch (Imm & 0x3) {
+ default: llvm_unreachable("Unreachable!");
+ case 0x00: case 0x03:
+ // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted.
+ break;
+ case 0x01: case 0x02:
+ // Need to toggle bits 3:0. Bit 4 stays the same.
+ Imm ^= 0xf;
+ break;
+ }
+
+ return Imm;
+}
+
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
if (!MI.isTerminator()) return false;
@@ -3131,25 +3258,6 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(isKill));
}
-void X86InstrInfo::storeRegToAddr(
- MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
- ArrayRef<MachineMemOperand *> MMOs,
- SmallVectorImpl<MachineInstr *> &NewMIs) const {
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
- unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
- DebugLoc DL;
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.add(Addr[i]);
- MIB.addReg(SrcReg, getKillRegState(isKill));
- MIB.setMemRefs(MMOs);
- NewMIs.push_back(MIB);
-}
-
-
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
@@ -3164,23 +3272,6 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx);
}
-void X86InstrInfo::loadRegFromAddr(
- MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
- ArrayRef<MachineMemOperand *> MMOs,
- SmallVectorImpl<MachineInstr *> &NewMIs) const {
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
- unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
- DebugLoc DL;
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.add(Addr[i]);
- MIB.setMemRefs(MMOs);
- NewMIs.push_back(MIB);
-}
-
bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &CmpMask,
int &CmpValue) const {
@@ -3599,8 +3690,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (!IsCmpZero && !Sub)
return false;
- bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg);
+ bool IsSwapped =
+ (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg);
// Scan forward from the instruction after CmpInstr for uses of EFLAGS.
// It is safe to remove CmpInstr if EFLAGS is redefined or killed.
@@ -3755,7 +3847,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg != FoldAsLoadDefReg)
continue;
// Do not fold if we have a subreg use or a def.
@@ -3785,7 +3877,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
const MCInstrDesc &Desc) {
assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
MIB->setDesc(Desc);
// MachineInstr::addOperand() will insert explicit operands before any
@@ -3815,7 +3907,7 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
bool MinusOne) {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
// Insert the XOR.
BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
@@ -3891,7 +3983,7 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB,
const TargetInstrInfo &TII) {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
const GlobalValue *GV =
cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
auto Flags = MachineMemOperand::MOLoad |
@@ -3929,7 +4021,7 @@ static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
const MCInstrDesc &LoadDesc,
const MCInstrDesc &BroadcastDesc,
unsigned SubIdx) {
- unsigned DestReg = MIB->getOperand(0).getReg();
+ Register DestReg = MIB->getOperand(0).getReg();
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(DestReg) < 16) {
// We can use a normal VEX encoded load.
@@ -3952,7 +4044,7 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB,
const MCInstrDesc &StoreDesc,
const MCInstrDesc &ExtractDesc,
unsigned SubIdx) {
- unsigned SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
+ Register SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(SrcReg) < 16) {
// We can use a normal VEX encoded store.
@@ -4008,12 +4100,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
+ case X86::FsFLD0F128:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::AVX_SET0: {
assert(HasAVX && "AVX not supported");
const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned SrcReg = MIB->getOperand(0).getReg();
- unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+ Register SrcReg = MIB->getOperand(0).getReg();
+ Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
MIB->getOperand(0).setReg(XReg);
Expand2AddrUndef(MIB, get(X86::VXORPSrr));
MIB.addReg(SrcReg, RegState::ImplicitDefine);
@@ -4021,9 +4114,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case X86::AVX512_128_SET0:
case X86::AVX512_FsFLD0SS:
- case X86::AVX512_FsFLD0SD: {
+ case X86::AVX512_FsFLD0SD:
+ case X86::AVX512_FsFLD0F128: {
bool HasVLX = Subtarget.hasVLX();
- unsigned SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB->getOperand(0).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
return Expand2AddrUndef(MIB,
@@ -4037,10 +4131,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0: {
bool HasVLX = Subtarget.hasVLX();
- unsigned SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB->getOperand(0).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
- unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+ Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
MIB->getOperand(0).setReg(XReg);
Expand2AddrUndef(MIB,
get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
@@ -4060,14 +4154,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX2_SETALLONES:
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
case X86::AVX1_SETALLONES: {
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
// VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
MIB->setDesc(get(X86::VCMPPSYrri));
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
return true;
}
case X86::AVX512_512_SETALLONES: {
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
MIB->setDesc(get(X86::VPTERNLOGDZrri));
// VPTERNLOGD needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
@@ -4077,8 +4171,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case X86::AVX512_512_SEXT_MASK_32:
case X86::AVX512_512_SEXT_MASK_64: {
- unsigned Reg = MIB->getOperand(0).getReg();
- unsigned MaskReg = MIB->getOperand(1).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
+ Register MaskReg = MIB->getOperand(1).getReg();
unsigned MaskState = getRegState(MIB->getOperand(1));
unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
@@ -4115,8 +4209,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
case X86::MOV32ri64: {
- unsigned Reg = MIB->getOperand(0).getReg();
- unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
+ Register Reg = MIB->getOperand(0).getReg();
+ Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
MI.setDesc(get(X86::MOV32ri));
MIB->getOperand(0).setReg(Reg32);
MIB.addReg(Reg, RegState::ImplicitDefine);
@@ -4251,8 +4345,8 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
// If MI is marked as reading Reg, the partial register update is wanted.
const MachineOperand &MO = MI.getOperand(0);
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isVirtualRegister(Reg)) {
if (MO.readsReg() || MI.readsVirtualRegister(Reg))
return 0;
} else {
@@ -4268,7 +4362,10 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
// Return true for any instruction the copies the high bits of the first source
// operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
+static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
+ bool ForLoadFold = false) {
+ // Set the OpNum parameter to the first source operand.
+ OpNum = 1;
switch (Opcode) {
case X86::VCVTSI2SSrr:
case X86::VCVTSI2SSrm:
@@ -4427,6 +4524,14 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
return true;
+ case X86::VMOVSSZrrk:
+ case X86::VMOVSDZrrk:
+ OpNum = 3;
+ return true;
+ case X86::VMOVSSZrrkz:
+ case X86::VMOVSDZrrkz:
+ OpNum = 2;
+ return true;
}
return false;
@@ -4449,14 +4554,11 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
unsigned
X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
const TargetRegisterInfo *TRI) const {
- if (!hasUndefRegUpdate(MI.getOpcode()))
+ if (!hasUndefRegUpdate(MI.getOpcode(), OpNum))
return 0;
- // Set the OpNum parameter to the first source operand.
- OpNum = 1;
-
const MachineOperand &MO = MI.getOperand(OpNum);
- if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MO.isUndef() && Register::isPhysicalRegister(MO.getReg())) {
return UndefRegClearance;
}
return 0;
@@ -4464,7 +4566,7 @@ X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
void X86InstrInfo::breakPartialRegDependency(
MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
- unsigned Reg = MI.getOperand(OpNum).getReg();
+ Register Reg = MI.getOperand(OpNum).getReg();
// If MI kills this register, the false dependence is already broken.
if (MI.killsRegister(Reg, TRI))
return;
@@ -4480,7 +4582,7 @@ void X86InstrInfo::breakPartialRegDependency(
} else if (X86::VR256RegClass.contains(Reg)) {
// Use vxorps to clear the full ymm register.
// It wants to read and write the xmm sub-register.
- unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
+ Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
.addReg(XReg, RegState::Undef)
.addReg(XReg, RegState::Undef)
@@ -4489,7 +4591,7 @@ void X86InstrInfo::breakPartialRegDependency(
} else if (X86::GR64RegClass.contains(Reg)) {
// Using XOR32rr because it has shorter encoding and zeros up the upper bits
// as well.
- unsigned XReg = TRI->getSubReg(Reg, X86::sub_32bit);
+ Register XReg = TRI->getSubReg(Reg, X86::sub_32bit);
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
.addReg(XReg, RegState::Undef)
.addReg(XReg, RegState::Undef)
@@ -4538,8 +4640,8 @@ static void updateOperandRegConstraints(MachineFunction &MF,
// We only need to update constraints on virtual register operands.
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TRI.isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
auto *NewRC = MRI.constrainRegClass(
@@ -4698,7 +4800,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
MachineInstr &MI) {
- if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) ||
+ unsigned Ignored;
+ if (!hasUndefRegUpdate(MI.getOpcode(), Ignored, /*ForLoadFold*/true) ||
!MI.getOperand(1).isReg())
return false;
@@ -4788,6 +4891,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (I != nullptr) {
unsigned Opcode = I->DstOp;
unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
+ MinAlign = MinAlign ? 1 << (MinAlign - 1) : 0;
if (Align < MinAlign)
return nullptr;
bool NarrowToMOV32rm = false;
@@ -4821,8 +4925,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// If this is the special case where we use a MOV32rm to load a 32-bit
// value and zero-extend the top bits. Change the destination register
// to a 32-bit one.
- unsigned DstReg = NewMI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ Register DstReg = NewMI->getOperand(0).getReg();
+ if (Register::isPhysicalRegister(DstReg))
NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
else
NewMI->getOperand(0).setSubReg(X86::sub_32bit);
@@ -5133,6 +5237,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::V_SET0:
case X86::V_SETALLONES:
case X86::AVX512_128_SET0:
+ case X86::FsFLD0F128:
+ case X86::AVX512_FsFLD0F128:
Alignment = 16;
break;
case X86::MMX_SET0:
@@ -5182,7 +5288,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::FsFLD0SD:
case X86::AVX512_FsFLD0SD:
case X86::FsFLD0SS:
- case X86::AVX512_FsFLD0SS: {
+ case X86::AVX512_FsFLD0SS:
+ case X86::FsFLD0F128:
+ case X86::AVX512_FsFLD0F128: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -5212,6 +5320,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Ty = Type::getFloatTy(MF.getFunction().getContext());
else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction().getContext());
+ else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
+ Ty = Type::getFP128Ty(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
@@ -5293,6 +5403,51 @@ extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
return StoreMMOs;
}
+static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I,
+ const TargetRegisterClass *RC,
+ const X86Subtarget &STI) {
+ assert(STI.hasAVX512() && "Expected at least AVX512!");
+ unsigned SpillSize = STI.getRegisterInfo()->getSpillSize(*RC);
+ assert((SpillSize == 64 || STI.hasVLX()) &&
+ "Can't broadcast less than 64 bytes without AVX512VL!");
+
+ switch (I->Flags & TB_BCAST_MASK) {
+ default: llvm_unreachable("Unexpected broadcast type!");
+ case TB_BCAST_D:
+ switch (SpillSize) {
+ default: llvm_unreachable("Unknown spill size");
+ case 16: return X86::VPBROADCASTDZ128m;
+ case 32: return X86::VPBROADCASTDZ256m;
+ case 64: return X86::VPBROADCASTDZm;
+ }
+ break;
+ case TB_BCAST_Q:
+ switch (SpillSize) {
+ default: llvm_unreachable("Unknown spill size");
+ case 16: return X86::VPBROADCASTQZ128m;
+ case 32: return X86::VPBROADCASTQZ256m;
+ case 64: return X86::VPBROADCASTQZm;
+ }
+ break;
+ case TB_BCAST_SS:
+ switch (SpillSize) {
+ default: llvm_unreachable("Unknown spill size");
+ case 16: return X86::VBROADCASTSSZ128m;
+ case 32: return X86::VBROADCASTSSZ256m;
+ case 64: return X86::VBROADCASTSSZm;
+ }
+ break;
+ case TB_BCAST_SD:
+ switch (SpillSize) {
+ default: llvm_unreachable("Unknown spill size");
+ case 16: return X86::VMOVDDUPZ128rm;
+ case 32: return X86::VBROADCASTSDZ256m;
+ case 64: return X86::VBROADCASTSDZm;
+ }
+ break;
+ }
+}
+
bool X86InstrInfo::unfoldMemoryOperand(
MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
@@ -5303,6 +5458,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
unsigned Index = I->Flags & TB_INDEX_MASK;
bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
bool FoldedStore = I->Flags & TB_FOLDED_STORE;
+ bool FoldedBCast = I->Flags & TB_FOLDED_BCAST;
if (UnfoldLoad && !FoldedLoad)
return false;
UnfoldLoad &= FoldedLoad;
@@ -5311,7 +5467,9 @@ bool X86InstrInfo::unfoldMemoryOperand(
UnfoldStore &= FoldedStore;
const MCInstrDesc &MCID = get(Opc);
+
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// TODO: Check if 32-byte or greater accesses are slow too?
if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMem16Slow())
@@ -5335,10 +5493,26 @@ bool X86InstrInfo::unfoldMemoryOperand(
AfterOps.push_back(Op);
}
- // Emit the load instruction.
+ // Emit the load or broadcast instruction.
if (UnfoldLoad) {
auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
- loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs);
+
+ unsigned Opc;
+ if (FoldedBCast) {
+ Opc = getBroadcastOpcode(I, RC, Subtarget);
+ } else {
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget);
+ }
+
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg);
+ for (unsigned i = 0, e = AddrOps.size(); i != e; ++i)
+ MIB.add(AddrOps[i]);
+ MIB.setMemRefs(MMOs);
+ NewMIs.push_back(MIB);
+
if (UnfoldStore) {
// Address operands cannot be marked isKill.
for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
@@ -5404,7 +5578,16 @@ bool X86InstrInfo::unfoldMemoryOperand(
if (UnfoldStore) {
const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
- storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs);
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = AddrOps.size(); i != e; ++i)
+ MIB.add(AddrOps[i]);
+ MIB.addReg(Reg, RegState::Kill);
+ MIB.setMemRefs(MMOs);
+ NewMIs.push_back(MIB);
}
return true;
@@ -5423,6 +5606,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned Index = I->Flags & TB_INDEX_MASK;
bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
bool FoldedStore = I->Flags & TB_FOLDED_STORE;
+ bool FoldedBCast = I->Flags & TB_FOLDED_BCAST;
const MCInstrDesc &MCID = get(Opc);
MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
@@ -5456,10 +5640,17 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
- unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
- Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
- VT, MVT::Other, AddrOps);
+
+ unsigned Opc;
+ if (FoldedBCast) {
+ Opc = getBroadcastOpcode(I, RC, Subtarget);
+ } else {
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget);
+ }
+
+ Load = DAG.getMachineNode(Opc, dl, VT, MVT::Other, AddrOps);
NewNodes.push_back(Load);
// Preserve memory reference information.
@@ -7367,6 +7558,96 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
}
}
+Optional<ParamLoadedValue>
+X86InstrInfo::describeLoadedValue(const MachineInstr &MI) const {
+ const MachineOperand *Op = nullptr;
+ DIExpression *Expr = nullptr;
+
+ switch (MI.getOpcode()) {
+ case X86::LEA32r:
+ case X86::LEA64r:
+ case X86::LEA64_32r: {
+ // Operand 4 could be global address. For now we do not support
+ // such situation.
+ if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm())
+ return None;
+
+ const MachineOperand &Op1 = MI.getOperand(1);
+ const MachineOperand &Op2 = MI.getOperand(3);
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ assert(Op2.isReg() && (Op2.getReg() == X86::NoRegister ||
+ Register::isPhysicalRegister(Op2.getReg())));
+
+ // Omit situations like:
+ // %rsi = lea %rsi, 4, ...
+ if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) ||
+ Op2.getReg() == MI.getOperand(0).getReg())
+ return None;
+ else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) ||
+ (Op2.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg())))
+ return None;
+
+ int64_t Coef = MI.getOperand(2).getImm();
+ int64_t Offset = MI.getOperand(4).getImm();
+ SmallVector<uint64_t, 8> Ops;
+
+ if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) {
+ Op = &Op1;
+ } else if (Op1.isFI())
+ Op = &Op1;
+
+ if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(Coef + 1);
+ Ops.push_back(dwarf::DW_OP_mul);
+ } else {
+ if (Op && Op2.getReg() != X86::NoRegister) {
+ int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false);
+ if (dwarfReg < 0)
+ return None;
+ else if (dwarfReg < 32) {
+ Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg);
+ Ops.push_back(0);
+ } else {
+ Ops.push_back(dwarf::DW_OP_bregx);
+ Ops.push_back(dwarfReg);
+ Ops.push_back(0);
+ }
+ } else if (!Op) {
+ assert(Op2.getReg() != X86::NoRegister);
+ Op = &Op2;
+ }
+
+ if (Coef > 1) {
+ assert(Op2.getReg() != X86::NoRegister);
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(Coef);
+ Ops.push_back(dwarf::DW_OP_mul);
+ }
+
+ if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) &&
+ Op2.getReg() != X86::NoRegister) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ }
+ }
+
+ DIExpression::appendOffset(Ops, Offset);
+ Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops);
+
+ return ParamLoadedValue(*Op, Expr);;
+ }
+ case X86::XOR32rr: {
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
+ return ParamLoadedValue(MachineOperand::CreateImm(0), Expr);
+ return None;
+ }
+ default:
+ return TargetInstrInfo::describeLoadedValue(MI);
+ }
+}
+
/// This is an architecture-specific helper function of reassociateOps.
/// Set special operand attributes for new instructions after reassociation.
void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
@@ -7500,9 +7781,8 @@ namespace {
// movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
// addq %rcx, %rax
// RAX now holds address of _GLOBAL_OFFSET_TABLE_.
- unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
- unsigned GOTReg =
- RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
.addReg(X86::RIP)
.addImm(0)
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 13ca17139494..22b7b1d4cb19 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -67,6 +67,9 @@ unsigned getSwappedVPCMPImm(unsigned Imm);
/// Get the VPCOM immediate if the opcodes are swapped.
unsigned getSwappedVPCOMImm(unsigned Imm);
+/// Get the VCMP immediate if the opcodes are swapped.
+unsigned getSwappedVCMPImm(unsigned Imm);
+
} // namespace X86
/// isGlobalStubReference - Return true if the specified TargetFlag operand is
@@ -203,7 +206,7 @@ public:
int &FrameIndex) const override;
bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const override;
+ AAResults *AA) const override;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr &Orig,
@@ -218,7 +221,7 @@ public:
/// Reference parameters are set to indicate how caller should add this
/// operand to the LEA instruction.
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
- unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc,
+ unsigned LEAOpcode, bool AllowSP, Register &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const;
@@ -251,7 +254,7 @@ public:
/// findCommutedOpIndices(MI, Op1, Op2);
/// can be interpreted as a query asking to find an operand that would be
/// commutable with the operand#1.
- bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
/// Returns an adjusted FMA opcode that must be used in FMA instruction that
@@ -317,23 +320,11 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- ArrayRef<MachineMemOperand *> MMOs,
- SmallVectorImpl<MachineInstr *> &NewMIs) const;
-
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned DestReg,
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- ArrayRef<MachineMemOperand *> MMOs,
- SmallVectorImpl<MachineInstr *> &NewMIs) const;
-
bool expandPostRAPseudo(MachineInstr &MI) const override;
/// Check whether the target can fold a load that feeds a subreg operand
@@ -527,6 +518,13 @@ public:
#define GET_INSTRINFO_HELPER_DECLS
#include "X86GenInstrInfo.inc"
+ static bool hasLockPrefix(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & X86II::LOCK;
+ }
+
+ Optional<ParamLoadedValue>
+ describeLoadedValue(const MachineInstr &MI) const override;
+
protected:
/// Commutes the operands in the given instruction by changing the operands
/// order and/or changing the instruction's opcode and/or the immediate value
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 8e05dd8ec5c1..e452145f3b65 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -673,6 +673,14 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
ImmSExti64i32AsmOperand];
}
+// 4-bit immediate used by some XOP instructions
+// [0, 0xF]
+def ImmUnsignedi4AsmOperand : AsmOperandClass {
+ let Name = "ImmUnsignedi4";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidImmUnsignedi4";
+}
+
// Unsigned immediate used by SSE/AVX instructions
// [0, 0xFF]
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
@@ -705,6 +713,13 @@ def i64i8imm : Operand<i64> {
let OperandType = "OPERAND_IMMEDIATE";
}
+// Unsigned 4-bit immediate used by some XOP instructions.
+def u4imm : Operand<i8> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi4AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
// Unsigned 8-bit immediate used by SSE/AVX instructions.
def u8imm : Operand<i8> {
let PrintMethod = "printU8Imm";
@@ -925,7 +940,6 @@ def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">;
def HasPTWRITE : Predicate<"Subtarget->hasPTWRITE()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
-def HasMPX : Predicate<"Subtarget->hasMPX()">;
def HasSHSTK : Predicate<"Subtarget->hasSHSTK()">;
def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
@@ -1103,7 +1117,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
if (ExtType == ISD::NON_EXTLOAD)
return true;
if (ExtType == ISD::EXTLOAD)
- return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return LD->getAlignment() >= 2 && LD->isSimple();
return false;
}]>;
@@ -1113,7 +1127,7 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
if (ExtType == ISD::NON_EXTLOAD)
return true;
if (ExtType == ISD::EXTLOAD)
- return LD->getAlignment() >= 4 && !LD->isVolatile();
+ return LD->getAlignment() >= 4 && LD->isSimple();
return false;
}]>;
@@ -1170,7 +1184,7 @@ def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [
if (LD->getMemoryVT() == MVT::i32)
return true;
- return LD->getAlignment() >= 4 && !LD->isVolatile();
+ return LD->getAlignment() >= 4 && LD->isSimple();
}]>;
@@ -2404,25 +2418,26 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
}
multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
- RegisterClass RC, X86MemOperand x86memop> {
+ RegisterClass RC, X86MemOperand x86memop,
+ X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in {
def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[WriteBLS]>;
+ T8PS, VEX_4V, Sched<[sched]>;
let mayLoad = 1 in
def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[WriteBLS.Folded]>;
+ T8PS, VEX_4V, Sched<[sched.Folded]>;
}
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
- defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem>;
- defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem>, VEX_W;
- defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem>;
- defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem>, VEX_W;
- defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem>;
- defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem>, VEX_W;
+ defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>;
+ defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, VEX_W;
+ defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>;
+ defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, VEX_W;
+ defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>;
+ defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, VEX_W;
}
//===----------------------------------------------------------------------===//
@@ -2683,12 +2698,12 @@ def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst",
multiclass lwpins_intr<RegisterClass RC> {
def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
"lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
- [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, imm:$cntl))]>,
+ [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, timm:$cntl))]>,
XOP_4V, XOPA;
let mayLoad = 1 in
def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
"lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
- [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), imm:$cntl))]>,
+ [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), timm:$cntl))]>,
XOP_4V, XOPA;
}
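// Note on the imm -> timm changes that recur throughout this patch: timm
// matches TargetConstant nodes, i.e. operands that must stay immediates
// in the selected instruction. Intrinsics whose immediate arguments are
// now built as TargetConstant no longer match patterns written with
// plain imm (a Constant), e.g.:
//   (X86lwpins RC:$src0, GR32:$src1, imm:$cntl)   // no longer matches
//   (X86lwpins RC:$src0, GR32:$src1, timm:$cntl)  // matches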
@@ -2700,11 +2715,11 @@ let Defs = [EFLAGS] in {
multiclass lwpval_intr<RegisterClass RC, Intrinsic Int> {
def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
"lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
- [(Int RC:$src0, GR32:$src1, imm:$cntl)]>, XOP_4V, XOPA;
+ [(Int RC:$src0, GR32:$src1, timm:$cntl)]>, XOP_4V, XOPA;
let mayLoad = 1 in
def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
"lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
- [(Int RC:$src0, (loadi32 addr:$src1), imm:$cntl)]>,
+ [(Int RC:$src0, (loadi32 addr:$src1), timm:$cntl)]>,
XOP_4V, XOPA;
}
@@ -3205,13 +3220,13 @@ def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>;
// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
// Likewise for btc/btr/bts.
def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}",
- (BT32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">;
+ (BT32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}",
- (BTC32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">;
+ (BTC32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}",
- (BTR32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">;
+ (BTR32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}",
- (BTS32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">;
+ (BTS32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
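// The alias immediates above switch from i32i8imm (sign-extended 8-bit)
// to i32u8imm, so they are parsed as unsigned 8-bit values, consistent
// with the mi8 instruction forms the aliases expand to.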
// clr aliases.
def : InstAlias<"clr{b}\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 57835b1a256a..cd9a866c91cb 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -30,7 +30,6 @@ def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>;
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
- // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
X86FoldableSchedWrite sched, bit Commutable = 0,
X86MemOperand OType = i64mem> {
@@ -67,7 +66,7 @@ let Constraints = "$src1 = $dst" in {
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId2 VR64:$src1, imm:$src2))]>,
+ [(set VR64:$dst, (IntId2 VR64:$src1, timm:$src2))]>,
Sched<[schedImm]>;
}
}
@@ -114,13 +113,13 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 timm:$src3)))]>,
Sched<[sched]>;
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
- (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
+ (bitconvert (load_mmx addr:$src2)), (i8 timm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -496,14 +495,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>,
+ (int_x86_sse_pshuf_w VR64:$src1, timm:$src2))]>,
Sched<[SchedWriteShuffle.MMX]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
- imm:$src2))]>,
+ timm:$src2))]>,
Sched<[SchedWriteShuffle.MMX.Folded]>;
// -- Conversion Instructions
@@ -535,7 +534,7 @@ def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
- imm:$src2))]>,
+ timm:$src2))]>,
Sched<[WriteVecExtract]>;
let Constraints = "$src1 = $dst" in {
let Predicates = [HasMMX, HasSSE1] in {
@@ -544,7 +543,7 @@ let Predicates = [HasMMX, HasSSE1] in {
(ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
- GR32orGR64:$src2, imm:$src3))]>,
+ GR32orGR64:$src2, timm:$src3))]>,
Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
@@ -553,7 +552,7 @@ let Predicates = [HasMMX, HasSSE1] in {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
- imm:$src3))]>,
+ timm:$src3))]>,
Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
}
@@ -567,6 +566,13 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(int_x86_mmx_pmovmskb VR64:$src))]>,
Sched<[WriteMMXMOVMSK]>;
+// MMX to XMM for vector types
+def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
+ [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
@@ -574,9 +580,13 @@ def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
(x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+def : Pat<(x86mmx (MMX_X86movdq2q (v2i64 (simple_load addr:$src)))),
(x86mmx (MMX_MOVQ64rm addr:$src))>;
+def : Pat<(v2i64 (X86vzmovl (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src)))))),
+ (MMX_MOVQ2DQrr VR64:$src)>;
+
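// The MOVQ2DQ additions above let isel move an MMX register straight
// into an XMM register: the plain pattern covers the widening move, and
// the X86vzmovl pattern covers the zero-extending form, avoiding a
// round trip through memory or a GPR.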
// Misc.
let SchedRW = [SchedWriteShuffle.MMX] in {
let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in
@@ -602,9 +612,6 @@ def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
- (bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
- (MMX_CVTTPS2PIirr VR128:$src)>;
-def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
(MMX_CVTPD2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td
index f7d931510fe2..44ba071947c2 100644
--- a/lib/Target/X86/X86InstrMPX.td
+++ b/lib/Target/X86/X86InstrMPX.td
@@ -12,16 +12,16 @@
//
//===----------------------------------------------------------------------===//
-// FIXME: Investigate a better scheduler class once MPX is used inside LLVM.
+// FIXME: Investigate a better scheduler class if MPX is ever used inside LLVM.
let SchedRW = [WriteSystem] in {
multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
- Requires<[HasMPX, Not64BitMode]>;
+ Requires<[Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
- Requires<[HasMPX, In64BitMode]>;
+ Requires<[In64BitMode]>;
}
defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
@@ -29,17 +29,17 @@ defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[HasMPX, Not64BitMode]>;
+ Requires<[Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[HasMPX, In64BitMode]>;
+ Requires<[In64BitMode]>;
def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[HasMPX, Not64BitMode]>;
+ Requires<[Not64BitMode]>;
def 64rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[HasMPX, In64BitMode]>;
+ Requires<[In64BitMode]>;
}
defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable;
defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable;
@@ -47,33 +47,31 @@ defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable;
def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[HasMPX]>, NotMemoryFoldable;
+ NotMemoryFoldable;
let mayLoad = 1 in {
def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
+ Requires<[Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
+ Requires<[In64BitMode]>, NotMemoryFoldable;
}
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[HasMPX]>, NotMemoryFoldable;
+ NotMemoryFoldable;
let mayStore = 1 in {
def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
+ Requires<[Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
+ Requires<[In64BitMode]>, NotMemoryFoldable;
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src),
- "bndstx\t{$src, $dst|$dst, $src}", []>, PS,
- Requires<[HasMPX]>;
+ "bndstx\t{$src, $dst|$dst, $src}", []>, PS;
}
let mayLoad = 1 in
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
- "bndldx\t{$src, $dst|$dst, $src}", []>, PS,
- Requires<[HasMPX]>;
+ "bndldx\t{$src, $dst|$dst, $src}", []>, PS;
} // SchedRW
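// Context for the HasMPX removals in this file: MPX code generation
// support was dropped upstream, and the bnd* instructions are retained
// only for the assembler and disassembler, so no CPU-feature predicate
// gates them anymore; only the 32/64-bit mode checks remain.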
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7d0a5b87baf4..09a04c0338b4 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -115,7 +115,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>;
+ [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
+ def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
}
//===----------------------------------------------------------------------===//
@@ -128,13 +130,18 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
-let Predicates = [NoAVX512] in
+let Predicates = [NoAVX512] in {
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+}
// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
@@ -147,6 +154,14 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8i32 immAllZerosV))]>;
}
+let Predicates = [NoAVX512] in {
+def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
+}
+
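// With the per-type patterns above, an all-zeros vector of any 128-bit
// or 256-bit element type now selects the same rematerializable
// V_SET0/AVX_SET0 pseudo; the NoAVX512 predicate leaves AVX-512 targets
// free to use their own zeroing idioms.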
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
@@ -355,7 +370,7 @@ defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.YMM>,
PS, VEX, VEX_L, VEX_WIG;
-defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
+defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.YMM>,
PD, VEX, VEX_L, VEX_WIG;
}
@@ -661,7 +676,7 @@ let Predicates = [UseSSE1] in {
// This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
// end up with a movsd or blend instead of shufp.
// No need for aligned load, we're only loading 64-bits.
- def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1,
+ def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1,
(i8 -28)),
(MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
@@ -727,7 +742,7 @@ let Predicates = [UseSSE1] in {
// This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
// end up with a movsd or blend instead of shufp.
// No need for aligned load, we're only loading 64-bits.
- def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))),
+ def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
@@ -761,7 +776,7 @@ let Predicates = [UseSSE2] in {
let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
// Use MOVLPD to load into the low bits from a full vector unless we can use
// BLENDPD.
- def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))),
+ def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
}
@@ -1713,12 +1728,12 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
let isCommutable = 1 in
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
- [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>,
+ [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, timm:$cc))]>,
Sched<[sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
- (ld_frag addr:$src2), imm:$cc))]>,
+ (ld_frag addr:$src2), timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -1751,13 +1766,13 @@ multiclass sse12_cmp_scalar_int<Operand memop,
def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- VR128:$src, imm:$cc))]>,
+ VR128:$src, timm:$cc))]>,
Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src, u8imm:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- mem_cpat:$src, imm:$cc))]>,
+ mem_cpat:$src, timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -1876,12 +1891,12 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
- [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, imm:$cc)))], d>,
+ [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
Sched<[sched]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst,
- (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
+ (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -1906,7 +1921,7 @@ let Constraints = "$src1 = $dst" in {
SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
}
-def CommutableCMPCC : PatLeaf<(imm), [{
+def CommutableCMPCC : PatLeaf<(timm), [{
uint64_t Imm = N->getZExtValue() & 0x7;
return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07);
}]>;
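// For reference, the immediates accepted above are the four comparison
// predicates that are symmetric in their operands, so the compare can
// be commuted freely:
//   0x00 = EQ, 0x03 = UNORD, 0x04 = NEQ, 0x07 = ORD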
@@ -1915,47 +1930,47 @@ def CommutableCMPCC : PatLeaf<(imm), [{
let Predicates = [HasAVX] in {
def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1,
CommutableCMPCC:$cc)),
- (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+ (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>;
def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1,
CommutableCMPCC:$cc)),
- (VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+ (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>;
def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
- (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+ (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
- (VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
+ (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
CommutableCMPCC:$cc)),
- (VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
+ (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
CommutableCMPCC:$cc)),
- (VCMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
+ (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
}
let Predicates = [UseSSE2] in {
def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
- (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+ (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
CommutableCMPCC:$cc)),
- (CMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
+ (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
}
let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
- (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
+ (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
CommutableCMPCC:$cc)),
- (CMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
+ (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
}
//===----------------------------------------------------------------------===//
@@ -1970,13 +1985,13 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 imm:$src3))))], d>,
+ (i8 timm:$src3))))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = IsCommutable in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], d>,
+ (i8 timm:$src3))))], d>,
Sched<[sched]>;
}
@@ -2097,7 +2112,7 @@ let Predicates = [HasAVX1Only] in {
let Predicates = [UseSSE2] in {
// Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
- (v2f64 (nonvolatile_load addr:$src2)))),
+ (v2f64 (simple_load addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
}
@@ -2721,7 +2736,7 @@ defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64,
defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
-
+
/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
@@ -3482,7 +3497,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>,
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>,
Sched<[schedImm]>;
}
@@ -3514,7 +3529,7 @@ multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))]>,
+ [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>,
Sched<[sched]>;
}
@@ -3597,7 +3612,7 @@ let Predicates = [HasAVX, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
+ (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
VEX, Sched<[sched.XMM]>, VEX_WIG;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
@@ -3605,7 +3620,7 @@ let Predicates = [HasAVX, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (load addr:$src1),
- (i8 imm:$src2))))]>, VEX,
+ (i8 timm:$src2))))]>, VEX,
Sched<[sched.XMM.Folded]>, VEX_WIG;
}
@@ -3615,7 +3630,7 @@ let Predicates = [HasAVX2, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>,
+ (vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>,
VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, u8imm:$src2),
@@ -3623,7 +3638,7 @@ let Predicates = [HasAVX2, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode (load addr:$src1),
- (i8 imm:$src2))))]>, VEX, VEX_L,
+ (i8 timm:$src2))))]>, VEX, VEX_L,
Sched<[sched.YMM.Folded]>, VEX_WIG;
}
@@ -3633,7 +3648,7 @@ let Predicates = [UseSSE2] in {
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
+ (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
Sched<[sched.XMM]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
@@ -3641,7 +3656,7 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (memop addr:$src1),
- (i8 imm:$src2))))]>,
+ (i8 timm:$src2))))]>,
Sched<[sched.XMM.Folded]>;
}
}
@@ -4380,7 +4395,7 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
+ def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
@@ -4388,7 +4403,7 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [UseSSE3] in {
// No need for aligned memory as this only loads 64-bits.
- def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
+ def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
(MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(MOVDDUPrm addr:$src)>;
@@ -4812,7 +4827,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 imm:$src3))))]>,
+ [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
Sched<[sched]>;
let mayLoad = 1 in
def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
@@ -4823,7 +4838,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (VT (X86PAlignr RC:$src1,
(memop_frag addr:$src2),
- (i8 imm:$src3))))]>,
+ (i8 timm:$src3))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5300,7 +5315,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
Sched<[SchedWriteFShuffle.XMM]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, u8imm:$src3),
@@ -5311,7 +5326,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
[(set VR128:$dst,
(X86insertps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
- imm:$src3))]>,
+ timm:$src3))]>,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
@@ -5323,17 +5338,6 @@ let ExeDomain = SSEPackedSingle in {
defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
}
-let Predicates = [UseAVX] in {
- // If we're inserting an element from a vbroadcast of a load, fold the
- // load into the X86insertps instruction.
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
- (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)),
- (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
- (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)),
- (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
-}
-
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//
@@ -5348,7 +5352,7 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))]>,
+ [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
Sched<[sched]>;
// Vector intrinsic operation, mem
@@ -5357,13 +5361,13 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (VT (OpNode (mem_frag addr:$src1),imm:$src2)))]>,
+ (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
Sched<[sched.Folded]>;
}
multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
string OpcodeStr, X86FoldableSchedWrite sched> {
-let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
+let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
@@ -5378,7 +5382,7 @@ let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
[]>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
-let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
+let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
@@ -5396,7 +5400,7 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
string OpcodeStr, X86FoldableSchedWrite sched> {
-let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
+let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
@@ -5411,7 +5415,7 @@ let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
[]>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
-let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
+let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
@@ -5431,7 +5435,7 @@ multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
string OpcodeStr, X86FoldableSchedWrite sched,
ValueType VT32, ValueType VT64,
SDNode OpNode, bit Is2Addr = 1> {
-let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedSingle in {
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
!if(Is2Addr,
@@ -5439,7 +5443,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
+ [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
Sched<[sched]>;
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
@@ -5450,11 +5454,11 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ (OpNode VR128:$src1, sse_load_f32:$src2, timm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
-let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedDouble in {
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
!if(Is2Addr,
@@ -5462,7 +5466,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
+ [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
Sched<[sched]>;
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
@@ -5473,7 +5477,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ (OpNode VR128:$src1, sse_load_f64:$src2, timm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
}
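// Net effect of moving isCodeGenOnly in the round multiclasses above:
// the FR32/FR64 forms become compiler-internal, while the VR128 _Int
// forms are now the variants exposed to the assembler and disassembler.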
@@ -5508,17 +5512,17 @@ let Predicates = [UseAVX] in {
}
let Predicates = [UseAVX] in {
- def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>;
- def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>;
+ def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
+ def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
}
let Predicates = [UseAVX, OptForSize] in {
- def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
- (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
- def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
- (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
+ def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
+ (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
+ def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
+ (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
}
let ExeDomain = SSEPackedSingle in
@@ -5535,17 +5539,17 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in {
- def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
- (ROUNDSSr FR32:$src1, imm:$src2)>;
- def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
- (ROUNDSDr FR64:$src1, imm:$src2)>;
+ def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
+ (ROUNDSSr FR32:$src1, timm:$src2)>;
+ def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
+ (ROUNDSDr FR64:$src1, timm:$src2)>;
}
let Predicates = [UseSSE41, OptForSize] in {
- def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
- (ROUNDSSm addr:$src1, imm:$src2)>;
- def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
- (ROUNDSDm addr:$src1, imm:$src2)>;
+ def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
+ (ROUNDSSm addr:$src1, timm:$src2)>;
+ def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
+ (ROUNDSDm addr:$src1, timm:$src2)>;
}
//===----------------------------------------------------------------------===//
@@ -5826,7 +5830,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
Sched<[sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
@@ -5836,7 +5840,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>,
+ (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -5853,7 +5857,7 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
Sched<[sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
@@ -5863,27 +5867,27 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-def BlendCommuteImm2 : SDNodeXForm<imm, [{
+def BlendCommuteImm2 : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue() & 0x03;
return getI8Imm(Imm ^ 0x03, SDLoc(N));
}]>;
-def BlendCommuteImm4 : SDNodeXForm<imm, [{
+def BlendCommuteImm4 : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue() & 0x0f;
return getI8Imm(Imm ^ 0x0f, SDLoc(N));
}]>;
-def BlendCommuteImm8 : SDNodeXForm<imm, [{
+def BlendCommuteImm8 : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue() & 0xff;
return getI8Imm(Imm ^ 0xff, SDLoc(N));
}]>;
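// Worked example for the commute transforms above: a blend takes the
// element from $src2 wherever its mask bit is 1. Swapping the sources
// is therefore compensated by inverting the mask: for a 4-element
// blend, imm = 0b0101 (elements 0 and 2 from $src2) becomes
// imm ^ 0x0f = 0b1010 once the sources are exchanged.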
// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
-def BlendScaleImm4 : SDNodeXForm<imm, [{
+def BlendScaleImm4 : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue();
uint8_t NewImm = 0;
for (unsigned i = 0; i != 4; ++i) {
@@ -5894,7 +5898,7 @@ def BlendScaleImm4 : SDNodeXForm<imm, [{
}]>;
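// Worked example for the scale transforms: a vXi32 blend immediate has
// one mask bit per dword, while pblendw takes one bit per word, i.e.
// two bits per dword. BlendScaleImm4 therefore duplicates each input
// bit into a pair: imm = 0b0101 becomes 0b00110011.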
// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
-def BlendScaleImm2 : SDNodeXForm<imm, [{
+def BlendScaleImm2 : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue();
uint8_t NewImm = 0;
for (unsigned i = 0; i != 2; ++i) {
@@ -5905,7 +5909,7 @@ def BlendScaleImm2 : SDNodeXForm<imm, [{
}]>;
// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
-def BlendScaleImm2to4 : SDNodeXForm<imm, [{
+def BlendScaleImm2to4 : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue();
uint8_t NewImm = 0;
for (unsigned i = 0; i != 2; ++i) {
@@ -5916,7 +5920,7 @@ def BlendScaleImm2to4 : SDNodeXForm<imm, [{
}]>;
// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
-def BlendScaleCommuteImm4 : SDNodeXForm<imm, [{
+def BlendScaleCommuteImm4 : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue();
uint8_t NewImm = 0;
for (unsigned i = 0; i != 4; ++i) {
@@ -5927,7 +5931,7 @@ def BlendScaleCommuteImm4 : SDNodeXForm<imm, [{
}]>;
// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
-def BlendScaleCommuteImm2 : SDNodeXForm<imm, [{
+def BlendScaleCommuteImm2 : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue();
uint8_t NewImm = 0;
for (unsigned i = 0; i != 2; ++i) {
@@ -5938,7 +5942,7 @@ def BlendScaleCommuteImm2 : SDNodeXForm<imm, [{
}]>;
// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
-def BlendScaleCommuteImm2to4 : SDNodeXForm<imm, [{
+def BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue();
uint8_t NewImm = 0;
for (unsigned i = 0; i != 2; ++i) {
@@ -6008,7 +6012,7 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
Sched<[sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
@@ -6018,14 +6022,14 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Pattern to commute if load is in first source.
- def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)),
+ def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
- (commuteXForm imm:$src3))>;
+ (commuteXForm timm:$src3))>;
}
let Predicates = [HasAVX] in {
@@ -6061,37 +6065,37 @@ let Predicates = [HasAVX2] in {
// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
// ExecutionDomainFixPass will cleanup domains later on.
let Predicates = [HasAVX1Only] in {
-def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
- (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
-def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
- (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
- (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
+def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
+ (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>;
+def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
+ (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>;
+def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
+ (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>;
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movsd via commuting under optsize.
-def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
- (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
-def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
- (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
-def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
- (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
-
-def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
- (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
-def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
- (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
- (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
+ (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
+def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;
+
+def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
+ (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>;
+def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
+ (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>;
+def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3),
+ (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>;
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
-def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
- (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
-def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
- (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
-def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
- (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
+def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
+ (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
+def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
}
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
@@ -6107,19 +6111,19 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
let Predicates = [UseSSE41] in {
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
-def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
- (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
-def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
- (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
-def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
- (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
+ (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
+def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;
-def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
- (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
-def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
- (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
-def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
- (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
+def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
+ (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
+def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
}
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -6592,7 +6596,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
- (i8 imm:$src3)))]>, TA,
+ (i8 timm:$src3)))]>, TA,
Sched<[SchedWriteVecIMul.XMM]>;
def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
@@ -6600,7 +6604,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1,
(memop addr:$src2),
- (i8 imm:$src3)))]>, TA,
+ (i8 timm:$src3)))]>, TA,
Sched<[SchedWriteVecIMul.XMM.Folded,
SchedWriteVecIMul.XMM.ReadAfterFold]>;
@@ -6718,26 +6722,26 @@ let Predicates = [HasAVX, HasAES] in {
(ins VR128:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>;
//===----------------------------------------------------------------------===//
@@ -6745,7 +6749,7 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
//===----------------------------------------------------------------------===//
// Immediate transform to help with commuting.
-def PCLMULCommuteImm : SDNodeXForm<imm, [{
+def PCLMULCommuteImm : SDNodeXForm<timm, [{
uint8_t Imm = N->getZExtValue();
return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
}]>;
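// Worked example: bit 0 of the pclmulqdq immediate selects which
// quadword of the first source is used and bit 4 selects for the
// second, so commuting the sources swaps the two nibbles:
// (Imm >> 4) | (Imm << 4) maps 0x01 <-> 0x10 and leaves 0x11 fixed.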
@@ -6758,7 +6762,7 @@ let Predicates = [NoAVX, HasPCLMUL] in {
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
+ (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
Sched<[WriteCLMul]>;
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
@@ -6766,14 +6770,14 @@ let Predicates = [NoAVX, HasPCLMUL] in {
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
- imm:$src3))]>,
+ timm:$src3))]>,
Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
- (i8 imm:$src3)),
+ (i8 timm:$src3)),
(PCLMULQDQrm VR128:$src1, addr:$src2,
- (PCLMULCommuteImm imm:$src3))>;
+ (PCLMULCommuteImm timm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL]
// SSE aliases
@@ -6795,21 +6799,21 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
(ins RC:$src1, RC:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst,
- (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ (IntId RC:$src1, RC:$src2, timm:$src3))]>,
Sched<[WriteCLMul]>;
def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, MemOp:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst,
- (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
+ (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
// We can commute a load in the first operand by swapping the sources and
// rotating the immediate.
- def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)),
+ def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
(!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
- (PCLMULCommuteImm imm:$src3))>;
+ (PCLMULCommuteImm timm:$src3))>;
}
let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
@@ -6853,8 +6857,8 @@ let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
(ins VR128:$src, u8imm:$len, u8imm:$idx),
"extrq\t{$idx, $len, $src|$src, $len, $idx}",
- [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len,
- imm:$idx))]>,
+ [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
+ timm:$idx))]>,
PD, Sched<[SchedWriteVecALU.XMM]>;
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
@@ -6867,7 +6871,7 @@ def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
"insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
[(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
- imm:$len, imm:$idx))]>,
+ timm:$len, timm:$idx))]>,
XD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
@@ -6907,10 +6911,10 @@ def : Pat<(nontemporalstore FR64:$src, addr:$dst),
//
class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT,
- PatFrag ld_frag, SchedWrite Sched> :
+ PatFrag bcast_frag, SchedWrite Sched> :
AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
+ [(set RC:$dst, (VT (bcast_frag addr:$src)))]>,
Sched<[Sched]>, VEX;
// AVX2 adds register forms
@@ -6923,15 +6927,15 @@ class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
- f32mem, v4f32, loadf32,
+ f32mem, v4f32, X86VBroadcastld32,
SchedWriteFShuffle.XMM.Folded>;
def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
- f32mem, v8f32, loadf32,
+ f32mem, v8f32, X86VBroadcastld32,
SchedWriteFShuffle.XMM.Folded>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
- v4f64, loadf64,
+ v4f64, X86VBroadcastld64,
SchedWriteFShuffle.XMM.Folded>, VEX_L;
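// Note on the bcast_frag change above: the memory forms now match the
// dedicated broadcast-from-memory fragments (X86VBroadcastld32/64)
// instead of X86VBroadcast wrapped around a scalar load, which is why
// the scalar_to_vector broadcast patterns below can simply be removed.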
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
@@ -6944,15 +6948,6 @@ let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
v4f64, v2f64, WriteFShuffle256>, VEX_L;
-let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (VBROADCASTSSrm addr:$src)>;
- def : Pat<(v8f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (VBROADCASTSSYrm addr:$src)>;
- def : Pat<(v4f64 (X86VBroadcast (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (VBROADCASTSDYrm addr:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
// halves of a 256-bit vector.
@@ -7081,27 +7076,29 @@ let Predicates = [HasAVX1Only] in {
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
Intrinsic IntLd, Intrinsic IntLd256,
- Intrinsic IntSt, Intrinsic IntSt256> {
+ Intrinsic IntSt, Intrinsic IntSt256,
+ X86SchedWriteMaskMove schedX,
+ X86SchedWriteMaskMove schedY> {
def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
- VEX_4V, Sched<[WriteFMaskedLoad]>;
+ VEX_4V, Sched<[schedX.RM]>;
def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
- VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>;
+ VEX_4V, VEX_L, Sched<[schedY.RM]>;
def mr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
- VEX_4V, Sched<[WriteFMaskedStore]>;
+ VEX_4V, Sched<[schedX.MR]>;
def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
- VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>;
+ VEX_4V, VEX_L, Sched<[schedY.MR]>;
}
let ExeDomain = SSEPackedSingle in
@@ -7109,13 +7106,15 @@ defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
int_x86_avx_maskload_ps,
int_x86_avx_maskload_ps_256,
int_x86_avx_maskstore_ps,
- int_x86_avx_maskstore_ps_256>;
+ int_x86_avx_maskstore_ps_256,
+ WriteFMaskMove32, WriteFMaskMove32Y>;
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
int_x86_avx_maskload_pd,
int_x86_avx_maskload_pd_256,
int_x86_avx_maskstore_pd,
- int_x86_avx_maskstore_pd_256>;
+ int_x86_avx_maskstore_pd_256,
+ WriteFMaskMove64, WriteFMaskMove64Y>;
//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
@@ -7143,13 +7142,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
+ [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX,
Sched<[sched]>;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (f_vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
+ (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX,
Sched<[sched.Folded]>;
}// Predicates = [HasAVX, NoVLX]
}
@@ -7181,38 +7180,38 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
- (i8 imm:$src3))))]>, VEX_4V, VEX_L,
+ (i8 timm:$src3))))]>, VEX_4V, VEX_L,
Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
- (i8 imm:$src3)))]>, VEX_4V, VEX_L,
+ (i8 timm:$src3)))]>, VEX_4V, VEX_L,
Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}
// Immediate transform to help with commuting.
-def Perm2XCommuteImm : SDNodeXForm<imm, [{
+def Perm2XCommuteImm : SDNodeXForm<timm, [{
return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
}]>;
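// Worked example: in the vperm2f128 immediate, selector values 0-1 pick
// a 128-bit half of the first source and 2-3 a half of the second, for
// both the low (bits 1:0) and high (bits 5:4) result lanes. Commuting
// the sources toggles bit 1 of each selector, hence the XOR with 0x22:
// 0x20 becomes 0x02.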
let Predicates = [HasAVX] in {
// Pattern with load in other operand.
def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
- VR256:$src1, (i8 imm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
+ VR256:$src1, (i8 timm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
}
let Predicates = [HasAVX1Only] in {
-def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, timm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
- (loadv4i64 addr:$src2), (i8 imm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+ (loadv4i64 addr:$src2), (i8 timm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, timm:$imm)>;
// Pattern with load in other operand.
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
- VR256:$src1, (i8 imm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
+ VR256:$src1, (i8 timm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
}
//===----------------------------------------------------------------------===//
@@ -7257,7 +7256,7 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
(ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
+ [(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>,
TAPD, VEX, Sched<[RR]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : Ii8<0x1D, MRMDestMem, (outs),
@@ -7282,15 +7281,15 @@ let Predicates = [HasF16C, NoVLX] in {
(VCVTPH2PSrm addr:$src)>;
def : Pat<(store (f64 (extractelt
- (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
+ (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
- (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
+ (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
def : Pat<(store (i64 (extractelt
- (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
+ (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
- (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
- def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst),
- (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
+ (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
+ def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
+ (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
}
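These store patterns let selection fold the extract-and-store of a VCVTPS2PH result into the memory form VCVTPS2PHmr. A hedged usage sketch with the standard F16C intrinsics (compile with -mf16c; the function name is illustrative):

    #include <immintrin.h>

    void store4_halves(const float *src, void *dst) {
      __m128 v = _mm_loadu_ps(src);
      // round-to-nearest conversion; only the low four half-floats are defined
      __m128i h = _mm_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT);
      _mm_storel_epi64((__m128i *)dst, h);  // candidate for the folded store
    }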
// Patterns for matching conversions from float to half-float and vice versa.
@@ -7327,20 +7326,20 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins RC:$src1, RC:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
Sched<[sched]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, (load addr:$src2), imm:$src3)))]>,
+ (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
// Pattern to commute if load is in first source.
- def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)),
+ def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
- (commuteXForm imm:$src3))>;
+ (commuteXForm timm:$src3))>;
}
let Predicates = [HasAVX2] in {
@@ -7351,19 +7350,19 @@ defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
SchedWriteBlend.YMM, VR256, i256mem,
BlendCommuteImm8>, VEX_L;
-def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
- (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>;
-def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
- (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
-def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
- (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
+def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
+ (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>;
+def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
+ (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
+def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
+ (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
-def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
- (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>;
-def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
- (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>;
-def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
- (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>;
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
+ (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
+ (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>;
+def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
+ (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 timm:$src3))>;
}
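VPBLENDD has no 64-bit element form, so the BlendScaleImm2to4/BlendScaleImm4 transforms widen a per-qword blend mask into the per-dword mask the instruction expects. A sketch of that scaling (helper name hypothetical):

    #include <cstdint>

    // Each input bit controls one 64-bit element; the output sets two
    // adjacent bits so the same element is selected dword by dword.
    static uint8_t scaleBlendMask(uint8_t Imm, unsigned NumQwordElts) {
      uint8_t Out = 0;
      for (unsigned I = 0; I != NumQwordElts; ++I)
        if (Imm & (1u << I))
          Out |= uint8_t(0x3) << (2 * I);
      return Out;
    }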
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -7407,7 +7406,7 @@ def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0
// destination operand
//
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
- X86MemOperand x86memop, PatFrag ld_frag,
+ X86MemOperand x86memop, PatFrag bcast_frag,
ValueType OpVT128, ValueType OpVT256, Predicate prd> {
let Predicates = [HasAVX2, prd] in {
def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -7418,7 +7417,7 @@ multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
+ (OpVT128 (bcast_frag addr:$src)))]>,
Sched<[SchedWriteShuffle.XMM.Folded]>, VEX;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -7428,7 +7427,7 @@ multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
- (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
+ (OpVT256 (bcast_frag addr:$src)))]>,
Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L;
// Provide aliases for broadcast from the same register class that
@@ -7439,13 +7438,13 @@ multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
}
}
-defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
+defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, X86VBroadcastld8,
v16i8, v32i8, NoVLX_Or_NoBWI>;
-defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
+defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, X86VBroadcastld16,
v8i16, v16i16, NoVLX_Or_NoBWI>;
-defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
+defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32,
v4i32, v8i32, NoVLX>;
-defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
+defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64,
v2i64, v4i64, NoVLX>;
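The X86VBroadcastld* fragments match a broadcast-from-memory node directly instead of pattern-matching X86VBroadcast over a separate scalar load, keeping the load and the splat fused through selection. The source shape these patterns serve, as a hedged sketch (requires AVX2):

    #include <immintrin.h>

    __m256i splat_from_memory(const int *p) {
      return _mm256_set1_epi32(*p);  // ideally a memory-operand vpbroadcastd
    }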
let Predicates = [HasAVX2, NoVLX] in {
@@ -7455,14 +7454,11 @@ let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQYrm addr:$src)>;
- def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ // FIXME this is to handle aligned extloads from i8/i16.
+ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
(VPBROADCASTDrm addr:$src)>;
- def : Pat<(v8i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
(VPBROADCASTDYrm addr:$src)>;
- def : Pat<(v2i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (VPBROADCASTQrm addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (VPBROADCASTQYrm addr:$src)>;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
@@ -7483,17 +7479,12 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWYrm addr:$src)>;
-}
-let Predicates = [HasAVX2, NoVLX] in {
- // Provide aliases for broadcast from the same register class that
- // automatically does the extract.
- def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
- (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))),
- (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src),
- sub_xmm)))>;
+ // FIXME this is to handle aligned extloads from i8.
+ def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWYrm addr:$src)>;
}
let Predicates = [HasAVX2, NoVLX] in {
@@ -7509,45 +7500,41 @@ let Predicates = [HasAVX2, NoVLX] in {
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
- (VPBROADCASTBrr (v16i8 (COPY_TO_REGCLASS
+ (VPBROADCASTBrr (VMOVDI2PDIrr
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR8:$src, sub_8bit)),
- VR128)))>;
+ GR8:$src, sub_8bit))))>;
def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
- (VPBROADCASTBYrr (v16i8 (COPY_TO_REGCLASS
+ (VPBROADCASTBYrr (VMOVDI2PDIrr
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR8:$src, sub_8bit)),
- VR128)))>;
+ GR8:$src, sub_8bit))))>;
def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
- (VPBROADCASTWrr (v8i16 (COPY_TO_REGCLASS
+ (VPBROADCASTWrr (VMOVDI2PDIrr
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR16:$src, sub_16bit)),
- VR128)))>;
+ GR16:$src, sub_16bit))))>;
def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
- (VPBROADCASTWYrr (v8i16 (COPY_TO_REGCLASS
+ (VPBROADCASTWYrr (VMOVDI2PDIrr
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR16:$src, sub_16bit)),
- VR128)))>;
+ GR16:$src, sub_16bit))))>;
}
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VPBROADCASTDrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>;
+ (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
- (VPBROADCASTDYrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>;
+ (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>;
def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
- (VPBROADCASTQrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>;
+ (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VPBROADCASTQYrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>;
+ (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>;
}
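Emitting VMOVDI2PDIrr / VMOV64toPQIrr instead of a bare COPY_TO_REGCLASS makes the GPR-to-XMM move an explicit instruction the rest of the backend can schedule and fold. The register-splat shape these patterns cover, sketched with intrinsics (requires AVX2):

    #include <immintrin.h>

    __m256i splat_from_register(int x) {
      return _mm256_set1_epi32(x);  // vmovd to xmm, then vpbroadcastd to ymm
    }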
// AVX1 broadcast patterns
let Predicates = [HasAVX1Only] in {
-def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+def : Pat<(v8i32 (X86VBroadcastld32 addr:$src)),
(VBROADCASTSSYrm addr:$src)>;
-def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+def : Pat<(v4i64 (X86VBroadcastld64 addr:$src)),
(VBROADCASTSDYrm addr:$src)>;
-def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+def : Pat<(v4i32 (X86VBroadcastld32 addr:$src)),
(VBROADCASTSSrm addr:$src)>;
}
@@ -7557,12 +7544,12 @@ let Predicates = [HasAVX, NoVLX] in {
// 128bit broadcasts:
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
- def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)),
(VMOVDDUPrm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
(VMOVDDUPrr VR128:$src)>;
- def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
(VMOVDDUPrm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
(VMOVDDUPrm addr:$src)>;
@@ -7581,19 +7568,19 @@ let Predicates = [HasAVX1Only] in {
(v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)>;
+ (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)), sub_xmm),
- (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)), 1)>;
+ (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), sub_xmm),
+ (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), 1)>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
- (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), sub_xmm),
- (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), 1)>;
+ (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), sub_xmm),
+ (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 1)>;
def : Pat<(v2i64 (X86VBroadcast i64:$src)),
- (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)>;
- def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>;
+ def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)),
(VMOVDDUPrm addr:$src)>;
}
@@ -7636,7 +7623,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
+ (OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>,
Sched<[Sched]>, VEX, VEX_L;
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins memOp:$src1, u8imm:$src2),
@@ -7644,7 +7631,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1),
- (i8 imm:$src2))))]>,
+ (i8 timm:$src2))))]>,
Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
}
}
@@ -7663,19 +7650,19 @@ def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
- (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
+ (i8 timm:$src3))))]>, Sched<[WriteShuffle256]>,
VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
- (i8 imm:$src3)))]>,
+ (i8 timm:$src3)))]>,
Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
let Predicates = [HasAVX2] in
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
- VR256:$src1, (i8 imm:$imm))),
- (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
+ VR256:$src1, (i8 timm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
//===----------------------------------------------------------------------===//
@@ -7760,7 +7747,7 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskstore_q_256>, VEX_W;
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
- ValueType MaskVT, string BlendStr, ValueType ZeroVT> {
+ ValueType MaskVT> {
// masked store
def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
(!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
@@ -7772,23 +7759,23 @@ multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
}
let Predicates = [HasAVX] in {
- defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>;
- defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64, "VBLENDVPD", v4i32>;
- defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8i32>;
- defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8i32>;
+ defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32>;
+ defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64>;
+ defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>;
+ defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>;
}
let Predicates = [HasAVX1Only] in {
// load/store of i32/i64 is not supported; use the ps/pd versions
- defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>;
- defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>;
- defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
- defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
+ defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>;
+ defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>;
+ defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32>;
+ defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64>;
}
let Predicates = [HasAVX2] in {
- defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>;
- defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>;
- defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
- defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
+ defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>;
+ defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>;
+ defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32>;
+ defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64>;
}
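The slimmed-down maskmov_lowering (the unused BlendStr and ZeroVT parameters are gone) still maps generic masked_load/masked_store nodes onto VMASKMOV/VPMASKMOV. A usage sketch with the matching AVX intrinsics:

    #include <immintrin.h>

    void masked_copy8(float *dst, const float *src, __m256i mask) {
      __m256 v = _mm256_maskload_ps(src, mask);  // vmaskmovps, load form
      _mm256_maskstore_ps(dst, mask, v);         // vmaskmovps, store form
    }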
//===----------------------------------------------------------------------===//
@@ -7956,13 +7943,13 @@ multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), "",
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(MemOpFrag addr:$src2),
- imm:$src3)))], SSEPackedInt>,
+ timm:$src3)))], SSEPackedInt>,
Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
}
}
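GF2P8AFFINE's trailing byte is likewise an immediate-only encoding field, hence the move to timm here as well. A hedged usage sketch (requires GFNI; the constant 0 is the affine addend the pattern captures as an immediate):

    #include <immintrin.h>

    // Applies the 8x8 bit-matrix in `matrix` to every byte of `v`;
    // the final argument must be a compile-time constant.
    __m128i apply_bit_matrix(__m128i v, __m128i matrix) {
      return _mm_gf2p8affine_epi64_epi8(v, matrix, 0);
    }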
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 7050e1917494..7f41feb6c0d9 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -43,7 +43,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
- [(int_x86_int imm:$trap)]>;
+ [(int_x86_int timm:$trap)]>;
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index fc0da845299f..3a1212342a13 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -45,7 +45,7 @@ def XTEST : I<0x01, MRM_D6, (outs), (ins),
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
- [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
+ [(int_x86_xabort timm:$imm)]>, Requires<[HasRTM]>;
} // SchedRW
// HLE prefixes
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index 66ca78556b82..229af366d940 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -143,13 +143,13 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
+ (vt128 (OpNode (vt128 VR128:$src1), timm:$src2)))]>,
XOP, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (vt128 (load addr:$src1)), imm:$src2)))]>,
+ (vt128 (OpNode (vt128 (load addr:$src1)), timm:$src2)))]>,
XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -251,7 +251,7 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- imm:$cc)))]>,
+ timm:$cc)))]>,
XOP_4V, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$cc),
@@ -260,14 +260,14 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (load addr:$src2)),
- imm:$cc)))]>,
+ timm:$cc)))]>,
XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def : Pat<(OpNode (load addr:$src2),
- (vt128 VR128:$src1), imm:$cc),
+ (vt128 VR128:$src1), timm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
- (CommuteVPCOMCC imm:$cc))>;
+ (CommuteVPCOMCC timm:$cc))>;
}
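Commuting VPCOM's operands also requires swapping the relational condition code, which CommuteVPCOMCC performs. A sketch of that swap, assuming XOP's documented encodings (0=LT, 1=LE, 2=GT, 3=GE, 4=EQ, 5=NE, 6=FALSE, 7=TRUE):

    #include <cstdint>

    static uint8_t commuteVPCOMCC(uint8_t CC) {
      switch (CC & 0x7) {
      case 0: return 2;          // LT -> GT
      case 1: return 3;          // LE -> GE
      case 2: return 0;          // GT -> LT
      case 3: return 1;          // GE -> LE
      default: return CC & 0x7;  // EQ/NE/FALSE/TRUE are order-insensitive
      }
    }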
defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>;
@@ -418,27 +418,27 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
X86FoldableSchedWrite sched> {
def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
+ (ins RC:$src1, RC:$src2, RC:$src3, u4imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
- (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
+ (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 timm:$src4))))]>,
Sched<[sched]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
+ (ins RC:$src1, RC:$src2, intmemop:$src3, u4imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3),
- (i8 imm:$src4))))]>, VEX_W,
+ (i8 timm:$src4))))]>, VEX_W,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
+ (ins RC:$src1, fpmemop:$src2, RC:$src3, u4imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
- RC:$src3, (i8 imm:$src4))))]>,
+ RC:$src3, (i8 timm:$src4))))]>,
Sched<[sched.Folded, sched.ReadAfterFold,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
@@ -447,7 +447,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
+ (ins RC:$src1, RC:$src2, RC:$src3, u4imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index 892a083f4d1a..01620b7b64c9 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -60,7 +60,7 @@ public:
X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
const X86RegisterBankInfo &RBI);
- bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
+ bool select(MachineInstr &I) override;
static const char *getName() { return DEBUG_TYPE; }
private:
@@ -94,11 +94,9 @@ private:
MachineFunction &MF) const;
bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF,
- CodeGenCoverage &CoverageInfo) const;
+ MachineFunction &MF);
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF,
- CodeGenCoverage &CoverageInfo) const;
+ MachineFunction &MF);
bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -217,7 +215,7 @@ static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
}
static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Register::isPhysicalRegister(Reg));
if (X86::GR64RegClass.contains(Reg))
return &X86::GR64RegClass;
if (X86::GR32RegClass.contains(Reg))
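The mechanical unsigned → Register and TargetRegisterInfo::isPhysicalRegister → Register::isPhysicalRegister changes through the rest of this file follow LLVM's introduction of a dedicated register-handle type. An abridged sketch of its shape (simplified from llvm/CodeGen/Register.h; stack-slot handling elided):

    class Register {
      unsigned Reg = 0;
    public:
      constexpr Register(unsigned R = 0) : Reg(R) {}
      // Physical register ids are small positive values; virtual
      // register ids have the sign bit set.
      static bool isPhysicalRegister(unsigned R) { return int(R) > 0; }
      operator unsigned() const { return Reg; }  // keeps old call sites valid
    };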
@@ -233,15 +231,15 @@ static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) {
// Set X86 Opcode and constrain DestReg.
bool X86InstructionSelector::selectCopy(MachineInstr &I,
MachineRegisterInfo &MRI) const {
- unsigned DstReg = I.getOperand(0).getReg();
+ Register DstReg = I.getOperand(0).getReg();
const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
- unsigned SrcReg = I.getOperand(1).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ if (Register::isPhysicalRegister(DstReg)) {
assert(I.isCopy() && "Generic operators do not allow physical registers");
if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
@@ -253,7 +251,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
if (SrcRC != DstRC) {
// This case can be generated by ABI lowering; perform an anyext
- unsigned ExtSrc = MRI.createVirtualRegister(DstRC);
+ Register ExtSrc = MRI.createVirtualRegister(DstRC);
BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(TargetOpcode::SUBREG_TO_REG))
.addDef(ExtSrc)
@@ -268,12 +266,12 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
return true;
}
- assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
+ assert((!Register::isPhysicalRegister(SrcReg) || I.isCopy()) &&
"No phys reg on generic operators");
assert((DstSize == SrcSize ||
// Copies are a mean to setup initial types, the number of
// bits may not exactly match.
- (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ (Register::isPhysicalRegister(SrcReg) &&
DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
"Copy with different width?!");
@@ -282,7 +280,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
if (SrcRegBank.getID() == X86::GPRRegBankID &&
DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
- TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ Register::isPhysicalRegister(SrcReg)) {
// Change the physical register to perform the truncate.
const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);
@@ -308,8 +306,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
return true;
}
-bool X86InstructionSelector::select(MachineInstr &I,
- CodeGenCoverage &CoverageInfo) const {
+bool X86InstructionSelector::select(MachineInstr &I) {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -333,7 +330,7 @@ bool X86InstructionSelector::select(MachineInstr &I,
assert(I.getNumOperands() == I.getNumExplicitOperands() &&
"Generic instruction has unexpected implicit operands\n");
- if (selectImpl(I, CoverageInfo))
+ if (selectImpl(I, *CoverageInfo))
return true;
LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));
@@ -370,10 +367,10 @@ bool X86InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_UADDE:
return selectUadde(I, MRI, MF);
case TargetOpcode::G_UNMERGE_VALUES:
- return selectUnmergeValues(I, MRI, MF, CoverageInfo);
+ return selectUnmergeValues(I, MRI, MF);
case TargetOpcode::G_MERGE_VALUES:
case TargetOpcode::G_CONCAT_VECTORS:
- return selectMergeValues(I, MRI, MF, CoverageInfo);
+ return selectMergeValues(I, MRI, MF);
case TargetOpcode::G_EXTRACT:
return selectExtract(I, MRI, MF);
case TargetOpcode::G_INSERT:
@@ -512,7 +509,7 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
"unexpected instruction");
- const unsigned DefReg = I.getOperand(0).getReg();
+ const Register DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
@@ -572,7 +569,7 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_GEP) &&
"unexpected instruction");
- const unsigned DefReg = I.getOperand(0).getReg();
+ const Register DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
// Use LEA to calculate frame index and GEP
@@ -625,7 +622,7 @@ bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
AM.Base.Reg = X86::RIP;
}
- const unsigned DefReg = I.getOperand(0).getReg();
+ const Register DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
unsigned NewOpc = getLeaOP(Ty, STI);
@@ -644,7 +641,7 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I,
assert((I.getOpcode() == TargetOpcode::G_CONSTANT) &&
"unexpected instruction");
- const unsigned DefReg = I.getOperand(0).getReg();
+ const Register DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
@@ -717,8 +714,8 @@ bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I,
I.getOpcode() == TargetOpcode::G_PTRTOINT) &&
"unexpected instruction");
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
const LLT DstTy = MRI.getType(DstReg);
const LLT SrcTy = MRI.getType(SrcReg);
@@ -781,8 +778,8 @@ bool X86InstructionSelector::selectZext(MachineInstr &I,
MachineFunction &MF) const {
assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction");
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
const LLT DstTy = MRI.getType(DstReg);
const LLT SrcTy = MRI.getType(SrcReg);
@@ -892,8 +889,8 @@ bool X86InstructionSelector::selectAnyext(MachineInstr &I,
MachineFunction &MF) const {
assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction");
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
const LLT DstTy = MRI.getType(DstReg);
const LLT SrcTy = MRI.getType(SrcReg);
@@ -952,8 +949,8 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
(CmpInst::Predicate)I.getOperand(1).getPredicate());
- unsigned LHS = I.getOperand(2).getReg();
- unsigned RHS = I.getOperand(3).getReg();
+ Register LHS = I.getOperand(2).getReg();
+ Register RHS = I.getOperand(3).getReg();
if (SwapArgs)
std::swap(LHS, RHS);
@@ -998,8 +995,8 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
MachineFunction &MF) const {
assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");
- unsigned LhsReg = I.getOperand(2).getReg();
- unsigned RhsReg = I.getOperand(3).getReg();
+ Register LhsReg = I.getOperand(2).getReg();
+ Register RhsReg = I.getOperand(3).getReg();
CmpInst::Predicate Predicate =
(CmpInst::Predicate)I.getOperand(1).getPredicate();
@@ -1033,7 +1030,7 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
break;
}
- unsigned ResultReg = I.getOperand(0).getReg();
+ Register ResultReg = I.getOperand(0).getReg();
RBI.constrainGenericRegister(
ResultReg,
*getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
@@ -1043,8 +1040,8 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
.addReg(LhsReg)
.addReg(RhsReg);
- unsigned FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
- unsigned FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
+ Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
+ Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]);
MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
@@ -1089,11 +1086,11 @@ bool X86InstructionSelector::selectUadde(MachineInstr &I,
MachineFunction &MF) const {
assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction");
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned CarryOutReg = I.getOperand(1).getReg();
- const unsigned Op0Reg = I.getOperand(2).getReg();
- const unsigned Op1Reg = I.getOperand(3).getReg();
- unsigned CarryInReg = I.getOperand(4).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register CarryOutReg = I.getOperand(1).getReg();
+ const Register Op0Reg = I.getOperand(2).getReg();
+ const Register Op1Reg = I.getOperand(3).getReg();
+ Register CarryInReg = I.getOperand(4).getReg();
const LLT DstTy = MRI.getType(DstReg);
@@ -1149,8 +1146,8 @@ bool X86InstructionSelector::selectExtract(MachineInstr &I,
assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
"unexpected instruction");
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
int64_t Index = I.getOperand(2).getImm();
const LLT DstTy = MRI.getType(DstReg);
@@ -1281,9 +1278,9 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I,
MachineFunction &MF) const {
assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction");
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
- const unsigned InsertReg = I.getOperand(2).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
+ const Register InsertReg = I.getOperand(2).getReg();
int64_t Index = I.getOperand(3).getImm();
const LLT DstTy = MRI.getType(DstReg);
@@ -1335,14 +1332,13 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I,
}
bool X86InstructionSelector::selectUnmergeValues(
- MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF,
- CodeGenCoverage &CoverageInfo) const {
+ MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) &&
"unexpected instruction");
// Split to extracts.
unsigned NumDefs = I.getNumOperands() - 1;
- unsigned SrcReg = I.getOperand(NumDefs).getReg();
+ Register SrcReg = I.getOperand(NumDefs).getReg();
unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
@@ -1352,7 +1348,7 @@ bool X86InstructionSelector::selectUnmergeValues(
.addReg(SrcReg)
.addImm(Idx * DefSize);
- if (!select(ExtrInst, CoverageInfo))
+ if (!select(ExtrInst))
return false;
}
@@ -1361,15 +1357,14 @@ bool X86InstructionSelector::selectUnmergeValues(
}
bool X86InstructionSelector::selectMergeValues(
- MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF,
- CodeGenCoverage &CoverageInfo) const {
+ MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
"unexpected instruction");
// Split to inserts.
- unsigned DstReg = I.getOperand(0).getReg();
- unsigned SrcReg0 = I.getOperand(1).getReg();
+ Register DstReg = I.getOperand(0).getReg();
+ Register SrcReg0 = I.getOperand(1).getReg();
const LLT DstTy = MRI.getType(DstReg);
const LLT SrcTy = MRI.getType(SrcReg0);
@@ -1378,13 +1373,13 @@ bool X86InstructionSelector::selectMergeValues(
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
// For the first src use insertSubReg.
- unsigned DefReg = MRI.createGenericVirtualRegister(DstTy);
+ Register DefReg = MRI.createGenericVirtualRegister(DstTy);
MRI.setRegBank(DefReg, RegBank);
if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
return false;
for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
- unsigned Tmp = MRI.createGenericVirtualRegister(DstTy);
+ Register Tmp = MRI.createGenericVirtualRegister(DstTy);
MRI.setRegBank(Tmp, RegBank);
MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
@@ -1395,7 +1390,7 @@ bool X86InstructionSelector::selectMergeValues(
DefReg = Tmp;
- if (!select(InsertInst, CoverageInfo))
+ if (!select(InsertInst))
return false;
}
@@ -1403,7 +1398,7 @@ bool X86InstructionSelector::selectMergeValues(
TII.get(TargetOpcode::COPY), DstReg)
.addReg(DefReg);
- if (!select(CopyInst, CoverageInfo))
+ if (!select(CopyInst))
return false;
I.eraseFromParent();
@@ -1415,7 +1410,7 @@ bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
MachineFunction &MF) const {
assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction");
- const unsigned CondReg = I.getOperand(0).getReg();
+ const Register CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
MachineInstr &TestInst =
@@ -1442,7 +1437,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
if (CM != CodeModel::Small && CM != CodeModel::Large)
return false;
- const unsigned DstReg = I.getOperand(0).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
unsigned Align = DstTy.getSizeInBits();
@@ -1460,7 +1455,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
// Under X86-64 non-small code model, GV (and friends) are 64-bits, so
// they cannot be folded into immediate fields.
- unsigned AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
+ Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
.addConstantPoolIndex(CPI, 0, OpFlag);
@@ -1503,7 +1498,7 @@ bool X86InstructionSelector::selectImplicitDefOrPHI(
I.getOpcode() == TargetOpcode::G_PHI) &&
"unexpected instruction");
- unsigned DstReg = I.getOperand(0).getReg();
+ Register DstReg = I.getOperand(0).getReg();
if (!MRI.getRegClassOrNull(DstReg)) {
const LLT DstTy = MRI.getType(DstReg);
@@ -1537,7 +1532,7 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
I.getOpcode() == TargetOpcode::G_LSHR) &&
"unexpected instruction");
- unsigned DstReg = I.getOperand(0).getReg();
+ Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
@@ -1578,8 +1573,8 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
return false;
}
- unsigned Op0Reg = I.getOperand(1).getReg();
- unsigned Op1Reg = I.getOperand(2).getReg();
+ Register Op0Reg = I.getOperand(1).getReg();
+ Register Op1Reg = I.getOperand(2).getReg();
assert(MRI.getType(Op1Reg).getSizeInBits() == 8);
@@ -1606,9 +1601,9 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
I.getOpcode() == TargetOpcode::G_UREM) &&
"unexpected instruction");
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned Op1Reg = I.getOperand(1).getReg();
- const unsigned Op2Reg = I.getOperand(2).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register Op1Reg = I.getOperand(1).getReg();
+ const Register Op2Reg = I.getOperand(2).getReg();
const LLT RegTy = MRI.getType(DstReg);
assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
@@ -1732,7 +1727,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(OpEntry.OpSignExtend));
else {
- unsigned Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
+ Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
Zero32);
@@ -1770,8 +1765,8 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
if ((I.getOpcode() == Instruction::SRem ||
I.getOpcode() == Instruction::URem) &&
OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
- unsigned SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
- unsigned ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
+ Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
+ Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
.addReg(X86::AX);
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 40141d894629..1d7adbaa9e99 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -23,7 +23,7 @@ enum IntrinsicType : uint16_t {
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
INTR_TYPE_3OP_IMM8,
- CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV,
+  CMP_MASK_CC, CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, BEXTRI,
CVTPD2PS_MASK,
INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE,
INTR_TYPE_1OP_MASK_SAE, INTR_TYPE_2OP_MASK_SAE, INTR_TYPE_3OP_MASK_SAE,
@@ -1101,8 +1101,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(subborrow_32, ADX, X86ISD::SBB, X86ISD::SUB),
X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB),
- X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
- X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
+ X86_INTRINSIC_DATA(tbm_bextri_u32, BEXTRI, X86ISD::BEXTR, 0),
+ X86_INTRINSIC_DATA(tbm_bextri_u64, BEXTRI, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
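The new BEXTRI intrinsic type lets ISel insist that the TBM control word is an immediate. A usage sketch (requires -mtbm; the control packs the start bit in bits 7:0 and the field length in bits 15:8, per the TBM documentation):

    #include <x86intrin.h>

    unsigned extract_bits_15_to_8(unsigned x) {
      return __bextri_u32(x, (8u << 8) | 8u);  // length 8, starting at bit 8
    }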
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index 00fb1b573858..04121f863c89 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -13,6 +13,7 @@
#include "X86LegalizerInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
@@ -84,6 +85,24 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
verify(*STI.getInstrInfo());
}
+bool X86LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ switch (MI.getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ if (createMemLibcall(MIRBuilder, MRI, MI) ==
+ LegalizerHelper::UnableToLegalize)
+ return false;
+ MI.eraseFromParent();
+ return true;
+ default:
+ break;
+ }
+ return true;
+}
+
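With this hook, GlobalISel legalizes the memcpy/memset/memmove intrinsics on x86 by rewriting them into ordinary libcalls via createMemLibcall instead of leaving them illegal. At the source level the observable effect is simply that such an operation survives as a call; a trivial sketch of code that exercises the path:

    #include <cstring>

    void copy_blob(void *dst, const void *src, unsigned long n) {
      std::memcpy(dst, src, n);  // lowered by GlobalISel to a call to memcpy
    }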
void X86LegalizerInfo::setLegalizerInfo32bit() {
const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
@@ -158,6 +177,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
setAction({G_ANYEXT, Ty}, Legal);
}
setAction({G_ANYEXT, s128}, Legal);
+ getActionDefinitionsBuilder(G_SEXT_INREG).lower();
// Comparison
setAction({G_ICMP, s1}, Legal);
diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h
index d21707b9ab9b..7a0f13fb5ae6 100644
--- a/lib/Target/X86/X86LegalizerInfo.h
+++ b/lib/Target/X86/X86LegalizerInfo.h
@@ -32,6 +32,9 @@ private:
public:
X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM);
+ bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const override;
+
private:
void setLegalizerInfo32bit();
void setLegalizerInfo64bit();
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index b1fefaa84be4..78098fd6262f 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -427,6 +427,41 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
}
}
+// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
+// information.
+static unsigned convertTailJumpOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::TAILJMPr:
+ Opcode = X86::JMP32r;
+ break;
+ case X86::TAILJMPm:
+ Opcode = X86::JMP32m;
+ break;
+ case X86::TAILJMPr64:
+ Opcode = X86::JMP64r;
+ break;
+ case X86::TAILJMPm64:
+ Opcode = X86::JMP64m;
+ break;
+ case X86::TAILJMPr64_REX:
+ Opcode = X86::JMP64r_REX;
+ break;
+ case X86::TAILJMPm64_REX:
+ Opcode = X86::JMP64m_REX;
+ break;
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64:
+ Opcode = X86::JMP_1;
+ break;
+ case X86::TAILJMPd_CC:
+ case X86::TAILJMPd64_CC:
+ Opcode = X86::JCC_1;
+ break;
+ }
+
+ return Opcode;
+}
+
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
@@ -500,21 +535,190 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
}
- // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
- // inputs modeled as normal uses instead of implicit uses. As such, truncate
- // off all but the first operand (the callee). FIXME: Change isel.
- case X86::TAILJMPr64:
- case X86::TAILJMPr64_REX:
- case X86::CALL64r:
- case X86::CALL64pcrel32: {
- unsigned Opcode = OutMI.getOpcode();
- MCOperand Saved = OutMI.getOperand(0);
- OutMI = MCInst();
- OutMI.setOpcode(Opcode);
- OutMI.addOperand(Saved);
+ case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rmik:
+ case X86::VPCMPBZ128rri: case X86::VPCMPBZ128rrik:
+ case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rmik:
+ case X86::VPCMPBZ256rri: case X86::VPCMPBZ256rrik:
+ case X86::VPCMPBZrmi: case X86::VPCMPBZrmik:
+ case X86::VPCMPBZrri: case X86::VPCMPBZrrik:
+ case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rmik:
+ case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
+ case X86::VPCMPDZ128rri: case X86::VPCMPDZ128rrik:
+ case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rmik:
+ case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
+ case X86::VPCMPDZ256rri: case X86::VPCMPDZ256rrik:
+ case X86::VPCMPDZrmi: case X86::VPCMPDZrmik:
+ case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk:
+ case X86::VPCMPDZrri: case X86::VPCMPDZrrik:
+ case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rmik:
+ case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
+ case X86::VPCMPQZ128rri: case X86::VPCMPQZ128rrik:
+ case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rmik:
+ case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
+ case X86::VPCMPQZ256rri: case X86::VPCMPQZ256rrik:
+ case X86::VPCMPQZrmi: case X86::VPCMPQZrmik:
+ case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk:
+ case X86::VPCMPQZrri: case X86::VPCMPQZrrik:
+ case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rmik:
+ case X86::VPCMPWZ128rri: case X86::VPCMPWZ128rrik:
+ case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rmik:
+ case X86::VPCMPWZ256rri: case X86::VPCMPWZ256rrik:
+ case X86::VPCMPWZrmi: case X86::VPCMPWZrmik:
+ case X86::VPCMPWZrri: case X86::VPCMPWZrrik: {
+ // Turn immediate 0 into the VPCMPEQ instruction.
+ if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPEQBZ128rm; break;
+ case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break;
+ case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPEQBZ128rr; break;
+ case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPEQBZ128rrk; break;
+ case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPEQBZ256rm; break;
+ case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPEQBZ256rmk; break;
+ case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPEQBZ256rr; break;
+ case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPEQBZ256rrk; break;
+ case X86::VPCMPBZrmi: NewOpc = X86::VPCMPEQBZrm; break;
+ case X86::VPCMPBZrmik: NewOpc = X86::VPCMPEQBZrmk; break;
+ case X86::VPCMPBZrri: NewOpc = X86::VPCMPEQBZrr; break;
+ case X86::VPCMPBZrrik: NewOpc = X86::VPCMPEQBZrrk; break;
+ case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPEQDZ128rm; break;
+ case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPEQDZ128rmb; break;
+ case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
+ case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPEQDZ128rmk; break;
+ case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPEQDZ128rr; break;
+ case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPEQDZ128rrk; break;
+ case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPEQDZ256rm; break;
+ case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPEQDZ256rmb; break;
+ case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
+ case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPEQDZ256rmk; break;
+ case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPEQDZ256rr; break;
+ case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPEQDZ256rrk; break;
+ case X86::VPCMPDZrmi: NewOpc = X86::VPCMPEQDZrm; break;
+ case X86::VPCMPDZrmib: NewOpc = X86::VPCMPEQDZrmb; break;
+ case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPEQDZrmbk; break;
+ case X86::VPCMPDZrmik: NewOpc = X86::VPCMPEQDZrmk; break;
+ case X86::VPCMPDZrri: NewOpc = X86::VPCMPEQDZrr; break;
+ case X86::VPCMPDZrrik: NewOpc = X86::VPCMPEQDZrrk; break;
+ case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPEQQZ128rm; break;
+ case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPEQQZ128rmb; break;
+ case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
+ case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPEQQZ128rmk; break;
+ case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPEQQZ128rr; break;
+ case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPEQQZ128rrk; break;
+ case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPEQQZ256rm; break;
+ case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPEQQZ256rmb; break;
+ case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
+ case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPEQQZ256rmk; break;
+ case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPEQQZ256rr; break;
+ case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPEQQZ256rrk; break;
+ case X86::VPCMPQZrmi: NewOpc = X86::VPCMPEQQZrm; break;
+ case X86::VPCMPQZrmib: NewOpc = X86::VPCMPEQQZrmb; break;
+ case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPEQQZrmbk; break;
+ case X86::VPCMPQZrmik: NewOpc = X86::VPCMPEQQZrmk; break;
+ case X86::VPCMPQZrri: NewOpc = X86::VPCMPEQQZrr; break;
+ case X86::VPCMPQZrrik: NewOpc = X86::VPCMPEQQZrrk; break;
+ case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPEQWZ128rm; break;
+ case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPEQWZ128rmk; break;
+ case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPEQWZ128rr; break;
+ case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPEQWZ128rrk; break;
+ case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPEQWZ256rm; break;
+ case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPEQWZ256rmk; break;
+ case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPEQWZ256rr; break;
+ case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPEQWZ256rrk; break;
+ case X86::VPCMPWZrmi: NewOpc = X86::VPCMPEQWZrm; break;
+ case X86::VPCMPWZrmik: NewOpc = X86::VPCMPEQWZrmk; break;
+ case X86::VPCMPWZrri: NewOpc = X86::VPCMPEQWZrr; break;
+ case X86::VPCMPWZrrik: NewOpc = X86::VPCMPEQWZrrk; break;
+ }
+
+ OutMI.setOpcode(NewOpc);
+ OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
+ break;
+ }
+
+ // Turn immediate 6 into the VPCMPGT instruction.
+ if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPGTBZ128rm; break;
+ case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break;
+ case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPGTBZ128rr; break;
+ case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPGTBZ128rrk; break;
+ case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPGTBZ256rm; break;
+ case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPGTBZ256rmk; break;
+ case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPGTBZ256rr; break;
+ case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPGTBZ256rrk; break;
+ case X86::VPCMPBZrmi: NewOpc = X86::VPCMPGTBZrm; break;
+ case X86::VPCMPBZrmik: NewOpc = X86::VPCMPGTBZrmk; break;
+ case X86::VPCMPBZrri: NewOpc = X86::VPCMPGTBZrr; break;
+ case X86::VPCMPBZrrik: NewOpc = X86::VPCMPGTBZrrk; break;
+ case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPGTDZ128rm; break;
+ case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPGTDZ128rmb; break;
+ case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
+ case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPGTDZ128rmk; break;
+ case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPGTDZ128rr; break;
+ case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPGTDZ128rrk; break;
+ case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPGTDZ256rm; break;
+ case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPGTDZ256rmb; break;
+ case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
+ case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPGTDZ256rmk; break;
+ case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPGTDZ256rr; break;
+ case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPGTDZ256rrk; break;
+ case X86::VPCMPDZrmi: NewOpc = X86::VPCMPGTDZrm; break;
+ case X86::VPCMPDZrmib: NewOpc = X86::VPCMPGTDZrmb; break;
+ case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPGTDZrmbk; break;
+ case X86::VPCMPDZrmik: NewOpc = X86::VPCMPGTDZrmk; break;
+ case X86::VPCMPDZrri: NewOpc = X86::VPCMPGTDZrr; break;
+ case X86::VPCMPDZrrik: NewOpc = X86::VPCMPGTDZrrk; break;
+ case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPGTQZ128rm; break;
+ case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPGTQZ128rmb; break;
+ case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
+ case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPGTQZ128rmk; break;
+ case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPGTQZ128rr; break;
+ case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPGTQZ128rrk; break;
+ case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPGTQZ256rm; break;
+ case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPGTQZ256rmb; break;
+ case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
+ case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPGTQZ256rmk; break;
+ case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPGTQZ256rr; break;
+ case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPGTQZ256rrk; break;
+ case X86::VPCMPQZrmi: NewOpc = X86::VPCMPGTQZrm; break;
+ case X86::VPCMPQZrmib: NewOpc = X86::VPCMPGTQZrmb; break;
+ case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPGTQZrmbk; break;
+ case X86::VPCMPQZrmik: NewOpc = X86::VPCMPGTQZrmk; break;
+ case X86::VPCMPQZrri: NewOpc = X86::VPCMPGTQZrr; break;
+ case X86::VPCMPQZrrik: NewOpc = X86::VPCMPGTQZrrk; break;
+ case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPGTWZ128rm; break;
+ case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPGTWZ128rmk; break;
+ case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPGTWZ128rr; break;
+ case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPGTWZ128rrk; break;
+ case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPGTWZ256rm; break;
+ case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPGTWZ256rmk; break;
+ case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPGTWZ256rr; break;
+ case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPGTWZ256rrk; break;
+ case X86::VPCMPWZrmi: NewOpc = X86::VPCMPGTWZrm; break;
+ case X86::VPCMPWZrmik: NewOpc = X86::VPCMPGTWZrmk; break;
+ case X86::VPCMPWZrri: NewOpc = X86::VPCMPGTWZrr; break;
+ case X86::VPCMPWZrrik: NewOpc = X86::VPCMPGTWZrrk; break;
+ }
+
+ OutMI.setOpcode(NewOpc);
+ OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
+ break;
+ }
+
break;
}
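Rewriting immediate 0 to VPCMPEQ and immediate 6 to VPCMPGT at MCInst-lowering time drops the trailing imm8 byte, shrinking the encoding without changing semantics. The source shape that benefits, sketched with an AVX-512 intrinsic (requires AVX512F; predicate _MM_CMPINT_EQ is 0):

    #include <immintrin.h>

    __mmask16 equal_lanes(__m512i a, __m512i b) {
      // emitted as vpcmpeqd rather than vpcmpd $0 after this lowering
      return _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_EQ);
    }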
+ // CALL64r, CALL64pcrel32 - These instructions used to have
+ // register inputs modeled as normal uses instead of implicit uses. As such,
+  // we used to truncate off all but the first operand (the callee). This
+ // issue seems to have been fixed at some point. This assert verifies that.
+ case X86::CALL64r:
+ case X86::CALL64pcrel32:
+ assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
+ break;
+
case X86::EH_RETURN:
case X86::EH_RETURN64: {
OutMI = MCInst();
@@ -539,36 +743,30 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
}
- // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
- // instruction.
- {
- unsigned Opcode;
- case X86::TAILJMPr:
- Opcode = X86::JMP32r;
- goto SetTailJmpOpcode;
- case X86::TAILJMPd:
- case X86::TAILJMPd64:
- Opcode = X86::JMP_1;
- goto SetTailJmpOpcode;
-
- SetTailJmpOpcode:
- MCOperand Saved = OutMI.getOperand(0);
- OutMI = MCInst();
- OutMI.setOpcode(Opcode);
- OutMI.addOperand(Saved);
- break;
- }
+  // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump
+ // instruction.
+ case X86::TAILJMPr:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPr64_REX:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64:
+ assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
+ OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
+ break;
case X86::TAILJMPd_CC:
- case X86::TAILJMPd64_CC: {
- MCOperand Saved = OutMI.getOperand(0);
- MCOperand Saved2 = OutMI.getOperand(1);
- OutMI = MCInst();
- OutMI.setOpcode(X86::JCC_1);
- OutMI.addOperand(Saved);
- OutMI.addOperand(Saved2);
+ case X86::TAILJMPd64_CC:
+ assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
+ OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
+ break;
+
+ case X86::TAILJMPm:
+ case X86::TAILJMPm64:
+ case X86::TAILJMPm64_REX:
+ assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
+ "Unexpected number of operands!");
+ OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
break;
- }
case X86::DEC16r:
case X86::DEC32r:
@@ -958,7 +1156,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
// FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
// <opcode>, <operands>
- unsigned DefRegister = FaultingMI.getOperand(0).getReg();
+ Register DefRegister = FaultingMI.getOperand(0).getReg();
FaultMaps::FaultKind FK =
static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
@@ -1079,7 +1277,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
// Emit MOV to materialize the target address and the CALL to target.
// This is encoded with 12-13 bytes, depending on which register is used.
- unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg();
+ Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
if (X86II::isX86_64ExtendedReg(ScratchReg))
EncodedBytes = 13;
else
@@ -1369,6 +1567,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
recordSled(CurSled, MI, SledKind::TAIL_CALL);
unsigned OpCode = MI.getOperand(0).getImm();
+ OpCode = convertTailJumpOpcode(OpCode);
MCInst TC;
TC.setOpcode(OpCode);
@@ -1538,8 +1737,6 @@ static void printConstant(const Constant *COp, raw_ostream &CS) {
void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
- const X86RegisterInfo *RI =
- MF->getSubtarget<X86Subtarget>().getRegisterInfo();
// Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
if (EmitFPOData) {
@@ -1577,17 +1774,16 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
// Otherwise, use the .seh_ directives for all other Windows platforms.
switch (MI->getOpcode()) {
case X86::SEH_PushReg:
- OutStreamer->EmitWinCFIPushReg(
- RI->getSEHRegNum(MI->getOperand(0).getImm()));
+ OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
break;
case X86::SEH_SaveReg:
- OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
MI->getOperand(1).getImm());
break;
case X86::SEH_SaveXMM:
- OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
MI->getOperand(1).getImm());
break;
@@ -1596,9 +1792,8 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
break;
case X86::SEH_SetFrame:
- OutStreamer->EmitWinCFISetFrame(
- RI->getSEHRegNum(MI->getOperand(0).getImm()),
- MI->getOperand(1).getImm());
+ OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
break;
case X86::SEH_PushFrame:
@@ -1650,7 +1845,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::EH_RETURN:
case X86::EH_RETURN64: {
// Lower these as normal, but add some comments.
- unsigned Reg = MI->getOperand(0).getReg();
+ Register Reg = MI->getOperand(0).getReg();
OutStreamer->AddComment(StringRef("eh_return, addr: %") +
X86ATTInstPrinter::getRegisterName(Reg));
break;
@@ -1697,11 +1892,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::MASKPAIR16LOAD: {
int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
- const X86RegisterInfo *RI =
- MF->getSubtarget<X86Subtarget>().getRegisterInfo();
- unsigned Reg = MI->getOperand(0).getReg();
- unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
- unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+ Register Reg = MI->getOperand(0).getReg();
+ Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+ Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
// Load the first mask register
MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
@@ -1730,11 +1923,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::MASKPAIR16STORE: {
int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
- const X86RegisterInfo *RI =
- MF->getSubtarget<X86Subtarget>().getRegisterInfo();
- unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg();
- unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
- unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+ Register Reg = MI->getOperand(X86::AddrNumOperands).getReg();
+ Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+ Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
// Store the first mask register
MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index d7e535598d81..5cb80a082b56 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -36,6 +36,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// is stashed.
signed char RestoreBasePointerOffset = 0;
+ /// WinEHXMMSlotInfo - Map from a spilled XMM register's frame index to the
+ /// byte offset of its slot in the stack frame.
+ DenseMap<int, unsigned> WinEHXMMSlotInfo;
+
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
/// stack frame in bytes.
unsigned CalleeSavedFrameSize = 0;
@@ -120,6 +124,10 @@ public:
void setRestoreBasePointer(const MachineFunction *MF);
int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
+ DenseMap<int, unsigned>& getWinEHXMMSlotInfo() { return WinEHXMMSlotInfo; }
+ const DenseMap<int, unsigned>& getWinEHXMMSlotInfo() const {
+    return WinEHXMMSlotInfo;
+  }
+
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index 7f75598b0655..1aee01563c4b 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -198,8 +198,7 @@ static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) {
static inline bool isIdenticalOp(const MachineOperand &MO1,
const MachineOperand &MO2) {
return MO1.isIdenticalTo(MO2) &&
- (!MO1.isReg() ||
- !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ (!MO1.isReg() || !Register::isPhysicalRegister(MO1.getReg()));
}
#ifndef NDEBUG
@@ -235,9 +234,9 @@ static inline bool isLEA(const MachineInstr &MI) {
namespace {
-class OptimizeLEAPass : public MachineFunctionPass {
+class X86OptimizeLEAPass : public MachineFunctionPass {
public:
- OptimizeLEAPass() : MachineFunctionPass(ID) {}
+ X86OptimizeLEAPass() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "X86 LEA Optimize"; }
@@ -246,6 +245,8 @@ public:
/// been calculated by LEA. Also, remove redundant LEAs.
bool runOnMachineFunction(MachineFunction &MF) override;
+ static char ID;
+
private:
using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;
@@ -296,18 +297,18 @@ private:
MachineRegisterInfo *MRI;
const X86InstrInfo *TII;
const X86RegisterInfo *TRI;
-
- static char ID;
};
} // end anonymous namespace
-char OptimizeLEAPass::ID = 0;
+char X86OptimizeLEAPass::ID = 0;
-FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); }
+FunctionPass *llvm::createX86OptimizeLEAs() { return new X86OptimizeLEAPass(); }
+INITIALIZE_PASS(X86OptimizeLEAPass, DEBUG_TYPE, "X86 optimize LEA pass", false,
+ false)
-int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
- const MachineInstr &Last) {
+int X86OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
+ const MachineInstr &Last) {
// Both instructions must be in the same basic block and they must be
// presented in InstrPos.
assert(Last.getParent() == First.getParent() &&
@@ -328,10 +329,9 @@ int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
// 3) Displacement of the new memory operand should fit in 1 byte if possible.
// 4) The LEA should be as close to MI as possible, and prior to it if
// possible.
-bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
- const MachineInstr &MI,
- MachineInstr *&BestLEA,
- int64_t &AddrDispShift, int &Dist) {
+bool X86OptimizeLEAPass::chooseBestLEA(
+ const SmallVectorImpl<MachineInstr *> &List, const MachineInstr &MI,
+ MachineInstr *&BestLEA, int64_t &AddrDispShift, int &Dist) {
const MachineFunction *MF = MI.getParent()->getParent();
const MCInstrDesc &Desc = MI.getDesc();
int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags) +
@@ -387,9 +387,10 @@ bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
// Get the difference between the addresses' displacements of the two
// instructions \p MI1 and \p MI2. The numbers of the first memory operands are
// passed through \p N1 and \p N2.
-int64_t OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, unsigned N1,
- const MachineInstr &MI2,
- unsigned N2) const {
+int64_t X86OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1,
+ unsigned N1,
+ const MachineInstr &MI2,
+ unsigned N2) const {
const MachineOperand &Op1 = MI1.getOperand(N1 + X86::AddrDisp);
const MachineOperand &Op2 = MI2.getOperand(N2 + X86::AddrDisp);
@@ -411,9 +412,9 @@ int64_t OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, unsigned N1,
// 2) Def registers of LEAs belong to the same class.
// 3) All uses of the Last LEA def register are replaceable, thus the
// register is used only as address base.
-bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
- const MachineInstr &Last,
- int64_t &AddrDispShift) const {
+bool X86OptimizeLEAPass::isReplaceable(const MachineInstr &First,
+ const MachineInstr &Last,
+ int64_t &AddrDispShift) const {
assert(isLEA(First) && isLEA(Last) &&
"The function works only with LEA instructions");
@@ -467,7 +468,8 @@ bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
return true;
}
-void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) {
+void X86OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB,
+ MemOpMap &LEAs) {
unsigned Pos = 0;
for (auto &MI : MBB) {
// Assign the position number to the instruction. Note that we are going to
@@ -485,7 +487,7 @@ void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) {
// Try to find load and store instructions which recalculate addresses already
// calculated by some LEA and replace their memory operands with its def
// register.
-bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
+bool X86OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
bool Changed = false;
assert(!LEAs.empty());
@@ -564,9 +566,9 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
return Changed;
}
-MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
- unsigned VReg,
- int64_t AddrDispShift) {
+MachineInstr *X86OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
+ unsigned VReg,
+ int64_t AddrDispShift) {
DIExpression *Expr = const_cast<DIExpression *>(MI.getDebugExpression());
if (AddrDispShift != 0)
Expr = DIExpression::prepend(Expr, DIExpression::StackValue, AddrDispShift);
@@ -583,7 +585,7 @@ MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
}
// Try to find similar LEAs in the list and replace one with another.
-bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
+bool X86OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
bool Changed = false;
// Loop over all entries in the table.
@@ -613,8 +615,8 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
// Loop over all uses of the Last LEA and update their operands. Note
// that the correctness of this has already been checked in the
// isReplaceable function.
- unsigned FirstVReg = First.getOperand(0).getReg();
- unsigned LastVReg = Last.getOperand(0).getReg();
+ Register FirstVReg = First.getOperand(0).getReg();
+ Register LastVReg = Last.getOperand(0).getReg();
for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end();
UI != UE;) {
MachineOperand &MO = *UI++;
@@ -670,7 +672,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
return Changed;
}
-bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
+bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
if (DisableX86LEAOpt || skipFunction(MF.getFunction()))
diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp
index 78fede3dcde2..daddf4231897 100644
--- a/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -46,7 +46,9 @@ const RegisterBank &X86RegisterBankInfo::getRegBankFromRegClass(
if (X86::GR8RegClass.hasSubClassEq(&RC) ||
X86::GR16RegClass.hasSubClassEq(&RC) ||
X86::GR32RegClass.hasSubClassEq(&RC) ||
- X86::GR64RegClass.hasSubClassEq(&RC))
+ X86::GR64RegClass.hasSubClassEq(&RC) ||
+ X86::LOW32_ADDR_ACCESSRegClass.hasSubClassEq(&RC) ||
+ X86::LOW32_ADDR_ACCESS_RBPRegClass.hasSubClassEq(&RC))
return getRegBank(X86::GPRRegBankID);
if (X86::FR32XRegClass.hasSubClassEq(&RC) ||
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 2e2f1f9e438a..ff625325b4c9 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -544,7 +544,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
"Stack realignment in presence of dynamic allocas is not supported with"
"this calling convention.");
- unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
+ Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true);
I.isValid(); ++I)
Reserved.set(*I);
@@ -677,13 +677,13 @@ static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
MI.getOperand(4).getImm() != 0 ||
MI.getOperand(5).getReg() != X86::NoRegister)
return false;
- unsigned BasePtr = MI.getOperand(1).getReg();
+ Register BasePtr = MI.getOperand(1).getReg();
// In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
// be replaced with a 32-bit operand MOV which will zero extend the upper
// 32-bits of the super register.
if (Opc == X86::LEA64_32r)
BasePtr = getX86SubSuperRegister(BasePtr, 32);
- unsigned NewDestReg = MI.getOperand(0).getReg();
+ Register NewDestReg = MI.getOperand(0).getReg();
const X86InstrInfo *TII =
MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
@@ -692,12 +692,27 @@ static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
return true;
}
+static bool isFuncletReturnInstr(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case X86::CATCHRET:
+ case X86::CLEANUPRET:
+ return true;
+ default:
+ return false;
+ }
+}
+
void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
MachineInstr &MI = *II;
- MachineFunction &MF = *MI.getParent()->getParent();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
+ : isFuncletReturnInstr(*MBBI);
const X86FrameLowering *TFI = getFrameLowering(MF);
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
@@ -709,6 +724,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
"Return instruction can only reference SP relative frame objects");
FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0);
+ } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
+ FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
} else {
FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr);
}
@@ -729,7 +746,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// register as source operand, semantic is the same and destination is
// 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
// Don't change BasePtr since it is used later for stack adjustment.
- unsigned MachineBasePtr = BasePtr;
+ Register MachineBasePtr = BasePtr;
if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);
@@ -773,7 +790,7 @@ Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
- unsigned FrameReg = getFrameRegister(MF);
+ Register FrameReg = getFrameRegister(MF);
if (Subtarget.isTarget64BitILP32())
FrameReg = getX86SubSuperRegister(FrameReg, 32);
return FrameReg;
@@ -782,7 +799,7 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
unsigned
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
- unsigned StackReg = getStackRegister();
+ Register StackReg = getStackRegister();
if (Subtarget.isTarget64BitILP32())
StackReg = getX86SubSuperRegister(StackReg, 32);
return StackReg;
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
index b435b22e8ac7..f8464c7e8298 100644
--- a/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
@@ -58,8 +58,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineModuleInfo>();
- AU.addPreserved<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
}
private:
@@ -97,7 +97,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
TII = STI->getInstrInfo();
Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
- MMI = &getAnalysis<MachineModuleInfo>();
+ MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
Module &M = const_cast<Module &>(*MMI->getModule());
// If this function is not a thunk, check to see if we need to insert
@@ -279,7 +279,7 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
CallTarget->addLiveIn(Reg);
CallTarget->setHasAddressTaken();
- CallTarget->setAlignment(4);
+ CallTarget->setAlignment(Align(16));
insertRegReturnAddrClobber(*CallTarget, Reg);
CallTarget->back().setPreInstrSymbol(MF, TargetSym);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td
index 7574e4b8f896..9b1fcaa8a13d 100755
--- a/lib/Target/X86/X86SchedBroadwell.td
+++ b/lib/Target/X86/X86SchedBroadwell.td
@@ -232,8 +232,12 @@ defm : X86WriteRes<WriteFStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [BWPort5], 1, [1], 1>;
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 284d1567c5c6..06f417501b21 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -231,8 +231,12 @@ defm : X86WriteRes<WriteFStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [HWPort5], 1, [1], 1>;
diff --git a/lib/Target/X86/X86SchedPredicates.td b/lib/Target/X86/X86SchedPredicates.td
index 41bd776648f7..76001d382a27 100644
--- a/lib/Target/X86/X86SchedPredicates.td
+++ b/lib/Target/X86/X86SchedPredicates.td
@@ -84,3 +84,60 @@ def IsSETAm_Or_SETBEm : CheckAny<[
CheckImmOperand_s<5, "X86::COND_A">,
CheckImmOperand_s<5, "X86::COND_BE">
]>;
+
+// A predicate used to check if an instruction has a LOCK prefix.
+def CheckLockPrefix : CheckFunctionPredicate<
+ "X86_MC::hasLockPrefix",
+ "X86InstrInfo::hasLockPrefix"
+>;
+
+def IsRegRegCompareAndSwap_8 : CheckOpcode<[ CMPXCHG8rr ]>;
+
+def IsRegMemCompareAndSwap_8 : CheckOpcode<[
+ LCMPXCHG8, CMPXCHG8rm
+]>;
+
+def IsRegRegCompareAndSwap_16_32_64 : CheckOpcode<[
+ CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr
+]>;
+
+def IsRegMemCompareAndSwap_16_32_64 : CheckOpcode<[
+ CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm,
+ LCMPXCHG16, LCMPXCHG32, LCMPXCHG64,
+ LCMPXCHG8B, LCMPXCHG16B
+]>;
+
+def IsCompareAndSwap8B : CheckOpcode<[ CMPXCHG8B, LCMPXCHG8B ]>;
+def IsCompareAndSwap16B : CheckOpcode<[ CMPXCHG16B, LCMPXCHG16B ]>;
+
+def IsRegMemCompareAndSwap : CheckOpcode<
+ !listconcat(
+ IsRegMemCompareAndSwap_8.ValidOpcodes,
+ IsRegMemCompareAndSwap_16_32_64.ValidOpcodes
+ )>;
+
+def IsRegRegCompareAndSwap : CheckOpcode<
+ !listconcat(
+ IsRegRegCompareAndSwap_8.ValidOpcodes,
+ IsRegRegCompareAndSwap_16_32_64.ValidOpcodes
+ )>;
+
+def IsAtomicCompareAndSwap_8 : CheckAll<[
+ CheckLockPrefix,
+ IsRegMemCompareAndSwap_8
+]>;
+
+def IsAtomicCompareAndSwap : CheckAll<[
+ CheckLockPrefix,
+ IsRegMemCompareAndSwap
+]>;
+
+def IsAtomicCompareAndSwap8B : CheckAll<[
+ CheckLockPrefix,
+ IsCompareAndSwap8B
+]>;
+
+def IsAtomicCompareAndSwap16B : CheckAll<[
+ CheckLockPrefix,
+ IsCompareAndSwap16B
+]>;
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index d40bdf728a48..26d4d8fa3549 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -208,8 +208,12 @@ defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SBPort5], 1, [1], 1>;
diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td
index 8f3e4ae62d53..9a511ecc0071 100644
--- a/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/lib/Target/X86/X86SchedSkylakeClient.td
@@ -226,8 +226,12 @@ defm : X86WriteRes<WriteFStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKLPort015], 1, [1], 1>;
diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td
index 58caf1dacfcb..a8c65435ab9b 100755
--- a/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/lib/Target/X86/X86SchedSkylakeServer.td
@@ -226,8 +226,12 @@ defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 55ca85ec1e3d..95f710061aeb 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -102,6 +102,12 @@ class X86SchedWriteMoveLS<SchedWrite MoveRR,
SchedWrite MR = StoreMR;
}
+// Multiclass that wraps masked load/store writes for a vector width.
+class X86SchedWriteMaskMove<SchedWrite LoadRM, SchedWrite StoreMR> {
+ SchedWrite RM = LoadRM;
+ SchedWrite MR = StoreMR;
+}
+
// Multiclass that wraps X86SchedWriteMoveLS for each vector width.
class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
X86SchedWriteMoveLS s128,
@@ -218,8 +224,12 @@ def WriteFStoreY : SchedWrite;
def WriteFStoreNT : SchedWrite;
def WriteFStoreNTX : SchedWrite;
def WriteFStoreNTY : SchedWrite;
-def WriteFMaskedStore : SchedWrite;
-def WriteFMaskedStoreY : SchedWrite;
+
+def WriteFMaskedStore32 : SchedWrite;
+def WriteFMaskedStore64 : SchedWrite;
+def WriteFMaskedStore32Y : SchedWrite;
+def WriteFMaskedStore64Y : SchedWrite;
+
def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
@@ -530,6 +540,16 @@ def SchedWriteVecMoveLSNT
: X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
+// Conditional SIMD Packed Loads and Stores wrappers.
+def WriteFMaskMove32
+ : X86SchedWriteMaskMove<WriteFMaskedLoad, WriteFMaskedStore32>;
+def WriteFMaskMove64
+ : X86SchedWriteMaskMove<WriteFMaskedLoad, WriteFMaskedStore64>;
+def WriteFMaskMove32Y
+ : X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore32Y>;
+def WriteFMaskMove64Y
+ : X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore64Y>;
+
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index b0334655de7e..78acb1065ec8 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -216,8 +216,10 @@ defm : X86WriteResUnsupported<WriteFStoreY>;
def : WriteRes<WriteFStoreNT, [AtomPort0]>;
def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreNTY>;
-defm : X86WriteResUnsupported<WriteFMaskedStore>;
-defm : X86WriteResUnsupported<WriteFMaskedStoreY>;
+defm : X86WriteResUnsupported<WriteFMaskedStore32>;
+defm : X86WriteResUnsupported<WriteFMaskedStore32Y>;
+defm : X86WriteResUnsupported<WriteFMaskedStore64>;
+defm : X86WriteResUnsupported<WriteFMaskedStore64Y>;
def : WriteRes<WriteFMove, [AtomPort01]>;
def : WriteRes<WriteFMoveX, [AtomPort01]>;
diff --git a/lib/Target/X86/X86ScheduleBdVer2.td b/lib/Target/X86/X86ScheduleBdVer2.td
index 8cc01c3acece..d7aea3cf4e9d 100644
--- a/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/lib/Target/X86/X86ScheduleBdVer2.td
@@ -726,8 +726,10 @@ defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;
-defm : PdWriteRes<WriteFMaskedStore, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
-defm : PdWriteRes<WriteFMaskedStoreY, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
+defm : PdWriteRes<WriteFMaskedStore32, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
+defm : PdWriteRes<WriteFMaskedStore64, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
+defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
+defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index 2d26232b4132..d0421d94ee05 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -180,9 +180,11 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
// Instructions that have local forwarding disabled have an extra +1cy latency.
-// A folded store needs a cycle on the SAGU for the store data,
-// most RMW instructions don't need an extra uop.
-defm : X86WriteRes<WriteRMW, [JSAGU], 1, [1], 0>;
+// A folded store needs a cycle on the SAGU for the store data, most RMW
+// instructions don't need an extra uop. ALU RMW operations don't seem to
+// benefit from store-to-load forwarding (STLF), and their observed latency
+// is 6cy. That is why this write adds two extra cycles (instead of just 1cy
+// for the store).
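+// For example, with the 3cy load-to-use latency assumed elsewhere in this
+// model, `addl %esi, (%rsp)` is 3cy (load) + 1cy (ALU) + 2cy (this write),
+// matching the observed 6cy.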
+defm : X86WriteRes<WriteRMW, [JSAGU], 2, [1], 0>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
@@ -191,22 +193,22 @@ defm : X86WriteRes<WriteRMW, [JSAGU], 1, [1], 0>;
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>;
-defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
-defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
-defm : X86WriteRes<WriteCMPXCHG,[JALU01], 1, [1], 1>;
-defm : X86WriteRes<WriteCMPXCHGRMW,[JALU01, JSAGU, JLAGU], 4, [1, 1, 1], 2>;
-defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>;
-
-defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul16Imm, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul16Reg, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul32, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul32Imm, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
-defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 2>;
-defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 2>;
+defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHG, [JALU01], 3, [3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [JALU01, JSAGU, JLAGU], 11, [3, 1, 1], 6>;
+defm : X86WriteRes<WriteXCHG, [JALU01], 1, [2], 2>;
+
+defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 1>;
+defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 3], 3>;
+defm : JWriteResIntPair<WriteIMul16Imm, [JALU1, JMul], 4, [1, 2], 2>;
+defm : JWriteResIntPair<WriteIMul16Reg, [JALU1, JMul], 3, [1, 1], 1>;
+defm : JWriteResIntPair<WriteIMul32, [JALU1, JMul], 3, [1, 2], 2>;
+defm : JWriteResIntPair<WriteIMul32Imm, [JALU1, JMul], 3, [1, 1], 1>;
+defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 1>;
+defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 1>;
+defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 1>;
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
@@ -305,6 +307,192 @@ def : WriteRes<WriteFence, [JSAGU]>;
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
+def JWriteCMPXCHG8rr : SchedWriteRes<[JALU01]> {
+ let Latency = 3;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3;
+}
+
+def JWriteLOCK_CMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+ let Latency = 16;
+ let ResourceCycles = [3,16,16];
+ let NumMicroOps = 5;
+}
+
+def JWriteLOCK_CMPXCHGrm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+ let Latency = 17;
+ let ResourceCycles = [3,17,17];
+ let NumMicroOps = 6;
+}
+
+def JWriteCMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+ let Latency = 11;
+ let ResourceCycles = [3,1,1];
+ let NumMicroOps = 5;
+}
+
+def JWriteCMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+ let Latency = 11;
+ let ResourceCycles = [3,1,1];
+ let NumMicroOps = 18;
+}
+
+def JWriteCMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+ let Latency = 32;
+ let ResourceCycles = [6,1,1];
+ let NumMicroOps = 28;
+}
+
+def JWriteLOCK_CMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+ let Latency = 19;
+ let ResourceCycles = [3,19,19];
+ let NumMicroOps = 18;
+}
+
+def JWriteLOCK_CMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+ let Latency = 38;
+ let ResourceCycles = [6,38,38];
+ let NumMicroOps = 28;
+}
+
+def JWriteCMPXCHGVariant : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsAtomicCompareAndSwap8B>, [JWriteLOCK_CMPXCHG8B]>,
+ SchedVar<MCSchedPredicate<IsAtomicCompareAndSwap16B>, [JWriteLOCK_CMPXCHG16B]>,
+ SchedVar<MCSchedPredicate<IsAtomicCompareAndSwap_8>, [JWriteLOCK_CMPXCHG8rm]>,
+ SchedVar<MCSchedPredicate<IsAtomicCompareAndSwap>, [JWriteLOCK_CMPXCHGrm]>,
+ SchedVar<MCSchedPredicate<IsCompareAndSwap8B>, [JWriteCMPXCHG8B]>,
+ SchedVar<MCSchedPredicate<IsCompareAndSwap16B>, [JWriteCMPXCHG16B]>,
+ SchedVar<MCSchedPredicate<IsRegMemCompareAndSwap_8>, [JWriteCMPXCHG8rm]>,
+ SchedVar<MCSchedPredicate<IsRegMemCompareAndSwap>, [WriteCMPXCHGRMW]>,
+ SchedVar<MCSchedPredicate<IsRegRegCompareAndSwap_8>, [JWriteCMPXCHG8rr]>,
+ SchedVar<NoSchedPred, [WriteCMPXCHG]>
+]>;
+
+// The first five reads are contributed by the memory load operand.
+// We ignore those reads and set a read-advance for the other input operands
+// including the implicit read of RAX.
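+// (Those five reads correspond to the Base, Scale, Index, Displacement and
+// Segment parts of the memory reference, i.e. X86::AddrNumOperands operands.)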
+def : InstRW<[JWriteCMPXCHGVariant,
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadAfterLd, ReadAfterLd], (instrs LCMPXCHG8, LCMPXCHG16,
+ LCMPXCHG32, LCMPXCHG64,
+ CMPXCHG8rm, CMPXCHG16rm,
+ CMPXCHG32rm, CMPXCHG64rm)>;
+
+def : InstRW<[JWriteCMPXCHGVariant], (instrs CMPXCHG8rr, CMPXCHG16rr,
+ CMPXCHG32rr, CMPXCHG64rr)>;
+
+def : InstRW<[JWriteCMPXCHGVariant,
+ // Ignore reads contributed by the memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Add a read-advance to every implicit register read.
+ ReadAfterLd, ReadAfterLd, ReadAfterLd, ReadAfterLd], (instrs LCMPXCHG8B, LCMPXCHG16B,
+ CMPXCHG8B, CMPXCHG16B)>;
+
+def JWriteLOCK_ALURMW : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+ let Latency = 19;
+ let ResourceCycles = [1,19,19];
+ let NumMicroOps = 1;
+}
+
+def JWriteLOCK_ALURMWVariant : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteLOCK_ALURMW]>,
+ SchedVar<NoSchedPred, [WriteALURMW]>
+]>;
+def : InstRW<[JWriteLOCK_ALURMWVariant], (instrs INC8m, INC16m, INC32m, INC64m,
+ DEC8m, DEC16m, DEC32m, DEC64m,
+ NOT8m, NOT16m, NOT32m, NOT64m,
+ NEG8m, NEG16m, NEG32m, NEG64m)>;
+
+def JWriteXCHG8rr_XADDrr : SchedWriteRes<[JALU01]> {
+ let Latency = 2;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3;
+}
+def : InstRW<[JWriteXCHG8rr_XADDrr], (instrs XCHG8rr, XADD8rr, XADD16rr,
+ XADD32rr, XADD64rr)>;
+
+// This write defines the latency of the in/out register operand of a non-atomic
+// XADDrm. This is the first of a pair of writes that model non-atomic
+// XADDrm instructions (the second write definition is JWriteXADDrm_LdSt_Part).
+//
+// We need two writes because the instruction latency differs from the output
+// register operand latency. In particular, the first write describes the first
+// (and only) output register operand of the instruction. However, the
+// instruction latency is set to the MAX of all the write latencies. That's why
+// a second write is needed in this case (see example below).
+//
+// Example:
+// XADD %ecx, (%rsp) ## Instruction latency: 11cy
+// ## ECX write Latency: 3cy
+//
+// Register ECX becomes available in 3 cycles. That is because the value of ECX
+// is exchanged with the value loaded from the address in %rsp, and the
+// load-to-use latency is assumed to be 3cy.
+def JWriteXADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
+ let Latency = 3; // load-to-use latency
+ let ResourceCycles = [3];
+ let NumMicroOps = 3;
+}
+
+// This write defines the latency of the in/out register operand of an atomic
+// XADDrm. This is the first of a sequence of two writes used to model atomic
+// XADD instructions. The second write of the sequence is
+// JWriteXCHGrm_LdSt_Part.
+//
+// Example:
+// LOCK XADD %ecx, (%rsp) ## Instruction Latency: 16cy
+// ## ECX write Latency: 11cy
+//
+// The value of ECX becomes available only after 11cy from the start of
+// execution. This write is used to specifically set that operand latency.
+def JWriteLOCK_XADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
+ let Latency = 11;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3;
+}
+
+// This write defines the latency of the in/out register operand of an atomic
+// XCHGrm. This write is the first of a sequence of two writes that describe
+// atomic XCHG operations. We need two writes because the instruction latency
+// differs from the output register write latency. We want to make sure that
+// the output register operand becomes visible after 11cy. However, we want to
+// set the instruction latency to 16cy.
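+//
+// Example:
+// XCHG %ecx, (%rsp) ## Instruction Latency: 16cy
+//                   ## ECX write Latency: 11cy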
+def JWriteXCHGrm_XCHG_Part : SchedWriteRes<[JALU01]> {
+ let Latency = 11;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def JWriteXADDrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> {
+ let Latency = 11;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 1;
+}
+
+def JWriteXCHGrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> {
+ let Latency = 16;
+ let ResourceCycles = [16, 16];
+ let NumMicroOps = 1;
+}
+
+def JWriteXADDrm_Part1 : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteLOCK_XADDrm_XCHG_Part]>,
+ SchedVar<NoSchedPred, [JWriteXADDrm_XCHG_Part]>
+]>;
+
+def JWriteXADDrm_Part2 : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteXCHGrm_LdSt_Part]>,
+ SchedVar<NoSchedPred, [JWriteXADDrm_LdSt_Part]>
+]>;
+
+def : InstRW<[JWriteXADDrm_Part1, JWriteXADDrm_Part2, ReadAfterLd],
+ (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm,
+ LXADD8, LXADD16, LXADD32, LXADD64)>;
+
+def : InstRW<[JWriteXCHGrm_XCHG_Part, JWriteXCHGrm_LdSt_Part, ReadAfterLd],
+ (instrs XCHG8rm, XCHG16rm, XCHG32rm, XCHG64rm)>;
+
+
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
@@ -313,19 +501,22 @@ defm : X86WriteRes<WriteFLD0, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLD1, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFLoadX, [JLAGU], 5, [1], 1>;
+defm : X86WriteRes<WriteFLoadY, [JLAGU], 5, [2], 2>;
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 4, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
-defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
-defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 16, [1,1, 5, 5,4,4,4], 19>;
+defm : X86WriteRes<WriteFMaskedStore64, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 13, [1,1, 2, 2,2,2,2], 10>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 22, [1,1,10,10,8,8,8], 36>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 16, [1,1, 4, 4,4,4,4], 18>;
defm : X86WriteRes<WriteFMove, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>;
@@ -466,8 +657,8 @@ defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecLoadX, [JLAGU], 5, [1], 1>;
+defm : X86WriteRes<WriteVecLoadY, [JLAGU], 5, [2], 2>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 2, 2], 1>;
@@ -475,7 +666,7 @@ defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 4, 4],
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
@@ -631,6 +822,18 @@ def JWriteJVZEROUPPER: SchedWriteRes<[]> {
def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>;
///////////////////////////////////////////////////////////////////////////////
+// SSE2/AVX Store Selected Bytes of Double Quadword - (V)MASKMOVDQ
+///////////////////////////////////////////////////////////////////////////////
+
+def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> {
+ let Latency = 34;
+ let ResourceCycles = [1, 1, 2, 2, 2, 16, 42];
+ let NumMicroOps = 63;
+}
+def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64,
+ VMASKMOVDQU, VMASKMOVDQU64)>;
+
+///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index 34c251a5c5bb..8e3ce721f1a1 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -186,8 +186,12 @@ def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
-def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
-def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
+
+def : WriteRes<WriteFMaskedStore32, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFMaskedStore32Y, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFMaskedStore64, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFMaskedStore64Y, [SLM_MEC_RSV]>;
+
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>;
diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td
index 65f6d89df610..06201f4a3a84 100644
--- a/lib/Target/X86/X86ScheduleZnver1.td
+++ b/lib/Target/X86/X86ScheduleZnver1.td
@@ -268,8 +268,12 @@ defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>;
-defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
-defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 50690953eef5..1ae8df977f83 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -36,7 +36,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
DAG.getSubtarget().getRegisterInfo());
- unsigned BaseReg = TRI->getBaseRegister();
+ Register BaseReg = TRI->getBaseRegister();
for (unsigned R : ClobberSet)
if (BaseReg == R)
return true;
diff --git a/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 40f5dbe57e4b..b8980789258e 100644
--- a/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -477,7 +477,7 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
// Otherwise, just build the predicate state itself by zeroing a register
// as we don't need any initial state.
PS->InitialReg = MRI->createVirtualRegister(PS->RC);
- unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+ Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
PredStateSubReg);
++NumInstsInserted;
@@ -750,7 +750,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
- unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
// Note that we intentionally use an empty debug location so that
// this picks up the preceding location.
auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
@@ -907,7 +907,7 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
MI.dump(); dbgs() << "\n");
report_fatal_error("Unable to unfold load!");
}
- unsigned Reg = MRI->createVirtualRegister(UnfoldedRC);
+ Register Reg = MRI->createVirtualRegister(UnfoldedRC);
SmallVector<MachineInstr *, 2> NewMIs;
// If we were able to compute an unfolded reg class, any failure here
// is just a programming error so just assert.
@@ -1102,7 +1102,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
// synthetic target in the predecessor. We do this at the bottom of the
// predecessor.
auto InsertPt = Pred->getFirstTerminator();
- unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
if (MF.getTarget().getCodeModel() == CodeModel::Small &&
!Subtarget->isPositionIndependent()) {
// Directly materialize it into an immediate.
@@ -1153,7 +1153,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
} else {
// Otherwise compute the address into a register first.
- unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
auto AddrI =
BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
.addReg(/*Base*/ X86::RIP)
@@ -1175,7 +1175,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
// Now cmov over the predicate if the comparison wasn't equal.
int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
- unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
auto CMovI =
BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
.addReg(PS->InitialReg)
@@ -1878,7 +1878,7 @@ unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
DebugLoc Loc) {
// FIXME: Hard coding this to a 32-bit register class seems weird, but matches
// what instruction selection does.
- unsigned Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
+ Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
// We directly copy the FLAGS register and rely on later lowering to clean
// this up into the appropriate setCC instructions.
BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
@@ -1905,7 +1905,7 @@ void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
unsigned PredStateReg) {
- unsigned TmpReg = MRI->createVirtualRegister(PS->RC);
+ Register TmpReg = MRI->createVirtualRegister(PS->RC);
// FIXME: This hard codes a shift distance based on the number of bits needed
// to stay canonical on 64-bit. We should compute this somehow and support
// 32-bit as part of that.
@@ -1925,8 +1925,8 @@ void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
DebugLoc Loc) {
- unsigned PredStateReg = MRI->createVirtualRegister(PS->RC);
- unsigned TmpReg = MRI->createVirtualRegister(PS->RC);
+ Register PredStateReg = MRI->createVirtualRegister(PS->RC);
+ Register TmpReg = MRI->createVirtualRegister(PS->RC);
// We know that the stack pointer will have any preserved predicate state in
// its high bit. We just want to smear this across the other bits. Turns out,
@@ -2031,9 +2031,9 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
}
for (MachineOperand *Op : HardenOpRegs) {
- unsigned OpReg = Op->getReg();
+ Register OpReg = Op->getReg();
auto *OpRC = MRI->getRegClass(OpReg);
- unsigned TmpReg = MRI->createVirtualRegister(OpRC);
+ Register TmpReg = MRI->createVirtualRegister(OpRC);
// If this is a vector register, we'll need somewhat custom logic to handle
// hardening it.
@@ -2045,7 +2045,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
// Move our state into a vector register.
// FIXME: We could skip this at the cost of longer encodings with AVX-512
// but that doesn't seem likely worth it.
- unsigned VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
+ Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
auto MovI =
BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
.addReg(StateReg);
@@ -2054,7 +2054,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
// Broadcast it across the vector register.
- unsigned VBStateReg = MRI->createVirtualRegister(OpRC);
+ Register VBStateReg = MRI->createVirtualRegister(OpRC);
auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
TII->get(Is128Bit ? X86::VPBROADCASTQrr
: X86::VPBROADCASTQYrr),
@@ -2084,7 +2084,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
// Broadcast our state into a vector register.
- unsigned VStateReg = MRI->createVirtualRegister(OpRC);
+ Register VStateReg = MRI->createVirtualRegister(OpRC);
unsigned BroadcastOp =
Is128Bit ? X86::VPBROADCASTQrZ128r
: Is256Bit ? X86::VPBROADCASTQrZ256r : X86::VPBROADCASTQrZr;
@@ -2153,7 +2153,7 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
// See if we can sink hardening the loaded value.
auto SinkCheckToSingleUse =
[&](MachineInstr &MI) -> Optional<MachineInstr *> {
- unsigned DefReg = MI.getOperand(0).getReg();
+ Register DefReg = MI.getOperand(0).getReg();
// We need to find a single use which we can sink the check. We can
// primarily do this because many uses may already end up checked on their
@@ -2210,8 +2210,8 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
// If this register isn't a virtual register we can't walk uses of sanely,
// just bail. Also check that its register class is one of the ones we
// can harden.
- unsigned UseDefReg = UseMI.getOperand(0).getReg();
- if (!TRI->isVirtualRegister(UseDefReg) ||
+ Register UseDefReg = UseMI.getOperand(0).getReg();
+ if (!Register::isVirtualRegister(UseDefReg) ||
!canHardenRegister(UseDefReg))
return {};
@@ -2241,6 +2241,9 @@ bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) {
// We don't support post-load hardening of vectors.
return false;
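+  // RegBytes is 1, 2, 4 or 8 at this point, so RegIdx (0-3) safely indexes
+  // the 4-entry register-class arrays below.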
+ unsigned RegIdx = Log2_32(RegBytes);
+ assert(RegIdx < 4 && "Unsupported register size");
+
// If this register class is explicitly constrained to a class that doesn't
// require REX prefix, we may not be able to satisfy that constraint when
// emitting the hardening instructions, so bail out here.
@@ -2251,13 +2254,13 @@ bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) {
const TargetRegisterClass *NOREXRegClasses[] = {
&X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
&X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
- if (RC == NOREXRegClasses[Log2_32(RegBytes)])
+ if (RC == NOREXRegClasses[RegIdx])
return false;
const TargetRegisterClass *GPRRegClasses[] = {
&X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
&X86::GR64RegClass};
- return RC->hasSuperClassEq(GPRRegClasses[Log2_32(RegBytes)]);
+ return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
}
/// Harden a value in a register.
@@ -2278,7 +2281,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
unsigned Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
DebugLoc Loc) {
assert(canHardenRegister(Reg) && "Cannot harden this register!");
- assert(TRI->isVirtualRegister(Reg) && "Cannot harden a physical register!");
+  assert(Register::isVirtualRegister(Reg) &&
+         "Cannot harden a physical register!");
auto *RC = MRI->getRegClass(Reg);
int Bytes = TRI->getRegSizeInBits(*RC) / 8;
@@ -2289,7 +2292,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
if (Bytes != 8) {
unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
- unsigned NarrowStateReg = MRI->createVirtualRegister(RC);
+ Register NarrowStateReg = MRI->createVirtualRegister(RC);
BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
.addReg(StateReg, 0, SubRegImm);
StateReg = NarrowStateReg;
@@ -2299,7 +2302,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
if (isEFLAGSLive(MBB, InsertPt, *TRI))
FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
- unsigned NewReg = MRI->createVirtualRegister(RC);
+ Register NewReg = MRI->createVirtualRegister(RC);
unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
@@ -2329,13 +2332,13 @@ unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
DebugLoc Loc = MI.getDebugLoc();
auto &DefOp = MI.getOperand(0);
- unsigned OldDefReg = DefOp.getReg();
+ Register OldDefReg = DefOp.getReg();
auto *DefRC = MRI->getRegClass(OldDefReg);
// Because we want to completely replace the uses of this def'ed value with
// the hardened value, create a dedicated new register that will only be used
// to communicate the unhardened value to the hardening.
- unsigned UnhardenedReg = MRI->createVirtualRegister(DefRC);
+ Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
DefOp.setReg(UnhardenedReg);
// Now harden this register's value, getting a hardened reg that is safe to
@@ -2537,7 +2540,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
.addReg(ExpectedRetAddrReg, RegState::Kill)
.addSym(RetSymbol);
} else {
- unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
.addReg(/*Base*/ X86::RIP)
.addImm(/*Scale*/ 1)
@@ -2554,7 +2557,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
- unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
.addReg(NewStateReg, RegState::Kill)
.addReg(PS->PoisonReg)
@@ -2611,7 +2614,7 @@ void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
// For all of these, the target register is the first operand of the
// instruction.
auto &TargetOp = MI.getOperand(0);
- unsigned OldTargetReg = TargetOp.getReg();
+ Register OldTargetReg = TargetOp.getReg();
// Try to lookup a hardened version of this register. We retain a reference
// here as we want to update the map to track any newly computed hardened
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index d5bb56603df9..f8f78da52cc2 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -146,6 +146,9 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
return X86II::MO_DLLIMPORT;
return X86II::MO_COFFSTUB;
}
+ // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables.
+ if (isOSWindows())
+ return X86II::MO_NO_FLAG;
if (is64Bit()) {
// ELF supports a large, truly PIC code model with non-PC relative GOT
@@ -285,10 +288,10 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
- stackAlignment = StackAlignOverride;
+ stackAlignment = *StackAlignOverride;
else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
isTargetKFreeBSD() || In64BitMode)
- stackAlignment = 16;
+ stackAlignment = Align(16);
// Some CPUs have more overhead for gather. The specified overhead is relative
// to the Load operation. "2" is the number provided by Intel architects. This
@@ -304,6 +307,8 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Consume the vector width attribute or apply any target specific limit.
if (PreferVectorWidthOverride)
PreferVectorWidth = PreferVectorWidthOverride;
+ else if (Prefer128Bit)
+ PreferVectorWidth = 128;
else if (Prefer256Bit)
PreferVectorWidth = 256;
}
@@ -316,12 +321,11 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const X86TargetMachine &TM,
- unsigned StackAlignOverride,
+ MaybeAlign StackAlignOverride,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth)
- : X86GenSubtargetInfo(TT, CPU, FS),
- PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
- StackAlignOverride(StackAlignOverride),
+ : X86GenSubtargetInfo(TT, CPU, FS), PICStyle(PICStyles::None), TM(TM),
+ TargetTriple(TT), StackAlignOverride(StackAlignOverride),
PreferVectorWidthOverride(PreferVectorWidthOverride),
RequiredVectorWidth(RequiredVectorWidth),
In64BitMode(TargetTriple.getArch() == Triple::x86_64),
@@ -355,7 +359,7 @@ const CallLowering *X86Subtarget::getCallLowering() const {
return CallLoweringInfo.get();
}
-const InstructionSelector *X86Subtarget::getInstructionSelector() const {
+InstructionSelector *X86Subtarget::getInstructionSelector() const {
return InstSelector.get();
}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 24ccc9cb7843..e8efe8f2afe5 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -365,8 +365,8 @@ protected:
/// Processor has AVX-512 vp2intersect instructions
bool HasVP2INTERSECT = false;
- /// Processor supports MPX - Memory Protection Extensions
- bool HasMPX = false;
+ /// Deprecated flag for MPX instructions.
+ bool DeprecatedHasMPX = false;
/// Processor supports CET SHSTK - Control-Flow Enforcement Technology
/// using Shadow Stack
@@ -427,15 +427,21 @@ protected:
/// Use software floating point for code generation.
bool UseSoftFloat = false;
+ /// Use alias analysis during code generation.
+ bool UseAA = false;
+
/// The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
- unsigned stackAlignment = 4;
+ Align stackAlignment = Align(4);
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
// FIXME: this is a known good value for Yonah. How about others?
unsigned MaxInlineSizeThreshold = 128;
+ /// Indicates target prefers 128 bit instructions.
+ bool Prefer128Bit = false;
+
/// Indicates target prefers 256 bit instructions.
bool Prefer256Bit = false;
@@ -453,7 +459,7 @@ protected:
private:
/// Override the stack alignment.
- unsigned StackAlignOverride;
+ MaybeAlign StackAlignOverride;
/// Preferred vector width from function attribute.
unsigned PreferVectorWidthOverride;
@@ -490,7 +496,7 @@ public:
/// of the specified triple.
///
X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
- const X86TargetMachine &TM, unsigned StackAlignOverride,
+ const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth);
@@ -515,7 +521,7 @@ public:
/// Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
- unsigned getStackAlignment() const { return stackAlignment; }
+ Align getStackAlignment() const { return stackAlignment; }
/// Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -527,7 +533,7 @@ public:
/// Methods used by Global ISel
const CallLowering *getCallLowering() const override;
- const InstructionSelector *getInstructionSelector() const override;
+ InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
@@ -684,7 +690,6 @@ public:
bool hasBF16() const { return HasBF16; }
bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
bool hasBITALG() const { return HasBITALG; }
- bool hasMPX() const { return HasMPX; }
bool hasSHSTK() const { return HasSHSTK; }
bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
bool hasCLWB() const { return HasCLWB; }
@@ -739,6 +744,7 @@ public:
X86ProcFamily == IntelTRM;
}
bool useSoftFloat() const { return UseSoftFloat; }
+ bool useAA() const override { return UseAA; }
/// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
/// no-sse2). There isn't any reason to disable it if the target processor
@@ -809,6 +815,7 @@ public:
// On Win64, all these conventions just use the default convention.
case CallingConv::C:
case CallingConv::Fast:
+ case CallingConv::Tail:
case CallingConv::Swift:
case CallingConv::X86_FastCall:
case CallingConv::X86_StdCall:
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 0cbf13899a29..c15297134e4d 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -81,27 +81,28 @@ extern "C" void LLVMInitializeX86Target() {
initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
initializeX86CondBrFoldingPassPass(PR);
+ initializeX86OptimizeLEAPassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO()) {
if (TT.getArch() == Triple::x86_64)
- return llvm::make_unique<X86_64MachoTargetObjectFile>();
- return llvm::make_unique<TargetLoweringObjectFileMachO>();
+ return std::make_unique<X86_64MachoTargetObjectFile>();
+ return std::make_unique<TargetLoweringObjectFileMachO>();
}
if (TT.isOSFreeBSD())
- return llvm::make_unique<X86FreeBSDTargetObjectFile>();
+ return std::make_unique<X86FreeBSDTargetObjectFile>();
if (TT.isOSLinux() || TT.isOSNaCl() || TT.isOSIAMCU())
- return llvm::make_unique<X86LinuxNaClTargetObjectFile>();
+ return std::make_unique<X86LinuxNaClTargetObjectFile>();
if (TT.isOSSolaris())
- return llvm::make_unique<X86SolarisTargetObjectFile>();
+ return std::make_unique<X86SolarisTargetObjectFile>();
if (TT.isOSFuchsia())
- return llvm::make_unique<X86FuchsiaTargetObjectFile>();
+ return std::make_unique<X86FuchsiaTargetObjectFile>();
if (TT.isOSBinFormatELF())
- return llvm::make_unique<X86ELFTargetObjectFile>();
+ return std::make_unique<X86ELFTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
- return llvm::make_unique<TargetLoweringObjectFileCOFF>();
+ return std::make_unique<TargetLoweringObjectFileCOFF>();
llvm_unreachable("unknown subtarget type");
}
@@ -116,6 +117,9 @@ static std::string computeDataLayout(const Triple &TT) {
!TT.isArch64Bit())
Ret += "-p:32:32";
+ // Address spaces for 32 bit signed, 32 bit unsigned, and 64 bit pointers.
+ Ret += "-p270:32:32-p271:32:32-p272:64:64";
+
// Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
Ret += "-i64:64";
@@ -218,17 +222,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64),
OL),
TLOF(createTLOF(getTargetTriple())) {
- // Windows stack unwinder gets confused when execution flow "falls through"
- // after a call to 'noreturn' function.
- // To prevent that, we emit a trap for 'unreachable' IR instructions.
- // (which on X86, happens to be the 'ud2' instruction)
// On PS4, the "return address" of a 'noreturn' call must still be within
// the calling function, and TrapUnreachable is an easy way to get that.
- // The check here for 64-bit windows is a bit icky, but as we're unlikely
- // to ever want to mix 32 and 64-bit windows code in a single module
- // this should be fine.
- if ((TT.isOSWindows() && TT.getArch() == Triple::x86_64) || TT.isPS4() ||
- TT.isOSBinFormatMachO()) {
+ if (TT.isPS4() || TT.isOSBinFormatMachO()) {
this->Options.TrapUnreachable = true;
this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO();
}
@@ -311,10 +307,10 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
- Options.StackAlignmentOverride,
- PreferVectorWidthOverride,
- RequiredVectorWidth);
+ I = std::make_unique<X86Subtarget>(
+ TargetTriple, CPU, FS, *this,
+ MaybeAlign(Options.StackAlignmentOverride), PreferVectorWidthOverride,
+ RequiredVectorWidth);
}
return I.get();
}
@@ -517,12 +513,19 @@ void X86PassConfig::addPreEmitPass() {
}
void X86PassConfig::addPreEmitPass2() {
+ const Triple &TT = TM->getTargetTriple();
+ const MCAsmInfo *MAI = TM->getMCAsmInfo();
+
addPass(createX86RetpolineThunksPass());
+
+ // Insert extra int3 instructions after trailing call instructions to avoid
+ // issues in the unwinder.
+ if (TT.isOSWindows() && TT.getArch() == Triple::x86_64)
+ addPass(createX86AvoidTrailingCallPass());
+
// Verify basic block incoming and outgoing cfa offset and register values and
// correct CFA calculation rule where needed by inserting appropriate CFI
// instructions.
- const Triple &TT = TM->getTargetTriple();
- const MCAsmInfo *MAI = TM->getMCAsmInfo();
if (!TT.isOSDarwin() &&
(!TT.isOSWindows() ||
MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index b999e2e86af6..ec3db7b1e9e8 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -16,7 +16,6 @@
#include "X86Subtarget.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
#include <memory>
@@ -26,6 +25,7 @@ namespace llvm {
class StringRef;
class X86Subtarget;
class X86RegisterBankInfo;
+class TargetTransformInfo;
class X86TargetMachine final : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 92e0779c2e74..44185957686b 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -47,8 +47,8 @@ MCSymbol *X86_64MachoTargetObjectFile::getCFIPersonalitySymbol(
}
const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
- const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
- MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV,
+ int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const {
// On Darwin/X86-64, we need to use foo@GOTPCREL+4 to access the got entry
// from a data section. In case there's an additional offset, then use
// foo@GOTPCREL+4+<offset>.
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 13d7b4ad70d6..1fd0bbf56b19 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -30,7 +30,8 @@ namespace llvm {
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
- const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV,
+ const MCSymbol *Sym,
const MCValue &MV, int64_t Offset,
MachineModuleInfo *MMI,
MCStreamer &Streamer) const override;
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 3dc59aeb263e..70fd857fcf01 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -116,7 +116,8 @@ llvm::Optional<unsigned> X86TTIImpl::getCacheAssociativity(
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
-unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
+unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (Vector && !ST->hasSSE1())
return 0;
@@ -887,7 +888,7 @@ int X86TTIImpl::getArithmeticInstrCost(
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
- // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
+ // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// Treat Transpose as 2-op shuffles - there's no difference in lowering.
@@ -911,6 +912,39 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
int NumSubElts = SubLT.second.getVectorNumElements();
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
return SubLT.first;
+ // Handle some cases for widening legalization. For now we only handle
+ // cases where the original subvector was naturally aligned and evenly
+ // fit in its legalized subvector type.
+ // FIXME: Remove some of the alignment restrictions.
+ // FIXME: We can use permq for 64-bit or larger extracts from 256-bit
+ // vectors.
+ int OrigSubElts = SubTp->getVectorNumElements();
+ if (NumSubElts > OrigSubElts &&
+ (Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 &&
+ LT.second.getVectorElementType() ==
+ SubLT.second.getVectorElementType() &&
+ LT.second.getVectorElementType().getSizeInBits() ==
+ Tp->getVectorElementType()->getPrimitiveSizeInBits()) {
+ assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
+ "Unexpected number of elements!");
+ Type *VecTy = VectorType::get(Tp->getVectorElementType(),
+ LT.second.getVectorNumElements());
+ Type *SubTy = VectorType::get(Tp->getVectorElementType(),
+ SubLT.second.getVectorNumElements());
+ int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
+ int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
+ ExtractIndex, SubTy);
+
+ // If the original size is 32-bits or more, we can use pshufd. Otherwise
+ // if we have SSSE3 we can use pshufb.
+ if (SubTp->getPrimitiveSizeInBits() >= 32 || ST->hasSSSE3())
+ return ExtractCost + 1; // pshufd or pshufb
+
+ assert(SubTp->getPrimitiveSizeInBits() == 16 &&
+ "Unexpected vector size");
+
+ return ExtractCost + 2; // worst case pshufhw + pshufd
+ }
}
}
@@ -1314,8 +1348,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
- { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
@@ -1354,6 +1390,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
@@ -1371,14 +1409,14 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
- { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
- { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
- { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
@@ -1402,13 +1440,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
@@ -1421,7 +1459,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
@@ -1507,6 +1548,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
};
@@ -1520,7 +1562,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
@@ -1536,6 +1579,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
@@ -1562,15 +1607,21 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 },
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // PAND+PACKUSWB
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+3*PACKUSWB
+ { ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 },
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 4 }, // PAND+3*PACKUSWB
+ { ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 2 }, // PSHUFD+PSHUFLW
+ { ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD
};
std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
@@ -1691,6 +1742,11 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
}
}
+ static const CostTblEntry SLMCostTbl[] = {
+ // slm pcmpeq/pcmpgt throughput is 2
+ { ISD::SETCC, MVT::v2i64, 2 },
+ };
+
static const CostTblEntry AVX512BWCostTbl[] = {
{ ISD::SETCC, MVT::v32i16, 1 },
{ ISD::SETCC, MVT::v64i8, 1 },
@@ -1777,6 +1833,10 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
{ ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps
};
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
+ return LT.first * (ExtraCost + Entry->Cost);
+
if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
return LT.first * (ExtraCost + Entry->Cost);
@@ -2043,8 +2103,26 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
};
+ static const CostTblEntry LZCNT64CostTbl[] = { // 64-bit targets
+ { ISD::CTLZ, MVT::i64, 1 },
+ };
+ static const CostTblEntry LZCNT32CostTbl[] = { // 32 or 64-bit targets
+ { ISD::CTLZ, MVT::i32, 1 },
+ { ISD::CTLZ, MVT::i16, 1 },
+ { ISD::CTLZ, MVT::i8, 1 },
+ };
+ static const CostTblEntry POPCNT64CostTbl[] = { // 64-bit targets
+ { ISD::CTPOP, MVT::i64, 1 },
+ };
+ static const CostTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets
+ { ISD::CTPOP, MVT::i32, 1 },
+ { ISD::CTPOP, MVT::i16, 1 },
+ { ISD::CTPOP, MVT::i8, 1 },
+ };
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::BITREVERSE, MVT::i64, 14 },
+ { ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTPOP, MVT::i64, 10 },
{ ISD::SADDO, MVT::i64, 1 },
{ ISD::UADDO, MVT::i64, 1 },
};
@@ -2052,6 +2130,12 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::BITREVERSE, MVT::i32, 14 },
{ ISD::BITREVERSE, MVT::i16, 14 },
{ ISD::BITREVERSE, MVT::i8, 11 },
+ { ISD::CTLZ, MVT::i32, 4 }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTLZ, MVT::i16, 4 }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTLZ, MVT::i8, 4 }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTPOP, MVT::i32, 8 },
+ { ISD::CTPOP, MVT::i16, 9 },
+ { ISD::CTPOP, MVT::i8, 7 },
{ ISD::SADDO, MVT::i32, 1 },
{ ISD::SADDO, MVT::i16, 1 },
{ ISD::SADDO, MVT::i8, 1 },
@@ -2163,6 +2247,26 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
+ if (ST->hasLZCNT()) {
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
+
+ if (ST->hasPOPCNT()) {
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
+
+ // TODO - add BMI (TZCNT) scalar handling
+
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
@@ -2357,8 +2461,9 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
unsigned NumElem = SrcVTy->getVectorNumElements();
VectorType *MaskTy =
VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
- if ((IsLoad && !isLegalMaskedLoad(SrcVTy)) ||
- (IsStore && !isLegalMaskedStore(SrcVTy)) || !isPowerOf2_32(NumElem)) {
+ if ((IsLoad && !isLegalMaskedLoad(SrcVTy, MaybeAlign(Alignment))) ||
+ (IsStore && !isLegalMaskedStore(SrcVTy, MaybeAlign(Alignment))) ||
+ !isPowerOf2_32(NumElem)) {
// Scalarization
int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
int ScalarCompareCost = getCmpSelInstrCost(
@@ -2425,70 +2530,107 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
bool IsPairwise) {
-
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
-
- MVT MTy = LT.second;
-
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
// and make it as the cost.
- static const CostTblEntry SSE42CostTblPairWise[] = {
+ static const CostTblEntry SSE2CostTblPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
+ { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32.
{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
+ { ISD::ADD, MVT::v2i16, 3 }, // FIXME: chosen to be less than v4i16
+ { ISD::ADD, MVT::v4i16, 4 }, // FIXME: chosen to be less than v8i16
{ ISD::ADD, MVT::v8i16, 5 },
+ { ISD::ADD, MVT::v2i8, 2 },
+ { ISD::ADD, MVT::v4i8, 2 },
+ { ISD::ADD, MVT::v8i8, 2 },
+ { ISD::ADD, MVT::v16i8, 3 },
};
static const CostTblEntry AVX1CostTblPairWise[] = {
- { ISD::FADD, MVT::v4f32, 4 },
{ ISD::FADD, MVT::v4f64, 5 },
{ ISD::FADD, MVT::v8f32, 7 },
{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
- { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
{ ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
- { ISD::ADD, MVT::v8i16, 5 },
{ ISD::ADD, MVT::v8i32, 5 },
+ { ISD::ADD, MVT::v16i16, 6 },
+ { ISD::ADD, MVT::v32i8, 4 },
};
- static const CostTblEntry SSE42CostTblNoPairWise[] = {
+ static const CostTblEntry SSE2CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
+ { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32
{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
+ { ISD::ADD, MVT::v2i16, 2 }, // The data reported by the IACA tool is "4.3".
+ { ISD::ADD, MVT::v4i16, 3 }, // The data reported by the IACA tool is "4.3".
{ ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
+ { ISD::ADD, MVT::v2i8, 2 },
+ { ISD::ADD, MVT::v4i8, 2 },
+ { ISD::ADD, MVT::v8i8, 2 },
+ { ISD::ADD, MVT::v16i8, 3 },
};
static const CostTblEntry AVX1CostTblNoPairWise[] = {
- { ISD::FADD, MVT::v4f32, 3 },
{ ISD::FADD, MVT::v4f64, 3 },
+ { ISD::FADD, MVT::v4f32, 3 },
{ ISD::FADD, MVT::v8f32, 4 },
{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
- { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8".
{ ISD::ADD, MVT::v4i64, 3 },
- { ISD::ADD, MVT::v8i16, 4 },
{ ISD::ADD, MVT::v8i32, 5 },
+ { ISD::ADD, MVT::v16i16, 5 },
+ { ISD::ADD, MVT::v32i8, 4 },
};
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Before legalizing the type, give a chance to look up illegal narrow types
+ // in the table.
+ // FIXME: Is there a better way to do this?
+ EVT VT = TLI->getValueType(DL, ValTy);
+ if (VT.isSimple()) {
+ MVT MTy = VT.getSimpleVT();
+ if (IsPairwise) {
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
+ return Entry->Cost;
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
+ return Entry->Cost;
+ } else {
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ return Entry->Cost;
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+ return Entry->Cost;
+ }
+ }
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+
+ MVT MTy = LT.second;
+
if (IsPairwise) {
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
return LT.first * Entry->Cost;
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
return LT.first * Entry->Cost;
} else {
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
return LT.first * Entry->Cost;
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
return LT.first * Entry->Cost;
}
@@ -3116,7 +3258,7 @@ bool X86TTIImpl::canMacroFuseCmp() {
return ST->hasMacroFusion() || ST->hasBranchFusion();
}
-bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
+bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
if (!ST->hasAVX())
return false;
@@ -3139,11 +3281,11 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
}
-bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
- return isLegalMaskedLoad(DataType);
+bool X86TTIImpl::isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
+ return isLegalMaskedLoad(DataType, Alignment);
}
-bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) {
+bool X86TTIImpl::isLegalNTLoad(Type *DataType, Align Alignment) {
unsigned DataSize = DL.getTypeStoreSize(DataType);
// The only supported nontemporal loads are for aligned vectors of 16 or 32
// bytes. Note that 32-byte nontemporal vector loads are supported by AVX2
@@ -3154,7 +3296,7 @@ bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) {
return false;
}
-bool X86TTIImpl::isLegalNTStore(Type *DataType, unsigned Alignment) {
+bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) {
unsigned DataSize = DL.getTypeStoreSize(DataType);
// SSE4A supports nontemporal stores of float and double at arbitrary
@@ -3299,9 +3441,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
if (IsZeroCmp) {
// Only enable vector loads for equality comparison. Right now the vector
// version is not as fast for three way compare (see #33329).
- // TODO: enable AVX512 when the DAG is ready.
- // if (ST->hasAVX512()) Options.LoadSizes.push_back(64);
const unsigned PreferredWidth = ST->getPreferVectorWidth();
+ if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
if (PreferredWidth >= 256 && ST->hasAVX2()) Options.LoadSizes.push_back(32);
if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
// All GPR and vector loads can be unaligned. SIMD compare requires integer
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index 25d9c33eb16d..7581257f41f8 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -83,6 +83,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::FeatureSlowUAMem32,
// Based on whether user set the -mprefer-vector-width command line.
+ X86::FeaturePrefer128Bit,
X86::FeaturePrefer256Bit,
// CPU name enums. These just follow CPU string.
@@ -115,7 +116,7 @@ public:
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
unsigned getMaxInterleaveFactor(unsigned VF);
@@ -184,10 +185,10 @@ public:
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
bool canMacroFuseCmp();
- bool isLegalMaskedLoad(Type *DataType);
- bool isLegalMaskedStore(Type *DataType);
- bool isLegalNTLoad(Type *DataType, unsigned Alignment);
- bool isLegalNTStore(Type *DataType, unsigned Alignment);
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment);
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment);
+ bool isLegalNTLoad(Type *DataType, Align Alignment);
+ bool isLegalNTStore(Type *DataType, Align Alignment);
bool isLegalMaskedGather(Type *DataType);
bool isLegalMaskedScatter(Type *DataType);
bool isLegalMaskedExpandLoad(Type *DataType);
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index a07d2f20acab..9280d030b5d5 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -292,8 +292,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
// need to insert any VZEROUPPER instructions. This is constant-time, so it
// is cheap in the common case of no ymm/zmm use.
bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
- const TargetRegisterClass *RCs[2] = {&X86::VR256RegClass, &X86::VR512RegClass};
- for (auto *RC : RCs) {
+ for (auto *RC : {&X86::VR256RegClass, &X86::VR512_0_15RegClass}) {
if (!YmmOrZmmUsed) {
for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
i++) {
@@ -304,9 +303,8 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
}
}
}
- if (!YmmOrZmmUsed) {
+ if (!YmmOrZmmUsed)
return false;
- }
assert(BlockStates.empty() && DirtySuccessors.empty() &&
"X86VZeroUpper state should be clear");
diff --git a/lib/Target/X86/X86WinAllocaExpander.cpp b/lib/Target/X86/X86WinAllocaExpander.cpp
index 9e499db1d7ee..ae72c6427588 100644
--- a/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/lib/Target/X86/X86WinAllocaExpander.cpp
@@ -81,7 +81,7 @@ static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
MI->getOpcode() == X86::WIN_ALLOCA_64);
assert(MI->getOperand(0).isReg());
- unsigned AmountReg = MI->getOperand(0).getReg();
+ Register AmountReg = MI->getOperand(0).getReg();
MachineInstr *Def = MRI->getUniqueVRegDef(AmountReg);
if (!Def ||
@@ -261,7 +261,7 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
break;
}
- unsigned AmountReg = MI->getOperand(0).getReg();
+ Register AmountReg = MI->getOperand(0).getReg();
MI->eraseFromParent();
// Delete the definition of AmountReg.
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index f68d17d7256d..d65e1f3ab414 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -339,7 +339,10 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
if (UseStackGuard) {
Value *Val = Builder.CreateLoad(Int32Ty, Cookie);
Value *FrameAddr = Builder.CreateCall(
- Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress),
+ Intrinsic::getDeclaration(
+ TheModule, Intrinsic::frameaddress,
+ Builder.getInt8PtrTy(
+ TheModule->getDataLayout().getAllocaAddrSpace())),
Builder.getInt32(0), "frameaddr");
Value *FrameAddrI32 = Builder.CreatePtrToInt(FrameAddr, Int32Ty);
FrameAddrI32 = Builder.CreateXor(FrameAddrI32, Val);
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 9f615b9e7741..6b3dc27cb886 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -115,7 +115,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
MCSymbol *GVSym = getSymbol(GV);
const Constant *C = GV->getInitializer();
- unsigned Align = (unsigned)DL.getPreferredTypeAlignmentShift(C->getType());
+ const Align Alignment(DL.getPrefTypeAlignment(C->getType()));
// Mark the start of the global
getTargetStreamer().emitCCTopData(GVSym->getName());
@@ -143,7 +143,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
llvm_unreachable("Unknown linkage type!");
}
- EmitAlignment(Align > 2 ? Align : 2, GV);
+ EmitAlignment(std::max(Alignment, Align(4)), GV);
if (GV->isThreadLocal()) {
report_fatal_error("TLS is not supported by this target!");
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 5066407c74aa..fd8b37e26e47 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -211,7 +211,7 @@ static void RestoreSpillList(MachineBasicBlock &MBB,
//===----------------------------------------------------------------------===//
XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0) {
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(4), 0) {
// Do nothing
}
@@ -367,8 +367,8 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList);
// Return to the landing pad.
- unsigned EhStackReg = MBBI->getOperand(0).getReg();
- unsigned EhHandlerReg = MBBI->getOperand(1).getReg();
+ Register EhStackReg = MBBI->getOperand(0).getReg();
+ Register EhHandlerReg = MBBI->getOperand(1).getReg();
BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)).addReg(EhStackReg);
BuildMI(MBB, MBBI, dl, TII.get(XCore::BAU_1r)).addReg(EhHandlerReg);
MBB.erase(MBBI); // Erase the previous return instruction.
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index e433d21c59b7..b5dbdea98eea 100644
--- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -55,7 +55,7 @@ bool XCoreFTAOElim::runOnMachineFunction(MachineFunction &MF) {
MBBI != EE; ++MBBI) {
if (MBBI->getOpcode() == XCore::FRAME_TO_ARGS_OFFSET) {
MachineInstr &OldInst = *MBBI;
- unsigned Reg = OldInst.getOperand(0).getReg();
+ Register Reg = OldInst.getOperand(0).getReg();
MBBI = TII.loadImmediate(MBB, MBBI, Reg, StackSize);
OldInst.eraseFromParent();
}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 072278d9fc46..bf006fd673f1 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -171,8 +171,8 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
- setMinFunctionAlignment(1);
- setPrefFunctionAlignment(2);
+ setMinFunctionAlignment(Align(2));
+ setPrefFunctionAlignment(Align(4));
}
bool XCoreTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
@@ -414,8 +414,8 @@ SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
"Unexpected extension type");
assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
- if (allowsMemoryAccess(Context, DAG.getDataLayout(), LD->getMemoryVT(),
- *LD->getMemOperand()))
+ if (allowsMemoryAccessForAlignment(Context, DAG.getDataLayout(),
+ LD->getMemoryVT(), *LD->getMemOperand()))
return SDValue();
SDValue Chain = LD->getChain();
@@ -488,8 +488,8 @@ SDValue XCoreTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
assert(!ST->isTruncatingStore() && "Unexpected store type");
assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
- if (allowsMemoryAccess(Context, DAG.getDataLayout(), ST->getMemoryVT(),
- *ST->getMemOperand()))
+ if (allowsMemoryAccessForAlignment(Context, DAG.getDataLayout(),
+ ST->getMemoryVT(), *ST->getMemOperand()))
return SDValue();
SDValue Chain = ST->getChain();
@@ -1309,7 +1309,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
llvm_unreachable(nullptr);
}
case MVT::i32:
- unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass);
+ Register VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
ArgIn = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
CFRegNode.push_back(ArgIn.getValue(ArgIn->getNumValues() - 1));
@@ -1360,7 +1360,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
offset -= StackSlotSize;
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
// Move argument from phys reg -> virt reg
- unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass);
+ Register VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass);
RegInfo.addLiveIn(ArgRegs[i], VReg);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
CFRegNode.push_back(Val.getValue(Val->getNumValues() - 1));
@@ -1780,8 +1780,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// Replace unaligned store of unaligned load with memmove.
StoreSDNode *ST = cast<StoreSDNode>(N);
if (!DCI.isBeforeLegalize() ||
- allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- ST->getMemoryVT(), *ST->getMemOperand()) ||
+ allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ ST->getMemoryVT(),
+ *ST->getMemOperand()) ||
ST->isVolatile() || ST->isIndexed()) {
break;
}
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 3752274e2cdf..86ec7f82d4d1 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -301,7 +301,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
<< "<--------->\n");
Offset/=4;
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand");
if (TFI->hasFP(MF)) {
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 2a8cd6b657b7..b5b7445265b7 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -53,7 +53,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT,
T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
getEffectiveXCoreCodeModel(CM), OL),
- TLOF(llvm::make_unique<XCoreTargetObjectFile>()),
+ TLOF(std::make_unique<XCoreTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h
index 3fecaaa59722..58df1f290ec9 100644
--- a/lib/Target/XCore/XCoreTargetTransformInfo.h
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.h
@@ -40,7 +40,8 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
TLI(ST->getTargetLowering()) {}
- unsigned getNumberOfRegisters(bool Vector) {
+ unsigned getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (Vector) {
return 0;
}
diff --git a/lib/TextAPI/MachO/Architecture.cpp b/lib/TextAPI/MachO/Architecture.cpp
index a66a982fa153..699fb5f4587a 100644
--- a/lib/TextAPI/MachO/Architecture.cpp
+++ b/lib/TextAPI/MachO/Architecture.cpp
@@ -68,6 +68,10 @@ std::pair<uint32_t, uint32_t> getCPUTypeFromArchitecture(Architecture Arch) {
return std::make_pair(0, 0);
}
+Architecture mapToArchitecture(const Triple &Target) {
+ return getArchitectureFromName(Target.getArchName());
+}
+
raw_ostream &operator<<(raw_ostream &OS, Architecture Arch) {
OS << getArchitectureName(Arch);
return OS;
diff --git a/lib/TextAPI/MachO/InterfaceFile.cpp b/lib/TextAPI/MachO/InterfaceFile.cpp
index 54ba8cc31267..c40a952a6a8b 100644
--- a/lib/TextAPI/MachO/InterfaceFile.cpp
+++ b/lib/TextAPI/MachO/InterfaceFile.cpp
@@ -27,36 +27,65 @@ typename C::iterator addEntry(C &Container, StringRef InstallName) {
return Container.emplace(I, InstallName);
}
+
+template <typename C>
+typename C::iterator addEntry(C &Container, const Target &Target_) {
+ auto Iter =
+ lower_bound(Container, Target_, [](const Target &LHS, const Target &RHS) {
+ return LHS < RHS;
+ });
+ if ((Iter != std::end(Container)) && !(Target_ < *Iter))
+ return Iter;
+
+ return Container.insert(Iter, Target_);
+}
} // end namespace detail.
-void InterfaceFile::addAllowableClient(StringRef Name,
- ArchitectureSet Architectures) {
- auto Client = detail::addEntry(AllowableClients, Name);
- Client->addArchitectures(Architectures);
+void InterfaceFileRef::addTarget(const Target &Target) {
+ detail::addEntry(Targets, Target);
+}
+
+void InterfaceFile::addAllowableClient(StringRef InstallName,
+ const Target &Target) {
+ auto Client = detail::addEntry(AllowableClients, InstallName);
+ Client->addTarget(Target);
}
void InterfaceFile::addReexportedLibrary(StringRef InstallName,
- ArchitectureSet Architectures) {
+ const Target &Target) {
auto Lib = detail::addEntry(ReexportedLibraries, InstallName);
- Lib->addArchitectures(Architectures);
+ Lib->addTarget(Target);
}
-void InterfaceFile::addUUID(Architecture Arch, StringRef UUID) {
- auto I = partition_point(UUIDs,
- [=](const std::pair<Architecture, std::string> &O) {
- return O.first < Arch;
- });
+void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) {
+ auto Iter = lower_bound(ParentUmbrellas, Target_,
+ [](const std::pair<Target, std::string> &LHS,
+ Target RHS) { return LHS.first < RHS; });
- if (I != UUIDs.end() && Arch == I->first) {
- I->second = UUID;
+ if ((Iter != ParentUmbrellas.end()) && !(Target_ < Iter->first)) {
+ Iter->second = Parent;
return;
}
- UUIDs.emplace(I, Arch, UUID);
+ ParentUmbrellas.emplace(Iter, Target_, Parent);
return;
}
-void InterfaceFile::addUUID(Architecture Arch, uint8_t UUID[16]) {
+void InterfaceFile::addUUID(const Target &Target_, StringRef UUID) {
+ auto Iter = lower_bound(UUIDs, Target_,
+ [](const std::pair<Target, std::string> &LHS,
+ Target RHS) { return LHS.first < RHS; });
+
+ if ((Iter != UUIDs.end()) && !(Target_ < Iter->first)) {
+ Iter->second = UUID;
+ return;
+ }
+
+ UUIDs.emplace(Iter, Target_, UUID);
+ return;
+}
+
+void InterfaceFile::addUUID(const Target &Target, uint8_t UUID[16]) {
std::stringstream Stream;
for (unsigned i = 0; i < 16; ++i) {
if (i == 4 || i == 6 || i == 8 || i == 10)
@@ -64,17 +93,30 @@ void InterfaceFile::addUUID(Architecture Arch, uint8_t UUID[16]) {
Stream << std::setfill('0') << std::setw(2) << std::uppercase << std::hex
<< static_cast<int>(UUID[i]);
}
- addUUID(Arch, Stream.str());
+ addUUID(Target, Stream.str());
+}
+
+void InterfaceFile::addTarget(const Target &Target) {
+ detail::addEntry(Targets, Target);
+}
+
+InterfaceFile::const_filtered_target_range
+InterfaceFile::targets(ArchitectureSet Archs) const {
+ std::function<bool(const Target &)> fn = [Archs](const Target &Target_) {
+ return Archs.has(Target_.Arch);
+ };
+ return make_filter_range(Targets, fn);
}
void InterfaceFile::addSymbol(SymbolKind Kind, StringRef Name,
- ArchitectureSet Archs, SymbolFlags Flags) {
+ const TargetList &Targets, SymbolFlags Flags) {
Name = copyString(Name);
auto result = Symbols.try_emplace(SymbolsMapKey{Kind, Name}, nullptr);
if (result.second)
- result.first->second = new (Allocator) Symbol{Kind, Name, Archs, Flags};
+ result.first->second = new (Allocator) Symbol{Kind, Name, Targets, Flags};
else
- result.first->second->addArchitectures(Archs);
+ for (const auto &Target : Targets)
+ result.first->second->addTarget(Target);
}
} // end namespace MachO.
diff --git a/lib/TextAPI/MachO/Platform.cpp b/lib/TextAPI/MachO/Platform.cpp
new file mode 100644
index 000000000000..588ec9a4d83b
--- /dev/null
+++ b/lib/TextAPI/MachO/Platform.cpp
@@ -0,0 +1,91 @@
+//===- llvm/TextAPI/MachO/Platform.cpp - Platform ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementations of Platform Helper functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/TextAPI/MachO/Platform.h"
+
+namespace llvm {
+namespace MachO {
+
+PlatformKind mapToPlatformKind(PlatformKind Platform, bool WantSim) {
+ switch (Platform) {
+ default:
+ return Platform;
+ case PlatformKind::iOS:
+ return WantSim ? PlatformKind::iOSSimulator : PlatformKind::iOS;
+ case PlatformKind::tvOS:
+ return WantSim ? PlatformKind::tvOSSimulator : PlatformKind::tvOS;
+ case PlatformKind::watchOS:
+ return WantSim ? PlatformKind::watchOSSimulator : PlatformKind::watchOS;
+ }
+ llvm_unreachable("Unknown llvm.MachO.PlatformKind enum");
+}
+
+PlatformKind mapToPlatformKind(const Triple &Target) {
+ switch (Target.getOS()) {
+ default:
+ return PlatformKind::unknown;
+ case Triple::MacOSX:
+ return PlatformKind::macOS;
+ case Triple::IOS:
+ if (Target.isSimulatorEnvironment())
+ return PlatformKind::iOSSimulator;
+ if (Target.getEnvironment() == Triple::MacABI)
+ return PlatformKind::macCatalyst;
+ return PlatformKind::iOS;
+ case Triple::TvOS:
+ return Target.isSimulatorEnvironment() ? PlatformKind::tvOSSimulator
+ : PlatformKind::tvOS;
+ case Triple::WatchOS:
+ return Target.isSimulatorEnvironment() ? PlatformKind::watchOSSimulator
+ : PlatformKind::watchOS;
+ // TODO: add bridgeOS once in llvm::Triple
+ }
+ llvm_unreachable("Unknown Target Triple");
+}
+
+PlatformSet mapToPlatformSet(ArrayRef<Triple> Targets) {
+ PlatformSet Result;
+ for (const auto &Target : Targets)
+ Result.insert(mapToPlatformKind(Target));
+ return Result;
+}
+
+StringRef getPlatformName(PlatformKind Platform) {
+ switch (Platform) {
+ case PlatformKind::unknown:
+ return "unknown";
+ case PlatformKind::macOS:
+ return "macOS";
+ case PlatformKind::iOS:
+ return "iOS";
+ case PlatformKind::tvOS:
+ return "tvOS";
+ case PlatformKind::watchOS:
+ return "watchOS";
+ case PlatformKind::bridgeOS:
+ return "bridgeOS";
+ case PlatformKind::macCatalyst:
+ return "macCatalyst";
+ case PlatformKind::iOSSimulator:
+ return "iOS Simulator";
+ case PlatformKind::tvOSSimulator:
+ return "tvOS Simulator";
+ case PlatformKind::watchOSSimulator:
+ return "watchOS Simulator";
+ }
+ llvm_unreachable("Unknown llvm.MachO.PlatformKind enum");
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/Symbol.cpp b/lib/TextAPI/MachO/Symbol.cpp
index 731b264f6082..9f2d8172beed 100644
--- a/lib/TextAPI/MachO/Symbol.cpp
+++ b/lib/TextAPI/MachO/Symbol.cpp
@@ -45,5 +45,14 @@ LLVM_DUMP_METHOD void Symbol::dump(raw_ostream &OS) const {
}
#endif
+Symbol::const_filtered_target_range
+Symbol::targets(ArchitectureSet Architectures) const {
+ std::function<bool(const Target &)> FN =
+ [Architectures](const Target &Target) {
+ return Architectures.has(Target.Arch);
+ };
+ return make_filter_range(Targets, FN);
+}
+
} // end namespace MachO.
} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/Target.cpp b/lib/TextAPI/MachO/Target.cpp
new file mode 100644
index 000000000000..aee8ef421425
--- /dev/null
+++ b/lib/TextAPI/MachO/Target.cpp
@@ -0,0 +1,75 @@
+//===- tapi/Core/Target.cpp - Target ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TextAPI/MachO/Target.h"
+
+namespace llvm {
+namespace MachO {
+
+Expected<Target> Target::create(StringRef TargetValue) {
+ auto Result = TargetValue.split('-');
+ auto ArchitectureStr = Result.first;
+ auto Architecture = getArchitectureFromName(ArchitectureStr);
+ auto PlatformStr = Result.second;
+ PlatformKind Platform;
+ Platform = StringSwitch<PlatformKind>(PlatformStr)
+ .Case("macos", PlatformKind::macOS)
+ .Case("ios", PlatformKind::iOS)
+ .Case("tvos", PlatformKind::tvOS)
+ .Case("watchos", PlatformKind::watchOS)
+ .Case("bridgeos", PlatformKind::bridgeOS)
+ .Case("maccatalyst", PlatformKind::macCatalyst)
+ .Case("ios-simulator", PlatformKind::iOSSimulator)
+ .Case("tvos-simulator", PlatformKind::tvOSSimulator)
+ .Case("watchos-simulator", PlatformKind::watchOSSimulator)
+ .Default(PlatformKind::unknown);
+
+ if (Platform == PlatformKind::unknown) {
+ if (PlatformStr.startswith("<") && PlatformStr.endswith(">")) {
+ PlatformStr = PlatformStr.drop_front().drop_back();
+ unsigned long long RawValue;
+ if (!PlatformStr.getAsInteger(10, RawValue))
+ Platform = (PlatformKind)RawValue;
+ }
+ }
+
+ return Target{Architecture, Platform};
+}
+
+Target::operator std::string() const {
+ return (getArchitectureName(Arch) + " (" + getPlatformName(Platform) + ")")
+ .str();
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Target &Target) {
+ OS << std::string(Target);
+ return OS;
+}
+
+PlatformSet mapToPlatformSet(ArrayRef<Target> Targets) {
+ PlatformSet Result;
+ for (const auto &Target : Targets)
+ Result.insert(Target.Platform);
+ return Result;
+}
+
+ArchitectureSet mapToArchitectureSet(ArrayRef<Target> Targets) {
+ ArchitectureSet Result;
+ for (const auto &Target : Targets)
+ Result.set(Target.Arch);
+ return Result;
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/TextStub.cpp b/lib/TextAPI/MachO/TextStub.cpp
index 799ebdc883ab..0584e43d5893 100644
--- a/lib/TextAPI/MachO/TextStub.cpp
+++ b/lib/TextAPI/MachO/TextStub.cpp
@@ -147,6 +147,58 @@ Each undefineds section is defined as following:
objc-ivars: [] # Optional: List of Objective C Instance Variables
weak-ref-symbols: [] # Optional: List of weak defined symbols
*/
+
+/*
+
+ YAML Format specification.
+
+--- !tapi-tbd
+tbd-version: 4 # The tbd version for format
+targets: [ armv7-ios, x86_64-maccatalyst ] # The list of applicable tapi supported target triples
+uuids: # Optional: List of target and UUID pairs.
+ - target: armv7-ios
+ value: ...
+ - target: x86_64-maccatalyst
+ value: ...
+flags: [] # Optional:
+install-name: /u/l/libfoo.dylib #
+current-version: 1.2.3 # Optional: defaults to 1.0
+compatibility-version: 1.0 # Optional: defaults to 1.0
+swift-abi-version: 0 # Optional: defaults to 0
+parent-umbrella: # Optional:
+allowable-clients:
+ - targets: [ armv7-ios ] # Optional:
+ clients: [ clientA ]
+exports: # List of export sections
+...
+re-exports: # List of reexport sections
+...
+undefineds: # List of undefineds sections
+...
+
+Each export and reexport section is defined as following:
+
+- targets: [ arm64-macos ] # The list of target triples associated with symbols
+ symbols: [ _symA ] # Optional: List of symbols
+ objc-classes: [] # Optional: List of Objective-C classes
+ objc-eh-types: [] # Optional: List of Objective-C classes
+ # with EH
+ objc-ivars: [] # Optional: List of Objective C Instance
+ # Variables
+ weak-symbols: [] # Optional: List of weak defined symbols
+ thread-local-symbols: [] # Optional: List of thread local symbols
+- targets: [ arm64-macos, x86_64-maccatalyst ] # Optional: Targets for applicable additional symbols
+ symbols: [ _symB ] # Optional: List of symbols
+
+Each undefineds section is defined as following:
+- targets: [ arm64-macos ] # The list of target triples associated with symbols
+ symbols: [ _symC ] # Optional: List of symbols
+ objc-classes: [] # Optional: List of Objective-C classes
+ objc-eh-types: [] # Optional: List of Objective-C classes
+ # with EH
+ objc-ivars: [] # Optional: List of Objective C Instance Variables
+ weak-symbols: [] # Optional: List of weak defined symbols
+*/
// clang-format on
using namespace llvm;
@@ -175,6 +227,38 @@ struct UndefinedSection {
std::vector<FlowStringRef> WeakRefSymbols;
};
+// Sections for direct target mapping in TBDv4
+struct SymbolSection {
+ TargetList Targets;
+ std::vector<FlowStringRef> Symbols;
+ std::vector<FlowStringRef> Classes;
+ std::vector<FlowStringRef> ClassEHs;
+ std::vector<FlowStringRef> Ivars;
+ std::vector<FlowStringRef> WeakSymbols;
+ std::vector<FlowStringRef> TlvSymbols;
+};
+
+struct MetadataSection {
+ enum Option { Clients, Libraries };
+ std::vector<Target> Targets;
+ std::vector<FlowStringRef> Values;
+};
+
+struct UmbrellaSection {
+ std::vector<Target> Targets;
+ std::string Umbrella;
+};
+
+// UUID's for TBDv4 are mapped to target not arch
+struct UUIDv4 {
+ Target TargetID;
+ std::string Value;
+
+ UUIDv4() = default;
+ UUIDv4(const Target &TargetID, const std::string &Value)
+ : TargetID(TargetID), Value(Value) {}
+};
+
// clang-format off
enum TBDFlags : unsigned {
None = 0U,
@@ -189,6 +273,12 @@ enum TBDFlags : unsigned {
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Architecture)
LLVM_YAML_IS_SEQUENCE_VECTOR(ExportSection)
LLVM_YAML_IS_SEQUENCE_VECTOR(UndefinedSection)
+// Specific to TBDv4
+LLVM_YAML_IS_SEQUENCE_VECTOR(SymbolSection)
+LLVM_YAML_IS_SEQUENCE_VECTOR(MetadataSection)
+LLVM_YAML_IS_SEQUENCE_VECTOR(UmbrellaSection)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Target)
+LLVM_YAML_IS_SEQUENCE_VECTOR(UUIDv4)
namespace llvm {
namespace yaml {
@@ -231,6 +321,49 @@ template <> struct MappingTraits<UndefinedSection> {
}
};
+template <> struct MappingTraits<SymbolSection> {
+ static void mapping(IO &IO, SymbolSection &Section) {
+ IO.mapRequired("targets", Section.Targets);
+ IO.mapOptional("symbols", Section.Symbols);
+ IO.mapOptional("objc-classes", Section.Classes);
+ IO.mapOptional("objc-eh-types", Section.ClassEHs);
+ IO.mapOptional("objc-ivars", Section.Ivars);
+ IO.mapOptional("weak-symbols", Section.WeakSymbols);
+ IO.mapOptional("thread-local-symbols", Section.TlvSymbols);
+ }
+};
+
+template <> struct MappingTraits<UmbrellaSection> {
+ static void mapping(IO &IO, UmbrellaSection &Section) {
+ IO.mapRequired("targets", Section.Targets);
+ IO.mapRequired("umbrella", Section.Umbrella);
+ }
+};
+
+template <> struct MappingTraits<UUIDv4> {
+ static void mapping(IO &IO, UUIDv4 &UUID) {
+ IO.mapRequired("target", UUID.TargetID);
+ IO.mapRequired("value", UUID.Value);
+ }
+};
+
+template <>
+struct MappingContextTraits<MetadataSection, MetadataSection::Option> {
+ static void mapping(IO &IO, MetadataSection &Section,
+ MetadataSection::Option &OptionKind) {
+ IO.mapRequired("targets", Section.Targets);
+ switch (OptionKind) {
+ case MetadataSection::Option::Clients:
+ IO.mapRequired("clients", Section.Values);
+ return;
+ case MetadataSection::Option::Libraries:
+ IO.mapRequired("libraries", Section.Values);
+ return;
+ }
+ llvm_unreachable("unexpected option for metadata");
+ }
+};
+
template <> struct ScalarBitSetTraits<TBDFlags> {
static void bitset(IO &IO, TBDFlags &Flags) {
IO.bitSetCase(Flags, "flat_namespace", TBDFlags::FlatNamespace);
@@ -240,13 +373,67 @@ template <> struct ScalarBitSetTraits<TBDFlags> {
}
};
+template <> struct ScalarTraits<Target> {
+ static void output(const Target &Value, void *, raw_ostream &OS) {
+ OS << Value.Arch << "-";
+ switch (Value.Platform) {
+ default:
+ OS << "unknown";
+ break;
+ case PlatformKind::macOS:
+ OS << "macos";
+ break;
+ case PlatformKind::iOS:
+ OS << "ios";
+ break;
+ case PlatformKind::tvOS:
+ OS << "tvos";
+ break;
+ case PlatformKind::watchOS:
+ OS << "watchos";
+ break;
+ case PlatformKind::bridgeOS:
+ OS << "bridgeos";
+ break;
+ case PlatformKind::macCatalyst:
+ OS << "maccatalyst";
+ break;
+ case PlatformKind::iOSSimulator:
+ OS << "ios-simulator";
+ break;
+ case PlatformKind::tvOSSimulator:
+ OS << "tvos-simulator";
+ break;
+ case PlatformKind::watchOSSimulator:
+ OS << "watchos-simulator";
+ break;
+ }
+ }
+
+ static StringRef input(StringRef Scalar, void *, Target &Value) {
+ auto Result = Target::create(Scalar);
+ if (!Result)
+ return toString(Result.takeError());
+
+ Value = *Result;
+ if (Value.Arch == AK_unknown)
+ return "unknown architecture";
+ if (Value.Platform == PlatformKind::unknown)
+ return "unknown platform";
+
+ return {};
+ }
+
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
template <> struct MappingTraits<const InterfaceFile *> {
struct NormalizedTBD {
explicit NormalizedTBD(IO &IO) {}
NormalizedTBD(IO &IO, const InterfaceFile *&File) {
Architectures = File->getArchitectures();
UUIDs = File->uuids();
- Platform = File->getPlatform();
+ Platforms = File->getPlatforms();
InstallName = File->getInstallName();
CurrentVersion = PackedVersion(File->getCurrentVersion());
CompatibilityVersion = PackedVersion(File->getCompatibilityVersion());
@@ -263,7 +450,10 @@ template <> struct MappingTraits<const InterfaceFile *> {
if (File->isInstallAPI())
Flags |= TBDFlags::InstallAPI;
- ParentUmbrella = File->getParentUmbrella();
+ for (const auto &Iter : File->umbrellas()) {
+ ParentUmbrella = Iter.second;
+ break;
+ }
std::set<ArchitectureSet> ArchSet;
for (const auto &Library : File->allowableClients())
@@ -396,6 +586,29 @@ template <> struct MappingTraits<const InterfaceFile *> {
}
}
+ // TBD v1 - TBD v3 files only support one platform and several
+ // architectures. It is possible to have more than one platform for TBD v3
+ // files, but the architectures don't apply to all
+ // platforms, specifically to filter out the i386 slice from
+ // platform macCatalyst.
+ TargetList synthesizeTargets(ArchitectureSet Architectures,
+ const PlatformSet &Platforms) {
+ TargetList Targets;
+
+ for (auto Platform : Platforms) {
+ Platform = mapToPlatformKind(Platform, Architectures.hasX86());
+
+ for (const auto &&Architecture : Architectures) {
+ if ((Architecture == AK_i386) &&
+ (Platform == PlatformKind::macCatalyst))
+ continue;
+
+ Targets.emplace_back(Architecture, Platform);
+ }
+ }
+ return Targets;
+ }
+
const InterfaceFile *denormalize(IO &IO) {
auto Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
assert(Ctx);
@@ -403,16 +616,16 @@ template <> struct MappingTraits<const InterfaceFile *> {
auto *File = new InterfaceFile;
File->setPath(Ctx->Path);
File->setFileType(Ctx->FileKind);
+ File->addTargets(synthesizeTargets(Architectures, Platforms));
for (auto &ID : UUIDs)
File->addUUID(ID.first, ID.second);
- File->setPlatform(Platform);
- File->setArchitectures(Architectures);
File->setInstallName(InstallName);
File->setCurrentVersion(CurrentVersion);
File->setCompatibilityVersion(CompatibilityVersion);
File->setSwiftABIVersion(SwiftABIVersion);
File->setObjCConstraint(ObjCConstraint);
- File->setParentUmbrella(ParentUmbrella);
+ for (const auto &Target : File->targets())
+ File->addParentUmbrella(Target, ParentUmbrella);
if (Ctx->FileKind == FileType::TBD_V1) {
File->setTwoLevelNamespace();
@@ -425,76 +638,80 @@ template <> struct MappingTraits<const InterfaceFile *> {
}
for (const auto &Section : Exports) {
- for (const auto &Library : Section.AllowableClients)
- File->addAllowableClient(Library, Section.Architectures);
- for (const auto &Library : Section.ReexportedLibraries)
- File->addReexportedLibrary(Library, Section.Architectures);
+ const auto Targets =
+ synthesizeTargets(Section.Architectures, Platforms);
+
+ for (const auto &Lib : Section.AllowableClients)
+ for (const auto &Target : Targets)
+ File->addAllowableClient(Lib, Target);
+
+ for (const auto &Lib : Section.ReexportedLibraries)
+ for (const auto &Target : Targets)
+ File->addReexportedLibrary(Lib, Target);
for (const auto &Symbol : Section.Symbols) {
if (Ctx->FileKind != FileType::TBD_V3 &&
Symbol.value.startswith("_OBJC_EHTYPE_$_"))
File->addSymbol(SymbolKind::ObjectiveCClassEHType,
- Symbol.value.drop_front(15), Section.Architectures);
+ Symbol.value.drop_front(15), Targets);
else
- File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
- Section.Architectures);
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets);
}
for (auto &Symbol : Section.Classes) {
auto Name = Symbol.value;
if (Ctx->FileKind != FileType::TBD_V3)
Name = Name.drop_front();
- File->addSymbol(SymbolKind::ObjectiveCClass, Name,
- Section.Architectures);
+ File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets);
}
for (auto &Symbol : Section.ClassEHs)
- File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol,
- Section.Architectures);
+ File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets);
for (auto &Symbol : Section.IVars) {
auto Name = Symbol.value;
if (Ctx->FileKind != FileType::TBD_V3)
Name = Name.drop_front();
File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name,
- Section.Architectures);
+ Targets);
}
for (auto &Symbol : Section.WeakDefSymbols)
- File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
- Section.Architectures, SymbolFlags::WeakDefined);
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
+ SymbolFlags::WeakDefined);
for (auto &Symbol : Section.TLVSymbols)
- File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
- Section.Architectures, SymbolFlags::ThreadLocalValue);
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
+ SymbolFlags::ThreadLocalValue);
}
for (const auto &Section : Undefineds) {
+ const auto Targets =
+ synthesizeTargets(Section.Architectures, Platforms);
for (auto &Symbol : Section.Symbols) {
if (Ctx->FileKind != FileType::TBD_V3 &&
Symbol.value.startswith("_OBJC_EHTYPE_$_"))
File->addSymbol(SymbolKind::ObjectiveCClassEHType,
- Symbol.value.drop_front(15), Section.Architectures,
+ Symbol.value.drop_front(15), Targets,
SymbolFlags::Undefined);
else
- File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
- Section.Architectures, SymbolFlags::Undefined);
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
+ SymbolFlags::Undefined);
}
for (auto &Symbol : Section.Classes) {
auto Name = Symbol.value;
if (Ctx->FileKind != FileType::TBD_V3)
Name = Name.drop_front();
- File->addSymbol(SymbolKind::ObjectiveCClass, Name,
- Section.Architectures, SymbolFlags::Undefined);
+ File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets,
+ SymbolFlags::Undefined);
}
for (auto &Symbol : Section.ClassEHs)
- File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol,
- Section.Architectures, SymbolFlags::Undefined);
+ File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets,
+ SymbolFlags::Undefined);
for (auto &Symbol : Section.IVars) {
auto Name = Symbol.value;
if (Ctx->FileKind != FileType::TBD_V3)
Name = Name.drop_front();
- File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name,
- Section.Architectures, SymbolFlags::Undefined);
+ File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, Targets,
+ SymbolFlags::Undefined);
}
for (auto &Symbol : Section.WeakRefSymbols)
- File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
- Section.Architectures,
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
SymbolFlags::Undefined | SymbolFlags::WeakReferenced);
}
@@ -513,7 +730,7 @@ template <> struct MappingTraits<const InterfaceFile *> {
std::vector<Architecture> Architectures;
std::vector<UUID> UUIDs;
- PlatformKind Platform{PlatformKind::unknown};
+ PlatformSet Platforms;
StringRef InstallName;
PackedVersion CurrentVersion;
PackedVersion CompatibilityVersion;
@@ -525,71 +742,336 @@ template <> struct MappingTraits<const InterfaceFile *> {
std::vector<UndefinedSection> Undefineds;
};
+ static void setFileTypeForInput(TextAPIContext *Ctx, IO &IO) {
+ if (IO.mapTag("!tapi-tbd", false))
+ Ctx->FileKind = FileType::TBD_V4;
+ else if (IO.mapTag("!tapi-tbd-v3", false))
+ Ctx->FileKind = FileType::TBD_V3;
+ else if (IO.mapTag("!tapi-tbd-v2", false))
+ Ctx->FileKind = FileType::TBD_V2;
+ else if (IO.mapTag("!tapi-tbd-v1", false) ||
+ IO.mapTag("tag:yaml.org,2002:map", false))
+ Ctx->FileKind = FileType::TBD_V1;
+ else {
+ Ctx->FileKind = FileType::Invalid;
+ return;
+ }
+ }
+
static void mapping(IO &IO, const InterfaceFile *&File) {
auto *Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
assert((!Ctx || !IO.outputting() ||
(Ctx && Ctx->FileKind != FileType::Invalid)) &&
"File type is not set in YAML context");
- MappingNormalization<NormalizedTBD, const InterfaceFile *> Keys(IO, File);
- // prope file type when reading.
if (!IO.outputting()) {
- if (IO.mapTag("!tapi-tbd-v2", false))
- Ctx->FileKind = FileType::TBD_V2;
- else if (IO.mapTag("!tapi-tbd-v3", false))
- Ctx->FileKind = FileType::TBD_V2;
- else if (IO.mapTag("!tapi-tbd-v1", false) ||
- IO.mapTag("tag:yaml.org,2002:map", false))
- Ctx->FileKind = FileType::TBD_V1;
- else {
+ setFileTypeForInput(Ctx, IO);
+ switch (Ctx->FileKind) {
+ default:
+ break;
+ case FileType::TBD_V4:
+ mapKeysToValuesV4(IO, File);
+ return;
+ case FileType::Invalid:
IO.setError("unsupported file type");
return;
}
- }
-
- // Set file tyoe when writing.
- if (IO.outputting()) {
+ } else {
+ // Set file type when writing.
switch (Ctx->FileKind) {
default:
llvm_unreachable("unexpected file type");
- case FileType::TBD_V1:
- // Don't write the tag into the .tbd file for TBD v1.
+ case FileType::TBD_V4:
+ mapKeysToValuesV4(IO, File);
+ return;
+ case FileType::TBD_V3:
+ IO.mapTag("!tapi-tbd-v3", true);
break;
case FileType::TBD_V2:
IO.mapTag("!tapi-tbd-v2", true);
break;
- case FileType::TBD_V3:
- IO.mapTag("!tapi-tbd-v3", true);
+ case FileType::TBD_V1:
+ // Don't write the tag into the .tbd file for TBD v1
break;
}
}
+ mapKeysToValues(Ctx->FileKind, IO, File);
+ }
+
+ using SectionList = std::vector<SymbolSection>;
+ struct NormalizedTBD_V4 {
+ explicit NormalizedTBD_V4(IO &IO) {}
+ NormalizedTBD_V4(IO &IO, const InterfaceFile *&File) {
+ auto Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+ assert(Ctx);
+ TBDVersion = Ctx->FileKind >> 1;
+ Targets.insert(Targets.begin(), File->targets().begin(),
+ File->targets().end());
+ for (const auto &IT : File->uuids())
+ UUIDs.emplace_back(IT.first, IT.second);
+ InstallName = File->getInstallName();
+ CurrentVersion = File->getCurrentVersion();
+ CompatibilityVersion = File->getCompatibilityVersion();
+ SwiftABIVersion = File->getSwiftABIVersion();
+
+ Flags = TBDFlags::None;
+ if (!File->isApplicationExtensionSafe())
+ Flags |= TBDFlags::NotApplicationExtensionSafe;
+
+ if (!File->isTwoLevelNamespace())
+ Flags |= TBDFlags::FlatNamespace;
+
+ if (File->isInstallAPI())
+ Flags |= TBDFlags::InstallAPI;
+
+ {
+ std::map<std::string, TargetList> valueToTargetList;
+ for (const auto &it : File->umbrellas())
+ valueToTargetList[it.second].emplace_back(it.first);
+
+ for (const auto &it : valueToTargetList) {
+ UmbrellaSection CurrentSection;
+ CurrentSection.Targets.insert(CurrentSection.Targets.begin(),
+ it.second.begin(), it.second.end());
+ CurrentSection.Umbrella = it.first;
+ ParentUmbrellas.emplace_back(std::move(CurrentSection));
+ }
+ }
+
+ assignTargetsToLibrary(File->allowableClients(), AllowableClients);
+ assignTargetsToLibrary(File->reexportedLibraries(), ReexportedLibraries);
+
+ auto handleSymbols =
+ [](SectionList &CurrentSections,
+ InterfaceFile::const_filtered_symbol_range Symbols,
+ std::function<bool(const Symbol *)> Pred) {
+ std::set<TargetList> TargetSet;
+ std::map<const Symbol *, TargetList> SymbolToTargetList;
+ for (const auto *Symbol : Symbols) {
+ if (!Pred(Symbol))
+ continue;
+ TargetList Targets(Symbol->targets());
+ SymbolToTargetList[Symbol] = Targets;
+ TargetSet.emplace(std::move(Targets));
+ }
+ for (const auto &TargetIDs : TargetSet) {
+ SymbolSection CurrentSection;
+ CurrentSection.Targets.insert(CurrentSection.Targets.begin(),
+ TargetIDs.begin(), TargetIDs.end());
+
+ for (const auto &IT : SymbolToTargetList) {
+ if (IT.second != TargetIDs)
+ continue;
+
+ const auto *Symbol = IT.first;
+ switch (Symbol->getKind()) {
+ case SymbolKind::GlobalSymbol:
+ if (Symbol->isWeakDefined())
+ CurrentSection.WeakSymbols.emplace_back(Symbol->getName());
+ else if (Symbol->isThreadLocalValue())
+ CurrentSection.TlvSymbols.emplace_back(Symbol->getName());
+ else
+ CurrentSection.Symbols.emplace_back(Symbol->getName());
+ break;
+ case SymbolKind::ObjectiveCClass:
+ CurrentSection.Classes.emplace_back(Symbol->getName());
+ break;
+ case SymbolKind::ObjectiveCClassEHType:
+ CurrentSection.ClassEHs.emplace_back(Symbol->getName());
+ break;
+ case SymbolKind::ObjectiveCInstanceVariable:
+ CurrentSection.Ivars.emplace_back(Symbol->getName());
+ break;
+ }
+ }
+ sort(CurrentSection.Symbols);
+ sort(CurrentSection.Classes);
+ sort(CurrentSection.ClassEHs);
+ sort(CurrentSection.Ivars);
+ sort(CurrentSection.WeakSymbols);
+ sort(CurrentSection.TlvSymbols);
+ CurrentSections.emplace_back(std::move(CurrentSection));
+ }
+ };
+
+ handleSymbols(Exports, File->exports(), [](const Symbol *Symbol) {
+ return !Symbol->isReexported();
+ });
+ handleSymbols(Reexports, File->exports(), [](const Symbol *Symbol) {
+ return Symbol->isReexported();
+ });
+ handleSymbols(Undefineds, File->undefineds(),
+ [](const Symbol *Symbol) { return true; });
+ }
+
+ const InterfaceFile *denormalize(IO &IO) {
+ auto Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+ assert(Ctx);
+
+ auto *File = new InterfaceFile;
+ File->setPath(Ctx->Path);
+ File->setFileType(Ctx->FileKind);
+ for (auto &id : UUIDs)
+ File->addUUID(id.TargetID, id.Value);
+ File->addTargets(Targets);
+ File->setInstallName(InstallName);
+ File->setCurrentVersion(CurrentVersion);
+ File->setCompatibilityVersion(CompatibilityVersion);
+ File->setSwiftABIVersion(SwiftABIVersion);
+ for (const auto &CurrentSection : ParentUmbrellas)
+ for (const auto &target : CurrentSection.Targets)
+ File->addParentUmbrella(target, CurrentSection.Umbrella);
+ File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace));
+ File->setApplicationExtensionSafe(
+ !(Flags & TBDFlags::NotApplicationExtensionSafe));
+ File->setInstallAPI(Flags & TBDFlags::InstallAPI);
+
+ for (const auto &CurrentSection : AllowableClients) {
+ for (const auto &lib : CurrentSection.Values)
+ for (const auto &Target : CurrentSection.Targets)
+ File->addAllowableClient(lib, Target);
+ }
+
+ for (const auto &CurrentSection : ReexportedLibraries) {
+ for (const auto &Lib : CurrentSection.Values)
+ for (const auto &Target : CurrentSection.Targets)
+ File->addReexportedLibrary(Lib, Target);
+ }
+
+ auto handleSymbols = [File](const SectionList &CurrentSections,
+ SymbolFlags Flag = SymbolFlags::None) {
+ for (const auto &CurrentSection : CurrentSections) {
+ for (auto &sym : CurrentSection.Symbols)
+ File->addSymbol(SymbolKind::GlobalSymbol, sym,
+ CurrentSection.Targets, Flag);
+
+ for (auto &sym : CurrentSection.Classes)
+ File->addSymbol(SymbolKind::ObjectiveCClass, sym,
+ CurrentSection.Targets);
+
+ for (auto &sym : CurrentSection.ClassEHs)
+ File->addSymbol(SymbolKind::ObjectiveCClassEHType, sym,
+ CurrentSection.Targets);
+
+ for (auto &sym : CurrentSection.Ivars)
+ File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, sym,
+ CurrentSection.Targets);
+
+ for (auto &sym : CurrentSection.WeakSymbols)
+ File->addSymbol(SymbolKind::GlobalSymbol, sym,
+ CurrentSection.Targets);
+ for (auto &sym : CurrentSection.TlvSymbols)
+ File->addSymbol(SymbolKind::GlobalSymbol, sym,
+ CurrentSection.Targets,
+ SymbolFlags::ThreadLocalValue);
+ }
+ };
+
+ handleSymbols(Exports);
+ handleSymbols(Reexports, SymbolFlags::Rexported);
+ handleSymbols(Undefineds, SymbolFlags::Undefined);
+
+ return File;
+ }
+
+ unsigned TBDVersion;
+ std::vector<UUIDv4> UUIDs;
+ TargetList Targets;
+ StringRef InstallName;
+ PackedVersion CurrentVersion;
+ PackedVersion CompatibilityVersion;
+ SwiftVersion SwiftABIVersion{0};
+ std::vector<MetadataSection> AllowableClients;
+ std::vector<MetadataSection> ReexportedLibraries;
+ TBDFlags Flags{TBDFlags::None};
+ std::vector<UmbrellaSection> ParentUmbrellas;
+ SectionList Exports;
+ SectionList Reexports;
+ SectionList Undefineds;
+
+ private:
+ void assignTargetsToLibrary(const std::vector<InterfaceFileRef> &Libraries,
+ std::vector<MetadataSection> &Section) {
+ std::set<TargetList> targetSet;
+ std::map<const InterfaceFileRef *, TargetList> valueToTargetList;
+ for (const auto &library : Libraries) {
+ TargetList targets(library.targets());
+ valueToTargetList[&library] = targets;
+ targetSet.emplace(std::move(targets));
+ }
+
+ for (const auto &targets : targetSet) {
+ MetadataSection CurrentSection;
+ CurrentSection.Targets.insert(CurrentSection.Targets.begin(),
+ targets.begin(), targets.end());
+
+ for (const auto &it : valueToTargetList) {
+ if (it.second != targets)
+ continue;
+
+ CurrentSection.Values.emplace_back(it.first->getInstallName());
+ }
+ llvm::sort(CurrentSection.Values);
+ Section.emplace_back(std::move(CurrentSection));
+ }
+ }
+ };
+ static void mapKeysToValues(FileType FileKind, IO &IO,
+ const InterfaceFile *&File) {
+ MappingNormalization<NormalizedTBD, const InterfaceFile *> Keys(IO, File);
IO.mapRequired("archs", Keys->Architectures);
- if (Ctx->FileKind != FileType::TBD_V1)
+ if (FileKind != FileType::TBD_V1)
IO.mapOptional("uuids", Keys->UUIDs);
- IO.mapRequired("platform", Keys->Platform);
- if (Ctx->FileKind != FileType::TBD_V1)
+ IO.mapRequired("platform", Keys->Platforms);
+ if (FileKind != FileType::TBD_V1)
IO.mapOptional("flags", Keys->Flags, TBDFlags::None);
IO.mapRequired("install-name", Keys->InstallName);
IO.mapOptional("current-version", Keys->CurrentVersion,
PackedVersion(1, 0, 0));
IO.mapOptional("compatibility-version", Keys->CompatibilityVersion,
PackedVersion(1, 0, 0));
- if (Ctx->FileKind != FileType::TBD_V3)
+ if (FileKind != FileType::TBD_V3)
IO.mapOptional("swift-version", Keys->SwiftABIVersion, SwiftVersion(0));
else
IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion,
SwiftVersion(0));
IO.mapOptional("objc-constraint", Keys->ObjCConstraint,
- (Ctx->FileKind == FileType::TBD_V1)
+ (FileKind == FileType::TBD_V1)
? ObjCConstraintType::None
: ObjCConstraintType::Retain_Release);
- if (Ctx->FileKind != FileType::TBD_V1)
+ if (FileKind != FileType::TBD_V1)
IO.mapOptional("parent-umbrella", Keys->ParentUmbrella, StringRef());
IO.mapOptional("exports", Keys->Exports);
- if (Ctx->FileKind != FileType::TBD_V1)
+ if (FileKind != FileType::TBD_V1)
IO.mapOptional("undefineds", Keys->Undefineds);
}
+
+ static void mapKeysToValuesV4(IO &IO, const InterfaceFile *&File) {
+ MappingNormalization<NormalizedTBD_V4, const InterfaceFile *> Keys(IO,
+ File);
+ IO.mapTag("!tapi-tbd", true);
+ IO.mapRequired("tbd-version", Keys->TBDVersion);
+ IO.mapRequired("targets", Keys->Targets);
+ IO.mapOptional("uuids", Keys->UUIDs);
+ IO.mapOptional("flags", Keys->Flags, TBDFlags::None);
+ IO.mapRequired("install-name", Keys->InstallName);
+ IO.mapOptional("current-version", Keys->CurrentVersion,
+ PackedVersion(1, 0, 0));
+ IO.mapOptional("compatibility-version", Keys->CompatibilityVersion,
+ PackedVersion(1, 0, 0));
+ IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion, SwiftVersion(0));
+ IO.mapOptional("parent-umbrella", Keys->ParentUmbrellas);
+ auto OptionKind = MetadataSection::Option::Clients;
+ IO.mapOptionalWithContext("allowable-clients", Keys->AllowableClients,
+ OptionKind);
+ OptionKind = MetadataSection::Option::Libraries;
+ IO.mapOptionalWithContext("reexported-libraries", Keys->ReexportedLibraries,
+ OptionKind);
+ IO.mapOptional("exports", Keys->Exports);
+ IO.mapOptional("reexports", Keys->Reexports);
+ IO.mapOptional("undefineds", Keys->Undefineds);
+ }
};
template <>
@@ -623,15 +1105,17 @@ static void DiagHandler(const SMDiagnostic &Diag, void *Context) {
}
Expected<std::unique_ptr<InterfaceFile>>
-TextAPIReader::get(std::unique_ptr<MemoryBuffer> InputBuffer) {
+TextAPIReader::get(MemoryBufferRef InputBuffer) {
TextAPIContext Ctx;
- Ctx.Path = InputBuffer->getBufferIdentifier();
- yaml::Input YAMLIn(InputBuffer->getBuffer(), &Ctx, DiagHandler, &Ctx);
+ Ctx.Path = InputBuffer.getBufferIdentifier();
+ yaml::Input YAMLIn(InputBuffer.getBuffer(), &Ctx, DiagHandler, &Ctx);
// Fill vector with interface file objects created by parsing the YAML file.
std::vector<const InterfaceFile *> Files;
YAMLIn >> Files;
+ // YAMLIn dynamically allocates for Interface file and in case of error,
+ // memory leak will occur unless wrapped around unique_ptr
auto File = std::unique_ptr<InterfaceFile>(
const_cast<InterfaceFile *>(Files.front()));
diff --git a/lib/TextAPI/MachO/TextStubCommon.cpp b/lib/TextAPI/MachO/TextStubCommon.cpp
index 00382cd24573..183c5d5a93b0 100644
--- a/lib/TextAPI/MachO/TextStubCommon.cpp
+++ b/lib/TextAPI/MachO/TextStubCommon.cpp
@@ -41,9 +41,21 @@ void ScalarEnumerationTraits<ObjCConstraintType>::enumeration(
IO.enumCase(Constraint, "gc", ObjCConstraintType::GC);
}
-void ScalarTraits<PlatformKind>::output(const PlatformKind &Value, void *,
- raw_ostream &OS) {
- switch (Value) {
+void ScalarTraits<PlatformSet>::output(const PlatformSet &Values, void *IO,
+ raw_ostream &OS) {
+
+ const auto *Ctx = reinterpret_cast<TextAPIContext *>(IO);
+ assert((!Ctx || Ctx->FileKind != FileType::Invalid) &&
+ "File type is not set in context");
+
+ if (Ctx && Ctx->FileKind == TBD_V3 && Values.count(PlatformKind::macOS) &&
+ Values.count(PlatformKind::macCatalyst)) {
+ OS << "zippered";
+ return;
+ }
+
+ assert(Values.size() == 1U);
+ switch (*Values.begin()) {
default:
llvm_unreachable("unexpected platform");
break;
@@ -64,21 +76,44 @@ void ScalarTraits<PlatformKind>::output(const PlatformKind &Value, void *,
break;
}
}
-StringRef ScalarTraits<PlatformKind>::input(StringRef Scalar, void *,
- PlatformKind &Value) {
- Value = StringSwitch<PlatformKind>(Scalar)
- .Case("macosx", PlatformKind::macOS)
- .Case("ios", PlatformKind::iOS)
- .Case("watchos", PlatformKind::watchOS)
- .Case("tvos", PlatformKind::tvOS)
- .Case("bridgeos", PlatformKind::bridgeOS)
- .Default(PlatformKind::unknown);
-
- if (Value == PlatformKind::unknown)
+
+StringRef ScalarTraits<PlatformSet>::input(StringRef Scalar, void *IO,
+ PlatformSet &Values) {
+ const auto *Ctx = reinterpret_cast<TextAPIContext *>(IO);
+ assert((!Ctx || Ctx->FileKind != FileType::Invalid) &&
+ "File type is not set in context");
+
+ if (Scalar == "zippered") {
+ if (Ctx && Ctx->FileKind == FileType::TBD_V3) {
+ Values.insert(PlatformKind::macOS);
+ Values.insert(PlatformKind::macCatalyst);
+ return {};
+ }
+ return "invalid platform";
+ }
+
+ auto Platform = StringSwitch<PlatformKind>(Scalar)
+ .Case("unknown", PlatformKind::unknown)
+ .Case("macosx", PlatformKind::macOS)
+ .Case("ios", PlatformKind::iOS)
+ .Case("watchos", PlatformKind::watchOS)
+ .Case("tvos", PlatformKind::tvOS)
+ .Case("bridgeos", PlatformKind::bridgeOS)
+ .Case("iosmac", PlatformKind::macCatalyst)
+ .Default(PlatformKind::unknown);
+
+ if (Platform == PlatformKind::macCatalyst)
+ if (Ctx && Ctx->FileKind != FileType::TBD_V3)
+ return "invalid platform";
+
+ if (Platform == PlatformKind::unknown)
return "unknown platform";
+
+ Values.insert(Platform);
return {};
}
-QuotingType ScalarTraits<PlatformKind>::mustQuote(StringRef) {
+
+QuotingType ScalarTraits<PlatformSet>::mustQuote(StringRef) {
return QuotingType::None;
}
@@ -137,14 +172,25 @@ void ScalarTraits<SwiftVersion>::output(const SwiftVersion &Value, void *,
break;
}
}
-StringRef ScalarTraits<SwiftVersion>::input(StringRef Scalar, void *,
+StringRef ScalarTraits<SwiftVersion>::input(StringRef Scalar, void *IO,
SwiftVersion &Value) {
- Value = StringSwitch<SwiftVersion>(Scalar)
- .Case("1.0", 1)
- .Case("1.1", 2)
- .Case("2.0", 3)
- .Case("3.0", 4)
- .Default(0);
+ const auto *Ctx = reinterpret_cast<TextAPIContext *>(IO);
+ assert((!Ctx || Ctx->FileKind != FileType::Invalid) &&
+ "File type is not set in context");
+
+ if (Ctx->FileKind == FileType::TBD_V4) {
+ if (Scalar.getAsInteger(10, Value))
+ return "invalid Swift ABI version.";
+ return {};
+ } else {
+ Value = StringSwitch<SwiftVersion>(Scalar)
+ .Case("1.0", 1)
+ .Case("1.1", 2)
+ .Case("2.0", 3)
+ .Case("3.0", 4)
+ .Default(0);
+ }
+
if (Value != SwiftVersion(0))
return {};
@@ -166,10 +212,11 @@ StringRef ScalarTraits<UUID>::input(StringRef Scalar, void *, UUID &Value) {
auto UUID = Split.second.trim();
if (UUID.empty())
return "invalid uuid string pair";
- Value.first = getArchitectureFromName(Arch);
Value.second = UUID;
+ Value.first = Target{getArchitectureFromName(Arch), PlatformKind::unknown};
return {};
}
+
QuotingType ScalarTraits<UUID>::mustQuote(StringRef) {
return QuotingType::Single;
}
diff --git a/lib/TextAPI/MachO/TextStubCommon.h b/lib/TextAPI/MachO/TextStubCommon.h
index c4dd1075b1c8..a558cbcec9fb 100644
--- a/lib/TextAPI/MachO/TextStubCommon.h
+++ b/lib/TextAPI/MachO/TextStubCommon.h
@@ -21,7 +21,7 @@
#include "llvm/TextAPI/MachO/InterfaceFile.h"
#include "llvm/TextAPI/MachO/PackedVersion.h"
-using UUID = std::pair<llvm::MachO::Architecture, std::string>;
+using UUID = std::pair<llvm::MachO::Target, std::string>;
LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, FlowStringRef)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, SwiftVersion)
@@ -41,9 +41,9 @@ template <> struct ScalarEnumerationTraits<MachO::ObjCConstraintType> {
static void enumeration(IO &, MachO::ObjCConstraintType &);
};
-template <> struct ScalarTraits<MachO::PlatformKind> {
- static void output(const MachO::PlatformKind &, void *, raw_ostream &);
- static StringRef input(StringRef, void *, MachO::PlatformKind &);
+template <> struct ScalarTraits<MachO::PlatformSet> {
+ static void output(const MachO::PlatformSet &, void *, raw_ostream &);
+ static StringRef input(StringRef, void *, MachO::PlatformSet &);
static QuotingType mustQuote(StringRef);
};
diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index 0b406cc531a4..19f253be7952 100644
--- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -74,13 +74,6 @@ static MachineTypes getEmulation(StringRef S) {
.Default(IMAGE_FILE_MACHINE_UNKNOWN);
}
-static std::string getImplibPath(StringRef Path) {
- SmallString<128> Out = StringRef("lib");
- Out.append(Path);
- sys::path::replace_extension(Out, ".a");
- return Out.str();
-}
-
int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
DllOptTable Table;
unsigned MissingIndex;
@@ -149,13 +142,23 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
Def->OutputFile = Arg->getValue();
if (Def->OutputFile.empty()) {
- llvm::errs() << "no output file specified\n";
+ llvm::errs() << "no DLL name specified\n";
return 1;
}
std::string Path = Args.getLastArgValue(OPT_l);
- if (Path.empty())
- Path = getImplibPath(Def->OutputFile);
+
+ // If ExtName is set (if the "ExtName = Name" syntax was used), overwrite
+ // Name with ExtName and clear ExtName. When only creating an import
+ // library and not linking, the internal name is irrelevant. This avoids
+ // cases where writeImportLibrary tries to transplant decoration from
+ // symbol decoration onto ExtName.
+ for (COFFShortExport& E : Def->Exports) {
+ if (!E.ExtName.empty()) {
+ E.Name = E.ExtName;
+ E.ExtName.clear();
+ }
+ }
if (Machine == IMAGE_FILE_MACHINE_I386 && Args.getLastArg(OPT_k)) {
for (COFFShortExport& E : Def->Exports) {
@@ -174,7 +177,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
}
}
- if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true))
+ if (!Path.empty() &&
+ writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true))
return 1;
return 0;
}
diff --git a/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 18ab6637305e..286191abff20 100644
--- a/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -13,6 +13,7 @@
#include "llvm/ToolDrivers/llvm-lib/LibDriver.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Bitcode/BitcodeReader.h"
@@ -141,6 +142,125 @@ static void doList(opt::InputArgList& Args) {
fatalOpenError(std::move(Err), B->getBufferIdentifier());
}
+static COFF::MachineTypes getCOFFFileMachine(MemoryBufferRef MB) {
+ std::error_code EC;
+ object::COFFObjectFile Obj(MB, EC);
+ if (EC) {
+ llvm::errs() << MB.getBufferIdentifier()
+ << ": failed to open: " << EC.message() << '\n';
+ exit(1);
+ }
+
+ uint16_t Machine = Obj.getMachine();
+ if (Machine != COFF::IMAGE_FILE_MACHINE_I386 &&
+ Machine != COFF::IMAGE_FILE_MACHINE_AMD64 &&
+ Machine != COFF::IMAGE_FILE_MACHINE_ARMNT &&
+ Machine != COFF::IMAGE_FILE_MACHINE_ARM64) {
+ llvm::errs() << MB.getBufferIdentifier() << ": unknown machine: " << Machine
+ << '\n';
+ exit(1);
+ }
+
+ return static_cast<COFF::MachineTypes>(Machine);
+}
+
+static COFF::MachineTypes getBitcodeFileMachine(MemoryBufferRef MB) {
+ Expected<std::string> TripleStr = getBitcodeTargetTriple(MB);
+ if (!TripleStr) {
+ llvm::errs() << MB.getBufferIdentifier()
+ << ": failed to get target triple from bitcode\n";
+ exit(1);
+ }
+
+ switch (Triple(*TripleStr).getArch()) {
+ case Triple::x86:
+ return COFF::IMAGE_FILE_MACHINE_I386;
+ case Triple::x86_64:
+ return COFF::IMAGE_FILE_MACHINE_AMD64;
+ case Triple::arm:
+ return COFF::IMAGE_FILE_MACHINE_ARMNT;
+ case Triple::aarch64:
+ return COFF::IMAGE_FILE_MACHINE_ARM64;
+ default:
+ llvm::errs() << MB.getBufferIdentifier()
+ << ": unknown arch in target triple " << *TripleStr << '\n';
+ exit(1);
+ }
+}
+
+static void appendFile(std::vector<NewArchiveMember> &Members,
+ COFF::MachineTypes &LibMachine,
+ std::string &LibMachineSource, MemoryBufferRef MB) {
+ file_magic Magic = identify_magic(MB.getBuffer());
+
+ if (Magic != file_magic::coff_object && Magic != file_magic::bitcode &&
+ Magic != file_magic::archive && Magic != file_magic::windows_resource) {
+ llvm::errs() << MB.getBufferIdentifier()
+ << ": not a COFF object, bitcode, archive or resource file\n";
+ exit(1);
+ }
+
+ // If a user attempts to add an archive to another archive, llvm-lib doesn't
+ // handle the first archive file as a single file. Instead, it extracts all
+ // members from the archive and add them to the second archive. This beahvior
+ // is for compatibility with Microsoft's lib command.
+ if (Magic == file_magic::archive) {
+ Error Err = Error::success();
+ object::Archive Archive(MB, Err);
+ fatalOpenError(std::move(Err), MB.getBufferIdentifier());
+
+ for (auto &C : Archive.children(Err)) {
+ Expected<MemoryBufferRef> ChildMB = C.getMemoryBufferRef();
+ if (!ChildMB) {
+ handleAllErrors(ChildMB.takeError(), [&](const ErrorInfoBase &EIB) {
+ llvm::errs() << MB.getBufferIdentifier() << ": " << EIB.message()
+ << "\n";
+ });
+ exit(1);
+ }
+
+ appendFile(Members, LibMachine, LibMachineSource, *ChildMB);
+ }
+
+ fatalOpenError(std::move(Err), MB.getBufferIdentifier());
+ return;
+ }
+
+ // Check that all input files have the same machine type.
+ // Mixing normal objects and LTO bitcode files is fine as long as they
+ // have the same machine type.
+ // Doing this here duplicates the header parsing work that writeArchive()
+ // below does, but it's not a lot of work and it's a bit awkward to do
+ // in writeArchive() which needs to support many tools, can't assume the
+ // input is COFF, and doesn't have a good way to report errors.
+ if (Magic == file_magic::coff_object || Magic == file_magic::bitcode) {
+ COFF::MachineTypes FileMachine = (Magic == file_magic::coff_object)
+ ? getCOFFFileMachine(MB)
+ : getBitcodeFileMachine(MB);
+
+ // FIXME: Once lld-link rejects multiple resource .obj files:
+ // Call convertResToCOFF() on .res files and add the resulting
+ // COFF file to the .lib output instead of adding the .res file, and remove
+ // this check. See PR42180.
+ if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+ if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+ LibMachine = FileMachine;
+ LibMachineSource =
+ (" (inferred from earlier file '" + MB.getBufferIdentifier() + "')")
+ .str();
+ } else if (LibMachine != FileMachine) {
+ llvm::errs() << MB.getBufferIdentifier() << ": file machine type "
+ << machineToStr(FileMachine)
+ << " conflicts with library machine type "
+ << machineToStr(LibMachine) << LibMachineSource << '\n';
+ exit(1);
+ }
+ }
+ }
+
+ Members.emplace_back(MB);
+}
+
int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);
@@ -195,104 +315,40 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
std::string(" (from '/machine:") + Arg->getValue() + "' flag)";
}
- // Create a NewArchiveMember for each input file.
+ std::vector<std::unique_ptr<MemoryBuffer>> MBs;
+ StringSet<> Seen;
std::vector<NewArchiveMember> Members;
+
+ // Create a NewArchiveMember for each input file.
for (auto *Arg : Args.filtered(OPT_INPUT)) {
+ // Find a file
std::string Path = findInputFile(Arg->getValue(), SearchPaths);
if (Path.empty()) {
llvm::errs() << Arg->getValue() << ": no such file or directory\n";
return 1;
}
- Expected<NewArchiveMember> MOrErr =
- NewArchiveMember::getFile(Saver.save(Path), /*Deterministic=*/true);
- if (!MOrErr) {
- handleAllErrors(MOrErr.takeError(), [&](const ErrorInfoBase &EIB) {
- llvm::errs() << Arg->getValue() << ": " << EIB.message() << "\n";
- });
- return 1;
- }
-
- file_magic Magic = identify_magic(MOrErr->Buf->getBuffer());
- if (Magic != file_magic::coff_object && Magic != file_magic::bitcode &&
- Magic != file_magic::windows_resource) {
- llvm::errs() << Arg->getValue()
- << ": not a COFF object, bitcode or resource file\n";
- return 1;
- }
-
- // Check that all input files have the same machine type.
- // Mixing normal objects and LTO bitcode files is fine as long as they
- // have the same machine type.
- // Doing this here duplicates the header parsing work that writeArchive()
- // below does, but it's not a lot of work and it's a bit awkward to do
- // in writeArchive() which needs to support many tools, can't assume the
- // input is COFF, and doesn't have a good way to report errors.
- COFF::MachineTypes FileMachine = COFF::IMAGE_FILE_MACHINE_UNKNOWN;
- if (Magic == file_magic::coff_object) {
- std::error_code EC;
- object::COFFObjectFile Obj(*MOrErr->Buf, EC);
- if (EC) {
- llvm::errs() << Arg->getValue() << ": failed to open: " << EC.message()
- << '\n';
- return 1;
- }
- uint16_t Machine = Obj.getMachine();
- if (Machine != COFF::IMAGE_FILE_MACHINE_I386 &&
- Machine != COFF::IMAGE_FILE_MACHINE_AMD64 &&
- Machine != COFF::IMAGE_FILE_MACHINE_ARMNT &&
- Machine != COFF::IMAGE_FILE_MACHINE_ARM64) {
- llvm::errs() << Arg->getValue() << ": unknown machine: " << Machine
- << '\n';
- return 1;
- }
- FileMachine = static_cast<COFF::MachineTypes>(Machine);
- } else if (Magic == file_magic::bitcode) {
- Expected<std::string> TripleStr = getBitcodeTargetTriple(*MOrErr->Buf);
- if (!TripleStr) {
- llvm::errs() << Arg->getValue()
- << ": failed to get target triple from bitcode\n";
- return 1;
- }
- switch (Triple(*TripleStr).getArch()) {
- case Triple::x86:
- FileMachine = COFF::IMAGE_FILE_MACHINE_I386;
- break;
- case Triple::x86_64:
- FileMachine = COFF::IMAGE_FILE_MACHINE_AMD64;
- break;
- case Triple::arm:
- FileMachine = COFF::IMAGE_FILE_MACHINE_ARMNT;
- break;
- case Triple::aarch64:
- FileMachine = COFF::IMAGE_FILE_MACHINE_ARM64;
- break;
- default:
- llvm::errs() << Arg->getValue() << ": unknown arch in target triple "
- << *TripleStr << '\n';
- return 1;
- }
- }
-
- // FIXME: Once lld-link rejects multiple resource .obj files:
- // Call convertResToCOFF() on .res files and add the resulting
- // COFF file to the .lib output instead of adding the .res file, and remove
- // this check. See PR42180.
- if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
- if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
- LibMachine = FileMachine;
- LibMachineSource = std::string(" (inferred from earlier file '") +
- Arg->getValue() + "')";
- } else if (LibMachine != FileMachine) {
- llvm::errs() << Arg->getValue() << ": file machine type "
- << machineToStr(FileMachine)
- << " conflicts with library machine type "
- << machineToStr(LibMachine) << LibMachineSource << '\n';
- return 1;
- }
- }
-
- Members.emplace_back(std::move(*MOrErr));
+ // Input files are uniquified by pathname. If you specify the exact same
+ // path more than once, all but the first one are ignored.
+ //
+ // Note that there's a loophole in the rule; you can prepend `.\` or
+ // something like that to a path to make it look different, and they are
+ // handled as if they were different files. This behavior is compatible with
+ // Microsoft lib.exe.
+ if (!Seen.insert(Path).second)
+ continue;
+
+ // Open a file.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MOrErr =
+ MemoryBuffer::getFile(Path, -1, false);
+ fatalOpenError(errorCodeToError(MOrErr.getError()), Path);
+ MemoryBufferRef MBRef = (*MOrErr)->getMemBufferRef();
+
+ // Append a file.
+ appendFile(Members, LibMachine, LibMachineSource, MBRef);
+
+ // Take the ownership of the file buffer to keep the file open.
+ MBs.push_back(std::move(*MOrErr));
}
// Create an archive file.
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 06222d7e7e44..a24de3ca213f 100644
--- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -121,14 +121,13 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) {
BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
Instruction *TermI = GuardBB->getTerminator();
- BasicBlock *TrueBB, *FalseBB;
ICmpInst::Predicate Pred;
- if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), TrueBB,
- FalseBB)))
+ BasicBlock *PhiBB = Phi.getParent();
+ if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()),
+ m_SpecificBB(PhiBB), m_SpecificBB(RotBB))))
return false;
- BasicBlock *PhiBB = Phi.getParent();
- if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
+ if (Pred != CmpInst::ICMP_EQ)
return false;
// We matched a variation of this IR pattern:
@@ -251,6 +250,72 @@ static bool foldAnyOrAllBitsSet(Instruction &I) {
return true;
}
+// Try to recognize below function as popcount intrinsic.
+// This is the "best" algorithm from
+// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+// Also used in TargetLowering::expandCTPOP().
+//
+// int popcount(unsigned int i) {
+// i = i - ((i >> 1) & 0x55555555);
+// i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
+// i = ((i + (i >> 4)) & 0x0F0F0F0F);
+// return (i * 0x01010101) >> 24;
+// }
+static bool tryToRecognizePopCount(Instruction &I) {
+ if (I.getOpcode() != Instruction::LShr)
+ return false;
+
+ Type *Ty = I.getType();
+ if (!Ty->isIntOrIntVectorTy())
+ return false;
+
+ unsigned Len = Ty->getScalarSizeInBits();
+ // FIXME: fix Len == 8 and other irregular type lengths.
+ if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
+ return false;
+
+ APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
+ APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
+ APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
+ APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
+ APInt MaskShift = APInt(Len, Len - 8);
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *MulOp0;
+ // Matching "(i * 0x01010101...) >> 24".
+ if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
+ match(Op1, m_SpecificInt(MaskShift))) {
+ Value *ShiftOp0;
+ // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
+ if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
+ m_Deferred(ShiftOp0)),
+ m_SpecificInt(Mask0F)))) {
+ Value *AndOp0;
+ // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
+ if (match(ShiftOp0,
+ m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
+ m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)),
+ m_SpecificInt(Mask33))))) {
+ Value *Root, *SubOp1;
+ // Matching "i - ((i >> 1) & 0x55555555...)".
+ if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
+ match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
+ m_SpecificInt(Mask55)))) {
+ LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+ IRBuilder<> Builder(&I);
+ Function *Func = Intrinsic::getDeclaration(
+ I.getModule(), Intrinsic::ctpop, I.getType());
+ I.replaceAllUsesWith(Builder.CreateCall(Func, {Root}));
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
@@ -269,6 +334,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedRotateToFunnelShift(I);
+ MadeChange |= tryToRecognizePopCount(I);
}
}
@@ -303,7 +369,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
}
bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return runImpl(F, TLI, DT);
}
diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp
index 1fb0a114d0c7..c3e05577f044 100644
--- a/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -73,6 +73,8 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
II->replaceAllUsesWith(ConstantInt::getTrue(Context));
break;
case Intrinsic::coro_id:
+ case Intrinsic::coro_id_retcon:
+ case Intrinsic::coro_id_retcon_once:
II->replaceAllUsesWith(ConstantTokenNone::get(Context));
break;
case Intrinsic::coro_subfn_addr:
@@ -111,8 +113,9 @@ struct CoroCleanup : FunctionPass {
bool doInitialization(Module &M) override {
if (coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin",
"llvm.coro.subfn.addr", "llvm.coro.free",
- "llvm.coro.id"}))
- L = llvm::make_unique<Lowerer>(M);
+ "llvm.coro.id", "llvm.coro.id.retcon",
+ "llvm.coro.id.retcon.once"}))
+ L = std::make_unique<Lowerer>(M);
return false;
}
diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp
index 692697d6f32e..55993d33ee4e 100644
--- a/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -91,13 +91,14 @@ void Lowerer::lowerCoroDone(IntrinsicInst *II) {
Value *Operand = II->getArgOperand(0);
// ResumeFnAddr is the first pointer sized element of the coroutine frame.
+ static_assert(coro::Shape::SwitchFieldIndex::Resume == 0,
+ "resume function not at offset zero");
auto *FrameTy = Int8Ptr;
PointerType *FramePtrTy = FrameTy->getPointerTo();
Builder.SetInsertPoint(II);
auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy);
- auto *Gep = Builder.CreateConstInBoundsGEP1_32(FrameTy, BCI, 0);
- auto *Load = Builder.CreateLoad(FrameTy, Gep);
+ auto *Load = Builder.CreateLoad(BCI);
auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
II->replaceAllUsesWith(Cond);
@@ -189,6 +190,10 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
}
}
break;
+ case Intrinsic::coro_id_retcon:
+ case Intrinsic::coro_id_retcon_once:
+ F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
+ break;
case Intrinsic::coro_resume:
lowerResumeOrDestroy(CS, CoroSubFnInst::ResumeIndex);
break;
@@ -231,11 +236,18 @@ struct CoroEarly : public FunctionPass {
// This pass has work to do only if we find intrinsics we are going to lower
// in the module.
bool doInitialization(Module &M) override {
- if (coro::declaresIntrinsics(
- M, {"llvm.coro.id", "llvm.coro.destroy", "llvm.coro.done",
- "llvm.coro.end", "llvm.coro.noop", "llvm.coro.free",
- "llvm.coro.promise", "llvm.coro.resume", "llvm.coro.suspend"}))
- L = llvm::make_unique<Lowerer>(M);
+ if (coro::declaresIntrinsics(M, {"llvm.coro.id",
+ "llvm.coro.id.retcon",
+ "llvm.coro.id.retcon.once",
+ "llvm.coro.destroy",
+ "llvm.coro.done",
+ "llvm.coro.end",
+ "llvm.coro.noop",
+ "llvm.coro.free",
+ "llvm.coro.promise",
+ "llvm.coro.resume",
+ "llvm.coro.suspend"}))
+ L = std::make_unique<Lowerer>(M);
return false;
}
diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp
index 6707aa1c827d..aca77119023b 100644
--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@@ -286,7 +286,7 @@ struct CoroElide : FunctionPass {
bool doInitialization(Module &M) override {
if (coro::declaresIntrinsics(M, {"llvm.coro.id"}))
- L = llvm::make_unique<Lowerer>(M);
+ L = std::make_unique<Lowerer>(M);
return false;
}
diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp
index 58bf22bee29b..2c42cf8a6d25 100644
--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -18,6 +18,7 @@
#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
@@ -28,6 +29,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
using namespace llvm;
@@ -120,6 +122,15 @@ struct SuspendCrossingInfo {
return false;
BasicBlock *UseBB = I->getParent();
+
+ // As a special case, treat uses by an llvm.coro.suspend.retcon
+ // as if they were uses in the suspend's single predecessor: the
+ // uses conceptually occur before the suspend.
+ if (isa<CoroSuspendRetconInst>(I)) {
+ UseBB = UseBB->getSinglePredecessor();
+ assert(UseBB && "should have split coro.suspend into its own block");
+ }
+
return hasPathCrossingSuspendPoint(DefBB, UseBB);
}
@@ -128,7 +139,17 @@ struct SuspendCrossingInfo {
}
bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
- return isDefinitionAcrossSuspend(I.getParent(), U);
+ auto *DefBB = I.getParent();
+
+ // As a special case, treat values produced by an llvm.coro.suspend.*
+ // as if they were defined in the single successor: the uses
+ // conceptually occur after the suspend.
+ if (isa<AnyCoroSuspendInst>(I)) {
+ DefBB = DefBB->getSingleSuccessor();
+ assert(DefBB && "should have split coro.suspend into its own block");
+ }
+
+ return isDefinitionAcrossSuspend(DefBB, U);
}
};
} // end anonymous namespace
@@ -183,9 +204,10 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
B.Suspend = true;
B.Kills |= B.Consumes;
};
- for (CoroSuspendInst *CSI : Shape.CoroSuspends) {
+ for (auto *CSI : Shape.CoroSuspends) {
markSuspendBlock(CSI);
- markSuspendBlock(CSI->getCoroSave());
+ if (auto *Save = CSI->getCoroSave())
+ markSuspendBlock(Save);
}
// Iterate propagating consumes and kills until they stop changing.
@@ -261,11 +283,13 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
// We build up the list of spills for every case where a use is separated
// from the definition by a suspend point.
+static const unsigned InvalidFieldIndex = ~0U;
+
namespace {
class Spill {
Value *Def = nullptr;
Instruction *User = nullptr;
- unsigned FieldNo = 0;
+ unsigned FieldNo = InvalidFieldIndex;
public:
Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}
@@ -280,11 +304,11 @@ public:
// the definition the first time they encounter it. Consider refactoring
// SpillInfo into two arrays to normalize the spill representation.
unsigned fieldIndex() const {
- assert(FieldNo && "Accessing unassigned field");
+ assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field");
return FieldNo;
}
void setFieldIndex(unsigned FieldNumber) {
- assert(!FieldNo && "Reassigning field number");
+ assert(FieldNo == InvalidFieldIndex && "Reassigning field number");
FieldNo = FieldNumber;
}
};
@@ -376,18 +400,30 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
SmallString<32> Name(F.getName());
Name.append(".Frame");
StructType *FrameTy = StructType::create(C, Name);
- auto *FramePtrTy = FrameTy->getPointerTo();
- auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
- /*isVarArg=*/false);
- auto *FnPtrTy = FnTy->getPointerTo();
-
- // Figure out how wide should be an integer type storing the suspend index.
- unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
- Type *PromiseType = Shape.PromiseAlloca
- ? Shape.PromiseAlloca->getType()->getElementType()
- : Type::getInt1Ty(C);
- SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, PromiseType,
- Type::getIntNTy(C, IndexBits)};
+ SmallVector<Type *, 8> Types;
+
+ AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
+
+ if (Shape.ABI == coro::ABI::Switch) {
+ auto *FramePtrTy = FrameTy->getPointerTo();
+ auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
+ /*IsVarArg=*/false);
+ auto *FnPtrTy = FnTy->getPointerTo();
+
+ // Figure out how wide should be an integer type storing the suspend index.
+ unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
+ Type *PromiseType = PromiseAlloca
+ ? PromiseAlloca->getType()->getElementType()
+ : Type::getInt1Ty(C);
+ Type *IndexType = Type::getIntNTy(C, IndexBits);
+ Types.push_back(FnPtrTy);
+ Types.push_back(FnPtrTy);
+ Types.push_back(PromiseType);
+ Types.push_back(IndexType);
+ } else {
+ assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
+ }
+
Value *CurrentDef = nullptr;
Padder.addTypes(Types);
@@ -399,7 +435,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
CurrentDef = S.def();
// PromiseAlloca was already added to Types array earlier.
- if (CurrentDef == Shape.PromiseAlloca)
+ if (CurrentDef == PromiseAlloca)
continue;
uint64_t Count = 1;
@@ -430,9 +466,80 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
}
FrameTy->setBody(Types);
+ switch (Shape.ABI) {
+ case coro::ABI::Switch:
+ break;
+
+ // Remember whether the frame is inline in the storage.
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce: {
+ auto &Layout = F.getParent()->getDataLayout();
+ auto Id = Shape.getRetconCoroId();
+ Shape.RetconLowering.IsFrameInlineInStorage
+ = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() &&
+ Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment());
+ break;
+ }
+ }
+
return FrameTy;
}
+// We use a pointer use visitor to discover if there are any writes into an
+// alloca that dominates CoroBegin. If that is the case, insertSpills will copy
+// the value from the alloca into the coroutine frame spill slot corresponding
+// to that alloca.
+namespace {
+struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
+ using Base = PtrUseVisitor<AllocaUseVisitor>;
+ AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
+ const CoroBeginInst &CB)
+ : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}
+
+ // We are only interested in uses that dominate coro.begin.
+ void visit(Instruction &I) {
+ if (DT.dominates(&I, &CoroBegin))
+ Base::visit(I);
+ }
+ // We need to provide this overload as PtrUseVisitor uses a pointer based
+ // visiting function.
+ void visit(Instruction *I) { return visit(*I); }
+
+ void visitLoadInst(LoadInst &) {} // Good. Nothing to do.
+
+ // If the use is an operand, the pointer escaped and anything can write into
+ // that memory. If the use is the pointer, we are definitely writing into the
+ // alloca and therefore we need to copy.
+ void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); }
+
+ // Any other instruction that is not filtered out by PtrUseVisitor will
+ // result in the copy.
+ void visitInstruction(Instruction &I) { PI.setAborted(&I); }
+
+private:
+ const DominatorTree &DT;
+ const CoroBeginInst &CoroBegin;
+};
+} // namespace
+static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT,
+ const CoroBeginInst &CB) {
+ const DataLayout &DL = A.getModule()->getDataLayout();
+ AllocaUseVisitor Visitor(DL, DT, CB);
+ auto PtrI = Visitor.visitPtr(A);
+ if (PtrI.isEscaped() || PtrI.isAborted()) {
+ auto *PointerEscapingInstr = PtrI.getEscapingInst()
+ ? PtrI.getEscapingInst()
+ : PtrI.getAbortingInst();
+ if (PointerEscapingInstr) {
+ LLVM_DEBUG(
+ dbgs() << "AllocaInst copy was triggered by instruction: "
+ << *PointerEscapingInstr << "\n");
+ }
+ return true;
+ }
+ return false;
+}
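+
+// For illustration only, IR that triggers the copy path (values hypothetical):
+//
+//   %x = alloca i32
+//   store i32 %n, i32* %x                   ; write that dominates...
+//   %hdl = call i8* @llvm.coro.begin(...)   ; ...coro.begin
+//
+// The visitor aborts at the store, mightWriteIntoAllocaPtr returns true, and
+// insertSpills will load %x and store it into its frame slot after coro.begin.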
+
// We need to make room to insert a spill after the initial PHIs, but before
// the catchswitch instruction. Placing it earlier would violate the rule that
// a catchswitch, like all other EH pads, must be the first non-PHI in a block.
@@ -476,7 +583,7 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
// whatever
//
//
-static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
+static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
auto *CB = Shape.CoroBegin;
LLVMContext &C = CB->getContext();
IRBuilder<> Builder(CB->getNextNode());
@@ -484,11 +591,14 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
PointerType *FramePtrTy = FrameTy->getPointerTo();
auto *FramePtr =
cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+ DominatorTree DT(*CB->getFunction());
Value *CurrentValue = nullptr;
BasicBlock *CurrentBlock = nullptr;
Value *CurrentReload = nullptr;
- unsigned Index = 0; // Proper field number will be read from field definition.
+
+ // Proper field number will be read from field definition.
+ unsigned Index = InvalidFieldIndex;
// We need to keep track of any allocas that need "spilling"
// since they will live in the coroutine frame now, all access to them
@@ -496,9 +606,11 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
// we remember allocas and their indices to be handled once we have processed
// all the spills.
SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
- // Promise alloca (if present) has a fixed field number (Shape::PromiseField)
- if (Shape.PromiseAlloca)
- Allocas.emplace_back(Shape.PromiseAlloca, coro::Shape::PromiseField);
+ // Promise alloca (if present) has a fixed field number.
+ if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
+ assert(Shape.ABI == coro::ABI::Switch);
+ Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise);
+ }
// Create a GEP with the given index into the coroutine frame for the original
// value Orig. Appends an extra 0 index for array-allocas, preserving the
@@ -526,7 +638,7 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
// Create a load instruction to reload the spilled value from the coroutine
// frame.
auto CreateReload = [&](Instruction *InsertBefore) {
- assert(Index && "accessing unassigned field number");
+ assert(Index != InvalidFieldIndex && "accessing unassigned field number");
Builder.SetInsertPoint(InsertBefore);
auto *G = GetFramePointer(Index, CurrentValue);
@@ -558,29 +670,45 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
// coroutine frame.
Instruction *InsertPt = nullptr;
- if (isa<Argument>(CurrentValue)) {
+ if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
// For arguments, we will place the store instruction right after
// the coroutine frame pointer instruction, i.e. bitcast of
// coro.begin from i8* to %f.frame*.
InsertPt = FramePtr->getNextNode();
+
+ // If we're spilling an Argument, make sure we clear 'nocapture'
+ // from the coroutine function.
+ Arg->getParent()->removeParamAttr(Arg->getArgNo(),
+ Attribute::NoCapture);
+
} else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) {
// If we are spilling the result of the invoke instruction, split the
// normal edge and insert the spill in the new block.
auto NewBB = SplitEdge(II->getParent(), II->getNormalDest());
InsertPt = NewBB->getTerminator();
- } else if (dyn_cast<PHINode>(CurrentValue)) {
+ } else if (isa<PHINode>(CurrentValue)) {
// Skip the PHINodes and EH pads instructions.
BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
InsertPt = splitBeforeCatchSwitch(CSI);
else
InsertPt = &*DefBlock->getFirstInsertionPt();
+ } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
+ // Don't spill immediately after a suspend; splitting assumes
+ // that the suspend will be followed by a branch.
+ InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
} else {
+ auto *I = cast<Instruction>(E.def());
+ assert(!I->isTerminator() && "unexpected terminator");
// For all other values, the spill is placed immediately after
// the definition.
- assert(!cast<Instruction>(E.def())->isTerminator() &&
- "unexpected terminator");
- InsertPt = cast<Instruction>(E.def())->getNextNode();
+ if (DT.dominates(CB, I)) {
+ InsertPt = I->getNextNode();
+ } else {
+ // If it is not dominated by CoroBegin, however, it will be
+ // inserted immediately after the coroutine frame pointer is computed.
+ InsertPt = FramePtr->getNextNode();
+ }
}
Builder.SetInsertPoint(InsertPt);
@@ -613,21 +741,53 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
}
BasicBlock *FramePtrBB = FramePtr->getParent();
- Shape.AllocaSpillBlock =
- FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
- Shape.AllocaSpillBlock->splitBasicBlock(&Shape.AllocaSpillBlock->front(),
- "PostSpill");
- Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
+ auto SpillBlock =
+ FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
+ SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
+ Shape.AllocaSpillBlock = SpillBlock;
// If we found any allocas, replace all of their remaining uses with Geps.
+ // Note: we cannot do it indiscriminately as some of the uses may not be
+ // dominated by CoroBegin.
+ bool MightNeedToCopy = false;
+ Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
+ SmallVector<Instruction *, 4> UsersToUpdate;
for (auto &P : Allocas) {
- auto *G = GetFramePointer(P.second, P.first);
+ AllocaInst *const A = P.first;
+ UsersToUpdate.clear();
+ for (User *U : A->users()) {
+ auto *I = cast<Instruction>(U);
+ if (DT.dominates(CB, I))
+ UsersToUpdate.push_back(I);
+ else
+ MightNeedToCopy = true;
+ }
+ if (!UsersToUpdate.empty()) {
+ auto *G = GetFramePointer(P.second, A);
+ G->takeName(A);
+ for (Instruction *I : UsersToUpdate)
+ I->replaceUsesOfWith(A, G);
+ }
+ }
+ // If we discovered such uses not dominated by CoroBegin, see if any of them
+ // precede coro.begin and have instructions that can modify the
+ // value of the alloca and therefore would require copying the value into
+ // the spill slot in the coroutine frame.
+ if (MightNeedToCopy) {
+ Builder.SetInsertPoint(FramePtr->getNextNode());
+
+ for (auto &P : Allocas) {
+ AllocaInst *const A = P.first;
+ if (mightWriteIntoAllocaPtr(*A, DT, *CB)) {
+ if (A->isArrayAllocation())
+ report_fatal_error(
+ "Coroutines cannot handle copying of array allocas yet");
- // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) here,
- // as we are changing location of the instruction.
- G->takeName(P.first);
- P.first->replaceAllUsesWith(G);
- P.first->eraseFromParent();
+ auto *G = GetFramePointer(P.second, A);
+ auto *Value = Builder.CreateLoad(A);
+ Builder.CreateStore(Value, G);
+ }
+ }
}
return FramePtr;
}
@@ -829,52 +989,6 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
}
}
-// Move early uses of spilled variable after CoroBegin.
-// For example, if a parameter had address taken, we may end up with the code
-// like:
-// define @f(i32 %n) {
-// %n.addr = alloca i32
-// store %n, %n.addr
-// ...
-// call @coro.begin
-// we need to move the store after coro.begin
-static void moveSpillUsesAfterCoroBegin(Function &F, SpillInfo const &Spills,
- CoroBeginInst *CoroBegin) {
- DominatorTree DT(F);
- SmallVector<Instruction *, 8> NeedsMoving;
-
- Value *CurrentValue = nullptr;
-
- for (auto const &E : Spills) {
- if (CurrentValue == E.def())
- continue;
-
- CurrentValue = E.def();
-
- for (User *U : CurrentValue->users()) {
- Instruction *I = cast<Instruction>(U);
- if (!DT.dominates(CoroBegin, I)) {
- LLVM_DEBUG(dbgs() << "will move: " << *I << "\n");
-
- // TODO: Make this more robust. Currently if we run into a situation
- // where simple instruction move won't work we panic and
- // report_fatal_error.
- for (User *UI : I->users()) {
- if (!DT.dominates(CoroBegin, cast<Instruction>(UI)))
- report_fatal_error("cannot move instruction since its users are not"
- " dominated by CoroBegin");
- }
-
- NeedsMoving.push_back(I);
- }
- }
- }
-
- Instruction *InsertPt = CoroBegin->getNextNode();
- for (Instruction *I : NeedsMoving)
- I->moveBefore(InsertPt);
-}
-
// Splits the block at a particular instruction unless it is the first
// instruction in the block with a single predecessor.
static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
@@ -895,21 +1009,337 @@ static void splitAround(Instruction *I, const Twine &Name) {
splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
}
+static bool isSuspendBlock(BasicBlock *BB) {
+ return isa<AnyCoroSuspendInst>(BB->front());
+}
+
+typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
+
+/// Does control flow starting at the given block ever reach a suspend
+/// instruction before reaching a block in VisitedOrFreeBBs?
+static bool isSuspendReachableFrom(BasicBlock *From,
+ VisitedBlocksSet &VisitedOrFreeBBs) {
+ // Eagerly try to add this block to the visited set. If it's already
+ // there, stop recursing; this path doesn't reach a suspend before
+ // either looping or reaching a freeing block.
+ if (!VisitedOrFreeBBs.insert(From).second)
+ return false;
+
+ // We assume that we'll already have split suspends into their own blocks.
+ if (isSuspendBlock(From))
+ return true;
+
+ // Recurse on the successors.
+ for (auto Succ : successors(From)) {
+ if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
+ return true;
+ }
+
+ return false;
+}
+
+/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
+/// suspend point?
+static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
+ // Seed the visited set with all the basic blocks containing a free
+ // so that we won't pass them up.
+ VisitedBlocksSet VisitedOrFreeBBs;
+ for (auto User : AI->users()) {
+ if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
+ VisitedOrFreeBBs.insert(FI->getParent());
+ }
+
+ return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
+}
+
+/// After we split the coroutine, will the given basic block be along
+/// an obvious exit path for the resumption function?
+static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
+ unsigned depth = 3) {
+ // If we've bottomed out our depth count, stop searching and assume
+ // that the path might loop back.
+ if (depth == 0) return false;
+
+ // If this is a suspend block, we're about to exit the resumption function.
+ if (isSuspendBlock(BB)) return true;
+
+ // Recurse into the successors.
+ for (auto Succ : successors(BB)) {
+ if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
+ return false;
+ }
+
+ // If none of the successors leads back into a loop, we're on an exit/abort.
+ return true;
+}
+
+static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
+ // Look for a free that isn't sufficiently obviously followed by
+ // either a suspend or a termination, i.e. something that will leave
+ // the coro resumption frame.
+ for (auto U : AI->users()) {
+ auto FI = dyn_cast<CoroAllocaFreeInst>(U);
+ if (!FI) continue;
+
+ if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
+ return true;
+ }
+
+ // If we never found one, we don't need a stack save.
+ return false;
+}
+
+/// Turn each of the given local allocas into a normal (dynamic) alloca
+/// instruction.
+static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
+ SmallVectorImpl<Instruction*> &DeadInsts) {
+ for (auto AI : LocalAllocas) {
+ auto M = AI->getModule();
+ IRBuilder<> Builder(AI);
+
+ // Save the stack depth. Try to avoid doing this if the stackrestore
+ // is going to immediately precede a return or something.
+ Value *StackSave = nullptr;
+ if (localAllocaNeedsStackSave(AI))
+ StackSave = Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::stacksave));
+
+ // Allocate memory.
+ auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
+ Alloca->setAlignment(MaybeAlign(AI->getAlignment()));
+
+ for (auto U : AI->users()) {
+ // Replace gets with the allocation.
+ if (isa<CoroAllocaGetInst>(U)) {
+ U->replaceAllUsesWith(Alloca);
+
+ // Replace frees with stackrestores. This is safe because
+ // alloca.alloc is required to obey a stack discipline, although we
+ // don't enforce that structurally.
+ } else {
+ auto FI = cast<CoroAllocaFreeInst>(U);
+ if (StackSave) {
+ Builder.SetInsertPoint(FI);
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
+ StackSave);
+ }
+ }
+ DeadInsts.push_back(cast<Instruction>(U));
+ }
+
+ DeadInsts.push_back(AI);
+ }
+}
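+
+// For illustration only: a local coro.alloca that needs a stack save lowers
+// roughly to
+//
+//   %save = call i8* @llvm.stacksave()
+//   %mem  = alloca i8, i64 %size, align <align>  ; replaces coro.alloca.get
+//   ...
+//   call void @llvm.stackrestore(i8* %save)      ; replaces coro.alloca.free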
+
+/// Turn the given coro.alloca.alloc call into a dynamic allocation.
+/// This happens during the all-instructions iteration, so it must not
+/// delete the call.
+static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
+ coro::Shape &Shape,
+ SmallVectorImpl<Instruction*> &DeadInsts) {
+ IRBuilder<> Builder(AI);
+ auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
+
+ for (User *U : AI->users()) {
+ if (isa<CoroAllocaGetInst>(U)) {
+ U->replaceAllUsesWith(Alloc);
+ } else {
+ auto FI = cast<CoroAllocaFreeInst>(U);
+ Builder.SetInsertPoint(FI);
+ Shape.emitDealloc(Builder, Alloc, nullptr);
+ }
+ DeadInsts.push_back(cast<Instruction>(U));
+ }
+
+ // Push this on last so that it gets deleted after all the others.
+ DeadInsts.push_back(AI);
+
+ // Return the new allocation value so that we can check for needed spills.
+ return cast<Instruction>(Alloc);
+}
+
+/// Get the current swifterror value.
+static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
+ coro::Shape &Shape) {
+ // Make a fake function pointer as a sort of intrinsic.
+ auto FnTy = FunctionType::get(ValueTy, {}, false);
+ auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
+
+ auto Call = Builder.CreateCall(Fn, {});
+ Shape.SwiftErrorOps.push_back(Call);
+
+ return Call;
+}
+
+/// Set the given value as the current swifterror value.
+///
+/// Returns a slot that can be used as a swifterror slot.
+static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
+ coro::Shape &Shape) {
+ // Make a fake function pointer as a sort of intrinsic.
+ auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
+ {V->getType()}, false);
+ auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
+
+ auto Call = Builder.CreateCall(Fn, { V });
+ Shape.SwiftErrorOps.push_back(Call);
+
+ return Call;
+}
+
+/// Set the swifterror value from the given alloca before a call,
+/// then put it back in the alloca afterwards.
+///
+/// Returns an address that will stand in for the swifterror slot
+/// until splitting.
+static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
+ AllocaInst *Alloca,
+ coro::Shape &Shape) {
+ auto ValueTy = Alloca->getAllocatedType();
+ IRBuilder<> Builder(Call);
+
+ // Load the current value from the alloca and set it as the
+ // swifterror value.
+ auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
+ auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
+
+ // Move to after the call. Since swifterror only has a guaranteed
+ // value on normal exits, we can ignore implicit and explicit unwind
+ // edges.
+ if (isa<CallInst>(Call)) {
+ Builder.SetInsertPoint(Call->getNextNode());
+ } else {
+ auto Invoke = cast<InvokeInst>(Call);
+ Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
+ }
+
+ // Get the current swifterror value and store it to the alloca.
+ auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
+ Builder.CreateStore(ValueAfterCall, Alloca);
+
+ return Addr;
+}
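+
+// For illustration only, the IR produced around a swifterror call (the
+// @set/@get callees stand for the fake function pointers created above):
+//
+//   %v0   = load %swift.error*, %swift.error** %alloca
+//   %slot = call %swift.error** @set(%swift.error* %v0)
+//   call void @use(%swift.error** swifterror %slot)
+//   %v1   = call %swift.error* @get()
+//   store %swift.error* %v1, %swift.error** %alloca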
+
+/// Eliminate a formerly-swifterror alloca by inserting the get/set
+/// intrinsics and attempting to MemToReg the alloca away.
+static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
+ coro::Shape &Shape) {
+ for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
+ // We're likely changing the use list, so use a mutation-safe
+ // iteration pattern.
+ auto &Use = *UI;
+ ++UI;
+
+ // swifterror values can only be used in very specific ways.
+ // We take advantage of that here.
+ auto User = Use.getUser();
+ if (isa<LoadInst>(User) || isa<StoreInst>(User))
+ continue;
+
+ assert(isa<CallInst>(User) || isa<InvokeInst>(User));
+ auto Call = cast<Instruction>(User);
+
+ auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
+
+ // Use the returned slot address as the call argument.
+ Use.set(Addr);
+ }
+
+ // All the uses should be loads and stores now.
+ assert(isAllocaPromotable(Alloca));
+}
+
+/// "Eliminate" a swifterror argument by reducing it to the alloca case
+/// and then loading and storing in the prologue and epilog.
+///
+/// The argument keeps the swifterror flag.
+static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
+ coro::Shape &Shape,
+ SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
+ IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
+
+ auto ArgTy = cast<PointerType>(Arg.getType());
+ auto ValueTy = ArgTy->getElementType();
+
+ // Reduce to the alloca case:
+
+ // Create an alloca and replace all uses of the arg with it.
+ auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
+ Arg.replaceAllUsesWith(Alloca);
+
+ // Set an initial value in the alloca. swifterror is always null on entry.
+ auto InitialValue = Constant::getNullValue(ValueTy);
+ Builder.CreateStore(InitialValue, Alloca);
+
+ // Find all the suspends in the function and save and restore around them.
+ for (auto Suspend : Shape.CoroSuspends) {
+ (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
+ }
+
+ // Find all the coro.ends in the function and restore the error value.
+ for (auto End : Shape.CoroEnds) {
+ Builder.SetInsertPoint(End);
+ auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
+ (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
+ }
+
+ // Now we can use the alloca logic.
+ AllocasToPromote.push_back(Alloca);
+ eliminateSwiftErrorAlloca(F, Alloca, Shape);
+}
+
+/// Eliminate all problematic uses of swifterror arguments and allocas
+/// from the function. We'll fix them up later when splitting the function.
+static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
+ SmallVector<AllocaInst*, 4> AllocasToPromote;
+
+ // Look for a swifterror argument.
+ for (auto &Arg : F.args()) {
+ if (!Arg.hasSwiftErrorAttr()) continue;
+
+ eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
+ break;
+ }
+
+ // Look for swifterror allocas.
+ for (auto &Inst : F.getEntryBlock()) {
+ auto Alloca = dyn_cast<AllocaInst>(&Inst);
+ if (!Alloca || !Alloca->isSwiftError()) continue;
+
+ // Clear the swifterror flag.
+ Alloca->setSwiftError(false);
+
+ AllocasToPromote.push_back(Alloca);
+ eliminateSwiftErrorAlloca(F, Alloca, Shape);
+ }
+
+ // If we have any allocas to promote, compute a dominator tree and
+ // promote them en masse.
+ if (!AllocasToPromote.empty()) {
+ DominatorTree DT(F);
+ PromoteMemToReg(AllocasToPromote, DT);
+ }
+}
+
void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
// Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
// access to local variables.
LowerDbgDeclare(F);
- Shape.PromiseAlloca = Shape.CoroBegin->getId()->getPromise();
- if (Shape.PromiseAlloca) {
- Shape.CoroBegin->getId()->clearPromise();
+ eliminateSwiftError(F, Shape);
+
+ if (Shape.ABI == coro::ABI::Switch &&
+ Shape.SwitchLowering.PromiseAlloca) {
+ Shape.getSwitchCoroId()->clearPromise();
}
// Make sure that all coro.save, coro.suspend and the fallthrough coro.end
// intrinsics are in their own blocks to simplify the logic of building up
// SuspendCrossing data.
- for (CoroSuspendInst *CSI : Shape.CoroSuspends) {
- splitAround(CSI->getCoroSave(), "CoroSave");
+ for (auto *CSI : Shape.CoroSuspends) {
+ if (auto *Save = CSI->getCoroSave())
+ splitAround(Save, "CoroSave");
splitAround(CSI, "CoroSuspend");
}
@@ -926,6 +1356,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
IRBuilder<> Builder(F.getContext());
SpillInfo Spills;
+ SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
+ SmallVector<Instruction*, 4> DeadInstructions;
for (int Repeat = 0; Repeat < 4; ++Repeat) {
// See if there are materializable instructions across suspend points.
@@ -955,11 +1387,40 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
// of the Coroutine Frame.
if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
continue;
+
// The coroutine promise is always included in the coroutine frame; no need
// to check for suspend crossing.
- if (Shape.PromiseAlloca == &I)
+ if (Shape.ABI == coro::ABI::Switch &&
+ Shape.SwitchLowering.PromiseAlloca == &I)
continue;
+ // Handle alloca.alloc specially here.
+ if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
+ // Check whether the alloca's lifetime is bounded by suspend points.
+ if (isLocalAlloca(AI)) {
+ LocalAllocas.push_back(AI);
+ continue;
+ }
+
+ // If not, do a quick rewrite of the alloca and then add spills of
+ // the rewritten value. The rewrite doesn't invalidate anything in
+ // Spills because the other alloca intrinsics have no other operands
+ // besides AI, and it doesn't invalidate the iteration because we delay
+ // erasing AI.
+ auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);
+
+ for (User *U : Alloc->users()) {
+ if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
+ Spills.emplace_back(Alloc, U);
+ }
+ continue;
+ }
+
+ // Ignore alloca.get; we process this as part of coro.alloca.alloc.
+ if (isa<CoroAllocaGetInst>(I)) {
+ continue;
+ }
+
for (User *U : I.users())
if (Checker.isDefinitionAcrossSuspend(I, U)) {
// We cannot spill a token.
@@ -970,7 +1431,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
}
}
LLVM_DEBUG(dump("Spills", Spills));
- moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
Shape.FrameTy = buildFrameType(F, Shape, Spills);
Shape.FramePtr = insertSpills(Spills, Shape);
+ lowerLocalAllocas(LocalAllocas, DeadInstructions);
+
+ for (auto I : DeadInstructions)
+ I->eraseFromParent();
}
diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h
index 5e19d7642e38..de2d2920cb15 100644
--- a/lib/Transforms/Coroutines/CoroInstr.h
+++ b/lib/Transforms/Coroutines/CoroInstr.h
@@ -27,6 +27,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -77,10 +78,8 @@ public:
}
};
-/// This represents the llvm.coro.alloc instruction.
-class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst {
- enum { AlignArg, PromiseArg, CoroutineArg, InfoArg };
-
+/// This represents a common base class for llvm.coro.id instructions.
+class LLVM_LIBRARY_VISIBILITY AnyCoroIdInst : public IntrinsicInst {
public:
CoroAllocInst *getCoroAlloc() {
for (User *U : users())
@@ -97,6 +96,24 @@ public:
llvm_unreachable("no coro.begin associated with coro.id");
}
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ auto ID = I->getIntrinsicID();
+ return ID == Intrinsic::coro_id ||
+ ID == Intrinsic::coro_id_retcon ||
+ ID == Intrinsic::coro_id_retcon_once;
+ }
+
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.id instruction.
+class LLVM_LIBRARY_VISIBILITY CoroIdInst : public AnyCoroIdInst {
+ enum { AlignArg, PromiseArg, CoroutineArg, InfoArg };
+
+public:
AllocaInst *getPromise() const {
Value *Arg = getArgOperand(PromiseArg);
return isa<ConstantPointerNull>(Arg)
@@ -182,6 +199,80 @@ public:
}
};
+/// This represents either the llvm.coro.id.retcon or
+/// llvm.coro.id.retcon.once instruction.
+class LLVM_LIBRARY_VISIBILITY AnyCoroIdRetconInst : public AnyCoroIdInst {
+ enum { SizeArg, AlignArg, StorageArg, PrototypeArg, AllocArg, DeallocArg };
+
+public:
+ void checkWellFormed() const;
+
+ uint64_t getStorageSize() const {
+ return cast<ConstantInt>(getArgOperand(SizeArg))->getZExtValue();
+ }
+
+ uint64_t getStorageAlignment() const {
+ return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+ }
+
+ Value *getStorage() const {
+ return getArgOperand(StorageArg);
+ }
+
+ /// Return the prototype for the continuation function. The type,
+ /// attributes, and calling convention of the continuation function(s)
+ /// are taken from this declaration.
+ Function *getPrototype() const {
+ return cast<Function>(getArgOperand(PrototypeArg)->stripPointerCasts());
+ }
+
+ /// Return the function to use for allocating memory.
+ Function *getAllocFunction() const {
+ return cast<Function>(getArgOperand(AllocArg)->stripPointerCasts());
+ }
+
+ /// Return the function to use for deallocating memory.
+ Function *getDeallocFunction() const {
+ return cast<Function>(getArgOperand(DeallocArg)->stripPointerCasts());
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ auto ID = I->getIntrinsicID();
+ return ID == Intrinsic::coro_id_retcon
+ || ID == Intrinsic::coro_id_retcon_once;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.id.retcon instruction.
+class LLVM_LIBRARY_VISIBILITY CoroIdRetconInst
+ : public AnyCoroIdRetconInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_id_retcon;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.id.retcon.once instruction.
+class LLVM_LIBRARY_VISIBILITY CoroIdRetconOnceInst
+ : public AnyCoroIdRetconInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_id_retcon_once;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This represents the llvm.coro.frame instruction.
class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
public:
@@ -215,7 +306,9 @@ class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst {
enum { IdArg, MemArg };
public:
- CoroIdInst *getId() const { return cast<CoroIdInst>(getArgOperand(IdArg)); }
+ AnyCoroIdInst *getId() const {
+ return cast<AnyCoroIdInst>(getArgOperand(IdArg));
+ }
Value *getMem() const { return getArgOperand(MemArg); }
@@ -261,8 +354,22 @@ public:
}
};
+class LLVM_LIBRARY_VISIBILITY AnyCoroSuspendInst : public IntrinsicInst {
+public:
+ CoroSaveInst *getCoroSave() const;
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_suspend ||
+ I->getIntrinsicID() == Intrinsic::coro_suspend_retcon;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This represents the llvm.coro.suspend instruction.
-class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public IntrinsicInst {
+class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public AnyCoroSuspendInst {
enum { SaveArg, FinalArg };
public:
@@ -273,6 +380,7 @@ public:
assert(isa<ConstantTokenNone>(Arg));
return nullptr;
}
+
bool isFinal() const {
return cast<Constant>(getArgOperand(FinalArg))->isOneValue();
}
@@ -286,6 +394,37 @@ public:
}
};
+inline CoroSaveInst *AnyCoroSuspendInst::getCoroSave() const {
+ if (auto Suspend = dyn_cast<CoroSuspendInst>(this))
+ return Suspend->getCoroSave();
+ return nullptr;
+}
+
+/// This represents the llvm.coro.suspend.retcon instruction.
+class LLVM_LIBRARY_VISIBILITY CoroSuspendRetconInst : public AnyCoroSuspendInst {
+public:
+ op_iterator value_begin() { return arg_begin(); }
+ const_op_iterator value_begin() const { return arg_begin(); }
+
+ op_iterator value_end() { return arg_end(); }
+ const_op_iterator value_end() const { return arg_end(); }
+
+ iterator_range<op_iterator> value_operands() {
+ return make_range(value_begin(), value_end());
+ }
+ iterator_range<const_op_iterator> value_operands() const {
+ return make_range(value_begin(), value_end());
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_suspend_retcon;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This represents the llvm.coro.size instruction.
class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst {
public:
@@ -317,6 +456,60 @@ public:
}
};
+/// This represents the llvm.coro.alloca.alloc instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAllocaAllocInst : public IntrinsicInst {
+ enum { SizeArg, AlignArg };
+public:
+ Value *getSize() const {
+ return getArgOperand(SizeArg);
+ }
+ unsigned getAlignment() const {
+ return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_alloca_alloc;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.alloca.get instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAllocaGetInst : public IntrinsicInst {
+ enum { AllocArg };
+public:
+ CoroAllocaAllocInst *getAlloc() const {
+ return cast<CoroAllocaAllocInst>(getArgOperand(AllocArg));
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_alloca_get;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.alloca.free instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAllocaFreeInst : public IntrinsicInst {
+ enum { AllocArg };
+public:
+ CoroAllocaAllocInst *getAlloc() const {
+ return cast<CoroAllocaAllocInst>(getArgOperand(AllocArg));
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_alloca_free;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
} // End namespace llvm.
#endif
diff --git a/lib/Transforms/Coroutines/CoroInternal.h b/lib/Transforms/Coroutines/CoroInternal.h
index 441c8a20f1f3..c151474316f9 100644
--- a/lib/Transforms/Coroutines/CoroInternal.h
+++ b/lib/Transforms/Coroutines/CoroInternal.h
@@ -12,6 +12,7 @@
#define LLVM_LIB_TRANSFORMS_COROUTINES_COROINTERNAL_H
#include "CoroInstr.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Coroutines.h"
namespace llvm {
@@ -61,37 +62,174 @@ struct LowererBase {
Value *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt);
};
+enum class ABI {
+ /// The "resume-switch" lowering, where there are separate resume and
+ /// destroy functions that are shared between all suspend points. The
+ /// coroutine frame implicitly stores the resume and destroy functions,
+ /// the current index, and any promise value.
+ Switch,
+
+ /// The "returned-continuation" lowering, where each suspend point creates a
+ /// single continuation function that is used for both resuming and
+ /// destroying. Does not support promises.
+ Retcon,
+
+ /// The "unique returned-continuation" lowering, where each suspend point
+ /// creates a single continuation function that is used for both resuming
+ /// and destroying. Does not support promises. The function is known to
+ /// suspend at most once during its execution, and the return value of
+ /// the continuation is void.
+ RetconOnce,
+};
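+
+// For illustration, the id intrinsic used by the frontend selects the
+// lowering (cf. the classof checks in CoroInstr.h):
+//
+//   llvm.coro.id             -> ABI::Switch
+//   llvm.coro.id.retcon      -> ABI::Retcon
+//   llvm.coro.id.retcon.once -> ABI::RetconOnce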
+
// Holds structural Coroutine Intrinsics for a particular function and other
// values used during CoroSplit pass.
struct LLVM_LIBRARY_VISIBILITY Shape {
CoroBeginInst *CoroBegin;
SmallVector<CoroEndInst *, 4> CoroEnds;
SmallVector<CoroSizeInst *, 2> CoroSizes;
- SmallVector<CoroSuspendInst *, 4> CoroSuspends;
-
- // Field Indexes for known coroutine frame fields.
- enum {
- ResumeField,
- DestroyField,
- PromiseField,
- IndexField,
+ SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
+ SmallVector<CallInst*, 2> SwiftErrorOps;
+
+ // Field indexes for special fields in the switch lowering.
+ struct SwitchFieldIndex {
+ enum {
+ Resume,
+ Destroy,
+ Promise,
+ Index,
+ /// The index of the first spill field.
+ FirstSpill
+ };
};
+ coro::ABI ABI;
+
StructType *FrameTy;
Instruction *FramePtr;
BasicBlock *AllocaSpillBlock;
- SwitchInst *ResumeSwitch;
- AllocaInst *PromiseAlloca;
- bool HasFinalSuspend;
+
+ struct SwitchLoweringStorage {
+ SwitchInst *ResumeSwitch;
+ AllocaInst *PromiseAlloca;
+ BasicBlock *ResumeEntryBlock;
+ bool HasFinalSuspend;
+ };
+
+ struct RetconLoweringStorage {
+ Function *ResumePrototype;
+ Function *Alloc;
+ Function *Dealloc;
+ BasicBlock *ReturnBlock;
+ bool IsFrameInlineInStorage;
+ };
+
+ union {
+ SwitchLoweringStorage SwitchLowering;
+ RetconLoweringStorage RetconLowering;
+ };
+
+ CoroIdInst *getSwitchCoroId() const {
+ assert(ABI == coro::ABI::Switch);
+ return cast<CoroIdInst>(CoroBegin->getId());
+ }
+
+ AnyCoroIdRetconInst *getRetconCoroId() const {
+ assert(ABI == coro::ABI::Retcon ||
+ ABI == coro::ABI::RetconOnce);
+ return cast<AnyCoroIdRetconInst>(CoroBegin->getId());
+ }
IntegerType *getIndexType() const {
+ assert(ABI == coro::ABI::Switch);
assert(FrameTy && "frame type not assigned");
- return cast<IntegerType>(FrameTy->getElementType(IndexField));
+ return cast<IntegerType>(FrameTy->getElementType(SwitchFieldIndex::Index));
}
ConstantInt *getIndex(uint64_t Value) const {
return ConstantInt::get(getIndexType(), Value);
}
+ PointerType *getSwitchResumePointerType() const {
+ assert(ABI == coro::ABI::Switch);
+ assert(FrameTy && "frame type not assigned");
+ return cast<PointerType>(FrameTy->getElementType(SwitchFieldIndex::Resume));
+ }
+
+ FunctionType *getResumeFunctionType() const {
+ switch (ABI) {
+ case coro::ABI::Switch: {
+ auto *FnPtrTy = getSwitchResumePointerType();
+ return cast<FunctionType>(FnPtrTy->getPointerElementType());
+ }
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ return RetconLowering.ResumePrototype->getFunctionType();
+ }
+ llvm_unreachable("Unknown coro::ABI enum");
+ }
+
+ ArrayRef<Type*> getRetconResultTypes() const {
+ assert(ABI == coro::ABI::Retcon ||
+ ABI == coro::ABI::RetconOnce);
+ auto FTy = CoroBegin->getFunction()->getFunctionType();
+
+ // The safety of all this is checked by checkWFRetconPrototype.
+ if (auto STy = dyn_cast<StructType>(FTy->getReturnType())) {
+ return STy->elements().slice(1);
+ } else {
+ return ArrayRef<Type*>();
+ }
+ }
+
+ ArrayRef<Type*> getRetconResumeTypes() const {
+ assert(ABI == coro::ABI::Retcon ||
+ ABI == coro::ABI::RetconOnce);
+
+ // The safety of all this is checked by checkWFRetconPrototype.
+ auto FTy = RetconLowering.ResumePrototype->getFunctionType();
+ return FTy->params().slice(1);
+ }
+
+ CallingConv::ID getResumeFunctionCC() const {
+ switch (ABI) {
+ case coro::ABI::Switch:
+ return CallingConv::Fast;
+
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ return RetconLowering.ResumePrototype->getCallingConv();
+ }
+ llvm_unreachable("Unknown coro::ABI enum");
+ }
+
+ unsigned getFirstSpillFieldIndex() const {
+ switch (ABI) {
+ case coro::ABI::Switch:
+ return SwitchFieldIndex::FirstSpill;
+
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ return 0;
+ }
+ llvm_unreachable("Unknown coro::ABI enum");
+ }
+
+ AllocaInst *getPromiseAlloca() const {
+ if (ABI == coro::ABI::Switch)
+ return SwitchLowering.PromiseAlloca;
+ return nullptr;
+ }
+
+ /// Allocate memory according to the rules of the active lowering.
+ ///
+ /// \param CG - if non-null, will be updated for the new call
+ Value *emitAlloc(IRBuilder<> &Builder, Value *Size, CallGraph *CG) const;
+
+ /// Deallocate memory according to the rules of the active lowering.
+ ///
+ /// \param CG - if non-null, will be updated for the new call
+ void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const;
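+
+ // For illustration only, a hypothetical caller of the two hooks above
+ // (cf. lowerNonLocalAlloca in CoroFrame.cpp):
+ //
+ //   IRBuilder<> Builder(AI);
+ //   Value *Mem = Shape.emitAlloc(Builder, AI->getSize(), /*CG=*/nullptr);
+ //   ...uses of Mem...
+ //   Shape.emitDealloc(Builder, Mem, /*CG=*/nullptr);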
+
Shape() = default;
explicit Shape(Function &F) { buildFrom(F); }
void buildFrom(Function &F);
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 5458e70ff16a..04723cbde417 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -55,6 +55,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -70,9 +71,197 @@ using namespace llvm;
#define DEBUG_TYPE "coro-split"
+namespace {
+
+/// A little helper class for building the function clones produced by
+/// coroutine splitting: the shared resume/destroy/cleanup functions of the
+/// switch lowering, or the individual continuation functions.
+class CoroCloner {
+public:
+ enum class Kind {
+ /// The shared resume function for a switch lowering.
+ SwitchResume,
+
+ /// The shared unwind function for a switch lowering.
+ SwitchUnwind,
+
+ /// The shared cleanup function for a switch lowering.
+ SwitchCleanup,
+
+ /// An individual continuation function.
+ Continuation,
+ };
+private:
+ Function &OrigF;
+ Function *NewF;
+ const Twine &Suffix;
+ coro::Shape &Shape;
+ Kind FKind;
+ ValueToValueMapTy VMap;
+ IRBuilder<> Builder;
+ Value *NewFramePtr = nullptr;
+ Value *SwiftErrorSlot = nullptr;
+
+ /// The active suspend instruction; meaningful only for continuation ABIs.
+ AnyCoroSuspendInst *ActiveSuspend = nullptr;
+
+public:
+ /// Create a cloner for a switch lowering.
+ CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
+ Kind FKind)
+ : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape),
+ FKind(FKind), Builder(OrigF.getContext()) {
+ assert(Shape.ABI == coro::ABI::Switch);
+ }
+
+ /// Create a cloner for a continuation lowering.
+ CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
+ Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
+ : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
+ FKind(Kind::Continuation), Builder(OrigF.getContext()),
+ ActiveSuspend(ActiveSuspend) {
+ assert(Shape.ABI == coro::ABI::Retcon ||
+ Shape.ABI == coro::ABI::RetconOnce);
+ assert(NewF && "need existing function for continuation");
+ assert(ActiveSuspend && "need active suspend point for continuation");
+ }
+
+ Function *getFunction() const {
+ assert(NewF != nullptr && "declaration not yet set");
+ return NewF;
+ }
+
+ void create();
+
+private:
+ bool isSwitchDestroyFunction() {
+ switch (FKind) {
+ case Kind::Continuation:
+ case Kind::SwitchResume:
+ return false;
+ case Kind::SwitchUnwind:
+ case Kind::SwitchCleanup:
+ return true;
+ }
+ llvm_unreachable("Unknown CoroCloner::Kind enum");
+ }
+
+ void createDeclaration();
+ void replaceEntryBlock();
+ Value *deriveNewFramePointer();
+ void replaceRetconSuspendUses();
+ void replaceCoroSuspends();
+ void replaceCoroEnds();
+ void replaceSwiftErrorOps();
+ void handleFinalSuspend();
+ void maybeFreeContinuationStorage();
+};
+
+} // end anonymous namespace
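+
+// For illustration only, a hypothetical wrapper over the cloner above; the
+// real call sites appear later in this pass:
+//
+//   static Function *createClone(Function &F, const Twine &Suffix,
+//                                coro::Shape &Shape, CoroCloner::Kind FKind) {
+//     CoroCloner Cloner(F, Suffix, Shape, FKind);
+//     Cloner.create();
+//     return Cloner.getFunction();
+//   }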
+
+static void maybeFreeRetconStorage(IRBuilder<> &Builder, coro::Shape &Shape,
+ Value *FramePtr, CallGraph *CG) {
+ assert(Shape.ABI == coro::ABI::Retcon ||
+ Shape.ABI == coro::ABI::RetconOnce);
+ if (Shape.RetconLowering.IsFrameInlineInStorage)
+ return;
+
+ Shape.emitDealloc(Builder, FramePtr, CG);
+}
+
+/// Replace a non-unwind call to llvm.coro.end.
+static void replaceFallthroughCoroEnd(CoroEndInst *End, coro::Shape &Shape,
+ Value *FramePtr, bool InResume,
+ CallGraph *CG) {
+ // Start inserting right before the coro.end.
+ IRBuilder<> Builder(End);
+
+ // Create the return instruction.
+ switch (Shape.ABI) {
+ // The cloned functions in switch-lowering always return void.
+ case coro::ABI::Switch:
+ // coro.end doesn't immediately end the coroutine in the main function
+ // in this lowering, because we need to deallocate the coroutine.
+ if (!InResume)
+ return;
+ Builder.CreateRetVoid();
+ break;
+
+ // In unique continuation lowering, the continuations always return void.
+ // But we may have implicitly allocated storage.
+ case coro::ABI::RetconOnce:
+ maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
+ Builder.CreateRetVoid();
+ break;
+
+ // In non-unique continuation lowering, we signal completion by returning
+ // a null continuation.
+ case coro::ABI::Retcon: {
+ maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
+ auto RetTy = Shape.getResumeFunctionType()->getReturnType();
+ auto RetStructTy = dyn_cast<StructType>(RetTy);
+ PointerType *ContinuationTy =
+ cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);
+
+ Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
+ if (RetStructTy) {
+ ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
+ ReturnValue, 0);
+ }
+ Builder.CreateRet(ReturnValue);
+ break;
+ }
+ }
+
+ // Remove the rest of the block by splitting it into an unreachable block.
+ auto *BB = End->getParent();
+ BB->splitBasicBlock(End);
+ BB->getTerminator()->eraseFromParent();
+}
+
+/// Replace an unwind call to llvm.coro.end.
+static void replaceUnwindCoroEnd(CoroEndInst *End, coro::Shape &Shape,
+ Value *FramePtr, bool InResume, CallGraph *CG){
+ IRBuilder<> Builder(End);
+
+ switch (Shape.ABI) {
+ // In switch-lowering, this does nothing in the main function.
+ case coro::ABI::Switch:
+ if (!InResume)
+ return;
+ break;
+
+ // In continuation-lowering, this frees the continuation storage.
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
+ break;
+ }
+
+ // If coro.end has an associated bundle, add a cleanupret instruction.
+ if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
+ auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
+ auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
+ End->getParent()->splitBasicBlock(End);
+ CleanupRet->getParent()->getTerminator()->eraseFromParent();
+ }
+}
+
+static void replaceCoroEnd(CoroEndInst *End, coro::Shape &Shape,
+ Value *FramePtr, bool InResume, CallGraph *CG) {
+ if (End->isUnwind())
+ replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
+ else
+ replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);
+
+ auto &Context = End->getContext();
+ End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
+ : ConstantInt::getFalse(Context));
+ End->eraseFromParent();
+}
+
// Create an entry block for a resume function with a switch that will jump to
// suspend points.
-static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) {
+static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
+ assert(Shape.ABI == coro::ABI::Switch);
LLVMContext &C = F.getContext();
// resume.entry:
@@ -91,15 +280,16 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) {
IRBuilder<> Builder(NewEntry);
auto *FramePtr = Shape.FramePtr;
auto *FrameTy = Shape.FrameTy;
- auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(
- FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr");
+ auto *GepIndex = Builder.CreateStructGEP(
+ FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
auto *Switch =
Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
- Shape.ResumeSwitch = Switch;
+ Shape.SwitchLowering.ResumeSwitch = Switch;
size_t SuspendIndex = 0;
- for (CoroSuspendInst *S : Shape.CoroSuspends) {
+ for (auto *AnyS : Shape.CoroSuspends) {
+ auto *S = cast<CoroSuspendInst>(AnyS);
ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
// Replace CoroSave with a store to Index:
@@ -109,14 +299,15 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) {
Builder.SetInsertPoint(Save);
if (S->isFinal()) {
// Final suspend point is represented by storing zero in ResumeFnAddr.
- auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0,
- 0, "ResumeFn.addr");
+ auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr,
+ coro::Shape::SwitchFieldIndex::Resume,
+ "ResumeFn.addr");
auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
cast<PointerType>(GepIndex->getType())->getElementType()));
Builder.CreateStore(NullPtr, GepIndex);
} else {
- auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(
- FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr");
+ auto *GepIndex = Builder.CreateStructGEP(
+ FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
Builder.CreateStore(IndexVal, GepIndex);
}
Save->replaceAllUsesWith(ConstantTokenNone::get(C));
@@ -164,48 +355,9 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) {
Builder.SetInsertPoint(UnreachBB);
Builder.CreateUnreachable();
- return NewEntry;
+ Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
}
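+
+// For illustration only: for two suspend points the block built above looks
+// roughly like (types hypothetical; the index is i1 for two suspends):
+//
+//   resume.entry:
+//     %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr,
+//                                          i32 0, i32 3  ; SwitchFieldIndex::Index
+//     %index = load i1, i1* %index.addr
+//     switch i1 %index, label %unreachable [ i1 0, label %resume.0
+//                                            i1 1, label %resume.1 ]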
-// In Resumers, we replace fallthrough coro.end with ret void and delete the
-// rest of the block.
-static void replaceFallthroughCoroEnd(IntrinsicInst *End,
- ValueToValueMapTy &VMap) {
- auto *NewE = cast<IntrinsicInst>(VMap[End]);
- ReturnInst::Create(NewE->getContext(), nullptr, NewE);
-
- // Remove the rest of the block, by splitting it into an unreachable block.
- auto *BB = NewE->getParent();
- BB->splitBasicBlock(NewE);
- BB->getTerminator()->eraseFromParent();
-}
-
-// In Resumers, we replace unwind coro.end with True to force the immediate
-// unwind to caller.
-static void replaceUnwindCoroEnds(coro::Shape &Shape, ValueToValueMapTy &VMap) {
- if (Shape.CoroEnds.empty())
- return;
-
- LLVMContext &Context = Shape.CoroEnds.front()->getContext();
- auto *True = ConstantInt::getTrue(Context);
- for (CoroEndInst *CE : Shape.CoroEnds) {
- if (!CE->isUnwind())
- continue;
-
- auto *NewCE = cast<IntrinsicInst>(VMap[CE]);
-
- // If coro.end has an associated bundle, add cleanupret instruction.
- if (auto Bundle = NewCE->getOperandBundle(LLVMContext::OB_funclet)) {
- Value *FromPad = Bundle->Inputs[0];
- auto *CleanupRet = CleanupReturnInst::Create(FromPad, nullptr, NewCE);
- NewCE->getParent()->splitBasicBlock(NewCE);
- CleanupRet->getParent()->getTerminator()->eraseFromParent();
- }
-
- NewCE->replaceAllUsesWith(True);
- NewCE->eraseFromParent();
- }
-}
// Rewrite final suspend point handling. We do not use the suspend index to
// represent the final suspend point. Instead we zero out ResumeFnAddr in the
@@ -216,83 +368,364 @@ static void replaceUnwindCoroEnds(coro::Shape &Shape, ValueToValueMapTy &VMap) {
// In the destroy function, we add a code sequence to check if ResumeFnAddress
// is Null, and if so, jump to the appropriate label to handle cleanup from the
// final suspend point.
-static void handleFinalSuspend(IRBuilder<> &Builder, Value *FramePtr,
- coro::Shape &Shape, SwitchInst *Switch,
- bool IsDestroy) {
- assert(Shape.HasFinalSuspend);
+void CoroCloner::handleFinalSuspend() {
+ assert(Shape.ABI == coro::ABI::Switch &&
+ Shape.SwitchLowering.HasFinalSuspend);
+ auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
auto FinalCaseIt = std::prev(Switch->case_end());
BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
Switch->removeCase(FinalCaseIt);
- if (IsDestroy) {
+ if (isSwitchDestroyFunction()) {
BasicBlock *OldSwitchBB = Switch->getParent();
auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
Builder.SetInsertPoint(OldSwitchBB->getTerminator());
- auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, FramePtr,
- 0, 0, "ResumeFn.addr");
- auto *Load = Builder.CreateLoad(
- Shape.FrameTy->getElementType(coro::Shape::ResumeField), GepIndex);
- auto *NullPtr =
- ConstantPointerNull::get(cast<PointerType>(Load->getType()));
- auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
+ auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
+ coro::Shape::SwitchFieldIndex::Resume,
+ "ResumeFn.addr");
+ auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
+ GepIndex);
+ auto *Cond = Builder.CreateIsNull(Load);
Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
OldSwitchBB->getTerminator()->eraseFromParent();
}
}
-// Create a resume clone by cloning the body of the original function, setting
-// new entry block and replacing coro.suspend an appropriate value to force
-// resume or cleanup pass for every suspend point.
-static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
- BasicBlock *ResumeEntry, int8_t FnIndex) {
- Module *M = F.getParent();
- auto *FrameTy = Shape.FrameTy;
- auto *FnPtrTy = cast<PointerType>(FrameTy->getElementType(0));
- auto *FnTy = cast<FunctionType>(FnPtrTy->getElementType());
+static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
+ const Twine &Suffix,
+ Module::iterator InsertBefore) {
+ Module *M = OrigF.getParent();
+ auto *FnTy = Shape.getResumeFunctionType();
Function *NewF =
- Function::Create(FnTy, GlobalValue::LinkageTypes::ExternalLinkage,
- F.getName() + Suffix, M);
+ Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
+ OrigF.getName() + Suffix);
NewF->addParamAttr(0, Attribute::NonNull);
NewF->addParamAttr(0, Attribute::NoAlias);
- ValueToValueMapTy VMap;
+ M->getFunctionList().insert(InsertBefore, NewF);
+
+ return NewF;
+}
+
+/// Replace uses of the active llvm.coro.suspend.retcon call with the
+/// arguments to the continuation function.
+///
+/// This assumes that the builder has a meaningful insertion point.
+void CoroCloner::replaceRetconSuspendUses() {
+ assert(Shape.ABI == coro::ABI::Retcon ||
+ Shape.ABI == coro::ABI::RetconOnce);
+
+ auto NewS = VMap[ActiveSuspend];
+ if (NewS->use_empty()) return;
+
+ // Copy out all the continuation arguments after the buffer pointer into
+ // an easily-indexed data structure for convenience.
+ SmallVector<Value*, 8> Args;
+ for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I)
+ Args.push_back(&*I);
+
+ // If the suspend returns a single scalar value, we can just do a simple
+ // replacement.
+ if (!isa<StructType>(NewS->getType())) {
+ assert(Args.size() == 1);
+ NewS->replaceAllUsesWith(Args.front());
+ return;
+ }
+
+ // Try to peephole extracts of an aggregate return.
+ for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) {
+ auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser());
+ if (!EVI || EVI->getNumIndices() != 1)
+ continue;
+
+ EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
+ EVI->eraseFromParent();
+ }
+
+ // If we have no remaining uses, we're done.
+ if (NewS->use_empty()) return;
+
+ // Otherwise, we need to create an aggregate.
+ Value *Agg = UndefValue::get(NewS->getType());
+ for (size_t I = 0, E = Args.size(); I != E; ++I)
+ Agg = Builder.CreateInsertValue(Agg, Args[I], I);
+
+ NewS->replaceAllUsesWith(Agg);
+}
+
+void CoroCloner::replaceCoroSuspends() {
+ Value *SuspendResult;
+
+ switch (Shape.ABI) {
+ // In switch lowering, replace coro.suspend with the appropriate value
+ // for the type of function we're extracting.
+ // Replacing coro.suspend with (0) will result in control flow proceeding to
+ // a resume label associated with a suspend point; replacing it with (1) will
+ // result in control flow proceeding to a cleanup label associated with this
+ // suspend point.
+ case coro::ABI::Switch:
+ SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
+ break;
+
+ // In returned-continuation lowering, the arguments from earlier
+ // continuations are theoretically arbitrary, and they should have been
+ // spilled.
+ case coro::ABI::RetconOnce:
+ case coro::ABI::Retcon:
+ return;
+ }
+
+ for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
+ // The active suspend was handled earlier.
+ if (CS == ActiveSuspend) continue;
+
+ auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
+ MappedCS->replaceAllUsesWith(SuspendResult);
+ MappedCS->eraseFromParent();
+ }
+}
+
+void CoroCloner::replaceCoroEnds() {
+ for (CoroEndInst *CE : Shape.CoroEnds) {
+ // We use a null call graph because there's no call graph node for
+ // the cloned function yet. We'll just be rebuilding that later.
+ auto NewCE = cast<CoroEndInst>(VMap[CE]);
+ replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
+ }
+}
+
+static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
+ ValueToValueMapTy *VMap) {
+ Value *CachedSlot = nullptr;
+ auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
+ if (CachedSlot) {
+ assert(CachedSlot->getType()->getPointerElementType() == ValueTy &&
+ "multiple swifterror slots in function with different types");
+ return CachedSlot;
+ }
+
+ // Check if the function has a swifterror argument.
+ for (auto &Arg : F.args()) {
+ if (Arg.isSwiftError()) {
+ CachedSlot = &Arg;
+ assert(Arg.getType()->getPointerElementType() == ValueTy &&
+ "swifterror argument does not have expected type");
+ return &Arg;
+ }
+ }
+
+ // Create a swifterror alloca.
+ IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
+ auto Alloca = Builder.CreateAlloca(ValueTy);
+ Alloca->setSwiftError(true);
+
+ CachedSlot = Alloca;
+ return Alloca;
+ };
+
+ for (CallInst *Op : Shape.SwiftErrorOps) {
+ auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
+ IRBuilder<> Builder(MappedOp);
+
+ // If there are no arguments, this is a 'get' operation.
+ Value *MappedResult;
+ if (Op->getNumArgOperands() == 0) {
+ auto ValueTy = Op->getType();
+ auto Slot = getSwiftErrorSlot(ValueTy);
+ MappedResult = Builder.CreateLoad(ValueTy, Slot);
+ } else {
+ assert(Op->getNumArgOperands() == 1);
+ auto Value = MappedOp->getArgOperand(0);
+ auto ValueTy = Value->getType();
+ auto Slot = getSwiftErrorSlot(ValueTy);
+ Builder.CreateStore(Value, Slot);
+ MappedResult = Slot;
+ }
+
+ MappedOp->replaceAllUsesWith(MappedResult);
+ MappedOp->eraseFromParent();
+ }
+
+ // If we're updating the original function, we've invalidated SwiftErrorOps.
+ if (VMap == nullptr) {
+ Shape.SwiftErrorOps.clear();
+ }
+}
+
+void CoroCloner::replaceSwiftErrorOps() {
+ ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
+}
+
+void CoroCloner::replaceEntryBlock() {
+ // In the original function, the AllocaSpillBlock is a block immediately
+ // following the allocation of the frame object which defines GEPs for
+ // all the allocas that have been moved into the frame, and it ends by
+ // branching to the original beginning of the coroutine. Make this
+ // the entry block of the cloned function.
+ auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
+ Entry->setName("entry" + Suffix);
+ Entry->moveBefore(&NewF->getEntryBlock());
+ Entry->getTerminator()->eraseFromParent();
+
+ // Clear all predecessors of the new entry block. There should be
+ // exactly one predecessor, which we created when splitting out
+ // AllocaSpillBlock to begin with.
+ assert(Entry->hasOneUse());
+ auto BranchToEntry = cast<BranchInst>(Entry->user_back());
+ assert(BranchToEntry->isUnconditional());
+ Builder.SetInsertPoint(BranchToEntry);
+ Builder.CreateUnreachable();
+ BranchToEntry->eraseFromParent();
+
+ // TODO: move any allocas into Entry that weren't moved into the frame.
+ // (Currently we move all allocas into the frame.)
+
+ // Branch from the entry to the appropriate place.
+ Builder.SetInsertPoint(Entry);
+ switch (Shape.ABI) {
+ case coro::ABI::Switch: {
+ // In switch-lowering, we built a resume-entry block in the original
+ // function. Make the entry block branch to this.
+ auto *SwitchBB =
+ cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
+ Builder.CreateBr(SwitchBB);
+ break;
+ }
+
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce: {
+ // In continuation ABIs, we want to branch to immediately after the
+ // active suspend point. Earlier phases will have put the suspend in its
+ // own basic block, so just thread our jump directly to its successor.
+ auto MappedCS = cast<CoroSuspendRetconInst>(VMap[ActiveSuspend]);
+ auto Branch = cast<BranchInst>(MappedCS->getNextNode());
+ assert(Branch->isUnconditional());
+ Builder.CreateBr(Branch->getSuccessor(0));
+ break;
+ }
+ }
+}
+
+/// Derive the value of the new frame pointer.
+Value *CoroCloner::deriveNewFramePointer() {
+ // Builder should be inserting to the front of the new entry block.
+
+ switch (Shape.ABI) {
+ // In switch-lowering, the argument is the frame pointer.
+ case coro::ABI::Switch:
+ return &*NewF->arg_begin();
+
+ // In continuation-lowering, the argument is the opaque storage.
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce: {
+ Argument *NewStorage = &*NewF->arg_begin();
+ auto FramePtrTy = Shape.FrameTy->getPointerTo();
+
+ // If the storage is inline, just bitcast to the storage to the frame type.
+ if (Shape.RetconLowering.IsFrameInlineInStorage)
+ return Builder.CreateBitCast(NewStorage, FramePtrTy);
+
+ // Otherwise, load the real frame from the opaque storage.
+ auto FramePtrPtr =
+ Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
+ return Builder.CreateLoad(FramePtrPtr);
+ }
+ }
+ llvm_unreachable("bad ABI");
+}
+
+/// Clone the body of the original function into a resume function of
+/// some sort.
+void CoroCloner::create() {
+ // Create the new function if we don't already have one.
+ if (!NewF) {
+ NewF = createCloneDeclaration(OrigF, Shape, Suffix,
+ OrigF.getParent()->end());
+ }
+
// Replace all args with undefs. The buildCoroutineFrame algorithm already
// rewritten access to the args that occurs after suspend points with loads
// and stores to/from the coroutine frame.
- for (Argument &A : F.args())
+ for (Argument &A : OrigF.args())
VMap[&A] = UndefValue::get(A.getType());
SmallVector<ReturnInst *, 4> Returns;
- CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns);
- NewF->setLinkage(GlobalValue::LinkageTypes::InternalLinkage);
+ // Ignore attempts to change certain attributes of the function.
+ // TODO: maybe there should be a way to suppress this during cloning?
+ auto savedVisibility = NewF->getVisibility();
+ auto savedUnnamedAddr = NewF->getUnnamedAddr();
+ auto savedDLLStorageClass = NewF->getDLLStorageClass();
+
+ // NewF's linkage (which CloneFunctionInto does *not* change) might not
+ // be compatible with the visibility of OrigF (which it *does* change),
+ // so protect against that.
+ auto savedLinkage = NewF->getLinkage();
+ NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
+
+ CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns);
+
+ NewF->setLinkage(savedLinkage);
+ NewF->setVisibility(savedVisibility);
+ NewF->setUnnamedAddr(savedUnnamedAddr);
+ NewF->setDLLStorageClass(savedDLLStorageClass);
+
+ auto &Context = NewF->getContext();
+
+ // Replace the attributes of the new function:
+ auto OrigAttrs = NewF->getAttributes();
+ auto NewAttrs = AttributeList();
+
+ switch (Shape.ABI) {
+ case coro::ABI::Switch:
+ // Bootstrap attributes by copying function attributes from the
+ // original function. This should include optimization settings and so on.
+ NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
+ OrigAttrs.getFnAttributes());
+ break;
+
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ // If we have a continuation prototype, just use its attributes,
+ // full-stop.
+ NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
+ break;
+ }
- // Remove old returns.
- for (ReturnInst *Return : Returns)
- changeToUnreachable(Return, /*UseLLVMTrap=*/false);
+ // Make the frame parameter nonnull and noalias.
+ NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull);
+ NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias);
+
+ switch (Shape.ABI) {
+ // In these ABIs, the cloned functions always return 'void', and the
+ // existing return sites are meaningless. Note that for unique
+ // continuations, this includes the returns associated with suspends;
+ // this is fine because we can't suspend twice.
+ case coro::ABI::Switch:
+ case coro::ABI::RetconOnce:
+ // Remove old returns.
+ for (ReturnInst *Return : Returns)
+ changeToUnreachable(Return, /*UseLLVMTrap=*/false);
+ break;
+
+ // With multi-suspend continuations, we'll already have eliminated the
+ // original returns and inserted returns before all the suspend points,
+ // so we want to leave any returns in place.
+ case coro::ABI::Retcon:
+ break;
+ }
- // Remove old return attributes.
- NewF->removeAttributes(
- AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewF->getReturnType()));
+ NewF->setAttributes(NewAttrs);
+ NewF->setCallingConv(Shape.getResumeFunctionCC());
- // Make AllocaSpillBlock the new entry block.
- auto *SwitchBB = cast<BasicBlock>(VMap[ResumeEntry]);
- auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
- Entry->moveBefore(&NewF->getEntryBlock());
- Entry->getTerminator()->eraseFromParent();
- BranchInst::Create(SwitchBB, Entry);
- Entry->setName("entry" + Suffix);
+ // Set up the new entry block.
+ replaceEntryBlock();
- // Clear all predecessors of the new entry block.
- auto *Switch = cast<SwitchInst>(VMap[Shape.ResumeSwitch]);
- Entry->replaceAllUsesWith(Switch->getDefaultDest());
-
- IRBuilder<> Builder(&NewF->getEntryBlock().front());
+ Builder.SetInsertPoint(&NewF->getEntryBlock().front());
+ NewFramePtr = deriveNewFramePointer();
// Remap frame pointer.
- Argument *NewFramePtr = &*NewF->arg_begin();
- Value *OldFramePtr = cast<Value>(VMap[Shape.FramePtr]);
+ Value *OldFramePtr = VMap[Shape.FramePtr];
NewFramePtr->takeName(OldFramePtr);
OldFramePtr->replaceAllUsesWith(NewFramePtr);
@@ -302,50 +735,55 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
OldVFrame->replaceAllUsesWith(NewVFrame);
- // Rewrite final suspend handling as it is not done via switch (allows to
- // remove final case from the switch, since it is undefined behavior to resume
- // the coroutine suspended at the final suspend point.
- if (Shape.HasFinalSuspend) {
- auto *Switch = cast<SwitchInst>(VMap[Shape.ResumeSwitch]);
- bool IsDestroy = FnIndex != 0;
- handleFinalSuspend(Builder, NewFramePtr, Shape, Switch, IsDestroy);
+ switch (Shape.ABI) {
+ case coro::ABI::Switch:
+ // Rewrite final suspend handling as it is not done via switch (allows to
+ // remove final case from the switch, since it is undefined behavior to
+ // resume the coroutine suspended at the final suspend point.
+ if (Shape.SwitchLowering.HasFinalSuspend)
+ handleFinalSuspend();
+ break;
+
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ // Replace uses of the active suspend with the corresponding
+ // continuation-function arguments.
+ assert(ActiveSuspend != nullptr &&
+ "no active suspend when lowering a continuation-style coroutine");
+ replaceRetconSuspendUses();
+ break;
}
- // Replace coro suspend with the appropriate resume index.
- // Replacing coro.suspend with (0) will result in control flow proceeding to
- // a resume label associated with a suspend point, replacing it with (1) will
- // result in control flow proceeding to a cleanup label associated with this
- // suspend point.
- auto *NewValue = Builder.getInt8(FnIndex ? 1 : 0);
- for (CoroSuspendInst *CS : Shape.CoroSuspends) {
- auto *MappedCS = cast<CoroSuspendInst>(VMap[CS]);
- MappedCS->replaceAllUsesWith(NewValue);
- MappedCS->eraseFromParent();
- }
+ // Handle suspends.
+ replaceCoroSuspends();
+
+ // Handle swifterror.
+ replaceSwiftErrorOps();
// Remove coro.end intrinsics.
- replaceFallthroughCoroEnd(Shape.CoroEnds.front(), VMap);
- replaceUnwindCoroEnds(Shape, VMap);
+ replaceCoroEnds();
+
// Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
// to suppress deallocation code.
- coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
- /*Elide=*/FnIndex == 2);
-
- NewF->setCallingConv(CallingConv::Fast);
-
- return NewF;
+ if (Shape.ABI == coro::ABI::Switch)
+ coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
+ /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup);
}
-static void removeCoroEnds(coro::Shape &Shape) {
- if (Shape.CoroEnds.empty())
- return;
-
- LLVMContext &Context = Shape.CoroEnds.front()->getContext();
- auto *False = ConstantInt::getFalse(Context);
+// Create a resume clone by cloning the body of the original function, setting
+// new entry block and replacing coro.suspend an appropriate value to force
+// resume or cleanup pass for every suspend point.
+static Function *createClone(Function &F, const Twine &Suffix,
+ coro::Shape &Shape, CoroCloner::Kind FKind) {
+ CoroCloner Cloner(F, Suffix, Shape, FKind);
+ Cloner.create();
+ return Cloner.getFunction();
+}
- for (CoroEndInst *CE : Shape.CoroEnds) {
- CE->replaceAllUsesWith(False);
- CE->eraseFromParent();
+/// Remove calls to llvm.coro.end in the original function.
+static void removeCoroEnds(coro::Shape &Shape, CallGraph *CG) {
+ for (auto End : Shape.CoroEnds) {
+ replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG);
}
}
@@ -377,8 +815,12 @@ static void replaceFrameSize(coro::Shape &Shape) {
// i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
//
// Assumes that all the functions have the same signature.
-static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin,
- std::initializer_list<Function *> Fns) {
+static void setCoroInfo(Function &F, coro::Shape &Shape,
+ ArrayRef<Function *> Fns) {
+ // This only works under the switch-lowering ABI because coro elision
+ // only works on the switch-lowering ABI.
+ assert(Shape.ABI == coro::ABI::Switch);
+
SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
assert(!Args.empty());
Function *Part = *Fns.begin();
@@ -393,38 +835,45 @@ static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin,
// Update coro.begin instruction to refer to this constant.
LLVMContext &C = F.getContext();
auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
- CoroBegin->getId()->setInfo(BC);
+ Shape.getSwitchCoroId()->setInfo(BC);
}
// Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
Function *DestroyFn, Function *CleanupFn) {
+ assert(Shape.ABI == coro::ABI::Switch);
+
IRBuilder<> Builder(Shape.FramePtr->getNextNode());
- auto *ResumeAddr = Builder.CreateConstInBoundsGEP2_32(
- Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::ResumeField,
+ auto *ResumeAddr = Builder.CreateStructGEP(
+ Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
"resume.addr");
Builder.CreateStore(ResumeFn, ResumeAddr);
Value *DestroyOrCleanupFn = DestroyFn;
- CoroIdInst *CoroId = Shape.CoroBegin->getId();
+ CoroIdInst *CoroId = Shape.getSwitchCoroId();
if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
// If there is a CoroAlloc and it returns false (meaning we elide the
// allocation, use CleanupFn instead of DestroyFn).
DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
}
- auto *DestroyAddr = Builder.CreateConstInBoundsGEP2_32(
- Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::DestroyField,
+ auto *DestroyAddr = Builder.CreateStructGEP(
+ Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
"destroy.addr");
Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
}
static void postSplitCleanup(Function &F) {
removeUnreachableBlocks(F);
+
+ // For now, we do a mandatory verification step because we don't
+ // entirely trust this pass. Note that we don't want to add a verifier
+ // pass to FPM below because it will also verify all the global data.
+ verifyFunction(F);
+
legacy::FunctionPassManager FPM(F.getParent());
- FPM.add(createVerifierPass());
FPM.add(createSCCPPass());
FPM.add(createCFGSimplificationPass());
FPM.add(createEarlyCSEPass());
@@ -520,21 +969,34 @@ static void addMustTailToCoroResumes(Function &F) {
// Coroutine has no suspend points. Remove heap allocation for the coroutine
// frame if possible.
-static void handleNoSuspendCoroutine(CoroBeginInst *CoroBegin, Type *FrameTy) {
+static void handleNoSuspendCoroutine(coro::Shape &Shape) {
+ auto *CoroBegin = Shape.CoroBegin;
auto *CoroId = CoroBegin->getId();
auto *AllocInst = CoroId->getCoroAlloc();
- coro::replaceCoroFree(CoroId, /*Elide=*/AllocInst != nullptr);
- if (AllocInst) {
- IRBuilder<> Builder(AllocInst);
- // FIXME: Need to handle overaligned members.
- auto *Frame = Builder.CreateAlloca(FrameTy);
- auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
- AllocInst->replaceAllUsesWith(Builder.getFalse());
- AllocInst->eraseFromParent();
- CoroBegin->replaceAllUsesWith(VFrame);
- } else {
- CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
+ switch (Shape.ABI) {
+ case coro::ABI::Switch: {
+ auto SwitchId = cast<CoroIdInst>(CoroId);
+ coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
+ if (AllocInst) {
+ IRBuilder<> Builder(AllocInst);
+ // FIXME: Need to handle overaligned members.
+ auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
+ auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
+ AllocInst->replaceAllUsesWith(Builder.getFalse());
+ AllocInst->eraseFromParent();
+ CoroBegin->replaceAllUsesWith(VFrame);
+ } else {
+ CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
+ }
+ break;
+ }
+
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
+ break;
}
+
CoroBegin->eraseFromParent();
}
@@ -670,12 +1132,16 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
// Remove suspend points that are simplified.
static void simplifySuspendPoints(coro::Shape &Shape) {
+ // Currently, the only simplification we do is switch-lowering-specific.
+ if (Shape.ABI != coro::ABI::Switch)
+ return;
+
auto &S = Shape.CoroSuspends;
size_t I = 0, N = S.size();
if (N == 0)
return;
while (true) {
- if (simplifySuspendPoint(S[I], Shape.CoroBegin)) {
+ if (simplifySuspendPoint(cast<CoroSuspendInst>(S[I]), Shape.CoroBegin)) {
if (--N == I)
break;
std::swap(S[I], S[N]);
@@ -687,142 +1153,227 @@ static void simplifySuspendPoints(coro::Shape &Shape) {
S.resize(N);
}
-static SmallPtrSet<BasicBlock *, 4> getCoroBeginPredBlocks(CoroBeginInst *CB) {
- // Collect all blocks that we need to look for instructions to relocate.
- SmallPtrSet<BasicBlock *, 4> RelocBlocks;
- SmallVector<BasicBlock *, 4> Work;
- Work.push_back(CB->getParent());
+static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
+ SmallVectorImpl<Function *> &Clones) {
+ assert(Shape.ABI == coro::ABI::Switch);
- do {
- BasicBlock *Current = Work.pop_back_val();
- for (BasicBlock *BB : predecessors(Current))
- if (RelocBlocks.count(BB) == 0) {
- RelocBlocks.insert(BB);
- Work.push_back(BB);
- }
- } while (!Work.empty());
- return RelocBlocks;
-}
-
-static SmallPtrSet<Instruction *, 8>
-getNotRelocatableInstructions(CoroBeginInst *CoroBegin,
- SmallPtrSetImpl<BasicBlock *> &RelocBlocks) {
- SmallPtrSet<Instruction *, 8> DoNotRelocate;
- // Collect all instructions that we should not relocate
- SmallVector<Instruction *, 8> Work;
-
- // Start with CoroBegin and terminators of all preceding blocks.
- Work.push_back(CoroBegin);
- BasicBlock *CoroBeginBB = CoroBegin->getParent();
- for (BasicBlock *BB : RelocBlocks)
- if (BB != CoroBeginBB)
- Work.push_back(BB->getTerminator());
-
- // For every instruction in the Work list, place its operands in DoNotRelocate
- // set.
- do {
- Instruction *Current = Work.pop_back_val();
- LLVM_DEBUG(dbgs() << "CoroSplit: Will not relocate: " << *Current << "\n");
- DoNotRelocate.insert(Current);
- for (Value *U : Current->operands()) {
- auto *I = dyn_cast<Instruction>(U);
- if (!I)
- continue;
+ createResumeEntryBlock(F, Shape);
+ auto ResumeClone = createClone(F, ".resume", Shape,
+ CoroCloner::Kind::SwitchResume);
+ auto DestroyClone = createClone(F, ".destroy", Shape,
+ CoroCloner::Kind::SwitchUnwind);
+ auto CleanupClone = createClone(F, ".cleanup", Shape,
+ CoroCloner::Kind::SwitchCleanup);
- if (auto *A = dyn_cast<AllocaInst>(I)) {
- // Stores to alloca instructions that occur before the coroutine frame
- // is allocated should not be moved; the stored values may be used by
- // the coroutine frame allocator. The operands to those stores must also
- // remain in place.
- for (const auto &User : A->users())
- if (auto *SI = dyn_cast<llvm::StoreInst>(User))
- if (RelocBlocks.count(SI->getParent()) != 0 &&
- DoNotRelocate.count(SI) == 0) {
- Work.push_back(SI);
- DoNotRelocate.insert(SI);
- }
- continue;
- }
+ postSplitCleanup(*ResumeClone);
+ postSplitCleanup(*DestroyClone);
+ postSplitCleanup(*CleanupClone);
+
+ addMustTailToCoroResumes(*ResumeClone);
+
+ // Store addresses resume/destroy/cleanup functions in the coroutine frame.
+ updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
+
+ assert(Clones.empty());
+ Clones.push_back(ResumeClone);
+ Clones.push_back(DestroyClone);
+ Clones.push_back(CleanupClone);
+
+ // Create a constant array referring to resume/destroy/clone functions pointed
+ // by the last argument of @llvm.coro.info, so that CoroElide pass can
+ // determined correct function to call.
+ setCoroInfo(F, Shape, Clones);
+}
- if (DoNotRelocate.count(I) == 0) {
- Work.push_back(I);
- DoNotRelocate.insert(I);
+static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
+ SmallVectorImpl<Function *> &Clones) {
+ assert(Shape.ABI == coro::ABI::Retcon ||
+ Shape.ABI == coro::ABI::RetconOnce);
+ assert(Clones.empty());
+
+ // Reset various things that the optimizer might have decided it
+ // "knows" about the coroutine function due to not seeing a return.
+ F.removeFnAttr(Attribute::NoReturn);
+ F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+
+ // Allocate the frame.
+ auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
+ Value *RawFramePtr;
+ if (Shape.RetconLowering.IsFrameInlineInStorage) {
+ RawFramePtr = Id->getStorage();
+ } else {
+ IRBuilder<> Builder(Id);
+
+ // Determine the size of the frame.
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ auto Size = DL.getTypeAllocSize(Shape.FrameTy);
+
+ // Allocate. We don't need to update the call graph node because we're
+ // going to recompute it from scratch after splitting.
+ RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
+ RawFramePtr =
+ Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
+
+ // Stash the allocated frame pointer in the continuation storage.
+ auto Dest = Builder.CreateBitCast(Id->getStorage(),
+ RawFramePtr->getType()->getPointerTo());
+ Builder.CreateStore(RawFramePtr, Dest);
+ }
+
+ // Map all uses of llvm.coro.begin to the allocated frame pointer.
+ {
+ // Make sure we don't invalidate Shape.FramePtr.
+ TrackingVH<Instruction> Handle(Shape.FramePtr);
+ Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
+ Shape.FramePtr = Handle.getValPtr();
+ }
+
+ // Create a unique return block.
+ BasicBlock *ReturnBB = nullptr;
+ SmallVector<PHINode *, 4> ReturnPHIs;
+
+ // Create all the functions in order after the main function.
+ auto NextF = std::next(F.getIterator());
+
+ // Create a continuation function for each of the suspend points.
+ Clones.reserve(Shape.CoroSuspends.size());
+ for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
+ auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);
+
+ // Create the clone declaration.
+ auto Continuation =
+ createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF);
+ Clones.push_back(Continuation);
+
+ // Insert a branch to the unified return block immediately before
+ // the suspend point.
+ auto SuspendBB = Suspend->getParent();
+ auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
+ auto Branch = cast<BranchInst>(SuspendBB->getTerminator());
+
+ // Create the unified return block.
+ if (!ReturnBB) {
+ // Place it before the first suspend.
+ ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
+ NewSuspendBB);
+ Shape.RetconLowering.ReturnBlock = ReturnBB;
+
+ IRBuilder<> Builder(ReturnBB);
+
+ // Create PHIs for all the return values.
+ assert(ReturnPHIs.empty());
+
+ // First, the continuation.
+ ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
+ Shape.CoroSuspends.size()));
+
+ // Next, all the directly-yielded values.
+ for (auto ResultTy : Shape.getRetconResultTypes())
+ ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
+ Shape.CoroSuspends.size()));
+
+ // Build the return value.
+ auto RetTy = F.getReturnType();
+
+ // Cast the continuation value if necessary.
+ // We can't rely on the types matching up because that type would
+ // have to be infinite.
+ auto CastedContinuationTy =
+ (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
+ auto *CastedContinuation =
+ Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);
+
+ Value *RetV;
+ if (ReturnPHIs.size() == 1) {
+ RetV = CastedContinuation;
+ } else {
+ RetV = UndefValue::get(RetTy);
+ RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
+ for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
+ RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
}
+
+ Builder.CreateRet(RetV);
}
- } while (!Work.empty());
- return DoNotRelocate;
-}
-static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) {
- // Analyze which non-alloca instructions are needed for allocation and
- // relocate the rest to after coro.begin. We need to do it, since some of the
- // targets of those instructions may be placed into coroutine frame memory
- // for which becomes available after coro.begin intrinsic.
+ // Branch to the return block.
+ Branch->setSuccessor(0, ReturnBB);
+ ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
+ size_t NextPHIIndex = 1;
+ for (auto &VUse : Suspend->value_operands())
+ ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
+ assert(NextPHIIndex == ReturnPHIs.size());
+ }
- auto BlockSet = getCoroBeginPredBlocks(CoroBegin);
- auto DoNotRelocateSet = getNotRelocatableInstructions(CoroBegin, BlockSet);
+ assert(Clones.size() == Shape.CoroSuspends.size());
+ for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
+ auto Suspend = Shape.CoroSuspends[i];
+ auto Clone = Clones[i];
- Instruction *InsertPt = CoroBegin->getNextNode();
- BasicBlock &BB = F.getEntryBlock(); // TODO: Look at other blocks as well.
- for (auto B = BB.begin(), E = BB.end(); B != E;) {
- Instruction &I = *B++;
- if (isa<AllocaInst>(&I))
- continue;
- if (&I == CoroBegin)
- break;
- if (DoNotRelocateSet.count(&I))
- continue;
- I.moveBefore(InsertPt);
+ CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
+ }
+}
+
+namespace {
+ class PrettyStackTraceFunction : public PrettyStackTraceEntry {
+ Function &F;
+ public:
+ PrettyStackTraceFunction(Function &F) : F(F) {}
+ void print(raw_ostream &OS) const override {
+ OS << "While splitting coroutine ";
+ F.printAsOperand(OS, /*print type*/ false, F.getParent());
+ OS << "\n";
+ }
+ };
+}
+
+static void splitCoroutine(Function &F, coro::Shape &Shape,
+ SmallVectorImpl<Function *> &Clones) {
+ switch (Shape.ABI) {
+ case coro::ABI::Switch:
+ return splitSwitchCoroutine(F, Shape, Clones);
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ return splitRetconCoroutine(F, Shape, Clones);
}
+ llvm_unreachable("bad ABI kind");
}
static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
- EliminateUnreachableBlocks(F);
+ PrettyStackTraceFunction prettyStackTrace(F);
+
+ // The suspend-crossing algorithm in buildCoroutineFrame get tripped
+ // up by uses in unreachable blocks, so remove them as a first pass.
+ removeUnreachableBlocks(F);
coro::Shape Shape(F);
if (!Shape.CoroBegin)
return;
simplifySuspendPoints(Shape);
- relocateInstructionBefore(Shape.CoroBegin, F);
buildCoroutineFrame(F, Shape);
replaceFrameSize(Shape);
+ SmallVector<Function*, 4> Clones;
+
// If there are no suspend points, no split required, just remove
// the allocation and deallocation blocks, they are not needed.
if (Shape.CoroSuspends.empty()) {
- handleNoSuspendCoroutine(Shape.CoroBegin, Shape.FrameTy);
- removeCoroEnds(Shape);
- postSplitCleanup(F);
- coro::updateCallGraph(F, {}, CG, SCC);
- return;
+ handleNoSuspendCoroutine(Shape);
+ } else {
+ splitCoroutine(F, Shape, Clones);
}
- auto *ResumeEntry = createResumeEntryBlock(F, Shape);
- auto ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0);
- auto DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1);
- auto CleanupClone = createClone(F, ".cleanup", Shape, ResumeEntry, 2);
-
- // We no longer need coro.end in F.
- removeCoroEnds(Shape);
+ // Replace all the swifterror operations in the original function.
+ // This invalidates SwiftErrorOps in the Shape.
+ replaceSwiftErrorOps(F, Shape, nullptr);
+ removeCoroEnds(Shape, &CG);
postSplitCleanup(F);
- postSplitCleanup(*ResumeClone);
- postSplitCleanup(*DestroyClone);
- postSplitCleanup(*CleanupClone);
-
- addMustTailToCoroResumes(*ResumeClone);
-
- // Store addresses resume/destroy/cleanup functions in the coroutine frame.
- updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
-
- // Create a constant array referring to resume/destroy/clone functions pointed
- // by the last argument of @llvm.coro.info, so that CoroElide pass can
- // determined correct function to call.
- setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone});
// Update call graph and add the functions we created to the SCC.
- coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC);
+ coro::updateCallGraph(F, Clones, CG, SCC);
}
// When we see the coroutine the first time, we insert an indirect call to a
@@ -881,6 +1432,80 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
SCC.initialize(Nodes);
}
+/// Replace a call to llvm.coro.prepare.retcon.
+static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
+ auto CastFn = Prepare->getArgOperand(0); // as an i8*
+ auto Fn = CastFn->stripPointerCasts(); // as its original type
+
+ // Find call graph nodes for the preparation.
+ CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr;
+ if (auto ConcreteFn = dyn_cast<Function>(Fn)) {
+ PrepareUserNode = CG[Prepare->getFunction()];
+ FnNode = CG[ConcreteFn];
+ }
+
+ // Attempt to peephole this pattern:
+ // %0 = bitcast [[TYPE]] @some_function to i8*
+ // %1 = call @llvm.coro.prepare.retcon(i8* %0)
+ // %2 = bitcast %1 to [[TYPE]]
+ // ==>
+ // %2 = @some_function
+ for (auto UI = Prepare->use_begin(), UE = Prepare->use_end();
+ UI != UE; ) {
+ // Look for bitcasts back to the original function type.
+ auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
+ if (!Cast || Cast->getType() != Fn->getType()) continue;
+
+ // Check whether the replacement will introduce new direct calls.
+ // If so, we'll need to update the call graph.
+ if (PrepareUserNode) {
+ for (auto &Use : Cast->uses()) {
+ if (auto *CB = dyn_cast<CallBase>(Use.getUser())) {
+ if (!CB->isCallee(&Use))
+ continue;
+ PrepareUserNode->removeCallEdgeFor(*CB);
+ PrepareUserNode->addCalledFunction(CB, FnNode);
+ }
+ }
+ }
+
+ // Replace and remove the cast.
+ Cast->replaceAllUsesWith(Fn);
+ Cast->eraseFromParent();
+ }
+
+ // Replace any remaining uses with the function as an i8*.
+ // This can never directly be a callee, so we don't need to update CG.
+ Prepare->replaceAllUsesWith(CastFn);
+ Prepare->eraseFromParent();
+
+ // Kill dead bitcasts.
+ while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
+ if (!Cast->use_empty()) break;
+ CastFn = Cast->getOperand(0);
+ Cast->eraseFromParent();
+ }
+}
+
+/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent
+/// IPO from operating on calls to a retcon coroutine before it's been
+/// split. This is only safe to do after we've split all retcon
+/// coroutines in the module. We can do that this in this pass because
+/// this pass does promise to split all retcon coroutines (as opposed to
+/// switch coroutines, which are lowered in multiple stages).
+static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
+ bool Changed = false;
+ for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end();
+ PI != PE; ) {
+ // Intrinsics can only be used in calls.
+ auto *Prepare = cast<CallInst>((PI++)->getUser());
+ replacePrepare(Prepare, CG);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
//===----------------------------------------------------------------------===//
// Top Level Driver
//===----------------------------------------------------------------------===//
@@ -899,7 +1524,9 @@ struct CoroSplit : public CallGraphSCCPass {
// A coroutine is identified by the presence of coro.begin intrinsic, if
// we don't have any, this pass has nothing to do.
bool doInitialization(CallGraph &CG) override {
- Run = coro::declaresIntrinsics(CG.getModule(), {"llvm.coro.begin"});
+ Run = coro::declaresIntrinsics(CG.getModule(),
+ {"llvm.coro.begin",
+ "llvm.coro.prepare.retcon"});
return CallGraphSCCPass::doInitialization(CG);
}
@@ -907,6 +1534,12 @@ struct CoroSplit : public CallGraphSCCPass {
if (!Run)
return false;
+ // Check for uses of llvm.coro.prepare.retcon.
+ auto PrepareFn =
+ SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon");
+ if (PrepareFn && PrepareFn->use_empty())
+ PrepareFn = nullptr;
+
// Find coroutines for processing.
SmallVector<Function *, 4> Coroutines;
for (CallGraphNode *CGN : SCC)
@@ -914,12 +1547,17 @@ struct CoroSplit : public CallGraphSCCPass {
if (F->hasFnAttribute(CORO_PRESPLIT_ATTR))
Coroutines.push_back(F);
- if (Coroutines.empty())
+ if (Coroutines.empty() && !PrepareFn)
return false;
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
+ if (Coroutines.empty())
+ return replaceAllPrepares(PrepareFn, CG);
+
createDevirtTriggerFunc(CG, SCC);
+ // Split all the coroutines.
for (Function *F : Coroutines) {
Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR);
StringRef Value = Attr.getValueAsString();
@@ -932,6 +1570,10 @@ struct CoroSplit : public CallGraphSCCPass {
F->removeFnAttr(CORO_PRESPLIT_ATTR);
splitCoroutine(*F, CG, SCC);
}
+
+ if (PrepareFn)
+ replaceAllPrepares(PrepareFn, CG);
+
return true;
}
diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp
index a581d1d21169..f39483b27518 100644
--- a/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/lib/Transforms/Coroutines/Coroutines.cpp
@@ -123,12 +123,26 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
static bool isCoroutineIntrinsicName(StringRef Name) {
// NOTE: Must be sorted!
static const char *const CoroIntrinsics[] = {
- "llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.destroy",
- "llvm.coro.done", "llvm.coro.end", "llvm.coro.frame",
- "llvm.coro.free", "llvm.coro.id", "llvm.coro.noop",
- "llvm.coro.param", "llvm.coro.promise", "llvm.coro.resume",
- "llvm.coro.save", "llvm.coro.size", "llvm.coro.subfn.addr",
+ "llvm.coro.alloc",
+ "llvm.coro.begin",
+ "llvm.coro.destroy",
+ "llvm.coro.done",
+ "llvm.coro.end",
+ "llvm.coro.frame",
+ "llvm.coro.free",
+ "llvm.coro.id",
+ "llvm.coro.id.retcon",
+ "llvm.coro.id.retcon.once",
+ "llvm.coro.noop",
+ "llvm.coro.param",
+ "llvm.coro.prepare.retcon",
+ "llvm.coro.promise",
+ "llvm.coro.resume",
+ "llvm.coro.save",
+ "llvm.coro.size",
+ "llvm.coro.subfn.addr",
"llvm.coro.suspend",
+ "llvm.coro.suspend.retcon",
};
return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1;
}
@@ -217,9 +231,6 @@ static void clear(coro::Shape &Shape) {
Shape.FrameTy = nullptr;
Shape.FramePtr = nullptr;
Shape.AllocaSpillBlock = nullptr;
- Shape.ResumeSwitch = nullptr;
- Shape.PromiseAlloca = nullptr;
- Shape.HasFinalSuspend = false;
}
static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin,
@@ -235,6 +246,7 @@ static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin,
// Collect "interesting" coroutine intrinsics.
void coro::Shape::buildFrom(Function &F) {
+ bool HasFinalSuspend = false;
size_t FinalSuspendIndex = 0;
clear(*this);
SmallVector<CoroFrameInst *, 8> CoroFrames;
@@ -257,9 +269,15 @@ void coro::Shape::buildFrom(Function &F) {
if (II->use_empty())
UnusedCoroSaves.push_back(cast<CoroSaveInst>(II));
break;
- case Intrinsic::coro_suspend:
- CoroSuspends.push_back(cast<CoroSuspendInst>(II));
- if (CoroSuspends.back()->isFinal()) {
+ case Intrinsic::coro_suspend_retcon: {
+ auto Suspend = cast<CoroSuspendRetconInst>(II);
+ CoroSuspends.push_back(Suspend);
+ break;
+ }
+ case Intrinsic::coro_suspend: {
+ auto Suspend = cast<CoroSuspendInst>(II);
+ CoroSuspends.push_back(Suspend);
+ if (Suspend->isFinal()) {
if (HasFinalSuspend)
report_fatal_error(
"Only one suspend point can be marked as final");
@@ -267,18 +285,23 @@ void coro::Shape::buildFrom(Function &F) {
FinalSuspendIndex = CoroSuspends.size() - 1;
}
break;
+ }
case Intrinsic::coro_begin: {
auto CB = cast<CoroBeginInst>(II);
- if (CB->getId()->getInfo().isPreSplit()) {
- if (CoroBegin)
- report_fatal_error(
+
+ // Ignore coro id's that aren't pre-split.
+ auto Id = dyn_cast<CoroIdInst>(CB->getId());
+ if (Id && !Id->getInfo().isPreSplit())
+ break;
+
+ if (CoroBegin)
+ report_fatal_error(
"coroutine should have exactly one defining @llvm.coro.begin");
- CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- CB->removeAttribute(AttributeList::FunctionIndex,
- Attribute::NoDuplicate);
- CoroBegin = CB;
- }
+ CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ CB->removeAttribute(AttributeList::FunctionIndex,
+ Attribute::NoDuplicate);
+ CoroBegin = CB;
break;
}
case Intrinsic::coro_end:
@@ -310,7 +333,7 @@ void coro::Shape::buildFrom(Function &F) {
// Replace all coro.suspend with undef and remove related coro.saves if
// present.
- for (CoroSuspendInst *CS : CoroSuspends) {
+ for (AnyCoroSuspendInst *CS : CoroSuspends) {
CS->replaceAllUsesWith(UndefValue::get(CS->getType()));
CS->eraseFromParent();
if (auto *CoroSave = CS->getCoroSave())
@@ -324,19 +347,136 @@ void coro::Shape::buildFrom(Function &F) {
return;
}
+ auto Id = CoroBegin->getId();
+ switch (auto IdIntrinsic = Id->getIntrinsicID()) {
+ case Intrinsic::coro_id: {
+ auto SwitchId = cast<CoroIdInst>(Id);
+ this->ABI = coro::ABI::Switch;
+ this->SwitchLowering.HasFinalSuspend = HasFinalSuspend;
+ this->SwitchLowering.ResumeSwitch = nullptr;
+ this->SwitchLowering.PromiseAlloca = SwitchId->getPromise();
+ this->SwitchLowering.ResumeEntryBlock = nullptr;
+
+ for (auto AnySuspend : CoroSuspends) {
+ auto Suspend = dyn_cast<CoroSuspendInst>(AnySuspend);
+ if (!Suspend) {
+#ifndef NDEBUG
+ AnySuspend->dump();
+#endif
+ report_fatal_error("coro.id must be paired with coro.suspend");
+ }
+
+ if (!Suspend->getCoroSave())
+ createCoroSave(CoroBegin, Suspend);
+ }
+ break;
+ }
+
+ case Intrinsic::coro_id_retcon:
+ case Intrinsic::coro_id_retcon_once: {
+ auto ContinuationId = cast<AnyCoroIdRetconInst>(Id);
+ ContinuationId->checkWellFormed();
+ this->ABI = (IdIntrinsic == Intrinsic::coro_id_retcon
+ ? coro::ABI::Retcon
+ : coro::ABI::RetconOnce);
+ auto Prototype = ContinuationId->getPrototype();
+ this->RetconLowering.ResumePrototype = Prototype;
+ this->RetconLowering.Alloc = ContinuationId->getAllocFunction();
+ this->RetconLowering.Dealloc = ContinuationId->getDeallocFunction();
+ this->RetconLowering.ReturnBlock = nullptr;
+ this->RetconLowering.IsFrameInlineInStorage = false;
+
+ // Determine the result value types, and make sure they match up with
+ // the values passed to the suspends.
+ auto ResultTys = getRetconResultTypes();
+ auto ResumeTys = getRetconResumeTypes();
+
+ for (auto AnySuspend : CoroSuspends) {
+ auto Suspend = dyn_cast<CoroSuspendRetconInst>(AnySuspend);
+ if (!Suspend) {
+#ifndef NDEBUG
+ AnySuspend->dump();
+#endif
+ report_fatal_error("coro.id.retcon.* must be paired with "
+ "coro.suspend.retcon");
+ }
+
+ // Check that the argument types of the suspend match the results.
+ auto SI = Suspend->value_begin(), SE = Suspend->value_end();
+ auto RI = ResultTys.begin(), RE = ResultTys.end();
+ for (; SI != SE && RI != RE; ++SI, ++RI) {
+ auto SrcTy = (*SI)->getType();
+ if (SrcTy != *RI) {
+ // The optimizer likes to eliminate bitcasts leading into variadic
+ // calls, but that messes with our invariants. Re-insert the
+ // bitcast and ignore this type mismatch.
+ if (CastInst::isBitCastable(SrcTy, *RI)) {
+ auto BCI = new BitCastInst(*SI, *RI, "", Suspend);
+ SI->set(BCI);
+ continue;
+ }
+
+#ifndef NDEBUG
+ Suspend->dump();
+ Prototype->getFunctionType()->dump();
+#endif
+ report_fatal_error("argument to coro.suspend.retcon does not "
+ "match corresponding prototype function result");
+ }
+ }
+ if (SI != SE || RI != RE) {
+#ifndef NDEBUG
+ Suspend->dump();
+ Prototype->getFunctionType()->dump();
+#endif
+ report_fatal_error("wrong number of arguments to coro.suspend.retcon");
+ }
+
+ // Check that the result type of the suspend matches the resume types.
+ Type *SResultTy = Suspend->getType();
+ ArrayRef<Type*> SuspendResultTys;
+ if (SResultTy->isVoidTy()) {
+ // leave as empty array
+ } else if (auto SResultStructTy = dyn_cast<StructType>(SResultTy)) {
+ SuspendResultTys = SResultStructTy->elements();
+ } else {
+ // forms an ArrayRef using SResultTy, be careful
+ SuspendResultTys = SResultTy;
+ }
+ if (SuspendResultTys.size() != ResumeTys.size()) {
+#ifndef NDEBUG
+ Suspend->dump();
+ Prototype->getFunctionType()->dump();
+#endif
+ report_fatal_error("wrong number of results from coro.suspend.retcon");
+ }
+ for (size_t I = 0, E = ResumeTys.size(); I != E; ++I) {
+ if (SuspendResultTys[I] != ResumeTys[I]) {
+#ifndef NDEBUG
+ Suspend->dump();
+ Prototype->getFunctionType()->dump();
+#endif
+ report_fatal_error("result from coro.suspend.retcon does not "
+ "match corresponding prototype function param");
+ }
+ }
+ }
+ break;
+ }
+
+ default:
+ llvm_unreachable("coro.begin is not dependent on a coro.id call");
+ }
+
// The coro.free intrinsic is always lowered to the result of coro.begin.
for (CoroFrameInst *CF : CoroFrames) {
CF->replaceAllUsesWith(CoroBegin);
CF->eraseFromParent();
}
- // Canonicalize coro.suspend by inserting a coro.save if needed.
- for (CoroSuspendInst *CS : CoroSuspends)
- if (!CS->getCoroSave())
- createCoroSave(CoroBegin, CS);
-
// Move final suspend to be the last element in the CoroSuspends vector.
- if (HasFinalSuspend &&
+ if (ABI == coro::ABI::Switch &&
+ SwitchLowering.HasFinalSuspend &&
FinalSuspendIndex != CoroSuspends.size() - 1)
std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back());
@@ -345,6 +485,154 @@ void coro::Shape::buildFrom(Function &F) {
CoroSave->eraseFromParent();
}
+static void propagateCallAttrsFromCallee(CallInst *Call, Function *Callee) {
+ Call->setCallingConv(Callee->getCallingConv());
+ // TODO: attributes?
+}
+
+static void addCallToCallGraph(CallGraph *CG, CallInst *Call, Function *Callee){
+ if (CG)
+ (*CG)[Call->getFunction()]->addCalledFunction(Call, (*CG)[Callee]);
+}
+
+Value *coro::Shape::emitAlloc(IRBuilder<> &Builder, Value *Size,
+ CallGraph *CG) const {
+ switch (ABI) {
+ case coro::ABI::Switch:
+ llvm_unreachable("can't allocate memory in coro switch-lowering");
+
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce: {
+ auto Alloc = RetconLowering.Alloc;
+ Size = Builder.CreateIntCast(Size,
+ Alloc->getFunctionType()->getParamType(0),
+ /*is signed*/ false);
+ auto *Call = Builder.CreateCall(Alloc, Size);
+ propagateCallAttrsFromCallee(Call, Alloc);
+ addCallToCallGraph(CG, Call, Alloc);
+ return Call;
+ }
+ }
+ llvm_unreachable("Unknown coro::ABI enum");
+}
+
+void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr,
+ CallGraph *CG) const {
+ switch (ABI) {
+ case coro::ABI::Switch:
+ llvm_unreachable("can't allocate memory in coro switch-lowering");
+
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce: {
+ auto Dealloc = RetconLowering.Dealloc;
+ Ptr = Builder.CreateBitCast(Ptr,
+ Dealloc->getFunctionType()->getParamType(0));
+ auto *Call = Builder.CreateCall(Dealloc, Ptr);
+ propagateCallAttrsFromCallee(Call, Dealloc);
+ addCallToCallGraph(CG, Call, Dealloc);
+ return;
+ }
+ }
+ llvm_unreachable("Unknown coro::ABI enum");
+}
+
+LLVM_ATTRIBUTE_NORETURN
+static void fail(const Instruction *I, const char *Reason, Value *V) {
+#ifndef NDEBUG
+ I->dump();
+ if (V) {
+ errs() << " Value: ";
+ V->printAsOperand(llvm::errs());
+ errs() << '\n';
+ }
+#endif
+ report_fatal_error(Reason);
+}
+
+/// Check that the given value is a well-formed prototype for the
+/// llvm.coro.id.retcon.* intrinsics.
+static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) {
+ auto F = dyn_cast<Function>(V->stripPointerCasts());
+ if (!F)
+ fail(I, "llvm.coro.id.retcon.* prototype not a Function", V);
+
+ auto FT = F->getFunctionType();
+
+ if (isa<CoroIdRetconInst>(I)) {
+ bool ResultOkay;
+ if (FT->getReturnType()->isPointerTy()) {
+ ResultOkay = true;
+ } else if (auto SRetTy = dyn_cast<StructType>(FT->getReturnType())) {
+ ResultOkay = (!SRetTy->isOpaque() &&
+ SRetTy->getNumElements() > 0 &&
+ SRetTy->getElementType(0)->isPointerTy());
+ } else {
+ ResultOkay = false;
+ }
+ if (!ResultOkay)
+ fail(I, "llvm.coro.id.retcon prototype must return pointer as first "
+ "result", F);
+
+ if (FT->getReturnType() !=
+ I->getFunction()->getFunctionType()->getReturnType())
+ fail(I, "llvm.coro.id.retcon prototype return type must be same as"
+ "current function return type", F);
+ } else {
+ // No meaningful validation to do here for llvm.coro.id.unique.once.
+ }
+
+ if (FT->getNumParams() == 0 || !FT->getParamType(0)->isPointerTy())
+ fail(I, "llvm.coro.id.retcon.* prototype must take pointer as "
+ "its first parameter", F);
+}
+
+/// Check that the given value is a well-formed allocator.
+static void checkWFAlloc(const Instruction *I, Value *V) {
+ auto F = dyn_cast<Function>(V->stripPointerCasts());
+ if (!F)
+ fail(I, "llvm.coro.* allocator not a Function", V);
+
+ auto FT = F->getFunctionType();
+ if (!FT->getReturnType()->isPointerTy())
+ fail(I, "llvm.coro.* allocator must return a pointer", F);
+
+ if (FT->getNumParams() != 1 ||
+ !FT->getParamType(0)->isIntegerTy())
+ fail(I, "llvm.coro.* allocator must take integer as only param", F);
+}
+
+/// Check that the given value is a well-formed deallocator.
+static void checkWFDealloc(const Instruction *I, Value *V) {
+ auto F = dyn_cast<Function>(V->stripPointerCasts());
+ if (!F)
+ fail(I, "llvm.coro.* deallocator not a Function", V);
+
+ auto FT = F->getFunctionType();
+ if (!FT->getReturnType()->isVoidTy())
+ fail(I, "llvm.coro.* deallocator must return void", F);
+
+ if (FT->getNumParams() != 1 ||
+ !FT->getParamType(0)->isPointerTy())
+ fail(I, "llvm.coro.* deallocator must take pointer as only param", F);
+}
+
+static void checkConstantInt(const Instruction *I, Value *V,
+ const char *Reason) {
+ if (!isa<ConstantInt>(V)) {
+ fail(I, Reason, V);
+ }
+}
+
+void AnyCoroIdRetconInst::checkWellFormed() const {
+ checkConstantInt(this, getArgOperand(SizeArg),
+ "size argument to coro.id.retcon.* must be constant");
+ checkConstantInt(this, getArgOperand(AlignArg),
+ "alignment argument to coro.id.retcon.* must be constant");
+ checkWFRetconPrototype(this, getArgOperand(PrototypeArg));
+ checkWFAlloc(this, getArgOperand(AllocArg));
+ checkWFDealloc(this, getArgOperand(DeallocArg));
+}
+
void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCoroEarlyPass());
}
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 95a9f31cced3..dd9f74a881ee 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -304,7 +304,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// of the previous load.
LoadInst *newLoad =
IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
- newLoad->setAlignment(OrigLoad->getAlignment());
+ newLoad->setAlignment(MaybeAlign(OrigLoad->getAlignment()));
// Transfer the AA info too.
AAMDNodes AAInfo;
OrigLoad->getAAMetadata(AAInfo);
diff --git a/lib/Transforms/IPO/Attributor.cpp b/lib/Transforms/IPO/Attributor.cpp
index 2a52c6b9b4ad..95f47345d8fd 100644
--- a/lib/Transforms/IPO/Attributor.cpp
+++ b/lib/Transforms/IPO/Attributor.cpp
@@ -16,11 +16,15 @@
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -30,6 +34,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+
#include <cassert>
using namespace llvm;
@@ -46,19 +53,50 @@ STATISTIC(NumAttributesValidFixpoint,
"Number of abstract attributes in a valid fixpoint state");
STATISTIC(NumAttributesManifested,
"Number of abstract attributes manifested in IR");
-STATISTIC(NumFnNoUnwind, "Number of functions marked nounwind");
-
-STATISTIC(NumFnUniqueReturned, "Number of function with unique return");
-STATISTIC(NumFnKnownReturns, "Number of function with known return values");
-STATISTIC(NumFnArgumentReturned,
- "Number of function arguments marked returned");
-STATISTIC(NumFnNoSync, "Number of functions marked nosync");
-STATISTIC(NumFnNoFree, "Number of functions marked nofree");
-STATISTIC(NumFnReturnedNonNull,
- "Number of function return values marked nonnull");
-STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull");
-STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull");
-STATISTIC(NumFnWillReturn, "Number of functions marked willreturn");
+
+// Some helper macros to deal with statistics tracking.
+//
+// Usage:
+// For simple IR attribute tracking overload trackStatistics in the abstract
+// attribute and choose the right STATS_DECLTRACK_********* macro,
+// e.g.,:
+// void trackStatistics() const override {
+// STATS_DECLTRACK_ARG_ATTR(returned)
+// }
+// If there is a single "increment" side one can use the macro
+// STATS_DECLTRACK with a custom message. If there are multiple increment
+// sides, STATS_DECL and STATS_TRACK can also be used separatly.
+//
+#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \
+ ("Number of " #TYPE " marked '" #NAME "'")
+#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME
+#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG);
+#define STATS_DECL(NAME, TYPE, MSG) \
+ STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG);
+#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE));
+#define STATS_DECLTRACK(NAME, TYPE, MSG) \
+ { \
+ STATS_DECL(NAME, TYPE, MSG) \
+ STATS_TRACK(NAME, TYPE) \
+ }
+#define STATS_DECLTRACK_ARG_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME))
+#define STATS_DECLTRACK_CSARG_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, CSArguments, \
+ BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME))
+#define STATS_DECLTRACK_FN_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME))
+#define STATS_DECLTRACK_CS_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME))
+#define STATS_DECLTRACK_FNRET_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, FunctionReturn, \
+ BUILD_STAT_MSG_IR_ATTR(function returns, NAME))
+#define STATS_DECLTRACK_CSRET_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, CSReturn, \
+ BUILD_STAT_MSG_IR_ATTR(call site returns, NAME))
+#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, Floating, \
+ ("Number of floating values known to be '" #NAME "'"))
// TODO: Determine a good default value.
//
@@ -72,18 +110,32 @@ static cl::opt<unsigned>
MaxFixpointIterations("attributor-max-iterations", cl::Hidden,
cl::desc("Maximal number of fixpoint iterations."),
cl::init(32));
+static cl::opt<bool> VerifyMaxFixpointIterations(
+ "attributor-max-iterations-verify", cl::Hidden,
+ cl::desc("Verify that max-iterations is a tight bound for a fixpoint"),
+ cl::init(false));
static cl::opt<bool> DisableAttributor(
"attributor-disable", cl::Hidden,
cl::desc("Disable the attributor inter-procedural deduction pass."),
cl::init(true));
-static cl::opt<bool> VerifyAttributor(
- "attributor-verify", cl::Hidden,
- cl::desc("Verify the Attributor deduction and "
- "manifestation of attributes -- may issue false-positive errors"),
+static cl::opt<bool> ManifestInternal(
+ "attributor-manifest-internal", cl::Hidden,
+ cl::desc("Manifest Attributor internal string attributes."),
cl::init(false));
+static cl::opt<unsigned> DepRecInterval(
+ "attributor-dependence-recompute-interval", cl::Hidden,
+ cl::desc("Number of iterations until dependences are recomputed."),
+ cl::init(4));
+
+static cl::opt<bool> EnableHeapToStack("enable-heap-to-stack-conversion",
+ cl::init(true), cl::Hidden);
+
+static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
+ cl::Hidden);
+
/// Logic operators for the change status enum class.
///
///{
@@ -95,78 +147,30 @@ ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
}
///}
-/// Helper to adjust the statistics.
-static void bookkeeping(AbstractAttribute::ManifestPosition MP,
- const Attribute &Attr) {
- if (!AreStatisticsEnabled())
- return;
-
- if (!Attr.isEnumAttribute())
- return;
- switch (Attr.getKindAsEnum()) {
- case Attribute::NoUnwind:
- NumFnNoUnwind++;
- return;
- case Attribute::Returned:
- NumFnArgumentReturned++;
- return;
- case Attribute::NoSync:
- NumFnNoSync++;
- break;
- case Attribute::NoFree:
- NumFnNoFree++;
- break;
- case Attribute::NonNull:
- switch (MP) {
- case AbstractAttribute::MP_RETURNED:
- NumFnReturnedNonNull++;
- break;
- case AbstractAttribute::MP_ARGUMENT:
- NumFnArgumentNonNull++;
- break;
- case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
- NumCSArgumentNonNull++;
- break;
- default:
- break;
- }
- break;
- case Attribute::WillReturn:
- NumFnWillReturn++;
- break;
- default:
- return;
- }
-}
-
-template <typename StateTy>
-using followValueCB_t = std::function<bool(Value *, StateTy &State)>;
-template <typename StateTy>
-using visitValueCB_t = std::function<void(Value *, StateTy &State)>;
-
-/// Recursively visit all values that might become \p InitV at some point. This
+/// Recursively visit all values that might become \p IRP at some point. This
/// will be done by looking through cast instructions, selects, phis, and calls
-/// with the "returned" attribute. The callback \p FollowValueCB is asked before
-/// a potential origin value is looked at. If no \p FollowValueCB is passed, a
-/// default one is used that will make sure we visit every value only once. Once
-/// we cannot look through the value any further, the callback \p VisitValueCB
-/// is invoked and passed the current value and the \p State. To limit how much
-/// effort is invested, we will never visit more than \p MaxValues values.
-template <typename StateTy>
+/// with the "returned" attribute. Once we cannot look through the value any
+/// further, the callback \p VisitValueCB is invoked and passed the current
+/// value, the \p State, and a flag to indicate if we stripped anything. To
+/// limit how much effort is invested, we will never visit more values than
+/// specified by \p MaxValues.
+template <typename AAType, typename StateTy>
static bool genericValueTraversal(
- Value *InitV, StateTy &State, visitValueCB_t<StateTy> &VisitValueCB,
- followValueCB_t<StateTy> *FollowValueCB = nullptr, int MaxValues = 8) {
-
+ Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State,
+ const function_ref<bool(Value &, StateTy &, bool)> &VisitValueCB,
+ int MaxValues = 8) {
+
+ const AAIsDead *LivenessAA = nullptr;
+ if (IRP.getAnchorScope())
+ LivenessAA = &A.getAAFor<AAIsDead>(
+ QueryingAA, IRPosition::function(*IRP.getAnchorScope()),
+ /* TrackDependence */ false);
+ bool AnyDead = false;
+
+ // TODO: Use Positions here to allow context sensitivity in VisitValueCB
SmallPtrSet<Value *, 16> Visited;
- followValueCB_t<bool> DefaultFollowValueCB = [&](Value *Val, bool &) {
- return Visited.insert(Val).second;
- };
-
- if (!FollowValueCB)
- FollowValueCB = &DefaultFollowValueCB;
-
SmallVector<Value *, 16> Worklist;
- Worklist.push_back(InitV);
+ Worklist.push_back(&IRP.getAssociatedValue());
int Iteration = 0;
do {
@@ -174,7 +178,7 @@ static bool genericValueTraversal(
// Check if we should process the current value. To prevent endless
// recursion keep a record of the values we followed!
- if (!(*FollowValueCB)(V, State))
+ if (!Visited.insert(V).second)
continue;
// Make sure we limit the compile time for complex expressions.
@@ -183,23 +187,23 @@ static bool genericValueTraversal(
// Explicitly look through calls with a "returned" attribute if we do
// not have a pointer as stripPointerCasts only works on them.
+ Value *NewV = nullptr;
if (V->getType()->isPointerTy()) {
- V = V->stripPointerCasts();
+ NewV = V->stripPointerCasts();
} else {
CallSite CS(V);
if (CS && CS.getCalledFunction()) {
- Value *NewV = nullptr;
for (Argument &Arg : CS.getCalledFunction()->args())
if (Arg.hasReturnedAttr()) {
NewV = CS.getArgOperand(Arg.getArgNo());
break;
}
- if (NewV) {
- Worklist.push_back(NewV);
- continue;
- }
}
}
+ if (NewV && NewV != V) {
+ Worklist.push_back(NewV);
+ continue;
+ }
// Look through select instructions, visit both potential values.
if (auto *SI = dyn_cast<SelectInst>(V)) {
@@ -208,35 +212,34 @@ static bool genericValueTraversal(
continue;
}
- // Look through phi nodes, visit all operands.
+ // Look through phi nodes, visit all live operands.
if (auto *PHI = dyn_cast<PHINode>(V)) {
- Worklist.append(PHI->op_begin(), PHI->op_end());
+ assert(LivenessAA &&
+ "Expected liveness in the presence of instructions!");
+ for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
+ const BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
+ if (LivenessAA->isAssumedDead(IncomingBB->getTerminator())) {
+ AnyDead = true;
+ continue;
+ }
+ Worklist.push_back(PHI->getIncomingValue(u));
+ }
continue;
}
// Once a leaf is reached we inform the user through the callback.
- VisitValueCB(V, State);
+ if (!VisitValueCB(*V, State, Iteration > 1))
+ return false;
} while (!Worklist.empty());
+ // If we actually used liveness information so we have to record a dependence.
+ if (AnyDead)
+ A.recordDependence(*LivenessAA, QueryingAA);
+
// All values have been visited.
return true;
}
-/// Helper to identify the correct offset into an attribute list.
-static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP,
- unsigned ArgNo = 0) {
- switch (MP) {
- case AbstractAttribute::MP_ARGUMENT:
- case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
- return ArgNo + AttributeList::FirstArgIndex;
- case AbstractAttribute::MP_FUNCTION:
- return AttributeList::FunctionIndex;
- case AbstractAttribute::MP_RETURNED:
- return AttributeList::ReturnIndex;
- }
- llvm_unreachable("Unknown manifest position!");
-}
-
/// Return true if \p New is equal or worse than \p Old.
static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
if (!Old.isIntAttribute())
@@ -247,12 +250,9 @@ static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
/// Return true if the information provided by \p Attr was added to the
/// attribute list \p Attrs. This is only the case if it was not already present
-/// in \p Attrs at the position describe by \p MP and \p ArgNo.
+/// in \p Attrs at the position describe by \p PK and \p AttrIdx.
static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
- AttributeList &Attrs,
- AbstractAttribute::ManifestPosition MP,
- unsigned ArgNo = 0) {
- unsigned AttrIdx = getAttrIndex(MP, ArgNo);
+ AttributeList &Attrs, int AttrIdx) {
if (Attr.isEnumAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
@@ -270,9 +270,47 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
return true;
}
+ if (Attr.isIntAttribute()) {
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+ if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ return false;
+ Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
+ Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ return true;
+ }
llvm_unreachable("Expected enum or string attribute!");
}
+static const Value *getPointerOperand(const Instruction *I) {
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (!LI->isVolatile())
+ return LI->getPointerOperand();
+
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ if (!SI->isVolatile())
+ return SI->getPointerOperand();
+
+ if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I))
+ if (!CXI->isVolatile())
+ return CXI->getPointerOperand();
+
+ if (auto *RMWI = dyn_cast<AtomicRMWInst>(I))
+ if (!RMWI->isVolatile())
+ return RMWI->getPointerOperand();
+
+ return nullptr;
+}
+static const Value *getBasePointerOfAccessPointerOperand(const Instruction *I,
+ int64_t &BytesOffset,
+ const DataLayout &DL) {
+ const Value *Ptr = getPointerOperand(I);
+ if (!Ptr)
+ return nullptr;
+
+ return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL,
+ /*AllowNonInbounds*/ false);
+}
ChangeStatus AbstractAttribute::update(Attributor &A) {
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
@@ -289,143 +327,527 @@ ChangeStatus AbstractAttribute::update(Attributor &A) {
return HasChanged;
}
-ChangeStatus AbstractAttribute::manifest(Attributor &A) {
- assert(getState().isValidState() &&
- "Attempted to manifest an invalid state!");
- assert(getAssociatedValue() &&
- "Attempted to manifest an attribute without associated value!");
-
- ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
- SmallVector<Attribute, 4> DeducedAttrs;
- getDeducedAttributes(DeducedAttrs);
-
- Function &ScopeFn = getAnchorScope();
- LLVMContext &Ctx = ScopeFn.getContext();
- ManifestPosition MP = getManifestPosition();
-
- AttributeList Attrs;
- SmallVector<unsigned, 4> ArgNos;
+ChangeStatus
+IRAttributeManifest::manifestAttrs(Attributor &A, IRPosition &IRP,
+ const ArrayRef<Attribute> &DeducedAttrs) {
+ Function *ScopeFn = IRP.getAssociatedFunction();
+ IRPosition::Kind PK = IRP.getPositionKind();
// In the following some generic code that will manifest attributes in
// DeducedAttrs if they improve the current IR. Due to the different
// annotation positions we use the underlying AttributeList interface.
- // Note that MP_CALL_SITE_ARGUMENT can annotate multiple locations.
- switch (MP) {
- case MP_ARGUMENT:
- ArgNos.push_back(cast<Argument>(getAssociatedValue())->getArgNo());
- Attrs = ScopeFn.getAttributes();
+ AttributeList Attrs;
+ switch (PK) {
+ case IRPosition::IRP_INVALID:
+ case IRPosition::IRP_FLOAT:
+ return ChangeStatus::UNCHANGED;
+ case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_FUNCTION:
+ case IRPosition::IRP_RETURNED:
+ Attrs = ScopeFn->getAttributes();
break;
- case MP_FUNCTION:
- case MP_RETURNED:
- ArgNos.push_back(0);
- Attrs = ScopeFn.getAttributes();
+ case IRPosition::IRP_CALL_SITE:
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes();
break;
- case MP_CALL_SITE_ARGUMENT: {
- CallSite CS(&getAnchoredValue());
- for (unsigned u = 0, e = CS.getNumArgOperands(); u != e; u++)
- if (CS.getArgOperand(u) == getAssociatedValue())
- ArgNos.push_back(u);
- Attrs = CS.getAttributes();
- }
}
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ LLVMContext &Ctx = IRP.getAnchorValue().getContext();
for (const Attribute &Attr : DeducedAttrs) {
- for (unsigned ArgNo : ArgNos) {
- if (!addIfNotExistent(Ctx, Attr, Attrs, MP, ArgNo))
- continue;
+ if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx()))
+ continue;
- HasChanged = ChangeStatus::CHANGED;
- bookkeeping(MP, Attr);
- }
+ HasChanged = ChangeStatus::CHANGED;
}
if (HasChanged == ChangeStatus::UNCHANGED)
return HasChanged;
- switch (MP) {
- case MP_ARGUMENT:
- case MP_FUNCTION:
- case MP_RETURNED:
- ScopeFn.setAttributes(Attrs);
+ switch (PK) {
+ case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_FUNCTION:
+ case IRPosition::IRP_RETURNED:
+ ScopeFn->setAttributes(Attrs);
+ break;
+ case IRPosition::IRP_CALL_SITE:
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ CallSite(&IRP.getAnchorValue()).setAttributes(Attrs);
+ break;
+ case IRPosition::IRP_INVALID:
+ case IRPosition::IRP_FLOAT:
break;
- case MP_CALL_SITE_ARGUMENT:
- CallSite(&getAnchoredValue()).setAttributes(Attrs);
}
return HasChanged;
}
-Function &AbstractAttribute::getAnchorScope() {
- Value &V = getAnchoredValue();
- if (isa<Function>(V))
- return cast<Function>(V);
- if (isa<Argument>(V))
- return *cast<Argument>(V).getParent();
- if (isa<Instruction>(V))
- return *cast<Instruction>(V).getFunction();
- llvm_unreachable("No scope for anchored value found!");
+const IRPosition IRPosition::EmptyKey(255);
+const IRPosition IRPosition::TombstoneKey(256);
+
+SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
+ IRPositions.emplace_back(IRP);
+
+ ImmutableCallSite ICS(&IRP.getAnchorValue());
+ switch (IRP.getPositionKind()) {
+ case IRPosition::IRP_INVALID:
+ case IRPosition::IRP_FLOAT:
+ case IRPosition::IRP_FUNCTION:
+ return;
+ case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_RETURNED:
+ IRPositions.emplace_back(
+ IRPosition::function(*IRP.getAssociatedFunction()));
+ return;
+ case IRPosition::IRP_CALL_SITE:
+ assert(ICS && "Expected call site!");
+ // TODO: We need to look at the operand bundles similar to the redirection
+ // in CallBase.
+ if (!ICS.hasOperandBundles())
+ if (const Function *Callee = ICS.getCalledFunction())
+ IRPositions.emplace_back(IRPosition::function(*Callee));
+ return;
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ assert(ICS && "Expected call site!");
+ // TODO: We need to look at the operand bundles similar to the redirection
+ // in CallBase.
+ if (!ICS.hasOperandBundles()) {
+ if (const Function *Callee = ICS.getCalledFunction()) {
+ IRPositions.emplace_back(IRPosition::returned(*Callee));
+ IRPositions.emplace_back(IRPosition::function(*Callee));
+ }
+ }
+ IRPositions.emplace_back(
+ IRPosition::callsite_function(cast<CallBase>(*ICS.getInstruction())));
+ return;
+ case IRPosition::IRP_CALL_SITE_ARGUMENT: {
+ int ArgNo = IRP.getArgNo();
+ assert(ICS && ArgNo >= 0 && "Expected call site!");
+ // TODO: We need to look at the operand bundles similar to the redirection
+ // in CallBase.
+ if (!ICS.hasOperandBundles()) {
+ const Function *Callee = ICS.getCalledFunction();
+ if (Callee && Callee->arg_size() > unsigned(ArgNo))
+ IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo)));
+ if (Callee)
+ IRPositions.emplace_back(IRPosition::function(*Callee));
+ }
+ IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue()));
+ return;
+ }
+ }
+}
+
+bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs,
+ bool IgnoreSubsumingPositions) const {
+ for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) {
+ for (Attribute::AttrKind AK : AKs)
+ if (EquivIRP.getAttr(AK).getKindAsEnum() == AK)
+ return true;
+ // The first position returned by the SubsumingPositionIterator is
+ // always the position itself. If we ignore subsuming positions we
+ // are done after the first iteration.
+ if (IgnoreSubsumingPositions)
+ break;
+ }
+ return false;
}
-const Function &AbstractAttribute::getAnchorScope() const {
- return const_cast<AbstractAttribute *>(this)->getAnchorScope();
+void IRPosition::getAttrs(ArrayRef<Attribute::AttrKind> AKs,
+ SmallVectorImpl<Attribute> &Attrs) const {
+ for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this))
+ for (Attribute::AttrKind AK : AKs) {
+ const Attribute &Attr = EquivIRP.getAttr(AK);
+ if (Attr.getKindAsEnum() == AK)
+ Attrs.push_back(Attr);
+ }
}
-/// -----------------------NoUnwind Function Attribute--------------------------
+void IRPosition::verify() {
+ switch (KindOrArgNo) {
+ default:
+ assert(KindOrArgNo >= 0 && "Expected argument or call site argument!");
+ assert((isa<CallBase>(AnchorVal) || isa<Argument>(AnchorVal)) &&
+ "Expected call base or argument for positive attribute index!");
+ if (isa<Argument>(AnchorVal)) {
+ assert(cast<Argument>(AnchorVal)->getArgNo() == unsigned(getArgNo()) &&
+ "Argument number mismatch!");
+ assert(cast<Argument>(AnchorVal) == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ } else {
+ assert(cast<CallBase>(*AnchorVal).arg_size() > unsigned(getArgNo()) &&
+ "Call site argument number mismatch!");
+ assert(cast<CallBase>(*AnchorVal).getArgOperand(getArgNo()) ==
+ &getAssociatedValue() &&
+ "Associated value mismatch!");
+ }
+ break;
+ case IRP_INVALID:
+ assert(!AnchorVal && "Expected no value for an invalid position!");
+ break;
+ case IRP_FLOAT:
+ assert((!isa<CallBase>(&getAssociatedValue()) &&
+ !isa<Argument>(&getAssociatedValue())) &&
+ "Expected specialized kind for call base and argument values!");
+ break;
+ case IRP_RETURNED:
+ assert(isa<Function>(AnchorVal) &&
+ "Expected function for a 'returned' position!");
+ assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
+ break;
+ case IRP_CALL_SITE_RETURNED:
+ assert((isa<CallBase>(AnchorVal)) &&
+ "Expected call base for 'call site returned' position!");
+ assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
+ break;
+ case IRP_CALL_SITE:
+ assert((isa<CallBase>(AnchorVal)) &&
+ "Expected call base for 'call site function' position!");
+ assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
+ break;
+ case IRP_FUNCTION:
+ assert(isa<Function>(AnchorVal) &&
+ "Expected function for a 'function' position!");
+ assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
+ break;
+ }
+}
+
+namespace {
+/// Helper functions to clamp a state \p S of type \p StateType with the
+/// information in \p R and indicate/return if \p S did change (as-in update is
+/// required to be run again).
+///
+///{
+template <typename StateType>
+ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R);
+
+template <>
+ChangeStatus clampStateAndIndicateChange<IntegerState>(IntegerState &S,
+ const IntegerState &R) {
+ auto Assumed = S.getAssumed();
+ S ^= R;
+ return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+}
-struct AANoUnwindFunction : AANoUnwind, BooleanState {
+template <>
+ChangeStatus clampStateAndIndicateChange<BooleanState>(BooleanState &S,
+ const BooleanState &R) {
+ return clampStateAndIndicateChange<IntegerState>(S, R);
+}
+///}
- AANoUnwindFunction(Function &F, InformationCache &InfoCache)
- : AANoUnwind(F, InfoCache) {}
+/// Clamp the information known for all returned values of a function
+/// (identified by \p QueryingAA) into \p S.
+template <typename AAType, typename StateType = typename AAType::StateType>
+static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
+ StateType &S) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for "
+ << static_cast<const AbstractAttribute &>(QueryingAA)
+ << " into " << S << "\n");
+
+ assert((QueryingAA.getIRPosition().getPositionKind() ==
+ IRPosition::IRP_RETURNED ||
+ QueryingAA.getIRPosition().getPositionKind() ==
+ IRPosition::IRP_CALL_SITE_RETURNED) &&
+ "Can only clamp returned value states for a function returned or call "
+ "site returned position!");
+
+ // Use an optional state as there might not be any return values and we want
+ // to join (IntegerState::operator&) the state of all there are.
+ Optional<StateType> T;
+
+ // Callback for each possibly returned value.
+ auto CheckReturnValue = [&](Value &RV) -> bool {
+ const IRPosition &RVPos = IRPosition::value(RV);
+ const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos);
+ LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
+ << " @ " << RVPos << "\n");
+ const StateType &AAS = static_cast<const StateType &>(AA.getState());
+ if (T.hasValue())
+ *T &= AAS;
+ else
+ T = AAS;
+ LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T
+ << "\n");
+ return T->isValidState();
+ };
- /// See AbstractAttribute::getState()
- /// {
- AbstractState &getState() override { return *this; }
- const AbstractState &getState() const override { return *this; }
- /// }
+ if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA))
+ S.indicatePessimisticFixpoint();
+ else if (T.hasValue())
+ S ^= *T;
+}
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+/// Helper class to compose two generic deduction
+template <typename AAType, typename Base, typename StateType,
+ template <typename...> class F, template <typename...> class G>
+struct AAComposeTwoGenericDeduction
+ : public F<AAType, G<AAType, Base, StateType>, StateType> {
+ AAComposeTwoGenericDeduction(const IRPosition &IRP)
+ : F<AAType, G<AAType, Base, StateType>, StateType>(IRP) {}
- const std::string getAsStr() const override {
- return getAssumed() ? "nounwind" : "may-unwind";
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus ChangedF = F<AAType, G<AAType, Base, StateType>, StateType>::updateImpl(A);
+ ChangeStatus ChangedG = G<AAType, Base, StateType>::updateImpl(A);
+ return ChangedF | ChangedG;
}
+};
+
+/// Helper class for generic deduction: return value -> returned position.
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+struct AAReturnedFromReturnedValues : public Base {
+ AAReturnedFromReturnedValues(const IRPosition &IRP) : Base(IRP) {}
/// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus updateImpl(Attributor &A) override {
+ StateType S;
+ clampReturnedValueStates<AAType, StateType>(A, *this, S);
+ // TODO: If we know we visited all returned values, thus no are assumed
+ // dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange<StateType>(this->getState(), S);
+ }
+};
- /// See AANoUnwind::isAssumedNoUnwind().
- bool isAssumedNoUnwind() const override { return getAssumed(); }
+/// Clamp the information known at all call sites for a given argument
+/// (identified by \p QueryingAA) into \p S.
+template <typename AAType, typename StateType = typename AAType::StateType>
+static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
+ StateType &S) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for "
+ << static_cast<const AbstractAttribute &>(QueryingAA)
+ << " into " << S << "\n");
+
+ assert(QueryingAA.getIRPosition().getPositionKind() ==
+ IRPosition::IRP_ARGUMENT &&
+ "Can only clamp call site argument states for an argument position!");
+
+ // Use an optional state as there might not be any return values and we want
+ // to join (IntegerState::operator&) the state of all there are.
+ Optional<StateType> T;
+
+ // The argument number which is also the call site argument number.
+ unsigned ArgNo = QueryingAA.getIRPosition().getArgNo();
+
+ auto CallSiteCheck = [&](AbstractCallSite ACS) {
+ const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
+ // Check if a coresponding argument was found or if it is on not associated
+ // (which can happen for callback calls).
+ if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
+ return false;
- /// See AANoUnwind::isKnownNoUnwind().
- bool isKnownNoUnwind() const override { return getKnown(); }
+ const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos);
+ LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
+ << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
+ const StateType &AAS = static_cast<const StateType &>(AA.getState());
+ if (T.hasValue())
+ *T &= AAS;
+ else
+ T = AAS;
+ LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T
+ << "\n");
+ return T->isValidState();
+ };
+
+ if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true))
+ S.indicatePessimisticFixpoint();
+ else if (T.hasValue())
+ S ^= *T;
+}
+
+/// Helper class for generic deduction: call site argument -> argument position.
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+struct AAArgumentFromCallSiteArguments : public Base {
+ AAArgumentFromCallSiteArguments(const IRPosition &IRP) : Base(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ StateType S;
+ clampCallSiteArgumentStates<AAType, StateType>(A, *this, S);
+ // TODO: If we know we visited all incoming values, thus no are assumed
+ // dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange<StateType>(this->getState(), S);
+ }
};
-ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+/// Helper class for generic replication: function returned -> cs returned.
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+struct AACallSiteReturnedFromReturned : public Base {
+ AACallSiteReturnedFromReturned(const IRPosition &IRP) : Base(IRP) {}
- // The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
- auto Opcodes = {
- (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet,
- (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ assert(this->getIRPosition().getPositionKind() ==
+ IRPosition::IRP_CALL_SITE_RETURNED &&
+ "Can only wrap function returned positions for call site returned "
+ "positions!");
+ auto &S = this->getState();
+
+ const Function *AssociatedFunction =
+ this->getIRPosition().getAssociatedFunction();
+ if (!AssociatedFunction)
+ return S.indicatePessimisticFixpoint();
+
+ IRPosition FnPos = IRPosition::returned(*AssociatedFunction);
+ const AAType &AA = A.getAAFor<AAType>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ S, static_cast<const typename AAType::StateType &>(AA.getState()));
+ }
+};
- for (unsigned Opcode : Opcodes) {
- for (Instruction *I : OpcodeInstMap[Opcode]) {
- if (!I->mayThrow())
- continue;
+/// Helper class for generic deduction using must-be-executed-context
+/// Base class is required to have `followUse` method.
- auto *NoUnwindAA = A.getAAFor<AANoUnwind>(*this, *I);
+/// bool followUse(Attributor &A, const Use *U, const Instruction *I)
+/// U - Underlying use.
+/// I - The user of the \p U.
+/// `followUse` returns true if the value should be tracked transitively.
- if (!NoUnwindAA || !NoUnwindAA->isAssumedNoUnwind()) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+struct AAFromMustBeExecutedContext : public Base {
+ AAFromMustBeExecutedContext(const IRPosition &IRP) : Base(IRP) {}
+
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+ IRPosition &IRP = this->getIRPosition();
+ Instruction *CtxI = IRP.getCtxI();
+
+ if (!CtxI)
+ return;
+
+ for (const Use &U : IRP.getAssociatedValue().uses())
+ Uses.insert(&U);
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto BeforeState = this->getState();
+ auto &S = this->getState();
+ Instruction *CtxI = this->getIRPosition().getCtxI();
+ if (!CtxI)
+ return ChangeStatus::UNCHANGED;
+
+ MustBeExecutedContextExplorer &Explorer =
+ A.getInfoCache().getMustBeExecutedContextExplorer();
+
+ SetVector<const Use *> NextUses;
+
+ for (const Use *U : Uses) {
+ if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) {
+ auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
+ bool Found = EIt.count(UserI);
+ while (!Found && ++EIt != EEnd)
+ Found = EIt.getCurrentInst() == UserI;
+ if (Found && Base::followUse(A, U, UserI))
+ for (const Use &Us : UserI->uses())
+ NextUses.insert(&Us);
}
}
+ for (const Use *U : NextUses)
+ Uses.insert(U);
+
+ return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
}
- return ChangeStatus::UNCHANGED;
-}
+
+private:
+ /// Container for (transitive) uses of the associated value.
+ SetVector<const Use *> Uses;
+};
+
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+using AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext =
+ AAComposeTwoGenericDeduction<AAType, Base, StateType,
+ AAFromMustBeExecutedContext,
+ AAArgumentFromCallSiteArguments>;
+
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+using AACallSiteReturnedFromReturnedAndMustBeExecutedContext =
+ AAComposeTwoGenericDeduction<AAType, Base, StateType,
+ AAFromMustBeExecutedContext,
+ AACallSiteReturnedFromReturned>;
+
+/// -----------------------NoUnwind Function Attribute--------------------------
+
+struct AANoUnwindImpl : AANoUnwind {
+ AANoUnwindImpl(const IRPosition &IRP) : AANoUnwind(IRP) {}
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nounwind" : "may-unwind";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto Opcodes = {
+ (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet,
+ (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
+
+ auto CheckForNoUnwind = [&](Instruction &I) {
+ if (!I.mayThrow())
+ return true;
+
+ if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
+ const auto &NoUnwindAA =
+ A.getAAFor<AANoUnwind>(*this, IRPosition::callsite_function(ICS));
+ return NoUnwindAA.isAssumedNoUnwind();
+ }
+ return false;
+ };
+
+ if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+struct AANoUnwindFunction final : public AANoUnwindImpl {
+ AANoUnwindFunction(const IRPosition &IRP) : AANoUnwindImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) }
+};
+
+/// NoUnwind attribute deduction for a call sites.
+struct AANoUnwindCallSite final : AANoUnwindImpl {
+ AANoUnwindCallSite(const IRPosition &IRP) : AANoUnwindImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoUnwindImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoUnwind::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
+};
/// --------------------- Function Return Values -------------------------------
@@ -434,68 +856,48 @@ ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) {
///
/// If there is a unique returned value R, the manifest method will:
/// - mark R with the "returned" attribute, if R is an argument.
-class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState {
+class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState {
/// Mapping of values potentially returned by the associated function to the
/// return instructions that might return them.
- DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> ReturnedValues;
+ MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues;
+
+ /// Mapping to remember the number of returned values for a call site such
+ /// that we can avoid updates if nothing changed.
+ DenseMap<const CallBase *, unsigned> NumReturnedValuesPerKnownAA;
+
+ /// Set of unresolved calls returned by the associated function.
+ SmallSetVector<CallBase *, 4> UnresolvedCalls;
/// State flags
///
///{
- bool IsFixed;
- bool IsValidState;
- bool HasOverdefinedReturnedCalls;
+ bool IsFixed = false;
+ bool IsValidState = true;
///}
- /// Collect values that could become \p V in the set \p Values, each mapped to
- /// \p ReturnInsts.
- void collectValuesRecursively(
- Attributor &A, Value *V, SmallPtrSetImpl<ReturnInst *> &ReturnInsts,
- DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> &Values) {
-
- visitValueCB_t<bool> VisitValueCB = [&](Value *Val, bool &) {
- assert(!isa<Instruction>(Val) ||
- &getAnchorScope() == cast<Instruction>(Val)->getFunction());
- Values[Val].insert(ReturnInsts.begin(), ReturnInsts.end());
- };
-
- bool UnusedBool;
- bool Success = genericValueTraversal(V, UnusedBool, VisitValueCB);
-
- // If we did abort the above traversal we haven't see all the values.
- // Consequently, we cannot know if the information we would derive is
- // accurate so we give up early.
- if (!Success)
- indicatePessimisticFixpoint();
- }
-
public:
- /// See AbstractAttribute::AbstractAttribute(...).
- AAReturnedValuesImpl(Function &F, InformationCache &InfoCache)
- : AAReturnedValues(F, InfoCache) {
- // We do not have an associated argument yet.
- AssociatedVal = nullptr;
- }
+ AAReturnedValuesImpl(const IRPosition &IRP) : AAReturnedValues(IRP) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// Reset the state.
- AssociatedVal = nullptr;
IsFixed = false;
IsValidState = true;
- HasOverdefinedReturnedCalls = false;
ReturnedValues.clear();
- Function &F = cast<Function>(getAnchoredValue());
+ Function *F = getAssociatedFunction();
+ if (!F) {
+ indicatePessimisticFixpoint();
+ return;
+ }
// The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F);
// Look through all arguments, if one is marked as returned we are done.
- for (Argument &Arg : F.args()) {
+ for (Argument &Arg : F->args()) {
if (Arg.hasReturnedAttr()) {
-
auto &ReturnInstSet = ReturnedValues[&Arg];
for (Instruction *RI : OpcodeInstMap[Instruction::Ret])
ReturnInstSet.insert(cast<ReturnInst>(RI));
@@ -505,13 +907,8 @@ public:
}
}
- // If no argument was marked as returned we look at all return instructions
- // and collect potentially returned values.
- for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) {
- SmallPtrSet<ReturnInst *, 1> RISet({cast<ReturnInst>(RI)});
- collectValuesRecursively(A, cast<ReturnInst>(RI)->getReturnValue(), RISet,
- ReturnedValues);
- }
+ if (!F->hasExactDefinition())
+ indicatePessimisticFixpoint();
}
/// See AbstractAttribute::manifest(...).
@@ -523,25 +920,35 @@ public:
/// See AbstractAttribute::getState(...).
const AbstractState &getState() const override { return *this; }
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
-
/// See AbstractAttribute::updateImpl(Attributor &A).
ChangeStatus updateImpl(Attributor &A) override;
+ llvm::iterator_range<iterator> returned_values() override {
+ return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
+ }
+
+ llvm::iterator_range<const_iterator> returned_values() const override {
+ return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
+ }
+
+ const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const override {
+ return UnresolvedCalls;
+ }
+
/// Return the number of potential return values, -1 if unknown.
- size_t getNumReturnValues() const {
+ size_t getNumReturnValues() const override {
return isValidState() ? ReturnedValues.size() : -1;
}
/// Return an assumed unique return value if a single candidate is found. If
/// there cannot be one, return a nullptr. If it is not clear yet, return the
/// Optional::NoneType.
- Optional<Value *> getAssumedUniqueReturnValue() const;
+ Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
- /// See AbstractState::checkForallReturnedValues(...).
- bool
- checkForallReturnedValues(std::function<bool(Value &)> &Pred) const override;
+ /// See AbstractState::checkForAllReturnedValues(...).
+ bool checkForAllReturnedValuesAndReturnInsts(
+ const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
+ &Pred) const override;
/// Pretty print the attribute similar to the IR representation.
const std::string getAsStr() const override;
@@ -553,13 +960,15 @@ public:
bool isValidState() const override { return IsValidState; }
/// See AbstractState::indicateOptimisticFixpoint(...).
- void indicateOptimisticFixpoint() override {
+ ChangeStatus indicateOptimisticFixpoint() override {
IsFixed = true;
- IsValidState &= true;
+ return ChangeStatus::UNCHANGED;
}
- void indicatePessimisticFixpoint() override {
+
+ ChangeStatus indicatePessimisticFixpoint() override {
IsFixed = true;
IsValidState = false;
+ return ChangeStatus::CHANGED;
}
};
@@ -568,21 +977,52 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
// Bookkeeping.
assert(isValidState());
- NumFnKnownReturns++;
+ STATS_DECLTRACK(KnownReturnValues, FunctionReturn,
+ "Number of function with known return values");
// Check if we have an assumed unique return value that we could manifest.
- Optional<Value *> UniqueRV = getAssumedUniqueReturnValue();
+ Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
if (!UniqueRV.hasValue() || !UniqueRV.getValue())
return Changed;
// Bookkeeping.
- NumFnUniqueReturned++;
+ STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
+ "Number of function with unique return");
+
+ // Callback to replace the uses of CB with the constant C.
+ auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) {
+ if (CB.getNumUses() == 0 || CB.isMustTailCall())
+ return ChangeStatus::UNCHANGED;
+ CB.replaceAllUsesWith(&C);
+ return ChangeStatus::CHANGED;
+ };
// If the assumed unique return value is an argument, annotate it.
if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
- AssociatedVal = UniqueRVArg;
- Changed = AbstractAttribute::manifest(A) | Changed;
+ getIRPosition() = IRPosition::argument(*UniqueRVArg);
+ Changed = IRAttribute::manifest(A);
+ } else if (auto *RVC = dyn_cast<Constant>(UniqueRV.getValue())) {
+ // We can replace the returned value with the unique returned constant.
+ Value &AnchorValue = getAnchorValue();
+ if (Function *F = dyn_cast<Function>(&AnchorValue)) {
+ for (const Use &U : F->uses())
+ if (CallBase *CB = dyn_cast<CallBase>(U.getUser()))
+ if (CB->isCallee(&U)) {
+ Constant *RVCCast =
+ ConstantExpr::getTruncOrBitCast(RVC, CB->getType());
+ Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed;
+ }
+ } else {
+ assert(isa<CallBase>(AnchorValue) &&
+ "Expcected a function or call base anchor!");
+ Constant *RVCCast =
+ ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType());
+ Changed = ReplaceCallSiteUsersWith(cast<CallBase>(AnchorValue), *RVCCast);
+ }
+ if (Changed == ChangeStatus::CHANGED)
+ STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn,
+ "Number of function returns replaced by constant return");
}
return Changed;
@@ -590,18 +1030,20 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
const std::string AAReturnedValuesImpl::getAsStr() const {
return (isAtFixpoint() ? "returns(#" : "may-return(#") +
- (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")";
+ (isValidState() ? std::to_string(getNumReturnValues()) : "?") +
+ ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]";
}
-Optional<Value *> AAReturnedValuesImpl::getAssumedUniqueReturnValue() const {
- // If checkForallReturnedValues provides a unique value, ignoring potential
+Optional<Value *>
+AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const {
+ // If checkForAllReturnedValues provides a unique value, ignoring potential
// undef values that can also be present, it is assumed to be the actual
// return value and forwarded to the caller of this method. If there are
// multiple, a nullptr is returned indicating there cannot be a unique
// returned value.
Optional<Value *> UniqueRV;
- std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
+ auto Pred = [&](Value &RV) -> bool {
// If we found a second returned value and neither the current nor the saved
// one is an undef, there is no unique returned value. Undefs are special
// since we can pretend they have any value.
@@ -618,14 +1060,15 @@ Optional<Value *> AAReturnedValuesImpl::getAssumedUniqueReturnValue() const {
return true;
};
- if (!checkForallReturnedValues(Pred))
+ if (!A.checkForAllReturnedValues(Pred, *this))
UniqueRV = nullptr;
return UniqueRV;
}
-bool AAReturnedValuesImpl::checkForallReturnedValues(
- std::function<bool(Value &)> &Pred) const {
+bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts(
+ const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
+ &Pred) const {
if (!isValidState())
return false;
@@ -634,11 +1077,11 @@ bool AAReturnedValuesImpl::checkForallReturnedValues(
for (auto &It : ReturnedValues) {
Value *RV = It.first;
- ImmutableCallSite ICS(RV);
- if (ICS && !HasOverdefinedReturnedCalls)
+ CallBase *CB = dyn_cast<CallBase>(RV);
+ if (CB && !UnresolvedCalls.count(CB))
continue;
- if (!Pred(*RV))
+ if (!Pred(*RV, It.second))
return false;
}
@@ -646,125 +1089,196 @@ bool AAReturnedValuesImpl::checkForallReturnedValues(
}
ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
+ size_t NumUnresolvedCalls = UnresolvedCalls.size();
+ bool Changed = false;
+
+ // State used in the value traversals starting in returned values.
+ struct RVState {
+ // The map in which we collect return values -> return instrs.
+ decltype(ReturnedValues) &RetValsMap;
+ // The flag to indicate a change.
+ bool &Changed;
+ // The return instrs we come from.
+ SmallSetVector<ReturnInst *, 4> RetInsts;
+ };
- // Check if we know of any values returned by the associated function,
- // if not, we are done.
- if (getNumReturnValues() == 0) {
- indicateOptimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
+ // Callback for a leaf value returned by the associated function.
+ auto VisitValueCB = [](Value &Val, RVState &RVS, bool) -> bool {
+ auto Size = RVS.RetValsMap[&Val].size();
+ RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end());
+ bool Inserted = RVS.RetValsMap[&Val].size() != Size;
+ RVS.Changed |= Inserted;
+ LLVM_DEBUG({
+ if (Inserted)
+ dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val
+ << " => " << RVS.RetInsts.size() << "\n";
+ });
+ return true;
+ };
- // Check if any of the returned values is a call site we can refine.
- decltype(ReturnedValues) AddRVs;
- bool HasCallSite = false;
+ // Helper method to invoke the generic value traversal.
+ auto VisitReturnedValue = [&](Value &RV, RVState &RVS) {
+ IRPosition RetValPos = IRPosition::value(RV);
+ return genericValueTraversal<AAReturnedValues, RVState>(A, RetValPos, *this,
+ RVS, VisitValueCB);
+ };
- // Look at all returned call sites.
- for (auto &It : ReturnedValues) {
- SmallPtrSet<ReturnInst *, 2> &ReturnInsts = It.second;
- Value *RV = It.first;
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Potentially returned value " << *RV
- << "\n");
+ // Callback for all "return intructions" live in the associated function.
+ auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) {
+ ReturnInst &Ret = cast<ReturnInst>(I);
+ RVState RVS({ReturnedValues, Changed, {}});
+ RVS.RetInsts.insert(&Ret);
+ return VisitReturnedValue(*Ret.getReturnValue(), RVS);
+ };
- // Only call sites can change during an update, ignore the rest.
- CallSite RetCS(RV);
- if (!RetCS)
+ // Start by discovering returned values from all live returned instructions in
+ // the associated function.
+ if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}))
+ return indicatePessimisticFixpoint();
+
+ // Once returned values "directly" present in the code are handled we try to
+ // resolve returned calls.
+ decltype(ReturnedValues) NewRVsMap;
+ for (auto &It : ReturnedValues) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *It.first
+ << " by #" << It.second.size() << " RIs\n");
+ CallBase *CB = dyn_cast<CallBase>(It.first);
+ if (!CB || UnresolvedCalls.count(CB))
continue;
- // For now, any call site we see will prevent us from directly fixing the
- // state. However, if the information on the callees is fixed, the call
- // sites will be removed and we will fix the information for this state.
- HasCallSite = true;
-
- // Try to find a assumed unique return value for the called function.
- auto *RetCSAA = A.getAAFor<AAReturnedValuesImpl>(*this, *RV);
- if (!RetCSAA) {
- HasOverdefinedReturnedCalls = true;
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV
- << ") with " << (RetCSAA ? "invalid" : "no")
- << " associated state\n");
+ if (!CB->getCalledFunction()) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
+ << "\n");
+ UnresolvedCalls.insert(CB);
continue;
}
- // Try to find a assumed unique return value for the called function.
- Optional<Value *> AssumedUniqueRV = RetCSAA->getAssumedUniqueReturnValue();
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const auto &RetValAA = A.getAAFor<AAReturnedValues>(
+ *this, IRPosition::function(*CB->getCalledFunction()));
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: "
+ << static_cast<const AbstractAttribute &>(RetValAA)
+ << "\n");
- // If no assumed unique return value was found due to the lack of
- // candidates, we may need to resolve more calls (through more update
- // iterations) or the called function will not return. Either way, we simply
- // stick with the call sites as return values. Because there were not
- // multiple possibilities, we do not treat it as overdefined.
- if (!AssumedUniqueRV.hasValue())
+ // Skip dead ends, thus if we do not know anything about the returned
+ // call we mark it as unresolved and it will stay that way.
+ if (!RetValAA.getState().isValidState()) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
+ << "\n");
+ UnresolvedCalls.insert(CB);
continue;
+ }
- // If multiple, non-refinable values were found, there cannot be a unique
- // return value for the called function. The returned call is overdefined!
- if (!AssumedUniqueRV.getValue()) {
- HasOverdefinedReturnedCalls = true;
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site has multiple "
- "potentially returned values\n");
+ // Do not try to learn partial information. If the callee has unresolved
+ // return values we will treat the call as unresolved/opaque.
+ auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls();
+ if (!RetValAAUnresolvedCalls.empty()) {
+ UnresolvedCalls.insert(CB);
continue;
}
- LLVM_DEBUG({
- bool UniqueRVIsKnown = RetCSAA->isAtFixpoint();
- dbgs() << "[AAReturnedValues] Returned call site "
- << (UniqueRVIsKnown ? "known" : "assumed")
- << " unique return value: " << *AssumedUniqueRV << "\n";
- });
+ // Now check if we can track transitively returned values. If possible, thus
+ // if all return value can be represented in the current scope, do so.
+ bool Unresolved = false;
+ for (auto &RetValAAIt : RetValAA.returned_values()) {
+ Value *RetVal = RetValAAIt.first;
+ if (isa<Argument>(RetVal) || isa<CallBase>(RetVal) ||
+ isa<Constant>(RetVal))
+ continue;
+ // Anything that did not fit in the above categories cannot be resolved,
+ // mark the call as unresolved.
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value "
+ "cannot be translated: "
+ << *RetVal << "\n");
+ UnresolvedCalls.insert(CB);
+ Unresolved = true;
+ break;
+ }
- // The assumed unique return value.
- Value *AssumedRetVal = AssumedUniqueRV.getValue();
-
- // If the assumed unique return value is an argument, lookup the matching
- // call site operand and recursively collect new returned values.
- // If it is not an argument, it is just put into the set of returned values
- // as we would have already looked through casts, phis, and similar values.
- if (Argument *AssumedRetArg = dyn_cast<Argument>(AssumedRetVal))
- collectValuesRecursively(A,
- RetCS.getArgOperand(AssumedRetArg->getArgNo()),
- ReturnInsts, AddRVs);
- else
- AddRVs[AssumedRetVal].insert(ReturnInsts.begin(), ReturnInsts.end());
- }
+ if (Unresolved)
+ continue;
- // Keep track of any change to trigger updates on dependent attributes.
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ // Now track transitively returned values.
+ unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB];
+ if (NumRetAA == RetValAA.getNumReturnValues()) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not "
+ "changed since it was seen last\n");
+ continue;
+ }
+ NumRetAA = RetValAA.getNumReturnValues();
+
+ for (auto &RetValAAIt : RetValAA.returned_values()) {
+ Value *RetVal = RetValAAIt.first;
+ if (Argument *Arg = dyn_cast<Argument>(RetVal)) {
+ // Arguments are mapped to call site operands and we begin the traversal
+ // again.
+ bool Unused = false;
+ RVState RVS({NewRVsMap, Unused, RetValAAIt.second});
+ VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS);
+ continue;
+ } else if (isa<CallBase>(RetVal)) {
+ // Call sites are resolved by the callee attribute over time, no need to
+ // do anything for us.
+ continue;
+ } else if (isa<Constant>(RetVal)) {
+ // Constants are valid everywhere, we can simply take them.
+ NewRVsMap[RetVal].insert(It.second.begin(), It.second.end());
+ continue;
+ }
+ }
+ }
- for (auto &It : AddRVs) {
+ // To avoid modifications to the ReturnedValues map while we iterate over it
+ // we kept record of potential new entries in a copy map, NewRVsMap.
+ for (auto &It : NewRVsMap) {
assert(!It.second.empty() && "Entry does not add anything.");
auto &ReturnInsts = ReturnedValues[It.first];
for (ReturnInst *RI : It.second)
- if (ReturnInsts.insert(RI).second) {
+ if (ReturnInsts.insert(RI)) {
LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value "
<< *It.first << " => " << *RI << "\n");
- Changed = ChangeStatus::CHANGED;
+ Changed = true;
}
}
- // If there is no call site in the returned values we are done.
- if (!HasCallSite) {
- indicateOptimisticFixpoint();
- return ChangeStatus::CHANGED;
- }
-
- return Changed;
+ Changed |= (NumUnresolvedCalls != UnresolvedCalls.size());
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
-/// ------------------------ NoSync Function Attribute -------------------------
+struct AAReturnedValuesFunction final : public AAReturnedValuesImpl {
+ AAReturnedValuesFunction(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {}
-struct AANoSyncFunction : AANoSync, BooleanState {
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) }
+};
- AANoSyncFunction(Function &F, InformationCache &InfoCache)
- : AANoSync(F, InfoCache) {}
+/// Returned values information for a call sites.
+struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
+ AAReturnedValuesCallSite(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {}
- /// See AbstractAttribute::getState()
- /// {
- AbstractState &getState() override { return *this; }
- const AbstractState &getState() const override { return *this; }
- /// }
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites instead of
+ // redirecting requests to the callee.
+ llvm_unreachable("Abstract attributes for returned values are not "
+ "supported for call sites yet!");
+ }
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// ------------------------ NoSync Function Attribute -------------------------
+
+struct AANoSyncImpl : AANoSync {
+ AANoSyncImpl(const IRPosition &IRP) : AANoSync(IRP) {}
const std::string getAsStr() const override {
return getAssumed() ? "nosync" : "may-sync";
@@ -773,12 +1287,6 @@ struct AANoSyncFunction : AANoSync, BooleanState {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
- /// See AANoSync::isAssumedNoSync()
- bool isAssumedNoSync() const override { return getAssumed(); }
-
- /// See AANoSync::isKnownNoSync()
- bool isKnownNoSync() const override { return getKnown(); }
-
/// Helper function used to determine whether an instruction is non-relaxed
/// atomic. In other words, if an atomic instruction does not have unordered
/// or monotonic ordering
@@ -792,7 +1300,7 @@ struct AANoSyncFunction : AANoSync, BooleanState {
static bool isNoSyncIntrinsic(Instruction *I);
};
-bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) {
+bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
if (!I->isAtomic())
return false;
@@ -841,7 +1349,7 @@ bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) {
/// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics.
/// FIXME: We should ipmrove the handling of intrinsics.
-bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) {
+bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
/// Element wise atomic memory intrinsics are can only be unordered,
@@ -863,7 +1371,7 @@ bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) {
return false;
}
-bool AANoSyncFunction::isVolatile(Instruction *I) {
+bool AANoSyncImpl::isVolatile(Instruction *I) {
assert(!ImmutableCallSite(I) && !isa<CallBase>(I) &&
"Calls should not be checked here");
@@ -881,482 +1389,3074 @@ bool AANoSyncFunction::isVolatile(Instruction *I) {
}
}
-ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
- /// We are looking for volatile instructions or Non-Relaxed atomics.
- /// FIXME: We should ipmrove the handling of intrinsics.
- for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(F)) {
- ImmutableCallSite ICS(I);
- auto *NoSyncAA = A.getAAFor<AANoSyncFunction>(*this, *I);
+ auto CheckRWInstForNoSync = [&](Instruction &I) {
+ /// We are looking for volatile instructions or Non-Relaxed atomics.
+ /// FIXME: We should ipmrove the handling of intrinsics.
- if (isa<IntrinsicInst>(I) && isNoSyncIntrinsic(I))
- continue;
+ if (isa<IntrinsicInst>(&I) && isNoSyncIntrinsic(&I))
+ return true;
+
+ if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
+ if (ICS.hasFnAttr(Attribute::NoSync))
+ return true;
+
+ const auto &NoSyncAA =
+ A.getAAFor<AANoSync>(*this, IRPosition::callsite_function(ICS));
+ if (NoSyncAA.isAssumedNoSync())
+ return true;
+ return false;
+ }
+
+ if (!isVolatile(&I) && !isNonRelaxedAtomic(&I))
+ return true;
+
+ return false;
+ };
- if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) &&
- !ICS.hasFnAttr(Attribute::NoSync)) {
+ auto CheckForNoSync = [&](Instruction &I) {
+ // At this point we handled all read/write effects and they are all
+ // nosync, so they can be skipped.
+ if (I.mayReadOrWriteMemory())
+ return true;
+
+ // non-convergent and readnone imply nosync.
+ return !ImmutableCallSite(&I).isConvergent();
+ };
+
+ if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) ||
+ !A.checkForAllCallLikeInstructions(CheckForNoSync, *this))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+}
+
+struct AANoSyncFunction final : public AANoSyncImpl {
+ AANoSyncFunction(const IRPosition &IRP) : AANoSyncImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) }
+};
+
+/// NoSync attribute deduction for a call sites.
+struct AANoSyncCallSite final : AANoSyncImpl {
+ AANoSyncCallSite(const IRPosition &IRP) : AANoSyncImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoSyncImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoSync::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); }
+};
+
+/// ------------------------ No-Free Attributes ----------------------------
+
+struct AANoFreeImpl : public AANoFree {
+ AANoFreeImpl(const IRPosition &IRP) : AANoFree(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto CheckForNoFree = [&](Instruction &I) {
+ ImmutableCallSite ICS(&I);
+ if (ICS.hasFnAttr(Attribute::NoFree))
+ return true;
+
+ const auto &NoFreeAA =
+ A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(ICS));
+ return NoFreeAA.isAssumedNoFree();
+ };
+
+ if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nofree" : "may-free";
+ }
+};
+
+struct AANoFreeFunction final : public AANoFreeImpl {
+ AANoFreeFunction(const IRPosition &IRP) : AANoFreeImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) }
+};
+
+/// NoFree attribute deduction for a call sites.
+struct AANoFreeCallSite final : AANoFreeImpl {
+ AANoFreeCallSite(const IRPosition &IRP) : AANoFreeImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoFreeImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoFree::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); }
+};
+
+/// ------------------------ NonNull Argument Attribute ------------------------
+static int64_t getKnownNonNullAndDerefBytesForUse(
+ Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue,
+ const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) {
+ TrackUse = false;
+
+ const Value *UseV = U->get();
+ if (!UseV->getType()->isPointerTy())
+ return 0;
+
+ Type *PtrTy = UseV->getType();
+ const Function *F = I->getFunction();
+ bool NullPointerIsDefined =
+ F ? llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true;
+ const DataLayout &DL = A.getInfoCache().getDL();
+ if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
+ if (ICS.isBundleOperand(U))
+ return 0;
+
+ if (ICS.isCallee(U)) {
+ IsNonNull |= !NullPointerIsDefined;
+ return 0;
}
- if (ICS)
- continue;
+ unsigned ArgNo = ICS.getArgumentNo(U);
+ IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo);
+ auto &DerefAA = A.getAAFor<AADereferenceable>(QueryingAA, IRP);
+ IsNonNull |= DerefAA.isKnownNonNull();
+ return DerefAA.getKnownDereferenceableBytes();
+ }
- if (!isVolatile(I) && !isNonRelaxedAtomic(I))
- continue;
+ int64_t Offset;
+ if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) {
+ if (Base == &AssociatedValue && getPointerOperand(I) == UseV) {
+ int64_t DerefBytes =
+ Offset + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType());
+
+ IsNonNull |= !NullPointerIsDefined;
+ return DerefBytes;
+ }
+ }
+ if (const Value *Base =
+ GetPointerBaseWithConstantOffset(UseV, Offset, DL,
+ /*AllowNonInbounds*/ false)) {
+ auto &DerefAA =
+ A.getAAFor<AADereferenceable>(QueryingAA, IRPosition::value(*Base));
+ IsNonNull |= (!NullPointerIsDefined && DerefAA.isKnownNonNull());
+ IsNonNull |= (!NullPointerIsDefined && (Offset != 0));
+ int64_t DerefBytes = DerefAA.getKnownDereferenceableBytes();
+ return std::max(int64_t(0), DerefBytes - Offset);
+ }
+
+ return 0;
+}
+
+struct AANonNullImpl : AANonNull {
+ AANonNullImpl(const IRPosition &IRP)
+ : AANonNull(IRP),
+ NullIsDefined(NullPointerIsDefined(
+ getAnchorScope(),
+ getAssociatedValue().getType()->getPointerAddressSpace())) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (!NullIsDefined &&
+ hasAttr({Attribute::NonNull, Attribute::Dereferenceable}))
+ indicateOptimisticFixpoint();
+ else
+ AANonNull::initialize(A);
+ }
+
+ /// See AAFromMustBeExecutedContext
+ bool followUse(Attributor &A, const Use *U, const Instruction *I) {
+ bool IsNonNull = false;
+ bool TrackUse = false;
+ getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I,
+ IsNonNull, TrackUse);
+ takeKnownMaximum(IsNonNull);
+ return TrackUse;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nonnull" : "may-null";
+ }
+
+ /// Flag to determine if the underlying value can be null and still allow
+ /// valid accesses.
+ const bool NullIsDefined;
+};
+
+/// NonNull attribute for a floating value.
+struct AANonNullFloating
+ : AAFromMustBeExecutedContext<AANonNull, AANonNullImpl> {
+ using Base = AAFromMustBeExecutedContext<AANonNull, AANonNullImpl>;
+ AANonNullFloating(const IRPosition &IRP) : Base(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+
+ if (isAtFixpoint())
+ return;
+
+ const IRPosition &IRP = getIRPosition();
+ const Value &V = IRP.getAssociatedValue();
+ const DataLayout &DL = A.getDataLayout();
+
+ // TODO: This context sensitive query should be removed once we can do
+ // context sensitive queries in the genericValueTraversal below.
+ if (isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, IRP.getCtxI(),
+ /* TODO: DT */ nullptr))
+ indicateOptimisticFixpoint();
+ }
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = Base::updateImpl(A);
+ if (isKnownNonNull())
+ return Change;
+
+ if (!NullIsDefined) {
+ const auto &DerefAA = A.getAAFor<AADereferenceable>(*this, getIRPosition());
+ if (DerefAA.getAssumedDereferenceableBytes())
+ return Change;
+ }
+
+ const DataLayout &DL = A.getDataLayout();
+
+ auto VisitValueCB = [&](Value &V, AAAlign::StateType &T,
+ bool Stripped) -> bool {
+ const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ if (!isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr,
+ /* CtxI */ getCtxI(),
+ /* TODO: DT */ nullptr))
+ T.indicatePessimisticFixpoint();
+ } else {
+ // Use abstract attribute information.
+ const AANonNull::StateType &NS =
+ static_cast<const AANonNull::StateType &>(AA.getState());
+ T ^= NS;
+ }
+ return T.isValidState();
+ };
+
+ StateType T;
+ if (!genericValueTraversal<AANonNull, StateType>(A, getIRPosition(), *this,
+ T, VisitValueCB))
+ return indicatePessimisticFixpoint();
+
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
+};
+
+/// NonNull attribute for function return value.
+struct AANonNullReturned final
+ : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl> {
+ AANonNullReturned(const IRPosition &IRP)
+ : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl>(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
+};
+
+/// NonNull attribute for function argument.
+struct AANonNullArgument final
+ : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull,
+ AANonNullImpl> {
+ AANonNullArgument(const IRPosition &IRP)
+ : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull,
+ AANonNullImpl>(
+ IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) }
+};
+
+struct AANonNullCallSiteArgument final : AANonNullFloating {
+ AANonNullCallSiteArgument(const IRPosition &IRP) : AANonNullFloating(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) }
+};
+
+/// NonNull attribute for a call site return position.
+struct AANonNullCallSiteReturned final
+ : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull,
+ AANonNullImpl> {
+ AANonNullCallSiteReturned(const IRPosition &IRP)
+ : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull,
+ AANonNullImpl>(
+ IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
+};
+
+/// ------------------------ No-Recurse Attributes ----------------------------
+
+struct AANoRecurseImpl : public AANoRecurse {
+ AANoRecurseImpl(const IRPosition &IRP) : AANoRecurse(IRP) {}
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "norecurse" : "may-recurse";
+ }
+};
+
+struct AANoRecurseFunction final : AANoRecurseImpl {
+ AANoRecurseFunction(const IRPosition &IRP) : AANoRecurseImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoRecurseImpl::initialize(A);
+ if (const Function *F = getAnchorScope())
+ if (A.getInfoCache().getSccSize(*F) == 1)
+ return;
indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
}
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
- auto Opcodes = {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call};
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
- for (unsigned Opcode : Opcodes) {
- for (Instruction *I : OpcodeInstMap[Opcode]) {
- // At this point we handled all read/write effects and they are all
- // nosync, so they can be skipped.
- if (I->mayReadOrWriteMemory())
- continue;
+ auto CheckForNoRecurse = [&](Instruction &I) {
+ ImmutableCallSite ICS(&I);
+ if (ICS.hasFnAttr(Attribute::NoRecurse))
+ return true;
- ImmutableCallSite ICS(I);
+ const auto &NoRecurseAA =
+ A.getAAFor<AANoRecurse>(*this, IRPosition::callsite_function(ICS));
+ if (!NoRecurseAA.isAssumedNoRecurse())
+ return false;
- // non-convergent and readnone imply nosync.
- if (!ICS.isConvergent())
- continue;
+ // Recursion to the same function
+ if (ICS.getCalledFunction() == getAnchorScope())
+ return false;
+
+ return true;
+ };
+
+ if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) }
+};
+
+/// NoRecurse attribute deduction for a call sites.
+struct AANoRecurseCallSite final : AANoRecurseImpl {
+ AANoRecurseCallSite(const IRPosition &IRP) : AANoRecurseImpl(IRP) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoRecurseImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoRecurse::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
+};
+
+/// ------------------------ Will-Return Attributes ----------------------------
+
+// Helper function that checks whether a function has any cycle.
+// TODO: Replace with more efficent code
+static bool containsCycle(Function &F) {
+ SmallPtrSet<BasicBlock *, 32> Visited;
+
+ // Traverse BB by dfs and check whether successor is already visited.
+ for (BasicBlock *BB : depth_first(&F)) {
+ Visited.insert(BB);
+ for (auto *SuccBB : successors(BB)) {
+ if (Visited.count(SuccBB))
+ return true;
}
}
+ return false;
+}
- return ChangeStatus::UNCHANGED;
+// Helper function that checks the function have a loop which might become an
+// endless loop
+// FIXME: Any cycle is regarded as endless loop for now.
+// We have to allow some patterns.
+static bool containsPossiblyEndlessLoop(Function *F) {
+ return !F || !F->hasExactDefinition() || containsCycle(*F);
}
-/// ------------------------ No-Free Attributes ----------------------------
+struct AAWillReturnImpl : public AAWillReturn {
+ AAWillReturnImpl(const IRPosition &IRP) : AAWillReturn(IRP) {}
-struct AANoFreeFunction : AbstractAttribute, BooleanState {
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAWillReturn::initialize(A);
- /// See AbstractAttribute::AbstractAttribute(...).
- AANoFreeFunction(Function &F, InformationCache &InfoCache)
- : AbstractAttribute(F, InfoCache) {}
+ Function *F = getAssociatedFunction();
+ if (containsPossiblyEndlessLoop(F))
+ indicatePessimisticFixpoint();
+ }
- /// See AbstractAttribute::getState()
- ///{
- AbstractState &getState() override { return *this; }
- const AbstractState &getState() const override { return *this; }
- ///}
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto CheckForWillReturn = [&](Instruction &I) {
+ IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I));
+ const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, IPos);
+ if (WillReturnAA.isKnownWillReturn())
+ return true;
+ if (!WillReturnAA.isAssumedWillReturn())
+ return false;
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(*this, IPos);
+ return NoRecurseAA.isAssumedNoRecurse();
+ };
+
+ if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "willreturn" : "may-noreturn";
+ }
+};
+
+struct AAWillReturnFunction final : AAWillReturnImpl {
+ AAWillReturnFunction(const IRPosition &IRP) : AAWillReturnImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) }
+};
+
+/// WillReturn attribute deduction for a call sites.
+struct AAWillReturnCallSite final : AAWillReturnImpl {
+ AAWillReturnCallSite(const IRPosition &IRP) : AAWillReturnImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAWillReturnImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AAWillReturn::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); }
+};
+
+/// ------------------------ NoAlias Argument Attribute ------------------------
+
+struct AANoAliasImpl : AANoAlias {
+ AANoAliasImpl(const IRPosition &IRP) : AANoAlias(IRP) {}
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "noalias" : "may-alias";
+ }
+};
+
+/// NoAlias attribute for a floating value.
+struct AANoAliasFloating final : AANoAliasImpl {
+ AANoAliasFloating(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoAliasImpl::initialize(A);
+ Value &Val = getAssociatedValue();
+ if (isa<AllocaInst>(Val))
+ indicateOptimisticFixpoint();
+ if (isa<ConstantPointerNull>(Val) &&
+ Val.getType()->getPointerAddressSpace() == 0)
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Implement this.
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(noalias)
+ }
+};
+
+/// NoAlias attribute for an argument.
+struct AANoAliasArgument final
+ : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> {
+ AANoAliasArgument(const IRPosition &IRP)
+ : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) }
+};
+
+struct AANoAliasCallSiteArgument final : AANoAliasImpl {
+ AANoAliasCallSiteArgument(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // See callsite argument attribute and callee argument attribute.
+ ImmutableCallSite ICS(&getAnchorValue());
+ if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias))
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // We can deduce "noalias" if the following conditions hold.
+ // (i) Associated value is assumed to be noalias in the definition.
+ // (ii) Associated value is assumed to be no-capture in all the uses
+ // possibly executed before this callsite.
+ // (iii) There is no other pointer argument which could alias with the
+ // value.
+
+ const Value &V = getAssociatedValue();
+ const IRPosition IRP = IRPosition::value(V);
+
+ // (i) Check whether noalias holds in the definition.
+
+ auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
+
+ if (!NoAliasAA.isAssumedNoAlias())
+ return indicatePessimisticFixpoint();
+
+ LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] " << V
+ << " is assumed NoAlias in the definition\n");
+
+ // (ii) Check whether the value is captured in the scope using AANoCapture.
+ // FIXME: This is conservative though, it is better to look at CFG and
+ // check only uses possibly executed before this callsite.
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+ auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+ if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ LLVM_DEBUG(
+ dbgs() << "[Attributor][AANoAliasCSArg] " << V
+ << " cannot be noalias as it is potentially captured\n");
+ return indicatePessimisticFixpoint();
+ }
+
+ // (iii) Check there is no other pointer argument which could alias with the
+ // value.
+ ImmutableCallSite ICS(&getAnchorValue());
+ for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) {
+ if (getArgNo() == (int)i)
+ continue;
+ const Value *ArgOp = ICS.getArgOperand(i);
+ if (!ArgOp->getType()->isPointerTy())
+ continue;
+
+ if (const Function *F = getAnchorScope()) {
+ if (AAResults *AAR = A.getInfoCache().getAAResultsForFunction(*F)) {
+ bool IsAliasing = AAR->isNoAlias(&getAssociatedValue(), ArgOp);
+ LLVM_DEBUG(dbgs()
+ << "[Attributor][NoAliasCSArg] Check alias between "
+ "callsite arguments "
+ << AAR->isNoAlias(&getAssociatedValue(), ArgOp) << " "
+ << getAssociatedValue() << " " << *ArgOp << " => "
+ << (IsAliasing ? "" : "no-") << "alias \n");
+
+ if (IsAliasing)
+ continue;
+ }
+ }
+ return indicatePessimisticFixpoint();
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) }
+};
+
+/// NoAlias attribute for function return value.
+struct AANoAliasReturned final : AANoAliasImpl {
+ AANoAliasReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ virtual ChangeStatus updateImpl(Attributor &A) override {
+
+ auto CheckReturnValue = [&](Value &RV) -> bool {
+ if (Constant *C = dyn_cast<Constant>(&RV))
+ if (C->isNullValue() || isa<UndefValue>(C))
+ return true;
+
+ /// For now, we can only deduce noalias if we have call sites.
+ /// FIXME: add more support.
+ ImmutableCallSite ICS(&RV);
+ if (!ICS)
+ return false;
+
+ const IRPosition &RVPos = IRPosition::value(RV);
+ const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, RVPos);
+ if (!NoAliasAA.isAssumedNoAlias())
+ return false;
+
+ const auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, RVPos);
+ return NoCaptureAA.isAssumedNoCaptureMaybeReturned();
+ };
+
+ if (!A.checkForAllReturnedValues(CheckReturnValue, *this))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) }
+};
+
+/// NoAlias attribute deduction for a call site return value.
+struct AANoAliasCallSiteReturned final : AANoAliasImpl {
+ AANoAliasCallSiteReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoAliasImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::returned(*F);
+ auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoAlias::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); }
+};
+
+/// -------------------AAIsDead Function Attribute-----------------------
+
+struct AAIsDeadImpl : public AAIsDead {
+ AAIsDeadImpl(const IRPosition &IRP) : AAIsDead(IRP) {}
+
+ void initialize(Attributor &A) override {
+ const Function *F = getAssociatedFunction();
+ if (F && !F->isDeclaration())
+ exploreFromEntry(A, F);
+ }
+
+ void exploreFromEntry(Attributor &A, const Function *F) {
+ ToBeExploredPaths.insert(&(F->getEntryBlock().front()));
+
+ for (size_t i = 0; i < ToBeExploredPaths.size(); ++i)
+ if (const Instruction *NextNoReturnI =
+ findNextNoReturn(A, ToBeExploredPaths[i]))
+ NoReturnCalls.insert(NextNoReturnI);
+
+ // Mark the block live after we looked for no-return instructions.
+ assumeLive(A, F->getEntryBlock());
+ }
+
+ /// Find the next assumed noreturn instruction in the block of \p I starting
+ /// from, thus including, \p I.
+ ///
+ /// The caller is responsible to monitor the ToBeExploredPaths set as new
+ /// instructions discovered in other basic block will be placed in there.
+ ///
+ /// \returns The next assumed noreturn instructions in the block of \p I
+ /// starting from, thus including, \p I.
+ const Instruction *findNextNoReturn(Attributor &A, const Instruction *I);
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
- return getAssumed() ? "nofree" : "may-free";
+ return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" +
+ std::to_string(getAssociatedFunction()->size()) + "][#NRI " +
+ std::to_string(NoReturnCalls.size()) + "]";
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ assert(getState().isValidState() &&
+ "Attempted to manifest an invalid state!");
+
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ Function &F = *getAssociatedFunction();
+
+ if (AssumedLiveBlocks.empty()) {
+ A.deleteAfterManifest(F);
+ return ChangeStatus::CHANGED;
+ }
+
+ // Flag to determine if we can change an invoke to a call assuming the
+ // callee is nounwind. This is not possible if the personality of the
+ // function allows to catch asynchronous exceptions.
+ bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
+
+ for (const Instruction *NRC : NoReturnCalls) {
+ Instruction *I = const_cast<Instruction *>(NRC);
+ BasicBlock *BB = I->getParent();
+ Instruction *SplitPos = I->getNextNode();
+ // TODO: mark stuff before unreachable instructions as dead.
+
+ if (auto *II = dyn_cast<InvokeInst>(I)) {
+ // If we keep the invoke the split position is at the beginning of the
+ // normal desitination block (it invokes a noreturn function after all).
+ BasicBlock *NormalDestBB = II->getNormalDest();
+ SplitPos = &NormalDestBB->front();
+
+ /// Invoke is replaced with a call and unreachable is placed after it if
+ /// the callee is nounwind and noreturn. Otherwise, we keep the invoke
+ /// and only place an unreachable in the normal successor.
+ if (Invoke2CallAllowed) {
+ if (II->getCalledFunction()) {
+ const IRPosition &IPos = IRPosition::callsite_function(*II);
+ const auto &AANoUnw = A.getAAFor<AANoUnwind>(*this, IPos);
+ if (AANoUnw.isAssumedNoUnwind()) {
+ LLVM_DEBUG(dbgs()
+ << "[AAIsDead] Replace invoke with call inst\n");
+ // We do not need an invoke (II) but instead want a call followed
+ // by an unreachable. However, we do not remove II as other
+ // abstract attributes might have it cached as part of their
+ // results. Given that we modify the CFG anyway, we simply keep II
+ // around but in a new dead block. To avoid II being live through
+ // a different edge we have to ensure the block we place it in is
+ // only reached from the current block of II and then not reached
+ // at all when we insert the unreachable.
+ SplitBlockPredecessors(NormalDestBB, {BB}, ".i2c");
+ CallInst *CI = createCallMatchingInvoke(II);
+ CI->insertBefore(II);
+ CI->takeName(II);
+ II->replaceAllUsesWith(CI);
+ SplitPos = CI->getNextNode();
+ }
+ }
+ }
+
+ if (SplitPos == &NormalDestBB->front()) {
+ // If this is an invoke of a noreturn function the edge to the normal
+ // destination block is dead but not necessarily the block itself.
+ // TODO: We need to move to an edge based system during deduction and
+ // also manifest.
+ assert(!NormalDestBB->isLandingPad() &&
+ "Expected the normal destination not to be a landingpad!");
+ if (NormalDestBB->getUniquePredecessor() == BB) {
+ assumeLive(A, *NormalDestBB);
+ } else {
+ BasicBlock *SplitBB =
+ SplitBlockPredecessors(NormalDestBB, {BB}, ".dead");
+ // The split block is live even if it contains only an unreachable
+ // instruction at the end.
+ assumeLive(A, *SplitBB);
+ SplitPos = SplitBB->getTerminator();
+ HasChanged = ChangeStatus::CHANGED;
+ }
+ }
+ }
+
+ if (isa_and_nonnull<UnreachableInst>(SplitPos))
+ continue;
+
+ BB = SplitPos->getParent();
+ SplitBlock(BB, SplitPos);
+ changeToUnreachable(BB->getTerminator(), /* UseLLVMTrap */ false);
+ HasChanged = ChangeStatus::CHANGED;
+ }
+
+ for (BasicBlock &BB : F)
+ if (!AssumedLiveBlocks.count(&BB))
+ A.deleteAfterManifest(BB);
+
+ return HasChanged;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
- /// See AbstractAttribute::getAttrKind().
- Attribute::AttrKind getAttrKind() const override { return ID; }
+ /// See AAIsDead::isAssumedDead(BasicBlock *).
+ bool isAssumedDead(const BasicBlock *BB) const override {
+ assert(BB->getParent() == getAssociatedFunction() &&
+ "BB must be in the same anchor scope function.");
+
+ if (!getAssumed())
+ return false;
+ return !AssumedLiveBlocks.count(BB);
+ }
+
+ /// See AAIsDead::isKnownDead(BasicBlock *).
+ bool isKnownDead(const BasicBlock *BB) const override {
+ return getKnown() && isAssumedDead(BB);
+ }
+
+ /// See AAIsDead::isAssumed(Instruction *I).
+ bool isAssumedDead(const Instruction *I) const override {
+ assert(I->getParent()->getParent() == getAssociatedFunction() &&
+ "Instruction must be in the same anchor scope function.");
+
+ if (!getAssumed())
+ return false;
+
+ // If it is not in AssumedLiveBlocks then it for sure dead.
+ // Otherwise, it can still be after noreturn call in a live block.
+ if (!AssumedLiveBlocks.count(I->getParent()))
+ return true;
+
+ // If it is not after a noreturn call, than it is live.
+ return isAfterNoReturn(I);
+ }
+
+ /// See AAIsDead::isKnownDead(Instruction *I).
+ bool isKnownDead(const Instruction *I) const override {
+ return getKnown() && isAssumedDead(I);
+ }
+
+ /// Check if instruction is after noreturn call, in other words, assumed dead.
+ bool isAfterNoReturn(const Instruction *I) const;
- /// Return true if "nofree" is assumed.
- bool isAssumedNoFree() const { return getAssumed(); }
+ /// Determine if \p F might catch asynchronous exceptions.
+ static bool mayCatchAsynchronousExceptions(const Function &F) {
+ return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
+ }
+
+ /// Assume \p BB is (partially) live now and indicate to the Attributor \p A
+ /// that internal function called from \p BB should now be looked at.
+ void assumeLive(Attributor &A, const BasicBlock &BB) {
+ if (!AssumedLiveBlocks.insert(&BB).second)
+ return;
+
+ // We assume that all of BB is (probably) live now and if there are calls to
+ // internal functions we will assume that those are now live as well. This
+ // is a performance optimization for blocks with calls to a lot of internal
+ // functions. It can however cause dead functions to be treated as live.
+ for (const Instruction &I : BB)
+ if (ImmutableCallSite ICS = ImmutableCallSite(&I))
+ if (const Function *F = ICS.getCalledFunction())
+ if (F->hasLocalLinkage())
+ A.markLiveInternalFunction(*F);
+ }
- /// Return true if "nofree" is known.
- bool isKnownNoFree() const { return getKnown(); }
+ /// Collection of to be explored paths.
+ SmallSetVector<const Instruction *, 8> ToBeExploredPaths;
- /// The identifier used by the Attributor for this class of attributes.
- static constexpr Attribute::AttrKind ID = Attribute::NoFree;
+ /// Collection of all assumed live BasicBlocks.
+ DenseSet<const BasicBlock *> AssumedLiveBlocks;
+
+ /// Collection of calls with noreturn attribute, assumed or knwon.
+ SmallSetVector<const Instruction *, 4> NoReturnCalls;
};
-ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+struct AAIsDeadFunction final : public AAIsDeadImpl {
+ AAIsDeadFunction(const IRPosition &IRP) : AAIsDeadImpl(IRP) {}
- // The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECL(PartiallyDeadBlocks, Function,
+ "Number of basic blocks classified as partially dead");
+ BUILD_STAT_NAME(PartiallyDeadBlocks, Function) += NoReturnCalls.size();
+ }
+};
- for (unsigned Opcode :
- {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call}) {
- for (Instruction *I : OpcodeInstMap[Opcode]) {
+bool AAIsDeadImpl::isAfterNoReturn(const Instruction *I) const {
+ const Instruction *PrevI = I->getPrevNode();
+ while (PrevI) {
+ if (NoReturnCalls.count(PrevI))
+ return true;
+ PrevI = PrevI->getPrevNode();
+ }
+ return false;
+}
- auto ICS = ImmutableCallSite(I);
- auto *NoFreeAA = A.getAAFor<AANoFreeFunction>(*this, *I);
+const Instruction *AAIsDeadImpl::findNextNoReturn(Attributor &A,
+ const Instruction *I) {
+ const BasicBlock *BB = I->getParent();
+ const Function &F = *BB->getParent();
- if ((!NoFreeAA || !NoFreeAA->isAssumedNoFree()) &&
- !ICS.hasFnAttr(Attribute::NoFree)) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ // Flag to determine if we can change an invoke to a call assuming the callee
+ // is nounwind. This is not possible if the personality of the function allows
+ // to catch asynchronous exceptions.
+ bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
+
+ // TODO: We should have a function that determines if an "edge" is dead.
+ // Edges could be from an instruction to the next or from a terminator
+ // to the successor. For now, we need to special case the unwind block
+ // of InvokeInst below.
+
+ while (I) {
+ ImmutableCallSite ICS(I);
+
+ if (ICS) {
+ const IRPosition &IPos = IRPosition::callsite_function(ICS);
+ // Regarless of the no-return property of an invoke instruction we only
+ // learn that the regular successor is not reachable through this
+ // instruction but the unwind block might still be.
+ if (auto *Invoke = dyn_cast<InvokeInst>(I)) {
+ // Use nounwind to justify the unwind block is dead as well.
+ const auto &AANoUnw = A.getAAFor<AANoUnwind>(*this, IPos);
+ if (!Invoke2CallAllowed || !AANoUnw.isAssumedNoUnwind()) {
+ assumeLive(A, *Invoke->getUnwindDest());
+ ToBeExploredPaths.insert(&Invoke->getUnwindDest()->front());
+ }
}
+
+ const auto &NoReturnAA = A.getAAFor<AANoReturn>(*this, IPos);
+ if (NoReturnAA.isAssumedNoReturn())
+ return I;
}
+
+ I = I->getNextNode();
}
- return ChangeStatus::UNCHANGED;
+
+ // get new paths (reachable blocks).
+ for (const BasicBlock *SuccBB : successors(BB)) {
+ assumeLive(A, *SuccBB);
+ ToBeExploredPaths.insert(&SuccBB->front());
+ }
+
+ // No noreturn instruction found.
+ return nullptr;
}
-/// ------------------------ NonNull Argument Attribute ------------------------
-struct AANonNullImpl : AANonNull, BooleanState {
+ChangeStatus AAIsDeadImpl::updateImpl(Attributor &A) {
+ ChangeStatus Status = ChangeStatus::UNCHANGED;
+
+ // Temporary collection to iterate over existing noreturn instructions. This
+ // will alow easier modification of NoReturnCalls collection
+ SmallVector<const Instruction *, 8> NoReturnChanged;
+
+ for (const Instruction *I : NoReturnCalls)
+ NoReturnChanged.push_back(I);
+
+ for (const Instruction *I : NoReturnChanged) {
+ size_t Size = ToBeExploredPaths.size();
+
+ const Instruction *NextNoReturnI = findNextNoReturn(A, I);
+ if (NextNoReturnI != I) {
+ Status = ChangeStatus::CHANGED;
+ NoReturnCalls.remove(I);
+ if (NextNoReturnI)
+ NoReturnCalls.insert(NextNoReturnI);
+ }
- AANonNullImpl(Value &V, InformationCache &InfoCache)
- : AANonNull(V, InfoCache) {}
+ // Explore new paths.
+ while (Size != ToBeExploredPaths.size()) {
+ Status = ChangeStatus::CHANGED;
+ if (const Instruction *NextNoReturnI =
+ findNextNoReturn(A, ToBeExploredPaths[Size++]))
+ NoReturnCalls.insert(NextNoReturnI);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "[AAIsDead] AssumedLiveBlocks: "
+ << AssumedLiveBlocks.size() << " Total number of blocks: "
+ << getAssociatedFunction()->size() << "\n");
- AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue,
- InformationCache &InfoCache)
- : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {}
+ // If we know everything is live there is no need to query for liveness.
+ if (NoReturnCalls.empty() &&
+ getAssociatedFunction()->size() == AssumedLiveBlocks.size()) {
+ // Indicating a pessimistic fixpoint will cause the state to be "invalid"
+ // which will cause the Attributor to not return the AAIsDead on request,
+ // which will prevent us from querying isAssumedDead().
+ indicatePessimisticFixpoint();
+ assert(!isValidState() && "Expected an invalid state!");
+ Status = ChangeStatus::CHANGED;
+ }
+
+ return Status;
+}
+
+/// Liveness information for a call sites.
+struct AAIsDeadCallSite final : AAIsDeadImpl {
+ AAIsDeadCallSite(const IRPosition &IRP) : AAIsDeadImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites instead of
+ // redirecting requests to the callee.
+ llvm_unreachable("Abstract attributes for liveness are not "
+ "supported for call sites yet!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// -------------------- Dereferenceable Argument Attribute --------------------
+
+template <>
+ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
+ const DerefState &R) {
+ ChangeStatus CS0 = clampStateAndIndicateChange<IntegerState>(
+ S.DerefBytesState, R.DerefBytesState);
+ ChangeStatus CS1 =
+ clampStateAndIndicateChange<IntegerState>(S.GlobalState, R.GlobalState);
+ return CS0 | CS1;
+}
+
+struct AADereferenceableImpl : AADereferenceable {
+ AADereferenceableImpl(const IRPosition &IRP) : AADereferenceable(IRP) {}
+ using StateType = DerefState;
+
+ void initialize(Attributor &A) override {
+ SmallVector<Attribute, 4> Attrs;
+ getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
+ Attrs);
+ for (const Attribute &Attr : Attrs)
+ takeKnownDerefBytesMaximum(Attr.getValueAsInt());
+
+ NonNullAA = &A.getAAFor<AANonNull>(*this, getIRPosition());
+
+ const IRPosition &IRP = this->getIRPosition();
+ bool IsFnInterface = IRP.isFnInterfaceKind();
+ const Function *FnScope = IRP.getAnchorScope();
+ if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition()))
+ indicatePessimisticFixpoint();
+ }
/// See AbstractAttribute::getState()
/// {
- AbstractState &getState() override { return *this; }
- const AbstractState &getState() const override { return *this; }
+ StateType &getState() override { return *this; }
+ const StateType &getState() const override { return *this; }
/// }
+ /// See AAFromMustBeExecutedContext
+ bool followUse(Attributor &A, const Use *U, const Instruction *I) {
+ bool IsNonNull = false;
+ bool TrackUse = false;
+ int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
+ A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse);
+ takeKnownDerefBytesMaximum(DerefBytes);
+ return TrackUse;
+ }
+
+ void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ // TODO: Add *_globally support
+ if (isAssumedNonNull())
+ Attrs.emplace_back(Attribute::getWithDereferenceableBytes(
+ Ctx, getAssumedDereferenceableBytes()));
+ else
+ Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes(
+ Ctx, getAssumedDereferenceableBytes()));
+ }
+
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
- return getAssumed() ? "nonnull" : "may-null";
+ if (!getAssumedDereferenceableBytes())
+ return "unknown-dereferenceable";
+ return std::string("dereferenceable") +
+ (isAssumedNonNull() ? "" : "_or_null") +
+ (isAssumedGlobal() ? "_globally" : "") + "<" +
+ std::to_string(getKnownDereferenceableBytes()) + "-" +
+ std::to_string(getAssumedDereferenceableBytes()) + ">";
}
+};
+
+/// Dereferenceable attribute for a floating value.
+struct AADereferenceableFloating
+ : AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl> {
+ using Base =
+ AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl>;
+ AADereferenceableFloating(const IRPosition &IRP) : Base(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = Base::updateImpl(A);
+
+ const DataLayout &DL = A.getDataLayout();
+
+ auto VisitValueCB = [&](Value &V, DerefState &T, bool Stripped) -> bool {
+ unsigned IdxWidth =
+ DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
+ APInt Offset(IdxWidth, 0);
+ const Value *Base =
+ V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ const auto &AA =
+ A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base));
+ int64_t DerefBytes = 0;
+ if (!Stripped && this == &AA) {
+ // Use IR information if we did not strip anything.
+ // TODO: track globally.
+ bool CanBeNull;
+ DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull);
+ T.GlobalState.indicatePessimisticFixpoint();
+ } else {
+ const DerefState &DS = static_cast<const DerefState &>(AA.getState());
+ DerefBytes = DS.DerefBytesState.getAssumed();
+ T.GlobalState &= DS.GlobalState;
+ }
+
+ // For now we do not try to "increase" dereferenceability due to negative
+ // indices as we first have to come up with code to deal with loops and
+ // for overflows of the dereferenceable bytes.
+ int64_t OffsetSExt = Offset.getSExtValue();
+ if (OffsetSExt < 0)
+ OffsetSExt = 0;
+
+ T.takeAssumedDerefBytesMinimum(
+ std::max(int64_t(0), DerefBytes - OffsetSExt));
+
+ if (this == &AA) {
+ if (!Stripped) {
+ // If nothing was stripped IR information is all we got.
+ T.takeKnownDerefBytesMaximum(
+ std::max(int64_t(0), DerefBytes - OffsetSExt));
+ T.indicatePessimisticFixpoint();
+ } else if (OffsetSExt > 0) {
+ // If something was stripped but there is circular reasoning we look
+ // for the offset. If it is positive we basically decrease the
+ // dereferenceable bytes in a circluar loop now, which will simply
+ // drive them down to the known value in a very slow way which we
+ // can accelerate.
+ T.indicatePessimisticFixpoint();
+ }
+ }
+
+ return T.isValidState();
+ };
- /// See AANonNull::isAssumedNonNull().
- bool isAssumedNonNull() const override { return getAssumed(); }
+ DerefState T;
+ if (!genericValueTraversal<AADereferenceable, DerefState>(
+ A, getIRPosition(), *this, T, VisitValueCB))
+ return indicatePessimisticFixpoint();
- /// See AANonNull::isKnownNonNull().
- bool isKnownNonNull() const override { return getKnown(); }
+ return Change | clampStateAndIndicateChange(getState(), T);
+ }
- /// Generate a predicate that checks if a given value is assumed nonnull.
- /// The generated function returns true if a value satisfies any of
- /// following conditions.
- /// (i) A value is known nonZero(=nonnull).
- /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is
- /// true.
- std::function<bool(Value &)> generatePredicate(Attributor &);
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(dereferenceable)
+ }
};
-std::function<bool(Value &)> AANonNullImpl::generatePredicate(Attributor &A) {
- // FIXME: The `AAReturnedValues` should provide the predicate with the
- // `ReturnInst` vector as well such that we can use the control flow sensitive
- // version of `isKnownNonZero`. This should fix `test11` in
- // `test/Transforms/FunctionAttrs/nonnull.ll`
+/// Dereferenceable attribute for a return value.
+struct AADereferenceableReturned final
+ : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl,
+ DerefState> {
+ AADereferenceableReturned(const IRPosition &IRP)
+ : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl,
+ DerefState>(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(dereferenceable)
+ }
+};
- std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
- if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout()))
- return true;
+/// Dereferenceable attribute for an argument
+struct AADereferenceableArgument final
+ : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<
+ AADereferenceable, AADereferenceableImpl, DerefState> {
+ using Base = AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<
+ AADereferenceable, AADereferenceableImpl, DerefState>;
+ AADereferenceableArgument(const IRPosition &IRP) : Base(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(dereferenceable)
+ }
+};
- auto *NonNullAA = A.getAAFor<AANonNull>(*this, RV);
+/// Dereferenceable attribute for a call site argument.
+struct AADereferenceableCallSiteArgument final : AADereferenceableFloating {
+ AADereferenceableCallSiteArgument(const IRPosition &IRP)
+ : AADereferenceableFloating(IRP) {}
- ImmutableCallSite ICS(&RV);
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(dereferenceable)
+ }
+};
- if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) &&
- (!ICS || !ICS.hasRetAttr(Attribute::NonNull)))
- return false;
+/// Dereferenceable attribute deduction for a call site return value.
+struct AADereferenceableCallSiteReturned final
+ : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<
+ AADereferenceable, AADereferenceableImpl> {
+ using Base = AACallSiteReturnedFromReturnedAndMustBeExecutedContext<
+ AADereferenceable, AADereferenceableImpl>;
+ AADereferenceableCallSiteReturned(const IRPosition &IRP) : Base(IRP) {}
- return true;
- };
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
- return Pred;
-}
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+
+ ChangeStatus Change = Base::updateImpl(A);
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::returned(*F);
+ auto &FnAA = A.getAAFor<AADereferenceable>(*this, FnPos);
+ return Change |
+ clampStateAndIndicateChange(
+ getState(), static_cast<const DerefState &>(FnAA.getState()));
+ }
-/// NonNull attribute for function return value.
-struct AANonNullReturned : AANonNullImpl {
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(dereferenceable);
+ }
+};
- AANonNullReturned(Function &F, InformationCache &InfoCache)
- : AANonNullImpl(F, InfoCache) {}
+// ------------------------ Align Argument Attribute ------------------------
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_RETURNED; }
+struct AAAlignImpl : AAAlign {
+ AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {}
- /// See AbstractAttriubute::initialize(...).
+ // Max alignemnt value allowed in IR
+ static const unsigned MAX_ALIGN = 1U << 29;
+
+ /// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- Function &F = getAnchorScope();
+ takeAssumedMinimum(MAX_ALIGN);
- // Already nonnull.
- if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::NonNull))
- indicateOptimisticFixpoint();
+ SmallVector<Attribute, 4> Attrs;
+ getAttrs({Attribute::Alignment}, Attrs);
+ for (const Attribute &Attr : Attrs)
+ takeKnownMaximum(Attr.getValueAsInt());
+
+ if (getIRPosition().isFnInterfaceKind() &&
+ (!getAssociatedFunction() ||
+ !getAssociatedFunction()->hasExactDefinition()))
+ indicatePessimisticFixpoint();
}
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ // Check for users that allow alignment annotations.
+ Value &AnchorVal = getIRPosition().getAnchorValue();
+ for (const Use &U : AnchorVal.uses()) {
+ if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
+ if (SI->getPointerOperand() == &AnchorVal)
+ if (SI->getAlignment() < getAssumedAlign()) {
+ STATS_DECLTRACK(AAAlign, Store,
+ "Number of times alignemnt added to a store");
+ SI->setAlignment(Align(getAssumedAlign()));
+ Changed = ChangeStatus::CHANGED;
+ }
+ } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
+ if (LI->getPointerOperand() == &AnchorVal)
+ if (LI->getAlignment() < getAssumedAlign()) {
+ LI->setAlignment(Align(getAssumedAlign()));
+ STATS_DECLTRACK(AAAlign, Load,
+ "Number of times alignemnt added to a load");
+ Changed = ChangeStatus::CHANGED;
+ }
+ }
+ }
+
+ return AAAlign::manifest(A) | Changed;
+ }
+
+ // TODO: Provide a helper to determine the implied ABI alignment and check in
+ // the existing manifest method and a new one for AAAlignImpl that value
+ // to avoid making the alignment explicit if it did not improve.
+
+ /// See AbstractAttribute::getDeducedAttributes
+ virtual void
+ getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ if (getAssumedAlign() > 1)
+ Attrs.emplace_back(
+ Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) +
+ "-" + std::to_string(getAssumedAlign()) + ">")
+ : "unknown-align";
+ }
+};
+
+/// Align attribute for a floating value.
+struct AAAlignFloating : AAAlignImpl {
+ AAAlignFloating(const IRPosition &IRP) : AAAlignImpl(IRP) {}
+
/// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus updateImpl(Attributor &A) override {
+ const DataLayout &DL = A.getDataLayout();
+
+ auto VisitValueCB = [&](Value &V, AAAlign::StateType &T,
+ bool Stripped) -> bool {
+ const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ // Use only IR information if we did not strip anything.
+ const MaybeAlign PA = V.getPointerAlignment(DL);
+ T.takeKnownMaximum(PA ? PA->value() : 0);
+ T.indicatePessimisticFixpoint();
+ } else {
+ // Use abstract attribute information.
+ const AAAlign::StateType &DS =
+ static_cast<const AAAlign::StateType &>(AA.getState());
+ T ^= DS;
+ }
+ return T.isValidState();
+ };
+
+ StateType T;
+ if (!genericValueTraversal<AAAlign, StateType>(A, getIRPosition(), *this, T,
+ VisitValueCB))
+ return indicatePessimisticFixpoint();
+
+ // TODO: If we know we visited all incoming values, thus no are assumed
+ // dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) }
};
-ChangeStatus AANonNullReturned::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+/// Align attribute for function return value.
+struct AAAlignReturned final
+ : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> {
+ AAAlignReturned(const IRPosition &IRP)
+ : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>(IRP) {}
- auto *AARetVal = A.getAAFor<AAReturnedValues>(*this, F);
- if (!AARetVal) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
+};
+
+/// Align attribute for function argument.
+struct AAAlignArgument final
+ : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl> {
+ AAAlignArgument(const IRPosition &IRP)
+ : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl>(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) }
+};
+
+struct AAAlignCallSiteArgument final : AAAlignFloating {
+ AAAlignCallSiteArgument(const IRPosition &IRP) : AAAlignFloating(IRP) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ return AAAlignImpl::manifest(A);
}
- std::function<bool(Value &)> Pred = this->generatePredicate(A);
- if (!AARetVal->checkForallReturnedValues(Pred)) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) }
+};
+
+/// Align attribute deduction for a call site return value.
+struct AAAlignCallSiteReturned final : AAAlignImpl {
+ AAAlignCallSiteReturned(const IRPosition &IRP) : AAAlignImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAAlignImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
}
- return ChangeStatus::UNCHANGED;
-}
-/// NonNull attribute for function argument.
-struct AANonNullArgument : AANonNullImpl {
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::returned(*F);
+ auto &FnAA = A.getAAFor<AAAlign>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AAAlign::StateType &>(FnAA.getState()));
+ }
- AANonNullArgument(Argument &A, InformationCache &InfoCache)
- : AANonNullImpl(A, InfoCache) {}
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
+};
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+/// ------------------ Function No-Return Attribute ----------------------------
+struct AANoReturnImpl : public AANoReturn {
+ AANoReturnImpl(const IRPosition &IRP) : AANoReturn(IRP) {}
- /// See AbstractAttriubute::initialize(...).
+ /// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- Argument *Arg = cast<Argument>(getAssociatedValue());
- if (Arg->hasNonNullAttr())
- indicateOptimisticFixpoint();
+ AANoReturn::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || F->hasFnAttribute(Attribute::WillReturn))
+ indicatePessimisticFixpoint();
}
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "noreturn" : "may-return";
+ }
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ virtual ChangeStatus updateImpl(Attributor &A) override {
+ const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, getIRPosition());
+ if (WillReturnAA.isKnownWillReturn())
+ return indicatePessimisticFixpoint();
+ auto CheckForNoReturn = [](Instruction &) { return false; };
+ if (!A.checkForAllInstructions(CheckForNoReturn, *this,
+ {(unsigned)Instruction::Ret}))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+struct AANoReturnFunction final : AANoReturnImpl {
+ AANoReturnFunction(const IRPosition &IRP) : AANoReturnImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) }
+};
+
+/// NoReturn attribute deduction for a call sites.
+struct AANoReturnCallSite final : AANoReturnImpl {
+ AANoReturnCallSite(const IRPosition &IRP) : AANoReturnImpl(IRP) {}
+
/// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoReturn::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); }
};
-/// NonNull attribute for a call site argument.
-struct AANonNullCallSiteArgument : AANonNullImpl {
+/// ----------------------- Variable Capturing ---------------------------------
- /// See AANonNullImpl::AANonNullImpl(...).
- AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo,
- InformationCache &InfoCache)
- : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache),
- ArgNo(ArgNo) {}
+/// A class to hold the state of for no-capture attributes.
+struct AANoCaptureImpl : public AANoCapture {
+ AANoCaptureImpl(const IRPosition &IRP) : AANoCapture(IRP) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- CallSite CS(&getAnchoredValue());
- if (isKnownNonZero(getAssociatedValue(),
- getAnchorScope().getParent()->getDataLayout()) ||
- CS.paramHasAttr(ArgNo, getAttrKind()))
+ AANoCapture::initialize(A);
+
+ // You cannot "capture" null in the default address space.
+ if (isa<ConstantPointerNull>(getAssociatedValue()) &&
+ getAssociatedValue().getType()->getPointerAddressSpace() == 0) {
indicateOptimisticFixpoint();
+ return;
+ }
+
+ const IRPosition &IRP = getIRPosition();
+ const Function *F =
+ getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+
+ // Check what state the associated function can actually capture.
+ if (F)
+ determineFunctionCaptureCapabilities(IRP, *F, *this);
+ else
+ indicatePessimisticFixpoint();
}
- /// See AbstractAttribute::updateImpl(Attributor &A).
+ /// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override {
- return MP_CALL_SITE_ARGUMENT;
- };
+ /// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...).
+ virtual void
+ getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ if (!isAssumedNoCaptureMaybeReturned())
+ return;
+
+ if (getArgNo() >= 0) {
+ if (isAssumedNoCapture())
+ Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture));
+ else if (ManifestInternal)
+ Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned"));
+ }
+ }
+
+ /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known
+ /// depending on the ability of the function associated with \p IRP to capture
+ /// state in memory and through "returning/throwing", respectively.
+ static void determineFunctionCaptureCapabilities(const IRPosition &IRP,
+ const Function &F,
+ IntegerState &State) {
+ // TODO: Once we have memory behavior attributes we should use them here.
+
+ // If we know we cannot communicate or write to memory, we do not care about
+ // ptr2int anymore.
+ if (F.onlyReadsMemory() && F.doesNotThrow() &&
+ F.getReturnType()->isVoidTy()) {
+ State.addKnownBits(NO_CAPTURE);
+ return;
+ }
+
+ // A function cannot capture state in memory if it only reads memory, it can
+ // however return/throw state and the state might be influenced by the
+ // pointer value, e.g., loading from a returned pointer might reveal a bit.
+ if (F.onlyReadsMemory())
+ State.addKnownBits(NOT_CAPTURED_IN_MEM);
+
+ // A function cannot communicate state back if it does not through
+ // exceptions and doesn not return values.
+ if (F.doesNotThrow() && F.getReturnType()->isVoidTy())
+ State.addKnownBits(NOT_CAPTURED_IN_RET);
+
+ // Check existing "returned" attributes.
+ int ArgNo = IRP.getArgNo();
+ if (F.doesNotThrow() && ArgNo >= 0) {
+ for (unsigned u = 0, e = F.arg_size(); u< e; ++u)
+ if (F.hasParamAttribute(u, Attribute::Returned)) {
+ if (u == unsigned(ArgNo))
+ State.removeAssumedBits(NOT_CAPTURED_IN_RET);
+ else if (F.onlyReadsMemory())
+ State.addKnownBits(NO_CAPTURE);
+ else
+ State.addKnownBits(NOT_CAPTURED_IN_RET);
+ break;
+ }
+ }
+ }
- // Return argument index of associated value.
- int getArgNo() const { return ArgNo; }
+ /// See AbstractState::getAsStr().
+ const std::string getAsStr() const override {
+ if (isKnownNoCapture())
+ return "known not-captured";
+ if (isAssumedNoCapture())
+ return "assumed not-captured";
+ if (isKnownNoCaptureMaybeReturned())
+ return "known not-captured-maybe-returned";
+ if (isAssumedNoCaptureMaybeReturned())
+ return "assumed not-captured-maybe-returned";
+ return "assumed-captured";
+ }
+};
+
+/// Attributor-aware capture tracker.
+struct AACaptureUseTracker final : public CaptureTracker {
+
+ /// Create a capture tracker that can lookup in-flight abstract attributes
+ /// through the Attributor \p A.
+ ///
+ /// If a use leads to a potential capture, \p CapturedInMemory is set and the
+ /// search is stopped. If a use leads to a return instruction,
+ /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed.
+ /// If a use leads to a ptr2int which may capture the value,
+ /// \p CapturedInInteger is set. If a use is found that is currently assumed
+ /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies
+ /// set. All values in \p PotentialCopies are later tracked as well. For every
+ /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0,
+ /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger
+ /// conservatively set to true.
+ AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA,
+ const AAIsDead &IsDeadAA, IntegerState &State,
+ SmallVectorImpl<const Value *> &PotentialCopies,
+ unsigned &RemainingUsesToExplore)
+ : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State),
+ PotentialCopies(PotentialCopies),
+ RemainingUsesToExplore(RemainingUsesToExplore) {}
+
+ /// Determine if \p V maybe captured. *Also updates the state!*
+ bool valueMayBeCaptured(const Value *V) {
+ if (V->getType()->isPointerTy()) {
+ PointerMayBeCaptured(V, this);
+ } else {
+ State.indicatePessimisticFixpoint();
+ }
+ return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+ }
+
+ /// See CaptureTracker::tooManyUses().
+ void tooManyUses() override {
+ State.removeAssumedBits(AANoCapture::NO_CAPTURE);
+ }
+
+ bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
+ if (CaptureTracker::isDereferenceableOrNull(O, DL))
+ return true;
+ const auto &DerefAA =
+ A.getAAFor<AADereferenceable>(NoCaptureAA, IRPosition::value(*O));
+ return DerefAA.getAssumedDereferenceableBytes();
+ }
+
+ /// See CaptureTracker::captured(...).
+ bool captured(const Use *U) override {
+ Instruction *UInst = cast<Instruction>(U->getUser());
+ LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst
+ << "\n");
+
+ // Because we may reuse the tracker multiple times we keep track of the
+ // number of explored uses ourselves as well.
+ if (RemainingUsesToExplore-- == 0) {
+ LLVM_DEBUG(dbgs() << " - too many uses to explore!\n");
+ return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ /* Return */ true);
+ }
+
+ // Deal with ptr2int by following uses.
+ if (isa<PtrToIntInst>(UInst)) {
+ LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n");
+ return valueMayBeCaptured(UInst);
+ }
+
+ // Explicitly catch return instructions.
+ if (isa<ReturnInst>(UInst))
+ return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ /* Return */ true);
+
+ // For now we only use special logic for call sites. However, the tracker
+ // itself knows about a lot of other non-capturing cases already.
+ CallSite CS(UInst);
+ if (!CS || !CS.isArgOperand(U))
+ return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ /* Return */ true);
+
+ unsigned ArgNo = CS.getArgumentNo(U);
+ const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo);
+ // If we have a abstract no-capture attribute for the argument we can use
+ // it to justify a non-capture attribute here. This allows recursion!
+ auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos);
+ if (ArgNoCaptureAA.isAssumedNoCapture())
+ return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ /* Return */ false);
+ if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ addPotentialCopy(CS);
+ return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ /* Return */ false);
+ }
+
+ // Lastly, we could not find a reason no-capture can be assumed so we don't.
+ return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ /* Return */ true);
+ }
+
+ /// Register \p CS as potential copy of the value we are checking.
+ void addPotentialCopy(CallSite CS) {
+ PotentialCopies.push_back(CS.getInstruction());
+ }
+
+ /// See CaptureTracker::shouldExplore(...).
+ bool shouldExplore(const Use *U) override {
+ // Check liveness.
+ return !IsDeadAA.isAssumedDead(cast<Instruction>(U->getUser()));
+ }
+
+ /// Update the state according to \p CapturedInMem, \p CapturedInInt, and
+ /// \p CapturedInRet, then return the appropriate value for use in the
+ /// CaptureTracker::captured() interface.
+ bool isCapturedIn(bool CapturedInMem, bool CapturedInInt,
+ bool CapturedInRet) {
+ LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int "
+ << CapturedInInt << "|Ret " << CapturedInRet << "]\n");
+ if (CapturedInMem)
+ State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM);
+ if (CapturedInInt)
+ State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT);
+ if (CapturedInRet)
+ State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET);
+ return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+ }
private:
- unsigned ArgNo;
+ /// The attributor providing in-flight abstract attributes.
+ Attributor &A;
+
+ /// The abstract attribute currently updated.
+ AANoCapture &NoCaptureAA;
+
+ /// The abstract liveness state.
+ const AAIsDead &IsDeadAA;
+
+ /// The state currently updated.
+ IntegerState &State;
+
+ /// Set of potential copies of the tracked value.
+ SmallVectorImpl<const Value *> &PotentialCopies;
+
+ /// Global counter to limit the number of explored uses.
+ unsigned &RemainingUsesToExplore;
+};
+
+ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
+ const IRPosition &IRP = getIRPosition();
+ const Value *V =
+ getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue();
+ if (!V)
+ return indicatePessimisticFixpoint();
+
+ const Function *F =
+ getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+ assert(F && "Expected a function!");
+ const IRPosition &FnPos = IRPosition::function(*F);
+ const auto &IsDeadAA = A.getAAFor<AAIsDead>(*this, FnPos);
+
+ AANoCapture::StateType T;
+
+ // Readonly means we cannot capture through memory.
+ const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+ if (FnMemAA.isAssumedReadOnly()) {
+ T.addKnownBits(NOT_CAPTURED_IN_MEM);
+ if (FnMemAA.isKnownReadOnly())
+ addKnownBits(NOT_CAPTURED_IN_MEM);
+ }
+
+ // Make sure all returned values are different than the underlying value.
+ // TODO: we could do this in a more sophisticated way inside
+ // AAReturnedValues, e.g., track all values that escape through returns
+ // directly somehow.
+ auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) {
+ bool SeenConstant = false;
+ for (auto &It : RVAA.returned_values()) {
+ if (isa<Constant>(It.first)) {
+ if (SeenConstant)
+ return false;
+ SeenConstant = true;
+ } else if (!isa<Argument>(It.first) ||
+ It.first == getAssociatedArgument())
+ return false;
+ }
+ return true;
+ };
+
+ const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(*this, FnPos);
+ if (NoUnwindAA.isAssumedNoUnwind()) {
+ bool IsVoidTy = F->getReturnType()->isVoidTy();
+ const AAReturnedValues *RVAA =
+ IsVoidTy ? nullptr : &A.getAAFor<AAReturnedValues>(*this, FnPos);
+ if (IsVoidTy || CheckReturnedArgs(*RVAA)) {
+ T.addKnownBits(NOT_CAPTURED_IN_RET);
+ if (T.isKnown(NOT_CAPTURED_IN_MEM))
+ return ChangeStatus::UNCHANGED;
+ if (NoUnwindAA.isKnownNoUnwind() &&
+ (IsVoidTy || RVAA->getState().isAtFixpoint())) {
+ addKnownBits(NOT_CAPTURED_IN_RET);
+ if (isKnown(NOT_CAPTURED_IN_MEM))
+ return indicateOptimisticFixpoint();
+ }
+ }
+ }
+
+ // Use the CaptureTracker interface and logic with the specialized tracker,
+ // defined in AACaptureUseTracker, that can look at in-flight abstract
+ // attributes and directly updates the assumed state.
+ SmallVector<const Value *, 4> PotentialCopies;
+ unsigned RemainingUsesToExplore = DefaultMaxUsesToExplore;
+ AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies,
+ RemainingUsesToExplore);
+
+ // Check all potential copies of the associated value until we can assume
+ // none will be captured or we have to assume at least one might be.
+ unsigned Idx = 0;
+ PotentialCopies.push_back(V);
+ while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size())
+ Tracker.valueMayBeCaptured(PotentialCopies[Idx++]);
+
+ AAAlign::StateType &S = getState();
+ auto Assumed = S.getAssumed();
+ S.intersectAssumedBits(T.getAssumed());
+ return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+}
+
+/// NoCapture attribute for function arguments.
+struct AANoCaptureArgument final : AANoCaptureImpl {
+ AANoCaptureArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) }
+};
+
+/// NoCapture attribute for call site arguments.
+struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
+ AANoCaptureCallSiteArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg)
+ return indicatePessimisticFixpoint();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nocapture)};
+};
+
+/// NoCapture attribute for floating values.
+struct AANoCaptureFloating final : AANoCaptureImpl {
+ AANoCaptureFloating(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(nocapture)
+ }
+};
+
+/// NoCapture attribute for function return value.
+struct AANoCaptureReturned final : AANoCaptureImpl {
+ AANoCaptureReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {
+ llvm_unreachable("NoCapture is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ llvm_unreachable("NoCapture is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("NoCapture is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// NoCapture attribute deduction for a call site return value.
+struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
+ AANoCaptureCallSiteReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(nocapture)
+ }
};
-ChangeStatus AANonNullArgument::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
- Argument &Arg = cast<Argument>(getAnchoredValue());
- unsigned ArgNo = Arg.getArgNo();
+/// ------------------ Value Simplify Attribute ----------------------------
+struct AAValueSimplifyImpl : AAValueSimplify {
+ AAValueSimplifyImpl(const IRPosition &IRP) : AAValueSimplify(IRP) {}
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? (getKnown() ? "simplified" : "maybe-simple")
+ : "not-simple";
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+
+ /// See AAValueSimplify::getAssumedSimplifiedValue()
+ Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override {
+ if (!getAssumed())
+ return const_cast<Value *>(&getAssociatedValue());
+ return SimplifiedAssociatedValue;
+ }
+ void initialize(Attributor &A) override {}
+
+ /// Helper function for querying AAValueSimplify and updating candicate.
+ /// \param QueryingValue Value trying to unify with SimplifiedValue
+ /// \param AccumulatedSimplifiedValue Current simplification result.
+ static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA,
+ Value &QueryingValue,
+ Optional<Value *> &AccumulatedSimplifiedValue) {
+ // FIXME: Add a typecast support.
+
+ auto &ValueSimpifyAA = A.getAAFor<AAValueSimplify>(
+ QueryingAA, IRPosition::value(QueryingValue));
- // Callback function
- std::function<bool(CallSite)> CallSiteCheck = [&](CallSite CS) {
- assert(CS && "Sanity check: Call site was not initialized properly!");
+ Optional<Value *> QueryingValueSimplified =
+ ValueSimpifyAA.getAssumedSimplifiedValue(A);
- auto *NonNullAA = A.getAAFor<AANonNull>(*this, *CS.getInstruction(), ArgNo);
+ if (!QueryingValueSimplified.hasValue())
+ return true;
- // Check that NonNullAA is AANonNullCallSiteArgument.
- if (NonNullAA) {
- ImmutableCallSite ICS(&NonNullAA->getAnchoredValue());
- if (ICS && CS.getInstruction() == ICS.getInstruction())
- return NonNullAA->isAssumedNonNull();
+ if (!QueryingValueSimplified.getValue())
return false;
+
+ Value &QueryingValueSimplifiedUnwrapped =
+ *QueryingValueSimplified.getValue();
+
+ if (isa<UndefValue>(QueryingValueSimplifiedUnwrapped))
+ return true;
+
+ if (AccumulatedSimplifiedValue.hasValue())
+ return AccumulatedSimplifiedValue == QueryingValueSimplified;
+
+ LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << QueryingValue
+ << " is assumed to be "
+ << QueryingValueSimplifiedUnwrapped << "\n");
+
+ AccumulatedSimplifiedValue = QueryingValueSimplified;
+ return true;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ if (!SimplifiedAssociatedValue.hasValue() ||
+ !SimplifiedAssociatedValue.getValue())
+ return Changed;
+
+ if (auto *C = dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())) {
+ // We can replace the AssociatedValue with the constant.
+ Value &V = getAssociatedValue();
+ if (!V.user_empty() && &V != C && V.getType() == C->getType()) {
+ LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << V << " -> " << *C
+ << "\n");
+ V.replaceAllUsesWith(C);
+ Changed = ChangeStatus::CHANGED;
+ }
+ }
+
+ return Changed | AAValueSimplify::manifest(A);
+ }
+
+protected:
+ // An assumed simplified value. Initially, it is set to Optional::None, which
+ // means that the value is not clear under current assumption. If in the
+ // pessimistic state, getAssumedSimplifiedValue doesn't return this value but
+ // returns orignal associated value.
+ Optional<Value *> SimplifiedAssociatedValue;
+};
+
+struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
+ AAValueSimplifyArgument(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+ auto PredForCallSite = [&](AbstractCallSite ACS) {
+ // Check if we have an associated argument or not (which can happen for
+ // callback calls).
+ if (Value *ArgOp = ACS.getCallArgOperand(getArgNo()))
+ return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue);
+ return false;
+ };
+
+ if (!A.checkForAllCallSites(PredForCallSite, *this, true))
+ return indicatePessimisticFixpoint();
+
+ // If a candicate was found in this update, return CHANGED.
+ return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+ ? ChangeStatus::UNCHANGED
+ : ChangeStatus ::CHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyReturned : AAValueSimplifyImpl {
+ AAValueSimplifyReturned(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+ auto PredForReturned = [&](Value &V) {
+ return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
+ };
+
+ if (!A.checkForAllReturnedValues(PredForReturned, *this))
+ return indicatePessimisticFixpoint();
+
+ // If a candicate was found in this update, return CHANGED.
+ return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+ ? ChangeStatus::UNCHANGED
+ : ChangeStatus ::CHANGED;
+ }
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyFloating : AAValueSimplifyImpl {
+ AAValueSimplifyFloating(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Value &V = getAnchorValue();
+
+ // TODO: add other stuffs
+ if (isa<Constant>(V) || isa<UndefValue>(V))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+ auto VisitValueCB = [&](Value &V, BooleanState, bool Stripped) -> bool {
+ auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ // TODO: Look the instruction and check recursively.
+ LLVM_DEBUG(
+ dbgs() << "[Attributor][ValueSimplify] Can't be stripped more : "
+ << V << "\n");
+ indicatePessimisticFixpoint();
+ return false;
+ }
+ return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
+ };
+
+ if (!genericValueTraversal<AAValueSimplify, BooleanState>(
+ A, getIRPosition(), *this, static_cast<BooleanState &>(*this),
+ VisitValueCB))
+ return indicatePessimisticFixpoint();
+
+ // If a candicate was found in this update, return CHANGED.
+
+ return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+ ? ChangeStatus::UNCHANGED
+ : ChangeStatus ::CHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyFunction : AAValueSimplifyImpl {
+ AAValueSimplifyFunction(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ SimplifiedAssociatedValue = &getAnchorValue();
+ indicateOptimisticFixpoint();
+ }
+ /// See AbstractAttribute::initialize(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable(
+ "AAValueSimplify(Function|CallSite)::updateImpl will not be called");
+ }
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FN_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyCallSite : AAValueSimplifyFunction {
+ AAValueSimplifyCallSite(const IRPosition &IRP)
+ : AAValueSimplifyFunction(IRP) {}
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned {
+ AAValueSimplifyCallSiteReturned(const IRPosition &IRP)
+ : AAValueSimplifyReturned(IRP) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(value_simplify)
+ }
+};
+struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
+ AAValueSimplifyCallSiteArgument(const IRPosition &IRP)
+ : AAValueSimplifyFloating(IRP) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(value_simplify)
+ }
+};
+
+/// ----------------------- Heap-To-Stack Conversion ---------------------------
+struct AAHeapToStackImpl : public AAHeapToStack {
+ AAHeapToStackImpl(const IRPosition &IRP) : AAHeapToStack(IRP) {}
+
+ const std::string getAsStr() const override {
+ return "[H2S] Mallocs: " + std::to_string(MallocCalls.size());
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ assert(getState().isValidState() &&
+ "Attempted to manifest an invalid state!");
+
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ Function *F = getAssociatedFunction();
+ const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+ for (Instruction *MallocCall : MallocCalls) {
+ // This malloc cannot be replaced.
+ if (BadMallocCalls.count(MallocCall))
+ continue;
+
+ for (Instruction *FreeCall : FreesForMalloc[MallocCall]) {
+ LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
+ A.deleteAfterManifest(*FreeCall);
+ HasChanged = ChangeStatus::CHANGED;
+ }
+
+ LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
+ << "\n");
+
+ Constant *Size;
+ if (isCallocLikeFn(MallocCall, TLI)) {
+ auto *Num = cast<ConstantInt>(MallocCall->getOperand(0));
+ auto *SizeT = dyn_cast<ConstantInt>(MallocCall->getOperand(1));
+ APInt TotalSize = SizeT->getValue() * Num->getValue();
+ Size =
+ ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize);
+ } else {
+ Size = cast<ConstantInt>(MallocCall->getOperand(0));
+ }
+
+ unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
+ Instruction *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
+ Size, "", MallocCall->getNextNode());
+
+ if (AI->getType() != MallocCall->getType())
+ AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
+ AI->getNextNode());
+
+ MallocCall->replaceAllUsesWith(AI);
+
+ if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
+ auto *NBB = II->getNormalDest();
+ BranchInst::Create(NBB, MallocCall->getParent());
+ A.deleteAfterManifest(*MallocCall);
+ } else {
+ A.deleteAfterManifest(*MallocCall);
+ }
+
+ if (isCallocLikeFn(MallocCall, TLI)) {
+ auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc",
+ AI->getNextNode());
+ Value *Ops[] = {
+ BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size,
+ ConstantInt::get(Type::getInt1Ty(F->getContext()), false)};
+
+ Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()};
+ Module *M = F->getParent();
+ Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+ CallInst::Create(Fn, Ops, "", BI->getNextNode());
+ }
+ HasChanged = ChangeStatus::CHANGED;
}
- if (CS.paramHasAttr(ArgNo, Attribute::NonNull))
+ return HasChanged;
+ }
+
+ /// Collection of all malloc calls in a function.
+ SmallSetVector<Instruction *, 4> MallocCalls;
+
+ /// Collection of malloc calls that cannot be converted.
+ DenseSet<const Instruction *> BadMallocCalls;
+
+ /// A map for each malloc call to the set of associated free calls.
+ DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc;
+
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
+ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
+ const Function *F = getAssociatedFunction();
+ const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+ auto UsesCheck = [&](Instruction &I) {
+ SmallPtrSet<const Use *, 8> Visited;
+ SmallVector<const Use *, 8> Worklist;
+
+ for (Use &U : I.uses())
+ Worklist.push_back(&U);
+
+ while (!Worklist.empty()) {
+ const Use *U = Worklist.pop_back_val();
+ if (!Visited.insert(U).second)
+ continue;
+
+ auto *UserI = U->getUser();
+
+ if (isa<LoadInst>(UserI))
+ continue;
+ if (auto *SI = dyn_cast<StoreInst>(UserI)) {
+ if (SI->getValueOperand() == U->get()) {
+ LLVM_DEBUG(dbgs() << "[H2S] escaping store to memory: " << *UserI << "\n");
+ return false;
+ }
+ // A store into the malloc'ed memory is fine.
+ continue;
+ }
+
+ // NOTE: Right now, if a function that has malloc pointer as an argument
+ // frees memory, we assume that the malloc pointer is freed.
+
+ // TODO: Add nofree callsite argument attribute to indicate that pointer
+ // argument is not freed.
+ if (auto *CB = dyn_cast<CallBase>(UserI)) {
+ if (!CB->isArgOperand(U))
+ continue;
+
+ if (CB->isLifetimeStartOrEnd())
+ continue;
+
+ // Record malloc.
+ if (isFreeCall(UserI, TLI)) {
+ FreesForMalloc[&I].insert(
+ cast<Instruction>(const_cast<User *>(UserI)));
+ continue;
+ }
+
+ // If a function does not free memory we are fine
+ const auto &NoFreeAA =
+ A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(*CB));
+
+ unsigned ArgNo = U - CB->arg_begin();
+ const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
+ *this, IRPosition::callsite_argument(*CB, ArgNo));
+
+ if (!NoCaptureAA.isAssumedNoCapture() || !NoFreeAA.isAssumedNoFree()) {
+ LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI)) {
+ for (Use &U : UserI->uses())
+ Worklist.push_back(&U);
+ continue;
+ }
+
+ // Unknown user.
+ LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n");
+ return false;
+ }
+ return true;
+ };
+
+ auto MallocCallocCheck = [&](Instruction &I) {
+ if (BadMallocCalls.count(&I))
return true;
- Value *V = CS.getArgOperand(ArgNo);
- if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout()))
+ bool IsMalloc = isMallocLikeFn(&I, TLI);
+ bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI);
+ if (!IsMalloc && !IsCalloc) {
+ BadMallocCalls.insert(&I);
return true;
+ }
- return false;
- };
- if (!A.checkForAllCallSites(F, CallSiteCheck, true)) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
- }
- return ChangeStatus::UNCHANGED;
-}
+ if (IsMalloc) {
+ if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
+ if (Size->getValue().sle(MaxHeapToStackSize))
+ if (UsesCheck(I)) {
+ MallocCalls.insert(&I);
+ return true;
+ }
+ } else if (IsCalloc) {
+ bool Overflow = false;
+ if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
+ if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
+ if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
+ .sle(MaxHeapToStackSize))
+ if (!Overflow && UsesCheck(I)) {
+ MallocCalls.insert(&I);
+ return true;
+ }
+ }
-ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) {
- // NOTE: Never look at the argument of the callee in this method.
- // If we do this, "nonnull" is always deduced because of the assumption.
+ BadMallocCalls.insert(&I);
+ return true;
+ };
- Value &V = *getAssociatedValue();
+ size_t NumBadMallocs = BadMallocCalls.size();
- auto *NonNullAA = A.getAAFor<AANonNull>(*this, V);
+ A.checkForAllCallLikeInstructions(MallocCallocCheck, *this);
- if (!NonNullAA || !NonNullAA->isAssumedNonNull()) {
- indicatePessimisticFixpoint();
+ if (NumBadMallocs != BadMallocCalls.size())
return ChangeStatus::CHANGED;
- }
return ChangeStatus::UNCHANGED;
}
-/// ------------------------ Will-Return Attributes ----------------------------
+struct AAHeapToStackFunction final : public AAHeapToStackImpl {
+ AAHeapToStackFunction(const IRPosition &IRP) : AAHeapToStackImpl(IRP) {}
-struct AAWillReturnImpl : public AAWillReturn, BooleanState {
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECL(MallocCalls, Function,
+ "Number of MallocCalls converted to allocas");
+ BUILD_STAT_NAME(MallocCalls, Function) += MallocCalls.size();
+ }
+};
+
+/// -------------------- Memory Behavior Attributes ----------------------------
+/// Includes read-none, read-only, and write-only.
+/// ----------------------------------------------------------------------------
+struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
+ AAMemoryBehaviorImpl(const IRPosition &IRP) : AAMemoryBehavior(IRP) {}
- /// See AbstractAttribute::AbstractAttribute(...).
- AAWillReturnImpl(Function &F, InformationCache &InfoCache)
- : AAWillReturn(F, InfoCache) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ intersectAssumedBits(BEST_STATE);
+ getKnownStateFromValue(getIRPosition(), getState());
+ IRAttribute::initialize(A);
+ }
- /// See AAWillReturn::isKnownWillReturn().
- bool isKnownWillReturn() const override { return getKnown(); }
+ /// Return the memory behavior information encoded in the IR for \p IRP.
+ static void getKnownStateFromValue(const IRPosition &IRP,
+ IntegerState &State) {
+ SmallVector<Attribute, 2> Attrs;
+ IRP.getAttrs(AttrKinds, Attrs);
+ for (const Attribute &Attr : Attrs) {
+ switch (Attr.getKindAsEnum()) {
+ case Attribute::ReadNone:
+ State.addKnownBits(NO_ACCESSES);
+ break;
+ case Attribute::ReadOnly:
+ State.addKnownBits(NO_WRITES);
+ break;
+ case Attribute::WriteOnly:
+ State.addKnownBits(NO_READS);
+ break;
+ default:
+ llvm_unreachable("Unexpcted attribute!");
+ }
+ }
- /// See AAWillReturn::isAssumedWillReturn().
- bool isAssumedWillReturn() const override { return getAssumed(); }
+ if (auto *I = dyn_cast<Instruction>(&IRP.getAnchorValue())) {
+ if (!I->mayReadFromMemory())
+ State.addKnownBits(NO_READS);
+ if (!I->mayWriteToMemory())
+ State.addKnownBits(NO_WRITES);
+ }
+ }
- /// See AbstractAttribute::getState(...).
- AbstractState &getState() override { return *this; }
+ /// See AbstractAttribute::getDeducedAttributes(...).
+ void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ assert(Attrs.size() == 0);
+ if (isAssumedReadNone())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone));
+ else if (isAssumedReadOnly())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly));
+ else if (isAssumedWriteOnly())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly));
+ assert(Attrs.size() <= 1);
+ }
- /// See AbstractAttribute::getState(...).
- const AbstractState &getState() const override { return *this; }
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ IRPosition &IRP = getIRPosition();
+
+ // Check if we would improve the existing attributes first.
+ SmallVector<Attribute, 4> DeducedAttrs;
+ getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
+ if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) {
+ return IRP.hasAttr(Attr.getKindAsEnum(),
+ /* IgnoreSubsumingPositions */ true);
+ }))
+ return ChangeStatus::UNCHANGED;
+
+ // Clear existing attributes.
+ IRP.removeAttrs(AttrKinds);
+
+ // Use the generic manifest method.
+ return IRAttribute::manifest(A);
+ }
- /// See AbstractAttribute::getAsStr()
+ /// See AbstractState::getAsStr().
const std::string getAsStr() const override {
- return getAssumed() ? "willreturn" : "may-noreturn";
+ if (isAssumedReadNone())
+ return "readnone";
+ if (isAssumedReadOnly())
+ return "readonly";
+ if (isAssumedWriteOnly())
+ return "writeonly";
+ return "may-read/write";
}
+
+ /// The set of IR attributes AAMemoryBehavior deals with.
+ static const Attribute::AttrKind AttrKinds[3];
};
-struct AAWillReturnFunction final : AAWillReturnImpl {
+const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = {
+ Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly};
- /// See AbstractAttribute::AbstractAttribute(...).
- AAWillReturnFunction(Function &F, InformationCache &InfoCache)
- : AAWillReturnImpl(F, InfoCache) {}
+/// Memory behavior attribute for a floating value.
+struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl {
+ AAMemoryBehaviorFloating(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override {
- return MP_FUNCTION;
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorImpl::initialize(A);
+ // Initialize the use vector with all direct uses of the associated value.
+ for (const Use &U : getAssociatedValue().uses())
+ Uses.insert(&U);
}
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_FLOATING_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_FLOATING_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_FLOATING_ATTR(writeonly)
+ }
+
+private:
+ /// Return true if users of \p UserI might access the underlying
+ /// variable/location described by \p U and should therefore be analyzed.
+ bool followUsersOfUseIn(Attributor &A, const Use *U,
+ const Instruction *UserI);
+
+ /// Update the state according to the effect of use \p U in \p UserI.
+ void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI);
+
+protected:
+ /// Container for (transitive) uses of the associated argument.
+ SetVector<const Use *> Uses;
+};
+
+/// Memory behavior attribute for function argument.
+struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
+ AAMemoryBehaviorArgument(const IRPosition &IRP)
+ : AAMemoryBehaviorFloating(IRP) {}
+
/// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override;
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorFloating::initialize(A);
+
+ // Initialize the use vector with all direct uses of the associated value.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg || !Arg->getParent()->hasExactDefinition())
+ indicatePessimisticFixpoint();
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ // TODO: From readattrs.ll: "inalloca parameters are always
+ // considered written"
+ if (hasAttr({Attribute::InAlloca})) {
+ removeKnownBits(NO_WRITES);
+ removeAssumedBits(NO_WRITES);
+ }
+ return AAMemoryBehaviorFloating::manifest(A);
+ }
+
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_ARG_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_ARG_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_ARG_ATTR(writeonly)
+ }
+};
+
+struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
+ AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP)
+ : AAMemoryBehaviorArgument(IRP) {}
/// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_CSARG_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_CSARG_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_CSARG_ATTR(writeonly)
+ }
};
-// Helper function that checks whether a function has any cycle.
-// TODO: Replace with more efficent code
-bool containsCycle(Function &F) {
- SmallPtrSet<BasicBlock *, 32> Visited;
+/// Memory behavior attribute for a call site return position.
+struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
+ AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP)
+ : AAMemoryBehaviorFloating(IRP) {}
- // Traverse BB by dfs and check whether successor is already visited.
- for (BasicBlock *BB : depth_first(&F)) {
- Visited.insert(BB);
- for (auto *SuccBB : successors(BB)) {
- if (Visited.count(SuccBB))
- return true;
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // We do not annotate returned values.
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// An AA to represent the memory behavior function attributes.
+struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
+ AAMemoryBehaviorFunction(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ virtual ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ Function &F = cast<Function>(getAnchorValue());
+ if (isAssumedReadNone()) {
+ F.removeFnAttr(Attribute::ArgMemOnly);
+ F.removeFnAttr(Attribute::InaccessibleMemOnly);
+ F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
+ return AAMemoryBehaviorImpl::manifest(A);
}
- return false;
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_FN_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_FN_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_FN_ATTR(writeonly)
+ }
+};
+
+/// AAMemoryBehavior attribute for call sites.
+struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
+ AAMemoryBehaviorCallSite(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || !F->hasExactDefinition())
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AAAlign::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_CS_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_CS_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_CS_ATTR(writeonly)
+ }
+};
+} // namespace
+
+ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) {
+
+ // The current assumed state used to determine a change.
+ auto AssumedState = getAssumed();
+
+ auto CheckRWInst = [&](Instruction &I) {
+ // If the instruction has an own memory behavior state, use it to restrict
+ // the local state. No further analysis is required as the other memory
+ // state is as optimistic as it gets.
+ if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
+ const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+ *this, IRPosition::callsite_function(ICS));
+ intersectAssumedBits(MemBehaviorAA.getAssumed());
+ return !isAtFixpoint();
+ }
+
+ // Remove access kind modifiers if necessary.
+ if (I.mayReadFromMemory())
+ removeAssumedBits(NO_READS);
+ if (I.mayWriteToMemory())
+ removeAssumedBits(NO_WRITES);
+ return !isAtFixpoint();
+ };
+
+ if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
+ return indicatePessimisticFixpoint();
+
+ return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
+ : ChangeStatus::UNCHANGED;
}
-// Helper function that checks the function have a loop which might become an
-// endless loop
-// FIXME: Any cycle is regarded as endless loop for now.
-// We have to allow some patterns.
-bool containsPossiblyEndlessLoop(Function &F) { return containsCycle(F); }
+ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
-void AAWillReturnFunction::initialize(Attributor &A) {
- Function &F = getAnchorScope();
+ const IRPosition &IRP = getIRPosition();
+ const IRPosition &FnPos = IRPosition::function_scope(IRP);
+ AAMemoryBehavior::StateType &S = getState();
- if (containsPossiblyEndlessLoop(F))
- indicatePessimisticFixpoint();
+ // First, check the function scope. We take the known information and we avoid
+ // work if the assumed information implies the current assumed information for
+ // this attribute.
+ const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+ S.addKnownBits(FnMemAA.getKnown());
+ if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed())
+ return ChangeStatus::UNCHANGED;
+
+ // Make sure the value is not captured (except through "return"), if
+ // it is, any information derived would be irrelevant anyway as we cannot
+ // check the potential aliases introduced by the capture. However, no need
+ // to fall back to anythign less optimistic than the function state.
+ const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+ if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ S.intersectAssumedBits(FnMemAA.getAssumed());
+ return ChangeStatus::CHANGED;
+ }
+
+ // The current assumed state used to determine a change.
+ auto AssumedState = S.getAssumed();
+
+ // Liveness information to exclude dead users.
+ // TODO: Take the FnPos once we have call site specific liveness information.
+ const auto &LivenessAA = A.getAAFor<AAIsDead>(
+ *this, IRPosition::function(*IRP.getAssociatedFunction()));
+
+ // Visit and expand uses until all are analyzed or a fixpoint is reached.
+ for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) {
+ const Use *U = Uses[i];
+ Instruction *UserI = cast<Instruction>(U->getUser());
+ LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI
+ << " [Dead: " << (LivenessAA.isAssumedDead(UserI))
+ << "]\n");
+ if (LivenessAA.isAssumedDead(UserI))
+ continue;
+
+ // Check if the users of UserI should also be visited.
+ if (followUsersOfUseIn(A, U, UserI))
+ for (const Use &UserIUse : UserI->uses())
+ Uses.insert(&UserIUse);
+
+ // If UserI might touch memory we analyze the use in detail.
+ if (UserI->mayReadOrWriteMemory())
+ analyzeUseIn(A, U, UserI);
+ }
+
+ return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
+ : ChangeStatus::UNCHANGED;
}
-ChangeStatus AAWillReturnFunction::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U,
+ const Instruction *UserI) {
+ // The loaded value is unrelated to the pointer argument, no need to
+ // follow the users of the load.
+ if (isa<LoadInst>(UserI))
+ return false;
- // The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ // By default we follow all uses assuming UserI might leak information on U,
+ // we have special handling for call sites operands though.
+ ImmutableCallSite ICS(UserI);
+ if (!ICS || !ICS.isArgOperand(U))
+ return true;
- for (unsigned Opcode :
- {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call}) {
- for (Instruction *I : OpcodeInstMap[Opcode]) {
- auto ICS = ImmutableCallSite(I);
+ // If the use is a call argument known not to be captured, the users of
+ // the call do not need to be visited because they have to be unrelated to
+ // the input. Note that this check is not trivial even though we disallow
+ // general capturing of the underlying argument. The reason is that the
+ // call might the argument "through return", which we allow and for which we
+ // need to check call users.
+ unsigned ArgNo = ICS.getArgumentNo(U);
+ const auto &ArgNoCaptureAA =
+ A.getAAFor<AANoCapture>(*this, IRPosition::callsite_argument(ICS, ArgNo));
+ return !ArgNoCaptureAA.isAssumedNoCapture();
+}
- if (ICS.hasFnAttr(Attribute::WillReturn))
- continue;
+void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U,
+ const Instruction *UserI) {
+ assert(UserI->mayReadOrWriteMemory());
- auto *WillReturnAA = A.getAAFor<AAWillReturn>(*this, *I);
- if (!WillReturnAA || !WillReturnAA->isAssumedWillReturn()) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
- }
+ switch (UserI->getOpcode()) {
+ default:
+ // TODO: Handle all atomics and other side-effect operations we know of.
+ break;
+ case Instruction::Load:
+ // Loads cause the NO_READS property to disappear.
+ removeAssumedBits(NO_READS);
+ return;
- auto *NoRecurseAA = A.getAAFor<AANoRecurse>(*this, *I);
+ case Instruction::Store:
+ // Stores cause the NO_WRITES property to disappear if the use is the
+ // pointer operand. Note that we do assume that capturing was taken care of
+ // somewhere else.
+ if (cast<StoreInst>(UserI)->getPointerOperand() == U->get())
+ removeAssumedBits(NO_WRITES);
+ return;
- // FIXME: (i) Prohibit any recursion for now.
- // (ii) AANoRecurse isn't implemented yet so currently any call is
- // regarded as having recursion.
- // Code below should be
- // if ((!NoRecurseAA || !NoRecurseAA->isAssumedNoRecurse()) &&
- if (!NoRecurseAA && !ICS.hasFnAttr(Attribute::NoRecurse)) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
- }
+ case Instruction::Call:
+ case Instruction::CallBr:
+ case Instruction::Invoke: {
+ // For call sites we look at the argument memory behavior attribute (this
+ // could be recursive!) in order to restrict our own state.
+ ImmutableCallSite ICS(UserI);
+
+ // Give up on operand bundles.
+ if (ICS.isBundleOperand(U)) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ // Calling a function does read the function pointer, maybe write it if the
+ // function is self-modifying.
+ if (ICS.isCallee(U)) {
+ removeAssumedBits(NO_READS);
+ break;
}
+
+ // Adjust the possible access behavior based on the information on the
+ // argument.
+ unsigned ArgNo = ICS.getArgumentNo(U);
+ const IRPosition &ArgPos = IRPosition::callsite_argument(ICS, ArgNo);
+ const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
+ // "assumed" has at most the same bits as the MemBehaviorAA assumed
+ // and at least "known".
+ intersectAssumedBits(MemBehaviorAA.getAssumed());
+ return;
}
+ };
- return ChangeStatus::UNCHANGED;
+ // Generally, look at the "may-properties" and adjust the assumed state if we
+ // did not trigger special handling before.
+ if (UserI->mayReadFromMemory())
+ removeAssumedBits(NO_READS);
+ if (UserI->mayWriteToMemory())
+ removeAssumedBits(NO_WRITES);
}
/// ----------------------------------------------------------------------------
/// Attributor
/// ----------------------------------------------------------------------------
-bool Attributor::checkForAllCallSites(Function &F,
- std::function<bool(CallSite)> &Pred,
- bool RequireAllCallSites) {
+bool Attributor::isAssumedDead(const AbstractAttribute &AA,
+ const AAIsDead *LivenessAA) {
+ const Instruction *CtxI = AA.getIRPosition().getCtxI();
+ if (!CtxI)
+ return false;
+
+ if (!LivenessAA)
+ LivenessAA =
+ &getAAFor<AAIsDead>(AA, IRPosition::function(*CtxI->getFunction()),
+ /* TrackDependence */ false);
+
+ // Don't check liveness for AAIsDead.
+ if (&AA == LivenessAA)
+ return false;
+
+ if (!LivenessAA->isAssumedDead(CtxI))
+ return false;
+
+ // We actually used liveness information so we have to record a dependence.
+ recordDependence(*LivenessAA, AA);
+
+ return true;
+}
+
+bool Attributor::checkForAllCallSites(
+ const function_ref<bool(AbstractCallSite)> &Pred,
+ const AbstractAttribute &QueryingAA, bool RequireAllCallSites) {
// We can try to determine information from
// the call sites. However, this is only possible all call sites are known,
// hence the function has internal linkage.
- if (RequireAllCallSites && !F.hasInternalLinkage()) {
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ if (!AssociatedFunction) {
+ LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP
+ << "\n");
+ return false;
+ }
+
+ return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites,
+ &QueryingAA);
+}
+
+bool Attributor::checkForAllCallSites(
+ const function_ref<bool(AbstractCallSite)> &Pred, const Function &Fn,
+ bool RequireAllCallSites, const AbstractAttribute *QueryingAA) {
+ if (RequireAllCallSites && !Fn.hasLocalLinkage()) {
LLVM_DEBUG(
dbgs()
- << "Attributor: Function " << F.getName()
+ << "[Attributor] Function " << Fn.getName()
<< " has no internal linkage, hence not all call sites are known\n");
return false;
}
- for (const Use &U : F.uses()) {
+ for (const Use &U : Fn.uses()) {
+ AbstractCallSite ACS(&U);
+ if (!ACS) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Function "
+ << Fn.getName()
+ << " has non call site use " << *U.get() << " in "
+ << *U.getUser() << "\n");
+ return false;
+ }
+
+ Instruction *I = ACS.getInstruction();
+ Function *Caller = I->getFunction();
+
+ const auto *LivenessAA =
+ lookupAAFor<AAIsDead>(IRPosition::function(*Caller), QueryingAA,
+ /* TrackDependence */ false);
+
+ // Skip dead calls.
+ if (LivenessAA && LivenessAA->isAssumedDead(I)) {
+ // We actually used liveness information so we have to record a
+ // dependence.
+ if (QueryingAA)
+ recordDependence(*LivenessAA, *QueryingAA);
+ continue;
+ }
- CallSite CS(U.getUser());
- if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) {
+ const Use *EffectiveUse =
+ ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U;
+ if (!ACS.isCallee(EffectiveUse)) {
if (!RequireAllCallSites)
continue;
-
- LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser()
- << " is an invalid use of " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser()
+ << " is an invalid use of "
+ << Fn.getName() << "\n");
return false;
}
- if (Pred(CS))
+ if (Pred(ACS))
continue;
- LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for "
- << *CS.getInstruction() << "\n");
+ LLVM_DEBUG(dbgs() << "[Attributor] Call site callback failed for "
+ << *ACS.getInstruction() << "\n");
return false;
}
return true;
}
-ChangeStatus Attributor::run() {
- // Initialize all abstract attributes.
- for (AbstractAttribute *AA : AllAbstractAttributes)
- AA->initialize(*this);
+bool Attributor::checkForAllReturnedValuesAndReturnInsts(
+ const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
+ &Pred,
+ const AbstractAttribute &QueryingAA) {
+
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ // Since we need to provide return instructions we have to have an exact
+ // definition.
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ if (!AssociatedFunction)
+ return false;
+ // If this is a call site query we use the call site specific return values
+ // and liveness information.
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const auto &AARetVal = getAAFor<AAReturnedValues>(QueryingAA, QueryIRP);
+ if (!AARetVal.getState().isValidState())
+ return false;
+
+ return AARetVal.checkForAllReturnedValuesAndReturnInsts(Pred);
+}
+
+bool Attributor::checkForAllReturnedValues(
+ const function_ref<bool(Value &)> &Pred,
+ const AbstractAttribute &QueryingAA) {
+
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ if (!AssociatedFunction)
+ return false;
+
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const auto &AARetVal = getAAFor<AAReturnedValues>(QueryingAA, QueryIRP);
+ if (!AARetVal.getState().isValidState())
+ return false;
+
+ return AARetVal.checkForAllReturnedValuesAndReturnInsts(
+ [&](Value &RV, const SmallSetVector<ReturnInst *, 4> &) {
+ return Pred(RV);
+ });
+}
+
+static bool
+checkForAllInstructionsImpl(InformationCache::OpcodeInstMapTy &OpcodeInstMap,
+ const function_ref<bool(Instruction &)> &Pred,
+ const AAIsDead *LivenessAA, bool &AnyDead,
+ const ArrayRef<unsigned> &Opcodes) {
+ for (unsigned Opcode : Opcodes) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+ // Skip dead instructions.
+ if (LivenessAA && LivenessAA->isAssumedDead(I)) {
+ AnyDead = true;
+ continue;
+ }
+
+ if (!Pred(*I))
+ return false;
+ }
+ }
+ return true;
+}
+
+bool Attributor::checkForAllInstructions(
+ const llvm::function_ref<bool(Instruction &)> &Pred,
+ const AbstractAttribute &QueryingAA, const ArrayRef<unsigned> &Opcodes) {
+
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ // Since we need to provide instructions we have to have an exact definition.
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ if (!AssociatedFunction)
+ return false;
+
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const auto &LivenessAA =
+ getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false);
+ bool AnyDead = false;
+
+ auto &OpcodeInstMap =
+ InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
+ if (!checkForAllInstructionsImpl(OpcodeInstMap, Pred, &LivenessAA, AnyDead,
+ Opcodes))
+ return false;
+
+ // If we actually used liveness information so we have to record a dependence.
+ if (AnyDead)
+ recordDependence(LivenessAA, QueryingAA);
+
+ return true;
+}
+
+bool Attributor::checkForAllReadWriteInstructions(
+ const llvm::function_ref<bool(Instruction &)> &Pred,
+ AbstractAttribute &QueryingAA) {
+
+ const Function *AssociatedFunction =
+ QueryingAA.getIRPosition().getAssociatedFunction();
+ if (!AssociatedFunction)
+ return false;
+
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const auto &LivenessAA =
+ getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false);
+ bool AnyDead = false;
+
+ for (Instruction *I :
+ InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
+ // Skip dead instructions.
+ if (LivenessAA.isAssumedDead(I)) {
+ AnyDead = true;
+ continue;
+ }
+
+ if (!Pred(*I))
+ return false;
+ }
+
+ // If we actually used liveness information so we have to record a dependence.
+ if (AnyDead)
+ recordDependence(LivenessAA, QueryingAA);
+
+ return true;
+}
+
+ChangeStatus Attributor::run(Module &M) {
LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
<< AllAbstractAttributes.size()
<< " abstract attributes.\n");
@@ -1370,10 +4470,25 @@ ChangeStatus Attributor::run() {
SetVector<AbstractAttribute *> Worklist;
Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end());
+ bool RecomputeDependences = false;
+
do {
+ // Remember the size to determine new attributes.
+ size_t NumAAs = AllAbstractAttributes.size();
LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
<< ", Worklist size: " << Worklist.size() << "\n");
+ // If dependences (=QueryMap) are recomputed we have to look at all abstract
+ // attributes again, regardless of what changed in the last iteration.
+ if (RecomputeDependences) {
+ LLVM_DEBUG(
+ dbgs() << "[Attributor] Run all AAs to recompute dependences\n");
+ QueryMap.clear();
+ ChangedAAs.clear();
+ Worklist.insert(AllAbstractAttributes.begin(),
+ AllAbstractAttributes.end());
+ }
+
// Add all abstract attributes that are potentially dependent on one that
// changed to the work list.
for (AbstractAttribute *ChangedAA : ChangedAAs) {
@@ -1381,27 +4496,42 @@ ChangeStatus Attributor::run() {
Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end());
}
+ LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter
+ << ", Worklist+Dependent size: " << Worklist.size()
+ << "\n");
+
// Reset the changed set.
ChangedAAs.clear();
// Update all abstract attribute in the work list and record the ones that
// changed.
for (AbstractAttribute *AA : Worklist)
- if (AA->update(*this) == ChangeStatus::CHANGED)
- ChangedAAs.push_back(AA);
+ if (!isAssumedDead(*AA, nullptr))
+ if (AA->update(*this) == ChangeStatus::CHANGED)
+ ChangedAAs.push_back(AA);
+
+ // Check if we recompute the dependences in the next iteration.
+ RecomputeDependences = (DepRecomputeInterval > 0 &&
+ IterationCounter % DepRecomputeInterval == 0);
+
+ // Add attributes to the changed set if they have been created in the last
+ // iteration.
+ ChangedAAs.append(AllAbstractAttributes.begin() + NumAAs,
+ AllAbstractAttributes.end());
// Reset the work list and repopulate with the changed abstract attributes.
// Note that dependent ones are added above.
Worklist.clear();
Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
- } while (!Worklist.empty() && ++IterationCounter < MaxFixpointIterations);
+ } while (!Worklist.empty() && (IterationCounter++ < MaxFixpointIterations ||
+ VerifyMaxFixpointIterations));
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
<< IterationCounter << "/" << MaxFixpointIterations
<< " iterations\n");
- bool FinishedAtFixpoint = Worklist.empty();
+ size_t NumFinalAAs = AllAbstractAttributes.size();
// Reset abstract arguments not settled in a sound fixpoint by now. This
// happens when we stopped the fixpoint iteration early. Note that only the
@@ -1448,8 +4578,14 @@ ChangeStatus Attributor::run() {
if (!State.isValidState())
continue;
+ // Skip dead code.
+ if (isAssumedDead(*AA, nullptr))
+ continue;
// Manifest the state and record if we changed the IR.
ChangeStatus LocalChange = AA->manifest(*this);
+ if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled())
+ AA->trackStatistics();
+
ManifestChange = ManifestChange | LocalChange;
NumAtFixpoint++;
@@ -1462,69 +4598,92 @@ ChangeStatus Attributor::run() {
<< " arguments while " << NumAtFixpoint
<< " were in a valid fixpoint state\n");
- // If verification is requested, we finished this run at a fixpoint, and the
- // IR was changed, we re-run the whole fixpoint analysis, starting at
- // re-initialization of the arguments. This re-run should not result in an IR
- // change. Though, the (virtual) state of attributes at the end of the re-run
- // might be more optimistic than the known state or the IR state if the better
- // state cannot be manifested.
- if (VerifyAttributor && FinishedAtFixpoint &&
- ManifestChange == ChangeStatus::CHANGED) {
- VerifyAttributor = false;
- ChangeStatus VerifyStatus = run();
- if (VerifyStatus != ChangeStatus::UNCHANGED)
- llvm_unreachable(
- "Attributor verification failed, re-run did result in an IR change "
- "even after a fixpoint was reached in the original run. (False "
- "positives possible!)");
- VerifyAttributor = true;
- }
-
NumAttributesManifested += NumManifested;
NumAttributesValidFixpoint += NumAtFixpoint;
- return ManifestChange;
-}
-
-void Attributor::identifyDefaultAbstractAttributes(
- Function &F, InformationCache &InfoCache,
- DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist) {
+ (void)NumFinalAAs;
+ assert(
+ NumFinalAAs == AllAbstractAttributes.size() &&
+ "Expected the final number of abstract attributes to remain unchanged!");
+
+ // Delete stuff at the end to avoid invalid references and a nice order.
+ {
+ LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least "
+ << ToBeDeletedFunctions.size() << " functions and "
+ << ToBeDeletedBlocks.size() << " blocks and "
+ << ToBeDeletedInsts.size() << " instructions\n");
+ for (Instruction *I : ToBeDeletedInsts) {
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
- // Every function can be nounwind.
- registerAA(*new AANoUnwindFunction(F, InfoCache));
+ if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) {
+ SmallVector<BasicBlock *, 8> ToBeDeletedBBs;
+ ToBeDeletedBBs.reserve(NumDeadBlocks);
+ ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end());
+ DeleteDeadBlocks(ToBeDeletedBBs);
+ STATS_DECLTRACK(AAIsDead, BasicBlock,
+ "Number of dead basic blocks deleted.");
+ }
- // Every function might be marked "nosync"
- registerAA(*new AANoSyncFunction(F, InfoCache));
+ STATS_DECL(AAIsDead, Function, "Number of dead functions deleted.");
+ for (Function *Fn : ToBeDeletedFunctions) {
+ Fn->replaceAllUsesWith(UndefValue::get(Fn->getType()));
+ Fn->eraseFromParent();
+ STATS_TRACK(AAIsDead, Function);
+ }
- // Every function might be "no-free".
- registerAA(*new AANoFreeFunction(F, InfoCache));
+ // Identify dead internal functions and delete them. This happens outside
+ // the other fixpoint analysis as we might treat potentially dead functions
+ // as live to lower the number of iterations. If they happen to be dead, the
+ // below fixpoint loop will identify and eliminate them.
+ SmallVector<Function *, 8> InternalFns;
+ for (Function &F : M)
+ if (F.hasLocalLinkage())
+ InternalFns.push_back(&F);
+
+ bool FoundDeadFn = true;
+ while (FoundDeadFn) {
+ FoundDeadFn = false;
+ for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) {
+ Function *F = InternalFns[u];
+ if (!F)
+ continue;
- // Return attributes are only appropriate if the return type is non void.
- Type *ReturnType = F.getReturnType();
- if (!ReturnType->isVoidTy()) {
- // Argument attribute "returned" --- Create only one per function even
- // though it is an argument attribute.
- if (!Whitelist || Whitelist->count(AAReturnedValues::ID))
- registerAA(*new AAReturnedValuesImpl(F, InfoCache));
+ const auto *LivenessAA =
+ lookupAAFor<AAIsDead>(IRPosition::function(*F));
+ if (LivenessAA &&
+ !checkForAllCallSites([](AbstractCallSite ACS) { return false; },
+ *LivenessAA, true))
+ continue;
- // Every function with pointer return type might be marked nonnull.
- if (ReturnType->isPointerTy() &&
- (!Whitelist || Whitelist->count(AANonNullReturned::ID)))
- registerAA(*new AANonNullReturned(F, InfoCache));
+ STATS_TRACK(AAIsDead, Function);
+ F->replaceAllUsesWith(UndefValue::get(F->getType()));
+ F->eraseFromParent();
+ InternalFns[u] = nullptr;
+ FoundDeadFn = true;
+ }
+ }
}
- // Every argument with pointer type might be marked nonnull.
- for (Argument &Arg : F.args()) {
- if (Arg.getType()->isPointerTy())
- registerAA(*new AANonNullArgument(Arg, InfoCache));
+ if (VerifyMaxFixpointIterations &&
+ IterationCounter != MaxFixpointIterations) {
+ errs() << "\n[Attributor] Fixpoint iteration done after: "
+ << IterationCounter << "/" << MaxFixpointIterations
+ << " iterations\n";
+ llvm_unreachable("The fixpoint was not reached with exactly the number of "
+ "specified iterations!");
}
- // Every function might be "will-return".
- registerAA(*new AAWillReturnFunction(F, InfoCache));
+ return ManifestChange;
+}
+
+void Attributor::initializeInformationCache(Function &F) {
- // Walk all instructions to find more attribute opportunities and also
- // interesting instructions that might be queried by abstract attributes
- // during their initialization or update.
+ // Walk all instructions to find interesting instructions that might be
+ // queried by abstract attributes during their initialization or update.
+ // This has to happen before we create attributes.
auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F];
auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F];
@@ -1540,8 +4699,12 @@ void Attributor::identifyDefaultAbstractAttributes(
default:
assert((!ImmutableCallSite(&I)) && (!isa<CallBase>(&I)) &&
"New call site/base instruction type needs to be known int the "
- "attributor.");
+ "Attributor.");
break;
+ case Instruction::Load:
+ // The alignment of a pointer is interesting for loads.
+ case Instruction::Store:
+ // The alignment of a pointer is interesting for stores.
case Instruction::Call:
case Instruction::CallBr:
case Instruction::Invoke:
@@ -1555,18 +4718,154 @@ void Attributor::identifyDefaultAbstractAttributes(
InstOpcodeMap[I.getOpcode()].push_back(&I);
if (I.mayReadOrWriteMemory())
ReadOrWriteInsts.push_back(&I);
+ }
+}
+
+void Attributor::identifyDefaultAbstractAttributes(Function &F) {
+ if (!VisitedFunctions.insert(&F).second)
+ return;
+
+ IRPosition FPos = IRPosition::function(F);
+
+ // Check for dead BasicBlocks in every function.
+ // We need dead instruction detection because we do not want to deal with
+ // broken IR in which SSA rules do not apply.
+ getOrCreateAAFor<AAIsDead>(FPos);
+
+ // Every function might be "will-return".
+ getOrCreateAAFor<AAWillReturn>(FPos);
+ // Every function can be nounwind.
+ getOrCreateAAFor<AANoUnwind>(FPos);
+
+ // Every function might be marked "nosync"
+ getOrCreateAAFor<AANoSync>(FPos);
+
+ // Every function might be "no-free".
+ getOrCreateAAFor<AANoFree>(FPos);
+
+ // Every function might be "no-return".
+ getOrCreateAAFor<AANoReturn>(FPos);
+
+ // Every function might be "no-recurse".
+ getOrCreateAAFor<AANoRecurse>(FPos);
+
+ // Every function might be "readnone/readonly/writeonly/...".
+ getOrCreateAAFor<AAMemoryBehavior>(FPos);
+
+ // Every function might be applicable for Heap-To-Stack conversion.
+ if (EnableHeapToStack)
+ getOrCreateAAFor<AAHeapToStack>(FPos);
+
+ // Return attributes are only appropriate if the return type is non void.
+ Type *ReturnType = F.getReturnType();
+ if (!ReturnType->isVoidTy()) {
+ // Argument attribute "returned" --- Create only one per function even
+ // though it is an argument attribute.
+ getOrCreateAAFor<AAReturnedValues>(FPos);
+
+ IRPosition RetPos = IRPosition::returned(F);
+
+ // Every function might be simplified.
+ getOrCreateAAFor<AAValueSimplify>(RetPos);
+
+ if (ReturnType->isPointerTy()) {
+
+ // Every function with pointer return type might be marked align.
+ getOrCreateAAFor<AAAlign>(RetPos);
+
+ // Every function with pointer return type might be marked nonnull.
+ getOrCreateAAFor<AANonNull>(RetPos);
+
+ // Every function with pointer return type might be marked noalias.
+ getOrCreateAAFor<AANoAlias>(RetPos);
+
+ // Every function with pointer return type might be marked
+ // dereferenceable.
+ getOrCreateAAFor<AADereferenceable>(RetPos);
+ }
+ }
+
+ for (Argument &Arg : F.args()) {
+ IRPosition ArgPos = IRPosition::argument(Arg);
+
+ // Every argument might be simplified.
+ getOrCreateAAFor<AAValueSimplify>(ArgPos);
+
+ if (Arg.getType()->isPointerTy()) {
+ // Every argument with pointer type might be marked nonnull.
+ getOrCreateAAFor<AANonNull>(ArgPos);
+
+ // Every argument with pointer type might be marked noalias.
+ getOrCreateAAFor<AANoAlias>(ArgPos);
+
+ // Every argument with pointer type might be marked dereferenceable.
+ getOrCreateAAFor<AADereferenceable>(ArgPos);
+
+ // Every argument with pointer type might be marked align.
+ getOrCreateAAFor<AAAlign>(ArgPos);
+
+ // Every argument with pointer type might be marked nocapture.
+ getOrCreateAAFor<AANoCapture>(ArgPos);
+
+ // Every argument with pointer type might be marked
+ // "readnone/readonly/writeonly/..."
+ getOrCreateAAFor<AAMemoryBehavior>(ArgPos);
+ }
+ }
+
+ auto CallSitePred = [&](Instruction &I) -> bool {
CallSite CS(&I);
- if (CS && CS.getCalledFunction()) {
+ if (CS.getCalledFunction()) {
for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) {
+
+ IRPosition CSArgPos = IRPosition::callsite_argument(CS, i);
+
+ // Call site argument might be simplified.
+ getOrCreateAAFor<AAValueSimplify>(CSArgPos);
+
if (!CS.getArgument(i)->getType()->isPointerTy())
continue;
// Call site argument attribute "non-null".
- registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i);
+ getOrCreateAAFor<AANonNull>(CSArgPos);
+
+ // Call site argument attribute "no-alias".
+ getOrCreateAAFor<AANoAlias>(CSArgPos);
+
+ // Call site argument attribute "dereferenceable".
+ getOrCreateAAFor<AADereferenceable>(CSArgPos);
+
+ // Call site argument attribute "align".
+ getOrCreateAAFor<AAAlign>(CSArgPos);
}
}
- }
+ return true;
+ };
+
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ bool Success, AnyDead = false;
+ Success = checkForAllInstructionsImpl(
+ OpcodeInstMap, CallSitePred, nullptr, AnyDead,
+ {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call});
+ (void)Success;
+ assert(Success && !AnyDead && "Expected the check call to be successful!");
+
+ auto LoadStorePred = [&](Instruction &I) -> bool {
+ if (isa<LoadInst>(I))
+ getOrCreateAAFor<AAAlign>(
+ IRPosition::value(*cast<LoadInst>(I).getPointerOperand()));
+ else
+ getOrCreateAAFor<AAAlign>(
+ IRPosition::value(*cast<StoreInst>(I).getPointerOperand()));
+ return true;
+ };
+ Success = checkForAllInstructionsImpl(
+ OpcodeInstMap, LoadStorePred, nullptr, AnyDead,
+ {(unsigned)Instruction::Load, (unsigned)Instruction::Store});
+ (void)Success;
+ assert(Success && !AnyDead && "Expected the check call to be successful!");
}
/// Helpers to ease debugging through output streams and print calls.
@@ -1576,21 +4875,39 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) {
return OS << (S == ChangeStatus::CHANGED ? "changed" : "unchanged");
}
-raw_ostream &llvm::operator<<(raw_ostream &OS,
- AbstractAttribute::ManifestPosition AP) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) {
switch (AP) {
- case AbstractAttribute::MP_ARGUMENT:
+ case IRPosition::IRP_INVALID:
+ return OS << "inv";
+ case IRPosition::IRP_FLOAT:
+ return OS << "flt";
+ case IRPosition::IRP_RETURNED:
+ return OS << "fn_ret";
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ return OS << "cs_ret";
+ case IRPosition::IRP_FUNCTION:
+ return OS << "fn";
+ case IRPosition::IRP_CALL_SITE:
+ return OS << "cs";
+ case IRPosition::IRP_ARGUMENT:
return OS << "arg";
- case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
return OS << "cs_arg";
- case AbstractAttribute::MP_FUNCTION:
- return OS << "fn";
- case AbstractAttribute::MP_RETURNED:
- return OS << "fn_ret";
}
llvm_unreachable("Unknown attribute position!");
}
+raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) {
+ const Value &AV = Pos.getAssociatedValue();
+ return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " ["
+ << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}";
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerState &S) {
+ return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")"
+ << static_cast<const AbstractState &>(S);
+}
+
raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) {
return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? "fix" : ""));
}
@@ -1601,8 +4918,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
}
void AbstractAttribute::print(raw_ostream &OS) const {
- OS << "[" << getManifestPosition() << "][" << getAsStr() << "]["
- << AnchoredVal.getName() << "]";
+ OS << "[P: " << getIRPosition() << "][" << getAsStr() << "][S: " << getState()
+ << "]";
}
///}
@@ -1610,7 +4927,7 @@ void AbstractAttribute::print(raw_ostream &OS) const {
/// Pass (Manager) Boilerplate
/// ----------------------------------------------------------------------------
-static bool runAttributorOnModule(Module &M) {
+static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) {
if (DisableAttributor)
return false;
@@ -1619,39 +4936,39 @@ static bool runAttributorOnModule(Module &M) {
// Create an Attributor and initially empty information cache that is filled
// while we identify default attribute opportunities.
- Attributor A;
- InformationCache InfoCache;
+ InformationCache InfoCache(M, AG);
+ Attributor A(InfoCache, DepRecInterval);
+
+ for (Function &F : M)
+ A.initializeInformationCache(F);
for (Function &F : M) {
- // TODO: Not all attributes require an exact definition. Find a way to
- // enable deduction for some but not all attributes in case the
- // definition might be changed at runtime, see also
- // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
- // TODO: We could always determine abstract attributes and if sufficient
- // information was found we could duplicate the functions that do not
- // have an exact definition.
- if (!F.hasExactDefinition()) {
+ if (F.hasExactDefinition())
+ NumFnWithExactDefinition++;
+ else
NumFnWithoutExactDefinition++;
- continue;
- }
- // For now we ignore naked and optnone functions.
- if (F.hasFnAttribute(Attribute::Naked) ||
- F.hasFnAttribute(Attribute::OptimizeNone))
- continue;
-
- NumFnWithExactDefinition++;
+ // We look at internal functions only on-demand but if any use is not a
+ // direct call, we have to do it eagerly.
+ if (F.hasLocalLinkage()) {
+ if (llvm::all_of(F.uses(), [](const Use &U) {
+ return ImmutableCallSite(U.getUser()) &&
+ ImmutableCallSite(U.getUser()).isCallee(&U);
+ }))
+ continue;
+ }
// Populate the Attributor with abstract attribute opportunities in the
// function and the information cache with IR information.
- A.identifyDefaultAbstractAttributes(F, InfoCache);
+ A.identifyDefaultAbstractAttributes(F);
}
- return A.run() == ChangeStatus::CHANGED;
+ return A.run(M) == ChangeStatus::CHANGED;
}
PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
- if (runAttributorOnModule(M)) {
+ AnalysisGetter AG(AM);
+ if (runAttributorOnModule(M, AG)) {
// FIXME: Think about passes we will preserve and add them here.
return PreservedAnalyses::none();
}
@@ -1670,12 +4987,14 @@ struct AttributorLegacyPass : public ModulePass {
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
- return runAttributorOnModule(M);
+
+ AnalysisGetter AG;
+ return runAttributorOnModule(M, AG);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: Think about passes we will preserve and add them here.
- AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
@@ -1684,7 +5003,147 @@ struct AttributorLegacyPass : public ModulePass {
Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
char AttributorLegacyPass::ID = 0;
+
+const char AAReturnedValues::ID = 0;
+const char AANoUnwind::ID = 0;
+const char AANoSync::ID = 0;
+const char AANoFree::ID = 0;
+const char AANonNull::ID = 0;
+const char AANoRecurse::ID = 0;
+const char AAWillReturn::ID = 0;
+const char AANoAlias::ID = 0;
+const char AANoReturn::ID = 0;
+const char AAIsDead::ID = 0;
+const char AADereferenceable::ID = 0;
+const char AAAlign::ID = 0;
+const char AANoCapture::ID = 0;
+const char AAValueSimplify::ID = 0;
+const char AAHeapToStack::ID = 0;
+const char AAMemoryBehavior::ID = 0;
+
+// Macro magic to create the static generator function for attributes that
+// follow the naming scheme.
+
+#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \
+ case IRPosition::PK: \
+ llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!");
+
+#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \
+ case IRPosition::PK: \
+ AA = new CLASS##SUFFIX(IRP); \
+ break;
+
+#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
+ SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
+ SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
+ SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
+ SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
+ } \
+ return *AA; \
+ }
+
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
+
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
+
+CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
+
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
+
+CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
+
+#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef SWITCH_PK_CREATE
+#undef SWITCH_PK_INV
+
INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
"Deduce and propagate attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
"Deduce and propagate attributes", false, false)
diff --git a/lib/Transforms/IPO/BlockExtractor.cpp b/lib/Transforms/IPO/BlockExtractor.cpp
index 6c365f3f3cbe..de80c88c1591 100644
--- a/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/lib/Transforms/IPO/BlockExtractor.cpp
@@ -119,6 +119,8 @@ void BlockExtractor::loadFile() {
/*KeepEmpty=*/false);
if (LineSplit.empty())
continue;
+ if (LineSplit.size()!=2)
+ report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'");
SmallVector<StringRef, 4> BBNames;
LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
@@ -204,7 +206,8 @@ bool BlockExtractor::runOnModule(Module &M) {
++NumExtracted;
Changed = true;
}
- Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+ CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
+ Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
if (F)
LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
<< "' in: " << F->getName() << '\n');
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index ad877ae1786c..3cf839e397f8 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -48,7 +48,7 @@ static void FindUsedValues(GlobalVariable *LLVMUsed,
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
- Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+ Value *Operand = Inits->getOperand(i)->stripPointerCasts();
GlobalValue *GV = cast<GlobalValue>(Operand);
UsedValues.insert(GV);
}
@@ -120,7 +120,7 @@ static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) {
// Bump the alignment if necessary.
if (Old->getAlignment() || New->getAlignment())
- New->setAlignment(std::max(getAlignment(Old), getAlignment(New)));
+ New->setAlignment(Align(std::max(getAlignment(Old), getAlignment(New))));
copyDebugLocMetadata(Old, New);
Old->replaceAllUsesWith(NewConstant);
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index e30b33aa4872..e20159ba0db5 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -84,13 +84,9 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
for (GlobalObject &GO : M.global_objects()) {
Types.clear();
GO.getMetadata(LLVMContext::MD_type, Types);
- for (MDNode *Type : Types) {
- // Sanity check. GO must not be a function declaration.
- assert(!isa<Function>(&GO) || !cast<Function>(&GO)->isDeclaration());
-
+ for (MDNode *Type : Types)
if (ConstantInt *TypeId = extractNumericTypeId(Type))
TypeIds.insert(TypeId->getZExtValue());
- }
}
NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
@@ -108,11 +104,11 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
FunctionCallee C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
- Function *F = dyn_cast<Function>(C.getCallee());
+ Function *F = cast<Function>(C.getCallee());
// Take over the existing function. The frontend emits a weak stub so that the
// linker knows about the symbol; this pass replaces the function body.
F->deleteBody();
- F->setAlignment(4096);
+ F->setAlignment(Align(4096));
Triple T(M.getTargetTriple());
if (T.isARM() || T.isThumb())
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 5ccd8bc4b0fb..b174c63a577b 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -78,11 +78,8 @@ STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
-// FIXME: This is disabled by default to avoid exposing security vulnerabilities
-// in C/C++ code compiled by clang:
-// http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html
static cl::opt<bool> EnableNonnullArgPropagation(
- "enable-nonnull-arg-prop", cl::Hidden,
+ "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
cl::desc("Try to propagate nonnull argument attributes from callsites to "
"caller functions."));
@@ -664,6 +661,25 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
return Changed;
}
+static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
+ assert((R == Attribute::ReadOnly || R == Attribute::ReadNone)
+ && "Must be a Read attribute.");
+ assert(A && "Argument must not be null.");
+
+ // If the argument already has the attribute, nothing needs to be done.
+ if (A->hasAttribute(R))
+ return false;
+
+ // Otherwise, remove potentially conflicting attribute, add the new one,
+ // and update statistics.
+ A->removeAttr(Attribute::WriteOnly);
+ A->removeAttr(Attribute::ReadOnly);
+ A->removeAttr(Attribute::ReadNone);
+ A->addAttr(R);
+ R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
+ return true;
+}
+
/// Deduce nocapture attributes for the SCC.
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
@@ -732,11 +748,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
SmallPtrSet<Argument *, 8> Self;
Self.insert(&*A);
Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
- if (R != Attribute::None) {
- A->addAttr(R);
- Changed = true;
- R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
- }
+ if (R != Attribute::None)
+ Changed = addReadAttr(A, R);
}
}
}
@@ -833,12 +846,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ReadAttr != Attribute::None) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- // Clear out existing readonly/readnone attributes
- A->removeAttr(Attribute::ReadOnly);
- A->removeAttr(Attribute::ReadNone);
- A->addAttr(ReadAttr);
- ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
- Changed = true;
+ Changed = addReadAttr(A, ReadAttr);
}
}
}
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 62c7fbd07223..3f5cc078d75f 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -450,7 +450,7 @@ static void computeImportForFunction(
} else if (PrintImportFailures) {
assert(!FailureInfo &&
"Expected no FailureInfo for newly rejected candidate");
- FailureInfo = llvm::make_unique<FunctionImporter::ImportFailureInfo>(
+ FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
VI, Edge.second.getHotness(), Reason, 1);
}
LLVM_DEBUG(
@@ -764,7 +764,7 @@ void llvm::computeDeadSymbols(
}
// Make value live and add it to the worklist if it was not live before.
- auto visit = [&](ValueInfo VI) {
+ auto visit = [&](ValueInfo VI, bool IsAliasee) {
// FIXME: If we knew which edges were created for indirect call profiles,
// we could skip them here. Any that are live should be reached via
// other edges, e.g. reference edges. Otherwise, using a profile collected
@@ -800,12 +800,15 @@ void llvm::computeDeadSymbols(
Interposable = true;
}
- if (!KeepAliveLinkage)
- return;
+ if (!IsAliasee) {
+ if (!KeepAliveLinkage)
+ return;
- if (Interposable)
- report_fatal_error(
- "Interposable and available_externally/linkonce_odr/weak_odr symbol");
+ if (Interposable)
+ report_fatal_error(
+ "Interposable and available_externally/linkonce_odr/weak_odr "
+ "symbol");
+ }
}
for (auto &S : VI.getSummaryList())
@@ -821,16 +824,16 @@ void llvm::computeDeadSymbols(
// If this is an alias, visit the aliasee VI to ensure that all copies
// are marked live and it is added to the worklist for further
// processing of its references.
- visit(AS->getAliaseeVI());
+ visit(AS->getAliaseeVI(), true);
continue;
}
Summary->setLive(true);
for (auto Ref : Summary->refs())
- visit(Ref);
+ visit(Ref, false);
if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
for (auto Call : FS->calls())
- visit(Call.first);
+ visit(Call.first, false);
}
}
Index.setWithGlobalValueDeadStripping();
@@ -892,7 +895,7 @@ std::error_code llvm::EmitImportsFiles(
StringRef ModulePath, StringRef OutputFilename,
const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
std::error_code EC;
- raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
if (EC)
return EC;
for (auto &ILI : ModuleToSummariesForIndex)
@@ -948,23 +951,15 @@ void llvm::thinLTOResolvePrevailingInModule(
auto NewLinkage = GS->second->linkage();
if (NewLinkage == GV.getLinkage())
return;
-
- // Switch the linkage to weakany if asked for, e.g. we do this for
- // linker redefined symbols (via --wrap or --defsym).
- // We record that the visibility should be changed here in `addThinLTO`
- // as we need access to the resolution vectors for each input file in
- // order to find which symbols have been redefined.
- // We may consider reorganizing this code and moving the linkage recording
- // somewhere else, e.g. in thinLTOResolvePrevailingInIndex.
- if (NewLinkage == GlobalValue::WeakAnyLinkage) {
- GV.setLinkage(NewLinkage);
- return;
- }
-
if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
+ // Don't internalize anything here, because the code below
+ // lacks necessary correctness checks. Leave this job to
+ // LLVM 'internalize' pass.
+ GlobalValue::isLocalLinkage(NewLinkage) ||
// In case it was dead and already converted to declaration.
GV.isDeclaration())
return;
+
// Check for a non-prevailing def that has interposable linkage
// (e.g. non-odr weak or linkonce). In that case we can't simply
// convert to available_externally, since it would lose the
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 86b7f3e49ee6..f010f7b703a6 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -17,9 +17,11 @@
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
@@ -29,10 +31,15 @@ using namespace llvm;
#define DEBUG_TYPE "globaldce"
+static cl::opt<bool>
+ ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ cl::desc("Enable virtual function elimination"));
+
STATISTIC(NumAliases , "Number of global aliases removed");
STATISTIC(NumFunctions, "Number of functions removed");
STATISTIC(NumIFuncs, "Number of indirect functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
+STATISTIC(NumVFuncs, "Number of virtual functions removed");
namespace {
class GlobalDCELegacyPass : public ModulePass {
@@ -118,6 +125,15 @@ void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) {
ComputeDependencies(User, Deps);
Deps.erase(&GV); // Remove self-reference.
for (GlobalValue *GVU : Deps) {
+ // If this is a dep from a vtable to a virtual function, and we have
+ // complete information about all virtual call sites which could call
+ // though this vtable, then skip it, because the call site information will
+ // be more precise.
+ if (VFESafeVTables.count(GVU) && isa<Function>(&GV)) {
+ LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> "
+ << GV.getName() << "\n");
+ continue;
+ }
GVDependencies[GVU].insert(&GV);
}
}
@@ -132,12 +148,133 @@ void GlobalDCEPass::MarkLive(GlobalValue &GV,
if (Updates)
Updates->push_back(&GV);
if (Comdat *C = GV.getComdat()) {
- for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
MarkLive(*CM.second, Updates); // Recursion depth is only two because only
// globals in the same comdat are visited.
+ }
+ }
+}
+
+void GlobalDCEPass::ScanVTables(Module &M) {
+ SmallVector<MDNode *, 2> Types;
+ LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n");
+
+ auto *LTOPostLinkMD =
+ cast_or_null<ConstantAsMetadata>(M.getModuleFlag("LTOPostLink"));
+ bool LTOPostLink =
+ LTOPostLinkMD &&
+ (cast<ConstantInt>(LTOPostLinkMD->getValue())->getZExtValue() != 0);
+
+ for (GlobalVariable &GV : M.globals()) {
+ Types.clear();
+ GV.getMetadata(LLVMContext::MD_type, Types);
+ if (GV.isDeclaration() || Types.empty())
+ continue;
+
+ // Use the typeid metadata on the vtable to build a mapping from typeids to
+ // the list of (GV, offset) pairs which are the possible vtables for that
+ // typeid.
+ for (MDNode *Type : Types) {
+ Metadata *TypeID = Type->getOperand(1).get();
+
+ uint64_t Offset =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+
+ TypeIdMap[TypeID].insert(std::make_pair(&GV, Offset));
+ }
+
+ // If the type corresponding to the vtable is private to this translation
+ // unit, we know that we can see all virtual functions which might use it,
+ // so VFE is safe.
+ if (auto GO = dyn_cast<GlobalObject>(&GV)) {
+ GlobalObject::VCallVisibility TypeVis = GO->getVCallVisibility();
+ if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit ||
+ (LTOPostLink &&
+ TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) {
+ LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n");
+ VFESafeVTables.insert(&GV);
+ }
+ }
+ }
+}
+
+void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId,
+ uint64_t CallOffset) {
+ for (auto &VTableInfo : TypeIdMap[TypeId]) {
+ GlobalVariable *VTable = VTableInfo.first;
+ uint64_t VTableOffset = VTableInfo.second;
+
+ Constant *Ptr =
+ getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset,
+ *Caller->getParent());
+ if (!Ptr) {
+ LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n");
+ VFESafeVTables.erase(VTable);
+ return;
+ }
+
+ auto Callee = dyn_cast<Function>(Ptr->stripPointerCasts());
+ if (!Callee) {
+ LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n");
+ VFESafeVTables.erase(VTable);
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> "
+ << Callee->getName() << "\n");
+ GVDependencies[Caller].insert(Callee);
}
}
+void GlobalDCEPass::ScanTypeCheckedLoadIntrinsics(Module &M) {
+ LLVM_DEBUG(dbgs() << "Scanning type.checked.load intrinsics\n");
+ Function *TypeCheckedLoadFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
+
+ if (!TypeCheckedLoadFunc)
+ return;
+
+ for (auto U : TypeCheckedLoadFunc->users()) {
+ auto CI = dyn_cast<CallInst>(U);
+ if (!CI)
+ continue;
+
+ auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ Value *TypeIdValue = CI->getArgOperand(2);
+ auto *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
+
+ if (Offset) {
+ ScanVTableLoad(CI->getFunction(), TypeId, Offset->getZExtValue());
+ } else {
+ // type.checked.load with a non-constant offset, so assume every entry in
+ // every matching vtable is used.
+ for (auto &VTableInfo : TypeIdMap[TypeId]) {
+ VFESafeVTables.erase(VTableInfo.first);
+ }
+ }
+ }
+}
+
+void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) {
+ if (!ClEnableVFE)
+ return;
+
+ ScanVTables(M);
+
+ if (VFESafeVTables.empty())
+ return;
+
+ ScanTypeCheckedLoadIntrinsics(M);
+
+ LLVM_DEBUG(
+ dbgs() << "VFE safe vtables:\n";
+ for (auto *VTable : VFESafeVTables)
+ dbgs() << " " << VTable->getName() << "\n";
+ );
+}
+
PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
bool Changed = false;
@@ -163,6 +300,10 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
if (Comdat *C = GA.getComdat())
ComdatMembers.insert(std::make_pair(C, &GA));
+ // Add dependencies between virtual call sites and the virtual functions they
+ // might call, if we have that information.
+ AddVirtualFunctionDependencies(M);
+
// Loop over the module, adding globals which are obviously necessary.
for (GlobalObject &GO : M.global_objects()) {
Changed |= RemoveUnusedGlobalValue(GO);
@@ -257,8 +398,17 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
};
NumFunctions += DeadFunctions.size();
- for (Function *F : DeadFunctions)
+ for (Function *F : DeadFunctions) {
+ if (!F->use_empty()) {
+ // Virtual functions might still be referenced by one or more vtables,
+ // but if we've proven them to be unused then it's safe to replace the
+ // virtual function pointers with null, allowing us to remove the
+ // function itself.
+ ++NumVFuncs;
+ F->replaceNonMetadataUsesWith(ConstantPointerNull::get(F->getType()));
+ }
EraseUnusedGlobalValue(F);
+ }
NumVariables += DeadGlobalVars.size();
for (GlobalVariable *GV : DeadGlobalVars)
@@ -277,6 +427,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
ConstantDependenciesCache.clear();
GVDependencies.clear();
ComdatMembers.clear();
+ TypeIdMap.clear();
+ VFESafeVTables.clear();
if (Changed)
return PreservedAnalyses::none();
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index c4fb3ce77f6e..819715b9f8da 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -155,7 +155,8 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
-static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
+static bool IsSafeComputationToRemove(
+ Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
do {
if (isa<Constant>(V))
return true;
@@ -164,7 +165,7 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
isa<GlobalValue>(V))
return false;
- if (isAllocationFn(V, TLI))
+ if (isAllocationFn(V, GetTLI))
return true;
Instruction *I = cast<Instruction>(V);
@@ -184,8 +185,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
/// This GV is a pointer root. Loop over all users of the global and clean up
/// any that obviously don't assign the global a value that isn't dynamically
/// allocated.
-static bool CleanupPointerRootUsers(GlobalVariable *GV,
- const TargetLibraryInfo *TLI) {
+static bool
+CleanupPointerRootUsers(GlobalVariable *GV,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
// pointers are forgotten, causing an accumulating growth in memory
// usage over time. The common strategy for leak checkers is to whitelist the
@@ -241,18 +243,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
Dead.clear();
- CleanupPointerRootUsers(GV, TLI);
+ CleanupPointerRootUsers(GV, GetTLI);
return true;
}
}
}
for (int i = 0, e = Dead.size(); i != e; ++i) {
- if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
+ if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
- if (isAllocationFn(I, TLI))
+ if (isAllocationFn(I, GetTLI))
break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
@@ -270,9 +272,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
/// We just marked GV constant. Loop over all users of the global, cleaning up
/// the obvious ones. This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft. This returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- const DataLayout &DL,
- TargetLibraryInfo *TLI) {
+static bool CleanupConstantGlobalUsers(
+ Value *V, Constant *Init, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
bool Changed = false;
// Note that we need to use a weak value handle for the worklist items. When
// we delete a constant array, we may also be holding pointer to one of its
@@ -302,12 +304,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = nullptr;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI);
} else if ((CE->getOpcode() == Instruction::BitCast &&
CE->getType()->isPointerTy()) ||
CE->getOpcode() == Instruction::AddrSpaceCast) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
+ Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI);
}
if (CE->use_empty()) {
@@ -321,7 +323,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = nullptr;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
- ConstantFoldInstruction(GEP, DL, TLI));
+ ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction())));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -331,7 +333,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
SubInit = Constant::getNullValue(GEP->getResultElementType());
}
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI);
if (GEP->use_empty()) {
GEP->eraseFromParent();
@@ -348,7 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// us, and if they are all dead, nuke them without remorse.
if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
- CleanupConstantGlobalUsers(V, Init, DL, TLI);
+ CleanupConstantGlobalUsers(V, Init, DL, GetTLI);
return true;
}
}
@@ -495,8 +497,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
uint64_t FieldOffset = Layout.getElementOffset(i);
- unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
- if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
+ Align NewAlign(MinAlign(StartAlignment, FieldOffset));
+ if (NewAlign > Align(DL.getABITypeAlignment(STy->getElementType(i))))
NGV->setAlignment(NewAlign);
// Copy over the debug info for the variable.
@@ -511,7 +513,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
NewGlobals.reserve(NumElements);
auto ElTy = STy->getElementType();
uint64_t EltSize = DL.getTypeAllocSize(ElTy);
- unsigned EltAlign = DL.getABITypeAlignment(ElTy);
+ Align EltAlign(DL.getABITypeAlignment(ElTy));
uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = Init->getAggregateElement(i);
@@ -530,7 +532,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
- unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+ Align NewAlign(MinAlign(StartAlignment, EltSize * i));
if (NewAlign > EltAlign)
NGV->setAlignment(NewAlign);
transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits,
@@ -745,9 +747,9 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// optimize away the load.
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- const DataLayout &DL,
- TargetLibraryInfo *TLI) {
+static bool OptimizeAwayTrappingUsesOfLoads(
+ GlobalVariable *GV, Constant *LV, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
@@ -793,10 +795,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// nor is the global.
if (AllNonStoreUsesGone) {
if (isLeakCheckerRoot(GV)) {
- Changed |= CleanupPointerRootUsers(GV, TLI);
+ Changed |= CleanupPointerRootUsers(GV, GetTLI);
} else {
Changed = true;
- CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
+ CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI);
}
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
@@ -889,8 +891,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
while (!GV->use_empty()) {
if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
// The global is initialized when the store to it occurs.
- new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
- SI->getOrdering(), SI->getSyncScopeID(), SI);
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false,
+ None, SI->getOrdering(), SI->getSyncScopeID(), SI);
SI->eraseFromParent();
continue;
}
@@ -907,7 +909,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Replace the cmp X, 0 with a use of the bool value.
// Sink the load to where the compare was, if atomic rules allow us to.
Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
- InitBool->getName() + ".val", false, 0,
+ InitBool->getName() + ".val", false, None,
LI->getOrdering(), LI->getSyncScopeID(),
LI->isUnordered() ? (Instruction *)ICI : LI);
InitBoolUsed = true;
@@ -1562,10 +1564,10 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
-static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
- AtomicOrdering Ordering,
- const DataLayout &DL,
- TargetLibraryInfo *TLI) {
+static bool
+optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ AtomicOrdering Ordering, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1583,9 +1585,10 @@ static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
return true;
- } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) {
+ auto *TLI = &GetTLI(*CI->getFunction());
Type *MallocType = getMallocAllocatedType(CI, TLI);
if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
Ordering, DL, TLI))
@@ -1643,10 +1646,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// instead of a select to synthesize the desired value.
bool IsOneZero = false;
bool EmitOneOrZero = true;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)){
+ auto *CI = dyn_cast<ConstantInt>(OtherVal);
+ if (CI && CI->getValue().getActiveBits() <= 64) {
IsOneZero = InitVal->isNullValue() && CI->isOne();
- if (ConstantInt *CIInit = dyn_cast<ConstantInt>(GV->getInitializer())){
+ auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
+ if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
uint64_t ValInit = CIInit->getZExtValue();
uint64_t ValOther = CI->getZExtValue();
uint64_t ValMinus = ValOther - ValInit;
@@ -1711,7 +1716,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
- LI->getName() + ".b", false, 0,
+ LI->getName() + ".b", false, None,
LI->getOrdering(), LI->getSyncScopeID(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
@@ -1721,15 +1726,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
}
}
StoreInst *NSI =
- new StoreInst(StoreVal, NewGV, false, 0, SI->getOrdering(),
+ new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(),
SI->getSyncScopeID(), SI);
NSI->setDebugLoc(SI->getDebugLoc());
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
- LoadInst *NLI =
- new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b",
- false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI);
+ LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
+ LI->getName() + ".b", false, None,
+ LI->getOrdering(), LI->getSyncScopeID(), LI);
Instruction *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
@@ -1914,9 +1919,10 @@ static void makeAllConstantUsesInstructions(Constant *C) {
/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
-static bool processInternalGlobal(
- GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI,
- function_ref<DominatorTree &(Function &)> LookupDomTree) {
+static bool
+processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
auto &DL = GV->getParent()->getDataLayout();
// If this is a first class global and has only one accessing function and
// this function is non-recursive, we replace the global with a local alloca
@@ -1963,11 +1969,12 @@ static bool processInternalGlobal(
bool Changed;
if (isLeakCheckerRoot(GV)) {
// Delete any constant stores to the global.
- Changed = CleanupPointerRootUsers(GV, TLI);
+ Changed = CleanupPointerRootUsers(GV, GetTLI);
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
+ Changed =
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
}
// If the global is dead now, delete it.
@@ -1989,7 +1996,7 @@ static bool processInternalGlobal(
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -2019,7 +2026,7 @@ static bool processInternalGlobal(
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
@@ -2033,7 +2040,8 @@ static bool processInternalGlobal(
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI))
+ if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
+ GetTLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -2054,7 +2062,8 @@ static bool processInternalGlobal(
/// Analyze the specified global variable and optimize it if possible. If we
/// make a change, return true.
static bool
-processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI,
+processGlobal(GlobalValue &GV,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
if (GV.getName().startswith("llvm."))
return false;
@@ -2086,7 +2095,7 @@ processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI,
if (GVar->isConstant() || !GVar->hasInitializer())
return Changed;
- return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed;
+ return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
}
/// Walk all of the direct calls of the specified function, changing them to
@@ -2234,7 +2243,8 @@ hasOnlyColdCalls(Function &F,
}
static bool
-OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
+OptimizeFunctions(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
@@ -2275,17 +2285,13 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
// So, remove unreachable blocks from the function, because a) there's
// no point in analyzing them and b) GlobalOpt should otherwise grow
// some more complicated logic to break these cycles.
- // Removing unreachable blocks might invalidate the dominator so we
- // recalculate it.
if (!F->isDeclaration()) {
- if (removeUnreachableBlocks(*F)) {
- auto &DT = LookupDomTree(*F);
- DT.recalculate(*F);
- Changed = true;
- }
+ auto &DT = LookupDomTree(*F);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ Changed |= removeUnreachableBlocks(*F, &DTU);
}
- Changed |= processGlobal(*F, TLI, LookupDomTree);
+ Changed |= processGlobal(*F, GetTLI, LookupDomTree);
if (!F->hasLocalLinkage())
continue;
@@ -2342,7 +2348,8 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
}
static bool
-OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
+OptimizeGlobalVars(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
@@ -2357,7 +2364,10 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
if (GV->hasInitializer())
if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
auto &DL = M.getDataLayout();
- Constant *New = ConstantFoldConstant(C, DL, TLI);
+ // TLI is not used in the case of a Constant, so use default nullptr
+ // for that optional parameter, since we don't have a Function to
+ // provide GetTLI anyway.
+ Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
if (New && New != C)
GV->setInitializer(New);
}
@@ -2367,7 +2377,7 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
continue;
}
- Changed |= processGlobal(*GV, TLI, LookupDomTree);
+ Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
}
return Changed;
}
@@ -2581,8 +2591,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
}
static int compareNames(Constant *const *A, Constant *const *B) {
- Value *AStripped = (*A)->stripPointerCastsNoFollowAliases();
- Value *BStripped = (*B)->stripPointerCastsNoFollowAliases();
+ Value *AStripped = (*A)->stripPointerCasts();
+ Value *BStripped = (*B)->stripPointerCasts();
return AStripped->getName().compare(BStripped->getName());
}
@@ -2809,7 +2819,14 @@ OptimizeGlobalAliases(Module &M,
return Changed;
}
-static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
+static Function *
+FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
+ // Hack to get a default TLI before we have actual Function.
+ auto FuncIter = M.begin();
+ if (FuncIter == M.end())
+ return nullptr;
+ auto *TLI = &GetTLI(*FuncIter);
+
LibFunc F = LibFunc_cxa_atexit;
if (!TLI->has(F))
return nullptr;
@@ -2818,6 +2835,9 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
if (!Fn)
return nullptr;
+ // Now get the actual TLI for Fn.
+ TLI = &GetTLI(*Fn);
+
// Make sure that the function has the correct prototype.
if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
return nullptr;
@@ -2889,7 +2909,8 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
}
static bool optimizeGlobalsInModule(
- Module &M, const DataLayout &DL, TargetLibraryInfo *TLI,
+ Module &M, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
@@ -2914,24 +2935,24 @@ static bool optimizeGlobalsInModule(
NotDiscardableComdats.insert(C);
// Delete functions that are trivially dead, ccc -> fastcc
- LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree,
+ LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
NotDiscardableComdats);
// Optimize global_ctors list.
LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
- return EvaluateStaticConstructor(F, DL, TLI);
+ return EvaluateStaticConstructor(F, DL, &GetTLI(*F));
});
// Optimize non-address-taken globals.
- LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree,
- NotDiscardableComdats);
+ LocalChange |=
+ OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
// Try to remove trivial global destructors if they are not removed
// already.
- Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
+ Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
if (CXAAtExitFn)
LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
@@ -2946,12 +2967,14 @@ static bool optimizeGlobalsInModule(
PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &DL = M.getDataLayout();
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
auto &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
return FAM.getResult<DominatorTreeAnalysis>(F);
};
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
@@ -2960,7 +2983,7 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
- if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree))
+ if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
@@ -2979,10 +3002,12 @@ struct GlobalOptLegacyPass : public ModulePass {
return false;
auto &DL = M.getDataLayout();
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto LookupDomTree = [this](Function &F) -> DominatorTree & {
return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
};
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
};
@@ -2991,7 +3016,8 @@ struct GlobalOptLegacyPass : public ModulePass {
return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree);
+ return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI,
+ LookupDomTree);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp
index ab1a9a79cad6..cfdcc8db7f50 100644
--- a/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -85,12 +85,6 @@ static cl::opt<int>
"multiple of TCC_Basic)"));
namespace {
-
-/// A sequence of basic blocks.
-///
-/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
-using BlockSequence = SmallVector<BasicBlock *, 0>;
-
// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.
//
@@ -169,31 +163,6 @@ static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) {
return Changed;
}
-class HotColdSplitting {
-public:
- HotColdSplitting(ProfileSummaryInfo *ProfSI,
- function_ref<BlockFrequencyInfo *(Function &)> GBFI,
- function_ref<TargetTransformInfo &(Function &)> GTTI,
- std::function<OptimizationRemarkEmitter &(Function &)> *GORE,
- function_ref<AssumptionCache *(Function &)> LAC)
- : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {}
- bool run(Module &M);
-
-private:
- bool isFunctionCold(const Function &F) const;
- bool shouldOutlineFrom(const Function &F) const;
- bool outlineColdRegions(Function &F, bool HasProfileSummary);
- Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
- BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
- OptimizationRemarkEmitter &ORE,
- AssumptionCache *AC, unsigned Count);
- ProfileSummaryInfo *PSI;
- function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
- function_ref<TargetTransformInfo &(Function &)> GetTTI;
- std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
- function_ref<AssumptionCache *(Function &)> LookupAC;
-};
-
class HotColdSplittingLegacyPass : public ModulePass {
public:
static char ID;
@@ -321,13 +290,10 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
return Penalty;
}
-Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
- DominatorTree &DT,
- BlockFrequencyInfo *BFI,
- TargetTransformInfo &TTI,
- OptimizationRemarkEmitter &ORE,
- AssumptionCache *AC,
- unsigned Count) {
+Function *HotColdSplitting::extractColdRegion(
+ const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC,
+ DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+ OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) {
assert(!Region.empty());
// TODO: Pass BFI and BPI to update profile information.
@@ -349,7 +315,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
return nullptr;
Function *OrigF = Region[0]->getParent();
- if (Function *OutF = CE.extractCodeRegion()) {
+ if (Function *OutF = CE.extractCodeRegion(CEAC)) {
User *U = *OutF->user_begin();
CallInst *CI = cast<CallInst>(U);
CallSite CS(CI);
@@ -607,9 +573,9 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
});
if (!DT)
- DT = make_unique<DominatorTree>(F);
+ DT = std::make_unique<DominatorTree>(F);
if (!PDT)
- PDT = make_unique<PostDominatorTree>(F);
+ PDT = std::make_unique<PostDominatorTree>(F);
auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
for (OutliningRegion &Region : Regions) {
@@ -637,9 +603,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
}
}
+ if (OutliningWorklist.empty())
+ return Changed;
+
// Outline single-entry cold regions, splitting up larger regions as needed.
unsigned OutlinedFunctionID = 1;
- while (!OutliningWorklist.empty()) {
+ // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
+ CodeExtractorAnalysisCache CEAC(F);
+ do {
OutliningRegion Region = OutliningWorklist.pop_back_val();
assert(!Region.empty() && "Empty outlining region in worklist");
do {
@@ -650,14 +621,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
BB->dump();
});
- Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC,
- OutlinedFunctionID);
+ Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI,
+ ORE, AC, OutlinedFunctionID);
if (Outlined) {
++OutlinedFunctionID;
Changed = true;
}
} while (!Region.empty());
- }
+ } while (!OutliningWorklist.empty());
return Changed;
}
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 34db75dd8b03..bddf75211599 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -114,6 +114,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createIPSCCPPass());
}
+void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createMergeFunctionsPass());
+}
+
void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
auto PreserveMain = [=](const GlobalValue &GV) {
return AllButMain && GV.getName() == "main";
@@ -121,6 +125,15 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
unwrap(PM)->add(createInternalizePass(PreserveMain));
}
+void LLVMAddInternalizePassWithMustPreservePredicate(
+ LLVMPassManagerRef PM,
+ void *Context,
+ LLVMBool (*Pred)(LLVMValueRef, void *)) {
+ unwrap(PM)->add(createInternalizePass([=](const GlobalValue &GV) {
+ return Pred(wrap(&GV), Context) == 0 ? false : true;
+ }));
+}
+
void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createStripDeadPrototypesPass());
}
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 7f5511e008e1..d1a68b28bd33 100644
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -18,24 +18,28 @@ using namespace llvm;
#define DEBUG_TYPE "inferattrs"
-static bool inferAllPrototypeAttributes(Module &M,
- const TargetLibraryInfo &TLI) {
+static bool inferAllPrototypeAttributes(
+ Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
bool Changed = false;
for (Function &F : M.functions())
// We only infer things using the prototype and the name; we don't need
// definitions.
if (F.isDeclaration() && !F.hasOptNone())
- Changed |= inferLibFuncAttributes(F, TLI);
+ Changed |= inferLibFuncAttributes(F, GetTLI(F));
return Changed;
}
PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
ModuleAnalysisManager &AM) {
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
- if (!inferAllPrototypeAttributes(M, TLI))
+ if (!inferAllPrototypeAttributes(M, GetTLI))
// If we didn't infer anything, preserve all analyses.
return PreservedAnalyses::all();
@@ -60,8 +64,10 @@ struct InferFunctionAttrsLegacyPass : public ModulePass {
if (skipModule(M))
return false;
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- return inferAllPrototypeAttributes(M, TLI);
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ return inferAllPrototypeAttributes(M, GetTLI);
}
};
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 945f8affae6e..4b72261131c1 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -239,7 +239,7 @@ static void mergeInlinedArrayAllocas(
}
if (Align1 > Align2)
- AvailableAlloca->setAlignment(AI->getAlignment());
+ AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment()));
}
AI->eraseFromParent();
@@ -527,7 +527,8 @@ static void setInlineRemark(CallSite &CS, StringRef message) {
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
- ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI,
+ ProfileSummaryInfo *PSI,
+ std::function<TargetLibraryInfo &(Function &)> GetTLI,
bool InsertLifetime,
function_ref<InlineCost(CallSite CS)> GetInlineCost,
function_ref<AAResults &(Function &)> AARGetter,
@@ -626,7 +627,8 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
Instruction *Instr = CS.getInstruction();
- bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);
+ bool IsTriviallyDead =
+ isInstructionTriviallyDead(Instr, &GetTLI(*Caller));
int InlineHistoryID;
if (!IsTriviallyDead) {
@@ -757,13 +759,16 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
ACT = &getAnalysis<AssumptionCacheTracker>();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
+ return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
- [this](CallSite CS) { return getInlineCost(CS); },
- LegacyAARGetter(*this), ImportedFunctionsStats);
+ return inlineCallsImpl(
+ SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime,
+ [this](CallSite CS) { return getInlineCost(CS); }, LegacyAARGetter(*this),
+ ImportedFunctionsStats);
}
/// Remove now-dead linkonce functions at the end of
@@ -879,7 +884,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (!ImportedFunctionsStats &&
InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
ImportedFunctionsStats =
- llvm::make_unique<ImportedFunctionsInliningStatistics>();
+ std::make_unique<ImportedFunctionsInliningStatistics>();
ImportedFunctionsStats->setModuleInfo(M);
}
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 91c7b5f5f135..add2ae053735 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -141,10 +141,12 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (NumLoops == 0) return Changed;
--NumLoops;
AssumptionCache *AC = nullptr;
+ Function &Func = *L->getHeader()->getParent();
if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
- AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent());
+ AC = ACT->lookupAssumptionCache(Func);
+ CodeExtractorAnalysisCache CEAC(Func);
CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
- if (Extractor.extractCodeRegion() != nullptr) {
+ if (Extractor.extractCodeRegion(CEAC) != nullptr) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index f7371284f47e..2dec366d70e2 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -230,6 +230,16 @@ void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
Bytes[AllocByteOffset + B] |= AllocMask;
}
+bool lowertypetests::isJumpTableCanonical(Function *F) {
+ if (F->isDeclarationForLinker())
+ return false;
+ auto *CI = mdconst::extract_or_null<ConstantInt>(
+ F->getParent()->getModuleFlag("CFI Canonical Jump Tables"));
+ if (!CI || CI->getZExtValue() != 0)
+ return true;
+ return F->hasFnAttribute("cfi-canonical-jump-table");
+}
+
namespace {
struct ByteArrayInfo {
@@ -251,9 +261,12 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
GlobalObject *GO;
size_t NTypes;
- // For functions: true if this is a definition (either in the merged module or
- // in one of the thinlto modules).
- bool IsDefinition;
+ // For functions: true if the jump table is canonical. This essentially means
+ // whether the canonical address (i.e. the symbol table entry) of the function
+ // is provided by the local jump table. This is normally the same as whether
+ // the function is defined locally, but if canonical jump tables are disabled
+ // by the user then the jump table never provides a canonical definition.
+ bool IsJumpTableCanonical;
// For functions: true if this function is either defined or used in a thinlto
// module and its jumptable entry needs to be exported to thinlto backends.
@@ -263,13 +276,13 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
public:
static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
- bool IsDefinition, bool IsExported,
+ bool IsJumpTableCanonical, bool IsExported,
ArrayRef<MDNode *> Types) {
auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
GTM->GO = GO;
GTM->NTypes = Types.size();
- GTM->IsDefinition = IsDefinition;
+ GTM->IsJumpTableCanonical = IsJumpTableCanonical;
GTM->IsExported = IsExported;
std::uninitialized_copy(Types.begin(), Types.end(),
GTM->getTrailingObjects<MDNode *>());
@@ -280,8 +293,8 @@ public:
return GO;
}
- bool isDefinition() const {
- return IsDefinition;
+ bool isJumpTableCanonical() const {
+ return IsJumpTableCanonical;
}
bool isExported() const {
@@ -320,6 +333,49 @@ private:
size_t NTargets;
};
+struct ScopedSaveAliaseesAndUsed {
+ Module &M;
+ SmallPtrSet<GlobalValue *, 16> Used, CompilerUsed;
+ std::vector<std::pair<GlobalIndirectSymbol *, Function *>> FunctionAliases;
+
+ ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
+ // The users of this class want to replace all function references except
+ // for aliases and llvm.used/llvm.compiler.used with references to a jump
+ // table. We avoid replacing aliases in order to avoid introducing a double
+ // indirection (or an alias pointing to a declaration in ThinLTO mode), and
+ // we avoid replacing llvm.used/llvm.compiler.used because these global
+ // variables describe properties of the global, not the jump table (besides,
+ // offseted references to the jump table in llvm.used are invalid).
+ // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly
+ // indirect) users", so what we do is save the list of globals referenced by
+ // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW
+ // replace the aliasees and then set them back to their original values at
+ // the end.
+ if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false))
+ GV->eraseFromParent();
+ if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true))
+ GV->eraseFromParent();
+
+ for (auto &GIS : concat<GlobalIndirectSymbol>(M.aliases(), M.ifuncs())) {
+ // FIXME: This should look past all aliases not just interposable ones,
+ // see discussion on D65118.
+ if (auto *F =
+ dyn_cast<Function>(GIS.getIndirectSymbol()->stripPointerCasts()))
+ FunctionAliases.push_back({&GIS, F});
+ }
+ }
+
+ ~ScopedSaveAliaseesAndUsed() {
+ appendToUsed(M, std::vector<GlobalValue *>(Used.begin(), Used.end()));
+ appendToCompilerUsed(M, std::vector<GlobalValue *>(CompilerUsed.begin(),
+ CompilerUsed.end()));
+
+ for (auto P : FunctionAliases)
+ P.first->setIndirectSymbol(
+ ConstantExpr::getBitCast(P.second, P.first->getType()));
+ }
+};
+
class LowerTypeTestsModule {
Module &M;
@@ -387,7 +443,8 @@ class LowerTypeTestsModule {
uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
TypeIdLowering importTypeId(StringRef TypeId);
void importTypeTest(CallInst *CI);
- void importFunction(Function *F, bool isDefinition);
+ void importFunction(Function *F, bool isJumpTableCanonical,
+ std::vector<GlobalAlias *> &AliasesToErase);
BitSetInfo
buildBitSet(Metadata *TypeId,
@@ -421,7 +478,8 @@ class LowerTypeTestsModule {
ArrayRef<GlobalTypeMember *> Globals,
ArrayRef<ICallBranchFunnel *> ICallBranchFunnels);
- void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, bool IsDefinition);
+ void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT,
+ bool IsJumpTableCanonical);
void moveInitializerToModuleConstructor(GlobalVariable *GV);
void findGlobalVariableUsersOf(Constant *C,
SmallSetVector<GlobalVariable *, 8> &Out);
@@ -433,7 +491,7 @@ class LowerTypeTestsModule {
/// the block. 'This's use list is expected to have at least one element.
/// Unlike replaceAllUsesWith this function skips blockaddr and direct call
/// uses.
- void replaceCfiUses(Function *Old, Value *New, bool IsDefinition);
+ void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical);
/// replaceDirectCalls - Go through the uses list for this definition and
/// replace each use, which is a direct function call.
@@ -759,43 +817,50 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
// Build a new global with the combined contents of the referenced globals.
// This global is a struct whose even-indexed elements contain the original
// contents of the referenced globals and whose odd-indexed elements contain
- // any padding required to align the next element to the next power of 2.
+ // any padding required to align the next element to the next power of 2 plus
+ // any additional padding required to meet its alignment requirements.
std::vector<Constant *> GlobalInits;
const DataLayout &DL = M.getDataLayout();
+ DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
+ Align MaxAlign;
+ uint64_t CurOffset = 0;
+ uint64_t DesiredPadding = 0;
for (GlobalTypeMember *G : Globals) {
- GlobalVariable *GV = cast<GlobalVariable>(G->getGlobal());
+ auto *GV = cast<GlobalVariable>(G->getGlobal());
+ MaybeAlign Alignment(GV->getAlignment());
+ if (!Alignment)
+ Alignment = Align(DL.getABITypeAlignment(GV->getValueType()));
+ MaxAlign = std::max(MaxAlign, *Alignment);
+ uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, *Alignment);
+ GlobalLayout[G] = GVOffset;
+ if (GVOffset != 0) {
+ uint64_t Padding = GVOffset - CurOffset;
+ GlobalInits.push_back(
+ ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ }
+
GlobalInits.push_back(GV->getInitializer());
uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
+ CurOffset = GVOffset + InitSize;
- // Compute the amount of padding required.
- uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+ // Compute the amount of padding that we'd like for the next element.
+ DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize;
// Experiments of different caps with Chromium on both x64 and ARM64
// have shown that the 32-byte cap generates the smallest binary on
// both platforms while different caps yield similar performance.
// (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html)
- if (Padding > 32)
- Padding = alignTo(InitSize, 32) - InitSize;
-
- GlobalInits.push_back(
- ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ if (DesiredPadding > 32)
+ DesiredPadding = alignTo(InitSize, 32) - InitSize;
}
- if (!GlobalInits.empty())
- GlobalInits.pop_back();
+
Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
auto *CombinedGlobal =
new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
GlobalValue::PrivateLinkage, NewInit);
+ CombinedGlobal->setAlignment(MaxAlign);
StructType *NewTy = cast<StructType>(NewInit->getType());
- const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
-
- // Compute the offsets of the original globals within the new global.
- DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
- for (unsigned I = 0; I != Globals.size(); ++I)
- // Multiply by 2 to account for padding elements.
- GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
-
lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout);
// Build aliases pointing to offsets into the combined global for each
@@ -975,14 +1040,16 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
}
// ThinLTO backend: the function F has a jump table entry; update this module
-// accordingly. isDefinition describes the type of the jump table entry.
-void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
+// accordingly. isJumpTableCanonical describes the type of the jump table entry.
+void LowerTypeTestsModule::importFunction(
+ Function *F, bool isJumpTableCanonical,
+ std::vector<GlobalAlias *> &AliasesToErase) {
assert(F->getType()->getAddressSpace() == 0);
GlobalValue::VisibilityTypes Visibility = F->getVisibility();
std::string Name = F->getName();
- if (F->isDeclarationForLinker() && isDefinition) {
+ if (F->isDeclarationForLinker() && isJumpTableCanonical) {
// Non-dso_local functions may be overriden at run time,
// don't short curcuit them
if (F->isDSOLocal()) {
@@ -997,12 +1064,13 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
}
Function *FDecl;
- if (F->isDeclarationForLinker() && !isDefinition) {
- // Declaration of an external function.
+ if (!isJumpTableCanonical) {
+ // Either a declaration of an external function or a reference to a locally
+ // defined jump table.
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
F->getAddressSpace(), Name + ".cfi_jt", &M);
FDecl->setVisibility(GlobalValue::HiddenVisibility);
- } else if (isDefinition) {
+ } else {
F->setName(Name + ".cfi");
F->setLinkage(GlobalValue::ExternalLinkage);
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
@@ -1011,8 +1079,8 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
Visibility = GlobalValue::HiddenVisibility;
// Delete aliases pointing to this function, they'll be re-created in the
- // merged output
- SmallVector<GlobalAlias*, 4> ToErase;
+ // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed
+ // will want to reset the aliasees first.
for (auto &U : F->uses()) {
if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) {
Function *AliasDecl = Function::Create(
@@ -1020,24 +1088,15 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
F->getAddressSpace(), "", &M);
AliasDecl->takeName(A);
A->replaceAllUsesWith(AliasDecl);
- ToErase.push_back(A);
+ AliasesToErase.push_back(A);
}
}
- for (auto *A : ToErase)
- A->eraseFromParent();
- } else {
- // Function definition without type metadata, where some other translation
- // unit contained a declaration with type metadata. This normally happens
- // during mixed CFI + non-CFI compilation. We do nothing with the function
- // so that it is treated the same way as a function defined outside of the
- // LTO unit.
- return;
}
- if (F->isWeakForLinker())
- replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isDefinition);
+ if (F->hasExternalWeakLinkage())
+ replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical);
else
- replaceCfiUses(F, FDecl, isDefinition);
+ replaceCfiUses(F, FDecl, isJumpTableCanonical);
// Set visibility late because it's used in replaceCfiUses() to determine
// whether uses need to to be replaced.
@@ -1225,7 +1284,7 @@ void LowerTypeTestsModule::findGlobalVariableUsersOf(
// Replace all uses of F with (F ? JT : 0).
void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
- Function *F, Constant *JT, bool IsDefinition) {
+ Function *F, Constant *JT, bool IsJumpTableCanonical) {
// The target expression can not appear in a constant initializer on most
// (all?) targets. Switch to a runtime initializer.
SmallSetVector<GlobalVariable *, 8> GlobalVarUsers;
@@ -1239,7 +1298,7 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
Function::Create(cast<FunctionType>(F->getValueType()),
GlobalValue::ExternalWeakLinkage,
F->getAddressSpace(), "", &M);
- replaceCfiUses(F, PlaceholderFn, IsDefinition);
+ replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical);
Constant *Target = ConstantExpr::getSelect(
ConstantExpr::getICmp(CmpInst::ICMP_NE, F,
@@ -1276,8 +1335,9 @@ selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
unsigned ArmCount = 0, ThumbCount = 0;
for (const auto GTM : Functions) {
- if (!GTM->isDefinition()) {
+ if (!GTM->isJumpTableCanonical()) {
// PLT stubs are always ARM.
+ // FIXME: This is the wrong heuristic for non-canonical jump tables.
++ArmCount;
continue;
}
@@ -1303,7 +1363,7 @@ void LowerTypeTestsModule::createJumpTable(
cast<Function>(Functions[I]->getGlobal()));
// Align the whole table by entry size.
- F->setAlignment(getJumpTableEntrySize());
+ F->setAlignment(Align(getJumpTableEntrySize()));
// Skip prologue.
// Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3.
// Luckily, this function does not get any prologue even without the
@@ -1438,47 +1498,53 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout);
- // Build aliases pointing to offsets into the jump table, and replace
- // references to the original functions with references to the aliases.
- for (unsigned I = 0; I != Functions.size(); ++I) {
- Function *F = cast<Function>(Functions[I]->getGlobal());
- bool IsDefinition = Functions[I]->isDefinition();
-
- Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
- ConstantExpr::getInBoundsGetElementPtr(
- JumpTableType, JumpTable,
- ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
- ConstantInt::get(IntPtrTy, I)}),
- F->getType());
- if (Functions[I]->isExported()) {
- if (IsDefinition) {
- ExportSummary->cfiFunctionDefs().insert(F->getName());
+ {
+ ScopedSaveAliaseesAndUsed S(M);
+
+ // Build aliases pointing to offsets into the jump table, and replace
+ // references to the original functions with references to the aliases.
+ for (unsigned I = 0; I != Functions.size(); ++I) {
+ Function *F = cast<Function>(Functions[I]->getGlobal());
+ bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical();
+
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
+ ConstantExpr::getInBoundsGetElementPtr(
+ JumpTableType, JumpTable,
+ ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, I)}),
+ F->getType());
+ if (Functions[I]->isExported()) {
+ if (IsJumpTableCanonical) {
+ ExportSummary->cfiFunctionDefs().insert(F->getName());
+ } else {
+ GlobalAlias *JtAlias = GlobalAlias::create(
+ F->getValueType(), 0, GlobalValue::ExternalLinkage,
+ F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
+ JtAlias->setVisibility(GlobalValue::HiddenVisibility);
+ ExportSummary->cfiFunctionDecls().insert(F->getName());
+ }
+ }
+ if (!IsJumpTableCanonical) {
+ if (F->hasExternalWeakLinkage())
+ replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr,
+ IsJumpTableCanonical);
+ else
+ replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical);
} else {
- GlobalAlias *JtAlias = GlobalAlias::create(
- F->getValueType(), 0, GlobalValue::ExternalLinkage,
- F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
- JtAlias->setVisibility(GlobalValue::HiddenVisibility);
- ExportSummary->cfiFunctionDecls().insert(F->getName());
+ assert(F->getType()->getAddressSpace() == 0);
+
+ GlobalAlias *FAlias =
+ GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "",
+ CombinedGlobalElemPtr, &M);
+ FAlias->setVisibility(F->getVisibility());
+ FAlias->takeName(F);
+ if (FAlias->hasName())
+ F->setName(FAlias->getName() + ".cfi");
+ replaceCfiUses(F, FAlias, IsJumpTableCanonical);
+ if (!F->hasLocalLinkage())
+ F->setVisibility(GlobalVariable::HiddenVisibility);
}
}
- if (!IsDefinition) {
- if (F->isWeakForLinker())
- replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, IsDefinition);
- else
- replaceCfiUses(F, CombinedGlobalElemPtr, IsDefinition);
- } else {
- assert(F->getType()->getAddressSpace() == 0);
-
- GlobalAlias *FAlias = GlobalAlias::create(
- F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M);
- FAlias->setVisibility(F->getVisibility());
- FAlias->takeName(F);
- if (FAlias->hasName())
- F->setName(FAlias->getName() + ".cfi");
- replaceCfiUses(F, FAlias, IsDefinition);
- if (!F->hasLocalLinkage())
- F->setVisibility(GlobalVariable::HiddenVisibility);
- }
}
createJumpTable(JumpTableFn, Functions);
@@ -1623,7 +1689,7 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
": ");
std::error_code EC;
- raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text);
ExitOnErr(errorCodeToError(EC));
yaml::Output Out(OS);
@@ -1643,7 +1709,8 @@ static bool isDirectCall(Use& U) {
return false;
}
-void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefinition) {
+void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
+ bool IsJumpTableCanonical) {
SmallSetVector<Constant *, 4> Constants;
auto UI = Old->use_begin(), E = Old->use_end();
for (; UI != E;) {
@@ -1655,7 +1722,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi
continue;
// Skip direct calls to externally defined or non-dso_local functions
- if (isDirectCall(U) && (Old->isDSOLocal() || !IsDefinition))
+ if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical))
continue;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
@@ -1678,16 +1745,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi
}
void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
- auto UI = Old->use_begin(), E = Old->use_end();
- for (; UI != E;) {
- Use &U = *UI;
- ++UI;
-
- if (!isDirectCall(U))
- continue;
-
- U.set(New);
- }
+ Old->replaceUsesWithIf(New, [](Use &U) { return isDirectCall(U); });
}
bool LowerTypeTestsModule::lower() {
@@ -1734,10 +1792,16 @@ bool LowerTypeTestsModule::lower() {
Decls.push_back(&F);
}
- for (auto F : Defs)
- importFunction(F, /*isDefinition*/ true);
- for (auto F : Decls)
- importFunction(F, /*isDefinition*/ false);
+ std::vector<GlobalAlias *> AliasesToErase;
+ {
+ ScopedSaveAliaseesAndUsed S(M);
+ for (auto F : Defs)
+ importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase);
+ for (auto F : Decls)
+ importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase);
+ }
+ for (GlobalAlias *GA : AliasesToErase)
+ GA->eraseFromParent();
return true;
}
@@ -1823,6 +1887,17 @@ bool LowerTypeTestsModule::lower() {
CfiFunctionLinkage Linkage = P.second.Linkage;
MDNode *FuncMD = P.second.FuncMD;
Function *F = M.getFunction(FunctionName);
+ if (F && F->hasLocalLinkage()) {
+ // Locally defined function that happens to have the same name as a
+ // function defined in a ThinLTO module. Rename it to move it out of
+ // the way of the external reference that we're about to create.
+ // Note that setName will find a unique name for the function, so even
+ // if there is an existing function with the suffix there won't be a
+ // name collision.
+ F->setName(F->getName() + ".1");
+ F = nullptr;
+ }
+
if (!F)
F = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
@@ -1871,24 +1946,26 @@ bool LowerTypeTestsModule::lower() {
Types.clear();
GO.getMetadata(LLVMContext::MD_type, Types);
- bool IsDefinition = !GO.isDeclarationForLinker();
+ bool IsJumpTableCanonical = false;
bool IsExported = false;
if (Function *F = dyn_cast<Function>(&GO)) {
+ IsJumpTableCanonical = isJumpTableCanonical(F);
if (ExportedFunctions.count(F->getName())) {
- IsDefinition |= ExportedFunctions[F->getName()].Linkage == CFL_Definition;
+ IsJumpTableCanonical |=
+ ExportedFunctions[F->getName()].Linkage == CFL_Definition;
IsExported = true;
// TODO: The logic here checks only that the function is address taken,
// not that the address takers are live. This can be updated to check
// their liveness and emit fewer jumptable entries once monolithic LTO
// builds also emit summaries.
} else if (!F->hasAddressTaken()) {
- if (!CrossDsoCfi || !IsDefinition || F->hasLocalLinkage())
+ if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage())
continue;
}
}
- auto *GTM =
- GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types);
+ auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical,
+ IsExported, Types);
GlobalTypeMembers[&GO] = GTM;
for (MDNode *Type : Types) {
verifyTypeMDNode(&GO, Type);
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 3a08069dcd4a..8b9abaddc84c 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -769,7 +769,7 @@ void MergeFunctions::writeAlias(Function *F, Function *G) {
PtrType->getElementType(), PtrType->getAddressSpace(),
G->getLinkage(), "", BitcastF, G->getParent());
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ F->setAlignment(MaybeAlign(std::max(F->getAlignment(), G->getAlignment())));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
@@ -816,7 +816,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
removeUsers(F);
F->replaceAllUsesWith(NewF);
- unsigned MaxAlignment = std::max(G->getAlignment(), NewF->getAlignment());
+ MaybeAlign MaxAlignment(std::max(G->getAlignment(), NewF->getAlignment()));
writeThunkOrAlias(F, G);
writeThunkOrAlias(F, NewF);
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 733782e8764d..e193074884af 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -409,7 +409,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
- llvm::make_unique<FunctionOutliningMultiRegionInfo>();
+ std::make_unique<FunctionOutliningMultiRegionInfo>();
auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
BasicBlock *Dom = BlockList.front();
@@ -589,7 +589,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
};
std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
- llvm::make_unique<FunctionOutliningInfo>();
+ std::make_unique<FunctionOutliningInfo>();
BasicBlock *CurrEntry = EntryBlock;
bool CandidateFound = false;
@@ -966,7 +966,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
function_ref<AssumptionCache *(Function &)> LookupAC)
: OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
- ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
+ ClonedOI = std::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
@@ -991,7 +991,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
OptimizationRemarkEmitter &ORE,
function_ref<AssumptionCache *(Function &)> LookupAC)
: OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
- ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
+ ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
@@ -1122,6 +1122,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
BranchProbabilityInfo BPI(*ClonedFunc, LI);
ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
+ // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
+ CodeExtractorAnalysisCache CEAC(*ClonedFunc);
+
SetVector<Value *> Inputs, Outputs, Sinks;
for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
ClonedOMRI->ORI) {
@@ -1148,7 +1151,7 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
if (Outputs.size() > 0 && !ForceLiveExit)
continue;
- Function *OutlinedFunc = CE.extractCodeRegion();
+ Function *OutlinedFunc = CE.extractCodeRegion(CEAC);
if (OutlinedFunc) {
CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
@@ -1210,11 +1213,12 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
}
// Extract the body of the if.
+ CodeExtractorAnalysisCache CEAC(*ClonedFunc);
Function *OutlinedFunc =
CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
/* AllowVarargs */ true)
- .extractCodeRegion();
+ .extractCodeRegion(CEAC);
if (OutlinedFunc) {
BasicBlock *OutliningCallBB =
@@ -1264,7 +1268,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (PSI->isFunctionEntryCold(F))
return {false, nullptr};
- if (empty(F->users()))
+ if (F->users().empty())
return {false, nullptr};
OptimizationRemarkEmitter ORE(F);
@@ -1370,7 +1374,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
return false;
}
- assert(empty(Cloner.OrigFunc->users()) &&
+ assert(Cloner.OrigFunc->users().empty() &&
"F's users should all be replaced!");
std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 3ea77f08fd3c..5314a8219b1e 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -654,6 +654,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createGlobalsAAWrapperPass());
MPM.add(createFloat2IntPass());
+ MPM.add(createLowerConstantIntrinsicsPass());
addExtensionsToPM(EP_VectorizerStart, MPM);
diff --git a/lib/Transforms/IPO/SCCP.cpp b/lib/Transforms/IPO/SCCP.cpp
index 7be3608bd2ec..307690729b14 100644
--- a/lib/Transforms/IPO/SCCP.cpp
+++ b/lib/Transforms/IPO/SCCP.cpp
@@ -9,16 +9,18 @@ using namespace llvm;
PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) {
const DataLayout &DL = M.getDataLayout();
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn {
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
return {
- make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)),
+ std::make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)),
&DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F)};
};
- if (!runIPSCCP(M, DL, &TLI, getAnalysis))
+ if (!runIPSCCP(M, DL, GetTLI, getAnalysis))
return PreservedAnalyses::all();
PreservedAnalyses PA;
@@ -47,14 +49,14 @@ public:
if (skipModule(M))
return false;
const DataLayout &DL = M.getDataLayout();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
+ auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn {
DominatorTree &DT =
this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
return {
- make_unique<PredicateInfo>(
+ std::make_unique<PredicateInfo>(
F, DT,
this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F)),
@@ -62,7 +64,7 @@ public:
nullptr}; // manager, so set them to nullptr.
};
- return runIPSCCP(M, DL, TLI, getAnalysis);
+ return runIPSCCP(M, DL, GetTLI, getAnalysis);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 877d20e72ffc..6184681db8a2 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -72,6 +72,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -79,6 +80,7 @@
#include <limits>
#include <map>
#include <memory>
+#include <queue>
#include <string>
#include <system_error>
#include <utility>
@@ -128,6 +130,12 @@ static cl::opt<bool> ProfileSampleAccurate(
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));
+static cl::opt<bool> ProfileAccurateForSymsInList(
+ "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
+ cl::init(true),
+ cl::desc("For symbols in profile symbol list, regard their profiles to "
+ "be accurate. It may be overriden by profile-sample-accurate. "));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -137,9 +145,11 @@ using EdgeWeightMap = DenseMap<Edge, uint64_t>;
using BlockEdgeMap =
DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>;
+class SampleProfileLoader;
+
class SampleCoverageTracker {
public:
- SampleCoverageTracker() = default;
+ SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){};
bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
uint32_t Discriminator, uint64_t Samples);
@@ -185,6 +195,76 @@ private:
/// keyed by FunctionSamples pointers, but these stats are cleared after
/// every function, so we just need to keep a single counter.
uint64_t TotalUsedSamples = 0;
+
+ SampleProfileLoader &SPLoader;
+};
+
+class GUIDToFuncNameMapper {
+public:
+ GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
+ DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
+ : CurrentReader(Reader), CurrentModule(M),
+ CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
+ if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ return;
+
+ for (const auto &F : CurrentModule) {
+ StringRef OrigName = F.getName();
+ CurrentGUIDToFuncNameMap.insert(
+ {Function::getGUID(OrigName), OrigName});
+
+ // Local to global var promotion used by optimization like thinlto
+ // will rename the var and add suffix like ".llvm.xxx" to the
+ // original local name. In sample profile, the suffixes of function
+ // names are all stripped. Since it is possible that the mapper is
+ // built in post-thin-link phase and var promotion has been done,
+ // we need to add the substring of function name without the suffix
+ // into the GUIDToFuncNameMap.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (CanonName != OrigName)
+ CurrentGUIDToFuncNameMap.insert(
+ {Function::getGUID(CanonName), CanonName});
+ }
+
+ // Update GUIDToFuncNameMap for each function including inlinees.
+ SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
+ }
+
+ ~GUIDToFuncNameMapper() {
+ if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ return;
+
+ CurrentGUIDToFuncNameMap.clear();
+
+ // Reset GUIDToFuncNameMap for of each function as they're no
+ // longer valid at this point.
+ SetGUIDToFuncNameMapForAll(nullptr);
+ }
+
+private:
+ void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
+ std::queue<FunctionSamples *> FSToUpdate;
+ for (auto &IFS : CurrentReader.getProfiles()) {
+ FSToUpdate.push(&IFS.second);
+ }
+
+ while (!FSToUpdate.empty()) {
+ FunctionSamples *FS = FSToUpdate.front();
+ FSToUpdate.pop();
+ FS->GUIDToFuncNameMap = Map;
+ for (const auto &ICS : FS->getCallsiteSamples()) {
+ const FunctionSamplesMap &FSMap = ICS.second;
+ for (auto &IFS : FSMap) {
+ FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
+ FSToUpdate.push(&FS);
+ }
+ }
+ }
+ }
+
+ SampleProfileReader &CurrentReader;
+ Module &CurrentModule;
+ DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
};
/// Sample profile pass.
@@ -199,8 +279,9 @@ public:
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
: GetAC(std::move(GetAssumptionCache)),
- GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
- RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
+ GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this),
+ Filename(Name), RemappingFilename(RemapName),
+ IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -209,6 +290,8 @@ public:
void dump() { Reader->dump(); }
protected:
+ friend class SampleCoverageTracker;
+
bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
@@ -237,6 +320,8 @@ protected:
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
void clearFunctionData();
+ bool callsiteIsHot(const FunctionSamples *CallsiteFS,
+ ProfileSummaryInfo *PSI);
/// Map basic blocks to their computed weights.
///
@@ -310,6 +395,10 @@ protected:
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;
+ /// Profle Symbol list tells whether a function name appears in the binary
+ /// used to generate the current profile.
+ std::unique_ptr<ProfileSymbolList> PSL;
+
/// Total number of samples collected in this profile.
///
/// This is the sum of all the samples collected in all the functions executed
@@ -326,6 +415,21 @@ protected:
uint64_t entryCount;
};
DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
+
+ // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
+ // all the function symbols defined or declared in current module.
+ DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
+
+ // All the Names used in FunctionSamples including outline function
+ // names, inline instance names and call target names.
+ StringSet<> NamesInProfile;
+
+ // For symbol in profile symbol list, whether to regard their profiles
+ // to be accurate. It is mainly decided by existance of profile symbol
+ // list and -profile-accurate-for-symsinlist flag, but it can be
+ // overriden by -profile-sample-accurate or profile-sample-accurate
+ // attribute.
+ bool ProfAccForSymsInList;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -381,14 +485,23 @@ private:
/// To decide whether an inlined callsite is hot, we compare the callsite
/// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
/// regarded as hot if the count is above the cutoff value.
-static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
- ProfileSummaryInfo *PSI) {
+///
+/// When ProfileAccurateForSymsInList is enabled and profile symbol list
+/// is present, functions in the profile symbol list but without profile will
+/// be regarded as cold and much less inlining will happen in CGSCC inlining
+/// pass, so we tend to lower the hot criteria here to allow more early
+/// inlining to happen for warm callsites and it is helpful for performance.
+bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS,
+ ProfileSummaryInfo *PSI) {
if (!CallsiteFS)
return false; // The callsite was not inlined in the original binary.
assert(PSI && "PSI is expected to be non null");
uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
- return PSI->isHotCount(CallsiteTotalSamples);
+ if (ProfAccForSymsInList)
+ return !PSI->isColdCount(CallsiteTotalSamples);
+ else
+ return PSI->isHotCount(CallsiteTotalSamples);
}
/// Mark as used the sample record for the given function samples at
@@ -425,7 +538,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Count += countUsedRecords(CalleeSamples, PSI);
}
@@ -444,7 +557,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Count += countBodyRecords(CalleeSamples, PSI);
}
@@ -465,7 +578,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Total += countBodySamples(CalleeSamples, PSI);
}
@@ -788,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
+ // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+ // Profile symbol list is ignored when profile-sample-accurate is on.
+ assert((!ProfAccForSymsInList ||
+ (!ProfileSampleAccurate &&
+ !F.hasFnAttribute("profile-sample-accurate"))) &&
+ "ProfAccForSymsInList should be false when profile-sample-accurate "
+ "is enabled");
+
DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
bool Changed = false;
while (true) {
@@ -1219,17 +1340,12 @@ void SampleProfileLoader::buildEdges(Function &F) {
}
/// Returns the sorted CallTargetMap \p M by count in descending order.
-static SmallVector<InstrProfValueData, 2> SortCallTargets(
- const SampleRecord::CallTargetMap &M) {
+static SmallVector<InstrProfValueData, 2> GetSortedValueDataFromCallTargets(
+ const SampleRecord::CallTargetMap & M) {
SmallVector<InstrProfValueData, 2> R;
- for (auto I = M.begin(); I != M.end(); ++I)
- R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
- llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
- if (L.Count == R.Count)
- return L.Value > R.Value;
- else
- return L.Count > R.Count;
- });
+ for (const auto &I : SampleRecord::SortCallTargets(M)) {
+ R.emplace_back(InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
+ }
return R;
}
@@ -1324,7 +1440,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!T || T.get().empty())
continue;
SmallVector<InstrProfValueData, 2> SortedCallTargets =
- SortCallTargets(T.get());
+ GetSortedValueDataFromCallTargets(T.get());
uint64_t Sum;
findIndirectCallFunctionSamples(I, Sum);
annotateValueSite(*I.getParent()->getParent()->getParent(), I,
@@ -1374,6 +1490,8 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
+ misexpect::verifyMisExpect(TI, Weights, TI->getContext());
+
uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
@@ -1557,30 +1675,29 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
- auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
+
+ std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
+ auto ReaderOrErr =
+ SampleProfileReader::create(Filename, Ctx, RemappingFilename);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
return false;
}
Reader = std::move(ReaderOrErr.get());
- Reader->collectFuncsToUse(M);
+ Reader->collectFuncsFrom(M);
ProfileIsValid = (Reader->read() == sampleprof_error::success);
-
- if (!RemappingFilename.empty()) {
- // Apply profile remappings to the loaded profile data if requested.
- // For now, we only support remapping symbols encoded using the Itanium
- // C++ ABI's name mangling scheme.
- ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
- RemappingFilename, Ctx, std::move(Reader));
- if (std::error_code EC = ReaderOrErr.getError()) {
- std::string Msg = "Could not open profile remapping file: " + EC.message();
- Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
- return false;
- }
- Reader = std::move(ReaderOrErr.get());
- ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ PSL = Reader->getProfileSymbolList();
+
+ // While profile-sample-accurate is on, ignore symbol list.
+ ProfAccForSymsInList =
+ ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
+ if (ProfAccForSymsInList) {
+ NamesInProfile.clear();
+ if (auto NameTable = Reader->getNameTable())
+ NamesInProfile.insert(NameTable->begin(), NameTable->end());
}
+
return true;
}
@@ -1594,7 +1711,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI) {
- FunctionSamples::GUIDToFuncNameMapper Mapper(M);
+ GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
if (!ProfileIsValid)
return false;
@@ -1651,19 +1768,48 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
-
+
DILocation2SampleMap.clear();
// By default the entry count is initialized to -1, which will be treated
// conservatively by getEntryCount as the same as unknown (None). This is
// to avoid newly added code to be treated as cold. If we have samples
// this will be overwritten in emitAnnotations.
- // If ProfileSampleAccurate is true or F has profile-sample-accurate
- // attribute, initialize the entry count to 0 so callsites or functions
- // unsampled will be treated as cold.
- uint64_t initialEntryCount =
- (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
- ? 0
- : -1;
+ uint64_t initialEntryCount = -1;
+
+ ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
+ if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
+ // initialize all the function entry counts to 0. It means all the
+ // functions without profile will be regarded as cold.
+ initialEntryCount = 0;
+ // profile-sample-accurate is a user assertion which has a higher precedence
+ // than symbol list. When profile-sample-accurate is on, ignore symbol list.
+ ProfAccForSymsInList = false;
+ }
+
+ // PSL -- profile symbol list include all the symbols in sampled binary.
+ // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
+ // old functions without samples being cold, without having to worry
+ // about new and hot functions being mistakenly treated as cold.
+ if (ProfAccForSymsInList) {
+ // Initialize the entry count to 0 for functions in the list.
+ if (PSL->contains(F.getName()))
+ initialEntryCount = 0;
+
+ // Function in the symbol list but without sample will be regarded as
+ // cold. To minimize the potential negative performance impact it could
+ // have, we want to be a little conservative here saying if a function
+ // shows up in the profile, no matter as outline function, inline instance
+ // or call targets, treat the function as not being cold. This will handle
+ // the cases such as most callsites of a function are inlined in sampled
+ // binary but not inlined in current build (because of source code drift,
+ // imprecise debug information, or the callsites are all cold individually
+ // but not cold accumulatively...), so the outline function showing up as
+ // cold in sampled binary will actually not be cold after current build.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (NamesInProfile.count(CanonName))
+ initialEntryCount = -1;
+ }
+
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
@@ -1672,7 +1818,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
.getManager();
ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
} else {
- OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
+ OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
ORE = OwnedORE.get();
}
Samples = Reader->getSamplesFor(F);
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 24c476376c14..690b5e8bf49e 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
+#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -218,10 +219,18 @@ void splitAndWriteThinLTOBitcode(
promoteTypeIds(M, ModuleId);
- // Returns whether a global has attached type metadata. Such globals may
- // participate in CFI or whole-program devirtualization, so they need to
- // appear in the merged module instead of the thin LTO module.
+ // Returns whether a global or its associated global has attached type
+ // metadata. The former may participate in CFI or whole-program
+ // devirtualization, so they need to appear in the merged module instead of
+ // the thin LTO module. Similarly, globals that are associated with globals
+ // with type metadata need to appear in the merged module because they will
+ // reference the global's section directly.
auto HasTypeMetadata = [](const GlobalObject *GO) {
+ if (MDNode *MD = GO->getMetadata(LLVMContext::MD_associated))
+ if (auto *AssocVM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(0)))
+ if (auto *AssocGO = dyn_cast<GlobalObject>(AssocVM->getValue()))
+ if (AssocGO->hasMetadata(LLVMContext::MD_type))
+ return true;
return GO->hasMetadata(LLVMContext::MD_type);
};
@@ -315,9 +324,9 @@ void splitAndWriteThinLTOBitcode(
SmallVector<Metadata *, 4> Elts;
Elts.push_back(MDString::get(Ctx, F.getName()));
CfiFunctionLinkage Linkage;
- if (!F.isDeclarationForLinker())
+ if (lowertypetests::isJumpTableCanonical(&F))
Linkage = CFL_Definition;
- else if (F.isWeakForLinker())
+ else if (F.hasExternalWeakLinkage())
Linkage = CFL_WeakDeclaration;
else
Linkage = CFL_Declaration;
@@ -457,7 +466,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
// splitAndWriteThinLTOBitcode). Just always build it once via the
// buildModuleSummaryIndex when Module(s) are ready.
ProfileSummaryInfo PSI(M);
- NewIndex = llvm::make_unique<ModuleSummaryIndex>(
+ NewIndex = std::make_unique<ModuleSummaryIndex>(
buildModuleSummaryIndex(M, nullptr, &PSI));
Index = NewIndex.get();
}
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 6b6dd6194e17..f0cf5581ba8a 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,12 +24,14 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
-// This pass is intended to be used during the regular and thin LTO pipelines.
+// This pass is intended to be used during the regular and thin LTO pipelines:
+//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
// eligible for virtual call optimization (i.e. calls that use either of the
-// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
-// ThinLTO, the pass operates in two phases:
+// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
+//
+// During hybrid Regular/ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over a single merged module
// that contains all vtables with !type metadata that participate in the link.
// The pass computes a resolution for each virtual call and stores it in the
@@ -38,6 +40,14 @@
// modules. The pass applies the resolutions previously computed during the
// import phase to each eligible virtual call.
//
+// During ThinLTO, the pass operates in two phases:
+// - Export phase: this is run during the thin link over the index which
+// contains a summary of all vtables with !type metadata that participate in
+// the link. It computes a resolution for each virtual call and stores it in
+// the type identifier summary. Only single implementation devirtualization
+// is supported.
+// - Import phase: (same as with hybrid case above).
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -117,6 +127,11 @@ static cl::opt<unsigned>
cl::desc("Maximum number of call targets per "
"call site to enable branch funnels"));
+static cl::opt<bool>
+ PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
+ cl::init(false), cl::ZeroOrMore,
+ cl::desc("Print index-based devirtualization messages"));
+
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
@@ -265,6 +280,25 @@ template <> struct DenseMapInfo<VTableSlot> {
}
};
+template <> struct DenseMapInfo<VTableSlotSummary> {
+ static VTableSlotSummary getEmptyKey() {
+ return {DenseMapInfo<StringRef>::getEmptyKey(),
+ DenseMapInfo<uint64_t>::getEmptyKey()};
+ }
+ static VTableSlotSummary getTombstoneKey() {
+ return {DenseMapInfo<StringRef>::getTombstoneKey(),
+ DenseMapInfo<uint64_t>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const VTableSlotSummary &I) {
+ return DenseMapInfo<StringRef>::getHashValue(I.TypeID) ^
+ DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
+ }
+ static bool isEqual(const VTableSlotSummary &LHS,
+ const VTableSlotSummary &RHS) {
+ return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
+ }
+};
+
} // end namespace llvm
namespace {
@@ -342,19 +376,21 @@ struct CallSiteInfo {
/// pass the vector is non-empty, we will need to add a use of llvm.type.test
/// to each of the function summaries in the vector.
std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+ std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
bool isExported() const {
return SummaryHasTypeTestAssumeUsers ||
!SummaryTypeCheckedLoadUsers.empty();
}
- void markSummaryHasTypeTestAssumeUsers() {
- SummaryHasTypeTestAssumeUsers = true;
+ void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
+ SummaryTypeCheckedLoadUsers.push_back(FS);
AllCallSitesDevirted = false;
}
- void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
- SummaryTypeCheckedLoadUsers.push_back(FS);
+ void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
+ SummaryTypeTestAssumeUsers.push_back(FS);
+ SummaryHasTypeTestAssumeUsers = true;
AllCallSitesDevirted = false;
}
@@ -456,7 +492,6 @@ struct DevirtModule {
void buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
- Constant *getPointerAtOffset(Constant *I, uint64_t Offset);
bool
tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos,
@@ -464,7 +499,8 @@ struct DevirtModule {
void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
bool &IsExported);
- bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot,
VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res);
@@ -542,6 +578,38 @@ struct DevirtModule {
function_ref<DominatorTree &(Function &)> LookupDomTree);
};
+struct DevirtIndex {
+ ModuleSummaryIndex &ExportSummary;
+ // The set in which to record GUIDs exported from their module by
+ // devirtualization, used by client to ensure they are not internalized.
+ std::set<GlobalValue::GUID> &ExportedGUIDs;
+ // A map in which to record the information necessary to locate the WPD
+ // resolution for local targets in case they are exported by cross module
+ // importing.
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
+
+ MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
+
+ DevirtIndex(
+ ModuleSummaryIndex &ExportSummary,
+ std::set<GlobalValue::GUID> &ExportedGUIDs,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
+ : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
+ LocalWPDTargetsMap(LocalWPDTargetsMap) {}
+
+ bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
+ const TypeIdCompatibleVtableInfo TIdInfo,
+ uint64_t ByteOffset);
+
+ bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+ VTableSlotSummary &SlotSummary,
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res,
+ std::set<ValueInfo> &DevirtTargets);
+
+ void run();
+};
+
struct WholeProgramDevirt : public ModulePass {
static char ID;
@@ -572,7 +640,7 @@ struct WholeProgramDevirt : public ModulePass {
// an optimization remark emitter on the fly, when we need it.
std::unique_ptr<OptimizationRemarkEmitter> ORE;
auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
- ORE = make_unique<OptimizationRemarkEmitter>(F);
+ ORE = std::make_unique<OptimizationRemarkEmitter>(F);
return *ORE;
};
@@ -632,6 +700,41 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::none();
}
+namespace llvm {
+void runWholeProgramDevirtOnIndex(
+ ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+ DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
+}
+
+void updateIndexWPDForExports(
+ ModuleSummaryIndex &Summary,
+ function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+ for (auto &T : LocalWPDTargetsMap) {
+ auto &VI = T.first;
+ // This was enforced earlier during trySingleImplDevirt.
+ assert(VI.getSummaryList().size() == 1 &&
+ "Devirt of local target has more than one copy");
+ auto &S = VI.getSummaryList()[0];
+ if (!isExported(S->modulePath(), VI.getGUID()))
+ continue;
+
+ // It's been exported by a cross module import.
+ for (auto &SlotSummary : T.second) {
+ auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
+ assert(TIdSum);
+ auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
+ assert(WPDRes != TIdSum->WPDRes.end());
+ WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+ WPDRes->second.SingleImplName,
+ Summary.getModuleHash(S->modulePath()));
+ }
+ }
+}
+
+} // end namespace llvm
+
bool DevirtModule::runForTesting(
Module &M, function_ref<AAResults &(Function &)> AARGetter,
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
@@ -662,7 +765,7 @@ bool DevirtModule::runForTesting(
ExitOnError ExitOnErr(
"-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
std::error_code EC;
- raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text);
ExitOnErr(errorCodeToError(EC));
yaml::Output Out(OS);
@@ -706,38 +809,6 @@ void DevirtModule::buildTypeIdentifierMap(
}
}
-Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) {
- if (I->getType()->isPointerTy()) {
- if (Offset == 0)
- return I;
- return nullptr;
- }
-
- const DataLayout &DL = M.getDataLayout();
-
- if (auto *C = dyn_cast<ConstantStruct>(I)) {
- const StructLayout *SL = DL.getStructLayout(C->getType());
- if (Offset >= SL->getSizeInBytes())
- return nullptr;
-
- unsigned Op = SL->getElementContainingOffset(Offset);
- return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
- Offset - SL->getElementOffset(Op));
- }
- if (auto *C = dyn_cast<ConstantArray>(I)) {
- ArrayType *VTableTy = C->getType();
- uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType());
-
- unsigned Op = Offset / ElemSize;
- if (Op >= C->getNumOperands())
- return nullptr;
-
- return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
- Offset % ElemSize);
- }
- return nullptr;
-}
-
bool DevirtModule::tryFindVirtualCallTargets(
std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) {
@@ -746,7 +817,7 @@ bool DevirtModule::tryFindVirtualCallTargets(
return false;
Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
- TM.Offset + ByteOffset);
+ TM.Offset + ByteOffset, M);
if (!Ptr)
return false;
@@ -766,6 +837,34 @@ bool DevirtModule::tryFindVirtualCallTargets(
return !TargetsForSlot.empty();
}
+bool DevirtIndex::tryFindVirtualCallTargets(
+ std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
+ uint64_t ByteOffset) {
+ for (const TypeIdOffsetVtableInfo P : TIdInfo) {
+ // VTable initializer should have only one summary, or all copies must be
+ // linkonce/weak ODR.
+ assert(P.VTableVI.getSummaryList().size() == 1 ||
+ llvm::all_of(
+ P.VTableVI.getSummaryList(),
+ [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
+ return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) ||
+ GlobalValue::isWeakODRLinkage(Summary->linkage());
+ }));
+ const auto *VS = cast<GlobalVarSummary>(P.VTableVI.getSummaryList()[0].get());
+ if (!P.VTableVI.getSummaryList()[0]->isLive())
+ continue;
+ for (auto VTP : VS->vTableFuncs()) {
+ if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
+ continue;
+
+ TargetsForSlot.push_back(VTP.FuncVI);
+ }
+ }
+
+ // Give up if we couldn't find any targets.
+ return !TargetsForSlot.empty();
+}
+
void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
Constant *TheFn, bool &IsExported) {
auto Apply = [&](CallSiteInfo &CSInfo) {
@@ -788,9 +887,38 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
Apply(P.second);
}
+static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
+ // We can't add calls if we haven't seen a definition
+ if (Callee.getSummaryList().empty())
+ return false;
+
+ // Insert calls into the summary index so that the devirtualized targets
+ // are eligible for import.
+ // FIXME: Annotate type tests with hotness. For now, mark these as hot
+ // to better ensure we have the opportunity to inline them.
+ bool IsExported = false;
+ auto &S = Callee.getSummaryList()[0];
+ CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
+ auto AddCalls = [&](CallSiteInfo &CSInfo) {
+ for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
+ FS->addCall({Callee, CI});
+ IsExported |= S->modulePath() != FS->modulePath();
+ }
+ for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
+ FS->addCall({Callee, CI});
+ IsExported |= S->modulePath() != FS->modulePath();
+ }
+ };
+ AddCalls(SlotInfo.CSInfo);
+ for (auto &P : SlotInfo.ConstCSInfo)
+ AddCalls(P.second);
+ return IsExported;
+}
+
bool DevirtModule::trySingleImplDevirt(
- MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) {
+ ModuleSummaryIndex *ExportSummary,
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res) {
// See if the program contains a single implementation of this virtual
// function.
Function *TheFn = TargetsForSlot[0].Fn;
@@ -830,6 +958,10 @@ bool DevirtModule::trySingleImplDevirt(
TheFn->setVisibility(GlobalValue::HiddenVisibility);
TheFn->setName(NewName);
}
+ if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
+ // Any needed promotion of 'TheFn' has already been done during
+ // LTO unit split, so we can ignore return value of AddCalls.
+ AddCalls(SlotInfo, TheFnVI);
Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
Res->SingleImplName = TheFn->getName();
@@ -837,6 +969,63 @@ bool DevirtModule::trySingleImplDevirt(
return true;
}
+bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+ VTableSlotSummary &SlotSummary,
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res,
+ std::set<ValueInfo> &DevirtTargets) {
+ // See if the program contains a single implementation of this virtual
+ // function.
+ auto TheFn = TargetsForSlot[0];
+ for (auto &&Target : TargetsForSlot)
+ if (TheFn != Target)
+ return false;
+
+ // Don't devirtualize if we don't have target definition.
+ auto Size = TheFn.getSummaryList().size();
+ if (!Size)
+ return false;
+
+ // If the summary list contains multiple summaries where at least one is
+ // a local, give up, as we won't know which (possibly promoted) name to use.
+ for (auto &S : TheFn.getSummaryList())
+ if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
+ return false;
+
+ // Collect functions devirtualized at least for one call site for stats.
+ if (PrintSummaryDevirt)
+ DevirtTargets.insert(TheFn);
+
+ auto &S = TheFn.getSummaryList()[0];
+ bool IsExported = AddCalls(SlotInfo, TheFn);
+ if (IsExported)
+ ExportedGUIDs.insert(TheFn.getGUID());
+
+ // Record in summary for use in devirtualization during the ThinLTO import
+ // step.
+ Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
+ if (GlobalValue::isLocalLinkage(S->linkage())) {
+ if (IsExported)
+ // If target is a local function and we are exporting it by
+ // devirtualizing a call in another module, we need to record the
+ // promoted name.
+ Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+ TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
+ else {
+ LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
+ Res->SingleImplName = TheFn.name();
+ }
+ } else
+ Res->SingleImplName = TheFn.name();
+
+ // Name will be empty if this thin link driven off of serialized combined
+ // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
+ // legacy LTO API anyway.
+ assert(!Res->SingleImplName.empty());
+
+ return true;
+}
+
void DevirtModule::tryICallBranchFunnel(
MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res, VTableSlot Slot) {
@@ -1302,10 +1491,13 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
if (B.Before.Bytes.empty() && B.After.Bytes.empty())
return;
- // Align each byte array to pointer width.
- unsigned PointerSize = M.getDataLayout().getPointerSize();
- B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), PointerSize));
- B.After.Bytes.resize(alignTo(B.After.Bytes.size(), PointerSize));
+ // Align the before byte array to the global's minimum alignment so that we
+ // don't break any alignment requirements on the global.
+ MaybeAlign Alignment(B.GV->getAlignment());
+ if (!Alignment)
+ Alignment =
+ Align(M.getDataLayout().getABITypeAlignment(B.GV->getValueType()));
+ B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
// Before was stored in reverse order; flip it now.
for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
@@ -1322,6 +1514,7 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
NewGV->setSection(B.GV->getSection());
NewGV->setComdat(B.GV->getComdat());
+ NewGV->setAlignment(MaybeAlign(B.GV->getAlignment()));
// Copy the original vtable's metadata to the anonymous global, adjusting
// offsets as required.
@@ -1483,8 +1676,11 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
}
void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
+ auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
+ if (!TypeId)
+ return;
const TypeIdSummary *TidSummary =
- ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
+ ImportSummary->getTypeIdSummary(TypeId->getString());
if (!TidSummary)
return;
auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
@@ -1493,6 +1689,7 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
const WholeProgramDevirtResolution &Res = ResI->second;
if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
+ assert(!Res.SingleImplName.empty());
// The type of the function in the declaration is irrelevant because every
// call site will cast it to the correct type.
Constant *SingleImpl =
@@ -1627,8 +1824,7 @@ bool DevirtModule::run() {
// FIXME: Only add live functions.
for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
for (Metadata *MD : MetadataByGUID[VF.GUID]) {
- CallSlots[{MD, VF.Offset}]
- .CSInfo.markSummaryHasTypeTestAssumeUsers();
+ CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
}
}
for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
@@ -1641,7 +1837,7 @@ bool DevirtModule::run() {
for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
CallSlots[{MD, VC.VFunc.Offset}]
.ConstCSInfo[VC.Args]
- .markSummaryHasTypeTestAssumeUsers();
+ .addSummaryTypeTestAssumeUser(FS);
}
}
for (const FunctionSummary::ConstVCall &VC :
@@ -1673,7 +1869,7 @@ bool DevirtModule::run() {
cast<MDString>(S.first.TypeID)->getString())
.WPDRes[S.first.ByteOffset];
- if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) {
+ if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
DidVirtualConstProp |=
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
@@ -1710,7 +1906,7 @@ bool DevirtModule::run() {
using namespace ore;
OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
<< "devirtualized "
- << NV("FunctionName", F->getName()));
+ << NV("FunctionName", DT.first));
}
}
@@ -1722,5 +1918,86 @@ bool DevirtModule::run() {
for (VTableBits &B : Bits)
rebuildGlobal(B);
+ // We have lowered or deleted the type checked load intrinsics, so we no
+ // longer have enough information to reason about the liveness of virtual
+ // function pointers in GlobalDCE.
+ for (GlobalVariable &GV : M.globals())
+ GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
+
return true;
}
+
+void DevirtIndex::run() {
+ if (ExportSummary.typeIdCompatibleVtableMap().empty())
+ return;
+
+ DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
+ for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
+ NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
+ }
+
+ // Collect information from summary about which calls to try to devirtualize.
+ for (auto &P : ExportSummary) {
+ for (auto &S : P.second.SummaryList) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ // FIXME: Only add live functions.
+ for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
+ for (StringRef Name : NameByGUID[VF.GUID]) {
+ CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
+ }
+ }
+ for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
+ for (StringRef Name : NameByGUID[VF.GUID]) {
+ CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_test_assume_const_vcalls()) {
+ for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+ CallSlots[{Name, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .addSummaryTypeTestAssumeUser(FS);
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_checked_load_const_vcalls()) {
+ for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+ CallSlots[{Name, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .addSummaryTypeCheckedLoadUser(FS);
+ }
+ }
+ }
+ }
+
+ std::set<ValueInfo> DevirtTargets;
+ // For each (type, offset) pair:
+ for (auto &S : CallSlots) {
+ // Search each of the members of the type identifier for the virtual
+ // function implementation at offset S.first.ByteOffset, and add to
+ // TargetsForSlot.
+ std::vector<ValueInfo> TargetsForSlot;
+ auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
+ assert(TidSummary);
+ if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
+ S.first.ByteOffset)) {
+ WholeProgramDevirtResolution *Res =
+ &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
+ .WPDRes[S.first.ByteOffset];
+
+ if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
+ DevirtTargets))
+ continue;
+ }
+ }
+
+ // Optionally have the thin link print message for each devirtualized
+ // function.
+ if (PrintSummaryDevirt)
+ for (const auto &DT : DevirtTargets)
+ errs() << "Devirtualized call to " << DT << "\n";
+
+ return;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index ba15b023f2a3..8bc34825f8a7 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1097,6 +1097,107 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
return nullptr;
}
+Instruction *
+InstCombiner::canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
+ BinaryOperator &I) {
+ assert((I.getOpcode() == Instruction::Add ||
+ I.getOpcode() == Instruction::Or ||
+ I.getOpcode() == Instruction::Sub) &&
+ "Expecting add/or/sub instruction");
+
+ // We have a subtraction/addition between a (potentially truncated) *logical*
+ // right-shift of X and a "select".
+ Value *X, *Select;
+ Instruction *LowBitsToSkip, *Extract;
+ if (!match(&I, m_c_BinOp(m_TruncOrSelf(m_CombineAnd(
+ m_LShr(m_Value(X), m_Instruction(LowBitsToSkip)),
+ m_Instruction(Extract))),
+ m_Value(Select))))
+ return nullptr;
+
+ // `add`/`or` is commutative; but for `sub`, "select" *must* be on RHS.
+ if (I.getOpcode() == Instruction::Sub && I.getOperand(1) != Select)
+ return nullptr;
+
+ Type *XTy = X->getType();
+ bool HadTrunc = I.getType() != XTy;
+
+ // If there was a truncation of extracted value, then we'll need to produce
+ // one extra instruction, so we need to ensure one instruction will go away.
+ if (HadTrunc && !match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
+ return nullptr;
+
+ // Extraction should extract high NBits bits, with shift amount calculated as:
+ // low bits to skip = shift bitwidth - high bits to extract
+ // The shift amount itself may be extended, and we need to look past zero-ext
+ // when matching NBits, that will matter for matching later.
+ Constant *C;
+ Value *NBits;
+ if (!match(
+ LowBitsToSkip,
+ m_ZExtOrSelf(m_Sub(m_Constant(C), m_ZExtOrSelf(m_Value(NBits))))) ||
+ !match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
+ APInt(C->getType()->getScalarSizeInBits(),
+ X->getType()->getScalarSizeInBits()))))
+ return nullptr;
+
+ // Sign-extending value can be zero-extended if we `sub`tract it,
+ // or sign-extended otherwise.
+ auto SkipExtInMagic = [&I](Value *&V) {
+ if (I.getOpcode() == Instruction::Sub)
+ match(V, m_ZExtOrSelf(m_Value(V)));
+ else
+ match(V, m_SExtOrSelf(m_Value(V)));
+ };
+
+ // Now, finally validate the sign-extending magic.
+ // `select` itself may be appropriately extended, look past that.
+ SkipExtInMagic(Select);
+
+ ICmpInst::Predicate Pred;
+ const APInt *Thr;
+ Value *SignExtendingValue, *Zero;
+ bool ShouldSignext;
+ // It must be a select between two values we will later establish to be a
+ // sign-extending value and a zero constant. The condition guarding the
+ // sign-extension must be based on a sign bit of the same X we had in `lshr`.
+ if (!match(Select, m_Select(m_ICmp(Pred, m_Specific(X), m_APInt(Thr)),
+ m_Value(SignExtendingValue), m_Value(Zero))) ||
+ !isSignBitCheck(Pred, *Thr, ShouldSignext))
+ return nullptr;
+
+ // icmp-select pair is commutative.
+ if (!ShouldSignext)
+ std::swap(SignExtendingValue, Zero);
+
+ // If we should not perform sign-extension then we must add/or/subtract zero.
+ if (!match(Zero, m_Zero()))
+ return nullptr;
+ // Otherwise, it should be some constant, left-shifted by the same NBits we
+ // had in `lshr`. Said left-shift can also be appropriately extended.
+ // Again, we must look past zero-ext when looking for NBits.
+ SkipExtInMagic(SignExtendingValue);
+ Constant *SignExtendingValueBaseConstant;
+ if (!match(SignExtendingValue,
+ m_Shl(m_Constant(SignExtendingValueBaseConstant),
+ m_ZExtOrSelf(m_Specific(NBits)))))
+ return nullptr;
+ // If we `sub`, then the constant should be one, else it should be all-ones.
+ if (I.getOpcode() == Instruction::Sub
+ ? !match(SignExtendingValueBaseConstant, m_One())
+ : !match(SignExtendingValueBaseConstant, m_AllOnes()))
+ return nullptr;
+
+ auto *NewAShr = BinaryOperator::CreateAShr(X, LowBitsToSkip,
+ Extract->getName() + ".sext");
+ NewAShr->copyIRFlags(Extract); // Preserve `exact`-ness.
+ if (!HadTrunc)
+ return NewAShr;
+
+ Builder.Insert(NewAShr);
+ return TruncInst::CreateTruncOrBitCast(NewAShr, I.getType());
+}
+
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
@@ -1302,12 +1403,32 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Instruction *V = canonicalizeLowbitMask(I, Builder))
return V;
+ if (Instruction *V =
+ canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
+ return V;
+
if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
return SatAdd;
return Changed ? &I : nullptr;
}
+/// Eliminate an op from a linear interpolation (lerp) pattern.
+static Instruction *factorizeLerp(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X, *Y, *Z;
+ if (!match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_Value(Y),
+ m_OneUse(m_FSub(m_FPOne(),
+ m_Value(Z))))),
+ m_OneUse(m_c_FMul(m_Value(X), m_Deferred(Z))))))
+ return nullptr;
+
+ // (Y * (1.0 - Z)) + (X * Z) --> Y + Z * (X - Y) [8 commuted variants]
+ Value *XY = Builder.CreateFSubFMF(X, Y, &I);
+ Value *MulZ = Builder.CreateFMulFMF(Z, XY, &I);
+ return BinaryOperator::CreateFAddFMF(Y, MulZ, &I);
+}
+
/// Factor a common operand out of fadd/fsub of fmul/fdiv.
static Instruction *factorizeFAddFSub(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
@@ -1315,6 +1436,10 @@ static Instruction *factorizeFAddFSub(BinaryOperator &I,
I.getOpcode() == Instruction::FSub) && "Expecting fadd/fsub");
assert(I.hasAllowReassoc() && I.hasNoSignedZeros() &&
"FP factorization requires FMF");
+
+ if (Instruction *Lerp = factorizeLerp(I, Builder))
+ return Lerp;
+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Value *X, *Y, *Z;
bool IsFMul;
@@ -1362,17 +1487,32 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
return FoldedFAdd;
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- Value *X;
// (-X) + Y --> Y - X
- if (match(LHS, m_FNeg(m_Value(X))))
- return BinaryOperator::CreateFSubFMF(RHS, X, &I);
- // Y + (-X) --> Y - X
- if (match(RHS, m_FNeg(m_Value(X))))
- return BinaryOperator::CreateFSubFMF(LHS, X, &I);
+ Value *X, *Y;
+ if (match(&I, m_c_FAdd(m_FNeg(m_Value(X)), m_Value(Y))))
+ return BinaryOperator::CreateFSubFMF(Y, X, &I);
+
+ // Similar to above, but look through fmul/fdiv for the negated term.
+ // (-X * Y) + Z --> Z - (X * Y) [4 commuted variants]
+ Value *Z;
+ if (match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))),
+ m_Value(Z)))) {
+ Value *XY = Builder.CreateFMulFMF(X, Y, &I);
+ return BinaryOperator::CreateFSubFMF(Z, XY, &I);
+ }
+ // (-X / Y) + Z --> Z - (X / Y) [2 commuted variants]
+ // (X / -Y) + Z --> Z - (X / Y) [2 commuted variants]
+ if (match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y))),
+ m_Value(Z))) ||
+ match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))),
+ m_Value(Z)))) {
+ Value *XY = Builder.CreateFDivFMF(X, Y, &I);
+ return BinaryOperator::CreateFSubFMF(Z, XY, &I);
+ }
// Check for (fadd double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
Value *LHSIntVal = LHSConv->getOperand(0);
Type *FPType = LHSConv->getType();
@@ -1631,37 +1771,50 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
const APInt *Op0C;
if (match(Op0, m_APInt(Op0C))) {
- unsigned BitWidth = I.getType()->getScalarSizeInBits();
- // -(X >>u 31) -> (X >>s 31)
- // -(X >>s 31) -> (X >>u 31)
if (Op0C->isNullValue()) {
+ Value *Op1Wide;
+ match(Op1, m_TruncOrSelf(m_Value(Op1Wide)));
+ bool HadTrunc = Op1Wide != Op1;
+ bool NoTruncOrTruncIsOneUse = !HadTrunc || Op1->hasOneUse();
+ unsigned BitWidth = Op1Wide->getType()->getScalarSizeInBits();
+
Value *X;
const APInt *ShAmt;
- if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
+ // -(X >>u 31) -> (X >>s 31)
+ if (NoTruncOrTruncIsOneUse &&
+ match(Op1Wide, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
*ShAmt == BitWidth - 1) {
- Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1);
- return BinaryOperator::CreateAShr(X, ShAmtOp);
+ Value *ShAmtOp = cast<Instruction>(Op1Wide)->getOperand(1);
+ Instruction *NewShift = BinaryOperator::CreateAShr(X, ShAmtOp);
+ NewShift->copyIRFlags(Op1Wide);
+ if (!HadTrunc)
+ return NewShift;
+ Builder.Insert(NewShift);
+ return TruncInst::CreateTruncOrBitCast(NewShift, Op1->getType());
}
- if (match(Op1, m_AShr(m_Value(X), m_APInt(ShAmt))) &&
+ // -(X >>s 31) -> (X >>u 31)
+ if (NoTruncOrTruncIsOneUse &&
+ match(Op1Wide, m_AShr(m_Value(X), m_APInt(ShAmt))) &&
*ShAmt == BitWidth - 1) {
- Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1);
- return BinaryOperator::CreateLShr(X, ShAmtOp);
+ Value *ShAmtOp = cast<Instruction>(Op1Wide)->getOperand(1);
+ Instruction *NewShift = BinaryOperator::CreateLShr(X, ShAmtOp);
+ NewShift->copyIRFlags(Op1Wide);
+ if (!HadTrunc)
+ return NewShift;
+ Builder.Insert(NewShift);
+ return TruncInst::CreateTruncOrBitCast(NewShift, Op1->getType());
}
- if (Op1->hasOneUse()) {
+ if (!HadTrunc && Op1->hasOneUse()) {
Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(Op1, LHS, RHS).Flavor;
if (SPF == SPF_ABS || SPF == SPF_NABS) {
// This is a negate of an ABS/NABS pattern. Just swap the operands
// of the select.
- SelectInst *SI = cast<SelectInst>(Op1);
- Value *TrueVal = SI->getTrueValue();
- Value *FalseVal = SI->getFalseValue();
- SI->setTrueValue(FalseVal);
- SI->setFalseValue(TrueVal);
+ cast<SelectInst>(Op1)->swapValues();
// Don't swap prof metadata, we didn't change the branch behavior.
- return replaceInstUsesWith(I, SI);
+ return replaceInstUsesWith(I, Op1);
}
}
}
@@ -1686,6 +1839,23 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateNeg(Y);
}
+ // (sub (or A, B) (and A, B)) --> (xor A, B)
+ {
+ Value *A, *B;
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateXor(A, B);
+ }
+
+ // (sub (and A, B) (or A, B)) --> neg (xor A, B)
+ {
+ Value *A, *B;
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse()))
+ return BinaryOperator::CreateNeg(Builder.CreateXor(A, B));
+ }
+
// (sub (or A, B), (xor A, B)) --> (and A, B)
{
Value *A, *B;
@@ -1694,6 +1864,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAnd(A, B);
}
+ // (sub (xor A, B) (or A, B)) --> neg (and A, B)
+ {
+ Value *A, *B;
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse()))
+ return BinaryOperator::CreateNeg(Builder.CreateAnd(A, B));
+ }
+
{
Value *Y;
// ((X | Y) - X) --> (~X & Y)
@@ -1778,7 +1957,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
std::swap(LHS, RHS);
// LHS is now O above and expected to have at least 2 uses (the min/max)
// NotA is epected to have 2 uses from the min/max and 1 from the sub.
- if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
+ if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
!NotA->hasNUsesOrMore(4)) {
// Note: We don't generate the inverse max/min, just create the not of
// it and let other folds do the rest.
@@ -1826,6 +2005,10 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return SelectInst::Create(Cmp, Neg, A);
}
+ if (Instruction *V =
+ canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
+ return V;
+
if (Instruction *Ext = narrowMathIfNoOverflow(I))
return Ext;
@@ -1865,6 +2048,22 @@ static Instruction *foldFNegIntoConstant(Instruction &I) {
return nullptr;
}
+static Instruction *hoistFNegAboveFMulFDiv(Instruction &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *FNeg;
+ if (!match(&I, m_FNeg(m_Value(FNeg))))
+ return nullptr;
+
+ Value *X, *Y;
+ if (match(FNeg, m_OneUse(m_FMul(m_Value(X), m_Value(Y)))))
+ return BinaryOperator::CreateFMulFMF(Builder.CreateFNegFMF(X, &I), Y, &I);
+
+ if (match(FNeg, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))))
+ return BinaryOperator::CreateFDivFMF(Builder.CreateFNegFMF(X, &I), Y, &I);
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitFNeg(UnaryOperator &I) {
Value *Op = I.getOperand(0);
@@ -1882,6 +2081,9 @@ Instruction *InstCombiner::visitFNeg(UnaryOperator &I) {
match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
return BinaryOperator::CreateFSubFMF(Y, X, &I);
+ if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
+ return R;
+
return nullptr;
}
@@ -1903,6 +2105,9 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (Instruction *X = foldFNegIntoConstant(I))
return X;
+ if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
+ return R;
+
Value *X, *Y;
Constant *C;
@@ -1944,6 +2149,21 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y))))))
return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPExt(Y, Ty), &I);
+ // Similar to above, but look through fmul/fdiv of the negated value:
+ // Op0 - (-X * Y) --> Op0 + (X * Y)
+ // Op0 - (Y * -X) --> Op0 + (X * Y)
+ if (match(Op1, m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))))) {
+ Value *FMul = Builder.CreateFMulFMF(X, Y, &I);
+ return BinaryOperator::CreateFAddFMF(Op0, FMul, &I);
+ }
+ // Op0 - (-X / Y) --> Op0 + (X / Y)
+ // Op0 - (X / -Y) --> Op0 + (X / Y)
+ if (match(Op1, m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y)))) ||
+ match(Op1, m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))))) {
+ Value *FDiv = Builder.CreateFDivFMF(X, Y, &I);
+ return BinaryOperator::CreateFAddFMF(Op0, FDiv, &I);
+ }
+
// Handle special cases for FSub with selects feeding the operation
if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
return replaceInstUsesWith(I, V);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2b9859b602f4..4a30b60ca931 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -160,16 +160,14 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
}
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
-/// (V < Lo || V >= Hi). This method expects that Lo <= Hi. IsSigned indicates
+/// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
/// whether to treat V, Lo, and Hi as signed or not.
Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
bool isSigned, bool Inside) {
- assert((isSigned ? Lo.sle(Hi) : Lo.ule(Hi)) &&
- "Lo is not <= Hi in range emission code!");
+ assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) &&
+ "Lo is not < Hi in range emission code!");
Type *Ty = V->getType();
- if (Lo == Hi)
- return Inside ? ConstantInt::getFalse(Ty) : ConstantInt::getTrue(Ty);
// V >= Min && V < Hi --> V < Hi
// V < Min || V >= Hi --> V >= Hi
@@ -1051,9 +1049,103 @@ static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
return nullptr;
}
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
+ ICmpInst *UnsignedICmp, bool IsAnd,
+ const SimplifyQuery &Q,
+ InstCombiner::BuilderTy &Builder) {
+ Value *ZeroCmpOp;
+ ICmpInst::Predicate EqPred;
+ if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(ZeroCmpOp), m_Zero())) ||
+ !ICmpInst::isEquality(EqPred))
+ return nullptr;
+
+ auto IsKnownNonZero = [&](Value *V) {
+ return isKnownNonZero(V, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+ };
+
+ ICmpInst::Predicate UnsignedPred;
+
+ Value *A, *B;
+ if (match(UnsignedICmp,
+ m_c_ICmp(UnsignedPred, m_Specific(ZeroCmpOp), m_Value(A))) &&
+ match(ZeroCmpOp, m_c_Add(m_Specific(A), m_Value(B))) &&
+ (ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) {
+ if (UnsignedICmp->getOperand(0) != ZeroCmpOp)
+ UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+
+ auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) {
+ if (!IsKnownNonZero(NonZero))
+ std::swap(NonZero, Other);
+ return IsKnownNonZero(NonZero);
+ };
+
+ // Given ZeroCmpOp = (A + B)
+ // ZeroCmpOp <= A && ZeroCmpOp != 0 --> (0-B) < A
+ // ZeroCmpOp > A || ZeroCmpOp == 0 --> (0-B) >= A
+ //
+ // ZeroCmpOp < A && ZeroCmpOp != 0 --> (0-X) < Y iff
+ // ZeroCmpOp >= A || ZeroCmpOp == 0 --> (0-X) >= Y iff
+ // with X being the value (A/B) that is known to be non-zero,
+ // and Y being remaining value.
+ if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
+ IsAnd)
+ return Builder.CreateICmpULT(Builder.CreateNeg(B), A);
+ if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE &&
+ IsAnd && GetKnownNonZeroAndOther(B, A))
+ return Builder.CreateICmpULT(Builder.CreateNeg(B), A);
+ if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
+ !IsAnd)
+ return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
+ if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ &&
+ !IsAnd && GetKnownNonZeroAndOther(B, A))
+ return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
+ }
+
+ Value *Base, *Offset;
+ if (!match(ZeroCmpOp, m_Sub(m_Value(Base), m_Value(Offset))))
+ return nullptr;
+
+ if (!match(UnsignedICmp,
+ m_c_ICmp(UnsignedPred, m_Specific(Base), m_Specific(Offset))) ||
+ !ICmpInst::isUnsigned(UnsignedPred))
+ return nullptr;
+ if (UnsignedICmp->getOperand(0) != Base)
+ UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+
+ // Base >=/> Offset && (Base - Offset) != 0 <--> Base > Offset
+ // (no overflow and not null)
+ if ((UnsignedPred == ICmpInst::ICMP_UGE ||
+ UnsignedPred == ICmpInst::ICMP_UGT) &&
+ EqPred == ICmpInst::ICMP_NE && IsAnd)
+ return Builder.CreateICmpUGT(Base, Offset);
+
+ // Base <=/< Offset || (Base - Offset) == 0 <--> Base <= Offset
+ // (overflow or null)
+ if ((UnsignedPred == ICmpInst::ICMP_ULE ||
+ UnsignedPred == ICmpInst::ICMP_ULT) &&
+ EqPred == ICmpInst::ICMP_EQ && !IsAnd)
+ return Builder.CreateICmpULE(Base, Offset);
+
+ // Base <= Offset && (Base - Offset) != 0 --> Base < Offset
+ if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
+ IsAnd)
+ return Builder.CreateICmpULT(Base, Offset);
+
+ // Base > Offset || (Base - Offset) == 0 --> Base >= Offset
+ if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
+ !IsAnd)
+ return Builder.CreateICmpUGE(Base, Offset);
+
+ return nullptr;
+}
+
/// Fold (icmp)&(icmp) if possible.
Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction &CxtI) {
+ const SimplifyQuery Q = SQ.getWithInstruction(&CxtI);
+
// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
// if K1 and K2 are a one-bit mask.
if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, true, CxtI))
@@ -1096,6 +1188,13 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldIsPowerOf2(LHS, RHS, true /* JoinedByAnd */, Builder))
return V;
+ if (Value *X =
+ foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/true, Q, Builder))
+ return X;
+ if (Value *X =
+ foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder))
+ return X;
+
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
@@ -1196,16 +1295,22 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
default:
llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_ULT:
- if (LHSC == SubOne(RHSC)) // (X != 13 & X u< 14) -> X < 13
+ // (X != 13 & X u< 14) -> X < 13
+ if (LHSC->getValue() == (RHSC->getValue() - 1))
return Builder.CreateICmpULT(LHS0, LHSC);
- if (LHSC->isZero()) // (X != 0 & X u< 14) -> X-1 u< 13
+ if (LHSC->isZero()) // (X != 0 & X u< C) -> X-1 u< C-1
return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
false, true);
break; // (X != 13 & X u< 15) -> no change
case ICmpInst::ICMP_SLT:
- if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13
+ // (X != 13 & X s< 14) -> X < 13
+ if (LHSC->getValue() == (RHSC->getValue() - 1))
return Builder.CreateICmpSLT(LHS0, LHSC);
- break; // (X != 13 & X s< 15) -> no change
+ // (X != INT_MIN & X s< C) -> X-(INT_MIN+1) u< (C-(INT_MIN+1))
+ if (LHSC->isMinValue(true))
+ return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
+ true, true);
+ break; // (X != 13 & X s< 15) -> no change
case ICmpInst::ICMP_NE:
// Potential folds for this case should already be handled.
break;
@@ -1216,10 +1321,15 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
default:
llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_NE:
- if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14
+ // (X u> 13 & X != 14) -> X u> 14
+ if (RHSC->getValue() == (LHSC->getValue() + 1))
return Builder.CreateICmp(PredL, LHS0, RHSC);
+ // X u> C & X != UINT_MAX -> (X-(C+1)) u< UINT_MAX-(C+1)
+ if (RHSC->isMaxValue(false))
+ return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
+ false, true);
break; // (X u> 13 & X != 15) -> no change
- case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
+ case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) u< 1
return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
false, true);
}
@@ -1229,10 +1339,15 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
default:
llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_NE:
- if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14
+ // (X s> 13 & X != 14) -> X s> 14
+ if (RHSC->getValue() == (LHSC->getValue() + 1))
return Builder.CreateICmp(PredL, LHS0, RHSC);
+ // X s> C & X != INT_MAX -> (X-(C+1)) u< INT_MAX-(C+1)
+ if (RHSC->isMaxValue(true))
+ return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
+ true, true);
break; // (X s> 13 & X != 15) -> no change
- case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
+ case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) u< 1
return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true,
true);
}
@@ -1352,8 +1467,8 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I,
Value *A, *B;
if (match(I.getOperand(0), m_OneUse(m_Not(m_Value(A)))) &&
match(I.getOperand(1), m_OneUse(m_Not(m_Value(B)))) &&
- !IsFreeToInvert(A, A->hasOneUse()) &&
- !IsFreeToInvert(B, B->hasOneUse())) {
+ !isFreeToInvert(A, A->hasOneUse()) &&
+ !isFreeToInvert(B, B->hasOneUse())) {
Value *AndOr = Builder.CreateBinOp(Opcode, A, B, I.getName() + ".demorgan");
return BinaryOperator::CreateNot(AndOr);
}
@@ -1770,13 +1885,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
- if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
+ if (Op1->hasOneUse() || isFreeToInvert(C, C->hasOneUse()))
return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C));
// ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
- if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
+ if (Op0->hasOneUse() || isFreeToInvert(C, C->hasOneUse()))
return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C));
// (A | B) & ((~A) ^ B) -> (A & B)
@@ -1844,6 +1959,20 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
A->getType()->isIntOrIntVectorTy(1))
return SelectInst::Create(A, Op0, Constant::getNullValue(I.getType()));
+ // and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0.
+ {
+ Value *X, *Y;
+ const APInt *ShAmt;
+ Type *Ty = I.getType();
+ if (match(&I, m_c_And(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)),
+ m_APInt(ShAmt))),
+ m_Deferred(X))) &&
+ *ShAmt == Ty->getScalarSizeInBits() - 1) {
+ Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
+ return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty));
+ }
+ }
+
return nullptr;
}
@@ -2057,6 +2186,8 @@ Value *InstCombiner::matchSelectFromAndOr(Value *A, Value *C, Value *B,
/// Fold (icmp)|(icmp) if possible.
Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction &CxtI) {
+ const SimplifyQuery Q = SQ.getWithInstruction(&CxtI);
+
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
// if K1 and K2 are a one-bit mask.
if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, false, CxtI))
@@ -2182,6 +2313,13 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldIsPowerOf2(LHS, RHS, false /* JoinedByAnd */, Builder))
return V;
+ if (Value *X =
+ foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/false, Q, Builder))
+ return X;
+ if (Value *X =
+ foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder))
+ return X;
+
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
if (!LHSC || !RHSC)
return nullptr;
@@ -2251,8 +2389,19 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
case ICmpInst::ICMP_EQ:
// Potential folds for this case should already be handled.
break;
- case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
- case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
+ case ICmpInst::ICMP_UGT:
+ // (X == 0 || X u> C) -> (X-1) u>= C
+ if (LHSC->isMinValue(false))
+ return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1,
+ false, false);
+ // (X == 13 | X u> 14) -> no change
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (X == INT_MIN || X s> C) -> (X-(INT_MIN+1)) u>= C-INT_MIN
+ if (LHSC->isMinValue(true))
+ return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1,
+ true, false);
+ // (X == 13 | X s> 14) -> no change
break;
}
break;
@@ -2261,6 +2410,10 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
default:
llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
+ // (X u< C || X == UINT_MAX) => (X-C) u>= UINT_MAX-C
+ if (RHSC->isMaxValue(false))
+ return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(),
+ false, false);
break;
case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
assert(!RHSC->isMaxValue(false) && "Missed icmp simplification");
@@ -2272,9 +2425,14 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
switch (PredR) {
default:
llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
+ case ICmpInst::ICMP_EQ:
+ // (X s< C || X == INT_MAX) => (X-C) u>= INT_MAX-C
+ if (RHSC->isMaxValue(true))
+ return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(),
+ true, false);
+ // (X s< 13 | X == 14) -> no change
break;
- case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
+ case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) u> 2
assert(!RHSC->isMaxValue(true) && "Missed icmp simplification");
return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1, true,
false);
@@ -2552,6 +2710,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
+ // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? -1 : X.
+ {
+ Value *X, *Y;
+ const APInt *ShAmt;
+ Type *Ty = I.getType();
+ if (match(&I, m_c_Or(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)),
+ m_APInt(ShAmt))),
+ m_Deferred(X))) &&
+ *ShAmt == Ty->getScalarSizeInBits() - 1) {
+ Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
+ return SelectInst::Create(NewICmpInst, ConstantInt::getAllOnesValue(Ty),
+ X);
+ }
+ }
+
+ if (Instruction *V =
+ canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
+ return V;
+
return nullptr;
}
@@ -2617,7 +2794,11 @@ static Instruction *foldXorToXor(BinaryOperator &I,
return nullptr;
}
-Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+ BinaryOperator &I) {
+ assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS &&
+ I.getOperand(1) == RHS && "Should be 'xor' with these operands");
+
if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
@@ -2672,14 +2853,35 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
// TODO: Independently handle cases where the 'and' side is a constant.
- if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) {
- // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS
- RHS->setPredicate(RHS->getInversePredicate());
- return Builder.CreateAnd(LHS, RHS);
+ ICmpInst *X = nullptr, *Y = nullptr;
+ if (OrICmp == LHS && AndICmp == RHS) {
+ // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y
+ X = LHS;
+ Y = RHS;
}
- if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) {
- // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS
- LHS->setPredicate(LHS->getInversePredicate());
+ if (OrICmp == RHS && AndICmp == LHS) {
+ // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X
+ X = RHS;
+ Y = LHS;
+ }
+ if (X && Y && (Y->hasOneUse() || canFreelyInvertAllUsersOf(Y, &I))) {
+ // Invert the predicate of 'Y', thus inverting its output.
+ Y->setPredicate(Y->getInversePredicate());
+ // So, are there other uses of Y?
+ if (!Y->hasOneUse()) {
+ // We need to adapt other uses of Y though. Get a value that matches
+ // the original value of Y before inversion. While this increases
+ // immediate instruction count, we have just ensured that all the
+ // users are freely-invertible, so that 'not' *will* get folded away.
+ BuilderTy::InsertPointGuard Guard(Builder);
+ // Set insertion point to right after the Y.
+ Builder.SetInsertPoint(Y->getParent(), ++(Y->getIterator()));
+ Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
+ // Replace all uses of Y (excluding the one in NotY!) with NotY.
+ Y->replaceUsesWithIf(NotY,
+ [NotY](Use &U) { return U.getUser() != NotY; });
+ }
+ // All done.
return Builder.CreateAnd(LHS, RHS);
}
}
@@ -2747,9 +2949,9 @@ static Instruction *sinkNotIntoXor(BinaryOperator &I,
return nullptr;
// We only want to do the transform if it is free to do.
- if (IsFreeToInvert(X, X->hasOneUse())) {
+ if (isFreeToInvert(X, X->hasOneUse())) {
// Ok, good.
- } else if (IsFreeToInvert(Y, Y->hasOneUse())) {
+ } else if (isFreeToInvert(Y, Y->hasOneUse())) {
std::swap(X, Y);
} else
return nullptr;
@@ -2827,9 +3029,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// Apply DeMorgan's Law when inverts are free:
// ~(X & Y) --> (~X | ~Y)
// ~(X | Y) --> (~X & ~Y)
- if (IsFreeToInvert(NotVal->getOperand(0),
+ if (isFreeToInvert(NotVal->getOperand(0),
NotVal->getOperand(0)->hasOneUse()) &&
- IsFreeToInvert(NotVal->getOperand(1),
+ isFreeToInvert(NotVal->getOperand(1),
NotVal->getOperand(1)->hasOneUse())) {
Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs");
Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs");
@@ -3004,7 +3206,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
- if (Value *V = foldXorOfICmps(LHS, RHS))
+ if (Value *V = foldXorOfICmps(LHS, RHS, I))
return replaceInstUsesWith(I, V);
if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
@@ -3052,7 +3254,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (SelectPatternResult::isMinOrMax(SPF)) {
// It's possible we get here before the not has been simplified, so make
// sure the input to the not isn't freely invertible.
- if (match(LHS, m_Not(m_Value(X))) && !IsFreeToInvert(X, X->hasOneUse())) {
+ if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) {
Value *NotY = Builder.CreateNot(RHS);
return SelectInst::Create(
Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
@@ -3060,7 +3262,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// It's possible we get here before the not has been simplified, so make
// sure the input to the not isn't freely invertible.
- if (match(RHS, m_Not(m_Value(Y))) && !IsFreeToInvert(Y, Y->hasOneUse())) {
+ if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) {
Value *NotX = Builder.CreateNot(LHS);
return SelectInst::Create(
Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
@@ -3068,8 +3270,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// If both sides are freely invertible, then we can get rid of the xor
// completely.
- if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
- IsFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
+ if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
+ isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
Value *NotLHS = Builder.CreateNot(LHS);
Value *NotRHS = Builder.CreateNot(RHS);
return SelectInst::Create(
diff --git a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index 5f37a00f56cf..825f4b468b0a 100644
--- a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -124,7 +124,7 @@ Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
auto *SI = new StoreInst(RMWI.getValOperand(),
RMWI.getPointerOperand(), &RMWI);
SI->setAtomic(Ordering, RMWI.getSyncScopeID());
- SI->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+ SI->setAlignment(MaybeAlign(DL.getABITypeAlignment(RMWI.getType())));
return eraseInstFromFunction(RMWI);
}
@@ -154,6 +154,6 @@ Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand());
Load->setAtomic(Ordering, RMWI.getSyncScopeID());
- Load->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+ Load->setAlignment(MaybeAlign(DL.getABITypeAlignment(RMWI.getType())));
return Load;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4b3333affa72..c650d242cd50 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -185,7 +185,8 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
LoadInst *L = Builder.CreateLoad(IntType, Src);
// Alignment from the mem intrinsic will be better, so use it.
- L->setAlignment(CopySrcAlign);
+ L->setAlignment(
+ MaybeAlign(CopySrcAlign)); // FIXME: Check if we can use Align instead.
if (CopyMD)
L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
MDNode *LoopMemParallelMD =
@@ -198,7 +199,8 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
StoreInst *S = Builder.CreateStore(L, Dest);
// Alignment from the mem intrinsic will be better, so use it.
- S->setAlignment(CopyDstAlign);
+ S->setAlignment(
+ MaybeAlign(CopyDstAlign)); // FIXME: Check if we can use Align instead.
if (CopyMD)
S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
if (LoopMemParallelMD)
@@ -223,9 +225,10 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
}
Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
- if (MI->getDestAlignment() < Alignment) {
- MI->setDestAlignment(Alignment);
+ const unsigned KnownAlignment =
+ getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
+ if (MI->getDestAlignment() < KnownAlignment) {
+ MI->setDestAlignment(KnownAlignment);
return MI;
}
@@ -243,13 +246,9 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
return nullptr;
- uint64_t Len = LenC->getLimitedValue();
- Alignment = MI->getDestAlignment();
+ const uint64_t Len = LenC->getLimitedValue();
assert(Len && "0-sized memory setting should be removed already.");
-
- // Alignment 0 is identity for alignment 1 for memset, but not store.
- if (Alignment == 0)
- Alignment = 1;
+ const Align Alignment = assumeAligned(MI->getDestAlignment());
// If it is an atomic and alignment is less than the size then we will
// introduce the unaligned memory access which will be later transformed
@@ -1060,9 +1059,9 @@ Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {
// If we can unconditionally load from this address, replace with a
// load/select idiom. TODO: use DT for context sensitive query
- if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment,
- II.getModule()->getDataLayout(),
- &II, nullptr)) {
+ if (isDereferenceableAndAlignedPointer(
+ LoadPtr, II.getType(), MaybeAlign(Alignment),
+ II.getModule()->getDataLayout(), &II, nullptr)) {
Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
@@ -1086,7 +1085,8 @@ Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {
// If the mask is all ones, this is a plain vector store of the 1st argument.
if (ConstMask->isAllOnesValue()) {
Value *StorePtr = II.getArgOperand(1);
- unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
+ MaybeAlign Alignment(
+ cast<ConstantInt>(II.getArgOperand(2))->getZExtValue());
return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
}
@@ -2234,6 +2234,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, Add);
}
+ // Try to simplify the underlying FMul.
+ if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
+ II->getFastMathFlags(),
+ SQ.getWithInstruction(II))) {
+ auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
+ FAdd->copyFastMathFlags(II);
+ return FAdd;
+ }
+
LLVM_FALLTHROUGH;
}
case Intrinsic::fma: {
@@ -2258,9 +2267,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return II;
}
- // fma x, 1, z -> fadd x, z
- if (match(Src1, m_FPOne())) {
- auto *FAdd = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
+ // Try to simplify the underlying FMul. We can only apply simplifications
+ // that do not require rounding.
+ if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
+ II->getFastMathFlags(),
+ SQ.getWithInstruction(II))) {
+ auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
FAdd->copyFastMathFlags(II);
return FAdd;
}
@@ -2331,7 +2343,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn PPC VSX loads into normal loads.
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
- return new LoadInst(II->getType(), Ptr, Twine(""), false, 1);
+ return new LoadInst(II->getType(), Ptr, Twine(""), false, Align::None());
}
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
@@ -2349,7 +2361,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn PPC VSX stores into normal stores.
Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
- return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
+ return new StoreInst(II->getArgOperand(0), Ptr, false, Align::None());
}
case Intrinsic::ppc_qpx_qvlfs:
// Turn PPC QPX qvlfs -> load if the pointer is known aligned.
@@ -3885,6 +3897,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Asan needs to poison memory to detect invalid access which is possible
// even for empty lifetime range.
if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
+ II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
break;
@@ -3950,10 +3963,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::experimental_gc_relocate: {
+ auto &GCR = *cast<GCRelocateInst>(II);
+
+ // If we have two copies of the same pointer in the statepoint argument
+ // list, canonicalize to one. This may let us common gc.relocates.
+ if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
+ GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
+ auto *OpIntTy = GCR.getOperand(2)->getType();
+ II->setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
+ return II;
+ }
+
// Translate facts known about a pointer before relocating into
// facts about the relocate value, while being careful to
// preserve relocation semantics.
- Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
+ Value *DerivedPtr = GCR.getDerivedPtr();
// Remove the relocation if unused, note that this check is required
// to prevent the cases below from looping forever.
@@ -4177,10 +4201,58 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
return nullptr;
}
+static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
+ unsigned NumArgs = Call.getNumArgOperands();
+ ConstantInt *Op0C = dyn_cast<ConstantInt>(Call.getOperand(0));
+ ConstantInt *Op1C =
+ (NumArgs == 1) ? nullptr : dyn_cast<ConstantInt>(Call.getOperand(1));
+ // Bail out if the allocation size is zero.
+ if ((Op0C && Op0C->isNullValue()) || (Op1C && Op1C->isNullValue()))
+ return;
+
+ if (isMallocLikeFn(&Call, TLI) && Op0C) {
+ if (isOpNewLikeFn(&Call, TLI))
+ Call.addAttribute(AttributeList::ReturnIndex,
+ Attribute::getWithDereferenceableBytes(
+ Call.getContext(), Op0C->getZExtValue()));
+ else
+ Call.addAttribute(AttributeList::ReturnIndex,
+ Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Op0C->getZExtValue()));
+ } else if (isReallocLikeFn(&Call, TLI) && Op1C) {
+ Call.addAttribute(AttributeList::ReturnIndex,
+ Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Op1C->getZExtValue()));
+ } else if (isCallocLikeFn(&Call, TLI) && Op0C && Op1C) {
+ bool Overflow;
+ const APInt &N = Op0C->getValue();
+ APInt Size = N.umul_ov(Op1C->getValue(), Overflow);
+ if (!Overflow)
+ Call.addAttribute(AttributeList::ReturnIndex,
+ Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Size.getZExtValue()));
+ } else if (isStrdupLikeFn(&Call, TLI)) {
+ uint64_t Len = GetStringLength(Call.getOperand(0));
+ if (Len) {
+ // strdup
+ if (NumArgs == 1)
+ Call.addAttribute(AttributeList::ReturnIndex,
+ Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Len));
+ // strndup
+ else if (NumArgs == 2 && Op1C)
+ Call.addAttribute(
+ AttributeList::ReturnIndex,
+ Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), std::min(Len, Op1C->getZExtValue() + 1)));
+ }
+ }
+}
+
/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombiner::visitCallBase(CallBase &Call) {
- if (isAllocLikeFn(&Call, &TLI))
- return visitAllocSite(Call);
+ if (isAllocationFn(&Call, &TLI))
+ annotateAnyAllocSite(Call, &TLI);
bool Changed = false;
@@ -4312,6 +4384,9 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) {
if (I) return eraseInstFromFunction(*I);
}
+ if (isAllocLikeFn(&Call, &TLI))
+ return visitAllocSite(Call);
+
return Changed ? &Call : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 2c9ba203fbf3..65aaef28d87a 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -140,7 +140,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
}
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
- New->setAlignment(AI.getAlignment());
+ New->setAlignment(MaybeAlign(AI.getAlignment()));
New->takeName(&AI);
New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
@@ -1531,16 +1531,16 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
// what we can and cannot do safely varies from operation to operation, and
// is explained below in the various case statements.
Type *Ty = FPT.getType();
- BinaryOperator *OpI = dyn_cast<BinaryOperator>(FPT.getOperand(0));
- if (OpI && OpI->hasOneUse()) {
- Type *LHSMinType = getMinimumFPType(OpI->getOperand(0));
- Type *RHSMinType = getMinimumFPType(OpI->getOperand(1));
- unsigned OpWidth = OpI->getType()->getFPMantissaWidth();
+ auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
+ if (BO && BO->hasOneUse()) {
+ Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
+ Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
+ unsigned OpWidth = BO->getType()->getFPMantissaWidth();
unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
unsigned DstWidth = Ty->getFPMantissaWidth();
- switch (OpI->getOpcode()) {
+ switch (BO->getOpcode()) {
default: break;
case Instruction::FAdd:
case Instruction::FSub:
@@ -1563,10 +1563,10 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
// could be tightened for those cases, but they are rare (the main
// case of interest here is (float)((double)float + float)).
if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
- Value *LHS = Builder.CreateFPTrunc(OpI->getOperand(0), Ty);
- Value *RHS = Builder.CreateFPTrunc(OpI->getOperand(1), Ty);
- Instruction *RI = BinaryOperator::Create(OpI->getOpcode(), LHS, RHS);
- RI->copyFastMathFlags(OpI);
+ Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
+ Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
+ Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
+ RI->copyFastMathFlags(BO);
return RI;
}
break;
@@ -1577,9 +1577,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
// rounding can possibly occur; we can safely perform the operation
// in the destination format if it can represent both sources.
if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
- Value *LHS = Builder.CreateFPTrunc(OpI->getOperand(0), Ty);
- Value *RHS = Builder.CreateFPTrunc(OpI->getOperand(1), Ty);
- return BinaryOperator::CreateFMulFMF(LHS, RHS, OpI);
+ Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
+ Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
+ return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
}
break;
case Instruction::FDiv:
@@ -1590,9 +1590,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
// condition used here is a good conservative first pass.
// TODO: Tighten bound via rigorous analysis of the unbalanced case.
if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
- Value *LHS = Builder.CreateFPTrunc(OpI->getOperand(0), Ty);
- Value *RHS = Builder.CreateFPTrunc(OpI->getOperand(1), Ty);
- return BinaryOperator::CreateFDivFMF(LHS, RHS, OpI);
+ Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
+ Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
+ return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
}
break;
case Instruction::FRem: {
@@ -1604,14 +1604,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
break;
Value *LHS, *RHS;
if (LHSWidth == SrcWidth) {
- LHS = Builder.CreateFPTrunc(OpI->getOperand(0), LHSMinType);
- RHS = Builder.CreateFPTrunc(OpI->getOperand(1), LHSMinType);
+ LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
+ RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
} else {
- LHS = Builder.CreateFPTrunc(OpI->getOperand(0), RHSMinType);
- RHS = Builder.CreateFPTrunc(OpI->getOperand(1), RHSMinType);
+ LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
+ RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
}
- Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, OpI);
+ Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
return CastInst::CreateFPCast(ExactResult, Ty);
}
}
@@ -2338,8 +2338,23 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
- return GetElementPtrInst::CreateInBounds(SrcPTy->getElementType(), Src,
- Idxs);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs);
+
+ // If the source pointer is dereferenceable, then assume it points to an
+ // allocated object and apply "inbounds" to the GEP.
+ bool CanBeNull;
+ if (Src->getPointerDereferenceableBytes(DL, CanBeNull)) {
+ // In a non-default address space (not 0), a null pointer can not be
+ // assumed inbounds, so ignore that case (dereferenceable_or_null).
+ // The reason is that 'null' is not treated differently in these address
+ // spaces, and we consequently ignore the 'gep inbounds' special case
+ // for 'null' which allows 'inbounds' on 'null' if the indices are
+ // zeros.
+ if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
+ GEP->setIsInBounds();
+ }
+ return GEP;
}
}
@@ -2391,28 +2406,47 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
- if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+ if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
- if (SVI->hasOneUse() && DestTy->isVectorTy() &&
- DestTy->getVectorNumElements() == SVI->getType()->getNumElements() &&
- SVI->getType()->getNumElements() ==
- SVI->getOperand(0)->getType()->getVectorNumElements()) {
+ Value *ShufOp0 = Shuf->getOperand(0);
+ Value *ShufOp1 = Shuf->getOperand(1);
+ unsigned NumShufElts = Shuf->getType()->getVectorNumElements();
+ unsigned NumSrcVecElts = ShufOp0->getType()->getVectorNumElements();
+ if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
+ DestTy->getVectorNumElements() == NumShufElts &&
+ NumShufElts == NumSrcVecElts) {
BitCastInst *Tmp;
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
// us to eliminate at least one cast.
- if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
Tmp->getOperand(0)->getType() == DestTy) ||
- ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
Tmp->getOperand(0)->getType() == DestTy)) {
- Value *LHS = Builder.CreateBitCast(SVI->getOperand(0), DestTy);
- Value *RHS = Builder.CreateBitCast(SVI->getOperand(1), DestTy);
+ Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
+ Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
// Return a new shuffle vector. Use the same element ID's, as we
// know the vector types match #elts.
- return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+ return new ShuffleVectorInst(LHS, RHS, Shuf->getOperand(2));
}
}
+
+ // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
+ // a byte-swap:
+ // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
+ // TODO: We should match the related pattern for bitreverse.
+ if (DestTy->isIntegerTy() &&
+ DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
+ SrcTy->getScalarSizeInBits() == 8 && NumShufElts % 2 == 0 &&
+ Shuf->hasOneUse() && Shuf->isReverse()) {
+ assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
+ assert(isa<UndefValue>(ShufOp1) && "Unexpected shuffle op");
+ Function *Bswap =
+ Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy);
+ Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
+ return IntrinsicInst::Create(Bswap, { ScalarX });
+ }
}
// Handle the A->B->A cast, and there is an intervening PHI node.
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 3a4283ae5406..a9f64feb600c 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -69,34 +69,6 @@ static bool hasBranchUse(ICmpInst &I) {
return false;
}
-/// Given an exploded icmp instruction, return true if the comparison only
-/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the
-/// result of the comparison is true when the input value is signed.
-static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
- bool &TrueIfSigned) {
- switch (Pred) {
- case ICmpInst::ICMP_SLT: // True if LHS s< 0
- TrueIfSigned = true;
- return RHS.isNullValue();
- case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
- TrueIfSigned = true;
- return RHS.isAllOnesValue();
- case ICmpInst::ICMP_SGT: // True if LHS s> -1
- TrueIfSigned = false;
- return RHS.isAllOnesValue();
- case ICmpInst::ICMP_UGT:
- // True if LHS u> RHS and RHS == high-bit-mask - 1
- TrueIfSigned = true;
- return RHS.isMaxSignedValue();
- case ICmpInst::ICMP_UGE:
- // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
- TrueIfSigned = true;
- return RHS.isSignMask();
- default:
- return false;
- }
-}
-
/// Returns true if the exploded icmp can be expressed as a signed comparison
/// to zero and updates the predicate accordingly.
/// The signedness of the comparison is preserved.
@@ -832,6 +804,10 @@ getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond,
const DataLayout &DL) {
+ // FIXME: Support vector of pointers.
+ if (GEPLHS->getType()->isVectorTy())
+ return nullptr;
+
if (!GEPLHS->hasAllConstantIndices())
return nullptr;
@@ -882,7 +858,9 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
RHS = RHS->stripPointerCasts();
Value *PtrBase = GEPLHS->getOperand(0);
- if (PtrBase == RHS && GEPLHS->isInBounds()) {
+ // FIXME: Support vector pointer GEPs.
+ if (PtrBase == RHS && GEPLHS->isInBounds() &&
+ !GEPLHS->getType()->isVectorTy()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
// This transformation (ignoring the base and scales) is valid because we
// know pointers can't overflow since the gep is inbounds. See if we can
@@ -894,6 +872,37 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
Offset = EmitGEPOffset(GEPLHS);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
Constant::getNullValue(Offset->getType()));
+ }
+
+ if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) &&
+ isa<Constant>(RHS) && cast<Constant>(RHS)->isNullValue() &&
+ !NullPointerIsDefined(I.getFunction(),
+ RHS->getType()->getPointerAddressSpace())) {
+ // For most address spaces, an allocation can't be placed at null, but null
+ // itself is treated as a 0 size allocation in the in bounds rules. Thus,
+ // the only valid inbounds address derived from null, is null itself.
+ // Thus, we have four cases to consider:
+ // 1) Base == nullptr, Offset == 0 -> inbounds, null
+ // 2) Base == nullptr, Offset != 0 -> poison as the result is out of bounds
+ // 3) Base != nullptr, Offset == (-base) -> poison (crossing allocations)
+ // 4) Base != nullptr, Offset != (-base) -> nonnull (and possibly poison)
+ //
+ // (Note if we're indexing a type of size 0, that simply collapses into one
+ // of the buckets above.)
+ //
+ // In general, we're allowed to make values less poison (i.e. remove
+ // sources of full UB), so in this case, we just select between the two
+ // non-poison cases (1 and 4 above).
+ //
+ // For vectors, we apply the same reasoning on a per-lane basis.
+ auto *Base = GEPLHS->getPointerOperand();
+ if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) {
+ int NumElts = GEPLHS->getType()->getVectorNumElements();
+ Base = Builder.CreateVectorSplat(NumElts, Base);
+ }
+ return new ICmpInst(Cond, Base,
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ cast<Constant>(RHS), Base->getType()));
} else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
// If the base pointers are different, but the indices are the same, just
// compare the base pointer.
@@ -916,11 +925,13 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If we're comparing GEPs with two base pointers that only differ in type
// and both GEPs have only constant indices or just one use, then fold
// the compare with the adjusted indices.
+ // FIXME: Support vector of pointers.
if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
(GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
(GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
PtrBase->stripPointerCasts() ==
- GEPRHS->getOperand(0)->stripPointerCasts()) {
+ GEPRHS->getOperand(0)->stripPointerCasts() &&
+ !GEPLHS->getType()->isVectorTy()) {
Value *LOffset = EmitGEPOffset(GEPLHS);
Value *ROffset = EmitGEPOffset(GEPRHS);
@@ -949,12 +960,14 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
// If one of the GEPs has all zero indices, recurse.
- if (GEPLHS->hasAllZeroIndices())
+ // FIXME: Handle vector of pointers.
+ if (!GEPLHS->getType()->isVectorTy() && GEPLHS->hasAllZeroIndices())
return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
ICmpInst::getSwappedPredicate(Cond), I);
// If the other GEP has all zero indices, recurse.
- if (GEPRHS->hasAllZeroIndices())
+ // FIXME: Handle vector of pointers.
+ if (!GEPRHS->getType()->isVectorTy() && GEPRHS->hasAllZeroIndices())
return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
@@ -964,15 +977,20 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
unsigned DiffOperand = 0; // The operand that differs.
for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
- if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
- GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+ Type *LHSType = GEPLHS->getOperand(i)->getType();
+ Type *RHSType = GEPRHS->getOperand(i)->getType();
+ // FIXME: Better support for vector of pointers.
+ if (LHSType->getPrimitiveSizeInBits() !=
+ RHSType->getPrimitiveSizeInBits() ||
+ (GEPLHS->getType()->isVectorTy() &&
+ (!LHSType->isVectorTy() || !RHSType->isVectorTy()))) {
// Irreconcilable differences.
NumDifferences = 2;
break;
- } else {
- if (NumDifferences++) break;
- DiffOperand = i;
}
+
+ if (NumDifferences++) break;
+ DiffOperand = i;
}
if (NumDifferences == 0) // SAME GEP?
@@ -1317,6 +1335,59 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
+/// If we have:
+/// icmp eq/ne (urem/srem %x, %y), 0
+/// iff %y is a power-of-two, we can replace this with a bit test:
+/// icmp eq/ne (and %x, (add %y, -1)), 0
+Instruction *InstCombiner::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) {
+ // This fold is only valid for equality predicates.
+ if (!I.isEquality())
+ return nullptr;
+ ICmpInst::Predicate Pred;
+ Value *X, *Y, *Zero;
+ if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))),
+ m_CombineAnd(m_Zero(), m_Value(Zero)))))
+ return nullptr;
+ if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I))
+ return nullptr;
+ // This may increase instruction count, we don't enforce that Y is a constant.
+ Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType()));
+ Value *Masked = Builder.CreateAnd(X, Mask);
+ return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero);
+}
+
+/// Fold equality-comparison between zero and any (maybe truncated) right-shift
+/// by one-less-than-bitwidth into a sign test on the original value.
+Instruction *InstCombiner::foldSignBitTest(ICmpInst &I) {
+ Instruction *Val;
+ ICmpInst::Predicate Pred;
+ if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero())))
+ return nullptr;
+
+ Value *X;
+ Type *XTy;
+
+ Constant *C;
+ if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) {
+ XTy = X->getType();
+ unsigned XBitWidth = XTy->getScalarSizeInBits();
+ if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
+ APInt(XBitWidth, XBitWidth - 1))))
+ return nullptr;
+ } else if (isa<BinaryOperator>(Val) &&
+ (X = reassociateShiftAmtsOfTwoSameDirectionShifts(
+ cast<BinaryOperator>(Val), SQ.getWithInstruction(Val),
+ /*AnalyzeForSignBitExtraction=*/true))) {
+ XTy = X->getType();
+ } else
+ return nullptr;
+
+ return ICmpInst::Create(Instruction::ICmp,
+ Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_SGE
+ : ICmpInst::ICMP_SLT,
+ X, ConstantInt::getNullValue(XTy));
+}
+
// Handle icmp pred X, 0
Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
CmpInst::Predicate Pred = Cmp.getPredicate();
@@ -1335,6 +1406,9 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
}
}
+ if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp))
+ return New;
+
// Given:
// icmp eq/ne (urem %x, %y), 0
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
@@ -2179,6 +2253,44 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
return nullptr;
}
+Instruction *InstCombiner::foldICmpSRemConstant(ICmpInst &Cmp,
+ BinaryOperator *SRem,
+ const APInt &C) {
+ // Match an 'is positive' or 'is negative' comparison of remainder by a
+ // constant power-of-2 value:
+ // (X % pow2C) sgt/slt 0
+ const ICmpInst::Predicate Pred = Cmp.getPredicate();
+ if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT)
+ return nullptr;
+
+ // TODO: The one-use check is standard because we do not typically want to
+ // create longer instruction sequences, but this might be a special-case
+ // because srem is not good for analysis or codegen.
+ if (!SRem->hasOneUse())
+ return nullptr;
+
+ const APInt *DivisorC;
+ if (!C.isNullValue() || !match(SRem->getOperand(1), m_Power2(DivisorC)))
+ return nullptr;
+
+ // Mask off the sign bit and the modulo bits (low-bits).
+ Type *Ty = SRem->getType();
+ APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits());
+ Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1));
+ Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC);
+
+ // For 'is positive?' check that the sign-bit is clear and at least 1 masked
+ // bit is set. Example:
+ // (i8 X % 32) s> 0 --> (X & 159) s> 0
+ if (Pred == ICmpInst::ICMP_SGT)
+ return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty));
+
+ // For 'is negative?' check that the sign-bit is set and at least 1 masked
+ // bit is set. Example:
+ // (i16 X % 4) s< 0 --> (X & 32771) u> 32768
+ return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask));
+}
+
/// Fold icmp (udiv X, Y), C.
Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
BinaryOperator *UDiv,
@@ -2387,6 +2499,11 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
const APInt *C2;
APInt SubResult;
+ // icmp eq/ne (sub C, Y), C -> icmp eq/ne Y, 0
+ if (match(X, m_APInt(C2)) && *C2 == C && Cmp.isEquality())
+ return new ICmpInst(Cmp.getPredicate(), Y,
+ ConstantInt::get(Y->getType(), 0));
+
// (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C)
if (match(X, m_APInt(C2)) &&
((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) ||
@@ -2509,20 +2626,49 @@ bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
// TODO: Generalize this to work with other comparison idioms or ensure
// they get canonicalized into this form.
- // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32
- // Greater), where Equal, Less and Greater are placeholders for any three
- // constants.
- ICmpInst::Predicate PredA, PredB;
- if (match(SI->getTrueValue(), m_ConstantInt(Equal)) &&
- match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) &&
- PredA == ICmpInst::ICMP_EQ &&
- match(SI->getFalseValue(),
- m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)),
- m_ConstantInt(Less), m_ConstantInt(Greater))) &&
- PredB == ICmpInst::ICMP_SLT) {
- return true;
+ // select i1 (a == b),
+ // i32 Equal,
+ // i32 (select i1 (a < b), i32 Less, i32 Greater)
+ // where Equal, Less and Greater are placeholders for any three constants.
+ ICmpInst::Predicate PredA;
+ if (!match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) ||
+ !ICmpInst::isEquality(PredA))
+ return false;
+ Value *EqualVal = SI->getTrueValue();
+ Value *UnequalVal = SI->getFalseValue();
+ // We still can get non-canonical predicate here, so canonicalize.
+ if (PredA == ICmpInst::ICMP_NE)
+ std::swap(EqualVal, UnequalVal);
+ if (!match(EqualVal, m_ConstantInt(Equal)))
+ return false;
+ ICmpInst::Predicate PredB;
+ Value *LHS2, *RHS2;
+ if (!match(UnequalVal, m_Select(m_ICmp(PredB, m_Value(LHS2), m_Value(RHS2)),
+ m_ConstantInt(Less), m_ConstantInt(Greater))))
+ return false;
+ // We can get predicate mismatch here, so canonicalize if possible:
+ // First, ensure that 'LHS' match.
+ if (LHS2 != LHS) {
+ // x sgt y <--> y slt x
+ std::swap(LHS2, RHS2);
+ PredB = ICmpInst::getSwappedPredicate(PredB);
+ }
+ if (LHS2 != LHS)
+ return false;
+ // We also need to canonicalize 'RHS'.
+ if (PredB == ICmpInst::ICMP_SGT && isa<Constant>(RHS2)) {
+ // x sgt C-1 <--> x sge C <--> not(x slt C)
+ auto FlippedStrictness =
+ getFlippedStrictnessPredicateAndConstant(PredB, cast<Constant>(RHS2));
+ if (!FlippedStrictness)
+ return false;
+ assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check");
+ RHS2 = FlippedStrictness->second;
+ // And kind-of perform the result swap.
+ std::swap(Less, Greater);
+ PredB = ICmpInst::ICMP_SLT;
}
- return false;
+ return PredB == ICmpInst::ICMP_SLT && RHS == RHS2;
}
Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
@@ -2702,6 +2848,10 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C))
return I;
break;
+ case Instruction::SRem:
+ if (Instruction *I = foldICmpSRemConstant(Cmp, BO, *C))
+ return I;
+ break;
case Instruction::UDiv:
if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C))
return I;
@@ -2926,6 +3076,28 @@ Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
}
break;
}
+
+ case Intrinsic::uadd_sat: {
+ // uadd.sat(a, b) == 0 -> (a | b) == 0
+ if (C.isNullValue()) {
+ Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1));
+ return replaceInstUsesWith(Cmp, Builder.CreateICmp(
+ Cmp.getPredicate(), Or, Constant::getNullValue(Ty)));
+
+ }
+ break;
+ }
+
+ case Intrinsic::usub_sat: {
+ // usub.sat(a, b) == 0 -> a <= b
+ if (C.isNullValue()) {
+ ICmpInst::Predicate NewPred = Cmp.getPredicate() == ICmpInst::ICMP_EQ
+ ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
+ return ICmpInst::Create(Instruction::ICmp, NewPred,
+ II->getArgOperand(0), II->getArgOperand(1));
+ }
+ break;
+ }
default:
break;
}
@@ -3275,6 +3447,7 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I,
// we should move shifts to the same hand of 'and', i.e. rewrite as
// icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x)
// We are only interested in opposite logical shifts here.
+// One of the shifts can be truncated.
// If we can, we want to end up creating 'lshr' shift.
static Value *
foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
@@ -3284,55 +3457,215 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
return nullptr;
auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
- auto m_AnyLShr = m_LShr(m_Value(), m_Value());
-
- // Look for an 'and' of two (opposite) logical shifts.
- // Pick the single-use shift as XShift.
- Value *XShift, *YShift;
- if (!match(I.getOperand(0),
- m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
- m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
+
+ // Look for an 'and' of two logical shifts, one of which may be truncated.
+ // We use m_TruncOrSelf() on the RHS to correctly handle commutative case.
+ Instruction *XShift, *MaybeTruncation, *YShift;
+ if (!match(
+ I.getOperand(0),
+ m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
+ m_CombineAnd(m_TruncOrSelf(m_CombineAnd(
+ m_AnyLogicalShift, m_Instruction(YShift))),
+ m_Instruction(MaybeTruncation)))))
return nullptr;
- // If YShift is a single-use 'lshr', swap the shifts around.
- if (match(YShift, m_OneUse(m_AnyLShr)))
+ // We potentially looked past 'trunc', but only when matching YShift,
+ // therefore YShift must have the widest type.
+ Instruction *WidestShift = YShift;
+ // Therefore XShift must have the shallowest type.
+ // Or they both have identical types if there was no truncation.
+ Instruction *NarrowestShift = XShift;
+
+ Type *WidestTy = WidestShift->getType();
+ assert(NarrowestShift->getType() == I.getOperand(0)->getType() &&
+ "We did not look past any shifts while matching XShift though.");
+ bool HadTrunc = WidestTy != I.getOperand(0)->getType();
+
+ // If YShift is a 'lshr', swap the shifts around.
+ if (match(YShift, m_LShr(m_Value(), m_Value())))
std::swap(XShift, YShift);
// The shifts must be in opposite directions.
- Instruction::BinaryOps XShiftOpcode =
- cast<BinaryOperator>(XShift)->getOpcode();
- if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
+ auto XShiftOpcode = XShift->getOpcode();
+ if (XShiftOpcode == YShift->getOpcode())
return nullptr; // Do not care about same-direction shifts here.
Value *X, *XShAmt, *Y, *YShAmt;
- match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
- match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
+ match(XShift, m_BinOp(m_Value(X), m_ZExtOrSelf(m_Value(XShAmt))));
+ match(YShift, m_BinOp(m_Value(Y), m_ZExtOrSelf(m_Value(YShAmt))));
+
+ // If one of the values being shifted is a constant, then we will end with
+ // and+icmp, and [zext+]shift instrs will be constant-folded. If they are not,
+ // however, we will need to ensure that we won't increase instruction count.
+ if (!isa<Constant>(X) && !isa<Constant>(Y)) {
+ // At least one of the hands of the 'and' should be one-use shift.
+ if (!match(I.getOperand(0),
+ m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
+ return nullptr;
+ if (HadTrunc) {
+ // Due to the 'trunc', we will need to widen X. For that either the old
+ // 'trunc' or the shift amt in the non-truncated shift should be one-use.
+ if (!MaybeTruncation->hasOneUse() &&
+ !NarrowestShift->getOperand(1)->hasOneUse())
+ return nullptr;
+ }
+ }
+
+ // We have two shift amounts from two different shifts. The types of those
+ // shift amounts may not match. If that's the case let's bailout now.
+ if (XShAmt->getType() != YShAmt->getType())
+ return nullptr;
// Can we fold (XShAmt+YShAmt) ?
- Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
- SQ.getWithInstruction(&I));
+ auto *NewShAmt = dyn_cast_or_null<Constant>(
+ SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
+ /*isNUW=*/false, SQ.getWithInstruction(&I)));
if (!NewShAmt)
return nullptr;
+ NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
+ unsigned WidestBitWidth = WidestTy->getScalarSizeInBits();
+
// Is the new shift amount smaller than the bit width?
// FIXME: could also rely on ConstantRange.
- unsigned BitWidth = X->getType()->getScalarSizeInBits();
- if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
- APInt(BitWidth, BitWidth))))
+ if (!match(NewShAmt,
+ m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+ APInt(WidestBitWidth, WidestBitWidth))))
return nullptr;
- // All good, we can do this fold. The shift is the same that was for X.
+
+ // An extra legality check is needed if we had trunc-of-lshr.
+ if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) {
+ auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ,
+ WidestShift]() {
+ // It isn't obvious whether it's worth it to analyze non-constants here.
+ // Also, let's basically give up on non-splat cases, pessimizing vectors.
+ // If *any* of these preconditions matches we can perform the fold.
+ Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy()
+ ? NewShAmt->getSplatValue()
+ : NewShAmt;
+ // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold.
+ if (NewShAmtSplat &&
+ (NewShAmtSplat->isNullValue() ||
+ NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1))
+ return true;
+ // We consider *min* leading zeros so a single outlier
+ // blocks the transform as opposed to allowing it.
+ if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) {
+ KnownBits Known = computeKnownBits(C, SQ.DL);
+ unsigned MinLeadZero = Known.countMinLeadingZeros();
+ // If the value being shifted has at most lowest bit set we can fold.
+ unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
+ if (MaxActiveBits <= 1)
+ return true;
+ // Precondition: NewShAmt u<= countLeadingZeros(C)
+ if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero))
+ return true;
+ }
+ if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) {
+ KnownBits Known = computeKnownBits(C, SQ.DL);
+ unsigned MinLeadZero = Known.countMinLeadingZeros();
+ // If the value being shifted has at most lowest bit set we can fold.
+ unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
+ if (MaxActiveBits <= 1)
+ return true;
+ // Precondition: ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C)
+ if (NewShAmtSplat) {
+ APInt AdjNewShAmt =
+ (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger();
+ if (AdjNewShAmt.ule(MinLeadZero))
+ return true;
+ }
+ }
+ return false; // Can't tell if it's ok.
+ };
+ if (!CanFold())
+ return nullptr;
+ }
+
+ // All good, we can do this fold.
+ X = Builder.CreateZExt(X, WidestTy);
+ Y = Builder.CreateZExt(Y, WidestTy);
+ // The shift is the same that was for X.
Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
? Builder.CreateLShr(X, NewShAmt)
: Builder.CreateShl(X, NewShAmt);
Value *T1 = Builder.CreateAnd(T0, Y);
return Builder.CreateICmp(I.getPredicate(), T1,
- Constant::getNullValue(X->getType()));
+ Constant::getNullValue(WidestTy));
+}
+
+/// Fold
+/// (-1 u/ x) u< y
+/// ((x * y) u/ x) != y
+/// to
+/// @llvm.umul.with.overflow(x, y) plus extraction of overflow bit
+/// Note that the comparison is commutative, while inverted (u>=, ==) predicate
+/// will mean that we are looking for the opposite answer.
+Value *InstCombiner::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
+ ICmpInst::Predicate Pred;
+ Value *X, *Y;
+ Instruction *Mul;
+ bool NeedNegation;
+ // Look for: (-1 u/ x) u</u>= y
+ if (!I.isEquality() &&
+ match(&I, m_c_ICmp(Pred, m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
+ m_Value(Y)))) {
+ Mul = nullptr;
+ // Canonicalize as-if y was on RHS.
+ if (I.getOperand(1) != Y)
+ Pred = I.getSwappedPredicate();
+
+ // Are we checking that overflow does not happen, or does happen?
+ switch (Pred) {
+ case ICmpInst::Predicate::ICMP_ULT:
+ NeedNegation = false;
+ break; // OK
+ case ICmpInst::Predicate::ICMP_UGE:
+ NeedNegation = true;
+ break; // OK
+ default:
+ return nullptr; // Wrong predicate.
+ }
+ } else // Look for: ((x * y) u/ x) !=/== y
+ if (I.isEquality() &&
+ match(&I, m_c_ICmp(Pred, m_Value(Y),
+ m_OneUse(m_UDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y),
+ m_Value(X)),
+ m_Instruction(Mul)),
+ m_Deferred(X)))))) {
+ NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ;
+ } else
+ return nullptr;
+
+ BuilderTy::InsertPointGuard Guard(Builder);
+ // If the pattern included (x * y), we'll want to insert new instructions
+ // right before that original multiplication so that we can replace it.
+ bool MulHadOtherUses = Mul && !Mul->hasOneUse();
+ if (MulHadOtherUses)
+ Builder.SetInsertPoint(Mul);
+
+ Function *F = Intrinsic::getDeclaration(
+ I.getModule(), Intrinsic::umul_with_overflow, X->getType());
+ CallInst *Call = Builder.CreateCall(F, {X, Y}, "umul");
+
+ // If the multiplication was used elsewhere, to ensure that we don't leave
+ // "duplicate" instructions, replace uses of that original multiplication
+ // with the multiplication result from the with.overflow intrinsic.
+ if (MulHadOtherUses)
+ replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "umul.val"));
+
+ Value *Res = Builder.CreateExtractValue(Call, 1, "umul.ov");
+ if (NeedNegation) // This technically increases instruction count.
+ Res = Builder.CreateNot(Res, "umul.not.ov");
+
+ return Res;
}
/// Try to fold icmp (binop), X or icmp X, (binop).
/// TODO: A large part of this logic is duplicated in InstSimplify's
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
/// duplication.
-Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
+Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I, const SimplifyQuery &SQ) {
+ const SimplifyQuery Q = SQ.getWithInstruction(&I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// Special logic for binary operators.
@@ -3345,13 +3678,13 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
Value *X;
// Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
- // (Op1 + X) <u Op1 --> ~Op1 <u X
- // Op0 >u (Op0 + X) --> X >u ~Op0
+ // (Op1 + X) u</u>= Op1 --> ~Op1 u</u>= X
if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
- Pred == ICmpInst::ICMP_ULT)
+ (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE))
return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
+ // Op0 u>/u<= (Op0 + X) --> X u>/u<= ~Op0
if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
- Pred == ICmpInst::ICMP_UGT)
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
@@ -3378,21 +3711,21 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
D = BO1->getOperand(1);
}
- // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
+ // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow.
if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
return new ICmpInst(Pred, A == Op1 ? B : A,
Constant::getNullValue(Op1->getType()));
- // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ // icmp C, (C+D) -> icmp 0, D for equalities or if there is no overflow.
+ // icmp D, (C+D) -> icmp 0, C for equalities or if there is no overflow.
if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
C == Op0 ? D : C);
- // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
+ // icmp (A+B), (A+D) -> icmp B, D for equalities or if there is no overflow.
if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem &&
- NoOp1WrapProblem &&
- // Try not to increase register pressure.
- BO0->hasOneUse() && BO1->hasOneUse()) {
+ NoOp1WrapProblem) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y, *Z;
if (A == C) {
@@ -3416,39 +3749,39 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
return new ICmpInst(Pred, Y, Z);
}
- // icmp slt (X + -1), Y -> icmp sle X, Y
+ // icmp slt (A + -1), Op1 -> icmp sle A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
match(B, m_AllOnes()))
return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
- // icmp sge (X + -1), Y -> icmp sgt X, Y
+ // icmp sge (A + -1), Op1 -> icmp sgt A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
match(B, m_AllOnes()))
return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
- // icmp sle (X + 1), Y -> icmp slt X, Y
+ // icmp sle (A + 1), Op1 -> icmp slt A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
- // icmp sgt (X + 1), Y -> icmp sge X, Y
+ // icmp sgt (A + 1), Op1 -> icmp sge A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
- // icmp sgt X, (Y + -1) -> icmp sge X, Y
+ // icmp sgt Op0, (C + -1) -> icmp sge Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
match(D, m_AllOnes()))
return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
- // icmp sle X, (Y + -1) -> icmp slt X, Y
+ // icmp sle Op0, (C + -1) -> icmp slt Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
match(D, m_AllOnes()))
return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
- // icmp sge X, (Y + 1) -> icmp sgt X, Y
+ // icmp sge Op0, (C + 1) -> icmp sgt Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
- // icmp slt X, (Y + 1) -> icmp sle X, Y
+ // icmp slt Op0, (C + 1) -> icmp sle Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
@@ -3456,33 +3789,33 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
// canonicalization from (X -nuw 1) to (X + -1) means that the combinations
// wouldn't happen even if they were implemented.
//
- // icmp ult (X - 1), Y -> icmp ule X, Y
- // icmp uge (X - 1), Y -> icmp ugt X, Y
- // icmp ugt X, (Y - 1) -> icmp uge X, Y
- // icmp ule X, (Y - 1) -> icmp ult X, Y
+ // icmp ult (A - 1), Op1 -> icmp ule A, Op1
+ // icmp uge (A - 1), Op1 -> icmp ugt A, Op1
+ // icmp ugt Op0, (C - 1) -> icmp uge Op0, C
+ // icmp ule Op0, (C - 1) -> icmp ult Op0, C
- // icmp ule (X + 1), Y -> icmp ult X, Y
+ // icmp ule (A + 1), Op0 -> icmp ult A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
- // icmp ugt (X + 1), Y -> icmp uge X, Y
+ // icmp ugt (A + 1), Op0 -> icmp uge A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
- // icmp uge X, (Y + 1) -> icmp ugt X, Y
+ // icmp uge Op0, (C + 1) -> icmp ugt Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
- // icmp ult X, (Y + 1) -> icmp ule X, Y
+ // icmp ult Op0, (C + 1) -> icmp ule Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
// if C1 has greater magnitude than C2:
- // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
+ // icmp (A + C1), (C + C2) -> icmp (A + C3), C
// s.t. C3 = C1 - C2
//
// if C2 has greater magnitude than C1:
- // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
+ // icmp (A + C1), (C + C2) -> icmp A, (C + C3)
// s.t. C3 = C2 - C1
if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
(BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
@@ -3520,29 +3853,35 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
D = BO1->getOperand(1);
}
- // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
+ // icmp (A-B), A -> icmp 0, B for equalities or if there is no overflow.
if (A == Op1 && NoOp0WrapProblem)
return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
- // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
+ // icmp C, (C-D) -> icmp D, 0 for equalities or if there is no overflow.
if (C == Op0 && NoOp1WrapProblem)
return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
- // (A - B) >u A --> A <u B
- if (A == Op1 && Pred == ICmpInst::ICMP_UGT)
- return new ICmpInst(ICmpInst::ICMP_ULT, A, B);
- // C <u (C - D) --> C <u D
- if (C == Op0 && Pred == ICmpInst::ICMP_ULT)
- return new ICmpInst(ICmpInst::ICMP_ULT, C, D);
-
- // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
- if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
- // Try not to increase register pressure.
- BO0->hasOneUse() && BO1->hasOneUse())
+ // Convert sub-with-unsigned-overflow comparisons into a comparison of args.
+ // (A - B) u>/u<= A --> B u>/u<= A
+ if (A == Op1 && (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
+ return new ICmpInst(Pred, B, A);
+ // C u</u>= (C - D) --> C u</u>= D
+ if (C == Op0 && (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE))
+ return new ICmpInst(Pred, C, D);
+ // (A - B) u>=/u< A --> B u>/u<= A iff B != 0
+ if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) &&
+ isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A);
+ // C u<=/u> (C - D) --> C u</u>= D iff B != 0
+ if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) &&
+ isKnownNonZero(D, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D);
+
+ // icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow.
+ if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem)
return new ICmpInst(Pred, A, C);
- // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
- if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
- // Try not to increase register pressure.
- BO0->hasOneUse() && BO1->hasOneUse())
+
+ // icmp (A-B), (A-D) -> icmp D, B for equalities or if there is no overflow.
+ if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem)
return new ICmpInst(Pred, D, B);
// icmp (0-X) < cst --> x > -cst
@@ -3677,6 +4016,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
}
}
+ if (Value *V = foldUnsignedMultiplicationOverflowCheck(I))
+ return replaceInstUsesWith(I, V);
+
if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
return replaceInstUsesWith(I, V);
@@ -3953,125 +4295,140 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
return nullptr;
}
-/// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so
-/// far.
-Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
- const CastInst *LHSCI = cast<CastInst>(ICmp.getOperand(0));
- Value *LHSCIOp = LHSCI->getOperand(0);
- Type *SrcTy = LHSCIOp->getType();
- Type *DestTy = LHSCI->getType();
-
- // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
- // integer type is the same size as the pointer type.
- const auto& CompatibleSizes = [&](Type* SrcTy, Type* DestTy) -> bool {
- if (isa<VectorType>(SrcTy)) {
- SrcTy = cast<VectorType>(SrcTy)->getElementType();
- DestTy = cast<VectorType>(DestTy)->getElementType();
- }
- return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth();
- };
- if (LHSCI->getOpcode() == Instruction::PtrToInt &&
- CompatibleSizes(SrcTy, DestTy)) {
- Value *RHSOp = nullptr;
- if (auto *RHSC = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
- Value *RHSCIOp = RHSC->getOperand(0);
- if (RHSCIOp->getType()->getPointerAddressSpace() ==
- LHSCIOp->getType()->getPointerAddressSpace()) {
- RHSOp = RHSC->getOperand(0);
- // If the pointer types don't match, insert a bitcast.
- if (LHSCIOp->getType() != RHSOp->getType())
- RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType());
- }
- } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
- RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
- }
-
- if (RHSOp)
- return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSOp);
- }
-
- // The code below only handles extension cast instructions, so far.
- // Enforce this.
- if (LHSCI->getOpcode() != Instruction::ZExt &&
- LHSCI->getOpcode() != Instruction::SExt)
+static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp,
+ InstCombiner::BuilderTy &Builder) {
+ assert(isa<CastInst>(ICmp.getOperand(0)) && "Expected cast for operand 0");
+ auto *CastOp0 = cast<CastInst>(ICmp.getOperand(0));
+ Value *X;
+ if (!match(CastOp0, m_ZExtOrSExt(m_Value(X))))
return nullptr;
- bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
- bool isSignedCmp = ICmp.isSigned();
-
- if (auto *CI = dyn_cast<CastInst>(ICmp.getOperand(1))) {
- // Not an extension from the same type?
- Value *RHSCIOp = CI->getOperand(0);
- if (RHSCIOp->getType() != LHSCIOp->getType())
- return nullptr;
-
+ bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt;
+ bool IsSignedCmp = ICmp.isSigned();
+ if (auto *CastOp1 = dyn_cast<CastInst>(ICmp.getOperand(1))) {
// If the signedness of the two casts doesn't agree (i.e. one is a sext
// and the other is a zext), then we can't handle this.
- if (CI->getOpcode() != LHSCI->getOpcode())
+ // TODO: This is too strict. We can handle some predicates (equality?).
+ if (CastOp0->getOpcode() != CastOp1->getOpcode())
return nullptr;
- // Deal with equality cases early.
+ // Not an extension from the same type?
+ Value *Y = CastOp1->getOperand(0);
+ Type *XTy = X->getType(), *YTy = Y->getType();
+ if (XTy != YTy) {
+ // One of the casts must have one use because we are creating a new cast.
+ if (!CastOp0->hasOneUse() && !CastOp1->hasOneUse())
+ return nullptr;
+ // Extend the narrower operand to the type of the wider operand.
+ if (XTy->getScalarSizeInBits() < YTy->getScalarSizeInBits())
+ X = Builder.CreateCast(CastOp0->getOpcode(), X, YTy);
+ else if (YTy->getScalarSizeInBits() < XTy->getScalarSizeInBits())
+ Y = Builder.CreateCast(CastOp0->getOpcode(), Y, XTy);
+ else
+ return nullptr;
+ }
+
+ // (zext X) == (zext Y) --> X == Y
+ // (sext X) == (sext Y) --> X == Y
if (ICmp.isEquality())
- return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
+ return new ICmpInst(ICmp.getPredicate(), X, Y);
// A signed comparison of sign extended values simplifies into a
// signed comparison.
- if (isSignedCmp && isSignedExt)
- return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
+ if (IsSignedCmp && IsSignedExt)
+ return new ICmpInst(ICmp.getPredicate(), X, Y);
// The other three cases all fold into an unsigned comparison.
- return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+ return new ICmpInst(ICmp.getUnsignedPredicate(), X, Y);
}
- // If we aren't dealing with a constant on the RHS, exit early.
+ // Below here, we are only folding a compare with constant.
auto *C = dyn_cast<Constant>(ICmp.getOperand(1));
if (!C)
return nullptr;
// Compute the constant that would happen if we truncated to SrcTy then
// re-extended to DestTy.
+ Type *SrcTy = CastOp0->getSrcTy();
+ Type *DestTy = CastOp0->getDestTy();
Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy);
- Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
+ Constant *Res2 = ConstantExpr::getCast(CastOp0->getOpcode(), Res1, DestTy);
// If the re-extended constant didn't change...
if (Res2 == C) {
- // Deal with equality cases early.
if (ICmp.isEquality())
- return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
+ return new ICmpInst(ICmp.getPredicate(), X, Res1);
// A signed comparison of sign extended values simplifies into a
// signed comparison.
- if (isSignedExt && isSignedCmp)
- return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
+ if (IsSignedExt && IsSignedCmp)
+ return new ICmpInst(ICmp.getPredicate(), X, Res1);
// The other three cases all fold into an unsigned comparison.
- return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, Res1);
+ return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res1);
}
// The re-extended constant changed, partly changed (in the case of a vector),
// or could not be determined to be equal (in the case of a constant
// expression), so the constant cannot be represented in the shorter type.
- // Consequently, we cannot emit a simple comparison.
// All the cases that fold to true or false will have already been handled
// by SimplifyICmpInst, so only deal with the tricky case.
+ if (IsSignedCmp || !IsSignedExt || !isa<ConstantInt>(C))
+ return nullptr;
+
+ // Is source op positive?
+ // icmp ult (sext X), C --> icmp sgt X, -1
+ if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
+ return new ICmpInst(CmpInst::ICMP_SGT, X, Constant::getAllOnesValue(SrcTy));
+
+ // Is source op negative?
+ // icmp ugt (sext X), C --> icmp slt X, 0
+ assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
+ return new ICmpInst(CmpInst::ICMP_SLT, X, Constant::getNullValue(SrcTy));
+}
- if (isSignedCmp || !isSignedExt || !isa<ConstantInt>(C))
+/// Handle icmp (cast x), (cast or constant).
+Instruction *InstCombiner::foldICmpWithCastOp(ICmpInst &ICmp) {
+ auto *CastOp0 = dyn_cast<CastInst>(ICmp.getOperand(0));
+ if (!CastOp0)
+ return nullptr;
+ if (!isa<Constant>(ICmp.getOperand(1)) && !isa<CastInst>(ICmp.getOperand(1)))
return nullptr;
- // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
- // should have been folded away previously and not enter in here.
+ Value *Op0Src = CastOp0->getOperand(0);
+ Type *SrcTy = CastOp0->getSrcTy();
+ Type *DestTy = CastOp0->getDestTy();
- // We're performing an unsigned comp with a sign extended value.
- // This is true if the input is >= 0. [aka >s -1]
- Constant *NegOne = Constant::getAllOnesValue(SrcTy);
- Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName());
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // integer type is the same size as the pointer type.
+ auto CompatibleSizes = [&](Type *SrcTy, Type *DestTy) {
+ if (isa<VectorType>(SrcTy)) {
+ SrcTy = cast<VectorType>(SrcTy)->getElementType();
+ DestTy = cast<VectorType>(DestTy)->getElementType();
+ }
+ return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth();
+ };
+ if (CastOp0->getOpcode() == Instruction::PtrToInt &&
+ CompatibleSizes(SrcTy, DestTy)) {
+ Value *NewOp1 = nullptr;
+ if (auto *PtrToIntOp1 = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
+ Value *PtrSrc = PtrToIntOp1->getOperand(0);
+ if (PtrSrc->getType()->getPointerAddressSpace() ==
+ Op0Src->getType()->getPointerAddressSpace()) {
+ NewOp1 = PtrToIntOp1->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (Op0Src->getType() != NewOp1->getType())
+ NewOp1 = Builder.CreateBitCast(NewOp1, Op0Src->getType());
+ }
+ } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
+ NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+ }
- // Finally, return the value computed.
- if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
- return replaceInstUsesWith(ICmp, Result);
+ if (NewOp1)
+ return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1);
+ }
- assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
- return BinaryOperator::CreateNot(Result);
+ return foldICmpWithZextOrSext(ICmp, Builder);
}
static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) {
@@ -4791,41 +5148,35 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
return nullptr;
}
-/// If we have an icmp le or icmp ge instruction with a constant operand, turn
-/// it into the appropriate icmp lt or icmp gt instruction. This transform
-/// allows them to be folded in visitICmpInst.
-static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
- ICmpInst::Predicate Pred = I.getPredicate();
- if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGE &&
- Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_UGE)
- return nullptr;
+llvm::Optional<std::pair<CmpInst::Predicate, Constant *>>
+llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
+ Constant *C) {
+ assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
+ "Only for relational integer predicates.");
- Value *Op0 = I.getOperand(0);
- Value *Op1 = I.getOperand(1);
- auto *Op1C = dyn_cast<Constant>(Op1);
- if (!Op1C)
- return nullptr;
+ Type *Type = C->getType();
+ bool IsSigned = ICmpInst::isSigned(Pred);
+
+ CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
+ bool WillIncrement =
+ UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;
- // Check if the constant operand can be safely incremented/decremented without
- // overflowing/underflowing. For scalars, SimplifyICmpInst has already handled
- // the edge cases for us, so we just assert on them. For vectors, we must
- // handle the edge cases.
- Type *Op1Type = Op1->getType();
- bool IsSigned = I.isSigned();
- bool IsLE = (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE);
- auto *CI = dyn_cast<ConstantInt>(Op1C);
- if (CI) {
- // A <= MAX -> TRUE ; A >= MIN -> TRUE
- assert(IsLE ? !CI->isMaxValue(IsSigned) : !CI->isMinValue(IsSigned));
- } else if (Op1Type->isVectorTy()) {
- // TODO? If the edge cases for vectors were guaranteed to be handled as they
- // are for scalar, we could remove the min/max checks. However, to do that,
- // we would have to use insertelement/shufflevector to replace edge values.
- unsigned NumElts = Op1Type->getVectorNumElements();
+ // Check if the constant operand can be safely incremented/decremented
+ // without overflowing/underflowing.
+ auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
+ return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
+ };
+
+ if (auto *CI = dyn_cast<ConstantInt>(C)) {
+ // Bail out if the constant can't be safely incremented/decremented.
+ if (!ConstantIsOk(CI))
+ return llvm::None;
+ } else if (Type->isVectorTy()) {
+ unsigned NumElts = Type->getVectorNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
- Constant *Elt = Op1C->getAggregateElement(i);
+ Constant *Elt = C->getAggregateElement(i);
if (!Elt)
- return nullptr;
+ return llvm::None;
if (isa<UndefValue>(Elt))
continue;
@@ -4833,20 +5184,43 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
// Bail out if we can't determine if this constant is min/max or if we
// know that this constant is min/max.
auto *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI || (IsLE ? CI->isMaxValue(IsSigned) : CI->isMinValue(IsSigned)))
- return nullptr;
+ if (!CI || !ConstantIsOk(CI))
+ return llvm::None;
}
} else {
// ConstantExpr?
- return nullptr;
+ return llvm::None;
}
- // Increment or decrement the constant and set the new comparison predicate:
- // ULE -> ULT ; UGE -> UGT ; SLE -> SLT ; SGE -> SGT
- Constant *OneOrNegOne = ConstantInt::get(Op1Type, IsLE ? 1 : -1, true);
- CmpInst::Predicate NewPred = IsLE ? ICmpInst::ICMP_ULT: ICmpInst::ICMP_UGT;
- NewPred = IsSigned ? ICmpInst::getSignedPredicate(NewPred) : NewPred;
- return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne));
+ CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);
+
+ // Increment or decrement the constant.
+ Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true);
+ Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne);
+
+ return std::make_pair(NewPred, NewC);
+}
+
+/// If we have an icmp le or icmp ge instruction with a constant operand, turn
+/// it into the appropriate icmp lt or icmp gt instruction. This transform
+/// allows them to be folded in visitICmpInst.
+static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
+ ICmpInst::Predicate Pred = I.getPredicate();
+ if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) ||
+ isCanonicalPredicate(Pred))
+ return nullptr;
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ auto *Op1C = dyn_cast<Constant>(Op1);
+ if (!Op1C)
+ return nullptr;
+
+ auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, Op1C);
+ if (!FlippedStrictness)
+ return nullptr;
+
+ return new ICmpInst(FlippedStrictness->first, Op0, FlippedStrictness->second);
}
/// Integer compare with boolean values can always be turned into bitwise ops.
@@ -5002,6 +5376,7 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
+ const SimplifyQuery Q = SQ.getWithInstruction(&I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
unsigned Op0Cplxity = getComplexity(Op0);
unsigned Op1Cplxity = getComplexity(Op1);
@@ -5016,8 +5391,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Changed = true;
}
- if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1,
- SQ.getWithInstruction(&I)))
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
return replaceInstUsesWith(I, V);
// Comparing -val or val with non-zero is the same as just comparing val
@@ -5050,6 +5424,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpWithDominatingICmp(I))
return Res;
+ if (Instruction *Res = foldICmpBinOp(I, Q))
+ return Res;
+
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
@@ -5098,6 +5475,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpInstWithConstant(I))
return Res;
+ // Try to match comparison as a sign bit test. Intentionally do this after
+ // foldICmpInstWithConstant() to potentially let other folds to happen first.
+ if (Instruction *New = foldSignBitTest(I))
+ return New;
+
if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
return Res;
@@ -5124,20 +5506,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpBitCast(I, Builder))
return Res;
- if (isa<CastInst>(Op0)) {
- // Handle the special case of: icmp (cast bool to X), <cst>
- // This comes up when you have code like
- // int X = A < B;
- // if (X) ...
- // For generality, we handle any zero-extension of any operand comparison
- // with a constant or another cast from the same type.
- if (isa<Constant>(Op1) || isa<CastInst>(Op1))
- if (Instruction *R = foldICmpWithCastAndCast(I))
- return R;
- }
-
- if (Instruction *Res = foldICmpBinOp(I))
- return Res;
+ if (Instruction *R = foldICmpWithCastOp(I))
+ return R;
if (Instruction *Res = foldICmpWithMinMax(I))
return Res;
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 434b0d591215..1dbc06d92e7a 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -113,6 +113,48 @@ static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) {
}
}
+/// Given an exploded icmp instruction, return true if the comparison only
+/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the
+/// result of the comparison is true when the input value is signed.
+inline bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
+ bool &TrueIfSigned) {
+ switch (Pred) {
+ case ICmpInst::ICMP_SLT: // True if LHS s< 0
+ TrueIfSigned = true;
+ return RHS.isNullValue();
+ case ICmpInst::ICMP_SLE: // True if LHS s<= -1
+ TrueIfSigned = true;
+ return RHS.isAllOnesValue();
+ case ICmpInst::ICMP_SGT: // True if LHS s> -1
+ TrueIfSigned = false;
+ return RHS.isAllOnesValue();
+ case ICmpInst::ICMP_SGE: // True if LHS s>= 0
+ TrueIfSigned = false;
+ return RHS.isNullValue();
+ case ICmpInst::ICMP_UGT:
+ // True if LHS u> RHS and RHS == sign-bit-mask - 1
+ TrueIfSigned = true;
+ return RHS.isMaxSignedValue();
+ case ICmpInst::ICMP_UGE:
+ // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = true;
+ return RHS.isMinSignedValue();
+ case ICmpInst::ICMP_ULT:
+ // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = false;
+ return RHS.isMinSignedValue();
+ case ICmpInst::ICMP_ULE:
+ // True if LHS u<= RHS and RHS == sign-bit-mask - 1
+ TrueIfSigned = false;
+ return RHS.isMaxSignedValue();
+ default:
+ return false;
+ }
+}
+
+llvm::Optional<std::pair<CmpInst::Predicate, Constant *>>
+getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, Constant *C);
+
/// Return the source operand of a potentially bitcasted value while optionally
/// checking if it has one use. If there is no bitcast or the one use check is
/// not met, return the input value itself.
@@ -139,32 +181,17 @@ static inline Constant *SubOne(Constant *C) {
/// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
/// is true, work under the assumption that the caller intends to remove all
/// uses of V and only keep uses of ~V.
-static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
+///
+/// See also: canFreelyInvertAllUsersOf()
+static inline bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
// ~(~(X)) -> X.
if (match(V, m_Not(m_Value())))
return true;
// Constants can be considered to be not'ed values.
- if (isa<ConstantInt>(V))
+ if (match(V, m_AnyIntegralConstant()))
return true;
- // A vector of constant integers can be inverted easily.
- if (V->getType()->isVectorTy() && isa<Constant>(V)) {
- unsigned NumElts = V->getType()->getVectorNumElements();
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
- if (!Elt)
- return false;
-
- if (isa<UndefValue>(Elt))
- continue;
-
- if (!isa<ConstantInt>(Elt))
- return false;
- }
- return true;
- }
-
// Compares can be inverted if all of their uses are being modified to use the
// ~V.
if (isa<CmpInst>(V))
@@ -185,6 +212,32 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
return false;
}
+/// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
+///
+/// See also: isFreeToInvert()
+static inline bool canFreelyInvertAllUsersOf(Value *V, Value *IgnoredUser) {
+ // Look at every user of V.
+ for (User *U : V->users()) {
+ if (U == IgnoredUser)
+ continue; // Don't consider this user.
+
+ auto *I = cast<Instruction>(U);
+ switch (I->getOpcode()) {
+ case Instruction::Select:
+ case Instruction::Br:
+ break; // Free to invert by swapping true/false values/destinations.
+ case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring it.
+ if (!match(I, m_Not(m_Value())))
+ return false; // Not a 'not'.
+ break;
+ default:
+ return false; // Don't know, likely not freely invertible.
+ }
+ // So far all users were free to invert...
+ }
+ return true; // Can freely invert all users!
+}
+
/// Some binary operators require special handling to avoid poison and undefined
/// behavior. If a constant vector has undef elements, replace those undefs with
/// identity constants if possible because those are always safe to execute.
@@ -337,6 +390,13 @@ public:
Instruction *visitOr(BinaryOperator &I);
Instruction *visitXor(BinaryOperator &I);
Instruction *visitShl(BinaryOperator &I);
+ Value *reassociateShiftAmtsOfTwoSameDirectionShifts(
+ BinaryOperator *Sh0, const SimplifyQuery &SQ,
+ bool AnalyzeForSignBitExtraction = false);
+ Instruction *canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
+ BinaryOperator &I);
+ Instruction *foldVariableSignZeroExtensionOfVariableHighBitExtract(
+ BinaryOperator &OldAShr);
Instruction *visitAShr(BinaryOperator &I);
Instruction *visitLShr(BinaryOperator &I);
Instruction *commonShiftTransforms(BinaryOperator &I);
@@ -541,6 +601,7 @@ private:
Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
Instruction *narrowRotate(TruncInst &Trunc);
Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
+ Instruction *matchSAddSubSat(SelectInst &MinMax1);
/// Determine if a pair of casts can be replaced by a single cast.
///
@@ -557,7 +618,7 @@ private:
Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
- Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &I);
/// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp).
/// NOTE: Unlike most of instcombine, this returns a Value which should
@@ -725,7 +786,7 @@ public:
Value *LHS, Value *RHS, Instruction *CxtI) const;
/// Maximum size of array considered when transforming.
- uint64_t MaxArraySizeForCombine;
+ uint64_t MaxArraySizeForCombine = 0;
private:
/// Performs a few simplifications for operators which are associative
@@ -798,7 +859,8 @@ private:
int DmaskIdx = -1);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
- APInt &UndefElts, unsigned Depth = 0);
+ APInt &UndefElts, unsigned Depth = 0,
+ bool AllowMultipleUsers = false);
/// Canonicalize the position of binops relative to shufflevector.
Instruction *foldVectorBinop(BinaryOperator &Inst);
@@ -847,17 +909,21 @@ private:
Constant *RHSC);
Instruction *foldICmpAddOpConst(Value *X, const APInt &C,
ICmpInst::Predicate Pred);
- Instruction *foldICmpWithCastAndCast(ICmpInst &ICI);
+ Instruction *foldICmpWithCastOp(ICmpInst &ICI);
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);
- Instruction *foldICmpBinOp(ICmpInst &Cmp);
+ Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ);
Instruction *foldICmpEquality(ICmpInst &Cmp);
+ Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
+ Instruction *foldSignBitTest(ICmpInst &I);
Instruction *foldICmpWithZero(ICmpInst &Cmp);
+ Value *foldUnsignedMultiplicationOverflowCheck(ICmpInst &Cmp);
+
Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select,
ConstantInt *C);
Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc,
@@ -874,6 +940,8 @@ private:
const APInt &C);
Instruction *foldICmpShrConstant(ICmpInst &Cmp, BinaryOperator *Shr,
const APInt &C);
+ Instruction *foldICmpSRemConstant(ICmpInst &Cmp, BinaryOperator *UDiv,
+ const APInt &C);
Instruction *foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv,
const APInt &C);
Instruction *foldICmpDivConstant(ICmpInst &Cmp, BinaryOperator *Div,
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 054fb7da09a2..3a0e05832fcb 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -175,7 +175,7 @@ static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
if (!AllocaSize)
return false;
- return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
+ return isDereferenceableAndAlignedPointer(V, Align(AI->getAlignment()),
APInt(64, AllocaSize), DL);
}
@@ -197,7 +197,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
if (C->getValue().getActiveBits() <= 64) {
Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
- New->setAlignment(AI.getAlignment());
+ New->setAlignment(MaybeAlign(AI.getAlignment()));
// Scan to the end of the allocation instructions, to skip over a block of
// allocas if possible...also skip interleaved debug info
@@ -345,7 +345,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
if (AI.getAllocatedType()->isSized()) {
// If the alignment is 0 (unspecified), assign it the preferred alignment.
if (AI.getAlignment() == 0)
- AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
+ AI.setAlignment(
+ MaybeAlign(DL.getPrefTypeAlignment(AI.getAllocatedType())));
// Move all alloca's of zero byte objects to the entry block and merge them
// together. Note that we only do this for alloca's, because malloc should
@@ -377,12 +378,12 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// assign it the preferred alignment.
if (EntryAI->getAlignment() == 0)
EntryAI->setAlignment(
- DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
+ MaybeAlign(DL.getPrefTypeAlignment(EntryAI->getAllocatedType())));
// Replace this zero-sized alloca with the one at the start of the entry
// block after ensuring that the address will be aligned enough for both
// types.
- unsigned MaxAlign = std::max(EntryAI->getAlignment(),
- AI.getAlignment());
+ const MaybeAlign MaxAlign(
+ std::max(EntryAI->getAlignment(), AI.getAlignment()));
EntryAI->setAlignment(MaxAlign);
if (AI.getType() != EntryAI->getType())
return new BitCastInst(EntryAI, AI.getType());
@@ -455,9 +456,6 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
Value *Ptr = LI.getPointerOperand();
unsigned AS = LI.getPointerAddressSpace();
- SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
- LI.getAllMetadata(MD);
-
Value *NewPtr = nullptr;
if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
NewPtr->getType()->getPointerElementType() == NewTy &&
@@ -467,48 +465,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
- MDBuilder MDB(NewLoad->getContext());
- for (const auto &MDPair : MD) {
- unsigned ID = MDPair.first;
- MDNode *N = MDPair.second;
- // Note, essentially every kind of metadata should be preserved here! This
- // routine is supposed to clone a load instruction changing *only its type*.
- // The only metadata it makes sense to drop is metadata which is invalidated
- // when the pointer type changes. This should essentially never be the case
- // in LLVM, but we explicitly switch over only known metadata to be
- // conservatively correct. If you are adding metadata to LLVM which pertains
- // to loads, you almost certainly want to add it here.
- switch (ID) {
- case LLVMContext::MD_dbg:
- case LLVMContext::MD_tbaa:
- case LLVMContext::MD_prof:
- case LLVMContext::MD_fpmath:
- case LLVMContext::MD_tbaa_struct:
- case LLVMContext::MD_invariant_load:
- case LLVMContext::MD_alias_scope:
- case LLVMContext::MD_noalias:
- case LLVMContext::MD_nontemporal:
- case LLVMContext::MD_mem_parallel_loop_access:
- case LLVMContext::MD_access_group:
- // All of these directly apply.
- NewLoad->setMetadata(ID, N);
- break;
-
- case LLVMContext::MD_nonnull:
- copyNonnullMetadata(LI, N, *NewLoad);
- break;
- case LLVMContext::MD_align:
- case LLVMContext::MD_dereferenceable:
- case LLVMContext::MD_dereferenceable_or_null:
- // These only directly apply if the new type is also a pointer.
- if (NewTy->isPointerTy())
- NewLoad->setMetadata(ID, N);
- break;
- case LLVMContext::MD_range:
- copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
- break;
- }
- }
+ copyMetadataForLoad(*NewLoad, LI);
return NewLoad;
}
@@ -1004,9 +961,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
if (KnownAlign > EffectiveLoadAlign)
- LI.setAlignment(KnownAlign);
+ LI.setAlignment(MaybeAlign(KnownAlign));
else if (LoadAlign == 0)
- LI.setAlignment(EffectiveLoadAlign);
+ LI.setAlignment(MaybeAlign(EffectiveLoadAlign));
// Replace GEP indices if possible.
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
@@ -1063,11 +1020,11 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
//
if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
- unsigned Align = LI.getAlignment();
- if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align,
- DL, SI) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align,
- DL, SI)) {
+ const MaybeAlign Alignment(LI.getAlignment());
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
+ Alignment, DL, SI) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
+ Alignment, DL, SI)) {
LoadInst *V1 =
Builder.CreateLoad(LI.getType(), SI->getOperand(1),
SI->getOperand(1)->getName() + ".val");
@@ -1075,9 +1032,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Builder.CreateLoad(LI.getType(), SI->getOperand(2),
SI->getOperand(2)->getName() + ".val");
assert(LI.isUnordered() && "implied by above");
- V1->setAlignment(Align);
+ V1->setAlignment(Alignment);
V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
- V2->setAlignment(Align);
+ V2->setAlignment(Alignment);
V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
return SelectInst::Create(SI->getCondition(), V1, V2);
}
@@ -1399,15 +1356,15 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
return eraseInstFromFunction(SI);
// Attempt to improve the alignment.
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
- unsigned StoreAlign = SI.getAlignment();
- unsigned EffectiveStoreAlign =
- StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
+ const Align KnownAlign = Align(getOrEnforceKnownAlignment(
+ Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT));
+ const MaybeAlign StoreAlign = MaybeAlign(SI.getAlignment());
+ const Align EffectiveStoreAlign =
+ StoreAlign ? *StoreAlign : Align(DL.getABITypeAlignment(Val->getType()));
if (KnownAlign > EffectiveStoreAlign)
SI.setAlignment(KnownAlign);
- else if (StoreAlign == 0)
+ else if (!StoreAlign)
SI.setAlignment(EffectiveStoreAlign);
// Try to canonicalize the stored type.
@@ -1622,8 +1579,8 @@ bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
// Advance to a place where it is safe to insert the new store and insert it.
BBI = DestBB->getFirstInsertionPt();
- StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
- SI.isVolatile(), SI.getAlignment(),
+ StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(),
+ MaybeAlign(SI.getAlignment()),
SI.getOrdering(), SI.getSyncScopeID());
InsertNewInstBefore(NewSI, *BBI);
NewSI->setDebugLoc(MergedLoc);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cc753ce05313..0b9128a9f5a1 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -124,6 +124,50 @@ static Constant *getLogBase2(Type *Ty, Constant *C) {
return ConstantVector::get(Elts);
}
+// TODO: This is a specific form of a much more general pattern.
+// We could detect a select with any binop identity constant, or we
+// could use SimplifyBinOp to see if either arm of the select reduces.
+// But that needs to be done carefully and/or while removing potential
+// reverse canonicalizations as in InstCombiner::foldSelectIntoOp().
+static Value *foldMulSelectToNegate(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Cond, *OtherOp;
+
+ // mul (select Cond, 1, -1), OtherOp --> select Cond, OtherOp, -OtherOp
+ // mul OtherOp, (select Cond, 1, -1) --> select Cond, OtherOp, -OtherOp
+ if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())),
+ m_Value(OtherOp))))
+ return Builder.CreateSelect(Cond, OtherOp, Builder.CreateNeg(OtherOp));
+
+ // mul (select Cond, -1, 1), OtherOp --> select Cond, -OtherOp, OtherOp
+ // mul OtherOp, (select Cond, -1, 1) --> select Cond, -OtherOp, OtherOp
+ if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())),
+ m_Value(OtherOp))))
+ return Builder.CreateSelect(Cond, Builder.CreateNeg(OtherOp), OtherOp);
+
+ // fmul (select Cond, 1.0, -1.0), OtherOp --> select Cond, OtherOp, -OtherOp
+ // fmul OtherOp, (select Cond, 1.0, -1.0) --> select Cond, OtherOp, -OtherOp
+ if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(1.0),
+ m_SpecificFP(-1.0))),
+ m_Value(OtherOp)))) {
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(I.getFastMathFlags());
+ return Builder.CreateSelect(Cond, OtherOp, Builder.CreateFNeg(OtherOp));
+ }
+
+ // fmul (select Cond, -1.0, 1.0), OtherOp --> select Cond, -OtherOp, OtherOp
+ // fmul OtherOp, (select Cond, -1.0, 1.0) --> select Cond, -OtherOp, OtherOp
+ if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(-1.0),
+ m_SpecificFP(1.0))),
+ m_Value(OtherOp)))) {
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(I.getFastMathFlags());
+ return Builder.CreateSelect(Cond, Builder.CreateFNeg(OtherOp), OtherOp);
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (Value *V = SimplifyMulInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
@@ -213,6 +257,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
return FoldedMul;
+ if (Value *FoldedMul = foldMulSelectToNegate(I, Builder))
+ return replaceInstUsesWith(I, FoldedMul);
+
// Simplify mul instructions with a constant RHS.
if (isa<Constant>(Op1)) {
// Canonicalize (X+C1)*CI -> X*CI+C1*CI.
@@ -358,6 +405,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
return FoldedMul;
+ if (Value *FoldedMul = foldMulSelectToNegate(I, Builder))
+ return replaceInstUsesWith(I, FoldedMul);
+
// X * -1.0 --> -X
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (match(Op1, m_SpecificFP(-1.0)))
@@ -373,16 +423,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Constant(C)))
return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
- // Sink negation: -X * Y --> -(X * Y)
- // But don't transform constant expressions because there's an inverse fold.
- if (match(Op0, m_OneUse(m_FNeg(m_Value(X)))) && !isa<ConstantExpr>(Op0))
- return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op1, &I), &I);
-
- // Sink negation: Y * -X --> -(X * Y)
- // But don't transform constant expressions because there's an inverse fold.
- if (match(Op1, m_OneUse(m_FNeg(m_Value(X)))) && !isa<ConstantExpr>(Op1))
- return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op0, &I), &I);
-
// fabs(X) * fabs(X) -> X * X
if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))))
return BinaryOperator::CreateFMulFMF(X, X, &I);
@@ -1211,8 +1251,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
!IsTan && match(Op0, m_Intrinsic<Intrinsic::cos>(m_Value(X))) &&
match(Op1, m_Intrinsic<Intrinsic::sin>(m_Specific(X)));
- if ((IsTan || IsCot) && hasUnaryFloatFn(&TLI, I.getType(), LibFunc_tan,
- LibFunc_tanf, LibFunc_tanl)) {
+ if ((IsTan || IsCot) &&
+ hasFloatFn(&TLI, I.getType(), LibFunc_tan, LibFunc_tanf, LibFunc_tanl)) {
IRBuilder<> B(&I);
IRBuilder<>::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(I.getFastMathFlags());
@@ -1244,6 +1284,17 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
return &I;
}
+ // X / fabs(X) -> copysign(1.0, X)
+ // fabs(X) / X -> copysign(1.0, X)
+ if (I.hasNoNaNs() && I.hasNoInfs() &&
+ (match(&I,
+ m_FDiv(m_Value(X), m_Intrinsic<Intrinsic::fabs>(m_Deferred(X)))) ||
+ match(&I, m_FDiv(m_Intrinsic<Intrinsic::fabs>(m_Value(X)),
+ m_Deferred(X))))) {
+ Value *V = Builder.CreateBinaryIntrinsic(
+ Intrinsic::copysign, ConstantFP::get(I.getType(), 1.0), X, &I);
+ return replaceInstUsesWith(I, V);
+ }
return nullptr;
}
@@ -1309,6 +1360,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
+ // This may increase instruction count, we don't enforce that Y is a
+ // constant.
Constant *N1 = Constant::getAllOnesValue(Ty);
Value *Add = Builder.CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 5820ab726637..e0376b7582f3 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -542,7 +542,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// visitLoadInst will propagate an alignment onto the load when TD is around,
// and if TD isn't around, we can't handle the mixed case.
bool isVolatile = FirstLI->isVolatile();
- unsigned LoadAlignment = FirstLI->getAlignment();
+ MaybeAlign LoadAlignment(FirstLI->getAlignment());
unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
// We can't sink the load if the loaded value could be modified between the
@@ -574,10 +574,10 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// If some of the loads have an alignment specified but not all of them,
// we can't do the transformation.
- if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
+ if ((LoadAlignment.hasValue()) != (LI->getAlignment() != 0))
return nullptr;
- LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
+ LoadAlignment = std::min(LoadAlignment, MaybeAlign(LI->getAlignment()));
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index aefaf5af1750..9fc871e49b30 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -785,6 +785,41 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
return nullptr;
}
+/// Fold the following code sequence:
+/// \code
+/// int a = ctlz(x & -x);
+// x ? 31 - a : a;
+/// \code
+///
+/// into:
+/// cttz(x)
+static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal,
+ Value *FalseVal,
+ InstCombiner::BuilderTy &Builder) {
+ unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
+ if (!ICI->isEquality() || !match(ICI->getOperand(1), m_Zero()))
+ return nullptr;
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+ std::swap(TrueVal, FalseVal);
+
+ if (!match(FalseVal,
+ m_Xor(m_Deferred(TrueVal), m_SpecificInt(BitWidth - 1))))
+ return nullptr;
+
+ if (!match(TrueVal, m_Intrinsic<Intrinsic::ctlz>()))
+ return nullptr;
+
+ Value *X = ICI->getOperand(0);
+ auto *II = cast<IntrinsicInst>(TrueVal);
+ if (!match(II->getOperand(0), m_c_And(m_Specific(X), m_Neg(m_Specific(X)))))
+ return nullptr;
+
+ Function *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::cttz,
+ II->getType());
+ return CallInst::Create(F, {X, II->getArgOperand(1)});
+}
+
/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
/// call to cttz/ctlz with flag 'is_zero_undef' cleared.
///
@@ -973,8 +1008,7 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
// If we are swapping the select operands, swap the metadata too.
assert(Sel.getTrueValue() == RHS && Sel.getFalseValue() == LHS &&
"Unexpected results from matchSelectPattern");
- Sel.setTrueValue(LHS);
- Sel.setFalseValue(RHS);
+ Sel.swapValues();
Sel.swapProfMetadata();
return &Sel;
}
@@ -1056,17 +1090,293 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
}
// We are swapping the select operands, so swap the metadata too.
- Sel.setTrueValue(FVal);
- Sel.setFalseValue(TVal);
+ Sel.swapValues();
Sel.swapProfMetadata();
return &Sel;
}
+static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *ReplaceOp,
+ const SimplifyQuery &Q) {
+ // If this is a binary operator, try to simplify it with the replaced op
+ // because we know Op and ReplaceOp are equivalant.
+ // For example: V = X + 1, Op = X, ReplaceOp = 42
+ // Simplifies as: add(42, 1) --> 43
+ if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+ if (BO->getOperand(0) == Op)
+ return SimplifyBinOp(BO->getOpcode(), ReplaceOp, BO->getOperand(1), Q);
+ if (BO->getOperand(1) == Op)
+ return SimplifyBinOp(BO->getOpcode(), BO->getOperand(0), ReplaceOp, Q);
+ }
+
+ return nullptr;
+}
+
+/// If we have a select with an equality comparison, then we know the value in
+/// one of the arms of the select. See if substituting this value into an arm
+/// and simplifying the result yields the same value as the other arm.
+///
+/// To make this transform safe, we must drop poison-generating flags
+/// (nsw, etc) if we simplified to a binop because the select may be guarding
+/// that poison from propagating. If the existing binop already had no
+/// poison-generating flags, then this transform can be done by instsimplify.
+///
+/// Consider:
+/// %cmp = icmp eq i32 %x, 2147483647
+/// %add = add nsw i32 %x, 1
+/// %sel = select i1 %cmp, i32 -2147483648, i32 %add
+///
+/// We can't replace %sel with %add unless we strip away the flags.
+/// TODO: Wrapping flags could be preserved in some cases with better analysis.
+static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp,
+ const SimplifyQuery &Q) {
+ if (!Cmp.isEquality())
+ return nullptr;
+
+ // Canonicalize the pattern to ICMP_EQ by swapping the select operands.
+ Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();
+ if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
+ std::swap(TrueVal, FalseVal);
+
+ // Try each equivalence substitution possibility.
+ // We have an 'EQ' comparison, so the select's false value will propagate.
+ // Example:
+ // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1
+ // (X == 42) ? (X + 1) : 43 --> (X == 42) ? (42 + 1) : 43 --> 43
+ Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
+ if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q) == TrueVal ||
+ simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q) == TrueVal ||
+ simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q) == FalseVal ||
+ simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q) == FalseVal) {
+ if (auto *FalseInst = dyn_cast<Instruction>(FalseVal))
+ FalseInst->dropPoisonGeneratingFlags();
+ return FalseVal;
+ }
+ return nullptr;
+}
+
+// See if this is a pattern like:
+// %old_cmp1 = icmp slt i32 %x, C2
+// %old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high
+// %old_x_offseted = add i32 %x, C1
+// %old_cmp0 = icmp ult i32 %old_x_offseted, C0
+// %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
+// This can be rewritten as more canonical pattern:
+// %new_cmp1 = icmp slt i32 %x, -C1
+// %new_cmp2 = icmp sge i32 %x, C0-C1
+// %new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x
+// %r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low
+// Iff -C1 s<= C2 s<= C0-C1
+// Also ULT predicate can also be UGT iff C0 != -1 (+invert result)
+// SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.)
+static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X = Sel0.getTrueValue();
+ Value *Sel1 = Sel0.getFalseValue();
+
+ // First match the condition of the outermost select.
+ // Said condition must be one-use.
+ if (!Cmp0.hasOneUse())
+ return nullptr;
+ Value *Cmp00 = Cmp0.getOperand(0);
+ Constant *C0;
+ if (!match(Cmp0.getOperand(1),
+ m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))
+ return nullptr;
+ // Canonicalize Cmp0 into the form we expect.
+ // FIXME: we shouldn't care about lanes that are 'undef' in the end?
+ switch (Cmp0.getPredicate()) {
+ case ICmpInst::Predicate::ICMP_ULT:
+ break; // Great!
+ case ICmpInst::Predicate::ICMP_ULE:
+ // We'd have to increment C0 by one, and for that it must not have all-ones
+ // element, but then it would have been canonicalized to 'ult' before
+ // we get here. So we can't do anything useful with 'ule'.
+ return nullptr;
+ case ICmpInst::Predicate::ICMP_UGT:
+ // We want to canonicalize it to 'ult', so we'll need to increment C0,
+ // which again means it must not have any all-ones elements.
+ if (!match(C0,
+ m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
+ APInt::getAllOnesValue(
+ C0->getType()->getScalarSizeInBits()))))
+ return nullptr; // Can't do, have all-ones element[s].
+ C0 = AddOne(C0);
+ std::swap(X, Sel1);
+ break;
+ case ICmpInst::Predicate::ICMP_UGE:
+ // The only way we'd get this predicate if this `icmp` has extra uses,
+ // but then we won't be able to do this fold.
+ return nullptr;
+ default:
+ return nullptr; // Unknown predicate.
+ }
+
+ // Now that we've canonicalized the ICmp, we know the X we expect;
+ // the select in other hand should be one-use.
+ if (!Sel1->hasOneUse())
+ return nullptr;
+
+ // We now can finish matching the condition of the outermost select:
+ // it should either be the X itself, or an addition of some constant to X.
+ Constant *C1;
+ if (Cmp00 == X)
+ C1 = ConstantInt::getNullValue(Sel0.getType());
+ else if (!match(Cmp00,
+ m_Add(m_Specific(X),
+ m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))
+ return nullptr;
+
+ Value *Cmp1;
+ ICmpInst::Predicate Pred1;
+ Constant *C2;
+ Value *ReplacementLow, *ReplacementHigh;
+ if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
+ m_Value(ReplacementHigh))) ||
+ !match(Cmp1,
+ m_ICmp(Pred1, m_Specific(X),
+ m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2)))))
+ return nullptr;
+
+ if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse()))
+ return nullptr; // Not enough one-use instructions for the fold.
+ // FIXME: this restriction could be relaxed if Cmp1 can be reused as one of
+ // two comparisons we'll need to build.
+
+ // Canonicalize Cmp1 into the form we expect.
+ // FIXME: we shouldn't care about lanes that are 'undef' in the end?
+ switch (Pred1) {
+ case ICmpInst::Predicate::ICMP_SLT:
+ break;
+ case ICmpInst::Predicate::ICMP_SLE:
+ // We'd have to increment C2 by one, and for that it must not have signed
+ // max element, but then it would have been canonicalized to 'slt' before
+ // we get here. So we can't do anything useful with 'sle'.
+ return nullptr;
+ case ICmpInst::Predicate::ICMP_SGT:
+ // We want to canonicalize it to 'slt', so we'll need to increment C2,
+ // which again means it must not have any signed max elements.
+ if (!match(C2,
+ m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
+ APInt::getSignedMaxValue(
+ C2->getType()->getScalarSizeInBits()))))
+ return nullptr; // Can't do, have signed max element[s].
+ C2 = AddOne(C2);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::Predicate::ICMP_SGE:
+ // Also non-canonical, but here we don't need to change C2,
+ // so we don't have any restrictions on C2, so we can just handle it.
+ std::swap(ReplacementLow, ReplacementHigh);
+ break;
+ default:
+ return nullptr; // Unknown predicate.
+ }
+
+ // The thresholds of this clamp-like pattern.
+ auto *ThresholdLowIncl = ConstantExpr::getNeg(C1);
+ auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1);
+
+ // The fold has a precondition 1: C2 s>= ThresholdLow
+ auto *Precond1 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SGE, C2,
+ ThresholdLowIncl);
+ if (!match(Precond1, m_One()))
+ return nullptr;
+ // The fold has a precondition 2: C2 s<= ThresholdHigh
+ auto *Precond2 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SLE, C2,
+ ThresholdHighExcl);
+ if (!match(Precond2, m_One()))
+ return nullptr;
+
+ // All good, finally emit the new pattern.
+ Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl);
+ Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl);
+ Value *MaybeReplacedLow =
+ Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X);
+ Instruction *MaybeReplacedHigh =
+ SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);
+
+ return MaybeReplacedHigh;
+}
+
+// If we have
+// %cmp = icmp [canonical predicate] i32 %x, C0
+// %r = select i1 %cmp, i32 %y, i32 C1
+// Where C0 != C1 and %x may be different from %y, see if the constant that we
+// will have if we flip the strictness of the predicate (i.e. without changing
+// the result) is identical to the C1 in select. If it matches we can change
+// original comparison to one with swapped predicate, reuse the constant,
+// and swap the hands of select.
+static Instruction *
+tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ ICmpInst::Predicate Pred;
+ Value *X;
+ Constant *C0;
+ if (!match(&Cmp, m_OneUse(m_ICmp(
+ Pred, m_Value(X),
+ m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))))
+ return nullptr;
+
+ // If comparison predicate is non-relational, we won't be able to do anything.
+ if (ICmpInst::isEquality(Pred))
+ return nullptr;
+
+ // If comparison predicate is non-canonical, then we certainly won't be able
+ // to make it canonical; canonicalizeCmpWithConstant() already tried.
+ if (!isCanonicalPredicate(Pred))
+ return nullptr;
+
+ // If the [input] type of comparison and select type are different, lets abort
+ // for now. We could try to compare constants with trunc/[zs]ext though.
+ if (C0->getType() != Sel.getType())
+ return nullptr;
+
+ // FIXME: are there any magic icmp predicate+constant pairs we must not touch?
+
+ Value *SelVal0, *SelVal1; // We do not care which one is from where.
+ match(&Sel, m_Select(m_Value(), m_Value(SelVal0), m_Value(SelVal1)));
+ // At least one of these values we are selecting between must be a constant
+ // else we'll never succeed.
+ if (!match(SelVal0, m_AnyIntegralConstant()) &&
+ !match(SelVal1, m_AnyIntegralConstant()))
+ return nullptr;
+
+ // Does this constant C match any of the `select` values?
+ auto MatchesSelectValue = [SelVal0, SelVal1](Constant *C) {
+ return C->isElementWiseEqual(SelVal0) || C->isElementWiseEqual(SelVal1);
+ };
+
+ // If C0 *already* matches true/false value of select, we are done.
+ if (MatchesSelectValue(C0))
+ return nullptr;
+
+ // Check the constant we'd have with flipped-strictness predicate.
+ auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, C0);
+ if (!FlippedStrictness)
+ return nullptr;
+
+ // If said constant doesn't match either, then there is no hope,
+ if (!MatchesSelectValue(FlippedStrictness->second))
+ return nullptr;
+
+ // It matched! Lets insert the new comparison just before select.
+ InstCombiner::BuilderTy::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(&Sel);
+
+ Pred = ICmpInst::getSwappedPredicate(Pred); // Yes, swapped.
+ Value *NewCmp = Builder.CreateICmp(Pred, X, FlippedStrictness->second,
+ Cmp.getName() + ".inv");
+ Sel.setCondition(NewCmp);
+ Sel.swapValues();
+ Sel.swapProfMetadata();
+
+ return &Sel;
+}
+
/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
ICmpInst *ICI) {
- Value *TrueVal = SI.getTrueValue();
- Value *FalseVal = SI.getFalseValue();
+ if (Value *V = foldSelectValueEquivalence(SI, *ICI, SQ))
+ return replaceInstUsesWith(SI, V);
if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder))
return NewSel;
@@ -1074,12 +1384,21 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, Builder))
return NewAbs;
+ if (Instruction *NewAbs = canonicalizeClampLike(SI, *ICI, Builder))
+ return NewAbs;
+
+ if (Instruction *NewSel =
+ tryToReuseConstantFromSelectInComparison(SI, *ICI, Builder))
+ return NewSel;
+
bool Changed = adjustMinMax(SI, *ICI);
if (Value *V = foldSelectICmpAnd(SI, ICI, Builder))
return replaceInstUsesWith(SI, V);
// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *CmpLHS = ICI->getOperand(0);
Value *CmpRHS = ICI->getOperand(1);
@@ -1149,6 +1468,9 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder))
return V;
+ if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder))
+ return V;
+
if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
@@ -1253,6 +1575,16 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
}
}
+ // max(max(A, B), min(A, B)) --> max(A, B)
+ // min(min(A, B), max(A, B)) --> min(A, B)
+ // TODO: This could be done in instsimplify.
+ if (SPF1 == SPF2 &&
+ ((SPF1 == SPF_UMIN && match(C, m_c_UMax(m_Specific(A), m_Specific(B)))) ||
+ (SPF1 == SPF_SMIN && match(C, m_c_SMax(m_Specific(A), m_Specific(B)))) ||
+ (SPF1 == SPF_UMAX && match(C, m_c_UMin(m_Specific(A), m_Specific(B)))) ||
+ (SPF1 == SPF_SMAX && match(C, m_c_SMin(m_Specific(A), m_Specific(B))))))
+ return replaceInstUsesWith(Outer, Inner);
+
// ABS(ABS(X)) -> ABS(X)
// NABS(NABS(X)) -> NABS(X)
// TODO: This could be done in instsimplify.
@@ -1280,7 +1612,7 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
return true;
}
- if (IsFreeToInvert(V, !V->hasNUsesOrMore(3))) {
+ if (isFreeToInvert(V, !V->hasNUsesOrMore(3))) {
NotV = nullptr;
return true;
}
@@ -1492,6 +1824,30 @@ static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) {
ConstantVector::get(Mask));
}
+/// If we have a select of vectors with a scalar condition, try to convert that
+/// to a vector select by splatting the condition. A splat may get folded with
+/// other operations in IR and having all operands of a select be vector types
+/// is likely better for vector codegen.
+static Instruction *canonicalizeScalarSelectOfVecs(
+ SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
+ Type *Ty = Sel.getType();
+ if (!Ty->isVectorTy())
+ return nullptr;
+
+ // We can replace a single-use extract with constant index.
+ Value *Cond = Sel.getCondition();
+ if (!match(Cond, m_OneUse(m_ExtractElement(m_Value(), m_ConstantInt()))))
+ return nullptr;
+
+ // select (extelt V, Index), T, F --> select (splat V, Index), T, F
+ // Splatting the extracted condition reduces code (we could directly create a
+ // splat shuffle of the source vector to eliminate the intermediate step).
+ unsigned NumElts = Ty->getVectorNumElements();
+ Value *SplatCond = Builder.CreateVectorSplat(NumElts, Cond);
+ Sel.setCondition(SplatCond);
+ return &Sel;
+}
+
/// Reuse bitcasted operands between a compare and select:
/// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) -->
/// bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D))
@@ -1648,6 +2004,71 @@ static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X,
return nullptr;
}
+/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
+Instruction *InstCombiner::matchSAddSubSat(SelectInst &MinMax1) {
+ Type *Ty = MinMax1.getType();
+
+ // We are looking for a tree of:
+ // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
+ // Where the min and max could be reversed
+ Instruction *MinMax2;
+ BinaryOperator *AddSub;
+ const APInt *MinValue, *MaxValue;
+ if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
+ if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
+ return nullptr;
+ } else if (match(&MinMax1,
+ m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
+ if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
+ return nullptr;
+ } else
+ return nullptr;
+
+ // Check that the constants clamp a saturate, and that the new type would be
+ // sensible to convert to.
+ if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
+ return nullptr;
+ // In what bitwidth can this be treated as saturating arithmetics?
+ unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
+ // FIXME: This isn't quite right for vectors, but using the scalar type is a
+ // good first approximation for what should be done there.
+ if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
+ return nullptr;
+
+ // Also make sure that the number of uses is as expected. The "3"s are for the
+ // the two items of min/max (the compare and the select).
+ if (MinMax2->hasNUsesOrMore(3) || AddSub->hasNUsesOrMore(3))
+ return nullptr;
+
+ // Create the new type (which can be a vector type)
+ Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
+ // Match the two extends from the add/sub
+ Value *A, *B;
+ if(!match(AddSub, m_BinOp(m_SExt(m_Value(A)), m_SExt(m_Value(B)))))
+ return nullptr;
+ // And check the incoming values are of a type smaller than or equal to the
+ // size of the saturation. Otherwise the higher bits can cause different
+ // results.
+ if (A->getType()->getScalarSizeInBits() > NewBitWidth ||
+ B->getType()->getScalarSizeInBits() > NewBitWidth)
+ return nullptr;
+
+ Intrinsic::ID IntrinsicID;
+ if (AddSub->getOpcode() == Instruction::Add)
+ IntrinsicID = Intrinsic::sadd_sat;
+ else if (AddSub->getOpcode() == Instruction::Sub)
+ IntrinsicID = Intrinsic::ssub_sat;
+ else
+ return nullptr;
+
+ // Finally create and return the sat intrinsic, truncated to the new type
+ Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
+ Value *AT = Builder.CreateSExt(A, NewTy);
+ Value *BT = Builder.CreateSExt(B, NewTy);
+ Value *Sat = Builder.CreateCall(F, {AT, BT});
+ return CastInst::Create(Instruction::SExt, Sat, Ty);
+}
+
/// Reduce a sequence of min/max with a common operand.
static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
Value *RHS,
@@ -1788,6 +2209,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *I = canonicalizeSelectToShuffle(SI))
return I;
+ if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, Builder))
+ return I;
+
// Canonicalize a one-use integer compare with a non-canonical predicate by
// inverting the predicate and swapping the select operands. This matches a
// compare canonicalization for conditional branches.
@@ -2013,16 +2437,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
(LHS->getType()->isFPOrFPVectorTy() &&
((CmpLHS != LHS && CmpLHS != RHS) ||
(CmpRHS != LHS && CmpRHS != RHS)))) {
- CmpInst::Predicate Pred = getMinMaxPred(SPF, SPR.Ordered);
+ CmpInst::Predicate MinMaxPred = getMinMaxPred(SPF, SPR.Ordered);
Value *Cmp;
- if (CmpInst::isIntPredicate(Pred)) {
- Cmp = Builder.CreateICmp(Pred, LHS, RHS);
+ if (CmpInst::isIntPredicate(MinMaxPred)) {
+ Cmp = Builder.CreateICmp(MinMaxPred, LHS, RHS);
} else {
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
+ auto FMF =
+ cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
Builder.setFastMathFlags(FMF);
- Cmp = Builder.CreateFCmp(Pred, LHS, RHS);
+ Cmp = Builder.CreateFCmp(MinMaxPred, LHS, RHS);
}
Value *NewSI = Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI);
@@ -2040,9 +2465,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
Value *A;
if (match(X, m_Not(m_Value(A))) && !X->hasNUsesOrMore(3) &&
- !IsFreeToInvert(A, A->hasOneUse()) &&
+ !isFreeToInvert(A, A->hasOneUse()) &&
// Passing false to only consider m_Not and constants.
- IsFreeToInvert(Y, false)) {
+ isFreeToInvert(Y, false)) {
Value *B = Builder.CreateNot(Y);
Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF),
A, B);
@@ -2070,6 +2495,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
return I;
+ if (Instruction *I = matchSAddSubSat(SI))
+ return I;
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index c821292400cd..64294838644f 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -25,50 +25,275 @@ using namespace PatternMatch;
// we should rewrite it as
// x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x)
// This is valid for any shift, but they must be identical.
-static Instruction *
-reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
- const SimplifyQuery &SQ) {
- // Look for: (x shiftopcode ShAmt0) shiftopcode ShAmt1
- Value *X, *ShAmt1, *ShAmt0;
+//
+// AnalyzeForSignBitExtraction indicates that we will only analyze whether this
+// pattern has any 2 right-shifts that sum to 1 less than original bit width.
+Value *InstCombiner::reassociateShiftAmtsOfTwoSameDirectionShifts(
+ BinaryOperator *Sh0, const SimplifyQuery &SQ,
+ bool AnalyzeForSignBitExtraction) {
+ // Look for a shift of some instruction, ignore zext of shift amount if any.
+ Instruction *Sh0Op0;
+ Value *ShAmt0;
+ if (!match(Sh0,
+ m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0)))))
+ return nullptr;
+
+ // If there is a truncation between the two shifts, we must make note of it
+ // and look through it. The truncation imposes additional constraints on the
+ // transform.
Instruction *Sh1;
- if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)),
- m_Instruction(Sh1)),
- m_Value(ShAmt0))))
+ Value *Trunc = nullptr;
+ match(Sh0Op0,
+ m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)), m_Value(Trunc)),
+ m_Instruction(Sh1)));
+
+ // Inner shift: (x shiftopcode ShAmt1)
+ // Like with other shift, ignore zext of shift amount if any.
+ Value *X, *ShAmt1;
+ if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
+ return nullptr;
+
+ // We have two shift amounts from two different shifts. The types of those
+  // shift amounts may not match. If that's the case, let's bail out now.
+ if (ShAmt0->getType() != ShAmt1->getType())
+ return nullptr;
+
+ // We are only looking for signbit extraction if we have two right shifts.
+ bool HadTwoRightShifts = match(Sh0, m_Shr(m_Value(), m_Value())) &&
+ match(Sh1, m_Shr(m_Value(), m_Value()));
+ // ... and if it's not two right-shifts, we know the answer already.
+ if (AnalyzeForSignBitExtraction && !HadTwoRightShifts)
return nullptr;
- // The shift opcodes must be identical.
+ // The shift opcodes must be identical, unless we are just checking whether
+ // this pattern can be interpreted as a sign-bit-extraction.
Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
- if (ShiftOpcode != Sh1->getOpcode())
+ bool IdenticalShOpcodes = Sh0->getOpcode() == Sh1->getOpcode();
+ if (!IdenticalShOpcodes && !AnalyzeForSignBitExtraction)
return nullptr;
+
+  // If we saw a truncation, we'll need to produce an extra instruction, and
+  // for that, one of the operands of the shift must be one-use, unless of
+  // course we don't actually plan to produce any instructions here.
+ if (Trunc && !AnalyzeForSignBitExtraction &&
+ !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
+ return nullptr;
+
// Can we fold (ShAmt0+ShAmt1) ?
- Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1,
- SQ.getWithInstruction(Sh0));
+ auto *NewShAmt = dyn_cast_or_null<Constant>(
+ SimplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false,
+ SQ.getWithInstruction(Sh0)));
if (!NewShAmt)
return nullptr; // Did not simplify.
- // Is the new shift amount smaller than the bit width?
- // FIXME: could also rely on ConstantRange.
- unsigned BitWidth = X->getType()->getScalarSizeInBits();
+ unsigned NewShAmtBitWidth = NewShAmt->getType()->getScalarSizeInBits();
+ unsigned XBitWidth = X->getType()->getScalarSizeInBits();
+  // Is the new shift amount smaller than the bit width of the inner/new shift?
if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
- APInt(BitWidth, BitWidth))))
- return nullptr;
+ APInt(NewShAmtBitWidth, XBitWidth))))
+ return nullptr; // FIXME: could perform constant-folding.
+
+  // If there was a truncation, and we have a right-shift, we can only fold if
+  // we are left with the original sign bit. Likewise, if we were just checking
+  // that this is a sign-bit extraction, this is the place to check it.
+  // FIXME: a zero shift amount is also legal here, but we can't *easily* check
+  // more than one predicate, so it's not really worth it.
+ if (HadTwoRightShifts && (Trunc || AnalyzeForSignBitExtraction)) {
+ // If it's not a sign bit extraction, then we're done.
+ if (!match(NewShAmt,
+ m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
+ APInt(NewShAmtBitWidth, XBitWidth - 1))))
+ return nullptr;
+ // If it is, and that was the question, return the base value.
+ if (AnalyzeForSignBitExtraction)
+ return X;
+ }
+
+ assert(IdenticalShOpcodes && "Should not get here with different shifts.");
+
// All good, we can do this fold.
+ NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType());
+
BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);
- // If both of the original shifts had the same flag set, preserve the flag.
- if (ShiftOpcode == Instruction::BinaryOps::Shl) {
- NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
- Sh1->hasNoUnsignedWrap());
- NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
- Sh1->hasNoSignedWrap());
- } else {
- NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+
+ // The flags can only be propagated if there wasn't a trunc.
+ if (!Trunc) {
+ // If the pattern did not involve trunc, and both of the original shifts
+ // had the same flag set, preserve the flag.
+ if (ShiftOpcode == Instruction::BinaryOps::Shl) {
+ NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
+ Sh1->hasNoUnsignedWrap());
+ NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
+ Sh1->hasNoSignedWrap());
+ } else {
+ NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+ }
+ }
+
+ Instruction *Ret = NewShift;
+ if (Trunc) {
+ Builder.Insert(NewShift);
+ Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType());
+ }
+
+ return Ret;
+}
+
+// Try to replace `undef` constants in C with Replacement.
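+// e.g. replaceUndefsWith(<i8 undef, i8 3>, i8 7) --> <i8 7, i8 3>.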
+static Constant *replaceUndefsWith(Constant *C, Constant *Replacement) {
+ if (C && match(C, m_Undef()))
+ return Replacement;
+
+ if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ llvm::SmallVector<Constant *, 32> NewOps(CV->getNumOperands());
+ for (unsigned i = 0, NumElts = NewOps.size(); i != NumElts; ++i) {
+ Constant *EltC = CV->getOperand(i);
+ NewOps[i] = EltC && match(EltC, m_Undef()) ? Replacement : EltC;
+ }
+ return ConstantVector::get(NewOps);
+ }
+
+ // Don't know how to deal with this constant.
+ return C;
+}
+
+// If we have some pattern that leaves only some low bits set, and then performs
+// a left-shift of those bits, and if none of the bits that are left after the
+// final shift are modified by the mask, we can omit the mask.
+//
+// There are many variants to this pattern:
+// a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
+// b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt
+// c) (x & (-1 >> MaskShAmt)) << ShiftShAmt
+// d) (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt
+// e) ((x << MaskShAmt) l>> MaskShAmt) << ShiftShAmt
+// f) ((x << MaskShAmt) a>> MaskShAmt) << ShiftShAmt
+// All these patterns can be simplified to just:
+// x << ShiftShAmt
+// iff:
+// a,b) (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
+// c,d,e,f) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
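+// For example, for pattern a) with i32 x and MaskShAmt == 4:
+//   (x & 15) << 28 --> x << 28, since 4 + 28 u>= 32; the bits cleared by the
+//   mask would have been shifted out anyway.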
+static Instruction *
+dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
+ const SimplifyQuery &Q,
+ InstCombiner::BuilderTy &Builder) {
+ assert(OuterShift->getOpcode() == Instruction::BinaryOps::Shl &&
+ "The input must be 'shl'!");
+
+ Value *Masked, *ShiftShAmt;
+ match(OuterShift, m_Shift(m_Value(Masked), m_Value(ShiftShAmt)));
+
+ Type *NarrowestTy = OuterShift->getType();
+ Type *WidestTy = Masked->getType();
+ // The mask must be computed in a type twice as wide to ensure
+ // that no bits are lost if the sum-of-shifts is wider than the base type.
+ Type *ExtendedTy = WidestTy->getExtendedType();
+
+ Value *MaskShAmt;
+
+ // ((1 << MaskShAmt) - 1)
+ auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes());
+  // (~(-1 << MaskShAmt))
+ auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes());
+ // (-1 >> MaskShAmt)
+ auto MaskC = m_Shr(m_AllOnes(), m_Value(MaskShAmt));
+ // ((-1 << MaskShAmt) >> MaskShAmt)
+ auto MaskD =
+ m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));
+
+ Value *X;
+ Constant *NewMask;
+
+ if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
+ // Can we simplify (MaskShAmt+ShiftShAmt) ?
+ auto *SumOfShAmts = dyn_cast_or_null<Constant>(SimplifyAddInst(
+ MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
+ if (!SumOfShAmts)
+ return nullptr; // Did not simplify.
+ // In this pattern SumOfShAmts correlates with the number of low bits
+ // that shall remain in the root value (OuterShift).
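+  // e.g. with i8 x, MaskShAmt == 3 and ShiftShAmt == 2: SumOfShAmts == 5, and
+  // (x & ((1 << 3) - 1)) << 2 becomes (x << 2) & 0b00011111 (illustrative).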
+
+  // An extend of an undef value becomes zero because the high bits are never
+  // completely unknown. Replace the `undef` shift amounts with the final
+  // shift bitwidth to ensure that the value remains undef when creating the
+  // subsequent shift op.
+ SumOfShAmts = replaceUndefsWith(
+ SumOfShAmts, ConstantInt::get(SumOfShAmts->getType()->getScalarType(),
+ ExtendedTy->getScalarSizeInBits()));
+ auto *ExtendedSumOfShAmts = ConstantExpr::getZExt(SumOfShAmts, ExtendedTy);
+ // And compute the mask as usual: ~(-1 << (SumOfShAmts))
+ auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
+ auto *ExtendedInvertedMask =
+ ConstantExpr::getShl(ExtendedAllOnes, ExtendedSumOfShAmts);
+ NewMask = ConstantExpr::getNot(ExtendedInvertedMask);
+ } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) ||
+ match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)),
+ m_Deferred(MaskShAmt)))) {
+ // Can we simplify (ShiftShAmt-MaskShAmt) ?
+ auto *ShAmtsDiff = dyn_cast_or_null<Constant>(SimplifySubInst(
+ ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
+ if (!ShAmtsDiff)
+ return nullptr; // Did not simplify.
+ // In this pattern ShAmtsDiff correlates with the number of high bits that
+ // shall be unset in the root value (OuterShift).
+
+  // An extend of an undef value becomes zero because the high bits are never
+  // completely unknown. Replace the `undef` shift amounts with the negated
+  // bitwidth of the innermost shift to ensure that the value remains undef
+  // when creating the subsequent shift op.
+ unsigned WidestTyBitWidth = WidestTy->getScalarSizeInBits();
+ ShAmtsDiff = replaceUndefsWith(
+ ShAmtsDiff, ConstantInt::get(ShAmtsDiff->getType()->getScalarType(),
+ -WidestTyBitWidth));
+ auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt(
+ ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(),
+ WidestTyBitWidth,
+ /*isSigned=*/false),
+ ShAmtsDiff),
+ ExtendedTy);
+ // And compute the mask as usual: (-1 l>> (NumHighBitsToClear))
+ auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
+ NewMask =
+ ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear);
+ } else
+ return nullptr; // Don't know anything about this pattern.
+
+ NewMask = ConstantExpr::getTrunc(NewMask, NarrowestTy);
+
+  // Does this mask have any unset bits? If not, then we can just not apply it.
+ bool NeedMask = !match(NewMask, m_AllOnes());
+
+ // If we need to apply a mask, there are several more restrictions we have.
+ if (NeedMask) {
+ // The old masking instruction must go away.
+ if (!Masked->hasOneUse())
+ return nullptr;
+    // The original "masking" instruction must not have been `ashr`.
+ if (match(Masked, m_AShr(m_Value(), m_Value())))
+ return nullptr;
}
- return NewShift;
+
+  // No 'NUW'/'NSW'! We no longer know that we won't shift out non-zero bits.
+ auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X,
+ OuterShift->getOperand(1));
+
+ if (!NeedMask)
+ return NewShift;
+
+ Builder.Insert(NewShift);
+ return BinaryOperator::Create(Instruction::And, NewShift, NewMask);
}
Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
assert(Op0->getType() == Op1->getType());
+ // If the shift amount is a one-use `sext`, we can demote it to `zext`.
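+  // A defined shift amount is u< the bitwidth, so its sign bit is never set
+  // and sext/zext agree on all defined executions, e.g.:
+  //   shl %x, (sext i8 %n to i32) --> shl %x, (zext i8 %n to i32)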
+ Value *Y;
+ if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) {
+ Value *NewExt = Builder.CreateZExt(Y, I.getType(), Op1->getName());
+ return BinaryOperator::Create(I.getOpcode(), Op0, NewExt);
+ }
+
// See if we can fold away this shift.
if (SimplifyDemandedInstructionBits(I))
return &I;
@@ -83,8 +308,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;
- if (Instruction *NewShift =
- reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))
+ if (auto *NewShift = cast_or_null<Instruction>(
+ reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)))
return NewShift;
// (C1 shift (A add C2)) -> (C1 shift C2) shift A)
@@ -618,9 +843,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
}
Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ const SimplifyQuery Q = SQ.getWithInstruction(&I);
+
if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
- I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
- SQ.getWithInstruction(&I)))
+ I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q))
return replaceInstUsesWith(I, V);
if (Instruction *X = foldVectorBinop(I))
@@ -629,6 +855,9 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
if (Instruction *V = commonShiftTransforms(I))
return V;
+ if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, Q, Builder))
+ return V;
+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
@@ -636,12 +865,11 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
const APInt *ShAmtAPInt;
if (match(Op1, m_APInt(ShAmtAPInt))) {
unsigned ShAmt = ShAmtAPInt->getZExtValue();
- unsigned BitWidth = Ty->getScalarSizeInBits();
// shl (zext X), ShAmt --> zext (shl X, ShAmt)
// This is only valid if X would have zeros shifted out.
Value *X;
- if (match(Op0, m_ZExt(m_Value(X)))) {
+ if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
unsigned SrcWidth = X->getType()->getScalarSizeInBits();
if (ShAmt < SrcWidth &&
MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I))
@@ -719,6 +947,12 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
// (X * C2) << C1 --> X * (C2 << C1)
if (match(Op0, m_Mul(m_Value(X), m_Constant(C2))))
return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));
+
+ // shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
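+    // e.g. shl (zext i1 %b to i32), 4 --> select %b, i32 16, i32 0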
+ if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
+ auto *NewC = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C1);
+ return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
+ }
}
// (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1
@@ -859,6 +1093,75 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
return nullptr;
}
+Instruction *
+InstCombiner::foldVariableSignZeroExtensionOfVariableHighBitExtract(
+ BinaryOperator &OldAShr) {
+ assert(OldAShr.getOpcode() == Instruction::AShr &&
+ "Must be called with arithmetic right-shift instruction only.");
+
+ // Check that constant C is a splat of the element-wise bitwidth of V.
+ auto BitWidthSplat = [](Constant *C, Value *V) {
+ return match(
+ C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
+ APInt(C->getType()->getScalarSizeInBits(),
+ V->getType()->getScalarSizeInBits())));
+ };
+
+ // It should look like variable-length sign-extension on the outside:
+ // (Val << (bitwidth(Val)-Nbits)) a>> (bitwidth(Val)-Nbits)
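+  // e.g. with i32 Val and NBits == 8, (Val << 24) a>> 24 sign-extends the low
+  // 8 bits of Val.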
+ Value *NBits;
+ Instruction *MaybeTrunc;
+ Constant *C1, *C2;
+ if (!match(&OldAShr,
+ m_AShr(m_Shl(m_Instruction(MaybeTrunc),
+ m_ZExtOrSelf(m_Sub(m_Constant(C1),
+ m_ZExtOrSelf(m_Value(NBits))))),
+ m_ZExtOrSelf(m_Sub(m_Constant(C2),
+ m_ZExtOrSelf(m_Deferred(NBits)))))) ||
+ !BitWidthSplat(C1, &OldAShr) || !BitWidthSplat(C2, &OldAShr))
+ return nullptr;
+
+  // There may or may not be a truncation after the outer two shifts.
+ Instruction *HighBitExtract;
+ match(MaybeTrunc, m_TruncOrSelf(m_Instruction(HighBitExtract)));
+ bool HadTrunc = MaybeTrunc != HighBitExtract;
+
+ // And finally, the innermost part of the pattern must be a right-shift.
+ Value *X, *NumLowBitsToSkip;
+ if (!match(HighBitExtract, m_Shr(m_Value(X), m_Value(NumLowBitsToSkip))))
+ return nullptr;
+
+  // Said right-shift must extract the high NBits bits - C0 must be its bitwidth.
+ Constant *C0;
+ if (!match(NumLowBitsToSkip,
+ m_ZExtOrSelf(
+ m_Sub(m_Constant(C0), m_ZExtOrSelf(m_Specific(NBits))))) ||
+ !BitWidthSplat(C0, HighBitExtract))
+ return nullptr;
+
+  // Since NBits is identical for all shifts, if the outermost and innermost
+  // shifts are identical, then the outermost shifts are redundant.
+  // If we had a truncation, do keep it though.
+ if (HighBitExtract->getOpcode() == OldAShr.getOpcode())
+ return replaceInstUsesWith(OldAShr, MaybeTrunc);
+
+ // Else, if there was a truncation, then we need to ensure that one
+ // instruction will go away.
+ if (HadTrunc && !match(&OldAShr, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
+ return nullptr;
+
+ // Finally, bypass two innermost shifts, and perform the outermost shift on
+ // the operands of the innermost shift.
+ Instruction *NewAShr =
+ BinaryOperator::Create(OldAShr.getOpcode(), X, NumLowBitsToSkip);
+ NewAShr->copyIRFlags(HighBitExtract); // We can preserve 'exact'-ness.
+ if (!HadTrunc)
+ return NewAShr;
+
+ Builder.Insert(NewAShr);
+ return TruncInst::CreateTruncOrBitCast(NewAShr, OldAShr.getType());
+}
+
Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
SQ.getWithInstruction(&I)))
@@ -933,6 +1236,9 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
}
}
+ if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I))
+ return R;
+
// See if we can turn a signed shr into an unsigned shr.
if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I))
return BinaryOperator::CreateLShr(Op0, Op1);
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index e0d85c4b49ae..d30ab8001897 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -971,6 +971,13 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
int DMaskIdx) {
+
+ // FIXME: Allow v3i16/v3f16 in buffer intrinsics when the types are fully supported.
+ if (DMaskIdx < 0 &&
+ II->getType()->getScalarSizeInBits() != 32 &&
+ DemandedElts.getActiveBits() == 3)
+ return nullptr;
+
unsigned VWidth = II->getType()->getVectorNumElements();
if (VWidth == 1)
return nullptr;
@@ -1067,16 +1074,22 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
}
/// The specified value produces a vector with any number of elements.
+/// This method analyzes which elements of the operand are undef and returns
+/// that information in UndefElts.
+///
/// DemandedElts contains the set of elements that are actually used by the
-/// caller. This method analyzes which elements of the operand are undef and
-/// returns that information in UndefElts.
+/// caller, and by default (AllowMultipleUsers equals false) the value is
+/// simplified only if it has a single caller. If AllowMultipleUsers is set
+/// to true, DemandedElts refers to the union of sets of elements that are
+/// used by all callers.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.
Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts,
- unsigned Depth) {
+ unsigned Depth,
+ bool AllowMultipleUsers) {
unsigned VWidth = V->getType()->getVectorNumElements();
APInt EltMask(APInt::getAllOnesValue(VWidth));
assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
@@ -1130,19 +1143,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (Depth == 10)
return nullptr;
- // If multiple users are using the root value, proceed with
- // simplification conservatively assuming that all elements
- // are needed.
- if (!V->hasOneUse()) {
- // Quit if we find multiple users of a non-root value though.
- // They'll be handled when it's their turn to be visited by
- // the main instcombine process.
- if (Depth != 0)
- // TODO: Just compute the UndefElts information recursively.
- return nullptr;
+ if (!AllowMultipleUsers) {
+ // If multiple users are using the root value, proceed with
+ // simplification conservatively assuming that all elements
+ // are needed.
+ if (!V->hasOneUse()) {
+ // Quit if we find multiple users of a non-root value though.
+ // They'll be handled when it's their turn to be visited by
+ // the main instcombine process.
+ if (Depth != 0)
+ // TODO: Just compute the UndefElts information recursively.
+ return nullptr;
- // Conservatively assume that all elements are needed.
- DemandedElts = EltMask;
+ // Conservatively assume that all elements are needed.
+ DemandedElts = EltMask;
+ }
}
Instruction *I = dyn_cast<Instruction>(V);
@@ -1674,8 +1689,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::amdgcn_buffer_load_format:
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format:
+ case Intrinsic::amdgcn_struct_tbuffer_load:
+ case Intrinsic::amdgcn_tbuffer_load:
return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
default: {
if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index dc9abdd7f47a..9c890748e5ab 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -253,6 +253,69 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
return nullptr;
}
+/// Find elements of V demanded by UserInstr.
+static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
+ unsigned VWidth = V->getType()->getVectorNumElements();
+
+ // Conservatively assume that all elements are needed.
+ APInt UsedElts(APInt::getAllOnesValue(VWidth));
+
+ switch (UserInstr->getOpcode()) {
+ case Instruction::ExtractElement: {
+ ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
+ assert(EEI->getVectorOperand() == V);
+ ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
+ if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
+ UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
+ }
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
+ unsigned MaskNumElts = UserInstr->getType()->getVectorNumElements();
+
+ UsedElts = APInt(VWidth, 0);
+ for (unsigned i = 0; i < MaskNumElts; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal == -1u || MaskVal >= 2 * VWidth)
+ continue;
+ if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
+ UsedElts.setBit(MaskVal);
+ if (Shuffle->getOperand(1) == V &&
+ ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
+ UsedElts.setBit(MaskVal - VWidth);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return UsedElts;
+}
+
+/// Find union of elements of V demanded by all its users.
+/// If it is known by querying findDemandedEltsBySingleUser that
+/// no user demands an element of V, then the corresponding bit
+/// remains unset in the returned value.
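+/// For example, if the only users of a 4-element V are extractelement at
+/// indices 0 and 2, the returned mask is 0b0101 (bit i set iff element i is
+/// demanded by some user).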
+static APInt findDemandedEltsByAllUsers(Value *V) {
+ unsigned VWidth = V->getType()->getVectorNumElements();
+
+ APInt UnionUsedElts(VWidth, 0);
+ for (const Use &U : V->uses()) {
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
+ UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
+ } else {
+ UnionUsedElts = APInt::getAllOnesValue(VWidth);
+ break;
+ }
+
+ if (UnionUsedElts.isAllOnesValue())
+ break;
+ }
+
+ return UnionUsedElts;
+}
+
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Value *SrcVec = EI.getVectorOperand();
Value *Index = EI.getIndexOperand();
@@ -271,19 +334,35 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
return nullptr;
// This instruction only demands the single element from the input vector.
- // If the input vector has a single use, simplify it based on this use
- // property.
- if (SrcVec->hasOneUse() && NumElts != 1) {
- APInt UndefElts(NumElts, 0);
- APInt DemandedElts(NumElts, 0);
- DemandedElts.setBit(IndexC->getZExtValue());
- if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts,
- UndefElts)) {
- EI.setOperand(0, V);
- return &EI;
+ if (NumElts != 1) {
+ // If the input vector has a single use, simplify it based on this use
+ // property.
+ if (SrcVec->hasOneUse()) {
+ APInt UndefElts(NumElts, 0);
+ APInt DemandedElts(NumElts, 0);
+ DemandedElts.setBit(IndexC->getZExtValue());
+ if (Value *V =
+ SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) {
+ EI.setOperand(0, V);
+ return &EI;
+ }
+ } else {
+ // If the input vector has multiple uses, simplify it based on a union
+ // of all elements used.
+ APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
+ if (!DemandedElts.isAllOnesValue()) {
+ APInt UndefElts(NumElts, 0);
+ if (Value *V = SimplifyDemandedVectorElts(
+ SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
+ true /* AllowMultipleUsers */)) {
+ if (V != SrcVec) {
+ SrcVec->replaceAllUsesWith(V);
+ return &EI;
+ }
+ }
+ }
}
}
-
if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
return I;
@@ -766,6 +845,55 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
}
+/// Try to fold an extract+insert element into an existing identity shuffle by
+/// changing the shuffle's mask to include the index of this insert element.
+static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
+ // Check if the vector operand of this insert is an identity shuffle.
+ auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
+ if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) ||
+ !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
+ return nullptr;
+
+ // Check for a constant insertion index.
+ uint64_t IdxC;
+ if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
+ return nullptr;
+
+ // Check if this insert's scalar op is extracted from the identity shuffle's
+ // input vector.
+ Value *Scalar = InsElt.getOperand(1);
+ Value *X = Shuf->getOperand(0);
+ if (!match(Scalar, m_ExtractElement(m_Specific(X), m_SpecificInt(IdxC))))
+ return nullptr;
+
+ // Replace the shuffle mask element at the index of this extract+insert with
+ // that same index value.
+ // For example:
+ // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
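+  // A concrete instance (illustrative types):
+  //   inselt (shuf <4 x i32> X, undef, <0,undef,2>), (extelt X, 1), 1
+  //   --> shuf X, undef, <0,1,2>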
+ unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
+ Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
+ Constant *NewMaskEltC = ConstantInt::get(I32Ty, IdxC);
+ Constant *OldMask = Shuf->getMask();
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ if (i != IdxC) {
+ // All mask elements besides the inserted element remain the same.
+ NewMaskVec[i] = OldMask->getAggregateElement(i);
+ } else if (OldMask->getAggregateElement(i) == NewMaskEltC) {
+ // If the mask element was already set, there's nothing to do
+ // (demanded elements analysis may unset it later).
+ return nullptr;
+ } else {
+ assert(isa<UndefValue>(OldMask->getAggregateElement(i)) &&
+ "Unexpected shuffle mask element for identity shuffle");
+ NewMaskVec[i] = NewMaskEltC;
+ }
+ }
+
+ Constant *NewMask = ConstantVector::get(NewMaskVec);
+ return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
+}
+
/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
@@ -987,6 +1115,9 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
if (Instruction *Splat = foldInsEltIntoSplat(IE))
return Splat;
+ if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
+ return IdentityShuf;
+
return nullptr;
}
@@ -1009,17 +1140,23 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
if (Depth == 0) return false;
switch (I->getOpcode()) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ // Propagating an undefined shuffle mask element to integer div/rem is not
+ // allowed because those opcodes can create immediate undefined behavior
+ // from an undefined element in an operand.
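+    // e.g. rewriting a shuffled sdiv under a mask with an undef lane could
+    // form a divisor lane holding undef, i.e. a division by undef.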
+ if (llvm::any_of(Mask, [](int M){ return M == -1; }))
+ return false;
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
@@ -1040,9 +1177,7 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
case Instruction::FPExt:
case Instruction::GetElementPtr: {
// Bail out if we would create longer vector ops. We could allow creating
- // longer vector ops, but that may result in more expensive codegen. We
- // would also need to limit the transform to avoid undefined behavior for
- // integer div/rem.
+ // longer vector ops, but that may result in more expensive codegen.
Type *ITy = I->getType();
if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
return false;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 385f4926b845..ecb486c544e0 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -200,8 +200,8 @@ bool InstCombiner::shouldChangeType(Type *From, Type *To) const {
// where both B and C should be ConstantInts, results in a constant that does
// not overflow. This function only handles the Add and Sub opcodes. For
// all other opcodes, the function conservatively returns false.
-static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
- OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
+ auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
if (!OBO || !OBO->hasNoSignedWrap())
return false;
@@ -224,10 +224,15 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
}
static bool hasNoUnsignedWrap(BinaryOperator &I) {
- OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+ auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
return OBO && OBO->hasNoUnsignedWrap();
}
+static bool hasNoSignedWrap(BinaryOperator &I) {
+ auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+ return OBO && OBO->hasNoSignedWrap();
+}
+
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. We preserve fast-math flags when applicable as they can be
/// preserved.
@@ -332,22 +337,21 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// It simplifies to V. Form "A op V".
I.setOperand(0, A);
I.setOperand(1, V);
- // Conservatively clear the optional flags, since they may not be
- // preserved by the reassociation.
bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
- bool IsNSW = MaintainNoSignedWrap(I, B, C);
+ bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
+ // Conservatively clear all optional flags since they may not be
+ // preserved by the reassociation. Reset nsw/nuw based on the above
+ // analysis.
ClearSubclassDataAfterReassociation(I);
+ // Note: this is only valid because SimplifyBinOp doesn't look at
+ // the operands to Op0.
if (IsNUW)
I.setHasNoUnsignedWrap(true);
- if (IsNSW &&
- (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
- // Note: this is only valid because SimplifyBinOp doesn't look at
- // the operands to Op0.
+ if (IsNSW)
I.setHasNoSignedWrap(true);
- }
Changed = true;
++NumReassoc;
@@ -610,7 +614,6 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,
HasNUW &= ROBO->hasNoUnsignedWrap();
}
- const APInt *CInt;
if (TopLevelOpcode == Instruction::Add &&
InnerOpcode == Instruction::Mul) {
// We can propagate 'nsw' if we know that
@@ -620,6 +623,7 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,
// %Z = mul nsw i16 %X, C+1
//
// iff C+1 isn't INT_MIN
+ const APInt *CInt;
if (match(V, m_APInt(CInt))) {
if (!CInt->isMinSignedValue())
BO->setHasNoSignedWrap(HasNSW);
@@ -763,12 +767,16 @@ Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
if (match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C))) &&
match(RHS, m_Select(m_Specific(A), m_Value(D), m_Value(E)))) {
bool SelectsHaveOneUse = LHS->hasOneUse() && RHS->hasOneUse();
+
+ FastMathFlags FMF;
BuilderTy::FastMathFlagGuard Guard(Builder);
- if (isa<FPMathOperator>(&I))
- Builder.setFastMathFlags(I.getFastMathFlags());
+ if (isa<FPMathOperator>(&I)) {
+ FMF = I.getFastMathFlags();
+ Builder.setFastMathFlags(FMF);
+ }
- Value *V1 = SimplifyBinOp(Opcode, C, E, SQ.getWithInstruction(&I));
- Value *V2 = SimplifyBinOp(Opcode, B, D, SQ.getWithInstruction(&I));
+ Value *V1 = SimplifyBinOp(Opcode, C, E, FMF, SQ.getWithInstruction(&I));
+ Value *V2 = SimplifyBinOp(Opcode, B, D, FMF, SQ.getWithInstruction(&I));
if (V1 && V2)
SI = Builder.CreateSelect(A, V2, V1);
else if (V2 && SelectsHaveOneUse)
@@ -1659,7 +1667,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// to an index of zero, so replace it with zero if it is not zero already.
Type *EltTy = GTI.getIndexedType();
if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0)
- if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
*I = Constant::getNullValue(NewIndexType);
MadeChange = true;
}
@@ -2549,9 +2557,7 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
// Change br (not X), label True, label False to: br X, label False, True
Value *X = nullptr;
- BasicBlock *TrueDest;
- BasicBlock *FalseDest;
- if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+ if (match(&BI, m_Br(m_Not(m_Value(X)), m_BasicBlock(), m_BasicBlock())) &&
!isa<Constant>(X)) {
// Swap Destinations and condition...
BI.setCondition(X);
@@ -2569,8 +2575,8 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
// Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq.
CmpInst::Predicate Pred;
- if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest,
- FalseDest)) &&
+ if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())),
+ m_BasicBlock(), m_BasicBlock())) &&
!isCanonicalPredicate(Pred)) {
// Swap destinations and condition.
CmpInst *Cond = cast<CmpInst>(BI.getCondition());
@@ -3156,6 +3162,21 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
findDbgUsers(DbgUsers, I);
for (auto *DII : reverse(DbgUsers)) {
if (DII->getParent() == SrcBlock) {
+ if (isa<DbgDeclareInst>(DII)) {
+      if (isa<DbgDeclareInst>(DII)) {
+        // A dbg.declare instruction should not be cloned, since there can only
+        // be one per variable fragment. It should be left in the original
+        // place, since the sunk instruction is not an alloca (otherwise we
+        // could not be here). But we need to update the arguments of the
+        // dbg.declare instruction so that it does not point into the sunk
+        // instruction.
+ if (!isa<CastInst>(I))
+ continue; // dbg.declare points at something it shouldn't
+
+ DII->setOperand(
+ 0, MetadataAsValue::get(I->getContext(),
+ ValueAsMetadata::get(I->getOperand(0))));
+ continue;
+ }
+
// dbg.value is in the same basic block as the sunk inst, see if we can
// salvage it. Clone a new copy of the instruction: on success we need
// both salvaged and unsalvaged copies.
@@ -3580,7 +3601,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
// Required analyses.
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6821e214e921..d92ee11c2e1a 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -129,6 +129,8 @@ static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
static const char *const kAsanModuleCtorName = "asan.module_ctor";
static const char *const kAsanModuleDtorName = "asan.module_dtor";
static const uint64_t kAsanCtorAndDtorPriority = 1;
+// On Emscripten, the system needs more than one priority for constructors.
+static const uint64_t kAsanEmscriptenCtorAndDtorPriority = 50;
static const char *const kAsanReportErrorTemplate = "__asan_report_";
static const char *const kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *const kAsanUnregisterGlobalsName =
@@ -191,6 +193,11 @@ static cl::opt<bool> ClRecover(
cl::desc("Enable recovery mode (continue-after-error)."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClInsertVersionCheck(
+ "asan-guard-against-version-mismatch",
+ cl::desc("Guard against compiler/runtime version mismatch."),
+ cl::Hidden, cl::init(true));
+
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
cl::desc("instrument read instructions"),
@@ -530,6 +537,14 @@ static size_t RedzoneSizeForScale(int MappingScale) {
return std::max(32U, 1U << MappingScale);
}
+static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
+ if (TargetTriple.isOSEmscripten()) {
+ return kAsanEmscriptenCtorAndDtorPriority;
+ } else {
+ return kAsanCtorAndDtorPriority;
+ }
+}
+
namespace {
/// Module analysis for getting various metadata about the module.
@@ -565,10 +580,10 @@ char ASanGlobalsMetadataWrapperPass::ID = 0;
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer {
- AddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD,
+ AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
bool CompileKernel = false, bool Recover = false,
bool UseAfterScope = false)
- : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(GlobalsMD) {
+ : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(*GlobalsMD) {
this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
this->CompileKernel =
ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
@@ -677,7 +692,7 @@ private:
FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset;
InlineAsm *EmptyAsm;
Value *LocalDynamicShadow = nullptr;
- GlobalsMetadata GlobalsMD;
+ const GlobalsMetadata &GlobalsMD;
DenseMap<const AllocaInst *, bool> ProcessedAllocas;
};
@@ -706,8 +721,8 @@ public:
GlobalsMetadata &GlobalsMD =
getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- AddressSanitizer ASan(*F.getParent(), GlobalsMD, CompileKernel, Recover,
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover,
UseAfterScope);
return ASan.instrumentFunction(F, TLI);
}
@@ -720,10 +735,10 @@ private:
class ModuleAddressSanitizer {
public:
- ModuleAddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD,
+ ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
bool CompileKernel = false, bool Recover = false,
bool UseGlobalsGC = true, bool UseOdrIndicator = false)
- : GlobalsMD(GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
+ : GlobalsMD(*GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
// Enable aliases as they should have no downside with ODR indicators.
UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias),
UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator),
@@ -783,7 +798,7 @@ private:
}
int GetAsanVersion(const Module &M) const;
- GlobalsMetadata GlobalsMD;
+ const GlobalsMetadata &GlobalsMD;
bool CompileKernel;
bool Recover;
bool UseGlobalsGC;
@@ -830,7 +845,7 @@ public:
bool runOnModule(Module &M) override {
GlobalsMetadata &GlobalsMD =
getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
- ModuleAddressSanitizer ASanModule(M, GlobalsMD, CompileKernel, Recover,
+ ModuleAddressSanitizer ASanModule(M, &GlobalsMD, CompileKernel, Recover,
UseGlobalGC, UseOdrIndicator);
return ASanModule.instrumentModule(M);
}
@@ -1033,7 +1048,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
if (!II.isLifetimeStartOrEnd())
return;
// Found lifetime intrinsic, add ASan instrumentation if necessary.
- ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
+ auto *Size = cast<ConstantInt>(II.getArgOperand(0));
// If size argument is undefined, don't do anything.
if (Size->isMinusOne()) return;
// Check that size doesn't saturate uint64_t and can
@@ -1156,7 +1171,7 @@ PreservedAnalyses AddressSanitizerPass::run(Function &F,
Module &M = *F.getParent();
if (auto *R = MAM.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) {
const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
- AddressSanitizer Sanitizer(M, *R, CompileKernel, Recover, UseAfterScope);
+ AddressSanitizer Sanitizer(M, R, CompileKernel, Recover, UseAfterScope);
if (Sanitizer.instrumentFunction(F, TLI))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -1178,7 +1193,7 @@ ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel,
PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
AnalysisManager<Module> &AM) {
GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M);
- ModuleAddressSanitizer Sanitizer(M, GlobalsMD, CompileKernel, Recover,
+ ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover,
UseGlobalGC, UseOdrIndicator);
if (Sanitizer.instrumentModule(M))
return PreservedAnalyses::none();
@@ -1331,7 +1346,7 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
unsigned *Alignment,
Value **MaybeMask) {
// Skip memory accesses inserted by another instrumentation.
- if (I->getMetadata("nosanitize")) return nullptr;
+ if (I->hasMetadata("nosanitize")) return nullptr;
// Do not instrument the load fetching the dynamic shadow address.
if (LocalDynamicShadow == I)
@@ -1775,9 +1790,10 @@ void ModuleAddressSanitizer::createInitializerPoisonCalls(
// Must have a function or null ptr.
if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
if (F->getName() == kAsanModuleCtorName) continue;
- ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ auto *Priority = cast<ConstantInt>(CS->getOperand(0));
// Don't instrument CTORs that will run before asan.module_ctor.
- if (Priority->getLimitedValue() <= kAsanCtorAndDtorPriority) continue;
+ if (Priority->getLimitedValue() <= GetCtorAndDtorPriority(TargetTriple))
+ continue;
poisonOneInitializer(*F, ModuleName);
}
}
@@ -1919,7 +1935,12 @@ StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const {
case Triple::COFF: return ".ASAN$GL";
case Triple::ELF: return "asan_globals";
case Triple::MachO: return "__DATA,__asan_globals,regular";
- default: break;
+ case Triple::Wasm:
+ case Triple::XCOFF:
+ report_fatal_error(
+ "ModuleAddressSanitizer not implemented for object file format.");
+ case Triple::UnknownObjectFormat:
+ break;
}
llvm_unreachable("unsupported object format");
}
@@ -2033,7 +2054,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsCOFF(
unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType());
assert(isPowerOf2_32(SizeOfGlobalStruct) &&
"global metadata will not be padded appropriately");
- Metadata->setAlignment(SizeOfGlobalStruct);
+ Metadata->setAlignment(assumeAligned(SizeOfGlobalStruct));
SetComdatForGlobalMetadata(G, Metadata, "");
}
@@ -2170,7 +2191,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray(
M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), "");
if (Mapping.Scale > 3)
- AllGlobals->setAlignment(1ULL << Mapping.Scale);
+ AllGlobals->setAlignment(Align(1ULL << Mapping.Scale));
IRB.CreateCall(AsanRegisterGlobals,
{IRB.CreatePointerCast(AllGlobals, IntptrTy),
@@ -2270,7 +2291,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
"", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
NewGlobal->setComdat(G->getComdat());
- NewGlobal->setAlignment(MinRZ);
+ NewGlobal->setAlignment(MaybeAlign(MinRZ));
// Don't fold globals with redzones. ODR violation detector and redzone
// poisoning implicitly creates a dependence on the global's address, so it
// is no longer valid for it to be marked unnamed_addr.
@@ -2338,7 +2359,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
// Set meaningful attributes for indicator symbol.
ODRIndicatorSym->setVisibility(NewGlobal->getVisibility());
ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass());
- ODRIndicatorSym->setAlignment(1);
+ ODRIndicatorSym->setAlignment(Align::None());
ODRIndicator = ODRIndicatorSym;
}
@@ -2410,39 +2431,39 @@ bool ModuleAddressSanitizer::instrumentModule(Module &M) {
// Create a module constructor. A destructor is created lazily because not all
// platforms, and not all modules need it.
+ std::string AsanVersion = std::to_string(GetAsanVersion(M));
std::string VersionCheckName =
- kAsanVersionCheckNamePrefix + std::to_string(GetAsanVersion(M));
+ ClInsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : "";
std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{},
/*InitArgs=*/{}, VersionCheckName);
bool CtorComdat = true;
- bool Changed = false;
// TODO(glider): temporarily disabled globals instrumentation for KASan.
if (ClGlobals) {
IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator());
- Changed |= InstrumentGlobals(IRB, M, &CtorComdat);
+ InstrumentGlobals(IRB, M, &CtorComdat);
}
+ const uint64_t Priority = GetCtorAndDtorPriority(TargetTriple);
+
// Put the constructor and destructor in comdat if both
// (1) global instrumentation is not TU-specific
// (2) target is ELF.
if (UseCtorComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) {
AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName));
- appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority,
- AsanCtorFunction);
+ appendToGlobalCtors(M, AsanCtorFunction, Priority, AsanCtorFunction);
if (AsanDtorFunction) {
AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName));
- appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority,
- AsanDtorFunction);
+ appendToGlobalDtors(M, AsanDtorFunction, Priority, AsanDtorFunction);
}
} else {
- appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
+ appendToGlobalCtors(M, AsanCtorFunction, Priority);
if (AsanDtorFunction)
- appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
+ appendToGlobalDtors(M, AsanDtorFunction, Priority);
}
- return Changed;
+ return true;
}
void AddressSanitizer::initializeCallbacks(Module &M) {
@@ -2664,7 +2685,7 @@ bool AddressSanitizer::instrumentFunction(Function &F,
if (CS) {
// A call inside BB.
TempsToInstrument.clear();
- if (CS.doesNotReturn() && !CS->getMetadata("nosanitize"))
+ if (CS.doesNotReturn() && !CS->hasMetadata("nosanitize"))
NoReturnCalls.push_back(CS.getInstruction());
}
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
@@ -2877,18 +2898,19 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
for (Argument &Arg : F.args()) {
if (Arg.hasByValAttr()) {
Type *Ty = Arg.getType()->getPointerElementType();
- unsigned Align = Arg.getParamAlignment();
- if (Align == 0) Align = DL.getABITypeAlignment(Ty);
+ unsigned Alignment = Arg.getParamAlignment();
+ if (Alignment == 0)
+ Alignment = DL.getABITypeAlignment(Ty);
AllocaInst *AI = IRB.CreateAlloca(
Ty, nullptr,
(Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +
".byval");
- AI->setAlignment(Align);
+ AI->setAlignment(Align(Alignment));
Arg.replaceAllUsesWith(AI);
uint64_t AllocSize = DL.getTypeAllocSize(Ty);
- IRB.CreateMemCpy(AI, Align, &Arg, Align, AllocSize);
+ IRB.CreateMemCpy(AI, Alignment, &Arg, Alignment, AllocSize);
}
}
}
@@ -2919,7 +2941,7 @@ Value *FunctionStackPoisoner::createAllocaForLayout(
}
assert((ClRealignStack & (ClRealignStack - 1)) == 0);
size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack);
- Alloca->setAlignment(FrameAlignment);
+ Alloca->setAlignment(MaybeAlign(FrameAlignment));
return IRB.CreatePointerCast(Alloca, IntptrTy);
}
@@ -2928,7 +2950,7 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() {
IRBuilder<> IRB(dyn_cast<Instruction>(FirstBB.begin()));
DynamicAllocaLayout = IRB.CreateAlloca(IntptrTy, nullptr);
IRB.CreateStore(Constant::getNullValue(IntptrTy), DynamicAllocaLayout);
- DynamicAllocaLayout->setAlignment(32);
+ DynamicAllocaLayout->setAlignment(Align(32));
}
void FunctionStackPoisoner::processDynamicAllocas() {
@@ -3275,7 +3297,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
// Insert new alloca with new NewSize and Align params.
AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize);
- NewAlloca->setAlignment(Align);
+ NewAlloca->setAlignment(MaybeAlign(Align));
// NewAddress = Address + Align
Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy),
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 4dc9b611c156..ae34be986537 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -224,7 +224,7 @@ struct BoundsCheckingLegacyPass : public FunctionPass {
}
bool runOnFunction(Function &F) override {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
return addBoundsChecking(F, TLI, SE);
}
diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h
index 971e00041762..8bb6f47c4846 100644
--- a/lib/Transforms/Instrumentation/CFGMST.h
+++ b/lib/Transforms/Instrumentation/CFGMST.h
@@ -257,13 +257,13 @@ public:
std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr));
if (Inserted) {
// Newly inserted, update the real info.
- Iter->second = std::move(llvm::make_unique<BBInfo>(Index));
+ Iter->second = std::move(std::make_unique<BBInfo>(Index));
Index++;
}
std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr));
if (Inserted)
// Newly inserted, update the real info.
- Iter->second = std::move(llvm::make_unique<BBInfo>(Index));
+ Iter->second = std::move(std::make_unique<BBInfo>(Index));
AllEdges.emplace_back(new Edge(Src, Dest, W));
return *AllEdges.back();
}
diff --git a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 3f4f9bc7145d..55c64fa4b727 100644
--- a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -512,30 +512,38 @@ static bool isHoistable(Instruction *I, DominatorTree &DT) {
// first-region entry block) or the (hoistable or unhoistable) base values that
// are defined outside (including the first-region entry block) of the
// scope. The returned set doesn't include constants.
-static std::set<Value *> getBaseValues(Value *V,
- DominatorTree &DT) {
+static std::set<Value *> getBaseValues(
+ Value *V, DominatorTree &DT,
+ DenseMap<Value *, std::set<Value *>> &Visited) {
+ if (Visited.count(V)) {
+ return Visited[V];
+ }
std::set<Value *> Result;
if (auto *I = dyn_cast<Instruction>(V)) {
// We don't stop at a block that's not in the Scope because we would miss some
// instructions that are based on the same base values if we stop there.
if (!isHoistable(I, DT)) {
Result.insert(I);
+ Visited.insert(std::make_pair(V, Result));
return Result;
}
// I is hoistable above the Scope.
for (Value *Op : I->operands()) {
- std::set<Value *> OpResult = getBaseValues(Op, DT);
+ std::set<Value *> OpResult = getBaseValues(Op, DT, Visited);
Result.insert(OpResult.begin(), OpResult.end());
}
+ Visited.insert(std::make_pair(V, Result));
return Result;
}
if (isa<Argument>(V)) {
Result.insert(V);
+ Visited.insert(std::make_pair(V, Result));
return Result;
}
// We don't include others like constants because those won't lead to any
// chance of folding of conditions (eg two bit checks merged into one check)
// after CHR.
+ Visited.insert(std::make_pair(V, Result));
return Result; // empty
}
@@ -1078,12 +1086,13 @@ static bool shouldSplit(Instruction *InsertPoint,
if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
// Use std::set as DenseSet doesn't work with set_intersection.
std::set<Value *> PrevBases, Bases;
+ DenseMap<Value *, std::set<Value *>> Visited;
for (Value *V : PrevConditionValues) {
- std::set<Value *> BaseValues = getBaseValues(V, DT);
+ std::set<Value *> BaseValues = getBaseValues(V, DT, Visited);
PrevBases.insert(BaseValues.begin(), BaseValues.end());
}
for (Value *V : ConditionValues) {
- std::set<Value *> BaseValues = getBaseValues(V, DT);
+ std::set<Value *> BaseValues = getBaseValues(V, DT, Visited);
Bases.insert(BaseValues.begin(), BaseValues.end());
}
CHR_DEBUG(
@@ -1538,10 +1547,7 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
}
if (auto *SI = dyn_cast<SelectInst>(U)) {
// Swap operands
- Value *TrueValue = SI->getTrueValue();
- Value *FalseValue = SI->getFalseValue();
- SI->setTrueValue(FalseValue);
- SI->setFalseValue(TrueValue);
+ SI->swapValues();
SI->swapProfMetadata();
if (Scope->TrueBiasedSelects.count(SI)) {
assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
@@ -2073,7 +2079,7 @@ bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
- llvm::make_unique<OptimizationRemarkEmitter>(&F);
+ std::make_unique<OptimizationRemarkEmitter>(&F);
return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
}
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 2279c1bcb6a8..c0353cba0b2f 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -1212,7 +1212,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
return DFS.ZeroShadow;
case 1: {
LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos);
- LI->setAlignment(ShadowAlign);
+ LI->setAlignment(MaybeAlign(ShadowAlign));
return LI;
}
case 2: {
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 59950ffc4e9a..ac6082441eae 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -86,7 +86,9 @@ public:
ReversedVersion[3] = Options.Version[0];
ReversedVersion[4] = '\0';
}
- bool runOnModule(Module &M, const TargetLibraryInfo &TLI);
+ bool
+ runOnModule(Module &M,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
private:
// Create the .gcno files for the Module based on DebugInfo.
@@ -102,9 +104,9 @@ private:
std::vector<Regex> &Regexes);
// Get pointers to the functions in the runtime library.
- FunctionCallee getStartFileFunc();
- FunctionCallee getEmitFunctionFunc();
- FunctionCallee getEmitArcsFunc();
+ FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
+ FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
+ FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
FunctionCallee getSummaryInfoFunc();
FunctionCallee getEndFileFunc();
@@ -127,7 +129,7 @@ private:
SmallVector<uint32_t, 4> FileChecksums;
Module *M;
- const TargetLibraryInfo *TLI;
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
LLVMContext *Ctx;
SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
std::vector<Regex> FilterRe;
@@ -147,8 +149,9 @@ public:
StringRef getPassName() const override { return "GCOV Profiler"; }
bool runOnModule(Module &M) override {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- return Profiler.runOnModule(M, TLI);
+ return Profiler.runOnModule(M, [this](Function &F) -> TargetLibraryInfo & {
+ return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ });
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -555,9 +558,10 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
return CurPath.str();
}
-bool GCOVProfiler::runOnModule(Module &M, const TargetLibraryInfo &TLI) {
+bool GCOVProfiler::runOnModule(
+ Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
this->M = &M;
- this->TLI = &TLI;
+ this->GetTLI = std::move(GetTLI);
Ctx = &M.getContext();
AddFlushBeforeForkAndExec();
@@ -574,9 +578,12 @@ PreservedAnalyses GCOVProfilerPass::run(Module &M,
ModuleAnalysisManager &AM) {
GCOVProfiler Profiler(GCOVOpts);
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
- if (!Profiler.runOnModule(M, TLI))
+ if (!Profiler.runOnModule(M, [&](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ }))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -624,6 +631,7 @@ static bool shouldKeepInEntry(BasicBlock::iterator It) {
void GCOVProfiler::AddFlushBeforeForkAndExec() {
SmallVector<Instruction *, 2> ForkAndExecs;
for (auto &F : M->functions()) {
+ auto *TLI = &GetTLI(F);
for (auto &I : instructions(F)) {
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
if (Function *Callee = CI->getCalledFunction()) {
@@ -669,7 +677,8 @@ void GCOVProfiler::emitProfileNotes() {
continue;
std::error_code EC;
- raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC, sys::fs::F_None);
+ raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
+ sys::fs::OF_None);
if (EC) {
Ctx->emitError(Twine("failed to open coverage notes file for writing: ") +
EC.message());
@@ -695,7 +704,7 @@ void GCOVProfiler::emitProfileNotes() {
++It;
EntryBlock.splitBasicBlock(It);
- Funcs.push_back(make_unique<GCOVFunction>(SP, &F, &out, FunctionIdent++,
+ Funcs.push_back(std::make_unique<GCOVFunction>(SP, &F, &out, FunctionIdent++,
Options.UseCfgChecksum,
Options.ExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
@@ -873,7 +882,7 @@ bool GCOVProfiler::emitProfileArcs() {
return Result;
}
-FunctionCallee GCOVProfiler::getStartFileFunc() {
+FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
Type *Args[] = {
Type::getInt8PtrTy(*Ctx), // const char *orig_filename
Type::getInt8PtrTy(*Ctx), // const char version[4]
@@ -887,7 +896,7 @@ FunctionCallee GCOVProfiler::getStartFileFunc() {
return Res;
}
-FunctionCallee GCOVProfiler::getEmitFunctionFunc() {
+FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
@@ -906,7 +915,7 @@ FunctionCallee GCOVProfiler::getEmitFunctionFunc() {
return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
}
-FunctionCallee GCOVProfiler::getEmitArcsFunc() {
+FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t num_counters
Type::getInt64PtrTy(*Ctx), // uint64_t *counters
@@ -943,9 +952,11 @@ Function *GCOVProfiler::insertCounterWriteout(
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
IRBuilder<> Builder(BB);
- FunctionCallee StartFile = getStartFileFunc();
- FunctionCallee EmitFunction = getEmitFunctionFunc();
- FunctionCallee EmitArcs = getEmitArcsFunc();
+ auto *TLI = &GetTLI(*WriteoutF);
+
+ FunctionCallee StartFile = getStartFileFunc(TLI);
+ FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
+ FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
FunctionCallee SummaryInfo = getSummaryInfoFunc();
FunctionCallee EndFile = getEndFileFunc();
diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 90a9f4955a4b..f87132ee4758 100644
--- a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -12,10 +12,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -52,7 +54,10 @@ using namespace llvm;
#define DEBUG_TYPE "hwasan"
static const char *const kHwasanModuleCtorName = "hwasan.module_ctor";
+static const char *const kHwasanNoteName = "hwasan.note";
static const char *const kHwasanInitName = "__hwasan_init";
+static const char *const kHwasanPersonalityThunkName =
+ "__hwasan_personality_thunk";
static const char *const kHwasanShadowMemoryDynamicAddress =
"__hwasan_shadow_memory_dynamic_address";
@@ -112,6 +117,9 @@ static cl::opt<bool> ClGenerateTagsWithCalls(
cl::desc("generate new tags with runtime library calls"), cl::Hidden,
cl::init(false));
+static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<int> ClMatchAllTag(
"hwasan-match-all-tag",
cl::desc("don't report bad accesses via pointers with this tag"),
@@ -155,8 +163,18 @@ static cl::opt<bool>
static cl::opt<bool>
ClInstrumentLandingPads("hwasan-instrument-landing-pads",
- cl::desc("instrument landing pads"), cl::Hidden,
- cl::init(true));
+ cl::desc("instrument landing pads"), cl::Hidden,
+ cl::init(false), cl::ZeroOrMore);
+
+static cl::opt<bool> ClUseShortGranules(
+ "hwasan-use-short-granules",
+ cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
+ cl::init(false), cl::ZeroOrMore);
+
+static cl::opt<bool> ClInstrumentPersonalityFunctions(
+ "hwasan-instrument-personality-functions",
+ cl::desc("instrument personality functions"), cl::Hidden, cl::init(false),
+ cl::ZeroOrMore);
static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
cl::desc("inline all checks"),
@@ -169,16 +187,16 @@ namespace {
class HWAddressSanitizer {
public:
explicit HWAddressSanitizer(Module &M, bool CompileKernel = false,
- bool Recover = false) {
+ bool Recover = false) : M(M) {
this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ?
ClEnableKhwasan : CompileKernel;
- initializeModule(M);
+ initializeModule();
}
bool sanitizeFunction(Function &F);
- void initializeModule(Module &M);
+ void initializeModule();
void initializeCallbacks(Module &M);
@@ -216,9 +234,14 @@ public:
Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
+ void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
+ void instrumentGlobals();
+
+ void instrumentPersonalityFunctions();
+
private:
LLVMContext *C;
- std::string CurModuleUniqueId;
+ Module &M;
Triple TargetTriple;
FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
FunctionCallee HWAsanHandleVfork;
@@ -238,17 +261,21 @@ private:
bool InTls;
void init(Triple &TargetTriple);
- unsigned getAllocaAlignment() const { return 1U << Scale; }
+ unsigned getObjectAlignment() const { return 1U << Scale; }
};
ShadowMapping Mapping;
+ Type *VoidTy = Type::getVoidTy(M.getContext());
Type *IntptrTy;
Type *Int8PtrTy;
Type *Int8Ty;
Type *Int32Ty;
+ Type *Int64Ty = Type::getInt64Ty(M.getContext());
bool CompileKernel;
bool Recover;
+ bool UseShortGranules;
+ bool InstrumentLandingPads;
Function *HwasanCtorFunction;
@@ -278,7 +305,7 @@ public:
StringRef getPassName() const override { return "HWAddressSanitizer"; }
bool doInitialization(Module &M) override {
- HWASan = llvm::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover);
+ HWASan = std::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover);
return true;
}
@@ -333,7 +360,7 @@ PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
/// Module-level initialization.
///
/// inserts a call to __hwasan_init to the module's constructor list.
-void HWAddressSanitizer::initializeModule(Module &M) {
+void HWAddressSanitizer::initializeModule() {
LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
auto &DL = M.getDataLayout();
@@ -342,7 +369,6 @@ void HWAddressSanitizer::initializeModule(Module &M) {
Mapping.init(TargetTriple);
C = &(M.getContext());
- CurModuleUniqueId = getUniqueModuleId(&M);
IRBuilder<> IRB(*C);
IntptrTy = IRB.getIntPtrTy(DL);
Int8PtrTy = IRB.getInt8PtrTy();
@@ -350,6 +376,21 @@ void HWAddressSanitizer::initializeModule(Module &M) {
Int32Ty = IRB.getInt32Ty();
HwasanCtorFunction = nullptr;
+
+ // Older versions of Android do not have the required runtime support for
+ // short granules, global or personality function instrumentation. On other
+ // platforms we currently require using the latest version of the runtime.
+ bool NewRuntime =
+ !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
+
+ UseShortGranules =
+ ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
+
+ // If we don't have personality function support, fall back to landing pads.
+ InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
+ ? ClInstrumentLandingPads
+ : !NewRuntime;
+
if (!CompileKernel) {
std::tie(HwasanCtorFunction, std::ignore) =
getOrCreateSanitizerCtorAndInitFunctions(
@@ -363,6 +404,18 @@ void HWAddressSanitizer::initializeModule(Module &M) {
Ctor->setComdat(CtorComdat);
appendToGlobalCtors(M, Ctor, 0, Ctor);
});
+
+ bool InstrumentGlobals =
+ ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
+ if (InstrumentGlobals)
+ instrumentGlobals();
+
+ bool InstrumentPersonalityFunctions =
+ ClInstrumentPersonalityFunctions.getNumOccurrences()
+ ? ClInstrumentPersonalityFunctions
+ : NewRuntime;
+ if (InstrumentPersonalityFunctions)
+ instrumentPersonalityFunctions();
}
if (!TargetTriple.isAndroid()) {
@@ -456,7 +509,7 @@ Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I,
unsigned *Alignment,
Value **MaybeMask) {
// Skip memory accesses inserted by another instrumentation.
- if (I->getMetadata("nosanitize")) return nullptr;
+ if (I->hasMetadata("nosanitize")) return nullptr;
// Do not instrument the load fetching the dynamic shadow address.
if (LocalDynamicShadow == I)
@@ -564,9 +617,11 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
TargetTriple.isOSBinFormatELF() && !Recover) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
- IRB.CreateCall(
- Intrinsic::getDeclaration(M, Intrinsic::hwasan_check_memaccess),
- {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
+ IRB.CreateCall(Intrinsic::getDeclaration(
+ M, UseShortGranules
+ ? Intrinsic::hwasan_check_memaccess_shortgranules
+ : Intrinsic::hwasan_check_memaccess),
+ {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
return;
}
@@ -718,7 +773,9 @@ static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
Value *Tag, size_t Size) {
- size_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
+ size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+ if (!UseShortGranules)
+ Size = AlignedSize;
Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
if (ClInstrumentWithCalls) {
@@ -738,7 +795,7 @@ bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
if (Size != AlignedSize) {
IRB.CreateStore(
- ConstantInt::get(Int8Ty, Size % Mapping.getAllocaAlignment()),
+ ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
@@ -778,8 +835,9 @@ Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
// FIXME: use addressofreturnaddress (but implement it in aarch64 backend
// first).
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- auto GetStackPointerFn =
- Intrinsic::getDeclaration(M, Intrinsic::frameaddress);
+ auto GetStackPointerFn = Intrinsic::getDeclaration(
+ M, Intrinsic::frameaddress,
+ IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
Value *StackPointer = IRB.CreateCall(
GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
@@ -912,8 +970,10 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
PC = readRegister(IRB, "pc");
else
PC = IRB.CreatePtrToInt(F, IntptrTy);
- auto GetStackPointerFn =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress);
+ Module *M = F->getParent();
+ auto GetStackPointerFn = Intrinsic::getDeclaration(
+ M, Intrinsic::frameaddress,
+ IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
Value *SP = IRB.CreatePtrToInt(
IRB.CreateCall(GetStackPointerFn,
{Constant::getNullValue(IRB.getInt32Ty())}),
@@ -999,11 +1059,8 @@ bool HWAddressSanitizer::instrumentStack(
AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
Replacement->setName(Name + ".hwasan");
- for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) {
- Use &U = *UI++;
- if (U.getUser() != AILong)
- U.set(Replacement);
- }
+ AI->replaceUsesWithIf(Replacement,
+ [AILong](Use &U) { return U.getUser() != AILong; });
for (auto *DDI : AllocaDeclareMap.lookup(AI)) {
DIExpression *OldExpr = DDI->getExpression();
@@ -1020,7 +1077,7 @@ bool HWAddressSanitizer::instrumentStack(
// Re-tag alloca memory with the special UAR tag.
Value *Tag = getUARTag(IRB, StackTag);
- tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getAllocaAlignment()));
+ tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment()));
}
}
@@ -1074,7 +1131,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress()))
AllocaDeclareMap[Alloca].push_back(DDI);
- if (ClInstrumentLandingPads && isa<LandingPadInst>(Inst))
+ if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
LandingPadVec.push_back(&Inst);
Value *MaybeMask = nullptr;
@@ -1093,6 +1150,13 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
if (!LandingPadVec.empty())
instrumentLandingPads(LandingPadVec);
+ if (AllocasToInstrument.empty() && F.hasPersonalityFn() &&
+ F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
+ // __hwasan_personality_thunk is a no-op for functions without an
+ // instrumented stack, so we can drop it.
+ F.setPersonalityFn(nullptr);
+ }
+
if (AllocasToInstrument.empty() && ToInstrument.empty())
return false;
@@ -1118,8 +1182,9 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
for (AllocaInst *AI : AllocasToInstrument) {
uint64_t Size = getAllocaSizeInBytes(*AI);
- uint64_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
- AI->setAlignment(std::max(AI->getAlignment(), 16u));
+ uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+ AI->setAlignment(
+ MaybeAlign(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
if (Size != AlignedSize) {
Type *AllocatedType = AI->getAllocatedType();
if (AI->isArrayAllocation()) {
@@ -1132,7 +1197,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
auto *NewAI = new AllocaInst(
TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
NewAI->takeName(AI);
- NewAI->setAlignment(AI->getAlignment());
+ NewAI->setAlignment(MaybeAlign(AI->getAlignment()));
NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
NewAI->setSwiftError(AI->isSwiftError());
NewAI->copyMetadata(*AI);
@@ -1179,6 +1244,257 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
return Changed;
}
+void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
+ Constant *Initializer = GV->getInitializer();
+ uint64_t SizeInBytes =
+ M.getDataLayout().getTypeAllocSize(Initializer->getType());
+ uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
+ if (SizeInBytes != NewSize) {
+ // Pad the initializer out to the next multiple of 16 bytes and add the
+ // required short granule tag.
+ std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
+ Init.back() = Tag;
+ Constant *Padding = ConstantDataArray::get(*C, Init);
+ Initializer = ConstantStruct::getAnon({Initializer, Padding});
+ }
+
+ auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
+ GlobalValue::ExternalLinkage, Initializer,
+ GV->getName() + ".hwasan");
+ NewGV->copyAttributesFrom(GV);
+ NewGV->setLinkage(GlobalValue::PrivateLinkage);
+ NewGV->copyMetadata(GV, 0);
+ NewGV->setAlignment(
+ MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment())));
+
+ // It is invalid to ICF two globals that have different tags. In the case
+ // where the size of the global is a multiple of the tag granularity the
+ // contents of the globals may be the same but the tags (i.e. symbol values)
+ // may be different, and the symbols are not considered during ICF. In the
+ // case where the size is not a multiple of the granularity, the short granule
+ // tags would discriminate two globals with different tags, but there would
+ // otherwise be nothing stopping such a global from being incorrectly ICF'd
+ // with an uninstrumented (i.e. tag 0) global that happened to have the short
+ // granule tag in the last byte.
+ NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+
+ // Descriptor format (assuming little-endian):
+ // bytes 0-3: relative address of global
+ // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
+ // it isn't, we create multiple descriptors)
+ // byte 7: tag
+ auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
+ const uint64_t MaxDescriptorSize = 0xfffff0;
+ for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
+ DescriptorPos += MaxDescriptorSize) {
+ auto *Descriptor =
+ new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
+ nullptr, GV->getName() + ".hwasan.descriptor");
+ auto *GVRelPtr = ConstantExpr::getTrunc(
+ ConstantExpr::getAdd(
+ ConstantExpr::getSub(
+ ConstantExpr::getPtrToInt(NewGV, Int64Ty),
+ ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
+ ConstantInt::get(Int64Ty, DescriptorPos)),
+ Int32Ty);
+ uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
+ auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
+ Descriptor->setComdat(NewGV->getComdat());
+ Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
+ Descriptor->setSection("hwasan_globals");
+ Descriptor->setMetadata(LLVMContext::MD_associated,
+ MDNode::get(*C, ValueAsMetadata::get(NewGV)));
+ appendToCompilerUsed(M, Descriptor);
+ }
+
+ Constant *Aliasee = ConstantExpr::getIntToPtr(
+ ConstantExpr::getAdd(
+ ConstantExpr::getPtrToInt(NewGV, Int64Ty),
+ ConstantInt::get(Int64Ty, uint64_t(Tag) << kPointerTagShift)),
+ GV->getType());
+ auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
+ GV->getLinkage(), "", Aliasee, &M);
+ Alias->setVisibility(GV->getVisibility());
+ Alias->takeName(GV);
+ GV->replaceAllUsesWith(Alias);
+ GV->eraseFromParent();
+}
+
+void HWAddressSanitizer::instrumentGlobals() {
+ // Start by creating a note that contains pointers to the list of global
+ // descriptors. Adding a note to the output file will cause the linker to
+ // create a PT_NOTE program header pointing to the note that we can use to
+ // find the descriptor list starting from the program headers. A function
+ // provided by the runtime initializes the shadow memory for the globals by
+ // accessing the descriptor list via the note. The dynamic loader needs to
+ // call this function whenever a library is loaded.
+ //
+ // The reason why we use a note for this instead of a more conventional
+ // approach of having a global constructor pass a descriptor list pointer to
+ // the runtime is because of an order of initialization problem. With
+ // constructors we can encounter the following problematic scenario:
+ //
+ // 1) library A depends on library B and also interposes one of B's symbols
+ // 2) B's constructors are called before A's (as required for correctness)
+ // 3) during construction, B accesses one of its "own" globals (actually
+ // interposed by A) and triggers a HWASAN failure due to the initialization
+ // for A not having happened yet
+ //
+ // Even without interposition it is possible to run into similar situations in
+ // cases where two libraries mutually depend on each other.
+ //
+ // We only need one note per binary, so put everything for the note in a
+ // comdat.
+ Comdat *NoteComdat = M.getOrInsertComdat(kHwasanNoteName);
+
+ Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
+ auto Start =
+ new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
+ nullptr, "__start_hwasan_globals");
+ Start->setVisibility(GlobalValue::HiddenVisibility);
+ Start->setDSOLocal(true);
+ auto Stop =
+ new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
+ nullptr, "__stop_hwasan_globals");
+ Stop->setVisibility(GlobalValue::HiddenVisibility);
+ Stop->setDSOLocal(true);
+
+ // Null-terminated so actually 8 bytes, which are required in order to align
+ // the note properly.
+ auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
+
+ auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
+ Int32Ty, Int32Ty);
+ auto *Note =
+ new GlobalVariable(M, NoteTy, /*isConstantGlobal=*/true,
+ GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
+ Note->setSection(".note.hwasan.globals");
+ Note->setComdat(NoteComdat);
+ Note->setAlignment(Align(4));
+ Note->setDSOLocal(true);
+
+ // The pointers in the note need to be relative so that the note ends up being
+ // placed in rodata, which is the standard location for notes.
+ auto CreateRelPtr = [&](Constant *Ptr) {
+ return ConstantExpr::getTrunc(
+ ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
+ ConstantExpr::getPtrToInt(Note, Int64Ty)),
+ Int32Ty);
+ };
+ Note->setInitializer(ConstantStruct::getAnon(
+ {ConstantInt::get(Int32Ty, 8), // n_namesz
+ ConstantInt::get(Int32Ty, 8), // n_descsz
+ ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
+ Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
+ appendToCompilerUsed(M, Note);
+
+ // Create a zero-length global in hwasan_globals so that the linker will
+ // always create start and stop symbols.
+ auto Dummy = new GlobalVariable(
+ M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
+ Dummy->setSection("hwasan_globals");
+ Dummy->setComdat(NoteComdat);
+ Dummy->setMetadata(LLVMContext::MD_associated,
+ MDNode::get(*C, ValueAsMetadata::get(Note)));
+ appendToCompilerUsed(M, Dummy);
+
+ std::vector<GlobalVariable *> Globals;
+ for (GlobalVariable &GV : M.globals()) {
+ if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
+ GV.isThreadLocal())
+ continue;
+
+ // Common symbols can't have aliases point to them, so they can't be tagged.
+ if (GV.hasCommonLinkage())
+ continue;
+
+ // Globals with custom sections may be used in __start_/__stop_ enumeration,
+ // which would be broken both by adding tags and potentially by the extra
+ // padding/alignment that we insert.
+ if (GV.hasSection())
+ continue;
+
+ Globals.push_back(&GV);
+ }
+
+ MD5 Hasher;
+ Hasher.update(M.getSourceFileName());
+ MD5::MD5Result Hash;
+ Hasher.final(Hash);
+ uint8_t Tag = Hash[0];
+
+ for (GlobalVariable *GV : Globals) {
+ // Skip tag 0 in order to avoid collisions with untagged memory.
+ if (Tag == 0)
+ Tag = 1;
+ instrumentGlobal(GV, Tag++);
+ }
+}
+
+void HWAddressSanitizer::instrumentPersonalityFunctions() {
+ // We need to untag stack frames as we unwind past them. That is the job of
+ // the personality function wrapper, which either wraps an existing
+ // personality function or acts as a personality function on its own. Each
+ // function that has a personality function or that can be unwound past has
+ // its personality function changed to a thunk that calls the personality
+ // function wrapper in the runtime.
+ MapVector<Constant *, std::vector<Function *>> PersonalityFns;
+ for (Function &F : M) {
+ if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
+ continue;
+
+ if (F.hasPersonalityFn()) {
+ PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
+ } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
+ PersonalityFns[nullptr].push_back(&F);
+ }
+ }
+
+ if (PersonalityFns.empty())
+ return;
+
+ FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
+ "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
+ Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
+ FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
+ FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
+
+ for (auto &P : PersonalityFns) {
+ std::string ThunkName = kHwasanPersonalityThunkName;
+ if (P.first)
+ ThunkName += ("." + P.first->getName()).str();
+ FunctionType *ThunkFnTy = FunctionType::get(
+ Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
+ bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
+ cast<GlobalValue>(P.first)->hasLocalLinkage());
+ auto *ThunkFn = Function::Create(ThunkFnTy,
+ IsLocal ? GlobalValue::InternalLinkage
+ : GlobalValue::LinkOnceODRLinkage,
+ ThunkName, &M);
+ if (!IsLocal) {
+ ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
+ ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
+ }
+
+ auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
+ IRBuilder<> IRB(BB);
+ CallInst *WrapperCall = IRB.CreateCall(
+ HwasanPersonalityWrapper,
+ {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
+ ThunkFn->getArg(3), ThunkFn->getArg(4),
+ P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
+ : Constant::getNullValue(Int8PtrTy),
+ IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
+ IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
+ WrapperCall->setTailCall();
+ IRB.CreateRet(WrapperCall);
+
+ for (Function *F : P.second)
+ F->setPersonalityFn(ThunkFn);
+ }
+}
+
void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) {
Scale = kDefaultShadowScale;
if (ClMappingOffset.getNumOccurrences() > 0) {
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index c7371f567ff3..74d6e76eceb6 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -403,7 +403,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
} else {
- OwnedORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
+ OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
ORE = OwnedORE.get();
}
diff --git a/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/lib/Transforms/Instrumentation/InstrOrderFile.cpp
index a2c1ddfd279e..93d3a8a14d5c 100644
--- a/lib/Transforms/Instrumentation/InstrOrderFile.cpp
+++ b/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -100,7 +100,8 @@ public:
if (!ClOrderFileWriteMapping.empty()) {
std::lock_guard<std::mutex> LogLock(MappingMutex);
std::error_code EC;
- llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC, llvm::sys::fs::F_Append);
+ llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC,
+ llvm::sys::fs::OF_Append);
if (EC) {
report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
" to save mapping file for order file instrumentation\n");
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 63c2b8078967..1f092a5f3103 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -157,7 +157,10 @@ public:
}
bool runOnModule(Module &M) override {
- return InstrProf.run(M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ return InstrProf.run(M, GetTLI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -370,8 +373,12 @@ private:
} // end anonymous namespace
PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
- if (!run(M, TLI))
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ if (!run(M, GetTLI))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -441,7 +448,7 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
std::unique_ptr<BlockFrequencyInfo> BFI;
if (Options.UseBFIInPromotion) {
std::unique_ptr<BranchProbabilityInfo> BPI;
- BPI.reset(new BranchProbabilityInfo(*F, LI, TLI));
+ BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
}
@@ -482,9 +489,10 @@ static bool containsProfilingIntrinsics(Module &M) {
return false;
}
-bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
+bool InstrProfiling::run(
+ Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
this->M = &M;
- this->TLI = &TLI;
+ this->GetTLI = std::move(GetTLI);
NamesVar = nullptr;
NamesSize = 0;
ProfileDataMap.clear();
@@ -601,6 +609,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
bool IsRange = (Ind->getValueKind()->getZExtValue() ==
llvm::InstrProfValueKind::IPVK_MemOPSize);
CallInst *Call = nullptr;
+ auto *TLI = &GetTLI(*Ind->getFunction());
if (!IsRange) {
Value *Args[3] = {Ind->getTargetValue(),
Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
@@ -731,9 +740,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
PD = It->second;
}
- // Match the linkage and visibility of the name global, except on COFF, where
- // the linkage must be local and consequentially the visibility must be
- // default.
+ // Match the linkage and visibility of the name global. COFF supports using
+ // comdats with internal symbols, so do that if we can.
Function *Fn = Inc->getParent()->getParent();
GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
@@ -749,19 +757,21 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
// new comdat group for the counters and profiling data. If we use the comdat
// of the parent function, that will result in relocations against discarded
// sections.
- Comdat *Cmdt = nullptr;
- GlobalValue::LinkageTypes CounterLinkage = Linkage;
- if (needsComdatForCounter(*Fn, *M)) {
- StringRef CmdtPrefix = getInstrProfComdatPrefix();
+ bool NeedComdat = needsComdatForCounter(*Fn, *M);
+ if (NeedComdat) {
if (TT.isOSBinFormatCOFF()) {
- // For COFF, the comdat group name must be the name of a symbol in the
- // group. Use the counter variable name, and upgrade its linkage to
- // something externally visible, like linkonce_odr.
- CmdtPrefix = getInstrProfCountersVarPrefix();
- CounterLinkage = GlobalValue::LinkOnceODRLinkage;
+ // For COFF, put the counters, data, and values each into their own
+ // comdats. We can't use a group because the Visual C++ linker will
+ // report duplicate symbol errors if there are multiple external symbols
+ // with the same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE.
+ Linkage = GlobalValue::LinkOnceODRLinkage;
+ Visibility = GlobalValue::HiddenVisibility;
}
- Cmdt = M->getOrInsertComdat(getVarName(Inc, CmdtPrefix));
}
+ auto MaybeSetComdat = [=](GlobalVariable *GV) {
+ if (NeedComdat)
+ GV->setComdat(M->getOrInsertComdat(GV->getName()));
+ };
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
@@ -775,9 +785,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
CounterPtr->setVisibility(Visibility);
CounterPtr->setSection(
getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
- CounterPtr->setAlignment(8);
- CounterPtr->setComdat(Cmdt);
- CounterPtr->setLinkage(CounterLinkage);
+ CounterPtr->setAlignment(Align(8));
+ MaybeSetComdat(CounterPtr);
+ CounterPtr->setLinkage(Linkage);
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
// Allocate statically the array of pointers to value profile nodes for
@@ -797,8 +807,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
ValuesVar->setVisibility(Visibility);
ValuesVar->setSection(
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
- ValuesVar->setAlignment(8);
- ValuesVar->setComdat(Cmdt);
+ ValuesVar->setAlignment(Align(8));
+ MaybeSetComdat(ValuesVar);
ValuesPtrExpr =
ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
}
@@ -830,8 +840,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
getVarName(Inc, getInstrProfDataVarPrefix()));
Data->setVisibility(Visibility);
Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
- Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
- Data->setComdat(Cmdt);
+ Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
+ MaybeSetComdat(Data);
+ Data->setLinkage(Linkage);
PD.RegionCounters = CounterPtr;
PD.DataVar = Data;
@@ -920,7 +931,7 @@ void InstrProfiling::emitNameData() {
// On COFF, it's important to reduce the alignment down to 1 to prevent the
// linker from inserting padding before the start of the names section or
// between names entries.
- NamesVar->setAlignment(1);
+ NamesVar->setAlignment(Align::None());
UsedVars.push_back(NamesVar);
for (auto *NamePtr : ReferencedNames)
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index f56a1bd91b89..a6c2c9b464b6 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -68,7 +68,8 @@ GlobalVariable *llvm::createPrivateGlobalForString(Module &M, StringRef Str,
GlobalValue::PrivateLinkage, StrConst, NamePrefix);
if (AllowMerging)
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ GV->setAlignment(Align::None()); // Strings may not be merged w/o setting
+ // alignment explicitly.
return GV;
}
@@ -116,7 +117,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeMemorySanitizerLegacyPassPass(Registry);
initializeHWAddressSanitizerLegacyPassPass(Registry);
initializeThreadSanitizerLegacyPassPass(Registry);
- initializeSanitizerCoverageModulePass(Registry);
+ initializeModuleSanitizerCoverageLegacyPassPass(Registry);
initializeDataFlowSanitizerPass(Registry);
}
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b25cbed1bb02..69c9020e060b 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -462,16 +462,9 @@ namespace {
/// the module.
class MemorySanitizer {
public:
- MemorySanitizer(Module &M, MemorySanitizerOptions Options) {
- this->CompileKernel =
- ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : Options.Kernel;
- if (ClTrackOrigins.getNumOccurrences() > 0)
- this->TrackOrigins = ClTrackOrigins;
- else
- this->TrackOrigins = this->CompileKernel ? 2 : Options.TrackOrigins;
- this->Recover = ClKeepGoing.getNumOccurrences() > 0
- ? ClKeepGoing
- : (this->CompileKernel | Options.Recover);
+ MemorySanitizer(Module &M, MemorySanitizerOptions Options)
+ : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
+ Recover(Options.Recover) {
initializeModule(M);
}
@@ -594,10 +587,26 @@ private:
/// An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
-
- Function *MsanCtorFunction;
};
+void insertModuleCtor(Module &M) {
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kMsanModuleCtorName, kMsanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) {
+ if (!ClWithComdat) {
+ appendToGlobalCtors(M, Ctor, 0);
+ return;
+ }
+ Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
+ Ctor->setComdat(MsanCtorComdat);
+ appendToGlobalCtors(M, Ctor, 0, Ctor);
+ });
+}
+
/// A legacy function pass for msan instrumentation.
///
/// Instruments functions to detect unitialized reads.
@@ -615,7 +624,7 @@ struct MemorySanitizerLegacyPass : public FunctionPass {
bool runOnFunction(Function &F) override {
return MSan->sanitizeFunction(
- F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
+ F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
}
bool doInitialization(Module &M) override;
@@ -623,8 +632,17 @@ struct MemorySanitizerLegacyPass : public FunctionPass {
MemorySanitizerOptions Options;
};
+template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
+ return (Opt.getNumOccurrences() > 0) ? Opt : Default;
+}
+
} // end anonymous namespace
+MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K)
+ : Kernel(getOptOrDefault(ClEnableKmsan, K)),
+ TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
+ Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {}
+
PreservedAnalyses MemorySanitizerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
MemorySanitizer Msan(*F.getParent(), Options);
@@ -633,6 +651,14 @@ PreservedAnalyses MemorySanitizerPass::run(Function &F,
return PreservedAnalyses::all();
}
+PreservedAnalyses MemorySanitizerPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (Options.Kernel)
+ return PreservedAnalyses::all();
+ insertModuleCtor(M);
+ return PreservedAnalyses::none();
+}
+
char MemorySanitizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
@@ -918,23 +944,6 @@ void MemorySanitizer::initializeModule(Module &M) {
OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
if (!CompileKernel) {
- std::tie(MsanCtorFunction, std::ignore) =
- getOrCreateSanitizerCtorAndInitFunctions(
- M, kMsanModuleCtorName, kMsanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{},
- // This callback is invoked when the functions are created the first
- // time. Hook them into the global ctors list in that case:
- [&](Function *Ctor, FunctionCallee) {
- if (!ClWithComdat) {
- appendToGlobalCtors(M, Ctor, 0);
- return;
- }
- Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
- Ctor->setComdat(MsanCtorComdat);
- appendToGlobalCtors(M, Ctor, 0, Ctor);
- });
-
if (TrackOrigins)
M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
return new GlobalVariable(
@@ -952,6 +961,8 @@ void MemorySanitizer::initializeModule(Module &M) {
}
bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
+ if (!Options.Kernel)
+ insertModuleCtor(M);
MSan.emplace(M, Options);
return true;
}
@@ -2562,6 +2573,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return false;
}
+ void handleInvariantGroup(IntrinsicInst &I) {
+ setShadow(&I, getShadow(&I, 0));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
void handleLifetimeStart(IntrinsicInst &I) {
if (!PoisonStack)
return;
@@ -2993,6 +3009,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::lifetime_start:
handleLifetimeStart(I);
break;
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ handleInvariantGroup(I);
+ break;
case Intrinsic::bswap:
handleBswap(I);
break;
@@ -3627,10 +3647,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
int NumRetOutputs = 0;
int NumOutputs = 0;
- Type *RetTy = dyn_cast<Value>(CB)->getType();
+ Type *RetTy = cast<Value>(CB)->getType();
if (!RetTy->isVoidTy()) {
// Register outputs are returned via the CallInst return value.
- StructType *ST = dyn_cast_or_null<StructType>(RetTy);
+ auto *ST = dyn_cast<StructType>(RetTy);
if (ST)
NumRetOutputs = ST->getNumElements();
else
@@ -3667,7 +3687,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// corresponding CallInst has nO+nI+1 operands (the last operand is the
// function to be called).
const DataLayout &DL = F.getParent()->getDataLayout();
- CallBase *CB = dyn_cast<CallBase>(&I);
+ CallBase *CB = cast<CallBase>(&I);
IRBuilder<> IRB(&I);
InlineAsm *IA = cast<InlineAsm>(CB->getCalledValue());
int OutputArgs = getNumOutputArgs(IA, CB);
@@ -4567,8 +4587,9 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
}
bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
- if (!CompileKernel && (&F == MsanCtorFunction))
+ if (!CompileKernel && F.getName() == kMsanModuleCtorName)
return false;
+
MemorySanitizerVisitor Visitor(F, *this, TLI);
// Clear out readonly/readnone attributes.
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 6fec3c9c79ee..ca1bb62389e9 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -48,6 +48,7 @@
//===----------------------------------------------------------------------===//
#include "CFGMST.h"
+#include "ValueProfileCollector.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
@@ -61,7 +62,6 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -96,6 +96,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DOTGraphTraits.h"
@@ -103,11 +104,11 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/JamCRC.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -120,6 +121,7 @@
using namespace llvm;
using ProfileCount = Function::ProfileCount;
+using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
#define DEBUG_TYPE "pgo-instrumentation"
@@ -286,6 +288,11 @@ static std::string getBranchCondString(Instruction *TI) {
return result;
}
+static const char *ValueProfKindDescr[] = {
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
namespace {
/// The select instruction visitor plays three roles specified
@@ -348,50 +355,6 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
unsigned getNumOfSelectInsts() const { return NSIs; }
};
-/// Instruction Visitor class to visit memory intrinsic calls.
-struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> {
- Function &F;
- unsigned NMemIs = 0; // Number of memIntrinsics instrumented.
- VisitMode Mode = VM_counting; // Visiting mode.
- unsigned CurCtrId = 0; // Current counter index.
- unsigned TotalNumCtrs = 0; // Total number of counters
- GlobalVariable *FuncNameVar = nullptr;
- uint64_t FuncHash = 0;
- PGOUseFunc *UseFunc = nullptr;
- std::vector<Instruction *> Candidates;
-
- MemIntrinsicVisitor(Function &Func) : F(Func) {}
-
- void countMemIntrinsics(Function &Func) {
- NMemIs = 0;
- Mode = VM_counting;
- visit(Func);
- }
-
- void instrumentMemIntrinsics(Function &Func, unsigned TotalNC,
- GlobalVariable *FNV, uint64_t FHash) {
- Mode = VM_instrument;
- TotalNumCtrs = TotalNC;
- FuncHash = FHash;
- FuncNameVar = FNV;
- visit(Func);
- }
-
- std::vector<Instruction *> findMemIntrinsics(Function &Func) {
- Candidates.clear();
- Mode = VM_annotate;
- visit(Func);
- return Candidates;
- }
-
- // Visit the IR stream and annotate all mem intrinsic call instructions.
- void instrumentOneMemIntrinsic(MemIntrinsic &MI);
-
- // Visit \p MI instruction and perform tasks according to visit mode.
- void visitMemIntrinsic(MemIntrinsic &SI);
-
- unsigned getNumOfMemIntrinsics() const { return NMemIs; }
-};
class PGOInstrumentationGenLegacyPass : public ModulePass {
public:
@@ -563,13 +526,14 @@ private:
// A map that stores the Comdat group in function F.
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
+ ValueProfileCollector VPC;
+
void computeCFGHash();
void renameComdatFunction();
public:
- std::vector<std::vector<Instruction *>> ValueSites;
+ std::vector<std::vector<VPCandidateInfo>> ValueSites;
SelectInstVisitor SIVisitor;
- MemIntrinsicVisitor MIVisitor;
std::string FuncName;
GlobalVariable *FuncNameVar;
@@ -604,23 +568,21 @@ public:
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
- : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers),
- ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func),
- MST(F, BPI, BFI) {
+ : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func),
+ ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) {
// This should be done before CFG hash computation.
SIVisitor.countSelects(Func);
- MIVisitor.countMemIntrinsics(Func);
+ ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
if (!IsCS) {
NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
- NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
+ NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
NumOfPGOBB += MST.BBInfos.size();
- ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
+ ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
} else {
NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
- NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
+ NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
NumOfCSPGOBB += MST.BBInfos.size();
}
- ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
FuncName = getPGOFuncName(F);
computeCFGHash();
@@ -647,7 +609,7 @@ public:
// value of each BB in the CFG. The higher 32 bits record the number of edges.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
- std::vector<char> Indexes;
+ std::vector<uint8_t> Indexes;
JamCRC JC;
for (auto &BB : F) {
const Instruction *TI = BB.getTerminator();
@@ -658,7 +620,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
continue;
uint32_t Index = BI->Index;
for (int J = 0; J < 4; J++)
- Indexes.push_back((char)(Index >> (J * 8)));
+ Indexes.push_back((uint8_t)(Index >> (J * 8)));
}
}
JC.update(Indexes);
@@ -874,28 +836,36 @@ static void instrumentOneFunc(
if (DisableValueProfiling)
return;
- unsigned NumIndirectCalls = 0;
- for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) {
- CallSite CS(I);
- Value *Callee = CS.getCalledValue();
- LLVM_DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = "
- << NumIndirectCalls << "\n");
- IRBuilder<> Builder(I);
- assert(Builder.GetInsertPoint() != I->getParent()->end() &&
- "Cannot get the Instrumentation point");
- Builder.CreateCall(
- Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
- {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncInfo.FunctionHash),
- Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),
- Builder.getInt32(IPVK_IndirectCallTarget),
- Builder.getInt32(NumIndirectCalls++)});
- }
- NumOfPGOICall += NumIndirectCalls;
+ NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
- // Now instrument memop intrinsic calls.
- FuncInfo.MIVisitor.instrumentMemIntrinsics(
- F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash);
+ // For each VP Kind, walk the VP candidates and instrument each one.
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
+ unsigned SiteIndex = 0;
+ if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
+ continue;
+
+ for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
+ LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
+ << " site: CallSite Index = " << SiteIndex << "\n");
+
+ IRBuilder<> Builder(Cand.InsertPt);
+ assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
+ "Cannot get the Instrumentation point");
+
+ Value *ToProfile = nullptr;
+ if (Cand.V->getType()->isIntegerTy())
+ ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
+ else if (Cand.V->getType()->isPointerTy())
+ ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
+ assert(ToProfile && "value profiling Value is of unexpected type");
+
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
+ {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+ Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
+ Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)});
+ }
+ } // IPVK_First <= Kind <= IPVK_Last
}
namespace {
@@ -984,9 +954,9 @@ class PGOUseFunc {
public:
PGOUseFunc(Function &Func, Module *Modu,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
- BranchProbabilityInfo *BPI = nullptr,
- BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false)
- : F(Func), M(Modu), BFI(BFIin),
+ BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
+ ProfileSummaryInfo *PSI, bool IsCS)
+ : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
FreqAttr(FFA_Normal), IsCS(IsCS) {}
@@ -1041,6 +1011,7 @@ private:
Function &F;
Module *M;
BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
// This member stores the shared information with class PGOGenFunc.
FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
@@ -1078,15 +1049,9 @@ private:
// FIXME: This function should be removed once the functionality in
// the inliner is implemented.
void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
- if (ProgramMaxCount == 0)
- return;
- // Threshold of the hot functions.
- const BranchProbability HotFunctionThreshold(1, 100);
- // Threshold of the cold functions.
- const BranchProbability ColdFunctionThreshold(2, 10000);
- if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
+ if (PSI->isHotCount(EntryCount))
FreqAttr = FFA_Hot;
- else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount))
+ else if (PSI->isColdCount(MaxCount))
FreqAttr = FFA_Cold;
}
};
@@ -1433,43 +1398,6 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
llvm_unreachable("Unknown visiting mode");
}
-void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
- Module *M = F.getParent();
- IRBuilder<> Builder(&MI);
- Type *Int64Ty = Builder.getInt64Ty();
- Type *I8PtrTy = Builder.getInt8PtrTy();
- Value *Length = MI.getLength();
- assert(!isa<ConstantInt>(Length));
- Builder.CreateCall(
- Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
- {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty),
- Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)});
- ++CurCtrId;
-}
-
-void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) {
- if (!PGOInstrMemOP)
- return;
- Value *Length = MI.getLength();
- // Not instrument constant length calls.
- if (dyn_cast<ConstantInt>(Length))
- return;
-
- switch (Mode) {
- case VM_counting:
- NMemIs++;
- return;
- case VM_instrument:
- instrumentOneMemIntrinsic(MI);
- return;
- case VM_annotate:
- Candidates.push_back(&MI);
- return;
- }
- llvm_unreachable("Unknown visiting mode");
-}
-
// Traverse all valuesites and annotate the instructions for all value kind.
void PGOUseFunc::annotateValueSites() {
if (DisableValueProfiling)
@@ -1482,11 +1410,6 @@ void PGOUseFunc::annotateValueSites() {
annotateValueSites(Kind);
}
-static const char *ValueProfKindDescr[] = {
-#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
-#include "llvm/ProfileData/InstrProfData.inc"
-};
-
// Annotate the instructions for a specific value kind.
void PGOUseFunc::annotateValueSites(uint32_t Kind) {
assert(Kind <= IPVK_Last);
@@ -1505,11 +1428,11 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
return;
}
- for (auto &I : ValueSites) {
+ for (VPCandidateInfo &I : ValueSites) {
LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
<< "): Index = " << ValueSiteIndex << " out of "
<< NumValueSites << "\n");
- annotateValueSite(*M, *I, ProfileRecord,
+ annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
: MaxNumAnnotations);
@@ -1595,7 +1518,8 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
static bool annotateAllFunctions(
Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
- function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
+ function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
+ ProfileSummaryInfo *PSI, bool IsCS) {
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
auto &Ctx = M.getContext();
// Read the counter array from file.
@@ -1626,6 +1550,13 @@ static bool annotateAllFunctions(
return false;
}
+ // Add the profile summary (read from the header of the indexed summary) here
+ // so that we can use it below when reading counters (which checks if the
+ // function should be marked with a cold or inlinehint attribute).
+ M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
+ IsCS ? ProfileSummary::PSK_CSInstr
+ : ProfileSummary::PSK_Instr);
+
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
std::vector<Function *> HotFunctions;
@@ -1638,7 +1569,7 @@ static bool annotateAllFunctions(
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
- PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS);
+ PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS);
bool AllZeros = false;
if (!Func.readCounters(PGOReader.get(), AllZeros))
continue;
@@ -1662,9 +1593,9 @@ static bool annotateAllFunctions(
F.getName().equals(ViewBlockFreqFuncName))) {
LoopInfo LI{DominatorTree(F)};
std::unique_ptr<BranchProbabilityInfo> NewBPI =
- llvm::make_unique<BranchProbabilityInfo>(F, LI);
+ std::make_unique<BranchProbabilityInfo>(F, LI);
std::unique_ptr<BlockFrequencyInfo> NewBFI =
- llvm::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
+ std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
if (PGOViewCounts == PGOVCT_Graph)
NewBFI->view();
else if (PGOViewCounts == PGOVCT_Text) {
@@ -1686,9 +1617,6 @@ static bool annotateAllFunctions(
}
}
}
- M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
- IsCS ? ProfileSummary::PSK_CSInstr
- : ProfileSummary::PSK_Instr);
// Set function hotness attribute from the profile.
// We have to apply these attributes at the end because their presence
@@ -1730,8 +1658,10 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
return &FAM.getResult<BlockFrequencyAnalysis>(F);
};
+ auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
- LookupBPI, LookupBFI, IsCS))
+ LookupBPI, LookupBFI, PSI, IsCS))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -1748,7 +1678,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI,
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI,
IsCS);
}
@@ -1776,6 +1707,9 @@ void llvm::setProfMetadata(Module *M, Instruction *TI,
: Weights) {
dbgs() << W << " ";
} dbgs() << "\n";);
+
+ misexpect::verifyMisExpect(TI, Weights, TI->getContext());
+
TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
if (EmitBranchProbability) {
std::string BrCondStr = getBranchCondString(TI);
diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 188f95b4676b..9f81bb16d0a7 100644
--- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -138,7 +138,7 @@ public:
OptimizationRemarkEmitter &ORE, DominatorTree *DT)
: Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) {
ValueDataArray =
- llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
+ std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
 // Get the MemOPSize range information from option MemOPSizeRange.
getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
PreciseRangeLast);
@@ -374,8 +374,8 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
Instruction *NewInst = MI->clone();
// Fix the argument.
- MemIntrinsic * MemI = dyn_cast<MemIntrinsic>(NewInst);
- IntegerType *SizeType = dyn_cast<IntegerType>(MemI->getLength()->getType());
+ auto *MemI = cast<MemIntrinsic>(NewInst);
+ auto *SizeType = dyn_cast<IntegerType>(MemI->getLength()->getType());
assert(SizeType && "Expected integer type size argument.");
ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
MemI->setLength(CaseSizeId);
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index ca0cb4bdbe84..f8fa9cad03b8 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -176,24 +177,21 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
return Options;
}
-class SanitizerCoverageModule : public ModulePass {
+using DomTreeCallback = function_ref<const DominatorTree *(Function &F)>;
+using PostDomTreeCallback =
+ function_ref<const PostDominatorTree *(Function &F)>;
+
+class ModuleSanitizerCoverage {
public:
- SanitizerCoverageModule(
+ ModuleSanitizerCoverage(
const SanitizerCoverageOptions &Options = SanitizerCoverageOptions())
- : ModulePass(ID), Options(OverrideFromCL(Options)) {
- initializeSanitizerCoverageModulePass(*PassRegistry::getPassRegistry());
- }
- bool runOnModule(Module &M) override;
- bool runOnFunction(Function &F);
- static char ID; // Pass identification, replacement for typeid
- StringRef getPassName() const override { return "SanitizerCoverageModule"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- }
+ : Options(OverrideFromCL(Options)) {}
+ bool instrumentModule(Module &M, DomTreeCallback DTCallback,
+ PostDomTreeCallback PDTCallback);
private:
+ void instrumentFunction(Function &F, DomTreeCallback DTCallback,
+ PostDomTreeCallback PDTCallback);
void InjectCoverageForIndirectCalls(Function &F,
ArrayRef<Instruction *> IndirCalls);
void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
@@ -252,10 +250,57 @@ private:
SanitizerCoverageOptions Options;
};
+class ModuleSanitizerCoverageLegacyPass : public ModulePass {
+public:
+ ModuleSanitizerCoverageLegacyPass(
+ const SanitizerCoverageOptions &Options = SanitizerCoverageOptions())
+ : ModulePass(ID), Options(Options) {
+ initializeModuleSanitizerCoverageLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override {
+ ModuleSanitizerCoverage ModuleSancov(Options);
+ auto DTCallback = [this](Function &F) -> const DominatorTree * {
+ return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ };
+ auto PDTCallback = [this](Function &F) -> const PostDominatorTree * {
+ return &this->getAnalysis<PostDominatorTreeWrapperPass>(F)
+ .getPostDomTree();
+ };
+ return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback);
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+ StringRef getPassName() const override { return "ModuleSanitizerCoverage"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ }
+
+private:
+ SanitizerCoverageOptions Options;
+};
+
} // namespace
+PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ ModuleSanitizerCoverage ModuleSancov(Options);
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto DTCallback = [&FAM](Function &F) -> const DominatorTree * {
+ return &FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+ auto PDTCallback = [&FAM](Function &F) -> const PostDominatorTree * {
+ return &FAM.getResult<PostDominatorTreeAnalysis>(F);
+ };
+ if (ModuleSancov.instrumentModule(M, DTCallback, PDTCallback))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
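
For orientation, a minimal sketch of how the ported pass could be scheduled
under the new pass manager; the helper name runSancov is hypothetical, and the
registration calls are the usual PassBuilder boilerplate:

    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"

    // Build and register the analysis managers, then run the module pass.
    static void runSancov(llvm::Module &M) {
      llvm::PassBuilder PB;
      llvm::LoopAnalysisManager LAM;
      llvm::FunctionAnalysisManager FAM;
      llvm::CGSCCAnalysisManager CGAM;
      llvm::ModuleAnalysisManager MAM;
      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

      llvm::ModulePassManager MPM;
      MPM.addPass(llvm::ModuleSanitizerCoveragePass());
      MPM.run(M, MAM); // run() above fetches the (post)dom trees via FAM
    }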
+
std::pair<Value *, Value *>
-SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
+ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section,
Type *Ty) {
GlobalVariable *SecStart =
new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,
@@ -278,7 +323,7 @@ SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
}
-Function *SanitizerCoverageModule::CreateInitCallsForSections(
+Function *ModuleSanitizerCoverage::CreateInitCallsForSections(
Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty,
const char *Section) {
auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
@@ -310,7 +355,8 @@ Function *SanitizerCoverageModule::CreateInitCallsForSections(
return CtorFunc;
}
-bool SanitizerCoverageModule::runOnModule(Module &M) {
+bool ModuleSanitizerCoverage::instrumentModule(
+ Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
return false;
C = &(M.getContext());
@@ -403,7 +449,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
for (auto &F : M)
- runOnFunction(F);
+ instrumentFunction(F, DTCallback, PDTCallback);
Function *Ctor = nullptr;
@@ -518,29 +564,30 @@ static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT,
return true;
}
-bool SanitizerCoverageModule::runOnFunction(Function &F) {
+void ModuleSanitizerCoverage::instrumentFunction(
+ Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
if (F.empty())
- return false;
+ return;
if (F.getName().find(".module_ctor") != std::string::npos)
- return false; // Should not instrument sanitizer init functions.
+ return; // Should not instrument sanitizer init functions.
if (F.getName().startswith("__sanitizer_"))
- return false; // Don't instrument __sanitizer_* callbacks.
+ return; // Don't instrument __sanitizer_* callbacks.
 // Don't touch available_externally functions; their actual body is elsewhere.
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
- return false;
+ return;
// Don't instrument MSVC CRT configuration helpers. They may run before normal
// initialization.
if (F.getName() == "__local_stdio_printf_options" ||
F.getName() == "__local_stdio_scanf_options")
- return false;
+ return;
if (isa<UnreachableInst>(F.getEntryBlock().getTerminator()))
- return false;
+ return;
// Don't instrument functions using SEH for now. Splitting basic blocks like
// we do for coverage breaks WinEHPrepare.
// FIXME: Remove this when SEH no longer uses landingpad pattern matching.
if (F.hasPersonalityFn() &&
isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
- return false;
+ return;
if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
SmallVector<Instruction *, 8> IndirCalls;
@@ -550,10 +597,8 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
SmallVector<BinaryOperator *, 8> DivTraceTargets;
SmallVector<GetElementPtrInst *, 8> GepTraceTargets;
- const DominatorTree *DT =
- &getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
- const PostDominatorTree *PDT =
- &getAnalysis<PostDominatorTreeWrapperPass>(F).getPostDomTree();
+ const DominatorTree *DT = DTCallback(F);
+ const PostDominatorTree *PDT = PDTCallback(F);
bool IsLeafFunc = true;
for (auto &BB : F) {
@@ -593,10 +638,9 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
InjectTraceForSwitch(F, SwitchTraceTargets);
InjectTraceForDiv(F, DivTraceTargets);
InjectTraceForGep(F, GepTraceTargets);
- return true;
}
-GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(
+GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
size_t NumElements, Function &F, Type *Ty, const char *Section) {
ArrayType *ArrayTy = ArrayType::get(Ty, NumElements);
auto Array = new GlobalVariable(
@@ -608,8 +652,9 @@ GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(
GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
Array->setComdat(Comdat);
Array->setSection(getSectionName(Section));
- Array->setAlignment(Ty->isPointerTy() ? DL->getPointerSize()
- : Ty->getPrimitiveSizeInBits() / 8);
+ Array->setAlignment(Align(Ty->isPointerTy()
+ ? DL->getPointerSize()
+ : Ty->getPrimitiveSizeInBits() / 8));
GlobalsToAppendToUsed.push_back(Array);
GlobalsToAppendToCompilerUsed.push_back(Array);
MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
@@ -619,7 +664,7 @@ GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(
}
GlobalVariable *
-SanitizerCoverageModule::CreatePCArray(Function &F,
+ModuleSanitizerCoverage::CreatePCArray(Function &F,
ArrayRef<BasicBlock *> AllBlocks) {
size_t N = AllBlocks.size();
assert(N);
@@ -646,7 +691,7 @@ SanitizerCoverageModule::CreatePCArray(Function &F,
return PCArray;
}
-void SanitizerCoverageModule::CreateFunctionLocalArrays(
+void ModuleSanitizerCoverage::CreateFunctionLocalArrays(
Function &F, ArrayRef<BasicBlock *> AllBlocks) {
if (Options.TracePCGuard)
FunctionGuardArray = CreateFunctionLocalArrayInSection(
@@ -660,7 +705,7 @@ void SanitizerCoverageModule::CreateFunctionLocalArrays(
FunctionPCsArray = CreatePCArray(F, AllBlocks);
}
-bool SanitizerCoverageModule::InjectCoverage(Function &F,
+bool ModuleSanitizerCoverage::InjectCoverage(Function &F,
ArrayRef<BasicBlock *> AllBlocks,
bool IsLeafFunc) {
if (AllBlocks.empty()) return false;
@@ -677,7 +722,7 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F,
// The cache is used to speed up recording the caller-callee pairs.
// The address of the caller is passed implicitly via caller PC.
// CacheSize is encoded in the name of the run-time function.
-void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
+void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls(
Function &F, ArrayRef<Instruction *> IndirCalls) {
if (IndirCalls.empty())
return;
@@ -696,7 +741,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
// __sanitizer_cov_trace_switch(CondValue,
// {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... })
-void SanitizerCoverageModule::InjectTraceForSwitch(
+void ModuleSanitizerCoverage::InjectTraceForSwitch(
Function &, ArrayRef<Instruction *> SwitchTraceTargets) {
for (auto I : SwitchTraceTargets) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
@@ -735,7 +780,7 @@ void SanitizerCoverageModule::InjectTraceForSwitch(
}
}
-void SanitizerCoverageModule::InjectTraceForDiv(
+void ModuleSanitizerCoverage::InjectTraceForDiv(
Function &, ArrayRef<BinaryOperator *> DivTraceTargets) {
for (auto BO : DivTraceTargets) {
IRBuilder<> IRB(BO);
@@ -753,7 +798,7 @@ void SanitizerCoverageModule::InjectTraceForDiv(
}
}
-void SanitizerCoverageModule::InjectTraceForGep(
+void ModuleSanitizerCoverage::InjectTraceForGep(
Function &, ArrayRef<GetElementPtrInst *> GepTraceTargets) {
for (auto GEP : GepTraceTargets) {
IRBuilder<> IRB(GEP);
@@ -764,7 +809,7 @@ void SanitizerCoverageModule::InjectTraceForGep(
}
}
-void SanitizerCoverageModule::InjectTraceForCmp(
+void ModuleSanitizerCoverage::InjectTraceForCmp(
Function &, ArrayRef<Instruction *> CmpTraceTargets) {
for (auto I : CmpTraceTargets) {
if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) {
@@ -799,7 +844,7 @@ void SanitizerCoverageModule::InjectTraceForCmp(
}
}
-void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
+void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
size_t Idx,
bool IsLeafFunc) {
BasicBlock::iterator IP = BB.getFirstInsertionPt();
@@ -842,8 +887,10 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
if (Options.StackDepth && IsEntryBB && !IsLeafFunc) {
// Check stack depth. If it's the deepest so far, record it.
- Function *GetFrameAddr =
- Intrinsic::getDeclaration(F.getParent(), Intrinsic::frameaddress);
+ Module *M = F.getParent();
+ Function *GetFrameAddr = Intrinsic::getDeclaration(
+ M, Intrinsic::frameaddress,
+ IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
auto FrameAddrPtr =
IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)});
auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy);
@@ -858,7 +905,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
std::string
-SanitizerCoverageModule::getSectionName(const std::string &Section) const {
+ModuleSanitizerCoverage::getSectionName(const std::string &Section) const {
if (TargetTriple.isOSBinFormatCOFF()) {
if (Section == SanCovCountersSectionName)
return ".SCOV$CM";
@@ -872,32 +919,29 @@ SanitizerCoverageModule::getSectionName(const std::string &Section) const {
}
std::string
-SanitizerCoverageModule::getSectionStart(const std::string &Section) const {
+ModuleSanitizerCoverage::getSectionStart(const std::string &Section) const {
if (TargetTriple.isOSBinFormatMachO())
return "\1section$start$__DATA$__" + Section;
return "__start___" + Section;
}
std::string
-SanitizerCoverageModule::getSectionEnd(const std::string &Section) const {
+ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const {
if (TargetTriple.isOSBinFormatMachO())
return "\1section$end$__DATA$__" + Section;
return "__stop___" + Section;
}
-
-char SanitizerCoverageModule::ID = 0;
-INITIALIZE_PASS_BEGIN(SanitizerCoverageModule, "sancov",
- "SanitizerCoverage: TODO."
- "ModulePass",
- false, false)
+char ModuleSanitizerCoverageLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov",
+ "Pass for instrumenting coverage on functions", false,
+ false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SanitizerCoverageModule, "sancov",
- "SanitizerCoverage: TODO."
- "ModulePass",
- false, false)
-ModulePass *llvm::createSanitizerCoverageModulePass(
+INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov",
+ "Pass for instrumenting coverage on functions", false,
+ false)
+ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
const SanitizerCoverageOptions &Options) {
- return new SanitizerCoverageModule(Options);
+ return new ModuleSanitizerCoverageLegacyPass(Options);
}
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 5be13fa745cb..ac274a155a80 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -92,11 +92,10 @@ namespace {
/// ensures the __tsan_init function is in the list of global constructors for
/// the module.
struct ThreadSanitizer {
- ThreadSanitizer(Module &M);
bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
private:
- void initializeCallbacks(Module &M);
+ void initialize(Module &M);
bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
bool instrumentAtomic(Instruction *I, const DataLayout &DL);
bool instrumentMemIntrinsic(Instruction *I);
@@ -108,8 +107,6 @@ private:
void InsertRuntimeIgnores(Function &F);
Type *IntptrTy;
- IntegerType *OrdTy;
- // Callbacks to run-time library are computed in doInitialization.
FunctionCallee TsanFuncEntry;
FunctionCallee TsanFuncExit;
FunctionCallee TsanIgnoreBegin;
@@ -130,7 +127,6 @@ private:
FunctionCallee TsanVptrUpdate;
FunctionCallee TsanVptrLoad;
FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
- Function *TsanCtorFunction;
};
struct ThreadSanitizerLegacyPass : FunctionPass {
@@ -143,16 +139,32 @@ struct ThreadSanitizerLegacyPass : FunctionPass {
private:
Optional<ThreadSanitizer> TSan;
};
+
+void insertModuleCtor(Module &M) {
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });
+}
+
} // namespace
PreservedAnalyses ThreadSanitizerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
- ThreadSanitizer TSan(*F.getParent());
+ ThreadSanitizer TSan;
if (TSan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
+PreservedAnalyses ThreadSanitizerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ insertModuleCtor(M);
+ return PreservedAnalyses::none();
+}
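
Since the ctor insertion is now module-level while instrumentation stays
per-function, a pipeline sketch (the helper name addTsan is hypothetical; the
adaptor is the standard new-PM one):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"

    void addTsan(llvm::ModulePassManager &MPM) {
      // The module-level run inserts the __tsan_init ctor once per module...
      MPM.addPass(llvm::ThreadSanitizerPass());
      // ...and the function-level run instruments each function body.
      MPM.addPass(llvm::createModuleToFunctionPassAdaptor(
          llvm::ThreadSanitizerPass()));
    }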
+
char ThreadSanitizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan",
"ThreadSanitizer: detects data races.", false, false)
@@ -169,12 +181,13 @@ void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool ThreadSanitizerLegacyPass::doInitialization(Module &M) {
- TSan.emplace(M);
+ insertModuleCtor(M);
+ TSan.emplace();
return true;
}
bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
TSan->sanitizeFunction(F, TLI);
return true;
}
@@ -183,7 +196,10 @@ FunctionPass *llvm::createThreadSanitizerLegacyPassPass() {
return new ThreadSanitizerLegacyPass();
}
-void ThreadSanitizer::initializeCallbacks(Module &M) {
+void ThreadSanitizer::initialize(Module &M) {
+ const DataLayout &DL = M.getDataLayout();
+ IntptrTy = DL.getIntPtrType(M.getContext());
+
IRBuilder<> IRB(M.getContext());
AttributeList Attr;
Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex,
@@ -197,7 +213,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
IRB.getVoidTy());
TsanIgnoreEnd =
M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy());
- OrdTy = IRB.getInt32Ty();
+ IntegerType *OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
const unsigned ByteSize = 1U << i;
const unsigned BitSize = ByteSize * 8;
@@ -280,20 +296,6 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
}
-ThreadSanitizer::ThreadSanitizer(Module &M) {
- const DataLayout &DL = M.getDataLayout();
- IntptrTy = DL.getIntPtrType(M.getContext());
- std::tie(TsanCtorFunction, std::ignore) =
- getOrCreateSanitizerCtorAndInitFunctions(
- M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
- /*InitArgs=*/{},
- // This callback is invoked when the functions are created the first
- // time. Hook them into the global ctors list in that case:
- [&](Function *Ctor, FunctionCallee) {
- appendToGlobalCtors(M, Ctor, 0);
- });
-}
-
static bool isVtableAccess(Instruction *I) {
if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa))
return Tag->isTBAAVtableAccess();
@@ -436,9 +438,9 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
const TargetLibraryInfo &TLI) {
 // This is required to prevent instrumenting a call to __tsan_init from within
// the module constructor.
- if (&F == TsanCtorFunction)
+ if (F.getName() == kTsanModuleCtorName)
return false;
- initializeCallbacks(*F.getParent());
+ initialize(*F.getParent());
SmallVector<Instruction*, 8> AllLoadsAndStores;
SmallVector<Instruction*, 8> LocalLoadsAndStores;
SmallVector<Instruction*, 8> AtomicAccesses;
diff --git a/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
new file mode 100644
index 000000000000..604726d4f40f
--- /dev/null
+++ b/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
@@ -0,0 +1,78 @@
+//===- ValueProfileCollector.cpp - determine what to value profile --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation of the ValueProfileCollector via ValueProfileCollectorImpl
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueProfilePlugins.inc"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
+
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+/// A plugin-based class that takes an arbitrary number of Plugin types.
+/// Each plugin type must satisfy the following API:
+/// 1) its constructor must take a `Function &F`. Typically, the plugin
+///    scans the function looking for candidates.
+/// 2) it must contain a member function with the following signature and name:
+///      void run(std::vector<CandidateInfo> &Candidates);
+///    which appends its results to the vector parameter.
+///
+/// Plugins are defined in ValueProfilePlugins.inc
+template <class... Ts> class PluginChain;
+
+/// The type PluginChainFinal is the final chain of plugins that will be used by
+/// ValueProfileCollectorImpl.
+using PluginChainFinal = PluginChain<VP_PLUGIN_LIST>;
+
+template <> class PluginChain<> {
+public:
+ PluginChain(Function &F) {}
+ void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {}
+};
+
+template <class PluginT, class... Ts>
+class PluginChain<PluginT, Ts...> : public PluginChain<Ts...> {
+ PluginT Plugin;
+ using Base = PluginChain<Ts...>;
+
+public:
+ PluginChain(Function &F) : PluginChain<Ts...>(F), Plugin(F) {}
+
+ void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {
+ if (K == PluginT::Kind)
+ Plugin.run(Candidates);
+ Base::get(K, Candidates);
+ }
+};
+
+} // end anonymous namespace
+
+/// ValueProfileCollectorImpl inherits the API of PluginChainFinal.
+class ValueProfileCollector::ValueProfileCollectorImpl : public PluginChainFinal {
+public:
+ using PluginChainFinal::PluginChainFinal;
+};
+
+ValueProfileCollector::ValueProfileCollector(Function &F)
+ : PImpl(new ValueProfileCollectorImpl(F)) {}
+
+ValueProfileCollector::~ValueProfileCollector() = default;
+
+std::vector<CandidateInfo>
+ValueProfileCollector::get(InstrProfValueKind Kind) const {
+ std::vector<CandidateInfo> Result;
+ PImpl->get(Kind, Result);
+ return Result;
+}
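
A self-contained miniature of the PluginChain dispatch pattern used above
(illustrative only: Kind is a plain int here, and the Function& constructor
argument is dropped):

    #include <vector>

    template <class... Ts> struct Chain;

    // Base case: end of the chain, nothing to run.
    template <> struct Chain<> {
      void get(int, std::vector<int> &) {}
    };

    // Recursive case: run the matching plugin, then recurse into the rest.
    template <class P, class... Ts> struct Chain<P, Ts...> : Chain<Ts...> {
      P Plugin;
      void get(int K, std::vector<int> &Out) {
        if (K == P::Kind)
          Plugin.run(Out);
        Chain<Ts...>::get(K, Out);
      }
    };

    struct A {
      static constexpr int Kind = 0;
      void run(std::vector<int> &O) { O.push_back(1); }
    };
    struct B {
      static constexpr int Kind = 1;
      void run(std::vector<int> &O) { O.push_back(2); }
    };

    int main() {
      Chain<A, B> C;
      std::vector<int> Out;
      C.get(1, Out); // Out == {2}: only B::run fired
    }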
diff --git a/lib/Transforms/Instrumentation/ValueProfileCollector.h b/lib/Transforms/Instrumentation/ValueProfileCollector.h
new file mode 100644
index 000000000000..ff883c8d0c77
--- /dev/null
+++ b/lib/Transforms/Instrumentation/ValueProfileCollector.h
@@ -0,0 +1,79 @@
+//===- ValueProfileCollector.h - determine what to value profile ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a utility class, ValueProfileCollector, that is used to
+// determine what kinds of llvm::Value are worth value-profiling, at which
+// point in the program, and which instruction holds the Value Profile metadata.
+// Currently, the only users of this utility are the PGOInstrumentation[Gen|Use]
+// passes.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
+#define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+
+namespace llvm {
+
+/// Utility analysis that determines what values are worth profiling.
+/// The actual logic is inside the ValueProfileCollectorImpl, whose job is to
+/// populate the Candidates vector.
+///
+/// Value profiling an expression means tracking the values that the expression
+/// takes at runtime and the frequency of each value.
+/// It is important to distinguish between two sets of value profiles for a
+/// particular expression:
+/// 1) The set of values at the point of evaluation.
+/// 2) The set of values at the point of use.
+/// In some cases, the two sets are identical, but it's not unusual for the two
+/// to differ.
+///
+/// To elaborate more, consider this C code, and focus on the expression `nn`:
+/// void foo(int nn, bool b) {
+/// if (b) memcpy(x, y, nn);
+/// }
+/// The point of evaluation can be as early as the start of the function, and
+/// let's say the value profile for `nn` is:
+/// total=100; (value,freq) set = {(8,10), (32,50)}
+/// The point of use is right before we call memcpy, and since we execute the
+/// memcpy conditionally, the value profile of `nn` can be:
+/// total=15; (value,freq) set = {(8,10), (4,5)}
+///
+/// For this reason, a plugin is responsible for computing the insertion point
+/// for each value to be profiled. The `CandidateInfo` structure encapsulates
+/// all the information needed for each value profile site.
+class ValueProfileCollector {
+public:
+ struct CandidateInfo {
+ Value *V; // The value to profile.
+ Instruction *InsertPt; // Insert the VP lib call before this instr.
+ Instruction *AnnotatedInst; // Where metadata is attached.
+ };
+
+ ValueProfileCollector(Function &Fn);
+ ValueProfileCollector(ValueProfileCollector &&) = delete;
+ ValueProfileCollector &operator=(ValueProfileCollector &&) = delete;
+
+ ValueProfileCollector(const ValueProfileCollector &) = delete;
+ ValueProfileCollector &operator=(const ValueProfileCollector &) = delete;
+ ~ValueProfileCollector();
+
+ /// Returns a list of value-profiling candidates of the given kind.
+ std::vector<CandidateInfo> get(InstrProfValueKind Kind) const;
+
+private:
+ class ValueProfileCollectorImpl;
+ std::unique_ptr<ValueProfileCollectorImpl> PImpl;
+};
+
+} // namespace llvm
+
+#endif
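
A hedged usage sketch of this API, roughly how the PGO passes drive it (the
helper name collectMemOpCandidates is hypothetical):

    #include "ValueProfileCollector.h"
    using namespace llvm;

    void collectMemOpCandidates(Function &F) {
      ValueProfileCollector VPC(F);
      // Ask only for memop-size candidates; other kinds go unqueried.
      for (const ValueProfileCollector::CandidateInfo &C :
           VPC.get(IPVK_MemOPSize)) {
        // C.V is the profiled value; the VP runtime call goes before
        // C.InsertPt, and the !prof metadata lands on C.AnnotatedInst.
        (void)C;
      }
    }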
diff --git a/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
new file mode 100644
index 000000000000..4cc4c6c848c3
--- /dev/null
+++ b/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -0,0 +1,75 @@
+//=== ValueProfilePlugins.inc - set of plugins used by ValueProfileCollector =//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a set of plugin classes used in ValueProfileCollectorImpl.
+// Each plugin is responsible for collecting Value Profiling candidates for a
+// particular optimization.
+// Each plugin must satisfy the interface described in ValueProfileCollector.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueProfileCollector.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
+#include "llvm/IR/InstVisitor.h"
+
+using namespace llvm;
+using CandidateInfo = ValueProfileCollector::CandidateInfo;
+
+///--------------------------- MemIntrinsicPlugin ------------------------------
+class MemIntrinsicPlugin : public InstVisitor<MemIntrinsicPlugin> {
+ Function &F;
+ std::vector<CandidateInfo> *Candidates;
+
+public:
+ static constexpr InstrProfValueKind Kind = IPVK_MemOPSize;
+
+ MemIntrinsicPlugin(Function &Fn) : F(Fn), Candidates(nullptr) {}
+
+ void run(std::vector<CandidateInfo> &Cs) {
+ Candidates = &Cs;
+ visit(F);
+ Candidates = nullptr;
+ }
+ void visitMemIntrinsic(MemIntrinsic &MI) {
+ Value *Length = MI.getLength();
+ // Do not instrument constant-length calls.
+ if (isa<ConstantInt>(Length))
+ return;
+
+ Instruction *InsertPt = &MI;
+ Instruction *AnnotatedInst = &MI;
+ Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst});
+ }
+};
+
+///------------------------ IndirectCallPromotionPlugin ------------------------
+class IndirectCallPromotionPlugin {
+ Function &F;
+
+public:
+ static constexpr InstrProfValueKind Kind = IPVK_IndirectCallTarget;
+
+ IndirectCallPromotionPlugin(Function &Fn) : F(Fn) {}
+
+ void run(std::vector<CandidateInfo> &Candidates) {
+ std::vector<Instruction *> Result = findIndirectCalls(F);
+ for (Instruction *I : Result) {
+ Value *Callee = CallSite(I).getCalledValue();
+ Instruction *InsertPt = I;
+ Instruction *AnnotatedInst = I;
+ Candidates.emplace_back(CandidateInfo{Callee, InsertPt, AnnotatedInst});
+ }
+ }
+};
+
+///----------------------- Registration of the plugins -------------------------
+/// For now, registering a plugin with the ValueProfileCollector is done by
+/// adding the plugin type to the VP_PLUGIN_LIST macro.
+#define VP_PLUGIN_LIST \
+ MemIntrinsicPlugin, \
+ IndirectCallPromotionPlugin
diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp
index 3243481dee0d..26dd416d6184 100644
--- a/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/lib/Transforms/ObjCARC/PtrState.cpp
@@ -275,6 +275,10 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
} else {
InsertAfter = std::next(Inst->getIterator());
}
+
+ if (InsertAfter != BB->end())
+ InsertAfter = skipDebugIntrinsics(InsertAfter);
+
InsertReverseInsertPt(&*InsertAfter);
};
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index de9a62e88c27..0e9f03a06061 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -93,9 +93,7 @@ static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
const SCEV *AlignSCEV,
ScalarEvolution *SE) {
// DiffUnits = Diff % int64_t(Alignment)
- const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV);
- const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV);
- const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV);
+ const SCEV *DiffUnitsSCEV = SE->getURemExpr(DiffSCEV, AlignSCEV);
LLVM_DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is "
<< *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");
@@ -323,7 +321,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
LI->getPointerOperand(), SE);
if (NewAlignment > LI->getAlignment()) {
- LI->setAlignment(NewAlignment);
+ LI->setAlignment(MaybeAlign(NewAlignment));
++NumLoadAlignChanged;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
@@ -331,7 +329,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
SI->getPointerOperand(), SE);
if (NewAlignment > SI->getAlignment()) {
- SI->setAlignment(NewAlignment);
+ SI->setAlignment(MaybeAlign(NewAlignment));
++NumStoreAlignChanged;
}
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 3519b000a33f..c3fba923104f 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -562,7 +562,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
if (skipFunction(F))
return false;
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return doCallSiteSplitting(F, TLI, TTI, DT);
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 98243a23f1ef..9f340afbf7c2 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -204,7 +204,7 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
/// set found in \p BBs.
static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
BasicBlock *Entry,
- SmallPtrSet<BasicBlock *, 8> &BBs) {
+ SetVector<BasicBlock *> &BBs) {
assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
// Nodes on the current path to the root.
SmallPtrSet<BasicBlock *, 8> Path;
@@ -257,7 +257,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
// Visit Orders in bottom-up order.
using InsertPtsCostPair =
- std::pair<SmallPtrSet<BasicBlock *, 16>, BlockFrequency>;
+ std::pair<SetVector<BasicBlock *>, BlockFrequency>;
// InsertPtsMap is a map from a BB to the best insertion points for the
// subtree of BB (subtree not including the BB itself).
@@ -266,7 +266,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) {
BasicBlock *Node = *RIt;
bool NodeInBBs = BBs.count(Node);
- SmallPtrSet<BasicBlock *, 16> &InsertPts = InsertPtsMap[Node].first;
+ auto &InsertPts = InsertPtsMap[Node].first;
BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;
// Return the optimal insert points in BBs.
@@ -283,7 +283,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock();
// Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child
// will update its parent's ParentInsertPts and ParentPtsFreq.
- SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first;
+ auto &ParentInsertPts = InsertPtsMap[Parent].first;
BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;
// Choose to insert in Node or in subtree of Node.
// Don't hoist to EHPad because we may not find a proper place to insert
@@ -305,12 +305,12 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
}
/// Find an insertion point that dominates all uses.
-SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint(
+SetVector<Instruction *> ConstantHoistingPass::findConstantInsertionPoint(
const ConstantInfo &ConstInfo) const {
assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
// Collect all basic blocks.
- SmallPtrSet<BasicBlock *, 8> BBs;
- SmallPtrSet<Instruction *, 8> InsertPts;
+ SetVector<BasicBlock *> BBs;
+ SetVector<Instruction *> InsertPts;
for (auto const &RCI : ConstInfo.RebasedConstants)
for (auto const &U : RCI.Uses)
BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
@@ -333,15 +333,13 @@ SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint(
while (BBs.size() >= 2) {
BasicBlock *BB, *BB1, *BB2;
- BB1 = *BBs.begin();
- BB2 = *std::next(BBs.begin());
+ BB1 = BBs.pop_back_val();
+ BB2 = BBs.pop_back_val();
BB = DT->findNearestCommonDominator(BB1, BB2);
if (BB == Entry) {
InsertPts.insert(&Entry->front());
return InsertPts;
}
- BBs.erase(BB1);
- BBs.erase(BB2);
BBs.insert(BB);
}
assert((BBs.size() == 1) && "Expected only one element.");
@@ -403,7 +401,7 @@ void ConstantHoistingPass::collectConstantCandidates(
return;
// Get offset from the base GV.
- PointerType *GVPtrTy = dyn_cast<PointerType>(BaseGV->getType());
+ PointerType *GVPtrTy = cast<PointerType>(BaseGV->getType());
IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace());
APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true);
auto *GEPO = cast<GEPOperator>(ConstExpr);
@@ -830,7 +828,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec =
BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
for (auto const &ConstInfo : ConstInfoVec) {
- SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
+ SetVector<Instruction *> IPSet = findConstantInsertionPoint(ConstInfo);
// We can have an empty set if the function contains unreachable blocks.
if (IPSet.empty())
continue;
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 770321c740a0..e9e6afe3fdd4 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -82,7 +82,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
bool Changed = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
while (!WorkList.empty()) {
SmallVector<Instruction*, 16> NewWorkListVec;
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 89497177524f..2ef85268df48 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -62,6 +62,23 @@ STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
STATISTIC(NumUDivs, "Number of udivs whose width was decreased");
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
+STATISTIC(NumSExt, "Number of sext converted to zext");
+STATISTIC(NumAnd, "Number of ands removed");
+STATISTIC(NumNW, "Number of no-wrap deductions");
+STATISTIC(NumNSW, "Number of no-signed-wrap deductions");
+STATISTIC(NumNUW, "Number of no-unsigned-wrap deductions");
+STATISTIC(NumAddNW, "Number of no-wrap deductions for add");
+STATISTIC(NumAddNSW, "Number of no-signed-wrap deductions for add");
+STATISTIC(NumAddNUW, "Number of no-unsigned-wrap deductions for add");
+STATISTIC(NumSubNW, "Number of no-wrap deductions for sub");
+STATISTIC(NumSubNSW, "Number of no-signed-wrap deductions for sub");
+STATISTIC(NumSubNUW, "Number of no-unsigned-wrap deductions for sub");
+STATISTIC(NumMulNW, "Number of no-wrap deductions for mul");
+STATISTIC(NumMulNSW, "Number of no-signed-wrap deductions for mul");
+STATISTIC(NumMulNUW, "Number of no-unsigned-wrap deductions for mul");
+STATISTIC(NumShlNW, "Number of no-wrap deductions for shl");
+STATISTIC(NumShlNSW, "Number of no-signed-wrap deductions for shl");
+STATISTIC(NumShlNUW, "Number of no-unsigned-wrap deductions for shl");
STATISTIC(NumOverflows, "Number of overflow checks removed");
STATISTIC(NumSaturating,
"Number of saturating arithmetics converted to normal arithmetics");
@@ -85,6 +102,7 @@ namespace {
AU.addRequired<LazyValueInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<LazyValueInfoWrapperPass>();
}
};
@@ -416,37 +434,96 @@ static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
return NWRegion.contains(LRange);
}
-static void processOverflowIntrinsic(WithOverflowInst *WO) {
- IRBuilder<> B(WO);
- Value *NewOp = B.CreateBinOp(
- WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName());
- // Constant-folding could have happened.
- if (auto *Inst = dyn_cast<Instruction>(NewOp)) {
- if (WO->isSigned())
+static void setDeducedOverflowingFlags(Value *V, Instruction::BinaryOps Opcode,
+ bool NewNSW, bool NewNUW) {
+ Statistic *OpcNW, *OpcNSW, *OpcNUW;
+ switch (Opcode) {
+ case Instruction::Add:
+ OpcNW = &NumAddNW;
+ OpcNSW = &NumAddNSW;
+ OpcNUW = &NumAddNUW;
+ break;
+ case Instruction::Sub:
+ OpcNW = &NumSubNW;
+ OpcNSW = &NumSubNSW;
+ OpcNUW = &NumSubNUW;
+ break;
+ case Instruction::Mul:
+ OpcNW = &NumMulNW;
+ OpcNSW = &NumMulNSW;
+ OpcNUW = &NumMulNUW;
+ break;
+ case Instruction::Shl:
+ OpcNW = &NumShlNW;
+ OpcNSW = &NumShlNSW;
+ OpcNUW = &NumShlNUW;
+ break;
+ default:
+ llvm_unreachable("Will not be called with other binops");
+ }
+
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (NewNSW) {
+ ++NumNW;
+ ++*OpcNW;
+ ++NumNSW;
+ ++*OpcNSW;
+ if (Inst)
Inst->setHasNoSignedWrap();
- else
+ }
+ if (NewNUW) {
+ ++NumNW;
+ ++*OpcNW;
+ ++NumNUW;
+ ++*OpcNUW;
+ if (Inst)
Inst->setHasNoUnsignedWrap();
}
+}
- Value *NewI = B.CreateInsertValue(UndefValue::get(WO->getType()), NewOp, 0);
- NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(WO->getContext()), 1);
+static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI);
+
+// Rewrite this with.overflow intrinsic as a non-overflowing regular binary op.
+static void processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) {
+ IRBuilder<> B(WO);
+ Instruction::BinaryOps Opcode = WO->getBinaryOp();
+ bool NSW = WO->isSigned();
+ bool NUW = !WO->isSigned();
+
+ Value *NewOp =
+ B.CreateBinOp(Opcode, WO->getLHS(), WO->getRHS(), WO->getName());
+ setDeducedOverflowingFlags(NewOp, Opcode, NSW, NUW);
+
+ StructType *ST = cast<StructType>(WO->getType());
+ Constant *Struct = ConstantStruct::get(ST,
+ { UndefValue::get(ST->getElementType(0)),
+ ConstantInt::getFalse(ST->getElementType(1)) });
+ Value *NewI = B.CreateInsertValue(Struct, NewOp, 0);
WO->replaceAllUsesWith(NewI);
WO->eraseFromParent();
++NumOverflows;
+
+ // See if we can infer the other no-wrap too.
+ if (auto *BO = dyn_cast<BinaryOperator>(NewOp))
+ processBinOp(BO, LVI);
}
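
A plain-C++ analogue of this rewrite (illustrative only): once LVI proves the
operation cannot overflow, the {result, overflow-bit} struct degenerates to
the plain op plus a statically-false bit.

    #include <cassert>
    #include <utility>

    // Stand-in for llvm.sadd.with.overflow after no-overflow is proven.
    std::pair<int, bool> saddWithOverflowNowrap(int A, int B) {
      return {A + B, false}; // overflow bit is statically false
    }

    int main() {
      auto R = saddWithOverflowNowrap(2, 3);
      assert(R.first == 5 && !R.second);
    }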
-static void processSaturatingInst(SaturatingInst *SI) {
+static void processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) {
+ Instruction::BinaryOps Opcode = SI->getBinaryOp();
+ bool NSW = SI->isSigned();
+ bool NUW = !SI->isSigned();
BinaryOperator *BinOp = BinaryOperator::Create(
- SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+ Opcode, SI->getLHS(), SI->getRHS(), SI->getName(), SI);
BinOp->setDebugLoc(SI->getDebugLoc());
- if (SI->isSigned())
- BinOp->setHasNoSignedWrap();
- else
- BinOp->setHasNoUnsignedWrap();
+ setDeducedOverflowingFlags(BinOp, Opcode, NSW, NUW);
SI->replaceAllUsesWith(BinOp);
SI->eraseFromParent();
++NumSaturating;
+
+ // See if we can infer the other no-wrap too.
+ if (auto *BO = dyn_cast<BinaryOperator>(BinOp))
+ processBinOp(BO, LVI);
}
/// Infer nonnull attributes for the arguments at the specified callsite.
@@ -456,14 +533,14 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
if (auto *WO = dyn_cast<WithOverflowInst>(CS.getInstruction())) {
if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
- processOverflowIntrinsic(WO);
+ processOverflowIntrinsic(WO, LVI);
return true;
}
}
if (auto *SI = dyn_cast<SaturatingInst>(CS.getInstruction())) {
if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) {
- processSaturatingInst(SI);
+ processSaturatingInst(SI, LVI);
return true;
}
}
@@ -632,6 +709,27 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
return true;
}
+static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
+ if (SDI->getType()->isVectorTy())
+ return false;
+
+ Value *Base = SDI->getOperand(0);
+
+ Constant *Zero = ConstantInt::get(Base->getType(), 0);
+ if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, Base, Zero, SDI) !=
+ LazyValueInfo::True)
+ return false;
+
+ ++NumSExt;
+ auto *ZExt =
+ CastInst::CreateZExtOrBitCast(Base, SDI->getType(), SDI->getName(), SDI);
+ ZExt->setDebugLoc(SDI->getDebugLoc());
+ SDI->replaceAllUsesWith(ZExt);
+ SDI->eraseFromParent();
+
+ return true;
+}
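
The transform is sound because sign- and zero-extension agree on non-negative
values; a minimal check in plain C++:

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Base = 1234; // stand-in for a value LVI proved to be >= 0
      int64_t SExt = (int64_t)Base;           // sign extension
      int64_t ZExt = (int64_t)(uint32_t)Base; // zero extension
      assert(SExt == ZExt); // identical whenever Base is non-negative
      return 0;
    }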
+
static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
using OBO = OverflowingBinaryOperator;
@@ -648,6 +746,7 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
BasicBlock *BB = BinOp->getParent();
+ Instruction::BinaryOps Opcode = BinOp->getOpcode();
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
@@ -655,24 +754,48 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp);
bool Changed = false;
+ bool NewNUW = false, NewNSW = false;
if (!NUW) {
ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- BinOp->getOpcode(), RRange, OBO::NoUnsignedWrap);
- bool NewNUW = NUWRange.contains(LRange);
- BinOp->setHasNoUnsignedWrap(NewNUW);
+ Opcode, RRange, OBO::NoUnsignedWrap);
+ NewNUW = NUWRange.contains(LRange);
Changed |= NewNUW;
}
if (!NSW) {
ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- BinOp->getOpcode(), RRange, OBO::NoSignedWrap);
- bool NewNSW = NSWRange.contains(LRange);
- BinOp->setHasNoSignedWrap(NewNSW);
+ Opcode, RRange, OBO::NoSignedWrap);
+ NewNSW = NSWRange.contains(LRange);
Changed |= NewNSW;
}
+ setDeducedOverflowingFlags(BinOp, Opcode, NewNSW, NewNUW);
+
return Changed;
}
+static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) {
+ if (BinOp->getType()->isVectorTy())
+ return false;
+
+ // Pattern match (and lhs, C) where C includes a superset of bits which might
+ // be set in lhs. This is a common truncation idiom created by instcombine.
+ BasicBlock *BB = BinOp->getParent();
+ Value *LHS = BinOp->getOperand(0);
+ ConstantInt *RHS = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+ if (!RHS || !RHS->getValue().isMask())
+ return false;
+
+ ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp);
+ if (!LRange.getUnsignedMax().ule(RHS->getValue()))
+ return false;
+
+ BinOp->replaceAllUsesWith(LHS);
+ BinOp->eraseFromParent();
+ NumAnd++;
+ return true;
+}
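
A minimal check of the idiom being removed, in plain C++ (the constants are
arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t LHS = 200;   // stand-in for a value whose range is <= 255
      uint32_t Mask = 0xFF; // mask covering every bit LHS might have set
      assert((LHS & Mask) == LHS); // the 'and' is a no-op and can be erased
      return 0;
    }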
+
static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
if (Constant *C = LVI->getConstant(V, At->getParent(), At))
return C;
@@ -740,10 +863,18 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
case Instruction::AShr:
BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
break;
+ case Instruction::SExt:
+ BBChanged |= processSExt(cast<SExtInst>(II), LVI);
+ break;
case Instruction::Add:
case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI);
break;
+ case Instruction::And:
+ BBChanged |= processAnd(cast<BinaryOperator>(II), LVI);
+ break;
}
}
@@ -796,5 +927,6 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses PA;
PA.preserve<GlobalsAA>();
PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LazyValueAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 479e0ed74074..a79d775aa7f3 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -38,17 +38,19 @@ namespace {
//===--------------------------------------------------------------------===//
// DeadInstElimination pass implementation
//
- struct DeadInstElimination : public BasicBlockPass {
- static char ID; // Pass identification, replacement for typeid
- DeadInstElimination() : BasicBlockPass(ID) {
- initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
- }
- bool runOnBasicBlock(BasicBlock &BB) override {
- if (skipBasicBlock(BB))
- return false;
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
- bool Changed = false;
+struct DeadInstElimination : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DeadInstElimination() : FunctionPass(ID) {
+ initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+
+ bool Changed = false;
+ for (auto &BB : F) {
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
Instruction *Inst = &*DI++;
if (isInstructionTriviallyDead(Inst, TLI)) {
@@ -60,13 +62,14 @@ namespace {
++DIEEliminated;
}
}
- return Changed;
}
+ return Changed;
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
- };
+};
}
char DeadInstElimination::ID = 0;
@@ -154,7 +157,7 @@ struct DCELegacyPass : public FunctionPass {
return false;
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
return eliminateDeadCode(F, TLI);
}
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a81645745b48..685de82810ed 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1254,8 +1254,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
auto *SI = new StoreInst(
ConstantInt::get(Earlier->getValueOperand()->getType(), Merged),
- Earlier->getPointerOperand(), false, Earlier->getAlignment(),
- Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite);
+ Earlier->getPointerOperand(), false,
+ MaybeAlign(Earlier->getAlignment()), Earlier->getOrdering(),
+ Earlier->getSyncScopeID(), DepWrite);
unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa,
LLVMContext::MD_alias_scope,
@@ -1361,7 +1362,7 @@ public:
MemoryDependenceResults *MD =
&getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
return eliminateDeadStores(F, AA, MD, DT, TLI);
}
diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp
index 876681b4f9de..934853507478 100644
--- a/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -1,4 +1,4 @@
-//===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===//
+//===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass hoists and/or decomposes integer division and remainder
+// This pass hoists and/or decomposes/recomposes integer division and remainder
// instructions to enable CFG improvements and better codegen.
//
//===----------------------------------------------------------------------===//
@@ -19,37 +19,105 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "div-rem-pairs"
STATISTIC(NumPairs, "Number of div/rem pairs");
+STATISTIC(NumRecomposed, "Number of instructions recomposed");
STATISTIC(NumHoisted, "Number of instructions hoisted");
STATISTIC(NumDecomposed, "Number of instructions decomposed");
DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
"Controls transformations in div-rem-pairs pass");
-/// Find matching pairs of integer div/rem ops (they have the same numerator,
-/// denominator, and signedness). If they exist in different basic blocks, bring
-/// them together by hoisting or replace the common division operation that is
-/// implicit in the remainder:
-/// X % Y <--> X - ((X / Y) * Y).
-///
-/// We can largely ignore the normal safety and cost constraints on speculation
-/// of these ops when we find a matching pair. This is because we are already
-/// guaranteed that any exceptions and most cost are already incurred by the
-/// first member of the pair.
-///
-/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
-/// SimplifyCFG, but it's split off on its own because it's different enough
-/// that it doesn't quite match the stated objectives of those passes.
-static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
- const DominatorTree &DT) {
- bool Changed = false;
+namespace {
+struct ExpandedMatch {
+ DivRemMapKey Key;
+ Instruction *Value;
+};
+} // namespace
+
+/// See if we can match the form we expand into:
+/// X - ((X ?/ Y) * Y)
+/// which is equivalent to:
+/// X ?% Y
+static llvm::Optional<ExpandedMatch> matchExpandedRem(Instruction &I) {
+ Value *Dividend, *XroundedDownToMultipleOfY;
+ if (!match(&I, m_Sub(m_Value(Dividend), m_Value(XroundedDownToMultipleOfY))))
+ return llvm::None;
+
+ Value *Divisor;
+ Instruction *Div;
+ // Look for ((X / Y) * Y)
+ if (!match(
+ XroundedDownToMultipleOfY,
+ m_c_Mul(m_CombineAnd(m_IDiv(m_Specific(Dividend), m_Value(Divisor)),
+ m_Instruction(Div)),
+ m_Deferred(Divisor))))
+ return llvm::None;
+
+ ExpandedMatch M;
+ M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv;
+ M.Key.Dividend = Dividend;
+ M.Key.Divisor = Divisor;
+ M.Value = &I;
+ return M;
+}
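
The equivalence this matcher relies on, checked in plain C++ with arbitrary
values:

    #include <cassert>

    int main() {
      int X = 23, Y = 5;
      int Expanded = X - ((X / Y) * Y); // the expanded form matched above
      assert(Expanded == X % Y);        // same as a single srem
      return 0;
    }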
+
+/// A thin wrapper to store two values that we matched as div-rem pair.
+/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
+struct DivRemPairWorklistEntry {
+ /// The actual udiv/sdiv instruction. Source of truth.
+ AssertingVH<Instruction> DivInst;
+
+ /// The instruction that we have matched as a remainder instruction.
+ /// Should only be used as Value, don't introspect it.
+ AssertingVH<Instruction> RemInst;
+
+ DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
+ : DivInst(DivInst_), RemInst(RemInst_) {
+ assert((DivInst->getOpcode() == Instruction::UDiv ||
+ DivInst->getOpcode() == Instruction::SDiv) &&
+ "Not a division.");
+ assert(DivInst->getType() == RemInst->getType() && "Types should match.");
+ // We can't check anything else about the remainder instruction;
+ // it's not strictly required to be a urem/srem.
+ }
+ /// The type for this pair, identical for both the div and rem.
+ Type *getType() const { return DivInst->getType(); }
+
+ /// Is this pair signed or unsigned?
+ bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }
+
+ /// In this pair, what are the dividend and divisor?
+ Value *getDividend() const { return DivInst->getOperand(0); }
+ Value *getDivisor() const { return DivInst->getOperand(1); }
+
+ bool isRemExpanded() const {
+ switch (RemInst->getOpcode()) {
+ case Instruction::SRem:
+ case Instruction::URem:
+ return false; // single 'rem' instruction - unexpanded form.
+ default:
+ return true; // anything else means we have remainder in expanded form.
+ }
+ }
+};
+using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
+
+/// Find matching pairs of integer div/rem ops (they have the same numerator,
+/// denominator, and signedness). Place those pairs into a worklist for further
+/// processing. This indirection is needed because we will be doing RAUW, and
+/// if one of the rem instructions we change happens to be an input to another
+/// div/rem pair in the maps, we'd have problems.
+static DivRemWorklistTy getWorklist(Function &F) {
// Insert all divide and remainder instructions into maps keyed by their
// operands and opcode (signed or unsigned).
DenseMap<DivRemMapKey, Instruction *> DivMap;
@@ -66,9 +134,14 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
else if (I.getOpcode() == Instruction::URem)
RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
+ else if (auto Match = matchExpandedRem(I))
+ RemMap[Match->Key] = Match->Value;
}
}
+ // We'll accumulate the matching pairs of div-rem instructions here.
+ DivRemWorklistTy Worklist;
+
// We can iterate over either map because we are only looking for matched
// pairs. Choose remainders for efficiency because they are usually even more
// rare than division.
@@ -78,12 +151,77 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
if (!DivInst)
continue;
- // We have a matching pair of div/rem instructions. If one dominates the
- // other, hoist and/or replace one.
+ // We have a matching pair of div/rem instructions.
NumPairs++;
Instruction *RemInst = RemPair.second;
- bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
- bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
+
+ // Place it in the worklist.
+ Worklist.emplace_back(DivInst, RemInst);
+ }
+
+ return Worklist;
+}
+
+/// Find matching pairs of integer div/rem ops (they have the same numerator,
+/// denominator, and signedness). If they exist in different basic blocks, bring
+/// them together by hoisting or replace the common division operation that is
+/// implicit in the remainder:
+/// X % Y <--> X - ((X / Y) * Y).
+///
+/// We can largely ignore the normal safety and cost constraints on speculation
+/// of these ops when we find a matching pair. This is because we are already
+/// guaranteed that any exceptions and most cost are already incurred by the
+/// first member of the pair.
+///
+/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
+/// SimplifyCFG, but it's split off on its own because it's different enough
+/// that it doesn't quite match the stated objectives of those passes.
+static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
+ const DominatorTree &DT) {
+ bool Changed = false;
+
+ // Get the matching pairs of div-rem instructions. We want this extra
+ // indirection to avoid dealing with having to RAUW the keys of the maps.
+ DivRemWorklistTy Worklist = getWorklist(F);
+
+ // Process each entry in the worklist.
+ for (DivRemPairWorklistEntry &E : Worklist) {
+ if (!DebugCounter::shouldExecute(DRPCounter))
+ continue;
+
+ bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
+
+ auto &DivInst = E.DivInst;
+ auto &RemInst = E.RemInst;
+
+ const bool RemOriginallyWasInExpandedForm = E.isRemExpanded();
+ (void)RemOriginallyWasInExpandedForm; // suppress unused variable warning
+
+ if (HasDivRemOp && E.isRemExpanded()) {
+ // The target supports div+rem but the rem is expanded.
+ // We should recompose it first.
+ Value *X = E.getDividend();
+ Value *Y = E.getDivisor();
+ Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(X, Y)
+ : BinaryOperator::CreateURem(X, Y);
+ // Note that we place it right next to the original expanded instruction,
+ // and letting further handling to move it if needed.
+ RealRem->setName(RemInst->getName() + ".recomposed");
+ RealRem->insertAfter(RemInst);
+ Instruction *OrigRemInst = RemInst;
+ // Update AssertingVH<> with new instruction so it doesn't assert.
+ RemInst = RealRem;
+ // And replace the original instruction with the new one.
+ OrigRemInst->replaceAllUsesWith(RealRem);
+ OrigRemInst->eraseFromParent();
+ NumRecomposed++;
+ // Note that we have left ((X / Y) * Y) around.
+ // If it had other uses we could rewrite it as X - X % Y
+ }
+
+ assert((!E.isRemExpanded() || !HasDivRemOp) &&
+ "*If* the target supports div-rem, then by now the RemInst *is* "
+ "Instruction::[US]Rem.");
// If the target supports div+rem and the instructions are in the same block
// already, there's nothing to do. The backend should handle this. If the
@@ -92,10 +230,16 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
continue;
bool DivDominates = DT.dominates(DivInst, RemInst);
- if (!DivDominates && !DT.dominates(RemInst, DivInst))
+ if (!DivDominates && !DT.dominates(RemInst, DivInst)) {
+ // We have matching div-rem pair, but they are in two different blocks,
+ // neither of which dominates one another.
+ // FIXME: We could hoist both ops to the common predecessor block?
continue;
+ }
- if (!DebugCounter::shouldExecute(DRPCounter))
+ // The target does not have a single div/rem operation,
+ // and the rem is already in expanded form. Nothing to do.
+ if (!HasDivRemOp && E.isRemExpanded())
continue;
if (HasDivRemOp) {
@@ -107,11 +251,17 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
DivInst->moveAfter(RemInst);
NumHoisted++;
} else {
- // The target does not have a single div/rem operation. Decompose the
- // remainder calculation as:
+ // The target does not have a single div/rem operation,
+ // and the rem is *not* in a already-expanded form.
+ // Decompose the remainder calculation as:
// X % Y --> X - ((X / Y) * Y).
- Value *X = RemInst->getOperand(0);
- Value *Y = RemInst->getOperand(1);
+
+ assert(!RemOriginallyWasInExpandedForm &&
+ "We should not be expanding if the rem was in expanded form to "
+ "begin with.");
+
+ Value *X = E.getDividend();
+ Value *Y = E.getDivisor();
Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
@@ -152,8 +302,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// Now kill the explicit remainder. We have replaced it with:
// (sub X, (mul (div X, Y), Y)
- RemInst->replaceAllUsesWith(Sub);
- RemInst->eraseFromParent();
+ Sub->setName(RemInst->getName() + ".decomposed");
+ Instruction *OrigRemInst = RemInst;
+ // Update AssertingVH<> with new instruction so it doesn't assert.
+ RemInst = Sub;
+ // And replace the original instruction with the new one.
+ OrigRemInst->replaceAllUsesWith(Sub);
+ OrigRemInst->eraseFromParent();
NumDecomposed++;
}
Changed = true;
@@ -188,7 +343,7 @@ struct DivRemPairsLegacyPass : public FunctionPass {
return optimizeDivRem(F, TTI, DT);
}
};
-}
+} // namespace
char DivRemPairsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index f1f075257020..ce540683dae2 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -108,11 +108,12 @@ struct SimpleValue {
// This can only handle non-void readnone functions.
if (CallInst *CI = dyn_cast<CallInst>(Inst))
return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
- return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
- isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
- isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
- isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
- isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+ return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
+ isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
+ isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
+ isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
+ isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
+ isa<InsertValueInst>(Inst);
}
};
@@ -240,7 +241,7 @@ static unsigned getHashValueImpl(SimpleValue Val) {
assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
- isa<ShuffleVectorInst>(Inst)) &&
+ isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst)) &&
"Invalid/unknown instruction");
// Mix in the opcode.
@@ -526,7 +527,7 @@ public:
const TargetTransformInfo &TTI, DominatorTree &DT,
AssumptionCache &AC, MemorySSA *MSSA)
: TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
- MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}
+ MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
bool run();
@@ -651,7 +652,7 @@ private:
bool isInvariantLoad() const {
if (auto *LI = dyn_cast<LoadInst>(Inst))
- return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ return LI->hasMetadata(LLVMContext::MD_invariant_load);
return false;
}
@@ -790,7 +791,7 @@ bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
// A location loaded from with an invariant_load is assumed to *never* change
// within the visible scope of the compilation.
if (auto *LI = dyn_cast<LoadInst>(I))
- if (LI->getMetadata(LLVMContext::MD_invariant_load))
+ if (LI->hasMetadata(LLVMContext::MD_invariant_load))
return true;
auto MemLocOpt = MemoryLocation::getOrNone(I);
@@ -1359,7 +1360,7 @@ public:
if (skipFunction(F))
return false;
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -1381,6 +1382,7 @@ public:
AU.addPreserved<MemorySSAWrapperPass>();
}
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
AU.setPreservesCFG();
}
};
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 31670b1464e4..e6abf1ceb026 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -11,10 +11,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
#define DEBUG_TYPE "flattencfg"
@@ -52,15 +54,23 @@ FunctionPass *llvm::createFlattenCFGPass() { return new FlattenCFGPass(); }
static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
bool Changed = false;
bool LocalChange = true;
+
+ // Use block handles instead of iterating over function blocks directly
+ // to avoid using iterators invalidated by erasing blocks.
+ std::vector<WeakVH> Blocks;
+ Blocks.reserve(F.size());
+ for (auto &BB : F)
+ Blocks.push_back(&BB);
+
while (LocalChange) {
LocalChange = false;
- // Loop over all of the basic blocks and remove them if they are unneeded...
- //
- for (Function::iterator BBIt = F.begin(); BBIt != F.end();) {
- if (FlattenCFG(&*BBIt++, AA)) {
- LocalChange = true;
- }
+ // Loop over all of the basic blocks and try to flatten them.
+ for (WeakVH &BlockHandle : Blocks) {
+ // Skip blocks erased by FlattenCFG.
+ if (auto *BB = cast_or_null<BasicBlock>(BlockHandle))
+ if (FlattenCFG(BB, AA))
+ LocalChange = true;
}
Changed |= LocalChange;
}
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
index 4f83e869b303..4d2eac0451df 100644
--- a/lib/Transforms/Scalar/Float2Int.cpp
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -60,11 +60,13 @@ namespace {
if (skipFunction(F))
return false;
- return Impl.runImpl(F);
+ const DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return Impl.runImpl(F, DT);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
@@ -116,21 +118,29 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
// Find the roots - instructions that convert from the FP domain to
// integer domain.
-void Float2IntPass::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
- for (auto &I : instructions(F)) {
- if (isa<VectorType>(I.getType()))
+void Float2IntPass::findRoots(Function &F, const DominatorTree &DT,
+ SmallPtrSet<Instruction*,8> &Roots) {
+ for (BasicBlock &BB : F) {
+ // Unreachable code can take on strange forms that we are not prepared to
+ // handle. For example, an instruction may have itself as an operand.
+ if (!DT.isReachableFromEntry(&BB))
continue;
- switch (I.getOpcode()) {
- default: break;
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- Roots.insert(&I);
- break;
- case Instruction::FCmp:
- if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
- CmpInst::BAD_ICMP_PREDICATE)
+
+ for (Instruction &I : BB) {
+ if (isa<VectorType>(I.getType()))
+ continue;
+ switch (I.getOpcode()) {
+ default: break;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
Roots.insert(&I);
- break;
+ break;
+ case Instruction::FCmp:
+ if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
+ CmpInst::BAD_ICMP_PREDICATE)
+ Roots.insert(&I);
+ break;
+ }
}
}
}
@@ -503,7 +513,7 @@ void Float2IntPass::cleanup() {
I.first->eraseFromParent();
}
-bool Float2IntPass::runImpl(Function &F) {
+bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
// Clear out all state.
ECs = EquivalenceClasses<Instruction*>();
@@ -513,7 +523,7 @@ bool Float2IntPass::runImpl(Function &F) {
Ctx = &F.getParent()->getContext();
- findRoots(F, Roots);
+ findRoots(F, DT, Roots);
walkBackwards(Roots);
walkForwards();
@@ -527,8 +537,9 @@ bool Float2IntPass::runImpl(Function &F) {
namespace llvm {
FunctionPass *createFloat2IntPass() { return new Float2IntLegacyPass(); }
-PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &) {
- if (!runImpl(F))
+PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) {
+ const DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ if (!runImpl(F, DT))
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 1a02e9d33f49..743353eaea22 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -70,6 +70,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -626,6 +627,8 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();
PA.preserve<TargetLibraryAnalysis>();
+ if (LI)
+ PA.preserve<LoopAnalysis>();
return PA;
}
@@ -1161,15 +1164,30 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Do PHI translation to get its value in the predecessor if necessary. The
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
+ // We do the translation for each edge we skipped by going from LI's block
+ // to LoadBB, otherwise we might miss pieces needing translation.
// If all preds have a single successor, then we know it is safe to insert
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
- PHITransAddr Address(LI->getPointerOperand(), DL, AC);
- Value *LoadPtr = nullptr;
- LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
- *DT, NewInsts);
+ Value *LoadPtr = LI->getPointerOperand();
+ BasicBlock *Cur = LI->getParent();
+ while (Cur != LoadBB) {
+ PHITransAddr Address(LoadPtr, DL, AC);
+ LoadPtr = Address.PHITranslateWithInsertion(
+ Cur, Cur->getSinglePredecessor(), *DT, NewInsts);
+ if (!LoadPtr) {
+ CanDoPRE = false;
+ break;
+ }
+ Cur = Cur->getSinglePredecessor();
+ }
+ if (LoadPtr) {
+ PHITransAddr Address(LoadPtr, DL, AC);
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT,
+ NewInsts);
+ }
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
if (!LoadPtr) {
@@ -1184,8 +1202,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (!CanDoPRE) {
while (!NewInsts.empty()) {
- Instruction *I = NewInsts.pop_back_val();
- markInstructionForDeletion(I);
+ // Erase instructions generated by the failed PHI translation before
+ // trying to number them. PHI translation might insert instructions
+ // in basic blocks other than the current one, and we delete them
+ // directly, as markInstructionForDeletion only allows removing from the
+ // current basic block.
+ NewInsts.pop_back_val()->eraseFromParent();
}
// HINT: Don't revert the edge-splitting as following transformation may
// also need to split these critical edges.
@@ -1219,10 +1241,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
BasicBlock *UnavailablePred = PredLoad.first;
Value *LoadPtr = PredLoad.second;
- auto *NewLoad =
- new LoadInst(LI->getType(), LoadPtr, LI->getName() + ".pre",
- LI->isVolatile(), LI->getAlignment(), LI->getOrdering(),
- LI->getSyncScopeID(), UnavailablePred->getTerminator());
+ auto *NewLoad = new LoadInst(
+ LI->getType(), LoadPtr, LI->getName() + ".pre", LI->isVolatile(),
+ MaybeAlign(LI->getAlignment()), LI->getOrdering(), LI->getSyncScopeID(),
+ UnavailablePred->getTerminator());
NewLoad->setDebugLoc(LI->getDebugLoc());
// Transfer the old load's AA tags to the new load.
@@ -1365,6 +1387,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
}
+static bool hasUsersIn(Value *V, BasicBlock *BB) {
+ for (User *U : V->users())
+ if (isa<Instruction>(U) &&
+ cast<Instruction>(U)->getParent() == BB)
+ return true;
+ return false;
+}
+
bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume &&
"This function can only be called with llvm.assume intrinsic");
@@ -1403,12 +1433,23 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
// We can replace assume value with true, which covers cases like this:
// call void @llvm.assume(i1 %cmp)
// br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
- ReplaceWithConstMap[V] = True;
-
- // If one of *cmp *eq operand is const, adding it to map will cover this:
+ ReplaceOperandsWithMap[V] = True;
+
+ // If we find an equality fact, canonicalize all dominated uses in this block
+ // to one of the two values. We heuristically choice the "oldest" of the
+ // two where age is determined by value number. (Note that propagateEquality
+ // above handles the cross block case.)
+ //
+ // Key case to cover are:
+ // 1)
// %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
// call void @llvm.assume(i1 %cmp)
// ret float %0 ; will change it to ret float 3.000000e+00
+ // 2)
+ // %load = load float, float* %addr
+ // %cmp = fcmp oeq float %load, %0
+ // call void @llvm.assume(i1 %cmp)
+ // ret float %load ; will change it to ret float %0
if (auto *CmpI = dyn_cast<CmpInst>(V)) {
if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
@@ -1416,13 +1457,50 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
CmpI->getFastMathFlags().noNaNs())) {
Value *CmpLHS = CmpI->getOperand(0);
Value *CmpRHS = CmpI->getOperand(1);
- if (isa<Constant>(CmpLHS))
+ // Heuristically pick the better replacement -- the choice of heuristic
+ // isn't terribly important here, but the fact we canonicalize on some
+ // replacement is for exposing other simplifications.
+ // TODO: pull this out as a helper function and reuse w/existing
+ // (slightly different) logic.
+ if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS))
std::swap(CmpLHS, CmpRHS);
- auto *RHSConst = dyn_cast<Constant>(CmpRHS);
+ if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))
+ std::swap(CmpLHS, CmpRHS);
+ if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) ||
+ (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) {
+ // Move the 'oldest' value to the right-hand side, using the value
+ // number as a proxy for age.
+ uint32_t LVN = VN.lookupOrAdd(CmpLHS);
+ uint32_t RVN = VN.lookupOrAdd(CmpRHS);
+ if (LVN < RVN)
+ std::swap(CmpLHS, CmpRHS);
+ }
- // If only one operand is constant.
- if (RHSConst != nullptr && !isa<Constant>(CmpLHS))
- ReplaceWithConstMap[CmpLHS] = RHSConst;
+ // Handle degenerate case where we either haven't pruned a dead path or a
+ // removed a trivial assume yet.
+ if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
+ return Changed;
+
+ // +0.0 and -0.0 compare equal, but do not imply equivalence. Unless we
+ // can prove equivalence, bail.
+ if (CmpRHS->getType()->isFloatTy() &&
+ (!isa<ConstantFP>(CmpRHS) || cast<ConstantFP>(CmpRHS)->isZero()))
+ return Changed;
+
+ LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
+ << *CmpLHS << " with "
+ << *CmpRHS << " in block "
+ << IntrinsicI->getParent()->getName() << "\n");
+
+
+ // Setup the replacement map - this handles uses within the same block
+ if (hasUsersIn(CmpLHS, IntrinsicI->getParent()))
+ ReplaceOperandsWithMap[CmpLHS] = CmpRHS;
+
+ // NOTE: The non-block local cases are handled by the call to
+ // propagateEquality above; this block is just about handling the block
+ // local cases. TODO: There's a bunch of logic in propagateEqualiy which
+ // isn't duplicated for the block local case, can we share it somehow?
}
}
return Changed;
@@ -1522,6 +1600,41 @@ uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
return NewNum;
}
+// Return true if the value number \p Num and NewNum have equal value.
+// Return false if the result is unknown.
+bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
+ const BasicBlock *Pred,
+ const BasicBlock *PhiBlock, GVN &Gvn) {
+ CallInst *Call = nullptr;
+ LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
+ while (Vals) {
+ Call = dyn_cast<CallInst>(Vals->Val);
+ if (Call && Call->getParent() == PhiBlock)
+ break;
+ Vals = Vals->Next;
+ }
+
+ if (AA->doesNotAccessMemory(Call))
+ return true;
+
+ if (!MD || !AA->onlyReadsMemory(Call))
+ return false;
+
+ MemDepResult local_dep = MD->getDependency(Call);
+ if (!local_dep.isNonLocal())
+ return false;
+
+ const MemoryDependenceResults::NonLocalDepInfo &deps =
+ MD->getNonLocalCallDependency(Call);
+
+ // Check to see if the Call has no function local clobber.
+ for (unsigned i = 0; i < deps.size(); i++) {
+ if (deps[i].getResult().isNonFuncLocal())
+ return true;
+ }
+ return false;
+}
+
/// Translate value number \p Num using phis, so that it has the values of
/// the phis in BB.
uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
@@ -1568,8 +1681,11 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
}
}
- if (uint32_t NewNum = expressionNumbering[Exp])
+ if (uint32_t NewNum = expressionNumbering[Exp]) {
+ if (Exp.opcode == Instruction::Call && NewNum != Num)
+ return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num;
return NewNum;
+ }
return Num;
}
@@ -1637,16 +1753,12 @@ void GVN::assignBlockRPONumber(Function &F) {
InvalidBlockRPONumbers = false;
}
-// Tries to replace instruction with const, using information from
-// ReplaceWithConstMap.
-bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
+bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const {
bool Changed = false;
for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
- Value *Operand = Instr->getOperand(OpNum);
- auto it = ReplaceWithConstMap.find(Operand);
- if (it != ReplaceWithConstMap.end()) {
- assert(!isa<Constant>(Operand) &&
- "Replacing constants with constants is invalid");
+ Value *Operand = Instr->getOperand(OpNum);
+ auto it = ReplaceOperandsWithMap.find(Operand);
+ if (it != ReplaceOperandsWithMap.end()) {
LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
<< *it->second << " in instruction " << *Instr << '\n');
Instr->setOperand(OpNum, it->second);
@@ -1976,6 +2088,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
MD = RunMD;
ImplicitControlFlowTracking ImplicitCFT(DT);
ICF = &ImplicitCFT;
+ this->LI = LI;
VN.setMemDep(MD);
ORE = RunORE;
InvalidBlockRPONumbers = true;
@@ -2037,13 +2150,13 @@ bool GVN::processBlock(BasicBlock *BB) {
return false;
// Clearing map before every BB because it can be used only for single BB.
- ReplaceWithConstMap.clear();
+ ReplaceOperandsWithMap.clear();
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- if (!ReplaceWithConstMap.empty())
- ChangedFunction |= replaceOperandsWithConsts(&*BI);
+ if (!ReplaceOperandsWithMap.empty())
+ ChangedFunction |= replaceOperandsForInBlockEquality(&*BI);
ChangedFunction |= processInstruction(&*BI);
if (InstrsToErase.empty()) {
@@ -2335,7 +2448,7 @@ bool GVN::performPRE(Function &F) {
/// the block inserted to the critical edge.
BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
BasicBlock *BB =
- SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
+ SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT, LI));
if (MD)
MD->invalidateCachedPredecessors();
InvalidBlockRPONumbers = true;
@@ -2350,7 +2463,7 @@ bool GVN::splitCriticalEdges() {
do {
std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
SplitCriticalEdge(Edge.first, Edge.second,
- CriticalEdgeSplittingOptions(DT));
+ CriticalEdgeSplittingOptions(DT, LI));
} while (!toSplit.empty());
if (MD) MD->invalidateCachedPredecessors();
InvalidBlockRPONumbers = true;
@@ -2456,18 +2569,26 @@ void GVN::addDeadBlock(BasicBlock *BB) {
if (DeadBlocks.count(B))
continue;
+ // First, split the critical edges. This might also create additional blocks
+ // to preserve LoopSimplify form and adjust edges accordingly.
SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
for (BasicBlock *P : Preds) {
if (!DeadBlocks.count(P))
continue;
- if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
+ if (llvm::any_of(successors(P),
+ [B](BasicBlock *Succ) { return Succ == B; }) &&
+ isCriticalEdge(P->getTerminator(), B)) {
if (BasicBlock *S = splitCriticalEdges(P, B))
DeadBlocks.insert(P = S);
}
+ }
- for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
- PHINode &Phi = cast<PHINode>(*II);
+ // Now undef the incoming values from the dead predecessors.
+ for (BasicBlock *P : predecessors(B)) {
+ if (!DeadBlocks.count(P))
+ continue;
+ for (PHINode &Phi : B->phis()) {
Phi.setIncomingValueForBlock(P, UndefValue::get(Phi.getType()));
if (MD)
MD->invalidateCachedPointerInfo(&Phi);
@@ -2544,10 +2665,11 @@ public:
return Impl.runImpl(
F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
getAnalysis<AAResultsWrapperPass>().getAAResults(),
- NoMemDepAnalysis ? nullptr
- : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(),
+ NoMemDepAnalysis
+ ? nullptr
+ : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(),
LIWP ? &LIWP->getLoopInfo() : nullptr,
&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
}
@@ -2556,6 +2678,7 @@ public:
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
if (!NoMemDepAnalysis)
AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
@@ -2563,6 +2686,8 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreservedID(LoopSimplifyID);
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index 7614599653c4..c87e41484b13 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -257,7 +257,7 @@ public:
GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA,
MemoryDependenceResults *MD, MemorySSA *MSSA)
: DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA),
- MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}
+ MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
bool run(Function &F) {
NumFuncArgs = F.arg_size();
@@ -539,7 +539,7 @@ private:
// Check for unsafe hoistings due to side effects.
if (K == InsKind::Store) {
- if (hasEHOrLoadsOnPath(NewPt, dyn_cast<MemoryDef>(U), NBBsOnAllPaths))
+ if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths))
return false;
} else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
return false;
@@ -889,19 +889,18 @@ private:
void updateAlignment(Instruction *I, Instruction *Repl) {
if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
- ReplacementLoad->setAlignment(
- std::min(ReplacementLoad->getAlignment(),
- cast<LoadInst>(I)->getAlignment()));
+ ReplacementLoad->setAlignment(MaybeAlign(std::min(
+ ReplacementLoad->getAlignment(), cast<LoadInst>(I)->getAlignment())));
++NumLoadsRemoved;
} else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
ReplacementStore->setAlignment(
- std::min(ReplacementStore->getAlignment(),
- cast<StoreInst>(I)->getAlignment()));
+ MaybeAlign(std::min(ReplacementStore->getAlignment(),
+ cast<StoreInst>(I)->getAlignment())));
++NumStoresRemoved;
} else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
ReplacementAlloca->setAlignment(
- std::max(ReplacementAlloca->getAlignment(),
- cast<AllocaInst>(I)->getAlignment()));
+ MaybeAlign(std::max(ReplacementAlloca->getAlignment(),
+ cast<AllocaInst>(I)->getAlignment())));
} else if (isa<CallInst>(Repl)) {
++NumCallsRemoved;
}
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index e14f44bb7069..2697d7809568 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -591,7 +591,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
else
Result = RC.getCheckInst();
}
-
+ assert(Result && "Failed to find result value");
Result->setName("wide.chk");
}
return true;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index f9fc698a4a9b..5519a00c12c9 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -124,6 +124,11 @@ static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
cl::desc("Disable Linear Function Test Replace optimization"));
+static cl::opt<bool>
+LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false),
+ cl::desc("Predicate conditions in read only loops"));
+
+
namespace {
struct RewritePhi;
@@ -144,7 +149,11 @@ class IndVarSimplify {
bool rewriteNonIntegerIVs(Loop *L);
bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
- bool optimizeLoopExits(Loop *L);
+ /// Try to eliminate loop exits based on analyzeable exit counts
+ bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
+ /// Try to form loop invariant tests for loop exits by changing how many
+ /// iterations of the loop run when that is unobservable.
+ bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
@@ -628,12 +637,30 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Okay, this instruction has a user outside of the current loop
// and varies predictably *inside* the loop. Evaluate the value it
- // contains when the loop exits, if possible.
+ // contains when the loop exits, if possible. We prefer to start with
+ // expressions which are true for all exits (so as to maximize
+ // expression reuse by the SCEVExpander), but resort to per-exit
+ // evaluation if that fails.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!SE->isLoopInvariant(ExitValue, L) ||
- !isSafeToExpand(ExitValue, *SE))
- continue;
-
+ if (isa<SCEVCouldNotCompute>(ExitValue) ||
+ !SE->isLoopInvariant(ExitValue, L) ||
+ !isSafeToExpand(ExitValue, *SE)) {
+ // TODO: This should probably be sunk into SCEV in some way; maybe a
+ // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
+ // most SCEV expressions and other recurrence types (e.g. shift
+ // recurrences). Is there existing code we can reuse?
+ const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
+ if (AddRec->getLoop() == L)
+ ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
+ if (isa<SCEVCouldNotCompute>(ExitValue) ||
+ !SE->isLoopInvariant(ExitValue, L) ||
+ !isSafeToExpand(ExitValue, *SE))
+ continue;
+ }
+
// Computing the value outside of the loop brings no benefit if it is
// definitely used inside the loop in a way which can not be optimized
// away. Avoid doing so unless we know we have a value which computes
@@ -804,7 +831,7 @@ bool IndVarSimplify::canLoopBeDeleted(
L->getExitingBlocks(ExitingBlocks);
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
- if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
+ if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
return false;
BasicBlock *ExitBlock = ExitBlocks[0];
@@ -1654,6 +1681,10 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
return nullptr;
}
+ // if we reached this point then we are going to replace
+ // DU.NarrowUse with WideUse. Reattach DbgValue then.
+ replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
+
ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
// Returning WideUse pushes it on the worklist.
return WideUse;
@@ -1779,14 +1810,9 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
DeadInsts.emplace_back(DU.NarrowDef);
}
- // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
- // evaluate the same recurrence, we can just copy the debug info over.
- SmallVector<DbgValueInst *, 1> DbgValues;
- llvm::findDbgValues(DbgValues, OrigPhi);
- auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
- ValueAsMetadata::get(WidePhi));
- for (auto &DbgValue : DbgValues)
- DbgValue->setOperand(0, MDPhi);
+ // Attach any debug information to the new PHI.
+ replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
+
return WidePhi;
}
@@ -1817,8 +1843,8 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
auto CmpConstrainedLHSRange =
ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
- auto NarrowDefRange =
- CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
+ auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
+ *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
};
@@ -2242,8 +2268,8 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
continue;
- const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
-
+ const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+
// AR may be a pointer type, while BECount is an integer type.
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
@@ -2624,74 +2650,125 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
return MadeAnyChanges;
}
-bool IndVarSimplify::optimizeLoopExits(Loop *L) {
+/// Return a symbolic upper bound for the backedge taken count of the loop.
+/// This is more general than getConstantMaxBackedgeTakenCount as it returns
+/// an arbitrary expression as opposed to only constants.
+/// TODO: Move into the ScalarEvolution class.
+static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
+ DominatorTree &DT, Loop *L) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
// Form an expression for the maximum exit count possible for this loop. We
// merge the max and exact information to approximate a version of
- // getMaxBackedgeTakenInfo which isn't restricted to just constants.
- // TODO: factor this out as a version of getMaxBackedgeTakenCount which
- // isn't guaranteed to return a constant.
+ // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
SmallVector<const SCEV*, 4> ExitCounts;
- const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
+ const SCEV *MaxConstEC = SE.getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxConstEC))
ExitCounts.push_back(MaxConstEC);
for (BasicBlock *ExitingBB : ExitingBlocks) {
- const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
if (!isa<SCEVCouldNotCompute>(ExitCount)) {
- assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
+ assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
"We should only have known counts for exiting blocks that "
"dominate latch!");
ExitCounts.push_back(ExitCount);
}
}
if (ExitCounts.empty())
- return false;
- const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
+ return SE.getCouldNotCompute();
+ return SE.getUMinFromMismatchedTypes(ExitCounts);
+}
- bool Changed = false;
- for (BasicBlock *ExitingBB : ExitingBlocks) {
+bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Remove all exits which aren't both rewriteable and analyzeable.
+ auto NewEnd = llvm::remove_if(ExitingBlocks,
+ [&](BasicBlock *ExitingBB) {
// If our exitting block exits multiple loops, we can only rewrite the
// innermost one. Otherwise, we're changing how many times the innermost
// loop runs before it exits.
if (LI->getLoopFor(ExitingBB) != L)
- continue;
+ return true;
// Can't rewrite non-branch yet.
BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
if (!BI)
- continue;
+ return true;
// If already constant, nothing to do.
if (isa<Constant>(BI->getCondition()))
- continue;
+ return true;
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
if (isa<SCEVCouldNotCompute>(ExitCount))
- continue;
+ return true;
+ return false;
+ });
+ ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
+
+ if (ExitingBlocks.empty())
+ return false;
+
+ // Get a symbolic upper bound on the loop backedge taken count.
+ const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
+ if (isa<SCEVCouldNotCompute>(MaxExitCount))
+ return false;
+
+ // Visit our exit blocks in order of dominance. We know from the fact that
+ // all exits (left) are analyzeable that the must be a total dominance order
+ // between them as each must dominate the latch. The visit order only
+ // matters for the provably equal case.
+ llvm::sort(ExitingBlocks,
+ [&](BasicBlock *A, BasicBlock *B) {
+ // std::sort sorts in ascending order, so we want the inverse of
+ // the normal dominance relation.
+ if (DT->properlyDominates(A, B)) return true;
+ if (DT->properlyDominates(B, A)) return false;
+ llvm_unreachable("expected total dominance order!");
+ });
+#ifdef ASSERT
+ for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
+ assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
+ }
+#endif
+
+ auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ auto *OldCond = BI->getCondition();
+ auto *NewCond = ConstantInt::get(OldCond->getType(),
+ IsTaken ? ExitIfTrue : !ExitIfTrue);
+ BI->setCondition(NewCond);
+ if (OldCond->use_empty())
+ DeadInsts.push_back(OldCond);
+ };
+ bool Changed = false;
+ SmallSet<const SCEV*, 8> DominatingExitCounts;
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
+
// If we know we'd exit on the first iteration, rewrite the exit to
// reflect this. This does not imply the loop must exit through this
// exit; there may be an earlier one taken on the first iteration.
// TODO: Given we know the backedge can't be taken, we should go ahead
// and break it. Or at least, kill all the header phis and simplify.
if (ExitCount->isZero()) {
- bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
- auto *OldCond = BI->getCondition();
- auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
- ConstantInt::getFalse(OldCond->getType());
- BI->setCondition(NewCond);
- if (OldCond->use_empty())
- DeadInsts.push_back(OldCond);
+ FoldExit(ExitingBB, true);
Changed = true;
continue;
}
- // If we end up with a pointer exit count, bail.
+ // If we end up with a pointer exit count, bail. Note that we can end up
+ // with a pointer exit count for one exiting block, and not for another in
+ // the same loop.
if (!ExitCount->getType()->isIntegerTy() ||
!MaxExitCount->getType()->isIntegerTy())
- return false;
+ continue;
Type *WiderType =
SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
@@ -2700,35 +2777,198 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L) {
assert(MaxExitCount->getType() == ExitCount->getType());
// Can we prove that some other exit must be taken strictly before this
- // one? TODO: handle cases where ule is known, and equality is covered
- // by a dominating exit
+ // one?
if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
MaxExitCount, ExitCount)) {
- bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
- auto *OldCond = BI->getCondition();
- auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
- ConstantInt::getTrue(OldCond->getType());
- BI->setCondition(NewCond);
- if (OldCond->use_empty())
- DeadInsts.push_back(OldCond);
+ FoldExit(ExitingBB, false);
Changed = true;
continue;
}
- // TODO: If we can prove that the exiting iteration is equal to the exit
- // count for this exit and that no previous exit oppurtunities exist within
- // the loop, then we can discharge all other exits. (May fall out of
- // previous TODO.)
-
- // TODO: If we can't prove any relation between our exit count and the
- // loops exit count, but taking this exit doesn't require actually running
- // the loop (i.e. no side effects, no computed values used in exit), then
- // we can replace the exit test with a loop invariant test which exits on
- // the first iteration.
+ // As we run, keep track of which exit counts we've encountered. If we
+ // find a duplicate, we've found an exit which would have exited on the
+ // exiting iteration, but (from the visit order) strictly follows another
+ // which does the same and is thus dead.
+ if (!DominatingExitCounts.insert(ExitCount).second) {
+ FoldExit(ExitingBB, false);
+ Changed = true;
+ continue;
+ }
+
+ // TODO: There might be another oppurtunity to leverage SCEV's reasoning
+ // here. If we kept track of the min of dominanting exits so far, we could
+ // discharge exits with EC >= MDEC. This is less powerful than the existing
+ // transform (since later exits aren't considered), but potentially more
+ // powerful for any case where SCEV can prove a >=u b, but neither a == b
+ // or a >u b. Such a case is not currently known.
}
return Changed;
}
+bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ bool Changed = false;
+
+ // Finally, see if we can rewrite our exit conditions into a loop invariant
+ // form. If we have a read-only loop, and we can tell that we must exit down
+ // a path which does not need any of the values computed within the loop, we
+ // can rewrite the loop to exit on the first iteration. Note that this
+ // doesn't either a) tell us the loop exits on the first iteration (unless
+ // *all* exits are predicateable) or b) tell us *which* exit might be taken.
+ // This transformation looks a lot like a restricted form of dead loop
+ // elimination, but restricted to read-only loops and without neccesssarily
+ // needing to kill the loop entirely.
+ if (!LoopPredication)
+ return Changed;
+
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return Changed;
+
+ // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
+ // through *explicit* control flow. We have to eliminate the possibility of
+ // implicit exits (see below) before we know it's truly exact.
+ const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(ExactBTC) ||
+ !SE->isLoopInvariant(ExactBTC, L) ||
+ !isSafeToExpand(ExactBTC, *SE))
+ return Changed;
+
+ auto BadExit = [&](BasicBlock *ExitingBB) {
+ // If our exiting block exits multiple loops, we can only rewrite the
+ // innermost one. Otherwise, we're changing how many times the innermost
+ // loop runs before it exits.
+ if (LI->getLoopFor(ExitingBB) != L)
+ return true;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ return true;
+
+ // If already constant, nothing to do.
+ if (isa<Constant>(BI->getCondition()))
+ return true;
+
+ // If the exit block has phis, we need to be able to compute the values
+ // within the loop which contains them. This assumes trivially lcssa phis
+ // have already been removed; TODO: generalize
+ BasicBlock *ExitBlock =
+ BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
+ if (!ExitBlock->phis().empty())
+ return true;
+
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
+ if (!SE->isLoopInvariant(ExitCount, L) ||
+ !isSafeToExpand(ExitCount, *SE))
+ return true;
+
+ return false;
+ };
+
+ // If we have any exits which can't be predicated themselves, than we can't
+ // predicate any exit which isn't guaranteed to execute before it. Consider
+ // two exits (a) and (b) which would both exit on the same iteration. If we
+ // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
+ // we could convert a loop from exiting through (a) to one exiting through
+ // (b). Note that this problem exists only for exits with the same exit
+ // count, and we could be more aggressive when exit counts are known inequal.
+ llvm::sort(ExitingBlocks,
+ [&](BasicBlock *A, BasicBlock *B) {
+ // std::sort sorts in ascending order, so we want the inverse of
+ // the normal dominance relation, plus a tie breaker for blocks
+ // unordered by dominance.
+ if (DT->properlyDominates(A, B)) return true;
+ if (DT->properlyDominates(B, A)) return false;
+ return A->getName() < B->getName();
+ });
+ // Check to see if our exit blocks are a total order (i.e. a linear chain of
+ // exits before the backedge). If they aren't, reasoning about reachability
+ // is complicated and we choose not to for now.
+ for (unsigned i = 1; i < ExitingBlocks.size(); i++)
+ if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
+ return Changed;
+
+ // Given our sorted total order, we know that exit[j] must be evaluated
+ // after all exit[i] such j > i.
+ for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
+ if (BadExit(ExitingBlocks[i])) {
+ ExitingBlocks.resize(i);
+ break;
+ }
+
+ if (ExitingBlocks.empty())
+ return Changed;
+
+ // We rely on not being able to reach an exiting block on a later iteration
+ // then it's statically compute exit count. The implementaton of
+ // getExitCount currently has this invariant, but assert it here so that
+ // breakage is obvious if this ever changes..
+ assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
+ return DT->dominates(ExitingBB, L->getLoopLatch());
+ }));
+
+ // At this point, ExitingBlocks consists of only those blocks which are
+ // predicatable. Given that, we know we have at least one exit we can
+ // predicate if the loop is doesn't have side effects and doesn't have any
+ // implicit exits (because then our exact BTC isn't actually exact).
+ // @Reviewers - As structured, this is O(I^2) for loop nests. Any
+ // suggestions on how to improve this? I can obviously bail out for outer
+ // loops, but that seems less than ideal. MemorySSA can find memory writes,
+ // is that enough for *all* side effects?
+ for (BasicBlock *BB : L->blocks())
+ for (auto &I : *BB)
+ // TODO:isGuaranteedToTransfer
+ if (I.mayHaveSideEffects() || I.mayThrow())
+ return Changed;
+
+ // Finally, do the actual predication for all predicatable blocks. A couple
+ // of notes here:
+ // 1) We don't bother to constant fold dominated exits with identical exit
+ // counts; that's simply a form of CSE/equality propagation and we leave
+ // it for dedicated passes.
+ // 2) We insert the comparison at the branch. Hoisting introduces additional
+ // legality constraints and we leave that to dedicated logic. We want to
+ // predicate even if we can't insert a loop invariant expression as
+ // peeling or unrolling will likely reduce the cost of the otherwise loop
+ // varying check.
+ Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
+ IRBuilder<> B(L->getLoopPreheader()->getTerminator());
+ Value *ExactBTCV = nullptr; //lazy generated if needed
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+
+ auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ Value *NewCond;
+ if (ExitCount == ExactBTC) {
+ NewCond = L->contains(BI->getSuccessor(0)) ?
+ B.getFalse() : B.getTrue();
+ } else {
+ Value *ECV = Rewriter.expandCodeFor(ExitCount);
+ if (!ExactBTCV)
+ ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
+ Value *RHS = ExactBTCV;
+ if (ECV->getType() != RHS->getType()) {
+ Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
+ ECV = B.CreateZExt(ECV, WiderTy);
+ RHS = B.CreateZExt(RHS, WiderTy);
+ }
+ auto Pred = L->contains(BI->getSuccessor(0)) ?
+ ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+ NewCond = B.CreateICmp(Pred, ECV, RHS);
+ }
+ Value *OldCond = BI->getCondition();
+ BI->setCondition(NewCond);
+ if (OldCond->use_empty())
+ DeadInsts.push_back(OldCond);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
//===----------------------------------------------------------------------===//
// IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//
@@ -2755,7 +2995,10 @@ bool IndVarSimplify::run(Loop *L) {
// transform them to use integer recurrences.
Changed |= rewriteNonIntegerIVs(L);
+#ifndef NDEBUG
+ // Used below for a consistency check only
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+#endif
// Create a rewriter object which we'll use to transform the code with.
SCEVExpander Rewriter(*SE, DL, "indvars");
@@ -2772,20 +3015,22 @@ bool IndVarSimplify::run(Loop *L) {
Rewriter.disableCanonicalMode();
Changed |= simplifyAndExtend(L, Rewriter, LI);
- // Check to see if this loop has a computable loop-invariant execution count.
- // If so, this means that we can compute the final value of any expressions
+ // Check to see if we can compute the final value of any expressions
// that are recurrent in the loop, and substitute the exit values from the
- // loop into any instructions outside of the loop that use the final values of
- // the current expressions.
- //
- if (ReplaceExitValue != NeverRepl &&
- !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ // loop into any instructions outside of the loop that use the final values
+ // of the current expressions.
+ if (ReplaceExitValue != NeverRepl)
Changed |= rewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
- Changed |= optimizeLoopExits(L);
+ // Try to eliminate loop exits based on analyzeable exit counts
+ Changed |= optimizeLoopExits(L, Rewriter);
+
+ // Try to form loop invariant tests for loop exits by changing how many
+ // iterations of the loop run when that is unobservable.
+ Changed |= predicateLoopExits(L, Rewriter);
// If we have a trip count expression, rewrite the loop's exit condition
// using it.
@@ -2825,7 +3070,7 @@ bool IndVarSimplify::run(Loop *L) {
// that our definition of "high cost" is not exactly principled.
if (Rewriter.isHighCostExpansion(ExitCount, L))
continue;
-
+
// Check preconditions for proper SCEVExpander operation. SCEV does not
// express SCEVExpander's dependencies, such as LoopSimplify. Instead
// any pass that uses the SCEVExpander must do it. This does not work
@@ -2924,7 +3169,7 @@ struct IndVarSimplifyLegacyPass : public LoopPass {
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 5f0e2001c73d..e7e73a132fbe 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -141,6 +141,8 @@ using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
/// InferAddressSpaces
class InferAddressSpaces : public FunctionPass {
+ const TargetTransformInfo *TTI;
+
/// Target specific address space which uses of should be replaced if
/// possible.
unsigned FlatAddrSpace;
@@ -264,17 +266,6 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
Module *M = II->getParent()->getParent()->getParent();
switch (II->getIntrinsicID()) {
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
- case Intrinsic::amdgcn_ds_fadd:
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax: {
- const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
- if (!IsVolatile || !IsVolatile->isZero())
- return false;
-
- LLVM_FALLTHROUGH;
- }
case Intrinsic::objectsize: {
Type *DestTy = II->getType();
Type *SrcTy = NewV->getType();
@@ -285,25 +276,27 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
return true;
}
default:
- return false;
+ return TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}
}
-// TODO: Move logic to TTI?
void InferAddressSpaces::collectRewritableIntrinsicOperands(
IntrinsicInst *II, std::vector<std::pair<Value *, bool>> &PostorderStack,
DenseSet<Value *> &Visited) const {
- switch (II->getIntrinsicID()) {
+ auto IID = II->getIntrinsicID();
+ switch (IID) {
case Intrinsic::objectsize:
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
- case Intrinsic::amdgcn_ds_fadd:
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax:
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
default:
+ SmallVector<int, 2> OpIndexes;
+ if (TTI->collectFlatAddressOperands(OpIndexes, IID)) {
+ for (int Idx : OpIndexes) {
+ appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(Idx),
+ PostorderStack, Visited);
+ }
+ }
break;
}
}
@@ -631,11 +624,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
if (FlatAddrSpace == UninitializedAddressSpace) {
- FlatAddrSpace = TTI.getFlatAddressSpace();
+ FlatAddrSpace = TTI->getFlatAddressSpace();
if (FlatAddrSpace == UninitializedAddressSpace)
return false;
}
@@ -650,7 +642,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
// Changes the address spaces of the flat address expressions who are inferred
// to point to a specific address space.
- return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F);
+ return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F);
}
// Constants need to be tracked through RAUW to handle cases with nested
diff --git a/lib/Transforms/Scalar/InstSimplifyPass.cpp b/lib/Transforms/Scalar/InstSimplifyPass.cpp
index 6616364ab203..ec28f790f252 100644
--- a/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -33,37 +33,39 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ,
bool Changed = false;
do {
- for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
- // Here be subtlety: the iterator must be incremented before the loop
- // body (not sure why), so a range-for loop won't work here.
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *I = &*BI++;
- // The first time through the loop ToSimplify is empty and we try to
- // simplify all instructions. On later iterations ToSimplify is not
+ for (BasicBlock &BB : F) {
+ // Unreachable code can take on strange forms that we are not prepared to
+ // handle. For example, an instruction may have itself as an operand.
+ if (!SQ.DT->isReachableFromEntry(&BB))
+ continue;
+
+ SmallVector<Instruction *, 8> DeadInstsInBB;
+ for (Instruction &I : BB) {
+ // The first time through the loop, ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations, ToSimplify is not
// empty and we only bother simplifying instructions that are in it.
- if (!ToSimplify->empty() && !ToSimplify->count(I))
+ if (!ToSimplify->empty() && !ToSimplify->count(&I))
continue;
- // Don't waste time simplifying unused instructions.
- if (!I->use_empty()) {
- if (Value *V = SimplifyInstruction(I, SQ, ORE)) {
+ // Don't waste time simplifying dead/unused instructions.
+ if (isInstructionTriviallyDead(&I)) {
+ DeadInstsInBB.push_back(&I);
+ Changed = true;
+ } else if (!I.use_empty()) {
+ if (Value *V = SimplifyInstruction(&I, SQ, ORE)) {
// Mark all uses for resimplification next time round the loop.
- for (User *U : I->users())
+ for (User *U : I.users())
Next->insert(cast<Instruction>(U));
- I->replaceAllUsesWith(V);
+ I.replaceAllUsesWith(V);
++NumSimplified;
Changed = true;
+ // A call can get simplified, but it may not be trivially dead.
+ if (isInstructionTriviallyDead(&I))
+ DeadInstsInBB.push_back(&I);
}
}
- if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) {
- // RecursivelyDeleteTriviallyDeadInstruction can remove more than one
- // instruction, so simply incrementing the iterator does not work.
- // When instructions get deleted re-iterate instead.
- BI = BB->begin();
- BE = BB->end();
- Changed = true;
- }
}
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInstsInBB, SQ.TLI);
}
// Place the list of instructions to simplify on the next loop iteration
@@ -90,7 +92,7 @@ struct InstSimplifyLegacyPass : public FunctionPass {
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
- /// runOnFunction - Remove instructions that simplify.
+ /// Remove instructions that simplify.
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
@@ -98,7 +100,7 @@ struct InstSimplifyLegacyPass : public FunctionPass {
const DominatorTree *DT =
&getAnalysis<DominatorTreeWrapperPass>().getDomTree();
const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
OptimizationRemarkEmitter *ORE =
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index b86bf2fefbe5..0cf00baaa24a 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -224,13 +224,21 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
auto *PredBB = IncomingBB;
auto *SuccBB = PhiBB;
+ SmallPtrSet<BasicBlock *, 16> Visited;
while (true) {
BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
if (PredBr && PredBr->isConditional())
return {PredBB, SuccBB};
+ Visited.insert(PredBB);
auto *SinglePredBB = PredBB->getSinglePredecessor();
if (!SinglePredBB)
return {nullptr, nullptr};
+
+ // Stop searching when SinglePredBB has been visited. It means we see
+ // an unreachable loop.
+ if (Visited.count(SinglePredBB))
+ return {nullptr, nullptr};
+
SuccBB = PredBB;
PredBB = SinglePredBB;
}
@@ -253,7 +261,9 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
return;
BasicBlock *PredBB = PredOutEdge.first;
- BranchInst *PredBr = cast<BranchInst>(PredBB->getTerminator());
+ BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
+ if (!PredBr)
+ return;
uint64_t PredTrueWeight, PredFalseWeight;
// FIXME: We currently only set the profile data when it is missing.
@@ -286,7 +296,7 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
bool JumpThreading::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
// Get DT analysis before LVI. When LVI is initialized it conditionally adds
// DT if it's available.
auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1461,7 +1471,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
"Can't handle critical edge here!");
LoadInst *NewVal = new LoadInst(
LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
- LoadI->getName() + ".pr", false, LoadI->getAlignment(),
+ LoadI->getName() + ".pr", false, MaybeAlign(LoadI->getAlignment()),
LoadI->getOrdering(), LoadI->getSyncScopeID(),
UnavailablePred->getTerminator());
NewVal->setDebugLoc(LoadI->getDebugLoc());
@@ -2423,7 +2433,7 @@ void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
// |-----
// v
// BB
- BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
+ BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
BB->getParent(), BB);
// Move the unconditional branch to NewBB.
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index d9dda4cef2d2..6ce4831a7359 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -220,7 +220,8 @@ struct LegacyLICMPass : public LoopPass {
&getAnalysis<AAResultsWrapperPass>().getAAResults(),
&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent()),
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent()),
SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
@@ -294,7 +295,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
- if (EnableMSSALoopDependency)
+ if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
@@ -330,6 +331,12 @@ bool LoopInvariantCodeMotion::runOnLoop(
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
+ // If this loop has metadata indicating that LICM is not to be performed then
+ // just exit.
+ if (hasDisableLICMTransformsHint(L)) {
+ return false;
+ }
+
std::unique_ptr<AliasSetTracker> CurAST;
std::unique_ptr<MemorySSAUpdater> MSSAU;
bool NoOfMemAccTooLarge = false;
@@ -340,7 +347,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
CurAST = collectAliasInfoForLoop(L, LI, AA);
} else {
LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
- MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
unsigned AccessCapCount = 0;
for (auto *BB : L->getBlocks()) {
@@ -956,7 +963,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// Now that we've finished hoisting make sure that LI and DT are still
// valid.
-#ifndef NDEBUG
+#ifdef EXPENSIVE_CHECKS
if (Changed) {
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
"Dominator tree verification failed");
@@ -1026,7 +1033,8 @@ namespace {
bool isHoistableAndSinkableInst(Instruction &I) {
// Only these instructions are hoistable/sinkable.
return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
- isa<FenceInst>(I) || isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<FenceInst>(I) || isa<CastInst>(I) ||
+ isa<UnaryOperator>(I) || isa<BinaryOperator>(I) ||
isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
@@ -1092,7 +1100,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
- if (LI->getMetadata(LLVMContext::MD_invariant_load))
+ if (LI->hasMetadata(LLVMContext::MD_invariant_load))
return true;
if (LI->isAtomic() && !TargetExecutesOncePerLoop)
@@ -1240,12 +1248,22 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// FIXME: More precise: no Uses that alias SI.
if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
return false;
- } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+ } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
(void)LI; // Silence warning.
assert(!LI->isUnordered() && "Expected unordered load");
return false;
}
+ // Any call, while it may not be clobbering SI, it may be a use.
+ if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
+ // Check if the call may read from the memory locattion written
+ // to by SI. Check CI's attributes and arguments; the number of
+ // such checks performed is limited above by NoOfMemAccTooLarge.
+ ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
+ if (isModOrRefSet(MRI))
+ return false;
+ }
+ }
}
auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
@@ -1375,8 +1393,7 @@ static Instruction *CloneInstructionInExitBlock(
if (!I.getName().empty())
New->setName(I.getName() + ".le");
- MemoryAccess *OldMemAcc;
- if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
+ if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
// Create a new MemoryAccess and let MemorySSA set its defining access.
MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
New, nullptr, New->getParent(), MemorySSA::Beginning);
@@ -1385,7 +1402,7 @@ static Instruction *CloneInstructionInExitBlock(
MSSAU->insertDef(MemDef, /*RenameUses=*/true);
else {
auto *MemUse = cast<MemoryUse>(NewMemAcc);
- MSSAU->insertUse(MemUse);
+ MSSAU->insertUse(MemUse, /*RenameUses=*/true);
}
}
}
@@ -1783,7 +1800,7 @@ public:
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
if (UnorderedAtomic)
NewSI->setOrdering(AtomicOrdering::Unordered);
- NewSI->setAlignment(Alignment);
+ NewSI->setAlignment(MaybeAlign(Alignment));
NewSI->setDebugLoc(DL);
if (AATags)
NewSI->setAAMetadata(AATags);
@@ -2016,7 +2033,8 @@ bool llvm::promoteLoopAccessesToScalars(
if (!DereferenceableInPH) {
DereferenceableInPH = isDereferenceableAndAlignedPointer(
Store->getPointerOperand(), Store->getValueOperand()->getType(),
- Store->getAlignment(), MDL, Preheader->getTerminator(), DT);
+ MaybeAlign(Store->getAlignment()), MDL,
+ Preheader->getTerminator(), DT);
}
} else
return false; // Not a load or store.
@@ -2101,20 +2119,21 @@ bool llvm::promoteLoopAccessesToScalars(
SomePtr->getName() + ".promoted", Preheader->getTerminator());
if (SawUnorderedAtomic)
PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
- PreheaderLoad->setAlignment(Alignment);
+ PreheaderLoad->setAlignment(MaybeAlign(Alignment));
PreheaderLoad->setDebugLoc(DL);
if (AATags)
PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
- MemoryAccess *PreheaderLoadMemoryAccess;
if (MSSAU) {
- PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
+ MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
- MSSAU->insertUse(NewMemUse);
+ MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
@@ -2161,7 +2180,7 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
LoopToAliasSetMap.erase(MapI);
}
if (!CurAST)
- CurAST = make_unique<AliasSetTracker>(*AA);
+ CurAST = std::make_unique<AliasSetTracker>(*AA);
// Add everything from the sub loops that are no longer directly available.
for (Loop *InnerL : RecomputeLoops)
@@ -2180,7 +2199,7 @@ std::unique_ptr<AliasSetTracker>
LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
auto *MSSA = MSSAU->getMemorySSA();
- auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
+ auto CurAST = std::make_unique<AliasSetTracker>(*AA, MSSA, L);
CurAST->addAllInstructionsInLoopUsingMSSA();
return CurAST;
}
diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 1fcf1315a177..a972d6fa2fcd 100644
--- a/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -312,8 +312,8 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
IRBuilder<> Builder(MemI);
Module *M = BB->getParent()->getParent();
Type *I32 = Type::getInt32Ty(BB->getContext());
- Function *PrefetchFunc =
- Intrinsic::getDeclaration(M, Intrinsic::prefetch);
+ Function *PrefetchFunc = Intrinsic::getDeclaration(
+ M, Intrinsic::prefetch, PrefPtrValue->getType());
Builder.CreateCall(
PrefetchFunc,
{PrefPtrValue,
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 8371367e24e7..cee197cf8354 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -191,7 +191,7 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- const SCEV *S = SE.getMaxBackedgeTakenCount(L);
+ const SCEV *S = SE.getConstantMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S)) {
LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n");
return Changed ? LoopDeletionResult::Modified
diff --git a/lib/Transforms/Scalar/LoopFuse.cpp b/lib/Transforms/Scalar/LoopFuse.cpp
index 0bc2bcff2ae1..9f93c68e6128 100644
--- a/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/lib/Transforms/Scalar/LoopFuse.cpp
@@ -66,7 +66,7 @@ using namespace llvm;
#define DEBUG_TYPE "loop-fusion"
-STATISTIC(FuseCounter, "Count number of loop fusions performed");
+STATISTIC(FuseCounter, "Loops fused");
STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
STATISTIC(InvalidPreheader, "Loop has invalid preheader");
STATISTIC(InvalidHeader, "Loop has invalid header");
@@ -79,12 +79,15 @@ STATISTIC(MayThrowException, "Loop may throw an exception");
STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
-STATISTIC(InvalidTripCount,
- "Loop does not have invariant backedge taken count");
+STATISTIC(UnknownTripCount, "Loop has unknown trip count");
STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
-STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same");
-STATISTIC(NonAdjacent, "Candidates are not adjacent");
-STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader");
+STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
+STATISTIC(NonAdjacent, "Loops are not adjacent");
+STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader");
+STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
+STATISTIC(NonIdenticalGuards, "Candidates have different guards");
+STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
+STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block");
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -110,6 +113,7 @@ static cl::opt<bool>
cl::Hidden, cl::init(false), cl::ZeroOrMore);
#endif
+namespace {
/// This class is used to represent a candidate for loop fusion. When it is
/// constructed, it checks the conditions for loop fusion to ensure that it
/// represents a valid candidate. It caches several parts of a loop that are
@@ -143,6 +147,8 @@ struct FusionCandidate {
SmallVector<Instruction *, 16> MemWrites;
/// Are all of the members of this fusion candidate still valid
bool Valid;
+ /// Guard branch of the loop, if it exists
+ BranchInst *GuardBranch;
/// Dominator and PostDominator trees are needed for the
/// FusionCandidateCompare function, required by FusionCandidateSet to
@@ -151,11 +157,20 @@ struct FusionCandidate {
const DominatorTree *DT;
const PostDominatorTree *PDT;
+ OptimizationRemarkEmitter &ORE;
+
FusionCandidate(Loop *L, const DominatorTree *DT,
- const PostDominatorTree *PDT)
+ const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
- Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) {
+ Latch(L->getLoopLatch()), L(L), Valid(true), GuardBranch(nullptr),
+ DT(DT), PDT(PDT), ORE(ORE) {
+
+ // TODO: This is temporary while we fuse both rotated and non-rotated
+ // loops. Once we switch to only fusing rotated loops, the initialization of
+ // GuardBranch can be moved into the initialization list above.
+ if (isRotated())
+ GuardBranch = L->getLoopGuardBranch();
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
@@ -163,28 +178,28 @@ struct FusionCandidate {
// found, invalidate this object and return.
for (BasicBlock *BB : L->blocks()) {
if (BB->hasAddressTaken()) {
- AddressTakenBB++;
invalidate();
+ reportInvalidCandidate(AddressTakenBB);
return;
}
for (Instruction &I : *BB) {
if (I.mayThrow()) {
- MayThrowException++;
invalidate();
+ reportInvalidCandidate(MayThrowException);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (SI->isVolatile()) {
- ContainsVolatileAccess++;
invalidate();
+ reportInvalidCandidate(ContainsVolatileAccess);
return;
}
}
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (LI->isVolatile()) {
- ContainsVolatileAccess++;
invalidate();
+ reportInvalidCandidate(ContainsVolatileAccess);
return;
}
}
@@ -214,19 +229,96 @@ struct FusionCandidate {
assert(Latch == L->getLoopLatch() && "Latch is out of sync");
}
+ /// Get the entry block for this fusion candidate.
+ ///
+ /// If this fusion candidate represents a guarded loop, the entry block is the
+ /// loop guard block. If it represents an unguarded loop, the entry block is
+ /// the preheader of the loop.
+ BasicBlock *getEntryBlock() const {
+ if (GuardBranch)
+ return GuardBranch->getParent();
+ else
+ return Preheader;
+ }
+
+ /// Given a guarded loop, get the successor of the guard that is not in the
+ /// loop.
+ ///
+ /// This method returns the successor of the loop guard that is not located
+ /// within the loop (i.e., the successor of the guard that is not the
+ /// preheader).
+ /// This method is only valid for guarded loops.
+ BasicBlock *getNonLoopBlock() const {
+ assert(GuardBranch && "Only valid on guarded loops.");
+ assert(GuardBranch->isConditional() &&
+ "Expecting guard to be a conditional branch.");
+ return (GuardBranch->getSuccessor(0) == Preheader)
+ ? GuardBranch->getSuccessor(1)
+ : GuardBranch->getSuccessor(0);
+ }
+
+ bool isRotated() const {
+ assert(L && "Expecting loop to be valid.");
+ assert(Latch && "Expecting latch to be valid.");
+ return L->isLoopExiting(Latch);
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const {
- dbgs() << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
+ dbgs() << "\tGuardBranch: "
+ << (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n"
+ << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
<< "\n"
<< "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
<< "\tExitingBB: "
<< (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
<< "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
<< "\n"
- << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n";
+ << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"
+ << "\tEntryBlock: "
+ << (getEntryBlock() ? getEntryBlock()->getName() : "nullptr")
+ << "\n";
}
#endif
+ /// Determine if a fusion candidate (representing a loop) is eligible for
+ /// fusion. Note that this only checks whether a single loop can be fused - it
+ /// does not check whether it is *legal* to fuse two loops together.
+ bool isEligibleForFusion(ScalarEvolution &SE) const {
+ if (!isValid()) {
+ LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");
+ if (!Preheader)
+ ++InvalidPreheader;
+ if (!Header)
+ ++InvalidHeader;
+ if (!ExitingBlock)
+ ++InvalidExitingBlock;
+ if (!ExitBlock)
+ ++InvalidExitBlock;
+ if (!Latch)
+ ++InvalidLatch;
+ if (L->isInvalid())
+ ++InvalidLoop;
+
+ return false;
+ }
+
+ // Require ScalarEvolution to be able to determine a trip count.
+ if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
+ LLVM_DEBUG(dbgs() << "Loop " << L->getName()
+ << " trip count not computable!\n");
+ return reportInvalidCandidate(UnknownTripCount);
+ }
+
+ if (!L->isLoopSimplifyForm()) {
+ LLVM_DEBUG(dbgs() << "Loop " << L->getName()
+ << " is not in simplified form!\n");
+ return reportInvalidCandidate(NotSimplifiedForm);
+ }
+
+ return true;
+ }
+
private:
// This is only used internally for now, to clear the MemWrites and MemReads
// list and setting Valid to false. I can't envision other uses of this right
@@ -239,17 +331,18 @@ private:
MemReads.clear();
Valid = false;
}
-};
-inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
- const FusionCandidate &FC) {
- if (FC.isValid())
- OS << FC.Preheader->getName();
- else
- OS << "<Invalid>";
-
- return OS;
-}
+ bool reportInvalidCandidate(llvm::Statistic &Stat) const {
+ using namespace ore;
+ assert(L && Preheader && "Fusion candidate not initialized properly!");
+ ++Stat;
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(),
+ L->getStartLoc(), Preheader)
+ << "[" << Preheader->getParent()->getName() << "]: "
+ << "Loop is not a candidate for fusion: " << Stat.getDesc());
+ return false;
+ }
+};
struct FusionCandidateCompare {
/// Comparison functor to sort two Control Flow Equivalent fusion candidates
@@ -260,21 +353,24 @@ struct FusionCandidateCompare {
const FusionCandidate &RHS) const {
const DominatorTree *DT = LHS.DT;
+ BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
+ BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
+
// Do not save PDT to local variable as it is only used in asserts and thus
// will trigger an unused variable warning if building without asserts.
assert(DT && LHS.PDT && "Expecting valid dominator tree");
// Do this compare first so if LHS == RHS, function returns false.
- if (DT->dominates(RHS.Preheader, LHS.Preheader)) {
+ if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
// RHS dominates LHS
// Verify LHS post-dominates RHS
- assert(LHS.PDT->dominates(LHS.Preheader, RHS.Preheader));
+ assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
return false;
}
- if (DT->dominates(LHS.Preheader, RHS.Preheader)) {
+ if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
// Verify RHS Postdominates LHS
- assert(LHS.PDT->dominates(RHS.Preheader, LHS.Preheader));
+ assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
return true;
}
@@ -286,7 +382,6 @@ struct FusionCandidateCompare {
}
};
-namespace {
using LoopVector = SmallVector<Loop *, 4>;
// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
@@ -301,17 +396,26 @@ using LoopVector = SmallVector<Loop *, 4>;
// keeps the FusionCandidateSet sorted will also simplify the implementation.
using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
-} // namespace
-inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+#if !defined(NDEBUG)
+static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const FusionCandidate &FC) {
+ if (FC.isValid())
+ OS << FC.Preheader->getName();
+ else
+ OS << "<Invalid>";
+
+ return OS;
+}
+
+static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
const FusionCandidateSet &CandSet) {
- for (auto IT : CandSet)
- OS << IT << "\n";
+ for (const FusionCandidate &FC : CandSet)
+ OS << FC << '\n';
return OS;
}
-#if !defined(NDEBUG)
static void
printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
dbgs() << "Fusion Candidates: \n";
@@ -391,16 +495,6 @@ static void printLoopVector(const LoopVector &LV) {
}
#endif
-static void reportLoopFusion(const FusionCandidate &FC0,
- const FusionCandidate &FC1,
- OptimizationRemarkEmitter &ORE) {
- using namespace ore;
- ORE.emit(
- OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent())
- << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName()))
- << " with " << NV("Cand2", StringRef(FC1.Preheader->getName())));
-}
-
struct LoopFuser {
private:
// Sets of control flow equivalent fusion candidates for a given nest level.
@@ -497,53 +591,16 @@ private:
const FusionCandidate &FC1) const {
assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
- if (DT.dominates(FC0.Preheader, FC1.Preheader))
- return PDT.dominates(FC1.Preheader, FC0.Preheader);
+ BasicBlock *FC0EntryBlock = FC0.getEntryBlock();
+ BasicBlock *FC1EntryBlock = FC1.getEntryBlock();
- if (DT.dominates(FC1.Preheader, FC0.Preheader))
- return PDT.dominates(FC0.Preheader, FC1.Preheader);
+ if (DT.dominates(FC0EntryBlock, FC1EntryBlock))
+ return PDT.dominates(FC1EntryBlock, FC0EntryBlock);
- return false;
- }
-
- /// Determine if a fusion candidate (representing a loop) is eligible for
- /// fusion. Note that this only checks whether a single loop can be fused - it
- /// does not check whether it is *legal* to fuse two loops together.
- bool eligibleForFusion(const FusionCandidate &FC) const {
- if (!FC.isValid()) {
- LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n");
- if (!FC.Preheader)
- InvalidPreheader++;
- if (!FC.Header)
- InvalidHeader++;
- if (!FC.ExitingBlock)
- InvalidExitingBlock++;
- if (!FC.ExitBlock)
- InvalidExitBlock++;
- if (!FC.Latch)
- InvalidLatch++;
- if (FC.L->isInvalid())
- InvalidLoop++;
+ if (DT.dominates(FC1EntryBlock, FC0EntryBlock))
+ return PDT.dominates(FC0EntryBlock, FC1EntryBlock);
- return false;
- }
-
- // Require ScalarEvolution to be able to determine a trip count.
- if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) {
- LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
- << " trip count not computable!\n");
- InvalidTripCount++;
- return false;
- }
-
- if (!FC.L->isLoopSimplifyForm()) {
- LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
- << " is not in simplified form!\n");
- NotSimplifiedForm++;
- return false;
- }
-
- return true;
+ return false;
}
/// Iterate over all loops in the given loop set and identify the loops that
@@ -551,8 +608,8 @@ private:
/// Flow Equivalent sets, sorted by dominance.
void collectFusionCandidates(const LoopVector &LV) {
for (Loop *L : LV) {
- FusionCandidate CurrCand(L, &DT, &PDT);
- if (!eligibleForFusion(CurrCand))
+ FusionCandidate CurrCand(L, &DT, &PDT, ORE);
+ if (!CurrCand.isEligibleForFusion(SE))
continue;
// Go through each list in FusionCandidates and determine if L is control
@@ -664,31 +721,64 @@ private:
if (!identicalTripCounts(*FC0, *FC1)) {
LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
"counts. Not fusing.\n");
- NonEqualTripCount++;
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEqualTripCount);
continue;
}
if (!isAdjacent(*FC0, *FC1)) {
LLVM_DEBUG(dbgs()
<< "Fusion candidates are not adjacent. Not fusing.\n");
- NonAdjacent++;
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
continue;
}
- // For now we skip fusing if the second candidate has any instructions
- // in the preheader. This is done because we currently do not have the
- // safety checks to determine if it is save to move the preheader of
- // the second candidate past the body of the first candidate. Once
- // these checks are added, this condition can be removed.
+ // Ensure that FC0 and FC1 have identical guards.
+ // If one (or both) are not guarded, this check is not necessary.
+ if (FC0->GuardBranch && FC1->GuardBranch &&
+ !haveIdenticalGuards(*FC0, *FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
+ "guards. Not Fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonIdenticalGuards);
+ continue;
+ }
+
+ // The following three checks look for empty blocks in FC0 and FC1. If
+ // any of these blocks are non-empty, we do not fuse. This is done
+ // because we currently do not have the safety checks to determine if
+ // it is safe to move the blocks past other blocks in the loop. Once
+ // these checks are added, these conditions can be relaxed.
if (!isEmptyPreheader(*FC1)) {
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
"preheader. Not fusing.\n");
- NonEmptyPreheader++;
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEmptyPreheader);
+ continue;
+ }
+
+ if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit "
+ "block. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEmptyExitBlock);
+ continue;
+ }
+
+ if (FC1->GuardBranch && !isEmptyGuardBlock(*FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty guard "
+ "block. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEmptyGuardBlock);
continue;
}
+ // Check the dependencies across the loops and do not fuse if it would
+ // violate them.
if (!dependencesAllowFusion(*FC0, *FC1)) {
LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ InvalidDependencies);
continue;
}
@@ -696,9 +786,11 @@ private:
LLVM_DEBUG(dbgs()
<< "\tFusion appears to be "
<< (BeneficialToFuse ? "" : "un") << "profitable!\n");
- if (!BeneficialToFuse)
+ if (!BeneficialToFuse) {
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ FusionNotBeneficial);
continue;
-
+ }
// All analysis has completed and has determined that fusion is legal
// and profitable. At this point, start transforming the code and
// perform fusion.
@@ -710,15 +802,14 @@ private:
// Note this needs to be done *before* performFusion because
// performFusion will change the original loops, making it not
// possible to identify them after fusion is complete.
- reportLoopFusion(*FC0, *FC1, ORE);
+ reportLoopFusion<OptimizationRemark>(*FC0, *FC1, FuseCounter);
- FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT);
+ FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE);
FusedCand.verify();
- assert(eligibleForFusion(FusedCand) &&
+ assert(FusedCand.isEligibleForFusion(SE) &&
"Fused candidate should be eligible for fusion!");
// Notify the loop-depth-tree that these loops are not valid objects
- // anymore.
LDT.removeLoop(FC1->L);
CandidateSet.erase(FC0);
@@ -889,7 +980,7 @@ private:
LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
<< "\n");
assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
- assert(DT.dominates(FC0.Preheader, FC1.Preheader));
+ assert(DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()));
for (Instruction *WriteL0 : FC0.MemWrites) {
for (Instruction *WriteL1 : FC1.MemWrites)
@@ -939,18 +1030,89 @@ private:
return true;
}
- /// Determine if the exit block of \p FC0 is the preheader of \p FC1. In this
- /// case, there is no code in between the two fusion candidates, thus making
- /// them adjacent.
+ /// Determine if two fusion candidates are adjacent in the CFG.
+ ///
+ /// This method will determine if there are additional basic blocks in the CFG
+ /// between the exit of \p FC0 and the entry of \p FC1.
+ /// If the two candidates are guarded loops, then it checks whether the
+ /// non-loop successor of the \p FC0 guard branch is the entry block of \p
+ /// FC1. If not, then the loops are not adjacent. If the two candidates are
+ /// not guarded loops, then it checks whether the exit block of \p FC0 is the
+ /// preheader of \p FC1.
bool isAdjacent(const FusionCandidate &FC0,
const FusionCandidate &FC1) const {
- return FC0.ExitBlock == FC1.Preheader;
+ // If the successor of the guard branch is FC1, then the loops are adjacent
+ if (FC0.GuardBranch)
+ return FC0.getNonLoopBlock() == FC1.getEntryBlock();
+ else
+ return FC0.ExitBlock == FC1.getEntryBlock();
+ }
+
+ /// Determine if two fusion candidates have identical guards
+ ///
+ /// This method will determine if two fusion candidates have the same guards.
+ /// The guards are considered the same if:
+ /// 1. The instructions to compute the condition used in the compare are
+ /// identical.
+ /// 2. The successors of the guard have the same flow into/around the loop.
+ /// If the compare instructions are identical, then the first successor of the
+ /// guard must go to the same place (either the preheader of the loop or the
+ /// NonLoopBlock). In other words, the the first successor of both loops must
+ /// both go into the loop (i.e., the preheader) or go around the loop (i.e.,
+ /// the NonLoopBlock). The same must be true for the second successor.
+ bool haveIdenticalGuards(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+ assert(FC0.GuardBranch && FC1.GuardBranch &&
+ "Expecting FC0 and FC1 to be guarded loops.");
+
+ if (auto FC0CmpInst =
+ dyn_cast<Instruction>(FC0.GuardBranch->getCondition()))
+ if (auto FC1CmpInst =
+ dyn_cast<Instruction>(FC1.GuardBranch->getCondition()))
+ if (!FC0CmpInst->isIdenticalTo(FC1CmpInst))
+ return false;
+
+ // The compare instructions are identical.
+ // Now make sure the successor of the guards have the same flow into/around
+ // the loop
+ if (FC0.GuardBranch->getSuccessor(0) == FC0.Preheader)
+ return (FC1.GuardBranch->getSuccessor(0) == FC1.Preheader);
+ else
+ return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
+ }
+
+ /// Check that the guard for \p FC *only* contains the cmp/branch for the
+ /// guard.
+ /// Once we are able to handle intervening code, any code in the guard block
+ /// for FC1 will need to be treated as intervening code and checked whether
+ /// it can safely move around the loops.
+ bool isEmptyGuardBlock(const FusionCandidate &FC) const {
+ assert(FC.GuardBranch && "Expecting a fusion candidate with guard branch.");
+ if (auto *CmpInst = dyn_cast<Instruction>(FC.GuardBranch->getCondition())) {
+ auto *GuardBlock = FC.GuardBranch->getParent();
+ // If the generation of the cmp value is in GuardBlock, then the size of
+ // the guard block should be 2 (cmp + branch). If the generation of the
+ // cmp value is in a different block, then the size of the guard block
+ // should only be 1.
+ if (CmpInst->getParent() == GuardBlock)
+ return GuardBlock->size() == 2;
+ else
+ return GuardBlock->size() == 1;
+ }
+
+ return false;
}
bool isEmptyPreheader(const FusionCandidate &FC) const {
+ assert(FC.Preheader && "Expecting a valid preheader");
return FC.Preheader->size() == 1;
}
+ bool isEmptyExitBlock(const FusionCandidate &FC) const {
+ assert(FC.ExitBlock && "Expecting a valid exit block");
+ return FC.ExitBlock->size() == 1;
+ }
+
/// Fuse two fusion candidates, creating a new fused loop.
///
/// This method contains the mechanics of fusing two loops, represented by \p
@@ -987,6 +1149,12 @@ private:
LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
+ // Fusing guarded loops is handled slightly differently than non-guarded
+ // loops and has been broken out into a separate method instead of trying to
+ // intersperse the logic within a single method.
+ if (FC0.GuardBranch)
+ return fuseGuardedLoops(FC0, FC1);
+
assert(FC1.Preheader == FC0.ExitBlock);
assert(FC1.Preheader->size() == 1 &&
FC1.Preheader->getSingleSuccessor() == FC1.Header);
@@ -1131,7 +1299,258 @@ private:
SE.verify();
#endif
- FuseCounter++;
+ LLVM_DEBUG(dbgs() << "Fusion done:\n");
+
+ return FC0.L;
+ }
+
+ /// Report details on loop fusion opportunities.
+ ///
+ /// This template function can be used to report both successful and missed
+ /// loop fusion opportunities, based on the RemarkKind. The RemarkKind should
+ /// be one of:
+ /// - OptimizationRemarkMissed to report when loop fusion is unsuccessful
+ /// given two valid fusion candidates.
+ /// - OptimizationRemark to report successful fusion of two fusion
+ /// candidates.
+ /// The remarks will be printed using the form:
+ /// <path/filename>:<line number>:<column number>: [<function name>]:
+ /// <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
+ template <typename RemarkKind>
+ void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
+ llvm::Statistic &Stat) {
+ assert(FC0.Preheader && FC1.Preheader &&
+ "Expecting valid fusion candidates");
+ using namespace ore;
+ ++Stat;
+ ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(),
+ FC0.Preheader)
+ << "[" << FC0.Preheader->getParent()->getName()
+ << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName()))
+ << " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))
+ << ": " << Stat.getDesc());
+ }
+
+ /// Fuse two guarded fusion candidates, creating a new fused loop.
+ ///
+ /// Fusing guarded loops is handled much the same way as fusing non-guarded
+ /// loops. The rewiring of the CFG is slightly different though, because of
+ /// the presence of the guards around the loops and the exit blocks after the
+ /// loop body. As such, the new loop is rewired as follows:
+ /// 1. Keep the guard branch from FC0 and use the non-loop block target
+ /// from the FC1 guard branch.
+ /// 2. Remove the exit block from FC0 (this exit block should be empty
+ /// right now).
+ /// 3. Remove the guard branch for FC1
+ /// 4. Remove the preheader for FC1.
+ /// The exit block successor for the latch of FC0 is updated to be the header
+ /// of FC1 and the non-exit block successor of the latch of FC1 is updated to
+ /// be the header of FC0, thus creating the fused loop.
+ Loop *fuseGuardedLoops(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) {
+ assert(FC0.GuardBranch && FC1.GuardBranch && "Expecting guarded loops");
+
+ BasicBlock *FC0GuardBlock = FC0.GuardBranch->getParent();
+ BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
+ BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
+ BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
+
+ assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");
+
+ SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
+
+ ////////////////////////////////////////////////////////////////////////////
+ // Update the Loop Guard
+ ////////////////////////////////////////////////////////////////////////////
+ // The guard for FC0 is updated to guard both FC0 and FC1. This is done by
+ // changing the NonLoopGuardBlock for FC0 to the NonLoopGuardBlock for FC1.
+ // Thus, one path from the guard goes to the preheader for FC0 (and thus
+ // executes the new fused loop) and the other path goes to the NonLoopBlock
+ // for FC1 (where FC1 guard would have gone if FC1 was not executed).
+ FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
+ FC0.ExitBlock->getTerminator()->replaceUsesOfWith(FC1GuardBlock,
+ FC1.Header);
+
+ // The guard of FC1 is not necessary anymore.
+ FC1.GuardBranch->eraseFromParent();
+ new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock);
+
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC1GuardBlock, FC1.Preheader));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
+
+ assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) &&
+ "Expecting guard block to have no predecessors");
+ assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) &&
+ "Expecting guard block to have no successors");
+
+ // Remember the phi nodes originally in the header of FC0 in order to rewire
+ // them later. However, this is only necessary if the new loop carried
+ // values might not dominate the exiting branch. While we do not generally
+ // test if this is the case but simply insert intermediate phi nodes, we
+ // need to make sure these intermediate phi nodes have different
+ // predecessors. To this end, we filter the special case where the exiting
+ // block is the latch block of the first loop. Nothing needs to be done
+ // anyway as all loop carried values dominate the latch and thereby also the
+ // exiting branch.
+ // KB: This is no longer necessary because FC0.ExitingBlock == FC0.Latch
+ // (because the loops are rotated. Thus, nothing will ever be added to
+ // OriginalFC0PHIs.
+ SmallVector<PHINode *, 8> OriginalFC0PHIs;
+ if (FC0.ExitingBlock != FC0.Latch)
+ for (PHINode &PHI : FC0.Header->phis())
+ OriginalFC0PHIs.push_back(&PHI);
+
+ assert(OriginalFC0PHIs.empty() && "Expecting OriginalFC0PHIs to be empty!");
+
+ // Replace incoming blocks for header PHIs first.
+ FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
+ FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
+
+ // The old exiting block of the first loop (FC0) has to jump to the header
+ // of the second as we need to execute the code in the second header block
+ // regardless of the trip count. That is, if the trip count is 0, so the
+ // back edge is never taken, we still have to execute both loop headers,
+ // especially (but not only!) if the second is a do-while style loop.
+ // However, doing so might invalidate the phi nodes of the first loop as
+ // the new values do only need to dominate their latch and not the exiting
+ // predicate. To remedy this potential problem we always introduce phi
+ // nodes in the header of the second loop later that select the loop carried
+ // value, if the second header was reached through an old latch of the
+ // first, or undef otherwise. This is sound as exiting the first implies the
+ // second will exit too, __without__ taking the back-edge (their
+ // trip-counts are equal after all).
+ FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
+ FC1.Header);
+
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+
+ // Remove FC0 Exit Block
+ // The exit block for FC0 is no longer needed since control will flow
+ // directly to the header of FC1. Since it is an empty block, it can be
+ // removed at this point.
+ // TODO: In the future, we can handle non-empty exit blocks my merging any
+ // instructions from FC0 exit block into FC1 exit block prior to removing
+ // the block.
+ assert(pred_begin(FC0.ExitBlock) == pred_end(FC0.ExitBlock) &&
+ "Expecting exit block to be empty");
+ FC0.ExitBlock->getTerminator()->eraseFromParent();
+ new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
+
+ // Remove FC1 Preheader
+ // The pre-header of L1 is not necessary anymore.
+ assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
+ FC1.Preheader->getTerminator()->eraseFromParent();
+ new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC1.Preheader, FC1.Header));
+
+ // Moves the phi nodes from the second to the first loops header block.
+ while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
+ if (SE.isSCEVable(PHI->getType()))
+ SE.forgetValue(PHI);
+ if (PHI->hasNUsesOrMore(1))
+ PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
+ else
+ PHI->eraseFromParent();
+ }
+
+ // Introduce new phi nodes in the second loop header to ensure
+ // exiting the first and jumping to the header of the second does not break
+ // the SSA property of the phis originally in the first loop. See also the
+ // comment above.
+ Instruction *L1HeaderIP = &FC1.Header->front();
+ for (PHINode *LCPHI : OriginalFC0PHIs) {
+ int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
+ assert(L1LatchBBIdx >= 0 &&
+ "Expected loop carried value to be rewired at this point!");
+
+ Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
+
+ PHINode *L1HeaderPHI = PHINode::Create(
+ LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
+ L1HeaderPHI->addIncoming(LCV, FC0.Latch);
+ L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
+ FC0.ExitingBlock);
+
+ LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
+ }
+
+ // Update the latches
+
+ // Replace latch terminator destinations.
+ FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
+ FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
+
+ // If FC0.Latch and FC0.ExitingBlock are the same then we have already
+ // performed the updates above.
+ if (FC0.Latch != FC0.ExitingBlock)
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.Latch, FC1.Header));
+
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+ FC0.Latch, FC0.Header));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
+ FC1.Latch, FC0.Header));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+ FC1.Latch, FC1.Header));
+
+ // All done
+ // Apply the updates to the Dominator Tree and cleanup.
+
+ assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) &&
+ "FC1GuardBlock has successors!!");
+ assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) &&
+ "FC1GuardBlock has predecessors!!");
+
+ // Update DT/PDT
+ DTU.applyUpdates(TreeUpdates);
+
+ LI.removeBlock(FC1.Preheader);
+ DTU.deleteBB(FC1.Preheader);
+ DTU.deleteBB(FC0.ExitBlock);
+ DTU.flush();
+
+ // Is there a way to keep SE up-to-date so we don't need to forget the loops
+ // and rebuild the information in subsequent passes of fusion?
+ SE.forgetLoop(FC1.L);
+ SE.forgetLoop(FC0.L);
+
+ // Merge the loops.
+ SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
+ FC1.L->block_end());
+ for (BasicBlock *BB : Blocks) {
+ FC0.L->addBlockEntry(BB);
+ FC1.L->removeBlockFromLoop(BB);
+ if (LI.getLoopFor(BB) != FC1.L)
+ continue;
+ LI.changeLoopFor(BB, FC0.L);
+ }
+ while (!FC1.L->empty()) {
+ const auto &ChildLoopIt = FC1.L->begin();
+ Loop *ChildLoop = *ChildLoopIt;
+ FC1.L->removeChildLoop(ChildLoopIt);
+ FC0.L->addChildLoop(ChildLoop);
+ }
+
+ // Delete the now empty loop L1.
+ LI.erase(FC1.L);
+
+#ifndef NDEBUG
+ assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+ assert(PDT.verify());
+ LI.verify(DT);
+ SE.verify();
+#endif
LLVM_DEBUG(dbgs() << "Fusion done:\n");
@@ -1177,6 +1596,7 @@ struct LoopFuseLegacy : public FunctionPass {
return LF.fuseLoops(F);
}
};
+} // namespace
PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
auto &LI = AM.getResult<LoopAnalysis>(F);
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index e561494f19cf..dd477e800693 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -41,6 +41,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -77,16 +78,20 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -102,6 +107,7 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+STATISTIC(NumBCmp, "Number of memcmp's formed from loop 2xload+eq-compare");
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
@@ -111,6 +117,26 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
namespace {
+// FIXME: reinventing the wheel much? Is there a cleaner solution?
+struct PMAbstraction {
+ virtual void markLoopAsDeleted(Loop *L) = 0;
+ virtual ~PMAbstraction() = default;
+};
+struct LegacyPMAbstraction : PMAbstraction {
+ LPPassManager &LPM;
+ LegacyPMAbstraction(LPPassManager &LPM) : LPM(LPM) {}
+ virtual ~LegacyPMAbstraction() = default;
+ void markLoopAsDeleted(Loop *L) override { LPM.markLoopAsDeleted(*L); }
+};
+struct NewPMAbstraction : PMAbstraction {
+ LPMUpdater &Updater;
+ NewPMAbstraction(LPMUpdater &Updater) : Updater(Updater) {}
+ virtual ~NewPMAbstraction() = default;
+ void markLoopAsDeleted(Loop *L) override {
+ Updater.markLoopAsDeleted(*L, L->getName());
+ }
+};
+
class LoopIdiomRecognize {
Loop *CurLoop = nullptr;
AliasAnalysis *AA;
@@ -120,6 +146,7 @@ class LoopIdiomRecognize {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
+ PMAbstraction &LoopDeleter;
OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
@@ -128,9 +155,10 @@ public:
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const DataLayout *DL,
+ const DataLayout *DL, PMAbstraction &LoopDeleter,
OptimizationRemarkEmitter &ORE)
- : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
+ : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL),
+ LoopDeleter(LoopDeleter), ORE(ORE) {}
bool runOnLoop(Loop *L);
@@ -144,6 +172,8 @@ private:
bool HasMemset;
bool HasMemsetPattern;
bool HasMemcpy;
+ bool HasMemCmp;
+ bool HasBCmp;
/// Return code for isLegalStore()
enum LegalStoreKind {
@@ -186,6 +216,32 @@ private:
bool runOnNoncountableLoop();
+ struct CmpLoopStructure {
+ Value *BCmpValue, *LatchCmpValue;
+ BasicBlock *HeaderBrEqualBB, *HeaderBrUnequalBB;
+ BasicBlock *LatchBrFinishBB, *LatchBrContinueBB;
+ };
+ bool matchBCmpLoopStructure(CmpLoopStructure &CmpLoop) const;
+ struct CmpOfLoads {
+ ICmpInst::Predicate BCmpPred;
+ Value *LoadSrcA, *LoadSrcB;
+ Value *LoadA, *LoadB;
+ };
+ bool matchBCmpOfLoads(Value *BCmpValue, CmpOfLoads &CmpOfLoads) const;
+ bool recognizeBCmpLoopControlFlow(const CmpOfLoads &CmpOfLoads,
+ CmpLoopStructure &CmpLoop) const;
+ bool recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes, CmpOfLoads &CmpOfLoads,
+ const SCEV *&SrcA, const SCEV *&SrcB,
+ const SCEV *&Iterations) const;
+ bool detectBCmpIdiom(ICmpInst *&BCmpInst, CmpInst *&LatchCmpInst,
+ LoadInst *&LoadA, LoadInst *&LoadB, const SCEV *&SrcA,
+ const SCEV *&SrcB, const SCEV *&NBytes) const;
+ BasicBlock *transformBCmpControlFlow(ICmpInst *ComparedEqual);
+ void transformLoopToBCmp(ICmpInst *BCmpInst, CmpInst *LatchCmpInst,
+ LoadInst *LoadA, LoadInst *LoadB, const SCEV *SrcA,
+ const SCEV *SrcB, const SCEV *NBytes);
+ bool recognizeBCmp();
+
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
@@ -217,18 +273,20 @@ public:
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent());
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
+ LegacyPMAbstraction LoopDeleter(LPM);
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
- LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
+ LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, LoopDeleter, ORE);
return LIR.runOnLoop(L);
}
@@ -247,7 +305,7 @@ char LoopIdiomRecognizeLegacyPass::ID = 0;
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
+ LPMUpdater &Updater) {
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
const auto &FAM =
@@ -261,8 +319,9 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
"LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
"at a higher level");
+ NewPMAbstraction LoopDeleter(Updater);
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
- *ORE);
+ LoopDeleter, *ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -299,7 +358,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
- if (Name == "memset" || Name == "memcpy")
+ if (Name == "memset" || Name == "memcpy" || Name == "memcmp" ||
+ Name == "bcmp")
return false;
// Determine if code size heuristics need to be applied.
@@ -309,8 +369,10 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
HasMemset = TLI->has(LibFunc_memset);
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
HasMemcpy = TLI->has(LibFunc_memcpy);
+ HasMemCmp = TLI->has(LibFunc_memcmp);
+ HasBCmp = TLI->has(LibFunc_bcmp);
- if (HasMemset || HasMemsetPattern || HasMemcpy)
+ if (HasMemset || HasMemsetPattern || HasMemcpy || HasMemCmp || HasBCmp)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
return runOnCountableLoop();
@@ -961,7 +1023,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
GlobalValue::PrivateLinkage,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
- GV->setAlignment(16);
+ GV->setAlignment(Align(16));
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
}
@@ -1149,7 +1211,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
<< "] Noncountable Loop %"
<< CurLoop->getHeader()->getName() << "\n");
- return recognizePopcount() || recognizeAndInsertFFS();
+ return recognizeBCmp() || recognizePopcount() || recognizeAndInsertFFS();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1823,3 +1885,811 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
}
+
+bool LoopIdiomRecognize::matchBCmpLoopStructure(
+ CmpLoopStructure &CmpLoop) const {
+ ICmpInst::Predicate BCmpPred;
+
+ // We are looking for the following basic layout:
+ // PreheaderBB: <preheader> ; preds = ???
+ // <...>
+ // br label %LoopHeaderBB
+ // LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
+ // <...>
+ // %BCmpValue = icmp <...>
+ // br i1 %BCmpValue, label %LoopLatchBB, label %Successor0
+ // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
+ // <...>
+ // %LatchCmpValue = <are we done, or do next iteration?>
+ // br i1 %LatchCmpValue, label %Successor1, label %LoopHeaderBB
+ // Successor0: <exit> ; preds = %LoopHeaderBB
+ // <...>
+ // Successor1: <exit> ; preds = %LoopLatchBB
+ // <...>
+ //
+ // Successor0 and Successor1 may or may not be the same basic block.
+
+ // Match basic frame-work of this supposedly-comparison loop.
+ using namespace PatternMatch;
+ if (!match(CurLoop->getHeader()->getTerminator(),
+ m_Br(m_CombineAnd(m_ICmp(BCmpPred, m_Value(), m_Value()),
+ m_Value(CmpLoop.BCmpValue)),
+ CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB)) ||
+ !match(CurLoop->getLoopLatch()->getTerminator(),
+ m_Br(m_CombineAnd(m_Cmp(), m_Value(CmpLoop.LatchCmpValue)),
+ CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB))) {
+ LLVM_DEBUG(dbgs() << "Basic control-flow layout unrecognized.\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Recognized basic control-flow layout.\n");
+ return true;
+}
+
+bool LoopIdiomRecognize::matchBCmpOfLoads(Value *BCmpValue,
+ CmpOfLoads &CmpOfLoads) const {
+ using namespace PatternMatch;
+ LLVM_DEBUG(dbgs() << "Analyzing header icmp " << *BCmpValue
+ << " as bcmp pattern.\n");
+
+ // Match bcmp-style loop header cmp. It must be an eq-icmp of loads. Example:
+ // %v0 = load <...>, <...>* %LoadSrcA
+ // %v1 = load <...>, <...>* %LoadSrcB
+ // %CmpLoop.BCmpValue = icmp eq <...> %v0, %v1
+ // There won't be any no-op bitcasts between load and icmp,
+ // they would have been transformed into a load of bitcast.
+ // FIXME: {b,mem}cmp() calls have the same semantics as icmp. Match them too.
+ if (!match(BCmpValue,
+ m_ICmp(CmpOfLoads.BCmpPred,
+ m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcA)),
+ m_Value(CmpOfLoads.LoadA)),
+ m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcB)),
+ m_Value(CmpOfLoads.LoadB)))) ||
+ !ICmpInst::isEquality(CmpOfLoads.BCmpPred)) {
+ LLVM_DEBUG(dbgs() << "Loop header icmp did not match bcmp pattern.\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t"
+ << *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads.LoadB
+ << "\n");
+ // FIXME: handle memcmp pattern?
+ return true;
+}
+
+bool LoopIdiomRecognize::recognizeBCmpLoopControlFlow(
+ const CmpOfLoads &CmpOfLoads, CmpLoopStructure &CmpLoop) const {
+ BasicBlock *LoopHeaderBB = CurLoop->getHeader();
+ BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();
+
+ // Be wary, comparisons can be inverted, canonicalize order.
+ // If this 'element' comparison passed, we expect to proceed to the next elt.
+ if (CmpOfLoads.BCmpPred != ICmpInst::Predicate::ICMP_EQ)
+ std::swap(CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB);
+ // The predicate on loop latch does not matter, just canonicalize some order.
+ if (CmpLoop.LatchBrContinueBB != LoopHeaderBB)
+ std::swap(CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB);
+
+ SmallVector<BasicBlock *, 2> ExitBlocks;
+
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ assert(ExitBlocks.size() <= 2U && "Can't have more than two exit blocks.");
+
+ // Check that control-flow between blocks is as expected.
+ if (CmpLoop.HeaderBrEqualBB != LoopLatchBB ||
+ CmpLoop.LatchBrContinueBB != LoopHeaderBB ||
+ !is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) ||
+ !is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB)) {
+ LLVM_DEBUG(dbgs() << "Loop control-flow not recognized.\n");
+ return false;
+ }
+
+ assert(!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&
+ !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&
+ "Unexpected exit edges.");
+
+ LLVM_DEBUG(dbgs() << "Recognized loop control-flow.\n");
+
+ LLVM_DEBUG(dbgs() << "Performing side-effect analysis on the loop.\n");
+ assert(CurLoop->isLCSSAForm(*DT) && "Should only get LCSSA-form loops here.");
+ // No loop instructions must be used outside of the loop. Since we are in
+ // LCSSA form, we only need to check successor block's PHI nodes's incoming
+ // values for incoming blocks that are the loop basic blocks.
+ for (const BasicBlock *ExitBB : ExitBlocks) {
+ for (const PHINode &PHI : ExitBB->phis()) {
+ for (const BasicBlock *LoopBB :
+ make_filter_range(PHI.blocks(), [this](BasicBlock *PredecessorBB) {
+ return CurLoop->contains(PredecessorBB);
+ })) {
+ const auto *I =
+ dyn_cast<Instruction>(PHI.getIncomingValueForBlock(LoopBB));
+ if (I && CurLoop->contains(I)) {
+ LLVM_DEBUG(dbgs()
+ << "Loop contains instruction " << *I
+ << " which is used outside of the loop in basic block "
+ << ExitBB->getName() << " in phi node " << PHI << "\n");
+ return false;
+ }
+ }
+ }
+ }
+ // Similarly, the loop should not have any other observable side-effects
+ // other than the final comparison result.
+ for (BasicBlock *LoopBB : CurLoop->blocks()) {
+ for (Instruction &I : *LoopBB) {
+ if (isa<DbgInfoIntrinsic>(I)) // Ignore dbginfo.
+ continue; // FIXME: anything else? lifetime info?
+ if ((I.mayHaveSideEffects() || I.isAtomic() || I.isFenceLike()) &&
+ &I != CmpOfLoads.LoadA && &I != CmpOfLoads.LoadB) {
+ LLVM_DEBUG(
+ dbgs() << "Loop contains instruction with potential side-effects: "
+ << I << "\n");
+ return false;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "No loop instructions deemed to have side-effects.\n");
+ return true;
+}
+
+bool LoopIdiomRecognize::recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes,
+ CmpOfLoads &CmpOfLoads,
+ const SCEV *&SrcA,
+ const SCEV *&SrcB,
+ const SCEV *&Iterations) const {
+ // Try to compute SCEV of the loads, for this loop's scope.
+ const auto *ScevForSrcA = dyn_cast<SCEVAddRecExpr>(
+ SE->getSCEVAtScope(CmpOfLoads.LoadSrcA, CurLoop));
+ const auto *ScevForSrcB = dyn_cast<SCEVAddRecExpr>(
+ SE->getSCEVAtScope(CmpOfLoads.LoadSrcB, CurLoop));
+ if (!ScevForSrcA || !ScevForSrcB) {
+ LLVM_DEBUG(dbgs() << "Failed to get SCEV expressions for load sources.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Got SCEV expressions (at loop scope) for loads:\n\t"
+ << *ScevForSrcA << "\n\t" << *ScevForSrcB << "\n");
+
+ // Loads must have folloving SCEV exprs: {%ptr,+,BCmpTyBytes}<%LoopHeaderBB>
+ const SCEV *RecStepForA = ScevForSrcA->getStepRecurrence(*SE);
+ const SCEV *RecStepForB = ScevForSrcB->getStepRecurrence(*SE);
+ if (!ScevForSrcA->isAffine() || !ScevForSrcB->isAffine() ||
+ ScevForSrcA->getLoop() != CurLoop || ScevForSrcB->getLoop() != CurLoop ||
+ RecStepForA != RecStepForB || !isa<SCEVConstant>(RecStepForA) ||
+ cast<SCEVConstant>(RecStepForA)->getAPInt() != BCmpTyBytes) {
+ LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads. Only support "
+ "affine SCEV expressions originating in the loop we "
+ "are analysing with identical constant positive step, "
+ "equal to the count of bytes compared. Got:\n\t"
+ << *RecStepForA << "\n\t" << *RecStepForB << "\n");
+ return false;
+ // FIXME: can support BCmpTyBytes > Step.
+ // But will need to account for the extra bytes compared at the end.
+ }
+
+ SrcA = ScevForSrcA->getStart();
+ SrcB = ScevForSrcB->getStart();
+ LLVM_DEBUG(dbgs() << "Got SCEV expressions for load sources:\n\t" << *SrcA
+ << "\n\t" << *SrcB << "\n");
+
+ // The load sources must be loop-invants that dominate the loop header.
+ if (SrcA == SE->getCouldNotCompute() || SrcB == SE->getCouldNotCompute() ||
+ !SE->isAvailableAtLoopEntry(SrcA, CurLoop) ||
+ !SE->isAvailableAtLoopEntry(SrcB, CurLoop)) {
+ LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads, unavaliable "
+ "prior to loop header.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "SCEV expressions for loads are acceptable.\n");
+
+ // bcmp / memcmp take length argument as size_t, so let's conservatively
+ // assume that the iteration count should be not wider than that.
+ Type *CmpFuncSizeTy = DL->getIntPtrType(SE->getContext());
+
+ // For how many iterations is loop guaranteed not to exit via LoopLatch?
+ // This is one less than the maximal number of comparisons,and is: n + -1
+ const SCEV *LoopExitCount =
+ SE->getExitCount(CurLoop, CurLoop->getLoopLatch());
+ LLVM_DEBUG(dbgs() << "Got SCEV expression for loop latch exit count: "
+ << *LoopExitCount << "\n");
+ // Exit count, similarly, must be loop-invant that dominates the loop header.
+ if (LoopExitCount == SE->getCouldNotCompute() ||
+ !LoopExitCount->getType()->isIntOrPtrTy() ||
+ LoopExitCount->getType()->getScalarSizeInBits() >
+ CmpFuncSizeTy->getScalarSizeInBits() ||
+ !SE->isAvailableAtLoopEntry(LoopExitCount, CurLoop)) {
+ LLVM_DEBUG(dbgs() << "Unsupported SCEV expression for loop latch exit.\n");
+ return false;
+ }
+
+ // LoopExitCount is always one less than the actual count of iterations.
+ // Do this before cast, else we will be stuck with 1 + zext(-1 + n)
+ Iterations = SE->getAddExpr(
+ LoopExitCount, SE->getOne(LoopExitCount->getType()), SCEV::FlagNUW);
+ assert(Iterations != SE->getCouldNotCompute() &&
+ "Shouldn't fail to increment by one.");
+
+ LLVM_DEBUG(dbgs() << "Computed iteration count: " << *Iterations << "\n");
+ return true;
+}
+
+/// Return true iff the bcmp idiom is detected in the loop.
+///
+/// Additionally:
+/// 1) \p BCmpInst is set to the root byte-comparison instruction.
+/// 2) \p LatchCmpInst is set to the comparison that controls the latch.
+/// 3) \p LoadA is set to the first LoadInst.
+/// 4) \p LoadB is set to the second LoadInst.
+/// 5) \p SrcA is set to the first source location that is being compared.
+/// 6) \p SrcB is set to the second source location that is being compared.
+/// 7) \p NBytes is set to the number of bytes to compare.
+bool LoopIdiomRecognize::detectBCmpIdiom(ICmpInst *&BCmpInst,
+ CmpInst *&LatchCmpInst,
+ LoadInst *&LoadA, LoadInst *&LoadB,
+ const SCEV *&SrcA, const SCEV *&SrcB,
+ const SCEV *&NBytes) const {
+ LLVM_DEBUG(dbgs() << "Recognizing bcmp idiom\n");
+
+ // Give up if the loop is not in normal form, or has more than 2 blocks.
+ if (!CurLoop->isLoopSimplifyForm() || CurLoop->getNumBlocks() > 2) {
+ LLVM_DEBUG(dbgs() << "Basic loop structure unrecognized.\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Recognized basic loop structure.\n");
+
+ CmpLoopStructure CmpLoop;
+ if (!matchBCmpLoopStructure(CmpLoop))
+ return false;
+
+ CmpOfLoads CmpOfLoads;
+ if (!matchBCmpOfLoads(CmpLoop.BCmpValue, CmpOfLoads))
+ return false;
+
+ if (!recognizeBCmpLoopControlFlow(CmpOfLoads, CmpLoop))
+ return false;
+
+ BCmpInst = cast<ICmpInst>(CmpLoop.BCmpValue); // FIXME: is there no
+ LatchCmpInst = cast<CmpInst>(CmpLoop.LatchCmpValue); // way to combine
+ LoadA = cast<LoadInst>(CmpOfLoads.LoadA); // these cast with
+ LoadB = cast<LoadInst>(CmpOfLoads.LoadB); // m_Value() matcher?
+
+ Type *BCmpValTy = BCmpInst->getOperand(0)->getType();
+ LLVMContext &Context = BCmpValTy->getContext();
+ uint64_t BCmpTyBits = DL->getTypeSizeInBits(BCmpValTy);
+ static constexpr uint64_t ByteTyBits = 8;
+
+ LLVM_DEBUG(dbgs() << "Got comparison between values of type " << *BCmpValTy
+ << " of size " << BCmpTyBits
+ << " bits (while byte = " << ByteTyBits << " bits).\n");
+ // bcmp()/memcmp() minimal unit of work is a byte. Therefore we must check
+ // that we are dealing with a multiple of a byte here.
+ if (BCmpTyBits % ByteTyBits != 0) {
+ LLVM_DEBUG(dbgs() << "Value size is not a multiple of byte.\n");
+ return false;
+ // FIXME: could still be done under a run-time check that the total bit
+ // count is a multiple of a byte i guess? Or handle remainder separately?
+ }
+
+ // Each comparison is done on this many bytes.
+ uint64_t BCmpTyBytes = BCmpTyBits / ByteTyBits;
+ LLVM_DEBUG(dbgs() << "Size is exactly " << BCmpTyBytes
+ << " bytes, eligible for bcmp conversion.\n");
+
+ const SCEV *Iterations;
+ if (!recognizeBCmpLoopSCEV(BCmpTyBytes, CmpOfLoads, SrcA, SrcB, Iterations))
+ return false;
+
+ // bcmp / memcmp take length argument as size_t, do promotion now.
+ Type *CmpFuncSizeTy = DL->getIntPtrType(Context);
+ Iterations = SE->getNoopOrZeroExtend(Iterations, CmpFuncSizeTy);
+ assert(Iterations != SE->getCouldNotCompute() && "Promotion failed.");
+ // Note that it didn't do ptrtoint cast, we will need to do it manually.
+
+ // We will be comparing *bytes*, not BCmpTy, we need to recalculate size.
+ // It's a multiplication, and it *could* overflow. But for it to overflow
+ // we'd want to compare more bytes than could be represented by size_t, But
+ // allocation functions also take size_t. So how'd you produce such buffer?
+ // FIXME: we likely need to actually check that we know this won't overflow,
+ // via llvm::computeOverflowForUnsignedMul().
+ NBytes = SE->getMulExpr(
+ Iterations, SE->getConstant(CmpFuncSizeTy, BCmpTyBytes), SCEV::FlagNUW);
+ assert(NBytes != SE->getCouldNotCompute() &&
+ "Shouldn't fail to increment by one.");
+
+ LLVM_DEBUG(dbgs() << "Computed total byte count: " << *NBytes << "\n");
+
+ if (LoadA->getPointerAddressSpace() != LoadB->getPointerAddressSpace() ||
+ LoadA->getPointerAddressSpace() != 0 || !LoadA->isSimple() ||
+ !LoadB->isSimple()) {
+ StringLiteral L("Unsupported loads in idiom - only support identical, "
+ "simple loads from address space 0.\n");
+ LLVM_DEBUG(dbgs() << L);
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "BCmpIdiomUnsupportedLoads",
+ BCmpInst->getDebugLoc(),
+ CurLoop->getHeader())
+ << L;
+ });
+ return false; // FIXME: support non-simple loads.
+ }
+
+ LLVM_DEBUG(dbgs() << "Recognized bcmp idiom\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "RecognizedBCmpIdiom",
+ CurLoop->getStartLoc(),
+ CurLoop->getHeader())
+ << "Loop recognized as a bcmp idiom";
+ });
+
+ return true;
+}
+
+BasicBlock *
+LoopIdiomRecognize::transformBCmpControlFlow(ICmpInst *ComparedEqual) {
+ LLVM_DEBUG(dbgs() << "Transforming control-flow.\n");
+ SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
+
+ BasicBlock *PreheaderBB = CurLoop->getLoopPreheader();
+ BasicBlock *HeaderBB = CurLoop->getHeader();
+ BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();
+ SmallString<32> LoopName = CurLoop->getName();
+ Function *Func = PreheaderBB->getParent();
+ LLVMContext &Context = Func->getContext();
+
+ // Before doing anything, drop SCEV info.
+ SE->forgetLoop(CurLoop);
+
+ // Here we start with: (0/6)
+ // PreheaderBB: <preheader> ; preds = ???
+ // <...>
+ // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
+ // %ComparedEqual = icmp eq <...> %memcmp, 0
+ // br label %LoopHeaderBB
+ // LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
+ // <...>
+ // br i1 %<...>, label %LoopLatchBB, label %Successor0BB
+ // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
+ // <...>
+ // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
+ // Successor0BB: <exit> ; preds = %LoopHeaderBB
+ // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
+ // <...>
+ // Successor1BB: <exit> ; preds = %LoopLatchBB
+ // %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
+ // <...>
+ //
+ // Successor0 and Successor1 may or may not be the same basic block.
+
+ // Decouple the edge between loop preheader basic block and loop header basic
+ // block. Thus the loop has become unreachable.
+ assert(cast<BranchInst>(PreheaderBB->getTerminator())->isUnconditional() &&
+ PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB &&
+ "Preheader bb must end with an unconditional branch to header bb.");
+ PreheaderBB->getTerminator()->eraseFromParent();
+ DTUpdates.push_back({DominatorTree::Delete, PreheaderBB, HeaderBB});
+
+ // Create a new preheader basic block before loop header basic block.
+ auto *PhonyPreheaderBB = BasicBlock::Create(
+ Context, LoopName + ".phonypreheaderbb", Func, HeaderBB);
+ // And insert an unconditional branch from phony preheader basic block to
+ // loop header basic block.
+ IRBuilder<>(PhonyPreheaderBB).CreateBr(HeaderBB);
+ DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB});
+
+ // Create a *single* new empty block that we will substitute as a
+ // successor basic block for the loop's exits. This one is temporary.
+ // Much like phony preheader basic block, it is not connected.
+ auto *PhonySuccessorBB =
+ BasicBlock::Create(Context, LoopName + ".phonysuccessorbb", Func,
+ LoopLatchBB->getNextNode());
+ // That block must have *some* non-PHI instruction, or else deleteDeadLoop()
+ // will mess up cleanup of dbginfo, and verifier will complain.
+ IRBuilder<>(PhonySuccessorBB).CreateUnreachable();
+
+ // Create two new empty blocks that we will use to preserve the original
+ // loop exit control-flow, and preserve the incoming values in the PHI nodes
+ // in loop's successor exit blocks. These will live one.
+ auto *ComparedUnequalBB =
+ BasicBlock::Create(Context, ComparedEqual->getName() + ".unequalbb", Func,
+ PhonySuccessorBB->getNextNode());
+ auto *ComparedEqualBB =
+ BasicBlock::Create(Context, ComparedEqual->getName() + ".equalbb", Func,
+ PhonySuccessorBB->getNextNode());
+
+ // By now we have: (1/6)
+ // PreheaderBB: ; preds = ???
+ // <...>
+ // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
+ // %ComparedEqual = icmp eq <...> %memcmp, 0
+ // [no terminator instruction!]
+ // PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
+ // br label %LoopHeaderBB
+ // LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
+ // <...>
+ // br i1 %<...>, label %LoopLatchBB, label %Successor0BB
+ // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
+ // <...>
+ // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
+ // PhonySuccessorBB: ; No preds, UNREACHABLE!
+ // unreachable
+ // EqualBB: ; No preds, UNREACHABLE!
+ // [no terminator instruction!]
+ // UnequalBB: ; No preds, UNREACHABLE!
+ // [no terminator instruction!]
+ // Successor0BB: <exit> ; preds = %LoopHeaderBB
+ // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
+ // <...>
+ // Successor1BB: <exit> ; preds = %LoopLatchBB
+ // %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
+ // <...>
+
+ // What is the mapping/replacement basic block for exiting out of the loop
+ // from either of old's loop basic blocks?
+ auto GetReplacementBB = [this, ComparedEqualBB,
+ ComparedUnequalBB](const BasicBlock *OldBB) {
+ assert(CurLoop->contains(OldBB) && "Only for loop's basic blocks.");
+ if (OldBB == CurLoop->getLoopLatch()) // "all elements compared equal".
+ return ComparedEqualBB;
+ if (OldBB == CurLoop->getHeader()) // "element compared unequal".
+ return ComparedUnequalBB;
+ llvm_unreachable("Only had two basic blocks in loop.");
+ };
+
+ // What are the exits out of this loop?
+ SmallVector<Loop::Edge, 2> LoopExitEdges;
+ CurLoop->getExitEdges(LoopExitEdges);
+ assert(LoopExitEdges.size() == 2 && "Should have only to two exit edges.");
+
+ // Populate new basic blocks, update the exiting control-flow, PHI nodes.
+ for (const Loop::Edge &Edge : LoopExitEdges) {
+ auto *OldLoopBB = const_cast<BasicBlock *>(Edge.first);
+ auto *SuccessorBB = const_cast<BasicBlock *>(Edge.second);
+ assert(CurLoop->contains(OldLoopBB) && !CurLoop->contains(SuccessorBB) &&
+ "Unexpected edge.");
+
+ // If we would exit the loop from this loop's basic block,
+ // what semantically would that mean? Did comparison succeed or fail?
+ BasicBlock *NewBB = GetReplacementBB(OldLoopBB);
+ assert(NewBB->empty() && "Should not get same new basic block here twice.");
+ IRBuilder<> Builder(NewBB);
+ Builder.SetCurrentDebugLocation(OldLoopBB->getTerminator()->getDebugLoc());
+ Builder.CreateBr(SuccessorBB);
+ DTUpdates.push_back({DominatorTree::Insert, NewBB, SuccessorBB});
+ // Also, be *REALLY* careful with PHI nodes in successor basic block,
+ // update them to recieve the same input value, but not from current loop's
+ // basic block, but from new basic block instead.
+ SuccessorBB->replacePhiUsesWith(OldLoopBB, NewBB);
+ // Also, change loop control-flow. This loop's basic block shall no longer
+ // exit from the loop to it's original successor basic block, but to our new
+ // phony successor basic block. Note that new successor will be unique exit.
+ OldLoopBB->getTerminator()->replaceSuccessorWith(SuccessorBB,
+ PhonySuccessorBB);
+ DTUpdates.push_back({DominatorTree::Delete, OldLoopBB, SuccessorBB});
+ DTUpdates.push_back({DominatorTree::Insert, OldLoopBB, PhonySuccessorBB});
+ }
+
+ // Inform DomTree about edge changes. Note that LoopInfo is still out-of-date.
+ assert(DTUpdates.size() == 8 && "Update count prediction failed.");
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ DTU.applyUpdates(DTUpdates);
+ DTUpdates.clear();
+
+ // By now we have: (2/6)
+ // PreheaderBB: ; preds = ???
+ // <...>
+ // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
+ // %ComparedEqual = icmp eq <...> %memcmp, 0
+ // [no terminator instruction!]
+ // PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
+ // br label %LoopHeaderBB
+ // LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
+ // <...>
+ // br i1 %<...>, label %LoopLatchBB, label %PhonySuccessorBB
+ // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
+ // <...>
+ // br i1 %<...>, label %PhonySuccessorBB, label %LoopHeaderBB
+ // PhonySuccessorBB: <uniq. exit> ; preds = %LoopHeaderBB, %LoopLatchBB
+ // unreachable
+ // EqualBB: ; No preds, UNREACHABLE!
+ // br label %Successor1BB
+ // UnequalBB: ; No preds, UNREACHABLE!
+ // br label %Successor0BB
+ // Successor0BB: ; preds = %UnequalBB
+ // %S0PHI = phi <...> [ <...>, %UnequalBB ]
+ // <...>
+ // Successor1BB: ; preds = %EqualBB
+ // %S0PHI = phi <...> [ <...>, %EqualBB ]
+ // <...>
+
+ // *Finally*, zap the original loop. Record it's parent loop though.
+ Loop *ParentLoop = CurLoop->getParentLoop();
+ LLVM_DEBUG(dbgs() << "Deleting old loop.\n");
+ LoopDeleter.markLoopAsDeleted(CurLoop); // Mark as deleted *BEFORE* deleting!
+ deleteDeadLoop(CurLoop, DT, SE, LI); // And actually delete the loop.
+ CurLoop = nullptr;
+
+ // By now we have: (3/6)
+ // PreheaderBB: ; preds = ???
+ // <...>
+ // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
+ // %ComparedEqual = icmp eq <...> %memcmp, 0
+ // [no terminator instruction!]
+ // PhonyPreheaderBB: ; No preds, UNREACHABLE!
+ // br label %PhonySuccessorBB
+ // PhonySuccessorBB: ; preds = %PhonyPreheaderBB
+ // unreachable
+ // EqualBB: ; No preds, UNREACHABLE!
+ // br label %Successor1BB
+ // UnequalBB: ; No preds, UNREACHABLE!
+ // br label %Successor0BB
+ // Successor0BB: ; preds = %UnequalBB
+ // %S0PHI = phi <...> [ <...>, %UnequalBB ]
+ // <...>
+ // Successor1BB: ; preds = %EqualBB
+ // %S0PHI = phi <...> [ <...>, %EqualBB ]
+ // <...>
+
+ // Now, actually restore the CFG.
+
+ // Insert an unconditional branch from an actual preheader basic block to
+ // phony preheader basic block.
+ IRBuilder<>(PreheaderBB).CreateBr(PhonyPreheaderBB);
+ DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB});
+ // Insert proper conditional branch from phony successor basic block to the
+ // "dispatch" basic blocks, which were used to preserve incoming values in
+ // original loop's successor basic blocks.
+ assert(isa<UnreachableInst>(PhonySuccessorBB->getTerminator()) &&
+ "Yep, that's the one we created to keep deleteDeadLoop() happy.");
+ PhonySuccessorBB->getTerminator()->eraseFromParent();
+ {
+ IRBuilder<> Builder(PhonySuccessorBB);
+ Builder.SetCurrentDebugLocation(ComparedEqual->getDebugLoc());
+ Builder.CreateCondBr(ComparedEqual, ComparedEqualBB, ComparedUnequalBB);
+ }
+ DTUpdates.push_back(
+ {DominatorTree::Insert, PhonySuccessorBB, ComparedEqualBB});
+ DTUpdates.push_back(
+ {DominatorTree::Insert, PhonySuccessorBB, ComparedUnequalBB});
+
+ BasicBlock *DispatchBB = PhonySuccessorBB;
+ DispatchBB->setName(LoopName + ".bcmpdispatchbb");
+
+ assert(DTUpdates.size() == 3 && "Update count prediction failed.");
+ DTU.applyUpdates(DTUpdates);
+ DTUpdates.clear();
+
+ // By now we have: (4/6)
+ // PreheaderBB: ; preds = ???
+ // <...>
+ // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
+ // %ComparedEqual = icmp eq <...> %memcmp, 0
+ // br label %PhonyPreheaderBB
+ // PhonyPreheaderBB: ; preds = %PreheaderBB
+ // br label %DispatchBB
+ // DispatchBB: ; preds = %PhonyPreheaderBB
+ // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
+ // EqualBB: ; preds = %DispatchBB
+ // br label %Successor1BB
+ // UnequalBB: ; preds = %DispatchBB
+ // br label %Successor0BB
+ // Successor0BB: ; preds = %UnequalBB
+ // %S0PHI = phi <...> [ <...>, %UnequalBB ]
+ // <...>
+ // Successor1BB: ; preds = %EqualBB
+ // %S0PHI = phi <...> [ <...>, %EqualBB ]
+ // <...>
+
+ // The basic CFG has been restored! Now let's merge redundant basic blocks.
+
+ // Merge phony successor basic block into it's only predecessor,
+ // phony preheader basic block. It is fully pointlessly redundant.
+ MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);
+
+ // By now we have: (5/6)
+ // PreheaderBB: ; preds = ???
+ // <...>
+ // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
+ // %ComparedEqual = icmp eq <...> %memcmp, 0
+ // br label %DispatchBB
+ // DispatchBB: ; preds = %PreheaderBB
+ // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
+ // EqualBB: ; preds = %DispatchBB
+ // br label %Successor1BB
+ // UnequalBB: ; preds = %DispatchBB
+ // br label %Successor0BB
+ // Successor0BB: ; preds = %UnequalBB
+ // %S0PHI = phi <...> [ <...>, %UnequalBB ]
+ // <...>
+ // Successor1BB: ; preds = %EqualBB
+ // %S0PHI = phi <...> [ <...>, %EqualBB ]
+ // <...>
+
+ // Was this loop nested?
+ if (!ParentLoop) {
+ // If the loop was *NOT* nested, then let's also merge phony successor
+ // basic block into it's only predecessor, preheader basic block.
+ // Also, here we need to update LoopInfo.
+ LI->removeBlock(PreheaderBB);
+ MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);
+
+ // By now we have: (6/6)
+ // DispatchBB: ; preds = ???
+ // <...>
+ // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
+ // %ComparedEqual = icmp eq <...> %memcmp, 0
+ // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
+ // EqualBB: ; preds = %DispatchBB
+ // br label %Successor1BB
+ // UnequalBB: ; preds = %DispatchBB
+ // br label %Successor0BB
+ // Successor0BB: ; preds = %UnequalBB
+ // %S0PHI = phi <...> [ <...>, %UnequalBB ]
+ // <...>
+ // Successor1BB: ; preds = %EqualBB
+ // %S0PHI = phi <...> [ <...>, %EqualBB ]
+ // <...>
+
+ return DispatchBB;
+ }
+
+ // Otherwise, we need to "preserve" the LoopSimplify form of the deleted loop.
+ // To achieve that, we shall keep the preheader basic block (mainly so that
+ // the loop header block will be guaranteed to have a predecessor outside of
+ // the loop), and create a phony loop with all these new three basic blocks.
+ Loop *PhonyLoop = LI->AllocateLoop();
+ ParentLoop->addChildLoop(PhonyLoop);
+ PhonyLoop->addBasicBlockToLoop(DispatchBB, *LI);
+ PhonyLoop->addBasicBlockToLoop(ComparedEqualBB, *LI);
+ PhonyLoop->addBasicBlockToLoop(ComparedUnequalBB, *LI);
+
+ // But we only have a preheader basic block, a header basic block block and
+ // two exiting basic blocks. For a proper loop we also need a backedge from
+ // non-header basic block to header bb.
+ // Let's just add a never-taken branch from both of the exiting basic blocks.
+ for (BasicBlock *BB : {ComparedEqualBB, ComparedUnequalBB}) {
+ BranchInst *OldTerminator = cast<BranchInst>(BB->getTerminator());
+ assert(OldTerminator->isUnconditional() && "That's the one we created.");
+ BasicBlock *SuccessorBB = OldTerminator->getSuccessor(0);
+
+ IRBuilder<> Builder(OldTerminator);
+ Builder.SetCurrentDebugLocation(OldTerminator->getDebugLoc());
+ Builder.CreateCondBr(ConstantInt::getTrue(Context), SuccessorBB,
+ DispatchBB);
+ OldTerminator->eraseFromParent();
+ // Yes, the backedge will never be taken. The control-flow is redundant.
+ // If it can be simplified further, other passes will take care.
+ DTUpdates.push_back({DominatorTree::Delete, BB, SuccessorBB});
+ DTUpdates.push_back({DominatorTree::Insert, BB, SuccessorBB});
+ DTUpdates.push_back({DominatorTree::Insert, BB, DispatchBB});
+ }
+ assert(DTUpdates.size() == 6 && "Update count prediction failed.");
+ DTU.applyUpdates(DTUpdates);
+ DTUpdates.clear();
+
+ // By now we have: (6/6)
+ // PreheaderBB: <preheader> ; preds = ???
+ // <...>
+ // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
+ // %ComparedEqual = icmp eq <...> %memcmp, 0
+ // br label %BCmpDispatchBB
+ // BCmpDispatchBB: <header> ; preds = %PreheaderBB
+ // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
+ // EqualBB: <latch,exiting> ; preds = %BCmpDispatchBB
+ // br i1 %true, label %Successor1BB, label %BCmpDispatchBB
+ // UnequalBB: <latch,exiting> ; preds = %BCmpDispatchBB
+ // br i1 %true, label %Successor0BB, label %BCmpDispatchBB
+ // Successor0BB: ; preds = %UnequalBB
+ // %S0PHI = phi <...> [ <...>, %UnequalBB ]
+ // <...>
+ // Successor1BB: ; preds = %EqualBB
+ // %S0PHI = phi <...> [ <...>, %EqualBB ]
+ // <...>
+
+ // Finally fully DONE!
+ return DispatchBB;
+}
+
+void LoopIdiomRecognize::transformLoopToBCmp(ICmpInst *BCmpInst,
+ CmpInst *LatchCmpInst,
+ LoadInst *LoadA, LoadInst *LoadB,
+ const SCEV *SrcA, const SCEV *SrcB,
+ const SCEV *NBytes) {
+ // We will be inserting before the terminator instruction of preheader block.
+ IRBuilder<> Builder(CurLoop->getLoopPreheader()->getTerminator());
+
+ LLVM_DEBUG(dbgs() << "Transforming bcmp loop idiom into a call.\n");
+ LLVM_DEBUG(dbgs() << "Emitting new instructions.\n");
+
+ // Expand the SCEV expressions for both sources to compare, and produce value
+ // for the byte len (beware of Iterations potentially being a pointer, and
+ // account for element size being BCmpTyBytes bytes, which may be not 1 byte)
+ Value *PtrA, *PtrB, *Len;
+ {
+ SCEVExpander SExp(*SE, *DL, "LoopToBCmp");
+ SExp.setInsertPoint(&*Builder.GetInsertPoint());
+
+ auto HandlePtr = [&SExp](LoadInst *Load, const SCEV *Src) {
+ SExp.SetCurrentDebugLocation(DebugLoc());
+ // If the pointer operand of original load had dbgloc - use it.
+ if (const auto *I = dyn_cast<Instruction>(Load->getPointerOperand()))
+ SExp.SetCurrentDebugLocation(I->getDebugLoc());
+ return SExp.expandCodeFor(Src);
+ };
+ PtrA = HandlePtr(LoadA, SrcA);
+ PtrB = HandlePtr(LoadB, SrcB);
+
+ // For len calculation let's use dbgloc for the loop's latch condition.
+ Builder.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
+ SExp.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
+ Len = SExp.expandCodeFor(NBytes);
+
+ Type *CmpFuncSizeTy = DL->getIntPtrType(Builder.getContext());
+ assert(SE->getTypeSizeInBits(Len->getType()) ==
+ DL->getTypeSizeInBits(CmpFuncSizeTy) &&
+ "Len should already have the correct size.");
+
+ // Make sure that iteration count is a number, insert ptrtoint cast if not.
+ if (Len->getType()->isPointerTy())
+ Len = Builder.CreatePtrToInt(Len, CmpFuncSizeTy);
+ assert(Len->getType() == CmpFuncSizeTy && "Should have correct type now.");
+
+ Len->setName(Len->getName() + ".bytecount");
+
+ // There is no legality check needed. We want to compare that the memory
+ // regions [PtrA, PtrA+Len) and [PtrB, PtrB+Len) are fully identical, equal.
+ // For them to be fully equal, they must match bit-by-bit. And likewise,
+ // for them to *NOT* be fully equal, they have to differ just by one bit.
+ // The step of comparison (bits compared at once) simply does not matter.
+ }
+
+ // For the rest of new instructions, dbgloc should point at the value cmp.
+ Builder.SetCurrentDebugLocation(BCmpInst->getDebugLoc());
+
+ // Emit the comparison itself.
+ auto *CmpCall =
+ cast<CallInst>(HasBCmp ? emitBCmp(PtrA, PtrB, Len, Builder, *DL, TLI)
+ : emitMemCmp(PtrA, PtrB, Len, Builder, *DL, TLI));
+ // FIXME: add {B,Mem}CmpInst with MemoryCompareInst
+ // (based on MemIntrinsicBase) as base?
+ // FIXME: propagate metadata from loads? (alignments, AS, TBAA, ...)
+
+ // {b,mem}cmp returned 0 if they were equal, or non-zero if not equal.
+ auto *ComparedEqual = cast<ICmpInst>(Builder.CreateICmpEQ(
+ CmpCall, ConstantInt::get(CmpCall->getType(), 0),
+ PtrA->getName() + ".vs." + PtrB->getName() + ".eqcmp"));
+
+ BasicBlock *BB = transformBCmpControlFlow(ComparedEqual);
+ Builder.ClearInsertionPoint();
+
+ // We're done.
+ LLVM_DEBUG(dbgs() << "Transformed loop bcmp idiom into a call.\n");
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "TransformedBCmpIdiomToCall",
+ CmpCall->getDebugLoc(), BB)
+ << "Transformed bcmp idiom into a call to "
+ << ore::NV("NewFunction", CmpCall->getCalledFunction())
+ << "() function";
+ });
+ ++NumBCmp;
+}
+
+/// Recognizes a bcmp idiom in a non-countable loop.
+///
+/// If detected, transforms the relevant code to issue the bcmp (or memcmp)
+/// intrinsic function call, and returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeBCmp() {
+ if (!HasMemCmp && !HasBCmp)
+ return false;
+
+ ICmpInst *BCmpInst;
+ CmpInst *LatchCmpInst;
+ LoadInst *LoadA, *LoadB;
+ const SCEV *SrcA, *SrcB, *NBytes;
+ if (!detectBCmpIdiom(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB,
+ NBytes)) {
+ LLVM_DEBUG(dbgs() << "bcmp idiom recognition failed.\n");
+ return false;
+ }
+
+ transformLoopToBCmp(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB, NBytes);
+ return true;
+}
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 31191b52895c..368b9d4e8df1 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -192,7 +192,8 @@ public:
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
*L->getHeader()->getParent());
const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent());
MemorySSA *MSSA = nullptr;
Optional<MemorySSAUpdater> MSSAU;
if (EnableMSSALoopDependency) {
@@ -233,7 +234,7 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
- if (EnableMSSALoopDependency)
+ if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 9a42365adc1b..1af4b21b432e 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -410,8 +410,6 @@ public:
void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
private:
- void splitInnerLoopLatch(Instruction *);
- void splitInnerLoopHeader();
bool adjustLoopLinks();
void adjustLoopPreheaders();
bool adjustLoopBranches();
@@ -1226,7 +1224,7 @@ bool LoopInterchangeTransform::transform() {
if (InnerLoop->getSubLoops().empty()) {
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
- LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n");
+ LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
if (!InductionPHI) {
LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
@@ -1242,11 +1240,55 @@ bool LoopInterchangeTransform::transform() {
if (&InductionPHI->getParent()->front() != InductionPHI)
InductionPHI->moveBefore(&InductionPHI->getParent()->front());
- // Split at the place were the induction variable is
- // incremented/decremented.
- // TODO: This splitting logic may not work always. Fix this.
- splitInnerLoopLatch(InnerIndexVar);
- LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n");
+ // Create a new latch block for the inner loop. We split at the
+ // current latch's terminator and then move the condition and all
+ // operands that are not either loop-invariant or the induction PHI into the
+ // new latch block.
+ BasicBlock *NewLatch =
+ SplitBlock(InnerLoop->getLoopLatch(),
+ InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
+
+ SmallSetVector<Instruction *, 4> WorkList;
+ unsigned i = 0;
+ auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() {
+ for (; i < WorkList.size(); i++) {
+ // Duplicate instruction and move it the new latch. Update uses that
+ // have been moved.
+ Instruction *NewI = WorkList[i]->clone();
+ NewI->insertBefore(NewLatch->getFirstNonPHI());
+ assert(!NewI->mayHaveSideEffects() &&
+ "Moving instructions with side-effects may change behavior of "
+ "the loop nest!");
+ for (auto UI = WorkList[i]->use_begin(), UE = WorkList[i]->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ if (!InnerLoop->contains(UserI->getParent()) ||
+ UserI->getParent() == NewLatch || UserI == InductionPHI)
+ U.set(NewI);
+ }
+ // Add operands of moved instruction to the worklist, except if they are
+ // outside the inner loop or are the induction PHI.
+ for (Value *Op : WorkList[i]->operands()) {
+ Instruction *OpI = dyn_cast<Instruction>(Op);
+ if (!OpI ||
+ this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
+ OpI == InductionPHI)
+ continue;
+ WorkList.insert(OpI);
+ }
+ }
+ };
+
+ // FIXME: Should we interchange when we have a constant condition?
+ Instruction *CondI = dyn_cast<Instruction>(
+ cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
+ ->getCondition());
+ if (CondI)
+ WorkList.insert(CondI);
+ MoveInstructions();
+ WorkList.insert(cast<Instruction>(InnerIndexVar));
+ MoveInstructions();
// Splits the inner loops phi nodes out into a separate basic block.
BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
@@ -1263,10 +1305,6 @@ bool LoopInterchangeTransform::transform() {
return true;
}
-void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
- SplitBlock(InnerLoop->getLoopLatch(), Inc, DT, LI);
-}
-
/// \brief Move all instructions except the terminator from FromBB right before
/// InsertBefore
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 2b3d5e0ce9b7..e8dc879a184b 100644
--- a/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -435,7 +435,8 @@ public:
PH->getTerminator());
Value *Initial = new LoadInst(
Cand.Load->getType(), InitialPtr, "load_initial",
- /* isVolatile */ false, Cand.Load->getAlignment(), PH->getTerminator());
+ /* isVolatile */ false, MaybeAlign(Cand.Load->getAlignment()),
+ PH->getTerminator());
PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
&L->getHeader()->front());
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 507a1e251ca6..885c0e8f4b8b 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -543,7 +543,7 @@ bool LoopPredication::isLoopInvariantValue(const SCEV* S) {
if (const auto *LI = dyn_cast<LoadInst>(U->getValue()))
if (LI->isUnordered() && L->hasLoopInvariantOperands(LI))
if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
- LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ LI->hasMetadata(LLVMContext::MD_invariant_load))
return true;
return false;
}
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 166b57f20b43..96e2c2a3ac6b 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1644,7 +1644,8 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent());
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index e009947690af..94517996df39 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -55,7 +55,7 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
auto PA = getLoopPassPreservedAnalyses();
- if (EnableMSSALoopDependency)
+ if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -94,17 +94,15 @@ public:
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- auto *SE = SEWP ? &SEWP->getSE() : nullptr;
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
Optional<MemorySSAUpdater> MSSAU;
if (EnableMSSALoopDependency) {
MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
MSSAU = MemorySSAUpdater(MSSA);
}
- return LoopRotation(L, LI, TTI, AC, DT, SE,
+ return LoopRotation(L, LI, TTI, AC, &DT, &SE,
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
false, MaxHeaderSize, false);
}
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 046f4c8af492..299f3fc5fb19 100644
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -690,7 +690,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &LPMU) {
Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency && AR.MSSA)
+ if (AR.MSSA)
MSSAU = MemorySSAUpdater(AR.MSSA);
bool DeleteCurrentLoop = false;
if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE,
@@ -702,7 +702,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
LPMU.markLoopAsDeleted(L, "loop-simplifycfg");
auto PA = getLoopPassPreservedAnalyses();
- if (EnableMSSALoopDependency)
+ if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 975452e13f09..65e0dee0225a 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -230,12 +230,9 @@ static bool sinkInstruction(Loop &L, Instruction &I,
IC->setName(I.getName());
IC->insertBefore(&*N->getFirstInsertionPt());
// Replaces uses of I with IC in N
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) {
- Use &U = *UI++;
- auto *I = cast<Instruction>(U.getUser());
- if (I->getParent() == N)
- U.set(IC);
- }
+ I.replaceUsesWithIf(IC, [N](Use &U) {
+ return cast<Instruction>(U.getUser())->getParent() == N;
+ });
// Replaces uses of I with IC in blocks dominated by N
replaceDominatedUsesWith(&I, IC, DT, N);
LLVM_DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName()
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 59a387a186b8..7f119175c4a8 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1386,7 +1386,9 @@ void Cost::RateFormula(const Formula &F,
// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
// additional instruction (at least fill).
- unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1;
+ // TODO: Need distinguish register class?
+ unsigned TTIRegNum = TTI->getNumberOfRegisters(
+ TTI->getRegisterClassForType(false, F.getType())) - 1;
if (C.NumRegs > TTIRegNum) {
// Cost already exceeded TTIRegNum, then only newly added register can add
// new instructions.
@@ -3165,6 +3167,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
return;
}
+ assert(IVSrc && "Failed to find IV chain source");
LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
@@ -3265,12 +3268,12 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// requirements for both N and i at the same time. Limiting this code to
// equality icmps is not a problem because all interesting loops use
// equality icmps, thanks to IndVarSimplify.
- if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
+ // If CI can be saved in some target, like replaced inside hardware loop
+ // in PowerPC, no need to generate initial formulae for it.
+ if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
+ continue;
if (CI->isEquality()) {
- // If CI can be saved in some target, like replaced inside hardware loop
- // in PowerPC, no need to generate initial formulae for it.
- if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
- continue;
// Swap the operands if needed to put the OperandValToReplace on the
// left, for consistency.
Value *NV = CI->getOperand(1);
@@ -3298,6 +3301,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
Factors.insert(-(uint64_t)Factors[i]);
Factors.insert(-1);
}
+ }
// Get or create an LSRUse.
std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
@@ -4834,6 +4838,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
}
}
}
+ assert(Best && "Failed to find best LSRUse candidate");
LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
<< " will yield profitable reuse.\n");
@@ -5740,7 +5745,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
*L->getHeader()->getParent());
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
*L->getHeader()->getParent());
- auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent());
return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, LibInfo);
}
diff --git a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 86891eb451bb..8d88be420314 100644
--- a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -166,7 +166,7 @@ static bool computeUnrollAndJamCount(
bool UseUpperBound = false;
bool ExplicitUnroll = computeUnrollCount(
L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
- OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
+ /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
if (ExplicitUnroll || UseUpperBound) {
// If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
// for the unroller instead.
@@ -293,9 +293,9 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
if (Latch != Exit || SubLoopLatch != SubLoopExit)
return LoopUnrollResult::Unmodified;
- TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, nullptr, nullptr, OptLevel,
- None, None, None, None, None, None);
+ TargetTransformInfo::UnrollingPreferences UP =
+ gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None,
+ None, None, None, None, None, None, None);
if (AllowUnrollAndJam.getNumOccurrences() > 0)
UP.UnrollAndJam = AllowUnrollAndJam;
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 2fa7436213dd..a6d4164c3645 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -178,7 +178,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
- Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
+ Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
+ Optional<bool> UserAllowProfileBasedPeeling,
+ Optional<unsigned> UserFullUnrollMaxCount) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
@@ -202,6 +204,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.UpperBound = false;
UP.AllowPeeling = true;
UP.UnrollAndJam = false;
+ UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
// Override with any target specific settings
@@ -257,6 +260,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.UpperBound = *UserUpperBound;
if (UserAllowPeeling.hasValue())
UP.AllowPeeling = *UserAllowPeeling;
+ if (UserAllowProfileBasedPeeling.hasValue())
+ UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
+ if (UserFullUnrollMaxCount.hasValue())
+ UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
return UP;
}
@@ -730,7 +737,7 @@ bool llvm::computeUnrollCount(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
- unsigned &TripMultiple, unsigned LoopSize,
+ bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
// Check for explicit Count.
@@ -781,18 +788,34 @@ bool llvm::computeUnrollCount(
// Also we need to check if we exceed FullUnrollMaxCount.
// If using the upper bound to unroll, TripMultiple should be set to 1 because
// we do not know when loop may exit.
- // MaxTripCount and ExactTripCount cannot both be non zero since we only
+
+ // We can unroll by the upper bound amount if it's generally allowed or if
+ // we know that the loop is executed either the upper bound or zero times.
+ // (MaxOrZero unrolling keeps only the first loop test, so the number of
+ // loop tests remains the same compared to the non-unrolled version, whereas
+ // the generic upper bound unrolling keeps all but the last loop test so the
+ // number of loop tests goes up which may end up being worse on targets with
+ // constrained branch predictor resources so is controlled by an option.)
+ // In addition we only unroll small upper bounds.
+ unsigned FullUnrollMaxTripCount = MaxTripCount;
+ if (!(UP.UpperBound || MaxOrZero) ||
+ FullUnrollMaxTripCount > UnrollMaxUpperBound)
+ FullUnrollMaxTripCount = 0;
+
+ // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only
// compute the former when the latter is zero.
unsigned ExactTripCount = TripCount;
- assert((ExactTripCount == 0 || MaxTripCount == 0) &&
- "ExtractTripCount and MaxTripCount cannot both be non zero.");
- unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
+ assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) &&
+ "ExtractTripCount and UnrollByMaxCount cannot both be non zero.");
+
+ unsigned FullUnrollTripCount =
+ ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
UP.Count = FullUnrollTripCount;
if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
- UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
TripCount = FullUnrollTripCount;
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
@@ -806,7 +829,7 @@ bool llvm::computeUnrollCount(
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
TripCount = FullUnrollTripCount;
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
@@ -882,6 +905,8 @@ bool llvm::computeUnrollCount(
"because "
"unrolled size is too large.";
});
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
+ << "\n");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
@@ -903,6 +928,12 @@ bool llvm::computeUnrollCount(
return false;
}
+ // Don't unroll a small upper bound loop unless user or TTI asked to do so.
+ if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
+ UP.Count = 0;
+ return false;
+ }
+
// Check if the runtime trip count is too small when profile is available.
if (L->getHeader()->getParent()->hasProfileData()) {
if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
@@ -966,7 +997,11 @@ bool llvm::computeUnrollCount(
if (UP.Count > UP.MaxCount)
UP.Count = UP.MaxCount;
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
+
+ if (MaxTripCount && UP.Count > MaxTripCount)
+ UP.Count = MaxTripCount;
+
+ LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
<< "\n");
if (UP.Count < 2)
UP.Count = 0;
@@ -976,13 +1011,14 @@ bool llvm::computeUnrollCount(
static LoopUnrollResult tryToUnrollLoop(
Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
- OptimizationRemarkEmitter &ORE,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
- bool PreserveLCSSA, int OptLevel,
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
- Optional<bool> ProvidedAllowPeeling) {
+ Optional<bool> ProvidedAllowPeeling,
+ Optional<bool> ProvidedAllowProfileBasedPeeling,
+ Optional<unsigned> ProvidedFullUnrollMaxCount) {
LLVM_DEBUG(dbgs() << "Loop Unroll: F["
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
@@ -1007,7 +1043,8 @@ static LoopUnrollResult tryToUnrollLoop(
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
- ProvidedAllowPeeling);
+ ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
+ ProvidedFullUnrollMaxCount);
// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
// as threshold later on.
@@ -1028,10 +1065,10 @@ static LoopUnrollResult tryToUnrollLoop(
return LoopUnrollResult::Unmodified;
}
- // When optimizing for size, use LoopSize as threshold, to (fully) unroll
- // loops, if it does not increase code size.
+ // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
+ // later), to (fully) unroll loops, if it does not increase code size.
if (OptForSize)
- UP.Threshold = std::max(UP.Threshold, LoopSize);
+ UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
if (NumInlineCandidates != 0) {
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
@@ -1040,7 +1077,6 @@ static LoopUnrollResult tryToUnrollLoop(
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
- unsigned MaxTripCount = 0;
unsigned TripMultiple = 1;
// If there are multiple exiting blocks but one of them is the latch, use the
// latch for the trip count estimation. Otherwise insist on a single exiting
@@ -1070,28 +1106,18 @@ static LoopUnrollResult tryToUnrollLoop(
// Try to find the trip count upper bound if we cannot find the exact trip
// count.
+ unsigned MaxTripCount = 0;
bool MaxOrZero = false;
if (!TripCount) {
MaxTripCount = SE.getSmallConstantMaxTripCount(L);
MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
- // We can unroll by the upper bound amount if it's generally allowed or if
- // we know that the loop is executed either the upper bound or zero times.
- // (MaxOrZero unrolling keeps only the first loop test, so the number of
- // loop tests remains the same compared to the non-unrolled version, whereas
- // the generic upper bound unrolling keeps all but the last loop test so the
- // number of loop tests goes up which may end up being worse on targets with
- // constrained branch predictor resources so is controlled by an option.)
- // In addition we only unroll small upper bounds.
- if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
- MaxTripCount = 0;
- }
}
// computeUnrollCount() decides whether it is beneficial to use upper bound to
// fully unroll the loop.
bool UseUpperBound = false;
bool IsCountSetExplicitly = computeUnrollCount(
- L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
+ L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
TripMultiple, LoopSize, UP, UseUpperBound);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
@@ -1139,7 +1165,7 @@ static LoopUnrollResult tryToUnrollLoop(
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
- (IsCountSetExplicitly || UP.PeelCount))
+ (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
L->setLoopAlreadyUnrolled();
return UnrollResult;
@@ -1169,18 +1195,24 @@ public:
Optional<bool> ProvidedRuntime;
Optional<bool> ProvidedUpperBound;
Optional<bool> ProvidedAllowPeeling;
+ Optional<bool> ProvidedAllowProfileBasedPeeling;
+ Optional<unsigned> ProvidedFullUnrollMaxCount;
LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None,
- Optional<bool> AllowPeeling = None)
+ Optional<bool> AllowPeeling = None,
+ Optional<bool> AllowProfileBasedPeeling = None,
+ Optional<unsigned> ProvidedFullUnrollMaxCount = None)
: LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
- ProvidedAllowPeeling(AllowPeeling) {
+ ProvidedAllowPeeling(AllowPeeling),
+ ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
+ ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -1203,10 +1235,11 @@ public:
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
LoopUnrollResult Result = tryToUnrollLoop(
- L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
- PreserveLCSSA, OptLevel, OnlyWhenForced,
- ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
- ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
+ L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
+ OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
+ ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
+ ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
+ ProvidedFullUnrollMaxCount);
if (Result == LoopUnrollResult::FullyUnrolled)
LPM.markLoopAsDeleted(*L);
@@ -1283,14 +1316,16 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
std::string LoopName = L.getName();
- bool Changed =
- tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
- /*BFI*/ nullptr, /*PSI*/ nullptr,
- /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
- ForgetSCEV, /*Count*/ None,
- /*Threshold*/ None, /*AllowPartial*/ false,
- /*Runtime*/ false, /*UpperBound*/ false,
- /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
+ bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
+ /*BFI*/ nullptr, /*PSI*/ nullptr,
+ /*PreserveLCSSA*/ true, OptLevel,
+ OnlyWhenForced, ForgetSCEV, /*Count*/ None,
+ /*Threshold*/ None, /*AllowPartial*/ false,
+ /*Runtime*/ false, /*UpperBound*/ false,
+ /*AllowPeeling*/ false,
+ /*AllowProfileBasedPeeling*/ false,
+ /*FullUnrollMaxCount*/ None) !=
+ LoopUnrollResult::Unmodified;
if (!Changed)
return PreservedAnalyses::all();
@@ -1430,7 +1465,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
UnrollOpts.ForgetSCEV, /*Count*/ None,
/*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
- UnrollOpts.AllowUpperBound, LocalAllowPeeling);
+ UnrollOpts.AllowUpperBound, LocalAllowPeeling,
+ UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
Changed |= Result != LoopUnrollResult::Unmodified;
// The parent must not be damaged by unrolling!
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index b5b8e720069c..b410df0c5f68 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -420,7 +420,8 @@ enum OperatorChain {
/// cost of creating an entirely new loop.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
OperatorChain &ParentChain,
- DenseMap<Value *, Value *> &Cache) {
+ DenseMap<Value *, Value *> &Cache,
+ MemorySSAUpdater *MSSAU) {
auto CacheIt = Cache.find(Cond);
if (CacheIt != Cache.end())
return CacheIt->second;
@@ -438,7 +439,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
// TODO: Handle: br (VARIANT|INVARIANT).
// Hoist simple values out.
- if (L->makeLoopInvariant(Cond, Changed)) {
+ if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) {
Cache[Cond] = Cond;
return Cond;
}
@@ -478,7 +479,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
// which will cause the branch to go away in one loop and the condition to
// simplify in the other one.
if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed,
- ParentChain, Cache)) {
+ ParentChain, Cache, MSSAU)) {
Cache[Cond] = LHS;
return LHS;
}
@@ -486,7 +487,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
// operand(1).
ParentChain = NewChain;
if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed,
- ParentChain, Cache)) {
+ ParentChain, Cache, MSSAU)) {
Cache[Cond] = RHS;
return RHS;
}
@@ -500,12 +501,12 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
/// an invariant piece, return the invariant along with the operator chain type.
/// Otherwise, return null.
-static std::pair<Value *, OperatorChain> FindLIVLoopCondition(Value *Cond,
- Loop *L,
- bool &Changed) {
+static std::pair<Value *, OperatorChain>
+FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
+ MemorySSAUpdater *MSSAU) {
DenseMap<Value *, Value *> Cache;
OperatorChain OpChain = OC_OpChainNone;
- Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache);
+ Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU);
// In case we do find a LIV, it can not be obtained by walking up a mixed
// operator chain.
@@ -525,7 +526,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (EnableMSSALoopDependency) {
MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
assert(DT && "Cannot update MemorySSA without a valid DomTree.");
}
currentLoop = L;
@@ -694,8 +695,9 @@ bool LoopUnswitch::processCurrentLoop() {
}
for (IntrinsicInst *Guard : Guards) {
- Value *LoopCond =
- FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed).first;
+ Value *LoopCond = FindLIVLoopCondition(Guard->getOperand(0), currentLoop,
+ Changed, MSSAU.get())
+ .first;
if (LoopCond &&
UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
// NB! Unswitching (if successful) could have erased some of the
@@ -735,8 +737,9 @@ bool LoopUnswitch::processCurrentLoop() {
if (BI->isConditional()) {
// See if this, or some part of it, is loop invariant. If so, we can
// unswitch on it if we desire.
- Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
- currentLoop, Changed).first;
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop,
+ Changed, MSSAU.get())
+ .first;
if (LoopCond && !EqualityPropUnSafe(*LoopCond) &&
UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
++NumBranches;
@@ -748,7 +751,7 @@ bool LoopUnswitch::processCurrentLoop() {
Value *LoopCond;
OperatorChain OpChain;
std::tie(LoopCond, OpChain) =
- FindLIVLoopCondition(SC, currentLoop, Changed);
+ FindLIVLoopCondition(SC, currentLoop, Changed, MSSAU.get());
unsigned NumCases = SI->getNumCases();
if (LoopCond && NumCases) {
@@ -808,8 +811,9 @@ bool LoopUnswitch::processCurrentLoop() {
for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
BBI != E; ++BBI)
if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
- currentLoop, Changed).first;
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop,
+ Changed, MSSAU.get())
+ .first;
if (LoopCond && UnswitchIfProfitable(LoopCond,
ConstantInt::getTrue(Context))) {
++NumSelects;
@@ -1123,8 +1127,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
if (!BI->isConditional())
return false;
- Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
- currentLoop, Changed).first;
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop,
+ Changed, MSSAU.get())
+ .first;
// Unswitch only if the trivial condition itself is an LIV (not
// partial LIV which could occur in and/or)
@@ -1157,8 +1162,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
// If this isn't switching on an invariant condition, we can't unswitch it.
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
- currentLoop, Changed).first;
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop,
+ Changed, MSSAU.get())
+ .first;
// Unswitch only if the trivial condition itself is an LIV (not
// partial LIV which could occur in and/or)
@@ -1240,6 +1246,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
LoopBlocks.clear();
NewBlocks.clear();
+ if (MSSAU && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
// First step, split the preheader and exit blocks, and add these blocks to
// the LoopBlocks list.
BasicBlock *NewPreheader =
@@ -1607,36 +1616,30 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// If BI's parent is the only pred of the successor, fold the two blocks
// together.
BasicBlock *Pred = BI->getParent();
+ (void)Pred;
BasicBlock *Succ = BI->getSuccessor(0);
BasicBlock *SinglePred = Succ->getSinglePredecessor();
if (!SinglePred) continue; // Nothing to do.
assert(SinglePred == Pred && "CFG broken");
- LLVM_DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
- << Succ->getName() << "\n");
-
- // Resolve any single entry PHI nodes in Succ.
- while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
- ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM,
- MSSAU.get());
-
- // If Succ has any successors with PHI nodes, update them to have
- // entries coming from Pred instead of Succ.
- Succ->replaceAllUsesWith(Pred);
-
- // Move all of the successor contents from Succ to Pred.
- Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
- Succ->begin(), Succ->end());
- if (MSSAU)
- MSSAU->moveAllAfterMergeBlocks(Succ, Pred, BI);
+ // Make the LPM and Worklist updates specific to LoopUnswitch.
LPM->deleteSimpleAnalysisValue(BI, L);
RemoveFromWorklist(BI, Worklist);
- BI->eraseFromParent();
-
- // Remove Succ from the loop tree.
- LI->removeBlock(Succ);
LPM->deleteSimpleAnalysisValue(Succ, L);
- Succ->eraseFromParent();
+ auto SuccIt = Succ->begin();
+ while (PHINode *PN = dyn_cast<PHINode>(SuccIt++)) {
+ for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It)
+ if (Instruction *Use = dyn_cast<Instruction>(PN->getOperand(It)))
+ Worklist.push_back(Use);
+ for (User *U : PN->users())
+ Worklist.push_back(cast<Instruction>(U));
+ LPM->deleteSimpleAnalysisValue(PN, L);
+ RemoveFromWorklist(PN, Worklist);
+ ++NumSimplify;
+ }
+ // Merge the block and make the remaining analyses updates.
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get());
++NumSimplify;
continue;
}
diff --git a/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 896dd8bcb922..2ccb7cae3079 100644
--- a/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -112,37 +112,6 @@ static cl::opt<unsigned> LVLoopDepthThreshold(
"LoopVersioningLICM's threshold for maximum allowed loop nest/depth"),
cl::init(2), cl::Hidden);
-/// Create MDNode for input string.
-static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- Metadata *MDs[] = {
- MDString::get(Context, Name),
- ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
- return MDNode::get(Context, MDs);
-}
-
-/// Set input string into loop metadata by keeping other values intact.
-void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
- unsigned V) {
- SmallVector<Metadata *, 4> MDs(1);
- // If the loop already has metadata, retain it.
- MDNode *LoopID = TheLoop->getLoopID();
- if (LoopID) {
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
- MDs.push_back(Node);
- }
- }
- // Add new metadata.
- MDs.push_back(createStringMetadata(TheLoop, MDString, V));
- // Replace current metadata node with new one.
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
- TheLoop->setLoopID(NewLoopID);
-}
-
namespace {
struct LoopVersioningLICM : public LoopPass {
diff --git a/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
new file mode 100644
index 000000000000..d0fcf38b5a7b
--- /dev/null
+++ b/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -0,0 +1,170 @@
+//===- LowerConstantIntrinsics.cpp - Lower constant intrinsic calls -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers all remaining 'objectsize' 'is.constant' intrinsic calls
+// and provides constant propagation and basic CFG cleanup on the result.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "lower-is-constant-intrinsic"
+
+STATISTIC(IsConstantIntrinsicsHandled,
+ "Number of 'is.constant' intrinsic calls handled");
+STATISTIC(ObjectSizeIntrinsicsHandled,
+ "Number of 'objectsize' intrinsic calls handled");
+
+static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) {
+ Value *Op = II->getOperand(0);
+
+ return isa<Constant>(Op) ? ConstantInt::getTrue(II->getType())
+ : ConstantInt::getFalse(II->getType());
+}
+
+static bool replaceConditionalBranchesOnConstant(Instruction *II,
+ Value *NewValue) {
+ bool HasDeadBlocks = false;
+ SmallSetVector<Instruction *, 8> Worklist;
+ replaceAndRecursivelySimplify(II, NewValue, nullptr, nullptr, nullptr,
+ &Worklist);
+ for (auto I : Worklist) {
+ BranchInst *BI = dyn_cast<BranchInst>(I);
+ if (!BI)
+ continue;
+ if (BI->isUnconditional())
+ continue;
+
+ BasicBlock *Target, *Other;
+ if (match(BI->getOperand(0), m_Zero())) {
+ Target = BI->getSuccessor(1);
+ Other = BI->getSuccessor(0);
+ } else if (match(BI->getOperand(0), m_One())) {
+ Target = BI->getSuccessor(0);
+ Other = BI->getSuccessor(1);
+ } else {
+ Target = nullptr;
+ Other = nullptr;
+ }
+ if (Target && Target != Other) {
+ BasicBlock *Source = BI->getParent();
+ Other->removePredecessor(Source);
+ BI->eraseFromParent();
+ BranchInst::Create(Target, Source);
+ if (pred_begin(Other) == pred_end(Other))
+ HasDeadBlocks = true;
+ }
+ }
+ return HasDeadBlocks;
+}
+
+static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo *TLI) {
+ bool HasDeadBlocks = false;
+ const auto &DL = F.getParent()->getDataLayout();
+ SmallVector<WeakTrackingVH, 8> Worklist;
+
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ for (BasicBlock *BB : RPOT) {
+ for (Instruction &I: *BB) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II)
+ continue;
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::is_constant:
+ case Intrinsic::objectsize:
+ Worklist.push_back(WeakTrackingVH(&I));
+ break;
+ }
+ }
+ }
+ for (WeakTrackingVH &VH: Worklist) {
+ // Items on the worklist can be mutated by earlier recursive replaces.
+ // This can remove the intrinsic as dead (VH == null), but also replace
+ // the intrinsic in place.
+ if (!VH)
+ continue;
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*VH);
+ if (!II)
+ continue;
+ Value *NewValue;
+ switch (II->getIntrinsicID()) {
+ default:
+ continue;
+ case Intrinsic::is_constant:
+ NewValue = lowerIsConstantIntrinsic(II);
+ IsConstantIntrinsicsHandled++;
+ break;
+ case Intrinsic::objectsize:
+ NewValue = lowerObjectSizeCall(II, DL, TLI, true);
+ ObjectSizeIntrinsicsHandled++;
+ break;
+ }
+ HasDeadBlocks |= replaceConditionalBranchesOnConstant(II, NewValue);
+ }
+ if (HasDeadBlocks)
+ removeUnreachableBlocks(F);
+ return !Worklist.empty();
+}
+
+PreservedAnalyses
+LowerConstantIntrinsicsPass::run(Function &F, FunctionAnalysisManager &AM) {
+ if (lowerConstantIntrinsics(F, AM.getCachedResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
+}
+
+namespace {
+/// Legacy pass for lowering is.constant intrinsics out of the IR.
+///
+/// When this pass is run over a function it converts is.constant intrinsics
+/// into 'true' or 'false'. This is completements the normal constand folding
+/// to 'true' as part of Instruction Simplify passes.
+class LowerConstantIntrinsics : public FunctionPass {
+public:
+ static char ID;
+ LowerConstantIntrinsics() : FunctionPass(ID) {
+ initializeLowerConstantIntrinsicsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+ return lowerConstantIntrinsics(F, TLI);
+ }
+};
+} // namespace
+
+char LowerConstantIntrinsics::ID = 0;
+INITIALIZE_PASS(LowerConstantIntrinsics, "lower-constant-intrinsics",
+ "Lower constant intrinsics", false, false)
+
+FunctionPass *llvm::createLowerConstantIntrinsicsPass() {
+ return new LowerConstantIntrinsics();
+}
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 0d67c0d740ec..d85f20b3f80c 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
using namespace llvm;
@@ -71,15 +72,20 @@ static bool handleSwitchExpect(SwitchInst &SI) {
unsigned n = SI.getNumCases(); // +1 for default case.
SmallVector<uint32_t, 16> Weights(n + 1, UnlikelyBranchWeight);
- if (Case == *SI.case_default())
- Weights[0] = LikelyBranchWeight;
- else
- Weights[Case.getCaseIndex() + 1] = LikelyBranchWeight;
+ uint64_t Index = (Case == *SI.case_default()) ? 0 : Case.getCaseIndex() + 1;
+ Weights[Index] = LikelyBranchWeight;
+
+ SI.setMetadata(
+ LLVMContext::MD_misexpect,
+ MDBuilder(CI->getContext())
+ .createMisExpect(Index, LikelyBranchWeight, UnlikelyBranchWeight));
+
+ SI.setCondition(ArgValue);
+ misexpect::checkFrontendInstrumentation(SI);
SI.setMetadata(LLVMContext::MD_prof,
MDBuilder(CI->getContext()).createBranchWeights(Weights));
- SI.setCondition(ArgValue);
return true;
}
@@ -155,7 +161,7 @@ static void handlePhiDef(CallInst *Expect) {
return Result;
};
- auto *PhiDef = dyn_cast<PHINode>(V);
+ auto *PhiDef = cast<PHINode>(V);
// Get the first dominating conditional branch of the operand
// i's incoming block.
@@ -280,19 +286,28 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
MDBuilder MDB(CI->getContext());
MDNode *Node;
+ MDNode *ExpNode;
if ((ExpectedValue->getZExtValue() == ValueComparedTo) ==
- (Predicate == CmpInst::ICMP_EQ))
+ (Predicate == CmpInst::ICMP_EQ)) {
Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight);
- else
+ ExpNode = MDB.createMisExpect(0, LikelyBranchWeight, UnlikelyBranchWeight);
+ } else {
Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
+ ExpNode = MDB.createMisExpect(1, LikelyBranchWeight, UnlikelyBranchWeight);
+ }
- BSI.setMetadata(LLVMContext::MD_prof, Node);
+ BSI.setMetadata(LLVMContext::MD_misexpect, ExpNode);
if (CmpI)
CmpI->setOperand(0, ArgValue);
else
BSI.setCondition(ArgValue);
+
+ misexpect::checkFrontendInstrumentation(BSI);
+
+ BSI.setMetadata(LLVMContext::MD_prof, Node);
+
return true;
}
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 5a055139be4f..2364748efb05 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -69,90 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
-static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
- bool &VariableIdxFound,
- const DataLayout &DL) {
- // Skip over the first indices.
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (unsigned i = 1; i != Idx; ++i, ++GTI)
- /*skip along*/;
-
- // Compute the offset implied by the rest of the indices.
- int64_t Offset = 0;
- for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
- ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!OpC)
- return VariableIdxFound = true;
- if (OpC->isZero()) continue; // No offset.
-
- // Handle struct indices, which add their field offset to the pointer.
- if (StructType *STy = GTI.getStructTypeOrNull()) {
- Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
- continue;
- }
-
- // Otherwise, we have a sequential type like an array or vector. Multiply
- // the index by the ElementSize.
- uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
- Offset += Size*OpC->getSExtValue();
- }
-
- return Offset;
-}
-
-/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and
-/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2
-/// might be &A[40]. In this case offset would be -8.
-static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
- const DataLayout &DL) {
- Ptr1 = Ptr1->stripPointerCasts();
- Ptr2 = Ptr2->stripPointerCasts();
-
- // Handle the trivial case first.
- if (Ptr1 == Ptr2) {
- Offset = 0;
- return true;
- }
-
- GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
- GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
-
- bool VariableIdxFound = false;
-
- // If one pointer is a GEP and the other isn't, then see if the GEP is a
- // constant offset from the base, as in "P" and "gep P, 1".
- if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
- Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL);
- return !VariableIdxFound;
- }
-
- if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
- Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL);
- return !VariableIdxFound;
- }
-
- // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
- // base. After that base, they may have some number of common (and
- // potentially variable) indices. After that they handle some constant
- // offset, which determines their offset from each other. At this point, we
- // handle no other case.
- if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
- return false;
-
- // Skip any common indices and track the GEP types.
- unsigned Idx = 1;
- for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
- if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
- break;
-
- int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL);
- int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL);
- if (VariableIdxFound) return false;
-
- Offset = Offset2-Offset1;
- return true;
-}
-
namespace {
/// Represents a range of memset'd bytes with the ByteVal value.
@@ -419,12 +335,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
break;
// Check to see if this store is to a constant offset from the start ptr.
- int64_t Offset;
- if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
- DL))
+ Optional<int64_t> Offset =
+ isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
+ if (!Offset)
break;
- Ranges.addStore(Offset, NextStore);
+ Ranges.addStore(*Offset, NextStore);
} else {
MemSetInst *MSI = cast<MemSetInst>(BI);
@@ -433,11 +349,11 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
break;
// Check to see if this store is to a constant offset from the start ptr.
- int64_t Offset;
- if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL))
+ Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
+ if (!Offset)
break;
- Ranges.addMemSet(Offset, MSI);
+ Ranges.addMemSet(*Offset, MSI);
}
}
@@ -597,9 +513,13 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
ToLift.push_back(C);
for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
- if (auto *A = dyn_cast<Instruction>(C->getOperand(k)))
- if (A->getParent() == SI->getParent())
+ if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
+ if (A->getParent() == SI->getParent()) {
+ // Cannot hoist user of P above P
+ if(A == P) return false;
Args.insert(A);
+ }
+ }
}
// We made it, we need to lift
@@ -979,7 +899,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// If the destination wasn't sufficiently aligned then increase its alignment.
if (!isDestSufficientlyAligned) {
assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
- cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
+ cast<AllocaInst>(cpyDest)->setAlignment(MaybeAlign(srcAlign));
}
// Drop any cached information about the call, because we may have changed
@@ -1516,7 +1436,7 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
return false;
auto *MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto LookupAliasAnalysis = [this]() -> AliasAnalysis & {
return getAnalysis<AAResultsWrapperPass>().getAAResults();
diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp
index 3d047a193267..98a45b391319 100644
--- a/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/lib/Transforms/Scalar/MergeICmps.cpp
@@ -897,7 +897,7 @@ public:
bool runOnFunction(Function &F) override {
if (skipFunction(F)) return false;
- const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
// MergeICmps does not need the DominatorTree, but we update it if it's
// already available.
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 30645f4400e3..9799ea7960ec 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -14,9 +14,11 @@
// diamond (hammock) and merges them into a single load in the header. Similar
// it sinks and merges two stores to the tail block (footer). The algorithm
// iterates over the instructions of one side of the diamond and attempts to
-// find a matching load/store on the other side. It hoists / sinks when it
-// thinks it safe to do so. This optimization helps with eg. hiding load
-// latencies, triggering if-conversion, and reducing static code size.
+// find a matching load/store on the other side. New tail/footer block may be
+// insterted if the tail/footer block has more predecessors (not only the two
+// predecessors that are forming the diamond). It hoists / sinks when it thinks
+// it safe to do so. This optimization helps with eg. hiding load latencies,
+// triggering if-conversion, and reducing static code size.
//
// NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist.
//
@@ -103,7 +105,9 @@ class MergedLoadStoreMotion {
// Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
const int MagicCompileTimeControl = 250;
+ const bool SplitFooterBB;
public:
+ MergedLoadStoreMotion(bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {}
bool run(Function &F, AliasAnalysis &AA);
private:
@@ -114,7 +118,9 @@ private:
PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
bool isStoreSinkBarrierInRange(const Instruction &Start,
const Instruction &End, MemoryLocation Loc);
- bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
+ bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const;
+ void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand,
+ StoreInst *ElseInst);
bool mergeStores(BasicBlock *BB);
};
} // end anonymous namespace
@@ -217,74 +223,82 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
}
///
+/// Check if 2 stores can be sunk together with corresponding GEPs
+///
+bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0,
+ StoreInst *S1) const {
+ auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
+ auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
+ return A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
+ (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
+ (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0);
+}
+
+///
/// Merge two stores to same address and sink into \p BB
///
/// Also sinks GEP instruction computing the store address
///
-bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
- StoreInst *S1) {
+void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0,
+ StoreInst *S1) {
// Only one definition?
auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
- if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
- (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
- (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0)) {
- LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
- dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
- dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
- // Hoist the instruction.
- BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
- // Intersect optional metadata.
- S0->andIRFlags(S1);
- S0->dropUnknownNonDebugMetadata();
-
- // Create the new store to be inserted at the join point.
- StoreInst *SNew = cast<StoreInst>(S0->clone());
- Instruction *ANew = A0->clone();
- SNew->insertBefore(&*InsertPt);
- ANew->insertBefore(SNew);
-
- assert(S0->getParent() == A0->getParent());
- assert(S1->getParent() == A1->getParent());
-
- // New PHI operand? Use it.
- if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
- SNew->setOperand(0, NewPN);
- S0->eraseFromParent();
- S1->eraseFromParent();
- A0->replaceAllUsesWith(ANew);
- A0->eraseFromParent();
- A1->replaceAllUsesWith(ANew);
- A1->eraseFromParent();
- return true;
- }
- return false;
+ LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
+ dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
+ dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
+ // Hoist the instruction.
+ BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
+ // Intersect optional metadata.
+ S0->andIRFlags(S1);
+ S0->dropUnknownNonDebugMetadata();
+
+ // Create the new store to be inserted at the join point.
+ StoreInst *SNew = cast<StoreInst>(S0->clone());
+ Instruction *ANew = A0->clone();
+ SNew->insertBefore(&*InsertPt);
+ ANew->insertBefore(SNew);
+
+ assert(S0->getParent() == A0->getParent());
+ assert(S1->getParent() == A1->getParent());
+
+ // New PHI operand? Use it.
+ if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
+ SNew->setOperand(0, NewPN);
+ S0->eraseFromParent();
+ S1->eraseFromParent();
+ A0->replaceAllUsesWith(ANew);
+ A0->eraseFromParent();
+ A1->replaceAllUsesWith(ANew);
+ A1->eraseFromParent();
}
///
/// True when two stores are equivalent and can sink into the footer
///
-/// Starting from a diamond tail block, iterate over the instructions in one
-/// predecessor block and try to match a store in the second predecessor.
+/// Starting from a diamond head block, iterate over the instructions in one
+/// successor block and try to match a store in the second successor.
///
-bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
+bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) {
bool MergedStores = false;
- assert(T && "Footer of a diamond cannot be empty");
-
- pred_iterator PI = pred_begin(T), E = pred_end(T);
- assert(PI != E);
- BasicBlock *Pred0 = *PI;
- ++PI;
- BasicBlock *Pred1 = *PI;
- ++PI;
+ BasicBlock *TailBB = getDiamondTail(HeadBB);
+ BasicBlock *SinkBB = TailBB;
+ assert(SinkBB && "Footer of a diamond cannot be empty");
+
+ succ_iterator SI = succ_begin(HeadBB);
+ assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors");
+ BasicBlock *Pred0 = *SI;
+ ++SI;
+ assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor");
+ BasicBlock *Pred1 = *SI;
// tail block of a diamond/hammock?
if (Pred0 == Pred1)
return false; // No.
- if (PI != E)
- return false; // No. More than 2 predecessors.
-
- // #Instructions in Succ1 for Compile Time Control
+ // bail out early if we can not merge into the footer BB
+ if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(3))
+ return false;
+ // #Instructions in Pred1 for Compile Time Control
auto InstsNoDbg = Pred1->instructionsWithoutDebug();
int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end());
int NStores = 0;
@@ -304,14 +318,23 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
if (NStores * Size1 >= MagicCompileTimeControl)
break;
if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
- bool Res = sinkStore(T, S0, S1);
- MergedStores |= Res;
- // Don't attempt to sink below stores that had to stick around
- // But after removal of a store and some of its feeding
- // instruction search again from the beginning since the iterator
- // is likely stale at this point.
- if (!Res)
+ if (!canSinkStoresAndGEPs(S0, S1))
+ // Don't attempt to sink below stores that had to stick around
+ // But after removal of a store and some of its feeding
+ // instruction search again from the beginning since the iterator
+ // is likely stale at this point.
break;
+
+ if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) {
+ // We have more than 2 predecessors. Insert a new block
+ // postdominating 2 predecessors we're going to sink from.
+ SinkBB = SplitBlockPredecessors(TailBB, {Pred0, Pred1}, ".sink.split");
+ if (!SinkBB)
+ break;
+ }
+
+ MergedStores = true;
+ sinkStoresAndGEPs(SinkBB, S0, S1);
RBI = Pred0->rbegin();
RBE = Pred0->rend();
LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
@@ -328,13 +351,15 @@ bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
+ // This loop doesn't care about newly inserted/split blocks
+ // since they never will be diamond heads.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
BasicBlock *BB = &*FI++;
// Hoist equivalent loads and sink stores
// outside diamonds when possible
if (isDiamondHead(BB)) {
- Changed |= mergeStores(getDiamondTail(BB));
+ Changed |= mergeStores(BB);
}
}
return Changed;
@@ -342,9 +367,11 @@ bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) {
namespace {
class MergedLoadStoreMotionLegacyPass : public FunctionPass {
+ const bool SplitFooterBB;
public:
static char ID; // Pass identification, replacement for typeid
- MergedLoadStoreMotionLegacyPass() : FunctionPass(ID) {
+ MergedLoadStoreMotionLegacyPass(bool SplitFooterBB = false)
+ : FunctionPass(ID), SplitFooterBB(SplitFooterBB) {
initializeMergedLoadStoreMotionLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -355,13 +382,14 @@ public:
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
- MergedLoadStoreMotion Impl;
+ MergedLoadStoreMotion Impl(SplitFooterBB);
return Impl.run(F, getAnalysis<AAResultsWrapperPass>().getAAResults());
}
private:
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
+ if (!SplitFooterBB)
+ AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
@@ -373,8 +401,8 @@ char MergedLoadStoreMotionLegacyPass::ID = 0;
///
/// createMergedLoadStoreMotionPass - The public interface to this file.
///
-FunctionPass *llvm::createMergedLoadStoreMotionPass() {
- return new MergedLoadStoreMotionLegacyPass();
+FunctionPass *llvm::createMergedLoadStoreMotionPass(bool SplitFooterBB) {
+ return new MergedLoadStoreMotionLegacyPass(SplitFooterBB);
}
INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion",
@@ -385,13 +413,14 @@ INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
PreservedAnalyses
MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
- MergedLoadStoreMotion Impl;
+ MergedLoadStoreMotion Impl(Options.SplitFooterBB);
auto &AA = AM.getResult<AAManager>(F);
if (!Impl.run(F, AA))
return PreservedAnalyses::all();
PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
+ if (!Options.SplitFooterBB)
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index 94436b55752a..1260bd39cdee 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -170,7 +170,7 @@ bool NaryReassociateLegacyPass::runOnFunction(Function &F) {
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
return Impl.runImpl(F, AC, DT, SE, TLI, TTI);
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 08ac2b666fce..b213264de557 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -89,6 +89,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -122,6 +123,7 @@
using namespace llvm;
using namespace llvm::GVNExpression;
using namespace llvm::VNCoercion;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "newgvn"
@@ -656,7 +658,7 @@ public:
TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
const DataLayout &DL)
: F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
- PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)),
+ PredInfo(std::make_unique<PredicateInfo>(F, *DT, *AC)),
SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false) {}
bool runGVN();
@@ -1332,7 +1334,7 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
E->setOpcode(0);
E->op_push_back(PointerOp);
if (LI)
- E->setAlignment(LI->getAlignment());
+ E->setAlignment(MaybeAlign(LI->getAlignment()));
// TODO: Value number heap versions. We may be able to discover
// things alias analysis can't on it's own (IE that a store and a
@@ -1637,8 +1639,11 @@ const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
if (AA->doesNotAccessMemory(CI)) {
return createCallExpression(CI, TOPClass->getMemoryLeader());
} else if (AA->onlyReadsMemory(CI)) {
- MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI);
- return createCallExpression(CI, DefiningAccess);
+ if (auto *MA = MSSA->getMemoryAccess(CI)) {
+ auto *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(MA);
+ return createCallExpression(CI, DefiningAccess);
+ } else // MSSA determined that CI does not access memory.
+ return createCallExpression(CI, TOPClass->getMemoryLeader());
}
return nullptr;
}
@@ -1754,7 +1759,7 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
return true;
});
// If we are left with no operands, it's dead.
- if (empty(Filtered)) {
+ if (Filtered.empty()) {
// If it has undef at this point, it means there are no-non-undef arguments,
// and thus, the value of the phi node must be undef.
if (HasUndef) {
@@ -2464,9 +2469,9 @@ Value *NewGVN::findConditionEquivalence(Value *Cond) const {
// Process the outgoing edges of a block for reachability.
void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
// Evaluate reachability of terminator instruction.
- BranchInst *BR;
- if ((BR = dyn_cast<BranchInst>(TI)) && BR->isConditional()) {
- Value *Cond = BR->getCondition();
+ Value *Cond;
+ BasicBlock *TrueSucc, *FalseSucc;
+ if (match(TI, m_Br(m_Value(Cond), TrueSucc, FalseSucc))) {
Value *CondEvaluated = findConditionEquivalence(Cond);
if (!CondEvaluated) {
if (auto *I = dyn_cast<Instruction>(Cond)) {
@@ -2479,8 +2484,6 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
}
}
ConstantInt *CI;
- BasicBlock *TrueSucc = BR->getSuccessor(0);
- BasicBlock *FalseSucc = BR->getSuccessor(1);
if (CondEvaluated && (CI = dyn_cast<ConstantInt>(CondEvaluated))) {
if (CI->isOne()) {
LLVM_DEBUG(dbgs() << "Condition for Terminator " << *TI
@@ -4196,7 +4199,7 @@ bool NewGVNLegacyPass::runOnFunction(Function &F) {
return false;
return NewGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
&getAnalysis<AAResultsWrapperPass>().getAAResults(),
&getAnalysis<MemorySSAWrapperPass>().getMSSA(),
F.getParent()->getDataLayout())
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 039123218544..68a0f5151ad5 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -161,7 +161,7 @@ public:
return false;
TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
return runPartiallyInlineLibCalls(F, TLI, TTI);
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index b544f0a39ea8..beb299272ed8 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -131,7 +131,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
for (Loop *I : *LI) {
runOnLoopAndSubLoops(I);
}
@@ -240,7 +240,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
BasicBlock *Pred) {
// A conservative bound on the loop as a whole.
- const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
+ const SCEV *MaxTrips = SE->getConstantMaxBackedgeTakenCount(L);
if (MaxTrips != SE->getCouldNotCompute() &&
SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
CountedLoopTripWidth))
@@ -478,7 +478,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
return false;
const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
bool Modified = false;
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index fa8c9e2a5fe4..124f625ef7b6 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -861,7 +861,7 @@ static Value *NegateValue(Value *V, Instruction *BI,
// this use. We do this by moving it to the entry block (if it is a
// non-instruction value) or right after the definition. These negates will
// be zapped by reassociate later, so we don't need much finesse here.
- BinaryOperator *TheNeg = cast<BinaryOperator>(U);
+ Instruction *TheNeg = cast<Instruction>(U);
// Verify that the negate is in this function, V might be a constant expr.
if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
@@ -1938,88 +1938,132 @@ void ReassociatePass::EraseInst(Instruction *I) {
MadeChange = true;
}
-// Canonicalize expressions of the following form:
-// x + (-Constant * y) -> x - (Constant * y)
-// x - (-Constant * y) -> x + (Constant * y)
-Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
- if (!I->hasOneUse() || I->getType()->isVectorTy())
- return nullptr;
-
- // Must be a fmul or fdiv instruction.
- unsigned Opcode = I->getOpcode();
- if (Opcode != Instruction::FMul && Opcode != Instruction::FDiv)
- return nullptr;
-
- auto *C0 = dyn_cast<ConstantFP>(I->getOperand(0));
- auto *C1 = dyn_cast<ConstantFP>(I->getOperand(1));
-
- // Both operands are constant, let it get constant folded away.
- if (C0 && C1)
- return nullptr;
-
- ConstantFP *CF = C0 ? C0 : C1;
-
- // Must have one constant operand.
- if (!CF)
- return nullptr;
+/// Recursively analyze an expression to build a list of instructions that have
+/// negative floating-point constant operands. The caller can then transform
+/// the list to create positive constants for better reassociation and CSE.
+static void getNegatibleInsts(Value *V,
+ SmallVectorImpl<Instruction *> &Candidates) {
+ // Handle only one-use instructions. Combining negations does not justify
+ // replicating instructions.
+ Instruction *I;
+ if (!match(V, m_OneUse(m_Instruction(I))))
+ return;
- // Must be a negative ConstantFP.
- if (!CF->isNegative())
- return nullptr;
+ // Handle expressions of multiplications and divisions.
+ // TODO: This could look through floating-point casts.
+ const APFloat *C;
+ switch (I->getOpcode()) {
+ case Instruction::FMul:
+ // Not expecting non-canonical code here. Bail out and wait.
+ if (match(I->getOperand(0), m_Constant()))
+ break;
- // User must be a binary operator with one or more uses.
- Instruction *User = I->user_back();
- if (!isa<BinaryOperator>(User) || User->use_empty())
- return nullptr;
+ if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
+ Candidates.push_back(I);
+ LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
+ }
+ getNegatibleInsts(I->getOperand(0), Candidates);
+ getNegatibleInsts(I->getOperand(1), Candidates);
+ break;
+ case Instruction::FDiv:
+ // Not expecting non-canonical code here. Bail out and wait.
+ if (match(I->getOperand(0), m_Constant()) &&
+ match(I->getOperand(1), m_Constant()))
+ break;
- unsigned UserOpcode = User->getOpcode();
- if (UserOpcode != Instruction::FAdd && UserOpcode != Instruction::FSub)
- return nullptr;
+ if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
+ (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
+ Candidates.push_back(I);
+ LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
+ }
+ getNegatibleInsts(I->getOperand(0), Candidates);
+ getNegatibleInsts(I->getOperand(1), Candidates);
+ break;
+ default:
+ break;
+ }
+}
- // Subtraction is not commutative. Explicitly, the following transform is
- // not valid: (-Constant * y) - x -> x + (Constant * y)
- if (!User->isCommutative() && User->getOperand(1) != I)
+/// Given an fadd/fsub with an operand that is a one-use instruction
+/// (the fadd/fsub), try to change negative floating-point constants into
+/// positive constants to increase potential for reassociation and CSE.
+Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
+ Instruction *Op,
+ Value *OtherOp) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
+
+ // Collect instructions with negative FP constants from the subtree that ends
+ // in Op.
+ SmallVector<Instruction *, 4> Candidates;
+ getNegatibleInsts(Op, Candidates);
+ if (Candidates.empty())
return nullptr;
// Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
// resulting subtract will be broken up later. This can get us into an
// infinite loop during reassociation.
- if (UserOpcode == Instruction::FAdd && ShouldBreakUpSubtract(User))
+ bool IsFSub = I->getOpcode() == Instruction::FSub;
+ bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
+ if (NeedsSubtract && ShouldBreakUpSubtract(I))
return nullptr;
- // Change the sign of the constant.
- APFloat Val = CF->getValueAPF();
- Val.changeSign();
- I->setOperand(C0 ? 0 : 1, ConstantFP::get(CF->getContext(), Val));
-
- // Canonicalize I to RHS to simplify the next bit of logic. E.g.,
- // ((-Const*y) + x) -> (x + (-Const*y)).
- if (User->getOperand(0) == I && User->isCommutative())
- cast<BinaryOperator>(User)->swapOperands();
-
- Value *Op0 = User->getOperand(0);
- Value *Op1 = User->getOperand(1);
- BinaryOperator *NI;
- switch (UserOpcode) {
- default:
- llvm_unreachable("Unexpected Opcode!");
- case Instruction::FAdd:
- NI = BinaryOperator::CreateFSub(Op0, Op1);
- NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
- break;
- case Instruction::FSub:
- NI = BinaryOperator::CreateFAdd(Op0, Op1);
- NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
- break;
+ for (Instruction *Negatible : Candidates) {
+ const APFloat *C;
+ if (match(Negatible->getOperand(0), m_APFloat(C))) {
+ assert(!match(Negatible->getOperand(1), m_Constant()) &&
+ "Expecting only 1 constant operand");
+ assert(C->isNegative() && "Expected negative FP constant");
+ Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
+ MadeChange = true;
+ }
+ if (match(Negatible->getOperand(1), m_APFloat(C))) {
+ assert(!match(Negatible->getOperand(0), m_Constant()) &&
+ "Expecting only 1 constant operand");
+ assert(C->isNegative() && "Expected negative FP constant");
+ Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
+ MadeChange = true;
+ }
}
+ assert(MadeChange == true && "Negative constant candidate was not changed");
- NI->insertBefore(User);
- NI->setName(User->getName());
- User->replaceAllUsesWith(NI);
- NI->setDebugLoc(I->getDebugLoc());
+ // Negations cancelled out.
+ if (Candidates.size() % 2 == 0)
+ return I;
+
+ // Negate the final operand in the expression by flipping the opcode of this
+ // fadd/fsub.
+ assert(Candidates.size() % 2 == 1 && "Expected odd number");
+ IRBuilder<> Builder(I);
+ Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
+ : Builder.CreateFSubFMF(OtherOp, Op, I);
+ I->replaceAllUsesWith(NewInst);
RedoInsts.insert(I);
- MadeChange = true;
- return NI;
+ return dyn_cast<Instruction>(NewInst);
+}
+
+/// Canonicalize expressions that contain a negative floating-point constant
+/// of the following form:
+/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
+/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
+/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
+///
+/// The fadd/fsub opcode may be switched to allow folding a negation into the
+/// input instruction.
+Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
+ LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
+ Value *X;
+ Instruction *Op;
+ if (match(I, m_FAdd(m_Value(X), m_OneUse(m_Instruction(Op)))))
+ if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
+ I = R;
+ if (match(I, m_FAdd(m_OneUse(m_Instruction(Op)), m_Value(X))))
+ if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
+ I = R;
+ if (match(I, m_FSub(m_Value(X), m_OneUse(m_Instruction(Op)))))
+ if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
+ I = R;
+ return I;
}
/// Inspect and optimize the given instruction. Note that erasing
@@ -2042,16 +2086,16 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
I = NI;
}
- // Canonicalize negative constants out of expressions.
- if (Instruction *Res = canonicalizeNegConstExpr(I))
- I = Res;
-
// Commute binary operators, to canonicalize the order of their operands.
// This can potentially expose more CSE opportunities, and makes writing other
// transformations simpler.
if (I->isCommutative())
canonicalizeOperands(I);
+ // Canonicalize negative constants out of expressions.
+ if (Instruction *Res = canonicalizeNegFPConstants(I))
+ I = Res;
+
// Don't optimize floating-point instructions unless they are 'fast'.
if (I->getType()->isFPOrFPVectorTy() && !I->isFast())
return;
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index c358258d24cf..48bbdd8d1b33 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -172,8 +172,6 @@ public:
bool runOnModule(Module &M) override {
bool Changed = false;
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
for (Function &F : M) {
// Nothing to do for declarations.
if (F.isDeclaration() || F.empty())
@@ -186,6 +184,8 @@ public:
TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
Changed |= Impl.runOnFunction(F, DT, TTI, TLI);
@@ -2530,7 +2530,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// statepoints surviving this pass. This makes testing easier and the
// resulting IR less confusing to human readers.
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
- bool MadeChange = removeUnreachableBlocks(F, nullptr, &DTU);
+ bool MadeChange = removeUnreachableBlocks(F, &DTU);
// Flush the Dominator Tree.
DTU.getDomTree();
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 4093e50ce899..10fbdc8aacd2 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -191,7 +191,7 @@ public:
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const DataLayout &DL;
- const TargetLibraryInfo *TLI;
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI;
SmallPtrSet<BasicBlock *, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value *, LatticeVal> ValueState; // The state each value is in.
// The state each parameter is in.
@@ -268,8 +268,9 @@ public:
return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy};
}
- SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli)
- : DL(DL), TLI(tli) {}
+ SCCPSolver(const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI)
+ : DL(DL), GetTLI(std::move(GetTLI)) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -1290,7 +1291,7 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()), F,
- Operands, TLI)) {
+ Operands, &GetTLI(*F))) {
// call -> undef.
if (isa<UndefValue>(C))
return;
@@ -1465,7 +1466,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
}
LatticeVal &LV = getValueState(&I);
- if (!LV.isUnknown()) continue;
+ if (!LV.isUnknown())
+ continue;
+
+ // There are two reasons a call can have an undef result
+ // 1. It could be tracked.
+ // 2. It could be constant-foldable.
+ // Because of the way we solve return values, tracked calls must
+ // never be marked overdefined in ResolvedUndefsIn.
+ if (CallSite CS = CallSite(&I)) {
+ if (Function *F = CS.getCalledFunction())
+ if (TrackedRetVals.count(F))
+ continue;
+
+ // If the call is constant-foldable, we mark it overdefined because
+ // we do not know what return values are valid.
+ markOverdefined(&I);
+ return true;
+ }
// extractvalue is safe; check here because the argument is a struct.
if (isa<ExtractValueInst>(I))
@@ -1638,19 +1656,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::Call:
case Instruction::Invoke:
case Instruction::CallBr:
- // There are two reasons a call can have an undef result
- // 1. It could be tracked.
- // 2. It could be constant-foldable.
- // Because of the way we solve return values, tracked calls must
- // never be marked overdefined in ResolvedUndefsIn.
- if (Function *F = CallSite(&I).getCalledFunction())
- if (TrackedRetVals.count(F))
- break;
-
- // If the call is constant-foldable, we mark it overdefined because
- // we do not know what return values are valid.
- markOverdefined(&I);
- return true;
+ llvm_unreachable("Call-like instructions should have be handled early");
default:
// If we don't know what should happen here, conservatively mark it
// overdefined.
@@ -1751,7 +1757,7 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
[](const LatticeVal &LV) { return LV.isOverdefined(); }))
return false;
std::vector<Constant *> ConstVals;
- auto *ST = dyn_cast<StructType>(V->getType());
+ auto *ST = cast<StructType>(V->getType());
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
LatticeVal V = IVs[i];
ConstVals.push_back(V.isConstant()
@@ -1796,7 +1802,8 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
static bool runSCCP(Function &F, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
LLVM_DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- SCCPSolver Solver(DL, TLI);
+ SCCPSolver Solver(
+ DL, [TLI](Function &F) -> const TargetLibraryInfo & { return *TLI; });
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(&F.front());
@@ -1891,7 +1898,7 @@ public:
return false;
const DataLayout &DL = F.getParent()->getDataLayout();
const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
return runSCCP(F, DL, TLI);
}
};
@@ -1924,6 +1931,27 @@ static void findReturnsToZap(Function &F,
return;
}
+ assert(
+ all_of(F.users(),
+ [&Solver](User *U) {
+ if (isa<Instruction>(U) &&
+ !Solver.isBlockExecutable(cast<Instruction>(U)->getParent()))
+ return true;
+ // Non-callsite uses are not impacted by zapping. Also, constant
+ // uses (like blockaddresses) could stuck around, without being
+ // used in the underlying IR, meaning we do not have lattice
+ // values for them.
+ if (!CallSite(U))
+ return true;
+ if (U->getType()->isStructTy()) {
+ return all_of(
+ Solver.getStructLatticeValueFor(U),
+ [](const LatticeVal &LV) { return !LV.isOverdefined(); });
+ }
+ return !Solver.getLatticeValueFor(U).isOverdefined();
+ }) &&
+ "We can only zap functions where all live users have a concrete value");
+
for (BasicBlock &BB : F) {
if (CallInst *CI = BB.getTerminatingMustTailCall()) {
LLVM_DEBUG(dbgs() << "Can't zap return of the block due to present "
@@ -1974,9 +2002,10 @@ static void forceIndeterminateEdge(Instruction* I, SCCPSolver &Solver) {
}
bool llvm::runIPSCCP(
- Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ Module &M, const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI,
function_ref<AnalysisResultsForFn(Function &)> getAnalysis) {
- SCCPSolver Solver(DL, TLI);
+ SCCPSolver Solver(DL, GetTLI);
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 33f90d0b01e4..74b8ff913050 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -959,14 +959,16 @@ private:
std::tie(UsedI, I) = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
+ Size = std::max(Size,
+ DL.getTypeStoreSize(LI->getType()).getFixedSize());
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Op = SI->getOperand(0);
if (Op == UsedI)
return SI;
- Size = std::max(Size, DL.getTypeStoreSize(Op->getType()));
+ Size = std::max(Size,
+ DL.getTypeStoreSize(Op->getType()).getFixedSize());
continue;
}
@@ -1197,7 +1199,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// TODO: Allow recursive phi users.
// TODO: Allow stores.
BasicBlock *BB = PN.getParent();
- unsigned MaxAlign = 0;
+ MaybeAlign MaxAlign;
uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
APInt MaxSize(APWidth, 0);
bool HaveLoad = false;
@@ -1218,8 +1220,8 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
if (BBI->mayWriteToMemory())
return false;
- uint64_t Size = DL.getTypeStoreSizeInBits(LI->getType());
- MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ uint64_t Size = DL.getTypeStoreSize(LI->getType());
+ MaxAlign = std::max(MaxAlign, MaybeAlign(LI->getAlignment()));
MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize;
HaveLoad = true;
}
@@ -1266,11 +1268,11 @@ static void speculatePHINodeLoads(PHINode &PN) {
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
- // Get the AA tags and alignment to use from one of the loads. It doesn't
+ // Get the AA tags and alignment to use from one of the loads. It does not
// matter which one we get and if any differ.
AAMDNodes AATags;
SomeLoad->getAAMetadata(AATags);
- unsigned Align = SomeLoad->getAlignment();
+ const MaybeAlign Align = MaybeAlign(SomeLoad->getAlignment());
// Rewrite all loads of the PN to use the new PHI.
while (!PN.use_empty()) {
@@ -1338,11 +1340,11 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) {
// Both operands to the select need to be dereferenceable, either
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
- if (!isSafeToLoadUnconditionally(TValue, LI->getType(), LI->getAlignment(),
- DL, LI))
+ if (!isSafeToLoadUnconditionally(TValue, LI->getType(),
+ MaybeAlign(LI->getAlignment()), DL, LI))
return false;
- if (!isSafeToLoadUnconditionally(FValue, LI->getType(), LI->getAlignment(),
- DL, LI))
+ if (!isSafeToLoadUnconditionally(FValue, LI->getType(),
+ MaybeAlign(LI->getAlignment()), DL, LI))
return false;
}
@@ -1368,8 +1370,8 @@ static void speculateSelectInstLoads(SelectInst &SI) {
NumLoadsSpeculated += 2;
// Transfer alignment and AA info if present.
- TL->setAlignment(LI->getAlignment());
- FL->setAlignment(LI->getAlignment());
+ TL->setAlignment(MaybeAlign(LI->getAlignment()));
+ FL->setAlignment(MaybeAlign(LI->getAlignment()));
AAMDNodes Tags;
LI->getAAMetadata(Tags);
@@ -1888,6 +1890,14 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
bool HaveCommonEltTy = true;
auto CheckCandidateType = [&](Type *Ty) {
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+ // Return if bitcast to vectors is different for total size in bits.
+ if (!CandidateTys.empty()) {
+ VectorType *V = CandidateTys[0];
+ if (DL.getTypeSizeInBits(VTy) != DL.getTypeSizeInBits(V)) {
+ CandidateTys.clear();
+ return;
+ }
+ }
CandidateTys.push_back(VTy);
if (!CommonEltTy)
CommonEltTy = VTy->getElementType();
@@ -3110,7 +3120,7 @@ private:
unsigned LoadAlign = LI->getAlignment();
if (!LoadAlign)
LoadAlign = DL.getABITypeAlignment(LI->getType());
- LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+ LI->setAlignment(MaybeAlign(std::min(LoadAlign, getSliceAlign())));
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
@@ -3119,7 +3129,7 @@ private:
Value *Op = SI->getOperand(0);
StoreAlign = DL.getABITypeAlignment(Op->getType());
}
- SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+ SI->setAlignment(MaybeAlign(std::min(StoreAlign, getSliceAlign())));
continue;
}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 869cf00e0a89..1d2e40bf62be 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -79,6 +79,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopVersioningLICMPass(Registry);
initializeLoopIdiomRecognizeLegacyPassPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
+ initializeLowerConstantIntrinsicsPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeLowerGuardIntrinsicLegacyPassPass(Registry);
initializeLowerWidenableConditionLegacyPassPass(Registry);
@@ -123,6 +124,10 @@ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAggressiveDCEPass());
}
+void LLVMAddDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadCodeEliminationPass());
+}
+
void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBitTrackingDCEPass());
}
@@ -280,6 +285,10 @@ void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBasicAAWrapperPass());
}
+void LLVMAddLowerConstantIntrinsicsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerConstantIntrinsicsPass());
+}
+
void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLowerExpectIntrinsicPass());
}
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index f6a12fb13142..41554fccdf08 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1121,7 +1121,7 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
bool Changed = false;
for (BasicBlock &B : F) {
for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index aeac6f548b32..ac832b9b4567 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1909,7 +1909,7 @@ static void unswitchNontrivialInvariants(
// We can only unswitch switches, conditional branches with an invariant
// condition, or combining invariant conditions with an instruction.
- assert((SI || BI->isConditional()) &&
+ assert((SI || (BI && BI->isConditional())) &&
"Can only unswitch switches and conditional branch!");
bool FullUnswitch = SI || BI->getCondition() == Invariants[0];
if (FullUnswitch)
@@ -2141,17 +2141,21 @@ static void unswitchNontrivialInvariants(
buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
*ClonedPH, *LoopPH);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
+
+ if (MSSAU) {
+ DT.applyUpdates(DTUpdates);
+ DTUpdates.clear();
+
+ // Perform MSSA cloning updates.
+ for (auto &VMap : VMaps)
+ MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
+ /*IgnoreIncomingWithNoClones=*/true);
+ MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
+ }
}
// Apply the updates accumulated above to get an up-to-date dominator tree.
DT.applyUpdates(DTUpdates);
- if (!FullUnswitch && MSSAU) {
- // Update MSSA for partial unswitch, after DT update.
- SmallVector<CFGUpdate, 1> Updates;
- Updates.push_back(
- {cfg::UpdateKind::Insert, SplitBB, ClonedPHs.begin()->second});
- MSSAU->applyInsertUpdates(Updates, DT);
- }
// Now that we have an accurate dominator tree, first delete the dead cloned
// blocks so that we can accurately build any cloned loops. It is important to
@@ -2720,7 +2724,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
return Cost * (SuccessorsCount - 1);
};
Instruction *BestUnswitchTI = nullptr;
- int BestUnswitchCost;
+ int BestUnswitchCost = 0;
ArrayRef<Value *> BestUnswitchInvariants;
for (auto &TerminatorAndInvariants : UnswitchCandidates) {
Instruction &TI = *TerminatorAndInvariants.first;
@@ -2752,6 +2756,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
BestUnswitchInvariants = Invariants;
}
}
+ assert(BestUnswitchTI && "Failed to find loop unswitch candidate");
if (BestUnswitchCost >= UnswitchThreshold) {
LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
@@ -2880,7 +2885,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
auto PA = getLoopPassPreservedAnalyses();
- if (EnableMSSALoopDependency)
+ if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index c13fb3e04516..e6db11f47ead 100644
--- a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -777,8 +777,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
// speculation if the predecessor is an invoke. This doesn't seem
// fundamental and we should probably be splitting critical edges
// differently.
- if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
- isa<InvokeInst>(PredBB->getTerminator())) {
+ const auto *TermInst = PredBB->getTerminator();
+ if (isa<IndirectBrInst>(TermInst) ||
+ isa<InvokeInst>(TermInst) ||
+ isa<CallBrInst>(TermInst)) {
LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
<< PredBB->getName() << "\n");
return false;
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index e5400676c7e8..9791cf41f621 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -65,7 +65,7 @@ static cl::opt<bool> ForceSkipUniformRegions(
static cl::opt<bool>
RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
cl::desc("Allow relaxed uniform region checks"),
- cl::init(false));
+ cl::init(true));
// Definition of the complex types used in this pass.
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index f0b79079d817..b27a36b67d62 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -341,7 +341,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
const DataLayout &DL = L->getModule()->getDataLayout();
if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) ||
!isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(),
- L->getAlignment(), DL, L))
+ MaybeAlign(L->getAlignment()), DL, L))
return false;
}
}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 5fa371377c85..d85cc40c372a 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -170,7 +170,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
LoopInfo *LI, MemorySSAUpdater *MSSAU,
- MemoryDependenceResults *MemDep) {
+ MemoryDependenceResults *MemDep,
+ bool PredecessorWithTwoSuccessors) {
if (BB->hasAddressTaken())
return false;
@@ -185,9 +186,24 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
return false;
// Can't merge if there are multiple distinct successors.
- if (PredBB->getUniqueSuccessor() != BB)
+ if (!PredecessorWithTwoSuccessors && PredBB->getUniqueSuccessor() != BB)
return false;
+ // Currently only allow PredBB to have two predecessors, one being BB.
+ // Update BI to branch to BB's only successor instead of BB.
+ BranchInst *PredBB_BI;
+ BasicBlock *NewSucc = nullptr;
+ unsigned FallThruPath;
+ if (PredecessorWithTwoSuccessors) {
+ if (!(PredBB_BI = dyn_cast<BranchInst>(PredBB->getTerminator())))
+ return false;
+ BranchInst *BB_JmpI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BB_JmpI || !BB_JmpI->isUnconditional())
+ return false;
+ NewSucc = BB_JmpI->getSuccessor(0);
+ FallThruPath = PredBB_BI->getSuccessor(0) == BB ? 0 : 1;
+ }
+
// Can't merge if there is PHI loop.
for (PHINode &PN : BB->phis())
for (Value *IncValue : PN.incoming_values())
@@ -227,18 +243,39 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
- if (MSSAU)
- MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin()));
+ Instruction *PTI = PredBB->getTerminator();
+ Instruction *STI = BB->getTerminator();
+ Instruction *Start = &*BB->begin();
+ // If there's nothing to move, mark the starting instruction as the last
+ // instruction in the block.
+ if (Start == STI)
+ Start = PTI;
+
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PTI->getIterator(), BB->getInstList(),
+ BB->begin(), STI->getIterator());
- // Delete the unconditional branch from the predecessor...
- PredBB->getInstList().pop_back();
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(BB, PredBB, Start);
// Make all PHI nodes that referred to BB now refer to Pred as their
// source...
BB->replaceAllUsesWith(PredBB);
- // Move all definitions in the successor to the predecessor...
- PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+ if (PredecessorWithTwoSuccessors) {
+ // Delete the unconditional branch from BB.
+ BB->getInstList().pop_back();
+
+ // Update branch in the predecessor.
+ PredBB_BI->setSuccessor(FallThruPath, NewSucc);
+ } else {
+ // Delete the unconditional branch from the predecessor.
+ PredBB->getInstList().pop_back();
+
+ // Move terminator instruction.
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+ }
+ // Add unreachable to now empty BB.
new UnreachableInst(BB->getContext(), BB);
// Eliminate duplicate dbg.values describing the entry PHI node post-splice.
@@ -274,11 +311,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
"applying corresponding DTU updates.");
DTU->applyUpdatesPermissive(Updates);
DTU->deleteBB(BB);
- }
-
- else {
+ } else {
BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
}
+
return true;
}
@@ -365,11 +401,13 @@ llvm::SplitAllCriticalEdges(Function &F,
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU, const Twine &BBName) {
BasicBlock::iterator SplitIt = SplitPt->getIterator();
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
- BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+ std::string Name = BBName.str();
+ BasicBlock *New = Old->splitBasicBlock(
+ SplitIt, Name.empty() ? Old->getName() + ".split" : Name);
// The new block lives in whichever loop the old one did. This preserves
// LCSSA as well, because we force the split point to be after any PHI nodes.
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 27f110e24f9c..71316ce8f758 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -88,6 +88,14 @@ static bool setDoesNotCapture(Function &F, unsigned ArgNo) {
return true;
}
+static bool setDoesNotAlias(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::NoAlias))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::NoAlias);
+ ++NumNoAlias;
+ return true;
+}
+
static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly))
return false;
@@ -175,6 +183,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_strcpy:
case LibFunc_strncpy:
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
+ LLVM_FALLTHROUGH;
case LibFunc_strcat:
case LibFunc_strncat:
Changed |= setReturnedArg(F, 0);
@@ -249,12 +260,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_sprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_snprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
@@ -291,11 +304,23 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_memcpy:
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
+ Changed |= setReturnedArg(F, 0);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
case LibFunc_memmove:
Changed |= setReturnedArg(F, 0);
- LLVM_FALLTHROUGH;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
case LibFunc_mempcpy:
case LibFunc_memccpy:
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
@@ -760,9 +785,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
}
}
-bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn) {
+bool llvm::hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) {
switch (Ty->getTypeID()) {
case Type::HalfTyID:
return false;
@@ -775,10 +799,10 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
}
}
-StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn) {
- assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
+StringRef llvm::getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn) {
+ assert(hasFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
"Cannot get name for unavailable function!");
switch (Ty->getTypeID()) {
@@ -827,6 +851,12 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
}
+Value *llvm::emitStrDup(Value *Ptr, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(),
+ castToCStr(Ptr, B), B, TLI);
+}
+
Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
@@ -1045,24 +1075,28 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
LibFunc LongDoubleFn, IRBuilder<> &B,
const AttributeList &Attrs) {
// Get the name of the function according to TLI.
- StringRef Name = getUnaryFloatFn(TLI, Op->getType(),
- DoubleFn, FloatFn, LongDoubleFn);
+ StringRef Name = getFloatFnName(TLI, Op->getType(),
+ DoubleFn, FloatFn, LongDoubleFn);
return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
}
-Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
- IRBuilder<> &B, const AttributeList &Attrs) {
+static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
+ StringRef Name, IRBuilder<> &B,
+ const AttributeList &Attrs) {
assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
- SmallString<20> NameBuffer;
- appendTypeSuffix(Op1, Name, NameBuffer);
-
Module *M = B.GetInsertBlock()->getModule();
- FunctionCallee Callee = M->getOrInsertFunction(
- Name, Op1->getType(), Op1->getType(), Op2->getType());
- CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
- CI->setAttributes(Attrs);
+ FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(),
+ Op1->getType(), Op2->getType());
+ CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name);
+
+ // The incoming attribute set may have come from a speculatable intrinsic, but
+ // is being replaced with a library call which is not allowed to be
+ // speculatable.
+ CI->setAttributes(Attrs.removeAttribute(B.getContext(),
+ AttributeList::FunctionIndex,
+ Attribute::Speculatable));
if (const Function *F =
dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1070,6 +1104,28 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
return CI;
}
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
+ IRBuilder<> &B, const AttributeList &Attrs) {
+ assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
+
+ SmallString<20> NameBuffer;
+ appendTypeSuffix(Op1, Name, NameBuffer);
+
+ return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+}
+
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+ const TargetLibraryInfo *TLI,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ // Get the name of the function according to TLI.
+ StringRef Name = getFloatFnName(TLI, Op1->getType(),
+ DoubleFn, FloatFn, LongDoubleFn);
+
+ return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+}
+
Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_putchar))
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index df299f673f65..9a6761040bd8 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -448,13 +448,17 @@ bool llvm::bypassSlowDivision(BasicBlock *BB,
DivCacheTy PerBBDivCache;
bool MadeChange = false;
- Instruction* Next = &*BB->begin();
+ Instruction *Next = &*BB->begin();
while (Next != nullptr) {
// We may add instructions immediately after I, but we want to skip over
// them.
- Instruction* I = Next;
+ Instruction *I = Next;
Next = Next->getNextNode();
+ // Ignore dead code to save time and avoid bugs.
+ if (I->hasNUses(0))
+ continue;
+
FastDivInsertionTask Task(I, BypassWidths);
if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
I->replaceAllUsesWith(Replacement);
diff --git a/lib/Transforms/Utils/CanonicalizeAliases.cpp b/lib/Transforms/Utils/CanonicalizeAliases.cpp
index 455fcbb1cf98..3c7c8d872595 100644
--- a/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1026c9d37038..75e8963303c2 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -210,6 +210,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
RemapInstruction(&II, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
+
+ // Register all DICompileUnits of the old parent module in the new parent module
+ auto* OldModule = OldFunc->getParent();
+ auto* NewModule = NewFunc->getParent();
+ if (OldModule && NewModule && OldModule != NewModule && DIFinder.compile_unit_count()) {
+ auto* NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
+ // Avoid multiple insertions of the same DICompileUnit to NMD.
+ SmallPtrSet<const void*, 8> Visited;
+ for (auto* Operand : NMD->operands())
+ Visited.insert(Operand);
+ for (auto* Unit : DIFinder.compile_units())
+ // VMap.MD()[Unit] == Unit
+ if (Visited.insert(Unit).second)
+ NMD->addOperand(Unit);
+ }
}
/// Return a copy of the specified function and add it to that function's
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 7ddf59becba9..2c8c3abb2922 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -48,7 +48,7 @@ std::unique_ptr<Module> llvm::CloneModule(
function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
// First off, we need to create the new module.
std::unique_ptr<Module> New =
- llvm::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
+ std::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
New->setSourceFileName(M.getSourceFileName());
New->setDataLayout(M.getDataLayout());
New->setTargetTriple(M.getTargetTriple());
@@ -181,13 +181,25 @@ std::unique_ptr<Module> llvm::CloneModule(
}
// And named metadata....
+ const auto* LLVM_DBG_CU = M.getNamedMetadata("llvm.dbg.cu");
for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
E = M.named_metadata_end();
I != E; ++I) {
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
- for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
+ if (&NMD == LLVM_DBG_CU) {
+ // Do not insert duplicate operands.
+ SmallPtrSet<const void*, 8> Visited;
+ for (const auto* Operand : NewNMD->operands())
+ Visited.insert(Operand);
+ for (const auto* Operand : NMD.operands()) {
+ auto* MappedOperand = MapMetadata(Operand, VMap);
+ if (Visited.insert(MappedOperand).second)
+ NewNMD->addOperand(MappedOperand);
+ }
+ } else
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
}
return New;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index fa6d3f8ae873..0298ff9a395f 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -293,10 +293,8 @@ static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
CommonExitBlock = Succ;
continue;
}
- if (CommonExitBlock == Succ)
- continue;
-
- return true;
+ if (CommonExitBlock != Succ)
+ return true;
}
return false;
};
@@ -307,52 +305,79 @@ static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
return CommonExitBlock;
}
-bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
- Instruction *Addr) const {
- AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
- Function *Func = (*Blocks.begin())->getParent();
- for (BasicBlock &BB : *Func) {
- if (Blocks.count(&BB))
- continue;
- for (Instruction &II : BB) {
- if (isa<DbgInfoIntrinsic>(II))
- continue;
+CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &II : BB.instructionsWithoutDebug())
+ if (auto *AI = dyn_cast<AllocaInst>(&II))
+ Allocas.push_back(AI);
- unsigned Opcode = II.getOpcode();
- Value *MemAddr = nullptr;
- switch (Opcode) {
- case Instruction::Store:
- case Instruction::Load: {
- if (Opcode == Instruction::Store) {
- StoreInst *SI = cast<StoreInst>(&II);
- MemAddr = SI->getPointerOperand();
- } else {
- LoadInst *LI = cast<LoadInst>(&II);
- MemAddr = LI->getPointerOperand();
- }
- // Global variable can not be aliased with locals.
- if (dyn_cast<Constant>(MemAddr))
- break;
- Value *Base = MemAddr->stripInBoundsConstantOffsets();
- if (!isa<AllocaInst>(Base) || Base == AI)
- return false;
+ findSideEffectInfoForBlock(BB);
+ }
+}
+
+void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) {
+ for (Instruction &II : BB.instructionsWithoutDebug()) {
+ unsigned Opcode = II.getOpcode();
+ Value *MemAddr = nullptr;
+ switch (Opcode) {
+ case Instruction::Store:
+ case Instruction::Load: {
+ if (Opcode == Instruction::Store) {
+ StoreInst *SI = cast<StoreInst>(&II);
+ MemAddr = SI->getPointerOperand();
+ } else {
+ LoadInst *LI = cast<LoadInst>(&II);
+ MemAddr = LI->getPointerOperand();
+ }
+ // Global variable can not be aliased with locals.
+ if (dyn_cast<Constant>(MemAddr))
break;
+ Value *Base = MemAddr->stripInBoundsConstantOffsets();
+ if (!isa<AllocaInst>(Base)) {
+ SideEffectingBlocks.insert(&BB);
+ return;
}
- default: {
- IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
- if (IntrInst) {
- if (IntrInst->isLifetimeStartOrEnd())
- break;
- return false;
- }
- // Treat all the other cases conservatively if it has side effects.
- if (II.mayHaveSideEffects())
- return false;
+ BaseMemAddrs[&BB].insert(Base);
+ break;
+ }
+ default: {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
+ if (IntrInst) {
+ if (IntrInst->isLifetimeStartOrEnd())
+ break;
+ SideEffectingBlocks.insert(&BB);
+ return;
}
+ // Treat all the other cases conservatively if it has side effects.
+ if (II.mayHaveSideEffects()) {
+ SideEffectingBlocks.insert(&BB);
+ return;
}
}
+ }
}
+}
+bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
+ BasicBlock &BB, AllocaInst *Addr) const {
+ if (SideEffectingBlocks.count(&BB))
+ return true;
+ auto It = BaseMemAddrs.find(&BB);
+ if (It != BaseMemAddrs.end())
+ return It->second.count(Addr);
+ return false;
+}
+
+bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
+ const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const {
+ AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
+ Function *Func = (*Blocks.begin())->getParent();
+ for (BasicBlock &BB : *Func) {
+ if (Blocks.count(&BB))
+ continue;
+ if (CEAC.doesBlockContainClobberOfAddr(BB, AI))
+ return false;
+ }
return true;
}
@@ -415,7 +440,8 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
// outline region. If there are not other untracked uses of the address, return
// the pair of markers if found; otherwise return a pair of nullptr.
CodeExtractor::LifetimeMarkerInfo
-CodeExtractor::getLifetimeMarkers(Instruction *Addr,
+CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
+ Instruction *Addr,
BasicBlock *ExitBlock) const {
LifetimeMarkerInfo Info;
@@ -447,7 +473,7 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr,
Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
// Do legality check.
if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
- !isLegalToShrinkwrapLifetimeMarkers(Addr))
+ !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr))
return {};
// Check to see if we have a place to do hoisting, if not, bail.
@@ -457,7 +483,8 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr,
return Info;
}
-void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
+void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
+ ValueSet &SinkCands, ValueSet &HoistCands,
BasicBlock *&ExitBlock) const {
Function *Func = (*Blocks.begin())->getParent();
ExitBlock = getCommonExitBlock(Blocks);
@@ -478,74 +505,104 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
return true;
};
- for (BasicBlock &BB : *Func) {
- if (Blocks.count(&BB))
+ // Look up allocas in the original function in CodeExtractorAnalysisCache, as
+ // this is much faster than walking all the instructions.
+ for (AllocaInst *AI : CEAC.getAllocas()) {
+ BasicBlock *BB = AI->getParent();
+ if (Blocks.count(BB))
continue;
- for (Instruction &II : BB) {
- auto *AI = dyn_cast<AllocaInst>(&II);
- if (!AI)
- continue;
- LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock);
- bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
- if (Moved) {
- LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
- SinkCands.insert(AI);
- continue;
- }
+ // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
+ // check whether it is actually still in the original function.
+ Function *AIFunc = BB->getParent();
+ if (AIFunc != Func)
+ continue;
- // Follow any bitcasts.
- SmallVector<Instruction *, 2> Bitcasts;
- SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
- for (User *U : AI->users()) {
- if (U->stripInBoundsConstantOffsets() == AI) {
- Instruction *Bitcast = cast<Instruction>(U);
- LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock);
- if (LMI.LifeStart) {
- Bitcasts.push_back(Bitcast);
- BitcastLifetimeInfo.push_back(LMI);
- continue;
- }
- }
+ LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);
+ bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
+ if (Moved) {
+ LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
+ SinkCands.insert(AI);
+ continue;
+ }
- // Found unknown use of AI.
- if (!definedInRegion(Blocks, U)) {
- Bitcasts.clear();
- break;
+ // Follow any bitcasts.
+ SmallVector<Instruction *, 2> Bitcasts;
+ SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
+ for (User *U : AI->users()) {
+ if (U->stripInBoundsConstantOffsets() == AI) {
+ Instruction *Bitcast = cast<Instruction>(U);
+ LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);
+ if (LMI.LifeStart) {
+ Bitcasts.push_back(Bitcast);
+ BitcastLifetimeInfo.push_back(LMI);
+ continue;
}
}
- // Either no bitcasts reference the alloca or there are unknown uses.
- if (Bitcasts.empty())
- continue;
+ // Found unknown use of AI.
+ if (!definedInRegion(Blocks, U)) {
+ Bitcasts.clear();
+ break;
+ }
+ }
- LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
- SinkCands.insert(AI);
- for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
- Instruction *BitcastAddr = Bitcasts[I];
- const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
- assert(LMI.LifeStart &&
- "Unsafe to sink bitcast without lifetime markers");
- moveOrIgnoreLifetimeMarkers(LMI);
- if (!definedInRegion(Blocks, BitcastAddr)) {
- LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
- << "\n");
- SinkCands.insert(BitcastAddr);
- }
+ // Either no bitcasts reference the alloca or there are unknown uses.
+ if (Bitcasts.empty())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
+ SinkCands.insert(AI);
+ for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
+ Instruction *BitcastAddr = Bitcasts[I];
+ const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
+ assert(LMI.LifeStart &&
+ "Unsafe to sink bitcast without lifetime markers");
+ moveOrIgnoreLifetimeMarkers(LMI);
+ if (!definedInRegion(Blocks, BitcastAddr)) {
+ LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
+ << "\n");
+ SinkCands.insert(BitcastAddr);
}
}
}
}
+bool CodeExtractor::isEligible() const {
+ if (Blocks.empty())
+ return false;
+ BasicBlock *Header = *Blocks.begin();
+ Function *F = Header->getParent();
+
+ // For functions with varargs, check that varargs handling is only done in the
+ // outlined function, i.e vastart and vaend are only used in outlined blocks.
+ if (AllowVarArgs && F->getFunctionType()->isVarArg()) {
+ auto containsVarArgIntrinsic = [](const Instruction &I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (const Function *Callee = CI->getCalledFunction())
+ return Callee->getIntrinsicID() == Intrinsic::vastart ||
+ Callee->getIntrinsicID() == Intrinsic::vaend;
+ return false;
+ };
+
+ for (auto &BB : *F) {
+ if (Blocks.count(&BB))
+ continue;
+ if (llvm::any_of(BB, containsVarArgIntrinsic))
+ return false;
+ }
+ }
+ return true;
+}
+
void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
const ValueSet &SinkCands) const {
for (BasicBlock *BB : Blocks) {
// If a used value is defined outside the region, it's an input. If an
// instruction is used outside the region, it's an output.
for (Instruction &II : *BB) {
- for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
- ++OI) {
- Value *V = *OI;
+ for (auto &OI : II.operands()) {
+ Value *V = OI;
if (!SinkCands.count(V) && definedInCaller(Blocks, V))
Inputs.insert(V);
}
@@ -904,12 +961,12 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// within the new function. This must be done before we lose track of which
// blocks were originally in the code region.
std::vector<User *> Users(header->user_begin(), header->user_end());
- for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ for (auto &U : Users)
// The BasicBlock which contains the branch is not in the region
// modify the branch target to a new block
- if (Instruction *I = dyn_cast<Instruction>(Users[i]))
- if (I->isTerminator() && !Blocks.count(I->getParent()) &&
- I->getParent()->getParent() == oldFunction)
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ if (I->isTerminator() && I->getFunction() == oldFunction &&
+ !Blocks.count(I->getParent()))
I->replaceUsesOfWith(header, newHeader);
return newFunction;
@@ -1277,13 +1334,6 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
// Insert this basic block into the new function
newBlocks.push_back(Block);
-
- // Remove @llvm.assume calls that were moved to the new function from the
- // old function's assumption cache.
- if (AC)
- for (auto &I : *Block)
- if (match(&I, m_Intrinsic<Intrinsic::assume>()))
- AC->unregisterAssumption(cast<CallInst>(&I));
}
}
@@ -1332,7 +1382,8 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
-Function *CodeExtractor::extractCodeRegion() {
+Function *
+CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
if (!isEligible())
return nullptr;
@@ -1341,27 +1392,6 @@ Function *CodeExtractor::extractCodeRegion() {
BasicBlock *header = *Blocks.begin();
Function *oldFunction = header->getParent();
- // For functions with varargs, check that varargs handling is only done in the
- // outlined function, i.e vastart and vaend are only used in outlined blocks.
- if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) {
- auto containsVarArgIntrinsic = [](Instruction &I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (const Function *F = CI->getCalledFunction())
- return F->getIntrinsicID() == Intrinsic::vastart ||
- F->getIntrinsicID() == Intrinsic::vaend;
- return false;
- };
-
- for (auto &BB : *oldFunction) {
- if (Blocks.count(&BB))
- continue;
- if (llvm::any_of(BB, containsVarArgIntrinsic))
- return nullptr;
- }
- }
- ValueSet inputs, outputs, SinkingCands, HoistingCands;
- BasicBlock *CommonExit = nullptr;
-
// Calculate the entry frequency of the new function before we change the root
// block.
BlockFrequency EntryFreq;
@@ -1375,6 +1405,15 @@ Function *CodeExtractor::extractCodeRegion() {
}
}
+ if (AC) {
+ // Remove @llvm.assume calls that were moved to the new function from the
+ // old function's assumption cache.
+ for (BasicBlock *Block : Blocks)
+ for (auto &I : *Block)
+ if (match(&I, m_Intrinsic<Intrinsic::assume>()))
+ AC->unregisterAssumption(cast<CallInst>(&I));
+ }
+
// If we have any return instructions in the region, split those blocks so
// that the return is not in the region.
splitReturnBlocks();
@@ -1428,7 +1467,9 @@ Function *CodeExtractor::extractCodeRegion() {
}
newFuncRoot->getInstList().push_back(BranchI);
- findAllocas(SinkingCands, HoistingCands, CommonExit);
+ ValueSet inputs, outputs, SinkingCands, HoistingCands;
+ BasicBlock *CommonExit = nullptr;
+ findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
assert(HoistingCands.empty() || CommonExit);
// Find inputs to, outputs from the code region.
@@ -1563,5 +1604,17 @@ Function *CodeExtractor::extractCodeRegion() {
});
LLVM_DEBUG(if (verifyFunction(*oldFunction))
report_fatal_error("verification of oldFunction failed!"));
+ LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, AC))
+ report_fatal_error("Stale Asumption cache for old Function!"));
return newFunction;
}
+
+bool CodeExtractor::verifyAssumptionCache(const Function& F,
+ AssumptionCache *AC) {
+ for (auto AssumeVH : AC->assumptions()) {
+ CallInst *I = cast<CallInst>(AssumeVH);
+ if (I->getFunction() != &F)
+ return true;
+ }
+ return false;
+}
diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 4aa40eeadda4..57e2ff0251a9 100644
--- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -24,7 +24,7 @@ static void insertCall(Function &CurFn, StringRef Func,
if (Func == "mcount" ||
Func == ".mcount" ||
- Func == "\01__gnu_mcount_nc" ||
+ Func == "llvm.arm.gnu.eabi.mcount" ||
Func == "\01_mcount" ||
Func == "\01mcount" ||
Func == "__mcount" ||
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index 0e203f4e075d..ad36790b8c6a 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -469,7 +469,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false; // Cannot handle array allocs.
}
Type *Ty = AI->getAllocatedType();
- AllocaTmps.push_back(llvm::make_unique<GlobalVariable>(
+ AllocaTmps.push_back(std::make_unique<GlobalVariable>(
Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty),
AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal,
AI->getType()->getPointerAddressSpace()));
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 0c52e6f3703b..893f23eb6048 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -67,7 +67,7 @@ public:
/// Before:
/// ......
/// %cmp10 = fcmp une float %tmp1, %tmp2
-/// br i1 %cmp1, label %if.then, label %lor.rhs
+/// br i1 %cmp10, label %if.then, label %lor.rhs
///
/// lor.rhs:
/// ......
@@ -251,8 +251,8 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
bool EverChanged = false;
for (; CurrBlock != FirstCondBlock;
CurrBlock = CurrBlock->getSinglePredecessor()) {
- BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
- CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ auto *BI = cast<BranchInst>(CurrBlock->getTerminator());
+ auto *CI = dyn_cast<CmpInst>(BI->getCondition());
if (!CI)
continue;
@@ -278,7 +278,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
// Do the transformation.
BasicBlock *CB;
- BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
+ BranchInst *PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
bool Iteration = true;
IRBuilder<>::InsertPointGuard Guard(Builder);
Value *PC = PBI->getCondition();
@@ -444,7 +444,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
FirstEntryBlock->getInstList().pop_back();
FirstEntryBlock->getInstList()
.splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
- BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
+ BranchInst *PBI = cast<BranchInst>(FirstEntryBlock->getTerminator());
Value *CC = PBI->getCondition();
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
@@ -453,6 +453,16 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
PBI->replaceUsesOfWith(CC, NC);
Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+ // Handle PHI node to replace its predecessors to FirstEntryBlock.
+ for (BasicBlock *Succ : successors(PBI)) {
+ for (PHINode &Phi : Succ->phis()) {
+ for (unsigned i = 0, e = Phi.getNumIncomingValues(); i != e; ++i) {
+ if (Phi.getIncomingBlock(i) == SecondEntryBlock)
+ Phi.setIncomingBlock(i, FirstEntryBlock);
+ }
+ }
+ }
+
// Remove IfTrue1
if (IfTrue1 != FirstEntryBlock) {
IfTrue1->dropAllReferences();
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index c9cc0990f237..76b4635ad501 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -210,7 +210,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
if (Function *F = dyn_cast<Function>(&GV)) {
if (!F->isDeclaration()) {
for (auto &S : VI.getSummaryList()) {
- FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
+ auto *FS = cast<FunctionSummary>(S->getBaseObject());
if (FS->modulePath() == M.getModuleIdentifier()) {
F->setEntryCount(Function::ProfileCount(FS->entryCount(),
Function::PCT_Synthetic));
diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index 8041e66e6c4c..ea93f99d69e3 100644
--- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -25,8 +25,8 @@ ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) {
auto &ValueLookup = NodesMap[F.getName()];
if (!ValueLookup) {
- ValueLookup = llvm::make_unique<InlineGraphNode>();
- ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr;
+ ValueLookup = std::make_unique<InlineGraphNode>();
+ ValueLookup->Imported = F.hasMetadata("thinlto_src_module");
}
return *ValueLookup;
}
@@ -64,7 +64,7 @@ void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) {
if (F.isDeclaration())
continue;
AllFunctions++;
- ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr);
+ ImportedFunctions += int(F.hasMetadata("thinlto_src_module"));
}
}
static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All,
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 8c67d1dc6eb3..ed28fffc22b5 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -533,7 +533,7 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
}
bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
return runImpl(F, TLI, DT);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 39b6b889f91c..5bcd05757ec1 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -324,8 +324,14 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Value *Address = IBI->getAddress();
IBI->eraseFromParent();
if (DeleteDeadConditions)
+ // Delete pointer cast instructions.
RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
+ // Also zap the blockaddress constant if there are no users remaining,
+ // otherwise the destination is still marked as having its address taken.
+ if (BA->use_empty())
+ BA->destroyConstant();
+
// If we didn't find our destination in the IBI successor list, then we
// have undefined behavior. Replace the unconditional branch with an
// 'unreachable' instruction.
@@ -633,17 +639,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
// Control Flow Graph Restructuring.
//
-/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
-/// method is called when we're about to delete Pred as a predecessor of BB. If
-/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
-///
-/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
-/// nodes that collapse into identity values. For example, if we have:
-/// x = phi(1, 0, 0, 0)
-/// y = and x, z
-///
-/// .. and delete the predecessor corresponding to the '1', this will attempt to
-/// recursively fold the and to 0.
void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
DomTreeUpdater *DTU) {
// This only adjusts blocks with PHI nodes.
@@ -672,10 +667,6 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}});
}
-/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
-/// predecessor is known to have one successor (DestBB!). Eliminate the edge
-/// between them, moving the instructions in the predecessor into DestBB and
-/// deleting the predecessor block.
void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
DomTreeUpdater *DTU) {
@@ -755,15 +746,14 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
}
}
-/// CanMergeValues - Return true if we can choose one of these values to use
-/// in place of the other. Note that we will always choose the non-undef
-/// value to keep.
+/// Return true if we can choose one of these values to use in place of the
+/// other. Note that we will always choose the non-undef value to keep.
static bool CanMergeValues(Value *First, Value *Second) {
return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
}
-/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+/// Return true if we can fold BB, an almost-empty BB ending in an unconditional
+/// branch to Succ, into Succ.
///
/// Assumption: Succ is the single successor for BB.
static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
@@ -956,11 +946,6 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
replaceUndefValuesInPhi(PN, IncomingValues);
}
-/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
-/// unconditional branch, and contains no instructions other than PHI nodes,
-/// potential side-effect free intrinsics and the branch. If possible,
-/// eliminate BB by rewriting all the predecessors to branch to the successor
-/// block and return true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
DomTreeUpdater *DTU) {
assert(BB != &BB->getParent()->getEntryBlock() &&
@@ -1088,10 +1073,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
return true;
}
-/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
-/// nodes in this block. This doesn't try to be clever about PHI nodes
-/// which differ only in the order of the incoming values, but instcombine
-/// orders them so it usually won't matter.
bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
// This implementation doesn't currently consider undef operands
// specially. Theoretically, two phis which are identical except for
@@ -1151,10 +1132,10 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// often possible though. If alignment is important, a more reliable approach
/// is to simply align all global variables and allocation instructions to
/// their preferred alignment from the beginning.
-static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+static unsigned enforceKnownAlignment(Value *V, unsigned Alignment,
unsigned PrefAlign,
const DataLayout &DL) {
- assert(PrefAlign > Align);
+ assert(PrefAlign > Alignment);
V = V->stripPointerCasts();
@@ -1165,36 +1146,36 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// stripPointerCasts recurses through infinite layers of bitcasts,
// while computeKnownBits is not allowed to traverse more than 6
// levels.
- Align = std::max(AI->getAlignment(), Align);
- if (PrefAlign <= Align)
- return Align;
+ Alignment = std::max(AI->getAlignment(), Alignment);
+ if (PrefAlign <= Alignment)
+ return Alignment;
// If the preferred alignment is greater than the natural stack alignment
// then don't round up. This avoids dynamic stack realignment.
- if (DL.exceedsNaturalStackAlignment(PrefAlign))
- return Align;
- AI->setAlignment(PrefAlign);
+ if (DL.exceedsNaturalStackAlignment(Align(PrefAlign)))
+ return Alignment;
+ AI->setAlignment(MaybeAlign(PrefAlign));
return PrefAlign;
}
if (auto *GO = dyn_cast<GlobalObject>(V)) {
// TODO: as above, this shouldn't be necessary.
- Align = std::max(GO->getAlignment(), Align);
- if (PrefAlign <= Align)
- return Align;
+ Alignment = std::max(GO->getAlignment(), Alignment);
+ if (PrefAlign <= Alignment)
+ return Alignment;
// If there is a large requested alignment and we can, bump up the alignment
// of the global. If the memory we set aside for the global may not be the
// memory used by the final program then it is impossible for us to reliably
// enforce the preferred alignment.
if (!GO->canIncreaseAlignment())
- return Align;
+ return Alignment;
- GO->setAlignment(PrefAlign);
+ GO->setAlignment(MaybeAlign(PrefAlign));
return PrefAlign;
}
- return Align;
+ return Alignment;
}
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
@@ -1397,7 +1378,12 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
/// Determine whether this alloca is either a VLA or an array.
static bool isArray(AllocaInst *AI) {
return AI->isArrayAllocation() ||
- AI->getType()->getElementType()->isArrayTy();
+ (AI->getAllocatedType() && AI->getAllocatedType()->isArrayTy());
+}
+
+/// Determine whether this alloca is a structure.
+static bool isStructure(AllocaInst *AI) {
+ return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy();
}
/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
@@ -1422,7 +1408,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
// stored on the stack, while the dbg.declare can only describe
// the stack slot (and at a lexical-scope granularity). Later
// passes will attempt to elide the stack slot.
- if (!AI || isArray(AI))
+ if (!AI || isArray(AI) || isStructure(AI))
continue;
// A volatile load/store means that the alloca can't be elided anyway.
@@ -1591,15 +1577,10 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
DIExpr->getElement(0) != dwarf::DW_OP_deref)
return;
- // Insert the offset immediately after the first deref.
+ // Insert the offset before the first deref.
// We could just change the offset argument of dbg.value, but it's unsigned...
- if (Offset) {
- SmallVector<uint64_t, 4> Ops;
- Ops.push_back(dwarf::DW_OP_deref);
- DIExpression::appendOffset(Ops, Offset);
- Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
- DIExpr = Builder.createExpression(Ops);
- }
+ if (Offset)
+ DIExpr = DIExpression::prepend(DIExpr, 0, Offset);
Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI);
DVI->eraseFromParent();
@@ -1957,18 +1938,24 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
return NumInstrsRemoved;
}
-/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
- SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) {
+ SmallVector<Value *, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
- CallInst *NewCall = CallInst::Create(
- II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II);
- NewCall->takeName(II);
+ CallInst *NewCall = CallInst::Create(II->getFunctionType(),
+ II->getCalledValue(), Args, OpBundles);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
NewCall->setDebugLoc(II->getDebugLoc());
NewCall->copyMetadata(*II);
+ return NewCall;
+}
+
+/// changeToCall - Convert the specified invoke into a normal call.
+void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
+ CallInst *NewCall = createCallMatchingInvoke(II);
+ NewCall->takeName(II);
+ NewCall->insertBefore(II);
II->replaceAllUsesWith(NewCall);
// Follow the call by a branch to the normal destination.
@@ -2223,12 +2210,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
-/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
-/// after modifying the CFG.
-bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
- DomTreeUpdater *DTU,
+/// otherwise.
+bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU) {
- SmallPtrSet<BasicBlock*, 16> Reachable;
+ SmallPtrSet<BasicBlock *, 16> Reachable;
bool Changed = markAliveBlocks(F, Reachable, DTU);
// If there are unreachable blocks in the CFG...
@@ -2236,21 +2221,21 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
return Changed;
assert(Reachable.size() < F.size());
- NumRemoved += F.size()-Reachable.size();
+ NumRemoved += F.size() - Reachable.size();
SmallSetVector<BasicBlock *, 8> DeadBlockSet;
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
- auto *BB = &*I;
- if (Reachable.count(BB))
+ for (BasicBlock &BB : F) {
+ // Skip reachable basic blocks
+ if (Reachable.find(&BB) != Reachable.end())
continue;
- DeadBlockSet.insert(BB);
+ DeadBlockSet.insert(&BB);
}
if (MSSAU)
MSSAU->removeBlocks(DeadBlockSet);
// Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references. Update DTU and LVI if available.
+ // their internal references. Update DTU if available.
std::vector<DominatorTree::UpdateType> Updates;
for (auto *BB : DeadBlockSet) {
for (BasicBlock *Successor : successors(BB)) {
@@ -2259,26 +2244,18 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Successor});
}
- if (LVI)
- LVI->eraseBlock(BB);
BB->dropAllReferences();
- }
- for (Function::iterator I = ++F.begin(); I != F.end();) {
- auto *BB = &*I;
- if (Reachable.count(BB)) {
- ++I;
- continue;
- }
if (DTU) {
- // Remove the terminator of BB to clear the successor list of BB.
- if (BB->getTerminator())
- BB->getInstList().pop_back();
+ Instruction *TI = BB->getTerminator();
+ assert(TI && "Basic block should have a terminator");
+ // Terminators like invoke can have users. We have to replace their users,
+ // before removing them.
+ if (!TI->use_empty())
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ TI->eraseFromParent();
new UnreachableInst(BB->getContext(), BB);
assert(succ_empty(BB) && "The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
- ++I;
- } else {
- I = F.getBasicBlockList().erase(I);
}
}
@@ -2294,7 +2271,11 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
}
if (!Deleted)
return false;
+ } else {
+ for (auto *BB : DeadBlockSet)
+ BB->eraseFromParent();
}
+
return true;
}
@@ -2363,6 +2344,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
+ case LLVMContext::MD_preserve_access_index:
+ // Preserve !preserve.access.index in K.
+ break;
}
}
// Set !invariant.group from J if J has it. If both instructions have it
@@ -2385,10 +2369,61 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
LLVMContext::MD_invariant_group, LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_access_group};
+ LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index};
combineMetadata(K, J, KnownIDs, KDominatesJ);
}
+void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) {
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ Source.getAllMetadata(MD);
+ MDBuilder MDB(Dest.getContext());
+ Type *NewType = Dest.getType();
+ const DataLayout &DL = Source.getModule()->getDataLayout();
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a load instruction changing *only its type*.
+ // The only metadata it makes sense to drop is metadata which is invalidated
+ // when the pointer type changes. This should essentially never be the case
+ // in LLVM, but we explicitly switch over only known metadata to be
+ // conservatively correct. If you are adding metadata to LLVM which pertains
+ // to loads, you almost certainly want to add it here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ case LLVMContext::MD_access_group:
+ // All of these directly apply.
+ Dest.setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_nonnull:
+ copyNonnullMetadata(Source, N, Dest);
+ break;
+
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ // These only directly apply if the new type is also a pointer.
+ if (NewType->isPointerTy())
+ Dest.setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_range:
+ copyRangeMetadata(DL, Source, N, Dest);
+ break;
+ }
+ }
+}
+
void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
auto *ReplInst = dyn_cast<Instruction>(Repl);
if (!ReplInst)
@@ -2417,7 +2452,7 @@ void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
LLVMContext::MD_noalias, LLVMContext::MD_range,
LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
- LLVMContext::MD_access_group};
+ LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index};
combineMetadata(ReplInst, I, KnownIDs, false);
}
diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp
index 37389a695b45..889ea5ca9970 100644
--- a/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -615,30 +615,9 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
<< LastExit->getName() << "\n");
- // Hoist the instructions from Latch into LastExit.
- Instruction *FirstLatchInst = &*(Latch->begin());
- LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
- Latch->begin(), Jmp->getIterator());
-
- // Update MemorySSA
- if (MSSAU)
- MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst);
-
- unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
- BasicBlock *Header = Jmp->getSuccessor(0);
- assert(Header == L->getHeader() && "expected a backward branch");
-
- // Remove Latch from the CFG so that LastExit becomes the new Latch.
- BI->setSuccessor(FallThruPath, Header);
- Latch->replaceSuccessorsPhiUsesWith(LastExit);
- Jmp->eraseFromParent();
-
- // Nuke the Latch block.
- assert(Latch->empty() && "unable to evacuate Latch");
- LI->removeBlock(Latch);
- if (DT)
- DT->eraseNode(Latch);
- Latch->eraseFromParent();
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr,
+ /*PredecessorWithTwoSuccessors=*/true);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 7e6da02d5707..d0f89dc54bfb 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -808,7 +808,7 @@ bool LoopSimplify::runOnFunction(Function &F) {
auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
if (MSSAAnalysis) {
MSSA = &MSSAAnalysis->getMSSA();
- MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
}
@@ -835,12 +835,19 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(F);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAAnalysis) {
+ auto *MSSA = &MSSAAnalysis->getMSSA();
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+ }
+
// Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
- // after simplifying the loops. MemorySSA is not preserved either.
+ // after simplifying the loops. MemorySSA is preserved if it exists.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
Changed |=
- simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);
+ simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false);
if (!Changed)
return PreservedAnalyses::all();
@@ -853,6 +860,8 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
PA.preserve<SCEVAA>();
PA.preserve<ScalarEvolutionAnalysis>();
PA.preserve<DependenceAnalysis>();
+ if (MSSAAnalysis)
+ PA.preserve<MemorySSAAnalysis>();
// BPI maps conditional terminators to probabilities, LoopSimplify can insert
// blocks, but it does so only by splitting existing blocks and edges. This
// results in the interesting property that all new terminators inserted are
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index e39ade523714..a7590fc32545 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -711,7 +711,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
ArrayRef<BasicBlock *> NextBlocks,
- BasicBlock *CurrentHeader,
+ BasicBlock *BlockInLoop,
bool NeedConditional) {
auto *Term = cast<BranchInst>(Src->getTerminator());
if (NeedConditional) {
@@ -723,7 +723,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (Dest != LoopExit) {
BasicBlock *BB = Src;
for (BasicBlock *Succ : successors(BB)) {
- if (Succ == CurrentHeader)
+ // Preserve the incoming value from BB if we are jumping to the block
+ // in the current loop.
+ if (Succ == BlockInLoop)
continue;
for (PHINode &Phi : Succ->phis())
Phi.removeIncomingValue(BB, false);
@@ -794,7 +796,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// unconditional branch for some iterations.
NeedConditional = false;
- setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional);
+ setDest(Headers[i], Dest, Headers, HeaderSucc[i], NeedConditional);
}
// Set up latches to branch to the new header in the unrolled iterations or
@@ -868,7 +870,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
assert(!DT || !UnrollVerifyDomtree ||
DT->verify(DominatorTree::VerificationLevel::Fast));
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
// Merge adjacent basic blocks, if possible.
for (BasicBlock *Latch : Latches) {
BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
@@ -888,6 +890,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
}
+ // Apply updates to the DomTree.
+ DT = &DTU.getDomTree();
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index ff49d83f25c5..bf2e87b0d49f 100644
--- a/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -517,6 +517,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
}
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
// Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
// new ones required.
if (Count != 1) {
@@ -530,7 +531,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
SubLoopBlocksLast.back(), AftBlocksFirst[0]);
- DT->applyUpdates(DTUpdates);
+ DTU.applyUpdatesPermissive(DTUpdates);
}
// Merge adjacent basic blocks, if possible.
@@ -538,7 +539,6 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
while (!MergeBlocks.empty()) {
BasicBlock *BB = *MergeBlocks.begin();
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
@@ -555,6 +555,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
} else
MergeBlocks.erase(BB);
}
+ // Apply updates to the DomTree.
+ DT = &DTU.getDomTree();
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 005306cf1898..58e42074f963 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -62,9 +62,11 @@ static cl::opt<unsigned> UnrollForcePeelCount(
cl::desc("Force a peel count regardless of profiling information."));
static cl::opt<bool> UnrollPeelMultiDeoptExit(
- "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
+ "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden,
cl::desc("Allow peeling of loops with multiple deopt exits."));
+static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
+
// Designates that a Phi is estimated to become invariant after an "infinite"
// number of loop iterations (i.e. only may become an invariant if the loop is
// fully unrolled).
@@ -275,6 +277,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
<< " iterations.\n");
UP.PeelCount = UnrollForcePeelCount;
+ UP.PeelProfiledIterations = true;
return;
}
@@ -282,6 +285,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!UP.AllowPeeling)
return;
+ unsigned AlreadyPeeled = 0;
+ if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+ AlreadyPeeled = *Peeled;
+ // Stop if we already peeled off the maximum number of iterations.
+ if (AlreadyPeeled >= UnrollPeelMaxCount)
+ return;
+
// Here we try to get rid of Phis which become invariants after 1, 2, ..., N
// iterations of the loop. For this we compute the number for iterations after
// which every Phi is guaranteed to become an invariant, and try to peel the
@@ -317,11 +327,14 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
// Consider max peel count limitation.
assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
- LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
- << " iteration(s) to turn"
- << " some Phis into invariants.\n");
- UP.PeelCount = DesiredPeelCount;
- return;
+ if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
+ LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
+ << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
+ UP.PeelCount = DesiredPeelCount;
+ UP.PeelProfiledIterations = false;
+ return;
+ }
}
}
@@ -330,6 +343,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (TripCount)
return;
+ // Do not apply profile base peeling if it is disabled.
+ if (!UP.PeelProfiledIterations)
+ return;
// If we don't know the trip count, but have reason to believe the average
// trip count is low, peeling should be beneficial, since we will usually
// hit the peeled section.
@@ -344,7 +360,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
<< "\n");
if (*PeelCount) {
- if ((*PeelCount <= UnrollPeelMaxCount) &&
+ if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
<< " iterations.\n");
@@ -352,6 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
return;
}
LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
+ LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
<< "\n");
@@ -364,88 +381,77 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
/// iteration.
/// This sets the branch weights for the latch of the recently peeled off loop
/// iteration correctly.
-/// Our goal is to make sure that:
-/// a) The total weight of all the copies of the loop body is preserved.
-/// b) The total weight of the loop exit is preserved.
-/// c) The body weight is reasonably distributed between the peeled iterations.
+/// Let F is a weight of the edge from latch to header.
+/// Let E is a weight of the edge from latch to exit.
+/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
+/// go to exit.
+/// Then, Estimated TripCount = F / E.
+/// For I-th (counting from 0) peeled off iteration we set the the weights for
+/// the peeled latch as (TC - I, 1). It gives us reasonable distribution,
+/// The probability to go to exit 1/(TC-I) increases. At the same time
+/// the estimated trip count of remaining loop reduces by I.
+/// To avoid dealing with division rounding we can just multiple both part
+/// of weights to E and use weight as (F - I * E, E).
///
/// \param Header The copy of the header block that belongs to next iteration.
/// \param LatchBR The copy of the latch branch that belongs to this iteration.
-/// \param IterNumber The serial number of the iteration that was just
-/// peeled off.
-/// \param AvgIters The average number of iterations we expect the loop to have.
-/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop
-/// iterations that are unaccounted for. As an input, it represents the number
-/// of times we expect to enter the header of the iteration currently being
-/// peeled off. The output is the number of times we expect to enter the
-/// header of the next iteration.
+/// \param[in,out] FallThroughWeight The weight of the edge from latch to
+/// header before peeling (in) and after peeled off one iteration (out).
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
- unsigned IterNumber, unsigned AvgIters,
- uint64_t &PeeledHeaderWeight) {
- if (!PeeledHeaderWeight)
+ uint64_t ExitWeight,
+ uint64_t &FallThroughWeight) {
+ // FallThroughWeight is 0 means that there is no branch weights on original
+ // latch block or estimated trip count is zero.
+ if (!FallThroughWeight)
return;
- // FIXME: Pick a more realistic distribution.
- // Currently the proportion of weight we assign to the fall-through
- // side of the branch drops linearly with the iteration number, and we use
- // a 0.9 fudge factor to make the drop-off less sharp...
- uint64_t FallThruWeight =
- PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
- uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
- PeeledHeaderWeight -= ExitWeight;
unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
MDBuilder MDB(LatchBR->getContext());
MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
- : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
+ : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+ FallThroughWeight =
+ FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
}
/// Initialize the weights.
///
/// \param Header The header block.
/// \param LatchBR The latch branch.
-/// \param AvgIters The average number of iterations we expect the loop to have.
-/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.
-/// \param[out] CurHeaderWeight The # of times the header is executed.
+/// \param[out] ExitWeight The weight of the edge from Latch to Exit.
+/// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
- unsigned AvgIters, uint64_t &ExitWeight,
- uint64_t &CurHeaderWeight) {
+ uint64_t &ExitWeight,
+ uint64_t &FallThroughWeight) {
uint64_t TrueWeight, FalseWeight;
if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
return;
unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
- // The # of times the loop body executes is the sum of the exit block
- // is taken and the # of times the backedges are taken.
- CurHeaderWeight = TrueWeight + FalseWeight;
+ FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
}
/// Update the weights of original Latch block after peeling off all iterations.
///
/// \param Header The header block.
/// \param LatchBR The latch branch.
-/// \param ExitWeight The weight of the edge from Latch to Exit block.
-/// \param CurHeaderWeight The # of time the header is executed.
+/// \param ExitWeight The weight of the edge from Latch to Exit.
+/// \param FallThroughWeight The weight of the edge from Latch to Header.
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
- uint64_t ExitWeight, uint64_t CurHeaderWeight) {
- // Adjust the branch weights on the loop exit.
- if (!ExitWeight)
+ uint64_t ExitWeight,
+ uint64_t FallThroughWeight) {
+ // FallThroughWeight is 0 means that there is no branch weights on original
+ // latch block or estimated trip count is zero.
+ if (!FallThroughWeight)
return;
- // The backedge count is the difference of current header weight and
- // current loop exit weight. If the current header weight is smaller than
- // the current loop exit weight, we mark the loop backedge weight as 1.
- uint64_t BackEdgeWeight = 0;
- if (ExitWeight < CurHeaderWeight)
- BackEdgeWeight = CurHeaderWeight - ExitWeight;
- else
- BackEdgeWeight = 1;
+ // Sets the branch weights on the loop exit.
MDBuilder MDB(LatchBR->getContext());
unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
+ : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
}
@@ -586,11 +592,30 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
if (DT) {
+ // We'd like to determine the idom of exit block after peeling one
+ // iteration.
+ // Let Exit is exit block.
+ // Let ExitingSet - is a set of predecessors of Exit block. They are exiting
+ // blocks.
+ // Let Latch' and ExitingSet' are copies after a peeling.
+ // We'd like to find an idom'(Exit) - idom of Exit after peeling.
+ // It is an evident that idom'(Exit) will be the nearest common dominator
+ // of ExitingSet and ExitingSet'.
+ // idom(Exit) is a nearest common dominator of ExitingSet.
+ // idom(Exit)' is a nearest common dominator of ExitingSet'.
+ // Taking into account that we have a single Latch, Latch' will dominate
+ // Header and idom(Exit).
+ // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'.
+ // All these basic blocks are in the same loop, so what we find is
+ // (nearest common dominator of idom(Exit) and Latch)'.
+ // In the loop below we remember nearest common dominator of idom(Exit) and
+ // Latch to update idom of Exit later.
assert(L->hasDedicatedExits() && "No dedicated exits?");
for (auto Edge : ExitEdges) {
if (ExitIDom.count(Edge.second))
continue;
- BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
+ BasicBlock *BB = DT->findNearestCommonDominator(
+ DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
assert(L->contains(BB) && "IDom is not in a loop");
ExitIDom[Edge.second] = BB;
}
@@ -659,23 +684,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// newly created branches.
BranchInst *LatchBR =
cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
- uint64_t ExitWeight = 0, CurHeaderWeight = 0;
- initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);
+ uint64_t ExitWeight = 0, FallThroughWeight = 0;
+ initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
// For each peeled-off iteration, make a copy of the loop.
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;
ValueToValueMapTy VMap;
- // Subtract the exit weight from the current header weight -- the exit
- // weight is exactly the weight of the previous iteration's header.
- // FIXME: due to the way the distribution is constructed, we need a
- // guard here to make sure we don't end up with non-positive weights.
- if (ExitWeight < CurHeaderWeight)
- CurHeaderWeight -= ExitWeight;
- else
- CurHeaderWeight = 1;
-
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
LoopBlocks, VMap, LVMap, DT, LI);
@@ -697,8 +713,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
}
auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
- updateBranchWeights(InsertBot, LatchBRCopy, Iter,
- PeelCount, ExitWeight);
+ updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
// Remove Loop metadata from the latch branch instruction
// because it is not the Loop's latch branch anymore.
LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
@@ -724,7 +739,13 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
}
- fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);
+ fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
+
+ // Update Metadata for count of peeled off iterations.
+ unsigned AlreadyPeeled = 0;
+ if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+ AlreadyPeeled = *Peeled;
+ addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
if (Loop *ParentLoop = L->getParentLoop())
L = ParentLoop;
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index ec226e65f650..b4d7f35d2d9a 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -45,6 +46,7 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-utils"
static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
+static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
@@ -169,6 +171,8 @@ void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<SCEVAAWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
+ // FIXME: When all loop passes preserve MemorySSA, it can be required and
+ // preserved here instead of the individual handling in each pass.
}
/// Manually defined generic "LoopPass" dependency initialization. This is used
@@ -189,6 +193,54 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) {
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+}
+
+/// Create MDNode for input string.
+static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ Metadata *MDs[] = {
+ MDString::get(Context, Name),
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
+ return MDNode::get(Context, MDs);
+}
+
+/// Set input string into loop metadata by keeping other values intact.
+/// If the string is already in loop metadata update value if it is
+/// different.
+void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
+ unsigned V) {
+ SmallVector<Metadata *, 4> MDs(1);
+ // If the loop already has metadata, retain it.
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
+ // If it is of form key = value, try to parse it.
+ if (Node->getNumOperands() == 2) {
+ MDString *S = dyn_cast<MDString>(Node->getOperand(0));
+ if (S && S->getString().equals(StringMD)) {
+ ConstantInt *IntMD =
+ mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
+ if (IntMD && IntMD->getSExtValue() == V)
+ // It is already in place. Do nothing.
+ return;
+ // We need to update the value, so just skip it here and it will
+ // be added after copying other existed nodes.
+ continue;
+ }
+ }
+ MDs.push_back(Node);
+ }
+ }
+ // Add new metadata.
+ MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
+ // Replace current metadata node with new one.
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ TheLoop->setLoopID(NewLoopID);
}
/// Find string metadata for loop
@@ -332,6 +384,10 @@ bool llvm::hasDisableAllTransformsHint(const Loop *L) {
return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
}
+bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
+ return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
+}
+
TransformationMode llvm::hasUnrollTransformation(Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
return TM_SuppressedByUser;
diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp
index a9a480a4b7f9..5d7759056c7d 100644
--- a/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/lib/Transforms/Utils/LoopVersioning.cpp
@@ -92,8 +92,8 @@ void LoopVersioning::versionLoop(
// Create empty preheader for the loop (and after cloning for the
// non-versioned loop).
BasicBlock *PH =
- SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI);
- PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
+ SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI,
+ nullptr, VersionedLoop->getHeader()->getName() + ".ph");
// Clone the loop including the preheader.
//
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index c0b7edc547fd..60bb2775a194 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -121,15 +121,14 @@ namespace {
}
// Rename all functions
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
for (auto &F : M) {
StringRef Name = F.getName();
LibFunc Tmp;
// Leave library functions alone because their presence or absence could
// affect the behavior of other passes.
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
- TLI.getLibFunc(F, Tmp))
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F).getLibFunc(
+ F, Tmp))
continue;
// Leave @main alone. The output of -metarenamer might be passed to
diff --git a/lib/Transforms/Utils/MisExpect.cpp b/lib/Transforms/Utils/MisExpect.cpp
new file mode 100644
index 000000000000..26d3402bd279
--- /dev/null
+++ b/lib/Transforms/Utils/MisExpect.cpp
@@ -0,0 +1,177 @@
+//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit warnings for potentially incorrect usage of the
+// llvm.expect intrinsic. This utility extracts the threshold values from
+// metadata associated with the instrumented Branch or Switch instruction. The
+// threshold values are then used to determine if a warning should be emmited.
+//
+// MisExpect metadata is generated when llvm.expect intrinsics are lowered see
+// LowerExpectIntrinsic.cpp
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MisExpect.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <cstdint>
+#include <functional>
+#include <numeric>
+
+#define DEBUG_TYPE "misexpect"
+
+using namespace llvm;
+using namespace misexpect;
+
+namespace llvm {
+
+// Command line option to enable/disable the warning when profile data suggests
+// a mismatch with the use of the llvm.expect intrinsic
+static cl::opt<bool> PGOWarnMisExpect(
+ "pgo-warn-misexpect", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "warnings about incorrect usage of llvm.expect intrinsics."));
+
+} // namespace llvm
+
+namespace {
+
+Instruction *getOprndOrInst(Instruction *I) {
+ assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
+ Instruction *Ret = nullptr;
+ if (auto *B = dyn_cast<BranchInst>(I)) {
+ Ret = dyn_cast<Instruction>(B->getCondition());
+ }
+ // TODO: Find a way to resolve condition location for switches
+ // Using the condition of the switch seems to often resolve to an earlier
+ // point in the program, i.e. the calculation of the switch condition, rather
+ // than the switches location in the source code. Thus, we should use the
+ // instruction to get source code locations rather than the condition to
+ // improve diagnostic output, such as the caret. If the same problem exists
+ // for branch instructions, then we should remove this function and directly
+ // use the instruction
+ //
+ // else if (auto S = dyn_cast<SwitchInst>(I)) {
+ // Ret = I;
+ //}
+ return Ret ? Ret : I;
+}
+
+void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
+ uint64_t ProfCount, uint64_t TotalCount) {
+ double PercentageCorrect = (double)ProfCount / TotalCount;
+ auto PerString =
+ formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
+ auto RemStr = formatv(
+ "Potential performance regression from use of the llvm.expect intrinsic: "
+ "Annotation was correct on {0} of profiled executions.",
+ PerString);
+ Twine Msg(PerString);
+ Instruction *Cond = getOprndOrInst(I);
+ if (PGOWarnMisExpect)
+ Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
+ OptimizationRemarkEmitter ORE(I->getParent()->getParent());
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
+}
+
+} // namespace
+
+namespace llvm {
+namespace misexpect {
+
+void verifyMisExpect(Instruction *I, const SmallVector<uint32_t, 4> &Weights,
+ LLVMContext &Ctx) {
+ if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) {
+ auto *MisExpectDataName = dyn_cast<MDString>(MisExpectData->getOperand(0));
+ if (MisExpectDataName &&
+ MisExpectDataName->getString().equals("misexpect")) {
+ LLVM_DEBUG(llvm::dbgs() << "------------------\n");
+ LLVM_DEBUG(llvm::dbgs()
+ << "Function: " << I->getFunction()->getName() << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n");
+ LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) {
+ llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n";
+ });
+
+ // extract values from misexpect metadata
+ const auto *IndexCint =
+ mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(1));
+ const auto *LikelyCInt =
+ mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(2));
+ const auto *UnlikelyCInt =
+ mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(3));
+
+ if (!IndexCint || !LikelyCInt || !UnlikelyCInt)
+ return;
+
+ const uint64_t Index = IndexCint->getZExtValue();
+ const uint64_t LikelyBranchWeight = LikelyCInt->getZExtValue();
+ const uint64_t UnlikelyBranchWeight = UnlikelyCInt->getZExtValue();
+ const uint64_t ProfileCount = Weights[Index];
+ const uint64_t CaseTotal = std::accumulate(
+ Weights.begin(), Weights.end(), (uint64_t)0, std::plus<uint64_t>());
+ const uint64_t NumUnlikelyTargets = Weights.size() - 1;
+
+ const uint64_t TotalBranchWeight =
+ LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
+
+ const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight,
+ TotalBranchWeight);
+ uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal);
+
+ LLVM_DEBUG(llvm::dbgs()
+ << "Unlikely Targets: " << NumUnlikelyTargets << ":\n");
+ LLVM_DEBUG(llvm::dbgs() << "Profile Count: " << ProfileCount << ":\n");
+ LLVM_DEBUG(llvm::dbgs()
+ << "Scaled Threshold: " << ScaledThreshold << ":\n");
+ LLVM_DEBUG(llvm::dbgs() << "------------------\n");
+ if (ProfileCount < ScaledThreshold)
+ emitMisexpectDiagnostic(I, Ctx, ProfileCount, CaseTotal);
+ }
+ }
+}
+
+void checkFrontendInstrumentation(Instruction &I) {
+ if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) {
+ unsigned NOps = MD->getNumOperands();
+
+ // Only emit misexpect diagnostics if at least 2 branch weights are present.
+ // Less than 2 branch weights means that the profiling metadata is:
+ // 1) incorrect/corrupted
+ // 2) not branch weight metadata
+ // 3) completely deterministic
+ // In these cases we should not emit any diagnostic related to misexpect.
+ if (NOps < 3)
+ return;
+
+ // Operand 0 is a string tag "branch_weights"
+ if (MDString *Tag = cast<MDString>(MD->getOperand(0))) {
+ if (Tag->getString().equals("branch_weights")) {
+ SmallVector<uint32_t, 4> RealWeights(NOps - 1);
+ for (unsigned i = 1; i < NOps; i++) {
+ ConstantInt *Value =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
+ RealWeights[i - 1] = Value->getZExtValue();
+ }
+ verifyMisExpect(&I, RealWeights, I.getContext());
+ }
+ }
+ }
+}
+
+} // namespace misexpect
+} // namespace llvm
+#undef DEBUG_TYPE
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index c84beceee191..1ef3757017a8 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -73,7 +73,7 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *>
SmallPtrSet<Constant *, 16> InitAsSet;
SmallVector<Constant *, 16> Init;
if (GV) {
- ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ auto *CA = cast<ConstantArray>(GV->getInitializer());
for (auto &Op : CA->operands()) {
Constant *C = cast_or_null<Constant>(Op);
if (InitAsSet.insert(C).second)
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index bdf24d80bd17..44859eafb9c1 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -125,8 +125,10 @@ static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
// necessary to compare uses/defs in the same block. Doing so allows us to walk
// the minimum number of instructions necessary to compute our def/use ordering.
struct ValueDFS_Compare {
+ DominatorTree &DT;
OrderedInstructions &OI;
- ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {}
+ ValueDFS_Compare(DominatorTree &DT, OrderedInstructions &OI)
+ : DT(DT), OI(OI) {}
bool operator()(const ValueDFS &A, const ValueDFS &B) const {
if (&A == &B)
@@ -136,7 +138,9 @@ struct ValueDFS_Compare {
// comesbefore to see what the real ordering is, because they are in the
// same basic block.
- bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut);
+ assert((A.DFSIn != B.DFSIn || A.DFSOut == B.DFSOut) &&
+ "Equal DFS-in numbers imply equal out numbers");
+ bool SameBlock = A.DFSIn == B.DFSIn;
// We want to put the def that will get used for a given set of phi uses,
// before those phi uses.
@@ -145,9 +149,11 @@ struct ValueDFS_Compare {
if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
return comparePHIRelated(A, B);
+ bool isADef = A.Def;
+ bool isBDef = B.Def;
if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
- return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) <
- std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U);
+ return std::tie(A.DFSIn, A.LocalNum, isADef) <
+ std::tie(B.DFSIn, B.LocalNum, isBDef);
return localComesBefore(A, B);
}
@@ -164,10 +170,35 @@ struct ValueDFS_Compare {
// For two phi related values, return the ordering.
bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
- auto &ABlockEdge = getBlockEdge(A);
- auto &BBlockEdge = getBlockEdge(B);
- // Now sort by block edge and then defs before uses.
- return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U);
+ BasicBlock *ASrc, *ADest, *BSrc, *BDest;
+ std::tie(ASrc, ADest) = getBlockEdge(A);
+ std::tie(BSrc, BDest) = getBlockEdge(B);
+
+#ifndef NDEBUG
+ // This function should only be used for values in the same BB, check that.
+ DomTreeNode *DomASrc = DT.getNode(ASrc);
+ DomTreeNode *DomBSrc = DT.getNode(BSrc);
+ assert(DomASrc->getDFSNumIn() == (unsigned)A.DFSIn &&
+ "DFS numbers for A should match the ones of the source block");
+ assert(DomBSrc->getDFSNumIn() == (unsigned)B.DFSIn &&
+ "DFS numbers for B should match the ones of the source block");
+ assert(A.DFSIn == B.DFSIn && "Values must be in the same block");
+#endif
+ (void)ASrc;
+ (void)BSrc;
+
+ // Use DFS numbers to compare destination blocks, to guarantee a
+ // deterministic order.
+ DomTreeNode *DomADest = DT.getNode(ADest);
+ DomTreeNode *DomBDest = DT.getNode(BDest);
+ unsigned AIn = DomADest->getDFSNumIn();
+ unsigned BIn = DomBDest->getDFSNumIn();
+ bool isADef = A.Def;
+ bool isBDef = B.Def;
+ assert((!A.Def || !A.U) && (!B.Def || !B.U) &&
+ "Def and U cannot be set at the same time");
+ // Now sort by edge destination and then defs before uses.
+ return std::tie(AIn, isADef) < std::tie(BIn, isBDef);
}
// Get the definition of an instruction that occurs in the middle of a block.
@@ -306,10 +337,11 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
}
// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
-void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op,
+void PredicateInfo::addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op,
PredicateBase *PB) {
- OpsToRename.insert(Op);
auto &OperandInfo = getOrCreateValueInfo(Op);
+ if (OperandInfo.Infos.empty())
+ OpsToRename.push_back(Op);
AllInfos.push_back(PB);
OperandInfo.Infos.push_back(PB);
}
@@ -317,7 +349,7 @@ void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op,
// Process an assume instruction and place relevant operations we want to rename
// into OpsToRename.
void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
- SmallPtrSetImpl<Value *> &OpsToRename) {
+ SmallVectorImpl<Value *> &OpsToRename) {
// See if we have a comparison we support
SmallVector<Value *, 8> CmpOperands;
SmallVector<Value *, 2> ConditionsToProcess;
@@ -357,7 +389,7 @@ void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
// Process a block terminating branch, and place relevant operations to be
// renamed into OpsToRename.
void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
- SmallPtrSetImpl<Value *> &OpsToRename) {
+ SmallVectorImpl<Value *> &OpsToRename) {
BasicBlock *FirstBB = BI->getSuccessor(0);
BasicBlock *SecondBB = BI->getSuccessor(1);
SmallVector<BasicBlock *, 2> SuccsToProcess;
@@ -427,7 +459,7 @@ void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
// Process a block terminating switch, and place relevant operations to be
// renamed into OpsToRename.
void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB,
- SmallPtrSetImpl<Value *> &OpsToRename) {
+ SmallVectorImpl<Value *> &OpsToRename) {
Value *Op = SI->getCondition();
if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse())
return;
@@ -457,7 +489,7 @@ void PredicateInfo::buildPredicateInfo() {
DT.updateDFSNumbers();
// Collect operands to rename from all conditional branch terminators, as well
// as assume statements.
- SmallPtrSet<Value *, 8> OpsToRename;
+ SmallVector<Value *, 8> OpsToRename;
for (auto DTN : depth_first(DT.getRootNode())) {
BasicBlock *BranchBB = DTN->getBlock();
if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) {
@@ -524,7 +556,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (empty(IF->users()))
+ if (IF->users().empty())
CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
@@ -536,7 +568,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
"Should not have gotten here without it being an assume");
IRBuilder<> B(PAssume->AssumeInst);
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (empty(IF->users()))
+ if (IF->users().empty())
CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PredicateMap.insert({PIC, ValInfo});
@@ -565,14 +597,8 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
//
// TODO: Use this algorithm to perform fast single-variable renaming in
// promotememtoreg and memoryssa.
-void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
- // Sort OpsToRename since we are going to iterate it.
- SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end());
- auto Comparator = [&](const Value *A, const Value *B) {
- return valueComesBefore(OI, A, B);
- };
- llvm::sort(OpsToRename, Comparator);
- ValueDFS_Compare Compare(OI);
+void PredicateInfo::renameUses(SmallVectorImpl<Value *> &OpsToRename) {
+ ValueDFS_Compare Compare(DT, OI);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n");
@@ -772,7 +798,7 @@ static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
+ auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
PredInfo->print(dbgs());
if (VerifyPredicateInfo)
PredInfo->verifyPredicateInfo();
@@ -786,7 +812,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
OS << "PredicateInfo for function: " << F.getName() << "\n";
- auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
+ auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
PredInfo->print(OS);
replaceCreatedSSACopys(*PredInfo, F);
@@ -845,7 +871,7 @@ PreservedAnalyses PredicateInfoVerifierPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo();
+ std::make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo();
return PreservedAnalyses::all();
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 11651d040dc0..3a5e3293ed4f 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -94,6 +94,12 @@ static cl::opt<unsigned> PHINodeFoldingThreshold(
cl::desc(
"Control the amount of phi node folding to perform (default = 2)"));
+static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
+ "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
+ cl::desc("Control the maximal total instruction cost that we are willing "
+ "to speculatively execute to fold a 2-entry PHI node into a "
+ "select (default = 4)"));
+
static cl::opt<bool> DupRet(
"simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
@@ -332,7 +338,7 @@ static unsigned ComputeSpeculationCost(const User *I,
/// CostRemaining, false is returned and CostRemaining is undefined.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSetImpl<Instruction *> &AggressiveInsts,
- unsigned &CostRemaining,
+ int &BudgetRemaining,
const TargetTransformInfo &TTI,
unsigned Depth = 0) {
// It is possible to hit a zero-cost cycle (phi/gep instructions for example),
@@ -375,7 +381,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
if (!isSafeToSpeculativelyExecute(I))
return false;
- unsigned Cost = ComputeSpeculationCost(I, TTI);
+ BudgetRemaining -= ComputeSpeculationCost(I, TTI);
// Allow exactly one instruction to be speculated regardless of its cost
// (as long as it is safe to do so).
@@ -383,17 +389,14 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// or other expensive operation. The speculation of an expensive instruction
// is expected to be undone in CodeGenPrepare if the speculation has not
// enabled further IR optimizations.
- if (Cost > CostRemaining &&
+ if (BudgetRemaining < 0 &&
(!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0))
return false;
- // Avoid unsigned wrap.
- CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost;
-
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI,
Depth + 1))
return false;
// Okay, it's safe to do this! Remember this instruction.
@@ -629,8 +632,7 @@ private:
/// vector.
/// One "Extra" case is allowed to differ from the other.
void gather(Value *V) {
- Instruction *I = dyn_cast<Instruction>(V);
- bool isEQ = (I->getOpcode() == Instruction::Or);
+ bool isEQ = (cast<Instruction>(V)->getOpcode() == Instruction::Or);
// Keep a stack (SmallVector for efficiency) for depth-first traversal
SmallVector<Value *, 8> DFT;
@@ -1313,7 +1315,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
LLVMContext::MD_mem_parallel_loop_access,
- LLVMContext::MD_access_group};
+ LLVMContext::MD_access_group,
+ LLVMContext::MD_preserve_access_index};
combineMetadata(I1, I2, KnownIDs, true);
// I1 and I2 are being combined into a single instruction. Its debug
@@ -1420,6 +1423,20 @@ HoistTerminator:
return true;
}
+// Check lifetime markers.
+static bool isLifeTimeMarker(const Instruction *I) {
+ if (auto II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ return false;
+}
+
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
@@ -1474,20 +1491,25 @@ static bool canSinkInstructions(
return false;
}
- // Because SROA can't handle speculating stores of selects, try not
- // to sink loads or stores of allocas when we'd have to create a PHI for
- // the address operand. Also, because it is likely that loads or stores
- // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
+ // Because SROA can't handle speculating stores of selects, try not to sink
+ // loads, stores or lifetime markers of allocas when we'd have to create a
+ // PHI for the address operand. Also, because it is likely that loads or
+ // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
+ // them.
// This can cause code churn which can have unintended consequences down
// the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
// FIXME: This is a workaround for a deficiency in SROA - see
// https://llvm.org/bugs/show_bug.cgi?id=30188
if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(1));
+ return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
}))
return false;
if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(0));
+ return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
+ }))
+ return false;
+ if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
}))
return false;
@@ -1959,7 +1981,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
- unsigned SpeculationCost = 0;
+ unsigned SpeculatedInstructions = 0;
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
for (BasicBlock::iterator BBI = ThenBB->begin(),
@@ -1974,8 +1996,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Only speculatively execute a single instruction (not counting the
// terminator) for now.
- ++SpeculationCost;
- if (SpeculationCost > 1)
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
return false;
// Don't hoist the instruction if it's unsafe or expensive.
@@ -2012,8 +2034,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
E = SinkCandidateUseCounts.end();
I != E; ++I)
if (I->first->hasNUses(I->second)) {
- ++SpeculationCost;
- if (SpeculationCost > 1)
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
return false;
}
@@ -2053,8 +2075,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// getting expanded into Instructions.
// FIXME: This doesn't account for how many operations are combined in the
// constant expression.
- ++SpeculationCost;
- if (SpeculationCost > 1)
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
return false;
}
@@ -2302,10 +2324,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
SmallPtrSet<Instruction *, 4> AggressiveInsts;
- unsigned MaxCostVal0 = PHINodeFoldingThreshold,
- MaxCostVal1 = PHINodeFoldingThreshold;
- MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
- MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
+ int BudgetRemaining =
+ TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
@@ -2316,9 +2336,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
}
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
- MaxCostVal0, TTI) ||
+ BudgetRemaining, TTI) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
- MaxCostVal1, TTI))
+ BudgetRemaining, TTI))
return false;
}
@@ -2328,12 +2348,24 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
if (!PN)
return true;
- // Don't fold i1 branches on PHIs which contain binary operators. These can
- // often be turned into switches and other things.
+ // Return true if at least one of these is a 'not', and another is either
+ // a 'not' too, or a constant.
+ auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
+ if (!match(V0, m_Not(m_Value())))
+ std::swap(V0, V1);
+ auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
+ return match(V0, m_Not(m_Value())) && match(V1, Invertible);
+ };
+
+ // Don't fold i1 branches on PHIs which contain binary operators, unless one
+ // of the incoming values is an 'not' and another one is freely invertible.
+ // These can often be turned into switches and other things.
if (PN->getType()->isIntegerTy(1) &&
(isa<BinaryOperator>(PN->getIncomingValue(0)) ||
isa<BinaryOperator>(PN->getIncomingValue(1)) ||
- isa<BinaryOperator>(IfCond)))
+ isa<BinaryOperator>(IfCond)) &&
+ !CanHoistNotFromBothValues(PN->getIncomingValue(0),
+ PN->getIncomingValue(1)))
return false;
// If all PHI nodes are promotable, check to make sure that all instructions
@@ -2368,6 +2400,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
return false;
}
}
+ assert(DomBlock && "Failed to find root DomBlock");
LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
<< " T: " << IfTrue->getName()
@@ -2913,42 +2946,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
BasicBlock *QTB, BasicBlock *QFB,
BasicBlock *PostBB, Value *Address,
bool InvertPCond, bool InvertQCond,
- const DataLayout &DL) {
- auto IsaBitcastOfPointerType = [](const Instruction &I) {
- return Operator::getOpcode(&I) == Instruction::BitCast &&
- I.getType()->isPointerTy();
- };
-
- // If we're not in aggressive mode, we only optimize if we have some
- // confidence that by optimizing we'll allow P and/or Q to be if-converted.
- auto IsWorthwhile = [&](BasicBlock *BB) {
- if (!BB)
- return true;
- // Heuristic: if the block can be if-converted/phi-folded and the
- // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
- // thread this store.
- unsigned N = 0;
- for (auto &I : BB->instructionsWithoutDebug()) {
- // Cheap instructions viable for folding.
- if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||
- isa<StoreInst>(I))
- ++N;
- // Free instructions.
- else if (I.isTerminator() || IsaBitcastOfPointerType(I))
- continue;
- else
- return false;
- }
- // The store we want to merge is counted in N, so add 1 to make sure
- // we're counting the instructions that would be left.
- return N <= (PHINodeFoldingThreshold + 1);
- };
-
- if (!MergeCondStoresAggressively &&
- (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) ||
- !IsWorthwhile(QFB)))
- return false;
-
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
// For every pointer, there must be exactly two stores, one coming from
// PTB or PFB, and the other from QTB or QFB. We don't support more than one
// store (to any address) in PTB,PFB or QTB,QFB.
@@ -2989,6 +2988,46 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
if (&*I != PStore && I->mayReadOrWriteMemory())
return false;
+ // If we're not in aggressive mode, we only optimize if we have some
+ // confidence that by optimizing we'll allow P and/or Q to be if-converted.
+ auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
+ if (!BB)
+ return true;
+ // Heuristic: if the block can be if-converted/phi-folded and the
+ // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
+ // thread this store.
+ int BudgetRemaining =
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ for (auto &I : BB->instructionsWithoutDebug()) {
+ // Consider terminator instruction to be free.
+ if (I.isTerminator())
+ continue;
+ // If this is one the stores that we want to speculate out of this BB,
+ // then don't count it's cost, consider it to be free.
+ if (auto *S = dyn_cast<StoreInst>(&I))
+ if (llvm::find(FreeStores, S))
+ continue;
+ // Else, we have a white-list of instructions that we are ak speculating.
+ if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
+ return false; // Not in white-list - not worthwhile folding.
+ // And finally, if this is a non-free instruction that we are okay
+ // speculating, ensure that we consider the speculation budget.
+ BudgetRemaining -= TTI.getUserCost(&I);
+ if (BudgetRemaining < 0)
+ return false; // Eagerly refuse to fold as soon as we're out of budget.
+ }
+ assert(BudgetRemaining >= 0 &&
+ "When we run out of budget we will eagerly return from within the "
+ "per-instruction loop.");
+ return true;
+ };
+
+ const SmallVector<StoreInst *, 2> FreeStores = {PStore, QStore};
+ if (!MergeCondStoresAggressively &&
+ (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
+ !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
+ return false;
+
// If PostBB has more than two predecessors, we need to split it so we can
// sink the store.
if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
@@ -3048,15 +3087,15 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
// store that doesn't execute.
if (MinAlignment != 0) {
// Choose the minimum of all non-zero alignments.
- SI->setAlignment(MinAlignment);
+ SI->setAlignment(Align(MinAlignment));
} else if (MaxAlignment != 0) {
// Choose the minimal alignment between the non-zero alignment and the ABI
// default alignment for the type of the stored value.
- SI->setAlignment(std::min(MaxAlignment, TypeAlignment));
+ SI->setAlignment(Align(std::min(MaxAlignment, TypeAlignment)));
} else {
// If both alignments are zero, use ABI default alignment for the type of
// the stored value.
- SI->setAlignment(TypeAlignment);
+ SI->setAlignment(Align(TypeAlignment));
}
QStore->eraseFromParent();
@@ -3066,7 +3105,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
}
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
- const DataLayout &DL) {
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
// The intention here is to find diamonds or triangles (see below) where each
// conditional block contains a store to the same address. Both of these
// stores are conditional, so they can't be unconditionally sunk. But it may
@@ -3168,7 +3208,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
bool Changed = false;
for (auto *Address : CommonAddresses)
Changed |= mergeConditionalStoreToAddress(
- PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL);
+ PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI);
return Changed;
}
@@ -3177,7 +3217,8 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
- const DataLayout &DL) {
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
assert(PBI->isConditional() && BI->isConditional());
BasicBlock *BB = BI->getParent();
@@ -3233,7 +3274,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
// merged store at the end.
- if (MergeCondStores && mergeConditionalStores(PBI, BI, DL))
+ if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI))
return true;
// If this is a conditional branch in an empty block, and if any
@@ -3697,12 +3738,17 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
BasicBlock *BB = BI->getParent();
+ // MSAN does not like undefs as branch condition which can be introduced
+ // with "explicit branch".
+ if (ExtraCase && BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
+ return false;
+
LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
<< " cases into SWITCH. BB is:\n"
<< *BB);
// If there are any extra values that couldn't be folded into the switch
- // then we evaluate them with an explicit branch first. Split the block
+ // then we evaluate them with an explicit branch first. Split the block
// right before the condbr to handle it.
if (ExtraCase) {
BasicBlock *NewBB =
@@ -3851,7 +3897,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
- LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
+ auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
assert(RI->getValue() == LPInst &&
"Resume must unwind the exception that caused control to here");
@@ -4178,23 +4224,22 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
IRBuilder<> Builder(TI);
if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional()) {
- if (BI->getSuccessor(0) == BB) {
- new UnreachableInst(TI->getContext(), TI);
- TI->eraseFromParent();
- Changed = true;
- }
+ assert(BI->getSuccessor(0) == BB && "Incorrect CFG");
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
} else {
Value* Cond = BI->getCondition();
if (BI->getSuccessor(0) == BB) {
Builder.CreateAssumption(Builder.CreateNot(Cond));
Builder.CreateBr(BI->getSuccessor(1));
- EraseTerminatorAndDCECond(BI);
- } else if (BI->getSuccessor(1) == BB) {
+ } else {
+ assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
Builder.CreateAssumption(Cond);
Builder.CreateBr(BI->getSuccessor(0));
- EraseTerminatorAndDCECond(BI);
- Changed = true;
}
+ EraseTerminatorAndDCECond(BI);
+ Changed = true;
}
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
SwitchInstProfUpdateWrapper SU(*SI);
@@ -4276,6 +4321,17 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
return true;
}
+static void createUnreachableSwitchDefault(SwitchInst *Switch) {
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ BasicBlock *NewDefaultBlock =
+ SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), "");
+ Switch->setDefaultDest(&*NewDefaultBlock);
+ SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front());
+ auto *NewTerminator = NewDefaultBlock->getTerminator();
+ new UnreachableInst(Switch->getContext(), NewTerminator);
+ EraseTerminatorAndDCECond(NewTerminator);
+}
+
/// Turn a switch with two reachable destinations into an integer range
/// comparison and branch.
static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
@@ -4384,6 +4440,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
+ // Clean up the default block - it may have phis or other instructions before
+ // the unreachable terminator.
+ if (!HasDefault)
+ createUnreachableSwitchDefault(SI);
+
// Drop the switch.
SI->eraseFromParent();
@@ -4428,14 +4489,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
SI->getNumCases() == (1ULL << NumUnknownBits)) {
- LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- BasicBlock *NewDefault =
- SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), "");
- SI->setDefaultDest(&*NewDefault);
- SplitBlock(&*NewDefault, &NewDefault->front());
- auto *OldTI = NewDefault->getTerminator();
- new UnreachableInst(SI->getContext(), OldTI);
- EraseTerminatorAndDCECond(OldTI);
+ createUnreachableSwitchDefault(SI);
return true;
}
@@ -5031,7 +5085,7 @@ SwitchLookupTable::SwitchLookupTable(
Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
// Set the alignment to that of an array items. We will be only loading one
// value out of it.
- Array->setAlignment(DL.getPrefTypeAlignment(ValueType));
+ Array->setAlignment(Align(DL.getPrefTypeAlignment(ValueType)));
Kind = ArrayKind;
}
@@ -5260,7 +5314,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Figure out the corresponding result for each case value and phi node in the
// common destination, as well as the min and max case values.
- assert(!empty(SI->cases()));
+ assert(!SI->cases().empty());
SwitchInst::CaseIt CI = SI->case_begin();
ConstantInt *MinCaseVal = CI->getCaseValue();
ConstantInt *MaxCaseVal = CI->getCaseValue();
@@ -5892,7 +5946,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
+ if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI))
return requestResimplify();
// Look for diamond patterns.
@@ -5900,7 +5954,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (mergeConditionalStores(PBI, BI, DL))
+ if (mergeConditionalStores(PBI, BI, DL, TTI))
return requestResimplify();
return false;
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index e0def81d5eee..0324993a8203 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
@@ -47,7 +48,6 @@ static cl::opt<bool>
cl::desc("Enable unsafe double to float "
"shrinking for math lib calls"));
-
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
@@ -177,7 +177,8 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
if (!isOnlyUsedInComparisonWithZero(CI))
return false;
- if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL))
+ if (!isDereferenceableAndAlignedPointer(Str, Align::None(), APInt(64, Len),
+ DL))
return false;
if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
@@ -186,6 +187,67 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
return true;
}
+static void annotateDereferenceableBytes(CallInst *CI,
+ ArrayRef<unsigned> ArgNos,
+ uint64_t DereferenceableBytes) {
+ const Function *F = CI->getCaller();
+ if (!F)
+ return;
+ for (unsigned ArgNo : ArgNos) {
+ uint64_t DerefBytes = DereferenceableBytes;
+ unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
+ if (!llvm::NullPointerIsDefined(F, AS) ||
+ CI->paramHasAttr(ArgNo, Attribute::NonNull))
+ DerefBytes = std::max(CI->getDereferenceableOrNullBytes(
+ ArgNo + AttributeList::FirstArgIndex),
+ DereferenceableBytes);
+
+ if (CI->getDereferenceableBytes(ArgNo + AttributeList::FirstArgIndex) <
+ DerefBytes) {
+ CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
+ if (!llvm::NullPointerIsDefined(F, AS) ||
+ CI->paramHasAttr(ArgNo, Attribute::NonNull))
+ CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull);
+ CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
+ CI->getContext(), DerefBytes));
+ }
+ }
+}
+
+static void annotateNonNullBasedOnAccess(CallInst *CI,
+ ArrayRef<unsigned> ArgNos) {
+ Function *F = CI->getCaller();
+ if (!F)
+ return;
+
+ for (unsigned ArgNo : ArgNos) {
+ if (CI->paramHasAttr(ArgNo, Attribute::NonNull))
+ continue;
+ unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
+ if (llvm::NullPointerIsDefined(F, AS))
+ continue;
+
+ CI->addParamAttr(ArgNo, Attribute::NonNull);
+ annotateDereferenceableBytes(CI, ArgNo, 1);
+ }
+}
+
+static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> ArgNos,
+ Value *Size, const DataLayout &DL) {
+ if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
+ annotateNonNullBasedOnAccess(CI, ArgNos);
+ annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue());
+ } else if (isKnownNonZero(Size, DL)) {
+ annotateNonNullBasedOnAccess(CI, ArgNos);
+ const APInt *X, *Y;
+ uint64_t DerefMin = 1;
+ if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) {
+ DerefMin = std::min(X->getZExtValue(), Y->getZExtValue());
+ annotateDereferenceableBytes(CI, ArgNos, DerefMin);
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -194,10 +256,13 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
// Extract some information from the instruction
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
+ annotateNonNullBasedOnAccess(CI, {0, 1});
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
return nullptr;
--Len; // Unbias length.
@@ -232,24 +297,34 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
// Extract some information from the instruction.
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
uint64_t Len;
+ annotateNonNullBasedOnAccess(CI, 0);
+ if (isKnownNonZero(Size, DL))
+ annotateNonNullBasedOnAccess(CI, 1);
// We don't do anything if length is not constant.
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size);
+ if (LengthArg) {
Len = LengthArg->getZExtValue();
- else
+ // strncat(x, c, 0) -> x
+ if (!Len)
+ return Dst;
+ } else {
return nullptr;
+ }
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0)
+ if (SrcLen) {
+ annotateDereferenceableBytes(CI, 1, SrcLen);
+ --SrcLen; // Unbias length.
+ } else {
return nullptr;
- --SrcLen; // Unbias length.
+ }
- // Handle the simple, do-nothing cases:
// strncat(x, "", c) -> x
- // strncat(x, c, 0) -> x
- if (SrcLen == 0 || Len == 0)
+ if (SrcLen == 0)
return Dst;
// We don't optimize this case.
@@ -265,13 +340,18 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
Value *SrcStr = CI->getArgOperand(0);
+ annotateNonNullBasedOnAccess(CI, 0);
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!CharC) {
uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
+ if (Len)
+ annotateDereferenceableBytes(CI, 0, Len);
+ else
+ return nullptr;
+ if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
@@ -304,6 +384,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
Value *SrcStr = CI->getArgOperand(0);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ annotateNonNullBasedOnAccess(CI, 0);
// Cannot fold anything if we're not looking for a constant.
if (!CharC)
@@ -351,7 +432,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
+ if (Len1)
+ annotateDereferenceableBytes(CI, 0, Len1);
uint64_t Len2 = GetStringLength(Str2P);
+ if (Len2)
+ annotateDereferenceableBytes(CI, 1, Len2);
+
if (Len1 && Len2) {
return emitMemCmp(Str1P, Str2P,
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
@@ -374,17 +460,22 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
TLI);
}
+ annotateNonNullBasedOnAccess(CI, {0, 1});
return nullptr;
}
Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ Value *Str1P = CI->getArgOperand(0);
+ Value *Str2P = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
return ConstantInt::get(CI->getType(), 0);
+ if (isKnownNonZero(Size, DL))
+ annotateNonNullBasedOnAccess(CI, {0, 1});
// Get the length argument if it is constant.
uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
Length = LengthArg->getZExtValue();
else
return nullptr;
@@ -393,7 +484,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
return ConstantInt::get(CI->getType(), 0);
if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
+ return emitMemCmp(Str1P, Str2P, Size, B, DL, TLI);
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -415,7 +506,11 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
CI->getType());
uint64_t Len1 = GetStringLength(Str1P);
+ if (Len1)
+ annotateDereferenceableBytes(CI, 0, Len1);
uint64_t Len2 = GetStringLength(Str2P);
+ if (Len2)
+ annotateDereferenceableBytes(CI, 1, Len2);
// strncmp to memcmp
if (!HasStr1 && HasStr2) {
@@ -437,20 +532,38 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
+Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilder<> &B) {
+ Value *Src = CI->getArgOperand(0);
+ ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen && Size) {
+ annotateDereferenceableBytes(CI, 0, SrcLen);
+ if (SrcLen <= Size->getZExtValue() + 1)
+ return emitStrDup(Src, B, TLI);
+ }
+
+ return nullptr;
+}
+
Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
-
+
+ annotateNonNullBasedOnAccess(CI, {0, 1});
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
return nullptr;
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, 1, Src, 1,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
+ CallInst *NewCI =
+ B.CreateMemCpy(Dst, 1, Src, 1,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
+ NewCI->setAttributes(CI->getAttributes());
return Dst;
}
@@ -464,7 +577,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
return nullptr;
Type *PT = Callee->getFunctionType()->getParamType(0);
@@ -474,7 +589,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, 1, Src, 1, LenV);
+ CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, LenV);
+ NewCI->setAttributes(CI->getAttributes());
return DstEnd;
}
@@ -482,37 +598,47 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
- Value *LenOp = CI->getArgOperand(2);
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullBasedOnAccess(CI, 0);
+ if (isKnownNonZero(Size, DL))
+ annotateNonNullBasedOnAccess(CI, 1);
+
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
+ Len = LengthArg->getZExtValue();
+ else
+ return nullptr;
+
+ // strncpy(x, y, 0) -> x
+ if (Len == 0)
+ return Dst;
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0)
+ if (SrcLen) {
+ annotateDereferenceableBytes(CI, 1, SrcLen);
+ --SrcLen; // Unbias length.
+ } else {
return nullptr;
- --SrcLen;
+ }
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(align 1 x, '\0', y)
- B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
+ CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, 1);
+ AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0));
+ NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
+ CI->getContext(), 0, ArgAttrs));
return Dst;
}
- uint64_t Len;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
- Len = LengthArg->getZExtValue();
- else
- return nullptr;
-
- if (Len == 0)
- return Dst; // strncpy(x, y, 0) -> x
-
// Let strncpy handle the zero padding
if (Len > SrcLen + 1)
return nullptr;
Type *PT = Callee->getFunctionType()->getParamType(0);
// strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
- B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
-
+ CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
+ NewCI->setAttributes(CI->getAttributes());
return Dst;
}
@@ -608,7 +734,10 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
}
Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
- return optimizeStringLength(CI, B, 8);
+ if (Value *V = optimizeStringLength(CI, B, 8))
+ return V;
+ annotateNonNullBasedOnAccess(CI, 0);
+ return nullptr;
}
Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
@@ -756,21 +885,35 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
}
+
+ annotateNonNullBasedOnAccess(CI, {0, 1});
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilder<> &B) {
+ if (isKnownNonZero(CI->getOperand(2), DL))
+ annotateNonNullBasedOnAccess(CI, 0);
return nullptr;
}
Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
Value *SrcStr = CI->getArgOperand(0);
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, 0, Size, DL);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
// memchr(x, y, 0) -> null
- if (LenC && LenC->isZero())
- return Constant::getNullValue(CI->getType());
+ if (LenC) {
+ if (LenC->isZero())
+ return Constant::getNullValue(CI->getType());
+ } else {
+ // From now on we need at least constant length and string.
+ return nullptr;
+ }
- // From now on we need at least constant length and string.
StringRef Str;
- if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
+ if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
return nullptr;
// Truncate the string to LenC. If Str is smaller than LenC we will still only
@@ -913,6 +1056,7 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
Ret = 1;
return ConstantInt::get(CI->getType(), Ret);
}
+
return nullptr;
}
@@ -925,12 +1069,19 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
if (LHS == RHS) // memcmp(s,s,x) -> 0
return Constant::getNullValue(CI->getType());
+ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
// Handle constant lengths.
- if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
- if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
- LenC->getZExtValue(), B, DL))
- return Res;
+ ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
+ if (!LenC)
+ return nullptr;
+ // memcmp(d,s,0) -> 0
+ if (LenC->getZExtValue() == 0)
+ return Constant::getNullValue(CI->getType());
+
+ if (Value *Res =
+ optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL))
+ return Res;
return nullptr;
}
@@ -939,9 +1090,9 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
return V;
// memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
- // `bcmp` can be more efficient than memcmp because it only has to know that
- // there is a difference, not where it is.
- if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
+ // bcmp can be more efficient than memcmp because it only has to know that
+ // there is a difference, not how different one is to the other.
+ if (TLI->has(LibFunc_bcmp) && isOnlyUsedInZeroEqualityComparison(CI)) {
Value *LHS = CI->getArgOperand(0);
Value *RHS = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
@@ -956,16 +1107,37 @@ Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
+ if (isa<IntrinsicInst>(CI))
+ return nullptr;
+
// memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
+ CallInst *NewCI =
+ B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
+Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
+ Value *Dst = CI->getArgOperand(0);
+ Value *N = CI->getArgOperand(2);
+ // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
+ CallInst *NewCI = B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N);
+ NewCI->setAttributes(CI->getAttributes());
+ return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
+}
+
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
+ if (isa<IntrinsicInst>(CI))
+ return nullptr;
+
// memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
- B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
+ CallInst *NewCI =
+ B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -1003,25 +1175,29 @@ Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
const DataLayout &DL = Malloc->getModule()->getDataLayout();
IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
- Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
- Malloc->getArgOperand(0), Malloc->getAttributes(),
- B, *TLI);
- if (!Calloc)
- return nullptr;
-
- Malloc->replaceAllUsesWith(Calloc);
- eraseFromParent(Malloc);
+ if (Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
+ Malloc->getArgOperand(0),
+ Malloc->getAttributes(), B, *TLI)) {
+ substituteInParent(Malloc, Calloc);
+ return Calloc;
+ }
- return Calloc;
+ return nullptr;
}
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, 0, Size, DL);
+ if (isa<IntrinsicInst>(CI))
+ return nullptr;
+
if (auto *Calloc = foldMallocMemset(CI, B))
return Calloc;
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, 1);
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -1096,21 +1272,18 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
if (!V[0] || (isBinary && !V[1]))
return nullptr;
- StringRef CalleeNm = CalleeFn->getName();
- AttributeList CalleeAt = CalleeFn->getAttributes();
- bool CalleeIn = CalleeFn->isIntrinsic();
-
// If call isn't an intrinsic, check that it isn't within a function with the
// same name as the float version of this call, otherwise the result is an
// infinite loop. For example, from MinGW-w64:
//
// float expf(float val) { return (float) exp((double) val); }
- if (!CalleeIn) {
- const Function *Fn = CI->getFunction();
- StringRef FnName = Fn->getName();
- if (FnName.back() == 'f' &&
- FnName.size() == (CalleeNm.size() + 1) &&
- FnName.startswith(CalleeNm))
+ StringRef CalleeName = CalleeFn->getName();
+ bool IsIntrinsic = CalleeFn->isIntrinsic();
+ if (!IsIntrinsic) {
+ StringRef CallerName = CI->getFunction()->getName();
+ if (!CallerName.empty() && CallerName.back() == 'f' &&
+ CallerName.size() == (CalleeName.size() + 1) &&
+ CallerName.startswith(CalleeName))
return nullptr;
}
@@ -1120,16 +1293,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
// g((double) float) -> (double) gf(float)
Value *R;
- if (CalleeIn) {
+ if (IsIntrinsic) {
Module *M = CI->getModule();
Intrinsic::ID IID = CalleeFn->getIntrinsicID();
Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
+ } else {
+ AttributeList CalleeAttrs = CalleeFn->getAttributes();
+ R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeName, B, CalleeAttrs)
+ : emitUnaryFloatFnCall(V[0], CalleeName, B, CalleeAttrs);
}
- else
- R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt)
- : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt);
-
return B.CreateFPExt(R, B.getDoubleTy());
}
@@ -1234,9 +1407,25 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
return InnerChain[Exp];
}
+// Return a properly extended 32-bit integer if the operation is an itofp.
+static Value *getIntToFPVal(Value *I2F, IRBuilder<> &B) {
+ if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) {
+ Value *Op = cast<Instruction>(I2F)->getOperand(0);
+ // Make sure that the exponent fits inside an int32_t,
+ // thus avoiding any range issues that FP has not.
+ unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits();
+ if (BitWidth < 32 ||
+ (BitWidth == 32 && isa<SIToFPInst>(I2F)))
+ return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getInt32Ty())
+ : B.CreateZExt(Op, B.getInt32Ty());
+ }
+
+ return nullptr;
+}
+
/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
-/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x);
-/// exp2(log2(n) * x) for pow(n, x).
+/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
+/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
@@ -1269,9 +1458,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
StringRef ExpName;
Intrinsic::ID ID;
Value *ExpFn;
- LibFunc LibFnFloat;
- LibFunc LibFnDouble;
- LibFunc LibFnLongDouble;
+ LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble;
switch (LibFn) {
default:
@@ -1305,9 +1492,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
// elimination cannot be trusted to remove it, since it may have side
// effects (e.g., errno). When the only consumer for the original
// exp{,2}() is pow(), then it has to be explicitly erased.
- BaseFn->replaceAllUsesWith(ExpFn);
- eraseFromParent(BaseFn);
-
+ substituteInParent(BaseFn, ExpFn);
return ExpFn;
}
}
@@ -1318,8 +1503,18 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
if (!match(Pow->getArgOperand(0), m_APFloat(BaseF)))
return nullptr;
+ // pow(2.0, itofp(x)) -> ldexp(1.0, x)
+ if (match(Base, m_SpecificFP(2.0)) &&
+ (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
+ hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+ if (Value *ExpoI = getIntToFPVal(Expo, B))
+ return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI,
+ LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
+ B, Attrs);
+ }
+
// pow(2.0 ** n, x) -> exp2(n * x)
- if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
+ if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
APFloat BaseR = APFloat(1.0);
BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
BaseR = BaseR / *BaseF;
@@ -1344,7 +1539,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
// pow(10.0, x) -> exp10(x)
// TODO: There is no exp10() intrinsic yet, but some day there shall be one.
if (match(Base, m_SpecificFP(10.0)) &&
- hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+ hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
LibFunc_exp10l, B, Attrs);
@@ -1359,17 +1554,15 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
if (Log) {
Value *FMul = B.CreateFMul(Log, Expo, "mul");
- if (Pow->doesNotAccessMemory()) {
+ if (Pow->doesNotAccessMemory())
return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
FMul, "exp2");
- } else {
- if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
- LibFunc_exp2l))
- return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
- LibFunc_exp2l, B, Attrs);
- }
+ else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l))
+ return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l, B, Attrs);
}
}
+
return nullptr;
}
@@ -1384,8 +1577,7 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
}
// Otherwise, use the libcall for sqrt().
- if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
- LibFunc_sqrtl))
+ if (hasFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
// TODO: We also should check that the target can in fact lower the sqrt()
// libcall. We currently have no way to ask this question, so we ask if
// the target has a sqrt() libcall, which is not exactly the same.
@@ -1452,7 +1644,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
bool Ignored;
// Bail out if simplifying libcalls to pow() is disabled.
- if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
+ if (!hasFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
return nullptr;
// Propagate the math semantics from the call to any created instructions.
@@ -1480,8 +1672,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
if (match(Expo, m_SpecificFP(-1.0)))
return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
- // pow(x, 0.0) -> 1.0
- if (match(Expo, m_SpecificFP(0.0)))
+ // pow(x, +/-0.0) -> 1.0
+ if (match(Expo, m_AnyZeroFP()))
return ConstantFP::get(Ty, 1.0);
// pow(x, 1.0) -> x
@@ -1558,16 +1750,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// powf(x, itofp(y)) -> powi(x, y)
if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
- Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
- Value *NewExpo = nullptr;
- unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
- if (isa<SIToFPInst>(Expo) && BitWidth == 32)
- NewExpo = IntExpo;
- else if (BitWidth < 32)
- NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
- : B.CreateZExt(IntExpo, B.getInt32Ty());
- if (NewExpo)
- return createPowWithIntegerExponent(Base, NewExpo, M, B);
+ if (Value *ExpoI = getIntToFPVal(Expo, B))
+ return createPowWithIntegerExponent(Base, ExpoI, M, B);
}
return Shrunk;
@@ -1575,45 +1759,25 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
+ Value *Ret = nullptr;
+ if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
+ hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
+ Type *Ty = CI->getType();
Value *Op = CI->getArgOperand(0);
+
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- LibFunc LdExp = LibFunc_ldexpl;
- if (Op->getType()->isFloatTy())
- LdExp = LibFunc_ldexpf;
- else if (Op->getType()->isDoubleTy())
- LdExp = LibFunc_ldexp;
-
- if (TLI->has(LdExp)) {
- Value *LdExpArg = nullptr;
- if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
- } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
- }
-
- if (LdExpArg) {
- Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f));
- if (!Op->getType()->isFloatTy())
- One = ConstantExpr::getFPExtend(One, Op->getType());
-
- Module *M = CI->getModule();
- FunctionCallee NewCallee = M->getOrInsertFunction(
- TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty());
- CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
- }
+ if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
+ hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+ if (Value *Exp = getIntToFPVal(Op, B))
+ return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
+ LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
+ B, CI->getCalledFunction()->getAttributes());
}
+
return Ret;
}
@@ -1644,48 +1808,155 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
}
-Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
+Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) {
+ Function *LogFn = Log->getCalledFunction();
+ AttributeList Attrs = LogFn->getAttributes();
+ StringRef LogNm = LogFn->getName();
+ Intrinsic::ID LogID = LogFn->getIntrinsicID();
+ Module *Mod = Log->getModule();
+ Type *Ty = Log->getType();
Value *Ret = nullptr;
- StringRef Name = Callee->getName();
- if (UnsafeFPShrink && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
- if (!CI->isFast())
- return Ret;
- Value *Op1 = CI->getArgOperand(0);
- auto *OpC = dyn_cast<CallInst>(Op1);
+ if (UnsafeFPShrink && hasFloatVersion(LogNm))
+ Ret = optimizeUnaryDoubleFP(Log, B, true);
// The earlier call must also be 'fast' in order to do these transforms.
- if (!OpC || !OpC->isFast())
+ CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0));
+ if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse())
return Ret;
- // log(pow(x,y)) -> y*log(x)
- // This is only applicable to log, log2, log10.
- if (Name != "log" && Name != "log2" && Name != "log10")
+ LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb;
+
+ // This is only applicable to log(), log2(), log10().
+ if (TLI->getLibFunc(LogNm, LogLb))
+ switch (LogLb) {
+ case LibFunc_logf:
+ LogID = Intrinsic::log;
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ break;
+ case LibFunc_log:
+ LogID = Intrinsic::log;
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ break;
+ case LibFunc_logl:
+ LogID = Intrinsic::log;
+ ExpLb = LibFunc_expl;
+ Exp2Lb = LibFunc_exp2l;
+ Exp10Lb = LibFunc_exp10l;
+ PowLb = LibFunc_powl;
+ break;
+ case LibFunc_log2f:
+ LogID = Intrinsic::log2;
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ break;
+ case LibFunc_log2:
+ LogID = Intrinsic::log2;
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ break;
+ case LibFunc_log2l:
+ LogID = Intrinsic::log2;
+ ExpLb = LibFunc_expl;
+ Exp2Lb = LibFunc_exp2l;
+ Exp10Lb = LibFunc_exp10l;
+ PowLb = LibFunc_powl;
+ break;
+ case LibFunc_log10f:
+ LogID = Intrinsic::log10;
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ break;
+ case LibFunc_log10:
+ LogID = Intrinsic::log10;
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ break;
+ case LibFunc_log10l:
+ LogID = Intrinsic::log10;
+ ExpLb = LibFunc_expl;
+ Exp2Lb = LibFunc_exp2l;
+ Exp10Lb = LibFunc_exp10l;
+ PowLb = LibFunc_powl;
+ break;
+ default:
+ return Ret;
+ }
+ else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 ||
+ LogID == Intrinsic::log10) {
+ if (Ty->getScalarType()->isFloatTy()) {
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ } else if (Ty->getScalarType()->isDoubleTy()) {
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ } else
+ return Ret;
+ } else
return Ret;
IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF;
- FMF.setFast();
- B.setFastMathFlags(FMF);
+ B.setFastMathFlags(FastMathFlags::getFast());
+
+ Intrinsic::ID ArgID = Arg->getIntrinsicID();
+ LibFunc ArgLb = NotLibFunc;
+ TLI->getLibFunc(Arg, ArgLb);
+
+ // log(pow(x,y)) -> y*log(x)
+ if (ArgLb == PowLb || ArgID == Intrinsic::pow) {
+ Value *LogX =
+ Log->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
+ Arg->getOperand(0), "log")
+ : emitUnaryFloatFnCall(Arg->getOperand(0), LogNm, B, Attrs);
+ Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul");
+ // Since pow() may have side effects, e.g. errno,
+ // dead code elimination may not be trusted to remove it.
+ substituteInParent(Arg, MulY);
+ return MulY;
+ }
+
+ // log(exp{,2,10}(y)) -> y*log({e,2,10})
+ // TODO: There is no exp10() intrinsic yet.
+ if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb ||
+ ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) {
+ Constant *Eul;
+ if (ArgLb == ExpLb || ArgID == Intrinsic::exp)
+ // FIXME: Add more precise value of e for long double.
+ Eul = ConstantFP::get(Log->getType(), numbers::e);
+ else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2)
+ Eul = ConstantFP::get(Log->getType(), 2.0);
+ else
+ Eul = ConstantFP::get(Log->getType(), 10.0);
+ Value *LogE = Log->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
+ Eul, "log")
+ : emitUnaryFloatFnCall(Eul, LogNm, B, Attrs);
+ Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
+ // Since exp() may have side effects, e.g. errno,
+ // dead code elimination may not be trusted to remove it.
+ substituteInParent(Arg, MulY);
+ return MulY;
+ }
- LibFunc Func;
- Function *F = OpC->getCalledFunction();
- if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow))
- return B.CreateFMul(OpC->getArgOperand(1),
- emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
- Callee->getAttributes()), "mul");
-
- // log(exp2(y)) -> y*log(2)
- if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
- TLI->has(Func) && Func == LibFunc_exp2)
- return B.CreateFMul(
- OpC->getArgOperand(0),
- emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
- Callee->getName(), B, Callee->getAttributes()),
- "logmul");
return Ret;
}
@@ -2137,6 +2408,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
return New;
}
+ annotateNonNullBasedOnAccess(CI, 0);
return nullptr;
}
@@ -2231,21 +2503,21 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
return New;
}
+ annotateNonNullBasedOnAccess(CI, {0, 1});
return nullptr;
}
Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
- return nullptr;
-
// Check for size
ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!Size)
return nullptr;
uint64_t N = Size->getZExtValue();
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
+ return nullptr;
// If we just have a format string (nothing else crazy) transform it.
if (CI->getNumArgOperands() == 3) {
@@ -2318,6 +2590,8 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) {
return V;
}
+ if (isKnownNonZero(CI->getOperand(1), DL))
+ annotateNonNullBasedOnAccess(CI, 0);
return nullptr;
}
@@ -2503,6 +2777,7 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
+ annotateNonNullBasedOnAccess(CI, 0);
if (!CI->use_empty())
return nullptr;
@@ -2515,6 +2790,12 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
+Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilder<> &B) {
+ // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
+ return B.CreateMemMove(CI->getArgOperand(1), 1, CI->getArgOperand(0), 1,
+ CI->getArgOperand(2));
+}
+
bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
LibFunc Func;
SmallString<20> FloatFuncName = FuncName;
@@ -2557,6 +2838,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrLen(CI, Builder);
case LibFunc_strpbrk:
return optimizeStrPBrk(CI, Builder);
+ case LibFunc_strndup:
+ return optimizeStrNDup(CI, Builder);
case LibFunc_strtol:
case LibFunc_strtod:
case LibFunc_strtof:
@@ -2573,12 +2856,16 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrStr(CI, Builder);
case LibFunc_memchr:
return optimizeMemChr(CI, Builder);
+ case LibFunc_memrchr:
+ return optimizeMemRChr(CI, Builder);
case LibFunc_bcmp:
return optimizeBCmp(CI, Builder);
case LibFunc_memcmp:
return optimizeMemCmp(CI, Builder);
case LibFunc_memcpy:
return optimizeMemCpy(CI, Builder);
+ case LibFunc_mempcpy:
+ return optimizeMemPCpy(CI, Builder);
case LibFunc_memmove:
return optimizeMemMove(CI, Builder);
case LibFunc_memset:
@@ -2587,6 +2874,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeRealloc(CI, Builder);
case LibFunc_wcslen:
return optimizeWcslen(CI, Builder);
+ case LibFunc_bcopy:
+ return optimizeBCopy(CI, Builder);
default:
break;
}
@@ -2626,11 +2915,21 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
case LibFunc_sqrt:
case LibFunc_sqrtl:
return optimizeSqrt(CI, Builder);
+ case LibFunc_logf:
case LibFunc_log:
+ case LibFunc_logl:
+ case LibFunc_log10f:
case LibFunc_log10:
+ case LibFunc_log10l:
+ case LibFunc_log1pf:
case LibFunc_log1p:
+ case LibFunc_log1pl:
+ case LibFunc_log2f:
case LibFunc_log2:
+ case LibFunc_log2l:
+ case LibFunc_logbf:
case LibFunc_logb:
+ case LibFunc_logbl:
return optimizeLog(CI, Builder);
case LibFunc_tan:
case LibFunc_tanf:
@@ -2721,10 +3020,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
case Intrinsic::exp2:
return optimizeExp2(CI, Builder);
case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
return optimizeLog(CI, Builder);
case Intrinsic::sqrt:
return optimizeSqrt(CI, Builder);
// TODO: Use foldMallocMemset() with memset intrinsic.
+ case Intrinsic::memset:
+ return optimizeMemSet(CI, Builder);
+ case Intrinsic::memcpy:
+ return optimizeMemCpy(CI, Builder);
+ case Intrinsic::memmove:
+ return optimizeMemMove(CI, Builder);
default:
return nullptr;
}
@@ -2740,8 +3047,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
IRBuilder<> TmpBuilder(SimplifiedCI);
if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
// If we were able to further simplify, remove the now redundant call.
- SimplifiedCI->replaceAllUsesWith(V);
- eraseFromParent(SimplifiedCI);
+ substituteInParent(SimplifiedCI, V);
return V;
}
}
@@ -2898,7 +3204,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
// If the length is 0 we don't know how long it is and so we can't
// remove the check.
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, *StrOp, Len);
+ else
return false;
return ObjSizeCI->getZExtValue() >= Len;
}
@@ -2915,8 +3223,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
IRBuilder<> &B) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
+ CallInst *NewCI = B.CreateMemCpy(
+ CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
return nullptr;
@@ -2925,8 +3234,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
IRBuilder<> &B) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
- B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
+ CallInst *NewCI = B.CreateMemMove(
+ CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
return nullptr;
@@ -2938,7 +3248,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
if (isFortifiedCallFoldable(CI, 3, 2)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ CallInst *NewCI =
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
return nullptr;
@@ -2974,7 +3286,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
uint64_t Len = GetStringLength(Src);
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
return nullptr;
Type *SizeTTy = DL.getIntPtrType(CI->getContext());
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 456724779b43..5d380dcf231c 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -380,11 +380,11 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
// TODO see if there is a more elegant solution to selecting the rewrite
// descriptor type
if (!Target.empty())
- DL->push_back(llvm::make_unique<ExplicitRewriteFunctionDescriptor>(
+ DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>(
Source, Target, Naked));
else
DL->push_back(
- llvm::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
+ std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
return true;
}
@@ -442,11 +442,11 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
}
if (!Target.empty())
- DL->push_back(llvm::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
+ DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
Source, Target,
/*Naked*/ false));
else
- DL->push_back(llvm::make_unique<PatternRewriteGlobalVariableDescriptor>(
+ DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
Source, Transform));
return true;
@@ -505,11 +505,11 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
}
if (!Target.empty())
- DL->push_back(llvm::make_unique<ExplicitRewriteNamedAliasDescriptor>(
+ DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>(
Source, Target,
/*Naked*/ false));
else
- DL->push_back(llvm::make_unique<PatternRewriteNamedAliasDescriptor>(
+ DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>(
Source, Transform));
return true;
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index a77bf50fe10b..591e1fd2dbee 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -431,7 +431,7 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
NewLoad->takeName(SrcVal);
- NewLoad->setAlignment(SrcVal->getAlignment());
+ NewLoad->setAlignment(MaybeAlign(SrcVal->getAlignment()));
LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index fbc3407c301f..da68d3713b40 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -27,8 +27,8 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
@@ -66,7 +66,7 @@ struct WorklistEntry {
enum EntryKind {
MapGlobalInit,
MapAppendingVar,
- MapGlobalAliasee,
+ MapGlobalIndirectSymbol,
RemapFunction
};
struct GVInitTy {
@@ -77,9 +77,9 @@ struct WorklistEntry {
GlobalVariable *GV;
Constant *InitPrefix;
};
- struct GlobalAliaseeTy {
- GlobalAlias *GA;
- Constant *Aliasee;
+ struct GlobalIndirectSymbolTy {
+ GlobalIndirectSymbol *GIS;
+ Constant *Target;
};
unsigned Kind : 2;
@@ -89,7 +89,7 @@ struct WorklistEntry {
union {
GVInitTy GVInit;
AppendingGVTy AppendingGV;
- GlobalAliaseeTy GlobalAliasee;
+ GlobalIndirectSymbolTy GlobalIndirectSymbol;
Function *RemapF;
} Data;
};
@@ -161,8 +161,8 @@ public:
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers,
unsigned MCID);
- void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MCID);
+ void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target,
+ unsigned MCID);
void scheduleRemapFunction(Function &F, unsigned MCID);
void flush();
@@ -172,7 +172,7 @@ private:
void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers);
- void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee);
+ void mapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target);
void remapFunction(Function &F, ValueToValueMapTy &VM);
ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; }
@@ -774,20 +774,6 @@ Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) {
return *getMappedOp(&FirstN);
}
-namespace {
-
-struct MapMetadataDisabler {
- ValueToValueMapTy &VM;
-
- MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) {
- VM.disableMapMetadata();
- }
-
- ~MapMetadataDisabler() { VM.enableMapMetadata(); }
-};
-
-} // end anonymous namespace
-
Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
// If the value already exists in the map, use it.
if (Optional<Metadata *> NewMD = getVM().getMappedMD(MD))
@@ -802,9 +788,6 @@ Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
return const_cast<Metadata *>(MD);
if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) {
- // Disallow recursion into metadata mapping through mapValue.
- MapMetadataDisabler MMD(getVM());
-
// Don't memoize ConstantAsMetadata. Instead of lasting until the
// LLVMContext is destroyed, they can be deleted when the GlobalValue they
// reference is destructed. These aren't super common, so the extra
@@ -846,9 +829,9 @@ void Mapper::flush() {
AppendingInits.resize(PrefixSize);
break;
}
- case WorklistEntry::MapGlobalAliasee:
- E.Data.GlobalAliasee.GA->setAliasee(
- mapConstant(E.Data.GlobalAliasee.Aliasee));
+ case WorklistEntry::MapGlobalIndirectSymbol:
+ E.Data.GlobalIndirectSymbol.GIS->setIndirectSymbol(
+ mapConstant(E.Data.GlobalIndirectSymbol.Target));
break;
case WorklistEntry::RemapFunction:
remapFunction(*E.Data.RemapF);
@@ -1041,16 +1024,16 @@ void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV,
AppendingInits.append(NewMembers.begin(), NewMembers.end());
}
-void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MCID) {
- assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule");
+void Mapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
+ Constant &Target, unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GIS).second && "Should not reschedule");
assert(MCID < MCs.size() && "Invalid mapping context");
WorklistEntry WE;
- WE.Kind = WorklistEntry::MapGlobalAliasee;
+ WE.Kind = WorklistEntry::MapGlobalIndirectSymbol;
WE.MCID = MCID;
- WE.Data.GlobalAliasee.GA = &GA;
- WE.Data.GlobalAliasee.Aliasee = &Aliasee;
+ WE.Data.GlobalIndirectSymbol.GIS = &GIS;
+ WE.Data.GlobalIndirectSymbol.Target = &Target;
Worklist.push_back(WE);
}
@@ -1147,9 +1130,10 @@ void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
}
-void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MCID) {
- getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID);
+void ValueMapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
+ Constant &Target,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapGlobalIndirectSymbol(GIS, Target, MCID);
}
void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {
diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 4273080ddd91..f44976c723ec 100644
--- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -147,7 +147,7 @@ private:
static const unsigned MaxDepth = 3;
bool isConsecutiveAccess(Value *A, Value *B);
- bool areConsecutivePointers(Value *PtrA, Value *PtrB, const APInt &PtrDelta,
+ bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta,
unsigned Depth = 0) const;
bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta,
unsigned Depth) const;
@@ -336,14 +336,29 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
}
bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
- const APInt &PtrDelta,
- unsigned Depth) const {
+ APInt PtrDelta, unsigned Depth) const {
unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
APInt OffsetA(PtrBitWidth, 0);
APInt OffsetB(PtrBitWidth, 0);
PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+ unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
+
+ if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
+ return false;
+
+ // In case if we have to shrink the pointer
+ // stripAndAccumulateInBoundsConstantOffsets should properly handle a
+ // possible overflow and the value should fit into a smallest data type
+ // used in the cast/gep chain.
+ assert(OffsetA.getMinSignedBits() <= NewPtrBitWidth &&
+ OffsetB.getMinSignedBits() <= NewPtrBitWidth);
+
+ OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
+ OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
+ PtrDelta = PtrDelta.sextOrTrunc(NewPtrBitWidth);
+
APInt OffsetDelta = OffsetB - OffsetA;
// Check if they are based on the same pointer. That makes the offsets
@@ -650,7 +665,7 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
// We can ignore the alias if the we have a load store pair and the load
// is known to be invariant. The load cannot be clobbered by the store.
auto IsInvariantLoad = [](const LoadInst *LI) -> bool {
- return LI->getMetadata(LLVMContext::MD_invariant_load);
+ return LI->hasMetadata(LLVMContext::MD_invariant_load);
};
// We can ignore the alias as long as the load comes before the store,
@@ -1077,7 +1092,7 @@ bool Vectorizer::vectorizeLoadChain(
LoadInst *L0 = cast<LoadInst>(Chain[0]);
// If the vector has an int element, default to int for the whole load.
- Type *LoadTy;
+ Type *LoadTy = nullptr;
for (const auto &V : Chain) {
LoadTy = cast<LoadInst>(V)->getType();
if (LoadTy->isIntOrIntVectorTy())
@@ -1089,6 +1104,7 @@ bool Vectorizer::vectorizeLoadChain(
break;
}
}
+ assert(LoadTy && "Can't determine LoadInst type from chain");
unsigned Sz = DL.getTypeSizeInBits(LoadTy);
unsigned AS = L0->getPointerAddressSpace();
diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 6ef8dc2d3cd7..f43842be5357 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -13,7 +13,10 @@
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -47,38 +50,6 @@ static const unsigned MaxInterleaveFactor = 16;
namespace llvm {
-#ifndef NDEBUG
-static void debugVectorizationFailure(const StringRef DebugMsg,
- Instruction *I) {
- dbgs() << "LV: Not vectorizing: " << DebugMsg;
- if (I != nullptr)
- dbgs() << " " << *I;
- else
- dbgs() << '.';
- dbgs() << '\n';
-}
-#endif
-
-OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
- StringRef RemarkName,
- Loop *TheLoop,
- Instruction *I) {
- Value *CodeRegion = TheLoop->getHeader();
- DebugLoc DL = TheLoop->getStartLoc();
-
- if (I) {
- CodeRegion = I->getParent();
- // If there is no debug location attached to the instruction, revert back to
- // using the loop's.
- if (I->getDebugLoc())
- DL = I->getDebugLoc();
- }
-
- OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
- R << "loop not vectorized: ";
- return R;
-}
-
bool LoopVectorizeHints::Hint::validate(unsigned Val) {
switch (Kind) {
case HK_WIDTH:
@@ -88,6 +59,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
case HK_FORCE:
return (Val <= 1);
case HK_ISVECTORIZED:
+ case HK_PREDICATE:
return (Val == 0 || Val == 1);
}
return false;
@@ -99,7 +71,9 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
- IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
+ IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
+ Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
+ ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@@ -250,7 +224,7 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
return;
unsigned Val = C->getZExtValue();
- Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
+ Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
for (auto H : Hints) {
if (Name == H->Name) {
if (H->validate(Val))
@@ -435,7 +409,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
const ValueToValueMap &Strides =
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
- int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
+ bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
+ int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
@@ -445,14 +420,6 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
return LAI->isUniform(V);
}
-void LoopVectorizationLegality::reportVectorizationFailure(
- const StringRef DebugMsg, const StringRef OREMsg,
- const StringRef ORETag, Instruction *I) const {
- LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
- ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
- ORETag, TheLoop, I) << OREMsg);
-}
-
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
// Store the result and return it at the end instead of exiting early, in case
@@ -467,7 +434,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
if (!Br) {
reportVectorizationFailure("Unsupported basic block terminator",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -486,7 +453,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
!LI->isLoopHeader(Br->getSuccessor(1))) {
reportVectorizationFailure("Unsupported conditional branch",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -500,7 +467,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
TheLoop /*context outer loop*/)) {
reportVectorizationFailure("Outer loop contains divergent loops",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -511,7 +478,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
if (!setupOuterLoopInductions()) {
reportVectorizationFailure("Unsupported outer loop Phi(s)",
"Unsupported outer loop Phi(s)",
- "UnsupportedPhi");
+ "UnsupportedPhi", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -618,7 +585,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
!PhiTy->isPointerTy()) {
reportVectorizationFailure("Found a non-int non-pointer PHI",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
return false;
}
@@ -631,6 +598,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Unsafe cyclic dependencies with header phis are identified during
// legalization for reduction, induction and first order
// recurrences.
+ AllowedExit.insert(&I);
continue;
}
@@ -638,7 +606,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (Phi->getNumIncomingValues() != 2) {
reportVectorizationFailure("Found an invalid PHI",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", Phi);
+ "CFGNotUnderstood", ORE, TheLoop, Phi);
return false;
}
@@ -690,7 +658,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
- "NonReductionValueUsedOutsideLoop", Phi);
+ "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
return false;
} // end of PHI handling
@@ -721,11 +689,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
"library call cannot be vectorized. "
"Try compiling with -fno-math-errno, -ffast-math, "
"or similar flags",
- "CantVectorizeLibcall", CI);
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
} else {
reportVectorizationFailure("Found a non-intrinsic callsite",
"call instruction cannot be vectorized",
- "CantVectorizeLibcall", CI);
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
}
return false;
}
@@ -740,7 +708,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
reportVectorizationFailure("Found unvectorizable intrinsic",
"intrinsic instruction cannot be vectorized",
- "CantVectorizeIntrinsic", CI);
+ "CantVectorizeIntrinsic", ORE, TheLoop, CI);
return false;
}
}
@@ -753,7 +721,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
isa<ExtractElementInst>(I)) {
reportVectorizationFailure("Found unvectorizable type",
"instruction return type cannot be vectorized",
- "CantVectorizeInstructionReturnType", &I);
+ "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
return false;
}
@@ -763,7 +731,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!VectorType::isValidElementType(T)) {
reportVectorizationFailure("Store instruction cannot be vectorized",
"store instruction cannot be vectorized",
- "CantVectorizeStore", ST);
+ "CantVectorizeStore", ORE, TheLoop, ST);
return false;
}
@@ -773,12 +741,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Arbitrarily try a vector of 2 elements.
Type *VecTy = VectorType::get(T, /*NumElements=*/2);
assert(VecTy && "did not find vectorized version of stored type");
- unsigned Alignment = getLoadStoreAlignment(ST);
- if (!TTI->isLegalNTStore(VecTy, Alignment)) {
+ const MaybeAlign Alignment = getLoadStoreAlignment(ST);
+ assert(Alignment && "Alignment should be set");
+ if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
reportVectorizationFailure(
"nontemporal store instruction cannot be vectorized",
"nontemporal store instruction cannot be vectorized",
- "CantVectorizeNontemporalStore", ST);
+ "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
return false;
}
}
@@ -789,12 +758,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// supported on the target (arbitrarily try a vector of 2 elements).
Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
assert(VecTy && "did not find vectorized version of load type");
- unsigned Alignment = getLoadStoreAlignment(LD);
- if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
+ const MaybeAlign Alignment = getLoadStoreAlignment(LD);
+ assert(Alignment && "Alignment should be set");
+ if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
reportVectorizationFailure(
"nontemporal load instruction cannot be vectorized",
"nontemporal load instruction cannot be vectorized",
- "CantVectorizeNontemporalLoad", LD);
+ "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
return false;
}
}
@@ -823,7 +793,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
reportVectorizationFailure("Value cannot be used outside the loop",
"value cannot be used outside the loop",
- "ValueUsedOutsideLoop", &I);
+ "ValueUsedOutsideLoop", ORE, TheLoop, &I);
return false;
}
} // next instr.
@@ -833,12 +803,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (Inductions.empty()) {
reportVectorizationFailure("Did not find one integer induction var",
"loop induction variable could not be identified",
- "NoInductionVariable");
+ "NoInductionVariable", ORE, TheLoop);
return false;
} else if (!WidestIndTy) {
reportVectorizationFailure("Did not find one integer induction var",
"integer loop induction variable could not be identified",
- "NoIntegerInductionVariable");
+ "NoIntegerInductionVariable", ORE, TheLoop);
return false;
} else {
LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
@@ -869,7 +839,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
reportVectorizationFailure("Stores to a uniform address",
"write to a loop invariant address could not be vectorized",
- "CantVectorizeStoreToLoopInvariantAddress");
+ "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
return false;
}
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
@@ -905,7 +875,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
}
bool LoopVectorizationLegality::blockCanBePredicated(
- BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
+ BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
for (Instruction &I : *BB) {
@@ -924,7 +894,7 @@ bool LoopVectorizationLegality::blockCanBePredicated(
// !llvm.mem.parallel_loop_access implies if-conversion safety.
// Otherwise, record that the load needs (real or emulated) masking
// and let the cost model decide.
- if (!IsAnnotatedParallel)
+ if (!IsAnnotatedParallel || PreserveGuards)
MaskedOp.insert(LI);
continue;
}
@@ -953,23 +923,41 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion) {
reportVectorizationFailure("If-conversion is disabled",
"if-conversion is disabled",
- "IfConversionDisabled");
+ "IfConversionDisabled",
+ ORE, TheLoop);
return false;
}
assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
- // A list of pointers that we can safely read and write to.
+ // A list of pointers which are known to be dereferenceable within scope of
+ // the loop body for each iteration of the loop which executes. That is,
+ // the memory pointed to can be dereferenced (with the access size implied by
+ // the value's type) unconditionally within the loop header without
+ // introducing a new fault.
SmallPtrSet<Value *, 8> SafePointes;
// Collect safe addresses.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (blockNeedsPredication(BB))
+ if (!blockNeedsPredication(BB)) {
+ for (Instruction &I : *BB)
+ if (auto *Ptr = getLoadStorePointerOperand(&I))
+ SafePointes.insert(Ptr);
continue;
+ }
- for (Instruction &I : *BB)
- if (auto *Ptr = getLoadStorePointerOperand(&I))
- SafePointes.insert(Ptr);
+ // For a block which requires predication, a address may be safe to access
+ // in the loop w/o predication if we can prove dereferenceability facts
+ // sufficient to ensure it'll never fault within the loop. For the moment,
+ // we restrict this to loads; stores are more complicated due to
+ // concurrency restrictions.
+ ScalarEvolution &SE = *PSE.getSE();
+ for (Instruction &I : *BB) {
+ LoadInst *LI = dyn_cast<LoadInst>(&I);
+ if (LI && !mustSuppressSpeculation(*LI) &&
+ isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
+ SafePointes.insert(LI->getPointerOperand());
+ }
}
// Collect the blocks that need predication.
@@ -979,7 +967,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!isa<BranchInst>(BB->getTerminator())) {
reportVectorizationFailure("Loop contains a switch statement",
"loop contains a switch statement",
- "LoopContainsSwitch", BB->getTerminator());
+ "LoopContainsSwitch", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
@@ -989,14 +978,16 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
reportVectorizationFailure(
"Control flow cannot be substituted for a select",
"control flow cannot be substituted for a select",
- "NoCFGForSelect", BB->getTerminator());
+ "NoCFGForSelect", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
} else if (BB != Header && !canIfConvertPHINodes(BB)) {
reportVectorizationFailure(
"Control flow cannot be substituted for a select",
"control flow cannot be substituted for a select",
- "NoCFGForSelect", BB->getTerminator());
+ "NoCFGForSelect", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
}
@@ -1026,7 +1017,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
if (!Lp->getLoopPreheader()) {
reportVectorizationFailure("Loop doesn't have a legal pre-header",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1037,7 +1028,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
if (Lp->getNumBackEdges() != 1) {
reportVectorizationFailure("The loop must have a single backedge",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1048,7 +1039,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
if (!Lp->getExitingBlock()) {
reportVectorizationFailure("The loop must have an exiting block",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1061,7 +1052,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
reportVectorizationFailure("The exiting block is not the loop latch",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1124,7 +1115,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
if (!canVectorizeOuterLoop()) {
reportVectorizationFailure("Unsupported outer loop",
"unsupported outer loop",
- "UnsupportedOuterLoop");
+ "UnsupportedOuterLoop",
+ ORE, TheLoop);
// TODO: Implement DoExtraAnalysis when subsequent legal checks support
// outer loops.
return false;
@@ -1176,7 +1168,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
reportVectorizationFailure("Too many SCEV checks needed",
"Too many SCEV assumptions need to be made and checked at runtime",
- "TooManySCEVRunTimeChecks");
+ "TooManySCEVRunTimeChecks", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
@@ -1190,7 +1182,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return Result;
}
-bool LoopVectorizationLegality::canFoldTailByMasking() {
+bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
@@ -1199,22 +1191,21 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
"No primary induction, cannot fold tail by masking",
"Missing a primary induction variable in the loop, which is "
"needed in order to fold tail by masking as required.",
- "NoPrimaryInduction");
+ "NoPrimaryInduction", ORE, TheLoop);
return false;
}
- // TODO: handle reductions when tail is folded by masking.
- if (!Reductions.empty()) {
- reportVectorizationFailure(
- "Loop has reductions, cannot fold tail by masking",
- "Cannot fold tail by masking in the presence of reductions.",
- "ReductionFoldingTailByMasking");
- return false;
- }
+ SmallPtrSet<const Value *, 8> ReductionLiveOuts;
- // TODO: handle outside users when tail is folded by masking.
+ for (auto &Reduction : *getReductionVars())
+ ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
+
+ // TODO: handle non-reduction outside users when tail is folded by masking.
for (auto *AE : AllowedExit) {
- // Check that all users of allowed exit values are inside the loop.
+ // Check that all users of allowed exit values are inside the loop or
+ // are the live-out of a reduction.
+ if (ReductionLiveOuts.count(AE))
+ continue;
for (User *U : AE->users()) {
Instruction *UI = cast<Instruction>(U);
if (TheLoop->contains(UI))
@@ -1222,7 +1213,7 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
reportVectorizationFailure(
"Cannot fold tail by masking, loop has an outside user for",
"Cannot fold tail by masking in the presence of live outs.",
- "LiveOutFoldingTailByMasking", UI);
+ "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
return false;
}
}
@@ -1233,11 +1224,12 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
// Check and mark all blocks for predication, including those that ordinarily
// do not need predication such as the header block.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (!blockCanBePredicated(BB, SafePointers)) {
+ if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
reportVectorizationFailure(
"Cannot fold tail by masking as required",
"control flow cannot be substituted for a select",
- "NoCFGForSelect", BB->getTerminator());
+ "NoCFGForSelect", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
}
diff --git a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 97077cce83e3..a5e85f27fabf 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -228,11 +228,11 @@ public:
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.
- Optional<VectorizationFactor> plan(bool OptForSize, unsigned UserVF);
+ Optional<VectorizationFactor> plan(unsigned UserVF);
/// Use the VPlan-native path to plan how to best vectorize, return the best
/// VF and its cost.
- VectorizationFactor planInVPlanNativePath(bool OptForSize, unsigned UserVF);
+ VectorizationFactor planInVPlanNativePath(unsigned UserVF);
/// Finalize the best decision and dispose of all other VPlans.
void setBestPlan(unsigned VF, unsigned UF);
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 46265e3f3e13..8f0bf70f873c 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -177,6 +177,14 @@ static cl::opt<unsigned> TinyTripCountVectorThreshold(
"value are vectorized only if no scalar iteration overheads "
"are incurred."));
+// Indicates that an epilogue is undesired, predication is preferred.
+// This means that the vectorizer will try to fold the loop-tail (epilogue)
+// into the loop and predicate the loop body accordingly.
+static cl::opt<bool> PreferPredicateOverEpilog(
+ "prefer-predicate-over-epilog", cl::init(false), cl::Hidden,
+ cl::desc("Indicate that an epilogue is undesired, predication should be "
+ "used instead."));
+
static cl::opt<bool> MaximizeBandwidth(
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
cl::desc("Maximize bandwidth when selecting vectorization factor which "
@@ -347,6 +355,29 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
: ConstantFP::get(Ty, C);
}
+/// Returns "best known" trip count for the specified loop \p L as defined by
+/// the following procedure:
+/// 1) Returns exact trip count if it is known.
+/// 2) Returns expected trip count according to profile data if any.
+/// 3) Returns upper bound estimate if it is known.
+/// 4) Returns None if all of the above failed.
+static Optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE, Loop *L) {
+ // Check if exact trip count is known.
+ if (unsigned ExpectedTC = SE.getSmallConstantTripCount(L))
+ return ExpectedTC;
+
+ // Check if there is an expected trip count available from profile data.
+ if (LoopVectorizeWithBlockFrequency)
+ if (auto EstimatedTC = getLoopEstimatedTripCount(L))
+ return EstimatedTC;
+
+ // Check if upper bound estimate is known.
+ if (unsigned ExpectedTC = SE.getSmallConstantMaxTripCount(L))
+ return ExpectedTC;
+
+ return None;
+}
+
namespace llvm {
/// InnerLoopVectorizer vectorizes loops which contain only one basic
@@ -795,6 +826,59 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
B.SetCurrentDebugLocation(DebugLoc());
}
+/// Write a record \p DebugMsg about vectorization failure to the debug
+/// output stream. If \p I is passed, it is an instruction that prevents
+/// vectorization.
+#ifndef NDEBUG
+static void debugVectorizationFailure(const StringRef DebugMsg,
+ Instruction *I) {
+ dbgs() << "LV: Not vectorizing: " << DebugMsg;
+ if (I != nullptr)
+ dbgs() << " " << *I;
+ else
+ dbgs() << '.';
+ dbgs() << '\n';
+}
+#endif
+
+/// Create an analysis remark that explains why vectorization failed
+///
+/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
+/// RemarkName is the identifier for the remark. If \p I is passed it is an
+/// instruction that prevents vectorization. Otherwise \p TheLoop is used for
+/// the location of the remark. \return the remark object that can be
+/// streamed to.
+static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
+ StringRef RemarkName, Loop *TheLoop, Instruction *I) {
+ Value *CodeRegion = TheLoop->getHeader();
+ DebugLoc DL = TheLoop->getStartLoc();
+
+ if (I) {
+ CodeRegion = I->getParent();
+ // If there is no debug location attached to the instruction, revert back to
+ // using the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ }
+
+ OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
+ R << "loop not vectorized: ";
+ return R;
+}
+
+namespace llvm {
+
+void reportVectorizationFailure(const StringRef DebugMsg,
+ const StringRef OREMsg, const StringRef ORETag,
+ OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I) {
+ LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
+ LoopVectorizeHints Hints(TheLoop, true /* doesn't matter */, *ORE);
+ ORE->emit(createLVAnalysis(Hints.vectorizeAnalysisPassName(),
+ ORETag, TheLoop, I) << OREMsg);
+}
+
+} // end namespace llvm
+
#ifndef NDEBUG
/// \return string containing a file name and a line # for the given loop.
static std::string getDebugLocString(const Loop *L) {
@@ -836,6 +920,26 @@ void InnerLoopVectorizer::addMetadata(ArrayRef<Value *> To,
namespace llvm {
+// Loop vectorization cost-model hints how the scalar epilogue loop should be
+// lowered.
+enum ScalarEpilogueLowering {
+
+ // The default: allowing scalar epilogues.
+ CM_ScalarEpilogueAllowed,
+
+ // Vectorization with OptForSize: don't allow epilogues.
+ CM_ScalarEpilogueNotAllowedOptSize,
+
+ // A special case of vectorisation with OptForSize: loops with a very small
+ // trip count are considered for vectorization under OptForSize, thereby
+ // making sure the cost of their loop body is dominant, free of runtime
+ // guards and scalar iteration overheads.
+ CM_ScalarEpilogueNotAllowedLowTripLoop,
+
+ // Loop hint predicate indicating an epilogue is undesired.
+ CM_ScalarEpilogueNotNeededUsePredicate
+};
+
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// vectorization.
/// In many cases vectorization is not profitable. This can happen because of
@@ -845,20 +949,26 @@ namespace llvm {
/// different operations.
class LoopVectorizationCostModel {
public:
- LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
- LoopInfo *LI, LoopVectorizationLegality *Legal,
+ LoopVectorizationCostModel(ScalarEpilogueLowering SEL, Loop *L,
+ PredicatedScalarEvolution &PSE, LoopInfo *LI,
+ LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
const TargetLibraryInfo *TLI, DemandedBits *DB,
AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, const Function *F,
const LoopVectorizeHints *Hints,
InterleavedAccessInfo &IAI)
- : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
- AC(AC), ORE(ORE), TheFunction(F), Hints(Hints), InterleaveInfo(IAI) {}
+ : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
+ TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
+ Hints(Hints), InterleaveInfo(IAI) {}
/// \return An upper bound for the vectorization factor, or None if
/// vectorization and interleaving should be avoided up front.
- Optional<unsigned> computeMaxVF(bool OptForSize);
+ Optional<unsigned> computeMaxVF();
+
+ /// \return True if runtime checks are required for vectorization, and false
+ /// otherwise.
+ bool runtimeChecksRequired();
/// \return The most profitable vectorization factor and the cost of that VF.
/// This method checks every power of two up to MaxVF. If UserVF is not ZERO
@@ -881,8 +991,7 @@ public:
/// If interleave count has been specified by metadata it will be returned.
/// Otherwise, the interleave count is computed and returned. VF and LoopCost
/// are the selected vectorization factor and the cost of the selected VF.
- unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
- unsigned LoopCost);
+ unsigned selectInterleaveCount(unsigned VF, unsigned LoopCost);
/// Memory access instruction may be vectorized in more than one way.
/// Form of instruction after vectorization depends on cost.
@@ -897,10 +1006,11 @@ public:
/// of a loop.
struct RegisterUsage {
/// Holds the number of loop invariant values that are used in the loop.
- unsigned LoopInvariantRegs;
-
+ /// The key is ClassID of target-provided register class.
+ SmallMapVector<unsigned, unsigned, 4> LoopInvariantRegs;
/// Holds the maximum number of concurrent live intervals in the loop.
- unsigned MaxLocalUsers;
+ /// The key is ClassID of target-provided register class.
+ SmallMapVector<unsigned, unsigned, 4> MaxLocalUsers;
};
/// \return Returns information about the register usages of the loop for the
@@ -1080,14 +1190,16 @@ public:
/// Returns true if the target machine supports masked store operation
/// for the given \p DataType and kind of access to \p Ptr.
- bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
- return Legal->isConsecutivePtr(Ptr) && TTI.isLegalMaskedStore(DataType);
+ bool isLegalMaskedStore(Type *DataType, Value *Ptr, MaybeAlign Alignment) {
+ return Legal->isConsecutivePtr(Ptr) &&
+ TTI.isLegalMaskedStore(DataType, Alignment);
}
/// Returns true if the target machine supports masked load operation
/// for the given \p DataType and kind of access to \p Ptr.
- bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
- return Legal->isConsecutivePtr(Ptr) && TTI.isLegalMaskedLoad(DataType);
+ bool isLegalMaskedLoad(Type *DataType, Value *Ptr, MaybeAlign Alignment) {
+ return Legal->isConsecutivePtr(Ptr) &&
+ TTI.isLegalMaskedLoad(DataType, Alignment);
}
/// Returns true if the target machine supports masked scatter operation
@@ -1157,11 +1269,14 @@ public:
/// to handle accesses with gaps, and there is nothing preventing us from
/// creating a scalar epilogue.
bool requiresScalarEpilogue() const {
- return IsScalarEpilogueAllowed && InterleaveInfo.requiresScalarEpilogue();
+ return isScalarEpilogueAllowed() && InterleaveInfo.requiresScalarEpilogue();
}
- /// Returns true if a scalar epilogue is not allowed due to optsize.
- bool isScalarEpilogueAllowed() const { return IsScalarEpilogueAllowed; }
+ /// Returns true if a scalar epilogue is not allowed due to optsize or a
+ /// loop hint annotation.
+ bool isScalarEpilogueAllowed() const {
+ return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed;
+ }
/// Returns true if all loop blocks should be masked to fold tail loop.
bool foldTailByMasking() const { return FoldTailByMasking; }
@@ -1187,7 +1302,7 @@ private:
/// \return An upper bound for the vectorization factor, larger than zero.
/// One is returned if vectorization should best be avoided due to cost.
- unsigned computeFeasibleMaxVF(bool OptForSize, unsigned ConstTripCount);
+ unsigned computeFeasibleMaxVF(unsigned ConstTripCount);
/// The vectorization cost is a combination of the cost itself and a boolean
/// indicating whether any of the contributing operations will actually
@@ -1246,15 +1361,6 @@ private:
/// should be used.
bool useEmulatedMaskMemRefHack(Instruction *I);
- /// Create an analysis remark that explains why vectorization failed
- ///
- /// \p RemarkName is the identifier for the remark. \return the remark object
- /// that can be streamed to.
- OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName) {
- return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
- RemarkName, TheLoop);
- }
-
/// Map of scalar integer values to the smallest bitwidth they can be legally
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
@@ -1270,13 +1376,13 @@ private:
SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;
/// Records whether it is allowed to have the original scalar loop execute at
- /// least once. This may be needed as a fallback loop in case runtime
+ /// least once. This may be needed as a fallback loop in case runtime
/// aliasing/dependence checks fail, or to handle the tail/remainder
/// iterations when the trip count is unknown or doesn't divide by the VF,
/// or as a peel-loop to handle gaps in interleave-groups.
/// Under optsize and when the trip count is very small we don't allow any
/// iterations to execute in the scalar loop.
- bool IsScalarEpilogueAllowed = true;
+ ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
/// All blocks of loop are to be masked to fold tail of scalar iterations.
bool FoldTailByMasking = false;
@@ -1496,7 +1602,7 @@ struct LoopVectorize : public FunctionPass {
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
@@ -2253,12 +2359,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Type *ScalarDataTy = getMemInstValueType(Instr);
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = getLoadStorePointerOperand(Instr);
- unsigned Alignment = getLoadStoreAlignment(Instr);
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
const DataLayout &DL = Instr->getModule()->getDataLayout();
- if (!Alignment)
- Alignment = DL.getABITypeAlignment(ScalarDataTy);
+ const Align Alignment =
+ DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy);
unsigned AddressSpace = getLoadStoreAddressSpace(Instr);
// Determine if the pointer operand of the access is either consecutive or
@@ -2322,8 +2427,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
- NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
- MaskPart);
+ NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep,
+ Alignment.value(), MaskPart);
} else {
if (Reverse) {
// If we store to reverse consecutive memory locations, then we need
@@ -2334,10 +2439,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
}
auto *VecPtr = CreateVecPtr(Part, Ptr);
if (isMaskRequired)
- NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
- Mask[Part]);
+ NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr,
+ Alignment.value(), Mask[Part]);
else
- NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+ NewSI =
+ Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment.value());
}
addMetadata(NewSI, SI);
}
@@ -2352,18 +2458,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
- NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart,
+ NewLI = Builder.CreateMaskedGather(VectorGep, Alignment.value(), MaskPart,
nullptr, "wide.masked.gather");
addMetadata(NewLI, LI);
} else {
auto *VecPtr = CreateVecPtr(Part, Ptr);
if (isMaskRequired)
- NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part],
+ NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment.value(), Mask[Part],
UndefValue::get(DataTy),
"wide.masked.load");
else
- NewLI =
- Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
+ NewLI = Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment.value(),
+ "wide.load");
// Add metadata to the load, but setVectorValue to the reverse shuffle.
addMetadata(NewLI, LI);
@@ -2615,8 +2721,9 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
if (C->isZero())
return;
- assert(!Cost->foldTailByMasking() &&
- "Cannot SCEV check stride or overflow when folding tail");
+ assert(!BB->getParent()->hasOptSize() &&
+ "Cannot SCEV check stride or overflow when optimizing for size");
+
// Create a new block containing the stride check.
BB->setName("vector.scevcheck");
auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
@@ -2649,7 +2756,20 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
if (!MemRuntimeCheck)
return;
- assert(!Cost->foldTailByMasking() && "Cannot check memory when folding tail");
+ if (BB->getParent()->hasOptSize()) {
+ assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
+ "Cannot emit memory checks when optimizing for size, unless forced "
+ "to vectorize.");
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize",
+ L->getStartLoc(), L->getHeader())
+ << "Code-size may be reduced by not forcing "
+ "vectorization, or by source-code modifications "
+ "eliminating the need for runtime checks "
+ "(e.g., adding 'restrict').";
+ });
+ }
+
// Create a new block containing the memory check.
BB->setName("vector.memcheck");
auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
@@ -2666,7 +2786,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
// We currently don't use LoopVersioning for the actual loop cloning but we
// still use it to add the noalias metadata.
- LVer = llvm::make_unique<LoopVersioning>(*Legal->getLAI(), OrigLoop, LI, DT,
+ LVer = std::make_unique<LoopVersioning>(*Legal->getLAI(), OrigLoop, LI, DT,
PSE.getSE());
LVer->prepareNoAliasMetadata();
}
@@ -3598,6 +3718,26 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
setDebugLocFromInst(Builder, LoopExitInst);
+ // If tail is folded by masking, the vector value to leave the loop should be
+ // a Select choosing between the vectorized LoopExitInst and vectorized Phi,
+ // instead of the former.
+ if (Cost->foldTailByMasking()) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *VecLoopExitInst =
+ VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
+ Value *Sel = nullptr;
+ for (User *U : VecLoopExitInst->users()) {
+ if (isa<SelectInst>(U)) {
+ assert(!Sel && "Reduction exit feeding two selects");
+ Sel = U;
+ } else
+ assert(isa<PHINode>(U) && "Reduction exit must feed Phi's or select");
+ }
+ assert(Sel && "Reduction exit feeds no select");
+ VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, Sel);
+ }
+ }
+
// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
@@ -4064,7 +4204,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
case Instruction::FCmp: {
// Widen compares. Generate vector compares.
bool FCmp = (I.getOpcode() == Instruction::FCmp);
- auto *Cmp = dyn_cast<CmpInst>(&I);
+ auto *Cmp = cast<CmpInst>(&I);
setDebugLocFromInst(Builder, Cmp);
for (unsigned Part = 0; Part < UF; ++Part) {
Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part);
@@ -4097,7 +4237,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- auto *CI = dyn_cast<CastInst>(&I);
+ auto *CI = cast<CastInst>(&I);
setDebugLocFromInst(Builder, CI);
/// Vectorize casts.
@@ -4421,9 +4561,10 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
"Widening decision should be ready at this moment");
return WideningDecision == CM_Scalarize;
}
+ const MaybeAlign Alignment = getLoadStoreAlignment(I);
return isa<LoadInst>(I) ?
- !(isLegalMaskedLoad(Ty, Ptr) || isLegalMaskedGather(Ty))
- : !(isLegalMaskedStore(Ty, Ptr) || isLegalMaskedScatter(Ty));
+ !(isLegalMaskedLoad(Ty, Ptr, Alignment) || isLegalMaskedGather(Ty))
+ : !(isLegalMaskedStore(Ty, Ptr, Alignment) || isLegalMaskedScatter(Ty));
}
case Instruction::UDiv:
case Instruction::SDiv:
@@ -4452,10 +4593,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
// Check if masking is required.
// A Group may need masking for one of two reasons: it resides in a block that
// needs predication, or it was decided to use masking to deal with gaps.
- bool PredicatedAccessRequiresMasking =
+ bool PredicatedAccessRequiresMasking =
Legal->blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I);
- bool AccessWithGapsRequiresMasking =
- Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
+ bool AccessWithGapsRequiresMasking =
+ Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
if (!PredicatedAccessRequiresMasking && !AccessWithGapsRequiresMasking)
return true;
@@ -4466,8 +4607,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
"Masked interleave-groups for predicated accesses are not enabled.");
auto *Ty = getMemInstValueType(I);
- return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty)
- : TTI.isLegalMaskedStore(Ty);
+ const MaybeAlign Alignment = getLoadStoreAlignment(I);
+ return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment)
+ : TTI.isLegalMaskedStore(Ty, Alignment);
}
bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
@@ -4675,82 +4817,96 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
Uniforms[VF].insert(Worklist.begin(), Worklist.end());
}
-Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
- if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
- // TODO: It may by useful to do since it's still likely to be dynamically
- // uniform if the target can skip.
- LLVM_DEBUG(
- dbgs() << "LV: Not inserting runtime ptr check for divergent target");
-
- ORE->emit(
- createMissedAnalysis("CantVersionLoopWithDivergentTarget")
- << "runtime pointer checks needed. Not enabled for divergent target");
-
- return None;
- }
-
- unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (!OptForSize) // Remaining checks deal with scalar loop when OptForSize.
- return computeFeasibleMaxVF(OptForSize, TC);
+bool LoopVectorizationCostModel::runtimeChecksRequired() {
+ LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n");
if (Legal->getRuntimePointerChecking()->Need) {
- ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
- << "runtime pointer checks needed. Enable vectorization of this "
- "loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os/-Oz");
- LLVM_DEBUG(
- dbgs()
- << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
- return None;
+ reportVectorizationFailure("Runtime ptr check is required with -Os/-Oz",
+ "runtime pointer checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz",
+ "CantVersionLoopWithOptForSize", ORE, TheLoop);
+ return true;
}
if (!PSE.getUnionPredicate().getPredicates().empty()) {
- ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
- << "runtime SCEV checks needed. Enable vectorization of this "
- "loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os/-Oz");
- LLVM_DEBUG(
- dbgs()
- << "LV: Aborting. Runtime SCEV check is required with -Os/-Oz.\n");
- return None;
+ reportVectorizationFailure("Runtime SCEV check is required with -Os/-Oz",
+ "runtime SCEV checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz",
+ "CantVersionLoopWithOptForSize", ORE, TheLoop);
+ return true;
}
// FIXME: Avoid specializing for stride==1 instead of bailing out.
if (!Legal->getLAI()->getSymbolicStrides().empty()) {
- ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
- << "runtime stride == 1 checks needed. Enable vectorization of "
- "this loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os/-Oz");
- LLVM_DEBUG(
- dbgs()
- << "LV: Aborting. Runtime stride check is required with -Os/-Oz.\n");
+ reportVectorizationFailure("Runtime stride check is required with -Os/-Oz",
+ "runtime stride == 1 checks needed. Enable vectorization of "
+ "this loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz",
+ "CantVersionLoopWithOptForSize", ORE, TheLoop);
+ return true;
+ }
+
+ return false;
+}
+
+Optional<unsigned> LoopVectorizationCostModel::computeMaxVF() {
+ if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
+ // TODO: It may by useful to do since it's still likely to be dynamically
+ // uniform if the target can skip.
+ reportVectorizationFailure(
+ "Not inserting runtime ptr check for divergent target",
+ "runtime pointer checks needed. Not enabled for divergent target",
+ "CantVersionLoopWithDivergentTarget", ORE, TheLoop);
return None;
}
- // If we optimize the program for size, avoid creating the tail loop.
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
-
if (TC == 1) {
- ORE->emit(createMissedAnalysis("SingleIterationLoop")
- << "loop trip count is one, irrelevant for vectorization");
- LLVM_DEBUG(dbgs() << "LV: Aborting, single iteration (non) loop.\n");
+ reportVectorizationFailure("Single iteration (non) loop",
+ "loop trip count is one, irrelevant for vectorization",
+ "SingleIterationLoop", ORE, TheLoop);
return None;
}
- // Record that scalar epilogue is not allowed.
- LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n");
+ switch (ScalarEpilogueStatus) {
+ case CM_ScalarEpilogueAllowed:
+ return computeFeasibleMaxVF(TC);
+ case CM_ScalarEpilogueNotNeededUsePredicate:
+ LLVM_DEBUG(
+ dbgs() << "LV: vector predicate hint/switch found.\n"
+ << "LV: Not allowing scalar epilogue, creating predicated "
+ << "vector loop.\n");
+ break;
+ case CM_ScalarEpilogueNotAllowedLowTripLoop:
+ // fallthrough as a special case of OptForSize
+ case CM_ScalarEpilogueNotAllowedOptSize:
+ if (ScalarEpilogueStatus == CM_ScalarEpilogueNotAllowedOptSize)
+ LLVM_DEBUG(
+ dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n");
+ else
+ LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to low trip "
+ << "count.\n");
+
+ // Bail if runtime checks are required, which are not good when optimising
+ // for size.
+ if (runtimeChecksRequired())
+ return None;
+ break;
+ }
- IsScalarEpilogueAllowed = !OptForSize;
+ // Now try the tail folding
- // We don't create an epilogue when optimizing for size.
// Invalidate interleave groups that require an epilogue if we can't mask
// the interleave-group.
- if (!useMaskedInterleavedAccesses(TTI))
+ if (!useMaskedInterleavedAccesses(TTI))
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
- unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC);
-
+ unsigned MaxVF = computeFeasibleMaxVF(TC);
if (TC > 0 && TC % MaxVF == 0) {
+ // Accept MaxVF if we do not have a tail.
LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
return MaxVF;
}
@@ -4759,28 +4915,30 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
// found modulo the vectorization factor is not zero, try to fold the tail
// by masking.
// FIXME: look for a smaller MaxVF that does divide TC rather than masking.
- if (Legal->canFoldTailByMasking()) {
+ if (Legal->prepareToFoldTailByMasking()) {
FoldTailByMasking = true;
return MaxVF;
}
if (TC == 0) {
- ORE->emit(
- createMissedAnalysis("UnknownLoopCountComplexCFG")
- << "unable to calculate the loop count due to complex control flow");
+ reportVectorizationFailure(
+ "Unable to calculate the loop count due to complex control flow",
+ "unable to calculate the loop count due to complex control flow",
+ "UnknownLoopCountComplexCFG", ORE, TheLoop);
return None;
}
- ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize")
- << "cannot optimize for size and vectorize at the same time. "
- "Enable vectorization of this loop with '#pragma clang loop "
- "vectorize(enable)' when compiling with -Os/-Oz");
+ reportVectorizationFailure(
+ "Cannot optimize for size and vectorize at the same time.",
+ "cannot optimize for size and vectorize at the same time. "
+ "Enable vectorization of this loop with '#pragma clang loop "
+ "vectorize(enable)' when compiling with -Os/-Oz",
+ "NoTailLoopWithOptForSize", ORE, TheLoop);
return None;
}
unsigned
-LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize,
- unsigned ConstTripCount) {
+LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
unsigned SmallestType, WidestType;
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
@@ -4818,8 +4976,8 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize,
}
unsigned MaxVF = MaxVectorSize;
- if (TTI.shouldMaximizeVectorBandwidth(OptForSize) ||
- (MaximizeBandwidth && !OptForSize)) {
+ if (TTI.shouldMaximizeVectorBandwidth(!isScalarEpilogueAllowed()) ||
+ (MaximizeBandwidth && isScalarEpilogueAllowed())) {
// Collect all viable vectorization factors larger than the default MaxVF
// (i.e. MaxVectorSize).
SmallVector<unsigned, 8> VFs;
@@ -4832,9 +4990,14 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize,
// Select the largest VF which doesn't require more registers than existing
// ones.
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
for (int i = RUs.size() - 1; i >= 0; --i) {
- if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
+ bool Selected = true;
+ for (auto& pair : RUs[i].MaxLocalUsers) {
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
+ if (pair.second > TargetNumRegisters)
+ Selected = false;
+ }
+ if (Selected) {
MaxVF = VFs[i];
break;
}
@@ -4886,10 +5049,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) {
}
if (!EnableCondStoresVectorization && NumPredStores) {
- ORE->emit(createMissedAnalysis("ConditionalStore")
- << "store that is conditionally executed prevents vectorization");
- LLVM_DEBUG(
- dbgs() << "LV: No vectorization. There are conditional stores.\n");
+ reportVectorizationFailure("There are conditional stores.",
+ "store that is conditionally executed prevents vectorization",
+ "ConditionalStore", ORE, TheLoop);
Width = 1;
Cost = ScalarCost;
}
@@ -4958,8 +5120,7 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
return {MinWidth, MaxWidth};
}
-unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
- unsigned VF,
+unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
unsigned LoopCost) {
// -- The interleave heuristics --
// We interleave the loop in order to expose ILP and reduce the loop overhead.
@@ -4975,8 +5136,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
// 3. We don't interleave if we think that we will spill registers to memory
// due to the increased register pressure.
- // When we optimize for size, we don't interleave.
- if (OptForSize)
+ if (!isScalarEpilogueAllowed())
return 1;
// We used the distance for the interleave count.
@@ -4988,22 +5148,12 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
return 1;
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
- LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
- << " registers\n");
-
- if (VF == 1) {
- if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
- TargetNumRegisters = ForceTargetNumScalarRegs;
- } else {
- if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
- TargetNumRegisters = ForceTargetNumVectorRegs;
- }
-
RegisterUsage R = calculateRegisterUsage({VF})[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
- R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
+ for (auto& pair : R.MaxLocalUsers) {
+ pair.second = std::max(pair.second, 1U);
+ }
// We calculate the interleave count using the following formula.
// Subtract the number of loop invariants from the number of available
@@ -5016,13 +5166,35 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
// We also want power of two interleave counts to ensure that the induction
// variable of the vector loop wraps to zero, when tail is folded by masking;
// this currently happens when optimizing for size, in which case IC is set to 1 above.
- unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
- R.MaxLocalUsers);
+ unsigned IC = UINT_MAX;
- // Don't count the induction variable as interleaved.
- if (EnableIndVarRegisterHeur)
- IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) /
- std::max(1U, (R.MaxLocalUsers - 1)));
+ for (auto& pair : R.MaxLocalUsers) {
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
+ LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
+ << " registers of "
+ << TTI.getRegisterClassName(pair.first) << " register class\n");
+ if (VF == 1) {
+ if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumScalarRegs;
+ } else {
+ if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumVectorRegs;
+ }
+ unsigned MaxLocalUsers = pair.second;
+ unsigned LoopInvariantRegs = 0;
+ if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end())
+ LoopInvariantRegs = R.LoopInvariantRegs[pair.first];
+
+ unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
+ // Don't count the induction variable as interleaved.
+ if (EnableIndVarRegisterHeur) {
+ TmpIC =
+ PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) /
+ std::max(1U, (MaxLocalUsers - 1)));
+ }
+
+ IC = std::min(IC, TmpIC);
+ }
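The per-class computation is the old single-class formula, evaluated once per register class and minimized. Worked numbers (assumed purely for illustration):

// One class with 32 registers, 2 loop-invariant values, peak of 6 live values:
//   (32 - 2) / 6 = 5, and PowerOf2Floor(5) == 4, so TmpIC = 4.
// With EnableIndVarRegisterHeur (induction variable not counted):
//   (32 - 2 - 1) / (6 - 1) = 5, so TmpIC = 4 here as well.
// IC ends up as the minimum TmpIC over all register classes in use.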
// Clamp the interleave ranges to reasonable counts.
unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
@@ -5036,6 +5208,14 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor;
}
+ // If the trip count is constant, limit the interleave count to be less than
+ // the trip count divided by VF.
+ if (TC > 0) {
+ assert(TC >= VF && "VF exceeds trip count?");
+ if ((TC / VF) < MaxInterleaveCount)
+ MaxInterleaveCount = (TC / VF);
+ }
+
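The new clamp keeps VF * IC within a known trip count. With illustrative numbers: TC = 20 and VF = 4 give TC / VF = 5, so MaxInterleaveCount drops to 5 even if the target would allow 8, since interleaving more copies than there are remaining iterations cannot pay off.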
// If we did not calculate the cost for VF (because the user selected the VF)
// then we calculate the cost of VF here.
if (LoopCost == 0)
@@ -5044,7 +5224,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
assert(LoopCost && "Non-zero loop cost expected");
// Clamp the calculated IC to be between the 1 and the max interleave count
- // that the target allows.
+ // that the target and trip count allow.
if (IC > MaxInterleaveCount)
IC = MaxInterleaveCount;
else if (IC < 1)
@@ -5196,7 +5376,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
const DataLayout &DL = TheFunction->getParent()->getDataLayout();
SmallVector<RegisterUsage, 8> RUs(VFs.size());
- SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);
+ SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
@@ -5226,21 +5406,45 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
// For each VF find the maximum usage of registers.
for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
+ // Count the number of live intervals.
+ SmallMapVector<unsigned, unsigned, 4> RegUsage;
+
if (VFs[j] == 1) {
- MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
- continue;
+ for (auto Inst : OpenIntervals) {
+ unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
+ if (RegUsage.find(ClassID) == RegUsage.end())
+ RegUsage[ClassID] = 1;
+ else
+ RegUsage[ClassID] += 1;
+ }
+ } else {
+ collectUniformsAndScalars(VFs[j]);
+ for (auto Inst : OpenIntervals) {
+ // Skip ignored values for VF > 1.
+ if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end())
+ continue;
+ if (isScalarAfterVectorization(Inst, VFs[j])) {
+ unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
+ if (RegUsage.find(ClassID) == RegUsage.end())
+ RegUsage[ClassID] = 1;
+ else
+ RegUsage[ClassID] += 1;
+ } else {
+ unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType());
+ if (RegUsage.find(ClassID) == RegUsage.end())
+ RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]);
+ else
+ RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]);
+ }
+ }
}
- collectUniformsAndScalars(VFs[j]);
- // Count the number of live intervals.
- unsigned RegUsage = 0;
- for (auto Inst : OpenIntervals) {
- // Skip ignored values for VF > 1.
- if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() ||
- isScalarAfterVectorization(Inst, VFs[j]))
- continue;
- RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
+
+ for (auto& pair : RegUsage) {
+ if (MaxUsages[j].find(pair.first) != MaxUsages[j].end())
+ MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second);
+ else
+ MaxUsages[j][pair.first] = pair.second;
}
- MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
}
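RegUsage and MaxUsages are now keyed by TTI register-class ID rather than being single counters. As a stylistic observation only, the find-then-insert sequences above could equally rely on MapVector::operator[] value-initializing a missing unsigned to 0; the behavior is identical:

// Equivalent shorter form of the two update patterns above:
RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]);
MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second);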
LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
@@ -5251,18 +5455,34 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
}
for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
- unsigned Invariant = 0;
- if (VFs[i] == 1)
- Invariant = LoopInvariants.size();
- else {
- for (auto Inst : LoopInvariants)
- Invariant += GetRegUsage(Inst->getType(), VFs[i]);
+ SmallMapVector<unsigned, unsigned, 4> Invariant;
+
+ for (auto Inst : LoopInvariants) {
+ unsigned Usage = VFs[i] == 1 ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
+ unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType());
+ if (Invariant.find(ClassID) == Invariant.end())
+ Invariant[ClassID] = Usage;
+ else
+ Invariant[ClassID] += Usage;
}
- LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
- LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
- LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant
- << '\n');
+ LLVM_DEBUG({
+ dbgs() << "LV(REG): VF = " << VFs[i] << '\n';
+ dbgs() << "LV(REG): Found max usage: " << MaxUsages[i].size()
+ << " item\n";
+ for (const auto &pair : MaxUsages[i]) {
+ dbgs() << "LV(REG): RegisterClass: "
+ << TTI.getRegisterClassName(pair.first) << ", " << pair.second
+ << " registers\n";
+ }
+ dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
+ << " item\n";
+ for (const auto &pair : Invariant) {
+ dbgs() << "LV(REG): RegisterClass: "
+ << TTI.getRegisterClassName(pair.first) << ", " << pair.second
+ << " registers\n";
+ }
+ });
RU.LoopInvariantRegs = Invariant;
RU.MaxLocalUsers = MaxUsages[i];
@@ -5511,7 +5731,6 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
Type *ValTy = getMemInstValueType(I);
auto SE = PSE.getSE();
- unsigned Alignment = getLoadStoreAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);
Value *Ptr = getLoadStorePointerOperand(I);
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -5525,9 +5744,9 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// Don't pass *I here, since it is scalar but will actually be part of a
// vectorized loop where the user of it is a vectorized instruction.
- Cost += VF *
- TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
- AS);
+ const MaybeAlign Alignment = getLoadStoreAlignment(I);
+ Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
+ Alignment ? Alignment->value() : 0, AS);
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
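This and the following hunks migrate memory-cost queries from a raw unsigned alignment to MaybeAlign, whose empty state means the alignment is unknown. The TTI cost hooks at this revision still take a plain unsigned, hence the recurring bridge (a two-line sketch of the idiom, assuming MaybeAlign's Optional-like interface):

const MaybeAlign Alignment = getLoadStoreAlignment(I); // empty if unspecified
unsigned AlignVal = Alignment ? Alignment->value() : 0; // 0 = unknown to TTI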
@@ -5552,18 +5771,20 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = getLoadStoreAlignment(I);
Value *Ptr = getLoadStorePointerOperand(I);
unsigned AS = getLoadStoreAddressSpace(I);
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
"Stride should be 1 or -1 for consecutive memory access");
+ const MaybeAlign Alignment = getLoadStoreAlignment(I);
unsigned Cost = 0;
if (Legal->isMaskRequired(I))
- Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+ Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
+ Alignment ? Alignment->value() : 0, AS);
else
- Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
+ Alignment ? Alignment->value() : 0, AS, I);
bool Reverse = ConsecutiveStride < 0;
if (Reverse)
@@ -5575,33 +5796,37 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = getLoadStoreAlignment(I);
+ const MaybeAlign Alignment = getLoadStoreAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);
if (isa<LoadInst>(I)) {
return TTI.getAddressComputationCost(ValTy) +
- TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
+ TTI.getMemoryOpCost(Instruction::Load, ValTy,
+ Alignment ? Alignment->value() : 0, AS) +
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
}
StoreInst *SI = cast<StoreInst>(I);
bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +
- TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
- (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost(
- Instruction::ExtractElement,
- VectorTy, VF - 1));
+ TTI.getMemoryOpCost(Instruction::Store, ValTy,
+ Alignment ? Alignment->value() : 0, AS) +
+ (isLoopInvariantStoreValue
+ ? 0
+ : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
+ VF - 1));
}
unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = getLoadStoreAlignment(I);
+ const MaybeAlign Alignment = getLoadStoreAlignment(I);
Value *Ptr = getLoadStorePointerOperand(I);
return TTI.getAddressComputationCost(VectorTy) +
TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
- Legal->isMaskRequired(I), Alignment);
+ Legal->isMaskRequired(I),
+ Alignment ? Alignment->value() : 0);
}
unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
@@ -5626,8 +5851,8 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
}
// Calculate the cost of the whole interleaved group.
- bool UseMaskForGaps =
- Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
+ bool UseMaskForGaps =
+ Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
unsigned Cost = TTI.getInterleavedMemoryOpCost(
I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
Group->getAlignment(), AS, Legal->isMaskRequired(I), UseMaskForGaps);
@@ -5648,11 +5873,12 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
// moment.
if (VF == 1) {
Type *ValTy = getMemInstValueType(I);
- unsigned Alignment = getLoadStoreAlignment(I);
+ const MaybeAlign Alignment = getLoadStoreAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);
return TTI.getAddressComputationCost(ValTy) +
- TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
+ TTI.getMemoryOpCost(I->getOpcode(), ValTy,
+ Alignment ? Alignment->value() : 0, AS, I);
}
return getWideningCost(I, VF);
}
@@ -6167,8 +6393,7 @@ static unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
}
VectorizationFactor
-LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
- unsigned UserVF) {
+LoopVectorizationPlanner::planInVPlanNativePath(unsigned UserVF) {
unsigned VF = UserVF;
// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
@@ -6207,10 +6432,9 @@ LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
return VectorizationFactor::Disabled();
}
-Optional<VectorizationFactor> LoopVectorizationPlanner::plan(bool OptForSize,
- unsigned UserVF) {
+Optional<VectorizationFactor> LoopVectorizationPlanner::plan(unsigned UserVF) {
assert(OrigLoop->empty() && "Inner loop expected.");
- Optional<unsigned> MaybeMaxVF = CM.computeMaxVF(OptForSize);
+ Optional<unsigned> MaybeMaxVF = CM.computeMaxVF();
if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved.
return None;
@@ -6840,8 +7064,15 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
// If the tail is to be folded by masking, the primary induction variable
// needs to be represented in VPlan for it to model early-exit masking.
- if (CM.foldTailByMasking())
+ // Also, both the Phi and the live-out instruction of each reduction are
+ // required in order to introduce a select between them in VPlan.
+ if (CM.foldTailByMasking()) {
NeedDef.insert(Legal->getPrimaryInduction());
+ for (auto &Reduction : *Legal->getReductionVars()) {
+ NeedDef.insert(Reduction.first);
+ NeedDef.insert(Reduction.second.getLoopExitInstr());
+ }
+ }
// Collect instructions from the original loop that will become trivially dead
// in the vectorized loop. We don't need to vectorize these instructions. For
@@ -6873,7 +7104,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
- auto Plan = llvm::make_unique<VPlan>(VPBB);
+ auto Plan = std::make_unique<VPlan>(VPBB);
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
// Represent values that will have defs inside VPlan.
@@ -6968,6 +7199,18 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
delete PreEntry;
+ // Finally, if tail is folded by masking, introduce selects between the phi
+ // and the live-out instruction of each reduction, at the end of the latch.
+ if (CM.foldTailByMasking()) {
+ Builder.setInsertPoint(VPBB);
+ auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
+ for (auto &Reduction : *Legal->getReductionVars()) {
+ VPValue *Phi = Plan->getVPValue(Reduction.first);
+ VPValue *Red = Plan->getVPValue(Reduction.second.getLoopExitInstr());
+ Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi});
+ }
+ }
+
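Together with the NeedDef additions earlier, the net effect per reduction is a blend at the end of the latch: masked-off lanes keep the phi's value rather than a partially updated one. Schematically (VPlan pseudo-ops, names illustrative):

//   %mask = block-in mask of the loop header
//   %phi  = reduction phi              (Reduction.first)
//   %red  = reduction live-out         (Reduction.second.getLoopExitInstr())
//   %sel  = select %mask, %red, %phi   // inactive lanes keep %phi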
std::string PlanName;
raw_string_ostream RSO(PlanName);
unsigned VF = Range.Start;
@@ -6993,7 +7236,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
// Create new empty VPlan
- auto Plan = llvm::make_unique<VPlan>();
+ auto Plan = std::make_unique<VPlan>();
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
@@ -7199,6 +7442,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
}
+static ScalarEpilogueLowering
+getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed;
+ if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ (F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)))
+ SEL = CM_ScalarEpilogueNotAllowedOptSize;
+ else if (PreferPredicateOverEpilog || Hints.getPredicate())
+ SEL = CM_ScalarEpilogueNotNeededUsePredicate;
+
+ return SEL;
+}
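This helper replaces the boolean OptForSize plumbing with a small state. Judging from the uses visible in this patch, the states read roughly as follows (an assumed summary, not an authoritative one):

// CM_ScalarEpilogueAllowed               - default; a scalar remainder loop
//                                          may be generated.
// CM_ScalarEpilogueNotAllowedOptSize     - -Os/-Oz or profile-cold code; no
//                                          scalar remainder allowed.
// CM_ScalarEpilogueNotNeededUsePredicate - user or target prefers folding
//                                          the tail by masking.
// CM_ScalarEpilogueNotAllowedLowTripLoop - tiny trip count; treated like the
//                                          opt-size case (set further below).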
+
// Process the loop in the VPlan-native vectorization path. This path builds
// VPlan upfront in the vectorization pipeline, which allows applying
// VPlan-to-VPlan transformations from the very beginning without modifying the
@@ -7213,7 +7470,9 @@ static bool processLoopInVPlanNativePath(
assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
Function *F = L->getHeader()->getParent();
InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI());
- LoopVectorizationCostModel CM(L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
+ ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
+
+ LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
&Hints, IAI);
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
@@ -7223,15 +7482,8 @@ static bool processLoopInVPlanNativePath(
// Get user vectorization factor.
const unsigned UserVF = Hints.getWidth();
- // Check the function attributes and profiles to find out if this function
- // should be optimized for size.
- bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
- (F->hasOptSize() ||
- llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
-
// Plan how to best vectorize, return the best VF and its cost.
- const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
+ const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF);
// If we are stress testing VPlan builds, do not attempt to generate vector
// code. Masked vector code generation support will follow soon.
@@ -7310,10 +7562,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the function attributes and profiles to find out if this function
// should be optimized for size.
- bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
- (F->hasOptSize() ||
- llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
+ ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
@@ -7325,36 +7574,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ORE, BFI, PSI, Hints);
assert(L->empty() && "Inner loop expected.");
+
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
// count by optimizing for size, to minimize overheads.
- // Prefer constant trip counts over profile data, over upper bound estimate.
- unsigned ExpectedTC = 0;
- bool HasExpectedTC = false;
- if (const SCEVConstant *ConstExits =
- dyn_cast<SCEVConstant>(SE->getBackedgeTakenCount(L))) {
- const APInt &ExitsCount = ConstExits->getAPInt();
- // We are interested in small values for ExpectedTC. Skip over those that
- // can't fit an unsigned.
- if (ExitsCount.ult(std::numeric_limits<unsigned>::max())) {
- ExpectedTC = static_cast<unsigned>(ExitsCount.getZExtValue()) + 1;
- HasExpectedTC = true;
- }
- }
- // ExpectedTC may be large because it's bound by a variable. Check
- // profiling information to validate we should vectorize.
- if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) {
- auto EstimatedTC = getLoopEstimatedTripCount(L);
- if (EstimatedTC) {
- ExpectedTC = *EstimatedTC;
- HasExpectedTC = true;
- }
- }
- if (!HasExpectedTC) {
- ExpectedTC = SE->getSmallConstantMaxTripCount(L);
- HasExpectedTC = (ExpectedTC > 0);
- }
-
- if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
+ auto ExpectedTC = getSmallBestKnownTC(*SE, L);
+ if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) {
LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
<< "This loop is worth vectorizing only if no scalar "
<< "iteration overheads are incurred.");
@@ -7362,10 +7586,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
LLVM_DEBUG(dbgs() << "\n");
- // Loops with a very small trip count are considered for vectorization
- // under OptForSize, thereby making sure the cost of their loop body is
- // dominant, free of runtime guards and scalar iteration overheads.
- OptForSize = true;
+ SEL = CM_ScalarEpilogueNotAllowedLowTripLoop;
}
}
@@ -7374,11 +7595,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// an integer loop and the vector instructions selected are purely integer
// vector instructions?
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
- LLVM_DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
- "attribute is used.\n");
- ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(),
- "NoImplicitFloat", L)
- << "loop not vectorized due to NoImplicitFloat attribute");
+ reportVectorizationFailure(
+ "Can't vectorize when the NoImplicitFloat attribute is used",
+ "loop not vectorized due to NoImplicitFloat attribute",
+ "NoImplicitFloat", ORE, L);
Hints.emitRemarkWithHints();
return false;
}
@@ -7389,11 +7609,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// additional fp-math flags can help.
if (Hints.isPotentiallyUnsafe() &&
TTI->isFPVectorizationPotentiallyUnsafe()) {
- LLVM_DEBUG(
- dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
- ORE->emit(
- createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
- << "loop not vectorized due to unsafe FP support.");
+ reportVectorizationFailure(
+ "Potentially unsafe FP op prevents vectorization",
+ "loop not vectorized due to unsafe FP support.",
+ "UnsafeFP", ORE, L);
Hints.emitRemarkWithHints();
return false;
}
@@ -7411,8 +7630,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F,
- &Hints, IAI);
+ LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
+ F, &Hints, IAI);
CM.collectValuesToIgnore();
// Use the planner for vectorization.
@@ -7422,7 +7641,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
unsigned UserVF = Hints.getWidth();
// Plan how to best vectorize, return the best VF and its cost.
- Optional<VectorizationFactor> MaybeVF = LVP.plan(OptForSize, UserVF);
+ Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF);
VectorizationFactor VF = VectorizationFactor::Disabled();
unsigned IC = 1;
@@ -7431,7 +7650,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (MaybeVF) {
VF = *MaybeVF;
// Select the interleave count.
- IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
+ IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
}
// Identify the diagnostic messages that should be produced.
@@ -7609,7 +7828,8 @@ bool LoopVectorizePass::runImpl(
// The second condition is necessary because, even if the target has no
// vector registers, loop vectorization may still enable scalar
// interleaving.
- if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
+ if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
+ TTI->getMaxInterleaveFactor(1) < 2)
return false;
bool Changed = false;
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 27a86c0bca91..974eff9974d9 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -194,10 +194,13 @@ static bool allSameBlock(ArrayRef<Value *> VL) {
return true;
}
-/// \returns True if all of the values in \p VL are constants.
+/// \returns True if all of the values in \p VL are constants (but not
+/// globals/constant expressions).
static bool allConstant(ArrayRef<Value *> VL) {
+ // Constant expressions and globals can't be vectorized like normal integer/FP
+ // constants.
for (Value *i : VL)
- if (!isa<Constant>(i))
+ if (!isa<Constant>(i) || isa<ConstantExpr>(i) || isa<GlobalValue>(i))
return false;
return true;
}
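The tightened predicate matters because a ConstantExpr or GlobalValue is a Constant in the IR class hierarchy but not a plain immediate; splatting such values into vector lanes blocks the folding that ordinary integer/FP constants allow. A hypothetical pair of lanes that now gathers instead of vectorizing as constants:

//   store i64 ptrtoint (i32* @g to i64), i64* %p0   ; ConstantExpr lane
//   store i64 42,                        i64* %p1   ; plain immediate lane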
@@ -486,6 +489,7 @@ namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
struct TreeEntry;
+ struct ScheduleData;
public:
using ValueList = SmallVector<Value *, 8>;
@@ -614,6 +618,15 @@ public:
/// vectorizable. We do not vectorize such trees.
bool isTreeTinyAndNotFullyVectorizable() const;
+ /// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values
+ /// can be load combined in the backend. Load combining may not be allowed in
+ /// the IR optimizer, so we do not want to alter the pattern. For example,
+ /// partially transforming a scalar bswap() pattern into vector code is
+ /// effectively impossible for the backend to undo.
+ /// TODO: If load combining is allowed in the IR optimizer, this analysis
+ /// may not be necessary.
+ bool isLoadCombineReductionCandidate(unsigned ReductionOpcode) const;
+
OptimizationRemarkEmitter *getORE() { return ORE; }
/// This structure holds any data we need about the edges being traversed
@@ -1117,6 +1130,14 @@ public:
#endif
};
+ /// Checks if the instruction is marked for deletion.
+ bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }
+
+ /// Marks the given values for later deletion, replacing their uses with Undefs.
+ void eraseInstructions(ArrayRef<Value *> AV);
+
+ ~BoUpSLP();
+
private:
/// Checks if all users of \p I are the part of the vectorization tree.
bool areAllUsersVectorized(Instruction *I) const;
@@ -1153,8 +1174,7 @@ private:
/// Set the Builder insert point to one after the last instruction in
/// the bundle
- void setInsertPointAfterBundle(ArrayRef<Value *> VL,
- const InstructionsState &S);
+ void setInsertPointAfterBundle(TreeEntry *E);
/// \returns a vector from a collection of scalars in \p VL.
Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
@@ -1220,27 +1240,37 @@ private:
/// reordering of operands during buildTree_rec() and vectorizeTree().
SmallVector<ValueList, 2> Operands;
+ /// The main/alternate instruction.
+ Instruction *MainOp = nullptr;
+ Instruction *AltOp = nullptr;
+
public:
/// Set this bundle's \p OpIdx'th operand to \p OpVL.
- void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL,
- ArrayRef<unsigned> ReuseShuffleIndices) {
+ void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
if (Operands.size() < OpIdx + 1)
Operands.resize(OpIdx + 1);
assert(Operands[OpIdx].size() == 0 && "Already resized?");
Operands[OpIdx].resize(Scalars.size());
for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
- Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty())
- ? OpVL[ReuseShuffleIndices[Lane]]
- : OpVL[Lane];
- }
-
- /// If there is a user TreeEntry, then set its operand.
- void trySetUserTEOperand(const EdgeInfo &UserTreeIdx,
- ArrayRef<Value *> OpVL,
- ArrayRef<unsigned> ReuseShuffleIndices) {
- if (UserTreeIdx.UserTE)
- UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL,
- ReuseShuffleIndices);
+ Operands[OpIdx][Lane] = OpVL[Lane];
+ }
+
+ /// Set the operands of this bundle in their original order.
+ void setOperandsInOrder() {
+ assert(Operands.empty() && "Already initialized?");
+ auto *I0 = cast<Instruction>(Scalars[0]);
+ Operands.resize(I0->getNumOperands());
+ unsigned NumLanes = Scalars.size();
+ for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
+ OpIdx != NumOperands; ++OpIdx) {
+ Operands[OpIdx].resize(NumLanes);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ auto *I = cast<Instruction>(Scalars[Lane]);
+ assert(I->getNumOperands() == NumOperands &&
+ "Expected same number of operands");
+ Operands[OpIdx][Lane] = I->getOperand(OpIdx);
+ }
+ }
}
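setOperandsInOrder materializes, in one pass, the lane-major operand layout that trySetUserTEOperand used to build piecemeal during recursion. For a two-lane add bundle the resulting shape is (names illustrative):

//   Scalars     = { a0 + b0, a1 + b1 }
//   Operands[0] = { a0, a1 }   // operand index 0 across both lanes
//   Operands[1] = { b0, b1 }   // operand index 1 across both lanes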
/// \returns the \p OpIdx operand of this TreeEntry.
@@ -1249,6 +1279,9 @@ private:
return Operands[OpIdx];
}
+ /// \returns the number of operands.
+ unsigned getNumOperands() const { return Operands.size(); }
+
/// \return the single \p OpIdx operand.
Value *getSingleOperand(unsigned OpIdx) const {
assert(OpIdx < Operands.size() && "Off bounds");
@@ -1256,6 +1289,58 @@ private:
return Operands[OpIdx][0];
}
+ /// Some of the instructions in the list have alternate opcodes.
+ bool isAltShuffle() const {
+ return getOpcode() != getAltOpcode();
+ }
+
+ bool isOpcodeOrAlt(Instruction *I) const {
+ unsigned CheckedOpcode = I->getOpcode();
+ return (getOpcode() == CheckedOpcode ||
+ getAltOpcode() == CheckedOpcode);
+ }
+
+ /// Chooses the correct key for scheduling data. If \p Op has the same (or
+ /// alternate) opcode as the main operation, the key is \p Op. Otherwise
+ /// the key is the main operation.
+ Value *isOneOf(Value *Op) const {
+ auto *I = dyn_cast<Instruction>(Op);
+ if (I && isOpcodeOrAlt(I))
+ return Op;
+ return MainOp;
+ }
+
+ void setOperations(const InstructionsState &S) {
+ MainOp = S.MainOp;
+ AltOp = S.AltOp;
+ }
+
+ Instruction *getMainOp() const {
+ return MainOp;
+ }
+
+ Instruction *getAltOp() const {
+ return AltOp;
+ }
+
+ /// The main/alternate opcodes for the list of instructions.
+ unsigned getOpcode() const {
+ return MainOp ? MainOp->getOpcode() : 0;
+ }
+
+ unsigned getAltOpcode() const {
+ return AltOp ? AltOp->getOpcode() : 0;
+ }
+
+ /// Update operations state of this entry if reorder occurred.
+ bool updateStateIfReorder() {
+ if (ReorderIndices.empty())
+ return false;
+ InstructionsState S = getSameOpcode(Scalars, ReorderIndices.front());
+ setOperations(S);
+ return true;
+ }
+
#ifndef NDEBUG
/// Debug printer.
LLVM_DUMP_METHOD void dump() const {
@@ -1269,6 +1354,8 @@ private:
for (Value *V : Scalars)
dbgs().indent(2) << *V << "\n";
dbgs() << "NeedToGather: " << NeedToGather << "\n";
+ dbgs() << "MainOp: " << *MainOp << "\n";
+ dbgs() << "AltOp: " << *AltOp << "\n";
dbgs() << "VectorizedValue: ";
if (VectorizedValue)
dbgs() << *VectorizedValue;
@@ -1279,12 +1366,12 @@ private:
if (ReuseShuffleIndices.empty())
dbgs() << "Emtpy";
else
- for (unsigned Idx : ReuseShuffleIndices)
- dbgs() << Idx << ", ";
+ for (unsigned ReuseIdx : ReuseShuffleIndices)
+ dbgs() << ReuseIdx << ", ";
dbgs() << "\n";
dbgs() << "ReorderIndices: ";
- for (unsigned Idx : ReorderIndices)
- dbgs() << Idx << ", ";
+ for (unsigned ReorderIdx : ReorderIndices)
+ dbgs() << ReorderIdx << ", ";
dbgs() << "\n";
dbgs() << "UserTreeIndices: ";
for (const auto &EInfo : UserTreeIndices)
@@ -1295,11 +1382,13 @@ private:
};
/// Create a new VectorizableTree entry.
- TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
+ TreeEntry *newTreeEntry(ArrayRef<Value *> VL, Optional<ScheduleData *> Bundle,
+ const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
ArrayRef<unsigned> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
- VectorizableTree.push_back(llvm::make_unique<TreeEntry>(VectorizableTree));
+ bool Vectorized = (bool)Bundle;
+ VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
TreeEntry *Last = VectorizableTree.back().get();
Last->Idx = VectorizableTree.size() - 1;
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
@@ -1307,11 +1396,22 @@ private:
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());
Last->ReorderIndices = ReorderIndices;
+ Last->setOperations(S);
if (Vectorized) {
for (int i = 0, e = VL.size(); i != e; ++i) {
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
- ScalarToTreeEntry[VL[i]] = Last->Idx;
- }
+ ScalarToTreeEntry[VL[i]] = Last;
+ }
+ // Update the scheduler bundle to point to this TreeEntry.
+ unsigned Lane = 0;
+ for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;
+ BundleMember = BundleMember->NextInBundle) {
+ BundleMember->TE = Last;
+ BundleMember->Lane = Lane;
+ ++Lane;
+ }
+ assert((!Bundle.getValue() || Lane == VL.size()) &&
+ "Bundle and VL out of sync");
} else {
MustGather.insert(VL.begin(), VL.end());
}
@@ -1319,7 +1419,6 @@ private:
if (UserTreeIdx.UserTE)
Last->UserTreeIndices.push_back(UserTreeIdx);
- Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices);
return Last;
}
@@ -1340,19 +1439,19 @@ private:
TreeEntry *getTreeEntry(Value *V) {
auto I = ScalarToTreeEntry.find(V);
if (I != ScalarToTreeEntry.end())
- return VectorizableTree[I->second].get();
+ return I->second;
return nullptr;
}
const TreeEntry *getTreeEntry(Value *V) const {
auto I = ScalarToTreeEntry.find(V);
if (I != ScalarToTreeEntry.end())
- return VectorizableTree[I->second].get();
+ return I->second;
return nullptr;
}
/// Maps a specific scalar to its tree entry.
- SmallDenseMap<Value*, int> ScalarToTreeEntry;
+ SmallDenseMap<Value*, TreeEntry *> ScalarToTreeEntry;
/// A list of scalars that we found that we need to keep as scalars.
ValueSet MustGather;
@@ -1408,15 +1507,14 @@ private:
/// This is required to ensure that there are no incorrect collisions in the
/// AliasCache, which can happen if a new instruction is allocated at the
/// same address as a previously deleted instruction.
- void eraseInstruction(Instruction *I) {
- I->removeFromParent();
- I->dropAllReferences();
- DeletedInstructions.emplace_back(I);
+ void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) {
+ auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first;
+ It->getSecond() = It->getSecond() && ReplaceOpsWithUndef;
}
/// Temporary store for deleted instructions. Instructions will be deleted
/// eventually when the BoUpSLP is destructed.
- SmallVector<unique_value, 8> DeletedInstructions;
+ DenseMap<Instruction *, bool> DeletedInstructions;
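Deletion is now deferred: instructions are only queued here and physically erased in ~BoUpSLP. The mapped bool records whether the instruction's remaining uses should first be replaced with undef, and the try_emplace/AND combination keeps that flag true only if every call for the instruction requested it (sketch of the assumed semantics):

eraseInstruction(I, /*ReplaceOpsWithUndef=*/true);  // flag for I: true
eraseInstruction(I, /*ReplaceOpsWithUndef=*/false); // true && false -> false
// In ~BoUpSLP(): replaceAllUsesWith(undef) runs only while the flag is true,
// then dropAllReferences() and eraseFromParent() for every queued instruction.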
/// A list of values that need to extracted out of the tree.
/// This list holds pairs of (Internal Scalar : External User). External User
@@ -1453,6 +1551,8 @@ private:
UnscheduledDepsInBundle = UnscheduledDeps;
clearDependencies();
OpValue = OpVal;
+ TE = nullptr;
+ Lane = -1;
}
/// Returns true if the dependency information has been calculated.
@@ -1559,6 +1659,12 @@ private:
/// Opcode of the current instruction in the schedule data.
Value *OpValue = nullptr;
+
+ /// The TreeEntry that this instruction corresponds to.
+ TreeEntry *TE = nullptr;
+
+ /// The lane of this node in the TreeEntry.
+ int Lane = -1;
};
#ifndef NDEBUG
@@ -1633,10 +1739,9 @@ private:
continue;
}
// Handle the def-use chain dependencies.
- for (Use &U : BundleMember->Inst->operands()) {
- auto *I = dyn_cast<Instruction>(U.get());
- if (!I)
- continue;
+
+ // Decrement the unscheduled counter and insert to ready list if ready.
+ auto &&DecrUnsched = [this, &ReadyList](Instruction *I) {
doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) {
if (OpDef && OpDef->hasValidDependencies() &&
OpDef->incrementUnscheduledDeps(-1) == 0) {
@@ -1651,6 +1756,24 @@ private:
<< "SLP: gets ready (def): " << *DepBundle << "\n");
}
});
+ };
+
+ // If BundleMember is a vector bundle, its operands may have been
+ // reordered during buildTree(). We therefore need to get its operands
+ // through the TreeEntry.
+ if (TreeEntry *TE = BundleMember->TE) {
+ int Lane = BundleMember->Lane;
+ assert(Lane >= 0 && "Lane not set");
+ for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands();
+ OpIdx != NumOperands; ++OpIdx)
+ if (auto *I = dyn_cast<Instruction>(TE->getOperand(OpIdx)[Lane]))
+ DecrUnsched(I);
+ } else {
+ // If BundleMember is a stand-alone instruction, no operand reordering
+ // has taken place, so we directly access its operands.
+ for (Use &U : BundleMember->Inst->operands())
+ if (auto *I = dyn_cast<Instruction>(U.get()))
+ DecrUnsched(I);
}
// Handle the memory dependencies.
for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
@@ -1697,8 +1820,11 @@ private:
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
/// actually moved at this stage.
- bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
- const InstructionsState &S);
+ /// \returns the scheduling bundle. The returned Optional value is non-None
+ /// if \p VL is allowed to be scheduled.
+ Optional<ScheduleData *>
+ tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
+ const InstructionsState &S);
/// Un-bundles a group of instructions.
void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
@@ -1945,6 +2071,30 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
} // end namespace llvm
+BoUpSLP::~BoUpSLP() {
+ for (const auto &Pair : DeletedInstructions) {
+ // Replace operands of ignored instructions with Undefs if they were
+ // marked for deletion.
+ if (Pair.getSecond()) {
+ Value *Undef = UndefValue::get(Pair.getFirst()->getType());
+ Pair.getFirst()->replaceAllUsesWith(Undef);
+ }
+ Pair.getFirst()->dropAllReferences();
+ }
+ for (const auto &Pair : DeletedInstructions) {
+ assert(Pair.getFirst()->use_empty() &&
+ "trying to erase instruction with users.");
+ Pair.getFirst()->eraseFromParent();
+ }
+}
+
+void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
+ for (auto *V : AV) {
+ if (auto *I = dyn_cast<Instruction>(V))
+ eraseInstruction(I, /*ReplaceOpsWithUndef=*/true);
+ }
+}
+
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst) {
ExtraValueToDebugLocsMap ExternallyUsedValues;
@@ -2026,28 +2176,28 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
InstructionsState S = getSameOpcode(VL);
if (Depth == RecursionMaxDepth) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
// Don't handle vectors.
if (S.OpValue->getType()->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
if (SI->getValueOperand()->getType()->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
// If all of the operands are identical or constant we have a simple solution.
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
@@ -2055,11 +2205,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// the same block.
// Don't vectorize ephemeral values.
- for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- if (EphValues.count(VL[i])) {
- LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
+ for (Value *V : VL) {
+ if (EphValues.count(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is ephemeral.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
}
@@ -2069,7 +2219,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
if (!E->isSame(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
// Record the reuse of the tree node. FIXME, currently this is only used to
@@ -2077,19 +2227,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
<< ".\n");
- E->trySetUserTEOperand(UserTreeIdx, VL, None);
return;
}
// Check that none of the instructions in the bundle are already in the tree.
- for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- auto *I = dyn_cast<Instruction>(VL[i]);
+ for (Value *V : VL) {
+ auto *I = dyn_cast<Instruction>(V);
if (!I)
continue;
if (getTreeEntry(I)) {
- LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
+ LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is already in tree.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
}
@@ -2097,10 +2246,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// If any of the scalars is marked as a value that needs to stay scalar, then
// we need to gather the scalars.
// The reduction nodes (stored in UserIgnoreList) also should stay scalar.
- for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
+ for (Value *V : VL) {
+ if (MustGather.count(V) || is_contained(UserIgnoreList, V)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
}
@@ -2114,7 +2263,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Don't go into unreachable blocks. They may contain instructions with
// dependency cycles which confuse the final scheduling.
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
@@ -2128,13 +2277,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (Res.second)
UniqueValues.emplace_back(V);
}
- if (UniqueValues.size() == VL.size()) {
+ size_t NumUniqueScalarValues = UniqueValues.size();
+ if (NumUniqueScalarValues == VL.size()) {
ReuseShuffleIndicies.clear();
} else {
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
- if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) {
+ if (NumUniqueScalarValues <= 1 ||
+ !llvm::isPowerOf2_32(NumUniqueScalarValues)) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
VL = UniqueValues;
@@ -2142,16 +2293,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
auto &BSRef = BlocksSchedules[BB];
if (!BSRef)
- BSRef = llvm::make_unique<BlockScheduling>(BB);
+ BSRef = std::make_unique<BlockScheduling>(BB);
BlockScheduling &BS = *BSRef.get();
- if (!BS.tryScheduleBundle(VL, this, S)) {
+ Optional<ScheduleData *> Bundle = BS.tryScheduleBundle(VL, this, S);
+ if (!Bundle) {
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
assert((!BS.getScheduleData(VL0) ||
!BS.getScheduleData(VL0)->isPartOfBundle()) &&
"tryScheduleBundle should cancelScheduling on failure");
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
@@ -2160,7 +2313,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
(unsigned) Instruction::ShuffleVector : S.getOpcode();
switch (ShuffleOrOp) {
case Instruction::PHI: {
- PHINode *PH = dyn_cast<PHINode>(VL0);
+ auto *PH = cast<PHINode>(VL0);
// Check for terminator values (e.g. invoke).
for (unsigned j = 0; j < VL.size(); ++j)
@@ -2172,23 +2325,29 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs()
<< "SLP: Need to swizzle PHINodes (terminator use).\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE =
+ newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
+ // Keeps the reordered operands to avoid code duplication.
+ SmallVector<ValueList, 2> OperandsVec;
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (Value *j : VL)
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
-
- buildTree_rec(Operands, Depth + 1, {TE, i});
+ TE->setOperand(i, Operands);
+ OperandsVec.push_back(Operands);
}
+ for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
+ buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx});
return;
}
case Instruction::ExtractValue:
@@ -2198,13 +2357,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (Reuse) {
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
++NumOpsWantToKeepOriginalOrder;
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
+ newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
Op0.assign(VL.size(), VL0->getOperand(0));
- VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
+ VectorizableTree.back()->setOperand(0, Op0);
return;
}
if (!CurrentOrder.empty()) {
@@ -2220,17 +2379,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
auto StoredCurrentOrderAndNum =
NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
++StoredCurrentOrderAndNum->getSecond();
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
+ newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies,
StoredCurrentOrderAndNum->getFirst());
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
Op0.assign(VL.size(), VL0->getOperand(0));
- VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
+ VectorizableTree.back()->setOperand(0, Op0);
return;
}
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
- newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
BS.cancelScheduling(VL, VL0);
return;
}
@@ -2246,7 +2407,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
return;
}
@@ -2259,7 +2421,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
auto *L = cast<LoadInst>(V);
if (!L->isSimple()) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
@@ -2289,15 +2452,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (CurrentOrder.empty()) {
// Original loads are consecutive and do not require reordering.
++NumOpsWantToKeepOriginalOrder;
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
- ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
+ UserTreeIdx, ReuseShuffleIndicies);
+ TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
} else {
// Need to reorder.
auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
++I->getSecond();
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
- ReuseShuffleIndicies, I->getFirst());
+ TreeEntry *TE =
+ newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies, I->getFirst());
+ TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
}
return;
@@ -2306,7 +2472,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
case Instruction::ZExt:
@@ -2322,24 +2489,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcTy = VL0->getOperand(0)->getType();
- for (unsigned i = 0; i < VL.size(); ++i) {
- Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
+ for (Value *V : VL) {
+ Type *Ty = cast<Instruction>(V)->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs()
<< "SLP: Gathering casts with different src types.\n");
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
- for (Value *j : VL)
- Operands.push_back(cast<Instruction>(j)->getOperand(i));
+ for (Value *V : VL)
+ Operands.push_back(cast<Instruction>(V)->getOperand(i));
buildTree_rec(Operands, Depth + 1, {TE, i});
}
@@ -2351,19 +2521,21 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
Type *ComparedTy = VL0->getOperand(0)->getType();
- for (unsigned i = 1, e = VL.size(); i < e; ++i) {
- CmpInst *Cmp = cast<CmpInst>(VL[i]);
+ for (Value *V : VL) {
+ CmpInst *Cmp = cast<CmpInst>(V);
if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs()
<< "SLP: Gathering cmp with different predicate.\n");
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
@@ -2384,7 +2556,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Right.push_back(RHS);
}
}
-
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
@@ -2409,7 +2582,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
// Sort operands of the instructions so that each side is more likely to
@@ -2417,11 +2591,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@@ -2434,11 +2611,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
- for (unsigned j = 0; j < VL.size(); ++j) {
- if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
+ for (Value *V : VL) {
+ if (cast<Instruction>(V)->getNumOperands() != 2) {
LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
@@ -2446,58 +2624,64 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// We can't combine several GEPs into one vector if they operate on
// different types.
Type *Ty0 = VL0->getOperand(0)->getType();
- for (unsigned j = 0; j < VL.size(); ++j) {
- Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
+ for (Value *V : VL) {
+ Type *CurTy = cast<Instruction>(V)->getOperand(0)->getType();
if (Ty0 != CurTy) {
LLVM_DEBUG(dbgs()
<< "SLP: not-vectorizable GEP (different types).\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
// We don't combine GEPs with non-constant indexes.
- for (unsigned j = 0; j < VL.size(); ++j) {
- auto Op = cast<Instruction>(VL[j])->getOperand(1);
+ for (Value *V : VL) {
+ auto Op = cast<Instruction>(V)->getOperand(1);
if (!isa<ConstantInt>(Op)) {
LLVM_DEBUG(dbgs()
<< "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
- for (Value *j : VL)
- Operands.push_back(cast<Instruction>(j)->getOperand(i));
+ for (Value *V : VL)
+ Operands.push_back(cast<Instruction>(V)->getOperand(i));
buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
case Instruction::Store: {
- // Check if the stores are consecutive or of we need to swizzle them.
+ // Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
ValueList Operands;
- for (Value *j : VL)
- Operands.push_back(cast<Instruction>(j)->getOperand(0));
-
+ for (Value *V : VL)
+ Operands.push_back(cast<Instruction>(V)->getOperand(0));
+ TE->setOperandsInOrder();
buildTree_rec(Operands, Depth + 1, {TE, 0});
return;
}
@@ -2509,7 +2693,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (!isTriviallyVectorizable(ID)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
@@ -2519,14 +2704,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned j = 0; j != NumArgs; ++j)
if (hasVectorInstrinsicScalarOpd(ID, j))
ScalarArgs[j] = CI->getArgOperand(j);
- for (unsigned i = 1, e = VL.size(); i != e; ++i) {
- CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
+ for (Value *V : VL) {
+ CallInst *CI2 = dyn_cast<CallInst>(V);
if (!CI2 || CI2->getCalledFunction() != Int ||
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *V
<< "\n");
return;
}
@@ -2537,7 +2723,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Value *A1J = CI2->getArgOperand(j);
if (ScalarArgs[j] != A1J) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
<< " argument " << ScalarArgs[j] << "!=" << A1J
<< "\n");
@@ -2551,19 +2738,22 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
CI->op_begin() + CI->getBundleOperandsEndIndex(),
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
- << *CI << "!=" << *VL[i] << '\n');
+ << *CI << "!=" << *V << '\n');
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
- for (Value *j : VL) {
- CallInst *CI2 = dyn_cast<CallInst>(j);
+ for (Value *V : VL) {
+ auto *CI2 = cast<CallInst>(V);
Operands.push_back(CI2->getArgOperand(i));
}
buildTree_rec(Operands, Depth + 1, {TE, i});
@@ -2575,27 +2765,32 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// then do not vectorize this instruction.
if (!S.isAltShuffle()) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
- for (Value *j : VL)
- Operands.push_back(cast<Instruction>(j)->getOperand(i));
+ for (Value *V : VL)
+ Operands.push_back(cast<Instruction>(V)->getOperand(i));
buildTree_rec(Operands, Depth + 1, {TE, i});
}
@@ -2603,7 +2798,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
default:
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return;
}
@@ -2738,7 +2934,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
return ReuseShuffleCost +
TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
}
- if (getSameOpcode(VL).getOpcode() == Instruction::ExtractElement &&
+ if (E->getOpcode() == Instruction::ExtractElement &&
allSameType(VL) && allSameBlock(VL)) {
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
if (ShuffleKind.hasValue()) {
@@ -2761,11 +2957,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
return ReuseShuffleCost + getGatherCost(VL);
}
- InstructionsState S = getSameOpcode(VL);
- assert(S.getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
- Instruction *VL0 = cast<Instruction>(S.OpValue);
- unsigned ShuffleOrOp = S.isAltShuffle() ?
- (unsigned) Instruction::ShuffleVector : S.getOpcode();
+ assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
+ Instruction *VL0 = E->getMainOp();
+ unsigned ShuffleOrOp =
+ E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
switch (ShuffleOrOp) {
case Instruction::PHI:
return 0;
@@ -2851,7 +3046,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::BitCast: {
Type *SrcTy = VL0->getOperand(0)->getType();
int ScalarEltCost =
- TTI->getCastInstrCost(S.getOpcode(), ScalarTy, SrcTy, VL0);
+ TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
@@ -2864,7 +3059,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// Check if the values are candidates to demote.
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
VecCost = ReuseShuffleCost +
- TTI->getCastInstrCost(S.getOpcode(), VecTy, SrcVecTy, VL0);
+ TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, VL0);
}
return VecCost - ScalarCost;
}
@@ -2872,14 +3067,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::ICmp:
case Instruction::Select: {
// Calculate the cost of this instruction.
- int ScalarEltCost = TTI->getCmpSelInstrCost(S.getOpcode(), ScalarTy,
+ int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
Builder.getInt1Ty(), VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
- int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0);
+ int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VL0);
return ReuseShuffleCost + VecCost - ScalarCost;
}
case Instruction::FNeg:
@@ -2940,12 +3135,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
SmallVector<const Value *, 4> Operands(VL0->operand_values());
int ScalarEltCost = TTI->getArithmeticInstrCost(
- S.getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands);
+ E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
- int VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy, Op1VK,
+ int VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, Op1VK,
Op2VK, Op1VP, Op2VP, Operands);
return ReuseShuffleCost + VecCost - ScalarCost;
}
@@ -3027,11 +3222,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
return ReuseShuffleCost + VecCallCost - ScalarCallCost;
}
case Instruction::ShuffleVector: {
- assert(S.isAltShuffle() &&
- ((Instruction::isBinaryOp(S.getOpcode()) &&
- Instruction::isBinaryOp(S.getAltOpcode())) ||
- (Instruction::isCast(S.getOpcode()) &&
- Instruction::isCast(S.getAltOpcode()))) &&
+ assert(E->isAltShuffle() &&
+ ((Instruction::isBinaryOp(E->getOpcode()) &&
+ Instruction::isBinaryOp(E->getAltOpcode())) ||
+ (Instruction::isCast(E->getOpcode()) &&
+ Instruction::isCast(E->getAltOpcode()))) &&
"Invalid Shuffle Vector Operand");
int ScalarCost = 0;
if (NeedToShuffleReuses) {
@@ -3046,25 +3241,25 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
I, TargetTransformInfo::TCK_RecipThroughput);
}
}
- for (Value *i : VL) {
- Instruction *I = cast<Instruction>(i);
- assert(S.isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ for (Value *V : VL) {
+ Instruction *I = cast<Instruction>(V);
+ assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
ScalarCost += TTI->getInstructionCost(
I, TargetTransformInfo::TCK_RecipThroughput);
}
// VecCost is equal to sum of the cost of creating 2 vectors
// and the cost of creating shuffle.
int VecCost = 0;
- if (Instruction::isBinaryOp(S.getOpcode())) {
- VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy);
- VecCost += TTI->getArithmeticInstrCost(S.getAltOpcode(), VecTy);
+ if (Instruction::isBinaryOp(E->getOpcode())) {
+ VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy);
+ VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy);
} else {
- Type *Src0SclTy = S.MainOp->getOperand(0)->getType();
- Type *Src1SclTy = S.AltOp->getOperand(0)->getType();
+ Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
+ Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size());
VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size());
- VecCost = TTI->getCastInstrCost(S.getOpcode(), VecTy, Src0Ty);
- VecCost += TTI->getCastInstrCost(S.getAltOpcode(), VecTy, Src1Ty);
+ VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty);
+ VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty);
}
VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
return ReuseShuffleCost + VecCost - ScalarCost;
@@ -3098,6 +3293,43 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const {
return true;
}
+bool BoUpSLP::isLoadCombineReductionCandidate(unsigned RdxOpcode) const {
+ if (RdxOpcode != Instruction::Or)
+ return false;
+
+ unsigned NumElts = VectorizableTree[0]->Scalars.size();
+ Value *FirstReduced = VectorizableTree[0]->Scalars[0];
+
+ // Look past the reduction to find a source value. Arbitrarily follow the
+ // path through operand 0 of any 'or'. Also, peek through optional
+ // shift-left-by-constant.
+ Value *ZextLoad = FirstReduced;
+ while (match(ZextLoad, m_Or(m_Value(), m_Value())) ||
+ match(ZextLoad, m_Shl(m_Value(), m_Constant())))
+ ZextLoad = cast<BinaryOperator>(ZextLoad)->getOperand(0);
+
+ // Check if the input to the reduction is an extended load.
+ Value *LoadPtr;
+ if (!match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr)))))
+ return false;
+
+ // Require that the total load bit width is a legal integer type.
+ // For example, <8 x i8> --> i64 is a legal integer on a 64-bit target.
+ // But <16 x i8> --> i128 is not, so the backend probably can't reduce it.
+ Type *SrcTy = LoadPtr->getType()->getPointerElementType();
+ unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts;
+ LLVMContext &Context = FirstReduced->getContext();
+ if (!TTI->isTypeLegal(IntegerType::get(Context, LoadBitWidth)))
+ return false;
+
+ // Everything matched - assume that we can fold the whole sequence using
+ // load combining.
+ LLVM_DEBUG(dbgs() << "SLP: Assume load combining for scalar reduction of "
+ << *(cast<Instruction>(FirstReduced)) << "\n");
+
+ return true;
+}
+
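As a rough standalone illustration of the width check above: a reduction over NumElts zero-extended loads of an iN source maps to one combined load of NumElts * N bits, and the candidate is only kept when that width is a legal integer. A minimal plain-C++ sketch, with a hypothetical isLegalIntegerWidth standing in for TTI->isTypeLegal on a 64-bit target:

#include <set>

// Hypothetical stand-in for TTI->isTypeLegal on a 64-bit target.
static bool isLegalIntegerWidth(unsigned Bits) {
  static const std::set<unsigned> LegalIntWidths = {8, 16, 32, 64};
  return LegalIntWidths.count(Bits) != 0;
}

// <8 x i8> reduces to one i64 load (legal); <16 x i8> would need i128 (not).
static bool loadCombineWidthIsLegal(unsigned SrcBits, unsigned NumElts) {
  return isLegalIntegerWidth(SrcBits * NumElts);
}

int main() {
  return (loadCombineWidthIsLegal(8, 8) && !loadCombineWidthIsLegal(8, 16))
             ? 0
             : 1;
}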
bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
// We can vectorize the tree if its size is greater than or equal to the
// minimum size specified by the MinTreeSize command line option.
@@ -3319,16 +3551,16 @@ void BoUpSLP::reorderInputsAccordingToOpcode(
Right = Ops.getVL(1);
}
-void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
- const InstructionsState &S) {
+void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) {
// Get the basic block this bundle is in. All instructions in the bundle
// should be in this block.
- auto *Front = cast<Instruction>(S.OpValue);
+ auto *Front = E->getMainOp();
auto *BB = Front->getParent();
- assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool {
- auto *I = cast<Instruction>(V);
- return !S.isOpcodeOrAlt(I) || I->getParent() == BB;
- }));
+ assert(llvm::all_of(make_range(E->Scalars.begin(), E->Scalars.end()),
+ [=](Value *V) -> bool {
+ auto *I = cast<Instruction>(V);
+ return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
+ }));
// The last instruction in the bundle in program order.
Instruction *LastInst = nullptr;
@@ -3339,7 +3571,7 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
// bundle. The end of the bundle is marked by null ScheduleData.
if (BlocksSchedules.count(BB)) {
auto *Bundle =
- BlocksSchedules[BB]->getScheduleData(isOneOf(S, VL.back()));
+ BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back()));
if (Bundle && Bundle->isPartOfBundle())
for (; Bundle; Bundle = Bundle->NextInBundle)
if (Bundle->OpValue == Bundle->Inst)
@@ -3365,14 +3597,15 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
// we both exit early from buildTree_rec and that the bundle be out-of-order
// (causing us to iterate all the way to the end of the block).
if (!LastInst) {
- SmallPtrSet<Value *, 16> Bundle(VL.begin(), VL.end());
+ SmallPtrSet<Value *, 16> Bundle(E->Scalars.begin(), E->Scalars.end());
for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
- if (Bundle.erase(&I) && S.isOpcodeOrAlt(&I))
+ if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I))
LastInst = &I;
if (Bundle.empty())
break;
}
}
+ assert(LastInst && "Failed to find last instruction in bundle");
// Set the insertion point after the last instruction in the bundle. Set the
// debug location to Front.
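The fallback scan above has a simple shape: walk the block in program order, erase bundle members from a set, and the last member erased is the bundle's last instruction. A minimal standalone sketch of that shape, with ints standing in for instructions:

#include <cassert>
#include <unordered_set>
#include <vector>

int main() {
  std::vector<int> Block = {1, 2, 3, 4, 5, 6}; // program order
  std::unordered_set<int> Bundle = {2, 5, 3};  // bundle members, unordered
  const int *LastInst = nullptr;
  // The last bundle member reached in program order is the last instruction.
  for (const int &I : Block) {
    if (Bundle.erase(I))
      LastInst = &I;
    if (Bundle.empty())
      break;
  }
  assert(LastInst && *LastInst == 5 && "Failed to find last instruction");
  return LastInst ? 0 : 1;
}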
@@ -3385,7 +3618,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
// Generate the 'InsertElement' instruction.
for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
- if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
+ if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {
GatherSeq.insert(Insrt);
CSEBlocks.insert(Insrt->getParent());
@@ -3494,8 +3727,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return E->VectorizedValue;
}
- InstructionsState S = getSameOpcode(E->Scalars);
- Instruction *VL0 = cast<Instruction>(S.OpValue);
+ Instruction *VL0 = E->getMainOp();
Type *ScalarTy = VL0->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
ScalarTy = SI->getValueOperand()->getType();
@@ -3504,7 +3736,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->NeedToGather) {
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
auto *V = Gather(E->Scalars, VecTy);
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3518,11 +3750,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
- unsigned ShuffleOrOp = S.isAltShuffle() ?
- (unsigned) Instruction::ShuffleVector : S.getOpcode();
+ unsigned ShuffleOrOp =
+ E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
switch (ShuffleOrOp) {
case Instruction::PHI: {
- PHINode *PH = dyn_cast<PHINode>(VL0);
+ auto *PH = cast<PHINode>(VL0);
Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
@@ -3577,7 +3809,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->VectorizedValue = V;
return V;
}
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
auto *V = Gather(E->Scalars, VecTy);
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3612,7 +3844,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->VectorizedValue = NewV;
return NewV;
}
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
auto *V = Gather(E->Scalars, VecTy);
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3637,7 +3869,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
Value *InVec = vectorizeTree(E->getOperand(0));
@@ -3646,7 +3878,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return E->VectorizedValue;
}
- CastInst *CI = dyn_cast<CastInst>(VL0);
+ auto *CI = cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3658,7 +3890,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::FCmp:
case Instruction::ICmp: {
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
Value *L = vectorizeTree(E->getOperand(0));
Value *R = vectorizeTree(E->getOperand(1));
@@ -3670,7 +3902,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V;
- if (S.getOpcode() == Instruction::FCmp)
+ if (E->getOpcode() == Instruction::FCmp)
V = Builder.CreateFCmp(P0, L, R);
else
V = Builder.CreateICmp(P0, L, R);
@@ -3685,7 +3917,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::Select: {
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
Value *Cond = vectorizeTree(E->getOperand(0));
Value *True = vectorizeTree(E->getOperand(1));
@@ -3706,7 +3938,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::FNeg: {
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
Value *Op = vectorizeTree(E->getOperand(0));
@@ -3716,7 +3948,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = Builder.CreateUnOp(
- static_cast<Instruction::UnaryOps>(S.getOpcode()), Op);
+ static_cast<Instruction::UnaryOps>(E->getOpcode()), Op);
propagateIRFlags(V, E->Scalars, VL0);
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
@@ -3748,7 +3980,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
Value *LHS = vectorizeTree(E->getOperand(0));
Value *RHS = vectorizeTree(E->getOperand(1));
@@ -3759,7 +3991,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = Builder.CreateBinOp(
- static_cast<Instruction::BinaryOps>(S.getOpcode()), LHS, RHS);
+ static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
+ RHS);
propagateIRFlags(V, E->Scalars, VL0);
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
@@ -3776,12 +4009,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
- bool IsReorder = !E->ReorderIndices.empty();
- if (IsReorder) {
- S = getSameOpcode(E->Scalars, E->ReorderIndices.front());
- VL0 = cast<Instruction>(S.OpValue);
- }
- setInsertPointAfterBundle(E->Scalars, S);
+ bool IsReorder = E->updateStateIfReorder();
+ if (IsReorder)
+ VL0 = E->getMainOp();
+ setInsertPointAfterBundle(E);
LoadInst *LI = cast<LoadInst>(VL0);
Type *ScalarLoadTy = LI->getType();
@@ -3797,11 +4028,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (getTreeEntry(PO))
ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
- unsigned Alignment = LI->getAlignment();
+ MaybeAlign Alignment = MaybeAlign(LI->getAlignment());
LI = Builder.CreateLoad(VecTy, VecPtr);
- if (!Alignment) {
- Alignment = DL->getABITypeAlignment(ScalarLoadTy);
- }
+ if (!Alignment)
+ Alignment = MaybeAlign(DL->getABITypeAlignment(ScalarLoadTy));
LI->setAlignment(Alignment);
Value *V = propagateMetadata(LI, E->Scalars);
if (IsReorder) {
@@ -3824,7 +4054,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned Alignment = SI->getAlignment();
unsigned AS = SI->getPointerAddressSpace();
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
Value *VecValue = vectorizeTree(E->getOperand(0));
Value *ScalarPtr = SI->getPointerOperand();
@@ -3840,7 +4070,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (!Alignment)
Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
- ST->setAlignment(Alignment);
+ ST->setAlignment(Align(Alignment));
Value *V = propagateMetadata(ST, E->Scalars);
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3851,7 +4081,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::GetElementPtr: {
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
Value *Op0 = vectorizeTree(E->getOperand(0));
@@ -3878,13 +4108,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
- setInsertPointAfterBundle(E->Scalars, S);
- Function *FI;
+ setInsertPointAfterBundle(E);
+
Intrinsic::ID IID = Intrinsic::not_intrinsic;
- Value *ScalarArg = nullptr;
- if (CI && (FI = CI->getCalledFunction())) {
+ if (Function *FI = CI->getCalledFunction())
IID = FI->getIntrinsicID();
- }
+
+ Value *ScalarArg = nullptr;
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
ValueList OpVL;
@@ -3926,20 +4156,20 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::ShuffleVector: {
- assert(S.isAltShuffle() &&
- ((Instruction::isBinaryOp(S.getOpcode()) &&
- Instruction::isBinaryOp(S.getAltOpcode())) ||
- (Instruction::isCast(S.getOpcode()) &&
- Instruction::isCast(S.getAltOpcode()))) &&
+ assert(E->isAltShuffle() &&
+ ((Instruction::isBinaryOp(E->getOpcode()) &&
+ Instruction::isBinaryOp(E->getAltOpcode())) ||
+ (Instruction::isCast(E->getOpcode()) &&
+ Instruction::isCast(E->getAltOpcode()))) &&
"Invalid Shuffle Vector Operand");
- Value *LHS, *RHS;
- if (Instruction::isBinaryOp(S.getOpcode())) {
- setInsertPointAfterBundle(E->Scalars, S);
+ Value *LHS = nullptr, *RHS = nullptr;
+ if (Instruction::isBinaryOp(E->getOpcode())) {
+ setInsertPointAfterBundle(E);
LHS = vectorizeTree(E->getOperand(0));
RHS = vectorizeTree(E->getOperand(1));
} else {
- setInsertPointAfterBundle(E->Scalars, S);
+ setInsertPointAfterBundle(E);
LHS = vectorizeTree(E->getOperand(0));
}
@@ -3949,16 +4179,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V0, *V1;
- if (Instruction::isBinaryOp(S.getOpcode())) {
+ if (Instruction::isBinaryOp(E->getOpcode())) {
V0 = Builder.CreateBinOp(
- static_cast<Instruction::BinaryOps>(S.getOpcode()), LHS, RHS);
+ static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
V1 = Builder.CreateBinOp(
- static_cast<Instruction::BinaryOps>(S.getAltOpcode()), LHS, RHS);
+ static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
} else {
V0 = Builder.CreateCast(
- static_cast<Instruction::CastOps>(S.getOpcode()), LHS, VecTy);
+ static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
V1 = Builder.CreateCast(
- static_cast<Instruction::CastOps>(S.getAltOpcode()), LHS, VecTy);
+ static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy);
}
// Create shuffle to take alternate operations from the vector.
@@ -3969,8 +4199,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
SmallVector<Constant *, 8> Mask(e);
for (unsigned i = 0; i < e; ++i) {
auto *OpInst = cast<Instruction>(E->Scalars[i]);
- assert(S.isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
- if (OpInst->getOpcode() == S.getAltOpcode()) {
+ assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
+ if (OpInst->getOpcode() == E->getAltOpcode()) {
Mask[i] = Builder.getInt32(e + i);
AltScalars.push_back(E->Scalars[i]);
} else {
@@ -4136,20 +4366,18 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
+#ifndef NDEBUG
Type *Ty = Scalar->getType();
if (!Ty->isVoidTy()) {
-#ifndef NDEBUG
for (User *U : Scalar->users()) {
LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
- // It is legal to replace users in the ignorelist by undef.
+ // It is legal to delete users in the ignorelist.
assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
- "Replacing out-of-tree value with undef");
+ "Deleting out-of-tree value");
}
-#endif
- Value *Undef = UndefValue::get(Ty);
- Scalar->replaceAllUsesWith(Undef);
}
+#endif
LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
eraseInstruction(cast<Instruction>(Scalar));
}
@@ -4165,7 +4393,7 @@ void BoUpSLP::optimizeGatherSequence() {
<< " gather sequences instructions.\n");
// LICM InsertElementInst sequences.
for (Instruction *I : GatherSeq) {
- if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I))
+ if (isDeleted(I))
continue;
// Check if this block is inside a loop.
@@ -4219,6 +4447,8 @@ void BoUpSLP::optimizeGatherSequence() {
// For all instructions in blocks containing gather sequences:
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
Instruction *In = &*it++;
+ if (isDeleted(In))
+ continue;
if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
continue;
@@ -4245,11 +4475,11 @@ void BoUpSLP::optimizeGatherSequence() {
// Groups the instructions to a bundle (which is then a single scheduling entity)
// and schedules instructions until the bundle gets ready.
-bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
- BoUpSLP *SLP,
- const InstructionsState &S) {
+Optional<BoUpSLP::ScheduleData *>
+BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
+ const InstructionsState &S) {
if (isa<PHINode>(S.OpValue))
- return true;
+ return nullptr;
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
@@ -4262,7 +4492,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
// instructions of the bundle.
for (Value *V : VL) {
if (!extendSchedulingRegion(V, S))
- return false;
+ return None;
}
for (Value *V : VL) {
@@ -4308,6 +4538,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
resetSchedule();
initialFillReadyList(ReadyInsts);
}
+ assert(Bundle && "Failed to find schedule bundle");
LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block "
<< BB->getName() << "\n");
@@ -4329,9 +4560,9 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
}
if (!Bundle->isReady()) {
cancelScheduling(VL, S.OpValue);
- return false;
+ return None;
}
- return true;
+ return Bundle;
}
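The new return type folds three outcomes into one value: None for a scheduling failure, a null pointer for bundles that need no scheduling (PHIs), and a real ScheduleData bundle on success. A standalone sketch of the same tri-state using std::optional (LLVM's Optional behaves analogously here):

#include <cassert>
#include <optional>

struct Bundle {};

// nullopt -> failure; nullptr -> success with nothing to schedule (PHIs);
// non-null -> success with a real bundle.
static std::optional<Bundle *> trySchedule(bool IsPhi, bool Ready, Bundle *B) {
  if (IsPhi)
    return nullptr;
  if (!Ready)
    return std::nullopt;
  return B;
}

int main() {
  Bundle B;
  assert(trySchedule(true, false, &B).has_value());   // PHI: ok, no bundle
  assert(!trySchedule(false, false, &B).has_value()); // not ready: failure
  assert(*trySchedule(false, true, &B) == &B);        // scheduled bundle
  return 0;
}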
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
@@ -4364,7 +4595,7 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() {
// Allocate a new ScheduleData for the instruction.
if (ChunkPos >= ChunkSize) {
- ScheduleDataChunks.push_back(llvm::make_unique<ScheduleData[]>(ChunkSize));
+ ScheduleDataChunks.push_back(std::make_unique<ScheduleData[]>(ChunkSize));
ChunkPos = 0;
}
return &(ScheduleDataChunks.back()[ChunkPos++]);
@@ -4977,7 +5208,7 @@ struct SLPVectorizer : public FunctionPass {
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -5052,7 +5283,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
// If the target claims to have no vector registers don't attempt
// vectorization.
- if (!TTI->getNumberOfRegisters(true))
+ if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
return false;
// Don't vectorize when the attribute NoImplicitFloat is used.
@@ -5100,19 +5331,6 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
return Changed;
}
-/// Check that the Values in the slice in VL array are still existent in
-/// the WeakTrackingVH array.
-/// Vectorization of part of the VL array may cause later values in the VL array
-/// to become invalid. We track when this has happened in the WeakTrackingVH
-/// array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
- ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin,
- unsigned SliceSize) {
- VL = VL.slice(SliceBegin, SliceSize);
- VH = VH.slice(SliceBegin, SliceSize);
- return !std::equal(VL.begin(), VL.end(), VH.begin());
-}
-
bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
unsigned VecRegSize) {
const unsigned ChainLen = Chain.size();
@@ -5124,20 +5342,20 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
if (!isPowerOf2_32(Sz) || VF < 2)
return false;
- // Keep track of values that were deleted by vectorizing in the loop below.
- const SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end());
-
bool Changed = false;
// Look for profitable vectorizable trees at all offsets, starting at zero.
for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) {
+ ArrayRef<Value *> Operands = Chain.slice(i, VF);
// Check that a previous iteration of this loop did not delete the Value.
- if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
+ if (llvm::any_of(Operands, [&R](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ return I && R.isDeleted(I);
+ }))
continue;
LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
<< "\n");
- ArrayRef<Value *> Operands = Chain.slice(i, VF);
R.buildTree(Operands);
if (R.isTreeTinyAndNotFullyVectorizable())
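The WeakTrackingVH tracking removed above is replaced by asking the vectorizer directly whether a slice member was erased. A standalone sketch of the new shape, with a toy isDeleted set in place of BoUpSLP's deleted-instruction bookkeeping:

#include <algorithm>
#include <unordered_set>
#include <vector>

// Toy model: erased "instructions" are recorded in a set, and a slice is
// skipped when any member was already deleted.
struct ToyVectorizer {
  std::unordered_set<int> Deleted;
  bool isDeleted(int I) const { return Deleted.count(I) != 0; }
};

static bool sliceHasDeleted(const ToyVectorizer &R,
                            const std::vector<int> &Ops) {
  return std::any_of(Ops.begin(), Ops.end(),
                     [&R](int V) { return R.isDeleted(V); });
}

int main() {
  ToyVectorizer R;
  R.Deleted = {3};
  // {2,3} contains a deleted value and is skipped; {1,2} is analyzed.
  return (sliceHasDeleted(R, {2, 3}) && !sliceHasDeleted(R, {1, 2})) ? 0 : 1;
}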
@@ -5329,12 +5547,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
bool CandidateFound = false;
int MinCost = SLPCostThreshold;
- // Keep track of values that were deleted by vectorizing in the loop below.
- SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
-
unsigned NextInst = 0, MaxInst = VL.size();
- for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
- VF /= 2) {
+ for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
// No actual vectorization should happen if the number of parts is the same as
// the provided vectorization factor (i.e. the scalar type is used for vector
// code during codegen).
@@ -5352,13 +5566,16 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
break;
+ ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
// Check that a previous iteration of this loop did not delete the Value.
- if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
+ if (llvm::any_of(Ops, [&R](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ return I && R.isDeleted(I);
+ }))
continue;
LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
<< "\n");
- ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
R.buildTree(Ops);
Optional<ArrayRef<unsigned>> Order = R.bestOrder();
@@ -5571,7 +5788,7 @@ class HorizontalReduction {
Value *createOp(IRBuilder<> &Builder, const Twine &Name) const {
assert(isVectorizable() &&
"Expected add|fadd or min/max reduction operation.");
- Value *Cmp;
+ Value *Cmp = nullptr;
switch (Kind) {
case RK_Arithmetic:
return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS,
@@ -5579,23 +5796,23 @@ class HorizontalReduction {
case RK_Min:
Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS)
: Builder.CreateFCmpOLT(LHS, RHS);
- break;
+ return Builder.CreateSelect(Cmp, LHS, RHS, Name);
case RK_Max:
Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS)
: Builder.CreateFCmpOGT(LHS, RHS);
- break;
+ return Builder.CreateSelect(Cmp, LHS, RHS, Name);
case RK_UMin:
assert(Opcode == Instruction::ICmp && "Expected integer types.");
Cmp = Builder.CreateICmpULT(LHS, RHS);
- break;
+ return Builder.CreateSelect(Cmp, LHS, RHS, Name);
case RK_UMax:
assert(Opcode == Instruction::ICmp && "Expected integer types.");
Cmp = Builder.CreateICmpUGT(LHS, RHS);
- break;
+ return Builder.CreateSelect(Cmp, LHS, RHS, Name);
case RK_None:
- llvm_unreachable("Unknown reduction operation.");
+ break;
}
- return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+ llvm_unreachable("Unknown reduction operation.");
}
public:
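The rework gives createOp a common switch shape: every reachable reduction kind returns from its own case, and falling out of the switch is the impossible path. A standalone sketch of that control flow, with std::abort standing in for llvm_unreachable:

#include <cstdlib>

enum ReductionKind { RK_None, RK_Arithmetic, RK_Min, RK_Max };

// Every reachable kind returns from its own case; reaching the end of the
// function is the impossible path.
static int createOp(ReductionKind K, int L, int R) {
  switch (K) {
  case RK_Arithmetic:
    return L + R;
  case RK_Min:
    return L < R ? L : R;
  case RK_Max:
    return L > R ? L : R;
  case RK_None:
    break;
  }
  std::abort(); // stands in for llvm_unreachable
}

int main() { return createOp(RK_Max, 2, 3) == 3 ? 0 : 1; }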
@@ -6203,6 +6420,8 @@ public:
}
if (V.isTreeTinyAndNotFullyVectorizable())
break;
+ if (V.isLoadCombineReductionCandidate(ReductionData.getOpcode()))
+ break;
V.computeMinimumValueSizes();
@@ -6275,6 +6494,9 @@ public:
}
// Update users.
ReductionRoot->replaceAllUsesWith(VectorizedTree);
+ // Mark all scalar reduction ops for deletion; they are replaced by the
+ // vector reductions.
+ V.eraseInstructions(IgnoreList);
}
return VectorizedTree != nullptr;
}
@@ -6323,7 +6545,7 @@ private:
IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
- int ScalarReduxCost;
+ int ScalarReduxCost = 0;
switch (ReductionData.getKind()) {
case RK_Arithmetic:
ScalarReduxCost =
@@ -6429,10 +6651,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
/// \return true if it matches.
static bool findBuildAggregate(InsertValueInst *IV,
SmallVectorImpl<Value *> &BuildVectorOpds) {
- Value *V;
do {
BuildVectorOpds.push_back(IV->getInsertedValueOperand());
- V = IV->getAggregateOperand();
+ Value *V = IV->getAggregateOperand();
if (isa<UndefValue>(V))
break;
IV = dyn_cast<InsertValueInst>(V);
@@ -6530,18 +6751,13 @@ static bool tryToVectorizeHorReductionOrInstOperands(
// horizontal reduction.
// Interrupt the process if the Root instruction itself was vectorized or all
// sub-trees not higher than RecursionMaxDepth were analyzed/vectorized.
- SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0});
+ SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
SmallPtrSet<Value *, 8> VisitedInstrs;
bool Res = false;
while (!Stack.empty()) {
- Value *V;
+ Instruction *Inst;
unsigned Level;
- std::tie(V, Level) = Stack.pop_back_val();
- if (!V)
- continue;
- auto *Inst = dyn_cast<Instruction>(V);
- if (!Inst)
- continue;
+ std::tie(Inst, Level) = Stack.pop_back_val();
auto *BI = dyn_cast<BinaryOperator>(Inst);
auto *SI = dyn_cast<SelectInst>(Inst);
if (BI || SI) {
@@ -6582,8 +6798,8 @@ static bool tryToVectorizeHorReductionOrInstOperands(
for (auto *Op : Inst->operand_values())
if (VisitedInstrs.insert(Op).second)
if (auto *I = dyn_cast<Instruction>(Op))
- if (!isa<PHINode>(I) && I->getParent() == BB)
- Stack.emplace_back(Op, Level);
+ if (!isa<PHINode>(I) && !R.isDeleted(I) && I->getParent() == BB)
+ Stack.emplace_back(I, Level);
}
return Res;
}
@@ -6652,11 +6868,10 @@ bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
}
bool SLPVectorizerPass::vectorizeSimpleInstructions(
- SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) {
+ SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R) {
bool OpsChanged = false;
- for (auto &VH : reverse(Instructions)) {
- auto *I = dyn_cast_or_null<Instruction>(VH);
- if (!I)
+ for (auto *I : reverse(Instructions)) {
+ if (R.isDeleted(I))
continue;
if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
@@ -6685,7 +6900,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (!P)
break;
- if (!VisitedInstrs.count(P))
+ if (!VisitedInstrs.count(P) && !R.isDeleted(P))
Incoming.push_back(P);
}
@@ -6729,9 +6944,12 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
VisitedInstrs.clear();
- SmallVector<WeakVH, 8> PostProcessInstructions;
+ SmallVector<Instruction *, 8> PostProcessInstructions;
SmallDenseSet<Instruction *, 4> KeyNodes;
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // Skip instructions marked for deletion.
+ if (R.isDeleted(&*it))
+ continue;
// We may go through BB multiple times so skip the one we have checked.
if (!VisitedInstrs.insert(&*it).second) {
if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
@@ -6811,10 +7029,16 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
LLVM_DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length "
<< Entry.second.size() << ".\n");
- // We process the getelementptr list in chunks of 16 (like we do for
- // stores) to minimize compile-time.
- for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += 16) {
- auto Len = std::min<unsigned>(BE - BI, 16);
+ // Process the GEP list in chunks suitable for the target's supported
+ // vector size. If a vector register can't hold 1 element, we are done.
+ unsigned MaxVecRegSize = R.getMaxVecRegSize();
+ unsigned EltSize = R.getVectorElementSize(Entry.second[0]);
+ if (MaxVecRegSize < EltSize)
+ continue;
+
+ unsigned MaxElts = MaxVecRegSize / EltSize;
+ for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += MaxElts) {
+ auto Len = std::min<unsigned>(BE - BI, MaxElts);
auto GEPList = makeArrayRef(&Entry.second[BI], Len);
// Initialize a set of candidate getelementptrs. Note that we use a
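With the fixed chunk size of 16 gone, the chunk length now follows from the target: MaxElts = MaxVecRegSize / EltSize, so for example a 256-bit register over 64-bit pointers gives chunks of 4. A standalone sketch of the loop bounds, with the example numbers assumed rather than queried from a target:

#include <algorithm>
#include <cstdio>

int main() {
  // Assumed example: 256-bit vector registers, 64-bit GEP pointers.
  unsigned MaxVecRegSize = 256, EltSize = 64, ListSize = 10;
  if (MaxVecRegSize < EltSize)
    return 0; // a register can't hold even one element
  unsigned MaxElts = MaxVecRegSize / EltSize; // 4 candidates per chunk
  for (unsigned BI = 0; BI < ListSize; BI += MaxElts) {
    unsigned Len = std::min(ListSize - BI, MaxElts);
    std::printf("chunk at %u, length %u\n", BI, Len);
  }
  return 0;
}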
@@ -6824,10 +7048,10 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
// Some of the candidates may have already been vectorized after we
- // initially collected them. If so, the WeakTrackingVHs will have
- // nullified the
- // values, so remove them from the set of candidates.
- Candidates.remove(nullptr);
+ // initially collected them. If so, they are marked as deleted, so remove
+ // them from the set of candidates.
+ Candidates.remove_if(
+ [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });
// Remove from the set of candidates all pairs of getelementptrs with
// constant differences. Such getelementptrs are likely not good
@@ -6835,18 +7059,18 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
// computed from the other. We also ensure all candidate getelementptr
// indices are unique.
for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) {
- auto *GEPI = cast<GetElementPtrInst>(GEPList[I]);
+ auto *GEPI = GEPList[I];
if (!Candidates.count(GEPI))
continue;
auto *SCEVI = SE->getSCEV(GEPList[I]);
for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
- auto *GEPJ = cast<GetElementPtrInst>(GEPList[J]);
+ auto *GEPJ = GEPList[J];
auto *SCEVJ = SE->getSCEV(GEPList[J]);
if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
- Candidates.remove(GEPList[I]);
- Candidates.remove(GEPList[J]);
+ Candidates.remove(GEPI);
+ Candidates.remove(GEPJ);
} else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {
- Candidates.remove(GEPList[J]);
+ Candidates.remove(GEPJ);
}
}
}
diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp
index 517d759d7bfc..4b80d1fb20aa 100644
--- a/lib/Transforms/Vectorize/VPlan.cpp
+++ b/lib/Transforms/Vectorize/VPlan.cpp
@@ -283,6 +283,12 @@ iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
return getParent()->getRecipeList().erase(getIterator());
}
+void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
+ InsertPos->getParent()->getRecipeList().splice(
+ std::next(InsertPos->getIterator()), getParent()->getRecipeList(),
+ getIterator());
+}
+
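moveAfter is a single splice on the recipe list: unlink the node and reinsert it right after InsertPos. The same move expressed with std::list::splice (a sketch only; VPlan uses an intrusive iplist, not std::list):

#include <cassert>
#include <iterator>
#include <list>

int main() {
  std::list<int> L = {1, 2, 3, 4};
  auto From = L.begin();              // the recipe being moved ("1")
  auto Pos = std::next(L.begin(), 2); // InsertPos ("3")
  // Unlink From and reinsert it right after Pos, in one splice.
  L.splice(std::next(Pos), L, From);
  assert((L == std::list<int>{2, 3, 1, 4}));
  return 0;
}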
void VPInstruction::generateInstruction(VPTransformState &State,
unsigned Part) {
IRBuilder<> &Builder = State.Builder;
@@ -309,6 +315,14 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, V, Part);
break;
}
+ case Instruction::Select: {
+ Value *Cond = State.get(getOperand(0), Part);
+ Value *Op1 = State.get(getOperand(1), Part);
+ Value *Op2 = State.get(getOperand(2), Part);
+ Value *V = Builder.CreateSelect(Cond, Op1, Op2);
+ State.set(this, V, Part);
+ break;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -728,7 +742,7 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
auto NewIGIter = Old2New.find(IG);
if (NewIGIter == Old2New.end())
Old2New[IG] = new InterleaveGroup<VPInstruction>(
- IG->getFactor(), IG->isReverse(), IG->getAlignment());
+ IG->getFactor(), IG->isReverse(), Align(IG->getAlignment()));
if (Inst == IG->getInsertPos())
Old2New[IG]->setInsertPos(VPInst);
@@ -736,7 +750,8 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
InterleaveGroupMap[VPInst] = Old2New[IG];
InterleaveGroupMap[VPInst]->insertMember(
VPInst, IG->getIndex(Inst),
- IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor());
+ Align(IG->isReverse() ? (-1) * int(IG->getFactor())
+ : IG->getFactor()));
}
} else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
visitRegion(Region, Old2New, IAI);
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h
index 8a06412ad590..44d8a198f27e 100644
--- a/lib/Transforms/Vectorize/VPlan.h
+++ b/lib/Transforms/Vectorize/VPlan.h
@@ -615,6 +615,10 @@ public:
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
+ /// Unlink this recipe from its current VPBasicBlock and insert it into
+ /// the VPBasicBlock that MovePos lives in, right after MovePos.
+ void moveAfter(VPRecipeBase *MovePos);
+
/// This method unlinks 'this' from the containing basic block and deletes it.
///
/// \returns an iterator pointing to the element after the erased one
diff --git a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
index 7ed7d21b6caa..b22d3190d654 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
@@ -21,7 +21,7 @@ void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
LoopVectorizationLegality::InductionList *Inductions,
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
- VPRegionBlock *TopRegion = dyn_cast<VPRegionBlock>(Plan->getEntry());
+ auto *TopRegion = cast<VPRegionBlock>(Plan->getEntry());
ReversePostOrderTraversal<VPBlockBase *> RPOT(TopRegion->getEntry());
// Condition bit VPValues get deleted during transformation to VPRecipes.
diff --git a/lib/Transforms/Vectorize/VPlanSLP.cpp b/lib/Transforms/Vectorize/VPlanSLP.cpp
index e5ab24e52df6..9019ed15ec5f 100644
--- a/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -346,11 +346,14 @@ SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
dbgs() << " Ops: ";
- for (auto Op : Values)
- if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr())
- dbgs() << *Instr << " | ";
- else
- dbgs() << " nullptr | ";
+ for (auto Op : Values) {
+ if (auto *VPInstr = cast_or_null<VPInstruction>(Op))
+ if (auto *Instr = VPInstr->getUnderlyingInstr()) {
+ dbgs() << *Instr << " | ";
+ continue;
+ }
+ dbgs() << " nullptr | ";
+ }
dbgs() << "\n";
}
diff --git a/lib/WindowsManifest/WindowsManifestMerger.cpp b/lib/WindowsManifest/WindowsManifestMerger.cpp
index d092ab493c9b..031a963cd3b0 100644
--- a/lib/WindowsManifest/WindowsManifestMerger.cpp
+++ b/lib/WindowsManifest/WindowsManifestMerger.cpp
@@ -58,7 +58,7 @@ private:
#if LLVM_LIBXML2_ENABLED
-static const std::pair<StringRef, StringRef> MtNsHrefsPrefixes[] = {
+static constexpr std::pair<StringLiteral, StringLiteral> MtNsHrefsPrefixes[] = {
{"urn:schemas-microsoft-com:asm.v1", "ms_asmv1"},
{"urn:schemas-microsoft-com:asm.v2", "ms_asmv2"},
{"urn:schemas-microsoft-com:asm.v3", "ms_asmv3"},
@@ -704,7 +704,7 @@ bool windows_manifest::isAvailable() { return false; }
#endif
WindowsManifestMerger::WindowsManifestMerger()
- : Impl(make_unique<WindowsManifestMergerImpl>()) {}
+ : Impl(std::make_unique<WindowsManifestMergerImpl>()) {}
WindowsManifestMerger::~WindowsManifestMerger() {}
diff --git a/lib/XRay/FDRRecordProducer.cpp b/lib/XRay/FDRRecordProducer.cpp
index 452bc6c55fb8..479b710444be 100644
--- a/lib/XRay/FDRRecordProducer.cpp
+++ b/lib/XRay/FDRRecordProducer.cpp
@@ -40,32 +40,32 @@ metadataRecordType(const XRayFileHeader &Header, uint8_t T) {
"Invalid metadata record type: %d", T);
switch (T) {
case MetadataRecordKinds::NewBufferKind:
- return make_unique<NewBufferRecord>();
+ return std::make_unique<NewBufferRecord>();
case MetadataRecordKinds::EndOfBufferKind:
if (Header.Version >= 2)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
"End of buffer records are no longer supported starting version "
"2 of the log.");
- return make_unique<EndBufferRecord>();
+ return std::make_unique<EndBufferRecord>();
case MetadataRecordKinds::NewCPUIdKind:
- return make_unique<NewCPUIDRecord>();
+ return std::make_unique<NewCPUIDRecord>();
case MetadataRecordKinds::TSCWrapKind:
- return make_unique<TSCWrapRecord>();
+ return std::make_unique<TSCWrapRecord>();
case MetadataRecordKinds::WalltimeMarkerKind:
- return make_unique<WallclockRecord>();
+ return std::make_unique<WallclockRecord>();
case MetadataRecordKinds::CustomEventMarkerKind:
if (Header.Version >= 5)
- return make_unique<CustomEventRecordV5>();
- return make_unique<CustomEventRecord>();
+ return std::make_unique<CustomEventRecordV5>();
+ return std::make_unique<CustomEventRecord>();
case MetadataRecordKinds::CallArgumentKind:
- return make_unique<CallArgRecord>();
+ return std::make_unique<CallArgRecord>();
case MetadataRecordKinds::BufferExtentsKind:
- return make_unique<BufferExtents>();
+ return std::make_unique<BufferExtents>();
case MetadataRecordKinds::TypedEventMarkerKind:
- return make_unique<TypedEventRecord>();
+ return std::make_unique<TypedEventRecord>();
case MetadataRecordKinds::PidKind:
- return make_unique<PIDRecord>();
+ return std::make_unique<PIDRecord>();
case MetadataRecordKinds::EnumEndMarker:
llvm_unreachable("Invalid MetadataRecordKind");
}
@@ -89,7 +89,7 @@ FileBasedRecordProducer::findNextBufferExtent() {
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading one byte from offset %d.", OffsetPtr);
+ "Failed reading one byte from offset %" PRId64 ".", OffsetPtr);
if (isMetadataIntroducer(FirstByte)) {
auto LoadedType = FirstByte >> 1;
@@ -130,7 +130,7 @@ Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() {
R = std::move(BufferExtentsOrError.get());
assert(R != nullptr);
assert(isa<BufferExtents>(R.get()));
- auto BE = dyn_cast<BufferExtents>(R.get());
+ auto BE = cast<BufferExtents>(R.get());
CurrentBufferBytes = BE->size();
return std::move(R);
}
@@ -151,7 +151,7 @@ Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() {
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading one byte from offset %d.", OffsetPtr);
+ "Failed reading one byte from offset %" PRId64 ".", OffsetPtr);
// For metadata records, handle especially here.
if (isMetadataIntroducer(FirstByte)) {
@@ -162,11 +162,12 @@ Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() {
MetadataRecordOrErr.takeError(),
createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Encountered an unsupported metadata record (%d) at offset %d.",
+ "Encountered an unsupported metadata record (%d) "
+ "at offset %" PRId64 ".",
LoadedType, PreReadOffset));
R = std::move(MetadataRecordOrErr.get());
} else {
- R = llvm::make_unique<FunctionRecord>();
+ R = std::make_unique<FunctionRecord>();
}
RecordInitializer RI(E, OffsetPtr);
@@ -182,8 +183,8 @@ Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() {
if (OffsetPtr - PreReadOffset > CurrentBufferBytes)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Buffer over-read at offset %d (over-read by %d bytes); Record Type "
- "= %s.",
+ "Buffer over-read at offset %" PRId64 " (over-read by %" PRId64
+ " bytes); Record Type = %s.",
OffsetPtr, (OffsetPtr - PreReadOffset) - CurrentBufferBytes,
Record::kindToString(R->getRecordType()).data());
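The XRay offset parameters widen from uint32_t to uint64_t here and below, so the %d conversions in these error messages would no longer match their arguments; the PRId64 macros from <cinttypes> keep the format strings in sync. A minimal standalone example of the idiom:

#include <cinttypes>
#include <cstdio>

int main() {
  uint64_t OffsetPtr = 4096;
  // A 64-bit offset needs a 64-bit conversion specifier, not %d.
  std::printf("Failed reading one byte from offset %" PRId64 ".\n",
              static_cast<int64_t>(OffsetPtr));
  return 0;
}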
diff --git a/lib/XRay/FileHeaderReader.cpp b/lib/XRay/FileHeaderReader.cpp
index 3fb021906a6f..6b6daf9deba5 100644
--- a/lib/XRay/FileHeaderReader.cpp
+++ b/lib/XRay/FileHeaderReader.cpp
@@ -12,7 +12,7 @@ namespace xray {
// Populates the FileHeader reference by reading the first 32 bytes of the file.
Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
- uint32_t &OffsetPtr) {
+ uint64_t &OffsetPtr) {
// FIXME: Maybe deduce whether the data is little or big-endian using some
// magic bytes in the beginning of the file?
@@ -30,21 +30,24 @@ Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading version from file header at offset %d.", OffsetPtr);
+ "Failed reading version from file header at offset %" PRId64 ".",
+ OffsetPtr);
PreReadOffset = OffsetPtr;
FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading file type from file header at offset %d.", OffsetPtr);
+ "Failed reading file type from file header at offset %" PRId64 ".",
+ OffsetPtr);
PreReadOffset = OffsetPtr;
uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading flag bits from file header at offset %d.", OffsetPtr);
+ "Failed reading flag bits from file header at offset %" PRId64 ".",
+ OffsetPtr);
FileHeader.ConstantTSC = Bitfield & 1uL;
FileHeader.NonstopTSC = Bitfield & 1uL << 1;
@@ -53,7 +56,8 @@ Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading cycle frequency from file header at offset %d.",
+ "Failed reading cycle frequency from file header at offset %" PRId64
+ ".",
OffsetPtr);
std::memcpy(&FileHeader.FreeFormData,
diff --git a/lib/XRay/InstrumentationMap.cpp b/lib/XRay/InstrumentationMap.cpp
index fe5e941f7ea6..7453613c7038 100644
--- a/lib/XRay/InstrumentationMap.cpp
+++ b/lib/XRay/InstrumentationMap.cpp
@@ -67,10 +67,11 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
StringRef Contents = "";
const auto &Sections = ObjFile.getBinary()->sections();
auto I = llvm::find_if(Sections, [&](object::SectionRef Section) {
- StringRef Name = "";
- if (Section.getName(Name))
- return false;
- return Name == "xray_instr_map";
+ Expected<StringRef> NameOrErr = Section.getName();
+ if (NameOrErr)
+ return *NameOrErr == "xray_instr_map";
+ consumeError(NameOrErr.takeError());
+ return false;
});
if (I == Sections.end())
@@ -118,7 +119,7 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
"an XRay sled entry in ELF64."),
std::make_error_code(std::errc::executable_format_error));
- auto RelocateOrElse = [&](uint32_t Offset, uint64_t Address) {
+ auto RelocateOrElse = [&](uint64_t Offset, uint64_t Address) {
if (!Address) {
uint64_t A = I->getAddress() + C - Contents.bytes_begin() + Offset;
RelocMap::const_iterator R = Relocs.find(A);
@@ -136,10 +137,10 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
8);
Sleds.push_back({});
auto &Entry = Sleds.back();
- uint32_t OffsetPtr = 0;
- uint32_t AddrOff = OffsetPtr;
+ uint64_t OffsetPtr = 0;
+ uint64_t AddrOff = OffsetPtr;
Entry.Address = RelocateOrElse(AddrOff, Extractor.getU64(&OffsetPtr));
- uint32_t FuncOff = OffsetPtr;
+ uint64_t FuncOff = OffsetPtr;
Entry.Function = RelocateOrElse(FuncOff, Extractor.getU64(&OffsetPtr));
auto Kind = Extractor.getU8(&OffsetPtr);
static constexpr SledEntry::FunctionKinds Kinds[] = {
diff --git a/lib/XRay/Profile.cpp b/lib/XRay/Profile.cpp
index e34b182f2e02..c1a43632b600 100644
--- a/lib/XRay/Profile.cpp
+++ b/lib/XRay/Profile.cpp
@@ -49,9 +49,9 @@ struct BlockHeader {
};
static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
- uint32_t &Offset) {
+ uint64_t &Offset) {
BlockHeader H;
- uint32_t CurrentOffset = Offset;
+ uint64_t CurrentOffset = Offset;
H.Size = Extractor.getU32(&Offset);
if (Offset == CurrentOffset)
return make_error<StringError>(
@@ -76,7 +76,7 @@ static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
}
static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
- uint32_t &Offset) {
+ uint64_t &Offset) {
// We're reading a sequence of int32_t's until we find a 0.
std::vector<Profile::FuncID> Path;
auto CurrentOffset = Offset;
@@ -94,7 +94,7 @@ static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
}
static Expected<Profile::Data> readData(DataExtractor &Extractor,
- uint32_t &Offset) {
+ uint64_t &Offset) {
// We expect a certain number of elements for Data:
// - A 64-bit CallCount
// - A 64-bit CumulativeLocalTime counter
@@ -280,7 +280,7 @@ Expected<Profile> loadProfile(StringRef Filename) {
StringRef Data(MappedFile.data(), MappedFile.size());
Profile P;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
DataExtractor Extractor(Data, true, 8);
// For each block we get from the file:
diff --git a/lib/XRay/RecordInitializer.cpp b/lib/XRay/RecordInitializer.cpp
index 78163031a8cc..68ab3db06208 100644
--- a/lib/XRay/RecordInitializer.cpp
+++ b/lib/XRay/RecordInitializer.cpp
@@ -12,15 +12,15 @@ namespace xray {
Error RecordInitializer::visit(BufferExtents &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr, sizeof(uint64_t)))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a buffer extent (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a buffer extent (%" PRId64 ").", OffsetPtr);
auto PreReadOffset = OffsetPtr;
R.Size = E.getU64(&OffsetPtr);
if (PreReadOffset == OffsetPtr)
return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Cannot read buffer extent at offset %d.",
+ "Cannot read buffer extent at offset %" PRId64 ".",
OffsetPtr);
OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
@@ -30,23 +30,25 @@ Error RecordInitializer::visit(BufferExtents &R) {
Error RecordInitializer::visit(WallclockRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a wallclock record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a wallclock record (%" PRId64 ").", OffsetPtr);
auto BeginOffset = OffsetPtr;
auto PreReadOffset = OffsetPtr;
R.Seconds = E.getU64(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Cannot read wall clock 'seconds' field at offset %d.", OffsetPtr);
+ "Cannot read wall clock 'seconds' field at offset %" PRId64 ".",
+ OffsetPtr);
PreReadOffset = OffsetPtr;
R.Nanos = E.getU32(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Cannot read wall clock 'nanos' field at offset %d.", OffsetPtr);
+ "Cannot read wall clock 'nanos' field at offset %" PRId64 ".",
+ OffsetPtr);
// Align to metadata record size boundary.
assert(OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
@@ -57,21 +59,23 @@ Error RecordInitializer::visit(WallclockRecord &R) {
Error RecordInitializer::visit(NewCPUIDRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a new cpu id record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a new cpu id record (%" PRId64 ").", OffsetPtr);
auto BeginOffset = OffsetPtr;
auto PreReadOffset = OffsetPtr;
R.CPUId = E.getU16(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Cannot read CPU id at offset %d.", OffsetPtr);
+ "Cannot read CPU id at offset %" PRId64 ".",
+ OffsetPtr);
PreReadOffset = OffsetPtr;
R.TSC = E.getU64(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Cannot read CPU TSC at offset %d.", OffsetPtr);
+ "Cannot read CPU TSC at offset %" PRId64 ".",
+ OffsetPtr);
OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
return Error::success();
@@ -80,16 +84,16 @@ Error RecordInitializer::visit(NewCPUIDRecord &R) {
Error RecordInitializer::visit(TSCWrapRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a new TSC wrap record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a new TSC wrap record (%" PRId64 ").", OffsetPtr);
auto PreReadOffset = OffsetPtr;
R.BaseTSC = E.getU64(&OffsetPtr);
if (PreReadOffset == OffsetPtr)
- return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Cannot read TSC wrap record at offset %d.",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read TSC wrap record at offset %" PRId64 ".", OffsetPtr);
OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
return Error::success();
@@ -98,9 +102,9 @@ Error RecordInitializer::visit(TSCWrapRecord &R) {
Error RecordInitializer::visit(CustomEventRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a custom event record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a custom event record (%" PRId64 ").", OffsetPtr);
auto BeginOffset = OffsetPtr;
auto PreReadOffset = OffsetPtr;
@@ -108,20 +112,22 @@ Error RecordInitializer::visit(CustomEventRecord &R) {
if (PreReadOffset == OffsetPtr)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Cannot read a custom event record size field offset %d.", OffsetPtr);
+ "Cannot read a custom event record size field offset %" PRId64 ".",
+ OffsetPtr);
if (R.Size <= 0)
return createStringError(
std::make_error_code(std::errc::bad_address),
- "Invalid size for custom event (size = %d) at offset %d.", R.Size,
- OffsetPtr);
+ "Invalid size for custom event (size = %d) at offset %" PRId64 ".",
+ R.Size, OffsetPtr);
PreReadOffset = OffsetPtr;
R.TSC = E.getU64(&OffsetPtr);
if (PreReadOffset == OffsetPtr)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Cannot read a custom event TSC field at offset %d.", OffsetPtr);
+ "Cannot read a custom event TSC field at offset %" PRId64 ".",
+ OffsetPtr);
// For version 4 of the FDR log onwards, we also want to capture the CPU ID
// of the custom event.
@@ -131,7 +137,7 @@ Error RecordInitializer::visit(CustomEventRecord &R) {
if (PreReadOffset == OffsetPtr)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Missing CPU field at offset %d", OffsetPtr);
+ "Missing CPU field at offset %" PRId64 ".", OffsetPtr);
}
assert(OffsetPtr > BeginOffset &&
@@ -142,8 +148,8 @@ Error RecordInitializer::visit(CustomEventRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size))
return createStringError(
std::make_error_code(std::errc::bad_address),
- "Cannot read %d bytes of custom event data from offset %d.", R.Size,
- OffsetPtr);
+ "Cannot read %d bytes of custom event data from offset %" PRId64 ".",
+ R.Size, OffsetPtr);
std::vector<uint8_t> Buffer;
Buffer.resize(R.Size);
@@ -151,15 +157,15 @@ Error RecordInitializer::visit(CustomEventRecord &R) {
if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data())
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading data into buffer of size %d at offset %d.", R.Size,
- OffsetPtr);
+ "Failed reading data into buffer of size %d at offset %" PRId64 ".",
+ R.Size, OffsetPtr);
assert(OffsetPtr >= PreReadOffset);
if (OffsetPtr - PreReadOffset != static_cast<uint32_t>(R.Size))
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading enough bytes for the custom event payload -- read %d "
- "expecting %d bytes at offset %d.",
+ "Failed reading enough bytes for the custom event payload -- read "
+ "%" PRId64 " expecting %d bytes at offset %" PRId64 ".",
OffsetPtr - PreReadOffset, R.Size, PreReadOffset);
R.Data.assign(Buffer.begin(), Buffer.end());
@@ -169,9 +175,9 @@ Error RecordInitializer::visit(CustomEventRecord &R) {
Error RecordInitializer::visit(CustomEventRecordV5 &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a custom event record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a custom event record (%" PRId64 ").", OffsetPtr);
auto BeginOffset = OffsetPtr;
auto PreReadOffset = OffsetPtr;
@@ -180,20 +186,22 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) {
if (PreReadOffset == OffsetPtr)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Cannot read a custom event record size field offset %d.", OffsetPtr);
+ "Cannot read a custom event record size field offset %" PRId64 ".",
+ OffsetPtr);
if (R.Size <= 0)
return createStringError(
std::make_error_code(std::errc::bad_address),
- "Invalid size for custom event (size = %d) at offset %d.", R.Size,
- OffsetPtr);
+ "Invalid size for custom event (size = %d) at offset %" PRId64 ".",
+ R.Size, OffsetPtr);
PreReadOffset = OffsetPtr;
R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t));
if (PreReadOffset == OffsetPtr)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Cannot read a custom event record TSC delta field at offset %d.",
+ "Cannot read a custom event record TSC delta field at offset "
+ "%" PRId64 ".",
OffsetPtr);
assert(OffsetPtr > BeginOffset &&
@@ -204,8 +212,8 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size))
return createStringError(
std::make_error_code(std::errc::bad_address),
- "Cannot read %d bytes of custom event data from offset %d.", R.Size,
- OffsetPtr);
+ "Cannot read %d bytes of custom event data from offset %" PRId64 ".",
+ R.Size, OffsetPtr);
std::vector<uint8_t> Buffer;
Buffer.resize(R.Size);
@@ -213,15 +221,15 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) {
if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data())
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading data into buffer of size %d at offset %d.", R.Size,
- OffsetPtr);
+ "Failed reading data into buffer of size %d at offset %" PRId64 ".",
+ R.Size, OffsetPtr);
assert(OffsetPtr >= PreReadOffset);
if (OffsetPtr - PreReadOffset != static_cast<uint32_t>(R.Size))
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading enough bytes for the custom event payload -- read %d "
- "expecting %d bytes at offset %d.",
+ "Failed reading enough bytes for the custom event payload -- read "
+ "%" PRId64 " expecting %d bytes at offset %" PRId64 ".",
OffsetPtr - PreReadOffset, R.Size, PreReadOffset);
R.Data.assign(Buffer.begin(), Buffer.end());
@@ -231,9 +239,9 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) {
Error RecordInitializer::visit(TypedEventRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a typed event record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a typed event record (%" PRId64 ").", OffsetPtr);
auto BeginOffset = OffsetPtr;
auto PreReadOffset = OffsetPtr;
@@ -242,20 +250,22 @@ Error RecordInitializer::visit(TypedEventRecord &R) {
if (PreReadOffset == OffsetPtr)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Cannot read a typed event record size field offset %d.", OffsetPtr);
+ "Cannot read a typed event record size field offset %" PRId64 ".",
+ OffsetPtr);
if (R.Size <= 0)
return createStringError(
std::make_error_code(std::errc::bad_address),
- "Invalid size for typed event (size = %d) at offset %d.", R.Size,
- OffsetPtr);
+ "Invalid size for typed event (size = %d) at offset %" PRId64 ".",
+ R.Size, OffsetPtr);
PreReadOffset = OffsetPtr;
R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t));
if (PreReadOffset == OffsetPtr)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Cannot read a typed event record TSC delta field at offset %d.",
+ "Cannot read a typed event record TSC delta field at offset "
+ "%" PRId64 ".",
OffsetPtr);
PreReadOffset = OffsetPtr;
@@ -263,7 +273,8 @@ Error RecordInitializer::visit(TypedEventRecord &R) {
if (PreReadOffset == OffsetPtr)
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Cannot read a typed event record type field at offset %d.", OffsetPtr);
+ "Cannot read a typed event record type field at offset %" PRId64 ".",
+ OffsetPtr);
assert(OffsetPtr > BeginOffset &&
OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
@@ -273,8 +284,8 @@ Error RecordInitializer::visit(TypedEventRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size))
return createStringError(
std::make_error_code(std::errc::bad_address),
- "Cannot read %d bytes of custom event data from offset %d.", R.Size,
- OffsetPtr);
+ "Cannot read %d bytes of custom event data from offset %" PRId64 ".",
+ R.Size, OffsetPtr);
std::vector<uint8_t> Buffer;
Buffer.resize(R.Size);
@@ -282,15 +293,15 @@ Error RecordInitializer::visit(TypedEventRecord &R) {
if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data())
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading data into buffer of size %d at offset %d.", R.Size,
- OffsetPtr);
+ "Failed reading data into buffer of size %d at offset %" PRId64 ".",
+ R.Size, OffsetPtr);
assert(OffsetPtr >= PreReadOffset);
if (OffsetPtr - PreReadOffset != static_cast<uint32_t>(R.Size))
return createStringError(
std::make_error_code(std::errc::invalid_argument),
- "Failed reading enough bytes for the typed event payload -- read %d "
- "expecting %d bytes at offset %d.",
+ "Failed reading enough bytes for the typed event payload -- read "
+ "%" PRId64 " expecting %d bytes at offset %" PRId64 ".",
OffsetPtr - PreReadOffset, R.Size, PreReadOffset);
R.Data.assign(Buffer.begin(), Buffer.end());
@@ -300,16 +311,17 @@ Error RecordInitializer::visit(TypedEventRecord &R) {
Error RecordInitializer::visit(CallArgRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a call argument record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a call argument record (%" PRId64 ").",
+ OffsetPtr);
auto PreReadOffset = OffsetPtr;
R.Arg = E.getU64(&OffsetPtr);
if (PreReadOffset == OffsetPtr)
- return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Cannot read a call arg record at offset %d.",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a call arg record at offset %" PRId64 ".", OffsetPtr);
OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
return Error::success();
@@ -318,16 +330,16 @@ Error RecordInitializer::visit(CallArgRecord &R) {
Error RecordInitializer::visit(PIDRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a process ID record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a process ID record (%" PRId64 ").", OffsetPtr);
auto PreReadOffset = OffsetPtr;
R.PID = E.getSigned(&OffsetPtr, 4);
if (PreReadOffset == OffsetPtr)
- return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Cannot read a process ID record at offset %d.",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a process ID record at offset %" PRId64 ".", OffsetPtr);
OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
return Error::success();
@@ -336,16 +348,16 @@ Error RecordInitializer::visit(PIDRecord &R) {
Error RecordInitializer::visit(NewBufferRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a new buffer record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a new buffer record (%" PRId64 ").", OffsetPtr);
auto PreReadOffset = OffsetPtr;
R.TID = E.getSigned(&OffsetPtr, sizeof(int32_t));
if (PreReadOffset == OffsetPtr)
- return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Cannot read a new buffer record at offset %d.",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a new buffer record at offset %" PRId64 ".", OffsetPtr);
OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
return Error::success();
@@ -354,9 +366,10 @@ Error RecordInitializer::visit(NewBufferRecord &R) {
Error RecordInitializer::visit(EndBufferRecord &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr,
MetadataRecord::kMetadataBodySize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for an end-of-buffer record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for an end-of-buffer record (%" PRId64 ").",
+ OffsetPtr);
OffsetPtr += MetadataRecord::kMetadataBodySize;
return Error::success();
@@ -373,17 +386,17 @@ Error RecordInitializer::visit(FunctionRecord &R) {
//
if (OffsetPtr == 0 || !E.isValidOffsetForDataOfSize(
--OffsetPtr, FunctionRecord::kFunctionRecordSize))
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Invalid offset for a function record (%d).",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a function record (%" PRId64 ").", OffsetPtr);
auto BeginOffset = OffsetPtr;
auto PreReadOffset = BeginOffset;
uint32_t Buffer = E.getU32(&OffsetPtr);
if (PreReadOffset == OffsetPtr)
- return createStringError(std::make_error_code(std::errc::bad_address),
- "Cannot read function id field from offset %d.",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Cannot read function id field from offset %" PRId64 ".", OffsetPtr);
// To get the function record type, we shift the buffer one to the right
// (truncating the function record indicator) then take the three bits
@@ -397,18 +410,19 @@ Error RecordInitializer::visit(FunctionRecord &R) {
R.Kind = static_cast<RecordTypes>(FunctionType);
break;
default:
- return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Unknown function record type '%d' at offset %d.",
- FunctionType, BeginOffset);
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Unknown function record type '%d' at offset %" PRId64 ".",
+ FunctionType, BeginOffset);
}
R.FuncId = Buffer >> 4;
PreReadOffset = OffsetPtr;
R.Delta = E.getU32(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
- return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Failed reading TSC delta from offset %d.",
- OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading TSC delta from offset %" PRId64 ".", OffsetPtr);
assert(FunctionRecord::kFunctionRecordSize == (OffsetPtr - BeginOffset));
return Error::success();
}
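
The XRay changes above are mechanical: OffsetPtr was widened to uint64_t, and a printf-style "%d" paired with a 64-bit vararg is undefined behavior, so every format string gains the width macro from <cinttypes>. A minimal standalone sketch of the idiom (not taken from the patch):

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  std::uint64_t OffsetPtr = UINT64_C(1) << 40; // wider than any plain int
  // "%" PRId64 expands to the host's 64-bit conversion ("%ld" on LP64,
  // "%lld" on LLP64), so the vararg read matches the argument width.
  std::printf("Invalid offset for a buffer extent (%" PRId64 ").\n",
              static_cast<std::int64_t>(OffsetPtr));
  return 0;
}
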
diff --git a/lib/XRay/Trace.cpp b/lib/XRay/Trace.cpp
index b9b67c561c66..4f107e1059cc 100644
--- a/lib/XRay/Trace.cpp
+++ b/lib/XRay/Trace.cpp
@@ -47,7 +47,7 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
std::make_error_code(std::errc::invalid_argument));
DataExtractor Reader(Data, IsLittleEndian, 8);
- uint32_t OffsetPtr = 0;
+ uint64_t OffsetPtr = 0;
auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr);
if (!FileHeaderOrError)
return FileHeaderOrError.takeError();
@@ -67,13 +67,14 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
if (!Reader.isValidOffsetForDataOfSize(OffsetPtr, 32))
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Not enough bytes to read a full record at offset %d.", OffsetPtr);
+ "Not enough bytes to read a full record at offset %" PRId64 ".",
+ OffsetPtr);
auto PreReadOffset = OffsetPtr;
auto RecordType = Reader.getU16(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading record type at offset %d.", OffsetPtr);
+ "Failed reading record type at offset %" PRId64 ".", OffsetPtr);
switch (RecordType) {
case 0: { // Normal records.
@@ -86,14 +87,15 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading CPU field at offset %d.", OffsetPtr);
+ "Failed reading CPU field at offset %" PRId64 ".", OffsetPtr);
PreReadOffset = OffsetPtr;
auto Type = Reader.getU8(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading record type field at offset %d.", OffsetPtr);
+ "Failed reading record type field at offset %" PRId64 ".",
+ OffsetPtr);
switch (Type) {
case 0:
@@ -111,7 +113,7 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
default:
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Unknown record type '%d' at offset %d.", Type, OffsetPtr);
+ "Unknown record type '%d' at offset %" PRId64 ".", Type, OffsetPtr);
}
PreReadOffset = OffsetPtr;
@@ -119,28 +121,29 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading function id field at offset %d.", OffsetPtr);
+ "Failed reading function id field at offset %" PRId64 ".",
+ OffsetPtr);
PreReadOffset = OffsetPtr;
Record.TSC = Reader.getU64(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading TSC field at offset %d.", OffsetPtr);
+ "Failed reading TSC field at offset %" PRId64 ".", OffsetPtr);
PreReadOffset = OffsetPtr;
Record.TId = Reader.getU32(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading thread id field at offset %d.", OffsetPtr);
+ "Failed reading thread id field at offset %" PRId64 ".", OffsetPtr);
PreReadOffset = OffsetPtr;
Record.PId = Reader.getU32(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading process id at offset %d.", OffsetPtr);
+ "Failed reading process id at offset %" PRId64 ".", OffsetPtr);
break;
}
@@ -155,21 +158,23 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading function id field at offset %d.", OffsetPtr);
+ "Failed reading function id field at offset %" PRId64 ".",
+ OffsetPtr);
PreReadOffset = OffsetPtr;
auto TId = Reader.getU32(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading thread id field at offset %d.", OffsetPtr);
+ "Failed reading thread id field at offset %" PRId64 ".", OffsetPtr);
PreReadOffset = OffsetPtr;
auto PId = Reader.getU32(&OffsetPtr);
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading process id field at offset %d.", OffsetPtr);
+ "Failed reading process id field at offset %" PRId64 ".",
+ OffsetPtr);
// For versions above 3, also check the Pid field
if (Record.FuncId != FuncId || Record.TId != TId ||
@@ -178,7 +183,7 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
std::make_error_code(std::errc::executable_format_error),
"Corrupted log, found arg payload following non-matching "
"function+thread record. Record for function %d != %d at offset "
- "%d",
+ "%" PRId64 ".",
Record.FuncId, FuncId, OffsetPtr);
PreReadOffset = OffsetPtr;
@@ -186,7 +191,8 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
if (OffsetPtr == PreReadOffset)
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Failed reading argument payload at offset %d.", OffsetPtr);
+ "Failed reading argument payload at offset %" PRId64 ".",
+ OffsetPtr);
Record.CallArgs.push_back(Arg);
break;
@@ -194,7 +200,8 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
default:
return createStringError(
std::make_error_code(std::errc::executable_format_error),
- "Unknown record type '%d' at offset %d.", RecordType, OffsetPtr);
+ "Unknown record type '%d' at offset %" PRId64 ".", RecordType,
+ OffsetPtr);
}
// Advance the offset pointer enough bytes to align to 32-byte records for
// basic mode logs.
@@ -265,7 +272,7 @@ Error loadFDRLog(StringRef Data, bool IsLittleEndian,
"Not enough bytes for an XRay FDR log.");
DataExtractor DE(Data, IsLittleEndian, 8);
- uint32_t OffsetPtr = 0;
+ uint64_t OffsetPtr = 0;
auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr);
if (!FileHeaderOrError)
return FileHeaderOrError.takeError();
@@ -424,7 +431,7 @@ Expected<Trace> llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) {
// Only if we can't load either the binary or the YAML format will we yield an
// error.
DataExtractor HeaderExtractor(DE.getData(), DE.isLittleEndian(), 8);
- uint32_t OffsetPtr = 0;
+ uint64_t OffsetPtr = 0;
uint16_t Version = HeaderExtractor.getU16(&OffsetPtr);
uint16_t Type = HeaderExtractor.getU16(&OffsetPtr);
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index 75f166b21b2c..fe5201eb2e6c 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -217,8 +217,7 @@ public:
/// returning the transformed module on success, or a null pointer on failure.
std::unique_ptr<Module> runPassesOn(Module *M,
const std::vector<std::string> &Passes,
- unsigned NumExtraArgs = 0,
- const char *const *ExtraArgs = nullptr);
+ ArrayRef<std::string> ExtraArgs = {});
/// runPasses - Run the specified passes on Program, outputting a bitcode
/// file and writing the filename into OutputFile if successful. If the
@@ -231,8 +230,8 @@ public:
///
bool runPasses(Module &Program, const std::vector<std::string> &PassesToRun,
std::string &OutputFilename, bool DeleteOutput = false,
- bool Quiet = false, unsigned NumExtraArgs = 0,
- const char *const *ExtraArgs = nullptr) const;
+ bool Quiet = false,
+ ArrayRef<std::string> ExtraArgs = {}) const;
/// runPasses - Just like the method above, but this just returns true or
/// false indicating whether or not the optimizer crashed on the specified
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index 105702de3f1d..d9047acd30e1 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -407,11 +407,10 @@ BugDriver::extractMappedBlocksFromModule(const std::vector<BasicBlock *> &BBs,
std::string uniqueFN = "--extract-blocks-file=";
uniqueFN += Temp->TmpName;
- const char *ExtraArg = uniqueFN.c_str();
std::vector<std::string> PI;
PI.push_back("extract-blocks");
- std::unique_ptr<Module> Ret = runPassesOn(M, PI, 1, &ExtraArg);
+ std::unique_ptr<Module> Ret = runPassesOn(M, PI, {uniqueFN});
if (!Ret) {
outs() << "*** Basic Block extraction failed, please report a bug!\n";
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 562de7952388..64af81fcc8a1 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -79,7 +79,7 @@ bool BugDriver::writeProgramToFile(int FD, const Module &M) const {
bool BugDriver::writeProgramToFile(const std::string &Filename,
const Module &M) const {
std::error_code EC;
- ToolOutputFile Out(Filename, EC, sys::fs::F_None);
+ ToolOutputFile Out(Filename, EC, sys::fs::OF_None);
if (!EC)
return writeProgramToFileAux(Out, M);
return true;
@@ -130,8 +130,7 @@ static cl::list<std::string> OptArgs("opt-args", cl::Positional,
bool BugDriver::runPasses(Module &Program,
const std::vector<std::string> &Passes,
std::string &OutputFilename, bool DeleteOutput,
- bool Quiet, unsigned NumExtraArgs,
- const char *const *ExtraArgs) const {
+ bool Quiet, ArrayRef<std::string> ExtraArgs) const {
// setup the output file name
outs().flush();
SmallString<128> UniqueFilename;
@@ -223,8 +222,7 @@ bool BugDriver::runPasses(Module &Program,
I != E; ++I)
Args.push_back(I->c_str());
Args.push_back(Temp->TmpName.c_str());
- for (unsigned i = 0; i < NumExtraArgs; ++i)
- Args.push_back(*ExtraArgs);
+ Args.append(ExtraArgs.begin(), ExtraArgs.end());
LLVM_DEBUG(errs() << "\nAbout to run:\t";
for (unsigned i = 0, e = Args.size() - 1; i != e; ++i) errs()
@@ -268,10 +266,10 @@ bool BugDriver::runPasses(Module &Program,
std::unique_ptr<Module>
BugDriver::runPassesOn(Module *M, const std::vector<std::string> &Passes,
- unsigned NumExtraArgs, const char *const *ExtraArgs) {
+ ArrayRef<std::string> ExtraArgs) {
std::string BitcodeResult;
if (runPasses(*M, Passes, BitcodeResult, false /*delete*/, true /*quiet*/,
- NumExtraArgs, ExtraArgs)) {
+ ExtraArgs)) {
return nullptr;
}
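
The bugpoint refactor above replaces a raw (count, pointer) pair with ArrayRef<std::string>, which carries both and converts implicitly from std::vector. It also retires a latent quirk: the old loop pushed *ExtraArgs, the first argument, on every iteration (harmless in practice, since the caller visible in this patch passes a single argument). A minimal sketch, with runTool standing in for runPassesOn:

#include "llvm/ADT/ArrayRef.h"
#include <string>
#include <vector>

// Hypothetical stand-in for runPassesOn: the ArrayRef carries pointer and
// length together, replacing the (unsigned, const char *const *) pair.
static void runTool(llvm::ArrayRef<std::string> ExtraArgs) {
  for (const std::string &Arg : ExtraArgs)
    (void)Arg; // e.g. append each argument to the subprocess command line
}

int main() {
  runTool({}); // the new default: no extra arguments
  std::vector<std::string> Extra = {"--extract-blocks-file=/tmp/blocks"};
  runTool(Extra); // implicit conversion from std::vector<std::string>
  return 0;
}
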
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index da4244345e3b..19b2ea2c0181 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -170,7 +170,7 @@ Expected<int> LLI::ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &SharedLibs,
unsigned Timeout, unsigned MemoryLimit) {
std::vector<StringRef> LLIArgs;
- LLIArgs.push_back(LLIPath.c_str());
+ LLIArgs.push_back(LLIPath);
LLIArgs.push_back("-force-interpreter=true");
for (std::vector<std::string>::const_iterator i = SharedLibs.begin(),
@@ -266,15 +266,15 @@ Error CustomCompiler::compileProgram(const std::string &Bitcode,
unsigned Timeout, unsigned MemoryLimit) {
std::vector<StringRef> ProgramArgs;
- ProgramArgs.push_back(CompilerCommand.c_str());
+ ProgramArgs.push_back(CompilerCommand);
- for (std::size_t i = 0; i < CompilerArgs.size(); ++i)
- ProgramArgs.push_back(CompilerArgs.at(i).c_str());
+ for (const auto &Arg : CompilerArgs)
+ ProgramArgs.push_back(Arg);
ProgramArgs.push_back(Bitcode);
// Add optional parameters to the running program from Argv
- for (unsigned i = 0, e = CompilerArgs.size(); i != e; ++i)
- ProgramArgs.push_back(CompilerArgs[i].c_str());
+ for (const auto &Arg : CompilerArgs)
+ ProgramArgs.push_back(Arg);
if (RunProgramWithTimeout(CompilerCommand, ProgramArgs, "", "", "", Timeout,
MemoryLimit))
@@ -559,7 +559,7 @@ Expected<int> JIT::ExecuteProgram(const std::string &Bitcode,
unsigned Timeout, unsigned MemoryLimit) {
// Construct a vector of parameters, incorporating those from the command-line
std::vector<StringRef> JITArgs;
- JITArgs.push_back(LLIPath.c_str());
+ JITArgs.push_back(LLIPath);
JITArgs.push_back("-force-interpreter=false");
// Add any extra LLI args.
@@ -570,7 +570,7 @@ Expected<int> JIT::ExecuteProgram(const std::string &Bitcode,
JITArgs.push_back("-load");
JITArgs.push_back(SharedLibs[i]);
}
- JITArgs.push_back(Bitcode.c_str());
+ JITArgs.push_back(Bitcode);
// Add optional parameters to the running program from Argv
for (unsigned i = 0, e = Args.size(); i != e; ++i)
JITArgs.push_back(Args[i]);
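
The ToolRunner cleanups rely on StringRef's implicit conversion from std::string, so the c_str() round-trips can go. A small sketch of the pattern and its one caveat, using a hypothetical path value:

#include "llvm/ADT/StringRef.h"
#include <string>
#include <vector>

int main() {
  std::string LLIPath = "/usr/local/bin/lli"; // hypothetical path
  std::vector<llvm::StringRef> Args;
  Args.push_back(LLIPath); // StringRef binds directly to the string's storage
  Args.push_back("-force-interpreter=true");
  // Caveat: every referenced std::string must outlive the StringRef vector.
  return 0;
}
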
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index 2d5322a351ad..c7644e75ae4b 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -81,6 +81,10 @@ static cl::opt<bool> OptLevelOs(
"Like -O2 with extra optimizations for size. Similar to clang -Os"));
static cl::opt<bool>
+OptLevelOz("Oz",
+ cl::desc("Like -Os but reduces code size further. Similar to clang -Oz"));
+
+static cl::opt<bool>
OptLevelO3("O3", cl::desc("Optimization level 3. Identical to 'opt -O3'"));
static cl::opt<std::string>
@@ -109,6 +113,26 @@ public:
};
}
+// This routine adds optimization passes based on the selected optimization
+// level (OptLevel) and size optimization level (SizeLevel).
+static void AddOptimizationPasses(legacy::FunctionPassManager &FPM,
+ unsigned OptLevel,
+ unsigned SizeLevel) {
+ PassManagerBuilder Builder;
+ Builder.OptLevel = OptLevel;
+ Builder.SizeLevel = SizeLevel;
+
+ if (OptLevel > 1)
+ Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel, false);
+ else
+ Builder.Inliner = createAlwaysInlinerLegacyPass();
+
+ Builder.populateFunctionPassManager(FPM);
+ Builder.populateModulePassManager(FPM);
+}
+
#ifdef LINK_POLLY_INTO_TOOLS
namespace polly {
void initializePollyPasses(llvm::PassRegistry &Registry);
@@ -189,18 +213,16 @@ int main(int argc, char **argv) {
Builder.populateLTOPassManager(PM);
}
- if (OptLevelO1 || OptLevelO2 || OptLevelO3) {
- PassManagerBuilder Builder;
- if (OptLevelO1)
- Builder.Inliner = createAlwaysInlinerLegacyPass();
- else if (OptLevelOs || OptLevelO2)
- Builder.Inliner = createFunctionInliningPass(
- 2, OptLevelOs ? 1 : 0, false);
- else
- Builder.Inliner = createFunctionInliningPass(275);
- Builder.populateFunctionPassManager(PM);
- Builder.populateModulePassManager(PM);
- }
+ if (OptLevelO1)
+ AddOptimizationPasses(PM, 1, 0);
+ else if (OptLevelO2)
+ AddOptimizationPasses(PM, 2, 0);
+ else if (OptLevelO3)
+ AddOptimizationPasses(PM, 3, 0);
+ else if (OptLevelOs)
+ AddOptimizationPasses(PM, 2, 1);
+ else if (OptLevelOz)
+ AddOptimizationPasses(PM, 2, 2);
for (const PassInfo *PI : PassList)
D.addPass(PI->getPassArgument());
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 76da843f065e..574b15b399c3 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -239,10 +239,10 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
// Open the file.
std::error_code EC;
- sys::fs::OpenFlags OpenFlags = sys::fs::F_None;
+ sys::fs::OpenFlags OpenFlags = sys::fs::OF_None;
if (!Binary)
- OpenFlags |= sys::fs::F_Text;
- auto FDOut = llvm::make_unique<ToolOutputFile>(OutputFilename, EC, OpenFlags);
+ OpenFlags |= sys::fs::OF_Text;
+ auto FDOut = std::make_unique<ToolOutputFile>(OutputFilename, EC, OpenFlags);
if (EC) {
WithColor::error() << EC.message() << '\n';
return nullptr;
@@ -329,7 +329,7 @@ int main(int argc, char **argv) {
// Set a diagnostic handler that doesn't exit on the first error
bool HasError = false;
Context.setDiagnosticHandler(
- llvm::make_unique<LLCDiagnosticHandler>(&HasError));
+ std::make_unique<LLCDiagnosticHandler>(&HasError));
Context.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, &HasError);
Expected<std::unique_ptr<ToolOutputFile>> RemarksFileOrErr =
@@ -479,8 +479,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
std::unique_ptr<ToolOutputFile> DwoOut;
if (!SplitDwarfOutputFile.empty()) {
std::error_code EC;
- DwoOut = llvm::make_unique<ToolOutputFile>(SplitDwarfOutputFile, EC,
- sys::fs::F_None);
+ DwoOut = std::make_unique<ToolOutputFile>(SplitDwarfOutputFile, EC,
+ sys::fs::OF_None);
if (EC) {
WithColor::error(errs(), argv[0]) << EC.message() << '\n';
return 1;
@@ -533,13 +533,14 @@ static int compileModule(char **argv, LLVMContext &Context) {
if ((FileType != TargetMachine::CGFT_AssemblyFile &&
!Out->os().supportsSeeking()) ||
CompileTwice) {
- BOS = make_unique<raw_svector_ostream>(Buffer);
+ BOS = std::make_unique<raw_svector_ostream>(Buffer);
OS = BOS.get();
}
const char *argv0 = argv[0];
- LLVMTargetMachine &LLVMTM = static_cast<LLVMTargetMachine&>(*Target);
- MachineModuleInfo *MMI = new MachineModuleInfo(&LLVMTM);
+ LLVMTargetMachine &LLVMTM = static_cast<LLVMTargetMachine &>(*Target);
+ MachineModuleInfoWrapperPass *MMIWP =
+ new MachineModuleInfoWrapperPass(&LLVMTM);
// Construct a custom pass pipeline that starts after instruction
// selection.
@@ -559,7 +560,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
TPC.setDisableVerify(NoVerify);
PM.add(&TPC);
- PM.add(MMI);
+ PM.add(MMIWP);
TPC.printAndVerify("");
for (const std::string &RunPassName : *RunPassNames) {
if (addPass(PM, argv0, RunPassName, TPC))
@@ -570,7 +571,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
PM.add(createFreeMachineFunctionPass());
} else if (Target->addPassesToEmitFile(PM, *OS,
DwoOut ? &DwoOut->os() : nullptr,
- FileType, NoVerify, MMI)) {
+ FileType, NoVerify, MMIWP)) {
WithColor::warning(errs(), argv[0])
<< "target does not support generation of this"
<< " file type!\n";
@@ -578,8 +579,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
}
if (MIR) {
- assert(MMI && "Forgot to create MMI?");
- if (MIR->parseMachineFunctions(*M, *MMI))
+ assert(MMIWP && "Forgot to create MMIWP?");
+ if (MIR->parseMachineFunctions(*M, MMIWP->getMMI()))
return 1;
}
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 8c8cd88c9711..ccad06721414 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -251,7 +251,7 @@ public:
sys::fs::create_directories(Twine(dir));
}
std::error_code EC;
- raw_fd_ostream outfile(CacheName, EC, sys::fs::F_None);
+ raw_fd_ostream outfile(CacheName, EC, sys::fs::OF_None);
outfile.write(Obj.getBufferStart(), Obj.getBufferSize());
outfile.close();
}
@@ -308,7 +308,7 @@ static void addCygMingExtraModule(ExecutionEngine &EE, LLVMContext &Context,
Triple TargetTriple(TargetTripleStr);
// Create a new module.
- std::unique_ptr<Module> M = make_unique<Module>("CygMingHelper", Context);
+ std::unique_ptr<Module> M = std::make_unique<Module>("CygMingHelper", Context);
M->setTargetTriple(TargetTripleStr);
// Create an empty function named "__main".
@@ -695,18 +695,16 @@ int main(int argc, char **argv, char * const *envp) {
return Result;
}
-static orc::IRTransformLayer::TransformFunction createDebugDumper() {
+static std::function<void(Module &)> createDebugDumper() {
switch (OrcDumpKind) {
case DumpKind::NoDump:
- return [](orc::ThreadSafeModule TSM,
- const orc::MaterializationResponsibility &R) { return TSM; };
+ return [](Module &M) {};
case DumpKind::DumpFuncsToStdOut:
- return [](orc::ThreadSafeModule TSM,
- const orc::MaterializationResponsibility &R) {
+ return [](Module &M) {
printf("[ ");
- for (const auto &F : *TSM.getModule()) {
+ for (const auto &F : M) {
if (F.isDeclaration())
continue;
@@ -718,31 +716,23 @@ static orc::IRTransformLayer::TransformFunction createDebugDumper() {
}
printf("]\n");
- return TSM;
};
case DumpKind::DumpModsToStdOut:
- return [](orc::ThreadSafeModule TSM,
- const orc::MaterializationResponsibility &R) {
- outs() << "----- Module Start -----\n"
- << *TSM.getModule() << "----- Module End -----\n";
-
- return TSM;
+ return [](Module &M) {
+ outs() << "----- Module Start -----\n" << M << "----- Module End -----\n";
};
case DumpKind::DumpModsToDisk:
- return [](orc::ThreadSafeModule TSM,
- const orc::MaterializationResponsibility &R) {
+ return [](Module &M) {
std::error_code EC;
- raw_fd_ostream Out(TSM.getModule()->getModuleIdentifier() + ".ll", EC,
- sys::fs::F_Text);
+ raw_fd_ostream Out(M.getModuleIdentifier() + ".ll", EC, sys::fs::OF_Text);
if (EC) {
- errs() << "Couldn't open " << TSM.getModule()->getModuleIdentifier()
+ errs() << "Couldn't open " << M.getModuleIdentifier()
<< " for dumping.\nError:" << EC.message() << "\n";
exit(1);
}
- Out << *TSM.getModule();
- return TSM;
+ Out << M;
};
}
llvm_unreachable("Unknown DumpKind");
@@ -754,14 +744,13 @@ int runOrcLazyJIT(const char *ProgName) {
// Start setting up the JIT environment.
// Parse the main module.
- orc::ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>());
+ orc::ThreadSafeContext TSCtx(std::make_unique<LLVMContext>());
SMDiagnostic Err;
- auto MainModule = orc::ThreadSafeModule(
- parseIRFile(InputFile, Err, *TSCtx.getContext()), TSCtx);
+ auto MainModule = parseIRFile(InputFile, Err, *TSCtx.getContext());
if (!MainModule)
reportError(Err, ProgName);
- const auto &TT = MainModule.getModule()->getTargetTriple();
+ const auto &TT = MainModule->getTargetTriple();
orc::LLLazyJITBuilder Builder;
Builder.setJITTargetMachineBuilder(
@@ -794,13 +783,16 @@ int runOrcLazyJIT(const char *ProgName) {
J->setLazyCompileTransform([&](orc::ThreadSafeModule TSM,
const orc::MaterializationResponsibility &R) {
- if (verifyModule(*TSM.getModule(), &dbgs())) {
- dbgs() << "Bad module: " << *TSM.getModule() << "\n";
- exit(1);
- }
- return Dump(std::move(TSM), R);
+ TSM.withModuleDo([&](Module &M) {
+ if (verifyModule(M, &dbgs())) {
+ dbgs() << "Bad module: " << &M << "\n";
+ exit(1);
+ }
+ Dump(M);
+ });
+ return TSM;
});
- J->getMainJITDylib().setGenerator(
+ J->getMainJITDylib().addGenerator(
ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
J->getDataLayout().getGlobalPrefix())));
@@ -809,7 +801,8 @@ int runOrcLazyJIT(const char *ProgName) {
ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle));
// Add the main module.
- ExitOnErr(J->addLazyIRModule(std::move(MainModule)));
+ ExitOnErr(
+ J->addLazyIRModule(orc::ThreadSafeModule(std::move(MainModule), TSCtx)));
// Create JITDylibs and add any extra modules.
{
@@ -839,6 +832,16 @@ int runOrcLazyJIT(const char *ProgName) {
ExitOnErr(
J->addLazyIRModule(JD, orc::ThreadSafeModule(std::move(M), TSCtx)));
}
+
+ for (auto EAItr = ExtraArchives.begin(), EAEnd = ExtraArchives.end();
+ EAItr != EAEnd; ++EAItr) {
+ auto EAIdx = ExtraArchives.getPosition(EAItr - ExtraArchives.begin());
+ assert(EAIdx != 0 && "ExtraArchive should have index > 0");
+ auto JDItr = std::prev(IdxToDylib.lower_bound(EAIdx));
+ auto &JD = *JDItr->second;
+ JD.addGenerator(ExitOnErr(orc::StaticLibraryDefinitionGenerator::Load(
+ J->getObjLinkingLayer(), EAItr->c_str())));
+ }
}
// Add the objects.
@@ -959,6 +962,6 @@ std::unique_ptr<FDRawChannel> launchRemote() {
close(PipeFD[1][1]);
// Return an RPC channel connected to our end of the pipes.
- return llvm::make_unique<FDRawChannel>(PipeFD[1][0], PipeFD[0][1]);
+ return std::make_unique<FDRawChannel>(PipeFD[1][0], PipeFD[0][1]);
#endif
}
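
In the lli change above, the ORC-specific plumbing moves out of the dumpers: each dump kind is now a plain std::function<void(Module &)>, invoked from inside TSM.withModuleDo so the ThreadSafeModule locking stays in one place. A minimal sketch, with Module as a stand-in struct rather than llvm::Module:

#include <cstdio>
#include <functional>

struct Module { const char *Name; }; // stand-in, not llvm::Module

// Each dump kind reduces to a plain callable over a module.
static std::function<void(Module &)> createDebugDumper(bool Enabled) {
  if (!Enabled)
    return [](Module &) {}; // the NoDump case: a no-op
  return [](Module &M) { std::printf("----- %s -----\n", M.Name); };
}

int main() {
  Module M{"main.ll"};
  auto Dump = createDebugDumper(true);
  Dump(M); // in lli this call happens inside TSM.withModuleDo(...)
  return 0;
}
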
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index 91746d0fab37..c9cf217f7688 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -43,6 +43,11 @@
#include <io.h>
#endif
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
using namespace llvm;
// The name this program was invoked as.
@@ -70,14 +75,14 @@ USAGE: llvm-ar [options] [-]<operation>[modifiers] [relpos] [count] <archive> [f
llvm-ar -M [<mri-script]
OPTIONS:
- --format - Archive format to create
+ --format - archive format to create
=default - default
=gnu - gnu
=darwin - darwin
=bsd - bsd
- --plugin=<string> - Ignored for compatibility
- --help - Display available options
- --version - Display the version of this program
+ --plugin=<string> - ignored for compatibility
+ -h --help - display this help and exit
+ --version - print the version and exit
@<file> - read options from <file>
OPERATIONS:
@@ -95,11 +100,13 @@ MODIFIERS:
[b] - put [files] before [relpos] (same as [i])
[c] - do not warn if archive had to be created
[D] - use zero for timestamps and uids/gids (default)
+ [h] - display this help and exit
[i] - put [files] before [relpos] (same as [b])
[l] - ignored for compatibility
[L] - add archive's contents
[N] - use instance [count] of name
[o] - preserve original dates
+ [O] - display member offsets
[P] - use full names when matching (implied for thin archives)
[s] - create an archive index (cf. ranlib)
[S] - do not build a symbol table
@@ -107,6 +114,7 @@ MODIFIERS:
[u] - update only [files] newer than archive contents
[U] - use actual timestamps and uids/gids
[v] - be verbose about actions taken
+ [V] - display the version and exit
)";
void printHelpMessage() {
@@ -116,10 +124,19 @@ void printHelpMessage() {
outs() << ArHelp;
}
+static unsigned MRILineNumber;
+static bool ParsingMRIScript;
+
// Show the error message and exit.
LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) {
- WithColor::error(errs(), ToolName) << Error << ".\n";
- printHelpMessage();
+ if (ParsingMRIScript) {
+ WithColor::error(errs(), ToolName)
+ << "script line " << MRILineNumber << ": " << Error << "\n";
+ } else {
+ WithColor::error(errs(), ToolName) << Error << "\n";
+ printHelpMessage();
+ }
+
exit(1);
}
@@ -171,17 +188,18 @@ enum ArchiveOperation {
};
// Modifiers to follow operation to vary behavior
-static bool AddAfter = false; ///< 'a' modifier
-static bool AddBefore = false; ///< 'b' modifier
-static bool Create = false; ///< 'c' modifier
-static bool OriginalDates = false; ///< 'o' modifier
-static bool CompareFullPath = false; ///< 'P' modifier
-static bool OnlyUpdate = false; ///< 'u' modifier
-static bool Verbose = false; ///< 'v' modifier
-static bool Symtab = true; ///< 's' modifier
-static bool Deterministic = true; ///< 'D' and 'U' modifiers
-static bool Thin = false; ///< 'T' modifier
-static bool AddLibrary = false; ///< 'L' modifier
+static bool AddAfter = false; ///< 'a' modifier
+static bool AddBefore = false; ///< 'b' modifier
+static bool Create = false; ///< 'c' modifier
+static bool OriginalDates = false; ///< 'o' modifier
+static bool DisplayMemberOffsets = false; ///< 'O' modifier
+static bool CompareFullPath = false; ///< 'P' modifier
+static bool OnlyUpdate = false; ///< 'u' modifier
+static bool Verbose = false; ///< 'v' modifier
+static bool Symtab = true; ///< 's' modifier
+static bool Deterministic = true; ///< 'D' and 'U' modifiers
+static bool Thin = false; ///< 'T' modifier
+static bool AddLibrary = false; ///< 'L' modifier
// Relative Positional Argument (for insert/move). This variable holds
// the name of the archive member to which the 'a', 'b' or 'i' modifier
@@ -198,6 +216,9 @@ static int CountParam = 0;
// command line.
static std::string ArchiveName;
+static std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
+static std::vector<std::unique_ptr<object::Archive>> Archives;
+
// This variable holds the list of member files to process, as given
// on the command line.
static std::vector<StringRef> Members;
@@ -209,7 +230,7 @@ static BumpPtrAllocator Alloc;
// associated with a, b, and i modifiers
static void getRelPos() {
if (PositionalArgs.empty())
- fail("Expected [relpos] for a, b, or i modifier");
+ fail("expected [relpos] for 'a', 'b', or 'i' modifier");
RelPos = PositionalArgs[0];
PositionalArgs.erase(PositionalArgs.begin());
}
@@ -218,40 +239,31 @@ static void getRelPos() {
// associated with the N modifier
static void getCountParam() {
if (PositionalArgs.empty())
- fail("Expected [count] for N modifier");
+ fail("expected [count] for 'N' modifier");
auto CountParamArg = StringRef(PositionalArgs[0]);
if (CountParamArg.getAsInteger(10, CountParam))
- fail("Value for [count] must be numeric, got: " + CountParamArg);
+ fail("value for [count] must be numeric, got: " + CountParamArg);
if (CountParam < 1)
- fail("Value for [count] must be positive, got: " + CountParamArg);
+ fail("value for [count] must be positive, got: " + CountParamArg);
PositionalArgs.erase(PositionalArgs.begin());
}
// Get the archive file name from the command line
static void getArchive() {
if (PositionalArgs.empty())
- fail("An archive name must be specified");
+ fail("an archive name must be specified");
ArchiveName = PositionalArgs[0];
PositionalArgs.erase(PositionalArgs.begin());
}
-// Copy over remaining items in PositionalArgs to our Members vector
-static void getMembers() {
- for (auto &Arg : PositionalArgs)
- Members.push_back(Arg);
-}
-
-std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
-std::vector<std::unique_ptr<object::Archive>> Archives;
-
static object::Archive &readLibrary(const Twine &Library) {
auto BufOrErr = MemoryBuffer::getFile(Library, -1, false);
- failIfError(BufOrErr.getError(), "Could not open library " + Library);
+ failIfError(BufOrErr.getError(), "could not open library " + Library);
ArchiveBuffers.push_back(std::move(*BufOrErr));
auto LibOrErr =
object::Archive::create(ArchiveBuffers.back()->getMemBufferRef());
failIfError(errorToErrorCode(LibOrErr.takeError()),
- "Could not parse library");
+ "could not parse library");
Archives.push_back(std::move(*LibOrErr));
return *Archives.back();
}
@@ -264,7 +276,7 @@ static void runMRIScript();
static ArchiveOperation parseCommandLine() {
if (MRI) {
if (!PositionalArgs.empty() || !Options.empty())
- fail("Cannot mix -M and other options");
+ fail("cannot mix -M and other options");
runMRIScript();
}
@@ -319,6 +331,9 @@ static ArchiveOperation parseCommandLine() {
case 'o':
OriginalDates = true;
break;
+ case 'O':
+ DisplayMemberOffsets = true;
+ break;
case 'P':
CompareFullPath = true;
break;
@@ -367,6 +382,12 @@ static ArchiveOperation parseCommandLine() {
case 'L':
AddLibrary = true;
break;
+ case 'V':
+ cl::PrintVersionMessage();
+ exit(0);
+ case 'h':
+ printHelpMessage();
+ exit(0);
default:
fail(std::string("unknown option ") + Options[i]);
}
@@ -377,37 +398,37 @@ static ArchiveOperation parseCommandLine() {
getArchive();
// Everything on the command line at this point is a member.
- getMembers();
+ Members.assign(PositionalArgs.begin(), PositionalArgs.end());
if (NumOperations == 0 && MaybeJustCreateSymTab) {
NumOperations = 1;
Operation = CreateSymTab;
if (!Members.empty())
- fail("The s operation takes only an archive as argument");
+ fail("the 's' operation takes only an archive as argument");
}
// Perform various checks on the operation/modifier specification
// to make sure we are dealing with a legal request.
if (NumOperations == 0)
- fail("You must specify at least one of the operations");
+ fail("you must specify at least one of the operations");
if (NumOperations > 1)
- fail("Only one operation may be specified");
+ fail("only one operation may be specified");
if (NumPositional > 1)
- fail("You may only specify one of a, b, and i modifiers");
+ fail("you may only specify one of 'a', 'b', and 'i' modifiers");
if (AddAfter || AddBefore)
if (Operation != Move && Operation != ReplaceOrInsert)
- fail("The 'a', 'b' and 'i' modifiers can only be specified with "
+ fail("the 'a', 'b' and 'i' modifiers can only be specified with "
"the 'm' or 'r' operations");
if (CountParam)
if (Operation != Extract && Operation != Delete)
- fail("The 'N' modifier can only be specified with the 'x' or 'd' "
+ fail("the 'N' modifier can only be specified with the 'x' or 'd' "
"operations");
if (OriginalDates && Operation != Extract)
- fail("The 'o' modifier is only applicable to the 'x' operation");
+ fail("the 'o' modifier is only applicable to the 'x' operation");
if (OnlyUpdate && Operation != ReplaceOrInsert)
- fail("The 'u' modifier is only applicable to the 'r' operation");
+ fail("the 'u' modifier is only applicable to the 'r' operation");
if (AddLibrary && Operation != QuickAppend)
- fail("The 'L' modifier is only applicable to the 'q' operation");
+ fail("the 'L' modifier is only applicable to the 'q' operation");
// Return the parsed operation to the caller
return Operation;
@@ -470,12 +491,35 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
if (!ParentDir.empty())
outs() << sys::path::convert_to_slash(ParentDir) << '/';
}
+ outs() << Name;
+ } else {
+ outs() << Name;
+ if (DisplayMemberOffsets)
+ outs() << " 0x" << utohexstr(C.getDataOffset(), true);
}
- outs() << Name << "\n";
+ outs() << '\n';
}
-static StringRef normalizePath(StringRef Path) {
- return CompareFullPath ? Path : sys::path::filename(Path);
+static std::string normalizePath(StringRef Path) {
+ return CompareFullPath ? sys::path::convert_to_slash(Path)
+ : std::string(sys::path::filename(Path));
+}
+
+static bool comparePaths(StringRef Path1, StringRef Path2) {
+// On Windows this function calls CompareStringOrdinal,
+// as Windows file paths are case-insensitive.
+// CompareStringOrdinal compares two Unicode strings for
+// binary equivalence and allows for case insensitivity.
+#ifdef _WIN32
+ SmallVector<wchar_t, 128> WPath1, WPath2;
+ failIfError(sys::path::widenPath(normalizePath(Path1), WPath1));
+ failIfError(sys::path::widenPath(normalizePath(Path2), WPath2));
+
+ return CompareStringOrdinal(WPath1.data(), WPath1.size(), WPath2.data(),
+ WPath2.size(), true) == CSTR_EQUAL;
+#else
+ return normalizePath(Path1) == normalizePath(Path2);
+#endif
}
// Implement the 'x' operation. This function extracts files back to the file
@@ -489,7 +533,7 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) {
int FD;
failIfError(sys::fs::openFileForWrite(sys::path::filename(Name), FD,
sys::fs::CD_CreateAlways,
- sys::fs::F_None, Mode),
+ sys::fs::OF_None, Mode),
Name);
{
@@ -551,7 +595,7 @@ static void performReadOperation(ArchiveOperation Operation,
if (Filter) {
auto I = find_if(Members, [Name](StringRef Path) {
- return Name == normalizePath(Path);
+ return comparePaths(Name, Path);
});
if (I == Members.end())
continue;
@@ -588,7 +632,7 @@ static void addChildMember(std::vector<NewArchiveMember> &Members,
const object::Archive::Child &M,
bool FlattenArchive = false) {
if (Thin && !M.getParent()->isThin())
- fail("Cannot convert a regular archive to a thin one");
+ fail("cannot convert a regular archive to a thin one");
Expected<NewArchiveMember> NMOrErr =
NewArchiveMember::getOldMember(M, Deterministic);
failIfError(NMOrErr.takeError());
@@ -681,7 +725,7 @@ static InsertAction computeInsertAction(ArchiveOperation Operation,
if (Operation == QuickAppend || Members.empty())
return IA_AddOldMember;
auto MI = find_if(
- Members, [Name](StringRef Path) { return Name == normalizePath(Path); });
+ Members, [Name](StringRef Path) { return comparePaths(Name, Path); });
if (MI == Members.end())
return IA_AddOldMember;
@@ -698,9 +742,8 @@ static InsertAction computeInsertAction(ArchiveOperation Operation,
return IA_MoveOldMember;
if (Operation == ReplaceOrInsert) {
- StringRef PosName = normalizePath(RelPos);
if (!OnlyUpdate) {
- if (PosName.empty())
+ if (RelPos.empty())
return IA_AddNewMember;
return IA_MoveNewMember;
}
@@ -712,12 +755,12 @@ static InsertAction computeInsertAction(ArchiveOperation Operation,
auto ModTimeOrErr = Member.getLastModified();
failIfError(ModTimeOrErr.takeError());
if (Status.getLastModificationTime() < ModTimeOrErr.get()) {
- if (PosName.empty())
+ if (RelPos.empty())
return IA_AddOldMember;
return IA_MoveOldMember;
}
- if (PosName.empty())
+ if (RelPos.empty())
return IA_AddNewMember;
return IA_MoveNewMember;
}
@@ -732,7 +775,6 @@ computeNewArchiveMembers(ArchiveOperation Operation,
std::vector<NewArchiveMember> Ret;
std::vector<NewArchiveMember> Moved;
int InsertPos = -1;
- StringRef PosName = normalizePath(RelPos);
if (OldArchive) {
Error Err = Error::success();
StringMap<int> MemberCount;
@@ -740,8 +782,8 @@ computeNewArchiveMembers(ArchiveOperation Operation,
int Pos = Ret.size();
Expected<StringRef> NameOrErr = Child.getName();
failIfError(NameOrErr.takeError());
- StringRef Name = NameOrErr.get();
- if (Name == PosName) {
+ std::string Name = NameOrErr.get();
+ if (comparePaths(Name, RelPos)) {
assert(AddAfter || AddBefore);
if (AddBefore)
InsertPos = Pos;
@@ -783,7 +825,7 @@ computeNewArchiveMembers(ArchiveOperation Operation,
return Ret;
if (!RelPos.empty() && InsertPos == -1)
- fail("Insertion point not found");
+ fail("insertion point not found");
if (RelPos.empty())
InsertPos = Ret.size();
@@ -859,12 +901,12 @@ static void performWriteOperation(ArchiveOperation Operation,
break;
case BSD:
if (Thin)
- fail("Only the gnu format has a thin mode");
+ fail("only the gnu format has a thin mode");
Kind = object::Archive::K_BSD;
break;
case DARWIN:
if (Thin)
- fail("Only the gnu format has a thin mode");
+ fail("only the gnu format has a thin mode");
Kind = object::Archive::K_DARWIN;
break;
case Unknown:
@@ -922,14 +964,12 @@ static int performOperation(ArchiveOperation Operation,
MemoryBuffer::getFile(ArchiveName, -1, false);
std::error_code EC = Buf.getError();
if (EC && EC != errc::no_such_file_or_directory)
- fail("error opening '" + ArchiveName + "': " + EC.message() + "!");
+ fail("error opening '" + ArchiveName + "': " + EC.message());
if (!EC) {
Error Err = Error::success();
object::Archive Archive(Buf.get()->getMemBufferRef(), Err);
- EC = errorToErrorCode(std::move(Err));
- failIfError(EC,
- "error loading '" + ArchiveName + "': " + EC.message() + "!");
+ failIfError(std::move(Err), "unable to load '" + ArchiveName + "'");
if (Archive.isThin())
CompareFullPath = true;
performOperation(Operation, &Archive, std::move(Buf.get()), NewMembers);
@@ -960,8 +1000,10 @@ static void runMRIScript() {
const MemoryBuffer &Ref = *Buf.get();
bool Saved = false;
std::vector<NewArchiveMember> NewMembers;
+ ParsingMRIScript = true;
for (line_iterator I(Ref, /*SkipBlanks*/ false), E; I != E; ++I) {
+ ++MRILineNumber;
StringRef Line = *I;
Line = Line.split(';').first;
Line = Line.split('*').first;
@@ -1003,15 +1045,15 @@ static void runMRIScript() {
case MRICommand::Create:
Create = true;
if (!ArchiveName.empty())
- fail("Editing multiple archives not supported");
+ fail("editing multiple archives not supported");
if (Saved)
- fail("File already saved");
+ fail("file already saved");
ArchiveName = Rest;
break;
case MRICommand::Delete: {
- StringRef Name = normalizePath(Rest);
- llvm::erase_if(NewMembers,
- [=](NewArchiveMember &M) { return M.MemberName == Name; });
+ llvm::erase_if(NewMembers, [=](NewArchiveMember &M) {
+ return comparePaths(M.MemberName, Rest);
+ });
break;
}
case MRICommand::Save:
@@ -1020,10 +1062,12 @@ static void runMRIScript() {
case MRICommand::End:
break;
case MRICommand::Invalid:
- fail("Unknown command: " + CommandStr);
+ fail("unknown command: " + CommandStr);
}
}
-
+
+ ParsingMRIScript = false;
+
// Nothing to do if not saved.
if (Saved)
performOperation(ReplaceOrInsert, &NewMembers);
@@ -1108,7 +1152,7 @@ static int ranlib_main(int argc, char **argv) {
return 0;
} else {
if (ArchiveSpecified)
- fail("Exactly one archive should be specified");
+ fail("exactly one archive should be specified");
ArchiveSpecified = true;
ArchiveName = argv[i];
}
@@ -1136,5 +1180,5 @@ int main(int argc, char **argv) {
if (Stem.contains_lower("ar"))
return ar_main(argc, argv);
- fail("Not ranlib, ar, lib or dlltool!");
+ fail("not ranlib, ar, lib or dlltool");
}
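
Besides lowercasing the messages, the llvm-ar change threads an MRI line number into fail() through two file-static globals. A compact standalone sketch of that reporting pattern (the two global names are reused for clarity; everything else is simplified):

#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

static unsigned MRILineNumber;
static bool ParsingMRIScript;

static void fail(const std::string &Error) {
  if (ParsingMRIScript)
    std::fprintf(stderr, "error: script line %u: %s\n", MRILineNumber,
                 Error.c_str());
  else
    std::fprintf(stderr, "error: %s\n", Error.c_str());
  std::exit(1);
}

int main() {
  std::vector<std::string> Script = {"create a.a", "frobnicate"};
  ParsingMRIScript = true;
  for (const std::string &Line : Script) {
    ++MRILineNumber;
    if (Line != "create a.a")
      fail("unknown command: " + Line); // reports "script line 2: ..."
  }
  ParsingMRIScript = false;
  return 0;
}
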
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index 234fef907a38..c9f50e38fc61 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -82,7 +82,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
std::error_code EC;
std::unique_ptr<ToolOutputFile> Out(
- new ToolOutputFile(OutputFilename, EC, sys::fs::F_None));
+ new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None));
if (EC) {
errs() << EC.message() << '\n';
exit(1);
diff --git a/tools/llvm-cov/CodeCoverage.cpp b/tools/llvm-cov/CodeCoverage.cpp
index f707e3c7ab53..7151cfb032f3 100644
--- a/tools/llvm-cov/CodeCoverage.cpp
+++ b/tools/llvm-cov/CodeCoverage.cpp
@@ -712,15 +712,15 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
// Create the function filters
if (!NameFilters.empty() || NameWhitelist || !NameRegexFilters.empty()) {
- auto NameFilterer = llvm::make_unique<CoverageFilters>();
+ auto NameFilterer = std::make_unique<CoverageFilters>();
for (const auto &Name : NameFilters)
- NameFilterer->push_back(llvm::make_unique<NameCoverageFilter>(Name));
+ NameFilterer->push_back(std::make_unique<NameCoverageFilter>(Name));
if (NameWhitelist)
NameFilterer->push_back(
- llvm::make_unique<NameWhitelistCoverageFilter>(*NameWhitelist));
+ std::make_unique<NameWhitelistCoverageFilter>(*NameWhitelist));
for (const auto &Regex : NameRegexFilters)
NameFilterer->push_back(
- llvm::make_unique<NameRegexCoverageFilter>(Regex));
+ std::make_unique<NameRegexCoverageFilter>(Regex));
Filters.push_back(std::move(NameFilterer));
}
@@ -728,18 +728,18 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
RegionCoverageGtFilter.getNumOccurrences() ||
LineCoverageLtFilter.getNumOccurrences() ||
LineCoverageGtFilter.getNumOccurrences()) {
- auto StatFilterer = llvm::make_unique<CoverageFilters>();
+ auto StatFilterer = std::make_unique<CoverageFilters>();
if (RegionCoverageLtFilter.getNumOccurrences())
- StatFilterer->push_back(llvm::make_unique<RegionCoverageFilter>(
+ StatFilterer->push_back(std::make_unique<RegionCoverageFilter>(
RegionCoverageFilter::LessThan, RegionCoverageLtFilter));
if (RegionCoverageGtFilter.getNumOccurrences())
- StatFilterer->push_back(llvm::make_unique<RegionCoverageFilter>(
+ StatFilterer->push_back(std::make_unique<RegionCoverageFilter>(
RegionCoverageFilter::GreaterThan, RegionCoverageGtFilter));
if (LineCoverageLtFilter.getNumOccurrences())
- StatFilterer->push_back(llvm::make_unique<LineCoverageFilter>(
+ StatFilterer->push_back(std::make_unique<LineCoverageFilter>(
LineCoverageFilter::LessThan, LineCoverageLtFilter));
if (LineCoverageGtFilter.getNumOccurrences())
- StatFilterer->push_back(llvm::make_unique<LineCoverageFilter>(
+ StatFilterer->push_back(std::make_unique<LineCoverageFilter>(
RegionCoverageFilter::GreaterThan, LineCoverageGtFilter));
Filters.push_back(std::move(StatFilterer));
}
@@ -747,7 +747,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
// Create the ignore filename filters.
for (const auto &RE : IgnoreFilenameRegexFilters)
IgnoreFilenameFilters.push_back(
- llvm::make_unique<NameRegexCoverageFilter>(RE));
+ std::make_unique<NameRegexCoverageFilter>(RE));
if (!Arches.empty()) {
for (const std::string &Arch : Arches) {
@@ -1040,7 +1040,7 @@ int CodeCoverageTool::doExport(int argc, const char **argv,
switch (ViewOpts.Format) {
case CoverageViewOptions::OutputFormat::Text:
- Exporter = llvm::make_unique<CoverageExporterJson>(*Coverage.get(),
+ Exporter = std::make_unique<CoverageExporterJson>(*Coverage.get(),
ViewOpts, outs());
break;
case CoverageViewOptions::OutputFormat::HTML:
@@ -1048,7 +1048,7 @@ int CodeCoverageTool::doExport(int argc, const char **argv,
// above.
llvm_unreachable("Export in HTML is not supported!");
case CoverageViewOptions::OutputFormat::Lcov:
- Exporter = llvm::make_unique<CoverageExporterLcov>(*Coverage.get(),
+ Exporter = std::make_unique<CoverageExporterLcov>(*Coverage.get(),
ViewOpts, outs());
break;
}
diff --git a/tools/llvm-cov/SourceCoverageView.cpp b/tools/llvm-cov/SourceCoverageView.cpp
index 616f667e2c84..0e20ea63cd6f 100644
--- a/tools/llvm-cov/SourceCoverageView.cpp
+++ b/tools/llvm-cov/SourceCoverageView.cpp
@@ -76,9 +76,9 @@ std::unique_ptr<CoveragePrinter>
CoveragePrinter::create(const CoverageViewOptions &Opts) {
switch (Opts.Format) {
case CoverageViewOptions::OutputFormat::Text:
- return llvm::make_unique<CoveragePrinterText>(Opts);
+ return std::make_unique<CoveragePrinterText>(Opts);
case CoverageViewOptions::OutputFormat::HTML:
- return llvm::make_unique<CoveragePrinterHTML>(Opts);
+ return std::make_unique<CoveragePrinterHTML>(Opts);
case CoverageViewOptions::OutputFormat::Lcov:
// Unreachable because CodeCoverage.cpp should terminate with an error
// before we get here.
@@ -141,10 +141,10 @@ SourceCoverageView::create(StringRef SourceName, const MemoryBuffer &File,
CoverageData &&CoverageInfo) {
switch (Options.Format) {
case CoverageViewOptions::OutputFormat::Text:
- return llvm::make_unique<SourceCoverageViewText>(
+ return std::make_unique<SourceCoverageViewText>(
SourceName, File, Options, std::move(CoverageInfo));
case CoverageViewOptions::OutputFormat::HTML:
- return llvm::make_unique<SourceCoverageViewHTML>(
+ return std::make_unique<SourceCoverageViewHTML>(
SourceName, File, Options, std::move(CoverageInfo));
case CoverageViewOptions::OutputFormat::Lcov:
// Unreachable because CodeCoverage.cpp should terminate with an error
diff --git a/tools/llvm-cov/TestingSupport.cpp b/tools/llvm-cov/TestingSupport.cpp
index 3ee318c9c640..b99bd83157d0 100644
--- a/tools/llvm-cov/TestingSupport.cpp
+++ b/tools/llvm-cov/TestingSupport.cpp
@@ -8,6 +8,7 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,8 +51,13 @@ int convertForTestingMain(int argc, const char *argv[]) {
auto ObjFormat = OF->getTripleObjectFormat();
for (const auto &Section : OF->sections()) {
StringRef Name;
- if (Section.getName(Name))
+ if (Expected<StringRef> NameOrErr = Section.getName()) {
+ Name = *NameOrErr;
+ } else {
+ consumeError(NameOrErr.takeError());
return 1;
+ }
+
if (Name == llvm::getInstrProfSectionName(IPSK_name, ObjFormat,
/*AddSegmentInfo=*/false)) {
ProfileNames = Section;
@@ -94,7 +100,7 @@ int convertForTestingMain(int argc, const char *argv[]) {
encodeULEB128(ProfileNamesAddress, OS);
OS << ProfileNamesData;
// Coverage mapping data is expected to have an alignment of 8.
- for (unsigned Pad = OffsetToAlignment(OS.tell(), 8); Pad; --Pad)
+ for (unsigned Pad = offsetToAlignment(OS.tell(), Align(8)); Pad; --Pad)
OS.write(uint8_t(0));
OS << CoverageMappingData;
diff --git a/tools/llvm-cxxdump/llvm-cxxdump.cpp b/tools/llvm-cxxdump/llvm-cxxdump.cpp
index 833312655788..03e1bab9417e 100644
--- a/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -174,7 +174,11 @@ static void dumpCXXData(const ObjectFile *Obj) {
SectionRelocMap.clear();
for (const SectionRef &Section : Obj->sections()) {
- section_iterator Sec2 = Section.getRelocatedSection();
+ Expected<section_iterator> ErrOrSec = Section.getRelocatedSection();
+ if (!ErrOrSec)
+ error(ErrOrSec.takeError());
+
+ section_iterator Sec2 = *ErrOrSec;
if (Sec2 != Obj->section_end())
SectionRelocMap[*Sec2].push_back(Section);
}
diff --git a/tools/llvm-cxxmap/llvm-cxxmap.cpp b/tools/llvm-cxxmap/llvm-cxxmap.cpp
index 87d4d06bbc96..b53a6364c89e 100644
--- a/tools/llvm-cxxmap/llvm-cxxmap.cpp
+++ b/tools/llvm-cxxmap/llvm-cxxmap.cpp
@@ -145,7 +145,7 @@ int main(int argc, const char *argv[]) {
exitWithErrorCode(RemappingBufOrError.getError(), RemappingFile);
std::error_code EC;
- raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text);
+ raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_Text);
if (EC)
exitWithErrorCode(EC, OutputFilename);
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 3f337b874b16..d66299cbf767 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -153,7 +153,7 @@ int main(int argc, char **argv) {
LLVMContext Context;
Context.setDiagnosticHandler(
- llvm::make_unique<LLVMDisDiagnosticHandler>(argv[0]));
+ std::make_unique<LLVMDisDiagnosticHandler>(argv[0]));
cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n");
std::unique_ptr<MemoryBuffer> MB =
@@ -186,7 +186,7 @@ int main(int argc, char **argv) {
std::error_code EC;
std::unique_ptr<ToolOutputFile> Out(
- new ToolOutputFile(OutputFilename, EC, sys::fs::F_None));
+ new ToolOutputFile(OutputFilename, EC, sys::fs::OF_Text));
if (EC) {
errs() << EC.message() << '\n';
return 1;
diff --git a/tools/llvm-dwarfdump/Statistics.cpp b/tools/llvm-dwarfdump/Statistics.cpp
index f26369b935cb..c29ad783a9e6 100644
--- a/tools/llvm-dwarfdump/Statistics.cpp
+++ b/tools/llvm-dwarfdump/Statistics.cpp
@@ -5,11 +5,18 @@
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/JSON.h"
#define DEBUG_TYPE "dwarfdump"
using namespace llvm;
using namespace object;
+/// This represents the number of categories of debug location coverage being
+/// calculated. The first category is the number of variables with 0% location
+/// coverage, but the last category is the number of variables with 100%
+/// location coverage.
+constexpr int NumOfCoverageCategories = 12;
+
/// Holds statistics for one function (or other entity that has a PC range and
/// contains variables, such as a compile unit).
struct PerFunctionStats {
@@ -43,9 +50,9 @@ struct PerFunctionStats {
unsigned NumVars = 0;
/// Number of variables with source location.
unsigned NumVarSourceLocations = 0;
- /// Number of variables wtih type.
+ /// Number of variables with type.
unsigned NumVarTypes = 0;
- /// Number of variables wtih DW_AT_location.
+ /// Number of variables with DW_AT_location.
unsigned NumVarLocations = 0;
};
@@ -56,16 +63,74 @@ struct GlobalStats {
/// Total number of PC range bytes in each variable's enclosing scope,
/// starting from the first definition of the variable.
unsigned ScopeBytesFromFirstDefinition = 0;
- /// Total number of call site entries (DW_TAG_call_site) or
- /// (DW_AT_call_file & DW_AT_call_line).
+ /// Total number of PC range bytes covered by DW_AT_locations with
+ /// the debug entry values (DW_OP_entry_value).
+ unsigned ScopeEntryValueBytesCovered = 0;
+ /// Total number of PC range bytes covered by DW_AT_locations of
+ /// formal parameters.
+ unsigned ParamScopeBytesCovered = 0;
+ /// Total number of PC range bytes in each variable's enclosing scope,
+ /// starting from the first definition of the variable (only for parameters).
+ unsigned ParamScopeBytesFromFirstDefinition = 0;
+ /// Total number of PC range bytes covered by DW_AT_locations with
+ /// the debug entry values (DW_OP_entry_value) (only for parameters).
+ unsigned ParamScopeEntryValueBytesCovered = 0;
+ /// Total number of PC range bytes covered by DW_AT_locations (only for local
+ /// variables).
+ unsigned VarScopeBytesCovered = 0;
+ /// Total number of PC range bytes in each variable's enclosing scope,
+ /// starting from the first definition of the variable (only for local
+ /// variables).
+ unsigned VarScopeBytesFromFirstDefinition = 0;
+ /// Total number of PC range bytes covered by DW_AT_locations with
+ /// the debug entry values (DW_OP_entry_value) (only for local variables).
+ unsigned VarScopeEntryValueBytesCovered = 0;
+ /// Total number of call site entries (DW_AT_call_file & DW_AT_call_line).
unsigned CallSiteEntries = 0;
+ /// Total number of call site DIEs (DW_TAG_call_site).
+ unsigned CallSiteDIEs = 0;
+ /// Total number of call site parameter DIEs (DW_TAG_call_site_parameter).
+ unsigned CallSiteParamDIEs = 0;
/// Total byte size of concrete functions. This byte size includes
/// inline functions contained in the concrete functions.
- uint64_t FunctionSize = 0;
+ unsigned FunctionSize = 0;
/// Total byte size of inlined functions. This is the total number of bytes
/// for the top inline functions within concrete functions. This can help
/// tune the inline settings when compiling to match user expectations.
- uint64_t InlineFunctionSize = 0;
+ unsigned InlineFunctionSize = 0;
+};
+
+/// Holds accumulated debug location statistics about local variables and
+/// formal parameters.
+struct LocationStats {
+ /// Map the scope coverage decile to the number of variables in the decile.
+ /// The first element of the array (at the index zero) represents the number
+ /// of variables with the no debug location at all, but the last element
+ /// in the vector represents the number of fully covered variables within
+ /// its scope.
+ std::vector<unsigned> VarParamLocStats{
+ std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ /// Map non debug entry values coverage.
+ std::vector<unsigned> VarParamNonEntryValLocStats{
+ std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ /// The debug location statistics for formal parameters.
+ std::vector<unsigned> ParamLocStats{
+ std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ /// Map non debug entry values coverage for formal parameters.
+ std::vector<unsigned> ParamNonEntryValLocStats{
+ std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ /// The debug location statistics for local variables.
+ std::vector<unsigned> VarLocStats{
+ std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ /// Map non debug entry values coverage for local variables.
+ std::vector<unsigned> VarNonEntryValLocStats{
+ std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ /// Total number of local variables and function parameters processed.
+ unsigned NumVarParam = 0;
+ /// Total number of formal parameters processed.
+ unsigned NumParam = 0;
+ /// Total number of local variables processed.
+ unsigned NumVar = 0;
};
/// Extract the low pc from a Die.
@@ -81,27 +146,66 @@ static uint64_t getLowPC(DWARFDie Die) {
return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0);
}
+/// Collect debug location statistics for one DIE.
+static void collectLocStats(uint64_t BytesCovered, uint64_t BytesInScope,
+ std::vector<unsigned> &VarParamLocStats,
+ std::vector<unsigned> &ParamLocStats,
+ std::vector<unsigned> &VarLocStats, bool IsParam,
+ bool IsLocalVar) {
+ auto getCoverageBucket = [BytesCovered, BytesInScope]() -> unsigned {
+ unsigned LocBucket = 100 * (double)BytesCovered / BytesInScope;
+ if (LocBucket == 0) {
+ // No debug location at all for the variable.
+ return 0;
+ } else if (LocBucket == 100 || BytesCovered > BytesInScope) {
+ // Fully covered variable within its scope.
+ return NumOfCoverageCategories - 1;
+ } else {
+ // Get covered range (e.g. 20%-29%).
+ LocBucket /= 10;
+ return LocBucket + 1;
+ }
+ };
+
+ unsigned CoverageBucket = getCoverageBucket();
+ VarParamLocStats[CoverageBucket]++;
+ if (IsParam)
+ ParamLocStats[CoverageBucket]++;
+ else if (IsLocalVar)
+ VarLocStats[CoverageBucket]++;
+}
+
/// Collect debug info quality metrics for one DIE.
-static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
+static void collectStatsForDie(DWARFDie Die, uint64_t UnitLowPC, std::string FnPrefix,
std::string VarPrefix, uint64_t ScopeLowPC,
uint64_t BytesInScope, uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
- GlobalStats &GlobalStats) {
+ GlobalStats &GlobalStats,
+ LocationStats &LocStats) {
bool HasLoc = false;
bool HasSrcLoc = false;
bool HasType = false;
bool IsArtificial = false;
uint64_t BytesCovered = 0;
+ uint64_t BytesEntryValuesCovered = 0;
uint64_t OffsetToFirstDefinition = 0;
+ auto &FnStats = FnStatMap[FnPrefix];
+ bool IsParam = Die.getTag() == dwarf::DW_TAG_formal_parameter;
+ bool IsLocalVar = Die.getTag() == dwarf::DW_TAG_variable;
+
+ if (Die.getTag() == dwarf::DW_TAG_call_site ||
+ Die.getTag() == dwarf::DW_TAG_GNU_call_site) {
+ GlobalStats.CallSiteDIEs++;
+ return;
+ }
- if (Die.getTag() == dwarf::DW_TAG_call_site) {
- GlobalStats.CallSiteEntries++;
+ if (Die.getTag() == dwarf::DW_TAG_call_site_parameter ||
+ Die.getTag() == dwarf::DW_TAG_GNU_call_site_parameter) {
+ GlobalStats.CallSiteParamDIEs++;
return;
}
- if (Die.getTag() != dwarf::DW_TAG_formal_parameter &&
- Die.getTag() != dwarf::DW_TAG_variable &&
- Die.getTag() != dwarf::DW_TAG_member) {
+ if (!IsParam && !IsLocalVar && Die.getTag() != dwarf::DW_TAG_member) {
// Not a variable or constant member.
return;
}
@@ -116,6 +220,19 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
if (Die.find(dwarf::DW_AT_artificial))
IsArtificial = true;
+ auto IsEntryValue = [&](ArrayRef<uint8_t> D) -> bool {
+ DWARFUnit *U = Die.getDwarfUnit();
+ DataExtractor Data(toStringRef(D),
+ Die.getDwarfUnit()->getContext().isLittleEndian(), 0);
+ DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize());
+ // Consider the expression containing the DW_OP_entry_value as
+ // an entry value.
+ return llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
+ return Op.getCode() == dwarf::DW_OP_entry_value ||
+ Op.getCode() == dwarf::DW_OP_GNU_entry_value;
+ });
+ };
+
if (Die.find(dwarf::DW_AT_const_value)) {
// This catches constant members *and* variables.
HasLoc = true;
@@ -133,11 +250,15 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
if (auto DebugLocOffset = FormValue->getAsSectionOffset()) {
auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc();
if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) {
- for (auto Entry : List->Entries)
- BytesCovered += Entry.End - Entry.Begin;
+ for (auto Entry : List->Entries) {
+ uint64_t BytesEntryCovered = Entry.End - Entry.Begin;
+ BytesCovered += BytesEntryCovered;
+ if (IsEntryValue(Entry.Loc))
+ BytesEntryValuesCovered += BytesEntryCovered;
+ }
if (List->Entries.size()) {
uint64_t FirstDef = List->Entries[0].Begin;
- uint64_t UnitOfs = getLowPC(Die.getDwarfUnit()->getUnitDIE());
+ uint64_t UnitOfs = UnitLowPC;
// Ranges sometimes start before the lexical scope.
if (UnitOfs + FirstDef >= ScopeLowPC)
OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC;
@@ -154,8 +275,25 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
}
}
+ // Calculate the debug location statistics.
+ if (BytesInScope) {
+ LocStats.NumVarParam++;
+ if (IsParam)
+ LocStats.NumParam++;
+ else if (IsLocalVar)
+ LocStats.NumVar++;
+
+ collectLocStats(BytesCovered, BytesInScope, LocStats.VarParamLocStats,
+ LocStats.ParamLocStats, LocStats.VarLocStats, IsParam,
+ IsLocalVar);
+ // Non debug entry values coverage statistics.
+ collectLocStats(BytesCovered - BytesEntryValuesCovered, BytesInScope,
+ LocStats.VarParamNonEntryValLocStats,
+ LocStats.ParamNonEntryValLocStats,
+ LocStats.VarNonEntryValLocStats, IsParam, IsLocalVar);
+ }
+
// Collect PC range coverage data.
- auto &FnStats = FnStatMap[FnPrefix];
if (DWARFDie D =
Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin))
Die = D;
@@ -171,6 +309,17 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
// Turns out we have a lot of ranges that extend past the lexical scope.
GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered);
GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope;
+ GlobalStats.ScopeEntryValueBytesCovered += BytesEntryValuesCovered;
+ if (IsParam) {
+ GlobalStats.ParamScopeBytesCovered +=
+ std::min(BytesInScope, BytesCovered);
+ GlobalStats.ParamScopeBytesFromFirstDefinition += BytesInScope;
+ GlobalStats.ParamScopeEntryValueBytesCovered += BytesEntryValuesCovered;
+ } else if (IsLocalVar) {
+ GlobalStats.VarScopeBytesCovered += std::min(BytesInScope, BytesCovered);
+ GlobalStats.VarScopeBytesFromFirstDefinition += BytesInScope;
+ GlobalStats.VarScopeEntryValueBytesCovered += BytesEntryValuesCovered;
+ }
assert(GlobalStats.ScopeBytesCovered <=
GlobalStats.ScopeBytesFromFirstDefinition);
} else if (Die.getTag() == dwarf::DW_TAG_member) {
@@ -179,7 +328,7 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
FnStats.TotalVarWithLoc += (unsigned)HasLoc;
}
if (!IsArtificial) {
- if (Die.getTag() == dwarf::DW_TAG_formal_parameter) {
+ if (IsParam) {
FnStats.NumParams++;
if (HasType)
FnStats.NumParamTypes++;
@@ -187,7 +336,7 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
FnStats.NumParamSourceLocations++;
if (HasLoc)
FnStats.NumParamLocations++;
- } else if (Die.getTag() == dwarf::DW_TAG_variable) {
+ } else if (IsLocalVar) {
FnStats.NumVars++;
if (HasType)
FnStats.NumVarTypes++;
@@ -200,11 +349,12 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
}
/// Recursively collect debug info quality metrics.
-static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
+static void collectStatsRecursive(DWARFDie Die, uint64_t UnitLowPC, std::string FnPrefix,
std::string VarPrefix, uint64_t ScopeLowPC,
uint64_t BytesInScope, uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
- GlobalStats &GlobalStats) {
+ GlobalStats &GlobalStats,
+ LocationStats &LocStats) {
// Handle any kind of lexical scope.
const dwarf::Tag Tag = Die.getTag();
const bool IsFunction = Tag == dwarf::DW_TAG_subprogram;
@@ -272,8 +422,8 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
}
} else {
// Not a scope, visit the Die itself. It could be a variable.
- collectStatsForDie(Die, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope,
- InlineDepth, FnStatMap, GlobalStats);
+ collectStatsForDie(Die, UnitLowPC, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope,
+ InlineDepth, FnStatMap, GlobalStats, LocStats);
}
// Set InlineDepth correctly for child recursion
@@ -290,8 +440,9 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
if (Child.getTag() == dwarf::DW_TAG_lexical_block)
ChildVarPrefix += toHex(LexicalBlockIndex++) + '.';
- collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, ScopeLowPC,
- BytesInScope, InlineDepth, FnStatMap, GlobalStats);
+ collectStatsRecursive(Child, UnitLowPC, FnPrefix, ChildVarPrefix, ScopeLowPC,
+ BytesInScope, InlineDepth, FnStatMap, GlobalStats,
+ LocStats);
Child = Child.getSibling();
}
}
@@ -299,14 +450,33 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
/// Print machine-readable output.
/// The machine-readable format is single-line JSON output.
/// \{
-static void printDatum(raw_ostream &OS, const char *Key, StringRef Value) {
- OS << ",\"" << Key << "\":\"" << Value << '"';
- LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
-}
-static void printDatum(raw_ostream &OS, const char *Key, uint64_t Value) {
+static void printDatum(raw_ostream &OS, const char *Key, json::Value Value) {
OS << ",\"" << Key << "\":" << Value;
LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
}
+static void printLocationStats(raw_ostream &OS,
+ const char *Key,
+ std::vector<unsigned> &LocationStats) {
+ OS << ",\"" << Key << " with 0% of its scope covered\":"
+ << LocationStats[0];
+ LLVM_DEBUG(llvm::dbgs() << Key << " with 0% of its scope covered: "
+ << LocationStats[0] << '\n');
+ OS << ",\"" << Key << " with 1-9% of its scope covered\":"
+ << LocationStats[1];
+ LLVM_DEBUG(llvm::dbgs() << Key << " with 1-9% of its scope covered: "
+ << LocationStats[1] << '\n');
+ for (unsigned i = 2; i < NumOfCoverageCategories - 1; ++i) {
+ OS << ",\"" << Key << " with " << (i - 1) * 10 << "-" << i * 10 - 1
+ << "% of its scope covered\":" << LocationStats[i];
+ LLVM_DEBUG(llvm::dbgs()
+ << Key << " with " << (i - 1) * 10 << "-" << i * 10 - 1
+ << "% of its scope covered: " << LocationStats[i]);
+ }
+ OS << ",\"" << Key << " with 100% of its scope covered\":"
+ << LocationStats[NumOfCoverageCategories - 1];
+ LLVM_DEBUG(llvm::dbgs() << Key << " with 100% of its scope covered: "
+ << LocationStats[NumOfCoverageCategories - 1]);
+}
/// \}
/// Collect debug info quality metrics for an entire DIContext.
@@ -321,10 +491,12 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
Twine Filename, raw_ostream &OS) {
StringRef FormatName = Obj.getFileFormatName();
GlobalStats GlobalStats;
+ LocationStats LocStats;
StringMap<PerFunctionStats> Statistics;
for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units())
if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false))
- collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats);
+ collectStatsRecursive(CUDie, getLowPC(CUDie), "/", "g", 0, 0, 0,
+ Statistics, GlobalStats, LocStats);
/// The version number should be increased every time the algorithm is changed
/// (including bug fixes). New metrics may be added without increasing the
@@ -387,9 +559,24 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
printDatum(OS, "source variables", VarParamTotal);
printDatum(OS, "variables with location", VarParamWithLoc);
printDatum(OS, "call site entries", GlobalStats.CallSiteEntries);
+ printDatum(OS, "call site DIEs", GlobalStats.CallSiteDIEs);
+ printDatum(OS, "call site parameter DIEs", GlobalStats.CallSiteParamDIEs);
printDatum(OS, "scope bytes total",
GlobalStats.ScopeBytesFromFirstDefinition);
printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered);
+ printDatum(OS, "entry value scope bytes covered",
+ GlobalStats.ScopeEntryValueBytesCovered);
+ printDatum(OS, "formal params scope bytes total",
+ GlobalStats.ParamScopeBytesFromFirstDefinition);
+ printDatum(OS, "formal params scope bytes covered",
+ GlobalStats.ParamScopeBytesCovered);
+ printDatum(OS, "formal params entry value scope bytes covered",
+ GlobalStats.ParamScopeEntryValueBytesCovered);
+ printDatum(OS, "vars scope bytes total",
+ GlobalStats.VarScopeBytesFromFirstDefinition);
+ printDatum(OS, "vars scope bytes covered", GlobalStats.VarScopeBytesCovered);
+ printDatum(OS, "vars entry value scope bytes covered",
+ GlobalStats.VarScopeEntryValueBytesCovered);
printDatum(OS, "total function size", GlobalStats.FunctionSize);
printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize);
printDatum(OS, "total formal params", ParamTotal);
@@ -400,6 +587,20 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
printDatum(OS, "vars with source location", VarWithSrcLoc);
printDatum(OS, "vars with type", VarWithType);
printDatum(OS, "vars with binary location", VarWithLoc);
+ printDatum(OS, "total variables procesed by location statistics",
+ LocStats.NumVarParam);
+ printLocationStats(OS, "variables", LocStats.VarParamLocStats);
+ printLocationStats(OS, "variables (excluding the debug entry values)",
+ LocStats.VarParamNonEntryValLocStats);
+ printDatum(OS, "total params procesed by location statistics",
+ LocStats.NumParam);
+ printLocationStats(OS, "params", LocStats.ParamLocStats);
+ printLocationStats(OS, "params (excluding the debug entry values)",
+ LocStats.ParamNonEntryValLocStats);
+ printDatum(OS, "total vars procesed by location statistics", LocStats.NumVar);
+ printLocationStats(OS, "vars", LocStats.VarLocStats);
+ printLocationStats(OS, "vars (excluding the debug entry values)",
+ LocStats.VarNonEntryValLocStats);
OS << "}\n";
LLVM_DEBUG(
llvm::dbgs() << "Total Availability: "
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 05a7aef67ece..e20f6041f98d 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -584,7 +584,7 @@ int main(int argc, char **argv) {
}
std::error_code EC;
- ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_None);
+ ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_Text);
error("Unable to open output file" + OutputFilename, EC);
// Don't remove output file if we exit with an error.
OutputFile.keep();
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index 300bc0b4bd52..dddc0d9baa08 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -74,8 +74,18 @@ static cl::list<std::string>
// ExtractBlocks - The blocks to extract from the module.
static cl::list<std::string> ExtractBlocks(
- "bb", cl::desc("Specify <function, basic block> pairs to extract"),
- cl::ZeroOrMore, cl::value_desc("function:bb"), cl::cat(ExtractCat));
+ "bb",
+ cl::desc(
+ "Specify <function, basic block1[;basic block2...]> pairs to extract.\n"
+ "Each pair will create a function.\n"
+ "If multiple basic blocks are specified in one pair,\n"
+ "the first block in the sequence should dominate the rest.\n"
+ "eg:\n"
+ " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n"
+ " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one "
+ "with bb2."),
+ cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"),
+ cl::cat(ExtractCat));
// ExtractAlias - The alias to extract from the module.
static cl::list<std::string>
@@ -350,7 +360,7 @@ int main(int argc, char **argv) {
Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls
std::error_code EC;
- ToolOutputFile Out(OutputFilename, EC, sys::fs::F_None);
+ ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None);
if (EC) {
errs() << EC.message() << '\n';
return 1;
diff --git a/tools/llvm-ifs/CMakeLists.txt b/tools/llvm-ifs/CMakeLists.txt
new file mode 100644
index 000000000000..544b0e41a5ed
--- /dev/null
+++ b/tools/llvm-ifs/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS
+ Object
+ Support
+ TextAPI
+ ObjectYAML
+ )
+
+add_llvm_tool(llvm-ifs
+ llvm-ifs.cpp
+ )
diff --git a/tools/llvm-ifs/LLVMBuild.txt b/tools/llvm-ifs/LLVMBuild.txt
new file mode 100644
index 000000000000..10dc6bd8f550
--- /dev/null
+++ b/tools/llvm-ifs/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./tools/llvm-ifs/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-ifs
+parent = Tools
+required_libraries = Object Support TextAPI
diff --git a/tools/llvm-ifs/llvm-ifs.cpp b/tools/llvm-ifs/llvm-ifs.cpp
new file mode 100644
index 000000000000..f329b4633632
--- /dev/null
+++ b/tools/llvm-ifs/llvm-ifs.cpp
@@ -0,0 +1,532 @@
+//===- llvm-ifs.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------------===/
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TextAPI/MachO/InterfaceFile.h"
+#include "llvm/TextAPI/MachO/TextAPIReader.h"
+#include "llvm/TextAPI/MachO/TextAPIWriter.h"
+#include <set>
+#include <string>
+
+using namespace llvm;
+using namespace llvm::yaml;
+using namespace llvm::MachO;
+
+#define DEBUG_TYPE "llvm-ifs"
+
+namespace {
+const VersionTuple IFSVersionCurrent(1, 2);
+}
+
+static cl::opt<std::string> Action("action", cl::desc("<llvm-ifs action>"),
+ cl::value_desc("write-ifs | write-bin"),
+ cl::init("write-ifs"));
+
+static cl::opt<std::string> ForceFormat("force-format",
+ cl::desc("<force object format>"),
+ cl::value_desc("ELF | TBD"),
+ cl::init(""));
+
+static cl::list<std::string> InputFilenames(cl::Positional,
+ cl::desc("<input ifs files>"),
+ cl::ZeroOrMore);
+
+static cl::opt<std::string> OutputFilename("o", cl::desc("<output file>"),
+ cl::value_desc("path"));
+
+enum class IFSSymbolType {
+ NoType = 0,
+ Object,
+ Func,
+ // Type information is 4 bits, so 16 is safely out of range.
+ Unknown = 16,
+};
+
+std::string getTypeName(IFSSymbolType Type) {
+ switch (Type) {
+ case IFSSymbolType::NoType:
+ return "NoType";
+ case IFSSymbolType::Func:
+ return "Func";
+ case IFSSymbolType::Object:
+ return "Object";
+ case IFSSymbolType::Unknown:
+ return "Unknown";
+ }
+ llvm_unreachable("Unexpected ifs symbol type.");
+}
+
+struct IFSSymbol {
+ IFSSymbol(std::string SymbolName) : Name(SymbolName) {}
+ std::string Name;
+ uint64_t Size;
+ IFSSymbolType Type;
+ bool Weak;
+ Optional<std::string> Warning;
+ bool operator<(const IFSSymbol &RHS) const { return Name < RHS.Name; }
+};
+
+namespace llvm {
+namespace yaml {
+/// YAML traits for IFSSymbolType.
+template <> struct ScalarEnumerationTraits<IFSSymbolType> {
+ static void enumeration(IO &IO, IFSSymbolType &SymbolType) {
+ IO.enumCase(SymbolType, "NoType", IFSSymbolType::NoType);
+ IO.enumCase(SymbolType, "Func", IFSSymbolType::Func);
+ IO.enumCase(SymbolType, "Object", IFSSymbolType::Object);
+ IO.enumCase(SymbolType, "Unknown", IFSSymbolType::Unknown);
+ // Treat other symbol types as noise, and map to Unknown.
+ if (!IO.outputting() && IO.matchEnumFallback())
+ SymbolType = IFSSymbolType::Unknown;
+ }
+};
+
+template <> struct ScalarTraits<VersionTuple> {
+ static void output(const VersionTuple &Value, void *,
+ llvm::raw_ostream &Out) {
+ Out << Value.getAsString();
+ }
+
+ static StringRef input(StringRef Scalar, void *, VersionTuple &Value) {
+ if (Value.tryParse(Scalar))
+ return StringRef("Can't parse version: invalid version format.");
+
+ if (Value > IFSVersionCurrent)
+ return StringRef("Unsupported IFS version.");
+
+ // Returning empty StringRef indicates successful parse.
+ return StringRef();
+ }
+
+ // Don't place quotation marks around version value.
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
+/// YAML traits for IFSSymbol.
+template <> struct MappingTraits<IFSSymbol> {
+ static void mapping(IO &IO, IFSSymbol &Symbol) {
+ IO.mapRequired("Type", Symbol.Type);
+ // The need for symbol size depends on the symbol type.
+ if (Symbol.Type == IFSSymbolType::NoType)
+ IO.mapOptional("Size", Symbol.Size, (uint64_t)0);
+ else if (Symbol.Type == IFSSymbolType::Func)
+ Symbol.Size = 0;
+ else
+ IO.mapRequired("Size", Symbol.Size);
+ IO.mapOptional("Weak", Symbol.Weak, false);
+ IO.mapOptional("Warning", Symbol.Warning);
+ }
+
+ // Compacts symbol information into a single line.
+ static const bool flow = true;
+};
+
+/// YAML traits for set of IFSSymbols.
+template <> struct CustomMappingTraits<std::set<IFSSymbol>> {
+ static void inputOne(IO &IO, StringRef Key, std::set<IFSSymbol> &Set) {
+ std::string Name = Key.str();
+ IFSSymbol Sym(Name);
+ IO.mapRequired(Name.c_str(), Sym);
+ Set.insert(Sym);
+ }
+
+ static void output(IO &IO, std::set<IFSSymbol> &Set) {
+ for (auto &Sym : Set)
+ IO.mapRequired(Sym.Name.c_str(), const_cast<IFSSymbol &>(Sym));
+ }
+};
+} // namespace yaml
+} // namespace llvm
+
+// A cumulative representation of ELF stubs.
+// Both textual and binary stubs will read into and write from this object.
+class IFSStub {
+ // TODO: Add support for symbol versioning.
+public:
+ VersionTuple IfsVersion;
+ std::string Triple;
+ std::string ObjectFileFormat;
+ Optional<std::string> SOName;
+ std::vector<std::string> NeededLibs;
+ std::set<IFSSymbol> Symbols;
+
+ IFSStub() = default;
+ IFSStub(const IFSStub &Stub)
+ : IfsVersion(Stub.IfsVersion), Triple(Stub.Triple),
+ ObjectFileFormat(Stub.ObjectFileFormat), SOName(Stub.SOName),
+ NeededLibs(Stub.NeededLibs), Symbols(Stub.Symbols) {}
+ IFSStub(IFSStub &&Stub)
+ : IfsVersion(std::move(Stub.IfsVersion)), Triple(std::move(Stub.Triple)),
+ ObjectFileFormat(std::move(Stub.ObjectFileFormat)),
+ SOName(std::move(Stub.SOName)), NeededLibs(std::move(Stub.NeededLibs)),
+ Symbols(std::move(Stub.Symbols)) {}
+};
+
+namespace llvm {
+namespace yaml {
+/// YAML traits for IFSStub objects.
+template <> struct MappingTraits<IFSStub> {
+ static void mapping(IO &IO, IFSStub &Stub) {
+ if (!IO.mapTag("!experimental-ifs-v1", true))
+ IO.setError("Not a .ifs YAML file.");
+ IO.mapRequired("IfsVersion", Stub.IfsVersion);
+ IO.mapOptional("Triple", Stub.Triple);
+ IO.mapOptional("ObjectFileFormat", Stub.ObjectFileFormat);
+ IO.mapOptional("SOName", Stub.SOName);
+ IO.mapOptional("NeededLibs", Stub.NeededLibs);
+ IO.mapRequired("Symbols", Stub.Symbols);
+ }
+};
+} // namespace yaml
+} // namespace llvm
+
+static Expected<std::unique_ptr<IFSStub>> readInputFile(StringRef FilePath) {
+ // Read in file.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrError =
+ MemoryBuffer::getFileOrSTDIN(FilePath);
+ if (!BufOrError)
+ return createStringError(BufOrError.getError(), "Could not open `%s`",
+ FilePath.data());
+
+ std::unique_ptr<MemoryBuffer> FileReadBuffer = std::move(*BufOrError);
+ yaml::Input YamlIn(FileReadBuffer->getBuffer());
+ std::unique_ptr<IFSStub> Stub(new IFSStub());
+ YamlIn >> *Stub;
+
+ if (std::error_code Err = YamlIn.error())
+ return createStringError(Err, "Failed reading Interface Stub File.");
+
+ return std::move(Stub);
+}
+
+int writeTbdStub(const llvm::Triple &T, const std::set<IFSSymbol> &Symbols,
+ const StringRef Format, raw_ostream &Out) {
+
+ auto PlatformKindOrError =
+ [](const llvm::Triple &T) -> llvm::Expected<llvm::MachO::PlatformKind> {
+ if (T.isMacOSX())
+ return llvm::MachO::PlatformKind::macOS;
+ if (T.isTvOS())
+ return llvm::MachO::PlatformKind::tvOS;
+ if (T.isWatchOS())
+ return llvm::MachO::PlatformKind::watchOS;
+ // Note: put isiOS last because tvOS and watchOS are also iOS according
+ // to the Triple.
+ if (T.isiOS())
+ return llvm::MachO::PlatformKind::iOS;
+
+ // TODO: Add an option for ForceTriple, but keep ForceFormat for now.
+ if (ForceFormat == "TBD")
+ return llvm::MachO::PlatformKind::macOS;
+
+ return createStringError(errc::not_supported, "Invalid Platform.\n");
+ }(T);
+
+ if (!PlatformKindOrError)
+ return -1;
+
+ PlatformKind Plat = PlatformKindOrError.get();
+ TargetList Targets({Target(llvm::MachO::mapToArchitecture(T), Plat)});
+
+ InterfaceFile File;
+ File.setFileType(FileType::TBD_V3); // Only supporting v3 for now.
+ File.addTargets(Targets);
+
+ for (const auto &Symbol : Symbols) {
+ auto Name = Symbol.Name;
+ auto Kind = SymbolKind::GlobalSymbol;
+ switch (Symbol.Type) {
+ default:
+ case IFSSymbolType::NoType:
+ Kind = SymbolKind::GlobalSymbol;
+ break;
+ case IFSSymbolType::Object:
+ Kind = SymbolKind::GlobalSymbol;
+ break;
+ case IFSSymbolType::Func:
+ Kind = SymbolKind::GlobalSymbol;
+ break;
+ }
+ if (Symbol.Weak)
+ File.addSymbol(Kind, Name, Targets, SymbolFlags::WeakDefined);
+ else
+ File.addSymbol(Kind, Name, Targets);
+ }
+
+ SmallString<4096> Buffer;
+ raw_svector_ostream OS(Buffer);
+ if (Error Result = TextAPIWriter::writeToStream(OS, File))
+ return -1;
+ Out << OS.str();
+ return 0;
+}
+
+int writeElfStub(const llvm::Triple &T, const std::set<IFSSymbol> &Symbols,
+ const StringRef Format, raw_ostream &Out) {
+ SmallString<0> Storage;
+ Storage.clear();
+ raw_svector_ostream OS(Storage);
+
+ OS << "--- !ELF\n";
+ OS << "FileHeader:\n";
+ OS << " Class: ELFCLASS";
+ OS << (T.isArch64Bit() ? "64" : "32");
+ OS << "\n";
+ OS << " Data: ELFDATA2";
+ OS << (T.isLittleEndian() ? "LSB" : "MSB");
+ OS << "\n";
+ OS << " Type: ET_DYN\n";
+ OS << " Machine: "
+ << llvm::StringSwitch<llvm::StringRef>(T.getArchName())
+ .Case("x86_64", "EM_X86_64")
+ .Case("i386", "EM_386")
+ .Case("i686", "EM_386")
+ .Case("aarch64", "EM_AARCH64")
+ .Case("amdgcn", "EM_AMDGPU")
+ .Case("r600", "EM_AMDGPU")
+ .Case("arm", "EM_ARM")
+ .Case("thumb", "EM_ARM")
+ .Case("avr", "EM_AVR")
+ .Case("mips", "EM_MIPS")
+ .Case("mipsel", "EM_MIPS")
+ .Case("mips64", "EM_MIPS")
+ .Case("mips64el", "EM_MIPS")
+ .Case("msp430", "EM_MSP430")
+ .Case("ppc", "EM_PPC")
+ .Case("ppc64", "EM_PPC64")
+ .Case("ppc64le", "EM_PPC64")
+ .Case("x86", T.isOSIAMCU() ? "EM_IAMCU" : "EM_386")
+ .Case("x86_64", "EM_X86_64")
+ .Default("EM_NONE")
+ << "\nSections:"
+ << "\n - Name: .text"
+ << "\n Type: SHT_PROGBITS"
+ << "\n - Name: .data"
+ << "\n Type: SHT_PROGBITS"
+ << "\n - Name: .rodata"
+ << "\n Type: SHT_PROGBITS"
+ << "\nSymbols:\n";
+ for (const auto &Symbol : Symbols) {
+ OS << " - Name: " << Symbol.Name << "\n"
+ << " Type: STT_";
+ switch (Symbol.Type) {
+ default:
+ case IFSSymbolType::NoType:
+ OS << "NOTYPE";
+ break;
+ case IFSSymbolType::Object:
+ OS << "OBJECT";
+ break;
+ case IFSSymbolType::Func:
+ OS << "FUNC";
+ break;
+ }
+ OS << "\n Section: .text"
+ << "\n Binding: STB_" << (Symbol.Weak ? "WEAK" : "GLOBAL")
+ << "\n";
+ }
+ OS << "...\n";
+
+ std::string YamlStr = OS.str();
+
+ // Only or debugging. Not an offical format.
+ LLVM_DEBUG({
+ if (ForceFormat == "ELFOBJYAML") {
+ Out << YamlStr;
+ return 0;
+ }
+ });
+
+ yaml::Input YIn(YamlStr);
+ auto ErrHandler = [](const Twine &Msg) {
+ WithColor::error(errs(), "llvm-ifs") << Msg << "\n";
+ };
+ return convertYAML(YIn, Out, ErrHandler) ? 0 : 1;
+}
+
+int writeIfso(const IFSStub &Stub, bool IsWriteIfs, raw_ostream &Out) {
+ if (IsWriteIfs) {
+ yaml::Output YamlOut(Out, NULL, /*WrapColumn =*/0);
+ YamlOut << const_cast<IFSStub &>(Stub);
+ return 0;
+ }
+
+ std::string ObjectFileFormat =
+ ForceFormat.empty() ? Stub.ObjectFileFormat : ForceFormat;
+
+ if (ObjectFileFormat == "ELF" || ForceFormat == "ELFOBJYAML")
+ return writeElfStub(llvm::Triple(Stub.Triple), Stub.Symbols,
+ Stub.ObjectFileFormat, Out);
+ if (ObjectFileFormat == "TBD")
+ return writeTbdStub(llvm::Triple(Stub.Triple), Stub.Symbols,
+ Stub.ObjectFileFormat, Out);
+
+ WithColor::error()
+ << "Invalid ObjectFileFormat: Only ELF and TBD are supported.\n";
+ return -1;
+}
+
+// New Interface Stubs Yaml Format:
+// --- !experimental-ifs-v1
+// IfsVersion: 1.0
+// Triple: <llvm triple>
+// ObjectFileFormat: <ELF | others not yet supported>
+// Symbols:
+// _ZSymbolName: { Type: <type> }
+// ...
+
+int main(int argc, char *argv[]) {
+ // Parse arguments.
+ cl::ParseCommandLineOptions(argc, argv);
+
+ if (InputFilenames.empty())
+ InputFilenames.push_back("-");
+
+ IFSStub Stub;
+ std::map<std::string, IFSSymbol> SymbolMap;
+
+ std::string PreviousInputFilePath = "";
+ for (const std::string &InputFilePath : InputFilenames) {
+ Expected<std::unique_ptr<IFSStub>> StubOrErr = readInputFile(InputFilePath);
+ if (!StubOrErr) {
+ WithColor::error() << StubOrErr.takeError() << "\n";
+ return -1;
+ }
+ std::unique_ptr<IFSStub> TargetStub = std::move(StubOrErr.get());
+
+ if (Stub.Triple.empty()) {
+ PreviousInputFilePath = InputFilePath;
+ Stub.IfsVersion = TargetStub->IfsVersion;
+ Stub.Triple = TargetStub->Triple;
+ Stub.ObjectFileFormat = TargetStub->ObjectFileFormat;
+ Stub.SOName = TargetStub->SOName;
+ Stub.NeededLibs = TargetStub->NeededLibs;
+ } else {
+ if (Stub.IfsVersion != TargetStub->IfsVersion) {
+ if (Stub.IfsVersion.getMajor() != IFSVersionCurrent.getMajor()) {
+ WithColor::error()
+ << "Interface Stub: IfsVersion Mismatch."
+ << "\nFilenames: " << PreviousInputFilePath << " "
+ << InputFilePath << "\nIfsVersion Values: " << Stub.IfsVersion
+ << " " << TargetStub->IfsVersion << "\n";
+ return -1;
+ }
+ if (TargetStub->IfsVersion > Stub.IfsVersion)
+ Stub.IfsVersion = TargetStub->IfsVersion;
+ }
+ if (Stub.ObjectFileFormat != TargetStub->ObjectFileFormat) {
+ WithColor::error() << "Interface Stub: ObjectFileFormat Mismatch."
+ << "\nFilenames: " << PreviousInputFilePath << " "
+ << InputFilePath << "\nObjectFileFormat Values: "
+ << Stub.ObjectFileFormat << " "
+ << TargetStub->ObjectFileFormat << "\n";
+ return -1;
+ }
+ if (Stub.Triple != TargetStub->Triple) {
+ WithColor::error() << "Interface Stub: Triple Mismatch."
+ << "\nFilenames: " << PreviousInputFilePath << " "
+ << InputFilePath
+ << "\nTriple Values: " << Stub.Triple << " "
+ << TargetStub->Triple << "\n";
+ return -1;
+ }
+ if (Stub.SOName != TargetStub->SOName) {
+ WithColor::error() << "Interface Stub: SOName Mismatch."
+ << "\nFilenames: " << PreviousInputFilePath << " "
+ << InputFilePath
+ << "\nSOName Values: " << Stub.SOName << " "
+ << TargetStub->SOName << "\n";
+ return -1;
+ }
+ if (Stub.NeededLibs != TargetStub->NeededLibs) {
+ WithColor::error() << "Interface Stub: NeededLibs Mismatch."
+ << "\nFilenames: " << PreviousInputFilePath << " "
+ << InputFilePath << "\n";
+ return -1;
+ }
+ }
+
+ for (auto Symbol : TargetStub->Symbols) {
+ auto SI = SymbolMap.find(Symbol.Name);
+ if (SI == SymbolMap.end()) {
+ SymbolMap.insert(
+ std::pair<std::string, IFSSymbol>(Symbol.Name, Symbol));
+ continue;
+ }
+
+ assert(Symbol.Name == SI->second.Name && "Symbol Names Must Match.");
+
+ // Check conflicts:
+ if (Symbol.Type != SI->second.Type) {
+ WithColor::error() << "Interface Stub: Type Mismatch for "
+ << Symbol.Name << ".\nFilename: " << InputFilePath
+ << "\nType Values: " << getTypeName(SI->second.Type)
+ << " " << getTypeName(Symbol.Type) << "\n";
+
+ return -1;
+ }
+ if (Symbol.Size != SI->second.Size) {
+ WithColor::error() << "Interface Stub: Size Mismatch for "
+ << Symbol.Name << ".\nFilename: " << InputFilePath
+ << "\nSize Values: " << SI->second.Size << " "
+ << Symbol.Size << "\n";
+
+ return -1;
+ }
+ if (Symbol.Weak != SI->second.Weak) {
+ // TODO: Add conflict resolution for Weak vs non-Weak.
+ WithColor::error() << "Interface Stub: Weak Mismatch for "
+ << Symbol.Name << ".\nFilename: " << InputFilePath
+ << "\nWeak Values: " << SI->second.Weak << " "
+ << Symbol.Weak << "\n";
+
+ return -1;
+ }
+ // TODO: Not checking Warning. Will be dropped.
+ }
+
+ PreviousInputFilePath = InputFilePath;
+ }
+
+ if (Stub.IfsVersion != IFSVersionCurrent)
+ if (Stub.IfsVersion.getMajor() != IFSVersionCurrent.getMajor()) {
+ WithColor::error() << "Interface Stub: Bad IfsVersion: "
+ << Stub.IfsVersion << ", llvm-ifs supported version: "
+ << IFSVersionCurrent << ".\n";
+ return -1;
+ }
+
+ for (auto &Entry : SymbolMap)
+ Stub.Symbols.insert(Entry.second);
+
+ std::error_code SysErr;
+
+ // Open file for writing.
+ raw_fd_ostream Out(OutputFilename, SysErr);
+ if (SysErr) {
+ WithColor::error() << "Couldn't open " << OutputFilename
+ << " for writing.\n";
+ return -1;
+ }
+
+ return writeIfso(Stub, (Action == "write-ifs"), Out);
+}
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index 50ba57178d02..fa36e083b6f8 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -351,13 +351,13 @@ int main(int argc, char **argv) {
LLVMContext Context;
Context.setDiagnosticHandler(
- llvm::make_unique<LLVMLinkDiagnosticHandler>(), true);
+ std::make_unique<LLVMLinkDiagnosticHandler>(), true);
cl::ParseCommandLineOptions(argc, argv, "llvm linker\n");
if (!DisableDITypeMap)
Context.enableDebugTypeODRUniquing();
- auto Composite = make_unique<Module>("llvm-link", Context);
+ auto Composite = std::make_unique<Module>("llvm-link", Context);
Linker L(*Composite);
unsigned Flags = Linker::Flags::None;
@@ -381,7 +381,7 @@ int main(int argc, char **argv) {
errs() << "Here's the assembly:\n" << *Composite;
std::error_code EC;
- ToolOutputFile Out(OutputFilename, EC, sys::fs::F_None);
+ ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None);
if (EC) {
WithColor::error() << EC.message() << '\n';
return 1;
diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp
index 585207b25185..b47e68e82850 100644
--- a/tools/llvm-lto/llvm-lto.cpp
+++ b/tools/llvm-lto/llvm-lto.cpp
@@ -315,8 +315,8 @@ getLocalLTOModule(StringRef Path, std::unique_ptr<MemoryBuffer> &Buffer,
error(BufferOrErr, "error loading file '" + Path + "'");
Buffer = std::move(BufferOrErr.get());
CurrentActivity = ("loading file '" + Path + "'").str();
- std::unique_ptr<LLVMContext> Context = llvm::make_unique<LLVMContext>();
- Context->setDiagnosticHandler(llvm::make_unique<LLVMLTODiagnosticHandler>(),
+ std::unique_ptr<LLVMContext> Context = std::make_unique<LLVMContext>();
+ Context->setDiagnosticHandler(std::make_unique<LLVMLTODiagnosticHandler>(),
true);
ErrorOr<std::unique_ptr<LTOModule>> Ret = LTOModule::createInLocalContext(
std::move(Context), Buffer->getBufferStart(), Buffer->getBufferSize(),
@@ -420,7 +420,7 @@ static void createCombinedModuleSummaryIndex() {
std::error_code EC;
assert(!OutputFilename.empty());
raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC,
- sys::fs::OpenFlags::F_None);
+ sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputFilename + ".thinlto.bc'");
WriteIndexToFile(CombinedIndex, OS);
OS.close();
@@ -510,7 +510,7 @@ static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile &File,
static void writeModuleToFile(Module &TheModule, StringRef Filename) {
std::error_code EC;
- raw_fd_ostream OS(Filename, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OS(Filename, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + Filename + "'");
maybeVerifyModule(TheModule);
WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true);
@@ -581,7 +581,7 @@ private:
if (!CombinedIndex)
report_fatal_error("ThinLink didn't create an index");
std::error_code EC;
- raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputFilename + "'");
WriteIndexToFile(*CombinedIndex, OS);
}
@@ -619,7 +619,7 @@ private:
}
OutputName = getThinLTOOutputFile(OutputName, OldPrefix, NewPrefix);
std::error_code EC;
- raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputName + "'");
WriteIndexToFile(*Index, OS, &ModuleToSummariesForIndex);
}
@@ -802,7 +802,7 @@ private:
}
std::error_code EC;
- raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputName + "'");
OS << std::get<0>(BinName)->getBuffer();
}
@@ -848,7 +848,7 @@ private:
for (unsigned BufID = 0; BufID < Binaries.size(); ++BufID) {
auto OutputName = InputFilenames[BufID] + ".thinlto.o";
std::error_code EC;
- raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputName + "'");
OS << Binaries[BufID]->getBuffer();
}
@@ -921,7 +921,7 @@ int main(int argc, char **argv) {
unsigned BaseArg = 0;
LLVMContext Context;
- Context.setDiagnosticHandler(llvm::make_unique<LLVMLTODiagnosticHandler>(),
+ Context.setDiagnosticHandler(std::make_unique<LLVMLTODiagnosticHandler>(),
true);
LTOCodeGenerator CodeGen(Context);
@@ -1020,7 +1020,7 @@ int main(int argc, char **argv) {
if (Parallelism != 1)
PartFilename += "." + utostr(I);
std::error_code EC;
- OSs.emplace_back(PartFilename, EC, sys::fs::F_None);
+ OSs.emplace_back(PartFilename, EC, sys::fs::OF_None);
if (EC)
error("error opening the file '" + PartFilename + "': " + EC.message());
OSPtrs.push_back(&OSs.back().os());
diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp
index 0bd9289dc938..5e3b3dcb6c31 100644
--- a/tools/llvm-lto2/llvm-lto2.cpp
+++ b/tools/llvm-lto2/llvm-lto2.cpp
@@ -291,6 +291,14 @@ static int run(int argc, char **argv) {
std::vector<SymbolResolution> Res;
for (const InputFile::Symbol &Sym : Input->symbols()) {
auto I = CommandLineResolutions.find({F, Sym.getName()});
+ // If it isn't found, look for "$", which would have been added
+ // (followed by a hash) when the symbol was promoted during module
+ // splitting if it was defined in one part and used in the other.
+ // Try looking up the symbol name before the "$".
+ if (I == CommandLineResolutions.end()) {
+ auto SplitName = Sym.getName().rsplit("$");
+ I = CommandLineResolutions.find({F, SplitName.first});
+ }
if (I == CommandLineResolutions.end()) {
llvm::errs() << argv[0] << ": missing symbol resolution for " << F
<< ',' << Sym.getName() << '\n';
@@ -325,9 +333,9 @@ static int run(int argc, char **argv) {
std::string Path = OutputFilename + "." + utostr(Task);
std::error_code EC;
- auto S = llvm::make_unique<raw_fd_ostream>(Path, EC, sys::fs::F_None);
+ auto S = std::make_unique<raw_fd_ostream>(Path, EC, sys::fs::OF_None);
check(EC, Path);
- return llvm::make_unique<lto::NativeObjectStream>(std::move(S));
+ return std::make_unique<lto::NativeObjectStream>(std::move(S));
};
auto AddBuffer = [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) {
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index e2af2e7f2e32..1ddbddfa1846 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -129,13 +130,10 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
return false;
}
-int Disassembler::disassemble(const Target &T,
- const std::string &Triple,
- MCSubtargetInfo &STI,
- MCStreamer &Streamer,
- MemoryBuffer &Buffer,
- SourceMgr &SM,
- raw_ostream &Out) {
+int Disassembler::disassemble(const Target &T, const std::string &Triple,
+ MCSubtargetInfo &STI, MCStreamer &Streamer,
+ MemoryBuffer &Buffer, SourceMgr &SM,
+ MCContext &Ctx, raw_ostream &Out) {
std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
if (!MRI) {
@@ -149,9 +147,6 @@ int Disassembler::disassemble(const Target &T,
return -1;
}
- // Set up the MCContext for creating symbols and MCExpr's.
- MCContext Ctx(MAI.get(), MRI.get(), nullptr);
-
std::unique_ptr<const MCDisassembler> DisAsm(
T.createMCDisassembler(STI, Ctx));
if (!DisAsm) {
diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h
index 11b685233abc..dcd8c279c91a 100644
--- a/tools/llvm-mc/Disassembler.h
+++ b/tools/llvm-mc/Disassembler.h
@@ -22,17 +22,15 @@ class MemoryBuffer;
class Target;
class raw_ostream;
class SourceMgr;
+class MCContext;
class MCSubtargetInfo;
class MCStreamer;
class Disassembler {
public:
- static int disassemble(const Target &T,
- const std::string &Triple,
- MCSubtargetInfo &STI,
- MCStreamer &Streamer,
- MemoryBuffer &Buffer,
- SourceMgr &SM,
+ static int disassemble(const Target &T, const std::string &Triple,
+ MCSubtargetInfo &STI, MCStreamer &Streamer,
+ MemoryBuffer &Buffer, SourceMgr &SM, MCContext &Ctx,
raw_ostream &Out);
};
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index ec189c297860..c23740a3094d 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -209,9 +209,10 @@ static const Target *GetTarget(const char *ProgName) {
return TheTarget;
}
-static std::unique_ptr<ToolOutputFile> GetOutputStream(StringRef Path) {
+static std::unique_ptr<ToolOutputFile> GetOutputStream(StringRef Path,
+ sys::fs::OpenFlags Flags) {
std::error_code EC;
- auto Out = llvm::make_unique<ToolOutputFile>(Path, EC, sys::fs::F_None);
+ auto Out = std::make_unique<ToolOutputFile>(Path, EC, Flags);
if (EC) {
WithColor::error() << EC.message() << '\n';
return nullptr;
@@ -279,7 +280,7 @@ static int fillCommandLineSymbols(MCAsmParser &Parser) {
static int AssembleInput(const char *ProgName, const Target *TheTarget,
SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
MCAsmInfo &MAI, MCSubtargetInfo &STI,
- MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
+ MCInstrInfo &MCII, MCTargetOptions const &MCOptions) {
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, Ctx, Str, MAI));
std::unique_ptr<MCTargetAsmParser> TAP(
@@ -316,7 +317,7 @@ int main(int argc, char **argv) {
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
+ const MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
setDwarfDebugFlags(argc, argv);
setDwarfDebugProducer();
@@ -368,7 +369,7 @@ int main(int argc, char **argv) {
// FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and
// MCObjectFileInfo needs a MCContext reference in order to initialize itself.
MCObjectFileInfo MOFI;
- MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
+ MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr, &MCOptions);
MOFI.InitMCObjectFileInfo(TheTriple, PIC, Ctx, LargeCodeModel);
if (SaveTempLabels)
@@ -413,7 +414,9 @@ int main(int argc, char **argv) {
FeaturesStr = Features.getString();
}
- std::unique_ptr<ToolOutputFile> Out = GetOutputStream(OutputFilename);
+ sys::fs::OpenFlags Flags = (FileType == OFT_AssemblyFile) ? sys::fs::OF_Text
+ : sys::fs::OF_None;
+ std::unique_ptr<ToolOutputFile> Out = GetOutputStream(OutputFilename, Flags);
if (!Out)
return 1;
@@ -423,7 +426,7 @@ int main(int argc, char **argv) {
WithColor::error() << "dwo output only supported with object files\n";
return 1;
}
- DwoOut = GetOutputStream(SplitDwarfFile);
+ DwoOut = GetOutputStream(SplitDwarfFile, sys::fs::OF_None);
if (!DwoOut)
return 1;
}
@@ -459,7 +462,7 @@ int main(int argc, char **argv) {
std::unique_ptr<MCAsmBackend> MAB(
TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));
- auto FOut = llvm::make_unique<formatted_raw_ostream>(*OS);
+ auto FOut = std::make_unique<formatted_raw_ostream>(*OS);
Str.reset(
TheTarget->createAsmStreamer(Ctx, std::move(FOut), /*asmverbose*/ true,
/*useDwarfDirectory*/ true, IP,
@@ -474,7 +477,7 @@ int main(int argc, char **argv) {
Ctx.setUseNamesOnTempLabels(false);
if (!Out->os().supportsSeeking()) {
- BOS = make_unique<buffer_ostream>(Out->os());
+ BOS = std::make_unique<buffer_ostream>(Out->os());
OS = BOS.get();
}
@@ -506,7 +509,7 @@ int main(int argc, char **argv) {
break;
case AC_MDisassemble:
assert(IP && "Expected assembly output");
- IP->setUseMarkup(1);
+ IP->setUseMarkup(true);
disassemble = true;
break;
case AC_Disassemble:
@@ -514,8 +517,8 @@ int main(int argc, char **argv) {
break;
}
if (disassemble)
- Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str,
- *Buffer, SrcMgr, Out->os());
+ Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, *Buffer,
+ SrcMgr, Ctx, Out->os());
// Keep output if no errors.
if (Res == 0) {
diff --git a/tools/llvm-mca/CodeRegion.cpp b/tools/llvm-mca/CodeRegion.cpp
index bf592f67245e..e05517c1ac95 100644
--- a/tools/llvm-mca/CodeRegion.cpp
+++ b/tools/llvm-mca/CodeRegion.cpp
@@ -18,7 +18,7 @@ namespace mca {
CodeRegions::CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) {
// Create a default region for the input code sequence.
- Regions.emplace_back(make_unique<CodeRegion>("", SMLoc()));
+ Regions.emplace_back(std::make_unique<CodeRegion>("", SMLoc()));
}
bool CodeRegion::isLocInRange(SMLoc Loc) const {
@@ -36,7 +36,7 @@ void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
if (Regions.size() == 1 && !Regions[0]->startLoc().isValid() &&
!Regions[0]->endLoc().isValid()) {
ActiveRegions[Description] = 0;
- Regions[0] = make_unique<CodeRegion>(Description, Loc);
+ Regions[0] = std::make_unique<CodeRegion>(Description, Loc);
return;
}
} else {
@@ -62,7 +62,7 @@ void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
}
ActiveRegions[Description] = Regions.size();
- Regions.emplace_back(make_unique<CodeRegion>(Description, Loc));
+ Regions.emplace_back(std::make_unique<CodeRegion>(Description, Loc));
return;
}
diff --git a/tools/llvm-mca/CodeRegionGenerator.cpp b/tools/llvm-mca/CodeRegionGenerator.cpp
index c793169e64e0..8ddcd2f4abe2 100644
--- a/tools/llvm-mca/CodeRegionGenerator.cpp
+++ b/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -118,6 +118,8 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions() {
MCAsmLexer &Lexer = Parser->getLexer();
MCACommentConsumer CC(Regions);
Lexer.setCommentConsumer(&CC);
+ // Enable support for MASM literal numbers (example: 05h, 101b).
+ Lexer.setLexMasmIntegers(true);
std::unique_ptr<MCTargetAsmParser> TAP(
TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts));
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index 560c6c6e8a33..feff0cd6d524 100644
--- a/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -165,10 +165,33 @@ void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
"Unsupported dependency type!");
OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
}
- OS << " - CYCLES: " << DE.Cost << '\n';
+ OS << " - COST: " << DE.Cost << '\n';
}
#endif // NDEBUG
+void DependencyGraph::pruneEdges(unsigned Iterations) {
+ for (DGNode &N : Nodes) {
+ unsigned NumPruned = 0;
+ const unsigned Size = N.OutgoingEdges.size();
+ // Use a cut-off threshold to prune edges with a low frequency.
+ for (unsigned I = 0, E = Size; I < E; ++I) {
+ DependencyEdge &Edge = N.OutgoingEdges[I];
+ if (Edge.Frequency == Iterations)
+ continue;
+ double Factor = (double)Edge.Frequency / Iterations;
+ if (0.10 < Factor)
+ continue;
+ Nodes[Edge.ToIID].NumPredecessors--;
+ std::swap(Edge, N.OutgoingEdges[E - 1]);
+ --E;
+ ++NumPruned;
+ }
+
+ if (NumPruned)
+ N.OutgoingEdges.resize(Size - NumPruned);
+ }
+}
+
void DependencyGraph::initializeRootSet(
SmallVectorImpl<unsigned> &RootSet) const {
for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
@@ -179,7 +202,7 @@ void DependencyGraph::initializeRootSet(
}
void DependencyGraph::propagateThroughEdges(
- SmallVectorImpl<unsigned> &RootSet) {
+ SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
SmallVector<unsigned, 8> ToVisit;
// A critical sequence is computed as the longest path from a node of the
@@ -189,6 +212,10 @@ void DependencyGraph::propagateThroughEdges(
// Each node of the graph starts with an initial default cost of zero. The
// cost of a node is a measure of criticality: the higher the cost, the bigger
// is the performance impact.
+ // For register and memory dependencies, the cost is a function of the write
+ // latency as well as the actual delay (in cycles) caused to users.
+ // For processor resource dependencies, the cost is a function of the resource
+ // pressure. Resource interferences with low frequency values are ignored.
//
// This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
// the inner loop selects (i.e. visits) a node N from a set of `unvisited
@@ -277,6 +304,10 @@ static void printInstruction(formatted_raw_ostream &FOS,
}
void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
+ // Early exit if no bottlenecks were found during the simulation.
+ if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
+ return;
+
SmallVector<const DependencyEdge *, 16> Seq;
DG.getCriticalSequence(Seq);
if (Seq.empty())
@@ -432,7 +463,6 @@ void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
bool IsLoopCarried = From >= To;
unsigned SourceSize = Source.size();
if (IsLoopCarried) {
- Cost *= Iterations / 2;
DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
return;
@@ -445,7 +475,6 @@ void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
bool IsLoopCarried = From >= To;
unsigned SourceSize = Source.size();
if (IsLoopCarried) {
- Cost *= Iterations / 2;
DG.addMemoryDep(From, To + SourceSize, Cost);
DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
return;
@@ -458,7 +487,6 @@ void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
bool IsLoopCarried = From >= To;
unsigned SourceSize = Source.size();
if (IsLoopCarried) {
- Cost *= Iterations / 2;
DG.addResourceDep(From, To + SourceSize, Mask, Cost);
DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
return;
@@ -514,7 +542,7 @@ void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
// Check if this is the last simulated instruction.
if (IID == ((Iterations * Source.size()) - 1))
- DG.finalizeGraph();
+ DG.finalizeGraph(Iterations);
}
void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h
index 7564b1a48206..9e3bd5978f09 100644
--- a/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -236,8 +236,9 @@ class DependencyGraph {
void addDependency(unsigned From, unsigned To,
DependencyEdge::Dependency &&DE);
+ void pruneEdges(unsigned Iterations);
void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const;
- void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet);
+ void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, unsigned Iterations);
#ifndef NDEBUG
void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE,
@@ -263,10 +264,11 @@ public:
// Called by the bottleneck analysis at the end of simulation to propagate
// costs through the edges of the graph, and compute a critical path.
- void finalizeGraph() {
+ void finalizeGraph(unsigned Iterations) {
SmallVector<unsigned, 16> RootSet;
+ pruneEdges(Iterations);
initializeRootSet(RootSet);
- propagateThroughEdges(RootSet);
+ propagateThroughEdges(RootSet, Iterations);
}
// Returns a sequence of edges representing the critical sequence based on the
diff --git a/tools/llvm-mca/Views/InstructionInfoView.cpp b/tools/llvm-mca/Views/InstructionInfoView.cpp
index 1fbffa3e5b69..a6f9153b4945 100644
--- a/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Views/InstructionInfoView.h"
+#include "llvm/Support/FormattedStream.h"
namespace llvm {
namespace mca {
@@ -26,10 +27,17 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
TempStream << "\n\nInstruction Info:\n";
TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n"
- << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n";
+ << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n";
+ if (PrintEncodings) {
+ TempStream << "[7]: Encoding Size\n";
+ TempStream << "\n[1] [2] [3] [4] [5] [6] [7] "
+ << "Encodings: Instructions:\n";
+ } else {
+ TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n";
+ }
- TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n";
- for (const MCInst &Inst : Source) {
+ for (unsigned I = 0, E = Source.size(); I < E; ++I) {
+ const MCInst &Inst = Source[I];
const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
// Obtain the scheduling class information from the instruction.
@@ -72,7 +80,20 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
}
TempStream << (MCDesc.mayLoad() ? " * " : " ");
TempStream << (MCDesc.mayStore() ? " * " : " ");
- TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " ");
+ TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " ");
+
+ if (PrintEncodings) {
+ StringRef Encoding(CE.getEncoding(I));
+ unsigned EncodingSize = Encoding.size();
+ TempStream << " " << EncodingSize
+ << (EncodingSize < 10 ? " " : " ");
+ TempStream.flush();
+ formatted_raw_ostream FOS(TempStream);
+ for (unsigned i = 0, e = Encoding.size(); i != e; ++i)
+ FOS << format("%02x ", (uint8_t)Encoding[i]);
+ FOS.PadToColumn(30);
+ FOS.flush();
+ }
MCIP.printInst(&Inst, InstrStream, "", STI);
InstrStream.flush();
@@ -80,7 +101,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
// Consume any tabs or spaces at the beginning of the string.
StringRef Str(Instruction);
Str = Str.ltrim();
- TempStream << " " << Str << '\n';
+ TempStream << Str << '\n';
Instruction = "";
}
diff --git a/tools/llvm-mca/Views/InstructionInfoView.h b/tools/llvm-mca/Views/InstructionInfoView.h
index 640d87383436..0e948304119f 100644
--- a/tools/llvm-mca/Views/InstructionInfoView.h
+++ b/tools/llvm-mca/Views/InstructionInfoView.h
@@ -40,6 +40,7 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/CodeEmitter.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "llvm-mca"
@@ -51,14 +52,18 @@ namespace mca {
class InstructionInfoView : public View {
const llvm::MCSubtargetInfo &STI;
const llvm::MCInstrInfo &MCII;
+ CodeEmitter &CE;
+ bool PrintEncodings;
llvm::ArrayRef<llvm::MCInst> Source;
llvm::MCInstPrinter &MCIP;
public:
- InstructionInfoView(const llvm::MCSubtargetInfo &sti,
- const llvm::MCInstrInfo &mcii,
- llvm::ArrayRef<llvm::MCInst> S, llvm::MCInstPrinter &IP)
- : STI(sti), MCII(mcii), Source(S), MCIP(IP) {}
+ InstructionInfoView(const llvm::MCSubtargetInfo &ST,
+ const llvm::MCInstrInfo &II, CodeEmitter &C,
+ bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S,
+ llvm::MCInstPrinter &IP)
+ : STI(ST), MCII(II), CE(C), PrintEncodings(ShouldPrintEncodings),
+ Source(S), MCIP(IP) {}
void printView(llvm::raw_ostream &OS) const override;
};
diff --git a/tools/llvm-mca/Views/TimelineView.cpp b/tools/llvm-mca/Views/TimelineView.cpp
index fe3f16ba344c..1e7caa297ac6 100644
--- a/tools/llvm-mca/Views/TimelineView.cpp
+++ b/tools/llvm-mca/Views/TimelineView.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Views/TimelineView.h"
+#include <numeric>
namespace llvm {
namespace mca {
@@ -132,25 +133,38 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
const WaitTimeEntry &Entry,
unsigned SourceIndex,
unsigned Executions) const {
- OS << SourceIndex << '.';
+ bool PrintingTotals = SourceIndex == Source.size();
+ unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions;
+
+ if (!PrintingTotals)
+ OS << SourceIndex << '.';
+
OS.PadToColumn(7);
double AverageTime1, AverageTime2, AverageTime3;
- AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
- AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
- AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
+ AverageTime1 =
+ (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions;
+ AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions;
+ AverageTime3 =
+ (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions;
OS << Executions;
OS.PadToColumn(13);
- int BufferSize = UsedBuffer[SourceIndex].second;
- tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize);
+
+ int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second;
+ if (!PrintingTotals)
+ tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
+ BufferSize);
OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
OS.PadToColumn(20);
- tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize);
+ if (!PrintingTotals)
+ tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
+ BufferSize);
OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
OS.PadToColumn(27);
- tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions,
- STI.getSchedModel().MicroOpBufferSize);
+ if (!PrintingTotals)
+ tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
+ CumulativeExecutions, STI.getSchedModel().MicroOpBufferSize);
OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
if (OS.has_colors())
@@ -190,6 +204,24 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
++IID;
}
+
+ // If the timeline contains more than one instruction,
+ // let's also print global averages.
+ if (Source.size() != 1) {
+ WaitTimeEntry TotalWaitTime = std::accumulate(
+ WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0},
+ [](const WaitTimeEntry &A, const WaitTimeEntry &B) {
+ return WaitTimeEntry{
+ A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue,
+ A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady,
+ A.CyclesSpentAfterWBAndBeforeRetire +
+ B.CyclesSpentAfterWBAndBeforeRetire};
+ });
+ printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions);
+ FOS << " "
+ << "<total>" << '\n';
+ InstrStream.flush();
+ }
}
void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
diff --git a/tools/llvm-mca/Views/TimelineView.h b/tools/llvm-mca/Views/TimelineView.h
index b63b234293cd..9bec3b87db45 100644
--- a/tools/llvm-mca/Views/TimelineView.h
+++ b/tools/llvm-mca/Views/TimelineView.h
@@ -84,6 +84,7 @@
/// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3
/// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4
/// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6
+/// 2 2.4 0.6 1.6 <total>
///
/// By comparing column [2] with column [1], we get an idea about how many
/// cycles were spent in the scheduler's queue due to data dependencies.
diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp
index b3590b5910ec..99c45eebdd88 100644
--- a/tools/llvm-mca/llvm-mca.cpp
+++ b/tools/llvm-mca/llvm-mca.cpp
@@ -32,11 +32,17 @@
#include "Views/SchedulerStatistics.h"
#include "Views/SummaryView.h"
#include "Views/TimelineView.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.inc"
+#include "llvm/MCA/CodeEmitter.h"
#include "llvm/MCA/Context.h"
+#include "llvm/MCA/InstrBuilder.h"
#include "llvm/MCA/Pipeline.h"
#include "llvm/MCA/Stages/EntryStage.h"
#include "llvm/MCA/Stages/InstructionTables.h"
@@ -83,11 +89,20 @@ static cl::opt<std::string>
cl::desc("Target a specific cpu type (-mcpu=help for details)"),
cl::value_desc("cpu-name"), cl::cat(ToolOptions), cl::init("native"));
+static cl::opt<std::string>
+ MATTR("mattr",
+ cl::desc("Additional target features."),
+ cl::cat(ToolOptions));
+
static cl::opt<int>
OutputAsmVariant("output-asm-variant",
cl::desc("Syntax variant to use for output printing"),
cl::cat(ToolOptions), cl::init(-1));
+static cl::opt<bool>
+ PrintImmHex("print-imm-hex", cl::cat(ToolOptions), cl::init(false),
+ cl::desc("Prefer hex format when printing immediate values"));
+
static cl::opt<unsigned> Iterations("iterations",
cl::desc("Number of iterations to run"),
cl::cat(ToolOptions), cl::init(0));
@@ -193,6 +208,11 @@ static cl::opt<bool> EnableBottleneckAnalysis(
cl::desc("Enable bottleneck analysis (disabled by default)"),
cl::cat(ViewOptions), cl::init(false));
+static cl::opt<bool> ShowEncoding(
+ "show-encoding",
+ cl::desc("Print encoding information in the instruction info view"),
+ cl::cat(ViewOptions), cl::init(false));
+
namespace {
const Target *getTarget(const char *ProgName) {
@@ -218,7 +238,7 @@ ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() {
OutputFilename = "-";
std::error_code EC;
auto Out =
- llvm::make_unique<ToolOutputFile>(OutputFilename, EC, sys::fs::F_None);
+ std::make_unique<ToolOutputFile>(OutputFilename, EC, sys::fs::OF_Text);
if (!EC)
return std::move(Out);
return EC;
@@ -303,33 +323,11 @@ int main(int argc, char **argv) {
// Apply overrides to llvm-mca specific options.
processViewOptions();
- SourceMgr SrcMgr;
-
- // Tell SrcMgr about this buffer, which is what the parser will pick up.
- SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc());
-
- std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
- assert(MRI && "Unable to create target register info!");
-
- std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
- assert(MAI && "Unable to create target asm info!");
-
- MCObjectFileInfo MOFI;
- MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
- MOFI.InitMCObjectFileInfo(TheTriple, /* PIC= */ false, Ctx);
-
- std::unique_ptr<buffer_ostream> BOS;
-
- std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
-
- std::unique_ptr<MCInstrAnalysis> MCIA(
- TheTarget->createMCInstrAnalysis(MCII.get()));
-
if (!MCPU.compare("native"))
MCPU = llvm::sys::getHostCPUName();
std::unique_ptr<MCSubtargetInfo> STI(
- TheTarget->createMCSubtargetInfo(TripleName, MCPU, /* FeaturesStr */ ""));
+ TheTarget->createMCSubtargetInfo(TripleName, MCPU, MATTR));
if (!STI->isCPUStringValid(MCPU))
return 1;
@@ -352,6 +350,29 @@ int main(int argc, char **argv) {
return 1;
}
+ std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
+ assert(MRI && "Unable to create target register info!");
+
+ std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
+ assert(MAI && "Unable to create target asm info!");
+
+ MCObjectFileInfo MOFI;
+ SourceMgr SrcMgr;
+
+ // Tell SrcMgr about this buffer, which is what the parser will pick up.
+ SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc());
+
+ MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
+
+ MOFI.InitMCObjectFileInfo(TheTriple, /* PIC= */ false, Ctx);
+
+ std::unique_ptr<buffer_ostream> BOS;
+
+ std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
+
+ std::unique_ptr<MCInstrAnalysis> MCIA(
+ TheTarget->createMCInstrAnalysis(MCII.get()));
+
// Parse the input and create CodeRegions that llvm-mca can analyze.
mca::AsmCodeRegionGenerator CRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI, *MCII);
Expected<const mca::CodeRegions &> RegionsOrErr = CRG.parseCodeRegions();
@@ -396,6 +417,9 @@ int main(int argc, char **argv) {
return 1;
}
+ // Set the display preference for hex vs. decimal immediates.
+ IP->setPrintImmHex(PrintImmHex);
+
std::unique_ptr<ToolOutputFile> TOF = std::move(*OF);
const MCSchedModel &SM = STI->getSchedModel();
@@ -413,6 +437,12 @@ int main(int argc, char **argv) {
// Number each region in the sequence.
unsigned RegionIdx = 0;
+ std::unique_ptr<MCCodeEmitter> MCE(
+ TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
+
+ std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend(
+ *STI, *MRI, InitMCTargetOptionsFromFlags()));
+
for (const std::unique_ptr<mca::CodeRegion> &Region : Regions) {
// Skip empty code regions.
if (Region->empty())
@@ -430,6 +460,7 @@ int main(int argc, char **argv) {
// Lower the MCInst sequence into an mca::Instruction sequence.
ArrayRef<MCInst> Insts = Region->getInstructions();
+ mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts);
std::vector<std::unique_ptr<mca::Instruction>> LoweredSequence;
for (const MCInst &MCI : Insts) {
Expected<std::unique_ptr<mca::Instruction>> Inst =
@@ -459,18 +490,18 @@ int main(int argc, char **argv) {
if (PrintInstructionTables) {
// Create a pipeline, stages, and a printer.
- auto P = llvm::make_unique<mca::Pipeline>();
- P->appendStage(llvm::make_unique<mca::EntryStage>(S));
- P->appendStage(llvm::make_unique<mca::InstructionTables>(SM));
+ auto P = std::make_unique<mca::Pipeline>();
+ P->appendStage(std::make_unique<mca::EntryStage>(S));
+ P->appendStage(std::make_unique<mca::InstructionTables>(SM));
mca::PipelinePrinter Printer(*P);
// Create the views for this pipeline, execute, and emit a report.
if (PrintInstructionInfoView) {
- Printer.addView(llvm::make_unique<mca::InstructionInfoView>(
- *STI, *MCII, Insts, *IP));
+ Printer.addView(std::make_unique<mca::InstructionInfoView>(
+ *STI, *MCII, CE, ShowEncoding, Insts, *IP));
}
Printer.addView(
- llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
+ std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
if (!runPipeline(*P))
return 1;
@@ -480,42 +511,42 @@ int main(int argc, char **argv) {
}
// Create a basic pipeline simulating an out-of-order backend.
- auto P = MCA.createDefaultPipeline(PO, IB, S);
+ auto P = MCA.createDefaultPipeline(PO, S);
mca::PipelinePrinter Printer(*P);
if (PrintSummaryView)
Printer.addView(
- llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
+ std::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
if (EnableBottleneckAnalysis) {
- Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(
+ Printer.addView(std::make_unique<mca::BottleneckAnalysis>(
*STI, *IP, Insts, S.getNumIterations()));
}
if (PrintInstructionInfoView)
- Printer.addView(
- llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, Insts, *IP));
+ Printer.addView(std::make_unique<mca::InstructionInfoView>(
+ *STI, *MCII, CE, ShowEncoding, Insts, *IP));
if (PrintDispatchStats)
- Printer.addView(llvm::make_unique<mca::DispatchStatistics>());
+ Printer.addView(std::make_unique<mca::DispatchStatistics>());
if (PrintSchedulerStats)
- Printer.addView(llvm::make_unique<mca::SchedulerStatistics>(*STI));
+ Printer.addView(std::make_unique<mca::SchedulerStatistics>(*STI));
if (PrintRetireStats)
- Printer.addView(llvm::make_unique<mca::RetireControlUnitStatistics>(SM));
+ Printer.addView(std::make_unique<mca::RetireControlUnitStatistics>(SM));
if (PrintRegisterFileStats)
- Printer.addView(llvm::make_unique<mca::RegisterFileStatistics>(*STI));
+ Printer.addView(std::make_unique<mca::RegisterFileStatistics>(*STI));
if (PrintResourcePressureView)
Printer.addView(
- llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
+ std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
if (PrintTimelineView) {
unsigned TimelineIterations =
TimelineMaxIterations ? TimelineMaxIterations : 10;
- Printer.addView(llvm::make_unique<mca::TimelineView>(
+ Printer.addView(std::make_unique<mca::TimelineView>(
*STI, *IP, Insts, std::min(TimelineIterations, S.getNumIterations()),
TimelineMaxCycles));
}
diff --git a/tools/llvm-modextract/llvm-modextract.cpp b/tools/llvm-modextract/llvm-modextract.cpp
index 3adefc5f0d3e..7c4099625842 100644
--- a/tools/llvm-modextract/llvm-modextract.cpp
+++ b/tools/llvm-modextract/llvm-modextract.cpp
@@ -54,7 +54,7 @@ int main(int argc, char **argv) {
std::error_code EC;
std::unique_ptr<ToolOutputFile> Out(
- new ToolOutputFile(OutputFilename, EC, sys::fs::F_None));
+ new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None));
ExitOnErr(errorCodeToError(EC));
if (BinaryExtract) {
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index aa62e6f0209b..ee55722dc139 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -711,17 +711,21 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
const std::string &ArchiveName,
const std::string &ArchitectureName) {
if (!NoSort) {
- std::function<bool(const NMSymbol &, const NMSymbol &)> Cmp;
+ using Comparator = bool (*)(const NMSymbol &, const NMSymbol &);
+ Comparator Cmp;
if (NumericSort)
- Cmp = compareSymbolAddress;
+ Cmp = &compareSymbolAddress;
else if (SizeSort)
- Cmp = compareSymbolSize;
+ Cmp = &compareSymbolSize;
else
- Cmp = compareSymbolName;
+ Cmp = &compareSymbolName;
if (ReverseSort)
- Cmp = [=](const NMSymbol &A, const NMSymbol &B) { return Cmp(B, A); };
- llvm::sort(SymbolList, Cmp);
+ llvm::sort(SymbolList, [=](const NMSymbol &A, const NMSymbol &B) -> bool {
+ return Cmp(B, A);
+ });
+ else
+ llvm::sort(SymbolList, Cmp);
}
if (!PrintFileName) {
@@ -913,10 +917,12 @@ static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
if (Flags & ELF::SHF_ALLOC)
return Flags & ELF::SHF_WRITE ? 'd' : 'r';
- StringRef SecName;
- if (SecI->getName(SecName))
+ auto NameOrErr = SecI->getName();
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
return '?';
- if (SecName.startswith(".debug"))
+ }
+ if ((*NameOrErr).startswith(".debug"))
return 'N';
if (!(Flags & ELF::SHF_WRITE))
return 'n';
@@ -1076,7 +1082,7 @@ static StringRef getNMTypeName(SymbolicFile &Obj, basic_symbol_iterator I) {
static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
StringRef &SecName) {
uint32_t Symflags = I->getFlags();
- if (isa<ELFObjectFileBase>(&Obj)) {
+ if (ELFObjectFileBase *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj)) {
if (Symflags & object::SymbolRef::SF_Absolute)
SecName = "*ABS*";
else if (Symflags & object::SymbolRef::SF_Common)
@@ -1090,8 +1096,16 @@ static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
consumeError(SecIOrErr.takeError());
return '?';
}
- elf_section_iterator secT = *SecIOrErr;
- secT->getName(SecName);
+
+ if (*SecIOrErr == ELFObj->section_end())
+ return '?';
+
+ Expected<StringRef> NameOrErr = (*SecIOrErr)->getName();
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
+ return '?';
+ }
+ SecName = *NameOrErr;
}
}
@@ -1347,7 +1361,12 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
StringRef SectionName = StringRef();
for (const SectionRef &Section : MachO->sections()) {
S.NSect++;
- Section.getName(SectionName);
+
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ SectionName = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
SegmentName = MachO->getSectionFinalSegmentName(
Section.getRawDataRefImpl());
if (S.Address >= Section.getAddress() &&
@@ -1667,7 +1686,11 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
StringRef SegmentName = StringRef();
StringRef SectionName = StringRef();
for (const SectionRef &Section : MachO->sections()) {
- Section.getName(SectionName);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ SectionName = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
SegmentName = MachO->getSectionFinalSegmentName(
Section.getRawDataRefImpl());
F.NSect++;
diff --git a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
index 4ae46851a66f..2a8d816e6f3c 100644
--- a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -16,8 +16,8 @@
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Support/CRC.h"
#include "llvm/Support/Errc.h"
-#include "llvm/Support/JamCRC.h"
#include "llvm/Support/Path.h"
#include <cassert>
@@ -40,22 +40,13 @@ static uint64_t getNextRVA(const Object &Obj) {
Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1);
}
-static uint32_t getCRC32(StringRef Data) {
- JamCRC CRC;
- CRC.update(ArrayRef<char>(Data.data(), Data.size()));
- // The CRC32 value needs to be complemented because the JamCRC dosn't
- // finalize the CRC32 value. It also dosn't negate the initial CRC32 value
- // but it starts by default at 0xFFFFFFFF which is the complement of zero.
- return ~CRC.getCRC();
-}
-
static std::vector<uint8_t> createGnuDebugLinkSectionContents(StringRef File) {
ErrorOr<std::unique_ptr<MemoryBuffer>> LinkTargetOrErr =
MemoryBuffer::getFile(File);
if (!LinkTargetOrErr)
error("'" + File + "': " + LinkTargetOrErr.getError().message());
auto LinkTarget = std::move(*LinkTargetOrErr);
- uint32_t CRC32 = getCRC32(LinkTarget->getBuffer());
+ uint32_t CRC32 = llvm::crc32(arrayRefFromStringRef(LinkTarget->getBuffer()));
StringRef FileName = sys::path::filename(File);
size_t CRCPos = alignTo(FileName.size() + 1, 4);
@@ -65,26 +56,37 @@ static std::vector<uint8_t> createGnuDebugLinkSectionContents(StringRef File) {
return Data;
}
-static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
- uint32_t StartRVA = getNextRVA(Obj);
+// Adds named section with given contents to the object.
+static void addSection(Object &Obj, StringRef Name, ArrayRef<uint8_t> Contents,
+ uint32_t Characteristics) {
+ bool NeedVA = Characteristics & (IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ |
+ IMAGE_SCN_MEM_WRITE);
- std::vector<Section> Sections;
Section Sec;
- Sec.setOwnedContents(createGnuDebugLinkSectionContents(DebugLinkFile));
- Sec.Name = ".gnu_debuglink";
- Sec.Header.VirtualSize = Sec.getContents().size();
- Sec.Header.VirtualAddress = StartRVA;
- Sec.Header.SizeOfRawData = alignTo(Sec.Header.VirtualSize,
- Obj.IsPE ? Obj.PeHeader.FileAlignment : 1);
+ Sec.setOwnedContents(Contents);
+ Sec.Name = Name;
+ Sec.Header.VirtualSize = NeedVA ? Sec.getContents().size() : 0u;
+ Sec.Header.VirtualAddress = NeedVA ? getNextRVA(Obj) : 0u;
+ Sec.Header.SizeOfRawData =
+ NeedVA ? alignTo(Sec.Header.VirtualSize,
+ Obj.IsPE ? Obj.PeHeader.FileAlignment : 1)
+ : Sec.getContents().size();
// Sec.Header.PointerToRawData is filled in by the writer.
Sec.Header.PointerToRelocations = 0;
Sec.Header.PointerToLinenumbers = 0;
// Sec.Header.NumberOfRelocations is filled in by the writer.
Sec.Header.NumberOfLinenumbers = 0;
- Sec.Header.Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
- IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_DISCARDABLE;
- Sections.push_back(Sec);
- Obj.addSections(Sections);
+ Sec.Header.Characteristics = Characteristics;
+
+ Obj.addSections(Sec);
+}
+
+static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
+ std::vector<uint8_t> Contents =
+ createGnuDebugLinkSectionContents(DebugLinkFile);
+ addSection(Obj, ".gnu_debuglink", Contents,
+ IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
+ IMAGE_SCN_MEM_DISCARDABLE);
}
static Error handleArgs(const CopyConfig &Config, Object &Obj) {
@@ -92,8 +94,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
Obj.removeSections([&Config](const Section &Sec) {
// Contrary to --only-keep-debug, --only-section fully removes sections that
// aren't mentioned.
- if (!Config.OnlySection.empty() &&
- !is_contained(Config.OnlySection, Sec.Name))
+ if (!Config.OnlySection.empty() && !Config.OnlySection.matches(Sec.Name))
return true;
if (Config.StripDebug || Config.StripAll || Config.StripAllGNU ||
@@ -103,7 +104,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
return true;
}
- if (is_contained(Config.ToRemove, Sec.Name))
+ if (Config.ToRemove.matches(Sec.Name))
return true;
return false;
@@ -137,7 +138,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
if (Config.StripAll || Config.StripAllGNU)
return true;
- if (is_contained(Config.SymbolsToRemove, Sym.Name)) {
+ if (Config.SymbolsToRemove.matches(Sym.Name)) {
// Explicitly removing a referenced symbol is an error.
if (Sym.Referenced)
reportError(Config.OutputFilename,
@@ -156,7 +157,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC ||
Sym.Sym.SectionNumber == 0)
if (Config.StripUnneeded ||
- is_contained(Config.UnneededSymbolsToRemove, Sym.Name))
+ Config.UnneededSymbolsToRemove.matches(Sym.Name))
return true;
// GNU objcopy keeps referenced local symbols and external symbols
@@ -171,21 +172,38 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
return false;
});
+ for (const auto &Flag : Config.AddSection) {
+ StringRef SecName, FileName;
+ std::tie(SecName, FileName) = Flag.split("=");
+
+ auto BufOrErr = MemoryBuffer::getFile(FileName);
+ if (!BufOrErr)
+ return createFileError(FileName, errorCodeToError(BufOrErr.getError()));
+ auto Buf = std::move(*BufOrErr);
+
+ addSection(
+ Obj, SecName,
+ makeArrayRef(reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
+ Buf->getBufferSize()),
+ IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES);
+ }
+
if (!Config.AddGnuDebugLink.empty())
addGnuDebugLink(Obj, Config.AddGnuDebugLink);
if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
!Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
- !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() ||
- !Config.DumpSection.empty() || !Config.KeepSection.empty() ||
+ !Config.AllocSectionsPrefix.empty() || !Config.DumpSection.empty() ||
+ !Config.KeepSection.empty() || Config.NewSymbolVisibility ||
!Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() ||
!Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() ||
!Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() ||
- !Config.SetSectionFlags.empty() || !Config.SymbolsToRename.empty() ||
- Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden ||
- Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc ||
- Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
+ !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() ||
+ !Config.SymbolsToRename.empty() || Config.ExtractDWO ||
+ Config.KeepFileSymbols || Config.LocalizeHidden || Config.PreserveDates ||
+ Config.StripDWO || Config.StripNonAlloc || Config.StripSections ||
+ Config.Weaken || Config.DecompressDebugSections ||
Config.DiscardMode == DiscardType::Locals ||
!Config.SymbolsToAdd.empty() || Config.EntryExpr) {
return createStringError(llvm::errc::invalid_argument,
diff --git a/tools/llvm-objcopy/COFF/Reader.cpp b/tools/llvm-objcopy/COFF/Reader.cpp
index 1f0ec9fa9691..2fcec0057c03 100644
--- a/tools/llvm-objcopy/COFF/Reader.cpp
+++ b/tools/llvm-objcopy/COFF/Reader.cpp
@@ -36,14 +36,9 @@ Error COFFReader::readExecutableHeaders(Object &Obj) const {
DH->AddressOfNewExeHeader - sizeof(*DH));
if (COFFObj.is64()) {
- const pe32plus_header *PE32Plus = nullptr;
- if (auto EC = COFFObj.getPE32PlusHeader(PE32Plus))
- return errorCodeToError(EC);
- Obj.PeHeader = *PE32Plus;
+ Obj.PeHeader = *COFFObj.getPE32PlusHeader();
} else {
- const pe32_header *PE32 = nullptr;
- if (auto EC = COFFObj.getPE32Header(PE32))
- return errorCodeToError(EC);
+ const pe32_header *PE32 = COFFObj.getPE32Header();
copyPeHeader(Obj.PeHeader, *PE32);
// The pe32plus_header (stored in Object) lacks the BaseOfData field.
Obj.BaseOfData = PE32->BaseOfData;
@@ -196,16 +191,13 @@ Error COFFReader::setSymbolTargets(Object &Obj) const {
}
Expected<std::unique_ptr<Object>> COFFReader::create() const {
- auto Obj = llvm::make_unique<Object>();
+ auto Obj = std::make_unique<Object>();
- const coff_file_header *CFH = nullptr;
- const coff_bigobj_file_header *CBFH = nullptr;
- COFFObj.getCOFFHeader(CFH);
- COFFObj.getCOFFBigObjHeader(CBFH);
bool IsBigObj = false;
- if (CFH) {
+ if (const coff_file_header *CFH = COFFObj.getCOFFHeader()) {
Obj->CoffFileHeader = *CFH;
} else {
+ const coff_bigobj_file_header *CBFH = COFFObj.getCOFFBigObjHeader();
if (!CBFH)
return createStringError(object_error::parse_failed,
"no COFF file header returned");
diff --git a/tools/llvm-objcopy/COFF/Writer.cpp b/tools/llvm-objcopy/COFF/Writer.cpp
index f3bb1ce331f2..6db37435fd96 100644
--- a/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/tools/llvm-objcopy/COFF/Writer.cpp
@@ -120,12 +120,12 @@ size_t COFFWriter::finalizeStringTable() {
StrTabBuilder.finalize();
for (auto &S : Obj.getMutableSections()) {
+ memset(S.Header.Name, 0, sizeof(S.Header.Name));
if (S.Name.size() > COFF::NameSize) {
- memset(S.Header.Name, 0, sizeof(S.Header.Name));
snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d",
(int)StrTabBuilder.getOffset(S.Name));
} else {
- strncpy(S.Header.Name, S.Name.data(), COFF::NameSize);
+ memcpy(S.Header.Name, S.Name.data(), S.Name.size());
}
}
for (auto &S : Obj.getMutableSymbols()) {
diff --git a/tools/llvm-objcopy/CommonOpts.td b/tools/llvm-objcopy/CommonOpts.td
new file mode 100644
index 000000000000..e8c092b44431
--- /dev/null
+++ b/tools/llvm-objcopy/CommonOpts.td
@@ -0,0 +1,123 @@
+include "llvm/Option/OptParser.td"
+
+multiclass Eq<string name, string help> {
+ def NAME : Separate<["--"], name>;
+ def NAME #_eq : Joined<["--"], name #"=">,
+ Alias<!cast<Separate>(NAME)>,
+ HelpText<help>;
+}
+
+def help : Flag<["--"], "help">;
+def h : Flag<["-"], "h">, Alias<help>;
+
+def allow_broken_links
+ : Flag<["--"], "allow-broken-links">,
+ HelpText<"Allow the tool to remove sections even if it would leave "
+ "invalid section references. The appropriate sh_link fields "
+ "will be set to zero.">;
+
+def enable_deterministic_archives
+ : Flag<["--"], "enable-deterministic-archives">,
+ HelpText<"Enable deterministic mode when operating on archives (use "
+ "zero for UIDs, GIDs, and timestamps).">;
+def D : Flag<["-"], "D">,
+ Alias<enable_deterministic_archives>,
+ HelpText<"Alias for --enable-deterministic-archives">;
+
+def disable_deterministic_archives
+ : Flag<["--"], "disable-deterministic-archives">,
+ HelpText<"Disable deterministic mode when operating on archives (use "
+ "real values for UIDs, GIDs, and timestamps).">;
+def U : Flag<["-"], "U">,
+ Alias<disable_deterministic_archives>,
+ HelpText<"Alias for --disable-deterministic-archives">;
+
+def preserve_dates : Flag<["--"], "preserve-dates">,
+ HelpText<"Preserve access and modification timestamps">;
+def p : Flag<["-"], "p">,
+ Alias<preserve_dates>,
+ HelpText<"Alias for --preserve-dates">;
+
+def strip_all : Flag<["--"], "strip-all">,
+ HelpText<"Remove non-allocated sections outside segments. "
+ ".gnu.warning* sections are not removed">;
+
+def strip_all_gnu
+ : Flag<["--"], "strip-all-gnu">,
+ HelpText<"Compatible with GNU's --strip-all">;
+
+def strip_debug : Flag<["--"], "strip-debug">,
+ HelpText<"Remove all debug sections">;
+def g : Flag<["-"], "g">,
+ Alias<strip_debug>,
+ HelpText<"Alias for --strip-debug">;
+
+def strip_unneeded : Flag<["--"], "strip-unneeded">,
+ HelpText<"Remove all symbols not needed by relocations">;
+
+defm remove_section : Eq<"remove-section", "Remove <section>">,
+ MetaVarName<"section">;
+def R : JoinedOrSeparate<["-"], "R">,
+ Alias<remove_section>,
+ HelpText<"Alias for --remove-section">;
+
+def strip_sections
+ : Flag<["--"], "strip-sections">,
+ HelpText<"Remove all section headers and all sections not in segments">;
+
+defm strip_symbol : Eq<"strip-symbol", "Strip <symbol>">,
+ MetaVarName<"symbol">;
+def N : JoinedOrSeparate<["-"], "N">,
+ Alias<strip_symbol>,
+ HelpText<"Alias for --strip-symbol">;
+
+defm keep_section : Eq<"keep-section", "Keep <section>">,
+ MetaVarName<"section">;
+
+defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
+ MetaVarName<"symbol">;
+def K : JoinedOrSeparate<["-"], "K">,
+ Alias<keep_symbol>,
+ HelpText<"Alias for --keep-symbol">;
+
+def keep_file_symbols : Flag<["--"], "keep-file-symbols">,
+ HelpText<"Do not remove file symbols">;
+
+def only_keep_debug
+ : Flag<["--"], "only-keep-debug">,
+ HelpText<"Clear sections that would not be stripped by --strip-debug. "
+ "Currently only implemented for COFF.">;
+
+def discard_locals : Flag<["--"], "discard-locals">,
+ HelpText<"Remove compiler-generated local symbols, (e.g. "
+ "symbols starting with .L)">;
+def X : Flag<["-"], "X">,
+ Alias<discard_locals>,
+ HelpText<"Alias for --discard-locals">;
+
+def discard_all
+ : Flag<["--"], "discard-all">,
+ HelpText<"Remove all local symbols except file and section symbols">;
+def x : Flag<["-"], "x">,
+ Alias<discard_all>,
+ HelpText<"Alias for --discard-all">;
+
+def regex
+ : Flag<["--"], "regex">,
+ HelpText<"Permit regular expressions in name comparison">;
+
+def version : Flag<["--"], "version">,
+ HelpText<"Print the version and exit.">;
+def V : Flag<["-"], "V">,
+ Alias<version>,
+ HelpText<"Alias for --version">;
+
+def wildcard
+ : Flag<["--"], "wildcard">,
+ HelpText<"Allow wildcard syntax for symbol-related flags. Incompatible "
+ "with --regex. Allows using '*' to match any number of "
+ "characters, '?' to match any single character, '\' to escape "
+ "special characters, and '[]' to define character classes. "
+ "Wildcards beginning with '!' will prevent a match, for example "
+ "\"-N '*' -N '!x'\" will strip all symbols except for \"x\".">;
+def w : Flag<["-"], "w">, Alias<wildcard>, HelpText<"Alias for --wildcard">;
diff --git a/tools/llvm-objcopy/CopyConfig.cpp b/tools/llvm-objcopy/CopyConfig.cpp
index 8d6431b3044f..d707bec20c49 100644
--- a/tools/llvm-objcopy/CopyConfig.cpp
+++ b/tools/llvm-objcopy/CopyConfig.cpp
@@ -14,10 +14,10 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
+#include "llvm/Support/CRC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Errc.h"
-#include "llvm/Support/JamCRC.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/StringSaver.h"
#include <memory>
@@ -155,6 +155,25 @@ static Expected<SectionRename> parseRenameSectionValue(StringRef FlagValue) {
return SR;
}
+static Expected<std::pair<StringRef, uint64_t>>
+parseSetSectionAlignment(StringRef FlagValue) {
+ if (!FlagValue.contains('='))
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --set-section-alignment: missing '='");
+ auto Split = StringRef(FlagValue).split('=');
+ if (Split.first.empty())
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --set-section-alignment: missing section name");
+ uint64_t NewAlign;
+ if (Split.second.getAsInteger(0, NewAlign))
+ return createStringError(errc::invalid_argument,
+ "invalid alignment for --set-section-alignment: '%s'",
+ Split.second.str().c_str());
+ return std::make_pair(Split.first, NewAlign);
+}
+
static Expected<SectionFlagsUpdate>
parseSetSectionFlagValue(StringRef FlagValue) {
if (!StringRef(FlagValue).contains('='))
@@ -177,106 +196,6 @@ parseSetSectionFlagValue(StringRef FlagValue) {
return SFU;
}
-static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue) {
- // Parse value given with --add-symbol option and create the
- // new symbol if possible. The value format for --add-symbol is:
- //
- // <name>=[<section>:]<value>[,<flags>]
- //
- // where:
- // <name> - symbol name, can be empty string
- // <section> - optional section name. If not given ABS symbol is created
- // <value> - symbol value, can be decimal or hexadecimal number prefixed
- // with 0x.
- // <flags> - optional flags affecting symbol type, binding or visibility:
- // The following are currently supported:
- //
- // global, local, weak, default, hidden, file, section, object,
- // indirect-function.
- //
- // The following flags are ignored and provided for GNU
- // compatibility only:
- //
- // warning, debug, constructor, indirect, synthetic,
- // unique-object, before=<symbol>.
- NewSymbolInfo SI;
- StringRef Value;
- std::tie(SI.SymbolName, Value) = FlagValue.split('=');
- if (Value.empty())
- return createStringError(
- errc::invalid_argument,
- "bad format for --add-symbol, missing '=' after '%s'",
- SI.SymbolName.str().c_str());
-
- if (Value.contains(':')) {
- std::tie(SI.SectionName, Value) = Value.split(':');
- if (SI.SectionName.empty() || Value.empty())
- return createStringError(
- errc::invalid_argument,
- "bad format for --add-symbol, missing section name or symbol value");
- }
-
- SmallVector<StringRef, 6> Flags;
- Value.split(Flags, ',');
- if (Flags[0].getAsInteger(0, SI.Value))
- return createStringError(errc::invalid_argument, "bad symbol value: '%s'",
- Flags[0].str().c_str());
-
- using Functor = std::function<void(void)>;
- SmallVector<StringRef, 6> UnsupportedFlags;
- for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I)
- static_cast<Functor>(
- StringSwitch<Functor>(Flags[I])
- .CaseLower("global", [&SI] { SI.Bind = ELF::STB_GLOBAL; })
- .CaseLower("local", [&SI] { SI.Bind = ELF::STB_LOCAL; })
- .CaseLower("weak", [&SI] { SI.Bind = ELF::STB_WEAK; })
- .CaseLower("default", [&SI] { SI.Visibility = ELF::STV_DEFAULT; })
- .CaseLower("hidden", [&SI] { SI.Visibility = ELF::STV_HIDDEN; })
- .CaseLower("file", [&SI] { SI.Type = ELF::STT_FILE; })
- .CaseLower("section", [&SI] { SI.Type = ELF::STT_SECTION; })
- .CaseLower("object", [&SI] { SI.Type = ELF::STT_OBJECT; })
- .CaseLower("function", [&SI] { SI.Type = ELF::STT_FUNC; })
- .CaseLower("indirect-function",
- [&SI] { SI.Type = ELF::STT_GNU_IFUNC; })
- .CaseLower("debug", [] {})
- .CaseLower("constructor", [] {})
- .CaseLower("warning", [] {})
- .CaseLower("indirect", [] {})
- .CaseLower("synthetic", [] {})
- .CaseLower("unique-object", [] {})
- .StartsWithLower("before", [] {})
- .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))();
- if (!UnsupportedFlags.empty())
- return createStringError(errc::invalid_argument,
- "unsupported flag%s for --add-symbol: '%s'",
- UnsupportedFlags.size() > 1 ? "s" : "",
- join(UnsupportedFlags, "', '").c_str());
- return SI;
-}
-
-static const StringMap<MachineInfo> ArchMap{
- // Name, {EMachine, 64bit, LittleEndian}
- {"aarch64", {ELF::EM_AARCH64, true, true}},
- {"arm", {ELF::EM_ARM, false, true}},
- {"i386", {ELF::EM_386, false, true}},
- {"i386:x86-64", {ELF::EM_X86_64, true, true}},
- {"mips", {ELF::EM_MIPS, false, false}},
- {"powerpc:common64", {ELF::EM_PPC64, true, true}},
- {"riscv:rv32", {ELF::EM_RISCV, false, true}},
- {"riscv:rv64", {ELF::EM_RISCV, true, true}},
- {"sparc", {ELF::EM_SPARC, false, false}},
- {"sparcel", {ELF::EM_SPARC, false, true}},
- {"x86-64", {ELF::EM_X86_64, true, true}},
-};
-
-static Expected<const MachineInfo &> getMachineInfo(StringRef Arch) {
- auto Iter = ArchMap.find(Arch);
- if (Iter == std::end(ArchMap))
- return createStringError(errc::invalid_argument,
- "invalid architecture: '%s'", Arch.str().c_str());
- return Iter->getValue();
-}
-
struct TargetInfo {
FileFormat Format;
MachineInfo Machine;
@@ -341,9 +260,10 @@ getOutputTargetInfoByTargetName(StringRef TargetName) {
return {TargetInfo{Format, MI}};
}
-static Error addSymbolsFromFile(std::vector<NameOrRegex> &Symbols,
- BumpPtrAllocator &Alloc, StringRef Filename,
- bool UseRegex) {
+static Error
+addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc,
+ StringRef Filename, MatchStyle MS,
+ llvm::function_ref<Error(Error)> ErrorCallback) {
StringSaver Saver(Alloc);
SmallVector<StringRef, 16> Lines;
auto BufOrErr = MemoryBuffer::getFile(Filename);
@@ -356,21 +276,47 @@ static Error addSymbolsFromFile(std::vector<NameOrRegex> &Symbols,
// it's not empty.
auto TrimmedLine = Line.split('#').first.trim();
if (!TrimmedLine.empty())
- Symbols.emplace_back(Saver.save(TrimmedLine), UseRegex);
+ if (Error E = Symbols.addMatcher(NameOrPattern::create(
+ Saver.save(TrimmedLine), MS, ErrorCallback)))
+ return E;
}
return Error::success();
}
-NameOrRegex::NameOrRegex(StringRef Pattern, bool IsRegex) {
- if (!IsRegex) {
- Name = Pattern;
- return;
- }
+Expected<NameOrPattern>
+NameOrPattern::create(StringRef Pattern, MatchStyle MS,
+ llvm::function_ref<Error(Error)> ErrorCallback) {
+ switch (MS) {
+ case MatchStyle::Literal:
+ return NameOrPattern(Pattern);
+ case MatchStyle::Wildcard: {
+ SmallVector<char, 32> Data;
+ bool IsPositiveMatch = true;
+ if (Pattern[0] == '!') {
+ IsPositiveMatch = false;
+ Pattern = Pattern.drop_front();
+ }
+ Expected<GlobPattern> GlobOrErr = GlobPattern::create(Pattern);
+
+ // If we couldn't create it as a glob, report the error, but try again with
+ // a literal if the error reporting is non-fatal.
+ if (!GlobOrErr) {
+ if (Error E = ErrorCallback(GlobOrErr.takeError()))
+ return std::move(E);
+ return create(Pattern, MatchStyle::Literal, ErrorCallback);
+ }
- SmallVector<char, 32> Data;
- R = std::make_shared<Regex>(
- ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data));
+ return NameOrPattern(std::make_shared<GlobPattern>(*GlobOrErr),
+ IsPositiveMatch);
+ }
+ case MatchStyle::Regex: {
+ SmallVector<char, 32> Data;
+ return NameOrPattern(std::make_shared<Regex>(
+ ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data)));
+ }
+ }
+ llvm_unreachable("Unhandled llvm.objcopy.MatchStyle enum");
}
static Error addSymbolsToRenameFromFile(StringMap<StringRef> &SymbolsToRename,
@@ -407,10 +353,22 @@ template <class T> static ErrorOr<T> getAsInteger(StringRef Val) {
return Result;
}
+static void printHelp(const opt::OptTable &OptTable, raw_ostream &OS,
+ StringRef ToolName) {
+ OptTable.PrintHelp(OS, (ToolName + " input [output]").str().c_str(),
+ (ToolName + " tool").str().c_str());
+ // TODO: Replace this with libOption call once it adds extrahelp support.
+ // The CommandLine library has a cl::extrahelp class to support this,
+ // but libOption does not have that yet.
+ OS << "\nPass @FILE as argument to read options from FILE.\n";
+}
+
// ParseObjcopyOptions returns the config and sets the input arguments. If a
// help flag is set then ParseObjcopyOptions will print the help messege and
// exit.
-Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
+Expected<DriverConfig>
+parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
+ llvm::function_ref<Error(Error)> ErrorCallback) {
DriverConfig DC;
ObjcopyOptTable T;
unsigned MissingArgumentIndex, MissingArgumentCount;
@@ -418,12 +376,12 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
if (InputArgs.size() == 0) {
- T.PrintHelp(errs(), "llvm-objcopy input [output]", "objcopy tool");
+ printHelp(T, errs(), "llvm-objcopy");
exit(1);
}
if (InputArgs.hasArg(OBJCOPY_help)) {
- T.PrintHelp(outs(), "llvm-objcopy input [output]", "objcopy tool");
+ printHelp(T, outs(), "llvm-objcopy");
exit(0);
}
@@ -459,7 +417,18 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
errc::invalid_argument,
"--target cannot be used with --input-target or --output-target");
- bool UseRegex = InputArgs.hasArg(OBJCOPY_regex);
+ if (InputArgs.hasArg(OBJCOPY_regex) && InputArgs.hasArg(OBJCOPY_wildcard))
+ return createStringError(errc::invalid_argument,
+ "--regex and --wildcard are incompatible");
+
+ MatchStyle SectionMatchStyle = InputArgs.hasArg(OBJCOPY_regex)
+ ? MatchStyle::Regex
+ : MatchStyle::Wildcard;
+ MatchStyle SymbolMatchStyle = InputArgs.hasArg(OBJCOPY_regex)
+ ? MatchStyle::Regex
+ : InputArgs.hasArg(OBJCOPY_wildcard)
+ ? MatchStyle::Wildcard
+ : MatchStyle::Literal;
StringRef InputFormat, OutputFormat;
if (InputArgs.hasArg(OBJCOPY_target)) {
InputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
@@ -476,28 +445,26 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
.Case("binary", FileFormat::Binary)
.Case("ihex", FileFormat::IHex)
.Default(FileFormat::Unspecified);
- if (Config.InputFormat == FileFormat::Binary) {
- auto BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
- if (BinaryArch.empty())
- return createStringError(
- errc::invalid_argument,
- "specified binary input without specifiying an architecture");
- Expected<const MachineInfo &> MI = getMachineInfo(BinaryArch);
- if (!MI)
- return MI.takeError();
- Config.BinaryArch = *MI;
- }
+
+ if (InputArgs.hasArg(OBJCOPY_new_symbol_visibility))
+ Config.NewSymbolVisibility =
+ InputArgs.getLastArgValue(OBJCOPY_new_symbol_visibility);
Config.OutputFormat = StringSwitch<FileFormat>(OutputFormat)
.Case("binary", FileFormat::Binary)
.Case("ihex", FileFormat::IHex)
.Default(FileFormat::Unspecified);
- if (Config.OutputFormat == FileFormat::Unspecified && !OutputFormat.empty()) {
- Expected<TargetInfo> Target = getOutputTargetInfoByTargetName(OutputFormat);
- if (!Target)
- return Target.takeError();
- Config.OutputFormat = Target->Format;
- Config.OutputArch = Target->Machine;
+ if (Config.OutputFormat == FileFormat::Unspecified) {
+ if (OutputFormat.empty()) {
+ Config.OutputFormat = Config.InputFormat;
+ } else {
+ Expected<TargetInfo> Target =
+ getOutputTargetInfoByTargetName(OutputFormat);
+ if (!Target)
+ return Target.takeError();
+ Config.OutputFormat = Target->Format;
+ Config.OutputArch = Target->Machine;
+ }
}
if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections,
@@ -535,12 +502,8 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
if (!DebugOrErr)
return createFileError(Config.AddGnuDebugLink, DebugOrErr.getError());
auto Debug = std::move(*DebugOrErr);
- JamCRC CRC;
- CRC.update(
- ArrayRef<char>(Debug->getBuffer().data(), Debug->getBuffer().size()));
- // The CRC32 value needs to be complemented because the JamCRC doesn't
- // finalize the CRC32 value.
- Config.GnuDebugLinkCRC32 = ~CRC.getCRC();
+ Config.GnuDebugLinkCRC32 =
+ llvm::crc32(arrayRefFromStringRef(Debug->getBuffer()));
}
Config.BuildIdLinkDir = InputArgs.getLastArgValue(OBJCOPY_build_id_link_dir);
if (InputArgs.hasArg(OBJCOPY_build_id_link_input))
@@ -582,6 +545,13 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
"multiple renames of section '%s'",
SR->OriginalName.str().c_str());
}
+ for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_alignment)) {
+ Expected<std::pair<StringRef, uint64_t>> NameAndAlign =
+ parseSetSectionAlignment(Arg->getValue());
+ if (!NameAndAlign)
+ return NameAndAlign.takeError();
+ Config.SetSectionAlignment[NameAndAlign->first] = NameAndAlign->second;
+ }
for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_flags)) {
Expected<SectionFlagsUpdate> SFU =
parseSetSectionFlagValue(Arg->getValue());
@@ -612,13 +582,28 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
}
for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section))
- Config.ToRemove.emplace_back(Arg->getValue(), UseRegex);
+ if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SectionMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section))
- Config.KeepSection.emplace_back(Arg->getValue(), UseRegex);
+ if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SectionMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_only_section))
- Config.OnlySection.emplace_back(Arg->getValue(), UseRegex);
- for (auto Arg : InputArgs.filtered(OBJCOPY_add_section))
- Config.AddSection.push_back(Arg->getValue());
+ if (Error E = Config.OnlySection.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SectionMatchStyle, ErrorCallback)))
+ return std::move(E);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) {
+ StringRef ArgValue(Arg->getValue());
+ if (!ArgValue.contains('='))
+ return createStringError(errc::invalid_argument,
+ "bad format for --add-section: missing '='");
+ if (ArgValue.split("=").second.empty())
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --add-section: missing file name");
+ Config.AddSection.push_back(ArgValue);
+ }
for (auto Arg : InputArgs.filtered(OBJCOPY_dump_section))
Config.DumpSection.push_back(Arg->getValue());
Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all);
@@ -645,53 +630,71 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
if (Config.DiscardMode == DiscardType::All)
Config.StripDebug = true;
for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
- Config.SymbolsToLocalize.emplace_back(Arg->getValue(), UseRegex);
+ if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SymbolMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbols))
if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc,
- Arg->getValue(), UseRegex))
+ Arg->getValue(), SymbolMatchStyle,
+ ErrorCallback))
return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol))
- Config.SymbolsToKeepGlobal.emplace_back(Arg->getValue(), UseRegex);
+ if (Error E = Config.SymbolsToKeepGlobal.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SymbolMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols))
if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc,
- Arg->getValue(), UseRegex))
+ Arg->getValue(), SymbolMatchStyle,
+ ErrorCallback))
return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol))
- Config.SymbolsToGlobalize.emplace_back(Arg->getValue(), UseRegex);
+ if (Error E = Config.SymbolsToGlobalize.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SymbolMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbols))
if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc,
- Arg->getValue(), UseRegex))
+ Arg->getValue(), SymbolMatchStyle,
+ ErrorCallback))
return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol))
- Config.SymbolsToWeaken.emplace_back(Arg->getValue(), UseRegex);
+ if (Error E = Config.SymbolsToWeaken.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SymbolMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbols))
if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc,
- Arg->getValue(), UseRegex))
+ Arg->getValue(), SymbolMatchStyle,
+ ErrorCallback))
return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol))
- Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegex);
+ if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SymbolMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbols))
if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc,
- Arg->getValue(), UseRegex))
+ Arg->getValue(), SymbolMatchStyle,
+ ErrorCallback))
return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol))
- Config.UnneededSymbolsToRemove.emplace_back(Arg->getValue(), UseRegex);
+ if (Error E =
+ Config.UnneededSymbolsToRemove.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SymbolMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols))
if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc,
- Arg->getValue(), UseRegex))
+ Arg->getValue(), SymbolMatchStyle,
+ ErrorCallback))
return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol))
- Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegex);
+ if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SymbolMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols))
- if (Error E = addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc,
- Arg->getValue(), UseRegex))
+ if (Error E =
+ addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, Arg->getValue(),
+ SymbolMatchStyle, ErrorCallback))
return std::move(E);
- for (auto Arg : InputArgs.filtered(OBJCOPY_add_symbol)) {
- Expected<NewSymbolInfo> NSI = parseNewSymbolInfo(Arg->getValue());
- if (!NSI)
- return NSI.takeError();
- Config.SymbolsToAdd.push_back(*NSI);
- }
+ for (auto Arg : InputArgs.filtered(OBJCOPY_add_symbol))
+ Config.SymbolsToAdd.push_back(Arg->getValue());
Config.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links);
@@ -754,19 +757,19 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
// exit.
Expected<DriverConfig>
parseStripOptions(ArrayRef<const char *> ArgsArr,
- std::function<Error(Error)> ErrorCallback) {
+ llvm::function_ref<Error(Error)> ErrorCallback) {
StripOptTable T;
unsigned MissingArgumentIndex, MissingArgumentCount;
llvm::opt::InputArgList InputArgs =
T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
if (InputArgs.size() == 0) {
- T.PrintHelp(errs(), "llvm-strip [options] file...", "strip tool");
+ printHelp(T, errs(), "llvm-strip");
exit(1);
}
if (InputArgs.hasArg(STRIP_help)) {
- T.PrintHelp(outs(), "llvm-strip [options] file...", "strip tool");
+ printHelp(T, outs(), "llvm-strip");
exit(0);
}
@@ -792,7 +795,17 @@ parseStripOptions(ArrayRef<const char *> ArgsArr,
"multiple input files cannot be used in combination with -o");
CopyConfig Config;
- bool UseRegexp = InputArgs.hasArg(STRIP_regex);
+
+ if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard))
+ return createStringError(errc::invalid_argument,
+ "--regex and --wildcard are incompatible");
+ MatchStyle SectionMatchStyle =
+ InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex : MatchStyle::Wildcard;
+ MatchStyle SymbolMatchStyle = InputArgs.hasArg(STRIP_regex)
+ ? MatchStyle::Regex
+ : InputArgs.hasArg(STRIP_wildcard)
+ ? MatchStyle::Wildcard
+ : MatchStyle::Literal;
Config.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links);
Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
@@ -801,6 +814,7 @@ parseStripOptions(ArrayRef<const char *> ArgsArr,
InputArgs.hasFlag(STRIP_discard_all, STRIP_discard_locals)
? DiscardType::All
: DiscardType::Locals;
+ Config.StripSections = InputArgs.hasArg(STRIP_strip_sections);
Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded);
if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all))
Config.StripAll = Arg->getOption().getID() == STRIP_strip_all;
@@ -809,16 +823,24 @@ parseStripOptions(ArrayRef<const char *> ArgsArr,
Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols);
for (auto Arg : InputArgs.filtered(STRIP_keep_section))
- Config.KeepSection.emplace_back(Arg->getValue(), UseRegexp);
+ if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SectionMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(STRIP_remove_section))
- Config.ToRemove.emplace_back(Arg->getValue(), UseRegexp);
+ if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SectionMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(STRIP_strip_symbol))
- Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegexp);
+ if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SymbolMatchStyle, ErrorCallback)))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(STRIP_keep_symbol))
- Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegexp);
+ if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create(
+ Arg->getValue(), SymbolMatchStyle, ErrorCallback)))
+ return std::move(E);
if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug &&
!Config.StripUnneeded && Config.DiscardMode == DiscardType::None &&
diff --git a/tools/llvm-objcopy/CopyConfig.h b/tools/llvm-objcopy/CopyConfig.h
index aff3631a487c..55a55d3a2bc2 100644
--- a/tools/llvm-objcopy/CopyConfig.h
+++ b/tools/llvm-objcopy/CopyConfig.h
@@ -9,6 +9,7 @@
#ifndef LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H
#define LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H
+#include "ELF/ELFConfig.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/Optional.h"
@@ -18,6 +19,7 @@
#include "llvm/Object/ELFTypes.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/Regex.h"
// Necessary for llvm::DebugCompressionType::None
#include "llvm/Target/TargetOptions.h"
@@ -87,36 +89,71 @@ enum class DiscardType {
Locals, // --discard-locals (-X)
};
-class NameOrRegex {
+enum class MatchStyle {
+ Literal, // Default for symbols.
+ Wildcard, // Default for sections, or enabled with --wildcard (-w).
+ Regex, // Enabled with --regex.
+};
+
+class NameOrPattern {
StringRef Name;
// Regex is shared between multiple CopyConfig instances.
std::shared_ptr<Regex> R;
+ std::shared_ptr<GlobPattern> G;
+ bool IsPositiveMatch = true;
+
+ NameOrPattern(StringRef N) : Name(N) {}
+ NameOrPattern(std::shared_ptr<Regex> R) : R(R) {}
+ NameOrPattern(std::shared_ptr<GlobPattern> G, bool IsPositiveMatch)
+ : G(G), IsPositiveMatch(IsPositiveMatch) {}
public:
- NameOrRegex(StringRef Pattern, bool IsRegex);
- bool operator==(StringRef S) const { return R ? R->match(S) : Name == S; }
+ // ErrorCallback is used to handle recoverable errors. An Error returned
+ // by the callback aborts the parsing and is then returned by this function.
+ static Expected<NameOrPattern>
+ create(StringRef Pattern, MatchStyle MS,
+ llvm::function_ref<Error(Error)> ErrorCallback);
+
+ bool isPositiveMatch() const { return IsPositiveMatch; }
+ bool operator==(StringRef S) const {
+ return R ? R->match(S) : G ? G->match(S) : Name == S;
+ }
bool operator!=(StringRef S) const { return !operator==(S); }
};
-struct NewSymbolInfo {
- StringRef SymbolName;
- StringRef SectionName;
- uint64_t Value = 0;
- uint8_t Type = ELF::STT_NOTYPE;
- uint8_t Bind = ELF::STB_GLOBAL;
- uint8_t Visibility = ELF::STV_DEFAULT;
+// Matcher that checks symbol or section names against the command line flags
+// provided for that option.
+class NameMatcher {
+ std::vector<NameOrPattern> PosMatchers;
+ std::vector<NameOrPattern> NegMatchers;
+
+public:
+ Error addMatcher(Expected<NameOrPattern> Matcher) {
+ if (!Matcher)
+ return Matcher.takeError();
+ if (Matcher->isPositiveMatch())
+ PosMatchers.push_back(std::move(*Matcher));
+ else
+ NegMatchers.push_back(std::move(*Matcher));
+ return Error::success();
+ }
+ bool matches(StringRef S) const {
+ return is_contained(PosMatchers, S) && !is_contained(NegMatchers, S);
+ }
+ bool empty() const { return PosMatchers.empty() && NegMatchers.empty(); }
};
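A hedged usage sketch of NameMatcher/NameOrPattern (the leading-'!' negative-match convention mirrors GNU objcopy wildcards and is an assumption here, as is the OnError callback):

// Hypothetical callback that treats every recoverable error as fatal.
auto OnError = [](llvm::Error E) { return E; };

NameMatcher M;
llvm::cantFail(M.addMatcher(
    NameOrPattern::create("foo*", MatchStyle::Wildcard, OnError)));
llvm::cantFail(M.addMatcher(
    NameOrPattern::create("!foobar", MatchStyle::Wildcard, OnError)));
assert(M.matches("foo1"));    // accepted by the positive pattern "foo*"
assert(!M.matches("foobar")); // vetoed by the negative pattern
assert(!M.matches("bar"));    // no positive pattern accepts it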
// Configuration for copying/stripping a single file.
struct CopyConfig {
+ // Format-specific options to be initialized lazily when needed.
+ Optional<elf::ELFCopyConfig> ELF;
+
// Main input/output options
StringRef InputFilename;
FileFormat InputFormat;
StringRef OutputFilename;
FileFormat OutputFormat;
- // Only applicable for --input-format=binary
- MachineInfo BinaryArch;
// Only applicable when --output-format!=binary (e.g. elf64-x86-64).
Optional<MachineInfo> OutputArch;
@@ -132,24 +169,30 @@ struct CopyConfig {
StringRef SymbolsPrefix;
StringRef AllocSectionsPrefix;
DiscardType DiscardMode = DiscardType::None;
+ Optional<StringRef> NewSymbolVisibility;
// Repeated options
std::vector<StringRef> AddSection;
std::vector<StringRef> DumpSection;
- std::vector<NewSymbolInfo> SymbolsToAdd;
- std::vector<NameOrRegex> KeepSection;
- std::vector<NameOrRegex> OnlySection;
- std::vector<NameOrRegex> SymbolsToGlobalize;
- std::vector<NameOrRegex> SymbolsToKeep;
- std::vector<NameOrRegex> SymbolsToLocalize;
- std::vector<NameOrRegex> SymbolsToRemove;
- std::vector<NameOrRegex> UnneededSymbolsToRemove;
- std::vector<NameOrRegex> SymbolsToWeaken;
- std::vector<NameOrRegex> ToRemove;
- std::vector<NameOrRegex> SymbolsToKeepGlobal;
+ std::vector<StringRef> SymbolsToAdd;
+
+ // Section matchers
+ NameMatcher KeepSection;
+ NameMatcher OnlySection;
+ NameMatcher ToRemove;
+
+ // Symbol matchers
+ NameMatcher SymbolsToGlobalize;
+ NameMatcher SymbolsToKeep;
+ NameMatcher SymbolsToLocalize;
+ NameMatcher SymbolsToRemove;
+ NameMatcher UnneededSymbolsToRemove;
+ NameMatcher SymbolsToWeaken;
+ NameMatcher SymbolsToKeepGlobal;
// Map options
StringMap<SectionRename> SectionsToRename;
+ StringMap<uint64_t> SetSectionAlignment;
StringMap<SectionFlagsUpdate> SetSectionFlags;
StringMap<StringRef> SymbolsToRename;
@@ -178,6 +221,18 @@ struct CopyConfig {
bool Weaken = false;
bool DecompressDebugSections = false;
DebugCompressionType CompressionType = DebugCompressionType::None;
+
+ // parseELFConfig performs ELF-specific command-line parsing. Fills `ELF` on
+ // success or returns an Error otherwise.
+ Error parseELFConfig() {
+ if (!ELF) {
+ Expected<elf::ELFCopyConfig> ELFConfig = elf::parseConfig(*this);
+ if (!ELFConfig)
+ return ELFConfig.takeError();
+ ELF = *ELFConfig;
+ }
+ return Error::success();
+ }
};
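A hedged sketch of the intended call pattern for the lazy initializer above: drivers run parseELFConfig() once before dereferencing Config.ELF; later calls are no-ops.

if (llvm::Error E = Config.parseELFConfig())
  return E;                              // ELF-specific flags were malformed
uint8_t Vis = Config.ELF->NewSymbolVisibility.getValueOr(
    (uint8_t)llvm::ELF::STV_DEFAULT);    // safe: ELF is populated now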
// Configuration for the overall invocation of this tool. When invoked as
@@ -190,8 +245,11 @@ struct DriverConfig {
// ParseObjcopyOptions returns the config and sets the input arguments. If a
// help flag is set then ParseObjcopyOptions will print the help message and
-// exit.
-Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr);
+// exit. ErrorCallback is used to handle recoverable errors. An Error returned
+// by the callback aborts the parsing and is then returned by this function.
+Expected<DriverConfig>
+parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
+ llvm::function_ref<Error(Error)> ErrorCallback);
// ParseStripOptions returns the config and sets the input arguments. If a
// help flag is set then ParseStripOptions will print the help message and
@@ -199,7 +257,7 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr);
// by the callback aborts the parsing and is then returned by this function.
Expected<DriverConfig>
parseStripOptions(ArrayRef<const char *> ArgsArr,
- std::function<Error(Error)> ErrorCallback);
+ llvm::function_ref<Error(Error)> ErrorCallback);
} // namespace objcopy
} // namespace llvm
diff --git a/tools/llvm-objcopy/ELF/ELFConfig.cpp b/tools/llvm-objcopy/ELF/ELFConfig.cpp
new file mode 100644
index 000000000000..40993760add7
--- /dev/null
+++ b/tools/llvm-objcopy/ELF/ELFConfig.cpp
@@ -0,0 +1,133 @@
+//===- ELFConfig.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CopyConfig.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace objcopy {
+namespace elf {
+
+static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue,
+ uint8_t DefaultVisibility) {
+ // Parse value given with --add-symbol option and create the
+ // new symbol if possible. The value format for --add-symbol is:
+ //
+ // <name>=[<section>:]<value>[,<flags>]
+ //
+ // where:
+ // <name> - symbol name, can be empty string
+ // <section> - optional section name. If not given ABS symbol is created
+ // <value> - symbol value, can be decimal or hexadecimal number prefixed
+ // with 0x.
+ // <flags> - optional flags affecting symbol type, binding or visibility:
+ // The following are currently supported:
+ //
+ // global, local, weak, default, hidden, file, section, object,
+ // indirect-function.
+ //
+ // The following flags are ignored and provided for GNU
+ // compatibility only:
+ //
+ // warning, debug, constructor, indirect, synthetic,
+ // unique-object, before=<symbol>.
+ NewSymbolInfo SI;
+ StringRef Value;
+ std::tie(SI.SymbolName, Value) = FlagValue.split('=');
+ if (Value.empty())
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --add-symbol, missing '=' after '%s'",
+ SI.SymbolName.str().c_str());
+
+ if (Value.contains(':')) {
+ std::tie(SI.SectionName, Value) = Value.split(':');
+ if (SI.SectionName.empty() || Value.empty())
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --add-symbol, missing section name or symbol value");
+ }
+
+ SmallVector<StringRef, 6> Flags;
+ Value.split(Flags, ',');
+ if (Flags[0].getAsInteger(0, SI.Value))
+ return createStringError(errc::invalid_argument, "bad symbol value: '%s'",
+ Flags[0].str().c_str());
+
+ SI.Visibility = DefaultVisibility;
+
+ using Functor = std::function<void(void)>;
+ SmallVector<StringRef, 6> UnsupportedFlags;
+ for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I)
+ static_cast<Functor>(
+ StringSwitch<Functor>(Flags[I])
+ .CaseLower("global", [&SI] { SI.Bind = ELF::STB_GLOBAL; })
+ .CaseLower("local", [&SI] { SI.Bind = ELF::STB_LOCAL; })
+ .CaseLower("weak", [&SI] { SI.Bind = ELF::STB_WEAK; })
+ .CaseLower("default", [&SI] { SI.Visibility = ELF::STV_DEFAULT; })
+ .CaseLower("hidden", [&SI] { SI.Visibility = ELF::STV_HIDDEN; })
+ .CaseLower("protected",
+ [&SI] { SI.Visibility = ELF::STV_PROTECTED; })
+ .CaseLower("file", [&SI] { SI.Type = ELF::STT_FILE; })
+ .CaseLower("section", [&SI] { SI.Type = ELF::STT_SECTION; })
+ .CaseLower("object", [&SI] { SI.Type = ELF::STT_OBJECT; })
+ .CaseLower("function", [&SI] { SI.Type = ELF::STT_FUNC; })
+ .CaseLower("indirect-function",
+ [&SI] { SI.Type = ELF::STT_GNU_IFUNC; })
+ .CaseLower("debug", [] {})
+ .CaseLower("constructor", [] {})
+ .CaseLower("warning", [] {})
+ .CaseLower("indirect", [] {})
+ .CaseLower("synthetic", [] {})
+ .CaseLower("unique-object", [] {})
+ .StartsWithLower("before", [] {})
+ .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))();
+ if (!UnsupportedFlags.empty())
+ return createStringError(errc::invalid_argument,
+ "unsupported flag%s for --add-symbol: '%s'",
+ UnsupportedFlags.size() > 1 ? "s" : "",
+ join(UnsupportedFlags, "', '").c_str());
+ return SI;
+}
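Illustrative flag values for the grammar documented above (all hypothetical):

//   --add-symbol foo=0x1234               ABS symbol foo, value 0x1234
//   --add-symbol bar=.text:16,global      bar = .text+16, STB_GLOBAL
//   --add-symbol baz=.data:0,weak,object  STB_WEAK binding, STT_OBJECT type
//   --add-symbol qux=                     error: empty value after '='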
+
+Expected<ELFCopyConfig> parseConfig(const CopyConfig &Config) {
+ ELFCopyConfig ELFConfig;
+ if (Config.NewSymbolVisibility) {
+ const uint8_t Invalid = 0xff;
+ ELFConfig.NewSymbolVisibility =
+ StringSwitch<uint8_t>(*Config.NewSymbolVisibility)
+ .Case("default", ELF::STV_DEFAULT)
+ .Case("hidden", ELF::STV_HIDDEN)
+ .Case("internal", ELF::STV_INTERNAL)
+ .Case("protected", ELF::STV_PROTECTED)
+ .Default(Invalid);
+
+ if (ELFConfig.NewSymbolVisibility == Invalid)
+ return createStringError(errc::invalid_argument,
+ "'%s' is not a valid symbol visibility",
+ Config.NewSymbolVisibility->str().c_str());
+ }
+
+ for (StringRef Arg : Config.SymbolsToAdd) {
+ Expected<elf::NewSymbolInfo> NSI = parseNewSymbolInfo(
+ Arg,
+ ELFConfig.NewSymbolVisibility.getValueOr((uint8_t)ELF::STV_DEFAULT));
+ if (!NSI)
+ return NSI.takeError();
+ ELFConfig.SymbolsToAdd.push_back(*NSI);
+ }
+
+ return ELFConfig;
+}
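A hedged end-to-end example (the --new-symbol-visibility spelling is inferred from the config member; the authoritative name lives in ObjcopyOpts.td, outside this hunk):

//   llvm-objcopy --new-symbol-visibility=hidden --add-symbol foo=0x100 a b
// leaves ELFConfig.NewSymbolVisibility == ELF::STV_HIDDEN and one
// NewSymbolInfo{SymbolName:"foo", Value:0x100, Visibility:STV_HIDDEN} in
// ELFConfig.SymbolsToAdd; an unknown visibility such as "loud" fails with
// "'loud' is not a valid symbol visibility".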
+
+} // end namespace elf
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/ELF/ELFConfig.h b/tools/llvm-objcopy/ELF/ELFConfig.h
new file mode 100644
index 000000000000..977efbc4166f
--- /dev/null
+++ b/tools/llvm-objcopy/ELF/ELFConfig.h
@@ -0,0 +1,44 @@
+//===- ELFConfig.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_ELFCONFIG_H
+#define LLVM_TOOLS_OBJCOPY_ELFCONFIG_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+struct CopyConfig;
+
+namespace elf {
+
+struct NewSymbolInfo {
+ StringRef SymbolName;
+ StringRef SectionName;
+ uint64_t Value = 0;
+ uint8_t Type = ELF::STT_NOTYPE;
+ uint8_t Bind = ELF::STB_GLOBAL;
+ uint8_t Visibility = ELF::STV_DEFAULT;
+};
+
+struct ELFCopyConfig {
+ Optional<uint8_t> NewSymbolVisibility;
+ std::vector<NewSymbolInfo> SymbolsToAdd;
+};
+
+Expected<ELFCopyConfig> parseConfig(const CopyConfig &Config);
+
+} // namespace elf
+} // namespace objcopy
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index b366c6e55987..8bf7e0f88010 100644
--- a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -136,16 +136,16 @@ static std::unique_ptr<Writer> createELFWriter(const CopyConfig &Config,
// Depending on the initial ELFT and OutputFormat we need a different Writer.
switch (OutputElfType) {
case ELFT_ELF32LE:
- return llvm::make_unique<ELFWriter<ELF32LE>>(Obj, Buf,
+ return std::make_unique<ELFWriter<ELF32LE>>(Obj, Buf,
!Config.StripSections);
case ELFT_ELF64LE:
- return llvm::make_unique<ELFWriter<ELF64LE>>(Obj, Buf,
+ return std::make_unique<ELFWriter<ELF64LE>>(Obj, Buf,
!Config.StripSections);
case ELFT_ELF32BE:
- return llvm::make_unique<ELFWriter<ELF32BE>>(Obj, Buf,
+ return std::make_unique<ELFWriter<ELF32BE>>(Obj, Buf,
!Config.StripSections);
case ELFT_ELF64BE:
- return llvm::make_unique<ELFWriter<ELF64BE>>(Obj, Buf,
+ return std::make_unique<ELFWriter<ELF64BE>>(Obj, Buf,
!Config.StripSections);
}
llvm_unreachable("Invalid output format");
@@ -156,9 +156,9 @@ static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
ElfType OutputElfType) {
switch (Config.OutputFormat) {
case FileFormat::Binary:
- return llvm::make_unique<BinaryWriter>(Obj, Buf);
+ return std::make_unique<BinaryWriter>(Obj, Buf);
case FileFormat::IHex:
- return llvm::make_unique<IHexWriter>(Obj, Buf);
+ return std::make_unique<IHexWriter>(Obj, Buf);
default:
return createELFWriter(Config, Obj, Buf, OutputElfType);
}
@@ -263,7 +263,7 @@ static Error linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink,
static Error splitDWOToFile(const CopyConfig &Config, const Reader &Reader,
StringRef File, ElfType OutputElfType) {
- auto DWOFile = Reader.create();
+ auto DWOFile = Reader.create(false);
auto OnlyKeepDWOPred = [&DWOFile](const SectionBase &Sec) {
return onlyKeepDWOPred(*DWOFile, Sec);
};
@@ -305,9 +305,9 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
SecName.str().c_str());
}
-static bool isCompressable(const SectionBase &Section) {
- return !(Section.Flags & ELF::SHF_COMPRESSED) &&
- StringRef(Section.Name).startswith(".debug");
+static bool isCompressable(const SectionBase &Sec) {
+ return !(Sec.Flags & ELF::SHF_COMPRESSED) &&
+ StringRef(Sec.Name).startswith(".debug");
}
static void replaceDebugSections(
@@ -356,7 +356,7 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF &&
((Config.LocalizeHidden &&
(Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
- is_contained(Config.SymbolsToLocalize, Sym.Name)))
+ Config.SymbolsToLocalize.matches(Sym.Name)))
Sym.Binding = STB_LOCAL;
// Note: these two globalize flags have very similar names but different
@@ -370,16 +370,15 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
// --keep-global-symbol. Because of that, make sure to check
// --globalize-symbol second.
if (!Config.SymbolsToKeepGlobal.empty() &&
- !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) &&
+ !Config.SymbolsToKeepGlobal.matches(Sym.Name) &&
Sym.getShndx() != SHN_UNDEF)
Sym.Binding = STB_LOCAL;
- if (is_contained(Config.SymbolsToGlobalize, Sym.Name) &&
+ if (Config.SymbolsToGlobalize.matches(Sym.Name) &&
Sym.getShndx() != SHN_UNDEF)
Sym.Binding = STB_GLOBAL;
- if (is_contained(Config.SymbolsToWeaken, Sym.Name) &&
- Sym.Binding == STB_GLOBAL)
+ if (Config.SymbolsToWeaken.matches(Sym.Name) && Sym.Binding == STB_GLOBAL)
Sym.Binding = STB_WEAK;
if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
@@ -399,12 +398,12 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
// symbols are still 'needed' and which are not.
if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty() ||
!Config.OnlySection.empty()) {
- for (auto &Section : Obj.sections())
- Section.markSymbols();
+ for (SectionBase &Sec : Obj.sections())
+ Sec.markSymbols();
}
auto RemoveSymbolsPred = [&](const Symbol &Sym) {
- if (is_contained(Config.SymbolsToKeep, Sym.Name) ||
+ if (Config.SymbolsToKeep.matches(Sym.Name) ||
(Config.KeepFileSymbols && Sym.Type == STT_FILE))
return false;
@@ -418,12 +417,12 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
if (Config.StripAll || Config.StripAllGNU)
return true;
- if (is_contained(Config.SymbolsToRemove, Sym.Name))
+ if (Config.SymbolsToRemove.matches(Sym.Name))
return true;
if ((Config.StripUnneeded ||
- is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) &&
- isUnneededSymbol(Sym))
+ Config.UnneededSymbolsToRemove.matches(Sym.Name)) &&
+ (!Obj.isRelocatable() || isUnneededSymbol(Sym)))
return true;
// We want to remove undefined symbols if all references have been stripped.
@@ -443,7 +442,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) {
// Removes:
if (!Config.ToRemove.empty()) {
RemovePred = [&Config](const SectionBase &Sec) {
- return is_contained(Config.ToRemove, Sec.Name);
+ return Config.ToRemove.matches(Sec.Name);
};
}
@@ -481,7 +480,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) {
};
}
- if (Config.StripDebug) {
+ if (Config.StripDebug || Config.StripUnneeded) {
RemovePred = [RemovePred](const SectionBase &Sec) {
return RemovePred(Sec) || isDebugSection(Sec);
};
@@ -523,7 +522,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) {
if (!Config.OnlySection.empty()) {
RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
// Explicitly keep these sections regardless of previous removes.
- if (is_contained(Config.OnlySection, Sec.Name))
+ if (Config.OnlySection.matches(Sec.Name))
return false;
// Allow all implicit removes.
@@ -545,7 +544,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) {
if (!Config.KeepSection.empty()) {
RemovePred = [&Config, RemovePred](const SectionBase &Sec) {
// Explicitly keep these sections regardless of previous removes.
- if (is_contained(Config.KeepSection, Sec.Name))
+ if (Config.KeepSection.matches(Sec.Name))
return false;
// Otherwise defer to RemovePred.
return RemovePred(Sec);
@@ -614,9 +613,8 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj,
if (Error E = updateAndRemoveSymbols(Config, Obj))
return E;
- if (!Config.SectionsToRename.empty() || !Config.AllocSectionsPrefix.empty()) {
- DenseSet<SectionBase *> PrefixedSections;
- for (auto &Sec : Obj.sections()) {
+ if (!Config.SectionsToRename.empty()) {
+ for (SectionBase &Sec : Obj.sections()) {
const auto Iter = Config.SectionsToRename.find(Sec.Name);
if (Iter != Config.SectionsToRename.end()) {
const SectionRename &SR = Iter->second;
@@ -624,63 +622,62 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj,
if (SR.NewFlags.hasValue())
setSectionFlagsAndType(Sec, SR.NewFlags.getValue());
}
+ }
+ }
- // Add a prefix to allocated sections and their relocation sections. This
- // should be done after renaming the section by Config.SectionToRename to
- // imitate the GNU objcopy behavior.
- if (!Config.AllocSectionsPrefix.empty()) {
- if (Sec.Flags & SHF_ALLOC) {
- Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str();
- PrefixedSections.insert(&Sec);
-
- // Rename relocation sections associated to the allocated sections.
- // For example, if we rename .text to .prefix.text, we also rename
- // .rel.text to .rel.prefix.text.
- //
- // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled
- // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not
- // .rela.prefix.plt since GNU objcopy does so.
- } else if (auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec)) {
- auto *TargetSec = RelocSec->getSection();
- if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) {
- StringRef prefix;
- switch (Sec.Type) {
- case SHT_REL:
- prefix = ".rel";
- break;
- case SHT_RELA:
- prefix = ".rela";
- break;
- default:
- continue;
- }
-
- // If the relocation section comes *after* the target section, we
- // don't add Config.AllocSectionsPrefix because we've already added
- // the prefix to TargetSec->Name. Otherwise, if the relocation
- // section comes *before* the target section, we add the prefix.
- if (PrefixedSections.count(TargetSec)) {
- Sec.Name = (prefix + TargetSec->Name).str();
- } else {
- const auto Iter = Config.SectionsToRename.find(TargetSec->Name);
- if (Iter != Config.SectionsToRename.end()) {
- // Both `--rename-section` and `--prefix-alloc-sections` are
- // given but the target section is not yet renamed.
- Sec.Name =
- (prefix + Config.AllocSectionsPrefix + Iter->second.NewName)
- .str();
- } else {
- Sec.Name =
- (prefix + Config.AllocSectionsPrefix + TargetSec->Name)
- .str();
- }
- }
+ // Add a prefix to allocated sections and their relocation sections. This
+ // should be done after renaming the sections via Config.SectionsToRename to
+ // imitate the GNU objcopy behavior.
+ if (!Config.AllocSectionsPrefix.empty()) {
+ DenseSet<SectionBase *> PrefixedSections;
+ for (SectionBase &Sec : Obj.sections()) {
+ if (Sec.Flags & SHF_ALLOC) {
+ Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str();
+ PrefixedSections.insert(&Sec);
+ } else if (auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec)) {
+ // Rename relocation sections associated to the allocated sections.
+ // For example, if we rename .text to .prefix.text, we also rename
+ // .rel.text to .rel.prefix.text.
+ //
+ // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled
+ // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not
+ // .rela.prefix.plt since GNU objcopy does so.
+ const SectionBase *TargetSec = RelocSec->getSection();
+ if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) {
+ StringRef prefix;
+ switch (Sec.Type) {
+ case SHT_REL:
+ prefix = ".rel";
+ break;
+ case SHT_RELA:
+ prefix = ".rela";
+ break;
+ default:
+ llvm_unreachable("not a relocation section");
}
+
+ // If the relocation section comes *after* the target section, we
+ // don't add Config.AllocSectionsPrefix because we've already added
+ // the prefix to TargetSec->Name. Otherwise, if the relocation
+ // section comes *before* the target section, we add the prefix.
+ if (PrefixedSections.count(TargetSec))
+ Sec.Name = (prefix + TargetSec->Name).str();
+ else
+ Sec.Name =
+ (prefix + Config.AllocSectionsPrefix + TargetSec->Name).str();
}
}
}
}
+ if (!Config.SetSectionAlignment.empty()) {
+ for (SectionBase &Sec : Obj.sections()) {
+ auto I = Config.SetSectionAlignment.find(Sec.Name);
+ if (I != Config.SetSectionAlignment.end())
+ Sec.Align = I->second;
+ }
+ }
+
if (!Config.SetSectionFlags.empty()) {
for (auto &Sec : Obj.sections()) {
const auto Iter = Config.SetSectionFlags.find(Sec.Name);
@@ -721,7 +718,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj,
Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink,
Config.GnuDebugLinkCRC32);
- for (const NewSymbolInfo &SI : Config.SymbolsToAdd) {
+ for (const NewSymbolInfo &SI : Config.ELF->SymbolsToAdd) {
SectionBase *Sec = Obj.findSection(SI.SectionName);
uint64_t Value = Sec ? Sec->Addr + SI.Value : SI.Value;
Obj.SymbolTable->addSymbol(
@@ -746,9 +743,9 @@ static Error writeOutput(const CopyConfig &Config, Object &Obj, Buffer &Out,
Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
Buffer &Out) {
IHexReader Reader(&In);
- std::unique_ptr<Object> Obj = Reader.create();
+ std::unique_ptr<Object> Obj = Reader.create(true);
const ElfType OutputElfType =
- getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch));
+ getOutputElfType(Config.OutputArch.getValueOr(MachineInfo()));
if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
return E;
return writeOutput(Config, *Obj, Out, OutputElfType);
@@ -756,13 +753,15 @@ Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
Buffer &Out) {
- BinaryReader Reader(Config.BinaryArch, &In);
- std::unique_ptr<Object> Obj = Reader.create();
+ uint8_t NewSymbolVisibility =
+ Config.ELF->NewSymbolVisibility.getValueOr((uint8_t)ELF::STV_DEFAULT);
+ BinaryReader Reader(&In, NewSymbolVisibility);
+ std::unique_ptr<Object> Obj = Reader.create(true);
// Prefer OutputArch (-O<format>) if set, otherwise fallback to BinaryArch
// (-B<arch>).
const ElfType OutputElfType =
- getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch));
+ getOutputElfType(Config.OutputArch.getValueOr(MachineInfo()));
if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
return E;
return writeOutput(Config, *Obj, Out, OutputElfType);
@@ -771,7 +770,7 @@ Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
Error executeObjcopyOnBinary(const CopyConfig &Config,
object::ELFObjectFileBase &In, Buffer &Out) {
ELFReader Reader(&In, Config.ExtractPartition);
- std::unique_ptr<Object> Obj = Reader.create();
+ std::unique_ptr<Object> Obj = Reader.create(!Config.SymbolsToAdd.empty());
// Prefer OutputArch (-O<format>) if set, otherwise infer it from the input.
const ElfType OutputElfType =
Config.OutputArch ? getOutputElfType(Config.OutputArch.getValue())
diff --git a/tools/llvm-objcopy/ELF/Object.cpp b/tools/llvm-objcopy/ELF/Object.cpp
index fa696380e17c..74145dad6e6b 100644
--- a/tools/llvm-objcopy/ELF/Object.cpp
+++ b/tools/llvm-objcopy/ELF/Object.cpp
@@ -397,7 +397,7 @@ void SectionWriter::visit(const OwnedDataSection &Sec) {
llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset);
}
-static const std::vector<uint8_t> ZlibGnuMagic = {'Z', 'L', 'I', 'B'};
+static constexpr std::array<uint8_t, 4> ZlibGnuMagic = {{'Z', 'L', 'I', 'B'}};
static bool isDataGnuCompressed(ArrayRef<uint8_t> Data) {
return Data.size() > ZlibGnuMagic.size() &&
@@ -665,7 +665,7 @@ void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
Sym.Visibility = Visibility;
Sym.Size = SymbolSize;
Sym.Index = Symbols.size();
- Symbols.emplace_back(llvm::make_unique<Symbol>(Sym));
+ Symbols.emplace_back(std::make_unique<Symbol>(Sym));
Size += this->EntrySize;
}
@@ -1055,29 +1055,28 @@ void GroupSection::accept(MutableSectionVisitor &Visitor) {
}
// Returns true IFF a section is wholly inside the range of a segment
-static bool sectionWithinSegment(const SectionBase &Section,
- const Segment &Segment) {
+static bool sectionWithinSegment(const SectionBase &Sec, const Segment &Seg) {
// If a section is empty it should be treated as if it had a size of 1. This is
// to clarify the case when an empty section lies on a boundary between two
// segments and ensures that the section "belongs" to the second segment and
// not the first.
- uint64_t SecSize = Section.Size ? Section.Size : 1;
+ uint64_t SecSize = Sec.Size ? Sec.Size : 1;
- if (Section.Type == SHT_NOBITS) {
- if (!(Section.Flags & SHF_ALLOC))
+ if (Sec.Type == SHT_NOBITS) {
+ if (!(Sec.Flags & SHF_ALLOC))
return false;
- bool SectionIsTLS = Section.Flags & SHF_TLS;
- bool SegmentIsTLS = Segment.Type == PT_TLS;
+ bool SectionIsTLS = Sec.Flags & SHF_TLS;
+ bool SegmentIsTLS = Seg.Type == PT_TLS;
if (SectionIsTLS != SegmentIsTLS)
return false;
- return Segment.VAddr <= Section.Addr &&
- Segment.VAddr + Segment.MemSize >= Section.Addr + SecSize;
+ return Seg.VAddr <= Sec.Addr &&
+ Seg.VAddr + Seg.MemSize >= Sec.Addr + SecSize;
}
- return Segment.Offset <= Section.OriginalOffset &&
- Segment.Offset + Segment.FileSize >= Section.OriginalOffset + SecSize;
+ return Seg.Offset <= Sec.OriginalOffset &&
+ Seg.Offset + Seg.FileSize >= Sec.OriginalOffset + SecSize;
}
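A worked instance of the empty-section rule above (addresses hypothetical):

// Segment A covers file offsets [0x0, 0x100), segment B covers
// [0x100, 0x200). An empty section at offset 0x100 gets SecSize = 1, so:
//   within A: 0x0   <= 0x100 && 0x100 >= 0x101  -> false
//   within B: 0x100 <= 0x100 && 0x200 >= 0x101  -> true
// i.e. the section "belongs" to the second segment, as the comment says.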
// Returns true IFF a segment's original offset is inside of another segment's
@@ -1113,7 +1112,7 @@ void BasicELFBuilder::initFileHeader() {
Obj->OSABI = ELFOSABI_NONE;
Obj->ABIVersion = 0;
Obj->Entry = 0x0;
- Obj->Machine = EMachine;
+ Obj->Machine = EM_NONE;
Obj->Version = 1;
}
@@ -1141,8 +1140,8 @@ SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) {
}
void BasicELFBuilder::initSections() {
- for (auto &Section : Obj->sections())
- Section.initialize(Obj->sections());
+ for (SectionBase &Sec : Obj->sections())
+ Sec.initialize(Obj->sections());
}
void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
@@ -1161,11 +1160,12 @@ void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
Twine Prefix = Twine("_binary_") + SanitizedFilename;
SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection,
- /*Value=*/0, STV_DEFAULT, 0, 0);
+ /*Value=*/0, NewSymbolVisibility, 0, 0);
SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection,
- /*Value=*/DataSection.Size, STV_DEFAULT, 0, 0);
+ /*Value=*/DataSection.Size, NewSymbolVisibility, 0, 0);
SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, STT_NOTYPE, nullptr,
- /*Value=*/DataSection.Size, STV_DEFAULT, SHN_ABS, 0);
+ /*Value=*/DataSection.Size, NewSymbolVisibility, SHN_ABS,
+ 0);
}
std::unique_ptr<Object> BinaryELFBuilder::build() {
@@ -1255,10 +1255,9 @@ template <class ELFT> void ELFBuilder<ELFT>::findEhdrOffset() {
if (!ExtractPartition)
return;
- for (const SectionBase &Section : Obj.sections()) {
- if (Section.Type == SHT_LLVM_PART_EHDR &&
- Section.Name == *ExtractPartition) {
- EhdrOffset = Section.Offset;
+ for (const SectionBase &Sec : Obj.sections()) {
+ if (Sec.Type == SHT_LLVM_PART_EHDR && Sec.Name == *ExtractPartition) {
+ EhdrOffset = Sec.Offset;
return;
}
}
@@ -1287,15 +1286,12 @@ void ELFBuilder<ELFT>::readProgramHeaders(const ELFFile<ELFT> &HeadersFile) {
Seg.MemSize = Phdr.p_memsz;
Seg.Align = Phdr.p_align;
Seg.Index = Index++;
- for (SectionBase &Section : Obj.sections()) {
- if (sectionWithinSegment(Section, Seg)) {
- Seg.addSection(&Section);
- if (!Section.ParentSegment ||
- Section.ParentSegment->Offset > Seg.Offset) {
- Section.ParentSegment = &Seg;
- }
+ for (SectionBase &Sec : Obj.sections())
+ if (sectionWithinSegment(Sec, Seg)) {
+ Seg.addSection(&Sec);
+ if (!Sec.ParentSegment || Sec.ParentSegment->Offset > Seg.Offset)
+ Sec.ParentSegment = &Seg;
}
- }
}
auto &ElfHdr = Obj.ElfHdrSegment;
@@ -1531,7 +1527,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
}
}
-template <class ELFT> void ELFBuilder<ELFT>::readSections() {
+template <class ELFT> void ELFBuilder<ELFT>::readSections(bool EnsureSymtab) {
// If a section index table exists we'll need to initialize it before we
// initialize the symbol table because the symbol table might need to
// reference it.
@@ -1544,16 +1540,37 @@ template <class ELFT> void ELFBuilder<ELFT>::readSections() {
if (Obj.SymbolTable) {
Obj.SymbolTable->initialize(Obj.sections());
initSymbolTable(Obj.SymbolTable);
+ } else if (EnsureSymtab) {
+ // Reuse the existing SHT_STRTAB section if one exists.
+ StringTableSection *StrTab = nullptr;
+ for (auto &Sec : Obj.sections()) {
+ if (Sec.Type == ELF::SHT_STRTAB && !(Sec.Flags & SHF_ALLOC)) {
+ StrTab = static_cast<StringTableSection *>(&Sec);
+
+ // Prefer .strtab to .shstrtab.
+ if (Obj.SectionNames != &Sec)
+ break;
+ }
+ }
+ if (!StrTab)
+ StrTab = &Obj.addSection<StringTableSection>();
+
+ SymbolTableSection &SymTab = Obj.addSection<SymbolTableSection>();
+ SymTab.Name = ".symtab";
+ SymTab.Link = StrTab->Index;
+ SymTab.initialize(Obj.sections());
+ SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0);
+ Obj.SymbolTable = &SymTab;
}
// Now that all sections and symbols have been added we can add
// relocations that reference symbols and set the link and info fields for
// relocation sections.
- for (auto &Section : Obj.sections()) {
- if (&Section == Obj.SymbolTable)
+ for (auto &Sec : Obj.sections()) {
+ if (&Sec == Obj.SymbolTable)
continue;
- Section.initialize(Obj.sections());
- if (auto RelSec = dyn_cast<RelocationSection>(&Section)) {
+ Sec.initialize(Obj.sections());
+ if (auto RelSec = dyn_cast<RelocationSection>(&Sec)) {
auto Shdr = unwrapOrError(ElfFile.sections()).begin() + RelSec->Index;
if (RelSec->Type == SHT_REL)
initRelocations(RelSec, Obj.SymbolTable,
@@ -1561,7 +1578,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readSections() {
else
initRelocations(RelSec, Obj.SymbolTable,
unwrapOrError(ElfFile.relas(Shdr)));
- } else if (auto GroupSec = dyn_cast<GroupSection>(&Section)) {
+ } else if (auto GroupSec = dyn_cast<GroupSection>(&Sec)) {
initGroupSection(GroupSec);
}
}
@@ -1582,7 +1599,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readSections() {
" is not a string table");
}
-template <class ELFT> void ELFBuilder<ELFT>::build() {
+template <class ELFT> void ELFBuilder<ELFT>::build(bool EnsureSymtab) {
readSectionHeaders();
findEhdrOffset();
@@ -1601,7 +1618,7 @@ template <class ELFT> void ELFBuilder<ELFT>::build() {
Obj.Entry = Ehdr.e_entry;
Obj.Flags = Ehdr.e_flags;
- readSections();
+ readSections(EnsureSymtab);
readProgramHeaders(HeadersFile);
}
@@ -1609,8 +1626,8 @@ Writer::~Writer() {}
Reader::~Reader() {}
-std::unique_ptr<Object> BinaryReader::create() const {
- return BinaryELFBuilder(MInfo.EMachine, MemBuf).build();
+std::unique_ptr<Object> BinaryReader::create(bool /*EnsureSymtab*/) const {
+ return BinaryELFBuilder(MemBuf, NewSymbolVisibility).build();
}
Expected<std::vector<IHexRecord>> IHexReader::parse() const {
@@ -1639,28 +1656,28 @@ Expected<std::vector<IHexRecord>> IHexReader::parse() const {
return std::move(Records);
}
-std::unique_ptr<Object> IHexReader::create() const {
+std::unique_ptr<Object> IHexReader::create(bool /*EnsureSymtab*/) const {
std::vector<IHexRecord> Records = unwrapOrError(parse());
return IHexELFBuilder(Records).build();
}
-std::unique_ptr<Object> ELFReader::create() const {
- auto Obj = llvm::make_unique<Object>();
+std::unique_ptr<Object> ELFReader::create(bool EnsureSymtab) const {
+ auto Obj = std::make_unique<Object>();
if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
ELFBuilder<ELF32LE> Builder(*O, *Obj, ExtractPartition);
- Builder.build();
+ Builder.build(EnsureSymtab);
return Obj;
} else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
ELFBuilder<ELF64LE> Builder(*O, *Obj, ExtractPartition);
- Builder.build();
+ Builder.build(EnsureSymtab);
return Obj;
} else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
ELFBuilder<ELF32BE> Builder(*O, *Obj, ExtractPartition);
- Builder.build();
+ Builder.build(EnsureSymtab);
return Obj;
} else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
ELFBuilder<ELF64BE> Builder(*O, *Obj, ExtractPartition);
- Builder.build();
+ Builder.build(EnsureSymtab);
return Obj;
}
error("invalid file type");
@@ -1693,7 +1710,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
Ehdr.e_ehsize = sizeof(Elf_Ehdr);
if (WriteSectionHeaders && Obj.sections().size() != 0) {
Ehdr.e_shentsize = sizeof(Elf_Shdr);
- Ehdr.e_shoff = Obj.SHOffset;
+ Ehdr.e_shoff = Obj.SHOff;
// """
// If the number of sections is greater than or equal to
// SHN_LORESERVE (0xff00), this member has the value zero and the actual
@@ -1732,7 +1749,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
// This reference serves to write the dummy section header at the beginning
// of the file. It is not used for anything else.
Elf_Shdr &Shdr =
- *reinterpret_cast<Elf_Shdr *>(Buf.getBufferStart() + Obj.SHOffset);
+ *reinterpret_cast<Elf_Shdr *>(Buf.getBufferStart() + Obj.SHOff);
Shdr.sh_name = 0;
Shdr.sh_type = SHT_NULL;
Shdr.sh_flags = 0;
@@ -1862,26 +1879,13 @@ void Object::sortSections() {
});
}
-static uint64_t alignToAddr(uint64_t Offset, uint64_t Addr, uint64_t Align) {
- // Calculate Diff such that (Offset + Diff) & -Align == Addr & -Align.
- if (Align == 0)
- Align = 1;
- auto Diff =
- static_cast<int64_t>(Addr % Align) - static_cast<int64_t>(Offset % Align);
- // We only want to add to Offset, however, so if Diff < 0 we can add Align and
- // (Offset + Diff) & -Align == Addr & -Align will still hold.
- if (Diff < 0)
- Diff += Align;
- return Offset + Diff;
-}
-
// Orders segments such that if x = y->ParentSegment then y comes before x.
static void orderSegments(std::vector<Segment *> &Segments) {
llvm::stable_sort(Segments, compareSegmentsByOffset);
}
// This function finds a consistent layout for a list of segments starting from
-// an Offset. It assumes that Segments have been sorted by OrderSegments and
+// an Offset. It assumes that Segments have been sorted by orderSegments and
// returns an Offset one past the end of the last segment.
static uint64_t layoutSegments(std::vector<Segment *> &Segments,
uint64_t Offset) {
@@ -1902,8 +1906,8 @@ static uint64_t layoutSegments(std::vector<Segment *> &Segments,
Seg->Offset =
Parent->Offset + Seg->OriginalOffset - Parent->OriginalOffset;
} else {
- Offset = alignToAddr(Offset, Seg->VAddr, Seg->Align);
- Seg->Offset = Offset;
+ Seg->Offset =
+ alignTo(Offset, std::max<uint64_t>(Seg->Align, 1), Seg->VAddr);
}
Offset = std::max(Offset, Seg->Offset + Seg->FileSize);
}
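The three-argument llvm::alignTo(Value, Align, Skew) used above returns the smallest N >= Value with N % Align == Skew % Align, which is exactly what the removed alignToAddr() computed by hand. A numeric sketch:

//   alignTo(0x1234, 0x1000, 0x400123) == 0x2123
//   (0x2123 >= 0x1234, and 0x2123 % 0x1000 == 0x400123 % 0x1000 == 0x123)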
@@ -1925,17 +1929,17 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
// of the segment we can assign a new offset to the section. For sections not
// covered by segments we can just bump Offset to the next valid location.
uint32_t Index = 1;
- for (auto &Section : Sections) {
- Section.Index = Index++;
- if (Section.ParentSegment != nullptr) {
- auto Segment = *Section.ParentSegment;
- Section.Offset =
- Segment.Offset + (Section.OriginalOffset - Segment.OriginalOffset);
+ for (auto &Sec : Sections) {
+ Sec.Index = Index++;
+ if (Sec.ParentSegment != nullptr) {
+ auto Segment = *Sec.ParentSegment;
+ Sec.Offset =
+ Segment.Offset + (Sec.OriginalOffset - Segment.OriginalOffset);
} else {
- Offset = alignTo(Offset, Section.Align == 0 ? 1 : Section.Align);
- Section.Offset = Offset;
- if (Section.Type != SHT_NOBITS)
- Offset += Section.Size;
+ Offset = alignTo(Offset, Sec.Align == 0 ? 1 : Sec.Align);
+ Sec.Offset = Offset;
+ if (Sec.Type != SHT_NOBITS)
+ Offset += Sec.Size;
}
}
return Offset;
@@ -1971,16 +1975,16 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
// Offset so that SHOffset is valid.
if (WriteSectionHeaders)
Offset = alignTo(Offset, sizeof(Elf_Addr));
- Obj.SHOffset = Offset;
+ Obj.SHOff = Offset;
}
template <class ELFT> size_t ELFWriter<ELFT>::totalSize() const {
// We already have the section header offset so we can calculate the total
// size by just adding up the size of each section header.
if (!WriteSectionHeaders)
- return Obj.SHOffset;
+ return Obj.SHOff;
size_t ShdrCount = Obj.sections().size() + 1; // Includes null shdr.
- return Obj.SHOffset + ShdrCount * sizeof(Elf_Shdr);
+ return Obj.SHOff + ShdrCount * sizeof(Elf_Shdr);
}
template <class ELFT> Error ELFWriter<ELFT>::write() {
@@ -1995,6 +1999,25 @@ template <class ELFT> Error ELFWriter<ELFT>::write() {
return Buf.commit();
}
+static Error removeUnneededSections(Object &Obj) {
+ // We can remove an empty symbol table from non-relocatable objects.
+ // Relocatable objects typically have relocation sections whose
+ // sh_link field points to .symtab, so we can't remove .symtab
+ // even if it is empty.
+ if (Obj.isRelocatable() || Obj.SymbolTable == nullptr ||
+ !Obj.SymbolTable->empty())
+ return Error::success();
+
+ // .strtab can be used for section names. In such a case we shouldn't
+ // remove it.
+ auto *StrTab = Obj.SymbolTable->getStrTab() == Obj.SectionNames
+ ? nullptr
+ : Obj.SymbolTable->getStrTab();
+ return Obj.removeSections(false, [&](const SectionBase &Sec) {
+ return &Sec == Obj.SymbolTable || &Sec == StrTab;
+ });
+}
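A sketch of the cases the helper above distinguishes (object kinds hypothetical):

//   ET_EXEC, .symtab emptied by --strip-all -> .symtab dropped, plus its
//                                              .strtab when distinct from
//                                              the section-name table
//   ET_REL with .rela.text linking .symtab  -> kept even when empty
//   .strtab doubling as section-name table  -> kept (StrTab == nullptr)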
+
template <class ELFT> Error ELFWriter<ELFT>::finalize() {
// It could happen that SectionNames has been removed and yet the user wants
// a section header table output. We need to throw an error if a user tries
@@ -2004,6 +2027,8 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
"cannot write section header table because "
"section header string table was removed");
+ if (Error E = removeUnneededSections(Obj))
+ return E;
Obj.sortSections();
// We need to assign indexes before we perform layout because we need to know
@@ -2045,9 +2070,8 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
// Make sure we add the names of all the sections. Importantly this must be
// done after we decide to add or remove SectionIndexes.
if (Obj.SectionNames != nullptr)
- for (const auto &Section : Obj.sections()) {
- Obj.SectionNames->addString(Section.Name);
- }
+ for (const SectionBase &Sec : Obj.sections())
+ Obj.SectionNames->addString(Sec.Name);
initEhdrSegment();
@@ -2055,8 +2079,8 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
// Also, the output arch may not be the same as the input arch, so fix up
// size-related fields before doing layout calculations.
uint64_t Index = 0;
- auto SecSizer = llvm::make_unique<ELFSectionSizer<ELFT>>();
- for (auto &Sec : Obj.sections()) {
+ auto SecSizer = std::make_unique<ELFSectionSizer<ELFT>>();
+ for (SectionBase &Sec : Obj.sections()) {
Sec.Index = Index++;
Sec.accept(*SecSizer);
}
@@ -2082,40 +2106,36 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
// Finally now that all offsets and indexes have been set we can finalize any
// remaining issues.
- uint64_t Offset = Obj.SHOffset + sizeof(Elf_Shdr);
- for (SectionBase &Section : Obj.sections()) {
- Section.HeaderOffset = Offset;
+ uint64_t Offset = Obj.SHOff + sizeof(Elf_Shdr);
+ for (SectionBase &Sec : Obj.sections()) {
+ Sec.HeaderOffset = Offset;
Offset += sizeof(Elf_Shdr);
if (WriteSectionHeaders)
- Section.NameIndex = Obj.SectionNames->findIndex(Section.Name);
- Section.finalize();
+ Sec.NameIndex = Obj.SectionNames->findIndex(Sec.Name);
+ Sec.finalize();
}
if (Error E = Buf.allocate(totalSize()))
return E;
- SecWriter = llvm::make_unique<ELFSectionWriter<ELFT>>(Buf);
+ SecWriter = std::make_unique<ELFSectionWriter<ELFT>>(Buf);
return Error::success();
}
Error BinaryWriter::write() {
- for (auto &Section : Obj.sections())
- if (Section.Flags & SHF_ALLOC)
- Section.accept(*SecWriter);
+ for (const SectionBase &Sec : Obj.allocSections())
+ Sec.accept(*SecWriter);
return Buf.commit();
}
Error BinaryWriter::finalize() {
- // TODO: Create a filter range to construct OrderedSegments from so that this
- // code can be deduped with assignOffsets above. This should also solve the
- // todo below for LayoutSections.
// We need a temporary list of segments that has a special order to it
// so that we know that anytime ->ParentSegment is set that segment has
// already had its offset properly set. We only want to consider the segments
// that will affect layout of allocated sections so we only add those.
std::vector<Segment *> OrderedSegments;
- for (SectionBase &Section : Obj.sections())
- if ((Section.Flags & SHF_ALLOC) != 0 && Section.ParentSegment != nullptr)
- OrderedSegments.push_back(Section.ParentSegment);
+ for (const SectionBase &Sec : Obj.allocSections())
+ if (Sec.ParentSegment != nullptr)
+ OrderedSegments.push_back(Sec.ParentSegment);
// For binary output, we're going to use physical addresses instead of
// virtual addresses, since a binary output is used for cases like ROM
@@ -2130,7 +2150,7 @@ Error BinaryWriter::finalize() {
llvm::stable_sort(OrderedSegments, compareSegmentsByPAddr);
// Because we add a ParentSegment for each section we might have duplicate
- // segments in OrderedSegments. If there were duplicates then LayoutSegments
+ // segments in OrderedSegments. If there were duplicates then layoutSegments
// would do very strange things.
auto End =
std::unique(std::begin(OrderedSegments), std::end(OrderedSegments));
@@ -2158,28 +2178,20 @@ Error BinaryWriter::finalize() {
}
}
- // TODO: generalize LayoutSections to take a range. Pass a special range
- // constructed from an iterator that skips values for which a predicate does
- // not hold. Then pass such a range to LayoutSections instead of constructing
- // AllocatedSections here.
- std::vector<SectionBase *> AllocatedSections;
- for (SectionBase &Section : Obj.sections())
- if (Section.Flags & SHF_ALLOC)
- AllocatedSections.push_back(&Section);
- layoutSections(make_pointee_range(AllocatedSections), Offset);
+ layoutSections(Obj.allocSections(), Offset);
// Now that every section has been laid out we just need to compute the total
// file size. This might not be the same as the offset returned by
- // LayoutSections, because we want to truncate the last segment to the end of
+ // layoutSections, because we want to truncate the last segment to the end of
// its last section, to match GNU objcopy's behaviour.
TotalSize = 0;
- for (SectionBase *Section : AllocatedSections)
- if (Section->Type != SHT_NOBITS)
- TotalSize = std::max(TotalSize, Section->Offset + Section->Size);
+ for (const SectionBase &Sec : Obj.allocSections())
+ if (Sec.Type != SHT_NOBITS)
+ TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size);
if (Error E = Buf.allocate(TotalSize))
return E;
- SecWriter = llvm::make_unique<BinarySectionWriter>(Buf);
+ SecWriter = std::make_unique<BinarySectionWriter>(Buf);
return Error::success();
}
@@ -2259,17 +2271,17 @@ Error IHexWriter::finalize() {
// If any section we're to write has segment then we
// switch to using physical addresses. Otherwise we
// use section virtual address.
- for (auto &Section : Obj.sections())
- if (ShouldWrite(Section) && IsInPtLoad(Section)) {
+ for (const SectionBase &Sec : Obj.sections())
+ if (ShouldWrite(Sec) && IsInPtLoad(Sec)) {
UseSegments = true;
break;
}
- for (auto &Section : Obj.sections())
- if (ShouldWrite(Section) && (!UseSegments || IsInPtLoad(Section))) {
- if (Error E = checkSection(Section))
+ for (const SectionBase &Sec : Obj.sections())
+ if (ShouldWrite(Sec) && (!UseSegments || IsInPtLoad(Sec))) {
+ if (Error E = checkSection(Sec))
return E;
- Sections.insert(&Section);
+ Sections.insert(&Sec);
}
IHexSectionWriterBase LengthCalc(Buf);
diff --git a/tools/llvm-objcopy/ELF/Object.h b/tools/llvm-objcopy/ELF/Object.h
index f3df93b9662f..eeacb014e4dc 100644
--- a/tools/llvm-objcopy/ELF/Object.h
+++ b/tools/llvm-objcopy/ELF/Object.h
@@ -57,8 +57,8 @@ public:
: Sections(Secs) {}
SectionTableRef(const SectionTableRef &) = default;
- iterator begin() { return iterator(Sections.data()); }
- iterator end() { return iterator(Sections.data() + Sections.size()); }
+ iterator begin() const { return iterator(Sections.data()); }
+ iterator end() const { return iterator(Sections.data() + Sections.size()); }
size_t size() const { return Sections.size(); }
SectionBase *getSection(uint32_t Index, Twine ErrMsg);
@@ -863,7 +863,7 @@ public:
class Reader {
public:
virtual ~Reader();
- virtual std::unique_ptr<Object> create() const = 0;
+ virtual std::unique_ptr<Object> create(bool EnsureSymtab) const = 0;
};
using object::Binary;
@@ -873,7 +873,6 @@ using object::OwningBinary;
class BasicELFBuilder {
protected:
- uint16_t EMachine;
std::unique_ptr<Object> Obj;
void initFileHeader();
@@ -883,17 +882,18 @@ protected:
void initSections();
public:
- BasicELFBuilder(uint16_t EM)
- : EMachine(EM), Obj(llvm::make_unique<Object>()) {}
+ BasicELFBuilder() : Obj(std::make_unique<Object>()) {}
};
class BinaryELFBuilder : public BasicELFBuilder {
MemoryBuffer *MemBuf;
+ uint8_t NewSymbolVisibility;
void addData(SymbolTableSection *SymTab);
public:
- BinaryELFBuilder(uint16_t EM, MemoryBuffer *MB)
- : BasicELFBuilder(EM), MemBuf(MB) {}
+ BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility)
+ : BasicELFBuilder(), MemBuf(MB),
+ NewSymbolVisibility(NewSymbolVisibility) {}
std::unique_ptr<Object> build();
};
@@ -905,7 +905,7 @@ class IHexELFBuilder : public BasicELFBuilder {
public:
IHexELFBuilder(const std::vector<IHexRecord> &Records)
- : BasicELFBuilder(ELF::EM_386), Records(Records) {}
+ : BasicELFBuilder(), Records(Records) {}
std::unique_ptr<Object> build();
};
@@ -926,7 +926,7 @@ private:
void initGroupSection(GroupSection *GroupSec);
void initSymbolTable(SymbolTableSection *SymTab);
void readSectionHeaders();
- void readSections();
+ void readSections(bool EnsureSymtab);
void findEhdrOffset();
SectionBase &makeSection(const Elf_Shdr &Shdr);
@@ -936,17 +936,17 @@ public:
: ElfFile(*ElfObj.getELFFile()), Obj(Obj),
ExtractPartition(ExtractPartition) {}
- void build();
+ void build(bool EnsureSymtab);
};
class BinaryReader : public Reader {
- const MachineInfo &MInfo;
MemoryBuffer *MemBuf;
+ uint8_t NewSymbolVisibility;
public:
- BinaryReader(const MachineInfo &MI, MemoryBuffer *MB)
- : MInfo(MI), MemBuf(MB) {}
- std::unique_ptr<Object> create() const override;
+ BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility)
+ : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {}
+ std::unique_ptr<Object> create(bool EnsureSymtab) const override;
};
class IHexReader : public Reader {
@@ -968,7 +968,7 @@ class IHexReader : public Reader {
public:
IHexReader(MemoryBuffer *MB) : MemBuf(MB) {}
- std::unique_ptr<Object> create() const override;
+ std::unique_ptr<Object> create(bool EnsureSymtab) const override;
};
class ELFReader : public Reader {
@@ -976,7 +976,7 @@ class ELFReader : public Reader {
Optional<StringRef> ExtractPartition;
public:
- std::unique_ptr<Object> create() const override;
+ std::unique_ptr<Object> create(bool EnsureSymtab) const override;
explicit ELFReader(Binary *B, Optional<StringRef> ExtractPartition)
: Bin(B), ExtractPartition(ExtractPartition) {}
};
@@ -990,6 +990,10 @@ private:
std::vector<SegPtr> Segments;
std::vector<SecPtr> RemovedSections;
+ static bool sectionIsAlloc(const SectionBase &Sec) {
+ return Sec.Flags & ELF::SHF_ALLOC;
+ };
+
public:
template <class T>
using Range = iterator_range<
@@ -1011,13 +1015,14 @@ public:
uint8_t OSABI;
uint8_t ABIVersion;
uint64_t Entry;
- uint64_t SHOffset;
+ uint64_t SHOff;
uint32_t Type;
uint32_t Machine;
uint32_t Version;
uint32_t Flags;
bool HadShdrs = true;
+ bool MustBeRelocatable = false;
StringTableSection *SectionNames = nullptr;
SymbolTableSection *SymbolTable = nullptr;
SectionIndexSection *SectionIndexTable = nullptr;
@@ -1027,6 +1032,13 @@ public:
ConstRange<SectionBase> sections() const {
return make_pointee_range(Sections);
}
+ iterator_range<
+ filter_iterator<pointee_iterator<std::vector<SecPtr>::const_iterator>,
+ decltype(&sectionIsAlloc)>>
+ allocSections() const {
+ return make_filter_range(make_pointee_range(Sections), sectionIsAlloc);
+ }
+
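make_filter_range simply pairs an underlying range with a predicate; a minimal stand-alone sketch:

std::vector<int> V = {1, 2, 3, 4};
auto Evens = llvm::make_filter_range(V, [](int N) { return N % 2 == 0; });
for (int N : Evens)
  ; // visits 2 and 4; allocSections() filters on SHF_ALLOC the same way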
SectionBase *findSection(StringRef Name) {
auto SecIt =
find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; });
@@ -1041,16 +1053,20 @@ public:
std::function<bool(const SectionBase &)> ToRemove);
Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
template <class T, class... Ts> T &addSection(Ts &&... Args) {
- auto Sec = llvm::make_unique<T>(std::forward<Ts>(Args)...);
+ auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...);
auto Ptr = Sec.get();
+ MustBeRelocatable |= isa<RelocationSection>(*Ptr);
Sections.emplace_back(std::move(Sec));
Ptr->Index = Sections.size();
return *Ptr;
}
Segment &addSegment(ArrayRef<uint8_t> Data) {
- Segments.emplace_back(llvm::make_unique<Segment>(Data));
+ Segments.emplace_back(std::make_unique<Segment>(Data));
return *Segments.back();
}
+ bool isRelocatable() const {
+ return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable;
+ }
};
} // end namespace elf
diff --git a/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
new file mode 100644
index 000000000000..f621f3aa09cf
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
@@ -0,0 +1,350 @@
+//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOLayoutBuilder.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
+ uint32_t Size = 0;
+ for (const auto &LC : O.LoadCommands) {
+ const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
+ auto cmd = MLC.load_command_data.cmd;
+ switch (cmd) {
+ case MachO::LC_SEGMENT:
+ Size += sizeof(MachO::segment_command) +
+ sizeof(MachO::section) * LC.Sections.size();
+ continue;
+ case MachO::LC_SEGMENT_64:
+ Size += sizeof(MachO::segment_command_64) +
+ sizeof(MachO::section_64) * LC.Sections.size();
+ continue;
+ }
+
+ switch (cmd) {
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
+ case MachO::LCName: \
+ Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \
+ break;
+#include "llvm/BinaryFormat/MachO.def"
+#undef HANDLE_LOAD_COMMAND
+ }
+ }
+
+ return Size;
+}
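
The second switch above is driven by the HANDLE_LOAD_COMMAND X-macro from
llvm/BinaryFormat/MachO.def, which expands to one case per load-command kind.
A minimal standalone sketch of the same X-macro pattern, with a made-up
command list standing in for MachO.def:

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for llvm/BinaryFormat/MachO.def: each entry names a
// command, its numeric value, and the struct that encodes it.
#define FOR_EACH_CMD(X)                                                        \
  X(CMD_FOO, 0x1, FooCmd)                                                      \
  X(CMD_BAR, 0x2, BarCmd)

struct FooCmd { uint32_t cmd, cmdsize, extra; };
struct BarCmd { uint32_t cmd, cmdsize; };

enum Cmd : uint32_t {
#define HANDLE(Name, Value, Struct) Name = Value,
  FOR_EACH_CMD(HANDLE)
#undef HANDLE
};

// Mirrors the shape of computeSizeOfCmds: the macro expands to one case per
// command, each returning the size of the corresponding struct.
static uint32_t sizeOfCmd(uint32_t C) {
  switch (C) {
#define HANDLE(Name, Value, Struct)                                            \
  case Name:                                                                   \
    return sizeof(Struct);
  FOR_EACH_CMD(HANDLE)
#undef HANDLE
  }
  return 0;
}

int main() {
  std::printf("%u %u\n", sizeOfCmd(CMD_FOO), sizeOfCmd(CMD_BAR)); // 12 8
}

The payoff of the .def-file scheme is that adding one entry to the list
updates every switch that includes it.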
+
+void MachOLayoutBuilder::constructStringTable() {
+ for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
+ StrTableBuilder.add(Sym->Name);
+ StrTableBuilder.finalize();
+}
+
+void MachOLayoutBuilder::updateSymbolIndexes() {
+ uint32_t Index = 0;
+ for (auto &Symbol : O.SymTable.Symbols)
+ Symbol->Index = Index++;
+}
+
+// Updates the index and the number of local/external/undefined symbols.
+void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
+ assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
+ // Make sure that nlist entries in the symbol table are sorted by those
+ // types. The order is: local < defined external < undefined external.
+ assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(),
+ [](const std::unique_ptr<SymbolEntry> &A,
+ const std::unique_ptr<SymbolEntry> &B) {
+ return (A->isLocalSymbol() && !B->isLocalSymbol()) ||
+ (!A->isUndefinedSymbol() &&
+ B->isUndefinedSymbol());
+ }) &&
+ "Symbols are not sorted by their types.");
+
+ uint32_t NumLocalSymbols = 0;
+ auto Iter = O.SymTable.Symbols.begin();
+ auto End = O.SymTable.Symbols.end();
+ for (; Iter != End; ++Iter) {
+ if ((*Iter)->isExternalSymbol())
+ break;
+
+ ++NumLocalSymbols;
+ }
+
+ uint32_t NumExtDefSymbols = 0;
+ for (; Iter != End; ++Iter) {
+ if ((*Iter)->isUndefinedSymbol())
+ break;
+
+ ++NumExtDefSymbols;
+ }
+
+ MLC.dysymtab_command_data.ilocalsym = 0;
+ MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
+ MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
+ MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
+ MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
+ MLC.dysymtab_command_data.nundefsym =
+ O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
+}
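
A worked example of the partition counting above, using a plain enum in place
of SymbolEntry; the symbol mix is invented:

#include <cassert>
#include <cstddef>
#include <vector>

// Toy stand-in for the symbol table: already sorted local < defined external
// < undefined external, exactly as updateDySymTab asserts.
enum Kind { Local, ExtDef, Undef };

int main() {
  std::vector<Kind> Syms = {Local, Local, ExtDef, ExtDef, ExtDef, Undef};
  std::size_t NLocal = 0, NExtDef = 0, I = 0;
  while (I < Syms.size() && Syms[I] == Local) { ++NLocal; ++I; }
  while (I < Syms.size() && Syms[I] == ExtDef) { ++NExtDef; ++I; }
  // The dysymtab ranges follow directly from the two counts:
  //   ilocalsym = 0,         nlocalsym = 2
  //   iextdefsym = 2,        nextdefsym = 3
  //   iundefsym = 2 + 3 = 5, nundefsym = 6 - 5 = 1
  assert(NLocal == 2 && NExtDef == 3);
  assert(Syms.size() - (NLocal + NExtDef) == 1);
}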
+
+// Recomputes and updates offset and size fields in load commands and sections
+// since they could be modified.
+uint64_t MachOLayoutBuilder::layoutSegments() {
+ auto HeaderSize =
+ Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+ const bool IsObjectFile =
+ O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
+ uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
+ for (auto &LC : O.LoadCommands) {
+ auto &MLC = LC.MachOLoadCommand;
+ StringRef Segname;
+ uint64_t SegmentVmAddr;
+ uint64_t SegmentVmSize;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ SegmentVmAddr = MLC.segment_command_data.vmaddr;
+ SegmentVmSize = MLC.segment_command_data.vmsize;
+ Segname = StringRef(MLC.segment_command_data.segname,
+ strnlen(MLC.segment_command_data.segname,
+ sizeof(MLC.segment_command_data.segname)));
+ break;
+ case MachO::LC_SEGMENT_64:
+ SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
+ SegmentVmSize = MLC.segment_command_64_data.vmsize;
+ Segname = StringRef(MLC.segment_command_64_data.segname,
+ strnlen(MLC.segment_command_64_data.segname,
+ sizeof(MLC.segment_command_64_data.segname)));
+ break;
+ default:
+ continue;
+ }
+
+ if (Segname == "__LINKEDIT") {
+ // We update the __LINKEDIT segment later (in layoutTail).
+ assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
+ LinkEditLoadCommand = &MLC;
+ continue;
+ }
+
+ // Update file offsets and sizes of sections.
+ uint64_t SegOffset = Offset;
+ uint64_t SegFileSize = 0;
+ uint64_t VMSize = 0;
+ for (auto &Sec : LC.Sections) {
+ if (IsObjectFile) {
+ if (Sec.isVirtualSection()) {
+ Sec.Offset = 0;
+ } else {
+ uint64_t PaddingSize =
+ offsetToAlignment(SegFileSize, Align(1ull << Sec.Align));
+ Sec.Offset = SegOffset + SegFileSize + PaddingSize;
+ Sec.Size = Sec.Content.size();
+ SegFileSize += PaddingSize + Sec.Size;
+ }
+ VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
+ } else {
+ if (Sec.isVirtualSection()) {
+ Sec.Offset = 0;
+ VMSize += Sec.Size;
+ } else {
+ uint32_t SectOffset = Sec.Addr - SegmentVmAddr;
+ Sec.Offset = SegOffset + SectOffset;
+ Sec.Size = Sec.Content.size();
+ SegFileSize = std::max(SegFileSize, SectOffset + Sec.Size);
+ VMSize = std::max(VMSize, SegFileSize);
+ }
+ }
+ }
+
+ if (IsObjectFile) {
+ Offset += SegFileSize;
+ } else {
+ Offset = alignTo(Offset + SegFileSize, PageSize);
+ SegFileSize = alignTo(SegFileSize, PageSize);
+ // Use the original vmsize if the segment is __PAGEZERO.
+ VMSize =
+ Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
+ }
+
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ MLC.segment_command_data.cmdsize =
+ sizeof(MachO::segment_command) +
+ sizeof(MachO::section) * LC.Sections.size();
+ MLC.segment_command_data.nsects = LC.Sections.size();
+ MLC.segment_command_data.fileoff = SegOffset;
+ MLC.segment_command_data.vmsize = VMSize;
+ MLC.segment_command_data.filesize = SegFileSize;
+ break;
+ case MachO::LC_SEGMENT_64:
+ MLC.segment_command_64_data.cmdsize =
+ sizeof(MachO::segment_command_64) +
+ sizeof(MachO::section_64) * LC.Sections.size();
+ MLC.segment_command_64_data.nsects = LC.Sections.size();
+ MLC.segment_command_64_data.fileoff = SegOffset;
+ MLC.segment_command_64_data.vmsize = VMSize;
+ MLC.segment_command_64_data.filesize = SegFileSize;
+ break;
+ }
+ }
+
+ return Offset;
+}
+
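
The section placement above rests on two alignment helpers. A self-contained
sketch of their behavior, with free-standing re-implementations standing in
for the real llvm::alignTo and llvm::offsetToAlignment:

#include <cassert>
#include <cstdint>

// Local equivalents of the LLVM support helpers used by layoutSegments.
static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}
static uint64_t offsetToAlignment(uint64_t Value, uint64_t Align) {
  return alignTo(Value, Align) - Value;
}

int main() {
  // A section with Sec.Align == 4 needs 1 << 4 = 16-byte alignment; placed
  // after 10 bytes of segment payload it gets 6 bytes of padding first.
  assert(offsetToAlignment(10, 1ull << 4) == 6);
  // An already-aligned offset needs no padding at all.
  assert(offsetToAlignment(32, 1ull << 4) == 0);
  return 0;
}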
+uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
+ for (auto &LC : O.LoadCommands)
+ for (auto &Sec : LC.Sections) {
+ Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
+ Sec.NReloc = Sec.Relocations.size();
+ Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
+ }
+
+ return Offset;
+}
+
+Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
+ // The order of LINKEDIT elements is as follows:
+ // rebase info, binding info, weak binding info, lazy binding info, export
+ // trie, function starts, data-in-code, symbol table, indirect symbol table,
+ // symbol table strings.
+ uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+ uint64_t StartOfLinkEdit = Offset;
+ uint64_t StartOfRebaseInfo = StartOfLinkEdit;
+ uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size();
+ uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size();
+ uint64_t StartOfLazyBindingInfo =
+ StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size();
+ uint64_t StartOfExportTrie =
+ StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
+ uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
+ uint64_t StartOfDataInCode =
+ StartOfFunctionStarts + O.FunctionStarts.Data.size();
+ uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size();
+ uint64_t StartOfIndirectSymbols =
+ StartOfSymbols + NListSize * O.SymTable.Symbols.size();
+ uint64_t StartOfSymbolStrings =
+ StartOfIndirectSymbols +
+ sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
+ uint64_t LinkEditSize =
+ (StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit;
+
+ // Now we have determined the layout of the contents of the __LINKEDIT
+ // segment. Update its load command.
+ if (LinkEditLoadCommand) {
+ MachO::macho_load_command *MLC = LinkEditLoadCommand;
+ switch (LinkEditLoadCommand->load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
+ MLC->segment_command_data.fileoff = StartOfLinkEdit;
+ MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
+ MLC->segment_command_data.filesize = LinkEditSize;
+ break;
+ case MachO::LC_SEGMENT_64:
+ MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
+ MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
+ MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
+ MLC->segment_command_64_data.filesize = LinkEditSize;
+ break;
+ }
+ }
+
+ for (auto &LC : O.LoadCommands) {
+ auto &MLC = LC.MachOLoadCommand;
+ auto cmd = MLC.load_command_data.cmd;
+ switch (cmd) {
+ case MachO::LC_SYMTAB:
+ MLC.symtab_command_data.symoff = StartOfSymbols;
+ MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
+ MLC.symtab_command_data.stroff = StartOfSymbolStrings;
+ MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
+ break;
+ case MachO::LC_DYSYMTAB: {
+ if (MLC.dysymtab_command_data.ntoc != 0 ||
+ MLC.dysymtab_command_data.nmodtab != 0 ||
+ MLC.dysymtab_command_data.nextrefsyms != 0 ||
+ MLC.dysymtab_command_data.nlocrel != 0 ||
+ MLC.dysymtab_command_data.nextrel != 0)
+ return createStringError(llvm::errc::not_supported,
+ "shared library is not yet supported");
+
+ if (!O.IndirectSymTable.Symbols.empty()) {
+ MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
+ MLC.dysymtab_command_data.nindirectsyms =
+ O.IndirectSymTable.Symbols.size();
+ }
+
+ updateDySymTab(MLC);
+ break;
+ }
+ case MachO::LC_DATA_IN_CODE:
+ MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
+ MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
+ break;
+ case MachO::LC_FUNCTION_STARTS:
+ MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
+ MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
+ break;
+ case MachO::LC_DYLD_INFO:
+ case MachO::LC_DYLD_INFO_ONLY:
+ MLC.dyld_info_command_data.rebase_off =
+ O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
+ MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
+ MLC.dyld_info_command_data.bind_off =
+ O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
+ MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
+ MLC.dyld_info_command_data.weak_bind_off =
+ O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
+ MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
+ MLC.dyld_info_command_data.lazy_bind_off =
+ O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
+ MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
+ MLC.dyld_info_command_data.export_off =
+ O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
+ MLC.dyld_info_command_data.export_size = O.Exports.Trie.size();
+ break;
+ case MachO::LC_LOAD_DYLINKER:
+ case MachO::LC_MAIN:
+ case MachO::LC_RPATH:
+ case MachO::LC_SEGMENT:
+ case MachO::LC_SEGMENT_64:
+ case MachO::LC_VERSION_MIN_MACOSX:
+ case MachO::LC_BUILD_VERSION:
+ case MachO::LC_ID_DYLIB:
+ case MachO::LC_LOAD_DYLIB:
+ case MachO::LC_UUID:
+ case MachO::LC_SOURCE_VERSION:
+ // Nothing to update.
+ break;
+ default:
+ // Abort if it's unsupported in order to prevent corrupting the object.
+ return createStringError(llvm::errc::not_supported,
+ "unsupported load command (cmd=0x%x)", cmd);
+ }
+ }
+
+ return Error::success();
+}
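
The offsets computed in layoutTail are plain running sums over the blob
sizes. A standalone sketch of the same arithmetic; every size below is
invented:

#include <cstdint>
#include <cstdio>

int main() {
  // Order matches layoutTail, including function starts between the export
  // trie and data-in-code.
  struct Blob { const char *Name; uint64_t Size; } Blobs[] = {
      {"rebase opcodes", 16},      {"bind opcodes", 24},
      {"weak bind opcodes", 0},    {"lazy bind opcodes", 40},
      {"export trie", 128},        {"function starts", 32},
      {"data-in-code", 8},         {"symbol table", 10 * 16},
      {"indirect symbols", 3 * 4}, {"string table", 96},
  };
  uint64_t StartOfLinkEdit = 0x4000; // hypothetical segment start
  uint64_t Offset = StartOfLinkEdit;
  for (const Blob &B : Blobs) {
    std::printf("%-18s starts at 0x%llx\n", B.Name,
                (unsigned long long)Offset);
    Offset += B.Size; // each blob begins where the previous one ends
  }
  std::printf("LinkEditSize = %llu\n",
              (unsigned long long)(Offset - StartOfLinkEdit));
}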
+
+Error MachOLayoutBuilder::layout() {
+ O.Header.NCmds = O.LoadCommands.size();
+ O.Header.SizeOfCmds = computeSizeOfCmds();
+ constructStringTable();
+ updateSymbolIndexes();
+ uint64_t Offset = layoutSegments();
+ Offset = layoutRelocations(Offset);
+ return layoutTail(Offset);
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
new file mode 100644
index 000000000000..21cbe56605de
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
@@ -0,0 +1,50 @@
+//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
+#define LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
+
+#include "MachOObjcopy.h"
+#include "Object.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+class MachOLayoutBuilder {
+ Object &O;
+ bool Is64Bit;
+ uint64_t PageSize;
+
+ // Points to the __LINKEDIT segment if it exists.
+ MachO::macho_load_command *LinkEditLoadCommand = nullptr;
+ StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
+
+ uint32_t computeSizeOfCmds() const;
+ void constructStringTable();
+ void updateSymbolIndexes();
+ void updateDySymTab(MachO::macho_load_command &MLC);
+ uint64_t layoutSegments();
+ uint64_t layoutRelocations(uint64_t Offset);
+ Error layoutTail(uint64_t Offset);
+
+public:
+ MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize)
+ : O(O), Is64Bit(Is64Bit), PageSize(PageSize) {}
+
+ // Recomputes and updates fields in the given object such as file offsets.
+ Error layout();
+
+ StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; }
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
diff --git a/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 19343b65dd1e..6d586e7d73f1 100644
--- a/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -25,18 +25,20 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
!Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
!Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() ||
!Config.DumpSection.empty() || !Config.KeepSection.empty() ||
- !Config.OnlySection.empty() || !Config.SymbolsToGlobalize.empty() ||
- !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() ||
- !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() ||
- !Config.SectionsToRename.empty() || !Config.SymbolsToRename.empty() ||
+ Config.NewSymbolVisibility || !Config.OnlySection.empty() ||
+ !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() ||
+ !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() ||
+ !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() ||
+ !Config.SymbolsToRename.empty() ||
!Config.UnneededSymbolsToRemove.empty() ||
- !Config.SetSectionFlags.empty() || !Config.ToRemove.empty() ||
- Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden ||
- Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc ||
- Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
- Config.StripDebug || Config.StripNonAlloc || Config.StripSections ||
- Config.StripUnneeded || Config.DiscardMode != DiscardType::None ||
- !Config.SymbolsToAdd.empty() || Config.EntryExpr) {
+ !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() ||
+ !Config.ToRemove.empty() || Config.ExtractDWO || Config.KeepFileSymbols ||
+ Config.LocalizeHidden || Config.PreserveDates || Config.StripDWO ||
+ Config.StripNonAlloc || Config.StripSections || Config.Weaken ||
+ Config.DecompressDebugSections || Config.StripDebug ||
+ Config.StripNonAlloc || Config.StripSections || Config.StripUnneeded ||
+ Config.DiscardMode != DiscardType::None || !Config.SymbolsToAdd.empty() ||
+ Config.EntryExpr) {
return createStringError(llvm::errc::invalid_argument,
"option not supported by llvm-objcopy for MachO");
}
@@ -57,7 +59,11 @@ Error executeObjcopyOnBinary(const CopyConfig &Config,
if (Error E = handleArgs(Config, *O))
return createFileError(Config.InputFilename, std::move(E));
- MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
+ // TODO: Support 16KB pages which are employed in iOS arm64 binaries:
+ // https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb
+ const uint64_t PageSize = 4096;
+
+ MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
if (auto E = Writer.finalize())
return E;
return Writer.write();
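
The hard-coded 4096 matters because layoutSegments rounds segment file sizes
and vmsizes up to PageSize for non-object files. A small sketch of how the
rounding would change under the 16 KiB pages the TODO refers to; alignTo is
re-implemented locally and the payload size is invented:

#include <cstdint>
#include <cstdio>

static uint64_t alignTo(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }

int main() {
  uint64_t SegFileSize = 5000; // invented segment payload size
  std::printf("4K pages:  filesize = %llu\n",
              (unsigned long long)alignTo(SegFileSize, 4096)); // 8192
  std::printf("16K pages: filesize = %llu\n",
              (unsigned long long)alignTo(SegFileSize, 16384)); // 16384
}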
diff --git a/tools/llvm-objcopy/MachO/MachOReader.cpp b/tools/llvm-objcopy/MachO/MachOReader.cpp
index d31293034608..b48a0d8952d0 100644
--- a/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ b/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -129,10 +129,19 @@ void MachOReader::readLoadCommands(Object &O) const {
case MachO::LC_SYMTAB:
O.SymTabCommandIndex = O.LoadCommands.size();
break;
+ case MachO::LC_DYSYMTAB:
+ O.DySymTabCommandIndex = O.LoadCommands.size();
+ break;
case MachO::LC_DYLD_INFO:
case MachO::LC_DYLD_INFO_ONLY:
O.DyLdInfoCommandIndex = O.LoadCommands.size();
break;
+ case MachO::LC_DATA_IN_CODE:
+ O.DataInCodeCommandIndex = O.LoadCommands.size();
+ break;
+ case MachO::LC_FUNCTION_STARTS:
+ O.FunctionStartsCommandIndex = O.LoadCommands.size();
+ break;
}
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
case MachO::LCName: \
@@ -188,7 +197,7 @@ void MachOReader::readSymbolTable(Object &O) const {
StrTable,
MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl())));
- O.SymTable.Symbols.push_back(llvm::make_unique<SymbolEntry>(SE));
+ O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
}
}
@@ -222,8 +231,37 @@ void MachOReader::readExportInfo(Object &O) const {
O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
}
+void MachOReader::readDataInCodeData(Object &O) const {
+ if (!O.DataInCodeCommandIndex)
+ return;
+ const MachO::linkedit_data_command &LDC =
+ O.LoadCommands[*O.DataInCodeCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ O.DataInCode.Data = arrayRefFromStringRef(
+ MachOObj.getData().substr(LDC.dataoff, LDC.datasize));
+}
+
+void MachOReader::readFunctionStartsData(Object &O) const {
+ if (!O.FunctionStartsCommandIndex)
+ return;
+ const MachO::linkedit_data_command &LDC =
+ O.LoadCommands[*O.FunctionStartsCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ O.FunctionStarts.Data = arrayRefFromStringRef(
+ MachOObj.getData().substr(LDC.dataoff, LDC.datasize));
+}
+
+void MachOReader::readIndirectSymbolTable(Object &O) const {
+ MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
+ for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i)
+ O.IndirectSymTable.Symbols.push_back(
+ MachOObj.getIndirectSymbolTableEntry(DySymTab, i));
+}
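
The indirect symbol table read above is nothing more than an array of
uint32_t indices into the symbol table, one per stub or lazy/non-lazy pointer
slot. A toy illustration; the names and indices are invented:

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

int main() {
  // In the real reader the indices come from getIndirectSymbolTableEntry.
  std::vector<std::string> SymbolNames = {"_main", "_printf", "_malloc"};
  std::vector<uint32_t> IndirectSyms = {1, 2, 1}; // one entry per slot
  for (uint32_t Idx : IndirectSyms)
    std::printf("slot -> %s\n", SymbolNames[Idx].c_str());
}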
+
std::unique_ptr<Object> MachOReader::create() const {
- auto Obj = llvm::make_unique<Object>();
+ auto Obj = std::make_unique<Object>();
readHeader(*Obj);
readLoadCommands(*Obj);
readSymbolTable(*Obj);
@@ -233,6 +271,9 @@ std::unique_ptr<Object> MachOReader::create() const {
readWeakBindInfo(*Obj);
readLazyBindInfo(*Obj);
readExportInfo(*Obj);
+ readDataInCodeData(*Obj);
+ readFunctionStartsData(*Obj);
+ readIndirectSymbolTable(*Obj);
return Obj;
}
diff --git a/tools/llvm-objcopy/MachO/MachOReader.h b/tools/llvm-objcopy/MachO/MachOReader.h
index 795e5cc2363d..00c8f0d55f61 100644
--- a/tools/llvm-objcopy/MachO/MachOReader.h
+++ b/tools/llvm-objcopy/MachO/MachOReader.h
@@ -36,6 +36,9 @@ class MachOReader : public Reader {
void readWeakBindInfo(Object &O) const;
void readLazyBindInfo(Object &O) const;
void readExportInfo(Object &O) const;
+ void readDataInCodeData(Object &O) const;
+ void readFunctionStartsData(Object &O) const;
+ void readIndirectSymbolTable(Object &O) const;
public:
explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {}
diff --git a/tools/llvm-objcopy/MachO/MachOWriter.cpp b/tools/llvm-objcopy/MachO/MachOWriter.cpp
index 74200c5aa62a..4ec91cc9eb7a 100644
--- a/tools/llvm-objcopy/MachO/MachOWriter.cpp
+++ b/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "MachOWriter.h"
+#include "MachOLayoutBuilder.h"
#include "Object.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -40,16 +41,10 @@ size_t MachOWriter::totalSize() const {
const MachO::symtab_command &SymTabCommand =
O.LoadCommands[*O.SymTabCommandIndex]
.MachOLoadCommand.symtab_command_data;
- if (SymTabCommand.symoff) {
- assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) &&
- "Incorrect number of symbols");
+ if (SymTabCommand.symoff)
Ends.push_back(SymTabCommand.symoff + symTableSize());
- }
- if (SymTabCommand.stroff) {
- assert((SymTabCommand.strsize == StrTableBuilder.getSize()) &&
- "Incorrect string table size");
+ if (SymTabCommand.stroff)
Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
- }
}
if (O.DyLdInfoCommandIndex) {
const MachO::dyld_info_command &DyLdInfoCommand =
@@ -84,6 +79,36 @@ size_t MachOWriter::totalSize() const {
}
}
+ if (O.DySymTabCommandIndex) {
+ const MachO::dysymtab_command &DySymTabCommand =
+ O.LoadCommands[*O.DySymTabCommandIndex]
+ .MachOLoadCommand.dysymtab_command_data;
+
+ if (DySymTabCommand.indirectsymoff)
+ Ends.push_back(DySymTabCommand.indirectsymoff +
+ sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+ }
+
+ if (O.DataInCodeCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.DataInCodeCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Ends.push_back(LinkEditDataCommand.dataoff +
+ LinkEditDataCommand.datasize);
+ }
+
+ if (O.FunctionStartsCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.FunctionStartsCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Ends.push_back(LinkEditDataCommand.dataoff +
+ LinkEditDataCommand.datasize);
+ }
+
+ // Otherwise, use the last section / relocation.
for (const auto &LC : O.LoadCommands)
for (const auto &S : LC.Sections) {
@@ -120,14 +145,6 @@ void MachOWriter::writeHeader() {
memcpy(B.getBufferStart(), &Header, HeaderSize);
}
-void MachOWriter::updateSymbolIndexes() {
- uint32_t Index = 0;
- for (auto &Symbol : O.SymTable.Symbols) {
- Symbol->Index = Index;
- Index++;
- }
-}
-
void MachOWriter::writeLoadCommands() {
uint8_t *Begin = B.getBufferStart() + headerSize();
for (const auto &LC : O.LoadCommands) {
@@ -253,7 +270,7 @@ void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out,
Out += sizeof(NListType);
}
-void MachOWriter::writeSymbolTable() {
+void MachOWriter::writeStringTable() {
if (!O.SymTabCommandIndex)
return;
const MachO::symtab_command &SymTabCommand =
@@ -261,10 +278,10 @@ void MachOWriter::writeSymbolTable() {
.MachOLoadCommand.symtab_command_data;
uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
- StrTableBuilder.write(StrTable);
+ LayoutBuilder.getStringTableBuilder().write(StrTable);
}
-void MachOWriter::writeStringTable() {
+void MachOWriter::writeSymbolTable() {
if (!O.SymTabCommandIndex)
return;
const MachO::symtab_command &SymTabCommand =
@@ -275,7 +292,7 @@ void MachOWriter::writeStringTable() {
for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
Iter != End; Iter++) {
SymbolEntry *Sym = Iter->get();
- auto Nstrx = StrTableBuilder.getOffset(Sym->Name);
+ uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name);
if (Is64Bit)
writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
@@ -344,6 +361,45 @@ void MachOWriter::writeExportInfo() {
memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
}
+void MachOWriter::writeIndirectSymbolTable() {
+ if (!O.DySymTabCommandIndex)
+ return;
+
+ const MachO::dysymtab_command &DySymTabCommand =
+ O.LoadCommands[*O.DySymTabCommandIndex]
+ .MachOLoadCommand.dysymtab_command_data;
+
+ char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff;
+ assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) &&
+ "Incorrect indirect symbol table size");
+ memcpy(Out, O.IndirectSymTable.Symbols.data(),
+ sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+}
+
+void MachOWriter::writeDataInCodeData() {
+ if (!O.DataInCodeCommandIndex)
+ return;
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.DataInCodeCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+ char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
+ assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) &&
+ "Incorrect data in code data size");
+ memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size());
+}
+
+void MachOWriter::writeFunctionStartsData() {
+ if (!O.FunctionStartsCommandIndex)
+ return;
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.FunctionStartsCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+ char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
+ assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) &&
+ "Incorrect function starts data size");
+ memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size());
+}
+
void MachOWriter::writeTail() {
typedef void (MachOWriter::*WriteHandlerType)(void);
typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
@@ -379,206 +435,51 @@ void MachOWriter::writeTail() {
{DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
}
- llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
- return LHS.first < RHS.first;
- });
-
- for (auto WriteOp : Queue)
- (this->*WriteOp.second)();
-}
-
-void MachOWriter::updateSizeOfCmds() {
- auto Size = 0;
- for (const auto &LC : O.LoadCommands) {
- auto &MLC = LC.MachOLoadCommand;
- auto cmd = MLC.load_command_data.cmd;
-
- switch (cmd) {
- case MachO::LC_SEGMENT:
- Size += sizeof(MachO::segment_command) +
- sizeof(MachO::section) * LC.Sections.size();
- continue;
- case MachO::LC_SEGMENT_64:
- Size += sizeof(MachO::segment_command_64) +
- sizeof(MachO::section_64) * LC.Sections.size();
- continue;
- }
-
- switch (cmd) {
-#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
- case MachO::LCName: \
- Size += sizeof(MachO::LCStruct); \
- break;
-#include "llvm/BinaryFormat/MachO.def"
-#undef HANDLE_LOAD_COMMAND
- }
- }
-
- O.Header.SizeOfCmds = Size;
-}
-
-// Updates the index and the number of local/external/undefined symbols. Here we
-// assume that MLC is a LC_DYSYMTAB and the nlist entries in the symbol table
-// are already sorted by the those types.
-void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
- uint32_t NumLocalSymbols = 0;
- auto Iter = O.SymTable.Symbols.begin();
- auto End = O.SymTable.Symbols.end();
- for (; Iter != End; Iter++) {
- if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT))
- break;
+ if (O.DySymTabCommandIndex) {
+ const MachO::dysymtab_command &DySymTabCommand =
+ O.LoadCommands[*O.DySymTabCommandIndex]
+ .MachOLoadCommand.dysymtab_command_data;
- NumLocalSymbols++;
+ if (DySymTabCommand.indirectsymoff)
+ Queue.emplace_back(DySymTabCommand.indirectsymoff,
+ &MachOWriter::writeIndirectSymbolTable);
}
- uint32_t NumExtDefSymbols = 0;
- for (; Iter != End; Iter++) {
- if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF)
- break;
-
- NumExtDefSymbols++;
- }
-
- MLC.dysymtab_command_data.ilocalsym = 0;
- MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
- MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
- MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
- MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
- MLC.dysymtab_command_data.nundefsym =
- O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
-}
-
-// Recomputes and updates offset and size fields in load commands and sections
-// since they could be modified.
-Error MachOWriter::layout() {
- auto SizeOfCmds = loadCommandsSize();
- auto Offset = headerSize() + SizeOfCmds;
- O.Header.NCmds = O.LoadCommands.size();
- O.Header.SizeOfCmds = SizeOfCmds;
-
- // Lay out sections.
- for (auto &LC : O.LoadCommands) {
- uint64_t FileOff = Offset;
- uint64_t VMSize = 0;
- uint64_t FileOffsetInSegment = 0;
- for (auto &Sec : LC.Sections) {
- if (!Sec.isVirtualSection()) {
- auto FilePaddingSize =
- OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
- Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
- Sec.Size = Sec.Content.size();
- FileOffsetInSegment += FilePaddingSize + Sec.Size;
- }
-
- VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
- }
-
- // TODO: Handle the __PAGEZERO segment.
- auto &MLC = LC.MachOLoadCommand;
- switch (MLC.load_command_data.cmd) {
- case MachO::LC_SEGMENT:
- MLC.segment_command_data.cmdsize =
- sizeof(MachO::segment_command) +
- sizeof(MachO::section) * LC.Sections.size();
- MLC.segment_command_data.nsects = LC.Sections.size();
- MLC.segment_command_data.fileoff = FileOff;
- MLC.segment_command_data.vmsize = VMSize;
- MLC.segment_command_data.filesize = FileOffsetInSegment;
- break;
- case MachO::LC_SEGMENT_64:
- MLC.segment_command_64_data.cmdsize =
- sizeof(MachO::segment_command_64) +
- sizeof(MachO::section_64) * LC.Sections.size();
- MLC.segment_command_64_data.nsects = LC.Sections.size();
- MLC.segment_command_64_data.fileoff = FileOff;
- MLC.segment_command_64_data.vmsize = VMSize;
- MLC.segment_command_64_data.filesize = FileOffsetInSegment;
- break;
- }
+ if (O.DataInCodeCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.DataInCodeCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
- Offset += FileOffsetInSegment;
+ if (LinkEditDataCommand.dataoff)
+ Queue.emplace_back(LinkEditDataCommand.dataoff,
+ &MachOWriter::writeDataInCodeData);
}
- // Lay out relocations.
- for (auto &LC : O.LoadCommands)
- for (auto &Sec : LC.Sections) {
- Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
- Sec.NReloc = Sec.Relocations.size();
- Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
- }
+ if (O.FunctionStartsCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.FunctionStartsCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
- // Lay out tail stuff.
- auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
- for (auto &LC : O.LoadCommands) {
- auto &MLC = LC.MachOLoadCommand;
- auto cmd = MLC.load_command_data.cmd;
- switch (cmd) {
- case MachO::LC_SYMTAB:
- MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
- MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
- MLC.symtab_command_data.symoff = Offset;
- Offset += NListSize * MLC.symtab_command_data.nsyms;
- MLC.symtab_command_data.stroff = Offset;
- Offset += MLC.symtab_command_data.strsize;
- break;
- case MachO::LC_DYSYMTAB: {
- if (MLC.dysymtab_command_data.ntoc != 0 ||
- MLC.dysymtab_command_data.nmodtab != 0 ||
- MLC.dysymtab_command_data.nextrefsyms != 0 ||
- MLC.dysymtab_command_data.nlocrel != 0 ||
- MLC.dysymtab_command_data.nextrel != 0)
- return createStringError(llvm::errc::not_supported,
- "shared library is not yet supported");
-
- if (MLC.dysymtab_command_data.nindirectsyms != 0)
- return createStringError(llvm::errc::not_supported,
- "indirect symbol table is not yet supported");
-
- updateDySymTab(MLC);
- break;
- }
- case MachO::LC_SEGMENT:
- case MachO::LC_SEGMENT_64:
- case MachO::LC_VERSION_MIN_MACOSX:
- case MachO::LC_BUILD_VERSION:
- case MachO::LC_ID_DYLIB:
- case MachO::LC_LOAD_DYLIB:
- case MachO::LC_UUID:
- case MachO::LC_SOURCE_VERSION:
- // Nothing to update.
- break;
- default:
- // Abort if it's unsupported in order to prevent corrupting the object.
- return createStringError(llvm::errc::not_supported,
- "unsupported load command (cmd=0x%x)", cmd);
- }
+ if (LinkEditDataCommand.dataoff)
+ Queue.emplace_back(LinkEditDataCommand.dataoff,
+ &MachOWriter::writeFunctionStartsData);
}
- return Error::success();
-}
+ llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
+ return LHS.first < RHS.first;
+ });
-void MachOWriter::constructStringTable() {
- for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
- StrTableBuilder.add(Sym->Name);
- StrTableBuilder.finalize();
+ for (auto WriteOp : Queue)
+ (this->*WriteOp.second)();
}
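
writeTail queues (file offset, member function) pairs, sorts them by offset,
and then invokes each handler in file order. A self-contained toy writer
showing the same pattern; the offsets and handler names are invented:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Each handler knows what to emit; the queue only decides the order.
struct Writer {
  void writeSymbols() { std::puts("symbols"); }
  void writeStrings() { std::puts("strings"); }
  void writeExports() { std::puts("exports"); }

  void writeTail() {
    using Handler = void (Writer::*)();
    std::vector<std::pair<uint64_t, Handler>> Queue = {
        {0x400, &Writer::writeStrings},
        {0x100, &Writer::writeExports},
        {0x200, &Writer::writeSymbols},
    };
    std::sort(Queue.begin(), Queue.end(),
              [](const auto &L, const auto &R) { return L.first < R.first; });
    for (auto &Op : Queue)
      (this->*Op.second)(); // prints: exports, symbols, strings
  }
};

int main() { Writer().writeTail(); }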
-Error MachOWriter::finalize() {
- updateSizeOfCmds();
- constructStringTable();
-
- if (auto E = layout())
- return E;
-
- return Error::success();
-}
+Error MachOWriter::finalize() { return LayoutBuilder.layout(); }
Error MachOWriter::write() {
if (Error E = B.allocate(totalSize()))
return E;
memset(B.getBufferStart(), 0, totalSize());
writeHeader();
- updateSymbolIndexes();
writeLoadCommands();
writeSections();
writeTail();
diff --git a/tools/llvm-objcopy/MachO/MachOWriter.h b/tools/llvm-objcopy/MachO/MachOWriter.h
index ecf12d62de2c..22abbad56f41 100644
--- a/tools/llvm-objcopy/MachO/MachOWriter.h
+++ b/tools/llvm-objcopy/MachO/MachOWriter.h
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "../Buffer.h"
+#include "MachOLayoutBuilder.h"
#include "MachOObjcopy.h"
#include "Object.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -22,20 +23,15 @@ class MachOWriter {
Object &O;
bool Is64Bit;
bool IsLittleEndian;
+ uint64_t PageSize;
Buffer &B;
- StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
+ MachOLayoutBuilder LayoutBuilder;
size_t headerSize() const;
size_t loadCommandsSize() const;
size_t symTableSize() const;
size_t strTableSize() const;
- void updateDySymTab(MachO::macho_load_command &MLC);
- void updateSizeOfCmds();
- void updateSymbolIndexes();
- void constructStringTable();
- Error layout();
-
void writeHeader();
void writeLoadCommands();
template <typename StructType>
@@ -48,11 +44,16 @@ class MachOWriter {
void writeWeakBindInfo();
void writeLazyBindInfo();
void writeExportInfo();
+ void writeIndirectSymbolTable();
+ void writeDataInCodeData();
+ void writeFunctionStartsData();
void writeTail();
public:
- MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B)
- : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {}
+ MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize,
+ Buffer &B)
+ : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian),
+ PageSize(PageSize), B(B), LayoutBuilder(O, Is64Bit, PageSize) {}
size_t totalSize() const;
Error finalize();
diff --git a/tools/llvm-objcopy/MachO/Object.h b/tools/llvm-objcopy/MachO/Object.h
index ed85fcbc47f7..1cebf8253d19 100644
--- a/tools/llvm-objcopy/MachO/Object.h
+++ b/tools/llvm-objcopy/MachO/Object.h
@@ -90,6 +90,16 @@ struct SymbolEntry {
uint8_t n_sect;
uint16_t n_desc;
uint64_t n_value;
+
+ bool isExternalSymbol() const {
+ return n_type & ((MachO::N_EXT | MachO::N_PEXT));
+ }
+
+ bool isLocalSymbol() const { return !isExternalSymbol(); }
+
+ bool isUndefinedSymbol() const {
+ return (n_type & MachO::N_TYPE) == MachO::N_UNDF;
+ }
};
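
The three predicates added above partition n_type values exactly the way
updateDySymTab expects. A small self-contained check; the flag values are
copied from the Mach-O headers and redefined locally so the snippet compiles
on its own:

#include <cassert>
#include <cstdint>

// Local copies of the relevant Mach-O n_type bits.
constexpr uint8_t N_EXT = 0x01, N_PEXT = 0x10;
constexpr uint8_t N_TYPE = 0x0e, N_UNDF = 0x0, N_SECT = 0xe;

static bool isExternal(uint8_t T) { return T & (N_EXT | N_PEXT); }
static bool isUndefined(uint8_t T) { return (T & N_TYPE) == N_UNDF; }

int main() {
  uint8_t LocalDefined = N_SECT;         // defined in a section, not exported
  uint8_t ExtDefined = N_SECT | N_EXT;   // defined and exported
  uint8_t ExtUndefined = N_UNDF | N_EXT; // referenced, defined elsewhere
  assert(!isExternal(LocalDefined) && !isUndefined(LocalDefined));
  assert(isExternal(ExtDefined) && !isUndefined(ExtDefined));
  assert(isExternal(ExtUndefined) && isUndefined(ExtUndefined));
}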
/// The location of the symbol table inside the binary is described by LC_SYMTAB
@@ -100,6 +110,10 @@ struct SymbolTable {
const SymbolEntry *getSymbolByIndex(uint32_t Index) const;
};
+struct IndirectSymbolTable {
+ std::vector<uint32_t> Symbols;
+};
+
/// The location of the string table inside the binary is described by LC_SYMTAB
/// load command.
struct StringTable {
@@ -206,6 +220,10 @@ struct ExportInfo {
ArrayRef<uint8_t> Trie;
};
+struct LinkData {
+ ArrayRef<uint8_t> Data;
+};
+
struct Object {
MachHeader Header;
std::vector<LoadCommand> LoadCommands;
@@ -218,11 +236,20 @@ struct Object {
WeakBindInfo WeakBinds;
LazyBindInfo LazyBinds;
ExportInfo Exports;
+ IndirectSymbolTable IndirectSymTable;
+ LinkData DataInCode;
+ LinkData FunctionStarts;
/// The index of LC_SYMTAB load command if present.
Optional<size_t> SymTabCommandIndex;
/// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
Optional<size_t> DyLdInfoCommandIndex;
+ /// The index of the LC_DYSYMTAB load command if present.
+ Optional<size_t> DySymTabCommandIndex;
+ /// The index of the LC_DATA_IN_CODE load command if present.
+ Optional<size_t> DataInCodeCommandIndex;
+ /// The index of the LC_FUNCTION_STARTS load command if present.
+ Optional<size_t> FunctionStartsCommandIndex;
};
} // end namespace macho
diff --git a/tools/llvm-objcopy/ObjcopyOpts.td b/tools/llvm-objcopy/ObjcopyOpts.td
index 5fce4fbde539..9e6b6f0005cd 100644
--- a/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/tools/llvm-objcopy/ObjcopyOpts.td
@@ -1,37 +1,33 @@
-include "llvm/Option/OptParser.td"
-
-multiclass Eq<string name, string help> {
- def NAME : Separate<["--"], name>;
- def NAME #_eq : Joined<["--"], name #"=">,
- Alias<!cast<Separate>(NAME)>,
- HelpText<help>;
-}
-
-def help : Flag<["--"], "help">;
-def h : Flag<["-"], "h">, Alias<help>;
-
-def allow_broken_links
- : Flag<["--"], "allow-broken-links">,
- HelpText<"Allow llvm-objcopy to remove sections even if it would leave "
- "invalid section references. The appropriate sh_link fields "
- "will be set to zero.">;
+include "CommonOpts.td"
defm binary_architecture
- : Eq<"binary-architecture", "Used when transforming an architecture-less "
- "format (such as binary) to another format">;
-def B : JoinedOrSeparate<["-"], "B">, Alias<binary_architecture>;
+ : Eq<"binary-architecture", "Ignored for compatibility">;
+def B : JoinedOrSeparate<["-"], "B">,
+ Alias<binary_architecture>,
+ HelpText<"Alias for --binary-architecture">;
defm target : Eq<"target", "Format of the input and output file">,
Values<"binary">;
-def F : JoinedOrSeparate<["-"], "F">, Alias<target>;
+def F : JoinedOrSeparate<["-"], "F">,
+ Alias<target>,
+ HelpText<"Alias for --target">;
defm input_target : Eq<"input-target", "Format of the input file">,
Values<"binary">;
-def I : JoinedOrSeparate<["-"], "I">, Alias<input_target>;
+def I : JoinedOrSeparate<["-"], "I">,
+ Alias<input_target>,
+ HelpText<"Alias for --input-target">;
defm output_target : Eq<"output-target", "Format of the output file">,
Values<"binary">;
-def O : JoinedOrSeparate<["-"], "O">, Alias<output_target>;
+def O : JoinedOrSeparate<["-"], "O">,
+ Alias<output_target>,
+ HelpText<"Alias for --output-target">;
+
+defm new_symbol_visibility : Eq<"new-symbol-visibility", "Visibility of "
+ "symbols generated for binary input or added"
+ " with --add-symbol unless otherwise"
+ " specified. The default value is 'default'.">;
def compress_debug_sections : Flag<["--"], "compress-debug-sections">;
def compress_debug_sections_eq
@@ -46,34 +42,10 @@ defm split_dwo
"<dwo-file>, then strip-dwo on the input file">,
MetaVarName<"dwo-file">;
-def enable_deterministic_archives
- : Flag<["--"], "enable-deterministic-archives">,
- HelpText<"Enable deterministic mode when copying archives (use zero for "
- "UIDs, GIDs, and timestamps).">;
-def D : Flag<["-"], "D">,
- Alias<enable_deterministic_archives>,
- HelpText<"Alias for --enable-deterministic-archives">;
-
-def disable_deterministic_archives
- : Flag<["--"], "disable-deterministic-archives">,
- HelpText<"Disable deterministic mode when copying archives (use real "
- "values for UIDs, GIDs, and timestamps).">;
-def U : Flag<["-"], "U">,
- Alias<disable_deterministic_archives>,
- HelpText<"Alias for --disable-deterministic-archives">;
-
-def preserve_dates : Flag<["--"], "preserve-dates">,
- HelpText<"Preserve access and modification timestamps">;
-def p : Flag<["-"], "p">, Alias<preserve_dates>;
-
defm add_gnu_debuglink
: Eq<"add-gnu-debuglink", "Add a .gnu_debuglink for <debug-file>">,
MetaVarName<"debug-file">;
-defm remove_section : Eq<"remove-section", "Remove <section>">,
- MetaVarName<"section">;
-def R : JoinedOrSeparate<["-"], "R">, Alias<remove_section>;
-
defm rename_section
: Eq<"rename-section",
"Renames a section from old to new, optionally with specified flags. "
@@ -93,16 +65,20 @@ defm redefine_symbols
"symbols from many files.">,
MetaVarName<"filename">;
-defm keep_section : Eq<"keep-section", "Keep <section>">,
- MetaVarName<"section">;
defm only_section : Eq<"only-section", "Remove all but <section>">,
MetaVarName<"section">;
-def j : JoinedOrSeparate<["-"], "j">, Alias<only_section>;
+def j : JoinedOrSeparate<["-"], "j">,
+ Alias<only_section>,
+ HelpText<"Alias for --only-section">;
defm add_section
: Eq<"add-section",
"Make a section named <section> with the contents of <file>.">,
MetaVarName<"section=file">;
+defm set_section_alignment
+ : Eq<"set-section-alignment", "Set alignment for a given section.">,
+ MetaVarName<"section=align">;
+
defm set_section_flags
: Eq<"set-section-flags",
"Set section flags for a given section. Flags supported for GNU "
@@ -110,26 +86,14 @@ defm set_section_flags
"rom, share, contents, merge, strings.">,
MetaVarName<"section=flag1[,flag2,...]">;
-def strip_all : Flag<["--"], "strip-all">,
- HelpText<"Remove non-allocated sections outside segments. "
- ".gnu.warning* sections are not removed">;
-def S : Flag<["-"], "S">, Alias<strip_all>;
-def strip_all_gnu : Flag<["--"], "strip-all-gnu">,
- HelpText<"Compatible with GNU objcopy's --strip-all">;
-def strip_debug : Flag<["--"], "strip-debug">,
- HelpText<"Remove all debug information">;
-def g : Flag<["-"], "g">, Alias<strip_debug>,
- HelpText<"Alias for --strip-debug">;
+def S : Flag<["-"], "S">,
+ Alias<strip_all>,
+ HelpText<"Alias for --strip-all">;
def strip_dwo : Flag<["--"], "strip-dwo">,
HelpText<"Remove all DWARF .dwo sections from file">;
-def strip_sections
- : Flag<["--"], "strip-sections">,
- HelpText<"Remove all section headers and all sections not in segments">;
def strip_non_alloc
: Flag<["--"], "strip-non-alloc">,
HelpText<"Remove all non-allocated sections outside segments">;
-def strip_unneeded : Flag<["--"], "strip-unneeded">,
- HelpText<"Remove all symbols not needed by relocations">;
defm strip_unneeded_symbol
: Eq<"strip-unneeded-symbol",
"Remove symbol <symbol> if it is not needed by relocations">,
@@ -163,7 +127,9 @@ defm localize_symbols
"Reads a list of symbols from <filename> and marks them local.">,
MetaVarName<"filename">;
-def L : JoinedOrSeparate<["-"], "L">, Alias<localize_symbol>;
+def L : JoinedOrSeparate<["-"], "L">,
+ Alias<localize_symbol>,
+ HelpText<"Alias for --localize-symbol">;
defm globalize_symbol : Eq<"globalize-symbol", "Mark <symbol> as global">,
MetaVarName<"symbol">;
@@ -178,7 +144,9 @@ defm keep_global_symbol
"Convert all symbols except <symbol> to local. May be repeated to "
"convert all except a set of symbols to local.">,
MetaVarName<"symbol">;
-def G : JoinedOrSeparate<["-"], "G">, Alias<keep_global_symbol>;
+def G : JoinedOrSeparate<["-"], "G">,
+ Alias<keep_global_symbol>,
+ HelpText<"Alias for --keep-global-symbol">;
defm keep_global_symbols
: Eq<"keep-global-symbols",
@@ -196,31 +164,17 @@ defm weaken_symbols
"Reads a list of symbols from <filename> and marks them weak.">,
MetaVarName<"filename">;
-def W : JoinedOrSeparate<["-"], "W">, Alias<weaken_symbol>;
+def W : JoinedOrSeparate<["-"], "W">,
+ Alias<weaken_symbol>,
+ HelpText<"Alias for --weaken-symbol">;
def weaken : Flag<["--"], "weaken">,
HelpText<"Mark all global symbols as weak">;
-def discard_locals : Flag<["--"], "discard-locals">,
- HelpText<"Remove compiler-generated local symbols, (e.g. "
- "symbols starting with .L)">;
-def X : Flag<["-"], "X">, Alias<discard_locals>;
-
-def discard_all
- : Flag<["--"], "discard-all">,
- HelpText<"Remove all local symbols except file and section symbols">;
-def x : Flag<["-"], "x">, Alias<discard_all>;
-defm strip_symbol : Eq<"strip-symbol", "Remove symbol <symbol>">,
- MetaVarName<"symbol">;
defm strip_symbols
: Eq<"strip-symbols",
"Reads a list of symbols from <filename> and removes them.">,
MetaVarName<"filename">;
-def N : JoinedOrSeparate<["-"], "N">, Alias<strip_symbol>;
-defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
- MetaVarName<"symbol">;
-def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
-
defm keep_symbols
: Eq<"keep-symbols",
"Reads a list of symbols from <filename> and runs as if "
@@ -230,13 +184,6 @@ defm keep_symbols
"be repeated to read symbols from many files.">,
MetaVarName<"filename">;
-def only_keep_debug
- : Flag<["--"], "only-keep-debug">,
- HelpText<"Clear sections that would not be stripped by --strip-debug. "
- "Currently only implemented for COFF.">;
-
-def keep_file_symbols : Flag<["--"], "keep-file-symbols">,
- HelpText<"Do not remove file symbols">;
defm dump_section
: Eq<"dump-section",
"Dump contents of section named <section> into file <file>">,
@@ -249,9 +196,6 @@ defm prefix_alloc_sections
: Eq<"prefix-alloc-sections", "Add <prefix> to the start of every allocated section name">,
MetaVarName<"prefix">;
-def version : Flag<["--"], "version">,
- HelpText<"Print the version and exit.">;
-def V : Flag<["-"], "V">, Alias<version>;
defm build_id_link_dir
: Eq<"build-id-link-dir", "Set directory for --build-id-link-input and "
"--build-id-link-output to <dir>">,
@@ -265,10 +209,6 @@ defm build_id_link_output
"name derived from hex build ID">,
MetaVarName<"suffix">;
-def regex
- : Flag<["--"], "regex">,
- HelpText<"Permit regular expressions in name comparison">;
-
defm set_start : Eq<"set-start", "Set the start address to <addr>. Overrides "
"any previous --change-start or --adjust-start values.">,
MetaVarName<"addr">;
@@ -277,11 +217,12 @@ defm change_start : Eq<"change-start", "Add <incr> to the start address. Can be
"cumulatively.">,
MetaVarName<"incr">;
def adjust_start : JoinedOrSeparate<["--"], "adjust-start">,
- Alias<change_start>;
+ Alias<change_start>,
+ HelpText<"Alias for --change-start">;
defm add_symbol
: Eq<"add-symbol", "Add new symbol <name> to .symtab. Accepted flags: "
- "global, local, weak, default, hidden, file, section, object, "
+ "global, local, weak, default, hidden, protected, file, section, object, "
"function, indirect-function. Accepted but ignored for "
"compatibility: debug, constructor, warning, indirect, synthetic, "
"unique-object, before.">,
diff --git a/tools/llvm-objcopy/StripOpts.td b/tools/llvm-objcopy/StripOpts.td
index 1d06bb3dfb38..cd02cffae673 100644
--- a/tools/llvm-objcopy/StripOpts.td
+++ b/tools/llvm-objcopy/StripOpts.td
@@ -1,96 +1,17 @@
-include "llvm/Option/OptParser.td"
+include "CommonOpts.td"
-multiclass Eq<string name, string help> {
- def NAME : Separate<["--"], name>;
- def NAME #_eq : Joined<["--"], name #"=">,
- Alias<!cast<Separate>(NAME)>,
- HelpText<help>;
-}
+def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to <file>">,
+ MetaVarName<"<file>">;
-def help : Flag<["--"], "help">;
-def h : Flag<["-"], "h">, Alias<help>;
-
-def allow_broken_links
- : Flag<["--"], "allow-broken-links">,
- HelpText<"Allow llvm-strip to remove sections even if it would leave "
- "invalid section references. The appropriate sh_link fields "
- "will be set to zero.">;
-
-def enable_deterministic_archives
- : Flag<["--"], "enable-deterministic-archives">,
- HelpText<"Enable deterministic mode when stripping archives (use zero "
- "for UIDs, GIDs, and timestamps).">;
-def D : Flag<["-"], "D">,
- Alias<enable_deterministic_archives>,
- HelpText<"Alias for --enable-deterministic-archives">;
-
-def disable_deterministic_archives
- : Flag<["--"], "disable-deterministic-archives">,
- HelpText<"Disable deterministic mode when stripping archives (use real "
- "values for UIDs, GIDs, and timestamps).">;
-def U : Flag<["-"], "U">,
- Alias<disable_deterministic_archives>,
- HelpText<"Alias for --disable-deterministic-archives">;
-
-def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to <file>">;
-
-def preserve_dates : Flag<["--"], "preserve-dates">,
- HelpText<"Preserve access and modification timestamps">;
-def p : Flag<["-"], "p">, Alias<preserve_dates>;
-
-def strip_all : Flag<["--"], "strip-all">,
- HelpText<"Remove non-allocated sections outside segments. "
- ".gnu.warning* sections are not removed">;
-def s : Flag<["-"], "s">, Alias<strip_all>;
+def s : Flag<["-"], "s">,
+ Alias<strip_all>,
+ HelpText<"Alias for --strip-all">;
def no_strip_all : Flag<["--"], "no-strip-all">,
HelpText<"Disable --strip-all">;
-def strip_all_gnu : Flag<["--"], "strip-all-gnu">,
- HelpText<"Compatible with GNU strip's --strip-all">;
-def strip_debug : Flag<["--"], "strip-debug">,
- HelpText<"Remove debugging symbols only">;
-def d : Flag<["-"], "d">, Alias<strip_debug>;
-def g : Flag<["-"], "g">, Alias<strip_debug>;
-def S : Flag<["-"], "S">, Alias<strip_debug>;
-def strip_unneeded : Flag<["--"], "strip-unneeded">,
- HelpText<"Remove all symbols not needed by relocations">;
-
-defm remove_section : Eq<"remove-section", "Remove <section>">,
- MetaVarName<"section">;
-def R : JoinedOrSeparate<["-"], "R">, Alias<remove_section>;
-
-defm strip_symbol : Eq<"strip-symbol", "Strip <symbol>">,
- MetaVarName<"symbol">;
-def N : JoinedOrSeparate<["-"], "N">, Alias<strip_symbol>;
-
-defm keep_section : Eq<"keep-section", "Keep <section>">,
- MetaVarName<"section">;
-defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
- MetaVarName<"symbol">;
-def keep_file_symbols : Flag<["--"], "keep-file-symbols">,
- HelpText<"Do not remove file symbols">;
-
-def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
-
-def only_keep_debug
- : Flag<["--"], "only-keep-debug">,
- HelpText<"Clear sections that would not be stripped by --strip-debug. "
- "Currently only implemented for COFF.">;
-
-def discard_locals : Flag<["--"], "discard-locals">,
- HelpText<"Remove compiler-generated local symbols, (e.g. "
- "symbols starting with .L)">;
-def X : Flag<["-"], "X">, Alias<discard_locals>;
-
-def discard_all
- : Flag<["--"], "discard-all">,
- HelpText<"Remove all local symbols except file and section symbols">;
-def x : Flag<["-"], "x">, Alias<discard_all>;
-
-def regex
- : Flag<["--"], "regex">,
- HelpText<"Permit regular expressions in name comparison">;
-
-def version : Flag<["--"], "version">,
- HelpText<"Print the version and exit.">;
-def V : Flag<["-"], "V">, Alias<version>;
+def d : Flag<["-"], "d">,
+ Alias<strip_debug>,
+ HelpText<"Alias for --strip-debug">;
+def S : Flag<["-"], "S">,
+ Alias<strip_debug>,
+ HelpText<"Alias for --strip-debug">;
diff --git a/tools/llvm-objcopy/llvm-objcopy.cpp b/tools/llvm-objcopy/llvm-objcopy.cpp
index e9372176e43b..a68210f3fdd3 100644
--- a/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -29,6 +29,7 @@
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
@@ -36,6 +37,7 @@
#include "llvm/Support/Memory.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -84,7 +86,7 @@ LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) {
ErrorSuccess reportWarning(Error E) {
assert(E);
- WithColor::warning(errs(), ToolName) << toString(std::move(E));
+ WithColor::warning(errs(), ToolName) << toString(std::move(E)) << '\n';
return Error::success();
}
@@ -130,16 +132,18 @@ static Error deepWriteArchive(StringRef ArcName,
/// The function executeObjcopyOnIHex does the dispatch based on the format
/// of the output specified by the command line options.
-static Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
+static Error executeObjcopyOnIHex(CopyConfig &Config, MemoryBuffer &In,
Buffer &Out) {
// TODO: support output formats other than ELF.
+ if (Error E = Config.parseELFConfig())
+ return E;
return elf::executeObjcopyOnIHex(Config, In, Out);
}
/// The function executeObjcopyOnRawBinary does the dispatch based on the format
/// of the output specified by the command line options.
-static Error executeObjcopyOnRawBinary(const CopyConfig &Config,
- MemoryBuffer &In, Buffer &Out) {
+static Error executeObjcopyOnRawBinary(CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out) {
switch (Config.OutputFormat) {
case FileFormat::ELF:
// FIXME: Currently, we call elf::executeObjcopyOnRawBinary even if the
@@ -148,6 +152,8 @@ static Error executeObjcopyOnRawBinary(const CopyConfig &Config,
case FileFormat::Binary:
case FileFormat::IHex:
case FileFormat::Unspecified:
+ if (Error E = Config.parseELFConfig())
+ return E;
return elf::executeObjcopyOnRawBinary(Config, In, Out);
}
@@ -156,11 +162,13 @@ static Error executeObjcopyOnRawBinary(const CopyConfig &Config,
/// The function executeObjcopyOnBinary does the dispatch based on the format
/// of the input binary (ELF, MachO or COFF).
-static Error executeObjcopyOnBinary(const CopyConfig &Config,
- object::Binary &In, Buffer &Out) {
- if (auto *ELFBinary = dyn_cast<object::ELFObjectFileBase>(&In))
+static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In,
+ Buffer &Out) {
+ if (auto *ELFBinary = dyn_cast<object::ELFObjectFileBase>(&In)) {
+ if (Error E = Config.parseELFConfig())
+ return E;
return elf::executeObjcopyOnBinary(Config, *ELFBinary, Out);
- else if (auto *COFFBinary = dyn_cast<object::COFFObjectFile>(&In))
+ } else if (auto *COFFBinary = dyn_cast<object::COFFObjectFile>(&In))
return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out);
else if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In))
return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out);
@@ -169,8 +177,7 @@ static Error executeObjcopyOnBinary(const CopyConfig &Config,
"unsupported object file format");
}
-static Error executeObjcopyOnArchive(const CopyConfig &Config,
- const Archive &Ar) {
+static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) {
std::vector<NewArchiveMember> NewArchiveMembers;
Error Err = Error::success();
for (const Archive::Child &Child : Ar.children(Err)) {
@@ -246,7 +253,7 @@ static Error restoreStatOnFile(StringRef Filename,
/// The function executeObjcopy does the higher level dispatch based on the type
/// of input (raw binary, archive or single object file) and takes care of the
/// format-agnostic modifications, i.e. preserving dates.
-static Error executeObjcopy(const CopyConfig &Config) {
+static Error executeObjcopy(CopyConfig &Config) {
sys::fs::file_status Stat;
if (Config.InputFilename != "-") {
if (auto EC = sys::fs::status(Config.InputFilename, Stat))
@@ -255,7 +262,7 @@ static Error executeObjcopy(const CopyConfig &Config) {
Stat.permissions(static_cast<sys::fs::perms>(0777));
}
- typedef Error (*ProcessRawFn)(const CopyConfig &, MemoryBuffer &, Buffer &);
+ using ProcessRawFn = Error (*)(CopyConfig &, MemoryBuffer &, Buffer &);
ProcessRawFn ProcessRaw;
switch (Config.InputFormat) {
case FileFormat::Binary:
@@ -310,15 +317,31 @@ int main(int argc, char **argv) {
InitLLVM X(argc, argv);
ToolName = argv[0];
bool IsStrip = sys::path::stem(ToolName).contains("strip");
+
+ // Expand response files.
+ // TODO: Move these lines, which are copied from lib/Support/CommandLine.cpp,
+ // into a separate function in the CommandLine library and call that function
+ // here. This is duplicated code.
+ SmallVector<const char *, 20> NewArgv(argv, argv + argc);
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ cl::ExpandResponseFiles(Saver,
+ Triple(sys::getProcessTriple()).isOSWindows()
+ ? cl::TokenizeWindowsCommandLine
+ : cl::TokenizeGNUCommandLine,
+ NewArgv);
+
+ auto Args = makeArrayRef(NewArgv).drop_front();
+
Expected<DriverConfig> DriverConfig =
- IsStrip ? parseStripOptions(makeArrayRef(argv + 1, argc), reportWarning)
- : parseObjcopyOptions(makeArrayRef(argv + 1, argc));
+ IsStrip ? parseStripOptions(Args, reportWarning)
+ : parseObjcopyOptions(Args, reportWarning);
if (!DriverConfig) {
logAllUnhandledErrors(DriverConfig.takeError(),
WithColor::error(errs(), ToolName));
return 1;
}
- for (const CopyConfig &CopyConfig : DriverConfig->CopyConfigs) {
+ for (CopyConfig &CopyConfig : DriverConfig->CopyConfigs) {
if (Error E = executeObjcopy(CopyConfig)) {
logAllUnhandledErrors(std::move(E), WithColor::error(errs(), ToolName));
return 1;
diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp
index 1ba0a68902c9..60b0f5a3cbd1 100644
--- a/tools/llvm-objdump/COFFDump.cpp
+++ b/tools/llvm-objdump/COFFDump.cpp
@@ -234,15 +234,14 @@ printSEHTable(const COFFObjectFile *Obj, uint32_t TableVA, int Count) {
if (Count == 0)
return;
- const pe32_header *PE32Header;
- error(Obj->getPE32Header(PE32Header));
- uint32_t ImageBase = PE32Header->ImageBase;
uintptr_t IntPtr = 0;
- error(Obj->getVaPtr(TableVA, IntPtr));
+ if (std::error_code EC = Obj->getVaPtr(TableVA, IntPtr))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+
const support::ulittle32_t *P = (const support::ulittle32_t *)IntPtr;
outs() << "SEH Table:";
for (int I = 0; I < Count; ++I)
- outs() << format(" 0x%x", P[I] + ImageBase);
+ outs() << format(" 0x%x", P[I] + Obj->getPE32Header()->ImageBase);
outs() << "\n\n";
}
@@ -268,22 +267,24 @@ static void printTLSDirectoryT(const coff_tls_directory<T> *TLSDir) {
}
static void printTLSDirectory(const COFFObjectFile *Obj) {
- const pe32_header *PE32Header;
- error(Obj->getPE32Header(PE32Header));
-
- const pe32plus_header *PE32PlusHeader;
- error(Obj->getPE32PlusHeader(PE32PlusHeader));
+ const pe32_header *PE32Header = Obj->getPE32Header();
+ const pe32plus_header *PE32PlusHeader = Obj->getPE32PlusHeader();
// Skip if it's not executable.
if (!PE32Header && !PE32PlusHeader)
return;
const data_directory *DataDir;
- error(Obj->getDataDirectory(COFF::TLS_TABLE, DataDir));
- uintptr_t IntPtr = 0;
+ if (std::error_code EC = Obj->getDataDirectory(COFF::TLS_TABLE, DataDir))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+
if (DataDir->RelativeVirtualAddress == 0)
return;
- error(Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr));
+
+ uintptr_t IntPtr = 0;
+ if (std::error_code EC =
+ Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr))
+ reportError(errorCodeToError(EC), Obj->getFileName());
if (PE32Header) {
auto *TLSDir = reinterpret_cast<const coff_tls_directory32 *>(IntPtr);
@@ -298,9 +299,7 @@ static void printTLSDirectory(const COFFObjectFile *Obj) {
static void printLoadConfiguration(const COFFObjectFile *Obj) {
// Skip if it's not executable.
- const pe32_header *PE32Header;
- error(Obj->getPE32Header(PE32Header));
- if (!PE32Header)
+ if (!Obj->getPE32Header())
return;
// Currently only x86 is supported
@@ -308,11 +307,18 @@ static void printLoadConfiguration(const COFFObjectFile *Obj) {
return;
const data_directory *DataDir;
- error(Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataDir));
+
+ if (std::error_code EC =
+ Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataDir))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+
uintptr_t IntPtr = 0;
if (DataDir->RelativeVirtualAddress == 0)
return;
- error(Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr));
+
+ if (std::error_code EC =
+ Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr))
+ reportError(errorCodeToError(EC), Obj->getFileName());
auto *LoadConf = reinterpret_cast<const coff_load_configuration32 *>(IntPtr);
outs() << "Load configuration:"
@@ -442,8 +448,7 @@ static bool getPDataSection(const COFFObjectFile *Obj,
std::vector<RelocationRef> &Rels,
const RuntimeFunction *&RFStart, int &NumRFs) {
for (const SectionRef &Section : Obj->sections()) {
- StringRef Name;
- error(Section.getName(Name));
+ StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
if (Name != ".pdata")
continue;
@@ -455,7 +460,9 @@ static bool getPDataSection(const COFFObjectFile *Obj,
llvm::sort(Rels, isRelocAddressLess);
ArrayRef<uint8_t> Contents;
- error(Obj->getSectionContents(Pdata, Contents));
+ if (Error E = Obj->getSectionContents(Pdata, Contents))
+ reportError(std::move(E), Obj->getFileName());
+
if (Contents.empty())
continue;
@@ -571,10 +578,12 @@ static void printRuntimeFunctionRels(const COFFObjectFile *Obj,
ArrayRef<uint8_t> XContents;
uint64_t UnwindInfoOffset = 0;
- error(getSectionContents(
- Obj, Rels, SectionOffset +
- /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8,
- XContents, UnwindInfoOffset));
+ if (Error E = getSectionContents(
+ Obj, Rels,
+ SectionOffset +
+ /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8,
+ XContents, UnwindInfoOffset))
+ reportError(std::move(E), Obj->getFileName());
if (XContents.empty())
return;
@@ -650,9 +659,12 @@ void printCOFFSymbolTable(const object::COFFImportFile *i) {
void printCOFFSymbolTable(const COFFObjectFile *coff) {
for (unsigned SI = 0, SE = coff->getNumberOfSymbols(); SI != SE; ++SI) {
Expected<COFFSymbolRef> Symbol = coff->getSymbol(SI);
+ if (!Symbol)
+ reportError(Symbol.takeError(), coff->getFileName());
+
StringRef Name;
- error(Symbol.takeError());
- error(coff->getSymbolName(*Symbol, Name));
+ if (std::error_code EC = coff->getSymbolName(*Symbol, Name))
+ reportError(errorCodeToError(EC), coff->getFileName());
outs() << "[" << format("%2d", SI) << "]"
<< "(sec " << format("%2d", int(Symbol->getSectionNumber())) << ")"
@@ -682,7 +694,9 @@ void printCOFFSymbolTable(const COFFObjectFile *coff) {
for (unsigned AI = 0, AE = Symbol->getNumberOfAuxSymbols(); AI < AE; ++AI, ++SI) {
if (Symbol->isSectionDefinition()) {
const coff_aux_section_definition *asd;
- error(coff->getAuxSymbol<coff_aux_section_definition>(SI + 1, asd));
+ if (std::error_code EC =
+ coff->getAuxSymbol<coff_aux_section_definition>(SI + 1, asd))
+ reportError(errorCodeToError(EC), coff->getFileName());
int32_t AuxNumber = asd->getNumber(Symbol->isBigObj());
@@ -697,7 +711,8 @@ void printCOFFSymbolTable(const COFFObjectFile *coff) {
, unsigned(asd->Selection));
} else if (Symbol->isFileRecord()) {
const char *FileName;
- error(coff->getAuxSymbol<char>(SI + 1, FileName));
+ if (std::error_code EC = coff->getAuxSymbol<char>(SI + 1, FileName))
+ reportError(errorCodeToError(EC), coff->getFileName());
StringRef Name(FileName, Symbol->getNumberOfAuxSymbols() *
coff->getSymbolTableEntrySize());
@@ -707,7 +722,9 @@ void printCOFFSymbolTable(const COFFObjectFile *coff) {
break;
} else if (Symbol->isWeakExternal()) {
const coff_aux_weak_external *awe;
- error(coff->getAuxSymbol<coff_aux_weak_external>(SI + 1, awe));
+ if (std::error_code EC =
+ coff->getAuxSymbol<coff_aux_weak_external>(SI + 1, awe))
+ reportError(errorCodeToError(EC), coff->getFileName());
outs() << "AUX " << format("indx %d srch %d\n",
static_cast<uint32_t>(awe->TagIndex),
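A recurring shape in this file's changes: the old error(std::error_code) helper (removed from llvm-objdump.cpp further below) exited without naming the input, while the replacement adapts std::error_code-returning object APIs to the Error-based reportError with the file name attached. The pattern in isolation (sketch, with TableVA and IntPtr as in printSEHTable above; reportError stands for the helper defined in llvm-objdump.cpp):

// Adapt a std::error_code result to the Error-based reporting path.
if (std::error_code EC = Obj->getVaPtr(TableVA, IntPtr))
  reportError(llvm::errorCodeToError(EC), Obj->getFileName());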
diff --git a/tools/llvm-objdump/ELFDump.cpp b/tools/llvm-objdump/ELFDump.cpp
index 9c4d67d0f1bd..93d070eee16c 100644
--- a/tools/llvm-objdump/ELFDump.cpp
+++ b/tools/llvm-objdump/ELFDump.cpp
@@ -178,7 +178,7 @@ void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
outs() << (Data + Dyn.d_un.d_val) << "\n";
continue;
}
- warn(toString(StrTabOrErr.takeError()));
+ reportWarning(toString(StrTabOrErr.takeError()), Filename);
consumeError(StrTabOrErr.takeError());
}
outs() << format(Fmt, (uint64_t)Dyn.d_un.d_val);
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 58ff7be4543c..e4684d0f1601 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -236,11 +236,11 @@ struct SymbolSorter {
bool operator()(const SymbolRef &A, const SymbolRef &B) {
Expected<SymbolRef::Type> ATypeOrErr = A.getType();
if (!ATypeOrErr)
- report_error(ATypeOrErr.takeError(), A.getObject()->getFileName());
+ reportError(ATypeOrErr.takeError(), A.getObject()->getFileName());
SymbolRef::Type AType = *ATypeOrErr;
Expected<SymbolRef::Type> BTypeOrErr = B.getType();
if (!BTypeOrErr)
- report_error(BTypeOrErr.takeError(), B.getObject()->getFileName());
+ reportError(BTypeOrErr.takeError(), B.getObject()->getFileName());
SymbolRef::Type BType = *BTypeOrErr;
uint64_t AAddr = (AType != SymbolRef::ST_Function) ? 0 : A.getValue();
uint64_t BAddr = (BType != SymbolRef::ST_Function) ? 0 : B.getValue();
@@ -371,11 +371,8 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
Symbols.push_back(Symbol);
}
- for (const SectionRef &Section : MachOObj->sections()) {
- StringRef SectName;
- Section.getName(SectName);
+ for (const SectionRef &Section : MachOObj->sections())
Sections.push_back(Section);
- }
bool BaseSegmentAddressSet = false;
for (const auto &Command : MachOObj->load_commands()) {
@@ -393,10 +390,40 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
BaseSegmentAddressSet = true;
BaseSegmentAddress = SLC.vmaddr;
}
+ } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
+ MachO::segment_command_64 SLC = MachOObj->getSegment64LoadCommand(Command);
+ StringRef SegName = SLC.segname;
+ if (!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
+ BaseSegmentAddressSet = true;
+ BaseSegmentAddress = SLC.vmaddr;
+ }
}
}
}
+static bool DumpAndSkipDataInCode(uint64_t PC, const uint8_t *bytes,
+ DiceTable &Dices, uint64_t &InstSize) {
+ // Check the data in code table here to see if this is data not an
+ // instruction to be disassembled.
+ DiceTable Dice;
+ Dice.push_back(std::make_pair(PC, DiceRef()));
+ dice_table_iterator DTI =
+ std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(),
+ compareDiceTableEntries);
+ if (DTI != Dices.end()) {
+ uint16_t Length;
+ DTI->second.getLength(Length);
+ uint16_t Kind;
+ DTI->second.getKind(Kind);
+ InstSize = DumpDataInCode(bytes, Length, Kind);
+ if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) &&
+ (PC == (DTI->first + Length - 1)) && (Length & 1))
+ InstSize++;
+ return true;
+ }
+ return false;
+}
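This helper factors out the data-in-code lookup that the Thumb disassembly loop previously did inline (see the removal in the @@ -7496 hunk below); both disassembly loops now call it the same way. Usage as it appears at the call sites later in this patch, where PC, Bytes, Index, Dices, and Size come from the surrounding per-instruction loop:

// Inside the disassembly loop (sketch of the call sites added below):
if (DumpAndSkipDataInCode(PC, Bytes.data() + Index, Dices, Size))
  continue; // The bytes were data-in-code; Size was set to their length.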
+
static void printRelocationTargetName(const MachOObjectFile *O,
const MachO::any_relocation_info &RE,
raw_string_ostream &Fmt) {
@@ -419,13 +446,11 @@ static void printRelocationTargetName(const MachOObjectFile *O,
// If we couldn't find a symbol that this relocation refers to, try
// to find a section beginning instead.
for (const SectionRef &Section : ToolSectionFilter(*O)) {
- StringRef Name;
uint64_t Addr = Section.getAddress();
if (Addr != Val)
continue;
- if (std::error_code EC = Section.getName(Name))
- report_error(errorCodeToError(EC), O->getFileName());
- Fmt << Name;
+ StringRef NameOrErr = unwrapOrError(Section.getName(), O->getFileName());
+ Fmt << NameOrErr;
return;
}
@@ -458,10 +483,14 @@ static void printRelocationTargetName(const MachOObjectFile *O,
--I;
advance(SI, 1);
}
- if (SI == O->section_end())
+ if (SI == O->section_end()) {
Fmt << Val << " (?,?)";
- else
- SI->getName(S);
+ } else {
+ if (Expected<StringRef> NameOrErr = SI->getName())
+ S = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+ }
}
Fmt << S;
@@ -504,8 +533,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj,
// NOTE: Scattered relocations don't exist on x86_64.
unsigned RType = Obj->getAnyRelocationType(RENext);
if (RType != MachO::X86_64_RELOC_UNSIGNED)
- report_error(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after "
- "X86_64_RELOC_SUBTRACTOR.");
+ reportError(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after "
+ "X86_64_RELOC_SUBTRACTOR.");
// The X86_64_RELOC_UNSIGNED contains the minuend symbol;
// X86_64_RELOC_SUBTRACTOR contains the subtrahend.
@@ -553,8 +582,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj,
unsigned RType = Obj->getAnyRelocationType(RENext);
if (RType != MachO::GENERIC_RELOC_PAIR)
- report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
- "GENERIC_RELOC_SECTDIFF.");
+ reportError(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_SECTDIFF.");
printRelocationTargetName(Obj, RE, Fmt);
Fmt << "-";
@@ -574,8 +603,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj,
// GENERIC_RELOC_PAIR.
unsigned RType = Obj->getAnyRelocationType(RENext);
if (RType != MachO::GENERIC_RELOC_PAIR)
- report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
- "GENERIC_RELOC_LOCAL_SECTDIFF.");
+ reportError(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_LOCAL_SECTDIFF.");
printRelocationTargetName(Obj, RE, Fmt);
Fmt << "-";
@@ -614,8 +643,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj,
// ARM_RELOC_PAIR.
unsigned RType = Obj->getAnyRelocationType(RENext);
if (RType != MachO::ARM_RELOC_PAIR)
- report_error(Obj->getFileName(), "Expected ARM_RELOC_PAIR after "
- "ARM_RELOC_HALF");
+ reportError(Obj->getFileName(), "Expected ARM_RELOC_PAIR after "
+ "ARM_RELOC_HALF");
// NOTE: The half of the target virtual address is stashed in the
// address field of the secondary relocation, but we can't reverse
@@ -1501,7 +1530,12 @@ static void DumpLiteralPointerSection(MachOObjectFile *O,
uint64_t SectSize = Sect->getSize();
StringRef SectName;
- Sect->getName(SectName);
+ Expected<StringRef> SectNameOrErr = Sect->getName();
+ if (SectNameOrErr)
+ SectName = *SectNameOrErr;
+ else
+ consumeError(SectNameOrErr.takeError());
+
DataRefImpl Ref = Sect->getRawDataRefImpl();
StringRef SegmentName = O->getSectionFinalSegmentName(Ref);
outs() << SegmentName << ":" << SectName << ":";
@@ -1713,7 +1747,12 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
}
for (const SectionRef &Section : O->sections()) {
StringRef SectName;
- Section.getName(SectName);
+ Expected<StringRef> SecNameOrErr = Section.getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = Section.getRawDataRefImpl();
StringRef SegName = O->getSectionFinalSegmentName(Ref);
if ((DumpSegName.empty() || SegName == DumpSegName) &&
@@ -1809,7 +1848,12 @@ static void DumpInfoPlistSectionContents(StringRef Filename,
MachOObjectFile *O) {
for (const SectionRef &Section : O->sections()) {
StringRef SectName;
- Section.getName(SectName);
+ Expected<StringRef> SecNameOrErr = Section.getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = Section.getRawDataRefImpl();
StringRef SegName = O->getSectionFinalSegmentName(Ref);
if (SegName == "__TEXT" && SectName == "__info_plist") {
@@ -1901,12 +1945,16 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
// the error message.
if (Disassemble || IndirectSymbols || !FilterSections.empty() || UnwindInfo)
if (Error Err = MachOOF->checkSymbolTable())
- report_error(std::move(Err), ArchiveName, FileName, ArchitectureName);
+ reportError(std::move(Err), FileName, ArchiveName, ArchitectureName);
if (DisassembleAll) {
for (const SectionRef &Section : MachOOF->sections()) {
StringRef SectName;
- Section.getName(SectName);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ SectName = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
if (SectName.equals("__text")) {
DataRefImpl Ref = Section.getRawDataRefImpl();
StringRef SegName = MachOOF->getSectionFinalSegmentName(Ref);
@@ -2151,7 +2199,7 @@ static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB,
outs() << " offset " << OFA.getOffset();
if (OFA.getOffset() > size)
outs() << " (past end of file)";
- if (OFA.getOffset() % (1 << OFA.getAlign()) != 0)
+ if (OFA.getOffset() % (1ull << OFA.getAlign()) != 0)
outs() << " (not aligned on it's alignment (2^" << OFA.getAlign() << ")";
outs() << "\n";
outs() << " size " << OFA.getSize();
@@ -2165,12 +2213,14 @@ static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB,
}
static void printArchiveChild(StringRef Filename, const Archive::Child &C,
- bool verbose, bool print_offset,
+ size_t ChildIndex, bool verbose,
+ bool print_offset,
StringRef ArchitectureName = StringRef()) {
if (print_offset)
outs() << C.getChildOffset() << "\t";
sys::fs::perms Mode =
- unwrapOrError(C.getAccessMode(), Filename, C, ArchitectureName);
+ unwrapOrError(C.getAccessMode(), getFileNameForError(C, ChildIndex),
+ Filename, ArchitectureName);
if (verbose) {
// FIXME: this first dash, "-", is for (Mode & S_IFMT) == S_IFREG.
// But there is nothing in sys::fs::perms for S_IFMT or S_IFREG.
@@ -2188,11 +2238,14 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C,
outs() << format("0%o ", Mode);
}
- outs() << format(
- "%3d/%-3d %5" PRId64 " ",
- unwrapOrError(C.getUID(), Filename, C, ArchitectureName),
- unwrapOrError(C.getGID(), Filename, C, ArchitectureName),
- unwrapOrError(C.getRawSize(), Filename, C, ArchitectureName));
+ outs() << format("%3d/%-3d %5" PRId64 " ",
+ unwrapOrError(C.getUID(), getFileNameForError(C, ChildIndex),
+ Filename, ArchitectureName),
+ unwrapOrError(C.getGID(), getFileNameForError(C, ChildIndex),
+ Filename, ArchitectureName),
+ unwrapOrError(C.getRawSize(),
+ getFileNameForError(C, ChildIndex), Filename,
+ ArchitectureName));
StringRef RawLastModified = C.getRawLastModified();
if (verbose) {
@@ -2215,14 +2268,17 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C,
Expected<StringRef> NameOrErr = C.getName();
if (!NameOrErr) {
consumeError(NameOrErr.takeError());
- outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName)
+ outs() << unwrapOrError(C.getRawName(),
+ getFileNameForError(C, ChildIndex), Filename,
+ ArchitectureName)
<< "\n";
} else {
StringRef Name = NameOrErr.get();
outs() << Name << "\n";
}
} else {
- outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName)
+ outs() << unwrapOrError(C.getRawName(), getFileNameForError(C, ChildIndex),
+ Filename, ArchitectureName)
<< "\n";
}
}
@@ -2231,11 +2287,13 @@ static void printArchiveHeaders(StringRef Filename, Archive *A, bool verbose,
bool print_offset,
StringRef ArchitectureName = StringRef()) {
Error Err = Error::success();
+ size_t I = 0;
for (const auto &C : A->children(Err, false))
- printArchiveChild(Filename, C, verbose, print_offset, ArchitectureName);
+ printArchiveChild(Filename, C, I++, verbose, print_offset,
+ ArchitectureName);
if (Err)
- report_error(std::move(Err), StringRef(), Filename, ArchitectureName);
+ reportError(std::move(Err), Filename, "", ArchitectureName);
}
static bool ValidateArchFlags() {
@@ -2267,7 +2325,7 @@ void parseInputMachO(StringRef Filename) {
Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Filename);
if (!BinaryOrErr) {
if (Error E = isNotObjectErrorInvalidFileType(BinaryOrErr.takeError()))
- report_error(std::move(E), Filename);
+ reportError(std::move(E), Filename);
else
outs() << Filename << ": is not an object file\n";
return;
@@ -2280,11 +2338,13 @@ void parseInputMachO(StringRef Filename) {
printArchiveHeaders(Filename, A, !NonVerbose, ArchiveMemberOffsets);
Error Err = Error::success();
+ unsigned I = -1;
for (auto &C : A->children(Err)) {
+ ++I;
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(std::move(E), Filename, C);
+ reportError(std::move(E), getFileNameForError(C, I), Filename);
continue;
}
if (MachOObjectFile *O = dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
@@ -2294,7 +2354,7 @@ void parseInputMachO(StringRef Filename) {
}
}
if (Err)
- report_error(std::move(Err), Filename);
+ reportError(std::move(Err), Filename);
return;
}
if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
@@ -2346,7 +2406,7 @@ void parseInputMachO(MachOUniversalBinary *UB) {
ProcessMachO(Filename, MachOOF, "", ArchitectureName);
} else if (Error E = isNotObjectErrorInvalidFileType(
ObjOrErr.takeError())) {
- report_error(std::move(E), Filename, StringRef(), ArchitectureName);
+ reportError(std::move(E), "", Filename, ArchitectureName);
continue;
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
I->getAsArchive()) {
@@ -2359,11 +2419,15 @@ void parseInputMachO(MachOUniversalBinary *UB) {
printArchiveHeaders(Filename, A.get(), !NonVerbose,
ArchiveMemberOffsets, ArchitectureName);
Error Err = Error::success();
+ unsigned I = -1;
for (auto &C : A->children(Err)) {
+ ++I;
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
- if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(std::move(E), Filename, C, ArchitectureName);
+ if (Error E =
+ isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ reportError(std::move(E), getFileNameForError(C, I), Filename,
+ ArchitectureName);
continue;
}
if (MachOObjectFile *O =
@@ -2371,12 +2435,13 @@ void parseInputMachO(MachOUniversalBinary *UB) {
ProcessMachO(Filename, O, O->getFileName(), ArchitectureName);
}
if (Err)
- report_error(std::move(Err), Filename);
+ reportError(std::move(Err), Filename);
} else {
consumeError(AOrErr.takeError());
- error("Mach-O universal file: " + Filename + " for " +
- "architecture " + StringRef(I->getArchFlagName()) +
- " is not a Mach-O file or an archive file");
+ reportError(Filename,
+ "Mach-O universal file for architecture " +
+ StringRef(I->getArchFlagName()) +
+ " is not a Mach-O file or an archive file");
}
}
}
@@ -2406,7 +2471,7 @@ void parseInputMachO(MachOUniversalBinary *UB) {
ProcessMachO(Filename, MachOOF);
} else if (Error E =
isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
- report_error(std::move(E), Filename);
+ reportError(std::move(E), Filename);
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
@@ -2415,12 +2480,14 @@ void parseInputMachO(MachOUniversalBinary *UB) {
printArchiveHeaders(Filename, A.get(), !NonVerbose,
ArchiveMemberOffsets);
Error Err = Error::success();
+ unsigned I = -1;
for (auto &C : A->children(Err)) {
+ ++I;
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (Error E =
isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(std::move(E), Filename, C);
+ reportError(std::move(E), getFileNameForError(C, I), Filename);
continue;
}
if (MachOObjectFile *O =
@@ -2428,12 +2495,12 @@ void parseInputMachO(MachOUniversalBinary *UB) {
ProcessMachO(Filename, O, O->getFileName());
}
if (Err)
- report_error(std::move(Err), Filename);
+ reportError(std::move(Err), Filename);
} else {
consumeError(AOrErr.takeError());
- error("Mach-O universal file: " + Filename + " for architecture " +
- StringRef(I->getArchFlagName()) +
- " is not a Mach-O file or an archive file");
+ reportError(Filename, "Mach-O universal file for architecture " +
+ StringRef(I->getArchFlagName()) +
+ " is not a Mach-O file or an archive file");
}
return;
}
@@ -2455,7 +2522,7 @@ void parseInputMachO(MachOUniversalBinary *UB) {
ProcessMachO(Filename, MachOOF, "", ArchitectureName);
} else if (Error E =
isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
- report_error(std::move(E), StringRef(), Filename, ArchitectureName);
+ reportError(std::move(E), Filename, "", ArchitectureName);
} else if (Expected<std::unique_ptr<Archive>> AOrErr = I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
outs() << "Archive : " << Filename;
@@ -2466,11 +2533,14 @@ void parseInputMachO(MachOUniversalBinary *UB) {
printArchiveHeaders(Filename, A.get(), !NonVerbose,
ArchiveMemberOffsets, ArchitectureName);
Error Err = Error::success();
+ unsigned I = -1;
for (auto &C : A->children(Err)) {
+ ++I;
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(std::move(E), Filename, C, ArchitectureName);
+ reportError(std::move(E), getFileNameForError(C, I), Filename,
+ ArchitectureName);
continue;
}
if (MachOObjectFile *O =
@@ -2481,12 +2551,12 @@ void parseInputMachO(MachOUniversalBinary *UB) {
}
}
if (Err)
- report_error(std::move(Err), Filename);
+ reportError(std::move(Err), Filename);
} else {
consumeError(AOrErr.takeError());
- error("Mach-O universal file: " + Filename + " for architecture " +
- StringRef(I->getArchFlagName()) +
- " is not a Mach-O file or an archive file");
+ reportError(Filename, "Mach-O universal file for architecture " +
+ StringRef(I->getArchFlagName()) +
+ " is not a Mach-O file or an archive file");
}
}
}
@@ -3083,7 +3153,7 @@ static void method_reference(struct DisassembleInfo *info,
if (strcmp(*ReferenceName, "_objc_msgSend") == 0) {
if (info->selector_name != nullptr) {
if (info->class_name != nullptr) {
- info->method = llvm::make_unique<char[]>(
+ info->method = std::make_unique<char[]>(
5 + strlen(info->class_name) + strlen(info->selector_name));
char *method = info->method.get();
if (method != nullptr) {
@@ -3097,7 +3167,7 @@ static void method_reference(struct DisassembleInfo *info,
}
} else {
info->method =
- llvm::make_unique<char[]>(9 + strlen(info->selector_name));
+ std::make_unique<char[]>(9 + strlen(info->selector_name));
char *method = info->method.get();
if (method != nullptr) {
if (Arch == Triple::x86_64)
@@ -3117,7 +3187,7 @@ static void method_reference(struct DisassembleInfo *info,
} else if (strcmp(*ReferenceName, "_objc_msgSendSuper2") == 0) {
if (info->selector_name != nullptr) {
info->method =
- llvm::make_unique<char[]>(17 + strlen(info->selector_name));
+ std::make_unique<char[]>(17 + strlen(info->selector_name));
char *method = info->method.get();
if (method != nullptr) {
if (Arch == Triple::x86_64)
@@ -3217,7 +3287,13 @@ static const char *get_pointer_64(uint64_t Address, uint32_t &offset,
continue;
if (objc_only) {
StringRef SectName;
- ((*(info->Sections))[SectIdx]).getName(SectName);
+ Expected<StringRef> SecNameOrErr =
+ ((*(info->Sections))[SectIdx]).getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = ((*(info->Sections))[SectIdx]).getRawDataRefImpl();
StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
if (SegName != "__OBJC" && SectName != "__cstring")
@@ -4009,7 +4085,12 @@ static const SectionRef get_section(MachOObjectFile *O, const char *segname,
const char *sectname) {
for (const SectionRef &Section : O->sections()) {
StringRef SectName;
- Section.getName(SectName);
+ Expected<StringRef> SecNameOrErr = Section.getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = Section.getRawDataRefImpl();
StringRef SegName = O->getSectionFinalSegmentName(Ref);
if (SegName == segname && SectName == sectname)
@@ -4026,7 +4107,12 @@ walk_pointer_list_64(const char *listname, const SectionRef S,
return;
StringRef SectName;
- S.getName(SectName);
+ Expected<StringRef> SecNameOrErr = S.getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = S.getRawDataRefImpl();
StringRef SegName = O->getSectionFinalSegmentName(Ref);
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
@@ -4075,8 +4161,7 @@ walk_pointer_list_32(const char *listname, const SectionRef S,
if (S == SectionRef())
return;
- StringRef SectName;
- S.getName(SectName);
+ StringRef SectName = unwrapOrError(S.getName(), O->getFileName());
DataRefImpl Ref = S.getRawDataRefImpl();
StringRef SegName = O->getSectionFinalSegmentName(Ref);
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
@@ -5750,7 +5835,12 @@ static void print_message_refs64(SectionRef S, struct DisassembleInfo *info) {
return;
StringRef SectName;
- S.getName(SectName);
+ Expected<StringRef> SecNameOrErr = S.getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = S.getRawDataRefImpl();
StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
@@ -5813,7 +5903,12 @@ static void print_message_refs32(SectionRef S, struct DisassembleInfo *info) {
return;
StringRef SectName;
- S.getName(SectName);
+ Expected<StringRef> SecNameOrErr = S.getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = S.getRawDataRefImpl();
StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
@@ -5859,7 +5954,12 @@ static void print_image_info64(SectionRef S, struct DisassembleInfo *info) {
return;
StringRef SectName;
- S.getName(SectName);
+ Expected<StringRef> SecNameOrErr = S.getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = S.getRawDataRefImpl();
StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
@@ -5916,7 +6016,12 @@ static void print_image_info32(SectionRef S, struct DisassembleInfo *info) {
return;
StringRef SectName;
- S.getName(SectName);
+ Expected<StringRef> SecNameOrErr = S.getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = S.getRawDataRefImpl();
StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
@@ -5966,7 +6071,12 @@ static void print_image_info(SectionRef S, struct DisassembleInfo *info) {
const char *r;
StringRef SectName;
- S.getName(SectName);
+ Expected<StringRef> SecNameOrErr = S.getName();
+ if (SecNameOrErr)
+ SectName = *SecNameOrErr;
+ else
+ consumeError(SecNameOrErr.takeError());
+
DataRefImpl Ref = S.getRawDataRefImpl();
StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
@@ -6001,11 +6111,8 @@ static void printObjc2_64bit_MetaData(MachOObjectFile *O, bool verbose) {
CreateSymbolAddressMap(O, &AddrMap);
std::vector<SectionRef> Sections;
- for (const SectionRef &Section : O->sections()) {
- StringRef SectName;
- Section.getName(SectName);
+ for (const SectionRef &Section : O->sections())
Sections.push_back(Section);
- }
struct DisassembleInfo info(O, &AddrMap, &Sections, verbose);
@@ -6086,11 +6193,8 @@ static void printObjc2_32bit_MetaData(MachOObjectFile *O, bool verbose) {
CreateSymbolAddressMap(O, &AddrMap);
std::vector<SectionRef> Sections;
- for (const SectionRef &Section : O->sections()) {
- StringRef SectName;
- Section.getName(SectName);
+ for (const SectionRef &Section : O->sections())
Sections.push_back(Section);
- }
struct DisassembleInfo info(O, &AddrMap, &Sections, verbose);
@@ -6184,11 +6288,8 @@ static bool printObjc1_32bit_MetaData(MachOObjectFile *O, bool verbose) {
CreateSymbolAddressMap(O, &AddrMap);
std::vector<SectionRef> Sections;
- for (const SectionRef &Section : O->sections()) {
- StringRef SectName;
- Section.getName(SectName);
+ for (const SectionRef &Section : O->sections())
Sections.push_back(Section);
- }
struct DisassembleInfo info(O, &AddrMap, &Sections, verbose);
@@ -6345,11 +6446,8 @@ static void DumpProtocolSection(MachOObjectFile *O, const char *sect,
CreateSymbolAddressMap(O, &AddrMap);
std::vector<SectionRef> Sections;
- for (const SectionRef &Section : O->sections()) {
- StringRef SectName;
- Section.getName(SectName);
+ for (const SectionRef &Section : O->sections())
Sections.push_back(Section);
- }
struct DisassembleInfo info(O, &AddrMap, &Sections, true);
@@ -7203,7 +7301,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
std::vector<SectionRef> Sections;
std::vector<SymbolRef> Symbols;
SmallVector<uint64_t, 8> FoundFns;
- uint64_t BaseSegmentAddress;
+ uint64_t BaseSegmentAddress = 0;
getSectionsAndSymbols(MachOOF, Sections, Symbols, FoundFns,
BaseSegmentAddress);
@@ -7242,10 +7340,24 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// A separate DSym file path was specified, parse it as a macho file,
// get the sections and supply it to the section name parsing machinery.
if (!DSYMFile.empty()) {
+ std::string DSYMPath(DSYMFile);
+
+ // If DSYMPath is a .dSYM directory, append the Mach-O file.
+ if (llvm::sys::fs::is_directory(DSYMPath) &&
+ llvm::sys::path::extension(DSYMPath) == ".dSYM") {
+ SmallString<128> ShortName(llvm::sys::path::filename(DSYMPath));
+ llvm::sys::path::replace_extension(ShortName, "");
+ SmallString<1024> FullPath(DSYMPath);
+ llvm::sys::path::append(FullPath, "Contents", "Resources", "DWARF",
+ ShortName);
+ DSYMPath = FullPath.str();
+ }
+
+ // Load the file.
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
- MemoryBuffer::getFileOrSTDIN(DSYMFile);
+ MemoryBuffer::getFileOrSTDIN(DSYMPath);
if (std::error_code EC = BufOrErr.getError()) {
- report_error(errorCodeToError(EC), DSYMFile);
+ reportError(errorCodeToError(EC), DSYMPath);
return;
}
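The directory handling added above lets the dSYM argument name the .dSYM bundle itself rather than the Mach-O buried inside it. With a hypothetical bundle name, the rewrite performed is:

//   DSYMFile  = "MyTool.dSYM"                                (a directory)
//   ShortName = "MyTool"                                     (filename, ".dSYM" stripped)
//   DSYMPath  = "MyTool.dSYM/Contents/Resources/DWARF/MyTool"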
@@ -7255,13 +7367,12 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
Expected<std::unique_ptr<Binary>> BinaryOrErr =
createBinary(DSYMBuf.get()->getMemBufferRef());
if (!BinaryOrErr) {
- report_error(BinaryOrErr.takeError(), DSYMFile);
+ reportError(BinaryOrErr.takeError(), DSYMPath);
return;
}
- // We need to keep the Binary elive with the buffer
+ // We need to keep the Binary alive with the buffer
DSYMBinary = std::move(BinaryOrErr.get());
-
if (ObjectFile *O = dyn_cast<ObjectFile>(DSYMBinary.get())) {
// this is a Mach-O object file, use it
if (MachOObjectFile *MachDSYM = dyn_cast<MachOObjectFile>(&*O)) {
@@ -7269,7 +7380,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
}
else {
WithColor::error(errs(), "llvm-objdump")
- << DSYMFile << " is not a Mach-O file type.\n";
+ << DSYMPath << " is not a Mach-O file type.\n";
return;
}
}
@@ -7289,19 +7400,19 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
Triple T = MachOObjectFile::getArchTriple(CPUType, CPUSubType, nullptr,
&ArchFlag);
Expected<std::unique_ptr<MachOObjectFile>> MachDSYM =
- UB->getObjectForArch(ArchFlag);
+ UB->getMachOObjectForArch(ArchFlag);
if (!MachDSYM) {
- report_error(MachDSYM.takeError(), DSYMFile);
+ reportError(MachDSYM.takeError(), DSYMPath);
return;
}
- // We need to keep the Binary elive with the buffer
+ // We need to keep the Binary alive with the buffer
DbgObj = &*MachDSYM.get();
DSYMBinary = std::move(*MachDSYM);
}
else {
WithColor::error(errs(), "llvm-objdump")
- << DSYMFile << " is not a Mach-O or Universal file type.\n";
+ << DSYMPath << " is not a Mach-O or Universal file type.\n";
return;
}
}
@@ -7314,8 +7425,12 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
outs() << "(" << DisSegName << "," << DisSectName << ") section\n";
for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
- StringRef SectName;
- if (Sections[SectIdx].getName(SectName) || SectName != DisSectName)
+ Expected<StringRef> SecNameOrErr = Sections[SectIdx].getName();
+ if (!SecNameOrErr) {
+ consumeError(SecNameOrErr.takeError());
+ continue;
+ }
+ if (*SecNameOrErr != DisSectName)
continue;
DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();
@@ -7496,24 +7611,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
if (!NoShowRawInsn || Arch == Triple::arm)
outs() << "\t";
- // Check the data in code table here to see if this is data not an
- // instruction to be disassembled.
- DiceTable Dice;
- Dice.push_back(std::make_pair(PC, DiceRef()));
- dice_table_iterator DTI =
- std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(),
- compareDiceTableEntries);
- if (DTI != Dices.end()) {
- uint16_t Length;
- DTI->second.getLength(Length);
- uint16_t Kind;
- DTI->second.getKind(Kind);
- Size = DumpDataInCode(Bytes.data() + Index, Length, Kind);
- if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) &&
- (PC == (DTI->first + Length - 1)) && (Length & 1))
- Size++;
+ if (DumpAndSkipDataInCode(PC, Bytes.data() + Index, Dices, Size))
continue;
- }
SmallVector<char, 64> AnnotationsBytes;
raw_svector_ostream Annotations(AnnotationsBytes);
@@ -7588,6 +7687,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
MCInst Inst;
uint64_t PC = SectAddress + Index;
+
+ if (DumpAndSkipDataInCode(PC, Bytes.data() + Index, Dices, InstSize))
+ continue;
+
SmallVector<char, 64> AnnotationsBytes;
raw_svector_ostream Annotations(AnnotationsBytes);
if (DisAsm->getInstruction(Inst, InstSize, Bytes.slice(Index), PC,
@@ -7724,8 +7827,12 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
auto Sym = Symbols.upper_bound(Addr);
if (Sym == Symbols.begin()) {
// The first symbol in the object is after this reference, the best we can
- // do is section-relative notation.
- RelocSection.getName(Name);
+ // do is section-relative notation.
+ if (Expected<StringRef> NameOrErr = RelocSection.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
Addend = Addr - SectionAddr;
return;
}
@@ -7744,7 +7851,11 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
// There is a symbol before this reference, but it's in a different
// section. Probably not helpful to mention it, so use the section name.
- RelocSection.getName(Name);
+ if (Expected<StringRef> NameOrErr = RelocSection.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
Addend = Addr - SectionAddr;
}
@@ -8109,7 +8220,11 @@ void printMachOUnwindInfo(const MachOObjectFile *Obj) {
for (const SectionRef &Section : Obj->sections()) {
StringRef SectName;
- Section.getName(SectName);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ SectName = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
if (SectName == "__compact_unwind")
printMachOCompactUnwindSection(Obj, Symbols, Section);
else if (SectName == "__unwind_info")
@@ -10191,7 +10306,7 @@ void printMachOExportsTrie(const object::MachOObjectFile *Obj) {
outs() << "\n";
}
if (Err)
- report_error(std::move(Err), Obj->getFileName());
+ reportError(std::move(Err), Obj->getFileName());
}
//===----------------------------------------------------------------------===//
@@ -10212,7 +10327,7 @@ void printMachORebaseTable(object::MachOObjectFile *Obj) {
Address, Entry.typeName().str().c_str());
}
if (Err)
- report_error(std::move(Err), Obj->getFileName());
+ reportError(std::move(Err), Obj->getFileName());
}
static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) {
@@ -10264,7 +10379,7 @@ void printMachOBindTable(object::MachOObjectFile *Obj) {
<< Entry.symbolName() << Attr << "\n";
}
if (Err)
- report_error(std::move(Err), Obj->getFileName());
+ reportError(std::move(Err), Obj->getFileName());
}
//===----------------------------------------------------------------------===//
@@ -10289,7 +10404,7 @@ void printMachOLazyBindTable(object::MachOObjectFile *Obj) {
<< Entry.symbolName() << "\n";
}
if (Err)
- report_error(std::move(Err), Obj->getFileName());
+ reportError(std::move(Err), Obj->getFileName());
}
//===----------------------------------------------------------------------===//
@@ -10321,7 +10436,7 @@ void printMachOWeakBindTable(object::MachOObjectFile *Obj) {
<< "\n";
}
if (Err)
- report_error(std::move(Err), Obj->getFileName());
+ reportError(std::move(Err), Obj->getFileName());
}
// get_dyld_bind_info_symbolname() is used for disassembly and passed an
@@ -10331,7 +10446,7 @@ void printMachOWeakBindTable(object::MachOObjectFile *Obj) {
static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
struct DisassembleInfo *info) {
if (info->bindtable == nullptr) {
- info->bindtable = llvm::make_unique<SymbolAddressMap>();
+ info->bindtable = std::make_unique<SymbolAddressMap>();
Error Err = Error::success();
for (const object::MachOBindEntry &Entry : info->O->bindTable(Err)) {
uint64_t Address = Entry.address();
@@ -10340,7 +10455,7 @@ static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
(*info->bindtable)[Address] = name;
}
if (Err)
- report_error(std::move(Err), info->O->getFileName());
+ reportError(std::move(Err), info->O->getFileName());
}
auto name = info->bindtable->lookup(ReferenceValue);
return !name.empty() ? name.data() : nullptr;
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 58981203c59e..34a44b3b7fa9 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -51,6 +51,7 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
@@ -341,78 +342,84 @@ static StringRef ToolName;
typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy;
-static bool shouldKeep(object::SectionRef S) {
+namespace {
+struct FilterResult {
+ // True if the section should not be skipped.
+ bool Keep;
+
+ // True if the index counter should be incremented, even if the section should
+ // be skipped. For example, sections may be skipped if they are not included
+ // in the --section flag, but we still want those to count toward the section
+ // count.
+ bool IncrementIndex;
+};
+} // namespace
+
+static FilterResult checkSectionFilter(object::SectionRef S) {
if (FilterSections.empty())
- return true;
- StringRef SecName;
- std::error_code error = S.getName(SecName);
- if (error)
- return false;
+ return {/*Keep=*/true, /*IncrementIndex=*/true};
+
+ Expected<StringRef> SecNameOrErr = S.getName();
+ if (!SecNameOrErr) {
+ consumeError(SecNameOrErr.takeError());
+ return {/*Keep=*/false, /*IncrementIndex=*/false};
+ }
+ StringRef SecName = *SecNameOrErr;
+
// StringSet does not allow empty key so avoid adding sections with
// no name (such as the section with index 0) here.
if (!SecName.empty())
FoundSectionSet.insert(SecName);
- return is_contained(FilterSections, SecName);
-}
-SectionFilter ToolSectionFilter(object::ObjectFile const &O) {
- return SectionFilter([](object::SectionRef S) { return shouldKeep(S); }, O);
-}
-
-void error(std::error_code EC) {
- if (!EC)
- return;
- WithColor::error(errs(), ToolName)
- << "reading file: " << EC.message() << ".\n";
- errs().flush();
- exit(1);
-}
-
-void error(Error E) {
- if (!E)
- return;
- WithColor::error(errs(), ToolName) << toString(std::move(E));
- exit(1);
+ // Only show the section if it's in the FilterSections list, but always
+ // increment so the indexing is stable.
+ return {/*Keep=*/is_contained(FilterSections, SecName),
+ /*IncrementIndex=*/true};
}
-LLVM_ATTRIBUTE_NORETURN void error(Twine Message) {
- WithColor::error(errs(), ToolName) << Message << ".\n";
- errs().flush();
- exit(1);
+SectionFilter ToolSectionFilter(object::ObjectFile const &O, uint64_t *Idx) {
+ // Start at UINT64_MAX so that the first index returned after an increment is
+ // zero (after the unsigned wrap).
+ if (Idx)
+ *Idx = UINT64_MAX;
+ return SectionFilter(
+ [Idx](object::SectionRef S) {
+ FilterResult Result = checkSectionFilter(S);
+ if (Idx != nullptr && Result.IncrementIndex)
+ *Idx += 1;
+ return Result.Keep;
+ },
+ O);
}
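Two details worth noting here: checkSectionFilter now reports separately whether a section is shown (Keep) and whether it occupies an index slot (IncrementIndex), and ToolSectionFilter seeds the caller's counter with UINT64_MAX so the first counted section wraps to index 0. A sketch of how a caller consumes the stable index, assuming Obj is any object::ObjectFile:

uint64_t SecIdx;
for (const llvm::object::SectionRef &S : ToolSectionFilter(Obj, &SecIdx)) {
  // SecIdx is S's position among all counted sections. It stays stable under
  // --section filtering because skipped-but-counted sections still advance it.
}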
-void warn(StringRef Message) {
- WithColor::warning(errs(), ToolName) << Message << ".\n";
- errs().flush();
+std::string getFileNameForError(const object::Archive::Child &C,
+ unsigned Index) {
+ Expected<StringRef> NameOrErr = C.getName();
+ if (NameOrErr)
+ return NameOrErr.get();
+ // If we have an error getting the name, then we print the index of the archive
+ // member. Since we are already in an error state, we just ignore this error.
+ consumeError(NameOrErr.takeError());
+ return "<file index: " + std::to_string(Index) + ">";
}
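The effect of the fallback, with an illustrative member name (the index is the zero-based position the callers below maintain):

//   C.getName() succeeds with "foo.o"  ->  "foo.o"
//   C.getName() fails, Index == 3      ->  "<file index: 3>"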
-static void warn(Twine Message) {
+void reportWarning(Twine Message, StringRef File) {
// Output order between errs() and outs() matters especially for archive
// files where the output is per member object.
outs().flush();
- WithColor::warning(errs(), ToolName) << Message << "\n";
+ WithColor::warning(errs(), ToolName)
+ << "'" << File << "': " << Message << "\n";
errs().flush();
}
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message) {
- WithColor::error(errs(), ToolName)
- << "'" << File << "': " << Message << ".\n";
- exit(1);
-}
-
-LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File) {
- assert(E);
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS);
- OS.flush();
- WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf;
+LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Twine Message) {
+ WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n";
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName,
- StringRef FileName,
- StringRef ArchitectureName) {
+LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName,
+ StringRef ArchiveName,
+ StringRef ArchitectureName) {
assert(E);
WithColor::error(errs(), ToolName);
if (ArchiveName != "")
@@ -429,18 +436,13 @@ LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName,
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName,
- const object::Archive::Child &C,
- StringRef ArchitectureName) {
- Expected<StringRef> NameOrErr = C.getName();
- // TODO: if we have a error getting the name then it would be nice to print
- // the index of which archive member this is and or its offset in the
- // archive instead of "???" as the name.
- if (!NameOrErr) {
- consumeError(NameOrErr.takeError());
- report_error(std::move(E), ArchiveName, "???", ArchitectureName);
- } else
- report_error(std::move(E), ArchiveName, NameOrErr.get(), ArchitectureName);
+static void reportCmdLineWarning(Twine Message) {
+ WithColor::warning(errs(), ToolName) << Message << "\n";
+}
+
+LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(Twine Message) {
+ WithColor::error(errs(), ToolName) << Message << "\n";
+ exit(1);
}
static void warnOnNoMatchForSections() {
@@ -455,37 +457,29 @@ static void warnOnNoMatchForSections() {
// Warn only if no section in FilterSections is matched.
for (StringRef S : MissingSections)
- warn("section '" + S + "' mentioned in a -j/--section option, but not "
- "found in any input file");
+ reportCmdLineWarning("section '" + S +
+ "' mentioned in a -j/--section option, but not "
+ "found in any input file");
}
-static const Target *getTarget(const ObjectFile *Obj = nullptr) {
+static const Target *getTarget(const ObjectFile *Obj) {
// Figure out the target triple.
Triple TheTriple("unknown-unknown-unknown");
if (TripleName.empty()) {
- if (Obj)
- TheTriple = Obj->makeTriple();
+ TheTriple = Obj->makeTriple();
} else {
TheTriple.setTriple(Triple::normalize(TripleName));
-
- // Use the triple, but also try to combine with ARM build attributes.
- if (Obj) {
- auto Arch = Obj->getArch();
- if (Arch == Triple::arm || Arch == Triple::armeb)
- Obj->setARMSubArch(TheTriple);
- }
+ auto Arch = Obj->getArch();
+ if (Arch == Triple::arm || Arch == Triple::armeb)
+ Obj->setARMSubArch(TheTriple);
}
// Get the target specific parser.
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
Error);
- if (!TheTarget) {
- if (Obj)
- report_error(Obj->getFileName(), "can't find target: " + Error);
- else
- error("can't find target: " + Error);
- }
+ if (!TheTarget)
+ reportError(Obj->getFileName(), "can't find target: " + Error);
// Update the triple name and return the found target.
TripleName = TheTriple.getTriple();
@@ -548,17 +542,22 @@ protected:
DILineInfo OldLineInfo;
const ObjectFile *Obj = nullptr;
std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
- // File name to file contents of source
+ // File name to file contents of source.
std::unordered_map<std::string, std::unique_ptr<MemoryBuffer>> SourceCache;
- // Mark the line endings of the cached source
+ // Mark the line endings of the cached source.
std::unordered_map<std::string, std::vector<StringRef>> LineCache;
+ // Keep track of missing sources.
+ StringSet<> MissingSources;
+ // Only emit 'no debug info' warning once.
+ bool WarnedNoDebugInfo;
private:
bool cacheSource(const DILineInfo& LineInfoFile);
public:
SourcePrinter() = default;
- SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) {
+ SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch)
+ : Obj(Obj), WarnedNoDebugInfo(false) {
symbolize::LLVMSymbolizer::Options SymbolizerOpts;
SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::None;
SymbolizerOpts.Demangle = false;
@@ -568,6 +567,7 @@ public:
virtual ~SourcePrinter() = default;
virtual void printSourceLine(raw_ostream &OS,
object::SectionedAddress Address,
+ StringRef ObjectFilename,
StringRef Delimiter = "; ");
};
@@ -577,8 +577,12 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) {
Buffer = MemoryBuffer::getMemBuffer(*LineInfo.Source);
} else {
auto BufferOrError = MemoryBuffer::getFile(LineInfo.FileName);
- if (!BufferOrError)
+ if (!BufferOrError) {
+ if (MissingSources.insert(LineInfo.FileName).second)
+ reportWarning("failed to find source " + LineInfo.FileName,
+ Obj->getFileName());
return false;
+ }
Buffer = std::move(*BufferOrError);
}
// Chomp the file to get lines
@@ -599,20 +603,33 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) {
void SourcePrinter::printSourceLine(raw_ostream &OS,
object::SectionedAddress Address,
+ StringRef ObjectFilename,
StringRef Delimiter) {
if (!Symbolizer)
return;
DILineInfo LineInfo = DILineInfo();
auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address);
+ std::string ErrorMessage;
if (!ExpectedLineInfo)
- consumeError(ExpectedLineInfo.takeError());
+ ErrorMessage = toString(ExpectedLineInfo.takeError());
else
LineInfo = *ExpectedLineInfo;
- if ((LineInfo.FileName == "<invalid>") || LineInfo.Line == 0 ||
- ((OldLineInfo.Line == LineInfo.Line) &&
- (OldLineInfo.FileName == LineInfo.FileName)))
+ if (LineInfo.FileName == DILineInfo::BadString) {
+ if (!WarnedNoDebugInfo) {
+ std::string Warning =
+ "failed to parse debug information for " + ObjectFilename.str();
+ if (!ErrorMessage.empty())
+ Warning += ": " + ErrorMessage;
+ reportWarning(Warning, ObjectFilename);
+ WarnedNoDebugInfo = true;
+ }
+ return;
+ }
+
+ if (LineInfo.Line == 0 || ((OldLineInfo.Line == LineInfo.Line) &&
+ (OldLineInfo.FileName == LineInfo.FileName)))
return;
if (PrintLines)
@@ -623,8 +640,14 @@ void SourcePrinter::printSourceLine(raw_ostream &OS,
return;
auto LineBuffer = LineCache.find(LineInfo.FileName);
if (LineBuffer != LineCache.end()) {
- if (LineInfo.Line > LineBuffer->second.size())
+ if (LineInfo.Line > LineBuffer->second.size()) {
+ reportWarning(
+ formatv(
+ "debug info line number {0} exceeds the number of lines in {1}",
+ LineInfo.Line, LineInfo.FileName),
+ ObjectFilename);
return;
+ }
// Vector begins at 0, line numbers are non-zero
OS << Delimiter << LineBuffer->second[LineInfo.Line - 1] << '\n';
}
@@ -646,13 +669,14 @@ static bool hasMappingSymbols(const ObjectFile *Obj) {
return isArmElf(Obj) || isAArch64Elf(Obj);
}
-static void printRelocation(const RelocationRef &Rel, uint64_t Address,
- bool Is64Bits) {
+static void printRelocation(StringRef FileName, const RelocationRef &Rel,
+ uint64_t Address, bool Is64Bits) {
StringRef Fmt = Is64Bits ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": ";
SmallString<16> Name;
SmallString<32> Val;
Rel.getTypeName(Name);
- error(getRelocationValueString(Rel, Val));
+ if (Error E = getRelocationValueString(Rel, Val))
+ reportError(std::move(E), FileName);
outs() << format(Fmt.data(), Address) << Name << "\t" << Val << "\n";
}
@@ -663,29 +687,25 @@ public:
ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI,
- SourcePrinter *SP,
+ SourcePrinter *SP, StringRef ObjectFilename,
std::vector<RelocationRef> *Rels = nullptr) {
if (SP && (PrintSource || PrintLines))
- SP->printSourceLine(OS, Address);
+ SP->printSourceLine(OS, Address, ObjectFilename);
- {
- formatted_raw_ostream FOS(OS);
- if (!NoLeadingAddr)
- FOS << format("%8" PRIx64 ":", Address.Address);
- if (!NoShowRawInsn) {
- FOS << ' ';
- dumpBytes(Bytes, FOS);
- }
- FOS.flush();
- // The output of printInst starts with a tab. Print some spaces so that
- // the tab has 1 column and advances to the target tab stop.
- unsigned TabStop = NoShowRawInsn ? 16 : 40;
- unsigned Column = FOS.getColumn();
- FOS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
-
- // The dtor calls flush() to ensure the indent comes before printInst().
+ size_t Start = OS.tell();
+ if (!NoLeadingAddr)
+ OS << format("%8" PRIx64 ":", Address.Address);
+ if (!NoShowRawInsn) {
+ OS << ' ';
+ dumpBytes(Bytes, OS);
}
+ // The output of printInst starts with a tab. Print some spaces so that
+ // the tab has 1 column and advances to the target tab stop.
+ unsigned TabStop = NoShowRawInsn ? 16 : 40;
+ unsigned Column = OS.tell() - Start;
+ OS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
+
if (MI)
IP.printInst(MI, OS, "", STI);
else
@@ -711,9 +731,10 @@ public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
+ StringRef ObjectFilename,
std::vector<RelocationRef> *Rels) override {
if (SP && (PrintSource || PrintLines))
- SP->printSourceLine(OS, Address, "");
+ SP->printSourceLine(OS, Address, ObjectFilename, "");
if (!MI) {
printLead(Bytes, Address.Address, OS);
OS << " <unknown>";
@@ -739,7 +760,7 @@ public:
auto PrintReloc = [&]() -> void {
while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) {
if (RelCur->getOffset() == Address.Address) {
- printRelocation(*RelCur, Address.Address, false);
+ printRelocation(ObjectFilename, *RelCur, Address.Address, false);
return;
}
++RelCur;
@@ -750,7 +771,7 @@ public:
OS << Separator;
Separator = "\n";
if (SP && (PrintSource || PrintLines))
- SP->printSourceLine(OS, Address, "");
+ SP->printSourceLine(OS, Address, ObjectFilename, "");
printLead(Bytes, Address.Address, OS);
OS << Preamble;
Preamble = " ";
@@ -780,9 +801,10 @@ public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
+ StringRef ObjectFilename,
std::vector<RelocationRef> *Rels) override {
if (SP && (PrintSource || PrintLines))
- SP->printSourceLine(OS, Address);
+ SP->printSourceLine(OS, Address, ObjectFilename);
if (MI) {
SmallString<40> InstStr;
@@ -831,9 +853,10 @@ public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
+ StringRef ObjectFilename,
std::vector<RelocationRef> *Rels) override {
if (SP && (PrintSource || PrintLines))
- SP->printSourceLine(OS, Address);
+ SP->printSourceLine(OS, Address, ObjectFilename);
if (!NoLeadingAddr)
OS << format("%8" PRId64 ":", Address.Address / 8);
if (!NoShowRawInsn) {
@@ -924,10 +947,12 @@ static void addPltEntries(const ObjectFile *Obj,
StringSaver &Saver) {
Optional<SectionRef> Plt = None;
for (const SectionRef &Section : Obj->sections()) {
- StringRef Name;
- if (Section.getName(Name))
+ Expected<StringRef> SecNameOrErr = Section.getName();
+ if (!SecNameOrErr) {
+ consumeError(SecNameOrErr.takeError());
continue;
- if (Name == ".plt")
+ }
+ if (*SecNameOrErr == ".plt")
Plt = Section;
}
if (!Plt)
@@ -968,9 +993,18 @@ static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
static std::map<SectionRef, std::vector<RelocationRef>>
getRelocsMap(object::ObjectFile const &Obj) {
std::map<SectionRef, std::vector<RelocationRef>> Ret;
+ uint64_t I = (uint64_t)-1;
for (SectionRef Sec : Obj.sections()) {
- section_iterator Relocated = Sec.getRelocatedSection();
- if (Relocated == Obj.section_end() || !shouldKeep(*Relocated))
+ ++I;
+ Expected<section_iterator> RelocatedOrErr = Sec.getRelocatedSection();
+ if (!RelocatedOrErr)
+ reportError(Obj.getFileName(),
+ "section (" + Twine(I) +
+ "): failed to get a relocated section: " +
+ toString(RelocatedOrErr.takeError()));
+
+ section_iterator Relocated = *RelocatedOrErr;
+ if (Relocated == Obj.section_end() || !checkSectionFilter(*Relocated).Keep)
continue;
std::vector<RelocationRef> &V = Ret[*Relocated];
for (const RelocationRef &R : Sec.relocations())
@@ -1137,11 +1171,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) {
for (const auto &ExportEntry : COFFObj->export_directories()) {
StringRef Name;
- error(ExportEntry.getSymbolName(Name));
+ if (std::error_code EC = ExportEntry.getSymbolName(Name))
+ reportError(errorCodeToError(EC), Obj->getFileName());
if (Name.empty())
continue;
+
uint32_t RVA;
- error(ExportEntry.getExportRVA(RVA));
+ if (std::error_code EC = ExportEntry.getExportRVA(RVA))
+ reportError(errorCodeToError(EC), Obj->getFileName());
uint64_t VA = COFFObj->getImageBase() + RVA;
auto Sec = partition_point(
@@ -1210,9 +1247,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
DataRefImpl DR = Section.getRawDataRefImpl();
SegmentName = MachO->getSectionFinalSegmentName(DR);
}
- StringRef SectionName;
- error(Section.getName(SectionName));
+ StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName());
// If the section has no symbol at the start, just insert a dummy one.
if (Symbols.empty() || std::get<0>(Symbols[0]) != 0) {
Symbols.insert(
@@ -1381,10 +1417,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (Size == 0)
Size = 1;
- PIP.printInst(
- *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
- {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(),
- "", *STI, &SP, &Rels);
+ PIP.printInst(*IP, Disassembled ? &Inst : nullptr,
+ Bytes.slice(Index, Size),
+ {SectionAddr + Index + VMAAdjustment, Section.getIndex()},
+ outs(), "", *STI, &SP, Obj->getFileName(), &Rels);
outs() << CommentStream.str();
Comments.clear();
@@ -1470,7 +1506,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
Offset += AdjustVMA;
}
- printRelocation(*RelCur, SectionAddr + Offset, Is64Bits);
+ printRelocation(Obj->getFileName(), *RelCur, SectionAddr + Offset,
+ Is64Bits);
++RelCur;
}
}
@@ -1482,7 +1519,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
StringSet<> MissingDisasmFuncsSet =
set_difference(DisasmFuncsSet, FoundDisasmFuncsSet);
for (StringRef MissingDisasmFunc : MissingDisasmFuncsSet.keys())
- warn("failed to disassemble missing function " + MissingDisasmFunc);
+ reportWarning("failed to disassemble missing function " + MissingDisasmFunc,
+ FileName);
}
static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
@@ -1497,24 +1535,24 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
if (!MRI)
- report_error(Obj->getFileName(),
- "no register info for target " + TripleName);
+ reportError(Obj->getFileName(),
+ "no register info for target " + TripleName);
// Set up disassembler.
std::unique_ptr<const MCAsmInfo> AsmInfo(
TheTarget->createMCAsmInfo(*MRI, TripleName));
if (!AsmInfo)
- report_error(Obj->getFileName(),
- "no assembly info for target " + TripleName);
+ reportError(Obj->getFileName(),
+ "no assembly info for target " + TripleName);
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
if (!STI)
- report_error(Obj->getFileName(),
- "no subtarget info for target " + TripleName);
+ reportError(Obj->getFileName(),
+ "no subtarget info for target " + TripleName);
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII)
- report_error(Obj->getFileName(),
- "no instruction info for target " + TripleName);
+ reportError(Obj->getFileName(),
+ "no instruction info for target " + TripleName);
MCObjectFileInfo MOFI;
MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
// FIXME: for now initialize MCObjectFileInfo with default values
@@ -1523,8 +1561,7 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, Ctx));
if (!DisAsm)
- report_error(Obj->getFileName(),
- "no disassembler for target " + TripleName);
+ reportError(Obj->getFileName(), "no disassembler for target " + TripleName);
// If we have an ARM object file, we need a second disassembler, because
// ARM CPUs have two different instruction sets: ARM mode, and Thumb mode.
@@ -1549,8 +1586,8 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
if (!IP)
- report_error(Obj->getFileName(),
- "no instruction printer for target " + TripleName);
+ reportError(Obj->getFileName(),
+ "no instruction printer for target " + TripleName);
IP->setPrintImmHex(PrintImmHex);
PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
@@ -1558,7 +1595,8 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
for (StringRef Opt : DisassemblerOptions)
if (!IP->applyTargetSpecificCLOption(Opt))
- error("Unrecognized disassembler option: " + Opt);
+ reportError(Obj->getFileName(),
+ "Unrecognized disassembler option: " + Opt);
disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(),
MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP,
@@ -1577,16 +1615,21 @@ void printRelocations(const ObjectFile *Obj) {
// sections. Usually, there is only one relocation section for
// each relocated section.
MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec;
- for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
+ uint64_t Ndx;
+ for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
- const SectionRef TargetSec = *Section.getRelocatedSection();
- SecToRelSec[TargetSec].push_back(Section);
+ Expected<section_iterator> SecOrErr = Section.getRelocatedSection();
+ if (!SecOrErr)
+ reportError(Obj->getFileName(),
+ "section (" + Twine(Ndx) +
+ "): unable to get a relocation target: " +
+ toString(SecOrErr.takeError()));
+ SecToRelSec[**SecOrErr].push_back(Section);
}
for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) {
- StringRef SecName;
- error(P.first.getName(SecName));
+ StringRef SecName = unwrapOrError(P.first.getName(), Obj->getFileName());
outs() << "RELOCATION RECORDS FOR [" << SecName << "]:\n";
for (SectionRef Section : P.second) {
@@ -1597,7 +1640,9 @@ void printRelocations(const ObjectFile *Obj) {
if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
continue;
Reloc.getTypeName(RelocName);
- error(getRelocationValueString(Reloc, ValueStr));
+ if (Error E = getRelocationValueString(Reloc, ValueStr))
+ reportError(std::move(E), Obj->getFileName());
+
outs() << format(Fmt.data(), Address) << " " << RelocName << " "
<< ValueStr << "\n";
}
@@ -1613,7 +1658,7 @@ void printDynamicRelocations(const ObjectFile *Obj) {
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
if (!Elf || Elf->getEType() != ELF::ET_DYN) {
- error("not a dynamic object");
+ reportError(Obj->getFileName(), "not a dynamic object");
return;
}
@@ -1629,7 +1674,8 @@ void printDynamicRelocations(const ObjectFile *Obj) {
SmallString<32> RelocName;
SmallString<32> ValueStr;
Reloc.getTypeName(RelocName);
- error(getRelocationValueString(Reloc, ValueStr));
+ if (Error E = getRelocationValueString(Reloc, ValueStr))
+ reportError(std::move(E), Obj->getFileName());
outs() << format(Fmt.data(), Address) << " " << RelocName << " "
<< ValueStr << "\n";
}
@@ -1647,47 +1693,64 @@ static bool shouldDisplayLMA(const ObjectFile *Obj) {
return ShowLMA;
}
+static size_t getMaxSectionNameWidth(const ObjectFile *Obj) {
+ // Default column width for names is 13 even if no names are that long.
+ size_t MaxWidth = 13;
+ for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
+ StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
+ MaxWidth = std::max(MaxWidth, Name.size());
+ }
+ return MaxWidth;
+}
+
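The width returned above drives the left_justify calls in printSectionHeaders below; a hedged sketch of the resulting header row, assuming llvm/Support/Format.h:

  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"

  // Sketch: the Name and VMA columns grow with the computed widths.
  static void printHeaderRow(size_t NameWidth, size_t AddressWidth) {
    llvm::outs() << "Idx " << llvm::left_justify("Name", NameWidth) << " Size "
                 << llvm::left_justify("VMA", AddressWidth) << " Type\n";
  }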
void printSectionHeaders(const ObjectFile *Obj) {
+ size_t NameWidth = getMaxSectionNameWidth(Obj);
+ size_t AddressWidth = 2 * Obj->getBytesInAddress();
bool HasLMAColumn = shouldDisplayLMA(Obj);
if (HasLMAColumn)
outs() << "Sections:\n"
- "Idx Name Size VMA LMA "
- "Type\n";
+ "Idx "
+ << left_justify("Name", NameWidth) << " Size "
+ << left_justify("VMA", AddressWidth) << " "
+ << left_justify("LMA", AddressWidth) << " Type\n";
else
outs() << "Sections:\n"
- "Idx Name Size VMA Type\n";
+ "Idx "
+ << left_justify("Name", NameWidth) << " Size "
+ << left_justify("VMA", AddressWidth) << " Type\n";
- for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
- StringRef Name;
- error(Section.getName(Name));
+ uint64_t Idx;
+ for (const SectionRef &Section : ToolSectionFilter(*Obj, &Idx)) {
+ StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
uint64_t VMA = Section.getAddress();
if (shouldAdjustVA(Section))
VMA += AdjustVMA;
uint64_t Size = Section.getSize();
- bool Text = Section.isText();
- bool Data = Section.isData();
- bool BSS = Section.isBSS();
- std::string Type = (std::string(Text ? "TEXT " : "") +
- (Data ? "DATA " : "") + (BSS ? "BSS" : ""));
+
+ std::string Type = Section.isText() ? "TEXT" : "";
+ if (Section.isData())
+ Type += Type.empty() ? "DATA" : " DATA";
+ if (Section.isBSS())
+ Type += Type.empty() ? "BSS" : " BSS";
if (HasLMAColumn)
- outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %016" PRIx64
- " %s\n",
- (unsigned)Section.getIndex(), Name.str().c_str(), Size,
- VMA, getELFSectionLMA(Section), Type.c_str());
+ outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth,
+ Name.str().c_str(), Size)
+ << format_hex_no_prefix(VMA, AddressWidth) << " "
+ << format_hex_no_prefix(getELFSectionLMA(Section), AddressWidth)
+ << " " << Type << "\n";
else
- outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %s\n",
- (unsigned)Section.getIndex(), Name.str().c_str(), Size,
- VMA, Type.c_str());
+ outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth,
+ Name.str().c_str(), Size)
+ << format_hex_no_prefix(VMA, AddressWidth) << " " << Type << "\n";
}
outs() << "\n";
}
void printSectionContents(const ObjectFile *Obj) {
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
- StringRef Name;
- error(Section.getName(Name));
+ StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
uint64_t BaseAddr = Section.getAddress();
uint64_t Size = Section.getSize();
if (!Size)
@@ -1741,21 +1804,26 @@ void printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
const StringRef FileName = O->getFileName();
for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) {
const SymbolRef &Symbol = *I;
- uint64_t Address = unwrapOrError(Symbol.getAddress(), ArchiveName, FileName,
+ uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName,
ArchitectureName);
if ((Address < StartAddress) || (Address > StopAddress))
continue;
- SymbolRef::Type Type = unwrapOrError(Symbol.getType(), ArchiveName,
- FileName, ArchitectureName);
+ SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName,
+ ArchiveName, ArchitectureName);
uint32_t Flags = Symbol.getFlags();
- section_iterator Section = unwrapOrError(Symbol.getSection(), ArchiveName,
- FileName, ArchitectureName);
+ section_iterator Section = unwrapOrError(Symbol.getSection(), FileName,
+ ArchiveName, ArchitectureName);
StringRef Name;
- if (Type == SymbolRef::ST_Debug && Section != O->section_end())
- Section->getName(Name);
- else
- Name = unwrapOrError(Symbol.getName(), ArchiveName, FileName,
+ if (Type == SymbolRef::ST_Debug && Section != O->section_end()) {
+ if (Expected<StringRef> NameOrErr = Section->getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
+ } else {
+ Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName,
ArchitectureName);
+ }
bool Global = Flags & SymbolRef::SF_Global;
bool Weak = Flags & SymbolRef::SF_Weak;
@@ -1801,8 +1869,8 @@ void printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
outs() << SegmentName << ",";
}
- StringRef SectionName;
- error(Section->getName(SectionName));
+ StringRef SectionName =
+ unwrapOrError(Section->getName(), O->getFileName());
outs() << SectionName;
}
@@ -1875,7 +1943,11 @@ void printRawClangAST(const ObjectFile *Obj) {
Optional<object::SectionRef> ClangASTSection;
for (auto Sec : ToolSectionFilter(*Obj)) {
StringRef Name;
- Sec.getName(Name);
+ if (Expected<StringRef> NameOrErr = Sec.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
if (Name == ClangASTSectionName) {
ClangASTSection = Sec;
break;
@@ -1907,7 +1979,11 @@ static void printFaultMaps(const ObjectFile *Obj) {
for (auto Sec : ToolSectionFilter(*Obj)) {
StringRef Name;
- Sec.getName(Name);
+ if (Expected<StringRef> NameOrErr = Sec.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
if (Name == FaultMapSectionName) {
FaultMapSection = Sec;
break;
@@ -1946,12 +2022,12 @@ static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
printMachOLoadCommands(O);
return;
}
- report_error(O->getFileName(), "Invalid/Unsupported object file format");
+ reportError(O->getFileName(), "Invalid/Unsupported object file format");
}
static void printFileHeaders(const ObjectFile *O) {
if (!O->isELF() && !O->isCOFF())
- report_error(O->getFileName(), "Invalid/Unsupported object file format");
+ reportError(O->getFileName(), "Invalid/Unsupported object file format");
Triple::ArchType AT = O->getArch();
outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n";
@@ -2010,6 +2086,43 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
outs() << Name << "\n";
}
+// Currently implemented for ELF only.
+static bool shouldWarnForInvalidStartStopAddress(ObjectFile *Obj) {
+ if (const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj)) {
+ if (Elf->getEType() != ELF::ET_REL)
+ return true;
+ }
+ return false;
+}
+
+static void checkForInvalidStartStopAddress(ObjectFile *Obj,
+ uint64_t Start, uint64_t Stop) {
+ if (!shouldWarnForInvalidStartStopAddress(Obj))
+ return;
+
+ for (const SectionRef &Section : Obj->sections())
+ if (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC) {
+ uint64_t BaseAddr = Section.getAddress();
+ uint64_t Size = Section.getSize();
+ if ((Start < BaseAddr + Size) && Stop > BaseAddr)
+ return;
+ }
+
+ if (StartAddress.getNumOccurrences() == 0)
+ reportWarning("no section has address less than 0x" +
+ Twine::utohexstr(Stop) + " specified by --stop-address",
+ Obj->getFileName());
+ else if (StopAddress.getNumOccurrences() == 0)
+ reportWarning("no section has address greater than or equal to 0x" +
+ Twine::utohexstr(Start) + " specified by --start-address",
+ Obj->getFileName());
+ else
+ reportWarning("no section overlaps the range [0x" +
+ Twine::utohexstr(Start) + ",0x" + Twine::utohexstr(Stop) +
+ ") specified by --start-address/--stop-address",
+ Obj->getFileName());
+}
+
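The per-section test in checkForInvalidStartStopAddress is the standard half-open interval intersection; as a self-contained sketch:

  #include <cstdint>

  // [Start, Stop) and [Base, Base + Size) overlap iff each range begins
  // before the other one ends.
  static bool rangesOverlap(uint64_t Start, uint64_t Stop, uint64_t Base,
                            uint64_t Size) {
    return Start < Base + Size && Stop > Base;
  }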
static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
const Archive::Child *C = nullptr) {
// Avoid other output when using a raw option.
@@ -2022,27 +2135,40 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
outs() << ":\tfile format " << O->getFileFormatName() << "\n\n";
}
+ if (StartAddress.getNumOccurrences() || StopAddress.getNumOccurrences())
+ checkForInvalidStartStopAddress(O, StartAddress, StopAddress);
+
+ // Note: the order here matches GNU objdump for compatibility.
StringRef ArchiveName = A ? A->getFileName() : "";
- if (FileHeaders)
- printFileHeaders(O);
if (ArchiveHeaders && !MachOOpt && C)
printArchiveChild(ArchiveName, *C);
- if (Disassemble)
- disassembleObject(O, Relocations);
+ if (FileHeaders)
+ printFileHeaders(O);
+ if (PrivateHeaders || FirstPrivateHeader)
+ printPrivateFileHeaders(O, FirstPrivateHeader);
+ if (SectionHeaders)
+ printSectionHeaders(O);
+ if (SymbolTable)
+ printSymbolTable(O, ArchiveName);
+ if (DwarfDumpType != DIDT_Null) {
+ std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
+ // Dump the complete DWARF structure.
+ DIDumpOptions DumpOpts;
+ DumpOpts.DumpType = DwarfDumpType;
+ DICtx->dump(outs(), DumpOpts);
+ }
if (Relocations && !Disassemble)
printRelocations(O);
if (DynamicRelocations)
printDynamicRelocations(O);
- if (SectionHeaders)
- printSectionHeaders(O);
if (SectionContents)
printSectionContents(O);
- if (SymbolTable)
- printSymbolTable(O, ArchiveName);
+ if (Disassemble)
+ disassembleObject(O, Relocations);
if (UnwindInfo)
printUnwindInfo(O);
- if (PrivateHeaders || FirstPrivateHeader)
- printPrivateFileHeaders(O, FirstPrivateHeader);
+
+ // Mach-O specific options:
if (ExportsTrie)
printExportsTrie(O);
if (Rebase)
@@ -2053,17 +2179,12 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
printLazyBindTable(O);
if (WeakBind)
printWeakBindTable(O);
+
+ // Other special sections:
if (RawClangAST)
printRawClangAST(O);
if (FaultMapSection)
printFaultMaps(O);
- if (DwarfDumpType != DIDT_Null) {
- std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
- // Dump the complete DWARF structure.
- DIDumpOptions DumpOpts;
- DumpOpts.DumpType = DwarfDumpType;
- DICtx->dump(outs(), DumpOpts);
- }
}
static void dumpObject(const COFFImportFile *I, const Archive *A,
@@ -2086,11 +2207,13 @@ static void dumpObject(const COFFImportFile *I, const Archive *A,
/// Dump each object file in \a A.
static void dumpArchive(const Archive *A) {
Error Err = Error::success();
+ unsigned I = -1;
for (auto &C : A->children(Err)) {
+ ++I;
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(std::move(E), A->getFileName(), C);
+ reportError(std::move(E), getFileNameForError(C, I), A->getFileName());
continue;
}
if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
@@ -2098,11 +2221,11 @@ static void dumpArchive(const Archive *A) {
else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
dumpObject(I, A, &C);
else
- report_error(errorCodeToError(object_error::invalid_file_type),
- A->getFileName());
+ reportError(errorCodeToError(object_error::invalid_file_type),
+ A->getFileName());
}
if (Err)
- report_error(std::move(Err), A->getFileName());
+ reportError(std::move(Err), A->getFileName());
}
/// Open file and figure out how to dump it.
@@ -2126,7 +2249,7 @@ static void dumpInput(StringRef file) {
else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary))
parseInputMachO(UB);
else
- report_error(errorCodeToError(object_error::invalid_file_type), file);
+ reportError(errorCodeToError(object_error::invalid_file_type), file);
}
} // namespace llvm
@@ -2147,7 +2270,7 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n");
if (StartAddress >= StopAddress)
- error("start address should be less than stop address");
+ reportCmdLineError("start address should be less than stop address");
ToolName = argv[0];
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index e58d4a05c2e6..43ce02ae0bc2 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -31,6 +31,8 @@ extern cl::opt<bool> Demangle;
typedef std::function<bool(llvm::object::SectionRef const &)> FilterPredicate;
+/// A filtered iterator for SectionRefs that skips sections based on a given
+/// predicate.
class SectionFilterIterator {
public:
SectionFilterIterator(FilterPredicate P,
@@ -60,6 +62,8 @@ private:
llvm::object::section_iterator End;
};
+/// Creates an iterator range of SectionFilterIterators for a given Object and
+/// predicate.
class SectionFilter {
public:
SectionFilter(FilterPredicate P, llvm::object::ObjectFile const &O)
@@ -79,7 +83,15 @@ private:
};
// Various helper functions.
-SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O);
+
+/// Creates a SectionFilter with a standard predicate that conditionally skips
+/// sections when the --section objdump flag is provided.
+///
+/// Idx is an optional output parameter that keeps track of which section index
+/// this is. This may be different than the actual section number, as some
+/// sections may be filtered (e.g. symbol tables).
+SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O,
+ uint64_t *Idx = nullptr);
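A hypothetical call site for the new Idx out-parameter, mirroring its use in printSectionHeaders earlier in this patch:

  // Sketch only: enumerate surviving sections with their filtered index,
  // which ToolSectionFilter advances as it iterates.
  uint64_t Idx;
  for (const llvm::object::SectionRef &Sec : ToolSectionFilter(*Obj, &Idx))
    llvm::outs() << Idx << " "
                 << unwrapOrError(Sec.getName(), Obj->getFileName()) << "\n";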
Error getELFRelocationValueString(const object::ELFObjectFileBase *Obj,
const object::RelocationRef &Rel,
@@ -96,8 +108,6 @@ Error getMachORelocationValueString(const object::MachOObjectFile *Obj,
uint64_t getELFSectionLMA(const object::ELFSectionRef& Sec);
-void error(std::error_code ec);
-void error(Error E);
bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B);
void parseInputMachO(StringRef Filename);
void parseInputMachO(object::MachOUniversalBinary *UB);
@@ -129,24 +139,22 @@ void printSectionHeaders(const object::ObjectFile *O);
void printSectionContents(const object::ObjectFile *O);
void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName = StringRef());
-void warn(StringRef Message);
-LLVM_ATTRIBUTE_NORETURN void error(Twine Message);
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message);
-LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File);
-LLVM_ATTRIBUTE_NORETURN void
-report_error(Error E, StringRef FileName, StringRef ArchiveName,
- StringRef ArchitectureName = StringRef());
-LLVM_ATTRIBUTE_NORETURN void
-report_error(Error E, StringRef ArchiveName, const object::Archive::Child &C,
- StringRef ArchitectureName = StringRef());
+LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Twine Message);
+LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName,
+ StringRef ArchiveName = "",
+ StringRef ArchitectureName = "");
+void reportWarning(Twine Message, StringRef File);
template <typename T, typename... Ts>
T unwrapOrError(Expected<T> EO, Ts &&... Args) {
if (EO)
return std::move(*EO);
- report_error(EO.takeError(), std::forward<Ts>(Args)...);
+ reportError(EO.takeError(), std::forward<Ts>(Args)...);
}
+std::string getFileNameForError(const object::Archive::Child &C,
+ unsigned Index);
+
} // end namespace llvm
#endif
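The unwrapOrError template above is the workhorse of this cleanup: on success it moves the value out; on failure it forwards the trailing arguments to the noreturn reportError as context, now in FileName-first order. A hedged usage sketch matching the printSymbolTable changes earlier in the patch:

  // Sketch: the trailing arguments become the error's file/archive context.
  uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName,
                                   ArchitectureName);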
diff --git a/tools/llvm-pdbutil/BytesOutputStyle.cpp b/tools/llvm-pdbutil/BytesOutputStyle.cpp
index 162d12c120b4..ffc907e09f11 100644
--- a/tools/llvm-pdbutil/BytesOutputStyle.cpp
+++ b/tools/llvm-pdbutil/BytesOutputStyle.cpp
@@ -457,7 +457,7 @@ BytesOutputStyle::initializeTypes(uint32_t StreamIdx) {
uint32_t Count = Tpi->getNumTypeRecords();
auto Offsets = Tpi->getTypeIndexOffsets();
TypeCollection =
- llvm::make_unique<LazyRandomTypeCollection>(Types, Count, Offsets);
+ std::make_unique<LazyRandomTypeCollection>(Types, Count, Offsets);
return *TypeCollection;
}
diff --git a/tools/llvm-pdbutil/DumpOutputStyle.cpp b/tools/llvm-pdbutil/DumpOutputStyle.cpp
index 962d4cf88a8a..4d82e0fd9174 100644
--- a/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -1369,9 +1369,10 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() {
LazyRandomTypeCollection Types(100);
for (const auto &S : getObj().sections()) {
- StringRef SectionName;
- if (auto EC = S.getName(SectionName))
- return errorCodeToError(EC);
+ Expected<StringRef> NameOrErr = S.getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ StringRef SectionName = *NameOrErr;
// .debug$T is a standard CodeView type section, while .debug$P is the same
// format but used for MSVC precompiled header object files.
@@ -1551,7 +1552,7 @@ Error DumpOutputStyle::dumpModuleSymsForObj() {
Dumper.setSymbolGroup(&Strings);
for (auto Symbol : Symbols) {
if (auto EC = Visitor.visitSymbolRecord(Symbol)) {
- SymbolError = llvm::make_unique<Error>(std::move(EC));
+ SymbolError = std::make_unique<Error>(std::move(EC));
return;
}
}
diff --git a/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/tools/llvm-pdbutil/ExplainOutputStyle.cpp
index 94faa0463981..3d2490509c03 100644
--- a/tools/llvm-pdbutil/ExplainOutputStyle.cpp
+++ b/tools/llvm-pdbutil/ExplainOutputStyle.cpp
@@ -64,7 +64,7 @@ Error ExplainOutputStyle::explainPdbFile() {
Error ExplainOutputStyle::explainBinaryFile() {
std::unique_ptr<BinaryByteStream> Stream =
- llvm::make_unique<BinaryByteStream>(File.unknown().getBuffer(),
+ std::make_unique<BinaryByteStream>(File.unknown().getBuffer(),
llvm::support::little);
switch (opts::explain::InputType) {
case opts::explain::InputFileType::DBIStream: {
diff --git a/tools/llvm-pdbutil/InputFile.cpp b/tools/llvm-pdbutil/InputFile.cpp
index bd23bfdbe31a..b316882de64d 100644
--- a/tools/llvm-pdbutil/InputFile.cpp
+++ b/tools/llvm-pdbutil/InputFile.cpp
@@ -66,12 +66,13 @@ getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
StringRef Name,
BinaryStreamReader &Reader) {
- StringRef SectionName;
- if (Section.getName(SectionName))
- return false;
-
- if (SectionName != Name)
+ if (Expected<StringRef> NameOrErr = Section.getName()) {
+ if (*NameOrErr != Name)
+ return false;
+ } else {
+ consumeError(NameOrErr.takeError());
return false;
+ }
Expected<StringRef> ContentsOrErr = Section.getContents();
if (!ContentsOrErr) {
@@ -384,7 +385,7 @@ InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
uint32_t Count = Stream.getNumTypeRecords();
auto Offsets = Stream.getTypeIndexOffsets();
Collection =
- llvm::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
+ std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
return *Collection;
}
@@ -397,11 +398,11 @@ InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
if (!isDebugTSection(Section, Records))
continue;
- Types = llvm::make_unique<LazyRandomTypeCollection>(Records, 100);
+ Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
return *Types;
}
- Types = llvm::make_unique<LazyRandomTypeCollection>(100);
+ Types = std::make_unique<LazyRandomTypeCollection>(100);
return *Types;
}
diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index e5ae47050678..ebfa50625e76 100644
--- a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -569,8 +569,9 @@ Error MinimalSymbolDumper::visitKnownRecord(
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
DefRangeFramePointerRelSym &Def) {
AutoIndent Indent(P, 7);
- P.formatLine("offset = {0}, range = {1}", Def.Offset, formatRange(Def.Range));
- P.formatLine("gaps = {2}", Def.Offset,
+ P.formatLine("offset = {0}, range = {1}", Def.Hdr.Offset,
+ formatRange(Def.Range));
+ P.formatLine("gaps = {2}", Def.Hdr.Offset,
formatGaps(P.getIndentLevel() + 9, Def.Gaps));
return Error::success();
}
diff --git a/tools/llvm-pdbutil/PrettyTypeDumper.cpp b/tools/llvm-pdbutil/PrettyTypeDumper.cpp
index e8f8e5aa62c9..2f7a39803ca5 100644
--- a/tools/llvm-pdbutil/PrettyTypeDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyTypeDumper.cpp
@@ -117,7 +117,7 @@ filterAndSortClassDefs(LinePrinter &Printer, Enumerator &E,
continue;
}
- auto Layout = llvm::make_unique<ClassLayout>(std::move(Class));
+ auto Layout = std::make_unique<ClassLayout>(std::move(Class));
if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold) {
++Discarded;
continue;
@@ -259,7 +259,7 @@ void TypeDumper::start(const PDBSymbolExe &Exe) {
continue;
}
- auto Layout = llvm::make_unique<ClassLayout>(std::move(Class));
+ auto Layout = std::make_unique<ClassLayout>(std::move(Class));
if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold)
continue;
diff --git a/tools/llvm-pdbutil/llvm-pdbutil.cpp b/tools/llvm-pdbutil/llvm-pdbutil.cpp
index 785a98086791..9307300861d4 100644
--- a/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -863,8 +863,8 @@ static void pdb2Yaml(StringRef Path) {
std::unique_ptr<IPDBSession> Session;
auto &File = loadPDB(Path, Session);
- auto O = llvm::make_unique<YAMLOutputStyle>(File);
- O = llvm::make_unique<YAMLOutputStyle>(File);
+ auto O = std::make_unique<YAMLOutputStyle>(File);
+ O = std::make_unique<YAMLOutputStyle>(File);
ExitOnErr(O->dump());
}
@@ -872,7 +872,7 @@ static void pdb2Yaml(StringRef Path) {
static void dumpRaw(StringRef Path) {
InputFile IF = ExitOnErr(InputFile::open(Path));
- auto O = llvm::make_unique<DumpOutputStyle>(IF);
+ auto O = std::make_unique<DumpOutputStyle>(IF);
ExitOnErr(O->dump());
}
@@ -880,7 +880,7 @@ static void dumpBytes(StringRef Path) {
std::unique_ptr<IPDBSession> Session;
auto &File = loadPDB(Path, Session);
- auto O = llvm::make_unique<BytesOutputStyle>(File);
+ auto O = std::make_unique<BytesOutputStyle>(File);
ExitOnErr(O->dump());
}
@@ -1347,7 +1347,7 @@ static void explain() {
ExitOnErr(InputFile::open(opts::explain::InputFilename.front(), true));
for (uint64_t Off : opts::explain::Offsets) {
- auto O = llvm::make_unique<ExplainOutputStyle>(IF, Off);
+ auto O = std::make_unique<ExplainOutputStyle>(IF, Off);
ExitOnErr(O->dump());
}
diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp
index 16d3ebe3fcbc..41e9abb82b1f 100644
--- a/tools/llvm-profdata/llvm-profdata.cpp
+++ b/tools/llvm-profdata/llvm-profdata.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,6 +38,7 @@ enum ProfileFormat {
PF_None = 0,
PF_Text,
PF_Compact_Binary,
+ PF_Ext_Binary,
PF_GCC,
PF_Binary
};
@@ -84,6 +86,15 @@ static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
namespace {
enum ProfileKinds { instr, sample };
+enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid };
+}
+
+static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
+ StringRef Whence = "") {
+ if (FailMode == failIfAnyAreInvalid)
+ exitWithErrorCode(EC, Whence);
+ else
+ warn(EC.message(), Whence);
}
static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
@@ -136,7 +147,7 @@ public:
if (!BufOrError)
exitWithErrorCode(BufOrError.getError(), InputFile);
- auto Remapper = llvm::make_unique<SymbolRemapper>();
+ auto Remapper = std::make_unique<SymbolRemapper>();
Remapper->File = std::move(BufOrError.get());
for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
@@ -173,33 +184,16 @@ typedef SmallVector<WeightedFile, 5> WeightedFileVector;
struct WriterContext {
std::mutex Lock;
InstrProfWriter Writer;
- Error Err;
- std::string ErrWhence;
+ std::vector<std::pair<Error, std::string>> Errors;
std::mutex &ErrLock;
SmallSet<instrprof_error, 4> &WriterErrorCodes;
WriterContext(bool IsSparse, std::mutex &ErrLock,
SmallSet<instrprof_error, 4> &WriterErrorCodes)
- : Lock(), Writer(IsSparse), Err(Error::success()), ErrWhence(""),
- ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
+ : Lock(), Writer(IsSparse), Errors(), ErrLock(ErrLock),
+ WriterErrorCodes(WriterErrorCodes) {}
};
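With the single (Err, ErrWhence) pair replaced by a vector, every input file can record its own failure instead of the first error suppressing the rest. A minimal sketch of the accumulate-then-drain pattern, using createStringError as a stand-in for the real profile errors:

  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"
  #include <string>
  #include <system_error>
  #include <utility>
  #include <vector>

  // Sketch: collect per-file errors, then report them all at the end.
  static void drainErrors() {
    std::vector<std::pair<llvm::Error, std::string>> Errors;
    Errors.emplace_back(
        llvm::createStringError(
            std::make_error_code(std::errc::invalid_argument),
            "truncated profile"),
        "a.profraw");
    for (auto &P : Errors)
      llvm::errs() << P.second << ": " << llvm::toString(std::move(P.first))
                   << "\n";
  }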
-/// Determine whether an error is fatal for profile merging.
-static bool isFatalError(instrprof_error IPE) {
- switch (IPE) {
- default:
- return true;
- case instrprof_error::success:
- case instrprof_error::eof:
- case instrprof_error::unknown_function:
- case instrprof_error::hash_mismatch:
- case instrprof_error::count_mismatch:
- case instrprof_error::counter_overflow:
- case instrprof_error::value_site_count_mismatch:
- return false;
- }
-}
-
/// Compute the overlap between profiles BaseFilename and TestFilename,
/// and store the program-level result in Overlap.
static void overlapInput(const std::string &BaseFilename,
@@ -212,7 +206,7 @@ static void overlapInput(const std::string &BaseFilename,
// Skip empty profiles by returning silently.
instrprof_error IPE = InstrProfError::take(std::move(E));
if (IPE != instrprof_error::empty_raw_profile)
- WC->Err = make_error<InstrProfError>(IPE);
+ WC->Errors.emplace_back(make_error<InstrProfError>(IPE), TestFilename);
return;
}
@@ -231,21 +225,17 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
WriterContext *WC) {
std::unique_lock<std::mutex> CtxGuard{WC->Lock};
- // If there's a pending hard error, don't do more work.
- if (WC->Err)
- return;
-
// Copy the filename, because llvm::ThreadPool copied the input "const
// WeightedFile &" by value, making a reference to the filename within it
// invalid outside of this packaged task.
- WC->ErrWhence = Input.Filename;
+ std::string Filename = Input.Filename;
auto ReaderOrErr = InstrProfReader::create(Input.Filename);
if (Error E = ReaderOrErr.takeError()) {
// Skip empty profiles by returning silently.
instrprof_error IPE = InstrProfError::take(std::move(E));
if (IPE != instrprof_error::empty_raw_profile)
- WC->Err = make_error<InstrProfError>(IPE);
+ WC->Errors.emplace_back(make_error<InstrProfError>(IPE), Filename);
return;
}
@@ -253,9 +243,11 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
bool IsIRProfile = Reader->isIRLevelProfile();
bool HasCSIRProfile = Reader->hasCSIRLevelProfile();
if (WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) {
- WC->Err = make_error<StringError>(
- "Merge IR generated profile with Clang generated profile.",
- std::error_code());
+ WC->Errors.emplace_back(
+ make_error<StringError>(
+ "Merge IR generated profile with Clang generated profile.",
+ std::error_code()),
+ Filename);
return;
}
@@ -278,30 +270,23 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
FuncName, firstTime);
});
}
- if (Reader->hasError()) {
- if (Error E = Reader->getError()) {
- instrprof_error IPE = InstrProfError::take(std::move(E));
- if (isFatalError(IPE))
- WC->Err = make_error<InstrProfError>(IPE);
- }
- }
+ if (Reader->hasError())
+ if (Error E = Reader->getError())
+ WC->Errors.emplace_back(std::move(E), Filename);
}
/// Merge the \p Src writer context into \p Dst.
static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
- // If we've already seen a hard error, continuing with the merge would
- // clobber it.
- if (Dst->Err || Src->Err)
- return;
+ for (auto &ErrorPair : Src->Errors)
+ Dst->Errors.push_back(std::move(ErrorPair));
+ Src->Errors.clear();
- bool Reported = false;
Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
- if (Reported) {
- consumeError(std::move(E));
- return;
- }
- Reported = true;
- Dst->Err = std::move(E);
+ instrprof_error IPE = InstrProfError::take(std::move(E));
+ std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
+ bool firstTime = Dst->WriterErrorCodes.insert(IPE).second;
+ if (firstTime)
+ warn(toString(make_error<InstrProfError>(IPE)));
});
}
@@ -309,12 +294,12 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
SymbolRemapper *Remapper,
StringRef OutputFilename,
ProfileFormat OutputFormat, bool OutputSparse,
- unsigned NumThreads) {
+ unsigned NumThreads, FailureMode FailMode) {
if (OutputFilename.compare("-") == 0)
exitWithError("Cannot write indexed profdata format to stdout.");
if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary &&
- OutputFormat != PF_Text)
+ OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text)
exitWithError("Unknown format is specified.");
std::mutex ErrorLock;
@@ -328,7 +313,7 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
// Initialize the writer contexts.
SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
for (unsigned I = 0; I < NumThreads; ++I)
- Contexts.emplace_back(llvm::make_unique<WriterContext>(
+ Contexts.emplace_back(std::make_unique<WriterContext>(
OutputSparse, ErrorLock, WriterErrorCodes));
if (NumThreads == 1) {
@@ -364,23 +349,21 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
} while (Mid > 0);
}
- // Handle deferred hard errors encountered during merging.
+ // Handle deferred errors encountered during merging. If the number of errors
+ // is equal to the number of inputs, the merge failed.
+ unsigned NumErrors = 0;
for (std::unique_ptr<WriterContext> &WC : Contexts) {
- if (!WC->Err)
- continue;
- if (!WC->Err.isA<InstrProfError>())
- exitWithError(std::move(WC->Err), WC->ErrWhence);
-
- instrprof_error IPE = InstrProfError::take(std::move(WC->Err));
- if (isFatalError(IPE))
- exitWithError(make_error<InstrProfError>(IPE), WC->ErrWhence);
- else
- warn(toString(make_error<InstrProfError>(IPE)),
- WC->ErrWhence);
+ for (auto &ErrorPair : WC->Errors) {
+ ++NumErrors;
+ warn(toString(std::move(ErrorPair.first)), ErrorPair.second);
+ }
}
+ if (NumErrors == Inputs.size() ||
+ (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
+ exitWithError("No profiles could be merged.");
std::error_code EC;
- raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None);
+ raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::OF_None);
if (EC)
exitWithErrorCode(EC, OutputFilename);
@@ -425,21 +408,78 @@ remapSamples(const sampleprof::FunctionSamples &Samples,
}
static sampleprof::SampleProfileFormat FormatMap[] = {
- sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_Compact_Binary,
- sampleprof::SPF_GCC, sampleprof::SPF_Binary};
+ sampleprof::SPF_None,
+ sampleprof::SPF_Text,
+ sampleprof::SPF_Compact_Binary,
+ sampleprof::SPF_Ext_Binary,
+ sampleprof::SPF_GCC,
+ sampleprof::SPF_Binary};
+
+static std::unique_ptr<MemoryBuffer>
+getInputFileBuf(const StringRef &InputFile) {
+ if (InputFile == "")
+ return {};
+
+ auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
+ if (!BufOrError)
+ exitWithErrorCode(BufOrError.getError(), InputFile);
+
+ return std::move(*BufOrError);
+}
+
+static void populateProfileSymbolList(MemoryBuffer *Buffer,
+ sampleprof::ProfileSymbolList &PSL) {
+ if (!Buffer)
+ return;
+
+ SmallVector<StringRef, 32> SymbolVec;
+ StringRef Data = Buffer->getBuffer();
+ Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+
+ for (StringRef symbol : SymbolVec)
+ PSL.add(symbol);
+}
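populateProfileSymbolList relies on StringRef::split's KeepEmpty=false to drop blank lines from the symbol file; a quick self-contained illustration of that behavior:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringRef.h"
  #include <cassert>

  // Sketch: blank lines disappear when KeepEmpty is false.
  static void splitDemo() {
    llvm::SmallVector<llvm::StringRef, 4> Parts;
    llvm::StringRef("foo\n\nbar\n").split(Parts, '\n', /*MaxSplit=*/-1,
                                          /*KeepEmpty=*/false);
    assert(Parts.size() == 2 && Parts[0] == "foo" && Parts[1] == "bar");
  }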
+
+static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
+ ProfileFormat OutputFormat,
+ MemoryBuffer *Buffer,
+ sampleprof::ProfileSymbolList &WriterList,
+ bool CompressAllSections) {
+ populateProfileSymbolList(Buffer, WriterList);
+ if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
+ warn("Profile Symbol list is not empty but the output format is not "
+ "ExtBinary format. The list will be lost in the output. ");
+
+ Writer.setProfileSymbolList(&WriterList);
+
+ if (CompressAllSections) {
+ if (OutputFormat != PF_Ext_Binary) {
+ warn("-compress-all-section is ignored. Specify -extbinary to enable it");
+ } else {
+ auto ExtBinaryWriter =
+ static_cast<sampleprof::SampleProfileWriterExtBinary *>(&Writer);
+ ExtBinaryWriter->setToCompressAllSections();
+ }
+ }
+}
static void mergeSampleProfile(const WeightedFileVector &Inputs,
SymbolRemapper *Remapper,
StringRef OutputFilename,
- ProfileFormat OutputFormat) {
+ ProfileFormat OutputFormat,
+ StringRef ProfileSymbolListFile,
+ bool CompressAllSections, FailureMode FailMode) {
using namespace sampleprof;
StringMap<FunctionSamples> ProfileMap;
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
LLVMContext Context;
+ sampleprof::ProfileSymbolList WriterList;
for (const auto &Input : Inputs) {
auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context);
- if (std::error_code EC = ReaderOrErr.getError())
- exitWithErrorCode(EC, Input.Filename);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ warnOrExitGivenError(FailMode, EC, Input.Filename);
+ continue;
+ }
// We need to keep the readers around until after all the files are
// read so that we do not lose the function names stored in each
@@ -447,8 +487,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
// merged profile map.
Readers.push_back(std::move(ReaderOrErr.get()));
const auto Reader = Readers.back().get();
- if (std::error_code EC = Reader->read())
- exitWithErrorCode(EC, Input.Filename);
+ if (std::error_code EC = Reader->read()) {
+ warnOrExitGivenError(FailMode, EC, Input.Filename);
+ Readers.pop_back();
+ continue;
+ }
StringMap<FunctionSamples> &Profiles = Reader->getProfiles();
for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
@@ -466,6 +509,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName);
}
}
+
+ std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
+ Reader->getProfileSymbolList();
+ if (ReaderList)
+ WriterList.merge(*ReaderList);
}
auto WriterOrErr =
SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
@@ -473,6 +521,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
exitWithErrorCode(EC, OutputFilename);
auto Writer = std::move(WriterOrErr.get());
+ // WriterList will hold StringRefs referring to strings in Buffer.
+ // Make sure Buffer lives as long as WriterList.
+ auto Buffer = getInputFileBuf(ProfileSymbolListFile);
+ handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
+ CompressAllSections);
Writer->write(ProfileMap);
}
@@ -487,18 +540,6 @@ static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
return {FileName, Weight};
}
-static std::unique_ptr<MemoryBuffer>
-getInputFilenamesFileBuf(const StringRef &InputFilenamesFile) {
- if (InputFilenamesFile == "")
- return {};
-
- auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile);
- if (!BufOrError)
- exitWithErrorCode(BufOrError.getError(), InputFilenamesFile);
-
- return std::move(*BufOrError);
-}
-
static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
StringRef Filename = WF.Filename;
uint64_t Weight = WF.Weight;
@@ -583,12 +624,20 @@ static int merge_main(int argc, const char *argv[]) {
clEnumVal(sample, "Sample profile")));
cl::opt<ProfileFormat> OutputFormat(
cl::desc("Format of output profile"), cl::init(PF_Binary),
- cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding (default)"),
- clEnumValN(PF_Compact_Binary, "compbinary",
- "Compact binary encoding"),
- clEnumValN(PF_Text, "text", "Text encoding"),
- clEnumValN(PF_GCC, "gcc",
- "GCC encoding (only meaningful for -sample)")));
+ cl::values(
+ clEnumValN(PF_Binary, "binary", "Binary encoding (default)"),
+ clEnumValN(PF_Compact_Binary, "compbinary",
+ "Compact binary encoding"),
+ clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding"),
+ clEnumValN(PF_Text, "text", "Text encoding"),
+ clEnumValN(PF_GCC, "gcc",
+ "GCC encoding (only meaningful for -sample)")));
+ cl::opt<FailureMode> FailureMode(
+ "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"),
+ cl::values(clEnumValN(failIfAnyAreInvalid, "any",
+ "Fail if any profile is invalid."),
+ clEnumValN(failIfAllAreInvalid, "all",
+ "Fail only if all profiles are invalid.")));
cl::opt<bool> OutputSparse("sparse", cl::init(false),
cl::desc("Generate a sparse profile (only meaningful for -instr)"));
cl::opt<unsigned> NumThreads(
@@ -596,6 +645,14 @@ static int merge_main(int argc, const char *argv[]) {
cl::desc("Number of merge threads to use (default: autodetect)"));
cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
cl::aliasopt(NumThreads));
+ cl::opt<std::string> ProfileSymbolListFile(
+ "prof-sym-list", cl::init(""),
+ cl::desc("Path to file containing the list of function symbols "
+ "used to populate profile symbol list"));
+ cl::opt<bool> CompressAllSections(
+ "compress-all-sections", cl::init(false), cl::Hidden,
+ cl::desc("Compress all sections when writing the profile (only "
+ "meaningful for -extbinary)"));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
@@ -607,7 +664,7 @@ static int merge_main(int argc, const char *argv[]) {
// Make sure that the file buffer stays alive for the duration of the
// weighted input vector's lifetime.
- auto Buffer = getInputFilenamesFileBuf(InputFilenamesFile);
+ auto Buffer = getInputFileBuf(InputFilenamesFile);
parseInputFilenamesFile(Buffer.get(), WeightedInputs);
if (WeightedInputs.empty())
@@ -626,10 +683,11 @@ static int merge_main(int argc, const char *argv[]) {
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename,
- OutputFormat, OutputSparse, NumThreads);
+ OutputFormat, OutputSparse, NumThreads, FailureMode);
else
mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
- OutputFormat);
+ OutputFormat, ProfileSymbolListFile, CompressAllSections,
+ FailureMode);
return 0;
}
@@ -644,7 +702,7 @@ static void overlapInstrProfile(const std::string &BaseFilename,
WriterContext Context(false, ErrorLock, WriterErrorCodes);
WeightedFile WeightedInput{BaseFilename, 1};
OverlapStats Overlap;
- Error E = Overlap.accumuateCounts(BaseFilename, TestFilename, IsCS);
+ Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
if (E)
exitWithError(std::move(E), "Error in getting profile count sums");
if (Overlap.Base.CountSum < 1.0f) {
@@ -682,7 +740,7 @@ static int overlap_main(int argc, const char *argv[]) {
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n");
std::error_code EC;
- raw_fd_ostream OS(Output.data(), EC, sys::fs::F_Text);
+ raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_Text);
if (EC)
exitWithErrorCode(EC, Output);
@@ -944,10 +1002,21 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
return 0;
}
+static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
+ raw_fd_ostream &OS) {
+ if (!Reader->dumpSectionInfo(OS)) {
+ WithColor::warning() << "-show-sec-info-only is only supported for "
+ << "sample profile in extbinary format and is "
+ << "ignored for other formats.\n";
+ return;
+ }
+}
+
static int showSampleProfile(const std::string &Filename, bool ShowCounts,
bool ShowAllFunctions,
const std::string &ShowFunction,
- raw_fd_ostream &OS) {
+ bool ShowProfileSymbolList,
+ bool ShowSectionInfoOnly, raw_fd_ostream &OS) {
using namespace sampleprof;
LLVMContext Context;
auto ReaderOrErr = SampleProfileReader::create(Filename, Context);
@@ -955,6 +1024,12 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
exitWithErrorCode(EC, Filename);
auto Reader = std::move(ReaderOrErr.get());
+
+ if (ShowSectionInfoOnly) {
+ showSectionInfo(Reader.get(), OS);
+ return 0;
+ }
+
if (std::error_code EC = Reader->read())
exitWithErrorCode(EC, Filename);
@@ -963,6 +1038,12 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
else
Reader->dumpFunctionProfile(ShowFunction, OS);
+ if (ShowProfileSymbolList) {
+ std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
+ Reader->getProfileSymbolList();
+ ReaderList->dump(OS);
+ }
+
return 0;
}
@@ -1015,6 +1096,15 @@ static int show_main(int argc, const char *argv[]) {
"list-below-cutoff", cl::init(false),
cl::desc("Only output names of functions whose max count values are "
"below the cutoff value"));
+ cl::opt<bool> ShowProfileSymbolList(
+ "show-prof-sym-list", cl::init(false),
+ cl::desc("Show profile symbol list if it exists in the profile. "));
+ cl::opt<bool> ShowSectionInfoOnly(
+ "show-sec-info-only", cl::init(false),
+ cl::desc("Show the information of each section in the sample profile. "
+ "The flag is only usable when the sample profile is in "
+ "extbinary format"));
+
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
if (OutputFilename.empty())
@@ -1027,7 +1117,7 @@ static int show_main(int argc, const char *argv[]) {
}
std::error_code EC;
- raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text);
+ raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_Text);
if (EC)
exitWithErrorCode(EC, OutputFilename);
@@ -1042,7 +1132,8 @@ static int show_main(int argc, const char *argv[]) {
OnlyListBelow, ShowFunction, TextFormat, OS);
else
return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
- ShowFunction, OS);
+ ShowFunction, ShowProfileSymbolList,
+ ShowSectionInfoOnly, OS);
}
int main(int argc, const char *argv[]) {
diff --git a/tools/llvm-readobj/ARMEHABIPrinter.h b/tools/llvm-readobj/ARMEHABIPrinter.h
index 11f9d6166a59..2c0912038c31 100644
--- a/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -329,6 +329,7 @@ class PrinterContext {
ScopedPrinter &SW;
const object::ELFFile<ET> *ELF;
+ StringRef FileName;
const Elf_Shdr *Symtab;
ArrayRef<Elf_Word> ShndxTable;
@@ -352,8 +353,8 @@ class PrinterContext {
public:
PrinterContext(ScopedPrinter &SW, const object::ELFFile<ET> *ELF,
- const Elf_Shdr *Symtab)
- : SW(SW), ELF(ELF), Symtab(Symtab) {}
+ StringRef FileName, const Elf_Shdr *Symtab)
+ : SW(SW), ELF(ELF), FileName(FileName), Symtab(Symtab) {}
void PrintUnwindInformation() const;
};
@@ -369,10 +370,10 @@ PrinterContext<ET>::FunctionAtAddress(unsigned Section,
return readobj_error::unknown_symbol;
auto StrTableOrErr = ELF->getStringTableForSymtab(*Symtab);
if (!StrTableOrErr)
- error(StrTableOrErr.takeError());
+ reportError(StrTableOrErr.takeError(), FileName);
StringRef StrTable = *StrTableOrErr;
- for (const Elf_Sym &Sym : unwrapOrError(ELF->symbols(Symtab)))
+ for (const Elf_Sym &Sym : unwrapOrError(FileName, ELF->symbols(Symtab)))
if (Sym.st_shndx == Section && Sym.st_value == Address &&
Sym.getType() == ELF::STT_FUNC) {
auto NameOrErr = Sym.getName(StrTable);
@@ -398,16 +399,16 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
/// handling table. Use this symbol to recover the actual exception handling
/// table.
- for (const Elf_Shdr &Sec : unwrapOrError(ELF->sections())) {
+ for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF->sections())) {
if (Sec.sh_type != ELF::SHT_REL || Sec.sh_info != IndexSectionIndex)
continue;
auto SymTabOrErr = ELF->getSection(Sec.sh_link);
if (!SymTabOrErr)
- error(SymTabOrErr.takeError());
+ reportError(SymTabOrErr.takeError(), FileName);
const Elf_Shdr *SymTab = *SymTabOrErr;
- for (const Elf_Rel &R : unwrapOrError(ELF->rels(&Sec))) {
+ for (const Elf_Rel &R : unwrapOrError(FileName, ELF->rels(&Sec))) {
if (R.r_offset != static_cast<unsigned>(IndexTableOffset))
continue;
@@ -417,7 +418,7 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
RelA.r_addend = 0;
const Elf_Sym *Symbol =
- unwrapOrError(ELF->getRelocationSymbol(&RelA, SymTab));
+ unwrapOrError(FileName, ELF->getRelocationSymbol(&RelA, SymTab));
auto Ret = ELF->getSection(Symbol, SymTab, ShndxTable);
if (!Ret)
@@ -570,7 +571,7 @@ void PrinterContext<ET>::PrintUnwindInformation() const {
DictScope UI(SW, "UnwindInformation");
int SectionIndex = 0;
- for (const Elf_Shdr &Sec : unwrapOrError(ELF->sections())) {
+ for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF->sections())) {
if (Sec.sh_type == ELF::SHT_ARM_EXIDX) {
DictScope UIT(SW, "UnwindIndexTable");
diff --git a/tools/llvm-readobj/ARMWinEHPrinter.cpp b/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 4de14e2e78d5..3e026f58871b 100644
--- a/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -842,8 +842,10 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
if ((int64_t)(Contents.size() - Offset - 4 * HeaderWords(XData) -
(XData.E() ? 0 : XData.EpilogueCount() * 4) -
- (XData.X() ? 8 : 0)) < (int64_t)ByteCodeLength)
+ (XData.X() ? 8 : 0)) < (int64_t)ByteCodeLength) {
+ SW.flush();
report_fatal_error("Malformed unwind data");
+ }
if (XData.E()) {
ArrayRef<uint8_t> UC = XData.UnwindByteCode();
@@ -1039,10 +1041,7 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
}
FunctionAddress = *FunctionAddressOrErr;
} else {
- const pe32_header *PEHeader;
- if (COFF.getPE32Header(PEHeader))
- return false;
- FunctionAddress = PEHeader->ImageBase + RF.BeginAddress;
+ FunctionAddress = COFF.getPE32Header()->ImageBase + RF.BeginAddress;
}
SW.printString("Function", formatSymbol(FunctionName, FunctionAddress));
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index 4c2e39dfa3cc..9b2c6adb9d93 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -60,6 +60,10 @@ using namespace llvm::codeview;
using namespace llvm::support;
using namespace llvm::Win64EH;
+static inline Error createError(const Twine &Err) {
+ return make_error<StringError>(Err, object_error::parse_failed);
+}
+
namespace {
struct LoadConfigTables {
@@ -167,9 +171,6 @@ private:
void printDelayImportedSymbols(
const DelayImportDirectoryEntryRef &I,
iterator_range<imported_symbol_iterator> Range);
- ErrorOr<const coff_resource_dir_entry &>
- getResourceDirectoryTableEntry(const coff_resource_dir_table &Table,
- uint32_t Index);
typedef DenseMap<const coff_section*, std::vector<RelocationRef> > RelocMapTy;
@@ -627,14 +628,10 @@ void COFFDumper::printFileHeaders() {
// Print PE header. This header does not exist if this is an object file and
// not an executable.
- const pe32_header *PEHeader = nullptr;
- error(Obj->getPE32Header(PEHeader));
- if (PEHeader)
+ if (const pe32_header *PEHeader = Obj->getPE32Header())
printPEHeader<pe32_header>(PEHeader);
- const pe32plus_header *PEPlusHeader = nullptr;
- error(Obj->getPE32PlusHeader(PEPlusHeader));
- if (PEPlusHeader)
+ if (const pe32plus_header *PEPlusHeader = Obj->getPE32PlusHeader())
printPEHeader<pe32plus_header>(PEPlusHeader);
if (const dos_header *DH = Obj->getDOSHeader())
@@ -728,7 +725,9 @@ void COFFDumper::printCOFFDebugDirectory() {
if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW) {
const codeview::DebugInfo *DebugInfo;
StringRef PDBFileName;
- error(Obj->getDebugPDBInfo(&D, DebugInfo, PDBFileName));
+ if (std::error_code EC = Obj->getDebugPDBInfo(&D, DebugInfo, PDBFileName))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+
DictScope PDBScope(W, "PDBInfo");
W.printHex("PDBSignature", DebugInfo->Signature.CVSignature);
if (DebugInfo->Signature.CVSignature == OMF::Signature::PDB70) {
@@ -740,8 +739,9 @@ void COFFDumper::printCOFFDebugDirectory() {
// FIXME: Type values of 12 and 13 are commonly observed but are not in
// the documented type enum. Figure out what they mean.
ArrayRef<uint8_t> RawData;
- error(
- Obj->getRvaAndSizeAsBytes(D.AddressOfRawData, D.SizeOfData, RawData));
+ if (std::error_code EC = Obj->getRvaAndSizeAsBytes(D.AddressOfRawData,
+ D.SizeOfData, RawData))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printBinaryBlock("RawData", RawData);
}
}
@@ -750,8 +750,11 @@ void COFFDumper::printCOFFDebugDirectory() {
void COFFDumper::printRVATable(uint64_t TableVA, uint64_t Count,
uint64_t EntrySize, PrintExtraCB PrintExtra) {
uintptr_t TableStart, TableEnd;
- error(Obj->getVaPtr(TableVA, TableStart));
- error(Obj->getVaPtr(TableVA + Count * EntrySize - 1, TableEnd));
+ if (std::error_code EC = Obj->getVaPtr(TableVA, TableStart))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+ if (std::error_code EC =
+ Obj->getVaPtr(TableVA + Count * EntrySize - 1, TableEnd))
+ reportError(errorCodeToError(EC), Obj->getFileName());
TableEnd++;
for (uintptr_t I = TableStart; I < TableEnd; I += EntrySize) {
uint32_t RVA = *reinterpret_cast<const ulittle32_t *>(I);
@@ -887,16 +890,14 @@ void COFFDumper::printBaseOfDataField(const pe32plus_header *) {}
void COFFDumper::printCodeViewDebugInfo() {
// Print types first to build CVUDTNames, then print symbols.
for (const SectionRef &S : Obj->sections()) {
- StringRef SectionName;
- error(S.getName(SectionName));
+ StringRef SectionName = unwrapOrError(Obj->getFileName(), S.getName());
// .debug$T is a standard CodeView type section, while .debug$P is the same
// format but used for MSVC precompiled header object files.
if (SectionName == ".debug$T" || SectionName == ".debug$P")
printCodeViewTypeSection(SectionName, S);
}
for (const SectionRef &S : Obj->sections()) {
- StringRef SectionName;
- error(S.getName(SectionName));
+ StringRef SectionName = unwrapOrError(Obj->getFileName(), S.getName());
if (SectionName == ".debug$S")
printCodeViewSymbolSection(SectionName, S);
}
@@ -908,32 +909,40 @@ void COFFDumper::initializeFileAndStringTables(BinaryStreamReader &Reader) {
// The section consists of a number of subsection in the following format:
// |SubSectionType|SubSectionSize|Contents...|
uint32_t SubType, SubSectionSize;
- error(Reader.readInteger(SubType));
- error(Reader.readInteger(SubSectionSize));
+
+ if (Error E = Reader.readInteger(SubType))
+ reportError(std::move(E), Obj->getFileName());
+ if (Error E = Reader.readInteger(SubSectionSize))
+ reportError(std::move(E), Obj->getFileName());
StringRef Contents;
- error(Reader.readFixedString(Contents, SubSectionSize));
+ if (Error E = Reader.readFixedString(Contents, SubSectionSize))
+ reportError(std::move(E), Obj->getFileName());
BinaryStreamRef ST(Contents, support::little);
switch (DebugSubsectionKind(SubType)) {
case DebugSubsectionKind::FileChecksums:
- error(CVFileChecksumTable.initialize(ST));
+ if (Error E = CVFileChecksumTable.initialize(ST))
+ reportError(std::move(E), Obj->getFileName());
break;
case DebugSubsectionKind::StringTable:
- error(CVStringTable.initialize(ST));
+ if (Error E = CVStringTable.initialize(ST))
+ reportError(std::move(E), Obj->getFileName());
break;
default:
break;
}
uint32_t PaddedSize = alignTo(SubSectionSize, 4);
- error(Reader.skip(PaddedSize - SubSectionSize));
+ if (Error E = Reader.skip(PaddedSize - SubSectionSize))
+ reportError(std::move(E), Obj->getFileName());
}
}
void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
const SectionRef &Section) {
- StringRef SectionContents = unwrapOrError(Section.getContents());
+ StringRef SectionContents =
+ unwrapOrError(Obj->getFileName(), Section.getContents());
StringRef Data = SectionContents;
SmallVector<StringRef, 10> FunctionNames;
@@ -944,10 +953,13 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
W.printNumber("Section", SectionName, Obj->getSectionID(Section));
uint32_t Magic;
- error(consume(Data, Magic));
+ if (Error E = consume(Data, Magic))
+ reportError(std::move(E), Obj->getFileName());
+
W.printHex("Magic", Magic);
if (Magic != COFF::DEBUG_SECTION_MAGIC)
- return error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
BinaryStreamReader FSReader(Data, support::little);
initializeFileAndStringTables(FSReader);
@@ -957,8 +969,10 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
// The section consists of a number of subsections in the following format:
// |SubSectionType|SubSectionSize|Contents...|
uint32_t SubType, SubSectionSize;
- error(consume(Data, SubType));
- error(consume(Data, SubSectionSize));
+ if (Error E = consume(Data, SubType))
+ reportError(std::move(E), Obj->getFileName());
+ if (Error E = consume(Data, SubSectionSize))
+ reportError(std::move(E), Obj->getFileName());
ListScope S(W, "Subsection");
// Dump the subsection as normal even if the ignore bit is set.
@@ -971,7 +985,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
// Get the contents of the subsection.
if (SubSectionSize > Data.size())
- return error(object_error::parse_failed);
+ return reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
StringRef Contents = Data.substr(0, SubSectionSize);
// Add SubSectionSize to the current offset and align that offset to find
@@ -980,7 +995,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
size_t NextOffset = SectionOffset + SubSectionSize;
NextOffset = alignTo(NextOffset, 4);
if (NextOffset > SectionContents.size())
- return error(object_error::parse_failed);
+ return reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
Data = SectionContents.drop_front(NextOffset);
// Optionally print the subsection bytes in case our parsing gets confused
@@ -1010,17 +1026,21 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
if (SubSectionSize < 12) {
// There should be at least three words to store two function
// relocations and size of the code.
- error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
return;
}
StringRef LinkageName;
- error(resolveSymbolName(Obj->getCOFFSection(Section), SectionOffset,
- LinkageName));
+ if (std::error_code EC = resolveSymbolName(Obj->getCOFFSection(Section),
+ SectionOffset, LinkageName))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+
W.printString("LinkageName", LinkageName);
if (FunctionLineTables.count(LinkageName) != 0) {
// Saw debug info for this function already?
- error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
return;
}
@@ -1033,17 +1053,21 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
BinaryStreamReader SR(Contents, llvm::support::little);
DebugFrameDataSubsectionRef FrameData;
- error(FrameData.initialize(SR));
+ if (Error E = FrameData.initialize(SR))
+ reportError(std::move(E), Obj->getFileName());
StringRef LinkageName;
- error(resolveSymbolName(Obj->getCOFFSection(Section), SectionContents,
- FrameData.getRelocPtr(), LinkageName));
+ if (std::error_code EC =
+ resolveSymbolName(Obj->getCOFFSection(Section), SectionContents,
+ FrameData.getRelocPtr(), LinkageName))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printString("LinkageName", LinkageName);
// To find the active frame description, search this array for the
// smallest PC range that includes the current PC.
for (const auto &FD : FrameData) {
- StringRef FrameFunc = error(CVStringTable.getString(FD.FrameFunc));
+ StringRef FrameFunc = unwrapOrError(
+ Obj->getFileName(), CVStringTable.getString(FD.FrameFunc));
DictScope S(W, "FrameData");
W.printHex("RvaStart", FD.RvaStart);
@@ -1094,7 +1118,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
BinaryStreamReader Reader(FunctionLineTables[Name], support::little);
DebugLinesSubsectionRef LineInfo;
- error(LineInfo.initialize(Reader));
+ if (Error E = LineInfo.initialize(Reader))
+ reportError(std::move(E), Obj->getFileName());
W.printHex("Flags", LineInfo.header()->Flags);
W.printHex("CodeSize", LineInfo.header()->CodeSize);
@@ -1105,7 +1130,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
uint32_t ColumnIndex = 0;
for (const auto &Line : Entry.LineNumbers) {
if (Line.Offset >= LineInfo.header()->CodeSize) {
- error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
return;
}
@@ -1136,21 +1162,20 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
StringRef SectionContents) {
ArrayRef<uint8_t> BinaryData(Subsection.bytes_begin(),
Subsection.bytes_end());
- auto CODD = llvm::make_unique<COFFObjectDumpDelegate>(*this, Section, Obj,
+ auto CODD = std::make_unique<COFFObjectDumpDelegate>(*this, Section, Obj,
SectionContents);
CVSymbolDumper CVSD(W, Types, CodeViewContainer::ObjectFile, std::move(CODD),
CompilationCPUType, opts::CodeViewSubsectionBytes);
CVSymbolArray Symbols;
BinaryStreamReader Reader(BinaryData, llvm::support::little);
- if (auto EC = Reader.readArray(Symbols, Reader.getLength())) {
- consumeError(std::move(EC));
+ if (Error E = Reader.readArray(Symbols, Reader.getLength())) {
W.flush();
- error(object_error::parse_failed);
+ reportError(std::move(E), Obj->getFileName());
}
- if (auto EC = CVSD.dump(Symbols)) {
+ if (Error E = CVSD.dump(Symbols)) {
W.flush();
- error(std::move(EC));
+ reportError(std::move(E), Obj->getFileName());
}
CompilationCPUType = CVSD.getCompilationCPUType();
W.flush();
@@ -1159,12 +1184,14 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) {
BinaryStreamRef Stream(Subsection, llvm::support::little);
DebugChecksumsSubsectionRef Checksums;
- error(Checksums.initialize(Stream));
+ if (Error E = Checksums.initialize(Stream))
+ reportError(std::move(E), Obj->getFileName());
for (auto &FC : Checksums) {
DictScope S(W, "FileChecksum");
- StringRef Filename = error(CVStringTable.getString(FC.FileNameOffset));
+ StringRef Filename = unwrapOrError(
+ Obj->getFileName(), CVStringTable.getString(FC.FileNameOffset));
W.printHex("Filename", Filename, FC.FileNameOffset);
W.printHex("ChecksumSize", FC.Checksum.size());
W.printEnum("ChecksumKind", uint8_t(FC.Kind),
@@ -1177,7 +1204,8 @@ void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) {
void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) {
BinaryStreamReader SR(Subsection, llvm::support::little);
DebugInlineeLinesSubsectionRef Lines;
- error(Lines.initialize(SR));
+ if (Error E = Lines.initialize(SR))
+ reportError(std::move(E), Obj->getFileName());
for (auto &Line : Lines) {
DictScope S(W, "InlineeSourceLine");
@@ -1198,15 +1226,18 @@ void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) {
StringRef COFFDumper::getFileNameForFileOffset(uint32_t FileOffset) {
// The file checksum subsection should precede all references to it.
if (!CVFileChecksumTable.valid() || !CVStringTable.valid())
- error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
auto Iter = CVFileChecksumTable.getArray().at(FileOffset);
// Check if the file checksum table offset is valid.
if (Iter == CVFileChecksumTable.end())
- error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
- return error(CVStringTable.getString(Iter->FileNameOffset));
+ return unwrapOrError(Obj->getFileName(),
+ CVStringTable.getString(Iter->FileNameOffset));
}
void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) {
@@ -1219,35 +1250,38 @@ void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs,
GlobalTypeTableBuilder &GlobalCVTypes,
bool GHash) {
for (const SectionRef &S : Obj->sections()) {
- StringRef SectionName;
- error(S.getName(SectionName));
+ StringRef SectionName = unwrapOrError(Obj->getFileName(), S.getName());
if (SectionName == ".debug$T") {
- StringRef Data = unwrapOrError(S.getContents());
+ StringRef Data = unwrapOrError(Obj->getFileName(), S.getContents());
uint32_t Magic;
- error(consume(Data, Magic));
+ if (Error E = consume(Data, Magic))
+ reportError(std::move(E), Obj->getFileName());
+
if (Magic != 4)
- error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
CVTypeArray Types;
BinaryStreamReader Reader(Data, llvm::support::little);
if (auto EC = Reader.readArray(Types, Reader.getLength())) {
consumeError(std::move(EC));
W.flush();
- error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
}
SmallVector<TypeIndex, 128> SourceToDest;
Optional<uint32_t> PCHSignature;
if (GHash) {
std::vector<GloballyHashedType> Hashes =
GloballyHashedType::hashTypes(Types);
- if (auto EC =
+ if (Error E =
mergeTypeAndIdRecords(GlobalCVIDs, GlobalCVTypes, SourceToDest,
Types, Hashes, PCHSignature))
- return error(std::move(EC));
+ return reportError(std::move(E), Obj->getFileName());
} else {
- if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
+ if (Error E = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
PCHSignature))
- return error(std::move(EC));
+ return reportError(std::move(E), Obj->getFileName());
}
}
}
@@ -1258,20 +1292,25 @@ void COFFDumper::printCodeViewTypeSection(StringRef SectionName,
ListScope D(W, "CodeViewTypes");
W.printNumber("Section", SectionName, Obj->getSectionID(Section));
- StringRef Data = unwrapOrError(Section.getContents());
+ StringRef Data = unwrapOrError(Obj->getFileName(), Section.getContents());
if (opts::CodeViewSubsectionBytes)
W.printBinaryBlock("Data", Data);
uint32_t Magic;
- error(consume(Data, Magic));
+ if (Error E = consume(Data, Magic))
+ reportError(std::move(E), Obj->getFileName());
+
W.printHex("Magic", Magic);
if (Magic != COFF::DEBUG_SECTION_MAGIC)
- return error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
Types.reset(Data, 100);
TypeDumpVisitor TDV(Types, &W, opts::CodeViewSubsectionBytes);
- error(codeview::visitTypeStream(Types, TDV));
+ if (Error E = codeview::visitTypeStream(Types, TDV))
+ reportError(std::move(E), Obj->getFileName());
+
W.flush();
}
@@ -1282,8 +1321,7 @@ void COFFDumper::printSectionHeaders() {
++SectionNumber;
const coff_section *Section = Obj->getCOFFSection(Sec);
- StringRef Name;
- error(Sec.getName(Name));
+ StringRef Name = unwrapOrError(Obj->getFileName(), Sec.getName());
DictScope D(W, "Section");
W.printNumber("Number", SectionNumber);
@@ -1318,7 +1356,7 @@ void COFFDumper::printSectionHeaders() {
if (opts::SectionData &&
!(Section->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)) {
- StringRef Data = unwrapOrError(Sec.getContents());
+ StringRef Data = unwrapOrError(Obj->getFileName(), Sec.getContents());
W.printBinaryBlock("SectionData", Data);
}
}
@@ -1330,8 +1368,7 @@ void COFFDumper::printRelocations() {
int SectionNumber = 0;
for (const SectionRef &Section : Obj->sections()) {
++SectionNumber;
- StringRef Name;
- error(Section.getName(Name));
+ StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName());
bool PrintedGroup = false;
for (const RelocationRef &Reloc : Section.relocations()) {
@@ -1362,7 +1399,9 @@ void COFFDumper::printRelocation(const SectionRef &Section,
int64_t SymbolIndex = -1;
if (Symbol != Obj->symbol_end()) {
Expected<StringRef> SymbolNameOrErr = Symbol->getName();
- error(errorToErrorCode(SymbolNameOrErr.takeError()));
+ if (!SymbolNameOrErr)
+ reportError(SymbolNameOrErr.takeError(), Obj->getFileName());
+
SymbolName = *SymbolNameOrErr;
SymbolIndex = Obj->getSymbolIndex(Obj->getCOFFSymbol(*Symbol));
}
@@ -1439,7 +1478,8 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
for (uint8_t I = 0; I < Symbol.getNumberOfAuxSymbols(); ++I) {
if (Symbol.isFunctionDefinition()) {
const coff_aux_function_definition *Aux;
- error(getSymbolAuxData(Obj, Symbol, I, Aux));
+ if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux))
+ reportError(errorCodeToError(EC), Obj->getFileName());
DictScope AS(W, "AuxFunctionDef");
W.printNumber("TagIndex", Aux->TagIndex);
@@ -1449,15 +1489,16 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
} else if (Symbol.isAnyUndefined()) {
const coff_aux_weak_external *Aux;
- error(getSymbolAuxData(Obj, Symbol, I, Aux));
+ if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux))
+ reportError(errorCodeToError(EC), Obj->getFileName());
Expected<COFFSymbolRef> Linked = Obj->getSymbol(Aux->TagIndex);
+ if (!Linked)
+ reportError(Linked.takeError(), Obj->getFileName());
+
StringRef LinkedName;
- std::error_code EC = errorToErrorCode(Linked.takeError());
- if (EC || (EC = Obj->getSymbolName(*Linked, LinkedName))) {
- LinkedName = "";
- error(EC);
- }
+ if (std::error_code EC = Obj->getSymbolName(*Linked, LinkedName))
+ reportError(errorCodeToError(EC), Obj->getFileName());
DictScope AS(W, "AuxWeakExternal");
W.printNumber("Linked", LinkedName, Aux->TagIndex);
@@ -1466,8 +1507,8 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
} else if (Symbol.isFileRecord()) {
const char *FileName;
- error(getSymbolAuxData(Obj, Symbol, I, FileName));
-
+ if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, FileName))
+ reportError(errorCodeToError(EC), Obj->getFileName());
DictScope AS(W, "AuxFileRecord");
StringRef Name(FileName, Symbol.getNumberOfAuxSymbols() *
@@ -1476,7 +1517,8 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
break;
} else if (Symbol.isSectionDefinition()) {
const coff_aux_section_definition *Aux;
- error(getSymbolAuxData(Obj, Symbol, I, Aux));
+ if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux))
+ reportError(errorCodeToError(EC), Obj->getFileName());
int32_t AuxNumber = Aux->getNumber(Symbol.isBigObj());
@@ -1493,26 +1535,27 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
const coff_section *Assoc;
StringRef AssocName = "";
if (std::error_code EC = Obj->getSection(AuxNumber, Assoc))
- error(EC);
+ reportError(errorCodeToError(EC), Obj->getFileName());
Expected<StringRef> Res = getSectionName(Obj, AuxNumber, Assoc);
if (!Res)
- error(Res.takeError());
+ reportError(Res.takeError(), Obj->getFileName());
AssocName = *Res;
W.printNumber("AssocSection", AssocName, AuxNumber);
}
} else if (Symbol.isCLRToken()) {
const coff_aux_clr_token *Aux;
- error(getSymbolAuxData(Obj, Symbol, I, Aux));
+ if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux))
+ reportError(errorCodeToError(EC), Obj->getFileName());
Expected<COFFSymbolRef> ReferredSym =
Obj->getSymbol(Aux->SymbolTableIndex);
+ if (!ReferredSym)
+ reportError(ReferredSym.takeError(), Obj->getFileName());
+
StringRef ReferredName;
- std::error_code EC = errorToErrorCode(ReferredSym.takeError());
- if (EC || (EC = Obj->getSymbolName(*ReferredSym, ReferredName))) {
- ReferredName = "";
- error(EC);
- }
+ if (std::error_code EC = Obj->getSymbolName(*ReferredSym, ReferredName))
+ reportError(errorCodeToError(EC), Obj->getFileName());
DictScope AS(W, "AuxCLRToken");
W.printNumber("AuxType", Aux->AuxType);
@@ -1578,9 +1621,11 @@ void COFFDumper::printImportedSymbols(
iterator_range<imported_symbol_iterator> Range) {
for (const ImportedSymbolRef &I : Range) {
StringRef Sym;
- error(I.getSymbolName(Sym));
+ if (std::error_code EC = I.getSymbolName(Sym))
+ reportError(errorCodeToError(EC), Obj->getFileName());
uint16_t Ordinal;
- error(I.getOrdinal(Ordinal));
+ if (std::error_code EC = I.getOrdinal(Ordinal))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printNumber("Symbol", Sym, Ordinal);
}
}
@@ -1592,12 +1637,17 @@ void COFFDumper::printDelayImportedSymbols(
for (const ImportedSymbolRef &S : Range) {
DictScope Import(W, "Import");
StringRef Sym;
- error(S.getSymbolName(Sym));
+ if (std::error_code EC = S.getSymbolName(Sym))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+
uint16_t Ordinal;
- error(S.getOrdinal(Ordinal));
+ if (std::error_code EC = S.getOrdinal(Ordinal))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printNumber("Symbol", Sym, Ordinal);
+
uint64_t Addr;
- error(I.getImportAddress(Index++, Addr));
+ if (std::error_code EC = I.getImportAddress(Index++, Addr))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printHex("Address", Addr);
}
}
@@ -1607,13 +1657,16 @@ void COFFDumper::printCOFFImports() {
for (const ImportDirectoryEntryRef &I : Obj->import_directories()) {
DictScope Import(W, "Import");
StringRef Name;
- error(I.getName(Name));
+ if (std::error_code EC = I.getName(Name))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printString("Name", Name);
uint32_t ILTAddr;
- error(I.getImportLookupTableRVA(ILTAddr));
+ if (std::error_code EC = I.getImportLookupTableRVA(ILTAddr))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printHex("ImportLookupTableRVA", ILTAddr);
uint32_t IATAddr;
- error(I.getImportAddressTableRVA(IATAddr));
+ if (std::error_code EC = I.getImportAddressTableRVA(IATAddr))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printHex("ImportAddressTableRVA", IATAddr);
// The import lookup table can be missing with certain older linkers, so
// fall back to the import address table in that case.
@@ -1627,10 +1680,12 @@ void COFFDumper::printCOFFImports() {
for (const DelayImportDirectoryEntryRef &I : Obj->delay_import_directories()) {
DictScope Import(W, "DelayImport");
StringRef Name;
- error(I.getName(Name));
+ if (std::error_code EC = I.getName(Name))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printString("Name", Name);
const delay_import_directory_table_entry *Table;
- error(I.getDelayImportTable(Table));
+ if (std::error_code EC = I.getDelayImportTable(Table))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printHex("Attributes", Table->Attributes);
W.printHex("ModuleHandle", Table->ModuleHandle);
W.printHex("ImportAddressTable", Table->DelayImportAddressTable);
@@ -1648,9 +1703,12 @@ void COFFDumper::printCOFFExports() {
StringRef Name;
uint32_t Ordinal, RVA;
- error(E.getSymbolName(Name));
- error(E.getOrdinal(Ordinal));
- error(E.getExportRVA(RVA));
+ if (std::error_code EC = E.getSymbolName(Name))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+ if (std::error_code EC = E.getOrdinal(Ordinal))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+ if (std::error_code EC = E.getExportRVA(RVA))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printNumber("Ordinal", Ordinal);
W.printString("Name", Name);
@@ -1660,13 +1718,12 @@ void COFFDumper::printCOFFExports() {
void COFFDumper::printCOFFDirectives() {
for (const SectionRef &Section : Obj->sections()) {
- StringRef Name;
-
- error(Section.getName(Name));
+ StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName());
if (Name != ".drectve")
continue;
- StringRef Contents = unwrapOrError(Section.getContents());
+ StringRef Contents =
+ unwrapOrError(Obj->getFileName(), Section.getContents());
W.printString("Directive(s)", Contents);
}
}
@@ -1689,8 +1746,10 @@ void COFFDumper::printCOFFBaseReloc() {
for (const BaseRelocRef &I : Obj->base_relocs()) {
uint8_t Type;
uint32_t RVA;
- error(I.getRVA(RVA));
- error(I.getType(Type));
+ if (std::error_code EC = I.getRVA(RVA))
+ reportError(errorCodeToError(EC), Obj->getFileName());
+ if (std::error_code EC = I.getType(Type))
+ reportError(errorCodeToError(EC), Obj->getFileName());
DictScope Import(W, "Entry");
W.printString("Type", getBaseRelocTypeName(Type));
W.printHex("Address", RVA);
@@ -1700,16 +1759,18 @@ void COFFDumper::printCOFFBaseReloc() {
void COFFDumper::printCOFFResources() {
ListScope ResourcesD(W, "Resources");
for (const SectionRef &S : Obj->sections()) {
- StringRef Name;
- error(S.getName(Name));
+ StringRef Name = unwrapOrError(Obj->getFileName(), S.getName());
if (!Name.startswith(".rsrc"))
continue;
- StringRef Ref = unwrapOrError(S.getContents());
+ StringRef Ref = unwrapOrError(Obj->getFileName(), S.getContents());
if ((Name == ".rsrc") || (Name == ".rsrc$01")) {
- ResourceSectionRef RSF(Ref);
- auto &BaseTable = unwrapOrError(RSF.getBaseTable());
+ ResourceSectionRef RSF;
+ Error E = RSF.load(Obj, S);
+ if (E)
+ reportError(std::move(E), Obj->getFileName());
+ auto &BaseTable = unwrapOrError(Obj->getFileName(), RSF.getBaseTable());
W.printNumber("Total Number of Resources",
countTotalTableEntries(RSF, BaseTable, "Type"));
W.printHex("Base Table Address",
@@ -1729,14 +1790,15 @@ COFFDumper::countTotalTableEntries(ResourceSectionRef RSF,
uint32_t TotalEntries = 0;
for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries;
i++) {
- auto Entry = unwrapOrError(getResourceDirectoryTableEntry(Table, i));
+ auto Entry = unwrapOrError(Obj->getFileName(), RSF.getTableEntry(Table, i));
if (Entry.Offset.isSubDir()) {
StringRef NextLevel;
if (Level == "Name")
NextLevel = "Language";
else
NextLevel = "Name";
- auto &NextTable = unwrapOrError(RSF.getEntrySubDir(Entry));
+ auto &NextTable =
+ unwrapOrError(Obj->getFileName(), RSF.getEntrySubDir(Entry));
TotalEntries += countTotalTableEntries(RSF, NextTable, NextLevel);
} else {
TotalEntries += 1;
@@ -1755,13 +1817,13 @@ void COFFDumper::printResourceDirectoryTable(
// Iterate through level in resource directory tree.
for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries;
i++) {
- auto Entry = unwrapOrError(getResourceDirectoryTableEntry(Table, i));
+ auto Entry = unwrapOrError(Obj->getFileName(), RSF.getTableEntry(Table, i));
StringRef Name;
SmallString<20> IDStr;
raw_svector_ostream OS(IDStr);
if (i < Table.NumberOfNameEntries) {
ArrayRef<UTF16> RawEntryNameString =
- unwrapOrError(RSF.getEntryNameString(Entry));
+ unwrapOrError(Obj->getFileName(), RSF.getEntryNameString(Entry));
std::vector<UTF16> EndianCorrectedNameString;
if (llvm::sys::IsBigEndianHost) {
EndianCorrectedNameString.resize(RawEntryNameString.size() + 1);
@@ -1772,14 +1834,14 @@ void COFFDumper::printResourceDirectoryTable(
}
std::string EntryNameString;
if (!llvm::convertUTF16ToUTF8String(RawEntryNameString, EntryNameString))
- error(object_error::parse_failed);
+ reportError(errorCodeToError(object_error::parse_failed),
+ Obj->getFileName());
OS << ": ";
OS << EntryNameString;
} else {
if (Level == "Type") {
OS << ": ";
printResourceTypeName(Entry.Identifier.ID, OS);
- IDStr = IDStr.slice(0, IDStr.find_first_of(")", 0) + 1);
} else {
OS << ": (ID " << Entry.Identifier.ID << ")";
}
@@ -1793,7 +1855,8 @@ void COFFDumper::printResourceDirectoryTable(
NextLevel = "Language";
else
NextLevel = "Name";
- auto &NextTable = unwrapOrError(RSF.getEntrySubDir(Entry));
+ auto &NextTable =
+ unwrapOrError(Obj->getFileName(), RSF.getEntrySubDir(Entry));
printResourceDirectoryTable(RSF, NextTable, NextLevel);
} else {
W.printHex("Entry Offset", Entry.Offset.value());
@@ -1804,24 +1867,29 @@ void COFFDumper::printResourceDirectoryTable(
W.printNumber("Major Version", Table.MajorVersion);
W.printNumber("Minor Version", Table.MinorVersion);
W.printNumber("Characteristics", Table.Characteristics);
+ ListScope DataScope(W, "Data");
+ auto &DataEntry =
+ unwrapOrError(Obj->getFileName(), RSF.getEntryData(Entry));
+ W.printHex("DataRVA", DataEntry.DataRVA);
+ W.printNumber("DataSize", DataEntry.DataSize);
+ W.printNumber("Codepage", DataEntry.Codepage);
+ W.printNumber("Reserved", DataEntry.Reserved);
+ StringRef Contents =
+ unwrapOrError(Obj->getFileName(), RSF.getContents(DataEntry));
+ W.printBinaryBlock("Data", Contents);
}
}
}
-ErrorOr<const coff_resource_dir_entry &>
-COFFDumper::getResourceDirectoryTableEntry(const coff_resource_dir_table &Table,
- uint32_t Index) {
- if (Index >= (uint32_t)(Table.NumberOfNameEntries + Table.NumberOfIDEntries))
- return object_error::parse_failed;
- auto TablePtr = reinterpret_cast<const coff_resource_dir_entry *>(&Table + 1);
- return TablePtr[Index];
-}
-
void COFFDumper::printStackMap() const {
object::SectionRef StackMapSection;
for (auto Sec : Obj->sections()) {
StringRef Name;
- Sec.getName(Name);
+ if (Expected<StringRef> NameOrErr = Sec.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
if (Name == ".llvm_stackmaps") {
StackMapSection = Sec;
break;
@@ -1831,7 +1899,8 @@ void COFFDumper::printStackMap() const {
if (StackMapSection == object::SectionRef())
return;
- StringRef StackMapContents = unwrapOrError(StackMapSection.getContents());
+ StringRef StackMapContents =
+ unwrapOrError(Obj->getFileName(), StackMapSection.getContents());
ArrayRef<uint8_t> StackMapContentsArray =
arrayRefFromStringRef(StackMapContents);
@@ -1847,7 +1916,11 @@ void COFFDumper::printAddrsig() {
object::SectionRef AddrsigSection;
for (auto Sec : Obj->sections()) {
StringRef Name;
- Sec.getName(Name);
+ if (Expected<StringRef> NameOrErr = Sec.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
if (Name == ".llvm_addrsig") {
AddrsigSection = Sec;
break;
@@ -1857,7 +1930,8 @@ void COFFDumper::printAddrsig() {
if (AddrsigSection == object::SectionRef())
return;
- StringRef AddrsigContents = unwrapOrError(AddrsigSection.getContents());
+ StringRef AddrsigContents =
+ unwrapOrError(Obj->getFileName(), AddrsigSection.getContents());
ArrayRef<uint8_t> AddrsigContentsArray(AddrsigContents.bytes_begin(),
AddrsigContents.size());
@@ -1869,15 +1943,15 @@ void COFFDumper::printAddrsig() {
const char *Err;
uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err);
if (Err)
- reportError(Err);
+ reportError(createError(Err), Obj->getFileName());
Expected<COFFSymbolRef> Sym = Obj->getSymbol(SymIndex);
+ if (!Sym)
+ reportError(Sym.takeError(), Obj->getFileName());
+
StringRef SymName;
- std::error_code EC = errorToErrorCode(Sym.takeError());
- if (EC || (EC = Obj->getSymbolName(*Sym, SymName))) {
- SymName = "";
- error(EC);
- }
+ if (std::error_code EC = Obj->getSymbolName(*Sym, SymName))
+ reportError(errorCodeToError(EC), Obj->getFileName());
W.printNumber("Sym", SymName, SymIndex);
Cur += Size;
@@ -1891,7 +1965,8 @@ void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer,
{
ListScope S(Writer, "MergedTypeStream");
TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes);
- error(codeview::visitTypeStream(TpiTypes, TDV));
+ if (Error Err = codeview::visitTypeStream(TpiTypes, TDV))
+ reportError(std::move(Err), "<?>");
Writer.flush();
}
@@ -1902,7 +1977,8 @@ void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer,
ListScope S(Writer, "MergedIDStream");
TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes);
TDV.setIpiTypes(IpiTypes);
- error(codeview::visitTypeStream(IpiTypes, TDV));
+ if (Error Err = codeview::visitTypeStream(IpiTypes, TDV))
+ reportError(std::move(Err), "<?>");
Writer.flush();
}
}
diff --git a/tools/llvm-readobj/DwarfCFIEHPrinter.h b/tools/llvm-readobj/DwarfCFIEHPrinter.h
index 7055510ef2f2..0a365d4fe72a 100644
--- a/tools/llvm-readobj/DwarfCFIEHPrinter.h
+++ b/tools/llvm-readobj/DwarfCFIEHPrinter.h
@@ -44,12 +44,12 @@ public:
void printUnwindInformation() const;
};
-template <class ELFO>
-static const typename ELFO::Elf_Shdr *findSectionByAddress(const ELFO *Obj,
- uint64_t Addr) {
- auto Sections = Obj->sections();
+template <class ELFT>
+static const typename object::ELFObjectFile<ELFT>::Elf_Shdr *
+findSectionByAddress(const object::ELFObjectFile<ELFT> *ObjF, uint64_t Addr) {
+ auto Sections = ObjF->getELFFile()->sections();
if (Error E = Sections.takeError())
- reportError(toString(std::move(E)));
+ reportError(std::move(E), ObjF->getFileName());
for (const auto &Shdr : *Sections)
if (Shdr.sh_addr == Addr)
@@ -64,13 +64,15 @@ void PrinterContext<ELFT>::printUnwindInformation() const {
auto PHs = Obj->program_headers();
if (Error E = PHs.takeError())
- reportError(toString(std::move(E)));
+ reportError(std::move(E), ObjF->getFileName());
for (const auto &Phdr : *PHs) {
if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) {
EHFramePhdr = &Phdr;
if (Phdr.p_memsz != Phdr.p_filesz)
- reportError("p_memsz does not match p_filesz for GNU_EH_FRAME");
+ reportError(object::createError(
+ "p_memsz does not match p_filesz for GNU_EH_FRAME"),
+ ObjF->getFileName());
break;
}
}
@@ -81,12 +83,12 @@ void PrinterContext<ELFT>::printUnwindInformation() const {
auto Sections = Obj->sections();
if (Error E = Sections.takeError())
- reportError(toString(std::move(E)));
+ reportError(std::move(E), ObjF->getFileName());
for (const auto &Shdr : *Sections) {
auto SectionName = Obj->getSectionName(&Shdr);
if (Error E = SectionName.takeError())
- reportError(toString(std::move(E)));
+ reportError(std::move(E), ObjF->getFileName());
if (*SectionName == ".eh_frame")
printEHFrame(&Shdr);
@@ -97,49 +99,52 @@ template <typename ELFT>
void PrinterContext<ELFT>::printEHFrameHdr(uint64_t EHFrameHdrOffset,
uint64_t EHFrameHdrAddress,
uint64_t EHFrameHdrSize) const {
- ListScope L(W, "EH_FRAME Header");
+ DictScope L(W, "EHFrameHeader");
W.startLine() << format("Address: 0x%" PRIx64 "\n", EHFrameHdrAddress);
W.startLine() << format("Offset: 0x%" PRIx64 "\n", EHFrameHdrOffset);
W.startLine() << format("Size: 0x%" PRIx64 "\n", EHFrameHdrSize);
const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const auto *EHFrameHdrShdr = findSectionByAddress(Obj, EHFrameHdrAddress);
+ const auto *EHFrameHdrShdr = findSectionByAddress(ObjF, EHFrameHdrAddress);
if (EHFrameHdrShdr) {
auto SectionName = Obj->getSectionName(EHFrameHdrShdr);
if (Error E = SectionName.takeError())
- reportError(toString(std::move(E)));
+ reportError(std::move(E), ObjF->getFileName());
W.printString("Corresponding Section", *SectionName);
}
- DataExtractor DE(
- StringRef(reinterpret_cast<const char *>(Obj->base()) + EHFrameHdrOffset,
- EHFrameHdrSize),
- ELFT::TargetEndianness == support::endianness::little,
- ELFT::Is64Bits ? 8 : 4);
+ DataExtractor DE(makeArrayRef(Obj->base() + EHFrameHdrOffset, EHFrameHdrSize),
+ ELFT::TargetEndianness == support::endianness::little,
+ ELFT::Is64Bits ? 8 : 4);
DictScope D(W, "Header");
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
auto Version = DE.getU8(&Offset);
W.printNumber("version", Version);
if (Version != 1)
- reportError("only version 1 of .eh_frame_hdr is supported");
+ reportError(
+ object::createError("only version 1 of .eh_frame_hdr is supported"),
+ ObjF->getFileName());
uint64_t EHFramePtrEnc = DE.getU8(&Offset);
W.startLine() << format("eh_frame_ptr_enc: 0x%" PRIx64 "\n", EHFramePtrEnc);
if (EHFramePtrEnc != (dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4))
- reportError("unexpected encoding eh_frame_ptr_enc");
+ reportError(object::createError("unexpected encoding eh_frame_ptr_enc"),
+ ObjF->getFileName());
uint64_t FDECountEnc = DE.getU8(&Offset);
W.startLine() << format("fde_count_enc: 0x%" PRIx64 "\n", FDECountEnc);
if (FDECountEnc != dwarf::DW_EH_PE_udata4)
- reportError("unexpected encoding fde_count_enc");
+ reportError(object::createError("unexpected encoding fde_count_enc"),
+ ObjF->getFileName());
uint64_t TableEnc = DE.getU8(&Offset);
W.startLine() << format("table_enc: 0x%" PRIx64 "\n", TableEnc);
if (TableEnc != (dwarf::DW_EH_PE_datarel | dwarf::DW_EH_PE_sdata4))
- reportError("unexpected encoding table_enc");
+ reportError(object::createError("unexpected encoding table_enc"),
+ ObjF->getFileName());
auto EHFramePtr = DE.getSigned(&Offset, 4) + EHFrameHdrAddress + 4;
W.startLine() << format("eh_frame_ptr: 0x%" PRIx64 "\n", EHFramePtr);
@@ -158,7 +163,8 @@ void PrinterContext<ELFT>::printEHFrameHdr(uint64_t EHFrameHdrOffset,
W.startLine() << format("address: 0x%" PRIx64 "\n", Address);
if (InitialPC < PrevPC)
- reportError("initial_location is out of order");
+ reportError(object::createError("initial_location is out of order"),
+ ObjF->getFileName());
PrevPC = InitialPC;
++NumEntries;
@@ -178,7 +184,7 @@ void PrinterContext<ELFT>::printEHFrame(
const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
auto Result = Obj->getSectionContents(EHFrameShdr);
if (Error E = Result.takeError())
- reportError(toString(std::move(E)));
+ reportError(std::move(E), ObjF->getFileName());
auto Contents = Result.get();
DWARFDataExtractor DE(
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index 4e1cb7d544e7..57144882c4b4 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
@@ -36,6 +37,7 @@
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/RelocationResolver.h"
#include "llvm/Object/StackMapParser.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/ARMAttributeParser.h"
@@ -61,6 +63,7 @@
#include <memory>
#include <string>
#include <system_error>
+#include <unordered_set>
#include <vector>
using namespace llvm;
@@ -119,9 +122,9 @@ template <class ELFT> class DumpStyle;
/// the size, entity size and virtual address are different entries in arbitrary
/// order (DT_REL, DT_RELSZ, DT_RELENT for example).
struct DynRegionInfo {
- DynRegionInfo() = default;
- DynRegionInfo(const void *A, uint64_t S, uint64_t ES)
- : Addr(A), Size(S), EntSize(ES) {}
+ DynRegionInfo(StringRef ObjName) : FileName(ObjName) {}
+ DynRegionInfo(const void *A, uint64_t S, uint64_t ES, StringRef ObjName)
+ : Addr(A), Size(S), EntSize(ES), FileName(ObjName) {}
/// Address in current address space.
const void *Addr = nullptr;
@@ -130,14 +133,18 @@ struct DynRegionInfo {
/// Size of each entity in the region.
uint64_t EntSize = 0;
+ /// Name of the file. Used for error reporting.
+ StringRef FileName;
+
template <typename Type> ArrayRef<Type> getAsArrayRef() const {
const Type *Start = reinterpret_cast<const Type *>(Addr);
if (!Start)
return {Start, Start};
if (EntSize != sizeof(Type) || Size % EntSize) {
// TODO: Add a section index to this warning.
- reportWarning("invalid section size (" + Twine(Size) +
- ") or entity size (" + Twine(EntSize) + ")");
+ reportWarning(createError("invalid section size (" + Twine(Size) +
+ ") or entity size (" + Twine(EntSize) + ")"),
+ FileName);
return {Start, Start};
}
return {Start, Start + (Size / EntSize)};
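A worked example of the validity check above, assuming a 64-bit ELF where sizeof(Elf_Dyn) == 16 (numbers illustrative):

    // Size = 160, EntSize = 16 -> 160 % 16 == 0 -> ArrayRef of 10 entries
    // Size = 100, EntSize = 16 -> 100 % 16 != 0 -> warning + empty ArrayRef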
@@ -166,11 +173,7 @@ public:
void printVersionInfo() override;
void printGroupSections() override;
- void printAttributes() override;
- void printMipsPLTGOT() override;
- void printMipsABIFlags() override;
- void printMipsReginfo() override;
- void printMipsOptions() override;
+ void printArchSpecificInfo() override;
void printStackMap() const override;
@@ -182,6 +185,7 @@ public:
void printNotes() override;
void printELFLinkerOptions() override;
+ void printStackSizes() override;
const object::ELFObjectFile<ELFT> *getElfObject() const { return ObjF; };
@@ -195,20 +199,27 @@ private:
if (DRI.Addr < Obj->base() ||
reinterpret_cast<const uint8_t *>(DRI.Addr) + DRI.Size >
Obj->base() + Obj->getBufSize())
- error(llvm::object::object_error::parse_failed);
+ reportError(errorCodeToError(llvm::object::object_error::parse_failed),
+ ObjF->getFileName());
return DRI;
}
DynRegionInfo createDRIFrom(const Elf_Phdr *P, uintX_t EntSize) {
- return checkDRI(
- {ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, EntSize});
+ return checkDRI({ObjF->getELFFile()->base() + P->p_offset, P->p_filesz,
+ EntSize, ObjF->getFileName()});
}
DynRegionInfo createDRIFrom(const Elf_Shdr *S) {
- return checkDRI(
- {ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, S->sh_entsize});
+ return checkDRI({ObjF->getELFFile()->base() + S->sh_offset, S->sh_size,
+ S->sh_entsize, ObjF->getFileName()});
}
+ void printAttributes();
+ void printMipsReginfo();
+ void printMipsOptions();
+
+ std::pair<const Elf_Phdr *, const Elf_Shdr *>
+ findDynamic(const ELFFile<ELFT> *Obj);
void loadDynamicTable(const ELFFile<ELFT> *Obj);
void parseDynamicTable();
@@ -226,7 +237,7 @@ private:
DynRegionInfo DynSymRegion;
DynRegionInfo DynamicTable;
StringRef DynamicStringTable;
- StringRef SOName = "<Not found>";
+ std::string SOName = "<Not found>";
const Elf_Hash *HashTable = nullptr;
const Elf_GnuHash *GnuHashTable = nullptr;
const Elf_Shdr *DotSymtabSec = nullptr;
@@ -291,7 +302,8 @@ public:
void getSectionNameIndex(const Elf_Sym *Symbol, const Elf_Sym *FirstSym,
StringRef &SectionName,
unsigned &SectionIndex) const;
- std::string getStaticSymbolName(uint32_t Index) const;
+ Expected<std::string> getStaticSymbolName(uint32_t Index) const;
+ std::string getDynamicString(uint64_t Value) const;
StringRef getSymbolVersionByIndex(StringRef StrTab,
uint32_t VersionSymbolIndex,
bool &IsDefault) const;
@@ -328,16 +340,27 @@ void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic) const {
} else {
if (!DotSymtabSec)
return;
- StrTable = unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec));
- Syms = unwrapOrError(Obj->symbols(DotSymtabSec));
- SymtabName = unwrapOrError(Obj->getSectionName(DotSymtabSec));
+ StrTable = unwrapOrError(ObjF->getFileName(),
+ Obj->getStringTableForSymtab(*DotSymtabSec));
+ Syms = unwrapOrError(ObjF->getFileName(), Obj->symbols(DotSymtabSec));
+ SymtabName =
+ unwrapOrError(ObjF->getFileName(), Obj->getSectionName(DotSymtabSec));
Entries = DotSymtabSec->getEntityCount();
}
if (Syms.begin() == Syms.end())
return;
- ELFDumperStyle->printSymtabMessage(Obj, SymtabName, Entries);
+
+ // The st_other field has 2 logical parts. The first two bits hold the symbol
+ // visibility (STV_*) and the remainder hold other platform-specific values.
+ bool NonVisibilityBitsUsed = llvm::find_if(Syms, [](const Elf_Sym &S) {
+ return S.st_other & ~0x3;
+ }) != Syms.end();
+
+ ELFDumperStyle->printSymtabMessage(Obj, SymtabName, Entries,
+ NonVisibilityBitsUsed);
for (const auto &Sym : Syms)
- ELFDumperStyle->printSymbol(Obj, &Sym, Syms.begin(), StrTable, IsDynamic);
+ ELFDumperStyle->printSymbol(Obj, &Sym, Syms.begin(), StrTable, IsDynamic,
+ NonVisibilityBitsUsed);
}
template <class ELFT> class MipsGOTParser;
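The NonVisibilityBitsUsed test above relies on the ELF st_other layout: the low two bits hold the STV_* visibility and the remainder is platform-specific. A minimal sketch (Sym is a placeholder Elf_Sym):

    uint8_t Visibility = Sym.st_other & 0x3;          // STV_DEFAULT..STV_PROTECTED
    bool HasOtherBits  = (Sym.st_other & ~0x3) != 0;  // e.g. STO_MIPS_* flags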
@@ -346,8 +369,20 @@ template <typename ELFT> class DumpStyle {
public:
using Elf_Shdr = typename ELFT::Shdr;
using Elf_Sym = typename ELFT::Sym;
+ using Elf_Addr = typename ELFT::Addr;
+
+ DumpStyle(ELFDumper<ELFT> *Dumper) : Dumper(Dumper) {
+ FileName = this->Dumper->getElfObject()->getFileName();
+
+ // Dumper reports all non-critical errors as warnings.
+ // It does not print the same warning more than once.
+ WarningHandler = [this](const Twine &Msg) {
+ if (Warnings.insert(Msg.str()).second)
+ reportWarning(createError(Msg), FileName);
+ return Error::success();
+ };
+ }
- DumpStyle(ELFDumper<ELFT> *Dumper) : Dumper(Dumper) {}
virtual ~DumpStyle() = default;
virtual void printFileHeaders(const ELFFile<ELFT> *Obj) = 0;
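The WarningHandler installed above deduplicates per dump: a message is inserted into the Warnings set once and reported once. Behavior sketch (calls illustrative):

    // (void)WarningHandler("invalid sh_entsize");  // reported via reportWarning
    // (void)WarningHandler("invalid sh_entsize");  // suppressed: already in set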
@@ -360,10 +395,10 @@ public:
virtual void printDynamic(const ELFFile<ELFT> *Obj) {}
virtual void printDynamicRelocations(const ELFFile<ELFT> *Obj) = 0;
virtual void printSymtabMessage(const ELFFile<ELFT> *Obj, StringRef Name,
- size_t Offset) {}
+ size_t Offset, bool NonVisibilityBitsUsed) {}
virtual void printSymbol(const ELFFile<ELFT> *Obj, const Elf_Sym *Symbol,
const Elf_Sym *FirstSym, StringRef StrTable,
- bool IsDynamic) = 0;
+ bool IsDynamic, bool NonVisibilityBitsUsed) = 0;
virtual void printProgramHeaders(const ELFFile<ELFT> *Obj,
bool PrintProgramHeaders,
cl::boolOrDefault PrintSectionMapping) = 0;
@@ -378,11 +413,31 @@ public:
virtual void printAddrsig(const ELFFile<ELFT> *Obj) = 0;
virtual void printNotes(const ELFFile<ELFT> *Obj) = 0;
virtual void printELFLinkerOptions(const ELFFile<ELFT> *Obj) = 0;
+ virtual void printStackSizes(const ELFObjectFile<ELFT> *Obj) = 0;
+ void printNonRelocatableStackSizes(const ELFObjectFile<ELFT> *Obj,
+ std::function<void()> PrintHeader);
+ void printRelocatableStackSizes(const ELFObjectFile<ELFT> *Obj,
+ std::function<void()> PrintHeader);
+ void printFunctionStackSize(const ELFObjectFile<ELFT> *Obj, uint64_t SymValue,
+ SectionRef FunctionSec,
+ const StringRef SectionName, DataExtractor Data,
+ uint64_t *Offset);
+ void printStackSize(const ELFObjectFile<ELFT> *Obj, RelocationRef Rel,
+ SectionRef FunctionSec,
+ const StringRef &StackSizeSectionName,
+ const RelocationResolver &Resolver, DataExtractor Data);
+ virtual void printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0;
virtual void printMipsGOT(const MipsGOTParser<ELFT> &Parser) = 0;
virtual void printMipsPLT(const MipsGOTParser<ELFT> &Parser) = 0;
+ virtual void printMipsABIFlags(const ELFObjectFile<ELFT> *Obj) = 0;
const ELFDumper<ELFT> *dumper() const { return Dumper; }
+protected:
+ std::function<Error(const Twine &Msg)> WarningHandler;
+ StringRef FileName;
+
private:
+ std::unordered_set<std::string> Warnings;
const ELFDumper<ELFT> *Dumper;
};
@@ -407,8 +462,8 @@ public:
void printHashSymbols(const ELFO *Obj) override;
void printDynamic(const ELFFile<ELFT> *Obj) override;
void printDynamicRelocations(const ELFO *Obj) override;
- void printSymtabMessage(const ELFO *Obj, StringRef Name,
- size_t Offset) override;
+ void printSymtabMessage(const ELFO *Obj, StringRef Name, size_t Offset,
+ bool NonVisibilityBitsUsed) override;
void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders,
cl::boolOrDefault PrintSectionMapping) override;
void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
@@ -422,8 +477,11 @@ public:
void printAddrsig(const ELFFile<ELFT> *Obj) override;
void printNotes(const ELFFile<ELFT> *Obj) override;
void printELFLinkerOptions(const ELFFile<ELFT> *Obj) override;
+ void printStackSizes(const ELFObjectFile<ELFT> *Obj) override;
+ void printStackSizeEntry(uint64_t Size, StringRef FuncName) override;
void printMipsGOT(const MipsGOTParser<ELFT> &Parser) override;
void printMipsPLT(const MipsGOTParser<ELFT> &Parser) override;
+ void printMipsABIFlags(const ELFObjectFile<ELFT> *Obj) override;
private:
struct Field {
@@ -484,7 +542,8 @@ private:
void printRelocation(const ELFO *Obj, const Elf_Sym *Sym,
StringRef SymbolName, const Elf_Rela &R, bool IsRela);
void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
- StringRef StrTable, bool IsDynamic) override;
+ StringRef StrTable, bool IsDynamic,
+ bool NonVisibilityBitsUsed) override;
std::string getSymbolSectionNdx(const ELFO *Obj, const Elf_Sym *Symbol,
const Elf_Sym *FirstSym);
void printDynamicRelocation(const ELFO *Obj, Elf_Rela R, bool IsRela);
@@ -525,8 +584,11 @@ public:
void printAddrsig(const ELFFile<ELFT> *Obj) override;
void printNotes(const ELFFile<ELFT> *Obj) override;
void printELFLinkerOptions(const ELFFile<ELFT> *Obj) override;
+ void printStackSizes(const ELFObjectFile<ELFT> *Obj) override;
+ void printStackSizeEntry(uint64_t Size, StringRef FuncName) override;
void printMipsGOT(const MipsGOTParser<ELFT> &Parser) override;
void printMipsPLT(const MipsGOTParser<ELFT> &Parser) override;
+ void printMipsABIFlags(const ELFObjectFile<ELFT> *Obj) override;
private:
void printRelocation(const ELFO *Obj, Elf_Rela Rel, const Elf_Shdr *SymTab);
@@ -534,7 +596,8 @@ private:
void printSymbols(const ELFO *Obj);
void printDynamicSymbols(const ELFO *Obj);
void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
- StringRef StrTable, bool IsDynamic) override;
+ StringRef StrTable, bool IsDynamic,
+ bool /*NonVisibilityBitsUsed*/) override;
void printProgramHeaders(const ELFO *Obj);
void printSectionMapping(const ELFO *Obj) {}
@@ -680,9 +743,9 @@ StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
sizeof(Elf_Sym);
// Get the corresponding version index entry.
- const Elf_Versym *Versym =
- unwrapOrError(ObjF->getELFFile()->template getEntry<Elf_Versym>(
- SymbolVersionSection, EntryIndex));
+ const Elf_Versym *Versym = unwrapOrError(
+ ObjF->getFileName(), ObjF->getELFFile()->template getEntry<Elf_Versym>(
+ SymbolVersionSection, EntryIndex));
return this->getSymbolVersionByIndex(StrTab, Versym->vs_index, IsDefault);
}
@@ -691,15 +754,22 @@ static std::string maybeDemangle(StringRef Name) {
}
template <typename ELFT>
-std::string ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
+Expected<std::string>
+ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- StringRef StrTable =
- unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec));
- Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec));
- if (Index >= Syms.size())
- reportError("Invalid symbol index");
- const Elf_Sym *Sym = &Syms[Index];
- return maybeDemangle(unwrapOrError(Sym->getName(StrTable)));
+ Expected<const typename ELFT::Sym *> SymOrErr =
+ Obj->getSymbol(DotSymtabSec, Index);
+ if (!SymOrErr)
+ return SymOrErr.takeError();
+
+ Expected<StringRef> StrTabOrErr = Obj->getStringTableForSymtab(*DotSymtabSec);
+ if (!StrTabOrErr)
+ return StrTabOrErr.takeError();
+
+ Expected<StringRef> NameOrErr = (*SymOrErr)->getName(*StrTabOrErr);
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ return maybeDemangle(*NameOrErr);
}
template <typename ELFT>
@@ -717,7 +787,7 @@ StringRef ELFDumper<ELFT>::getSymbolVersionByIndex(StringRef StrTab,
// Lookup this symbol in the version table.
LoadVersionMap();
if (VersionIndex >= VersionMap.size() || VersionMap[VersionIndex].isNull())
- reportError("Invalid version entry");
+ reportError(createError("Invalid version entry"), ObjF->getFileName());
const VersionMapEntry &Entry = VersionMap[VersionIndex];
// Get the version name string.
@@ -731,7 +801,7 @@ StringRef ELFDumper<ELFT>::getSymbolVersionByIndex(StringRef StrTab,
IsDefault = false;
}
if (NameOffset >= StrTab.size())
- reportError("Invalid string offset");
+ reportError(createError("Invalid string offset"), ObjF->getFileName());
return StrTab.data() + NameOffset;
}
@@ -739,14 +809,14 @@ template <typename ELFT>
std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
StringRef StrTable,
bool IsDynamic) const {
- std::string SymbolName =
- maybeDemangle(unwrapOrError(Symbol->getName(StrTable)));
+ std::string SymbolName = maybeDemangle(
+ unwrapOrError(ObjF->getFileName(), Symbol->getName(StrTable)));
if (SymbolName.empty() && Symbol->getType() == ELF::STT_SECTION) {
unsigned SectionIndex;
StringRef SectionName;
- Elf_Sym_Range Syms =
- unwrapOrError(ObjF->getELFFile()->symbols(DotSymtabSec));
+ Elf_Sym_Range Syms = unwrapOrError(
+ ObjF->getFileName(), ObjF->getELFFile()->symbols(DotSymtabSec));
getSectionNameIndex(Symbol, Syms.begin(), SectionName, SectionIndex);
return SectionName;
}
@@ -783,31 +853,32 @@ void ELFDumper<ELFT>::getSectionNameIndex(const Elf_Sym *Symbol,
SectionName = "Reserved";
else {
if (SectionIndex == SHN_XINDEX)
- SectionIndex = unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
- Symbol, FirstSym, ShndxTable));
+ SectionIndex = unwrapOrError(ObjF->getFileName(),
+ object::getExtendedSymbolTableIndex<ELFT>(
+ Symbol, FirstSym, ShndxTable));
const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const typename ELFT::Shdr *Sec =
- unwrapOrError(Obj->getSection(SectionIndex));
- SectionName = unwrapOrError(Obj->getSectionName(Sec));
+ unwrapOrError(ObjF->getFileName(), Obj->getSection(SectionIndex));
+ SectionName = unwrapOrError(ObjF->getFileName(), Obj->getSectionName(Sec));
}
}
template <class ELFO>
static const typename ELFO::Elf_Shdr *
-findNotEmptySectionByAddress(const ELFO *Obj, uint64_t Addr) {
- for (const auto &Shdr : unwrapOrError(Obj->sections()))
+findNotEmptySectionByAddress(const ELFO *Obj, StringRef FileName,
+ uint64_t Addr) {
+ for (const auto &Shdr : unwrapOrError(FileName, Obj->sections()))
if (Shdr.sh_addr == Addr && Shdr.sh_size > 0)
return &Shdr;
return nullptr;
}
template <class ELFO>
-static const typename ELFO::Elf_Shdr *findSectionByName(const ELFO &Obj,
- StringRef Name) {
- for (const auto &Shdr : unwrapOrError(Obj.sections())) {
- if (Name == unwrapOrError(Obj.getSectionName(&Shdr)))
+static const typename ELFO::Elf_Shdr *
+findSectionByName(const ELFO &Obj, StringRef FileName, StringRef Name) {
+ for (const auto &Shdr : unwrapOrError(FileName, Obj.sections()))
+ if (Name == unwrapOrError(FileName, Obj.getSectionName(&Shdr)))
return &Shdr;
- }
return nullptr;
}
@@ -1356,10 +1427,12 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
}
template <typename ELFT>
-void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
+std::pair<const typename ELFT::Phdr *, const typename ELFT::Shdr *>
+ELFDumper<ELFT>::findDynamic(const ELFFile<ELFT> *Obj) {
// Try to locate the PT_DYNAMIC header.
const Elf_Phdr *DynamicPhdr = nullptr;
- for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
+ for (const Elf_Phdr &Phdr :
+ unwrapOrError(ObjF->getFileName(), Obj->program_headers())) {
if (Phdr.p_type != ELF::PT_DYNAMIC)
continue;
DynamicPhdr = &Phdr;
@@ -1368,61 +1441,132 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
// Try to locate the .dynamic section in the sections header table.
const Elf_Shdr *DynamicSec = nullptr;
- for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ for (const Elf_Shdr &Sec :
+ unwrapOrError(ObjF->getFileName(), Obj->sections())) {
if (Sec.sh_type != ELF::SHT_DYNAMIC)
continue;
DynamicSec = &Sec;
break;
}
- // Information in the section header has priority over the information
- // in a PT_DYNAMIC header.
+ if (DynamicPhdr && DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
+ ObjF->getMemoryBufferRef().getBufferSize()) {
+ reportWarning(
+ createError(
+ "PT_DYNAMIC segment offset + size exceeds the size of the file"),
+ ObjF->getFileName());
+ // Don't use the broken dynamic header.
+ DynamicPhdr = nullptr;
+ }
+
+ if (DynamicPhdr && DynamicSec) {
+ StringRef Name =
+ unwrapOrError(ObjF->getFileName(), Obj->getSectionName(DynamicSec));
+ if (DynamicSec->sh_addr + DynamicSec->sh_size >
+ DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
+ DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
+ reportWarning(createError("The SHT_DYNAMIC section '" + Name +
+ "' is not contained within the "
+ "PT_DYNAMIC segment"),
+ ObjF->getFileName());
+
+ if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr)
+ reportWarning(createError("The SHT_DYNAMIC section '" + Name +
+ "' is not at the start of "
+ "PT_DYNAMIC segment"),
+ ObjF->getFileName());
+ }
+
+ return std::make_pair(DynamicPhdr, DynamicSec);
+}
+
+template <typename ELFT>
+void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
+ const Elf_Phdr *DynamicPhdr;
+ const Elf_Shdr *DynamicSec;
+ std::tie(DynamicPhdr, DynamicSec) = findDynamic(Obj);
+ if (!DynamicPhdr && !DynamicSec)
+ return;
+
+ DynRegionInfo FromPhdr(ObjF->getFileName());
+ bool IsPhdrTableValid = false;
+ if (DynamicPhdr) {
+ FromPhdr = createDRIFrom(DynamicPhdr, sizeof(Elf_Dyn));
+ IsPhdrTableValid = !FromPhdr.getAsArrayRef<Elf_Dyn>().empty();
+ }
+
+ // Locate the dynamic table described in a section header.
// Ignore sh_entsize and use the expected value for entry size explicitly.
- // This allows us to dump the dynamic sections with a broken sh_entsize
+ // This allows us to dump dynamic sections with a broken sh_entsize
// field.
+ DynRegionInfo FromSec(ObjF->getFileName());
+ bool IsSecTableValid = false;
if (DynamicSec) {
- DynamicTable = checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset,
- DynamicSec->sh_size, sizeof(Elf_Dyn)});
- parseDynamicTable();
+ FromSec =
+ checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset,
+ DynamicSec->sh_size, sizeof(Elf_Dyn), ObjF->getFileName()});
+ IsSecTableValid = !FromSec.getAsArrayRef<Elf_Dyn>().empty();
}
- // If we have a PT_DYNAMIC header, we will either check the found dynamic
- // section or take the dynamic table data directly from the header.
- if (!DynamicPhdr)
+ // When we have information from only one of the SHT_DYNAMIC section
+ // header or the PT_DYNAMIC program header, just use that.
+ if (!DynamicPhdr || !DynamicSec) {
+ if ((DynamicPhdr && IsPhdrTableValid) || (DynamicSec && IsSecTableValid)) {
+ DynamicTable = DynamicPhdr ? FromPhdr : FromSec;
+ parseDynamicTable();
+ } else {
+ reportWarning(createError("no valid dynamic table was found"),
+ ObjF->getFileName());
+ }
return;
+ }
- if (DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
- ObjF->getMemoryBufferRef().getBufferSize())
- reportError(
- "PT_DYNAMIC segment offset + size exceeds the size of the file");
+ // At this point we have tables found from the section header and from the
+ // dynamic segment. Usually they match, but we have to do sanity checks to
+ // verify that.
- if (!DynamicSec) {
- DynamicTable = createDRIFrom(DynamicPhdr, sizeof(Elf_Dyn));
- parseDynamicTable();
+ if (FromPhdr.Addr != FromSec.Addr)
+ reportWarning(createError("SHT_DYNAMIC section header and PT_DYNAMIC "
+ "program header disagree about "
+ "the location of the dynamic table"),
+ ObjF->getFileName());
+
+ if (!IsPhdrTableValid && !IsSecTableValid) {
+ reportWarning(createError("no valid dynamic table was found"),
+ ObjF->getFileName());
return;
}
- StringRef Name = unwrapOrError(Obj->getSectionName(DynamicSec));
- if (DynamicSec->sh_addr + DynamicSec->sh_size >
- DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
- DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
- reportWarning("The SHT_DYNAMIC section '" + Name +
- "' is not contained within the "
- "PT_DYNAMIC segment");
+ // Information in the PT_DYNAMIC program header has priority over the
+ // information in a section header.
+ if (IsPhdrTableValid) {
+ if (!IsSecTableValid)
+ reportWarning(
+ createError(
+ "SHT_DYNAMIC dynamic table is invalid: PT_DYNAMIC will be used"),
+ ObjF->getFileName());
+ DynamicTable = FromPhdr;
+ } else {
+ reportWarning(
+ createError(
+ "PT_DYNAMIC dynamic table is invalid: SHT_DYNAMIC will be used"),
+ ObjF->getFileName());
+ DynamicTable = FromSec;
+ }
- if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr)
- reportWarning("The SHT_DYNAMIC section '" + Name +
- "' is not at the start of "
- "PT_DYNAMIC segment");
+ parseDynamicTable();
}
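In summary, the rewritten loadDynamicTable degrades to warnings instead of hard errors; its selection logic, condensed from the code above:

    // only PT_DYNAMIC found       -> use it if non-empty, else warn
    // only SHT_DYNAMIC found      -> use it if non-empty, else warn
    // both found, PHDR valid      -> prefer PT_DYNAMIC (warn if SHDR invalid)
    // both found, PHDR invalid    -> fall back to SHT_DYNAMIC, with a warning
    // neither table valid         -> warn: "no valid dynamic table was found"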
template <typename ELFT>
ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
- ScopedPrinter &Writer)
- : ObjDumper(Writer), ObjF(ObjF) {
+ ScopedPrinter &Writer)
+ : ObjDumper(Writer), ObjF(ObjF), DynRelRegion(ObjF->getFileName()),
+ DynRelaRegion(ObjF->getFileName()), DynRelrRegion(ObjF->getFileName()),
+ DynPLTRelRegion(ObjF->getFileName()), DynSymRegion(ObjF->getFileName()),
+ DynamicTable(ObjF->getFileName()) {
const ELFFile<ELFT> *Obj = ObjF->getELFFile();
-
- for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ for (const Elf_Shdr &Sec :
+ unwrapOrError(ObjF->getFileName(), Obj->sections())) {
switch (Sec.sh_type) {
case ELF::SHT_SYMTAB:
if (!DotSymtabSec)
@@ -1433,16 +1577,17 @@ ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
DynSymRegion = createDRIFrom(&Sec);
// This is only used (if the Elf_Shdr is present) for naming the section in
// GNU style.
- DynSymtabName = unwrapOrError(Obj->getSectionName(&Sec));
+ DynSymtabName =
+ unwrapOrError(ObjF->getFileName(), Obj->getSectionName(&Sec));
if (Expected<StringRef> E = Obj->getStringTableForSymtab(Sec))
DynamicStringTable = *E;
else
- warn(E.takeError());
+ reportWarning(E.takeError(), ObjF->getFileName());
}
break;
case ELF::SHT_SYMTAB_SHNDX:
- ShndxTable = unwrapOrError(Obj->getSHNDXTable(Sec));
+ ShndxTable = unwrapOrError(ObjF->getFileName(), Obj->getSHNDXTable(Sec));
break;
case ELF::SHT_GNU_versym:
if (!SymbolVersionSection)
@@ -1547,10 +1692,13 @@ template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * {
auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr);
if (!MappedAddrOrError) {
- reportWarning("Unable to parse DT_" +
- Twine(getTypeString(
- ObjF->getELFFile()->getHeader()->e_machine, Tag)) +
- ": " + llvm::toString(MappedAddrOrError.takeError()));
+ Error Err =
+ createError("Unable to parse DT_" +
+ Twine(getTypeString(
+ ObjF->getELFFile()->getHeader()->e_machine, Tag)) +
+ ": " + llvm::toString(MappedAddrOrError.takeError()));
+
+ reportWarning(std::move(Err), ObjF->getFileName());
return nullptr;
}
return MappedAddrOrError.get();
@@ -1576,10 +1724,29 @@ template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
case ELF::DT_STRSZ:
StringTableSize = Dyn.getVal();
break;
- case ELF::DT_SYMTAB:
- DynSymRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
- DynSymRegion.EntSize = sizeof(Elf_Sym);
+ case ELF::DT_SYMTAB: {
+ // Often we find the information about the dynamic symbol table
+ // location in the SHT_DYNSYM section header. However, the value in
+ // DT_SYMTAB has priority, because it is used by dynamic loaders to
+ // locate .dynsym at runtime. The location we find in the section header
+ // and the location we find here should match. If we can't map the
+ // DT_SYMTAB value to an address (e.g. when there are no program headers), we
+ // ignore its value.
+ if (const uint8_t *VA = toMappedAddr(Dyn.getTag(), Dyn.getPtr())) {
+ // EntSize is non-zero if the dynamic symbol table has been found via a
+ // section header.
+ if (DynSymRegion.EntSize && VA != DynSymRegion.Addr)
+ reportWarning(
+ createError(
+ "SHT_DYNSYM section header and DT_SYMTAB disagree about "
+ "the location of the dynamic symbol table"),
+ ObjF->getFileName());
+
+ DynSymRegion.Addr = VA;
+ DynSymRegion.EntSize = sizeof(Elf_Sym);
+ }
break;
+ }
case ELF::DT_RELA:
DynRelaRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
break;
@@ -1619,8 +1786,9 @@ template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
else if (Dyn.getVal() == DT_RELA)
DynPLTRelRegion.EntSize = sizeof(Elf_Rela);
else
- reportError(Twine("unknown DT_PLTREL value of ") +
- Twine((uint64_t)Dyn.getVal()));
+ reportError(createError(Twine("unknown DT_PLTREL value of ") +
+ Twine((uint64_t)Dyn.getVal())),
+ ObjF->getFileName());
break;
case ELF::DT_JMPREL:
DynPLTRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
@@ -1632,8 +1800,7 @@ template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
}
if (StringTableBegin)
DynamicStringTable = StringRef(StringTableBegin, StringTableSize);
- if (SONameOffset && SONameOffset < DynamicStringTable.size())
- SOName = DynamicStringTable.data() + SONameOffset;
+ SOName = getDynamicString(SONameOffset);
}
template <typename ELFT>
@@ -1715,6 +1882,10 @@ template <class ELFT> void ELFDumper<ELFT>::printELFLinkerOptions() {
ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile());
}
+template <class ELFT> void ELFDumper<ELFT>::printStackSizes() {
+ ELFDumperStyle->printStackSizes(ObjF);
+}
+
#define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \
{ #enum, prefix##_##enum }
@@ -1953,13 +2124,7 @@ void ELFDumper<ELFT>::printDynamicEntry(raw_ostream &OS, uint64_t Type,
{DT_RPATH, "Library rpath"},
{DT_RUNPATH, "Library runpath"},
};
- OS << TagNames.at(Type) << ": ";
- if (DynamicStringTable.empty())
- OS << "<String table is empty or was not found> ";
- else if (Value < DynamicStringTable.size())
- OS << "[" << StringRef(DynamicStringTable.data() + Value) << "]";
- else
- OS << "<Invalid offset 0x" << utohexstr(Value) << ">";
+ OS << TagNames.at(Type) << ": [" << getDynamicString(Value) << "]";
break;
}
case DT_FLAGS:
@@ -1974,6 +2139,15 @@ void ELFDumper<ELFT>::printDynamicEntry(raw_ostream &OS, uint64_t Type,
}
}
+template <class ELFT>
+std::string ELFDumper<ELFT>::getDynamicString(uint64_t Value) const {
+ if (DynamicStringTable.empty())
+ return "<String table is empty or was not found>";
+ if (Value < DynamicStringTable.size())
+ return DynamicStringTable.data() + Value;
+ return Twine("<Invalid offset 0x" + utohexstr(Value) + ">").str();
+}
+
template <class ELFT> void ELFDumper<ELFT>::printUnwindInfo() {
DwarfCFIEH::PrinterContext<ELFT> Ctx(W, ObjF);
Ctx.printUnwindInformation();
@@ -1985,7 +2159,8 @@ template <> void ELFDumper<ELF32LE>::printUnwindInfo() {
const ELFFile<ELF32LE> *Obj = ObjF->getELFFile();
const unsigned Machine = Obj->getHeader()->e_machine;
if (Machine == EM_ARM) {
- ARM::EHABI::PrinterContext<ELF32LE> Ctx(W, Obj, DotSymtabSec);
+ ARM::EHABI::PrinterContext<ELF32LE> Ctx(W, Obj, ObjF->getFileName(),
+ DotSymtabSec);
Ctx.PrintUnwindInformation();
}
DwarfCFIEH::PrinterContext<ELF32LE> Ctx(W, ObjF);
@@ -2001,17 +2176,10 @@ template <class ELFT> void ELFDumper<ELFT>::printDynamicTable() {
template <class ELFT> void ELFDumper<ELFT>::printNeededLibraries() {
ListScope D(W, "NeededLibraries");
- using LibsTy = std::vector<StringRef>;
- LibsTy Libs;
-
+ std::vector<std::string> Libs;
for (const auto &Entry : dynamic_table())
- if (Entry.d_tag == ELF::DT_NEEDED) {
- uint64_t Value = Entry.d_un.d_val;
- if (Value < DynamicStringTable.size())
- Libs.push_back(StringRef(DynamicStringTable.data() + Value));
- else
- Libs.push_back("<Library name index out of range>");
- }
+ if (Entry.d_tag == ELF::DT_NEEDED)
+ Libs.push_back(getDynamicString(Entry.d_un.d_val));
llvm::stable_sort(Libs);
@@ -2042,7 +2210,7 @@ template <typename ELFT> void ELFDumper<ELFT>::printGnuHashTable() {
Elf_Sym_Range Syms = dynamic_symbols();
unsigned NumSyms = std::distance(Syms.begin(), Syms.end());
if (!NumSyms)
- reportError("No dynamic symbol section");
+ reportError(createError("No dynamic symbol section"), ObjF->getFileName());
W.printHexList("Values", GnuHashTable->values(NumSyms));
}
@@ -2050,6 +2218,30 @@ template <typename ELFT> void ELFDumper<ELFT>::printLoadName() {
W.printString("LoadName", SOName);
}
+template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ switch (Obj->getHeader()->e_machine) {
+ case EM_ARM:
+ printAttributes();
+ break;
+ case EM_MIPS: {
+ ELFDumperStyle->printMipsABIFlags(ObjF);
+ printMipsOptions();
+ printMipsReginfo();
+
+ MipsGOTParser<ELFT> Parser(Obj, ObjF->getFileName(), dynamic_table(),
+ dynamic_symbols());
+ if (Parser.hasGot())
+ ELFDumperStyle->printMipsGOT(Parser);
+ if (Parser.hasPlt())
+ ELFDumperStyle->printMipsPLT(Parser);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
template <class ELFT> void ELFDumper<ELFT>::printAttributes() {
W.startLine() << "Attributes not implemented.\n";
}
@@ -2064,11 +2256,13 @@ template <> void ELFDumper<ELF32LE>::printAttributes() {
}
DictScope BA(W, "BuildAttributes");
- for (const ELFO::Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ for (const ELFO::Elf_Shdr &Sec :
+ unwrapOrError(ObjF->getFileName(), Obj->sections())) {
if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES)
continue;
- ArrayRef<uint8_t> Contents = unwrapOrError(Obj->getSectionContents(&Sec));
+ ArrayRef<uint8_t> Contents =
+ unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(&Sec));
if (Contents[0] != ARMBuildAttrs::Format_Version) {
errs() << "unrecognised FormatVersion: 0x"
<< Twine::utohexstr(Contents[0]) << '\n';
@@ -2092,7 +2286,8 @@ public:
const bool IsStatic;
const ELFO * const Obj;
- MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms);
+ MipsGOTParser(const ELFO *Obj, StringRef FileName, Elf_Dyn_Range DynTable,
+ Elf_Sym_Range DynSyms);
bool hasGot() const { return !GotEntries.empty(); }
bool hasPlt() const { return !PltEntries.empty(); }
@@ -2126,6 +2321,8 @@ private:
const Elf_Shdr *PltSec;
const Elf_Shdr *PltRelSec;
const Elf_Shdr *PltSymTable;
+ StringRef FileName;
+
Elf_Sym_Range GotDynSyms;
StringRef PltStrTable;
@@ -2136,21 +2333,24 @@ private:
} // end anonymous namespace
template <class ELFT>
-MipsGOTParser<ELFT>::MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable,
+MipsGOTParser<ELFT>::MipsGOTParser(const ELFO *Obj, StringRef FileName,
+ Elf_Dyn_Range DynTable,
Elf_Sym_Range DynSyms)
: IsStatic(DynTable.empty()), Obj(Obj), GotSec(nullptr), LocalNum(0),
- GlobalNum(0), PltSec(nullptr), PltRelSec(nullptr), PltSymTable(nullptr) {
+ GlobalNum(0), PltSec(nullptr), PltRelSec(nullptr), PltSymTable(nullptr),
+ FileName(FileName) {
// See "Global Offset Table" in Chapter 5 in the following document
// for detailed GOT description.
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
// Find static GOT secton.
if (IsStatic) {
- GotSec = findSectionByName(*Obj, ".got");
+ GotSec = findSectionByName(*Obj, FileName, ".got");
if (!GotSec)
- reportError("Cannot find .got section");
+ return;
- ArrayRef<uint8_t> Content = unwrapOrError(Obj->getSectionContents(GotSec));
+ ArrayRef<uint8_t> Content =
+ unwrapOrError(FileName, Obj->getSectionContents(GotSec));
GotEntries = Entries(reinterpret_cast<const Entry *>(Content.data()),
Content.size() / sizeof(Entry));
LocalNum = GotEntries.size();
@@ -2194,17 +2394,21 @@ MipsGOTParser<ELFT>::MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable,
size_t DynSymTotal = DynSyms.size();
if (*DtGotSym > DynSymTotal)
- reportError("MIPS_GOTSYM exceeds a number of dynamic symbols");
+ reportError(
+ createError("MIPS_GOTSYM exceeds a number of dynamic symbols"),
+ FileName);
- GotSec = findNotEmptySectionByAddress(Obj, *DtPltGot);
+ GotSec = findNotEmptySectionByAddress(Obj, FileName, *DtPltGot);
if (!GotSec)
- reportError("There is no not empty GOT section at 0x" +
- Twine::utohexstr(*DtPltGot));
+ reportError(createError("There is no not empty GOT section at 0x" +
+ Twine::utohexstr(*DtPltGot)),
+ FileName);
LocalNum = *DtLocalGotNum;
GlobalNum = DynSymTotal - *DtGotSym;
- ArrayRef<uint8_t> Content = unwrapOrError(Obj->getSectionContents(GotSec));
+ ArrayRef<uint8_t> Content =
+ unwrapOrError(FileName, Obj->getSectionContents(GotSec));
GotEntries = Entries(reinterpret_cast<const Entry *>(Content.data()),
Content.size() / sizeof(Entry));
GotDynSyms = DynSyms.drop_front(*DtGotSym);
@@ -2217,23 +2421,24 @@ MipsGOTParser<ELFT>::MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable,
if (!DtJmpRel)
report_fatal_error("Cannot find JMPREL dynamic table tag.");
- PltSec = findNotEmptySectionByAddress(Obj, *DtMipsPltGot);
+ PltSec = findNotEmptySectionByAddress(Obj, FileName, * DtMipsPltGot);
if (!PltSec)
report_fatal_error("There is no not empty PLTGOT section at 0x " +
Twine::utohexstr(*DtMipsPltGot));
- PltRelSec = findNotEmptySectionByAddress(Obj, *DtJmpRel);
+ PltRelSec = findNotEmptySectionByAddress(Obj, FileName, * DtJmpRel);
if (!PltRelSec)
report_fatal_error("There is no not empty RELPLT section at 0x" +
Twine::utohexstr(*DtJmpRel));
ArrayRef<uint8_t> PltContent =
- unwrapOrError(Obj->getSectionContents(PltSec));
+ unwrapOrError(FileName, Obj->getSectionContents(PltSec));
PltEntries = Entries(reinterpret_cast<const Entry *>(PltContent.data()),
PltContent.size() / sizeof(Entry));
- PltSymTable = unwrapOrError(Obj->getSection(PltRelSec->sh_link));
- PltStrTable = unwrapOrError(Obj->getStringTableForSymtab(*PltSymTable));
+ PltSymTable = unwrapOrError(FileName, Obj->getSection(PltRelSec->sh_link));
+ PltStrTable =
+ unwrapOrError(FileName, Obj->getStringTableForSymtab(*PltSymTable));
}
}
@@ -2334,26 +2539,16 @@ const typename MipsGOTParser<ELFT>::Elf_Sym *
MipsGOTParser<ELFT>::getPltSym(const Entry *E) const {
int64_t Offset = std::distance(getPltEntries().data(), E);
if (PltRelSec->sh_type == ELF::SHT_REL) {
- Elf_Rel_Range Rels = unwrapOrError(Obj->rels(PltRelSec));
- return unwrapOrError(Obj->getRelocationSymbol(&Rels[Offset], PltSymTable));
+ Elf_Rel_Range Rels = unwrapOrError(FileName, Obj->rels(PltRelSec));
+ return unwrapOrError(FileName,
+ Obj->getRelocationSymbol(&Rels[Offset], PltSymTable));
} else {
- Elf_Rela_Range Rels = unwrapOrError(Obj->relas(PltRelSec));
- return unwrapOrError(Obj->getRelocationSymbol(&Rels[Offset], PltSymTable));
+ Elf_Rela_Range Rels = unwrapOrError(FileName, Obj->relas(PltRelSec));
+ return unwrapOrError(FileName,
+ Obj->getRelocationSymbol(&Rels[Offset], PltSymTable));
}
}
-template <class ELFT> void ELFDumper<ELFT>::printMipsPLTGOT() {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- if (Obj->getHeader()->e_machine != EM_MIPS)
- reportError("MIPS PLT GOT is available for MIPS targets only");
-
- MipsGOTParser<ELFT> Parser(Obj, dynamic_table(), dynamic_symbols());
- if (Parser.hasGot())
- ELFDumperStyle->printMipsGOT(Parser);
- if (Parser.hasPlt())
- ELFDumperStyle->printMipsPLT(Parser);
-}
-
static const EnumEntry<unsigned> ElfMipsISAExtType[] = {
{"None", Mips::AFL_EXT_NONE},
{"Broadcom SB-1", Mips::AFL_EXT_SB1},
@@ -2427,41 +2622,6 @@ static int getMipsRegisterSize(uint8_t Flag) {
}
}
-template <class ELFT> void ELFDumper<ELFT>::printMipsABIFlags() {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.abiflags");
- if (!Shdr) {
- W.startLine() << "There is no .MIPS.abiflags section in the file.\n";
- return;
- }
- ArrayRef<uint8_t> Sec = unwrapOrError(Obj->getSectionContents(Shdr));
- if (Sec.size() != sizeof(Elf_Mips_ABIFlags<ELFT>)) {
- W.startLine() << "The .MIPS.abiflags section has a wrong size.\n";
- return;
- }
-
- auto *Flags = reinterpret_cast<const Elf_Mips_ABIFlags<ELFT> *>(Sec.data());
-
- raw_ostream &OS = W.getOStream();
- DictScope GS(W, "MIPS ABI Flags");
-
- W.printNumber("Version", Flags->version);
- W.startLine() << "ISA: ";
- if (Flags->isa_rev <= 1)
- OS << format("MIPS%u", Flags->isa_level);
- else
- OS << format("MIPS%ur%u", Flags->isa_level, Flags->isa_rev);
- OS << "\n";
- W.printEnum("ISA Extension", Flags->isa_ext, makeArrayRef(ElfMipsISAExtType));
- W.printFlags("ASEs", Flags->ases, makeArrayRef(ElfMipsASEFlags));
- W.printEnum("FP ABI", Flags->fp_abi, makeArrayRef(ElfMipsFpABIType));
- W.printNumber("GPR size", getMipsRegisterSize(Flags->gpr_size));
- W.printNumber("CPR1 size", getMipsRegisterSize(Flags->cpr1_size));
- W.printNumber("CPR2 size", getMipsRegisterSize(Flags->cpr2_size));
- W.printFlags("Flags 1", Flags->flags1, makeArrayRef(ElfMipsFlags1));
- W.printHex("Flags 2", Flags->flags2);
-}
-
template <class ELFT>
static void printMipsReginfoData(ScopedPrinter &W,
const Elf_Mips_RegInfo<ELFT> &Reginfo) {
@@ -2475,12 +2635,13 @@ static void printMipsReginfoData(ScopedPrinter &W,
template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const Elf_Shdr *Shdr = findSectionByName(*Obj, ".reginfo");
+ const Elf_Shdr *Shdr = findSectionByName(*Obj, ObjF->getFileName(), ".reginfo");
if (!Shdr) {
W.startLine() << "There is no .reginfo section in the file.\n";
return;
}
- ArrayRef<uint8_t> Sec = unwrapOrError(Obj->getSectionContents(Shdr));
+ ArrayRef<uint8_t> Sec =
+ unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
if (Sec.size() != sizeof(Elf_Mips_RegInfo<ELFT>)) {
W.startLine() << "The .reginfo section has a wrong size.\n";
return;
@@ -2493,7 +2654,8 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.options");
+ const Elf_Shdr *Shdr =
+ findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.options");
if (!Shdr) {
W.startLine() << "There is no .MIPS.options section in the file.\n";
return;
@@ -2501,7 +2663,8 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
DictScope GS(W, "MIPS Options");
- ArrayRef<uint8_t> Sec = unwrapOrError(Obj->getSectionContents(Shdr));
+ ArrayRef<uint8_t> Sec =
+ unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
while (!Sec.empty()) {
if (Sec.size() < sizeof(Elf_Mips_Options<ELFT>)) {
W.startLine() << "The .MIPS.options section has a wrong size.\n";
@@ -2524,8 +2687,9 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const Elf_Shdr *StackMapSection = nullptr;
- for (const auto &Sec : unwrapOrError(Obj->sections())) {
- StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
+ for (const auto &Sec : unwrapOrError(ObjF->getFileName(), Obj->sections())) {
+ StringRef Name =
+ unwrapOrError(ObjF->getFileName(), Obj->getSectionName(&Sec));
if (Name == ".llvm_stackmaps") {
StackMapSection = &Sec;
break;
@@ -2535,8 +2699,8 @@ template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
if (!StackMapSection)
return;
- ArrayRef<uint8_t> StackMapContentsArray =
- unwrapOrError(Obj->getSectionContents(StackMapSection));
+ ArrayRef<uint8_t> StackMapContentsArray = unwrapOrError(
+ ObjF->getFileName(), Obj->getSectionContents(StackMapSection));
prettyPrintStackMap(
W, StackMapParser<ELFT::TargetEndianness>(StackMapContentsArray));
@@ -2560,24 +2724,26 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
}
template <class ELFT>
-static std::string getSectionHeadersNumString(const ELFFile<ELFT> *Obj) {
+static std::string getSectionHeadersNumString(const ELFFile<ELFT> *Obj,
+ StringRef FileName) {
const typename ELFT::Ehdr *ElfHeader = Obj->getHeader();
if (ElfHeader->e_shnum != 0)
return to_string(ElfHeader->e_shnum);
- ArrayRef<typename ELFT::Shdr> Arr = unwrapOrError(Obj->sections());
+ ArrayRef<typename ELFT::Shdr> Arr = unwrapOrError(FileName, Obj->sections());
if (Arr.empty())
return "0";
return "0 (" + to_string(Arr[0].sh_size) + ")";
}
template <class ELFT>
-static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> *Obj) {
+static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> *Obj,
+ StringRef FileName) {
const typename ELFT::Ehdr *ElfHeader = Obj->getHeader();
if (ElfHeader->e_shstrndx != SHN_XINDEX)
return to_string(ElfHeader->e_shstrndx);
- ArrayRef<typename ELFT::Shdr> Arr = unwrapOrError(Obj->sections());
+ ArrayRef<typename ELFT::Shdr> Arr = unwrapOrError(FileName, Obj->sections());
if (Arr.empty())
return "65535 (corrupt: out of range)";
return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) +
@@ -2639,9 +2805,9 @@ template <class ELFT> void GNUStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
printFields(OS, "Number of program headers:", Str);
Str = to_string(e->e_shentsize) + " (bytes)";
printFields(OS, "Size of section headers:", Str);
- Str = getSectionHeadersNumString(Obj);
+ Str = getSectionHeadersNumString(Obj, this->FileName);
printFields(OS, "Number of section headers:", Str);
- Str = getSectionHeaderTableIndexString(Obj);
+ Str = getSectionHeaderTableIndexString(Obj, this->FileName);
printFields(OS, "Section header string table index:", Str);
}
@@ -2663,26 +2829,29 @@ struct GroupSection {
};
template <class ELFT>
-std::vector<GroupSection> getGroups(const ELFFile<ELFT> *Obj) {
+std::vector<GroupSection> getGroups(const ELFFile<ELFT> *Obj,
+ StringRef FileName) {
using Elf_Shdr = typename ELFT::Shdr;
using Elf_Sym = typename ELFT::Sym;
using Elf_Word = typename ELFT::Word;
std::vector<GroupSection> Ret;
uint64_t I = 0;
- for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ for (const Elf_Shdr &Sec : unwrapOrError(FileName, Obj->sections())) {
++I;
if (Sec.sh_type != ELF::SHT_GROUP)
continue;
- const Elf_Shdr *Symtab = unwrapOrError(Obj->getSection(Sec.sh_link));
- StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*Symtab));
- const Elf_Sym *Sym =
- unwrapOrError(Obj->template getEntry<Elf_Sym>(Symtab, Sec.sh_info));
- auto Data =
- unwrapOrError(Obj->template getSectionContentsAsArray<Elf_Word>(&Sec));
+ const Elf_Shdr *Symtab =
+ unwrapOrError(FileName, Obj->getSection(Sec.sh_link));
+ StringRef StrTable =
+ unwrapOrError(FileName, Obj->getStringTableForSymtab(*Symtab));
+ const Elf_Sym *Sym = unwrapOrError(
+ FileName, Obj->template getEntry<Elf_Sym>(Symtab, Sec.sh_info));
+ auto Data = unwrapOrError(
+ FileName, Obj->template getSectionContentsAsArray<Elf_Word>(&Sec));
- StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
+ StringRef Name = unwrapOrError(FileName, Obj->getSectionName(&Sec));
StringRef Signature = StrTable.data() + Sym->st_name;
Ret.push_back({Name,
maybeDemangle(Signature),
@@ -2695,8 +2864,8 @@ std::vector<GroupSection> getGroups(const ELFFile<ELFT> *Obj) {
std::vector<GroupMember> &GM = Ret.back().Members;
for (uint32_t Ndx : Data.slice(1)) {
- auto Sec = unwrapOrError(Obj->getSection(Ndx));
- const StringRef Name = unwrapOrError(Obj->getSectionName(Sec));
+ auto Sec = unwrapOrError(FileName, Obj->getSection(Ndx));
+ const StringRef Name = unwrapOrError(FileName, Obj->getSectionName(Sec));
GM.push_back({Name, Ndx});
}
}
@@ -2715,7 +2884,7 @@ mapSectionsToGroups(ArrayRef<GroupSection> Groups) {
} // namespace
template <class ELFT> void GNUStyle<ELFT>::printGroupSections(const ELFO *Obj) {
- std::vector<GroupSection> V = getGroups<ELFT>(Obj);
+ std::vector<GroupSection> V = getGroups<ELFT>(Obj, this->FileName);
DenseMap<uint64_t, const GroupSection *> Map = mapSectionsToGroups(V);
for (const GroupSection &G : V) {
OS << "\n"
@@ -2745,14 +2914,17 @@ template <class ELFT> void GNUStyle<ELFT>::printGroupSections(const ELFO *Obj) {
template <class ELFT>
void GNUStyle<ELFT>::printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab,
const Elf_Rela &R, bool IsRela) {
- const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&R, SymTab));
+ const Elf_Sym *Sym =
+ unwrapOrError(this->FileName, Obj->getRelocationSymbol(&R, SymTab));
std::string TargetName;
if (Sym && Sym->getType() == ELF::STT_SECTION) {
const Elf_Shdr *Sec = unwrapOrError(
+ this->FileName,
Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable()));
- TargetName = unwrapOrError(Obj->getSectionName(Sec));
+ TargetName = unwrapOrError(this->FileName, Obj->getSectionName(Sec));
} else if (Sym) {
- StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab));
+ StringRef StrTable =
+ unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*SymTab));
TargetName = this->dumper()->getFullSymbolName(
Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */);
}
@@ -2821,21 +2993,21 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocHeader(unsigned SType) {
template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
bool HasRelocSections = false;
- for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) {
if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA &&
Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_REL &&
Sec.sh_type != ELF::SHT_ANDROID_RELA &&
Sec.sh_type != ELF::SHT_ANDROID_RELR)
continue;
HasRelocSections = true;
- StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
+ StringRef Name = unwrapOrError(this->FileName, Obj->getSectionName(&Sec));
unsigned Entries = Sec.getEntityCount();
std::vector<Elf_Rela> AndroidRelas;
if (Sec.sh_type == ELF::SHT_ANDROID_REL ||
Sec.sh_type == ELF::SHT_ANDROID_RELA) {
// Android's packed relocation section needs to be unpacked first
// to get the actual number of entries.
- AndroidRelas = unwrapOrError(Obj->android_relas(&Sec));
+ AndroidRelas = unwrapOrError(this->FileName, Obj->android_relas(&Sec));
Entries = AndroidRelas.size();
}
std::vector<Elf_Rela> RelrRelas;
@@ -2843,8 +3015,8 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
Sec.sh_type == ELF::SHT_ANDROID_RELR)) {
// .relr.dyn relative relocation section needs to be unpacked first
// to get the actual number of entries.
- Elf_Relr_Range Relrs = unwrapOrError(Obj->relrs(&Sec));
- RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs));
+ Elf_Relr_Range Relrs = unwrapOrError(this->FileName, Obj->relrs(&Sec));
+ RelrRelas = unwrapOrError(this->FileName, Obj->decode_relrs(Relrs));
Entries = RelrRelas.size();
}
uintX_t Offset = Sec.sh_offset;
@@ -2852,10 +3024,11 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
<< to_hexString(Offset, false) << " contains " << Entries
<< " entries:\n";
printRelocHeader(Sec.sh_type);
- const Elf_Shdr *SymTab = unwrapOrError(Obj->getSection(Sec.sh_link));
+ const Elf_Shdr *SymTab =
+ unwrapOrError(this->FileName, Obj->getSection(Sec.sh_link));
switch (Sec.sh_type) {
case ELF::SHT_REL:
- for (const auto &R : unwrapOrError(Obj->rels(&Sec))) {
+ for (const auto &R : unwrapOrError(this->FileName, Obj->rels(&Sec))) {
Elf_Rela Rela;
Rela.r_offset = R.r_offset;
Rela.r_info = R.r_info;
@@ -2864,13 +3037,13 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
}
break;
case ELF::SHT_RELA:
- for (const auto &R : unwrapOrError(Obj->relas(&Sec)))
+ for (const auto &R : unwrapOrError(this->FileName, Obj->relas(&Sec)))
printRelocation(Obj, SymTab, R, true);
break;
case ELF::SHT_RELR:
case ELF::SHT_ANDROID_RELR:
if (opts::RawRelr)
- for (const auto &R : unwrapOrError(Obj->relrs(&Sec)))
+ for (const auto &R : unwrapOrError(this->FileName, Obj->relrs(&Sec)))
OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8))
<< "\n";
else
@@ -2992,6 +3165,12 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) {
return "LLVM_ADDRSIG";
case SHT_LLVM_DEPENDENT_LIBRARIES:
return "LLVM_DEPENDENT_LIBRARIES";
+ case SHT_LLVM_SYMPART:
+ return "LLVM_SYMPART";
+ case SHT_LLVM_PART_EHDR:
+ return "LLVM_PART_EHDR";
+ case SHT_LLVM_PART_PHDR:
+ return "LLVM_PART_PHDR";
// FIXME: Parse processor specific GNU attributes
case SHT_GNU_ATTRIBUTES:
return "ATTRIBUTES";
@@ -3010,29 +3189,9 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) {
}
template <class ELFT>
-static StringRef getSectionName(const typename ELFT::Shdr &Sec,
- const ELFObjectFile<ELFT> &ElfObj,
- ArrayRef<typename ELFT::Shdr> Sections) {
- const ELFFile<ELFT> &Obj = *ElfObj.getELFFile();
- uint32_t Index = Obj.getHeader()->e_shstrndx;
- if (Index == ELF::SHN_XINDEX)
- Index = Sections[0].sh_link;
- if (!Index) // no section string table.
- return "";
- // TODO: Test a case when the sh_link of the section with index 0 is broken.
- if (Index >= Sections.size())
- reportError(ElfObj.getFileName(),
- createError("section header string table index " +
- Twine(Index) + " does not exist"));
- StringRef Data = toStringRef(unwrapOrError(
- Obj.template getSectionContentsAsArray<uint8_t>(&Sections[Index])));
- return unwrapOrError(Obj.getSectionName(&Sec, Data));
-}
-
-template <class ELFT>
void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
unsigned Bias = ELFT::Is64Bits ? 0 : 8;
- ArrayRef<Elf_Shdr> Sections = unwrapOrError(Obj->sections());
+ ArrayRef<Elf_Shdr> Sections = unwrapOrError(this->FileName, Obj->sections());
OS << "There are " << to_string(Sections.size())
<< " section headers, starting at offset "
<< "0x" << to_hexString(Obj->getHeader()->e_shoff, false) << ":\n\n";
@@ -3050,7 +3209,8 @@ void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
size_t SectionIndex = 0;
for (const Elf_Shdr &Sec : Sections) {
Fields[0].Str = to_string(SectionIndex);
- Fields[1].Str = getSectionName(Sec, *ElfObj, Sections);
+ Fields[1].Str = unwrapOrError<StringRef>(
+ ElfObj->getFileName(), Obj->getSectionName(&Sec, this->WarningHandler));
Fields[2].Str =
getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type);
Fields[3].Str =
@@ -3089,7 +3249,8 @@ void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
template <class ELFT>
void GNUStyle<ELFT>::printSymtabMessage(const ELFO *Obj, StringRef Name,
- size_t Entries) {
+ size_t Entries,
+ bool NonVisibilityBitsUsed) {
if (!Name.empty())
OS << "\nSymbol table '" << Name << "' contains " << Entries
<< " entries:\n";
@@ -3097,9 +3258,13 @@ void GNUStyle<ELFT>::printSymtabMessage(const ELFO *Obj, StringRef Name,
OS << "\n Symbol table for image:\n";
if (ELFT::Is64Bits)
- OS << " Num: Value Size Type Bind Vis Ndx Name\n";
+ OS << " Num: Value Size Type Bind Vis";
else
- OS << " Num: Value Size Type Bind Vis Ndx Name\n";
+ OS << " Num: Value Size Type Bind Vis";
+
+ if (NonVisibilityBitsUsed)
+ OS << " ";
+ OS << " Ndx Name\n";
}
template <class ELFT>
@@ -3115,10 +3280,11 @@ std::string GNUStyle<ELFT>::getSymbolSectionNdx(const ELFO *Obj,
case ELF::SHN_COMMON:
return "COM";
case ELF::SHN_XINDEX:
- return to_string(
- format_decimal(unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
- Symbol, FirstSym, this->dumper()->getShndxTable())),
- 3));
+ return to_string(format_decimal(
+ unwrapOrError(this->FileName,
+ object::getExtendedSymbolTableIndex<ELFT>(
+ Symbol, FirstSym, this->dumper()->getShndxTable())),
+ 3));
default:
// Find if:
// Processor specific
@@ -3142,7 +3308,7 @@ std::string GNUStyle<ELFT>::getSymbolSectionNdx(const ELFO *Obj,
template <class ELFT>
void GNUStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
const Elf_Sym *FirstSym, StringRef StrTable,
- bool IsDynamic) {
+ bool IsDynamic, bool NonVisibilityBitsUsed) {
static int Idx = 0;
static bool Dynamic = true;
@@ -3156,7 +3322,7 @@ void GNUStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
Field Fields[8] = {0, 8, 17 + Bias, 23 + Bias,
- 31 + Bias, 38 + Bias, 47 + Bias, 51 + Bias};
+ 31 + Bias, 38 + Bias, 48 + Bias, 51 + Bias};
Fields[0].Str = to_string(format_decimal(Idx++, 6)) + ":";
Fields[1].Str = to_string(
format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8));
@@ -3173,7 +3339,13 @@ void GNUStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
Fields[5].Str =
printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities));
+ if (Symbol->st_other & ~0x3)
+ Fields[5].Str +=
+ " [<other: " + to_string(format_hex(Symbol->st_other, 2)) + ">]";
+
+ Fields[6].Column += NonVisibilityBitsUsed ? 13 : 0;
Fields[6].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym);
+
Fields[7].Str =
this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic);
for (auto &Entry : Fields)
@@ -3193,7 +3365,7 @@ void GNUStyle<ELFT>::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym,
const auto Symbol = FirstSym + Sym;
Fields[2].Str = to_string(
- format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 18 : 8));
+ format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8));
Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5));
unsigned char SymbolType = Symbol->getType();
@@ -3246,10 +3418,21 @@ template <class ELFT> void GNUStyle<ELFT>::printHashSymbols(const ELFO *Obj) {
for (uint32_t Buc = 0; Buc < SysVHash->nbucket; Buc++) {
if (Buckets[Buc] == ELF::STN_UNDEF)
continue;
+ std::vector<bool> Visited(SysVHash->nchain);
for (uint32_t Ch = Buckets[Buc]; Ch < SysVHash->nchain; Ch = Chains[Ch]) {
if (Ch == ELF::STN_UNDEF)
break;
+
+ if (Visited[Ch]) {
+ reportWarning(
+ createError(".hash section is invalid: bucket " + Twine(Ch) +
+ ": a cycle was detected in the linked chain"),
+ this->FileName);
+ break;
+ }
+
printHashedSymbol(Obj, &DynSyms[0], Ch, StringTable, Buc);
+ Visited[Ch] = true;
}
}
}
@@ -3380,7 +3563,8 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
unsigned Width = ELFT::Is64Bits ? 18 : 10;
unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7;
- for (const auto &Phdr : unwrapOrError(Obj->program_headers())) {
+ for (const auto &Phdr :
+ unwrapOrError(this->FileName, Obj->program_headers())) {
Fields[0].Str = getElfPtType(Header->e_machine, Phdr.p_type);
Fields[1].Str = to_string(format_hex(Phdr.p_offset, 8));
Fields[2].Str = to_string(format_hex(Phdr.p_vaddr, Width));
@@ -3404,10 +3588,11 @@ void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
OS << "\n Section to Segment mapping:\n Segment Sections...\n";
DenseSet<const Elf_Shdr *> BelongsToSegment;
int Phnum = 0;
- for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
+ for (const Elf_Phdr &Phdr :
+ unwrapOrError(this->FileName, Obj->program_headers())) {
std::string Sections;
OS << format(" %2.2d ", Phnum++);
- for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) {
// Check if each section is in a segment and then print mapping.
// readelf additionally makes sure it does not print zero sized sections
// at end of segments and for PT_DYNAMIC both start and end of section
@@ -3418,7 +3603,9 @@ void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
if (!TbssInNonTLS && checkTLSSections(Phdr, Sec) &&
checkoffsets(Phdr, Sec) && checkVMA(Phdr, Sec) &&
checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL)) {
- Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + " ";
+ Sections +=
+ unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() +
+ " ";
BelongsToSegment.insert(&Sec);
}
}
@@ -3428,9 +3615,10 @@ void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
// Display sections that do not belong to a segment.
std::string Sections;
- for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) {
if (BelongsToSegment.find(&Sec) == BelongsToSegment.end())
- Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + ' ';
+ Sections +=
+ unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + ' ';
}
if (!Sections.empty()) {
OS << " None " << Sections << '\n';
@@ -3438,14 +3626,40 @@ void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
}
}
+namespace {
+template <class ELFT> struct RelSymbol {
+ const typename ELFT::Sym *Sym;
+ std::string Name;
+};
+
+template <class ELFT>
+RelSymbol<ELFT> getSymbolForReloc(const ELFFile<ELFT> *Obj, StringRef FileName,
+ const ELFDumper<ELFT> *Dumper,
+ const typename ELFT::Rela &Reloc) {
+ uint32_t SymIndex = Reloc.getSymbol(Obj->isMips64EL());
+ const typename ELFT::Sym *Sym = Dumper->dynamic_symbols().begin() + SymIndex;
+ Expected<StringRef> ErrOrName = Sym->getName(Dumper->getDynamicStringTable());
+
+ std::string Name;
+ if (ErrOrName) {
+ Name = maybeDemangle(*ErrOrName);
+ } else {
+ reportWarning(
+ createError("unable to get name of the dynamic symbol with index " +
+ Twine(SymIndex) + ": " + toString(ErrOrName.takeError())),
+ FileName);
+ Name = "<corrupt>";
+ }
+
+ return {Sym, std::move(Name)};
+}
+} // namespace
+
template <class ELFT>
void GNUStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela R,
bool IsRela) {
- uint32_t SymIndex = R.getSymbol(Obj->isMips64EL());
- const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex;
- std::string SymbolName = maybeDemangle(
- unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable())));
- printRelocation(Obj, Sym, SymbolName, R, IsRela);
+ RelSymbol<ELFT> S = getSymbolForReloc(Obj, this->FileName, this->dumper(), R);
+ printRelocation(Obj, S.Sym, S.Name, R, IsRela);
}
template <class ELFT> void GNUStyle<ELFT>::printDynamic(const ELFO *Obj) {
@@ -3518,7 +3732,8 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
<< " contains " << DynRelrRegion.Size << " bytes:\n";
printRelocHeader(ELF::SHT_REL);
Elf_Relr_Range Relrs = this->dumper()->dyn_relrs();
- std::vector<Elf_Rela> RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs));
+ std::vector<Elf_Rela> RelrRelas =
+ unwrapOrError(this->FileName, Obj->decode_relrs(Relrs));
for (const Elf_Rela &Rela : RelrRelas) {
printDynamicRelocation(Obj, Rela, false);
}
@@ -3550,14 +3765,15 @@ template <class ELFT>
static void printGNUVersionSectionProlog(formatted_raw_ostream &OS,
const Twine &Name, unsigned EntriesNum,
const ELFFile<ELFT> *Obj,
- const typename ELFT::Shdr *Sec) {
- StringRef SecName = unwrapOrError(Obj->getSectionName(Sec));
+ const typename ELFT::Shdr *Sec,
+ StringRef FileName) {
+ StringRef SecName = unwrapOrError(FileName, Obj->getSectionName(Sec));
OS << Name << " section '" << SecName << "' "
<< "contains " << EntriesNum << " entries:\n";
const typename ELFT::Shdr *SymTab =
- unwrapOrError(Obj->getSection(Sec->sh_link));
- StringRef SymTabName = unwrapOrError(Obj->getSectionName(SymTab));
+ unwrapOrError(FileName, Obj->getSection(Sec->sh_link));
+ StringRef SymTabName = unwrapOrError(FileName, Obj->getSectionName(SymTab));
OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16)
<< " Offset: " << format_hex(Sec->sh_offset, 8)
<< " Link: " << Sec->sh_link << " (" << SymTabName << ")\n";
@@ -3570,7 +3786,8 @@ void GNUStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
return;
unsigned Entries = Sec->sh_size / sizeof(Elf_Versym);
- printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec);
+ printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec,
+ this->FileName);
const uint8_t *VersymBuf =
reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
@@ -3642,14 +3859,17 @@ void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
return;
unsigned VerDefsNum = Sec->sh_info;
- printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec);
+ printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec,
+ this->FileName);
- const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
+ const Elf_Shdr *StrTabSec =
+ unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
StringRef StringTable(
reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
(size_t)StrTabSec->sh_size);
- const uint8_t *VerdefBuf = unwrapOrError(Obj->getSectionContents(Sec)).data();
+ const uint8_t *VerdefBuf =
+ unwrapOrError(this->FileName, Obj->getSectionContents(Sec)).data();
const uint8_t *Begin = VerdefBuf;
while (VerDefsNum--) {
@@ -3684,11 +3904,14 @@ void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
return;
unsigned VerneedNum = Sec->sh_info;
- printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec);
+ printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec,
+ this->FileName);
- ArrayRef<uint8_t> SecData = unwrapOrError(Obj->getSectionContents(Sec));
+ ArrayRef<uint8_t> SecData =
+ unwrapOrError(this->FileName, Obj->getSectionContents(Sec));
- const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
+ const Elf_Shdr *StrTabSec =
+ unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
StringRef StringTable = {
reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
(size_t)StrTabSec->sh_size};
@@ -3745,9 +3968,21 @@ void GNUStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
// Go over all buckets and and note chain lengths of each bucket (total
// unique chain lengths).
for (size_t B = 0; B < NBucket; B++) {
- for (size_t C = Buckets[B]; C > 0 && C < NChain; C = Chains[C])
+ std::vector<bool> Visited(NChain);
+ for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) {
+ if (C == ELF::STN_UNDEF)
+ break;
+ if (Visited[C]) {
+ reportWarning(
+ createError(".hash section is invalid: bucket " + Twine(C) +
+ ": a cycle was detected in the linked chain"),
+ this->FileName);
+ break;
+ }
+ Visited[C] = true;
if (MaxChain <= ++ChainLen[B])
MaxChain++;
+ }
TotalSyms += ChainLen[B];
}
@@ -3829,7 +4064,7 @@ void GNUStyle<ELFT>::printCGProfile(const ELFFile<ELFT> *Obj) {
template <class ELFT>
void GNUStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
- OS << "GNUStyle::printAddrsig not implemented\n";
+ reportError(createError("--addrsig: not implemented"), this->FileName);
}
static StringRef getGenericNoteTypeName(const uint32_t NT) {
@@ -3850,6 +4085,86 @@ static StringRef getGenericNoteTypeName(const uint32_t NT) {
return "";
}
+static StringRef getCoreNoteTypeName(const uint32_t NT) {
+ static const struct {
+ uint32_t ID;
+ const char *Name;
+ } Notes[] = {
+ {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"},
+ {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"},
+ {ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"},
+ {ELF::NT_TASKSTRUCT, "NT_TASKSTRUCT (task structure)"},
+ {ELF::NT_AUXV, "NT_AUXV (auxiliary vector)"},
+ {ELF::NT_PSTATUS, "NT_PSTATUS (pstatus structure)"},
+ {ELF::NT_FPREGS, "NT_FPREGS (floating point registers)"},
+ {ELF::NT_PSINFO, "NT_PSINFO (psinfo structure)"},
+ {ELF::NT_LWPSTATUS, "NT_LWPSTATUS (lwpstatus_t structure)"},
+ {ELF::NT_LWPSINFO, "NT_LWPSINFO (lwpsinfo_t structure)"},
+ {ELF::NT_WIN32PSTATUS, "NT_WIN32PSTATUS (win32_pstatus structure)"},
+
+ {ELF::NT_PPC_VMX, "NT_PPC_VMX (ppc Altivec registers)"},
+ {ELF::NT_PPC_VSX, "NT_PPC_VSX (ppc VSX registers)"},
+ {ELF::NT_PPC_TAR, "NT_PPC_TAR (ppc TAR register)"},
+ {ELF::NT_PPC_PPR, "NT_PPC_PPR (ppc PPR register)"},
+ {ELF::NT_PPC_DSCR, "NT_PPC_DSCR (ppc DSCR register)"},
+ {ELF::NT_PPC_EBB, "NT_PPC_EBB (ppc EBB registers)"},
+ {ELF::NT_PPC_PMU, "NT_PPC_PMU (ppc PMU registers)"},
+ {ELF::NT_PPC_TM_CGPR, "NT_PPC_TM_CGPR (ppc checkpointed GPR registers)"},
+ {ELF::NT_PPC_TM_CFPR,
+ "NT_PPC_TM_CFPR (ppc checkpointed floating point registers)"},
+ {ELF::NT_PPC_TM_CVMX,
+ "NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)"},
+ {ELF::NT_PPC_TM_CVSX, "NT_PPC_TM_CVSX (ppc checkpointed VSX registers)"},
+ {ELF::NT_PPC_TM_SPR, "NT_PPC_TM_SPR (ppc TM special purpose registers)"},
+ {ELF::NT_PPC_TM_CTAR, "NT_PPC_TM_CTAR (ppc checkpointed TAR register)"},
+ {ELF::NT_PPC_TM_CPPR, "NT_PPC_TM_CPPR (ppc checkpointed PPR register)"},
+ {ELF::NT_PPC_TM_CDSCR,
+ "NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)"},
+
+ {ELF::NT_386_TLS, "NT_386_TLS (x86 TLS information)"},
+ {ELF::NT_386_IOPERM, "NT_386_IOPERM (x86 I/O permissions)"},
+ {ELF::NT_X86_XSTATE, "NT_X86_XSTATE (x86 XSAVE extended state)"},
+
+ {ELF::NT_S390_HIGH_GPRS,
+ "NT_S390_HIGH_GPRS (s390 upper register halves)"},
+ {ELF::NT_S390_TIMER, "NT_S390_TIMER (s390 timer register)"},
+ {ELF::NT_S390_TODCMP, "NT_S390_TODCMP (s390 TOD comparator register)"},
+ {ELF::NT_S390_TODPREG,
+ "NT_S390_TODPREG (s390 TOD programmable register)"},
+ {ELF::NT_S390_CTRS, "NT_S390_CTRS (s390 control registers)"},
+ {ELF::NT_S390_PREFIX, "NT_S390_PREFIX (s390 prefix register)"},
+ {ELF::NT_S390_LAST_BREAK,
+ "NT_S390_LAST_BREAK (s390 last breaking event address)"},
+ {ELF::NT_S390_SYSTEM_CALL,
+ "NT_S390_SYSTEM_CALL (s390 system call restart data)"},
+ {ELF::NT_S390_TDB, "NT_S390_TDB (s390 transaction diagnostic block)"},
+ {ELF::NT_S390_VXRS_LOW,
+ "NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)"},
+ {ELF::NT_S390_VXRS_HIGH,
+ "NT_S390_VXRS_HIGH (s390 vector registers 16-31)"},
+ {ELF::NT_S390_GS_CB, "NT_S390_GS_CB (s390 guarded-storage registers)"},
+ {ELF::NT_S390_GS_BC,
+ "NT_S390_GS_BC (s390 guarded-storage broadcast control)"},
+
+ {ELF::NT_ARM_VFP, "NT_ARM_VFP (arm VFP registers)"},
+ {ELF::NT_ARM_TLS, "NT_ARM_TLS (AArch TLS registers)"},
+ {ELF::NT_ARM_HW_BREAK,
+ "NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"},
+ {ELF::NT_ARM_HW_WATCH,
+ "NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"},
+
+ {ELF::NT_FILE, "NT_FILE (mapped files)"},
+ {ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"},
+ {ELF::NT_SIGINFO, "NT_SIGINFO (siginfo_t data)"},
+ };
+
+ for (const auto &Note : Notes)
+ if (Note.ID == NT)
+ return Note.Name;
+
+ return "";
+}
+
static std::string getGNUNoteTypeName(const uint32_t NT) {
static const struct {
uint32_t ID;
@@ -4207,13 +4522,85 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
}
}
+struct CoreFileMapping {
+ uint64_t Start, End, Offset;
+ StringRef Filename;
+};
+
+struct CoreNote {
+ uint64_t PageSize;
+ std::vector<CoreFileMapping> Mappings;
+};
+
+static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
+ // Expected format of the NT_FILE note description:
+ // 1. # of file mappings (call it N)
+ // 2. Page size
+ // 3. N (start, end, offset) triples
+ // 4. N packed filenames (null delimited)
+ // Each field is an Elf_Addr, except for filenames which are char* strings.
+
+ CoreNote Ret;
+ const int Bytes = Desc.getAddressSize();
+
+ if (!Desc.isValidOffsetForAddress(2))
+ return createStringError(object_error::parse_failed,
+ "malformed note: header too short");
+ if (Desc.getData().back() != 0)
+ return createStringError(object_error::parse_failed,
+ "malformed note: not NUL terminated");
+
+ uint64_t DescOffset = 0;
+ uint64_t FileCount = Desc.getAddress(&DescOffset);
+ Ret.PageSize = Desc.getAddress(&DescOffset);
+
+ if (!Desc.isValidOffsetForAddress(3 * FileCount * Bytes))
+ return createStringError(object_error::parse_failed,
+ "malformed note: too short for number of files");
+
+ uint64_t FilenamesOffset = 0;
+ DataExtractor Filenames(
+ Desc.getData().drop_front(DescOffset + 3 * FileCount * Bytes),
+ Desc.isLittleEndian(), Desc.getAddressSize());
+
+ Ret.Mappings.resize(FileCount);
+ for (CoreFileMapping &Mapping : Ret.Mappings) {
+ if (!Filenames.isValidOffsetForDataOfSize(FilenamesOffset, 1))
+ return createStringError(object_error::parse_failed,
+ "malformed note: too few filenames");
+ Mapping.Start = Desc.getAddress(&DescOffset);
+ Mapping.End = Desc.getAddress(&DescOffset);
+ Mapping.Offset = Desc.getAddress(&DescOffset);
+ Mapping.Filename = Filenames.getCStrRef(&FilenamesOffset);
+ }
+
+ return Ret;
+}
+
+template <typename ELFT>
+static void printCoreNote(raw_ostream &OS, const CoreNote &Note) {
+ // Length of "0x<address>" string.
+ const int FieldWidth = ELFT::Is64Bits ? 18 : 10;
+
+ OS << " Page size: " << format_decimal(Note.PageSize, 0) << '\n';
+ OS << " " << right_justify("Start", FieldWidth) << " "
+ << right_justify("End", FieldWidth) << " "
+ << right_justify("Page Offset", FieldWidth) << '\n';
+ for (const CoreFileMapping &Mapping : Note.Mappings) {
+ OS << " " << format_hex(Mapping.Start, FieldWidth) << " "
+ << format_hex(Mapping.End, FieldWidth) << " "
+ << format_hex(Mapping.Offset, FieldWidth) << "\n "
+ << Mapping.Filename << '\n';
+ }
+}
+
template <class ELFT>
void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
auto PrintHeader = [&](const typename ELFT::Off Offset,
const typename ELFT::Addr Size) {
OS << "Displaying notes found at file offset " << format_hex(Offset, 10)
<< " with length " << format_hex(Size, 10) << ":\n"
- << " Owner Data size\tDescription\n";
+ << " Owner Data size \tDescription\n";
};
auto ProcessNote = [&](const Elf_Note &Note) {
@@ -4221,55 +4608,81 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
ArrayRef<uint8_t> Descriptor = Note.getDesc();
Elf_Word Type = Note.getType();
- OS << " " << Name << std::string(22 - Name.size(), ' ')
+ // Print the note owner/type.
+ OS << " " << left_justify(Name, 20) << ' '
<< format_hex(Descriptor.size(), 10) << '\t';
-
if (Name == "GNU") {
OS << getGNUNoteTypeName(Type) << '\n';
- printGNUNote<ELFT>(OS, Type, Descriptor);
} else if (Name == "FreeBSD") {
OS << getFreeBSDNoteTypeName(Type) << '\n';
} else if (Name == "AMD") {
OS << getAMDNoteTypeName(Type) << '\n';
+ } else if (Name == "AMDGPU") {
+ OS << getAMDGPUNoteTypeName(Type) << '\n';
+ } else {
+ StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE
+ ? getCoreNoteTypeName(Type)
+ : getGenericNoteTypeName(Type);
+ if (!NoteType.empty())
+ OS << NoteType << '\n';
+ else
+ OS << "Unknown note type: (" << format_hex(Type, 10) << ")\n";
+ }
+
+ // Print the description, or fallback to printing raw bytes for unknown
+ // owners.
+ if (Name == "GNU") {
+ printGNUNote<ELFT>(OS, Type, Descriptor);
+ } else if (Name == "AMD") {
const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
if (!N.Type.empty())
OS << " " << N.Type << ":\n " << N.Value << '\n';
} else if (Name == "AMDGPU") {
- OS << getAMDGPUNoteTypeName(Type) << '\n';
const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
if (!N.Type.empty())
OS << " " << N.Type << ":\n " << N.Value << '\n';
- } else {
- StringRef NoteType = getGenericNoteTypeName(Type);
- if (!NoteType.empty())
- OS << NoteType;
- else
- OS << "Unknown note type: (" << format_hex(Type, 10) << ')';
+ } else if (Name == "CORE") {
+ if (Type == ELF::NT_FILE) {
+ DataExtractor DescExtractor(Descriptor,
+ ELFT::TargetEndianness == support::little,
+ sizeof(Elf_Addr));
+ Expected<CoreNote> Note = readCoreNote(DescExtractor);
+ if (Note)
+ printCoreNote<ELFT>(OS, *Note);
+ else
+ reportWarning(Note.takeError(), this->FileName);
+ }
+ } else if (!Descriptor.empty()) {
+ OS << " description data:";
+ for (uint8_t B : Descriptor)
+ OS << " " << format("%02x", B);
+ OS << '\n';
}
- OS << '\n';
};
- if (Obj->getHeader()->e_type == ELF::ET_CORE) {
- for (const auto &P : unwrapOrError(Obj->program_headers())) {
- if (P.p_type != PT_NOTE)
+ ArrayRef<Elf_Shdr> Sections = unwrapOrError(this->FileName, Obj->sections());
+ if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) {
+ for (const auto &S : Sections) {
+ if (S.sh_type != SHT_NOTE)
continue;
- PrintHeader(P.p_offset, P.p_filesz);
+ PrintHeader(S.sh_offset, S.sh_size);
Error Err = Error::success();
- for (const auto &Note : Obj->notes(P, Err))
+ for (const auto &Note : Obj->notes(S, Err))
ProcessNote(Note);
if (Err)
- error(std::move(Err));
+ reportError(std::move(Err), this->FileName);
}
} else {
- for (const auto &S : unwrapOrError(Obj->sections())) {
- if (S.sh_type != SHT_NOTE)
+ for (const auto &P :
+ unwrapOrError(this->FileName, Obj->program_headers())) {
+ if (P.p_type != PT_NOTE)
continue;
- PrintHeader(S.sh_offset, S.sh_size);
+ PrintHeader(P.p_offset, P.p_filesz);
Error Err = Error::success();
- for (const auto &Note : Obj->notes(S, Err))
+ for (const auto &Note : Obj->notes(P, Err))
ProcessNote(Note);
if (Err)
- error(std::move(Err));
+ reportError(std::move(Err), this->FileName);
}
}
}
@@ -4279,6 +4692,294 @@ void GNUStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
OS << "printELFLinkerOptions not implemented!\n";
}
+// Used for printing section names in places where possible errors can be
+// ignored.
+static StringRef getSectionName(const SectionRef &Sec) {
+ Expected<StringRef> NameOrErr = Sec.getName();
+ if (NameOrErr)
+ return *NameOrErr;
+ consumeError(NameOrErr.takeError());
+ return "<?>";
+}
+
+// Used for printing symbol names in places where possible errors can be
+// ignored.
+static std::string getSymbolName(const ELFSymbolRef &Sym) {
+ Expected<StringRef> NameOrErr = Sym.getName();
+ if (NameOrErr)
+ return maybeDemangle(*NameOrErr);
+ consumeError(NameOrErr.takeError());
+ return "<?>";
+}
+
+template <class ELFT>
+void DumpStyle<ELFT>::printFunctionStackSize(
+ const ELFObjectFile<ELFT> *Obj, uint64_t SymValue, SectionRef FunctionSec,
+ const StringRef SectionName, DataExtractor Data, uint64_t *Offset) {
+ // This function ignores potentially erroneous input, unless it is directly
+ // related to stack size reporting.
+ SymbolRef FuncSym;
+ for (const ELFSymbolRef &Symbol : Obj->symbols()) {
+ Expected<uint64_t> SymAddrOrErr = Symbol.getAddress();
+ if (!SymAddrOrErr) {
+ consumeError(SymAddrOrErr.takeError());
+ continue;
+ }
+ if (Symbol.getELFType() == ELF::STT_FUNC && *SymAddrOrErr == SymValue) {
+ // Check if the symbol is in the right section.
+ if (FunctionSec.containsSymbol(Symbol)) {
+ FuncSym = Symbol;
+ break;
+ }
+ }
+ }
+
+ std::string FuncName = "?";
+ // A valid SymbolRef has a non-null object file pointer.
+ if (FuncSym.BasicSymbolRef::getObject())
+ FuncName = getSymbolName(FuncSym);
+ else
+ reportWarning(
+ createError("could not identify function symbol for stack size entry"),
+ Obj->getFileName());
+
+ // Extract the size. The expectation is that Offset is pointing to the right
+ // place, i.e. past the function address.
+ uint64_t PrevOffset = *Offset;
+ uint64_t StackSize = Data.getULEB128(Offset);
+ // getULEB128() does not advance Offset if it is not able to extract a valid
+ // integer.
+ if (*Offset == PrevOffset)
+ reportError(
+ createStringError(object_error::parse_failed,
+ "could not extract a valid stack size in section %s",
+ SectionName.data()),
+ Obj->getFileName());
+
+ printStackSizeEntry(StackSize, FuncName);
+}
+
+template <class ELFT>
+void GNUStyle<ELFT>::printStackSizeEntry(uint64_t Size, StringRef FuncName) {
+ OS.PadToColumn(2);
+ OS << format_decimal(Size, 11);
+ OS.PadToColumn(18);
+ OS << FuncName << "\n";
+}
+
+template <class ELFT>
+void DumpStyle<ELFT>::printStackSize(const ELFObjectFile<ELFT> *Obj,
+ RelocationRef Reloc,
+ SectionRef FunctionSec,
+ const StringRef &StackSizeSectionName,
+ const RelocationResolver &Resolver,
+ DataExtractor Data) {
+ // This function ignores potentially erroneous input, unless it is directly
+ // related to stack size reporting.
+ object::symbol_iterator RelocSym = Reloc.getSymbol();
+ uint64_t RelocSymValue = 0;
+ StringRef FileStr = Obj->getFileName();
+ if (RelocSym != Obj->symbol_end()) {
+ // Ensure that the relocation symbol is in the function section, i.e. the
+ // section where the functions whose stack sizes we are reporting are
+ // located.
+ auto SectionOrErr = RelocSym->getSection();
+ if (!SectionOrErr) {
+ reportWarning(
+ createError("cannot identify the section for relocation symbol '" +
+ getSymbolName(*RelocSym) + "'"),
+ FileStr);
+ consumeError(SectionOrErr.takeError());
+ } else if (*SectionOrErr != FunctionSec) {
+ reportWarning(createError("relocation symbol '" +
+ getSymbolName(*RelocSym) +
+ "' is not in the expected section"),
+ FileStr);
+ // Pretend that the symbol is in the correct section and report its
+ // stack size anyway.
+ FunctionSec = **SectionOrErr;
+ }
+
+ Expected<uint64_t> RelocSymValueOrErr = RelocSym->getValue();
+ if (RelocSymValueOrErr)
+ RelocSymValue = *RelocSymValueOrErr;
+ else
+ consumeError(RelocSymValueOrErr.takeError());
+ }
+
+ uint64_t Offset = Reloc.getOffset();
+ if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1))
+ reportError(
+ createStringError(object_error::parse_failed,
+ "found invalid relocation offset into section %s "
+ "while trying to extract a stack size entry",
+ StackSizeSectionName.data()),
+ FileStr);
+
+ uint64_t Addend = Data.getAddress(&Offset);
+ uint64_t SymValue = Resolver(Reloc, RelocSymValue, Addend);
+ this->printFunctionStackSize(Obj, SymValue, FunctionSec, StackSizeSectionName,
+ Data, &Offset);
+}
+
+template <class ELFT>
+void DumpStyle<ELFT>::printNonRelocatableStackSizes(
+ const ELFObjectFile<ELFT> *Obj, std::function<void()> PrintHeader) {
+ // This function ignores potentially erroneous input, unless it is directly
+ // related to stack size reporting.
+ const ELFFile<ELFT> *EF = Obj->getELFFile();
+ StringRef FileStr = Obj->getFileName();
+ for (const SectionRef &Sec : Obj->sections()) {
+ StringRef SectionName = getSectionName(Sec);
+ if (SectionName != ".stack_sizes")
+ continue;
+ PrintHeader();
+ const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl());
+ ArrayRef<uint8_t> Contents =
+ unwrapOrError(this->FileName, EF->getSectionContents(ElfSec));
+ DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr));
+ // A .stack_sizes section header's sh_link field is supposed to point
+ // to the section that contains the functions whose stack sizes are
+ // described in it.
+ const Elf_Shdr *FunctionELFSec =
+ unwrapOrError(this->FileName, EF->getSection(ElfSec->sh_link));
+ uint64_t Offset = 0;
+ while (Offset < Contents.size()) {
+ // The function address is followed by a ULEB representing the stack
+ // size. Check for an extra byte before we try to process the entry.
+ if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) {
+ reportError(
+ createStringError(
+ object_error::parse_failed,
+ "section %s ended while trying to extract a stack size entry",
+ SectionName.data()),
+ FileStr);
+ }
+ uint64_t SymValue = Data.getAddress(&Offset);
+ printFunctionStackSize(Obj, SymValue, Obj->toSectionRef(FunctionELFSec),
+ SectionName, Data, &Offset);
+ }
+ }
+}
+
+template <class ELFT>
+void DumpStyle<ELFT>::printRelocatableStackSizes(
+ const ELFObjectFile<ELFT> *Obj, std::function<void()> PrintHeader) {
+ const ELFFile<ELFT> *EF = Obj->getELFFile();
+
+ // Build a map between stack size sections and their corresponding relocation
+ // sections.
+ llvm::MapVector<SectionRef, SectionRef> StackSizeRelocMap;
+ const SectionRef NullSection{};
+
+ for (const SectionRef &Sec : Obj->sections()) {
+ StringRef SectionName;
+ if (Expected<StringRef> NameOrErr = Sec.getName())
+ SectionName = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
+ // A stack size section that we haven't encountered yet is mapped to the
+ // null section until we find its corresponding relocation section.
+ if (SectionName == ".stack_sizes")
+ if (StackSizeRelocMap.count(Sec) == 0) {
+ StackSizeRelocMap[Sec] = NullSection;
+ continue;
+ }
+
+ // Check relocation sections if they are relocating contents of a
+ // stack sizes section.
+ const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl());
+ uint32_t SectionType = ElfSec->sh_type;
+ if (SectionType != ELF::SHT_RELA && SectionType != ELF::SHT_REL)
+ continue;
+
+ Expected<section_iterator> RelSecOrErr = Sec.getRelocatedSection();
+ if (!RelSecOrErr)
+ reportError(createStringError(object_error::parse_failed,
+ "%s: failed to get a relocated section: %s",
+ SectionName.data(),
+ toString(RelSecOrErr.takeError()).c_str()),
+ Obj->getFileName());
+
+ const Elf_Shdr *ContentsSec =
+ Obj->getSection((*RelSecOrErr)->getRawDataRefImpl());
+ Expected<StringRef> ContentsSectionNameOrErr =
+ EF->getSectionName(ContentsSec);
+ if (!ContentsSectionNameOrErr) {
+ consumeError(ContentsSectionNameOrErr.takeError());
+ continue;
+ }
+ if (*ContentsSectionNameOrErr != ".stack_sizes")
+ continue;
+ // Insert a mapping from the stack sizes section to its relocation section.
+ StackSizeRelocMap[Obj->toSectionRef(ContentsSec)] = Sec;
+ }
+
+ for (const auto &StackSizeMapEntry : StackSizeRelocMap) {
+ PrintHeader();
+ const SectionRef &StackSizesSec = StackSizeMapEntry.first;
+ const SectionRef &RelocSec = StackSizeMapEntry.second;
+
+ // Warn about stack size sections without a relocation section.
+ StringRef StackSizeSectionName = getSectionName(StackSizesSec);
+ if (RelocSec == NullSection) {
+ reportWarning(createError("section " + StackSizeSectionName +
+ " does not have a corresponding "
+ "relocation section"),
+ Obj->getFileName());
+ continue;
+ }
+
+ // A .stack_sizes section header's sh_link field is supposed to point
+ // to the section that contains the functions whose stack sizes are
+ // described in it.
+ const Elf_Shdr *StackSizesELFSec =
+ Obj->getSection(StackSizesSec.getRawDataRefImpl());
+ const SectionRef FunctionSec = Obj->toSectionRef(unwrapOrError(
+ this->FileName, EF->getSection(StackSizesELFSec->sh_link)));
+
+ bool (*IsSupportedFn)(uint64_t);
+ RelocationResolver Resolver;
+ std::tie(IsSupportedFn, Resolver) = getRelocationResolver(*Obj);
+ auto Contents = unwrapOrError(this->FileName, StackSizesSec.getContents());
+ DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr));
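+ // Each relocation patches the address field of one stack size entry;
+ // resolve it with the target-specific resolver before printing.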
+ for (const RelocationRef &Reloc : RelocSec.relocations()) {
+ if (!IsSupportedFn || !IsSupportedFn(Reloc.getType()))
+ reportError(createStringError(
+ object_error::parse_failed,
+ "unsupported relocation type in section %s: %s",
+ getSectionName(RelocSec).data(),
+ EF->getRelocationTypeName(Reloc.getType()).data()),
+ Obj->getFileName());
+ this->printStackSize(Obj, Reloc, FunctionSec, StackSizeSectionName,
+ Resolver, Data);
+ }
+ }
+}
+
+template <class ELFT>
+void GNUStyle<ELFT>::printStackSizes(const ELFObjectFile<ELFT> *Obj) {
+ bool HeaderHasBeenPrinted = false;
+ auto PrintHeader = [&]() {
+ if (HeaderHasBeenPrinted)
+ return;
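+ // The header renders as:
+ //
+ // Stack Sizes:
+ //          Size     Function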
+ OS << "\nStack Sizes:\n";
+ OS.PadToColumn(9);
+ OS << "Size";
+ OS.PadToColumn(18);
+ OS << "Function\n";
+ HeaderHasBeenPrinted = true;
+ };
+
+ // For relocatable objects, resolve the stack size entries through their
+ // relocation sections. For non-relocatable objects, look directly for
+ // sections whose name starts with .stack_sizes and process the contents.
+ if (Obj->isRelocatableObject())
+ this->printRelocatableStackSizes(Obj, PrintHeader);
+ else
+ this->printNonRelocatableStackSizes(Obj, PrintHeader);
+}
+
template <class ELFT>
void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
size_t Bias = ELFT::Is64Bits ? 8 : 0;
@@ -4402,6 +5103,45 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
}
}
+template <class ELFT>
+void GNUStyle<ELFT>::printMipsABIFlags(const ELFObjectFile<ELFT> *ObjF) {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ const Elf_Shdr *Shdr =
+ findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.abiflags");
+ if (!Shdr)
+ return;
+
+ ArrayRef<uint8_t> Sec =
+ unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
+ if (Sec.size() != sizeof(Elf_Mips_ABIFlags<ELFT>))
+ reportError(createError(".MIPS.abiflags section has a wrong size"),
+ ObjF->getFileName());
+
+ auto *Flags = reinterpret_cast<const Elf_Mips_ABIFlags<ELFT> *>(Sec.data());
+
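+ // The field layout below follows GNU readelf's MIPS ABI Flags report.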
+ OS << "MIPS ABI Flags Version: " << Flags->version << "\n\n";
+ OS << "ISA: MIPS" << int(Flags->isa_level);
+ if (Flags->isa_rev > 1)
+ OS << "r" << int(Flags->isa_rev);
+ OS << "\n";
+ OS << "GPR size: " << getMipsRegisterSize(Flags->gpr_size) << "\n";
+ OS << "CPR1 size: " << getMipsRegisterSize(Flags->cpr1_size) << "\n";
+ OS << "CPR2 size: " << getMipsRegisterSize(Flags->cpr2_size) << "\n";
+ OS << "FP ABI: " << printEnum(Flags->fp_abi, makeArrayRef(ElfMipsFpABIType))
+ << "\n";
+ OS << "ISA Extension: "
+ << printEnum(Flags->isa_ext, makeArrayRef(ElfMipsISAExtType)) << "\n";
+ if (Flags->ases == 0)
+ OS << "ASEs: None\n";
+ else
+ // FIXME: Print each flag on a separate line.
+ OS << "ASEs: " << printFlags(Flags->ases, makeArrayRef(ElfMipsASEFlags))
+ << "\n";
+ OS << "FLAGS 1: " << format_hex_no_prefix(Flags->flags1, 8, false) << "\n";
+ OS << "FLAGS 2: " << format_hex_no_prefix(Flags->flags2, 8, false) << "\n";
+ OS << "\n";
+}
+
template <class ELFT> void LLVMStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
const Elf_Ehdr *E = Obj->getHeader();
{
@@ -4455,16 +5195,17 @@ template <class ELFT> void LLVMStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
W.printNumber("ProgramHeaderEntrySize", E->e_phentsize);
W.printNumber("ProgramHeaderCount", E->e_phnum);
W.printNumber("SectionHeaderEntrySize", E->e_shentsize);
- W.printString("SectionHeaderCount", getSectionHeadersNumString(Obj));
+ W.printString("SectionHeaderCount",
+ getSectionHeadersNumString(Obj, this->FileName));
W.printString("StringTableSectionIndex",
- getSectionHeaderTableIndexString(Obj));
+ getSectionHeaderTableIndexString(Obj, this->FileName));
}
}
template <class ELFT>
void LLVMStyle<ELFT>::printGroupSections(const ELFO *Obj) {
DictScope Lists(W, "Groups");
- std::vector<GroupSection> V = getGroups<ELFT>(Obj);
+ std::vector<GroupSection> V = getGroups<ELFT>(Obj, this->FileName);
DenseMap<uint64_t, const GroupSection *> Map = mapSectionsToGroups(V);
for (const GroupSection &G : V) {
DictScope D(W, "Group");
@@ -4499,7 +5240,7 @@ template <class ELFT> void LLVMStyle<ELFT>::printRelocations(const ELFO *Obj) {
ListScope D(W, "Relocations");
int SectionNumber = -1;
- for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) {
++SectionNumber;
if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA &&
@@ -4508,7 +5249,7 @@ template <class ELFT> void LLVMStyle<ELFT>::printRelocations(const ELFO *Obj) {
Sec.sh_type != ELF::SHT_ANDROID_RELR)
continue;
- StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
+ StringRef Name = unwrapOrError(this->FileName, Obj->getSectionName(&Sec));
W.startLine() << "Section (" << SectionNumber << ") " << Name << " {\n";
W.indent();
@@ -4522,11 +5263,12 @@ template <class ELFT> void LLVMStyle<ELFT>::printRelocations(const ELFO *Obj) {
template <class ELFT>
void LLVMStyle<ELFT>::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) {
- const Elf_Shdr *SymTab = unwrapOrError(Obj->getSection(Sec->sh_link));
+ const Elf_Shdr *SymTab =
+ unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
switch (Sec->sh_type) {
case ELF::SHT_REL:
- for (const Elf_Rel &R : unwrapOrError(Obj->rels(Sec))) {
+ for (const Elf_Rel &R : unwrapOrError(this->FileName, Obj->rels(Sec))) {
Elf_Rela Rela;
Rela.r_offset = R.r_offset;
Rela.r_info = R.r_info;
@@ -4535,17 +5277,18 @@ void LLVMStyle<ELFT>::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) {
}
break;
case ELF::SHT_RELA:
- for (const Elf_Rela &R : unwrapOrError(Obj->relas(Sec)))
+ for (const Elf_Rela &R : unwrapOrError(this->FileName, Obj->relas(Sec)))
printRelocation(Obj, R, SymTab);
break;
case ELF::SHT_RELR:
case ELF::SHT_ANDROID_RELR: {
- Elf_Relr_Range Relrs = unwrapOrError(Obj->relrs(Sec));
+ Elf_Relr_Range Relrs = unwrapOrError(this->FileName, Obj->relrs(Sec));
if (opts::RawRelr) {
for (const Elf_Relr &R : Relrs)
W.startLine() << W.hex(R) << "\n";
} else {
- std::vector<Elf_Rela> RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs));
+ std::vector<Elf_Rela> RelrRelas =
+ unwrapOrError(this->FileName, Obj->decode_relrs(Relrs));
for (const Elf_Rela &R : RelrRelas)
printRelocation(Obj, R, SymTab);
}
@@ -4553,7 +5296,8 @@ void LLVMStyle<ELFT>::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) {
}
case ELF::SHT_ANDROID_REL:
case ELF::SHT_ANDROID_RELA:
- for (const Elf_Rela &R : unwrapOrError(Obj->android_relas(Sec)))
+ for (const Elf_Rela &R :
+ unwrapOrError(this->FileName, Obj->android_relas(Sec)))
printRelocation(Obj, R, SymTab);
break;
}
@@ -4565,13 +5309,16 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
SmallString<32> RelocName;
Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
std::string TargetName;
- const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&Rel, SymTab));
+ const Elf_Sym *Sym =
+ unwrapOrError(this->FileName, Obj->getRelocationSymbol(&Rel, SymTab));
if (Sym && Sym->getType() == ELF::STT_SECTION) {
const Elf_Shdr *Sec = unwrapOrError(
+ this->FileName,
Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable()));
- TargetName = unwrapOrError(Obj->getSectionName(Sec));
+ TargetName = unwrapOrError(this->FileName, Obj->getSectionName(Sec));
} else if (Sym) {
- StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab));
+ StringRef StrTable =
+ unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*SymTab));
TargetName = this->dumper()->getFullSymbolName(
Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */);
}
@@ -4596,10 +5343,11 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
ListScope SectionsD(W, "Sections");
int SectionIndex = -1;
- ArrayRef<Elf_Shdr> Sections = unwrapOrError(Obj->sections());
+ ArrayRef<Elf_Shdr> Sections = unwrapOrError(this->FileName, Obj->sections());
const ELFObjectFile<ELFT> *ElfObj = this->dumper()->getElfObject();
for (const Elf_Shdr &Sec : Sections) {
- StringRef Name = getSectionName(Sec, *ElfObj, Sections);
+ StringRef Name = unwrapOrError(
+ ElfObj->getFileName(), Obj->getSectionName(&Sec, this->WarningHandler));
DictScope SectionD(W, "Section");
W.printNumber("Index", ++SectionIndex);
W.printNumber("Name", Name, Sec.sh_name);
@@ -4652,19 +5400,25 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
if (opts::SectionSymbols) {
ListScope D(W, "Symbols");
const Elf_Shdr *Symtab = this->dumper()->getDotSymtabSec();
- StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*Symtab));
+ StringRef StrTable =
+ unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*Symtab));
- for (const Elf_Sym &Sym : unwrapOrError(Obj->symbols(Symtab))) {
+ for (const Elf_Sym &Sym :
+ unwrapOrError(this->FileName, Obj->symbols(Symtab))) {
const Elf_Shdr *SymSec = unwrapOrError(
+ this->FileName,
Obj->getSection(&Sym, Symtab, this->dumper()->getShndxTable()));
if (SymSec == &Sec)
- printSymbol(Obj, &Sym, unwrapOrError(Obj->symbols(Symtab)).begin(),
- StrTable, false);
+ printSymbol(
+ Obj, &Sym,
+ unwrapOrError(this->FileName, Obj->symbols(Symtab)).begin(),
+ StrTable, false, false);
}
}
if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) {
- ArrayRef<uint8_t> Data = unwrapOrError(Obj->getSectionContents(&Sec));
+ ArrayRef<uint8_t> Data =
+ unwrapOrError(this->FileName, Obj->getSectionContents(&Sec));
W.printBinaryBlock(
"SectionData",
StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()));
@@ -4675,7 +5429,8 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
template <class ELFT>
void LLVMStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
const Elf_Sym *First, StringRef StrTable,
- bool IsDynamic) {
+ bool IsDynamic,
+ bool /*NonVisibilityBitsUsed*/) {
unsigned SectionIndex = 0;
StringRef SectionName;
this->dumper()->getSectionNameIndex(Symbol, First, SectionName, SectionIndex);
@@ -4786,7 +5541,8 @@ void LLVMStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
}
if (DynRelrRegion.Size > 0) {
Elf_Relr_Range Relrs = this->dumper()->dyn_relrs();
- std::vector<Elf_Rela> RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs));
+ std::vector<Elf_Rela> RelrRelas =
+ unwrapOrError(this->FileName, Obj->decode_relrs(Relrs));
for (const Elf_Rela &Rela : RelrRelas)
printDynamicRelocation(Obj, Rela);
}
@@ -4809,11 +5565,9 @@ template <class ELFT>
void LLVMStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel) {
SmallString<32> RelocName;
Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
- std::string SymbolName;
- uint32_t SymIndex = Rel.getSymbol(Obj->isMips64EL());
- const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex;
- SymbolName = maybeDemangle(
- unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable())));
+ std::string SymbolName =
+ getSymbolForReloc(Obj, this->FileName, this->dumper(), Rel).Name;
+
if (opts::ExpandRelocs) {
DictScope Group(W, "Relocation");
W.printHex("Offset", Rel.r_offset);
@@ -4842,7 +5596,8 @@ template <class ELFT>
void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
ListScope L(W, "ProgramHeaders");
- for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
+ for (const Elf_Phdr &Phdr :
+ unwrapOrError(this->FileName, Obj->program_headers())) {
DictScope P(W, "ProgramHeader");
W.printHex("Type",
getElfSegmentType(Obj->getHeader()->e_machine, Phdr.p_type),
@@ -4860,23 +5615,16 @@ void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
template <class ELFT>
void LLVMStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
const Elf_Shdr *Sec) {
- DictScope SS(W, "Version symbols");
+ ListScope SS(W, "VersionSymbols");
if (!Sec)
return;
- StringRef SecName = unwrapOrError(Obj->getSectionName(Sec));
- W.printNumber("Section Name", SecName, Sec->sh_name);
- W.printHex("Address", Sec->sh_addr);
- W.printHex("Offset", Sec->sh_offset);
- W.printNumber("Link", Sec->sh_link);
-
const uint8_t *VersymBuf =
reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
const ELFDumper<ELFT> *Dumper = this->dumper();
StringRef StrTable = Dumper->getDynamicStringTable();
// One version symbol entry exists for each entry in the dynamic symbol table (DT_SYMTAB).
- ListScope Syms(W, "Symbols");
for (const Elf_Sym &Sym : Dumper->dynamic_symbols()) {
DictScope S(W, "Symbol");
const Elf_Versym *Versym = reinterpret_cast<const Elf_Versym *>(VersymBuf);
@@ -4891,7 +5639,7 @@ void LLVMStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
template <class ELFT>
void LLVMStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
const Elf_Shdr *Sec) {
- DictScope SD(W, "SHT_GNU_verdef");
+ ListScope SD(W, "VersionDefinitions");
if (!Sec)
return;
@@ -4899,7 +5647,8 @@ void LLVMStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size;
const uint8_t *VerdefBuf = SecStartAddress;
- const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link));
+ const Elf_Shdr *StrTab =
+ unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
unsigned VerDefsNum = Sec->sh_info;
while (VerDefsNum--) {
@@ -4938,13 +5687,14 @@ void LLVMStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
template <class ELFT>
void LLVMStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
const Elf_Shdr *Sec) {
- DictScope SD(W, "SHT_GNU_verneed");
+ ListScope SD(W, "VersionRequirements");
if (!Sec)
return;
const uint8_t *SecData =
reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
- const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link));
+ const Elf_Shdr *StrTab =
+ unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
const uint8_t *VerneedBuf = SecData;
unsigned VerneedNum = Sec->sh_info;
@@ -4986,37 +5736,62 @@ void LLVMStyle<ELFT>::printCGProfile(const ELFFile<ELFT> *Obj) {
ListScope L(W, "CGProfile");
if (!this->dumper()->getDotCGProfileSec())
return;
- auto CGProfile =
- unwrapOrError(Obj->template getSectionContentsAsArray<Elf_CGProfile>(
- this->dumper()->getDotCGProfileSec()));
+ auto CGProfile = unwrapOrError(
+ this->FileName, Obj->template getSectionContentsAsArray<Elf_CGProfile>(
+ this->dumper()->getDotCGProfileSec()));
for (const Elf_CGProfile &CGPE : CGProfile) {
DictScope D(W, "CGProfileEntry");
- W.printNumber("From", this->dumper()->getStaticSymbolName(CGPE.cgp_from),
- CGPE.cgp_from);
- W.printNumber("To", this->dumper()->getStaticSymbolName(CGPE.cgp_to),
- CGPE.cgp_to);
+ W.printNumber(
+ "From",
+ unwrapOrError(this->FileName,
+ this->dumper()->getStaticSymbolName(CGPE.cgp_from)),
+ CGPE.cgp_from);
+ W.printNumber(
+ "To",
+ unwrapOrError(this->FileName,
+ this->dumper()->getStaticSymbolName(CGPE.cgp_to)),
+ CGPE.cgp_to);
W.printNumber("Weight", CGPE.cgp_weight);
}
}
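+// Decode a buffer of consecutive ULEB128 values into a vector. For example,
+// the byte sequence {0xE5, 0x8E, 0x26} decodes to the single value 624485.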
+static Expected<std::vector<uint64_t>> toULEB128Array(ArrayRef<uint8_t> Data) {
+ std::vector<uint64_t> Ret;
+ const uint8_t *Cur = Data.begin();
+ const uint8_t *End = Data.end();
+ while (Cur != End) {
+ unsigned Size;
+ const char *Err;
+ Ret.push_back(decodeULEB128(Cur, &Size, End, &Err));
+ if (Err)
+ return createError(Err);
+ Cur += Size;
+ }
+ return Ret;
+}
+
template <class ELFT>
void LLVMStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
ListScope L(W, "Addrsig");
if (!this->dumper()->getDotAddrsigSec())
return;
ArrayRef<uint8_t> Contents = unwrapOrError(
+ this->FileName,
Obj->getSectionContents(this->dumper()->getDotAddrsigSec()));
- const uint8_t *Cur = Contents.begin();
- const uint8_t *End = Contents.end();
- while (Cur != End) {
- unsigned Size;
- const char *Err;
- uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err);
- if (Err)
- reportError(Err);
- W.printNumber("Sym", this->dumper()->getStaticSymbolName(SymIndex),
- SymIndex);
- Cur += Size;
+ Expected<std::vector<uint64_t>> V = toULEB128Array(Contents);
+ if (!V) {
+ reportWarning(V.takeError(), this->FileName);
+ return;
+ }
+
+ for (uint64_t Sym : *V) {
+ Expected<std::string> NameOrErr = this->dumper()->getStaticSymbolName(Sym);
+ if (NameOrErr) {
+ W.printNumber("Sym", *NameOrErr, Sym);
+ continue;
+ }
+ reportWarning(NameOrErr.takeError(), this->FileName);
+ W.printNumber("Sym", "<?>", Sym);
}
}
@@ -5051,6 +5826,17 @@ static void printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
}
}
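+// Print a parsed NT_FILE core note, which lists the files mapped into the
+// process: the page size followed by one (start, end, offset, filename)
+// record per mapping.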
+static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) {
+ W.printNumber("Page Size", Note.PageSize);
+ for (const CoreFileMapping &Mapping : Note.Mappings) {
+ ListScope D(W, "Mapping");
+ W.printHex("Start", Mapping.Start);
+ W.printHex("End", Mapping.End);
+ W.printHex("Offset", Mapping.Offset);
+ W.printString("Filename", Mapping.Filename);
+ }
+}
+
template <class ELFT>
void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
ListScope L(W, "Notes");
@@ -5067,56 +5853,81 @@ void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
ArrayRef<uint8_t> Descriptor = Note.getDesc();
Elf_Word Type = Note.getType();
+ // Print the note owner/type.
W.printString("Owner", Name);
W.printHex("Data size", Descriptor.size());
if (Name == "GNU") {
W.printString("Type", getGNUNoteTypeName(Type));
- printGNUNoteLLVMStyle<ELFT>(Type, Descriptor, W);
} else if (Name == "FreeBSD") {
W.printString("Type", getFreeBSDNoteTypeName(Type));
} else if (Name == "AMD") {
W.printString("Type", getAMDNoteTypeName(Type));
- const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
- if (!N.Type.empty())
- W.printString(N.Type, N.Value);
} else if (Name == "AMDGPU") {
W.printString("Type", getAMDGPUNoteTypeName(Type));
- const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
- if (!N.Type.empty())
- W.printString(N.Type, N.Value);
} else {
- StringRef NoteType = getGenericNoteTypeName(Type);
+ StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE
+ ? getCoreNoteTypeName(Type)
+ : getGenericNoteTypeName(Type);
if (!NoteType.empty())
W.printString("Type", NoteType);
else
W.printString("Type",
"Unknown (" + to_string(format_hex(Type, 10)) + ")");
}
+
+ // Print the description, or fall back to printing raw bytes for unknown
+ // owners.
+ if (Name == "GNU") {
+ printGNUNoteLLVMStyle<ELFT>(Type, Descriptor, W);
+ } else if (Name == "AMD") {
+ const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ W.printString(N.Type, N.Value);
+ } else if (Name == "AMDGPU") {
+ const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ W.printString(N.Type, N.Value);
+ } else if (Name == "CORE") {
+ if (Type == ELF::NT_FILE) {
+ DataExtractor DescExtractor(Descriptor,
+ ELFT::TargetEndianness == support::little,
+ sizeof(Elf_Addr));
+ Expected<CoreNote> Note = readCoreNote(DescExtractor);
+ if (Note)
+ printCoreNoteLLVMStyle(*Note, W);
+ else
+ reportWarning(Note.takeError(), this->FileName);
+ }
+ } else if (!Descriptor.empty()) {
+ W.printBinaryBlock("Description data", Descriptor);
+ }
};
- if (Obj->getHeader()->e_type == ELF::ET_CORE) {
- for (const auto &P : unwrapOrError(Obj->program_headers())) {
- if (P.p_type != PT_NOTE)
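+ // Non-core files with a section table are walked via their SHT_NOTE
+ // sections; core files, and files without sections, fall back to PT_NOTE
+ // program headers.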
+ ArrayRef<Elf_Shdr> Sections = unwrapOrError(this->FileName, Obj->sections());
+ if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) {
+ for (const auto &S : Sections) {
+ if (S.sh_type != SHT_NOTE)
continue;
DictScope D(W, "NoteSection");
- PrintHeader(P.p_offset, P.p_filesz);
+ PrintHeader(S.sh_offset, S.sh_size);
Error Err = Error::success();
- for (const auto &Note : Obj->notes(P, Err))
+ for (const auto &Note : Obj->notes(S, Err))
ProcessNote(Note);
if (Err)
- error(std::move(Err));
+ reportError(std::move(Err), this->FileName);
}
} else {
- for (const auto &S : unwrapOrError(Obj->sections())) {
- if (S.sh_type != SHT_NOTE)
+ for (const auto &P :
+ unwrapOrError(this->FileName, Obj->program_headers())) {
+ if (P.p_type != PT_NOTE)
continue;
DictScope D(W, "NoteSection");
- PrintHeader(S.sh_offset, S.sh_size);
+ PrintHeader(P.p_offset, P.p_filesz);
Error Err = Error::success();
- for (const auto &Note : Obj->notes(S, Err))
+ for (const auto &Note : Obj->notes(P, Err))
ProcessNote(Note);
if (Err)
- error(std::move(Err));
+ reportError(std::move(Err), this->FileName);
}
}
}
@@ -5125,11 +5936,12 @@ template <class ELFT>
void LLVMStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
ListScope L(W, "LinkerOptions");
- for (const Elf_Shdr &Shdr : unwrapOrError(Obj->sections())) {
+ for (const Elf_Shdr &Shdr : unwrapOrError(this->FileName, Obj->sections())) {
if (Shdr.sh_type != ELF::SHT_LLVM_LINKER_OPTIONS)
continue;
- ArrayRef<uint8_t> Contents = unwrapOrError(Obj->getSectionContents(&Shdr));
+ ArrayRef<uint8_t> Contents =
+ unwrapOrError(this->FileName, Obj->getSectionContents(&Shdr));
for (const uint8_t *P = Contents.begin(), *E = Contents.end(); P < E; ) {
StringRef Key = StringRef(reinterpret_cast<const char *>(P));
StringRef Value =
@@ -5143,6 +5955,22 @@ void LLVMStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
}
template <class ELFT>
+void LLVMStyle<ELFT>::printStackSizes(const ELFObjectFile<ELFT> *Obj) {
+ ListScope L(W, "StackSizes");
+ if (Obj->isRelocatableObject())
+ this->printRelocatableStackSizes(Obj, []() {});
+ else
+ this->printNonRelocatableStackSizes(Obj, []() {});
+}
+
+template <class ELFT>
+void LLVMStyle<ELFT>::printStackSizeEntry(uint64_t Size, StringRef FuncName) {
+ DictScope D(W, "Entry");
+ W.printString("Function", FuncName);
+ W.printHex("Size", Size);
+}
+
+template <class ELFT>
void LLVMStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
auto PrintEntry = [&](const Elf_Addr *E) {
W.printHex("Address", Parser.getGotAddress(E));
@@ -5252,3 +6080,41 @@ void LLVMStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
}
}
}
+
+template <class ELFT>
+void LLVMStyle<ELFT>::printMipsABIFlags(const ELFObjectFile<ELFT> *ObjF) {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ const Elf_Shdr *Shdr =
+ findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.abiflags");
+ if (!Shdr) {
+ W.startLine() << "There is no .MIPS.abiflags section in the file.\n";
+ return;
+ }
+ ArrayRef<uint8_t> Sec =
+ unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
+ if (Sec.size() != sizeof(Elf_Mips_ABIFlags<ELFT>)) {
+ W.startLine() << "The .MIPS.abiflags section has a wrong size.\n";
+ return;
+ }
+
+ auto *Flags = reinterpret_cast<const Elf_Mips_ABIFlags<ELFT> *>(Sec.data());
+
+ raw_ostream &OS = W.getOStream();
+ DictScope GS(W, "MIPS ABI Flags");
+
+ W.printNumber("Version", Flags->version);
+ W.startLine() << "ISA: ";
+ if (Flags->isa_rev <= 1)
+ OS << format("MIPS%u", Flags->isa_level);
+ else
+ OS << format("MIPS%ur%u", Flags->isa_level, Flags->isa_rev);
+ OS << "\n";
+ W.printEnum("ISA Extension", Flags->isa_ext, makeArrayRef(ElfMipsISAExtType));
+ W.printFlags("ASEs", Flags->ases, makeArrayRef(ElfMipsASEFlags));
+ W.printEnum("FP ABI", Flags->fp_abi, makeArrayRef(ElfMipsFpABIType));
+ W.printNumber("GPR size", getMipsRegisterSize(Flags->gpr_size));
+ W.printNumber("CPR1 size", getMipsRegisterSize(Flags->cpr1_size));
+ W.printNumber("CPR2 size", getMipsRegisterSize(Flags->cpr2_size));
+ W.printFlags("Flags 1", Flags->flags1, makeArrayRef(ElfMipsFlags1));
+ W.printHex("Flags 2", Flags->flags2);
+}
diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp
index 32a3866eb2f2..20a60b3df699 100644
--- a/tools/llvm-readobj/MachODumper.cpp
+++ b/tools/llvm-readobj/MachODumper.cpp
@@ -214,6 +214,31 @@ static const EnumEntry<uint32_t> MachOHeaderFlags[] = {
LLVM_READOBJ_ENUM_ENT(MachO, MH_APP_EXTENSION_SAFE),
};
+static const EnumEntry<unsigned> MachOSectionTypes[] = {
+ { "Regular" , MachO::S_REGULAR },
+ { "ZeroFill" , MachO::S_ZEROFILL },
+ { "CStringLiterals" , MachO::S_CSTRING_LITERALS },
+ { "4ByteLiterals" , MachO::S_4BYTE_LITERALS },
+ { "8ByteLiterals" , MachO::S_8BYTE_LITERALS },
+ { "LiteralPointers" , MachO::S_LITERAL_POINTERS },
+ { "NonLazySymbolPointers" , MachO::S_NON_LAZY_SYMBOL_POINTERS },
+ { "LazySymbolPointers" , MachO::S_LAZY_SYMBOL_POINTERS },
+ { "SymbolStubs" , MachO::S_SYMBOL_STUBS },
+ { "ModInitFuncPointers" , MachO::S_MOD_INIT_FUNC_POINTERS },
+ { "ModTermFuncPointers" , MachO::S_MOD_TERM_FUNC_POINTERS },
+ { "Coalesced" , MachO::S_COALESCED },
+ { "GBZeroFill" , MachO::S_GB_ZEROFILL },
+ { "Interposing" , MachO::S_INTERPOSING },
+ { "16ByteLiterals" , MachO::S_16BYTE_LITERALS },
+ { "DTraceDOF" , MachO::S_DTRACE_DOF },
+ { "LazyDylibSymbolPointers" , MachO::S_LAZY_DYLIB_SYMBOL_POINTERS },
+ { "ThreadLocalRegular" , MachO::S_THREAD_LOCAL_REGULAR },
+ { "ThreadLocalZerofill" , MachO::S_THREAD_LOCAL_ZEROFILL },
+ { "ThreadLocalVariables" , MachO::S_THREAD_LOCAL_VARIABLES },
+ { "ThreadLocalVariablePointers" , MachO::S_THREAD_LOCAL_VARIABLE_POINTERS },
+ { "ThreadLocalInitFunctionPointers", MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS }
+};
+
static const EnumEntry<unsigned> MachOSectionAttributes[] = {
{ "LocReloc" , 1 << 0 /*S_ATTR_LOC_RELOC */ },
{ "ExtReloc" , 1 << 1 /*S_ATTR_EXT_RELOC */ },
@@ -440,10 +465,7 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) {
MachOSection MOSection;
getSection(Obj, Section.getRawDataRefImpl(), MOSection);
DataRefImpl DR = Section.getRawDataRefImpl();
-
- StringRef Name;
- error(Section.getName(Name));
-
+ StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName());
ArrayRef<char> RawName = Obj->getSectionRawName(DR);
StringRef SegmentName = Obj->getSectionFinalSegmentName(DR);
ArrayRef<char> RawSegmentName = Obj->getSectionRawFinalSegmentName(DR);
@@ -459,7 +481,7 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) {
W.printHex("RelocationOffset", MOSection.RelocationTableOffset);
W.printNumber("RelocationCount", MOSection.NumRelocationTableEntries);
W.printEnum("Type", MOSection.Flags & 0xFF,
- makeArrayRef(MachOSectionAttributes));
+ makeArrayRef(MachOSectionTypes));
W.printFlags("Attributes", MOSection.Flags >> 8,
makeArrayRef(MachOSectionAttributes));
W.printHex("Reserved1", MOSection.Reserved1);
@@ -484,7 +506,8 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) {
}
if (opts::SectionData && !Section.isBSS())
- W.printBinaryBlock("SectionData", unwrapOrError(Section.getContents()));
+ W.printBinaryBlock("SectionData", unwrapOrError(Obj->getFileName(),
+ Section.getContents()));
}
}
@@ -493,9 +516,7 @@ void MachODumper::printRelocations() {
std::error_code EC;
for (const SectionRef &Section : Obj->sections()) {
- StringRef Name;
- error(Section.getName(Name));
-
+ StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName());
bool PrintedGroup = false;
for (const RelocationRef &Reloc : Section.relocations()) {
if (!PrintedGroup) {
@@ -535,14 +556,13 @@ void MachODumper::printRelocation(const MachOObjectFile *Obj,
if (Symbol != Obj->symbol_end()) {
Expected<StringRef> TargetNameOrErr = Symbol->getName();
if (!TargetNameOrErr)
- error(errorToErrorCode(TargetNameOrErr.takeError()));
+ reportError(TargetNameOrErr.takeError(), Obj->getFileName());
TargetName = *TargetNameOrErr;
}
} else if (!IsScattered) {
section_iterator SecI = Obj->getRelocationSection(DR);
- if (SecI != Obj->section_end()) {
- error(SecI->getName(TargetName));
- }
+ if (SecI != Obj->section_end())
+ TargetName = unwrapOrError(Obj->getFileName(), SecI->getName());
}
if (TargetName.empty())
TargetName = "-";
@@ -610,10 +630,12 @@ void MachODumper::printSymbol(const SymbolRef &Symbol) {
StringRef SectionName = "";
Expected<section_iterator> SecIOrErr = Symbol.getSection();
- error(errorToErrorCode(SecIOrErr.takeError()));
+ if (!SecIOrErr)
+ reportError(SecIOrErr.takeError(), Obj->getFileName());
+
section_iterator SecI = *SecIOrErr;
if (SecI != Obj->section_end())
- error(SecI->getName(SectionName));
+ SectionName = unwrapOrError(Obj->getFileName(), SecI->getName());
DictScope D(W, "Symbol");
W.printNumber("Name", SymbolName, MOSymbol.StringIndex);
@@ -643,7 +665,11 @@ void MachODumper::printStackMap() const {
object::SectionRef StackMapSection;
for (auto Sec : Obj->sections()) {
StringRef Name;
- Sec.getName(Name);
+ if (Expected<StringRef> NameOrErr = Sec.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
+
if (Name == "__llvm_stackmaps") {
StackMapSection = Sec;
break;
@@ -653,7 +679,8 @@ void MachODumper::printStackMap() const {
if (StackMapSection == object::SectionRef())
return;
- StringRef StackMapContents = unwrapOrError(StackMapSection.getContents());
+ StringRef StackMapContents =
+ unwrapOrError(Obj->getFileName(), StackMapSection.getContents());
ArrayRef<uint8_t> StackMapContentsArray =
arrayRefFromStringRef(StackMapContents);
diff --git a/tools/llvm-readobj/ObjDumper.cpp b/tools/llvm-readobj/ObjDumper.cpp
index 0a9e22c8a71c..9e5ebd99ac37 100644
--- a/tools/llvm-readobj/ObjDumper.cpp
+++ b/tools/llvm-readobj/ObjDumper.cpp
@@ -23,6 +23,10 @@
namespace llvm {
+static inline Error createError(const Twine &Msg) {
+ return createStringError(object::object_error::parse_failed, Msg);
+}
+
ObjDumper::ObjDumper(ScopedPrinter &Writer) : W(Writer) {}
ObjDumper::~ObjDumper() {
@@ -49,8 +53,7 @@ getSectionRefsByNameOrIndex(const object::ObjectFile *Obj,
SecIndex = Obj->isELF() ? 0 : 1;
for (object::SectionRef SecRef : Obj->sections()) {
- StringRef SecName;
- error(SecRef.getName(SecName));
+ StringRef SecName = unwrapOrError(Obj->getFileName(), SecRef.getName());
auto NameIt = SecNames.find(SecName);
if (NameIt != SecNames.end())
NameIt->second = true;
@@ -64,10 +67,15 @@ getSectionRefsByNameOrIndex(const object::ObjectFile *Obj,
for (const std::pair<std::string, bool> &S : SecNames)
if (!S.second)
- reportWarning(formatv("could not find section '{0}'", S.first).str());
+ reportWarning(
+ createError(formatv("could not find section '{0}'", S.first).str()),
+ Obj->getFileName());
+
for (std::pair<unsigned, bool> S : SecIndices)
if (!S.second)
- reportWarning(formatv("could not find section {0}", S.first).str());
+ reportWarning(
+ createError(formatv("could not find section {0}", S.first).str()),
+ Obj->getFileName());
return Ret;
}
@@ -77,14 +85,16 @@ void ObjDumper::printSectionsAsString(const object::ObjectFile *Obj,
bool First = true;
for (object::SectionRef Section :
getSectionRefsByNameOrIndex(Obj, Sections)) {
- StringRef SectionName;
- error(Section.getName(SectionName));
+ StringRef SectionName =
+ unwrapOrError(Obj->getFileName(), Section.getName());
+
if (!First)
W.startLine() << '\n';
First = false;
W.startLine() << "String dump of section '" << SectionName << "':\n";
- StringRef SectionContent = unwrapOrError(Section.getContents());
+ StringRef SectionContent =
+ unwrapOrError(Obj->getFileName(), Section.getContents());
const uint8_t *SecContent = SectionContent.bytes_begin();
const uint8_t *CurrentWord = SecContent;
@@ -110,14 +120,16 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile *Obj,
bool First = true;
for (object::SectionRef Section :
getSectionRefsByNameOrIndex(Obj, Sections)) {
- StringRef SectionName;
- error(Section.getName(SectionName));
+ StringRef SectionName =
+ unwrapOrError(Obj->getFileName(), Section.getName());
+
if (!First)
W.startLine() << '\n';
First = false;
W.startLine() << "Hex dump of section '" << SectionName << "':\n";
- StringRef SectionContent = unwrapOrError(Section.getContents());
+ StringRef SectionContent =
+ unwrapOrError(Obj->getFileName(), Section.getContents());
const uint8_t *SecContent = SectionContent.bytes_begin();
const uint8_t *SecEnd = SecContent + SectionContent.size();
diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h
index aaabfa2ca2e8..2ba441342499 100644
--- a/tools/llvm-readobj/ObjDumper.h
+++ b/tools/llvm-readobj/ObjDumper.h
@@ -68,15 +68,8 @@ public:
virtual void printAddrsig() {}
virtual void printNotes() {}
virtual void printELFLinkerOptions() {}
-
- // Only implemented for ARM ELF at this time.
- virtual void printAttributes() { }
-
- // Only implemented for MIPS ELF at this time.
- virtual void printMipsPLTGOT() { }
- virtual void printMipsABIFlags() { }
- virtual void printMipsReginfo() { }
- virtual void printMipsOptions() { }
+ virtual void printStackSizes() {}
+ virtual void printArchSpecificInfo() { }
// Only implemented for PE/COFF.
virtual void printCOFFImports() { }
diff --git a/tools/llvm-readobj/WasmDumper.cpp b/tools/llvm-readobj/WasmDumper.cpp
index 041a9a15bdb6..dfab9f40d71b 100644
--- a/tools/llvm-readobj/WasmDumper.cpp
+++ b/tools/llvm-readobj/WasmDumper.cpp
@@ -51,6 +51,7 @@ static const EnumEntry<unsigned> WasmSymbolFlags[] = {
ENUM_ENTRY(UNDEFINED),
ENUM_ENTRY(EXPORTED),
ENUM_ENTRY(EXPLICIT_NAME),
+ ENUM_ENTRY(NO_STRIP),
#undef ENUM_ENTRY
};
@@ -90,7 +91,7 @@ void WasmDumper::printRelocation(const SectionRef &Section,
StringRef SymName;
symbol_iterator SI = Reloc.getSymbol();
if (SI != Obj->symbol_end())
- SymName = error(SI->getName());
+ SymName = unwrapOrError(Obj->getFileName(), SI->getName());
bool HasAddend = false;
switch (RelocType) {
@@ -133,8 +134,8 @@ void WasmDumper::printRelocations() {
int SectionNumber = 0;
for (const SectionRef &Section : Obj->sections()) {
bool PrintedGroup = false;
- StringRef Name;
- error(Section.getName(Name));
+ StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName());
+
++SectionNumber;
for (const RelocationRef &Reloc : Section.relocations()) {
diff --git a/tools/llvm-readobj/Win64EHDumper.cpp b/tools/llvm-readobj/Win64EHDumper.cpp
index e64b8f157180..fa268ce9d434 100644
--- a/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/tools/llvm-readobj/Win64EHDumper.cpp
@@ -289,7 +289,9 @@ void Dumper::printRuntimeFunction(const Context &Ctx,
resolveRelocation(Ctx, Section, SectionOffset + 8, XData, Offset);
ArrayRef<uint8_t> Contents;
- error(Ctx.COFF.getSectionContents(XData, Contents));
+ if (Error E = Ctx.COFF.getSectionContents(XData, Contents))
+ reportError(std::move(E), Ctx.COFF.getFileName());
+
if (Contents.empty())
return;
@@ -304,14 +306,19 @@ void Dumper::printRuntimeFunction(const Context &Ctx,
void Dumper::printData(const Context &Ctx) {
for (const auto &Section : Ctx.COFF.sections()) {
StringRef Name;
- Section.getName(Name);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ Name = *NameOrErr;
+ else
+ consumeError(NameOrErr.takeError());
if (Name != ".pdata" && !Name.startswith(".pdata$"))
continue;
const coff_section *PData = Ctx.COFF.getCOFFSection(Section);
ArrayRef<uint8_t> Contents;
- error(Ctx.COFF.getSectionContents(PData, Contents));
+
+ if (Error E = Ctx.COFF.getSectionContents(PData, Contents))
+ reportError(std::move(E), Ctx.COFF.getFileName());
if (Contents.empty())
continue;
diff --git a/tools/llvm-readobj/WindowsResourceDumper.cpp b/tools/llvm-readobj/WindowsResourceDumper.cpp
index 13989f696d9d..a2fb6aac3f93 100644
--- a/tools/llvm-readobj/WindowsResourceDumper.cpp
+++ b/tools/llvm-readobj/WindowsResourceDumper.cpp
@@ -56,8 +56,12 @@ void Dumper::printEntry(const ResourceEntryRef &Ref) {
if (Ref.checkTypeString()) {
auto NarrowStr = stripUTF16(Ref.getTypeString());
SW.printString("Resource type (string)", NarrowStr);
- } else
- SW.printNumber("Resource type (int)", Ref.getTypeID());
+ } else {
+ SmallString<20> IDStr;
+ raw_svector_ostream OS(IDStr);
+ printResourceTypeName(Ref.getTypeID(), OS);
+ SW.printString("Resource type (int)", IDStr);
+ }
if (Ref.checkNameString()) {
auto NarrowStr = stripUTF16(Ref.getNameString());
diff --git a/tools/llvm-readobj/XCOFFDumper.cpp b/tools/llvm-readobj/XCOFFDumper.cpp
index 6f260f91537f..fe95b6d1b494 100644
--- a/tools/llvm-readobj/XCOFFDumper.cpp
+++ b/tools/llvm-readobj/XCOFFDumper.cpp
@@ -22,6 +22,12 @@ using namespace object;
namespace {
class XCOFFDumper : public ObjDumper {
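+ // A csect auxiliary entry packs the log2 symbol alignment into the high
+ // five bits of SymbolAlignmentAndType and the symbol type into the low
+ // three bits.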
+ enum {
+ SymbolTypeMask = 0x07,
+ SymbolAlignmentMask = 0xF8,
+ SymbolAlignmentBitOffset = 3
+ };
+
public:
XCOFFDumper(const XCOFFObjectFile &Obj, ScopedPrinter &Writer)
: ObjDumper(Writer), Obj(Obj) {}
@@ -37,11 +43,21 @@ public:
private:
template <typename T> void printSectionHeaders(ArrayRef<T> Sections);
-
- const XCOFFObjectFile &Obj;
+ template <typename T> void printGenericSectionHeader(T &Sec) const;
+ template <typename T> void printOverflowSectionHeader(T &Sec) const;
+ void printFileAuxEnt(const XCOFFFileAuxEnt *AuxEntPtr);
+ void printCsectAuxEnt32(const XCOFFCsectAuxEnt32 *AuxEntPtr);
+ void printSectAuxEntForStat(const XCOFFSectAuxEntForStat *AuxEntPtr);
+ void printSymbol(const SymbolRef &);
// Least significant 3 bits are reserved.
static constexpr unsigned SectionFlagsReservedMask = 0x7;
+
+ // The low order 16 bits of section flags denotes the section type.
+ static constexpr unsigned SectionFlagsTypeMask = 0xffffu;
+
+ void printRelocations(ArrayRef<XCOFFSectionHeader32> Sections);
+ const XCOFFObjectFile &Obj;
};
} // anonymous namespace
@@ -100,11 +116,315 @@ void XCOFFDumper::printSectionHeaders() {
}
void XCOFFDumper::printRelocations() {
- llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+ if (Obj.is64Bit())
+ llvm_unreachable("64-bit relocation output not implemented!");
+ else
+ printRelocations(Obj.sections32());
+}
+
+static const EnumEntry<XCOFF::RelocationType> RelocationTypeNameclass[] = {
+#define ECase(X) \
+ { #X, XCOFF::X }
+ ECase(R_POS), ECase(R_RL), ECase(R_RLA), ECase(R_NEG),
+ ECase(R_REL), ECase(R_TOC), ECase(R_TRL), ECase(R_TRLA),
+ ECase(R_GL), ECase(R_TCL), ECase(R_REF), ECase(R_BA),
+ ECase(R_BR), ECase(R_RBA), ECase(R_RBR), ECase(R_TLS),
+ ECase(R_TLS_IE), ECase(R_TLS_LD), ECase(R_TLS_LE), ECase(R_TLSM),
+ ECase(R_TLSML), ECase(R_TOCU), ECase(R_TOCL)
+#undef ECase
+};
+
+void XCOFFDumper::printRelocations(ArrayRef<XCOFFSectionHeader32> Sections) {
+ if (!opts::ExpandRelocs)
+ report_fatal_error("Unexpanded relocation output not implemented.");
+
+ ListScope LS(W, "Relocations");
+ uint16_t Index = 0;
+ for (const auto &Sec : Sections) {
+ ++Index;
+ // Only the .text, .data, .tdata, and STYP_DWARF sections have relocations.
+ if (Sec.Flags != XCOFF::STYP_TEXT && Sec.Flags != XCOFF::STYP_DATA &&
+ Sec.Flags != XCOFF::STYP_TDATA && Sec.Flags != XCOFF::STYP_DWARF)
+ continue;
+ auto Relocations = unwrapOrError(Obj.getFileName(), Obj.relocations(Sec));
+ if (Relocations.empty())
+ continue;
+
+ W.startLine() << "Section (index: " << Index << ") " << Sec.getName()
+ << " {\n";
+ for (auto Reloc : Relocations) {
+ StringRef SymbolName = unwrapOrError(
+ Obj.getFileName(), Obj.getSymbolNameByIndex(Reloc.SymbolIndex));
+
+ DictScope RelocScope(W, "Relocation");
+ W.printHex("Virtual Address", Reloc.VirtualAddress);
+ W.printNumber("Symbol", SymbolName, Reloc.SymbolIndex);
+ W.printString("IsSigned", Reloc.isRelocationSigned() ? "Yes" : "No");
+ W.printNumber("FixupBitValue", Reloc.isFixupIndicated() ? 1 : 0);
+ W.printNumber("Length", Reloc.getRelocatedLength());
+ W.printEnum("Type", (uint8_t)Reloc.Type,
+ makeArrayRef(RelocationTypeNameclass));
+ }
+ W.unindent();
+ W.startLine() << "}\n";
+ }
+}
+
+static const EnumEntry<XCOFF::CFileStringType> FileStringType[] = {
+#define ECase(X) \
+ { #X, XCOFF::X }
+ ECase(XFT_FN), ECase(XFT_CT), ECase(XFT_CV), ECase(XFT_CD)
+#undef ECase
+};
+
+void XCOFFDumper::printFileAuxEnt(const XCOFFFileAuxEnt *AuxEntPtr) {
+ if (Obj.is64Bit())
+ report_fatal_error(
+ "Printing for File Auxiliary Entry in 64-bit is unimplemented.");
+ StringRef FileName =
+ unwrapOrError(Obj.getFileName(), Obj.getCFileName(AuxEntPtr));
+ DictScope SymDs(W, "File Auxiliary Entry");
+ W.printNumber("Index",
+ Obj.getSymbolIndex(reinterpret_cast<uintptr_t>(AuxEntPtr)));
+ W.printString("Name", FileName);
+ W.printEnum("Type", static_cast<uint8_t>(AuxEntPtr->Type),
+ makeArrayRef(FileStringType));
+}
+
+static const EnumEntry<XCOFF::StorageMappingClass> CsectStorageMappingClass[] =
+ {
+#define ECase(X) \
+ { #X, XCOFF::X }
+ ECase(XMC_PR), ECase(XMC_RO), ECase(XMC_DB),
+ ECase(XMC_GL), ECase(XMC_XO), ECase(XMC_SV),
+ ECase(XMC_SV64), ECase(XMC_SV3264), ECase(XMC_TI),
+ ECase(XMC_TB), ECase(XMC_RW), ECase(XMC_TC0),
+ ECase(XMC_TC), ECase(XMC_TD), ECase(XMC_DS),
+ ECase(XMC_UA), ECase(XMC_BS), ECase(XMC_UC),
+ ECase(XMC_TL), ECase(XMC_TE)
+#undef ECase
+};
+
+static const EnumEntry<XCOFF::SymbolType> CsectSymbolTypeClass[] = {
+#define ECase(X) \
+ { #X, XCOFF::X }
+ ECase(XTY_ER), ECase(XTY_SD), ECase(XTY_LD), ECase(XTY_CM)
+#undef ECase
+};
+
+void XCOFFDumper::printCsectAuxEnt32(const XCOFFCsectAuxEnt32 *AuxEntPtr) {
+ assert(!Obj.is64Bit() && "32-bit interface called on 64-bit object file.");
+
+ DictScope SymDs(W, "CSECT Auxiliary Entry");
+ W.printNumber("Index",
+ Obj.getSymbolIndex(reinterpret_cast<uintptr_t>(AuxEntPtr)));
+ if ((AuxEntPtr->SymbolAlignmentAndType & SymbolTypeMask) == XCOFF::XTY_LD)
+ W.printNumber("ContainingCsectSymbolIndex", AuxEntPtr->SectionOrLength);
+ else
+ W.printNumber("SectionLen", AuxEntPtr->SectionOrLength);
+ W.printHex("ParameterHashIndex", AuxEntPtr->ParameterHashIndex);
+ W.printHex("TypeChkSectNum", AuxEntPtr->TypeChkSectNum);
+ // Print out symbol alignment and type.
+ W.printNumber("SymbolAlignmentLog2",
+ (AuxEntPtr->SymbolAlignmentAndType & SymbolAlignmentMask) >>
+ SymbolAlignmentBitOffset);
+ W.printEnum("SymbolType", AuxEntPtr->SymbolAlignmentAndType & SymbolTypeMask,
+ makeArrayRef(CsectSymbolTypeClass));
+ W.printEnum("StorageMappingClass",
+ static_cast<uint8_t>(AuxEntPtr->StorageMappingClass),
+ makeArrayRef(CsectStorageMappingClass));
+ W.printHex("StabInfoIndex", AuxEntPtr->StabInfoIndex);
+ W.printHex("StabSectNum", AuxEntPtr->StabSectNum);
+}
+
+void XCOFFDumper::printSectAuxEntForStat(
+ const XCOFFSectAuxEntForStat *AuxEntPtr) {
+ assert(!Obj.is64Bit() && "32-bit interface called on 64-bit object file.");
+
+ DictScope SymDs(W, "Sect Auxiliary Entry For Stat");
+ W.printNumber("Index",
+ Obj.getSymbolIndex(reinterpret_cast<uintptr_t>(AuxEntPtr)));
+ W.printNumber("SectionLength", AuxEntPtr->SectionLength);
+
+ // Unlike the corresponding fields in the section header, NumberOfRelocEnt
+ // and NumberOfLineNum do not handle values greater than 65535.
+ W.printNumber("NumberOfRelocEnt", AuxEntPtr->NumberOfRelocEnt);
+ W.printNumber("NumberOfLineNum", AuxEntPtr->NumberOfLineNum);
+}
+
+static const EnumEntry<XCOFF::StorageClass> SymStorageClass[] = {
+#define ECase(X) \
+ { #X, XCOFF::X }
+ ECase(C_NULL), ECase(C_AUTO), ECase(C_EXT), ECase(C_STAT),
+ ECase(C_REG), ECase(C_EXTDEF), ECase(C_LABEL), ECase(C_ULABEL),
+ ECase(C_MOS), ECase(C_ARG), ECase(C_STRTAG), ECase(C_MOU),
+ ECase(C_UNTAG), ECase(C_TPDEF), ECase(C_USTATIC), ECase(C_ENTAG),
+ ECase(C_MOE), ECase(C_REGPARM), ECase(C_FIELD), ECase(C_BLOCK),
+ ECase(C_FCN), ECase(C_EOS), ECase(C_FILE), ECase(C_LINE),
+ ECase(C_ALIAS), ECase(C_HIDDEN), ECase(C_HIDEXT), ECase(C_BINCL),
+ ECase(C_EINCL), ECase(C_INFO), ECase(C_WEAKEXT), ECase(C_DWARF),
+ ECase(C_GSYM), ECase(C_LSYM), ECase(C_PSYM), ECase(C_RSYM),
+ ECase(C_RPSYM), ECase(C_STSYM), ECase(C_TCSYM), ECase(C_BCOMM),
+ ECase(C_ECOML), ECase(C_ECOMM), ECase(C_DECL), ECase(C_ENTRY),
+ ECase(C_FUN), ECase(C_BSTAT), ECase(C_ESTAT), ECase(C_GTLS),
+ ECase(C_STTLS), ECase(C_EFCN)
+#undef ECase
+};
+
+static StringRef GetSymbolValueName(XCOFF::StorageClass SC) {
+ switch (SC) {
+ case XCOFF::C_EXT:
+ case XCOFF::C_WEAKEXT:
+ case XCOFF::C_HIDEXT:
+ case XCOFF::C_STAT:
+ return "Value (RelocatableAddress)";
+ case XCOFF::C_FILE:
+ return "Value (SymbolTableIndex)";
+ case XCOFF::C_FCN:
+ case XCOFF::C_BLOCK:
+ case XCOFF::C_FUN:
+ case XCOFF::C_STSYM:
+ case XCOFF::C_BINCL:
+ case XCOFF::C_EINCL:
+ case XCOFF::C_INFO:
+ case XCOFF::C_BSTAT:
+ case XCOFF::C_LSYM:
+ case XCOFF::C_PSYM:
+ case XCOFF::C_RPSYM:
+ case XCOFF::C_RSYM:
+ case XCOFF::C_ECOML:
+ case XCOFF::C_DWARF:
+ assert(false && "This StorageClass for the symbol is not yet implemented.");
+ return "";
+ default:
+ return "Value";
+ }
+}
+
+static const EnumEntry<XCOFF::CFileLangId> CFileLangIdClass[] = {
+#define ECase(X) \
+ { #X, XCOFF::X }
+ ECase(TB_C), ECase(TB_CPLUSPLUS)
+#undef ECase
+};
+
+static const EnumEntry<XCOFF::CFileCpuId> CFileCpuIdClass[] = {
+#define ECase(X) \
+ { #X, XCOFF::X }
+ ECase(TCPU_PPC64), ECase(TCPU_COM), ECase(TCPU_970)
+#undef ECase
+};
+
+void XCOFFDumper::printSymbol(const SymbolRef &S) {
+ if (Obj.is64Bit())
+ report_fatal_error("64-bit support is unimplemented.");
+
+ DataRefImpl SymbolDRI = S.getRawDataRefImpl();
+ const XCOFFSymbolEntry *SymbolEntPtr = Obj.toSymbolEntry(SymbolDRI);
+
+ XCOFFSymbolRef XCOFFSymRef(SymbolDRI, &Obj);
+ uint8_t NumberOfAuxEntries = XCOFFSymRef.getNumberOfAuxEntries();
+
+ DictScope SymDs(W, "Symbol");
+
+ StringRef SymbolName =
+ unwrapOrError(Obj.getFileName(), Obj.getSymbolName(SymbolDRI));
+
+ W.printNumber("Index",
+ Obj.getSymbolIndex(reinterpret_cast<uintptr_t>(SymbolEntPtr)));
+ W.printString("Name", SymbolName);
+ W.printHex(GetSymbolValueName(SymbolEntPtr->StorageClass),
+ SymbolEntPtr->Value);
+
+ StringRef SectionName =
+ unwrapOrError(Obj.getFileName(), Obj.getSymbolSectionName(SymbolEntPtr));
+
+ W.printString("Section", SectionName);
+ if (XCOFFSymRef.getStorageClass() == XCOFF::C_FILE) {
+ W.printEnum("Source Language ID",
+ SymbolEntPtr->CFileLanguageIdAndTypeId.LanguageId,
+ makeArrayRef(CFileLangIdClass));
+ W.printEnum("CPU Version ID",
+ SymbolEntPtr->CFileLanguageIdAndTypeId.CpuTypeId,
+ makeArrayRef(CFileCpuIdClass));
+ } else
+ W.printHex("Type", SymbolEntPtr->SymbolType);
+
+ W.printEnum("StorageClass", static_cast<uint8_t>(SymbolEntPtr->StorageClass),
+ makeArrayRef(SymStorageClass));
+ W.printNumber("NumberOfAuxEntries", SymbolEntPtr->NumberOfAuxEntries);
+
+ if (NumberOfAuxEntries == 0)
+ return;
+
+ switch (XCOFFSymRef.getStorageClass()) {
+ case XCOFF::C_FILE:
+ // A C_FILE symbol's auxiliary entries are all file auxiliary entries;
+ // print each of them.
+ for (int i = 1; i <= NumberOfAuxEntries; i++) {
+ const XCOFFFileAuxEnt *FileAuxEntPtr =
+ reinterpret_cast<const XCOFFFileAuxEnt *>(SymbolEntPtr + i);
+#ifndef NDEBUG
+ Obj.checkSymbolEntryPointer(reinterpret_cast<uintptr_t>(FileAuxEntPtr));
+#endif
+ printFileAuxEnt(FileAuxEntPtr);
+ }
+ break;
+ case XCOFF::C_EXT:
+ case XCOFF::C_WEAKEXT:
+ case XCOFF::C_HIDEXT:
+ // If the symbol is for a function and has more than one auxiliary entry,
+ // then one of them must be a function auxiliary entry, which we do not
+ // support yet.
+ if (XCOFFSymRef.isFunction() && NumberOfAuxEntries >= 2)
+ report_fatal_error("Function auxiliary entry printing is unimplemented.");
+
+ // If there is more than one auxiliary entry, print the raw auxiliary
+ // entries from the first through the second-to-last instead of reporting
+ // an error. The last one must be a CSECT auxiliary entry.
+ for (int i = 1; i < NumberOfAuxEntries; i++) {
+ W.startLine() << "!Unexpected raw auxiliary entry data:\n";
+ W.startLine() << format_bytes(
+ ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(SymbolEntPtr + i),
+ XCOFF::SymbolTableEntrySize));
+ }
+
+ // The symbol's last auxiliary entry is a CSECT Auxiliary Entry.
+ printCsectAuxEnt32(XCOFFSymRef.getXCOFFCsectAuxEnt32());
+ break;
+ case XCOFF::C_STAT:
+ if (NumberOfAuxEntries > 1)
+ report_fatal_error(
+ "C_STAT symbol should not have more than 1 auxiliary entry.");
+
+ const XCOFFSectAuxEntForStat *StatAuxEntPtr;
+ StatAuxEntPtr =
+ reinterpret_cast<const XCOFFSectAuxEntForStat *>(SymbolEntPtr + 1);
+#ifndef NDEBUG
+ Obj.checkSymbolEntryPointer(reinterpret_cast<uintptr_t>(StatAuxEntPtr));
+#endif
+ printSectAuxEntForStat(StatAuxEntPtr);
+ break;
+ case XCOFF::C_DWARF:
+ case XCOFF::C_BLOCK:
+ case XCOFF::C_FCN:
+ report_fatal_error("Symbol table entry printing for this storage class "
+ "type is unimplemented.");
+ break;
+ default:
+ for (int i = 1; i <= NumberOfAuxEntries; i++) {
+ W.startLine() << "!Unexpected raw auxiliary entry data:\n";
+ W.startLine() << format_bytes(
+ ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(SymbolEntPtr + i),
+ XCOFF::SymbolTableEntrySize));
+ }
+ break;
+ }
}
void XCOFFDumper::printSymbols() {
- llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+ ListScope Group(W, "Symbols");
+ for (const SymbolRef &S : Obj.symbols())
+ printSymbol(S);
}
void XCOFFDumper::printDynamicSymbols() {
@@ -135,6 +455,39 @@ static const EnumEntry<XCOFF::SectionTypeFlags> SectionTypeFlagsNames[] = {
};
template <typename T>
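+// In an overflow section header, PhysicalAddress and VirtualAddress carry
+// the relocation and line-number counts of the overflowed section, while
+// NumberOfRelocations and NumberOfLineNumbers record that section's index.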
+void XCOFFDumper::printOverflowSectionHeader(T &Sec) const {
+ if (Obj.is64Bit()) {
+ reportWarning(make_error<StringError>("An 64-bit XCOFF object file may not "
+ "contain an overflow section header.",
+ object_error::parse_failed),
+ Obj.getFileName());
+ }
+
+ W.printString("Name", Sec.getName());
+ W.printNumber("NumberOfRelocations", Sec.PhysicalAddress);
+ W.printNumber("NumberOfLineNumbers", Sec.VirtualAddress);
+ W.printHex("Size", Sec.SectionSize);
+ W.printHex("RawDataOffset", Sec.FileOffsetToRawData);
+ W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo);
+ W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo);
+ W.printNumber("IndexOfSectionOverflowed", Sec.NumberOfRelocations);
+ W.printNumber("IndexOfSectionOverflowed", Sec.NumberOfLineNumbers);
+}
+
+template <typename T>
+void XCOFFDumper::printGenericSectionHeader(T &Sec) const {
+ W.printString("Name", Sec.getName());
+ W.printHex("PhysicalAddress", Sec.PhysicalAddress);
+ W.printHex("VirtualAddress", Sec.VirtualAddress);
+ W.printHex("Size", Sec.SectionSize);
+ W.printHex("RawDataOffset", Sec.FileOffsetToRawData);
+ W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo);
+ W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo);
+ W.printNumber("NumberOfRelocations", Sec.NumberOfRelocations);
+ W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers);
+}
+
+template <typename T>
void XCOFFDumper::printSectionHeaders(ArrayRef<T> Sections) {
ListScope Group(W, "Sections");
@@ -143,27 +496,28 @@ void XCOFFDumper::printSectionHeaders(ArrayRef<T> Sections) {
DictScope SecDS(W, "Section");
W.printNumber("Index", Index++);
- W.printString("Name", Sec.getName());
-
- W.printHex("PhysicalAddress", Sec.PhysicalAddress);
- W.printHex("VirtualAddress", Sec.VirtualAddress);
- W.printHex("Size", Sec.SectionSize);
- W.printHex("RawDataOffset", Sec.FileOffsetToRawData);
- W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo);
- W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo);
-
- // TODO Need to add overflow handling when NumberOfX == _OVERFLOW_MARKER
- // in 32-bit object files.
- W.printNumber("NumberOfRelocations", Sec.NumberOfRelocations);
- W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers);
-
- // The most significant 16-bits represent the DWARF section subtype. For
- // now we just dump the section type flags.
- uint16_t Flags = Sec.Flags & 0xffffu;
- if (Flags & SectionFlagsReservedMask)
- W.printHex("Flags", "Reserved", Flags);
+
+ uint16_t SectionType = Sec.Flags & SectionFlagsTypeMask;
+ switch (SectionType) {
+ case XCOFF::STYP_OVRFLO:
+ printOverflowSectionHeader(Sec);
+ break;
+ case XCOFF::STYP_LOADER:
+ case XCOFF::STYP_EXCEPT:
+ case XCOFF::STYP_TYPCHK:
+ // TODO The interpretation of loader, exception, and type check section
+ // headers is different from that of generic section headers. We will
+ // implement them later; for now we interpret them as generic section
+ // headers.
+ default:
+ printGenericSectionHeader(Sec);
+ break;
+ }
+ // For now we just dump the section type portion of the flags.
+ if (SectionType & SectionFlagsReservedMask)
+ W.printHex("Flags", "Reserved", SectionType);
else
- W.printEnum("Type", Flags, makeArrayRef(SectionTypeFlagsNames));
+ W.printEnum("Type", SectionType, makeArrayRef(SectionTypeFlagsNames));
}
if (opts::SectionRelocations)
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index 1bd5bb74bf29..4db13897879d 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -231,26 +231,11 @@ namespace opts {
"codeview-subsection-bytes",
cl::desc("Dump raw contents of codeview debug sections and records"));
- // --arm-attributes
- cl::opt<bool> ARMAttributes("arm-attributes",
- cl::desc("Display the ARM attributes section"));
-
- // --mips-plt-got
- cl::opt<bool>
- MipsPLTGOT("mips-plt-got",
- cl::desc("Display the MIPS GOT and PLT GOT sections"));
-
- // --mips-abi-flags
- cl::opt<bool> MipsABIFlags("mips-abi-flags",
- cl::desc("Display the MIPS.abiflags section"));
-
- // --mips-reginfo
- cl::opt<bool> MipsReginfo("mips-reginfo",
- cl::desc("Display the MIPS .reginfo section"));
-
- // --mips-options
- cl::opt<bool> MipsOptions("mips-options",
- cl::desc("Display the MIPS .MIPS.options section"));
+ // --arch-specific
+ cl::opt<bool> ArchSpecificInfo("arch-specific",
+ cl::desc("Displays architecture-specific information, if there is any."));
+ cl::alias ArchSpecifcInfoShort("A", cl::desc("Alias for --arch-specific"),
+ cl::aliasopt(ArchSpecificInfo), cl::NotHidden);
// --coff-imports
cl::opt<bool>
@@ -324,6 +309,11 @@ namespace opts {
PrintStackMap("stackmap",
cl::desc("Display contents of stackmap section"));
+ // --stack-sizes
+ cl::opt<bool>
+ PrintStackSizes("stack-sizes",
+ cl::desc("Display contents of all stack sizes sections"));
+
// --version-info, -V
cl::opt<bool>
VersionInfo("version-info",
@@ -368,63 +358,45 @@ namespace opts {
HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
} // namespace opts
+static StringRef ToolName;
+
namespace llvm {
-LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) {
+LLVM_ATTRIBUTE_NORETURN static void error(Twine Msg) {
+ // Flush the standard output to print the error at a
+ // proper place.
fouts().flush();
errs() << "\n";
- WithColor::error(errs()) << Msg << "\n";
+ WithColor::error(errs(), ToolName) << Msg << "\n";
exit(1);
}
-void reportError(StringRef Input, Error Err) {
+LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input) {
+ assert(Err);
if (Input == "-")
Input = "<stdin>";
- error(createFileError(Input, std::move(Err)));
+ handleAllErrors(createFileError(Input, std::move(Err)),
+ [&](const ErrorInfoBase &EI) { error(EI.message()); });
+ llvm_unreachable("error() call should never return");
}
-void reportWarning(Twine Msg) {
- fouts().flush();
- errs() << "\n";
- WithColor::warning(errs()) << Msg << "\n";
-}
-
-void warn(Error Err) {
- handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
- reportWarning(EI.message());
- });
-}
-
-void error(Error EC) {
- if (!EC)
- return;
- handleAllErrors(std::move(EC),
- [&](const ErrorInfoBase &EI) { reportError(EI.message()); });
-}
+void reportWarning(Error Err, StringRef Input) {
+ assert(Err);
+ if (Input == "-")
+ Input = "<stdin>";
-void error(std::error_code EC) {
- if (!EC)
- return;
- reportError(EC.message());
+ // Flush the standard output so the warning appears in the proper place.
+ fouts().flush();
+ handleAllErrors(
+ createFileError(Input, std::move(Err)), [&](const ErrorInfoBase &EI) {
+ errs() << "\n";
+ WithColor::warning(errs(), ToolName) << EI.message() << "\n";
+ });
}
} // namespace llvm
-static void reportError(StringRef Input, std::error_code EC) {
- reportError(Input, errorCodeToError(EC));
-}
-
-static bool isMipsArch(unsigned Arch) {
- switch (Arch) {
- case llvm::Triple::mips:
- case llvm::Triple::mipsel:
- case llvm::Triple::mips64:
- case llvm::Triple::mips64el:
- return true;
- default:
- return false;
- }
-}
namespace {
struct ReadObjTypeTableBuilder {
ReadObjTypeTableBuilder()
@@ -471,19 +443,19 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
std::unique_ptr<ObjDumper> Dumper;
if (std::error_code EC = createDumper(Obj, Writer, Dumper))
- reportError(FileStr, EC);
+ reportError(errorCodeToError(EC), FileStr);
- Writer.startLine() << "\n";
- if (opts::Output == opts::LLVM) {
+ if (opts::Output == opts::LLVM || opts::InputFilenames.size() > 1 || A) {
+ Writer.startLine() << "\n";
Writer.printString("File", FileStr);
+ }
+ if (opts::Output == opts::LLVM) {
Writer.printString("Format", Obj->getFileFormatName());
Writer.printString("Arch", Triple::getArchTypeName(
(llvm::Triple::ArchType)Obj->getArch()));
Writer.printString("AddressSize",
formatv("{0}bit", 8 * Obj->getBytesInAddress()));
Dumper->printLoadName();
- } else if (opts::Output == opts::GNU && A) {
- Writer.printString("File", FileStr);
}
if (opts::FileHeaders)
@@ -519,19 +491,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
if (Obj->isELF()) {
if (opts::ELFLinkerOptions)
Dumper->printELFLinkerOptions();
- if (Obj->getArch() == llvm::Triple::arm)
- if (opts::ARMAttributes)
- Dumper->printAttributes();
- if (isMipsArch(Obj->getArch())) {
- if (opts::MipsPLTGOT)
- Dumper->printMipsPLTGOT();
- if (opts::MipsABIFlags)
- Dumper->printMipsABIFlags();
- if (opts::MipsReginfo)
- Dumper->printMipsReginfo();
- if (opts::MipsOptions)
- Dumper->printMipsOptions();
- }
+ if (opts::ArchSpecificInfo)
+ Dumper->printArchSpecificInfo();
if (opts::SectionGroups)
Dumper->printGroupSections();
if (opts::HashHistogram)
@@ -583,6 +544,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
}
if (opts::PrintStackMap)
Dumper->printStackMap();
+ if (opts::PrintStackSizes)
+ Dumper->printStackSizes();
}
/// Dumps each object file in \a Arc;
@@ -591,9 +554,8 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) {
for (auto &Child : Arc->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) {
- reportError(Arc->getFileName(), std::move(E));
- }
+ if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ reportError(std::move(E), Arc->getFileName());
continue;
}
if (ObjectFile *Obj = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
@@ -601,10 +563,11 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) {
else if (COFFImportFile *Imp = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
dumpCOFFImportFile(Imp, Writer);
else
- reportError(Arc->getFileName(), readobj_error::unrecognized_file_format);
+ reportError(errorCodeToError(readobj_error::unrecognized_file_format),
+ Arc->getFileName());
}
if (Err)
- reportError(Arc->getFileName(), std::move(Err));
+ reportError(std::move(Err), Arc->getFileName());
}
/// Dumps each object file in \a MachO Universal Binary;
@@ -614,9 +577,8 @@ static void dumpMachOUniversalBinary(const MachOUniversalBinary *UBinary,
Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = Obj.getAsObjectFile();
if (ObjOrErr)
dumpObject(&*ObjOrErr.get(), Writer);
- else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
- reportError(UBinary->getFileName(), ObjOrErr.takeError());
- }
+ else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError()))
+ reportError(ObjOrErr.takeError(), UBinary->getFileName());
else if (Expected<std::unique_ptr<Archive>> AOrErr = Obj.getAsArchive())
dumpArchive(&*AOrErr.get(), Writer);
}
@@ -627,7 +589,7 @@ static void dumpWindowsResourceFile(WindowsResource *WinRes,
ScopedPrinter &Printer) {
WindowsRes::Dumper Dumper(WinRes, Printer);
if (auto Err = Dumper.printData())
- reportError(WinRes->getFileName(), std::move(Err));
+ reportError(std::move(Err), WinRes->getFileName());
}
@@ -636,7 +598,7 @@ static void dumpInput(StringRef File, ScopedPrinter &Writer) {
// Attempt to open the binary.
Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(File);
if (!BinaryOrErr)
- reportError(File, BinaryOrErr.takeError());
+ reportError(BinaryOrErr.takeError(), File);
Binary &Binary = *BinaryOrErr.get().getBinary();
if (Archive *Arc = dyn_cast<Archive>(&Binary))
@@ -651,7 +613,8 @@ static void dumpInput(StringRef File, ScopedPrinter &Writer) {
else if (WindowsResource *WinRes = dyn_cast<WindowsResource>(&Binary))
dumpWindowsResourceFile(WinRes, Writer);
else
- reportError(File, readobj_error::unrecognized_file_format);
+ reportError(errorCodeToError(readobj_error::unrecognized_file_format),
+ File);
CVTypes.Binaries.push_back(std::move(*BinaryOrErr));
}
@@ -702,6 +665,7 @@ static void registerReadelfAliases() {
int main(int argc, const char *argv[]) {
InitLLVM X(argc, argv);
+ ToolName = argv[0];
// Register the target printer for --version.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
@@ -727,6 +691,10 @@ int main(int argc, const char *argv[]) {
opts::UnwindInfo = true;
opts::SectionGroups = true;
opts::HashHistogram = true;
+ if (opts::Output == opts::LLVM) {
+ opts::Addrsig = true;
+ opts::PrintStackSizes = true;
+ }
}
if (opts::Headers) {
diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h
index 0e02da4cb847..d9813f5dea62 100644
--- a/tools/llvm-readobj/llvm-readobj.h
+++ b/tools/llvm-readobj/llvm-readobj.h
@@ -21,30 +21,13 @@ namespace llvm {
}
// Various helper functions.
- LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg);
- void reportError(StringRef Input, Error Err);
- void reportWarning(Twine Msg);
- void warn(llvm::Error Err);
- void error(std::error_code EC);
- void error(llvm::Error EC);
- template <typename T> T error(llvm::Expected<T> &&E) {
- error(E.takeError());
- return std::move(*E);
- }
+ LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input);
+ void reportWarning(Error Err, StringRef Input);
- template <class T> T unwrapOrError(ErrorOr<T> EO) {
- if (EO)
- return *EO;
- reportError(EO.getError().message());
- }
- template <class T> T unwrapOrError(Expected<T> EO) {
+ template <class T> T unwrapOrError(StringRef Input, Expected<T> EO) {
if (EO)
return *EO;
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(EO.takeError(), OS);
- OS.flush();
- reportError(Buf);
+ reportError(EO.takeError(), Input);
}
} // namespace llvm
diff --git a/tools/llvm-reduce/CMakeLists.txt b/tools/llvm-reduce/CMakeLists.txt
new file mode 100644
index 000000000000..48de0ffa78a1
--- /dev/null
+++ b/tools/llvm-reduce/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LLVM_LINK_COMPONENTS
+ AllTargetsAsmParsers
+ AllTargetsCodeGens
+ AllTargetsDescs
+ AllTargetsInfos
+ Core
+ IRReader
+ Support
+ Target
+ TransformUtils
+ )
+
+add_llvm_tool(llvm-reduce
+ llvm-reduce.cpp
+ TestRunner.cpp
+ deltas/Delta.cpp
+ deltas/ReduceFunctions.cpp
+ deltas/ReduceGlobalVars.cpp
+ deltas/ReduceMetadata.cpp
+ deltas/ReduceArguments.cpp
+ deltas/ReduceBasicBlocks.cpp
+ deltas/ReduceInstructions.cpp
+
+ DEPENDS
+ intrinsics_gen
+ )
diff --git a/tools/llvm-reduce/DeltaManager.h b/tools/llvm-reduce/DeltaManager.h
new file mode 100644
index 000000000000..2309c3adf4e6
--- /dev/null
+++ b/tools/llvm-reduce/DeltaManager.h
@@ -0,0 +1,36 @@
+//===- DeltaManager.h - Runs Delta Passes to reduce Input -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file calls each specialized Delta pass in order to reduce the input IR
+// file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestRunner.h"
+#include "deltas/Delta.h"
+#include "deltas/ReduceArguments.h"
+#include "deltas/ReduceBasicBlocks.h"
+#include "deltas/ReduceFunctions.h"
+#include "deltas/ReduceGlobalVars.h"
+#include "deltas/ReduceMetadata.h"
+#include "deltas/ReduceInstructions.h"
+
+namespace llvm {
+
+// TODO: Add CLI option to run only specified Passes (for unit tests)
+inline void runDeltaPasses(TestRunner &Tester) {
+ reduceFunctionsDeltaPass(Tester);
+ reduceBasicBlocksDeltaPass(Tester);
+ reduceGlobalsDeltaPass(Tester);
+ reduceMetadataDeltaPass(Tester);
+ reduceArgumentsDeltaPass(Tester);
+ reduceInstructionsDeltaPass(Tester);
+ // TODO: Implement the remaining Delta Passes
+}
+
+} // namespace llvm
diff --git a/tools/llvm-reduce/LLVMBuild.txt b/tools/llvm-reduce/LLVMBuild.txt
new file mode 100644
index 000000000000..7928f0503283
--- /dev/null
+++ b/tools/llvm-reduce/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./tools/llvm-reduce/LLVMBuild.txt ------------------------*- Conf -*--===;
+;
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-reduce
+parent = Tools
+required_libraries =
+ BitReader
+ IRReader
+ all-targets
diff --git a/tools/llvm-reduce/TestRunner.cpp b/tools/llvm-reduce/TestRunner.cpp
new file mode 100644
index 000000000000..d0e195d5697c
--- /dev/null
+++ b/tools/llvm-reduce/TestRunner.cpp
@@ -0,0 +1,42 @@
+//===-- TestRunner.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestRunner.h"
+
+using namespace llvm;
+
+TestRunner::TestRunner(StringRef TestName, const std::vector<std::string> &TestArgs)
+ : TestName(TestName), TestArgs(TestArgs) {
+}
+
+/// Runs the interestingness test, passes file to be tested as first argument
+/// and other specified test arguments after that.
+int TestRunner::run(StringRef Filename) {
+ std::vector<StringRef> ProgramArgs;
+ ProgramArgs.push_back(TestName);
+
+ for (const auto &Arg : TestArgs)
+ ProgramArgs.push_back(Arg);
+
+ ProgramArgs.push_back(Filename);
+
+ std::string ErrMsg;
+ int Result = sys::ExecuteAndWait(
+ TestName, ProgramArgs, /*Env=*/None, /*Redirects=*/None,
+ /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
+
+ if (Result < 0) {
+ Error E = make_error<StringError>("Error running interesting-ness test: " +
+ ErrMsg,
+ inconvertibleErrorCode());
+ errs() << toString(std::move(E));
+ exit(1);
+ }
+
+ return !Result;
+}
diff --git a/tools/llvm-reduce/TestRunner.h b/tools/llvm-reduce/TestRunner.h
new file mode 100644
index 000000000000..2270d6bd90b2
--- /dev/null
+++ b/tools/llvm-reduce/TestRunner.h
@@ -0,0 +1,46 @@
+//===-- tools/llvm-reduce/TestRunner.h ---------------------------*- C++ -*-===/
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMREDUCE_TESTRUNNER_H
+#define LLVM_TOOLS_LLVMREDUCE_TESTRUNNER_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include <vector>
+
+namespace llvm {
+
+// This class contains all the info necessary for running the provided
+// interesting-ness test, as well as the most reduced module and its
+// respective filename.
+class TestRunner {
+public:
+ TestRunner(StringRef TestName, const std::vector<std::string> &TestArgs);
+
+ /// Runs the interesting-ness test for the specified file
+ /// @returns 0 if test was successful, 1 if otherwise
+ int run(StringRef Filename);
+
+ /// Returns the most reduced version of the original testcase
+ Module *getProgram() const { return Program.get(); }
+
+ void setProgram(std::unique_ptr<Module> P) { Program = std::move(P); }
+
+private:
+ StringRef TestName;
+ const std::vector<std::string> &TestArgs;
+ std::unique_ptr<Module> Program;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-reduce/deltas/Delta.cpp b/tools/llvm-reduce/deltas/Delta.cpp
new file mode 100644
index 000000000000..0642241ddebd
--- /dev/null
+++ b/tools/llvm-reduce/deltas/Delta.cpp
@@ -0,0 +1,162 @@
+//===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation for the Delta Debugging Algorithm:
+// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
+// into chunks and tries to reduce the number chunks that are interesting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Delta.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <fstream>
+#include <set>
+
+using namespace llvm;
+
+bool IsReduced(Module &M, TestRunner &Test, SmallString<128> &CurrentFilepath) {
+ // Write Module to tmp file
+ int FD;
+ std::error_code EC =
+ sys::fs::createTemporaryFile("llvm-reduce", "ll", FD, CurrentFilepath);
+ if (EC) {
+ errs() << "Error making unique filename: " << EC.message() << "!\n";
+ exit(1);
+ }
+
+ ToolOutputFile Out(CurrentFilepath, FD);
+ M.print(Out.os(), /*AnnotationWriter=*/nullptr);
+ Out.os().close();
+ if (Out.os().has_error()) {
+ errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n";
+ exit(1);
+ }
+
+ // Current Chunks aren't interesting
+ return Test.run(CurrentFilepath);
+}
+
+/// Counts the amount of lines for a given file
+static int getLines(StringRef Filepath) {
+ int Lines = 0;
+ std::string CurrLine;
+ std::ifstream FileStream(Filepath);
+
+ while (std::getline(FileStream, CurrLine))
+ ++Lines;
+
+ return Lines;
+}
+
+/// Splits Chunks in half and prints them.
+/// If unable to split (when chunk size is 1) returns false.
+static bool increaseGranularity(std::vector<Chunk> &Chunks) {
+ errs() << "Increasing granularity...";
+ std::vector<Chunk> NewChunks;
+ bool SplitOne = false;
+
+ for (auto &C : Chunks) {
+ if (C.end - C.begin == 0)
+ NewChunks.push_back(C);
+ else {
+ int Half = (C.begin + C.end) / 2;
+ NewChunks.push_back({C.begin, Half});
+ NewChunks.push_back({Half + 1, C.end});
+ SplitOne = true;
+ }
+ }
+ if (SplitOne) {
+ Chunks = NewChunks;
+ errs() << "Success! New Chunks:\n";
+ for (auto C : Chunks) {
+ errs() << '\t';
+ C.print();
+ errs() << '\n';
+ }
+ }
+ return SplitOne;
+}
+
+/// Runs the Delta Debugging algorithm, splits the code into chunks and
+/// reduces the amount of chunks that are considered interesting by the
+/// given test.
+void llvm::runDeltaPass(
+ TestRunner &Test, int Targets,
+ std::function<void(const std::vector<Chunk> &, Module *)>
+ ExtractChunksFromModule) {
+ assert(Targets >= 0);
+ if (!Targets) {
+ errs() << "\nNothing to reduce\n";
+ return;
+ }
+
+ if (Module *Program = Test.getProgram()) {
+ SmallString<128> CurrentFilepath;
+ if (!IsReduced(*Program, Test, CurrentFilepath)) {
+ errs() << "\nInput isn't interesting! Verify interesting-ness test\n";
+ exit(1);
+ }
+ }
+
+ std::vector<Chunk> Chunks = {{1, Targets}};
+ std::set<Chunk> UninterestingChunks;
+ std::unique_ptr<Module> ReducedProgram;
+
+ if (!increaseGranularity(Chunks)) {
+ errs() << "\nAlready at minimum size. Cannot reduce anymore.\n";
+ return;
+ }
+
+ do {
+ UninterestingChunks = {};
+ for (int I = Chunks.size() - 1; I >= 0; --I) {
+ std::vector<Chunk> CurrentChunks;
+
+ for (auto C : Chunks)
+ if (!UninterestingChunks.count(C) && C != Chunks[I])
+ CurrentChunks.push_back(C);
+
+ if (CurrentChunks.empty())
+ continue;
+
+ // Clone module before hacking it up..
+ std::unique_ptr<Module> Clone = CloneModule(*Test.getProgram());
+ // Generate Module with only Targets inside Current Chunks
+ ExtractChunksFromModule(CurrentChunks, Clone.get());
+
+ errs() << "Ignoring: ";
+ Chunks[I].print();
+ for (auto C : UninterestingChunks)
+ C.print();
+
+
+
+ SmallString<128> CurrentFilepath;
+ if (!IsReduced(*Clone, Test, CurrentFilepath)) {
+ errs() << "\n";
+ continue;
+ }
+
+ UninterestingChunks.insert(Chunks[I]);
+ ReducedProgram = std::move(Clone);
+ errs() << " **** SUCCESS | lines: " << getLines(CurrentFilepath) << "\n";
+ }
+ // Delete uninteresting chunks
+ erase_if(Chunks, [&UninterestingChunks](const Chunk &C) {
+ return UninterestingChunks.count(C);
+ });
+
+ } while (!UninterestingChunks.empty() || increaseGranularity(Chunks));
+
+ // If we reduced the testcase replace it
+ if (ReducedProgram)
+ Test.setProgram(std::move(ReducedProgram));
+ errs() << "Couldn't increase anymore.\n";
+}
diff --git a/tools/llvm-reduce/deltas/Delta.h b/tools/llvm-reduce/deltas/Delta.h
new file mode 100644
index 000000000000..dbb18e4bd07f
--- /dev/null
+++ b/tools/llvm-reduce/deltas/Delta.h
@@ -0,0 +1,76 @@
+//===- Delta.h - Delta Debugging Algorithm Implementation -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation for the Delta Debugging Algorithm:
+// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
+// into chunks and tries to reduce the number chunks that are interesting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMREDUCE_LLVMREDUCE_DELTA_H
+#define LLVM_TOOLS_LLVMREDUCE_LLVMREDUCE_DELTA_H
+
+#include "TestRunner.h"
+#include <vector>
+#include <utility>
+#include <functional>
+
+namespace llvm {
+
+struct Chunk {
+ int begin;
+ int end;
+
+ /// Helper function to verify if a given Target-index is inside the Chunk
+ bool contains(int Index) const { return Index >= begin && Index <= end; }
+
+ void print() const {
+ errs() << "[" << begin;
+ if (end - begin != 0)
+ errs() << "," << end;
+ errs() << "]";
+ }
+
+ /// Operator when populating CurrentChunks in Generic Delta Pass
+ friend bool operator!=(const Chunk &C1, const Chunk &C2) {
+ return C1.begin != C2.begin || C1.end != C2.end;
+ }
+
+ /// Operator used for sets
+ friend bool operator<(const Chunk &C1, const Chunk &C2) {
+ return std::tie(C1.begin, C1.end) < std::tie(C2.begin, C2.end);
+ }
+};
+
+/// This function implements the Delta Debugging algorithm, it receives a
+/// number of Targets (e.g. Functions, Instructions, Basic Blocks, etc.) and
+/// splits them in half; these chunks of targets are then tested while ignoring
+/// one chunk, if a chunk is proven to be uninteresting (i.e. fails the test)
+/// it is removed from consideration. The algorithm will attempt to split the
+/// Chunks in half and start the process again until it can't split chunks
+/// anymore.
+///
+/// This function is intended to be called by each specialized delta pass (e.g.
+/// RemoveFunctions) and receives three key parameters:
+/// * Test: The main TestRunner instance which is used to run the provided
+/// interesting-ness test, as well as to store and access the reduced Program.
+/// * Targets: The amount of Targets that are going to be reduced by the
+/// algorithm, for example, the RemoveGlobalVars pass would send the amount of
+/// initialized GVs.
+/// * ExtractChunksFromModule: A function used to tailor the main program so it
+/// only contains Targets that are inside Chunks of the given iteration.
+/// Note: This function is implemented by each specialized Delta pass
+///
+/// Other implementations of the Delta Debugging algorithm can also be found in
+/// the CReduce, Delta, and Lithium projects.
+void runDeltaPass(TestRunner &Test, int Targets,
+ std::function<void(const std::vector<Chunk> &, Module *)>
+ ExtractChunksFromModule);
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-reduce/deltas/ReduceArguments.cpp b/tools/llvm-reduce/deltas/ReduceArguments.cpp
new file mode 100644
index 000000000000..f5f14b83f42c
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceArguments.cpp
@@ -0,0 +1,125 @@
+//===- ReduceArguments.cpp - Specialized Delta Pass -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce uninteresting Arguments from defined functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceArguments.h"
+#include "Delta.h"
+#include "llvm/ADT/SmallVector.h"
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+/// Goes over OldF calls and replaces them with a call to NewF
+static void replaceFunctionCalls(Function &OldF, Function &NewF,
+ const std::set<int> &ArgIndexesToKeep) {
+ const auto &Users = OldF.users();
+ for (auto I = Users.begin(), E = Users.end(); I != E; )
+ if (auto *CI = dyn_cast<CallInst>(*I++)) {
+ SmallVector<Value *, 8> Args;
+ for (auto ArgI = CI->arg_begin(), E = CI->arg_end(); ArgI != E; ++ArgI)
+ if (ArgIndexesToKeep.count(ArgI - CI->arg_begin()))
+ Args.push_back(*ArgI);
+
+ CallInst *NewCI = CallInst::Create(&NewF, Args);
+ NewCI->setCallingConv(NewF.getCallingConv());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ ReplaceInstWithInst(CI, NewCI);
+ }
+}
+
+/// Removes out-of-chunk arguments from functions, and modifies their calls
+/// accordingly. It also removes allocations of out-of-chunk arguments.
+static void extractArgumentsFromModule(std::vector<Chunk> ChunksToKeep,
+ Module *Program) {
+ int I = 0, ArgCount = 0;
+ std::set<Argument *> ArgsToKeep;
+ std::vector<Function *> Funcs;
+ // Get inside-chunk arguments, as well as their parent function
+ for (auto &F : *Program)
+ if (!F.isDeclaration()) {
+ Funcs.push_back(&F);
+ for (auto &A : F.args())
+ if (I < (int)ChunksToKeep.size()) {
+ if (ChunksToKeep[I].contains(++ArgCount))
+ ArgsToKeep.insert(&A);
+ if (ChunksToKeep[I].end == ArgCount)
+ ++I;
+ }
+ }
+
+ for (auto *F : Funcs) {
+ ValueToValueMapTy VMap;
+ std::vector<Instruction *> InstToDelete;
+ for (auto &A : F->args())
+ if (!ArgsToKeep.count(&A)) {
+ // By adding undesired arguments to the VMap, CloneFunction will remove
+ // them from the resulting Function
+ VMap[&A] = UndefValue::get(A.getType());
+ for (auto *U : A.users())
+ if (auto *I = dyn_cast<Instruction>(*&U))
+ InstToDelete.push_back(I);
+ }
+ // Delete any instruction that uses the argument
+ for (auto *I : InstToDelete) {
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+
+ // No arguments to reduce
+ if (VMap.empty())
+ continue;
+
+ std::set<int> ArgIndexesToKeep;
+ int ArgI = 0;
+ for (auto &Arg : F->args())
+ if (ArgsToKeep.count(&Arg))
+ ArgIndexesToKeep.insert(++ArgI);
+
+ auto *ClonedFunc = CloneFunction(F, VMap);
+ // In order to preserve function order, we move Clone after old Function
+ ClonedFunc->removeFromParent();
+ Program->getFunctionList().insertAfter(F->getIterator(), ClonedFunc);
+
+ replaceFunctionCalls(*F, *ClonedFunc, ArgIndexesToKeep);
+ // Rename Cloned Function to Old's name
+ std::string FName = F->getName();
+ F->eraseFromParent();
+ ClonedFunc->setName(FName);
+ }
+}
+
+/// Counts the amount of arguments in non-declaration functions and prints their
+/// respective name, index, and parent function name
+static int countArguments(Module *Program) {
+ // TODO: Silence index with --quiet flag
+ outs() << "----------------------------\n";
+ outs() << "Param Index Reference:\n";
+ int ArgsCount = 0;
+ for (auto &F : *Program)
+ if (!F.isDeclaration() && F.arg_size()) {
+ outs() << " " << F.getName() << "\n";
+ for (auto &A : F.args())
+ outs() << "\t" << ++ArgsCount << ": " << A.getName() << "\n";
+
+ outs() << "----------------------------\n";
+ }
+
+ return ArgsCount;
+}
+
+void llvm::reduceArgumentsDeltaPass(TestRunner &Test) {
+ outs() << "*** Reducing Arguments...\n";
+ int ArgCount = countArguments(Test.getProgram());
+ runDeltaPass(Test, ArgCount, extractArgumentsFromModule);
+}
diff --git a/tools/llvm-reduce/deltas/ReduceArguments.h b/tools/llvm-reduce/deltas/ReduceArguments.h
new file mode 100644
index 000000000000..d9682b44f74d
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceArguments.h
@@ -0,0 +1,21 @@
+//===- ReduceArguments.h - Specialized Delta Pass -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce uninteresting Arguments from defined functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Delta.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+namespace llvm {
+void reduceArgumentsDeltaPass(TestRunner &Test);
+} // namespace llvm
diff --git a/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp b/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp
new file mode 100644
index 000000000000..03c3962d2fd9
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp
@@ -0,0 +1,146 @@
+//===- ReduceArguments.cpp - Specialized Delta Pass -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce uninteresting Arguments from defined functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceBasicBlocks.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+using namespace llvm;
+
+/// Replaces BB Terminator with one that only contains Chunk BBs
+static void replaceBranchTerminator(BasicBlock &BB,
+ std::set<BasicBlock *> BBsToKeep) {
+ auto Term = BB.getTerminator();
+ std::vector<BasicBlock *> ChunkSucessors;
+ for (auto Succ : successors(&BB))
+ if (BBsToKeep.count(Succ))
+ ChunkSucessors.push_back(Succ);
+
+ // BB only references Chunk BBs
+ if (ChunkSucessors.size() == Term->getNumSuccessors())
+ return;
+
+ bool IsBranch = isa<BranchInst>(Term);
+ Value *Address = nullptr;
+ if (auto IndBI = dyn_cast<IndirectBrInst>(Term))
+ Address = IndBI->getAddress();
+
+ Term->eraseFromParent();
+
+ if (ChunkSucessors.empty()) {
+ ReturnInst::Create(BB.getContext(), nullptr, &BB);
+ return;
+ }
+
+ if (IsBranch)
+ BranchInst::Create(ChunkSucessors[0], &BB);
+
+ if (Address) {
+ auto NewIndBI =
+ IndirectBrInst::Create(Address, ChunkSucessors.size(), &BB);
+ for (auto Dest : ChunkSucessors)
+ NewIndBI->addDestination(Dest);
+ }
+}
+
+/// Removes uninteresting BBs from switch, if the default case ends up being
+/// uninteresting, the switch is replaced with a void return (since it has to be
+/// replace with something)
+static void removeUninterestingBBsFromSwitch(SwitchInst &SwInst,
+ std::set<BasicBlock *> BBsToKeep) {
+ if (!BBsToKeep.count(SwInst.getDefaultDest())) {
+ ReturnInst::Create(SwInst.getContext(), nullptr, SwInst.getParent());
+ SwInst.eraseFromParent();
+ } else
+ for (int I = 0, E = SwInst.getNumCases(); I != E; ++I) {
+ auto Case = SwInst.case_begin() + I;
+ if (!BBsToKeep.count(Case->getCaseSuccessor())) {
+ SwInst.removeCase(Case);
+ --I;
+ --E;
+ }
+ }
+}
+
+/// Removes out-of-chunk arguments from functions, and modifies their calls
+/// accordingly. It also removes allocations of out-of-chunk arguments.
+static void extractBasicBlocksFromModule(std::vector<Chunk> ChunksToKeep,
+ Module *Program) {
+ int I = 0, BBCount = 0;
+ std::set<BasicBlock *> BBsToKeep;
+
+ for (auto &F : *Program)
+ for (auto &BB : F)
+ if (I < (int)ChunksToKeep.size()) {
+ if (ChunksToKeep[I].contains(++BBCount))
+ BBsToKeep.insert(&BB);
+ if (ChunksToKeep[I].end == BBCount)
+ ++I;
+ }
+
+ std::vector<BasicBlock *> BBsToDelete;
+ for (auto &F : *Program)
+ for (auto &BB : F) {
+ if (!BBsToKeep.count(&BB)) {
+ BBsToDelete.push_back(&BB);
+ // Remove out-of-chunk BB from successor phi nodes
+ for (auto *Succ : successors(&BB))
+ Succ->removePredecessor(&BB);
+ }
+ }
+
+ // Replace terminators that reference out-of-chunk BBs
+ for (auto &F : *Program)
+ for (auto &BB : F) {
+ if (auto *SwInst = dyn_cast<SwitchInst>(BB.getTerminator()))
+ removeUninterestingBBsFromSwitch(*SwInst, BBsToKeep);
+ else
+ replaceBranchTerminator(BB, BBsToKeep);
+ }
+
+ // Replace out-of-chunk switch uses
+ for (auto &BB : BBsToDelete) {
+ // Instructions might be referenced in other BBs
+ for (auto &I : *BB)
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->eraseFromParent();
+ }
+}
+
+/// Counts the amount of basic blocks and prints their name & respective index
+static int countBasicBlocks(Module *Program) {
+ // TODO: Silence index with --quiet flag
+ outs() << "----------------------------\n";
+ int BBCount = 0;
+ for (auto &F : *Program)
+ for (auto &BB : F) {
+ if (BB.hasName())
+ outs() << "\t" << ++BBCount << ": " << BB.getName() << "\n";
+ else
+ outs() << "\t" << ++BBCount << ": Unnamed\n";
+ }
+
+ return BBCount;
+}
+
+void llvm::reduceBasicBlocksDeltaPass(TestRunner &Test) {
+ outs() << "*** Reducing Basic Blocks...\n";
+ int BBCount = countBasicBlocks(Test.getProgram());
+ runDeltaPass(Test, BBCount, extractBasicBlocksFromModule);
+}
diff --git a/tools/llvm-reduce/deltas/ReduceBasicBlocks.h b/tools/llvm-reduce/deltas/ReduceBasicBlocks.h
new file mode 100644
index 000000000000..cf76a0abbcd7
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceBasicBlocks.h
@@ -0,0 +1,20 @@
+//===- ReduceArguments.h - Specialized Delta Pass -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce uninteresting Arguments from defined functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Delta.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+namespace llvm {
+void reduceBasicBlocksDeltaPass(TestRunner &Test);
+} // namespace llvm
diff --git a/tools/llvm-reduce/deltas/ReduceFunctions.cpp b/tools/llvm-reduce/deltas/ReduceFunctions.cpp
new file mode 100644
index 000000000000..3382f35a945a
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceFunctions.cpp
@@ -0,0 +1,77 @@
+//===- ReduceFunctions.cpp - Specialized Delta Pass -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce functions (and any instruction that calls it) in the provided
+// Module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceFunctions.h"
+#include "Delta.h"
+#include "llvm/ADT/SetVector.h"
+#include <set>
+
+using namespace llvm;
+
+/// Removes all the Defined Functions (as well as their calls)
+/// that aren't inside any of the desired Chunks.
+static void extractFunctionsFromModule(const std::vector<Chunk> &ChunksToKeep,
+ Module *Program) {
+ // Get functions inside desired chunks
+ std::set<Function *> FuncsToKeep;
+ int I = 0, FunctionCount = 0;
+ for (auto &F : *Program)
+ if (I < (int)ChunksToKeep.size()) {
+ if (ChunksToKeep[I].contains(++FunctionCount))
+ FuncsToKeep.insert(&F);
+ if (FunctionCount == ChunksToKeep[I].end)
+ ++I;
+ }
+
+ // Delete out-of-chunk functions, and replace their calls with undef
+ std::vector<Function *> FuncsToRemove;
+ SetVector<CallInst *> CallsToRemove;
+ for (auto &F : *Program)
+ if (!FuncsToKeep.count(&F)) {
+ for (auto U : F.users())
+ if (auto *Call = dyn_cast<CallInst>(U)) {
+ Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+ CallsToRemove.insert(Call);
+ }
+ F.replaceAllUsesWith(UndefValue::get(F.getType()));
+ FuncsToRemove.push_back(&F);
+ }
+
+ for (auto *C : CallsToRemove)
+ C->eraseFromParent();
+
+ for (auto *F : FuncsToRemove)
+ F->eraseFromParent();
+}
+
+/// Counts the amount of non-declaration functions and prints their
+/// respective name & index
+static int countFunctions(Module *Program) {
+ // TODO: Silence index with --quiet flag
+ errs() << "----------------------------\n";
+ errs() << "Function Index Reference:\n";
+ int FunctionCount = 0;
+ for (auto &F : *Program)
+ errs() << "\t" << ++FunctionCount << ": " << F.getName() << "\n";
+
+ errs() << "----------------------------\n";
+ return FunctionCount;
+}
+
+void llvm::reduceFunctionsDeltaPass(TestRunner &Test) {
+ errs() << "*** Reducing Functions...\n";
+ int Functions = countFunctions(Test.getProgram());
+ runDeltaPass(Test, Functions, extractFunctionsFromModule);
+ errs() << "----------------------------\n";
+}
diff --git a/tools/llvm-reduce/deltas/ReduceFunctions.h b/tools/llvm-reduce/deltas/ReduceFunctions.h
new file mode 100644
index 000000000000..7c2cd3f33e9f
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceFunctions.h
@@ -0,0 +1,20 @@
+//===- ReduceFunctions.h - Specialized Delta Pass -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce functions (and any instruction that calls it) in the provided
+// Module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Delta.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+namespace llvm {
+void reduceFunctionsDeltaPass(TestRunner &Test);
+} // namespace llvm
diff --git a/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp b/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp
new file mode 100644
index 000000000000..5732208ee0a9
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp
@@ -0,0 +1,74 @@
+//===- ReduceGlobalVars.cpp - Specialized Delta Pass ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce initialized Global Variables in the provided Module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceGlobalVars.h"
+#include <set>
+
+using namespace llvm;
+
+/// Removes all the Initialized GVs that aren't inside the desired Chunks.
+static void extractGVsFromModule(std::vector<Chunk> ChunksToKeep,
+ Module *Program) {
+ // Get GVs inside desired chunks
+ std::set<GlobalVariable *> GVsToKeep;
+ int I = 0, GVCount = 0;
+ for (auto &GV : Program->globals())
+ if (GV.hasInitializer() && I < (int)ChunksToKeep.size()) {
+ if (ChunksToKeep[I].contains(++GVCount))
+ GVsToKeep.insert(&GV);
+ if (GVCount == ChunksToKeep[I].end)
+ ++I;
+ }
+
+ // Delete out-of-chunk GVs and their uses
+ std::vector<GlobalVariable *> ToRemove;
+ std::vector<Instruction *> InstToRemove;
+ for (auto &GV : Program->globals())
+ if (GV.hasInitializer() && !GVsToKeep.count(&GV)) {
+ for (auto U : GV.users())
+ if (auto *Inst = dyn_cast<Instruction>(U))
+ InstToRemove.push_back(Inst);
+
+ GV.replaceAllUsesWith(UndefValue::get(GV.getType()));
+ ToRemove.push_back(&GV);
+ }
+
+ // Delete Instruction uses of unwanted GVs
+ for (auto *Inst : InstToRemove) {
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ Inst->eraseFromParent();
+ }
+
+ for (auto *GV : ToRemove)
+ GV->eraseFromParent();
+}
+
+/// Counts the amount of initialized GVs and displays their
+/// respective name & index
+static int countGVs(Module *Program) {
+ // TODO: Silence index with --quiet flag
+ outs() << "----------------------------\n";
+ outs() << "GlobalVariable Index Reference:\n";
+ int GVCount = 0;
+ for (auto &GV : Program->globals())
+ if (GV.hasInitializer())
+ outs() << "\t" << ++GVCount << ": " << GV.getName() << "\n";
+ outs() << "----------------------------\n";
+ return GVCount;
+}
+
+void llvm::reduceGlobalsDeltaPass(TestRunner &Test) {
+ outs() << "*** Reducing GVs...\n";
+ int GVCount = countGVs(Test.getProgram());
+ runDeltaPass(Test, GVCount, extractGVsFromModule);
+}
diff --git a/tools/llvm-reduce/deltas/ReduceGlobalVars.h b/tools/llvm-reduce/deltas/ReduceGlobalVars.h
new file mode 100644
index 000000000000..d4a870aded58
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceGlobalVars.h
@@ -0,0 +1,20 @@
+//===- ReduceGlobalVars.h - Specialized Delta Pass ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce initialized Global Variables in the provided Module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Delta.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+namespace llvm {
+void reduceGlobalsDeltaPass(TestRunner &Test);
+} // namespace llvm
diff --git a/tools/llvm-reduce/deltas/ReduceInstructions.cpp b/tools/llvm-reduce/deltas/ReduceInstructions.cpp
new file mode 100644
index 000000000000..b3497ad2dc02
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceInstructions.cpp
@@ -0,0 +1,65 @@
+//===- ReduceArguments.cpp - Specialized Delta Pass -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce uninteresting Arguments from defined functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceInstructions.h"
+
+using namespace llvm;
+
+/// Removes out-of-chunk arguments from functions, and modifies their calls
+/// accordingly. It also removes allocations of out-of-chunk arguments.
+static void extractInstrFromModule(std::vector<Chunk> ChunksToKeep,
+ Module *Program) {
+ int I = 0, InstCount = 0;
+ std::set<Instruction *> InstToKeep;
+
+ for (auto &F : *Program)
+ for (auto &BB : F)
+ for (auto &Inst : BB)
+ if (I < (int)ChunksToKeep.size()) {
+ if (ChunksToKeep[I].contains(++InstCount))
+ InstToKeep.insert(&Inst);
+ if (ChunksToKeep[I].end == InstCount)
+ ++I;
+ }
+
+ std::vector<Instruction *> InstToDelete;
+ for (auto &F : *Program)
+ for (auto &BB : F)
+ for (auto &Inst : BB)
+ if (!InstToKeep.count(&Inst)) {
+ Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
+ InstToDelete.push_back(&Inst);
+ }
+
+ for (auto &I : InstToDelete)
+ I->eraseFromParent();
+}
+
+/// Counts the amount of basic blocks and prints their name & respective index
+static unsigned countInstructions(Module *Program) {
+ // TODO: Silence index with --quiet flag
+ outs() << "----------------------------\n";
+ int InstCount = 0;
+ for (auto &F : *Program)
+ for (auto &BB : F)
+ InstCount += BB.getInstList().size();
+ outs() << "Number of instructions: " << InstCount << "\n";
+
+ return InstCount;
+}
+
+void llvm::reduceInstructionsDeltaPass(TestRunner &Test) {
+ outs() << "*** Reducing Insructions...\n";
+ unsigned InstCount = countInstructions(Test.getProgram());
+ runDeltaPass(Test, InstCount, extractInstrFromModule);
+}
diff --git a/tools/llvm-reduce/deltas/ReduceInstructions.h b/tools/llvm-reduce/deltas/ReduceInstructions.h
new file mode 100644
index 000000000000..a9266acd051a
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceInstructions.h
@@ -0,0 +1,20 @@
+//===- ReduceArguments.h - Specialized Delta Pass -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce uninteresting Arguments from defined functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Delta.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+namespace llvm {
+void reduceInstructionsDeltaPass(TestRunner &Test);
+} // namespace llvm
diff --git a/tools/llvm-reduce/deltas/ReduceMetadata.cpp b/tools/llvm-reduce/deltas/ReduceMetadata.cpp
new file mode 100644
index 000000000000..4ea223546efa
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceMetadata.cpp
@@ -0,0 +1,138 @@
+//===- ReduceMetadata.cpp - Specialized Delta Pass ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements two functions used by the Generic Delta Debugging
+// Algorithm, which are used to reduce Metadata nodes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceMetadata.h"
+#include "Delta.h"
+#include "llvm/ADT/SmallVector.h"
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+/// Adds all Unnamed Metadata Nodes that are inside desired Chunks to set
+template <class T>
+static void getChunkMetadataNodes(T &MDUser, int &I,
+ const std::vector<Chunk> &ChunksToKeep,
+ std::set<MDNode *> &SeenNodes,
+ std::set<MDNode *> &NodesToKeep) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ MDUser.getAllMetadata(MDs);
+ for (auto &MD : MDs) {
+ SeenNodes.insert(MD.second);
+ if (I < (int)ChunksToKeep.size()) {
+ if (ChunksToKeep[I].contains(SeenNodes.size()))
+ NodesToKeep.insert(MD.second);
+ if (ChunksToKeep[I].end == (int)SeenNodes.size())
+ ++I;
+ }
+ }
+}
+
+/// Erases out-of-chunk unnamed metadata nodes from its user
+template <class T>
+static void eraseMetadataIfOutsideChunk(T &MDUser,
+ const std::set<MDNode *> &NodesToKeep) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ MDUser.getAllMetadata(MDs);
+ for (int I = 0, E = MDs.size(); I != E; ++I)
+ if (!NodesToKeep.count(MDs[I].second))
+ MDUser.setMetadata(I, NULL);
+}
+
+/// Removes all the Named and Unnamed Metadata Nodes, as well as any debug
+/// functions that aren't inside the desired Chunks.
+static void extractMetadataFromModule(const std::vector<Chunk> &ChunksToKeep,
+ Module *Program) {
+ std::set<MDNode *> SeenNodes;
+ std::set<MDNode *> NodesToKeep;
+ int I = 0;
+
+ // Add chunk MDNodes used by GVs, Functions, and Instructions to set
+ for (auto &GV : Program->globals())
+ getChunkMetadataNodes(GV, I, ChunksToKeep, SeenNodes, NodesToKeep);
+
+ for (auto &F : *Program) {
+ getChunkMetadataNodes(F, I, ChunksToKeep, SeenNodes, NodesToKeep);
+ for (auto &BB : F)
+ for (auto &Inst : BB)
+ getChunkMetadataNodes(Inst, I, ChunksToKeep, SeenNodes, NodesToKeep);
+ }
+
+ // Once more, go over metadata nodes, but deleting the ones outside chunks
+ for (auto &GV : Program->globals())
+ eraseMetadataIfOutsideChunk(GV, NodesToKeep);
+
+ for (auto &F : *Program) {
+ eraseMetadataIfOutsideChunk(F, NodesToKeep);
+ for (auto &BB : F)
+ for (auto &Inst : BB)
+ eraseMetadataIfOutsideChunk(Inst, NodesToKeep);
+ }
+
+
+ // Get out-of-chunk Named metadata nodes
+ unsigned MetadataCount = SeenNodes.size();
+ std::vector<NamedMDNode *> NamedNodesToDelete;
+ for (auto &MD : Program->named_metadata()) {
+ if (I < (int)ChunksToKeep.size()) {
+ if (!ChunksToKeep[I].contains(++MetadataCount))
+ NamedNodesToDelete.push_back(&MD);
+ if (ChunksToKeep[I].end == (int)SeenNodes.size())
+ ++I;
+ } else
+ NamedNodesToDelete.push_back(&MD);
+ }
+
+ for (auto *NN : NamedNodesToDelete) {
+ for (int I = 0, E = NN->getNumOperands(); I != E; ++I)
+ NN->setOperand(I, NULL);
+ NN->eraseFromParent();
+ }
+}
+
+// Gets unnamed metadata nodes used by a given instruction/GV/function and adds
+// them to the set of seen nodes
+template <class T>
+static void addMetadataToSet(T &MDUser, std::set<MDNode *> &UnnamedNodes) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ MDUser.getAllMetadata(MDs);
+ for (auto &MD : MDs)
+ UnnamedNodes.insert(MD.second);
+}
+
+/// Returns the amount of Named and Unnamed Metadata Nodes
+static int countMetadataTargets(Module *Program) {
+ std::set<MDNode *> UnnamedNodes;
+ int NamedMetadataNodes = Program->named_metadata_size();
+
+ // Get metadata nodes used by globals
+ for (auto &GV : Program->globals())
+ addMetadataToSet(GV, UnnamedNodes);
+
+ // Do the same for nodes used by functions & instructions
+ for (auto &F : *Program) {
+ addMetadataToSet(F, UnnamedNodes);
+ for (auto &BB : F)
+ for (auto &I : BB)
+ addMetadataToSet(I, UnnamedNodes);
+ }
+
+ return UnnamedNodes.size() + NamedMetadataNodes;
+}
+
+void llvm::reduceMetadataDeltaPass(TestRunner &Test) {
+ outs() << "*** Reducing Metadata...\n";
+ int MDCount = countMetadataTargets(Test.getProgram());
+ runDeltaPass(Test, MDCount, extractMetadataFromModule);
+ outs() << "----------------------------\n";
+}
diff --git a/tools/llvm-reduce/deltas/ReduceMetadata.h b/tools/llvm-reduce/deltas/ReduceMetadata.h
new file mode 100644
index 000000000000..275b44c2aa7d
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceMetadata.h
@@ -0,0 +1,18 @@
+//===- ReduceMetadata.h - Specialized Delta Pass ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements two functions used by the Generic Delta Debugging
+// Algorithm, which are used to reduce Metadata nodes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestRunner.h"
+
+namespace llvm {
+void reduceMetadataDeltaPass(TestRunner &Test);
+} // namespace llvm
diff --git a/tools/llvm-reduce/llvm-reduce.cpp b/tools/llvm-reduce/llvm-reduce.cpp
new file mode 100644
index 000000000000..83dcf980a786
--- /dev/null
+++ b/tools/llvm-reduce/llvm-reduce.cpp
@@ -0,0 +1,114 @@
+//===- llvm-reduce.cpp - The LLVM Delta Reduction utility -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This program tries to reduce an IR test case for a given interesting-ness
+// test. It runs multiple delta debugging passes in order to minimize the input
+// file. It's worth noting that this is a part of the bugpoint redesign
+// proposal, and thus a *temporary* tool that will eventually be integrated
+// into the bugpoint tool itself.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DeltaManager.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include <system_error>
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
+static cl::opt<bool> Version("v", cl::desc("Alias for -version"), cl::Hidden);
+
+static cl::opt<std::string> InputFilename(cl::Positional, cl::Required,
+ cl::desc("<input llvm ll/bc file>"));
+
+static cl::opt<std::string>
+ TestFilename("test", cl::Required,
+ cl::desc("Name of the interesting-ness test to be run"));
+
+static cl::list<std::string>
+ TestArguments("test-arg", cl::ZeroOrMore,
+ cl::desc("Arguments passed onto the interesting-ness test"));
+
+static cl::opt<std::string>
+ OutputFilename("output",
+ cl::desc("Specify the output file. default: reduced.ll"));
+static cl::alias OutputFileAlias("o", cl::desc("Alias for -output"),
+ cl::aliasopt(OutputFilename));
+
+static cl::opt<bool>
+ ReplaceInput("in-place",
+ cl::desc("WARNING: This option will replace your input file"
+ "with the reduced version!"));
+
+// Parses IR into a Module and verifies it
+static std::unique_ptr<Module> parseInputFile(StringRef Filename,
+ LLVMContext &Ctxt) {
+ SMDiagnostic Err;
+ std::unique_ptr<Module> Result = parseIRFile(Filename, Err, Ctxt);
+ if (!Result) {
+ Err.print("llvm-reduce", errs());
+ return Result;
+ }
+
+ if (verifyModule(*Result, &errs())) {
+ errs() << "Error: " << Filename << " - input module is broken!\n";
+ return std::unique_ptr<Module>();
+ }
+
+ return Result;
+}
+
+int main(int argc, char **argv) {
+ InitLLVM X(argc, argv);
+
+ cl::ParseCommandLineOptions(argc, argv, "LLVM automatic testcase reducer.\n");
+
+ LLVMContext Context;
+ std::unique_ptr<Module> OriginalProgram =
+ parseInputFile(InputFilename, Context);
+
+ // Initialize test environment
+ TestRunner Tester(TestFilename, TestArguments);
+ Tester.setProgram(std::move(OriginalProgram));
+
+ // Try to reduce code
+ runDeltaPasses(Tester);
+
+ if (!Tester.getProgram()) {
+ errs() << "\nCouldnt reduce input :/\n";
+ } else {
+ // Print reduced file to STDOUT
+ if (OutputFilename == "-")
+ Tester.getProgram()->print(outs(), nullptr);
+ else {
+ if (ReplaceInput) // In-place
+ OutputFilename = InputFilename.c_str();
+ else if (OutputFilename.empty())
+ OutputFilename = "reduced.ll";
+
+ std::error_code EC;
+ raw_fd_ostream Out(OutputFilename, EC);
+ if (EC) {
+ errs() << "Error opening output file: " << EC.message() << "!\n";
+ exit(1);
+ }
+ Tester.getProgram()->print(Out, /*AnnotationWriter=*/nullptr);
+ errs() << "\nDone reducing! Reduced testcase: " << OutputFilename << "\n";
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index a7cc1deb8cf6..3a36e7709483 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -27,12 +27,13 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <future>
@@ -138,8 +139,21 @@ PrintAllocationRequests("print-alloc-requests",
"manager by RuntimeDyld"),
cl::Hidden);
+static cl::opt<bool> ShowTimes("show-times",
+ cl::desc("Show times for llvm-rtdyld phases"),
+ cl::init(false));
+
ExitOnError ExitOnErr;
+struct RTDyldTimers {
+ TimerGroup RTDyldTG{"llvm-rtdyld timers", "timers for llvm-rtdyld phases"};
+ Timer LoadObjectsTimer{"load", "time to load/add object files", RTDyldTG};
+ Timer LinkTimer{"link", "time to link object files", RTDyldTG};
+ Timer RunTimer{"run", "time to execute jitlink'd code", RTDyldTG};
+};
+
+std::unique_ptr<RTDyldTimers> Timers;
+
/* *** */
using SectionIDMap = StringMap<unsigned>;
@@ -441,8 +455,6 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
continue;
}
object::section_iterator Sec = *SecOrErr;
- StringRef SecName;
- Sec->getName(SecName);
Address.SectionIndex = Sec->getIndex();
uint64_t SectionLoadAddress =
LoadedObjInfo->getSectionLoadAddress(*Sec);
@@ -491,35 +503,41 @@ static int executeInput() {
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
InputFileList.push_back("-");
- for (auto &File : InputFileList) {
- // Load the input memory buffer.
- ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
- MemoryBuffer::getFileOrSTDIN(File);
- if (std::error_code EC = InputBuffer.getError())
- ErrorAndExit("unable to read input: '" + EC.message() + "'");
- Expected<std::unique_ptr<ObjectFile>> MaybeObj(
- ObjectFile::createObjectFile((*InputBuffer)->getMemBufferRef()));
-
- if (!MaybeObj) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(MaybeObj.takeError(), OS);
- OS.flush();
- ErrorAndExit("unable to create object file: '" + Buf + "'");
- }
+ {
+ TimeRegion TR(Timers ? &Timers->LoadObjectsTimer : nullptr);
+ for (auto &File : InputFileList) {
+ // Load the input memory buffer.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
+ MemoryBuffer::getFileOrSTDIN(File);
+ if (std::error_code EC = InputBuffer.getError())
+ ErrorAndExit("unable to read input: '" + EC.message() + "'");
+ Expected<std::unique_ptr<ObjectFile>> MaybeObj(
+ ObjectFile::createObjectFile((*InputBuffer)->getMemBufferRef()));
+
+ if (!MaybeObj) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ logAllUnhandledErrors(MaybeObj.takeError(), OS);
+ OS.flush();
+ ErrorAndExit("unable to create object file: '" + Buf + "'");
+ }
- ObjectFile &Obj = **MaybeObj;
+ ObjectFile &Obj = **MaybeObj;
- // Load the object file
- Dyld.loadObject(Obj);
- if (Dyld.hasError()) {
- ErrorAndExit(Dyld.getErrorString());
+ // Load the object file
+ Dyld.loadObject(Obj);
+ if (Dyld.hasError()) {
+ ErrorAndExit(Dyld.getErrorString());
+ }
}
}
- // Resove all the relocations we can.
- // FIXME: Error out if there are unresolved relocations.
- Dyld.resolveRelocations();
+ {
+ TimeRegion TR(Timers ? &Timers->LinkTimer : nullptr);
+ // Resove all the relocations we can.
+ // FIXME: Error out if there are unresolved relocations.
+ Dyld.resolveRelocations();
+ }
// Get the address of the entry point (_main by default).
void *MainAddress = Dyld.getSymbolLocalAddress(EntryPoint);
@@ -551,7 +569,13 @@ static int executeInput() {
for (auto &Arg : InputArgv)
Argv.push_back(Arg.data());
Argv.push_back(nullptr);
- return Main(Argv.size() - 1, Argv.data());
+ int Result = 0;
+ {
+ TimeRegion TR(Timers ? &Timers->RunTimer : nullptr);
+ Result = Main(Argv.size() - 1, Argv.data());
+ }
+
+ return Result;
}
static int checkAllExpressions(RuntimeDyldChecker &Checker) {
@@ -891,7 +915,7 @@ static int linkAndVerify() {
ObjectFile &Obj = **MaybeObj;
if (!Checker)
- Checker = llvm::make_unique<RuntimeDyldChecker>(
+ Checker = std::make_unique<RuntimeDyldChecker>(
IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo,
GetStubInfo, Obj.isLittleEndian() ? support::little : support::big,
Disassembler.get(), InstPrinter.get(), dbgs());
@@ -937,16 +961,28 @@ int main(int argc, char **argv) {
ExitOnErr.setBanner(std::string(argv[0]) + ": ");
+ Timers = ShowTimes ? std::make_unique<RTDyldTimers>() : nullptr;
+
+ int Result;
switch (Action) {
case AC_Execute:
- return executeInput();
+ Result = executeInput();
+ break;
case AC_PrintDebugLineInfo:
- return printLineInfoForInput(/* LoadObjects */ true,/* UseDebugObj */ true);
+ Result =
+ printLineInfoForInput(/* LoadObjects */ true, /* UseDebugObj */ true);
+ break;
case AC_PrintLineInfo:
- return printLineInfoForInput(/* LoadObjects */ true,/* UseDebugObj */false);
+ Result =
+ printLineInfoForInput(/* LoadObjects */ true, /* UseDebugObj */ false);
+ break;
case AC_PrintObjectLineInfo:
- return printLineInfoForInput(/* LoadObjects */false,/* UseDebugObj */false);
+ Result =
+ printLineInfoForInput(/* LoadObjects */ false, /* UseDebugObj */ false);
+ break;
case AC_Verify:
- return linkAndVerify();
+ Result = linkAndVerify();
+ break;
}
+ return Result;
}
diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp
index a455bf13fe7b..5f36a785332b 100644
--- a/tools/llvm-stress/llvm-stress.cpp
+++ b/tools/llvm-stress/llvm-stress.cpp
@@ -735,7 +735,7 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm codegen stress-tester\n");
llvm_shutdown_obj Y;
- auto M = llvm::make_unique<Module>("/tmp/autogen.bc", Context);
+ auto M = std::make_unique<Module>("/tmp/autogen.bc", Context);
Function *F = GenEmptyFunction(M.get());
// Pick an initial seed value
@@ -752,7 +752,7 @@ int main(int argc, char **argv) {
OutputFilename = "-";
std::error_code EC;
- Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::F_None));
+ Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None));
if (EC) {
errs() << EC.message() << '\n';
return 1;
diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp
index ea94cf9b69a1..54ce87d47979 100644
--- a/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -55,6 +55,10 @@ static cl::opt<bool>
cl::desc("Interpret addresses as relative addresses"),
cl::ReallyHidden);
+static cl::opt<bool> ClUntagAddresses(
+ "untag-addresses", cl::init(true),
+ cl::desc("Remove memory tags from addresses before symbolization"));
+
static cl::opt<bool>
ClPrintInlining("inlining", cl::init(true),
cl::desc("Print all inlined frames for a given address"));
@@ -274,6 +278,7 @@ int main(int argc, char **argv) {
ClDemangle.setInitialValue(false);
ClPrintFunctions.setInitialValue(FunctionNameKind::None);
ClPrintInlining.setInitialValue(false);
+ ClUntagAddresses.setInitialValue(false);
ClOutputStyle.setInitialValue(DIPrinter::OutputStyle::GNU);
}
@@ -290,6 +295,7 @@ int main(int argc, char **argv) {
Opts.UseSymbolTable = ClUseSymbolTable;
Opts.Demangle = ClDemangle;
Opts.RelativeAddresses = ClUseRelativeAddress;
+ Opts.UntagAddresses = ClUntagAddresses;
Opts.DefaultArch = ClDefaultArch;
Opts.FallbackDebugPath = ClFallbackDebugPath;
Opts.DWPName = ClDwpName;
diff --git a/tools/llvm-xray/func-id-helper.cpp b/tools/llvm-xray/func-id-helper.cpp
index dc821a420c67..afc912a6398e 100644
--- a/tools/llvm-xray/func-id-helper.cpp
+++ b/tools/llvm-xray/func-id-helper.cpp
@@ -36,7 +36,7 @@ std::string FuncIdConversionHelper::SymbolOrNumber(int32_t FuncId) const {
ModuleAddress.SectionIndex = object::SectionedAddress::UndefSection;
if (auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, ModuleAddress)) {
auto &DI = *ResOrErr;
- if (DI.FunctionName == "<invalid>")
+ if (DI.FunctionName == DILineInfo::BadString)
F << "@(" << std::hex << It->second << ")";
else
F << DI.FunctionName;
diff --git a/tools/llvm-xray/xray-account.cpp b/tools/llvm-xray/xray-account.cpp
index 2b49a311d7e3..e37cd212377a 100644
--- a/tools/llvm-xray/xray-account.cpp
+++ b/tools/llvm-xray/xray-account.cpp
@@ -421,7 +421,7 @@ static CommandRegistration Unused(&Account, []() -> Error {
}
std::error_code EC;
- raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::F_Text);
+ raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::OF_Text);
if (EC)
return make_error<StringError>(
Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);
diff --git a/tools/llvm-xray/xray-converter.cpp b/tools/llvm-xray/xray-converter.cpp
index dfc757e0f276..7258245b95cc 100644
--- a/tools/llvm-xray/xray-converter.cpp
+++ b/tools/llvm-xray/xray-converter.cpp
@@ -387,8 +387,8 @@ static CommandRegistration Unused(&Convert, []() -> Error {
std::error_code EC;
raw_fd_ostream OS(ConvertOutput, EC,
ConvertOutputFormat == ConvertFormats::BINARY
- ? sys::fs::OpenFlags::F_None
- : sys::fs::OpenFlags::F_Text);
+ ? sys::fs::OpenFlags::OF_None
+ : sys::fs::OpenFlags::OF_Text);
if (EC)
return make_error<StringError>(
Twine("Cannot open file '") + ConvertOutput + "' for writing.", EC);
diff --git a/tools/llvm-xray/xray-extract.cpp b/tools/llvm-xray/xray-extract.cpp
index 7c7d26b5a389..7800b88d9eeb 100644
--- a/tools/llvm-xray/xray-extract.cpp
+++ b/tools/llvm-xray/xray-extract.cpp
@@ -80,7 +80,7 @@ static CommandRegistration Unused(&Extract, []() -> Error {
InstrumentationMapOrError.takeError());
std::error_code EC;
- raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::F_Text);
+ raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::OF_Text);
if (EC)
return make_error<StringError>(
Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC);
diff --git a/tools/llvm-xray/xray-fdr-dump.cpp b/tools/llvm-xray/xray-fdr-dump.cpp
index 81a93cac57c4..295f7a78765f 100644
--- a/tools/llvm-xray/xray-fdr-dump.cpp
+++ b/tools/llvm-xray/xray-fdr-dump.cpp
@@ -51,7 +51,7 @@ static CommandRegistration Unused(&Dump, []() -> Error {
sys::fs::closeFile(*FDOrErr);
DataExtractor DE(StringRef(MappedFile.data(), MappedFile.size()), true, 8);
- uint32_t OffsetPtr = 0;
+ uint64_t OffsetPtr = 0;
auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr);
if (!FileHeaderOrError)
diff --git a/tools/llvm-xray/xray-graph-diff.cpp b/tools/llvm-xray/xray-graph-diff.cpp
index a514be97f40b..116aa6869ec1 100644
--- a/tools/llvm-xray/xray-graph-diff.cpp
+++ b/tools/llvm-xray/xray-graph-diff.cpp
@@ -470,7 +470,7 @@ static CommandRegistration Unused(&GraphDiff, []() -> Error {
auto &GDR = *GDROrErr;
std::error_code EC;
- raw_fd_ostream OS(GraphDiffOutput, EC, sys::fs::OpenFlags::F_Text);
+ raw_fd_ostream OS(GraphDiffOutput, EC, sys::fs::OpenFlags::OF_Text);
if (EC)
return make_error<StringError>(
Twine("Cannot open file '") + GraphDiffOutput + "' for writing.", EC);
diff --git a/tools/llvm-xray/xray-graph.cpp b/tools/llvm-xray/xray-graph.cpp
index c09357fcb502..0be511219c1a 100644
--- a/tools/llvm-xray/xray-graph.cpp
+++ b/tools/llvm-xray/xray-graph.cpp
@@ -506,7 +506,7 @@ static CommandRegistration Unused(&GraphC, []() -> Error {
auto &GR = *GROrError;
std::error_code EC;
- raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::F_Text);
+ raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::OF_Text);
if (EC)
return make_error<StringError>(
Twine("Cannot open file '") + GraphOutput + "' for writing.", EC);
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index ccf8b073b82b..15495a511d06 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -523,7 +523,6 @@ int main(int argc, char **argv) {
initializeDwarfEHPreparePass(Registry);
initializeSafeStackLegacyPassPass(Registry);
initializeSjLjEHPreparePass(Registry);
- initializeStackProtectorPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeGlobalMergePass(Registry);
initializeIndirectBrExpandPassPass(Registry);
@@ -612,7 +611,9 @@ int main(int argc, char **argv) {
OutputFilename = "-";
std::error_code EC;
- Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::F_None));
+ sys::fs::OpenFlags Flags = OutputAssembly ? sys::fs::OF_Text
+ : sys::fs::OF_None;
+ Out.reset(new ToolOutputFile(OutputFilename, EC, Flags));
if (EC) {
errs() << EC.message() << '\n';
return 1;
@@ -620,7 +621,7 @@ int main(int argc, char **argv) {
if (!ThinLinkBitcodeFile.empty()) {
ThinLinkOut.reset(
- new ToolOutputFile(ThinLinkBitcodeFile, EC, sys::fs::F_None));
+ new ToolOutputFile(ThinLinkBitcodeFile, EC, sys::fs::OF_None));
if (EC) {
errs() << EC.message() << '\n';
return 1;
@@ -720,8 +721,8 @@ int main(int argc, char **argv) {
OutputFilename = "-";
std::error_code EC;
- Out = llvm::make_unique<ToolOutputFile>(OutputFilename, EC,
- sys::fs::F_None);
+ Out = std::make_unique<ToolOutputFile>(OutputFilename, EC,
+ sys::fs::OF_None);
if (EC) {
errs() << EC.message() << '\n';
return 1;
@@ -867,7 +868,7 @@ int main(int argc, char **argv) {
assert(Out);
OS = &Out->os();
if (RunTwice) {
- BOS = make_unique<raw_svector_ostream>(Buffer);
+ BOS = std::make_unique<raw_svector_ostream>(Buffer);
OS = BOS.get();
}
if (OutputAssembly) {
diff --git a/tools/vfabi-demangle-fuzzer/CMakeLists.txt b/tools/vfabi-demangle-fuzzer/CMakeLists.txt
new file mode 100644
index 000000000000..908364690f5e
--- /dev/null
+++ b/tools/vfabi-demangle-fuzzer/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_LINK_COMPONENTS
+ Analysis
+ Support
+)
+add_llvm_fuzzer(vfabi-demangler-fuzzer
+ vfabi-demangler-fuzzer.cpp
+)
diff --git a/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp b/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp
new file mode 100644
index 000000000000..13657effbbeb
--- /dev/null
+++ b/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp
@@ -0,0 +1,26 @@
+//===-- vfabi-demangler-fuzzer.cpp - Fuzzer VFABI using lib/Fuzzer ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Build tool to fuzz the demangler for the vector function ABI names.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/VectorUtils.h"
+
+using namespace llvm;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ const StringRef MangledName((const char *)Data, Size);
+ const auto Info = VFABI::tryDemangleForVFABI(MangledName);
+
+ // Do not optimize away the return value. Inspired by
+ // https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307-L345
+ asm volatile("" : : "r,m"(Info) : "memory");
+
+ return 0;
+}
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 146d10835b8d..1d39b300091f 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -1111,6 +1111,7 @@ static std::string getEnumNameForToken(StringRef Str) {
case '<': Res += "_LT_"; break;
case '>': Res += "_GT_"; break;
case '-': Res += "_MINUS_"; break;
+ case '#': Res += "_HASH_"; break;
default:
if ((*it >= 'A' && *it <= 'Z') ||
(*it >= 'a' && *it <= 'z') ||
@@ -1439,7 +1440,7 @@ void AsmMatcherInfo::buildOperandMatchInfo() {
/// Map containing a mask with all operands indices that can be found for
/// that class inside a instruction.
- typedef std::map<ClassInfo *, unsigned, less_ptr<ClassInfo>> OpClassMaskTy;
+ typedef std::map<ClassInfo *, unsigned, deref<std::less<>>> OpClassMaskTy;
OpClassMaskTy OpClassMask;
for (const auto &MI : Matchables) {
@@ -1515,7 +1516,7 @@ void AsmMatcherInfo::buildInfo() {
if (!V.empty() && V != Variant.Name)
continue;
- auto II = llvm::make_unique<MatchableInfo>(*CGI);
+ auto II = std::make_unique<MatchableInfo>(*CGI);
II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst);
@@ -1532,7 +1533,7 @@ void AsmMatcherInfo::buildInfo() {
std::vector<Record*> AllInstAliases =
Records.getAllDerivedDefinitions("InstAlias");
for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) {
- auto Alias = llvm::make_unique<CodeGenInstAlias>(AllInstAliases[i],
+ auto Alias = std::make_unique<CodeGenInstAlias>(AllInstAliases[i],
Target);
// If the tblgen -match-prefix option is specified (for tblgen hackers),
@@ -1546,7 +1547,7 @@ void AsmMatcherInfo::buildInfo() {
if (!V.empty() && V != Variant.Name)
continue;
- auto II = llvm::make_unique<MatchableInfo>(std::move(Alias));
+ auto II = std::make_unique<MatchableInfo>(std::move(Alias));
II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst);
@@ -1615,7 +1616,7 @@ void AsmMatcherInfo::buildInfo() {
II->TheDef->getValueAsString("TwoOperandAliasConstraint");
if (Constraint != "") {
// Start by making a copy of the original matchable.
- auto AliasII = llvm::make_unique<MatchableInfo>(*II);
+ auto AliasII = std::make_unique<MatchableInfo>(*II);
// Adjust it to be a two-operand alias.
AliasII->formTwoOperandAlias(Constraint);
@@ -2381,7 +2382,7 @@ static void emitMatchClassEnumeration(CodeGenTarget &Target,
OS << " NumMatchClassKinds\n";
OS << "};\n\n";
- OS << "}\n\n";
+ OS << "} // end anonymous namespace\n\n";
}
/// emitMatchClassDiagStrings - Emit a function to get the diagnostic text to be
@@ -2866,7 +2867,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
OS << " }\n";
OS << " };\n";
- OS << "} // end anonymous namespace.\n\n";
+ OS << "} // end anonymous namespace\n\n";
OS << "static const OperandMatchEntry OperandMatchTable["
<< Info.OperandMatchInfo.size() << "] = {\n";
@@ -3366,7 +3367,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " " << getNameForFeatureBitset(FeatureBitset) << ",\n";
}
OS << "};\n\n"
- << "const static FeatureBitset FeatureBitsets[] {\n"
+ << "static constexpr FeatureBitset FeatureBitsets[] = {\n"
<< " {}, // AMFBS_None\n";
for (const auto &FeatureBitset : FeatureBitsets) {
if (FeatureBitset.empty())
@@ -3422,7 +3423,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " }\n";
OS << " };\n";
- OS << "} // end anonymous namespace.\n\n";
+ OS << "} // end anonymous namespace\n\n";
unsigned VariantCount = Target.getAsmParserVariantCount();
for (unsigned VC = 0; VC != VariantCount; ++VC) {
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index 05d81f133505..b5c7f35be0e5 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -784,8 +784,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
continue; // Aliases with priority 0 are never emitted.
const DagInit *DI = R->getValueAsDag("ResultInst");
- const DefInit *Op = cast<DefInit>(DI->getOperator());
- AliasMap[getQualifiedName(Op->getDef())].insert(
+ AliasMap[getQualifiedName(DI->getOperatorAsDef(R->getLoc()))].insert(
std::make_pair(CodeGenInstAlias(R, Target), Priority));
}
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index de5044e24d49..9eabb44d9004 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -264,6 +264,10 @@ void CallingConvEmitter::EmitAction(Record *Action,
Record *DestTy = Action->getValueAsDef("DestTy");
O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";
O << IndentStr << "LocInfo = CCValAssign::BCvt;\n";
+ } else if (Action->isSubClassOf("CCTruncToType")) {
+ Record *DestTy = Action->getValueAsDef("DestTy");
+ O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";
+ O << IndentStr << "LocInfo = CCValAssign::Trunc;\n";
} else if (Action->isSubClassOf("CCPassIndirect")) {
Record *DestTy = Action->getValueAsDef("DestTy");
O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index da65763905a8..42f69cb253d2 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -16,6 +16,7 @@
#include "CodeGenTarget.h"
#include "SubtargetFeatureInfo.h"
#include "Types.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Casting.h"
@@ -45,12 +46,19 @@ public:
private:
int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
std::string getInstructionCase(Record *R, CodeGenTarget &Target);
+ std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
+ CodeGenTarget &Target);
void AddCodeToMergeInOperand(Record *R, BitsInit *BI,
const std::string &VarName,
unsigned &NumberedOp,
std::set<unsigned> &NamedOpIndices,
std::string &Case, CodeGenTarget &Target);
+ void emitInstructionBaseValues(
+ raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+ CodeGenTarget &Target, int HwMode = -1);
+ unsigned BitWidth;
+ bool UseAPInt;
};
// If the VarBitInit at position 'bit' matches the specified variable then
@@ -126,7 +134,10 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName;
-
+
+ if (UseAPInt)
+ Case += " op.clearAllBits();\n";
+
// If the source operand has a custom encoder, use it. This will
// get the encoding for all of the suboperands.
if (!EncoderMethodName.empty()) {
@@ -134,18 +145,54 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
// sub-operands, if there are more than one, so only
// query the encoder once per source operand.
if (SO.second == 0) {
- Case += " // op: " + VarName + "\n" +
- " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
- Case += ", Fixups, STI";
- Case += ");\n";
+ Case += " // op: " + VarName + "\n";
+ if (UseAPInt) {
+ Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx);
+ Case += ", op";
+ } else {
+ Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
+ }
+ Case += ", Fixups, STI);\n";
}
} else {
- Case += " // op: " + VarName + "\n" +
- " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
- Case += ", Fixups, STI";
+ Case += " // op: " + VarName + "\n";
+ if (UseAPInt) {
+ Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
+ Case += ", op, Fixups, STI";
+ } else {
+ Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
+ Case += ", Fixups, STI";
+ }
Case += ");\n";
}
-
+
+ // Precalculate the number of lits this variable contributes to in the
+ // operand. If there is a single lit (consecutive range of bits) we can use a
+ // destructive sequence on APInt that reduces memory allocations.
+ int numOperandLits = 0;
+ for (int tmpBit = bit; tmpBit >= 0;) {
+ int varBit = getVariableBit(VarName, BI, tmpBit);
+
+ // If this bit isn't from a variable, skip it.
+ if (varBit == -1) {
+ --tmpBit;
+ continue;
+ }
+
+ // Figure out the consecutive range of bits covered by this operand, in
+ // order to generate better encoding code.
+ int beginVarBit = varBit;
+ int N = 1;
+ for (--tmpBit; tmpBit >= 0;) {
+ varBit = getVariableBit(VarName, BI, tmpBit);
+ if (varBit == -1 || varBit != (beginVarBit - N))
+ break;
+ ++N;
+ --tmpBit;
+ }
+ ++numOperandLits;
+ }
+
for (; bit >= 0; ) {
int varBit = getVariableBit(VarName, BI, bit);
@@ -166,20 +213,52 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
++N;
--bit;
}
-
- uint64_t opMask = ~(uint64_t)0 >> (64-N);
- int opShift = beginVarBit - N + 1;
- opMask <<= opShift;
- opShift = beginInstBit - beginVarBit;
-
- if (opShift > 0) {
- Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) << " +
- itostr(opShift) + ";\n";
- } else if (opShift < 0) {
- Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) >> " +
- itostr(-opShift) + ";\n";
+
+ std::string maskStr;
+ int opShift;
+
+ unsigned loBit = beginVarBit - N + 1;
+ unsigned hiBit = loBit + N;
+ unsigned loInstBit = beginInstBit - N + 1;
+ if (UseAPInt) {
+ std::string extractStr;
+ if (N >= 64) {
+ extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
+ itostr(loBit) + ")";
+ Case += " Value.insertBits(" + extractStr + ", " +
+ itostr(loInstBit) + ");\n";
+ } else {
+ extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
+ ", " + itostr(loBit) + ")";
+ Case += " Value.insertBits(" + extractStr + ", " +
+ itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
+ }
} else {
- Case += " Value |= op & UINT64_C(" + utostr(opMask) + ");\n";
+ uint64_t opMask = ~(uint64_t)0 >> (64 - N);
+ opShift = beginVarBit - N + 1;
+ opMask <<= opShift;
+ maskStr = "UINT64_C(" + utostr(opMask) + ")";
+ opShift = beginInstBit - beginVarBit;
+
+ if (numOperandLits == 1) {
+ Case += " op &= " + maskStr + ";\n";
+ if (opShift > 0) {
+ Case += " op <<= " + itostr(opShift) + ";\n";
+ } else if (opShift < 0) {
+ Case += " op >>= " + itostr(-opShift) + ";\n";
+ }
+ Case += " Value |= op;\n";
+ } else {
+ if (opShift > 0) {
+ Case += " Value |= (op & " + maskStr + ") << " +
+ itostr(opShift) + ";\n";
+ } else if (opShift < 0) {
+ Case += " Value |= (op & " + maskStr + ") >> " +
+ itostr(-opShift) + ";\n";
+ } else {
+ Case += " Value |= (op & " + maskStr + ");\n";
+ }
+ }
}
}
}
@@ -187,7 +266,29 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
std::string CodeEmitterGen::getInstructionCase(Record *R,
CodeGenTarget &Target) {
std::string Case;
- BitsInit *BI = R->getValueAsBitsInit("Inst");
+ if (const RecordVal *RV = R->getValue("EncodingInfos")) {
+ if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ const CodeGenHwModes &HWM = Target.getHwModes();
+ EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+ Case += " switch (HwMode) {\n";
+ Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n";
+ for (auto &KV : EBM.Map) {
+ Case += " case " + itostr(KV.first) + ": {\n";
+ Case += getInstructionCaseForEncoding(R, KV.second, Target);
+ Case += " break;\n";
+ Case += " }\n";
+ }
+ Case += " }\n";
+ return Case;
+ }
+ }
+ return getInstructionCaseForEncoding(R, R, Target);
+}
+
+std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
+ CodeGenTarget &Target) {
+ std::string Case;
+ BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
unsigned NumberedOp = 0;
std::set<unsigned> NamedOpIndices;
@@ -207,7 +308,7 @@ std::string CodeEmitterGen::getInstructionCase(Record *R,
// Loop over all of the fields in the instruction, determining which are the
// operands to the instruction.
- for (const RecordVal &RV : R->getValues()) {
+ for (const RecordVal &RV : EncodingDef->getValues()) {
// Ignore fixed fields in the record, we're looking for values like:
// bits<5> RST = { ?, ?, ?, ?, ? };
if (RV.getPrefix() || RV.getValue()->isComplete())
@@ -237,6 +338,54 @@ getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) {
return Name;
}
+static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
+ for (unsigned I = 0; I < Bits.getNumWords(); ++I)
+ OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])
+ << ")";
+}
+
+void CodeEmitterGen::emitInstructionBaseValues(
+ raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+ CodeGenTarget &Target, int HwMode) {
+ const CodeGenHwModes &HWM = Target.getHwModes();
+ if (HwMode == -1)
+ o << " static const uint64_t InstBits[] = {\n";
+ else
+ o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name
+ << "[] = {\n";
+
+ for (const CodeGenInstruction *CGI : NumberedInstructions) {
+ Record *R = CGI->TheDef;
+
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo")) {
+ o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n";
+ continue;
+ }
+
+ Record *EncodingDef = R;
+ if (const RecordVal *RV = R->getValue("EncodingInfos")) {
+ if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+ if (EBM.hasMode(HwMode))
+ EncodingDef = EBM.get(HwMode);
+ }
+ }
+ BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
+
+ // Start by filling in fixed values.
+ APInt Value(BitWidth, 0);
+ for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
+ if (BitInit *B = dyn_cast<BitInit>(BI->getBit(e - i - 1)))
+ Value |= APInt(BitWidth, (uint64_t)B->getValue()) << (e - i - 1);
+ }
+ o << " ";
+ emitInstBits(o, Value);
+ o << "," << '\t' << "// " << R->getName() << "\n";
+ }
+ o << " UINT64_C(0)\n };\n";
+}
+
void CodeEmitterGen::run(raw_ostream &o) {
CodeGenTarget Target(Records);
std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
@@ -247,34 +396,66 @@ void CodeEmitterGen::run(raw_ostream &o) {
ArrayRef<const CodeGenInstruction*> NumberedInstructions =
Target.getInstructionsByEnumValue();
- // Emit function declaration
- o << "uint64_t " << Target.getName();
- o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
- << " SmallVectorImpl<MCFixup> &Fixups,\n"
- << " const MCSubtargetInfo &STI) const {\n";
-
- // Emit instruction base values
- o << " static const uint64_t InstBits[] = {\n";
+ const CodeGenHwModes &HWM = Target.getHwModes();
+ // The set of HwModes used by instruction encodings.
+ std::set<unsigned> HwModes;
+ BitWidth = 0;
for (const CodeGenInstruction *CGI : NumberedInstructions) {
Record *R = CGI->TheDef;
-
if (R->getValueAsString("Namespace") == "TargetOpcode" ||
- R->getValueAsBit("isPseudo")) {
- o << " UINT64_C(0),\n";
+ R->getValueAsBit("isPseudo"))
continue;
- }
+ if (const RecordVal *RV = R->getValue("EncodingInfos")) {
+ if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+ for (auto &KV : EBM.Map) {
+ BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
+ BitWidth = std::max(BitWidth, BI->getNumBits());
+ HwModes.insert(KV.first);
+ }
+ continue;
+ }
+ }
BitsInit *BI = R->getValueAsBitsInit("Inst");
+ BitWidth = std::max(BitWidth, BI->getNumBits());
+ }
+ UseAPInt = BitWidth > 64;
+
+ // Emit function declaration
+ if (UseAPInt) {
+ o << "void " << Target.getName()
+ << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
+ << " SmallVectorImpl<MCFixup> &Fixups,\n"
+ << " APInt &Inst,\n"
+ << " APInt &Scratch,\n"
+ << " const MCSubtargetInfo &STI) const {\n";
+ } else {
+ o << "uint64_t " << Target.getName();
+ o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
+ << " SmallVectorImpl<MCFixup> &Fixups,\n"
+ << " const MCSubtargetInfo &STI) const {\n";
+ }
+
+ // Emit instruction base values
+ if (HwModes.empty()) {
+ emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
+ } else {
+ for (unsigned HwMode : HwModes)
+ emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
+ }
- // Start by filling in fixed values.
- uint64_t Value = 0;
- for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
- if (BitInit *B = dyn_cast<BitInit>(BI->getBit(e-i-1)))
- Value |= (uint64_t)B->getValue() << (e-i-1);
+ if (!HwModes.empty()) {
+ o << " const uint64_t *InstBits;\n";
+ o << " unsigned HwMode = STI.getHwMode();\n";
+ o << " switch (HwMode) {\n";
+ o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
+ for (unsigned I : HwModes) {
+ o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
+ << "; break;\n";
}
- o << " UINT64_C(" << Value << ")," << '\t' << "// " << R->getName() << "\n";
+ o << " };\n";
}
- o << " UINT64_C(0)\n };\n";
// Map to accumulate all the cases.
std::map<std::string, std::vector<std::string>> CaseMap;
@@ -294,11 +475,26 @@ void CodeEmitterGen::run(raw_ostream &o) {
}
// Emit initial function code
- o << " const unsigned opcode = MI.getOpcode();\n"
- << " uint64_t Value = InstBits[opcode];\n"
- << " uint64_t op = 0;\n"
- << " (void)op; // suppress warning\n"
- << " switch (opcode) {\n";
+ if (UseAPInt) {
+ int NumWords = APInt::getNumWords(BitWidth);
+ int NumBytes = (BitWidth + 7) / 8;
+ o << " const unsigned opcode = MI.getOpcode();\n"
+ << " if (Inst.getBitWidth() != " << BitWidth << ")\n"
+ << " Inst = Inst.zext(" << BitWidth << ");\n"
+ << " if (Scratch.getBitWidth() != " << BitWidth << ")\n"
+ << " Scratch = Scratch.zext(" << BitWidth << ");\n"
+ << " LoadIntFromMemory(Inst, (uint8_t*)&InstBits[opcode * " << NumWords
+ << "], " << NumBytes << ");\n"
+ << " APInt &Value = Inst;\n"
+ << " APInt &op = Scratch;\n"
+ << " switch (opcode) {\n";
+ } else {
+ o << " const unsigned opcode = MI.getOpcode();\n"
+ << " uint64_t Value = InstBits[opcode];\n"
+ << " uint64_t op = 0;\n"
+ << " (void)op; // suppress warning\n"
+ << " switch (opcode) {\n";
+ }
// Emit each case statement
std::map<std::string, std::vector<std::string>>::iterator IE, EE;
@@ -322,9 +518,12 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " raw_string_ostream Msg(msg);\n"
<< " Msg << \"Not supported instr: \" << MI;\n"
<< " report_fatal_error(Msg.str());\n"
- << " }\n"
- << " return Value;\n"
- << "}\n\n";
+ << " }\n";
+ if (UseAPInt)
+ o << " Inst = Value;\n";
+ else
+ o << " return Value;\n";
+ o << "}\n\n";
const auto &All = SubtargetFeatureInfo::getAll(Records);
std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures;
@@ -385,8 +584,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
o << " " << getNameForFeatureBitset(FeatureBitset) << ",\n";
}
o << "};\n\n"
- << "const static FeatureBitset FeatureBitsets[] {\n"
- << " {}, // CEFBS_None\n";
+ << "static constexpr FeatureBitset FeatureBitsets[] = {\n"
+ << " {}, // CEFBS_None\n";
for (const auto &FeatureBitset : FeatureBitsets) {
if (FeatureBitset.empty())
continue;
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index c8f710d66a03..46f986ca0176 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -769,7 +769,10 @@ void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
for (MVT T : MVT::integer_valuetypes())
if (Legal.count(T))
Out.insert(T);
- for (MVT T : MVT::integer_vector_valuetypes())
+ for (MVT T : MVT::integer_fixedlen_vector_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ for (MVT T : MVT::integer_scalable_vector_valuetypes())
if (Legal.count(T))
Out.insert(T);
return;
@@ -777,7 +780,10 @@ void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
for (MVT T : MVT::fp_valuetypes())
if (Legal.count(T))
Out.insert(T);
- for (MVT T : MVT::fp_vector_valuetypes())
+ for (MVT T : MVT::fp_fixedlen_vector_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ for (MVT T : MVT::fp_scalable_vector_valuetypes())
if (Legal.count(T))
Out.insert(T);
return;
@@ -883,7 +889,8 @@ std::string TreePredicateFn::getPredCode() const {
if (isLoad()) {
if (!isUnindexed() && !isNonExtLoad() && !isAnyExtLoad() &&
!isSignExtLoad() && !isZeroExtLoad() && getMemoryVT() == nullptr &&
- getScalarMemoryVT() == nullptr)
+ getScalarMemoryVT() == nullptr && getAddressSpaces() == nullptr &&
+ getMinAlignment() < 1)
PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
"IsLoad cannot be used by itself");
} else {
@@ -903,7 +910,8 @@ std::string TreePredicateFn::getPredCode() const {
if (isStore()) {
if (!isUnindexed() && !isTruncStore() && !isNonTruncStore() &&
- getMemoryVT() == nullptr && getScalarMemoryVT() == nullptr)
+ getMemoryVT() == nullptr && getScalarMemoryVT() == nullptr &&
+ getAddressSpaces() == nullptr && getMinAlignment() < 1)
PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
"IsStore cannot be used by itself");
} else {
@@ -917,6 +925,7 @@ std::string TreePredicateFn::getPredCode() const {
if (isAtomic()) {
if (getMemoryVT() == nullptr && !isAtomicOrderingMonotonic() &&
+ getAddressSpaces() == nullptr &&
!isAtomicOrderingAcquire() && !isAtomicOrderingRelease() &&
!isAtomicOrderingAcquireRelease() &&
!isAtomicOrderingSequentiallyConsistent() &&
@@ -977,6 +986,13 @@ std::string TreePredicateFn::getPredCode() const {
Code += ")\nreturn false;\n";
}
+ int64_t MinAlign = getMinAlignment();
+ if (MinAlign > 0) {
+ Code += "if (cast<MemSDNode>(N)->getAlignment() < ";
+ Code += utostr(MinAlign);
+ Code += ")\nreturn false;\n";
+ }
+
Record *MemoryVT = getMemoryVT();
if (MemoryVT)
@@ -1177,6 +1193,13 @@ ListInit *TreePredicateFn::getAddressSpaces() const {
return R->getValueAsListInit("AddressSpaces");
}
+int64_t TreePredicateFn::getMinAlignment() const {
+ Record *R = getOrigPatFragRecord()->getRecord();
+ if (R->isValueUnset("MinAlignment"))
+ return 0;
+ return R->getValueAsInt("MinAlignment");
+}
+
Record *TreePredicateFn::getScalarMemoryVT() const {
Record *R = getOrigPatFragRecord()->getRecord();
if (R->isValueUnset("ScalarMemoryVT"))
@@ -1373,9 +1396,11 @@ getPatternComplexity(const CodeGenDAGPatterns &CGP) const {
///
std::string PatternToMatch::getPredicateCheck() const {
SmallVector<const Predicate*,4> PredList;
- for (const Predicate &P : Predicates)
- PredList.push_back(&P);
- llvm::sort(PredList, deref<llvm::less>());
+ for (const Predicate &P : Predicates) {
+ if (!P.getCondString().empty())
+ PredList.push_back(&P);
+ }
+ llvm::sort(PredList, deref<std::less<>>());
std::string Check;
for (unsigned i = 0, e = PredList.size(); i != e; ++i) {
@@ -2772,6 +2797,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
if (Operator->isSubClassOf("SDNode") &&
Operator->getName() != "imm" &&
+ Operator->getName() != "timm" &&
Operator->getName() != "fpimm" &&
Operator->getName() != "tglobaltlsaddr" &&
Operator->getName() != "tconstpool" &&
@@ -3083,7 +3109,7 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
ListInit *LI = Frag->getValueAsListInit("Fragments");
TreePattern *P =
- (PatternFragments[Frag] = llvm::make_unique<TreePattern>(
+ (PatternFragments[Frag] = std::make_unique<TreePattern>(
Frag, LI, !Frag->isSubClassOf("OutPatFrag"),
*this)).get();
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 2b49a64c3f1d..80fc932a7a50 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -594,6 +594,7 @@ public:
Record *getScalarMemoryVT() const;
ListInit *getAddressSpaces() const;
+ int64_t getMinAlignment() const;
// If true, indicates that GlobalISel-based C++ code was supplied.
bool hasGISelPredicateCode() const;
@@ -1075,8 +1076,11 @@ public:
std::string C = IsHwMode
? std::string("MF->getSubtarget().checkFeatures(\"" + Features + "\")")
: std::string(Def->getValueAsString("CondString"));
+ if (C.empty())
+ return "";
return IfCond ? C : "!("+C+')';
}
+
bool operator==(const Predicate &P) const {
return IfCond == P.IfCond && IsHwMode == P.IsHwMode && Def == P.Def;
}
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 2463824469ab..fde946d06589 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -363,6 +363,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
Namespace = R->getValueAsString("Namespace");
AsmString = R->getValueAsString("AsmString");
+ isPreISelOpcode = R->getValueAsBit("isPreISelOpcode");
isReturn = R->getValueAsBit("isReturn");
isEHScopeReturn = R->getValueAsBit("isEHScopeReturn");
isBranch = R->getValueAsBit("isBranch");
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index bb5b1369649f..2cb28425df7a 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -231,6 +231,7 @@ template <typename T> class ArrayRef;
std::vector<Record*> ImplicitDefs, ImplicitUses;
// Various boolean values we track for the instruction.
+ bool isPreISelOpcode : 1;
bool isReturn : 1;
bool isEHScopeReturn : 1;
bool isBranch : 1;
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h
index 7b74bb07d6e0..83e780671b43 100644
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -141,6 +141,7 @@ struct CodeGenIntrinsic {
enum ArgAttribute {
NoCapture,
+ NoAlias,
Returned,
ReadOnly,
WriteOnly,
@@ -154,6 +155,13 @@ struct CodeGenIntrinsic {
return Properties & (1 << Prop);
}
+ /// Returns true if the parameter at \p ParamIdx is a pointer type. Returns
+ /// false if the parameter is not a pointer, or \p ParamIdx is greater than
+ /// the size of \p IS.ParamVTs.
+ ///
+ /// Note that this requires that \p IS.ParamVTs is available.
+ bool isParamAPointer(unsigned ParamIdx) const;
+
CodeGenIntrinsic(Record *R);
};
diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp
index b1774b01ba8c..793bb61481e7 100644
--- a/utils/TableGen/CodeGenMapTable.cpp
+++ b/utils/TableGen/CodeGenMapTable.cpp
@@ -132,7 +132,7 @@ public:
MapRec->getName() + "' has empty " + "`ValueCols' field!");
for (Init *I : ColValList->getValues()) {
- ListInit *ColI = dyn_cast<ListInit>(I);
+ auto *ColI = cast<ListInit>(I);
// Make sure that all the sub-lists in 'ValueCols' have same number of
// elements as the fields in 'ColFields'.
@@ -168,7 +168,7 @@ public:
return ValueCols;
}
};
-} // End anonymous namespace.
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -226,7 +226,7 @@ public:
void emitMapFuncBody(raw_ostream &OS, unsigned TableSize);
};
-} // End anonymous namespace.
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -521,7 +521,7 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
unsigned ListSize = List->size();
for (unsigned j = 0; j < ListSize; j++) {
- ListInit *ListJ = dyn_cast<ListInit>(List->getElement(j));
+ auto *ListJ = cast<ListInit>(List->getElement(j));
if (ListJ->size() != ColFields->size())
PrintFatalError("Record `" + CurMap->getName() + "', field "
@@ -604,8 +604,8 @@ void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) {
// Emit map tables and the functions to query them.
IMap.emitTablesWithFunc(OS);
}
- OS << "} // End " << NameSpace << " namespace\n";
- OS << "} // End llvm namespace\n";
+ OS << "} // end namespace " << NameSpace << "\n";
+ OS << "} // end namespace llvm\n";
OS << "#endif // GET_INSTRMAP_INFO\n\n";
}
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index f87c6d6c945a..6153c759b123 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -639,7 +639,8 @@ struct TupleExpander : SetTheory::Expander {
// Precompute some types.
Record *RegisterCl = Def->getRecords().getClass("Register");
RecTy *RegisterRecTy = RecordRecTy::get(RegisterCl);
- StringInit *BlankName = StringInit::get("");
+ std::vector<StringRef> RegNames =
+ Def->getValueAsListOfStrings("RegAsmNames");
// Zip them up.
for (unsigned n = 0; n != Length; ++n) {
@@ -656,11 +657,20 @@ struct TupleExpander : SetTheory::Expander {
unsigned(Reg->getValueAsInt("CostPerUse")));
}
+ StringInit *AsmName = StringInit::get("");
+ if (!RegNames.empty()) {
+ if (RegNames.size() <= n)
+ PrintFatalError(Def->getLoc(),
+ "Register tuple definition missing name for '" +
+ Name + "'.");
+ AsmName = StringInit::get(RegNames[n]);
+ }
+
// Create a new Record representing the synthesized register. This record
// is only for consumption by CodeGenRegister, it is not added to the
// RecordKeeper.
SynthDefs.emplace_back(
- llvm::make_unique<Record>(Name, Def->getLoc(), Def->getRecords()));
+ std::make_unique<Record>(Name, Def->getLoc(), Def->getRecords()));
Record *NewReg = SynthDefs.back().get();
Elts.insert(NewReg);
@@ -683,9 +693,8 @@ struct TupleExpander : SetTheory::Expander {
if (Field == "SubRegs")
RV.setValue(ListInit::get(Tuple, RegisterRecTy));
- // Provide a blank AsmName. MC hacks are required anyway.
if (Field == "AsmName")
- RV.setValue(BlankName);
+ RV.setValue(AsmName);
// CostPerUse is aggregated from all Tuple members.
if (Field == "CostPerUse")
@@ -725,8 +734,8 @@ struct TupleExpander : SetTheory::Expander {
//===----------------------------------------------------------------------===//
static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) {
- llvm::sort(M, deref<llvm::less>());
- M.erase(std::unique(M.begin(), M.end(), deref<llvm::equal>()), M.end());
+ llvm::sort(M, deref<std::less<>>());
+ M.erase(std::unique(M.begin(), M.end(), deref<std::equal_to<>>()), M.end());
}
CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
@@ -851,7 +860,7 @@ void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) {
bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const {
return std::binary_search(Members.begin(), Members.end(), Reg,
- deref<llvm::less>());
+ deref<std::less<>>());
}
namespace llvm {
@@ -887,7 +896,7 @@ static bool testSubClass(const CodeGenRegisterClass *A,
return A->RSI.isSubClassOf(B->RSI) &&
std::includes(A->getMembers().begin(), A->getMembers().end(),
B->getMembers().begin(), B->getMembers().end(),
- deref<llvm::less>());
+ deref<std::less<>>());
}
/// Sorting predicate for register classes. This provides a topological
@@ -1089,7 +1098,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
Sets.addFieldExpander("RegisterClass", "MemberList");
Sets.addFieldExpander("CalleeSavedRegs", "SaveList");
Sets.addExpander("RegisterTuples",
- llvm::make_unique<TupleExpander>(SynthDefs));
+ std::make_unique<TupleExpander>(SynthDefs));
// Read in the user-defined (named) sub-register indices.
// More indices will be synthesized later.
@@ -2131,9 +2140,10 @@ void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) {
const CodeGenRegister::Vec &Memb1 = RC1->getMembers();
const CodeGenRegister::Vec &Memb2 = RC2->getMembers();
CodeGenRegister::Vec Intersection;
- std::set_intersection(
- Memb1.begin(), Memb1.end(), Memb2.begin(), Memb2.end(),
- std::inserter(Intersection, Intersection.begin()), deref<llvm::less>());
+ std::set_intersection(Memb1.begin(), Memb1.end(), Memb2.begin(),
+ Memb2.end(),
+ std::inserter(Intersection, Intersection.begin()),
+ deref<std::less<>>());
// Skip disjoint class pairs.
if (Intersection.empty())
@@ -2158,7 +2168,8 @@ void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) {
void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) {
// Map SubRegIndex to set of registers in RC supporting that SubRegIndex.
typedef std::map<const CodeGenSubRegIndex *, CodeGenRegister::Vec,
- deref<llvm::less>> SubReg2SetMap;
+ deref<std::less<>>>
+ SubReg2SetMap;
// Compute the set of registers supporting each SubRegIndex.
SubReg2SetMap SRSets;
@@ -2357,6 +2368,21 @@ CodeGenRegBank::getRegClassForRegister(Record *R) {
return FoundRC;
}
+const CodeGenRegisterClass *
+CodeGenRegBank::getMinimalPhysRegClass(Record *RegRecord,
+ ValueTypeByHwMode *VT) {
+ const CodeGenRegister *Reg = getReg(RegRecord);
+ const CodeGenRegisterClass *BestRC = nullptr;
+ for (const auto &RC : getRegClasses()) {
+ if ((!VT || RC.hasType(*VT)) &&
+ RC.contains(Reg) && (!BestRC || BestRC->hasSubClass(&RC)))
+ BestRC = &RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
BitVector CodeGenRegBank::computeCoveredRegisters(ArrayRef<Record*> Regs) {
SetVector<const CodeGenRegister*> Set;
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index f04a90f8fde5..6d933baec2ae 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -93,7 +93,8 @@ namespace llvm {
// Map of composite subreg indices.
typedef std::map<CodeGenSubRegIndex *, CodeGenSubRegIndex *,
- deref<llvm::less>> CompMap;
+ deref<std::less<>>>
+ CompMap;
// Returns the subreg index that results from composing this with Idx.
// Returns NULL if this and Idx don't compose.
@@ -137,15 +138,14 @@ namespace llvm {
/// list of subregisters they are composed of (if any). Do this recursively.
void computeConcatTransitiveClosure();
+ bool operator<(const CodeGenSubRegIndex &RHS) const {
+ return this->EnumValue < RHS.EnumValue;
+ }
+
private:
CompMap Composed;
};
- inline bool operator<(const CodeGenSubRegIndex &A,
- const CodeGenSubRegIndex &B) {
- return A.EnumValue < B.EnumValue;
- }
-
/// CodeGenRegister - Represents a register definition.
struct CodeGenRegister {
Record *TheDef;
@@ -156,7 +156,8 @@ namespace llvm {
bool Artificial;
// Map SubRegIndex -> Register.
- typedef std::map<CodeGenSubRegIndex *, CodeGenRegister *, deref<llvm::less>>
+ typedef std::map<CodeGenSubRegIndex *, CodeGenRegister *,
+ deref<std::less<>>>
SubRegMap;
CodeGenRegister(Record *R, unsigned Enum);
@@ -347,6 +348,10 @@ namespace llvm {
ArrayRef<ValueTypeByHwMode> getValueTypes() const { return VTs; }
unsigned getNumValueTypes() const { return VTs.size(); }
+ bool hasType(const ValueTypeByHwMode &VT) const {
+ return std::find(VTs.begin(), VTs.end(), VT) != VTs.end();
+ }
+
const ValueTypeByHwMode &getValueTypeNum(unsigned VTNum) const {
if (VTNum < VTs.size())
return VTs[VTNum];
@@ -708,6 +713,13 @@ namespace llvm {
/// return the superclass. Otherwise return null.
const CodeGenRegisterClass* getRegClassForRegister(Record *R);
+ // Analog of TargetRegisterInfo::getMinimalPhysRegClass. Unlike
+ // getRegClassForRegister, this tries to find the smallest class containing
+ // the physical register. If \p VT is specified, it will only find classes
+ // with a matching type
+ const CodeGenRegisterClass *
+ getMinimalPhysRegClass(Record *RegRecord, ValueTypeByHwMode *VT = nullptr);
+
// Get the sum of unit weights.
unsigned getRegUnitSetWeight(const std::vector<unsigned> &Units) const {
unsigned Weight = 0;
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index fd007044a16e..f12d7d484a8e 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -172,8 +172,8 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
// Allow Set evaluation to recognize the dags used in InstRW records:
// (instrs Op1, Op1...)
- Sets.addOperator("instrs", llvm::make_unique<InstrsOp>());
- Sets.addOperator("instregex", llvm::make_unique<InstRegexOp>(Target));
+ Sets.addOperator("instrs", std::make_unique<InstrsOp>());
+ Sets.addOperator("instregex", std::make_unique<InstRegexOp>(Target));
// Instantiate a CodeGenProcModel for each SchedMachineModel with the values
// that are explicitly referenced in tablegen records. Resources associated
@@ -1083,9 +1083,13 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
if (RWD->getValueAsDef("SchedModel") == RWModelDef &&
RWModelDef->getValueAsBit("FullInstRWOverlapCheck")) {
for (Record *Inst : InstDefs) {
- PrintFatalError(InstRWDef->getLoc(), "Overlapping InstRW def " +
- Inst->getName() + " also matches " +
- RWD->getValue("Instrs")->getValue()->getAsString());
+ PrintFatalError
+ (InstRWDef->getLoc(),
+ "Overlapping InstRW definition for \"" +
+ Inst->getName() +
+ "\" also matches previous \"" +
+ RWD->getValue("Instrs")->getValue()->getAsString() +
+ "\".");
}
}
}
@@ -1115,9 +1119,13 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
for (Record *OldRWDef : SchedClasses[OldSCIdx].InstRWs) {
if (OldRWDef->getValueAsDef("SchedModel") == RWModelDef) {
for (Record *InstDef : InstDefs) {
- PrintFatalError(OldRWDef->getLoc(), "Overlapping InstRW def " +
- InstDef->getName() + " also matches " +
- OldRWDef->getValue("Instrs")->getValue()->getAsString());
+ PrintFatalError
+ (InstRWDef->getLoc(),
+ "Overlapping InstRW definition for \"" +
+ InstDef->getName() +
+ "\" also matches previous \"" +
+ OldRWDef->getValue("Instrs")->getValue()->getAsString() +
+ "\".");
}
}
assert(OldRWDef != InstRWDef &&
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index b65e1b6af791..fa8b842c97f9 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -98,6 +98,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v256i8: return "MVT::v256i8";
case MVT::v1i16: return "MVT::v1i16";
case MVT::v2i16: return "MVT::v2i16";
+ case MVT::v3i16: return "MVT::v3i16";
case MVT::v4i16: return "MVT::v4i16";
case MVT::v8i16: return "MVT::v8i16";
case MVT::v16i16: return "MVT::v16i16";
@@ -126,8 +127,11 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v32i64: return "MVT::v32i64";
case MVT::v1i128: return "MVT::v1i128";
case MVT::v2f16: return "MVT::v2f16";
+ case MVT::v3f16: return "MVT::v3f16";
case MVT::v4f16: return "MVT::v4f16";
case MVT::v8f16: return "MVT::v8f16";
+ case MVT::v16f16: return "MVT::v16f16";
+ case MVT::v32f16: return "MVT::v32f16";
case MVT::v1f32: return "MVT::v1f32";
case MVT::v2f32: return "MVT::v2f32";
case MVT::v3f32: return "MVT::v3f32";
@@ -289,10 +293,57 @@ Record *CodeGenTarget::getAsmWriter() const {
CodeGenRegBank &CodeGenTarget::getRegBank() const {
if (!RegBank)
- RegBank = llvm::make_unique<CodeGenRegBank>(Records, getHwModes());
+ RegBank = std::make_unique<CodeGenRegBank>(Records, getHwModes());
return *RegBank;
}
+Optional<CodeGenRegisterClass *>
+CodeGenTarget::getSuperRegForSubReg(const ValueTypeByHwMode &ValueTy,
+ CodeGenRegBank &RegBank,
+ const CodeGenSubRegIndex *SubIdx) const {
+ std::vector<CodeGenRegisterClass *> Candidates;
+ auto &RegClasses = RegBank.getRegClasses();
+
+ // Try to find a register class which supports ValueTy, and also contains
+ // SubIdx.
+ for (CodeGenRegisterClass &RC : RegClasses) {
+ // Is there a subclass of this class which contains this subregister index?
+ CodeGenRegisterClass *SubClassWithSubReg = RC.getSubClassWithSubReg(SubIdx);
+ if (!SubClassWithSubReg)
+ continue;
+
+ // We have a class. Check if it supports this value type.
+ if (llvm::none_of(SubClassWithSubReg->VTs,
+ [&ValueTy](const ValueTypeByHwMode &ClassVT) {
+ return ClassVT == ValueTy;
+ }))
+ continue;
+
+ // We have a register class which supports both the value type and
+ // subregister index. Remember it.
+ Candidates.push_back(SubClassWithSubReg);
+ }
+
+ // If we didn't find anything, we're done.
+ if (Candidates.empty())
+ return None;
+
+ // Find and return the largest of our candidate classes.
+ llvm::stable_sort(Candidates, [&](const CodeGenRegisterClass *A,
+ const CodeGenRegisterClass *B) {
+ if (A->getMembers().size() > B->getMembers().size())
+ return true;
+
+ if (A->getMembers().size() < B->getMembers().size())
+ return false;
+
+ // Order by name as a tie-breaker.
+ return StringRef(A->getName()) < B->getName();
+ });
+
+ return Candidates[0];
+}
+
void CodeGenTarget::ReadRegAltNameIndices() const {
RegAltNameIndices = Records.getAllDerivedDefinitions("RegAltNameIndex");
llvm::sort(RegAltNameIndices, LessRecord());
@@ -339,7 +390,7 @@ void CodeGenTarget::ReadLegalValueTypes() const {
CodeGenSchedModels &CodeGenTarget::getSchedModels() const {
if (!SchedModels)
- SchedModels = llvm::make_unique<CodeGenSchedModels>(Records, *this);
+ SchedModels = std::make_unique<CodeGenSchedModels>(Records, *this);
return *SchedModels;
}
@@ -352,7 +403,7 @@ void CodeGenTarget::ReadInstructions() const {
// Parse the instructions defined in the .td file.
for (unsigned i = 0, e = Insts.size(); i != e; ++i)
- Instructions[Insts[i]] = llvm::make_unique<CodeGenInstruction>(Insts[i]);
+ Instructions[Insts[i]] = std::make_unique<CodeGenInstruction>(Insts[i]);
}
static const CodeGenInstruction *
@@ -427,7 +478,8 @@ void CodeGenTarget::reverseBitsForLittleEndianEncoding() {
if (!isLittleEndianEncoding())
return;
- std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
+ std::vector<Record *> Insts =
+ Records.getAllDerivedDefinitions("InstructionEncoding");
for (Record *R : Insts) {
if (R->getValueAsString("Namespace") == "TargetOpcode" ||
R->getValueAsBit("isPseudo"))
@@ -733,6 +785,9 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
else if (Property->isSubClassOf("NoCapture")) {
unsigned ArgNo = Property->getValueAsInt("ArgNo");
ArgumentAttributes.push_back(std::make_pair(ArgNo, NoCapture));
+ } else if (Property->isSubClassOf("NoAlias")) {
+ unsigned ArgNo = Property->getValueAsInt("ArgNo");
+ ArgumentAttributes.push_back(std::make_pair(ArgNo, NoAlias));
} else if (Property->isSubClassOf("Returned")) {
unsigned ArgNo = Property->getValueAsInt("ArgNo");
ArgumentAttributes.push_back(std::make_pair(ArgNo, Returned));
@@ -758,3 +813,10 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// Sort the argument attributes for later benefit.
llvm::sort(ArgumentAttributes);
}
+
+bool CodeGenIntrinsic::isParamAPointer(unsigned ParamIdx) const {
+ if (ParamIdx >= IS.ParamVTs.size())
+ return false;
+ MVT ParamType = MVT(IS.ParamVTs[ParamIdx]);
+ return ParamType == MVT::iPTR || ParamType == MVT::iPTRAny;
+}
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 1ab2de269c76..d52ffac4ce6c 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -103,6 +103,12 @@ public:
/// getRegBank - Return the register bank description.
CodeGenRegBank &getRegBank() const;
+ /// Return the largest register class on \p RegBank which supports \p Ty and
+ /// covers \p SubIdx if it exists.
+ Optional<CodeGenRegisterClass *>
+ getSuperRegForSubReg(const ValueTypeByHwMode &Ty, CodeGenRegBank &RegBank,
+ const CodeGenSubRegIndex *SubIdx) const;
+
/// getRegisterByName - If there is a register with the specific AsmName,
/// return it.
const CodeGenRegister *getRegisterByName(StringRef Name) const;
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index fb0c6faa5295..d8e78ce55c7b 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -173,7 +173,7 @@ void DAGISelEmitter::run(raw_ostream &OS) {
}
std::unique_ptr<Matcher> TheMatcher =
- llvm::make_unique<ScopeMatcher>(PatternMatchers);
+ std::make_unique<ScopeMatcher>(PatternMatchers);
OptimizeMatcher(TheMatcher, CGP);
//Matcher->dump();
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index 0a782e84a372..223513fc8d38 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -932,13 +932,15 @@ private:
///
class EmitCopyToRegMatcher : public Matcher {
unsigned SrcSlot; // Value to copy into the physreg.
- Record *DestPhysReg;
+ const CodeGenRegister *DestPhysReg;
+
public:
- EmitCopyToRegMatcher(unsigned srcSlot, Record *destPhysReg)
+ EmitCopyToRegMatcher(unsigned srcSlot,
+ const CodeGenRegister *destPhysReg)
: Matcher(EmitCopyToReg), SrcSlot(srcSlot), DestPhysReg(destPhysReg) {}
unsigned getSrcSlot() const { return SrcSlot; }
- Record *getDestPhysReg() const { return DestPhysReg; }
+ const CodeGenRegister *getDestPhysReg() const { return DestPhysReg; }
static bool classof(const Matcher *N) {
return N->getKind() == EmitCopyToReg;
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index cecbc6cccdff..e9f1fb93d516 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -670,12 +670,22 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
OS << '\n';
return 2+MN->getNumNodes();
}
- case Matcher::EmitCopyToReg:
- OS << "OPC_EmitCopyToReg, "
- << cast<EmitCopyToRegMatcher>(N)->getSrcSlot() << ", "
- << getQualifiedName(cast<EmitCopyToRegMatcher>(N)->getDestPhysReg())
- << ",\n";
- return 3;
+ case Matcher::EmitCopyToReg: {
+ const auto *C2RMatcher = cast<EmitCopyToRegMatcher>(N);
+ int Bytes = 3;
+ const CodeGenRegister *Reg = C2RMatcher->getDestPhysReg();
+ if (Reg->EnumValue > 255) {
+ assert(isUInt<16>(Reg->EnumValue) && "not handled");
+ OS << "OPC_EmitCopyToReg2, " << C2RMatcher->getSrcSlot() << ", "
+ << "TARGET_VAL(" << getQualifiedName(Reg->TheDef) << "),\n";
+ ++Bytes;
+ } else {
+ OS << "OPC_EmitCopyToReg, " << C2RMatcher->getSrcSlot() << ", "
+ << getQualifiedName(Reg->TheDef) << ",\n";
+ }
+
+ return Bytes;
+ }
case Matcher::EmitNodeXForm: {
const EmitNodeXFormMatcher *XF = cast<EmitNodeXFormMatcher>(N);
OS << "OPC_EmitNodeXForm, " << getNodeXFormID(XF->getNodeXForm()) << ", "
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 8f54beeba65b..49c09c7d195e 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -141,7 +141,7 @@ namespace {
SmallVectorImpl<unsigned> &ResultOps);
};
-} // end anon namespace.
+} // end anonymous namespace
MatcherGen::MatcherGen(const PatternToMatch &pattern,
const CodeGenDAGPatterns &cgp)
@@ -867,9 +867,13 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
if (isRoot && !PhysRegInputs.empty()) {
// Emit all of the CopyToReg nodes for the input physical registers. These
// occur in patterns like (mul:i8 AL:i8, GR8:i8:$src).
- for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i)
+ for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i) {
+ const CodeGenRegister *Reg =
+ CGP.getTargetInfo().getRegBank().getReg(PhysRegInputs[i].first);
AddMatcher(new EmitCopyToRegMatcher(PhysRegInputs[i].second,
- PhysRegInputs[i].first));
+ Reg));
+ }
+
// Even if the node has no other glue inputs, the resultant node must be
// glued to the CopyFromReg nodes we just generated.
TreeHasInGlue = true;
diff --git a/utils/TableGen/DAGISelMatcherOpt.cpp b/utils/TableGen/DAGISelMatcherOpt.cpp
index 7d51b0769372..6746fdd676a7 100644
--- a/utils/TableGen/DAGISelMatcherOpt.cpp
+++ b/utils/TableGen/DAGISelMatcherOpt.cpp
@@ -409,13 +409,14 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
DenseMap<unsigned, unsigned> TypeEntry;
SmallVector<std::pair<MVT::SimpleValueType, Matcher*>, 8> Cases;
for (unsigned i = 0, e = NewOptionsToMatch.size(); i != e; ++i) {
- CheckTypeMatcher *CTM =
- cast_or_null<CheckTypeMatcher>(FindNodeWithKind(NewOptionsToMatch[i],
- Matcher::CheckType));
+ Matcher* M = FindNodeWithKind(NewOptionsToMatch[i], Matcher::CheckType);
+ assert(M && isa<CheckTypeMatcher>(M) && "Unknown Matcher type");
+
+ auto *CTM = cast<CheckTypeMatcher>(M);
Matcher *MatcherWithoutCTM = NewOptionsToMatch[i]->unlinkNode(CTM);
MVT::SimpleValueType CTMTy = CTM->getType();
delete CTM;
-
+
unsigned &Entry = TypeEntry[CTMTy];
if (Entry != 0) {
// If we have unfactored duplicate types, then we should factor them.
diff --git a/utils/TableGen/DFAEmitter.cpp b/utils/TableGen/DFAEmitter.cpp
new file mode 100644
index 000000000000..dd3db7c150ba
--- /dev/null
+++ b/utils/TableGen/DFAEmitter.cpp
@@ -0,0 +1,394 @@
+//===- DFAEmitter.cpp - Finite state automaton emitter --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class can produce a generic deterministic finite state automaton (DFA),
+// given a set of possible states and transitions.
+//
+// The input transitions can be nondeterministic - this class will produce the
+// deterministic equivalent state machine.
+//
+// The generated code can run the DFA and produce an accepted / not accepted
+// state and also produce, given a sequence of transitions that results in an
+// accepted state, the sequence of intermediate states. This is useful if the
+// initial automaton was nondeterministic - it allows mapping back from the DFA
+// to the NFA.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "dfa-emitter"
+
+#include "DFAEmitter.h"
+#include "CodeGenTarget.h"
+#include "SequenceToOffsetTable.h"
+#include "TableGenBackends.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DfaEmitter implementation. This is independent of the GenAutomaton backend.
+//===----------------------------------------------------------------------===//
+
+void DfaEmitter::addTransition(state_type From, state_type To, action_type A) {
+ Actions.insert(A);
+ NfaStates.insert(From);
+ NfaStates.insert(To);
+ NfaTransitions[{From, A}].push_back(To);
+ ++NumNfaTransitions;
+}
+
+void DfaEmitter::visitDfaState(DfaState DS) {
+ // For every possible action...
+ auto FromId = DfaStates.idFor(DS);
+ for (action_type A : Actions) {
+ DfaState NewStates;
+ DfaTransitionInfo TI;
+ // For every represented state, word pair in the original NFA...
+ for (state_type &FromState : DS) {
+ // If this action is possible from this state add the transitioned-to
+ // states to NewStates.
+ auto I = NfaTransitions.find({FromState, A});
+ if (I == NfaTransitions.end())
+ continue;
+ for (state_type &ToState : I->second) {
+ NewStates.push_back(ToState);
+ TI.emplace_back(FromState, ToState);
+ }
+ }
+ if (NewStates.empty())
+ continue;
+ // Sort and unique.
+ sort(NewStates);
+ NewStates.erase(std::unique(NewStates.begin(), NewStates.end()),
+ NewStates.end());
+ sort(TI);
+ TI.erase(std::unique(TI.begin(), TI.end()), TI.end());
+ unsigned ToId = DfaStates.insert(NewStates);
+ DfaTransitions.emplace(std::make_pair(FromId, A), std::make_pair(ToId, TI));
+ }
+}
+
+void DfaEmitter::constructDfa() {
+ DfaState Initial(1, /*NFA initial state=*/0);
+ DfaStates.insert(Initial);
+
+ // Note that UniqueVector starts indices at 1, not zero.
+ unsigned DfaStateId = 1;
+ while (DfaStateId <= DfaStates.size())
+ visitDfaState(DfaStates[DfaStateId++]);
+}
+
+void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
+ constructDfa();
+
+ OS << "// Input NFA has " << NfaStates.size() << " states with "
+ << NumNfaTransitions << " transitions.\n";
+ OS << "// Generated DFA has " << DfaStates.size() << " states with "
+ << DfaTransitions.size() << " transitions.\n\n";
+
+ // Implementation note: We don't bake a simple std::pair<> here as it requires
+ // significantly more effort to parse. A simple test with a large array of
+ // struct-pairs (N=100000) took clang-10 6s to parse. The same array of
+ // std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to
+ // define the pair type.
+ //
+ // FIXME: It may make sense to emit these as ULEB sequences instead of
+ // pairs of uint64_t.
+ OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n";
+ OS << "// transition implies a set of NFA transitions. These are referred\n";
+ OS << "// to by index in " << Name << "Transitions[].\n";
+
+ SequenceToOffsetTable<DfaTransitionInfo> Table;
+ std::map<DfaTransitionInfo, unsigned> EmittedIndices;
+ for (auto &T : DfaTransitions)
+ Table.add(T.second.second);
+ Table.layout();
+ OS << "std::array<NfaStatePair, " << Table.size() << "> " << Name
+ << "TransitionInfo = {{\n";
+ Table.emit(
+ OS,
+ [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
+ OS << "{" << P.first << ", " << P.second << "}";
+ },
+ "{0ULL, 0ULL}");
+
+ OS << "}};\n\n";
+
+ OS << "// A transition in the generated " << Name << " DFA.\n";
+ OS << "struct " << Name << "Transition {\n";
+ OS << " unsigned FromDfaState; // The transitioned-from DFA state.\n";
+ OS << " ";
+ printActionType(OS);
+ OS << " Action; // The input symbol that causes this transition.\n";
+ OS << " unsigned ToDfaState; // The transitioned-to DFA state.\n";
+ OS << " unsigned InfoIdx; // Start index into " << Name
+ << "TransitionInfo.\n";
+ OS << "};\n\n";
+
+ OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n";
+ OS << "// The initial state is 1, not zero.\n";
+ OS << "std::array<" << Name << "Transition, " << DfaTransitions.size() << "> "
+ << Name << "Transitions = {{\n";
+ for (auto &KV : DfaTransitions) {
+ dfa_state_type From = KV.first.first;
+ dfa_state_type To = KV.second.first;
+ action_type A = KV.first.second;
+ unsigned InfoIdx = Table.get(KV.second.second);
+ OS << " {" << From << ", ";
+ printActionValue(A, OS);
+ OS << ", " << To << ", " << InfoIdx << "},\n";
+ }
+ OS << "\n}};\n\n";
+}
+
+void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; }
+
+void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; }
+
+//===----------------------------------------------------------------------===//
+// AutomatonEmitter implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+// FIXME: This entire discriminated union could be removed with c++17:
+// using Action = std::variant<Record *, unsigned, std::string>;
+struct Action {
+ Record *R = nullptr;
+ unsigned I = 0;
+ std::string S = nullptr;
+
+ Action() = default;
+ Action(Record *R, unsigned I, std::string S) : R(R), I(I), S(S) {}
+
+ void print(raw_ostream &OS) const {
+ if (R)
+ OS << R->getName();
+ else if (!S.empty())
+ OS << '"' << S << '"';
+ else
+ OS << I;
+ }
+ bool operator<(const Action &Other) const {
+ return std::make_tuple(R, I, S) <
+ std::make_tuple(Other.R, Other.I, Other.S);
+ }
+};
+
+using ActionTuple = std::vector<Action>;
+class Automaton;
+
+class Transition {
+ uint64_t NewState;
+ // The tuple of actions that causes this transition.
+ ActionTuple Actions;
+ // The types of the actions; this is the same across all transitions.
+ SmallVector<std::string, 4> Types;
+
+public:
+ Transition(Record *R, Automaton *Parent);
+ const ActionTuple &getActions() { return Actions; }
+ SmallVector<std::string, 4> getTypes() { return Types; }
+
+ bool canTransitionFrom(uint64_t State);
+ uint64_t transitionFrom(uint64_t State);
+};
+
+class Automaton {
+ RecordKeeper &Records;
+ Record *R;
+ std::vector<Transition> Transitions;
+ /// All possible action tuples, uniqued.
+ UniqueVector<ActionTuple> Actions;
+ /// The fields within each Transition object to find the action symbols.
+ std::vector<StringRef> ActionSymbolFields;
+
+public:
+ Automaton(RecordKeeper &Records, Record *R);
+ void emit(raw_ostream &OS);
+
+ ArrayRef<StringRef> getActionSymbolFields() { return ActionSymbolFields; }
+ /// If the type of action A has been overridden (there exists a field
+ /// "TypeOf_A") return that, otherwise return the empty string.
+ StringRef getActionSymbolType(StringRef A);
+};
+
+class AutomatonEmitter {
+ RecordKeeper &Records;
+
+public:
+ AutomatonEmitter(RecordKeeper &R) : Records(R) {}
+ void run(raw_ostream &OS);
+};
+
+/// A DfaEmitter implementation that can print our variant action type.
+class CustomDfaEmitter : public DfaEmitter {
+ const UniqueVector<ActionTuple> &Actions;
+ std::string TypeName;
+
+public:
+ CustomDfaEmitter(const UniqueVector<ActionTuple> &Actions, StringRef TypeName)
+ : Actions(Actions), TypeName(TypeName) {}
+
+ void printActionType(raw_ostream &OS) override;
+ void printActionValue(action_type A, raw_ostream &OS) override;
+};
+} // namespace
+
+void AutomatonEmitter::run(raw_ostream &OS) {
+ for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) {
+ Automaton A(Records, R);
+ OS << "#ifdef GET_" << R->getName() << "_DECL\n";
+ A.emit(OS);
+ OS << "#endif // GET_" << R->getName() << "_DECL\n";
+ }
+}
+
+Automaton::Automaton(RecordKeeper &Records, Record *R)
+ : Records(Records), R(R) {
+ LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n");
+ ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields");
+}
+
+void Automaton::emit(raw_ostream &OS) {
+ StringRef TransitionClass = R->getValueAsString("TransitionClass");
+ for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) {
+ assert(T->isSubClassOf("Transition"));
+ Transitions.emplace_back(T, this);
+ Actions.insert(Transitions.back().getActions());
+ }
+
+ LLVM_DEBUG(dbgs() << " Action alphabet cardinality: " << Actions.size()
+ << "\n");
+ LLVM_DEBUG(dbgs() << " Each state has " << Transitions.size()
+ << " potential transitions.\n");
+
+ StringRef Name = R->getName();
+
+ CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action");
+ // Starting from the initial state, build up a list of possible states and
+ // transitions.
+ std::deque<uint64_t> Worklist(1, 0);
+ std::set<uint64_t> SeenStates;
+ unsigned NumTransitions = 0;
+ SeenStates.insert(Worklist.front());
+ while (!Worklist.empty()) {
+ uint64_t State = Worklist.front();
+ Worklist.pop_front();
+ for (Transition &T : Transitions) {
+ if (!T.canTransitionFrom(State))
+ continue;
+ uint64_t NewState = T.transitionFrom(State);
+ if (SeenStates.emplace(NewState).second)
+ Worklist.emplace_back(NewState);
+ ++NumTransitions;
+ Emitter.addTransition(State, NewState, Actions.idFor(T.getActions()));
+ }
+ }
+ LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates.size()
+ << " states with " << NumTransitions << " transitions.\n");
+
+ const auto &ActionTypes = Transitions.back().getTypes();
+ OS << "// The type of an action in the " << Name << " automaton.\n";
+ if (ActionTypes.size() == 1) {
+ OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n";
+ } else {
+ OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ")
+ << ">;\n";
+ }
+ OS << "\n";
+
+ Emitter.emit(Name, OS);
+}
+
+StringRef Automaton::getActionSymbolType(StringRef A) {
+ Twine Ty = "TypeOf_" + A;
+ if (!R->getValue(Ty.str()))
+ return "";
+ return R->getValueAsString(Ty.str());
+}
+
+Transition::Transition(Record *R, Automaton *Parent) {
+ BitsInit *NewStateInit = R->getValueAsBitsInit("NewState");
+ NewState = 0;
+ assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 &&
+ "State cannot be represented in 64 bits!");
+ for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) {
+ if (auto *Bit = dyn_cast<BitInit>(NewStateInit->getBit(I))) {
+ if (Bit->getValue())
+ NewState |= 1ULL << I;
+ }
+ }
+
+ for (StringRef A : Parent->getActionSymbolFields()) {
+ RecordVal *SymbolV = R->getValue(A);
+ if (auto *Ty = dyn_cast<RecordRecTy>(SymbolV->getType())) {
+ Actions.emplace_back(R->getValueAsDef(A), 0, "");
+ Types.emplace_back(Ty->getAsString());
+ } else if (isa<IntRecTy>(SymbolV->getType())) {
+ Actions.emplace_back(nullptr, R->getValueAsInt(A), "");
+ Types.emplace_back("unsigned");
+ } else if (isa<StringRecTy>(SymbolV->getType()) ||
+ isa<CodeRecTy>(SymbolV->getType())) {
+ Actions.emplace_back(nullptr, 0, R->getValueAsString(A));
+ Types.emplace_back("std::string");
+ } else {
+ report_fatal_error("Unhandled symbol type!");
+ }
+
+ StringRef TypeOverride = Parent->getActionSymbolType(A);
+ if (!TypeOverride.empty())
+ Types.back() = TypeOverride;
+ }
+}
+
+bool Transition::canTransitionFrom(uint64_t State) {
+ if ((State & NewState) == 0)
+ // The bits we want to set are not set;
+ return true;
+ return false;
+}
+
+uint64_t Transition::transitionFrom(uint64_t State) {
+ return State | NewState;
+}
+
+void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; }
+
+void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) {
+ const ActionTuple &AT = Actions[A];
+ if (AT.size() > 1)
+ OS << "std::make_tuple(";
+ bool First = true;
+ for (const auto &SingleAction : AT) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ SingleAction.print(OS);
+ }
+ if (AT.size() > 1)
+ OS << ")";
+}
+
+namespace llvm {
+
+void EmitAutomata(RecordKeeper &RK, raw_ostream &OS) {
+ AutomatonEmitter(RK).run(OS);
+}
+
+} // namespace llvm
diff --git a/utils/TableGen/DFAEmitter.h b/utils/TableGen/DFAEmitter.h
new file mode 100644
index 000000000000..76de8f72cd88
--- /dev/null
+++ b/utils/TableGen/DFAEmitter.h
@@ -0,0 +1,107 @@
+//===--------------------- DfaEmitter.h -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Defines a generic automaton builder. This takes a set of transitions and
+// states that represent a nondeterministic finite state automaton (NFA) and
+// emits a determinized DFA in a form that include/llvm/Support/Automaton.h can
+// drive.
+//
+// See file llvm/TableGen/Automaton.td for the TableGen API definition.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H
+#define LLVM_UTILS_TABLEGEN_DFAEMITTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
+#include <set>
+#include <unordered_map>
+
+namespace llvm {
+
+class raw_ostream;
+/// Construct a deterministic finite state automaton from possible
+/// nondeterministic state and transition data.
+///
+/// The state type is a 64-bit unsigned integer. The generated automaton is
+/// invariant to the sparsity of the state representation - its size is only
+/// a function of the cardinality of the set of states.
+///
+/// The inputs to this emitter are considered to define a nondeterministic
+/// finite state automaton (NFA). This is then converted to a DFA during
+/// emission. The emitted tables can be used to by
+/// include/llvm/Support/Automaton.h.
+class DfaEmitter {
+public:
+ // The type of an NFA state. The initial state is always zero.
+ using state_type = uint64_t;
+ // The type of an action.
+ using action_type = uint64_t;
+
+ DfaEmitter() = default;
+ virtual ~DfaEmitter() = default;
+
+ void addTransition(state_type From, state_type To, action_type A);
+ void emit(StringRef Name, raw_ostream &OS);
+
+protected:
+ /// Emit the C++ type of an action to OS.
+ virtual void printActionType(raw_ostream &OS);
+ /// Emit the C++ value of an action A to OS.
+ virtual void printActionValue(action_type A, raw_ostream &OS);
+
+private:
+ /// The state type of deterministic states. These are only used internally to
+ /// this class. This is an ID into the DfaStates UniqueVector.
+ using dfa_state_type = unsigned;
+
+ /// The actual representation of a DFA state, which is a union of one or more
+ /// NFA states.
+ using DfaState = SmallVector<state_type, 4>;
+
+ /// A DFA transition consists of a set of NFA states transitioning to a
+ /// new set of NFA states. The DfaTransitionInfo tracks, for every
+ /// transitioned-from NFA state, a set of valid transitioned-to states.
+ ///
+ /// Emission of this transition relation allows algorithmic determination of
+ /// the possible candidate NFA paths taken under a given input sequence to
+ /// reach a given DFA state.
+ using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>;
+
+ /// The set of all possible actions.
+ std::set<action_type> Actions;
+
+ /// The set of nondeterministic transitions. A state-action pair can
+ /// transition to multiple target states.
+ std::map<std::pair<state_type, action_type>, std::vector<state_type>>
+ NfaTransitions;
+ std::set<state_type> NfaStates;
+ unsigned NumNfaTransitions = 0;
+
+ /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID,
+ /// which is dfa_state_type. Note that because UniqueVector reserves state
+ /// zero, the initial DFA state is always 1.
+ UniqueVector<DfaState> DfaStates;
+ /// The set of deterministic transitions. A state-action pair has only a
+ /// single target state.
+ std::map<std::pair<dfa_state_type, action_type>,
+ std::pair<dfa_state_type, DfaTransitionInfo>>
+ DfaTransitions;
+
+ /// Visit all NFA states and construct the DFA.
+ void constructDfa();
+ /// Visit a single DFA state and construct all possible transitions to new DFA
+ /// states.
+ void visitDfaState(DfaState DS);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp
index dabcc8f8ed55..ccb4ef1b9678 100644
--- a/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -17,6 +17,7 @@
#define DEBUG_TYPE "dfa-emitter"
#include "CodeGenTarget.h"
+#include "DFAEmitter.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -29,6 +30,7 @@
#include <map>
#include <set>
#include <string>
+#include <unordered_map>
#include <vector>
using namespace llvm;
@@ -154,121 +156,13 @@ public:
int &maxStages,
raw_ostream &OS);
- void run(raw_ostream &OS);
-};
-
-//
-// State represents the usage of machine resources if the packet contains
-// a set of instruction classes.
-//
-// Specifically, currentState is a set of bit-masks.
-// The nth bit in a bit-mask indicates whether the nth resource is being used
-// by this state. The set of bit-masks in a state represent the different
-// possible outcomes of transitioning to this state.
-// For example: consider a two resource architecture: resource L and resource M
-// with three instruction classes: L, M, and L_or_M.
-// From the initial state (currentState = 0x00), if we add instruction class
-// L_or_M we will transition to a state with currentState = [0x01, 0x10]. This
-// represents the possible resource states that can result from adding a L_or_M
-// instruction
-//
-// Another way of thinking about this transition is we are mapping a NDFA with
-// two states [0x01] and [0x10] into a DFA with a single state [0x01, 0x10].
-//
-// A State instance also contains a collection of transitions from that state:
-// a map from inputs to new states.
-//
-class State {
- public:
- static int currentStateNum;
- // stateNum is the only member used for equality/ordering, all other members
- // can be mutated even in const State objects.
- const int stateNum;
- mutable bool isInitial;
- mutable std::set<unsigned> stateInfo;
- typedef std::map<std::vector<unsigned>, const State *> TransitionMap;
- mutable TransitionMap Transitions;
-
- State();
-
- bool operator<(const State &s) const {
- return stateNum < s.stateNum;
- }
-
- //
- // canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
- // may be a valid transition from this state i.e., can an instruction of type
- // InsnClass be added to the packet represented by this state.
- //
- // Note that for multiple stages, this quick check does not take into account
- // any possible resource competition between the stages themselves. That is
- // enforced in AddInsnClassStages which checks the cross product of all
- // stages for resource availability (which is a more involved check).
- //
- bool canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
- std::map<unsigned, unsigned> &ComboBitToBitsMap) const;
-
- //
- // AddInsnClass - Return all combinations of resource reservation
- // which are possible from this state (PossibleStates).
- //
- // PossibleStates is the set of valid resource states that ensue from valid
- // transitions.
- //
- void AddInsnClass(std::vector<unsigned> &InsnClass,
- std::map<unsigned, unsigned> &ComboBitToBitsMap,
- std::set<unsigned> &PossibleStates) const;
-
- //
- // AddInsnClassStages - Return all combinations of resource reservation
- // resulting from the cross product of all stages for this InsnClass
- // which are possible from this state (PossibleStates).
- //
- void AddInsnClassStages(std::vector<unsigned> &InsnClass,
- std::map<unsigned, unsigned> &ComboBitToBitsMap,
- unsigned chkstage, unsigned numstages,
- unsigned prevState, unsigned origState,
- DenseSet<unsigned> &VisitedResourceStates,
- std::set<unsigned> &PossibleStates) const;
-
- //
- // addTransition - Add a transition from this state given the input InsnClass
- //
- void addTransition(std::vector<unsigned> InsnClass, const State *To) const;
-
- //
- // hasTransition - Returns true if there is a transition from this state
- // given the input InsnClass
- //
- bool hasTransition(std::vector<unsigned> InsnClass) const;
-};
-
-//
-// class DFA: deterministic finite automaton for processor resource tracking.
-//
-class DFA {
-public:
- DFA() = default;
-
- // Set of states. Need to keep this sorted to emit the transition table.
- typedef std::set<State> StateSet;
- StateSet states;
+ // Emit code for a subset of itineraries.
+ void emitForItineraries(raw_ostream &OS,
+ std::vector<Record *> &ProcItinList,
+ std::string DFAName);
- State *currentState = nullptr;
-
- //
- // Modify the DFA.
- //
- const State &newState();
-
- //
- // writeTable: Print out a table representing the DFA.
- //
- void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName,
- int numInsnClasses = 0,
- int maxResources = 0, int numCombos = 0, int maxStages = 0);
+ void run(raw_ostream &OS);
};
-
} // end anonymous namespace
#ifndef NDEBUG
@@ -289,22 +183,6 @@ void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
}
//
-// dbgsStateInfo - When debugging, print the set of state info.
-//
-void dbgsStateInfo(const std::set<unsigned> &stateInfo) {
- LLVM_DEBUG(dbgs() << "StateInfo: ");
- unsigned i = 0;
- for (std::set<unsigned>::iterator SI = stateInfo.begin();
- SI != stateInfo.end(); ++SI, ++i) {
- unsigned thisState = *SI;
- if (i > 0) {
- LLVM_DEBUG(dbgs() << ", ");
- }
- LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(thisState));
- }
-}
-
-//
// dbgsIndent - When debugging, indent by the specified amount.
//
void dbgsIndent(unsigned indent) {
@@ -314,336 +192,10 @@ void dbgsIndent(unsigned indent) {
}
#endif // NDEBUG
-//
-// Constructors and destructors for State and DFA
-//
-State::State() :
- stateNum(currentStateNum++), isInitial(false) {}
-
-//
-// addTransition - Add a transition from this state given the input InsnClass
-//
-void State::addTransition(std::vector<unsigned> InsnClass, const State *To)
- const {
- assert(!Transitions.count(InsnClass) &&
- "Cannot have multiple transitions for the same input");
- Transitions[InsnClass] = To;
-}
-
-//
-// hasTransition - Returns true if there is a transition from this state
-// given the input InsnClass
-//
-bool State::hasTransition(std::vector<unsigned> InsnClass) const {
- return Transitions.count(InsnClass) > 0;
-}
-
-//
-// AddInsnClass - Return all combinations of resource reservation
-// which are possible from this state (PossibleStates).
-//
-// PossibleStates is the set of valid resource states that ensue from valid
-// transitions.
-//
-void State::AddInsnClass(std::vector<unsigned> &InsnClass,
- std::map<unsigned, unsigned> &ComboBitToBitsMap,
- std::set<unsigned> &PossibleStates) const {
- //
- // Iterate over all resource states in currentState.
- //
- unsigned numstages = InsnClass.size();
- assert((numstages > 0) && "InsnClass has no stages");
-
- for (std::set<unsigned>::iterator SI = stateInfo.begin();
- SI != stateInfo.end(); ++SI) {
- unsigned thisState = *SI;
-
- DenseSet<unsigned> VisitedResourceStates;
-
- LLVM_DEBUG(dbgs() << " thisState: 0x" << Twine::utohexstr(thisState)
- << "\n");
- AddInsnClassStages(InsnClass, ComboBitToBitsMap,
- numstages - 1, numstages,
- thisState, thisState,
- VisitedResourceStates, PossibleStates);
- }
-}
-
-void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
- std::map<unsigned, unsigned> &ComboBitToBitsMap,
- unsigned chkstage, unsigned numstages,
- unsigned prevState, unsigned origState,
- DenseSet<unsigned> &VisitedResourceStates,
- std::set<unsigned> &PossibleStates) const {
- assert((chkstage < numstages) && "AddInsnClassStages: stage out of range");
- unsigned thisStage = InsnClass[chkstage];
-
- LLVM_DEBUG({
- dbgsIndent((1 + numstages - chkstage) << 1);
- dbgs() << "AddInsnClassStages " << chkstage << " (0x"
- << Twine::utohexstr(thisStage) << ") from ";
- dbgsInsnClass(InsnClass);
- dbgs() << "\n";
- });
-
- //
- // Iterate over all possible resources used in thisStage.
- // For ex: for thisStage = 0x11, all resources = {0x01, 0x10}.
- //
- for (unsigned int j = 0; j < DFA_MAX_RESOURCES; ++j) {
- unsigned resourceMask = (0x1 << j);
- if (resourceMask & thisStage) {
- unsigned combo = ComboBitToBitsMap[resourceMask];
- if (combo && ((~prevState & combo) != combo)) {
- LLVM_DEBUG(dbgs() << "\tSkipped Add 0x" << Twine::utohexstr(prevState)
- << " - combo op 0x" << Twine::utohexstr(resourceMask)
- << " (0x" << Twine::utohexstr(combo)
- << ") cannot be scheduled\n");
- continue;
- }
- //
- // For each possible resource used in thisStage, generate the
- // resource state if that resource was used.
- //
- unsigned ResultingResourceState = prevState | resourceMask | combo;
- LLVM_DEBUG({
- dbgsIndent((2 + numstages - chkstage) << 1);
- dbgs() << "0x" << Twine::utohexstr(prevState) << " | 0x"
- << Twine::utohexstr(resourceMask);
- if (combo)
- dbgs() << " | 0x" << Twine::utohexstr(combo);
- dbgs() << " = 0x" << Twine::utohexstr(ResultingResourceState) << " ";
- });
-
- //
- // If this is the final stage for this class
- //
- if (chkstage == 0) {
- //
- // Check if the resulting resource state can be accommodated in this
- // packet.
- // We compute resource OR prevState (originally started as origState).
- // If the result of the OR is different than origState, it implies
- // that there is at least one resource that can be used to schedule
- // thisStage in the current packet.
- // Insert ResultingResourceState into PossibleStates only if we haven't
- // processed ResultingResourceState before.
- //
- if (ResultingResourceState != prevState) {
- if (VisitedResourceStates.count(ResultingResourceState) == 0) {
- VisitedResourceStates.insert(ResultingResourceState);
- PossibleStates.insert(ResultingResourceState);
- LLVM_DEBUG(dbgs()
- << "\tResultingResourceState: 0x"
- << Twine::utohexstr(ResultingResourceState) << "\n");
- } else {
- LLVM_DEBUG(dbgs() << "\tSkipped Add - state already seen\n");
- }
- } else {
- LLVM_DEBUG(dbgs()
- << "\tSkipped Add - no final resources available\n");
- }
- } else {
- //
- // If the current resource can be accommodated, check the next
- // stage in InsnClass for available resources.
- //
- if (ResultingResourceState != prevState) {
- LLVM_DEBUG(dbgs() << "\n");
- AddInsnClassStages(InsnClass, ComboBitToBitsMap,
- chkstage - 1, numstages,
- ResultingResourceState, origState,
- VisitedResourceStates, PossibleStates);
- } else {
- LLVM_DEBUG(dbgs() << "\tSkipped Add - no resources available\n");
- }
- }
- }
- }
-}
-
-//
-// canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
-// may be a valid transition from this state i.e., can an instruction of type
-// InsnClass be added to the packet represented by this state.
-//
-// Note that this routine is performing conservative checks that can be
-// quickly executed acting as a filter before calling AddInsnClassStages.
-// Any cases allowed through here will be caught later in AddInsnClassStages
-// which performs the more expensive exact check.
-//
-bool State::canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
- std::map<unsigned, unsigned> &ComboBitToBitsMap) const {
- for (std::set<unsigned>::const_iterator SI = stateInfo.begin();
- SI != stateInfo.end(); ++SI) {
- // Check to see if all required resources are available.
- bool available = true;
-
- // Inspect each stage independently.
- // note: This is a conservative check as we aren't checking for
- // possible resource competition between the stages themselves
- // The full cross product is examined later in AddInsnClass.
- for (unsigned i = 0; i < InsnClass.size(); ++i) {
- unsigned resources = *SI;
- if ((~resources & InsnClass[i]) == 0) {
- available = false;
- break;
- }
- // Make sure _all_ resources for a combo function are available.
- // note: This is a quick conservative check as it won't catch an
- // unscheduleable combo if this stage is an OR expression
- // containing a combo.
- // These cases are caught later in AddInsnClass.
- unsigned combo = ComboBitToBitsMap[InsnClass[i]];
- if (combo && ((~resources & combo) != combo)) {
- LLVM_DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x"
- << Twine::utohexstr(resources) << " - combo op 0x"
- << Twine::utohexstr(InsnClass[i]) << " (0x"
- << Twine::utohexstr(combo)
- << ") cannot be scheduled\n");
- available = false;
- break;
- }
- }
-
- if (available) {
- return true;
- }
- }
- return false;
-}
-
-const State &DFA::newState() {
- auto IterPair = states.insert(State());
- assert(IterPair.second && "State already exists");
- return *IterPair.first;
-}
-
-int State::currentStateNum = 0;
-
DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
TargetName(CodeGenTarget(R).getName()), Records(R) {}
//
-// writeTableAndAPI - Print out a table representing the DFA and the
-// associated API to create a DFA packetizer.
-//
-// Format:
-// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
-// transitions.
-// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable for
-// the ith state.
-//
-//
-void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
- int numInsnClasses,
- int maxResources, int numCombos, int maxStages) {
- unsigned numStates = states.size();
-
- LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
- "----------------------\n");
- LLVM_DEBUG(dbgs() << "writeTableAndAPI\n");
- LLVM_DEBUG(dbgs() << "Total states: " << numStates << "\n");
-
- OS << "namespace llvm {\n";
-
- OS << "\n// Input format:\n";
- OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS
- << "\t// maximum AND'ed resource terms\n";
- OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES
- << "\t// maximum resource bits in one term\n";
-
- OS << "\n// " << TargetName << "DFAStateInputTable[][2] = "
- << "pairs of <Input, NextState> for all valid\n";
- OS << "// transitions.\n";
- OS << "// " << numStates << "\tstates\n";
- OS << "// " << numInsnClasses << "\tinstruction classes\n";
- OS << "// " << maxResources << "\tresources max\n";
- OS << "// " << numCombos << "\tcombo resources\n";
- OS << "// " << maxStages << "\tstages max\n";
- OS << "const " << DFA_TBLTYPE << " "
- << TargetName << "DFAStateInputTable[][2] = {\n";
-
- // This table provides a map to the beginning of the transitions for State s
- // in DFAStateInputTable.
- std::vector<int> StateEntry(numStates+1);
- static const std::string SentinelEntry = "{-1, -1}";
-
- // Tracks the total valid transitions encountered so far. It is used
- // to construct the StateEntry table.
- int ValidTransitions = 0;
- DFA::StateSet::iterator SI = states.begin();
- for (unsigned i = 0; i < numStates; ++i, ++SI) {
- assert ((SI->stateNum == (int) i) && "Mismatch in state numbers");
- StateEntry[i] = ValidTransitions;
- for (State::TransitionMap::iterator
- II = SI->Transitions.begin(), IE = SI->Transitions.end();
- II != IE; ++II) {
- OS << "{0x" << Twine::utohexstr(getDFAInsnInput(II->first)) << ", "
- << II->second->stateNum << "},\t";
- }
- ValidTransitions += SI->Transitions.size();
-
- // If there are no valid transitions from this stage, we need a sentinel
- // transition.
- if (ValidTransitions == StateEntry[i]) {
- OS << SentinelEntry << ",\t";
- ++ValidTransitions;
- }
-
- OS << " // state " << i << ": " << StateEntry[i];
- if (StateEntry[i] != (ValidTransitions-1)) { // More than one transition.
- OS << "-" << (ValidTransitions-1);
- }
- OS << "\n";
- }
-
- // Print out a sentinel entry at the end of the StateInputTable. This is
- // needed to iterate over StateInputTable in DFAPacketizer::ReadTable()
- OS << SentinelEntry << "\t";
- OS << " // state " << numStates << ": " << ValidTransitions;
- OS << "\n";
-
- OS << "};\n\n";
- OS << "// " << TargetName << "DFAStateEntryTable[i] = "
- << "Index of the first entry in DFAStateInputTable for\n";
- OS << "// "
- << "the ith state.\n";
- OS << "// " << numStates << " states\n";
- OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
-
- // Multiply i by 2 since each entry in DFAStateInputTable is a set of
- // two numbers.
- unsigned lastState = 0;
- for (unsigned i = 0; i < numStates; ++i) {
- if (i && ((i % 10) == 0)) {
- lastState = i-1;
- OS << " // states " << (i-10) << ":" << lastState << "\n";
- }
- OS << StateEntry[i] << ", ";
- }
-
- // Print out the index to the sentinel entry in StateInputTable
- OS << ValidTransitions << ", ";
- OS << " // states " << (lastState+1) << ":" << numStates << "\n";
-
- OS << "};\n";
- OS << "} // namespace\n";
-
- //
- // Emit DFA Packetizer tables if the target is a VLIW machine.
- //
- std::string SubTargetClassName = TargetName + "GenSubtargetInfo";
- OS << "\n" << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n";
- OS << "namespace llvm {\n";
- OS << "DFAPacketizer *" << SubTargetClassName << "::"
- << "createDFAPacketizer(const InstrItineraryData *IID) const {\n"
- << " return new DFAPacketizer(IID, " << TargetName
- << "DFAStateInputTable, " << TargetName << "DFAStateEntryTable);\n}\n\n";
- OS << "} // End llvm namespace \n";
-}
-
-//
// collectAllFuncUnits - Construct a map of function unit names to bits.
//
int DFAPacketizerEmitter::collectAllFuncUnits(
@@ -837,10 +389,32 @@ int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
// Run the worklist algorithm to generate the DFA.
//
void DFAPacketizerEmitter::run(raw_ostream &OS) {
+ OS << "\n"
+ << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n";
+ OS << "namespace llvm {\n";
+
+ OS << "\n// Input format:\n";
+ OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS
+ << "\t// maximum AND'ed resource terms\n";
+ OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES
+ << "\t// maximum resource bits in one term\n";
+
// Collect processor iteraries.
std::vector<Record*> ProcItinList =
Records.getAllDerivedDefinitions("ProcessorItineraries");
+ std::unordered_map<std::string, std::vector<Record*>> ItinsByNamespace;
+ for (Record *R : ProcItinList)
+ ItinsByNamespace[R->getValueAsString("PacketizerNamespace")].push_back(R);
+
+ for (auto &KV : ItinsByNamespace)
+ emitForItineraries(OS, KV.second, KV.first);
+ OS << "} // end namespace llvm\n";
+}
+
+void DFAPacketizerEmitter::emitForItineraries(
+ raw_ostream &OS, std::vector<Record *> &ProcItinList,
+ std::string DFAName) {
//
// Collect the Functional units.
//
@@ -855,8 +429,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
std::map<unsigned, unsigned> ComboBitToBitsMap;
std::vector<Record*> ComboFuncList =
Records.getAllDerivedDefinitions("ComboFuncUnits");
- int numCombos = collectAllComboFuncs(ComboFuncList,
- FUNameToBitsMap, ComboBitToBitsMap, OS);
+ collectAllComboFuncs(ComboFuncList, FUNameToBitsMap, ComboBitToBitsMap, OS);
//
// Collect the itineraries.
@@ -887,103 +460,89 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
FUNameToBitsMap, ItinDataList, maxStages, OS);
}
- //
- // Run a worklist algorithm to generate the DFA.
- //
- DFA D;
- const State *Initial = &D.newState();
- Initial->isInitial = true;
- Initial->stateInfo.insert(0x0);
- SmallVector<const State*, 32> WorkList;
- std::map<std::set<unsigned>, const State*> Visited;
-
- WorkList.push_back(Initial);
-
- //
- // Worklist algorithm to create a DFA for processor resource tracking.
- // C = {set of InsnClasses}
- // Begin with initial node in worklist. Initial node does not have
- // any consumed resources,
- // ResourceState = 0x0
- // Visited = {}
- // While worklist != empty
- // S = first element of worklist
- // For every instruction class C
- // if we can accommodate C in S:
- // S' = state with resource states = {S Union C}
- // Add a new transition: S x C -> S'
- // If S' is not in Visited:
- // Add S' to worklist
- // Add S' to Visited
- //
- while (!WorkList.empty()) {
- const State *current = WorkList.pop_back_val();
- LLVM_DEBUG({
- dbgs() << "---------------------\n";
- dbgs() << "Processing state: " << current->stateNum << " - ";
- dbgsStateInfo(current->stateInfo);
- dbgs() << "\n";
- });
- for (unsigned i = 0; i < allInsnClasses.size(); i++) {
- std::vector<unsigned> InsnClass = allInsnClasses[i];
- LLVM_DEBUG({
- dbgs() << i << " ";
- dbgsInsnClass(InsnClass);
- dbgs() << "\n";
- });
-
- std::set<unsigned> NewStateResources;
- //
- // If we haven't already created a transition for this input
- // and the state can accommodate this InsnClass, create a transition.
- //
- if (!current->hasTransition(InsnClass) &&
- current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) {
- const State *NewState = nullptr;
- current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources);
- if (NewStateResources.empty()) {
- LLVM_DEBUG(dbgs() << " Skipped - no new states generated\n");
- continue;
- }
-
- LLVM_DEBUG({
- dbgs() << "\t";
- dbgsStateInfo(NewStateResources);
- dbgs() << "\n";
- });
-
- //
- // If we have seen this state before, then do not create a new state.
- //
- auto VI = Visited.find(NewStateResources);
- if (VI != Visited.end()) {
- NewState = VI->second;
- LLVM_DEBUG({
- dbgs() << "\tFound existing state: " << NewState->stateNum
- << " - ";
- dbgsStateInfo(NewState->stateInfo);
- dbgs() << "\n";
- });
- } else {
- NewState = &D.newState();
- NewState->stateInfo = NewStateResources;
- Visited[NewStateResources] = NewState;
- WorkList.push_back(NewState);
- LLVM_DEBUG({
- dbgs() << "\tAccepted new state: " << NewState->stateNum << " - ";
- dbgsStateInfo(NewState->stateInfo);
- dbgs() << "\n";
- });
+ // The type of a state in the nondeterministic automaton we're defining.
+ using NfaStateTy = unsigned;
+
+ // Given a resource state, return all resource states by applying
+ // InsnClass.
+ auto applyInsnClass = [&](ArrayRef<unsigned> InsnClass,
+ NfaStateTy State) -> std::deque<unsigned> {
+ std::deque<unsigned> V(1, State);
+ // Apply every stage in the class individually.
+ for (unsigned Stage : InsnClass) {
+ // Apply this stage to every existing member of V in turn.
+ size_t Sz = V.size();
+ for (unsigned I = 0; I < Sz; ++I) {
+ unsigned S = V.front();
+ V.pop_front();
+
+ // For this stage, state combination, try all possible resources.
+ for (unsigned J = 0; J < DFA_MAX_RESOURCES; ++J) {
+ unsigned ResourceMask = 1U << J;
+ if ((ResourceMask & Stage) == 0)
+ // This resource isn't required by this stage.
+ continue;
+ unsigned Combo = ComboBitToBitsMap[ResourceMask];
+ if (Combo && ((~S & Combo) != Combo))
+ // This combo units bits are not available.
+ continue;
+ unsigned ResultingResourceState = S | ResourceMask | Combo;
+ if (ResultingResourceState == S)
+ continue;
+ V.push_back(ResultingResourceState);
}
-
- current->addTransition(InsnClass, NewState);
+ }
+ }
+ return V;
+ };
+
+ // Given a resource state, return a quick (conservative) guess as to whether
+ // InsnClass can be applied. This is a filter for the more heavyweight
+ // applyInsnClass.
+ auto canApplyInsnClass = [](ArrayRef<unsigned> InsnClass,
+ NfaStateTy State) -> bool {
+ for (unsigned Resources : InsnClass) {
+ if ((State | Resources) == State)
+ return false;
+ }
+ return true;
+ };
+
+ DfaEmitter Emitter;
+ std::deque<NfaStateTy> Worklist(1, 0);
+ std::set<NfaStateTy> SeenStates;
+ SeenStates.insert(Worklist.front());
+ while (!Worklist.empty()) {
+ NfaStateTy State = Worklist.front();
+ Worklist.pop_front();
+ for (unsigned i = 0; i < allInsnClasses.size(); i++) {
+ const std::vector<unsigned> &InsnClass = allInsnClasses[i];
+ if (!canApplyInsnClass(InsnClass, State))
+ continue;
+ for (unsigned NewState : applyInsnClass(InsnClass, State)) {
+ if (SeenStates.emplace(NewState).second)
+ Worklist.emplace_back(NewState);
+ Emitter.addTransition(State, NewState, getDFAInsnInput(InsnClass));
}
}
}
- // Print out the table.
- D.writeTableAndAPI(OS, TargetName,
- numInsnClasses, maxResources, numCombos, maxStages);
+ OS << "} // end namespace llvm\n\n";
+ OS << "namespace {\n";
+ std::string TargetAndDFAName = TargetName + DFAName;
+ Emitter.emit(TargetAndDFAName, OS);
+ OS << "} // end anonymous namespace\n\n";
+
+ std::string SubTargetClassName = TargetName + "GenSubtargetInfo";
+ OS << "namespace llvm {\n";
+ OS << "DFAPacketizer *" << SubTargetClassName << "::"
+ << "create" << DFAName
+ << "DFAPacketizer(const InstrItineraryData *IID) const {\n"
+ << " static Automaton<uint64_t> A(ArrayRef<" << TargetAndDFAName
+ << "Transition>(" << TargetAndDFAName << "Transitions), "
+ << TargetAndDFAName << "TransitionInfo);\n"
+ << " return new DFAPacketizer(IID, A);\n"
+ << "\n}\n\n";
}
namespace llvm {
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index 9e75c7fba77b..0002b0e14db6 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -153,4 +153,4 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
"MCDisassembler::Success", "MCDisassembler::Fail", "");
}
-} // End llvm namespace
+} // end namespace llvm
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index f5e975d2e5ae..ac69b431607d 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -13,6 +13,7 @@
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
+#include "InfoByHwMode.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/CachedHashString.h"
@@ -64,9 +65,10 @@ struct OperandInfo {
std::vector<EncodingField> Fields;
std::string Decoder;
bool HasCompleteDecoder;
+ uint64_t InitValue;
OperandInfo(std::string D, bool HCD)
- : Decoder(std::move(D)), HasCompleteDecoder(HCD) {}
+ : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {}
void addField(unsigned Base, unsigned Width, unsigned Offset) {
Fields.push_back(EncodingField(Base, Width, Offset));
@@ -96,9 +98,11 @@ struct DecoderTableInfo {
struct EncodingAndInst {
const Record *EncodingDef;
const CodeGenInstruction *Inst;
+ StringRef HwModeName;
- EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst)
- : EncodingDef(EncodingDef), Inst(Inst) {}
+ EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst,
+ StringRef HwModeName = "")
+ : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {}
};
struct EncodingIDAndOpcode {
@@ -599,7 +603,7 @@ void Filter::recurse() {
// Delegates to an inferior filter chooser for further processing on this
// group of instructions whose segment values are variable.
FilterChooserMap.insert(
- std::make_pair(-1U, llvm::make_unique<FilterChooser>(
+ std::make_pair(-1U, std::make_unique<FilterChooser>(
Owner->AllInstructions, VariableInstructions,
Owner->Operands, BitValueArray, *Owner)));
}
@@ -625,7 +629,7 @@ void Filter::recurse() {
// Delegates to an inferior filter chooser for further processing on this
// category of instructions.
FilterChooserMap.insert(std::make_pair(
- Inst.first, llvm::make_unique<FilterChooser>(
+ Inst.first, std::make_unique<FilterChooser>(
Owner->AllInstructions, Inst.second,
Owner->Operands, BitValueArray, *Owner)));
}
@@ -1103,12 +1107,15 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
bool &OpHasCompleteDecoder) const {
const std::string &Decoder = OpInfo.Decoder;
- if (OpInfo.numFields() != 1)
- o.indent(Indentation) << "tmp = 0;\n";
+ if (OpInfo.numFields() != 1 || OpInfo.InitValue != 0) {
+ o.indent(Indentation) << "tmp = 0x";
+ o.write_hex(OpInfo.InitValue);
+ o << ";\n";
+ }
for (const EncodingField &EF : OpInfo) {
o.indent(Indentation) << "tmp ";
- if (OpInfo.numFields() != 1) o << '|';
+ if (OpInfo.numFields() != 1 || OpInfo.InitValue != 0) o << '|';
o << "= fieldFromInstruction"
<< "(insn, " << EF.Base << ", " << EF.Width << ')';
if (OpInfo.numFields() != 1 || EF.Offset != 0)
@@ -2026,6 +2033,16 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
HasCompleteDecoderBit->getValue() : true;
OperandInfo OpInfo(Decoder, HasCompleteDecoder);
+
+ // Some bits of the operand may be required to be 1 depending on the
+ // instruction's encoding. Collect those bits.
+ if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second))
+ if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue()))
+ for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
+ if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
+ if (OpBit->getValue())
+ OpInfo.InitValue |= 1ULL << I;
+
unsigned Base = ~0U;
unsigned Width = 0;
unsigned Offset = 0;
@@ -2368,12 +2385,50 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
Target.reverseBitsForLittleEndianEncoding();
// Parameterize the decoders based on namespace and instruction width.
+ std::set<StringRef> HwModeNames;
const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
NumberedEncodings.reserve(NumberedInstructions.size());
DenseMap<Record *, unsigned> IndexOfInstruction;
+ // First, collect all HwModes referenced by the target.
for (const auto &NumberedInstruction : NumberedInstructions) {
IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
- NumberedEncodings.emplace_back(NumberedInstruction->TheDef, NumberedInstruction);
+
+ if (const RecordVal *RV =
+ NumberedInstruction->TheDef->getValue("EncodingInfos")) {
+ if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ const CodeGenHwModes &HWM = Target.getHwModes();
+ EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+ for (auto &KV : EBM.Map)
+ HwModeNames.insert(HWM.getMode(KV.first).Name);
+ }
+ }
+ }
+
+ // If HwModeNames is empty, add the empty string so we always have one HwMode.
+ if (HwModeNames.empty())
+ HwModeNames.insert("");
+
+ for (const auto &NumberedInstruction : NumberedInstructions) {
+ IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
+
+ if (const RecordVal *RV =
+ NumberedInstruction->TheDef->getValue("EncodingInfos")) {
+ if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ const CodeGenHwModes &HWM = Target.getHwModes();
+ EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+ for (auto &KV : EBM.Map) {
+ NumberedEncodings.emplace_back(KV.second, NumberedInstruction,
+ HWM.getMode(KV.first).Name);
+ HwModeNames.insert(HWM.getMode(KV.first).Name);
+ }
+ continue;
+ }
+ }
+ // This instruction is encoded the same on all HwModes. Emit it for all
+ // HwModes.
+ for (StringRef HwModeName : HwModeNames)
+ NumberedEncodings.emplace_back(NumberedInstruction->TheDef,
+ NumberedInstruction, HwModeName);
}
for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding"))
NumberedEncodings.emplace_back(
@@ -2401,13 +2456,19 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
NumInstructions++;
NumEncodings++;
- StringRef DecoderNamespace = EncodingDef->getValueAsString("DecoderNamespace");
+ if (!Size)
+ continue;
- if (Size) {
- if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) {
- OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(i, IndexOfInstruction.find(Def)->second);
- } else
- NumEncodingsOmitted++;
+ if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) {
+ std::string DecoderNamespace =
+ EncodingDef->getValueAsString("DecoderNamespace");
+ if (!NumberedEncodings[i].HwModeName.empty())
+ DecoderNamespace +=
+ std::string("_") + NumberedEncodings[i].HwModeName.str();
+ OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(
+ i, IndexOfInstruction.find(Def)->second);
+ } else {
+ NumEncodingsOmitted++;
}
}
@@ -2451,7 +2512,7 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
// Emit the main entry point for the decoder, decodeInstruction().
emitDecodeInstruction(OS);
- OS << "\n} // End llvm namespace\n";
+ OS << "\n} // end namespace llvm\n";
}
namespace llvm {
diff --git a/utils/TableGen/GICombinerEmitter.cpp b/utils/TableGen/GICombinerEmitter.cpp
new file mode 100644
index 000000000000..5dc4d6b07740
--- /dev/null
+++ b/utils/TableGen/GICombinerEmitter.cpp
@@ -0,0 +1,452 @@
+//===- GlobalCombinerEmitter.cpp - Generate a combiner --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Generate a combiner implementation for GlobalISel from a declarative
+/// syntax
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/StringMatcher.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include "CodeGenTarget.h"
+#include "GlobalISel/CodeExpander.h"
+#include "GlobalISel/CodeExpansions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gicombiner-emitter"
+
+// FIXME: Use ALWAYS_ENABLED_STATISTIC once it's available.
+unsigned NumPatternTotal = 0;
+STATISTIC(NumPatternTotalStatistic, "Total number of patterns");
+
+cl::OptionCategory
+ GICombinerEmitterCat("Options for -gen-global-isel-combiner");
+static cl::list<std::string>
+ SelectedCombiners("combiners", cl::desc("Emit the specified combiners"),
+ cl::cat(GICombinerEmitterCat), cl::CommaSeparated);
+static cl::opt<bool> ShowExpansions(
+ "gicombiner-show-expansions",
+ cl::desc("Use C++ comments to indicate occurence of code expansion"),
+ cl::cat(GICombinerEmitterCat));
+
+namespace {
+typedef uint64_t RuleID;
+
+class RootInfo {
+ StringRef PatternSymbol;
+
+public:
+ RootInfo(StringRef PatternSymbol) : PatternSymbol(PatternSymbol) {}
+
+ StringRef getPatternSymbol() const { return PatternSymbol; }
+};
+
+class CombineRule {
+protected:
+ /// A unique ID for this rule
+ /// ID's are used for debugging and run-time disabling of rules among other
+ /// things.
+ RuleID ID;
+
+ /// The record defining this rule.
+ const Record &TheDef;
+
+ /// The roots of a match. These are the leaves of the DAG that are closest to
+ /// the end of the function. I.e. the nodes that are encountered without
+ /// following any edges of the DAG described by the pattern as we work our way
+ /// from the bottom of the function to the top.
+ std::vector<RootInfo> Roots;
+
+ /// A block of arbitrary C++ to finish testing the match.
+ /// FIXME: This is a temporary measure until we have actual pattern matching
+ const CodeInit *MatchingFixupCode = nullptr;
+public:
+ CombineRule(const CodeGenTarget &Target, RuleID ID, const Record &R)
+ : ID(ID), TheDef(R) {}
+ bool parseDefs();
+ bool parseMatcher(const CodeGenTarget &Target);
+
+ RuleID getID() const { return ID; }
+ StringRef getName() const { return TheDef.getName(); }
+ const Record &getDef() const { return TheDef; }
+ const CodeInit *getMatchingFixupCode() const { return MatchingFixupCode; }
+ size_t getNumRoots() const { return Roots.size(); }
+
+ using const_root_iterator = std::vector<RootInfo>::const_iterator;
+ const_root_iterator roots_begin() const { return Roots.begin(); }
+ const_root_iterator roots_end() const { return Roots.end(); }
+ iterator_range<const_root_iterator> roots() const {
+ return llvm::make_range(Roots.begin(), Roots.end());
+ }
+};
+
+/// A convenience function to check that an Init refers to a specific def. This
+/// is primarily useful for testing for defs and similar in DagInit's since
+/// DagInit's support any type inside them.
+static bool isSpecificDef(const Init &N, StringRef Def) {
+ if (const DefInit *OpI = dyn_cast<DefInit>(&N))
+ if (OpI->getDef()->getName() == Def)
+ return true;
+ return false;
+}
+
+/// A convenience function to check that an Init refers to a def that is a
+/// subclass of the given class and coerce it to a def if it is. This is
+/// primarily useful for testing for subclasses of GIMatchKind and similar in
+/// DagInit's since DagInit's support any type inside them.
+static Record *getDefOfSubClass(const Init &N, StringRef Cls) {
+ if (const DefInit *OpI = dyn_cast<DefInit>(&N))
+ if (OpI->getDef()->isSubClassOf(Cls))
+ return OpI->getDef();
+ return nullptr;
+}
+
+bool CombineRule::parseDefs() {
+ NamedRegionTimer T("parseDefs", "Time spent parsing the defs", "Rule Parsing",
+ "Time spent on rule parsing", TimeRegions);
+ DagInit *Defs = TheDef.getValueAsDag("Defs");
+
+ if (Defs->getOperatorAsDef(TheDef.getLoc())->getName() != "defs") {
+ PrintError(TheDef.getLoc(), "Expected defs operator");
+ return false;
+ }
+
+ for (unsigned I = 0, E = Defs->getNumArgs(); I < E; ++I) {
+ // Roots should be collected into Roots
+ if (isSpecificDef(*Defs->getArg(I), "root")) {
+ Roots.emplace_back(Defs->getArgNameStr(I));
+ continue;
+ }
+
+ // Otherwise emit an appropriate error message.
+ if (getDefOfSubClass(*Defs->getArg(I), "GIDefKind"))
+ PrintError(TheDef.getLoc(),
+ "This GIDefKind not implemented in tablegen");
+ else if (getDefOfSubClass(*Defs->getArg(I), "GIDefKindWithArgs"))
+ PrintError(TheDef.getLoc(),
+ "This GIDefKindWithArgs not implemented in tablegen");
+ else
+ PrintError(TheDef.getLoc(),
+ "Expected a subclass of GIDefKind or a sub-dag whose "
+ "operator is of type GIDefKindWithArgs");
+ return false;
+ }
+
+ if (Roots.empty()) {
+ PrintError(TheDef.getLoc(), "Combine rules must have at least one root");
+ return false;
+ }
+ return true;
+}
+
+bool CombineRule::parseMatcher(const CodeGenTarget &Target) {
+ NamedRegionTimer T("parseMatcher", "Time spent parsing the matcher",
+ "Rule Parsing", "Time spent on rule parsing", TimeRegions);
+ DagInit *Matchers = TheDef.getValueAsDag("Match");
+
+ if (Matchers->getOperatorAsDef(TheDef.getLoc())->getName() != "match") {
+ PrintError(TheDef.getLoc(), "Expected match operator");
+ return false;
+ }
+
+ if (Matchers->getNumArgs() == 0) {
+ PrintError(TheDef.getLoc(), "Matcher is empty");
+ return false;
+ }
+
+ // The match section consists of a list of matchers and predicates. Parse each
+ // one and add the equivalent GIMatchDag nodes, predicates, and edges.
+ for (unsigned I = 0; I < Matchers->getNumArgs(); ++I) {
+
+ // Parse arbitrary C++ code we have in lieu of supporting MIR matching
+ if (const CodeInit *CodeI = dyn_cast<CodeInit>(Matchers->getArg(I))) {
+ assert(!MatchingFixupCode &&
+ "Only one block of arbitrary code is currently permitted");
+ MatchingFixupCode = CodeI;
+ continue;
+ }
+
+ PrintError(TheDef.getLoc(),
+ "Expected a subclass of GIMatchKind or a sub-dag whose "
+ "operator is either of a GIMatchKindWithArgs or Instruction");
+ PrintNote("Pattern was `" + Matchers->getArg(I)->getAsString() + "'");
+ return false;
+ }
+ return true;
+}
+
+class GICombinerEmitter {
+ StringRef Name;
+ const CodeGenTarget &Target;
+ Record *Combiner;
+ std::vector<std::unique_ptr<CombineRule>> Rules;
+ std::unique_ptr<CombineRule> makeCombineRule(const Record &R);
+
+ void gatherRules(std::vector<std::unique_ptr<CombineRule>> &ActiveRules,
+ const std::vector<Record *> &&RulesAndGroups);
+
+public:
+ explicit GICombinerEmitter(RecordKeeper &RK, const CodeGenTarget &Target,
+ StringRef Name, Record *Combiner);
+ ~GICombinerEmitter() {}
+
+ StringRef getClassName() const {
+ return Combiner->getValueAsString("Classname");
+ }
+ void run(raw_ostream &OS);
+
+ /// Emit the name matcher (guarded by #ifndef NDEBUG) used to disable rules in
+ /// response to the generated cl::opt.
+ void emitNameMatcher(raw_ostream &OS) const;
+ void generateCodeForRule(raw_ostream &OS, const CombineRule *Rule,
+ StringRef Indent) const;
+};
+
+GICombinerEmitter::GICombinerEmitter(RecordKeeper &RK,
+ const CodeGenTarget &Target,
+ StringRef Name, Record *Combiner)
+ : Name(Name), Target(Target), Combiner(Combiner) {}
+
+void GICombinerEmitter::emitNameMatcher(raw_ostream &OS) const {
+ std::vector<std::pair<std::string, std::string>> Cases;
+ Cases.reserve(Rules.size());
+
+ for (const CombineRule &EnumeratedRule : make_pointee_range(Rules)) {
+ std::string Code;
+ raw_string_ostream SS(Code);
+ SS << "return " << EnumeratedRule.getID() << ";\n";
+ Cases.push_back(std::make_pair(EnumeratedRule.getName(), SS.str()));
+ }
+
+ OS << "static Optional<uint64_t> getRuleIdxForIdentifier(StringRef "
+ "RuleIdentifier) {\n"
+ << " uint64_t I;\n"
+ << " // getAtInteger(...) returns false on success\n"
+ << " bool Parsed = !RuleIdentifier.getAsInteger(0, I);\n"
+ << " if (Parsed)\n"
+ << " return I;\n\n"
+ << "#ifndef NDEBUG\n";
+ StringMatcher Matcher("RuleIdentifier", Cases, OS);
+ Matcher.Emit();
+ OS << "#endif // ifndef NDEBUG\n\n"
+ << " return None;\n"
+ << "}\n";
+}
+
+std::unique_ptr<CombineRule>
+GICombinerEmitter::makeCombineRule(const Record &TheDef) {
+ std::unique_ptr<CombineRule> Rule =
+ std::make_unique<CombineRule>(Target, NumPatternTotal, TheDef);
+
+ if (!Rule->parseDefs())
+ return nullptr;
+ if (!Rule->parseMatcher(Target))
+ return nullptr;
+ // For now, don't support multi-root rules. We'll come back to this later
+ // once we have the algorithm changes to support it.
+ if (Rule->getNumRoots() > 1) {
+ PrintError(TheDef.getLoc(), "Multi-root matches are not supported (yet)");
+ return nullptr;
+ }
+ return Rule;
+}
+
+/// Recurse into GICombineGroup's and flatten the ruleset into a simple list.
+void GICombinerEmitter::gatherRules(
+ std::vector<std::unique_ptr<CombineRule>> &ActiveRules,
+ const std::vector<Record *> &&RulesAndGroups) {
+ for (Record *R : RulesAndGroups) {
+ if (R->isValueUnset("Rules")) {
+ std::unique_ptr<CombineRule> Rule = makeCombineRule(*R);
+ if (Rule == nullptr) {
+ PrintError(R->getLoc(), "Failed to parse rule");
+ continue;
+ }
+ ActiveRules.emplace_back(std::move(Rule));
+ ++NumPatternTotal;
+ } else
+ gatherRules(ActiveRules, R->getValueAsListOfDefs("Rules"));
+ }
+}
+
+void GICombinerEmitter::generateCodeForRule(raw_ostream &OS,
+ const CombineRule *Rule,
+ StringRef Indent) const {
+ {
+ const Record &RuleDef = Rule->getDef();
+
+ OS << Indent << "// Rule: " << RuleDef.getName() << "\n"
+ << Indent << "if (!isRuleDisabled(" << Rule->getID() << ")) {\n";
+
+ CodeExpansions Expansions;
+ for (const RootInfo &Root : Rule->roots()) {
+ Expansions.declare(Root.getPatternSymbol(), "MI");
+ }
+ DagInit *Applyer = RuleDef.getValueAsDag("Apply");
+ if (Applyer->getOperatorAsDef(RuleDef.getLoc())->getName() !=
+ "apply") {
+ PrintError(RuleDef.getLoc(), "Expected apply operator");
+ return;
+ }
+
+ OS << Indent << " if (1\n";
+
+ if (Rule->getMatchingFixupCode() &&
+ !Rule->getMatchingFixupCode()->getValue().empty()) {
+ // FIXME: Single-use lambda's like this are a serious compile-time
+ // performance and memory issue. It's convenient for this early stage to
+ // defer some work to successive patches but we need to eliminate this
+ // before the ruleset grows to small-moderate size. Last time, it became
+ // a big problem for low-mem systems around the 500 rule mark but by the
+ // time we grow that large we should have merged the ISel match table
+ // mechanism with the Combiner.
+ OS << Indent << " && [&]() {\n"
+ << Indent << " "
+ << CodeExpander(Rule->getMatchingFixupCode()->getValue(), Expansions,
+ Rule->getMatchingFixupCode()->getLoc(), ShowExpansions)
+ << "\n"
+ << Indent << " return true;\n"
+ << Indent << " }()";
+ }
+ OS << ") {\n" << Indent << " ";
+
+ if (const CodeInit *Code = dyn_cast<CodeInit>(Applyer->getArg(0))) {
+ OS << CodeExpander(Code->getAsUnquotedString(), Expansions,
+ Code->getLoc(), ShowExpansions)
+ << "\n"
+ << Indent << " return true;\n"
+ << Indent << " }\n";
+ } else {
+ PrintError(RuleDef.getLoc(), "Expected apply code block");
+ return;
+ }
+
+ OS << Indent << "}\n";
+ }
+}
+
+void GICombinerEmitter::run(raw_ostream &OS) {
+ gatherRules(Rules, Combiner->getValueAsListOfDefs("Rules"));
+ NamedRegionTimer T("Emit", "Time spent emitting the combiner",
+ "Code Generation", "Time spent generating code",
+ TimeRegions);
+ OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_DEPS\n"
+ << "#include \"llvm/ADT/SparseBitVector.h\"\n"
+ << "namespace llvm {\n"
+ << "extern cl::OptionCategory GICombinerOptionCategory;\n"
+ << "} // end namespace llvm\n"
+ << "#endif // ifdef " << Name.upper() << "_GENCOMBINERHELPER_DEPS\n\n";
+
+ OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_H\n"
+ << "class " << getClassName() << " {\n"
+ << " SparseBitVector<> DisabledRules;\n"
+ << "\n"
+ << "public:\n"
+ << " bool parseCommandLineOption();\n"
+ << " bool isRuleDisabled(unsigned ID) const;\n"
+ << " bool setRuleDisabled(StringRef RuleIdentifier);\n"
+ << "\n"
+ << " bool tryCombineAll(\n"
+ << " GISelChangeObserver &Observer,\n"
+ << " MachineInstr &MI,\n"
+ << " MachineIRBuilder &B) const;\n"
+ << "};\n\n";
+
+ emitNameMatcher(OS);
+
+ OS << "bool " << getClassName()
+ << "::setRuleDisabled(StringRef RuleIdentifier) {\n"
+ << " std::pair<StringRef, StringRef> RangePair = "
+ "RuleIdentifier.split('-');\n"
+ << " if (!RangePair.second.empty()) {\n"
+ << " const auto First = getRuleIdxForIdentifier(RangePair.first);\n"
+ << " const auto Last = getRuleIdxForIdentifier(RangePair.second);\n"
+ << " if (!First.hasValue() || !Last.hasValue())\n"
+ << " return false;\n"
+ << " if (First >= Last)\n"
+ << " report_fatal_error(\"Beginning of range should be before end of "
+ "range\");\n"
+ << " for (auto I = First.getValue(); I < Last.getValue(); ++I)\n"
+ << " DisabledRules.set(I);\n"
+ << " return true;\n"
+ << " } else {\n"
+ << " const auto I = getRuleIdxForIdentifier(RangePair.first);\n"
+ << " if (!I.hasValue())\n"
+ << " return false;\n"
+ << " DisabledRules.set(I.getValue());\n"
+ << " return true;\n"
+ << " }\n"
+ << " return false;\n"
+ << "}\n";
+
+ OS << "bool " << getClassName()
+ << "::isRuleDisabled(unsigned RuleID) const {\n"
+ << " return DisabledRules.test(RuleID);\n"
+ << "}\n";
+ OS << "#endif // ifdef " << Name.upper() << "_GENCOMBINERHELPER_H\n\n";
+
+ OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_CPP\n"
+ << "\n"
+ << "cl::list<std::string> " << Name << "Option(\n"
+ << " \"" << Name.lower() << "-disable-rule\",\n"
+ << " cl::desc(\"Disable one or more combiner rules temporarily in "
+ << "the " << Name << " pass\"),\n"
+ << " cl::CommaSeparated,\n"
+ << " cl::Hidden,\n"
+ << " cl::cat(GICombinerOptionCategory));\n"
+ << "\n"
+ << "bool " << getClassName() << "::parseCommandLineOption() {\n"
+ << " for (const auto &Identifier : " << Name << "Option)\n"
+ << " if (!setRuleDisabled(Identifier))\n"
+ << " return false;\n"
+ << " return true;\n"
+ << "}\n\n";
+
+ OS << "bool " << getClassName() << "::tryCombineAll(\n"
+ << " GISelChangeObserver &Observer,\n"
+ << " MachineInstr &MI,\n"
+ << " MachineIRBuilder &B) const {\n"
+ << " CombinerHelper Helper(Observer, B);\n"
+ << " MachineBasicBlock *MBB = MI.getParent();\n"
+ << " MachineFunction *MF = MBB->getParent();\n"
+ << " MachineRegisterInfo &MRI = MF->getRegInfo();\n"
+ << " (void)MBB; (void)MF; (void)MRI;\n\n";
+
+ for (const auto &Rule : Rules)
+ generateCodeForRule(OS, Rule.get(), " ");
+ OS << "\n return false;\n"
+ << "}\n"
+ << "#endif // ifdef " << Name.upper() << "_GENCOMBINERHELPER_CPP\n";
+}
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) {
+ CodeGenTarget Target(RK);
+ emitSourceFileHeader("Global Combiner", OS);
+
+ if (SelectedCombiners.empty())
+ PrintFatalError("No combiners selected with -combiners");
+ for (const auto &Combiner : SelectedCombiners) {
+ Record *CombinerDef = RK.getDef(Combiner);
+ if (!CombinerDef)
+ PrintFatalError("Could not find " + Combiner);
+ GICombinerEmitter(RK, Target, Combiner, CombinerDef).run(OS);
+ }
+ NumPatternTotalStatistic = NumPatternTotal;
+}
+
+} // namespace llvm
diff --git a/utils/TableGen/GlobalISel/CMakeLists.txt b/utils/TableGen/GlobalISel/CMakeLists.txt
new file mode 100644
index 000000000000..2f74d1087bcd
--- /dev/null
+++ b/utils/TableGen/GlobalISel/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_LINK_COMPONENTS
+ Support
+ )
+
+llvm_add_library(LLVMTableGenGlobalISel STATIC DISABLE_LLVM_LINK_LLVM_DYLIB
+ CodeExpander.cpp
+ )
diff --git a/utils/TableGen/GlobalISel/CodeExpander.cpp b/utils/TableGen/GlobalISel/CodeExpander.cpp
new file mode 100644
index 000000000000..d59a9b8e3b65
--- /dev/null
+++ b/utils/TableGen/GlobalISel/CodeExpander.cpp
@@ -0,0 +1,93 @@
+//===- CodeExpander.cpp - Expand variables in a string --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Expand the variables in a string.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeExpander.h"
+#include "CodeExpansions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+
+using namespace llvm;
+
+void CodeExpander::emit(raw_ostream &OS) const {
+ StringRef Current = Code;
+
+ while (!Current.empty()) {
+ size_t Pos = Current.find_first_of("$\n\\");
+ if (Pos == StringRef::npos) {
+ OS << Current;
+ Current = "";
+ continue;
+ }
+
+ OS << Current.substr(0, Pos);
+ Current = Current.substr(Pos);
+
+ if (Current.startswith("\n")) {
+ OS << "\n" << Indent;
+ Current = Current.drop_front(1);
+ continue;
+ }
+
+ if (Current.startswith("\\$") || Current.startswith("\\\\")) {
+ OS << Current[1];
+ Current = Current.drop_front(2);
+ continue;
+ }
+
+ if (Current.startswith("\\")) {
+ Current = Current.drop_front(1);
+ continue;
+ }
+
+ if (Current.startswith("${")) {
+ StringRef StartVar = Current;
+ Current = Current.drop_front(2);
+ StringRef Var;
+ std::tie(Var, Current) = Current.split("}");
+
+ // Warn if we split because no terminator was found.
+ StringRef EndVar = StartVar.drop_front(2 /* ${ */ + Var.size());
+ if (EndVar.empty()) {
+ size_t LocOffset = StartVar.data() - Code.data();
+ PrintWarning(
+ Loc.size() > 0 && Loc[0].isValid()
+ ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset)
+ : SMLoc(),
+ "Unterminated expansion");
+ }
+
+ auto ValueI = Expansions.find(Var);
+ if (ValueI == Expansions.end()) {
+ size_t LocOffset = StartVar.data() - Code.data();
+ PrintError(Loc.size() > 0 && Loc[0].isValid()
+ ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset)
+ : SMLoc(),
+ "Attempting to expand an undeclared variable " + Var);
+ }
+ if (ShowExpansions)
+ OS << "/*$" << Var << "{*/";
+ OS << Expansions.lookup(Var);
+ if (ShowExpansions)
+ OS << "/*}*/";
+ continue;
+ }
+
+ size_t LocOffset = Current.data() - Code.data();
+ PrintWarning(Loc.size() > 0 && Loc[0].isValid()
+ ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset)
+ : SMLoc(),
+ "Assuming missing escape character");
+ OS << "$";
+ Current = Current.drop_front(1);
+ }
+}
diff --git a/utils/TableGen/GlobalISel/CodeExpander.h b/utils/TableGen/GlobalISel/CodeExpander.h
new file mode 100644
index 000000000000..bd6946de5925
--- /dev/null
+++ b/utils/TableGen/GlobalISel/CodeExpander.h
@@ -0,0 +1,55 @@
+//===- CodeExpander.h - Expand variables in a string ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Expand the variables in a string.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_CODEEXPANDER_H
+#define LLVM_UTILS_TABLEGEN_CODEEXPANDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/SMLoc.h"
+
+namespace llvm {
+class CodeExpansions;
+class raw_ostream;
+
+/// Emit the given code with all '${foo}' placeholders expanded to their
+/// replacements.
+///
+/// It's an error to use an undefined expansion and expansion-like output that
+/// needs to be emitted verbatim can be escaped as '\${foo}'
+///
+/// The emitted code can be given a custom indent to enable both indentation by
+/// an arbitrary amount of whitespace and emission of the code as a comment.
+class CodeExpander {
+ StringRef Code;
+ const CodeExpansions &Expansions;
+ const ArrayRef<SMLoc> &Loc;
+ bool ShowExpansions;
+ StringRef Indent;
+
+public:
+ CodeExpander(StringRef Code, const CodeExpansions &Expansions,
+ const ArrayRef<SMLoc> &Loc, bool ShowExpansions,
+ StringRef Indent = " ")
+ : Code(Code), Expansions(Expansions), Loc(Loc),
+ ShowExpansions(ShowExpansions), Indent(Indent) {}
+
+ void emit(raw_ostream &OS) const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const CodeExpander &Expander) {
+ Expander.emit(OS);
+ return OS;
+}
+} // end namespace llvm
+
+#endif // ifndef LLVM_UTILS_TABLEGEN_CODEEXPANDER_H
diff --git a/utils/TableGen/GlobalISel/CodeExpansions.h b/utils/TableGen/GlobalISel/CodeExpansions.h
new file mode 100644
index 000000000000..bb890ec8f57e
--- /dev/null
+++ b/utils/TableGen/GlobalISel/CodeExpansions.h
@@ -0,0 +1,43 @@
+//===- CodeExpansions.h - Record expansions for CodeExpander --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Record the expansions to use in a CodeExpander.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringMap.h"
+
+#ifndef LLVM_UTILS_TABLEGEN_CODEEXPANSIONS_H
+#define LLVM_UTILS_TABLEGEN_CODEEXPANSIONS_H
+namespace llvm {
+class CodeExpansions {
+public:
+ using const_iterator = StringMap<std::string>::const_iterator;
+
+protected:
+ StringMap<std::string> Expansions;
+
+public:
+ void declare(StringRef Name, StringRef Expansion) {
+ bool Inserted = Expansions.try_emplace(Name, Expansion).second;
+ assert(Inserted && "Declared variable twice");
+ (void)Inserted;
+ }
+
+ std::string lookup(StringRef Variable) const {
+ return Expansions.lookup(Variable);
+ }
+
+ const_iterator begin() const { return Expansions.begin(); }
+ const_iterator end() const { return Expansions.end(); }
+ const_iterator find(StringRef Variable) const {
+ return Expansions.find(Variable);
+ }
+};
+} // end namespace llvm
+#endif // ifndef LLVM_UTILS_TABLEGEN_CODEEXPANSIONS_H
diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp
index f1c02134198b..d8d4c9f4f55c 100644
--- a/utils/TableGen/GlobalISelEmitter.cpp
+++ b/utils/TableGen/GlobalISelEmitter.cpp
@@ -249,6 +249,10 @@ static std::string explainPredicates(const TreePatternNode *N) {
OS << ']';
}
+ int64_t MinAlign = P.getMinAlignment();
+ if (MinAlign > 0)
+ Explanation += " MinAlign=" + utostr(MinAlign);
+
if (P.isAtomicOrderingMonotonic())
Explanation += " monotonic";
if (P.isAtomicOrderingAcquire())
@@ -329,6 +333,9 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
const ListInit *AddrSpaces = Predicate.getAddressSpaces();
if (AddrSpaces && !AddrSpaces->empty())
continue;
+
+ if (Predicate.getMinAlignment() > 0)
+ continue;
}
if (Predicate.isAtomic() && Predicate.getMemoryVT())
@@ -822,6 +829,10 @@ protected:
/// the renderers.
StringMap<OperandMatcher *> DefinedOperands;
+ /// A map of anonymous physical register operands defined by the matchers that
+ /// may be referenced by the renderers.
+ DenseMap<Record *, OperandMatcher *> PhysRegOperands;
+
/// ID for the next instruction variable defined with implicitlyDefineInsnVar()
unsigned NextInsnVarID;
@@ -904,6 +915,8 @@ public:
void defineOperand(StringRef SymbolicName, OperandMatcher &OM);
+ void definePhysRegOperand(Record *Reg, OperandMatcher &OM);
+
Error defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern,
unsigned RendererID, unsigned SubOperandID) {
if (ComplexSubOperands.count(SymbolicName))
@@ -927,6 +940,7 @@ public:
InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const;
const OperandMatcher &getOperandMatcher(StringRef Name) const;
+ const OperandMatcher &getPhysRegOperandMatcher(Record *) const;
void optimize() override;
void emit(MatchTable &Table) override;
@@ -1048,14 +1062,17 @@ public:
IPM_Opcode,
IPM_NumOperands,
IPM_ImmPredicate,
+ IPM_Imm,
IPM_AtomicOrderingMMO,
IPM_MemoryLLTSize,
IPM_MemoryVsLLTSize,
IPM_MemoryAddressSpace,
+ IPM_MemoryAlignment,
IPM_GenericPredicate,
OPM_SameOperand,
OPM_ComplexPattern,
OPM_IntrinsicID,
+ OPM_CmpPredicate,
OPM_Instruction,
OPM_Int,
OPM_LiteralInt,
@@ -1324,6 +1341,23 @@ public:
}
};
+class ImmOperandMatcher : public OperandPredicateMatcher {
+public:
+ ImmOperandMatcher(unsigned InsnVarID, unsigned OpIdx)
+ : OperandPredicateMatcher(IPM_Imm, InsnVarID, OpIdx) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_Imm;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override {
+ Table << MatchTable::Opcode("GIM_CheckIsImm") << MatchTable::Comment("MI")
+ << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
+ << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak;
+ }
+};
+
/// Generates code to check that an operand is a G_CONSTANT with a particular
/// int.
class ConstantIntOperandMatcher : public OperandPredicateMatcher {
@@ -1381,6 +1415,36 @@ public:
}
};
+/// Generates code to check that an operand is an CmpInst predicate
+class CmpPredicateOperandMatcher : public OperandPredicateMatcher {
+protected:
+ std::string PredName;
+
+public:
+ CmpPredicateOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
+ std::string P)
+ : OperandPredicateMatcher(OPM_CmpPredicate, InsnVarID, OpIdx), PredName(P) {}
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ PredName == cast<CmpPredicateOperandMatcher>(&B)->PredName;
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_CmpPredicate;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override {
+ Table << MatchTable::Opcode("GIM_CheckCmpPredicate")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::Comment("Predicate")
+ << MatchTable::NamedValue("CmpInst", PredName)
+ << MatchTable::LineBreak;
+ }
+};
+
/// Generates code to check that an operand is an intrinsic ID.
class IntrinsicIDOperandMatcher : public OperandPredicateMatcher {
protected:
@@ -1442,7 +1506,7 @@ public:
Optional<Kind *> addPredicate(Args &&... args) {
if (isSameAsAnotherOperand())
return None;
- Predicates.emplace_back(llvm::make_unique<Kind>(
+ Predicates.emplace_back(std::make_unique<Kind>(
getInsnVarID(), getOpIdx(), std::forward<Args>(args)...));
return static_cast<Kind *>(Predicates.back().get());
}
@@ -1849,6 +1913,40 @@ public:
}
};
+class MemoryAlignmentPredicateMatcher : public InstructionPredicateMatcher {
+protected:
+ unsigned MMOIdx;
+ int MinAlign;
+
+public:
+ MemoryAlignmentPredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
+ int MinAlign)
+ : InstructionPredicateMatcher(IPM_MemoryAlignment, InsnVarID),
+ MMOIdx(MMOIdx), MinAlign(MinAlign) {
+ assert(MinAlign > 0);
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_MemoryAlignment;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ if (!InstructionPredicateMatcher::isIdentical(B))
+ return false;
+ auto *Other = cast<MemoryAlignmentPredicateMatcher>(&B);
+ return MMOIdx == Other->MMOIdx && MinAlign == Other->MinAlign;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override {
+ Table << MatchTable::Opcode("GIM_CheckMemoryAlignment")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
+ << MatchTable::Comment("MinAlign") << MatchTable::IntValue(MinAlign)
+ << MatchTable::LineBreak;
+ }
+};
+
/// Generates code to check that the size of an MMO is less-than, equal-to, or
/// greater than a given LLT.
class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher {
@@ -1945,6 +2043,11 @@ protected:
std::string SymbolicName;
unsigned InsnVarID;
+ /// PhysRegInputs - List list has an entry for each explicitly specified
+ /// physreg input to the pattern. The first elt is the Register node, the
+ /// second is the recorded slot number the input pattern match saved it in.
+ SmallVector<std::pair<Record *, unsigned>, 2> PhysRegInputs;
+
public:
InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName)
: Rule(Rule), SymbolicName(SymbolicName) {
@@ -1957,7 +2060,7 @@ public:
template <class Kind, class... Args>
Optional<Kind *> addPredicate(Args &&... args) {
Predicates.emplace_back(
- llvm::make_unique<Kind>(getInsnVarID(), std::forward<Args>(args)...));
+ std::make_unique<Kind>(getInsnVarID(), std::forward<Args>(args)...));
return static_cast<Kind *>(Predicates.back().get());
}
@@ -1986,6 +2089,20 @@ public:
llvm_unreachable("Failed to lookup operand");
}
+ OperandMatcher &addPhysRegInput(Record *Reg, unsigned OpIdx,
+ unsigned TempOpIdx) {
+ assert(SymbolicName.empty());
+ OperandMatcher *OM = new OperandMatcher(*this, OpIdx, "", TempOpIdx);
+ Operands.emplace_back(OM);
+ Rule.definePhysRegOperand(Reg, *OM);
+ PhysRegInputs.emplace_back(Reg, OpIdx);
+ return *OM;
+ }
+
+ ArrayRef<std::pair<Record *, unsigned>> getPhysRegInputs() const {
+ return PhysRegInputs;
+ }
+
StringRef getSymbolicName() const { return SymbolicName; }
unsigned getNumOperands() const { return Operands.size(); }
OperandVec::iterator operands_begin() { return Operands.begin(); }
@@ -2193,9 +2310,11 @@ public:
OR_Copy,
OR_CopyOrAddZeroReg,
OR_CopySubReg,
+ OR_CopyPhysReg,
OR_CopyConstantAsImm,
OR_CopyFConstantAsFPImm,
OR_Imm,
+ OR_SubRegIndex,
OR_Register,
OR_TempRegister,
OR_ComplexPattern,
@@ -2247,6 +2366,38 @@ public:
}
};
+/// A CopyRenderer emits code to copy a virtual register to a specific physical
+/// register.
+class CopyPhysRegRenderer : public OperandRenderer {
+protected:
+ unsigned NewInsnID;
+ Record *PhysReg;
+
+public:
+ CopyPhysRegRenderer(unsigned NewInsnID, Record *Reg)
+ : OperandRenderer(OR_CopyPhysReg), NewInsnID(NewInsnID),
+ PhysReg(Reg) {
+ assert(PhysReg);
+ }
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_CopyPhysReg;
+ }
+
+ Record *getPhysReg() const { return PhysReg; }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
+ const OperandMatcher &Operand = Rule.getPhysRegOperandMatcher(PhysReg);
+ unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher());
+ Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID")
+ << MatchTable::IntValue(NewInsnID) << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
+ << MatchTable::IntValue(Operand.getOpIdx())
+ << MatchTable::Comment(PhysReg->getName())
+ << MatchTable::LineBreak;
+ }
+};
+
/// A CopyOrAddZeroRegRenderer emits code to copy a single operand from an
/// existing instruction to the one being built. If the operand turns out to be
/// a 'G_CONSTANT 0' then it replaces the operand with a zero register.
@@ -2393,11 +2544,13 @@ class AddRegisterRenderer : public OperandRenderer {
protected:
unsigned InsnID;
const Record *RegisterDef;
+ bool IsDef;
public:
- AddRegisterRenderer(unsigned InsnID, const Record *RegisterDef)
- : OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef) {
- }
+ AddRegisterRenderer(unsigned InsnID, const Record *RegisterDef,
+ bool IsDef = false)
+ : OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef),
+ IsDef(IsDef) {}
static bool classof(const OperandRenderer *R) {
return R->getKind() == OR_Register;
@@ -2411,7 +2564,16 @@ public:
? RegisterDef->getValueAsString("Namespace")
: ""),
RegisterDef->getName())
- << MatchTable::LineBreak;
+ << MatchTable::Comment("AddRegisterRegFlags");
+
+ // TODO: This is encoded as a 64-bit element, but only 16 or 32-bits are
+ // really needed for a physical register reference. We can pack the
+ // register and flags in a single field.
+ if (IsDef)
+ Table << MatchTable::NamedValue("RegState::Define");
+ else
+ Table << MatchTable::IntValue(0);
+ Table << MatchTable::LineBreak;
}
};
@@ -2467,6 +2629,28 @@ public:
}
};
+/// Adds an enum value for a subreg index to the instruction being built.
+class SubRegIndexRenderer : public OperandRenderer {
+protected:
+ unsigned InsnID;
+ const CodeGenSubRegIndex *SubRegIdx;
+
+public:
+ SubRegIndexRenderer(unsigned InsnID, const CodeGenSubRegIndex *SRI)
+ : OperandRenderer(OR_SubRegIndex), InsnID(InsnID), SubRegIdx(SRI) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_SubRegIndex;
+ }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
+ Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID")
+ << MatchTable::IntValue(InsnID) << MatchTable::Comment("SubRegIndex")
+ << MatchTable::IntValue(SubRegIdx->EnumValue)
+ << MatchTable::LineBreak;
+ }
+};
+
/// Adds operands by calling a renderer function supplied by the ComplexPattern
/// matcher function.
class RenderComplexPatternOperand : public OperandRenderer {
@@ -2620,7 +2804,7 @@ public:
template <class Kind, class... Args>
Kind &addRenderer(Args&&... args) {
OperandRenderers.emplace_back(
- llvm::make_unique<Kind>(InsnID, std::forward<Args>(args)...));
+ std::make_unique<Kind>(InsnID, std::forward<Args>(args)...));
return *static_cast<Kind *>(OperandRenderers.back().get());
}
@@ -2747,7 +2931,9 @@ private:
public:
MakeTempRegisterAction(const LLTCodeGen &Ty, unsigned TempRegID)
- : Ty(Ty), TempRegID(TempRegID) {}
+ : Ty(Ty), TempRegID(TempRegID) {
+ KnownTypes.insert(Ty);
+ }
void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
Table << MatchTable::Opcode("GIR_MakeTempReg")
@@ -2781,7 +2967,7 @@ const std::vector<Record *> &RuleMatcher::getRequiredFeatures() const {
// iterator.
template <class Kind, class... Args>
Kind &RuleMatcher::addAction(Args &&... args) {
- Actions.emplace_back(llvm::make_unique<Kind>(std::forward<Args>(args)...));
+ Actions.emplace_back(std::make_unique<Kind>(std::forward<Args>(args)...));
return *static_cast<Kind *>(Actions.back().get());
}
@@ -2796,7 +2982,7 @@ template <class Kind, class... Args>
action_iterator RuleMatcher::insertAction(action_iterator InsertPt,
Args &&... args) {
return Actions.emplace(InsertPt,
- llvm::make_unique<Kind>(std::forward<Args>(args)...));
+ std::make_unique<Kind>(std::forward<Args>(args)...));
}
unsigned RuleMatcher::implicitlyDefineInsnVar(InstructionMatcher &Matcher) {
@@ -2823,6 +3009,13 @@ void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) {
OM.addPredicate<SameOperandMatcher>(OM.getSymbolicName());
}
+void RuleMatcher::definePhysRegOperand(Record *Reg, OperandMatcher &OM) {
+ if (PhysRegOperands.find(Reg) == PhysRegOperands.end()) {
+ PhysRegOperands[Reg] = &OM;
+ return;
+ }
+}
+
InstructionMatcher &
RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const {
for (const auto &I : InsnVariableIDs)
@@ -2833,6 +3026,18 @@ RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const {
}
const OperandMatcher &
+RuleMatcher::getPhysRegOperandMatcher(Record *Reg) const {
+ const auto &I = PhysRegOperands.find(Reg);
+
+ if (I == PhysRegOperands.end()) {
+ PrintFatalError(SrcLoc, "Register " + Reg->getName() +
+ " was not declared in matcher");
+ }
+
+ return *I->second;
+}
+
+const OperandMatcher &
RuleMatcher::getOperandMatcher(StringRef Name) const {
const auto &I = DefinedOperands.find(Name);
@@ -3079,9 +3284,9 @@ private:
bool OperandIsAPointer, unsigned OpIdx,
unsigned &TempOpIdx);
- Expected<BuildMIAction &>
- createAndImportInstructionRenderer(RuleMatcher &M,
- const TreePatternNode *Dst);
+ Expected<BuildMIAction &> createAndImportInstructionRenderer(
+ RuleMatcher &M, InstructionMatcher &InsnMatcher,
+ const TreePatternNode *Src, const TreePatternNode *Dst);
Expected<action_iterator> createAndImportSubInstructionRenderer(
action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst,
unsigned TempReg);
@@ -3089,6 +3294,7 @@ private:
createInstructionRenderer(action_iterator InsertPt, RuleMatcher &M,
const TreePatternNode *Dst);
void importExplicitDefRenderers(BuildMIAction &DstMIBuilder);
+
Expected<action_iterator>
importExplicitUseRenderers(action_iterator InsertPt, RuleMatcher &M,
BuildMIAction &DstMIBuilder,
@@ -3122,6 +3328,32 @@ private:
MatchTable buildMatchTable(MutableArrayRef<RuleMatcher> Rules, bool Optimize,
bool WithCoverage);
+ /// Infer a CodeGenRegisterClass for the type of \p SuperRegNode. The returned
+ /// CodeGenRegisterClass will support the CodeGenRegisterClass of
+ /// \p SubRegNode, and the subregister index defined by \p SubRegIdxNode.
+ /// If no register class is found, return None.
+ Optional<const CodeGenRegisterClass *>
+ inferSuperRegisterClassForNode(const TypeSetByHwMode &Ty,
+ TreePatternNode *SuperRegNode,
+ TreePatternNode *SubRegIdxNode);
+ Optional<CodeGenSubRegIndex *>
+ inferSubRegIndexForNode(TreePatternNode *SubRegIdxNode);
+
+ /// Infer a CodeGenRegisterClass which suppoorts \p Ty and \p SubRegIdxNode.
+ /// Return None if no such class exists.
+ Optional<const CodeGenRegisterClass *>
+ inferSuperRegisterClass(const TypeSetByHwMode &Ty,
+ TreePatternNode *SubRegIdxNode);
+
+ /// Return the CodeGenRegisterClass associated with \p Leaf if it has one.
+ Optional<const CodeGenRegisterClass *>
+ getRegClassFromLeaf(TreePatternNode *Leaf);
+
+ /// Return a CodeGenRegisterClass for \p N if one can be found. Return None
+ /// otherwise.
+ Optional<const CodeGenRegisterClass *>
+ inferRegClassFromPattern(TreePatternNode *N);
+
public:
/// Takes a sequence of \p Rules and group them based on the predicates
/// they share. \p MatcherStorage is used as a memory container
@@ -3190,6 +3422,13 @@ Record *GlobalISelEmitter::findNodeEquiv(Record *N) const {
const CodeGenInstruction *
GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
+ if (N->getNumChildren() >= 1) {
+ // setcc operation maps to two different G_* instructions based on the type.
+ if (!Equiv.isValueUnset("IfFloatingPoint") &&
+ MVT(N->getChild(0)->getSimpleType(0)).isFloatingPoint())
+ return &Target.getInstruction(Equiv.getValueAsDef("IfFloatingPoint"));
+ }
+
for (const TreePredicateCall &Call : N->getPredicateCalls()) {
const TreePredicateFn &Predicate = Call.Fn;
if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() &&
@@ -3199,6 +3438,7 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
Predicate.isZeroExtLoad())
return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend"));
}
+
return &Target.getInstruction(Equiv.getValueAsDef("I"));
}
@@ -3212,7 +3452,7 @@ Error
GlobalISelEmitter::importRulePredicates(RuleMatcher &M,
ArrayRef<Predicate> Predicates) {
for (const Predicate &P : Predicates) {
- if (!P.Def)
+ if (!P.Def || P.getCondString().empty())
continue;
declareSubtargetFeature(P.Def);
M.addRequiredFeature(P.Def);
@@ -3287,6 +3527,10 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
0, ParsedAddrSpaces);
}
}
+
+ int64_t MinAlign = Predicate.getMinAlignment();
+ if (MinAlign > 0)
+ InsnMatcher.addPredicate<MemoryAlignmentPredicateMatcher>(0, MinAlign);
}
// G_LOAD is used for both non-extending and any-extending loads.
@@ -3301,11 +3545,19 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
continue;
}
- if (Predicate.isStore() && Predicate.isTruncStore()) {
- // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
- InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
- 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
- continue;
+ if (Predicate.isStore()) {
+ if (Predicate.isTruncStore()) {
+ // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
+ InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+ 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+ continue;
+ }
+ if (Predicate.isNonTruncStore()) {
+ // We need to check the sizes match here otherwise we could incorrectly
+ // match truncating stores with non-truncating ones.
+ InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+ 0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0);
+ }
}
// No check required. We already did it by swapping the opcode.
@@ -3405,6 +3657,10 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
}
if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic"))
InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("NotAtomic");
+ else if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsAtomic")) {
+ InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
+ "Unordered", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
+ }
if (Src->isLeaf()) {
Init *SrcInit = Src->getLeafValue();
@@ -3427,8 +3683,43 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
return InsnMatcher;
}
+ // Special case because the operand order is changed from setcc. The
+ // predicate operand needs to be swapped from the last operand to the first
+ // source.
+
+ unsigned NumChildren = Src->getNumChildren();
+ bool IsFCmp = SrcGIOrNull->TheDef->getName() == "G_FCMP";
+
+ if (IsFCmp || SrcGIOrNull->TheDef->getName() == "G_ICMP") {
+ TreePatternNode *SrcChild = Src->getChild(NumChildren - 1);
+ if (SrcChild->isLeaf()) {
+ DefInit *DI = dyn_cast<DefInit>(SrcChild->getLeafValue());
+ Record *CCDef = DI ? DI->getDef() : nullptr;
+ if (!CCDef || !CCDef->isSubClassOf("CondCode"))
+ return failedImport("Unable to handle CondCode");
+
+ OperandMatcher &OM =
+ InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx);
+ StringRef PredType = IsFCmp ? CCDef->getValueAsString("FCmpPredicate") :
+ CCDef->getValueAsString("ICmpPredicate");
+
+ if (!PredType.empty()) {
+ OM.addPredicate<CmpPredicateOperandMatcher>(PredType);
+ // Process the other 2 operands normally.
+ --NumChildren;
+ }
+ }
+ }
+
// Match the used operands (i.e. the children of the operator).
- for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) {
+ bool IsIntrinsic =
+ SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" ||
+ SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS";
+ const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP);
+ if (IsIntrinsic && !II)
+ return failedImport("Expected IntInit containing intrinsic ID)");
+
+ for (unsigned i = 0; i != NumChildren; ++i) {
TreePatternNode *SrcChild = Src->getChild(i);
// SelectionDAG allows pointers to be represented with iN since it doesn't
@@ -3436,19 +3727,21 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
// Coerce integers to pointers to address space 0 if the context indicates a pointer.
bool OperandIsAPointer = SrcGIOrNull->isOperandAPointer(i);
- // For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately
- // following the defs is an intrinsic ID.
- if ((SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" ||
- SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS") &&
- i == 0) {
- if (const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP)) {
+ if (IsIntrinsic) {
+ // For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately
+ // following the defs is an intrinsic ID.
+ if (i == 0) {
OperandMatcher &OM =
InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx);
OM.addPredicate<IntrinsicIDOperandMatcher>(II);
continue;
}
- return failedImport("Expected IntInit containing instrinsic ID)");
+ // We have to check intrinsics for llvm_anyptr_ty parameters.
+ //
+ // Note that we have to look at the i-1th parameter, because we don't
+ // have the intrinsic ID in the intrinsic's parameter list.
+ OperandIsAPointer |= II->isParamAPointer(i - 1);
}
if (auto Error =
@@ -3473,14 +3766,37 @@ Error GlobalISelEmitter::importComplexPatternOperandMatcher(
return Error::success();
}
+// Get the name to use for a pattern operand. For an anonymous physical register
+// input, this should use the register name.
+static StringRef getSrcChildName(const TreePatternNode *SrcChild,
+ Record *&PhysReg) {
+ StringRef SrcChildName = SrcChild->getName();
+ if (SrcChildName.empty() && SrcChild->isLeaf()) {
+ if (auto *ChildDefInit = dyn_cast<DefInit>(SrcChild->getLeafValue())) {
+ auto *ChildRec = ChildDefInit->getDef();
+ if (ChildRec->isSubClassOf("Register")) {
+ SrcChildName = ChildRec->getName();
+ PhysReg = ChildRec;
+ }
+ }
+ }
+
+ return SrcChildName;
+}
+
Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
InstructionMatcher &InsnMatcher,
const TreePatternNode *SrcChild,
bool OperandIsAPointer,
unsigned OpIdx,
unsigned &TempOpIdx) {
- OperandMatcher &OM =
- InsnMatcher.addOperand(OpIdx, SrcChild->getName(), TempOpIdx);
+
+ Record *PhysReg = nullptr;
+ StringRef SrcChildName = getSrcChildName(SrcChild, PhysReg);
+
+ OperandMatcher &OM = PhysReg ?
+ InsnMatcher.addPhysRegInput(PhysReg, OpIdx, TempOpIdx) :
+ InsnMatcher.addOperand(OpIdx, SrcChildName, TempOpIdx);
if (OM.isSameAsAnotherOperand())
return Error::success();
@@ -3496,6 +3812,10 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
OM.addPredicate<MBBOperandMatcher>();
return Error::success();
}
+ if (SrcChild->getOperator()->getName() == "timm") {
+ OM.addPredicate<ImmOperandMatcher>();
+ return Error::success();
+ }
}
}
@@ -3569,6 +3889,20 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
return Error::success();
}
+ if (ChildRec->isSubClassOf("Register")) {
+ // This just be emitted as a copy to the specific register.
+ ValueTypeByHwMode VT = ChildTypes.front().getValueTypeByHwMode();
+ const CodeGenRegisterClass *RC
+ = CGRegs.getMinimalPhysRegClass(ChildRec, &VT);
+ if (!RC) {
+ return failedImport(
+ "Could not determine physical register class of pattern source");
+ }
+
+ OM.addPredicate<RegisterBankOperandMatcher>(*RC);
+ return Error::success();
+ }
+
// Check for ValueType.
if (ChildRec->isSubClassOf("ValueType")) {
// We already added a type check as standard practice so this doesn't need
@@ -3631,7 +3965,10 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
// rendered as operands.
// FIXME: The target should be able to choose sign-extended when appropriate
// (e.g. on Mips).
- if (DstChild->getOperator()->getName() == "imm") {
+ if (DstChild->getOperator()->getName() == "timm") {
+ DstMIBuilder.addRenderer<CopyRenderer>(DstChild->getName());
+ return InsertPt;
+ } else if (DstChild->getOperator()->getName() == "imm") {
DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(DstChild->getName());
return InsertPt;
} else if (DstChild->getOperator()->getName() == "fpimm") {
@@ -3708,6 +4045,12 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
return InsertPt;
}
+ if (ChildRec->isSubClassOf("SubRegIndex")) {
+ CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(ChildRec);
+ DstMIBuilder.addRenderer<ImmRenderer>(SubIdx->EnumValue);
+ return InsertPt;
+ }
+
if (ChildRec->isSubClassOf("ComplexPattern")) {
const auto &ComplexPattern = ComplexPatternEquivs.find(ChildRec);
if (ComplexPattern == ComplexPatternEquivs.end())
@@ -3729,7 +4072,8 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
}
Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
- RuleMatcher &M, const TreePatternNode *Dst) {
+ RuleMatcher &M, InstructionMatcher &InsnMatcher, const TreePatternNode *Src,
+ const TreePatternNode *Dst) {
auto InsertPtOrError = createInstructionRenderer(M.actions_end(), M, Dst);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
@@ -3737,6 +4081,17 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
action_iterator InsertPt = InsertPtOrError.get();
BuildMIAction &DstMIBuilder = *static_cast<BuildMIAction *>(InsertPt->get());
+ for (auto PhysInput : InsnMatcher.getPhysRegInputs()) {
+ InsertPt = M.insertAction<BuildMIAction>(
+ InsertPt, M.allocateOutputInsnID(),
+ &Target.getInstruction(RK.getDef("COPY")));
+ BuildMIAction &CopyToPhysRegMIBuilder =
+ *static_cast<BuildMIAction *>(InsertPt->get());
+ CopyToPhysRegMIBuilder.addRenderer<AddRegisterRenderer>(PhysInput.first,
+ true);
+ CopyToPhysRegMIBuilder.addRenderer<CopyPhysRegRenderer>(PhysInput.first);
+ }
+
importExplicitDefRenderers(DstMIBuilder);
if (auto Error = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst)
@@ -3768,6 +4123,78 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
+ // We need to make sure that when we import an INSERT_SUBREG as a
+ // subinstruction that it ends up being constrained to the correct super
+ // register and subregister classes.
+ auto OpName = Target.getInstruction(Dst->getOperator()).TheDef->getName();
+ if (OpName == "INSERT_SUBREG") {
+ auto SubClass = inferRegClassFromPattern(Dst->getChild(1));
+ if (!SubClass)
+ return failedImport(
+ "Cannot infer register class from INSERT_SUBREG operand #1");
+ Optional<const CodeGenRegisterClass *> SuperClass =
+ inferSuperRegisterClassForNode(Dst->getExtType(0), Dst->getChild(0),
+ Dst->getChild(2));
+ if (!SuperClass)
+ return failedImport(
+ "Cannot infer register class for INSERT_SUBREG operand #0");
+ // The destination and the super register source of an INSERT_SUBREG must
+ // be the same register class.
+ M.insertAction<ConstrainOperandToRegClassAction>(
+ InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass);
+ M.insertAction<ConstrainOperandToRegClassAction>(
+ InsertPt, DstMIBuilder.getInsnID(), 1, **SuperClass);
+ M.insertAction<ConstrainOperandToRegClassAction>(
+ InsertPt, DstMIBuilder.getInsnID(), 2, **SubClass);
+ return InsertPtOrError.get();
+ }
+
+ if (OpName == "EXTRACT_SUBREG") {
+ // EXTRACT_SUBREG selects into a subregister COPY but unlike most
+ // instructions, the result register class is controlled by the
+ // subregisters of the operand. As a result, we must constrain the result
+ // class rather than check that it's already the right one.
+ auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
+ if (!SuperClass)
+ return failedImport(
+ "Cannot infer register class from EXTRACT_SUBREG operand #0");
+
+ auto SubIdx = inferSubRegIndexForNode(Dst->getChild(1));
+ if (!SubIdx)
+ return failedImport("EXTRACT_SUBREG child #1 is not a subreg index");
+
+ const auto &SrcRCDstRCPair =
+ (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
+ assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
+ M.insertAction<ConstrainOperandToRegClassAction>(
+ InsertPt, DstMIBuilder.getInsnID(), 0, *SrcRCDstRCPair->second);
+ M.insertAction<ConstrainOperandToRegClassAction>(
+ InsertPt, DstMIBuilder.getInsnID(), 1, *SrcRCDstRCPair->first);
+
+ // We're done with this pattern! It's eligible for GISel emission; return
+ // it.
+ return InsertPtOrError.get();
+ }
+
+ // Similar to INSERT_SUBREG, we also have to handle SUBREG_TO_REG as a
+ // subinstruction.
+ if (OpName == "SUBREG_TO_REG") {
+ auto SubClass = inferRegClassFromPattern(Dst->getChild(1));
+ if (!SubClass)
+ return failedImport(
+ "Cannot infer register class from SUBREG_TO_REG child #1");
+ auto SuperClass = inferSuperRegisterClass(Dst->getExtType(0),
+ Dst->getChild(2));
+ if (!SuperClass)
+ return failedImport(
+ "Cannot infer register class for SUBREG_TO_REG operand #0");
+ M.insertAction<ConstrainOperandToRegClassAction>(
+ InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass);
+ M.insertAction<ConstrainOperandToRegClassAction>(
+ InsertPt, DstMIBuilder.getInsnID(), 2, **SubClass);
+ return InsertPtOrError.get();
+ }
+
M.insertAction<ConstrainOperandsToDefinitionAction>(InsertPt,
DstMIBuilder.getInsnID());
return InsertPtOrError.get();
@@ -3786,12 +4213,9 @@ Expected<action_iterator> GlobalISelEmitter::createInstructionRenderer(
// COPY_TO_REGCLASS is just a copy with a ConstrainOperandToRegClassAction
// attached. Similarly for EXTRACT_SUBREG except that's a subregister copy.
- if (DstI->TheDef->getName() == "COPY_TO_REGCLASS")
- DstI = &Target.getInstruction(RK.getDef("COPY"));
- else if (DstI->TheDef->getName() == "EXTRACT_SUBREG")
+ StringRef Name = DstI->TheDef->getName();
+ if (Name == "COPY_TO_REGCLASS" || Name == "EXTRACT_SUBREG")
DstI = &Target.getInstruction(RK.getDef("COPY"));
- else if (DstI->TheDef->getName() == "REG_SEQUENCE")
- return failedImport("Unable to emit REG_SEQUENCE");
return M.insertAction<BuildMIAction>(InsertPt, M.allocateOutputInsnID(),
DstI);
@@ -3812,8 +4236,11 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
const CodeGenInstruction *DstI = DstMIBuilder.getCGI();
CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst->getOperator());
+ StringRef Name = OrigDstI->TheDef->getName();
+ unsigned ExpectedDstINumUses = Dst->getNumChildren();
+
// EXTRACT_SUBREG needs to use a subregister COPY.
- if (OrigDstI->TheDef->getName() == "EXTRACT_SUBREG") {
+ if (Name == "EXTRACT_SUBREG") {
if (!Dst->getChild(0)->isLeaf())
return failedImport("EXTRACT_SUBREG child #1 is not a leaf");
@@ -3843,10 +4270,41 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
return failedImport("EXTRACT_SUBREG child #1 is not a subreg index");
}
+ if (Name == "REG_SEQUENCE") {
+ if (!Dst->getChild(0)->isLeaf())
+ return failedImport("REG_SEQUENCE child #0 is not a leaf");
+
+ Record *RCDef = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue());
+ if (!RCDef)
+ return failedImport("REG_SEQUENCE child #0 could not "
+ "be coerced to a register class");
+
+ if ((ExpectedDstINumUses - 1) % 2 != 0)
+ return failedImport("Malformed REG_SEQUENCE");
+
+ for (unsigned I = 1; I != ExpectedDstINumUses; I += 2) {
+ TreePatternNode *ValChild = Dst->getChild(I);
+ TreePatternNode *SubRegChild = Dst->getChild(I + 1);
+
+ if (DefInit *SubRegInit =
+ dyn_cast<DefInit>(SubRegChild->getLeafValue())) {
+ CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
+
+ auto InsertPtOrError =
+ importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild);
+ if (auto Error = InsertPtOrError.takeError())
+ return std::move(Error);
+ InsertPt = InsertPtOrError.get();
+ DstMIBuilder.addRenderer<SubRegIndexRenderer>(SubIdx);
+ }
+ }
+
+ return InsertPt;
+ }
+
// Render the explicit uses.
unsigned DstINumUses = OrigDstI->Operands.size() - OrigDstI->Operands.NumDefs;
- unsigned ExpectedDstINumUses = Dst->getNumChildren();
- if (OrigDstI->TheDef->getName() == "COPY_TO_REGCLASS") {
+ if (Name == "COPY_TO_REGCLASS") {
DstINumUses--; // Ignore the class constraint.
ExpectedDstINumUses--;
}
@@ -3945,6 +4403,126 @@ Error GlobalISelEmitter::importImplicitDefRenderers(
return Error::success();
}
+Optional<const CodeGenRegisterClass *>
+GlobalISelEmitter::getRegClassFromLeaf(TreePatternNode *Leaf) {
+ assert(Leaf && "Expected node?");
+ assert(Leaf->isLeaf() && "Expected leaf?");
+ Record *RCRec = getInitValueAsRegClass(Leaf->getLeafValue());
+ if (!RCRec)
+ return None;
+ CodeGenRegisterClass *RC = CGRegs.getRegClass(RCRec);
+ if (!RC)
+ return None;
+ return RC;
+}
+
+Optional<const CodeGenRegisterClass *>
+GlobalISelEmitter::inferRegClassFromPattern(TreePatternNode *N) {
+ if (!N)
+ return None;
+
+ if (N->isLeaf())
+ return getRegClassFromLeaf(N);
+
+ // We don't have a leaf node, so we have to try and infer something. Check
+ // that we have an instruction that we an infer something from.
+
+ // Only handle things that produce a single type.
+ if (N->getNumTypes() != 1)
+ return None;
+ Record *OpRec = N->getOperator();
+
+ // We only want instructions.
+ if (!OpRec->isSubClassOf("Instruction"))
+ return None;
+
+ // Don't want to try and infer things when there could potentially be more
+ // than one candidate register class.
+ auto &Inst = Target.getInstruction(OpRec);
+ if (Inst.Operands.NumDefs > 1)
+ return None;
+
+ // Handle any special-case instructions which we can safely infer register
+ // classes from.
+ StringRef InstName = Inst.TheDef->getName();
+ bool IsRegSequence = InstName == "REG_SEQUENCE";
+ if (IsRegSequence || InstName == "COPY_TO_REGCLASS") {
+ // If we have a COPY_TO_REGCLASS, then we need to handle it specially. It
+ // has the desired register class as the first child.
+ TreePatternNode *RCChild = N->getChild(IsRegSequence ? 0 : 1);
+ if (!RCChild->isLeaf())
+ return None;
+ return getRegClassFromLeaf(RCChild);
+ }
+
+ // Handle destination record types that we can safely infer a register class
+ // from.
+ const auto &DstIOperand = Inst.Operands[0];
+ Record *DstIOpRec = DstIOperand.Rec;
+ if (DstIOpRec->isSubClassOf("RegisterOperand")) {
+ DstIOpRec = DstIOpRec->getValueAsDef("RegClass");
+ const CodeGenRegisterClass &RC = Target.getRegisterClass(DstIOpRec);
+ return &RC;
+ }
+
+ if (DstIOpRec->isSubClassOf("RegisterClass")) {
+ const CodeGenRegisterClass &RC = Target.getRegisterClass(DstIOpRec);
+ return &RC;
+ }
+
+ return None;
+}
+
+Optional<const CodeGenRegisterClass *>
+GlobalISelEmitter::inferSuperRegisterClass(const TypeSetByHwMode &Ty,
+ TreePatternNode *SubRegIdxNode) {
+ assert(SubRegIdxNode && "Expected subregister index node!");
+ // We need a ValueTypeByHwMode for getSuperRegForSubReg.
+ if (!Ty.isValueTypeByHwMode(false))
+ return None;
+ if (!SubRegIdxNode->isLeaf())
+ return None;
+ DefInit *SubRegInit = dyn_cast<DefInit>(SubRegIdxNode->getLeafValue());
+ if (!SubRegInit)
+ return None;
+ CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
+
+ // Use the information we found above to find a minimal register class which
+ // supports the subregister and type we want.
+ auto RC =
+ Target.getSuperRegForSubReg(Ty.getValueTypeByHwMode(), CGRegs, SubIdx);
+ if (!RC)
+ return None;
+ return *RC;
+}
+
+Optional<const CodeGenRegisterClass *>
+GlobalISelEmitter::inferSuperRegisterClassForNode(
+ const TypeSetByHwMode &Ty, TreePatternNode *SuperRegNode,
+ TreePatternNode *SubRegIdxNode) {
+ assert(SuperRegNode && "Expected super register node!");
+ // Check if we already have a defined register class for the super register
+ // node. If we do, then we should preserve that rather than inferring anything
+ // from the subregister index node. We can assume that whoever wrote the
+ // pattern in the first place made sure that the super register and
+ // subregister are compatible.
+ if (Optional<const CodeGenRegisterClass *> SuperRegisterClass =
+ inferRegClassFromPattern(SuperRegNode))
+ return *SuperRegisterClass;
+ return inferSuperRegisterClass(Ty, SubRegIdxNode);
+}
+
+Optional<CodeGenSubRegIndex *>
+GlobalISelEmitter::inferSubRegIndexForNode(TreePatternNode *SubRegIdxNode) {
+ if (!SubRegIdxNode->isLeaf())
+ return None;
+
+ DefInit *SubRegInit = dyn_cast<DefInit>(SubRegIdxNode->getLeafValue());
+ if (!SubRegInit)
+ return None;
+ return CGRegs.getSubRegIdx(SubRegInit->getDef());
+}
+
Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
// Keep track of the matchers and actions to emit.
int Score = P.getPatternComplexity(CGP);
@@ -4035,6 +4613,8 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
return failedImport("Pattern operator isn't an instruction");
auto &DstI = Target.getInstruction(DstOp);
+ StringRef DstIName = DstI.TheDef->getName();
+
if (DstI.Operands.NumDefs != Src->getExtTypes().size())
return failedImport("Src pattern results and dst MI defs are different (" +
to_string(Src->getExtTypes().size()) + " def(s) vs " +
@@ -4048,13 +4628,17 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
const auto &DstIOperand = DstI.Operands[OpIdx];
Record *DstIOpRec = DstIOperand.Rec;
- if (DstI.TheDef->getName() == "COPY_TO_REGCLASS") {
+ if (DstIName == "COPY_TO_REGCLASS") {
DstIOpRec = getInitValueAsRegClass(Dst->getChild(1)->getLeafValue());
if (DstIOpRec == nullptr)
return failedImport(
"COPY_TO_REGCLASS operand #1 isn't a register class");
- } else if (DstI.TheDef->getName() == "EXTRACT_SUBREG") {
+ } else if (DstIName == "REG_SEQUENCE") {
+ DstIOpRec = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue());
+ if (DstIOpRec == nullptr)
+ return failedImport("REG_SEQUENCE operand #0 isn't a register class");
+ } else if (DstIName == "EXTRACT_SUBREG") {
if (!Dst->getChild(0)->isLeaf())
return failedImport("EXTRACT_SUBREG operand #0 isn't a leaf");
@@ -4063,8 +4647,33 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
DstIOpRec = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue());
if (DstIOpRec == nullptr)
+ return failedImport("EXTRACT_SUBREG operand #0 isn't a register class");
+ } else if (DstIName == "INSERT_SUBREG") {
+ auto MaybeSuperClass = inferSuperRegisterClassForNode(
+ VTy, Dst->getChild(0), Dst->getChild(2));
+ if (!MaybeSuperClass)
return failedImport(
- "EXTRACT_SUBREG operand #0 isn't a register class");
+ "Cannot infer register class for INSERT_SUBREG operand #0");
+ // Move to the next pattern here, because the register class we found
+ // doesn't necessarily have a record associated with it. So, we can't
+ // set DstIOpRec using this.
+ OperandMatcher &OM = InsnMatcher.getOperand(OpIdx);
+ OM.setSymbolicName(DstIOperand.Name);
+ M.defineOperand(OM.getSymbolicName(), OM);
+ OM.addPredicate<RegisterBankOperandMatcher>(**MaybeSuperClass);
+ ++OpIdx;
+ continue;
+ } else if (DstIName == "SUBREG_TO_REG") {
+ auto MaybeRegClass = inferSuperRegisterClass(VTy, Dst->getChild(2));
+ if (!MaybeRegClass)
+ return failedImport(
+ "Cannot infer register class for SUBREG_TO_REG operand #0");
+ OperandMatcher &OM = InsnMatcher.getOperand(OpIdx);
+ OM.setSymbolicName(DstIOperand.Name);
+ M.defineOperand(OM.getSymbolicName(), OM);
+ OM.addPredicate<RegisterBankOperandMatcher>(**MaybeRegClass);
+ ++OpIdx;
+ continue;
} else if (DstIOpRec->isSubClassOf("RegisterOperand"))
DstIOpRec = DstIOpRec->getValueAsDef("RegClass");
else if (!DstIOpRec->isSubClassOf("RegisterClass"))
@@ -4079,7 +4688,8 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
++OpIdx;
}
- auto DstMIBuilderOrError = createAndImportInstructionRenderer(M, Dst);
+ auto DstMIBuilderOrError =
+ createAndImportInstructionRenderer(M, InsnMatcher, Src, Dst);
if (auto Error = DstMIBuilderOrError.takeError())
return std::move(Error);
BuildMIAction &DstMIBuilder = DstMIBuilderOrError.get();
@@ -4093,7 +4703,7 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
// Constrain the registers to classes. This is normally derived from the
// emitted instruction but a few instructions require special handling.
- if (DstI.TheDef->getName() == "COPY_TO_REGCLASS") {
+ if (DstIName == "COPY_TO_REGCLASS") {
// COPY_TO_REGCLASS does not provide operand constraints itself but the
// result is constrained to the class given by the second child.
Record *DstIOpRec =
@@ -4111,28 +4721,16 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
return std::move(M);
}
- if (DstI.TheDef->getName() == "EXTRACT_SUBREG") {
- // EXTRACT_SUBREG selects into a subregister COPY but unlike most
- // instructions, the result register class is controlled by the
- // subregisters of the operand. As a result, we must constrain the result
- // class rather than check that it's already the right one.
- if (!Dst->getChild(0)->isLeaf())
- return failedImport("EXTRACT_SUBREG child #1 is not a leaf");
+ if (DstIName == "EXTRACT_SUBREG") {
+ auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
+ if (!SuperClass)
+ return failedImport(
+ "Cannot infer register class from EXTRACT_SUBREG operand #0");
- DefInit *SubRegInit = dyn_cast<DefInit>(Dst->getChild(1)->getLeafValue());
- if (!SubRegInit)
+ auto SubIdx = inferSubRegIndexForNode(Dst->getChild(1));
+ if (!SubIdx)
return failedImport("EXTRACT_SUBREG child #1 is not a subreg index");
- // Constrain the result to the same register bank as the operand.
- Record *DstIOpRec =
- getInitValueAsRegClass(Dst->getChild(0)->getLeafValue());
-
- if (DstIOpRec == nullptr)
- return failedImport("EXTRACT_SUBREG operand #1 isn't a register class");
-
- CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
- CodeGenRegisterClass *SrcRC = CGRegs.getRegClass(DstIOpRec);
-
// It would be nice to leave this constraint implicit but we're required
// to pick a register class so constrain the result to a register class
// that can hold the correct MVT.
@@ -4143,7 +4741,7 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
"Expected Src of EXTRACT_SUBREG to have one result type");
const auto &SrcRCDstRCPair =
- SrcRC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx);
+ (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
M.addAction<ConstrainOperandToRegClassAction>(0, 0, *SrcRCDstRCPair->second);
M.addAction<ConstrainOperandToRegClassAction>(0, 1, *SrcRCDstRCPair->first);
@@ -4154,6 +4752,51 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
return std::move(M);
}
+ if (DstIName == "INSERT_SUBREG") {
+ assert(Src->getExtTypes().size() == 1 &&
+ "Expected Src of INSERT_SUBREG to have one result type");
+ // We need to constrain the destination, a super regsister source, and a
+ // subregister source.
+ auto SubClass = inferRegClassFromPattern(Dst->getChild(1));
+ if (!SubClass)
+ return failedImport(
+ "Cannot infer register class from INSERT_SUBREG operand #1");
+ auto SuperClass = inferSuperRegisterClassForNode(
+ Src->getExtType(0), Dst->getChild(0), Dst->getChild(2));
+ if (!SuperClass)
+ return failedImport(
+ "Cannot infer register class for INSERT_SUBREG operand #0");
+ M.addAction<ConstrainOperandToRegClassAction>(0, 0, **SuperClass);
+ M.addAction<ConstrainOperandToRegClassAction>(0, 1, **SuperClass);
+ M.addAction<ConstrainOperandToRegClassAction>(0, 2, **SubClass);
+ ++NumPatternImported;
+ return std::move(M);
+ }
+
+ if (DstIName == "SUBREG_TO_REG") {
+ // We need to constrain the destination and subregister source.
+ assert(Src->getExtTypes().size() == 1 &&
+ "Expected Src of SUBREG_TO_REG to have one result type");
+
+ // Attempt to infer the subregister source from the first child. If it has
+ // an explicitly given register class, we'll use that. Otherwise, we will
+ // fail.
+ auto SubClass = inferRegClassFromPattern(Dst->getChild(1));
+ if (!SubClass)
+ return failedImport(
+ "Cannot infer register class from SUBREG_TO_REG child #1");
+ // We don't have a child to look at that might have a super register node.
+ auto SuperClass =
+ inferSuperRegisterClass(Src->getExtType(0), Dst->getChild(2));
+ if (!SuperClass)
+ return failedImport(
+ "Cannot infer register class for SUBREG_TO_REG operand #0");
+ M.addAction<ConstrainOperandToRegClassAction>(0, 0, **SuperClass);
+ M.addAction<ConstrainOperandToRegClassAction>(0, 2, **SubClass);
+ ++NumPatternImported;
+ return std::move(M);
+ }
+
M.addAction<ConstrainOperandsToDefinitionAction>(0);
// We're done with this pattern! It's eligible for GISel emission; return it.
@@ -4235,7 +4878,7 @@ std::vector<Matcher *> GlobalISelEmitter::optimizeRules(
std::vector<std::unique_ptr<Matcher>> &MatcherStorage) {
std::vector<Matcher *> OptRules;
- std::unique_ptr<GroupT> CurrentGroup = make_unique<GroupT>();
+ std::unique_ptr<GroupT> CurrentGroup = std::make_unique<GroupT>();
assert(CurrentGroup->empty() && "Newly created group isn't empty!");
unsigned NumGroups = 0;
@@ -4256,7 +4899,7 @@ std::vector<Matcher *> GlobalISelEmitter::optimizeRules(
MatcherStorage.emplace_back(std::move(CurrentGroup));
++NumGroups;
}
- CurrentGroup = make_unique<GroupT>();
+ CurrentGroup = std::make_unique<GroupT>();
};
for (Matcher *Rule : Rules) {
// Greedily add as many matchers as possible to the current group:
diff --git a/utils/TableGen/InfoByHwMode.cpp b/utils/TableGen/InfoByHwMode.cpp
index d9662889a5db..7cd1b0f08132 100644
--- a/utils/TableGen/InfoByHwMode.cpp
+++ b/utils/TableGen/InfoByHwMode.cpp
@@ -192,6 +192,17 @@ void RegSizeInfoByHwMode::writeToStream(raw_ostream &OS) const {
OS << '}';
}
+EncodingInfoByHwMode::EncodingInfoByHwMode(Record *R, const CodeGenHwModes &CGH) {
+ const HwModeSelect &MS = CGH.getHwModeSelect(R);
+ for (const HwModeSelect::PairType &P : MS.Items) {
+ assert(P.second && P.second->isSubClassOf("InstructionEncoding") &&
+ "Encoding must subclass InstructionEncoding");
+ auto I = Map.insert({P.first, P.second});
+ assert(I.second && "Duplicate entry?");
+ (void)I;
+ }
+}
+
namespace llvm {
raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) {
T.writeToStream(OS);
diff --git a/utils/TableGen/InfoByHwMode.h b/utils/TableGen/InfoByHwMode.h
index 9e5cc3d5f2a4..d92e5901a7f3 100644
--- a/utils/TableGen/InfoByHwMode.h
+++ b/utils/TableGen/InfoByHwMode.h
@@ -184,6 +184,11 @@ raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T);
raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfo &T);
raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T);
+struct EncodingInfoByHwMode : public InfoByHwMode<Record*> {
+ EncodingInfoByHwMode(Record *R, const CodeGenHwModes &CGH);
+ EncodingInfoByHwMode() = default;
+};
+
} // namespace llvm
#endif // LLVM_UTILS_TABLEGEN_INFOBYHWMODE_H
diff --git a/utils/TableGen/InstrDocsEmitter.cpp b/utils/TableGen/InstrDocsEmitter.cpp
index 91c457ba08fd..45fa936b9574 100644
--- a/utils/TableGen/InstrDocsEmitter.cpp
+++ b/utils/TableGen/InstrDocsEmitter.cpp
@@ -231,4 +231,4 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
}
}
-} // end llvm namespace
+} // end namespace llvm
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 2d367f538b71..300ba36a7007 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -332,6 +332,10 @@ void InstrInfoEmitter::emitOperandTypeMappings(
StringRef Namespace = Target.getInstNamespace();
std::vector<Record *> Operands = Records.getAllDerivedDefinitions("Operand");
+ std::vector<Record *> RegisterOperands =
+ Records.getAllDerivedDefinitions("RegisterOperand");
+ std::vector<Record *> RegisterClasses =
+ Records.getAllDerivedDefinitions("RegisterClass");
OS << "#ifdef GET_INSTRINFO_OPERAND_TYPES_ENUM\n";
OS << "#undef GET_INSTRINFO_OPERAND_TYPES_ENUM\n";
@@ -341,10 +345,13 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OS << "enum OperandType {\n";
unsigned EnumVal = 0;
- for (const Record *Op : Operands) {
- if (!Op->isAnonymous())
- OS << " " << Op->getName() << " = " << EnumVal << ",\n";
- ++EnumVal;
+ for (const std::vector<Record *> *RecordsToAdd :
+ {&Operands, &RegisterOperands, &RegisterClasses}) {
+ for (const Record *Op : *RecordsToAdd) {
+ if (!Op->isAnonymous())
+ OS << " " << Op->getName() << " = " << EnumVal << ",\n";
+ ++EnumVal;
+ }
}
OS << " OPERAND_TYPE_LIST_END" << "\n};\n";
@@ -358,7 +365,8 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OS << "namespace llvm {\n";
OS << "namespace " << Namespace << " {\n";
OS << "LLVM_READONLY\n";
- OS << "int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n";
+ OS << "static int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n";
+ // TODO: Factor out instructions with same operands to compress the tables.
if (!NumberedInstructions.empty()) {
std::vector<int> OperandOffsets;
std::vector<Record *> OperandRecords;
@@ -399,7 +407,10 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OS << "/**/\n ";
}
Record *OpR = OperandRecords[I];
- if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous())
+ if ((OpR->isSubClassOf("Operand") ||
+ OpR->isSubClassOf("RegisterOperand") ||
+ OpR->isSubClassOf("RegisterClass")) &&
+ !OpR->isAnonymous())
OS << "OpTypes::" << OpR->getName();
else
OS << -1;
@@ -414,7 +425,7 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OS << "}\n";
OS << "} // end namespace " << Namespace << "\n";
OS << "} // end namespace llvm\n";
- OS << "#endif //GET_INSTRINFO_OPERAND_TYPE\n\n";
+ OS << "#endif // GET_INSTRINFO_OPERAND_TYPE\n\n";
}
void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS,
@@ -436,8 +447,8 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS,
<< "(const MCInst &MI);\n";
}
- OS << "\n} // end " << TargetName << "_MC namespace\n";
- OS << "} // end llvm namespace\n\n";
+ OS << "\n} // end namespace " << TargetName << "_MC\n";
+ OS << "} // end namespace llvm\n\n";
OS << "#endif // GET_INSTRINFO_MC_HELPER_DECLS\n\n";
@@ -459,8 +470,8 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS,
OS << "\n}\n\n";
}
- OS << "} // end " << TargetName << "_MC namespace\n";
- OS << "} // end llvm namespace\n\n";
+ OS << "} // end namespace " << TargetName << "_MC\n";
+ OS << "} // end namespace llvm\n\n";
OS << "#endif // GET_GENISTRINFO_MC_HELPERS\n";
}
@@ -576,7 +587,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
<< TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, "
<< NumberedInstructions.size() << ");\n}\n\n";
- OS << "} // end llvm namespace\n";
+ OS << "} // end namespace llvm\n";
OS << "#endif // GET_INSTRINFO_MC_DESC\n\n";
@@ -592,7 +603,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
<< " ~" << ClassName << "() override = default;\n";
- OS << "\n};\n} // end llvm namespace\n";
+ OS << "\n};\n} // end namespace llvm\n";
OS << "#endif // GET_INSTRINFO_HEADER\n\n";
@@ -620,7 +631,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
<< " InitMCInstrInfo(" << TargetName << "Insts, " << TargetName
<< "InstrNameIndices, " << TargetName << "InstrNameData, "
<< NumberedInstructions.size() << ");\n}\n";
- OS << "} // end llvm namespace\n";
+ OS << "} // end namespace llvm\n";
OS << "#endif // GET_INSTRINFO_CTOR_DTOR\n\n";
@@ -651,6 +662,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
CodeGenTarget &Target = CDP.getTargetInfo();
// Emit all of the target independent flags...
+ if (Inst.isPreISelOpcode) OS << "|(1ULL<<MCID::PreISelOpcode)";
if (Inst.isPseudo) OS << "|(1ULL<<MCID::Pseudo)";
if (Inst.isReturn) OS << "|(1ULL<<MCID::Return)";
if (Inst.isEHScopeReturn) OS << "|(1ULL<<MCID::EHScopeReturn)";
@@ -765,8 +777,8 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) {
OS << " " << Inst->TheDef->getName() << "\t= " << Num++ << ",\n";
OS << " INSTRUCTION_LIST_END = " << Num << "\n";
OS << " };\n\n";
- OS << "} // end " << Namespace << " namespace\n";
- OS << "} // end llvm namespace\n";
+ OS << "} // end namespace " << Namespace << "\n";
+ OS << "} // end namespace llvm\n";
OS << "#endif // GET_INSTRINFO_ENUM\n\n";
OS << "#ifdef GET_INSTRINFO_SCHED_ENUM\n";
@@ -780,9 +792,9 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) {
OS << " " << Class.Name << "\t= " << Num++ << ",\n";
OS << " SCHED_LIST_END = " << Num << "\n";
OS << " };\n";
- OS << "} // end Sched namespace\n";
- OS << "} // end " << Namespace << " namespace\n";
- OS << "} // end llvm namespace\n";
+ OS << "} // end namespace Sched\n";
+ OS << "} // end namespace " << Namespace << "\n";
+ OS << "} // end namespace llvm\n";
OS << "#endif // GET_INSTRINFO_SCHED_ENUM\n\n";
}
@@ -794,4 +806,4 @@ void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) {
EmitMapTable(RK, OS);
}
-} // end llvm namespace
+} // end namespace llvm
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 979af98f6768..e01f91c20456 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -220,7 +220,11 @@ enum IIT_Info {
IIT_STRUCT7 = 39,
IIT_STRUCT8 = 40,
IIT_F128 = 41,
- IIT_VEC_ELEMENT = 42
+ IIT_VEC_ELEMENT = 42,
+ IIT_SCALABLE_VEC = 43,
+ IIT_SUBDIVIDE2_ARG = 44,
+ IIT_SUBDIVIDE4_ARG = 45,
+ IIT_VEC_OF_BITCASTS_TO_INT = 46
};
static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -292,6 +296,12 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
Sig.push_back(IIT_PTR_TO_ELT);
else if (R->isSubClassOf("LLVMVectorElementType"))
Sig.push_back(IIT_VEC_ELEMENT);
+ else if (R->isSubClassOf("LLVMSubdivide2VectorType"))
+ Sig.push_back(IIT_SUBDIVIDE2_ARG);
+ else if (R->isSubClassOf("LLVMSubdivide4VectorType"))
+ Sig.push_back(IIT_SUBDIVIDE4_ARG);
+ else if (R->isSubClassOf("LLVMVectorOfBitcastsToInt"))
+ Sig.push_back(IIT_VEC_OF_BITCASTS_TO_INT);
else
Sig.push_back(IIT_ARG);
return Sig.push_back((Number << 3) | 7 /*IITDescriptor::AK_MatchType*/);
@@ -339,6 +349,8 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
if (MVT(VT).isVector()) {
MVT VVT = VT;
+ if (VVT.isScalableVector())
+ Sig.push_back(IIT_SCALABLE_VEC);
switch (VVT.getVectorNumElements()) {
default: PrintFatalError("unhandled vector type width in intrinsic!");
case 1: Sig.push_back(IIT_V1); break;
@@ -647,6 +659,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << "Attribute::NoCapture";
addComma = true;
break;
+ case CodeGenIntrinsic::NoAlias:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::NoAlias";
+ addComma = true;
+ break;
case CodeGenIntrinsic::Returned:
if (addComma)
OS << ",";
diff --git a/utils/TableGen/RISCVCompressInstEmitter.cpp b/utils/TableGen/RISCVCompressInstEmitter.cpp
index e62f528ebc2e..2f1d3898f182 100644
--- a/utils/TableGen/RISCVCompressInstEmitter.cpp
+++ b/utils/TableGen/RISCVCompressInstEmitter.cpp
@@ -411,12 +411,8 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
assert(SourceDag && "Missing 'Input' in compress pattern!");
LLVM_DEBUG(dbgs() << "Input: " << *SourceDag << "\n");
- DefInit *OpDef = dyn_cast<DefInit>(SourceDag->getOperator());
- if (!OpDef)
- PrintFatalError(Rec->getLoc(),
- Rec->getName() + " has unexpected operator type!");
// Checking we are transforming from compressed to uncompressed instructions.
- Record *Operator = OpDef->getDef();
+ Record *Operator = SourceDag->getOperatorAsDef(Rec->getLoc());
if (!Operator->isSubClassOf("RVInst"))
PrintFatalError(Rec->getLoc(), "Input instruction '" + Operator->getName() +
"' is not a 32 bit wide instruction!");
@@ -428,12 +424,7 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
assert(DestDag && "Missing 'Output' in compress pattern!");
LLVM_DEBUG(dbgs() << "Output: " << *DestDag << "\n");
- DefInit *DestOpDef = dyn_cast<DefInit>(DestDag->getOperator());
- if (!DestOpDef)
- PrintFatalError(Rec->getLoc(),
- Rec->getName() + " has unexpected operator type!");
-
- Record *DestOperator = DestOpDef->getDef();
+ Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc());
if (!DestOperator->isSubClassOf("RVInst16"))
PrintFatalError(Rec->getLoc(), "Output instruction '" +
DestOperator->getName() +
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 1b619072c814..513cd14e0fab 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -888,7 +888,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
// Keep track of sub-register names as well. These are not differentially
// encoded.
typedef SmallVector<const CodeGenSubRegIndex*, 4> SubRegIdxVec;
- SequenceToOffsetTable<SubRegIdxVec, deref<llvm::less>> SubRegIdxSeqs;
+ SequenceToOffsetTable<SubRegIdxVec, deref<std::less<>>> SubRegIdxSeqs;
SmallVector<SubRegIdxVec, 4> SubRegIdxLists(Regs.size());
SequenceToOffsetTable<std::string> RegStrings;
@@ -1315,7 +1315,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
// Compress the sub-reg index lists.
typedef std::vector<const CodeGenSubRegIndex*> IdxList;
SmallVector<IdxList, 8> SuperRegIdxLists(RegisterClasses.size());
- SequenceToOffsetTable<IdxList, deref<llvm::less>> SuperRegIdxSeqs;
+ SequenceToOffsetTable<IdxList, deref<std::less<>>> SuperRegIdxSeqs;
BitVector MaskBV(RegisterClasses.size());
for (const auto &RC : RegisterClasses) {
diff --git a/utils/TableGen/SearchableTableEmitter.cpp b/utils/TableGen/SearchableTableEmitter.cpp
index 954b63e7253c..f08f8aa01956 100644
--- a/utils/TableGen/SearchableTableEmitter.cpp
+++ b/utils/TableGen/SearchableTableEmitter.cpp
@@ -134,7 +134,7 @@ private:
CodeGenIntrinsic &getIntrinsic(Init *I) {
std::unique_ptr<CodeGenIntrinsic> &Intr = Intrinsics[I];
if (!Intr)
- Intr = make_unique<CodeGenIntrinsic>(cast<DefInit>(I)->getDef());
+ Intr = std::make_unique<CodeGenIntrinsic>(cast<DefInit>(I)->getDef());
return *Intr;
}
@@ -496,7 +496,7 @@ void SearchableTableEmitter::emitGenericTable(const GenericTable &Table,
emitIfdef((Twine("GET_") + Table.PreprocessorGuard + "_IMPL").str(), OS);
// The primary data table contains all the fields defined for this map.
- OS << "const " << Table.CppTypeName << " " << Table.Name << "[] = {\n";
+ OS << "constexpr " << Table.CppTypeName << " " << Table.Name << "[] = {\n";
for (unsigned i = 0; i < Table.Entries.size(); ++i) {
Record *Entry = Table.Entries[i];
OS << " { ";
@@ -541,7 +541,7 @@ std::unique_ptr<SearchIndex>
SearchableTableEmitter::parseSearchIndex(GenericTable &Table, StringRef Name,
const std::vector<StringRef> &Key,
bool EarlyOut) {
- auto Index = llvm::make_unique<SearchIndex>();
+ auto Index = std::make_unique<SearchIndex>();
Index->Name = Name;
Index->EarlyOut = EarlyOut;
@@ -577,7 +577,7 @@ void SearchableTableEmitter::collectEnumEntries(
if (!ValueField.empty())
Value = getInt(EntryRec, ValueField);
- Enum.Entries.push_back(llvm::make_unique<GenericEnum::Entry>(Name, Value));
+ Enum.Entries.push_back(std::make_unique<GenericEnum::Entry>(Name, Value));
Enum.EntryMap.insert(std::make_pair(EntryRec, Enum.Entries.back().get()));
}
@@ -647,7 +647,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
if (!EnumRec->isValueUnset("ValueField"))
ValueField = EnumRec->getValueAsString("ValueField");
- auto Enum = llvm::make_unique<GenericEnum>();
+ auto Enum = std::make_unique<GenericEnum>();
Enum->Name = EnumRec->getName();
Enum->PreprocessorGuard = EnumRec->getName();
@@ -664,7 +664,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
}
for (auto TableRec : Records.getAllDerivedDefinitions("GenericTable")) {
- auto Table = llvm::make_unique<GenericTable>();
+ auto Table = std::make_unique<GenericTable>();
Table->Name = TableRec->getName();
Table->PreprocessorGuard = TableRec->getName();
Table->CppTypeName = TableRec->getValueAsString("CppTypeName");
@@ -733,7 +733,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
if (!Class->isValueUnset("EnumValueField"))
ValueField = Class->getValueAsString("EnumValueField");
- auto Enum = llvm::make_unique<GenericEnum>();
+ auto Enum = std::make_unique<GenericEnum>();
Enum->Name = (Twine(Class->getName()) + "Values").str();
Enum->PreprocessorGuard = Class->getName().upper();
Enum->Class = Class;
@@ -743,7 +743,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
Enums.emplace_back(std::move(Enum));
}
- auto Table = llvm::make_unique<GenericTable>();
+ auto Table = std::make_unique<GenericTable>();
Table->Name = (Twine(Class->getName()) + "sList").str();
Table->PreprocessorGuard = Class->getName().upper();
Table->CppTypeName = Class->getName();
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 9ce2b3b275c8..9b094adb7d5c 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -1057,6 +1057,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
LLVM_DEBUG(dbgs() << ProcModel.ModelName
<< " does not have resources for class " << SC.Name
<< '\n');
+ SCDesc.NumMicroOps = MCSchedClassDesc::InvalidNumMicroOps;
}
}
// Sum resources across all operand writes.
@@ -1728,7 +1729,7 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
<< " const MCInst *MI, unsigned CPUID) {\n";
emitSchedModelHelpersImpl(OS, /* OnlyExpandMCPredicates */ true);
OS << "}\n";
- OS << "} // end of namespace " << Target << "_MC\n\n";
+ OS << "} // end namespace " << Target << "_MC\n\n";
OS << "struct " << Target
<< "GenMCSubtargetInfo : public MCSubtargetInfo {\n";
@@ -1746,7 +1747,10 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
<< " return " << Target << "_MC"
<< "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID); \n";
OS << " }\n";
+ if (TGT.getHwModes().getNumModeIds() > 1)
+ OS << " unsigned getHwMode() const override;\n";
OS << "};\n";
+ EmitHwModeCheck(Target + "GenMCSubtargetInfo", OS);
}
void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) {
@@ -1858,7 +1862,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "namespace " << Target << "_MC {\n"
<< "unsigned resolveVariantSchedClassImpl(unsigned SchedClass,"
<< " const MCInst *MI, unsigned CPUID);\n"
- << "}\n\n";
+ << "} // end namespace " << Target << "_MC\n\n";
OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n"
<< " explicit " << ClassName << "(const Triple &TT, StringRef CPU, "
<< "StringRef FS);\n"
diff --git a/utils/TableGen/SubtargetFeatureInfo.cpp b/utils/TableGen/SubtargetFeatureInfo.cpp
index edf0b4a01c6d..5430f73d5e09 100644
--- a/utils/TableGen/SubtargetFeatureInfo.cpp
+++ b/utils/TableGen/SubtargetFeatureInfo.cpp
@@ -38,6 +38,10 @@ SubtargetFeatureInfo::getAll(const RecordKeeper &Records) {
if (Pred->getName().empty())
PrintFatalError(Pred->getLoc(), "Predicate has no name!");
+ // Ignore always true predicates.
+ if (Pred->getValueAsString("CondString").empty())
+ continue;
+
SubtargetFeatures.emplace_back(
Pred, SubtargetFeatureInfo(Pred, SubtargetFeatures.size()));
}
@@ -95,9 +99,11 @@ void SubtargetFeatureInfo::emitComputeAvailableFeatures(
OS << " PredicateBitset Features;\n";
for (const auto &SF : SubtargetFeatures) {
const SubtargetFeatureInfo &SFI = SF.second;
+ StringRef CondStr = SFI.TheDef->getValueAsString("CondString");
+ assert(!CondStr.empty() && "true predicate should have been filtered");
- OS << " if (" << SFI.TheDef->getValueAsString("CondString") << ")\n";
- OS << " Features[" << SFI.getEnumBitName() << "] = 1;\n";
+ OS << " if (" << CondStr << ")\n";
+ OS << " Features.set(" << SFI.getEnumBitName() << ");\n";
}
OS << " return Features;\n";
OS << "}\n\n";
@@ -142,7 +148,7 @@ void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
} while (true);
OS << ")\n";
- OS << " Features[" << SFI.getEnumBitName() << "] = 1;\n";
+ OS << " Features.set(" << SFI.getEnumBitName() << ");\n";
}
OS << " return Features;\n";
OS << "}\n\n";
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index c485ed2feb7a..f730d91160ad 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -49,10 +49,12 @@ enum ActionType {
GenAttributes,
GenSearchableTables,
GenGlobalISel,
+ GenGICombiner,
GenX86EVEX2VEXTables,
GenX86FoldTables,
GenRegisterBank,
GenExegesis,
+ GenAutomata,
};
namespace llvm {
@@ -62,75 +64,75 @@ bool TimeRegions = false;
} // end namespace llvm
namespace {
- cl::opt<ActionType>
- Action(cl::desc("Action to perform:"),
- cl::values(clEnumValN(PrintRecords, "print-records",
- "Print all records to stdout (default)"),
- clEnumValN(DumpJSON, "dump-json",
- "Dump all records as machine-readable JSON"),
- clEnumValN(GenEmitter, "gen-emitter",
- "Generate machine code emitter"),
- clEnumValN(GenRegisterInfo, "gen-register-info",
- "Generate registers and register classes info"),
- clEnumValN(GenInstrInfo, "gen-instr-info",
- "Generate instruction descriptions"),
- clEnumValN(GenInstrDocs, "gen-instr-docs",
- "Generate instruction documentation"),
- clEnumValN(GenCallingConv, "gen-callingconv",
- "Generate calling convention descriptions"),
- clEnumValN(GenAsmWriter, "gen-asm-writer",
- "Generate assembly writer"),
- clEnumValN(GenDisassembler, "gen-disassembler",
- "Generate disassembler"),
- clEnumValN(GenPseudoLowering, "gen-pseudo-lowering",
- "Generate pseudo instruction lowering"),
- clEnumValN(GenCompressInst, "gen-compress-inst-emitter",
- "Generate RISCV compressed instructions."),
- clEnumValN(GenAsmMatcher, "gen-asm-matcher",
- "Generate assembly instruction matcher"),
- clEnumValN(GenDAGISel, "gen-dag-isel",
- "Generate a DAG instruction selector"),
- clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer",
- "Generate DFA Packetizer for VLIW targets"),
- clEnumValN(GenFastISel, "gen-fast-isel",
- "Generate a \"fast\" instruction selector"),
- clEnumValN(GenSubtarget, "gen-subtarget",
- "Generate subtarget enumerations"),
- clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums",
- "Generate intrinsic enums"),
- clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl",
- "Generate intrinsic information"),
- clEnumValN(GenTgtIntrinsicEnums, "gen-tgt-intrinsic-enums",
- "Generate target intrinsic enums"),
- clEnumValN(GenTgtIntrinsicImpl, "gen-tgt-intrinsic-impl",
- "Generate target intrinsic information"),
- clEnumValN(PrintEnums, "print-enums",
- "Print enum values for a class"),
- clEnumValN(PrintSets, "print-sets",
- "Print expanded sets for testing DAG exprs"),
- clEnumValN(GenOptParserDefs, "gen-opt-parser-defs",
- "Generate option definitions"),
- clEnumValN(GenCTags, "gen-ctags",
- "Generate ctags-compatible index"),
- clEnumValN(GenAttributes, "gen-attrs",
- "Generate attributes"),
- clEnumValN(GenSearchableTables, "gen-searchable-tables",
- "Generate generic binary-searchable table"),
- clEnumValN(GenGlobalISel, "gen-global-isel",
- "Generate GlobalISel selector"),
- clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables",
- "Generate X86 EVEX to VEX compress tables"),
- clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
- "Generate X86 fold tables"),
- clEnumValN(GenRegisterBank, "gen-register-bank",
- "Generate registers bank descriptions"),
- clEnumValN(GenExegesis, "gen-exegesis",
- "Generate llvm-exegesis tables")));
+cl::opt<ActionType> Action(
+ cl::desc("Action to perform:"),
+ cl::values(
+ clEnumValN(PrintRecords, "print-records",
+ "Print all records to stdout (default)"),
+ clEnumValN(DumpJSON, "dump-json",
+ "Dump all records as machine-readable JSON"),
+ clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"),
+ clEnumValN(GenRegisterInfo, "gen-register-info",
+ "Generate registers and register classes info"),
+ clEnumValN(GenInstrInfo, "gen-instr-info",
+ "Generate instruction descriptions"),
+ clEnumValN(GenInstrDocs, "gen-instr-docs",
+ "Generate instruction documentation"),
+ clEnumValN(GenCallingConv, "gen-callingconv",
+ "Generate calling convention descriptions"),
+ clEnumValN(GenAsmWriter, "gen-asm-writer", "Generate assembly writer"),
+ clEnumValN(GenDisassembler, "gen-disassembler",
+ "Generate disassembler"),
+ clEnumValN(GenPseudoLowering, "gen-pseudo-lowering",
+ "Generate pseudo instruction lowering"),
+ clEnumValN(GenCompressInst, "gen-compress-inst-emitter",
+ "Generate RISCV compressed instructions."),
+ clEnumValN(GenAsmMatcher, "gen-asm-matcher",
+ "Generate assembly instruction matcher"),
+ clEnumValN(GenDAGISel, "gen-dag-isel",
+ "Generate a DAG instruction selector"),
+ clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer",
+ "Generate DFA Packetizer for VLIW targets"),
+ clEnumValN(GenFastISel, "gen-fast-isel",
+ "Generate a \"fast\" instruction selector"),
+ clEnumValN(GenSubtarget, "gen-subtarget",
+ "Generate subtarget enumerations"),
+ clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums",
+ "Generate intrinsic enums"),
+ clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl",
+ "Generate intrinsic information"),
+ clEnumValN(GenTgtIntrinsicEnums, "gen-tgt-intrinsic-enums",
+ "Generate target intrinsic enums"),
+ clEnumValN(GenTgtIntrinsicImpl, "gen-tgt-intrinsic-impl",
+ "Generate target intrinsic information"),
+ clEnumValN(PrintEnums, "print-enums", "Print enum values for a class"),
+ clEnumValN(PrintSets, "print-sets",
+ "Print expanded sets for testing DAG exprs"),
+ clEnumValN(GenOptParserDefs, "gen-opt-parser-defs",
+ "Generate option definitions"),
+ clEnumValN(GenCTags, "gen-ctags", "Generate ctags-compatible index"),
+ clEnumValN(GenAttributes, "gen-attrs", "Generate attributes"),
+ clEnumValN(GenSearchableTables, "gen-searchable-tables",
+ "Generate generic binary-searchable table"),
+ clEnumValN(GenGlobalISel, "gen-global-isel",
+ "Generate GlobalISel selector"),
+ clEnumValN(GenGICombiner, "gen-global-isel-combiner",
+ "Generate GlobalISel combiner"),
+ clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables",
+ "Generate X86 EVEX to VEX compress tables"),
+ clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
+ "Generate X86 fold tables"),
+ clEnumValN(GenRegisterBank, "gen-register-bank",
+ "Generate registers bank descriptions"),
+ clEnumValN(GenExegesis, "gen-exegesis",
+ "Generate llvm-exegesis tables"),
+ clEnumValN(GenAutomata, "gen-automata",
+ "Generate generic automata")));
- cl::OptionCategory PrintEnumsCat("Options for -print-enums");
- cl::opt<std::string>
- Class("class", cl::desc("Print Enum list for this class"),
- cl::value_desc("class name"), cl::cat(PrintEnumsCat));
+cl::OptionCategory PrintEnumsCat("Options for -print-enums");
+cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"),
+ cl::value_desc("class name"),
+ cl::cat(PrintEnumsCat));
cl::opt<bool, true>
TimeRegionsOpt("time-regions",
@@ -235,6 +237,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenGlobalISel:
EmitGlobalISel(Records, OS);
break;
+ case GenGICombiner:
+ EmitGICombiner(Records, OS);
+ break;
case GenRegisterBank:
EmitRegisterBank(Records, OS);
break;
@@ -247,6 +252,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenExegesis:
EmitExegesis(Records, OS);
break;
+ case GenAutomata:
+ EmitAutomata(Records, OS);
+ break;
}
return false;
@@ -263,11 +271,16 @@ int main(int argc, char **argv) {
return TableGenMain(argv[0], &LLVMTableGenMain);
}
-#ifdef __has_feature
-#if __has_feature(address_sanitizer)
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) || \
+ __has_feature(leak_sanitizer)
+
#include <sanitizer/lsan_interface.h>
// Disable LeakSanitizer for this binary as it has too many leaks that are not
// very interesting to fix. See compiler-rt/include/sanitizer/lsan_interface.h .
LLVM_ATTRIBUTE_USED int __lsan_is_turned_off() { return 1; }
-#endif // __has_feature(address_sanitizer)
-#endif // defined(__has_feature)
+
+#endif
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index 135ec65c0f95..8c067dd51b3b 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -85,10 +85,12 @@ void EmitCTags(RecordKeeper &RK, raw_ostream &OS);
void EmitAttributes(RecordKeeper &RK, raw_ostream &OS);
void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS);
void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS);
+void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS);
void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS);
void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS);
void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS);
void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
+void EmitAutomata(RecordKeeper &RK, raw_ostream &OS);
} // End llvm namespace
diff --git a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index 365cba5a60ca..54aa5a8164f2 100644
--- a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -167,7 +167,7 @@ void emitWebAssemblyDisassemblerTables(
OS << " },\n";
}
OS << " { 0, nullptr }\n};\n\n";
- OS << "} // End llvm namespace\n";
+ OS << "} // end namespace llvm\n";
}
} // namespace llvm
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index 8036aecc4f4b..14bce4c29446 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -651,7 +651,7 @@ static const char* stringForDecisionType(ModRMDecisionType dt) {
DisassemblerTables::DisassemblerTables() {
for (unsigned i = 0; i < array_lengthof(Tables); i++)
- Tables[i] = llvm::make_unique<ContextDecision>();
+ Tables[i] = std::make_unique<ContextDecision>();
HasConflicts = false;
}
diff --git a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 3df14f40e4a9..6dc7e31e0dab 100644
--- a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -98,6 +98,7 @@ public:
bool EVEX_W1_VEX_W0 = RecE->getValueAsBit("EVEX_W1_VEX_W0");
if (RecV->getValueAsDef("OpEnc")->getName().str() != "EncVEX" ||
+ RecV->getValueAsBit("isCodeGenOnly") != RecE->getValueAsBit("isCodeGenOnly") ||
// VEX/EVEX fields
RecV->getValueAsDef("OpPrefix") != RecE->getValueAsDef("OpPrefix") ||
RecV->getValueAsDef("OpMap") != RecE->getValueAsDef("OpMap") ||
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index ab8a8855c478..33dc6f3f9e23 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -749,7 +749,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
case X86Local::RawFrmImm8:
case X86Local::RawFrmImm16:
case X86Local::AddCCFrm:
- filter = llvm::make_unique<DumbFilter>();
+ filter = std::make_unique<DumbFilter>();
break;
case X86Local::MRMDestReg:
case X86Local::MRMSrcReg:
@@ -758,7 +758,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
case X86Local::MRMSrcRegCC:
case X86Local::MRMXrCC:
case X86Local::MRMXr:
- filter = llvm::make_unique<ModFilter>(true);
+ filter = std::make_unique<ModFilter>(true);
break;
case X86Local::MRMDestMem:
case X86Local::MRMSrcMem:
@@ -767,22 +767,22 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
case X86Local::MRMSrcMemCC:
case X86Local::MRMXmCC:
case X86Local::MRMXm:
- filter = llvm::make_unique<ModFilter>(false);
+ filter = std::make_unique<ModFilter>(false);
break;
case X86Local::MRM0r: case X86Local::MRM1r:
case X86Local::MRM2r: case X86Local::MRM3r:
case X86Local::MRM4r: case X86Local::MRM5r:
case X86Local::MRM6r: case X86Local::MRM7r:
- filter = llvm::make_unique<ExtendedFilter>(true, Form - X86Local::MRM0r);
+ filter = std::make_unique<ExtendedFilter>(true, Form - X86Local::MRM0r);
break;
case X86Local::MRM0m: case X86Local::MRM1m:
case X86Local::MRM2m: case X86Local::MRM3m:
case X86Local::MRM4m: case X86Local::MRM5m:
case X86Local::MRM6m: case X86Local::MRM7m:
- filter = llvm::make_unique<ExtendedFilter>(false, Form - X86Local::MRM0m);
+ filter = std::make_unique<ExtendedFilter>(false, Form - X86Local::MRM0m);
break;
X86_INSTR_MRM_MAPPING
- filter = llvm::make_unique<ExactFilter>(0xC0 + Form - X86Local::MRM_C0);
+ filter = std::make_unique<ExactFilter>(0xC0 + Form - X86Local::MRM_C0);
break;
} // switch (Form)
@@ -854,6 +854,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("GR64", TYPE_R64)
TYPE("i8mem", TYPE_M)
TYPE("i8imm", TYPE_IMM)
+ TYPE("u4imm", TYPE_UIMM8)
TYPE("u8imm", TYPE_UIMM8)
TYPE("i16u8imm", TYPE_UIMM8)
TYPE("i32u8imm", TYPE_UIMM8)
@@ -973,6 +974,7 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
ENCODING("i64i32imm", ENCODING_ID)
ENCODING("i64i8imm", ENCODING_IB)
ENCODING("i8imm", ENCODING_IB)
+ ENCODING("u4imm", ENCODING_IB)
ENCODING("u8imm", ENCODING_IB)
ENCODING("i16u8imm", ENCODING_IB)
ENCODING("i32u8imm", ENCODING_IB)
diff --git a/utils/add_argument_names.py b/utils/add_argument_names.py
new file mode 100755
index 000000000000..38dde2599794
--- /dev/null
+++ b/utils/add_argument_names.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+import re, sys
+
+def fix_string(s):
+ TYPE = re.compile('\s*(i[0-9]+|float|double|x86_fp80|fp128|ppc_fp128|\[\[.*?\]\]|\[2 x \[\[[A-Z_0-9]+\]\]\]|<.*?>|{.*?}|\[[0-9]+ x .*?\]|%["a-z:A-Z0-9._]+({{.*?}})?|%{{.*?}}|{{.*?}}|\[\[.*?\]\])(\s*(\*|addrspace\(.*?\)|dereferenceable\(.*?\)|byval\(.*?\)|sret|zeroext|inreg|returned|signext|nocapture|align \d+|swiftself|swifterror|readonly|noalias|inalloca|nocapture))*\s*')
+
+ counter = 0
+ if 'i32{{.*}}' in s:
+ counter = 1
+
+ at_pos = s.find('@')
+ if at_pos == -1:
+ at_pos = 0
+
+ annoying_pos = s.find('{{[^(]+}}')
+ if annoying_pos != -1:
+ at_pos = annoying_pos + 9
+
+ paren_pos = s.find('(', at_pos)
+ if paren_pos == -1:
+ return s
+
+ res = s[:paren_pos+1]
+ s = s[paren_pos+1:]
+
+ m = TYPE.match(s)
+ while m:
+ res += m.group()
+ s = s[m.end():]
+ if s.startswith(',') or s.startswith(')'):
+ res += f' %{counter}'
+ counter += 1
+
+ next_arg = s.find(',')
+ if next_arg == -1:
+ break
+
+ res += s[:next_arg+1]
+ s = s[next_arg+1:]
+ m = TYPE.match(s)
+
+ return res+s
+
+def process_file(contents):
+ PREFIX = re.compile(r'check-prefix(es)?(=|\s+)([a-zA-Z0-9,]+)')
+ check_prefixes = ['CHECK']
+ result = ''
+ for line in contents.split('\n'):
+ if 'FileCheck' in line:
+ m = PREFIX.search(line)
+ if m:
+ check_prefixes.extend(m.group(3).split(','))
+
+ found_check = False
+ for prefix in check_prefixes:
+ if prefix in line:
+ found_check = True
+ break
+
+ if not found_check or 'define' not in line:
+ result += line + '\n'
+ continue
+
+ # We have a check for a function definition. Number the args.
+ line = fix_string(line)
+ result += line + '\n'
+ return result
+
+def main():
+ print(f'Processing {sys.argv[1]}')
+ f = open(sys.argv[1])
+ content = f.read()
+ f.close()
+
+ content = process_file(content)
+
+ f = open(sys.argv[1], 'w')
+ f.write(content)
+ f.close()
+
+if __name__ == '__main__':
+ main()
diff --git a/utils/llvm-locstats/CMakeLists.txt b/utils/llvm-locstats/CMakeLists.txt
new file mode 100644
index 000000000000..a919023e141e
--- /dev/null
+++ b/utils/llvm-locstats/CMakeLists.txt
@@ -0,0 +1,12 @@
+if (LLVM_BUILD_UTILS AND LLVM_BUILD_TOOLS)
+ add_custom_command(
+ OUTPUT ${LLVM_TOOLS_BINARY_DIR}/llvm-locstats
+ DEPENDS ${LLVM_MAIN_SRC_DIR}/utils/llvm-locstats/llvm-locstats.py
+ COMMAND ${CMAKE_COMMAND} -E copy ${LLVM_MAIN_SRC_DIR}/utils/llvm-locstats/llvm-locstats.py ${LLVM_TOOLS_BINARY_DIR}/llvm-locstats
+ COMMENT "Copying llvm-locstats into ${LLVM_TOOLS_BINARY_DIR}"
+ )
+ add_custom_target(llvm-locstats ALL
+ DEPENDS ${LLVM_TOOLS_BINARY_DIR}/llvm-locstats
+ )
+ set_target_properties(llvm-locstats PROPERTIES FOLDER "Tools")
+endif()
diff --git a/utils/llvm-locstats/llvm-locstats.py b/utils/llvm-locstats/llvm-locstats.py
new file mode 100755
index 000000000000..4df525ed1a96
--- /dev/null
+++ b/utils/llvm-locstats/llvm-locstats.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+#
+# This is a tool that works like debug location coverage calculator.
+# It parses the llvm-dwarfdump --statistics output by reporting it
+# in a more human readable way.
+#
+
+from __future__ import print_function
+import argparse
+import os
+import sys
+from json import loads
+from math import ceil
+from subprocess import Popen, PIPE
+
+def coverage_buckets():
+ yield '0%'
+ yield '1-9%'
+ for start in range(10, 91, 10):
+ yield '{0}-{1}%'.format(start, start + 9)
+ yield '100%'
+
+def locstats_output(
+ variables_total,
+ variables_total_locstats,
+ variables_with_loc,
+ scope_bytes_covered,
+ scope_bytes_from_first_def,
+ variables_coverage_map
+ ):
+
+ pc_ranges_covered = int(ceil(scope_bytes_covered * 100.0)
+ / scope_bytes_from_first_def)
+ variables_coverage_per_map = {}
+ for cov_bucket in coverage_buckets():
+ variables_coverage_per_map[cov_bucket] = \
+ int(ceil(variables_coverage_map[cov_bucket] * 100.0) \
+ / variables_total_locstats)
+
+ print (' =================================================')
+ print (' Debug Location Statistics ')
+ print (' =================================================')
+ print (' cov% samples percentage(~) ')
+ print (' -------------------------------------------------')
+ for cov_bucket in coverage_buckets():
+ print (' {0:6} {1:8d} {2:3d}%'. \
+ format(cov_bucket, variables_coverage_map[cov_bucket], \
+ variables_coverage_per_map[cov_bucket]))
+ print (' =================================================')
+ print (' -the number of debug variables processed: ' \
+ + str(variables_total_locstats))
+ print (' -PC ranges covered: ' + str(pc_ranges_covered) + '%')
+
+ # Only if we are processing all the variables output the total
+ # availability.
+ if variables_total and variables_with_loc:
+ total_availability = int(ceil(variables_with_loc * 100.0) \
+ / variables_total)
+ print (' -------------------------------------------------')
+ print (' -total availability: ' + str(total_availability) + '%')
+ print (' =================================================')
+
+def parse_program_args(parser):
+ parser.add_argument('-only-variables', action='store_true',
+ default=False,
+ help='calculate the location statistics only for '
+ 'local variables'
+ )
+ parser.add_argument('-only-formal-parameters', action='store_true',
+ default=False,
+ help='calculate the location statistics only for '
+ 'formal parameters'
+ )
+ parser.add_argument('-ignore-debug-entry-values', action='store_true',
+ default=False,
+ help='ignore the location statistics on locations with '
+ 'entry values'
+ )
+ parser.add_argument('file_name', type=str, help='file to process')
+ return parser.parse_args()
+
+
+def Main():
+ parser = argparse.ArgumentParser()
+ results = parse_program_args(parser)
+
+ if len(sys.argv) < 2:
+ print ('error: Too few arguments.')
+ parser.print_help()
+ sys.exit(1)
+
+ if results.only_variables and results.only_formal_parameters:
+ print ('error: Please use just one only* option.')
+ parser.print_help()
+ sys.exit(1)
+
+ # These will be different due to different options enabled.
+ variables_total = None
+ variables_total_locstats = None
+ variables_with_loc = None
+ variables_scope_bytes_covered = None
+ variables_scope_bytes_from_first_def = None
+ variables_scope_bytes_entry_values = None
+ variables_coverage_map = {}
+ binary = results.file_name
+
+ # Get the directory of the LLVM tools.
+ llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), \
+ "llvm-dwarfdump")
+ # The statistics llvm-dwarfdump option.
+ llvm_dwarfdump_stats_opt = "--statistics"
+
+ subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary], \
+ stdin=PIPE, stdout=PIPE, stderr=PIPE, \
+ universal_newlines = True)
+ cmd_stdout, cmd_stderr = subproc.communicate()
+
+ # Get the JSON and parse it.
+ json_parsed = None
+
+ try:
+ json_parsed = loads(cmd_stdout)
+ except:
+ print ('error: No valid llvm-dwarfdump statistics found.')
+ sys.exit(1)
+
+ if results.only_variables:
+ # Read the JSON only for local variables.
+ variables_total_locstats = \
+ json_parsed['total vars procesed by location statistics']
+ variables_scope_bytes_covered = \
+ json_parsed['vars scope bytes covered']
+ variables_scope_bytes_from_first_def = \
+ json_parsed['vars scope bytes total']
+ if not results.ignore_debug_entry_values:
+ for cov_bucket in coverage_buckets():
+ cov_category = "vars with {} of its scope covered".format(cov_bucket)
+ variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+ else:
+ variables_scope_bytes_entry_values = \
+ json_parsed['vars entry value scope bytes covered']
+ variables_scope_bytes_covered = variables_scope_bytes_covered \
+ - variables_scope_bytes_entry_values
+ for cov_bucket in coverage_buckets():
+ cov_category = \
+ "vars (excluding the debug entry values) " \
+ "with {} of its scope covered".format(cov_bucket)
+ variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+ elif results.only_formal_parameters:
+ # Read the JSON only for formal parameters.
+ variables_total_locstats = \
+ json_parsed['total params procesed by location statistics']
+ variables_scope_bytes_covered = \
+ json_parsed['formal params scope bytes covered']
+ variables_scope_bytes_from_first_def = \
+ json_parsed['formal params scope bytes total']
+ if not results.ignore_debug_entry_values:
+ for cov_bucket in coverage_buckets():
+ cov_category = "params with {} of its scope covered".format(cov_bucket)
+ variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+ else:
+ variables_scope_bytes_entry_values = \
+ json_parsed['formal params entry value scope bytes covered']
+ variables_scope_bytes_covered = variables_scope_bytes_covered \
+ - variables_scope_bytes_entry_values
+ for cov_bucket in coverage_buckets():
+ cov_category = \
+ "params (excluding the debug entry values) " \
+ "with {} of its scope covered".format(cov_bucket)
+ variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+ else:
+ # Read the JSON for both local variables and formal parameters.
+ variables_total = \
+ json_parsed['source variables']
+ variables_with_loc = json_parsed['variables with location']
+ variables_total_locstats = \
+ json_parsed['total variables procesed by location statistics']
+ variables_scope_bytes_covered = \
+ json_parsed['scope bytes covered']
+ variables_scope_bytes_from_first_def = \
+ json_parsed['scope bytes total']
+ if not results.ignore_debug_entry_values:
+ for cov_bucket in coverage_buckets():
+ cov_category = "variables with {} of its scope covered". \
+ format(cov_bucket)
+ variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+ else:
+ variables_scope_bytes_entry_values = \
+ json_parsed['entry value scope bytes covered']
+ variables_scope_bytes_covered = variables_scope_bytes_covered \
+ - variables_scope_bytes_entry_values
+ for cov_bucket in coverage_buckets():
+ cov_category = "variables (excluding the debug entry values) " \
+ "with {} of its scope covered". format(cov_bucket)
+ variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+
+ # Pretty print collected info.
+ locstats_output(
+ variables_total,
+ variables_total_locstats,
+ variables_with_loc,
+ variables_scope_bytes_covered,
+ variables_scope_bytes_from_first_def,
+ variables_coverage_map
+ )
+
+if __name__ == '__main__':
+ Main()
+ sys.exit(0)